author     Devtools Arcadia <arcadia-devtools@yandex-team.ru>  2022-02-07 18:08:42 +0300
committer  Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net>  2022-02-07 18:08:42 +0300
commit     1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
tree       e26c9fed0de5d9873cce7e00bc214573dc2195b7 /contrib/libs/tcmalloc
download   ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz

intermediate changes
ref: cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'contrib/libs/tcmalloc')
-rw-r--r--  contrib/libs/tcmalloc/.yandex_meta/devtools.copyrights.report | 255
-rw-r--r--  contrib/libs/tcmalloc/.yandex_meta/devtools.licenses.report | 331
-rw-r--r--  contrib/libs/tcmalloc/.yandex_meta/licenses.list.txt | 492
-rw-r--r--  contrib/libs/tcmalloc/CONTRIBUTING.md | 74
-rw-r--r--  contrib/libs/tcmalloc/LICENSE | 202
-rw-r--r--  contrib/libs/tcmalloc/README.md | 44
-rw-r--r--  contrib/libs/tcmalloc/common.inc | 58
-rw-r--r--  contrib/libs/tcmalloc/default/ya.make | 22
-rw-r--r--  contrib/libs/tcmalloc/dynamic/ya.make | 2
-rw-r--r--  contrib/libs/tcmalloc/malloc_extension/ya.make | 37
-rw-r--r--  contrib/libs/tcmalloc/numa_256k/ya.make | 28
-rw-r--r--  contrib/libs/tcmalloc/numa_large_pages/ya.make | 28
-rw-r--r--  contrib/libs/tcmalloc/patches/fork.patch | 310
-rw-r--r--  contrib/libs/tcmalloc/patches/userdata.patch | 220
-rw-r--r--  contrib/libs/tcmalloc/patches/yandex.patch | 91
-rw-r--r--  contrib/libs/tcmalloc/slow_but_small/ya.make | 21
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/BUILD | 1316
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/arena.cc | 78
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/arena.h | 68
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/arena_test.cc | 38
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/background.cc | 182
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/central_freelist.cc | 218
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/central_freelist.h | 142
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/central_freelist_benchmark.cc | 198
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/central_freelist_test.cc | 121
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/common.cc | 204
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/common.h | 524
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/cpu_cache.cc | 1140
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/cpu_cache.h | 390
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/cpu_cache_test.cc | 599
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/experiment.cc | 162
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/experiment.h | 71
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/experiment_config.h | 51
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/experiment_config_test.cc | 31
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/experiment_fuzz.cc | 38
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/experimental_56_size_class.cc | 706
-rwxr-xr-x  contrib/libs/tcmalloc/tcmalloc/experimental_pow2_below64_size_class.cc | 679
-rwxr-xr-x  contrib/libs/tcmalloc/tcmalloc/experimental_pow2_size_class.cc | 239
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator.cc | 562
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator.h | 311
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator_benchmark.cc | 60
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator_test.cc | 243
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/heap_profiling_test.cc | 122
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/huge_address_map.cc | 374
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/huge_address_map.h | 148
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/huge_address_map_test.cc | 85
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/huge_allocator.cc | 175
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/huge_allocator.h | 108
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/huge_allocator_test.cc | 449
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/huge_cache.cc | 494
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/huge_cache.h | 228
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/huge_cache_test.cc | 563
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/huge_page_aware_allocator.cc | 676
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/huge_page_aware_allocator.h | 175
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/huge_page_aware_allocator_test.cc | 957
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/huge_page_filler.h | 2113
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/huge_page_filler_test.cc | 3799
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/huge_pages.h | 343
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/huge_region.h | 551
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/huge_region_test.cc | 565
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/atomic_danger.h | 60
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/atomic_stats_counter.h | 74
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/bits.h | 82
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/bits_test.cc | 104
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/cache_topology.cc | 88
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/cache_topology.h | 36
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/cache_topology_test.cc | 51
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/clock.h | 41
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/config.h | 136
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/declarations.h | 42
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/environment.cc | 45
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/environment.h | 42
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/environment_test.cc | 45
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/lifetime_predictions.h | 252
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/lifetime_predictions_test.cc | 156
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/lifetime_tracker.h | 172
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/lifetime_tracker_test.cc | 129
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/linked_list.h | 254
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/linked_list_benchmark.cc | 146
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/linked_list_test.cc | 239
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/linux_syscall_support.h | 65
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/logging.cc | 276
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/logging.h | 222
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/logging_test.cc | 117
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/logging_test_helper.cc | 18
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/memory_stats.cc | 132
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/memory_stats.h | 41
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/memory_stats_test.cc | 43
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/mincore.cc | 129
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/mincore.h | 65
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/mincore_benchmark.cc | 61
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/mincore_test.cc | 193
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/mock_span.h | 42
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/numa.cc | 220
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/numa.h | 227
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/numa_test.cc | 284
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/optimization.h | 45
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/parameter_accessors.h | 56
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/percpu.cc | 352
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/percpu.h | 342
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_aarch64.S | 524
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_asm.S | 41
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_ppc.S | 606
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_unsupported.cc | 87
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_x86_64.S | 463
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/percpu_tcmalloc.h | 1279
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/percpu_tcmalloc_test.cc | 855
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/proc_maps.cc | 171
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/proc_maps.h | 70
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/range_tracker.h | 503
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/range_tracker_benchmark.cc | 387
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/range_tracker_test.cc | 294
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/timeseries_tracker.h | 195
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/timeseries_tracker_test.cc | 191
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/util.cc | 195
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal/util.h | 138
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/internal_malloc_extension.h | 133
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/legacy_size_classes.cc | 711
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/libc_override.h | 39
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/libc_override_gcc_and_weak.h | 114
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/libc_override_glibc.h | 120
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/libc_override_redefine.h | 100
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/malloc_extension.cc | 530
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/malloc_extension.h | 617
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/malloc_extension_fuzz.cc | 42
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/malloc_extension_system_malloc_test.cc | 87
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/malloc_extension_test.cc | 67
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/mock_central_freelist.cc | 64
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/mock_central_freelist.h | 89
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/mock_transfer_cache.cc | 24
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/mock_transfer_cache.h | 310
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/noruntime_size_classes.cc | 33
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/page_allocator.cc | 196
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/page_allocator.h | 241
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/page_allocator_interface.cc | 89
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/page_allocator_interface.h | 97
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/page_allocator_test.cc | 145
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/page_allocator_test_util.h | 79
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/page_heap.cc | 528
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/page_heap.h | 161
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/page_heap_allocator.h | 93
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/page_heap_test.cc | 109
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/pagemap.cc | 73
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/pagemap.h | 431
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/pagemap_test.cc | 166
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/pages.h | 298
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/parameters.cc | 271
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/parameters.h | 152
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/peak_heap_tracker.cc | 93
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/peak_heap_tracker.h | 61
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/profile_test.cc | 281
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/realloc_test.cc | 104
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/runtime_size_classes.cc | 81
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/runtime_size_classes.h | 49
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/runtime_size_classes_fuzz.cc | 30
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/runtime_size_classes_test.cc | 114
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/sampler.cc | 206
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/sampler.h | 298
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/size_class_info.h | 79
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/size_classes.cc | 711
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/size_classes_test.cc | 469
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/size_classes_with_runtime_size_classes_test.cc | 127
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/span.cc | 332
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/span.h | 589
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/span_benchmark.cc | 212
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/span_stats.h | 50
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/span_test.cc | 191
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/stack_trace_table.cc | 155
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/stack_trace_table.h | 97
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/stack_trace_table_test.cc | 389
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/static_vars.cc | 138
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/static_vars.h | 262
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/stats.cc | 553
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/stats.h | 271
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/stats_test.cc | 268
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/system-alloc.cc | 623
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/system-alloc.h | 91
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/system-alloc_test.cc | 147
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/tcmalloc.cc | 2441
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/tcmalloc.h | 126
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/tcmalloc_large_test.cc | 204
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/tcmalloc_policy.h | 260
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/thread_cache.cc | 417
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/thread_cache.h | 345
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/thread_cache_test.cc | 132
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/tracking.h | 109
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/transfer_cache.cc | 162
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/transfer_cache.h | 341
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/transfer_cache_benchmark.cc | 149
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/transfer_cache_fuzz.cc | 73
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/transfer_cache_internals.h | 896
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/transfer_cache_stats.h | 35
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/transfer_cache_test.cc | 625
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/want_hpaa.cc | 30
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/want_hpaa_subrelease.cc | 30
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/want_legacy_spans.cc | 28
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/want_no_hpaa.cc | 30
-rw-r--r--  contrib/libs/tcmalloc/tcmalloc/want_numa_aware.cc | 28
-rw-r--r--  contrib/libs/tcmalloc/ya.make | 38
199 files changed, 53413 insertions, 0 deletions
diff --git a/contrib/libs/tcmalloc/.yandex_meta/devtools.copyrights.report b/contrib/libs/tcmalloc/.yandex_meta/devtools.copyrights.report
new file mode 100644
index 0000000000..f5c52b8f16
--- /dev/null
+++ b/contrib/libs/tcmalloc/.yandex_meta/devtools.copyrights.report
@@ -0,0 +1,255 @@
+# File format ($ symbol means the beginning of a line):
+#
+# $ # this message
+# $ # =======================
+# $ # comments (all comments should start with some number of spaces and a # symbol)
+# ${action} {license id} {license text hash}
+# $BELONGS ./ya/make/file/relative/path/1/ya.make ./ya/make/2/ya.make
+# ${all_file_action} filename
+# $ # user commentaries (many lines)
+# $ generated description - files with this license, license text... (some number of lines that starts with some number of spaces, do not modify)
+# ${action} {license spdx} {license text hash}
+# $BELONGS ./ya/make/file/relative/path/3/ya.make
+# ${all_file_action} filename
+# $ # user commentaries
+# $ generated description
+# $ ...
+#
+# You can modify action, all_file_action and add commentaries
+# Available actions:
+# keep - keep license in contrib and use in credits
+# skip - skip license
+# remove - remove all files with this license
+# rename - save license text/links into the licenses text file, but do not store the SPDX into the LICENSE macro. You should store the correct license id into the devtools.license.spdx.txt file
+#
+# {all file action} records will be generated when the license text contains a filename that exists on the filesystem (in the contrib directory)
+# We suppose that these files can contain some license info
+# Available all file actions:
+# FILE_IGNORE - ignore file (do nothing)
+# FILE_INCLUDE - include all file data into licenses text file
+# =======================
+
+KEEP COPYRIGHT_SERVICE_LABEL 279545394b5ad4b6b26c0686ac5f9921
+BELONGS ya.make
+ License text:
+ // Copyright 2019 The TCMalloc Authors
+ Scancode info:
+ Original SPDX id: COPYRIGHT_SERVICE_LABEL
+ Score : 100.00
+ Match type : COPYRIGHT
+ Files with this license:
+ tcmalloc/BUILD [1:1]
+ tcmalloc/arena.cc [1:1]
+ tcmalloc/arena.h [1:1]
+ tcmalloc/background.cc [1:1]
+ tcmalloc/central_freelist.cc [1:1]
+ tcmalloc/central_freelist.h [1:1]
+ tcmalloc/central_freelist_test.cc [1:1]
+ tcmalloc/common.cc [1:1]
+ tcmalloc/common.h [1:1]
+ tcmalloc/cpu_cache.cc [1:1]
+ tcmalloc/cpu_cache.h [1:1]
+ tcmalloc/cpu_cache_test.cc [1:1]
+ tcmalloc/experiment.cc [1:1]
+ tcmalloc/experiment.h [1:1]
+ tcmalloc/experiment_config.h [1:1]
+ tcmalloc/experiment_config_test.cc [1:1]
+ tcmalloc/experiment_fuzz.cc [1:1]
+ tcmalloc/experimental_56_size_class.cc [1:1]
+ tcmalloc/experimental_pow2_below64_size_class.cc [1:1]
+ tcmalloc/experimental_pow2_size_class.cc [1:1]
+ tcmalloc/guarded_page_allocator.cc [1:1]
+ tcmalloc/guarded_page_allocator.h [1:1]
+ tcmalloc/guarded_page_allocator_benchmark.cc [1:1]
+ tcmalloc/guarded_page_allocator_test.cc [1:1]
+ tcmalloc/heap_profiling_test.cc [1:1]
+ tcmalloc/huge_address_map.cc [1:1]
+ tcmalloc/huge_address_map.h [1:1]
+ tcmalloc/huge_address_map_test.cc [1:1]
+ tcmalloc/huge_allocator.cc [1:1]
+ tcmalloc/huge_allocator.h [1:1]
+ tcmalloc/huge_allocator_test.cc [1:1]
+ tcmalloc/huge_cache.cc [1:1]
+ tcmalloc/huge_cache.h [1:1]
+ tcmalloc/huge_cache_test.cc [1:1]
+ tcmalloc/huge_page_aware_allocator.cc [1:1]
+ tcmalloc/huge_page_aware_allocator.h [1:1]
+ tcmalloc/huge_page_aware_allocator_test.cc [1:1]
+ tcmalloc/huge_page_filler.h [1:1]
+ tcmalloc/huge_page_filler_test.cc [1:1]
+ tcmalloc/huge_pages.h [1:1]
+ tcmalloc/huge_region.h [1:1]
+ tcmalloc/huge_region_test.cc [1:1]
+ tcmalloc/internal/atomic_danger.h [1:1]
+ tcmalloc/internal/atomic_stats_counter.h [1:1]
+ tcmalloc/internal/bits.h [1:1]
+ tcmalloc/internal/bits_test.cc [1:1]
+ tcmalloc/internal/config.h [1:1]
+ tcmalloc/internal/declarations.h [1:1]
+ tcmalloc/internal/environment.cc [1:1]
+ tcmalloc/internal/environment.h [1:1]
+ tcmalloc/internal/environment_test.cc [1:1]
+ tcmalloc/internal/lifetime_predictions_test.cc [1:1]
+ tcmalloc/internal/lifetime_tracker_test.cc [1:1]
+ tcmalloc/internal/linked_list.h [1:1]
+ tcmalloc/internal/linked_list_benchmark.cc [1:1]
+ tcmalloc/internal/linked_list_test.cc [1:1]
+ tcmalloc/internal/linux_syscall_support.h [1:1]
+ tcmalloc/internal/logging.cc [1:1]
+ tcmalloc/internal/logging.h [1:1]
+ tcmalloc/internal/logging_test.cc [1:1]
+ tcmalloc/internal/memory_stats.cc [1:1]
+ tcmalloc/internal/memory_stats.h [1:1]
+ tcmalloc/internal/memory_stats_test.cc [1:1]
+ tcmalloc/internal/mincore.cc [1:1]
+ tcmalloc/internal/mincore.h [1:1]
+ tcmalloc/internal/mincore_benchmark.cc [1:1]
+ tcmalloc/internal/mincore_test.cc [1:1]
+ tcmalloc/internal/mock_span.h [1:1]
+ tcmalloc/internal/parameter_accessors.h [1:1]
+ tcmalloc/internal/percpu.cc [1:1]
+ tcmalloc/internal/percpu.h [1:1]
+ tcmalloc/internal/percpu_rseq_asm.S [1:1]
+ tcmalloc/internal/percpu_rseq_ppc.S [2:2]
+ tcmalloc/internal/percpu_rseq_unsupported.cc [1:1]
+ tcmalloc/internal/percpu_rseq_x86_64.S [2:2]
+ tcmalloc/internal/percpu_tcmalloc.h [1:1]
+ tcmalloc/internal/percpu_tcmalloc_test.cc [1:1]
+ tcmalloc/internal/proc_maps.cc [1:1]
+ tcmalloc/internal/proc_maps.h [1:1]
+ tcmalloc/internal/range_tracker.h [1:1]
+ tcmalloc/internal/range_tracker_benchmark.cc [1:1]
+ tcmalloc/internal/range_tracker_test.cc [1:1]
+ tcmalloc/internal/timeseries_tracker.h [1:1]
+ tcmalloc/internal/timeseries_tracker_test.cc [1:1]
+ tcmalloc/internal/util.cc [1:1]
+ tcmalloc/internal/util.h [1:1]
+ tcmalloc/internal_malloc_extension.h [1:1]
+ tcmalloc/legacy_size_classes.cc [1:1]
+ tcmalloc/libc_override.h [1:1]
+ tcmalloc/libc_override_gcc_and_weak.h [1:1]
+ tcmalloc/libc_override_glibc.h [1:1]
+ tcmalloc/libc_override_redefine.h [1:1]
+ tcmalloc/malloc_extension.cc [1:1]
+ tcmalloc/malloc_extension.h [1:1]
+ tcmalloc/malloc_extension_fuzz.cc [1:1]
+ tcmalloc/malloc_extension_system_malloc_test.cc [1:1]
+ tcmalloc/malloc_extension_test.cc [1:1]
+ tcmalloc/noruntime_size_classes.cc [1:1]
+ tcmalloc/page_allocator.cc [1:1]
+ tcmalloc/page_allocator.h [1:1]
+ tcmalloc/page_allocator_interface.cc [1:1]
+ tcmalloc/page_allocator_interface.h [1:1]
+ tcmalloc/page_allocator_test.cc [1:1]
+ tcmalloc/page_allocator_test_util.h [1:1]
+ tcmalloc/page_heap.cc [1:1]
+ tcmalloc/page_heap.h [1:1]
+ tcmalloc/page_heap_allocator.h [1:1]
+ tcmalloc/page_heap_test.cc [1:1]
+ tcmalloc/pagemap.cc [1:1]
+ tcmalloc/pagemap.h [1:1]
+ tcmalloc/pagemap_test.cc [1:1]
+ tcmalloc/pages.h [1:1]
+ tcmalloc/parameters.cc [1:1]
+ tcmalloc/parameters.h [1:1]
+ tcmalloc/peak_heap_tracker.cc [1:1]
+ tcmalloc/peak_heap_tracker.h [1:1]
+ tcmalloc/profile_test.cc [1:1]
+ tcmalloc/realloc_test.cc [1:1]
+ tcmalloc/runtime_size_classes.cc [1:1]
+ tcmalloc/runtime_size_classes.h [1:1]
+ tcmalloc/runtime_size_classes_fuzz.cc [1:1]
+ tcmalloc/runtime_size_classes_test.cc [1:1]
+ tcmalloc/sampler.cc [1:1]
+ tcmalloc/sampler.h [1:1]
+ tcmalloc/size_class_info.h [1:1]
+ tcmalloc/size_classes.cc [1:1]
+ tcmalloc/size_classes_test.cc [1:1]
+ tcmalloc/size_classes_with_runtime_size_classes_test.cc [1:1]
+ tcmalloc/span.cc [1:1]
+ tcmalloc/span.h [1:1]
+ tcmalloc/span_benchmark.cc [1:1]
+ tcmalloc/span_stats.h [1:1]
+ tcmalloc/span_test.cc [1:1]
+ tcmalloc/stack_trace_table.cc [1:1]
+ tcmalloc/stack_trace_table.h [1:1]
+ tcmalloc/stack_trace_table_test.cc [1:1]
+ tcmalloc/static_vars.cc [1:1]
+ tcmalloc/static_vars.h [1:1]
+ tcmalloc/stats.cc [1:1]
+ tcmalloc/stats.h [1:1]
+ tcmalloc/stats_test.cc [1:1]
+ tcmalloc/system-alloc.cc [1:1]
+ tcmalloc/system-alloc.h [1:1]
+ tcmalloc/system-alloc_test.cc [1:1]
+ tcmalloc/tcmalloc.cc [1:1]
+ tcmalloc/tcmalloc.h [1:1]
+ tcmalloc/tcmalloc_large_test.cc [1:1]
+ tcmalloc/tcmalloc_policy.h [1:1]
+ tcmalloc/thread_cache.cc [1:1]
+ tcmalloc/thread_cache.h [1:1]
+ tcmalloc/thread_cache_test.cc [1:1]
+ tcmalloc/tracking.h [1:1]
+ tcmalloc/transfer_cache.cc [1:1]
+ tcmalloc/transfer_cache.h [1:1]
+ tcmalloc/want_hpaa.cc [1:1]
+ tcmalloc/want_hpaa_subrelease.cc [1:1]
+ tcmalloc/want_no_hpaa.cc [1:1]
+
+KEEP COPYRIGHT_SERVICE_LABEL 2f85f99f6e6cdec04f6948d273430658
+BELONGS ya.make
+ License text:
+ // Copyright 2021 The TCMalloc Authors
+ Scancode info:
+ Original SPDX id: COPYRIGHT_SERVICE_LABEL
+ Score : 100.00
+ Match type : COPYRIGHT
+ Files with this license:
+ tcmalloc/arena_test.cc [1:1]
+ tcmalloc/central_freelist_benchmark.cc [1:1]
+ tcmalloc/internal/cache_topology.cc [1:1]
+ tcmalloc/internal/cache_topology.h [1:1]
+ tcmalloc/internal/cache_topology_test.cc [1:1]
+ tcmalloc/internal/clock.h [1:1]
+ tcmalloc/internal/logging_test_helper.cc [1:1]
+ tcmalloc/internal/numa.cc [1:1]
+ tcmalloc/internal/numa.h [1:1]
+ tcmalloc/internal/numa_test.cc [1:1]
+ tcmalloc/want_numa_aware.cc [1:1]
+
+KEEP COPYRIGHT_SERVICE_LABEL 62f2df7d02ddf07de59d1a4e25e663aa
+BELONGS ya.make
+ License text:
+ // Copyright 2020 The TCMalloc Authors
+ Scancode info:
+ Original SPDX id: COPYRIGHT_SERVICE_LABEL
+ Score : 100.00
+ Match type : COPYRIGHT
+ Files with this license:
+ tcmalloc/internal/lifetime_predictions.h [1:1]
+ tcmalloc/internal/lifetime_tracker.h [1:1]
+ tcmalloc/internal/optimization.h [1:1]
+ tcmalloc/internal/percpu_rseq_aarch64.S [2:2]
+ tcmalloc/mock_central_freelist.cc [1:1]
+ tcmalloc/mock_central_freelist.h [1:1]
+ tcmalloc/mock_transfer_cache.cc [1:1]
+ tcmalloc/mock_transfer_cache.h [1:1]
+ tcmalloc/transfer_cache_benchmark.cc [1:1]
+ tcmalloc/transfer_cache_fuzz.cc [1:1]
+ tcmalloc/transfer_cache_internals.h [1:1]
+ tcmalloc/transfer_cache_stats.h [1:1]
+ tcmalloc/transfer_cache_test.cc [1:1]
+ tcmalloc/want_legacy_spans.cc [1:1]
+
+KEEP COPYRIGHT_SERVICE_LABEL b7c6499c855f04bbe7161fc4de3a41d6
+BELONGS ya.make
+ License text:
+ Delete(c);
+ SmallSpanStats small;
+ LargeSpanStats large;
+ Scancode info:
+ Original SPDX id: COPYRIGHT_SERVICE_LABEL
+ Score : 100.00
+ Match type : COPYRIGHT
+ Files with this license:
+ tcmalloc/huge_region_test.cc [433:435]
diff --git a/contrib/libs/tcmalloc/.yandex_meta/devtools.licenses.report b/contrib/libs/tcmalloc/.yandex_meta/devtools.licenses.report
new file mode 100644
index 0000000000..29c5c149ce
--- /dev/null
+++ b/contrib/libs/tcmalloc/.yandex_meta/devtools.licenses.report
@@ -0,0 +1,331 @@
+# File format ($ symbol means the beginning of a line):
+#
+# $ # this message
+# $ # =======================
+# $ # comments (all comments should start with some number of spaces and a # symbol)
+# ${action} {license spdx} {license text hash}
+# $BELONGS ./ya/make/file/relative/path/1/ya.make ./ya/make/2/ya.make
+# ${all_file_action} filename
+# $ # user commentaries (many lines)
+# $ generated description - files with this license, license text... (some number of lines that starts with some number of spaces, do not modify)
+# ${action} {license spdx} {license text hash}
+# $BELONGS ./ya/make/file/relative/path/3/ya.make
+# ${all_file_action} filename
+# $ # user commentaries
+# $ generated description
+# $ ...
+#
+# You can modify action, all_file_action and add commentaries
+# Available actions:
+# keep - keep license in contrib and use in credits
+# skip - skip license
+# remove - remove all files with this license
+# rename - save license text/links into the licenses text file, but do not store the SPDX into the LICENSE macro. You should store the correct license id into the devtools.license.spdx.txt file
+#
+# {all file action} records will be generated when the license text contains a filename that exists on the filesystem (in the contrib directory)
+# We suppose that these files can contain some license info
+# Available all file actions:
+# FILE_IGNORE - ignore file (do nothing)
+# FILE_INCLUDE - include all file data into licenses text file
+# =======================
+
+KEEP Apache-2.0 0e8699c5f5ea602534a6558430df2b8d
+BELONGS ya.make
+ Note: matched license text is too long. Read it in the source files.
+ Scancode info:
+ Original SPDX id: Apache-2.0
+ Score : 100.00
+ Match type : NOTICE
+ Links : http://www.apache.org/licenses/, http://www.apache.org/licenses/LICENSE-2.0, https://spdx.org/licenses/Apache-2.0
+ Files with this license:
+ tcmalloc/arena.cc [3:13]
+ tcmalloc/arena.h [3:13]
+ tcmalloc/arena_test.cc [3:13]
+ tcmalloc/background.cc [3:13]
+ tcmalloc/central_freelist.cc [3:13]
+ tcmalloc/central_freelist.h [3:13]
+ tcmalloc/central_freelist_benchmark.cc [3:13]
+ tcmalloc/central_freelist_test.cc [3:13]
+ tcmalloc/common.cc [3:13]
+ tcmalloc/common.h [3:13]
+ tcmalloc/cpu_cache.cc [3:13]
+ tcmalloc/cpu_cache.h [3:13]
+ tcmalloc/cpu_cache_test.cc [3:13]
+ tcmalloc/experiment.cc [3:13]
+ tcmalloc/experiment.h [3:13]
+ tcmalloc/experiment_config.h [3:13]
+ tcmalloc/experiment_config_test.cc [3:13]
+ tcmalloc/experiment_fuzz.cc [3:13]
+ tcmalloc/experimental_56_size_class.cc [3:13]
+ tcmalloc/experimental_pow2_below64_size_class.cc [3:13]
+ tcmalloc/experimental_pow2_size_class.cc [3:13]
+ tcmalloc/guarded_page_allocator.cc [3:13]
+ tcmalloc/guarded_page_allocator.h [3:13]
+ tcmalloc/guarded_page_allocator_benchmark.cc [3:13]
+ tcmalloc/guarded_page_allocator_test.cc [3:13]
+ tcmalloc/heap_profiling_test.cc [3:13]
+ tcmalloc/huge_address_map.cc [3:13]
+ tcmalloc/huge_address_map.h [3:13]
+ tcmalloc/huge_address_map_test.cc [3:13]
+ tcmalloc/huge_allocator.cc [3:13]
+ tcmalloc/huge_allocator.h [3:13]
+ tcmalloc/huge_allocator_test.cc [3:13]
+ tcmalloc/huge_cache.cc [3:13]
+ tcmalloc/huge_cache.h [3:13]
+ tcmalloc/huge_cache_test.cc [3:13]
+ tcmalloc/huge_page_aware_allocator.cc [3:13]
+ tcmalloc/huge_page_aware_allocator.h [3:13]
+ tcmalloc/huge_page_aware_allocator_test.cc [3:13]
+ tcmalloc/huge_page_filler.h [3:13]
+ tcmalloc/huge_page_filler_test.cc [3:13]
+ tcmalloc/huge_pages.h [3:13]
+ tcmalloc/huge_region.h [3:13]
+ tcmalloc/huge_region_test.cc [3:13]
+ tcmalloc/internal/atomic_danger.h [3:13]
+ tcmalloc/internal/atomic_stats_counter.h [3:13]
+ tcmalloc/internal/bits.h [3:13]
+ tcmalloc/internal/bits_test.cc [3:13]
+ tcmalloc/internal/cache_topology.cc [3:13]
+ tcmalloc/internal/cache_topology.h [3:13]
+ tcmalloc/internal/cache_topology_test.cc [3:13]
+ tcmalloc/internal/clock.h [3:13]
+ tcmalloc/internal/config.h [3:13]
+ tcmalloc/internal/declarations.h [3:13]
+ tcmalloc/internal/environment.cc [3:13]
+ tcmalloc/internal/environment.h [3:13]
+ tcmalloc/internal/environment_test.cc [3:13]
+ tcmalloc/internal/lifetime_predictions.h [3:13]
+ tcmalloc/internal/lifetime_predictions_test.cc [3:13]
+ tcmalloc/internal/lifetime_tracker.h [3:13]
+ tcmalloc/internal/lifetime_tracker_test.cc [3:13]
+ tcmalloc/internal/linked_list.h [3:13]
+ tcmalloc/internal/linked_list_benchmark.cc [3:13]
+ tcmalloc/internal/linked_list_test.cc [3:13]
+ tcmalloc/internal/linux_syscall_support.h [3:13]
+ tcmalloc/internal/logging.cc [3:13]
+ tcmalloc/internal/logging.h [3:13]
+ tcmalloc/internal/logging_test.cc [3:13]
+ tcmalloc/internal/logging_test_helper.cc [3:13]
+ tcmalloc/internal/memory_stats.cc [3:13]
+ tcmalloc/internal/memory_stats.h [3:13]
+ tcmalloc/internal/memory_stats_test.cc [3:13]
+ tcmalloc/internal/mincore.cc [3:13]
+ tcmalloc/internal/mincore.h [3:13]
+ tcmalloc/internal/mincore_benchmark.cc [3:13]
+ tcmalloc/internal/mincore_test.cc [3:13]
+ tcmalloc/internal/mock_span.h [3:13]
+ tcmalloc/internal/numa.cc [3:13]
+ tcmalloc/internal/numa.h [3:13]
+ tcmalloc/internal/numa_test.cc [3:13]
+ tcmalloc/internal/optimization.h [3:13]
+ tcmalloc/internal/parameter_accessors.h [3:13]
+ tcmalloc/internal/percpu.cc [3:13]
+ tcmalloc/internal/percpu.h [3:13]
+ tcmalloc/internal/percpu_rseq_asm.S [3:13]
+ tcmalloc/internal/percpu_rseq_unsupported.cc [3:13]
+ tcmalloc/internal/percpu_tcmalloc.h [3:13]
+ tcmalloc/internal/percpu_tcmalloc_test.cc [3:13]
+ tcmalloc/internal/proc_maps.cc [3:13]
+ tcmalloc/internal/proc_maps.h [3:13]
+ tcmalloc/internal/range_tracker.h [3:13]
+ tcmalloc/internal/range_tracker_benchmark.cc [3:13]
+ tcmalloc/internal/range_tracker_test.cc [3:13]
+ tcmalloc/internal/timeseries_tracker.h [3:13]
+ tcmalloc/internal/timeseries_tracker_test.cc [3:13]
+ tcmalloc/internal/util.cc [3:13]
+ tcmalloc/internal/util.h [3:13]
+ tcmalloc/internal_malloc_extension.h [3:13]
+ tcmalloc/legacy_size_classes.cc [3:13]
+ tcmalloc/libc_override.h [3:13]
+ tcmalloc/libc_override_gcc_and_weak.h [3:13]
+ tcmalloc/libc_override_glibc.h [3:13]
+ tcmalloc/libc_override_redefine.h [3:13]
+ tcmalloc/malloc_extension.cc [3:13]
+ tcmalloc/malloc_extension.h [3:13]
+ tcmalloc/malloc_extension_fuzz.cc [3:13]
+ tcmalloc/malloc_extension_system_malloc_test.cc [3:13]
+ tcmalloc/malloc_extension_test.cc [3:13]
+ tcmalloc/mock_central_freelist.cc [3:13]
+ tcmalloc/mock_central_freelist.h [3:13]
+ tcmalloc/mock_transfer_cache.cc [3:13]
+ tcmalloc/mock_transfer_cache.h [3:13]
+ tcmalloc/noruntime_size_classes.cc [3:13]
+ tcmalloc/page_allocator.cc [3:13]
+ tcmalloc/page_allocator.h [3:13]
+ tcmalloc/page_allocator_interface.cc [3:13]
+ tcmalloc/page_allocator_interface.h [3:13]
+ tcmalloc/page_allocator_test.cc [3:13]
+ tcmalloc/page_allocator_test_util.h [3:13]
+ tcmalloc/page_heap.cc [3:13]
+ tcmalloc/page_heap.h [3:13]
+ tcmalloc/page_heap_allocator.h [3:13]
+ tcmalloc/page_heap_test.cc [3:13]
+ tcmalloc/pagemap.cc [3:13]
+ tcmalloc/pagemap.h [3:13]
+ tcmalloc/pagemap_test.cc [3:13]
+ tcmalloc/pages.h [3:13]
+ tcmalloc/parameters.cc [3:13]
+ tcmalloc/parameters.h [3:13]
+ tcmalloc/peak_heap_tracker.cc [3:13]
+ tcmalloc/peak_heap_tracker.h [3:13]
+ tcmalloc/profile_test.cc [3:13]
+ tcmalloc/realloc_test.cc [3:13]
+ tcmalloc/runtime_size_classes.cc [3:13]
+ tcmalloc/runtime_size_classes.h [3:13]
+ tcmalloc/runtime_size_classes_fuzz.cc [3:13]
+ tcmalloc/runtime_size_classes_test.cc [3:13]
+ tcmalloc/sampler.cc [3:13]
+ tcmalloc/sampler.h [3:13]
+ tcmalloc/size_class_info.h [3:13]
+ tcmalloc/size_classes.cc [3:13]
+ tcmalloc/size_classes_test.cc [3:13]
+ tcmalloc/size_classes_with_runtime_size_classes_test.cc [3:13]
+ tcmalloc/span.cc [3:13]
+ tcmalloc/span.h [3:13]
+ tcmalloc/span_benchmark.cc [3:13]
+ tcmalloc/span_stats.h [3:13]
+ tcmalloc/span_test.cc [3:13]
+ tcmalloc/stack_trace_table.cc [3:13]
+ tcmalloc/stack_trace_table.h [3:13]
+ tcmalloc/stack_trace_table_test.cc [3:13]
+ tcmalloc/static_vars.cc [3:13]
+ tcmalloc/static_vars.h [3:13]
+ tcmalloc/stats.cc [3:13]
+ tcmalloc/stats.h [3:13]
+ tcmalloc/stats_test.cc [3:13]
+ tcmalloc/system-alloc.cc [3:13]
+ tcmalloc/system-alloc.h [3:13]
+ tcmalloc/system-alloc_test.cc [3:13]
+ tcmalloc/tcmalloc.cc [3:13]
+ tcmalloc/tcmalloc.h [3:13]
+ tcmalloc/tcmalloc_large_test.cc [3:13]
+ tcmalloc/tcmalloc_policy.h [3:13]
+ tcmalloc/thread_cache.cc [3:13]
+ tcmalloc/thread_cache.h [3:13]
+ tcmalloc/thread_cache_test.cc [3:13]
+ tcmalloc/tracking.h [3:13]
+ tcmalloc/transfer_cache.cc [3:13]
+ tcmalloc/transfer_cache.h [3:13]
+ tcmalloc/transfer_cache_benchmark.cc [3:13]
+ tcmalloc/transfer_cache_fuzz.cc [3:13]
+ tcmalloc/transfer_cache_internals.h [3:13]
+ tcmalloc/transfer_cache_stats.h [3:13]
+ tcmalloc/transfer_cache_test.cc [3:13]
+ tcmalloc/want_hpaa.cc [3:13]
+ tcmalloc/want_hpaa_subrelease.cc [3:13]
+ tcmalloc/want_legacy_spans.cc [3:13]
+ tcmalloc/want_no_hpaa.cc [3:13]
+ tcmalloc/want_numa_aware.cc [3:13]
+
+KEEP Apache-2.0 24be4e5673a9c71cdba851c53ed9677b
+BELONGS ya.make
+ Note: matched license text is too long. Read it in the source files.
+ Scancode info:
+ Original SPDX id: Apache-2.0
+ Score : 100.00
+ Match type : NOTICE
+ Links : http://www.apache.org/licenses/, http://www.apache.org/licenses/LICENSE-2.0, https://spdx.org/licenses/Apache-2.0
+ Files with this license:
+ tcmalloc/internal/percpu_rseq_aarch64.S [4:14]
+
+KEEP Apache-2.0 34ef0c6d1296bad9c0b8ea4447611e19
+BELONGS ya.make
+ Note: matched license text is too long. Read it in the source files.
+ Scancode info:
+ Original SPDX id: Apache-2.0
+ Score : 100.00
+ Match type : NOTICE
+ Links : http://www.apache.org/licenses/, http://www.apache.org/licenses/LICENSE-2.0, https://spdx.org/licenses/Apache-2.0
+ Files with this license:
+ tcmalloc/BUILD [3:13]
+
+KEEP Apache-2.0 566444825cbcc83578050639168bd08f
+BELONGS ya.make
+FILE_INCLUDE LICENSE found in files: README.md at line 42
+ License text:
+ The TCMalloc library is licensed under the terms of the Apache
+ license. See LICENSE for more information.
+ Scancode info:
+ Original SPDX id: Apache-2.0
+ Score : 90.00
+ Match type : NOTICE
+ Links : http://www.apache.org/licenses/, http://www.apache.org/licenses/LICENSE-2.0, https://spdx.org/licenses/Apache-2.0
+ Files with this license:
+ README.md [41:42]
+
+SKIP LicenseRef-scancode-generic-cla 5d780ffa423067f23c6a123ae33e7c18
+BELONGS ya.make
+ License text:
+ \## Contributor License Agreement
+ Scancode info:
+ Original SPDX id: LicenseRef-scancode-generic-cla
+ Score : 16.00
+ Match type : NOTICE
+ Links : https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/generic-cla.LICENSE
+ Files with this license:
+ CONTRIBUTING.md [9:9]
+
+KEEP Apache-2.0 7007f7032a612d02b590073b4f7e5b25
+BELONGS ya.make
+ Note: matched license text is too long. Read it in the source files.
+ Scancode info:
+ Original SPDX id: Apache-2.0
+ Score : 100.00
+ Match type : NOTICE
+ Links : http://www.apache.org/licenses/, http://www.apache.org/licenses/LICENSE-2.0, https://spdx.org/licenses/Apache-2.0
+ Files with this license:
+ tcmalloc/internal/percpu_rseq_ppc.S [4:14]
+ tcmalloc/internal/percpu_rseq_x86_64.S [4:14]
+
+SKIP LicenseRef-scancode-generic-cla 979d7de2e3ff119ee2c22c7efbec766d
+BELONGS ya.make
+ License text:
+ Contributions to this project must be accompanied by a Contributor License
+ Agreement. You (or your employer) retain the copyright to your contribution;
+ Scancode info:
+ Original SPDX id: LicenseRef-scancode-generic-cla
+ Score : 16.00
+ Match type : NOTICE
+ Links : https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/generic-cla.LICENSE
+ Files with this license:
+ CONTRIBUTING.md [11:12]
+
+KEEP Apache-2.0 cac6cbe8ed5a3da569f7c01e4e486688
+BELONGS ya.make
+ Note: matched license text is too long. Read it in the source files.
+ Scancode info:
+ Original SPDX id: Apache-2.0
+ Score : 100.00
+ Match type : TEXT
+ Links : http://www.apache.org/licenses/, http://www.apache.org/licenses/LICENSE-2.0, https://spdx.org/licenses/Apache-2.0
+ Files with this license:
+ LICENSE [2:202]
+
+SKIP LicenseRef-scancode-other-permissive cd348406a46a4c91e9edaa5be5e9c074
+BELONGS ya.make
+FILE_INCLUDE LICENSE found in files: README.md at line 42
+ # File LICENSES already included
+ License text:
+ license. See LICENSE for more information.
+ Scancode info:
+ Original SPDX id: LicenseRef-scancode-unknown-license-reference
+ Score : 100.00
+ Match type : REFERENCE
+ Links : https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/unknown-license-reference.LICENSE
+ Files with this license:
+ README.md [42:42]
+
+KEEP Apache-2.0 e0f9a998414a9ae203fd34f4452d4dbc
+BELONGS ya.make
+FILE_INCLUDE LICENSE found in files: README.md at line 42
+ License text:
+ \## License
+ The TCMalloc library is licensed under the terms of the Apache
+ license. See LICENSE for more information.
+ Scancode info:
+ Original SPDX id: MIT
+ Score : 52.63
+ Match type : NOTICE
+ Links : http://opensource.org/licenses/mit-license.php, https://spdx.org/licenses/MIT
+ Files with this license:
+ README.md [39:42]
diff --git a/contrib/libs/tcmalloc/.yandex_meta/licenses.list.txt b/contrib/libs/tcmalloc/.yandex_meta/licenses.list.txt
new file mode 100644
index 0000000000..a5a0b42768
--- /dev/null
+++ b/contrib/libs/tcmalloc/.yandex_meta/licenses.list.txt
@@ -0,0 +1,492 @@
+====================Apache-2.0====================
+ Apache License
+ Version 2.0, January 2004
+ https://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ https://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+====================Apache-2.0====================
+ * Licensed under the Apache License, Version 2.0 (the "License")
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+
+
+====================Apache-2.0====================
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+
+
+====================Apache-2.0====================
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+====================Apache-2.0====================
+## License
+
+The TCMalloc library is licensed under the terms of the Apache
+license. See LICENSE for more information.
+
+
+====================Apache-2.0====================
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+====================Apache-2.0====================
+The TCMalloc library is licensed under the terms of the Apache
+license. See LICENSE for more information.
+
+
+====================COPYRIGHT====================
+ Delete(c);
+ SmallSpanStats small;
+ LargeSpanStats large;
+
+
+====================COPYRIGHT====================
+// Copyright 2019 The TCMalloc Authors
+
+
+====================COPYRIGHT====================
+// Copyright 2020 The TCMalloc Authors
+
+
+====================COPYRIGHT====================
+// Copyright 2021 The TCMalloc Authors
+
+
+====================File: LICENSE====================
+
+ Apache License
+ Version 2.0, January 2004
+ https://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ https://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/contrib/libs/tcmalloc/CONTRIBUTING.md b/contrib/libs/tcmalloc/CONTRIBUTING.md
new file mode 100644
index 0000000000..d10cc0d08f
--- /dev/null
+++ b/contrib/libs/tcmalloc/CONTRIBUTING.md
@@ -0,0 +1,74 @@
+# How to Contribute to TCMalloc
+
+We'd love to accept your patches and contributions to this project. There are
+just a few small guidelines you need to follow.
+
+NOTE: If you are new to GitHub, please start by reading [Pull Request
+howto](https://help.github.com/articles/about-pull-requests/)
+
+## Contributor License Agreement
+
+Contributions to this project must be accompanied by a Contributor License
+Agreement. You (or your employer) retain the copyright to your contribution;
+this simply gives us permission to use and redistribute your contributions as
+part of the project. Head over to <https://cla.developers.google.com/> to see
+your current agreements on file or to sign a new one.
+
+You generally only need to submit a CLA once, so if you've already submitted one
+(even if it was for a different project), you probably don't need to do it
+again.
+
+## Guidelines for Pull Requests
+
+* All submissions, including submissions by project members, require review.
+ We use GitHub pull requests for this purpose. Consult
+ [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
+ information on using pull requests.
+
+* If you are a Googler, it is preferable to first create an internal CL and
+ have it reviewed and submitted. The code propagation process will deliver
+ the change to GitHub.
+
+* Create **small PRs** that are narrowly focused on **addressing a single concern**.
+  When a PR tries to fix several things at once and only one fix is
+  acceptable, nothing gets merged, and both the author's and the reviewer's
+  time is wasted. Create separate PRs to address separate concerns.
+
+* Provide a good **PR description** as a record of **what** change is being
+ made and **why** it was made. Link to a GitHub issue if it exists.
+
+* Don't fix code style and formatting unless you are already changing that line
+ to address an issue. Formatting of modified lines may be done using
+ `git clang-format`. PRs with irrelevant changes won't be merged. If you do
+ want to fix formatting or style, do that in a separate PR.
+
+* Unless your PR is trivial, you should expect there will be reviewer comments
+ that you'll need to address before merging. We expect you to be reasonably
+ responsive to those comments, otherwise the PR will be closed after 2-3 weeks
+ of inactivity.
+
+* Maintain **clean commit history** and use **meaningful commit messages**.
+ PRs with messy commit history are difficult to review and won't be merged.
+  Use `rebase -i upstream/master` to curate your commit history and/or to
+  bring in the latest changes from master (but avoid rebasing in the middle
+  of a code review).
+
+* Keep your PR up to date with upstream/master (if there are merge conflicts,
+ we can't really merge your change).
+
+* **All tests need to be passing** before your change can be merged. We
+  recommend you **run tests locally** (see below).
+
+* Exceptions to the rules can be made if there's a compelling reason for doing
+ so. That is - the rules are here to serve us, not the other way around, and
+ the rules need to be serving their intended purpose to be valuable.
+
+## TCMalloc Committers
+
+The current members of the TCMalloc engineering team are the only committers at
+present.
+
+## Community Guidelines
+
+This project follows
+[Google's Open Source Community Guidelines](https://opensource.google.com/conduct/).
diff --git a/contrib/libs/tcmalloc/LICENSE b/contrib/libs/tcmalloc/LICENSE
new file mode 100644
index 0000000000..62589edd12
--- /dev/null
+++ b/contrib/libs/tcmalloc/LICENSE
@@ -0,0 +1,202 @@
+
+ Apache License
+ Version 2.0, January 2004
+ https://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
+
+ APPENDIX: How to apply the Apache License to your work.
+
+ To apply the Apache License to your work, attach the following
+ boilerplate notice, with the fields enclosed by brackets "[]"
+ replaced with your own identifying information. (Don't include
+ the brackets!) The text should be enclosed in the appropriate
+ comment syntax for the file format. We also recommend that a
+ file or class name and description of purpose be included on the
+ same "printed page" as the copyright notice for easier
+ identification within third-party archives.
+
+ Copyright [yyyy] [name of copyright owner]
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ https://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/contrib/libs/tcmalloc/README.md b/contrib/libs/tcmalloc/README.md
new file mode 100644
index 0000000000..c848b4838d
--- /dev/null
+++ b/contrib/libs/tcmalloc/README.md
@@ -0,0 +1,44 @@
+# TCMalloc
+
+This repository contains the TCMalloc C++ code.
+
+TCMalloc is Google's customized implementation of C's `malloc()` and C++'s
+`operator new` used for memory allocation within our C and C++ code. TCMalloc is
+a fast, multi-threaded malloc implementation.
+
+## Building TCMalloc
+
+[Bazel](https://bazel.build) is the official build system for TCMalloc.
+
+The [TCMalloc Platforms Guide](docs/platforms.md) contains information on
+platform support for TCMalloc.
+
+## Documentation
+
+All users of TCMalloc should consult the following documentation resources:
+
+* The [TCMalloc Quickstart](docs/quickstart.md) covers downloading, installing,
+ building, and testing TCMalloc, including incorporating within your codebase.
+* The [TCMalloc Overview](docs/overview.md) covers the basic architecture of
+ TCMalloc, and how that may affect configuration choices.
+* The [TCMalloc Reference](docs/reference.md) covers the C and C++ TCMalloc API
+ endpoints.
+
+For more advanced usage of TCMalloc, the following documentation may be useful:
+
+* The [TCMalloc Tuning Guide](docs/tuning.md) covers the configuration choices
+ in more depth, and also illustrates other ways to customize TCMalloc. This
+ also covers important operating system-level properties for improving TCMalloc
+ performance.
+* The [TCMalloc Design Doc](docs/design.md) covers how TCMalloc works
+ underneath the hood, and why certain design choices were made. Most developers
+ will not need this level of implementation detail.
+* The [TCMalloc Compatibility Guide](docs/compatibility.md) documents our
+  expectations for how our APIs are used.
+
+## License
+
+The TCMalloc library is licensed under the terms of the Apache 2.0
+license. See LICENSE for more information.
+
+Disclaimer: This is not an officially supported Google product.
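
The `malloc_extension` library in this import exposes the C++ `MallocExtension` API that the reference documentation above describes. Below is a minimal sketch of querying an allocator statistic; the property name `generic.current_allocated_bytes` follows the upstream reference docs and is an assumption here, as exact names can vary between versions.

```cpp
// Sketch only: assumes the binary is linked against one of the tcmalloc
// variants in this import together with tcmalloc/malloc_extension.
#include <cstdio>

#include "tcmalloc/malloc_extension.h"

int main() {
  // Returns absl::nullopt if the property is unknown or the stock allocator
  // is in use instead of tcmalloc.
  auto allocated = tcmalloc::MallocExtension::GetNumericProperty(
      "generic.current_allocated_bytes");
  if (allocated.has_value()) {
    std::printf("current_allocated_bytes = %zu\n", *allocated);
  }
  return 0;
}
```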
diff --git a/contrib/libs/tcmalloc/common.inc b/contrib/libs/tcmalloc/common.inc
new file mode 100644
index 0000000000..077942c387
--- /dev/null
+++ b/contrib/libs/tcmalloc/common.inc
@@ -0,0 +1,58 @@
+GLOBAL_SRCS(
+ # TCMalloc
+ tcmalloc/tcmalloc.cc
+
+ # Common Sources
+ tcmalloc/arena.cc
+ tcmalloc/background.cc
+ tcmalloc/central_freelist.cc
+ tcmalloc/common.cc
+ tcmalloc/cpu_cache.cc
+ tcmalloc/experimental_pow2_below64_size_class.cc
+ tcmalloc/experimental_pow2_size_class.cc
+ tcmalloc/legacy_size_classes.cc
+ tcmalloc/guarded_page_allocator.cc
+ tcmalloc/huge_address_map.cc
+ tcmalloc/huge_allocator.cc
+ tcmalloc/huge_cache.cc
+ tcmalloc/huge_page_aware_allocator.cc
+ tcmalloc/page_allocator.cc
+ tcmalloc/page_allocator_interface.cc
+ tcmalloc/page_heap.cc
+ tcmalloc/pagemap.cc
+ tcmalloc/parameters.cc
+ tcmalloc/peak_heap_tracker.cc
+ tcmalloc/sampler.cc
+ tcmalloc/size_classes.cc
+ tcmalloc/span.cc
+ tcmalloc/stack_trace_table.cc
+ tcmalloc/static_vars.cc
+ tcmalloc/stats.cc
+ tcmalloc/system-alloc.cc
+ tcmalloc/thread_cache.cc
+ tcmalloc/transfer_cache.cc
+
+ # Common deps
+ tcmalloc/experiment.cc
+ tcmalloc/noruntime_size_classes.cc
+
+ # Internal libraries
+ tcmalloc/internal/cache_topology.cc
+ tcmalloc/internal/environment.cc
+ tcmalloc/internal/logging.cc
+ tcmalloc/internal/memory_stats.cc
+ tcmalloc/internal/mincore.cc
+ tcmalloc/internal/numa.cc
+ tcmalloc/internal/percpu.cc
+ tcmalloc/internal/percpu_rseq_asm.S
+ tcmalloc/internal/percpu_rseq_unsupported.cc
+ tcmalloc/internal/util.cc
+)
+
+PEERDIR(
+ contrib/restricted/abseil-cpp
+ contrib/libs/tcmalloc/malloc_extension
+)
+
+NO_UTIL()
+NO_COMPILER_WARNINGS()
diff --git a/contrib/libs/tcmalloc/default/ya.make b/contrib/libs/tcmalloc/default/ya.make
new file mode 100644
index 0000000000..b69b077e19
--- /dev/null
+++ b/contrib/libs/tcmalloc/default/ya.make
@@ -0,0 +1,22 @@
+LIBRARY()
+
+WITHOUT_LICENSE_TEXTS()
+
+LICENSE(Apache-2.0)
+
+OWNER(
+ ayles
+ prime
+ g:cpp-contrib
+)
+
+SRCDIR(contrib/libs/tcmalloc)
+
+INCLUDE(../common.inc)
+
+GLOBAL_SRCS(
+ # Options
+ tcmalloc/want_hpaa.cc
+)
+
+END()
diff --git a/contrib/libs/tcmalloc/dynamic/ya.make b/contrib/libs/tcmalloc/dynamic/ya.make
new file mode 100644
index 0000000000..72f91dfc81
--- /dev/null
+++ b/contrib/libs/tcmalloc/dynamic/ya.make
@@ -0,0 +1,2 @@
+DLL_FOR(contrib/libs/tcmalloc tcmalloc)
+OWNER(g:contrib)
diff --git a/contrib/libs/tcmalloc/malloc_extension/ya.make b/contrib/libs/tcmalloc/malloc_extension/ya.make
new file mode 100644
index 0000000000..c9a07c2454
--- /dev/null
+++ b/contrib/libs/tcmalloc/malloc_extension/ya.make
@@ -0,0 +1,37 @@
+LIBRARY()
+
+WITHOUT_LICENSE_TEXTS()
+
+LICENSE(Apache-2.0)
+
+OWNER(
+ prime
+ g:cpp-contrib
+)
+
+NO_UTIL()
+
+NO_COMPILER_WARNINGS()
+
+# https://github.com/google/tcmalloc
+VERSION(2020-11-23-a643d89610317be1eff9f7298104eef4c987d8d5)
+
+SRCDIR(contrib/libs/tcmalloc)
+
+SRCS(
+ tcmalloc/malloc_extension.cc
+)
+
+PEERDIR(
+ contrib/restricted/abseil-cpp
+)
+
+ADDINCL(
+ GLOBAL contrib/libs/tcmalloc
+)
+
+CFLAGS(
+ -DTCMALLOC_256K_PAGES
+)
+
+END()
diff --git a/contrib/libs/tcmalloc/numa_256k/ya.make b/contrib/libs/tcmalloc/numa_256k/ya.make
new file mode 100644
index 0000000000..ffede5df8b
--- /dev/null
+++ b/contrib/libs/tcmalloc/numa_256k/ya.make
@@ -0,0 +1,28 @@
+LIBRARY()
+
+WITHOUT_LICENSE_TEXTS()
+
+LICENSE(Apache-2.0)
+
+OWNER(
+ ayles
+ prime
+ g:cpp-contrib
+)
+
+SRCDIR(contrib/libs/tcmalloc)
+
+INCLUDE(../common.inc)
+
+GLOBAL_SRCS(
+ # Options
+ tcmalloc/want_hpaa.cc
+ tcmalloc/want_numa_aware.cc
+)
+
+CFLAGS(
+ -DTCMALLOC_256K_PAGES
+ -DTCMALLOC_NUMA_AWARE
+)
+
+END()
diff --git a/contrib/libs/tcmalloc/numa_large_pages/ya.make b/contrib/libs/tcmalloc/numa_large_pages/ya.make
new file mode 100644
index 0000000000..f39c1e15ba
--- /dev/null
+++ b/contrib/libs/tcmalloc/numa_large_pages/ya.make
@@ -0,0 +1,28 @@
+LIBRARY()
+
+WITHOUT_LICENSE_TEXTS()
+
+LICENSE(Apache-2.0)
+
+OWNER(
+ ayles
+ prime
+ g:cpp-contrib
+)
+
+SRCDIR(contrib/libs/tcmalloc)
+
+INCLUDE(../common.inc)
+
+GLOBAL_SRCS(
+ # Options
+ tcmalloc/want_hpaa.cc
+ tcmalloc/want_numa_aware.cc
+)
+
+CFLAGS(
+ -DTCMALLOC_LARGE_PAGES
+ -DTCMALLOC_NUMA_AWARE
+)
+
+END()
diff --git a/contrib/libs/tcmalloc/patches/fork.patch b/contrib/libs/tcmalloc/patches/fork.patch
new file mode 100644
index 0000000000..2503394431
--- /dev/null
+++ b/contrib/libs/tcmalloc/patches/fork.patch
@@ -0,0 +1,310 @@
+--- contrib/libs/tcmalloc/tcmalloc/central_freelist.h (index)
++++ contrib/libs/tcmalloc/tcmalloc/central_freelist.h (working tree)
+@@ -70,6 +70,14 @@ class CentralFreeList {
+
+ SpanStats GetSpanStats() const;
+
++ void AcquireInternalLocks() {
++ lock_.Lock();
++ }
++
++ void ReleaseInternalLocks() {
++ lock_.Unlock();
++ }
++
+ private:
+ // Release an object to spans.
+ // Returns object's span if it become completely free.
+--- contrib/libs/tcmalloc/tcmalloc/cpu_cache.cc (index)
++++ contrib/libs/tcmalloc/tcmalloc/cpu_cache.cc (working tree)
+@@ -1031,6 +1031,20 @@ void CPUCache::PrintInPbtxt(PbtxtRegion *region) const {
+ }
+ }
+
++void CPUCache::AcquireInternalLocks() {
++ for (int cpu = 0, num_cpus = absl::base_internal::NumCPUs(); cpu < num_cpus;
++ ++cpu) {
++ resize_[cpu].lock.Lock();
++ }
++}
++
++void CPUCache::ReleaseInternalLocks() {
++ for (int cpu = 0, num_cpus = absl::base_internal::NumCPUs(); cpu < num_cpus;
++ ++cpu) {
++ resize_[cpu].lock.Unlock();
++ }
++}
++
+ void CPUCache::PerClassResizeInfo::Init() {
+ state_.store(0, std::memory_order_relaxed);
+ }
+--- contrib/libs/tcmalloc/tcmalloc/cpu_cache.h (index)
++++ contrib/libs/tcmalloc/tcmalloc/cpu_cache.h (working tree)
+@@ -164,6 +164,9 @@ class CPUCache {
+ void Print(Printer* out) const;
+ void PrintInPbtxt(PbtxtRegion* region) const;
+
++ void AcquireInternalLocks();
++ void ReleaseInternalLocks();
++
+ private:
+ // Per-size-class freelist resizing info.
+ class PerClassResizeInfo {
+--- contrib/libs/tcmalloc/tcmalloc/internal_malloc_extension.h (index)
++++ contrib/libs/tcmalloc/tcmalloc/internal_malloc_extension.h (working tree)
+@@ -116,6 +116,10 @@ ABSL_ATTRIBUTE_WEAK int64_t
+ MallocExtension_Internal_GetMaxTotalThreadCacheBytes();
+ ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_SetMaxTotalThreadCacheBytes(
+ int64_t value);
++
++ABSL_ATTRIBUTE_WEAK void
++MallocExtension_EnableForkSupport();
++
+ }
+
+ #endif
+--- contrib/libs/tcmalloc/tcmalloc/malloc_extension.cc (index)
++++ contrib/libs/tcmalloc/tcmalloc/malloc_extension.cc (working tree)
+@@ -460,6 +460,14 @@ void MallocExtension::SetBackgroundReleaseRate(BytesPerSecond rate) {
+ #endif
+ }
+
++void MallocExtension::EnableForkSupport() {
++#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS
++ if (&MallocExtension_EnableForkSupport != nullptr) {
++ MallocExtension_EnableForkSupport();
++ }
++#endif
++}
++
+ } // namespace tcmalloc
+
+ // Default implementation just returns size. The expectation is that
+--- contrib/libs/tcmalloc/tcmalloc/malloc_extension.h (index)
++++ contrib/libs/tcmalloc/tcmalloc/malloc_extension.h (working tree)
+@@ -468,6 +468,10 @@ class MallocExtension final {
+ // Specifies the release rate from the page heap. ProcessBackgroundActions
+ // must be called for this to be operative.
+ static void SetBackgroundReleaseRate(BytesPerSecond rate);
++
++ // Enables fork support.
++ // Allocator will continue to function correctly in the child, after calling fork().
++ static void EnableForkSupport();
+ };
+
+ } // namespace tcmalloc
+--- contrib/libs/tcmalloc/tcmalloc/static_vars.cc (index)
++++ contrib/libs/tcmalloc/tcmalloc/static_vars.cc (working tree)
+@@ -59,6 +59,7 @@ ABSL_CONST_INIT PageHeapAllocator<StackTraceTable::Bucket>
+ Static::bucket_allocator_;
+ ABSL_CONST_INIT std::atomic<bool> Static::inited_{false};
+ ABSL_CONST_INIT bool Static::cpu_cache_active_ = false;
++ABSL_CONST_INIT bool Static::fork_support_enabled_ = false;
+ ABSL_CONST_INIT Static::PageAllocatorStorage Static::page_allocator_;
+ ABSL_CONST_INIT PageMap Static::pagemap_;
+ ABSL_CONST_INIT absl::base_internal::SpinLock guarded_page_lock(
+@@ -116,6 +117,13 @@ ABSL_ATTRIBUTE_COLD ABSL_ATTRIBUTE_NOINLINE void Static::SlowInitIfNecessary() {
+ pagemap_.MapRootWithSmallPages();
+ guardedpage_allocator_.Init(/*max_alloced_pages=*/64, /*total_pages=*/128);
+ inited_.store(true, std::memory_order_release);
++
++ pageheap_lock.Unlock();
++ pthread_atfork(
++ TCMallocPreFork,
++ TCMallocPostFork,
++ TCMallocPostFork);
++ pageheap_lock.Lock();
+ }
+ }
+
+--- contrib/libs/tcmalloc/tcmalloc/static_vars.h (index)
++++ contrib/libs/tcmalloc/tcmalloc/static_vars.h (working tree)
+@@ -50,6 +50,9 @@ class CPUCache;
+ class PageMap;
+ class ThreadCache;
+
++void TCMallocPreFork();
++void TCMallocPostFork();
++
+ class Static {
+ public:
+ // True if InitIfNecessary() has run to completion.
+@@ -124,6 +127,9 @@ class Static {
+ static void ActivateCPUCache() { cpu_cache_active_ = true; }
+ static void DeactivateCPUCache() { cpu_cache_active_ = false; }
+
++ static bool ForkSupportEnabled() { return fork_support_enabled_; }
++ static void EnableForkSupport() { fork_support_enabled_ = true; }
++
+ static bool ABSL_ATTRIBUTE_ALWAYS_INLINE IsOnFastPath() {
+ return
+ #ifndef TCMALLOC_DEPRECATED_PERTHREAD
+@@ -169,6 +175,7 @@ class Static {
+ static PageHeapAllocator<StackTraceTable::Bucket> bucket_allocator_;
+ ABSL_CONST_INIT static std::atomic<bool> inited_;
+ static bool cpu_cache_active_;
++ static bool fork_support_enabled_;
+ ABSL_CONST_INIT static PeakHeapTracker peak_heap_tracker_;
+ ABSL_CONST_INIT static NumaTopology<kNumaPartitions, kNumBaseClasses>
+ numa_topology_;
+--- contrib/libs/tcmalloc/tcmalloc/system-alloc.cc (index)
++++ contrib/libs/tcmalloc/tcmalloc/system-alloc.cc (working tree)
+@@ -354,6 +354,14 @@ ABSL_CONST_INIT std::atomic<int> system_release_errors = ATOMIC_VAR_INIT(0);
+
+ } // namespace
+
++void AcquireSystemAllocLock() {
++ spinlock.Lock();
++}
++
++void ReleaseSystemAllocLock() {
++ spinlock.Unlock();
++}
++
+ void* SystemAlloc(size_t bytes, size_t* actual_bytes, size_t alignment,
+ const MemoryTag tag) {
+ // If default alignment is set request the minimum alignment provided by
+--- contrib/libs/tcmalloc/tcmalloc/system-alloc.h (index)
++++ contrib/libs/tcmalloc/tcmalloc/system-alloc.h (working tree)
+@@ -50,6 +50,9 @@ void *SystemAlloc(size_t bytes, size_t *actual_bytes, size_t alignment,
+ // call to SystemRelease.
+ int SystemReleaseErrors();
+
++void AcquireSystemAllocLock();
++void ReleaseSystemAllocLock();
++
+ // This call is a hint to the operating system that the pages
+ // contained in the specified range of memory will not be used for a
+ // while, and can be released for use by other processes or the OS.
+--- contrib/libs/tcmalloc/tcmalloc/tcmalloc.cc (index)
++++ contrib/libs/tcmalloc/tcmalloc/tcmalloc.cc (working tree)
+@@ -1117,6 +1117,40 @@ extern "C" void MallocExtension_Internal_ReleaseMemoryToSystem(
+ }
+ }
+
++extern "C" void MallocExtension_EnableForkSupport() {
++ Static::EnableForkSupport();
++}
++
++void TCMallocPreFork() {
++ if (!Static::ForkSupportEnabled()) {
++ return;
++ }
++
++ if (Static::CPUCacheActive()) {
++ Static::cpu_cache().AcquireInternalLocks();
++ }
++ Static::transfer_cache().AcquireInternalLocks();
++ guarded_page_lock.Lock();
++ release_lock.Lock();
++ pageheap_lock.Lock();
++ AcquireSystemAllocLock();
++}
++
++void TCMallocPostFork() {
++ if (!Static::ForkSupportEnabled()) {
++ return;
++ }
++
++ ReleaseSystemAllocLock();
++ pageheap_lock.Unlock();
++ guarded_page_lock.Unlock();
++ release_lock.Unlock();
++ Static::transfer_cache().ReleaseInternalLocks();
++ if (Static::CPUCacheActive()) {
++ Static::cpu_cache().ReleaseInternalLocks();
++ }
++}
++
+ // nallocx slow path.
+ // Moved to a separate function because size_class_with_alignment is not inlined
+ // which would cause nallocx to become non-leaf function with stack frame and
+--- contrib/libs/tcmalloc/tcmalloc/tcmalloc.h (index)
++++ contrib/libs/tcmalloc/tcmalloc/tcmalloc.h (working tree)
+@@ -120,4 +120,7 @@ void TCMallocInternalDeleteArrayNothrow(void* p, const std::nothrow_t&) __THROW
+ }
+ #endif
+
++void TCMallocInternalAcquireLocks();
++void TCMallocInternalReleaseLocks();
++
+ #endif // TCMALLOC_TCMALLOC_H_
+--- contrib/libs/tcmalloc/tcmalloc/transfer_cache.h (index)
++++ contrib/libs/tcmalloc/tcmalloc/transfer_cache.h (working tree)
+@@ -176,6 +176,26 @@ class TransferCacheManager : public StaticForwarder {
+ }
+ }
+
++ void AcquireInternalLocks() {
++ for (int i = 0; i < kNumClasses; ++i) {
++ if (implementation_ == TransferCacheImplementation::Ring) {
++ cache_[i].rbtc.AcquireInternalLocks();
++ } else {
++ cache_[i].tc.AcquireInternalLocks();
++ }
++ }
++ }
++
++ void ReleaseInternalLocks() {
++ for (int i = 0; i < kNumClasses; ++i) {
++ if (implementation_ == TransferCacheImplementation::Ring) {
++ cache_[i].rbtc.ReleaseInternalLocks();
++ } else {
++ cache_[i].tc.ReleaseInternalLocks();
++ }
++ }
++ }
++
+ void InsertRange(int size_class, absl::Span<void *> batch) {
+ if (implementation_ == TransferCacheImplementation::Ring) {
+ cache_[size_class].rbtc.InsertRange(size_class, batch);
+@@ -295,6 +315,9 @@ class TransferCacheManager {
+ return TransferCacheImplementation::None;
+ }
+
++ void AcquireInternalLocks() {}
++ void ReleaseInternalLocks() {}
++
+ private:
+ CentralFreeList freelist_[kNumClasses];
+ } ABSL_CACHELINE_ALIGNED;
+--- contrib/libs/tcmalloc/tcmalloc/transfer_cache_internals.h (index)
++++ contrib/libs/tcmalloc/tcmalloc/transfer_cache_internals.h (working tree)
+@@ -366,6 +366,18 @@ class TransferCache {
+ return freelist_do_not_access_directly_;
+ }
+
++ void AcquireInternalLocks()
++ {
++ freelist().AcquireInternalLocks();
++ lock_.Lock();
++ }
++
++ void ReleaseInternalLocks()
++ {
++ lock_.Unlock();
++ freelist().ReleaseInternalLocks();
++ }
++
+ private:
+ // Returns first object of the i-th slot.
+ void **GetSlot(size_t i) ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) {
+@@ -468,6 +480,18 @@ class RingBufferTransferCache {
+
+ // These methods all do internal locking.
+
++ void AcquireInternalLocks()
++ {
++ freelist().AcquireInternalLocks();
++ lock_.Lock();
++ }
++
++ void ReleaseInternalLocks()
++ {
++ lock_.Unlock();
++ freelist().ReleaseInternalLocks();
++ }
++
+ // Insert the specified batch into the transfer cache. N is the number of
+ // elements in the range. RemoveRange() is the opposite operation.
+ void InsertRange(int size_class, absl::Span<void *> batch)
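
Taken together, the hunks above add a fork-safety hook: `MallocExtension::EnableForkSupport()` registers `TCMallocPreFork`/`TCMallocPostFork` through `pthread_atfork()`, and those handlers acquire and release the CPU-cache, transfer-cache, guarded-page, release, page-heap, and system-alloc locks around `fork()`. A minimal usage sketch follows; it assumes a binary linked against this patched tcmalloc, and the child's workload is illustrative only.

```cpp
// Sketch of how the fork-support hook added by fork.patch is meant to be used.
#include <sys/wait.h>
#include <unistd.h>

#include <cstdlib>

#include "tcmalloc/malloc_extension.h"

int main() {
  // Register the pthread_atfork() handlers so tcmalloc's internal locks are
  // held while fork() copies the address space and released in both parent
  // and child afterwards.
  tcmalloc::MallocExtension::EnableForkSupport();

  pid_t pid = fork();
  if (pid == 0) {
    // The child can keep allocating, since the allocator was quiesced
    // across the fork.
    void* p = std::malloc(1024);
    std::free(p);
    _exit(0);
  }
  waitpid(pid, nullptr, 0);
  return 0;
}
```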
diff --git a/contrib/libs/tcmalloc/patches/userdata.patch b/contrib/libs/tcmalloc/patches/userdata.patch
new file mode 100644
index 0000000000..83373cebfe
--- /dev/null
+++ b/contrib/libs/tcmalloc/patches/userdata.patch
@@ -0,0 +1,220 @@
+--- contrib/libs/tcmalloc/tcmalloc/internal/logging.h (index)
++++ contrib/libs/tcmalloc/tcmalloc/internal/logging.h (working tree)
+@@ -67,6 +67,8 @@ struct StackTrace {
+ // between the previous sample and this one
+ size_t weight;
+
++ void* user_data;
++
+ template <typename H>
+ friend H AbslHashValue(H h, const StackTrace& t) {
+ // As we use StackTrace as a key-value node in StackTraceTable, we only
+--- contrib/libs/tcmalloc/tcmalloc/internal_malloc_extension.h (index)
++++ contrib/libs/tcmalloc/tcmalloc/internal_malloc_extension.h (working tree)
+@@ -120,6 +120,12 @@ ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_SetMaxTotalThreadCacheBytes(
+ ABSL_ATTRIBUTE_WEAK void
+ MallocExtension_EnableForkSupport();
+
++ABSL_ATTRIBUTE_WEAK void
++MallocExtension_SetSampleUserDataCallbacks(
++ tcmalloc::MallocExtension::CreateSampleUserDataCallback create,
++ tcmalloc::MallocExtension::CopySampleUserDataCallback copy,
++ tcmalloc::MallocExtension::DestroySampleUserDataCallback destroy);
++
+ }
+
+ #endif
+--- contrib/libs/tcmalloc/tcmalloc/malloc_extension.cc (index)
++++ contrib/libs/tcmalloc/tcmalloc/malloc_extension.cc (working tree)
+@@ -468,6 +468,21 @@ void MallocExtension::EnableForkSupport() {
+ #endif
+ }
+
++void MallocExtension::SetSampleUserDataCallbacks(
++ CreateSampleUserDataCallback create,
++ CopySampleUserDataCallback copy,
++ DestroySampleUserDataCallback destroy) {
++#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS
++ if (&MallocExtension_SetSampleUserDataCallbacks != nullptr) {
++ MallocExtension_SetSampleUserDataCallbacks(create, copy, destroy);
++ }
++#else
++ (void)create;
++ (void)copy;
++ (void)destroy;
++#endif
++}
++
+ } // namespace tcmalloc
+
+ // Default implementation just returns size. The expectation is that
+--- contrib/libs/tcmalloc/tcmalloc/malloc_extension.h (index)
++++ contrib/libs/tcmalloc/tcmalloc/malloc_extension.h (working tree)
+@@ -94,6 +94,8 @@ class Profile final {
+
+ int depth;
+ void* stack[kMaxStackDepth];
++
++ void* user_data;
+ };
+
+ void Iterate(absl::FunctionRef<void(const Sample&)> f) const;
+@@ -472,6 +474,16 @@ class MallocExtension final {
+ // Enables fork support.
+ // Allocator will continue to function correctly in the child, after calling fork().
+ static void EnableForkSupport();
++
++ using CreateSampleUserDataCallback = void*();
++ using CopySampleUserDataCallback = void*(void*);
++ using DestroySampleUserDataCallback = void(void*);
++
++ // Sets callbacks for lifetime control of custom user data attached to allocation samples
++ static void SetSampleUserDataCallbacks(
++ CreateSampleUserDataCallback create,
++ CopySampleUserDataCallback copy,
++ DestroySampleUserDataCallback destroy);
+ };
+
+ } // namespace tcmalloc
+--- contrib/libs/tcmalloc/tcmalloc/peak_heap_tracker.cc (index)
++++ contrib/libs/tcmalloc/tcmalloc/peak_heap_tracker.cc (working tree)
+@@ -55,6 +55,7 @@ void PeakHeapTracker::MaybeSaveSample() {
+ StackTrace *t = peak_sampled_span_stacks_, *next = nullptr;
+ while (t != nullptr) {
+ next = reinterpret_cast<StackTrace*>(t->stack[kMaxStackDepth - 1]);
++ Static::DestroySampleUserData(t->user_data);
+ Static::stacktrace_allocator().Delete(t);
+ t = next;
+ }
+@@ -63,7 +64,9 @@ void PeakHeapTracker::MaybeSaveSample() {
+ for (Span* s : Static::sampled_objects_) {
+ t = Static::stacktrace_allocator().New();
+
+- *t = *s->sampled_stack();
++ StackTrace* sampled_stack = s->sampled_stack();
++ *t = *sampled_stack;
++ t->user_data = Static::CopySampleUserData(sampled_stack->user_data);
+ if (t->depth == kMaxStackDepth) {
+ t->depth = kMaxStackDepth - 1;
+ }
+--- contrib/libs/tcmalloc/tcmalloc/stack_trace_table.cc (index)
++++ contrib/libs/tcmalloc/tcmalloc/stack_trace_table.cc (working tree)
+@@ -73,6 +73,7 @@ StackTraceTable::~StackTraceTable() {
+ Bucket* b = table_[i];
+ while (b != nullptr) {
+ Bucket* next = b->next;
++ Static::DestroySampleUserData(b->trace.user_data);
+ Static::bucket_allocator().Delete(b);
+ b = next;
+ }
+@@ -104,6 +105,7 @@ void StackTraceTable::AddTrace(double count, const StackTrace& t) {
+ b = Static::bucket_allocator().New();
+ b->hash = h;
+ b->trace = t;
++ b->trace.user_data = Static::CopySampleUserData(t.user_data);
+ b->count = count;
+ b->total_weight = t.weight * count;
+ b->next = table_[idx];
+@@ -135,6 +137,8 @@ void StackTraceTable::Iterate(
+ e.requested_alignment = b->trace.requested_alignment;
+ e.allocated_size = allocated_size;
+
++ e.user_data = b->trace.user_data;
++
+ e.depth = b->trace.depth;
+ static_assert(kMaxStackDepth <= Profile::Sample::kMaxStackDepth,
+ "Profile stack size smaller than internal stack sizes");
+--- contrib/libs/tcmalloc/tcmalloc/static_vars.cc (index)
++++ contrib/libs/tcmalloc/tcmalloc/static_vars.cc (working tree)
+@@ -60,6 +60,12 @@ ABSL_CONST_INIT PageHeapAllocator<StackTraceTable::Bucket>
+ ABSL_CONST_INIT std::atomic<bool> Static::inited_{false};
+ ABSL_CONST_INIT bool Static::cpu_cache_active_ = false;
+ ABSL_CONST_INIT bool Static::fork_support_enabled_ = false;
++ABSL_CONST_INIT Static::CreateSampleUserDataCallback*
++ Static::create_sample_user_data_callback_ = nullptr;
++ABSL_CONST_INIT Static::CopySampleUserDataCallback*
++ Static::copy_sample_user_data_callback_ = nullptr;
++ABSL_CONST_INIT Static::DestroySampleUserDataCallback*
++ Static::destroy_sample_user_data_callback_ = nullptr;
+ ABSL_CONST_INIT Static::PageAllocatorStorage Static::page_allocator_;
+ ABSL_CONST_INIT PageMap Static::pagemap_;
+ ABSL_CONST_INIT absl::base_internal::SpinLock guarded_page_lock(
+--- contrib/libs/tcmalloc/tcmalloc/static_vars.h (index)
++++ contrib/libs/tcmalloc/tcmalloc/static_vars.h (working tree)
+@@ -130,6 +130,34 @@ class Static {
+ static bool ForkSupportEnabled() { return fork_support_enabled_; }
+ static void EnableForkSupport() { fork_support_enabled_ = true; }
+
++ using CreateSampleUserDataCallback = void*();
++ using CopySampleUserDataCallback = void*(void*);
++ using DestroySampleUserDataCallback = void(void*);
++
++ static void SetSampleUserDataCallbacks(
++ CreateSampleUserDataCallback create,
++ CopySampleUserDataCallback copy,
++ DestroySampleUserDataCallback destroy) {
++ create_sample_user_data_callback_ = create;
++ copy_sample_user_data_callback_ = copy;
++ destroy_sample_user_data_callback_ = destroy;
++ }
++
++ static void* CreateSampleUserData() {
++ if (create_sample_user_data_callback_)
++ return create_sample_user_data_callback_();
++ return nullptr;
++ }
++ static void* CopySampleUserData(void* user_data) {
++ if (copy_sample_user_data_callback_)
++ return copy_sample_user_data_callback_(user_data);
++ return nullptr;
++ }
++ static void DestroySampleUserData(void* user_data) {
++ if (destroy_sample_user_data_callback_)
++ destroy_sample_user_data_callback_(user_data);
++ }
++
+ static bool ABSL_ATTRIBUTE_ALWAYS_INLINE IsOnFastPath() {
+ return
+ #ifndef TCMALLOC_DEPRECATED_PERTHREAD
+@@ -176,6 +204,9 @@ class Static {
+ ABSL_CONST_INIT static std::atomic<bool> inited_;
+ static bool cpu_cache_active_;
+ static bool fork_support_enabled_;
++ static CreateSampleUserDataCallback* create_sample_user_data_callback_;
++ static CopySampleUserDataCallback* copy_sample_user_data_callback_;
++ static DestroySampleUserDataCallback* destroy_sample_user_data_callback_;
+ ABSL_CONST_INIT static PeakHeapTracker peak_heap_tracker_;
+ ABSL_CONST_INIT static NumaTopology<kNumaPartitions, kNumBaseClasses>
+ numa_topology_;
+--- contrib/libs/tcmalloc/tcmalloc/tcmalloc.cc (index)
++++ contrib/libs/tcmalloc/tcmalloc/tcmalloc.cc (working tree)
+@@ -1151,6 +1151,13 @@ void TCMallocPostFork() {
+ }
+ }
+
++extern "C" void MallocExtension_SetSampleUserDataCallbacks(
++ MallocExtension::CreateSampleUserDataCallback create,
++ MallocExtension::CopySampleUserDataCallback copy,
++ MallocExtension::DestroySampleUserDataCallback destroy) {
++ Static::SetSampleUserDataCallbacks(create, copy, destroy);
++}
++
+ // nallocx slow path.
+ // Moved to a separate function because size_class_with_alignment is not inlined
+ // which would cause nallocx to become non-leaf function with stack frame and
+@@ -1500,6 +1507,7 @@ static void* SampleifyAllocation(size_t requested_size, size_t weight,
+ tmp.requested_alignment = requested_alignment;
+ tmp.allocated_size = allocated_size;
+ tmp.weight = weight;
++ tmp.user_data = Static::CreateSampleUserData();
+
+ {
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+@@ -1629,6 +1637,7 @@ static void do_free_pages(void* ptr, const PageId p) {
+ 1);
+ }
+ notify_sampled_alloc = true;
++ Static::DestroySampleUserData(st->user_data);
+ Static::stacktrace_allocator().Delete(st);
+ }
+ if (IsSampledMemory(ptr)) {
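
The patch above threads a `user_data` pointer through `StackTrace` and `Profile::Sample`, managed by three process-wide callbacks. A minimal sketch of registering them is shown below; the tag value and the names `CreateTag`/`CopyTag`/`DestroyTag` are hypothetical. Note that the callbacks run inside the allocator's sampling paths, so this sketch keeps them allocation-free; richer per-sample state needs care to avoid re-entering the allocator while its locks are held.

```cpp
// Sketch only: callback signatures come from the patch above
// (void*(), void*(void*), void(void*)).
#include "tcmalloc/malloc_extension.h"

// A process-wide constant tag, so the callbacks never allocate.
static const char kTag[] = "my-subsystem";

// Called when a new allocation sample is recorded.
static void* CreateTag() { return const_cast<char*>(kTag); }

// Called when a sample (and its attached user data) is copied into a profile.
static void* CopyTag(void* data) { return data; }

// Called when a sample or profile entry is destroyed.
static void DestroyTag(void* /*data*/) {}

int main() {
  tcmalloc::MallocExtension::SetSampleUserDataCallbacks(CreateTag, CopyTag,
                                                        DestroyTag);
  // From here on, every sampled allocation carries the tag, surfaced as
  // Profile::Sample::user_data when iterating heap profiles.
  return 0;
}
```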
diff --git a/contrib/libs/tcmalloc/patches/yandex.patch b/contrib/libs/tcmalloc/patches/yandex.patch
new file mode 100644
index 0000000000..12d11f2dad
--- /dev/null
+++ b/contrib/libs/tcmalloc/patches/yandex.patch
@@ -0,0 +1,91 @@
+commit ab4069ebdd376db4d32c29e1a2414565ec849249
+author: prime
+date: 2021-10-07T14:52:42+03:00
+
+ Apply yandex patches
+
+--- contrib/libs/tcmalloc/tcmalloc/cpu_cache.cc (5096009d22199137186c9a972bc88409d8ebd513)
++++ contrib/libs/tcmalloc/tcmalloc/cpu_cache.cc (ab4069ebdd376db4d32c29e1a2414565ec849249)
+@@ -1112,6 +1112,11 @@ extern "C" bool MallocExtension_Internal_GetPerCpuCachesActive() {
+ return tcmalloc::tcmalloc_internal::Static::CPUCacheActive();
+ }
+
++extern "C" void MallocExtension_Internal_DeactivatePerCpuCaches() {
++ tcmalloc::tcmalloc_internal::Parameters::set_per_cpu_caches(false);
++ tcmalloc::tcmalloc_internal::Static::DeactivateCPUCache();
++}
++
+ extern "C" int32_t MallocExtension_Internal_GetMaxPerCpuCacheSize() {
+ return tcmalloc::tcmalloc_internal::Parameters::max_per_cpu_cache_size();
+ }
+--- contrib/libs/tcmalloc/tcmalloc/internal_malloc_extension.h (5096009d22199137186c9a972bc88409d8ebd513)
++++ contrib/libs/tcmalloc/tcmalloc/internal_malloc_extension.h (ab4069ebdd376db4d32c29e1a2414565ec849249)
+@@ -75,6 +75,7 @@ ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_GetMemoryLimit(
+ ABSL_ATTRIBUTE_WEAK bool MallocExtension_Internal_GetNumericProperty(
+ const char* name_data, size_t name_size, size_t* value);
+ ABSL_ATTRIBUTE_WEAK bool MallocExtension_Internal_GetPerCpuCachesActive();
++ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_DeactivatePerCpuCaches();
+ ABSL_ATTRIBUTE_WEAK int32_t MallocExtension_Internal_GetMaxPerCpuCacheSize();
+ ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_GetSkipSubreleaseInterval(
+ absl::Duration* ret);
+--- contrib/libs/tcmalloc/tcmalloc/malloc_extension.cc (5096009d22199137186c9a972bc88409d8ebd513)
++++ contrib/libs/tcmalloc/tcmalloc/malloc_extension.cc (ab4069ebdd376db4d32c29e1a2414565ec849249)
+@@ -287,6 +287,16 @@ bool MallocExtension::PerCpuCachesActive() {
+ #endif
+ }
+
++void MallocExtension::DeactivatePerCpuCaches() {
++#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS
++ if (MallocExtension_Internal_DeactivatePerCpuCaches == nullptr) {
++ return;
++ }
++
++ MallocExtension_Internal_DeactivatePerCpuCaches();
++#endif
++}
++
+ int32_t MallocExtension::GetMaxPerCpuCacheSize() {
+ #if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS
+ if (MallocExtension_Internal_GetMaxPerCpuCacheSize == nullptr) {
+--- contrib/libs/tcmalloc/tcmalloc/malloc_extension.h (5096009d22199137186c9a972bc88409d8ebd513)
++++ contrib/libs/tcmalloc/tcmalloc/malloc_extension.h (ab4069ebdd376db4d32c29e1a2414565ec849249)
+@@ -329,6 +329,11 @@ class MallocExtension final {
+ // Gets whether TCMalloc is using per-CPU caches.
+ static bool PerCpuCachesActive();
+
++ // Extension for unified agent.
++ //
++ // Should be removed in the future https://st.yandex-team.ru/UNIFIEDAGENT-321
++ static void DeactivatePerCpuCaches();
++
+ // Gets the current maximum cache size per CPU cache.
+ static int32_t GetMaxPerCpuCacheSize();
+ // Sets the maximum cache size per CPU cache. This is a per-core limit.
+--- contrib/libs/tcmalloc/tcmalloc/static_vars.h (5096009d22199137186c9a972bc88409d8ebd513)
++++ contrib/libs/tcmalloc/tcmalloc/static_vars.h (ab4069ebdd376db4d32c29e1a2414565ec849249)
+@@ -122,6 +122,7 @@ class Static {
+ return cpu_cache_active_;
+ }
+ static void ActivateCPUCache() { cpu_cache_active_ = true; }
++ static void DeactivateCPUCache() { cpu_cache_active_ = false; }
+
+ static bool ABSL_ATTRIBUTE_ALWAYS_INLINE IsOnFastPath() {
+ return
+--- contrib/libs/tcmalloc/tcmalloc/tcmalloc.cc (5096009d22199137186c9a972bc88409d8ebd513)
++++ contrib/libs/tcmalloc/tcmalloc/tcmalloc.cc (ab4069ebdd376db4d32c29e1a2414565ec849249)
+@@ -2210,14 +2210,7 @@ extern "C" void* TCMallocInternalNewArray(size_t size)
+ TCMALLOC_ALIAS(TCMallocInternalNew);
+ #else
+ {
+- void* p = fast_alloc(CppPolicy().WithoutHooks(), size);
+- // We keep this next instruction out of fast_alloc for a reason: when
+- // it's in, and new just calls fast_alloc, the optimizer may fold the
+- // new call into fast_alloc, which messes up our whole section-based
+- // stacktracing (see ABSL_ATTRIBUTE_SECTION, above). This ensures fast_alloc
+- // isn't the last thing this fn calls, and prevents the folding.
+- MallocHook::InvokeNewHook(p, size);
+- return p;
++ return fast_alloc(CppPolicy().WithoutHooks(), size);
+ }
+ #endif // TCMALLOC_ALIAS
+
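
The patch above adds `MallocExtension::DeactivatePerCpuCaches()` as a temporary extension for the unified agent; it flips the runtime parameter and `Static::cpu_cache_active_` off. A minimal sketch of calling it, assuming a binary linked against this patched tcmalloc:

```cpp
// Sketch only: demonstrates the extension added by yandex.patch.
#include <cstdio>

#include "tcmalloc/malloc_extension.h"

int main() {
  if (tcmalloc::MallocExtension::PerCpuCachesActive()) {
    // Switch the allocator away from per-CPU caches at runtime.
    tcmalloc::MallocExtension::DeactivatePerCpuCaches();
  }
  std::printf("per-cpu caches active: %d\n",
              tcmalloc::MallocExtension::PerCpuCachesActive());
  return 0;
}
```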
diff --git a/contrib/libs/tcmalloc/slow_but_small/ya.make b/contrib/libs/tcmalloc/slow_but_small/ya.make
new file mode 100644
index 0000000000..ddcb157d30
--- /dev/null
+++ b/contrib/libs/tcmalloc/slow_but_small/ya.make
@@ -0,0 +1,21 @@
+LIBRARY()
+
+WITHOUT_LICENSE_TEXTS()
+
+LICENSE(Apache-2.0)
+
+OWNER(
+ ayles
+ prime
+ g:cpp-contrib
+)
+
+SRCDIR(contrib/libs/tcmalloc)
+
+INCLUDE(../common.inc)
+
+CFLAGS(
+ -DTCMALLOC_SMALL_BUT_SLOW
+)
+
+END()
diff --git a/contrib/libs/tcmalloc/tcmalloc/BUILD b/contrib/libs/tcmalloc/tcmalloc/BUILD
new file mode 100644
index 0000000000..e618b85eec
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/BUILD
@@ -0,0 +1,1316 @@
+# Copyright 2019 The TCMalloc Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Description:
+#
+# tcmalloc is a fast malloc implementation. See
+# https://github.com/google/tcmalloc/tree/master/docs/design.md for a high-level description of
+# how this malloc works.
+
+load("@rules_fuzzing//fuzzing:cc_defs.bzl", "cc_fuzz_test")
+load("//tcmalloc:copts.bzl", "TCMALLOC_DEFAULT_COPTS")
+load("//tcmalloc:variants.bzl", "create_tcmalloc_benchmark", "create_tcmalloc_testsuite")
+
+package(default_visibility = ["//visibility:private"])
+
+licenses(["notice"])
+
+exports_files(["LICENSE"])
+
+config_setting(
+ name = "llvm",
+ flag_values = {
+ "@bazel_tools//tools/cpp:compiler": "clang",
+ },
+ visibility = [
+ "//tcmalloc/internal:__subpackages__",
+ "//tcmalloc/testing:__subpackages__",
+ ],
+)
+
+cc_library(
+ name = "experiment",
+ srcs = ["experiment.cc"],
+ hdrs = [
+ "experiment.h",
+ "experiment_config.h",
+ ],
+ copts = TCMALLOC_DEFAULT_COPTS,
+ deps = [
+ ":malloc_extension",
+ "//tcmalloc/internal:environment",
+ "//tcmalloc/internal:logging",
+ "@com_google_absl//absl/base:core_headers",
+ "@com_google_absl//absl/strings",
+ "@com_google_absl//absl/types:optional",
+ ],
+)
+
+# Dependencies required by :tcmalloc and its variants. Since :common is built
+# several different ways, it should not be included on this list.
+tcmalloc_deps = [
+ ":experiment",
+ ":malloc_extension",
+ "@com_google_absl//absl/base",
+ "@com_google_absl//absl/base:config",
+ "@com_google_absl//absl/base:core_headers",
+ "@com_google_absl//absl/base:dynamic_annotations",
+ "@com_google_absl//absl/debugging:leak_check",
+ "@com_google_absl//absl/debugging:stacktrace",
+ "@com_google_absl//absl/debugging:symbolize",
+ "@com_google_absl//absl/memory",
+ "@com_google_absl//absl/strings",
+ "@com_google_absl//absl/numeric:bits",
+ "//tcmalloc/internal:config",
+ "//tcmalloc/internal:declarations",
+ "//tcmalloc/internal:linked_list",
+ "//tcmalloc/internal:logging",
+ "//tcmalloc/internal:memory_stats",
+ "//tcmalloc/internal:optimization",
+ "//tcmalloc/internal:percpu",
+]
+
+# This library provides tcmalloc always
+cc_library(
+ name = "tcmalloc",
+ srcs = [
+ "libc_override.h",
+ "libc_override_gcc_and_weak.h",
+ "libc_override_glibc.h",
+ "libc_override_redefine.h",
+ "tcmalloc.cc",
+ "tcmalloc.h",
+ ],
+ copts = TCMALLOC_DEFAULT_COPTS,
+ linkstatic = 1,
+ visibility = ["//visibility:public"],
+ deps = tcmalloc_deps + [
+ ":common",
+ ],
+ alwayslink = 1,
+)
+
+# Provides tcmalloc always; use per-thread mode.
+cc_library(
+ name = "tcmalloc_deprecated_perthread",
+ srcs = [
+ "libc_override.h",
+ "libc_override_gcc_and_weak.h",
+ "libc_override_glibc.h",
+ "libc_override_redefine.h",
+ "tcmalloc.cc",
+ "tcmalloc.h",
+ ],
+ copts = ["-DTCMALLOC_DEPRECATED_PERTHREAD"] + TCMALLOC_DEFAULT_COPTS,
+ linkstatic = 1,
+ visibility = [
+ "//tcmalloc/internal:__pkg__",
+ "//tcmalloc/testing:__pkg__",
+ ],
+ deps = tcmalloc_deps + [
+ ":common_deprecated_perthread",
+ ],
+ alwayslink = 1,
+)
+
+# An opt tcmalloc build with ASSERTs forced on (by turning off
+# NDEBUG). Useful for tracking down crashes in production binaries.
+# To use, add malloc = "//tcmalloc:opt_with_assertions" in your
+# target's build rule.
+cc_library(
+ name = "opt_with_assertions",
+ srcs = [
+ "libc_override.h",
+ "libc_override_gcc_and_weak.h",
+ "libc_override_glibc.h",
+ "libc_override_redefine.h",
+ "tcmalloc.cc",
+ "tcmalloc.h",
+ ],
+ copts = [
+ "-O2",
+ "-UNDEBUG",
+ ] + TCMALLOC_DEFAULT_COPTS,
+ linkstatic = 1,
+ visibility = ["//visibility:public"],
+ deps = tcmalloc_deps + [
+ ":common",
+ ],
+ alwayslink = 1,
+)
+
+cc_library(
+ name = "size_class_info",
+ hdrs = ["size_class_info.h"],
+ copts = TCMALLOC_DEFAULT_COPTS,
+ deps = [
+ "//tcmalloc/internal:logging",
+ ],
+)
+
+# List of common source files used by the various tcmalloc libraries.
+common_srcs = [
+ "arena.cc",
+ "arena.h",
+ "background.cc",
+ "central_freelist.cc",
+ "central_freelist.h",
+ "common.cc",
+ "common.h",
+ "cpu_cache.cc",
+ "cpu_cache.h",
+ "experimental_pow2_below64_size_class.cc",
+ "experimental_pow2_size_class.cc",
+ "legacy_size_classes.cc",
+ "guarded_page_allocator.h",
+ "guarded_page_allocator.cc",
+ "huge_address_map.cc",
+ "huge_allocator.cc",
+ "huge_allocator.h",
+ "huge_cache.cc",
+ "huge_cache.h",
+ "huge_region.h",
+ "huge_page_aware_allocator.cc",
+ "huge_page_aware_allocator.h",
+ "huge_page_filler.h",
+ "huge_pages.h",
+ "page_allocator.cc",
+ "page_allocator.h",
+ "page_allocator_interface.cc",
+ "page_allocator_interface.h",
+ "page_heap.cc",
+ "page_heap.h",
+ "page_heap_allocator.h",
+ "pagemap.cc",
+ "pagemap.h",
+ "parameters.cc",
+ "peak_heap_tracker.cc",
+ "sampler.cc",
+ "sampler.h",
+ "size_classes.cc",
+ "span.cc",
+ "span.h",
+ "span_stats.h",
+ "stack_trace_table.cc",
+ "stack_trace_table.h",
+ "static_vars.cc",
+ "static_vars.h",
+ "stats.cc",
+ "system-alloc.cc",
+ "system-alloc.h",
+ "thread_cache.cc",
+ "thread_cache.h",
+ "tracking.h",
+ "transfer_cache_stats.h",
+ "transfer_cache.cc",
+ "transfer_cache.h",
+ "transfer_cache_internals.h",
+]
+
+common_hdrs = [
+ "arena.h",
+ "central_freelist.h",
+ "common.h",
+ "cpu_cache.h",
+ "guarded_page_allocator.h",
+ "huge_address_map.h",
+ "huge_allocator.h",
+ "tcmalloc_policy.h",
+ "huge_cache.h",
+ "huge_page_filler.h",
+ "huge_pages.h",
+ "huge_region.h",
+ "huge_page_aware_allocator.h",
+ "page_allocator.h",
+ "page_allocator_interface.h",
+ "page_heap.h",
+ "page_heap_allocator.h",
+ "pages.h",
+ "pagemap.h",
+ "parameters.h",
+ "peak_heap_tracker.h",
+ "sampler.h",
+ "span.h",
+ "span_stats.h",
+ "stack_trace_table.h",
+ "stats.h",
+ "static_vars.h",
+ "system-alloc.h",
+ "thread_cache.h",
+ "tracking.h",
+ "transfer_cache_stats.h",
+ "transfer_cache.h",
+ "transfer_cache_internals.h",
+]
+
+common_deps = [
+ ":experiment",
+ ":malloc_extension",
+ ":noruntime_size_classes",
+ ":size_class_info",
+ "@com_google_absl//absl/algorithm:container",
+ "@com_google_absl//absl/base",
+ "@com_google_absl//absl/base:config",
+ "@com_google_absl//absl/base:core_headers",
+ "@com_google_absl//absl/base:dynamic_annotations",
+ "@com_google_absl//absl/container:fixed_array",
+ "@com_google_absl//absl/debugging:debugging_internal",
+ "@com_google_absl//absl/debugging:stacktrace",
+ "@com_google_absl//absl/debugging:symbolize",
+ "@com_google_absl//absl/synchronization",
+ "@com_google_absl//absl/hash:hash",
+ "@com_google_absl//absl/memory",
+ "@com_google_absl//absl/strings",
+ "@com_google_absl//absl/strings:str_format",
+ "@com_google_absl//absl/time",
+ "@com_google_absl//absl/types:optional",
+ "@com_google_absl//absl/types:span",
+ "//tcmalloc/internal:atomic_stats_counter",
+ "@com_google_absl//absl/numeric:bits",
+ "//tcmalloc/internal:config",
+ "//tcmalloc/internal:declarations",
+ "//tcmalloc/internal:environment",
+ "//tcmalloc/internal:linked_list",
+ "//tcmalloc/internal:logging",
+ "//tcmalloc/internal:mincore",
+ "//tcmalloc/internal:numa",
+ "//tcmalloc/internal:cache_topology",
+ "//tcmalloc/internal:optimization",
+ "//tcmalloc/internal:parameter_accessors",
+ "//tcmalloc/internal:percpu",
+ "//tcmalloc/internal:percpu_tcmalloc",
+ "//tcmalloc/internal:range_tracker",
+ "//tcmalloc/internal:timeseries_tracker",
+ "//tcmalloc/internal:util",
+]
+
+cc_library(
+ name = "common",
+ srcs = common_srcs,
+ hdrs = common_hdrs,
+ copts = TCMALLOC_DEFAULT_COPTS,
+ linkstatic = 1,
+ visibility = ["//tcmalloc:tcmalloc_tests"],
+ deps = common_deps,
+ alwayslink = 1,
+)
+
+cc_library(
+ name = "common_deprecated_perthread",
+ srcs = common_srcs,
+ hdrs = common_hdrs,
+ copts = ["-DTCMALLOC_DEPRECATED_PERTHREAD"] + TCMALLOC_DEFAULT_COPTS,
+ linkstatic = 1,
+ deps = common_deps,
+ alwayslink = 1,
+)
+
+# TEMPORARY. WILL BE REMOVED.
+# Add a dep to this if you want your binary to use hugepage-aware
+# allocator.
+cc_library(
+ name = "want_hpaa",
+ srcs = ["want_hpaa.cc"],
+ copts = ["-g0"] + TCMALLOC_DEFAULT_COPTS,
+ visibility = ["//visibility:public"],
+ deps = [
+ "//tcmalloc/internal:config",
+ "@com_google_absl//absl/base:core_headers",
+ ],
+ alwayslink = 1,
+)
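+
+# Example (sketch, hypothetical target): opting a binary into the
+# hugepage-aware allocator by linking TCMalloc and depending on :want_hpaa.
+#
+#   cc_binary(
+#       name = "my_server",          # hypothetical name
+#       srcs = ["my_server.cc"],     # hypothetical source
+#       malloc = "//tcmalloc",
+#       deps = ["//tcmalloc:want_hpaa"],
+#   )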
+
+# TEMPORARY. WILL BE REMOVED.
+# Add a dep to this if you want your binary to use hugepage-aware
+# allocator with hpaa_subrelease=true.
+cc_library(
+ name = "want_hpaa_subrelease",
+ srcs = ["want_hpaa_subrelease.cc"],
+ copts = ["-g0"] + TCMALLOC_DEFAULT_COPTS,
+ visibility = ["//visibility:public"],
+ deps = [
+ "//tcmalloc/internal:config",
+ "@com_google_absl//absl/base:core_headers",
+ ],
+ alwayslink = 1,
+)
+
+# TEMPORARY. WILL BE REMOVED.
+# Add a dep to this if you want your binary to not use hugepage-aware
+# allocator.
+cc_library(
+ name = "want_no_hpaa",
+ srcs = ["want_no_hpaa.cc"],
+ copts = ["-g0"] + TCMALLOC_DEFAULT_COPTS,
+ visibility = ["//tcmalloc/testing:__pkg__"],
+ deps = [
+ "//tcmalloc/internal:config",
+ "@com_google_absl//absl/base:core_headers",
+ ],
+ alwayslink = 1,
+)
+
+# TEMPORARY. WILL BE REMOVED.
+# Add a dep to this if you want your binary to use old span sizes.
+cc_library(
+ name = "want_legacy_spans",
+ srcs = ["want_legacy_spans.cc"],
+ copts = ["-g0"] + TCMALLOC_DEFAULT_COPTS,
+ visibility = ["//tcmalloc/testing:__pkg__"],
+ deps = [
+ "//tcmalloc/internal:config",
+ "@com_google_absl//absl/base:core_headers",
+ ],
+ alwayslink = 1,
+)
+
+# Add a dep to this if you want your binary to enable NUMA awareness by
+# default.
+cc_library(
+ name = "want_numa_aware",
+ srcs = ["want_numa_aware.cc"],
+ copts = ["-g0"] + TCMALLOC_DEFAULT_COPTS,
+ visibility = [
+ "//tcmalloc:__pkg__",
+ "//tcmalloc/internal:__pkg__",
+ "//tcmalloc/testing:__pkg__",
+ ],
+ deps = [
+ "//tcmalloc/internal:config",
+ "@com_google_absl//absl/base:core_headers",
+ ],
+ alwayslink = 1,
+)
+
+cc_library(
+ name = "runtime_size_classes",
+ srcs = ["runtime_size_classes.cc"],
+ hdrs = ["runtime_size_classes.h"],
+ copts = TCMALLOC_DEFAULT_COPTS,
+ visibility = ["//visibility:private"],
+ deps = [
+ ":size_class_info",
+ "//tcmalloc/internal:environment",
+ "//tcmalloc/internal:logging",
+ "@com_google_absl//absl/base:core_headers",
+ "@com_google_absl//absl/strings",
+ ],
+ alwayslink = 1,
+)
+
+cc_library(
+ name = "noruntime_size_classes",
+ srcs = ["noruntime_size_classes.cc"],
+ hdrs = ["runtime_size_classes.h"],
+ copts = TCMALLOC_DEFAULT_COPTS,
+ deps = [
+ ":size_class_info",
+ "@com_google_absl//absl/base:core_headers",
+ "@com_google_absl//absl/strings",
+ ],
+ alwayslink = 1,
+)
+
+# TCMalloc with large pages is usually faster but fragmentation is higher. See
+# https://github.com/google/tcmalloc/tree/master/docs/tuning.md for more details.
+cc_library(
+ name = "tcmalloc_large_pages",
+ srcs = [
+ "libc_override.h",
+ "libc_override_gcc_and_weak.h",
+ "libc_override_glibc.h",
+ "libc_override_redefine.h",
+ "tcmalloc.cc",
+ "tcmalloc.h",
+ ],
+ copts = ["-DTCMALLOC_LARGE_PAGES"] + TCMALLOC_DEFAULT_COPTS,
+ linkstatic = 1,
+ visibility = ["//visibility:public"],
+ deps = tcmalloc_deps + [
+ ":common_large_pages",
+ ],
+ alwayslink = 1,
+)
+
+cc_library(
+ name = "common_large_pages",
+ srcs = common_srcs,
+ hdrs = common_hdrs,
+ copts = ["-DTCMALLOC_LARGE_PAGES"] + TCMALLOC_DEFAULT_COPTS,
+ linkstatic = 1,
+ visibility = ["//tcmalloc:tcmalloc_tests"],
+ deps = common_deps,
+ alwayslink = 1,
+)
+
+# TCMalloc with 256k pages is usually faster but fragmentation is higher. See
+# https://github.com/google/tcmalloc/tree/master/docs/tuning.md for more details.
+cc_library(
+ name = "tcmalloc_256k_pages",
+ srcs = [
+ "libc_override.h",
+ "libc_override_gcc_and_weak.h",
+ "libc_override_glibc.h",
+ "libc_override_redefine.h",
+ "tcmalloc.cc",
+ "tcmalloc.h",
+ ],
+ copts = ["-DTCMALLOC_256K_PAGES"] + TCMALLOC_DEFAULT_COPTS,
+ linkstatic = 1,
+ visibility = ["//visibility:public"],
+ deps = tcmalloc_deps + [
+ ":common_256k_pages",
+ ],
+ alwayslink = 1,
+)
+
+cc_library(
+ name = "common_256k_pages",
+ srcs = common_srcs,
+ hdrs = common_hdrs,
+ copts = ["-DTCMALLOC_256K_PAGES"] + TCMALLOC_DEFAULT_COPTS,
+ linkstatic = 1,
+ visibility = ["//tcmalloc:tcmalloc_tests"],
+ deps = common_deps,
+ alwayslink = 1,
+)
+
+cc_library(
+ name = "tcmalloc_256k_pages_and_numa",
+ srcs = [
+ "libc_override.h",
+ "libc_override_gcc_and_weak.h",
+ "libc_override_glibc.h",
+ "libc_override_redefine.h",
+ "tcmalloc.cc",
+ "tcmalloc.h",
+ ],
+ copts = [
+ "-DTCMALLOC_256K_PAGES",
+ "-DTCMALLOC_NUMA_AWARE",
+ ] + TCMALLOC_DEFAULT_COPTS,
+ linkstatic = 1,
+ visibility = ["//tcmalloc/testing:__pkg__"],
+ deps = tcmalloc_deps + [
+ ":common_256k_pages_and_numa",
+ ],
+ alwayslink = 1,
+)
+
+cc_library(
+ name = "common_256k_pages_and_numa",
+ srcs = common_srcs,
+ hdrs = common_hdrs,
+ copts = [
+ "-DTCMALLOC_256K_PAGES",
+ "-DTCMALLOC_NUMA_AWARE",
+ ] + TCMALLOC_DEFAULT_COPTS,
+ linkstatic = 1,
+ visibility = ["//tcmalloc:tcmalloc_tests"],
+ deps = common_deps,
+ alwayslink = 1,
+)
+
+# TCMalloc small-but-slow is a version of TCMalloc that chooses to minimize
+# fragmentation at a *severe* cost to performance. It should be used by
+# applications that have significant memory constraints, but don't need to
+# frequently allocate/free objects.
+#
+# See https://github.com/google/tcmalloc/tree/master/docs/tuning.md for more details.
+cc_library(
+ name = "tcmalloc_small_but_slow",
+ srcs = [
+ "libc_override.h",
+ "libc_override_gcc_and_weak.h",
+ "libc_override_glibc.h",
+ "libc_override_redefine.h",
+ "tcmalloc.cc",
+ "tcmalloc.h",
+ ],
+ copts = ["-DTCMALLOC_SMALL_BUT_SLOW"] + TCMALLOC_DEFAULT_COPTS,
+ linkstatic = 1,
+ visibility = ["//visibility:public"],
+ deps = tcmalloc_deps + [
+ ":common_small_but_slow",
+ ],
+ alwayslink = 1,
+)
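+
+# Example (sketch, hypothetical target): a memory-constrained tool that rarely
+# allocates can select the small-but-slow variant via the malloc attribute.
+#
+#   cc_binary(
+#       name = "my_small_tool",         # hypothetical name
+#       srcs = ["my_small_tool.cc"],    # hypothetical source
+#       malloc = "//tcmalloc:tcmalloc_small_but_slow",
+#   )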
+
+cc_library(
+ name = "common_small_but_slow",
+ srcs = common_srcs,
+ hdrs = common_hdrs,
+ copts = ["-DTCMALLOC_SMALL_BUT_SLOW"] + TCMALLOC_DEFAULT_COPTS,
+ linkstatic = 1,
+ visibility = ["//tcmalloc:tcmalloc_tests"],
+ deps = common_deps,
+ alwayslink = 1,
+)
+
+# TCMalloc with NUMA awareness compiled in. Note that by default NUMA awareness
+# will still be disabled at runtime - this default can be changed by adding a
+# dependency upon want_numa_aware, or overridden by setting the
+# TCMALLOC_NUMA_AWARE environment variable.
+cc_library(
+ name = "tcmalloc_numa_aware",
+ srcs = [
+ "libc_override.h",
+ "libc_override_gcc_and_weak.h",
+ "libc_override_glibc.h",
+ "libc_override_redefine.h",
+ "tcmalloc.cc",
+ "tcmalloc.h",
+ ],
+ copts = ["-DTCMALLOC_NUMA_AWARE"] + TCMALLOC_DEFAULT_COPTS,
+ linkstatic = 1,
+ visibility = ["//tcmalloc/testing:__pkg__"],
+ deps = tcmalloc_deps + [
+ ":common_numa_aware",
+ ],
+ alwayslink = 1,
+)
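+
+# Example (sketch; given the restricted visibility above, this applies to
+# targets under //tcmalloc/testing): compile NUMA awareness in and enable it
+# by default via :want_numa_aware, or omit that dep and set the
+# TCMALLOC_NUMA_AWARE environment variable at runtime instead.
+#
+#   cc_test(
+#       name = "my_numa_test",          # hypothetical name
+#       srcs = ["my_numa_test.cc"],     # hypothetical source
+#       malloc = "//tcmalloc:tcmalloc_numa_aware",
+#       deps = ["//tcmalloc:want_numa_aware"],
+#   )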
+
+cc_library(
+ name = "common_numa_aware",
+ srcs = common_srcs,
+ hdrs = common_hdrs,
+ copts = ["-DTCMALLOC_NUMA_AWARE"] + TCMALLOC_DEFAULT_COPTS,
+ linkstatic = 1,
+ visibility = ["//tcmalloc:tcmalloc_tests"],
+ deps = common_deps,
+ alwayslink = 1,
+)
+
+# Export some header files to //tcmalloc/testing/...
+package_group(
+ name = "tcmalloc_tests",
+ packages = [
+ "//tcmalloc/...",
+ ],
+)
+
+cc_library(
+ name = "headers_for_tests",
+ srcs = [
+ "arena.h",
+ "central_freelist.h",
+ "guarded_page_allocator.h",
+ "huge_address_map.h",
+ "huge_allocator.h",
+ "huge_cache.h",
+ "huge_page_aware_allocator.h",
+ "huge_page_filler.h",
+ "huge_pages.h",
+ "huge_region.h",
+ "page_allocator.h",
+ "page_allocator_interface.h",
+ "page_heap.h",
+ "page_heap_allocator.h",
+ "pagemap.h",
+ "parameters.h",
+ "peak_heap_tracker.h",
+ "span_stats.h",
+ "stack_trace_table.h",
+ "tracking.h",
+ "transfer_cache.h",
+ "transfer_cache_internals.h",
+ "transfer_cache_stats.h",
+ ],
+ hdrs = [
+ "common.h",
+ "pages.h",
+ "sampler.h",
+ "size_class_info.h",
+ "span.h",
+ "static_vars.h",
+ "stats.h",
+ "system-alloc.h",
+ ],
+ copts = TCMALLOC_DEFAULT_COPTS,
+ visibility = ["//tcmalloc:tcmalloc_tests"],
+ deps = common_deps,
+)
+
+cc_library(
+ name = "mock_central_freelist",
+ testonly = 1,
+ srcs = ["mock_central_freelist.cc"],
+ hdrs = ["mock_central_freelist.h"],
+ copts = TCMALLOC_DEFAULT_COPTS,
+ deps = [
+ ":common",
+ "//tcmalloc/internal:logging",
+ "@com_google_absl//absl/base",
+ "@com_google_absl//absl/types:span",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_library(
+ name = "page_allocator_test_util",
+ testonly = 1,
+ srcs = [
+ "page_allocator_test_util.h",
+ ],
+ hdrs = ["page_allocator_test_util.h"],
+ copts = TCMALLOC_DEFAULT_COPTS,
+ visibility = ["//tcmalloc:tcmalloc_tests"],
+ deps = [
+ ":common",
+ ":malloc_extension",
+ ],
+)
+
+cc_test(
+ name = "page_heap_test",
+ srcs = ["page_heap_test.cc"],
+ copts = TCMALLOC_DEFAULT_COPTS,
+ deps = [
+ ":common",
+ "@com_github_google_benchmark//:benchmark",
+ "@com_google_absl//absl/base",
+ "@com_google_absl//absl/memory",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_library(
+ name = "mock_transfer_cache",
+ testonly = 1,
+ srcs = ["mock_transfer_cache.cc"],
+ hdrs = ["mock_transfer_cache.h"],
+ copts = TCMALLOC_DEFAULT_COPTS,
+ deps = [
+ ":common",
+ ":mock_central_freelist",
+ "@com_google_absl//absl/random",
+ "@com_google_absl//absl/random:distributions",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
+cc_fuzz_test(
+ name = "transfer_cache_fuzz",
+ testonly = 1,
+ srcs = ["transfer_cache_fuzz.cc"],
+ copts = TCMALLOC_DEFAULT_COPTS,
+ tags = [
+ "noasan",
+ "nomsan",
+ "notsan",
+ ],
+ deps = [
+ ":common",
+ ":mock_central_freelist",
+ ":mock_transfer_cache",
+ ],
+)
+
+cc_test(
+ name = "arena_test",
+ timeout = "moderate",
+ srcs = ["arena_test.cc"],
+ copts = TCMALLOC_DEFAULT_COPTS,
+ deps = [
+ ":common",
+ "@com_github_google_benchmark//:benchmark",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "transfer_cache_test",
+ timeout = "moderate",
+ srcs = ["transfer_cache_test.cc"],
+ copts = TCMALLOC_DEFAULT_COPTS,
+ shard_count = 3,
+ deps = [
+ ":common",
+ ":mock_central_freelist",
+ ":mock_transfer_cache",
+ "//tcmalloc/testing:thread_manager",
+ "@com_github_google_benchmark//:benchmark",
+ "@com_google_absl//absl/base",
+ "@com_google_absl//absl/random",
+ "@com_google_absl//absl/random:distributions",
+ "@com_google_absl//absl/time",
+ "@com_google_absl//absl/types:span",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+create_tcmalloc_benchmark(
+ name = "transfer_cache_benchmark",
+ srcs = ["transfer_cache_benchmark.cc"],
+ copts = TCMALLOC_DEFAULT_COPTS,
+ malloc = "//tcmalloc",
+ deps = [
+ ":common",
+ ":mock_central_freelist",
+ ":mock_transfer_cache",
+ "@com_github_google_benchmark//:benchmark",
+ "@com_google_absl//absl/types:optional",
+ ],
+)
+
+cc_test(
+ name = "huge_cache_test",
+ srcs = ["huge_cache_test.cc"],
+ copts = TCMALLOC_DEFAULT_COPTS,
+ deps = [
+ ":common",
+ "//tcmalloc/internal:logging",
+ "@com_github_google_benchmark//:benchmark",
+ "@com_google_absl//absl/base",
+ "@com_google_absl//absl/memory",
+ "@com_google_absl//absl/random",
+ "@com_google_absl//absl/strings",
+ "@com_google_absl//absl/time",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "huge_allocator_test",
+ srcs = ["huge_allocator_test.cc"],
+ copts = TCMALLOC_DEFAULT_COPTS,
+ deps = [
+ ":common",
+ "//tcmalloc/internal:logging",
+ "@com_github_google_benchmark//:benchmark",
+ "@com_google_absl//absl/base",
+ "@com_google_absl//absl/base:core_headers",
+ "@com_google_absl//absl/random",
+ "@com_google_absl//absl/time",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "huge_page_filler_test",
+ timeout = "long",
+ srcs = ["huge_page_filler_test.cc"],
+ copts = TCMALLOC_DEFAULT_COPTS,
+ linkstatic = 1,
+ deps = [
+ ":common",
+ "//tcmalloc/internal:logging",
+ "@com_github_google_benchmark//:benchmark",
+ "@com_google_absl//absl/algorithm:container",
+ "@com_google_absl//absl/base",
+ "@com_google_absl//absl/base:core_headers",
+ "@com_google_absl//absl/container:flat_hash_map",
+ "@com_google_absl//absl/container:flat_hash_set",
+ "@com_google_absl//absl/flags:flag",
+ "@com_google_absl//absl/memory",
+ "@com_google_absl//absl/random",
+ "@com_google_absl//absl/random:distributions",
+ "@com_google_absl//absl/strings",
+ "@com_google_absl//absl/synchronization",
+ "@com_google_absl//absl/time",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "huge_page_aware_allocator_test",
+ srcs = ["huge_page_aware_allocator_test.cc"],
+ copts = TCMALLOC_DEFAULT_COPTS,
+ linkstatic = 1,
+ malloc = "//tcmalloc",
+ tags = [
+ ],
+ deps = [
+ ":common",
+ ":malloc_extension",
+ ":page_allocator_test_util",
+ "//tcmalloc/internal:logging",
+ "//tcmalloc/testing:thread_manager",
+ "@com_github_google_benchmark//:benchmark",
+ "@com_google_absl//absl/base",
+ "@com_google_absl//absl/base:core_headers",
+ "@com_google_absl//absl/container:flat_hash_map",
+ "@com_google_absl//absl/flags:flag",
+ "@com_google_absl//absl/random",
+ "@com_google_absl//absl/strings",
+ "@com_google_absl//absl/strings:str_format",
+ "@com_google_absl//absl/synchronization",
+ "@com_google_absl//absl/time",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "huge_region_test",
+ srcs = ["huge_region_test.cc"],
+ copts = TCMALLOC_DEFAULT_COPTS,
+ deps = [
+ ":common",
+ "//tcmalloc/internal:logging",
+ "@com_github_google_benchmark//:benchmark",
+ "@com_google_absl//absl/base",
+ "@com_google_absl//absl/random",
+ "@com_google_absl//absl/time",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+create_tcmalloc_benchmark(
+ name = "guarded_page_allocator_benchmark",
+ srcs = ["guarded_page_allocator_benchmark.cc"],
+ copts = TCMALLOC_DEFAULT_COPTS,
+ malloc = "//tcmalloc",
+ deps = [
+ ":common",
+ "//tcmalloc/internal:logging",
+ "@com_github_google_benchmark//:benchmark",
+ "@com_google_absl//absl/base",
+ ],
+)
+
+cc_test(
+ name = "guarded_page_allocator_test",
+ srcs = ["guarded_page_allocator_test.cc"],
+ copts = TCMALLOC_DEFAULT_COPTS,
+ malloc = "//tcmalloc",
+ deps = [
+ ":common",
+ "//tcmalloc/internal:logging",
+ "@com_github_google_benchmark//:benchmark",
+ "@com_google_absl//absl/base",
+ "@com_google_absl//absl/container:flat_hash_set",
+ "@com_google_absl//absl/memory",
+ "@com_google_absl//absl/numeric:bits",
+ "@com_google_absl//absl/strings",
+ "@com_google_absl//absl/time",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "pagemap_test",
+ srcs = ["pagemap_test.cc"],
+ copts = TCMALLOC_DEFAULT_COPTS,
+ deps = [
+ ":common",
+ "@com_github_google_benchmark//:benchmark",
+ "@com_google_absl//absl/random",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "realloc_test",
+ srcs = ["realloc_test.cc"],
+ copts = TCMALLOC_DEFAULT_COPTS,
+ malloc = "//tcmalloc",
+ deps = [
+ "@com_github_google_benchmark//:benchmark",
+ "@com_google_absl//absl/random",
+ "@com_google_absl//absl/random:distributions",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "stack_trace_table_test",
+ srcs = ["stack_trace_table_test.cc"],
+ copts = TCMALLOC_DEFAULT_COPTS,
+ deps = [
+ ":common",
+ "//tcmalloc/internal:logging",
+ "@com_github_google_benchmark//:benchmark",
+ "@com_google_absl//absl/base",
+ "@com_google_absl//absl/base:core_headers",
+ "@com_google_absl//absl/debugging:stacktrace",
+ "@com_google_absl//absl/strings",
+ "@com_google_absl//absl/strings:str_format",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "system-alloc_test",
+ srcs = ["system-alloc_test.cc"],
+ copts = TCMALLOC_DEFAULT_COPTS,
+ malloc = "//tcmalloc",
+ tags = ["nosan"],
+ deps = [
+ ":common",
+ ":malloc_extension",
+ "//tcmalloc/internal:logging",
+ "@com_github_google_benchmark//:benchmark",
+ "@com_google_absl//absl/strings:str_format",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+# This test has been named "large" since before tests had small/medium/large sizes.
+# The "large" refers to large allocation sizes.
+cc_test(
+ name = "tcmalloc_large_test",
+ size = "small",
+ timeout = "moderate",
+ srcs = ["tcmalloc_large_test.cc"],
+ copts = TCMALLOC_DEFAULT_COPTS,
+ malloc = "//tcmalloc",
+ tags = [
+ "noasan",
+ "noubsan",
+ ],
+ deps = [
+ ":common",
+ ":malloc_extension",
+ "//tcmalloc/internal:logging",
+ "@com_github_google_benchmark//:benchmark",
+ "@com_google_absl//absl/container:flat_hash_set",
+ "@com_google_absl//absl/container:node_hash_set",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "malloc_extension_system_malloc_test",
+ srcs = ["malloc_extension_system_malloc_test.cc"],
+ copts = TCMALLOC_DEFAULT_COPTS,
+ malloc = "//tcmalloc/internal:system_malloc",
+ deps = [
+ ":malloc_extension",
+ "@com_github_google_benchmark//:benchmark",
+ "@com_google_absl//absl/random",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "malloc_extension_test",
+ srcs = ["malloc_extension_test.cc"],
+ copts = TCMALLOC_DEFAULT_COPTS,
+ malloc = "//tcmalloc",
+ tags = [
+ "nosan",
+ ],
+ deps = [
+ ":malloc_extension",
+ "@com_github_google_benchmark//:benchmark",
+ "@com_google_absl//absl/time",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_fuzz_test(
+ name = "malloc_extension_fuzz",
+ testonly = 1,
+ srcs = ["malloc_extension_fuzz.cc"],
+ copts = TCMALLOC_DEFAULT_COPTS,
+ tags = [
+ "noasan",
+ "nomsan",
+ "notsan",
+ ],
+ deps = [
+ ":malloc_extension",
+ "@com_google_absl//absl/strings",
+ "@com_google_absl//absl/types:optional",
+ ],
+)
+
+cc_test(
+ name = "page_allocator_test",
+ srcs = ["page_allocator_test.cc"],
+ copts = TCMALLOC_DEFAULT_COPTS,
+ linkstatic = 1,
+ deps = [
+ ":common",
+ ":malloc_extension",
+ ":page_allocator_test_util",
+ "//tcmalloc/internal:logging",
+ "@com_github_google_benchmark//:benchmark",
+ "@com_google_absl//absl/base",
+ "@com_google_absl//absl/base:core_headers",
+ "@com_google_absl//absl/memory",
+ "@com_google_absl//absl/strings",
+ "@com_google_absl//absl/time",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "profile_test",
+ size = "medium",
+ timeout = "long",
+ srcs = ["profile_test.cc"],
+ copts = TCMALLOC_DEFAULT_COPTS,
+ flaky = 1, # TODO(b/134690164)
+ linkstatic = 1,
+ malloc = "//tcmalloc",
+ shard_count = 2,
+ tags = [
+ "noasan",
+ "nomsan",
+ "notsan",
+ ],
+ deps = [
+ ":malloc_extension",
+ "//tcmalloc/internal:declarations",
+ "//tcmalloc/internal:linked_list",
+ "//tcmalloc/testing:testutil",
+ "@com_github_google_benchmark//:benchmark",
+ "@com_google_absl//absl/container:flat_hash_map",
+ "@com_google_absl//absl/synchronization",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "thread_cache_test",
+ size = "medium",
+ srcs = ["thread_cache_test.cc"],
+ copts = TCMALLOC_DEFAULT_COPTS,
+ linkstatic = 1,
+ malloc = "//tcmalloc:tcmalloc_deprecated_perthread",
+ tags = [
+ "nosan",
+ ],
+ deps = [
+ ":malloc_extension",
+ "//tcmalloc/internal:logging",
+ "//tcmalloc/internal:memory_stats",
+ "//tcmalloc/internal:parameter_accessors",
+ "@com_github_google_benchmark//:benchmark",
+ "@com_google_absl//absl/base:core_headers",
+ "@com_google_absl//absl/strings",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+create_tcmalloc_testsuite(
+ name = "size_classes_test",
+ srcs = ["size_classes_test.cc"],
+ copts = TCMALLOC_DEFAULT_COPTS,
+ deps = [
+ ":size_class_info",
+ "@com_github_google_benchmark//:benchmark",
+ "@com_google_absl//absl/random",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "size_classes_test_with_runtime_size_classes",
+ srcs = ["size_classes_with_runtime_size_classes_test.cc"],
+ copts = TCMALLOC_DEFAULT_COPTS,
+ linkstatic = 1,
+ malloc = "//tcmalloc",
+ deps = [
+ ":common",
+ ":runtime_size_classes",
+ ":size_class_info",
+ "@com_github_google_benchmark//:benchmark",
+ "@com_google_absl//absl/strings",
+ "@com_google_absl//absl/strings:str_format",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "heap_profiling_test",
+ srcs = ["heap_profiling_test.cc"],
+ copts = TCMALLOC_DEFAULT_COPTS,
+ malloc = "//tcmalloc",
+ tags = [
+ "nosan",
+ ],
+ deps = [
+ ":common",
+ ":malloc_extension",
+ "//tcmalloc/internal:logging",
+ "//tcmalloc/internal:parameter_accessors",
+ "@com_github_google_benchmark//:benchmark",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "runtime_size_classes_test",
+ srcs = ["runtime_size_classes_test.cc"],
+ copts = TCMALLOC_DEFAULT_COPTS,
+ linkstatic = 1,
+ malloc = "//tcmalloc",
+ deps = [
+ ":runtime_size_classes",
+ "@com_github_google_benchmark//:benchmark",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+create_tcmalloc_testsuite(
+ name = "span_test",
+ srcs = ["span_test.cc"],
+ copts = TCMALLOC_DEFAULT_COPTS,
+ deps = [
+ "//tcmalloc/internal:logging",
+ "@com_github_google_benchmark//:benchmark",
+ "@com_google_absl//absl/base",
+ "@com_google_absl//absl/container:flat_hash_set",
+ "@com_google_absl//absl/random",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+create_tcmalloc_benchmark(
+ name = "span_benchmark",
+ srcs = ["span_benchmark.cc"],
+ copts = TCMALLOC_DEFAULT_COPTS,
+ malloc = ":tcmalloc",
+ deps = [
+ ":common",
+ "//tcmalloc/internal:logging",
+ "@com_github_google_benchmark//:benchmark",
+ "@com_google_absl//absl/base",
+ "@com_google_absl//absl/random",
+ ],
+)
+
+cc_test(
+ name = "stats_test",
+ srcs = ["stats_test.cc"],
+ copts = TCMALLOC_DEFAULT_COPTS,
+ malloc = "//tcmalloc",
+ deps = [
+ ":common",
+ "@com_github_google_benchmark//:benchmark",
+ "@com_google_absl//absl/base",
+ "@com_google_absl//absl/time",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
+ name = "huge_address_map_test",
+ srcs = ["huge_address_map_test.cc"],
+ copts = TCMALLOC_DEFAULT_COPTS,
+ deps = [
+ ":common",
+ "@com_github_google_benchmark//:benchmark",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_library(
+ name = "malloc_extension",
+ srcs = ["malloc_extension.cc"],
+ hdrs = [
+ "internal_malloc_extension.h",
+ "malloc_extension.h",
+ ],
+ copts = TCMALLOC_DEFAULT_COPTS,
+ visibility = [
+ "//visibility:public",
+ ],
+ deps = [
+ "//tcmalloc/internal:parameter_accessors",
+ "@com_google_absl//absl/base:config",
+ "@com_google_absl//absl/base:core_headers",
+ "@com_google_absl//absl/base:malloc_internal",
+ "@com_google_absl//absl/functional:function_ref",
+ "@com_google_absl//absl/memory",
+ "@com_google_absl//absl/strings",
+ "@com_google_absl//absl/time",
+ "@com_google_absl//absl/types:optional",
+ "@com_google_absl//absl/types:span",
+ ],
+)
+
+cc_test(
+ name = "experiment_config_test",
+ srcs = ["experiment_config_test.cc"],
+ copts = TCMALLOC_DEFAULT_COPTS,
+ deps = [
+ ":experiment",
+ "@com_github_google_benchmark//:benchmark",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_fuzz_test(
+ name = "experiment_fuzz",
+ testonly = 1,
+ srcs = ["experiment_fuzz.cc"],
+ copts = TCMALLOC_DEFAULT_COPTS,
+ deps = [
+ ":experiment",
+ "@com_google_absl//absl/strings",
+ ],
+)
+
+cc_fuzz_test(
+ name = "runtime_size_classes_fuzz",
+ testonly = 1,
+ srcs = ["runtime_size_classes_fuzz.cc"],
+ copts = TCMALLOC_DEFAULT_COPTS,
+ deps = [
+ ":common",
+ ":runtime_size_classes",
+ ":size_class_info",
+ "@com_google_absl//absl/strings",
+ ],
+)
+
+cc_test(
+ name = "cpu_cache_test",
+ srcs = ["cpu_cache_test.cc"],
+ copts = TCMALLOC_DEFAULT_COPTS,
+ malloc = ":tcmalloc_deprecated_perthread",
+ tags = [
+ # TODO(b/193887621): Add TSan annotations to CPUCache and/or add
+ # atomics to PageMap
+ "notsan",
+ ],
+ deps = [
+ ":common_deprecated_perthread",
+ "//tcmalloc/internal:optimization",
+ "//tcmalloc/internal:util",
+ "//tcmalloc/testing:testutil",
+ "@com_github_google_benchmark//:benchmark",
+ "@com_google_absl//absl/random",
+ "@com_google_absl//absl/random:seed_sequences",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+create_tcmalloc_testsuite(
+ name = "central_freelist_test",
+ srcs = ["central_freelist_test.cc"],
+ copts = TCMALLOC_DEFAULT_COPTS,
+ deps = [
+ "@com_github_google_benchmark//:benchmark",
+ "@com_google_absl//absl/random",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+create_tcmalloc_benchmark(
+ name = "central_freelist_benchmark",
+ srcs = ["central_freelist_benchmark.cc"],
+ copts = TCMALLOC_DEFAULT_COPTS,
+ malloc = "//tcmalloc",
+ deps = [
+ ":common",
+ "@com_github_google_benchmark//:benchmark",
+ "@com_google_absl//absl/algorithm:container",
+ "@com_google_absl//absl/random",
+ "@com_google_absl//absl/types:optional",
+ ],
+)
diff --git a/contrib/libs/tcmalloc/tcmalloc/arena.cc b/contrib/libs/tcmalloc/tcmalloc/arena.cc
new file mode 100644
index 0000000000..5ba1a65bf3
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/arena.cc
@@ -0,0 +1,78 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/arena.h"
+
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/static_vars.h"
+#include "tcmalloc/system-alloc.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+void* Arena::Alloc(size_t bytes, int alignment) {
+ ASSERT(alignment > 0);
+ { // First we need to move up to the correct alignment.
+ const int misalignment =
+ reinterpret_cast<uintptr_t>(free_area_) % alignment;
+ const int alignment_bytes =
+ misalignment != 0 ? alignment - misalignment : 0;
+ free_area_ += alignment_bytes;
+ free_avail_ -= alignment_bytes;
+ bytes_allocated_ += alignment_bytes;
+ }
+ char* result;
+ if (free_avail_ < bytes) {
+ size_t ask = bytes > kAllocIncrement ? bytes : kAllocIncrement;
+ size_t actual_size;
+ // TODO(b/171081864): Arena allocations should be made relatively
+ // infrequently. Consider tagging this memory with sampled objects which
+ // are also infrequently allocated.
+ //
+ // In the meantime it is important that we use the current NUMA partition
+ // rather than always using a particular one because it's possible that any
+ // single partition we choose might only contain nodes that the process is
+ // unable to allocate from due to cgroup restrictions.
+ MemoryTag tag;
+ const auto& numa_topology = Static::numa_topology();
+ if (numa_topology.numa_aware()) {
+ tag = NumaNormalTag(numa_topology.GetCurrentPartition());
+ } else {
+ tag = MemoryTag::kNormal;
+ }
+ free_area_ =
+ reinterpret_cast<char*>(SystemAlloc(ask, &actual_size, kPageSize, tag));
+ if (ABSL_PREDICT_FALSE(free_area_ == nullptr)) {
+ Crash(kCrash, __FILE__, __LINE__,
+ "FATAL ERROR: Out of memory trying to allocate internal tcmalloc "
+ "data (bytes, object-size); is something preventing mmap from "
+ "succeeding (sandbox, VSS limitations)?",
+ kAllocIncrement, bytes);
+ }
+ SystemBack(free_area_, actual_size);
+ free_avail_ = actual_size;
+ }
+
+ ASSERT(reinterpret_cast<uintptr_t>(free_area_) % alignment == 0);
+ result = free_area_;
+ free_area_ += bytes;
+ free_avail_ -= bytes;
+ bytes_allocated_ += bytes;
+ return reinterpret_cast<void*>(result);
+}
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
diff --git a/contrib/libs/tcmalloc/tcmalloc/arena.h b/contrib/libs/tcmalloc/tcmalloc/arena.h
new file mode 100644
index 0000000000..0655253540
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/arena.h
@@ -0,0 +1,68 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TCMALLOC_ARENA_H_
+#define TCMALLOC_ARENA_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "absl/base/attributes.h"
+#include "absl/base/thread_annotations.h"
+#include "tcmalloc/common.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+// Arena allocation; designed for use by tcmalloc internal data structures like
+// spans, profiles, etc. Always expands.
+class Arena {
+ public:
+ constexpr Arena()
+ : free_area_(nullptr), free_avail_(0), bytes_allocated_(0) {}
+
+ // Return a properly aligned byte array of length "bytes". Crashes if
+ // allocation fails. Requires pageheap_lock is held.
+ ABSL_ATTRIBUTE_RETURNS_NONNULL void* Alloc(size_t bytes,
+ int alignment = kAlignment)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+
+ // Returns the total number of bytes allocated from this arena. Requires
+ // pageheap_lock is held.
+ uint64_t bytes_allocated() const
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) {
+ return bytes_allocated_;
+ }
+
+ private:
+ // How much to allocate from system at a time
+ static constexpr int kAllocIncrement = 128 << 10;
+
+ // Free area from which to carve new objects
+ char* free_area_ ABSL_GUARDED_BY(pageheap_lock);
+ size_t free_avail_ ABSL_GUARDED_BY(pageheap_lock);
+
+ // Total number of bytes allocated from this arena
+ uint64_t bytes_allocated_ ABSL_GUARDED_BY(pageheap_lock);
+
+ Arena(const Arena&) = delete;
+ Arena& operator=(const Arena&) = delete;
+};
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // TCMALLOC_ARENA_H_
diff --git a/contrib/libs/tcmalloc/tcmalloc/arena_test.cc b/contrib/libs/tcmalloc/tcmalloc/arena_test.cc
new file mode 100644
index 0000000000..2fb728cac9
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/arena_test.cc
@@ -0,0 +1,38 @@
+// Copyright 2021 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/arena.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace tcmalloc {
+namespace tcmalloc_internal {
+namespace {
+
+TEST(Arena, AlignedAlloc) {
+ Arena arena;
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+ EXPECT_EQ(reinterpret_cast<uintptr_t>(arena.Alloc(64, 64)) % 64, 0);
+ EXPECT_EQ(reinterpret_cast<uintptr_t>(arena.Alloc(7)) % 8, 0);
+ EXPECT_EQ(reinterpret_cast<uintptr_t>(arena.Alloc(128, 64)) % 64, 0);
+ for (int alignment = 1; alignment < 100; ++alignment) {
+ EXPECT_EQ(
+ reinterpret_cast<uintptr_t>(arena.Alloc(7, alignment)) % alignment, 0);
+ }
+}
+
+} // namespace
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
diff --git a/contrib/libs/tcmalloc/tcmalloc/background.cc b/contrib/libs/tcmalloc/tcmalloc/background.cc
new file mode 100644
index 0000000000..ec57c03901
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/background.cc
@@ -0,0 +1,182 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+
+#include "absl/base/internal/sysinfo.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "tcmalloc/cpu_cache.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/internal/percpu.h"
+#include "tcmalloc/internal_malloc_extension.h"
+#include "tcmalloc/malloc_extension.h"
+#include "tcmalloc/parameters.h"
+#include "tcmalloc/static_vars.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+namespace {
+
+// Called by MallocExtension_Internal_ProcessBackgroundActions.
+//
+// We use a simple heuristic here:
+// We keep track of the set of CPUs that we are allowed to run on. Whenever a
+// CPU is removed from this list, the next call to this routine will detect the
+// disappearance and call ReleaseCpuMemory on it.
+//
+// Note that this heuristic _explicitly_ does not reclaim from isolated cores
+// that this process may have set up specific affinities for -- as this thread
+// will never have been allowed to run there.
+cpu_set_t prev_allowed_cpus;
+void ReleasePerCpuMemoryToOS() {
+ cpu_set_t allowed_cpus;
+
+ // Only attempt reclaim when per-CPU caches are in use. While
+ // ReleaseCpuMemory() itself is usually a no-op otherwise, we are experiencing
+ // failures in non-permissive sandboxes due to calls made to
+ // sched_getaffinity() below. It is expected that a runtime environment
+ // supporting per-CPU allocations supports sched_getaffinity().
+ // See b/27247854.
+ if (!MallocExtension::PerCpuCachesActive()) {
+ return;
+ }
+
+ if (subtle::percpu::UsingFlatVirtualCpus()) {
+ // Our (real) CPU mask does not provide useful information about the state
+ // of our virtual CPU set.
+ return;
+ }
+
+ // This can only fail due to a sandbox or similar intercepting the syscall.
+ if (sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus)) {
+ // We log periodically as start-up errors are frequently ignored and this is
+ // something we do want clients to fix if they are experiencing it.
+ Log(kLog, __FILE__, __LINE__,
+ "Unexpected sched_getaffinity() failure; errno ", errno);
+ return;
+ }
+
+ // Note: This is technically not correct in the presence of hotplug (it is
+ // not guaranteed that NumCPUs() is an upper bound on CPU-number). It is
+ // currently safe for Google systems.
+ const int num_cpus = absl::base_internal::NumCPUs();
+ for (int cpu = 0; cpu < num_cpus; cpu++) {
+ if (CPU_ISSET(cpu, &prev_allowed_cpus) && !CPU_ISSET(cpu, &allowed_cpus)) {
+ // This is a CPU present in the old mask, but not the new. Reclaim.
+ MallocExtension::ReleaseCpuMemory(cpu);
+ }
+ }
+
+ // Update cached runnable CPUs for next iteration.
+ memcpy(&prev_allowed_cpus, &allowed_cpus, sizeof(cpu_set_t));
+}
+
+void ShuffleCpuCaches() {
+ if (!MallocExtension::PerCpuCachesActive()) {
+ return;
+ }
+
+ // Shuffle per-cpu caches
+ Static::cpu_cache().ShuffleCpuCaches();
+}
+
+// Reclaims per-cpu caches. The CPU mask used in ReleasePerCpuMemoryToOS does
+// not provide useful information about virtual CPU state and hence does not
+// reclaim memory when virtual CPUs are enabled.
+//
+// Here, we use heuristics that are based on cache usage and misses, to
+// determine if the caches have been recently inactive and if they may be
+// reclaimed.
+void ReclaimIdleCpuCaches() {
+ // Attempts reclaim only when per-CPU caches are in use.
+ if (!MallocExtension::PerCpuCachesActive()) {
+ return;
+ }
+
+ Static::cpu_cache().TryReclaimingCaches();
+}
+
+} // namespace
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+// Release memory to the system at a constant rate.
+void MallocExtension_Internal_ProcessBackgroundActions() {
+ tcmalloc::MallocExtension::MarkThreadIdle();
+
+ // Initialize storage for ReleasePerCpuMemoryToOS().
+ CPU_ZERO(&tcmalloc::tcmalloc_internal::prev_allowed_cpus);
+
+ absl::Time prev_time = absl::Now();
+ constexpr absl::Duration kSleepTime = absl::Seconds(1);
+
+ // Reclaim inactive per-cpu caches once per kCpuCacheReclaimPeriod.
+ //
+  // We use a longer 30 sec reclaim period to make sure that caches are indeed
+  // idle. Reclaim drains the entire cache, as opposed to a cache shuffle,
+  // which only shrinks a cache by a few objects at a time. So we might see a
+  // larger performance degradation if we used a shorter reclaim interval and
+  // drained caches that weren't supposed to be drained.
+ constexpr absl::Duration kCpuCacheReclaimPeriod = absl::Seconds(30);
+ absl::Time last_reclaim = absl::Now();
+
+ // Shuffle per-cpu caches once per kCpuCacheShufflePeriod secs.
+ constexpr absl::Duration kCpuCacheShufflePeriod = absl::Seconds(5);
+ absl::Time last_shuffle = absl::Now();
+
+ while (true) {
+ absl::Time now = absl::Now();
+ const ssize_t bytes_to_release =
+ static_cast<size_t>(tcmalloc::tcmalloc_internal::Parameters::
+ background_release_rate()) *
+ absl::ToDoubleSeconds(now - prev_time);
+ if (bytes_to_release > 0) { // may be negative if time goes backwards
+ tcmalloc::MallocExtension::ReleaseMemoryToSystem(bytes_to_release);
+ }
+
+ const bool reclaim_idle_per_cpu_caches =
+ tcmalloc::tcmalloc_internal::Parameters::reclaim_idle_per_cpu_caches();
+
+ // If enabled, we use heuristics to determine if the per-cpu caches are
+ // inactive. If disabled, we use a more conservative approach, that uses
+ // allowed cpu masks, to reclaim cpu caches.
+ if (reclaim_idle_per_cpu_caches) {
+ // Try to reclaim per-cpu caches once every kCpuCacheReclaimPeriod
+ // when enabled.
+ if (now - last_reclaim >= kCpuCacheReclaimPeriod) {
+ tcmalloc::tcmalloc_internal::ReclaimIdleCpuCaches();
+ last_reclaim = now;
+ }
+ } else {
+ tcmalloc::tcmalloc_internal::ReleasePerCpuMemoryToOS();
+ }
+
+ const bool shuffle_per_cpu_caches =
+ tcmalloc::tcmalloc_internal::Parameters::shuffle_per_cpu_caches();
+
+ if (shuffle_per_cpu_caches) {
+ if (now - last_shuffle >= kCpuCacheShufflePeriod) {
+ tcmalloc::tcmalloc_internal::ShuffleCpuCaches();
+ last_shuffle = now;
+ }
+ }
+
+ tcmalloc::tcmalloc_internal::Static().sharded_transfer_cache().Plunder();
+ prev_time = now;
+ absl::SleepFor(kSleepTime);
+ }
+}
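+
+// Usage sketch: this loop never returns, so it is typically driven from a
+// dedicated background thread via the public entry point
+// tcmalloc::MallocExtension::ProcessBackgroundActions(), e.g.
+//
+//   std::thread(tcmalloc::MallocExtension::ProcessBackgroundActions).detach();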
diff --git a/contrib/libs/tcmalloc/tcmalloc/central_freelist.cc b/contrib/libs/tcmalloc/tcmalloc/central_freelist.cc
new file mode 100644
index 0000000000..8620e228a1
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/central_freelist.cc
@@ -0,0 +1,218 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/central_freelist.h"
+
+#include <stdint.h>
+
+#include "tcmalloc/internal/linked_list.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/internal/optimization.h"
+#include "tcmalloc/page_heap.h"
+#include "tcmalloc/pagemap.h"
+#include "tcmalloc/pages.h"
+#include "tcmalloc/static_vars.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+static MemoryTag MemoryTagFromSizeClass(size_t cl) {
+ if (!Static::numa_topology().numa_aware()) {
+ return MemoryTag::kNormal;
+ }
+ return NumaNormalTag(cl / kNumBaseClasses);
+}
+
+// Like a constructor and hence we disable thread safety analysis.
+void CentralFreeList::Init(size_t cl) ABSL_NO_THREAD_SAFETY_ANALYSIS {
+ size_class_ = cl;
+ object_size_ = Static::sizemap().class_to_size(cl);
+ pages_per_span_ = Length(Static::sizemap().class_to_pages(cl));
+ objects_per_span_ =
+ pages_per_span_.in_bytes() / (object_size_ ? object_size_ : 1);
+}
+
+static Span* MapObjectToSpan(void* object) {
+ const PageId p = PageIdContaining(object);
+ Span* span = Static::pagemap().GetExistingDescriptor(p);
+ return span;
+}
+
+Span* CentralFreeList::ReleaseToSpans(void* object, Span* span,
+ size_t object_size) {
+ if (ABSL_PREDICT_FALSE(span->FreelistEmpty(object_size))) {
+ nonempty_.prepend(span);
+ }
+
+ if (ABSL_PREDICT_TRUE(span->FreelistPush(object, object_size))) {
+ return nullptr;
+ }
+ span->RemoveFromList(); // from nonempty_
+ return span;
+}
+
+void CentralFreeList::InsertRange(absl::Span<void*> batch) {
+ CHECK_CONDITION(!batch.empty() && batch.size() <= kMaxObjectsToMove);
+ Span* spans[kMaxObjectsToMove];
+ // Safe to store free spans into freed up space in span array.
+ Span** free_spans = spans;
+ int free_count = 0;
+
+ // Prefetch Span objects to reduce cache misses.
+ for (int i = 0; i < batch.size(); ++i) {
+ Span* span = MapObjectToSpan(batch[i]);
+ ASSERT(span != nullptr);
+ span->Prefetch();
+ spans[i] = span;
+ }
+
+ // First, release all individual objects into spans under our mutex
+ // and collect spans that become completely free.
+ {
+ // Use local copy of variable to ensure that it is not reloaded.
+ size_t object_size = object_size_;
+ absl::base_internal::SpinLockHolder h(&lock_);
+ for (int i = 0; i < batch.size(); ++i) {
+ Span* span = ReleaseToSpans(batch[i], spans[i], object_size);
+ if (ABSL_PREDICT_FALSE(span)) {
+ free_spans[free_count] = span;
+ free_count++;
+ }
+ }
+
+ RecordMultiSpansDeallocated(free_count);
+ UpdateObjectCounts(batch.size());
+ }
+
+ // Then, release all free spans into page heap under its mutex.
+ if (ABSL_PREDICT_FALSE(free_count)) {
+ // Unregister size class doesn't require holding any locks.
+ for (int i = 0; i < free_count; ++i) {
+ Span* const free_span = free_spans[i];
+      ASSERT(IsNormalMemory(free_span->start_address()));
+ Static::pagemap().UnregisterSizeClass(free_span);
+
+ // Before taking pageheap_lock, prefetch the PageTrackers these spans are
+ // on.
+ //
+ // Small-but-slow does not use the HugePageAwareAllocator (by default), so
+ // do not prefetch on this config.
+#ifndef TCMALLOC_SMALL_BUT_SLOW
+ const PageId p = free_span->first_page();
+
+ // In huge_page_filler.h, we static_assert that PageTracker's key elements
+ // for deallocation are within the first two cachelines.
+ void* pt = Static::pagemap().GetHugepage(p);
+ // Prefetch for writing, as we will issue stores to the PageTracker
+ // instance.
+ __builtin_prefetch(pt, 1, 3);
+ __builtin_prefetch(
+ reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(pt) +
+ ABSL_CACHELINE_SIZE),
+ 1, 3);
+#endif // TCMALLOC_SMALL_BUT_SLOW
+ }
+
+ const MemoryTag tag = MemoryTagFromSizeClass(size_class_);
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+ for (int i = 0; i < free_count; ++i) {
+ Span* const free_span = free_spans[i];
+ ASSERT(tag == GetMemoryTag(free_span->start_address()));
+ Static::page_allocator().Delete(free_span, tag);
+ }
+ }
+}
+
+int CentralFreeList::RemoveRange(void** batch, int N) {
+ ASSUME(N > 0);
+ // Use local copy of variable to ensure that it is not reloaded.
+ size_t object_size = object_size_;
+ int result = 0;
+ absl::base_internal::SpinLockHolder h(&lock_);
+ if (ABSL_PREDICT_FALSE(nonempty_.empty())) {
+ result = Populate(batch, N);
+ } else {
+ do {
+ Span* span = nonempty_.first();
+ int here =
+ span->FreelistPopBatch(batch + result, N - result, object_size);
+ ASSERT(here > 0);
+ if (span->FreelistEmpty(object_size)) {
+ span->RemoveFromList(); // from nonempty_
+ }
+ result += here;
+ } while (result < N && !nonempty_.empty());
+ }
+ UpdateObjectCounts(-result);
+ return result;
+}
+
+// Fetch memory from the system and add to the central cache freelist.
+int CentralFreeList::Populate(void** batch,
+ int N) ABSL_NO_THREAD_SAFETY_ANALYSIS {
+  // Release central list lock while operating on pageheap.
+  // Note that this could result in multiple calls to Populate(), each
+  // allocating a new span and then pushing those partially full spans onto
+  // nonempty_.
+ lock_.Unlock();
+
+ const MemoryTag tag = MemoryTagFromSizeClass(size_class_);
+ Span* span = Static::page_allocator().New(pages_per_span_, tag);
+ if (ABSL_PREDICT_FALSE(span == nullptr)) {
+ Log(kLog, __FILE__, __LINE__, "tcmalloc: allocation failed",
+ pages_per_span_.in_bytes());
+ lock_.Lock();
+ return 0;
+ }
+ ASSERT(tag == GetMemoryTag(span->start_address()));
+ ASSERT(span->num_pages() == pages_per_span_);
+
+ Static::pagemap().RegisterSizeClass(span, size_class_);
+ size_t objects_per_span = objects_per_span_;
+ int result = span->BuildFreelist(object_size_, objects_per_span, batch, N);
+ ASSERT(result > 0);
+ // This is a cheaper check than using FreelistEmpty().
+ bool span_empty = result == objects_per_span;
+
+ lock_.Lock();
+ if (!span_empty) {
+ nonempty_.prepend(span);
+ }
+ RecordSpanAllocated();
+ return result;
+}
+
+size_t CentralFreeList::OverheadBytes() const {
+ if (ABSL_PREDICT_FALSE(object_size_ == 0)) {
+ return 0;
+ }
+ const size_t overhead_per_span = pages_per_span_.in_bytes() % object_size_;
+ return num_spans() * overhead_per_span;
+}
+
+SpanStats CentralFreeList::GetSpanStats() const {
+ SpanStats stats;
+ if (ABSL_PREDICT_FALSE(objects_per_span_ == 0)) {
+ return stats;
+ }
+ stats.num_spans_requested = static_cast<size_t>(num_spans_requested_.value());
+ stats.num_spans_returned = static_cast<size_t>(num_spans_returned_.value());
+ stats.obj_capacity = stats.num_live_spans() * objects_per_span_;
+ return stats;
+}
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
diff --git a/contrib/libs/tcmalloc/tcmalloc/central_freelist.h b/contrib/libs/tcmalloc/tcmalloc/central_freelist.h
new file mode 100644
index 0000000000..266f184d6b
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/central_freelist.h
@@ -0,0 +1,142 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TCMALLOC_CENTRAL_FREELIST_H_
+#define TCMALLOC_CENTRAL_FREELIST_H_
+
+#include <stddef.h>
+
+#include <cstddef>
+
+#include "absl/base/attributes.h"
+#include "absl/base/const_init.h"
+#include "absl/base/internal/spinlock.h"
+#include "absl/base/macros.h"
+#include "absl/base/thread_annotations.h"
+#include "tcmalloc/internal/atomic_stats_counter.h"
+#include "tcmalloc/span.h"
+#include "tcmalloc/span_stats.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+// Data kept per size-class in central cache.
+class CentralFreeList {
+ public:
+ constexpr CentralFreeList()
+ : lock_(absl::kConstInit, absl::base_internal::SCHEDULE_KERNEL_ONLY),
+ size_class_(0),
+ object_size_(0),
+ objects_per_span_(0),
+ pages_per_span_(0),
+ nonempty_() {}
+
+ CentralFreeList(const CentralFreeList&) = delete;
+ CentralFreeList& operator=(const CentralFreeList&) = delete;
+
+ void Init(size_t cl) ABSL_LOCKS_EXCLUDED(lock_);
+
+ // These methods all do internal locking.
+
+ // Insert batch into the central freelist.
+ // REQUIRES: batch.size() > 0 && batch.size() <= kMaxObjectsToMove.
+ void InsertRange(absl::Span<void*> batch) ABSL_LOCKS_EXCLUDED(lock_);
+
+ // Fill a prefix of batch[0..N-1] with up to N elements removed from central
+ // freelist. Return the number of elements removed.
+ ABSL_MUST_USE_RESULT int RemoveRange(void** batch, int N)
+ ABSL_LOCKS_EXCLUDED(lock_);
+
+ // Returns the number of free objects in cache.
+ size_t length() const { return static_cast<size_t>(counter_.value()); }
+
+ // Returns the memory overhead (internal fragmentation) attributable
+ // to the freelist. This is memory lost when the size of elements
+ // in a freelist doesn't exactly divide the page-size (an 8192-byte
+ // page full of 5-byte objects would have 2 bytes memory overhead).
+ size_t OverheadBytes() const;
+
+ SpanStats GetSpanStats() const;
+
+ void AcquireInternalLocks() {
+ lock_.Lock();
+ }
+
+ void ReleaseInternalLocks() {
+ lock_.Unlock();
+ }
+
+ private:
+ // Release an object to spans.
+  // Returns the object's span if it became completely free.
+ Span* ReleaseToSpans(void* object, Span* span, size_t object_size)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_);
+
+ // Populate cache by fetching from the page heap.
+ // May temporarily release lock_.
+ // Fill a prefix of batch[0..N-1] with up to N elements removed from central
+ // freelist. Returns the number of elements removed.
+ int Populate(void** batch, int N) ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_);
+
+ // This lock protects all the mutable data members.
+ absl::base_internal::SpinLock lock_;
+
+ size_t size_class_; // My size class (immutable after Init())
+ size_t object_size_;
+ size_t objects_per_span_;
+ Length pages_per_span_;
+
+ size_t num_spans() const {
+ size_t requested = num_spans_requested_.value();
+ size_t returned = num_spans_returned_.value();
+ if (requested < returned) return 0;
+ return (requested - returned);
+ }
+
+ void RecordSpanAllocated() ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) {
+ counter_.LossyAdd(objects_per_span_);
+ num_spans_requested_.LossyAdd(1);
+ }
+
+ void RecordMultiSpansDeallocated(size_t num_spans_returned)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) {
+ counter_.LossyAdd(-num_spans_returned * objects_per_span_);
+ num_spans_returned_.LossyAdd(num_spans_returned);
+ }
+
+ void UpdateObjectCounts(int num) ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) {
+ counter_.LossyAdd(num);
+ }
+
+  // The following counters are kept as StatsCounters so that they can be read
+  // without acquiring a lock. Updates to these variables are guarded by lock_,
+  // so writes are performed using LossyAdd for speed; the lock still
+  // guarantees accuracy.
+
+ // Num free objects in cache entry
+ StatsCounter counter_;
+
+ StatsCounter num_spans_requested_;
+ StatsCounter num_spans_returned_;
+
+ // Dummy header for non-empty spans
+ SpanList nonempty_ ABSL_GUARDED_BY(lock_);
+};
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // TCMALLOC_CENTRAL_FREELIST_H_
diff --git a/contrib/libs/tcmalloc/tcmalloc/central_freelist_benchmark.cc b/contrib/libs/tcmalloc/tcmalloc/central_freelist_benchmark.cc
new file mode 100644
index 0000000000..a80d580753
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/central_freelist_benchmark.cc
@@ -0,0 +1,198 @@
+// Copyright 2021 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <vector>
+
+#include "absl/algorithm/container.h"
+#include "absl/random/random.h"
+#include "benchmark/benchmark.h"
+#include "tcmalloc/central_freelist.h"
+#include "tcmalloc/common.h"
+#include "tcmalloc/static_vars.h"
+#include "tcmalloc/tcmalloc_policy.h"
+
+namespace tcmalloc {
+namespace tcmalloc_internal {
+namespace {
+
+// This benchmark measures how long it takes to populate multiple
+// spans. The spans are freed in the same order as they were populated
+// to minimize the time it takes to free them.
+void BM_Populate(benchmark::State& state) {
+ size_t object_size = state.range(0);
+ size_t cl = Static::sizemap().SizeClass(CppPolicy(), object_size);
+ int batch_size = Static::sizemap().num_objects_to_move(cl);
+ int num_objects = 64 * 1024 * 1024 / object_size;
+ CentralFreeList cfl;
+ // Initialize the span to contain the appropriate size of object.
+ cfl.Init(cl);
+
+ // Allocate an array large enough to hold 64 MiB of objects.
+ std::vector<void*> buffer(num_objects);
+ int64_t items_processed = 0;
+ absl::BitGen rnd;
+
+ for (auto s : state) {
+ int index = 0;
+ // The cost of fetching objects will include the cost of fetching and
+ // populating the span.
+ while (index < num_objects) {
+ int count = std::min(batch_size, num_objects - index);
+ int got = cfl.RemoveRange(&buffer[index], count);
+ index += got;
+ }
+
+ // Don't include the cost of returning the objects to the span, and the
+ // span to the pageheap.
+ state.PauseTiming();
+ index = 0;
+ while (index < num_objects) {
+ uint64_t count = std::min(batch_size, num_objects - index);
+ cfl.InsertRange({&buffer[index], count});
+ index += count;
+ }
+ items_processed += index;
+ state.ResumeTiming();
+ }
+ state.SetItemsProcessed(items_processed);
+}
+BENCHMARK(BM_Populate)
+ ->DenseRange(8, 64, 16)
+ ->DenseRange(64, 1024, 64)
+ ->DenseRange(4096, 28 * 1024, 4096)
+ ->DenseRange(32 * 1024, 256 * 1024, 32 * 1024);
+
+// This benchmark fills a large array with objects, shuffles the objects
+// and then returns them.
+// This should be relatively representative of what happens at runtime.
+// Fetching objects from the CFL is usually done in batches, but returning
+// them is usually done spread over many active spans.
+void BM_MixAndReturn(benchmark::State& state) {
+ size_t object_size = state.range(0);
+ size_t cl = Static::sizemap().SizeClass(CppPolicy(), object_size);
+ int batch_size = Static::sizemap().num_objects_to_move(cl);
+ int num_objects = 64 * 1024 * 1024 / object_size;
+ CentralFreeList cfl;
+ // Initialize the span to contain the appropriate size of object.
+ cfl.Init(cl);
+
+ // Allocate an array large enough to hold 64 MiB of objects.
+ std::vector<void*> buffer(num_objects);
+ int64_t items_processed = 0;
+ absl::BitGen rnd;
+
+ for (auto s : state) {
+ int index = 0;
+ while (index < num_objects) {
+ int count = std::min(batch_size, num_objects - index);
+ int got = cfl.RemoveRange(&buffer[index], count);
+ index += got;
+ }
+
+ state.PauseTiming();
+ // Shuffle the vector so that we don't return the objects in the same
+ // order as they were allocated.
+ absl::c_shuffle(buffer, rnd);
+ state.ResumeTiming();
+
+ index = 0;
+ while (index < num_objects) {
+ unsigned int count = std::min(batch_size, num_objects - index);
+ cfl.InsertRange({&buffer[index], count});
+ index += count;
+ }
+ items_processed += index;
+ }
+ state.SetItemsProcessed(items_processed);
+}
+BENCHMARK(BM_MixAndReturn)
+ ->DenseRange(8, 64, 16)
+ ->DenseRange(64, 1024, 64)
+ ->DenseRange(4096, 28 * 1024, 4096)
+ ->DenseRange(32 * 1024, 256 * 1024, 32 * 1024);
+
+// This benchmark holds onto half the allocated objects so that (except for
+// single object spans) spans are never allocated or freed during the
+// benchmark run. This evaluates the performance of just the span handling
+// code, and avoids timing the pageheap code.
+void BM_SpanReuse(benchmark::State& state) {
+ size_t object_size = state.range(0);
+ size_t cl = Static::sizemap().SizeClass(CppPolicy(), object_size);
+ int batch_size = Static::sizemap().num_objects_to_move(cl);
+ int num_objects = 64 * 1024 * 1024 / object_size;
+ CentralFreeList cfl;
+  // Initialize the central freelist for this size class.
+ cfl.Init(cl);
+
+  // Array used to hold the objects; half of them will stay allocated for the
+  // duration of the benchmark.
+  std::vector<void*> held_objects(2 * num_objects);
+  // Request twice the number of objects we need.
+ for (int index = 0; index < 2 * num_objects;) {
+ int count = std::min(batch_size, 2 * num_objects - index);
+ int got = cfl.RemoveRange(&held_objects[index], count);
+ index += got;
+ }
+
+ // Return half of the objects. This will stop the spans from being
+ // returned to the pageheap. So future operations will not touch the
+ // pageheap.
+ for (int index = 0; index < 2 * num_objects; index += 2) {
+ cfl.InsertRange({&held_objects[index], 1});
+ }
+ // Allocate an array large enough to hold 64 MiB of objects.
+ std::vector<void*> buffer(num_objects);
+ int64_t items_processed = 0;
+ absl::BitGen rnd;
+
+ for (auto s : state) {
+ int index = 0;
+ while (index < num_objects) {
+ int count = std::min(batch_size, num_objects - index);
+ int got = cfl.RemoveRange(&buffer[index], count);
+ index += got;
+ }
+
+ state.PauseTiming();
+ // Shuffle the vector so that we don't return the objects in the same
+ // order as they were allocated.
+ absl::c_shuffle(buffer, rnd);
+ state.ResumeTiming();
+
+ index = 0;
+ while (index < num_objects) {
+      int count = std::min(batch_size, num_objects - index);
+ cfl.InsertRange({&buffer[index], count});
+ index += count;
+ }
+ items_processed += index;
+ }
+ state.SetItemsProcessed(items_processed);
+
+ // Return the other half of the objects.
+ for (int index = 1; index < 2 * num_objects; index += 2) {
+ cfl.InsertRange({&held_objects[index], 1});
+ }
+}
+// Want to avoid benchmarking spans where there is a single object per span.
+BENCHMARK(BM_SpanReuse)
+ ->DenseRange(8, 64, 16)
+ ->DenseRange(64, 1024, 64)
+ ->DenseRange(1024, 4096, 512);
+
+} // namespace
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
diff --git a/contrib/libs/tcmalloc/tcmalloc/central_freelist_test.cc b/contrib/libs/tcmalloc/tcmalloc/central_freelist_test.cc
new file mode 100644
index 0000000000..de5960120d
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/central_freelist_test.cc
@@ -0,0 +1,121 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/central_freelist.h"
+
+#include <algorithm>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/random/random.h"
+#include "tcmalloc/common.h"
+#include "tcmalloc/static_vars.h"
+
+namespace tcmalloc {
+namespace tcmalloc_internal {
+namespace {
+
+// TODO(b/162552708) Mock out the page heap to interact with CFL instead
+class CFLTest : public testing::TestWithParam<size_t> {
+ protected:
+ size_t cl_;
+ size_t batch_size_;
+ size_t objects_per_span_;
+ CentralFreeList cfl_;
+
+ private:
+ void SetUp() override {
+ cl_ = GetParam();
+ size_t object_size = Static::sizemap().class_to_size(cl_);
+ if (object_size == 0) {
+ GTEST_SKIP() << "Skipping empty size class.";
+ }
+
+ auto pages_per_span = Length(Static::sizemap().class_to_pages(cl_));
+ batch_size_ = Static::sizemap().num_objects_to_move(cl_);
+ objects_per_span_ = pages_per_span.in_bytes() / object_size;
+ cfl_.Init(cl_);
+ }
+
+ void TearDown() override { EXPECT_EQ(cfl_.length(), 0); }
+};
+
+TEST_P(CFLTest, SingleBatch) {
+ void* batch[kMaxObjectsToMove];
+ uint64_t got = cfl_.RemoveRange(batch, batch_size_);
+ ASSERT_GT(got, 0);
+ cfl_.InsertRange({batch, got});
+ SpanStats stats = cfl_.GetSpanStats();
+ EXPECT_EQ(stats.num_spans_requested, 1);
+ EXPECT_EQ(stats.num_spans_returned, 1);
+ EXPECT_EQ(stats.obj_capacity, 0);
+}
+
+TEST_P(CFLTest, MultipleSpans) {
+ std::vector<void*> all_objects;
+
+ const size_t num_spans = 10;
+
+ // Request num_spans spans
+ void* batch[kMaxObjectsToMove];
+ const int num_objects_to_fetch = num_spans * objects_per_span_;
+ int total_fetched = 0;
+ while (total_fetched < num_objects_to_fetch) {
+ size_t n = num_objects_to_fetch - total_fetched;
+ int got = cfl_.RemoveRange(batch, std::min(n, batch_size_));
+ for (int i = 0; i < got; ++i) {
+ all_objects.push_back(batch[i]);
+ }
+ total_fetched += got;
+ }
+
+ SpanStats stats = cfl_.GetSpanStats();
+ EXPECT_EQ(stats.num_spans_requested, num_spans);
+ EXPECT_EQ(stats.num_spans_returned, 0);
+
+ EXPECT_EQ(all_objects.size(), num_objects_to_fetch);
+
+ // Shuffle
+ absl::BitGen rng;
+ std::shuffle(all_objects.begin(), all_objects.end(), rng);
+
+ // Return all
+ int total_returned = 0;
+ bool checked_half = false;
+ while (total_returned < num_objects_to_fetch) {
+ uint64_t size_to_pop =
+ std::min(all_objects.size() - total_returned, batch_size_);
+ for (int i = 0; i < size_to_pop; ++i) {
+ batch[i] = all_objects[i + total_returned];
+ }
+ total_returned += size_to_pop;
+ cfl_.InsertRange({batch, size_to_pop});
+ // sanity check
+ if (!checked_half && total_returned >= (num_objects_to_fetch / 2)) {
+ stats = cfl_.GetSpanStats();
+ EXPECT_GT(stats.num_spans_requested, stats.num_spans_returned);
+ EXPECT_NE(stats.obj_capacity, 0);
+ checked_half = true;
+ }
+ }
+
+ stats = cfl_.GetSpanStats();
+ EXPECT_EQ(stats.num_spans_requested, stats.num_spans_returned);
+ EXPECT_EQ(stats.obj_capacity, 0);
+}
+
+INSTANTIATE_TEST_SUITE_P(All, CFLTest, testing::Range(size_t(1), kNumClasses));
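+
+// Illustrative invocation (the test binary name is an assumption; the filter
+// syntax is standard googletest, and parameterized cases are suffixed with
+// the zero-based parameter index rather than the size class value):
+//
+//   ./central_freelist_test --gtest_filter='All/CFLTest.SingleBatch/3'
+//
+// runs a single instantiation of SingleBatch.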
+} // namespace
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
diff --git a/contrib/libs/tcmalloc/tcmalloc/common.cc b/contrib/libs/tcmalloc/tcmalloc/common.cc
new file mode 100644
index 0000000000..38443040ca
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/common.cc
@@ -0,0 +1,204 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/common.h"
+
+#include "tcmalloc/experiment.h"
+#include "tcmalloc/internal/environment.h"
+#include "tcmalloc/internal/optimization.h"
+#include "tcmalloc/pages.h"
+#include "tcmalloc/runtime_size_classes.h"
+#include "tcmalloc/sampler.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+absl::string_view MemoryTagToLabel(MemoryTag tag) {
+ switch (tag) {
+ case MemoryTag::kNormal:
+ return "NORMAL";
+ case MemoryTag::kNormalP1:
+ return "NORMAL_P1";
+ case MemoryTag::kSampled:
+ return "SAMPLED";
+ default:
+ ASSUME(false);
+ }
+}
+
+// Loads size classes from the environment variable if it is present and
+// valid, and returns true. If the variable is not found or not valid,
+// returns false.
+bool SizeMap::MaybeRunTimeSizeClasses() {
+ SizeClassInfo parsed[kNumClasses];
+ int num_classes = MaybeSizeClassesFromEnv(kMaxSize, kNumClasses, parsed);
+ if (!ValidSizeClasses(num_classes, parsed)) {
+ return false;
+ }
+
+ if (num_classes != kSizeClassesCount) {
+ // TODO(b/122839049) - Add tests for num_classes < kSizeClassesCount before
+ // allowing that case.
+ Log(kLog, __FILE__, __LINE__, "Can't change the number of size classes",
+ num_classes, kSizeClassesCount);
+ return false;
+ }
+
+ SetSizeClasses(num_classes, parsed);
+ Log(kLog, __FILE__, __LINE__, "Loaded valid Runtime Size classes");
+ return true;
+}
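+
+// Illustrative override accepted by MaybeRunTimeSizeClasses() above (this
+// assumes the TCMALLOC_SIZE_CLASSES environment variable and the
+// "size,pages,num_to_move" triples, separated by ';', that
+// MaybeSizeClassesFromEnv() parses; see runtime_size_classes.cc):
+//
+//   TCMALLOC_SIZE_CLASSES="8,1,32;16,1,32;32,1,32;..." ./binary
+//
+// The list must contain exactly kSizeClassesCount entries and pass the
+// ValidSizeClasses() checks below; otherwise the built-in tables are kept.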
+
+void SizeMap::SetSizeClasses(int num_classes, const SizeClassInfo* parsed) {
+ class_to_size_[0] = 0;
+ class_to_pages_[0] = 0;
+ num_objects_to_move_[0] = 0;
+
+ for (int c = 1; c < num_classes; c++) {
+ class_to_size_[c] = parsed[c].size;
+ class_to_pages_[c] = parsed[c].pages;
+ num_objects_to_move_[c] = parsed[c].num_to_move;
+ }
+
+ // Fill any unspecified size classes with 0.
+ for (int x = num_classes; x < kNumBaseClasses; x++) {
+ class_to_size_[x] = 0;
+ class_to_pages_[x] = 0;
+ num_objects_to_move_[x] = 0;
+ }
+
+ // Copy selected size classes into the upper registers.
+ for (int i = 1; i < (kNumClasses / kNumBaseClasses); i++) {
+ std::copy(&class_to_size_[0], &class_to_size_[kNumBaseClasses],
+ &class_to_size_[kNumBaseClasses * i]);
+ std::copy(&class_to_pages_[0], &class_to_pages_[kNumBaseClasses],
+ &class_to_pages_[kNumBaseClasses * i]);
+ std::copy(&num_objects_to_move_[0], &num_objects_to_move_[kNumBaseClasses],
+ &num_objects_to_move_[kNumBaseClasses * i]);
+ }
+}
+
+// Returns true if all size classes meet the requirements for alignment,
+// ordering, and minimum and maximum values.
+bool SizeMap::ValidSizeClasses(int num_classes, const SizeClassInfo* parsed) {
+ if (num_classes <= 0) {
+ return false;
+ }
+ if (kHasExpandedClasses && num_classes > kNumBaseClasses) {
+ num_classes = kNumBaseClasses;
+ }
+
+ for (int c = 1; c < num_classes; c++) {
+ size_t class_size = parsed[c].size;
+ size_t pages = parsed[c].pages;
+ size_t num_objects_to_move = parsed[c].num_to_move;
+ // Each size class must be larger than the previous size class.
+ if (class_size <= parsed[c - 1].size) {
+ Log(kLog, __FILE__, __LINE__, "Non-increasing size class", c,
+ parsed[c - 1].size, class_size);
+ return false;
+ }
+ if (class_size > kMaxSize) {
+ Log(kLog, __FILE__, __LINE__, "size class too big", c, class_size,
+ kMaxSize);
+ return false;
+ }
+ // Check required alignment
+ size_t alignment = 128;
+ if (class_size <= kMultiPageSize) {
+ alignment = kAlignment;
+ } else if (class_size <= SizeMap::kMaxSmallSize) {
+ alignment = kMultiPageAlignment;
+ }
+ if ((class_size & (alignment - 1)) != 0) {
+ Log(kLog, __FILE__, __LINE__, "Not aligned properly", c, class_size,
+ alignment);
+ return false;
+ }
+ if (class_size <= kMultiPageSize && pages != 1) {
+ Log(kLog, __FILE__, __LINE__, "Multiple pages not allowed", class_size,
+ pages, kMultiPageSize);
+ return false;
+ }
+ if (pages >= 256) {
+ Log(kLog, __FILE__, __LINE__, "pages limited to 255", pages);
+ return false;
+ }
+ if (num_objects_to_move > kMaxObjectsToMove) {
+ Log(kLog, __FILE__, __LINE__, "num objects to move too large",
+ num_objects_to_move, kMaxObjectsToMove);
+ return false;
+ }
+ }
+ // Last size class must be able to hold kMaxSize.
+ if (parsed[num_classes - 1].size < kMaxSize) {
+ Log(kLog, __FILE__, __LINE__, "last class doesn't cover kMaxSize",
+ num_classes - 1, parsed[num_classes - 1].size, kMaxSize);
+ return false;
+ }
+ return true;
+}
+
+int ABSL_ATTRIBUTE_WEAK default_want_legacy_spans();
+
+// Initialize the mapping arrays
+void SizeMap::Init() {
+  // Do some sanity checking on the ClassIndex() -> class_array_[] mapping.
+ if (ClassIndex(0) != 0) {
+ Crash(kCrash, __FILE__, __LINE__, "Invalid class index for size 0",
+ ClassIndex(0));
+ }
+ if (ClassIndex(kMaxSize) >= sizeof(class_array_)) {
+ Crash(kCrash, __FILE__, __LINE__, "Invalid class index for kMaxSize",
+ ClassIndex(kMaxSize));
+ }
+
+ static_assert(kAlignment <= 16, "kAlignment is too large");
+
+ if (IsExperimentActive(Experiment::TEST_ONLY_TCMALLOC_POW2_SIZECLASS)) {
+ SetSizeClasses(kExperimentalPow2SizeClassesCount,
+ kExperimentalPow2SizeClasses);
+ } else if (IsExperimentActive(
+ Experiment::TEST_ONLY_TCMALLOC_POW2_BELOW64_SIZECLASS)) {
+ SetSizeClasses(kExperimentalPow2Below64SizeClassesCount,
+ kExperimentalPow2Below64SizeClasses);
+ } else {
+    if (default_want_legacy_spans != nullptr &&
+        default_want_legacy_spans() > 0) {
+ SetSizeClasses(kLegacySizeClassesCount, kLegacySizeClasses);
+ } else {
+ SetSizeClasses(kSizeClassesCount, kSizeClasses);
+ }
+ }
+ MaybeRunTimeSizeClasses();
+
+ int next_size = 0;
+ for (int c = 1; c < kNumClasses; c++) {
+ const int max_size_in_class = class_to_size_[c];
+
+ for (int s = next_size; s <= max_size_in_class; s += kAlignment) {
+ class_array_[ClassIndex(s)] = c;
+ }
+ next_size = max_size_in_class + kAlignment;
+ if (next_size > kMaxSize) {
+ break;
+ }
+ }
+}
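+
+// Worked example of the mapping built above: for a 24-byte request,
+// ClassIndex(24) == (24 + 7) / 8 == 3, and class_array_[3] holds the first
+// size class c whose class_to_size_[c] >= 24, because the loop fills every
+// index from just past the previous class's size up to class_to_size_[c]
+// with c. The concrete class number depends on the table selected above, so
+// it is not spelled out here.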
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
diff --git a/contrib/libs/tcmalloc/tcmalloc/common.h b/contrib/libs/tcmalloc/tcmalloc/common.h
new file mode 100644
index 0000000000..d44811c726
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/common.h
@@ -0,0 +1,524 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Common definitions for tcmalloc code.
+
+#ifndef TCMALLOC_COMMON_H_
+#define TCMALLOC_COMMON_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <limits>
+#include <type_traits>
+
+#include "absl/base/attributes.h"
+#include "absl/base/dynamic_annotations.h"
+#include "absl/base/internal/spinlock.h"
+#include "absl/base/macros.h"
+#include "absl/base/optimization.h"
+#include "absl/numeric/bits.h"
+#include "absl/strings/string_view.h"
+#include "absl/types/span.h"
+#include "tcmalloc/experiment.h"
+#include "tcmalloc/internal/config.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/internal/optimization.h"
+#include "tcmalloc/size_class_info.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+//-------------------------------------------------------------------
+// Configuration
+//-------------------------------------------------------------------
+
+// There are four different models for tcmalloc which are created by defining a
+// set of constant variables differently:
+//
+// DEFAULT:
+// The default configuration strives for good performance while trying to
+// minimize fragmentation. It uses a smaller page size to reduce
+// fragmentation, but allocates per-thread and per-cpu capacities similar to
+// TCMALLOC_LARGE_PAGES / TCMALLOC_256K_PAGES.
+//
+// TCMALLOC_LARGE_PAGES:
+// Larger page sizes increase the bookkeeping granularity used by TCMalloc for
+// its allocations. This can reduce PageMap size and traffic to the
+// innermost cache (the page heap), but can increase memory footprints. As
+// TCMalloc will not reuse a page for a different allocation size until the
+// entire page is deallocated, this can be a source of increased memory
+// fragmentation.
+//
+// Historically, larger page sizes improved lookup performance for the
+// pointer-to-size lookup in the PageMap that was part of the critical path.
+// With most deallocations leveraging C++14's sized delete feature
+// (https://isocpp.org/files/papers/n3778.html), this optimization is less
+// significant.
+//
+// TCMALLOC_256K_PAGES:
+// This configuration uses an even larger page size (256KB) as the unit of
+// accounting granularity.
+//
+// TCMALLOC_SMALL_BUT_SLOW:
+// Used for situations where minimizing the memory footprint is the most
+// desirable attribute, even at the cost of performance.
+//
+// The constants that vary between models are:
+//
+// kPageShift - Shift amount used to compute the page size.
+// kNumBaseClasses - Number of size classes serviced by bucket allocators
+// kMaxSize - Maximum size serviced by bucket allocators (thread/cpu/central)
+// kMinThreadCacheSize - The minimum size in bytes of each ThreadCache.
+// kMaxThreadCacheSize - The maximum size in bytes of each ThreadCache.
+// kDefaultOverallThreadCacheSize - The maximum combined size in bytes of all
+// ThreadCaches for an executable.
+// kStealAmount - The number of bytes one ThreadCache will steal from another
+// when the first ThreadCache is forced to Scavenge(), delaying the next
+// call to Scavenge for this thread.
+
+// Older configurations had their own customized macros. Convert them into
+// a page-shift parameter that is checked below.
+
+#ifndef TCMALLOC_PAGE_SHIFT
+#ifdef TCMALLOC_SMALL_BUT_SLOW
+#define TCMALLOC_PAGE_SHIFT 12
+#define TCMALLOC_USE_PAGEMAP3
+#elif defined(TCMALLOC_256K_PAGES)
+#define TCMALLOC_PAGE_SHIFT 18
+#elif defined(TCMALLOC_LARGE_PAGES)
+#define TCMALLOC_PAGE_SHIFT 15
+#else
+#define TCMALLOC_PAGE_SHIFT 13
+#endif
+#else
+#error "TCMALLOC_PAGE_SHIFT is an internal macro!"
+#endif
+
+#if TCMALLOC_PAGE_SHIFT == 12
+inline constexpr size_t kPageShift = 12;
+inline constexpr size_t kNumBaseClasses = 46;
+inline constexpr bool kHasExpandedClasses = false;
+inline constexpr size_t kMaxSize = 8 << 10;
+inline constexpr size_t kMinThreadCacheSize = 4 * 1024;
+inline constexpr size_t kMaxThreadCacheSize = 64 * 1024;
+inline constexpr size_t kMaxCpuCacheSize = 20 * 1024;
+inline constexpr size_t kDefaultOverallThreadCacheSize = kMaxThreadCacheSize;
+inline constexpr size_t kStealAmount = kMinThreadCacheSize;
+inline constexpr size_t kDefaultProfileSamplingRate = 1 << 19;
+inline constexpr size_t kMinPages = 2;
+#elif TCMALLOC_PAGE_SHIFT == 15
+inline constexpr size_t kPageShift = 15;
+inline constexpr size_t kNumBaseClasses = 78;
+inline constexpr bool kHasExpandedClasses = true;
+inline constexpr size_t kMaxSize = 256 * 1024;
+inline constexpr size_t kMinThreadCacheSize = kMaxSize * 2;
+inline constexpr size_t kMaxThreadCacheSize = 4 << 20;
+inline constexpr size_t kMaxCpuCacheSize = 3 * 1024 * 1024;
+inline constexpr size_t kDefaultOverallThreadCacheSize =
+ 8u * kMaxThreadCacheSize;
+inline constexpr size_t kStealAmount = 1 << 16;
+inline constexpr size_t kDefaultProfileSamplingRate = 1 << 21;
+inline constexpr size_t kMinPages = 8;
+#elif TCMALLOC_PAGE_SHIFT == 18
+inline constexpr size_t kPageShift = 18;
+inline constexpr size_t kNumBaseClasses = 89;
+inline constexpr bool kHasExpandedClasses = true;
+inline constexpr size_t kMaxSize = 256 * 1024;
+inline constexpr size_t kMinThreadCacheSize = kMaxSize * 2;
+inline constexpr size_t kMaxThreadCacheSize = 4 << 20;
+inline constexpr size_t kMaxCpuCacheSize = 3 * 1024 * 1024;
+inline constexpr size_t kDefaultOverallThreadCacheSize =
+ 8u * kMaxThreadCacheSize;
+inline constexpr size_t kStealAmount = 1 << 16;
+inline constexpr size_t kDefaultProfileSamplingRate = 1 << 21;
+inline constexpr size_t kMinPages = 8;
+#elif TCMALLOC_PAGE_SHIFT == 13
+inline constexpr size_t kPageShift = 13;
+inline constexpr size_t kNumBaseClasses = 86;
+inline constexpr bool kHasExpandedClasses = true;
+inline constexpr size_t kMaxSize = 256 * 1024;
+inline constexpr size_t kMinThreadCacheSize = kMaxSize * 2;
+inline constexpr size_t kMaxThreadCacheSize = 4 << 20;
+inline constexpr size_t kMaxCpuCacheSize = 3 * 1024 * 1024;
+inline constexpr size_t kDefaultOverallThreadCacheSize =
+ 8u * kMaxThreadCacheSize;
+inline constexpr size_t kStealAmount = 1 << 16;
+inline constexpr size_t kDefaultProfileSamplingRate = 1 << 21;
+inline constexpr size_t kMinPages = 8;
+#else
+#error "Unsupported TCMALLOC_PAGE_SHIFT value!"
+#endif
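+
+// For the default configuration above (TCMALLOC_PAGE_SHIFT == 13) this gives
+// 8 KiB pages with kMaxSize == 256 KiB, so kMaxSize / kPageSize == 32, well
+// above kMinPages == 8 as checked by the static_assert further below.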
+
+// Sanitizers constrain the memory layout which causes problems with the
+// enlarged tags required to represent NUMA partitions. Disable NUMA awareness
+// to avoid failing to mmap memory.
+#if defined(TCMALLOC_NUMA_AWARE) && !defined(MEMORY_SANITIZER) && \
+ !defined(THREAD_SANITIZER)
+inline constexpr size_t kNumaPartitions = 2;
+#else
+inline constexpr size_t kNumaPartitions = 1;
+#endif
+
+// We have copies of kNumBaseClasses size classes for each NUMA node, followed
+// by any expanded classes.
+inline constexpr size_t kExpandedClassesStart =
+ kNumBaseClasses * kNumaPartitions;
+inline constexpr size_t kNumClasses =
+ kExpandedClassesStart + (kHasExpandedClasses ? kNumBaseClasses : 0);
+
+// Size classes are often stored as uint32_t values, but there are some
+// situations where we need to store a size class with as compact a
+// representation as possible (e.g. in PageMap). Here we determine the integer
+// type to use in these situations - i.e. the smallest integer type large
+// enough to store values in the range [0,kNumClasses).
+constexpr size_t kMaxClass = kNumClasses - 1;
+using CompactSizeClass =
+ std::conditional_t<kMaxClass <= std::numeric_limits<uint8_t>::max(),
+ uint8_t, uint16_t>;
+
+// ~64K classes ought to be enough for anybody, but let's be sure.
+static_assert(kMaxClass <= std::numeric_limits<CompactSizeClass>::max());
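+
+// Worked instance of the definitions above for the default configuration
+// (TCMALLOC_PAGE_SHIFT == 13, no NUMA awareness): kNumBaseClasses == 86 and
+// kNumaPartitions == 1, so kExpandedClassesStart == 86 and, with expanded
+// classes enabled, kNumClasses == 86 + 86 == 172. kMaxClass == 171 fits in a
+// uint8_t, so CompactSizeClass is a single byte.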
+
+// Minimum/maximum number of batches in TransferCache per size class.
+// Actual numbers depend on a number of factors; see TransferCache::Init
+// for details.
+inline constexpr size_t kMinObjectsToMove = 2;
+inline constexpr size_t kMaxObjectsToMove = 128;
+
+inline constexpr size_t kPageSize = 1 << kPageShift;
+// Verify that the page size used is at least 8x smaller than the maximum
+// element size in the thread cache. This guarantees at most 12.5% internal
+// fragmentation (1/8). When page size is 256k (kPageShift == 18), the benefit
+// of increasing kMaxSize to be multiple of kPageSize is unclear. Object size
+// profile data indicates that the number of simultaneously live objects (of
+// size >= 256k) tends to be very small. Keeping those objects as 'large'
+// objects won't cause too much memory waste, while heap memory reuse can be
+// improved. Increasing kMaxSize to be too large has another bad side effect --
+// the thread cache pressure is increased, which will in turn increase traffic
+// between central cache and thread cache, leading to performance degradation.
+static_assert((kMaxSize / kPageSize) >= kMinPages || kPageShift >= 18,
+ "Ratio of kMaxSize / kPageSize is too small");
+
+inline constexpr size_t kAlignment = 8;
+// log2 (kAlignment)
+inline constexpr size_t kAlignmentShift = absl::bit_width(kAlignment - 1u);
+
+// The number of times that a deallocation can cause a freelist to
+// go over its max_length() before shrinking max_length().
+inline constexpr int kMaxOverages = 3;
+
+// Maximum length we allow a per-thread free-list to have before we
+// move objects from it into the corresponding central free-list. We
+// want this big to avoid locking the central free-list too often. It
+// should not hurt to make this list somewhat big because the
+// scavenging code will shrink it down when its contents are not in use.
+inline constexpr int kMaxDynamicFreeListLength = 8192;
+
+enum class MemoryTag : uint8_t {
+ // Sampled, infrequently allocated
+ kSampled = 0x0,
+ // Not sampled, NUMA partition 0
+ kNormalP0 = 0x1,
+ // Not sampled, NUMA partition 1
+ kNormalP1 = (kNumaPartitions > 1) ? 0x2 : 0xff,
+ // Not sampled
+ kNormal = kNormalP0,
+};
+
+inline constexpr uintptr_t kTagShift = std::min(kAddressBits - 4, 42);
+inline constexpr uintptr_t kTagMask = uintptr_t{0x3} << kTagShift;
+
+// Returns true if ptr is tagged.
+ABSL_DEPRECATED("Replace with specific tests")
+inline bool IsTaggedMemory(const void* ptr) {
+ return (reinterpret_cast<uintptr_t>(ptr) & kTagMask) == 0;
+}
+
+inline bool IsSampledMemory(const void* ptr) {
+ constexpr uintptr_t kSampledNormalMask = kNumaPartitions > 1 ? 0x3 : 0x1;
+
+ static_assert(static_cast<uintptr_t>(MemoryTag::kNormalP0) &
+ kSampledNormalMask);
+ static_assert(static_cast<uintptr_t>(MemoryTag::kNormalP1) &
+ kSampledNormalMask);
+
+ const uintptr_t tag =
+ (reinterpret_cast<uintptr_t>(ptr) & kTagMask) >> kTagShift;
+ return (tag & kSampledNormalMask) ==
+ static_cast<uintptr_t>(MemoryTag::kSampled);
+}
+
+inline bool IsNormalMemory(const void* ptr) { return !IsSampledMemory(ptr); }
+
+inline MemoryTag GetMemoryTag(const void* ptr) {
+ return static_cast<MemoryTag>((reinterpret_cast<uintptr_t>(ptr) & kTagMask) >>
+ kTagShift);
+}
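+
+// Illustrative reading of the tag scheme above (this assumes
+// kAddressBits == 48, the common x86-64 value from internal/config.h):
+// kTagShift == min(48 - 4, 42) == 42, so kTagMask selects bits 42 and 43.
+// A pointer such as 0x0000'0400'0000'1000 has bit 42 set, i.e. tag 0x1 ==
+// MemoryTag::kNormalP0, so IsSampledMemory() returns false for it, while a
+// pointer with both tag bits clear is treated as sampled.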
+
+absl::string_view MemoryTagToLabel(MemoryTag tag);
+
+inline constexpr bool IsExpandedSizeClass(unsigned cl) {
+ return kHasExpandedClasses && (cl >= kExpandedClassesStart);
+}
+
+#if !defined(TCMALLOC_SMALL_BUT_SLOW) && __SIZEOF_POINTER__ != 4
+// Always allocate at least a huge page
+inline constexpr size_t kMinSystemAlloc = kHugePageSize;
+inline constexpr size_t kMinMmapAlloc = 1 << 30; // mmap() in 1GiB ranges.
+#else
+// Allocate in units of 2MiB. This is the size of a huge page for x86, but
+// not for Power.
+inline constexpr size_t kMinSystemAlloc = 2 << 20;
+// mmap() in units of 32MiB. This is a multiple of huge page size for
+// both x86 (2MiB) and Power (16MiB)
+inline constexpr size_t kMinMmapAlloc = 32 << 20;
+#endif
+
+static_assert(kMinMmapAlloc % kMinSystemAlloc == 0,
+ "Minimum mmap allocation size is not a multiple of"
+ " minimum system allocation size");
+
+inline MemoryTag NumaNormalTag(size_t numa_partition) {
+ switch (numa_partition) {
+ case 0:
+ return MemoryTag::kNormalP0;
+ case 1:
+ return MemoryTag::kNormalP1;
+ default:
+ ASSUME(false);
+ __builtin_unreachable();
+ }
+}
+
+inline size_t NumaPartitionFromPointer(void* ptr) {
+ if constexpr (kNumaPartitions == 1) {
+ return 0;
+ }
+
+ switch (GetMemoryTag(ptr)) {
+ case MemoryTag::kNormalP1:
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+// Size-class information + mapping
+class SizeMap {
+ public:
+  // All size classes <= 512 bytes always use single-page spans in all configs.
+ static constexpr size_t kMultiPageSize = 512;
+ // Min alignment for all size classes > kMultiPageSize in all configs.
+ static constexpr size_t kMultiPageAlignment = 64;
+ // log2 (kMultiPageAlignment)
+ static constexpr size_t kMultiPageAlignmentShift =
+ absl::bit_width(kMultiPageAlignment - 1u);
+
+ private:
+ //-------------------------------------------------------------------
+ // Mapping from size to size_class and vice versa
+ //-------------------------------------------------------------------
+
+ // Sizes <= 1024 have an alignment >= 8. So for such sizes we have an
+ // array indexed by ceil(size/8). Sizes > 1024 have an alignment >= 128.
+ // So for these larger sizes we have an array indexed by ceil(size/128).
+ //
+ // We flatten both logical arrays into one physical array and use
+ // arithmetic to compute an appropriate index. The constants used by
+ // ClassIndex() were selected to make the flattening work.
+ //
+ // Examples:
+ // Size Expression Index
+ // -------------------------------------------------------
+ // 0 (0 + 7) / 8 0
+ // 1 (1 + 7) / 8 1
+ // ...
+ // 1024 (1024 + 7) / 8 128
+ // 1025 (1025 + 127 + (120<<7)) / 128 129
+ // ...
+ // 32768 (32768 + 127 + (120<<7)) / 128 376
+ static constexpr int kMaxSmallSize = 1024;
+ static constexpr size_t kClassArraySize =
+ ((kMaxSize + 127 + (120 << 7)) >> 7) + 1;
+
+ // Batch size is the number of objects to move at once.
+ typedef unsigned char BatchSize;
+
+ // class_array_ is accessed on every malloc, so is very hot. We make it the
+ // first member so that it inherits the overall alignment of a SizeMap
+ // instance. In particular, if we create a SizeMap instance that's cache-line
+ // aligned, this member is also aligned to the width of a cache line.
+ CompactSizeClass
+ class_array_[kClassArraySize * (kHasExpandedClasses ? 2 : 1)] = {0};
+
+ // Number of objects to move between a per-thread list and a central
+ // list in one shot. We want this to be not too small so we can
+ // amortize the lock overhead for accessing the central list. Making
+ // it too big may temporarily cause unnecessary memory wastage in the
+ // per-thread free list until the scavenger cleans up the list.
+ BatchSize num_objects_to_move_[kNumClasses] = {0};
+
+ // If size is no more than kMaxSize, compute index of the
+ // class_array[] entry for it, putting the class index in output
+ // parameter idx and returning true. Otherwise return false.
+ static inline bool ABSL_ATTRIBUTE_ALWAYS_INLINE
+ ClassIndexMaybe(size_t s, uint32_t* idx) {
+ if (ABSL_PREDICT_TRUE(s <= kMaxSmallSize)) {
+ *idx = (static_cast<uint32_t>(s) + 7) >> 3;
+ return true;
+ } else if (s <= kMaxSize) {
+ *idx = (static_cast<uint32_t>(s) + 127 + (120 << 7)) >> 7;
+ return true;
+ }
+ return false;
+ }
+
+ static inline size_t ClassIndex(size_t s) {
+ uint32_t ret;
+ CHECK_CONDITION(ClassIndexMaybe(s, &ret));
+ return ret;
+ }
+
+ // Mapping from size class to number of pages to allocate at a time
+ unsigned char class_to_pages_[kNumClasses] = {0};
+
+ // Mapping from size class to max size storable in that class
+ uint32_t class_to_size_[kNumClasses] = {0};
+
+  // If the environment variable is defined, use it to override the size
+  // classes. Returns true if all classes are defined correctly.
+ bool MaybeRunTimeSizeClasses();
+
+ protected:
+  // Set the given size classes to be used by TCMalloc.
+ void SetSizeClasses(int num_classes, const SizeClassInfo* parsed);
+
+ // Check that the size classes meet all requirements.
+ bool ValidSizeClasses(int num_classes, const SizeClassInfo* parsed);
+
+  // Definition of the size classes that are set in size_classes.cc
+ static const SizeClassInfo kSizeClasses[];
+ static const int kSizeClassesCount;
+
+ static const SizeClassInfo kExperimentalPow2Below64SizeClasses[];
+ static const int kExperimentalPow2Below64SizeClassesCount;
+ static const SizeClassInfo kExperimentalPow2SizeClasses[];
+ static const int kExperimentalPow2SizeClassesCount;
+
+  // Definition of the size classes that are set in size_classes.cc
+ static const SizeClassInfo kLegacySizeClasses[];
+ static const int kLegacySizeClassesCount;
+
+ public:
+ // constexpr constructor to guarantee zero-initialization at compile-time. We
+ // rely on Init() to populate things.
+ constexpr SizeMap() = default;
+
+ // Initialize the mapping arrays
+ void Init();
+
+ // Returns the size class for size `size` respecting the alignment
+ // requirements of `policy`.
+ //
+ // Returns true on success. Returns false if either:
+ // - the size exceeds the maximum size class size.
+  // - the align size is greater than or equal to the default page size
+ // - no matching properly aligned size class is available
+ //
+ // Requires that policy.align() returns a non-zero power of 2.
+ //
+ // When policy.align() = 1 the default alignment of the size table will be
+ // used. If policy.align() is constexpr 1 (e.g. when using
+ // DefaultAlignPolicy) then alignment-related code will optimize away.
+ //
+ // TODO(b/171978365): Replace the output parameter with returning
+ // absl::optional<uint32_t>.
+ template <typename Policy>
+ inline bool ABSL_ATTRIBUTE_ALWAYS_INLINE GetSizeClass(Policy policy,
+ size_t size,
+ uint32_t* cl) {
+ const size_t align = policy.align();
+ ASSERT(absl::has_single_bit(align));
+
+ if (ABSL_PREDICT_FALSE(align >= kPageSize)) {
+ // TODO(b/172060547): Consider changing this to align > kPageSize.
+ ABSL_ANNOTATE_MEMORY_IS_UNINITIALIZED(cl, sizeof(*cl));
+ return false;
+ }
+
+ uint32_t idx;
+ if (ABSL_PREDICT_FALSE(!ClassIndexMaybe(size, &idx))) {
+ ABSL_ANNOTATE_MEMORY_IS_UNINITIALIZED(cl, sizeof(*cl));
+ return false;
+ }
+ *cl = class_array_[idx] + policy.scaled_numa_partition();
+
+ // Predict that size aligned allocs most often directly map to a proper
+ // size class, i.e., multiples of 32, 64, etc, matching our class sizes.
+ const size_t mask = (align - 1);
+ do {
+ if (ABSL_PREDICT_TRUE((class_to_size(*cl) & mask) == 0)) {
+ return true;
+ }
+ } while ((++*cl % kNumBaseClasses) != 0);
+
+ ABSL_ANNOTATE_MEMORY_IS_UNINITIALIZED(cl, sizeof(*cl));
+ return false;
+ }
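+
+  // Illustrative use of GetSizeClass() above (mirrors how callers such as
+  // the central freelist benchmark use SizeMap; CppPolicy is declared in
+  // tcmalloc_policy.h, not in this header):
+  //
+  //   uint32_t cl;
+  //   if (Static::sizemap().GetSizeClass(CppPolicy(), /*size=*/40, &cl)) {
+  //     // cl names a class with class_to_size(cl) >= 40 whose size also
+  //     // satisfies the policy's alignment.
+  //   } else {
+  //     // Callers typically fall back to a page-level allocation here.
+  //   }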
+
+ // Returns size class for given size, or 0 if this instance has not been
+ // initialized yet. REQUIRES: size <= kMaxSize.
+ template <typename Policy>
+ inline size_t ABSL_ATTRIBUTE_ALWAYS_INLINE SizeClass(Policy policy,
+ size_t size) {
+ ASSERT(size <= kMaxSize);
+ uint32_t ret = 0;
+ GetSizeClass(policy, size, &ret);
+ return ret;
+ }
+
+  // Get the byte-size for a specified class. REQUIRES: cl < kNumClasses.
+ inline size_t ABSL_ATTRIBUTE_ALWAYS_INLINE class_to_size(size_t cl) {
+ ASSERT(cl < kNumClasses);
+ return class_to_size_[cl];
+ }
+
+ // Mapping from size class to number of pages to allocate at a time
+ inline size_t class_to_pages(size_t cl) {
+ ASSERT(cl < kNumClasses);
+ return class_to_pages_[cl];
+ }
+
+ // Number of objects to move between a per-thread list and a central
+ // list in one shot. We want this to be not too small so we can
+ // amortize the lock overhead for accessing the central list. Making
+ // it too big may temporarily cause unnecessary memory wastage in the
+ // per-thread free list until the scavenger cleans up the list.
+ inline SizeMap::BatchSize num_objects_to_move(size_t cl) {
+ ASSERT(cl < kNumClasses);
+ return num_objects_to_move_[cl];
+ }
+};
+
+// Linker initialized, so this lock can be accessed at any time.
+extern absl::base_internal::SpinLock pageheap_lock;
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // TCMALLOC_COMMON_H_
diff --git a/contrib/libs/tcmalloc/tcmalloc/cpu_cache.cc b/contrib/libs/tcmalloc/tcmalloc/cpu_cache.cc
new file mode 100644
index 0000000000..8ae02b38e9
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/cpu_cache.cc
@@ -0,0 +1,1140 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/cpu_cache.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#include <algorithm>
+#include <atomic>
+
+#include "absl/base/dynamic_annotations.h"
+#include "absl/base/internal/spinlock.h"
+#include "absl/base/internal/sysinfo.h"
+#include "absl/base/macros.h"
+#include "absl/base/thread_annotations.h"
+#include "absl/container/fixed_array.h"
+#include "tcmalloc/arena.h"
+#include "tcmalloc/common.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/internal_malloc_extension.h"
+#include "tcmalloc/parameters.h"
+#include "tcmalloc/static_vars.h"
+#include "tcmalloc/transfer_cache.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+static cpu_set_t FillActiveCpuMask() {
+ cpu_set_t allowed_cpus;
+ if (sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus) != 0) {
+ CPU_ZERO(&allowed_cpus);
+ }
+
+#ifdef PERCPU_USE_RSEQ
+ const bool real_cpus = !subtle::percpu::UsingFlatVirtualCpus();
+#else
+ const bool real_cpus = true;
+#endif
+
+ if (real_cpus) {
+ return allowed_cpus;
+ }
+
+ const int virtual_cpu_count = CPU_COUNT(&allowed_cpus);
+ CPU_ZERO(&allowed_cpus);
+ for (int cpu = 0; cpu < virtual_cpu_count; ++cpu) {
+ CPU_SET(cpu, &allowed_cpus);
+ }
+ return allowed_cpus;
+}
+
+// MaxCapacity() determines how we distribute memory in the per-cpu cache
+// to the various class sizes.
+static size_t MaxCapacity(size_t cl) {
+ // The number of size classes that are commonly used and thus should be
+ // allocated more slots in the per-cpu cache.
+ static constexpr size_t kNumSmall = 10;
+
+ // The memory used for each per-CPU slab is the sum of:
+ // sizeof(std::atomic<int64_t>) * kNumClasses
+ // sizeof(void*) * (kSmallObjectDepth + 1) * kNumSmall
+ // sizeof(void*) * (kLargeObjectDepth + 1) * kNumLarge
+ //
+  // Size class 0 has MaxCapacity() == 0, so only kNumClasses - 1 classes
+  // (kNumSmall small ones plus kNumLarge large ones) contribute object slots
+  // in the formula above.
+ //
+ // Each Size class region in the slab is preceded by one padding pointer that
+ // points to itself, because prefetch instructions of invalid pointers are
+ // slow. That is accounted for by the +1 for object depths.
+#if defined(TCMALLOC_SMALL_BUT_SLOW)
+ // With SMALL_BUT_SLOW we have 4KiB of per-cpu slab and 46 class sizes we
+ // allocate:
+  // == 8 * 46 + 8 * ((16 + 1) * 10 + (6 + 1) * 35) = 3688 bytes of 4096
+ static const uint16_t kSmallObjectDepth = 16;
+ static const uint16_t kLargeObjectDepth = 6;
+#else
+ // We allocate 256KiB per-cpu for pointers to cached per-cpu memory.
+ // Each 256KiB is a subtle::percpu::TcmallocSlab::Slabs
+ // Max(kNumClasses) is 89, so the maximum footprint per CPU is:
+ // 89 * 8 + 8 * ((2048 + 1) * 10 + (152 + 1) * 78 + 88) = 254 KiB
+ static const uint16_t kSmallObjectDepth = 2048;
+ static const uint16_t kLargeObjectDepth = 152;
+#endif
+ if (cl == 0 || cl >= kNumClasses) return 0;
+
+ if (Static::sharded_transfer_cache().should_use(cl)) {
+ return 0;
+ }
+
+ if (Static::sizemap().class_to_size(cl) == 0) {
+ return 0;
+ }
+
+ if (!IsExpandedSizeClass(cl) && (cl % kNumBaseClasses) <= kNumSmall) {
+ // Small object sizes are very heavily used and need very deep caches for
+ // good performance (well over 90% of malloc calls are for cl <= 10.)
+ return kSmallObjectDepth;
+ }
+
+ if (IsExpandedSizeClass(cl)) {
+ return 0;
+ }
+
+ return kLargeObjectDepth;
+}
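+
+// Evaluating the footprint formula from the comment in MaxCapacity() above
+// for the non-SMALL_BUT_SLOW case: 89 * 8 + 8 * ((2048 + 1) * 10 +
+// (152 + 1) * 78 + 88) == 712 + 8 * 32512 == 260808 bytes, roughly 254.7 KiB,
+// which fits in the 256 KiB (262144-byte) per-cpu slab.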
+
+static void *SlabAlloc(size_t size)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) {
+ return Static::arena().Alloc(size);
+}
+
+void CPUCache::Activate(ActivationMode mode) {
+ ASSERT(Static::IsInited());
+ int num_cpus = absl::base_internal::NumCPUs();
+
+ size_t per_cpu_shift = kPerCpuShift;
+ const auto &topology = Static::numa_topology();
+ if (topology.numa_aware()) {
+ per_cpu_shift += absl::bit_ceil(topology.active_partitions() - 1);
+ }
+
+ const size_t kBytesAvailable = (1 << per_cpu_shift);
+ size_t bytes_required = sizeof(std::atomic<int64_t>) * kNumClasses;
+
+ // Deal with size classes that correspond only to NUMA partitions that are in
+ // use. If NUMA awareness is disabled then we may have a smaller shift than
+ // would suffice for all of the unused size classes.
+ for (int cl = 0;
+ cl < Static::numa_topology().active_partitions() * kNumBaseClasses;
+ ++cl) {
+ const uint16_t mc = MaxCapacity(cl);
+ max_capacity_[cl] = mc;
+ bytes_required += sizeof(void *) * mc;
+ }
+
+ // Deal with expanded size classes.
+ for (int cl = kExpandedClassesStart; cl < kNumClasses; ++cl) {
+ const uint16_t mc = MaxCapacity(cl);
+ max_capacity_[cl] = mc;
+ bytes_required += sizeof(void *) * mc;
+ }
+
+ // As we may make certain size classes no-ops by selecting "0" at runtime,
+ // using a compile-time calculation overestimates the worst-case memory usage.
+ if (ABSL_PREDICT_FALSE(bytes_required > kBytesAvailable)) {
+ Crash(kCrash, __FILE__, __LINE__, "per-CPU memory exceeded, have ",
+ kBytesAvailable, " need ", bytes_required);
+ }
+
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+
+ resize_ = reinterpret_cast<ResizeInfo *>(
+ Static::arena().Alloc(sizeof(ResizeInfo) * num_cpus));
+ lazy_slabs_ = Parameters::lazy_per_cpu_caches();
+
+ auto max_cache_size = Parameters::max_per_cpu_cache_size();
+
+ for (int cpu = 0; cpu < num_cpus; ++cpu) {
+ for (int cl = 1; cl < kNumClasses; ++cl) {
+ resize_[cpu].per_class[cl].Init();
+ }
+ resize_[cpu].available.store(max_cache_size, std::memory_order_relaxed);
+ resize_[cpu].capacity.store(max_cache_size, std::memory_order_relaxed);
+ resize_[cpu].last_steal.store(1, std::memory_order_relaxed);
+ }
+
+ freelist_.Init(SlabAlloc, MaxCapacityHelper, lazy_slabs_, per_cpu_shift);
+ if (mode == ActivationMode::FastPathOn) {
+ Static::ActivateCPUCache();
+ }
+}
+
+// Fetch more items from the central cache, refill our local cache,
+// and try to grow it if necessary.
+//
+// This is complicated by the fact that we can only tweak the cache on
+// our current CPU and we might get migrated whenever (in fact, we
+// might already have been migrated since failing to get memory...)
+//
+// So make sure only to make changes to one CPU's cache; at all times,
+// it must be safe to find ourselves migrated (at which point we atomically
+// return memory to the correct CPU.)
+void *CPUCache::Refill(int cpu, size_t cl) {
+ const size_t batch_length = Static::sizemap().num_objects_to_move(cl);
+
+ // UpdateCapacity can evict objects from other size classes as it tries to
+ // increase capacity of this size class. The objects are returned in
+ // to_return, we insert them into transfer cache at the end of function
+ // (to increase possibility that we stay on the current CPU as we are
+ // refilling the list).
+ ObjectsToReturn to_return;
+ const size_t target =
+ UpdateCapacity(cpu, cl, batch_length, false, &to_return);
+
+ // Refill target objects in batch_length batches.
+ size_t total = 0;
+ size_t got;
+ size_t i;
+ void *result = nullptr;
+ void *batch[kMaxObjectsToMove];
+ do {
+ const size_t want = std::min(batch_length, target - total);
+ got = Static::transfer_cache().RemoveRange(cl, batch, want);
+ if (got == 0) {
+ break;
+ }
+ total += got;
+ i = got;
+ if (result == nullptr) {
+ i--;
+ result = batch[i];
+ }
+ if (i) {
+ i -= freelist_.PushBatch(cl, batch, i);
+ if (i != 0) {
+ static_assert(ABSL_ARRAYSIZE(batch) >= kMaxObjectsToMove,
+ "not enough space in batch");
+ Static::transfer_cache().InsertRange(cl, absl::Span<void *>(batch, i));
+ }
+ }
+ } while (got == batch_length && i == 0 && total < target &&
+ cpu == freelist_.GetCurrentVirtualCpuUnsafe());
+
+ for (int i = to_return.count; i < kMaxToReturn; ++i) {
+ Static::transfer_cache().InsertRange(
+ to_return.cl[i], absl::Span<void *>(&(to_return.obj[i]), 1));
+ }
+
+ return result;
+}
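+
+// Illustrative flow of Refill() above: with batch_length == 32 and a target
+// of 128 it performs up to four RemoveRange() calls of 32 objects each; one
+// object from the first batch is handed back to the caller and the rest are
+// pushed onto the per-cpu freelist. The loop stops early if the freelist
+// cannot absorb a full batch, the transfer cache returns a short batch, or
+// the thread has migrated to another cpu.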
+
+size_t CPUCache::UpdateCapacity(int cpu, size_t cl, size_t batch_length,
+ bool overflow, ObjectsToReturn *to_return) {
+ // Freelist size balancing strategy:
+ // - We grow a size class only on overflow/underflow.
+ // - We shrink size classes in Steal as it scans all size classes.
+ // - If overflows/underflows happen on a size class, we want to grow its
+ // capacity to at least 2 * batch_length. It enables usage of the
+ // transfer cache and leaves the list half-full after we insert/remove
+ // a batch from the transfer cache.
+ // - We increase capacity beyond 2 * batch_length only when an overflow is
+ // followed by an underflow. That's the only case when we could benefit
+ // from larger capacity -- the overflow and the underflow would collapse.
+ //
+  // Note: we can't tell when we have a perfectly-sized list, because for a
+  // perfectly-sized list we don't hit any slow paths, which looks the same as
+  // an inactive list. Eventually we will shrink a perfectly-sized list a bit and
+ // then it will grow back. This won't happen very frequently for the most
+ // important small sizes, because we will need several ticks before we shrink
+ // it again. Also we will shrink it by 1, but grow by a batch. So we should
+ // have lots of time until we need to grow it again.
+
+ const size_t max_capacity = max_capacity_[cl];
+ size_t capacity = freelist_.Capacity(cpu, cl);
+ // We assert that the return value, target, is non-zero, so starting from an
+ // initial capacity of zero means we may be populating this core for the
+ // first time.
+ absl::base_internal::LowLevelCallOnce(
+ &resize_[cpu].initialized,
+ [](CPUCache *cache, int cpu) {
+ if (cache->lazy_slabs_) {
+ absl::base_internal::SpinLockHolder h(&cache->resize_[cpu].lock);
+ cache->freelist_.InitCPU(cpu, MaxCapacityHelper);
+ }
+
+ // While we could unconditionally store, a lazy slab population
+ // implementation will require evaluating a branch.
+ cache->resize_[cpu].populated.store(true, std::memory_order_relaxed);
+ },
+ this, cpu);
+ const bool grow_by_one = capacity < 2 * batch_length;
+ uint32_t successive = 0;
+ bool grow_by_batch =
+ resize_[cpu].per_class[cl].Update(overflow, grow_by_one, &successive);
+ if ((grow_by_one || grow_by_batch) && capacity != max_capacity) {
+ size_t increase = 1;
+ if (grow_by_batch) {
+ increase = std::min(batch_length, max_capacity - capacity);
+ } else if (!overflow && capacity < batch_length) {
+ // On underflow we want to grow to at least batch size, because that's
+ // what we want to request from transfer cache.
+ increase = batch_length - capacity;
+ }
+ Grow(cpu, cl, increase, to_return);
+ capacity = freelist_.Capacity(cpu, cl);
+ }
+ // Calculate number of objects to return/request from transfer cache.
+ // Generally we prefer to transfer a single batch, because transfer cache
+ // handles it efficiently. Except for 2 special cases:
+ size_t target = batch_length;
+ // "capacity + 1" because on overflow we already have one object from caller,
+ // so we can return a whole batch even if capacity is one less. Similarly,
+ // on underflow we need to return one object to caller, so we can request
+ // a whole batch even if capacity is one less.
+ if ((capacity + 1) < batch_length) {
+ // If we don't have a full batch, return/request just half. We are missing
+ // transfer cache anyway, and cost of insertion into central freelist is
+ // ~O(number of objects).
+ target = std::max<size_t>(1, (capacity + 1) / 2);
+ } else if (successive > 0 && capacity >= 3 * batch_length) {
+    // If the freelist is large and we are hitting a series of overflows or
+ // underflows, return/request several batches at once. On the first overflow
+ // we return 1 batch, on the second -- 2, on the third -- 4 and so on up to
+ // half of the batches we have. We do this to save on the cost of hitting
+ // malloc/free slow path, reduce instruction cache pollution, avoid cache
+ // misses when accessing transfer/central caches, etc.
+ size_t num_batches =
+ std::min<size_t>(1 << std::min<uint32_t>(successive, 10),
+ ((capacity / batch_length) + 1) / 2);
+ target = num_batches * batch_length;
+ }
+ ASSERT(target != 0);
+ return target;
+}
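+
+// Worked example of the target computation in UpdateCapacity() above: with
+// batch_length == 32, capacity == 256 and successive == 3, the freelist is
+// >= 3 * batch_length, so num_batches == min(1 << 3, ((256 / 32) + 1) / 2)
+// == min(8, 4) == 4 and target == 4 * 32 == 128 objects per refill/overflow
+// episode.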
+
+void CPUCache::Grow(int cpu, size_t cl, size_t desired_increase,
+ ObjectsToReturn *to_return) {
+ const size_t size = Static::sizemap().class_to_size(cl);
+ const size_t desired_bytes = desired_increase * size;
+ size_t acquired_bytes;
+
+ // First, there might be unreserved slack. Take what we can.
+ size_t before, after;
+ do {
+ before = resize_[cpu].available.load(std::memory_order_relaxed);
+ acquired_bytes = std::min(before, desired_bytes);
+ after = before - acquired_bytes;
+ } while (!resize_[cpu].available.compare_exchange_strong(
+ before, after, std::memory_order_relaxed, std::memory_order_relaxed));
+
+ if (acquired_bytes < desired_bytes) {
+ acquired_bytes += Steal(cpu, cl, desired_bytes - acquired_bytes, to_return);
+ }
+
+ // We have all the memory we could reserve. Time to actually do the growth.
+
+ // We might have gotten more than we wanted (stealing from larger sizeclasses)
+ // so don't grow _too_ much.
+ size_t actual_increase = acquired_bytes / size;
+ actual_increase = std::min(actual_increase, desired_increase);
+ // Remember, Grow may not give us all we ask for.
+ size_t increase = freelist_.Grow(cpu, cl, actual_increase, max_capacity_[cl]);
+ size_t increased_bytes = increase * size;
+ if (increased_bytes < acquired_bytes) {
+ // return whatever we didn't use to the slack.
+ size_t unused = acquired_bytes - increased_bytes;
+ resize_[cpu].available.fetch_add(unused, std::memory_order_relaxed);
+ }
+}
+
+void CPUCache::TryReclaimingCaches() {
+ const int num_cpus = absl::base_internal::NumCPUs();
+
+ for (int cpu = 0; cpu < num_cpus; ++cpu) {
+ // Nothing to reclaim if the cpu is not populated.
+ if (!HasPopulated(cpu)) {
+ continue;
+ }
+
+ uint64_t used_bytes = UsedBytes(cpu);
+ uint64_t prev_used_bytes =
+ resize_[cpu].reclaim_used_bytes.load(std::memory_order_relaxed);
+
+ // Get reclaim miss and used bytes stats that were captured at the end of
+ // the previous interval.
+ const CpuCacheMissStats miss_stats = GetReclaimCacheMissStats(cpu);
+ uint64_t misses =
+ uint64_t{miss_stats.underflows} + uint64_t{miss_stats.overflows};
+
+ // Reclaim the cache if the number of used bytes and total number of misses
+ // stayed constant since the last interval.
+ if (used_bytes != 0 && used_bytes == prev_used_bytes && misses == 0) {
+ Reclaim(cpu);
+ }
+
+ // Takes a snapshot of used bytes in the cache at the end of this interval
+ // so that we can calculate if cache usage changed in the next interval.
+ //
+ // Reclaim occurs on a single thread. So, the relaxed store to used_bytes
+ // is safe.
+ resize_[cpu].reclaim_used_bytes.store(used_bytes,
+ std::memory_order_relaxed);
+ }
+}
+
+void CPUCache::ShuffleCpuCaches() {
+ // Knobs that we can potentially tune depending on the workloads.
+ constexpr double kBytesToStealPercent = 5.0;
+ constexpr int kMaxNumStealCpus = 5;
+
+ const int num_cpus = absl::base_internal::NumCPUs();
+ absl::FixedArray<std::pair<int, uint64_t>> misses(num_cpus);
+
+  // Record the cumulative misses for the caches so that we can select the
+  // caches with the highest misses as the candidates to steal capacity for.
+ int max_populated_cpu = -1;
+ int num_populated_cpus = 0;
+ for (int cpu = 0; cpu < num_cpus; ++cpu) {
+ if (!HasPopulated(cpu)) {
+ continue;
+ }
+ const CpuCacheMissStats miss_stats = GetIntervalCacheMissStats(cpu);
+ misses[num_populated_cpus] = {
+ cpu, uint64_t{miss_stats.underflows} + uint64_t{miss_stats.overflows}};
+ max_populated_cpu = cpu;
+ ++num_populated_cpus;
+ }
+ if (max_populated_cpu == -1) {
+ return;
+ }
+
+ // Sorts misses to identify cpus with highest misses.
+ //
+ // TODO(vgogte): We can potentially sort the entire misses array and use that
+ // in StealFromOtherCache to determine cpus to steal from. That is, [0,
+ // num_dest_cpus) may be the destination cpus and [num_dest_cpus, num_cpus)
+  // may be cpus we may steal from. We can iterate through the array in
+  // descending order to steal from them. The upside of this mechanism is
+  // that we would be able to steal more fairly, starting with the cpus with
+  // the lowest misses. The downside of this mechanism is that we would have
+  // to sort the
+ // entire misses array. This might be compute intensive on servers with high
+ // number of cpus (eg. Rome, Milan). We need to investigate the compute
+ // required to implement this.
+ const int num_dest_cpus = std::min(num_populated_cpus, kMaxNumStealCpus);
+ std::partial_sort(misses.begin(), misses.begin() + num_dest_cpus,
+ misses.end(),
+ [](std::pair<int, uint64_t> a, std::pair<int, uint64_t> b) {
+ if (a.second == b.second) {
+ return a.first < b.first;
+ }
+ return a.second > b.second;
+ });
+
+  // Try to steal kBytesToStealPercent percent of max_per_cpu_cache_size for
+ // each destination cpu cache.
+ size_t to_steal =
+ kBytesToStealPercent / 100.0 * Parameters::max_per_cpu_cache_size();
+ for (int i = 0; i < num_dest_cpus; ++i) {
+ StealFromOtherCache(misses[i].first, max_populated_cpu, to_steal);
+ }
+
+ // Takes a snapshot of underflows and overflows at the end of this interval
+ // so that we can calculate the misses that occurred in the next interval.
+ for (int cpu = 0; cpu < num_cpus; ++cpu) {
+ size_t underflows =
+ resize_[cpu].total_underflows.load(std::memory_order_relaxed);
+ size_t overflows =
+ resize_[cpu].total_overflows.load(std::memory_order_relaxed);
+
+    // Shuffle occurs on a single thread. So, the relaxed stores to the
+    // shuffle_underflows and shuffle_overflows counters are safe.
+ resize_[cpu].shuffle_underflows.store(underflows,
+ std::memory_order_relaxed);
+ resize_[cpu].shuffle_overflows.store(overflows, std::memory_order_relaxed);
+ }
+}
+
+static void ShrinkHandler(void *arg, size_t cl, void **batch, size_t count) {
+ const size_t batch_length = Static::sizemap().num_objects_to_move(cl);
+ for (size_t i = 0; i < count; i += batch_length) {
+ size_t n = std::min(batch_length, count - i);
+ Static::transfer_cache().InsertRange(cl, absl::Span<void *>(batch + i, n));
+ }
+}
+
+void CPUCache::StealFromOtherCache(int cpu, int max_populated_cpu,
+ size_t bytes) {
+ constexpr double kCacheMissThreshold = 0.80;
+
+ const CpuCacheMissStats dest_misses = GetIntervalCacheMissStats(cpu);
+
+ // If both underflows and overflows are 0, we should not need to steal.
+ if (dest_misses.underflows == 0 && dest_misses.overflows == 0) return;
+
+ size_t acquired = 0;
+
+ // We use last_cpu_cache_steal_ as a hint to start our search for cpu ids to
+ // steal from so that we can iterate through the cpus in a nice round-robin
+ // fashion.
+ int src_cpu = std::min(last_cpu_cache_steal_.load(std::memory_order_relaxed),
+ max_populated_cpu);
+
+  // We iterate through up to max_populated_cpu cpus to steal from.
+  // max_populated_cpu records the maximum cpu id that has been populated.
+  // Note that any intermediate changes since max_populated_cpu was measured
+  // may have populated higher cpu ids, but we do not include those in the
+  // search. The approximation prevents us from doing another pass through
+  // the cpus just to find the latest populated cpu id.
+ //
+ // We break from the loop once we iterate through all the cpus once, or if the
+ // total number of acquired bytes is higher than or equal to the desired bytes
+ // we want to steal.
+ for (int cpu_offset = 1; cpu_offset <= max_populated_cpu && acquired < bytes;
+ ++cpu_offset) {
+ if (--src_cpu < 0) {
+ src_cpu = max_populated_cpu;
+ }
+ ASSERT(0 <= src_cpu);
+ ASSERT(src_cpu <= max_populated_cpu);
+
+ // We do not steal from the same CPU. Maybe we can explore combining this
+ // with stealing from the same CPU later.
+ if (src_cpu == cpu) continue;
+
+ // We do not steal from the cache that hasn't been populated yet.
+ if (!HasPopulated(src_cpu)) continue;
+
+    // We do not steal from a cache whose capacity is below our lower
+    // capacity threshold.
+ if (Capacity(src_cpu) <
+ kCacheCapacityThreshold * Parameters::max_per_cpu_cache_size())
+ continue;
+
+ const CpuCacheMissStats src_misses = GetIntervalCacheMissStats(src_cpu);
+
+ // If underflows and overflows from the source cpu are higher, we do not
+ // steal from that cache. We consider the cache as a candidate to steal from
+ // only when its misses are lower than 0.8x that of the dest cache.
+ if (src_misses.underflows > kCacheMissThreshold * dest_misses.underflows ||
+ src_misses.overflows > kCacheMissThreshold * dest_misses.overflows)
+ continue;
+
+ size_t start_cl =
+ resize_[src_cpu].last_steal.load(std::memory_order_relaxed);
+
+ ASSERT(start_cl < kNumClasses);
+ ASSERT(0 < start_cl);
+ size_t source_cl = start_cl;
+ for (size_t offset = 1; offset < kNumClasses; ++offset) {
+ source_cl = start_cl + offset;
+ if (source_cl >= kNumClasses) {
+ source_cl -= kNumClasses - 1;
+ }
+ ASSERT(0 < source_cl);
+ ASSERT(source_cl < kNumClasses);
+
+ const size_t capacity = freelist_.Capacity(src_cpu, source_cl);
+ if (capacity == 0) {
+ // Nothing to steal.
+ continue;
+ }
+ const size_t length = freelist_.Length(src_cpu, source_cl);
+
+ // TODO(vgogte): Currently, scoring is similar to stealing from the
+ // same cpu in CpuCache::Steal(). Revisit this later to tune the
+ // knobs.
+ const size_t batch_length =
+ Static::sizemap().num_objects_to_move(source_cl);
+ size_t size = Static::sizemap().class_to_size(source_cl);
+
+ // Clock-like algorithm to prioritize size classes for shrinking.
+ //
+ // Each size class has quiescent ticks counter which is incremented as we
+ // pass it, the counter is reset to 0 in UpdateCapacity on grow.
+ // If the counter value is 0, then we've just tried to grow the size
+ // class, so it makes little sense to shrink it back. The higher counter
+ // value the longer ago we grew the list and the more probable it is that
+ // the full capacity is unused.
+ //
+      // Then, we calculate a "shrinking score": the higher the score, the less
+      // we want to shrink this size class. The score is considerably skewed
+ // towards larger size classes: smaller classes are usually used more
+ // actively and we also benefit less from shrinking smaller classes (steal
+ // less capacity). Then, we also avoid shrinking full freelists as we will
+ // need to evict an object and then go to the central freelist to return
+ // it. Then, we also avoid shrinking freelists that are just above batch
+ // size, because shrinking them will disable transfer cache.
+ //
+ // Finally, we shrink if the ticks counter is >= the score.
+ uint32_t qticks = resize_[src_cpu].per_class[source_cl].Tick();
+ uint32_t score = 0;
+ // Note: the following numbers are based solely on intuition, common sense
+ // and benchmarking results.
+ if (size <= 144) {
+ score = 2 + (length >= capacity) +
+ (length >= batch_length && length < 2 * batch_length);
+ } else if (size <= 1024) {
+ score = 1 + (length >= capacity) +
+ (length >= batch_length && length < 2 * batch_length);
+ } else if (size <= (64 << 10)) {
+ score = (length >= capacity);
+ }
+ if (score > qticks) {
+ continue;
+ }
+
+ // Finally, try to shrink (can fail if we were migrated).
+ // We always shrink by 1 object. The idea is that inactive lists will be
+ // shrunk to zero eventually anyway (or they just would not grow in the
+ // first place), but for active lists it does not make sense to
+ // aggressively shuffle capacity all the time.
+ //
+ // If the list is full, ShrinkOtherCache first tries to pop enough items
+ // to make space and then shrinks the capacity.
+ // TODO(vgogte): Maybe we can steal more from a single list to avoid
+ // frequent locking overhead.
+ {
+ absl::base_internal::SpinLockHolder h(&resize_[src_cpu].lock);
+ if (freelist_.ShrinkOtherCache(src_cpu, source_cl, 1, nullptr,
+ ShrinkHandler) == 1) {
+ acquired += size;
+ resize_[src_cpu].capacity.fetch_sub(size, std::memory_order_relaxed);
+ }
+ }
+
+ if (acquired >= bytes) {
+ break;
+ }
+ }
+ resize_[cpu].last_steal.store(source_cl, std::memory_order_relaxed);
+ }
+ // Record the last cpu id we stole from, which would provide a hint to the
+ // next time we iterate through the cpus for stealing.
+ last_cpu_cache_steal_.store(src_cpu, std::memory_order_relaxed);
+
+ // Increment the capacity of the destination cpu cache by the amount of bytes
+ // acquired from source caches.
+ if (acquired) {
+ size_t before = resize_[cpu].available.load(std::memory_order_relaxed);
+ size_t bytes_with_stolen;
+ do {
+ bytes_with_stolen = before + acquired;
+ } while (!resize_[cpu].available.compare_exchange_weak(
+ before, bytes_with_stolen, std::memory_order_relaxed,
+ std::memory_order_relaxed));
+ resize_[cpu].capacity.fetch_add(acquired, std::memory_order_relaxed);
+ }
+}
+
+// There are rather a lot of policy knobs we could tweak here.
+size_t CPUCache::Steal(int cpu, size_t dest_cl, size_t bytes,
+ ObjectsToReturn *to_return) {
+ // Steal from other sizeclasses. Try to go in a nice circle.
+ // Complicated by sizeclasses actually being 1-indexed.
+ size_t acquired = 0;
+ size_t start = resize_[cpu].last_steal.load(std::memory_order_relaxed);
+ ASSERT(start < kNumClasses);
+ ASSERT(0 < start);
+ size_t source_cl = start;
+ for (size_t offset = 1; offset < kNumClasses; ++offset) {
+ source_cl = start + offset;
+ if (source_cl >= kNumClasses) {
+ source_cl -= kNumClasses - 1;
+ }
+ ASSERT(0 < source_cl);
+ ASSERT(source_cl < kNumClasses);
+ // Decide if we want to steal source_cl.
+ if (source_cl == dest_cl) {
+ // First, no sense in picking your own pocket.
+ continue;
+ }
+ const size_t capacity = freelist_.Capacity(cpu, source_cl);
+ if (capacity == 0) {
+ // Nothing to steal.
+ continue;
+ }
+ const size_t length = freelist_.Length(cpu, source_cl);
+ const size_t batch_length =
+ Static::sizemap().num_objects_to_move(source_cl);
+ size_t size = Static::sizemap().class_to_size(source_cl);
+
+ // Clock-like algorithm to prioritize size classes for shrinking.
+ //
+    // Each size class has a quiescent ticks counter that is incremented as we
+    // pass it; the counter is reset to 0 in UpdateCapacity on grow.
+    // If the counter value is 0, then we have just tried to grow the size
+    // class, so it makes little sense to shrink it back. The higher the
+    // counter value, the longer ago we grew the list and the more likely it
+    // is that the full capacity is unused.
+    //
+    // Then, we calculate a "shrinking score"; the higher the score, the less
+    // we want to shrink this size class. The score is considerably skewed
+    // towards larger size classes: smaller classes are usually used more
+    // actively and we also benefit less from shrinking smaller classes (we
+    // steal less capacity). We also avoid shrinking full freelists, as we
+    // would need to evict an object and then go to the central freelist to
+    // return it, and freelists that are just above the batch size, because
+    // shrinking them would disable the transfer cache.
+ //
+ // Finally, we shrink if the ticks counter is >= the score.
+ uint32_t qticks = resize_[cpu].per_class[source_cl].Tick();
+ uint32_t score = 0;
+ // Note: the following numbers are based solely on intuition, common sense
+ // and benchmarking results.
+ if (size <= 144) {
+ score = 2 + (length >= capacity) +
+ (length >= batch_length && length < 2 * batch_length);
+ } else if (size <= 1024) {
+ score = 1 + (length >= capacity) +
+ (length >= batch_length && length < 2 * batch_length);
+ } else if (size <= (64 << 10)) {
+ score = (length >= capacity);
+ }
+ if (score > qticks) {
+ continue;
+ }
+
+ if (length >= capacity) {
+ // The list is full, need to evict an object to shrink it.
+ if (to_return == nullptr) {
+ continue;
+ }
+ if (to_return->count == 0) {
+ // Can't steal any more because the to_return set is full.
+ break;
+ }
+ void *obj = freelist_.Pop(source_cl, NoopUnderflow);
+ if (obj) {
+ --to_return->count;
+ to_return->cl[to_return->count] = source_cl;
+ to_return->obj[to_return->count] = obj;
+ }
+ }
+
+ // Finally, try to shrink (can fail if we were migrated).
+ // We always shrink by 1 object. The idea is that inactive lists will be
+ // shrunk to zero eventually anyway (or they just would not grow in the
+ // first place), but for active lists it does not make sense to aggressively
+ // shuffle capacity all the time.
+ if (freelist_.Shrink(cpu, source_cl, 1) == 1) {
+ acquired += size;
+ }
+
+ if (cpu != freelist_.GetCurrentVirtualCpuUnsafe() || acquired >= bytes) {
+ // can't steal any more or don't need to
+ break;
+ }
+ }
+ // update the hint
+ resize_[cpu].last_steal.store(source_cl, std::memory_order_relaxed);
+ return acquired;
+}
+
+int CPUCache::Overflow(void *ptr, size_t cl, int cpu) {
+ const size_t batch_length = Static::sizemap().num_objects_to_move(cl);
+ const size_t target = UpdateCapacity(cpu, cl, batch_length, true, nullptr);
+ // Return target objects in batch_length batches.
+ size_t total = 0;
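+  // The object that triggered the overflow becomes the first element of the
+  // batch, so count starts at 1.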
+ size_t count = 1;
+ void *batch[kMaxObjectsToMove];
+ batch[0] = ptr;
+ do {
+ size_t want = std::min(batch_length, target - total);
+ if (count < want) {
+ count += freelist_.PopBatch(cl, batch + count, want - count);
+ }
+ if (!count) break;
+
+ total += count;
+ static_assert(ABSL_ARRAYSIZE(batch) >= kMaxObjectsToMove,
+ "not enough space in batch");
+ Static::transfer_cache().InsertRange(cl, absl::Span<void *>(batch, count));
+ if (count != batch_length) break;
+ count = 0;
+ } while (total < target && cpu == freelist_.GetCurrentVirtualCpuUnsafe());
+ tracking::Report(kFreeTruncations, cl, 1);
+ return 1;
+}
+
+uint64_t CPUCache::Allocated(int target_cpu) const {
+ ASSERT(target_cpu >= 0);
+ if (!HasPopulated(target_cpu)) {
+ return 0;
+ }
+
+ uint64_t total = 0;
+ for (int cl = 1; cl < kNumClasses; cl++) {
+ int size = Static::sizemap().class_to_size(cl);
+ total += size * freelist_.Capacity(target_cpu, cl);
+ }
+ return total;
+}
+
+uint64_t CPUCache::UsedBytes(int target_cpu) const {
+ ASSERT(target_cpu >= 0);
+ if (!HasPopulated(target_cpu)) {
+ return 0;
+ }
+
+ uint64_t total = 0;
+ for (int cl = 1; cl < kNumClasses; cl++) {
+ int size = Static::sizemap().class_to_size(cl);
+ total += size * freelist_.Length(target_cpu, cl);
+ }
+ return total;
+}
+
+bool CPUCache::HasPopulated(int target_cpu) const {
+ ASSERT(target_cpu >= 0);
+ return resize_[target_cpu].populated.load(std::memory_order_relaxed);
+}
+
+PerCPUMetadataState CPUCache::MetadataMemoryUsage() const {
+ return freelist_.MetadataMemoryUsage();
+}
+
+uint64_t CPUCache::TotalUsedBytes() const {
+ uint64_t total = 0;
+ for (int cpu = 0, num_cpus = absl::base_internal::NumCPUs(); cpu < num_cpus;
+ ++cpu) {
+ total += UsedBytes(cpu);
+ }
+ return total;
+}
+
+uint64_t CPUCache::TotalObjectsOfClass(size_t cl) const {
+ ASSERT(cl < kNumClasses);
+ uint64_t total_objects = 0;
+ if (cl > 0) {
+ for (int cpu = 0, n = absl::base_internal::NumCPUs(); cpu < n; cpu++) {
+ if (!HasPopulated(cpu)) {
+ continue;
+ }
+ total_objects += freelist_.Length(cpu, cl);
+ }
+ }
+ return total_objects;
+}
+
+uint64_t CPUCache::Unallocated(int cpu) const {
+ return resize_[cpu].available.load(std::memory_order_relaxed);
+}
+
+uint64_t CPUCache::Capacity(int cpu) const {
+ return resize_[cpu].capacity.load(std::memory_order_relaxed);
+}
+
+uint64_t CPUCache::CacheLimit() const {
+ return Parameters::max_per_cpu_cache_size();
+}
+
+struct DrainContext {
+ std::atomic<size_t> *available;
+ uint64_t bytes;
+};
+
+static void DrainHandler(void *arg, size_t cl, void **batch, size_t count,
+ size_t cap) {
+ DrainContext *ctx = static_cast<DrainContext *>(arg);
+ const size_t size = Static::sizemap().class_to_size(cl);
+ const size_t batch_length = Static::sizemap().num_objects_to_move(cl);
+ ctx->bytes += count * size;
+ // Drain resets capacity to 0, so return the allocated capacity to that
+ // CPU's slack.
+ ctx->available->fetch_add(cap * size, std::memory_order_relaxed);
+ for (size_t i = 0; i < count; i += batch_length) {
+ size_t n = std::min(batch_length, count - i);
+ Static::transfer_cache().InsertRange(cl, absl::Span<void *>(batch + i, n));
+ }
+}
+
+uint64_t CPUCache::Reclaim(int cpu) {
+ absl::base_internal::SpinLockHolder h(&resize_[cpu].lock);
+
+ // If we haven't populated this core, freelist_.Drain() will touch the memory
+ // (for writing) as part of its locking process. Avoid faulting new pages as
+ // part of a release process.
+ if (!resize_[cpu].populated.load(std::memory_order_relaxed)) {
+ return 0;
+ }
+
+ DrainContext ctx{&resize_[cpu].available, 0};
+ freelist_.Drain(cpu, &ctx, DrainHandler);
+
+ // Record that the reclaim occurred for this CPU.
+ resize_[cpu].num_reclaims.store(
+ resize_[cpu].num_reclaims.load(std::memory_order_relaxed) + 1,
+ std::memory_order_relaxed);
+ return ctx.bytes;
+}
+
+uint64_t CPUCache::GetNumReclaims(int cpu) const {
+ return resize_[cpu].num_reclaims.load(std::memory_order_relaxed);
+}
+
+void CPUCache::RecordCacheMissStat(const int cpu, const bool is_malloc) {
+ CPUCache &cpu_cache = Static::cpu_cache();
+ if (is_malloc) {
+ cpu_cache.resize_[cpu].total_underflows.fetch_add(
+ 1, std::memory_order_relaxed);
+ } else {
+ cpu_cache.resize_[cpu].total_overflows.fetch_add(1,
+ std::memory_order_relaxed);
+ }
+}
+
+CPUCache::CpuCacheMissStats CPUCache::GetReclaimCacheMissStats(int cpu) const {
+ CpuCacheMissStats stats;
+ size_t total_underflows =
+ resize_[cpu].total_underflows.load(std::memory_order_relaxed);
+ size_t prev_reclaim_underflows =
+ resize_[cpu].reclaim_underflows.load(std::memory_order_relaxed);
+ // Takes a snapshot of underflows at the end of this interval so that we can
+ // calculate the misses that occurred in the next interval.
+ //
+ // Reclaim occurs on a single thread. So, a relaxed store to the reclaim
+ // underflow stat is safe.
+ resize_[cpu].reclaim_underflows.store(total_underflows,
+ std::memory_order_relaxed);
+
+ // In case of a size_t overflow, we wrap around to 0.
+ stats.underflows = total_underflows > prev_reclaim_underflows
+ ? total_underflows - prev_reclaim_underflows
+ : 0;
+
+ size_t total_overflows =
+ resize_[cpu].total_overflows.load(std::memory_order_relaxed);
+ size_t prev_reclaim_overflows =
+ resize_[cpu].reclaim_overflows.load(std::memory_order_relaxed);
+ // Takes a snapshot of overflows at the end of this interval so that we can
+ // calculate the misses that occurred in the next interval.
+ //
+ // Reclaim occurs on a single thread. So, a relaxed store to the reclaim
+ // overflow stat is safe.
+ resize_[cpu].reclaim_overflows.store(total_overflows,
+ std::memory_order_relaxed);
+
+ // In case of a size_t overflow, we wrap around to 0.
+ stats.overflows = total_overflows > prev_reclaim_overflows
+ ? total_overflows - prev_reclaim_overflows
+ : 0;
+
+ return stats;
+}
+
+CPUCache::CpuCacheMissStats CPUCache::GetIntervalCacheMissStats(int cpu) const {
+ CpuCacheMissStats stats;
+ size_t total_underflows =
+ resize_[cpu].total_underflows.load(std::memory_order_relaxed);
+ size_t shuffle_underflows =
+ resize_[cpu].shuffle_underflows.load(std::memory_order_relaxed);
+ // In case of a size_t overflow, we wrap around to 0.
+ stats.underflows = total_underflows > shuffle_underflows
+ ? total_underflows - shuffle_underflows
+ : 0;
+
+ size_t total_overflows =
+ resize_[cpu].total_overflows.load(std::memory_order_relaxed);
+ size_t shuffle_overflows =
+ resize_[cpu].shuffle_overflows.load(std::memory_order_relaxed);
+ // In case of a size_t overflow, we wrap around to 0.
+ stats.overflows = total_overflows > shuffle_overflows
+ ? total_overflows - shuffle_overflows
+ : 0;
+
+ return stats;
+}
+
+CPUCache::CpuCacheMissStats CPUCache::GetTotalCacheMissStats(int cpu) const {
+ CpuCacheMissStats stats;
+ stats.underflows =
+ resize_[cpu].total_underflows.load(std::memory_order_relaxed);
+ stats.overflows =
+ resize_[cpu].total_overflows.load(std::memory_order_relaxed);
+ return stats;
+}
+
+void CPUCache::Print(Printer *out) const {
+ out->printf("------------------------------------------------\n");
+ out->printf("Bytes in per-CPU caches (per cpu limit: %" PRIu64 " bytes)\n",
+ Static::cpu_cache().CacheLimit());
+ out->printf("------------------------------------------------\n");
+
+ const cpu_set_t allowed_cpus = FillActiveCpuMask();
+
+ for (int cpu = 0, num_cpus = absl::base_internal::NumCPUs(); cpu < num_cpus;
+ ++cpu) {
+ static constexpr double MiB = 1048576.0;
+
+ uint64_t rbytes = UsedBytes(cpu);
+ bool populated = HasPopulated(cpu);
+ uint64_t unallocated = Unallocated(cpu);
+ out->printf("cpu %3d: %12" PRIu64
+ " bytes (%7.1f MiB) with"
+ "%12" PRIu64 " bytes unallocated %s%s\n",
+ cpu, rbytes, rbytes / MiB, unallocated,
+ CPU_ISSET(cpu, &allowed_cpus) ? " active" : "",
+ populated ? " populated" : "");
+ }
+
+ out->printf("------------------------------------------------\n");
+ out->printf("Number of per-CPU cache underflows, overflows and reclaims\n");
+ out->printf("------------------------------------------------\n");
+ for (int cpu = 0, num_cpus = absl::base_internal::NumCPUs(); cpu < num_cpus;
+ ++cpu) {
+ CpuCacheMissStats miss_stats = GetTotalCacheMissStats(cpu);
+ uint64_t reclaims = GetNumReclaims(cpu);
+ out->printf(
+ "cpu %3d:"
+ "%12" PRIu64
+ " underflows,"
+ "%12" PRIu64
+ " overflows,"
+ "%12" PRIu64 " reclaims\n",
+ cpu, miss_stats.underflows, miss_stats.overflows, reclaims);
+ }
+}
+
+void CPUCache::PrintInPbtxt(PbtxtRegion *region) const {
+ const cpu_set_t allowed_cpus = FillActiveCpuMask();
+
+ for (int cpu = 0, num_cpus = absl::base_internal::NumCPUs(); cpu < num_cpus;
+ ++cpu) {
+ PbtxtRegion entry = region->CreateSubRegion("cpu_cache");
+ uint64_t rbytes = UsedBytes(cpu);
+ bool populated = HasPopulated(cpu);
+ uint64_t unallocated = Unallocated(cpu);
+ CpuCacheMissStats miss_stats = GetTotalCacheMissStats(cpu);
+ uint64_t reclaims = GetNumReclaims(cpu);
+ entry.PrintI64("cpu", uint64_t(cpu));
+ entry.PrintI64("used", rbytes);
+ entry.PrintI64("unused", unallocated);
+ entry.PrintBool("active", CPU_ISSET(cpu, &allowed_cpus));
+ entry.PrintBool("populated", populated);
+ entry.PrintI64("underflows", miss_stats.underflows);
+ entry.PrintI64("overflows", miss_stats.overflows);
+ entry.PrintI64("reclaims", reclaims);
+ }
+}
+
+void CPUCache::AcquireInternalLocks() {
+ for (int cpu = 0, num_cpus = absl::base_internal::NumCPUs(); cpu < num_cpus;
+ ++cpu) {
+ resize_[cpu].lock.Lock();
+ }
+}
+
+void CPUCache::ReleaseInternalLocks() {
+ for (int cpu = 0, num_cpus = absl::base_internal::NumCPUs(); cpu < num_cpus;
+ ++cpu) {
+ resize_[cpu].lock.Unlock();
+ }
+}
+
+void CPUCache::PerClassResizeInfo::Init() {
+ state_.store(0, std::memory_order_relaxed);
+}
+
+bool CPUCache::PerClassResizeInfo::Update(bool overflow, bool grow,
+ uint32_t *successive) {
+ int32_t raw = state_.load(std::memory_order_relaxed);
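+  // The packed 32-bit value is reinterpreted as the State bit-field struct
+  // via memcpy, which avoids strict-aliasing violations.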
+ State state;
+ memcpy(&state, &raw, sizeof(state));
+ const bool overflow_then_underflow = !overflow && state.overflow;
+ grow |= overflow_then_underflow;
+ // Reset quiescent ticks for Steal clock algorithm if we are going to grow.
+ State new_state;
+ new_state.overflow = overflow;
+ new_state.quiescent_ticks = grow ? 0 : state.quiescent_ticks;
+ new_state.successive = overflow == state.overflow ? state.successive + 1 : 0;
+ memcpy(&raw, &new_state, sizeof(raw));
+ state_.store(raw, std::memory_order_relaxed);
+ *successive = new_state.successive;
+ return overflow_then_underflow;
+}
+
+uint32_t CPUCache::PerClassResizeInfo::Tick() {
+ int32_t raw = state_.load(std::memory_order_relaxed);
+ State state;
+ memcpy(&state, &raw, sizeof(state));
+ state.quiescent_ticks++;
+ memcpy(&raw, &state, sizeof(raw));
+ state_.store(raw, std::memory_order_relaxed);
+ return state.quiescent_ticks - 1;
+}
+
+#ifdef ABSL_HAVE_THREAD_SANITIZER
+extern "C" int RunningOnValgrind();
+#endif
+
+static void ActivatePerCPUCaches() {
+ if (tcmalloc::tcmalloc_internal::Static::CPUCacheActive()) {
+ // Already active.
+ return;
+ }
+
+#ifdef ABSL_HAVE_THREAD_SANITIZER
+ // RunningOnValgrind is a proxy for "is something intercepting malloc."
+ //
+  // If Valgrind et al. are in use, TCMalloc isn't in use and we shouldn't
+ // activate our per-CPU caches.
+ if (RunningOnValgrind()) {
+ return;
+ }
+#endif
+ if (Parameters::per_cpu_caches() && subtle::percpu::IsFast()) {
+ Static::InitIfNecessary();
+ Static::cpu_cache().Activate(CPUCache::ActivationMode::FastPathOn);
+ // no need for this thread cache anymore, I guess.
+ ThreadCache::BecomeIdle();
+ // If there's a problem with this code, let's notice it right away:
+ ::operator delete(::operator new(1));
+ }
+}
+
+class PerCPUInitializer {
+ public:
+ PerCPUInitializer() {
+ ActivatePerCPUCaches();
+ }
+};
+static PerCPUInitializer module_enter_exit;
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+extern "C" void TCMalloc_Internal_ForceCpuCacheActivation() {
+ tcmalloc::tcmalloc_internal::ActivatePerCPUCaches();
+}
+
+extern "C" bool MallocExtension_Internal_GetPerCpuCachesActive() {
+ return tcmalloc::tcmalloc_internal::Static::CPUCacheActive();
+}
+
+extern "C" void MallocExtension_Internal_DeactivatePerCpuCaches() {
+ tcmalloc::tcmalloc_internal::Parameters::set_per_cpu_caches(false);
+ tcmalloc::tcmalloc_internal::Static::DeactivateCPUCache();
+}
+
+extern "C" int32_t MallocExtension_Internal_GetMaxPerCpuCacheSize() {
+ return tcmalloc::tcmalloc_internal::Parameters::max_per_cpu_cache_size();
+}
+
+extern "C" void MallocExtension_Internal_SetMaxPerCpuCacheSize(int32_t value) {
+ tcmalloc::tcmalloc_internal::Parameters::set_max_per_cpu_cache_size(value);
+}
diff --git a/contrib/libs/tcmalloc/tcmalloc/cpu_cache.h b/contrib/libs/tcmalloc/tcmalloc/cpu_cache.h
new file mode 100644
index 0000000000..dab7d18910
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/cpu_cache.h
@@ -0,0 +1,390 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TCMALLOC_CPU_CACHE_H_
+#define TCMALLOC_CPU_CACHE_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <atomic>
+
+#include "absl/base/attributes.h"
+#include "absl/base/call_once.h"
+#include "absl/base/internal/spinlock.h"
+#include "absl/base/optimization.h"
+#include "tcmalloc/common.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/internal/percpu.h"
+#include "tcmalloc/internal/percpu_tcmalloc.h"
+#include "tcmalloc/static_vars.h"
+#include "tcmalloc/thread_cache.h"
+#include "tcmalloc/tracking.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+class CPUCache {
+ public:
+ constexpr CPUCache() = default;
+
+ enum class ActivationMode {
+ FastPathOn,
+ FastPathOffTestOnly,
+ };
+
+ // tcmalloc explicitly initializes its global state (to be safe for
+ // use in global constructors) so our constructor must be trivial;
+ // do all initialization here instead.
+ void Activate(ActivationMode mode);
+
+ // Allocate an object of the given size class. When allocation fails
+ // (from this cache and after running Refill), OOMHandler(size) is
+ // called and its return value is returned from
+ // Allocate. OOMHandler is used to parameterize out-of-memory
+  // handling (raising an exception, returning nullptr, calling the
+  // new_handler, or anything else). "Passing" OOMHandler in this way
+  // allows Allocate to be used in a tail-call position on the fast path,
+  // making Allocate jump (tail-call) to the slow-path code.
+ template <void* OOMHandler(size_t)>
+ void* Allocate(size_t cl);
+
+ // Free an object of the given class.
+ void Deallocate(void* ptr, size_t cl);
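+
+  // Illustrative usage (a sketch, not part of the interface; ReturnNull below
+  // is a hypothetical out-of-memory handler):
+  //
+  //   static void* ReturnNull(size_t) { return nullptr; }
+  //   ...
+  //   CPUCache& cache = Static::cpu_cache();
+  //   void* p = cache.Allocate<ReturnNull>(cl);
+  //   if (p != nullptr) cache.Deallocate(p, cl);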
+
+ // Give the number of bytes in <cpu>'s cache
+ uint64_t UsedBytes(int cpu) const;
+
+ // Give the allocated number of bytes in <cpu>'s cache
+ uint64_t Allocated(int cpu) const;
+
+ // Whether <cpu>'s cache has ever been populated with objects
+ bool HasPopulated(int cpu) const;
+
+ PerCPUMetadataState MetadataMemoryUsage() const;
+
+ // Give the number of bytes used in all cpu caches.
+ uint64_t TotalUsedBytes() const;
+
+ // Give the number of objects of a given class in all cpu caches.
+ uint64_t TotalObjectsOfClass(size_t cl) const;
+
+ // Give the number of bytes unallocated to any sizeclass in <cpu>'s cache.
+ uint64_t Unallocated(int cpu) const;
+
+ // Gives the total capacity of <cpu>'s cache in bytes.
+ //
+ // The total capacity of <cpu>'s cache should be equal to the sum of allocated
+ // and unallocated bytes for that cache.
+ uint64_t Capacity(int cpu) const;
+
+ // Give the per-cpu limit of cache size.
+ uint64_t CacheLimit() const;
+
+ // Shuffles per-cpu caches using the number of underflows and overflows that
+ // occurred in the prior interval. It selects the top per-cpu caches
+ // with highest misses as candidates, iterates through the other per-cpu
+ // caches to steal capacity from them and adds the stolen bytes to the
+ // available capacity of the per-cpu caches. May be called from any processor.
+ //
+ // TODO(vgogte): There are quite a few knobs that we can play around with in
+ // ShuffleCpuCaches.
+ void ShuffleCpuCaches();
+
+  // Lower bound, as a fraction of the maximum per-cpu cache size, below which
+  // we do not steal capacity from a cpu cache.
+ static constexpr double kCacheCapacityThreshold = 0.20;
+
+  // Tries to steal <bytes> for the destination <cpu>. It iterates through
+  // the set of populated cpu caches and steals the bytes from them. A cpu is
+ // considered a good candidate to steal from if:
+ // (1) the cache is populated
+ // (2) the numbers of underflows and overflows are both less than 0.8x those
+ // of the destination per-cpu cache
+ // (3) source cpu is not the same as the destination cpu
+ // (4) capacity of the source cpu/cl is non-zero
+ //
+  // For a given source cpu, we iterate through its size classes to steal from
+  // them. Currently, we use a clock-like algorithm similar to the one in
+  // Steal() to identify the cl to steal from.
+ void StealFromOtherCache(int cpu, int max_populated_cpu, size_t bytes);
+
+ // Tries to reclaim inactive per-CPU caches. It iterates through the set of
+ // populated cpu caches and reclaims the caches that:
+  // (1) had no change in the number of used bytes since the last interval, and
+  // (2) had no change in the number of misses since the last interval.
+ void TryReclaimingCaches();
+
+ // Empty out the cache on <cpu>; move all objects to the central
+ // cache. (If other threads run concurrently on that cpu, we can't
+ // guarantee it will be fully empty on return, but if the cpu is
+ // unused, this will eliminate stranded memory.) Returns the number
+ // of bytes we sent back. This function is thread safe.
+ uint64_t Reclaim(int cpu);
+
+ // Reports number of times the <cpu> has been reclaimed.
+ uint64_t GetNumReclaims(int cpu) const;
+
+  // Determines the number of bits we should use for sizing the per-cpu cache
+  // slab. The amount of per-cpu cache is 2^kPerCpuShift bytes.
+#if defined(TCMALLOC_SMALL_BUT_SLOW)
+ static const size_t kPerCpuShift = 12;
+#else
+ static constexpr size_t kPerCpuShift = 18;
+#endif
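+
+  // For example, kPerCpuShift == 18 corresponds to a 256 KiB slab per CPU,
+  // while the small-but-slow value of 12 corresponds to a 4 KiB slab.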
+
+ struct CpuCacheMissStats {
+ size_t underflows;
+ size_t overflows;
+ };
+
+ // Reports total cache underflows and overflows for <cpu>.
+ CpuCacheMissStats GetTotalCacheMissStats(int cpu) const;
+
+ // Reports the cache underflows and overflows for <cpu> that were recorded at
+ // the end of the previous interval. It also records current underflows and
+ // overflows in the reclaim underflow and overflow stats.
+ CpuCacheMissStats GetReclaimCacheMissStats(int cpu) const;
+
+ // Reports cache underflows and overflows for <cpu> this interval.
+ CpuCacheMissStats GetIntervalCacheMissStats(int cpu) const;
+
+ // Report statistics
+ void Print(Printer* out) const;
+ void PrintInPbtxt(PbtxtRegion* region) const;
+
+ void AcquireInternalLocks();
+ void ReleaseInternalLocks();
+
+ private:
+ // Per-size-class freelist resizing info.
+ class PerClassResizeInfo {
+ public:
+ void Init();
+ // Updates info on overflow/underflow.
+ // <overflow> says if it's overflow or underflow.
+ // <grow> is caller approximation of whether we want to grow capacity.
+ // <successive> will contain number of successive overflows/underflows.
+ // Returns if capacity needs to be grown aggressively (i.e. by batch size).
+ bool Update(bool overflow, bool grow, uint32_t* successive);
+ uint32_t Tick();
+
+ private:
+ std::atomic<int32_t> state_;
+ // state_ layout:
+ struct State {
+ // last overflow/underflow?
+ uint32_t overflow : 1;
+ // number of times Steal checked this class since the last grow
+ uint32_t quiescent_ticks : 15;
+ // number of successive overflows/underflows
+ uint32_t successive : 16;
+ };
+ static_assert(sizeof(State) == sizeof(std::atomic<int32_t>),
+ "size mismatch");
+ };
+
+ subtle::percpu::TcmallocSlab<kNumClasses> freelist_;
+
+ struct ResizeInfoUnpadded {
+ // cache space on this CPU we're not using. Modify atomically;
+ // we don't want to lose space.
+ std::atomic<size_t> available;
+ // this is just a hint
+ std::atomic<size_t> last_steal;
+ // Track whether we have initialized this CPU.
+ absl::once_flag initialized;
+ // Track whether we have ever populated this CPU.
+ std::atomic<bool> populated;
+ // For cross-cpu operations.
+ absl::base_internal::SpinLock lock;
+ PerClassResizeInfo per_class[kNumClasses];
+ // tracks number of underflows on allocate.
+ std::atomic<size_t> total_underflows;
+ // tracks number of overflows on deallocate.
+ std::atomic<size_t> total_overflows;
+ // tracks number of underflows recorded as of the end of the last shuffle
+ // interval.
+ std::atomic<size_t> shuffle_underflows;
+ // tracks number of overflows recorded as of the end of the last shuffle
+ // interval.
+ std::atomic<size_t> shuffle_overflows;
+ // total cache space available on this CPU. This tracks the total
+ // allocated and unallocated bytes on this CPU cache.
+ std::atomic<size_t> capacity;
+ // Number of underflows as of the end of the last resize interval.
+ std::atomic<size_t> reclaim_underflows;
+ // Number of overflows as of the end of the last resize interval.
+ std::atomic<size_t> reclaim_overflows;
+ // Used bytes in the cache as of the end of the last resize interval.
+ std::atomic<uint64_t> reclaim_used_bytes;
+ // Tracks number of times this CPU has been reclaimed.
+ std::atomic<size_t> num_reclaims;
+ };
+ struct ResizeInfo : ResizeInfoUnpadded {
+ char pad[ABSL_CACHELINE_SIZE -
+ sizeof(ResizeInfoUnpadded) % ABSL_CACHELINE_SIZE];
+ };
+ // Tracking data for each CPU's cache resizing efforts.
+ ResizeInfo* resize_ = nullptr;
+
+ // Track whether we are lazily initializing slabs. We cannot use the latest
+ // value in Parameters, as it can change after initialization.
+ bool lazy_slabs_ = false;
+ // The maximum capacity of each size class within the slab.
+ uint16_t max_capacity_[kNumClasses] = {0};
+
+ // Provides a hint to StealFromOtherCache() so that we can steal from the
+ // caches in a round-robin fashion.
+ std::atomic<int> last_cpu_cache_steal_ = 0;
+
+  // A set of objects to be returned to the Transfer Cache.
+ static constexpr int kMaxToReturn = 16;
+ struct ObjectsToReturn {
+ // The number of slots available for storing objects.
+ int count = kMaxToReturn;
+ // The size class of the returned object. kNumClasses is the
+ // largest value that needs to be stored in cl.
+ CompactSizeClass cl[kMaxToReturn];
+ void* obj[kMaxToReturn];
+ };
+
+ static size_t MaxCapacityHelper(size_t cl) {
+ CPUCache& cpu_cache = Static::cpu_cache();
+ // Heuristic that the CPUCache has been activated.
+ ASSERT(cpu_cache.resize_ != nullptr);
+ return cpu_cache.max_capacity_[cl];
+ }
+
+ void* Refill(int cpu, size_t cl);
+
+ // This is called after finding a full freelist when attempting to push <ptr>
+ // on the freelist for sizeclass <cl>. The last arg should indicate which
+ // CPU's list was full. Returns 1.
+ int Overflow(void* ptr, size_t cl, int cpu);
+
+ // Called on <cl> freelist overflow/underflow on <cpu> to balance cache
+ // capacity between size classes. Returns number of objects to return/request
+ // from transfer cache. <to_return> will contain objects that need to be
+ // freed.
+ size_t UpdateCapacity(int cpu, size_t cl, size_t batch_length, bool overflow,
+ ObjectsToReturn* to_return);
+
+ // Tries to obtain up to <desired_increase> bytes of freelist space on <cpu>
+ // for <cl> from other <cls>. <to_return> will contain objects that need to be
+ // freed.
+ void Grow(int cpu, size_t cl, size_t desired_increase,
+ ObjectsToReturn* to_return);
+
+ // Tries to steal <bytes> for <cl> on <cpu> from other size classes on that
+ // CPU. Returns acquired bytes. <to_return> will contain objects that need to
+ // be freed.
+ size_t Steal(int cpu, size_t cl, size_t bytes, ObjectsToReturn* to_return);
+
+ // Records a cache underflow or overflow on <cpu>, increments underflow or
+ // overflow by 1.
+ // <is_malloc> determines whether the associated count corresponds to an
+ // underflow or overflow.
+ void RecordCacheMissStat(const int cpu, const bool is_malloc);
+
+ static void* NoopUnderflow(int cpu, size_t cl) { return nullptr; }
+ static int NoopOverflow(int cpu, size_t cl, void* item) { return -1; }
+};
+
+template <void* OOMHandler(size_t)>
+inline void* ABSL_ATTRIBUTE_ALWAYS_INLINE CPUCache::Allocate(size_t cl) {
+ ASSERT(cl > 0);
+
+ tracking::Report(kMallocHit, cl, 1);
+ struct Helper {
+ static void* ABSL_ATTRIBUTE_NOINLINE Underflow(int cpu, size_t cl) {
+      // We've optimistically reported a hit in Allocate; let's undo it and
+      // report a miss instead.
+ tracking::Report(kMallocHit, cl, -1);
+ void* ret = nullptr;
+ if (Static::sharded_transfer_cache().should_use(cl)) {
+ ret = Static::sharded_transfer_cache().Pop(cl);
+ } else {
+ tracking::Report(kMallocMiss, cl, 1);
+ CPUCache& cache = Static::cpu_cache();
+ cache.RecordCacheMissStat(cpu, true);
+ ret = cache.Refill(cpu, cl);
+ }
+ if (ABSL_PREDICT_FALSE(ret == nullptr)) {
+ size_t size = Static::sizemap().class_to_size(cl);
+ return OOMHandler(size);
+ }
+ return ret;
+ }
+ };
+ return freelist_.Pop(cl, &Helper::Underflow);
+}
+
+inline void ABSL_ATTRIBUTE_ALWAYS_INLINE CPUCache::Deallocate(void* ptr,
+ size_t cl) {
+ ASSERT(cl > 0);
+ tracking::Report(kFreeHit, cl, 1); // Be optimistic; correct later if needed.
+
+ struct Helper {
+ static int ABSL_ATTRIBUTE_NOINLINE Overflow(int cpu, size_t cl, void* ptr) {
+ // When we reach here we've already optimistically bumped FreeHits.
+ // Fix that.
+ tracking::Report(kFreeHit, cl, -1);
+ if (Static::sharded_transfer_cache().should_use(cl)) {
+ Static::sharded_transfer_cache().Push(cl, ptr);
+ return 1;
+ }
+ tracking::Report(kFreeMiss, cl, 1);
+ CPUCache& cache = Static::cpu_cache();
+ cache.RecordCacheMissStat(cpu, false);
+ return cache.Overflow(ptr, cl, cpu);
+ }
+ };
+ freelist_.Push(cl, ptr, Helper::Overflow);
+}
+
+inline bool UsePerCpuCache() {
+ // We expect a fast path of per-CPU caches being active and the thread being
+ // registered with rseq.
+ if (ABSL_PREDICT_FALSE(!Static::CPUCacheActive())) {
+ return false;
+ }
+
+ if (ABSL_PREDICT_TRUE(subtle::percpu::IsFastNoInit())) {
+ return true;
+ }
+
+ // When rseq is not registered, use this transition edge to shutdown the
+ // thread cache for this thread.
+ //
+ // We call IsFast() on every non-fastpath'd malloc or free since IsFast() has
+ // the side-effect of initializing the per-thread state needed for "unsafe"
+ // per-cpu operations in case this is the first time a new thread is calling
+ // into tcmalloc.
+ //
+ // If the per-CPU cache for a thread is not initialized, we push ourselves
+ // onto the slow path (if !defined(TCMALLOC_DEPRECATED_PERTHREAD)) until this
+ // occurs. See fast_alloc's use of TryRecordAllocationFast.
+ if (ABSL_PREDICT_TRUE(subtle::percpu::IsFast())) {
+ ThreadCache::BecomeIdle();
+ return true;
+ }
+
+ return false;
+}
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+#endif // TCMALLOC_CPU_CACHE_H_
diff --git a/contrib/libs/tcmalloc/tcmalloc/cpu_cache_test.cc b/contrib/libs/tcmalloc/tcmalloc/cpu_cache_test.cc
new file mode 100644
index 0000000000..fd4282b9c3
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/cpu_cache_test.cc
@@ -0,0 +1,599 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/cpu_cache.h"
+
+#include <thread> // NOLINT(build/c++11)
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/random/random.h"
+#include "absl/random/seed_sequences.h"
+#include "tcmalloc/common.h"
+#include "tcmalloc/internal/optimization.h"
+#include "tcmalloc/internal/util.h"
+#include "tcmalloc/parameters.h"
+#include "tcmalloc/static_vars.h"
+#include "tcmalloc/testing/testutil.h"
+
+namespace tcmalloc {
+namespace tcmalloc_internal {
+namespace {
+
+constexpr size_t kStressSlabs = 4;
+void* OOMHandler(size_t) { return nullptr; }
+
+TEST(CpuCacheTest, Metadata) {
+ if (!subtle::percpu::IsFast()) {
+ return;
+ }
+
+ const int num_cpus = absl::base_internal::NumCPUs();
+
+ CPUCache& cache = Static::cpu_cache();
+ // Since this test allocates memory, avoid activating the real fast path to
+ // minimize allocations against the per-CPU cache.
+ cache.Activate(CPUCache::ActivationMode::FastPathOffTestOnly);
+
+ PerCPUMetadataState r = cache.MetadataMemoryUsage();
+ EXPECT_EQ(r.virtual_size, num_cpus << CPUCache::kPerCpuShift);
+ if (Parameters::lazy_per_cpu_caches()) {
+ EXPECT_EQ(r.resident_size, 0);
+ } else {
+ EXPECT_EQ(r.resident_size, r.virtual_size);
+ }
+
+ auto count_cores = [&]() {
+ int populated_cores = 0;
+ for (int i = 0; i < num_cpus; i++) {
+ if (cache.HasPopulated(i)) {
+ populated_cores++;
+ }
+ }
+ return populated_cores;
+ };
+
+ EXPECT_EQ(0, count_cores());
+
+ int allowed_cpu_id;
+ const size_t kSizeClass = 3;
+ const size_t num_to_move = Static::sizemap().num_objects_to_move(kSizeClass);
+ const size_t virtual_cpu_id_offset = subtle::percpu::UsingFlatVirtualCpus()
+ ? offsetof(kernel_rseq, vcpu_id)
+ : offsetof(kernel_rseq, cpu_id);
+ void* ptr;
+ {
+ // Restrict this thread to a single core while allocating and processing the
+ // slow path.
+ //
+ // TODO(b/151313823): Without this restriction, we may access--for reading
+ // only--other slabs if we end up being migrated. These may cause huge
+ // pages to be faulted for those cores, leading to test flakiness.
+ tcmalloc_internal::ScopedAffinityMask mask(
+ tcmalloc_internal::AllowedCpus()[0]);
+ allowed_cpu_id =
+ subtle::percpu::GetCurrentVirtualCpuUnsafe(virtual_cpu_id_offset);
+
+ ptr = cache.Allocate<OOMHandler>(kSizeClass);
+
+ if (mask.Tampered() ||
+ allowed_cpu_id !=
+ subtle::percpu::GetCurrentVirtualCpuUnsafe(virtual_cpu_id_offset)) {
+ return;
+ }
+ }
+ EXPECT_NE(ptr, nullptr);
+ EXPECT_EQ(1, count_cores());
+
+ r = cache.MetadataMemoryUsage();
+ EXPECT_EQ(r.virtual_size, num_cpus << CPUCache::kPerCpuShift);
+ if (Parameters::lazy_per_cpu_caches()) {
+ // We expect to fault in a single core, but we may end up faulting an
+ // entire hugepage worth of memory
+ const size_t core_slab_size = r.virtual_size / num_cpus;
+ const size_t upper_bound =
+ ((core_slab_size + kHugePageSize - 1) & ~(kHugePageSize - 1));
+
+ // A single core may be less than the full slab (core_slab_size), since we
+ // do not touch every page within the slab.
+ EXPECT_GT(r.resident_size, 0);
+ EXPECT_LE(r.resident_size, upper_bound) << count_cores();
+
+ // This test is much more sensitive to implementation details of the per-CPU
+ // cache. It may need to be updated from time to time. These numbers were
+ // calculated by MADV_NOHUGEPAGE'ing the memory used for the slab and
+ // measuring the resident size.
+ //
+ // TODO(ckennelly): Allow CPUCache::Activate to accept a specific arena
+ // allocator, so we can MADV_NOHUGEPAGE the backing store in testing for
+ // more precise measurements.
+ switch (CPUCache::kPerCpuShift) {
+ case 12:
+ EXPECT_GE(r.resident_size, 4096);
+ break;
+ case 18:
+ EXPECT_GE(r.resident_size, 110592);
+ break;
+ default:
+ ASSUME(false);
+ break;
+ };
+
+ // Read stats from the CPU caches. This should not impact resident_size.
+ const size_t max_cpu_cache_size = Parameters::max_per_cpu_cache_size();
+ size_t total_used_bytes = 0;
+ for (int cpu = 0; cpu < num_cpus; ++cpu) {
+ size_t used_bytes = cache.UsedBytes(cpu);
+ total_used_bytes += used_bytes;
+
+ if (cpu == allowed_cpu_id) {
+ EXPECT_GT(used_bytes, 0);
+ EXPECT_TRUE(cache.HasPopulated(cpu));
+ } else {
+ EXPECT_EQ(used_bytes, 0);
+ EXPECT_FALSE(cache.HasPopulated(cpu));
+ }
+
+ EXPECT_LE(cache.Unallocated(cpu), max_cpu_cache_size);
+ EXPECT_EQ(cache.Capacity(cpu), max_cpu_cache_size);
+ EXPECT_EQ(cache.Allocated(cpu) + cache.Unallocated(cpu),
+ cache.Capacity(cpu));
+ }
+
+ for (int cl = 0; cl < kNumClasses; ++cl) {
+ // This is sensitive to the current growth policies of CPUCache. It may
+ // require updating from time-to-time.
+ EXPECT_EQ(cache.TotalObjectsOfClass(cl),
+ (cl == kSizeClass ? num_to_move - 1 : 0))
+ << cl;
+ }
+ EXPECT_EQ(cache.TotalUsedBytes(), total_used_bytes);
+
+ PerCPUMetadataState post_stats = cache.MetadataMemoryUsage();
+ // Confirm stats are within expected bounds.
+ EXPECT_GT(post_stats.resident_size, 0);
+ EXPECT_LE(post_stats.resident_size, upper_bound) << count_cores();
+ // Confirm stats are unchanged.
+ EXPECT_EQ(r.resident_size, post_stats.resident_size);
+ } else {
+ EXPECT_EQ(r.resident_size, r.virtual_size);
+ }
+
+ // Tear down.
+ //
+ // TODO(ckennelly): We're interacting with the real TransferCache.
+ cache.Deallocate(ptr, kSizeClass);
+
+ for (int i = 0; i < num_cpus; i++) {
+ cache.Reclaim(i);
+ }
+}
+
+TEST(CpuCacheTest, CacheMissStats) {
+ if (!subtle::percpu::IsFast()) {
+ return;
+ }
+
+ const int num_cpus = absl::base_internal::NumCPUs();
+
+ CPUCache& cache = Static::cpu_cache();
+ // Since this test allocates memory, avoid activating the real fast path to
+ // minimize allocations against the per-CPU cache.
+ cache.Activate(CPUCache::ActivationMode::FastPathOffTestOnly);
+
+ // The number of underflows and overflows must be zero for all the caches.
+ for (int cpu = 0; cpu < num_cpus; ++cpu) {
+ CPUCache::CpuCacheMissStats total_misses =
+ cache.GetTotalCacheMissStats(cpu);
+ CPUCache::CpuCacheMissStats interval_misses =
+ cache.GetIntervalCacheMissStats(cpu);
+ EXPECT_EQ(total_misses.underflows, 0);
+ EXPECT_EQ(total_misses.overflows, 0);
+ EXPECT_EQ(interval_misses.underflows, 0);
+ EXPECT_EQ(interval_misses.overflows, 0);
+ }
+
+ int allowed_cpu_id;
+ const size_t kSizeClass = 3;
+ const size_t virtual_cpu_id_offset = subtle::percpu::UsingFlatVirtualCpus()
+ ? offsetof(kernel_rseq, vcpu_id)
+ : offsetof(kernel_rseq, cpu_id);
+ void* ptr;
+ {
+ // Restrict this thread to a single core while allocating and processing the
+ // slow path.
+ //
+ // TODO(b/151313823): Without this restriction, we may access--for reading
+ // only--other slabs if we end up being migrated. These may cause huge
+ // pages to be faulted for those cores, leading to test flakiness.
+ tcmalloc_internal::ScopedAffinityMask mask(
+ tcmalloc_internal::AllowedCpus()[0]);
+ allowed_cpu_id =
+ subtle::percpu::GetCurrentVirtualCpuUnsafe(virtual_cpu_id_offset);
+
+ ptr = cache.Allocate<OOMHandler>(kSizeClass);
+
+ if (mask.Tampered() ||
+ allowed_cpu_id !=
+ subtle::percpu::GetCurrentVirtualCpuUnsafe(virtual_cpu_id_offset)) {
+ return;
+ }
+ }
+
+ for (int cpu = 0; cpu < num_cpus; ++cpu) {
+ CPUCache::CpuCacheMissStats total_misses =
+ cache.GetTotalCacheMissStats(cpu);
+ CPUCache::CpuCacheMissStats interval_misses =
+ cache.GetIntervalCacheMissStats(cpu);
+ if (cpu == allowed_cpu_id) {
+ EXPECT_EQ(total_misses.underflows, 1);
+ EXPECT_EQ(interval_misses.underflows, 1);
+ } else {
+ EXPECT_EQ(total_misses.underflows, 0);
+ EXPECT_EQ(interval_misses.underflows, 0);
+ }
+ EXPECT_EQ(total_misses.overflows, 0);
+ EXPECT_EQ(interval_misses.overflows, 0);
+ }
+
+ // Tear down.
+ //
+ // TODO(ckennelly): We're interacting with the real TransferCache.
+ cache.Deallocate(ptr, kSizeClass);
+
+ for (int i = 0; i < num_cpus; i++) {
+ cache.Reclaim(i);
+ }
+}
+
+static void ShuffleThread(const std::atomic<bool>& stop) {
+ if (!subtle::percpu::IsFast()) {
+ return;
+ }
+
+ CPUCache& cache = Static::cpu_cache();
+ // Wake up every 10ms to shuffle the caches so that we can allow misses to
+  // accumulate during that interval.
+ while (!stop) {
+ cache.ShuffleCpuCaches();
+ absl::SleepFor(absl::Milliseconds(10));
+ }
+}
+
+static void StressThread(size_t thread_id, const std::atomic<bool>& stop) {
+ if (!subtle::percpu::IsFast()) {
+ return;
+ }
+
+ CPUCache& cache = Static::cpu_cache();
+ std::vector<std::pair<size_t, void*>> blocks;
+ absl::BitGen rnd;
+ while (!stop) {
+ const int what = absl::Uniform<int32_t>(rnd, 0, 2);
+ if (what) {
+ // Allocate an object for a class
+ size_t cl = absl::Uniform<int32_t>(rnd, 1, kStressSlabs + 1);
+ void* ptr = cache.Allocate<OOMHandler>(cl);
+ blocks.emplace_back(std::make_pair(cl, ptr));
+ } else {
+ // Deallocate an object for a class
+ if (!blocks.empty()) {
+ cache.Deallocate(blocks.back().second, blocks.back().first);
+ blocks.pop_back();
+ }
+ }
+ }
+
+  // Cleanup. Deallocate the rest of the allocated memory.
+ for (int i = 0; i < blocks.size(); i++) {
+ cache.Deallocate(blocks[i].second, blocks[i].first);
+ }
+}
+
+TEST(CpuCacheTest, StealCpuCache) {
+ if (!subtle::percpu::IsFast()) {
+ return;
+ }
+
+ CPUCache& cache = Static::cpu_cache();
+ // Since this test allocates memory, avoid activating the real fast path to
+ // minimize allocations against the per-CPU cache.
+ cache.Activate(CPUCache::ActivationMode::FastPathOffTestOnly);
+
+ std::vector<std::thread> threads;
+ std::thread shuffle_thread;
+ const int n_threads = absl::base_internal::NumCPUs();
+ std::atomic<bool> stop(false);
+
+ for (size_t t = 0; t < n_threads; ++t) {
+ threads.push_back(std::thread(StressThread, t, std::ref(stop)));
+ }
+ shuffle_thread = std::thread(ShuffleThread, std::ref(stop));
+
+ absl::SleepFor(absl::Seconds(5));
+ stop = true;
+ for (auto& t : threads) {
+ t.join();
+ }
+ shuffle_thread.join();
+
+ // Check that the total capacity is preserved after the shuffle.
+ size_t capacity = 0;
+ const int num_cpus = absl::base_internal::NumCPUs();
+ const size_t kTotalCapacity = num_cpus * Parameters::max_per_cpu_cache_size();
+ for (int cpu = 0; cpu < num_cpus; ++cpu) {
+ EXPECT_EQ(cache.Allocated(cpu) + cache.Unallocated(cpu),
+ cache.Capacity(cpu));
+ capacity += cache.Capacity(cpu);
+ }
+ EXPECT_EQ(capacity, kTotalCapacity);
+
+ for (int cpu = 0; cpu < num_cpus; ++cpu) {
+ cache.Reclaim(cpu);
+ }
+}
+
+// Runs a single allocate and deallocate operation to warm up the cache. Once a
+// few objects are allocated in the cold cache, we can shuffle cpu caches to
+// steal that capacity from the cold cache to the hot cache.
+static void ColdCacheOperations(int cpu_id, size_t size_class) {
+ // Temporarily fake being on the given CPU.
+ ScopedFakeCpuId fake_cpu_id(cpu_id);
+
+ CPUCache& cache = Static::cpu_cache();
+#if TCMALLOC_PERCPU_USE_RSEQ
+ if (subtle::percpu::UsingFlatVirtualCpus()) {
+ subtle::percpu::__rseq_abi.vcpu_id = cpu_id;
+ }
+#endif
+
+ void* ptr = cache.Allocate<OOMHandler>(size_class);
+ cache.Deallocate(ptr, size_class);
+}
+
+// Runs multiple allocate and deallocate operations on the cpu cache to collect
+// misses. Once we collect enough misses on this cache, we can shuffle cpu
+// caches to steal capacity from colder caches to the hot cache.
+static void HotCacheOperations(int cpu_id) {
+ // Temporarily fake being on the given CPU.
+ ScopedFakeCpuId fake_cpu_id(cpu_id);
+
+ CPUCache& cache = Static::cpu_cache();
+#if TCMALLOC_PERCPU_USE_RSEQ
+ if (subtle::percpu::UsingFlatVirtualCpus()) {
+ subtle::percpu::__rseq_abi.vcpu_id = cpu_id;
+ }
+#endif
+
+  // Allocate and deallocate objects to accumulate enough misses on this
+  // cache. This ensures sufficient disparity in misses between the hotter and
+  // colder caches, so that we may be able to steal bytes from the colder
+  // cache.
+ for (size_t cl = 1; cl <= kStressSlabs; ++cl) {
+ void* ptr = cache.Allocate<OOMHandler>(cl);
+ cache.Deallocate(ptr, cl);
+ }
+
+ // We reclaim the cache to reset it so that we record underflows/overflows the
+ // next time we allocate and deallocate objects. Without reclaim, the cache
+ // would stay warmed up and it would take more time to drain the colder cache.
+ cache.Reclaim(cpu_id);
+}
+
+TEST(CpuCacheTest, ColdHotCacheShuffleTest) {
+ if (!subtle::percpu::IsFast()) {
+ return;
+ }
+
+ CPUCache& cache = Static::cpu_cache();
+ // Since this test allocates memory, avoid activating the real fast path to
+ // minimize allocations against the per-CPU cache.
+ cache.Activate(CPUCache::ActivationMode::FastPathOffTestOnly);
+
+ constexpr int hot_cpu_id = 0;
+ constexpr int cold_cpu_id = 1;
+
+ const size_t max_cpu_cache_size = Parameters::max_per_cpu_cache_size();
+
+  // Empirical tests suggest that we should be able to steal all the stealable
+  // capacity from the colder cache in fewer than 100 tries. We keep enough
+  // headroom here to make sure we steal from the colder cache, while avoiding
+  // timeouts if something goes wrong.
+ constexpr int kMaxStealTries = 1000;
+
+  // We allocate and deallocate a single object of the largest size class.
+  // This makes sure that there is a single large object in the cache that the
+  // faster cache can steal.
+ const size_t size_class = kNumClasses - 1;
+
+ for (int num_tries = 0;
+ num_tries < kMaxStealTries &&
+ cache.Capacity(cold_cpu_id) >
+ CPUCache::kCacheCapacityThreshold * max_cpu_cache_size;
+ ++num_tries) {
+ ColdCacheOperations(cold_cpu_id, size_class);
+ HotCacheOperations(hot_cpu_id);
+ cache.ShuffleCpuCaches();
+
+ // Check that the capacity is preserved.
+ EXPECT_EQ(cache.Allocated(cold_cpu_id) + cache.Unallocated(cold_cpu_id),
+ cache.Capacity(cold_cpu_id));
+ EXPECT_EQ(cache.Allocated(hot_cpu_id) + cache.Unallocated(hot_cpu_id),
+ cache.Capacity(hot_cpu_id));
+ }
+
+ size_t cold_cache_capacity = cache.Capacity(cold_cpu_id);
+ size_t hot_cache_capacity = cache.Capacity(hot_cpu_id);
+
+ // Check that we drained cold cache to the lower capacity limit.
+ // We also keep some tolerance, up to the largest class size, below the lower
+ // capacity threshold that we can drain cold cache to.
+ EXPECT_GT(cold_cache_capacity,
+ CPUCache::kCacheCapacityThreshold * max_cpu_cache_size -
+ Static::sizemap().class_to_size(kNumClasses - 1));
+
+ // Check that we have at least stolen some capacity.
+ EXPECT_GT(hot_cache_capacity, max_cpu_cache_size);
+
+ // Perform a few more shuffles to make sure that lower cache capacity limit
+ // has been reached for the cold cache. A few more shuffles should not
+ // change the capacity of either of the caches.
+ for (int i = 0; i < 100; ++i) {
+ ColdCacheOperations(cold_cpu_id, size_class);
+ HotCacheOperations(hot_cpu_id);
+ cache.ShuffleCpuCaches();
+
+ // Check that the capacity is preserved.
+ EXPECT_EQ(cache.Allocated(cold_cpu_id) + cache.Unallocated(cold_cpu_id),
+ cache.Capacity(cold_cpu_id));
+ EXPECT_EQ(cache.Allocated(hot_cpu_id) + cache.Unallocated(hot_cpu_id),
+ cache.Capacity(hot_cpu_id));
+ }
+
+ // Check that the capacity of cold and hot caches is same as before.
+ EXPECT_EQ(cache.Capacity(cold_cpu_id), cold_cache_capacity);
+ EXPECT_EQ(cache.Capacity(hot_cpu_id), hot_cache_capacity);
+
+ // Make sure that the total capacity is preserved.
+ EXPECT_EQ(cache.Capacity(cold_cpu_id) + cache.Capacity(hot_cpu_id),
+ 2 * max_cpu_cache_size);
+
+ // Reclaim caches.
+ const int num_cpus = absl::base_internal::NumCPUs();
+ for (int cpu = 0; cpu < num_cpus; ++cpu) {
+ cache.Reclaim(cpu);
+ }
+}
+
+TEST(CpuCacheTest, ReclaimCpuCache) {
+ if (!subtle::percpu::IsFast()) {
+ return;
+ }
+
+ CPUCache& cache = Static::cpu_cache();
+ // Since this test allocates memory, avoid activating the real fast path to
+ // minimize allocations against the per-CPU cache.
+ cache.Activate(CPUCache::ActivationMode::FastPathOffTestOnly);
+
+ // The number of underflows and overflows must be zero for all the caches.
+ const int num_cpus = absl::base_internal::NumCPUs();
+ for (int cpu = 0; cpu < num_cpus; ++cpu) {
+ SCOPED_TRACE(absl::StrFormat("Failed CPU: %d", cpu));
+ // Check that reclaim miss metrics are reset.
+ CPUCache::CpuCacheMissStats reclaim_misses =
+ cache.GetReclaimCacheMissStats(cpu);
+ EXPECT_EQ(reclaim_misses.underflows, 0);
+ EXPECT_EQ(reclaim_misses.overflows, 0);
+
+ // None of the caches should have been reclaimed yet.
+ EXPECT_EQ(cache.GetNumReclaims(cpu), 0);
+
+ // Check that caches are empty.
+ uint64_t used_bytes = cache.UsedBytes(cpu);
+ EXPECT_EQ(used_bytes, 0);
+ }
+
+ const size_t kSizeClass = 3;
+
+ // We chose a different size class here so that we can populate different size
+ // class slots and change the number of bytes used by the busy cache later in
+ // our test.
+ const size_t kBusySizeClass = 4;
+
+ // Perform some operations to warm up caches and make sure they are populated.
+ for (int cpu = 0; cpu < num_cpus; ++cpu) {
+ SCOPED_TRACE(absl::StrFormat("Failed CPU: %d", cpu));
+ ColdCacheOperations(cpu, kSizeClass);
+ EXPECT_TRUE(cache.HasPopulated(cpu));
+ }
+
+ for (int cpu = 0; cpu < num_cpus; ++cpu) {
+ SCOPED_TRACE(absl::StrFormat("Failed CPU: %d", cpu));
+ CPUCache::CpuCacheMissStats misses_last_interval =
+ cache.GetReclaimCacheMissStats(cpu);
+ CPUCache::CpuCacheMissStats total_misses =
+ cache.GetTotalCacheMissStats(cpu);
+
+ // Misses since the last reclaim (i.e. since we initialized the caches)
+ // should match the total miss metrics.
+ EXPECT_EQ(misses_last_interval.underflows, total_misses.underflows);
+ EXPECT_EQ(misses_last_interval.overflows, total_misses.overflows);
+
+ // Caches should have non-zero used bytes.
+ EXPECT_GT(cache.UsedBytes(cpu), 0);
+ }
+
+ cache.TryReclaimingCaches();
+
+ // Miss metrics since the last interval were non-zero and the change in used
+ // bytes was non-zero, so none of the caches should get reclaimed.
+ for (int cpu = 0; cpu < num_cpus; ++cpu) {
+ SCOPED_TRACE(absl::StrFormat("Failed CPU: %d", cpu));
+ // As no cache operations were performed since the last reclaim
+ // operation, the reclaim misses captured during the last interval (i.e.
+ // since the last reclaim) should be zero.
+ CPUCache::CpuCacheMissStats reclaim_misses =
+ cache.GetReclaimCacheMissStats(cpu);
+ EXPECT_EQ(reclaim_misses.underflows, 0);
+ EXPECT_EQ(reclaim_misses.overflows, 0);
+
+ // None of the caches should have been reclaimed as the caches were
+ // accessed in the previous interval.
+ EXPECT_EQ(cache.GetNumReclaims(cpu), 0);
+
+ // Caches should not have been reclaimed; used bytes should be non-zero.
+ EXPECT_GT(cache.UsedBytes(cpu), 0);
+ }
+
+ absl::BitGen rnd;
+ const int busy_cpu =
+ absl::Uniform<int32_t>(rnd, 0, absl::base_internal::NumCPUs());
+ const size_t prev_used = cache.UsedBytes(busy_cpu);
+ ColdCacheOperations(busy_cpu, kBusySizeClass);
+ EXPECT_GT(cache.UsedBytes(busy_cpu), prev_used);
+
+ // Try reclaiming caches again.
+ cache.TryReclaimingCaches();
+
+ // All caches, except the busy cpu cache against which we performed some
+ // operations in the previous interval, should have been reclaimed exactly
+ // once.
+ for (int cpu = 0; cpu < num_cpus; ++cpu) {
+ SCOPED_TRACE(absl::StrFormat("Failed CPU: %d", cpu));
+ if (cpu == busy_cpu) {
+ EXPECT_GT(cache.UsedBytes(cpu), 0);
+ EXPECT_EQ(cache.GetNumReclaims(cpu), 0);
+ } else {
+ EXPECT_EQ(cache.UsedBytes(cpu), 0);
+ EXPECT_EQ(cache.GetNumReclaims(cpu), 1);
+ }
+ }
+
+ // Try reclaiming caches again.
+ cache.TryReclaimingCaches();
+
+ // All caches, including the busy cache, should have been reclaimed this
+ // time. Note that the caches that were reclaimed in the previous interval
+ // should not be reclaimed again and the number of reclaims reported for them
+ // should still be one.
+ for (int cpu = 0; cpu < num_cpus; ++cpu) {
+ SCOPED_TRACE(absl::StrFormat("Failed CPU: %d", cpu));
+ EXPECT_EQ(cache.UsedBytes(cpu), 0);
+ EXPECT_EQ(cache.GetNumReclaims(cpu), 1);
+ }
+}
+
+} // namespace
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
diff --git a/contrib/libs/tcmalloc/tcmalloc/experiment.cc b/contrib/libs/tcmalloc/tcmalloc/experiment.cc
new file mode 100644
index 0000000000..1c425fbf9e
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/experiment.cc
@@ -0,0 +1,162 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/experiment.h"
+
+#include <string.h>
+
+#include "absl/base/macros.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/string_view.h"
+#include "tcmalloc/internal/environment.h"
+#include "tcmalloc/internal/logging.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+namespace {
+
+const char kDelimiter = ',';
+const char kExperiments[] = "BORG_EXPERIMENTS";
+const char kDisableExperiments[] = "BORG_DISABLE_EXPERIMENTS";
+constexpr absl::string_view kEnableAll = "enable-all-known-experiments";
+constexpr absl::string_view kDisableAll = "all";
+
+bool LookupExperimentID(absl::string_view label, Experiment* exp) {
+ for (auto config : experiments) {
+ if (config.name == label) {
+ *exp = config.id;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+const bool* GetSelectedExperiments() {
+ static bool by_id[kNumExperiments];
+
+ static const bool* status = [&]() {
+ const char* active_experiments = thread_safe_getenv(kExperiments);
+ const char* disabled_experiments = thread_safe_getenv(kDisableExperiments);
+ return SelectExperiments(by_id,
+ active_experiments ? active_experiments : "",
+ disabled_experiments ? disabled_experiments : "");
+ }();
+ return status;
+}
+
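+// Splits <labels> on commas and invokes f on each token. For example (purely
+// illustrative), ParseExperiments("foo,bar", f) calls f("foo") and then
+// f("bar"); an empty string yields a single call f("").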
+template <typename F>
+void ParseExperiments(absl::string_view labels, F f) {
+ absl::string_view::size_type pos = 0;
+ do {
+ absl::string_view token;
+ auto end = labels.find(kDelimiter, pos);
+ if (end == absl::string_view::npos) {
+ token = labels.substr(pos);
+ pos = end;
+ } else {
+ token = labels.substr(pos, end - pos);
+ pos = end + 1;
+ }
+
+ f(token);
+ } while (pos != absl::string_view::npos);
+}
+
+} // namespace
+
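+// For illustration (with hypothetical experiment names): given active="foo"
+// and disabled="bar", the buffer entry for a known experiment "foo" is set and
+// the one for "bar" is cleared; active == "enable-all-known-experiments" turns
+// every known experiment on before the disabled list is applied.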
+const bool* SelectExperiments(bool* buffer, absl::string_view active,
+ absl::string_view disabled) {
+ memset(buffer, 0, sizeof(*buffer) * kNumExperiments);
+
+ if (active == kEnableAll) {
+ std::fill(buffer, buffer + kNumExperiments, true);
+ }
+
+ ParseExperiments(active, [buffer](absl::string_view token) {
+ Experiment id;
+ if (LookupExperimentID(token, &id)) {
+ buffer[static_cast<int>(id)] = true;
+ }
+ });
+
+ if (disabled == kDisableAll) {
+ memset(buffer, 0, sizeof(*buffer) * kNumExperiments);
+ }
+
+ ParseExperiments(disabled, [buffer](absl::string_view token) {
+ Experiment id;
+ if (LookupExperimentID(token, &id)) {
+ buffer[static_cast<int>(id)] = false;
+ }
+ });
+
+ return buffer;
+}
+
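+
+// Emits a single line of the form
+//   MALLOC EXPERIMENTS: NAME_A=0 NAME_B=1 ...
+// with one NAME=0/1 entry per configured experiment.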
+void PrintExperiments(Printer* printer) {
+ // Index experiments by their positions in the experiments array, rather than
+ // by experiment ID.
+ static bool active[ABSL_ARRAYSIZE(experiments)];
+ static const bool* status = []() {
+ memset(active, 0, sizeof(active));
+ const bool* by_id = GetSelectedExperiments();
+
+ for (int i = 0; i < ABSL_ARRAYSIZE(experiments); i++) {
+ const auto& config = experiments[i];
+ active[i] = by_id[static_cast<int>(config.id)];
+ }
+
+ return active;
+ }();
+
+ printer->printf("MALLOC EXPERIMENTS:");
+ for (int i = 0; i < ABSL_ARRAYSIZE(experiments); i++) {
+ const char* value = status[i] ? "1" : "0";
+ printer->printf(" %s=%s", experiments[i].name, value);
+ }
+
+ printer->printf("\n");
+}
+
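+
+// Adds one "tcmalloc.experiment.<NAME>" property per configured experiment,
+// valued 1 if that experiment is active and 0 otherwise.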
+void FillExperimentProperties(
+ std::map<std::string, MallocExtension::Property>* result) {
+ for (const auto& config : experiments) {
+ (*result)[absl::StrCat("tcmalloc.experiment.", config.name)].value =
+ IsExperimentActive(config.id) ? 1 : 0;
+ }
+}
+
+} // namespace tcmalloc_internal
+
+bool IsExperimentActive(Experiment exp) {
+ ASSERT(static_cast<int>(exp) >= 0);
+ ASSERT(exp < Experiment::kMaxExperimentID);
+
+ return tcmalloc_internal::GetSelectedExperiments()[static_cast<int>(exp)];
+}
+
+absl::optional<Experiment> FindExperimentByName(absl::string_view name) {
+ for (const auto& config : experiments) {
+ if (name == config.name) {
+ return config.id;
+ }
+ }
+
+ return absl::nullopt;
+}
+
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
diff --git a/contrib/libs/tcmalloc/tcmalloc/experiment.h b/contrib/libs/tcmalloc/tcmalloc/experiment.h
new file mode 100644
index 0000000000..90b3049df1
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/experiment.h
@@ -0,0 +1,71 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TCMALLOC_EXPERIMENT_H_
+#define TCMALLOC_EXPERIMENT_H_
+
+#include <stddef.h>
+
+#include <map>
+#include <string>
+
+#include "absl/strings/string_view.h"
+#include "absl/types/optional.h"
+#include "tcmalloc/experiment_config.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/malloc_extension.h"
+
+// TCMalloc Experiment Controller
+//
+// This consumes environment variables to decide whether to activate experiments
+// that control TCMalloc behavior. It avoids memory allocations when making
+// experiment decisions to allow experiments to be used in critical TCMalloc
+// initialization paths.
+//
+// If an experiment is causing difficulty, experiments can be disabled by
+// setting the environment variable:
+//   BORG_DISABLE_EXPERIMENTS=all  (disables every known experiment)  *or*
+//   BORG_DISABLE_EXPERIMENTS=BAD_EXPERIMENT_LABEL  (disables only that one)
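+//
+// Illustrative usage (a sketch; the binary name is a placeholder): experiments
+// are typically selected via the environment before the process starts, e.g.
+//   BORG_EXPERIMENTS=TEST_ONLY_TCMALLOC_POW2_SIZECLASS ./binary
+// and call sites then branch on the selection:
+//   if (tcmalloc::IsExperimentActive(
+//           tcmalloc::Experiment::TEST_ONLY_TCMALLOC_POW2_SIZECLASS)) {
+//     // Take the experimental path.
+//   }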
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+constexpr size_t kNumExperiments =
+ static_cast<size_t>(Experiment::kMaxExperimentID);
+
+// SelectExperiments parses the comma-separated experiment labels listed in
+// `active` and `disabled` and updates buffer[experiment_id] accordingly.
+//
+// buffer must be sized for kMaxExperimentID entries.
+//
+// This is exposed for testing purposes only.
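+//
+// Illustrative use in a test (a sketch only):
+//   bool buffer[kNumExperiments];
+//   SelectExperiments(buffer, /*active=*/"TCMALLOC_TEMERAIRE",
+//                     /*disabled=*/"");
+//   // buffer[static_cast<int>(Experiment::TCMALLOC_TEMERAIRE)] is now true.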
+const bool* SelectExperiments(bool* buffer, absl::string_view active,
+ absl::string_view disabled);
+
+void FillExperimentProperties(
+ std::map<std::string, MallocExtension::Property>* result);
+
+void PrintExperiments(Printer* printer);
+
+} // namespace tcmalloc_internal
+
+bool IsExperimentActive(Experiment exp);
+
+absl::optional<Experiment> FindExperimentByName(absl::string_view name);
+
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // TCMALLOC_EXPERIMENT_H_
diff --git a/contrib/libs/tcmalloc/tcmalloc/experiment_config.h b/contrib/libs/tcmalloc/tcmalloc/experiment_config.h
new file mode 100644
index 0000000000..294c0374e4
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/experiment_config.h
@@ -0,0 +1,51 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TCMALLOC_EXPERIMENT_CONFIG_H_
+#define TCMALLOC_EXPERIMENT_CONFIG_H_
+
+#include "absl/strings/string_view.h"
+
+// Autogenerated by experiments_proto_test --experiments_generate_config=true
+namespace tcmalloc {
+
+enum class Experiment : int {
+ TCMALLOC_TEMERAIRE,
+ TCMALLOC_SANS_56_SIZECLASS,
+ TEST_ONLY_TCMALLOC_POW2_SIZECLASS,
+ TEST_ONLY_TCMALLOC_POW2_BELOW64_SIZECLASS,
+ TEST_ONLY_TCMALLOC_RING_BUFFER_TRANSFER_CACHE,
+ TEST_ONLY_TCMALLOC_SHARDED_TRANSFER_CACHE,
+ kMaxExperimentID,
+};
+
+struct ExperimentConfig {
+ Experiment id;
+ absl::string_view name;
+};
+
+// clang-format off
+inline constexpr ExperimentConfig experiments[] = {
+ {Experiment::TCMALLOC_TEMERAIRE, "TCMALLOC_TEMERAIRE"},
+ {Experiment::TCMALLOC_SANS_56_SIZECLASS, "TCMALLOC_SANS_56_SIZECLASS"},
+ {Experiment::TEST_ONLY_TCMALLOC_POW2_SIZECLASS, "TEST_ONLY_TCMALLOC_POW2_SIZECLASS"},
+ {Experiment::TEST_ONLY_TCMALLOC_POW2_BELOW64_SIZECLASS, "TEST_ONLY_TCMALLOC_POW2_BELOW64_SIZECLASS"},
+ {Experiment::TEST_ONLY_TCMALLOC_RING_BUFFER_TRANSFER_CACHE, "TEST_ONLY_TCMALLOC_RING_BUFFER_TRANSFER_CACHE"},
+ {Experiment::TEST_ONLY_TCMALLOC_SHARDED_TRANSFER_CACHE, "TEST_ONLY_TCMALLOC_SHARDED_TRANSFER_CACHE"},
+};
+// clang-format on
+
+} // namespace tcmalloc
+
+#endif // TCMALLOC_EXPERIMENT_CONFIG_H_
diff --git a/contrib/libs/tcmalloc/tcmalloc/experiment_config_test.cc b/contrib/libs/tcmalloc/tcmalloc/experiment_config_test.cc
new file mode 100644
index 0000000000..24da9e64aa
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/experiment_config_test.cc
@@ -0,0 +1,31 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/experiment_config.h"
+
+#include "gtest/gtest.h"
+
+namespace tcmalloc {
+namespace {
+
+// Verify IDs are non-negative and strictly less than kMaxExperimentID.
+TEST(ExperimentConfigTest, ValidateIDs) {
+ for (const auto& exp : experiments) {
+ ASSERT_LE(0, static_cast<int>(exp.id));
+ ASSERT_LT(exp.id, Experiment::kMaxExperimentID);
+ }
+}
+
+} // namespace
+} // namespace tcmalloc
diff --git a/contrib/libs/tcmalloc/tcmalloc/experiment_fuzz.cc b/contrib/libs/tcmalloc/tcmalloc/experiment_fuzz.cc
new file mode 100644
index 0000000000..2a7afe9b85
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/experiment_fuzz.cc
@@ -0,0 +1,38 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "absl/strings/string_view.h"
+#include "tcmalloc/experiment.h"
+
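+
+// Fuzz entry point: the input is split at the first ';' into an "active" and a
+// "disabled" label list, which are then fed to SelectExperiments.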
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* d, size_t size) {
+ const char* data = reinterpret_cast<const char*>(d);
+
+ bool buffer[tcmalloc::tcmalloc_internal::kNumExperiments];
+ absl::string_view active, disabled;
+
+ const char* split = static_cast<const char*>(memchr(data, ';', size));
+ if (split == nullptr) {
+ active = absl::string_view(data, size);
+ } else {
+ active = absl::string_view(data, split - data);
+ disabled = absl::string_view(split + 1, size - (split - data + 1));
+ }
+
+ tcmalloc::tcmalloc_internal::SelectExperiments(buffer, active, disabled);
+ return 0;
+}
diff --git a/contrib/libs/tcmalloc/tcmalloc/experimental_56_size_class.cc b/contrib/libs/tcmalloc/tcmalloc/experimental_56_size_class.cc
new file mode 100644
index 0000000000..c582cdb9ba
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/experimental_56_size_class.cc
@@ -0,0 +1,706 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/common.h"
+
+namespace tcmalloc {
+
+// <fixed> is fixed per-size-class overhead due to end-of-span fragmentation
+// and other factors. For instance, if we have a 96 byte size class, and use a
+// single 8KiB page, then we will hold 85 objects per span, and have 32 bytes
+// left over. There is also a fixed component of 48 bytes of TCMalloc metadata
+// per span. Together, the fixed overhead would be wasted/allocated =
+// (32 + 48) / (8192 - 32) ~= 0.98%.
+// There is also a dynamic component to overhead based on mismatches between the
+// number of bytes requested and the number of bytes provided by the size class.
+// Together they sum to the total overhead; for instance if you asked for a
+// 50-byte allocation that rounds up to a 64-byte size class, the dynamic
+// overhead would be 28%, and if <fixed> were 22% it would mean (on average)
+// 25 bytes of overhead for allocations of that size.
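+//
+// For illustration, working through the numbers above: a 50-byte request
+// served from the 64-byte class wastes 64 - 50 = 14 bytes, i.e. 14 / 50 = 28%
+// dynamic overhead; with a hypothetical 22% fixed component the total is about
+// 50% of 50 bytes, i.e. roughly 25 bytes of overhead per allocation on average.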
+
+// clang-format off
+#if defined(__cpp_aligned_new) && __STDCPP_DEFAULT_NEW_ALIGNMENT__ <= 8
+#if TCMALLOC_PAGE_SHIFT == 13
+static_assert(kMaxSize == 262144, "kMaxSize mismatch");
+static const int kCount = 86;
+static_assert(kCount <= kNumClasses);
+const int SizeMap::kExperimentalSizeClassesCount = kCount;
+const SizeClassInfo SizeMap::kExperimentalSizeClasses[SizeMap::kExperimentalSizeClassesCount] = {
+ // <bytes>, <pages>, <batch size> <fixed>
+ { 0, 0, 0}, // +Inf%
+ { 8, 1, 32}, // 0.59%
+ { 16, 1, 32}, // 0.59%
+ { 24, 1, 32}, // 0.68%
+ { 32, 1, 32}, // 0.59%
+ { 40, 1, 32}, // 0.98%
+ { 48, 1, 32}, // 0.98%
+ { 64, 1, 32}, // 0.59%
+ { 72, 1, 32}, // 1.28%
+ { 80, 1, 32}, // 0.98%
+ { 88, 1, 32}, // 0.68%
+ { 96, 1, 32}, // 0.98%
+ { 104, 1, 32}, // 1.58%
+ { 112, 1, 32}, // 0.78%
+ { 120, 1, 32}, // 0.98%
+ { 128, 1, 32}, // 0.59%
+ { 136, 1, 32}, // 0.98%
+ { 144, 1, 32}, // 2.18%
+ { 160, 1, 32}, // 0.98%
+ { 176, 1, 32}, // 1.78%
+ { 184, 1, 32}, // 1.78%
+ { 192, 1, 32}, // 2.18%
+ { 208, 1, 32}, // 1.58%
+ { 224, 1, 32}, // 2.18%
+ { 240, 1, 32}, // 0.98%
+ { 256, 1, 32}, // 0.59%
+ { 272, 1, 32}, // 0.98%
+ { 288, 1, 32}, // 2.18%
+ { 312, 1, 32}, // 1.58%
+ { 336, 1, 32}, // 2.18%
+ { 352, 1, 32}, // 1.78%
+ { 384, 1, 32}, // 2.18%
+ { 408, 1, 32}, // 0.98%
+ { 424, 1, 32}, // 2.28%
+ { 448, 1, 32}, // 2.18%
+ { 480, 1, 32}, // 0.98%
+ { 512, 1, 32}, // 0.59%
+ { 576, 1, 32}, // 2.18%
+ { 640, 1, 32}, // 7.29%
+ { 704, 1, 32}, // 6.40%
+ { 768, 1, 32}, // 7.29%
+ { 896, 1, 32}, // 2.18%
+ { 1024, 1, 32}, // 0.59%
+ { 1152, 2, 32}, // 1.88%
+ { 1280, 2, 32}, // 6.98%
+ { 1408, 2, 32}, // 6.10%
+ { 1536, 2, 32}, // 6.98%
+ { 1792, 2, 32}, // 1.88%
+ { 2048, 2, 32}, // 0.29%
+ { 2304, 2, 28}, // 1.88%
+ { 2688, 2, 24}, // 1.88%
+ { 2816, 3, 23}, // 9.30%
+ { 3200, 2, 20}, // 2.70%
+ { 3456, 3, 18}, // 1.79%
+ { 3584, 4, 18}, // 1.74%
+ { 4096, 2, 16}, // 0.29%
+ { 4736, 3, 13}, // 3.99%
+ { 5376, 2, 12}, // 1.88%
+ { 6144, 3, 10}, // 0.20%
+ { 6528, 4, 10}, // 0.54%
+ { 7168, 7, 9}, // 0.08%
+ { 8192, 2, 8}, // 0.29%
+ { 9472, 5, 6}, // 8.23%
+ { 10240, 4, 6}, // 6.82%
+ { 12288, 3, 5}, // 0.20%
+ { 14336, 7, 4}, // 0.08%
+ { 16384, 2, 4}, // 0.29%
+ { 20480, 5, 3}, // 0.12%
+ { 24576, 3, 2}, // 0.20%
+ { 28672, 7, 2}, // 0.08%
+ { 32768, 4, 2}, // 0.15%
+ { 40960, 5, 2}, // 0.12%
+ { 49152, 6, 2}, // 0.10%
+ { 57344, 7, 2}, // 0.08%
+ { 65536, 8, 2}, // 0.07%
+ { 73728, 9, 2}, // 0.07%
+ { 81920, 10, 2}, // 0.06%
+ { 98304, 12, 2}, // 0.05%
+ { 106496, 13, 2}, // 0.05%
+ { 131072, 16, 2}, // 0.04%
+ { 147456, 18, 2}, // 0.03%
+ { 163840, 20, 2}, // 0.03%
+ { 180224, 22, 2}, // 0.03%
+ { 204800, 25, 2}, // 0.02%
+ { 229376, 28, 2}, // 0.02%
+ { 262144, 32, 2}, // 0.02%
+};
+#elif TCMALLOC_PAGE_SHIFT == 15
+static_assert(kMaxSize == 262144, "kMaxSize mismatch");
+static const int kCount = 78;
+static_assert(kCount <= kNumClasses);
+const int SizeMap::kExperimentalSizeClassesCount = kCount;
+const SizeClassInfo SizeMap::kExperimentalSizeClasses[SizeMap::kExperimentalSizeClassesCount] = {
+ // <bytes>, <pages>, <batch size> <fixed>
+ { 0, 0, 0}, // +Inf%
+ { 8, 1, 32}, // 0.15%
+ { 16, 1, 32}, // 0.15%
+ { 24, 1, 32}, // 0.17%
+ { 32, 1, 32}, // 0.15%
+ { 40, 1, 32}, // 0.17%
+ { 48, 1, 32}, // 0.24%
+ { 64, 1, 32}, // 0.15%
+ { 72, 1, 32}, // 0.17%
+ { 80, 1, 32}, // 0.29%
+ { 88, 1, 32}, // 0.24%
+ { 96, 1, 32}, // 0.24%
+ { 104, 1, 32}, // 0.17%
+ { 112, 1, 32}, // 0.34%
+ { 120, 1, 32}, // 0.17%
+ { 128, 1, 32}, // 0.15%
+ { 144, 1, 32}, // 0.39%
+ { 160, 1, 32}, // 0.54%
+ { 176, 1, 32}, // 0.24%
+ { 192, 1, 32}, // 0.54%
+ { 208, 1, 32}, // 0.49%
+ { 224, 1, 32}, // 0.34%
+ { 240, 1, 32}, // 0.54%
+ { 256, 1, 32}, // 0.15%
+ { 280, 1, 32}, // 0.17%
+ { 304, 1, 32}, // 0.89%
+ { 336, 1, 32}, // 0.69%
+ { 368, 1, 32}, // 0.20%
+ { 416, 1, 32}, // 1.13%
+ { 456, 1, 32}, // 1.36%
+ { 488, 1, 32}, // 0.37%
+ { 512, 1, 32}, // 0.15%
+ { 576, 1, 32}, // 1.74%
+ { 640, 1, 32}, // 0.54%
+ { 704, 1, 32}, // 1.33%
+ { 768, 1, 32}, // 1.74%
+ { 832, 1, 32}, // 1.13%
+ { 896, 1, 32}, // 1.74%
+ { 1024, 1, 32}, // 0.15%
+ { 1152, 1, 32}, // 1.74%
+ { 1280, 1, 32}, // 2.55%
+ { 1408, 1, 32}, // 1.33%
+ { 1664, 1, 32}, // 3.80%
+ { 2048, 1, 32}, // 0.15%
+ { 2176, 1, 30}, // 0.54%
+ { 2304, 1, 28}, // 1.74%
+ { 2432, 1, 26}, // 3.80%
+ { 2688, 1, 24}, // 1.74%
+ { 2944, 1, 22}, // 1.33%
+ { 3200, 1, 20}, // 2.55%
+ { 3584, 1, 18}, // 1.74%
+ { 4096, 1, 16}, // 0.15%
+ { 4608, 1, 14}, // 1.74%
+ { 5376, 1, 12}, // 1.74%
+ { 6528, 1, 10}, // 0.54%
+ { 8192, 1, 8}, // 0.15%
+ { 9344, 2, 7}, // 0.27%
+ { 10880, 1, 6}, // 0.54%
+ { 13056, 2, 5}, // 0.47%
+ { 13952, 3, 4}, // 0.70%
+ { 16384, 1, 4}, // 0.15%
+ { 19072, 3, 3}, // 3.14%
+ { 21760, 2, 3}, // 0.47%
+ { 24576, 3, 2}, // 0.05%
+ { 28032, 6, 2}, // 0.22%
+ { 32768, 1, 2}, // 0.15%
+ { 40960, 4, 2}, // 6.71%
+ { 49152, 3, 2}, // 0.05%
+ { 57344, 7, 2}, // 0.02%
+ { 65536, 2, 2}, // 0.07%
+ { 81920, 5, 2}, // 0.03%
+ { 98304, 3, 2}, // 0.05%
+ { 114688, 7, 2}, // 0.02%
+ { 131072, 4, 2}, // 0.04%
+ { 163840, 5, 2}, // 0.03%
+ { 196608, 6, 2}, // 0.02%
+ { 229376, 7, 2}, // 0.02%
+ { 262144, 8, 2}, // 0.02%
+};
+#elif TCMALLOC_PAGE_SHIFT == 18
+static_assert(kMaxSize == 262144, "kMaxSize mismatch");
+static const int kCount = 89;
+static_assert(kCount <= kNumClasses);
+const int SizeMap::kExperimentalSizeClassesCount = kCount;
+const SizeClassInfo SizeMap::kExperimentalSizeClasses[SizeMap::kExperimentalSizeClassesCount] = {
+ // <bytes>, <pages>, <batch size> <fixed>
+ { 0, 0, 0}, // +Inf%
+ { 8, 1, 32}, // 0.02%
+ { 16, 1, 32}, // 0.02%
+ { 24, 1, 32}, // 0.02%
+ { 32, 1, 32}, // 0.02%
+ { 40, 1, 32}, // 0.03%
+ { 48, 1, 32}, // 0.02%
+ { 64, 1, 32}, // 0.02%
+ { 72, 1, 32}, // 0.04%
+ { 80, 1, 32}, // 0.04%
+ { 88, 1, 32}, // 0.05%
+ { 96, 1, 32}, // 0.04%
+ { 112, 1, 32}, // 0.04%
+ { 128, 1, 32}, // 0.02%
+ { 144, 1, 32}, // 0.04%
+ { 160, 1, 32}, // 0.04%
+ { 176, 1, 32}, // 0.05%
+ { 192, 1, 32}, // 0.04%
+ { 216, 1, 32}, // 0.07%
+ { 240, 1, 32}, // 0.04%
+ { 256, 1, 32}, // 0.02%
+ { 288, 1, 32}, // 0.04%
+ { 312, 1, 32}, // 0.04%
+ { 344, 1, 32}, // 0.02%
+ { 360, 1, 32}, // 0.04%
+ { 416, 1, 32}, // 0.04%
+ { 464, 1, 32}, // 0.19%
+ { 512, 1, 32}, // 0.02%
+ { 576, 1, 32}, // 0.04%
+ { 640, 1, 32}, // 0.17%
+ { 704, 1, 32}, // 0.12%
+ { 832, 1, 32}, // 0.04%
+ { 1024, 1, 32}, // 0.02%
+ { 1152, 1, 32}, // 0.26%
+ { 1280, 1, 32}, // 0.41%
+ { 1408, 1, 32}, // 0.12%
+ { 1664, 1, 32}, // 0.36%
+ { 1792, 1, 32}, // 0.21%
+ { 1920, 1, 32}, // 0.41%
+ { 2048, 1, 32}, // 0.02%
+ { 2176, 1, 30}, // 0.41%
+ { 2304, 1, 28}, // 0.71%
+ { 2432, 1, 26}, // 0.76%
+ { 2688, 1, 24}, // 0.56%
+ { 2944, 1, 22}, // 0.07%
+ { 3072, 1, 21}, // 0.41%
+ { 3328, 1, 19}, // 1.00%
+ { 3584, 1, 18}, // 0.21%
+ { 3840, 1, 17}, // 0.41%
+ { 4096, 1, 16}, // 0.02%
+ { 4608, 1, 14}, // 1.61%
+ { 5120, 1, 12}, // 0.41%
+ { 5504, 1, 11}, // 1.35%
+ { 5760, 1, 11}, // 1.15%
+ { 6144, 1, 10}, // 1.61%
+ { 6656, 1, 9}, // 1.00%
+ { 7168, 1, 9}, // 1.61%
+ { 7680, 1, 8}, // 0.41%
+ { 8192, 1, 8}, // 0.02%
+ { 9344, 1, 7}, // 0.21%
+ { 9984, 1, 6}, // 1.00%
+ { 10880, 1, 6}, // 0.41%
+ { 11904, 1, 5}, // 0.12%
+ { 13056, 1, 5}, // 0.41%
+ { 14464, 1, 4}, // 0.71%
+ { 16384, 1, 4}, // 0.02%
+ { 17408, 1, 3}, // 0.41%
+ { 20096, 1, 3}, // 0.36%
+ { 21760, 1, 3}, // 0.41%
+ { 23808, 1, 2}, // 0.12%
+ { 26112, 1, 2}, // 0.41%
+ { 29056, 1, 2}, // 0.26%
+ { 32768, 1, 2}, // 0.02%
+ { 37376, 1, 2}, // 0.21%
+ { 43648, 1, 2}, // 0.12%
+ { 45568, 2, 2}, // 4.61%
+ { 52352, 1, 2}, // 0.17%
+ { 56064, 2, 2}, // 3.92%
+ { 65536, 1, 2}, // 0.02%
+ { 74880, 2, 2}, // 0.03%
+ { 87296, 1, 2}, // 0.12%
+ { 104832, 2, 2}, // 0.03%
+ { 112256, 3, 2}, // 0.09%
+ { 131072, 1, 2}, // 0.02%
+ { 149760, 3, 2}, // 5.03%
+ { 174720, 2, 2}, // 0.03%
+ { 196608, 3, 2}, // 0.01%
+ { 209664, 4, 2}, // 0.03%
+ { 262144, 1, 2}, // 0.02%
+};
+#elif TCMALLOC_PAGE_SHIFT == 12
+static_assert(kMaxSize == 8192, "kMaxSize mismatch");
+static const int kCount = 46;
+static_assert(kCount <= kNumClasses);
+const int SizeMap::kExperimentalSizeClassesCount = kCount;
+const SizeClassInfo SizeMap::kExperimentalSizeClasses[SizeMap::kExperimentalSizeClassesCount] = {
+ // <bytes>, <pages>, <batch size> <fixed>
+ { 0, 0, 0}, // +Inf%
+ { 8, 1, 32}, // 1.17%
+ { 16, 1, 32}, // 1.17%
+ { 24, 1, 32}, // 1.57%
+ { 32, 1, 32}, // 1.17%
+ { 40, 1, 32}, // 1.57%
+ { 48, 1, 32}, // 1.57%
+ { 64, 1, 32}, // 1.17%
+ { 72, 1, 32}, // 2.78%
+ { 80, 1, 32}, // 1.57%
+ { 88, 1, 32}, // 2.37%
+ { 96, 1, 32}, // 2.78%
+ { 104, 1, 32}, // 2.17%
+ { 112, 1, 32}, // 2.78%
+ { 128, 1, 32}, // 1.17%
+ { 144, 1, 32}, // 2.78%
+ { 160, 1, 32}, // 3.60%
+ { 176, 1, 32}, // 2.37%
+ { 192, 1, 32}, // 2.78%
+ { 208, 1, 32}, // 4.86%
+ { 240, 1, 32}, // 1.57%
+ { 256, 1, 32}, // 1.17%
+ { 272, 1, 32}, // 1.57%
+ { 312, 1, 32}, // 2.17%
+ { 336, 1, 32}, // 2.78%
+ { 368, 1, 32}, // 2.37%
+ { 408, 1, 32}, // 1.57%
+ { 448, 1, 32}, // 2.78%
+ { 512, 1, 32}, // 1.17%
+ { 576, 2, 32}, // 2.18%
+ { 640, 2, 32}, // 7.29%
+ { 768, 2, 32}, // 7.29%
+ { 896, 2, 32}, // 2.18%
+ { 1024, 2, 32}, // 0.59%
+ { 1152, 3, 32}, // 7.08%
+ { 1280, 3, 32}, // 7.08%
+ { 1536, 3, 32}, // 0.39%
+ { 2048, 4, 32}, // 0.29%
+ { 2304, 4, 28}, // 1.88%
+ { 2688, 4, 24}, // 1.88%
+ { 3200, 4, 20}, // 2.70%
+ { 4096, 4, 16}, // 0.29%
+ { 4736, 5, 13}, // 8.36%
+ { 6144, 3, 10}, // 0.39%
+ { 7168, 7, 9}, // 0.17%
+ { 8192, 4, 8}, // 0.29%
+};
+#else
+#error "Unsupported TCMALLOC_PAGE_SHIFT value!"
+#endif
+#else
+#if TCMALLOC_PAGE_SHIFT == 13
+static_assert(kMaxSize == 262144, "kMaxSize mismatch");
+static const int kCount = 86;
+static_assert(kCount <= kNumClasses);
+const int SizeMap::kExperimentalSizeClassesCount = kCount;
+const SizeClassInfo SizeMap::kExperimentalSizeClasses[SizeMap::kExperimentalSizeClassesCount] = {
+ // <bytes>, <pages>, <batch size> <fixed>
+ { 0, 0, 0}, // +Inf%
+ { 8, 1, 32}, // 0.59%
+ { 16, 1, 32}, // 0.59%
+ { 32, 1, 32}, // 0.59%
+ { 48, 1, 32}, // 0.98%
+ { 64, 1, 32}, // 0.59%
+ { 80, 1, 32}, // 0.98%
+ { 96, 1, 32}, // 0.98%
+ { 112, 1, 32}, // 0.78%
+ { 128, 1, 32}, // 0.59%
+ { 144, 1, 32}, // 2.18%
+ { 160, 1, 32}, // 0.98%
+ { 176, 1, 32}, // 1.78%
+ { 192, 1, 32}, // 2.18%
+ { 208, 1, 32}, // 1.58%
+ { 224, 1, 32}, // 2.18%
+ { 240, 1, 32}, // 0.98%
+ { 256, 1, 32}, // 0.59%
+ { 272, 1, 32}, // 0.98%
+ { 288, 1, 32}, // 2.18%
+ { 304, 1, 32}, // 4.25%
+ { 320, 1, 32}, // 3.00%
+ { 336, 1, 32}, // 2.18%
+ { 352, 1, 32}, // 1.78%
+ { 368, 1, 32}, // 1.78%
+ { 384, 1, 32}, // 2.18%
+ { 400, 1, 32}, // 3.00%
+ { 416, 1, 32}, // 4.25%
+ { 448, 1, 32}, // 2.18%
+ { 480, 1, 32}, // 0.98%
+ { 512, 1, 32}, // 0.59%
+ { 576, 1, 32}, // 2.18%
+ { 640, 1, 32}, // 7.29%
+ { 704, 1, 32}, // 6.40%
+ { 768, 1, 32}, // 7.29%
+ { 896, 1, 32}, // 2.18%
+ { 1024, 1, 32}, // 0.59%
+ { 1152, 2, 32}, // 1.88%
+ { 1280, 2, 32}, // 6.98%
+ { 1408, 2, 32}, // 6.10%
+ { 1536, 2, 32}, // 6.98%
+ { 1792, 2, 32}, // 1.88%
+ { 2048, 2, 32}, // 0.29%
+ { 2304, 2, 28}, // 1.88%
+ { 2688, 2, 24}, // 1.88%
+ { 2816, 3, 23}, // 9.30%
+ { 3200, 2, 20}, // 2.70%
+ { 3456, 3, 18}, // 1.79%
+ { 3584, 4, 18}, // 1.74%
+ { 4096, 2, 16}, // 0.29%
+ { 4736, 3, 13}, // 3.99%
+ { 5376, 2, 12}, // 1.88%
+ { 6144, 3, 10}, // 0.20%
+ { 6528, 4, 10}, // 0.54%
+ { 6784, 5, 9}, // 0.75%
+ { 7168, 7, 9}, // 0.08%
+ { 8192, 2, 8}, // 0.29%
+ { 9472, 5, 6}, // 8.23%
+ { 10240, 4, 6}, // 6.82%
+ { 12288, 3, 5}, // 0.20%
+ { 13568, 5, 4}, // 0.75%
+ { 14336, 7, 4}, // 0.08%
+ { 16384, 2, 4}, // 0.29%
+ { 20480, 5, 3}, // 0.12%
+ { 24576, 3, 2}, // 0.20%
+ { 28672, 7, 2}, // 0.08%
+ { 32768, 4, 2}, // 0.15%
+ { 40960, 5, 2}, // 0.12%
+ { 49152, 6, 2}, // 0.10%
+ { 57344, 7, 2}, // 0.08%
+ { 65536, 8, 2}, // 0.07%
+ { 73728, 9, 2}, // 0.07%
+ { 81920, 10, 2}, // 0.06%
+ { 90112, 11, 2}, // 0.05%
+ { 98304, 12, 2}, // 0.05%
+ { 106496, 13, 2}, // 0.05%
+ { 122880, 15, 2}, // 0.04%
+ { 131072, 16, 2}, // 0.04%
+ { 139264, 17, 2}, // 0.03%
+ { 155648, 19, 2}, // 0.03%
+ { 163840, 20, 2}, // 0.03%
+ { 180224, 22, 2}, // 0.03%
+ { 204800, 25, 2}, // 0.02%
+ { 221184, 27, 2}, // 0.02%
+ { 237568, 29, 2}, // 0.02%
+ { 262144, 32, 2}, // 0.02%
+};
+#elif TCMALLOC_PAGE_SHIFT == 15
+static_assert(kMaxSize == 262144, "kMaxSize mismatch");
+static const int kCount = 78;
+static_assert(kCount <= kNumClasses);
+const int SizeMap::kExperimentalSizeClassesCount = kCount;
+const SizeClassInfo SizeMap::kExperimentalSizeClasses[SizeMap::kExperimentalSizeClassesCount] = {
+ // <bytes>, <pages>, <batch size> <fixed>
+ { 0, 0, 0}, // +Inf%
+ { 8, 1, 32}, // 0.15%
+ { 16, 1, 32}, // 0.15%
+ { 32, 1, 32}, // 0.15%
+ { 48, 1, 32}, // 0.24%
+ { 64, 1, 32}, // 0.15%
+ { 80, 1, 32}, // 0.29%
+ { 96, 1, 32}, // 0.24%
+ { 112, 1, 32}, // 0.34%
+ { 128, 1, 32}, // 0.15%
+ { 144, 1, 32}, // 0.39%
+ { 160, 1, 32}, // 0.54%
+ { 176, 1, 32}, // 0.24%
+ { 192, 1, 32}, // 0.54%
+ { 208, 1, 32}, // 0.49%
+ { 224, 1, 32}, // 0.34%
+ { 240, 1, 32}, // 0.54%
+ { 256, 1, 32}, // 0.15%
+ { 272, 1, 32}, // 0.54%
+ { 288, 1, 32}, // 0.84%
+ { 304, 1, 32}, // 0.89%
+ { 336, 1, 32}, // 0.69%
+ { 368, 1, 32}, // 0.20%
+ { 416, 1, 32}, // 1.13%
+ { 448, 1, 32}, // 0.34%
+ { 480, 1, 32}, // 0.54%
+ { 512, 1, 32}, // 0.15%
+ { 576, 1, 32}, // 1.74%
+ { 640, 1, 32}, // 0.54%
+ { 704, 1, 32}, // 1.33%
+ { 768, 1, 32}, // 1.74%
+ { 832, 1, 32}, // 1.13%
+ { 896, 1, 32}, // 1.74%
+ { 1024, 1, 32}, // 0.15%
+ { 1152, 1, 32}, // 1.74%
+ { 1280, 1, 32}, // 2.55%
+ { 1408, 1, 32}, // 1.33%
+ { 1536, 1, 32}, // 1.74%
+ { 1664, 1, 32}, // 3.80%
+ { 1920, 1, 32}, // 0.54%
+ { 2048, 1, 32}, // 0.15%
+ { 2176, 1, 30}, // 0.54%
+ { 2304, 1, 28}, // 1.74%
+ { 2432, 1, 26}, // 3.80%
+ { 2688, 1, 24}, // 1.74%
+ { 2944, 1, 22}, // 1.33%
+ { 3200, 1, 20}, // 2.55%
+ { 3584, 1, 18}, // 1.74%
+ { 4096, 1, 16}, // 0.15%
+ { 4608, 1, 14}, // 1.74%
+ { 5376, 1, 12}, // 1.74%
+ { 5632, 2, 11}, // 5.86%
+ { 6528, 1, 10}, // 0.54%
+ { 7168, 2, 9}, // 1.66%
+ { 8192, 1, 8}, // 0.15%
+ { 9344, 2, 7}, // 0.27%
+ { 10880, 1, 6}, // 0.54%
+ { 13056, 2, 5}, // 0.47%
+ { 13952, 3, 4}, // 0.70%
+ { 16384, 1, 4}, // 0.15%
+ { 19072, 3, 3}, // 3.14%
+ { 21760, 2, 3}, // 0.47%
+ { 24576, 3, 2}, // 0.05%
+ { 28032, 6, 2}, // 0.22%
+ { 32768, 1, 2}, // 0.15%
+ { 38144, 5, 2}, // 7.41%
+ { 40960, 4, 2}, // 6.71%
+ { 49152, 3, 2}, // 0.05%
+ { 57344, 7, 2}, // 0.02%
+ { 65536, 2, 2}, // 0.07%
+ { 81920, 5, 2}, // 0.03%
+ { 98304, 3, 2}, // 0.05%
+ { 114688, 7, 2}, // 0.02%
+ { 131072, 4, 2}, // 0.04%
+ { 163840, 5, 2}, // 0.03%
+ { 196608, 6, 2}, // 0.02%
+ { 229376, 7, 2}, // 0.02%
+ { 262144, 8, 2}, // 0.02%
+};
+#elif TCMALLOC_PAGE_SHIFT == 18
+static_assert(kMaxSize == 262144, "kMaxSize mismatch");
+static const int kCount = 89;
+static_assert(kCount <= kNumClasses);
+const int SizeMap::kExperimentalSizeClassesCount = kCount;
+const SizeClassInfo SizeMap::kExperimentalSizeClasses[SizeMap::kExperimentalSizeClassesCount] = {
+ // <bytes>, <pages>, <batch size> <fixed>
+ { 0, 0, 0}, // +Inf%
+ { 8, 1, 32}, // 0.02%
+ { 16, 1, 32}, // 0.02%
+ { 32, 1, 32}, // 0.02%
+ { 48, 1, 32}, // 0.02%
+ { 64, 1, 32}, // 0.02%
+ { 80, 1, 32}, // 0.04%
+ { 96, 1, 32}, // 0.04%
+ { 112, 1, 32}, // 0.04%
+ { 128, 1, 32}, // 0.02%
+ { 144, 1, 32}, // 0.04%
+ { 160, 1, 32}, // 0.04%
+ { 176, 1, 32}, // 0.05%
+ { 192, 1, 32}, // 0.04%
+ { 224, 1, 32}, // 0.04%
+ { 240, 1, 32}, // 0.04%
+ { 256, 1, 32}, // 0.02%
+ { 288, 1, 32}, // 0.04%
+ { 320, 1, 32}, // 0.04%
+ { 352, 1, 32}, // 0.12%
+ { 368, 1, 32}, // 0.07%
+ { 416, 1, 32}, // 0.04%
+ { 464, 1, 32}, // 0.19%
+ { 512, 1, 32}, // 0.02%
+ { 576, 1, 32}, // 0.04%
+ { 640, 1, 32}, // 0.17%
+ { 704, 1, 32}, // 0.12%
+ { 832, 1, 32}, // 0.04%
+ { 896, 1, 32}, // 0.21%
+ { 1024, 1, 32}, // 0.02%
+ { 1152, 1, 32}, // 0.26%
+ { 1280, 1, 32}, // 0.41%
+ { 1408, 1, 32}, // 0.12%
+ { 1536, 1, 32}, // 0.41%
+ { 1664, 1, 32}, // 0.36%
+ { 1792, 1, 32}, // 0.21%
+ { 1920, 1, 32}, // 0.41%
+ { 2048, 1, 32}, // 0.02%
+ { 2176, 1, 30}, // 0.41%
+ { 2304, 1, 28}, // 0.71%
+ { 2432, 1, 26}, // 0.76%
+ { 2688, 1, 24}, // 0.56%
+ { 2944, 1, 22}, // 0.07%
+ { 3072, 1, 21}, // 0.41%
+ { 3328, 1, 19}, // 1.00%
+ { 3584, 1, 18}, // 0.21%
+ { 3840, 1, 17}, // 0.41%
+ { 4096, 1, 16}, // 0.02%
+ { 4608, 1, 14}, // 1.61%
+ { 5120, 1, 12}, // 0.41%
+ { 5504, 1, 11}, // 1.35%
+ { 5760, 1, 11}, // 1.15%
+ { 6144, 1, 10}, // 1.61%
+ { 6528, 1, 10}, // 0.41%
+ { 7040, 1, 9}, // 0.66%
+ { 7168, 1, 9}, // 1.61%
+ { 7680, 1, 8}, // 0.41%
+ { 8192, 1, 8}, // 0.02%
+ { 8704, 1, 7}, // 0.41%
+ { 9344, 1, 7}, // 0.21%
+ { 9984, 1, 6}, // 1.00%
+ { 10880, 1, 6}, // 0.41%
+ { 11904, 1, 5}, // 0.12%
+ { 13056, 1, 5}, // 0.41%
+ { 14464, 1, 4}, // 0.71%
+ { 16384, 1, 4}, // 0.02%
+ { 17408, 1, 3}, // 0.41%
+ { 20096, 1, 3}, // 0.36%
+ { 21760, 1, 3}, // 0.41%
+ { 23808, 1, 2}, // 0.12%
+ { 26112, 1, 2}, // 0.41%
+ { 29056, 1, 2}, // 0.26%
+ { 32768, 1, 2}, // 0.02%
+ { 37376, 1, 2}, // 0.21%
+ { 43648, 1, 2}, // 0.12%
+ { 45568, 2, 2}, // 4.61%
+ { 52352, 1, 2}, // 0.17%
+ { 56064, 2, 2}, // 3.92%
+ { 65536, 1, 2}, // 0.02%
+ { 74880, 2, 2}, // 0.03%
+ { 87296, 1, 2}, // 0.12%
+ { 104832, 2, 2}, // 0.03%
+ { 112256, 3, 2}, // 0.09%
+ { 131072, 1, 2}, // 0.02%
+ { 149760, 3, 2}, // 5.03%
+ { 174720, 2, 2}, // 0.03%
+ { 196608, 3, 2}, // 0.01%
+ { 209664, 4, 2}, // 0.03%
+ { 262144, 1, 2}, // 0.02%
+};
+#elif TCMALLOC_PAGE_SHIFT == 12
+static_assert(kMaxSize == 8192, "kMaxSize mismatch");
+static const int kCount = 46;
+static_assert(kCount <= kNumClasses);
+const int SizeMap::kExperimentalSizeClassesCount = kCount;
+const SizeClassInfo SizeMap::kExperimentalSizeClasses[SizeMap::kExperimentalSizeClassesCount] = {
+ // <bytes>, <pages>, <batch size> <fixed>
+ { 0, 0, 0}, // +Inf%
+ { 8, 1, 32}, // 1.17%
+ { 16, 1, 32}, // 1.17%
+ { 32, 1, 32}, // 1.17%
+ { 48, 1, 32}, // 1.57%
+ { 64, 1, 32}, // 1.17%
+ { 80, 1, 32}, // 1.57%
+ { 96, 1, 32}, // 2.78%
+ { 112, 1, 32}, // 2.78%
+ { 128, 1, 32}, // 1.17%
+ { 144, 1, 32}, // 2.78%
+ { 160, 1, 32}, // 3.60%
+ { 176, 1, 32}, // 2.37%
+ { 192, 1, 32}, // 2.78%
+ { 208, 1, 32}, // 4.86%
+ { 224, 1, 32}, // 2.78%
+ { 240, 1, 32}, // 1.57%
+ { 256, 1, 32}, // 1.17%
+ { 272, 1, 32}, // 1.57%
+ { 288, 1, 32}, // 2.78%
+ { 304, 1, 32}, // 4.86%
+ { 336, 1, 32}, // 2.78%
+ { 368, 1, 32}, // 2.37%
+ { 400, 1, 32}, // 3.60%
+ { 448, 1, 32}, // 2.78%
+ { 512, 1, 32}, // 1.17%
+ { 576, 2, 32}, // 2.18%
+ { 640, 2, 32}, // 7.29%
+ { 704, 2, 32}, // 6.40%
+ { 768, 2, 32}, // 7.29%
+ { 896, 2, 32}, // 2.18%
+ { 1024, 2, 32}, // 0.59%
+ { 1152, 3, 32}, // 7.08%
+ { 1280, 3, 32}, // 7.08%
+ { 1536, 3, 32}, // 0.39%
+ { 1792, 4, 32}, // 1.88%
+ { 2048, 4, 32}, // 0.29%
+ { 2304, 4, 28}, // 1.88%
+ { 2688, 4, 24}, // 1.88%
+ { 3200, 4, 20}, // 2.70%
+ { 3584, 7, 18}, // 0.17%
+ { 4096, 4, 16}, // 0.29%
+ { 4736, 5, 13}, // 8.36%
+ { 6144, 3, 10}, // 0.39%
+ { 7168, 7, 9}, // 0.17%
+ { 8192, 4, 8}, // 0.29%
+};
+#else
+#error "Unsupported TCMALLOC_PAGE_SHIFT value!"
+#endif
+#endif
+// clang-format on
+
+} // namespace tcmalloc
diff --git a/contrib/libs/tcmalloc/tcmalloc/experimental_pow2_below64_size_class.cc b/contrib/libs/tcmalloc/tcmalloc/experimental_pow2_below64_size_class.cc
new file mode 100755
index 0000000000..c6769f450e
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/experimental_pow2_below64_size_class.cc
@@ -0,0 +1,679 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/common.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+
+namespace tcmalloc_internal {
+
+// <fixed> is fixed per-size-class overhead due to end-of-span fragmentation
+// and other factors. For instance, if we have a 96 byte size class, and use a
+// single 8KiB page, then we will hold 85 objects per span, and have 32 bytes
+// left over. There is also a fixed component of 48 bytes of TCMalloc metadata
+// per span. Together, the fixed overhead would be wasted/allocated =
+// (32 + 48) / (8192 - 32) ~= 0.98%.
+// There is also a dynamic component to overhead based on mismatches between the
+// number of bytes requested and the number of bytes provided by the size class.
+// Together they sum to the total overhead; for instance if you asked for a
+// 50-byte allocation that rounds up to a 64-byte size class, the dynamic
+// overhead would be 28%, and if <fixed> were 22% it would mean (on average)
+// 25 bytes of overhead for allocations of that size.
+
+// clang-format off
+#if defined(__cpp_aligned_new) && __STDCPP_DEFAULT_NEW_ALIGNMENT__ <= 8
+#if TCMALLOC_PAGE_SHIFT == 13
+static_assert(kMaxSize == 262144, "kMaxSize mismatch");
+static const int kCount = 82;
+static_assert(kCount <= kNumClasses);
+const int SizeMap::kExperimentalPow2Below64SizeClassesCount = kCount;
+const SizeClassInfo SizeMap::kExperimentalPow2Below64SizeClasses[SizeMap::kExperimentalPow2Below64SizeClassesCount] = {
+ // <bytes>, <pages>, <batch size> <fixed>
+ { 0, 0, 0}, // +Inf%
+ { 8, 1, 32}, // 0.59%
+ { 16, 1, 32}, // 0.59%
+ { 32, 1, 32}, // 0.59%
+ { 64, 1, 32}, // 0.59%
+ { 72, 1, 32}, // 1.28%
+ { 80, 1, 32}, // 0.98%
+ { 88, 1, 32}, // 0.68%
+ { 96, 1, 32}, // 0.98%
+ { 104, 1, 32}, // 1.58%
+ { 112, 1, 32}, // 0.78%
+ { 120, 1, 32}, // 0.98%
+ { 128, 1, 32}, // 0.59%
+ { 136, 1, 32}, // 0.98%
+ { 144, 1, 32}, // 2.18%
+ { 160, 1, 32}, // 0.98%
+ { 176, 1, 32}, // 1.78%
+ { 192, 1, 32}, // 2.18%
+ { 208, 1, 32}, // 1.58%
+ { 224, 1, 32}, // 2.18%
+ { 240, 1, 32}, // 0.98%
+ { 256, 1, 32}, // 0.59%
+ { 272, 1, 32}, // 0.98%
+ { 296, 1, 32}, // 3.10%
+ { 312, 1, 32}, // 1.58%
+ { 336, 1, 32}, // 2.18%
+ { 352, 1, 32}, // 1.78%
+ { 368, 1, 32}, // 1.78%
+ { 408, 1, 32}, // 0.98%
+ { 448, 1, 32}, // 2.18%
+ { 480, 1, 32}, // 0.98%
+ { 512, 1, 32}, // 0.59%
+ { 576, 1, 32}, // 2.18%
+ { 640, 1, 32}, // 7.29%
+ { 704, 1, 32}, // 6.40%
+ { 768, 1, 32}, // 7.29%
+ { 896, 1, 32}, // 2.18%
+ { 1024, 1, 32}, // 0.59%
+ { 1152, 2, 32}, // 1.88%
+ { 1280, 2, 32}, // 6.98%
+ { 1408, 2, 32}, // 6.10%
+ { 1536, 2, 32}, // 6.98%
+ { 1792, 2, 32}, // 1.88%
+ { 2048, 2, 32}, // 0.29%
+ { 2304, 2, 28}, // 1.88%
+ { 2688, 2, 24}, // 1.88%
+ { 2816, 3, 23}, // 9.30%
+ { 3200, 2, 20}, // 2.70%
+ { 3456, 3, 18}, // 1.79%
+ { 3584, 4, 18}, // 1.74%
+ { 4096, 1, 16}, // 0.29%
+ { 4736, 3, 13}, // 3.99%
+ { 5376, 2, 12}, // 1.88%
+ { 6144, 3, 10}, // 0.20%
+ { 6528, 4, 10}, // 0.54%
+ { 7168, 7, 9}, // 0.08%
+ { 8192, 1, 8}, // 0.29%
+ { 9472, 5, 6}, // 8.23%
+ { 10240, 4, 6}, // 6.82%
+ { 12288, 3, 5}, // 0.20%
+ { 13568, 5, 4}, // 0.75%
+ { 14336, 7, 4}, // 0.08%
+ { 16384, 2, 4}, // 0.29%
+ { 20480, 5, 3}, // 0.12%
+ { 24576, 3, 2}, // 0.20%
+ { 28672, 7, 2}, // 0.08%
+ { 32768, 4, 2}, // 0.15%
+ { 40960, 5, 2}, // 0.12%
+ { 49152, 6, 2}, // 0.10%
+ { 57344, 7, 2}, // 0.08%
+ { 65536, 8, 2}, // 0.07%
+ { 73728, 9, 2}, // 0.07%
+ { 81920, 10, 2}, // 0.06%
+ { 98304, 12, 2}, // 0.05%
+ { 114688, 14, 2}, // 0.04%
+ { 131072, 16, 2}, // 0.04%
+ { 147456, 18, 2}, // 0.03%
+ { 163840, 20, 2}, // 0.03%
+ { 180224, 22, 2}, // 0.03%
+ { 204800, 25, 2}, // 0.02%
+ { 237568, 29, 2}, // 0.02%
+ { 262144, 32, 2}, // 0.02%
+};
+#elif TCMALLOC_PAGE_SHIFT == 15
+static_assert(kMaxSize == 262144, "kMaxSize mismatch");
+static const int kCount = 74;
+static_assert(kCount <= kNumClasses);
+const int SizeMap::kExperimentalPow2Below64SizeClassesCount = kCount;
+const SizeClassInfo SizeMap::kExperimentalPow2Below64SizeClasses[SizeMap::kExperimentalPow2Below64SizeClassesCount] = {
+ // <bytes>, <pages>, <batch size> <fixed>
+ { 0, 0, 0}, // +Inf%
+ { 8, 1, 32}, // 0.15%
+ { 16, 1, 32}, // 0.15%
+ { 32, 1, 32}, // 0.15%
+ { 64, 1, 32}, // 0.15%
+ { 72, 1, 32}, // 0.17%
+ { 80, 1, 32}, // 0.29%
+ { 88, 1, 32}, // 0.24%
+ { 96, 1, 32}, // 0.24%
+ { 104, 1, 32}, // 0.17%
+ { 112, 1, 32}, // 0.34%
+ { 128, 1, 32}, // 0.15%
+ { 144, 1, 32}, // 0.39%
+ { 160, 1, 32}, // 0.54%
+ { 176, 1, 32}, // 0.24%
+ { 192, 1, 32}, // 0.54%
+ { 208, 1, 32}, // 0.49%
+ { 224, 1, 32}, // 0.34%
+ { 240, 1, 32}, // 0.54%
+ { 256, 1, 32}, // 0.15%
+ { 280, 1, 32}, // 0.17%
+ { 304, 1, 32}, // 0.89%
+ { 328, 1, 32}, // 1.06%
+ { 352, 1, 32}, // 0.24%
+ { 384, 1, 32}, // 0.54%
+ { 416, 1, 32}, // 1.13%
+ { 448, 1, 32}, // 0.34%
+ { 488, 1, 32}, // 0.37%
+ { 512, 1, 32}, // 0.15%
+ { 576, 1, 32}, // 1.74%
+ { 640, 1, 32}, // 0.54%
+ { 704, 1, 32}, // 1.33%
+ { 832, 1, 32}, // 1.13%
+ { 896, 1, 32}, // 1.74%
+ { 1024, 1, 32}, // 0.15%
+ { 1152, 1, 32}, // 1.74%
+ { 1280, 1, 32}, // 2.55%
+ { 1536, 1, 32}, // 1.74%
+ { 1792, 1, 32}, // 1.74%
+ { 2048, 1, 32}, // 0.15%
+ { 2176, 1, 30}, // 0.54%
+ { 2304, 1, 28}, // 1.74%
+ { 2688, 1, 24}, // 1.74%
+ { 2944, 1, 22}, // 1.33%
+ { 3200, 1, 20}, // 2.55%
+ { 3584, 1, 18}, // 1.74%
+ { 4096, 1, 16}, // 0.15%
+ { 4608, 1, 14}, // 1.74%
+ { 5376, 1, 12}, // 1.74%
+ { 6528, 1, 10}, // 0.54%
+ { 7168, 2, 9}, // 1.66%
+ { 8192, 1, 8}, // 0.15%
+ { 9344, 2, 7}, // 0.27%
+ { 10880, 1, 6}, // 0.54%
+ { 13952, 3, 4}, // 0.70%
+ { 16384, 1, 4}, // 0.15%
+ { 19072, 3, 3}, // 3.14%
+ { 21760, 2, 3}, // 0.47%
+ { 24576, 3, 2}, // 0.05%
+ { 28032, 6, 2}, // 0.22%
+ { 32768, 1, 2}, // 0.15%
+ { 38144, 5, 2}, // 7.41%
+ { 40960, 4, 2}, // 6.71%
+ { 49152, 3, 2}, // 0.05%
+ { 57344, 7, 2}, // 0.02%
+ { 65536, 2, 2}, // 0.07%
+ { 81920, 5, 2}, // 0.03%
+ { 98304, 3, 2}, // 0.05%
+ { 114688, 7, 2}, // 0.02%
+ { 131072, 4, 2}, // 0.04%
+ { 163840, 5, 2}, // 0.03%
+ { 196608, 6, 2}, // 0.02%
+ { 229376, 7, 2}, // 0.02%
+ { 262144, 8, 2}, // 0.02%
+};
+#elif TCMALLOC_PAGE_SHIFT == 18
+static_assert(kMaxSize == 262144, "kMaxSize mismatch");
+static const int kCount = 85;
+static_assert(kCount <= kNumClasses);
+const int SizeMap::kExperimentalPow2Below64SizeClassesCount = kCount;
+const SizeClassInfo SizeMap::kExperimentalPow2Below64SizeClasses[SizeMap::kExperimentalPow2Below64SizeClassesCount] = {
+ // <bytes>, <pages>, <batch size> <fixed>
+ { 0, 0, 0}, // +Inf%
+ { 8, 1, 32}, // 0.02%
+ { 16, 1, 32}, // 0.02%
+ { 32, 1, 32}, // 0.02%
+ { 64, 1, 32}, // 0.02%
+ { 72, 1, 32}, // 0.04%
+ { 80, 1, 32}, // 0.04%
+ { 88, 1, 32}, // 0.05%
+ { 96, 1, 32}, // 0.04%
+ { 104, 1, 32}, // 0.04%
+ { 112, 1, 32}, // 0.04%
+ { 128, 1, 32}, // 0.02%
+ { 144, 1, 32}, // 0.04%
+ { 160, 1, 32}, // 0.04%
+ { 176, 1, 32}, // 0.05%
+ { 192, 1, 32}, // 0.04%
+ { 208, 1, 32}, // 0.04%
+ { 240, 1, 32}, // 0.04%
+ { 256, 1, 32}, // 0.02%
+ { 304, 1, 32}, // 0.05%
+ { 336, 1, 32}, // 0.04%
+ { 360, 1, 32}, // 0.04%
+ { 408, 1, 32}, // 0.10%
+ { 456, 1, 32}, // 0.17%
+ { 512, 1, 32}, // 0.02%
+ { 576, 1, 32}, // 0.04%
+ { 640, 1, 32}, // 0.17%
+ { 704, 1, 32}, // 0.12%
+ { 768, 1, 32}, // 0.12%
+ { 832, 1, 32}, // 0.04%
+ { 896, 1, 32}, // 0.21%
+ { 1024, 1, 32}, // 0.02%
+ { 1152, 1, 32}, // 0.26%
+ { 1280, 1, 32}, // 0.41%
+ { 1536, 1, 32}, // 0.41%
+ { 1664, 1, 32}, // 0.36%
+ { 1792, 1, 32}, // 0.21%
+ { 1920, 1, 32}, // 0.41%
+ { 2048, 1, 32}, // 0.02%
+ { 2176, 1, 30}, // 0.41%
+ { 2304, 1, 28}, // 0.71%
+ { 2432, 1, 26}, // 0.76%
+ { 2560, 1, 25}, // 0.41%
+ { 2688, 1, 24}, // 0.56%
+ { 2816, 1, 23}, // 0.12%
+ { 2944, 1, 22}, // 0.07%
+ { 3072, 1, 21}, // 0.41%
+ { 3328, 1, 19}, // 1.00%
+ { 3584, 1, 18}, // 0.21%
+ { 3840, 1, 17}, // 0.41%
+ { 4096, 1, 16}, // 0.02%
+ { 4736, 1, 13}, // 0.66%
+ { 5504, 1, 11}, // 1.35%
+ { 6144, 1, 10}, // 1.61%
+ { 6528, 1, 10}, // 0.41%
+ { 6784, 1, 9}, // 1.71%
+ { 7168, 1, 9}, // 1.61%
+ { 7680, 1, 8}, // 0.41%
+ { 8192, 1, 8}, // 0.02%
+ { 8704, 1, 7}, // 0.41%
+ { 9344, 1, 7}, // 0.21%
+ { 10880, 1, 6}, // 0.41%
+ { 11904, 1, 5}, // 0.12%
+ { 13056, 1, 5}, // 0.41%
+ { 14464, 1, 4}, // 0.71%
+ { 16384, 1, 4}, // 0.02%
+ { 18688, 1, 3}, // 0.21%
+ { 21760, 1, 3}, // 0.41%
+ { 26112, 1, 2}, // 0.41%
+ { 29056, 1, 2}, // 0.26%
+ { 32768, 1, 2}, // 0.02%
+ { 37376, 1, 2}, // 0.21%
+ { 43648, 1, 2}, // 0.12%
+ { 52352, 1, 2}, // 0.17%
+ { 56064, 2, 2}, // 3.92%
+ { 65536, 1, 2}, // 0.02%
+ { 74880, 2, 2}, // 0.03%
+ { 87296, 1, 2}, // 0.12%
+ { 104832, 2, 2}, // 0.03%
+ { 112256, 3, 2}, // 0.09%
+ { 131072, 1, 2}, // 0.02%
+ { 149760, 3, 2}, // 5.03%
+ { 174720, 2, 2}, // 0.03%
+ { 209664, 4, 2}, // 0.03%
+ { 262144, 1, 2}, // 0.02%
+};
+#elif TCMALLOC_PAGE_SHIFT == 12
+static_assert(kMaxSize == 8192, "kMaxSize mismatch");
+static const int kCount = 42;
+static_assert(kCount <= kNumClasses);
+const int SizeMap::kExperimentalPow2Below64SizeClassesCount = kCount;
+const SizeClassInfo SizeMap::kExperimentalPow2Below64SizeClasses[SizeMap::kExperimentalPow2Below64SizeClassesCount] = {
+ // <bytes>, <pages>, <batch size> <fixed>
+ { 0, 0, 0}, // +Inf%
+ { 8, 1, 32}, // 1.17%
+ { 16, 1, 32}, // 1.17%
+ { 32, 1, 32}, // 1.17%
+ { 64, 1, 32}, // 1.17%
+ { 72, 1, 32}, // 2.78%
+ { 80, 1, 32}, // 1.57%
+ { 88, 1, 32}, // 2.37%
+ { 96, 1, 32}, // 2.78%
+ { 104, 1, 32}, // 2.17%
+ { 120, 1, 32}, // 1.57%
+ { 128, 1, 32}, // 1.17%
+ { 144, 1, 32}, // 2.78%
+ { 160, 1, 32}, // 3.60%
+ { 184, 1, 32}, // 2.37%
+ { 208, 1, 32}, // 4.86%
+ { 240, 1, 32}, // 1.57%
+ { 256, 1, 32}, // 1.17%
+ { 272, 1, 32}, // 1.57%
+ { 312, 1, 32}, // 2.17%
+ { 336, 1, 32}, // 2.78%
+ { 368, 1, 32}, // 2.37%
+ { 408, 1, 32}, // 1.57%
+ { 512, 1, 32}, // 1.17%
+ { 576, 2, 32}, // 2.18%
+ { 704, 2, 32}, // 6.40%
+ { 768, 2, 32}, // 7.29%
+ { 896, 2, 32}, // 2.18%
+ { 1024, 2, 32}, // 0.59%
+ { 1152, 3, 32}, // 7.08%
+ { 1280, 3, 32}, // 7.08%
+ { 1536, 3, 32}, // 0.39%
+ { 1792, 4, 32}, // 1.88%
+ { 2048, 4, 32}, // 0.29%
+ { 2304, 4, 28}, // 1.88%
+ { 2688, 4, 24}, // 1.88%
+ { 3456, 6, 18}, // 1.79%
+ { 4096, 4, 16}, // 0.29%
+ { 5376, 4, 12}, // 1.88%
+ { 6144, 3, 10}, // 0.39%
+ { 7168, 7, 9}, // 0.17%
+ { 8192, 4, 8}, // 0.29%
+};
+#else
+#error "Unsupported TCMALLOC_PAGE_SHIFT value!"
+#endif
+#else
+#if TCMALLOC_PAGE_SHIFT == 13
+static_assert(kMaxSize == 262144, "kMaxSize mismatch");
+static const int kCount = 82;
+static_assert(kCount <= kNumClasses);
+const int SizeMap::kExperimentalPow2Below64SizeClassesCount = kCount;
+const SizeClassInfo SizeMap::kExperimentalPow2Below64SizeClasses[SizeMap::kExperimentalPow2Below64SizeClassesCount] = {
+ // <bytes>, <pages>, <batch size> <fixed>
+ { 0, 0, 0}, // +Inf%
+ { 8, 1, 32}, // 0.59%
+ { 16, 1, 32}, // 0.59%
+ { 32, 1, 32}, // 0.59%
+ { 64, 1, 32}, // 0.59%
+ { 80, 1, 32}, // 0.98%
+ { 96, 1, 32}, // 0.98%
+ { 112, 1, 32}, // 0.78%
+ { 128, 1, 32}, // 0.59%
+ { 144, 1, 32}, // 2.18%
+ { 160, 1, 32}, // 0.98%
+ { 176, 1, 32}, // 1.78%
+ { 192, 1, 32}, // 2.18%
+ { 208, 1, 32}, // 1.58%
+ { 224, 1, 32}, // 2.18%
+ { 240, 1, 32}, // 0.98%
+ { 256, 1, 32}, // 0.59%
+ { 272, 1, 32}, // 0.98%
+ { 288, 1, 32}, // 2.18%
+ { 304, 1, 32}, // 4.25%
+ { 320, 1, 32}, // 3.00%
+ { 336, 1, 32}, // 2.18%
+ { 352, 1, 32}, // 1.78%
+ { 368, 1, 32}, // 1.78%
+ { 384, 1, 32}, // 2.18%
+ { 400, 1, 32}, // 3.00%
+ { 416, 1, 32}, // 4.25%
+ { 448, 1, 32}, // 2.18%
+ { 480, 1, 32}, // 0.98%
+ { 512, 1, 32}, // 0.59%
+ { 576, 1, 32}, // 2.18%
+ { 640, 1, 32}, // 7.29%
+ { 704, 1, 32}, // 6.40%
+ { 768, 1, 32}, // 7.29%
+ { 896, 1, 32}, // 2.18%
+ { 1024, 1, 32}, // 0.59%
+ { 1152, 2, 32}, // 1.88%
+ { 1280, 2, 32}, // 6.98%
+ { 1408, 2, 32}, // 6.10%
+ { 1536, 2, 32}, // 6.98%
+ { 1792, 2, 32}, // 1.88%
+ { 2048, 2, 32}, // 0.29%
+ { 2304, 2, 28}, // 1.88%
+ { 2688, 2, 24}, // 1.88%
+ { 2816, 3, 23}, // 9.30%
+ { 3200, 2, 20}, // 2.70%
+ { 3456, 3, 18}, // 1.79%
+ { 3584, 4, 18}, // 1.74%
+ { 4096, 1, 16}, // 0.29%
+ { 4736, 3, 13}, // 3.99%
+ { 5376, 2, 12}, // 1.88%
+ { 6144, 3, 10}, // 0.20%
+ { 6528, 4, 10}, // 0.54%
+ { 7168, 7, 9}, // 0.08%
+ { 8192, 1, 8}, // 0.29%
+ { 9472, 5, 6}, // 8.23%
+ { 10240, 4, 6}, // 6.82%
+ { 12288, 3, 5}, // 0.20%
+ { 13568, 5, 4}, // 0.75%
+ { 14336, 7, 4}, // 0.08%
+ { 16384, 2, 4}, // 0.29%
+ { 20480, 5, 3}, // 0.12%
+ { 24576, 3, 2}, // 0.20%
+ { 28672, 7, 2}, // 0.08%
+ { 32768, 4, 2}, // 0.15%
+ { 40960, 5, 2}, // 0.12%
+ { 49152, 6, 2}, // 0.10%
+ { 57344, 7, 2}, // 0.08%
+ { 65536, 8, 2}, // 0.07%
+ { 73728, 9, 2}, // 0.07%
+ { 81920, 10, 2}, // 0.06%
+ { 90112, 11, 2}, // 0.05%
+ { 98304, 12, 2}, // 0.05%
+ { 106496, 13, 2}, // 0.05%
+ { 114688, 14, 2}, // 0.04%
+ { 131072, 16, 2}, // 0.04%
+ { 147456, 18, 2}, // 0.03%
+ { 163840, 20, 2}, // 0.03%
+ { 180224, 22, 2}, // 0.03%
+ { 204800, 25, 2}, // 0.02%
+ { 237568, 29, 2}, // 0.02%
+ { 262144, 32, 2}, // 0.02%
+};
+#elif TCMALLOC_PAGE_SHIFT == 15
+static_assert(kMaxSize == 262144, "kMaxSize mismatch");
+static const int kCount = 74;
+static_assert(kCount <= kNumClasses);
+const int SizeMap::kExperimentalPow2Below64SizeClassesCount = kCount;
+const SizeClassInfo SizeMap::kExperimentalPow2Below64SizeClasses[SizeMap::kExperimentalPow2Below64SizeClassesCount] = {
+ // <bytes>, <pages>, <batch size> <fixed>
+ { 0, 0, 0}, // +Inf%
+ { 8, 1, 32}, // 0.15%
+ { 16, 1, 32}, // 0.15%
+ { 32, 1, 32}, // 0.15%
+ { 64, 1, 32}, // 0.15%
+ { 80, 1, 32}, // 0.29%
+ { 96, 1, 32}, // 0.24%
+ { 112, 1, 32}, // 0.34%
+ { 128, 1, 32}, // 0.15%
+ { 144, 1, 32}, // 0.39%
+ { 160, 1, 32}, // 0.54%
+ { 176, 1, 32}, // 0.24%
+ { 192, 1, 32}, // 0.54%
+ { 208, 1, 32}, // 0.49%
+ { 224, 1, 32}, // 0.34%
+ { 240, 1, 32}, // 0.54%
+ { 256, 1, 32}, // 0.15%
+ { 272, 1, 32}, // 0.54%
+ { 288, 1, 32}, // 0.84%
+ { 304, 1, 32}, // 0.89%
+ { 320, 1, 32}, // 0.54%
+ { 352, 1, 32}, // 0.24%
+ { 384, 1, 32}, // 0.54%
+ { 416, 1, 32}, // 1.13%
+ { 448, 1, 32}, // 0.34%
+ { 480, 1, 32}, // 0.54%
+ { 512, 1, 32}, // 0.15%
+ { 576, 1, 32}, // 1.74%
+ { 640, 1, 32}, // 0.54%
+ { 704, 1, 32}, // 1.33%
+ { 768, 1, 32}, // 1.74%
+ { 832, 1, 32}, // 1.13%
+ { 896, 1, 32}, // 1.74%
+ { 1024, 1, 32}, // 0.15%
+ { 1152, 1, 32}, // 1.74%
+ { 1280, 1, 32}, // 2.55%
+ { 1408, 1, 32}, // 1.33%
+ { 1536, 1, 32}, // 1.74%
+ { 1792, 1, 32}, // 1.74%
+ { 2048, 1, 32}, // 0.15%
+ { 2176, 1, 30}, // 0.54%
+ { 2304, 1, 28}, // 1.74%
+ { 2688, 1, 24}, // 1.74%
+ { 2944, 1, 22}, // 1.33%
+ { 3200, 1, 20}, // 2.55%
+ { 3584, 1, 18}, // 1.74%
+ { 4096, 1, 16}, // 0.15%
+ { 4608, 1, 14}, // 1.74%
+ { 5376, 1, 12}, // 1.74%
+ { 6528, 1, 10}, // 0.54%
+ { 7168, 2, 9}, // 1.66%
+ { 8192, 1, 8}, // 0.15%
+ { 9344, 2, 7}, // 0.27%
+ { 10880, 1, 6}, // 0.54%
+ { 13952, 3, 4}, // 0.70%
+ { 16384, 1, 4}, // 0.15%
+ { 19072, 3, 3}, // 3.14%
+ { 21760, 2, 3}, // 0.47%
+ { 24576, 3, 2}, // 0.05%
+ { 28032, 6, 2}, // 0.22%
+ { 32768, 1, 2}, // 0.15%
+ { 38144, 5, 2}, // 7.41%
+ { 40960, 4, 2}, // 6.71%
+ { 49152, 3, 2}, // 0.05%
+ { 57344, 7, 2}, // 0.02%
+ { 65536, 2, 2}, // 0.07%
+ { 81920, 5, 2}, // 0.03%
+ { 98304, 3, 2}, // 0.05%
+ { 114688, 7, 2}, // 0.02%
+ { 131072, 4, 2}, // 0.04%
+ { 163840, 5, 2}, // 0.03%
+ { 196608, 6, 2}, // 0.02%
+ { 229376, 7, 2}, // 0.02%
+ { 262144, 8, 2}, // 0.02%
+};
+#elif TCMALLOC_PAGE_SHIFT == 18
+static_assert(kMaxSize == 262144, "kMaxSize mismatch");
+static const int kCount = 85;
+static_assert(kCount <= kNumClasses);
+const int SizeMap::kExperimentalPow2Below64SizeClassesCount = kCount;
+const SizeClassInfo SizeMap::kExperimentalPow2Below64SizeClasses[SizeMap::kExperimentalPow2Below64SizeClassesCount] = {
+ // <bytes>, <pages>, <batch size> <fixed>
+ { 0, 0, 0}, // +Inf%
+ { 8, 1, 32}, // 0.02%
+ { 16, 1, 32}, // 0.02%
+ { 32, 1, 32}, // 0.02%
+ { 64, 1, 32}, // 0.02%
+ { 80, 1, 32}, // 0.04%
+ { 96, 1, 32}, // 0.04%
+ { 112, 1, 32}, // 0.04%
+ { 128, 1, 32}, // 0.02%
+ { 144, 1, 32}, // 0.04%
+ { 160, 1, 32}, // 0.04%
+ { 176, 1, 32}, // 0.05%
+ { 192, 1, 32}, // 0.04%
+ { 208, 1, 32}, // 0.04%
+ { 240, 1, 32}, // 0.04%
+ { 256, 1, 32}, // 0.02%
+ { 304, 1, 32}, // 0.05%
+ { 336, 1, 32}, // 0.04%
+ { 368, 1, 32}, // 0.07%
+ { 416, 1, 32}, // 0.04%
+ { 464, 1, 32}, // 0.19%
+ { 512, 1, 32}, // 0.02%
+ { 576, 1, 32}, // 0.04%
+ { 640, 1, 32}, // 0.17%
+ { 704, 1, 32}, // 0.12%
+ { 768, 1, 32}, // 0.12%
+ { 832, 1, 32}, // 0.04%
+ { 896, 1, 32}, // 0.21%
+ { 1024, 1, 32}, // 0.02%
+ { 1152, 1, 32}, // 0.26%
+ { 1280, 1, 32}, // 0.41%
+ { 1408, 1, 32}, // 0.12%
+ { 1536, 1, 32}, // 0.41%
+ { 1664, 1, 32}, // 0.36%
+ { 1792, 1, 32}, // 0.21%
+ { 1920, 1, 32}, // 0.41%
+ { 2048, 1, 32}, // 0.02%
+ { 2176, 1, 30}, // 0.41%
+ { 2304, 1, 28}, // 0.71%
+ { 2432, 1, 26}, // 0.76%
+ { 2560, 1, 25}, // 0.41%
+ { 2688, 1, 24}, // 0.56%
+ { 2816, 1, 23}, // 0.12%
+ { 2944, 1, 22}, // 0.07%
+ { 3072, 1, 21}, // 0.41%
+ { 3200, 1, 20}, // 1.15%
+ { 3328, 1, 19}, // 1.00%
+ { 3584, 1, 18}, // 0.21%
+ { 3840, 1, 17}, // 0.41%
+ { 4096, 1, 16}, // 0.02%
+ { 4736, 1, 13}, // 0.66%
+ { 5504, 1, 11}, // 1.35%
+ { 6144, 1, 10}, // 1.61%
+ { 6528, 1, 10}, // 0.41%
+ { 6784, 1, 9}, // 1.71%
+ { 7168, 1, 9}, // 1.61%
+ { 7680, 1, 8}, // 0.41%
+ { 8192, 1, 8}, // 0.02%
+ { 8704, 1, 7}, // 0.41%
+ { 9344, 1, 7}, // 0.21%
+ { 10368, 1, 6}, // 1.15%
+ { 11392, 1, 5}, // 0.07%
+ { 12416, 1, 5}, // 0.56%
+ { 13696, 1, 4}, // 0.76%
+ { 14464, 1, 4}, // 0.71%
+ { 16384, 1, 4}, // 0.02%
+ { 18688, 1, 3}, // 0.21%
+ { 21760, 1, 3}, // 0.41%
+ { 26112, 1, 2}, // 0.41%
+ { 29056, 1, 2}, // 0.26%
+ { 32768, 1, 2}, // 0.02%
+ { 37376, 1, 2}, // 0.21%
+ { 43648, 1, 2}, // 0.12%
+ { 52352, 1, 2}, // 0.17%
+ { 56064, 2, 2}, // 3.92%
+ { 65536, 1, 2}, // 0.02%
+ { 74880, 2, 2}, // 0.03%
+ { 87296, 1, 2}, // 0.12%
+ { 104832, 2, 2}, // 0.03%
+ { 112256, 3, 2}, // 0.09%
+ { 131072, 1, 2}, // 0.02%
+ { 149760, 3, 2}, // 5.03%
+ { 174720, 2, 2}, // 0.03%
+ { 209664, 4, 2}, // 0.03%
+ { 262144, 1, 2}, // 0.02%
+};
+#elif TCMALLOC_PAGE_SHIFT == 12
+static_assert(kMaxSize == 8192, "kMaxSize mismatch");
+static const int kCount = 42;
+static_assert(kCount <= kNumClasses);
+const int SizeMap::kExperimentalPow2Below64SizeClassesCount = kCount;
+const SizeClassInfo SizeMap::kExperimentalPow2Below64SizeClasses[SizeMap::kExperimentalPow2Below64SizeClassesCount] = {
+ // <bytes>, <pages>, <batch size> <fixed>
+ { 0, 0, 0}, // +Inf%
+ { 8, 1, 32}, // 1.17%
+ { 16, 1, 32}, // 1.17%
+ { 32, 1, 32}, // 1.17%
+ { 64, 1, 32}, // 1.17%
+ { 80, 1, 32}, // 1.57%
+ { 96, 1, 32}, // 2.78%
+ { 112, 1, 32}, // 2.78%
+ { 128, 1, 32}, // 1.17%
+ { 144, 1, 32}, // 2.78%
+ { 160, 1, 32}, // 3.60%
+ { 176, 1, 32}, // 2.37%
+ { 192, 1, 32}, // 2.78%
+ { 208, 1, 32}, // 4.86%
+ { 240, 1, 32}, // 1.57%
+ { 256, 1, 32}, // 1.17%
+ { 272, 1, 32}, // 1.57%
+ { 304, 1, 32}, // 4.86%
+ { 336, 1, 32}, // 2.78%
+ { 368, 1, 32}, // 2.37%
+ { 400, 1, 32}, // 3.60%
+ { 448, 1, 32}, // 2.78%
+ { 512, 1, 32}, // 1.17%
+ { 576, 2, 32}, // 2.18%
+ { 640, 2, 32}, // 7.29%
+ { 704, 2, 32}, // 6.40%
+ { 768, 2, 32}, // 7.29%
+ { 896, 2, 32}, // 2.18%
+ { 1024, 2, 32}, // 0.59%
+ { 1152, 3, 32}, // 7.08%
+ { 1280, 3, 32}, // 7.08%
+ { 1536, 3, 32}, // 0.39%
+ { 1792, 4, 32}, // 1.88%
+ { 2048, 4, 32}, // 0.29%
+ { 2304, 4, 28}, // 1.88%
+ { 2688, 4, 24}, // 1.88%
+ { 3456, 6, 18}, // 1.79%
+ { 4096, 4, 16}, // 0.29%
+ { 5376, 4, 12}, // 1.88%
+ { 6144, 3, 10}, // 0.39%
+ { 7168, 7, 9}, // 0.17%
+ { 8192, 4, 8}, // 0.29%
+};
+#else
+#error "Unsupported TCMALLOC_PAGE_SHIFT value!"
+#endif
+#endif
+// clang-format on
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
diff --git a/contrib/libs/tcmalloc/tcmalloc/experimental_pow2_size_class.cc b/contrib/libs/tcmalloc/tcmalloc/experimental_pow2_size_class.cc
new file mode 100755
index 0000000000..1e6da051ca
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/experimental_pow2_size_class.cc
@@ -0,0 +1,239 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/common.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+
+namespace tcmalloc_internal {
+
+// <fixed> is fixed per-size-class overhead due to end-of-span fragmentation
+// and other factors. For instance, if we have a 96 byte size class, and use a
+// single 8KiB page, then we will hold 85 objects per span, and have 32 bytes
+// left over. There is also a fixed component of 48 bytes of TCMalloc metadata
+// per span. Together, the fixed overhead would be wasted/allocated =
+// (32 + 48) / (8192 - 32) ~= 0.98%.
+// There is also a dynamic component to overhead based on mismatches between the
+// number of bytes requested and the number of bytes provided by the size class.
+// Together they sum to the total overhead; for instance if you asked for a
+// 50-byte allocation that rounds up to a 64-byte size class, the dynamic
+// overhead would be 28%, and if <fixed> were 22% it would mean (on average)
+// 25 bytes of overhead for allocations of that size.
+
+// clang-format off
+#if defined(__cpp_aligned_new) && __STDCPP_DEFAULT_NEW_ALIGNMENT__ <= 8
+#if TCMALLOC_PAGE_SHIFT == 13
+static_assert(kMaxSize == 262144, "kMaxSize mismatch");
+static const int kCount = 17;
+static_assert(kCount <= kNumClasses);
+const int SizeMap::kExperimentalPow2SizeClassesCount = kCount;
+const SizeClassInfo SizeMap::kExperimentalPow2SizeClasses[SizeMap::kExperimentalPow2SizeClassesCount] = {
+ // <bytes>, <pages>, <batch size> <fixed>
+ { 0, 0, 0}, // +Inf%
+ { 8, 1, 32}, // 0.59%
+ { 16, 1, 32}, // 0.59%
+ { 32, 1, 32}, // 0.59%
+ { 64, 1, 32}, // 0.59%
+ { 128, 1, 32}, // 0.59%
+ { 256, 1, 32}, // 0.59%
+ { 512, 1, 32}, // 0.59%
+ { 1024, 1, 32}, // 0.59%
+ { 2048, 2, 32}, // 0.29%
+ { 4096, 1, 16}, // 0.29%
+ { 8192, 1, 8}, // 0.29%
+ { 16384, 2, 4}, // 0.29%
+ { 32768, 4, 2}, // 0.15%
+ { 65536, 8, 2}, // 0.07%
+ { 131072, 16, 2}, // 0.04%
+ { 262144, 32, 2}, // 0.02%
+};
+#elif TCMALLOC_PAGE_SHIFT == 15
+static_assert(kMaxSize == 262144, "kMaxSize mismatch");
+static const int kCount = 17;
+static_assert(kCount <= kNumClasses);
+const int SizeMap::kExperimentalPow2SizeClassesCount = kCount;
+const SizeClassInfo SizeMap::kExperimentalPow2SizeClasses[SizeMap::kExperimentalPow2SizeClassesCount] = {
+ // <bytes>, <pages>, <batch size> <fixed>
+ { 0, 0, 0}, // +Inf%
+ { 8, 1, 32}, // 0.15%
+ { 16, 1, 32}, // 0.15%
+ { 32, 1, 32}, // 0.15%
+ { 64, 1, 32}, // 0.15%
+ { 128, 1, 32}, // 0.15%
+ { 256, 1, 32}, // 0.15%
+ { 512, 1, 32}, // 0.15%
+ { 1024, 1, 32}, // 0.15%
+ { 2048, 1, 32}, // 0.15%
+ { 4096, 1, 16}, // 0.15%
+ { 8192, 1, 8}, // 0.15%
+ { 16384, 1, 4}, // 0.15%
+ { 32768, 1, 2}, // 0.15%
+ { 65536, 2, 2}, // 0.07%
+ { 131072, 4, 2}, // 0.04%
+ { 262144, 8, 2}, // 0.02%
+};
+#elif TCMALLOC_PAGE_SHIFT == 18
+static_assert(kMaxSize == 262144, "kMaxSize mismatch");
+static const int kCount = 17;
+static_assert(kCount <= kNumClasses);
+const int SizeMap::kExperimentalPow2SizeClassesCount = kCount;
+const SizeClassInfo SizeMap::kExperimentalPow2SizeClasses[SizeMap::kExperimentalPow2SizeClassesCount] = {
+ // <bytes>, <pages>, <batch size> <fixed>
+ { 0, 0, 0}, // +Inf%
+ { 8, 1, 32}, // 0.02%
+ { 16, 1, 32}, // 0.02%
+ { 32, 1, 32}, // 0.02%
+ { 64, 1, 32}, // 0.02%
+ { 128, 1, 32}, // 0.02%
+ { 256, 1, 32}, // 0.02%
+ { 512, 1, 32}, // 0.02%
+ { 1024, 1, 32}, // 0.02%
+ { 2048, 1, 32}, // 0.02%
+ { 4096, 1, 16}, // 0.02%
+ { 8192, 1, 8}, // 0.02%
+ { 16384, 1, 4}, // 0.02%
+ { 32768, 1, 2}, // 0.02%
+ { 65536, 1, 2}, // 0.02%
+ { 131072, 1, 2}, // 0.02%
+ { 262144, 1, 2}, // 0.02%
+};
+#elif TCMALLOC_PAGE_SHIFT == 12
+static_assert(kMaxSize == 8192, "kMaxSize mismatch");
+static const int kCount = 12;
+static_assert(kCount <= kNumClasses);
+const int SizeMap::kExperimentalPow2SizeClassesCount = kCount;
+const SizeClassInfo SizeMap::kExperimentalPow2SizeClasses[SizeMap::kExperimentalPow2SizeClassesCount] = {
+ // <bytes>, <pages>, <batch size> <fixed>
+ { 0, 0, 0}, // +Inf%
+ { 8, 1, 32}, // 1.17%
+ { 16, 1, 32}, // 1.17%
+ { 32, 1, 32}, // 1.17%
+ { 64, 1, 32}, // 1.17%
+ { 128, 1, 32}, // 1.17%
+ { 256, 1, 32}, // 1.17%
+ { 512, 1, 32}, // 1.17%
+ { 1024, 2, 32}, // 0.59%
+ { 2048, 4, 32}, // 0.29%
+ { 4096, 4, 16}, // 0.29%
+ { 8192, 4, 8}, // 0.29%
+};
+#else
+#error "Unsupported TCMALLOC_PAGE_SHIFT value!"
+#endif
+#else
+#if TCMALLOC_PAGE_SHIFT == 13
+static_assert(kMaxSize == 262144, "kMaxSize mismatch");
+static const int kCount = 17;
+static_assert(kCount <= kNumClasses);
+const int SizeMap::kExperimentalPow2SizeClassesCount = kCount;
+const SizeClassInfo SizeMap::kExperimentalPow2SizeClasses[SizeMap::kExperimentalPow2SizeClassesCount] = {
+ // <bytes>, <pages>, <batch size> <fixed>
+ { 0, 0, 0}, // +Inf%
+ { 8, 1, 32}, // 0.59%
+ { 16, 1, 32}, // 0.59%
+ { 32, 1, 32}, // 0.59%
+ { 64, 1, 32}, // 0.59%
+ { 128, 1, 32}, // 0.59%
+ { 256, 1, 32}, // 0.59%
+ { 512, 1, 32}, // 0.59%
+ { 1024, 1, 32}, // 0.59%
+ { 2048, 2, 32}, // 0.29%
+ { 4096, 1, 16}, // 0.29%
+ { 8192, 1, 8}, // 0.29%
+ { 16384, 2, 4}, // 0.29%
+ { 32768, 4, 2}, // 0.15%
+ { 65536, 8, 2}, // 0.07%
+ { 131072, 16, 2}, // 0.04%
+ { 262144, 32, 2}, // 0.02%
+};
+#elif TCMALLOC_PAGE_SHIFT == 15
+static_assert(kMaxSize == 262144, "kMaxSize mismatch");
+static const int kCount = 17;
+static_assert(kCount <= kNumClasses);
+const int SizeMap::kExperimentalPow2SizeClassesCount = kCount;
+const SizeClassInfo SizeMap::kExperimentalPow2SizeClasses[SizeMap::kExperimentalPow2SizeClassesCount] = {
+ // <bytes>, <pages>, <batch size> <fixed>
+ { 0, 0, 0}, // +Inf%
+ { 8, 1, 32}, // 0.15%
+ { 16, 1, 32}, // 0.15%
+ { 32, 1, 32}, // 0.15%
+ { 64, 1, 32}, // 0.15%
+ { 128, 1, 32}, // 0.15%
+ { 256, 1, 32}, // 0.15%
+ { 512, 1, 32}, // 0.15%
+ { 1024, 1, 32}, // 0.15%
+ { 2048, 1, 32}, // 0.15%
+ { 4096, 1, 16}, // 0.15%
+ { 8192, 1, 8}, // 0.15%
+ { 16384, 1, 4}, // 0.15%
+ { 32768, 1, 2}, // 0.15%
+ { 65536, 2, 2}, // 0.07%
+ { 131072, 4, 2}, // 0.04%
+ { 262144, 8, 2}, // 0.02%
+};
+#elif TCMALLOC_PAGE_SHIFT == 18
+static_assert(kMaxSize == 262144, "kMaxSize mismatch");
+static const int kCount = 17;
+static_assert(kCount <= kNumClasses);
+const int SizeMap::kExperimentalPow2SizeClassesCount = kCount;
+const SizeClassInfo SizeMap::kExperimentalPow2SizeClasses[SizeMap::kExperimentalPow2SizeClassesCount] = {
+ // <bytes>, <pages>, <batch size> <fixed>
+ { 0, 0, 0}, // +Inf%
+ { 8, 1, 32}, // 0.02%
+ { 16, 1, 32}, // 0.02%
+ { 32, 1, 32}, // 0.02%
+ { 64, 1, 32}, // 0.02%
+ { 128, 1, 32}, // 0.02%
+ { 256, 1, 32}, // 0.02%
+ { 512, 1, 32}, // 0.02%
+ { 1024, 1, 32}, // 0.02%
+ { 2048, 1, 32}, // 0.02%
+ { 4096, 1, 16}, // 0.02%
+ { 8192, 1, 8}, // 0.02%
+ { 16384, 1, 4}, // 0.02%
+ { 32768, 1, 2}, // 0.02%
+ { 65536, 1, 2}, // 0.02%
+ { 131072, 1, 2}, // 0.02%
+ { 262144, 1, 2}, // 0.02%
+};
+#elif TCMALLOC_PAGE_SHIFT == 12
+static_assert(kMaxSize == 8192, "kMaxSize mismatch");
+static const int kCount = 12;
+static_assert(kCount <= kNumClasses);
+const int SizeMap::kExperimentalPow2SizeClassesCount = kCount;
+const SizeClassInfo SizeMap::kExperimentalPow2SizeClasses[SizeMap::kExperimentalPow2SizeClassesCount] = {
+ // <bytes>, <pages>, <batch size> <fixed>
+ { 0, 0, 0}, // +Inf%
+ { 8, 1, 32}, // 1.17%
+ { 16, 1, 32}, // 1.17%
+ { 32, 1, 32}, // 1.17%
+ { 64, 1, 32}, // 1.17%
+ { 128, 1, 32}, // 1.17%
+ { 256, 1, 32}, // 1.17%
+ { 512, 1, 32}, // 1.17%
+ { 1024, 2, 32}, // 0.59%
+ { 2048, 4, 32}, // 0.29%
+ { 4096, 4, 16}, // 0.29%
+ { 8192, 4, 8}, // 0.29%
+};
+#else
+#error "Unsupported TCMALLOC_PAGE_SHIFT value!"
+#endif
+#endif
+// clang-format on
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
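The tables above pair each power-of-two object size with a span length in pages and a transfer-batch size. As an illustration of the lookup policy such a table implies (not the library's actual SizeMap code), the standalone sketch below serves a request from the first class whose size covers it, i.e. the request rounded up to the next power of two; the ClassInfo struct and ClassFor helper are simplifications introduced here, the subset shown is the 8 KiB-page (TCMALLOC_PAGE_SHIFT == 13) table with its {0, 0, 0} sentinel dropped, and anything beyond the largest class falls through to the page heap.

#include <cstddef>
#include <cstdio>

struct ClassInfo {
  size_t size;      // object size in bytes (power of two)
  size_t pages;     // pages per span
  int num_to_move;  // batch size moved between caches
};

// Subset of the 8 KiB-page table above, sentinel row omitted.
constexpr ClassInfo kClasses[] = {
    {8, 1, 32},     {16, 1, 32},   {32, 1, 32},     {64, 1, 32},
    {128, 1, 32},   {256, 1, 32},  {512, 1, 32},    {1024, 1, 32},
    {2048, 2, 32},  {4096, 1, 16}, {8192, 1, 8},    {16384, 2, 4},
    {32768, 4, 2},  {65536, 8, 2}, {131072, 16, 2}, {262144, 32, 2},
};

// First class whose size covers the request; nullptr means the request is
// larger than the biggest class and would be served by the page heap.
const ClassInfo* ClassFor(size_t request) {
  for (const ClassInfo& c : kClasses) {
    if (request <= c.size) return &c;
  }
  return nullptr;
}

int main() {
  for (size_t request : {1, 9, 100, 3000, 262144, 300000}) {
    if (const ClassInfo* c = ClassFor(request)) {
      std::printf("request %6zu -> class %6zu (%2zu pages, batch %2d)\n",
                  request, c->size, c->pages, c->num_to_move);
    } else {
      std::printf("request %6zu -> page heap\n", request);
    }
  }
}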
diff --git a/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator.cc b/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator.cc
new file mode 100644
index 0000000000..cc02ed7a05
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator.cc
@@ -0,0 +1,562 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/guarded_page_allocator.h"
+
+#include <fcntl.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include <algorithm>
+#include <array>
+#include <cmath>
+#include <csignal>
+#include <tuple>
+#include <utility>
+
+#include "absl/base/call_once.h"
+#include "absl/base/internal/spinlock.h"
+#include "absl/base/internal/sysinfo.h"
+#include "absl/debugging/stacktrace.h"
+#include "absl/numeric/bits.h"
+#include "absl/strings/string_view.h"
+#include "tcmalloc/common.h"
+#include "tcmalloc/internal/environment.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/internal/util.h"
+#include "tcmalloc/pagemap.h"
+#include "tcmalloc/sampler.h"
+#include "tcmalloc/static_vars.h"
+#include "tcmalloc/system-alloc.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+const size_t GuardedPageAllocator::kMagicSize; // NOLINT
+
+void GuardedPageAllocator::Init(size_t max_alloced_pages, size_t total_pages) {
+ CHECK_CONDITION(max_alloced_pages > 0);
+ CHECK_CONDITION(max_alloced_pages <= total_pages);
+ CHECK_CONDITION(total_pages <= kGpaMaxPages);
+ max_alloced_pages_ = max_alloced_pages;
+ total_pages_ = total_pages;
+
+ // If the system page size is larger than kPageSize, we need to use the
+ // system page size for this allocator since mprotect operates on full pages
+ // only. This case happens on PPC.
+ page_size_ = std::max(kPageSize, static_cast<size_t>(getpagesize()));
+ ASSERT(page_size_ % kPageSize == 0);
+
+ rand_ = reinterpret_cast<uint64_t>(this); // Initialize RNG seed.
+ MapPages();
+}
+
+void GuardedPageAllocator::Destroy() {
+ absl::base_internal::SpinLockHolder h(&guarded_page_lock);
+ if (initialized_) {
+ size_t len = pages_end_addr_ - pages_base_addr_;
+ int err = munmap(reinterpret_cast<void *>(pages_base_addr_), len);
+ ASSERT(err != -1);
+ (void)err;
+ initialized_ = false;
+ }
+}
+
+void *GuardedPageAllocator::Allocate(size_t size, size_t alignment) {
+ if (size == 0) return nullptr;
+ ssize_t free_slot = ReserveFreeSlot();
+ if (free_slot == -1) return nullptr; // All slots are reserved.
+
+ ASSERT(size <= page_size_);
+ ASSERT(alignment <= page_size_);
+ ASSERT(alignment == 0 || absl::has_single_bit(alignment));
+ void *result = reinterpret_cast<void *>(SlotToAddr(free_slot));
+ if (mprotect(result, page_size_, PROT_READ | PROT_WRITE) == -1) {
+ ASSERT(false && "mprotect failed");
+ absl::base_internal::SpinLockHolder h(&guarded_page_lock);
+ num_failed_allocations_++;
+ FreeSlot(free_slot);
+ return nullptr;
+ }
+
+ // Place some allocations at end of page for better overflow detection.
+ MaybeRightAlign(free_slot, size, alignment, &result);
+
+ // Record stack trace.
+ SlotMetadata &d = data_[free_slot];
+ d.dealloc_trace.depth = 0;
+ d.alloc_trace.depth = absl::GetStackTrace(d.alloc_trace.stack, kMaxStackDepth,
+ /*skip_count=*/3);
+ d.alloc_trace.tid = absl::base_internal::GetTID();
+ d.requested_size = size;
+ d.allocation_start = reinterpret_cast<uintptr_t>(result);
+
+ ASSERT(!alignment || d.allocation_start % alignment == 0);
+ return result;
+}
+
+void GuardedPageAllocator::Deallocate(void *ptr) {
+ ASSERT(PointerIsMine(ptr));
+ const uintptr_t page_addr = GetPageAddr(reinterpret_cast<uintptr_t>(ptr));
+ size_t slot = AddrToSlot(page_addr);
+
+ absl::base_internal::SpinLockHolder h(&guarded_page_lock);
+ if (IsFreed(slot)) {
+ double_free_detected_ = true;
+ } else if (WriteOverflowOccurred(slot)) {
+ write_overflow_detected_ = true;
+ }
+
+ CHECK_CONDITION(mprotect(reinterpret_cast<void *>(page_addr), page_size_,
+ PROT_NONE) != -1);
+
+ if (write_overflow_detected_ || double_free_detected_) {
+ *reinterpret_cast<char *>(ptr) = 'X'; // Trigger SEGV handler.
+ CHECK_CONDITION(false); // Unreachable.
+ }
+
+ // Record stack trace.
+ GpaStackTrace &trace = data_[slot].dealloc_trace;
+ trace.depth = absl::GetStackTrace(trace.stack, kMaxStackDepth,
+ /*skip_count=*/2);
+ trace.tid = absl::base_internal::GetTID();
+
+ FreeSlot(slot);
+}
+
+size_t GuardedPageAllocator::GetRequestedSize(const void *ptr) const {
+ ASSERT(PointerIsMine(ptr));
+ size_t slot = AddrToSlot(GetPageAddr(reinterpret_cast<uintptr_t>(ptr)));
+ return data_[slot].requested_size;
+}
+
+std::pair<off_t, size_t> GuardedPageAllocator::GetAllocationOffsetAndSize(
+ const void *ptr) const {
+ ASSERT(PointerIsMine(ptr));
+ const uintptr_t addr = reinterpret_cast<uintptr_t>(ptr);
+ const size_t slot = GetNearestSlot(addr);
+ return {addr - data_[slot].allocation_start, data_[slot].requested_size};
+}
+
+GuardedPageAllocator::ErrorType GuardedPageAllocator::GetStackTraces(
+ const void *ptr, GpaStackTrace *alloc_trace,
+ GpaStackTrace *dealloc_trace) const {
+ ASSERT(PointerIsMine(ptr));
+ const uintptr_t addr = reinterpret_cast<uintptr_t>(ptr);
+ size_t slot = GetNearestSlot(addr);
+ *alloc_trace = data_[slot].alloc_trace;
+ *dealloc_trace = data_[slot].dealloc_trace;
+ return GetErrorType(addr, data_[slot]);
+}
+
+// Guarded samples are taken as a subset of the periodic profiling samples.
+// Computes the mean number of profiling samples taken for every guarded
+// sample.
+static int GetChainedRate() {
+ auto guarded_rate = Parameters::guarded_sampling_rate();
+ auto sample_rate = Parameters::profile_sampling_rate();
+ if (guarded_rate < 0 || sample_rate <= 0) {
+ return guarded_rate;
+ } else {
+ return std::ceil(static_cast<double>(guarded_rate) /
+ static_cast<double>(sample_rate));
+ }
+}
+
+void GuardedPageAllocator::Print(Printer *out) {
+ absl::base_internal::SpinLockHolder h(&guarded_page_lock);
+ out->printf(
+ "\n"
+ "------------------------------------------------\n"
+ "GWP-ASan Status\n"
+ "------------------------------------------------\n"
+ "Successful Allocations: %zu\n"
+ "Failed Allocations: %zu\n"
+ "Slots Currently Allocated: %zu\n"
+ "Slots Currently Quarantined: %zu\n"
+ "Maximum Slots Allocated: %zu / %zu\n"
+ "PARAMETER tcmalloc_guarded_sample_parameter %d\n",
+ num_allocation_requests_ - num_failed_allocations_,
+ num_failed_allocations_, num_alloced_pages_,
+ total_pages_ - num_alloced_pages_, num_alloced_pages_max_,
+ max_alloced_pages_, GetChainedRate());
+}
+
+void GuardedPageAllocator::PrintInPbtxt(PbtxtRegion *gwp_asan) const {
+ absl::base_internal::SpinLockHolder h(&guarded_page_lock);
+ gwp_asan->PrintI64("successful_allocations",
+ num_allocation_requests_ - num_failed_allocations_);
+ gwp_asan->PrintI64("failed_allocations", num_failed_allocations_);
+ gwp_asan->PrintI64("current_slots_allocated", num_alloced_pages_);
+ gwp_asan->PrintI64("current_slots_quarantined",
+ total_pages_ - num_alloced_pages_);
+ gwp_asan->PrintI64("max_slots_allocated", num_alloced_pages_max_);
+ gwp_asan->PrintI64("allocated_slot_limit", max_alloced_pages_);
+ gwp_asan->PrintI64("tcmalloc_guarded_sample_parameter", GetChainedRate());
+}
+
+// Maps 2 * total_pages_ + 1 pages so that there are total_pages_ unique pages
+// we can return from Allocate with guard pages before and after them.
+void GuardedPageAllocator::MapPages() {
+ absl::base_internal::SpinLockHolder h(&guarded_page_lock);
+ ASSERT(!first_page_addr_);
+ ASSERT(page_size_ % getpagesize() == 0);
+ size_t len = (2 * total_pages_ + 1) * page_size_;
+ auto base_addr = reinterpret_cast<uintptr_t>(
+ MmapAligned(len, page_size_, MemoryTag::kSampled));
+ ASSERT(base_addr);
+ if (!base_addr) return;
+
+ // Tell TCMalloc's PageMap about the memory we own.
+ const PageId page = PageIdContaining(reinterpret_cast<void *>(base_addr));
+ const Length page_len = BytesToLengthFloor(len);
+ if (!Static::pagemap().Ensure(page, page_len)) {
+ ASSERT(false && "Failed to notify page map of page-guarded memory.");
+ return;
+ }
+
+ // Allocate memory for slot metadata.
+ data_ = reinterpret_cast<SlotMetadata *>(
+ Static::arena().Alloc(sizeof(*data_) * total_pages_));
+ for (size_t i = 0; i < total_pages_; ++i) {
+ new (&data_[i]) SlotMetadata;
+ }
+
+ pages_base_addr_ = base_addr;
+ pages_end_addr_ = pages_base_addr_ + len;
+
+ // Align first page to page_size_.
+ first_page_addr_ = GetPageAddr(pages_base_addr_ + page_size_);
+
+ std::fill_n(free_pages_, total_pages_, true);
+ initialized_ = true;
+}
+
+// Selects a random slot in O(total_pages_) time.
+ssize_t GuardedPageAllocator::ReserveFreeSlot() {
+ absl::base_internal::SpinLockHolder h(&guarded_page_lock);
+ if (!initialized_ || !allow_allocations_) return -1;
+ num_allocation_requests_++;
+ if (num_alloced_pages_ == max_alloced_pages_) {
+ num_failed_allocations_++;
+ return -1;
+ }
+
+ rand_ = Sampler::NextRandom(rand_);
+ size_t num_free_pages = total_pages_ - num_alloced_pages_;
+ size_t slot = GetIthFreeSlot(rand_ % num_free_pages);
+ ASSERT(free_pages_[slot]);
+ free_pages_[slot] = false;
+ num_alloced_pages_++;
+ num_alloced_pages_max_ = std::max(num_alloced_pages_, num_alloced_pages_max_);
+ return slot;
+}
+
+size_t GuardedPageAllocator::GetIthFreeSlot(size_t ith_free_slot) {
+ ASSERT(ith_free_slot < total_pages_ - num_alloced_pages_);
+ for (size_t free_slot_count = 0, j = 0;; j++) {
+ if (free_pages_[j]) {
+ if (free_slot_count == ith_free_slot) return j;
+ free_slot_count++;
+ }
+ }
+}
+
+void GuardedPageAllocator::FreeSlot(size_t slot) {
+ ASSERT(slot < total_pages_);
+ ASSERT(!free_pages_[slot]);
+ free_pages_[slot] = true;
+ num_alloced_pages_--;
+}
+
+uintptr_t GuardedPageAllocator::GetPageAddr(uintptr_t addr) const {
+ const uintptr_t addr_mask = ~(page_size_ - 1ULL);
+ return addr & addr_mask;
+}
+
+uintptr_t GuardedPageAllocator::GetNearestValidPage(uintptr_t addr) const {
+ if (addr < first_page_addr_) return first_page_addr_;
+ const uintptr_t last_page_addr =
+ first_page_addr_ + 2 * (total_pages_ - 1) * page_size_;
+ if (addr > last_page_addr) return last_page_addr;
+ uintptr_t offset = addr - first_page_addr_;
+
+ // If addr is already on a valid page, just return addr.
+ if ((offset / page_size_) % 2 == 0) return addr;
+
+ // ptr points to a guard page, so get nearest valid page.
+ const size_t kHalfPageSize = page_size_ / 2;
+ if ((offset / kHalfPageSize) % 2 == 0) {
+ return addr - kHalfPageSize; // Round down.
+ }
+ return addr + kHalfPageSize; // Round up.
+}
+
+size_t GuardedPageAllocator::GetNearestSlot(uintptr_t addr) const {
+ return AddrToSlot(GetPageAddr(GetNearestValidPage(addr)));
+}
+
+bool GuardedPageAllocator::IsFreed(size_t slot) const {
+ return free_pages_[slot];
+}
+
+bool GuardedPageAllocator::WriteOverflowOccurred(size_t slot) const {
+ if (!ShouldRightAlign(slot)) return false;
+ uint8_t magic = GetWriteOverflowMagic(slot);
+ uintptr_t alloc_end =
+ data_[slot].allocation_start + data_[slot].requested_size;
+ uintptr_t page_end = SlotToAddr(slot) + page_size_;
+ uintptr_t magic_end = std::min(page_end, alloc_end + kMagicSize);
+ for (uintptr_t p = alloc_end; p < magic_end; ++p) {
+ if (*reinterpret_cast<uint8_t *>(p) != magic) return true;
+ }
+ return false;
+}
+
+GuardedPageAllocator::ErrorType GuardedPageAllocator::GetErrorType(
+ uintptr_t addr, const SlotMetadata &d) const {
+ if (!d.allocation_start) return ErrorType::kUnknown;
+ if (double_free_detected_) return ErrorType::kDoubleFree;
+ if (write_overflow_detected_) return ErrorType::kBufferOverflowOnDealloc;
+ if (d.dealloc_trace.depth) return ErrorType::kUseAfterFree;
+ if (addr < d.allocation_start) return ErrorType::kBufferUnderflow;
+ if (addr >= d.allocation_start + d.requested_size) {
+ return ErrorType::kBufferOverflow;
+ }
+ return ErrorType::kUnknown;
+}
+
+uintptr_t GuardedPageAllocator::SlotToAddr(size_t slot) const {
+ ASSERT(slot < total_pages_);
+ return first_page_addr_ + 2 * slot * page_size_;
+}
+
+size_t GuardedPageAllocator::AddrToSlot(uintptr_t addr) const {
+ uintptr_t offset = addr - first_page_addr_;
+ ASSERT(offset % page_size_ == 0);
+ ASSERT((offset / page_size_) % 2 == 0);
+ int slot = offset / page_size_ / 2;
+ ASSERT(slot >= 0 && slot < total_pages_);
+ return slot;
+}
+
+void GuardedPageAllocator::MaybeRightAlign(size_t slot, size_t size,
+ size_t alignment, void **ptr) {
+ if (!ShouldRightAlign(slot)) return;
+ uintptr_t adjusted_ptr =
+ reinterpret_cast<uintptr_t>(*ptr) + page_size_ - size;
+
+ // If alignment == 0, the necessary alignment is never larger than the size
+ // rounded up to the next power of 2. We use this fact to minimize alignment
+ // padding between the end of small allocations and their guard pages.
+ //
+ // For allocations larger than the greater of kAlignment and
+ // __STDCPP_DEFAULT_NEW_ALIGNMENT__, we're safe aligning to that value.
+ size_t default_alignment =
+ std::min(absl::bit_ceil(size),
+ std::max(kAlignment,
+ static_cast<size_t>(__STDCPP_DEFAULT_NEW_ALIGNMENT__)));
+
+ // Ensure valid alignment.
+ alignment = std::max(alignment, default_alignment);
+ uintptr_t alignment_padding = adjusted_ptr & (alignment - 1);
+ adjusted_ptr -= alignment_padding;
+
+ // Write magic bytes in alignment padding to detect small overflow writes.
+ size_t magic_size = std::min(alignment_padding, kMagicSize);
+ memset(reinterpret_cast<void *>(adjusted_ptr + size),
+ GetWriteOverflowMagic(slot), magic_size);
+ *ptr = reinterpret_cast<void *>(adjusted_ptr);
+}
+
+// If this failure occurs during "bazel test", writes a warning for Bazel to
+// display.
+static void RecordBazelWarning(absl::string_view error) {
+ const char *warning_file = thread_safe_getenv("TEST_WARNINGS_OUTPUT_FILE");
+ if (!warning_file) return; // Not a bazel test.
+
+ constexpr char warning[] = "GWP-ASan error detected: ";
+ int fd = open(warning_file, O_CREAT | O_WRONLY | O_APPEND, 0644);
+ if (fd == -1) return;
+ (void)write(fd, warning, sizeof(warning) - 1);
+ (void)write(fd, error.data(), error.size());
+ (void)write(fd, "\n", 1);
+ close(fd);
+}
+
+// If this failure occurs during a gUnit test, writes an XML file describing the
+// error type. Note that we cannot use ::testing::Test::RecordProperty()
+// because it doesn't write the XML file if a test crashes (which we're about to
+// do here). So we write directly to the XML file instead.
+static void RecordTestFailure(absl::string_view error) {
+ const char *xml_file = thread_safe_getenv("XML_OUTPUT_FILE");
+ if (!xml_file) return; // Not a gUnit test.
+
+ // Record test failure for Sponge.
+ constexpr char xml_text_header[] =
+ "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
+ "<testsuites><testsuite><testcase>"
+ " <properties>"
+ " <property name=\"gwp-asan-report\" value=\"";
+ constexpr char xml_text_footer[] =
+ "\"/>"
+ " </properties>"
+ " <failure message=\"MemoryError\">"
+ " GWP-ASan detected a memory error. See the test log for full report."
+ " </failure>"
+ "</testcase></testsuite></testsuites>";
+
+ int fd = open(xml_file, O_CREAT | O_WRONLY | O_TRUNC, 0644);
+ if (fd == -1) return;
+ (void)write(fd, xml_text_header, sizeof(xml_text_header) - 1);
+ (void)write(fd, error.data(), error.size());
+ (void)write(fd, xml_text_footer, sizeof(xml_text_footer) - 1);
+ close(fd);
+}
+
+// If this crash occurs in a test, records test failure summaries.
+//
+// error contains the type of error to record.
+static void RecordCrash(absl::string_view error) {
+ RecordBazelWarning(error);
+ RecordTestFailure(error);
+}
+
+static void PrintStackTrace(void **stack_frames, size_t depth) {
+ for (size_t i = 0; i < depth; ++i) {
+ Log(kLog, __FILE__, __LINE__, " @ ", stack_frames[i]);
+ }
+}
+
+static void PrintStackTraceFromSignalHandler(void *context) {
+ void *stack_frames[kMaxStackDepth];
+ size_t depth = absl::GetStackTraceWithContext(stack_frames, kMaxStackDepth, 1,
+ context, nullptr);
+ PrintStackTrace(stack_frames, depth);
+}
+
+// A SEGV handler that prints stack traces for the allocation and deallocation
+// of relevant memory as well as the location of the memory error.
+static void SegvHandler(int signo, siginfo_t *info, void *context) {
+ if (signo != SIGSEGV) return;
+ void *fault = info->si_addr;
+ if (!Static::guardedpage_allocator().PointerIsMine(fault)) return;
+ GuardedPageAllocator::GpaStackTrace alloc_trace, dealloc_trace;
+ GuardedPageAllocator::ErrorType error =
+ Static::guardedpage_allocator().GetStackTraces(fault, &alloc_trace,
+ &dealloc_trace);
+ if (error == GuardedPageAllocator::ErrorType::kUnknown) return;
+ pid_t current_thread = absl::base_internal::GetTID();
+ off_t offset;
+ size_t size;
+ std::tie(offset, size) =
+ Static::guardedpage_allocator().GetAllocationOffsetAndSize(fault);
+
+ Log(kLog, __FILE__, __LINE__,
+ "*** GWP-ASan "
+ "(https://google.github.io/tcmalloc/gwp-asan.html) "
+ "has detected a memory error ***");
+ Log(kLog, __FILE__, __LINE__, ">>> Access at offset", offset,
+ "into buffer of length", size);
+ Log(kLog, __FILE__, __LINE__,
+ "Error originates from memory allocated in thread", alloc_trace.tid,
+ "at:");
+ PrintStackTrace(alloc_trace.stack, alloc_trace.depth);
+
+ switch (error) {
+ case GuardedPageAllocator::ErrorType::kUseAfterFree:
+ Log(kLog, __FILE__, __LINE__, "The memory was freed in thread",
+ dealloc_trace.tid, "at:");
+ PrintStackTrace(dealloc_trace.stack, dealloc_trace.depth);
+ Log(kLog, __FILE__, __LINE__, "Use-after-free occurs in thread",
+ current_thread, "at:");
+ RecordCrash("use-after-free");
+ break;
+ case GuardedPageAllocator::ErrorType::kBufferUnderflow:
+ Log(kLog, __FILE__, __LINE__, "Buffer underflow occurs in thread",
+ current_thread, "at:");
+ RecordCrash("buffer-underflow");
+ break;
+ case GuardedPageAllocator::ErrorType::kBufferOverflow:
+ Log(kLog, __FILE__, __LINE__, "Buffer overflow occurs in thread",
+ current_thread, "at:");
+ RecordCrash("buffer-overflow");
+ break;
+ case GuardedPageAllocator::ErrorType::kDoubleFree:
+ Log(kLog, __FILE__, __LINE__, "The memory was freed in thread",
+ dealloc_trace.tid, "at:");
+ PrintStackTrace(dealloc_trace.stack, dealloc_trace.depth);
+ Log(kLog, __FILE__, __LINE__, "Double free occurs in thread",
+ current_thread, "at:");
+ RecordCrash("double-free");
+ break;
+ case GuardedPageAllocator::ErrorType::kBufferOverflowOnDealloc:
+ Log(kLog, __FILE__, __LINE__,
+ "Buffer overflow (write) detected in thread", current_thread,
+ "at free:");
+ RecordCrash("buffer-overflow-detected-at-free");
+ break;
+ case GuardedPageAllocator::ErrorType::kUnknown:
+ Crash(kCrash, __FILE__, __LINE__, "Unexpected ErrorType::kUnknown");
+ }
+ PrintStackTraceFromSignalHandler(context);
+ if (error == GuardedPageAllocator::ErrorType::kBufferOverflowOnDealloc) {
+ Log(kLog, __FILE__, __LINE__,
+ "*** Try rerunning with --config=asan to get stack trace of overflow "
+ "***");
+ }
+}
+
+static struct sigaction old_sa;
+
+static void ForwardSignal(int signo, siginfo_t *info, void *context) {
+ if (old_sa.sa_flags & SA_SIGINFO) {
+ old_sa.sa_sigaction(signo, info, context);
+ } else if (old_sa.sa_handler == SIG_DFL) {
+ // No previous handler registered. Re-raise signal for core dump.
+ int err = sigaction(signo, &old_sa, nullptr);
+ if (err == -1) {
+ Log(kLog, __FILE__, __LINE__, "Couldn't restore previous sigaction!");
+ }
+ raise(signo);
+ } else if (old_sa.sa_handler == SIG_IGN) {
+ return; // Previous sigaction ignored signal, so do the same.
+ } else {
+ old_sa.sa_handler(signo);
+ }
+}
+
+static void HandleSegvAndForward(int signo, siginfo_t *info, void *context) {
+ SegvHandler(signo, info, context);
+ ForwardSignal(signo, info, context);
+}
+
+extern "C" void MallocExtension_Internal_ActivateGuardedSampling() {
+ static absl::once_flag flag;
+ absl::call_once(flag, []() {
+ struct sigaction action = {};
+ action.sa_sigaction = HandleSegvAndForward;
+ sigemptyset(&action.sa_mask);
+ action.sa_flags = SA_SIGINFO;
+ sigaction(SIGSEGV, &action, &old_sa);
+ Static::guardedpage_allocator().AllowAllocations();
+ });
+}
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
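To make the address arithmetic in MapPages, SlotToAddr, and AddrToSlot above easier to follow, here is a standalone sketch of the alternating guard/data page layout. The base address, page size, and pool size are invented constants; the real allocator obtains them from MmapAligned and getpagesize. Every data page is bracketed by guard pages one page away on either side, which is also why GetNearestValidPage rounds a guard-page fault toward the closer data page.

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdio>

// Made-up constants for illustration only.
constexpr uintptr_t kBase = 0x10000000;  // pretend result of MmapAligned
constexpr size_t kPageSize = 8192;       // assumed page size
constexpr size_t kTotalPages = 4;        // data slots in the pool

// The first data page is one page past the base, so a guard page precedes
// slot 0.
constexpr uintptr_t kFirstPage = kBase + kPageSize;

// Mirrors SlotToAddr/AddrToSlot: data pages occupy every other page.
uintptr_t SlotToAddr(size_t slot) { return kFirstPage + 2 * slot * kPageSize; }
size_t AddrToSlot(uintptr_t addr) { return (addr - kFirstPage) / kPageSize / 2; }

int main() {
  // Layout: [guard][slot 0][guard][slot 1][guard] ... [slot N-1][guard]
  for (size_t slot = 0; slot < kTotalPages; ++slot) {
    uintptr_t addr = SlotToAddr(slot);
    assert(AddrToSlot(addr) == slot);
    std::printf("slot %zu: data page %#lx, guards %#lx and %#lx\n", slot,
                static_cast<unsigned long>(addr),
                static_cast<unsigned long>(addr - kPageSize),
                static_cast<unsigned long>(addr + kPageSize));
  }
}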
diff --git a/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator.h b/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator.h
new file mode 100644
index 0000000000..e5a6118c08
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator.h
@@ -0,0 +1,311 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TCMALLOC_GUARDED_PAGE_ALLOCATOR_H_
+#define TCMALLOC_GUARDED_PAGE_ALLOCATOR_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+#include <utility>
+
+#include "absl/base/attributes.h"
+#include "absl/base/internal/spinlock.h"
+#include "absl/base/thread_annotations.h"
+#include "tcmalloc/common.h"
+#include "tcmalloc/internal/logging.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+ABSL_CONST_INIT extern absl::base_internal::SpinLock guarded_page_lock;
+
+// An allocator that gives each allocation a new region, with guard pages on
+// either side of the allocated region. If a buffer is overflowed to the next
+// guard page or underflowed to the previous guard page, a segfault occurs.
+// After an allocation is freed, the underlying page is marked as inaccessible,
+// and any future accesses to it will also cause segfaults until the page is
+// reallocated.
+//
+// This class is safe to use with static storage duration and is thread-safe,
+// except for calls to Init() and Destroy() (see the corresponding function
+// comments).
+//
+// SYNCHRONIZATION
+// Requires the SpinLock guarded_page_lock to be defined externally. This is
+// required so that this class may be instantiated with static storage
+// duration. The lock is held by this class during initialization and when
+// accessing the internal free page map.
+//
+// Example:
+// ABSL_CONST_INIT absl::base_internal::SpinLock
+// guarded_page_lock(absl::kConstInit,
+// absl::base_internal::SCHEDULE_KERNEL_ONLY);
+// ABSL_CONST_INIT GuardedPageAllocator gpa;
+//
+// void foo() {
+// char *buf = reinterpret_cast<char *>(gpa.Allocate(8000, 1));
+// buf[0] = 'A'; // OK. No segfault occurs.
+// memset(buf, 'A', 8000); // OK. No segfault occurs.
+// buf[-300] = 'A'; // Segfault!
+// buf[9000] = 'A'; // Segfault!
+// gpa.Deallocate(buf);
+// buf[0] = 'B'; // Segfault!
+// }
+//
+// int main() {
+// // Call Init() only once.
+// gpa.Init(64, GuardedPageAllocator::kGpaMaxPages);
+// gpa.AllowAllocations();
+// for (int i = 0; i < 1000; i++) foo();
+// return 0;
+// }
+class GuardedPageAllocator {
+ public:
+ struct GpaStackTrace {
+ void *stack[kMaxStackDepth];
+ size_t depth = 0;
+ pid_t tid = 0;
+ };
+
+ // Maximum number of pages this class can allocate.
+ static constexpr size_t kGpaMaxPages = 512;
+
+ enum class ErrorType {
+ kUseAfterFree,
+ kBufferUnderflow,
+ kBufferOverflow,
+ kDoubleFree,
+ kBufferOverflowOnDealloc,
+ kUnknown,
+ };
+
+ constexpr GuardedPageAllocator()
+ : free_pages_{},
+ num_alloced_pages_(0),
+ num_alloced_pages_max_(0),
+ num_allocation_requests_(0),
+ num_failed_allocations_(0),
+ data_(nullptr),
+ pages_base_addr_(0),
+ pages_end_addr_(0),
+ first_page_addr_(0),
+ max_alloced_pages_(0),
+ total_pages_(0),
+ page_size_(0),
+ rand_(0),
+ initialized_(false),
+ allow_allocations_(false),
+ double_free_detected_(false),
+ write_overflow_detected_(false) {}
+
+ GuardedPageAllocator(const GuardedPageAllocator &) = delete;
+ GuardedPageAllocator &operator=(const GuardedPageAllocator &) = delete;
+
+ ~GuardedPageAllocator() = default;
+
+ // Configures this allocator to allocate up to max_alloced_pages pages at a
+ // time from a pool of total_pages pages, where:
+ // 1 <= max_alloced_pages <= total_pages <= kGpaMaxPages
+ //
+ // This method should be called non-concurrently and only once to complete
+ // initialization. Dynamic initialization is deliberately done here and not
+ // in the constructor, thereby allowing the constructor to be constexpr and
+ // avoiding static initialization order issues.
+ void Init(size_t max_alloced_pages, size_t total_pages)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+
+ // Unmaps memory allocated by this class.
+ //
+ // This method should be called non-concurrently and only once to complete
+ // destruction. Destruction is deliberately done here and not in the
+ // destructor, thereby allowing the destructor to be trivial (i.e. a no-op)
+ // and avoiding use-after-destruction issues for static/global instances.
+ void Destroy();
+
+ // On success, returns a pointer to size bytes of page-guarded memory, aligned
+ // to alignment. On failure, returns nullptr. The returned pointer is
+ // guaranteed to be tagged. Failure can occur if memory could not be mapped
+ // or protected, if all guarded pages are already allocated, or if size is 0.
+ //
+ // Precondition: size and alignment <= page_size_
+ // Precondition: alignment is 0 or a power of 2
+ void *Allocate(size_t size, size_t alignment)
+ ABSL_LOCKS_EXCLUDED(guarded_page_lock);
+
+ // Deallocates memory pointed to by ptr. ptr must have been previously
+ // returned by a call to Allocate.
+ void Deallocate(void *ptr) ABSL_LOCKS_EXCLUDED(guarded_page_lock);
+
+ // Returns the size requested when ptr was allocated. ptr must have been
+ // previously returned by a call to Allocate.
+ size_t GetRequestedSize(const void *ptr) const;
+
+ // Returns ptr's offset from the beginning of its allocation along with the
+ // allocation's size.
+ std::pair<off_t, size_t> GetAllocationOffsetAndSize(const void *ptr) const;
+
+ // Records stack traces in alloc_trace and dealloc_trace for the page nearest
+ // to ptr. alloc_trace is the trace at the time the page was allocated. If
+ // the page is still allocated, dealloc_trace->depth will be 0. If the page
+ // has been deallocated, dealloc_trace is the trace at the time the page was
+ // deallocated.
+ //
+ // Returns the likely error type for an access at ptr.
+ //
+ // Requires that ptr points to memory mapped by this class.
+ ErrorType GetStackTraces(const void *ptr, GpaStackTrace *alloc_trace,
+ GpaStackTrace *dealloc_trace) const;
+
+ // Writes a human-readable summary of GuardedPageAllocator's internal state to
+ // *out.
+ void Print(Printer *out) ABSL_LOCKS_EXCLUDED(guarded_page_lock);
+ void PrintInPbtxt(PbtxtRegion *gwp_asan) const
+ ABSL_LOCKS_EXCLUDED(guarded_page_lock);
+
+ // Returns true if ptr points to memory managed by this class.
+ inline bool ABSL_ATTRIBUTE_ALWAYS_INLINE
+ PointerIsMine(const void *ptr) const {
+ uintptr_t addr = reinterpret_cast<uintptr_t>(ptr);
+ return pages_base_addr_ <= addr && addr < pages_end_addr_;
+ }
+
+ // Allows Allocate() to start returning allocations.
+ void AllowAllocations() ABSL_LOCKS_EXCLUDED(guarded_page_lock) {
+ absl::base_internal::SpinLockHolder h(&guarded_page_lock);
+ allow_allocations_ = true;
+ }
+
+ private:
+ // Structure for storing data about a slot.
+ struct SlotMetadata {
+ GpaStackTrace alloc_trace;
+ GpaStackTrace dealloc_trace;
+ size_t requested_size = 0;
+ uintptr_t allocation_start = 0;
+ };
+
+ // Max number of magic bytes we use to detect write-overflows at deallocation.
+ static constexpr size_t kMagicSize = 32;
+
+ // Maps pages into memory.
+ void MapPages() ABSL_LOCKS_EXCLUDED(guarded_page_lock)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+
+ // Reserves and returns a slot randomly selected from the free slots in
+ // free_pages_. Returns -1 if no slots available, or if AllowAllocations()
+ // hasn't been called yet.
+ ssize_t ReserveFreeSlot() ABSL_LOCKS_EXCLUDED(guarded_page_lock);
+
+ // Returns the i-th free slot of free_pages_. i must be in the range [0,
+ // total_pages_ - num_alloced_pages_).
+ size_t GetIthFreeSlot(size_t i)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(guarded_page_lock);
+
+ // Marks the specified slot as unreserved.
+ void FreeSlot(size_t slot) ABSL_EXCLUSIVE_LOCKS_REQUIRED(guarded_page_lock);
+
+ // Returns the address of the page that addr resides on.
+ uintptr_t GetPageAddr(uintptr_t addr) const;
+
+ // Returns an address somewhere on the valid page nearest to addr.
+ uintptr_t GetNearestValidPage(uintptr_t addr) const;
+
+ // Returns the slot number for the page nearest to addr.
+ size_t GetNearestSlot(uintptr_t addr) const;
+
+ // Returns true if the specified slot has already been freed.
+ bool IsFreed(size_t slot) const
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(guarded_page_lock);
+
+ // Returns true if magic bytes for slot were overwritten.
+ bool WriteOverflowOccurred(size_t slot) const;
+
+ // Returns the likely error type for the given access address and metadata
+ // associated with the nearest slot.
+ ErrorType GetErrorType(uintptr_t addr, const SlotMetadata &d) const;
+
+ // Magic constant used for detecting write-overflows at deallocation time.
+ static uint8_t GetWriteOverflowMagic(size_t slot) {
+ // Only even slots get magic bytes, so use slot / 2 for more unique magics.
+ return uint8_t{0xcd} * static_cast<uint8_t>(slot / 2);
+ }
+
+ // Returns true if slot should be right aligned.
+ static bool ShouldRightAlign(size_t slot) { return slot % 2 == 0; }
+
+ // If slot is marked for right alignment, moves the allocation in *ptr to the
+ // right end of the slot, maintaining the specified size and alignment. Magic
+ // bytes are written in any alignment padding.
+ void MaybeRightAlign(size_t slot, size_t size, size_t alignment, void **ptr);
+
+ uintptr_t SlotToAddr(size_t slot) const;
+ size_t AddrToSlot(uintptr_t addr) const;
+
+ // Maps each bool to one page.
+ // true: Free. false: Reserved.
+ bool free_pages_[kGpaMaxPages] ABSL_GUARDED_BY(guarded_page_lock);
+
+ // Number of currently-allocated pages.
+ size_t num_alloced_pages_ ABSL_GUARDED_BY(guarded_page_lock);
+
+ // The high-water mark for num_alloced_pages_.
+ size_t num_alloced_pages_max_ ABSL_GUARDED_BY(guarded_page_lock);
+
+ // Number of calls to Allocate.
+ size_t num_allocation_requests_ ABSL_GUARDED_BY(guarded_page_lock);
+
+ // Number of times Allocate has failed.
+ size_t num_failed_allocations_ ABSL_GUARDED_BY(guarded_page_lock);
+
+ // A dynamically-allocated array of stack trace data captured when each page
+ // is allocated/deallocated. Printed by the SEGV handler when a memory error
+ // is detected.
+ SlotMetadata *data_;
+
+ uintptr_t pages_base_addr_; // Points to start of mapped region.
+ uintptr_t pages_end_addr_; // Points to the end of mapped region.
+ uintptr_t first_page_addr_; // Points to first page returnable by Allocate.
+ size_t max_alloced_pages_; // Max number of pages to allocate at once.
+ size_t total_pages_; // Size of the page pool to allocate from.
+ size_t page_size_; // Size of pages we allocate.
+ uint64_t rand_; // RNG seed.
+
+ // True if this object has been fully initialized.
+ bool initialized_ ABSL_GUARDED_BY(guarded_page_lock);
+
+ // Flag to control whether we can return allocations or not.
+ bool allow_allocations_ ABSL_GUARDED_BY(guarded_page_lock);
+
+ // Set to true if a double free has occurred.
+ bool double_free_detected_;
+
+ // Set to true if a write overflow was detected on deallocation.
+ bool write_overflow_detected_;
+
+ friend struct ConstexprCheck;
+};
+
+struct ConstexprCheck {
+ static_assert(GuardedPageAllocator().rand_ || true,
+ "GuardedPageAllocator must have a constexpr constructor");
+};
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // TCMALLOC_GUARDED_PAGE_ALLOCATOR_H_
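ShouldRightAlign, GetWriteOverflowMagic, and kMagicSize above implement the small-overflow check for right-aligned slots: the alignment padding between the end of the allocation and the guard page is filled with a per-slot magic byte, and Deallocate flags the slot if any of those bytes changed. The following is a minimal standalone model of that check only; the buffer, sizes, and slot number are invented for illustration.

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>

constexpr size_t kMagicSize = 32;

// Same scheme as GetWriteOverflowMagic: derive the magic byte from slot / 2.
uint8_t MagicFor(size_t slot) {
  return uint8_t{0xcd} * static_cast<uint8_t>(slot / 2);
}

int main() {
  // Pretend this is the tail of a right-aligned slot: the allocation ends and
  // 8 bytes of alignment padding follow before the guard page.
  uint8_t page_tail[24];
  const size_t requested = 16, padding = sizeof(page_tail) - requested;
  const size_t slot = 2;

  // At allocation time, fill the padding with the magic byte.
  std::memset(page_tail + requested, MagicFor(slot),
              std::min(padding, kMagicSize));

  // A one-byte overflow write past the end of the allocation...
  page_tail[requested] = 'X';

  // ...is detected at deallocation time by re-checking the magic bytes.
  bool overflow = false;
  for (size_t i = requested; i < requested + std::min(padding, kMagicSize);
       ++i) {
    if (page_tail[i] != MagicFor(slot)) overflow = true;
  }
  std::printf("write overflow detected: %s\n", overflow ? "yes" : "no");
}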
diff --git a/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator_benchmark.cc b/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator_benchmark.cc
new file mode 100644
index 0000000000..fb6d0ea265
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator_benchmark.cc
@@ -0,0 +1,60 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <unistd.h>
+
+#include "absl/base/internal/spinlock.h"
+#include "benchmark/benchmark.h"
+#include "tcmalloc/guarded_page_allocator.h"
+#include "tcmalloc/internal/logging.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+namespace {
+
+static constexpr size_t kMaxGpaPages = GuardedPageAllocator::kGpaMaxPages;
+
+// Size of pages used by GuardedPageAllocator.
+static size_t PageSize() {
+ static const size_t page_size =
+ std::max(kPageSize, static_cast<size_t>(getpagesize()));
+ return page_size;
+}
+
+void BM_AllocDealloc(benchmark::State& state) {
+ static GuardedPageAllocator* gpa = []() {
+ auto gpa = new GuardedPageAllocator;
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+ gpa->Init(kMaxGpaPages, kMaxGpaPages);
+ gpa->AllowAllocations();
+ return gpa;
+ }();
+ size_t alloc_size = state.range(0);
+ for (auto _ : state) {
+ char* ptr = reinterpret_cast<char*>(gpa->Allocate(alloc_size, 0));
+ CHECK_CONDITION(ptr != nullptr);
+ ptr[0] = 'X'; // Page fault first page.
+ ptr[alloc_size - 1] = 'X'; // Page fault last page.
+ gpa->Deallocate(ptr);
+ }
+}
+
+BENCHMARK(BM_AllocDealloc)->Range(1, PageSize());
+BENCHMARK(BM_AllocDealloc)->Arg(1)->ThreadRange(1, kMaxGpaPages);
+
+} // namespace
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
diff --git a/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator_test.cc b/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator_test.cc
new file mode 100644
index 0000000000..0d603de690
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator_test.cc
@@ -0,0 +1,243 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/guarded_page_allocator.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <algorithm>
+#include <memory>
+#include <set>
+#include <string>
+#include <thread> // NOLINT(build/c++11)
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/base/casts.h"
+#include "absl/base/internal/spinlock.h"
+#include "absl/base/internal/sysinfo.h"
+#include "absl/container/flat_hash_set.h"
+#include "absl/memory/memory.h"
+#include "absl/numeric/bits.h"
+#include "absl/strings/str_cat.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "tcmalloc/common.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/static_vars.h"
+
+namespace tcmalloc {
+namespace tcmalloc_internal {
+namespace {
+
+static constexpr size_t kMaxGpaPages = GuardedPageAllocator::kGpaMaxPages;
+
+// Size of pages used by GuardedPageAllocator.
+static size_t PageSize() {
+ static const size_t page_size =
+ std::max(kPageSize, static_cast<size_t>(getpagesize()));
+ return page_size;
+}
+
+class GuardedPageAllocatorTest : public testing::Test {
+ protected:
+ GuardedPageAllocatorTest() {
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+ gpa_.Init(kMaxGpaPages, kMaxGpaPages);
+ gpa_.AllowAllocations();
+ }
+
+ explicit GuardedPageAllocatorTest(size_t num_pages) {
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+ gpa_.Init(num_pages, kMaxGpaPages);
+ gpa_.AllowAllocations();
+ }
+
+ ~GuardedPageAllocatorTest() override { gpa_.Destroy(); }
+
+ GuardedPageAllocator gpa_;
+};
+
+class GuardedPageAllocatorParamTest
+ : public GuardedPageAllocatorTest,
+ public testing::WithParamInterface<size_t> {
+ protected:
+ GuardedPageAllocatorParamTest() : GuardedPageAllocatorTest(GetParam()) {}
+};
+
+TEST_F(GuardedPageAllocatorTest, SingleAllocDealloc) {
+ char *buf = reinterpret_cast<char *>(gpa_.Allocate(PageSize(), 0));
+ EXPECT_NE(buf, nullptr);
+ EXPECT_TRUE(gpa_.PointerIsMine(buf));
+ memset(buf, 'A', PageSize());
+ EXPECT_DEATH(buf[-1] = 'A', "");
+ EXPECT_DEATH(buf[PageSize()] = 'A', "");
+ gpa_.Deallocate(buf);
+ EXPECT_DEATH(buf[0] = 'B', "");
+ EXPECT_DEATH(buf[PageSize() / 2] = 'B', "");
+ EXPECT_DEATH(buf[PageSize() - 1] = 'B', "");
+}
+
+TEST_F(GuardedPageAllocatorTest, NoAlignmentProvided) {
+ constexpr size_t kLargeObjectAlignment = std::max(
+ kAlignment, static_cast<size_t>(__STDCPP_DEFAULT_NEW_ALIGNMENT__));
+
+ for (size_t base_size = 1; base_size <= 64; base_size <<= 1) {
+ for (size_t size : {base_size, base_size + 1}) {
+ SCOPED_TRACE(size);
+
+ constexpr int kElements = 10;
+ std::array<void *, kElements> ptrs;
+
+ // Make several allocation attempts to encounter left/right-alignment in
+ // the guarded region.
+ for (int i = 0; i < kElements; i++) {
+ ptrs[i] = gpa_.Allocate(size, 0);
+ EXPECT_NE(ptrs[i], nullptr);
+ EXPECT_TRUE(gpa_.PointerIsMine(ptrs[i]));
+
+ size_t observed_alignment =
+ 1 << absl::countr_zero(absl::bit_cast<uintptr_t>(ptrs[i]));
+ EXPECT_GE(observed_alignment, std::min(size, kLargeObjectAlignment));
+ }
+
+ for (void *ptr : ptrs) {
+ gpa_.Deallocate(ptr);
+ }
+ }
+ }
+}
+
+TEST_F(GuardedPageAllocatorTest, AllocDeallocAligned) {
+ for (size_t align = 1; align <= PageSize(); align <<= 1) {
+ constexpr size_t alloc_size = 1;
+ void *p = gpa_.Allocate(alloc_size, align);
+ EXPECT_NE(p, nullptr);
+ EXPECT_TRUE(gpa_.PointerIsMine(p));
+ EXPECT_EQ(reinterpret_cast<uintptr_t>(p) % align, 0);
+ }
+}
+
+TEST_P(GuardedPageAllocatorParamTest, AllocDeallocAllPages) {
+ size_t num_pages = GetParam();
+ char *bufs[kMaxGpaPages];
+ for (size_t i = 0; i < num_pages; i++) {
+ bufs[i] = reinterpret_cast<char *>(gpa_.Allocate(1, 0));
+ EXPECT_NE(bufs[i], nullptr);
+ EXPECT_TRUE(gpa_.PointerIsMine(bufs[i]));
+ }
+ EXPECT_EQ(gpa_.Allocate(1, 0), nullptr);
+ gpa_.Deallocate(bufs[0]);
+ bufs[0] = reinterpret_cast<char *>(gpa_.Allocate(1, 0));
+ EXPECT_NE(bufs[0], nullptr);
+ EXPECT_TRUE(gpa_.PointerIsMine(bufs[0]));
+ for (size_t i = 0; i < num_pages; i++) {
+ bufs[i][0] = 'A';
+ gpa_.Deallocate(bufs[i]);
+ }
+}
+INSTANTIATE_TEST_SUITE_P(VaryNumPages, GuardedPageAllocatorParamTest,
+ testing::Values(1, kMaxGpaPages / 2, kMaxGpaPages));
+
+TEST_F(GuardedPageAllocatorTest, PointerIsMine) {
+ void *buf = gpa_.Allocate(1, 0);
+ int stack_var;
+ auto malloc_ptr = absl::make_unique<char>();
+ EXPECT_TRUE(gpa_.PointerIsMine(buf));
+ EXPECT_FALSE(gpa_.PointerIsMine(&stack_var));
+ EXPECT_FALSE(gpa_.PointerIsMine(malloc_ptr.get()));
+}
+
+TEST_F(GuardedPageAllocatorTest, Print) {
+ char buf[1024] = {};
+ Printer out(buf, sizeof(buf));
+ gpa_.Print(&out);
+ EXPECT_THAT(buf, testing::ContainsRegex("GWP-ASan Status"));
+}
+
+// Test that no pages are double-allocated or left unallocated, and that no
+// extra pages are allocated when there's concurrent calls to Allocate().
+TEST_F(GuardedPageAllocatorTest, ThreadedAllocCount) {
+ constexpr size_t kNumThreads = 2;
+ void *allocations[kNumThreads][kMaxGpaPages];
+ {
+ std::vector<std::thread> threads;
+ threads.reserve(kNumThreads);
+ for (size_t i = 0; i < kNumThreads; i++) {
+ threads.push_back(std::thread([this, &allocations, i]() {
+ for (size_t j = 0; j < kMaxGpaPages; j++) {
+ allocations[i][j] = gpa_.Allocate(1, 0);
+ }
+ }));
+ }
+
+ for (auto &t : threads) {
+ t.join();
+ }
+ }
+ absl::flat_hash_set<void *> allocations_set;
+ for (size_t i = 0; i < kNumThreads; i++) {
+ for (size_t j = 0; j < kMaxGpaPages; j++) {
+ allocations_set.insert(allocations[i][j]);
+ }
+ }
+ allocations_set.erase(nullptr);
+ EXPECT_EQ(allocations_set.size(), kMaxGpaPages);
+}
+
+// Test that allocator remains in consistent state under high contention and
+// doesn't double-allocate pages or fail to deallocate pages.
+TEST_F(GuardedPageAllocatorTest, ThreadedHighContention) {
+ const size_t kNumThreads = 4 * absl::base_internal::NumCPUs();
+ {
+ std::vector<std::thread> threads;
+ threads.reserve(kNumThreads);
+ for (size_t i = 0; i < kNumThreads; i++) {
+ threads.push_back(std::thread([this]() {
+ char *buf;
+ while ((buf = reinterpret_cast<char *>(gpa_.Allocate(1, 0))) ==
+ nullptr) {
+ absl::SleepFor(absl::Nanoseconds(5000));
+ }
+
+ // Verify that no other thread has access to this page.
+ EXPECT_EQ(buf[0], 0);
+
+ // Mark this page and allow some time for another thread to potentially
+ // gain access to this page.
+ buf[0] = 'A';
+ absl::SleepFor(absl::Nanoseconds(5000));
+
+ // Unmark this page and deallocate.
+ buf[0] = 0;
+ gpa_.Deallocate(buf);
+ }));
+ }
+
+ for (auto &t : threads) {
+ t.join();
+ }
+ }
+ // Verify all pages have been deallocated now that all threads are done.
+ for (size_t i = 0; i < kMaxGpaPages; i++) {
+ EXPECT_NE(gpa_.Allocate(1, 0), nullptr);
+ }
+}
+
+} // namespace
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
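The EXPECT_DEATH cases above trigger exactly the situations that GetErrorType in guarded_page_allocator.cc classifies: the faulting address relative to the recorded allocation bounds, plus whether the slot was already freed, selects the report the SEGV handler prints. The sketch below models just that decision with a reduced enum and invented addresses; double-free and overflow-on-dealloc are omitted because they depend on the magic-byte state.

#include <cstddef>
#include <cstdint>
#include <cstdio>

// Reduced version of the classification performed by GetErrorType.
enum class ErrorType { kUseAfterFree, kBufferUnderflow, kBufferOverflow, kUnknown };

ErrorType Classify(uintptr_t fault, uintptr_t alloc_start, size_t size,
                   bool already_freed) {
  if (already_freed) return ErrorType::kUseAfterFree;
  if (fault < alloc_start) return ErrorType::kBufferUnderflow;
  if (fault >= alloc_start + size) return ErrorType::kBufferOverflow;
  return ErrorType::kUnknown;
}

const char* Name(ErrorType e) {
  switch (e) {
    case ErrorType::kUseAfterFree:    return "use-after-free";
    case ErrorType::kBufferUnderflow: return "buffer-underflow";
    case ErrorType::kBufferOverflow:  return "buffer-overflow";
    default:                          return "unknown";
  }
}

int main() {
  constexpr uintptr_t kStart = 0x2000;  // invented allocation start
  constexpr size_t kSize = 64;
  std::printf("%s\n", Name(Classify(kStart - 1, kStart, kSize, false)));
  std::printf("%s\n", Name(Classify(kStart + kSize, kStart, kSize, false)));
  std::printf("%s\n", Name(Classify(kStart + 8, kStart, kSize, true)));
}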
diff --git a/contrib/libs/tcmalloc/tcmalloc/heap_profiling_test.cc b/contrib/libs/tcmalloc/tcmalloc/heap_profiling_test.cc
new file mode 100644
index 0000000000..5c2473ffed
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/heap_profiling_test.cc
@@ -0,0 +1,122 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <memory>
+#include <new>
+
+#include "gtest/gtest.h"
+#include "benchmark/benchmark.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/internal/parameter_accessors.h"
+#include "tcmalloc/malloc_extension.h"
+#include "tcmalloc/static_vars.h"
+
+namespace tcmalloc {
+namespace {
+
+int64_t ProfileSize(ProfileType type) {
+ int64_t total = 0;
+
+ MallocExtension::SnapshotCurrent(type).Iterate(
+ [&](const Profile::Sample &e) { total += e.sum; });
+ return total;
+}
+
+class ScopedPeakGrowthFraction {
+ public:
+ explicit ScopedPeakGrowthFraction(double temporary_value)
+ : previous_(TCMalloc_Internal_GetPeakSamplingHeapGrowthFraction()) {
+ TCMalloc_Internal_SetPeakSamplingHeapGrowthFraction(temporary_value);
+ }
+
+ ~ScopedPeakGrowthFraction() {
+ TCMalloc_Internal_SetPeakSamplingHeapGrowthFraction(previous_);
+ }
+
+ private:
+ double previous_;
+};
+
+TEST(HeapProfilingTest, PeakHeapTracking) {
+ // Adjust high watermark threshold for our scenario, to be independent of
+ // changes to the default. As we use a random value for choosing our next
+ // sampling point, we may overweight some allocations above their true size.
+ ScopedPeakGrowthFraction s(1.25);
+
+ int64_t start_peak_sz = ProfileSize(ProfileType::kPeakHeap);
+
+ // make a large allocation to force a new peak heap sample
+ // (total live: 50MiB)
+ void *first = ::operator new(50 << 20);
+ // TODO(b/183453911): Remove workaround for GCC 10.x deleting operator new,
+ // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94295.
+ benchmark::DoNotOptimize(first);
+ int64_t peak_after_first = ProfileSize(ProfileType::kPeakHeap);
+ EXPECT_NEAR(peak_after_first, start_peak_sz + (50 << 20), 10 << 20);
+
+ // a small allocation shouldn't increase the peak
+ // (total live: 54MiB)
+ void *second = ::operator new(4 << 20);
+ benchmark::DoNotOptimize(second);
+ int64_t peak_after_second = ProfileSize(ProfileType::kPeakHeap);
+ EXPECT_EQ(peak_after_second, peak_after_first);
+
+ // but a large one should
+ // (total live: 254MiB)
+ void *third = ::operator new(200 << 20);
+ benchmark::DoNotOptimize(third);
+ int64_t peak_after_third = ProfileSize(ProfileType::kPeakHeap);
+ EXPECT_NEAR(peak_after_third, peak_after_second + (200 << 20), 10 << 20);
+
+ // freeing everything shouldn't affect the peak
+ // (total live: 0MiB)
+ ::operator delete(first);
+ EXPECT_EQ(ProfileSize(ProfileType::kPeakHeap), peak_after_third);
+
+ ::operator delete(second);
+ EXPECT_EQ(ProfileSize(ProfileType::kPeakHeap), peak_after_third);
+
+ ::operator delete(third);
+ EXPECT_EQ(ProfileSize(ProfileType::kPeakHeap), peak_after_third);
+
+ // going back up less than previous peak shouldn't affect the peak
+ // (total live: 200MiB)
+ void *fourth = ::operator new(100 << 20);
+ benchmark::DoNotOptimize(fourth);
+ void *fifth = ::operator new(100 << 20);
+ benchmark::DoNotOptimize(fifth);
+ EXPECT_EQ(ProfileSize(ProfileType::kPeakHeap), peak_after_third);
+
+ // passing the old peak significantly, even with many small allocations,
+ // should generate a new one
+ // (total live: 200MiB + 256MiB = 456MiB, 80% over the 254MiB peak)
+ void *bitsy[1 << 10];
+ for (int i = 0; i < 1 << 10; i++) {
+ bitsy[i] = ::operator new(1 << 18);
+ benchmark::DoNotOptimize(bitsy[i]);
+ }
+ EXPECT_GT(ProfileSize(ProfileType::kPeakHeap), peak_after_third);
+
+ ::operator delete(fourth);
+ ::operator delete(fifth);
+ for (int i = 0; i < 1 << 10; i++) {
+ ::operator delete(bitsy[i]);
+ }
+}
+
+} // namespace
+} // namespace tcmalloc
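PeakHeapTracking above implies a simple refresh rule: the peak-heap snapshot is replaced only when live usage grows past the previous peak by the configured growth fraction (1.25 via ScopedPeakGrowthFraction). The toy model below is an assumption-level sketch of that rule rather than TCMalloc's actual sampler; it replays the test's allocation pattern in plain arithmetic.

#include <cstdint>
#include <cstdio>

// Toy model only: "peak" is refreshed when live bytes exceed the previous
// peak times the growth fraction (assumed semantics, matching the test).
struct PeakTracker {
  double growth_fraction;  // e.g. 1.25, as set by ScopedPeakGrowthFraction
  int64_t live = 0;
  int64_t peak = 0;

  // Returns true if this allocation would produce a new peak snapshot.
  bool Allocate(int64_t bytes) {
    live += bytes;
    if (static_cast<double>(live) >
        static_cast<double>(peak) * growth_fraction) {
      peak = live;
      return true;
    }
    return false;
  }
  void Free(int64_t bytes) { live -= bytes; }  // never lowers the peak
};

int main() {
  PeakTracker t{1.25};
  std::printf("50 MiB  -> new peak? %d\n", t.Allocate(int64_t{50} << 20));   // yes
  std::printf("4 MiB   -> new peak? %d\n", t.Allocate(int64_t{4} << 20));    // no
  std::printf("200 MiB -> new peak? %d\n", t.Allocate(int64_t{200} << 20));  // yes
  t.Free(int64_t{254} << 20);  // freeing everything leaves the peak untouched
  std::printf("peak stays at %lld MiB\n",
              static_cast<long long>(t.peak >> 20));
}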
diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_address_map.cc b/contrib/libs/tcmalloc/tcmalloc/huge_address_map.cc
new file mode 100644
index 0000000000..898c6d934a
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/huge_address_map.cc
@@ -0,0 +1,374 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/huge_address_map.h"
+
+#include <stdlib.h>
+
+#include <algorithm>
+#include <new>
+
+#include "absl/base/internal/cycleclock.h"
+#include "tcmalloc/internal/logging.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+const HugeAddressMap::Node *HugeAddressMap::Node::next() const {
+ const Node *n = right_;
+ if (n) {
+ while (n->left_) n = n->left_;
+ return n;
+ }
+
+ n = parent_;
+ const Node *last = this;
+ while (n) {
+ if (n->left_ == last) return n;
+ last = n;
+ n = n->parent_;
+ }
+
+ return nullptr;
+}
+
+HugeAddressMap::Node *HugeAddressMap::Node::next() {
+ const Node *n = static_cast<const Node *>(this)->next();
+ return const_cast<Node *>(n);
+}
+
+void HugeAddressMap::Node::Check(size_t *num_nodes, HugeLength *size) const {
+ HugeLength longest = range_.len();
+ *num_nodes += 1;
+ *size += range_.len();
+
+ if (left_) {
+ // tree
+ CHECK_CONDITION(left_->range_.start() < range_.start());
+ // disjoint
+ CHECK_CONDITION(left_->range_.end_addr() < range_.start_addr());
+ // well-formed
+ CHECK_CONDITION(left_->parent_ == this);
+ // heap
+ CHECK_CONDITION(left_->prio_ <= prio_);
+ left_->Check(num_nodes, size);
+ if (left_->longest_ > longest) longest = left_->longest_;
+ }
+
+ if (right_) {
+ // tree
+ CHECK_CONDITION(right_->range_.start() > range_.start());
+ // disjoint
+ CHECK_CONDITION(right_->range_.start_addr() > range_.end_addr());
+ // well-formed
+ CHECK_CONDITION(right_->parent_ == this);
+ // heap
+ CHECK_CONDITION(right_->prio_ <= prio_);
+ right_->Check(num_nodes, size);
+ if (right_->longest_ > longest) longest = right_->longest_;
+ }
+
+ CHECK_CONDITION(longest_ == longest);
+}
+
+const HugeAddressMap::Node *HugeAddressMap::first() const {
+ const Node *n = root();
+ if (!n) return nullptr;
+ const Node *left = n->left_;
+ while (left) {
+ n = left;
+ left = n->left_;
+ }
+
+ return n;
+}
+
+HugeAddressMap::Node *HugeAddressMap::first() {
+ const Node *f = static_cast<const HugeAddressMap *>(this)->first();
+ return const_cast<Node *>(f);
+}
+
+void HugeAddressMap::Check() {
+ size_t nodes = 0;
+ HugeLength size = NHugePages(0);
+ if (root_) {
+ CHECK_CONDITION(root_->parent_ == nullptr);
+ root_->Check(&nodes, &size);
+ }
+ CHECK_CONDITION(nodes == nranges());
+ CHECK_CONDITION(size == total_mapped());
+ CHECK_CONDITION(total_nodes_ == used_nodes_ + freelist_size_);
+}
+
+size_t HugeAddressMap::nranges() const { return used_nodes_; }
+
+HugeLength HugeAddressMap::total_mapped() const { return total_size_; }
+
+void HugeAddressMap::Print(Printer *out) const {
+ out->printf("HugeAddressMap: treap %zu / %zu nodes used / created\n",
+ used_nodes_, total_nodes_);
+ const size_t longest = root_ ? root_->longest_.raw_num() : 0;
+ out->printf("HugeAddressMap: %zu contiguous hugepages available\n", longest);
+}
+
+void HugeAddressMap::PrintInPbtxt(PbtxtRegion *hpaa) const {
+ hpaa->PrintI64("num_huge_address_map_treap_nodes_used", used_nodes_);
+ hpaa->PrintI64("num_huge_address_map_treap_nodes_created", total_nodes_);
+ const size_t longest = root_ ? root_->longest_.in_bytes() : 0;
+ hpaa->PrintI64("contiguous_free_bytes", longest);
+}
+
+HugeAddressMap::Node *HugeAddressMap::Predecessor(HugePage p) {
+ Node *n = root();
+ Node *best = nullptr;
+ while (n) {
+ HugeRange here = n->range_;
+ if (here.contains(p)) return n;
+ if (p < here.start()) {
+ // p comes before here:
+ // our predecessor isn't here, nor in the right subtree.
+ n = n->left_;
+ } else {
+ // p comes after here:
+ // here is a valid candidate, and the right subtree might have better.
+ best = n;
+ n = n->right_;
+ }
+ }
+
+ return best;
+}
+
+void HugeAddressMap::Merge(Node *b, HugeRange r, Node *a) {
+ auto merge_when = [](HugeRange x, int64_t x_when, HugeRange y,
+ int64_t y_when) {
+ // avoid overflow with floating-point
+ const size_t x_len = x.len().raw_num();
+ const size_t y_len = y.len().raw_num();
+ const double x_weight = static_cast<double>(x_len) * x_when;
+ const double y_weight = static_cast<double>(y_len) * y_when;
+ return static_cast<int64_t>((x_weight + y_weight) / (x_len + y_len));
+ };
+
+ int64_t when = absl::base_internal::CycleClock::Now();
+ // Two way merges are easy.
+ if (a == nullptr) {
+ b->when_ = merge_when(b->range_, b->when(), r, when);
+ b->range_ = Join(b->range_, r);
+ FixLongest(b);
+ return;
+ } else if (b == nullptr) {
+ a->when_ = merge_when(r, when, a->range_, a->when());
+ a->range_ = Join(r, a->range_);
+ FixLongest(a);
+ return;
+ }
+
+ // Three way merge: slightly harder. We must remove one node
+ // (arbitrarily picking next).
+ HugeRange partial = Join(r, a->range_);
+ int64_t partial_when = merge_when(r, when, a->range_, a->when());
+ HugeRange full = Join(b->range_, partial);
+ int64_t full_when = merge_when(b->range_, b->when(), partial, partial_when);
+ // Removing a will reduce total_size_ by that length, but since we're merging
+ // we actually don't change lengths at all; undo that.
+ total_size_ += a->range_.len();
+ Remove(a);
+ b->range_ = full;
+ b->when_ = full_when;
+ FixLongest(b);
+}
+
+void HugeAddressMap::Insert(HugeRange r) {
+ total_size_ += r.len();
+ // First, try to merge if necessary. Note there are three possibilities:
+ // we might need to merge before with r, r with after, or all three together.
+ Node *before = Predecessor(r.start());
+ CHECK_CONDITION(!before || !before->range_.intersects(r));
+ Node *after = before ? before->next() : first();
+ CHECK_CONDITION(!after || !after->range_.intersects(r));
+ if (before && before->range_.precedes(r)) {
+ if (after && r.precedes(after->range_)) {
+ Merge(before, r, after);
+ } else {
+ Merge(before, r, nullptr);
+ }
+ return;
+ } else if (after && r.precedes(after->range_)) {
+ Merge(nullptr, r, after);
+ return;
+ }
+ CHECK_CONDITION(!before || !before->range_.precedes(r));
+ CHECK_CONDITION(!after || !r.precedes(after->range_));
+ // No merging possible; just add a new node.
+ Node *n = Get(r);
+ Node *curr = root();
+ Node *parent = nullptr;
+ Node **link = &root_;
+ // Walk down the tree to our correct location
+ while (curr != nullptr && curr->prio_ >= n->prio_) {
+ curr->longest_ = std::max(curr->longest_, r.len());
+ parent = curr;
+ if (curr->range_.start() < r.start()) {
+ link = &curr->right_;
+ curr = curr->right_;
+ } else {
+ link = &curr->left_;
+ curr = curr->left_;
+ }
+ }
+ *link = n;
+ n->parent_ = parent;
+ n->left_ = n->right_ = nullptr;
+ n->longest_ = r.len();
+ if (curr) {
+ HugePage p = r.start();
+ // We need to split the treap at curr into n's children.
+    // This produces two treaps: one holding ranges below p, the other ranges
+    // above it; the split has a nice recursive structure.
+ Node **less = &n->left_;
+ Node *lp = n;
+ Node **more = &n->right_;
+ Node *mp = n;
+ while (curr) {
+ if (curr->range_.start() < p) {
+ *less = curr;
+ curr->parent_ = lp;
+ less = &curr->right_;
+ lp = curr;
+ curr = curr->right_;
+ } else {
+ *more = curr;
+ curr->parent_ = mp;
+ more = &curr->left_;
+ mp = curr;
+ curr = curr->left_;
+ }
+ }
+ *more = *less = nullptr;
+ // We ripped apart the tree along these two paths--fix longest pointers.
+ FixLongest(lp);
+ FixLongest(mp);
+ }
+}
+
+void HugeAddressMap::Node::FixLongest() {
+ const HugeLength l = left_ ? left_->longest_ : NHugePages(0);
+ const HugeLength r = right_ ? right_->longest_ : NHugePages(0);
+ const HugeLength c = range_.len();
+ const HugeLength new_longest = std::max({l, r, c});
+ longest_ = new_longest;
+}
+
+void HugeAddressMap::FixLongest(HugeAddressMap::Node *n) {
+ while (n) {
+ n->FixLongest();
+ n = n->parent_;
+ }
+}
+
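+// Remove() deletes n by merging its two subtrees into one treap (repeatedly
+// pulling up whichever root has the higher priority) and splicing the result
+// into n's old position, updating the "longest" augmentation along the way.
+// The node itself is recycled onto the freelist via Put() rather than freed.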
+void HugeAddressMap::Remove(HugeAddressMap::Node *n) {
+ total_size_ -= n->range_.len();
+ // We need to merge the left and right children of n into one
+ // treap, then glue it into place wherever n was.
+ Node **link;
+ Node *parent = n->parent_;
+ Node *top = n->left_;
+ Node *bottom = n->right_;
+
+ const HugeLength child_longest =
+ std::max(top ? top->longest_ : NHugePages(0),
+ bottom ? bottom->longest_ : NHugePages(0));
+ if (!parent) {
+ link = &root_;
+ } else {
+ // Account for the removed child--might change longests.
+ // Easiest way: update this subtree to ignore the removed node,
+ // then fix the chain of parents.
+ n->longest_ = child_longest;
+ FixLongest(parent);
+ if (parent->range_.start() > n->range_.start()) {
+ link = &parent->left_;
+ } else {
+ link = &parent->right_;
+ }
+ }
+
+ // A routine op we'll need a lot: given two (possibly null)
+ // children, put the root-ier one into top.
+ auto reorder_maybe = [](Node **top, Node **bottom) {
+ Node *b = *bottom, *t = *top;
+ if (b && (!t || t->prio_ < b->prio_)) {
+ *bottom = t;
+ *top = b;
+ }
+ };
+
+ reorder_maybe(&top, &bottom);
+  // Loop while we have two treaps to merge (top is always non-null if bottom
+  // is).  Invariant: top and bottom are valid treaps (longest values
+  // included); parent and everything above/elsewhere have correct longest
+  // values, though parent's child link is only correct once top and bottom
+  // have been fully merged.
+ while (bottom) {
+ *link = top;
+ top->parent_ = parent;
+ // We're merging bottom into top, so top might contain a longer
+ // chunk than it thinks.
+ top->longest_ = std::max(top->longest_, bottom->longest_);
+ parent = top;
+ if (bottom->range_.start() < top->range_.start()) {
+ link = &top->left_;
+ top = top->left_;
+ } else {
+ link = &top->right_;
+ top = top->right_;
+ }
+ reorder_maybe(&top, &bottom);
+ }
+ *link = top;
+ if (top) top->parent_ = parent;
+ Put(n);
+}
+
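+// Node bookkeeping: nodes are never handed back to the metadata allocator.
+// Put() threads retired nodes onto an intrusive freelist through left_, and
+// Get() reuses them (or carves a fresh node out of meta_()), assigning a new
+// random treap priority from rand_r().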
+void HugeAddressMap::Put(Node *n) {
+ freelist_size_++;
+ used_nodes_--;
+ n->left_ = freelist_;
+ freelist_ = n;
+}
+
+HugeAddressMap::Node *HugeAddressMap::Get(HugeRange r) {
+ CHECK_CONDITION((freelist_ == nullptr) == (freelist_size_ == 0));
+ used_nodes_++;
+ int prio = rand_r(&seed_);
+ if (freelist_size_ == 0) {
+ total_nodes_++;
+ Node *ret = reinterpret_cast<Node *>(meta_(sizeof(Node)));
+ return new (ret) Node(r, prio);
+ }
+
+ freelist_size_--;
+ Node *ret = freelist_;
+ freelist_ = ret->left_;
+ return new (ret) Node(r, prio);
+}
+
+HugeAddressMap::Node::Node(HugeRange r, int prio)
+ : range_(r), prio_(prio), when_(absl::base_internal::CycleClock::Now()) {}
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_address_map.h b/contrib/libs/tcmalloc/tcmalloc/huge_address_map.h
new file mode 100644
index 0000000000..3c71f19a3f
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/huge_address_map.h
@@ -0,0 +1,148 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TCMALLOC_HUGE_ADDRESS_MAP_H_
+#define TCMALLOC_HUGE_ADDRESS_MAP_H_
+#include <stddef.h>
+#include <stdint.h>
+
+#include "tcmalloc/huge_pages.h"
+#include "tcmalloc/internal/logging.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+// Maintains a set of disjoint HugeRanges, merging adjacent ranges into one.
+// Exposes a balanced (somehow) binary tree of free ranges on address,
+// augmented with the largest range in each subtree (this allows fairly simple
+// allocation algorithms from the contained ranges).
+//
+// This class scales well and is *reasonably* performant, but it is not intended
+// for use on extremely hot paths.
+// TODO(b/134688982): extend to support other range-like types?
+class HugeAddressMap {
+ public:
+ typedef void *(*MetadataAllocFunction)(size_t bytes);
+ explicit constexpr HugeAddressMap(MetadataAllocFunction meta);
+
+ // IMPORTANT: DESTROYING A HUGE ADDRESS MAP DOES NOT MAKE ANY ATTEMPT
+ // AT FREEING ALLOCATED METADATA.
+ ~HugeAddressMap() = default;
+
+ class Node {
+ public:
+ // the range stored at this point
+ HugeRange range() const;
+ // Tree structure
+ Node *left();
+ Node *right();
+ // Iterate to the next node in address order
+ const Node *next() const;
+ Node *next();
+    // When was this node's content added (in
+    // absl::base_internal::CycleClock::Now units)?
+ int64_t when() const;
+
+ // What is the length of the longest range in the subtree rooted here?
+ HugeLength longest() const;
+
+ private:
+ Node(HugeRange r, int prio);
+ friend class HugeAddressMap;
+ HugeRange range_;
+ int prio_; // chosen randomly
+ Node *left_, *right_;
+ Node *parent_;
+ HugeLength longest_;
+ int64_t when_;
+ // Expensive, recursive consistency check.
+ // Accumulates node count and range sizes into passed arguments.
+ void Check(size_t *num_nodes, HugeLength *size) const;
+
+ // We've broken longest invariants somehow; fix them here.
+ void FixLongest();
+ };
+
+ // Get root of the tree.
+ Node *root();
+ const Node *root() const;
+
+ // Get lowest-addressed node
+ const Node *first() const;
+ Node *first();
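+  // (All ranges can be walked in address order via first()/next(), as the
+  // Contents() helper in huge_address_map_test.cc does.)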
+
+ // Returns the highest-addressed range that does not lie completely
+ // after p (if any).
+ Node *Predecessor(HugePage p);
+
+ // Expensive consistency check.
+ void Check();
+
+ // Statistics
+ size_t nranges() const;
+ HugeLength total_mapped() const;
+ void Print(Printer *out) const;
+ void PrintInPbtxt(PbtxtRegion *hpaa) const;
+
+ // Add <r> to the map, merging with adjacent ranges as needed.
+ void Insert(HugeRange r);
+
+ // Delete n from the map.
+ void Remove(Node *n);
+
+ private:
+ // our tree
+ Node *root_{nullptr};
+ size_t used_nodes_{0};
+ HugeLength total_size_{NHugePages(0)};
+
+ // cache of unused nodes
+ Node *freelist_{nullptr};
+ size_t freelist_size_{0};
+ // How we get more
+ MetadataAllocFunction meta_;
+ Node *Get(HugeRange r);
+ void Put(Node *n);
+
+ size_t total_nodes_{0};
+
+ void Merge(Node *b, HugeRange r, Node *a);
+ void FixLongest(Node *n);
+ // Note that we always use the same seed, currently; this isn't very random.
+ // In practice we're not worried about adversarial input and this works well
+ // enough.
+ unsigned int seed_{0};
+};
+
+inline constexpr HugeAddressMap::HugeAddressMap(MetadataAllocFunction meta)
+ : meta_(meta) {}
+
+inline HugeRange HugeAddressMap::Node::range() const { return range_; }
+inline HugeAddressMap::Node *HugeAddressMap::Node::left() { return left_; }
+inline HugeAddressMap::Node *HugeAddressMap::Node::right() { return right_; }
+
+inline int64_t HugeAddressMap::Node::when() const { return when_; }
+inline HugeLength HugeAddressMap::Node::longest() const { return longest_; }
+
+inline HugeAddressMap::Node *HugeAddressMap::root() { return root_; }
+inline const HugeAddressMap::Node *HugeAddressMap::root() const {
+ return root_;
+}
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // TCMALLOC_HUGE_ADDRESS_MAP_H_
diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_address_map_test.cc b/contrib/libs/tcmalloc/tcmalloc/huge_address_map_test.cc
new file mode 100644
index 0000000000..455cd63809
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/huge_address_map_test.cc
@@ -0,0 +1,85 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/huge_address_map.h"
+
+#include <stdlib.h>
+
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace tcmalloc {
+namespace tcmalloc_internal {
+namespace {
+
+class HugeAddressMapTest : public ::testing::Test {
+ protected:
+ HugeAddressMapTest() : map_(MallocMetadata) { metadata_allocs_.clear(); }
+
+ ~HugeAddressMapTest() override {
+ for (void* p : metadata_allocs_) {
+ free(p);
+ }
+ }
+
+ std::vector<HugeRange> Contents() {
+ std::vector<HugeRange> ret;
+ auto node = map_.first();
+ while (node) {
+ ret.push_back(node->range());
+ node = node->next();
+ }
+
+ return ret;
+ }
+
+ HugePage hp(size_t i) { return {i}; }
+ HugeLength hl(size_t i) { return NHugePages(i); }
+
+ HugeAddressMap map_;
+
+ private:
+ static void* MallocMetadata(size_t size) {
+ void* ptr = malloc(size);
+ metadata_allocs_.push_back(ptr);
+ return ptr;
+ }
+
+ static std::vector<void*> metadata_allocs_;
+};
+
+std::vector<void*> HugeAddressMapTest::metadata_allocs_;
+
+// This test verifies that HugeAddressMap merges properly.
+TEST_F(HugeAddressMapTest, Merging) {
+ const HugeRange r1 = HugeRange::Make(hp(0), hl(1));
+ const HugeRange r2 = HugeRange::Make(hp(1), hl(1));
+ const HugeRange r3 = HugeRange::Make(hp(2), hl(1));
+ const HugeRange all = Join(r1, Join(r2, r3));
+ map_.Insert(r1);
+ map_.Check();
+ EXPECT_THAT(Contents(), testing::ElementsAre(r1));
+ map_.Insert(r3);
+ map_.Check();
+ EXPECT_THAT(Contents(), testing::ElementsAre(r1, r3));
+ map_.Insert(r2);
+ map_.Check();
+ EXPECT_THAT(Contents(), testing::ElementsAre(all));
+}
+
+} // namespace
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_allocator.cc b/contrib/libs/tcmalloc/tcmalloc/huge_allocator.cc
new file mode 100644
index 0000000000..c77f4522ad
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/huge_allocator.cc
@@ -0,0 +1,175 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/huge_allocator.h"
+
+#include <string.h>
+
+#include "tcmalloc/huge_address_map.h"
+#include "tcmalloc/internal/logging.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+void HugeAllocator::Print(Printer *out) {
+ out->printf("HugeAllocator: contiguous, unbacked hugepage(s)\n");
+ free_.Print(out);
+ out->printf(
+ "HugeAllocator: %zu requested - %zu in use = %zu hugepages free\n",
+ from_system_.raw_num(), in_use_.raw_num(),
+ (from_system_ - in_use_).raw_num());
+}
+
+void HugeAllocator::PrintInPbtxt(PbtxtRegion *hpaa) const {
+ free_.PrintInPbtxt(hpaa);
+ hpaa->PrintI64("num_total_requested_huge_pages", from_system_.raw_num());
+ hpaa->PrintI64("num_in_use_huge_pages", in_use_.raw_num());
+}
+
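+// Find() walks the treap looking for a free range of at least n hugepages,
+// using the per-subtree "longest" augmentation to skip subtrees that cannot
+// satisfy the request.  It keeps the smallest adequate candidate seen on its
+// single root-to-leaf walk, so it approximates a best fit without an
+// exhaustive scan.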
+HugeAddressMap::Node *HugeAllocator::Find(HugeLength n) {
+ HugeAddressMap::Node *curr = free_.root();
+ // invariant: curr != nullptr && curr->longest >= n
+ // we favor smaller gaps and lower nodes and lower addresses, in that
+ // order. The net effect is that we are neither a best-fit nor a
+ // lowest-address allocator but vaguely close to both.
+ HugeAddressMap::Node *best = nullptr;
+ while (curr && curr->longest() >= n) {
+ if (curr->range().len() >= n) {
+ if (!best || best->range().len() > curr->range().len()) {
+ best = curr;
+ }
+ }
+
+ // Either subtree could contain a better fit and we don't want to
+ // search the whole tree. Pick a reasonable child to look at.
+ auto left = curr->left();
+ auto right = curr->right();
+ if (!left || left->longest() < n) {
+ curr = right;
+ continue;
+ }
+
+ if (!right || right->longest() < n) {
+ curr = left;
+ continue;
+ }
+
+ // Here, we have a nontrivial choice.
+ if (left->range().len() == right->range().len()) {
+ if (left->longest() <= right->longest()) {
+ curr = left;
+ } else {
+ curr = right;
+ }
+ } else if (left->range().len() < right->range().len()) {
+      // The roots differ in size; descend toward the smaller root, as that is
+      // slightly more likely to lead to our best (closest) fit.
+ curr = left;
+ } else {
+ curr = right;
+ }
+ }
+ return best;
+}
+
+void HugeAllocator::CheckFreelist() {
+ free_.Check();
+ size_t num_nodes = free_.nranges();
+ HugeLength n = free_.total_mapped();
+ free_.Check();
+ CHECK_CONDITION(n == from_system_ - in_use_);
+ LargeSpanStats large;
+ AddSpanStats(nullptr, &large, nullptr);
+ CHECK_CONDITION(num_nodes == large.spans);
+ CHECK_CONDITION(n.in_pages() == large.returned_pages);
+}
+
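+// AllocateRange() asks the system allocator for n hugepages, hugepage-aligned.
+// The underlying allocator may hand back more than requested; whatever is
+// actually received (a whole number of hugepages) is recorded in from_system_
+// and returned as the range.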
+HugeRange HugeAllocator::AllocateRange(HugeLength n) {
+ if (n.overflows()) return HugeRange::Nil();
+ size_t actual;
+ size_t bytes = n.in_bytes();
+ size_t align = kHugePageSize;
+ void *ptr = allocate_(bytes, &actual, align);
+ if (ptr == nullptr) {
+ // OOM...
+ return HugeRange::Nil();
+ }
+ CHECK_CONDITION(ptr != nullptr);
+ // It's possible for a request to return extra hugepages.
+ CHECK_CONDITION(actual % kHugePageSize == 0);
+ n = HLFromBytes(actual);
+ from_system_ += n;
+ return HugeRange::Make(HugePageContaining(ptr), n);
+}
+
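+// Get() satisfies a request for n hugepages: find a free range of at least n,
+// growing from the system if the treap has nothing big enough (the fresh range
+// is Release()d into the map and then re-found, so both paths share the same
+// carve-out logic).  Any excess beyond n is split off and returned to the map.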
+HugeRange HugeAllocator::Get(HugeLength n) {
+ CHECK_CONDITION(n > NHugePages(0));
+ auto *node = Find(n);
+ if (!node) {
+ // Get more memory, then "delete" it
+ HugeRange r = AllocateRange(n);
+ if (!r.valid()) return r;
+ in_use_ += r.len();
+ Release(r);
+ node = Find(n);
+ CHECK_CONDITION(node != nullptr);
+ }
+ in_use_ += n;
+
+ HugeRange r = node->range();
+ free_.Remove(node);
+ if (r.len() > n) {
+ HugeLength before = r.len();
+ HugeRange extra = HugeRange::Make(r.start() + n, before - n);
+ r = HugeRange::Make(r.start(), n);
+ ASSERT(r.precedes(extra));
+ ASSERT(r.len() + extra.len() == before);
+ in_use_ += extra.len();
+ Release(extra);
+ } else {
+ // Release does this for us
+ DebugCheckFreelist();
+ }
+
+ return r;
+}
+
+void HugeAllocator::Release(HugeRange r) {
+ in_use_ -= r.len();
+
+ free_.Insert(r);
+ DebugCheckFreelist();
+}
+
+void HugeAllocator::AddSpanStats(SmallSpanStats *small, LargeSpanStats *large,
+ PageAgeHistograms *ages) const {
+ for (const HugeAddressMap::Node *node = free_.first(); node != nullptr;
+ node = node->next()) {
+ HugeLength n = node->range().len();
+ if (large != nullptr) {
+ large->spans++;
+ large->returned_pages += n.in_pages();
+ }
+
+ if (ages != nullptr) {
+ ages->RecordRange(n.in_pages(), true, node->when());
+ }
+ }
+}
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_allocator.h b/contrib/libs/tcmalloc/tcmalloc/huge_allocator.h
new file mode 100644
index 0000000000..6242805c49
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/huge_allocator.h
@@ -0,0 +1,108 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Tracking information for the available range of hugepages,
+// and a basic allocator for unmapped hugepages.
+#ifndef TCMALLOC_HUGE_ALLOCATOR_H_
+#define TCMALLOC_HUGE_ALLOCATOR_H_
+
+#include <stddef.h>
+
+#include "tcmalloc/common.h"
+#include "tcmalloc/huge_address_map.h"
+#include "tcmalloc/huge_pages.h"
+#include "tcmalloc/stats.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+// these typedefs allow replacement of tcmalloc::System* for tests.
+typedef void *(*MemoryAllocFunction)(size_t bytes, size_t *actual,
+ size_t align);
+typedef void *(*MetadataAllocFunction)(size_t bytes);
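+// (Tests inject fakes through these; e.g. huge_allocator_test.cc constructs a
+// HugeAllocator{AllocateFake, MallocMetadata} with stubbed system and metadata
+// allocators.)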
+
+// This tracks available ranges of hugepages and fulfills requests for
+// usable memory, allocating more from the system as needed. All
+// hugepages are treated as (and assumed to be) unbacked.
+class HugeAllocator {
+ public:
+ constexpr HugeAllocator(MemoryAllocFunction allocate,
+ MetadataAllocFunction meta_allocate)
+ : free_(meta_allocate), allocate_(allocate) {}
+
+ // Obtain a range of n unbacked hugepages, distinct from all other
+ // calls to Get (other than those that have been Released.)
+ HugeRange Get(HugeLength n);
+
+ // Returns a range of hugepages for reuse by subsequent Gets().
+ // REQUIRES: <r> is the return value (or a subrange thereof) of a previous
+ // call to Get(); neither <r> nor any overlapping range has been released
+ // since that Get().
+ void Release(HugeRange r);
+
+  // Total memory requested from the system, whether in use or not.
+ HugeLength system() const { return from_system_; }
+ // Unused memory in the allocator.
+ HugeLength size() const { return from_system_ - in_use_; }
+
+ void AddSpanStats(SmallSpanStats *small, LargeSpanStats *large,
+ PageAgeHistograms *ages) const;
+
+ BackingStats stats() const {
+ BackingStats s;
+ s.system_bytes = system().in_bytes();
+ s.free_bytes = 0;
+ s.unmapped_bytes = size().in_bytes();
+ return s;
+ }
+
+ void Print(Printer *out);
+ void PrintInPbtxt(PbtxtRegion *hpaa) const;
+
+ private:
+ // We're constrained in several ways by existing code. Hard requirements:
+ // * no radix tree or similar O(address space) external space tracking
+ // * support sub releasing
+ // * low metadata overhead
+ // * no pre-allocation.
+ // * reasonable space overhead
+ //
+  // We use a treap ordered on addresses to track free ranges. It isn't the most
+ // efficient thing ever but we're about to hit 100usec+/hugepage
+ // backing costs if we've gotten this far; the last few bits of performance
+ // don't matter, and most of the simple ideas can't hit all of the above
+ // requirements.
+ HugeAddressMap free_;
+ HugeAddressMap::Node *Find(HugeLength n);
+
+ void CheckFreelist();
+ void DebugCheckFreelist() {
+#ifndef NDEBUG
+ CheckFreelist();
+#endif
+ }
+
+ HugeLength from_system_{NHugePages(0)};
+ HugeLength in_use_{NHugePages(0)};
+
+ MemoryAllocFunction allocate_;
+ HugeRange AllocateRange(HugeLength n);
+};
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // TCMALLOC_HUGE_ALLOCATOR_H_
diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_allocator_test.cc b/contrib/libs/tcmalloc/tcmalloc/huge_allocator_test.cc
new file mode 100644
index 0000000000..150075b88e
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/huge_allocator_test.cc
@@ -0,0 +1,449 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/huge_allocator.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <algorithm>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "absl/base/internal/cycleclock.h"
+#include "absl/random/random.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "tcmalloc/huge_pages.h"
+#include "tcmalloc/internal/logging.h"
+
+namespace tcmalloc {
+namespace tcmalloc_internal {
+namespace {
+
+class HugeAllocatorTest : public testing::TestWithParam<bool> {
+ private:
+ // Use a tiny fraction of actual size so we can test aggressively.
+ static void *AllocateFake(size_t bytes, size_t *actual, size_t align);
+
+ static constexpr size_t kMaxBacking = 1024 * 1024;
+ // This isn't super good form but we'll never have more than one HAT
+ // extant at once.
+ static std::vector<size_t> backing_;
+
+ // We use actual malloc for metadata allocations, but we track them so they
+ // can be deleted.
+ static void *MallocMetadata(size_t size);
+ static std::vector<void *> metadata_allocs_;
+ static size_t metadata_bytes_;
+ static bool should_overallocate_;
+ static HugeLength huge_pages_requested_;
+ static HugeLength huge_pages_received_;
+
+ protected:
+ HugeLength HugePagesRequested() { return huge_pages_requested_; }
+ HugeLength HugePagesReceived() { return huge_pages_received_; }
+
+ HugeAllocatorTest() {
+ should_overallocate_ = GetParam();
+ huge_pages_requested_ = NHugePages(0);
+ huge_pages_received_ = NHugePages(0);
+ // We don't use the first few bytes, because things might get weird
+ // given zero pointers.
+ backing_.resize(1024);
+ metadata_bytes_ = 0;
+ }
+
+ ~HugeAllocatorTest() override {
+ for (void *p : metadata_allocs_) {
+ free(p);
+ }
+ metadata_allocs_.clear();
+ backing_.clear();
+ }
+
+ size_t *GetActual(HugePage p) { return &backing_[p.index()]; }
+
+  // We're dealing with a lot of (fake) memory, so we don't want to memset it
+  // all and then check every byte for corruption. Instead each hugepage is
+  // backed by a single word, which we mark and later verify.
+ void CheckPages(HugeRange r, size_t c) {
+ for (HugePage p = r.first; p < r.first + r.n; ++p) {
+ EXPECT_EQ(c, *GetActual(p));
+ }
+ }
+
+ void MarkPages(HugeRange r, size_t c) {
+ for (HugePage p = r.first; p < r.first + r.n; ++p) {
+ *GetActual(p) = c;
+ }
+ }
+
+ void CheckStats(HugeLength expected_use) {
+ const HugeLength received = HugePagesReceived();
+ EXPECT_EQ(received, allocator_.system());
+ HugeLength used = received - allocator_.size();
+ EXPECT_EQ(used, expected_use);
+ }
+
+ HugeAllocator allocator_{AllocateFake, MallocMetadata};
+};
+
+// Use a tiny fraction of actual size so we can test aggressively.
+void *HugeAllocatorTest::AllocateFake(size_t bytes, size_t *actual,
+ size_t align) {
+ CHECK_CONDITION(bytes % kHugePageSize == 0);
+ CHECK_CONDITION(align % kHugePageSize == 0);
+ HugeLength req = HLFromBytes(bytes);
+ huge_pages_requested_ += req;
+ // Test the case where our sys allocator provides too much.
+ if (should_overallocate_) ++req;
+ huge_pages_received_ += req;
+ *actual = req.in_bytes();
+ // we'll actually provide hidden backing, one word per hugepage.
+ bytes = req / NHugePages(1);
+ align /= kHugePageSize;
+ size_t index = backing_.size();
+ if (index % align != 0) {
+    index += (align - (index % align));
+ }
+ if (index + bytes > kMaxBacking) return nullptr;
+ backing_.resize(index + bytes);
+ void *ptr = reinterpret_cast<void *>(index * kHugePageSize);
+ return ptr;
+}
+
+// We use actual malloc for metadata allocations, but we track them so they
+// can be deleted.
+void *HugeAllocatorTest::MallocMetadata(size_t size) {
+ metadata_bytes_ += size;
+ void *ptr = malloc(size);
+ metadata_allocs_.push_back(ptr);
+ return ptr;
+}
+
+std::vector<size_t> HugeAllocatorTest::backing_;
+std::vector<void *> HugeAllocatorTest::metadata_allocs_;
+size_t HugeAllocatorTest::metadata_bytes_;
+bool HugeAllocatorTest::should_overallocate_;
+HugeLength HugeAllocatorTest::huge_pages_requested_;
+HugeLength HugeAllocatorTest::huge_pages_received_;
+
+TEST_P(HugeAllocatorTest, Basic) {
+ std::vector<std::pair<HugeRange, size_t>> allocs;
+ absl::BitGen rng;
+ size_t label = 0;
+ HugeLength total = NHugePages(0);
+ static const size_t kSize = 1000;
+ HugeLength peak = total;
+ for (int i = 0; i < kSize; ++i) {
+ HugeLength len =
+ NHugePages(absl::LogUniform<int32_t>(rng, 0, (1 << 12) - 1) + 1);
+ auto r = allocator_.Get(len);
+ ASSERT_TRUE(r.valid());
+ total += len;
+ peak = std::max(peak, total);
+ CheckStats(total);
+ MarkPages(r, label);
+ allocs.push_back({r, label});
+ label++;
+ }
+
+ for (int i = 0; i < 1000 * 25; ++i) {
+ size_t index = absl::Uniform<int32_t>(rng, 0, kSize);
+ std::swap(allocs[index], allocs[kSize - 1]);
+ auto p = allocs[kSize - 1];
+ CheckPages(p.first, p.second);
+ total -= p.first.len();
+ allocator_.Release(p.first);
+ CheckStats(total);
+
+ HugeLength len =
+ NHugePages(absl::LogUniform<int32_t>(rng, 0, (1 << 12) - 1) + 1);
+ auto r = allocator_.Get(len);
+ ASSERT_TRUE(r.valid());
+ ASSERT_EQ(r.len(), len);
+ total += len;
+ peak = std::max(peak, total);
+ CheckStats(total);
+ MarkPages(r, label);
+ allocs[kSize - 1] = {r, label};
+ label++;
+ }
+ for (auto p : allocs) {
+ CheckPages(p.first, p.second);
+ allocator_.Release(p.first);
+ }
+}
+
+// Check that releasing small chunks of allocations works OK.
+TEST_P(HugeAllocatorTest, Subrelease) {
+ size_t label = 1;
+ const HugeLength kLen = NHugePages(8);
+ const HugeLength kTotal = kLen * (kLen / NHugePages(1) - 1);
+ for (int i = 0; i < 100; ++i) {
+ std::vector<std::pair<HugeRange, size_t>> allocs;
+ // get allocs of kLen and release different sized sub-chunks of them -
+ // make sure that doesn't break anything else.
+ for (HugeLength j = NHugePages(1); j < kLen; ++j) {
+ auto r = allocator_.Get(kLen);
+ ASSERT_TRUE(r.valid());
+ MarkPages(r, label);
+ allocator_.Release({r.start(), j});
+ allocs.push_back({{r.start() + j, kLen - j}, label});
+ label++;
+ }
+ EXPECT_EQ(kTotal, HugePagesRequested());
+ for (auto p : allocs) {
+ CheckPages(p.first, p.second);
+ allocator_.Release(p.first);
+ }
+ }
+}
+
+// Does subreleasing work OK for absurdly large allocations?
+TEST_P(HugeAllocatorTest, SubreleaseLarge) {
+ absl::BitGen rng;
+ std::vector<std::pair<HugeRange, size_t>> allocs;
+ size_t label = 1;
+ const HugeLength kLimit = HLFromBytes(1024ul * 1024 * 1024 * 1024);
+ for (HugeLength n = NHugePages(2); n < kLimit; n *= 2) {
+ auto r = allocator_.Get(n);
+ ASSERT_TRUE(r.valid());
+ MarkPages(r, label);
+ // chunk of less than half
+ HugeLength chunk =
+ NHugePages(absl::Uniform<int32_t>(rng, 0, n / NHugePages(2)) + 1);
+ allocator_.Release({r.start(), chunk});
+ allocs.push_back({{r.start() + chunk, n - chunk}, label});
+ label++;
+ }
+ // reuse the released space
+ const HugeLength total = HugePagesRequested();
+ while (total == HugePagesRequested()) {
+ HugeLength n =
+ NHugePages(absl::LogUniform<int32_t>(rng, 0, (1 << 8) - 1) + 1);
+ auto r = allocator_.Get(n);
+ ASSERT_TRUE(r.valid());
+ MarkPages(r, label);
+ allocs.push_back({r, label});
+ label++;
+ }
+ for (auto p : allocs) {
+ CheckPages(p.first, p.second);
+ allocator_.Release(p.first);
+ }
+}
+
+// We don't care *that* much about vaddress space, but let's not be crazy.
+// Don't fill tiny requests from big spaces.
+TEST_P(HugeAllocatorTest, Fragmentation) {
+ // Prime the pump with some random allocations.
+ absl::BitGen rng;
+
+ std::vector<HugeRange> free;
+ constexpr int kSlots = 50;
+
+ // Plan to insert a large allocation at the big_slot'th index, then free it
+  // during the initial priming step (so we have a contiguous region of at
+  // least `big` hugepages).
+ HugeLength big = NHugePages(8);
+ const int big_slot = absl::Uniform(rng, 0, kSlots);
+
+ for (int i = 0; i < kSlots; ++i) {
+ if (i == big_slot) {
+ auto r = allocator_.Get(big);
+ ASSERT_TRUE(r.valid());
+ free.push_back(r);
+ }
+
+ auto r = allocator_.Get(NHugePages(1));
+ ASSERT_TRUE(r.valid());
+ if (absl::Bernoulli(rng, 1.0 / 2)) {
+ free.push_back(r);
+ }
+ }
+ size_t slots = free.size() - 1;
+ for (auto r : free) {
+ allocator_.Release(r);
+ }
+ free.clear();
+ static const size_t kReps = 5;
+ for (int i = 0; i < kReps; ++i) {
+ SCOPED_TRACE(i);
+
+ // Ensure we have a range of this size.
+ HugeRange r = allocator_.Get(big);
+ ASSERT_TRUE(r.valid());
+ if (NHugePages(slots) > allocator_.size()) {
+ // We should also have slots pages left over after allocating big
+ for (int i = 0; i < slots; ++i) {
+ HugeRange f = allocator_.Get(NHugePages(1));
+ ASSERT_TRUE(f.valid());
+ free.push_back(f);
+ }
+ for (auto f : free) {
+ allocator_.Release(f);
+ }
+ free.clear();
+ }
+ allocator_.Release(r);
+ // We should definitely have at least this many small spaces...
+ for (int i = 0; i < slots; ++i) {
+ r = allocator_.Get(NHugePages(1));
+ ASSERT_TRUE(r.valid());
+ free.push_back(r);
+ }
+ // that don't interfere with the available big space.
+ auto before = allocator_.system();
+ r = allocator_.Get(big);
+ ASSERT_TRUE(r.valid());
+ EXPECT_EQ(before, allocator_.system());
+ allocator_.Release(r);
+ for (auto r : free) {
+ allocator_.Release(r);
+ }
+ free.clear();
+ slots += big.raw_num();
+ big += big;
+ }
+}
+
+// Check that we only request as much as we actually need from the system.
+TEST_P(HugeAllocatorTest, Frugal) {
+ HugeLength total = NHugePages(0);
+ static const size_t kSize = 1000;
+ for (int i = 1; i < kSize; ++i) {
+ HugeLength len = NHugePages(i);
+ // toss the range, we ain't using it
+ ASSERT_TRUE(allocator_.Get(len).valid());
+
+ total += len;
+ CheckStats(total);
+ EXPECT_EQ(total, HugePagesRequested());
+ }
+}
+
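+// The expected average ages below are the page-weighted means of the free
+// ranges' ages: e.g. a 1-hugepage range free for ~0.75s plus a 2-hugepage
+// range free for ~0.25s gives (0.75 * 1 + 0.25 * 2) / (1 + 2) ~= 0.417s.
+// The EXPECT_LEs subtract kEpsilon and only bound from below, since sleeps
+// and the cycle-clock conversion can both add slop.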
+TEST_P(HugeAllocatorTest, Stats) {
+ struct Helper {
+ static void Stats(const HugeAllocator *huge, size_t *num_spans,
+ Length *pages, absl::Duration *avg_age) {
+ SmallSpanStats small;
+ LargeSpanStats large;
+ PageAgeHistograms ages(absl::base_internal::CycleClock::Now());
+ huge->AddSpanStats(&small, &large, &ages);
+ for (auto i = Length(0); i < kMaxPages; ++i) {
+ EXPECT_EQ(0, small.normal_length[i.raw_num()]);
+ EXPECT_EQ(0, small.returned_length[i.raw_num()]);
+ }
+ *num_spans = large.spans;
+ EXPECT_EQ(Length(0), large.normal_pages);
+ *pages = large.returned_pages;
+ const PageAgeHistograms::Histogram *hist = ages.GetTotalHistogram(true);
+ *avg_age = absl::Seconds(hist->avg_age());
+ }
+ };
+
+ if (GetParam()) {
+ // Ensure overallocation doesn't skew our measurements below.
+ allocator_.Release(allocator_.Get(NHugePages(7)));
+ }
+ const HugeRange r = allocator_.Get(NHugePages(8));
+ ASSERT_TRUE(r.valid());
+ const HugePage p = r.start();
+ // Break it into 3 ranges, separated by one-page regions,
+ // so we can easily track the internal state in stats.
+ const HugeRange r1 = {p, NHugePages(1)};
+ const HugeRange b1 = {p + NHugePages(1), NHugePages(1)};
+ const HugeRange r2 = {p + NHugePages(2), NHugePages(2)};
+ const HugeRange b2 = {p + NHugePages(4), NHugePages(1)};
+ const HugeRange r3 = {p + NHugePages(5), NHugePages(3)};
+
+ size_t num_spans;
+ Length pages;
+ absl::Duration avg_age;
+
+ Helper::Stats(&allocator_, &num_spans, &pages, &avg_age);
+ EXPECT_EQ(0, num_spans);
+ EXPECT_EQ(Length(0), pages);
+ EXPECT_EQ(absl::ZeroDuration(), avg_age);
+
+ allocator_.Release(r1);
+ constexpr absl::Duration kDelay = absl::Milliseconds(500);
+ absl::SleepFor(kDelay);
+ Helper::Stats(&allocator_, &num_spans, &pages, &avg_age);
+ EXPECT_EQ(1, num_spans);
+ EXPECT_EQ(NHugePages(1).in_pages(), pages);
+ // We can only do >= testing, because we might be arbitrarily delayed.
+ // Since avg_age is computed in floating point, we may have round-off from
+ // TCMalloc's internal use of absl::base_internal::CycleClock down through
+ // computing the average age of the spans. kEpsilon allows for a tiny amount
+ // of slop.
+ constexpr absl::Duration kEpsilon = absl::Microseconds(200);
+ EXPECT_LE(kDelay - kEpsilon, avg_age);
+
+ allocator_.Release(r2);
+ absl::SleepFor(absl::Milliseconds(250));
+ Helper::Stats(&allocator_, &num_spans, &pages, &avg_age);
+ EXPECT_EQ(2, num_spans);
+ EXPECT_EQ(NHugePages(3).in_pages(), pages);
+ EXPECT_LE(
+ (absl::Seconds(0.75) * 1 + absl::Seconds(0.25) * 2) / (1 + 2) - kEpsilon,
+ avg_age);
+
+ allocator_.Release(r3);
+ absl::SleepFor(absl::Milliseconds(125));
+ Helper::Stats(&allocator_, &num_spans, &pages, &avg_age);
+ EXPECT_EQ(3, num_spans);
+ EXPECT_EQ(NHugePages(6).in_pages(), pages);
+ EXPECT_LE((absl::Seconds(0.875) * 1 + absl::Seconds(0.375) * 2 +
+ absl::Seconds(0.125) * 3) /
+ (1 + 2 + 3) -
+ kEpsilon,
+ avg_age);
+
+ allocator_.Release(b1);
+ allocator_.Release(b2);
+ absl::SleepFor(absl::Milliseconds(100));
+ Helper::Stats(&allocator_, &num_spans, &pages, &avg_age);
+ EXPECT_EQ(1, num_spans);
+ EXPECT_EQ(NHugePages(8).in_pages(), pages);
+ EXPECT_LE((absl::Seconds(0.975) * 1 + absl::Seconds(0.475) * 2 +
+ absl::Seconds(0.225) * 3 + absl::Seconds(0.1) * 2) /
+ (1 + 2 + 3 + 2) -
+ kEpsilon,
+ avg_age);
+}
+
+// Make sure we're well-behaved in the presence of OOM (and that we do
+// OOM at some point...)
+TEST_P(HugeAllocatorTest, OOM) {
+ HugeLength n = NHugePages(1);
+ while (allocator_.Get(n).valid()) {
+ n *= 2;
+ }
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ NormalOverAlloc, HugeAllocatorTest, testing::Values(false, true),
+ +[](const testing::TestParamInfo<bool> &info) {
+ return info.param ? "overallocates" : "normal";
+ });
+
+} // namespace
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_cache.cc b/contrib/libs/tcmalloc/tcmalloc/huge_cache.cc
new file mode 100644
index 0000000000..0d25da2983
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/huge_cache.cc
@@ -0,0 +1,494 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/huge_cache.h"
+
+#include <tuple>
+
+#include "absl/time/time.h"
+#include "tcmalloc/common.h"
+#include "tcmalloc/huge_address_map.h"
+#include "tcmalloc/huge_pages.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/stats.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+template <size_t kEpochs>
+void MinMaxTracker<kEpochs>::Report(HugeLength val) {
+ timeseries_.Report(val);
+}
+
+template <size_t kEpochs>
+HugeLength MinMaxTracker<kEpochs>::MaxOverTime(absl::Duration t) const {
+ HugeLength m = NHugePages(0);
+ size_t num_epochs = ceil(absl::FDivDuration(t, kEpochLength));
+ timeseries_.IterBackwards([&](size_t offset, int64_t ts,
+ const Extrema &e) { m = std::max(m, e.max); },
+ num_epochs);
+ return m;
+}
+
+template <size_t kEpochs>
+HugeLength MinMaxTracker<kEpochs>::MinOverTime(absl::Duration t) const {
+ HugeLength m = kMaxVal;
+ size_t num_epochs = ceil(absl::FDivDuration(t, kEpochLength));
+ timeseries_.IterBackwards([&](size_t offset, int64_t ts,
+ const Extrema &e) { m = std::min(m, e.min); },
+ num_epochs);
+ return m;
+}
+
+template <size_t kEpochs>
+void MinMaxTracker<kEpochs>::Print(Printer *out) const {
+ // Prints timestamp:min_pages:max_pages for each window with records.
+ // Timestamp == kEpochs - 1 is the most recent measurement.
+ const int64_t millis = absl::ToInt64Milliseconds(kEpochLength);
+ out->printf("\nHugeCache: window %lldms * %zu", millis, kEpochs);
+ int written = 0;
+ timeseries_.Iter(
+ [&](size_t offset, int64_t ts, const Extrema &e) {
+ if ((written++) % 100 == 0)
+ out->printf("\nHugeCache: Usage timeseries ");
+ out->printf("%zu:%zu:%zd,", offset, e.min.raw_num(), e.max.raw_num());
+ },
+ timeseries_.kSkipEmptyEntries);
+ out->printf("\n");
+}
+
+template <size_t kEpochs>
+void MinMaxTracker<kEpochs>::PrintInPbtxt(PbtxtRegion *hpaa) const {
+ // Prints content of each non-empty epoch, from oldest to most recent data
+ auto huge_cache_history = hpaa->CreateSubRegion("huge_cache_history");
+ huge_cache_history.PrintI64("window_ms",
+ absl::ToInt64Milliseconds(kEpochLength));
+ huge_cache_history.PrintI64("epochs", kEpochs);
+
+ timeseries_.Iter(
+ [&](size_t offset, int64_t ts, const Extrema &e) {
+ auto m = huge_cache_history.CreateSubRegion("measurements");
+ m.PrintI64("epoch", offset);
+ m.PrintI64("min_bytes", e.min.in_bytes());
+ m.PrintI64("max_bytes", e.max.in_bytes());
+ },
+ timeseries_.kSkipEmptyEntries);
+}
+
+template <size_t kEpochs>
+bool MinMaxTracker<kEpochs>::Extrema::operator==(const Extrema &other) const {
+ return (other.max == max) && (other.min == min);
+}
+
+// Explicit instantiations of template
+template class MinMaxTracker<>;
+template class MinMaxTracker<600>;
+
+// The logic for actually allocating from the cache or from the backing
+// allocator, and for keeping the hit/miss statistics up to date.
+HugeRange HugeCache::DoGet(HugeLength n, bool *from_released) {
+ auto *node = Find(n);
+ if (!node) {
+ misses_++;
+ weighted_misses_ += n.raw_num();
+ HugeRange res = allocator_->Get(n);
+ if (res.valid()) {
+ *from_released = true;
+ }
+
+ return res;
+ }
+ hits_++;
+ weighted_hits_ += n.raw_num();
+ *from_released = false;
+ size_ -= n;
+ UpdateSize(size());
+ HugeRange result, leftover;
+ // Put back whatever we have left (or nothing, if it's exact.)
+ std::tie(result, leftover) = Split(node->range(), n);
+ cache_.Remove(node);
+ if (leftover.valid()) {
+ cache_.Insert(leftover);
+ }
+ return result;
+}
+
+void HugeCache::MaybeGrowCacheLimit(HugeLength missed) {
+ // Our goal is to make the cache size = the largest "brief dip."
+ //
+ // A "dip" being a case where usage shrinks, then increases back up
+ // to previous levels (at least partially).
+ //
+ // "brief" is "returns to normal usage in < kCacheTime." (In
+ // other words, we ideally want to be willing to cache memory for
+ // kCacheTime before expecting it to be used again--we are loose
+  // on the timing.)
+ //
+ // The interesting part is finding those dips.
+
+ // This is the downward slope: we lost some usage. (This in theory could
+ // be as much as 2 * kCacheTime old, which is fine.)
+ const HugeLength shrink = off_peak_tracker_.MaxOverTime(kCacheTime);
+
+ // This is the upward slope: we are coming back up.
+ const HugeLength grow = usage_ - usage_tracker_.MinOverTime(kCacheTime);
+
+ // Ideally we now know that we dipped down by some amount, then came
+ // up. Sadly our stats aren't quite good enough to guarantee things
+ // happened in the proper order. Suppose our usage takes the
+ // following path (in essentially zero time):
+ // 0, 10000, 5000, 5500.
+ //
+ // Clearly the proven dip here is 500. But we'll compute shrink = 5000,
+ // grow = 5500--we'd prefer to measure from a min *after* that shrink.
+ //
+ // It's difficult to ensure this, and hopefully this case is rare.
+ // TODO(b/134690209): figure out if we can solve that problem.
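+  // Illustrative (made-up) numbers: if usage fell by 100 hugepages within
+  // kCacheTime (shrink = 100) and has since climbed back up by 80
+  // (grow = 80), the proven dip is min(100, 80) = 80 hugepages; with 10%
+  // slack the limit grows to 88 hugepages, if that exceeds the current limit.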
+ const HugeLength dip = std::min(shrink, grow);
+
+ // Fragmentation: we may need to cache a little more than the actual
+ // usage jump. 10% seems to be a reasonable addition that doesn't waste
+ // much space, but gets good performance on tests.
+ const HugeLength slack = dip / 10;
+
+ const HugeLength lim = dip + slack;
+
+ if (lim > limit()) {
+ last_limit_change_ = clock_.now();
+ limit_ = lim;
+ }
+}
+
+void HugeCache::IncUsage(HugeLength n) {
+ usage_ += n;
+ usage_tracker_.Report(usage_);
+ detailed_tracker_.Report(usage_);
+ off_peak_tracker_.Report(NHugePages(0));
+ if (size() + usage() > max_rss_) max_rss_ = size() + usage();
+}
+
+void HugeCache::DecUsage(HugeLength n) {
+ usage_ -= n;
+ usage_tracker_.Report(usage_);
+ detailed_tracker_.Report(usage_);
+ const HugeLength max = usage_tracker_.MaxOverTime(kCacheTime);
+ ASSERT(max >= usage_);
+ const HugeLength off_peak = max - usage_;
+ off_peak_tracker_.Report(off_peak);
+ if (size() + usage() > max_rss_) max_rss_ = size() + usage();
+}
+
+void HugeCache::UpdateSize(HugeLength size) {
+ size_tracker_.Report(size);
+ if (size > max_size_) max_size_ = size;
+ if (size + usage() > max_rss_) max_rss_ = size + usage();
+
+ // TODO(b/134691947): moving this inside the MinMaxTracker would save one call
+ // to clock_.now() but all MinMaxTrackers would track regret instead.
+ int64_t now = clock_.now();
+ if (now > last_regret_update_) {
+ regret_ += size.raw_num() * (now - last_regret_update_);
+ last_regret_update_ = now;
+ }
+}
+
+HugeRange HugeCache::Get(HugeLength n, bool *from_released) {
+ HugeRange r = DoGet(n, from_released);
+  // Failure to get a range should "never" happen (VSS limits
+ // or wildly incorrect allocation sizes only...) Don't deal with
+ // this case for cache size accounting.
+ IncUsage(r.len());
+
+ const bool miss = r.valid() && *from_released;
+ if (miss) MaybeGrowCacheLimit(n);
+ return r;
+}
+
+void HugeCache::Release(HugeRange r) {
+ DecUsage(r.len());
+
+ cache_.Insert(r);
+ size_ += r.len();
+ if (size_ <= limit()) {
+ fills_++;
+ } else {
+ overflows_++;
+ }
+
+ // Shrink the limit, if we're going to do it, before we shrink to
+ // the max size. (This could reduce the number of regions we break
+ // in half to avoid overshrinking.)
+ if ((clock_.now() - last_limit_change_) > (cache_time_ticks_ * 2)) {
+ total_fast_unbacked_ += MaybeShrinkCacheLimit();
+ }
+ total_fast_unbacked_ += ShrinkCache(limit());
+
+ UpdateSize(size());
+}
+
+void HugeCache::ReleaseUnbacked(HugeRange r) {
+ DecUsage(r.len());
+ // No point in trying to cache it, just hand it back.
+ allocator_->Release(r);
+}
+
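+// MaybeShrinkCacheLimit() looks at the smallest cache size seen over the last
+// 2 * kCacheTime.  If the cache dipped below 20% of the limit, it was mostly
+// consumed at some point and the limit is left alone; otherwise at least that
+// minimum sat idle the whole window, so half of it is trimmed from the limit
+// (never dropping below MinCacheLimit()) and the cache is shrunk to match.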
+HugeLength HugeCache::MaybeShrinkCacheLimit() {
+ last_limit_change_ = clock_.now();
+
+ const HugeLength min = size_tracker_.MinOverTime(kCacheTime * 2);
+ // If cache size has gotten down to at most 20% of max, we assume
+ // we're close enough to the optimal size--we don't want to fiddle
+ // too much/too often unless we have large gaps in usage.
+ if (min < limit() / 5) return NHugePages(0);
+
+ // Take away half of the unused portion.
+ HugeLength drop = std::max(min / 2, NHugePages(1));
+ limit_ = std::max(limit() <= drop ? NHugePages(0) : limit() - drop,
+ MinCacheLimit());
+ return ShrinkCache(limit());
+}
+
+HugeLength HugeCache::ShrinkCache(HugeLength target) {
+ HugeLength removed = NHugePages(0);
+ while (size_ > target) {
+ // Remove smallest-ish nodes, to avoid fragmentation where possible.
+ auto *node = Find(NHugePages(1));
+ CHECK_CONDITION(node);
+ HugeRange r = node->range();
+ cache_.Remove(node);
+ // Suppose we're 10 MiB over target but the smallest available node
+ // is 100 MiB. Don't go overboard--split up the range.
+ // In particular - this prevents disastrous results if we've decided
+ // the cache should be 99 MiB but the actual hot usage is 100 MiB
+ // (and it is unfragmented).
+ const HugeLength delta = size() - target;
+ if (r.len() > delta) {
+ HugeRange to_remove, leftover;
+ std::tie(to_remove, leftover) = Split(r, delta);
+ ASSERT(leftover.valid());
+ cache_.Insert(leftover);
+ r = to_remove;
+ }
+
+ size_ -= r.len();
+ // Note, actual unback implementation is temporarily dropping and
+ // re-acquiring the page heap lock here.
+ unback_(r.start_addr(), r.byte_len());
+ allocator_->Release(r);
+ removed += r.len();
+ }
+
+ return removed;
+}
+
+HugeLength HugeCache::ReleaseCachedPages(HugeLength n) {
+ // This is a good time to check: is our cache going persistently unused?
+ HugeLength released = MaybeShrinkCacheLimit();
+
+ if (released < n) {
+ n -= released;
+ const HugeLength target = n > size() ? NHugePages(0) : size() - n;
+ released += ShrinkCache(target);
+ }
+
+ UpdateSize(size());
+ total_periodic_unbacked_ += released;
+ return released;
+}
+
+void HugeCache::AddSpanStats(SmallSpanStats *small, LargeSpanStats *large,
+ PageAgeHistograms *ages) const {
+ static_assert(kPagesPerHugePage >= kMaxPages);
+ for (const HugeAddressMap::Node *node = cache_.first(); node != nullptr;
+ node = node->next()) {
+ HugeLength n = node->range().len();
+ if (large != nullptr) {
+ large->spans++;
+ large->normal_pages += n.in_pages();
+ }
+
+ if (ages != nullptr) {
+ ages->RecordRange(n.in_pages(), false, node->when());
+ }
+ }
+}
+
+HugeAddressMap::Node *HugeCache::Find(HugeLength n) {
+ HugeAddressMap::Node *curr = cache_.root();
+ // invariant: curr != nullptr && curr->longest >= n
+ // we favor smaller gaps and lower nodes and lower addresses, in that
+ // order. The net effect is that we are neither a best-fit nor a
+ // lowest-address allocator but vaguely close to both.
+ HugeAddressMap::Node *best = nullptr;
+ while (curr && curr->longest() >= n) {
+ if (curr->range().len() >= n) {
+ if (!best || best->range().len() > curr->range().len()) {
+ best = curr;
+ }
+ }
+
+ // Either subtree could contain a better fit and we don't want to
+ // search the whole tree. Pick a reasonable child to look at.
+ auto left = curr->left();
+ auto right = curr->right();
+ if (!left || left->longest() < n) {
+ curr = right;
+ continue;
+ }
+
+ if (!right || right->longest() < n) {
+ curr = left;
+ continue;
+ }
+
+ // Here, we have a nontrivial choice.
+ if (left->range().len() == right->range().len()) {
+ if (left->longest() <= right->longest()) {
+ curr = left;
+ } else {
+ curr = right;
+ }
+ } else if (left->range().len() < right->range().len()) {
+      // The roots differ in size; descend toward the smaller root, as that is
+      // slightly more likely to lead to our best (closest) fit.
+ curr = left;
+ } else {
+ curr = right;
+ }
+ }
+ return best;
+}
+
+void HugeCache::Print(Printer *out) {
+ const int64_t millis = absl::ToInt64Milliseconds(kCacheTime);
+ out->printf(
+ "HugeCache: contains unused, backed hugepage(s) "
+ "(kCacheTime = %lldms)\n",
+ millis);
+ // a / (a + b), avoiding division by zero
+ auto safe_ratio = [](double a, double b) {
+ const double total = a + b;
+ if (total == 0) return 0.0;
+ return a / total;
+ };
+
+ const double hit_rate = safe_ratio(hits_, misses_);
+ const double overflow_rate = safe_ratio(overflows_, fills_);
+
+ out->printf(
+ "HugeCache: %zu / %zu hugepages cached / cache limit "
+ "(%.3f hit rate, %.3f overflow rate)\n",
+ size_.raw_num(), limit().raw_num(), hit_rate, overflow_rate);
+ out->printf("HugeCache: %zu MiB fast unbacked, %zu MiB periodic\n",
+ total_fast_unbacked_.in_bytes() / 1024 / 1024,
+ total_periodic_unbacked_.in_bytes() / 1024 / 1024);
+ UpdateSize(size());
+ out->printf(
+ "HugeCache: %zu MiB*s cached since startup\n",
+ NHugePages(regret_).in_mib() / static_cast<size_t>(clock_.freq()));
+
+ usage_tracker_.Report(usage_);
+ const HugeLength usage_min = usage_tracker_.MinOverTime(kCacheTime);
+ const HugeLength usage_max = usage_tracker_.MaxOverTime(kCacheTime);
+ out->printf(
+ "HugeCache: recent usage range: %zu min - %zu curr - %zu max MiB\n",
+ usage_min.in_mib(), usage_.in_mib(), usage_max.in_mib());
+
+ const HugeLength off_peak = usage_max - usage_;
+ off_peak_tracker_.Report(off_peak);
+ const HugeLength off_peak_min = off_peak_tracker_.MinOverTime(kCacheTime);
+ const HugeLength off_peak_max = off_peak_tracker_.MaxOverTime(kCacheTime);
+ out->printf(
+ "HugeCache: recent offpeak range: %zu min - %zu curr - %zu max MiB\n",
+ off_peak_min.in_mib(), off_peak.in_mib(), off_peak_max.in_mib());
+
+ const HugeLength cache_min = size_tracker_.MinOverTime(kCacheTime);
+ const HugeLength cache_max = size_tracker_.MaxOverTime(kCacheTime);
+ out->printf(
+ "HugeCache: recent cache range: %zu min - %zu curr - %zu max MiB\n",
+ cache_min.in_mib(), size_.in_mib(), cache_max.in_mib());
+
+ detailed_tracker_.Print(out);
+}
+
+void HugeCache::PrintInPbtxt(PbtxtRegion *hpaa) {
+ hpaa->PrintI64("huge_cache_time_const",
+ absl::ToInt64Milliseconds(kCacheTime));
+
+ // a / (a + b), avoiding division by zero
+ auto safe_ratio = [](double a, double b) {
+ const double total = a + b;
+ if (total == 0) return 0.0;
+ return a / total;
+ };
+
+ const double hit_rate = safe_ratio(hits_, misses_);
+ const double overflow_rate = safe_ratio(overflows_, fills_);
+
+ // number of bytes in HugeCache
+ hpaa->PrintI64("cached_huge_page_bytes", size_.in_bytes());
+ // max allowed bytes in HugeCache
+ hpaa->PrintI64("max_cached_huge_page_bytes", limit().in_bytes());
+ // lifetime cache hit rate
+ hpaa->PrintDouble("huge_cache_hit_rate", hit_rate);
+ // lifetime cache overflow rate
+ hpaa->PrintDouble("huge_cache_overflow_rate", overflow_rate);
+ // bytes eagerly unbacked by HugeCache
+ hpaa->PrintI64("fast_unbacked_bytes", total_fast_unbacked_.in_bytes());
+ // bytes unbacked by periodic releaser thread
+ hpaa->PrintI64("periodic_unbacked_bytes",
+ total_periodic_unbacked_.in_bytes());
+ UpdateSize(size());
+ // memory cached since startup (in MiB*s)
+ hpaa->PrintI64("huge_cache_regret", NHugePages(regret_).in_mib() /
+ static_cast<size_t>(clock_.freq()));
+
+ usage_tracker_.Report(usage_);
+ const HugeLength usage_min = usage_tracker_.MinOverTime(kCacheTime);
+ const HugeLength usage_max = usage_tracker_.MaxOverTime(kCacheTime);
+ {
+ auto usage_stats = hpaa->CreateSubRegion("huge_cache_usage_stats");
+ usage_stats.PrintI64("min_bytes", usage_min.in_bytes());
+ usage_stats.PrintI64("current_bytes", usage_.in_bytes());
+ usage_stats.PrintI64("max_bytes", usage_max.in_bytes());
+ }
+
+ const HugeLength off_peak = usage_max - usage_;
+ off_peak_tracker_.Report(off_peak);
+ const HugeLength off_peak_min = off_peak_tracker_.MinOverTime(kCacheTime);
+ const HugeLength off_peak_max = off_peak_tracker_.MaxOverTime(kCacheTime);
+ {
+ auto usage_stats = hpaa->CreateSubRegion("huge_cache_offpeak_stats");
+ usage_stats.PrintI64("min_bytes", off_peak_min.in_bytes());
+ usage_stats.PrintI64("current_bytes", off_peak.in_bytes());
+ usage_stats.PrintI64("max_bytes", off_peak_max.in_bytes());
+ }
+
+ const HugeLength cache_min = size_tracker_.MinOverTime(kCacheTime);
+ const HugeLength cache_max = size_tracker_.MaxOverTime(kCacheTime);
+ {
+ auto usage_stats = hpaa->CreateSubRegion("huge_cache_cache_stats");
+ usage_stats.PrintI64("min_bytes", cache_min.in_bytes());
+ usage_stats.PrintI64("current_bytes", size_.in_bytes());
+ usage_stats.PrintI64("max_bytes", cache_max.in_bytes());
+ }
+
+ detailed_tracker_.PrintInPbtxt(hpaa);
+}
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_cache.h b/contrib/libs/tcmalloc/tcmalloc/huge_cache.h
new file mode 100644
index 0000000000..2ffda26cb2
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/huge_cache.h
@@ -0,0 +1,228 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Wrapping interface for HugeAllocator that handles backing and
+// unbacking, including a hot cache of backed single hugepages.
+#ifndef TCMALLOC_HUGE_CACHE_H_
+#define TCMALLOC_HUGE_CACHE_H_
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <limits>
+
+#include "absl/time/time.h"
+#include "tcmalloc/common.h"
+#include "tcmalloc/experiment.h"
+#include "tcmalloc/experiment_config.h"
+#include "tcmalloc/huge_allocator.h"
+#include "tcmalloc/huge_pages.h"
+#include "tcmalloc/internal/config.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/internal/timeseries_tracker.h"
+#include "tcmalloc/stats.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+typedef void (*MemoryModifyFunction)(void *start, size_t len);
+
+// Tracks the extreme values of a HugeLength value over a trailing time window
+// w, split into kEpochs epochs (time ranges are approximate).
+template <size_t kEpochs = 16>
+class MinMaxTracker {
+ public:
+ explicit constexpr MinMaxTracker(Clock clock, absl::Duration w)
+ : kEpochLength(w / kEpochs), timeseries_(clock, w) {}
+
+ void Report(HugeLength val);
+ void Print(Printer *out) const;
+ void PrintInPbtxt(PbtxtRegion *hpaa) const;
+
+ // If t < kEpochLength, these functions return statistics for last epoch. The
+ // granularity is kEpochLength (rounded up).
+ HugeLength MaxOverTime(absl::Duration t) const;
+ HugeLength MinOverTime(absl::Duration t) const;
+
+ private:
+ const absl::Duration kEpochLength;
+
+ static constexpr HugeLength kMaxVal =
+ NHugePages(std::numeric_limits<size_t>::max());
+ struct Extrema {
+ HugeLength min, max;
+
+ static Extrema Nil() {
+ Extrema e;
+ e.max = NHugePages(0);
+ e.min = kMaxVal;
+ return e;
+ }
+
+ void Report(HugeLength n) {
+ max = std::max(max, n);
+ min = std::min(min, n);
+ }
+
+ bool empty() const { return (*this == Nil()); }
+
+ bool operator==(const Extrema &other) const;
+ };
+
+ TimeSeriesTracker<Extrema, HugeLength, kEpochs> timeseries_;
+};
+
+// Explicit instantiations are defined in huge_cache.cc.
+extern template class MinMaxTracker<>;
+extern template class MinMaxTracker<600>;
+
+template <size_t kEpochs>
+constexpr HugeLength MinMaxTracker<kEpochs>::kMaxVal;
+
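+// HugeCache wraps HugeAllocator: ranges given back via Release() stay backed
+// and cached (up to an adaptive limit) so that a later Get() can hand them out
+// without another round trip to the kernel.  Anything over the limit, plus
+// whatever the periodic releaser asks for via ReleaseCachedPages(), is
+// unbacked through unback_ and handed back to the underlying HugeAllocator.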
+class HugeCache {
+ public:
+ // For use in production
+ HugeCache(HugeAllocator *allocator, MetadataAllocFunction meta_allocate,
+ MemoryModifyFunction unback)
+ : HugeCache(allocator, meta_allocate, unback,
+ Clock{.now = absl::base_internal::CycleClock::Now,
+ .freq = absl::base_internal::CycleClock::Frequency}) {}
+
+ // For testing with mock clock
+ HugeCache(HugeAllocator *allocator, MetadataAllocFunction meta_allocate,
+ MemoryModifyFunction unback, Clock clock)
+ : allocator_(allocator),
+ cache_(meta_allocate),
+ clock_(clock),
+ cache_time_ticks_(clock_.freq() * absl::ToDoubleSeconds(kCacheTime)),
+ nanoseconds_per_tick_(absl::ToInt64Nanoseconds(absl::Seconds(1)) /
+ clock_.freq()),
+ last_limit_change_(clock.now()),
+ last_regret_update_(clock.now()),
+ detailed_tracker_(clock, absl::Minutes(10)),
+ usage_tracker_(clock, kCacheTime * 2),
+ off_peak_tracker_(clock, kCacheTime * 2),
+ size_tracker_(clock, kCacheTime * 2),
+ unback_(unback) {}
+ // Allocate a usable set of <n> contiguous hugepages. Try to give out
+ // memory that's currently backed from the kernel if we have it available.
+  // *from_released is set to false if the returned range is already backed;
+ // otherwise, it is set to true (and the caller should back it.)
+ HugeRange Get(HugeLength n, bool *from_released);
+
+ // Deallocate <r> (assumed to be backed by the kernel.)
+ void Release(HugeRange r);
+ // As Release, but the range is assumed to _not_ be backed.
+ void ReleaseUnbacked(HugeRange r);
+
+ // Release to the system up to <n> hugepages of cache contents; returns
+ // the number of hugepages released.
+ HugeLength ReleaseCachedPages(HugeLength n);
+
+ // Backed memory available.
+ HugeLength size() const { return size_; }
+ // Total memory cached (in HugeLength * nanoseconds)
+ uint64_t regret() const { return regret_ * nanoseconds_per_tick_; }
+ // Current limit for how much backed memory we'll cache.
+ HugeLength limit() const { return limit_; }
+ // Sum total of unreleased requests.
+ HugeLength usage() const { return usage_; }
+
+ void AddSpanStats(SmallSpanStats *small, LargeSpanStats *large,
+ PageAgeHistograms *ages) const;
+
+ BackingStats stats() const {
+ BackingStats s;
+ s.system_bytes = (usage() + size()).in_bytes();
+ s.free_bytes = size().in_bytes();
+ s.unmapped_bytes = 0;
+ return s;
+ }
+
+ void Print(Printer *out);
+ void PrintInPbtxt(PbtxtRegion *hpaa);
+
+ private:
+ HugeAllocator *allocator_;
+
+ // We just cache-missed a request for <missed> pages;
+ // should we grow?
+ void MaybeGrowCacheLimit(HugeLength missed);
+ // Check if the cache seems consistently too big. Returns the
+ // number of pages *evicted* (not the change in limit).
+ HugeLength MaybeShrinkCacheLimit();
+
+ // Ensure the cache contains at most <target> hugepages,
+ // returning the number removed.
+ HugeLength ShrinkCache(HugeLength target);
+
+ HugeRange DoGet(HugeLength n, bool *from_released);
+
+ HugeAddressMap::Node *Find(HugeLength n);
+
+ HugeAddressMap cache_;
+ HugeLength size_{NHugePages(0)};
+
+ HugeLength limit_{NHugePages(10)};
+ const absl::Duration kCacheTime = absl::Seconds(1);
+
+ size_t hits_{0};
+ size_t misses_{0};
+ size_t fills_{0};
+ size_t overflows_{0};
+ uint64_t weighted_hits_{0};
+ uint64_t weighted_misses_{0};
+
+ // Sum(size of Gets) - Sum(size of Releases), i.e. amount of backed
+ // hugepages our user currently wants to have.
+ void IncUsage(HugeLength n);
+ void DecUsage(HugeLength n);
+ HugeLength usage_{NHugePages(0)};
+
+ // This is CycleClock, except overridable for tests.
+ Clock clock_;
+ const int64_t cache_time_ticks_;
+ const double nanoseconds_per_tick_;
+
+ int64_t last_limit_change_;
+
+ // 10 hugepages is a good baseline for our cache--easily wiped away
+ // by periodic release, and not that much memory on any real server.
+ // However, we can go below it if we haven't used that much for 30 seconds.
+ HugeLength MinCacheLimit() const { return NHugePages(10); }
+
+ uint64_t regret_{0}; // overflows if we cache 585 hugepages for 1 year
+ int64_t last_regret_update_;
+ void UpdateSize(HugeLength size);
+
+ MinMaxTracker<600> detailed_tracker_;
+
+ MinMaxTracker<> usage_tracker_;
+ MinMaxTracker<> off_peak_tracker_;
+ MinMaxTracker<> size_tracker_;
+ HugeLength max_size_{NHugePages(0)};
+ HugeLength max_rss_{NHugePages(0)};
+
+ HugeLength total_fast_unbacked_{NHugePages(0)};
+ HugeLength total_periodic_unbacked_{NHugePages(0)};
+
+ MemoryModifyFunction unback_;
+};
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // TCMALLOC_HUGE_CACHE_H_
diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_cache_test.cc b/contrib/libs/tcmalloc/tcmalloc/huge_cache_test.cc
new file mode 100644
index 0000000000..2699b44303
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/huge_cache_test.cc
@@ -0,0 +1,563 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/huge_cache.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#include <memory>
+#include <random>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/base/internal/cycleclock.h"
+#include "absl/memory/memory.h"
+#include "absl/random/random.h"
+#include "absl/strings/str_cat.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "tcmalloc/huge_pages.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/stats.h"
+
+namespace tcmalloc {
+namespace tcmalloc_internal {
+namespace {
+
+class HugeCacheTest : public testing::Test {
+ private:
+ // Allow tests to modify the clock used by the cache.
+ static int64_t clock_offset_;
+ static double GetClockFrequency() {
+ return absl::base_internal::CycleClock::Frequency();
+ }
+ static int64_t GetClock() {
+ return absl::base_internal::CycleClock::Now() +
+ clock_offset_ * GetClockFrequency() /
+ absl::ToDoubleNanoseconds(absl::Seconds(1));
+ }
+
+ // Use a tiny fraction of actual size so we can test aggressively.
+ static void* AllocateFake(size_t bytes, size_t* actual, size_t align) {
+ if (bytes % kHugePageSize != 0) {
+ Crash(kCrash, __FILE__, __LINE__, "not aligned", bytes, kHugePageSize);
+ }
+ if (align % kHugePageSize != 0) {
+ Crash(kCrash, __FILE__, __LINE__, "not aligned", align, kHugePageSize);
+ }
+ *actual = bytes;
+ // we'll actually provide hidden backing, one word per hugepage.
+ bytes /= kHugePageSize;
+ align /= kHugePageSize;
+ size_t index = backing.size();
+ if (index % align != 0) {
+      index += (align - (index % align));
+ }
+ backing.resize(index + bytes);
+ void* ptr = reinterpret_cast<void*>(index * kHugePageSize);
+ return ptr;
+ }
+ // This isn't super good form but we'll never have more than one HAT
+ // extant at once.
+ static std::vector<size_t> backing;
+
+ // We use actual malloc for metadata allocations, but we track them so they
+ // can be deleted. (TODO make this an arena if we care, which I doubt)
+ static void* MallocMetadata(size_t size) {
+ metadata_bytes += size;
+ void* ptr = calloc(size, 1);
+ metadata_allocs.push_back(ptr);
+ return ptr;
+ }
+ static std::vector<void*> metadata_allocs;
+ static size_t metadata_bytes;
+
+ // This is wordy, but necessary for mocking:
+ class BackingInterface {
+ public:
+ virtual void Unback(void* p, size_t len) = 0;
+ virtual ~BackingInterface() {}
+ };
+
+ class MockBackingInterface : public BackingInterface {
+ public:
+ MOCK_METHOD2(Unback, void(void* p, size_t len));
+ };
+
+ static void MockUnback(void* p, size_t len) { mock_->Unback(p, len); }
+
+ protected:
+ static std::unique_ptr<testing::NiceMock<MockBackingInterface>> mock_;
+
+ HugeCacheTest() {
+ // We don't use the first few bytes, because things might get weird
+ // given zero pointers.
+ backing.resize(1024);
+ metadata_bytes = 0;
+ mock_ = absl::make_unique<testing::NiceMock<MockBackingInterface>>();
+ }
+
+ ~HugeCacheTest() override {
+ for (void* p : metadata_allocs) {
+ free(p);
+ }
+ metadata_allocs.clear();
+ backing.clear();
+ mock_.reset(nullptr);
+
+ clock_offset_ = 0;
+ }
+
+ void Advance(absl::Duration d) {
+ clock_offset_ += absl::ToInt64Nanoseconds(d);
+ }
+
+ HugeAllocator alloc_{AllocateFake, MallocMetadata};
+ HugeCache cache_{&alloc_, MallocMetadata, MockUnback,
+ Clock{.now = GetClock, .freq = GetClockFrequency}};
+};
+
+std::vector<size_t> HugeCacheTest::backing;
+std::vector<void*> HugeCacheTest::metadata_allocs;
+size_t HugeCacheTest::metadata_bytes;
+std::unique_ptr<testing::NiceMock<HugeCacheTest::MockBackingInterface>>
+ HugeCacheTest::mock_;
+
+int64_t HugeCacheTest::clock_offset_ = 0;
+
+TEST_F(HugeCacheTest, Basic) {
+ bool from;
+ for (int i = 0; i < 100 * 1000; ++i) {
+ cache_.Release(cache_.Get(NHugePages(1), &from));
+ }
+}
+
+TEST_F(HugeCacheTest, Backing) {
+ bool from;
+ cache_.Release(cache_.Get(NHugePages(4), &from));
+ EXPECT_TRUE(from);
+ // We should be able to split up a large range...
+ HugeRange r1 = cache_.Get(NHugePages(3), &from);
+ EXPECT_FALSE(from);
+ HugeRange r2 = cache_.Get(NHugePages(1), &from);
+ EXPECT_FALSE(from);
+
+ // and then merge it back.
+ cache_.Release(r1);
+ cache_.Release(r2);
+ HugeRange r = cache_.Get(NHugePages(4), &from);
+ EXPECT_FALSE(from);
+ cache_.Release(r);
+}
+
+TEST_F(HugeCacheTest, Release) {
+ bool from;
+ const HugeLength one = NHugePages(1);
+ cache_.Release(cache_.Get(NHugePages(5), &from));
+ HugeRange r1, r2, r3, r4, r5;
+ r1 = cache_.Get(one, &from);
+ r2 = cache_.Get(one, &from);
+ r3 = cache_.Get(one, &from);
+ r4 = cache_.Get(one, &from);
+ r5 = cache_.Get(one, &from);
+ cache_.Release(r1);
+ cache_.Release(r2);
+ cache_.Release(r3);
+ cache_.Release(r4);
+ cache_.Release(r5);
+
+ r1 = cache_.Get(one, &from);
+ ASSERT_EQ(false, from);
+ r2 = cache_.Get(one, &from);
+ ASSERT_EQ(false, from);
+ r3 = cache_.Get(one, &from);
+ ASSERT_EQ(false, from);
+ r4 = cache_.Get(one, &from);
+ ASSERT_EQ(false, from);
+ r5 = cache_.Get(one, &from);
+ ASSERT_EQ(false, from);
+ cache_.Release(r1);
+ cache_.Release(r2);
+ cache_.Release(r5);
+
+ ASSERT_EQ(NHugePages(3), cache_.size());
+ EXPECT_CALL(*mock_, Unback(r5.start_addr(), kHugePageSize * 1)).Times(1);
+ EXPECT_EQ(NHugePages(1), cache_.ReleaseCachedPages(NHugePages(1)));
+ cache_.Release(r3);
+ cache_.Release(r4);
+
+ EXPECT_CALL(*mock_, Unback(r1.start_addr(), 4 * kHugePageSize)).Times(1);
+ EXPECT_EQ(NHugePages(4), cache_.ReleaseCachedPages(NHugePages(200)));
+}
+
+TEST_F(HugeCacheTest, Regret) {
+ bool from;
+ HugeRange r = cache_.Get(NHugePages(20), &from);
+ cache_.Release(r);
+ HugeLength cached = cache_.size();
+ absl::Duration d = absl::Seconds(20);
+ Advance(d);
+ char buf[512];
+ Printer out(buf, 512);
+ cache_.Print(&out); // To update the regret
+ uint64_t expected_regret = absl::ToInt64Nanoseconds(d) * cached.raw_num();
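+  // Regret accumulates as (cached hugepages) * (elapsed nanoseconds), so after
+  // ~20s this is roughly cached.raw_num() * 2e10 hugepage-nanoseconds.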
+  // Not exactly accurate, since the mock clock advances with real time, and
+  // regret is updated again at the moment we measure it.
+ EXPECT_NEAR(cache_.regret(), expected_regret, expected_regret / 1000);
+ EXPECT_GE(cache_.regret(), expected_regret);
+}
+
+TEST_F(HugeCacheTest, Stats) {
+ bool from;
+ HugeRange r = cache_.Get(NHugePages(1 + 1 + 2 + 1 + 3), &from);
+ HugeRange r1, r2, r3, spacer1, spacer2;
+ std::tie(r1, spacer1) = Split(r, NHugePages(1));
+ std::tie(spacer1, r2) = Split(spacer1, NHugePages(1));
+ std::tie(r2, spacer2) = Split(r2, NHugePages(2));
+ std::tie(spacer2, r3) = Split(spacer2, NHugePages(1));
+ cache_.Release(r1);
+ cache_.Release(r2);
+ cache_.Release(r3);
+
+ ASSERT_EQ(NHugePages(6), cache_.size());
+ r1 = cache_.Get(NHugePages(1), &from);
+ ASSERT_EQ(false, from);
+ r2 = cache_.Get(NHugePages(2), &from);
+ ASSERT_EQ(false, from);
+ r3 = cache_.Get(NHugePages(3), &from);
+ ASSERT_EQ(false, from);
+
+ struct Helper {
+ static void Stat(const HugeCache& cache, size_t* spans,
+ Length* pages_backed, Length* pages_unbacked,
+ double* avg_age) {
+ PageAgeHistograms ages(absl::base_internal::CycleClock::Now());
+ LargeSpanStats large;
+ cache.AddSpanStats(nullptr, &large, &ages);
+
+ const PageAgeHistograms::Histogram* hist = ages.GetTotalHistogram(false);
+ *spans = large.spans;
+ *pages_backed = large.normal_pages;
+ *pages_unbacked = large.returned_pages;
+ *avg_age = hist->avg_age();
+ }
+ };
+
+ double avg_age;
+ size_t spans;
+ Length pages_backed;
+ Length pages_unbacked;
+
+ cache_.Release(r1);
+ absl::SleepFor(absl::Microseconds(5000));
+ Helper::Stat(cache_, &spans, &pages_backed, &pages_unbacked, &avg_age);
+ EXPECT_EQ(Length(0), pages_unbacked);
+ EXPECT_EQ(1, spans);
+ EXPECT_EQ(NHugePages(1).in_pages(), pages_backed);
+ EXPECT_LE(0.005, avg_age);
+
+ cache_.Release(r2);
+ absl::SleepFor(absl::Microseconds(2500));
+ Helper::Stat(cache_, &spans, &pages_backed, &pages_unbacked, &avg_age);
+ EXPECT_EQ(Length(0), pages_unbacked);
+ EXPECT_EQ(2, spans);
+ EXPECT_EQ(NHugePages(3).in_pages(), pages_backed);
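+  // avg_age is a page-weighted average: r1 (one hugepage) has now been free
+  // for >= 7.5ms and r2 (two hugepages) for >= 2.5ms.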
+ EXPECT_LE((0.0075 * 1 + 0.0025 * 2) / (1 + 2), avg_age);
+
+ cache_.Release(r3);
+ absl::SleepFor(absl::Microseconds(1250));
+ Helper::Stat(cache_, &spans, &pages_backed, &pages_unbacked, &avg_age);
+ EXPECT_EQ(Length(0), pages_unbacked);
+ EXPECT_EQ(3, spans);
+ EXPECT_EQ(NHugePages(6).in_pages(), pages_backed);
+ EXPECT_LE((0.00875 * 1 + 0.00375 * 2 + 0.00125 * 3) / (1 + 2 + 3), avg_age);
+}
+
+static double Frac(HugeLength num, HugeLength denom) {
+ return static_cast<double>(num.raw_num()) / denom.raw_num();
+}
+
+TEST_F(HugeCacheTest, Growth) {
+ bool released;
+ absl::BitGen rng;
+ // fragmentation is a bit of a challenge
+ std::uniform_int_distribution<size_t> sizes(1, 5);
+ // fragment the cache badly.
+ std::vector<HugeRange> keep;
+ std::vector<HugeRange> drop;
+ for (int i = 0; i < 1000; ++i) {
+ auto& l = std::bernoulli_distribution()(rng) ? keep : drop;
+ l.push_back(cache_.Get(NHugePages(sizes(rng)), &released));
+ }
+
+ for (auto r : drop) {
+ cache_.Release(r);
+ }
+
+  // See the TODO in HugeCache::MaybeGrowCacheLimit; without this delay,
+ // the above fragmentation plays merry havoc with our instrumentation.
+ Advance(absl::Seconds(30));
+
+ // Test that our cache can grow to fit a working set.
+ HugeLength hot_set_sizes[] = {NHugePages(5), NHugePages(10), NHugePages(100),
+ NHugePages(10000)};
+
+ for (const HugeLength hot : hot_set_sizes) {
+ SCOPED_TRACE(absl::StrCat("cache size = ", hot.in_bytes() / 1024.0 / 1024.0,
+ " MiB"));
+ // Exercise the cache allocating about <hot> worth of data. After
+ // a brief warmup phase, we should do this without needing to back much.
+ auto alloc = [&]() -> std::pair<HugeLength, HugeLength> {
+ HugeLength got = NHugePages(0);
+ HugeLength needed_backing = NHugePages(0);
+ std::vector<HugeRange> items;
+ while (got < hot) {
+ HugeLength rest = hot - got;
+ HugeLength l = std::min(rest, NHugePages(sizes(rng)));
+ got += l;
+ items.push_back(cache_.Get(l, &released));
+ if (released) needed_backing += l;
+ }
+ for (auto r : items) {
+ cache_.Release(r);
+ }
+ return {needed_backing, got};
+ };
+
+ // warmup - we're allowed to incur misses and be too big.
+ for (int i = 0; i < 2; ++i) {
+ alloc();
+ }
+
+ HugeLength needed_backing = NHugePages(0);
+ HugeLength total = NHugePages(0);
+ for (int i = 0; i < 16; ++i) {
+ auto r = alloc();
+ needed_backing += r.first;
+ total += r.second;
+ // Cache shouldn't have just grown arbitrarily
+ const HugeLength cached = cache_.size();
+ // Allow us 10% slop, but don't get out of bed for tiny caches anyway.
+ const double ratio = Frac(cached, hot);
+ SCOPED_TRACE(
+ absl::StrCat(cached.raw_num(), "hps ", Frac(r.first, r.second)));
+ if (ratio > 1 && cached > NHugePages(16)) {
+ EXPECT_LE(ratio, 1.1);
+ }
+ }
+ // approximately, given the randomized sizing...
+
+ const double ratio = Frac(needed_backing, total);
+ EXPECT_LE(ratio, 0.2);
+ }
+}
+
+// If we repeatedly grow and shrink, but do so very slowly, we should *not*
+// cache the large variation.
+TEST_F(HugeCacheTest, SlowGrowthUncached) {
+ absl::BitGen rng;
+ std::uniform_int_distribution<size_t> sizes(1, 10);
+ for (int i = 0; i < 20; ++i) {
+ std::vector<HugeRange> rs;
+ for (int j = 0; j < 20; ++j) {
+ Advance(absl::Milliseconds(600));
+ bool released;
+ rs.push_back(cache_.Get(NHugePages(sizes(rng)), &released));
+ }
+ HugeLength max_cached = NHugePages(0);
+ for (auto r : rs) {
+ Advance(absl::Milliseconds(600));
+ cache_.Release(r);
+ max_cached = std::max(max_cached, cache_.size());
+ }
+ EXPECT_GE(NHugePages(10), max_cached);
+ }
+}
+
+// If very rarely we have a huge increase in usage, it shouldn't be cached.
+TEST_F(HugeCacheTest, SpikesUncached) {
+ absl::BitGen rng;
+ std::uniform_int_distribution<size_t> sizes(1, 10);
+ for (int i = 0; i < 20; ++i) {
+ std::vector<HugeRange> rs;
+ for (int j = 0; j < 2000; ++j) {
+ bool released;
+ rs.push_back(cache_.Get(NHugePages(sizes(rng)), &released));
+ }
+ HugeLength max_cached = NHugePages(0);
+ for (auto r : rs) {
+ cache_.Release(r);
+ max_cached = std::max(max_cached, cache_.size());
+ }
+ EXPECT_GE(NHugePages(10), max_cached);
+ Advance(absl::Seconds(30));
+ }
+}
+
+// If very rarely we have a huge *decrease* in usage, it *should* be cached.
+TEST_F(HugeCacheTest, DipsCached) {
+ absl::BitGen rng;
+ std::uniform_int_distribution<size_t> sizes(1, 10);
+ for (int i = 0; i < 20; ++i) {
+ std::vector<HugeRange> rs;
+ HugeLength got = NHugePages(0);
+ HugeLength uncached = NHugePages(0);
+ for (int j = 0; j < 2000; ++j) {
+ bool released;
+ HugeLength n = NHugePages(sizes(rng));
+ rs.push_back(cache_.Get(n, &released));
+ got += n;
+ if (released) uncached += n;
+ }
+ // Most of our time is at high usage...
+ Advance(absl::Seconds(30));
+ // Now immediately release and reallocate.
+ for (auto r : rs) {
+ cache_.Release(r);
+ }
+
+ // warmup
+ if (i >= 2) {
+ EXPECT_GE(0.06, Frac(uncached, got));
+ }
+ }
+}
+
+// Suppose in a previous era of behavior we needed a giant cache,
+// but now we don't. Do we figure this out promptly?
+TEST_F(HugeCacheTest, Shrink) {
+ absl::BitGen rng;
+ std::uniform_int_distribution<size_t> sizes(1, 10);
+ for (int i = 0; i < 20; ++i) {
+ std::vector<HugeRange> rs;
+ for (int j = 0; j < 2000; ++j) {
+ HugeLength n = NHugePages(sizes(rng));
+ bool released;
+ rs.push_back(cache_.Get(n, &released));
+ }
+ for (auto r : rs) {
+ cache_.Release(r);
+ }
+ }
+
+ ASSERT_LE(NHugePages(10000), cache_.size());
+
+ for (int i = 0; i < 30; ++i) {
+ // New working set <= 20 pages.
+ Advance(absl::Seconds(1));
+
+ // And do some work.
+ for (int j = 0; j < 100; ++j) {
+ bool released;
+ HugeRange r1 = cache_.Get(NHugePages(sizes(rng)), &released);
+ HugeRange r2 = cache_.Get(NHugePages(sizes(rng)), &released);
+ cache_.Release(r1);
+ cache_.Release(r2);
+ }
+ }
+
+ ASSERT_GE(NHugePages(25), cache_.limit());
+}
+
+TEST_F(HugeCacheTest, Usage) {
+ bool released;
+
+ auto r1 = cache_.Get(NHugePages(10), &released);
+ EXPECT_EQ(NHugePages(10), cache_.usage());
+
+ auto r2 = cache_.Get(NHugePages(100), &released);
+ EXPECT_EQ(NHugePages(110), cache_.usage());
+
+ cache_.Release(r1);
+ EXPECT_EQ(NHugePages(100), cache_.usage());
+
+ // Pretend we unbacked this.
+ cache_.ReleaseUnbacked(r2);
+ EXPECT_EQ(NHugePages(0), cache_.usage());
+}
+
+class MinMaxTrackerTest : public testing::Test {
+ protected:
+ void Advance(absl::Duration d) {
+ clock_ += absl::ToDoubleSeconds(d) * GetFakeClockFrequency();
+ }
+
+ static int64_t FakeClock() { return clock_; }
+
+ static double GetFakeClockFrequency() {
+ return absl::ToDoubleNanoseconds(absl::Seconds(2));
+ }
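+  // i.e. the fake clock runs at 2e9 ticks per second, so Advance() above adds
+  // 2e9 ticks per simulated second.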
+
+ private:
+ static int64_t clock_;
+};
+
+int64_t MinMaxTrackerTest::clock_{0};
+
+TEST_F(MinMaxTrackerTest, Works) {
+ const absl::Duration kDuration = absl::Seconds(2);
+ MinMaxTracker<> tracker{
+ Clock{.now = FakeClock, .freq = GetFakeClockFrequency}, kDuration};
+
+ tracker.Report(NHugePages(0));
+ EXPECT_EQ(NHugePages(0), tracker.MaxOverTime(kDuration));
+ EXPECT_EQ(NHugePages(0), tracker.MinOverTime(kDuration));
+
+ tracker.Report(NHugePages(10));
+ EXPECT_EQ(NHugePages(10), tracker.MaxOverTime(kDuration));
+ EXPECT_EQ(NHugePages(0), tracker.MinOverTime(kDuration));
+
+ tracker.Report(NHugePages(5));
+ EXPECT_EQ(NHugePages(10), tracker.MaxOverTime(kDuration));
+ EXPECT_EQ(NHugePages(0), tracker.MinOverTime(kDuration));
+
+ tracker.Report(NHugePages(100));
+ EXPECT_EQ(NHugePages(100), tracker.MaxOverTime(kDuration));
+ EXPECT_EQ(NHugePages(0), tracker.MinOverTime(kDuration));
+
+ // Some tests for advancing time
+ Advance(kDuration / 3);
+ tracker.Report(NHugePages(2));
+ EXPECT_EQ(NHugePages(2), tracker.MaxOverTime(absl::Nanoseconds(1)));
+ EXPECT_EQ(NHugePages(100), tracker.MaxOverTime(kDuration / 2));
+ EXPECT_EQ(NHugePages(100), tracker.MaxOverTime(kDuration));
+ EXPECT_EQ(NHugePages(2), tracker.MinOverTime(absl::Nanoseconds(1)));
+ EXPECT_EQ(NHugePages(0), tracker.MinOverTime(kDuration / 2));
+ EXPECT_EQ(NHugePages(0), tracker.MinOverTime(kDuration));
+
+ Advance(kDuration / 3);
+ tracker.Report(NHugePages(5));
+ EXPECT_EQ(NHugePages(5), tracker.MaxOverTime(absl::Nanoseconds(1)));
+ EXPECT_EQ(NHugePages(5), tracker.MaxOverTime(kDuration / 2));
+ EXPECT_EQ(NHugePages(100), tracker.MaxOverTime(kDuration));
+ EXPECT_EQ(NHugePages(5), tracker.MinOverTime(absl::Nanoseconds(1)));
+ EXPECT_EQ(NHugePages(2), tracker.MinOverTime(kDuration / 2));
+ EXPECT_EQ(NHugePages(0), tracker.MinOverTime(kDuration));
+
+ // This should annihilate everything.
+ Advance(kDuration * 2);
+ tracker.Report(NHugePages(1));
+ EXPECT_EQ(NHugePages(1), tracker.MaxOverTime(absl::Nanoseconds(1)));
+ EXPECT_EQ(NHugePages(1), tracker.MinOverTime(absl::Nanoseconds(1)));
+ EXPECT_EQ(NHugePages(1), tracker.MaxOverTime(kDuration));
+ EXPECT_EQ(NHugePages(1), tracker.MinOverTime(kDuration));
+}
+
+} // namespace
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_page_aware_allocator.cc b/contrib/libs/tcmalloc/tcmalloc/huge_page_aware_allocator.cc
new file mode 100644
index 0000000000..e662456df6
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/huge_page_aware_allocator.cc
@@ -0,0 +1,676 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/huge_page_aware_allocator.h"
+
+#include <stdint.h>
+#include <string.h>
+
+#include <new>
+
+#include "absl/base/internal/cycleclock.h"
+#include "absl/base/internal/spinlock.h"
+#include "absl/time/time.h"
+#include "tcmalloc/common.h"
+#include "tcmalloc/experiment.h"
+#include "tcmalloc/experiment_config.h"
+#include "tcmalloc/huge_allocator.h"
+#include "tcmalloc/huge_pages.h"
+#include "tcmalloc/internal/environment.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/internal/optimization.h"
+#include "tcmalloc/pagemap.h"
+#include "tcmalloc/parameters.h"
+#include "tcmalloc/span.h"
+#include "tcmalloc/static_vars.h"
+#include "tcmalloc/stats.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+bool decide_want_hpaa();
+ABSL_ATTRIBUTE_WEAK int default_want_hpaa();
+ABSL_ATTRIBUTE_WEAK int default_subrelease();
+
+bool decide_subrelease() {
+ if (!decide_want_hpaa()) {
+ // Subrelease is off if HPAA is off.
+ return false;
+ }
+
+ const char *e = thread_safe_getenv("TCMALLOC_HPAA_CONTROL");
+ if (e) {
+ switch (e[0]) {
+ case '0':
+ if (kPageShift <= 12) {
+ return false;
+ }
+
+ if (default_want_hpaa != nullptr) {
+ int default_hpaa = default_want_hpaa();
+ if (default_hpaa < 0) {
+ return false;
+ }
+ }
+
+        Log(kLog, __FILE__, __LINE__,
+            "Runtime opt-out from HPAA requires building with "
+            "//tcmalloc:want_no_hpaa.");
+ break;
+ case '1':
+ return false;
+ case '2':
+ return true;
+ default:
+ Crash(kCrash, __FILE__, __LINE__, "bad env var", e);
+ return false;
+ }
+ }
+
+ if (default_subrelease != nullptr) {
+ const int decision = default_subrelease();
+ if (decision != 0) {
+ return decision > 0;
+ }
+ }
+
+ if (tcmalloc::IsExperimentActive(tcmalloc::Experiment::TCMALLOC_TEMERAIRE)) {
+ return false;
+ }
+
+ return true;
+}
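+
+// Illustrative summary of the TCMALLOC_HPAA_CONTROL handling above (this
+// function only decides subrelease; HPAA itself is decided elsewhere):
+//   "0" - request opt-out from HPAA; honored only for small page sizes or
+//         builds linking //tcmalloc:want_no_hpaa (subrelease is then off);
+//         otherwise a message is logged and the default path
+//         (default_subrelease / experiments) applies.
+//   "1" - keep HPAA, disable subrelease.
+//   "2" - keep HPAA, enable subrelease.
+// Any other value crashes with "bad env var".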
+
+FillerPartialRerelease decide_partial_rerelease() {
+ const char *e = thread_safe_getenv("TCMALLOC_PARTIAL_RELEASE_CONTROL");
+ if (e) {
+ if (e[0] == '0') {
+ return FillerPartialRerelease::Return;
+ }
+ if (e[0] == '1') {
+ return FillerPartialRerelease::Retain;
+ }
+ Crash(kCrash, __FILE__, __LINE__, "bad env var", e);
+ }
+
+ return FillerPartialRerelease::Retain;
+}
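+
+// Illustrative summary: TCMALLOC_PARTIAL_RELEASE_CONTROL=0 selects
+// FillerPartialRerelease::Return, =1 selects Retain, any other value crashes;
+// unset defaults to Retain.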
+
+// Some notes: locking discipline here is a bit funny, because
+// we want to *not* hold the pageheap lock while backing memory.
+
+// We have here a collection of slightly different allocators each
+// optimized for slightly different purposes. This file has two main purposes:
+// - pick the right one for a given allocation
+// - provide enough data to figure out what we picked last time!
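+//
+// Roughly (see LockAndAlloc below), requests are dispatched by size:
+//   n <= kPagesPerHugePage / 2         -> AllocSmall (filler)
+//   n <= HugeRegion::size().in_pages() -> AllocLarge (filler/regions/cache)
+//   larger                             -> AllocEnormous (raw hugepages)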
+
+HugePageAwareAllocator::HugePageAwareAllocator(MemoryTag tag)
+ : PageAllocatorInterface("HugePageAware", tag),
+ filler_(decide_partial_rerelease()),
+ alloc_(
+ [](MemoryTag tag) {
+ // TODO(ckennelly): Remove the template parameter.
+ switch (tag) {
+ case MemoryTag::kNormal:
+ return AllocAndReport<MemoryTag::kNormal>;
+ case MemoryTag::kNormalP1:
+ return AllocAndReport<MemoryTag::kNormalP1>;
+ case MemoryTag::kSampled:
+ return AllocAndReport<MemoryTag::kSampled>;
+ default:
+ ASSUME(false);
+ __builtin_unreachable();
+ }
+ }(tag),
+ MetaDataAlloc),
+ cache_(HugeCache{&alloc_, MetaDataAlloc, UnbackWithoutLock}) {
+ tracker_allocator_.Init(&Static::arena());
+ region_allocator_.Init(&Static::arena());
+}
+
+HugePageAwareAllocator::FillerType::Tracker *HugePageAwareAllocator::GetTracker(
+ HugePage p) {
+ void *v = Static::pagemap().GetHugepage(p.first_page());
+ FillerType::Tracker *pt = reinterpret_cast<FillerType::Tracker *>(v);
+ ASSERT(pt == nullptr || pt->location() == p);
+ return pt;
+}
+
+void HugePageAwareAllocator::SetTracker(
+ HugePage p, HugePageAwareAllocator::FillerType::Tracker *pt) {
+ Static::pagemap().SetHugepage(p.first_page(), pt);
+}
+
+PageId HugePageAwareAllocator::AllocAndContribute(HugePage p, Length n,
+ bool donated) {
+ CHECK_CONDITION(p.start_addr() != nullptr);
+ FillerType::Tracker *pt = tracker_allocator_.New();
+ new (pt) FillerType::Tracker(p, absl::base_internal::CycleClock::Now());
+ ASSERT(pt->longest_free_range() >= n);
+ PageId page = pt->Get(n).page;
+ ASSERT(page == p.first_page());
+ SetTracker(p, pt);
+ filler_.Contribute(pt, donated);
+ return page;
+}
+
+PageId HugePageAwareAllocator::RefillFiller(Length n, bool *from_released) {
+ HugeRange r = cache_.Get(NHugePages(1), from_released);
+ if (!r.valid()) return PageId{0};
+  // This duplicates the ShrinkToUsageLimit() call in Finalize, but if we need
+  // to break up hugepages to get under our usage limit, it would be very bad
+  // to break up what's left of r after we allocate from it--while r is mostly
+  // empty, what's left in the filler is clearly too fragmented to be very
+  // useful, and we would rather release those pages. Otherwise, we're nearly
+  // guaranteed to release r (if n isn't very large), and the next allocation
+  // will just repeat this process.
+ Static::page_allocator().ShrinkToUsageLimit();
+ return AllocAndContribute(r.start(), n, /*donated=*/false);
+}
+
+Span *HugePageAwareAllocator::Finalize(Length n, PageId page)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) {
+ ASSERT(page != PageId{0});
+ Span *ret = Span::New(page, n);
+ Static::pagemap().Set(page, ret);
+ ASSERT(!ret->sampled());
+ info_.RecordAlloc(page, n);
+ Static::page_allocator().ShrinkToUsageLimit();
+ return ret;
+}
+
+// For anything <= half a huge page, we will unconditionally use the filler
+// to pack it into a single hugepage. If that requires pulling in another
+// hugepage, that's fine.
+Span *HugePageAwareAllocator::AllocSmall(Length n, bool *from_released) {
+ auto [pt, page] = filler_.TryGet(n);
+ if (ABSL_PREDICT_TRUE(pt != nullptr)) {
+ *from_released = false;
+ return Finalize(n, page);
+ }
+
+ page = RefillFiller(n, from_released);
+ if (ABSL_PREDICT_FALSE(page == PageId{0})) {
+ return nullptr;
+ }
+ return Finalize(n, page);
+}
+
+Span *HugePageAwareAllocator::AllocLarge(Length n, bool *from_released) {
+  // If n is an exact multiple of a hugepage, just pull whole hugepages
+  // directly.
+ HugeLength hl = HLFromPages(n);
+ if (hl.in_pages() == n) {
+ return AllocRawHugepages(n, from_released);
+ }
+
+ PageId page;
+ // If we fit in a single hugepage, try the Filler first.
+ if (n < kPagesPerHugePage) {
+ auto [pt, page] = filler_.TryGet(n);
+ if (ABSL_PREDICT_TRUE(pt != nullptr)) {
+ *from_released = false;
+ return Finalize(n, page);
+ }
+ }
+
+ // If we're using regions in this binary (see below comment), is
+ // there currently available space there?
+ if (regions_.MaybeGet(n, &page, from_released)) {
+ return Finalize(n, page);
+ }
+
+ // We have two choices here: allocate a new region or go to
+ // hugepages directly (hoping that slack will be filled by small
+  // allocations.) The second strategy is preferable, as it's
+ // typically faster and usually more space efficient, but it's sometimes
+ // catastrophic.
+ //
+ // See https://github.com/google/tcmalloc/tree/master/docs/regions-are-not-optional.md
+ //
+ // So test directly if we're in the bad case--almost no binaries are.
+ // If not, just fall back to direct allocation (and hope we do hit that case!)
+ const Length slack = info_.slack();
+ // Don't bother at all until the binary is reasonably sized
+ if (slack < HLFromBytes(64 * 1024 * 1024).in_pages()) {
+ return AllocRawHugepages(n, from_released);
+ }
+
+ // In the vast majority of binaries, we have many small allocations which
+ // will nicely fill slack. (Fleetwide, the average ratio is 15:1; only
+ // a handful of binaries fall below 1:1.)
+ const Length small = info_.small();
+ if (slack < small) {
+ return AllocRawHugepages(n, from_released);
+ }
+
+  // Try to grow a new region. Regions are oversized, so the request may fail
+  // even though a smaller one (raw hugepages) would succeed.
+ if (!AddRegion()) {
+ return AllocRawHugepages(n, from_released);
+ }
+
+ CHECK_CONDITION(regions_.MaybeGet(n, &page, from_released));
+ return Finalize(n, page);
+}
+
+Span *HugePageAwareAllocator::AllocEnormous(Length n, bool *from_released) {
+ return AllocRawHugepages(n, from_released);
+}
+
+Span *HugePageAwareAllocator::AllocRawHugepages(Length n, bool *from_released) {
+ HugeLength hl = HLFromPages(n);
+
+ HugeRange r = cache_.Get(hl, from_released);
+ if (!r.valid()) return nullptr;
+
+ // We now have a huge page range that covers our request. There
+ // might be some slack in it if n isn't a multiple of
+ // kPagesPerHugePage. Add the hugepage with slack to the filler,
+ // pretending the non-slack portion is a smaller allocation.
+ Length total = hl.in_pages();
+ Length slack = total - n;
+ HugePage first = r.start();
+ SetTracker(first, nullptr);
+ HugePage last = first + r.len() - NHugePages(1);
+ if (slack == Length(0)) {
+ SetTracker(last, nullptr);
+ return Finalize(total, r.start().first_page());
+ }
+
+ ++donated_huge_pages_;
+
+ Length here = kPagesPerHugePage - slack;
+ ASSERT(here > Length(0));
+ AllocAndContribute(last, here, /*donated=*/true);
+ return Finalize(n, r.start().first_page());
+}
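+
+// Worked example for AllocRawHugepages above (illustrative): for
+// n = 2 * kPagesPerHugePage - Length(2), hl is two hugepages and slack is
+// Length(2). The last hugepage is donated to the filler with a "virtual"
+// allocation of kPagesPerHugePage - Length(2) pages, and donated_huge_pages_
+// grows by one (see the DonatedHugePages test).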
+
+static void BackSpan(Span *span) {
+ SystemBack(span->start_address(), span->bytes_in_span());
+}
+
+// public
+Span *HugePageAwareAllocator::New(Length n) {
+ CHECK_CONDITION(n > Length(0));
+ bool from_released;
+ Span *s = LockAndAlloc(n, &from_released);
+ if (s) {
+ // Prefetch for writing, as we anticipate using the memory soon.
+ __builtin_prefetch(s->start_address(), 1, 3);
+ if (from_released) BackSpan(s);
+ }
+ ASSERT(!s || GetMemoryTag(s->start_address()) == tag_);
+ return s;
+}
+
+Span *HugePageAwareAllocator::LockAndAlloc(Length n, bool *from_released) {
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+ // Our policy depends on size. For small things, we will pack them
+ // into single hugepages.
+ if (n <= kPagesPerHugePage / 2) {
+ return AllocSmall(n, from_released);
+ }
+
+ // For anything too big for the filler, we use either a direct hugepage
+ // allocation, or possibly the regions if we are worried about slack.
+ if (n <= HugeRegion::size().in_pages()) {
+ return AllocLarge(n, from_released);
+ }
+
+ // In the worst case, we just fall back to directly allocating a run
+ // of hugepages.
+ return AllocEnormous(n, from_released);
+}
+
+// public
+Span *HugePageAwareAllocator::NewAligned(Length n, Length align) {
+ if (align <= Length(1)) {
+ return New(n);
+ }
+
+ // we can do better than this, but...
+ // TODO(b/134690769): support higher align.
+ CHECK_CONDITION(align <= kPagesPerHugePage);
+ bool from_released;
+ Span *s;
+ {
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+ s = AllocRawHugepages(n, &from_released);
+ }
+ if (s && from_released) BackSpan(s);
+ ASSERT(!s || GetMemoryTag(s->start_address()) == tag_);
+ return s;
+}
+
+void HugePageAwareAllocator::DeleteFromHugepage(FillerType::Tracker *pt,
+ PageId p, Length n) {
+ if (ABSL_PREDICT_TRUE(filler_.Put(pt, p, n) == nullptr)) return;
+ if (pt->donated()) {
+ --donated_huge_pages_;
+ }
+ ReleaseHugepage(pt);
+}
+
+bool HugePageAwareAllocator::AddRegion() {
+ HugeRange r = alloc_.Get(HugeRegion::size());
+ if (!r.valid()) return false;
+ HugeRegion *region = region_allocator_.New();
+ new (region) HugeRegion(r, SystemRelease);
+ regions_.Contribute(region);
+ return true;
+}
+
+void HugePageAwareAllocator::Delete(Span *span) {
+ ASSERT(!span || GetMemoryTag(span->start_address()) == tag_);
+ PageId p = span->first_page();
+ HugePage hp = HugePageContaining(p);
+ Length n = span->num_pages();
+ info_.RecordFree(p, n);
+
+ Span::Delete(span);
+
+ // The tricky part, as with so many allocators: where did we come from?
+ // There are several possibilities.
+ FillerType::Tracker *pt = GetTracker(hp);
+ // a) We got packed by the filler onto a single hugepage - return our
+ // allocation to that hugepage in the filler.
+ if (ABSL_PREDICT_TRUE(pt != nullptr)) {
+ ASSERT(hp == HugePageContaining(p + n - Length(1)));
+ DeleteFromHugepage(pt, p, n);
+ return;
+ }
+
+ // b) We got put into a region, possibly crossing hugepages -
+ // return our allocation to the region.
+ if (regions_.MaybePut(p, n)) return;
+
+ // c) we came straight from the HugeCache - return straight there. (We
+ // might have had slack put into the filler - if so, return that virtual
+ // allocation to the filler too!)
+ ASSERT(n >= kPagesPerHugePage);
+ HugeLength hl = HLFromPages(n);
+ HugePage last = hp + hl - NHugePages(1);
+ Length slack = hl.in_pages() - n;
+ if (slack == Length(0)) {
+ ASSERT(GetTracker(last) == nullptr);
+ } else {
+ pt = GetTracker(last);
+ CHECK_CONDITION(pt != nullptr);
+ // We put the slack into the filler (see AllocEnormous.)
+ // Handle this page separately as a virtual allocation
+ // onto the last hugepage.
+ PageId virt = last.first_page();
+ Length virt_len = kPagesPerHugePage - slack;
+ pt = filler_.Put(pt, virt, virt_len);
+ // We may have used the slack, which would prevent us from returning
+ // the entire range now. If filler returned a Tracker, we are fully empty.
+ if (pt == nullptr) {
+ // Last page isn't empty -- pretend the range was shorter.
+ --hl;
+ } else {
+ // Last page was empty - but if we sub-released it, we still
+      // have to split it off and release it independently.
+ if (pt->released()) {
+ --hl;
+ ReleaseHugepage(pt);
+ } else {
+ // Get rid of the tracker *object*, but not the *hugepage*
+ // (which is still part of our range.) We were able to reclaim the
+ // contributed slack.
+ --donated_huge_pages_;
+ SetTracker(pt->location(), nullptr);
+ tracker_allocator_.Delete(pt);
+ }
+ }
+ }
+ cache_.Release({hp, hl});
+}
+
+void HugePageAwareAllocator::ReleaseHugepage(FillerType::Tracker *pt) {
+ ASSERT(pt->used_pages() == Length(0));
+ HugeRange r = {pt->location(), NHugePages(1)};
+ SetTracker(pt->location(), nullptr);
+
+ if (pt->released()) {
+ cache_.ReleaseUnbacked(r);
+ } else {
+ cache_.Release(r);
+ }
+
+ tracker_allocator_.Delete(pt);
+}
+
+// public
+BackingStats HugePageAwareAllocator::stats() const {
+ BackingStats stats = alloc_.stats();
+ const auto actual_system = stats.system_bytes;
+ stats += cache_.stats();
+ stats += filler_.stats();
+ stats += regions_.stats();
+ // the "system" (total managed) byte count is wildly double counted,
+ // since it all comes from HugeAllocator but is then managed by
+ // cache/regions/filler. Adjust for that.
+ stats.system_bytes = actual_system;
+ return stats;
+}
+
+// public
+void HugePageAwareAllocator::GetSmallSpanStats(SmallSpanStats *result) {
+ GetSpanStats(result, nullptr, nullptr);
+}
+
+// public
+void HugePageAwareAllocator::GetLargeSpanStats(LargeSpanStats *result) {
+ GetSpanStats(nullptr, result, nullptr);
+}
+
+void HugePageAwareAllocator::GetSpanStats(SmallSpanStats *small,
+ LargeSpanStats *large,
+ PageAgeHistograms *ages) {
+ if (small != nullptr) {
+ *small = SmallSpanStats();
+ }
+ if (large != nullptr) {
+ *large = LargeSpanStats();
+ }
+
+ alloc_.AddSpanStats(small, large, ages);
+ filler_.AddSpanStats(small, large, ages);
+ regions_.AddSpanStats(small, large, ages);
+ cache_.AddSpanStats(small, large, ages);
+}
+
+// public
+Length HugePageAwareAllocator::ReleaseAtLeastNPages(Length num_pages) {
+ Length released;
+ released += cache_.ReleaseCachedPages(HLFromPages(num_pages)).in_pages();
+
+  // This is our long-term plan, but in its current state it will lead to
+  // insufficient THP coverage. It is, however, very useful to be able to turn
+  // this on for testing.
+ // TODO(b/134690769): make this work, remove the flag guard.
+ if (Parameters::hpaa_subrelease()) {
+ if (released < num_pages) {
+ released += filler_.ReleasePages(
+ num_pages - released, Parameters::filler_skip_subrelease_interval(),
+ /*hit_limit*/ false);
+ }
+ }
+
+ // TODO(b/134690769):
+ // - perhaps release region?
+ // - refuse to release if we're too close to zero?
+ info_.RecordRelease(num_pages, released);
+ return released;
+}
+
+static double BytesToMiB(size_t bytes) {
+ const double MiB = 1048576.0;
+ return bytes / MiB;
+}
+
+static void BreakdownStats(Printer *out, const BackingStats &s,
+ const char *label) {
+ out->printf("%s %6.1f MiB used, %6.1f MiB free, %6.1f MiB unmapped\n", label,
+ BytesToMiB(s.system_bytes - s.free_bytes - s.unmapped_bytes),
+ BytesToMiB(s.free_bytes), BytesToMiB(s.unmapped_bytes));
+}
+
+static void BreakdownStatsInPbtxt(PbtxtRegion *hpaa, const BackingStats &s,
+ const char *key) {
+ auto usage = hpaa->CreateSubRegion(key);
+ usage.PrintI64("used", s.system_bytes - s.free_bytes - s.unmapped_bytes);
+ usage.PrintI64("free", s.free_bytes);
+ usage.PrintI64("unmapped", s.unmapped_bytes);
+}
+
+// public
+void HugePageAwareAllocator::Print(Printer *out) { Print(out, true); }
+
+void HugePageAwareAllocator::Print(Printer *out, bool everything) {
+ SmallSpanStats small;
+ LargeSpanStats large;
+ BackingStats bstats;
+ PageAgeHistograms ages(absl::base_internal::CycleClock::Now());
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+ bstats = stats();
+ GetSpanStats(&small, &large, &ages);
+ PrintStats("HugePageAware", out, bstats, small, large, everything);
+ out->printf(
+ "\nHuge page aware allocator components:\n"
+ "------------------------------------------------\n");
+ out->printf("HugePageAware: breakdown of used / free / unmapped space:\n");
+
+ auto fstats = filler_.stats();
+ BreakdownStats(out, fstats, "HugePageAware: filler");
+
+ auto rstats = regions_.stats();
+ BreakdownStats(out, rstats, "HugePageAware: region");
+
+ auto cstats = cache_.stats();
+ // Everything in the filler came from the cache -
+ // adjust the totals so we see the amount used by the mutator.
+ cstats.system_bytes -= fstats.system_bytes;
+ BreakdownStats(out, cstats, "HugePageAware: cache ");
+
+ auto astats = alloc_.stats();
+ // Everything in *all* components came from here -
+ // so again adjust the totals.
+ astats.system_bytes -= (fstats + rstats + cstats).system_bytes;
+ BreakdownStats(out, astats, "HugePageAware: alloc ");
+ out->printf("\n");
+
+ out->printf("HugePageAware: filler donations %zu\n",
+ donated_huge_pages_.raw_num());
+
+ // Component debug output
+ // Filler is by far the most important; print (some) of it
+ // unconditionally.
+ filler_.Print(out, everything);
+ out->printf("\n");
+ if (everything) {
+ regions_.Print(out);
+ out->printf("\n");
+ cache_.Print(out);
+ out->printf("\n");
+ alloc_.Print(out);
+ out->printf("\n");
+
+ // Use statistics
+ info_.Print(out);
+
+ // and age tracking.
+ ages.Print("HugePageAware", out);
+ }
+
+ out->printf("PARAMETER hpaa_subrelease %d\n",
+ Parameters::hpaa_subrelease() ? 1 : 0);
+}
+
+void HugePageAwareAllocator::PrintInPbtxt(PbtxtRegion *region) {
+ SmallSpanStats small;
+ LargeSpanStats large;
+ PageAgeHistograms ages(absl::base_internal::CycleClock::Now());
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+ GetSpanStats(&small, &large, &ages);
+ PrintStatsInPbtxt(region, small, large, ages);
+ {
+ auto hpaa = region->CreateSubRegion("huge_page_allocator");
+ hpaa.PrintBool("using_hpaa", true);
+ hpaa.PrintBool("using_hpaa_subrelease", Parameters::hpaa_subrelease());
+
+ // Fill HPAA Usage
+ auto fstats = filler_.stats();
+ BreakdownStatsInPbtxt(&hpaa, fstats, "filler_usage");
+
+ auto rstats = regions_.stats();
+ BreakdownStatsInPbtxt(&hpaa, rstats, "region_usage");
+
+ auto cstats = cache_.stats();
+ // Everything in the filler came from the cache -
+ // adjust the totals so we see the amount used by the mutator.
+ cstats.system_bytes -= fstats.system_bytes;
+ BreakdownStatsInPbtxt(&hpaa, cstats, "cache_usage");
+
+ auto astats = alloc_.stats();
+ // Everything in *all* components came from here -
+ // so again adjust the totals.
+ astats.system_bytes -= (fstats + rstats + cstats).system_bytes;
+ BreakdownStatsInPbtxt(&hpaa, astats, "alloc_usage");
+
+ filler_.PrintInPbtxt(&hpaa);
+ regions_.PrintInPbtxt(&hpaa);
+ cache_.PrintInPbtxt(&hpaa);
+ alloc_.PrintInPbtxt(&hpaa);
+
+ // Use statistics
+ info_.PrintInPbtxt(&hpaa, "hpaa_stat");
+
+ hpaa.PrintI64("filler_donated_huge_pages", donated_huge_pages_.raw_num());
+ }
+}
+
+template <MemoryTag tag>
+void *HugePageAwareAllocator::AllocAndReport(size_t bytes, size_t *actual,
+ size_t align) {
+ void *p = SystemAlloc(bytes, actual, align, tag);
+ if (p == nullptr) return p;
+ const PageId page = PageIdContaining(p);
+ const Length page_len = BytesToLengthFloor(*actual);
+ Static::pagemap().Ensure(page, page_len);
+ return p;
+}
+
+void *HugePageAwareAllocator::MetaDataAlloc(size_t bytes)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) {
+ return Static::arena().Alloc(bytes);
+}
+
+Length HugePageAwareAllocator::ReleaseAtLeastNPagesBreakingHugepages(Length n) {
+  // We desperately need to release memory, and are willing to
+ // compromise on hugepage usage. That means releasing from the filler.
+ return filler_.ReleasePages(n, absl::ZeroDuration(), /*hit_limit*/ true);
+}
+
+void HugePageAwareAllocator::UnbackWithoutLock(void *start, size_t length) {
+ pageheap_lock.Unlock();
+ SystemRelease(start, length);
+ pageheap_lock.Lock();
+}
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_page_aware_allocator.h b/contrib/libs/tcmalloc/tcmalloc/huge_page_aware_allocator.h
new file mode 100644
index 0000000000..c36a1e515e
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/huge_page_aware_allocator.h
@@ -0,0 +1,175 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TCMALLOC_HUGE_PAGE_AWARE_ALLOCATOR_H_
+#define TCMALLOC_HUGE_PAGE_AWARE_ALLOCATOR_H_
+
+#include <stddef.h>
+
+#include "absl/base/thread_annotations.h"
+#include "tcmalloc/arena.h"
+#include "tcmalloc/common.h"
+#include "tcmalloc/huge_allocator.h"
+#include "tcmalloc/huge_cache.h"
+#include "tcmalloc/huge_page_filler.h"
+#include "tcmalloc/huge_pages.h"
+#include "tcmalloc/huge_region.h"
+#include "tcmalloc/internal/config.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/page_allocator_interface.h"
+#include "tcmalloc/page_heap_allocator.h"
+#include "tcmalloc/span.h"
+#include "tcmalloc/stats.h"
+#include "tcmalloc/system-alloc.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+bool decide_subrelease();
+
+// An implementation of the PageAllocator interface that is hugepage-efficient.
+// Attempts to pack allocations into full hugepages wherever possible,
+// and aggressively returns empty ones to the system.
+class HugePageAwareAllocator final : public PageAllocatorInterface {
+ public:
+ explicit HugePageAwareAllocator(MemoryTag tag);
+ ~HugePageAwareAllocator() override = default;
+
+  // Allocate a run of "n" pages. Returns nullptr if out of memory.
+ // Caller should not pass "n == 0" -- instead, n should have
+ // been rounded up already.
+ Span* New(Length n) ABSL_LOCKS_EXCLUDED(pageheap_lock) override;
+
+ // As New, but the returned span is aligned to a <align>-page boundary.
+ // <align> must be a power of two.
+ Span* NewAligned(Length n, Length align)
+ ABSL_LOCKS_EXCLUDED(pageheap_lock) override;
+
+ // Delete the span "[p, p+n-1]".
+ // REQUIRES: span was returned by earlier call to New() and
+ // has not yet been deleted.
+ void Delete(Span* span) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) override;
+
+ BackingStats stats() const
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) override;
+
+ void GetSmallSpanStats(SmallSpanStats* result)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) override;
+
+ void GetLargeSpanStats(LargeSpanStats* result)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) override;
+
+ // Try to release at least num_pages for reuse by the OS. Returns
+ // the actual number of pages released, which may be less than
+ // num_pages if there weren't enough pages to release. The result
+ // may also be larger than num_pages since page_heap might decide to
+ // release one large range instead of fragmenting it into two
+ // smaller released and unreleased ranges.
+ Length ReleaseAtLeastNPages(Length num_pages)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) override;
+
+ Length ReleaseAtLeastNPagesBreakingHugepages(Length n)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+
+ // Prints stats about the page heap to *out.
+ void Print(Printer* out) ABSL_LOCKS_EXCLUDED(pageheap_lock) override;
+
+ // Print stats to *out, excluding long/likely uninteresting things
+ // unless <everything> is true.
+ void Print(Printer* out, bool everything) ABSL_LOCKS_EXCLUDED(pageheap_lock);
+
+ void PrintInPbtxt(PbtxtRegion* region)
+ ABSL_LOCKS_EXCLUDED(pageheap_lock) override;
+
+ HugeLength DonatedHugePages() const
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) {
+ return donated_huge_pages_;
+ }
+
+ const HugeCache* cache() const { return &cache_; }
+
+ private:
+ typedef HugePageFiller<PageTracker<SystemRelease>> FillerType;
+ FillerType filler_;
+
+ // Calls SystemRelease, but with dropping of pageheap_lock around the call.
+ static void UnbackWithoutLock(void* start, size_t length)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+
+ HugeRegionSet<HugeRegion> regions_;
+
+ PageHeapAllocator<FillerType::Tracker> tracker_allocator_;
+ PageHeapAllocator<HugeRegion> region_allocator_;
+
+ FillerType::Tracker* GetTracker(HugePage p);
+
+ void SetTracker(HugePage p, FillerType::Tracker* pt);
+
+ template <MemoryTag tag>
+ static void* AllocAndReport(size_t bytes, size_t* actual, size_t align)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+ static void* MetaDataAlloc(size_t bytes);
+ HugeAllocator alloc_;
+ HugeCache cache_;
+
+ // donated_huge_pages_ measures the number of huge pages contributed to the
+ // filler from left overs of large huge page allocations. When the large
+ // allocation is deallocated, we decrement this count *if* we were able to
+ // fully reassemble the address range (that is, the partial hugepage did not
+ // get stuck in the filler).
+ HugeLength donated_huge_pages_ ABSL_GUARDED_BY(pageheap_lock);
+
+ void GetSpanStats(SmallSpanStats* small, LargeSpanStats* large,
+ PageAgeHistograms* ages);
+
+ PageId RefillFiller(Length n, bool* from_released)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+
+ // Allocate the first <n> from p, and contribute the rest to the filler. If
+ // "donated" is true, the contribution will be marked as coming from the
+ // tail of a multi-hugepage alloc. Returns the allocated section.
+ PageId AllocAndContribute(HugePage p, Length n, bool donated)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+ // Helpers for New().
+
+ Span* LockAndAlloc(Length n, bool* from_released);
+
+ Span* AllocSmall(Length n, bool* from_released)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+ Span* AllocLarge(Length n, bool* from_released)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+ Span* AllocEnormous(Length n, bool* from_released)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+
+ Span* AllocRawHugepages(Length n, bool* from_released)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+
+ bool AddRegion() ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+
+ void ReleaseHugepage(FillerType::Tracker* pt)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+ // Return an allocation from a single hugepage.
+ void DeleteFromHugepage(FillerType::Tracker* pt, PageId p, Length n)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+
+ // Finish an allocation request - give it a span and mark it in the pagemap.
+ Span* Finalize(Length n, PageId page);
+};
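+
+// Example (illustrative, following huge_page_aware_allocator_test.cc): New()
+// takes pageheap_lock internally, while Delete() requires the caller to hold
+// it. "allocator" is a hypothetical HugePageAwareAllocator*:
+//
+//   Span* s = allocator->New(Length(8));
+//   ...
+//   {
+//     absl::base_internal::SpinLockHolder h(&pageheap_lock);
+//     allocator->Delete(s);
+//   }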
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // TCMALLOC_HUGE_PAGE_AWARE_ALLOCATOR_H_
diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_page_aware_allocator_test.cc b/contrib/libs/tcmalloc/tcmalloc/huge_page_aware_allocator_test.cc
new file mode 100644
index 0000000000..83ae930e44
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/huge_page_aware_allocator_test.cc
@@ -0,0 +1,957 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/huge_page_aware_allocator.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include <algorithm>
+#include <cstdint>
+#include <limits>
+#include <new>
+#include <string>
+#include <thread> // NOLINT(build/c++11)
+#include <utility>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/base/attributes.h"
+#include "absl/base/const_init.h"
+#include "absl/base/internal/spinlock.h"
+#include "absl/base/internal/sysinfo.h"
+#include "absl/container/flat_hash_map.h"
+#include "absl/flags/flag.h"
+#include "absl/random/random.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_format.h"
+#include "absl/strings/str_split.h"
+#include "absl/synchronization/barrier.h"
+#include "absl/synchronization/mutex.h"
+#include "absl/time/time.h"
+#include "benchmark/benchmark.h"
+#include "tcmalloc/common.h"
+#include "tcmalloc/huge_pages.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/malloc_extension.h"
+#include "tcmalloc/page_allocator_test_util.h"
+#include "tcmalloc/parameters.h"
+#include "tcmalloc/span.h"
+#include "tcmalloc/static_vars.h"
+#include "tcmalloc/stats.h"
+#include "tcmalloc/system-alloc.h"
+#include "tcmalloc/testing/thread_manager.h"
+
+ABSL_FLAG(std::string, tracefile, "", "file to pull trace from");
+ABSL_FLAG(uint64_t, limit, 0, "");
+ABSL_FLAG(bool, always_check_usage, false, "enable expensive memory checks");
+
+namespace tcmalloc {
+namespace tcmalloc_internal {
+namespace {
+
+using testing::HasSubstr;
+
+class HugePageAwareAllocatorTest : public ::testing::Test {
+ protected:
+ HugePageAwareAllocatorTest() : rng_() {
+ before_ = MallocExtension::GetRegionFactory();
+ extra_ = new ExtraRegionFactory(before_);
+ MallocExtension::SetRegionFactory(extra_);
+
+ // HugePageAwareAllocator can't be destroyed cleanly, so we store a pointer
+ // to one and construct in place.
+ void* p = malloc(sizeof(HugePageAwareAllocator));
+ allocator_ = new (p) HugePageAwareAllocator(MemoryTag::kNormal);
+ }
+
+ ~HugePageAwareAllocatorTest() override {
+ CHECK_CONDITION(ids_.empty());
+ CHECK_CONDITION(total_ == Length(0));
+ // We end up leaking both the backing allocations and the metadata.
+ // The backing allocations are unmapped--it's silly, but not
+    // costing us much in a 64-bit address space.
+ // The metadata is real memory, but there's barely any of it.
+ // It'd be very complicated to rebuild the allocator to support
+ // teardown, so we just put up with it.
+ {
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+ auto stats = allocator_->stats();
+ if (stats.free_bytes + stats.unmapped_bytes != stats.system_bytes) {
+ Crash(kCrash, __FILE__, __LINE__, stats.free_bytes,
+ stats.unmapped_bytes, "!=", stats.system_bytes);
+ }
+ }
+
+ free(allocator_);
+
+ MallocExtension::SetRegionFactory(before_);
+ delete extra_;
+ }
+
+ void CheckStats() {
+ size_t actual_used_bytes = total_.in_bytes();
+ BackingStats stats;
+ {
+ absl::base_internal::SpinLockHolder h2(&pageheap_lock);
+ stats = allocator_->stats();
+ }
+ uint64_t used_bytes =
+ stats.system_bytes - stats.free_bytes - stats.unmapped_bytes;
+ ASSERT_EQ(used_bytes, actual_used_bytes);
+ }
+
+ uint64_t GetFreeBytes() {
+ BackingStats stats;
+ {
+ absl::base_internal::SpinLockHolder h2(&pageheap_lock);
+ stats = allocator_->stats();
+ }
+ return stats.free_bytes;
+ }
+
+ Span* AllocatorNew(Length n) { return allocator_->New(n); }
+
+ void AllocatorDelete(Span* s) {
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+ allocator_->Delete(s);
+ }
+
+ Span* New(Length n) {
+ absl::base_internal::SpinLockHolder h(&lock_);
+ Span* span = AllocatorNew(n);
+ CHECK_CONDITION(span != nullptr);
+ EXPECT_GE(span->num_pages(), n);
+ const size_t id = next_id_++;
+ total_ += n;
+ CheckStats();
+ // and distinct spans...
+ CHECK_CONDITION(ids_.insert({span, id}).second);
+ return span;
+ }
+
+ void Delete(Span* span) {
+ Length n = span->num_pages();
+ {
+ absl::base_internal::SpinLockHolder h(&lock_);
+ auto i = ids_.find(span);
+ CHECK_CONDITION(i != ids_.end());
+ const size_t id = i->second;
+ ids_.erase(i);
+ AllocatorDelete(span);
+ total_ -= n;
+ CheckStats();
+ }
+ }
+
+ // Mostly small things, some large ones.
+ Length RandomAllocSize() {
+ // TODO(b/128521238): scalable RNG
+ absl::base_internal::SpinLockHolder h(&lock_);
+ if (absl::Bernoulli(rng_, 1.0 / 1000)) {
+ Length n =
+ Length(1024) * (1 + absl::LogUniform<int32_t>(rng_, 0, (1 << 8) - 1));
+ n += Length(absl::Uniform<int32_t>(rng_, 0, 1024));
+ return n;
+ }
+ return Length(1 + absl::LogUniform<int32_t>(rng_, 0, (1 << 9) - 1));
+ }
+
+ Length ReleasePages(Length k) {
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+ return allocator_->ReleaseAtLeastNPages(k);
+ }
+
+ std::string Print() {
+ std::string ret;
+ const size_t kSize = 1 << 20;
+ ret.resize(kSize);
+ Printer p(&ret[0], kSize);
+ allocator_->Print(&p);
+ ret.erase(p.SpaceRequired());
+ return ret;
+ }
+
+ std::string PrintInPbTxt() {
+ std::string ret;
+ const size_t kSize = 1 << 20;
+ ret.resize(kSize);
+ Printer p(&ret[0], kSize);
+ {
+ PbtxtRegion region(&p, kNested, 0);
+ allocator_->PrintInPbtxt(&region);
+ }
+ ret.erase(p.SpaceRequired());
+ return ret;
+ }
+
+ HugePageAwareAllocator* allocator_;
+ ExtraRegionFactory* extra_;
+ AddressRegionFactory* before_;
+ absl::BitGen rng_;
+ absl::base_internal::SpinLock lock_;
+ absl::flat_hash_map<Span*, size_t> ids_;
+ size_t next_id_{0};
+ Length total_;
+};
+
+TEST_F(HugePageAwareAllocatorTest, Fuzz) {
+ std::vector<Span*> allocs;
+ for (int i = 0; i < 5000; ++i) {
+ Length n = RandomAllocSize();
+ allocs.push_back(New(n));
+ }
+ static const size_t kReps = 50 * 1000;
+ for (int i = 0; i < kReps; ++i) {
+ SCOPED_TRACE(absl::StrFormat("%d reps, %d pages", i, total_.raw_num()));
+ size_t index = absl::Uniform<int32_t>(rng_, 0, allocs.size());
+ Span* old = allocs[index];
+ Delete(old);
+ Length n = RandomAllocSize();
+ allocs[index] = New(n);
+ }
+
+ for (auto s : allocs) {
+ Delete(s);
+ }
+}
+
+// Prevent regression of the fragmentation problem that was reported in
+// b/63301358, reproduced in CL/161345659 and (partially) fixed in CL/161305971.
+TEST_F(HugePageAwareAllocatorTest, JustUnderMultipleOfHugepages) {
+ std::vector<Span*> big_allocs, small_allocs;
+ // Trigger creation of a hugepage with more than one allocation and plenty of
+ // free space.
+ small_allocs.push_back(New(Length(1)));
+ small_allocs.push_back(New(Length(10)));
+ // Limit iterations so that the huge page with the small allocs doesn't fill
+ // up.
+ size_t n_iter = (kPagesPerHugePage - Length(2)).raw_num();
+ // Also limit memory usage to ~1 GB.
+ n_iter = std::min((1 << 30) / (2 * kHugePageSize), n_iter);
+ for (int i = 0; i < n_iter; ++i) {
+ Length n = 2 * kPagesPerHugePage - Length(1);
+ big_allocs.push_back(New(n));
+ small_allocs.push_back(New(Length(1)));
+ }
+ for (auto* span : big_allocs) {
+ Delete(span);
+ }
+ // We should have one hugepage that's full of small allocations and a bunch
+ // of empty hugepages. The HugeCache will keep some of the empty hugepages
+ // backed so free space should drop to a small multiple of the huge page size.
+ EXPECT_LE(GetFreeBytes(), 20 * kHugePageSize);
+ for (auto* span : small_allocs) {
+ Delete(span);
+ }
+}
+
+TEST_F(HugePageAwareAllocatorTest, Multithreaded) {
+ static const size_t kThreads = 16;
+ std::vector<std::thread> threads;
+ threads.reserve(kThreads);
+ absl::Barrier b1(kThreads);
+ absl::Barrier b2(kThreads);
+ for (int i = 0; i < kThreads; ++i) {
+ threads.push_back(std::thread([this, &b1, &b2]() {
+ absl::BitGen rng;
+ std::vector<Span*> allocs;
+ for (int i = 0; i < 150; ++i) {
+ Length n = RandomAllocSize();
+ allocs.push_back(New(n));
+ }
+ b1.Block();
+ static const size_t kReps = 4 * 1000;
+ for (int i = 0; i < kReps; ++i) {
+ size_t index = absl::Uniform<int32_t>(rng, 0, allocs.size());
+ Delete(allocs[index]);
+ Length n = RandomAllocSize();
+ allocs[index] = New(n);
+ }
+ b2.Block();
+ for (auto s : allocs) {
+ Delete(s);
+ }
+ }));
+ }
+
+ for (auto& t : threads) {
+ t.join();
+ }
+}
+
+TEST_F(HugePageAwareAllocatorTest, ReleasingLarge) {
+ // Ensure the HugeCache has some free items:
+ Delete(New(kPagesPerHugePage));
+ ASSERT_LE(kPagesPerHugePage, ReleasePages(kPagesPerHugePage));
+}
+
+TEST_F(HugePageAwareAllocatorTest, ReleasingSmall) {
+ const bool old_subrelease = Parameters::hpaa_subrelease();
+ Parameters::set_hpaa_subrelease(true);
+
+ const absl::Duration old_skip_subrelease =
+ Parameters::filler_skip_subrelease_interval();
+ Parameters::set_filler_skip_subrelease_interval(absl::ZeroDuration());
+
+ std::vector<Span*> live, dead;
+ static const size_t N = kPagesPerHugePage.raw_num() * 128;
+ for (int i = 0; i < N; ++i) {
+ Span* span = New(Length(1));
+ ((i % 2 == 0) ? live : dead).push_back(span);
+ }
+
+ for (auto d : dead) {
+ Delete(d);
+ }
+
+ EXPECT_EQ(kPagesPerHugePage / 2, ReleasePages(Length(1)));
+
+ for (auto l : live) {
+ Delete(l);
+ }
+
+ Parameters::set_hpaa_subrelease(old_subrelease);
+ Parameters::set_filler_skip_subrelease_interval(old_skip_subrelease);
+}
+
+TEST_F(HugePageAwareAllocatorTest, DonatedHugePages) {
+ // This test verifies that we accurately measure the amount of RAM that we
+ // donate to the huge page filler when making large allocations, including
+ // those kept alive after we deallocate.
+ static constexpr Length kSlack = Length(2);
+ static constexpr Length kLargeSize = 2 * kPagesPerHugePage - kSlack;
+ static constexpr Length kSmallSize = Length(1);
+
+ Span* large1 = New(kLargeSize);
+ Length slack;
+ HugeLength donated_huge_pages;
+ {
+ absl::base_internal::SpinLockHolder l(&pageheap_lock);
+ slack = allocator_->info().slack();
+ donated_huge_pages = allocator_->DonatedHugePages();
+ }
+ EXPECT_EQ(slack, kSlack);
+ EXPECT_EQ(donated_huge_pages, NHugePages(1));
+
+ EXPECT_THAT(Print(), HasSubstr("filler donations 1"));
+ EXPECT_THAT(PrintInPbTxt(), HasSubstr("filler_donated_huge_pages: 1"));
+
+ // Make a small allocation and then free the large allocation. Slack should
+ // fall, but we've kept alive our donation to the filler.
+ Span* small = New(kSmallSize);
+ Delete(large1);
+ {
+ absl::base_internal::SpinLockHolder l(&pageheap_lock);
+ slack = allocator_->info().slack();
+ donated_huge_pages = allocator_->DonatedHugePages();
+ }
+ EXPECT_EQ(slack, Length(0));
+ EXPECT_EQ(donated_huge_pages, NHugePages(1));
+
+ EXPECT_THAT(Print(), HasSubstr("filler donations 1"));
+ EXPECT_THAT(PrintInPbTxt(), HasSubstr("filler_donated_huge_pages: 1"));
+
+ // Make another large allocation. The number of donated huge pages should
+ // continue to increase.
+ Span* large2 = New(kLargeSize);
+ {
+ absl::base_internal::SpinLockHolder l(&pageheap_lock);
+ slack = allocator_->info().slack();
+ donated_huge_pages = allocator_->DonatedHugePages();
+ }
+ EXPECT_EQ(slack, kSlack);
+ EXPECT_EQ(donated_huge_pages, NHugePages(2));
+
+ EXPECT_THAT(Print(), HasSubstr("filler donations 2"));
+ EXPECT_THAT(PrintInPbTxt(), HasSubstr("filler_donated_huge_pages: 2"));
+
+ // Deallocating the small allocation does not reduce the number of donations,
+ // as we were unable to reassemble the VSS for large1.
+ Delete(small);
+ {
+ absl::base_internal::SpinLockHolder l(&pageheap_lock);
+ slack = allocator_->info().slack();
+ donated_huge_pages = allocator_->DonatedHugePages();
+ }
+ EXPECT_EQ(slack, kSlack);
+ EXPECT_EQ(donated_huge_pages, NHugePages(2));
+
+ EXPECT_THAT(Print(), HasSubstr("filler donations 2"));
+ EXPECT_THAT(PrintInPbTxt(), HasSubstr("filler_donated_huge_pages: 2"));
+
+ // Deallocating everything should return slack to 0 and allow large2's
+ // contiguous VSS to be reassembled.
+ Delete(large2);
+ {
+ absl::base_internal::SpinLockHolder l(&pageheap_lock);
+ slack = allocator_->info().slack();
+ donated_huge_pages = allocator_->DonatedHugePages();
+ }
+ EXPECT_EQ(slack, Length(0));
+ EXPECT_EQ(donated_huge_pages, NHugePages(1));
+
+ EXPECT_THAT(Print(), HasSubstr("filler donations 1"));
+ EXPECT_THAT(PrintInPbTxt(), HasSubstr("filler_donated_huge_pages: 1"));
+}
+
+TEST_F(HugePageAwareAllocatorTest, PageMapInterference) {
+ // This test manipulates the test HugePageAwareAllocator while making
+ // allocations/deallocations that interact with the real PageAllocator. The
+ // two share a global PageMap.
+ //
+ // If this test begins failing, the two are likely conflicting by violating
+ // invariants in the PageMap.
+ std::vector<Span*> allocs;
+
+ for (int i : {10, 20, 30}) {
+ auto n = Length(i << 7);
+ allocs.push_back(New(n));
+ }
+
+ for (auto* a : allocs) {
+ Delete(a);
+ }
+
+ allocs.clear();
+
+ // Do the same, but allocate something on the real page heap.
+ for (int i : {10, 20, 30}) {
+ auto n = Length(i << 7);
+ allocs.push_back(New(n));
+
+ ::operator delete(::operator new(1 << 20));
+ }
+
+ for (auto* a : allocs) {
+ Delete(a);
+ }
+}
+
+TEST_F(HugePageAwareAllocatorTest, LargeSmall) {
+ const int kIters = 2000;
+ const Length kSmallPages = Length(1);
+ // Large block must be larger than 1 huge page.
+ const Length kLargePages = 2 * kPagesPerHugePage - kSmallPages;
+ std::vector<Span*> small_allocs;
+
+ // Repeatedly allocate large and small allocations that fit into a multiple of
+ // huge pages. The large allocations are short lived and the small
+  // allocations are long-lived. We want to avoid growing the heap without
+  // bound and keeping many huge pages alive merely because of the small
+  // allocations.
+ for (int i = 0; i < kIters; i++) {
+ Span* large = New(kLargePages);
+ ASSERT_NE(large, nullptr);
+ Span* small = New(kSmallPages);
+ ASSERT_NE(small, nullptr);
+
+ small_allocs.push_back(small);
+ Delete(large);
+ }
+
+ BackingStats stats;
+ {
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+ stats = allocator_->stats();
+ }
+
+ constexpr size_t kBufferSize = 1024 * 1024;
+ char buffer[kBufferSize];
+ Printer printer(buffer, kBufferSize);
+ allocator_->Print(&printer);
+ // Verify that we have less free memory than we allocated in total. We have
+ // to account for bytes tied up in the cache.
+ EXPECT_LE(stats.free_bytes - allocator_->cache()->size().in_bytes(),
+ kSmallPages.in_bytes() * kIters)
+ << buffer;
+
+ for (Span* small : small_allocs) {
+ Delete(small);
+ }
+}
+
+// Tests an edge case in hugepage donation behavior.
+TEST_F(HugePageAwareAllocatorTest, DonatedPageLists) {
+ const Length kSmallPages = Length(1);
+ // Large block must be larger than 1 huge page.
+ const Length kLargePages = 2 * kPagesPerHugePage - 2 * kSmallPages;
+
+ Span* large = New(kLargePages);
+ ASSERT_NE(large, nullptr);
+
+ // Allocating small1 moves the backing huge page off of the donated pages
+ // list.
+ Span* small1 = New(kSmallPages);
+ ASSERT_NE(small1, nullptr);
+ // This delete needs to have put the origin PageTracker back onto the right
+ // free list.
+ Delete(small1);
+
+ // This otherwise fails.
+ Span* small2 = New(kSmallPages);
+ ASSERT_NE(small2, nullptr);
+ Delete(small2);
+
+ // Clean up.
+ Delete(large);
+}
+
+TEST_F(HugePageAwareAllocatorTest, DonationAccounting) {
+ const Length kSmallPages = Length(2);
+ const Length kOneHugePageDonation = kPagesPerHugePage - kSmallPages;
+ const Length kMultipleHugePagesDonation = 3 * kPagesPerHugePage - kSmallPages;
+
+ // Each of these allocations should count as one donation, but only if they
+ // are actually being reused.
+ Span* large = New(kOneHugePageDonation);
+ ASSERT_NE(large, nullptr);
+
+ // This allocation ensures that the donation is not counted.
+ Span* small = New(kSmallPages);
+ ASSERT_NE(small, nullptr);
+
+ Span* large2 = New(kMultipleHugePagesDonation);
+ ASSERT_NE(large2, nullptr);
+
+ // This allocation ensures that the donation is not counted.
+ Span* small2 = New(kSmallPages);
+ ASSERT_NE(small2, nullptr);
+
+ Span* large3 = New(kOneHugePageDonation);
+ ASSERT_NE(large3, nullptr);
+
+ Span* large4 = New(kMultipleHugePagesDonation);
+ ASSERT_NE(large4, nullptr);
+
+ // Clean up.
+ Delete(large);
+ Delete(large2);
+ Delete(large3);
+ Delete(large4);
+ Delete(small);
+ Delete(small2);
+
+ // Check donation count.
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+ CHECK_CONDITION(NHugePages(2) == allocator_->DonatedHugePages());
+}
+
+// We'd like to test OOM behavior but this, err, OOMs. :)
+// (Usable manually in controlled environments.)
+TEST_F(HugePageAwareAllocatorTest, DISABLED_OOM) {
+ std::vector<Span*> objs;
+ auto n = Length(1);
+ while (true) {
+ Span* s = New(n);
+ if (!s) break;
+ objs.push_back(s);
+ n *= 2;
+ }
+ for (auto s : objs) {
+ Delete(s);
+ }
+}
+
+struct MemoryBytes {
+ uint64_t virt;
+ uint64_t phys;
+};
+
+int64_t pagesize = getpagesize();
+
+static size_t BytesInCore(void* p, size_t len) {
+ static const size_t kBufSize = 1024;
+ unsigned char buf[kBufSize];
+ const size_t kChunk = pagesize * kBufSize;
+ size_t resident = 0;
+ while (len > 0) {
+ // We call mincore in bounded size chunks (though typically one
+ // chunk will cover an entire request.)
+ const size_t chunk_len = std::min(kChunk, len);
+ if (mincore(p, chunk_len, buf) != 0) {
+ Crash(kCrash, __FILE__, __LINE__, "mincore failed, errno", errno);
+ }
+ const size_t lim = chunk_len / pagesize;
+ for (size_t i = 0; i < lim; ++i) {
+ if (buf[i] & 1) resident += pagesize;
+ }
+ len -= chunk_len;
+ p = static_cast<char*>(p) + chunk_len;
+ }
+
+ return resident;
+}
+
+// Is any page of this hugepage resident?
+bool HugePageResident(HugePage p) {
+ return BytesInCore(p.start_addr(), kHugePageSize) > 0;
+}
+
+void Touch(PageId p) {
+  // A tcmalloc page may span more than one kernel page.
+ volatile char* base = reinterpret_cast<char*>(p.start_addr());
+ static size_t kActualPages = std::max<size_t>(kPageSize / pagesize, 1);
+ for (int i = 0; i < kActualPages; ++i) {
+ base[i * pagesize] = 1;
+ }
+}
+
+// Fault an entire hugepage, as if THP chose to do so on an entirely
+// empty hugepage. (In real life, this will usually, but not always,
+// happen: we make sure it does so that our accounting is accurate.)
+void Touch(HugePage hp) {
+ PageId p = hp.first_page();
+ const PageId lim = p + kPagesPerHugePage;
+ while (p < lim) {
+ Touch(p);
+ ++p;
+ }
+}
+
+// Fault in memory across a span (SystemBack doesn't always do this.)
+void TouchTHP(Span* s) {
+ PageId p = s->first_page();
+ PageId lim = s->last_page();
+ HugePage last = HugePageContaining(nullptr);
+ while (p <= lim) {
+ HugePage hp = HugePageContaining(p);
+ // Suppose that we are touching a hugepage for the first time (it
+ // is entirely non-resident.) The page fault we take will usually
+ // be promoted to a full transparent hugepage, and our accounting
+ // assumes this is true. But we can't actually guarantee that
+ // (the kernel won't wait if memory is too fragmented.) Do it ourselves
+ // by hand, to ensure our mincore() calculations return the right answers.
+ if (hp != last && !HugePageResident(hp)) {
+ last = hp;
+ Touch(hp);
+ }
+
+ // Regardless of whether we've optimistically faulted in a
+ // hugepage, we also touch each page in the span.
+ Touch(p);
+ ++p;
+ }
+}
+
+// Similar to the tests above, but much more careful about touching memory and
+// calling malloc, and without the validation.
+class StatTest : public testing::Test {
+ protected:
+ StatTest() : rng_() {}
+
+ class RegionFactory;
+
+ class Region : public AddressRegion {
+ public:
+ Region(AddressRegion* underlying, RegionFactory* factory)
+ : underlying_(underlying), factory_(factory) {}
+
+ std::pair<void*, size_t> Alloc(size_t size, size_t alignment) override {
+ std::pair<void*, size_t> ret = underlying_->Alloc(size, alignment);
+ if (!ret.first) return {nullptr, 0};
+
+ // we only support so many allocations here for simplicity
+ CHECK_CONDITION(factory_->n_ < factory_->kNumAllocs);
+ // Anything coming from the test allocator will request full
+ // alignment. Metadata allocations will not. Since we can't
+ // control the backing of metadata allocations, elide them.
+ // TODO(b/128521238): this is not a good way to do this.
+ if (alignment >= kHugePageSize) {
+ factory_->allocs_[factory_->n_] = ret;
+ factory_->n_++;
+ }
+ return ret;
+ }
+
+ private:
+ AddressRegion* underlying_;
+ RegionFactory* factory_;
+ };
+
+ class RegionFactory : public AddressRegionFactory {
+ public:
+ explicit RegionFactory(AddressRegionFactory* underlying)
+ : underlying_(underlying), n_(0) {}
+
+ AddressRegion* Create(void* start, size_t size, UsageHint hint) override {
+ AddressRegion* underlying_region = underlying_->Create(start, size, hint);
+ CHECK_CONDITION(underlying_region);
+ void* region_space = MallocInternal(sizeof(Region));
+ CHECK_CONDITION(region_space);
+ return new (region_space) Region(underlying_region, this);
+ }
+
+ size_t GetStats(absl::Span<char> buffer) override {
+ return underlying_->GetStats(buffer);
+ }
+
+ MemoryBytes Memory() {
+ MemoryBytes b = {0, 0};
+ for (int i = 0; i < n_; ++i) {
+ void* p = allocs_[i].first;
+ size_t len = allocs_[i].second;
+ b.virt += len;
+ b.phys += BytesInCore(p, len);
+ }
+
+ return b;
+ }
+
+ AddressRegionFactory* underlying() const { return underlying_; }
+
+ private:
+ friend class Region;
+ AddressRegionFactory* underlying_;
+
+ static constexpr size_t kNumAllocs = 1000;
+ size_t n_;
+ std::pair<void*, size_t> allocs_[kNumAllocs];
+ };
+
+ // Carefully get memory usage without touching anything.
+ MemoryBytes GetSystemBytes() { return replacement_region_factory_.Memory(); }
+
+  // This is essentially test-case setup, but run manually -
+  // we can't guarantee that gunit won't malloc in between.
+ void PrepTest() {
+ memset(buf, 0, sizeof(buf));
+ MallocExtension::ReleaseMemoryToSystem(std::numeric_limits<size_t>::max());
+ SetRegionFactory(&replacement_region_factory_);
+ alloc = new (buf) HugePageAwareAllocator(MemoryTag::kNormal);
+ }
+
+ ~StatTest() override {
+ SetRegionFactory(replacement_region_factory_.underlying());
+ }
+
+ BackingStats Stats() {
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+ BackingStats stats = alloc->stats();
+ return stats;
+ }
+
+ // Use bigger allocs here to ensure growth:
+ Length RandomAllocSize() {
+ // Since we touch all of the pages, try to avoid OOM'ing by limiting the
+ // number of big allocations.
+ const Length kMaxBigAllocs = Length(4096);
+
+ if (big_allocs_ < kMaxBigAllocs && absl::Bernoulli(rng_, 1.0 / 50)) {
+ auto n =
+ Length(1024 * (1 + absl::LogUniform<int32_t>(rng_, 0, (1 << 9) - 1)));
+ n += Length(absl::Uniform<int32_t>(rng_, 0, 1024));
+ big_allocs_ += n;
+ return n;
+ }
+ return Length(1 + absl::LogUniform<int32_t>(rng_, 0, (1 << 10) - 1));
+ }
+
+ Span* Alloc(Length n) {
+ Span* span = alloc->New(n);
+ TouchTHP(span);
+ if (n > span->num_pages()) {
+ Crash(kCrash, __FILE__, __LINE__, n.raw_num(),
+ "not <=", span->num_pages().raw_num());
+ }
+ n = span->num_pages();
+ if (n > longest_) longest_ = n;
+ total_ += n;
+ if (total_ > peak_) peak_ = total_;
+ return span;
+ }
+
+ void Free(Span* s) {
+ Length n = s->num_pages();
+ total_ -= n;
+ {
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+ alloc->Delete(s);
+ }
+ }
+
+ void CheckStats() {
+ MemoryBytes here = GetSystemBytes();
+ BackingStats stats = Stats();
+ SmallSpanStats small;
+ LargeSpanStats large;
+ {
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+ alloc->GetSmallSpanStats(&small);
+ alloc->GetLargeSpanStats(&large);
+ }
+
+ size_t span_stats_free_bytes = 0, span_stats_released_bytes = 0;
+ for (auto i = Length(0); i < kMaxPages; ++i) {
+ span_stats_free_bytes += i.in_bytes() * small.normal_length[i.raw_num()];
+ span_stats_released_bytes +=
+ i.in_bytes() * small.returned_length[i.raw_num()];
+ }
+ span_stats_free_bytes += large.normal_pages.in_bytes();
+ span_stats_released_bytes += large.returned_pages.in_bytes();
+
+#ifndef __ppc__
+ const size_t alloced_bytes = total_.in_bytes();
+#endif
+ ASSERT_EQ(here.virt, stats.system_bytes);
+#ifndef __ppc__
+ const size_t actual_unmapped = here.virt - here.phys;
+#endif
+ // TODO(b/122551676): On PPC, our release granularity may be smaller than
+ // the system page size, so we may not actually unmap memory that we expect.
+ // Pending using the return value of madvise, relax this constraint.
+#ifndef __ppc__
+ ASSERT_EQ(actual_unmapped, stats.unmapped_bytes);
+ ASSERT_EQ(here.phys, stats.free_bytes + alloced_bytes);
+ ASSERT_EQ(alloced_bytes,
+ stats.system_bytes - stats.free_bytes - stats.unmapped_bytes);
+#endif
+ ASSERT_EQ(stats.free_bytes, span_stats_free_bytes);
+ ASSERT_EQ(stats.unmapped_bytes, span_stats_released_bytes);
+ }
+
+ char buf[sizeof(HugePageAwareAllocator)];
+ HugePageAwareAllocator* alloc;
+ RegionFactory replacement_region_factory_{GetRegionFactory()};
+ absl::BitGen rng_;
+
+ Length total_;
+ Length longest_;
+ Length peak_;
+ Length big_allocs_;
+};
+
+TEST_F(StatTest, Basic) {
+ static const size_t kNumAllocs = 500;
+ Span* allocs[kNumAllocs];
+
+ const bool always_check_usage = absl::GetFlag(FLAGS_always_check_usage);
+
+ PrepTest();
+ // DO NOT MALLOC ANYTHING BELOW THIS LINE! WE'RE TRYING TO CAREFULLY COUNT
+ // ALLOCATIONS.
+ // (note we can't stop background threads, but hopefully they're idle enough.)
+
+ for (int i = 0; i < kNumAllocs; ++i) {
+ Length k = RandomAllocSize();
+ allocs[i] = Alloc(k);
+ // stats are expensive, don't always check
+ if (i % 10 != 0 && !always_check_usage) continue;
+ CheckStats();
+ }
+
+ static const size_t kReps = 1000;
+ for (int i = 0; i < kReps; ++i) {
+ size_t index = absl::Uniform<int32_t>(rng_, 0, kNumAllocs);
+
+ Free(allocs[index]);
+ Length k = RandomAllocSize();
+ allocs[index] = Alloc(k);
+
+ if (absl::Bernoulli(rng_, 1.0 / 3)) {
+ Length pages(absl::LogUniform<int32_t>(rng_, 0, (1 << 10) - 1) + 1);
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+ alloc->ReleaseAtLeastNPages(pages);
+ }
+
+ // stats are expensive, don't always check
+ if (i % 10 != 0 && !always_check_usage) continue;
+ CheckStats();
+ }
+
+ for (int i = 0; i < kNumAllocs; ++i) {
+ Free(allocs[i]);
+ if (i % 10 != 0 && !always_check_usage) continue;
+ CheckStats();
+ }
+
+ {
+ CheckStats();
+ pageheap_lock.Lock();
+ auto final_stats = alloc->stats();
+ pageheap_lock.Unlock();
+ ASSERT_EQ(final_stats.free_bytes + final_stats.unmapped_bytes,
+ final_stats.system_bytes);
+ }
+
+ // test over, malloc all you like
+}
+
+TEST_F(HugePageAwareAllocatorTest, ParallelRelease) {
+ ThreadManager threads;
+ constexpr int kThreads = 10;
+
+ struct ABSL_CACHELINE_ALIGNED Metadata {
+ absl::BitGen rng;
+ std::vector<Span*> spans;
+ };
+
+ std::vector<Metadata> metadata;
+ metadata.resize(kThreads);
+
+ threads.Start(kThreads, [&](int thread_id) {
+ Metadata& m = metadata[thread_id];
+
+ if (thread_id == 0) {
+ ReleasePages(Length(absl::Uniform(m.rng, 1, 1 << 10)));
+ return;
+ } else if (thread_id == 1) {
+ benchmark::DoNotOptimize(Print());
+ return;
+ }
+
+ if (absl::Bernoulli(m.rng, 0.6) || m.spans.empty()) {
+ Span* s = AllocatorNew(Length(absl::LogUniform(m.rng, 1, 1 << 10)));
+ CHECK_CONDITION(s != nullptr);
+
+      // Touch the contents of the buffer. We later use it to verify that we
+      // are the only thread manipulating the Span, for example, to detect
+      // whether another thread madvise(DONTNEED)'d the contents and zeroed
+      // them.
+ const uintptr_t key = reinterpret_cast<uintptr_t>(s) ^ thread_id;
+ *reinterpret_cast<uintptr_t*>(s->start_address()) = key;
+
+ m.spans.push_back(s);
+ } else {
+ size_t index = absl::Uniform<size_t>(m.rng, 0, m.spans.size());
+
+ Span* back = m.spans.back();
+ Span* s = m.spans[index];
+ m.spans[index] = back;
+ m.spans.pop_back();
+
+ const uintptr_t key = reinterpret_cast<uintptr_t>(s) ^ thread_id;
+ EXPECT_EQ(*reinterpret_cast<uintptr_t*>(s->start_address()), key);
+
+ AllocatorDelete(s);
+ }
+ });
+
+ absl::SleepFor(absl::Seconds(1));
+
+ threads.Stop();
+
+ for (auto& m : metadata) {
+ for (Span* s : m.spans) {
+ AllocatorDelete(s);
+ }
+ }
+}
+
+} // namespace
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_page_filler.h b/contrib/libs/tcmalloc/tcmalloc/huge_page_filler.h
new file mode 100644
index 0000000000..2f72b43881
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/huge_page_filler.h
@@ -0,0 +1,2113 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TCMALLOC_HUGE_PAGE_FILLER_H_
+#define TCMALLOC_HUGE_PAGE_FILLER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <limits>
+
+#include "absl/algorithm/container.h"
+#include "absl/base/internal/cycleclock.h"
+#include "absl/time/time.h"
+#include "tcmalloc/common.h"
+#include "tcmalloc/huge_allocator.h"
+#include "tcmalloc/huge_cache.h"
+#include "tcmalloc/huge_pages.h"
+#include "tcmalloc/internal/linked_list.h"
+#include "tcmalloc/internal/optimization.h"
+#include "tcmalloc/internal/range_tracker.h"
+#include "tcmalloc/internal/timeseries_tracker.h"
+#include "tcmalloc/span.h"
+#include "tcmalloc/stats.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+// This and the following classes implement the adaptive hugepage subrelease
+// mechanism and realized fragmentation metric described in "Adaptive Hugepage
+// Subrelease for Non-moving Memory Allocators in Warehouse-Scale Computers"
+// (ISMM 2021).
+
+// Tracks correctness of skipped subrelease decisions over time.
+template <size_t kEpochs = 16>
+class SkippedSubreleaseCorrectnessTracker {
+ public:
+ struct SkippedSubreleaseDecision {
+ Length pages; // Number of pages we skipped subreleasing.
+ size_t count; // Number of times we skipped a subrelease.
+
+ SkippedSubreleaseDecision() : pages(0), count(0) {}
+ explicit SkippedSubreleaseDecision(Length pages) : pages(pages), count(1) {}
+ explicit SkippedSubreleaseDecision(Length pages, size_t count)
+ : pages(pages), count(count) {}
+
+ SkippedSubreleaseDecision& operator+=(SkippedSubreleaseDecision rhs) {
+ pages += rhs.pages;
+ count += rhs.count;
+ return *this;
+ }
+
+ static SkippedSubreleaseDecision Zero() {
+ return SkippedSubreleaseDecision();
+ }
+ };
+
+ explicit constexpr SkippedSubreleaseCorrectnessTracker(Clock clock,
+ absl::Duration w)
+ : window_(w),
+ epoch_length_(window_ / kEpochs),
+ last_confirmed_peak_(0),
+ tracker_(clock, w) {}
+
+ // Not copyable or movable
+ SkippedSubreleaseCorrectnessTracker(
+ const SkippedSubreleaseCorrectnessTracker&) = delete;
+ SkippedSubreleaseCorrectnessTracker& operator=(
+ const SkippedSubreleaseCorrectnessTracker&) = delete;
+
+ void ReportSkippedSubreleasePages(
+ Length skipped_pages, Length peak_pages,
+ absl::Duration expected_time_until_next_peak) {
+ total_skipped_ += SkippedSubreleaseDecision(skipped_pages);
+ pending_skipped_ += SkippedSubreleaseDecision(skipped_pages);
+
+ SkippedSubreleaseUpdate update;
+ update.decision = SkippedSubreleaseDecision(skipped_pages);
+ update.num_pages_at_decision = peak_pages;
+ update.correctness_interval_epochs =
+ expected_time_until_next_peak / epoch_length_;
+ tracker_.Report(update);
+ }
+
+ void ReportUpdatedPeak(Length current_peak) {
+ // Record this peak for the current epoch (so we don't double-count correct
+ // predictions later) and advance the tracker.
+ SkippedSubreleaseUpdate update;
+ update.confirmed_peak = current_peak;
+ if (tracker_.Report(update)) {
+ // Also keep track of the largest peak we have confirmed this epoch.
+ last_confirmed_peak_ = Length(0);
+ }
+
+ // Recompute currently pending decisions.
+ pending_skipped_ = SkippedSubreleaseDecision::Zero();
+
+ Length largest_peak_already_confirmed = last_confirmed_peak_;
+
+ tracker_.IterBackwards(
+ [&](size_t offset, int64_t ts, const SkippedSubreleaseEntry& e) {
+ // Do not clear any decisions in the current epoch.
+ if (offset == 0) {
+ return;
+ }
+
+ if (e.decisions.count > 0 &&
+ e.max_num_pages_at_decision > largest_peak_already_confirmed &&
+ offset <= e.correctness_interval_epochs) {
+ if (e.max_num_pages_at_decision <= current_peak) {
+ // We can confirm a subrelease decision as correct and it had not
+ // been confirmed correct by an earlier peak yet.
+ correctly_skipped_ += e.decisions;
+ } else {
+ pending_skipped_ += e.decisions;
+ }
+ }
+
+ // Did we clear any earlier decisions based on a peak in this epoch?
+ // Keep track of the peak, so we do not clear them again.
+ largest_peak_already_confirmed =
+ std::max(largest_peak_already_confirmed, e.max_confirmed_peak);
+ },
+ -1);
+
+ last_confirmed_peak_ = std::max(last_confirmed_peak_, current_peak);
+ }
+
+ inline SkippedSubreleaseDecision total_skipped() const {
+ return total_skipped_;
+ }
+
+ inline SkippedSubreleaseDecision correctly_skipped() const {
+ return correctly_skipped_;
+ }
+
+ inline SkippedSubreleaseDecision pending_skipped() const {
+ return pending_skipped_;
+ }
+
+ private:
+ struct SkippedSubreleaseUpdate {
+ // A subrelease decision that was made at this time step: How much did we
+ // decide not to release?
+ SkippedSubreleaseDecision decision;
+
+ // What does our future demand have to be for this to be correct? If there
+ // were multiple subrelease decisions in the same epoch, use the max.
+ Length num_pages_at_decision;
+
+ // How long from the time of the decision do we have before the decision
+ // will be determined incorrect?
+ int64_t correctness_interval_epochs = 0;
+
+ // At this time step, we confirmed a demand peak at this level, which means
+ // all subrelease decisions in earlier time steps that had peak_demand_pages
+ // <= this confirmed_peak were confirmed correct and don't need to be
+ // considered again in the future.
+ Length confirmed_peak;
+ };
+
+ struct SkippedSubreleaseEntry {
+ SkippedSubreleaseDecision decisions = SkippedSubreleaseDecision::Zero();
+ Length max_num_pages_at_decision;
+ int64_t correctness_interval_epochs = 0;
+ Length max_confirmed_peak;
+
+ static SkippedSubreleaseEntry Nil() { return SkippedSubreleaseEntry(); }
+
+ void Report(SkippedSubreleaseUpdate e) {
+ decisions += e.decision;
+ correctness_interval_epochs =
+ std::max(correctness_interval_epochs, e.correctness_interval_epochs);
+ max_num_pages_at_decision =
+ std::max(max_num_pages_at_decision, e.num_pages_at_decision);
+ max_confirmed_peak = std::max(max_confirmed_peak, e.confirmed_peak);
+ }
+ };
+
+ const absl::Duration window_;
+ const absl::Duration epoch_length_;
+
+ // The largest peak we processed this epoch. This is required to avoid us
+ // double-counting correctly predicted decisions.
+ Length last_confirmed_peak_;
+
+ SkippedSubreleaseDecision total_skipped_;
+ SkippedSubreleaseDecision correctly_skipped_;
+ SkippedSubreleaseDecision pending_skipped_;
+
+ TimeSeriesTracker<SkippedSubreleaseEntry, SkippedSubreleaseUpdate, kEpochs>
+ tracker_;
+};
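+
+// Illustrative sketch (not part of the upstream source): how a caller might
+// drive this tracker, assuming `clock` is a Clock and a 10-minute window.
+//
+//   SkippedSubreleaseCorrectnessTracker<16> tracker(clock, absl::Minutes(10));
+//   // We decided not to subrelease 64 pages; demand recently peaked at 1000
+//   // pages and we expect another peak within 2 minutes.
+//   tracker.ReportSkippedSubreleasePages(Length(64), Length(1000),
+//                                        absl::Minutes(2));
+//   // In a later epoch, demand peaks at 1200 pages (>= 1000), confirming the
+//   // decision: it moves from pending_skipped() to correctly_skipped().
+//   tracker.ReportUpdatedPeak(Length(1200));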
+
+struct SubreleaseStats {
+ Length total_pages_subreleased; // cumulative since startup
+ Length num_pages_subreleased;
+ HugeLength total_hugepages_broken{NHugePages(0)}; // cumulative since startup
+ HugeLength num_hugepages_broken{NHugePages(0)};
+
+ bool is_limit_hit = false;
+ // Keep these limit-related stats cumulative since startup only
+ Length total_pages_subreleased_due_to_limit;
+ HugeLength total_hugepages_broken_due_to_limit{NHugePages(0)};
+
+ void reset() {
+ total_pages_subreleased += num_pages_subreleased;
+ total_hugepages_broken += num_hugepages_broken;
+ num_pages_subreleased = Length(0);
+ num_hugepages_broken = NHugePages(0);
+ }
+
+ // Must be called at the beginning of each subrelease request
+ void set_limit_hit(bool value) { is_limit_hit = value; }
+
+ // This only has a well-defined meaning within ReleaseCandidates where
+ // set_limit_hit() has been called earlier. Do not use anywhere else.
+ bool limit_hit() { return is_limit_hit; }
+};
+
+// Track filler statistics over a time window.
+template <size_t kEpochs = 16>
+class FillerStatsTracker {
+ public:
+ enum Type { kRegular, kDonated, kPartialReleased, kReleased, kNumTypes };
+
+ struct FillerStats {
+ Length num_pages;
+ Length free_pages;
+ Length unmapped_pages;
+ Length used_pages_in_subreleased_huge_pages;
+ HugeLength huge_pages[kNumTypes];
+ Length num_pages_subreleased;
+ HugeLength num_hugepages_broken = NHugePages(0);
+
+ HugeLength total_huge_pages() const {
+ HugeLength total_huge_pages;
+ for (int i = 0; i < kNumTypes; i++) {
+ total_huge_pages += huge_pages[i];
+ }
+ return total_huge_pages;
+ }
+ };
+
+ struct NumberOfFreePages {
+ Length free;
+ Length free_backed;
+ };
+
+ explicit constexpr FillerStatsTracker(Clock clock, absl::Duration w,
+ absl::Duration summary_interval)
+ : summary_interval_(summary_interval),
+ window_(w),
+ epoch_length_(window_ / kEpochs),
+ tracker_(clock, w),
+ skipped_subrelease_correctness_(clock, w) {}
+
+ // Not copyable or movable
+ FillerStatsTracker(const FillerStatsTracker&) = delete;
+ FillerStatsTracker& operator=(const FillerStatsTracker&) = delete;
+
+ void Report(const FillerStats stats) {
+ if (ABSL_PREDICT_FALSE(tracker_.Report(stats))) {
+ if (ABSL_PREDICT_FALSE(pending_skipped().count > 0)) {
+ // Consider the peak within the just completed epoch to confirm the
+ // correctness of any recent subrelease decisions.
+ skipped_subrelease_correctness_.ReportUpdatedPeak(std::max(
+ stats.num_pages,
+ tracker_.GetEpochAtOffset(1).stats[kStatsAtMaxDemand].num_pages));
+ }
+ }
+ }
+
+ void Print(Printer* out) const;
+ void PrintInPbtxt(PbtxtRegion* hpaa) const;
+
+ // Calculates recent peaks for skipping subrelease decisions. If our allocated
+ // memory is below the demand peak within the last peak_interval, we stop
+ // subreleasing. If our demand is going above that peak again within another
+ // peak_interval, we report that we made the correct decision.
+ FillerStats GetRecentPeak(absl::Duration peak_interval) {
+ last_peak_interval_ = peak_interval;
+ FillerStats recent_peak;
+ Length max_demand_pages;
+
+ int64_t num_epochs = peak_interval / epoch_length_;
+ tracker_.IterBackwards(
+ [&](size_t offset, int64_t ts, const FillerStatsEntry& e) {
+ if (!e.empty()) {
+ // Identify the maximum number of demand pages we have seen within
+ // the time interval.
+ if (e.stats[kStatsAtMaxDemand].num_pages > max_demand_pages) {
+ recent_peak = e.stats[kStatsAtMaxDemand];
+ max_demand_pages = recent_peak.num_pages;
+ }
+ }
+ },
+ num_epochs);
+
+ return recent_peak;
+ }
+
+ void ReportSkippedSubreleasePages(
+ Length pages, Length peak_pages,
+ absl::Duration expected_time_until_next_peak) {
+ if (pages == Length(0)) {
+ return;
+ }
+
+ skipped_subrelease_correctness_.ReportSkippedSubreleasePages(
+ pages, peak_pages, expected_time_until_next_peak);
+ }
+
+ inline typename SkippedSubreleaseCorrectnessTracker<
+ kEpochs>::SkippedSubreleaseDecision
+ total_skipped() const {
+ return skipped_subrelease_correctness_.total_skipped();
+ }
+
+ inline typename SkippedSubreleaseCorrectnessTracker<
+ kEpochs>::SkippedSubreleaseDecision
+ correctly_skipped() const {
+ return skipped_subrelease_correctness_.correctly_skipped();
+ }
+
+ inline typename SkippedSubreleaseCorrectnessTracker<
+ kEpochs>::SkippedSubreleaseDecision
+ pending_skipped() const {
+ return skipped_subrelease_correctness_.pending_skipped();
+ }
+
+ // Returns the minimum number of free pages throughout the tracker period.
+  // The free field counts all free pages; the free_backed field counts only
+  // the backed ones.
+ NumberOfFreePages min_free_pages(absl::Duration w) const {
+ NumberOfFreePages mins;
+ mins.free = Length::max();
+ mins.free_backed = Length::max();
+
+ int64_t num_epochs = std::clamp(w / epoch_length_, int64_t{0},
+ static_cast<int64_t>(kEpochs));
+
+ tracker_.IterBackwards(
+ [&](size_t offset, int64_t ts, const FillerStatsEntry& e) {
+ if (!e.empty()) {
+ mins.free = std::min(mins.free, e.min_free_pages);
+ mins.free_backed =
+ std::min(mins.free_backed, e.min_free_backed_pages);
+ }
+ },
+ num_epochs);
+ mins.free = (mins.free == Length::max()) ? Length(0) : mins.free;
+ mins.free_backed =
+ (mins.free_backed == Length::max()) ? Length(0) : mins.free_backed;
+ return mins;
+ }
+
+ private:
+ // We collect filler statistics at four "interesting points" within each time
+ // step: at min/max demand of pages and at min/max use of hugepages. This
+ // allows us to approximate the envelope of the different metrics.
+ enum StatsType {
+ kStatsAtMinDemand,
+ kStatsAtMaxDemand,
+ kStatsAtMinHugePages,
+ kStatsAtMaxHugePages,
+ kNumStatsTypes
+ };
+
+ struct FillerStatsEntry {
+ // Collect filler stats at "interesting points" (minimum/maximum page demand
+ // and at minimum/maximum usage of huge pages).
+ FillerStats stats[kNumStatsTypes] = {};
+ static constexpr Length kDefaultValue = Length::max();
+ Length min_free_pages = kDefaultValue;
+ Length min_free_backed_pages = kDefaultValue;
+ Length num_pages_subreleased;
+ HugeLength num_hugepages_broken = NHugePages(0);
+
+ static FillerStatsEntry Nil() { return FillerStatsEntry(); }
+
+ void Report(FillerStats e) {
+ if (empty()) {
+ for (int i = 0; i < kNumStatsTypes; i++) {
+ stats[i] = e;
+ }
+ }
+
+ if (e.num_pages < stats[kStatsAtMinDemand].num_pages) {
+ stats[kStatsAtMinDemand] = e;
+ }
+
+ if (e.num_pages > stats[kStatsAtMaxDemand].num_pages) {
+ stats[kStatsAtMaxDemand] = e;
+ }
+
+ if (e.total_huge_pages() <
+ stats[kStatsAtMinHugePages].total_huge_pages()) {
+ stats[kStatsAtMinHugePages] = e;
+ }
+
+ if (e.total_huge_pages() >
+ stats[kStatsAtMaxHugePages].total_huge_pages()) {
+ stats[kStatsAtMaxHugePages] = e;
+ }
+
+ min_free_pages =
+ std::min(min_free_pages, e.free_pages + e.unmapped_pages);
+ min_free_backed_pages = std::min(min_free_backed_pages, e.free_pages);
+
+ // Subrelease stats
+ num_pages_subreleased += e.num_pages_subreleased;
+ num_hugepages_broken += e.num_hugepages_broken;
+ }
+
+ bool empty() const { return min_free_pages == kDefaultValue; }
+ };
+
+ // The tracker reports pages that have been free for at least this interval,
+ // as well as peaks within this interval.
+ const absl::Duration summary_interval_;
+
+ const absl::Duration window_;
+ const absl::Duration epoch_length_;
+
+ TimeSeriesTracker<FillerStatsEntry, FillerStats, kEpochs> tracker_;
+ SkippedSubreleaseCorrectnessTracker<kEpochs> skipped_subrelease_correctness_;
+
+ // Records the last peak_interval value, for reporting and debugging only.
+ absl::Duration last_peak_interval_;
+};
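+
+// Illustrative sketch (not part of the upstream source): the filler reports a
+// FillerStats snapshot each time it updates its statistics and consults the
+// tracker before subreleasing; `clock` here is an assumed Clock.
+//
+//   FillerStatsTracker<600> tracker(clock, absl::Minutes(10),
+//                                   absl::Minutes(5));
+//   FillerStatsTracker<600>::FillerStats stats;
+//   stats.num_pages = Length(5000);
+//   stats.free_pages = Length(100);
+//   tracker.Report(stats);
+//   // Demand peak over the last 2 minutes; if current usage is below
+//   // peak.num_pages, skipping subrelease may be warranted.
+//   auto peak = tracker.GetRecentPeak(absl::Minutes(2));
+//   // Backed minimum free pages over the summary interval approximates the
+//   // realized fragmentation reported by Print().
+//   Length frag = tracker.min_free_pages(absl::Minutes(5)).free_backed;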
+
+// Evaluate a/b, avoiding division by zero
+inline double safe_div(double a, double b) {
+ if (b == 0) {
+ return 0.;
+ } else {
+ return a / b;
+ }
+}
+
+inline double safe_div(Length a, Length b) {
+ return safe_div(a.raw_num(), b.raw_num());
+}
+
+template <size_t kEpochs>
+void FillerStatsTracker<kEpochs>::Print(Printer* out) const {
+ NumberOfFreePages free_pages = min_free_pages(summary_interval_);
+ out->printf("HugePageFiller: time series over %d min interval\n\n",
+ absl::ToInt64Minutes(summary_interval_));
+
+ // Realized fragmentation is equivalent to backed minimum free pages over a
+ // 5-min interval. It is printed for convenience but not included in pbtxt.
+ out->printf("HugePageFiller: realized fragmentation: %.1f MiB\n",
+ free_pages.free_backed.in_mib());
+ out->printf("HugePageFiller: minimum free pages: %zu (%zu backed)\n",
+ free_pages.free.raw_num(), free_pages.free_backed.raw_num());
+
+ FillerStatsEntry at_peak_demand;
+ FillerStatsEntry at_peak_hps;
+
+ tracker_.IterBackwards(
+ [&](size_t offset, int64_t ts, const FillerStatsEntry& e) {
+ if (!e.empty()) {
+ if (at_peak_demand.empty() ||
+ at_peak_demand.stats[kStatsAtMaxDemand].num_pages <
+ e.stats[kStatsAtMaxDemand].num_pages) {
+ at_peak_demand = e;
+ }
+
+ if (at_peak_hps.empty() ||
+ at_peak_hps.stats[kStatsAtMaxHugePages].total_huge_pages() <
+ e.stats[kStatsAtMaxHugePages].total_huge_pages()) {
+ at_peak_hps = e;
+ }
+ }
+ },
+ summary_interval_ / epoch_length_);
+
+ out->printf(
+ "HugePageFiller: at peak demand: %zu pages (and %zu free, %zu unmapped)\n"
+ "HugePageFiller: at peak demand: %zu hps (%zu regular, %zu donated, "
+ "%zu partial, %zu released)\n",
+ at_peak_demand.stats[kStatsAtMaxDemand].num_pages.raw_num(),
+ at_peak_demand.stats[kStatsAtMaxDemand].free_pages.raw_num(),
+ at_peak_demand.stats[kStatsAtMaxDemand].unmapped_pages.raw_num(),
+ at_peak_demand.stats[kStatsAtMaxDemand].total_huge_pages().raw_num(),
+ at_peak_demand.stats[kStatsAtMaxDemand].huge_pages[kRegular].raw_num(),
+ at_peak_demand.stats[kStatsAtMaxDemand].huge_pages[kDonated].raw_num(),
+ at_peak_demand.stats[kStatsAtMaxDemand]
+ .huge_pages[kPartialReleased]
+ .raw_num(),
+ at_peak_demand.stats[kStatsAtMaxDemand].huge_pages[kReleased].raw_num());
+
+ out->printf(
+ "HugePageFiller: at peak hps: %zu pages (and %zu free, %zu unmapped)\n"
+ "HugePageFiller: at peak hps: %zu hps (%zu regular, %zu donated, "
+ "%zu partial, %zu released)\n",
+ at_peak_hps.stats[kStatsAtMaxDemand].num_pages.raw_num(),
+ at_peak_hps.stats[kStatsAtMaxDemand].free_pages.raw_num(),
+ at_peak_hps.stats[kStatsAtMaxDemand].unmapped_pages.raw_num(),
+ at_peak_hps.stats[kStatsAtMaxDemand].total_huge_pages().raw_num(),
+ at_peak_hps.stats[kStatsAtMaxDemand].huge_pages[kRegular].raw_num(),
+ at_peak_hps.stats[kStatsAtMaxDemand].huge_pages[kDonated].raw_num(),
+ at_peak_hps.stats[kStatsAtMaxDemand]
+ .huge_pages[kPartialReleased]
+ .raw_num(),
+ at_peak_hps.stats[kStatsAtMaxDemand].huge_pages[kReleased].raw_num());
+
+ out->printf(
+ "\nHugePageFiller: Since the start of the execution, %zu subreleases (%zu"
+ " pages) were skipped due to recent (%llds) peaks.\n",
+ total_skipped().count, total_skipped().pages.raw_num(),
+ static_cast<long long>(absl::ToInt64Seconds(last_peak_interval_)));
+
+ Length skipped_pages = total_skipped().pages - pending_skipped().pages;
+ double correctly_skipped_pages_percentage =
+ safe_div(100.0 * correctly_skipped().pages, skipped_pages);
+
+ size_t skipped_count = total_skipped().count - pending_skipped().count;
+ double correctly_skipped_count_percentage =
+ safe_div(100.0 * correctly_skipped().count, skipped_count);
+
+ out->printf(
+ "HugePageFiller: %.4f%% of decisions confirmed correct, %zu "
+ "pending (%.4f%% of pages, %zu pending).\n",
+ correctly_skipped_count_percentage, pending_skipped().count,
+ correctly_skipped_pages_percentage, pending_skipped().pages.raw_num());
+
+ // Print subrelease stats
+ Length total_subreleased;
+ HugeLength total_broken = NHugePages(0);
+ tracker_.Iter(
+ [&](size_t offset, int64_t ts, const FillerStatsEntry& e) {
+ total_subreleased += e.num_pages_subreleased;
+ total_broken += e.num_hugepages_broken;
+ },
+ tracker_.kSkipEmptyEntries);
+ out->printf(
+ "HugePageFiller: Subrelease stats last %d min: total "
+ "%zu pages subreleased, %zu hugepages broken\n",
+ static_cast<int64_t>(absl::ToInt64Minutes(window_)),
+ total_subreleased.raw_num(), total_broken.raw_num());
+}
+
+template <size_t kEpochs>
+void FillerStatsTracker<kEpochs>::PrintInPbtxt(PbtxtRegion* hpaa) const {
+ {
+ auto skip_subrelease = hpaa->CreateSubRegion("filler_skipped_subrelease");
+ skip_subrelease.PrintI64("skipped_subrelease_interval_ms",
+ absl::ToInt64Milliseconds(last_peak_interval_));
+ skip_subrelease.PrintI64("skipped_subrelease_pages",
+ total_skipped().pages.raw_num());
+ skip_subrelease.PrintI64("correctly_skipped_subrelease_pages",
+ correctly_skipped().pages.raw_num());
+ skip_subrelease.PrintI64("pending_skipped_subrelease_pages",
+ pending_skipped().pages.raw_num());
+ skip_subrelease.PrintI64("skipped_subrelease_count", total_skipped().count);
+ skip_subrelease.PrintI64("correctly_skipped_subrelease_count",
+ correctly_skipped().count);
+ skip_subrelease.PrintI64("pending_skipped_subrelease_count",
+ pending_skipped().count);
+ }
+
+ auto filler_stats = hpaa->CreateSubRegion("filler_stats_timeseries");
+ filler_stats.PrintI64("window_ms", absl::ToInt64Milliseconds(epoch_length_));
+ filler_stats.PrintI64("epochs", kEpochs);
+
+ NumberOfFreePages free_pages = min_free_pages(summary_interval_);
+ filler_stats.PrintI64("min_free_pages_interval_ms",
+ absl::ToInt64Milliseconds(summary_interval_));
+ filler_stats.PrintI64("min_free_pages", free_pages.free.raw_num());
+ filler_stats.PrintI64("min_free_backed_pages",
+ free_pages.free_backed.raw_num());
+
+ static const char* labels[kNumStatsTypes] = {
+ "at_minimum_demand", "at_maximum_demand", "at_minimum_huge_pages",
+ "at_maximum_huge_pages"};
+
+ tracker_.Iter(
+ [&](size_t offset, int64_t ts, const FillerStatsEntry& e) {
+ auto region = filler_stats.CreateSubRegion("measurements");
+ region.PrintI64("epoch", offset);
+ region.PrintI64("timestamp_ms",
+ absl::ToInt64Milliseconds(absl::Nanoseconds(ts)));
+ region.PrintI64("min_free_pages", e.min_free_pages.raw_num());
+ region.PrintI64("min_free_backed_pages",
+ e.min_free_backed_pages.raw_num());
+ region.PrintI64("num_pages_subreleased",
+ e.num_pages_subreleased.raw_num());
+ region.PrintI64("num_hugepages_broken",
+ e.num_hugepages_broken.raw_num());
+ for (int i = 0; i < kNumStatsTypes; i++) {
+ auto m = region.CreateSubRegion(labels[i]);
+ FillerStats stats = e.stats[i];
+ m.PrintI64("num_pages", stats.num_pages.raw_num());
+ m.PrintI64("regular_huge_pages",
+ stats.huge_pages[kRegular].raw_num());
+ m.PrintI64("donated_huge_pages",
+ stats.huge_pages[kDonated].raw_num());
+ m.PrintI64("partial_released_huge_pages",
+ stats.huge_pages[kPartialReleased].raw_num());
+ m.PrintI64("released_huge_pages",
+ stats.huge_pages[kReleased].raw_num());
+ m.PrintI64("used_pages_in_subreleased_huge_pages",
+ stats.used_pages_in_subreleased_huge_pages.raw_num());
+ }
+ },
+ tracker_.kSkipEmptyEntries);
+}
+
+// PageTracker keeps track of the allocation status of every page in a HugePage.
+// It allows allocation and deallocation of a contiguous run of pages.
+//
+// Its mutating methods are annotated as requiring the pageheap_lock, in order
+// to support unlocking the page heap lock in a dynamic annotation-friendly way.
+template <MemoryModifyFunction Unback>
+class PageTracker : public TList<PageTracker<Unback>>::Elem {
+ public:
+ static void UnbackImpl(void* p, size_t size) { Unback(p, size); }
+
+ constexpr PageTracker(HugePage p, uint64_t when)
+ : location_(p),
+ released_count_(0),
+ donated_(false),
+ unbroken_(true),
+ free_{} {
+ init_when(when);
+
+#ifndef __ppc64__
+#if defined(__GNUC__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Winvalid-offsetof"
+#endif
+ // Verify fields are structured so commonly accessed members (as part of
+ // Put) are on the first two cache lines. This allows the CentralFreeList
+ // to accelerate deallocations by prefetching PageTracker instances before
+ // taking the pageheap_lock.
+ //
+ // On PPC64, kHugePageSize / kPageSize is typically ~2K (16MB / 8KB),
+ // requiring 512 bytes for representing free_. While its cache line size is
+ // larger, the entirety of free_ will not fit on two cache lines.
+ static_assert(
+ offsetof(PageTracker<Unback>, location_) + sizeof(location_) <=
+ 2 * ABSL_CACHELINE_SIZE,
+ "location_ should fall within the first two cachelines of "
+ "PageTracker.");
+ static_assert(offsetof(PageTracker<Unback>, when_numerator_) +
+ sizeof(when_numerator_) <=
+ 2 * ABSL_CACHELINE_SIZE,
+ "when_numerator_ should fall within the first two cachelines "
+ "of PageTracker.");
+ static_assert(offsetof(PageTracker<Unback>, when_denominator_) +
+ sizeof(when_denominator_) <=
+ 2 * ABSL_CACHELINE_SIZE,
+ "when_denominator_ should fall within the first two "
+ "cachelines of PageTracker.");
+ static_assert(
+ offsetof(PageTracker<Unback>, donated_) + sizeof(donated_) <=
+ 2 * ABSL_CACHELINE_SIZE,
+ "donated_ should fall within the first two cachelines of PageTracker.");
+ static_assert(
+ offsetof(PageTracker<Unback>, free_) + sizeof(free_) <=
+ 2 * ABSL_CACHELINE_SIZE,
+ "free_ should fall within the first two cachelines of PageTracker.");
+#if defined(__GNUC__)
+#pragma GCC diagnostic pop
+#endif
+#endif // __ppc64__
+ }
+
+ struct PageAllocation {
+ PageId page;
+ Length previously_unbacked;
+ };
+
+ // REQUIRES: there's a free range of at least n pages
+ //
+ // Returns a PageId i and a count of previously unbacked pages in the range
+ // [i, i+n) in previously_unbacked.
+ PageAllocation Get(Length n) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+
+ // REQUIRES: p was the result of a previous call to Get(n)
+ void Put(PageId p, Length n) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+
+ // Returns true if any unused pages have been returned-to-system.
+ bool released() const { return released_count_ > 0; }
+
+ // Was this tracker donated from the tail of a multi-hugepage allocation?
+ // Only up-to-date when the tracker is on a TrackerList in the Filler;
+ // otherwise the value is meaningless.
+ bool donated() const { return donated_; }
+ // Set/reset the donated flag. The donated status is lost, for instance,
+ // when further allocations are made on the tracker.
+ void set_donated(bool status) { donated_ = status; }
+
+ // These statistics help us measure the fragmentation of a hugepage and
+ // the desirability of allocating from this hugepage.
+ Length longest_free_range() const { return Length(free_.longest_free()); }
+ size_t nallocs() const { return free_.allocs(); }
+ Length used_pages() const { return Length(free_.used()); }
+ Length released_pages() const { return Length(released_count_); }
+ Length free_pages() const;
+ bool empty() const;
+
+ bool unbroken() const { return unbroken_; }
+
+ // Returns the hugepage whose availability is being tracked.
+ HugePage location() const { return location_; }
+
+ // Return all unused pages to the system, mark future frees to do same.
+ // Returns the count of pages unbacked.
+ Length ReleaseFree() ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+
+ // Return this allocation to the system, if policy warrants it.
+ //
+ // As of 3/2020 our policy is to rerelease: Once we break a hugepage by
+ // returning a fraction of it, we return *anything* unused. This simplifies
+ // tracking.
+ //
+ // TODO(b/141550014): Make retaining the default/sole policy.
+ void MaybeRelease(PageId p, Length n)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) {
+ if (released_count_ == 0) {
+ return;
+ }
+
+ // Mark pages as released.
+ Length index = p - location_.first_page();
+ ASSERT(released_by_page_.CountBits(index.raw_num(), n.raw_num()) == 0);
+ released_by_page_.SetRange(index.raw_num(), n.raw_num());
+ released_count_ += n.raw_num();
+ ASSERT(released_by_page_.CountBits(0, kPagesPerHugePage.raw_num()) ==
+ released_count_);
+
+ // TODO(b/122551676): If release fails, we should not SetRange above.
+ ReleasePagesWithoutLock(p, n);
+ }
+
+ void AddSpanStats(SmallSpanStats* small, LargeSpanStats* large,
+ PageAgeHistograms* ages) const;
+
+ private:
+ void init_when(uint64_t w) {
+ const Length before = Length(free_.total_free());
+ when_numerator_ = w * before.raw_num();
+ when_denominator_ = before.raw_num();
+ }
+
+ HugePage location_;
+ // We keep track of an average time weighted by Length::raw_num. In order to
+ // avoid doing division on fast path, store the numerator and denominator and
+ // only do the division when we need the average.
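+  // (That average is when_numerator_ / when_denominator_, assuming a nonzero
+  // denominator.)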
+ uint64_t when_numerator_;
+ uint64_t when_denominator_;
+
+  // Cached value of released_by_page_.CountBits(0, kPagesPerHugePage.raw_num())
+ //
+ // TODO(b/151663108): Logically, this is guarded by pageheap_lock.
+ uint16_t released_count_;
+ bool donated_;
+ bool unbroken_;
+
+ RangeTracker<kPagesPerHugePage.raw_num()> free_;
+ // Bitmap of pages based on them being released to the OS.
+ // * Not yet released pages are unset (considered "free")
+ // * Released pages are set.
+ //
+ // Before releasing any locks to release memory to the OS, we mark the bitmap.
+ //
+ // Once released, a huge page is considered released *until* free_ is
+  // exhausted and no bits in released_by_page_ are set.  We may have up to
+ // kPagesPerHugePage-1 parallel subreleases in-flight.
+ //
+ // TODO(b/151663108): Logically, this is guarded by pageheap_lock.
+ Bitmap<kPagesPerHugePage.raw_num()> released_by_page_;
+
+ static_assert(kPagesPerHugePage.raw_num() <
+ std::numeric_limits<uint16_t>::max(),
+ "nallocs must be able to support kPagesPerHugePage!");
+
+ void ReleasePages(PageId p, Length n) {
+ void* ptr = p.start_addr();
+ size_t byte_len = n.in_bytes();
+ Unback(ptr, byte_len);
+ unbroken_ = false;
+ }
+
+ void ReleasePagesWithoutLock(PageId p, Length n)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) {
+ pageheap_lock.Unlock();
+
+ void* ptr = p.start_addr();
+ size_t byte_len = n.in_bytes();
+ Unback(ptr, byte_len);
+
+ pageheap_lock.Lock();
+ unbroken_ = false;
+ }
+};
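+
+// Illustrative sketch (not part of the upstream source): the filler uses a
+// tracker roughly as follows, with pageheap_lock held around the mutating
+// calls. `MyUnback`, `addr`, and `now` are placeholders for the real unback
+// function, a hugepage-aligned address, and a clock value.
+//
+//   PageTracker<MyUnback> tracker(HugePageContaining(addr), /*when=*/now);
+//   auto alloc = tracker.Get(Length(4));      // claim 4 contiguous pages
+//   ...hand out [alloc.page, alloc.page + Length(4))...
+//   tracker.Put(alloc.page, Length(4));       // give the 4 pages back
+//   Length released = tracker.ReleaseFree();  // unback whatever is now unused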
+
+enum class FillerPartialRerelease : bool {
+ // Once we break a hugepage by returning a fraction of it, we return
+ // *anything* unused. This simplifies tracking.
+ //
+ // As of 2/2020, this is the default behavior.
+ Return,
+ // When releasing a page onto an already-released huge page, retain the page
+ // rather than releasing it back to the OS. This can reduce minor page
+ // faults for hot pages.
+ //
+ // TODO(b/141550014, b/122551676): Make this the default behavior.
+ Retain,
+};
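+
+// Illustrative difference (not part of the upstream source): suppose a
+// hugepage has already had some of its pages returned to the OS. Under
+// Return, pages freed on that hugepage afterwards are returned to the OS as
+// well; under Retain, they stay backed, trading a larger footprint for fewer
+// minor page faults if they are reallocated soon.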
+
+// This tracks a set of unfilled hugepages, and fulfills allocations
+// with a goal of filling some hugepages as tightly as possible and emptying
+// out the remainder.
+template <class TrackerType>
+class HugePageFiller {
+ public:
+ explicit HugePageFiller(FillerPartialRerelease partial_rerelease);
+ HugePageFiller(FillerPartialRerelease partial_rerelease, Clock clock);
+
+ typedef TrackerType Tracker;
+
+ struct TryGetResult {
+ TrackerType* pt;
+ PageId page;
+ };
+
+ // Our API is simple, but note that it does not include an unconditional
+ // allocation, only a "try"; we expect callers to allocate new hugepages if
+ // needed. This simplifies using it in a few different contexts (and improves
+ // the testing story - no dependencies.)
+ //
+ // On failure, returns nullptr/PageId{0}.
+ TryGetResult TryGet(Length n) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+
+ // Marks [p, p + n) as usable by new allocations into *pt; returns pt
+ // if that hugepage is now empty (nullptr otherwise.)
+ // REQUIRES: pt is owned by this object (has been Contribute()), and
+ // {pt, p, n} was the result of a previous TryGet.
+ TrackerType* Put(TrackerType* pt, PageId p, Length n)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+
+ // Contributes a tracker to the filler. If "donated," then the tracker is
+ // marked as having come from the tail of a multi-hugepage allocation, which
+ // causes it to be treated slightly differently.
+ void Contribute(TrackerType* pt, bool donated);
+
+ HugeLength size() const { return size_; }
+
+ // Useful statistics
+ Length pages_allocated() const { return allocated_; }
+ Length used_pages() const { return allocated_; }
+ Length unmapped_pages() const { return unmapped_; }
+ Length free_pages() const;
+ Length used_pages_in_released() const { return n_used_released_; }
+ Length used_pages_in_partial_released() const {
+ return n_used_partial_released_;
+ }
+ Length used_pages_in_any_subreleased() const {
+ return n_used_released_ + n_used_partial_released_;
+ }
+
+ // Fraction of used pages that are on non-released hugepages and
+ // thus could be backed by kernel hugepages. (Of course, we can't
+  // guarantee that the kernel had available 2-MiB regions of physical
+ // memory--so this being 1 doesn't mean that everything actually
+ // *is* hugepage-backed!)
+ double hugepage_frac() const;
+
+ // Returns the amount of memory to release if all remaining options of
+ // releasing memory involve subreleasing pages.
+ Length GetDesiredSubreleasePages(Length desired, Length total_released,
+ absl::Duration peak_interval)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+
+ // Tries to release desired pages by iteratively releasing from the emptiest
+ // possible hugepage and releasing its free memory to the system. Return the
+ // number of pages actually released.
+ Length ReleasePages(Length desired,
+ absl::Duration skip_subrelease_after_peaks_interval,
+ bool hit_limit)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+
+ void AddSpanStats(SmallSpanStats* small, LargeSpanStats* large,
+ PageAgeHistograms* ages) const;
+
+ BackingStats stats() const;
+ SubreleaseStats subrelease_stats() const { return subrelease_stats_; }
+ void Print(Printer* out, bool everything) const;
+ void PrintInPbtxt(PbtxtRegion* hpaa) const;
+
+ private:
+ typedef TList<TrackerType> TrackerList;
+
+ // This class wraps an array of N TrackerLists and a Bitmap storing which
+ // elements are non-empty.
+ template <size_t N>
+ class HintedTrackerLists {
+ public:
+ HintedTrackerLists() : nonempty_{}, size_(NHugePages(0)) {}
+
+ // Removes a TrackerType from the first non-empty freelist with index at
+ // least n and returns it. Returns nullptr if there is none.
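+    // For example (illustrative, not part of the upstream comment): if
+    // lists_[2] and lists_[5] are the only non-empty lists, GetLeast(3)
+    // returns a tracker from lists_[5] and GetLeast(6) returns nullptr.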
+ TrackerType* GetLeast(const size_t n) {
+ ASSERT(n < N);
+ size_t i = nonempty_.FindSet(n);
+ if (i == N) {
+ return nullptr;
+ }
+ ASSERT(!lists_[i].empty());
+ TrackerType* pt = lists_[i].first();
+ if (lists_[i].remove(pt)) {
+ nonempty_.ClearBit(i);
+ }
+ --size_;
+ return pt;
+ }
+ void Add(TrackerType* pt, const size_t i) {
+ ASSERT(i < N);
+ ASSERT(pt != nullptr);
+ lists_[i].prepend(pt);
+ nonempty_.SetBit(i);
+ ++size_;
+ }
+ void Remove(TrackerType* pt, const size_t i) {
+ ASSERT(i < N);
+ ASSERT(pt != nullptr);
+ if (lists_[i].remove(pt)) {
+ nonempty_.ClearBit(i);
+ }
+ --size_;
+ }
+ const TrackerList& operator[](const size_t n) const {
+ ASSERT(n < N);
+ return lists_[n];
+ }
+ HugeLength size() const { return size_; }
+ bool empty() const { return size().raw_num() == 0; }
+ // Runs a functor on all HugePages in the TrackerLists.
+ // This method is const but the Functor gets passed a non-const pointer.
+ // This quirk is inherited from TrackerList.
+ template <typename Functor>
+ void Iter(const Functor& func, size_t start) const {
+ size_t i = nonempty_.FindSet(start);
+ while (i < N) {
+ auto& list = lists_[i];
+ ASSERT(!list.empty());
+ for (TrackerType* pt : list) {
+ func(pt);
+ }
+ i++;
+ if (i < N) i = nonempty_.FindSet(i);
+ }
+ }
+
+ private:
+ TrackerList lists_[N];
+ Bitmap<N> nonempty_;
+ HugeLength size_;
+ };
+
+ SubreleaseStats subrelease_stats_;
+
+ // We group hugepages first by longest-free (as a measure of fragmentation),
+  // then, within each longest-free bucket, into 8 chunks by desirability of
+  // allocation.
+ static constexpr size_t kChunks = 8;
+ // Which chunk should this hugepage be in?
+ // This returns the largest possible value kChunks-1 iff pt has a single
+ // allocation.
+ size_t IndexFor(TrackerType* pt);
+ // Returns index for regular_alloc_.
+ static size_t ListFor(Length longest, size_t chunk);
+ static constexpr size_t kNumLists = kPagesPerHugePage.raw_num() * kChunks;
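+  // For example, in a configuration with 2 MiB hugepages and 8 KiB TCMalloc
+  // pages, kPagesPerHugePage is 256 and kNumLists is 256 * 8 == 2048.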
+
+ HintedTrackerLists<kNumLists> regular_alloc_;
+ HintedTrackerLists<kPagesPerHugePage.raw_num()> donated_alloc_;
+ // Partially released ones that we are trying to release.
+ //
+ // When FillerPartialRerelease == Return:
+ // regular_alloc_partial_released_ is empty and n_used_partial_released_ is
+ // 0.
+ //
+ // When FillerPartialRerelease == Retain:
+ // regular_alloc_partial_released_ contains huge pages that are partially
+ // allocated, partially free, and partially returned to the OS.
+ // n_used_partial_released_ is the number of pages in that set which are
+ // currently allocated.
+ //
+ // regular_alloc_released_: This list contains huge pages whose pages are
+ // either allocated or returned to the OS. There are no pages that are free,
+ // but not returned to the OS. n_used_released_ contains the number of
+ // pages in those huge pages that are not free (i.e., allocated).
+ Length n_used_partial_released_;
+ Length n_used_released_;
+ HintedTrackerLists<kNumLists> regular_alloc_partial_released_;
+ HintedTrackerLists<kNumLists> regular_alloc_released_;
+
+ // Removes pt from the appropriate HintedTrackerList.
+ void RemoveFromFillerList(TrackerType* pt);
+ // Put pt in the appropriate HintedTrackerList.
+ void AddToFillerList(TrackerType* pt);
+ // Like AddToFillerList(), but for use when donating from the tail of a
+ // multi-hugepage allocation.
+ void DonateToFillerList(TrackerType* pt);
+
+ // CompareForSubrelease identifies the worse candidate for subrelease between
+ // huge pages a and b.
+ static bool CompareForSubrelease(TrackerType* a, TrackerType* b) {
+ ASSERT(a != nullptr);
+ ASSERT(b != nullptr);
+
+ return a->used_pages() < b->used_pages();
+ }
+
+ // SelectCandidates identifies the candidates.size() best candidates in the
+ // given tracker list.
+ //
+ // To support gathering candidates from multiple tracker lists,
+ // current_candidates is the number of candidates already collected (and may
+ // be nonzero on entry).
+ template <size_t N>
+ static int SelectCandidates(absl::Span<TrackerType*> candidates,
+ int current_candidates,
+ const HintedTrackerLists<N>& tracker_list,
+ size_t tracker_start);
+
+ // Release desired pages from the page trackers in candidates. Returns the
+ // number of pages released.
+ Length ReleaseCandidates(absl::Span<TrackerType*> candidates, Length desired)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+
+ HugeLength size_;
+
+ Length allocated_;
+ Length unmapped_;
+
+ // How much have we eagerly unmapped (in already released hugepages), but
+ // not reported to ReleasePages calls?
+ Length unmapping_unaccounted_;
+
+ FillerPartialRerelease partial_rerelease_;
+
+ // Functionality related to time series tracking.
+ void UpdateFillerStatsTracker();
+ using StatsTrackerType = FillerStatsTracker<600>;
+ StatsTrackerType fillerstats_tracker_;
+};
+
+template <MemoryModifyFunction Unback>
+inline typename PageTracker<Unback>::PageAllocation PageTracker<Unback>::Get(
+ Length n) {
+ size_t index = free_.FindAndMark(n.raw_num());
+
+ ASSERT(released_by_page_.CountBits(0, kPagesPerHugePage.raw_num()) ==
+ released_count_);
+
+ size_t unbacked = 0;
+ // If released_count_ == 0, CountBits will return 0 and ClearRange will be a
+ // no-op (but will touch cachelines) due to the invariants guaranteed by
+ // CountBits() == released_count_.
+ //
+ // This is a performance optimization, not a logical requirement.
+ if (ABSL_PREDICT_FALSE(released_count_ > 0)) {
+ unbacked = released_by_page_.CountBits(index, n.raw_num());
+ released_by_page_.ClearRange(index, n.raw_num());
+ ASSERT(released_count_ >= unbacked);
+ released_count_ -= unbacked;
+ }
+
+ ASSERT(released_by_page_.CountBits(0, kPagesPerHugePage.raw_num()) ==
+ released_count_);
+ return PageAllocation{location_.first_page() + Length(index),
+ Length(unbacked)};
+}
+
+template <MemoryModifyFunction Unback>
+inline void PageTracker<Unback>::Put(PageId p, Length n) {
+ Length index = p - location_.first_page();
+ free_.Unmark(index.raw_num(), n.raw_num());
+
+ when_numerator_ += n.raw_num() * absl::base_internal::CycleClock::Now();
+ when_denominator_ += n.raw_num();
+}
+
+template <MemoryModifyFunction Unback>
+inline Length PageTracker<Unback>::ReleaseFree() {
+ size_t count = 0;
+ size_t index = 0;
+ size_t n;
+ // For purposes of tracking, pages which are not yet released are "free" in
+ // the released_by_page_ bitmap. We subrelease these pages in an iterative
+ // process:
+ //
+ // 1. Identify the next range of still backed pages.
+ // 2. Iterate on the free_ tracker within this range. For any free range
+ // found, mark these as unbacked.
+ // 3. Release the subrange to the OS.
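+ //
+ // For illustration (hypothetical state): if nothing has been released yet
+ // and free_ holds free ranges [4, 8) and [12, 16), the first iteration
+ // releases [4, 8) and marks it in released_by_page_, the second releases
+ // [12, 16), and the loop then skips the remaining backed pages, returning
+ // Length(8).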
+ while (released_by_page_.NextFreeRange(index, &index, &n)) {
+ size_t free_index;
+ size_t free_n;
+
+ // Check for freed pages in this unreleased region.
+ if (free_.NextFreeRange(index, &free_index, &free_n) &&
+ free_index < index + n) {
+ // If there is a free range which overlaps with [index, index+n), release
+ // it.
+ size_t end = std::min(free_index + free_n, index + n);
+
+ // In debug builds, verify [free_index, end) is backed.
+ size_t length = end - free_index;
+ ASSERT(released_by_page_.CountBits(free_index, length) == 0);
+ // Mark pages as released. Amortize the update to released_count_.
+ released_by_page_.SetRange(free_index, length);
+
+ PageId p = location_.first_page() + Length(free_index);
+ // TODO(b/122551676): If release fails, we should not SetRange above.
+ ReleasePages(p, Length(length));
+
+ index = end;
+ count += length;
+ } else {
+ // [index, index+n) did not have an overlapping range in free_, move to
+ // the next backed range of pages.
+ index += n;
+ }
+ }
+
+ released_count_ += count;
+ ASSERT(Length(released_count_) <= kPagesPerHugePage);
+ ASSERT(released_by_page_.CountBits(0, kPagesPerHugePage.raw_num()) ==
+ released_count_);
+ init_when(absl::base_internal::CycleClock::Now());
+ return Length(count);
+}
+
+template <MemoryModifyFunction Unback>
+inline void PageTracker<Unback>::AddSpanStats(SmallSpanStats* small,
+ LargeSpanStats* large,
+ PageAgeHistograms* ages) const {
+ size_t index = 0, n;
+
+ uint64_t w = when_denominator_ == 0 ? when_numerator_
+ : when_numerator_ / when_denominator_;
+ while (free_.NextFreeRange(index, &index, &n)) {
+ bool is_released = released_by_page_.GetBit(index);
+ // Find the last bit in the run with the same state (set or cleared) as
+ // index.
+ size_t end;
+ if (index >= kPagesPerHugePage.raw_num() - 1) {
+ end = kPagesPerHugePage.raw_num();
+ } else {
+ end = is_released ? released_by_page_.FindClear(index + 1)
+ : released_by_page_.FindSet(index + 1);
+ }
+ n = std::min(end - index, n);
+ ASSERT(n > 0);
+
+ if (n < kMaxPages.raw_num()) {
+ if (small != nullptr) {
+ if (is_released) {
+ small->returned_length[n]++;
+ } else {
+ small->normal_length[n]++;
+ }
+ }
+ } else {
+ if (large != nullptr) {
+ large->spans++;
+ if (is_released) {
+ large->returned_pages += Length(n);
+ } else {
+ large->normal_pages += Length(n);
+ }
+ }
+ }
+
+ if (ages) {
+ ages->RecordRange(Length(n), is_released, w);
+ }
+ index += n;
+ }
+}
+
+template <MemoryModifyFunction Unback>
+inline bool PageTracker<Unback>::empty() const {
+ return free_.used() == 0;
+}
+
+template <MemoryModifyFunction Unback>
+inline Length PageTracker<Unback>::free_pages() const {
+ return kPagesPerHugePage - used_pages();
+}
+
+template <class TrackerType>
+inline HugePageFiller<TrackerType>::HugePageFiller(
+ FillerPartialRerelease partial_rerelease)
+ : HugePageFiller(
+ partial_rerelease,
+ Clock{.now = absl::base_internal::CycleClock::Now,
+ .freq = absl::base_internal::CycleClock::Frequency}) {}
+
+// For testing with mock clock
+template <class TrackerType>
+inline HugePageFiller<TrackerType>::HugePageFiller(
+ FillerPartialRerelease partial_rerelease, Clock clock)
+ : size_(NHugePages(0)),
+ partial_rerelease_(partial_rerelease),
+ fillerstats_tracker_(clock, absl::Minutes(10), absl::Minutes(5)) {}
+
+template <class TrackerType>
+inline typename HugePageFiller<TrackerType>::TryGetResult
+HugePageFiller<TrackerType>::TryGet(Length n) {
+ ASSERT(n > Length(0));
+
+ // How do we choose which hugepage to allocate from (among those with
+ // a free range of at least n?) Our goal is to be as space-efficient
+ // as possible, which leads to two priorities:
+ //
+ // (1) avoid fragmentation; keep free ranges in a hugepage as long
+ // as possible. This maintains our ability to satisfy large
+ // requests without allocating new hugepages
+ // (2) fill mostly-full hugepages more; let mostly-empty hugepages
+ // empty out. This lets us recover totally empty hugepages (and
+ // return them to the OS.)
+ //
+ // In practice, avoiding fragmentation is by far more important:
+ // space usage can explode if we don't jealously guard large free ranges.
+ //
+ // We measure the fragmentation of a hugepage by a proxy: the
+ // longest free range it contains. If this is short, any free space is
+ // probably fairly fragmented. It also allows us to instantly know if a
+ // hugepage can support a given allocation.
+ //
+ // We quantize the number of allocations in a hugepage (chunked
+ // logarithmically.) We favor allocating from hugepages with many allocations
+ // already present, which helps with (2) above. Note that using the number of
+ // allocations works substantially better than the number of allocated pages;
+ // to first order allocations of any size are about as likely to be freed, and
+ // so (by simple binomial probability distributions) we're more likely to
+ // empty out a hugepage with two 5-page allocations than one with five
+ // 1-page allocations.
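+ // (For illustration: if each allocation is independently freed with
+ // probability p, the hugepage with 2 allocations empties with probability
+ // p^2, versus p^5 for the one with 5.)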
+ //
+ // The above suggests using the hugepage with the shortest longest empty
+ // range, breaking ties in favor of fewest number of allocations. This works
+ // well for most workloads but caused bad page heap fragmentation for some:
+ // b/63301358 and b/138618726. The intuition for what went wrong is
+ // that although the tail of large allocations is donated to the Filler (see
+ // HugePageAwareAllocator::AllocRawHugepages) for use, we don't actually
+ // want to use them until the regular Filler hugepages are used up. That
+ // way, they can be reassembled as a single large hugepage range if the
+ // large allocation is freed.
+ // Some workloads can tickle this discrepancy a lot, because they have a lot
+ // of large, medium-lifetime allocations. To fix this we treat hugepages
+ // that are freshly donated as less preferable than hugepages that have been
+ // already used for small allocations, regardless of their longest_free_range.
+ //
+ // Overall our allocation preference is:
+ // - We prefer allocating from used freelists rather than freshly donated ones
+ // - We prefer donated pages over previously released hugepages.
+ // - Among donated freelists we prefer smaller longest_free_range
+ // - Among used freelists we prefer smaller longest_free_range
+ // with ties broken by (quantized) alloc counts
+ //
+ // We group hugepages by longest_free_range and quantized alloc count and
+ // store each group in a TrackerList. All freshly-donated groups are stored
+ // in a "donated" array and the groups with (possibly prior) small allocs are
+ // stored in a "regular" array. Each of these arrays is encapsulated in a
+ // HintedTrackerLists object, which stores the array together with a bitmap to
+ // quickly find non-empty lists. The lists are ordered to satisfy the
+ // following two useful properties:
+ //
+ // - later (nonempty) freelists can always fulfill requests that
+ // earlier ones could.
+ // - earlier freelists, by the above criteria, are preferred targets
+ // for allocation.
+ //
+ // So all we have to do is find the first nonempty freelist in the regular
+ // HintedTrackerList that *could* support our allocation, and it will be our
+ // best choice. If there is none we repeat with the donated HintedTrackerList.
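+ //
+ // Concretely, the loop below tries, in order: regular_alloc_,
+ // donated_alloc_, regular_alloc_partial_released_ (only when
+ // FillerPartialRerelease::Retain is in effect), and finally
+ // regular_alloc_released_.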
+ ASSUME(n < kPagesPerHugePage);
+ TrackerType* pt;
+
+ bool was_released = false;
+ do {
+ pt = regular_alloc_.GetLeast(ListFor(n, 0));
+ if (pt) {
+ ASSERT(!pt->donated());
+ break;
+ }
+ pt = donated_alloc_.GetLeast(n.raw_num());
+ if (pt) {
+ break;
+ }
+ if (partial_rerelease_ == FillerPartialRerelease::Retain) {
+ pt = regular_alloc_partial_released_.GetLeast(ListFor(n, 0));
+ if (pt) {
+ ASSERT(!pt->donated());
+ was_released = true;
+ ASSERT(n_used_partial_released_ >= pt->used_pages());
+ n_used_partial_released_ -= pt->used_pages();
+ break;
+ }
+ }
+ pt = regular_alloc_released_.GetLeast(ListFor(n, 0));
+ if (pt) {
+ ASSERT(!pt->donated());
+ was_released = true;
+ ASSERT(n_used_released_ >= pt->used_pages());
+ n_used_released_ -= pt->used_pages();
+ break;
+ }
+
+ return {nullptr, PageId{0}};
+ } while (false);
+ ASSUME(pt != nullptr);
+ ASSERT(pt->longest_free_range() >= n);
+ const auto page_allocation = pt->Get(n);
+ AddToFillerList(pt);
+ allocated_ += n;
+
+ ASSERT(was_released || page_allocation.previously_unbacked == Length(0));
+ (void)was_released;
+ ASSERT(unmapped_ >= page_allocation.previously_unbacked);
+ unmapped_ -= page_allocation.previously_unbacked;
+ // We're being used for an allocation, so we are no longer considered
+ // donated by this point.
+ ASSERT(!pt->donated());
+ UpdateFillerStatsTracker();
+ return {pt, page_allocation.page};
+}
+
+// Marks [p, p + n) as usable by new allocations into *pt; returns pt
+// if that hugepage is now empty (nullptr otherwise.)
+// REQUIRES: pt is owned by this object (has been Contribute()), and
+// {pt, p, n} was the result of a previous TryGet.
+template <class TrackerType>
+inline TrackerType* HugePageFiller<TrackerType>::Put(TrackerType* pt, PageId p,
+ Length n) {
+ // Consider releasing [p, p+n). We do this here:
+ // * To unback the memory before we mark it as free. When partially
+ // unbacking, we release the pageheap_lock. Another thread could see the
+ // "free" memory and begin using it before we retake the lock.
+ // * To maintain the invariant that
+ // pt->released() => regular_alloc_released_.size() > 0 ||
+ // regular_alloc_partial_released_.size() > 0
+ // We do this before removing pt from our lists, since another thread may
+ // encounter our post-RemoveFromFillerList() update to
+ // regular_alloc_released_.size() and regular_alloc_partial_released_.size()
+ // while encountering pt.
+ if (partial_rerelease_ == FillerPartialRerelease::Return) {
+ pt->MaybeRelease(p, n);
+ }
+
+ RemoveFromFillerList(pt);
+
+ pt->Put(p, n);
+
+ allocated_ -= n;
+ if (partial_rerelease_ == FillerPartialRerelease::Return && pt->released()) {
+ unmapped_ += n;
+ unmapping_unaccounted_ += n;
+ }
+
+ if (pt->longest_free_range() == kPagesPerHugePage) {
+ --size_;
+ if (pt->released()) {
+ const Length free_pages = pt->free_pages();
+ const Length released_pages = pt->released_pages();
+ ASSERT(free_pages >= released_pages);
+ ASSERT(unmapped_ >= released_pages);
+ unmapped_ -= released_pages;
+
+ if (free_pages > released_pages) {
+ // We should only see a difference between free pages and released pages
+ // when we retain returned pages.
+ ASSERT(partial_rerelease_ == FillerPartialRerelease::Retain);
+
+ // pt is partially released. As the rest of the hugepage-aware
+ // allocator works in terms of whole hugepages, we need to release the
+ // rest of the hugepage. This simplifies subsequent accounting by
+ // allowing us to work with hugepage-granularity, rather than needing to
+ // retain pt's state indefinitely.
+ pageheap_lock.Unlock();
+ TrackerType::UnbackImpl(pt->location().start_addr(), kHugePageSize);
+ pageheap_lock.Lock();
+
+ unmapping_unaccounted_ += free_pages - released_pages;
+ }
+ }
+
+ UpdateFillerStatsTracker();
+ return pt;
+ }
+ AddToFillerList(pt);
+ UpdateFillerStatsTracker();
+ return nullptr;
+}
+
+template <class TrackerType>
+inline void HugePageFiller<TrackerType>::Contribute(TrackerType* pt,
+ bool donated) {
+ // A contributed huge page should not yet be subreleased.
+ ASSERT(pt->released_pages() == Length(0));
+
+ allocated_ += pt->used_pages();
+ if (donated) {
+ DonateToFillerList(pt);
+ } else {
+ AddToFillerList(pt);
+ }
+ ++size_;
+ UpdateFillerStatsTracker();
+}
+
+template <class TrackerType>
+template <size_t N>
+inline int HugePageFiller<TrackerType>::SelectCandidates(
+ absl::Span<TrackerType*> candidates, int current_candidates,
+ const HintedTrackerLists<N>& tracker_list, size_t tracker_start) {
+ auto PushCandidate = [&](TrackerType* pt) {
+ // If we have few candidates, we can avoid creating a heap.
+ //
+ // In ReleaseCandidates(), we unconditionally sort the list and linearly
+ // iterate through it--rather than pop_heap repeatedly--so we only need the
+ // heap for creating a bounded-size priority queue.
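+ //
+ // (candidates is kept as a max-heap keyed by used_pages(), so candidates[0]
+ // is always the retained candidate with the most used pages, i.e. the least
+ // attractive one to subrelease.)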
+ if (current_candidates < candidates.size()) {
+ candidates[current_candidates] = pt;
+ current_candidates++;
+
+ if (current_candidates == candidates.size()) {
+ std::make_heap(candidates.begin(), candidates.end(),
+ CompareForSubrelease);
+ }
+ return;
+ }
+
+ // Consider popping the worst candidate from our list.
+ if (CompareForSubrelease(candidates[0], pt)) {
+ // pt is worse than the current worst.
+ return;
+ }
+
+ std::pop_heap(candidates.begin(), candidates.begin() + current_candidates,
+ CompareForSubrelease);
+ candidates[current_candidates - 1] = pt;
+ std::push_heap(candidates.begin(), candidates.begin() + current_candidates,
+ CompareForSubrelease);
+ };
+
+ tracker_list.Iter(PushCandidate, tracker_start);
+
+ return current_candidates;
+}
+
+template <class TrackerType>
+inline Length HugePageFiller<TrackerType>::ReleaseCandidates(
+ absl::Span<TrackerType*> candidates, Length target) {
+ absl::c_sort(candidates, CompareForSubrelease);
+
+ Length total_released;
+ HugeLength total_broken = NHugePages(0);
+#ifndef NDEBUG
+ Length last;
+#endif
+ for (int i = 0; i < candidates.size() && total_released < target; i++) {
+ TrackerType* best = candidates[i];
+ ASSERT(best != nullptr);
+
+#ifndef NDEBUG
+ // Double check that our sorting criteria were applied correctly.
+ ASSERT(last <= best->used_pages());
+ last = best->used_pages();
+#endif
+
+ if (best->unbroken()) {
+ ++total_broken;
+ }
+ RemoveFromFillerList(best);
+ Length ret = best->ReleaseFree();
+ unmapped_ += ret;
+ ASSERT(unmapped_ >= best->released_pages());
+ total_released += ret;
+ AddToFillerList(best);
+ }
+
+ subrelease_stats_.num_pages_subreleased += total_released;
+ subrelease_stats_.num_hugepages_broken += total_broken;
+
+ // Keep separate stats if the ongoing release was triggered by reaching the
+ // tcmalloc limit.
+ if (subrelease_stats_.limit_hit()) {
+ subrelease_stats_.total_pages_subreleased_due_to_limit += total_released;
+ subrelease_stats_.total_hugepages_broken_due_to_limit += total_broken;
+ }
+ return total_released;
+}
+
+template <class TrackerType>
+inline Length HugePageFiller<TrackerType>::GetDesiredSubreleasePages(
+ Length desired, Length total_released, absl::Duration peak_interval) {
+ // Don't subrelease pages if it wouldn't push you under the latest peak.
+ // This is a bit subtle: We want the current *mapped* pages not to be below
+ // the recent *demand* peak, i.e., if we have a large amount of free memory
+ // right now but demand is below a recent peak, we still want to subrelease.
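+ //
+ // For illustration (hypothetical numbers): with nothing released yet in
+ // this call, 1000 currently mapped pages, a recent demand peak of 900, and
+ // desired == 300, we cap the release at 100 pages so that mapped pages do
+ // not fall below the peak.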
+ ASSERT(total_released < desired);
+
+ if (peak_interval == absl::ZeroDuration()) {
+ return desired;
+ }
+
+ UpdateFillerStatsTracker();
+ Length demand_at_peak =
+ fillerstats_tracker_.GetRecentPeak(peak_interval).num_pages;
+ Length current_pages = used_pages() + free_pages();
+
+ if (demand_at_peak != Length(0)) {
+ Length new_desired;
+ if (demand_at_peak >= current_pages) {
+ new_desired = total_released;
+ } else {
+ new_desired = total_released + (current_pages - demand_at_peak);
+ }
+
+ if (new_desired >= desired) {
+ return desired;
+ }
+
+ // Report the amount of memory that we didn't release due to this
+ // mechanism, but never more than free_pages, since we would not have
+ // been able to release that much memory with or without this mechanism
+ // (i.e., reporting more would be confusing).
+ Length skipped_pages = std::min(free_pages(), (desired - new_desired));
+ fillerstats_tracker_.ReportSkippedSubreleasePages(
+ skipped_pages, current_pages, peak_interval);
+ return new_desired;
+ }
+
+ return desired;
+}
+
+// Tries to release desired pages by iteratively releasing from the emptiest
+// possible hugepage and releasing its free memory to the system. Returns the
+// number of pages actually released.
+template <class TrackerType>
+inline Length HugePageFiller<TrackerType>::ReleasePages(
+ Length desired, absl::Duration skip_subrelease_after_peaks_interval,
+ bool hit_limit) {
+ Length total_released;
+
+ // We also do eager release, once we've called this at least once:
+ // claim credit for anything that gets done.
+ if (unmapping_unaccounted_.raw_num() > 0) {
+ // TODO(ckennelly): This may overshoot in releasing more than desired
+ // pages.
+ Length n = unmapping_unaccounted_;
+ unmapping_unaccounted_ = Length(0);
+ subrelease_stats_.num_pages_subreleased += n;
+
+ if (n >= desired) {
+ return n;
+ }
+
+ total_released += n;
+ }
+
+ if (skip_subrelease_after_peaks_interval != absl::ZeroDuration()) {
+ desired = GetDesiredSubreleasePages(desired, total_released,
+ skip_subrelease_after_peaks_interval);
+ if (desired <= total_released) {
+ return total_released;
+ }
+ }
+
+ subrelease_stats_.set_limit_hit(hit_limit);
+
+ // Optimize for releasing up to a huge page worth of small pages (scattered
+ // over many parts of the filler). Since we hold pageheap_lock, we cannot
+ // allocate here.
+ constexpr size_t kCandidates = kPagesPerHugePage.raw_num();
+ using CandidateArray = std::array<TrackerType*, kCandidates>;
+
+ if (partial_rerelease_ == FillerPartialRerelease::Retain) {
+ while (total_released < desired) {
+ CandidateArray candidates;
+ // We can skip the first kChunks lists as they are known to be 100% full.
+ // (Those lists are likely to be long.)
+ //
+ // We do not examine the regular_alloc_released_ lists, as they only
+ // contain completely released pages.
+ int n_candidates =
+ SelectCandidates(absl::MakeSpan(candidates), 0,
+ regular_alloc_partial_released_, kChunks);
+
+ Length released =
+ ReleaseCandidates(absl::MakeSpan(candidates.data(), n_candidates),
+ desired - total_released);
+ if (released == Length(0)) {
+ break;
+ }
+ total_released += released;
+ }
+ }
+
+ // Only consider breaking up a hugepage if there are no partially released
+ // pages.
+ while (total_released < desired) {
+ CandidateArray candidates;
+ int n_candidates = SelectCandidates(absl::MakeSpan(candidates), 0,
+ regular_alloc_, kChunks);
+ // TODO(b/138864853): Perhaps remove donated_alloc_ from here, it's not a
+ // great candidate for partial release.
+ n_candidates = SelectCandidates(absl::MakeSpan(candidates), n_candidates,
+ donated_alloc_, 0);
+
+ Length released =
+ ReleaseCandidates(absl::MakeSpan(candidates.data(), n_candidates),
+ desired - total_released);
+ if (released == Length(0)) {
+ break;
+ }
+ total_released += released;
+ }
+
+ return total_released;
+}
+
+template <class TrackerType>
+inline void HugePageFiller<TrackerType>::AddSpanStats(
+ SmallSpanStats* small, LargeSpanStats* large,
+ PageAgeHistograms* ages) const {
+ auto loop = [&](const TrackerType* pt) {
+ pt->AddSpanStats(small, large, ages);
+ };
+ // We can skip the first kChunks lists as they are known to be 100% full.
+ regular_alloc_.Iter(loop, kChunks);
+ donated_alloc_.Iter(loop, 0);
+
+ if (partial_rerelease_ == FillerPartialRerelease::Retain) {
+ regular_alloc_partial_released_.Iter(loop, 0);
+ } else {
+ ASSERT(regular_alloc_partial_released_.empty());
+ ASSERT(n_used_partial_released_ == Length(0));
+ }
+ regular_alloc_released_.Iter(loop, 0);
+}
+
+template <class TrackerType>
+inline BackingStats HugePageFiller<TrackerType>::stats() const {
+ BackingStats s;
+ s.system_bytes = size_.in_bytes();
+ s.free_bytes = free_pages().in_bytes();
+ s.unmapped_bytes = unmapped_pages().in_bytes();
+ return s;
+}
+
+namespace huge_page_filler_internal {
+// Computes some histograms of fullness. Because nearly empty/full huge pages
+// are much more interesting, we calculate 4 buckets of size one at each of
+// the beginning and end, and then divide the overall space by 16 to have 16
+// (mostly) even buckets in the middle.
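+//
+// For illustration, assuming kPagesPerHugePage == 256 (2 MiB hugepages of
+// 8 KiB pages), the bucket lower bounds are 0..4, then 16, 32, ..., 240, and
+// finally 252..255, i.e. 24 buckets in total.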
+class UsageInfo {
+ public:
+ enum Type { kRegular, kDonated, kPartialReleased, kReleased, kNumTypes };
+
+ UsageInfo() {
+ size_t i;
+ for (i = 0; i <= 4 && i < kPagesPerHugePage.raw_num(); ++i) {
+ bucket_bounds_[buckets_size_] = i;
+ buckets_size_++;
+ }
+ if (i < kPagesPerHugePage.raw_num() - 4) {
+ // Because kPagesPerHugePage is a power of two, it must be at least 16
+ // to get inside this "if" - either i=5 and kPagesPerHugePage=8 and
+ // the test fails, or kPagesPerHugePage <= 4 and the test fails.
+ ASSERT(kPagesPerHugePage >= Length(16));
+ constexpr int step = kPagesPerHugePage.raw_num() / 16;
+ // We want to move in "step"-sized increments, aligned every "step".
+ // So first we have to round i up to the nearest step boundary. This
+ // logic takes advantage of step being a power of two, so step-1 is
+ // all ones in the low-order bits.
+ i = ((i - 1) | (step - 1)) + 1;
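+ // (For example, with step == 16 and i == 5: ((5 - 1) | 15) + 1 == 16.)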
+ for (; i < kPagesPerHugePage.raw_num() - 4; i += step) {
+ bucket_bounds_[buckets_size_] = i;
+ buckets_size_++;
+ }
+ i = kPagesPerHugePage.raw_num() - 4;
+ }
+ for (; i < kPagesPerHugePage.raw_num(); ++i) {
+ bucket_bounds_[buckets_size_] = i;
+ buckets_size_++;
+ }
+ CHECK_CONDITION(buckets_size_ <= kBucketCapacity);
+ }
+
+ template <class TrackerType>
+ void Record(const TrackerType* pt, Type which) {
+ const Length free = kPagesPerHugePage - pt->used_pages();
+ const Length lf = pt->longest_free_range();
+ const size_t nalloc = pt->nallocs();
+ // This is a little annoying as our buckets *have* to differ;
+ // nalloc is in [1,256], free_pages and longest_free are in [0, 255].
+ free_page_histo_[which][BucketNum(free.raw_num())]++;
+ longest_free_histo_[which][BucketNum(lf.raw_num())]++;
+ nalloc_histo_[which][BucketNum(nalloc - 1)]++;
+ }
+
+ void Print(Printer* out) {
+ PrintHisto(out, free_page_histo_[kRegular],
+ "# of regular hps with a<= # of free pages <b", 0);
+ PrintHisto(out, free_page_histo_[kDonated],
+ "# of donated hps with a<= # of free pages <b", 0);
+ PrintHisto(out, free_page_histo_[kPartialReleased],
+ "# of partial released hps with a<= # of free pages <b", 0);
+ PrintHisto(out, free_page_histo_[kReleased],
+ "# of released hps with a<= # of free pages <b", 0);
+ // For donated huge pages, number of allocs=1 and longest free range =
+ // number of free pages, so it isn't useful to show the next two.
+ PrintHisto(out, longest_free_histo_[kRegular],
+ "# of regular hps with a<= longest free range <b", 0);
+ PrintHisto(out, longest_free_histo_[kPartialReleased],
+ "# of partial released hps with a<= longest free range <b", 0);
+ PrintHisto(out, longest_free_histo_[kReleased],
+ "# of released hps with a<= longest free range <b", 0);
+ PrintHisto(out, nalloc_histo_[kRegular],
+ "# of regular hps with a<= # of allocations <b", 1);
+ PrintHisto(out, nalloc_histo_[kPartialReleased],
+ "# of partial released hps with a<= # of allocations <b", 1);
+ PrintHisto(out, nalloc_histo_[kReleased],
+ "# of released hps with a<= # of allocations <b", 1);
+ }
+
+ void Print(PbtxtRegion* hpaa) {
+ static constexpr absl::string_view kTrackerTypes[kNumTypes] = {
+ "REGULAR", "DONATED", "PARTIAL", "RELEASED"};
+ for (int i = 0; i < kNumTypes; ++i) {
+ PbtxtRegion scoped = hpaa->CreateSubRegion("filler_tracker");
+ scoped.PrintRaw("type", kTrackerTypes[i]);
+ PrintHisto(&scoped, free_page_histo_[i], "free_pages_histogram", 0);
+ PrintHisto(&scoped, longest_free_histo_[i],
+ "longest_free_range_histogram", 0);
+ PrintHisto(&scoped, nalloc_histo_[i], "allocations_histogram", 1);
+ }
+ }
+
+ private:
+ // Maximum of 4 buckets at the start and end, and 16 in the middle.
+ static constexpr size_t kBucketCapacity = 4 + 16 + 4;
+ using Histo = size_t[kBucketCapacity];
+
+ int BucketNum(size_t page) {
+ auto it =
+ std::upper_bound(bucket_bounds_, bucket_bounds_ + buckets_size_, page);
+ CHECK_CONDITION(it != bucket_bounds_);
+ return it - bucket_bounds_ - 1;
+ }
+
+ void PrintHisto(Printer* out, Histo h, const char blurb[], size_t offset) {
+ out->printf("\nHugePageFiller: %s", blurb);
+ for (size_t i = 0; i < buckets_size_; ++i) {
+ if (i % 6 == 0) {
+ out->printf("\nHugePageFiller:");
+ }
+ out->printf(" <%3zu<=%6zu", bucket_bounds_[i] + offset, h[i]);
+ }
+ out->printf("\n");
+ }
+
+ void PrintHisto(PbtxtRegion* hpaa, Histo h, const char key[], size_t offset) {
+ for (size_t i = 0; i < buckets_size_; ++i) {
+ auto hist = hpaa->CreateSubRegion(key);
+ hist.PrintI64("lower_bound", bucket_bounds_[i] + offset);
+ hist.PrintI64("upper_bound",
+ (i == buckets_size_ - 1 ? bucket_bounds_[i]
+ : bucket_bounds_[i + 1] - 1) +
+ offset);
+ hist.PrintI64("value", h[i]);
+ }
+ }
+
+ // Arrays, because they are split per alloc type.
+ Histo free_page_histo_[kNumTypes]{};
+ Histo longest_free_histo_[kNumTypes]{};
+ Histo nalloc_histo_[kNumTypes]{};
+ size_t bucket_bounds_[kBucketCapacity];
+ int buckets_size_ = 0;
+};
+} // namespace huge_page_filler_internal
+
+template <class TrackerType>
+inline void HugePageFiller<TrackerType>::Print(Printer* out,
+ bool everything) const {
+ out->printf("HugePageFiller: densely pack small requests into hugepages\n");
+
+ HugeLength nrel =
+ regular_alloc_released_.size() + regular_alloc_partial_released_.size();
+ HugeLength nfull = NHugePages(0);
+
+ // note kChunks, not kNumLists here--we're iterating *full* lists.
+ for (size_t chunk = 0; chunk < kChunks; ++chunk) {
+ nfull += NHugePages(
+ regular_alloc_[ListFor(/*longest=*/Length(0), chunk)].length());
+ }
+ // A donated alloc full list is impossible because it would have never been
+ // donated in the first place. (A full tail would mean the allocation covered
+ // an exact number of hugepages, leaving nothing to donate.)
+ ASSERT(donated_alloc_[0].empty());
+ // Evaluate a/b, avoiding division by zero
+ const auto safe_div = [](Length a, Length b) {
+ return b == Length(0) ? 0.
+ : static_cast<double>(a.raw_num()) /
+ static_cast<double>(b.raw_num());
+ };
+ const HugeLength n_partial = size() - nrel - nfull;
+ const HugeLength n_nonfull =
+ n_partial + regular_alloc_partial_released_.size();
+ out->printf(
+ "HugePageFiller: %zu total, %zu full, %zu partial, %zu released "
+ "(%zu partially), 0 quarantined\n",
+ size().raw_num(), nfull.raw_num(), n_partial.raw_num(), nrel.raw_num(),
+ regular_alloc_partial_released_.size().raw_num());
+ out->printf("HugePageFiller: %zu pages free in %zu hugepages, %.4f free\n",
+ free_pages().raw_num(), size().raw_num(),
+ safe_div(free_pages(), size().in_pages()));
+
+ ASSERT(free_pages() <= n_nonfull.in_pages());
+ out->printf("HugePageFiller: among non-fulls, %.4f free\n",
+ safe_div(free_pages(), n_nonfull.in_pages()));
+
+ out->printf(
+ "HugePageFiller: %zu used pages in subreleased hugepages (%zu of them in "
+ "partially released)\n",
+ used_pages_in_any_subreleased().raw_num(),
+ used_pages_in_partial_released().raw_num());
+
+ out->printf(
+ "HugePageFiller: %zu hugepages partially released, %.4f released\n",
+ nrel.raw_num(), safe_div(unmapped_pages(), nrel.in_pages()));
+ out->printf("HugePageFiller: %.4f of used pages hugepageable\n",
+ hugepage_frac());
+
+ // Subrelease
+ out->printf(
+ "HugePageFiller: Since startup, %zu pages subreleased, %zu hugepages "
+ "broken, (%zu pages, %zu hugepages due to reaching tcmalloc limit)\n",
+ subrelease_stats_.total_pages_subreleased.raw_num(),
+ subrelease_stats_.total_hugepages_broken.raw_num(),
+ subrelease_stats_.total_pages_subreleased_due_to_limit.raw_num(),
+ subrelease_stats_.total_hugepages_broken_due_to_limit.raw_num());
+
+ if (!everything) return;
+
+ // Compute some histograms of fullness.
+ using huge_page_filler_internal::UsageInfo;
+ UsageInfo usage;
+ regular_alloc_.Iter(
+ [&](const TrackerType* pt) { usage.Record(pt, UsageInfo::kRegular); }, 0);
+ donated_alloc_.Iter(
+ [&](const TrackerType* pt) { usage.Record(pt, UsageInfo::kDonated); }, 0);
+ if (partial_rerelease_ == FillerPartialRerelease::Retain) {
+ regular_alloc_partial_released_.Iter(
+ [&](const TrackerType* pt) {
+ usage.Record(pt, UsageInfo::kPartialReleased);
+ },
+ 0);
+ } else {
+ ASSERT(regular_alloc_partial_released_.empty());
+ ASSERT(n_used_partial_released_.raw_num() == 0);
+ }
+ regular_alloc_released_.Iter(
+ [&](const TrackerType* pt) { usage.Record(pt, UsageInfo::kReleased); },
+ 0);
+
+ out->printf("\n");
+ out->printf("HugePageFiller: fullness histograms\n");
+ usage.Print(out);
+
+ out->printf("\n");
+ fillerstats_tracker_.Print(out);
+}
+
+template <class TrackerType>
+inline void HugePageFiller<TrackerType>::PrintInPbtxt(PbtxtRegion* hpaa) const {
+ HugeLength nrel =
+ regular_alloc_released_.size() + regular_alloc_partial_released_.size();
+ HugeLength nfull = NHugePages(0);
+
+ // note kChunks, not kNumLists here--we're iterating *full* lists.
+ for (size_t chunk = 0; chunk < kChunks; ++chunk) {
+ nfull += NHugePages(
+ regular_alloc_[ListFor(/*longest=*/Length(0), chunk)].length());
+ }
+ // A donated alloc full list is impossible because it would have never been
+ // donated in the first place. (A full tail would mean the allocation covered
+ // an exact number of hugepages, leaving nothing to donate.)
+ ASSERT(donated_alloc_[0].empty());
+ // Evaluate a/b, avoiding division by zero
+ const auto safe_div = [](Length a, Length b) {
+ return b == Length(0) ? 0
+ : static_cast<double>(a.raw_num()) /
+ static_cast<double>(b.raw_num());
+ };
+ const HugeLength n_partial = size() - nrel - nfull;
+ hpaa->PrintI64("filler_full_huge_pages", nfull.raw_num());
+ hpaa->PrintI64("filler_partial_huge_pages", n_partial.raw_num());
+ hpaa->PrintI64("filler_released_huge_pages", nrel.raw_num());
+ hpaa->PrintI64("filler_partially_released_huge_pages",
+ regular_alloc_partial_released_.size().raw_num());
+ hpaa->PrintI64("filler_free_pages", free_pages().raw_num());
+ hpaa->PrintI64("filler_used_pages_in_subreleased",
+ used_pages_in_any_subreleased().raw_num());
+ hpaa->PrintI64("filler_used_pages_in_partial_released",
+ used_pages_in_partial_released().raw_num());
+ hpaa->PrintI64(
+ "filler_unmapped_bytes",
+ static_cast<uint64_t>(nrel.raw_num() *
+ safe_div(unmapped_pages(), nrel.in_pages())));
+ hpaa->PrintI64(
+ "filler_hugepageable_used_bytes",
+ static_cast<uint64_t>(hugepage_frac() *
+ static_cast<double>(allocated_.in_bytes())));
+ hpaa->PrintI64("filler_num_pages_subreleased",
+ subrelease_stats_.total_pages_subreleased.raw_num());
+ hpaa->PrintI64("filler_num_hugepages_broken",
+ subrelease_stats_.total_hugepages_broken.raw_num());
+ hpaa->PrintI64(
+ "filler_num_pages_subreleased_due_to_limit",
+ subrelease_stats_.total_pages_subreleased_due_to_limit.raw_num());
+ hpaa->PrintI64(
+ "filler_num_hugepages_broken_due_to_limit",
+ subrelease_stats_.total_hugepages_broken_due_to_limit.raw_num());
+ // Compute some histograms of fullness.
+ using huge_page_filler_internal::UsageInfo;
+ UsageInfo usage;
+ regular_alloc_.Iter(
+ [&](const TrackerType* pt) { usage.Record(pt, UsageInfo::kRegular); }, 0);
+ donated_alloc_.Iter(
+ [&](const TrackerType* pt) { usage.Record(pt, UsageInfo::kDonated); }, 0);
+ if (partial_rerelease_ == FillerPartialRerelease::Retain) {
+ regular_alloc_partial_released_.Iter(
+ [&](const TrackerType* pt) {
+ usage.Record(pt, UsageInfo::kPartialReleased);
+ },
+ 0);
+ } else {
+ ASSERT(regular_alloc_partial_released_.empty());
+ ASSERT(n_used_partial_released_ == Length(0));
+ }
+ regular_alloc_released_.Iter(
+ [&](const TrackerType* pt) { usage.Record(pt, UsageInfo::kReleased); },
+ 0);
+
+ usage.Print(hpaa);
+
+ fillerstats_tracker_.PrintInPbtxt(hpaa);
+}
+
+template <class TrackerType>
+inline void HugePageFiller<TrackerType>::UpdateFillerStatsTracker() {
+ StatsTrackerType::FillerStats stats;
+ stats.num_pages = allocated_;
+ stats.free_pages = free_pages();
+ stats.unmapped_pages = unmapped_pages();
+ stats.used_pages_in_subreleased_huge_pages =
+ n_used_partial_released_ + n_used_released_;
+ stats.huge_pages[StatsTrackerType::kRegular] = regular_alloc_.size();
+ stats.huge_pages[StatsTrackerType::kDonated] = donated_alloc_.size();
+ stats.huge_pages[StatsTrackerType::kPartialReleased] =
+ regular_alloc_partial_released_.size();
+ stats.huge_pages[StatsTrackerType::kReleased] =
+ regular_alloc_released_.size();
+ stats.num_pages_subreleased = subrelease_stats_.num_pages_subreleased;
+ stats.num_hugepages_broken = subrelease_stats_.num_hugepages_broken;
+ fillerstats_tracker_.Report(stats);
+ subrelease_stats_.reset();
+}
+
+template <class TrackerType>
+inline size_t HugePageFiller<TrackerType>::IndexFor(TrackerType* pt) {
+ ASSERT(!pt->empty());
+ // Prefer to allocate from hugepages with many allocations already present;
+ // spaced logarithmically.
+ const size_t na = pt->nallocs();
+ // This equals 63 - ceil(log2(na))
+ // (or 31 if size_t is 4 bytes, etc.)
+ const size_t neg_ceil_log = __builtin_clzl(2 * na - 1);
+
+ // We want the same spread as neg_ceil_log, but spread over [0,
+ // kChunks) (clamped at the left edge) instead of [0, 64). So subtract off
+ // the difference (computed by forcing na=1 to kChunks - 1.)
+ const size_t kOffset = __builtin_clzl(1) - (kChunks - 1);
+ const size_t i = std::max(neg_ceil_log, kOffset) - kOffset;
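+ // For illustration, assuming a 64-bit size_t (so kOffset == 63 - 7 == 56):
+ // na == 1 gives neg_ceil_log == 63 and i == 7, na == 2 gives 62 and i == 6,
+ // and any na >= 65 gives neg_ceil_log <= 56, clamping i to 0.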
+ ASSERT(i < kChunks);
+ return i;
+}
+
+template <class TrackerType>
+inline size_t HugePageFiller<TrackerType>::ListFor(const Length longest,
+ const size_t chunk) {
+ ASSERT(chunk < kChunks);
+ ASSERT(longest < kPagesPerHugePage);
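+ // For illustration (kChunks == 8): ListFor(Length(3), 2) == 3 * 8 + 2 == 26;
+ // the lists for a given longest value occupy a contiguous block of kChunks
+ // slots.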
+ return longest.raw_num() * kChunks + chunk;
+}
+
+template <class TrackerType>
+inline void HugePageFiller<TrackerType>::RemoveFromFillerList(TrackerType* pt) {
+ Length longest = pt->longest_free_range();
+ ASSERT(longest < kPagesPerHugePage);
+
+ if (pt->donated()) {
+ donated_alloc_.Remove(pt, longest.raw_num());
+ } else {
+ size_t chunk = IndexFor(pt);
+ size_t i = ListFor(longest, chunk);
+ if (!pt->released()) {
+ regular_alloc_.Remove(pt, i);
+ } else if (partial_rerelease_ == FillerPartialRerelease::Return ||
+ pt->free_pages() <= pt->released_pages()) {
+ regular_alloc_released_.Remove(pt, i);
+ ASSERT(n_used_released_ >= pt->used_pages());
+ n_used_released_ -= pt->used_pages();
+ } else {
+ regular_alloc_partial_released_.Remove(pt, i);
+ ASSERT(n_used_partial_released_ >= pt->used_pages());
+ n_used_partial_released_ -= pt->used_pages();
+ }
+ }
+}
+
+template <class TrackerType>
+inline void HugePageFiller<TrackerType>::AddToFillerList(TrackerType* pt) {
+ size_t chunk = IndexFor(pt);
+ Length longest = pt->longest_free_range();
+ ASSERT(longest < kPagesPerHugePage);
+
+ // Once a donated alloc is used in any way, it degenerates into being a
+ // regular alloc. This allows the algorithm to keep using it (we had to be
+ // desperate to use it in the first place), and thus preserves the other
+ // donated allocs.
+ pt->set_donated(false);
+
+ size_t i = ListFor(longest, chunk);
+ if (!pt->released()) {
+ regular_alloc_.Add(pt, i);
+ } else if (partial_rerelease_ == FillerPartialRerelease::Return ||
+ pt->free_pages() == pt->released_pages()) {
+ regular_alloc_released_.Add(pt, i);
+ n_used_released_ += pt->used_pages();
+ } else {
+ ASSERT(partial_rerelease_ == FillerPartialRerelease::Retain);
+ regular_alloc_partial_released_.Add(pt, i);
+ n_used_partial_released_ += pt->used_pages();
+ }
+}
+
+template <class TrackerType>
+inline void HugePageFiller<TrackerType>::DonateToFillerList(TrackerType* pt) {
+ Length longest = pt->longest_free_range();
+ ASSERT(longest < kPagesPerHugePage);
+
+ // We should never be donating already-released trackers!
+ ASSERT(!pt->released());
+ pt->set_donated(true);
+
+ donated_alloc_.Add(pt, longest.raw_num());
+}
+
+template <class TrackerType>
+inline double HugePageFiller<TrackerType>::hugepage_frac() const {
+ // How many of our used pages are on non-huge pages? Since
+ // everything on a released hugepage is either used or released,
+ // just the difference:
+ const Length nrel = regular_alloc_released_.size().in_pages();
+ const Length used = used_pages();
+ const Length unmapped = unmapped_pages();
+ ASSERT(n_used_partial_released_ <=
+ regular_alloc_partial_released_.size().in_pages());
+ const Length used_on_rel = (nrel >= unmapped ? nrel - unmapped : Length(0)) +
+ n_used_partial_released_;
+ ASSERT(used >= used_on_rel);
+ const Length used_on_huge = used - used_on_rel;
+
+ const Length denom = used > Length(0) ? used : Length(1);
+ const double ret =
+ static_cast<double>(used_on_huge.raw_num()) / denom.raw_num();
+ ASSERT(ret >= 0);
+ ASSERT(ret <= 1);
+ return std::clamp<double>(ret, 0, 1);
+}
+
+// Helper for stat functions.
+template <class TrackerType>
+inline Length HugePageFiller<TrackerType>::free_pages() const {
+ return size().in_pages() - used_pages() - unmapped_pages();
+}
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // TCMALLOC_HUGE_PAGE_FILLER_H_
diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_page_filler_test.cc b/contrib/libs/tcmalloc/tcmalloc/huge_page_filler_test.cc
new file mode 100644
index 0000000000..9879d41d79
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/huge_page_filler_test.cc
@@ -0,0 +1,3799 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/huge_page_filler.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/mman.h>
+
+#include <cstdint>
+#include <iterator>
+#include <memory>
+#include <new>
+#include <random>
+#include <string>
+#include <thread> // NOLINT(build/c++11)
+#include <utility>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/algorithm/container.h"
+#include "absl/base/internal/sysinfo.h"
+#include "absl/base/thread_annotations.h"
+#include "absl/container/flat_hash_map.h"
+#include "absl/container/flat_hash_set.h"
+#include "absl/flags/flag.h"
+#include "absl/memory/memory.h"
+#include "absl/random/bernoulli_distribution.h"
+#include "absl/random/random.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_join.h"
+#include "absl/synchronization/blocking_counter.h"
+#include "absl/synchronization/mutex.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "benchmark/benchmark.h"
+#include "tcmalloc/common.h"
+#include "tcmalloc/huge_pages.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/pages.h"
+#include "tcmalloc/stats.h"
+
+using tcmalloc::tcmalloc_internal::Length;
+
+ABSL_FLAG(Length, page_tracker_defrag_lim, Length(32),
+ "Max allocation size for defrag test");
+
+ABSL_FLAG(Length, frag_req_limit, Length(32),
+ "request size limit for frag test");
+ABSL_FLAG(Length, frag_size, Length(512 * 1024),
+ "target number of pages for frag test");
+ABSL_FLAG(uint64_t, frag_iters, 10 * 1000 * 1000, "iterations for frag test");
+
+ABSL_FLAG(double, release_until, 0.01,
+ "fraction of used we target in pageheap");
+ABSL_FLAG(uint64_t, bytes, 1024 * 1024 * 1024, "baseline usage");
+ABSL_FLAG(double, growth_factor, 2.0, "growth over baseline");
+
+namespace tcmalloc {
+namespace tcmalloc_internal {
+namespace {
+
+// This is an arbitrary distribution taken from page requests from
+// an empirical driver test. It seems realistic enough. We trim it to
+// [1, last].
+//
+std::discrete_distribution<size_t> EmpiricalDistribution(Length last) {
+ std::vector<size_t> page_counts = []() {
+ std::vector<size_t> ret(12289);
+ ret[1] = 375745576;
+ ret[2] = 59737961;
+ ret[3] = 35549390;
+ ret[4] = 43896034;
+ ret[5] = 17484968;
+ ret[6] = 15830888;
+ ret[7] = 9021717;
+ ret[8] = 208779231;
+ ret[9] = 3775073;
+ ret[10] = 25591620;
+ ret[11] = 2483221;
+ ret[12] = 3595343;
+ ret[13] = 2232402;
+ ret[16] = 17639345;
+ ret[21] = 4215603;
+ ret[25] = 4212756;
+ ret[28] = 760576;
+ ret[30] = 2166232;
+ ret[32] = 3021000;
+ ret[40] = 1186302;
+ ret[44] = 479142;
+ ret[48] = 570030;
+ ret[49] = 101262;
+ ret[55] = 592333;
+ ret[57] = 236637;
+ ret[64] = 785066;
+ ret[65] = 44700;
+ ret[73] = 539659;
+ ret[80] = 342091;
+ ret[96] = 488829;
+ ret[97] = 504;
+ ret[113] = 242921;
+ ret[128] = 157206;
+ ret[129] = 145;
+ ret[145] = 117191;
+ ret[160] = 91818;
+ ret[192] = 67824;
+ ret[193] = 144;
+ ret[225] = 40711;
+ ret[256] = 38569;
+ ret[257] = 1;
+ ret[297] = 21738;
+ ret[320] = 13510;
+ ret[384] = 19499;
+ ret[432] = 13856;
+ ret[490] = 9849;
+ ret[512] = 3024;
+ ret[640] = 3655;
+ ret[666] = 3963;
+ ret[715] = 2376;
+ ret[768] = 288;
+ ret[1009] = 6389;
+ ret[1023] = 2788;
+ ret[1024] = 144;
+ ret[1280] = 1656;
+ ret[1335] = 2592;
+ ret[1360] = 3024;
+ ret[1536] = 432;
+ ret[2048] = 288;
+ ret[2560] = 72;
+ ret[3072] = 360;
+ ret[12288] = 216;
+ return ret;
+ }();
+
+ Length lim = last;
+ auto i = page_counts.begin();
+ // remember lim might be too big (in which case we use the whole
+ // vector...)
+
+ auto j = page_counts.size() > lim.raw_num() ? i + (lim.raw_num() + 1)
+ : page_counts.end();
+
+ return std::discrete_distribution<size_t>(i, j);
+}
+
+class PageTrackerTest : public testing::Test {
+ protected:
+ PageTrackerTest()
+ : // an unlikely magic page
+ huge_(HugePageContaining(reinterpret_cast<void*>(0x1abcde200000))),
+ tracker_(huge_, absl::base_internal::CycleClock::Now()) {}
+
+ ~PageTrackerTest() override { mock_.VerifyAndClear(); }
+
+ struct PAlloc {
+ PageId p;
+ Length n;
+ };
+
+ void Mark(PAlloc a, size_t mark) {
+ EXPECT_LE(huge_.first_page(), a.p);
+ size_t index = (a.p - huge_.first_page()).raw_num();
+ size_t end = index + a.n.raw_num();
+ EXPECT_LE(end, kPagesPerHugePage.raw_num());
+ for (; index < end; ++index) {
+ marks_[index] = mark;
+ }
+ }
+
+ class MockUnbackInterface {
+ public:
+ void Unback(void* p, size_t len) {
+ CHECK_CONDITION(actual_index_ < kMaxCalls);
+ actual_[actual_index_] = {p, len};
+ ++actual_index_;
+ }
+
+ void Expect(void* p, size_t len) {
+ CHECK_CONDITION(expected_index_ < kMaxCalls);
+ expected_[expected_index_] = {p, len};
+ ++expected_index_;
+ }
+
+ void VerifyAndClear() {
+ EXPECT_EQ(expected_index_, actual_index_);
+ for (size_t i = 0, n = std::min(expected_index_, actual_index_); i < n;
+ ++i) {
+ EXPECT_EQ(expected_[i].ptr, actual_[i].ptr);
+ EXPECT_EQ(expected_[i].len, actual_[i].len);
+ }
+ expected_index_ = 0;
+ actual_index_ = 0;
+ }
+
+ private:
+ struct CallArgs {
+ void* ptr{nullptr};
+ size_t len{0};
+ };
+
+ static constexpr size_t kMaxCalls = 10;
+ CallArgs expected_[kMaxCalls] = {};
+ CallArgs actual_[kMaxCalls] = {};
+ size_t expected_index_{0};
+ size_t actual_index_{0};
+ };
+
+ static void MockUnback(void* p, size_t len);
+
+ typedef PageTracker<MockUnback> TestPageTracker;
+
+ // strict because release calls should only happen when we ask
+ static MockUnbackInterface mock_;
+
+ void Check(PAlloc a, size_t mark) {
+ EXPECT_LE(huge_.first_page(), a.p);
+ size_t index = (a.p - huge_.first_page()).raw_num();
+ size_t end = index + a.n.raw_num();
+ EXPECT_LE(end, kPagesPerHugePage.raw_num());
+ for (; index < end; ++index) {
+ EXPECT_EQ(mark, marks_[index]);
+ }
+ }
+ size_t marks_[kPagesPerHugePage.raw_num()];
+ HugePage huge_;
+ TestPageTracker tracker_;
+
+ void ExpectPages(PAlloc a) {
+ void* ptr = a.p.start_addr();
+ size_t bytes = a.n.in_bytes();
+ mock_.Expect(ptr, bytes);
+ }
+
+ PAlloc Get(Length n) {
+ absl::base_internal::SpinLockHolder l(&pageheap_lock);
+ PageId p = tracker_.Get(n).page;
+ return {p, n};
+ }
+
+ void Put(PAlloc a) {
+ absl::base_internal::SpinLockHolder l(&pageheap_lock);
+ tracker_.Put(a.p, a.n);
+ }
+
+ Length ReleaseFree() {
+ absl::base_internal::SpinLockHolder l(&pageheap_lock);
+ return tracker_.ReleaseFree();
+ }
+
+ void MaybeRelease(PAlloc a) {
+ absl::base_internal::SpinLockHolder l(&pageheap_lock);
+ tracker_.MaybeRelease(a.p, a.n);
+ }
+};
+
+void PageTrackerTest::MockUnback(void* p, size_t len) { mock_.Unback(p, len); }
+
+PageTrackerTest::MockUnbackInterface PageTrackerTest::mock_;
+
+TEST_F(PageTrackerTest, AllocSane) {
+ Length free = kPagesPerHugePage;
+ auto n = Length(1);
+ std::vector<PAlloc> allocs;
+ // This should work without fragmentation.
+ while (n <= free) {
+ ASSERT_LE(n, tracker_.longest_free_range());
+ EXPECT_EQ(kPagesPerHugePage - free, tracker_.used_pages());
+ EXPECT_EQ(free, tracker_.free_pages());
+ PAlloc a = Get(n);
+ Mark(a, n.raw_num());
+ allocs.push_back(a);
+ free -= n;
+ ++n;
+ }
+
+ // All should be distinct
+ for (auto alloc : allocs) {
+ Check(alloc, alloc.n.raw_num());
+ }
+}
+
+TEST_F(PageTrackerTest, ReleasingReturn) {
+ static const Length kAllocSize = kPagesPerHugePage / 4;
+ PAlloc a1 = Get(kAllocSize - Length(3));
+ PAlloc a2 = Get(kAllocSize);
+ PAlloc a3 = Get(kAllocSize + Length(1));
+ PAlloc a4 = Get(kAllocSize + Length(2));
+
+ Put(a2);
+ Put(a4);
+ // We now have a hugepage that looks like [alloced] [free] [alloced] [free].
+ // The free parts should be released when we mark the hugepage as such,
+ // but not the allocated parts.
+ ExpectPages(a2);
+ ExpectPages(a4);
+ ReleaseFree();
+ mock_.VerifyAndClear();
+
+ // Now we return the other parts, and they *should* get released.
+ ExpectPages(a1);
+ ExpectPages(a3);
+
+ MaybeRelease(a1);
+ Put(a1);
+
+ MaybeRelease(a3);
+ Put(a3);
+}
+
+TEST_F(PageTrackerTest, ReleasingRetain) {
+ static const Length kAllocSize = kPagesPerHugePage / 4;
+ PAlloc a1 = Get(kAllocSize - Length(3));
+ PAlloc a2 = Get(kAllocSize);
+ PAlloc a3 = Get(kAllocSize + Length(1));
+ PAlloc a4 = Get(kAllocSize + Length(2));
+
+ Put(a2);
+ Put(a4);
+ // We now have a hugepage that looks like [alloced] [free] [alloced] [free].
+ // The free parts should be released when we mark the hugepage as such,
+ // but not the allocated parts.
+ ExpectPages(a2);
+ ExpectPages(a4);
+ ReleaseFree();
+ mock_.VerifyAndClear();
+
+ // Now we return the other parts, and they shouldn't get released.
+ Put(a1);
+ Put(a3);
+
+ mock_.VerifyAndClear();
+
+ // But they will if we ReleaseFree.
+ ExpectPages(a1);
+ ExpectPages(a3);
+ ReleaseFree();
+ mock_.VerifyAndClear();
+}
+
+TEST_F(PageTrackerTest, Defrag) {
+ absl::BitGen rng;
+ const Length N = absl::GetFlag(FLAGS_page_tracker_defrag_lim);
+ auto dist = EmpiricalDistribution(N);
+
+ std::vector<PAlloc> allocs;
+
+ std::vector<PAlloc> doomed;
+ while (tracker_.longest_free_range() > Length(0)) {
+ Length n;
+ do {
+ n = Length(dist(rng));
+ } while (n > tracker_.longest_free_range());
+ PAlloc a = Get(n);
+ (absl::Bernoulli(rng, 1.0 / 2) ? allocs : doomed).push_back(a);
+ }
+
+ for (auto d : doomed) {
+ Put(d);
+ }
+
+ static const size_t kReps = 250 * 1000;
+
+ std::vector<double> frag_samples;
+ std::vector<Length> longest_free_samples;
+ frag_samples.reserve(kReps);
+ longest_free_samples.reserve(kReps);
+ for (size_t i = 0; i < kReps; ++i) {
+ const Length free = kPagesPerHugePage - tracker_.used_pages();
+ // Ideally, we'd like all of our free space to stay in a single
+ // nice little run.
+ const Length longest = tracker_.longest_free_range();
+ double frag = free > Length(0)
+ ? static_cast<double>(longest.raw_num()) / free.raw_num()
+ : 1;
+
+ if (i % (kReps / 25) == 0) {
+ printf("free = %zu longest = %zu frag = %f\n", free.raw_num(),
+ longest.raw_num(), frag);
+ }
+ frag_samples.push_back(frag);
+ longest_free_samples.push_back(longest);
+
+ // Randomly grow or shrink (picking the only safe option when we're either
+ // full or empty.)
+ if (tracker_.longest_free_range() == Length(0) ||
+ (absl::Bernoulli(rng, 1.0 / 2) && !allocs.empty())) {
+ size_t index = absl::Uniform<int32_t>(rng, 0, allocs.size());
+ std::swap(allocs[index], allocs.back());
+ Put(allocs.back());
+ allocs.pop_back();
+ } else {
+ Length n;
+ do {
+ n = Length(dist(rng));
+ } while (n > tracker_.longest_free_range());
+ allocs.push_back(Get(n));
+ }
+ }
+
+ std::sort(frag_samples.begin(), frag_samples.end());
+ std::sort(longest_free_samples.begin(), longest_free_samples.end());
+
+ {
+ const double p10 = frag_samples[kReps * 10 / 100];
+ const double p25 = frag_samples[kReps * 25 / 100];
+ const double p50 = frag_samples[kReps * 50 / 100];
+ const double p75 = frag_samples[kReps * 75 / 100];
+ const double p90 = frag_samples[kReps * 90 / 100];
+ printf("Fragmentation quantiles:\n");
+ printf("p10: %f p25: %f p50: %f p75: %f p90: %f\n", p10, p25, p50, p75,
+ p90);
+ // We'd like to pretty consistently rely on (75% of the time) reasonable
+ // defragmentation (50% of space is fully usable...)
+ // ...but we currently can't hit that mark consistently.
+ // The situation is worse on ppc with larger huge pages:
+ // pass rate for test is ~50% at 0.20. Reducing from 0.2 to 0.07.
+ // TODO(b/127466107) figure out a better solution.
+ EXPECT_GE(p25, 0.07);
+ }
+
+ {
+ const Length p10 = longest_free_samples[kReps * 10 / 100];
+ const Length p25 = longest_free_samples[kReps * 25 / 100];
+ const Length p50 = longest_free_samples[kReps * 50 / 100];
+ const Length p75 = longest_free_samples[kReps * 75 / 100];
+ const Length p90 = longest_free_samples[kReps * 90 / 100];
+ printf("Longest free quantiles:\n");
+ printf("p10: %zu p25: %zu p50: %zu p75: %zu p90: %zu\n", p10.raw_num(),
+ p25.raw_num(), p50.raw_num(), p75.raw_num(), p90.raw_num());
+ // Similarly, we'd really like there to usually (p25) be space
+ // for a large allocation (N - note that we've cooked the books so that
+ // the page tracker is going to be something like half empty (ish) and N
+ // is small, so that should be doable.)
+ // ...but, of course, it isn't.
+ EXPECT_GE(p25, Length(4));
+ }
+
+ for (auto a : allocs) {
+ Put(a);
+ }
+}
+
+TEST_F(PageTrackerTest, Stats) {
+ struct Helper {
+ static void Stat(const TestPageTracker& tracker,
+ std::vector<Length>* small_backed,
+ std::vector<Length>* small_unbacked, LargeSpanStats* large,
+ double* avg_age_backed, double* avg_age_unbacked) {
+ SmallSpanStats small;
+ *large = LargeSpanStats();
+ PageAgeHistograms ages(absl::base_internal::CycleClock::Now());
+ tracker.AddSpanStats(&small, large, &ages);
+ small_backed->clear();
+ small_unbacked->clear();
+ for (auto i = Length(0); i < kMaxPages; ++i) {
+ for (int j = 0; j < small.normal_length[i.raw_num()]; ++j) {
+ small_backed->push_back(i);
+ }
+
+ for (int j = 0; j < small.returned_length[i.raw_num()]; ++j) {
+ small_unbacked->push_back(i);
+ }
+ }
+
+ *avg_age_backed = ages.GetTotalHistogram(false)->avg_age();
+ *avg_age_unbacked = ages.GetTotalHistogram(true)->avg_age();
+ }
+ };
+
+ LargeSpanStats large;
+ std::vector<Length> small_backed, small_unbacked;
+ double avg_age_backed, avg_age_unbacked;
+
+ const PageId p = Get(kPagesPerHugePage).p;
+ const PageId end = p + kPagesPerHugePage;
+ PageId next = p;
+ Put({next, kMaxPages + Length(1)});
+ next += kMaxPages + Length(1);
+
+ absl::SleepFor(absl::Milliseconds(10));
+ Helper::Stat(tracker_, &small_backed, &small_unbacked, &large,
+ &avg_age_backed, &avg_age_unbacked);
+ EXPECT_THAT(small_backed, testing::ElementsAre());
+ EXPECT_THAT(small_unbacked, testing::ElementsAre());
+ EXPECT_EQ(1, large.spans);
+ EXPECT_EQ(kMaxPages + Length(1), large.normal_pages);
+ EXPECT_EQ(Length(0), large.returned_pages);
+ EXPECT_LE(0.01, avg_age_backed);
+
+ ++next;
+ Put({next, Length(1)});
+ next += Length(1);
+ absl::SleepFor(absl::Milliseconds(20));
+ Helper::Stat(tracker_, &small_backed, &small_unbacked, &large,
+ &avg_age_backed, &avg_age_unbacked);
+ EXPECT_THAT(small_backed, testing::ElementsAre(Length(1)));
+ EXPECT_THAT(small_unbacked, testing::ElementsAre());
+ EXPECT_EQ(1, large.spans);
+ EXPECT_EQ(kMaxPages + Length(1), large.normal_pages);
+ EXPECT_EQ(Length(0), large.returned_pages);
+ EXPECT_LE(((kMaxPages + Length(1)).raw_num() * 0.03 + 1 * 0.02) /
+ (kMaxPages + Length(2)).raw_num(),
+ avg_age_backed);
+ EXPECT_EQ(0, avg_age_unbacked);
+
+ ++next;
+ Put({next, Length(2)});
+ next += Length(2);
+ absl::SleepFor(absl::Milliseconds(30));
+ Helper::Stat(tracker_, &small_backed, &small_unbacked, &large,
+ &avg_age_backed, &avg_age_unbacked);
+ EXPECT_THAT(small_backed, testing::ElementsAre(Length(1), Length(2)));
+ EXPECT_THAT(small_unbacked, testing::ElementsAre());
+ EXPECT_EQ(1, large.spans);
+ EXPECT_EQ(kMaxPages + Length(1), large.normal_pages);
+ EXPECT_EQ(Length(0), large.returned_pages);
+ EXPECT_LE(((kMaxPages + Length(1)).raw_num() * 0.06 + 1 * 0.05 + 2 * 0.03) /
+ (kMaxPages + Length(4)).raw_num(),
+ avg_age_backed);
+ EXPECT_EQ(0, avg_age_unbacked);
+
+ ++next;
+ Put({next, Length(3)});
+ next += Length(3);
+ ASSERT_LE(next, end);
+ absl::SleepFor(absl::Milliseconds(40));
+ Helper::Stat(tracker_, &small_backed, &small_unbacked, &large,
+ &avg_age_backed, &avg_age_unbacked);
+ EXPECT_THAT(small_backed,
+ testing::ElementsAre(Length(1), Length(2), Length(3)));
+ EXPECT_THAT(small_unbacked, testing::ElementsAre());
+ EXPECT_EQ(1, large.spans);
+ EXPECT_EQ(kMaxPages + Length(1), large.normal_pages);
+ EXPECT_EQ(Length(0), large.returned_pages);
+ EXPECT_LE(((kMaxPages + Length(1)).raw_num() * 0.10 + 1 * 0.09 + 2 * 0.07 +
+ 3 * 0.04) /
+ (kMaxPages + Length(7)).raw_num(),
+ avg_age_backed);
+ EXPECT_EQ(0, avg_age_unbacked);
+
+ ExpectPages({p, kMaxPages + Length(1)});
+ ExpectPages({p + kMaxPages + Length(2), Length(1)});
+ ExpectPages({p + kMaxPages + Length(4), Length(2)});
+ ExpectPages({p + kMaxPages + Length(7), Length(3)});
+ EXPECT_EQ(kMaxPages + Length(7), ReleaseFree());
+ absl::SleepFor(absl::Milliseconds(100));
+ Helper::Stat(tracker_, &small_backed, &small_unbacked, &large,
+ &avg_age_backed, &avg_age_unbacked);
+ EXPECT_THAT(small_backed, testing::ElementsAre());
+ EXPECT_THAT(small_unbacked,
+ testing::ElementsAre(Length(1), Length(2), Length(3)));
+ EXPECT_EQ(1, large.spans);
+ EXPECT_EQ(Length(0), large.normal_pages);
+ EXPECT_EQ(kMaxPages + Length(1), large.returned_pages);
+ EXPECT_EQ(0, avg_age_backed);
+ EXPECT_LE(0.1, avg_age_unbacked);
+}
+
+TEST_F(PageTrackerTest, b151915873) {
+ // This test verifies, while generating statistics for the huge page, that we
+ // do not go out-of-bounds in our bitmaps (b/151915873).
+
+ // While the PageTracker relies on FindAndMark to decide which pages to hand
+ // out, we do not specify where in the huge page we get our allocations.
+ // Allocate single pages and then use their returned addresses to create the
+ // desired pattern in the bitmaps, namely:
+ //
+ // | | kPagesPerHugePage - 2 | kPagesPerHugePages - 1 |
+ // | .... | not free | free |
+ //
+ // This causes AddSpanStats to try index = kPagesPerHugePage - 1, n=1. We
+ // need to not overflow FindClear/FindSet.
+
+ std::vector<PAlloc> allocs;
+ allocs.reserve(kPagesPerHugePage.raw_num());
+ for (int i = 0; i < kPagesPerHugePage.raw_num(); i++) {
+ allocs.push_back(Get(Length(1)));
+ }
+
+ std::sort(allocs.begin(), allocs.end(),
+ [](const PAlloc& a, const PAlloc& b) { return a.p < b.p; });
+
+ Put(allocs.back());
+  allocs.pop_back();
+
+ ASSERT_EQ(tracker_.used_pages(), kPagesPerHugePage - Length(1));
+
+ SmallSpanStats small;
+ LargeSpanStats large;
+ PageAgeHistograms ages(absl::base_internal::CycleClock::Now());
+
+ tracker_.AddSpanStats(&small, &large, &ages);
+
+ EXPECT_EQ(small.normal_length[1], 1);
+ EXPECT_THAT(0,
+ testing::AllOfArray(&small.normal_length[2],
+ &small.normal_length[kMaxPages.raw_num()]));
+}
+
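+// A stand-in unback function that tests can block on: each releasing thread
+// installs a mutex via set_lock(); Unback() then signals the (optional)
+// BlockingCounter and stalls until the test releases that mutex, letting the
+// test observe an unback while it is still in flight.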
+class BlockingUnback {
+ public:
+ static void Unback(void* p, size_t len) {
+ if (!mu_) {
+ return;
+ }
+
+ if (counter) {
+ counter->DecrementCount();
+ }
+
+ mu_->Lock();
+ mu_->Unlock();
+ }
+
+ static void set_lock(absl::Mutex* mu) { mu_ = mu; }
+
+ static absl::BlockingCounter* counter;
+
+ private:
+ static thread_local absl::Mutex* mu_;
+};
+
+thread_local absl::Mutex* BlockingUnback::mu_ = nullptr;
+absl::BlockingCounter* BlockingUnback::counter = nullptr;
+
+class FillerTest : public testing::TestWithParam<FillerPartialRerelease> {
+ protected:
+  // Allow tests to modify the clock used by the filler.
+ static int64_t FakeClock() { return clock_; }
+ static double GetFakeClockFrequency() {
+ return absl::ToDoubleNanoseconds(absl::Seconds(2));
+ }
+ static void Advance(absl::Duration d) {
+ clock_ += absl::ToDoubleSeconds(d) * GetFakeClockFrequency();
+ }
+ static void ResetClock() { clock_ = 1234; }
+
+ static void Unback(void* p, size_t len) {}
+
+ // Our templating approach lets us directly override certain functions
+ // and have mocks without virtualization. It's a bit funky but works.
+ typedef PageTracker<BlockingUnback::Unback> FakeTracker;
+
+ // We have backing of one word per (normal-sized) page for our "hugepages".
+ std::vector<size_t> backing_;
+ // This is space efficient enough that we won't bother recycling pages.
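+  // Backing is handed out in whole-hugepage chunks, so the synthetic address
+  // (index << kPageShift) is always hugepage-aligned (checked below).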
+ HugePage GetBacking() {
+ intptr_t i = backing_.size();
+ backing_.resize(i + kPagesPerHugePage.raw_num());
+ intptr_t addr = i << kPageShift;
+ CHECK_CONDITION(addr % kHugePageSize == 0);
+ return HugePageContaining(reinterpret_cast<void*>(addr));
+ }
+
+ size_t* GetFakePage(PageId p) { return &backing_[p.index()]; }
+
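+  // MarkRange stamps every fake page in [p, p + n) with a per-allocation
+  // sentinel; CheckRange verifies the stamps, so overlapping or mis-tracked
+  // allocations show up as mismatched marks.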
+ void MarkRange(PageId p, Length n, size_t mark) {
+ for (auto i = Length(0); i < n; ++i) {
+ *GetFakePage(p + i) = mark;
+ }
+ }
+
+ void CheckRange(PageId p, Length n, size_t mark) {
+ for (auto i = Length(0); i < n; ++i) {
+ EXPECT_EQ(mark, *GetFakePage(p + i));
+ }
+ }
+
+ HugePageFiller<FakeTracker> filler_;
+
+ FillerTest()
+ : filler_(GetParam(),
+ Clock{.now = FakeClock, .freq = GetFakeClockFrequency}) {
+ ResetClock();
+ }
+
+ ~FillerTest() override { EXPECT_EQ(NHugePages(0), filler_.size()); }
+
+ struct PAlloc {
+ FakeTracker* pt;
+ PageId p;
+ Length n;
+ size_t mark;
+ };
+
+ void Mark(const PAlloc& alloc) { MarkRange(alloc.p, alloc.n, alloc.mark); }
+
+ void Check(const PAlloc& alloc) { CheckRange(alloc.p, alloc.n, alloc.mark); }
+
+ size_t next_mark_{0};
+
+ HugeLength hp_contained_{NHugePages(0)};
+ Length total_allocated_{0};
+
+ absl::InsecureBitGen gen_;
+
+ void CheckStats() {
+ EXPECT_EQ(hp_contained_, filler_.size());
+ auto stats = filler_.stats();
+ const uint64_t freelist_bytes = stats.free_bytes + stats.unmapped_bytes;
+ const uint64_t used_bytes = stats.system_bytes - freelist_bytes;
+ EXPECT_EQ(total_allocated_.in_bytes(), used_bytes);
+ EXPECT_EQ((hp_contained_.in_pages() - total_allocated_).in_bytes(),
+ freelist_bytes);
+ }
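+  // Allocates n pages, first trying to fit them into an existing hugepage via
+  // TryGet(); if nothing fits (or the allocation is explicitly donated), a
+  // fresh FakeTracker is created and contributed to the filler.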
+ PAlloc AllocateRaw(Length n, bool donated = false) {
+ EXPECT_LT(n, kPagesPerHugePage);
+ PAlloc ret;
+ ret.n = n;
+ ret.pt = nullptr;
+ ret.mark = ++next_mark_;
+ if (!donated) { // Donated means always create a new hugepage
+ absl::base_internal::SpinLockHolder l(&pageheap_lock);
+ auto [pt, page] = filler_.TryGet(n);
+ ret.pt = pt;
+ ret.p = page;
+ }
+ if (ret.pt == nullptr) {
+ ret.pt =
+ new FakeTracker(GetBacking(), absl::base_internal::CycleClock::Now());
+ {
+ absl::base_internal::SpinLockHolder l(&pageheap_lock);
+ ret.p = ret.pt->Get(n).page;
+ }
+ filler_.Contribute(ret.pt, donated);
+ ++hp_contained_;
+ }
+
+ total_allocated_ += n;
+ return ret;
+ }
+
+ PAlloc Allocate(Length n, bool donated = false) {
+ CHECK_CONDITION(n <= kPagesPerHugePage);
+ PAlloc ret = AllocateRaw(n, donated);
+ ret.n = n;
+ Mark(ret);
+ CheckStats();
+ return ret;
+ }
+
+ // Returns true iff the filler returned an empty hugepage.
+ bool DeleteRaw(const PAlloc& p) {
+ FakeTracker* pt;
+ {
+ absl::base_internal::SpinLockHolder l(&pageheap_lock);
+ pt = filler_.Put(p.pt, p.p, p.n);
+ }
+ total_allocated_ -= p.n;
+ if (pt != nullptr) {
+ EXPECT_EQ(kPagesPerHugePage, pt->longest_free_range());
+ EXPECT_TRUE(pt->empty());
+ --hp_contained_;
+ delete pt;
+ return true;
+ }
+
+ return false;
+ }
+
+ // Returns true iff the filler returned an empty hugepage
+ bool Delete(const PAlloc& p) {
+ Check(p);
+ bool r = DeleteRaw(p);
+ CheckStats();
+ return r;
+ }
+
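+  // Helpers that call filler_.ReleasePages() under pageheap_lock; the final
+  // bool marks a forced ("hit limit") release, which bypasses the
+  // skip-subrelease heuristic exercised below.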
+ Length ReleasePages(Length desired, absl::Duration d = absl::ZeroDuration()) {
+ absl::base_internal::SpinLockHolder l(&pageheap_lock);
+ return filler_.ReleasePages(desired, d, false);
+ }
+
+ Length HardReleasePages(Length desired) {
+ absl::base_internal::SpinLockHolder l(&pageheap_lock);
+ return filler_.ReleasePages(desired, absl::ZeroDuration(), true);
+ }
+
+ // Generates an "interesting" pattern of allocations that highlights all the
+ // various features of our stats.
+ std::vector<PAlloc> GenerateInterestingAllocs();
+
+ private:
+ static int64_t clock_;
+};
+
+int64_t FillerTest::clock_{1234};
+
+TEST_P(FillerTest, Density) {
+ absl::BitGen rng;
+ // Start with a really annoying setup: some hugepages half
+ // empty (randomly)
+ std::vector<PAlloc> allocs;
+ std::vector<PAlloc> doomed_allocs;
+ static const HugeLength kNumHugePages = NHugePages(64);
+ for (auto i = Length(0); i < kNumHugePages.in_pages(); ++i) {
+ ASSERT_EQ(i, filler_.pages_allocated());
+ if (absl::Bernoulli(rng, 1.0 / 2)) {
+ allocs.push_back(Allocate(Length(1)));
+ } else {
+ doomed_allocs.push_back(Allocate(Length(1)));
+ }
+ }
+ for (auto d : doomed_allocs) {
+ Delete(d);
+ }
+ EXPECT_EQ(kNumHugePages, filler_.size());
+ // We want a good chance of touching ~every allocation.
+ size_t n = allocs.size();
+ // Now, randomly add and delete to the allocations.
+ // We should converge to full and empty pages.
+ for (int j = 0; j < 6; j++) {
+ absl::c_shuffle(allocs, rng);
+
+ for (int i = 0; i < n; ++i) {
+ Delete(allocs[i]);
+ allocs[i] = Allocate(Length(1));
+ ASSERT_EQ(Length(n), filler_.pages_allocated());
+ }
+ }
+
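+  // After churning, the surviving single-page allocations should be packed
+  // into roughly n / kPagesPerHugePage hugepages (plus one partial hugepage).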
+ EXPECT_GE(allocs.size() / kPagesPerHugePage.raw_num() + 1,
+ filler_.size().raw_num());
+
+ // clean up, check for failures
+ for (auto a : allocs) {
+ Delete(a);
+ ASSERT_EQ(Length(--n), filler_.pages_allocated());
+ }
+}
+
+TEST_P(FillerTest, Release) {
+ static const Length kAlloc = kPagesPerHugePage / 2;
+ PAlloc p1 = Allocate(kAlloc - Length(1));
+ PAlloc p2 = Allocate(kAlloc + Length(1));
+
+ PAlloc p3 = Allocate(kAlloc - Length(2));
+ PAlloc p4 = Allocate(kAlloc + Length(2));
+ // We have two hugepages, both full: nothing to release.
+ ASSERT_EQ(Length(0), ReleasePages(kMaxValidPages));
+ Delete(p1);
+ Delete(p3);
+  // Now we should see the p1 hugepage (the emptier of the two) released.
+ ASSERT_EQ(kAlloc - Length(1), ReleasePages(kAlloc - Length(1)));
+ EXPECT_EQ(kAlloc - Length(1), filler_.unmapped_pages());
+ ASSERT_TRUE(p1.pt->released());
+ ASSERT_FALSE(p3.pt->released());
+
+ // We expect to reuse p1.pt.
+ PAlloc p5 = Allocate(kAlloc - Length(1));
+ ASSERT_TRUE(p1.pt == p5.pt || p3.pt == p5.pt);
+
+ Delete(p2);
+ Delete(p4);
+ Delete(p5);
+}
+
+TEST_P(FillerTest, Fragmentation) {
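+  // Churn random-sized allocations drawn from an empirical request-size
+  // distribution and require that slack (free bytes as a fraction of system
+  // bytes) never exceeds 5%.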
+ absl::BitGen rng;
+ auto dist = EmpiricalDistribution(absl::GetFlag(FLAGS_frag_req_limit));
+
+ std::vector<PAlloc> allocs;
+ Length total;
+ while (total < absl::GetFlag(FLAGS_frag_size)) {
+ auto n = Length(dist(rng));
+ total += n;
+ allocs.push_back(AllocateRaw(n));
+ }
+
+ double max_slack = 0.0;
+ const size_t kReps = absl::GetFlag(FLAGS_frag_iters);
+ for (size_t i = 0; i < kReps; ++i) {
+ auto stats = filler_.stats();
+ double slack = static_cast<double>(stats.free_bytes) / stats.system_bytes;
+
+ max_slack = std::max(slack, max_slack);
+ if (i % (kReps / 40) == 0) {
+ printf("%zu events: %zu allocs totalling %zu slack %f\n", i,
+ allocs.size(), total.raw_num(), slack);
+ }
+ if (absl::Bernoulli(rng, 1.0 / 2)) {
+ size_t index = absl::Uniform<int32_t>(rng, 0, allocs.size());
+ std::swap(allocs[index], allocs.back());
+ DeleteRaw(allocs.back());
+ total -= allocs.back().n;
+ allocs.pop_back();
+ } else {
+ auto n = Length(dist(rng));
+ allocs.push_back(AllocateRaw(n));
+ total += n;
+ }
+ }
+
+ EXPECT_LE(max_slack, 0.05);
+
+ for (auto a : allocs) {
+ DeleteRaw(a);
+ }
+}
+
+TEST_P(FillerTest, PrintFreeRatio) {
+ // This test is sensitive to the number of pages per hugepage, as we are
+ // printing raw stats.
+ if (kPagesPerHugePage != Length(256)) {
+ GTEST_SKIP();
+ }
+
+  // Allocate two huge pages, release one, and verify that we do not get an
+  // invalid (> 1.0) ratio of free pages among the non-full hugepages.
+
+ // First huge page
+ PAlloc a1 = Allocate(kPagesPerHugePage / 2);
+ PAlloc a2 = Allocate(kPagesPerHugePage / 2);
+
+ // Second huge page
+ constexpr Length kQ = kPagesPerHugePage / 4;
+
+ PAlloc a3 = Allocate(kQ);
+ PAlloc a4 = Allocate(kQ);
+ PAlloc a5 = Allocate(kQ);
+ PAlloc a6 = Allocate(kQ);
+
+ Delete(a6);
+
+ ReleasePages(kQ);
+
+ Delete(a5);
+
+ std::string buffer(1024 * 1024, '\0');
+ {
+ Printer printer(&*buffer.begin(), buffer.size());
+ filler_.Print(&printer, /*everything=*/true);
+ buffer.erase(printer.SpaceRequired());
+ }
+
+ if (GetParam() == FillerPartialRerelease::Retain) {
+ EXPECT_THAT(
+ buffer,
+ testing::StartsWith(
+ R"(HugePageFiller: densely pack small requests into hugepages
+HugePageFiller: 2 total, 1 full, 0 partial, 1 released (1 partially), 0 quarantined
+HugePageFiller: 64 pages free in 2 hugepages, 0.1250 free
+HugePageFiller: among non-fulls, 0.2500 free
+HugePageFiller: 128 used pages in subreleased hugepages (128 of them in partially released)
+HugePageFiller: 1 hugepages partially released, 0.2500 released
+HugePageFiller: 0.6667 of used pages hugepageable)"));
+ } else {
+ EXPECT_THAT(
+ buffer,
+ testing::StartsWith(
+ R"(HugePageFiller: densely pack small requests into hugepages
+HugePageFiller: 2 total, 1 full, 0 partial, 1 released (0 partially), 0 quarantined
+HugePageFiller: 0 pages free in 2 hugepages, 0.0000 free
+HugePageFiller: among non-fulls, 0.0000 free
+HugePageFiller: 128 used pages in subreleased hugepages (0 of them in partially released)
+HugePageFiller: 1 hugepages partially released, 0.5000 released
+HugePageFiller: 0.6667 of used pages hugepageable)"));
+ }
+
+ // Cleanup remaining allocs.
+ Delete(a1);
+ Delete(a2);
+ Delete(a3);
+ Delete(a4);
+}
+
+static double BytesToMiB(size_t bytes) { return bytes / (1024.0 * 1024.0); }
+
+using testing::AnyOf;
+using testing::Eq;
+using testing::StrEq;
+
+TEST_P(FillerTest, HugePageFrac) {
+  // We don't care which of the two we get for an empty filler; both are
+  // reasonable choices. Just don't report NaN, divide by zero, or produce
+  // some bogus number.
+ EXPECT_THAT(filler_.hugepage_frac(), AnyOf(Eq(0), Eq(1)));
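+  // hugepage_frac() is the fraction of used pages that live on intact
+  // (non-released) hugepages; the expectations below check it against
+  // hand-computed ratios.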
+ static const Length kQ = kPagesPerHugePage / 4;
+ // These are all on one page:
+ auto a1 = Allocate(kQ);
+ auto a2 = Allocate(kQ);
+ auto a3 = Allocate(kQ - Length(1));
+ auto a4 = Allocate(kQ + Length(1));
+
+ // As are these:
+ auto a5 = Allocate(kPagesPerHugePage - kQ);
+ auto a6 = Allocate(kQ);
+
+ EXPECT_EQ(1, filler_.hugepage_frac());
+ // Free space doesn't affect it...
+ Delete(a4);
+ Delete(a6);
+
+ EXPECT_EQ(1, filler_.hugepage_frac());
+
+ // Releasing the hugepage does.
+ ASSERT_EQ(kQ + Length(1), ReleasePages(kQ + Length(1)));
+ EXPECT_EQ((3.0 * kQ.raw_num()) / (6.0 * kQ.raw_num() - 1.0),
+ filler_.hugepage_frac());
+
+ // Check our arithmetic in a couple scenarios.
+
+ // 2 kQs on the release and 3 on the hugepage
+ Delete(a2);
+ EXPECT_EQ((3.0 * kQ.raw_num()) / (5.0 * kQ.raw_num() - 1),
+ filler_.hugepage_frac());
+ // This releases the free page on the partially released hugepage.
+ ASSERT_EQ(kQ, ReleasePages(kQ));
+ EXPECT_EQ((3.0 * kQ.raw_num()) / (5.0 * kQ.raw_num() - 1),
+ filler_.hugepage_frac());
+
+ // just-over-1 kQ on the release and 3 on the hugepage
+ Delete(a3);
+ EXPECT_EQ((3 * kQ.raw_num()) / (4.0 * kQ.raw_num()), filler_.hugepage_frac());
+ // This releases the free page on the partially released hugepage.
+ ASSERT_EQ(kQ - Length(1), ReleasePages(kQ - Length(1)));
+ EXPECT_EQ((3 * kQ.raw_num()) / (4.0 * kQ.raw_num()), filler_.hugepage_frac());
+
+ // All huge!
+ Delete(a1);
+ EXPECT_EQ(1, filler_.hugepage_frac());
+
+ Delete(a5);
+}
+
+// Repeatedly grow from FLAGS_bytes to FLAGS_bytes * FLAGS_growth_factor, then
+// shrink back down by random deletion. Then release partial hugepages until
+// the pageheap is bounded by some fraction of usage. Measure the effective
+// hugepage fraction at peak and baseline usage, and the blowup in VSS
+// footprint.
+//
+// This test is a tool for analyzing parameters -- not intended as an actual
+// unit test.
+TEST_P(FillerTest, DISABLED_ReleaseFrac) {
+ absl::BitGen rng;
+ const Length baseline = LengthFromBytes(absl::GetFlag(FLAGS_bytes));
+ const Length peak = baseline * absl::GetFlag(FLAGS_growth_factor);
+ const Length free_target = baseline * absl::GetFlag(FLAGS_release_until);
+
+ std::vector<PAlloc> allocs;
+ while (filler_.used_pages() < baseline) {
+ allocs.push_back(AllocateRaw(Length(1)));
+ }
+
+ while (true) {
+ while (filler_.used_pages() < peak) {
+ allocs.push_back(AllocateRaw(Length(1)));
+ }
+ const double peak_frac = filler_.hugepage_frac();
+ // VSS
+ const size_t footprint = filler_.size().in_bytes();
+
+ std::shuffle(allocs.begin(), allocs.end(), rng);
+
+ size_t limit = allocs.size();
+ while (filler_.used_pages() > baseline) {
+ --limit;
+ DeleteRaw(allocs[limit]);
+ }
+ allocs.resize(limit);
+ while (filler_.free_pages() > free_target) {
+ ReleasePages(kMaxValidPages);
+ }
+ const double baseline_frac = filler_.hugepage_frac();
+
+ printf("%.3f %.3f %6.1f MiB\n", peak_frac, baseline_frac,
+ BytesToMiB(footprint));
+ }
+}
+
+TEST_P(FillerTest, ReleaseAccounting) {
+ const Length N = kPagesPerHugePage;
+ auto big = Allocate(N - Length(2));
+ auto tiny1 = Allocate(Length(1));
+ auto tiny2 = Allocate(Length(1));
+ auto half1 = Allocate(N / 2);
+ auto half2 = Allocate(N / 2);
+
+ Delete(half1);
+ Delete(big);
+
+ ASSERT_EQ(NHugePages(2), filler_.size());
+
+ // We should pick the [empty big][full tiny] hugepage here.
+ EXPECT_EQ(N - Length(2), ReleasePages(N - Length(2)));
+ EXPECT_EQ(N - Length(2), filler_.unmapped_pages());
+ // This shouldn't trigger a release
+ Delete(tiny1);
+ if (GetParam() == FillerPartialRerelease::Retain) {
+ EXPECT_EQ(N - Length(2), filler_.unmapped_pages());
+ // Until we call ReleasePages()
+ EXPECT_EQ(Length(1), ReleasePages(Length(1)));
+ }
+ EXPECT_EQ(N - Length(1), filler_.unmapped_pages());
+
+  // Nor should this, but it does drop the whole (now empty) hugepage.
+ Delete(tiny2);
+ EXPECT_EQ(Length(0), filler_.unmapped_pages());
+ EXPECT_EQ(NHugePages(1), filler_.size());
+
+ // This shouldn't trigger any release: we just claim credit for the
+ // releases we did automatically on tiny2.
+ if (GetParam() == FillerPartialRerelease::Retain) {
+ EXPECT_EQ(Length(1), ReleasePages(Length(1)));
+ } else {
+ EXPECT_EQ(Length(2), ReleasePages(Length(2)));
+ }
+ EXPECT_EQ(Length(0), filler_.unmapped_pages());
+ EXPECT_EQ(NHugePages(1), filler_.size());
+
+ // Check subrelease stats
+ EXPECT_EQ(N / 2, filler_.used_pages());
+ EXPECT_EQ(Length(0), filler_.used_pages_in_any_subreleased());
+ EXPECT_EQ(Length(0), filler_.used_pages_in_partial_released());
+ EXPECT_EQ(Length(0), filler_.used_pages_in_released());
+
+ // Now we pick the half/half hugepage
+ EXPECT_EQ(N / 2, ReleasePages(kMaxValidPages));
+ EXPECT_EQ(N / 2, filler_.unmapped_pages());
+
+ // Check subrelease stats
+ EXPECT_EQ(N / 2, filler_.used_pages());
+ EXPECT_EQ(N / 2, filler_.used_pages_in_any_subreleased());
+ EXPECT_EQ(Length(0), filler_.used_pages_in_partial_released());
+ EXPECT_EQ(N / 2, filler_.used_pages_in_released());
+
+ // Check accounting for partially released hugepages with partial rerelease
+ if (GetParam() == FillerPartialRerelease::Retain) {
+ // Allocating and deallocating a small object causes the page to turn from
+ // a released hugepage into a partially released hugepage.
+ auto tiny3 = Allocate(Length(1));
+ auto tiny4 = Allocate(Length(1));
+ Delete(tiny4);
+ EXPECT_EQ(N / 2 + Length(1), filler_.used_pages());
+ EXPECT_EQ(N / 2 + Length(1), filler_.used_pages_in_any_subreleased());
+ EXPECT_EQ(N / 2 + Length(1), filler_.used_pages_in_partial_released());
+ EXPECT_EQ(Length(0), filler_.used_pages_in_released());
+ Delete(tiny3);
+ }
+
+ Delete(half2);
+ EXPECT_EQ(NHugePages(0), filler_.size());
+ EXPECT_EQ(Length(0), filler_.unmapped_pages());
+}
+
+TEST_P(FillerTest, ReleaseWithReuse) {
+ const Length N = kPagesPerHugePage;
+ auto half = Allocate(N / 2);
+ auto tiny1 = Allocate(N / 4);
+ auto tiny2 = Allocate(N / 4);
+
+ Delete(half);
+
+ ASSERT_EQ(NHugePages(1), filler_.size());
+
+ // We should be able to release the pages from half1.
+ EXPECT_EQ(N / 2, ReleasePages(kMaxValidPages));
+ EXPECT_EQ(N / 2, filler_.unmapped_pages());
+
+ // Release tiny1, release more.
+ Delete(tiny1);
+
+ EXPECT_EQ(N / 4, ReleasePages(kMaxValidPages));
+ EXPECT_EQ(3 * N / 4, filler_.unmapped_pages());
+
+  // Repopulate; confirm we can't release anything and that the unmapped page
+  // count shrinks back toward zero as the released range is reused.
+ tiny1 = Allocate(N / 4);
+ EXPECT_EQ(Length(0), ReleasePages(kMaxValidPages));
+ EXPECT_EQ(N / 2, filler_.unmapped_pages());
+
+ // Continue repopulating.
+ half = Allocate(N / 2);
+ EXPECT_EQ(Length(0), ReleasePages(kMaxValidPages));
+ EXPECT_EQ(Length(0), filler_.unmapped_pages());
+ EXPECT_EQ(NHugePages(1), filler_.size());
+
+ // Release everything and cleanup.
+ Delete(half);
+ Delete(tiny1);
+ Delete(tiny2);
+ EXPECT_EQ(NHugePages(0), filler_.size());
+ EXPECT_EQ(Length(0), filler_.unmapped_pages());
+}
+
+TEST_P(FillerTest, AvoidArbitraryQuarantineVMGrowth) {
+ const Length N = kPagesPerHugePage;
+  // Guarantee we have a ton of hugepages that go empty after having been
+  // (partially) released.
+ for (int i = 0; i < 10 * 1000; ++i) {
+ auto half1 = Allocate(N / 2);
+ auto half2 = Allocate(N / 2);
+ Delete(half1);
+ ASSERT_EQ(N / 2, ReleasePages(N / 2));
+ Delete(half2);
+ }
+
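+  // The filler must recycle those released-then-emptied hugepages rather than
+  // growing its address space without bound: after 10k iterations the system
+  // footprint should still be under 1 GiB.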
+ auto s = filler_.stats();
+ EXPECT_GE(1024 * 1024 * 1024, s.system_bytes);
+}
+
+TEST_P(FillerTest, StronglyPreferNonDonated) {
+ // We donate several huge pages of varying fullnesses. Then we make several
+ // allocations that would be perfect fits for the donated hugepages, *after*
+ // making one allocation that won't fit, to ensure that a huge page is
+ // contributed normally. Finally, we verify that we can still get the
+ // donated huge pages back. (I.e. they weren't used.)
+ std::vector<PAlloc> donated;
+ ASSERT_GE(kPagesPerHugePage, Length(10));
+ for (auto i = Length(1); i <= Length(3); ++i) {
+ donated.push_back(Allocate(kPagesPerHugePage - i, /*donated=*/true));
+ }
+
+ std::vector<PAlloc> regular;
+ for (auto i = Length(4); i >= Length(1); --i) {
+ regular.push_back(Allocate(i));
+ }
+
+ for (const PAlloc& alloc : donated) {
+ // All the donated huge pages should be freeable.
+ EXPECT_TRUE(Delete(alloc));
+ }
+
+ for (const PAlloc& alloc : regular) {
+ Delete(alloc);
+ }
+}
+
+TEST_P(FillerTest, ParallelUnlockingSubrelease) {
+ if (GetParam() == FillerPartialRerelease::Retain) {
+ // When rerelease happens without going to Unback(), this test
+ // (intentionally) deadlocks, as we never receive the call.
+ return;
+ }
+
+ // Verify that we can deallocate a partial huge page and successfully unlock
+ // the pageheap_lock without introducing race conditions around the metadata
+ // for PageTracker::released_.
+ //
+ // Currently, HPAA unbacks *all* subsequent deallocations to a huge page once
+ // we have broken up *any* part of it.
+ //
+ // If multiple deallocations are in-flight, we need to leave sufficient
+ // breadcrumbs to ourselves (PageTracker::releasing_ is a Length, not a bool)
+ // so that one deallocation completing does not have us "forget" that another
+ // deallocation is about to unback other parts of the hugepage.
+ //
+ // If PageTracker::releasing_ were a bool, the completion of "t1" and
+ // subsequent reallocation of "a2" in this test would mark the entirety of the
+ // page as full, so we would choose to *not* unback a2 (when deallocated) or
+ // a3 (when deallocated by t3).
+ constexpr Length N = kPagesPerHugePage;
+
+ auto a1 = AllocateRaw(N / 2);
+ auto a2 = AllocateRaw(Length(1));
+ auto a3 = AllocateRaw(Length(1));
+
+ // Trigger subrelease. The filler now has a partial hugepage, so subsequent
+ // calls to Delete() will cause us to unback the remainder of it.
+ EXPECT_GT(ReleasePages(kMaxValidPages), Length(0));
+
+ auto m1 = absl::make_unique<absl::Mutex>();
+ auto m2 = absl::make_unique<absl::Mutex>();
+
+ m1->Lock();
+ m2->Lock();
+
+ absl::BlockingCounter counter(2);
+ BlockingUnback::counter = &counter;
+
+ std::thread t1([&]() {
+ BlockingUnback::set_lock(m1.get());
+
+ DeleteRaw(a2);
+ });
+
+ std::thread t2([&]() {
+ BlockingUnback::set_lock(m2.get());
+
+ DeleteRaw(a3);
+ });
+
+ // Wait for t1 and t2 to block.
+ counter.Wait();
+
+ // At this point, t1 and t2 are blocked (as if they were on a long-running
+ // syscall) on "unback" (m1 and m2, respectively). pageheap_lock is not held.
+ //
+ // Allocating a4 will complete the hugepage, but we have on-going releaser
+ // threads.
+ auto a4 = AllocateRaw((N / 2) - Length(2));
+ EXPECT_EQ(NHugePages(1), filler_.size());
+
+ // Let one of the threads proceed. The huge page consists of:
+ // * a1 (N/2 ): Allocated
+ // * a2 ( 1): Unbacked
+ // * a3 ( 1): Unbacking (blocked on m2)
+ // * a4 (N/2-2): Allocated
+ m1->Unlock();
+ t1.join();
+
+ // Reallocate a2. We should still consider the huge page partially backed for
+ // purposes of subreleasing.
+ a2 = AllocateRaw(Length(1));
+ EXPECT_EQ(NHugePages(1), filler_.size());
+ DeleteRaw(a2);
+
+ // Let the other thread proceed. The huge page consists of:
+ // * a1 (N/2 ): Allocated
+ // * a2 ( 1): Unbacked
+ // * a3 ( 1): Unbacked
+ // * a4 (N/2-2): Allocated
+ m2->Unlock();
+ t2.join();
+
+ EXPECT_EQ(filler_.used_pages(), N - Length(2));
+ EXPECT_EQ(filler_.unmapped_pages(), Length(2));
+ EXPECT_EQ(filler_.free_pages(), Length(0));
+
+ // Clean up.
+ DeleteRaw(a1);
+ DeleteRaw(a4);
+
+ BlockingUnback::counter = nullptr;
+}
+
+TEST_P(FillerTest, SkipSubrelease) {
+ // This test is sensitive to the number of pages per hugepage, as we are
+ // printing raw stats.
+ if (kPagesPerHugePage != Length(256)) {
+ GTEST_SKIP();
+ }
+
+ // Generate a peak, wait for time interval a, generate a trough, subrelease,
+ // wait for time interval b, generate another peak.
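+  // Subrelease is expected only when the most recent demand peak is older
+  // than `peak_interval`; otherwise the skip-subrelease heuristic retains the
+  // pages.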
+ const auto peak_trough_peak = [&](absl::Duration a, absl::Duration b,
+ absl::Duration peak_interval,
+ bool expected_subrelease) {
+ const Length N = kPagesPerHugePage;
+ PAlloc half = Allocate(N / 2);
+ PAlloc tiny1 = Allocate(N / 4);
+ PAlloc tiny2 = Allocate(N / 4);
+
+    // To force a peak, we allocate 3/4 and 1/4 of a huge page. This is
+    // necessary because, once we delete `half` below, a single half-hugepage
+    // allocation for the peak would simply fill the gap it left behind.
+ PAlloc peak1a = Allocate(3 * N / 4);
+ PAlloc peak1b = Allocate(N / 4);
+ EXPECT_EQ(filler_.used_pages(), 2 * N);
+ Delete(peak1a);
+ Delete(peak1b);
+ Advance(a);
+
+ Delete(half);
+
+ EXPECT_EQ(expected_subrelease ? N / 2 : Length(0),
+ ReleasePages(10 * N, peak_interval));
+
+ Advance(b);
+
+ PAlloc peak2a = Allocate(3 * N / 4);
+ PAlloc peak2b = Allocate(N / 4);
+
+ PAlloc peak3a = Allocate(3 * N / 4);
+ PAlloc peak3b = Allocate(N / 4);
+
+ Delete(tiny1);
+ Delete(tiny2);
+ Delete(peak2a);
+ Delete(peak2b);
+ Delete(peak3a);
+ Delete(peak3b);
+
+ EXPECT_EQ(filler_.used_pages(), Length(0));
+ EXPECT_EQ(filler_.unmapped_pages(), Length(0));
+ EXPECT_EQ(filler_.free_pages(), Length(0));
+
+ EXPECT_EQ(expected_subrelease ? N / 2 : Length(0), ReleasePages(10 * N));
+ };
+
+ {
+ SCOPED_TRACE("peak-trough-peak 1");
+ peak_trough_peak(absl::Minutes(2), absl::Minutes(2), absl::Minutes(3),
+ false);
+ }
+
+ Advance(absl::Minutes(30));
+
+ {
+ SCOPED_TRACE("peak-trough-peak 2");
+ peak_trough_peak(absl::Minutes(2), absl::Minutes(7), absl::Minutes(3),
+ false);
+ }
+
+ Advance(absl::Minutes(30));
+
+ {
+ SCOPED_TRACE("peak-trough-peak 3");
+ peak_trough_peak(absl::Minutes(5), absl::Minutes(3), absl::Minutes(2),
+ true);
+ }
+
+ Advance(absl::Minutes(30));
+
+ // This captures a corner case: If we hit another peak immediately after a
+ // subrelease decision (in the same time series epoch), do not count this as
+ // a correct subrelease decision.
+ {
+ SCOPED_TRACE("peak-trough-peak 4");
+ peak_trough_peak(absl::Milliseconds(10), absl::Milliseconds(10),
+ absl::Minutes(2), false);
+ }
+
+ Advance(absl::Minutes(30));
+
+ // Ensure that the tracker is updated.
+ auto tiny = Allocate(Length(1));
+ Delete(tiny);
+
+ std::string buffer(1024 * 1024, '\0');
+ {
+ Printer printer(&*buffer.begin(), buffer.size());
+ filler_.Print(&printer, true);
+ }
+ buffer.resize(strlen(buffer.c_str()));
+
+ EXPECT_THAT(buffer, testing::HasSubstr(R"(
+HugePageFiller: Since the start of the execution, 4 subreleases (512 pages) were skipped due to recent (120s) peaks.
+HugePageFiller: 25.0000% of decisions confirmed correct, 0 pending (25.0000% of pages, 0 pending).
+)"));
+}
+
+class FillerStatsTrackerTest : public testing::Test {
+ private:
+ static int64_t clock_;
+ static int64_t FakeClock() { return clock_; }
+ static double GetFakeClockFrequency() {
+ return absl::ToDoubleNanoseconds(absl::Seconds(2));
+ }
+
+ protected:
+ static constexpr absl::Duration kWindow = absl::Minutes(10);
+
+ using StatsTrackerType = FillerStatsTracker<16>;
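+  // 16 epochs over a 10-minute window (37.5 s per epoch), with a 5-minute
+  // summary interval for the min-free-pages statistics.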
+ StatsTrackerType tracker_{
+ Clock{.now = FakeClock, .freq = GetFakeClockFrequency}, kWindow,
+ absl::Minutes(5)};
+
+ void Advance(absl::Duration d) {
+ clock_ += static_cast<int64_t>(absl::ToDoubleSeconds(d) *
+ GetFakeClockFrequency());
+ }
+
+ // Generates four data points for the tracker that represent "interesting"
+ // points (i.e., min/max pages demand, min/max hugepages).
+ void GenerateInterestingPoints(Length num_pages, HugeLength num_hugepages,
+ Length num_free_pages);
+
+ // Generates a data point with a particular amount of demand pages, while
+ // ignoring the specific number of hugepages.
+ void GenerateDemandPoint(Length num_pages, Length num_free_pages);
+};
+
+int64_t FillerStatsTrackerTest::clock_{0};
+
+void FillerStatsTrackerTest::GenerateInterestingPoints(Length num_pages,
+ HugeLength num_hugepages,
+ Length num_free_pages) {
+ for (int i = 0; i <= 1; ++i) {
+ for (int j = 0; j <= 1; ++j) {
+ StatsTrackerType::FillerStats stats;
+ stats.num_pages = num_pages + Length((i == 0) ? 4 : 8 * j);
+ stats.free_pages = num_free_pages + Length(10 * i + j);
+ stats.unmapped_pages = Length(10);
+ stats.used_pages_in_subreleased_huge_pages = num_pages;
+ stats.huge_pages[StatsTrackerType::kRegular] =
+ num_hugepages + ((i == 1) ? NHugePages(4) : NHugePages(8) * j);
+ stats.huge_pages[StatsTrackerType::kDonated] = num_hugepages;
+ stats.huge_pages[StatsTrackerType::kPartialReleased] = NHugePages(i);
+ stats.huge_pages[StatsTrackerType::kReleased] = NHugePages(j);
+ tracker_.Report(stats);
+ }
+ }
+}
+
+void FillerStatsTrackerTest::GenerateDemandPoint(Length num_pages,
+ Length num_free_pages) {
+ HugeLength hp = NHugePages(1);
+ StatsTrackerType::FillerStats stats;
+ stats.num_pages = num_pages;
+ stats.free_pages = num_free_pages;
+ stats.unmapped_pages = Length(0);
+ stats.used_pages_in_subreleased_huge_pages = Length(0);
+ stats.huge_pages[StatsTrackerType::kRegular] = hp;
+ stats.huge_pages[StatsTrackerType::kDonated] = hp;
+ stats.huge_pages[StatsTrackerType::kPartialReleased] = hp;
+ stats.huge_pages[StatsTrackerType::kReleased] = hp;
+ tracker_.Report(stats);
+}
+
+// Tests that the tracker aggregates all data correctly. The output is tested by
+// comparing the text output of the tracker. While this is a bit verbose, it is
+// much cleaner than extracting and comparing all data manually.
+TEST_F(FillerStatsTrackerTest, Works) {
+ // Ensure that the beginning (when free pages are 0) is outside the 5-min
+ // window the instrumentation is recording.
+ GenerateInterestingPoints(Length(1), NHugePages(1), Length(1));
+ Advance(absl::Minutes(5));
+
+ GenerateInterestingPoints(Length(100), NHugePages(5), Length(200));
+
+ Advance(absl::Minutes(1));
+
+ GenerateInterestingPoints(Length(200), NHugePages(10), Length(100));
+
+ Advance(absl::Minutes(1));
+
+ // Test text output (time series summary).
+ {
+ std::string buffer(1024 * 1024, '\0');
+ Printer printer(&*buffer.begin(), buffer.size());
+ {
+ tracker_.Print(&printer);
+ buffer.erase(printer.SpaceRequired());
+ }
+
+ EXPECT_THAT(buffer, StrEq(R"(HugePageFiller: time series over 5 min interval
+
+HugePageFiller: realized fragmentation: 0.8 MiB
+HugePageFiller: minimum free pages: 110 (100 backed)
+HugePageFiller: at peak demand: 208 pages (and 111 free, 10 unmapped)
+HugePageFiller: at peak demand: 26 hps (14 regular, 10 donated, 1 partial, 1 released)
+HugePageFiller: at peak hps: 208 pages (and 111 free, 10 unmapped)
+HugePageFiller: at peak hps: 26 hps (14 regular, 10 donated, 1 partial, 1 released)
+
+HugePageFiller: Since the start of the execution, 0 subreleases (0 pages) were skipped due to recent (0s) peaks.
+HugePageFiller: 0.0000% of decisions confirmed correct, 0 pending (0.0000% of pages, 0 pending).
+HugePageFiller: Subrelease stats last 10 min: total 0 pages subreleased, 0 hugepages broken
+)"));
+ }
+
+ // Test pbtxt output (full time series).
+ {
+ std::string buffer(1024 * 1024, '\0');
+ Printer printer(&*buffer.begin(), buffer.size());
+ {
+ PbtxtRegion region(&printer, kTop, /*indent=*/0);
+ tracker_.PrintInPbtxt(&region);
+ }
+ buffer.erase(printer.SpaceRequired());
+
+ EXPECT_THAT(buffer, StrEq(R"(
+ filler_skipped_subrelease {
+ skipped_subrelease_interval_ms: 0
+ skipped_subrelease_pages: 0
+ correctly_skipped_subrelease_pages: 0
+ pending_skipped_subrelease_pages: 0
+ skipped_subrelease_count: 0
+ correctly_skipped_subrelease_count: 0
+ pending_skipped_subrelease_count: 0
+ }
+ filler_stats_timeseries {
+ window_ms: 37500
+ epochs: 16
+ min_free_pages_interval_ms: 300000
+ min_free_pages: 110
+ min_free_backed_pages: 100
+ measurements {
+ epoch: 6
+ timestamp_ms: 0
+ min_free_pages: 11
+ min_free_backed_pages: 1
+ num_pages_subreleased: 0
+ num_hugepages_broken: 0
+ at_minimum_demand {
+ num_pages: 1
+ regular_huge_pages: 5
+ donated_huge_pages: 1
+ partial_released_huge_pages: 1
+ released_huge_pages: 0
+ used_pages_in_subreleased_huge_pages: 1
+ }
+ at_maximum_demand {
+ num_pages: 9
+ regular_huge_pages: 5
+ donated_huge_pages: 1
+ partial_released_huge_pages: 1
+ released_huge_pages: 1
+ used_pages_in_subreleased_huge_pages: 1
+ }
+ at_minimum_huge_pages {
+ num_pages: 5
+ regular_huge_pages: 1
+ donated_huge_pages: 1
+ partial_released_huge_pages: 0
+ released_huge_pages: 0
+ used_pages_in_subreleased_huge_pages: 1
+ }
+ at_maximum_huge_pages {
+ num_pages: 5
+ regular_huge_pages: 9
+ donated_huge_pages: 1
+ partial_released_huge_pages: 0
+ released_huge_pages: 1
+ used_pages_in_subreleased_huge_pages: 1
+ }
+ }
+ measurements {
+ epoch: 14
+ timestamp_ms: 300000
+ min_free_pages: 210
+ min_free_backed_pages: 200
+ num_pages_subreleased: 0
+ num_hugepages_broken: 0
+ at_minimum_demand {
+ num_pages: 100
+ regular_huge_pages: 9
+ donated_huge_pages: 5
+ partial_released_huge_pages: 1
+ released_huge_pages: 0
+ used_pages_in_subreleased_huge_pages: 100
+ }
+ at_maximum_demand {
+ num_pages: 108
+ regular_huge_pages: 9
+ donated_huge_pages: 5
+ partial_released_huge_pages: 1
+ released_huge_pages: 1
+ used_pages_in_subreleased_huge_pages: 100
+ }
+ at_minimum_huge_pages {
+ num_pages: 104
+ regular_huge_pages: 5
+ donated_huge_pages: 5
+ partial_released_huge_pages: 0
+ released_huge_pages: 0
+ used_pages_in_subreleased_huge_pages: 100
+ }
+ at_maximum_huge_pages {
+ num_pages: 104
+ regular_huge_pages: 13
+ donated_huge_pages: 5
+ partial_released_huge_pages: 0
+ released_huge_pages: 1
+ used_pages_in_subreleased_huge_pages: 100
+ }
+ }
+ measurements {
+ epoch: 15
+ timestamp_ms: 337500
+ min_free_pages: 110
+ min_free_backed_pages: 100
+ num_pages_subreleased: 0
+ num_hugepages_broken: 0
+ at_minimum_demand {
+ num_pages: 200
+ regular_huge_pages: 14
+ donated_huge_pages: 10
+ partial_released_huge_pages: 1
+ released_huge_pages: 0
+ used_pages_in_subreleased_huge_pages: 200
+ }
+ at_maximum_demand {
+ num_pages: 208
+ regular_huge_pages: 14
+ donated_huge_pages: 10
+ partial_released_huge_pages: 1
+ released_huge_pages: 1
+ used_pages_in_subreleased_huge_pages: 200
+ }
+ at_minimum_huge_pages {
+ num_pages: 204
+ regular_huge_pages: 10
+ donated_huge_pages: 10
+ partial_released_huge_pages: 0
+ released_huge_pages: 0
+ used_pages_in_subreleased_huge_pages: 200
+ }
+ at_maximum_huge_pages {
+ num_pages: 204
+ regular_huge_pages: 18
+ donated_huge_pages: 10
+ partial_released_huge_pages: 0
+ released_huge_pages: 1
+ used_pages_in_subreleased_huge_pages: 200
+ }
+ }
+ }
+)"));
+ }
+}
+
+TEST_F(FillerStatsTrackerTest, InvalidDurations) {
+ // These should not crash.
+ tracker_.min_free_pages(absl::InfiniteDuration());
+ tracker_.min_free_pages(kWindow + absl::Seconds(1));
+ tracker_.min_free_pages(-(kWindow + absl::Seconds(1)));
+ tracker_.min_free_pages(-absl::InfiniteDuration());
+}
+
+TEST_F(FillerStatsTrackerTest, ComputeRecentPeaks) {
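+  // GetRecentPeak(d) should report the highest demand point recorded within
+  // the last d, while peaks older than d age out of the answer.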
+ GenerateDemandPoint(Length(3000), Length(1000));
+ Advance(absl::Minutes(1.25));
+ GenerateDemandPoint(Length(1500), Length(0));
+ Advance(absl::Minutes(1));
+ GenerateDemandPoint(Length(100), Length(2000));
+ Advance(absl::Minutes(1));
+ GenerateDemandPoint(Length(200), Length(3000));
+
+ GenerateDemandPoint(Length(200), Length(3000));
+ FillerStatsTracker<>::FillerStats stats =
+ tracker_.GetRecentPeak(absl::Minutes(3));
+ EXPECT_EQ(stats.num_pages, Length(1500));
+ EXPECT_EQ(stats.free_pages, Length(0));
+
+ FillerStatsTracker<>::FillerStats stats2 =
+ tracker_.GetRecentPeak(absl::Minutes(5));
+ EXPECT_EQ(stats2.num_pages, Length(3000));
+ EXPECT_EQ(stats2.free_pages, Length(1000));
+
+ Advance(absl::Minutes(4));
+ GenerateDemandPoint(Length(200), Length(3000));
+
+ FillerStatsTracker<>::FillerStats stats3 =
+ tracker_.GetRecentPeak(absl::Minutes(4));
+ EXPECT_EQ(stats3.num_pages, Length(200));
+ EXPECT_EQ(stats3.free_pages, Length(3000));
+
+ Advance(absl::Minutes(5));
+ GenerateDemandPoint(Length(200), Length(3000));
+
+ FillerStatsTracker<>::FillerStats stats4 =
+ tracker_.GetRecentPeak(absl::Minutes(5));
+ EXPECT_EQ(stats4.num_pages, Length(200));
+ EXPECT_EQ(stats4.free_pages, Length(3000));
+}
+
+TEST_F(FillerStatsTrackerTest, TrackCorrectSubreleaseDecisions) {
+ // First peak (large)
+ GenerateDemandPoint(Length(1000), Length(1000));
+
+ // Incorrect subrelease: Subrelease to 1000
+ Advance(absl::Minutes(1));
+ GenerateDemandPoint(Length(100), Length(1000));
+ tracker_.ReportSkippedSubreleasePages(Length(900), Length(1000),
+ absl::Minutes(3));
+
+ // Second peak (small)
+ Advance(absl::Minutes(1));
+ GenerateDemandPoint(Length(500), Length(1000));
+
+ EXPECT_EQ(tracker_.total_skipped().pages, Length(900));
+ EXPECT_EQ(tracker_.total_skipped().count, 1);
+ EXPECT_EQ(tracker_.correctly_skipped().pages, Length(0));
+ EXPECT_EQ(tracker_.correctly_skipped().count, 0);
+ EXPECT_EQ(tracker_.pending_skipped().pages, Length(900));
+ EXPECT_EQ(tracker_.pending_skipped().count, 1);
+
+ // Correct subrelease: Subrelease to 500
+ Advance(absl::Minutes(1));
+ GenerateDemandPoint(Length(500), Length(100));
+ tracker_.ReportSkippedSubreleasePages(Length(50), Length(550),
+ absl::Minutes(3));
+ GenerateDemandPoint(Length(500), Length(50));
+ tracker_.ReportSkippedSubreleasePages(Length(50), Length(500),
+ absl::Minutes(3));
+ GenerateDemandPoint(Length(500), Length(0));
+
+ EXPECT_EQ(tracker_.total_skipped().pages, Length(1000));
+ EXPECT_EQ(tracker_.total_skipped().count, 3);
+ EXPECT_EQ(tracker_.correctly_skipped().pages, Length(0));
+ EXPECT_EQ(tracker_.correctly_skipped().count, 0);
+ EXPECT_EQ(tracker_.pending_skipped().pages, Length(1000));
+ EXPECT_EQ(tracker_.pending_skipped().count, 3);
+
+ // Third peak (large, too late for first peak)
+ Advance(absl::Minutes(1));
+ GenerateDemandPoint(Length(1100), Length(1000));
+
+ Advance(absl::Minutes(5));
+ GenerateDemandPoint(Length(1100), Length(1000));
+
+ EXPECT_EQ(tracker_.total_skipped().pages, Length(1000));
+ EXPECT_EQ(tracker_.total_skipped().count, 3);
+ EXPECT_EQ(tracker_.correctly_skipped().pages, Length(100));
+ EXPECT_EQ(tracker_.correctly_skipped().count, 2);
+ EXPECT_EQ(tracker_.pending_skipped().pages, Length(0));
+ EXPECT_EQ(tracker_.pending_skipped().count, 0);
+}
+
+TEST_F(FillerStatsTrackerTest, SubreleaseCorrectnessWithChangingIntervals) {
+ // First peak (large)
+ GenerateDemandPoint(Length(1000), Length(1000));
+
+ Advance(absl::Minutes(1));
+ GenerateDemandPoint(Length(100), Length(1000));
+
+ tracker_.ReportSkippedSubreleasePages(Length(50), Length(1000),
+ absl::Minutes(4));
+ Advance(absl::Minutes(1));
+
+ // With two correctness intervals in the same epoch, take the maximum
+ tracker_.ReportSkippedSubreleasePages(Length(100), Length(1000),
+ absl::Minutes(1));
+ tracker_.ReportSkippedSubreleasePages(Length(200), Length(1000),
+ absl::Minutes(7));
+
+ Advance(absl::Minutes(5));
+ GenerateDemandPoint(Length(1100), Length(1000));
+ Advance(absl::Minutes(10));
+ GenerateDemandPoint(Length(1100), Length(1000));
+
+ EXPECT_EQ(tracker_.total_skipped().pages, Length(350));
+ EXPECT_EQ(tracker_.total_skipped().count, 3);
+ EXPECT_EQ(tracker_.correctly_skipped().pages, Length(300));
+ EXPECT_EQ(tracker_.correctly_skipped().count, 2);
+ EXPECT_EQ(tracker_.pending_skipped().pages, Length(0));
+ EXPECT_EQ(tracker_.pending_skipped().count, 0);
+}
+
+std::vector<FillerTest::PAlloc> FillerTest::GenerateInterestingAllocs() {
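+  // Start by subreleasing a nearly empty hugepage so the subrelease counters
+  // (pages subreleased, hugepages broken) already have something to report.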
+ PAlloc a = Allocate(Length(1));
+ EXPECT_EQ(ReleasePages(kMaxValidPages), kPagesPerHugePage - Length(1));
+ Delete(a);
+ // Get the report on the released page
+ EXPECT_EQ(ReleasePages(kMaxValidPages), Length(1));
+
+ // Use a maximally-suboptimal pattern to get lots of hugepages into the
+ // filler.
+ std::vector<PAlloc> result;
+ static_assert(kPagesPerHugePage > Length(7),
+ "Not enough pages per hugepage!");
+ for (auto i = Length(0); i < Length(7); ++i) {
+ result.push_back(Allocate(kPagesPerHugePage - i - Length(1)));
+ }
+
+ // Get two released hugepages.
+ EXPECT_EQ(ReleasePages(Length(7)), Length(7));
+ EXPECT_EQ(ReleasePages(Length(6)), Length(6));
+
+ // Fill some of the remaining pages with small allocations.
+ for (int i = 0; i < 9; ++i) {
+ result.push_back(Allocate(Length(1)));
+ }
+
+ // Finally, donate one hugepage.
+ result.push_back(Allocate(Length(1), /*donated=*/true));
+ return result;
+}
+
+// Test the output of Print(). This is something of a change-detector test,
+// but that's not all bad in this case.
+TEST_P(FillerTest, Print) {
+ if (kPagesPerHugePage != Length(256)) {
+ // The output is hardcoded on this assumption, and dynamically calculating
+ // it would be way too much of a pain.
+ return;
+ }
+ auto allocs = GenerateInterestingAllocs();
+
+ std::string buffer(1024 * 1024, '\0');
+ {
+ Printer printer(&*buffer.begin(), buffer.size());
+ filler_.Print(&printer, /*everything=*/true);
+ buffer.erase(printer.SpaceRequired());
+ }
+
+ EXPECT_THAT(
+ buffer,
+ StrEq(R"(HugePageFiller: densely pack small requests into hugepages
+HugePageFiller: 8 total, 3 full, 3 partial, 2 released (0 partially), 0 quarantined
+HugePageFiller: 261 pages free in 8 hugepages, 0.1274 free
+HugePageFiller: among non-fulls, 0.3398 free
+HugePageFiller: 499 used pages in subreleased hugepages (0 of them in partially released)
+HugePageFiller: 2 hugepages partially released, 0.0254 released
+HugePageFiller: 0.7187 of used pages hugepageable
+HugePageFiller: Since startup, 269 pages subreleased, 3 hugepages broken, (0 pages, 0 hugepages due to reaching tcmalloc limit)
+
+HugePageFiller: fullness histograms
+
+HugePageFiller: # of regular hps with a<= # of free pages <b
+HugePageFiller: < 0<= 3 < 1<= 1 < 2<= 0 < 3<= 0 < 4<= 1 < 16<= 0
+HugePageFiller: < 32<= 0 < 48<= 0 < 64<= 0 < 80<= 0 < 96<= 0 <112<= 0
+HugePageFiller: <128<= 0 <144<= 0 <160<= 0 <176<= 0 <192<= 0 <208<= 0
+HugePageFiller: <224<= 0 <240<= 0 <252<= 0 <253<= 0 <254<= 0 <255<= 0
+
+HugePageFiller: # of donated hps with a<= # of free pages <b
+HugePageFiller: < 0<= 0 < 1<= 0 < 2<= 0 < 3<= 0 < 4<= 0 < 16<= 0
+HugePageFiller: < 32<= 0 < 48<= 0 < 64<= 0 < 80<= 0 < 96<= 0 <112<= 0
+HugePageFiller: <128<= 0 <144<= 0 <160<= 0 <176<= 0 <192<= 0 <208<= 0
+HugePageFiller: <224<= 0 <240<= 0 <252<= 0 <253<= 0 <254<= 0 <255<= 1
+
+HugePageFiller: # of partial released hps with a<= # of free pages <b
+HugePageFiller: < 0<= 0 < 1<= 0 < 2<= 0 < 3<= 0 < 4<= 0 < 16<= 0
+HugePageFiller: < 32<= 0 < 48<= 0 < 64<= 0 < 80<= 0 < 96<= 0 <112<= 0
+HugePageFiller: <128<= 0 <144<= 0 <160<= 0 <176<= 0 <192<= 0 <208<= 0
+HugePageFiller: <224<= 0 <240<= 0 <252<= 0 <253<= 0 <254<= 0 <255<= 0
+
+HugePageFiller: # of released hps with a<= # of free pages <b
+HugePageFiller: < 0<= 0 < 1<= 0 < 2<= 0 < 3<= 0 < 4<= 2 < 16<= 0
+HugePageFiller: < 32<= 0 < 48<= 0 < 64<= 0 < 80<= 0 < 96<= 0 <112<= 0
+HugePageFiller: <128<= 0 <144<= 0 <160<= 0 <176<= 0 <192<= 0 <208<= 0
+HugePageFiller: <224<= 0 <240<= 0 <252<= 0 <253<= 0 <254<= 0 <255<= 0
+
+HugePageFiller: # of regular hps with a<= longest free range <b
+HugePageFiller: < 0<= 3 < 1<= 1 < 2<= 0 < 3<= 0 < 4<= 1 < 16<= 0
+HugePageFiller: < 32<= 0 < 48<= 0 < 64<= 0 < 80<= 0 < 96<= 0 <112<= 0
+HugePageFiller: <128<= 0 <144<= 0 <160<= 0 <176<= 0 <192<= 0 <208<= 0
+HugePageFiller: <224<= 0 <240<= 0 <252<= 0 <253<= 0 <254<= 0 <255<= 0
+
+HugePageFiller: # of partial released hps with a<= longest free range <b
+HugePageFiller: < 0<= 0 < 1<= 0 < 2<= 0 < 3<= 0 < 4<= 0 < 16<= 0
+HugePageFiller: < 32<= 0 < 48<= 0 < 64<= 0 < 80<= 0 < 96<= 0 <112<= 0
+HugePageFiller: <128<= 0 <144<= 0 <160<= 0 <176<= 0 <192<= 0 <208<= 0
+HugePageFiller: <224<= 0 <240<= 0 <252<= 0 <253<= 0 <254<= 0 <255<= 0
+
+HugePageFiller: # of released hps with a<= longest free range <b
+HugePageFiller: < 0<= 0 < 1<= 0 < 2<= 0 < 3<= 0 < 4<= 2 < 16<= 0
+HugePageFiller: < 32<= 0 < 48<= 0 < 64<= 0 < 80<= 0 < 96<= 0 <112<= 0
+HugePageFiller: <128<= 0 <144<= 0 <160<= 0 <176<= 0 <192<= 0 <208<= 0
+HugePageFiller: <224<= 0 <240<= 0 <252<= 0 <253<= 0 <254<= 0 <255<= 0
+
+HugePageFiller: # of regular hps with a<= # of allocations <b
+HugePageFiller: < 1<= 1 < 2<= 1 < 3<= 1 < 4<= 2 < 5<= 0 < 17<= 0
+HugePageFiller: < 33<= 0 < 49<= 0 < 65<= 0 < 81<= 0 < 97<= 0 <113<= 0
+HugePageFiller: <129<= 0 <145<= 0 <161<= 0 <177<= 0 <193<= 0 <209<= 0
+HugePageFiller: <225<= 0 <241<= 0 <253<= 0 <254<= 0 <255<= 0 <256<= 0
+
+HugePageFiller: # of partial released hps with a<= # of allocations <b
+HugePageFiller: < 1<= 0 < 2<= 0 < 3<= 0 < 4<= 0 < 5<= 0 < 17<= 0
+HugePageFiller: < 33<= 0 < 49<= 0 < 65<= 0 < 81<= 0 < 97<= 0 <113<= 0
+HugePageFiller: <129<= 0 <145<= 0 <161<= 0 <177<= 0 <193<= 0 <209<= 0
+HugePageFiller: <225<= 0 <241<= 0 <253<= 0 <254<= 0 <255<= 0 <256<= 0
+
+HugePageFiller: # of released hps with a<= # of allocations <b
+HugePageFiller: < 1<= 2 < 2<= 0 < 3<= 0 < 4<= 0 < 5<= 0 < 17<= 0
+HugePageFiller: < 33<= 0 < 49<= 0 < 65<= 0 < 81<= 0 < 97<= 0 <113<= 0
+HugePageFiller: <129<= 0 <145<= 0 <161<= 0 <177<= 0 <193<= 0 <209<= 0
+HugePageFiller: <225<= 0 <241<= 0 <253<= 0 <254<= 0 <255<= 0 <256<= 0
+
+HugePageFiller: time series over 5 min interval
+
+HugePageFiller: realized fragmentation: 0.0 MiB
+HugePageFiller: minimum free pages: 0 (0 backed)
+HugePageFiller: at peak demand: 1774 pages (and 261 free, 13 unmapped)
+HugePageFiller: at peak demand: 8 hps (5 regular, 1 donated, 0 partial, 2 released)
+HugePageFiller: at peak hps: 1774 pages (and 261 free, 13 unmapped)
+HugePageFiller: at peak hps: 8 hps (5 regular, 1 donated, 0 partial, 2 released)
+
+HugePageFiller: Since the start of the execution, 0 subreleases (0 pages) were skipped due to recent (0s) peaks.
+HugePageFiller: 0.0000% of decisions confirmed correct, 0 pending (0.0000% of pages, 0 pending).
+HugePageFiller: Subrelease stats last 10 min: total 269 pages subreleased, 3 hugepages broken
+)"));
+ for (const auto& alloc : allocs) {
+ Delete(alloc);
+ }
+}
+
+// Test the output of PrintInPbtxt(). This is something of a change-detector
+// test, but that's not all bad in this case.
+TEST_P(FillerTest, PrintInPbtxt) {
+ if (kPagesPerHugePage != Length(256)) {
+ // The output is hardcoded on this assumption, and dynamically calculating
+ // it would be way too much of a pain.
+ return;
+ }
+ auto allocs = GenerateInterestingAllocs();
+
+ std::string buffer(1024 * 1024, '\0');
+ Printer printer(&*buffer.begin(), buffer.size());
+ {
+ PbtxtRegion region(&printer, kTop, /*indent=*/0);
+ filler_.PrintInPbtxt(&region);
+ }
+ buffer.erase(printer.SpaceRequired());
+
+ EXPECT_THAT(buffer, StrEq(R"(
+ filler_full_huge_pages: 3
+ filler_partial_huge_pages: 3
+ filler_released_huge_pages: 2
+ filler_partially_released_huge_pages: 0
+ filler_free_pages: 261
+ filler_used_pages_in_subreleased: 499
+ filler_used_pages_in_partial_released: 0
+ filler_unmapped_bytes: 0
+ filler_hugepageable_used_bytes: 10444800
+ filler_num_pages_subreleased: 269
+ filler_num_hugepages_broken: 3
+ filler_num_pages_subreleased_due_to_limit: 0
+ filler_num_hugepages_broken_due_to_limit: 0
+ filler_tracker {
+ type: REGULAR
+ free_pages_histogram {
+ lower_bound: 0
+ upper_bound: 0
+ value: 3
+ }
+ free_pages_histogram {
+ lower_bound: 1
+ upper_bound: 1
+ value: 1
+ }
+ free_pages_histogram {
+ lower_bound: 2
+ upper_bound: 2
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 3
+ upper_bound: 3
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 4
+ upper_bound: 15
+ value: 1
+ }
+ free_pages_histogram {
+ lower_bound: 16
+ upper_bound: 31
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 32
+ upper_bound: 47
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 48
+ upper_bound: 63
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 64
+ upper_bound: 79
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 80
+ upper_bound: 95
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 96
+ upper_bound: 111
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 112
+ upper_bound: 127
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 128
+ upper_bound: 143
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 144
+ upper_bound: 159
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 160
+ upper_bound: 175
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 176
+ upper_bound: 191
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 192
+ upper_bound: 207
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 208
+ upper_bound: 223
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 224
+ upper_bound: 239
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 240
+ upper_bound: 251
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 252
+ upper_bound: 252
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 253
+ upper_bound: 253
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 254
+ upper_bound: 254
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 255
+ upper_bound: 255
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 0
+ upper_bound: 0
+ value: 3
+ }
+ longest_free_range_histogram {
+ lower_bound: 1
+ upper_bound: 1
+ value: 1
+ }
+ longest_free_range_histogram {
+ lower_bound: 2
+ upper_bound: 2
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 3
+ upper_bound: 3
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 4
+ upper_bound: 15
+ value: 1
+ }
+ longest_free_range_histogram {
+ lower_bound: 16
+ upper_bound: 31
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 32
+ upper_bound: 47
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 48
+ upper_bound: 63
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 64
+ upper_bound: 79
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 80
+ upper_bound: 95
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 96
+ upper_bound: 111
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 112
+ upper_bound: 127
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 128
+ upper_bound: 143
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 144
+ upper_bound: 159
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 160
+ upper_bound: 175
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 176
+ upper_bound: 191
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 192
+ upper_bound: 207
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 208
+ upper_bound: 223
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 224
+ upper_bound: 239
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 240
+ upper_bound: 251
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 252
+ upper_bound: 252
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 253
+ upper_bound: 253
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 254
+ upper_bound: 254
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 255
+ upper_bound: 255
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 1
+ upper_bound: 1
+ value: 1
+ }
+ allocations_histogram {
+ lower_bound: 2
+ upper_bound: 2
+ value: 1
+ }
+ allocations_histogram {
+ lower_bound: 3
+ upper_bound: 3
+ value: 1
+ }
+ allocations_histogram {
+ lower_bound: 4
+ upper_bound: 4
+ value: 2
+ }
+ allocations_histogram {
+ lower_bound: 5
+ upper_bound: 16
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 17
+ upper_bound: 32
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 33
+ upper_bound: 48
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 49
+ upper_bound: 64
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 65
+ upper_bound: 80
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 81
+ upper_bound: 96
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 97
+ upper_bound: 112
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 113
+ upper_bound: 128
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 129
+ upper_bound: 144
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 145
+ upper_bound: 160
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 161
+ upper_bound: 176
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 177
+ upper_bound: 192
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 193
+ upper_bound: 208
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 209
+ upper_bound: 224
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 225
+ upper_bound: 240
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 241
+ upper_bound: 252
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 253
+ upper_bound: 253
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 254
+ upper_bound: 254
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 255
+ upper_bound: 255
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 256
+ upper_bound: 256
+ value: 0
+ }
+ }
+ filler_tracker {
+ type: DONATED
+ free_pages_histogram {
+ lower_bound: 0
+ upper_bound: 0
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 1
+ upper_bound: 1
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 2
+ upper_bound: 2
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 3
+ upper_bound: 3
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 4
+ upper_bound: 15
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 16
+ upper_bound: 31
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 32
+ upper_bound: 47
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 48
+ upper_bound: 63
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 64
+ upper_bound: 79
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 80
+ upper_bound: 95
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 96
+ upper_bound: 111
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 112
+ upper_bound: 127
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 128
+ upper_bound: 143
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 144
+ upper_bound: 159
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 160
+ upper_bound: 175
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 176
+ upper_bound: 191
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 192
+ upper_bound: 207
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 208
+ upper_bound: 223
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 224
+ upper_bound: 239
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 240
+ upper_bound: 251
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 252
+ upper_bound: 252
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 253
+ upper_bound: 253
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 254
+ upper_bound: 254
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 255
+ upper_bound: 255
+ value: 1
+ }
+ longest_free_range_histogram {
+ lower_bound: 0
+ upper_bound: 0
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 1
+ upper_bound: 1
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 2
+ upper_bound: 2
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 3
+ upper_bound: 3
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 4
+ upper_bound: 15
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 16
+ upper_bound: 31
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 32
+ upper_bound: 47
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 48
+ upper_bound: 63
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 64
+ upper_bound: 79
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 80
+ upper_bound: 95
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 96
+ upper_bound: 111
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 112
+ upper_bound: 127
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 128
+ upper_bound: 143
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 144
+ upper_bound: 159
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 160
+ upper_bound: 175
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 176
+ upper_bound: 191
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 192
+ upper_bound: 207
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 208
+ upper_bound: 223
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 224
+ upper_bound: 239
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 240
+ upper_bound: 251
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 252
+ upper_bound: 252
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 253
+ upper_bound: 253
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 254
+ upper_bound: 254
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 255
+ upper_bound: 255
+ value: 1
+ }
+ allocations_histogram {
+ lower_bound: 1
+ upper_bound: 1
+ value: 1
+ }
+ allocations_histogram {
+ lower_bound: 2
+ upper_bound: 2
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 3
+ upper_bound: 3
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 4
+ upper_bound: 4
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 5
+ upper_bound: 16
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 17
+ upper_bound: 32
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 33
+ upper_bound: 48
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 49
+ upper_bound: 64
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 65
+ upper_bound: 80
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 81
+ upper_bound: 96
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 97
+ upper_bound: 112
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 113
+ upper_bound: 128
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 129
+ upper_bound: 144
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 145
+ upper_bound: 160
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 161
+ upper_bound: 176
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 177
+ upper_bound: 192
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 193
+ upper_bound: 208
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 209
+ upper_bound: 224
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 225
+ upper_bound: 240
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 241
+ upper_bound: 252
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 253
+ upper_bound: 253
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 254
+ upper_bound: 254
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 255
+ upper_bound: 255
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 256
+ upper_bound: 256
+ value: 0
+ }
+ }
+ filler_tracker {
+ type: PARTIAL
+ free_pages_histogram {
+ lower_bound: 0
+ upper_bound: 0
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 1
+ upper_bound: 1
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 2
+ upper_bound: 2
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 3
+ upper_bound: 3
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 4
+ upper_bound: 15
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 16
+ upper_bound: 31
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 32
+ upper_bound: 47
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 48
+ upper_bound: 63
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 64
+ upper_bound: 79
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 80
+ upper_bound: 95
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 96
+ upper_bound: 111
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 112
+ upper_bound: 127
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 128
+ upper_bound: 143
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 144
+ upper_bound: 159
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 160
+ upper_bound: 175
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 176
+ upper_bound: 191
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 192
+ upper_bound: 207
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 208
+ upper_bound: 223
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 224
+ upper_bound: 239
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 240
+ upper_bound: 251
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 252
+ upper_bound: 252
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 253
+ upper_bound: 253
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 254
+ upper_bound: 254
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 255
+ upper_bound: 255
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 0
+ upper_bound: 0
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 1
+ upper_bound: 1
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 2
+ upper_bound: 2
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 3
+ upper_bound: 3
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 4
+ upper_bound: 15
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 16
+ upper_bound: 31
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 32
+ upper_bound: 47
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 48
+ upper_bound: 63
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 64
+ upper_bound: 79
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 80
+ upper_bound: 95
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 96
+ upper_bound: 111
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 112
+ upper_bound: 127
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 128
+ upper_bound: 143
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 144
+ upper_bound: 159
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 160
+ upper_bound: 175
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 176
+ upper_bound: 191
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 192
+ upper_bound: 207
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 208
+ upper_bound: 223
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 224
+ upper_bound: 239
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 240
+ upper_bound: 251
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 252
+ upper_bound: 252
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 253
+ upper_bound: 253
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 254
+ upper_bound: 254
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 255
+ upper_bound: 255
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 1
+ upper_bound: 1
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 2
+ upper_bound: 2
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 3
+ upper_bound: 3
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 4
+ upper_bound: 4
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 5
+ upper_bound: 16
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 17
+ upper_bound: 32
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 33
+ upper_bound: 48
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 49
+ upper_bound: 64
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 65
+ upper_bound: 80
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 81
+ upper_bound: 96
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 97
+ upper_bound: 112
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 113
+ upper_bound: 128
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 129
+ upper_bound: 144
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 145
+ upper_bound: 160
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 161
+ upper_bound: 176
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 177
+ upper_bound: 192
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 193
+ upper_bound: 208
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 209
+ upper_bound: 224
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 225
+ upper_bound: 240
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 241
+ upper_bound: 252
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 253
+ upper_bound: 253
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 254
+ upper_bound: 254
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 255
+ upper_bound: 255
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 256
+ upper_bound: 256
+ value: 0
+ }
+ }
+ filler_tracker {
+ type: RELEASED
+ free_pages_histogram {
+ lower_bound: 0
+ upper_bound: 0
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 1
+ upper_bound: 1
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 2
+ upper_bound: 2
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 3
+ upper_bound: 3
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 4
+ upper_bound: 15
+ value: 2
+ }
+ free_pages_histogram {
+ lower_bound: 16
+ upper_bound: 31
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 32
+ upper_bound: 47
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 48
+ upper_bound: 63
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 64
+ upper_bound: 79
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 80
+ upper_bound: 95
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 96
+ upper_bound: 111
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 112
+ upper_bound: 127
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 128
+ upper_bound: 143
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 144
+ upper_bound: 159
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 160
+ upper_bound: 175
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 176
+ upper_bound: 191
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 192
+ upper_bound: 207
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 208
+ upper_bound: 223
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 224
+ upper_bound: 239
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 240
+ upper_bound: 251
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 252
+ upper_bound: 252
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 253
+ upper_bound: 253
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 254
+ upper_bound: 254
+ value: 0
+ }
+ free_pages_histogram {
+ lower_bound: 255
+ upper_bound: 255
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 0
+ upper_bound: 0
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 1
+ upper_bound: 1
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 2
+ upper_bound: 2
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 3
+ upper_bound: 3
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 4
+ upper_bound: 15
+ value: 2
+ }
+ longest_free_range_histogram {
+ lower_bound: 16
+ upper_bound: 31
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 32
+ upper_bound: 47
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 48
+ upper_bound: 63
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 64
+ upper_bound: 79
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 80
+ upper_bound: 95
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 96
+ upper_bound: 111
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 112
+ upper_bound: 127
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 128
+ upper_bound: 143
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 144
+ upper_bound: 159
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 160
+ upper_bound: 175
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 176
+ upper_bound: 191
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 192
+ upper_bound: 207
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 208
+ upper_bound: 223
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 224
+ upper_bound: 239
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 240
+ upper_bound: 251
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 252
+ upper_bound: 252
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 253
+ upper_bound: 253
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 254
+ upper_bound: 254
+ value: 0
+ }
+ longest_free_range_histogram {
+ lower_bound: 255
+ upper_bound: 255
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 1
+ upper_bound: 1
+ value: 2
+ }
+ allocations_histogram {
+ lower_bound: 2
+ upper_bound: 2
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 3
+ upper_bound: 3
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 4
+ upper_bound: 4
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 5
+ upper_bound: 16
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 17
+ upper_bound: 32
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 33
+ upper_bound: 48
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 49
+ upper_bound: 64
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 65
+ upper_bound: 80
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 81
+ upper_bound: 96
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 97
+ upper_bound: 112
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 113
+ upper_bound: 128
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 129
+ upper_bound: 144
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 145
+ upper_bound: 160
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 161
+ upper_bound: 176
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 177
+ upper_bound: 192
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 193
+ upper_bound: 208
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 209
+ upper_bound: 224
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 225
+ upper_bound: 240
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 241
+ upper_bound: 252
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 253
+ upper_bound: 253
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 254
+ upper_bound: 254
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 255
+ upper_bound: 255
+ value: 0
+ }
+ allocations_histogram {
+ lower_bound: 256
+ upper_bound: 256
+ value: 0
+ }
+ }
+ filler_skipped_subrelease {
+ skipped_subrelease_interval_ms: 0
+ skipped_subrelease_pages: 0
+ correctly_skipped_subrelease_pages: 0
+ pending_skipped_subrelease_pages: 0
+ skipped_subrelease_count: 0
+ correctly_skipped_subrelease_count: 0
+ pending_skipped_subrelease_count: 0
+ }
+ filler_stats_timeseries {
+ window_ms: 1000
+ epochs: 600
+ min_free_pages_interval_ms: 300000
+ min_free_pages: 0
+ min_free_backed_pages: 0
+ measurements {
+ epoch: 599
+ timestamp_ms: 0
+ min_free_pages: 0
+ min_free_backed_pages: 0
+ num_pages_subreleased: 269
+ num_hugepages_broken: 3
+ at_minimum_demand {
+ num_pages: 0
+ regular_huge_pages: 0
+ donated_huge_pages: 0
+ partial_released_huge_pages: 0
+ released_huge_pages: 0
+ used_pages_in_subreleased_huge_pages: 0
+ }
+ at_maximum_demand {
+ num_pages: 1774
+ regular_huge_pages: 5
+ donated_huge_pages: 1
+ partial_released_huge_pages: 0
+ released_huge_pages: 2
+ used_pages_in_subreleased_huge_pages: 499
+ }
+ at_minimum_huge_pages {
+ num_pages: 0
+ regular_huge_pages: 0
+ donated_huge_pages: 0
+ partial_released_huge_pages: 0
+ released_huge_pages: 0
+ used_pages_in_subreleased_huge_pages: 0
+ }
+ at_maximum_huge_pages {
+ num_pages: 1774
+ regular_huge_pages: 5
+ donated_huge_pages: 1
+ partial_released_huge_pages: 0
+ released_huge_pages: 2
+ used_pages_in_subreleased_huge_pages: 499
+ }
+ }
+ }
+)"));
+ for (const auto& alloc : allocs) {
+ Delete(alloc);
+ }
+}
+
+// Testing subrelease stats: ensure that the cumulative numbers of released
+// pages and broken hugepages are at least as large as those of the last 10
+// minutes.
+TEST_P(FillerTest, CheckSubreleaseStats) {
+ // Get lots of hugepages into the filler.
+ Advance(absl::Minutes(1));
+ std::vector<PAlloc> result;
+ static_assert(kPagesPerHugePage > Length(10),
+ "Not enough pages per hugepage!");
+ for (int i = 0; i < 10; ++i) {
+ result.push_back(Allocate(kPagesPerHugePage - Length(i + 1)));
+ }
+
+  // Break up 2 hugepages, releasing 19 pages due to reaching the limit.
+ EXPECT_EQ(HardReleasePages(Length(10)), Length(10));
+ EXPECT_EQ(HardReleasePages(Length(9)), Length(9));
+
+ Advance(absl::Minutes(1));
+ SubreleaseStats subrelease = filler_.subrelease_stats();
+ EXPECT_EQ(subrelease.total_pages_subreleased, Length(0));
+ EXPECT_EQ(subrelease.total_hugepages_broken.raw_num(), 0);
+ EXPECT_EQ(subrelease.num_pages_subreleased, Length(19));
+ EXPECT_EQ(subrelease.num_hugepages_broken.raw_num(), 2);
+ EXPECT_EQ(subrelease.total_pages_subreleased_due_to_limit, Length(19));
+ EXPECT_EQ(subrelease.total_hugepages_broken_due_to_limit.raw_num(), 2);
+
+ // Do some work so that the timeseries updates its stats
+ for (int i = 0; i < 5; ++i) {
+ result.push_back(Allocate(Length(1)));
+ }
+ subrelease = filler_.subrelease_stats();
+ EXPECT_EQ(subrelease.total_pages_subreleased, Length(19));
+ EXPECT_EQ(subrelease.total_hugepages_broken.raw_num(), 2);
+ EXPECT_EQ(subrelease.num_pages_subreleased, Length(0));
+ EXPECT_EQ(subrelease.num_hugepages_broken.raw_num(), 0);
+ EXPECT_EQ(subrelease.total_pages_subreleased_due_to_limit, Length(19));
+ EXPECT_EQ(subrelease.total_hugepages_broken_due_to_limit.raw_num(), 2);
+
+  // Break up 3 hugepages, releasing 21 pages (as the background thread would).
+ EXPECT_EQ(ReleasePages(Length(8)), Length(8));
+ EXPECT_EQ(ReleasePages(Length(7)), Length(7));
+ EXPECT_EQ(ReleasePages(Length(6)), Length(6));
+
+ subrelease = filler_.subrelease_stats();
+ EXPECT_EQ(subrelease.total_pages_subreleased, Length(19));
+ EXPECT_EQ(subrelease.total_hugepages_broken.raw_num(), 2);
+ EXPECT_EQ(subrelease.num_pages_subreleased, Length(21));
+ EXPECT_EQ(subrelease.num_hugepages_broken.raw_num(), 3);
+ EXPECT_EQ(subrelease.total_pages_subreleased_due_to_limit, Length(19));
+ EXPECT_EQ(subrelease.total_hugepages_broken_due_to_limit.raw_num(), 2);
+
+ Advance(absl::Minutes(10)); // This forces timeseries to wrap
+ // Do some work
+ for (int i = 0; i < 5; ++i) {
+ result.push_back(Allocate(Length(1)));
+ }
+ subrelease = filler_.subrelease_stats();
+ EXPECT_EQ(subrelease.total_pages_subreleased, Length(40));
+ EXPECT_EQ(subrelease.total_hugepages_broken.raw_num(), 5);
+ EXPECT_EQ(subrelease.num_pages_subreleased, Length(0));
+ EXPECT_EQ(subrelease.num_hugepages_broken.raw_num(), 0);
+ EXPECT_EQ(subrelease.total_pages_subreleased_due_to_limit, Length(19));
+ EXPECT_EQ(subrelease.total_hugepages_broken_due_to_limit.raw_num(), 2);
+
+ std::string buffer(1024 * 1024, '\0');
+ {
+ Printer printer(&*buffer.begin(), buffer.size());
+ filler_.Print(&printer, /*everything=*/true);
+ buffer.erase(printer.SpaceRequired());
+ }
+
+ ASSERT_THAT(
+ buffer,
+ testing::HasSubstr(
+ "HugePageFiller: Since startup, 40 pages subreleased, 5 hugepages "
+ "broken, (19 pages, 2 hugepages due to reaching tcmalloc "
+ "limit)"));
+ ASSERT_THAT(buffer, testing::EndsWith(
+ "HugePageFiller: Subrelease stats last 10 min: total "
+ "21 pages subreleased, 3 hugepages broken\n"));
+
+ for (const auto& alloc : result) {
+ Delete(alloc);
+ }
+}
+
+TEST_P(FillerTest, ConstantBrokenHugePages) {
+  // Get and fill up many huge pages.
+ const HugeLength kHugePages = NHugePages(10 * kPagesPerHugePage.raw_num());
+
+ absl::BitGen rng;
+ std::vector<PAlloc> alloc;
+ alloc.reserve(kHugePages.raw_num());
+ std::vector<PAlloc> dead;
+ dead.reserve(kHugePages.raw_num());
+ std::vector<PAlloc> alloc_small;
+ alloc_small.reserve(kHugePages.raw_num() + 2);
+
+ for (HugeLength i; i < kHugePages; ++i) {
+ auto size =
+ Length(absl::Uniform<size_t>(rng, 2, kPagesPerHugePage.raw_num() - 1));
+ alloc_small.push_back(Allocate(Length(1)));
+ alloc.push_back(Allocate(size - Length(1)));
+ dead.push_back(Allocate(kPagesPerHugePage - size));
+ }
+ ASSERT_EQ(filler_.size(), kHugePages);
+
+ for (int i = 0; i < 2; ++i) {
+ for (auto& a : dead) {
+ Delete(a);
+ }
+ ReleasePages(filler_.free_pages());
+ ASSERT_EQ(filler_.free_pages(), Length(0));
+ alloc_small.push_back(
+ Allocate(Length(1))); // To force subrelease stats to update
+
+ std::string buffer(1024 * 1024, '\0');
+ {
+ Printer printer(&*buffer.begin(), buffer.size());
+ filler_.Print(&printer, /*everything=*/false);
+ buffer.erase(printer.SpaceRequired());
+ }
+
+ ASSERT_THAT(buffer, testing::HasSubstr(absl::StrCat(kHugePages.raw_num(),
+ " hugepages broken")));
+ if (i == 1) {
+ // Number of pages in alloc_small
+ ASSERT_THAT(buffer, testing::HasSubstr(absl::StrCat(
+ kHugePages.raw_num() + 2,
+ " used pages in subreleased hugepages")));
+ // Sum of pages in alloc and dead
+ ASSERT_THAT(buffer,
+ testing::HasSubstr(absl::StrCat(
+ kHugePages.raw_num() * kPagesPerHugePage.raw_num() -
+ kHugePages.raw_num(),
+ " pages subreleased")));
+ }
+
+ dead.swap(alloc);
+ alloc.clear();
+ }
+
+ // Clean up
+ for (auto& a : alloc_small) {
+ Delete(a);
+ }
+}
+
+// Confirms that a timeseries that contains every epoch does not exceed the
+// expected buffer capacity of 1 MiB.
+TEST_P(FillerTest, CheckBufferSize) {
+ const int kEpochs = 600;
+ const absl::Duration kEpochLength = absl::Seconds(1);
+
+ PAlloc big = Allocate(kPagesPerHugePage - Length(4));
+
+ for (int i = 0; i < kEpochs; i += 2) {
+ auto tiny = Allocate(Length(2));
+ Advance(kEpochLength);
+ Delete(tiny);
+ Advance(kEpochLength);
+ }
+
+ Delete(big);
+
+ std::string buffer(1024 * 1024, '\0');
+ Printer printer(&*buffer.begin(), buffer.size());
+ {
+ PbtxtRegion region(&printer, kTop, /*indent=*/0);
+ filler_.PrintInPbtxt(&region);
+ }
+
+ // We assume a maximum buffer size of 1 MiB. When increasing this size, ensure
+ // that all places processing mallocz protos get updated as well.
+ size_t buffer_size = printer.SpaceRequired();
+ printf("HugePageFiller buffer size: %zu\n", buffer_size);
+ EXPECT_LE(buffer_size, 1024 * 1024);
+}
+
+TEST_P(FillerTest, ReleasePriority) {
+ // Fill up many huge pages (>> kPagesPerHugePage). This relies on an
+ // implementation detail of ReleasePages buffering up at most
+ // kPagesPerHugePage as potential release candidates.
+ const HugeLength kHugePages = NHugePages(10 * kPagesPerHugePage.raw_num());
+
+ // We will ensure that we fill full huge pages, then deallocate some parts of
+ // those to provide space for subrelease.
+ absl::BitGen rng;
+ std::vector<PAlloc> alloc;
+ alloc.reserve(kHugePages.raw_num());
+ std::vector<PAlloc> dead;
+ dead.reserve(kHugePages.raw_num());
+
+ absl::flat_hash_set<FakeTracker*> unique_pages;
+ unique_pages.reserve(kHugePages.raw_num());
+
+ for (HugeLength i; i < kHugePages; ++i) {
+ Length size(absl::Uniform<size_t>(rng, 1, kPagesPerHugePage.raw_num() - 1));
+
+ PAlloc a = Allocate(size);
+ unique_pages.insert(a.pt);
+ alloc.push_back(a);
+ dead.push_back(Allocate(kPagesPerHugePage - size));
+ }
+
+ ASSERT_EQ(filler_.size(), kHugePages);
+
+ for (auto& a : dead) {
+ Delete(a);
+ }
+
+ // As of 5/2020, our release priority is to subrelease huge pages with the
+ // fewest used pages. Bucket unique_pages by that used_pages().
+ std::vector<std::vector<FakeTracker*>> ordered(kPagesPerHugePage.raw_num());
+ for (auto* pt : unique_pages) {
+ // None of these should be released yet.
+ EXPECT_FALSE(pt->released());
+ ordered[pt->used_pages().raw_num()].push_back(pt);
+ }
+
+ // Iteratively release random amounts of free memory--until all free pages
+ // become unmapped pages--and validate that we followed the expected release
+ // priority.
+ Length free_pages;
+ while ((free_pages = filler_.free_pages()) > Length(0)) {
+ Length to_release(absl::LogUniform<size_t>(rng, 1, free_pages.raw_num()));
+ Length released = ReleasePages(to_release);
+ ASSERT_LE(released, free_pages);
+
+ // Iterate through each element of ordered. If any trackers are released,
+ // all previous trackers must be released.
+ bool previous_all_released = true;
+ for (auto l = Length(0); l < kPagesPerHugePage; ++l) {
+ bool any_released = false;
+ bool all_released = true;
+
+ for (auto* pt : ordered[l.raw_num()]) {
+ bool released = pt->released();
+
+ any_released |= released;
+ all_released &= released;
+ }
+
+ if (any_released) {
+ EXPECT_TRUE(previous_all_released) << [&]() {
+ // On mismatch, print the bitmap of released states on l-1/l.
+ std::vector<bool> before;
+ if (l > Length(0)) {
+ before.reserve(ordered[l.raw_num() - 1].size());
+ for (auto* pt : ordered[l.raw_num() - 1]) {
+ before.push_back(pt->released());
+ }
+ }
+
+ std::vector<bool> after;
+ after.reserve(ordered[l.raw_num()].size());
+ for (auto* pt : ordered[l.raw_num()]) {
+ after.push_back(pt->released());
+ }
+
+ return absl::StrCat("before = {", absl::StrJoin(before, ";"),
+ "}\nafter = {", absl::StrJoin(after, ";"), "}");
+ }();
+ }
+
+ previous_all_released = all_released;
+ }
+ }
+
+ // All huge pages should be released.
+ for (auto* pt : unique_pages) {
+ EXPECT_TRUE(pt->released());
+ }
+
+ for (auto& a : alloc) {
+ Delete(a);
+ }
+}
+
+INSTANTIATE_TEST_SUITE_P(All, FillerTest,
+ testing::Values(FillerPartialRerelease::Return,
+ FillerPartialRerelease::Retain));
+
+} // namespace
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_pages.h b/contrib/libs/tcmalloc/tcmalloc/huge_pages.h
new file mode 100644
index 0000000000..4498994f75
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/huge_pages.h
@@ -0,0 +1,343 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Helpers for nicely typed interfaces that pass around refs to large
+// ranges. You probably don't want to store HugeRanges long term
+// (nothing will break, but that's not what they're efficient for.)
+#ifndef TCMALLOC_HUGE_PAGES_H_
+#define TCMALLOC_HUGE_PAGES_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <cmath>
+#include <limits>
+#include <ostream>
+#include <utility>
+
+#include "tcmalloc/common.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/internal/optimization.h"
+#include "tcmalloc/pages.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+inline constexpr Length kPagesPerHugePage =
+ Length(1 << (kHugePageShift - kPageShift));
+
+// A single aligned huge page.
+struct HugePage {
+ void *start_addr() const {
+ ASSERT(pn <= kMaxPageNumber);
+ return reinterpret_cast<void *>(pn << kHugePageShift);
+ }
+
+ PageId first_page() const {
+ ASSERT(pn <= kMaxPageNumber);
+ return PageId(pn << (kHugePageShift - kPageShift));
+ }
+
+ size_t index() const {
+ ASSERT(pn <= kMaxPageNumber);
+ return pn;
+ }
+
+ static constexpr uintptr_t kMaxPageNumber =
+ std::numeric_limits<uintptr_t>::max() >> kHugePageShift;
+
+ uintptr_t pn;
+};
+
+struct HugeLength {
+ size_t n;
+
+ constexpr HugeLength() : n(0) {}
+ explicit HugeLength(double x) : n(ceil(x)) { ASSERT(x >= 0); }
+ constexpr size_t raw_num() const { return n; }
+ constexpr size_t in_bytes() const { return n * kHugePageSize; }
+ constexpr size_t in_mib() const {
+ static_assert(kHugePageSize >= 1024 * 1024, "tiny hugepages?");
+ return n * (kHugePageSize / 1024 / 1024);
+ }
+ constexpr Length in_pages() const { return n * kPagesPerHugePage; }
+
+ // It is possible to have a HugeLength that corresponds to more
+ // bytes than can be addressed (i.e. > size_t.) Check for that.
+ bool overflows() const;
+
+ private:
+ explicit constexpr HugeLength(size_t x) : n(x) {}
+ friend constexpr HugeLength NHugePages(size_t n);
+};
+
+// Literal constructors (made explicit to avoid accidental uses when
+// another unit was meant.)
+TCMALLOC_ATTRIBUTE_CONST
+inline constexpr HugeLength NHugePages(size_t n) { return HugeLength(n); }
+
+TCMALLOC_ATTRIBUTE_CONST
+inline constexpr HugeLength HLFromBytes(size_t bytes) {
+ return NHugePages(bytes / kHugePageSize);
+}
+
+// Rounds *up* to the nearest hugepage.
+TCMALLOC_ATTRIBUTE_CONST
+inline constexpr HugeLength HLFromPages(Length pages) {
+ return NHugePages((pages + kPagesPerHugePage - Length(1)) /
+ kPagesPerHugePage);
+}
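+
+// Example (a minimal sketch; page and hugepage sizes are configuration
+// dependent -- the figures below assume the common 2 MiB hugepage / 8 KiB page
+// setup, i.e. kPagesPerHugePage == Length(256)):
+//
+//   HugeLength hl = HLFromBytes(64 * 1024 * 1024);         // NHugePages(32)
+//   Length pages = hl.in_pages();                          // Length(32 * 256)
+//   HugeLength up = HLFromPages(Length(300));              // rounds up: NHugePages(2)
+//   HugeLength down = HLFromBytes(3 * kHugePageSize / 2);  // truncates: NHugePages(1)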
+
+inline HugeLength &operator++(HugeLength &len) { // NOLINT(runtime/references)
+ len.n++;
+ return len;
+}
+
+inline HugePage &operator++(HugePage &p) { // NOLINT(runtime/references)
+ ASSERT(p.pn + 1 <= HugePage::kMaxPageNumber);
+ p.pn++;
+ return p;
+}
+
+inline HugeLength &operator--(HugeLength &len) { // NOLINT(runtime/references)
+ ASSERT(len.n >= 1);
+ len.n--;
+ return len;
+}
+
+inline constexpr bool operator<(HugeLength lhs, HugeLength rhs) {
+ return lhs.n < rhs.n;
+}
+
+TCMALLOC_ATTRIBUTE_CONST
+inline constexpr bool operator>(HugeLength lhs, HugeLength rhs) {
+ return lhs.n > rhs.n;
+}
+
+TCMALLOC_ATTRIBUTE_CONST
+inline constexpr bool operator<=(HugeLength lhs, HugeLength rhs) {
+ return lhs.n <= rhs.n;
+}
+
+TCMALLOC_ATTRIBUTE_CONST
+inline constexpr bool operator<(HugePage lhs, HugePage rhs) {
+ return lhs.pn < rhs.pn;
+}
+
+TCMALLOC_ATTRIBUTE_CONST
+inline constexpr bool operator>(HugePage lhs, HugePage rhs) {
+ return lhs.pn > rhs.pn;
+}
+
+TCMALLOC_ATTRIBUTE_CONST
+inline constexpr bool operator>=(HugeLength lhs, HugeLength rhs) {
+ return lhs.n >= rhs.n;
+}
+
+TCMALLOC_ATTRIBUTE_CONST
+inline constexpr bool operator<=(HugePage lhs, HugePage rhs) {
+ return lhs.pn <= rhs.pn;
+}
+
+TCMALLOC_ATTRIBUTE_CONST
+inline constexpr bool operator>=(HugePage lhs, HugePage rhs) {
+ return lhs.pn >= rhs.pn;
+}
+
+TCMALLOC_ATTRIBUTE_CONST
+inline constexpr bool operator==(HugePage lhs, HugePage rhs) {
+ return lhs.pn == rhs.pn;
+}
+
+TCMALLOC_ATTRIBUTE_CONST
+inline constexpr bool operator!=(HugePage lhs, HugePage rhs) {
+ return !(lhs == rhs);
+}
+
+TCMALLOC_ATTRIBUTE_CONST
+inline constexpr bool operator==(HugeLength lhs, HugeLength rhs) {
+ return lhs.n == rhs.n;
+}
+
+TCMALLOC_ATTRIBUTE_CONST
+inline constexpr bool operator!=(HugeLength lhs, HugeLength rhs) {
+ return lhs.n != rhs.n;
+}
+
+TCMALLOC_ATTRIBUTE_CONST
+inline constexpr size_t operator/(HugeLength lhs, HugeLength rhs) {
+ return lhs.n / rhs.n;
+}
+
+TCMALLOC_ATTRIBUTE_CONST
+inline constexpr HugeLength operator*(HugeLength lhs, size_t rhs) {
+ return NHugePages(lhs.n * rhs);
+}
+
+TCMALLOC_ATTRIBUTE_CONST
+inline constexpr HugeLength operator/(HugeLength lhs, size_t rhs) {
+ return NHugePages(lhs.n / rhs);
+}
+
+inline HugeLength &operator*=(HugeLength &lhs, size_t rhs) {
+ lhs.n *= rhs;
+ return lhs;
+}
+
+TCMALLOC_ATTRIBUTE_CONST
+inline constexpr HugeLength operator%(HugeLength lhs, HugeLength rhs) {
+ return NHugePages(lhs.n % rhs.n);
+}
+
+TCMALLOC_ATTRIBUTE_CONST
+inline constexpr HugePage operator+(HugePage lhs, HugeLength rhs) {
+ ASSERT(lhs.pn + rhs.n <= HugePage::kMaxPageNumber);
+ return HugePage{lhs.pn + rhs.n};
+}
+
+TCMALLOC_ATTRIBUTE_CONST
+inline constexpr HugePage operator+(HugeLength lhs, HugePage rhs) {
+ return rhs + lhs;
+}
+
+TCMALLOC_ATTRIBUTE_CONST
+inline constexpr HugePage operator-(HugePage lhs, HugeLength rhs) {
+ return ASSERT(lhs.pn >= rhs.n), HugePage{lhs.pn - rhs.n};
+}
+
+TCMALLOC_ATTRIBUTE_CONST
+inline constexpr HugeLength operator-(HugePage lhs, HugePage rhs) {
+ return ASSERT(lhs.pn >= rhs.pn), NHugePages(lhs.pn - rhs.pn);
+}
+
+inline HugePage &operator+=(HugePage &lhs, HugeLength rhs) {
+ ASSERT(lhs.pn + rhs.n <= HugePage::kMaxPageNumber);
+ lhs.pn += rhs.n;
+ return lhs;
+}
+
+TCMALLOC_ATTRIBUTE_CONST
+inline constexpr HugeLength operator+(HugeLength lhs, HugeLength rhs) {
+ return NHugePages(lhs.n + rhs.n);
+}
+
+inline HugeLength &operator+=(HugeLength &lhs, HugeLength rhs) {
+ lhs.n += rhs.n;
+ return lhs;
+}
+
+TCMALLOC_ATTRIBUTE_CONST
+inline constexpr HugeLength operator-(HugeLength lhs, HugeLength rhs) {
+ return ASSERT(lhs.n >= rhs.n), NHugePages(lhs.n - rhs.n);
+}
+
+inline HugeLength &operator-=(HugeLength &lhs, HugeLength rhs) {
+ ASSERT(lhs.n >= rhs.n);
+ lhs.n -= rhs.n;
+ return lhs;
+}
+
+inline bool HugeLength::overflows() const {
+ return *this > HLFromBytes(std::numeric_limits<size_t>::max());
+}
+
+inline void PrintTo(const HugeLength &n, ::std::ostream *os) {
+ *os << n.raw_num() << "hps";
+}
+
+TCMALLOC_ATTRIBUTE_CONST
+inline HugePage HugePageContaining(PageId p) {
+ return {p.index() >> (kHugePageShift - kPageShift)};
+}
+
+TCMALLOC_ATTRIBUTE_CONST
+inline HugePage HugePageContaining(void *p) {
+ return HugePageContaining(PageIdContaining(p));
+}
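+
+// Note: for any PageId p, HugePageContaining(p).first_page() <= p and
+// p < (HugePageContaining(p) + NHugePages(1)).first_page().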
+
+// A set of contiguous huge pages.
+struct HugeRange {
+ void *start_addr() const { return first.start_addr(); }
+ void *end_addr() const { return (first + n).start_addr(); }
+ size_t byte_len() const {
+ return static_cast<char *>(end_addr()) - static_cast<char *>(start_addr());
+ }
+
+ // Assume any range starting at 0 is bogus.
+ bool valid() const { return first.start_addr() != nullptr; }
+
+ constexpr HugePage start() const { return first; }
+
+ constexpr HugeLength len() const { return n; }
+
+ HugePage operator[](HugeLength i) const { return first + i; }
+
+ template <typename H>
+ friend H AbslHashValue(H h, const HugeRange &r) {
+ return H::combine(std::move(h), r.start().start_addr(), r.len().raw_num());
+ }
+
+ bool contains(PageId p) const { return contains(HugePageContaining(p)); }
+ bool contains(HugePage p) const { return p >= first && (p - first) < n; }
+ bool contains(HugeRange r) const {
+ return r.first >= first && (r.first + r.n) <= (first + n);
+ }
+
+ bool intersects(HugeRange r) const {
+ return r.contains(start()) || contains(r.start());
+ }
+
+ // True iff r is our immediate successor (i.e. this + r is one large
+ // (non-overlapping) range.)
+ bool precedes(HugeRange r) const { return end_addr() == r.start_addr(); }
+
+ static HugeRange Nil() {
+ return {HugePageContaining(nullptr), NHugePages(0)};
+ }
+
+ static HugeRange Make(HugePage p, HugeLength n) { return {p, n}; }
+
+ HugePage first;
+ HugeLength n;
+};
+
+inline constexpr bool operator==(HugeRange lhs, HugeRange rhs) {
+ return lhs.start() == rhs.start() && lhs.len() == rhs.len();
+}
+
+// REQUIRES: a and b are disjoint but adjacent (in that order)
+
+inline HugeRange Join(HugeRange a, HugeRange b) {
+ CHECK_CONDITION(a.precedes(b));
+ return {a.start(), a.len() + b.len()};
+}
+
+// REQUIRES: r.len() >= n
+// Splits r into two ranges, one of length n. The other is either the rest
+// of the space (if any) or Nil.
+inline std::pair<HugeRange, HugeRange> Split(HugeRange r, HugeLength n) {
+ ASSERT(r.len() >= n);
+ if (r.len() > n) {
+ return {HugeRange::Make(r.start(), n),
+ HugeRange::Make(r.start() + n, r.len() - n)};
+ } else {
+ return {r, HugeRange::Nil()};
+ }
+}
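+
+// Example (illustrative sketch only; `hp` is some HugePage):
+//
+//   HugeRange r = HugeRange::Make(hp, NHugePages(4));
+//   auto parts = Split(r, NHugePages(1));
+//   // parts.first.len() == NHugePages(1), parts.second.len() == NHugePages(3),
+//   // parts.first.precedes(parts.second), and Join(parts.first, parts.second)
+//   // reconstitutes r.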
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+#endif // TCMALLOC_HUGE_PAGES_H_
diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_region.h b/contrib/libs/tcmalloc/tcmalloc/huge_region.h
new file mode 100644
index 0000000000..0262c007b2
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/huge_region.h
@@ -0,0 +1,551 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TCMALLOC_HUGE_REGION_H_
+#define TCMALLOC_HUGE_REGION_H_
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+
+#include "absl/base/internal/cycleclock.h"
+#include "tcmalloc/huge_allocator.h"
+#include "tcmalloc/huge_page_filler.h"
+#include "tcmalloc/internal/linked_list.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/pages.h"
+#include "tcmalloc/stats.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+// Tracks allocations from a fixed-size region spanning multiple huge pages.
+// Similar to PageTracker, but with a few important differences:
+// - crosses multiple hugepages
+// - backs the region on demand
+// - supports breaking up the partially-allocated region for use elsewhere
+//
+// This is intended to help with fast allocation of ranges that are too large
+// for HugePageFiller but too small to justify rounding up to whole hugepages:
+// both lengths that do fit in a hugepage but often wouldn't fit in the
+// available gaps (e.g. 1.75 MiB), and lengths that don't fit but would
+// introduce unacceptable fragmentation if rounded up (e.g. 2.1 MiB).
+//
+class HugeRegion : public TList<HugeRegion>::Elem {
+ public:
+ // We could template this if there was any need.
+ static constexpr HugeLength kRegionSize = HLFromBytes(1024 * 1024 * 1024);
+ static constexpr size_t kNumHugePages = kRegionSize.raw_num();
+ static constexpr HugeLength size() { return kRegionSize; }
+
+ // REQUIRES: r.len() == size(); r unbacked.
+ HugeRegion(HugeRange r, MemoryModifyFunction unback);
+ HugeRegion() = delete;
+
+ // If available, return a range of n free pages, setting *from_released =
+ // true iff the returned range is currently unbacked.
+ // Returns false if no range available.
+ bool MaybeGet(Length n, PageId *p, bool *from_released);
+
+ // Return [p, p + n) for new allocations.
+ // If release=true, release any hugepages made empty as a result.
+ // REQUIRES: [p, p + n) was the result of a previous MaybeGet.
+ void Put(PageId p, Length n, bool release);
+
+ // Release any hugepages that are unused but backed.
+ HugeLength Release();
+
+ // Is p located in this region?
+ bool contains(PageId p) { return location_.contains(p); }
+
+ // Stats
+ Length used_pages() const { return Length(tracker_.used()); }
+ Length free_pages() const {
+ return size().in_pages() - unmapped_pages() - used_pages();
+ }
+ Length unmapped_pages() const { return (size() - nbacked_).in_pages(); }
+
+ void AddSpanStats(SmallSpanStats *small, LargeSpanStats *large,
+ PageAgeHistograms *ages) const;
+
+ HugeLength backed() const;
+
+ void Print(Printer *out) const;
+ void PrintInPbtxt(PbtxtRegion *detail) const;
+
+ BackingStats stats() const;
+
+ // We don't define this as operator< because it's a rather specialized order.
+ bool BetterToAllocThan(const HugeRegion *rhs) const {
+ return longest_free() < rhs->longest_free();
+ }
+
+ void prepend_it(HugeRegion *other) { this->prepend(other); }
+
+ void append_it(HugeRegion *other) { this->append(other); }
+
+ private:
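+  // Tracks which of the region's pages are currently allocated
+  // (FindAndMark / Unmark) and answers queries such as longest_free and
+  // NextFreeRange.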
+ RangeTracker<kRegionSize.in_pages().raw_num()> tracker_;
+
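+  // The contiguous range of huge pages (kRegionSize long) managed by this
+  // region.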
+ HugeRange location_;
+
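+  // Length-weighted average of two timestamps, used to approximate the age of
+  // a combined range of pages.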
+ static int64_t AverageWhens(Length a, int64_t a_when, Length b,
+ int64_t b_when) {
+ const double aw = static_cast<double>(a.raw_num()) * a_when;
+ const double bw = static_cast<double>(b.raw_num()) * b_when;
+ return static_cast<int64_t>((aw + bw) / (a.raw_num() + b.raw_num()));
+ }
+
+ Length longest_free() const { return Length(tracker_.longest_free()); }
+
+ // Adjust counts of allocs-per-hugepage for [p, p + n) being added/removed.
+
+ // *from_released is set to true iff [p, p + n) is currently unbacked
+ void Inc(PageId p, Length n, bool *from_released);
+ // If release is true, unback any hugepage that becomes empty.
+ void Dec(PageId p, Length n, bool release);
+
+ void UnbackHugepages(bool should[kNumHugePages]);
+
+ // How many pages are used in each hugepage?
+ Length pages_used_[kNumHugePages];
+ // Is this hugepage backed?
+ bool backed_[kNumHugePages];
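+  // Number of hugepages in this region that are currently backed.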
+ HugeLength nbacked_;
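+  // Approximate last-activity time (CycleClock ticks) per hugepage; feeds the
+  // page age histograms.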
+ int64_t whens_[kNumHugePages];
+ HugeLength total_unbacked_{NHugePages(0)};
+
+ MemoryModifyFunction unback_;
+};
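+
+// Usage sketch (illustrative only; `r` is a kRegionSize HugeRange that is
+// still unbacked, and `MyUnback` stands in for whatever MemoryModifyFunction
+// the caller provides):
+//
+//   HugeRegion region(r, MyUnback);
+//   PageId p;
+//   bool from_released;
+//   if (region.MaybeGet(Length(300), &p, &from_released)) {
+//     // ... use [p, p + 300); from_released is true iff the range still
+//     // needs to be backed.
+//     region.Put(p, Length(300), /*release=*/false);
+//   }
+//   HugeLength freed = region.Release();  // unback any now-empty hugepages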
+
+// Manage a set of regions from which we allocate.
+// Strategy: Allocate from the most fragmented region that fits.
+template <typename Region>
+class HugeRegionSet {
+ public:
+ HugeRegionSet() : n_(0) {}
+
+ // If available, return a range of n free pages, setting *from_released =
+ // true iff the returned range is currently unbacked.
+ // Returns false if no range available.
+ bool MaybeGet(Length n, PageId *page, bool *from_released);
+
+ // Return an allocation to a region (if one matches!)
+ bool MaybePut(PageId p, Length n);
+
+ // Add region to the set.
+ void Contribute(Region *region);
+
+ // Unback any totally unused hugepages; return the number of pages
+ // we managed to release.
+ HugeLength Release();
+
+ void Print(Printer *out) const;
+ void PrintInPbtxt(PbtxtRegion *hpaa) const;
+ void AddSpanStats(SmallSpanStats *small, LargeSpanStats *large,
+ PageAgeHistograms *ages) const;
+ BackingStats stats() const;
+
+ private:
+ void Fix(Region *r) {
+ // We've changed r's fragmentation--move it through the list to the
+ // correct home (if needed).
+ Rise(r);
+ Fall(r);
+ }
+
+ // Check if r has to move forward in the list.
+ void Rise(Region *r) {
+ auto prev = list_.at(r);
+ --prev;
+ if (prev == list_.end()) return; // we're at the front
+ if (!r->BetterToAllocThan(*prev)) return; // we're far enough forward
+ list_.remove(r);
+ for (auto iter = prev; iter != list_.end(); --iter) {
+ if (!r->BetterToAllocThan(*iter)) {
+ iter->append_it(r);
+ return;
+ }
+ }
+ list_.prepend(r);
+ }
+
+ // Check if r has to move backward in the list.
+ void Fall(Region *r) {
+ auto next = list_.at(r);
+ ++next;
+ if (next == list_.end()) return; // we're at the back
+ if (!next->BetterToAllocThan(r)) return; // we're far enough back
+ list_.remove(r);
+ for (auto iter = next; iter != list_.end(); ++iter) {
+ if (!iter->BetterToAllocThan(r)) {
+ iter->prepend_it(r);
+ return;
+ }
+ }
+ list_.append(r);
+ }
+
+ // Add r in its sorted place.
+ void AddToList(Region *r) {
+ for (Region *curr : list_) {
+ if (r->BetterToAllocThan(curr)) {
+ curr->prepend_it(r);
+ return;
+ }
+ }
+
+ // Note this handles the empty-list case
+ list_.append(r);
+ }
+
+ size_t n_;
+ // Sorted by longest_free increasing.
+ TList<Region> list_;
+};
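+
+// Typical flow (a sketch, not prescriptive):
+//
+//   HugeRegionSet<HugeRegion> regions;
+//   regions.Contribute(region);        // region: a HugeRegion* set up as above
+//   PageId p;
+//   bool from_released;
+//   if (regions.MaybeGet(Length(40), &p, &from_released)) {
+//     // ...
+//     bool returned = regions.MaybePut(p, Length(40));  // true iff a region
+//                                                       // contained p
+//   }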
+
+// REQUIRES: r.len() == size(); r unbacked.
+inline HugeRegion::HugeRegion(HugeRange r, MemoryModifyFunction unback)
+ : tracker_{},
+ location_(r),
+ pages_used_{},
+ backed_{},
+ nbacked_(NHugePages(0)),
+ unback_(unback) {
+ int64_t now = absl::base_internal::CycleClock::Now();
+ for (int i = 0; i < kNumHugePages; ++i) {
+ whens_[i] = now;
+ // These are already 0 but for clarity...
+ pages_used_[i] = Length(0);
+ backed_[i] = false;
+ }
+}
+
+inline bool HugeRegion::MaybeGet(Length n, PageId *p, bool *from_released) {
+ if (n > longest_free()) return false;
+ auto index = Length(tracker_.FindAndMark(n.raw_num()));
+
+ PageId page = location_.start().first_page() + index;
+ *p = page;
+
+ // the last hugepage we touch
+ Inc(page, n, from_released);
+ return true;
+}
+
+// If release=true, release any hugepages made empty as a result.
+inline void HugeRegion::Put(PageId p, Length n, bool release) {
+ Length index = p - location_.start().first_page();
+ tracker_.Unmark(index.raw_num(), n.raw_num());
+
+ Dec(p, n, release);
+}
+
+// Release any hugepages that are unused but backed.
+inline HugeLength HugeRegion::Release() {
+ HugeLength r = NHugePages(0);
+ bool should_unback_[kNumHugePages] = {};
+ for (size_t i = 0; i < kNumHugePages; ++i) {
+ if (backed_[i] && pages_used_[i] == Length(0)) {
+ should_unback_[i] = true;
+ ++r;
+ }
+ }
+ UnbackHugepages(should_unback_);
+ return r;
+}
+
+inline void HugeRegion::AddSpanStats(SmallSpanStats *small,
+ LargeSpanStats *large,
+ PageAgeHistograms *ages) const {
+ size_t index = 0, n;
+ Length f, u;
+ // This is complicated a bit by the backed/unbacked status of pages.
+ while (tracker_.NextFreeRange(index, &index, &n)) {
+ // [index, index + n) is an *unused* range. As it may cross
+ // hugepages, we may need to truncate it so it is either a
+ // *free* or a *released* range, and compute a reasonable value
+ // for its "when".
+ PageId p = location_.start().first_page() + Length(index);
+ const HugePage hp = HugePageContaining(p);
+ size_t i = (hp - location_.start()) / NHugePages(1);
+ const bool backed = backed_[i];
+ Length truncated;
+ int64_t when = 0;
+ while (n > 0 && backed_[i] == backed) {
+ const PageId lim = (location_.start() + NHugePages(i + 1)).first_page();
+ Length here = std::min(Length(n), lim - p);
+ when = AverageWhens(truncated, when, here, whens_[i]);
+ truncated += here;
+ n -= here.raw_num();
+ p += here;
+ i++;
+ ASSERT(i < kNumHugePages || n == 0);
+ }
+ n = truncated.raw_num();
+ const bool released = !backed;
+ if (released) {
+ u += Length(n);
+ } else {
+ f += Length(n);
+ }
+ if (Length(n) < kMaxPages) {
+ if (small != nullptr) {
+ if (released) {
+ small->returned_length[n]++;
+ } else {
+ small->normal_length[n]++;
+ }
+ }
+ } else {
+ if (large != nullptr) {
+ large->spans++;
+ if (released) {
+ large->returned_pages += Length(n);
+ } else {
+ large->normal_pages += Length(n);
+ }
+ }
+ }
+
+ if (ages != nullptr) {
+ ages->RecordRange(Length(n), released, when);
+ }
+ index += n;
+ }
+ CHECK_CONDITION(f == free_pages());
+ CHECK_CONDITION(u == unmapped_pages());
+}
+
+inline HugeLength HugeRegion::backed() const {
+ HugeLength b;
+ for (int i = 0; i < kNumHugePages; ++i) {
+ if (backed_[i]) {
+ ++b;
+ }
+ }
+
+ return b;
+}
+
+inline void HugeRegion::Print(Printer *out) const {
+ const size_t kib_used = used_pages().in_bytes() / 1024;
+ const size_t kib_free = free_pages().in_bytes() / 1024;
+ const size_t kib_longest_free = longest_free().in_bytes() / 1024;
+ const HugeLength unbacked = size() - backed();
+ const size_t mib_unbacked = unbacked.in_mib();
+ out->printf(
+ "HugeRegion: %zu KiB used, %zu KiB free, "
+ "%zu KiB contiguous space, %zu MiB unbacked, "
+ "%zu MiB unbacked lifetime\n",
+ kib_used, kib_free, kib_longest_free, mib_unbacked,
+ total_unbacked_.in_bytes() / 1024 / 1024);
+}
+
+inline void HugeRegion::PrintInPbtxt(PbtxtRegion *detail) const {
+ detail->PrintI64("used_bytes", used_pages().in_bytes());
+ detail->PrintI64("free_bytes", free_pages().in_bytes());
+ detail->PrintI64("longest_free_range_bytes", longest_free().in_bytes());
+ const HugeLength unbacked = size() - backed();
+ detail->PrintI64("unbacked_bytes", unbacked.in_bytes());
+ detail->PrintI64("total_unbacked_bytes", total_unbacked_.in_bytes());
+}
+
+inline BackingStats HugeRegion::stats() const {
+ BackingStats s;
+ s.system_bytes = location_.len().in_bytes();
+ s.free_bytes = free_pages().in_bytes();
+ s.unmapped_bytes = unmapped_pages().in_bytes();
+ return s;
+}
+
+inline void HugeRegion::Inc(PageId p, Length n, bool *from_released) {
+ bool should_back = false;
+ const int64_t now = absl::base_internal::CycleClock::Now();
+ while (n > Length(0)) {
+ const HugePage hp = HugePageContaining(p);
+ const size_t i = (hp - location_.start()) / NHugePages(1);
+ const PageId lim = (hp + NHugePages(1)).first_page();
+ Length here = std::min(n, lim - p);
+ if (pages_used_[i] == Length(0) && !backed_[i]) {
+ backed_[i] = true;
+ should_back = true;
+ ++nbacked_;
+ whens_[i] = now;
+ }
+ pages_used_[i] += here;
+ ASSERT(pages_used_[i] <= kPagesPerHugePage);
+ p += here;
+ n -= here;
+ }
+ *from_released = should_back;
+}
+
+inline void HugeRegion::Dec(PageId p, Length n, bool release) {
+ const int64_t now = absl::base_internal::CycleClock::Now();
+ bool should_unback_[kNumHugePages] = {};
+ while (n > Length(0)) {
+ const HugePage hp = HugePageContaining(p);
+ const size_t i = (hp - location_.start()) / NHugePages(1);
+ const PageId lim = (hp + NHugePages(1)).first_page();
+ Length here = std::min(n, lim - p);
+ ASSERT(here > Length(0));
+ ASSERT(pages_used_[i] >= here);
+ ASSERT(backed_[i]);
+ whens_[i] =
+ AverageWhens(here, now, kPagesPerHugePage - pages_used_[i], whens_[i]);
+ pages_used_[i] -= here;
+ if (pages_used_[i] == Length(0)) {
+ should_unback_[i] = true;
+ }
+ p += here;
+ n -= here;
+ }
+ if (release) {
+ UnbackHugepages(should_unback_);
+ }
+}
+
+inline void HugeRegion::UnbackHugepages(bool should[kNumHugePages]) {
+ const int64_t now = absl::base_internal::CycleClock::Now();
+ size_t i = 0;
+ while (i < kNumHugePages) {
+ if (!should[i]) {
+ i++;
+ continue;
+ }
+ size_t j = i;
+ while (j < kNumHugePages && should[j]) {
+ backed_[j] = false;
+ whens_[j] = now;
+ j++;
+ }
+
+ HugeLength hl = NHugePages(j - i);
+ nbacked_ -= hl;
+ HugePage p = location_.start() + NHugePages(i);
+ unback_(p.start_addr(), hl.in_bytes());
+ total_unbacked_ += hl;
+ i = j;
+ }
+}
+
+// If available, return a range of n free pages, setting *from_released =
+// true iff the returned range is currently unbacked.
+// Returns false if no range available.
+template <typename Region>
+inline bool HugeRegionSet<Region>::MaybeGet(Length n, PageId *page,
+ bool *from_released) {
+ for (Region *region : list_) {
+ if (region->MaybeGet(n, page, from_released)) {
+ Fix(region);
+ return true;
+ }
+ }
+ return false;
+}
+
+// Return an allocation to a region (if one matches!)
+template <typename Region>
+inline bool HugeRegionSet<Region>::MaybePut(PageId p, Length n) {
+ for (Region *region : list_) {
+ if (region->contains(p)) {
+ region->Put(p, n, true);
+ Fix(region);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+// Add region to the set.
+template <typename Region>
+inline void HugeRegionSet<Region>::Contribute(Region *region) {
+ n_++;
+ AddToList(region);
+}
+
+// Unback any totally unused hugepages; return the number of pages
+// we managed to release.
+template <typename Region>
+inline HugeLength HugeRegionSet<Region>::Release() {
+ HugeLength hl = NHugePages(0);
+ for (Region *region : list_) {
+ hl += region->Release();
+ }
+
+ return hl;
+}
+
+template <typename Region>
+inline void HugeRegionSet<Region>::Print(Printer *out) const {
+ out->printf("HugeRegionSet: 1 MiB+ allocations best-fit into %zu MiB slabs\n",
+ Region::size().in_bytes() / 1024 / 1024);
+ out->printf("HugeRegionSet: %zu total regions\n", n_);
+ Length total_free;
+ HugeLength total_backed = NHugePages(0);
+
+ for (Region *region : list_) {
+ region->Print(out);
+ total_free += region->free_pages();
+ total_backed += region->backed();
+ }
+
+ out->printf("HugeRegionSet: %zu hugepages backed out of %zu total\n",
+ total_backed.raw_num(), Region::size().raw_num() * n_);
+
+ const Length in_pages = total_backed.in_pages();
+ out->printf("HugeRegionSet: %zu pages free in backed region, %.4f free\n",
+ total_free.raw_num(),
+ in_pages > Length(0) ? static_cast<double>(total_free.raw_num()) /
+ static_cast<double>(in_pages.raw_num())
+ : 0.0);
+}
+
+template <typename Region>
+inline void HugeRegionSet<Region>::PrintInPbtxt(PbtxtRegion *hpaa) const {
+ hpaa->PrintI64("min_huge_region_alloc_size", 1024 * 1024);
+ hpaa->PrintI64("huge_region_size", Region::size().in_bytes());
+ for (Region *region : list_) {
+ auto detail = hpaa->CreateSubRegion("huge_region_details");
+ region->PrintInPbtxt(&detail);
+ }
+}
+
+template <typename Region>
+inline void HugeRegionSet<Region>::AddSpanStats(SmallSpanStats *small,
+ LargeSpanStats *large,
+ PageAgeHistograms *ages) const {
+ for (Region *region : list_) {
+ region->AddSpanStats(small, large, ages);
+ }
+}
+
+template <typename Region>
+inline BackingStats HugeRegionSet<Region>::stats() const {
+ BackingStats stats;
+ for (Region *region : list_) {
+ stats += region->stats();
+ }
+
+ return stats;
+}
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // TCMALLOC_HUGE_REGION_H_
diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_region_test.cc b/contrib/libs/tcmalloc/tcmalloc/huge_region_test.cc
new file mode 100644
index 0000000000..4370b92762
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/huge_region_test.cc
@@ -0,0 +1,565 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/huge_region.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#include <algorithm>
+#include <memory>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/random/random.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "tcmalloc/common.h"
+#include "tcmalloc/huge_pages.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/stats.h"
+
+namespace tcmalloc {
+namespace tcmalloc_internal {
+namespace {
+
+using testing::NiceMock;
+using testing::StrictMock;
+
+class HugeRegionTest : public ::testing::Test {
+ protected:
+ HugeRegionTest()
+ : // an unlikely magic page
+ p_(HugePageContaining(reinterpret_cast<void *>(0x1faced200000))),
+ region_({p_, region_.size()}, MockUnback) {
+ // we usually don't care about backing calls, unless testing that
+ // specifically.
+ mock_ = absl::make_unique<NiceMock<MockBackingInterface>>();
+ }
+
+ ~HugeRegionTest() override { mock_.reset(nullptr); }
+
+ // This is wordy, but necessary for mocking:
+ class BackingInterface {
+ public:
+ virtual void Unback(void *p, size_t len) = 0;
+ virtual ~BackingInterface() {}
+ };
+
+ class MockBackingInterface : public BackingInterface {
+ public:
+ MOCK_METHOD2(Unback, void(void *p, size_t len));
+ };
+
+ static std::unique_ptr<MockBackingInterface> mock_;
+
+ static void MockUnback(void *p, size_t len) { mock_->Unback(p, len); }
+
+ void CheckMock() { testing::Mock::VerifyAndClearExpectations(mock_.get()); }
+
+ void ExpectUnback(HugeRange r) {
+ void *ptr = r.start_addr();
+ size_t bytes = r.byte_len();
+ EXPECT_CALL(*mock_, Unback(ptr, bytes)).Times(1);
+ }
+
+ struct Alloc {
+ PageId p;
+ Length n;
+ size_t mark;
+ };
+
+ HugePage p_;
+ typedef HugeRegion Region;
+ Region region_;
+ size_t next_mark_{0};
+ size_t marks_[Region::size().in_pages().raw_num()];
+
+ void Mark(Alloc a) {
+ EXPECT_LE(p_.first_page(), a.p);
+ size_t index = (a.p - p_.first_page()).raw_num();
+ size_t end = index + a.n.raw_num();
+ EXPECT_LE(end, region_.size().in_pages().raw_num());
+ for (; index < end; ++index) {
+ marks_[index] = a.mark;
+ }
+ }
+
+ void Check(Alloc a) {
+ EXPECT_LE(p_.first_page(), a.p);
+ size_t index = (a.p - p_.first_page()).raw_num();
+ size_t end = index + a.n.raw_num();
+ EXPECT_LE(end, region_.size().in_pages().raw_num());
+ for (; index < end; ++index) {
+ EXPECT_EQ(a.mark, marks_[index]);
+ }
+ }
+
+ Alloc Allocate(Length n) {
+ bool from_released;
+ return Allocate(n, &from_released);
+ }
+
+ Alloc Allocate(Length n, bool *from_released) {
+ Alloc ret;
+ CHECK_CONDITION(region_.MaybeGet(n, &ret.p, from_released));
+ ret.n = n;
+ ret.mark = ++next_mark_;
+ Mark(ret);
+ return ret;
+ }
+
+ void Delete(Alloc a) {
+ Check(a);
+ region_.Put(a.p, a.n, false);
+ }
+
+ void DeleteUnback(Alloc a) {
+ Check(a);
+ region_.Put(a.p, a.n, true);
+ }
+};
+
+std::unique_ptr<HugeRegionTest::MockBackingInterface> HugeRegionTest::mock_;
+
+TEST_F(HugeRegionTest, Basic) {
+ Length total;
+ std::vector<Alloc> allocs;
+ for (Length n(1); total + n < region_.size().in_pages(); ++n) {
+ allocs.push_back(Allocate(n));
+ total += n;
+ EXPECT_EQ(total, region_.used_pages());
+ }
+
+ // Free every other alloc
+ std::vector<Length> lengths;
+ std::vector<Alloc> new_allocs;
+ for (int j = 0; j < allocs.size(); ++j) {
+ if (j % 2 == 0) {
+ new_allocs.push_back(allocs[j]);
+ continue;
+ }
+ Length n = allocs[j].n;
+ Delete(allocs[j]);
+ total -= n;
+ EXPECT_EQ(total, region_.used_pages());
+ lengths.push_back(n);
+ }
+ allocs.swap(new_allocs);
+ // and reallocate them in a random order:
+ std::shuffle(lengths.begin(), lengths.end(), absl::BitGen());
+  // This should fit, since the allocator is best-fit
+ // and we have unique gaps of each size.
+ for (auto n : lengths) {
+ allocs.push_back(Allocate(n));
+ total += n;
+ EXPECT_EQ(total, region_.used_pages());
+ }
+
+ for (auto a : allocs) {
+ Delete(a);
+ }
+}
+
+TEST_F(HugeRegionTest, ReqsBacking) {
+ const Length n = kPagesPerHugePage;
+ std::vector<Alloc> allocs;
+ // should back the first page
+ bool from_released;
+ allocs.push_back(Allocate(n - Length(1), &from_released));
+ EXPECT_TRUE(from_released);
+ // nothing
+ allocs.push_back(Allocate(Length(1), &from_released));
+ EXPECT_FALSE(from_released);
+ // second page
+ allocs.push_back(Allocate(Length(1), &from_released));
+ EXPECT_TRUE(from_released);
+ // third, fourth, fifth
+ allocs.push_back(Allocate(3 * n, &from_released));
+ EXPECT_TRUE(from_released);
+
+ for (auto a : allocs) {
+ Delete(a);
+ }
+}
+
+TEST_F(HugeRegionTest, Release) {
+ mock_ = absl::make_unique<StrictMock<MockBackingInterface>>();
+ const Length n = kPagesPerHugePage;
+ bool from_released;
+ auto a = Allocate(n * 4 - Length(1), &from_released);
+ EXPECT_TRUE(from_released);
+
+ auto b = Allocate(n * 3, &from_released);
+ EXPECT_TRUE(from_released);
+
+ auto c = Allocate(n * 5 + Length(1), &from_released);
+ EXPECT_TRUE(from_released);
+
+ auto d = Allocate(n * 2, &from_released);
+ EXPECT_TRUE(from_released);
+
+ auto e = Allocate(n / 2, &from_released);
+ EXPECT_TRUE(from_released);
+ auto f = Allocate(n / 2, &from_released);
+ EXPECT_FALSE(from_released);
+
+ // Don't unback the first or last hugepage this touches -- since they
+ // overlap with others.
+ Delete(b);
+ ExpectUnback({p_ + NHugePages(4), NHugePages(2)});
+ EXPECT_EQ(NHugePages(2), region_.Release());
+ CheckMock();
+
+ // Now we're on exact boundaries so we should unback the whole range.
+ Delete(d);
+ ExpectUnback({p_ + NHugePages(12), NHugePages(2)});
+ EXPECT_EQ(NHugePages(2), region_.Release());
+ CheckMock();
+
+ Delete(a);
+ ExpectUnback({p_ + NHugePages(0), NHugePages(4)});
+ EXPECT_EQ(NHugePages(4), region_.Release());
+ CheckMock();
+
+ // Should work just as well with aggressive Put():
+ ExpectUnback({p_ + NHugePages(6), NHugePages(6)});
+ DeleteUnback(c);
+ CheckMock();
+
+ // And this _shouldn't_ do anything (page still in use)
+ DeleteUnback(e);
+ // But this should:
+ ExpectUnback({p_ + NHugePages(14), NHugePages(1)});
+ DeleteUnback(f);
+ CheckMock();
+}
+
+TEST_F(HugeRegionTest, Reback) {
+ mock_ = absl::make_unique<StrictMock<MockBackingInterface>>();
+ const Length n = kPagesPerHugePage / 4;
+ bool from_released;
+ // Even in back/unback cycles we should still call the functions
+ // on every transition.
+ for (int i = 0; i < 20; ++i) {
+ std::vector<Alloc> allocs;
+ allocs.push_back(Allocate(n, &from_released));
+ EXPECT_TRUE(from_released);
+ allocs.push_back(Allocate(n, &from_released));
+ EXPECT_FALSE(from_released);
+ allocs.push_back(Allocate(n, &from_released));
+ EXPECT_FALSE(from_released);
+ allocs.push_back(Allocate(n, &from_released));
+ EXPECT_FALSE(from_released);
+
+ std::shuffle(allocs.begin(), allocs.end(), absl::BitGen());
+ DeleteUnback(allocs[0]);
+ DeleteUnback(allocs[1]);
+ DeleteUnback(allocs[2]);
+
+ ExpectUnback({p_, NHugePages(1)});
+ DeleteUnback(allocs[3]);
+ CheckMock();
+ }
+}
+
+TEST_F(HugeRegionTest, Stats) {
+ const Length kLen = region_.size().in_pages();
+ const size_t kBytes = kLen.in_bytes();
+ struct Helper {
+ static void Stat(const Region &region, std::vector<Length> *small_backed,
+ std::vector<Length> *small_unbacked, LargeSpanStats *large,
+ BackingStats *stats, double *avg_age_backed,
+ double *avg_age_unbacked) {
+ SmallSpanStats small;
+ *large = LargeSpanStats();
+ PageAgeHistograms ages(absl::base_internal::CycleClock::Now());
+ region.AddSpanStats(&small, large, &ages);
+ small_backed->clear();
+ small_unbacked->clear();
+ for (auto i = Length(0); i < kMaxPages; ++i) {
+ for (int j = 0; j < small.normal_length[i.raw_num()]; ++j) {
+ small_backed->push_back(i);
+ }
+
+ for (int j = 0; j < small.returned_length[i.raw_num()]; ++j) {
+ small_unbacked->push_back(i);
+ }
+ }
+
+ *stats = region.stats();
+
+ *avg_age_backed = ages.GetTotalHistogram(false)->avg_age();
+ *avg_age_unbacked = ages.GetTotalHistogram(true)->avg_age();
+ }
+ };
+
+ LargeSpanStats large;
+ std::vector<Length> small_backed, small_unbacked;
+ BackingStats stats;
+ double avg_age_backed, avg_age_unbacked;
+
+ absl::SleepFor(absl::Milliseconds(10));
+ Helper::Stat(region_, &small_backed, &small_unbacked, &large, &stats,
+ &avg_age_backed, &avg_age_unbacked);
+ EXPECT_THAT(small_backed, testing::ElementsAre());
+ EXPECT_THAT(small_unbacked, testing::ElementsAre());
+ EXPECT_EQ(1, large.spans);
+ EXPECT_EQ(Length(0), large.normal_pages);
+ EXPECT_EQ(kLen, large.returned_pages);
+ EXPECT_EQ(kBytes, stats.system_bytes);
+ EXPECT_EQ(0, stats.free_bytes);
+ EXPECT_EQ(kBytes, stats.unmapped_bytes);
+ EXPECT_LE(0.01, avg_age_unbacked);
+ EXPECT_EQ(0, avg_age_backed);
+
+ // We don't, in production, use small allocations from the region, but
+ // the API supports it, so test it here.
+ Alloc a = Allocate(Length(1));
+ Allocate(Length(1));
+ Alloc b = Allocate(Length(2));
+ Alloc barrier = Allocate(Length(1));
+ Alloc c = Allocate(Length(3));
+ Allocate(Length(1));
+ const Length slack = kPagesPerHugePage - Length(9);
+
+ absl::SleepFor(absl::Milliseconds(20));
+ Helper::Stat(region_, &small_backed, &small_unbacked, &large, &stats,
+ &avg_age_backed, &avg_age_unbacked);
+ EXPECT_THAT(small_backed, testing::ElementsAre());
+ EXPECT_THAT(small_unbacked, testing::ElementsAre());
+ EXPECT_EQ(2, large.spans);
+ EXPECT_EQ(slack, large.normal_pages);
+ EXPECT_EQ(kLen - kPagesPerHugePage, large.returned_pages);
+ EXPECT_EQ(kBytes, stats.system_bytes);
+ EXPECT_EQ(slack.in_bytes(), stats.free_bytes);
+ EXPECT_EQ((region_.size() - NHugePages(1)).in_bytes(), stats.unmapped_bytes);
+ EXPECT_LE(0.02, avg_age_backed);
+ EXPECT_LE(0.03, avg_age_unbacked);
+
+ Delete(a);
+ absl::SleepFor(absl::Milliseconds(30));
+ Helper::Stat(region_, &small_backed, &small_unbacked, &large, &stats,
+ &avg_age_backed, &avg_age_unbacked);
+ EXPECT_THAT(small_backed, testing::ElementsAre(Length(1)));
+ EXPECT_THAT(small_unbacked, testing::ElementsAre());
+ EXPECT_EQ(2, large.spans);
+ EXPECT_EQ(slack, large.normal_pages);
+ EXPECT_EQ(kLen - kPagesPerHugePage, large.returned_pages);
+ EXPECT_EQ(kBytes, stats.system_bytes);
+ EXPECT_EQ((slack + Length(1)).in_bytes(), stats.free_bytes);
+ EXPECT_EQ((region_.size() - NHugePages(1)).in_bytes(), stats.unmapped_bytes);
+ EXPECT_LE((slack.raw_num() * 0.05 + 1 * 0.03) / (slack.raw_num() + 1),
+ avg_age_backed);
+ EXPECT_LE(0.06, avg_age_unbacked);
+
+ Delete(b);
+ absl::SleepFor(absl::Milliseconds(40));
+ Helper::Stat(region_, &small_backed, &small_unbacked, &large, &stats,
+ &avg_age_backed, &avg_age_unbacked);
+ EXPECT_THAT(small_backed, testing::ElementsAre(Length(1), Length(2)));
+ EXPECT_THAT(small_unbacked, testing::ElementsAre());
+ EXPECT_EQ(2, large.spans);
+ EXPECT_EQ(slack, large.normal_pages);
+ EXPECT_EQ(kLen - kPagesPerHugePage, large.returned_pages);
+ EXPECT_EQ(kBytes, stats.system_bytes);
+ EXPECT_EQ((slack + Length(3)).in_bytes(), stats.free_bytes);
+ EXPECT_EQ((region_.size() - NHugePages(1)).in_bytes(), stats.unmapped_bytes);
+ EXPECT_LE(
+ (slack.raw_num() * 0.09 + 1 * 0.07 + 2 * 0.04) / (slack.raw_num() + 3),
+ avg_age_backed);
+ EXPECT_LE(0.10, avg_age_unbacked);
+
+ Delete(c);
+ absl::SleepFor(absl::Milliseconds(50));
+ Helper::Stat(region_, &small_backed, &small_unbacked, &large, &stats,
+ &avg_age_backed, &avg_age_unbacked);
+ EXPECT_THAT(small_backed,
+ testing::ElementsAre(Length(1), Length(2), Length(3)));
+ EXPECT_THAT(small_unbacked, testing::ElementsAre());
+ EXPECT_EQ(2, large.spans);
+ EXPECT_EQ(slack, large.normal_pages);
+ EXPECT_EQ(kLen - kPagesPerHugePage, large.returned_pages);
+ EXPECT_EQ(kBytes, stats.system_bytes);
+ EXPECT_EQ((slack + Length(6)).in_bytes(), stats.free_bytes);
+ EXPECT_EQ((region_.size() - NHugePages(1)).in_bytes(), stats.unmapped_bytes);
+ EXPECT_LE((slack.raw_num() * 0.14 + 1 * 0.12 + 2 * 0.09 + 3 * 0.05) /
+ (slack.raw_num() + 6),
+ avg_age_backed);
+ EXPECT_LE(0.15, avg_age_unbacked);
+
+ Delete(barrier);
+ absl::SleepFor(absl::Milliseconds(60));
+ Helper::Stat(region_, &small_backed, &small_unbacked, &large, &stats,
+ &avg_age_backed, &avg_age_unbacked);
+ EXPECT_THAT(small_backed, testing::ElementsAre(Length(1), Length(6)));
+ EXPECT_THAT(small_unbacked, testing::ElementsAre());
+ EXPECT_EQ(2, large.spans);
+ EXPECT_EQ(slack, large.normal_pages);
+ EXPECT_EQ(kLen - kPagesPerHugePage, large.returned_pages);
+ EXPECT_EQ(kBytes, stats.system_bytes);
+ EXPECT_EQ((slack + Length(7)).in_bytes(), stats.free_bytes);
+ EXPECT_EQ((region_.size() - NHugePages(1)).in_bytes(), stats.unmapped_bytes);
+ EXPECT_LE(
+ (slack.raw_num() * 0.20 + 1 * 0.18 + 2 * 0.15 + 3 * 0.11 + 1 * 0.06) /
+ (slack.raw_num() + 7),
+ avg_age_backed);
+ EXPECT_LE(0.21, avg_age_unbacked);
+}
+
+// Test that free regions are broken down properly when they cross
+// page boundaries that change the backed/unbacked state.
+TEST_F(HugeRegionTest, StatBreakdown) {
+ const Length n = kPagesPerHugePage;
+ Alloc a = Allocate(n / 4);
+ Alloc b = Allocate(n * 3 + n / 3);
+ Alloc c = Allocate((n - n / 3 - n / 4) + n * 5 + n / 5);
+ Alloc d = Allocate(n - (n / 5) - Length(1));
+ // This unbacks the middle 2 hugepages, but not the beginning or
+ // trailing region
+ DeleteUnback(b);
+ Delete(c);
+ SmallSpanStats small;
+ LargeSpanStats large;
+ region_.AddSpanStats(&small, &large, nullptr);
+ // Backed beginning of hugepage 0, unbacked range in middle of b,
+ // long backed range from c, unbacked tail of allocation.
+ EXPECT_EQ(4, large.spans);
+ // Tail end of A's page, B/C combined page + all of C.
+ EXPECT_EQ((n - n / 4) + n * 6 + (n / 5), large.normal_pages);
+ // The above fill up 10 total pages.
+ EXPECT_EQ(2 * n + (Region::size().raw_num() - 10) * n, large.returned_pages);
+ EXPECT_EQ(1, small.normal_length[1]);
+
+ EXPECT_EQ(Length(1) + large.normal_pages + large.returned_pages +
+ region_.used_pages(),
+ Region::size().in_pages());
+ Delete(a);
+ Delete(d);
+}
+
+static void NilUnback(void *p, size_t bytes) {}
+
+class HugeRegionSetTest : public testing::Test {
+ protected:
+ typedef HugeRegion Region;
+
+ HugeRegionSetTest() { next_ = HugePageContaining(nullptr); }
+
+ std::unique_ptr<Region> GetRegion() {
+ // These regions are backed by "real" memory, but we don't touch it.
+ std::unique_ptr<Region> r(new Region({next_, Region::size()}, NilUnback));
+ next_ += Region::size();
+ return r;
+ }
+
+ HugeRegionSet<Region> set_;
+ HugePage next_;
+
+ struct Alloc {
+ PageId p;
+ Length n;
+ };
+};
+
+TEST_F(HugeRegionSetTest, Set) {
+ absl::BitGen rng;
+ PageId p;
+ constexpr Length kSize = kPagesPerHugePage + Length(1);
+ bool from_released;
+ ASSERT_FALSE(set_.MaybeGet(Length(1), &p, &from_released));
+ auto r1 = GetRegion();
+ auto r2 = GetRegion();
+ auto r3 = GetRegion();
+ auto r4 = GetRegion();
+ set_.Contribute(r1.get());
+ set_.Contribute(r2.get());
+ set_.Contribute(r3.get());
+ set_.Contribute(r4.get());
+
+ std::vector<Alloc> allocs;
+ std::vector<Alloc> doomed;
+
+ while (set_.MaybeGet(kSize, &p, &from_released)) {
+ allocs.push_back({p, kSize});
+ }
+
+ // Define a random set by shuffling, then move half of the allocations into
+ // doomed.
+ std::shuffle(allocs.begin(), allocs.end(), rng);
+ doomed.insert(doomed.begin(), allocs.begin() + allocs.size() / 2,
+ allocs.end());
+ allocs.erase(allocs.begin() + allocs.size() / 2, allocs.end());
+
+ for (auto d : doomed) {
+ ASSERT_TRUE(set_.MaybePut(d.p, d.n));
+ }
+
+ for (size_t i = 0; i < 100 * 1000; ++i) {
+ const size_t N = allocs.size();
+ size_t index = absl::Uniform<int32_t>(rng, 0, N);
+ std::swap(allocs[index], allocs[N - 1]);
+ auto a = allocs.back();
+ ASSERT_TRUE(set_.MaybePut(a.p, a.n));
+ allocs.pop_back();
+ ASSERT_TRUE(set_.MaybeGet(kSize, &p, &from_released));
+ allocs.push_back({p, kSize});
+ }
+
+ // Random traffic should have defragmented our allocations into full
+ // and empty regions, and released the empty ones. Annoyingly, we don't
+ // know which region is which, so we have to do a bit of silliness:
+ std::vector<Region *> regions = {r1.get(), r2.get(), r3.get(), r4.get()};
+ std::sort(regions.begin(), regions.end(),
+ [](const Region *a, const Region *b) -> bool {
+ return a->used_pages() > b->used_pages();
+ });
+
+ for (int i = 0; i < regions.size(); i++) {
+ Log(kLog, __FILE__, __LINE__, i, regions[i]->used_pages().raw_num(),
+ regions[i]->free_pages().raw_num(),
+ regions[i]->unmapped_pages().raw_num());
+ }
+ // Now first two should be "full" (ish)
+ EXPECT_LE(Region::size().in_pages().raw_num() * 0.9,
+ regions[0]->used_pages().raw_num());
+ EXPECT_LE(Region::size().in_pages().raw_num() * 0.9,
+ regions[1]->used_pages().raw_num());
+ // and last two "empty" (ish.)
+ EXPECT_LE(Region::size().in_pages().raw_num() * 0.9,
+ regions[2]->unmapped_pages().raw_num());
+ EXPECT_LE(Region::size().in_pages().raw_num() * 0.9,
+ regions[3]->unmapped_pages().raw_num());
+
+ // Check the stats line up.
+ auto stats = set_.stats();
+ auto raw = r1->stats();
+ raw += r2->stats();
+ raw += r3->stats();
+ raw += r4->stats();
+ EXPECT_EQ(raw.system_bytes, stats.system_bytes);
+ EXPECT_EQ(raw.unmapped_bytes, stats.unmapped_bytes);
+ EXPECT_EQ(raw.free_bytes, stats.free_bytes);
+
+ // Print out the stats for inspection of formats.
+ std::vector<char> buf(64 * 1024);
+ Printer out(&buf[0], buf.size());
+ set_.Print(&out);
+ printf("%s\n", &buf[0]);
+}
+
+} // namespace
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/atomic_danger.h b/contrib/libs/tcmalloc/tcmalloc/internal/atomic_danger.h
new file mode 100644
index 0000000000..49c95d66cb
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/atomic_danger.h
@@ -0,0 +1,60 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// The routines exported by this module are subtle and dangerous.
+
+#ifndef TCMALLOC_INTERNAL_ATOMIC_DANGER_H_
+#define TCMALLOC_INTERNAL_ATOMIC_DANGER_H_
+
+#include <atomic>
+#include <type_traits>
+
+#include "tcmalloc/internal/config.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+namespace atomic_danger {
+
+// Casts the address of a std::atomic<IntType> to the address of an IntType.
+//
+// This is almost certainly not the function you are looking for! It is
+// undefined behavior, as the object under a std::atomic<int> isn't
+// fundamentally an int. This function is intended for passing the address of
+// an atomic integer to syscalls or to hand-written assembly.
+//
+// Callers should be migrated if C++ standardizes a better way to do this:
+// * http://wg21.link/n4013 (Atomic operations on non-atomic data)
+// * http://wg21.link/p0019 (Atomic Ref, merged into C++20)
+// * http://wg21.link/p1478 (Byte-wise atomic memcpy)
+template <typename IntType>
+IntType* CastToIntegral(std::atomic<IntType>* atomic_for_syscall) {
+ static_assert(std::is_integral<IntType>::value,
+ "CastToIntegral must be instantiated with an integral type.");
+#if __cpp_lib_atomic_is_always_lock_free >= 201603
+ static_assert(std::atomic<IntType>::is_always_lock_free,
+ "CastToIntegral must be instantiated with a lock-free type.");
+#else
+ static_assert(__atomic_always_lock_free(sizeof(IntType),
+ nullptr /* typical alignment */),
+ "CastToIntegral must be instantiated with a lock-free type.");
+#endif
+ return reinterpret_cast<IntType*>(atomic_for_syscall);
+}
+} // namespace atomic_danger
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // TCMALLOC_INTERNAL_ATOMIC_DANGER_H_
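As a hedged illustration of the narrow use case this header targets, the sketch below hands a lock-free atomic word to the futex syscall; the wrapper name FutexWakeOne and the flag choice are assumptions for this example, not part of the patch.

// Minimal sketch (not part of the patch): wake one waiter blocked on a futex
// word that is otherwise manipulated through std::atomic.
#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>

#include <atomic>
#include <cstdint>

#include "tcmalloc/internal/atomic_danger.h"

void FutexWakeOne(std::atomic<int32_t>* word) {
  // CastToIntegral is only valid for lock-free atomics; int32_t qualifies on
  // the platforms this library supports.
  int32_t* raw =
      tcmalloc::tcmalloc_internal::atomic_danger::CastToIntegral(word);
  syscall(SYS_futex, raw, FUTEX_WAKE_PRIVATE, 1, nullptr, nullptr, 0);
}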
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/atomic_stats_counter.h b/contrib/libs/tcmalloc/tcmalloc/internal/atomic_stats_counter.h
new file mode 100644
index 0000000000..da7f30646d
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/atomic_stats_counter.h
@@ -0,0 +1,74 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TCMALLOC_INTERNAL_ATOMIC_STATS_COUNTER_H_
+#define TCMALLOC_INTERNAL_ATOMIC_STATS_COUNTER_H_
+
+#include <atomic>
+
+#include "absl/base/macros.h"
+#include "tcmalloc/internal/config.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+class StatsCounter {
+ public:
+ constexpr StatsCounter() : value_(0) {}
+ StatsCounter(const StatsCounter&) = delete;
+ StatsCounter& operator=(const StatsCounter&) = delete;
+
+ ~StatsCounter() = default;
+
+ using Value = int64_t;
+
+ // Add "increment" to this statistics counter.
+ // "increment" may take any value, including negative ones.
+ // Counts are not lost in the face of concurrent uses of Add().
+ // Counts added by this call may be lost in the face of concurrent calls to
+ // LossyAdd().
+ // This call is suitable for maintaining statistics. It is not suitable
+ // for other purposes; in particular, it should not be used for
+ // data synchronization, generating sequence numbers, or reference counting.
+ void Add(Value increment) {
+ // As always, clients may not assume properties implied by the
+ // implementation, which may change.
+ this->value_.fetch_add(increment, std::memory_order_relaxed);
+ }
+
+ // Return the current value of the counter.
+ Value value() const { return this->value_.load(std::memory_order_relaxed); }
+
+ // Add "increment" to this lossy statistics counter. Counts (including those
+ // added by other calls) _may be lost_ if this call is used concurrently with
+ // other calls to LossyAdd() or Add(). This call is suitable for maintaining
+ // statistics where performance is more important than not losing counts. It
+ // is not suitable for other purposes; in particular, it should not be used
+ // for data synchronization, generating sequence numbers, or reference
+ // counting.
+ void LossyAdd(Value increment) {
+ this->value_.store(this->value_.load(std::memory_order_relaxed) + increment,
+ std::memory_order_relaxed);
+ }
+
+ private:
+ std::atomic<Value> value_;
+};
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // TCMALLOC_INTERNAL_ATOMIC_STATS_COUNTER_H_
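A short usage sketch (the counter and helper names are hypothetical): Add() is the safe default for concurrent updates, while LossyAdd() trades accuracy for a plain load/store pair on hot paths.

#include <cstddef>
#include <cstdint>

#include "tcmalloc/internal/atomic_stats_counter.h"

namespace {
// Hypothetical global statistic; suitable only for reporting, never for
// synchronization or reference counting.
tcmalloc::tcmalloc_internal::StatsCounter allocated_bytes;
}  // namespace

void RecordAlloc(size_t bytes) {
  allocated_bytes.Add(static_cast<int64_t>(bytes));       // never loses counts
}

void RecordAllocFastPath(size_t bytes) {
  allocated_bytes.LossyAdd(static_cast<int64_t>(bytes));  // may lose counts
}

int64_t AllocatedBytesSnapshot() { return allocated_bytes.value(); }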
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/bits.h b/contrib/libs/tcmalloc/tcmalloc/internal/bits.h
new file mode 100644
index 0000000000..80ca17085c
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/bits.h
@@ -0,0 +1,82 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TCMALLOC_INTERNAL_BITS_H_
+#define TCMALLOC_INTERNAL_BITS_H_
+
+#include <cstdint>
+#include <limits>
+#include <type_traits>
+
+#include "tcmalloc/internal/logging.h"
+
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+class Bits {
+ public:
+ // Returns true if a value is zero or a power of two.
+ template <typename T>
+ static constexpr
+ typename std::enable_if<std::is_unsigned<T>::value, bool>::type
+ IsZeroOrPow2(T n) {
+ return (n & (n - 1)) == 0;
+ }
+
+ // Returns true if a value is a power of two.
+ template <typename T>
+ static constexpr
+ typename std::enable_if<std::is_unsigned<T>::value, bool>::type
+ IsPow2(T n) {
+ return n != 0 && (n & (n - 1)) == 0;
+ }
+
+ template <typename T>
+ static constexpr typename std::enable_if<std::is_unsigned<T>::value, T>::type
+ Log2Floor(T n) {
+ if (n == 0) {
+ return -1;
+ }
+
+ if (sizeof(T) <= sizeof(unsigned int)) {
+ return std::numeric_limits<T>::digits - 1 - __builtin_clz(n);
+ } else if (sizeof(T) <= sizeof(unsigned long)) {
+ return std::numeric_limits<T>::digits - 1 - __builtin_clzl(n);
+ } else {
+ static_assert(sizeof(T) <= sizeof(unsigned long long));
+ return std::numeric_limits<T>::digits - 1 - __builtin_clzll(n);
+ }
+ }
+
+ template <typename T>
+ static constexpr typename std::enable_if<std::is_unsigned<T>::value, T>::type
+ Log2Ceiling(T n) {
+ T floor = Log2Floor(n);
+ if (IsZeroOrPow2(n))
+ return floor;
+ else
+ return floor + 1;
+ }
+
+ template <typename T>
+ static constexpr typename std::enable_if<std::is_unsigned<T>::value, T>::type
+ RoundUpToPow2(T n) {
+ if (n == 0) return 1;
+ return T{1} << Log2Ceiling(n);
+ }
+};
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+
+#endif // TCMALLOC_INTERNAL_BITS_H_
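For orientation, a small sketch of how these helpers compose (the sizing function is hypothetical): round a requested capacity up to a power of two and recover its exponent.

#include <cstddef>

#include "tcmalloc/internal/bits.h"

// Hypothetical helper: size a power-of-two table for `want` entries and report
// the corresponding shift.
size_t TableSizeFor(size_t want, size_t* log2_size) {
  using tcmalloc::tcmalloc_internal::Bits;
  const size_t size = Bits::RoundUpToPow2(want);  // 0 -> 1, 1337 -> 2048, ...
  *log2_size = Bits::Log2Floor(size);             // exact, since size is a pow2
  return size;
}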
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/bits_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/bits_test.cc
new file mode 100644
index 0000000000..0589b314d2
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/bits_test.cc
@@ -0,0 +1,104 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/internal/bits.h"
+
+#include <cstdint>
+#include <limits>
+#include <memory>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/random/random.h"
+
+namespace tcmalloc {
+namespace tcmalloc_internal {
+namespace {
+
+TEST(BitsTest, Log2EdgeCases) {
+ EXPECT_EQ(-1, Bits::Log2Floor(0u));
+ EXPECT_EQ(-1, Bits::Log2Ceiling(0u));
+
+ for (int i = 0; i < 32; i++) {
+ uint32_t n = 1U << i;
+ EXPECT_EQ(i, Bits::Log2Floor(n));
+ EXPECT_EQ(i, Bits::Log2Ceiling(n));
+ if (n > 2) {
+ EXPECT_EQ(i - 1, Bits::Log2Floor(n - 1));
+ EXPECT_EQ(i, Bits::Log2Floor(n + 1));
+ EXPECT_EQ(i, Bits::Log2Ceiling(n - 1));
+ EXPECT_EQ(i + 1, Bits::Log2Ceiling(n + 1));
+ }
+ }
+
+ EXPECT_EQ(Bits::Log2Ceiling(uint64_t{0x40000000000}), 42);
+ EXPECT_EQ(Bits::Log2Floor(uint64_t{0x40000000000}), 42);
+}
+
+TEST(BitsTest, Log2Random) {
+ absl::BitGen random;
+
+ const int kNumIterations = 10000;
+ for (int i = 0; i < kNumIterations; i++) {
+ int maxbit = -1;
+ uint32_t n = 0;
+ while (!absl::Bernoulli(random, 1.0 / 32)) {
+ int bit = absl::Uniform<int32_t>(random, 0, 32);
+ n |= (1U << bit);
+ maxbit = std::max(bit, maxbit);
+ }
+ EXPECT_EQ(maxbit, Bits::Log2Floor(n));
+ }
+}
+
+TEST(BitsTest, IsZeroOrPow2) {
+ EXPECT_TRUE(Bits::IsZeroOrPow2(0u));
+ EXPECT_TRUE(Bits::IsZeroOrPow2(1u));
+ EXPECT_TRUE(Bits::IsZeroOrPow2(2u));
+ EXPECT_FALSE(Bits::IsZeroOrPow2(3u));
+ EXPECT_TRUE(Bits::IsZeroOrPow2(4u));
+ EXPECT_FALSE(Bits::IsZeroOrPow2(1337u));
+ EXPECT_TRUE(Bits::IsZeroOrPow2(65536u));
+ EXPECT_FALSE(Bits::IsZeroOrPow2(std::numeric_limits<uint32_t>::max()));
+ EXPECT_TRUE(Bits::IsZeroOrPow2(uint32_t{1} << 31));
+}
+
+TEST(BitsTest, IsPow2) {
+ EXPECT_FALSE(Bits::IsPow2(0u));
+ EXPECT_TRUE(Bits::IsPow2(1u));
+ EXPECT_TRUE(Bits::IsPow2(2u));
+ EXPECT_FALSE(Bits::IsPow2(3u));
+ EXPECT_TRUE(Bits::IsPow2(4u));
+ EXPECT_FALSE(Bits::IsPow2(1337u));
+ EXPECT_TRUE(Bits::IsPow2(65536u));
+ EXPECT_FALSE(Bits::IsPow2(std::numeric_limits<uint32_t>::max()));
+ EXPECT_TRUE(Bits::IsPow2(uint32_t{1} << 31));
+}
+
+TEST(BitsTest, RoundUpToPow2) {
+ EXPECT_EQ(Bits::RoundUpToPow2(0u), 1);
+ EXPECT_EQ(Bits::RoundUpToPow2(1u), 1);
+ EXPECT_EQ(Bits::RoundUpToPow2(2u), 2);
+ EXPECT_EQ(Bits::RoundUpToPow2(3u), 4);
+ EXPECT_EQ(Bits::RoundUpToPow2(4u), 4);
+ EXPECT_EQ(Bits::RoundUpToPow2(1337u), 2048);
+ EXPECT_EQ(Bits::RoundUpToPow2(65536u), 65536);
+ EXPECT_EQ(Bits::RoundUpToPow2(65536u - 1337u), 65536);
+ EXPECT_EQ(Bits::RoundUpToPow2(uint64_t{0x40000000000}),
+ uint64_t{0x40000000000});
+}
+
+} // namespace
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/cache_topology.cc b/contrib/libs/tcmalloc/tcmalloc/internal/cache_topology.cc
new file mode 100644
index 0000000000..12a1709b34
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/cache_topology.cc
@@ -0,0 +1,88 @@
+// Copyright 2021 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/internal/cache_topology.h"
+
+#include <fcntl.h>
+#include <string.h>
+
+#include "absl/strings/numbers.h"
+#include "absl/strings/string_view.h"
+#include "tcmalloc/internal/config.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/internal/util.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+namespace {
+int OpenSysfsCacheList(size_t cpu) {
+ char path[PATH_MAX];
+ snprintf(path, sizeof(path),
+ "/sys/devices/system/cpu/cpu%zu/cache/index3/shared_cpu_list", cpu);
+ return signal_safe_open(path, O_RDONLY | O_CLOEXEC);
+}
+} // namespace
+
+int BuildCpuToL3CacheMap_FindFirstNumberInBuf(absl::string_view current) {
+ // Remove all parts coming after a dash or comma.
+ const size_t dash = current.find('-');
+ if (dash != absl::string_view::npos) current = current.substr(0, dash);
+ const size_t comma = current.find(',');
+ if (comma != absl::string_view::npos) current = current.substr(0, comma);
+
+ int first_cpu;
+ CHECK_CONDITION(absl::SimpleAtoi(current, &first_cpu));
+ CHECK_CONDITION(first_cpu < CPU_SETSIZE);
+ return first_cpu;
+}
+
+int BuildCpuToL3CacheMap(uint8_t l3_cache_index[CPU_SETSIZE]) {
+ int index = 0;
+ // Set to a sane value.
+ memset(l3_cache_index, 0, CPU_SETSIZE);
+ for (int cpu = 0; cpu < CPU_SETSIZE; ++cpu) {
+ const int fd = OpenSysfsCacheList(cpu);
+ if (fd == -1) {
+ // At some point we reach the number of CPUs on the system, and
+ // we should exit. We verify that there was no other problem.
+ CHECK_CONDITION(errno == ENOENT);
+ return index;
+ }
+ // The file contains something like:
+ // 0-11,22-33
+ // we are looking for the first number in that file.
+ char buf[10];
+ const size_t bytes_read =
+ signal_safe_read(fd, buf, 10, /*bytes_read=*/nullptr);
+ signal_safe_close(fd);
+ CHECK_CONDITION(bytes_read >= 0);
+
+ const int first_cpu =
+ BuildCpuToL3CacheMap_FindFirstNumberInBuf({buf, bytes_read});
+ CHECK_CONDITION(first_cpu < CPU_SETSIZE);
+ CHECK_CONDITION(first_cpu <= cpu);
+ if (cpu == first_cpu) {
+ l3_cache_index[cpu] = index++;
+ } else {
+ l3_cache_index[cpu] = l3_cache_index[first_cpu];
+ }
+ }
+ return index;
+}
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/cache_topology.h b/contrib/libs/tcmalloc/tcmalloc/internal/cache_topology.h
new file mode 100644
index 0000000000..292f175470
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/cache_topology.h
@@ -0,0 +1,36 @@
+// Copyright 2021 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TCMALLOC_INTERNAL_CACHE_TOPOLOGY_H_
+#define TCMALLOC_INTERNAL_CACHE_TOPOLOGY_H_
+
+#include "tcmalloc/internal/config.h"
+#include "tcmalloc/internal/util.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+// Build a mapping from cpuid to the index of the L3 cache used by that cpu.
+// Returns the number of caches detected.
+int BuildCpuToL3CacheMap(uint8_t l3_cache_index[CPU_SETSIZE]);
+
+// Helper function exposed to permit testing it.
+int BuildCpuToL3CacheMap_FindFirstNumberInBuf(absl::string_view current);
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // TCMALLOC_INTERNAL_CACHE_TOPOLOGY_H_
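A usage sketch under the assumption that the caller wants one shard per shared L3 cache (ShardForCpu is illustrative, not an API in this patch):

#include <sched.h>

#include <cstdint>

#include "tcmalloc/internal/cache_topology.h"

// Hypothetical: pick a shard index for a CPU so that CPUs sharing an L3 cache
// also share a shard.
int ShardForCpu(int cpu) {
  static uint8_t l3_cache_index[CPU_SETSIZE];
  static const int num_shards =
      tcmalloc::tcmalloc_internal::BuildCpuToL3CacheMap(l3_cache_index);
  (void)num_shards;  // number of distinct L3 caches, e.g. for sizing arrays
  return l3_cache_index[cpu];
}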
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/cache_topology_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/cache_topology_test.cc
new file mode 100644
index 0000000000..927ecace94
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/cache_topology_test.cc
@@ -0,0 +1,51 @@
+// Copyright 2021 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/internal/cache_topology.h"
+
+#include <sched.h>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace {
+
+TEST(CacheTopology, ComputesSomethingReasonable) {
+ // This test verifies that each L3 cache serves the same number of CPUs. This
+ // is not a strict requirement for the correct operation of this code, but a
+ // sign of sanity.
+ uint8_t l3_cache_index[CPU_SETSIZE];
+ const int num_nodes =
+ tcmalloc::tcmalloc_internal::BuildCpuToL3CacheMap(l3_cache_index);
+ EXPECT_EQ(absl::base_internal::NumCPUs() % num_nodes, 0);
+ ASSERT_GT(num_nodes, 0);
+ static const int kMaxNodes = 256 / 8;
+ int count_per_node[kMaxNodes] = {0};
+ for (int i = 0; i < absl::base_internal::NumCPUs(); ++i) {
+ count_per_node[l3_cache_index[i]]++;
+ }
+ for (int i = 0; i < num_nodes; ++i) {
+ EXPECT_EQ(count_per_node[i], absl::base_internal::NumCPUs() / num_nodes);
+ }
+}
+
+TEST(CacheTopology, FindFirstNumberInBuf) {
+ using tcmalloc::tcmalloc_internal::BuildCpuToL3CacheMap_FindFirstNumberInBuf;
+ EXPECT_EQ(7, BuildCpuToL3CacheMap_FindFirstNumberInBuf("7,-787"));
+ EXPECT_EQ(5, BuildCpuToL3CacheMap_FindFirstNumberInBuf("5"));
+ EXPECT_EQ(5, BuildCpuToL3CacheMap_FindFirstNumberInBuf("5-9"));
+ EXPECT_EQ(5, BuildCpuToL3CacheMap_FindFirstNumberInBuf("5,9"));
+}
+
+} // namespace
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/clock.h b/contrib/libs/tcmalloc/tcmalloc/internal/clock.h
new file mode 100644
index 0000000000..65c765203c
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/clock.h
@@ -0,0 +1,41 @@
+// Copyright 2021 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TCMALLOC_INTERNAL_CLOCK_H_
+#define TCMALLOC_INTERNAL_CLOCK_H_
+
+#include <stdint.h>
+
+#include "tcmalloc/internal/config.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+// Represents an abstract clock. The now and freq functions are analogous to
+// CycleClock::Now and CycleClock::Frequency, which will be the most commonly
+// used implementations. Tests can use this interface to mock out the clock.
+struct Clock {
+ // Returns the current time in ticks (relative to an arbitrary time base).
+ int64_t (*now)();
+
+ // Returns the number of ticks per second.
+ double (*freq)();
+};
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // TCMALLOC_INTERNAL_CLOCK_H_
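In production the two function pointers would typically come from absl's CycleClock, while tests can substitute fake functions to control time; a sketch with illustrative names:

#include <cstdint>

#include "absl/base/internal/cycleclock.h"
#include "tcmalloc/internal/clock.h"

using tcmalloc::tcmalloc_internal::Clock;

// Production-style clock backed by CycleClock.
Clock RealClock() {
  return {absl::base_internal::CycleClock::Now,
          absl::base_internal::CycleClock::Frequency};
}

// Test-style clock that advances only when the test bumps fake_ticks.
static int64_t fake_ticks = 0;
static int64_t FakeNow() { return fake_ticks; }
static double FakeFrequency() { return 1e9; }  // treat one tick as a nanosecond

Clock FakeClock() { return {FakeNow, FakeFrequency}; }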
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/config.h b/contrib/libs/tcmalloc/tcmalloc/internal/config.h
new file mode 100644
index 0000000000..73dbab06aa
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/config.h
@@ -0,0 +1,136 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TCMALLOC_INTERNAL_CONFIG_H_
+#define TCMALLOC_INTERNAL_CONFIG_H_
+
+#include <stddef.h>
+
+#include "absl/base/policy_checks.h"
+
+// TCMALLOC_HAVE_SCHED_GETCPU is defined when the system implements
+// sched_getcpu(3) as provided by glibc and its imitators.
+#if defined(__linux__) || defined(__ros__)
+#define TCMALLOC_HAVE_SCHED_GETCPU 1
+#else
+#undef TCMALLOC_HAVE_SCHED_GETCPU
+#endif
+
+// TCMALLOC_HAVE_STRUCT_MALLINFO is defined when we know that the system has
+// `struct mallinfo` available.
+//
+// The FreeBSD libc, and subsequently macOS, does not provide the `mallopt`
+// interfaces. We know that bionic, glibc (and variants), newlib, and uclibc do
+// provide the `mallopt` interface. The musl libc is known to not provide the
+// interface, nor does it provide a macro for checking. As a result, we
+// conservatively state that `struct mallinfo` is only available on these
+// environments.
+#if !defined(OS_FREEBSD) && !defined(OS_MACOSX) && \
+ (defined(__BIONIC__) || defined(__GLIBC__) || defined(__NEWLIB__) || \
+ defined(__UCLIBC__))
+#define TCMALLOC_HAVE_STRUCT_MALLINFO 1
+#else
+#undef TCMALLOC_HAVE_STRUCT_MALLINFO
+#endif
+
+// When possible, name the text section as google_malloc. This macro should not
+// be added to header files, as that may move unrelated code into the
+// google_malloc section.
+#if defined(__clang__) && defined(__linux__)
+#define GOOGLE_MALLOC_SECTION_BEGIN \
+ _Pragma("clang section text = \"google_malloc\"")
+#define GOOGLE_MALLOC_SECTION_END _Pragma("clang section text = \"\"")
+#else
+#define GOOGLE_MALLOC_SECTION_BEGIN
+#define GOOGLE_MALLOC_SECTION_END
+#endif
+
+#if defined(__GNUC__) && !defined(__clang__)
+#if __GNUC__ < 9 || (__GNUC__ == 9 && __GNUC_MINOR__ < 2)
+#error "GCC 9.2 or higher is required."
+#endif
+#endif
+
+#if defined(__clang__)
+#if __clang_major__ < 9
+#error "Clang 9 or higher is required."
+#endif
+#endif
+
+#if !defined(__x86_64__) && !defined(__ppc64__) && !defined(__arm__) && \
+ !defined(__aarch64__) && !defined(__riscv)
+#error "Unsupported architecture."
+#endif
+
+#if !defined(__cplusplus) || __cplusplus < 201703L
+#error "TCMalloc requires C++17 or later."
+#else
+// Also explicitly use some C++17 syntax, so that compatibility flags such as
+// `-Wc++14-compat` surface the C++17 requirement.
+namespace tcmalloc::google3_requires_cpp17_or_later {}
+#endif
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+#if defined __x86_64__
+// All current and planned x86_64 processors only look at the lower 48 bits
+// in virtual to physical address translation. The top 16 are thus unused.
+// TODO(b/134686025): Under what operating systems can we increase it safely to
+// 17? This lets us use smaller page maps. On first allocation, a 36-bit page
+// map uses only 96 KB instead of the 4.5 MB used by a 52-bit page map.
+inline constexpr int kAddressBits =
+ (sizeof(void*) < 8 ? (8 * sizeof(void*)) : 48);
+#elif defined __powerpc64__ && defined __linux__
+// Linux (4.12 and above) on powerpc64 supports a 128TB user virtual address
+// space by default, and up to 512TB if user space opts in by specifying a hint
+// in mmap.
+// See comments in arch/powerpc/include/asm/processor.h
+// and arch/powerpc/mm/mmap.c.
+inline constexpr int kAddressBits =
+ (sizeof(void*) < 8 ? (8 * sizeof(void*)) : 49);
+#elif defined __aarch64__ && defined __linux__
+// According to Documentation/arm64/memory.txt of kernel 3.16,
+// AARCH64 kernel supports 48-bit virtual addresses for both user and kernel.
+inline constexpr int kAddressBits =
+ (sizeof(void*) < 8 ? (8 * sizeof(void*)) : 48);
+#elif defined __riscv && defined __linux__
+inline constexpr int kAddressBits =
+ (sizeof(void *) < 8 ? (8 * sizeof(void *)) : 48);
+#else
+inline constexpr int kAddressBits = 8 * sizeof(void*);
+#endif
+
+#if defined(__x86_64__)
+// x86 has 2 MiB huge pages
+static constexpr size_t kHugePageShift = 21;
+#elif defined(__PPC64__)
+static constexpr size_t kHugePageShift = 24;
+#elif defined __aarch64__ && defined __linux__
+static constexpr size_t kHugePageShift = 21;
+#elif defined __riscv && defined __linux__
+static constexpr size_t kHugePageShift = 21;
+#else
+// ...whatever, guess something big-ish
+static constexpr size_t kHugePageShift = 21;
+#endif
+
+static constexpr size_t kHugePageSize = static_cast<size_t>(1)
+ << kHugePageShift;
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // TCMALLOC_INTERNAL_CONFIG_H_
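As a quick sanity illustration of the constants above, guarded to x86-64 since other architectures pick different values:

#include "tcmalloc/internal/config.h"

#if defined(__x86_64__)
// 2^21-byte (2 MiB) huge pages and a 48-bit user virtual address space.
static_assert(tcmalloc::tcmalloc_internal::kHugePageSize == 2 * 1024 * 1024,
              "unexpected huge page size");
static_assert(tcmalloc::tcmalloc_internal::kAddressBits == 48,
              "unexpected address width");
#endif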
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/declarations.h b/contrib/libs/tcmalloc/tcmalloc/internal/declarations.h
new file mode 100644
index 0000000000..b82a3ce9e5
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/declarations.h
@@ -0,0 +1,42 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// These declarations are for internal use, allowing us to have access to
+// allocation functions whose declarations are not provided by the standard
+// library.
+#ifndef TCMALLOC_INTERNAL_DECLARATIONS_H_
+#define TCMALLOC_INTERNAL_DECLARATIONS_H_
+
+#include <cstddef>
+#include <new>
+
+namespace std {
+enum class align_val_t : size_t;
+} // namespace std
+
+void* operator new(std::size_t, std::align_val_t);
+void* operator new(std::size_t, std::align_val_t,
+ const std::nothrow_t&) noexcept;
+void* operator new[](std::size_t, std::align_val_t);
+void* operator new[](std::size_t, std::align_val_t,
+ const std::nothrow_t&) noexcept;
+
+void operator delete(void*, std::align_val_t) noexcept;
+void operator delete(void*, std::size_t) noexcept;
+void operator delete(void*, std::size_t, std::align_val_t) noexcept;
+void operator delete[](void*, std::align_val_t) noexcept;
+void operator delete[](void*, std::size_t) noexcept;
+void operator delete[](void*, std::size_t, std::align_val_t) noexcept;
+
+#endif // TCMALLOC_INTERNAL_DECLARATIONS_H_
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/environment.cc b/contrib/libs/tcmalloc/tcmalloc/internal/environment.cc
new file mode 100644
index 0000000000..e786dd7a96
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/environment.cc
@@ -0,0 +1,45 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "tcmalloc/internal/environment.h"
+
+#include <string.h>
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+// POSIX provides the **environ array, which contains environment variables in
+// a linear array terminated by a null pointer. This array is only perturbed
+// when the environment is changed (which is inherently unsafe), so it's safe
+// to return a const pointer into it.
+// e.g. { "SHELL=/bin/bash", "MY_ENV_VAR=1", nullptr }
+extern "C" char** environ;
+const char* thread_safe_getenv(const char* env_var) {
+ int var_len = strlen(env_var);
+
+ char** envv = environ;
+ if (!envv) {
+ return nullptr;
+ }
+
+ for (; *envv != nullptr; envv++)
+ if (strncmp(*envv, env_var, var_len) == 0 && (*envv)[var_len] == '=')
+ return *envv + var_len + 1; // skip over the '='
+
+ return nullptr;
+}
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/environment.h b/contrib/libs/tcmalloc/tcmalloc/internal/environment.h
new file mode 100644
index 0000000000..f54840e8d7
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/environment.h
@@ -0,0 +1,42 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TCMALLOC_INTERNAL_ENVIRONMENT_H_
+#define TCMALLOC_INTERNAL_ENVIRONMENT_H_
+
+#include "tcmalloc/internal/config.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+// WARNING ********************************************************************
+// getenv(3) can only be safely used in the absence of calls which perturb the
+// environment (e.g. putenv/setenv/clearenv). The use of such calls is
+// strictly thread-hostile since these calls do *NOT* synchronize and there is
+// *NO* thread-safe way in which the POSIX **environ array may be queried about
+// modification.
+// ****************************************************************************
+// The default getenv(3) is not guaranteed to be thread-safe as there are no
+// semantics specifying the implementation of the result buffer. The result
+// from thread_safe_getenv() may be safely queried in a multi-threaded context.
+// If you have explicit synchronization with changes to environment variables,
+// then any copies of the returned pointer must be invalidated across
+// modification.
+const char* thread_safe_getenv(const char* env_var);
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // TCMALLOC_INTERNAL_ENVIRONMENT_H_
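A hedged example of the intended calling pattern; the environment variable name below is made up for illustration and is not a real TCMalloc flag:

#include "tcmalloc/internal/environment.h"

// Hypothetical flag check. thread_safe_getenv() neither allocates nor takes
// locks, so it is safe to call from allocator code paths.
bool ExampleFeatureEnabled() {
  const char* v = tcmalloc::tcmalloc_internal::thread_safe_getenv(
      "TCMALLOC_EXAMPLE_FEATURE");  // assumed name for the sketch
  return v != nullptr && v[0] == '1';
}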
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/environment_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/environment_test.cc
new file mode 100644
index 0000000000..6878301ec9
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/environment_test.cc
@@ -0,0 +1,45 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/internal/environment.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "gtest/gtest.h"
+
+namespace tcmalloc {
+namespace tcmalloc_internal {
+namespace {
+
+TEST(EnvironmentTest, thread_safe_getenv) {
+ // Should never be defined at test start
+ const char *result, *undefined_env_var = "UTIL_TEST_UNDEFINED_ENV_VAR";
+
+ // Check that we handle an undefined variable and then set it
+ ASSERT_TRUE(getenv(undefined_env_var) == nullptr);
+ ASSERT_TRUE(thread_safe_getenv(undefined_env_var) == nullptr);
+ ASSERT_EQ(setenv(undefined_env_var, "1234567890", 0), 0);
+ ASSERT_TRUE(getenv(undefined_env_var) != nullptr);
+
+ // Make sure we can find the new variable
+ result = thread_safe_getenv(undefined_env_var);
+ ASSERT_TRUE(result != nullptr);
+ // ... and that it matches what was set
+ EXPECT_EQ(strcmp(result, getenv(undefined_env_var)), 0);
+}
+
+} // namespace
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/lifetime_predictions.h b/contrib/libs/tcmalloc/tcmalloc/internal/lifetime_predictions.h
new file mode 100644
index 0000000000..514dd4a73e
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/lifetime_predictions.h
@@ -0,0 +1,252 @@
+// Copyright 2020 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TCMALLOC_INTERNAL_LIFETIME_PREDICTIONS_H_
+#define TCMALLOC_INTERNAL_LIFETIME_PREDICTIONS_H_
+
+#include <algorithm>
+#include <cstdlib>
+#include <functional>
+#include <memory>
+#include <unordered_map>
+#include <utility>
+
+#include "absl/algorithm/container.h"
+#include "absl/base/const_init.h"
+#include "absl/base/internal/low_level_alloc.h"
+#include "absl/base/internal/spinlock.h"
+#include "absl/debugging/stacktrace.h"
+#include "absl/hash/hash.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "tcmalloc/internal/linked_list.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+// Counts how many times we observed objects with a particular stack trace
+// that were short lived/long lived. Each LifetimeStats object is associated
+// with a particular allocation site (i.e., allocation stack trace) and each
+// allocation site has at most one LifetimeStats object. All accesses to
+// LifetimeStats objects need to be synchronized via the page heap lock.
+class LifetimeStats : public TList<LifetimeStats>::Elem {
+ public:
+ enum class Certainty { kLowCertainty, kHighCertainty };
+ enum class Prediction { kShortLived, kLongLived };
+
+ void Update(Prediction prediction) {
+ if (prediction == Prediction::kShortLived) {
+ short_lived_++;
+ } else {
+ long_lived_++;
+ }
+ }
+
+ Prediction Predict(Certainty certainty) {
+ if (certainty == Certainty::kLowCertainty) {
+ return (short_lived_ > long_lived_) ? Prediction::kShortLived
+ : Prediction::kLongLived;
+ } else {
+ // If little data was collected, predict as long-lived (current behavior).
+ return (short_lived_ > (long_lived_ + 10)) ? Prediction::kShortLived
+ : Prediction::kLongLived;
+ }
+ }
+
+ // Reference counts are protected by LifetimeDatabase::table_lock_.
+
+ // Increments the reference count of this entry.
+ void IncRef() { ++refcount_; }
+
+ // Returns true if and only if the reference count reaches 0.
+ bool DecRef() { return --refcount_ == 0; }
+
+ private:
+ uint64_t refcount_ = 1;
+ uint64_t short_lived_ = 0;
+ uint64_t long_lived_ = 0;
+};
+
+// Manages stack traces and statistics about their associated lifetimes. Since
+// the database can fill up, old entries are evicted. Evicted entries need to
+// survive as long as the last lifetime tracker referencing them and are thus
+// reference-counted.
+class LifetimeDatabase {
+ public:
+ struct Key {
+ int depth; // Number of PC values stored in array below
+ void* stack[kMaxStackDepth];
+
+ // Instantiate at the start of the allocation to capture the allocation
+ // stack trace.
+ Key() { depth = absl::GetStackTrace(stack, kMaxStackDepth, 1); }
+
+ template <typename H>
+ friend H AbslHashValue(H h, const Key& c) {
+ return H::combine(H::combine_contiguous(std::move(h), c.stack, c.depth),
+ c.depth);
+ }
+
+ bool operator==(const Key& other) const {
+ if (depth != other.depth) {
+ return false;
+ }
+ return std::equal(stack, stack + depth, other.stack);
+ }
+ };
+
+ // Captures statistics associated with the low-level allocator backing the
+ // memory used by the database.
+ struct ArenaStats {
+ uint64_t bytes_allocated;
+ };
+
+ static constexpr int kMaxDatabaseSize = 1024;
+
+ LifetimeDatabase() {}
+ ~LifetimeDatabase() {}
+
+ // Not copyable or movable
+ LifetimeDatabase(const LifetimeDatabase&) = delete;
+ LifetimeDatabase& operator=(const LifetimeDatabase&) = delete;
+
+ // Identifies the current stack trace and returns a handle to the lifetime
+ // statistics associated with this stack trace. May run outside the page heap
+ // lock -- we therefore need to do our own locking. This increments the
+ // reference count of the lifetime stats object and the caller is responsible
+ // for calling RemoveLifetimeStatsReference when finished with the object.
+ LifetimeStats* LookupOrAddLifetimeStats(Key* k) {
+ absl::base_internal::SpinLockHolder h(&table_lock_);
+ auto it = table_.find(*k);
+ LifetimeStats* s;
+ if (it == table_.end()) {
+ MaybeEvictLRU();
+ // Allocate a new entry using the low-level allocator, which is safe
+ // to call from within TCMalloc.
+ s = stats_allocator_.allocate(1);
+ new (s) LifetimeStats();
+ table_.insert(std::make_pair(*k, s));
+ stats_fifo_.append(s);
+ } else {
+ s = it->second;
+ UpdateLRU(s);
+ }
+ s->IncRef();
+ return s;
+ }
+
+ void RemoveLifetimeStatsReference(LifetimeStats* s) {
+ absl::base_internal::SpinLockHolder h(&table_lock_);
+ if (s->DecRef()) {
+ stats_allocator_.deallocate(s, 1);
+ }
+ }
+
+ size_t size() const {
+ absl::base_internal::SpinLockHolder h(&table_lock_);
+ return table_.size();
+ }
+
+ size_t evictions() const {
+ absl::base_internal::SpinLockHolder h(&table_lock_);
+ return n_evictions_;
+ }
+
+ static ArenaStats* arena_stats() {
+ static ArenaStats stats = {0};
+ return &stats;
+ }
+
+ protected:
+ static const int kMaxStackDepth = 64;
+
+ static absl::base_internal::LowLevelAlloc::Arena* GetArena() {
+ static absl::base_internal::LowLevelAlloc::Arena* arena =
+ absl::base_internal::LowLevelAlloc::NewArena(0);
+ return arena;
+ }
+
+ static uint64_t bytes_allocated_ ABSL_GUARDED_BY(table_lock_);
+
+ void UpdateLRU(LifetimeStats* stats)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(table_lock_) {
+ stats_fifo_.remove(stats);
+ stats_fifo_.append(stats);
+ }
+
+ // Evicts the least-recently-used entry if the database is already full.
+ void MaybeEvictLRU() ABSL_EXCLUSIVE_LOCKS_REQUIRED(table_lock_) {
+ if (table_.size() < kMaxDatabaseSize) {
+ return;
+ }
+ n_evictions_++;
+ LifetimeStats* evict = stats_fifo_.first();
+ stats_fifo_.remove(evict);
+ for (auto it = table_.begin(); it != table_.end(); ++it) {
+ if (it->second == evict) {
+ table_.erase(it);
+ if (evict->DecRef()) {
+ stats_allocator_.deallocate(evict, 1);
+ }
+ return;
+ }
+ }
+ CHECK_CONDITION(false); // Should not happen
+ }
+
+ private:
+ template <typename T>
+ class MyAllocator : public std::allocator<T> {
+ public:
+ template <typename U>
+ struct rebind {
+ using other = MyAllocator<U>;
+ };
+
+ MyAllocator() noexcept {}
+
+ template <typename U>
+ explicit MyAllocator(const MyAllocator<U>&) noexcept {}
+
+ T* allocate(size_t num_objects, const void* = nullptr) {
+ size_t bytes = num_objects * sizeof(T);
+ arena_stats()->bytes_allocated += bytes;
+ return static_cast<T*>(absl::base_internal::LowLevelAlloc::AllocWithArena(
+ bytes, GetArena()));
+ }
+
+ void deallocate(T* p, size_t num_objects) {
+ size_t bytes = num_objects * sizeof(T);
+ arena_stats()->bytes_allocated -= bytes;
+ absl::base_internal::LowLevelAlloc::Free(p);
+ }
+ };
+
+ MyAllocator<LifetimeStats> stats_allocator_ ABSL_GUARDED_BY(table_lock_);
+ mutable absl::base_internal::SpinLock table_lock_{
+ absl::kConstInit, absl::base_internal::SCHEDULE_KERNEL_ONLY};
+
+ // Stores the current mapping from allocation site to LifetimeStats.
+ std::unordered_map<Key, LifetimeStats*, absl::Hash<Key>, std::equal_to<Key>,
+ MyAllocator<std::pair<const Key, LifetimeStats*>>>
+ table_ ABSL_GUARDED_BY(table_lock_);
+
+ // Stores the entries in order of last access, least recently used first.
+ TList<LifetimeStats> stats_fifo_ ABSL_GUARDED_BY(table_lock_);
+ size_t n_evictions_ ABSL_GUARDED_BY(table_lock_) = 0;
+};
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // TCMALLOC_INTERNAL_LIFETIME_PREDICTIONS_H_
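The expected flow, sketched with illustrative hook names: capture a Key at the allocation site, look up the shared stats (which takes a reference), and later record the observed lifetime before dropping that reference.

#include "tcmalloc/internal/lifetime_predictions.h"

using tcmalloc::tcmalloc_internal::LifetimeDatabase;
using tcmalloc::tcmalloc_internal::LifetimeStats;

// Hypothetical allocation-site hook.
LifetimeStats* OnAllocation(LifetimeDatabase& db) {
  LifetimeDatabase::Key key;  // captures the current stack trace
  return db.LookupOrAddLifetimeStats(&key);  // caller must release the reference
}

// Hypothetical deallocation hook: feed back the observed outcome, then drop
// the reference taken above. (In TCMalloc, Update() runs under the page heap
// lock.)
void OnDeallocation(LifetimeDatabase& db, LifetimeStats* stats,
                    bool was_short_lived) {
  stats->Update(was_short_lived ? LifetimeStats::Prediction::kShortLived
                                : LifetimeStats::Prediction::kLongLived);
  db.RemoveLifetimeStatsReference(stats);
}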
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/lifetime_predictions_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/lifetime_predictions_test.cc
new file mode 100644
index 0000000000..4280890afe
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/lifetime_predictions_test.cc
@@ -0,0 +1,156 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/internal/lifetime_predictions.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "tcmalloc/testing/testutil.h"
+
+namespace tcmalloc {
+namespace tcmalloc_internal {
+namespace {
+
+class LifetimeDatabaseTest : public testing::Test {
+ protected:
+ LifetimeDatabase lifetime_database_;
+
+ ABSL_ATTRIBUTE_NOINLINE ABSL_ATTRIBUTE_NO_TAIL_CALL LifetimeStats*
+ AllocateA() {
+ LifetimeDatabase::Key key;
+ return lifetime_database_.LookupOrAddLifetimeStats(&key);
+ }
+
+ ABSL_ATTRIBUTE_NOINLINE ABSL_ATTRIBUTE_NO_TAIL_CALL LifetimeStats*
+ AllocateB() {
+ LifetimeDatabase::Key key;
+ return lifetime_database_.LookupOrAddLifetimeStats(&key);
+ }
+
+ ABSL_ATTRIBUTE_NOINLINE ABSL_ATTRIBUTE_NO_TAIL_CALL LifetimeStats*
+ AllocateWithStacktraceId(int id) {
+ if (id == 0) {
+ LifetimeDatabase::Key key;
+ return lifetime_database_.LookupOrAddLifetimeStats(&key);
+ } else if (id % 2 == 0) {
+ return AllocateWithStacktraceId(id / 2);
+ } else {
+ return AllocateWithStacktraceId_2(id / 2);
+ }
+ }
+
+ // Record a sufficiently large number of short-lived allocations to make
+ // a prediction short-lived, absent any long-lived allocations.
+ void MakeShortLived(LifetimeStats* stats, bool high_certainty) {
+ for (int i = 0; i < (high_certainty ? 100 : 2); i++) {
+ stats->Update(LifetimeStats::Prediction::kShortLived);
+ }
+ }
+
+ private:
+ ABSL_ATTRIBUTE_NOINLINE ABSL_ATTRIBUTE_NO_TAIL_CALL LifetimeStats*
+ AllocateWithStacktraceId_2(int id) {
+ if (id == 0) {
+ LifetimeDatabase::Key key;
+ return lifetime_database_.LookupOrAddLifetimeStats(&key);
+ } else if (id % 2 == 0) {
+ return AllocateWithStacktraceId(id / 2);
+ } else {
+ return AllocateWithStacktraceId_2(id / 2);
+ }
+ }
+};
+
+TEST_F(LifetimeDatabaseTest, Basic) {
+ PRAGMA_NO_UNROLL
+ for (int i = 0; i < 2; i++) {
+ LifetimeStats* r1 = AllocateA();
+ LifetimeStats* r2 = AllocateB();
+ LifetimeStats* r3 = AllocateB();
+ ASSERT_NE(nullptr, r1);
+ ASSERT_NE(nullptr, r2);
+ ASSERT_NE(nullptr, r3);
+
+ // First iteration: set short-lived count.
+ if (i == 0) {
+ MakeShortLived(r1, false);
+ MakeShortLived(r2, true);
+ } else {
+ EXPECT_EQ(LifetimeStats::Prediction::kShortLived,
+ r1->Predict(LifetimeStats::Certainty::kLowCertainty));
+ EXPECT_EQ(LifetimeStats::Prediction::kLongLived,
+ r1->Predict(LifetimeStats::Certainty::kHighCertainty));
+ EXPECT_EQ(LifetimeStats::Prediction::kShortLived,
+ r2->Predict(LifetimeStats::Certainty::kLowCertainty));
+ EXPECT_EQ(LifetimeStats::Prediction::kShortLived,
+ r2->Predict(LifetimeStats::Certainty::kHighCertainty));
+ EXPECT_EQ(LifetimeStats::Prediction::kLongLived,
+ r3->Predict(LifetimeStats::Certainty::kLowCertainty));
+ EXPECT_EQ(LifetimeStats::Prediction::kLongLived,
+ r3->Predict(LifetimeStats::Certainty::kHighCertainty));
+ }
+
+ lifetime_database_.RemoveLifetimeStatsReference(r1);
+ lifetime_database_.RemoveLifetimeStatsReference(r2);
+ lifetime_database_.RemoveLifetimeStatsReference(r3);
+ }
+}
+
+TEST_F(LifetimeDatabaseTest, Eviction) {
+ const int kEntries = 5 * LifetimeDatabase::kMaxDatabaseSize;
+
+ std::vector<LifetimeStats*> refs;
+
+ PRAGMA_NO_UNROLL
+ for (int i = 0; i < kEntries; i++) {
+ LifetimeStats* r = AllocateWithStacktraceId(i);
+ refs.push_back(r);
+
+ ASSERT_NE(nullptr, r);
+ if (i < LifetimeDatabase::kMaxDatabaseSize) {
+ MakeShortLived(r, true);
+ }
+ }
+
+ // Check that even evicted entries are still accessible due to refcounts.
+ for (int i = 0; i < kEntries; i++) {
+ if (i < LifetimeDatabase::kMaxDatabaseSize) {
+ EXPECT_EQ(LifetimeStats::Prediction::kShortLived,
+ refs[i]->Predict(LifetimeStats::Certainty::kLowCertainty));
+ } else {
+ EXPECT_EQ(LifetimeStats::Prediction::kLongLived,
+ refs[i]->Predict(LifetimeStats::Certainty::kLowCertainty));
+ }
+ }
+
+ EXPECT_EQ(LifetimeDatabase::kMaxDatabaseSize, lifetime_database_.size());
+ EXPECT_EQ(kEntries - LifetimeDatabase::kMaxDatabaseSize,
+ lifetime_database_.evictions());
+
+ uint64_t before_bytes = lifetime_database_.arena_stats()->bytes_allocated;
+
+ // Return all of the references, which should drop the remaining refcounts.
+ for (int i = 0; i < kEntries; i++) {
+ lifetime_database_.RemoveLifetimeStatsReference(refs[i]);
+ }
+
+ uint64_t after_bytes = lifetime_database_.arena_stats()->bytes_allocated;
+
+ // Check that this freed up memory
+ EXPECT_LT(after_bytes, before_bytes);
+}
+
+} // namespace
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/lifetime_tracker.h b/contrib/libs/tcmalloc/tcmalloc/internal/lifetime_tracker.h
new file mode 100644
index 0000000000..d348dbe609
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/lifetime_tracker.h
@@ -0,0 +1,172 @@
+// Copyright 2020 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TCMALLOC_INTERNAL_LIFETIME_TRACKER_H_
+#define TCMALLOC_INTERNAL_LIFETIME_TRACKER_H_
+
+#include "absl/base/internal/cycleclock.h"
+#include "absl/time/time.h"
+#include "tcmalloc/internal/clock.h"
+#include "tcmalloc/internal/lifetime_predictions.h"
+#include "tcmalloc/internal/linked_list.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+template <typename LifetimeDatabaseT, typename LifetimeStatsT>
+class LifetimeTrackerImpl {
+ public:
+ // A tracker is attached to an individual allocation and tracks its lifetime.
+ // This allocation can either be in a region or in the filler. It contains
+ // a pointer back to the LifetimeStats of the allocation site that generated
+ // this allocation, so that statistics can be updated.
+ struct Tracker : public TList<Tracker>::Elem {
+ // The deadline after which the object is considered long-lived.
+ uint64_t deadline = 0;
+
+ // If the allocation is associated with a counterfactual, this contains
+ // the hypothetical location in the short-lived region (null otherwise).
+ void* counterfactual_ptr = nullptr;
+
+ // Lifetime statistics associated with this allocation (will be updated when
+ // the lifetime is known).
+ LifetimeStatsT* lifetime;
+
+    // True if this tracker's allocation was predicted short-lived.
+ bool predicted_short_lived = false;
+
+ // Is this element currently tracked by the lifetime tracker?
+ bool is_tracked() { return deadline != 0; }
+
+ // Reset the element (implies not tracked).
+ void reset() {
+ deadline = 0;
+ counterfactual_ptr = nullptr;
+ }
+ };
+
+ struct Stats {
+ uint64_t expired_lifetimes = 0;
+ uint64_t overestimated_lifetimes = 0;
+ uint64_t short_lived_predictions = 0;
+ uint64_t long_lived_predictions = 0;
+ };
+
+ explicit LifetimeTrackerImpl(
+ LifetimeDatabaseT* lifetime_database, absl::Duration timeout,
+ Clock clock = Clock{.now = absl::base_internal::CycleClock::Now,
+ .freq = absl::base_internal::CycleClock::Frequency})
+ : timeout_(absl::ToDoubleSeconds(timeout) * clock.freq()),
+ lifetime_database_(*lifetime_database),
+ clock_(clock) {}
+
+ // Registers a donated allocation with the tracker.
+ void AddAllocation(Tracker* tracker, LifetimeStatsT* lifetime,
+ bool predicted_short_lived) {
+ CheckForLifetimeExpirations();
+
+ if (predicted_short_lived) {
+ stats_.short_lived_predictions++;
+ } else {
+ stats_.long_lived_predictions++;
+ }
+
+ ASSERT(tracker != nullptr);
+ ASSERT(lifetime != nullptr);
+ tracker->deadline = clock_.now() + timeout_;
+ tracker->lifetime = lifetime;
+ tracker->predicted_short_lived = predicted_short_lived;
+ list_.append(tracker);
+ }
+
+ // Remove an allocation from the tracker. This will stop tracking the
+ // allocation and record whether it was correctly predicted.
+ void RemoveAllocation(Tracker* tracker) {
+ CheckForLifetimeExpirations();
+
+ // This is not tracked anymore.
+ if (!tracker->is_tracked()) {
+ return;
+ }
+
+ if (!tracker->predicted_short_lived) {
+ stats_.overestimated_lifetimes++;
+ }
+
+ if (tracker->lifetime != nullptr) {
+ tracker->lifetime->Update(LifetimeStatsT::Prediction::kShortLived);
+ lifetime_database_.RemoveLifetimeStatsReference(tracker->lifetime);
+ }
+
+ tracker->reset();
+
+ list_.remove(tracker);
+ }
+
+ // Check whether any lifetimes in the tracker have passed the threshold after
+ // which they are not short-lived anymore.
+ void CheckForLifetimeExpirations() {
+ // TODO(mmaas): Expirations are fairly cheap, but there is a theoretical
+ // possibility of having an arbitrary number of expirations at once, which
+ // could affect tail latency. We may want to limit the number of pages we
+ // let expire per unit time.
+ uint64_t now = clock_.now();
+ Tracker* tracker = TryGetExpired(now);
+ while (tracker != nullptr) {
+ ASSERT(tracker->is_tracked());
+
+ // A page that was predicted short-lived was actually long-lived.
+ if (tracker->predicted_short_lived) {
+ stats_.expired_lifetimes++;
+ }
+
+ if (tracker->lifetime != nullptr) {
+ tracker->lifetime->Update(LifetimeStatsT::Prediction::kLongLived);
+ lifetime_database_.RemoveLifetimeStatsReference(tracker->lifetime);
+ }
+
+ tracker->reset();
+ tracker = TryGetExpired(now);
+ }
+ }
+
+ Stats stats() const { return stats_; }
+
+ private:
+ // Returns the earliest expiring entry, or nullptr if none expired.
+ Tracker* TryGetExpired(uint64_t now) {
+ if (!list_.empty() && list_.first()->deadline < now) {
+ Tracker* s = list_.first();
+ list_.remove(s);
+ return s;
+ }
+ return nullptr;
+ }
+
+ const uint64_t timeout_;
+
+ TList<Tracker> list_;
+ Stats stats_;
+ LifetimeDatabaseT& lifetime_database_;
+ Clock clock_;
+};
+
+using LifetimeTracker = LifetimeTrackerImpl<LifetimeDatabase, LifetimeStats>;
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // TCMALLOC_INTERNAL_LIFETIME_TRACKER_H_
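
For orientation, a minimal sketch (not part of the patch) of how the database and tracker added above fit together, mirroring the flow exercised by the tests that follow: an allocation site is resolved to its ref-counted LifetimeStats, a per-allocation Tracker is registered, and the reference is dropped when the allocation is freed. TrackOneAllocation is a hypothetical helper name.

#include "tcmalloc/internal/lifetime_predictions.h"
#include "tcmalloc/internal/lifetime_tracker.h"

namespace tcmalloc {
namespace tcmalloc_internal {

// Hypothetical helper illustrating the intended call sequence.
inline void TrackOneAllocation(LifetimeDatabase* db, LifetimeTracker* tracker) {
  // Resolve the current allocation site to its (ref-counted) statistics.
  LifetimeDatabase::Key key;
  LifetimeStats* stats = db->LookupOrAddLifetimeStats(&key);

  // Consult past behavior of this site to classify the new allocation.
  bool short_lived =
      stats->Predict(LifetimeStats::Certainty::kLowCertainty) ==
      LifetimeStats::Prediction::kShortLived;

  // Register the allocation; its deadline becomes now + timeout.
  LifetimeTracker::Tracker t;
  tracker->AddAllocation(&t, stats, short_lived);

  // ... allocation is in use ...

  // On deallocation: records the observed lifetime and drops the reference.
  tracker->RemoveAllocation(&t);
}

}  // namespace tcmalloc_internal
}  // namespace tcmalloc
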
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/lifetime_tracker_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/lifetime_tracker_test.cc
new file mode 100644
index 0000000000..78ed38ecae
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/lifetime_tracker_test.cc
@@ -0,0 +1,129 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/internal/lifetime_tracker.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/time/time.h"
+#include "tcmalloc/internal/lifetime_predictions.h"
+
+namespace tcmalloc {
+namespace tcmalloc_internal {
+namespace {
+
+class MockLifetimeStats {
+ public:
+ enum class Prediction { kShortLived, kLongLived };
+ MOCK_METHOD(void, Update, (Prediction prediction), ());
+};
+
+class MockLifetimeDatabase {
+ public:
+ MOCK_METHOD(void, RemoveLifetimeStatsReference, (MockLifetimeStats*), ());
+};
+
+using LifetimeTrackerUnderTest =
+ LifetimeTrackerImpl<MockLifetimeDatabase, MockLifetimeStats>;
+
+class LifetimeTrackerTest : public testing::Test {
+ protected:
+ const Clock kFakeClock =
+ Clock{.now = FakeClock, .freq = GetFakeClockFrequency};
+
+ void Advance(absl::Duration d) {
+ clock_ += absl::ToDoubleSeconds(d) * GetFakeClockFrequency();
+ }
+
+ private:
+ static int64_t FakeClock() { return clock_; }
+
+ static double GetFakeClockFrequency() {
+ return absl::ToDoubleNanoseconds(absl::Seconds(2));
+ }
+
+ static int64_t clock_;
+};
+
+int64_t LifetimeTrackerTest::clock_{0};
+
+TEST_F(LifetimeTrackerTest, Basic) {
+ MockLifetimeDatabase database;
+ LifetimeTrackerUnderTest tracker(&database, absl::Seconds(0.5), kFakeClock);
+ MockLifetimeStats stats;
+
+ LifetimeTrackerUnderTest::Tracker tracker1;
+ tracker.AddAllocation(&tracker1, &stats, false);
+ Advance(absl::Seconds(1));
+
+ EXPECT_CALL(stats, Update(MockLifetimeStats::Prediction::kLongLived));
+ EXPECT_CALL(database, RemoveLifetimeStatsReference(&stats));
+
+ LifetimeTrackerUnderTest::Tracker tracker2;
+ tracker.AddAllocation(&tracker2, &stats, false);
+
+ EXPECT_CALL(stats, Update(MockLifetimeStats::Prediction::kShortLived));
+ EXPECT_CALL(database, RemoveLifetimeStatsReference(&stats));
+
+ Advance(absl::Seconds(0.1));
+ tracker.RemoveAllocation(&tracker2);
+
+ EXPECT_EQ(tracker.stats().expired_lifetimes, 0);
+ EXPECT_EQ(tracker.stats().overestimated_lifetimes, 1);
+ EXPECT_EQ(tracker.stats().short_lived_predictions, 0);
+ EXPECT_EQ(tracker.stats().long_lived_predictions, 2);
+}
+
+TEST_F(LifetimeTrackerTest, ExpirationLogic) {
+ MockLifetimeDatabase database;
+ LifetimeTrackerUnderTest tracker(&database, absl::Seconds(0.5), kFakeClock);
+
+ // Create 100 trackers, all predicted short-lived. Every second tracker will
+ // be long-lived and therefore expire.
+ const int kNumTrackers = 100;
+ std::vector<LifetimeTrackerUnderTest::Tracker> trackers(kNumTrackers);
+ MockLifetimeStats stats[] = {MockLifetimeStats(), MockLifetimeStats()};
+
+ for (int i = 0; i < kNumTrackers; ++i) {
+ tracker.AddAllocation(&trackers[i], &stats[i % 2], true);
+ Advance(absl::Milliseconds(1));
+ }
+
+ EXPECT_CALL(stats[0], Update(MockLifetimeStats::Prediction::kShortLived))
+ .Times(kNumTrackers / 2);
+ EXPECT_CALL(database, RemoveLifetimeStatsReference(&stats[0]))
+ .Times(kNumTrackers / 2);
+
+ for (int i = 0; i < kNumTrackers; i += 2) {
+ tracker.RemoveAllocation(&trackers[i]);
+ }
+
+ // After an additional 450ms, 1/4 of the allocations should have expired.
+ EXPECT_CALL(stats[1], Update(MockLifetimeStats::Prediction::kLongLived))
+ .Times(kNumTrackers / 4);
+ EXPECT_CALL(database, RemoveLifetimeStatsReference(&stats[1]))
+ .Times(kNumTrackers / 4);
+
+ Advance(absl::Milliseconds(450));
+ tracker.CheckForLifetimeExpirations();
+
+ EXPECT_EQ(tracker.stats().expired_lifetimes, kNumTrackers / 4);
+ EXPECT_EQ(tracker.stats().overestimated_lifetimes, 0);
+ EXPECT_EQ(tracker.stats().short_lived_predictions, kNumTrackers);
+ EXPECT_EQ(tracker.stats().long_lived_predictions, 0);
+}
+
+} // namespace
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/linked_list.h b/contrib/libs/tcmalloc/tcmalloc/internal/linked_list.h
new file mode 100644
index 0000000000..181a480275
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/linked_list.h
@@ -0,0 +1,254 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Some very basic linked list functions for dealing with using void * as
+// storage.
+
+#ifndef TCMALLOC_INTERNAL_LINKED_LIST_H_
+#define TCMALLOC_INTERNAL_LINKED_LIST_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "absl/base/attributes.h"
+#include "absl/base/optimization.h"
+#include "tcmalloc/internal/logging.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+inline ABSL_ATTRIBUTE_ALWAYS_INLINE void* SLL_Next(void* t) {
+ return *(reinterpret_cast<void**>(t));
+}
+
+inline void ABSL_ATTRIBUTE_ALWAYS_INLINE SLL_SetNext(void* t, void* n) {
+ *(reinterpret_cast<void**>(t)) = n;
+}
+
+inline void ABSL_ATTRIBUTE_ALWAYS_INLINE SLL_Push(void** list, void* element) {
+ SLL_SetNext(element, *list);
+ *list = element;
+}
+
+inline void* SLL_Pop(void** list) {
+ void* result = *list;
+ void* next = SLL_Next(*list);
+ *list = next;
+ // Prefetching NULL leads to a DTLB miss, thus only prefetch when 'next'
+ // is not NULL.
+#if defined(__GNUC__)
+ if (next) {
+ __builtin_prefetch(next, 0, 3);
+ }
+#endif
+ return result;
+}
+
+// LinkedList forms an in-place linked list with its void* elements.
+class LinkedList {
+ private:
+ void* list_; // Linked list.
+ uint32_t length_; // Current length.
+
+ public:
+ void Init() {
+ list_ = nullptr;
+ length_ = 0;
+ }
+
+ // Return current length of list
+ size_t length() const { return length_; }
+
+ // Is list empty?
+ bool empty() const { return list_ == nullptr; }
+
+ void ABSL_ATTRIBUTE_ALWAYS_INLINE Push(void* ptr) {
+ SLL_Push(&list_, ptr);
+ length_++;
+ }
+
+ bool ABSL_ATTRIBUTE_ALWAYS_INLINE TryPop(void** ret) {
+ void* obj = list_;
+ if (ABSL_PREDICT_FALSE(obj == nullptr)) {
+ return false;
+ }
+
+ void* next = SLL_Next(obj);
+ list_ = next;
+ length_--;
+
+#if defined(__GNUC__)
+ if (ABSL_PREDICT_TRUE(next)) {
+ __builtin_prefetch(next, 0, 0);
+ }
+#endif
+
+ *ret = obj;
+ return true;
+ }
+
+ // PushBatch and PopBatch do not guarantee an ordering.
+ void PushBatch(int N, void** batch) {
+ ASSERT(N > 0);
+ for (int i = 0; i < N - 1; ++i) {
+ SLL_SetNext(batch[i], batch[i + 1]);
+ }
+ SLL_SetNext(batch[N - 1], list_);
+ list_ = batch[0];
+ length_ += N;
+ }
+
+ void PopBatch(int N, void** batch) {
+ void* p = list_;
+ for (int i = 0; i < N; ++i) {
+ batch[i] = p;
+ p = SLL_Next(p);
+ }
+ list_ = p;
+ ASSERT(length_ >= N);
+ length_ -= N;
+ }
+};
+
+// A well-typed intrusive doubly linked list.
+template <typename T>
+class TList {
+ private:
+ class Iter;
+
+ public:
+ // The intrusive element supertype. Use the CRTP to declare your class:
+ // class MyListItems : public TList<MyListItems>::Elem { ...
+ class Elem {
+ friend class Iter;
+ friend class TList<T>;
+ Elem* next_;
+ Elem* prev_;
+
+ protected:
+ constexpr Elem() : next_(nullptr), prev_(nullptr) {}
+
+ // Returns true iff the list is empty after removing this
+ bool remove() {
+ // Copy out next/prev before doing stores, otherwise compiler assumes
+ // potential aliasing and does unnecessary reloads after stores.
+ Elem* next = next_;
+ Elem* prev = prev_;
+ ASSERT(prev->next_ == this);
+ prev->next_ = next;
+ ASSERT(next->prev_ == this);
+ next->prev_ = prev;
+#ifndef NDEBUG
+ prev_ = nullptr;
+ next_ = nullptr;
+#endif
+ return next == prev;
+ }
+
+ void prepend(Elem* item) {
+ Elem* prev = prev_;
+ item->prev_ = prev;
+ item->next_ = this;
+ prev->next_ = item;
+ prev_ = item;
+ }
+
+ void append(Elem* item) {
+ Elem* next = next_;
+ item->next_ = next;
+ item->prev_ = this;
+ next->prev_ = item;
+ next_ = item;
+ }
+ };
+
+ // Initialize to empty list.
+ constexpr TList() { head_.next_ = head_.prev_ = &head_; }
+
+ bool empty() const { return head_.next_ == &head_; }
+
+ // Return the length of the linked list. O(n).
+ size_t length() const {
+ size_t result = 0;
+ for (Elem* e = head_.next_; e != &head_; e = e->next_) {
+ result++;
+ }
+ return result;
+ }
+
+ // Returns first element in the list. The list must not be empty.
+ ABSL_ATTRIBUTE_RETURNS_NONNULL T* first() const {
+ ASSERT(!empty());
+ ASSERT(head_.next_ != nullptr);
+ return static_cast<T*>(head_.next_);
+ }
+
+ // Returns last element in the list. The list must not be empty.
+ ABSL_ATTRIBUTE_RETURNS_NONNULL T* last() const {
+ ASSERT(!empty());
+ ASSERT(head_.prev_ != nullptr);
+ return static_cast<T*>(head_.prev_);
+ }
+
+ // Add item to the front of list.
+ void prepend(T* item) { head_.append(item); }
+
+ void append(T* item) { head_.prepend(item); }
+
+ bool remove(T* item) {
+ // must be on the list; we don't check.
+ return item->remove();
+ }
+
+ // Support for range-based iteration over a list.
+ Iter begin() const { return Iter(head_.next_); }
+ Iter end() const { return Iter(const_cast<Elem*>(&head_)); }
+
+ // Iterator pointing to a given list item.
+ // REQUIRES: item is a member of the list.
+ Iter at(T* item) const { return Iter(item); }
+
+ private:
+ // Support for range-based iteration over a list.
+ class Iter {
+ friend class TList;
+ Elem* elem_;
+ explicit Iter(Elem* elem) : elem_(elem) {}
+
+ public:
+ Iter& operator++() {
+ elem_ = elem_->next_;
+ return *this;
+ }
+ Iter& operator--() {
+ elem_ = elem_->prev_;
+ return *this;
+ }
+
+ bool operator!=(Iter other) const { return elem_ != other.elem_; }
+ bool operator==(Iter other) const { return elem_ == other.elem_; }
+ T* operator*() const { return static_cast<T*>(elem_); }
+ T* operator->() const { return static_cast<T*>(elem_); }
+ };
+ friend class Iter;
+
+ Elem head_;
+};
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // TCMALLOC_INTERNAL_LINKED_LIST_H_
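
A brief usage sketch (not part of the patch) of the two containers declared above: LinkedList threads its links through the client's own void*-sized storage, while TList is an intrusive doubly linked list used via CRTP. MyItem and ListSketch are hypothetical names for illustration only.

#include <stdlib.h>

#include "tcmalloc/internal/linked_list.h"

namespace tcmalloc {
namespace tcmalloc_internal {

// Hypothetical element type; TList requires deriving from TList<T>::Elem.
struct MyItem : public TList<MyItem>::Elem {
  explicit MyItem(int v) : value(v) {}
  int value;
};

inline void ListSketch() {
  // LinkedList stores the "next" pointer inside the element's own memory,
  // so each pushed block must be at least sizeof(void*) bytes.
  LinkedList raw;
  raw.Init();
  void* a = malloc(16);
  void* b = malloc(16);
  raw.Push(a);
  raw.Push(b);
  void* popped;
  while (raw.TryPop(&popped)) {
    free(popped);
  }

  // TList is intrusive: it links the elements themselves and never allocates.
  TList<MyItem> list;
  MyItem x(1), y(2);
  list.append(&x);   // x becomes the last element
  list.prepend(&y);  // y becomes the first element
  for (MyItem* item : list) {
    (void)item->value;  // iterates y, then x
  }
  list.remove(&x);
  list.remove(&y);
}

}  // namespace tcmalloc_internal
}  // namespace tcmalloc
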
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/linked_list_benchmark.cc b/contrib/libs/tcmalloc/tcmalloc/internal/linked_list_benchmark.cc
new file mode 100644
index 0000000000..505b1b62c2
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/linked_list_benchmark.cc
@@ -0,0 +1,146 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdlib.h>
+
+#include <algorithm>
+#include <vector>
+
+#include "absl/random/random.h"
+#include "benchmark/benchmark.h"
+#include "tcmalloc/internal/linked_list.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/internal/mock_span.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+namespace {
+
+void BM_PushPop(benchmark::State& state) {
+ const int pointers = state.range(0);
+ const int sequential_calls = state.range(1);
+
+ LinkedList list;
+ list.Init();
+ const size_t size = pointers * sizeof(void*);
+
+ std::vector<void*> v(sequential_calls);
+ for (int i = 0; i < sequential_calls; i++) {
+ v[i] = malloc(size);
+ }
+ std::shuffle(v.begin(), v.end(), absl::BitGen());
+
+ for (auto s : state) {
+ // Push sequential_calls times.
+ for (int j = 0; j < sequential_calls; j++) {
+ list.Push(v[j]);
+ }
+
+ // Pop sequential_calls times.
+ for (int j = 0; j < sequential_calls; j++) {
+ void* ret;
+ list.TryPop(&ret);
+ }
+ }
+
+ state.SetItemsProcessed(sequential_calls * state.iterations());
+
+ for (int i = 0; i < sequential_calls; i++) {
+ free(v[i]);
+ }
+}
+BENCHMARK(BM_PushPop)->RangePair(1, 64, 1, 32);
+
+void BM_PushPopBatch(benchmark::State& state) {
+ const int pointers = state.range(0);
+ const int batch_size = state.range(1);
+
+ LinkedList list;
+ list.Init();
+ const size_t size = pointers * sizeof(void*);
+
+ const int kNumberOfObjects = 64 << 10;
+ std::vector<void*> v(kNumberOfObjects);
+ for (int i = 0; i < kNumberOfObjects; i++) {
+ v[i] = malloc(size);
+ }
+ std::shuffle(v.begin(), v.end(), absl::BitGen());
+
+ const int kMaxObjectsToMove = 32;
+ void* batch[kMaxObjectsToMove];
+
+ for (auto s : state) {
+ // PushBatch
+ for (int j = 0; j < kNumberOfObjects / batch_size; j++) {
+ list.PushBatch(batch_size, v.data() + j * batch_size);
+ }
+
+ // PopBatch.
+ for (int j = 0; j < kNumberOfObjects / batch_size; j++) {
+ list.PopBatch(batch_size, batch);
+ }
+ }
+
+ state.SetItemsProcessed((kNumberOfObjects / batch_size) * batch_size *
+ state.iterations());
+
+ for (int i = 0; i < kNumberOfObjects; i++) {
+ free(v[i]);
+ }
+}
+BENCHMARK(BM_PushPopBatch)->RangePair(1, 64, 1, 32);
+
+static void BM_AppendRemove(benchmark::State& state) {
+ MockSpanList list;
+
+ int sequential_calls = state.range(0);
+
+ std::vector<MockSpan*> vappend(sequential_calls);
+
+ // Create MockSpans in append order
+ for (int i = 0; i < sequential_calls; i++) {
+ MockSpan* s = MockSpan::New(i);
+ CHECK_CONDITION(s != nullptr);
+ vappend[i] = s;
+ }
+
+ // Remove all sequential_calls elements from the list in a random order
+ std::vector<MockSpan*> vremove(sequential_calls);
+ vremove = vappend;
+ std::shuffle(vremove.begin(), vremove.end(), absl::BitGen());
+
+ for (auto _ : state) {
+ // Append sequential_calls elements to the list.
+ for (MockSpan* s : vappend) {
+ list.append(s);
+ }
+
+ // Remove in a random order
+ for (MockSpan* s : vremove) {
+ list.remove(s);
+ }
+ }
+
+ for (MockSpan* s : vappend) {
+ delete s;
+ }
+}
+
+BENCHMARK(BM_AppendRemove)->Range(32, 32 * 1024);
+
+} // namespace
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/linked_list_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/linked_list_test.cc
new file mode 100644
index 0000000000..3299bca8d8
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/linked_list_test.cc
@@ -0,0 +1,239 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/internal/linked_list.h"
+
+#include <stdlib.h>
+
+#include <algorithm>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "absl/container/flat_hash_set.h"
+#include "absl/container/node_hash_set.h"
+#include "absl/random/random.h"
+#include "benchmark/benchmark.h"
+#include "tcmalloc/internal/mock_span.h"
+
+namespace tcmalloc {
+namespace tcmalloc_internal {
+namespace {
+
+class LinkedListTest : public ::testing::Test {
+ protected:
+ void SetUp() override { list_.Init(); }
+
+ LinkedList list_;
+};
+
+TEST_F(LinkedListTest, PushPop) {
+ const int N = 20;
+ std::vector<void*> ptrs{nullptr};
+
+ EXPECT_EQ(0, list_.length());
+ EXPECT_TRUE(list_.empty());
+
+ for (int i = 0; i < N; i++) {
+ void* ptr = malloc(sizeof(void*));
+ ASSERT_FALSE(ptr == nullptr);
+ ptrs.push_back(ptr);
+
+ list_.Push(ptr);
+
+ EXPECT_EQ(i + 1, list_.length());
+ EXPECT_FALSE(list_.empty());
+ }
+
+ for (int i = N; i > 0; i--) {
+ EXPECT_EQ(i, list_.length());
+ EXPECT_FALSE(list_.empty());
+
+ void* ptr;
+ bool ret = list_.TryPop(&ptr);
+ EXPECT_TRUE(ret);
+ EXPECT_EQ(ptrs[i], ptr);
+
+ free(ptrs[i]);
+ }
+
+ EXPECT_EQ(0, list_.length());
+ EXPECT_TRUE(list_.empty());
+}
+
+// PushPopBatch validates that the batch operations push and pop the required
+// number of elements from the list, but it does not assert that order within
+// the batch is maintained.
+TEST_F(LinkedListTest, PushPopBatch) {
+ const std::vector<int> batch_sizes{1, 3, 5, 7, 10, 16};
+ absl::flat_hash_set<void*> pushed;
+
+ size_t length = 0;
+ for (int batch_size : batch_sizes) {
+ std::vector<void*> batch;
+
+ for (int i = 0; i < batch_size; i++) {
+ void* ptr = malloc(sizeof(void*));
+ ASSERT_FALSE(ptr == nullptr);
+ batch.push_back(ptr);
+ pushed.insert(ptr);
+ }
+
+ list_.PushBatch(batch_size, batch.data());
+ length += batch_size;
+
+ EXPECT_EQ(length, list_.length());
+ EXPECT_EQ(length == 0, list_.empty());
+ }
+
+ absl::flat_hash_set<void*> popped;
+ for (int batch_size : batch_sizes) {
+ std::vector<void*> batch(batch_size, nullptr);
+ list_.PopBatch(batch_size, batch.data());
+ length -= batch_size;
+
+ popped.insert(batch.begin(), batch.end());
+ EXPECT_EQ(length, list_.length());
+ EXPECT_EQ(length == 0, list_.empty());
+ }
+
+ EXPECT_EQ(pushed, popped);
+
+ for (void* ptr : pushed) {
+ free(ptr);
+ }
+}
+
+class TListTest : public ::testing::Test {
+ protected:
+ MockSpanList list_;
+};
+
+TEST_F(TListTest, AppendPushPop) {
+ const int N = 20;
+
+ EXPECT_EQ(list_.length(), 0);
+ EXPECT_TRUE(list_.empty());
+
+ // Append N elements to the list.
+ for (int i = 0; i < N; i++) {
+ MockSpan* s = MockSpan::New(i);
+ ASSERT_FALSE(s == nullptr);
+ list_.append(s);
+ EXPECT_EQ(list_.first()->index_, 0);
+ EXPECT_EQ(list_.last()->index_, i);
+
+ EXPECT_EQ(list_.length(), i + 1);
+ EXPECT_FALSE(list_.empty());
+ }
+
+ // Remove all N elements from the end of the list.
+ for (int i = N; i > 0; i--) {
+ EXPECT_EQ(list_.length(), i);
+ EXPECT_FALSE(list_.empty());
+
+ MockSpan* last = list_.last();
+ EXPECT_EQ(list_.first()->index_, 0);
+ EXPECT_EQ(list_.last()->index_, i - 1);
+
+ EXPECT_FALSE(last == nullptr);
+ bool ret = list_.remove(last);
+ // Returns true iff the list is empty after the remove.
+ EXPECT_EQ(ret, i == 1);
+
+ delete last;
+ }
+ EXPECT_EQ(list_.length(), 0);
+ EXPECT_TRUE(list_.empty());
+}
+
+TEST_F(TListTest, PrependPushPop) {
+ const int N = 20;
+
+ EXPECT_EQ(list_.length(), 0);
+ EXPECT_TRUE(list_.empty());
+
+ // Prepend N elements to the list.
+ for (int i = 0; i < N; i++) {
+ MockSpan* s = MockSpan::New(i);
+ ASSERT_FALSE(s == nullptr);
+ list_.prepend(s);
+ EXPECT_EQ(list_.first()->index_, i);
+ EXPECT_EQ(list_.last()->index_, 0);
+
+ EXPECT_EQ(list_.length(), i + 1);
+ EXPECT_FALSE(list_.empty());
+ }
+
+ // Check range iterator
+ {
+ int x = N - 1;
+ for (const MockSpan* s : list_) {
+ EXPECT_EQ(s->index_, x);
+ x--;
+ }
+ }
+
+ // Remove all N elements from the front of the list.
+ for (int i = N; i > 0; i--) {
+ EXPECT_EQ(list_.length(), i);
+ EXPECT_FALSE(list_.empty());
+
+ MockSpan* first = list_.first();
+ EXPECT_EQ(list_.first()->index_, i - 1);
+ EXPECT_EQ(list_.last()->index_, 0);
+
+ EXPECT_FALSE(first == nullptr);
+ bool ret = list_.remove(first);
+ // Returns true iff the list is empty after the remove.
+ EXPECT_EQ(ret, i == 1);
+
+ delete first;
+ }
+ EXPECT_EQ(list_.length(), 0);
+ EXPECT_TRUE(list_.empty());
+}
+
+TEST_F(TListTest, AppendRandomRemove) {
+ const int N = 100;
+ std::vector<MockSpan*> v(N);
+
+ // Append N elements to the list.
+ for (int i = 0; i < N; i++) {
+ MockSpan* s = MockSpan::New(i);
+ ASSERT_FALSE(s == nullptr);
+ v[i] = s;
+ list_.append(s);
+ }
+
+ // Remove all N elements from the list in a random order
+ std::shuffle(v.begin(), v.end(), absl::BitGen());
+ int i = N;
+ for (MockSpan* s : v) {
+ EXPECT_EQ(list_.length(), i);
+ EXPECT_FALSE(list_.empty());
+
+ bool ret = list_.remove(s);
+ // Returns true iff the list is empty after the remove.
+ EXPECT_EQ(ret, i == 1);
+
+ delete s;
+ i--;
+ }
+ EXPECT_EQ(list_.length(), 0);
+ EXPECT_TRUE(list_.empty());
+}
+
+} // namespace
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/linux_syscall_support.h b/contrib/libs/tcmalloc/tcmalloc/internal/linux_syscall_support.h
new file mode 100644
index 0000000000..0abf54ff1c
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/linux_syscall_support.h
@@ -0,0 +1,65 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TCMALLOC_INTERNAL_LINUX_SYSCALL_SUPPORT_H_
+#define TCMALLOC_INTERNAL_LINUX_SYSCALL_SUPPORT_H_
+
+/* include/uapi/linux/rseq.h */
+
+struct kernel_rseq {
+ unsigned cpu_id_start;
+ unsigned cpu_id;
+ unsigned long long rseq_cs;
+ unsigned flags;
+ unsigned padding[2];
+ // This is a prototype extension to the rseq() syscall. Since a process may
+ // run on only a few cores at a time, we can use a dense set of "v(irtual)
+ // cpus." This can reduce cache requirements, as we only need N caches for
+ // the cores we actually run on simultaneously, rather than a cache for every
+ // physical core.
+ union {
+ struct {
+ short numa_node_id;
+ short vcpu_id;
+ };
+ int vcpu_flat;
+ };
+} __attribute__((aligned(4 * sizeof(unsigned long long))));
+
+static_assert(sizeof(kernel_rseq) == (4 * sizeof(unsigned long long)),
+ "Unexpected size for rseq structure");
+
+struct kernel_rseq_cs {
+ unsigned version;
+ unsigned flags;
+ unsigned long long start_ip;
+ unsigned long long post_commit_offset;
+ unsigned long long abort_ip;
+ // This is aligned, per upstream RSEQ specification.
+} __attribute__((aligned(4 * sizeof(unsigned long long))));
+
+static_assert(sizeof(kernel_rseq_cs) == (4 * sizeof(unsigned long long)),
+ "Unexpected size for rseq_cs structure");
+
+#if !defined(__NR_rseq)
+#if defined(__x86_64__)
+#define __NR_rseq 334
+#elif defined(__aarch64__)
+#define __NR_rseq 293
+#elif defined(__PPC__)
+#define __NR_rseq 387
+#endif
+#endif
+
+#endif // TCMALLOC_INTERNAL_LINUX_SYSCALL_SUPPORT_H_
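
The kernel_rseq layout above mirrors the upstream struct rseq ABI, so registration goes through the raw rseq system call. A hedged sketch (not part of the patch) follows; the flags and signature handling are simplified, and RegisterRseq is a hypothetical helper, not how TCMalloc itself performs registration.

#include <errno.h>
#include <stdint.h>
#include <sys/syscall.h>
#include <unistd.h>

#include "tcmalloc/internal/linux_syscall_support.h"

namespace {

// The registered struct must stay valid for the lifetime of the thread.
__thread kernel_rseq thread_rseq = {};

// Hypothetical helper. rseq(2) takes (struct rseq*, uint32_t len, int flags,
// uint32_t sig); flags == 0 registers, RSEQ_FLAG_UNREGISTER unregisters.
bool RegisterRseq(uint32_t signature) {
#if defined(__NR_rseq)
  long rc = syscall(__NR_rseq, &thread_rseq, sizeof(thread_rseq), 0, signature);
  return rc == 0 || errno == EBUSY;  // EBUSY: this thread already registered.
#else
  (void)signature;
  return false;
#endif
}

}  // namespace
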
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/logging.cc b/contrib/libs/tcmalloc/tcmalloc/internal/logging.cc
new file mode 100644
index 0000000000..2b70bc1502
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/logging.cc
@@ -0,0 +1,276 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/internal/logging.h"
+
+#include <inttypes.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <algorithm>
+
+#include "absl/base/attributes.h"
+#include "absl/base/const_init.h"
+#include "absl/base/internal/spinlock.h"
+#include "absl/base/macros.h"
+#include "absl/debugging/stacktrace.h"
+#include "tcmalloc/internal/parameter_accessors.h"
+#include "tcmalloc/malloc_extension.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+// Variables for storing crash output. Allocated statically since we
+// may not be able to heap-allocate while crashing.
+ABSL_CONST_INIT static absl::base_internal::SpinLock crash_lock(
+ absl::kConstInit, absl::base_internal::SCHEDULE_KERNEL_ONLY);
+static bool crashed = false;
+
+static const size_t kStatsBufferSize = 16 << 10;
+static char stats_buffer[kStatsBufferSize] = {0};
+
+static void WriteMessage(const char* msg, int length) {
+ (void)::write(STDERR_FILENO, msg, length);
+}
+
+void (*log_message_writer)(const char* msg, int length) = WriteMessage;
+
+class Logger {
+ public:
+ bool Add(const LogItem& item);
+ bool AddStr(const char* str, int n);
+ bool AddNum(uint64_t num, int base); // base must be 10 or 16.
+
+ static constexpr int kBufSize = 512;
+ char* p_;
+ char* end_;
+ char buf_[kBufSize];
+
+ StackTrace trace;
+};
+
+static Logger FormatLog(bool with_stack, const char* filename, int line,
+ LogItem a, LogItem b, LogItem c, LogItem d) {
+ Logger state;
+ state.p_ = state.buf_;
+ state.end_ = state.buf_ + sizeof(state.buf_);
+ // clang-format off
+ state.AddStr(filename, strlen(filename)) &&
+ state.AddStr(":", 1) &&
+ state.AddNum(line, 10) &&
+ state.AddStr("]", 1) &&
+ state.Add(a) &&
+ state.Add(b) &&
+ state.Add(c) &&
+ state.Add(d);
+ // clang-format on
+
+ if (with_stack) {
+ state.trace.depth =
+ absl::GetStackTrace(state.trace.stack, kMaxStackDepth, 1);
+ state.Add(LogItem("@"));
+ for (int i = 0; i < state.trace.depth; i++) {
+ state.Add(LogItem(state.trace.stack[i]));
+ }
+ }
+
+  // Terminate with a newline.
+ if (state.p_ >= state.end_) {
+ state.p_ = state.end_ - 1;
+ }
+ *state.p_ = '\n';
+ state.p_++;
+
+ return state;
+}
+
+ABSL_ATTRIBUTE_NOINLINE
+void Log(LogMode mode, const char* filename, int line, LogItem a, LogItem b,
+ LogItem c, LogItem d) {
+ Logger state = FormatLog(mode == kLogWithStack, filename, line, a, b, c, d);
+ int msglen = state.p_ - state.buf_;
+ (*log_message_writer)(state.buf_, msglen);
+}
+
+ABSL_ATTRIBUTE_NOINLINE
+void Crash(CrashMode mode, const char* filename, int line, LogItem a, LogItem b,
+ LogItem c, LogItem d) {
+ Logger state = FormatLog(true, filename, line, a, b, c, d);
+
+ int msglen = state.p_ - state.buf_;
+
+ // FailureSignalHandler mallocs for various logging attempts.
+ // We might be crashing holding tcmalloc locks.
+ // We're substantially less likely to try to take those locks
+ // (and thus deadlock until the alarm timer fires) if we disable sampling.
+#ifndef __APPLE__
+ if (&TCMalloc_Internal_SetProfileSamplingRate != nullptr) {
+ TCMalloc_Internal_SetProfileSamplingRate(0);
+ }
+#endif // __APPLE__
+
+ bool first_crash = false;
+ {
+ absl::base_internal::SpinLockHolder l(&crash_lock);
+ if (!crashed) {
+ crashed = true;
+ first_crash = true;
+ }
+ }
+
+ (*log_message_writer)(state.buf_, msglen);
+ if (first_crash && mode == kCrashWithStats) {
+#ifndef __APPLE__
+ if (&TCMalloc_Internal_GetStats != nullptr) {
+ size_t n = TCMalloc_Internal_GetStats(stats_buffer, kStatsBufferSize);
+ (*log_message_writer)(stats_buffer, std::min(n, kStatsBufferSize));
+ }
+#endif // __APPLE__
+ }
+
+ abort();
+}
+
+bool Logger::Add(const LogItem& item) {
+ // Separate real items with spaces
+ if (item.tag_ != LogItem::kEnd && p_ < end_) {
+ *p_ = ' ';
+ p_++;
+ }
+
+ switch (item.tag_) {
+ case LogItem::kStr:
+ return AddStr(item.u_.str, strlen(item.u_.str));
+ case LogItem::kUnsigned:
+ return AddNum(item.u_.unum, 10);
+ case LogItem::kSigned:
+ if (item.u_.snum < 0) {
+ // The cast to uint64_t is intentionally before the negation
+ // so that we do not attempt to negate -2^63.
+ return AddStr("-", 1) &&
+ AddNum(-static_cast<uint64_t>(item.u_.snum), 10);
+ } else {
+ return AddNum(static_cast<uint64_t>(item.u_.snum), 10);
+ }
+ case LogItem::kPtr:
+ return AddStr("0x", 2) &&
+ AddNum(reinterpret_cast<uintptr_t>(item.u_.ptr), 16);
+ default:
+ return false;
+ }
+}
+
+bool Logger::AddStr(const char* str, int n) {
+ ptrdiff_t remaining = end_ - p_;
+ if (remaining < n) {
+ // Try to log a truncated message if there is some space.
+ static constexpr absl::string_view kDots = "...";
+ if (remaining > kDots.size() + 1) {
+ int truncated = remaining - kDots.size();
+ memcpy(p_, str, truncated);
+ p_ += truncated;
+ memcpy(p_, kDots.data(), kDots.size());
+ p_ += kDots.size();
+
+ return true;
+ }
+ return false;
+ } else {
+ memcpy(p_, str, n);
+ p_ += n;
+ return true;
+ }
+}
+
+bool Logger::AddNum(uint64_t num, int base) {
+ static const char kDigits[] = "0123456789abcdef";
+ char space[22]; // more than enough for 2^64 in smallest supported base (10)
+ char* end = space + sizeof(space);
+ char* pos = end;
+ do {
+ pos--;
+ *pos = kDigits[num % base];
+ num /= base;
+ } while (num > 0 && pos > space);
+ return AddStr(pos, end - pos);
+}
+
+PbtxtRegion::PbtxtRegion(Printer* out, PbtxtRegionType type, int indent)
+ : out_(out), type_(type), indent_(indent) {
+ switch (type_) {
+ case kTop:
+ break;
+ case kNested:
+ out_->printf("{");
+ break;
+ }
+ ++indent_;
+}
+
+PbtxtRegion::~PbtxtRegion() {
+ --indent_;
+ out_->printf("\n");
+ for (int i = 0; i < indent_; i++) {
+ out_->printf(" ");
+ }
+ switch (type_) {
+ case kTop:
+ break;
+ case kNested:
+ out_->printf("}");
+ break;
+ }
+}
+
+void PbtxtRegion::NewLineAndIndent() {
+ out_->printf("\n");
+ for (int i = 0; i < indent_; i++) {
+ out_->printf(" ");
+ }
+}
+
+void PbtxtRegion::PrintI64(absl::string_view key, int64_t value) {
+ NewLineAndIndent();
+ out_->printf("%s: %" PRIi64, key, value);
+}
+
+void PbtxtRegion::PrintDouble(absl::string_view key, double value) {
+ NewLineAndIndent();
+ out_->printf("%s: %f", key, value);
+}
+
+void PbtxtRegion::PrintBool(absl::string_view key, bool value) {
+ NewLineAndIndent();
+ out_->printf("%s: %s", key, value ? "true" : "false");
+}
+
+void PbtxtRegion::PrintRaw(absl::string_view key, absl::string_view value) {
+ NewLineAndIndent();
+ out_->printf("%s: %s", key, value);
+}
+
+PbtxtRegion PbtxtRegion::CreateSubRegion(absl::string_view key) {
+ NewLineAndIndent();
+ out_->printf("%s ", key);
+ PbtxtRegion sub(out_, kNested, indent_);
+ return sub;
+}
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/logging.h b/contrib/libs/tcmalloc/tcmalloc/internal/logging.h
new file mode 100644
index 0000000000..4d42aa40a9
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/logging.h
@@ -0,0 +1,222 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Internal logging and related utility routines.
+
+#ifndef TCMALLOC_INTERNAL_LOGGING_H_
+#define TCMALLOC_INTERNAL_LOGGING_H_
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "absl/base/internal/per_thread_tls.h"
+#include "absl/base/optimization.h"
+#include "absl/strings/str_format.h"
+#include "absl/strings/string_view.h"
+#include "tcmalloc/internal/config.h"
+
+//-------------------------------------------------------------------
+// Utility routines
+//-------------------------------------------------------------------
+
+// Safe logging helper: we write directly to the stderr file
+// descriptor and avoid FILE buffering because that may invoke
+// malloc().
+//
+// Example:
+// Log(kLog, __FILE__, __LINE__, "error", bytes);
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+static constexpr int kMaxStackDepth = 64;
+
+// size/depth are made the same size as a pointer so that some generic
+// code below can conveniently cast them back and forth to void*.
+struct StackTrace {
+
+ // For small sampled objects, we allocate a full span to hold the
+ // sampled object. However to avoid disturbing fragmentation
+ // profiles, in such cases we also allocate a small proxy object
+ // using the normal mechanism.
+ //
+ // proxy field is defined only for heap sample stack traces.
+ // For heap samples, proxy==NULL iff size > kMaxSize.
+ void* proxy;
+
+ uintptr_t requested_size;
+ uintptr_t requested_alignment;
+ uintptr_t allocated_size; // size after sizeclass/page rounding
+
+ uintptr_t depth; // Number of PC values stored in array below
+ void* stack[kMaxStackDepth];
+
+ // weight is the expected number of *bytes* that were requested
+ // between the previous sample and this one
+ size_t weight;
+
+ void* user_data;
+
+ template <typename H>
+ friend H AbslHashValue(H h, const StackTrace& t) {
+ // As we use StackTrace as a key-value node in StackTraceTable, we only
+ // produce a hasher for the fields used as keys.
+ return H::combine(H::combine_contiguous(std::move(h), t.stack, t.depth),
+ t.depth, t.requested_size, t.requested_alignment,
+ t.allocated_size
+ );
+ }
+};
+
+enum LogMode {
+ kLog, // Just print the message
+ kLogWithStack, // Print the message and a stack trace
+};
+
+class Logger;
+
+// A LogItem holds any of the argument types that can be passed to Log()
+class LogItem {
+ public:
+ LogItem() : tag_(kEnd) {}
+ LogItem(const char* v) : tag_(kStr) { u_.str = v; }
+ LogItem(int v) : tag_(kSigned) { u_.snum = v; }
+ LogItem(long v) : tag_(kSigned) { u_.snum = v; }
+ LogItem(long long v) : tag_(kSigned) { u_.snum = v; }
+ LogItem(unsigned int v) : tag_(kUnsigned) { u_.unum = v; }
+ LogItem(unsigned long v) : tag_(kUnsigned) { u_.unum = v; }
+ LogItem(unsigned long long v) : tag_(kUnsigned) { u_.unum = v; }
+ LogItem(const void* v) : tag_(kPtr) { u_.ptr = v; }
+
+ private:
+ friend class Logger;
+ enum Tag { kStr, kSigned, kUnsigned, kPtr, kEnd };
+ Tag tag_;
+ union {
+ const char* str;
+ const void* ptr;
+ int64_t snum;
+ uint64_t unum;
+ } u_;
+};
+
+extern void Log(LogMode mode, const char* filename, int line, LogItem a,
+ LogItem b = LogItem(), LogItem c = LogItem(),
+ LogItem d = LogItem());
+
+enum CrashMode {
+ kCrash, // Print the message and crash
+ kCrashWithStats // Print the message, some stats, and crash
+};
+
+ABSL_ATTRIBUTE_NORETURN
+void Crash(CrashMode mode, const char* filename, int line, LogItem a,
+ LogItem b = LogItem(), LogItem c = LogItem(), LogItem d = LogItem());
+
+// Tests can override this function to collect logging messages.
+extern void (*log_message_writer)(const char* msg, int length);
+
+// Like assert(), but executed even in NDEBUG mode
+#undef CHECK_CONDITION
+#define CHECK_CONDITION(cond) \
+ (ABSL_PREDICT_TRUE(cond) ? (void)0 \
+ : (::tcmalloc::tcmalloc_internal::Crash( \
+ ::tcmalloc::tcmalloc_internal::kCrash, \
+ __FILE__, __LINE__, #cond)))
+
+// Our own version of assert() so we can avoid hanging by trying to do
+// all kinds of goofy printing while holding the malloc lock.
+#ifndef NDEBUG
+#define ASSERT(cond) CHECK_CONDITION(cond)
+#else
+#define ASSERT(cond) ((void)0)
+#endif
+
+// Print into buffer
+class Printer {
+ private:
+ char* buf_; // Where should we write next
+ int left_; // Space left in buffer (including space for \0)
+ int required_; // Space we needed to complete all printf calls up to this
+ // point
+
+ public:
+ // REQUIRES: "length > 0"
+ Printer(char* buf, int length) : buf_(buf), left_(length), required_(0) {
+ ASSERT(length > 0);
+ buf[0] = '\0';
+ }
+
+ template <typename... Args>
+ void printf(const absl::FormatSpec<Args...>& format, const Args&... args) {
+ ASSERT(left_ >= 0);
+ if (left_ <= 0) {
+ return;
+ }
+
+ const int r = absl::SNPrintF(buf_, left_, format, args...);
+ if (r < 0) {
+ left_ = 0;
+ return;
+ }
+ required_ += r;
+
+ if (r > left_) {
+ left_ = 0;
+ } else {
+ left_ -= r;
+ buf_ += r;
+ }
+ }
+
+ int SpaceRequired() const { return required_; }
+};
+
+enum PbtxtRegionType { kTop, kNested };
+
+// A helper class that prints pbtxt via RAII. A pbtxt region can be either a
+// top region (with no brackets) or a nested region (enclosed by curly
+// brackets).
+class PbtxtRegion {
+ public:
+ PbtxtRegion(Printer* out, PbtxtRegionType type, int indent);
+ ~PbtxtRegion();
+
+ PbtxtRegion(const PbtxtRegion&) = delete;
+ PbtxtRegion(PbtxtRegion&&) = default;
+
+ // Prints 'key: value'.
+ void PrintI64(absl::string_view key, int64_t value);
+ void PrintDouble(absl::string_view key, double value);
+ void PrintBool(absl::string_view key, bool value);
+ // Useful for enums.
+ void PrintRaw(absl::string_view key, absl::string_view value);
+
+ // Prints 'key subregion'. Return the created subregion.
+ PbtxtRegion CreateSubRegion(absl::string_view key);
+
+ private:
+ void NewLineAndIndent();
+
+ Printer* out_;
+ PbtxtRegionType type_;
+ int indent_;
+};
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // TCMALLOC_INTERNAL_LOGGING_H_
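
A short sketch (not part of the patch) of the printing helpers declared above: Printer formats into a caller-supplied buffer without allocating, and PbtxtRegion emits nested pbtxt via RAII, closing its braces in the destructor. LoggingSketch and the field names are hypothetical.

#include "tcmalloc/internal/logging.h"

namespace tcmalloc {
namespace tcmalloc_internal {

inline void LoggingSketch() {
  char buf[256];
  Printer printer(buf, sizeof(buf));
  printer.printf("free bytes: %d\n", 42);
  // Reports how much space all printf calls needed, even after truncation.
  (void)printer.SpaceRequired();

  // Emits a nested region for "filler" with two key/value fields.
  // Braces are closed when the regions go out of scope (inner one first).
  PbtxtRegion top(&printer, kTop, /*indent=*/0);
  PbtxtRegion filler = top.CreateSubRegion("filler");
  filler.PrintI64("free_pages", 7);
  filler.PrintBool("released", true);
}

}  // namespace tcmalloc_internal
}  // namespace tcmalloc
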
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/logging_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/logging_test.cc
new file mode 100644
index 0000000000..c7b58de40f
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/logging_test.cc
@@ -0,0 +1,117 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/internal/logging.h"
+
+#include <string.h>
+
+#include <string>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/flags/flag.h"
+
+namespace tcmalloc {
+namespace tcmalloc_internal {
+namespace {
+
+static std::string* log_buffer;
+
+static void RecordLogMessage(const char* msg, int length) {
+ // Make tests less brittle by trimming trailing whitespace
+ while (length > 0 && (msg[length - 1] == ' ' || msg[length - 1] == '\n')) {
+ length--;
+ }
+ log_buffer->assign(msg, length);
+}
+
+TEST(InternalLogging, MessageFormatting) {
+ std::string long_string;
+ for (int i = 0; i < 100; i++) {
+ long_string += "the quick brown fox jumped over the lazy dog";
+ }
+
+ // Arrange to intercept Log() output
+ log_buffer = new std::string();
+ void (*old_writer)(const char*, int) = log_message_writer;
+ log_message_writer = RecordLogMessage;
+
+ Log(kLog, "foo.cc", 100, "Hello");
+ EXPECT_EQ("foo.cc:100] Hello", *log_buffer);
+
+ Log(kLog, "foo.cc", 100, 123u, -456, 0);
+ EXPECT_EQ("foo.cc:100] 123 -456 0", *log_buffer);
+
+ Log(kLog, "foo.cc", 100, 123u, std::numeric_limits<int64_t>::min());
+ EXPECT_EQ("foo.cc:100] 123 -9223372036854775808", *log_buffer);
+
+ Log(kLog, "foo.cc", 2,
+ reinterpret_cast<const void*>(static_cast<uintptr_t>(1025)));
+ EXPECT_EQ("foo.cc:2] 0x401", *log_buffer);
+
+ Log(kLog, "foo.cc", 10, "hello", long_string.c_str());
+ EXPECT_THAT(*log_buffer,
+ testing::StartsWith(
+ "foo.cc:10] hello the quick brown fox jumped over the lazy "
+ "dogthe quick brown fox jumped over the lazy dog"));
+
+ Log(kLogWithStack, "foo.cc", 10, "stk");
+ EXPECT_TRUE(strstr(log_buffer->c_str(), "stk @ 0x") != nullptr)
+ << *log_buffer;
+
+ log_message_writer = old_writer;
+ delete log_buffer;
+}
+
+TEST(InternalLogging, Assert) {
+ CHECK_CONDITION((2 + 2) == 4);
+
+ if (false)
+ CHECK_CONDITION(false);
+ else
+ CHECK_CONDITION(true);
+
+ ASSERT_DEATH(CHECK_CONDITION((2 + 2) == 5),
+ ".*tcmalloc\\/internal/logging_test\\.cc:[0-9]+\\] "
+ "\\(2 \\+ 2\\) == 5 @( 0x[0-9a-f]+)+");
+}
+
+TEST(Printer, RequiredSpace) {
+ const char kChunk[] = "0123456789";
+ std::string expected;
+
+ for (int i = 0; i < 10; i++) {
+ int length = strlen(kChunk) * i + 1;
+ std::unique_ptr<char[]> buf(new char[length]);
+ Printer printer(buf.get(), length);
+
+ for (int j = 0; j < i; j++) {
+ printer.printf("%s", kChunk);
+ }
+ EXPECT_EQ(buf.get(), expected);
+ EXPECT_EQ(length - 1, printer.SpaceRequired());
+
+ // Go past the end of the buffer. This should not overrun or affect the
+ // existing contents of buf, but we should see SpaceRequired tick up.
+ printer.printf("%s", kChunk);
+ EXPECT_EQ(buf.get(), expected);
+ EXPECT_EQ(length - 1 + strlen(kChunk), printer.SpaceRequired());
+
+ expected.append(kChunk);
+ }
+}
+
+} // namespace
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/logging_test_helper.cc b/contrib/libs/tcmalloc/tcmalloc/internal/logging_test_helper.cc
new file mode 100644
index 0000000000..36c2b38771
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/logging_test_helper.cc
@@ -0,0 +1,18 @@
+// Copyright 2021 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// This is a trivial program. When run with a virtual address size rlimit,
+// TCMalloc should crash cleanly, rather than hang.
+
+int main() { return 0; }
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/memory_stats.cc b/contrib/libs/tcmalloc/tcmalloc/internal/memory_stats.cc
new file mode 100644
index 0000000000..71591834d4
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/memory_stats.cc
@@ -0,0 +1,132 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/internal/memory_stats.h"
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "absl/strings/numbers.h"
+#include "tcmalloc/internal/config.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/internal/util.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+namespace {
+
+struct FDCloser {
+ FDCloser() : fd(-1) {}
+ ~FDCloser() {
+ if (fd != -1) {
+ signal_safe_close(fd);
+ }
+ }
+ int fd;
+};
+
+} // namespace
+
+bool GetMemoryStats(MemoryStats* stats) {
+#if !defined(__linux__)
+ return false;
+#endif
+
+ FDCloser fd;
+ fd.fd = signal_safe_open("/proc/self/statm", O_RDONLY | O_CLOEXEC);
+ ASSERT(fd.fd >= 0);
+ if (fd.fd < 0) {
+ return false;
+ }
+
+ char buf[1024];
+ ssize_t rc = signal_safe_read(fd.fd, buf, sizeof(buf), nullptr);
+ ASSERT(rc >= 0);
+ ASSERT(rc < sizeof(buf));
+ if (rc < 0 || rc >= sizeof(buf)) {
+ return false;
+ }
+ buf[rc] = '\0';
+
+ const size_t pagesize = getpagesize();
+ absl::string_view contents(buf, rc);
+ absl::string_view::size_type start = 0;
+ int index = 0;
+ do {
+ auto end = contents.find(' ', start);
+
+ absl::string_view value;
+ if (end == absl::string_view::npos) {
+ value = contents.substr(start);
+ } else {
+ value = contents.substr(start, end - start);
+ }
+
+ int64_t parsed;
+ if (!absl::SimpleAtoi(value, &parsed)) {
+ return false;
+ }
+
+ // Fields in /proc/self/statm:
+ // [0] = vss
+ // [1] = rss
+ // [2] = shared
+ // [3] = code
+ // [4] = unused
+ // [5] = data + stack
+ // [6] = unused
+ switch (index) {
+ case 0:
+ stats->vss = parsed * pagesize;
+ break;
+ case 1:
+ stats->rss = parsed * pagesize;
+ break;
+ case 2:
+ stats->shared = parsed * pagesize;
+ break;
+ case 3:
+ stats->code = parsed * pagesize;
+ break;
+ case 5:
+ stats->data = parsed * pagesize;
+ break;
+ case 4:
+ case 6:
+ default:
+ // Unused
+ break;
+ }
+
+ if (end == absl::string_view::npos) {
+ break;
+ }
+
+ start = end + 1;
+ } while (start < contents.size() && index++ < 6);
+
+ if (index < 6) {
+ return false;
+ }
+
+ return true;
+}
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/memory_stats.h b/contrib/libs/tcmalloc/tcmalloc/internal/memory_stats.h
new file mode 100644
index 0000000000..a65f5b03d3
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/memory_stats.h
@@ -0,0 +1,41 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TCMALLOC_INTERNAL_MEMORY_STATS_H_
+#define TCMALLOC_INTERNAL_MEMORY_STATS_H_
+
+#include <stdint.h>
+
+#include "tcmalloc/internal/config.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+struct MemoryStats {
+ int64_t vss;
+ int64_t rss;
+ int64_t shared;
+ int64_t code;
+ int64_t data;
+};
+
+// Memory stats of a process
+bool GetMemoryStats(MemoryStats* stats);
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // TCMALLOC_INTERNAL_MEMORY_STATS_H_
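
A minimal sketch (not part of the patch) of querying the structure above. GetMemoryStats() fills the fields in bytes (page counts from /proc/self/statm scaled by the page size) and returns false on non-Linux platforms; PrintRss is a hypothetical helper.

#include <stdio.h>

#include "tcmalloc/internal/memory_stats.h"

namespace tcmalloc {
namespace tcmalloc_internal {

inline void PrintRss() {
  MemoryStats stats;
  if (!GetMemoryStats(&stats)) {
    return;  // Unsupported platform or /proc read failure.
  }
  // Fields are reported in bytes.
  printf("vss=%lld rss=%lld shared=%lld\n",
         static_cast<long long>(stats.vss), static_cast<long long>(stats.rss),
         static_cast<long long>(stats.shared));
}

}  // namespace tcmalloc_internal
}  // namespace tcmalloc
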
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/memory_stats_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/memory_stats_test.cc
new file mode 100644
index 0000000000..176c712734
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/memory_stats_test.cc
@@ -0,0 +1,43 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/internal/memory_stats.h"
+
+#include <memory>
+
+#include "gtest/gtest.h"
+
+namespace tcmalloc {
+namespace tcmalloc_internal {
+namespace {
+
+TEST(Stats, ValidRanges) {
+ MemoryStats stats;
+#if defined(__linux__)
+ ASSERT_TRUE(GetMemoryStats(&stats));
+#else
+ ASSERT_FALSE(GetMemoryStats(&stats));
+ return;
+#endif
+
+ EXPECT_GT(stats.vss, 0);
+ EXPECT_GT(stats.rss, 0);
+ EXPECT_GT(stats.shared, 0);
+ EXPECT_GT(stats.code, 0);
+ EXPECT_GT(stats.data, 0);
+}
+
+} // namespace
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/mincore.cc b/contrib/libs/tcmalloc/tcmalloc/internal/mincore.cc
new file mode 100644
index 0000000000..e4120bcf5a
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/mincore.cc
@@ -0,0 +1,129 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/internal/mincore.h"
+
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include <algorithm>
+#include <cstdint>
+
+#include "tcmalloc/internal/config.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+// Class that implements the call into the OS provided mincore() function.
+class OsMInCore final : public MInCoreInterface {
+ public:
+ int mincore(void* addr, size_t length, unsigned char* result) final {
+ return ::mincore(addr, length, result);
+ }
+
+ ~OsMInCore() override = default;
+};
+
+// Returns the number of resident bytes for a range of memory of arbitrary
+// alignment and size.
+size_t MInCore::residence_impl(void* addr, size_t size,
+ MInCoreInterface* mincore) {
+ if (size == 0) {
+ return 0;
+ }
+ unsigned char res[kArrayLength];
+ const size_t kPageSize = getpagesize();
+
+ uintptr_t uaddr = reinterpret_cast<uintptr_t>(addr);
+ // Round address down to get the start of the page containing the data.
+ uintptr_t basePage = uaddr & ~(kPageSize - 1);
+ // Round end address up to get the end of the page containing the data.
+ uintptr_t endPage = (uaddr + size + kPageSize - 1) & ~(kPageSize - 1);
+
+ uintptr_t remainingPages = endPage - basePage;
+
+ // We need to handle the first and last pages differently. Most pages
+ // will contribute pagesize bytes to residence, but the first and last
+ // pages will contribute fewer than that. The easiest way to do this is to
+ // handle the special case where the entire object fits into a page,
+ // then handle the case where the object spans more than one page.
+ if (remainingPages == kPageSize) {
+ // Find out whether the first page is resident.
+ mincore->mincore(reinterpret_cast<void*>(basePage), remainingPages, res);
+ // Residence info is returned in LSB, other bits are undefined.
+ if ((res[0] & 1) == 1) {
+ return size;
+ }
+ return 0;
+ }
+
+ // We're calling this outside the loop so that we can get info for the
+ // first page, deal with subsequent pages in the loop, and then handle
+ // the last page after the loop.
+ size_t scanLength = std::min(remainingPages, kPageSize * kArrayLength);
+ if (mincore->mincore(reinterpret_cast<void*>(basePage), scanLength, res) !=
+ 0) {
+ return 0;
+ }
+
+ size_t totalResident = 0;
+
+ // Handle the first page.
+ size_t firstPageSize = kPageSize - (uaddr - basePage);
+ if ((res[0] & 1) == 1) {
+ totalResident += firstPageSize;
+ }
+ basePage += kPageSize;
+ remainingPages -= kPageSize;
+
+ int resIndex = 1;
+
+ // Handle all pages but the last page.
+ while (remainingPages > kPageSize) {
+ if ((res[resIndex] & 1) == 1) {
+ totalResident += kPageSize;
+ }
+ resIndex++;
+ basePage += kPageSize;
+ remainingPages -= kPageSize;
+ // Refresh the array if necessary.
+ if (resIndex == kArrayLength) {
+ resIndex = 0;
+ scanLength = std::min(remainingPages, kPageSize * kArrayLength);
+ if (mincore->mincore(reinterpret_cast<void*>(basePage), scanLength,
+ res) != 0) {
+ return 0;
+ }
+ }
+ }
+
+ // Check final page
+ size_t lastPageSize = kPageSize - (endPage - uaddr - size);
+ if ((res[resIndex] & 1) == 1) {
+ totalResident += lastPageSize;
+ }
+
+ return totalResident;
+}
+
+// Return residence info using call to OS provided mincore().
+size_t MInCore::residence(void* addr, size_t size) {
+ OsMInCore mc;
+ return residence_impl(addr, size, &mc);
+}
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
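
To make the first/last page handling above concrete (numbers assume a 4 KiB page size): for an object of size 4096 starting 2048 bytes into page A, where page A is not resident but the following page B is, the first page contributes nothing, and the last page contributes kPageSize - (endPage - uaddr - size) = 4096 - 2048 = 2048 bytes, so residence() reports 2048 resident bytes. This matches the "half mapped" cases exercised in mincore_test.cc further below.
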
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/mincore.h b/contrib/libs/tcmalloc/tcmalloc/internal/mincore.h
new file mode 100644
index 0000000000..c353bdac87
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/mincore.h
@@ -0,0 +1,65 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TCMALLOC_INTERNAL_MINCORE_H_
+#define TCMALLOC_INTERNAL_MINCORE_H_
+
+#include <stddef.h>
+
+#include "tcmalloc/internal/config.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+// Class to wrap mincore so that we can replace it for testing.
+class MInCoreInterface {
+ public:
+ MInCoreInterface() {}
+ virtual ~MInCoreInterface() {}
+ virtual int mincore(void* addr, size_t length, unsigned char* result) = 0;
+
+ private:
+ MInCoreInterface(const MInCoreInterface&) = delete;
+ MInCoreInterface& operator=(const MInCoreInterface&) = delete;
+};
+
+// The MInCore class provides, through the function residence(addr, size), a
+// convenient way to report the residence of an arbitrary memory region. It is
+// a wrapper for the ::mincore() function, which has the constraint of
+// requiring the base address to be page aligned.
+class MInCore {
+ public:
+ MInCore() {}
+ // For a region of memory return the number of bytes that are
+ // actually resident in memory. Note that the address and size
+ // do not need to be a multiple of the system page size.
+ static size_t residence(void* addr, size_t size);
+
+ private:
+ // Separate out the implementation to make the code easier to test.
+ static size_t residence_impl(void* addr, size_t size,
+ MInCoreInterface* mincore);
+
+ // Size of the array used to gather results from mincore().
+ static constexpr int kArrayLength = 4096;
+ // Friends required for testing
+ friend class MInCoreTest;
+};
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // TCMALLOC_INTERNAL_MINCORE_H_
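
A short usage sketch (the buffer and offsets are arbitrary; MInCore::residence() is the only API assumed here):

#include <cstddef>
#include <cstdio>
#include <cstring>
#include <vector>

#include "tcmalloc/internal/mincore.h"

int main() {
  // Touch a 1 MiB buffer so that most of its pages become resident.
  std::vector<char> buf(1 << 20);
  memset(buf.data(), 1, buf.size());

  // Neither the address nor the size needs to be page aligned.
  size_t resident = tcmalloc::tcmalloc_internal::MInCore::residence(
      buf.data() + 123, buf.size() - 123);
  std::printf("resident: %zu of %zu bytes\n", resident, buf.size() - 123);
  return 0;
}
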
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/mincore_benchmark.cc b/contrib/libs/tcmalloc/tcmalloc/internal/mincore_benchmark.cc
new file mode 100644
index 0000000000..02c8ead48d
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/mincore_benchmark.cc
@@ -0,0 +1,61 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include <algorithm>
+#include <cstdint>
+#include <memory>
+#include <set>
+
+#include "absl/memory/memory.h"
+#include "benchmark/benchmark.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/internal/mincore.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace {
+
+// Benchmark performance of mincore. We use an array on the stack to gather
+// mincore data. The larger the array the more we amortise the cost of calling
+// mincore, but the more stack space the array takes up.
+void BM_mincore(benchmark::State& state) {
+ const int size = state.range(0);
+
+ // If we want to place the array on the stack then the maximum frame size is
+ // 16KiB. So there is no point in benchmarking sizes larger than this.
+ const int kMaxArraySize = 16 * 1024;
+ CHECK_CONDITION(size <= kMaxArraySize);
+ auto resident = absl::make_unique<unsigned char[]>(kMaxArraySize);
+
+ const size_t kPageSize = getpagesize();
+ // We want to scan the same amount of memory in all cases
+ const size_t regionSize = 1 * 1024 * 1024 * 1024;
+ for (auto s : state) {
+ uintptr_t memory = 0;
+ while (memory < regionSize) {
+ // Call mincore for the next section
+ int length = std::min(size * kPageSize, (regionSize - memory));
+ ::mincore(reinterpret_cast<void*>(memory), length, resident.get());
+ memory += length * kPageSize;
+ }
+ }
+}
+BENCHMARK(BM_mincore)->Range(1, 16 * 1024);
+
+} // namespace
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
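
For scale (assuming a 4 KiB page size): with a result array of 4096 entries, the kArrayLength used by MInCore, a single mincore() call covers 4096 * 4 KiB = 16 MiB of address space, so scanning the 1 GiB region above takes 64 calls; with a 16-entry array it would take 16,384 calls. That trade-off between call count and array size is what this benchmark measures.
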
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/mincore_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/mincore_test.cc
new file mode 100644
index 0000000000..daa1178b25
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/mincore_test.cc
@@ -0,0 +1,193 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/internal/mincore.h"
+
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include <algorithm>
+#include <cstdint>
+#include <memory>
+#include <set>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "benchmark/benchmark.h"
+#include "tcmalloc/internal/logging.h"
+
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+using ::testing::Eq;
+
+// Mock interface to mincore() which reports residence based on
+// the set of pages registered via addPage().
+class MInCoreMock : public MInCoreInterface {
+ public:
+ MInCoreMock() : mapped_() {}
+ ~MInCoreMock() override {}
+
+ // Implementation of mincore() that reports presence based on the registered pages.
+ int mincore(void* addr, size_t length, unsigned char* result) override {
+ const size_t kPageSize = getpagesize();
+ uintptr_t uAddress = reinterpret_cast<uintptr_t>(addr);
+ // Check that we only pass page aligned addresses into mincore().
+ EXPECT_THAT(uAddress & (kPageSize - 1), Eq(0));
+
+ uintptr_t uEndAddress = uAddress + length;
+ int index = 0;
+ // Check for presence of the target pages in the map.
+ while (uAddress < uEndAddress) {
+ result[index] = (mapped_.find(uAddress) != mapped_.end() ? 1 : 0);
+ uAddress += kPageSize;
+ index++;
+ }
+ return 0;
+ }
+
+ void addPage(uintptr_t uAddress) { mapped_.insert(uAddress); }
+
+ private:
+ std::set<uintptr_t> mapped_;
+};
+
+// Friend class of MInCore which calls the mincore mock.
+class MInCoreTest {
+ public:
+ MInCoreTest() : mcm_() {}
+ ~MInCoreTest() {}
+
+ size_t residence(uintptr_t addr, size_t size) {
+ return MInCore::residence_impl(reinterpret_cast<void*>(addr), size, &mcm_);
+ }
+
+ void addPage(uintptr_t page) { mcm_.addPage(page); }
+
+ // Expose the internal size of array that we use to call mincore() so
+ // that we can be sure to need multiple calls to cover large memory regions.
+ const size_t chunkSize() { return MInCore::kArrayLength; }
+
+ private:
+ MInCoreMock mcm_;
+};
+
+namespace {
+
+using ::testing::Eq;
+
+TEST(StaticVarsTest, TestResidence) {
+ MInCoreTest mct;
+ const size_t kPageSize = getpagesize();
+
+ // Set up a pattern with a few resident pages.
+ // page 0 not mapped
+ mct.addPage(kPageSize);
+ // page 2 not mapped
+ mct.addPage(3 * kPageSize);
+ mct.addPage(4 * kPageSize);
+
+ // An object of size zero should have a residence of zero.
+ EXPECT_THAT(mct.residence(320, 0), Eq(0));
+
+ // Check that an object entirely on the first page is
+ // reported as entirely unmapped.
+ EXPECT_THAT(mct.residence(320, 55), Eq(0));
+
+ // Check that an object entirely on the second page is
+ // reported as entirely mapped.
+ EXPECT_THAT(mct.residence(kPageSize + 320, 55), Eq(55));
+
+ // An object of size zero should have a residence of zero.
+ EXPECT_THAT(mct.residence(kPageSize + 320, 0), Eq(0));
+
+ // Check that an object over a mapped and unmapped page is half mapped.
+ EXPECT_THAT(mct.residence(kPageSize / 2, kPageSize), Eq(kPageSize / 2));
+
+ // Check that an object which spans two pages is reported as being mapped
+ // only on the page that's resident.
+ EXPECT_THAT(mct.residence(kPageSize / 2 * 3, kPageSize), Eq(kPageSize / 2));
+
+ // Check that an object that is on two mapped pages is reported as entirely
+ // resident.
+ EXPECT_THAT(mct.residence(kPageSize / 2 * 7, kPageSize), Eq(kPageSize));
+
+ // Check that an object that is on one mapped page is reported as only
+ // resident on the mapped page.
+ EXPECT_THAT(mct.residence(kPageSize * 2, kPageSize + 1), Eq(1));
+
+ // Check that an object that is on one mapped page is reported as only
+ // resident on the mapped page.
+ EXPECT_THAT(mct.residence(kPageSize + 1, kPageSize + 1), Eq(kPageSize - 1));
+
+ // Check that an object which spans beyond the mapped pages is reported
+ // as unmapped
+ EXPECT_THAT(mct.residence(kPageSize * 6, kPageSize), Eq(0));
+
+ // Check an object that spans three pages, two of them mapped.
+ EXPECT_THAT(mct.residence(kPageSize / 2 * 7 + 1, kPageSize * 2),
+ Eq(kPageSize * 3 / 2 - 1));
+}
+
+// Test whether we are correctly handling multiple calls to mincore.
+TEST(StaticVarsTest, TestLargeResidence) {
+ MInCoreTest mct;
+ uintptr_t uAddress = 0;
+ const size_t kPageSize = getpagesize();
+ // Set up a pattern covering 6 * page size * MInCore::kArrayLength to
+ // allow us to test for situations where the region we're checking
+ // requires multiple calls to mincore().
+ // Use a mapped/unmapped/unmapped pattern; this means that
+ // the regions examined by mincore() do not have regular alignment
+ // with the pattern.
+ for (int i = 0; i < 2 * mct.chunkSize(); i++) {
+ mct.addPage(uAddress);
+ uAddress += 3 * kPageSize;
+ }
+
+ uintptr_t baseAddress = 0;
+ for (int size = kPageSize; size < 32 * 1024 * 1024; size += 2 * kPageSize) {
+ uintptr_t unit = kPageSize * 3;
+ EXPECT_THAT(mct.residence(baseAddress, size),
+ Eq(kPageSize * ((size + unit - 1) / unit)));
+ }
+}
+
+TEST(StaticVarsTest, UnmappedMemory) {
+ const size_t kPageSize = getpagesize();
+ const int kNumPages = 16;
+
+ // Overallocate kNumPages of memory, so we can munmap the page before and
+ // after it.
+ void* p = mmap(nullptr, (kNumPages + 2) * kPageSize, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ ASSERT_NE(p, MAP_FAILED) << errno;
+ ASSERT_EQ(munmap(p, kPageSize), 0);
+ void* q = reinterpret_cast<char*>(p) + kPageSize;
+ void* last = reinterpret_cast<char*>(p) + (kNumPages + 1) * kPageSize;
+ ASSERT_EQ(munmap(last, kPageSize), 0);
+
+ memset(q, 0, kNumPages * kPageSize);
+ ::benchmark::DoNotOptimize(q);
+
+ for (int i = 0; i <= kNumPages; i++) {
+ EXPECT_EQ(i * kPageSize, MInCore::residence(q, i * kPageSize));
+ }
+
+ ASSERT_EQ(munmap(q, kNumPages * kPageSize), 0);
+}
+
+} // namespace
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/mock_span.h b/contrib/libs/tcmalloc/tcmalloc/internal/mock_span.h
new file mode 100644
index 0000000000..10922c48bd
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/mock_span.h
@@ -0,0 +1,42 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TCMALLOC_INTERNAL_MOCK_SPAN_H_
+#define TCMALLOC_INTERNAL_MOCK_SPAN_H_
+
+#include "tcmalloc/internal/linked_list.h"
+
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+class MockSpan;
+typedef TList<MockSpan> MockSpanList;
+
+class MockSpan : public MockSpanList::Elem {
+ public:
+ MockSpan() {}
+
+ static MockSpan* New(int idx = 0) {
+ MockSpan* ret = new MockSpan();
+ ret->index_ = idx;
+ return ret;
+ }
+
+ int index_;
+};
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+
+#endif // TCMALLOC_INTERNAL_MOCK_SPAN_H_
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/numa.cc b/contrib/libs/tcmalloc/tcmalloc/internal/numa.cc
new file mode 100644
index 0000000000..1639bd1b6d
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/numa.cc
@@ -0,0 +1,220 @@
+// Copyright 2021 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/internal/numa.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <sched.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <array>
+#include <cstring>
+
+#include "absl/base/attributes.h"
+#include "absl/base/internal/sysinfo.h"
+#include "absl/functional/function_ref.h"
+#include "absl/strings/numbers.h"
+#include "absl/strings/string_view.h"
+#include "tcmalloc/internal/config.h"
+#include "tcmalloc/internal/environment.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/internal/util.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+// Returns true iff NUMA awareness should be enabled by default (i.e. in the
+// absence of the TCMALLOC_NUMA_AWARE environment variable). This weak
+// implementation may be overridden by the one in want_numa_aware.cc.
+ABSL_ATTRIBUTE_WEAK bool default_want_numa_aware() { return false; }
+
+int OpenSysfsCpulist(size_t node) {
+ char path[PATH_MAX];
+ snprintf(path, sizeof(path), "/sys/devices/system/node/node%zu/cpulist",
+ node);
+ return signal_safe_open(path, O_RDONLY | O_CLOEXEC);
+}
+
+cpu_set_t ParseCpulist(absl::FunctionRef<ssize_t(char *, size_t)> read) {
+ cpu_set_t set;
+ CPU_ZERO(&set);
+
+ std::array<char, 16> buf;
+ size_t carry_over = 0;
+ int cpu_from = -1;
+
+ while (true) {
+ const ssize_t rc = read(buf.data() + carry_over, buf.size() - carry_over);
+ CHECK_CONDITION(rc >= 0);
+
+ const absl::string_view current(buf.data(), carry_over + rc);
+
+ // If we have no more data to parse & couldn't read any then we've reached
+ // the end of the input & are done.
+ if (current.empty() && rc == 0) {
+ break;
+ }
+
+ size_t consumed;
+ const size_t dash = current.find('-');
+ const size_t comma = current.find(',');
+ if (dash != absl::string_view::npos && dash < comma) {
+ CHECK_CONDITION(absl::SimpleAtoi(current.substr(0, dash), &cpu_from));
+ consumed = dash + 1;
+ } else if (comma != absl::string_view::npos || rc == 0) {
+ int cpu;
+ CHECK_CONDITION(absl::SimpleAtoi(current.substr(0, comma), &cpu));
+ if (comma == absl::string_view::npos) {
+ consumed = current.size();
+ } else {
+ consumed = comma + 1;
+ }
+ if (cpu_from != -1) {
+ for (int c = cpu_from; c <= cpu; c++) {
+ CPU_SET(c, &set);
+ }
+ cpu_from = -1;
+ } else {
+ CPU_SET(cpu, &set);
+ }
+ } else {
+ consumed = 0;
+ }
+
+ carry_over = current.size() - consumed;
+ memmove(buf.data(), buf.data() + consumed, carry_over);
+ }
+
+ return set;
+}
+
+bool InitNumaTopology(size_t cpu_to_scaled_partition[CPU_SETSIZE],
+ uint64_t *const partition_to_nodes,
+ NumaBindMode *const bind_mode,
+ const size_t num_partitions, const size_t scale_by,
+ absl::FunctionRef<int(size_t)> open_node_cpulist) {
+ // Node 0 will always map to partition 0; record it here in case the system
+ // doesn't support NUMA or the user opts out of our awareness of it - in
+ // either case we'll record nothing in the loop below.
+ partition_to_nodes[NodeToPartition(0, num_partitions)] |= 1 << 0;
+
+ // If we only compiled in support for one partition then we're trivially
+ // done; NUMA awareness is unavailable.
+ if (num_partitions == 1) return false;
+
+ // We rely on rseq to quickly obtain a CPU ID & lookup the appropriate
+ // partition in NumaTopology::GetCurrentPartition(). If rseq is unavailable,
+ // disable NUMA awareness.
+ if (!subtle::percpu::IsFast()) return false;
+
+ // Honor default_want_numa_aware() to allow compile time configuration of
+ // whether to enable NUMA awareness by default, and allow the user to
+ // override that either way by setting TCMALLOC_NUMA_AWARE in the
+ // environment.
+ //
+ // In cases where we don't enable NUMA awareness we simply return. Since the
+ // cpu_to_scaled_partition & partition_to_nodes arrays are zero initialized
+ // we're trivially done - CPUs all map to partition 0, which contains only
+ // CPU 0 added above.
+ const char *e =
+ tcmalloc::tcmalloc_internal::thread_safe_getenv("TCMALLOC_NUMA_AWARE");
+ if (e == nullptr) {
+ // Enable NUMA awareness iff default_want_numa_aware().
+ if (!default_want_numa_aware()) return false;
+ } else if (!strcmp(e, "no-binding")) {
+ // Enable NUMA awareness with no memory binding behavior.
+ *bind_mode = NumaBindMode::kNone;
+ } else if (!strcmp(e, "advisory-binding") || !strcmp(e, "1")) {
+ // Enable NUMA awareness with advisory memory binding behavior.
+ *bind_mode = NumaBindMode::kAdvisory;
+ } else if (!strcmp(e, "strict-binding")) {
+ // Enable NUMA awareness with strict memory binding behavior.
+ *bind_mode = NumaBindMode::kStrict;
+ } else if (!strcmp(e, "0")) {
+ // Disable NUMA awareness.
+ return false;
+ } else {
+ Crash(kCrash, __FILE__, __LINE__, "bad TCMALLOC_NUMA_AWARE env var", e);
+ }
+
+ // The cpu_to_scaled_partition array has a fixed size so that we can
+ // statically allocate it & avoid the need to check whether it has been
+ // allocated prior to lookups. It has CPU_SETSIZE entries which ought to be
+ // sufficient, but we sanity check that indexing it by CPU number cannot
+ // exceed its bounds.
+ int num_cpus = absl::base_internal::NumCPUs();
+ CHECK_CONDITION(num_cpus <= CPU_SETSIZE);
+
+ // We could just always report that we're NUMA aware, but if a NUMA-aware
+ // binary runs on a system that doesn't include multiple NUMA nodes then our
+ // NUMA awareness will offer no benefit whilst incurring the cost of
+ // redundant work & stats. As such we only report that we're NUMA aware if
+ // there's actually NUMA to be aware of, which we track here.
+ bool numa_aware = false;
+
+ for (size_t node = 0;; node++) {
+ // Detect NUMA nodes by opening their cpulist files from sysfs.
+ const int fd = open_node_cpulist(node);
+ if (fd == -1) {
+ // We expect to encounter ENOENT once node surpasses the actual number of
+ // nodes present in the system. Any other error is a problem.
+ CHECK_CONDITION(errno == ENOENT);
+ break;
+ }
+
+ // Record this node in partition_to_nodes.
+ const size_t partition = NodeToPartition(node, num_partitions);
+ partition_to_nodes[partition] |= 1 << node;
+
+ // cpu_to_scaled_partition_ entries are default initialized to zero, so
+ // skip redundantly parsing CPU lists for nodes that map to partition 0.
+ if (partition == 0) {
+ signal_safe_close(fd);
+ continue;
+ }
+
+ // Parse the cpulist file to determine which CPUs are local to this node.
+ const cpu_set_t node_cpus =
+ ParseCpulist([&](char *const buf, const size_t count) {
+ return signal_safe_read(fd, buf, count, /*bytes_read=*/nullptr);
+ });
+
+ // Assign local CPUs to the appropriate partition.
+ for (size_t cpu = 0; cpu < CPU_SETSIZE; cpu++) {
+ if (CPU_ISSET(cpu, &node_cpus)) {
+ cpu_to_scaled_partition[cpu + kNumaCpuFudge] = partition * scale_by;
+ }
+ }
+
+ // If we observed any CPUs for this node then we've now got CPUs assigned
+ // to a non-zero partition; report that we're NUMA aware.
+ if (CPU_COUNT(&node_cpus) != 0) {
+ numa_aware = true;
+ }
+
+ signal_safe_close(fd);
+ }
+
+ return numa_aware;
+}
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
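
For illustration, ParseCpulist() can be fed from an in-memory string instead of a sysfs file descriptor; this sketch (the helper name and the sample cpulist are made up for the example, and the randomized test in numa_test.cc further below exercises the same idea more thoroughly):

#include <sched.h>
#include <sys/types.h>

#include <algorithm>
#include <cstring>

#include "absl/strings/string_view.h"
#include "tcmalloc/internal/numa.h"

cpu_set_t ParseCpulistFromString(absl::string_view cpulist) {
  return tcmalloc::tcmalloc_internal::ParseCpulist(
      [&](char *buf, size_t count) -> ssize_t {
        // Behave like read(): hand back up to `count` bytes and consume them.
        const size_t n = std::min(count, cpulist.size());
        memcpy(buf, cpulist.data(), n);
        cpulist.remove_prefix(n);
        return n;
      });
}

// ParseCpulistFromString("0-2,8") returns a cpu_set_t with CPUs 0, 1, 2 and 8
// set, mirroring the format of /sys/devices/system/node/nodeX/cpulist.
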
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/numa.h b/contrib/libs/tcmalloc/tcmalloc/internal/numa.h
new file mode 100644
index 0000000000..bf04c65c21
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/numa.h
@@ -0,0 +1,227 @@
+// Copyright 2021 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TCMALLOC_INTERNAL_NUMA_H_
+#define TCMALLOC_INTERNAL_NUMA_H_
+
+#include <sched.h>
+#include <stddef.h>
+#include <sys/types.h>
+
+#include "absl/functional/function_ref.h"
+#include "tcmalloc/internal/config.h"
+#include "tcmalloc/internal/percpu.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+// Indicates how TCMalloc should handle binding memory regions to nodes within
+// particular NUMA partitions.
+enum class NumaBindMode {
+ // Don't bind memory at all. Note that this does not make NUMA awareness
+ // pointless so long as the NUMA memory policy of threads performing
+ // allocations favors the local node. It does mean that we won't be certain
+ // that memory is local to any particular partition; it will just be likely.
+ kNone,
+ // Attempt to bind memory but don't treat failure as fatal. If binding fails
+ // then a warning will be logged & we'll then be in much the same state as
+ // kNone.
+ kAdvisory,
+ // Strictly bind memory to nodes within the partition we expect - any error
+ // in doing so is fatal & the program will crash. This allows a program to
+ // ensure that memory is definitely bound to the set of nodes we expect.
+ kStrict,
+};
+
+// We use the result of RseqCpuId() in GetCurrentPartition() to avoid branching
+// in the fast path, but this means that the CPU number we look up in
+// cpu_to_scaled_partition_ might equal kCpuIdUninitialized or
+// kCpuIdUnsupported. We add this fudge factor to the value to compensate,
+// ensuring that our accesses to the cpu_to_scaled_partition_ array are always
+// in bounds.
+static constexpr size_t kNumaCpuFudge = -subtle::percpu::kCpuIdUnsupported;
+
+// Provides information about the topology of a NUMA system.
+//
+// In general we cannot know at compile time how many NUMA nodes the system
+// that we run upon will include, but we also cannot size our data structures
+// arbitrarily at runtime in the name of efficiency. In order to resolve the
+// conflict between these two constraints we define the concept of a NUMA
+// 'partition' as being an arbitrary set of NUMA nodes, disjoint from all other
+// partitions. At compile time we select a fixed number of partitions to
+// support, and at runtime we map each NUMA node in the system to a partition.
+// If the number of supported partitions is greater than or equal to the number
+// of NUMA nodes in the system then partition & node are effectively identical.
+// If however the system has more nodes than we do partitions then nodes
+// assigned to the same partition will share size classes & thus memory. This
+// may incur a performance hit, but allows us to at least run on any system.
+template <size_t NumPartitions, size_t ScaleBy = 1>
+class NumaTopology {
+ public:
+ // Trivially zero initialize data members.
+ constexpr NumaTopology() = default;
+
+ // Initialize topology information. This must be called only once, before any
+ // of the functions below.
+ void Init();
+
+ // Like Init(), but allows a test to specify a different `open_node_cpulist`
+ // function in order to provide NUMA topology information that doesn't
+ // reflect the system we're running upon.
+ void InitForTest(absl::FunctionRef<int(size_t)> open_node_cpulist);
+
+ // Returns true if NUMA awareness is available & enabled, otherwise false.
+ bool numa_aware() const {
+ // Explicitly checking NumPartitions here provides a compile time constant
+ // false in cases where NumPartitions==1, allowing NUMA awareness to be
+ // optimized away.
+ return (NumPartitions > 1) && numa_aware_;
+ }
+
+ // Returns the number of NUMA partitions deemed 'active' - i.e. the number of
+ // partitions that other parts of TCMalloc need to concern themselves with.
+ // Checking this rather than using kNumaPartitions allows users to avoid work
+ // on non-zero partitions when NUMA awareness is disabled.
+ size_t active_partitions() const { return numa_aware() ? NumPartitions : 1; }
+
+ // Return a value indicating how we should behave with regards to binding
+ // memory regions to NUMA nodes.
+ NumaBindMode bind_mode() const { return bind_mode_; }
+
+ // Return the NUMA partition number to which the CPU we're currently
+ // executing upon belongs. Note that whilst the CPU->partition mapping is
+ // fixed, the return value of this function may change at arbitrary times as
+ // this thread migrates between CPUs.
+ size_t GetCurrentPartition() const;
+
+ // Like GetCurrentPartition(), but returns a partition number multiplied by
+ // ScaleBy.
+ size_t GetCurrentScaledPartition() const;
+
+ // Return the NUMA partition number to which `cpu` belongs.
+ //
+ // It is valid for cpu to equal subtle::percpu::kCpuIdUninitialized or
+ // subtle::percpu::kCpuIdUnsupported. In either case partition 0 will be
+ // returned.
+ size_t GetCpuPartition(int cpu) const;
+
+ // Like GetCpuPartition(), but returns a partition number multiplied by
+ // ScaleBy.
+ size_t GetCpuScaledPartition(int cpu) const;
+
+ // Return a bitmap in which set bits identify the nodes that belong to the
+ // specified NUMA `partition`.
+ uint64_t GetPartitionNodes(int partition) const;
+
+ private:
+ // Maps from CPU number (plus kNumaCpuFudge) to scaled NUMA partition.
+ size_t cpu_to_scaled_partition_[CPU_SETSIZE + kNumaCpuFudge] = {0};
+ // Maps from NUMA partition to a bitmap of NUMA nodes within the partition.
+ uint64_t partition_to_nodes_[NumPartitions] = {0};
+ // Indicates whether NUMA awareness is available & enabled.
+ bool numa_aware_ = false;
+ // Desired memory binding behavior.
+ NumaBindMode bind_mode_ = NumaBindMode::kAdvisory;
+};
+
+// Opens a /sys/devices/system/node/nodeX/cpulist file for read only access &
+// returns the file descriptor.
+int OpenSysfsCpulist(size_t node);
+
+// Parse a CPU list in the format used by
+// /sys/devices/system/node/nodeX/cpulist files - that is, individual CPU
+// numbers or ranges in the format <start>-<end> inclusive all joined by comma
+// characters.
+//
+// The read function is expected to operate much like the read syscall. It
+// should read up to `count` bytes into `buf` and return the number of bytes
+// actually read. If an error occurs during reading it should return -1 with
+// errno set to an appropriate error code.
+cpu_set_t ParseCpulist(
+ absl::FunctionRef<ssize_t(char *buf, size_t count)> read);
+
+// Initialize the data members of a NumaTopology<> instance.
+//
+// This function must only be called once per NumaTopology<> instance, and
+// relies upon the data members of that instance being default initialized.
+//
+// The `open_node_cpulist` function is typically OpenSysfsCpulist but tests may
+// use a different implementation.
+//
+// Returns true if we're actually NUMA aware; i.e. if we have CPUs mapped to
+// multiple partitions.
+bool InitNumaTopology(size_t cpu_to_scaled_partition[CPU_SETSIZE],
+ uint64_t *partition_to_nodes, NumaBindMode *bind_mode,
+ size_t num_partitions, size_t scale_by,
+ absl::FunctionRef<int(size_t)> open_node_cpulist);
+
+// Returns the NUMA partition to which `node` belongs.
+inline size_t NodeToPartition(const size_t node, const size_t num_partitions) {
+ return node % num_partitions;
+}
+
+template <size_t NumPartitions, size_t ScaleBy>
+inline void NumaTopology<NumPartitions, ScaleBy>::Init() {
+ numa_aware_ =
+ InitNumaTopology(cpu_to_scaled_partition_, partition_to_nodes_,
+ &bind_mode_, NumPartitions, ScaleBy, OpenSysfsCpulist);
+}
+
+template <size_t NumPartitions, size_t ScaleBy>
+inline void NumaTopology<NumPartitions, ScaleBy>::InitForTest(
+ absl::FunctionRef<int(size_t)> open_node_cpulist) {
+ numa_aware_ =
+ InitNumaTopology(cpu_to_scaled_partition_, partition_to_nodes_,
+ &bind_mode_, NumPartitions, ScaleBy, open_node_cpulist);
+}
+
+template <size_t NumPartitions, size_t ScaleBy>
+inline size_t NumaTopology<NumPartitions, ScaleBy>::GetCurrentPartition()
+ const {
+ if constexpr (NumPartitions == 1) return 0;
+ return GetCpuPartition(subtle::percpu::RseqCpuId());
+}
+
+template <size_t NumPartitions, size_t ScaleBy>
+inline size_t NumaTopology<NumPartitions, ScaleBy>::GetCurrentScaledPartition()
+ const {
+ if constexpr (NumPartitions == 1) return 0;
+ return GetCpuScaledPartition(subtle::percpu::RseqCpuId());
+}
+
+template <size_t NumPartitions, size_t ScaleBy>
+inline size_t NumaTopology<NumPartitions, ScaleBy>::GetCpuPartition(
+ const int cpu) const {
+ return GetCpuScaledPartition(cpu) / ScaleBy;
+}
+
+template <size_t NumPartitions, size_t ScaleBy>
+inline size_t NumaTopology<NumPartitions, ScaleBy>::GetCpuScaledPartition(
+ const int cpu) const {
+ if constexpr (NumPartitions == 1) return 0;
+ return cpu_to_scaled_partition_[cpu + kNumaCpuFudge];
+}
+
+template <size_t NumPartitions, size_t ScaleBy>
+inline uint64_t NumaTopology<NumPartitions, ScaleBy>::GetPartitionNodes(
+ const int partition) const {
+ return partition_to_nodes_[partition];
+}
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // TCMALLOC_INTERNAL_NUMA_H_
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/numa_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/numa_test.cc
new file mode 100644
index 0000000000..bbd86a3f7d
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/numa_test.cc
@@ -0,0 +1,284 @@
+// Copyright 2021 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/internal/numa.h"
+
+#include <errno.h>
+#include <linux/memfd.h>
+#include <sched.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <syscall.h>
+#include <unistd.h>
+
+#include <algorithm>
+#include <new>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "absl/random/random.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_join.h"
+#include "absl/strings/string_view.h"
+#include "absl/types/span.h"
+#include "tcmalloc/internal/logging.h"
+
+namespace tcmalloc {
+namespace tcmalloc_internal {
+namespace {
+
+int memfd_create(const char *name, unsigned int flags) {
+#ifdef __NR_memfd_create
+ return syscall(__NR_memfd_create, name, flags);
+#else
+ errno = ENOSYS;
+ return -1;
+#endif
+}
+
+// A synthetic cpulist that can be read from a file descriptor.
+class SyntheticCpuList {
+ public:
+ explicit SyntheticCpuList(const absl::string_view content) {
+ fd_ = memfd_create("cpulist", MFD_CLOEXEC);
+ CHECK_CONDITION(fd_ != -1);
+
+ CHECK_CONDITION(write(fd_, content.data(), content.size()) ==
+ content.size());
+ CHECK_CONDITION(write(fd_, "\n", 1) == 1);
+ CHECK_CONDITION(lseek(fd_, 0, SEEK_SET) == 0);
+ }
+
+ ~SyntheticCpuList() { close(fd_); }
+
+ // Disallow copies, which would require reference counting to know when
+ // we should close fd_.
+ SyntheticCpuList(const SyntheticCpuList &) = delete;
+ SyntheticCpuList &operator=(const SyntheticCpuList &) = delete;
+
+ // Moves are fine - only one instance at a time holds the fd.
+ SyntheticCpuList(SyntheticCpuList &&other)
+ : fd_(std::exchange(other.fd_, -1)) {}
+ SyntheticCpuList &operator=(SyntheticCpuList &&other) {
+ new (this) SyntheticCpuList(std::move(other));
+ return *this;
+ }
+
+ int fd() const { return fd_; }
+
+ private:
+ // The underlying memfd.
+ int fd_;
+};
+
+class NumaTopologyTest : public ::testing::Test {
+ protected:
+ void SetUp() override {
+ // We use memfd to create synthetic cpulist files, and can't run without
+ // it. Skip all affected tests if memfd is not supported (i.e. Linux <
+ // 3.17).
+ const int fd = memfd_create("test", MFD_CLOEXEC);
+ if (fd == -1 && errno == ENOSYS) {
+ GTEST_SKIP() << "Test requires memfd support";
+ }
+ close(fd);
+
+ // If rseq is unavailable the NumaTopology never enables NUMA awareness.
+ if (!subtle::percpu::IsFast()) {
+ GTEST_SKIP() << "Test requires rseq support";
+ }
+ }
+};
+
+template <size_t NumPartitions>
+NumaTopology<NumPartitions> CreateNumaTopology(
+ const absl::Span<const SyntheticCpuList> cpu_lists) {
+ NumaTopology<NumPartitions> nt;
+ nt.InitForTest([&](const size_t node) {
+ if (node >= cpu_lists.size()) {
+ errno = ENOENT;
+ return -1;
+ }
+ return cpu_lists[node].fd();
+ });
+ return nt;
+}
+
+// Ensure that if we set NumPartitions=1 then NUMA awareness is disabled even
+// in the presence of a system with multiple NUMA nodes.
+TEST_F(NumaTopologyTest, NoCompileTimeNuma) {
+ std::vector<SyntheticCpuList> nodes;
+ nodes.emplace_back("0");
+ nodes.emplace_back("1");
+
+ const auto nt = CreateNumaTopology<1>(nodes);
+
+ EXPECT_EQ(nt.numa_aware(), false);
+ EXPECT_EQ(nt.active_partitions(), 1);
+}
+
+// Ensure that if we run on a system with no NUMA support at all (i.e. no
+// /sys/devices/system/node/nodeX/cpulist files) we correctly disable NUMA
+// awareness.
+TEST_F(NumaTopologyTest, NoRunTimeNuma) {
+ const auto nt = CreateNumaTopology<2>({});
+
+ EXPECT_EQ(nt.numa_aware(), false);
+ EXPECT_EQ(nt.active_partitions(), 1);
+}
+
+// Ensure that if we run on a system with only 1 node then we disable NUMA
+// awareness.
+TEST_F(NumaTopologyTest, SingleNode) {
+ std::vector<SyntheticCpuList> nodes;
+ nodes.emplace_back("0-27");
+
+ const auto nt = CreateNumaTopology<4>(nodes);
+
+ EXPECT_EQ(nt.numa_aware(), false);
+ EXPECT_EQ(nt.active_partitions(), 1);
+}
+
+// Basic sanity test modelling a simple 2 node system.
+TEST_F(NumaTopologyTest, TwoNode) {
+ std::vector<SyntheticCpuList> nodes;
+ nodes.emplace_back("0-5");
+ nodes.emplace_back("6-11");
+
+ const auto nt = CreateNumaTopology<2>(nodes);
+
+ EXPECT_EQ(nt.numa_aware(), true);
+ EXPECT_EQ(nt.active_partitions(), 2);
+
+ for (int cpu = 0; cpu <= 5; cpu++) {
+ EXPECT_EQ(nt.GetCpuPartition(cpu), 0);
+ }
+ for (int cpu = 6; cpu <= 11; cpu++) {
+ EXPECT_EQ(nt.GetCpuPartition(cpu), 1);
+ }
+}
+
+// Test that cpulists too long to fit into the 16 byte buffer used by
+// InitNumaTopology() parse successfully.
+TEST_F(NumaTopologyTest, LongCpuLists) {
+ std::vector<SyntheticCpuList> nodes;
+
+ // Content from here onwards lies |
+ // beyond the 16 byte buffer. |
+ // v
+ nodes.emplace_back("0-1,2-3,4-5,6-7,8"); // Right after a comma
+ nodes.emplace_back("9,10,11,12,13,14,15-19"); // Right before a comma
+ nodes.emplace_back("20-21,22-23,24-25,26-29"); // Within range end
+ nodes.emplace_back("30-32,33,34,35,36-38,39"); // Within range start
+ nodes.emplace_back("40-43,44,45-49");
+
+ const auto nt = CreateNumaTopology<3>(nodes);
+
+ EXPECT_EQ(nt.numa_aware(), true);
+ EXPECT_EQ(nt.active_partitions(), 3);
+
+ for (int cpu = 0; cpu <= 8; cpu++) {
+ EXPECT_EQ(nt.GetCpuPartition(cpu), 0);
+ }
+ for (int cpu = 9; cpu <= 19; cpu++) {
+ EXPECT_EQ(nt.GetCpuPartition(cpu), 1);
+ }
+ for (int cpu = 20; cpu <= 29; cpu++) {
+ EXPECT_EQ(nt.GetCpuPartition(cpu), 2);
+ }
+ for (int cpu = 30; cpu <= 39; cpu++) {
+ EXPECT_EQ(nt.GetCpuPartition(cpu), 0);
+ }
+ for (int cpu = 40; cpu <= 49; cpu++) {
+ EXPECT_EQ(nt.GetCpuPartition(cpu), 1);
+ }
+}
+
+// Ensure we can initialize using the host system's real NUMA topology
+// information.
+TEST_F(NumaTopologyTest, Host) {
+ NumaTopology<4> nt;
+ nt.Init();
+
+ // We don't actually know anything about the host, so there's not much more
+ // we can do beyond checking that we didn't crash.
+}
+
+// Ensure that we can parse randomized cpulists correctly.
+TEST(ParseCpulistTest, Random) {
+ absl::BitGen gen;
+
+ static constexpr int kIterations = 100;
+ for (int i = 0; i < kIterations; i++) {
+ cpu_set_t reference;
+ CPU_ZERO(&reference);
+
+ // Set a random number of CPUs within the reference set.
+ const double density = absl::Uniform(gen, 0.0, 1.0);
+ for (int cpu = 0; cpu < CPU_SETSIZE; cpu++) {
+ if (absl::Bernoulli(gen, density)) {
+ CPU_SET(cpu, &reference);
+ }
+ }
+
+ // Serialize the reference set into a cpulist-style string.
+ std::vector<std::string> components;
+ for (int cpu = 0; cpu < CPU_SETSIZE; cpu++) {
+ if (!CPU_ISSET(cpu, &reference)) continue;
+
+ const int start = cpu;
+ int next = cpu + 1;
+ while (next < CPU_SETSIZE && CPU_ISSET(next, &reference)) {
+ cpu = next;
+ next = cpu + 1;
+ }
+
+ if (cpu == start) {
+ components.push_back(absl::StrCat(cpu));
+ } else {
+ components.push_back(absl::StrCat(start, "-", cpu));
+ }
+ }
+ const std::string serialized = absl::StrJoin(components, ",");
+
+ // Now parse that string using our ParseCpulist function, randomizing the
+ // amount of data we provide to it from each read.
+ absl::string_view remaining(serialized);
+ const cpu_set_t parsed =
+ ParseCpulist([&](char *const buf, const size_t count) -> ssize_t {
+ // Calculate how much data we have left to provide.
+ const size_t max = std::min(count, remaining.size());
+
+ // If none, we have no choice but to provide nothing.
+ if (max == 0) return 0;
+
+ // If we do have data, return a randomly sized subset of it to stress
+ // the logic around reading partial values.
+ const size_t copy = absl::Uniform(gen, static_cast<size_t>(1), max);
+ memcpy(buf, remaining.data(), copy);
+ remaining.remove_prefix(copy);
+ return copy;
+ });
+
+ // We ought to have parsed the same set of CPUs that we serialized.
+ EXPECT_TRUE(CPU_EQUAL(&parsed, &reference));
+ }
+}
+
+} // namespace
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/optimization.h b/contrib/libs/tcmalloc/tcmalloc/internal/optimization.h
new file mode 100644
index 0000000000..6380183a50
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/optimization.h
@@ -0,0 +1,45 @@
+// Copyright 2020 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TCMALLOC_INTERNAL_OPTIMIZATION_H_
+#define TCMALLOC_INTERNAL_OPTIMIZATION_H_
+
+#include "tcmalloc/internal/logging.h"
+
+// Our wrapper for __builtin_assume, allowing us to check the assumption on
+// debug builds.
+#ifndef NDEBUG
+#ifdef __clang__
+#define ASSUME(cond) CHECK_CONDITION(cond), __builtin_assume(cond)
+#else
+#define ASSUME(cond) \
+ CHECK_CONDITION(cond), (!(cond) ? __builtin_unreachable() : (void)0)
+#endif
+#else
+#ifdef __clang__
+#define ASSUME(cond) __builtin_assume(cond)
+#else
+#define ASSUME(cond) (!(cond) ? __builtin_unreachable() : (void)0)
+#endif
+#endif
+
+// Annotations for functions that are not affected by nor affect observable
+// state of the program.
+#if ABSL_HAVE_ATTRIBUTE(const)
+#define TCMALLOC_ATTRIBUTE_CONST __attribute__((const))
+#else
+#define TCMALLOC_ATTRIBUTE_CONST
+#endif
+
+#endif // TCMALLOC_INTERNAL_OPTIMIZATION_H_
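
A small usage example for ASSUME (the helper function is hypothetical): in debug builds the condition is really checked through CHECK_CONDITION, while in optimized builds it only informs the compiler.

#include <stddef.h>

#include "tcmalloc/internal/optimization.h"

// Hypothetical helper: the caller guarantees a non-zero size, and ASSUME lets
// the optimizer drop any zero-size handling from the generated code.
inline size_t RoundDownToEight(size_t size) {
  ASSUME(size > 0);
  return size & ~size_t{7};
}
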
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/parameter_accessors.h b/contrib/libs/tcmalloc/tcmalloc/internal/parameter_accessors.h
new file mode 100644
index 0000000000..f14798fe74
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/parameter_accessors.h
@@ -0,0 +1,56 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TCMALLOC_INTERNAL_PARAMETER_ACCESSORS_H_
+#define TCMALLOC_INTERNAL_PARAMETER_ACCESSORS_H_
+
+#include "absl/base/attributes.h"
+#include "absl/time/time.h"
+
+extern "C" {
+
+ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetBackgroundReleaseRate(
+ size_t value);
+ABSL_ATTRIBUTE_WEAK uint64_t TCMalloc_Internal_GetHeapSizeHardLimit();
+ABSL_ATTRIBUTE_WEAK bool TCMalloc_Internal_GetHPAASubrelease();
+ABSL_ATTRIBUTE_WEAK void
+TCMalloc_Internal_GetHugePageFillerSkipSubreleaseInterval(absl::Duration* v);
+ABSL_ATTRIBUTE_WEAK bool TCMalloc_Internal_GetShufflePerCpuCachesEnabled();
+ABSL_ATTRIBUTE_WEAK bool TCMalloc_Internal_GetReclaimIdlePerCpuCachesEnabled();
+ABSL_ATTRIBUTE_WEAK bool TCMalloc_Internal_GetLazyPerCpuCachesEnabled();
+ABSL_ATTRIBUTE_WEAK double
+TCMalloc_Internal_GetPeakSamplingHeapGrowthFraction();
+ABSL_ATTRIBUTE_WEAK bool TCMalloc_Internal_GetPerCpuCachesEnabled();
+ABSL_ATTRIBUTE_WEAK size_t TCMalloc_Internal_GetStats(char* buffer,
+ size_t buffer_length);
+ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetGuardedSamplingRate(int64_t v);
+ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetHeapSizeHardLimit(uint64_t v);
+ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetHPAASubrelease(bool v);
+ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetShufflePerCpuCachesEnabled(
+ bool v);
+ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetReclaimIdlePerCpuCachesEnabled(
+ bool v);
+ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetLazyPerCpuCachesEnabled(bool v);
+ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetMaxPerCpuCacheSize(int32_t v);
+ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetMaxTotalThreadCacheBytes(
+ int64_t v);
+ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetPeakSamplingHeapGrowthFraction(
+ double v);
+ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetPerCpuCachesEnabled(bool v);
+ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetProfileSamplingRate(int64_t v);
+ABSL_ATTRIBUTE_WEAK void
+TCMalloc_Internal_SetHugePageFillerSkipSubreleaseInterval(absl::Duration v);
+}
+
+#endif // TCMALLOC_INTERNAL_PARAMETER_ACCESSORS_H_
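
Since every declaration above is weak, callers are expected to test the symbol before using it. A hedged sketch of that pattern (the 16 MiB value is arbitrary):

#include "tcmalloc/internal/parameter_accessors.h"

void MaybeRaiseThreadCacheLimit() {
  // A weak function resolves to nullptr when no definition is linked in, so
  // guard the call on the symbol's address.
  if (TCMalloc_Internal_SetMaxTotalThreadCacheBytes != nullptr) {
    TCMalloc_Internal_SetMaxTotalThreadCacheBytes(16 << 20);
  }
}
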
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/percpu.cc b/contrib/libs/tcmalloc/tcmalloc/internal/percpu.cc
new file mode 100644
index 0000000000..f8706f0f21
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/percpu.cc
@@ -0,0 +1,352 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "tcmalloc/internal/percpu.h"
+
+#include <fcntl.h>
+#include <sched.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <syscall.h>
+#include <unistd.h>
+
+#include <atomic>
+
+#include "absl/base/attributes.h"
+#include "absl/base/call_once.h" // IWYU pragma: keep
+#include "absl/base/internal/sysinfo.h"
+#include "tcmalloc/internal/linux_syscall_support.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/internal/optimization.h"
+#include "tcmalloc/internal/util.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+namespace subtle {
+namespace percpu {
+
+// ----------------------------------------------------------------------------
+// Internal structures
+// ----------------------------------------------------------------------------
+
+// Restartable Sequence (RSEQ)
+
+extern "C" {
+// We provide a per-thread value (defined in percpu_.c) which both tracks
+// thread-local initialization state and (with RSEQ) provides an atomic
+// in-memory reference for this thread's execution CPU. This value is only
+// valid when the thread is currently executing
+// Possible values:
+// Unavailable/uninitialized:
+// { kCpuIdUnsupported, kCpuIdUninitialized }
+// Initialized, available:
+// [0, NumCpus()) (Always updated at context-switch)
+ABSL_PER_THREAD_TLS_KEYWORD ABSL_ATTRIBUTE_WEAK volatile kernel_rseq
+ __rseq_abi = {
+ 0, static_cast<unsigned>(kCpuIdUninitialized), 0, 0,
+ {0, 0}, {{kCpuIdUninitialized, kCpuIdUninitialized}},
+};
+
+#ifdef __ppc__
+// On PPC, we have two cases for accessing the __rseq_abi TLS variable:
+// * For initial-exec TLS, we write the raw assembly for accessing the memory
+// with the appropriate relocations and offsets. On optimized builds, this is
+// the use case that matters.
+// * For non-initial-exec TLS, access is far more involved. We call this helper
+// function from percpu_rseq_ppc.S to leave the initialization and access to
+// the compiler.
+ABSL_ATTRIBUTE_UNUSED ABSL_ATTRIBUTE_NOINLINE void* tcmalloc_tls_fetch_pic() {
+ return const_cast<kernel_rseq*>(&__rseq_abi);
+}
+#endif
+
+} // extern "C"
+
+enum PerCpuInitStatus {
+ kFastMode,
+ kSlowMode,
+};
+
+ABSL_CONST_INIT static PerCpuInitStatus init_status = kSlowMode;
+ABSL_CONST_INIT static absl::once_flag init_per_cpu_once;
+#if TCMALLOC_PERCPU_USE_RSEQ
+ABSL_CONST_INIT static std::atomic<bool> using_upstream_fence{false};
+#endif // TCMALLOC_PERCPU_USE_RSEQ
+
+// Is this thread's __rseq_abi struct currently registered with the kernel?
+static bool ThreadRegistered() { return RseqCpuId() >= kCpuIdInitialized; }
+
+static bool InitThreadPerCpu() {
+ // If we're already registered, there's nothing further for us to do.
+ if (ThreadRegistered()) {
+ return true;
+ }
+
+#ifdef __NR_rseq
+ return 0 == syscall(__NR_rseq, &__rseq_abi, sizeof(__rseq_abi), 0,
+ TCMALLOC_PERCPU_RSEQ_SIGNATURE);
+#endif // __NR_rseq
+ return false;
+}
+
+bool UsingFlatVirtualCpus() {
+ return false;
+}
+
+static void InitPerCpu() {
+ CHECK_CONDITION(absl::base_internal::NumCPUs() <=
+ std::numeric_limits<uint16_t>::max());
+
+ // Based on the results of successfully initializing the first thread, mark
+ // init_status to initialize all subsequent threads.
+ if (InitThreadPerCpu()) {
+ init_status = kFastMode;
+
+#if TCMALLOC_PERCPU_USE_RSEQ
+ constexpr int kMEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ = (1 << 8);
+ // It is safe to make the syscall below multiple times.
+ using_upstream_fence.store(
+ 0 == syscall(__NR_membarrier,
+ kMEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0),
+ std::memory_order_relaxed);
+#endif // TCMALLOC_PERCPU_USE_RSEQ
+ }
+}
+
+// Tries to initialize RSEQ at both the process-wide (init_status) and
+// per-thread (cpu-id) levels. If process-wide initialization has already been
+// completed then only the thread-level will be completed. A return of false
+// indicates that initialization failed and RSEQ is unavailable.
+bool InitFastPerCpu() {
+ absl::base_internal::LowLevelCallOnce(&init_per_cpu_once, InitPerCpu);
+
+ // Once we've decided fast-cpu support is available, initialization for all
+ // subsequent threads must succeed for consistency.
+ if (init_status == kFastMode && RseqCpuId() == kCpuIdUninitialized) {
+ CHECK_CONDITION(InitThreadPerCpu());
+ }
+
+ // If we've decided to use slow mode, set the thread-local CPU ID to
+ // __rseq_abi.cpu_id so that IsFast doesn't call this function again for
+ // this thread.
+ if (init_status == kSlowMode) {
+ __rseq_abi.cpu_id = kCpuIdUnsupported;
+ }
+
+ return init_status == kFastMode;
+}
+
+// ----------------------------------------------------------------------------
+// Implementation of unaccelerated (no RSEQ) per-cpu operations
+// ----------------------------------------------------------------------------
+
+static bool SetAffinityOneCpu(int cpu) {
+ cpu_set_t set;
+ CPU_ZERO(&set);
+ CPU_SET(cpu, &set);
+ if (0 == sched_setaffinity(0, sizeof(cpu_set_t), &set)) {
+ return true;
+ }
+ CHECK_CONDITION(errno == EINVAL);
+ return false;
+}
+
+// We're being asked to fence against the mask <cpus>, but a NULL mask
+// means every CPU. Do we need <cpu>?
+static bool NeedCpu(int cpu, const cpu_set_t* cpus) {
+ if (cpus == nullptr) return true;
+ return CPU_ISSET(cpu, cpus);
+}
+
+static void SlowFence(const cpu_set_t* cpus) {
+ // Necessary, so the point in time mentioned below has visibility
+ // of our writes.
+ std::atomic_thread_fence(std::memory_order_seq_cst);
+
+ // First, save our cpumask (the user may want it back.)
+ cpu_set_t old;
+ CPU_ZERO(&old);
+ CHECK_CONDITION(0 == sched_getaffinity(0, sizeof(cpu_set_t), &old));
+
+ // Here's the basic idea: if we run on every CPU, then every thread
+ // that runs after us has certainly seen every store we've made up
+ // to this point, so we pin ourselves to each CPU in turn.
+ //
+ // But we can't run everywhere; our control plane may have set cpuset.cpus to
+ // some subset of CPUs (and may be changing it as we speak.) On the plus
+ // side, if we are unable to run on a particular CPU, the same is true for our
+ // siblings (up to some races, dealt with below), so we don't need to.
+
+ for (int cpu = 0; cpu < absl::base_internal::NumCPUs(); ++cpu) {
+ if (!NeedCpu(cpu, cpus)) {
+ // unnecessary -- user doesn't care about synchronization on this cpu
+ continue;
+ }
+ // If we can't pin ourselves there, then no one else can run there, so
+ // that's fine.
+ while (SetAffinityOneCpu(cpu)) {
+ // But even if the pin succeeds, we might not end up running there;
+ // between the pin trying to migrate and running on <cpu>, a change
+ // to cpuset.cpus may cause us to migrate somewhere else instead.
+ // So make sure we actually got where we wanted.
+ if (cpu == sched_getcpu()) {
+ break;
+ }
+ }
+ }
+ // Overly detailed explanation of kernel operations follows.
+ //
+ // OK, at this point, for each cpu i, there are two possibilities:
+ // * we've run on i (so we interrupted any sibling & writes are visible)
+ // * At some point in time T1, we read a value of cpuset.cpus disallowing i.
+ //
+ // Linux kernel details: all writes and reads to cpuset.cpus are
+ // serialized on a mutex (called callback_mutex). Because of the
+ // memory barrier above, our writes certainly happened-before T1.
+ //
+ // Moreover, whoever wrote cpuset.cpus to ban i looped over our
+ // threads in kernel, migrating all threads away from i and setting
+ // their masks to disallow i. So once that loop is known to be
+ // over, any thread that was running on i has been interrupted at
+ // least once, and migrated away. It is possible a second
+ // subsequent change to cpuset.cpus (at time T2) re-allowed i, but
+ // serialization of cpuset.cpus changes guarantee that our writes
+ // are visible at T2, and since migration is a barrier, any sibling
+ // migrated after T2 to cpu i will also see our writes.
+ //
+ // So we just have to make sure the update loop from whoever wrote
+ // cpuset.cpus at T1 is completed. That loop executes under a
+ // second mutex (cgroup_mutex.) So if we take that mutex ourselves,
+  // we can be sure that the update loop at T1 is done. So read
+  // /proc/self/cpuset. We don't care what it says, as long as reading it takes
+  // the lock in question. This guarantees that every thread is either running
+  // on a cpu we visited, or received a cpuset.cpus rewrite that happened
+  // strictly after our writes.
+
+ using tcmalloc::tcmalloc_internal::signal_safe_close;
+ using tcmalloc::tcmalloc_internal::signal_safe_open;
+ using tcmalloc::tcmalloc_internal::signal_safe_read;
+ int fd = signal_safe_open("/proc/self/cpuset", O_RDONLY);
+ CHECK_CONDITION(fd >= 0);
+
+ char c;
+ CHECK_CONDITION(1 == signal_safe_read(fd, &c, 1, nullptr));
+
+ CHECK_CONDITION(0 == signal_safe_close(fd));
+
+ // Try to go back to what we originally had before Fence.
+ if (0 != sched_setaffinity(0, sizeof(cpu_set_t), &old)) {
+ CHECK_CONDITION(EINVAL == errno);
+ // The original set is no longer valid, which should only happen if
+ // cpuset.cpus was changed at some point in Fence. If that happened and we
+ // didn't fence, our control plane would have rewritten our affinity mask to
+ // everything in cpuset.cpus, so do that.
+ cpu_set_t set;
+ CPU_ZERO(&set);
+ for (int i = 0; i < absl::base_internal::NumCPUs(); ++i) {
+ CPU_SET(i, &set);
+ }
+ CHECK_CONDITION(0 == sched_setaffinity(0, sizeof(cpu_set_t), &set));
+ }
+}
+
+#if TCMALLOC_PERCPU_USE_RSEQ
+static void UpstreamRseqFenceCpu(int cpu) {
+ ABSL_RAW_CHECK(using_upstream_fence.load(std::memory_order_relaxed),
+ "upstream fence unavailable.");
+
+ constexpr int kMEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ = (1 << 7);
+ constexpr int kMEMBARRIER_CMD_FLAG_CPU = (1 << 0);
+
+ int64_t res = syscall(__NR_membarrier, kMEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
+ kMEMBARRIER_CMD_FLAG_CPU, cpu);
+
+ ABSL_RAW_CHECK(res == 0 || res == -ENXIO /* missing CPU */,
+ "Upstream fence failed.");
+}
+#endif // TCMALLOC_PERCPU_USE_RSEQ
+
+// Interrupt every concurrently running sibling thread on any cpu in
+// "cpus", and guarantee our writes up til now are visible to every
+// other CPU. (cpus == NULL is equivalent to all CPUs.)
+static void FenceInterruptCPUs(const cpu_set_t* cpus) {
+ CHECK_CONDITION(IsFast());
+
+ // TODO(b/149390298): Provide an upstream extension for sys_membarrier to
+ // interrupt ongoing restartable sequences.
+ SlowFence(cpus);
+}
+
+void Fence() {
+ CompilerBarrier();
+
+  // Other operations (or all, in RSEQ mode) might be running on another CPU.
+  // Deal with that by sending interrupts to those CPUs so that any such
+  // operation is restarted.
+#if TCMALLOC_PERCPU_USE_RSEQ
+ if (using_upstream_fence.load(std::memory_order_relaxed)) {
+ UpstreamRseqFenceCpu(-1);
+ return;
+ }
+#endif // TCMALLOC_PERCPU_USE_RSEQ
+
+ FenceInterruptCPUs(nullptr);
+}
+
+void FenceCpu(int cpu, const size_t virtual_cpu_id_offset) {
+ // Prevent compiler re-ordering of code below. In particular, the call to
+ // GetCurrentCpu must not appear in assembly program order until after any
+ // code that comes before FenceCpu in C++ program order.
+ CompilerBarrier();
+
+ // A useful fast path: nothing needs doing at all to order us with respect
+ // to our own CPU.
+ if (GetCurrentVirtualCpu(virtual_cpu_id_offset) == cpu) {
+ return;
+ }
+
+ if (virtual_cpu_id_offset == offsetof(kernel_rseq, vcpu_id)) {
+ ASSUME(false);
+
+ // With virtual CPUs, we cannot identify the true physical core we need to
+ // interrupt.
+#if TCMALLOC_PERCPU_USE_RSEQ
+ if (using_upstream_fence.load(std::memory_order_relaxed)) {
+ UpstreamRseqFenceCpu(-1);
+ return;
+ }
+#endif // TCMALLOC_PERCPU_USE_RSEQ
+ FenceInterruptCPUs(nullptr);
+ return;
+ }
+
+#if TCMALLOC_PERCPU_USE_RSEQ
+ if (using_upstream_fence.load(std::memory_order_relaxed)) {
+ UpstreamRseqFenceCpu(cpu);
+ return;
+ }
+#endif // TCMALLOC_PERCPU_USE_RSEQ
+
+ cpu_set_t set;
+ CPU_ZERO(&set);
+ CPU_SET(cpu, &set);
+ FenceInterruptCPUs(&set);
+}
+
+} // namespace percpu
+} // namespace subtle
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/percpu.h b/contrib/libs/tcmalloc/tcmalloc/internal/percpu.h
new file mode 100644
index 0000000000..ad2124e0d1
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/percpu.h
@@ -0,0 +1,342 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TCMALLOC_INTERNAL_PERCPU_H_
+#define TCMALLOC_INTERNAL_PERCPU_H_
+
+#define TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT 18
+
+// TCMALLOC_PERCPU_RSEQ_SUPPORTED_PLATFORM defines whether or not we have an
+// implementation for the target OS and architecture.
+#if defined(__linux__) && \
+ (defined(__x86_64__) || defined(__PPC64__) || defined(__aarch64__))
+#define TCMALLOC_PERCPU_RSEQ_SUPPORTED_PLATFORM 1
+#else
+#define TCMALLOC_PERCPU_RSEQ_SUPPORTED_PLATFORM 0
+#endif
+
+#define TCMALLOC_PERCPU_RSEQ_VERSION 0x0
+#define TCMALLOC_PERCPU_RSEQ_FLAGS 0x0
+#if defined(__x86_64__)
+#define TCMALLOC_PERCPU_RSEQ_SIGNATURE 0x53053053
+#elif defined(__ppc__)
+#define TCMALLOC_PERCPU_RSEQ_SIGNATURE 0x0FE5000B
+#elif defined(__aarch64__)
+#define TCMALLOC_PERCPU_RSEQ_SIGNATURE 0xd428bc00
+#else
+// Rather than error, allow us to build, but with an invalid signature.
+#define TCMALLOC_PERCPU_RSEQ_SIGNATURE 0x0
+#endif
+
+// The constants above this line must be macros since they are shared with the
+// RSEQ assembly sources.
+#ifndef __ASSEMBLER__
+
+#ifdef __linux__
+#include <sched.h>
+#endif
+
+#include <atomic>
+#include <cstddef>
+#include <cstdint>
+
+#include "absl/base/dynamic_annotations.h"
+#include "absl/base/internal/per_thread_tls.h"
+#include "absl/base/macros.h"
+#include "absl/base/optimization.h"
+#include "tcmalloc/internal/atomic_danger.h"
+#include "tcmalloc/internal/config.h"
+#include "tcmalloc/internal/linux_syscall_support.h"
+#include "tcmalloc/internal/logging.h"
+
+// TCMALLOC_PERCPU_USE_RSEQ defines whether TCMalloc support for RSEQ on the
+// target architecture exists. We currently only provide RSEQ for 64-bit x86,
+// PPC, and AArch64 binaries.
+#if !defined(TCMALLOC_PERCPU_USE_RSEQ)
+#if (ABSL_PER_THREAD_TLS == 1) && (TCMALLOC_PERCPU_RSEQ_SUPPORTED_PLATFORM == 1)
+#define TCMALLOC_PERCPU_USE_RSEQ 1
+#else
+#define TCMALLOC_PERCPU_USE_RSEQ 0
+#endif
+#endif // !defined(TCMALLOC_PERCPU_USE_RSEQ)
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+namespace subtle {
+namespace percpu {
+
+inline constexpr int kRseqUnregister = 1;
+
+// Internal state used for tracking initialization of RseqCpuId()
+inline constexpr int kCpuIdUnsupported = -2;
+inline constexpr int kCpuIdUninitialized = -1;
+inline constexpr int kCpuIdInitialized = 0;
+
+#if TCMALLOC_PERCPU_USE_RSEQ
+extern "C" ABSL_PER_THREAD_TLS_KEYWORD volatile kernel_rseq __rseq_abi;
+
+static inline int RseqCpuId() { return __rseq_abi.cpu_id; }
+
+static inline int VirtualRseqCpuId(const size_t virtual_cpu_id_offset) {
+#ifdef __x86_64__
+ ASSERT(virtual_cpu_id_offset == offsetof(kernel_rseq, cpu_id) ||
+ virtual_cpu_id_offset == offsetof(kernel_rseq, vcpu_id));
+ return *reinterpret_cast<short *>(reinterpret_cast<uintptr_t>(&__rseq_abi) +
+ virtual_cpu_id_offset);
+#else
+ ASSERT(virtual_cpu_id_offset == offsetof(kernel_rseq, cpu_id));
+ return RseqCpuId();
+#endif
+}
+#else // !TCMALLOC_PERCPU_USE_RSEQ
+static inline int RseqCpuId() { return kCpuIdUnsupported; }
+
+static inline int VirtualRseqCpuId(const size_t virtual_cpu_id_offset) {
+ return kCpuIdUnsupported;
+}
+#endif
+
+typedef int (*OverflowHandler)(int cpu, size_t cl, void *item);
+typedef void *(*UnderflowHandler)(int cpu, size_t cl);
+
+// Functions below are implemented in the architecture-specific percpu_rseq_*.S
+// files.
+extern "C" {
+int TcmallocSlab_Internal_PerCpuCmpxchg64(int target_cpu, intptr_t *p,
+ intptr_t old_val, intptr_t new_val);
+
+#ifndef __x86_64__
+int TcmallocSlab_Internal_Push(void *ptr, size_t cl, void *item, size_t shift,
+ OverflowHandler f);
+int TcmallocSlab_Internal_Push_FixedShift(void *ptr, size_t cl, void *item,
+ OverflowHandler f);
+void *TcmallocSlab_Internal_Pop(void *ptr, size_t cl, UnderflowHandler f,
+ size_t shift);
+void *TcmallocSlab_Internal_Pop_FixedShift(void *ptr, size_t cl,
+ UnderflowHandler f);
+#endif // __x86_64__
+
+// Push a batch onto a slab whose shift is equal to
+// TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT.
+size_t TcmallocSlab_Internal_PushBatch_FixedShift(void *ptr, size_t cl,
+ void **batch, size_t len);
+
+// Pop a batch from a slab whose shift is equal to
+// TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT.
+size_t TcmallocSlab_Internal_PopBatch_FixedShift(void *ptr, size_t cl,
+ void **batch, size_t len);
+
+#ifdef __x86_64__
+int TcmallocSlab_Internal_PerCpuCmpxchg64_VCPU(int target_cpu, intptr_t *p,
+ intptr_t old_val,
+ intptr_t new_val);
+size_t TcmallocSlab_Internal_PushBatch_FixedShift_VCPU(void *ptr, size_t cl,
+ void **batch,
+ size_t len);
+size_t TcmallocSlab_Internal_PopBatch_FixedShift_VCPU(void *ptr, size_t cl,
+ void **batch, size_t len);
+#endif
+}
+
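+// Illustrative sketch of a batch call (a hypothetical `slabs` pointer laid out
+// for the fixed shift; an assumption about usage, not a documented contract):
+//
+//   void* batch[8];
+//   size_t popped =
+//       TcmallocSlab_Internal_PopBatch_FixedShift(slabs, cl, batch, 8);
+//   // batch[0..popped) now holds objects taken from the current CPU's list
+//   // for size class `cl`; popped may be 0 if the list was empty.
+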
+// NOTE: We skirt the usual naming convention slightly above using "_" to
+// increase the visibility of functions embedded into the root-namespace (by
+// virtue of C linkage) in the supported case.
+
+// Return whether we are using flat virtual CPUs.
+bool UsingFlatVirtualCpus();
+
+inline int GetCurrentCpuUnsafe() {
+// On PowerPC, Linux maintains the current CPU in the bottom 12 bits of special
+// purpose register SPRG3, which is readable from user mode. References:
+//
+// https://github.com/torvalds/linux/blob/164c09978cebebd8b5fc198e9243777dbaecdfa0/arch/powerpc/kernel/vdso.c#L727
+// https://github.com/torvalds/linux/blob/dfb945473ae8528fd885607b6fa843c676745e0c/arch/powerpc/include/asm/reg.h#L966
+// https://github.com/torvalds/linux/blob/dfb945473ae8528fd885607b6fa843c676745e0c/arch/powerpc/include/asm/reg.h#L593
+// https://lists.ozlabs.org/pipermail/linuxppc-dev/2012-July/099011.html
+//
+// This is intended for VDSO syscalls, but is much faster if we simply inline it
+// here, presumably due to the function call and null-check overheads of the
+// VDSO version. As of 2014-07 the CPU time costs are something like 1.2 ns for
+// the inline version vs 12 ns for VDSO.
+#if defined(__PPC64__) && defined(__linux__)
+ uint64_t spr;
+
+ // Mark the asm as volatile, so that it is not hoisted out of loops.
+ asm volatile("mfspr %0, 0x103;" : "=r"(spr));
+
+ return spr & 0xfff;
+#else
+ // Elsewhere, use the rseq mechanism.
+ return RseqCpuId();
+#endif
+}
+
+inline int GetCurrentCpu() {
+ // We can't use the unsafe version unless we have the appropriate version of
+ // the rseq extension. This also allows us a convenient escape hatch if the
+ // kernel changes the way it uses special-purpose registers for CPU IDs.
+ int cpu = GetCurrentCpuUnsafe();
+
+ // We open-code the check for fast-cpu availability since we do not want to
+  // force initialization in the first-call case. This is done so that we can
+ // use this in places where it may not always be safe to initialize and so
+ // that it may serve in the future as a proxy for callers such as
+ // CPULogicalId() without introducing an implicit dependence on the fast-path
+ // extensions. Initialization is also simply unneeded on some platforms.
+ if (ABSL_PREDICT_TRUE(cpu >= kCpuIdInitialized)) {
+ return cpu;
+ }
+
+#ifdef TCMALLOC_HAVE_SCHED_GETCPU
+ cpu = sched_getcpu();
+ ASSERT(cpu >= 0);
+#endif // TCMALLOC_HAVE_SCHED_GETCPU
+
+ return cpu;
+}
+
+inline int GetCurrentVirtualCpuUnsafe(const size_t virtual_cpu_id_offset) {
+ return VirtualRseqCpuId(virtual_cpu_id_offset);
+}
+
+inline int GetCurrentVirtualCpu(const size_t virtual_cpu_id_offset) {
+ // We can't use the unsafe version unless we have the appropriate version of
+ // the rseq extension. This also allows us a convenient escape hatch if the
+ // kernel changes the way it uses special-purpose registers for CPU IDs.
+ int cpu = VirtualRseqCpuId(virtual_cpu_id_offset);
+
+ // We open-code the check for fast-cpu availability since we do not want to
+  // force initialization in the first-call case. This is done so that we can
+ // use this in places where it may not always be safe to initialize and so
+ // that it may serve in the future as a proxy for callers such as
+ // CPULogicalId() without introducing an implicit dependence on the fast-path
+ // extensions. Initialization is also simply unneeded on some platforms.
+ if (ABSL_PREDICT_TRUE(cpu >= kCpuIdInitialized)) {
+ return cpu;
+ }
+
+#ifdef TCMALLOC_HAVE_SCHED_GETCPU
+ cpu = sched_getcpu();
+ ASSERT(cpu >= 0);
+#endif // TCMALLOC_HAVE_SCHED_GETCPU
+
+ return cpu;
+}
+
+bool InitFastPerCpu();
+
+inline bool IsFast() {
+ if (!TCMALLOC_PERCPU_USE_RSEQ) {
+ return false;
+ }
+
+ int cpu = RseqCpuId();
+
+ if (ABSL_PREDICT_TRUE(cpu >= kCpuIdInitialized)) {
+ return true;
+ } else if (ABSL_PREDICT_FALSE(cpu == kCpuIdUnsupported)) {
+ return false;
+ } else {
+    // Sets 'cpu' for next time, performing process-wide and thread-level
+    // initialization if necessary.
+ return InitFastPerCpu();
+ }
+}
+
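+// Illustrative usage sketch (an assumption about typical call sites, not a
+// documented contract): callers gate per-CPU fast paths on IsFast() and fall
+// back to a non-RSEQ path otherwise, e.g.
+//
+//   if (IsFast()) {
+//     int cpu = GetCurrentCpu();
+//     // ... RSEQ-based per-cpu operation, restarted on migration ...
+//   } else {
+//     // ... locked or per-thread fallback path ...
+//   }
+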
+// As IsFast(), but if this thread isn't already initialized, will not
+// attempt to do so.
+inline bool IsFastNoInit() {
+ if (!TCMALLOC_PERCPU_USE_RSEQ) {
+ return false;
+ }
+ int cpu = RseqCpuId();
+ return ABSL_PREDICT_TRUE(cpu >= kCpuIdInitialized);
+}
+
+// A barrier that prevents compiler reordering.
+inline void CompilerBarrier() {
+#if defined(__GNUC__)
+ __asm__ __volatile__("" : : : "memory");
+#else
+ std::atomic_thread_fence(std::memory_order_seq_cst);
+#endif
+}
+
+// Internal tsan annotations, do not use externally.
+// Required as tsan does not natively understand RSEQ.
+#ifdef THREAD_SANITIZER
+extern "C" {
+void __tsan_acquire(void *addr);
+void __tsan_release(void *addr);
+}
+#endif
+
+// TSAN relies on seeing (and rewriting) memory accesses. It can't
+// get at the memory accesses we make from RSEQ assembler sequences,
+// which means it doesn't know about the semantics our sequences
+// enforce. So if we're under TSAN, add barrier annotations.
+inline void TSANAcquire(void *p) {
+#ifdef THREAD_SANITIZER
+ __tsan_acquire(p);
+#endif
+}
+
+inline void TSANRelease(void *p) {
+#ifdef THREAD_SANITIZER
+ __tsan_release(p);
+#endif
+}
+
+inline void TSANMemoryBarrierOn(void *p) {
+ TSANAcquire(p);
+ TSANRelease(p);
+}
+
+// These methods may *only* be called if IsFast() has been called by the current
+// thread (and it returned true).
+inline int CompareAndSwapUnsafe(int target_cpu, std::atomic<intptr_t> *p,
+ intptr_t old_val, intptr_t new_val,
+ const size_t virtual_cpu_id_offset) {
+ TSANMemoryBarrierOn(p);
+#if TCMALLOC_PERCPU_USE_RSEQ
+ switch (virtual_cpu_id_offset) {
+ case offsetof(kernel_rseq, cpu_id):
+ return TcmallocSlab_Internal_PerCpuCmpxchg64(
+ target_cpu, tcmalloc_internal::atomic_danger::CastToIntegral(p),
+ old_val, new_val);
+#ifdef __x86_64__
+ case offsetof(kernel_rseq, vcpu_id):
+ return TcmallocSlab_Internal_PerCpuCmpxchg64_VCPU(
+ target_cpu, tcmalloc_internal::atomic_danger::CastToIntegral(p),
+ old_val, new_val);
+#endif // __x86_64__
+ default:
+ __builtin_unreachable();
+ }
+#else // !TCMALLOC_PERCPU_USE_RSEQ
+ __builtin_unreachable();
+#endif // !TCMALLOC_PERCPU_USE_RSEQ
+}
+
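+// Illustrative sketch of a CompareAndSwapUnsafe() caller (hypothetical values,
+// not a documented contract); IsFast() must already have returned true on this
+// thread:
+//
+//   std::atomic<intptr_t> slot{0};
+//   const size_t offset = offsetof(kernel_rseq, cpu_id);
+//   int cpu = GetCurrentVirtualCpu(offset);
+//   int ret = CompareAndSwapUnsafe(cpu, &slot, /*old_val=*/0, /*new_val=*/1,
+//                                  offset);
+//   // ret == cpu: the swap committed on the intended CPU.
+//   // ret == -1:  *p did not match old_val.
+//   // otherwise:  we ran on a different CPU; re-read state and retry.
+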
+void FenceCpu(int cpu, const size_t virtual_cpu_id_offset);
+
+} // namespace percpu
+} // namespace subtle
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // !__ASSEMBLER__
+#endif // TCMALLOC_INTERNAL_PERCPU_H_
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_aarch64.S b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_aarch64.S
new file mode 100644
index 0000000000..3cdaf17835
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_aarch64.S
@@ -0,0 +1,524 @@
+/*
+ * Copyright 2020 The TCMalloc Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License")
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __aarch64__
+#error "percpu_rseq_aarch64.S should only be included for AArch64 builds"
+#endif // __aarch64__
+
+#include "tcmalloc/internal/percpu.h"
+
+/*
+ * API Exposition:
+ *
+ * METHOD_abort: // Emitted as part of START_RSEQ()
+ * START_RSEQ() // Starts critical section between [start,commit)
+ * METHOD_start: // Emitted as part of START_RSEQ()
+ * FETCH_CPU() // Reads current CPU
+ * ...
+ * single store // Commits sequence
+ * METHOD_commit:
+ * ...return...
+ *
+ * This process is assisted by the DEFINE_UPSTREAM_CS macro, which encodes a
+ * (rodata) constant table, whose address is used to start the critical
+ * section, and the abort trampoline.
+ *
+ * The trampoline is used because:
+ * 1. Restarts are expected to be rare, so the extra jump when restarting is
+ * expected to be infrequent.
+ * 2. The upstream restartable sequence implementation expects the trailing 4
+ * bytes of the abort PC to be "signed" (to prevent manipulation of the PC
+ * to an arbitrary choice). For us, this is TCMALLOC_PERCPU_RSEQ_SIGNATURE.
+ * This value is passed to the kernel during configuration of the rseq
+ * syscall.
+ * This would either need to be encoded as a nop (SIGN_ABORT) at the start
+ * of every restartable sequence, increasing instruction cache pressure, or
+ * placed directly before the entry point.
+ *
+ * The trampoline returns us to METHOD_abort, which is the normal entry point
+ * for the restartable sequence. Upon restart, the (upstream) kernel API
+ * clears the per-thread restartable sequence state. We return to METHOD_abort
+ * (rather than METHOD_start), as we need to reinitialize this value.
+ */
+
+/* Place the code into the google_malloc section. This section is the heaviest
+ * user of Rseq code, so it makes sense to co-locate it.
+ */
+
+.section google_malloc, "ax"
+
+/* ---------------- start helper macros ---------------- */
+
+// This macro defines a relocation associated with the provided label to keep
+// section GC from discarding it independently of label.
+#if !defined(__clang_major__) || __clang_major__ >= 9
+#define PINSECTION(label) .reloc 0, R_AARCH64_NONE, label
+#else
+#define PINSECTION(label)
+#endif
+
+// A function within a guarded memory region must start with a BTI C
+// instruction, and per the ABI this includes any externally visible code
+// label. We use the hint encoding so that the code assembles on targets with
+// and without BTI support; it behaves as a no-op on targets that do not
+// support BTI, or outside a guarded memory region.
+#ifdef __ARM_FEATURE_BTI_DEFAULT
+#define BTI_C hint 34
+#define TAILCALL(x) mov x16, x; br x16
+#else
+#define BTI_C
+#define TAILCALL(x) br x
+#endif
+
+// This macro defines:
+// * the rseq_cs instance that we'll use for label's critical section.
+// * a trampoline to return to when we abort. This label_trampoline is
+// distinct from label_start, as the return IP must be "signed" (see
+// SIGN_ABORT()).
+//
+// TODO(b/141629158): __rseq_cs only needs to be writeable to allow for
+// relocations, but could be read-only for non-PIE builds.
+#define DEFINE_UPSTREAM_CS(label) \
+ .pushsection __rseq_cs, "aw"; \
+ .balign 32; \
+ .protected __rseq_cs_##label; \
+ .type __rseq_cs_##label,@object; \
+ .size __rseq_cs_##label,32; \
+ __rseq_cs_##label: \
+ .long TCMALLOC_PERCPU_RSEQ_VERSION, TCMALLOC_PERCPU_RSEQ_FLAGS; \
+ .quad .L##label##_start; \
+ .quad .L##label##_commit - .L##label##_start; \
+ .quad label##_trampoline; \
+ PINSECTION(.L##label##array); \
+ .popsection; \
+ .pushsection __rseq_cs_ptr_array, "aw"; \
+ .L##label##array: \
+ .quad __rseq_cs_##label; \
+ .popsection; \
+ .pushsection rseq_trampoline, "ax"; \
+ SIGN_ABORT(); \
+ .globl label##_trampoline; \
+ .type label##_trampoline, @function; \
+label##_trampoline: \
+ .cfi_startproc; \
+ BTI_C; \
+ b .L##label##_abort; \
+ .cfi_endproc; \
+ .size label##_trampoline, . - label##_trampoline; \
+ .popsection;
+
+// This is part of the upstream rseq ABI. The 4 bytes prior to the abort IP
+// must match TCMALLOC_PERCPU_RSEQ_SIGNATURE (as configured by our rseq
+// syscall's signature parameter). This signature is used to annotate valid
+// abort IPs (since rseq_cs could live in a user-writable segment).
+// We use .inst here instead of a data directive so it works for both little-
+// and big-endian targets.
+#define SIGN_ABORT() \
+ .inst TCMALLOC_PERCPU_RSEQ_SIGNATURE
+
+/*
+ * Provide a directive to specify the size of symbol "label", relative to the
+ * current location and its start.
+ */
+#define ENCODE_SIZE(label) .size label, . - label
+/* We are assuming small memory model. */
+#if __clang_major__ >= 11 && !defined(__AARCH64_CMODEL_SMALL__)
+#error "Memory model not supported!"
+#endif
+
+/* FETCH_CPU assumes &__rseq_abi is in x5. */
+#define FETCH_CPU(dest) \
+ ldr dest, [x5, #4] /* cpuid is 32-bits */
+
+/* With PIE we have initial-exec TLS, even in the presence of position
+ independent code. */
+#if !defined(__PIC__) || defined(__PIE__)
+
+#define START_RSEQ(src) \
+ .L##src##_abort: \
+ mrs x5, tpidr_el0; \
+ adrp x6, :gottprel:__rseq_abi; \
+ ldr x6, [x6,:gottprel_lo12:__rseq_abi]; \
+ add x5, x5, x6; \
+ adrp x6, __rseq_cs_##src; \
+ add x6, x6, :lo12:__rseq_cs_##src; \
+ str x6, [x5, #8]; \
+ .L##src##_start:
+
+#else /* !defined(__PIC__) || defined(__PIE__) */
+
+/*
+ * In the case where we can't guarantee we have initial-exec TLS we obtain
+ * __rseq_abi's TP offset using a TLS descriptor sequence, which we then add to
+ * the TP to get __rseq_abi's address.
+ * The call to the TLS descriptor can be optimized away by the linker, but since
+ * we cannot guarantee it will be, we must save and restore the registers used
+ * to pass our functions' arguments. The function with the most arguments has
+ * 5 arguments, so we save x0-x4 and lr.
+ * TODO: Add PAC support because we are spilling LR.
+ */
+#define START_RSEQ(src) \
+ .L##src##_abort: \
+ mov x5, lr; \
+ stp x0, x1, [sp, -48]!; \
+ stp x2, x3, [sp, #16]; \
+ stp x4, x5, [sp, #32]; \
+ adrp x0, :tlsdesc:__rseq_abi; \
+ ldr x1, [x0, :tlsdesc_lo12:__rseq_abi]; \
+ add x0, x0, :tlsdesc_lo12:__rseq_abi; \
+ .tlsdesccall __rseq_abi; \
+ blr x1; \
+ ldp x4, x5, [sp, #32]; \
+ mov lr, x5; \
+ mrs x5, tpidr_el0; \
+ add x5, x5, x0; \
+ ldp x2, x3, [sp, #16]; \
+ ldp x0, x1, [sp], #48; \
+ adrp x6, __rseq_cs_##src; \
+ add x6, x6, :lo12:__rseq_cs_##src; \
+ str x6, [x5, #8]; \
+ .L##src##_start:
+
+#endif
+/* ---------------- end helper macros ---------------- */
+
+/* start of atomic restartable sequences */
+
+/*
+ * int TcmallocSlab_Internal_PerCpuCmpxchg64(int target_cpu, long *p,
+ * long old_val, long new_val)
+ * w0: target_cpu
+ * x1: p
+ * x2: old_val
+ * x3: new_val
+ */
+ .p2align 6 /* aligns to 2^6 with NOP filling */
+ .globl TcmallocSlab_Internal_PerCpuCmpxchg64
+ .type TcmallocSlab_Internal_PerCpuCmpxchg64, @function
+TcmallocSlab_Internal_PerCpuCmpxchg64:
+ .cfi_startproc
+ BTI_C
+ START_RSEQ(TcmallocSlab_Internal_PerCpuCmpxchg64)
+ FETCH_CPU(w4)
+ cmp w0, w4 /* check cpu vs current_cpu */
+ bne .LTcmallocSlab_Internal_PerCpuCmpxchg64_commit
+ ldr x6, [x1]
+ cmp x6, x2 /* verify *p == old */
+ bne .LTcmallocSlab_Internal_PerCpuCmpxchg64_mismatch
+ str x3, [x1]
+.LTcmallocSlab_Internal_PerCpuCmpxchg64_commit:
+ mov x0, x4
+ ret /* return current cpu, indicating mismatch OR success */
+.LTcmallocSlab_Internal_PerCpuCmpxchg64_mismatch:
+ mov x0, #-1 /* mismatch versus "old" or "check", return -1 */
+ ret
+ .cfi_endproc
+ENCODE_SIZE(TcmallocSlab_Internal_PerCpuCmpxchg64)
+DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PerCpuCmpxchg64)
+
+/* size_t TcmallocSlab_Internal_PushBatch_FixedShift(
+ * void *ptr (x0),
+ * size_t cl (w1),
+ * void** batch (x2),
+ * size_t len (w3) {
+ * uint64_t r8 = __rseq_abi.cpu_id
+ * uint64_t* r8 = CpuMemoryStart(x0, r8)
+ * Header* hdr = r8 + w1 * 8
+ * uint64_t r9 = hdr->current (zero-extend 16bit)
+ * uint64_t r10 = hdr->end (zero-extend 16bit)
+ * if (r9 >= r10) return 0
+ * r11 = r3
+ * r10 = r9 + min(len, r10 - r9)
+ * r13 = r9 + r10
+ * r9 = r8 + r9 * 8
+ * r14 = r8 + r13 * 8
+ * loop:
+ * r12 = *(r11-=8) (pre-index) Pop from Batch
+ * *(r9+=8) = r12 (post-index) Push to Slab
+ * if (r9 != r14) goto loop
+ * hdr->current = r13 (16bit store)
+ * return r10
+ * }
+ */
+ .p2align 6 /* aligns to 2^6 with NOP filling */
+ .globl TcmallocSlab_Internal_PushBatch_FixedShift
+ .type TcmallocSlab_Internal_PushBatch_FixedShift, @function
+TcmallocSlab_Internal_PushBatch_FixedShift:
+ .cfi_startproc
+ BTI_C
+ START_RSEQ(TcmallocSlab_Internal_PushBatch_FixedShift)
+ FETCH_CPU(w8)
+ lsl x8, x8, #TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT /* multiply cpu by 256k */
+ add x8, x0, x8
+ add x4, x8, x1, LSL #3 /* r4 = hdr */
+ ldrh w9, [x4] /* r9 = current */
+ ldrh w10, [x4, #6] /* r10 = end */
+ cmp w9, w10
+ bge .LTcmallocSlab_Internal_PushBatch_FixedShift_no_capacity
+ add x11, x2, x3, LSL #3 /* r11 = batch + len * 8 */
+ sub w10, w10, w9 /* r10 = free capacity */
+ cmp w3, w10
+ csel w10, w3, w10, ls /* r10 = min(len, free capacity), amount we are
+ pushing */
+ add x13, x9, x10 /* r13 = current + amount we are pushing. */
+ add x9, x8, x9, LSL #3 /* r9 = current cpu slab stack */
+ add x14, x8, x13, LSL #3 /* r14 = new current address */
+.LTcmallocSlab_Internal_PushBatch_FixedShift_loop:
+ ldr x12, [x11, #-8]! /* r12 = [--r11] */
+ str x12, [x9], #8 /* [r9++] = r12 */
+ cmp x9, x14 /* if current cpu slab address == new current
+ address */
+ bne .LTcmallocSlab_Internal_PushBatch_FixedShift_loop
+ strh w13, [x4] /* store new current index */
+.LTcmallocSlab_Internal_PushBatch_FixedShift_commit:
+ mov x0, x10
+ ret
+.LTcmallocSlab_Internal_PushBatch_FixedShift_no_capacity:
+ mov x0, #0
+ ret
+ .cfi_endproc
+ENCODE_SIZE(TcmallocSlab_Internal_PushBatch_FixedShift)
+DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PushBatch_FixedShift)
+
+/* size_t TcmallocSlab_Internal_PopBatch_FixedShift(
+ * void *ptr (x0),
+ * size_t cl (w1),
+ * void** batch (x2),
+ * size_t len (w3) {
+ * uint64_t r8 = __rseq_abi.cpu_id
+ * uint64_t* r8 = CpuMemoryStart(ptr, r8)
+ * Header* hdr = GetHeader(r8, cl)
+ * uint64_t r9 = hdr->current
+ * uint64_t r10 = hdr->begin
+ * if (r9 <= r10) return 0
+ * r11 = min(len, r9 - r10)
+ * r13 = r8 + r9 * 8
+ * r9 = r9 - r11
+ * r12 = r2
+ * r14 = r2 + r11 * 8
+ * loop:
+ * r10 = *(r13 -= 8) (pre-index) Pop from slab
+ * *(r12+=8) = r10 (post-index) Push to Batch
+ * if (r12 != r14) goto loop
+ * hdr->current = r9
+ * return r11
+ * }
+ */
+ .p2align 6 /* aligns to 2^6 with NOP filling */
+ .globl TcmallocSlab_Internal_PopBatch_FixedShift
+ .type TcmallocSlab_Internal_PopBatch_FixedShift, @function
+TcmallocSlab_Internal_PopBatch_FixedShift:
+ .cfi_startproc
+ BTI_C
+ START_RSEQ(TcmallocSlab_Internal_PopBatch_FixedShift)
+ FETCH_CPU(w8)
+ lsl x8, x8, #TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT /* multiply cpu by 256k */
+ add x8, x0, x8
+ add x4, x8, x1, LSL #3
+ ldrh w9, [x4] /* current */
+ ldrh w10, [x4, #4] /* begin */
+ cmp w10, w9
+ bhs .LTcmallocSlab_Internal_PopBatch_FixedShift_no_items
+ sub w11, w9, w10 /* r11 = available items */
+ cmp w3, w11
+ csel w11, w3, w11, ls /* r11 = min(len, available items), amount we are
+ popping */
+ add x13, x8, x9, LSL #3 /* r13 = current cpu slab stack */
+ sub x9, x9, x11 /* update new current */
+ mov x12, x2 /* r12 = batch */
+ add x14, x2, x11, LSL #3 /* r14 = batch + amount we are popping*8 */
+.LTcmallocSlab_Internal_PopBatch_FixedShift_loop:
+ ldr x10, [x13, #-8]! /* r10 = [--r13] */
+ str x10, [x12], #8 /* [r12++] = r10 */
+ cmp x12, x14 /* if current batch == batch + amount we are
+ popping */
+ bne .LTcmallocSlab_Internal_PopBatch_FixedShift_loop
+ strh w9, [x4] /* store new current */
+.LTcmallocSlab_Internal_PopBatch_FixedShift_commit:
+ mov x0, x11
+ ret
+.LTcmallocSlab_Internal_PopBatch_FixedShift_no_items:
+ mov x0, #0
+ ret
+ .cfi_endproc
+ENCODE_SIZE(TcmallocSlab_Internal_PopBatch_FixedShift)
+DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PopBatch_FixedShift)
+
+ .globl TcmallocSlab_Internal_Push
+ .type TcmallocSlab_Internal_Push, @function
+TcmallocSlab_Internal_Push:
+.LTcmallocSlab_Internal_Push_entry:
+ .cfi_startproc
+ // Arguments use:
+ // * x0: (Argument: Slabs*) cpu_0_slab_ptr
+ // * x1: (Argument: uintptr_t) cl
+ // * x2: (Argument: uintptr_t) p
+ // * w3: (Argument: size_t) shift
+ // * x4: (Argument: uintptr_t) f
+ // Return value: current CPU
+ // Available x5-x15
+
+ BTI_C
+ START_RSEQ(TcmallocSlab_Internal_Push)
+ FETCH_CPU(w8)
+ lsl x9, x8, x3
+ add x9, x0, x9
+ add x10, x9, x1, LSL #3
+ ldrh w12, [x10] /* current */
+ ldrh w11, [x10, #6] /* end */
+ cmp w11, w12
+ ble .LTcmallocSlab_Internal_Push_no_capacity
+ str x2, [x9, x12, LSL #3]
+ add w12, w12, #1
+ strh w12, [x10]
+.LTcmallocSlab_Internal_Push_commit:
+ mov x0, x8
+ ret
+.LTcmallocSlab_Internal_Push_no_capacity:
+ mov x0, x8
+ TAILCALL(x4)
+.LTcmallocSlab_Internal_Push_region3:
+ .cfi_endproc
+ENCODE_SIZE(TcmallocSlab_Internal_Push)
+DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_Push)
+
+
+ .globl TcmallocSlab_Internal_Push_FixedShift
+ .type TcmallocSlab_Internal_Push_FixedShift, @function
+TcmallocSlab_Internal_Push_FixedShift:
+ .cfi_startproc
+ // Arguments use:
+ // * x0: (Argument: Slabs*) cpu_0_slab_ptr
+ // * x1: (Argument: uintptr_t) cl
+ // * x2: (Argument: uintptr_t) p
+ // * x3: (Argument: uintptr_t) f
+ // Return value: current CPU
+ // Available x4-x15
+
+ BTI_C
+ START_RSEQ(TcmallocSlab_Internal_Push_FixedShift)
+ FETCH_CPU(w8)
+ lsl x9, x8, #TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT
+ add x9, x0, x9
+ add x10, x9, x1, LSL #3
+ ldrh w12, [x10] /* current */
+ ldrh w11, [x10, #6] /* end */
+ cmp w11, w12
+ ble .LTcmallocSlab_Internal_Push_FixedShift_no_capacity
+ str x2, [x9, x12, LSL #3]
+ add w12, w12, #1
+ strh w12, [x10]
+.LTcmallocSlab_Internal_Push_FixedShift_commit:
+ mov x0, x8
+ ret
+.LTcmallocSlab_Internal_Push_FixedShift_no_capacity:
+ mov x0, x8
+ TAILCALL(x3)
+ .cfi_endproc
+ENCODE_SIZE(TcmallocSlab_Internal_Push_FixedShift)
+DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_Push_FixedShift)
+
+ .globl TcmallocSlab_Internal_Pop_FixedShift
+ .type TcmallocSlab_Internal_Pop_FixedShift, @function
+TcmallocSlab_Internal_Pop_FixedShift:
+ .cfi_startproc
+ // Arguments use:
+ // * x0: (Argument: Slabs*) cpu_0_slab_ptr
+ // * x1: (Argument: uintptr_t) cl
+ // * x2: (Argument: uintptr_t) f
+ // Return value: current CPU
+ // Available x3-x15
+
+ BTI_C
+ START_RSEQ(TcmallocSlab_Internal_Pop_FixedShift)
+ FETCH_CPU(w8) /* r8 = CPU */
+ lsl x9, x8, #TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT
+ /* r9 = CPU shifted */
+ add x9, x0, x9 /* r9 = start of CPU region */
+ add x10, x9, x1, LSL #3 /* r10 = start of slab header */
+ ldrh w12, [x10] /* r12 = current index */
+ ldrh w11, [x10, #4] /* r11 = begin index */
+ cmp w11, w12 /* if begin >= current */
+ bge .LTcmallocSlab_Internal_Pop_FixedShift_no_items
+ sub w12, w12, #1 /* r12 = current-- */
+ ldr x3, [x9, x12, LSL #3] /* r3 = [start + current * 8] */
+ strh w12, [x10] /* store new current index */
+.LTcmallocSlab_Internal_Pop_FixedShift_commit:
+ mov x0, x3 /* return popped item */
+ ret
+.LTcmallocSlab_Internal_Pop_FixedShift_no_items:
+ mov x0, x8 /* call overflow handler with CPU ID */
+ TAILCALL(x2)
+ .cfi_endproc
+ENCODE_SIZE(TcmallocSlab_Internal_Pop_FixedShift)
+DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_Pop_FixedShift)
+
+ .globl TcmallocSlab_Internal_Pop
+ .type TcmallocSlab_Internal_Pop, @function
+TcmallocSlab_Internal_Pop:
+ .cfi_startproc
+ // Arguments use:
+ // * x0: (Argument: Slabs*) cpu_0_slab_ptr
+ // * x1: (Argument: uintptr_t) cl
+ // * x2: (Argument: uintptr_t) f
+ // * w3: (Argument: size_t) shift
+ // Return value: Value
+ // Available x4-x15
+
+ BTI_C
+ START_RSEQ(TcmallocSlab_Internal_Pop)
+ FETCH_CPU(w8) /* r8 = CPU ID */
+ lsl x9, x8, x3 /* x9 = CPU shifted by (r3) */
+ add x9, x0, x9 /* x9 = start of this CPU region */
+ add x10, x9, x1, LSL #3 /* r10 = slab header addr */
+ ldrh w12, [x10] /* r12 = current index */
+ ldrh w11, [x10, #4] /* x11 = begin index */
+ cmp w11, w12 /* if begin >= current */
+ bge .LTcmallocSlab_Internal_Pop_no_items
+ sub w12, w12, #1 /* r12 = current-- */
+ ldr x4, [x9, x12, LSL #3] /* r4 = [start + current * 8] */
+ strh w12, [x10] /* update current index */
+.LTcmallocSlab_Internal_Pop_commit:
+ mov x0, x4 /* return popped item */
+ ret
+.LTcmallocSlab_Internal_Pop_no_items:
+ mov x0, x8 /* call overflow handler with CPU ID */
+ TAILCALL(x2)
+ .cfi_endproc
+ENCODE_SIZE(TcmallocSlab_Internal_Pop)
+DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_Pop)
+
+.section .note.GNU-stack,"",@progbits
+
+/* Add a NT_GNU_PROPERTY_TYPE_0 note. */
+#define GNU_PROPERTY(type, value) \
+ .section .note.gnu.property, "a"; \
+ .p2align 3; \
+ .word 4; \
+ .word 16; \
+ .word 5; \
+ .asciz "GNU"; \
+ .word type; \
+ .word 4; \
+ .word value; \
+ .word 0;
+
+/* Add GNU property note if built with branch protection. */
+
+#if defined(__ARM_FEATURE_BTI_DEFAULT)
+GNU_PROPERTY (0xc0000000, 1)
+#endif
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_asm.S b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_asm.S
new file mode 100644
index 0000000000..0219a2760a
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_asm.S
@@ -0,0 +1,41 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Single file to include target specific implementations for percpu.
+
+#include "tcmalloc/internal/percpu.h"
+
+#if TCMALLOC_PERCPU_RSEQ_SUPPORTED_PLATFORM
+#if defined(__x86_64__)
+#include "tcmalloc/internal/percpu_rseq_x86_64.S"
+#elif defined(__ppc__)
+#include "tcmalloc/internal/percpu_rseq_ppc.S"
+#elif defined(__aarch64__)
+#include "tcmalloc/internal/percpu_rseq_aarch64.S"
+#else
+#error "RSEQ support expected, but not found."
+#endif
+#endif // TCMALLOC_PERCPU_RSEQ_SUPPORTED_PLATFORM
+
+// We do not need an executable stack. Put this outside the
+// architecture-specific region above in order to suppress "missing
+// .note.GNU-stack section implies executable stack" errors.
+//
+// Cf. http://en.chys.info/2010/12/note-gnu-stack/
+#if defined(__arm__) || defined(__PPC64__)
+.section .note.GNU-stack, "", %progbits
+#else
+.section .note.GNU-stack, "", @progbits
+#endif // __arm__ || __PPC64__
+
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_ppc.S b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_ppc.S
new file mode 100644
index 0000000000..234f28c2e7
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_ppc.S
@@ -0,0 +1,606 @@
+/*
+ * Copyright 2019 The TCMalloc Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Rseq critical section functions and restart handlers.
+//
+// They must avoid writing to the nonvolatile and reserved general purpose
+// registers defined by the Power Architecture 64-Bit ELF V2 ABI:
+//
+// * r1-r2
+// * r13
+// * r14-r31
+//
+// Finally, note that the restart handler reserves the right to clobber
+// condition registers. This means that critical section functions must not
+// explicitly or implicitly read condition registers outside of their
+// [start, limit) critical regions.
+
+#ifndef __ppc__
+#error "percpu_rseq_ppc.S should only be included for PPC builds"
+#endif
+
+#include "tcmalloc/internal/percpu.h"
+
+// Use the ELFv2 ABI.
+.abiversion 2
+.section google_malloc, "ax"
+
+////////////////////////////////////////////////////////////////////////
+// Macros
+////////////////////////////////////////////////////////////////////////
+
+/*
+ * Provide a directive to specify the size of symbol "label", relative to the
+ * current location and its start.
+ */
+#define ENCODE_SIZE(label) .size label, . - label;
+
+// Place the CPU number into the bottom 12 bits of dst. The upper 52 bits are
+// unspecified.
+//
+// See GetCurrentCpu() for notes on the implementation.
+#define GET_CPU_UNMASKED(dst) \
+ mfspr dst, 259
+
+// Given an unmasked CPU number, put the interesting parts into dst.
+#define MASK_CPU(dst, src) \
+ clrldi dst, src, 52
+
+// Like GET_CPU_UNMASKED, but guarantees that the upper bits are cleared. May
+// be slower than the unmasked version.
+#define GET_CPU(dst) \
+ GET_CPU_UNMASKED(dst); \
+ MASK_CPU(dst, dst)
+
+// This is part of the upstream rseq ABI. The 4 bytes prior to the abort IP
+// must match TCMALLOC_PERCPU_RSEQ_SIGNATURE (as configured by our rseq
+// syscall's signature parameter). This signature is used to annotate valid
+// abort IPs (since rseq_cs could live in a user-writable segment).
+#define SIGN_ABORT() \
+ .long TCMALLOC_PERCPU_RSEQ_SIGNATURE;
+
+// DEFINE_UPSTREAM_CS triggers the generation of rseq_cs table (the triple of
+// start, commit, abort IPs) and a trampoline function.
+//
+// Upstream API Exposition:
+//
+// START_RSEQ() // vvvvv emits a bunch of things
+// global entry point:
+// TOC setup
+// METHOD_critical_abort:
+// local entry point:
+// store rseq_cs to __rseq_abi.rseq_cs, starting restartable sequence
+// METHOD_start: // Emitted as part of START_RSEQ()
+// // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+//
+// GET_CPU...() // Reads current CPU
+// ...
+// single store // Commits sequence
+// METHOD_critical_limit:
+// ...return...
+//
+// START_RSEQ does several things:
+// * We need to set up the TOC pointer for global entry points.
+// * When restarting, we return to the local entry point, since the TOC pointer
+// is left intact from the restart. METHOD_critical_abort and local entry
+// point are therefore the same address.
+// * It stores to the TLS to register that we're in a restartable sequence with
+// the kernel.
+//
+// This process is assisted by the DEFINE_UPSTREAM_CS macro, which encodes a
+// (rodata) constant table, whose address is used to start the critical
+// section, and the abort trampoline.
+//
+// The trampoline is used because:
+// 1. Restarts are expected to be rare, so the extra jump when restarting is
+// expected to be infrequent.
+// 2. The upstream restartable sequence implementation expects the trailing 4
+// bytes of the abort PC to be "signed" (to prevent manipulation of the PC
+// to an arbitrary choice). For us, this is
+// TCMALLOC_PERCPU_RSEQ_SIGNATURE. This value is passed to the kernel
+// during configuration of the rseq syscall. This would either need to be
+// encoded as a nop* at the start of every restartable sequence, increasing
+// instruction cache pressure, or placed directly before the entry point.
+//
+// * The upstream rseq protocol appears to be converging on using a trap
+// instruction (twui), so we cannot allow it to appear anywhere in our
+// actual executed path.
+//
+// Upon restart, the (upstream) kernel API clears the per-thread restartable
+// sequence state. We return to METHOD_critical_abort (rather than
+// METHOD_start), as we need to reinitialize this value.
+
+// This macro defines a relocation associated with the provided label to keep
+// section GC from discarding it independently of label.
+#if !defined(__clang_major__) || __clang_major__ >= 9
+#define PINSECTION(label) .reloc 0, R_PPC64_NONE, label
+#else
+#define PINSECTION(label)
+#endif
+
+// TODO(b/141629158): __rseq_cs only needs to be writeable to allow for
+// relocations, but could be read-only for non-PIE builds.
+#define DEFINE_UPSTREAM_CS(label) \
+ .pushsection __rseq_cs, "aw"; \
+ .balign 32; \
+ .protected __rseq_cs_##label; \
+ .type __rseq_cs_##label,@object; \
+ .size __rseq_cs_##label,32; \
+ __rseq_cs_##label: \
+ .long TCMALLOC_PERCPU_RSEQ_VERSION, TCMALLOC_PERCPU_RSEQ_FLAGS; \
+ .quad .L##label##_critical_start; \
+ .quad .L##label##_critical_limit - .L##label##_critical_start; \
+ .quad label##_trampoline; \
+ PINSECTION(.L##label##array); \
+ .popsection; \
+ .pushsection __rseq_cs_ptr_array, "aw"; \
+ .L##label##array: \
+ .quad __rseq_cs_##label; \
+ .popsection; \
+ .pushsection rseq_trampoline, "ax"; \
+ SIGN_ABORT(); \
+ .globl label##_trampoline; \
+ .type label##_trampoline, @function; \
+label##_trampoline: \
+ .cfi_startproc; \
+ b .L##label##_critical_abort; \
+ .cfi_endproc; \
+ .size label##_trampoline, . - label##_trampoline; \
+ .popsection
+
+// With PIE: We have initial-exec TLS, even in the presence of position
+// independent code.
+#if !defined(__PIC__) || defined(__PIE__)
+
+#define START_RSEQ(label) \
+ .L##label##_gep0: \
+ addis %r2, %r12, .TOC.-.L##label##_gep0@ha; \
+ addi %r2, %r2, .TOC.-.L##label##_gep0@l; \
+ .L##label##_critical_abort: \
+ .L##label##_lep0: \
+ .localentry label,.-label; \
+ addis %r9, %r2, __rseq_cs_##label@toc@ha; \
+ addi %r9, %r9, __rseq_cs_##label@toc@l; \
+ addis %r10, %r13, __rseq_abi@tprel@ha; \
+ addi %r10, %r10, __rseq_abi@tprel@l; \
+ std %r9, 8(%r10); \
+ .L##label##_critical_start:
+
+#else /* !defined(__PIC__) || defined(__PIE__) */
+
+// Handle non-initial exec TLS. When performance matters, we should be using
+// initial-exec TLS.
+//
+// We need to caller-save r3-r8, as they are our arguments to the actual
+// restartable sequence code.
+
+#define START_RSEQ(label) \
+ .L##label##_gep0: \
+ addis %r2, %r12, .TOC.-.L##label##_gep0@ha; \
+ addi %r2, %r2, .TOC.-.L##label##_gep0@l; \
+ .L##label##_critical_abort: \
+ .L##label##_lep0: \
+ .localentry label,.-label; \
+ mflr 0; \
+ std %r0, 0x10(1); \
+ std %r3, -0x10(1); \
+ std %r4, -0x18(1); \
+ std %r5, -0x20(1); \
+ std %r6, -0x28(1); \
+ std %r7, -0x30(1); \
+ std %r8, -0x38(1); \
+ stdu %r1, -0x200(1); \
+ bl tcmalloc_tls_fetch_pic; \
+ nop; \
+ mr %r10, %r3; \
+ addi %r1, %r1, 0x200; \
+ ld %r8, -0x38(1); \
+ ld %r7, -0x30(1); \
+ ld %r6, -0x28(1); \
+ ld %r5, -0x20(1); \
+ ld %r4, -0x18(1); \
+ ld %r3, -0x10(1); \
+ ld %r0, 0x10(1); \
+ mtlr 0; \
+ addis %r9, %r2, __rseq_cs_##label@toc@ha; \
+ addi %r9, %r9, __rseq_cs_##label@toc@l; \
+ std %r9, 8(%r10); \
+ .L##label##_critical_start:
+
+#endif
+
+////////////////////////////////////////////////////////////////////////
+// TcmallocSlab_Internal_PerCpuCmpxchg64
+////////////////////////////////////////////////////////////////////////
+
+.globl TcmallocSlab_Internal_PerCpuCmpxchg64
+.type TcmallocSlab_Internal_PerCpuCmpxchg64, @function
+TcmallocSlab_Internal_PerCpuCmpxchg64:
+.LTcmallocSlab_Internal_PerCpuCmpxchg64_entry:
+ .cfi_startproc
+ // Register use:
+ //
+ // * r3: (Argument: int64) target_cpu
+ // * r4: (Argument: intptr_t*) p
+ // * r5: (Argument: intptr_t) old_val
+ // * r6: (Argument: intptr_t) new_val
+ // * r7: The current CPU number.
+ // * r8: The current value of *p.
+ //
+
+ START_RSEQ(TcmallocSlab_Internal_PerCpuCmpxchg64)
+
+ // Are we running on the target CPU?
+ GET_CPU(%r7)
+ cmpd %r7, %r3
+ bne .LCAS_wrong_cpu
+
+ // Load the current value of *p.
+ ld %r8, 0(%r4)
+
+ // Is the value up to date?
+ cmpd %r8, %r5
+ bne .LCAS_wrong_value
+
+ // Store the new value, committing the operation.
+ std %r6, 0(%r4)
+.LTcmallocSlab_Internal_PerCpuCmpxchg64_critical_limit:
+
+ // Return the target CPU, which is already in r3.
+ blr
+
+.LCAS_wrong_cpu:
+ // Return the current CPU.
+ mr %r3, %r7
+ blr
+
+.LCAS_wrong_value:
+ // Return -1.
+ li %r3, -1
+ blr
+
+.LTcmallocSlab_Internal_PerCpuCmpxchg64_function_limit:
+ .cfi_endproc
+ENCODE_SIZE(TcmallocSlab_Internal_PerCpuCmpxchg64);
+DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PerCpuCmpxchg64);
+
+
+////////////////////////////////////////////////////////////////////////
+// TcmallocSlab_Internal_Push
+////////////////////////////////////////////////////////////////////////
+
+.globl TcmallocSlab_Internal_Push
+.type TcmallocSlab_Internal_Push, @function
+TcmallocSlab_Internal_Push:
+.LTcmallocSlab_Internal_Push_entry:
+ .cfi_startproc
+ // Arguments use:
+ // * r3: (Argument: Slabs*) cpu_0_slab_ptr
+ // * r4: (Argument: uintptr_t) cl
+ // * r5: (Argument: uintptr_t) p
+ // * r6: (Argument: size_t) shift
+ // * r7: (Argument: uintptr_t) f
+ // Return value: current CPU
+ // Available r8 r9 r10 r11 r12
+ // Note that r12 may be overwritten in rseq_restart_address_internal so
+ // cannot be relied upon across restartable sequence boundaries.
+
+ START_RSEQ(TcmallocSlab_Internal_Push)
+
+ GET_CPU(%r8) // r8 = current CPU, includes MASK operation
+ sld %r9, %r8, %r6 // r9 = r8 << shift (r6)
+ add %r9, %r3, %r9 // r9 = start of this CPU region
+ rldicr %r10, %r4, 3, 60 // r10 = header offset for class size cl (r4)
+ add %r10, %r9, %r10 // r10 = slab header addr (class offset + CPU base)
+ lhz %r12, 0(%r10) // r12 = current index
+ lhz %r11, 6(%r10) // r11 = length
+ cmpld %cr7, %r11, %r12 // compare current index with length
+ ble %cr7, .LTcmallocSlab_Internal_Push_no_capacity
+ rldicr %r11, %r12, 3, 60 // r11 = offset of current index
+ addi %r12, %r12, 1 // current index += 1
+ stdx %r5, %r9, %r11 // store pointer p (r5) into current offset
+ sth %r12, 0(%r10) // update current index
+
+.LTcmallocSlab_Internal_Push_critical_limit:
+ mr %r3, %r8 // Return current CPU in r3
+ blr
+
+.LTcmallocSlab_Internal_Push_no_capacity:
+ mr %r3, %r8 // Place current CPU in r3
+ // r7 already contains target function
+ b .LPushOverflowTrampoline
+
+.LTcmallocSlab_Internal_Push_function_limit:
+ .cfi_endproc
+ENCODE_SIZE(TcmallocSlab_Internal_Push);
+DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_Push);
+
+////////////////////////////////////////////////////////////////////////
+// TcmallocSlab_Internal_Push_FixedShift
+////////////////////////////////////////////////////////////////////////
+
+.globl TcmallocSlab_Internal_Push_FixedShift
+.type TcmallocSlab_Internal_Push_FixedShift, @function
+TcmallocSlab_Internal_Push_FixedShift:
+.LTcmallocSlab_Internal_Push_FixedShift_entry:
+ .cfi_startproc
+ // Arguments use:
+ // * r3: (Argument: Slabs*) cpu_0_slab_ptr
+ // * r4: (Argument: uintptr_t) cl
+ // * r5: (Argument: uintptr_t) p
+ // * r6: (Argument: uintptr_t) f
+
+ START_RSEQ(TcmallocSlab_Internal_Push_FixedShift)
+
+ GET_CPU_UNMASKED(%r7) // r7 = unmasked CPU
+ // Mask upper 52 bits of %r7 and shift left in single
+ // operation. Removes the need to have a separate
+ // MASK operation on the critical path.
+ clrlsldi %r8, %r7, 52, TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT
+ add %r8, %r3, %r8 // r8 = start of this CPU region
+ rldicr %r9, %r4, 3, 60 // r9 = start of header
+ add %r9, %r8, %r9 // r9 = slab header addr
+ lhz %r10, 0(%r9) // r10 = current index
+ lhz %r11, 6(%r9) // r11 = end index
+ cmpld %cr7, %r11, %r10 // Check for space
+ ble %cr7, .LTcmallocSlab_Internal_Push_FixedShift_no_capacity
+ rldicr %r11, %r10, 3, 60 // r11 = offset of current index
+ addi %r10, %r10, 1 // current index ++
+ stdx %r5, %r8, %r11 // store the item (from r5)
+ sth %r10, 0(%r9) // store current index
+
+.LTcmallocSlab_Internal_Push_FixedShift_critical_limit:
+ MASK_CPU(%r3, %r7) // Return and mask CPU into %r3
+ blr
+
+.LTcmallocSlab_Internal_Push_FixedShift_no_capacity:
+ MASK_CPU(%r3, %r7) // Move and mask CPU into %r3
+ mr %r7, %r6 // Move target function into r7
+ b .LPushOverflowTrampoline
+
+.LTcmallocSlab_Internal_Push_FixedShift_function_limit:
+ .cfi_endproc
+ENCODE_SIZE(TcmallocSlab_Internal_Push_FixedShift);
+DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_Push_FixedShift);
+
+
+////////////////////////////////////////////////////////////////////////
+// TcmallocSlab_Internal_Pop
+////////////////////////////////////////////////////////////////////////
+
+.globl TcmallocSlab_Internal_Pop
+.type TcmallocSlab_Internal_Pop, @function
+TcmallocSlab_Internal_Pop:
+.LTcmallocSlab_Internal_Pop_entry:
+ .cfi_startproc
+ // Arguments use:
+ // * r3: (Argument: Slabs*) cpu_0_slab_ptr
+ // * r4: (Argument: uintptr_t) cl
+ // * r5: (Argument: uintptr_t) f
+ // * r6: (Argument: size_t) shift
+ // Available r7 r8 r9 r10 r11
+ // r12 can be used as a temporary within rseq
+
+ START_RSEQ(TcmallocSlab_Internal_Pop)
+
+ GET_CPU(%r7) // r7 = CPU, includes mask operation
+ sld %r12, %r7, %r6 // r12 = CPU shifted by shift (r6)
+ add %r12, %r3, %r12 // r12 = start of this CPU region
+ rldicr %r8, %r4, 3, 60 // r8 = offset to class size
+ add %r8, %r12, %r8 // r8 = slab header addr for class size
+ lhz %r9, 0(%r8) // r9 = current index
+ lhz %r10, 4(%r8) // r10 = begin
+ cmpld %cr7, %r10, %r9 // Check that we have items to pop
+ bge %cr7, .LTcmallocSlab_Internal_Pop_no_item
+ subi %r9, %r9, 1 // r9 = current index --
+ rldicr %r10, %r9, 3, 60 // r10 = offset to current item
+ ldx %r11, %r12, %r10 // load the item from base + index
+ sth %r9, 0(%r8) // store current index
+
+.LTcmallocSlab_Internal_Pop_critical_limit:
+ // Move the item into r3, now that it's safe to do so.
+ mr %r3, %r11
+ blr
+
+.LTcmallocSlab_Internal_Pop_no_item:
+ mr %r3, %r7 // Place CPU into r3
+ b .LPopUnderflowTrampoline
+
+.LTcmallocSlab_Internal_Pop_function_limit:
+ .cfi_endproc
+ENCODE_SIZE(TcmallocSlab_Internal_Pop);
+DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_Pop);
+
+////////////////////////////////////////////////////////////////////////
+// TcmallocSlab_Internal_Pop_FixedShift
+////////////////////////////////////////////////////////////////////////
+
+.globl TcmallocSlab_Internal_Pop_FixedShift
+.type TcmallocSlab_Internal_Pop_FixedShift, @function
+TcmallocSlab_Internal_Pop_FixedShift:
+.LTcmallocSlab_Internal_Pop_FixedShift_entry:
+ .cfi_startproc
+ // Arguments use:
+ // * r3: (Argument: Slabs*) cpu_0_slab_ptr
+ // * r4: (Argument: uintptr_t) cl
+ // * r5: (Argument: uintptr_t) f
+
+ START_RSEQ(TcmallocSlab_Internal_Pop_FixedShift)
+
+ GET_CPU_UNMASKED(%r6) // r6 = current CPU
+ // Following instruction combines mask and shift
+ clrlsldi %r7, %r6, 52, TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT
+ // r7 = header offset
+ add %r7, %r3, %r7 // r7 = start of this CPU region
+ rldicr %r8, %r4, 3, 60 // r8 = offset of size class
+ add %r8, %r7, %r8 // r8 = slab header addr
+ lhz %r9, 0(%r8) // r9 = current index
+ lhz %r10, 4(%r8) // r10 = begin index
+ cmpld %cr7, %r10, %r9 // Check that there are elements available
+ bge %cr7, .LTcmallocSlab_Internal_Pop_FixedShift_no_item
+ subi %r9, %r9, 1 // current index --
+ rldicr %r10, %r9, 3, 60 // r10 = offset of current index
+ ldx %r11, %r7, %r10 // r11 = load the item
+ sth %r9, 0(%r8) // update current index
+
+.LTcmallocSlab_Internal_Pop_FixedShift_critical_limit:
+ // Move the item into r3, now that it's safe to do so.
+ mr %r3, %r11
+ blr
+
+.LTcmallocSlab_Internal_Pop_FixedShift_no_item:
+ MASK_CPU(%r3, %r6) // Extract CPU from unmasked value in %r6
+ b .LPopUnderflowTrampoline
+
+.LTcmallocSlab_Internal_Pop_FixedShift_function_limit:
+ .cfi_endproc
+ENCODE_SIZE(TcmallocSlab_Internal_Pop_FixedShift);
+DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_Pop_FixedShift);
+
+////////////////////////////////////////////////////////////////////////
+// TcmallocSlab_Internal_PushBatch_FixedShift
+////////////////////////////////////////////////////////////////////////
+
+.globl TcmallocSlab_Internal_PushBatch_FixedShift
+.type TcmallocSlab_Internal_PushBatch_FixedShift, @function
+TcmallocSlab_Internal_PushBatch_FixedShift:
+.LTcmallocSlab_Internal_PushBatch_FixedShift_entry:
+ .cfi_startproc
+ // Arguments use:
+ // * r3: (Argument: Slabs*) cpu_0_slab_ptr
+ // * r4: (Argument: uintptr_t) cl
+ // * r5: (Argument: uintptr_t) batch
+ // * r6: (Argument: uintptr_t) len
+
+ START_RSEQ(TcmallocSlab_Internal_PushBatch_FixedShift)
+
+ GET_CPU_UNMASKED(%r7)
+ clrlsldi %r8, %r7, 52, TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT
+ add %r8, %r3, %r8 // r8 - start of this CPU region
+ sldi %r9, %r4, 3
+ add %r9, %r8, %r9 // r9 - slab header addr
+ lhz %r10, 0(%r9) // r10 - current
+ lhz %r11, 6(%r9) // r11 - end
+ sldi %r7, %r6, 3 // r7 - len * 8
+ cmpld %cr7, %r11, %r10 // current < end?
+ ble %cr7, .LTcmallocSlab_Internal_PushBatch_FixedShift_critical_limit
+ sub %r11, %r11, %r10 // r11 - available capacity
+ // r11 = min(r11, r6)
+ cmpld %cr7, %r6, %r11
+ bge %cr7, .LTcmallocSlab_Internal_PushBatch_FixedShift_min
+ mr %r11, %r6
+.LTcmallocSlab_Internal_PushBatch_FixedShift_min:
+ add %r11, %r10, %r11
+ sldi %r11, %r11, 3
+ sldi %r10, %r10, 3
+
+ // At this point:
+ // r5 - batch, r7 - offset in the batch
+ // r8 - cpu region, r10 - offset into the cpu region, r11 - limit of offset
+.LTcmallocSlab_Internal_PushBatch_FixedShift_loop:
+ subi %r7, %r7, 8
+ ldx %r12, %r5, %r7 // load the item
+ stdx %r12, %r8, %r10 // store the item
+ addi %r10, %r10, 8
+ cmpld %cr7, %r10, %r11
+ bne %cr7, .LTcmallocSlab_Internal_PushBatch_FixedShift_loop
+ rotrdi %r10, %r10, 3
+ sth %r10, 0(%r9) // update current
+
+.LTcmallocSlab_Internal_PushBatch_FixedShift_critical_limit:
+ // return r6 - r7 / 8
+ rotrdi %r7, %r7, 3
+ sub %r3, %r6, %r7
+ blr
+
+.LTcmallocSlab_Internal_PushBatch_FixedShift_function_limit:
+ .cfi_endproc
+ENCODE_SIZE(TcmallocSlab_Internal_PushBatch_FixedShift);
+DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PushBatch_FixedShift);
+
+////////////////////////////////////////////////////////////////////////
+// TcmallocSlab_Internal_PopBatch_FixedShift
+////////////////////////////////////////////////////////////////////////
+
+.globl TcmallocSlab_Internal_PopBatch_FixedShift
+.type TcmallocSlab_Internal_PopBatch_FixedShift, @function
+TcmallocSlab_Internal_PopBatch_FixedShift:
+.LTcmallocSlab_Internal_PopBatch_FixedShift_entry:
+ .cfi_startproc
+ // Arguments use:
+ // * r3: (Argument: Slabs*) cpu_0_slab_ptr
+ // * r4: (Argument: uintptr_t) cl
+ // * r5: (Argument: uintptr_t) batch
+ // * r6: (Argument: uintptr_t) len
+
+ START_RSEQ(TcmallocSlab_Internal_PopBatch_FixedShift)
+
+ GET_CPU_UNMASKED(%r7)
+ clrlsldi %r7, %r7, 52, TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT
+ add %r7, %r3, %r7 // r7 - start of this CPU region
+ sldi %r8, %r4, 3
+ add %r8, %r7, %r8 // r8 - slab header addr
+ lhz %r9, 0(%r8) // r9 - current
+ lhz %r10, 4(%r8) // r10 - begin
+ li %r11, 0 // current position in batch
+ cmpld %cr7, %r10, %r9
+ bge %cr7, .LTcmallocSlab_Internal_PopBatch_FixedShift_critical_limit
+ sub %r10, %r9, %r10 // r10 - available items
+ // r10 = min(r10, r6)
+ cmpld %cr7, %r6, %r10
+ bge %cr7, .LTcmallocSlab_Internal_PopBatch_FixedShift_min
+ mr %r10, %r6
+.LTcmallocSlab_Internal_PopBatch_FixedShift_min:
+ sub %r10, %r9, %r10
+ sldi %r10, %r10, 3
+ sldi %r9, %r9, 3
+
+ // At this point:
+ // r5 - batch, r11 - offset in the batch
+ // r7 - cpu region, r9 - offset into the cpu region, r10 - limit of offset
+.LTcmallocSlab_Internal_PopBatch_FixedShift_loop:
+ subi %r9, %r9, 8
+ ldx %r12, %r7, %r9 // load the item
+ stdx %r12, %r5, %r11 // store the item
+ addi %r11, %r11, 8
+ cmpld %cr7, %r9, %r10
+ bne %cr7, .LTcmallocSlab_Internal_PopBatch_FixedShift_loop
+ rotrdi %r9, %r9, 3
+ sth %r9, 0(%r8) // update current
+
+.LTcmallocSlab_Internal_PopBatch_FixedShift_critical_limit:
+ rotrdi %r3, %r11, 3
+ blr
+
+.LTcmallocSlab_Internal_PopBatch_FixedShift_function_limit:
+ .cfi_endproc
+ENCODE_SIZE(TcmallocSlab_Internal_PopBatch_FixedShift);
+DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PopBatch_FixedShift);
+
+ // Input: r7 points to the function to tail call. r3...r6 are args for it.
+.LPushOverflowTrampoline:
+ mtctr %r7
+ mr %r12, %r7 // Callee expects r12 to point to its first instruction.
+ bctr
+
+ // Input: r5 points to the function to tail call. r3...r4 are args for it.
+.LPopUnderflowTrampoline:
+ mtctr %r5
+ mr %r12, %r5 // Callee expects r12 to point to its first instruction.
+ bctr
+
+.section .note.GNU-stack,"",%progbits
+
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_unsupported.cc b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_unsupported.cc
new file mode 100644
index 0000000000..1438d8c3d8
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_unsupported.cc
@@ -0,0 +1,87 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Provides skeleton RSEQ functions which raise a hard error in the case of
+// being erroneously called on an unsupported platform.
+
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/internal/percpu.h"
+
+#if !TCMALLOC_PERCPU_RSEQ_SUPPORTED_PLATFORM
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+namespace subtle {
+namespace percpu {
+
+static void Unsupported() {
+ Crash(kCrash, __FILE__, __LINE__,
+ "RSEQ function called on unsupported platform.");
+}
+
+int TcmallocSlab_Internal_PerCpuCmpxchg64(int target_cpu, intptr_t *p,
+ intptr_t old_val, intptr_t new_val) {
+ Unsupported();
+ return -1;
+}
+
+int TcmallocSlab_Internal_Push(void *ptr, size_t cl, void *item, size_t shift,
+ OverflowHandler f) {
+ Unsupported();
+ return -1;
+}
+
+int TcmallocSlab_Internal_Push_FixedShift(void *ptr, size_t cl, void *item,
+ OverflowHandler f) {
+ Unsupported();
+ return -1;
+}
+
+void *TcmallocSlab_Internal_Pop(void *ptr, size_t cl, UnderflowHandler f,
+ size_t shift) {
+ Unsupported();
+ return nullptr;
+}
+
+void *TcmallocSlab_Internal_Pop_FixedShift(void *ptr, size_t cl,
+ UnderflowHandler f) {
+ Unsupported();
+ return nullptr;
+}
+
+size_t TcmallocSlab_Internal_PushBatch_FixedShift(void *ptr, size_t cl,
+ void **batch, size_t len) {
+ Unsupported();
+ return 0;
+}
+
+size_t TcmallocSlab_Internal_PopBatch_FixedShift(void *ptr, size_t cl,
+ void **batch, size_t len) {
+ Unsupported();
+ return 0;
+}
+
+int PerCpuReadCycleCounter(int64_t *cycles) {
+ Unsupported();
+ return -1;
+}
+
+} // namespace percpu
+} // namespace subtle
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // !TCMALLOC_PERCPU_RSEQ_SUPPORTED_PLATFORM
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_x86_64.S b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_x86_64.S
new file mode 100644
index 0000000000..866f4f90ca
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_x86_64.S
@@ -0,0 +1,463 @@
+/*
+ * Copyright 2019 The TCMalloc Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __x86_64__
+#error "percpu_rseq_x86_64.S should only be included for x86-64 builds"
+#endif // __x86_64__
+
+#include "tcmalloc/internal/percpu.h"
+
+/*
+ * API Exposition:
+ *
+ * METHOD_abort: // Emitted as part of START_RSEQ()
+ * START_RSEQ() // Starts critical section between [start,commit)
+ * METHOD_start: // Emitted as part of START_RSEQ()
+ * FETCH_CPU() // Reads current CPU
+ * ...
+ * single store // Commits sequence
+ * METHOD_commit:
+ * ...return...
+ *
+ * This process is assisted by the DEFINE_UPSTREAM_CS macro, which encodes a
+ * (rodata) constant table, whose address is used to start the critical
+ * section, and the abort trampoline.
+ *
+ * The trampoline is used because:
+ * 1. Restarts are expected to be rare, so the extra jump when restarting is
+ * expected to be infrequent.
+ * 2. The upstream restartable sequence implementation expects the trailing 4
+ * bytes of the abort PC to be "signed" (to prevent manipulation of the PC
+ * to an arbitrary choice). For us, this is TCMALLOC_PERCPU_RSEQ_SIGNATURE. This
+ * value is passed to the kernel during configuration of the rseq syscall.
+ * This would either need to be encoded as a nop (SIGN_ABORT) at the start
+ * of every restartable sequence, increasing instruction cache pressure, or
+ * placed directly before the entry point.
+ *
+ * The trampoline returns us to METHOD_abort, which is the normal entry point
+ * for the restartable sequence. Upon restart, the (upstream) kernel API
+ * clears the per-thread restartable sequence state. We return to METHOD_abort
+ * (rather than METHOD_start), as we need to reinitialize this value.
+ */
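+
+/*
+ * For orientation, the 32-byte record emitted into __rseq_cs by
+ * DEFINE_UPSTREAM_CS() below mirrors the kernel's rseq critical-section
+ * descriptor. A rough C sketch of that layout (field names follow the
+ * upstream Linux rseq ABI; illustrative only, nothing in this file uses
+ * this type):
+ *
+ *   struct rseq_cs_sketch {
+ *     uint32_t version;             // TCMALLOC_PERCPU_RSEQ_VERSION
+ *     uint32_t flags;               // TCMALLOC_PERCPU_RSEQ_FLAGS
+ *     uint64_t start_ip;            // .L<label>_start
+ *     uint64_t post_commit_offset;  // .L<label>_commit - .L<label>_start
+ *     uint64_t abort_ip;            // <label>_trampoline
+ *   };
+ */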
+
+/* Place the code into the google_malloc section. This section is the heaviest
+ * user of Rseq code, so it makes sense to co-locate it.
+ */
+
+.section google_malloc, "ax"
+
+/* ---------------- start helper macros ---------------- */
+
+// This macro defines a relocation associated with the provided label to keep
+// section GC from discarding it independently of label.
+#if !defined(__clang_major__) || __clang_major__ >= 9
+#define PINSECTION(label) .reloc 0, R_X86_64_NONE, label
+#else
+#define PINSECTION(label)
+#endif
+
+// This macro defines:
+// * the rseq_cs instance that we'll use for label's critical section.
+// * a trampoline to return to when we abort. This label_trampoline is
+// distinct from label_start, as the return IP must be "signed" (see
+// SIGN_ABORT()).
+//
+// TODO(b/141629158): __rseq_cs only needs to be writeable to allow for
+// relocations, but could be read-only for non-PIE builds.
+#define DEFINE_UPSTREAM_CS(label) \
+ .pushsection __rseq_cs, "aw"; \
+ .balign 32; \
+ .protected __rseq_cs_##label; \
+ .type __rseq_cs_##label,@object; \
+ .size __rseq_cs_##label,32; \
+ __rseq_cs_##label: \
+ .long TCMALLOC_PERCPU_RSEQ_VERSION, TCMALLOC_PERCPU_RSEQ_FLAGS; \
+ .quad .L##label##_start; \
+ .quad .L##label##_commit - .L##label##_start; \
+ .quad label##_trampoline; \
+ PINSECTION(.L##label##array); \
+ .popsection; \
+ .pushsection __rseq_cs_ptr_array, "aw"; \
+ .L##label##array: \
+ .quad __rseq_cs_##label; \
+ .popsection; \
+ SIGN_ABORT(); \
+ .globl label##_trampoline; \
+ .type label##_trampoline, @function; \
+label##_trampoline: \
+ .cfi_startproc; \
+ jmp .L##label##_abort; \
+ .cfi_endproc; \
+ .size label##_trampoline, . - label##_trampoline;
+
+// This is part of the upstream rseq ABI. The 4 bytes prior to the abort IP
+// must match TCMALLOC_PERCPU_RSEQ_SIGNATURE (as configured by our rseq
+// syscall's signature parameter). This signature is used to annotate valid
+// abort IPs (since rseq_cs could live in a user-writable segment).
+//
+// To allow this to be safely executed as a valid instruction, we encode the
+// value with a nop. This is decoded as:
+//
+// nopl 0xSIGNATURE(%rip)
+//
+#define SIGN_ABORT() \
+ .byte 0x0f, 0x1f, 0x05; \
+ .long TCMALLOC_PERCPU_RSEQ_SIGNATURE;
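+
+// As a point of reference, this is the same signature value passed as the
+// final argument when the rseq syscall is registered elsewhere in tcmalloc;
+// an illustrative sketch of that registration (not code used in this file):
+//
+//   syscall(__NR_rseq, &__rseq_abi, sizeof(__rseq_abi), /*flags=*/0,
+//           TCMALLOC_PERCPU_RSEQ_SIGNATURE);
+//
+// The kernel will then only abort to IPs immediately preceded by these four
+// signature bytes.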
+
+/*
+ * Provide a directive to specify the size of symbol "label", relative to the
+ * current location and its start.
+ */
+#define ENCODE_SIZE(label) .size label, . - label;
+
+/* In position-independent builds we need to take label addresses via the GOT
+   (RIP-relative); non-PIC builds can use absolute label addresses. */
+#if !defined(__PIC__)
+#define LABEL_ADDR(label) $label
+#else
+#define LABEL_ADDR(label) label@GOTPCREL(%rip)
+#endif /* !defined(__PIC__) */
+
+/* With PIE we have initial-exec TLS, even in the presence of
+   position-independent code. */
+#if !defined(__PIC__) || defined(__PIE__)
+#define FETCH_CPU(dest) movl %fs:__rseq_abi@TPOFF+4, dest;
+#define FETCH_VCPU(dest) movzwl %fs:__rseq_abi@TPOFF+30, dest;
+#define START_RSEQ(src) \
+ .L##src##_abort: \
+ leaq __rseq_cs_##src(%rip), %rax; \
+ movq %rax, %fs:__rseq_abi@TPOFF+8; \
+ .L##src##_start:
+
+#else /* !defined(__PIC__) || defined(__PIE__) */
+
+/*
+ * FETCH_CPU assumes &__rseq_abi is in %rax. We cannot call
+ * tcmalloc_internal_tls_fetch_pic at this point, as we have started our
+ * restartable sequence. If we are preempted there, the kernel will clear
+ * rseq_cs as tcmalloc_internal_tls_fetch_pic does not appear in the
+ * restartable sequence's address range.
+ */
+#define FETCH_CPU(dest) \
+ movl 4(%rax), dest; /* cpuid is 32-bits */
+#define FETCH_VCPU(dest) \
+ movzwl 30(%rax), dest; /* vcpu_id is 16-bits */
+#define START_RSEQ(src) \
+ .L##src##_abort: \
+ call tcmalloc_internal_tls_fetch_pic@PLT; \
+ leaq __rseq_cs_##src(%rip), %r11; \
+ movq %r11, 8(%rax); \
+ .L##src##_start:
+
+/*
+ * We can safely call this function from within an RSEQ section as it only
+ * generates a thread-local address which will not change across a missed
+ * restart. This must precede the construction of any preparatory state.
+ */
+ .local tcmalloc_internal_tls_fetch_pic
+ .type tcmalloc_internal_tls_fetch_pic, @function
+tcmalloc_internal_tls_fetch_pic:
+ .cfi_startproc
+ push %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset 6, -16
+ mov %rsp, %rbp
+ .cfi_def_cfa_register 6
+ sub $0x30, %rsp
+ mov %rsi, -0x08(%rbp) /* atypical abi: tcmalloc_internal_tls_fetch_pic preserves regs */
+ mov %rdi, -0x10(%rbp)
+ mov %rdx, -0x18(%rbp)
+ mov %rcx, -0x20(%rbp)
+ mov %r8, -0x28(%rbp)
+ mov %r9, -0x30(%rbp)
+ /*
+ * Below is an optimized relocatable TLS lookup per ELF spec:
+ * http://www.akkadia.org/drepper/tls.pdf
+ * When possible, this is replaced at link-time with a call-free variant.
+ */
+ .byte 0x66;
+ leaq __rseq_abi@TLSGD(%rip), %rdi;
+ .word 0x6666;
+ rex64;
+ call __tls_get_addr@PLT;
+ mov -0x08(%rbp), %rsi
+ mov -0x10(%rbp), %rdi
+ mov -0x18(%rbp), %rdx
+ mov -0x20(%rbp), %rcx
+ mov -0x28(%rbp), %r8
+ mov -0x30(%rbp), %r9
+ add $0x30, %rsp
+ leave
+ .cfi_def_cfa_register 7
+ .cfi_def_cfa_offset 8
+ ret; /* &__rseq_abi in %rax */
+ .cfi_endproc
+ENCODE_SIZE(tcmalloc_internal_tls_fetch_pic)
+#endif /* !defined(__PIC__) || defined(__PIE__) */
+
+/* ---------------- end helper macros ---------------- */
+
+/* start of atomic restartable sequences */
+
+/*
+ * NOTE: We don't use cmpxchgq in the following functions since this would
+ * make checking the success of our commit operation dependent on flags (which
+ * are in turn clobbered by the restart region) -- furthermore we can't just
+ * retry to fill in the flags since the restarted cmpxchg may have actually
+ * succeeded, spuriously failing subsequent attempts.
+ */
+
+/*
+ * int TcmallocSlab_Internal_PerCpuCmpxchg64(int target_cpu, long *p,
+ * long old_val, long new_val)
+ */
+ .p2align 6; /* aligns to 2^6 with NOP filling */
+ .globl TcmallocSlab_Internal_PerCpuCmpxchg64
+ .type TcmallocSlab_Internal_PerCpuCmpxchg64, @function
+TcmallocSlab_Internal_PerCpuCmpxchg64:
+ .cfi_startproc
+ START_RSEQ(TcmallocSlab_Internal_PerCpuCmpxchg64);
+ FETCH_CPU(%eax);
+ cmp %eax, %edi; /* check cpu vs current_cpu */
+ jne .LTcmallocSlab_Internal_PerCpuCmpxchg64_commit;
+ cmp %rdx, (%rsi); /* verify *p == old */
+ jne .LTcmallocSlab_Internal_PerCpuCmpxchg64_value_mismatch;
+ mov %rcx, (%rsi);
+.LTcmallocSlab_Internal_PerCpuCmpxchg64_commit:
+ ret; /* return current cpu, indicating mismatch OR success */
+.LTcmallocSlab_Internal_PerCpuCmpxchg64_value_mismatch:
+ mov $-1, %eax; /* mismatch versus "old" or "check", return -1 */
+ ret;
+ .cfi_endproc
+ENCODE_SIZE(TcmallocSlab_Internal_PerCpuCmpxchg64)
+DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PerCpuCmpxchg64)
+
+ .p2align 6; /* aligns to 2^6 with NOP filling */
+ .globl TcmallocSlab_Internal_PerCpuCmpxchg64_VCPU
+ .type TcmallocSlab_Internal_PerCpuCmpxchg64_VCPU, @function
+TcmallocSlab_Internal_PerCpuCmpxchg64_VCPU:
+ .cfi_startproc
+ START_RSEQ(TcmallocSlab_Internal_PerCpuCmpxchg64_VCPU);
+ FETCH_VCPU(%eax);
+ cmp %eax, %edi; /* check cpu vs current_cpu */
+ jne .LTcmallocSlab_Internal_PerCpuCmpxchg64_VCPU_commit;
+ cmp %rdx, (%rsi); /* verify *p == old */
+ jne .LTcmallocSlab_Internal_PerCpuCmpxchg64_VCPU_value_mismatch;
+ mov %rcx, (%rsi);
+.LTcmallocSlab_Internal_PerCpuCmpxchg64_VCPU_commit:
+ ret; /* return current cpu, indicating mismatch OR success */
+.LTcmallocSlab_Internal_PerCpuCmpxchg64_VCPU_value_mismatch:
+ mov $-1, %eax; /* mismatch versus "old" or "check", return -1 */
+ ret;
+ .cfi_endproc
+ENCODE_SIZE(TcmallocSlab_Internal_PerCpuCmpxchg64_VCPU)
+DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PerCpuCmpxchg64_VCPU)
+
+/* size_t TcmallocSlab_Internal_PushBatch_FixedShift(
+ * void *ptr (%rdi),
+ * size_t cl (%rsi),
+ * void** batch (%rdx),
+ * size_t len (%rcx) {
+ * uint64_t r8 = __rseq_abi.cpu_id;
+ * uint64_t* r8 = CpuMemoryStart(rdi, r8);
+ * Header* hdr = r8 + rsi * 8;
+ * uint64_t r9 = hdr->current;
+ * uint64_t r10 = hdr->end;
+ * if (r9 >= r10) return 0;
+ * r11 = rcx;
+ * r10 = r9 + min(rcx, r10 - r9);
+ * loop:
+ * r11--;
+ * rax = batch[r11];
+ * *(r8 + r9 * 8) = rax;
+ * r9++;
+ * if (r9 != r10) goto loop;
+ * hdr->current = r9;
+ * return rcx - r11;
+ * }
+ */
+ .p2align 6; /* aligns to 2^6 with NOP filling */
+ .globl TcmallocSlab_Internal_PushBatch_FixedShift
+ .type TcmallocSlab_Internal_PushBatch_FixedShift, @function
+TcmallocSlab_Internal_PushBatch_FixedShift:
+ .cfi_startproc
+ START_RSEQ(TcmallocSlab_Internal_PushBatch_FixedShift);
+ FETCH_CPU(%r8d);
+ shl $TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT, %r8;
+ /* multiply cpu by 256k */
+ lea (%rdi, %r8), %r8;
+ movzwq (%r8, %rsi, 8), %r9; /* current */
+ movzwq 6(%r8, %rsi, 8), %r10; /* end */
+ cmpq %r10, %r9;
+ jae .LTcmallocSlab_Internal_PushBatch_FixedShift_full;
+ movq %rcx, %r11; /* r11 = copy of len */
+ subq %r9, %r10; /* r10 = free capacity */
+ cmpq %rcx, %r10;
+ cmovaq %rcx, %r10; /* r10 = min(len, free capacity) */
+ addq %r9, %r10;
+.LTcmallocSlab_Internal_PushBatch_FixedShift_loop:
+ decq %r11;
+ movq (%rdx, %r11, 8), %rax;
+ movq %rax, (%r8, %r9, 8);
+ incq %r9;
+ cmpq %r9, %r10;
+ jne .LTcmallocSlab_Internal_PushBatch_FixedShift_loop
+ movw %r9w, (%r8, %rsi, 8);
+.LTcmallocSlab_Internal_PushBatch_FixedShift_commit:
+ movq %rcx, %rax;
+ subq %r11, %rax;
+ ret;
+.LTcmallocSlab_Internal_PushBatch_FixedShift_full:
+ xor %rax, %rax;
+ ret;
+ .cfi_endproc
+ENCODE_SIZE(TcmallocSlab_Internal_PushBatch_FixedShift)
+DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PushBatch_FixedShift)
+
+ .p2align 6; /* aligns to 2^6 with NOP filling */
+ .globl TcmallocSlab_Internal_PushBatch_FixedShift_VCPU
+ .type TcmallocSlab_Internal_PushBatch_FixedShift_VCPU, @function
+TcmallocSlab_Internal_PushBatch_FixedShift_VCPU:
+ .cfi_startproc
+ START_RSEQ(TcmallocSlab_Internal_PushBatch_FixedShift_VCPU);
+ FETCH_VCPU(%r8d);
+ shl $TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT, %r8;
+ /* multiply cpu by 256k */
+ lea (%rdi, %r8), %r8;
+ movzwq (%r8, %rsi, 8), %r9; /* current */
+ movzwq 6(%r8, %rsi, 8), %r10; /* end */
+ cmpq %r10, %r9;
+ jae .LTcmallocSlab_Internal_PushBatch_FixedShift_VCPU_full;
+ movq %rcx, %r11; /* r11 = copy of len */
+ subq %r9, %r10; /* r10 = free capacity */
+ cmpq %rcx, %r10;
+ cmovaq %rcx, %r10; /* r10 = min(len, free capacity) */
+ addq %r9, %r10;
+.LTcmallocSlab_Internal_PushBatch_FixedShift_VCPU_loop:
+ decq %r11;
+ movq (%rdx, %r11, 8), %rax;
+ movq %rax, (%r8, %r9, 8);
+ incq %r9;
+ cmpq %r9, %r10;
+ jne .LTcmallocSlab_Internal_PushBatch_FixedShift_VCPU_loop
+ movw %r9w, (%r8, %rsi, 8);
+.LTcmallocSlab_Internal_PushBatch_FixedShift_VCPU_commit:
+ movq %rcx, %rax;
+ subq %r11, %rax;
+ ret;
+.LTcmallocSlab_Internal_PushBatch_FixedShift_VCPU_full:
+ xor %rax, %rax;
+ ret;
+ .cfi_endproc
+ENCODE_SIZE(TcmallocSlab_Internal_PushBatch_FixedShift_VCPU)
+DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PushBatch_FixedShift_VCPU)
+
+/* size_t TcmallocSlab_Internal_PopBatch_FixedShift(
+ * void *ptr (%rdi),
+ * size_t cl (%rsi),
+ * void** batch (%rdx),
+ * size_t len (%rcx) {
+ * uint64_t r8 = __rseq_abi.cpu_id;
+ * uint64_t* r8 = CpuMemoryStart(rdi, r8);
+ * Header* hdr = r8 + rsi * 8;
+ * uint64_t r9 = hdr->current;
+ * uint64_t r10 = hdr->begin;
+ * if (r9 <= r10) return 0;
+ * r11 = min(rcx, r9 - r10);
+ * rax = 0;
+ * loop:
+ * r9--;
+ * r10 = *(r8 + r9 * 8);
+ * batch[rax] = r10;
+ * rax++;
+ * if (rax != r11) goto loop;
+ * hdr->current = r9;
+ * return rax;
+ * }
+ */
+ .p2align 6; /* aligns to 2^6 with NOP filling */
+ .globl TcmallocSlab_Internal_PopBatch_FixedShift
+ .type TcmallocSlab_Internal_PopBatch_FixedShift, @function
+TcmallocSlab_Internal_PopBatch_FixedShift:
+ .cfi_startproc
+ START_RSEQ(TcmallocSlab_Internal_PopBatch_FixedShift);
+ FETCH_CPU(%r8d);
+ shl $TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT, %r8;
+ /* multiply cpu by 256k */
+ lea (%rdi, %r8), %r8;
+ movzwq (%r8, %rsi, 8), %r9; /* current */
+ movzwq 4(%r8, %rsi, 8), %r10; /* begin */
+ cmp %r10, %r9;
+ jbe .LTcmallocSlab_Internal_PopBatch_FixedShift_empty;
+ movq %r9, %r11;
+ subq %r10, %r11; /* r11 = available items */
+ cmpq %rcx, %r11;
+ cmovaq %rcx, %r11; /* r11 = min(len, available items) */
+ xorq %rax, %rax;
+.LTcmallocSlab_Internal_PopBatch_FixedShift_loop:
+ decq %r9;
+ movq (%r8, %r9, 8), %r10;
+ movq %r10, (%rdx, %rax, 8);
+ incq %rax;
+ cmpq %rax, %r11;
+ jne .LTcmallocSlab_Internal_PopBatch_FixedShift_loop
+ movw %r9w, (%r8, %rsi, 8);
+.LTcmallocSlab_Internal_PopBatch_FixedShift_commit:
+ ret;
+.LTcmallocSlab_Internal_PopBatch_FixedShift_empty:
+ xor %rax, %rax;
+ ret;
+ .cfi_endproc
+ENCODE_SIZE(TcmallocSlab_Internal_PopBatch_FixedShift)
+DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PopBatch_FixedShift)
+
+ .p2align 6; /* aligns to 2^6 with NOP filling */
+ .globl TcmallocSlab_Internal_PopBatch_FixedShift_VCPU
+ .type TcmallocSlab_Internal_PopBatch_FixedShift_VCPU, @function
+TcmallocSlab_Internal_PopBatch_FixedShift_VCPU:
+ .cfi_startproc
+ START_RSEQ(TcmallocSlab_Internal_PopBatch_FixedShift_VCPU);
+ FETCH_VCPU(%r8d);
+ shl $TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT, %r8;
+ /* multiply cpu by 256k */
+ lea (%rdi, %r8), %r8;
+ movzwq (%r8, %rsi, 8), %r9; /* current */
+ movzwq 4(%r8, %rsi, 8), %r10; /* begin */
+ cmp %r10, %r9;
+ jbe .LTcmallocSlab_Internal_PopBatch_FixedShift_VCPU_empty;
+ movq %r9, %r11;
+ subq %r10, %r11; /* r11 = available items */
+ cmpq %rcx, %r11;
+ cmovaq %rcx, %r11; /* r11 = min(len, available items) */
+ xorq %rax, %rax;
+.LTcmallocSlab_Internal_PopBatch_FixedShift_VCPU_loop:
+ decq %r9;
+ movq (%r8, %r9, 8), %r10;
+ movq %r10, (%rdx, %rax, 8);
+ incq %rax;
+ cmpq %rax, %r11;
+ jne .LTcmallocSlab_Internal_PopBatch_FixedShift_VCPU_loop
+ movw %r9w, (%r8, %rsi, 8);
+.LTcmallocSlab_Internal_PopBatch_FixedShift_VCPU_commit:
+ ret;
+.LTcmallocSlab_Internal_PopBatch_FixedShift_VCPU_empty:
+ xor %rax, %rax;
+ ret;
+ .cfi_endproc
+ENCODE_SIZE(TcmallocSlab_Internal_PopBatch_FixedShift_VCPU)
+DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PopBatch_FixedShift_VCPU)
+
+.section .note.GNU-stack,"",@progbits
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/percpu_tcmalloc.h b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_tcmalloc.h
new file mode 100644
index 0000000000..91d15ba908
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_tcmalloc.h
@@ -0,0 +1,1279 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TCMALLOC_INTERNAL_PERCPU_TCMALLOC_H_
+#define TCMALLOC_INTERNAL_PERCPU_TCMALLOC_H_
+
+#include <atomic>
+#include <cstring>
+
+#include "absl/base/casts.h"
+#include "absl/base/dynamic_annotations.h"
+#include "absl/base/internal/sysinfo.h"
+#include "tcmalloc/internal/mincore.h"
+#include "tcmalloc/internal/percpu.h"
+
+#if defined(TCMALLOC_PERCPU_USE_RSEQ)
+#if !defined(__clang__)
+#define TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO 1
+#elif __clang_major__ >= 9 && !__has_feature(speculative_load_hardening)
+// asm goto requires the use of Clang 9 or newer:
+// https://releases.llvm.org/9.0.0/tools/clang/docs/ReleaseNotes.html#c-language-changes-in-clang
+//
+// SLH (Speculative Load Hardening) builds do not support asm goto. We can
+// detect these compilation modes since
+// https://github.com/llvm/llvm-project/commit/379e68a763097bed55556c6dc7453e4b732e3d68.
+#define TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO 1
+#if __clang_major__ >= 11
+#define TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT 1
+#endif
+
+#else
+#define TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO 0
+#endif
+#else
+#define TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO 0
+#endif
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+struct PerCPUMetadataState {
+ size_t virtual_size;
+ size_t resident_size;
+};
+
+namespace subtle {
+namespace percpu {
+
+// Tcmalloc slab for per-cpu caching mode.
+// Conceptually it is equivalent to an array of NumClasses PerCpuSlab's,
+// and in fallback implementation it is implemented that way. But optimized
+// implementation uses more compact layout and provides faster operations.
+//
+// Methods of this type must only be used in threads where it is known that the
+// percpu primitives are available and percpu::IsFast() has previously returned
+// 'true'.
+template <size_t NumClasses>
+class TcmallocSlab {
+ public:
+ constexpr TcmallocSlab() = default;
+
+ // Init must be called before any other methods.
+ // <alloc> is memory allocation callback (e.g. malloc).
+ // <capacity> callback returns max capacity for size class <cl>.
+ // <lazy> indicates that per-CPU slabs should be populated on demand.
+ // <shift> indicates the number of bits to shift the CPU ID in order to
+ //     obtain the location of the per-CPU slab. If this parameter matches
+ //     TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT as set in
+ //     percpu.h, then the assembly-language versions of push/pop batch
+ //     can be used; otherwise batch operations are emulated.
+ //
+ // Initial capacity is 0 for all slabs.
+ void Init(void*(alloc)(size_t size), size_t (*capacity)(size_t cl), bool lazy,
+ size_t shift);
+
+ // Only may be called if Init(..., lazy = true) was used.
+ void InitCPU(int cpu, size_t (*capacity)(size_t cl));
+
+ // For tests.
+ void Destroy(void(free)(void*));
+
+ // Number of elements in cpu/cl slab.
+ size_t Length(int cpu, size_t cl) const;
+
+ // Number of elements (currently) allowed in cpu/cl slab.
+ size_t Capacity(int cpu, size_t cl) const;
+
+ // If running on cpu, increment the cpu/cl slab's capacity to no greater than
+ // min(capacity+len, max_cap) and return the increment applied. Otherwise
+ // return 0. Note: max_cap must be the same as returned by capacity callback
+ // passed to Init.
+ size_t Grow(int cpu, size_t cl, size_t len, size_t max_cap);
+
+ // If running on cpu, decrement the cpu/cl slab's capacity to no less than
+ // max(capacity-len, 0) and return the actual decrement applied. Otherwise
+ // return 0.
+ size_t Shrink(int cpu, size_t cl, size_t len);
+
+ // Add an item (which must be non-zero) to the current CPU's slab. Returns
+ // true if the add succeeds. Otherwise invokes <f> and returns false
+ // (assuming that <f> returns a negative value).
+ bool Push(size_t cl, void* item, OverflowHandler f);
+
+ // Remove an item (LIFO) from the current CPU's slab. If the slab is empty,
+ // invokes <f> and returns its result.
+ void* Pop(size_t cl, UnderflowHandler f);
+
+ // Add up to <len> items to the current cpu slab from the array located at
+ // <batch>. Returns the number of items that were added (possibly 0). All
+ // items not added will be returned at the start of <batch>. Items are only
+ // not added if there is no space on the current cpu.
+ // REQUIRES: len > 0.
+ size_t PushBatch(size_t cl, void** batch, size_t len);
+
+ // Pop up to <len> items from the current cpu slab and return them in <batch>.
+ // Returns the number of items actually removed.
+ // REQUIRES: len > 0.
+ size_t PopBatch(size_t cl, void** batch, size_t len);
+
+ // Decrements the cpu/cl slab's capacity to no less than max(capacity-len, 0)
+ // and returns the actual decrement applied. It attempts to shrink any
+ // unused capacity (i.e. end - current) in cpu/cl's slab; if it does not have
+ // enough unused items, it pops up to <len> items from cpu/cl slab and then
+ // shrinks the freed capacity.
+ //
+ // May be called from another processor, not just the <cpu>.
+ // REQUIRES: len > 0.
+ typedef void (*ShrinkHandler)(void* arg, size_t cl, void** batch, size_t n);
+ size_t ShrinkOtherCache(int cpu, size_t cl, size_t len, void* shrink_ctx,
+ ShrinkHandler f);
+
+ // Remove all items (of all classes) from <cpu>'s slab; reset capacity for all
+ // classes to zero. Then, for each sizeclass, invoke
+ // DrainHandler(drain_ctx, cl, <items from slab>, <previous slab capacity>);
+ //
+ // It is invalid to concurrently execute Drain() for the same CPU; calling
+ // Push/Pop/Grow/Shrink concurrently (even on the same CPU) is safe.
+ typedef void (*DrainHandler)(void* drain_ctx, size_t cl, void** batch,
+ size_t n, size_t cap);
+ void Drain(int cpu, void* drain_ctx, DrainHandler f);
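+
+  // A minimal DrainHandler sketch (illustrative only; ReturnToCentral is a
+  // hypothetical callback used to show the expected signature):
+  //
+  //   void MyDrainHandler(void* drain_ctx, size_t cl, void** batch, size_t n,
+  //                       size_t cap) {
+  //     // <batch> holds the n items drained from size class cl; <cap> is the
+  //     // capacity that class had before the drain.
+  //     ReturnToCentral(drain_ctx, cl, batch, n);
+  //   }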
+
+ PerCPUMetadataState MetadataMemoryUsage() const;
+
+ // We use a single contiguous region of memory for all slabs on all CPUs.
+ // This region is split into NumCPUs regions of size kPerCpuMem (256k).
+ // The first NumClasses words of each CPU region are occupied by slab
+ // headers (Header struct). The remaining memory contains the slab arrays.
+ struct Slabs {
+ std::atomic<int64_t> header[NumClasses];
+ void* mem[];
+ };
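+
+  // Illustrative address arithmetic for this layout (a sketch only; it
+  // mirrors what CpuMemoryStart() and GetHeader() below compute):
+  //
+  //   char* base = reinterpret_cast<char*>(slabs_);
+  //   Slabs* region = reinterpret_cast<Slabs*>(base + (cpu << shift_));
+  //   std::atomic<int64_t>* hdr = &region->header[cl];  // 8-byte slab header
+  //   // Header fields (current/begin/end) are 8-byte-word offsets from
+  //   // region, so slot w of this CPU's slabs is
+  //   //   reinterpret_cast<void**>(region)[w].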
+
+ inline int GetCurrentVirtualCpuUnsafe() {
+ return VirtualRseqCpuId(virtual_cpu_id_offset_);
+ }
+
+ private:
+ // Slab header (packed, atomically updated 64-bit).
+ struct Header {
+ // All values are word offsets from per-CPU region start.
+ // The array is [begin, end).
+ uint16_t current;
+ // Copy of end. Updated by Shrink/Grow, but is not overwritten by Drain.
+ uint16_t end_copy;
+ // Lock updates only begin and end with a 32-bit write.
+ union {
+ struct {
+ uint16_t begin;
+ uint16_t end;
+ };
+ uint32_t lock_update;
+ };
+
+ // Lock is used by Drain to stop concurrent mutations of the Header.
+ // Lock sets begin to 0xffff and end to 0, which makes Push and Pop fail
+ // regardless of current value.
+ bool IsLocked() const;
+ void Lock();
+ };
+
+ // We cast Header to std::atomic<int64_t>.
+ static_assert(sizeof(Header) == sizeof(std::atomic<int64_t>),
+ "bad Header size");
+
+ Slabs* slabs_ = nullptr;
+ size_t shift_ = 0;
+ // This is in units of bytes.
+ size_t virtual_cpu_id_offset_ = offsetof(kernel_rseq, cpu_id);
+
+ Slabs* CpuMemoryStart(int cpu) const;
+ std::atomic<int64_t>* GetHeader(int cpu, size_t cl) const;
+ static Header LoadHeader(std::atomic<int64_t>* hdrp);
+ static void StoreHeader(std::atomic<int64_t>* hdrp, Header hdr);
+ static int CompareAndSwapHeader(int cpu, std::atomic<int64_t>* hdrp,
+ Header old, Header hdr,
+ size_t virtual_cpu_id_offset);
+};
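+
+// A minimal usage sketch for the TcmallocSlab API declared above
+// (illustrative only; MyAlloc, MyCapacity, MyOverflow, MyUnderflow, item,
+// batch and kNumClasses are hypothetical stand-ins supplied by the caller):
+//
+//   TcmallocSlab<kNumClasses> slab;
+//   slab.Init(MyAlloc, MyCapacity, /*lazy=*/false,
+//             TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT);
+//   // Capacity starts at 0, so grant some before pushing.
+//   slab.Grow(slab.GetCurrentVirtualCpuUnsafe(), /*cl=*/1, /*len=*/8,
+//             MyCapacity(1));
+//   if (!slab.Push(/*cl=*/1, item, MyOverflow)) { /* overflow handled by f */ }
+//   void* got = slab.Pop(/*cl=*/1, MyUnderflow);
+//   size_t pushed = slab.PushBatch(/*cl=*/1, batch, /*len=*/4);
+//   size_t popped = slab.PopBatch(/*cl=*/1, batch, /*len=*/4);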
+
+template <size_t NumClasses>
+inline size_t TcmallocSlab<NumClasses>::Length(int cpu, size_t cl) const {
+ Header hdr = LoadHeader(GetHeader(cpu, cl));
+ return hdr.IsLocked() ? 0 : hdr.current - hdr.begin;
+}
+
+template <size_t NumClasses>
+inline size_t TcmallocSlab<NumClasses>::Capacity(int cpu, size_t cl) const {
+ Header hdr = LoadHeader(GetHeader(cpu, cl));
+ return hdr.IsLocked() ? 0 : hdr.end - hdr.begin;
+}
+
+template <size_t NumClasses>
+inline size_t TcmallocSlab<NumClasses>::Grow(int cpu, size_t cl, size_t len,
+ size_t max_cap) {
+ const size_t virtual_cpu_id_offset = virtual_cpu_id_offset_;
+ std::atomic<int64_t>* hdrp = GetHeader(cpu, cl);
+ for (;;) {
+ Header old = LoadHeader(hdrp);
+ if (old.IsLocked() || old.end - old.begin == max_cap) {
+ return 0;
+ }
+ uint16_t n = std::min<uint16_t>(len, max_cap - (old.end - old.begin));
+ Header hdr = old;
+ hdr.end += n;
+ hdr.end_copy += n;
+ const int ret =
+ CompareAndSwapHeader(cpu, hdrp, old, hdr, virtual_cpu_id_offset);
+ if (ret == cpu) {
+ return n;
+ } else if (ret >= 0) {
+ return 0;
+ }
+ }
+}
+
+template <size_t NumClasses>
+inline size_t TcmallocSlab<NumClasses>::Shrink(int cpu, size_t cl, size_t len) {
+ const size_t virtual_cpu_id_offset = virtual_cpu_id_offset_;
+ std::atomic<int64_t>* hdrp = GetHeader(cpu, cl);
+ for (;;) {
+ Header old = LoadHeader(hdrp);
+ if (old.IsLocked() || old.current == old.end) {
+ return 0;
+ }
+ uint16_t n = std::min<uint16_t>(len, old.end - old.current);
+ Header hdr = old;
+ hdr.end -= n;
+ hdr.end_copy -= n;
+ const int ret =
+ CompareAndSwapHeader(cpu, hdrp, old, hdr, virtual_cpu_id_offset);
+ if (ret == cpu) {
+ return n;
+ } else if (ret >= 0) {
+ return 0;
+ }
+ }
+}
+
+#if defined(__x86_64__)
+template <size_t NumClasses>
+static inline ABSL_ATTRIBUTE_ALWAYS_INLINE int TcmallocSlab_Internal_Push(
+ typename TcmallocSlab<NumClasses>::Slabs* slabs, size_t cl, void* item,
+ const size_t shift, OverflowHandler f, const size_t virtual_cpu_id_offset) {
+#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO
+ asm goto(
+#else
+ bool overflow;
+ asm volatile(
+#endif
+ // TODO(b/141629158): __rseq_cs only needs to be writeable to allow for
+ // relocations, but could be read-only for non-PIE builds.
+ ".pushsection __rseq_cs, \"aw?\"\n"
+ ".balign 32\n"
+ ".local __rseq_cs_TcmallocSlab_Internal_Push_%=\n"
+ ".type __rseq_cs_TcmallocSlab_Internal_Push_%=,@object\n"
+ ".size __rseq_cs_TcmallocSlab_Internal_Push_%=,32\n"
+ "__rseq_cs_TcmallocSlab_Internal_Push_%=:\n"
+ ".long 0x0\n"
+ ".long 0x0\n"
+ ".quad 4f\n"
+ ".quad 5f - 4f\n"
+ ".quad 2f\n"
+ ".popsection\n"
+#if !defined(__clang_major__) || __clang_major__ >= 9
+ ".reloc 0, R_X86_64_NONE, 1f\n"
+#endif
+ ".pushsection __rseq_cs_ptr_array, \"aw?\"\n"
+ "1:\n"
+ ".balign 8;"
+ ".quad __rseq_cs_TcmallocSlab_Internal_Push_%=\n"
+ // Force this section to be retained. It is for debugging, but is
+ // otherwise not referenced.
+ ".popsection\n"
+ ".pushsection .text.unlikely, \"ax?\"\n"
+ ".byte 0x0f, 0x1f, 0x05\n"
+ ".long %c[rseq_sig]\n"
+ ".local TcmallocSlab_Internal_Push_trampoline_%=\n"
+ ".type TcmallocSlab_Internal_Push_trampoline_%=,@function\n"
+ "TcmallocSlab_Internal_Push_trampoline_%=:\n"
+ "2:\n"
+ "jmp 3f\n"
+ ".size TcmallocSlab_Internal_Push_trampoline_%=, . - "
+ "TcmallocSlab_Internal_Push_trampoline_%=;\n"
+ ".popsection\n"
+ // Prepare
+ //
+ // TODO(b/151503411): Pending widespread availability of LLVM's asm
+ // goto with output constraints
+ // (https://github.com/llvm/llvm-project/commit/23c2a5ce33f0), we can
+ // return the register allocations to the compiler rather than using
+ // explicit clobbers. Prior to this, blocks which use asm goto cannot
+ // also specify outputs.
+ //
+ // r10: Scratch
+ // r11: Current
+ "3:\n"
+ "lea __rseq_cs_TcmallocSlab_Internal_Push_%=(%%rip), %%r10\n"
+ "mov %%r10, %c[rseq_cs_offset](%[rseq_abi])\n"
+ // Start
+ "4:\n"
+ // scratch = __rseq_abi.cpu_id;
+ "movzwl (%[rseq_abi], %[rseq_cpu_offset]), %%r10d\n"
+ // scratch = slabs + scratch
+ "shlq %b[shift], %%r10\n"
+ "add %[slabs], %%r10\n"
+ // r11 = slabs->current;
+ "movzwq (%%r10, %[cl], 8), %%r11\n"
+ // if (ABSL_PREDICT_FALSE(r11 >= slabs->end)) { goto overflow; }
+ "cmp 6(%%r10, %[cl], 8), %%r11w\n"
+#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO
+ "jae %l[overflow_label]\n"
+#else
+ "jae 5f\n"
+ // Important! code below this must not affect any flags (i.e.: ccae)
+ // If so, the above code needs to explicitly set a ccae return value.
+#endif
+ "mov %[item], (%%r10, %%r11, 8)\n"
+ "lea 1(%%r11), %%r11\n"
+ "mov %%r11w, (%%r10, %[cl], 8)\n"
+ // Commit
+ "5:\n"
+ :
+#if !TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO
+ [overflow] "=@ccae"(overflow)
+#endif
+ : [rseq_abi] "r"(&__rseq_abi),
+ [rseq_cs_offset] "n"(offsetof(kernel_rseq, rseq_cs)),
+ [rseq_cpu_offset] "r"(virtual_cpu_id_offset),
+ [rseq_sig] "in"(TCMALLOC_PERCPU_RSEQ_SIGNATURE), [shift] "c"(shift),
+ [slabs] "r"(slabs), [cl] "r"(cl), [item] "r"(item)
+ : "cc", "memory", "r10", "r11"
+#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO
+ : overflow_label
+#endif
+ );
+#if !TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO
+ if (ABSL_PREDICT_FALSE(overflow)) {
+ goto overflow_label;
+ }
+#endif
+ return 0;
+overflow_label:
+ // As of 3/2020, LLVM's asm goto (even with output constraints) only provides
+ // values for the fallthrough path. The values on the taken branches are
+ // undefined.
+ int cpu = VirtualRseqCpuId(virtual_cpu_id_offset);
+ return f(cpu, cl, item);
+}
+#endif // defined(__x86_64__)
+
+#if defined(__aarch64__)
+
+template <size_t NumClasses>
+static inline ABSL_ATTRIBUTE_ALWAYS_INLINE int TcmallocSlab_Internal_Push(
+ typename TcmallocSlab<NumClasses>::Slabs* slabs, size_t cl, void* item,
+ const size_t shift, OverflowHandler f, const size_t virtual_cpu_id_offset) {
+ void* region_start;
+ uint64_t cpu_id;
+ void* end_ptr;
+ uintptr_t current;
+ uintptr_t end;
+ // Multiply cl by the byte size of each header
+ size_t cl_lsl3 = cl * 8;
+#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO
+ asm goto(
+#else
+ bool overflow;
+ asm volatile(
+#endif
+ // TODO(b/141629158): __rseq_cs only needs to be writeable to allow for
+ // relocations, but could be read-only for non-PIE builds.
+ ".pushsection __rseq_cs, \"aw?\"\n"
+ ".balign 32\n"
+ ".local __rseq_cs_TcmallocSlab_Internal_Push_%=\n"
+ ".type __rseq_cs_TcmallocSlab_Internal_Push_%=,@object\n"
+ ".size __rseq_cs_TcmallocSlab_Internal_Push_%=,32\n"
+ "__rseq_cs_TcmallocSlab_Internal_Push_%=:\n"
+ ".long 0x0\n"
+ ".long 0x0\n"
+ ".quad 4f\n"
+ ".quad 5f - 4f\n"
+ ".quad 2f\n"
+ ".popsection\n"
+#if !defined(__clang_major__) || __clang_major__ >= 9
+ ".reloc 0, R_AARCH64_NONE, 1f\n"
+#endif
+ ".pushsection __rseq_cs_ptr_array, \"aw?\"\n"
+ "1:\n"
+ ".balign 8;"
+ ".quad __rseq_cs_TcmallocSlab_Internal_Push_%=\n"
+ // Force this section to be retained. It is for debugging, but is
+ // otherwise not referenced.
+ ".popsection\n"
+ ".pushsection .text.unlikely, \"ax?\"\n"
+ ".long %c[rseq_sig]\n"
+ ".local TcmallocSlab_Internal_Push_trampoline_%=\n"
+ ".type TcmallocSlab_Internal_Push_trampoline_%=,@function\n"
+ "TcmallocSlab_Internal_Push_trampoline_%=:\n"
+ "2:\n"
+ "b 3f\n"
+ ".popsection\n"
+ // Prepare
+ //
+ // TODO(b/151503411): Pending widespread availability of LLVM's asm
+ // goto with output constraints
+ // (https://github.com/llvm/llvm-project/commit/23c2a5ce33f0), we can
+ // return the register allocations to the compiler rather than using
+ // explicit clobbers. Prior to this, blocks which use asm goto cannot
+ // also specify outputs.
+ "3:\n"
+ // Use current as scratch here to hold address of this function's
+ // critical section
+ "adrp %[current], __rseq_cs_TcmallocSlab_Internal_Push_%=\n"
+ "add %[current], %[current], "
+ ":lo12:__rseq_cs_TcmallocSlab_Internal_Push_%=\n"
+ "str %[current], [%[rseq_abi], %c[rseq_cs_offset]]\n"
+ // Start
+ "4:\n"
+ // cpu_id = __rseq_abi.cpu_id;
+ "ldr %w[cpu_id], [%[rseq_abi], %[rseq_cpu_offset]]\n"
+ // region_start = Start of cpu region
+ "lsl %[region_start], %[cpu_id], %[shift]\n"
+ "add %[region_start], %[region_start], %[slabs]\n"
+ // end_ptr = &(slab_headers[0]->end)
+ "add %[end_ptr], %[region_start], #6\n"
+ // current = slab_headers[cl]->current (current index)
+ "ldrh %w[current], [%[region_start], %[cl_lsl3]]\n"
+ // end = slab_headers[cl]->end (end index)
+ "ldrh %w[end], [%[end_ptr], %[cl_lsl3]]\n"
+ // if (ABSL_PREDICT_FALSE(current >= end)) { goto overflow; }
+ "cmp %[end], %[current]\n"
+#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO
+ "b.le %l[overflow_label]\n"
+#else
+ "b.le 5f\n"
+ // Important! code below this must not affect any flags (i.e.: ccae)
+ // If so, the above code needs to explicitly set a ccae return value.
+#endif
+ "str %[item], [%[region_start], %[current], LSL #3]\n"
+ "add %w[current], %w[current], #1\n"
+ "strh %w[current], [%[region_start], %[cl_lsl3]]\n"
+ // Commit
+ "5:\n"
+ : [end_ptr] "=&r"(end_ptr), [cpu_id] "=&r"(cpu_id),
+ [current] "=&r"(current), [end] "=&r"(end),
+ [region_start] "=&r"(region_start)
+
+#if !TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO
+ ,
+ [overflow] "=@ccae"(overflow)
+#endif
+ : [rseq_cpu_offset] "r"(virtual_cpu_id_offset), [slabs] "r"(slabs),
+ [cl_lsl3] "r"(cl_lsl3), [item] "r"(item), [rseq_abi] "r"(&__rseq_abi),
+ [shift] "r"(shift),
+ // Constants
+ [rseq_cs_offset] "n"(offsetof(kernel_rseq, rseq_cs)),
+ [rseq_sig] "in"(TCMALLOC_PERCPU_RSEQ_SIGNATURE)
+ : "cc", "memory"
+#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO
+ : overflow_label
+#endif
+ );
+#if !TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO
+ if (ABSL_PREDICT_FALSE(overflow)) {
+ goto overflow_label;
+ }
+#endif
+ return 0;
+overflow_label:
+#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT
+ // As of 3/2020, LLVM's asm goto (even with output constraints) only provides
+ // values for the fallthrough path. The values on the taken branches are
+ // undefined.
+ int cpu = VirtualRseqCpuId(virtual_cpu_id_offset);
+#else
+ // With asm goto--without output constraints--the value of scratch is
+ // well-defined by the compiler and our implementation. As an optimization on
+ // this case, we can avoid looking up cpu_id again, by undoing the
+ // transformation of cpu_id to the value of scratch.
+ int cpu = cpu_id;
+#endif
+ return f(cpu, cl, item);
+}
+#endif // defined (__aarch64__)
+
+template <size_t NumClasses>
+inline ABSL_ATTRIBUTE_ALWAYS_INLINE bool TcmallocSlab<NumClasses>::Push(
+ size_t cl, void* item, OverflowHandler f) {
+ ASSERT(item != nullptr);
+#if defined(__x86_64__) || defined(__aarch64__)
+ return TcmallocSlab_Internal_Push<NumClasses>(slabs_, cl, item, shift_, f,
+ virtual_cpu_id_offset_) >= 0;
+#else
+ if (shift_ == TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT) {
+ return TcmallocSlab_Internal_Push_FixedShift(slabs_, cl, item, f) >= 0;
+ } else {
+ return TcmallocSlab_Internal_Push(slabs_, cl, item, shift_, f) >= 0;
+ }
+#endif
+}
+
+#if defined(__x86_64__)
+template <size_t NumClasses>
+static inline ABSL_ATTRIBUTE_ALWAYS_INLINE void* TcmallocSlab_Internal_Pop(
+ typename TcmallocSlab<NumClasses>::Slabs* slabs, size_t cl,
+ UnderflowHandler f, const size_t shift,
+ const size_t virtual_cpu_id_offset) {
+ void* result;
+ void* scratch;
+ uintptr_t current;
+#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT
+ asm goto
+#else
+ bool underflow;
+ asm
+#endif
+ (
+ // TODO(b/141629158): __rseq_cs only needs to be writeable to allow
+ // for relocations, but could be read-only for non-PIE builds.
+ ".pushsection __rseq_cs, \"aw?\"\n"
+ ".balign 32\n"
+ ".local __rseq_cs_TcmallocSlab_Internal_Pop_%=\n"
+ ".type __rseq_cs_TcmallocSlab_Internal_Pop_%=,@object\n"
+ ".size __rseq_cs_TcmallocSlab_Internal_Pop_%=,32\n"
+ "__rseq_cs_TcmallocSlab_Internal_Pop_%=:\n"
+ ".long 0x0\n"
+ ".long 0x0\n"
+ ".quad 4f\n"
+ ".quad 5f - 4f\n"
+ ".quad 2f\n"
+ ".popsection\n"
+#if !defined(__clang_major__) || __clang_major__ >= 9
+ ".reloc 0, R_X86_64_NONE, 1f\n"
+#endif
+ ".pushsection __rseq_cs_ptr_array, \"aw?\"\n"
+ "1:\n"
+ ".balign 8;"
+ ".quad __rseq_cs_TcmallocSlab_Internal_Pop_%=\n"
+ // Force this section to be retained. It is for debugging, but is
+ // otherwise not referenced.
+ ".popsection\n"
+ ".pushsection .text.unlikely, \"ax?\"\n"
+ ".byte 0x0f, 0x1f, 0x05\n"
+ ".long %c[rseq_sig]\n"
+ ".local TcmallocSlab_Internal_Pop_trampoline_%=\n"
+ ".type TcmallocSlab_Internal_Pop_trampoline_%=,@function\n"
+ "TcmallocSlab_Internal_Pop_trampoline_%=:\n"
+ "2:\n"
+ "jmp 3f\n"
+ ".size TcmallocSlab_Internal_Pop_trampoline_%=, . - "
+ "TcmallocSlab_Internal_Pop_trampoline_%=;\n"
+ ".popsection\n"
+ // Prepare
+ "3:\n"
+ "lea __rseq_cs_TcmallocSlab_Internal_Pop_%=(%%rip), %[scratch];\n"
+ "mov %[scratch], %c[rseq_cs_offset](%[rseq_abi])\n"
+ // Start
+ "4:\n"
+ // scratch = __rseq_abi.cpu_id;
+ "movzwl (%[rseq_abi], %[rseq_cpu_offset]), %k[scratch]\n"
+ // scratch = slabs + scratch
+ "shlq %b[shift], %[scratch]\n"
+ "add %[slabs], %[scratch]\n"
+ // current = scratch->header[cl].current;
+ "movzwq (%[scratch], %[cl], 8), %[current]\n"
+ // if (ABSL_PREDICT_FALSE(scratch->header[cl].begin >= current))
+ "cmp 4(%[scratch], %[cl], 8), %w[current]\n"
+#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT
+ "jbe %l[underflow_path]\n"
+#else
+ "jbe 5f\n"
+ // Important! code below this must not affect any flags (i.e.: ccbe)
+ // If so, the above code needs to explicitly set a ccbe return value.
+#endif
+ "mov -16(%[scratch], %[current], 8), %[result]\n"
+ // A note about prefetcht0 in Pop: While this prefetch may appear
+ // costly, trace analysis shows the target is frequently used
+ // (b/70294962). Stalling on a TLB miss at the prefetch site (which
+ // has no deps) and prefetching the line async is better than stalling
+ // at the use (which may have deps) to fill the TLB and the cache
+ // miss.
+ "prefetcht0 (%[result])\n"
+ "movq -8(%[scratch], %[current], 8), %[result]\n"
+ "lea -1(%[current]), %[current]\n"
+ "mov %w[current], (%[scratch], %[cl], 8)\n"
+ // Commit
+ "5:\n"
+ : [result] "=&r"(result),
+#if !TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT
+ [underflow] "=@ccbe"(underflow),
+#endif
+ [scratch] "=&r"(scratch), [current] "=&r"(current)
+ : [rseq_abi] "r"(&__rseq_abi),
+ [rseq_cs_offset] "n"(offsetof(kernel_rseq, rseq_cs)),
+ [rseq_cpu_offset] "r"(virtual_cpu_id_offset),
+ [rseq_sig] "n"(TCMALLOC_PERCPU_RSEQ_SIGNATURE), [shift] "c"(shift),
+ [slabs] "r"(slabs), [cl] "r"(cl)
+ : "cc", "memory"
+#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT
+ : underflow_path
+#endif
+ );
+#if !TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT
+ if (ABSL_PREDICT_FALSE(underflow)) {
+ goto underflow_path;
+ }
+#endif
+
+ return result;
+underflow_path:
+#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT
+ // As of 3/2020, LLVM's asm goto (even with output constraints) only provides
+ // values for the fallthrough path. The values on the taken branches are
+ // undefined.
+ int cpu = VirtualRseqCpuId(virtual_cpu_id_offset);
+#else
+ // With asm goto--without output constraints--the value of scratch is
+ // well-defined by the compiler and our implementation. As an optimization on
+ // this case, we can avoid looking up cpu_id again, by undoing the
+ // transformation of cpu_id to the value of scratch.
+ int cpu =
+ (reinterpret_cast<char*>(scratch) - reinterpret_cast<char*>(slabs)) >>
+ shift;
+#endif
+ return f(cpu, cl);
+}
+#endif // defined(__x86_64__)
+
+#if defined(__aarch64__)
+template <size_t NumClasses>
+static inline ABSL_ATTRIBUTE_ALWAYS_INLINE void* TcmallocSlab_Internal_Pop(
+ typename TcmallocSlab<NumClasses>::Slabs* slabs, size_t cl,
+ UnderflowHandler f, const size_t shift,
+ const size_t virtual_cpu_id_offset) {
+ void* result;
+ void* region_start;
+ uint64_t cpu_id;
+ void* begin_ptr;
+ uintptr_t current;
+ uintptr_t new_current;
+ uintptr_t begin;
+ // Multiply cl by the byte size of each header
+ size_t cl_lsl3 = cl * 8;
+#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT
+ asm goto
+#else
+ bool underflow;
+ asm
+#endif
+ (
+ // TODO(b/141629158): __rseq_cs only needs to be writeable to allow
+ // for relocations, but could be read-only for non-PIE builds.
+ ".pushsection __rseq_cs, \"aw?\"\n"
+ ".balign 32\n"
+ ".local __rseq_cs_TcmallocSlab_Internal_Pop_%=\n"
+ ".type __rseq_cs_TcmallocSlab_Internal_Pop_%=,@object\n"
+ ".size __rseq_cs_TcmallocSlab_Internal_Pop_%=,32\n"
+ "__rseq_cs_TcmallocSlab_Internal_Pop_%=:\n"
+ ".long 0x0\n"
+ ".long 0x0\n"
+ ".quad 4f\n"
+ ".quad 5f - 4f\n"
+ ".quad 2f\n"
+ ".popsection\n"
+#if !defined(__clang_major__) || __clang_major__ >= 9
+ ".reloc 0, R_AARCH64_NONE, 1f\n"
+#endif
+ ".pushsection __rseq_cs_ptr_array, \"aw?\"\n"
+ "1:\n"
+ ".balign 8;"
+ ".quad __rseq_cs_TcmallocSlab_Internal_Pop_%=\n"
+ // Force this section to be retained. It is for debugging, but is
+ // otherwise not referenced.
+ ".popsection\n"
+ ".pushsection .text.unlikely, \"ax?\"\n"
+ ".long %c[rseq_sig]\n"
+ ".local TcmallocSlab_Internal_Pop_trampoline_%=\n"
+ ".type TcmallocSlab_Internal_Pop_trampoline_%=,@function\n"
+ "TcmallocSlab_Internal_Pop_trampoline_%=:\n"
+ "2:\n"
+ "b 3f\n"
+ ".popsection\n"
+ // Prepare
+ "3:\n"
+ // Use current as scratch here to hold address of this function's
+ // critical section
+ "adrp %[current], __rseq_cs_TcmallocSlab_Internal_Pop_%=\n"
+ "add %[current], %[current], "
+ ":lo12:__rseq_cs_TcmallocSlab_Internal_Pop_%=\n"
+ "str %[current], [%[rseq_abi], %c[rseq_cs_offset]]\n"
+ // Start
+ "4:\n"
+ // cpu_id = __rseq_abi.cpu_id;
+ "ldr %w[cpu_id], [%[rseq_abi], %[rseq_cpu_offset]]\n"
+ // region_start = Start of cpu region
+ "lsl %[region_start], %[cpu_id], %[shift]\n"
+ "add %[region_start], %[region_start], %[slabs]\n"
+ // begin_ptr = &(slab_headers[0]->begin)
+ "add %[begin_ptr], %[region_start], #4\n"
+ // current = slab_headers[cl]->current (current index)
+ "ldrh %w[current], [%[region_start], %[cl_lsl3]]\n"
+ // begin = slab_headers[cl]->begin (begin index)
+ "ldrh %w[begin], [%[begin_ptr], %[cl_lsl3]]\n"
+ // if (ABSL_PREDICT_FALSE(begin >= current)) { goto underflow; }
+ "cmp %w[begin], %w[current]\n"
+ "sub %w[new_current], %w[current], #1\n"
+#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT
+ "b.ge %l[underflow_path]\n"
+#else
+ "b.ge 5f\n"
+ // Important! code below this must not affect any flags (i.e.: ccbe)
+ // If so, the above code needs to explicitly set a ccbe return value.
+#endif
+ // current--
+ "ldr %[result], [%[region_start], %[new_current], LSL #3]\n"
+ "strh %w[new_current], [%[region_start], %[cl_lsl3]]\n"
+ // Commit
+ "5:\n"
+ :
+#if !TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT
+ [underflow] "=@ccbe"(underflow),
+#endif
+ [result] "=&r"(result),
+ // Temps
+ [cpu_id] "=&r"(cpu_id), [region_start] "=&r"(region_start),
+ [begin] "=&r"(begin), [current] "=&r"(current),
+ [new_current] "=&r"(new_current), [begin_ptr] "=&r"(begin_ptr)
+ // Real inputs
+ : [rseq_cpu_offset] "r"(virtual_cpu_id_offset), [slabs] "r"(slabs),
+ [cl_lsl3] "r"(cl_lsl3), [rseq_abi] "r"(&__rseq_abi),
+ [shift] "r"(shift),
+ // constants
+ [rseq_cs_offset] "in"(offsetof(kernel_rseq, rseq_cs)),
+ [rseq_sig] "in"(TCMALLOC_PERCPU_RSEQ_SIGNATURE)
+ : "cc", "memory"
+#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT
+ : underflow_path
+#endif
+ );
+#if !TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT
+ if (ABSL_PREDICT_FALSE(underflow)) {
+ goto underflow_path;
+ }
+#endif
+
+ return result;
+underflow_path:
+#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT
+ // As of 3/2020, LLVM's asm goto (even with output constraints) only provides
+ // values for the fallthrough path. The values on the taken branches are
+ // undefined.
+ int cpu = VirtualRseqCpuId(virtual_cpu_id_offset);
+#else
+ // With asm goto--without output constraints--the value of scratch is
+ // well-defined by the compiler and our implementation. As an optimization on
+ // this case, we can avoid looking up cpu_id again, by undoing the
+ // transformation of cpu_id to the value of scratch.
+ int cpu = cpu_id;
+#endif
+ return f(cpu, cl);
+}
+#endif // defined(__aarch64__)
+
+template <size_t NumClasses>
+inline ABSL_ATTRIBUTE_ALWAYS_INLINE void* TcmallocSlab<NumClasses>::Pop(
+ size_t cl, UnderflowHandler f) {
+#if defined(__x86_64__) || defined(__aarch64__)
+ return TcmallocSlab_Internal_Pop<NumClasses>(slabs_, cl, f, shift_,
+ virtual_cpu_id_offset_);
+#else
+ if (shift_ == TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT) {
+ return TcmallocSlab_Internal_Pop_FixedShift(slabs_, cl, f);
+ } else {
+ return TcmallocSlab_Internal_Pop(slabs_, cl, f, shift_);
+ }
+#endif
+}
+
+static inline void* NoopUnderflow(int cpu, size_t cl) { return nullptr; }
+
+static inline int NoopOverflow(int cpu, size_t cl, void* item) { return -1; }
+
+template <size_t NumClasses>
+inline size_t TcmallocSlab<NumClasses>::PushBatch(size_t cl, void** batch,
+ size_t len) {
+ ASSERT(len != 0);
+ if (shift_ == TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT) {
+#if TCMALLOC_PERCPU_USE_RSEQ
+ // TODO(b/159923407): TcmallocSlab_Internal_PushBatch_FixedShift needs to be
+ // refactored to take a 5th parameter (virtual_cpu_id_offset) to avoid
+ // needing to dispatch on two separate versions of the same function with
+ // only minor differences between them.
+ switch (virtual_cpu_id_offset_) {
+ case offsetof(kernel_rseq, cpu_id):
+ return TcmallocSlab_Internal_PushBatch_FixedShift(slabs_, cl, batch,
+ len);
+#ifdef __x86_64__
+ case offsetof(kernel_rseq, vcpu_id):
+ return TcmallocSlab_Internal_PushBatch_FixedShift_VCPU(slabs_, cl,
+ batch, len);
+#endif // __x86_64__
+ default:
+ __builtin_unreachable();
+ }
+#else // !TCMALLOC_PERCPU_USE_RSEQ
+ __builtin_unreachable();
+#endif // !TCMALLOC_PERCPU_USE_RSEQ
+ } else {
+ size_t n = 0;
+ // Push items until either all done or a push fails
+ while (n < len && Push(cl, batch[len - 1 - n], NoopOverflow)) {
+ n++;
+ }
+ return n;
+ }
+}
+
+template <size_t NumClasses>
+inline size_t TcmallocSlab<NumClasses>::PopBatch(size_t cl, void** batch,
+ size_t len) {
+ ASSERT(len != 0);
+ size_t n = 0;
+ if (shift_ == TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT) {
+#if TCMALLOC_PERCPU_USE_RSEQ
+ // TODO(b/159923407): TcmallocSlab_Internal_PopBatch_FixedShift needs to be
+ // refactored to take a 5th parameter (virtual_cpu_id_offset) to avoid
+ // needing to dispatch on two separate versions of the same function with
+ // only minor differences between them.
+ switch (virtual_cpu_id_offset_) {
+ case offsetof(kernel_rseq, cpu_id):
+ n = TcmallocSlab_Internal_PopBatch_FixedShift(slabs_, cl, batch, len);
+ break;
+#ifdef __x86_64__
+ case offsetof(kernel_rseq, vcpu_id):
+ n = TcmallocSlab_Internal_PopBatch_FixedShift_VCPU(slabs_, cl, batch,
+ len);
+ break;
+#endif // __x86_64__
+ default:
+ __builtin_unreachable();
+ }
+
+ // PopBatch is implemented in assembly, so MSan does not know that the
+ // returned batch is initialized.
+ ANNOTATE_MEMORY_IS_INITIALIZED(batch, n * sizeof(batch[0]));
+#else // !TCMALLOC_PERCPU_USE_RSEQ
+ __builtin_unreachable();
+#endif // !TCMALLOC_PERCPU_USE_RSEQ
+ } else {
+ // Pop items until either all done or a pop fails
+ while (n < len && (batch[n] = Pop(cl, NoopUnderflow))) {
+ n++;
+ }
+ }
+ return n;
+}
+
+template <size_t NumClasses>
+inline typename TcmallocSlab<NumClasses>::Slabs*
+TcmallocSlab<NumClasses>::CpuMemoryStart(int cpu) const {
+ char* const bytes = reinterpret_cast<char*>(slabs_);
+ return reinterpret_cast<Slabs*>(&bytes[cpu << shift_]);
+}
+
+template <size_t NumClasses>
+inline std::atomic<int64_t>* TcmallocSlab<NumClasses>::GetHeader(
+ int cpu, size_t cl) const {
+ return &CpuMemoryStart(cpu)->header[cl];
+}
+
+template <size_t NumClasses>
+inline typename TcmallocSlab<NumClasses>::Header
+TcmallocSlab<NumClasses>::LoadHeader(std::atomic<int64_t>* hdrp) {
+ return absl::bit_cast<Header>(hdrp->load(std::memory_order_relaxed));
+}
+
+template <size_t NumClasses>
+inline void TcmallocSlab<NumClasses>::StoreHeader(std::atomic<int64_t>* hdrp,
+ Header hdr) {
+ hdrp->store(absl::bit_cast<int64_t>(hdr), std::memory_order_relaxed);
+}
+
+template <size_t NumClasses>
+inline int TcmallocSlab<NumClasses>::CompareAndSwapHeader(
+ int cpu, std::atomic<int64_t>* hdrp, Header old, Header hdr,
+ const size_t virtual_cpu_id_offset) {
+#if __SIZEOF_POINTER__ == 8
+ const int64_t old_raw = absl::bit_cast<int64_t>(old);
+ const int64_t new_raw = absl::bit_cast<int64_t>(hdr);
+ return CompareAndSwapUnsafe(cpu, hdrp, static_cast<intptr_t>(old_raw),
+ static_cast<intptr_t>(new_raw),
+ virtual_cpu_id_offset);
+#else
+ Crash(kCrash, __FILE__, __LINE__, "This architecture is not supported.");
+#endif
+}
+
+template <size_t NumClasses>
+inline bool TcmallocSlab<NumClasses>::Header::IsLocked() const {
+ return begin == 0xffffu;
+}
+
+template <size_t NumClasses>
+inline void TcmallocSlab<NumClasses>::Header::Lock() {
+ // Write 0xffff to begin and 0 to end. This blocks new Push'es and Pop's.
+ // Note: we write only 4 bytes. The first 4 bytes are left intact.
+ // See Drain method for details. tl;dr: C++ does not allow us to legally
+ // express this without undefined behavior.
+ std::atomic<int32_t>* p =
+ reinterpret_cast<std::atomic<int32_t>*>(&lock_update);
+ Header hdr;
+ hdr.begin = 0xffffu;
+ hdr.end = 0;
+ p->store(absl::bit_cast<int32_t>(hdr.lock_update), std::memory_order_relaxed);
+}
+
+template <size_t NumClasses>
+void TcmallocSlab<NumClasses>::Init(void*(alloc)(size_t size),
+ size_t (*capacity)(size_t cl), bool lazy,
+ size_t shift) {
+#ifdef __x86_64__
+ if (UsingFlatVirtualCpus()) {
+ virtual_cpu_id_offset_ = offsetof(kernel_rseq, vcpu_id);
+ }
+#endif // __x86_64__
+
+ shift_ = shift;
+ size_t mem_size = absl::base_internal::NumCPUs() * (1ul << shift);
+ void* backing = alloc(mem_size);
+ // MSan does not see writes in assembly.
+ ANNOTATE_MEMORY_IS_INITIALIZED(backing, mem_size);
+ if (!lazy) {
+ memset(backing, 0, mem_size);
+ }
+ slabs_ = static_cast<Slabs*>(backing);
+ size_t bytes_used = 0;
+ for (int cpu = 0; cpu < absl::base_internal::NumCPUs(); ++cpu) {
+ bytes_used += sizeof(std::atomic<int64_t>) * NumClasses;
+ void** elems = CpuMemoryStart(cpu)->mem;
+
+ for (size_t cl = 0; cl < NumClasses; ++cl) {
+ size_t cap = capacity(cl);
+ CHECK_CONDITION(static_cast<uint16_t>(cap) == cap);
+
+ if (cap == 0) {
+ continue;
+ }
+
+ if (cap) {
+ if (!lazy) {
+ // In Pop() we prefetch the item a subsequent Pop() would return; this
+ // is slow if it's not a valid pointer. To avoid this problem when
+ // popping the last item, keep one fake item before the actual ones
+ // (that points, safely, to itself.)
+ *elems = elems;
+ elems++;
+ }
+
+ // One extra element for prefetch
+ bytes_used += (cap + 1) * sizeof(void*);
+ }
+
+ if (!lazy) {
+ // TODO(ckennelly): Consolidate this initialization logic with that in
+ // InitCPU.
+ size_t offset = elems - reinterpret_cast<void**>(CpuMemoryStart(cpu));
+ CHECK_CONDITION(static_cast<uint16_t>(offset) == offset);
+
+ Header hdr;
+ hdr.current = offset;
+ hdr.begin = offset;
+ hdr.end = offset;
+ hdr.end_copy = offset;
+
+ StoreHeader(GetHeader(cpu, cl), hdr);
+ }
+
+ elems += cap;
+ CHECK_CONDITION(reinterpret_cast<char*>(elems) -
+ reinterpret_cast<char*>(CpuMemoryStart(cpu)) <=
+ (1 << shift_));
+ }
+ }
+ // Check for less than 90% usage of the reserved memory
+ if (bytes_used * 10 < 9 * mem_size) {
+ Log(kLog, __FILE__, __LINE__, "Bytes used per cpu of available", bytes_used,
+ mem_size);
+ }
+}
+
+template <size_t NumClasses>
+void TcmallocSlab<NumClasses>::InitCPU(int cpu, size_t (*capacity)(size_t cl)) {
+ const size_t virtual_cpu_id_offset = virtual_cpu_id_offset_;
+
+ // TODO(ckennelly): Consolidate this logic with Drain.
+ // Phase 1: verify no header is locked
+ for (size_t cl = 0; cl < NumClasses; ++cl) {
+ Header hdr = LoadHeader(GetHeader(cpu, cl));
+ CHECK_CONDITION(!hdr.IsLocked());
+ }
+
+ // Phase 2: Stop concurrent mutations. Locking ensures that there exists no
+ // value of current such that begin < current.
+ for (bool done = false; !done;) {
+ for (size_t cl = 0; cl < NumClasses; ++cl) {
+ // Note: this reinterpret_cast and write in Lock lead to undefined
+ // behavior, because the actual object type is std::atomic<int64_t>. But
+ // C++ does not allow to legally express what we need here: atomic writes
+ // of different sizes.
+ reinterpret_cast<Header*>(GetHeader(cpu, cl))->Lock();
+ }
+ FenceCpu(cpu, virtual_cpu_id_offset);
+ done = true;
+ for (size_t cl = 0; cl < NumClasses; ++cl) {
+ Header hdr = LoadHeader(GetHeader(cpu, cl));
+ if (!hdr.IsLocked()) {
+ // Header was overwritten by Grow/Shrink. Retry.
+ done = false;
+ break;
+ }
+ }
+ }
+
+ // Phase 3: Initialize prefetch target and compute the offsets for the
+ // boundaries of each size class' cache.
+ void** elems = CpuMemoryStart(cpu)->mem;
+ uint16_t begin[NumClasses];
+ for (size_t cl = 0; cl < NumClasses; ++cl) {
+ size_t cap = capacity(cl);
+ CHECK_CONDITION(static_cast<uint16_t>(cap) == cap);
+
+ if (cap) {
+ // In Pop() we prefetch the item a subsequent Pop() would return; this is
+ // slow if it's not a valid pointer. To avoid this problem when popping
+ // the last item, keep one fake item before the actual ones (one that
+ // safely points to itself).
+ *elems = elems;
+ elems++;
+ }
+
+ size_t offset = elems - reinterpret_cast<void**>(CpuMemoryStart(cpu));
+ CHECK_CONDITION(static_cast<uint16_t>(offset) == offset);
+ begin[cl] = offset;
+
+ elems += cap;
+ CHECK_CONDITION(reinterpret_cast<char*>(elems) -
+ reinterpret_cast<char*>(CpuMemoryStart(cpu)) <=
+ (1 << shift_));
+ }
+
+ // Phase 4: Store current. No restartable sequence will proceed
+ // (successfully) as !(begin < current) for all size classes.
+ for (size_t cl = 0; cl < NumClasses; ++cl) {
+ std::atomic<int64_t>* hdrp = GetHeader(cpu, cl);
+ Header hdr = LoadHeader(hdrp);
+ hdr.current = begin[cl];
+ StoreHeader(hdrp, hdr);
+ }
+ FenceCpu(cpu, virtual_cpu_id_offset);
+
+ // Phase 5: Allow access to this cache.
+ for (size_t cl = 0; cl < NumClasses; ++cl) {
+ Header hdr;
+ hdr.current = begin[cl];
+ hdr.begin = begin[cl];
+ hdr.end = begin[cl];
+ hdr.end_copy = begin[cl];
+ StoreHeader(GetHeader(cpu, cl), hdr);
+ }
+}
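+
+// With lazy initialization, a CPU's region is populated on first use. A
+// common pattern (mirroring the accompanying unit test; g_slab and the
+// capacity of 16 are hypothetical) is to run InitCPU from the underflow
+// handler:
+//
+//   void* item = g_slab->Pop(cl, [](int cpu, size_t cl) -> void* {
+//     g_slab->InitCPU(cpu, [](size_t cl) -> size_t { return 16; });
+//     return nullptr;  // report underflow; the caller refills and retries
+//   });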
+
+template <size_t NumClasses>
+void TcmallocSlab<NumClasses>::Destroy(void(free)(void*)) {
+ free(slabs_);
+ slabs_ = nullptr;
+}
+
+template <size_t NumClasses>
+size_t TcmallocSlab<NumClasses>::ShrinkOtherCache(int cpu, size_t cl,
+ size_t len, void* ctx,
+ ShrinkHandler f) {
+ ASSERT(cpu >= 0);
+ ASSERT(cpu < absl::base_internal::NumCPUs());
+ const size_t virtual_cpu_id_offset = virtual_cpu_id_offset_;
+
+ // Phase 1: Collect begin as it will be overwritten by the lock.
+ std::atomic<int64_t>* hdrp = GetHeader(cpu, cl);
+ Header hdr = LoadHeader(hdrp);
+ CHECK_CONDITION(!hdr.IsLocked());
+ const uint16_t begin = hdr.begin;
+
+ // Phase 2: stop concurrent mutations.
+ for (bool done = false; !done;) {
+ reinterpret_cast<Header*>(GetHeader(cpu, cl))->Lock();
+ FenceCpu(cpu, virtual_cpu_id_offset);
+ done = true;
+
+ hdr = LoadHeader(GetHeader(cpu, cl));
+ if (!hdr.IsLocked()) {
+ // Header was overwritten by Grow/Shrink. Retry.
+ done = false;
+ }
+ }
+
+ // Phase 3: If we do not have len items of unused capacity to shrink, we
+ // try to pop items from the list first to create enough capacity that can
+ // be shrunk. If we pop items, we also execute callbacks.
+ //
+ // We can't write all 4 fields at once with a single write, because Pop does
+ // several non-atomic loads of the fields. Consider that a concurrent Pop
+ // loads old current (still pointing somewhere in the middle of the region);
+ // then we update all fields with a single write; then Pop loads the updated
+ // begin which allows it to proceed; then it decrements current below begin.
+ //
+ // So we instead first just update current--our locked begin/end guarantee
+ // no Push/Pop will make progress. Once we Fence below, we know no Push/Pop
+ // is using the old current, and can safely update begin/end to be an empty
+ // slab.
+
+ const uint16_t unused = hdr.end_copy - hdr.current;
+ if (unused < len) {
+ const uint16_t expected_pop = len - unused;
+ const uint16_t actual_pop =
+ std::min<uint16_t>(expected_pop, hdr.current - begin);
+ void** batch =
+ reinterpret_cast<void**>(GetHeader(cpu, 0) + hdr.current - actual_pop);
+ f(ctx, cl, batch, actual_pop);
+ hdr.current -= actual_pop;
+ StoreHeader(hdrp, hdr);
+ FenceCpu(cpu, virtual_cpu_id_offset);
+ }
+
+ // Phase 4: Shrink the capacity. Use a copy of begin and end_copy to
+ // restore the header, shrink it, and return the length by which the
+ // region was shrunk.
+ hdr.begin = begin;
+ const uint16_t to_shrink =
+ std::min<uint16_t>(len, hdr.end_copy - hdr.current);
+ hdr.end_copy -= to_shrink;
+ hdr.end = hdr.end_copy;
+ StoreHeader(hdrp, hdr);
+ return to_shrink;
+}
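+
+// Illustrative sketch of shrinking a remote CPU's cache (adapted from the
+// accompanying stress test; ReturnToFreelist is a hypothetical helper). The
+// handler receives any objects that had to be popped to create capacity:
+//
+//   size_t shrunk = slab.ShrinkOtherCache(
+//       cpu, cl, /*len=*/4, /*ctx=*/nullptr,
+//       [](void* ctx, size_t cl, void** batch, size_t n) {
+//         for (size_t i = 0; i < n; ++i) ReturnToFreelist(cl, batch[i]);
+//       });
+//   // shrunk <= 4 is the amount of capacity actually released.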
+
+template <size_t NumClasses>
+void TcmallocSlab<NumClasses>::Drain(int cpu, void* ctx, DrainHandler f) {
+ CHECK_CONDITION(cpu >= 0);
+ CHECK_CONDITION(cpu < absl::base_internal::NumCPUs());
+ const size_t virtual_cpu_id_offset = virtual_cpu_id_offset_;
+
+ // Push/Pop/Grow/Shrink can be executed concurrently with Drain.
+ // That's not an expected case, but it must be handled for correctness.
+ // Push/Pop/Grow/Shrink can only be executed on <cpu> and use rseq primitives.
+ // Push only updates current. Pop only updates current and end_copy
+ // (it mutates only current but uses 4 byte write for performance).
+ // Grow/Shrink mutate end and end_copy using 64-bit stores.
+
+ // We attempt to stop all concurrent operations by writing 0xffff to begin
+ // and 0 to end. However, Grow/Shrink can overwrite our write, so we do this
+ // in a loop until we know that the header is in a quiescent state.
+
+ // Phase 1: collect all begin values (these are not mutated by anybody else).
+ uint16_t begin[NumClasses];
+ for (size_t cl = 0; cl < NumClasses; ++cl) {
+ Header hdr = LoadHeader(GetHeader(cpu, cl));
+ CHECK_CONDITION(!hdr.IsLocked());
+ begin[cl] = hdr.begin;
+ }
+
+ // Phase 2: stop concurrent mutations.
+ for (bool done = false; !done;) {
+ for (size_t cl = 0; cl < NumClasses; ++cl) {
+ // Note: this reinterpret_cast and write in Lock lead to undefined
+ // behavior, because the actual object type is std::atomic<int64_t>. But
+ // C++ does not allow us to legally express what we need here: atomic writes
+ // of different sizes.
+ reinterpret_cast<Header*>(GetHeader(cpu, cl))->Lock();
+ }
+ FenceCpu(cpu, virtual_cpu_id_offset);
+ done = true;
+ for (size_t cl = 0; cl < NumClasses; ++cl) {
+ Header hdr = LoadHeader(GetHeader(cpu, cl));
+ if (!hdr.IsLocked()) {
+ // Header was overwritten by Grow/Shrink. Retry.
+ done = false;
+ break;
+ }
+ }
+ }
+
+ // Phase 3: execute callbacks.
+ for (size_t cl = 0; cl < NumClasses; ++cl) {
+ Header hdr = LoadHeader(GetHeader(cpu, cl));
+ // We overwrote begin and end, instead we use our local copy of begin
+ // and end_copy.
+ size_t n = hdr.current - begin[cl];
+ size_t cap = hdr.end_copy - begin[cl];
+ void** batch = reinterpret_cast<void**>(GetHeader(cpu, 0) + begin[cl]);
+ f(ctx, cl, batch, n, cap);
+ }
+
+ // Phase 4: reset current to beginning of the region.
+ // We can't write all 4 fields at once with a single write, because Pop does
+ // several non-atomic loads of the fields. Consider that a concurrent Pop
+ // loads old current (still pointing somewhere in the middle of the region);
+ // then we update all fields with a single write; then Pop loads the updated
+ // begin which allows it to proceed; then it decrements current below begin.
+ //
+ // So we instead first just update current--our locked begin/end guarantee
+ // no Push/Pop will make progress. Once we Fence below, we know no Push/Pop
+ // is using the old current, and can safely update begin/end to be an empty
+ // slab.
+ for (size_t cl = 0; cl < NumClasses; ++cl) {
+ std::atomic<int64_t>* hdrp = GetHeader(cpu, cl);
+ Header hdr = LoadHeader(hdrp);
+ hdr.current = begin[cl];
+ StoreHeader(hdrp, hdr);
+ }
+
+ // Phase 5: fence and reset the remaining fields to beginning of the region.
+ // This allows concurrent mutations again.
+ FenceCpu(cpu, virtual_cpu_id_offset);
+ for (size_t cl = 0; cl < NumClasses; ++cl) {
+ std::atomic<int64_t>* hdrp = GetHeader(cpu, cl);
+ Header hdr;
+ hdr.current = begin[cl];
+ hdr.begin = begin[cl];
+ hdr.end = begin[cl];
+ hdr.end_copy = begin[cl];
+ StoreHeader(hdrp, hdr);
+ }
+}
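+
+// Illustrative sketch of draining one CPU (same shape as the accompanying
+// tests; Reclaim and ReturnCapacity are hypothetical helpers). The handler is
+// invoked once per size class with the cached objects and the class' capacity:
+//
+//   slab.Drain(cpu, /*ctx=*/nullptr,
+//              [](void* ctx, size_t cl, void** batch, size_t n, size_t cap) {
+//                for (size_t i = 0; i < n; ++i) Reclaim(cl, batch[i]);
+//                ReturnCapacity(cap);
+//              });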
+
+template <size_t NumClasses>
+PerCPUMetadataState TcmallocSlab<NumClasses>::MetadataMemoryUsage() const {
+ PerCPUMetadataState result;
+ result.virtual_size = absl::base_internal::NumCPUs() * (1ul << shift_);
+ result.resident_size = MInCore::residence(slabs_, result.virtual_size);
+ return result;
+}
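+
+// Illustrative sketch: virtual_size is always NumCPUs() << shift, while
+// resident_size only counts pages that have actually been touched (so it is
+// near zero right after a lazy Init):
+//
+//   PerCPUMetadataState s = slab.MetadataMemoryUsage();
+//   ASSERT(s.resident_size <= s.virtual_size);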
+
+} // namespace percpu
+} // namespace subtle
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // TCMALLOC_INTERNAL_PERCPU_TCMALLOC_H_
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/percpu_tcmalloc_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_tcmalloc_test.cc
new file mode 100644
index 0000000000..39f07fbe67
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_tcmalloc_test.cc
@@ -0,0 +1,855 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/internal/percpu_tcmalloc.h"
+
+#include <fcntl.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+
+#include <atomic>
+#include <thread> // NOLINT(build/c++11)
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/base/internal/sysinfo.h"
+#include "absl/container/fixed_array.h"
+#include "absl/container/flat_hash_set.h"
+#include "absl/debugging/symbolize.h"
+#include "absl/random/random.h"
+#include "absl/random/seed_sequences.h"
+#include "absl/strings/string_view.h"
+#include "absl/synchronization/mutex.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "absl/types/span.h"
+#include "benchmark/benchmark.h"
+#include "tcmalloc/internal/config.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/internal/util.h"
+#include "tcmalloc/malloc_extension.h"
+#include "tcmalloc/testing/testutil.h"
+
+namespace tcmalloc {
+namespace tcmalloc_internal {
+namespace subtle {
+namespace percpu {
+namespace {
+
+using testing::Each;
+using testing::UnorderedElementsAreArray;
+
+// Chooses an available CPU and executes the passed functor on it. The chosen
+// CPU, as well as a valid disjoint remote CPU, will be passed to the functor
+// as arguments.
+//
+// If the functor believes that it has failed in a manner attributable to
+// external modification, then it should return false and we will attempt to
+// retry the operation (up to a constant limit).
+void RunOnSingleCpuWithRemoteCpu(std::function<bool(int, int)> test) {
+ constexpr int kMaxTries = 1000;
+
+ for (int i = 0; i < kMaxTries; i++) {
+ auto allowed = AllowedCpus();
+
+ int target_cpu = allowed[0], remote_cpu;
+
+ // We try to pass something actually within the mask, but for most tests it
+ // only needs to exist.
+ if (allowed.size() > 1)
+ remote_cpu = allowed[1];
+ else
+ remote_cpu = target_cpu ? 0 : 1;
+
+ ScopedAffinityMask mask(target_cpu);
+
+ // If the test function failed, assert that the mask was tampered with.
+ if (!test(target_cpu, remote_cpu))
+ ASSERT_TRUE(mask.Tampered());
+ else
+ return;
+ }
+
+ ASSERT_TRUE(false);
+}
+
+// Equivalent to RunOnSingleCpuWithRemoteCpu, except that only the CPU the
+// functor is executing on is passed.
+void RunOnSingleCpu(std::function<bool(int)> test) {
+ auto wrapper = [&test](int this_cpu, int unused) { return test(this_cpu); };
+ RunOnSingleCpuWithRemoteCpu(wrapper);
+}
+
+constexpr size_t kStressSlabs = 4;
+constexpr size_t kStressCapacity = 4;
+
+constexpr size_t kShift = 18;
+typedef class TcmallocSlab<kStressSlabs> TcmallocSlab;
+
+enum class SlabInit {
+ kEager,
+ kLazy,
+};
+
+class TcmallocSlabTest : public testing::TestWithParam<SlabInit> {
+ protected:
+ TcmallocSlabTest() {
+ slab_test_ = &slab_;
+ metadata_bytes_ = 0;
+
+// Ignore false-positive warning in GCC. For more information, see:
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96003
+#pragma GCC diagnostic ignored "-Wnonnull"
+ slab_.Init(
+ &ByteCountingMalloc, [](size_t cl) { return kCapacity; },
+ GetParam() == SlabInit::kLazy, kShift);
+
+ for (int i = 0; i < kCapacity; ++i) {
+ object_ptrs_[i] = &objects_[i];
+ }
+ }
+
+ ~TcmallocSlabTest() override { slab_.Destroy(free); }
+
+ template <int result>
+ static int ExpectOverflow(int cpu, size_t cl, void* item) {
+ EXPECT_EQ(cpu, current_cpu_);
+ EXPECT_EQ(cl, current_cl_);
+ EXPECT_FALSE(overflow_called_);
+ overflow_called_ = true;
+ return result;
+ }
+
+ template <size_t result_object>
+ static void* ExpectUnderflow(int cpu, size_t cl) {
+ EXPECT_EQ(cpu, current_cpu_);
+ EXPECT_EQ(cl, current_cl_);
+ EXPECT_LT(result_object, kCapacity);
+ EXPECT_FALSE(underflow_called_);
+ underflow_called_ = true;
+ return &objects_[result_object];
+ }
+
+ template <int result>
+ bool PushExpectOverflow(TcmallocSlab* slab, size_t cl, void* item) {
+ bool res = slab->Push(cl, item, ExpectOverflow<result>);
+ EXPECT_TRUE(overflow_called_);
+ overflow_called_ = false;
+ return res;
+ }
+
+ template <size_t result_object>
+ void* PopExpectUnderflow(TcmallocSlab* slab, size_t cl) {
+ void* res = slab->Pop(cl, ExpectUnderflow<result_object>);
+ EXPECT_TRUE(underflow_called_);
+ underflow_called_ = false;
+ return res;
+ }
+
+ static void* ByteCountingMalloc(size_t size) {
+ const size_t kPageSize = getpagesize();
+ void* ptr;
+ CHECK_CONDITION(posix_memalign(&ptr, kPageSize, size) == 0);
+ if (ptr) {
+ // Emulate obtaining memory as if we got it from mmap (zero'd).
+ memset(ptr, 0, size);
+ madvise(ptr, size, MADV_DONTNEED);
+ metadata_bytes_ += size;
+ }
+ return ptr;
+ }
+
+ TcmallocSlab slab_;
+
+ static constexpr size_t kCapacity = 10;
+ static char objects_[kCapacity];
+ static void* object_ptrs_[kCapacity];
+ static int current_cpu_;
+ static size_t current_cl_;
+ static bool overflow_called_;
+ static bool underflow_called_;
+ static TcmallocSlab* slab_test_;
+ static size_t metadata_bytes_;
+};
+
+static int ExpectNoOverflow(int cpu, size_t cl, void* item) {
+ CHECK_CONDITION(false && "overflow is not expected");
+ return 0;
+}
+
+static void* ExpectNoUnderflow(int cpu, size_t cl) {
+ CHECK_CONDITION(false && "underflow is not expected");
+ return nullptr;
+}
+
+char TcmallocSlabTest::objects_[TcmallocSlabTest::kCapacity];
+void* TcmallocSlabTest::object_ptrs_[TcmallocSlabTest::kCapacity];
+int TcmallocSlabTest::current_cpu_;
+size_t TcmallocSlabTest::current_cl_;
+bool TcmallocSlabTest::overflow_called_;
+bool TcmallocSlabTest::underflow_called_;
+TcmallocSlab* TcmallocSlabTest::slab_test_;
+size_t TcmallocSlabTest::metadata_bytes_;
+
+TEST_P(TcmallocSlabTest, Metadata) {
+ PerCPUMetadataState r = slab_.MetadataMemoryUsage();
+
+ ASSERT_GT(metadata_bytes_, 0);
+ EXPECT_EQ(r.virtual_size, metadata_bytes_);
+ if (GetParam() == SlabInit::kLazy) {
+ EXPECT_EQ(r.resident_size, 0);
+
+ if (!IsFast()) {
+ GTEST_SKIP() << "Need fast percpu. Skipping.";
+ return;
+ }
+
+ // Initialize a core. Verify that the increased RSS is proportional to a
+ // core.
+ slab_.InitCPU(0, [](size_t cl) { return kCapacity; });
+
+ r = slab_.MetadataMemoryUsage();
+ // We may fault a whole hugepage, so round up the expected per-core share to
+ // a full hugepage.
+ size_t expected = r.virtual_size / absl::base_internal::NumCPUs();
+ expected = (expected + kHugePageSize - 1) & ~(kHugePageSize - 1);
+
+ // A single core may be less than the full slab for that core, since we do
+ // not touch every page within the slab.
+ EXPECT_GE(expected, r.resident_size);
+
+ // Read stats from the slab. This will fault additional memory.
+ for (int cpu = 0, n = absl::base_internal::NumCPUs(); cpu < n; ++cpu) {
+ // To inhibit optimization, verify the values are sensible.
+ for (int cl = 0; cl < kStressSlabs; ++cl) {
+ EXPECT_EQ(0, slab_.Length(cpu, cl));
+ EXPECT_EQ(0, slab_.Capacity(cpu, cl));
+ }
+ }
+
+ PerCPUMetadataState post_stats = slab_.MetadataMemoryUsage();
+ EXPECT_LE(post_stats.resident_size, metadata_bytes_);
+ EXPECT_GT(post_stats.resident_size, r.resident_size);
+ } else {
+ EXPECT_EQ(r.resident_size, metadata_bytes_);
+ }
+}
+
+TEST_P(TcmallocSlabTest, Unit) {
+ if (MallocExtension::PerCpuCachesActive()) {
+ // This test unregisters rseq temporarily, so as to decrease flakiness.
+ GTEST_SKIP() << "per-CPU TCMalloc is incompatible with unregistering rseq";
+ }
+
+ if (!IsFast()) {
+ GTEST_SKIP() << "Need fast percpu. Skipping.";
+ return;
+ }
+
+ // Decide if we should expect a push or pop to be the first action on the CPU
+ // slab to trigger initialization.
+ absl::FixedArray<bool, 0> initialized(absl::base_internal::NumCPUs(),
+ GetParam() != SlabInit::kLazy);
+
+ for (auto cpu : AllowedCpus()) {
+ SCOPED_TRACE(cpu);
+
+ // Temporarily fake being on the given CPU.
+ ScopedFakeCpuId fake_cpu_id(cpu);
+
+#if !defined(__ppc__)
+ if (UsingFlatVirtualCpus()) {
+#if TCMALLOC_PERCPU_USE_RSEQ
+ __rseq_abi.vcpu_id = cpu ^ 1;
+#endif
+ cpu = cpu ^ 1;
+ }
+#endif
+ current_cpu_ = cpu;
+
+ for (size_t cl = 0; cl < kStressSlabs; ++cl) {
+ SCOPED_TRACE(cl);
+ current_cl_ = cl;
+
+#ifdef __ppc__
+ // This is imperfect but the window between operations below is small. We
+ // can make this more precise around individual operations if we see
+ // measurable flakiness as a result.
+ if (fake_cpu_id.Tampered()) break;
+#endif
+
+ // Check new slab state.
+ ASSERT_EQ(slab_.Length(cpu, cl), 0);
+ ASSERT_EQ(slab_.Capacity(cpu, cl), 0);
+
+ if (!initialized[cpu]) {
+#pragma GCC diagnostic ignored "-Wnonnull"
+ void* ptr = slab_.Pop(cl, [](int cpu, size_t cl) {
+ slab_test_->InitCPU(cpu, [](size_t cl) { return kCapacity; });
+
+ return static_cast<void*>(slab_test_);
+ });
+
+ ASSERT_TRUE(ptr == slab_test_);
+ initialized[cpu] = true;
+ }
+
+ // Test overflow/underflow handlers.
+ ASSERT_EQ(PopExpectUnderflow<5>(&slab_, cl), &objects_[5]);
+ ASSERT_FALSE(PushExpectOverflow<-1>(&slab_, cl, &objects_[0]));
+ ASSERT_FALSE(PushExpectOverflow<-2>(&slab_, cl, &objects_[0]));
+ ASSERT_TRUE(PushExpectOverflow<0>(&slab_, cl, &objects_[0]));
+
+ // Grow capacity to kCapacity / 2.
+ ASSERT_EQ(slab_.Grow(cpu, cl, kCapacity / 2, kCapacity), kCapacity / 2);
+ ASSERT_EQ(slab_.Length(cpu, cl), 0);
+ ASSERT_EQ(slab_.Capacity(cpu, cl), kCapacity / 2);
+ ASSERT_EQ(PopExpectUnderflow<5>(&slab_, cl), &objects_[5]);
+ ASSERT_TRUE(slab_.Push(cl, &objects_[0], ExpectNoOverflow));
+ ASSERT_EQ(slab_.Length(cpu, cl), 1);
+ ASSERT_EQ(slab_.Capacity(cpu, cl), kCapacity / 2);
+ ASSERT_EQ(slab_.Pop(cl, ExpectNoUnderflow), &objects_[0]);
+ ASSERT_EQ(slab_.Length(cpu, cl), 0);
+ for (size_t i = 0; i < kCapacity / 2; ++i) {
+ ASSERT_TRUE(slab_.Push(cl, &objects_[i], ExpectNoOverflow));
+ ASSERT_EQ(slab_.Length(cpu, cl), i + 1);
+ }
+ ASSERT_FALSE(PushExpectOverflow<-1>(&slab_, cl, &objects_[0]));
+ for (size_t i = kCapacity / 2; i > 0; --i) {
+ ASSERT_EQ(slab_.Pop(cl, ExpectNoUnderflow), &objects_[i - 1]);
+ ASSERT_EQ(slab_.Length(cpu, cl), i - 1);
+ }
+ // Ensure that Shrink doesn't underflow capacity.
+ ASSERT_EQ(slab_.Shrink(cpu, cl, kCapacity), kCapacity / 2);
+ ASSERT_EQ(slab_.Capacity(cpu, cl), 0);
+
+ // Grow capacity to kCapacity.
+ ASSERT_EQ(slab_.Grow(cpu, cl, kCapacity / 2, kCapacity), kCapacity / 2);
+ // Ensure that Grow doesn't overflow the max capacity.
+ ASSERT_EQ(slab_.Grow(cpu, cl, kCapacity, kCapacity), kCapacity / 2);
+ ASSERT_EQ(slab_.Capacity(cpu, cl), kCapacity);
+ for (size_t i = 0; i < kCapacity; ++i) {
+ ASSERT_TRUE(slab_.Push(cl, &objects_[i], ExpectNoOverflow));
+ ASSERT_EQ(slab_.Length(cpu, cl), i + 1);
+ }
+ ASSERT_FALSE(PushExpectOverflow<-1>(&slab_, cl, &objects_[0]));
+ for (size_t i = kCapacity; i > 0; --i) {
+ ASSERT_EQ(slab_.Pop(cl, ExpectNoUnderflow), &objects_[i - 1]);
+ ASSERT_EQ(slab_.Length(cpu, cl), i - 1);
+ }
+
+ // Ensure that we can't shrink below length.
+ ASSERT_TRUE(slab_.Push(cl, &objects_[0], ExpectNoOverflow));
+ ASSERT_TRUE(slab_.Push(cl, &objects_[1], ExpectNoOverflow));
+ ASSERT_EQ(slab_.Shrink(cpu, cl, kCapacity), kCapacity - 2);
+ ASSERT_EQ(slab_.Capacity(cpu, cl), 2);
+
+ // Test Drain.
+ ASSERT_EQ(slab_.Grow(cpu, cl, 2, kCapacity), 2);
+ slab_.Drain(cpu, &cl,
+ [](void* ctx, size_t cl, void** batch, size_t n, size_t cap) {
+ size_t mycl = *static_cast<size_t*>(ctx);
+ if (cl == mycl) {
+ ASSERT_EQ(n, 2);
+ ASSERT_EQ(cap, 4);
+ ASSERT_EQ(batch[0], &objects_[0]);
+ ASSERT_EQ(batch[1], &objects_[1]);
+ } else {
+ ASSERT_EQ(n, 0);
+ ASSERT_EQ(cap, 0);
+ }
+ });
+ ASSERT_EQ(slab_.Length(cpu, cl), 0);
+ ASSERT_EQ(slab_.Capacity(cpu, cl), 0);
+
+ // Test PushBatch/PopBatch.
+ void* batch[kCapacity + 1];
+ for (size_t i = 0; i < kCapacity; ++i) {
+ batch[i] = &objects_[i];
+ }
+ ASSERT_EQ(slab_.PopBatch(cl, batch, kCapacity), 0);
+ ASSERT_EQ(slab_.PushBatch(cl, batch, kCapacity), 0);
+ ASSERT_EQ(slab_.Grow(cpu, cl, kCapacity / 2, kCapacity), kCapacity / 2);
+ ASSERT_EQ(slab_.PopBatch(cl, batch, kCapacity), 0);
+ // Push a batch of size i into empty slab.
+ for (size_t i = 1; i < kCapacity; ++i) {
+ const size_t expect = std::min(i, kCapacity / 2);
+ ASSERT_EQ(slab_.PushBatch(cl, batch, i), expect);
+ ASSERT_EQ(slab_.Length(cpu, cl), expect);
+ for (size_t j = 0; j < expect; ++j) {
+ ASSERT_EQ(slab_.Pop(cl, ExpectNoUnderflow),
+ &objects_[j + (i - expect)]);
+ }
+ ASSERT_EQ(PopExpectUnderflow<5>(&slab_, cl), &objects_[5]);
+ }
+ // Push a batch of size i into non-empty slab.
+ for (size_t i = 1; i < kCapacity / 2; ++i) {
+ const size_t expect = std::min(i, kCapacity / 2 - i);
+ ASSERT_EQ(slab_.PushBatch(cl, batch, i), i);
+ ASSERT_EQ(slab_.PushBatch(cl, batch, i), expect);
+ ASSERT_EQ(slab_.Length(cpu, cl), i + expect);
+ for (size_t j = 0; j < expect; ++j) {
+ ASSERT_EQ(slab_.Pop(cl, ExpectNoUnderflow),
+ static_cast<void*>(&objects_[j + (i - expect)]));
+ }
+ for (size_t j = 0; j < i; ++j) {
+ ASSERT_EQ(slab_.Pop(cl, ExpectNoUnderflow),
+ static_cast<void*>(&objects_[j]));
+ }
+ ASSERT_EQ(PopExpectUnderflow<5>(&slab_, cl), &objects_[5]);
+ }
+ for (size_t i = 0; i < kCapacity + 1; ++i) {
+ batch[i] = nullptr;
+ }
+ // Pop all elements in a single batch.
+ for (size_t i = 1; i < kCapacity / 2; ++i) {
+ for (size_t j = 0; j < i; ++j) {
+ ASSERT_TRUE(slab_.Push(cl, &objects_[j], ExpectNoOverflow));
+ }
+ ASSERT_EQ(slab_.PopBatch(cl, batch, i), i);
+ ASSERT_EQ(slab_.Length(cpu, cl), 0);
+ ASSERT_EQ(PopExpectUnderflow<5>(&slab_, cl), &objects_[5]);
+
+ ASSERT_THAT(absl::MakeSpan(&batch[0], i),
+ UnorderedElementsAreArray(&object_ptrs_[0], i));
+ ASSERT_THAT(absl::MakeSpan(&batch[i], kCapacity - i), Each(nullptr));
+ for (size_t j = 0; j < kCapacity + 1; ++j) {
+ batch[j] = nullptr;
+ }
+ }
+ // Pop half of elements in a single batch.
+ for (size_t i = 1; i < kCapacity / 2; ++i) {
+ for (size_t j = 0; j < i; ++j) {
+ ASSERT_TRUE(slab_.Push(cl, &objects_[j], ExpectNoOverflow));
+ }
+ size_t want = std::max<size_t>(1, i / 2);
+ ASSERT_EQ(slab_.PopBatch(cl, batch, want), want);
+ ASSERT_EQ(slab_.Length(cpu, cl), i - want);
+
+ for (size_t j = 0; j < i - want; ++j) {
+ ASSERT_EQ(slab_.Pop(cl, ExpectNoUnderflow),
+ static_cast<void*>(&objects_[i - want - j - 1]));
+ }
+
+ ASSERT_EQ(PopExpectUnderflow<5>(&slab_, cl), &objects_[5]);
+
+ ASSERT_GE(i, want);
+ ASSERT_THAT(absl::MakeSpan(&batch[0], want),
+ UnorderedElementsAreArray(&object_ptrs_[i - want], want));
+ ASSERT_THAT(absl::MakeSpan(&batch[want], kCapacity - want),
+ Each(nullptr));
+ for (size_t j = 0; j < kCapacity + 1; ++j) {
+ batch[j] = nullptr;
+ }
+ }
+ // Pop 2x elements in a single batch.
+ for (size_t i = 1; i < kCapacity / 2; ++i) {
+ for (size_t j = 0; j < i; ++j) {
+ ASSERT_TRUE(slab_.Push(cl, &objects_[j], ExpectNoOverflow));
+ }
+ ASSERT_EQ(slab_.PopBatch(cl, batch, i * 2), i);
+ ASSERT_EQ(slab_.Length(cpu, cl), 0);
+ ASSERT_EQ(PopExpectUnderflow<5>(&slab_, cl), &objects_[5]);
+
+ ASSERT_THAT(absl::MakeSpan(&batch[0], i),
+ UnorderedElementsAreArray(&object_ptrs_[0], i));
+ ASSERT_THAT(absl::MakeSpan(&batch[i], kCapacity - i), Each(nullptr));
+ for (size_t j = 0; j < kCapacity + 1; ++j) {
+ batch[j] = nullptr;
+ }
+ }
+ ASSERT_EQ(slab_.Shrink(cpu, cl, kCapacity / 2), kCapacity / 2);
+ }
+ }
+}
+
+INSTANTIATE_TEST_SUITE_P(Instant, TcmallocSlabTest,
+ testing::Values(SlabInit::kEager, SlabInit::kLazy));
+
+static void StressThread(size_t thread_id, TcmallocSlab* slab,
+ std::vector<void*>* block,
+ std::vector<absl::Mutex>* mutexes,
+ std::atomic<size_t>* capacity,
+ std::atomic<bool>* stop) {
+ EXPECT_TRUE(IsFast());
+
+ struct Handler {
+ static int Overflow(int cpu, size_t cl, void* item) {
+ EXPECT_GE(cpu, 0);
+ EXPECT_LT(cpu, absl::base_internal::NumCPUs());
+ EXPECT_LT(cl, kStressSlabs);
+ EXPECT_NE(item, nullptr);
+ return -1;
+ }
+
+ static void* Underflow(int cpu, size_t cl) {
+ EXPECT_GE(cpu, 0);
+ EXPECT_LT(cpu, absl::base_internal::NumCPUs());
+ EXPECT_LT(cl, kStressSlabs);
+ return nullptr;
+ }
+ };
+
+ absl::BitGen rnd(absl::SeedSeq({thread_id}));
+ while (!*stop) {
+ size_t cl = absl::Uniform<int32_t>(rnd, 0, kStressSlabs);
+ const int what = absl::Uniform<int32_t>(rnd, 0, 91);
+ if (what < 10) {
+ if (!block->empty()) {
+ if (slab->Push(cl, block->back(), &Handler::Overflow)) {
+ block->pop_back();
+ }
+ }
+ } else if (what < 20) {
+ if (void* item = slab->Pop(cl, &Handler::Underflow)) {
+ block->push_back(item);
+ }
+ } else if (what < 30) {
+ if (!block->empty()) {
+ void* batch[kStressCapacity];
+ size_t n = absl::Uniform<int32_t>(
+ rnd, 0, std::min(block->size(), kStressCapacity)) +
+ 1;
+ for (size_t i = 0; i < n; ++i) {
+ batch[i] = block->back();
+ block->pop_back();
+ }
+ size_t pushed = slab->PushBatch(cl, batch, n);
+ EXPECT_LE(pushed, n);
+ for (size_t i = 0; i < n - pushed; ++i) {
+ block->push_back(batch[i]);
+ }
+ }
+ } else if (what < 40) {
+ void* batch[kStressCapacity];
+ size_t n = absl::Uniform<int32_t>(rnd, 0, kStressCapacity) + 1;
+ size_t popped = slab->PopBatch(cl, batch, n);
+ EXPECT_LE(popped, n);
+ for (size_t i = 0; i < popped; ++i) {
+ block->push_back(batch[i]);
+ }
+ } else if (what < 50) {
+ size_t n = absl::Uniform<int32_t>(rnd, 0, kStressCapacity) + 1;
+ for (;;) {
+ size_t c = capacity->load();
+ n = std::min(n, c);
+ if (n == 0) {
+ break;
+ }
+ if (capacity->compare_exchange_weak(c, c - n)) {
+ break;
+ }
+ }
+ if (n != 0) {
+ size_t res = slab->Grow(slab->GetCurrentVirtualCpuUnsafe(), cl, n,
+ kStressCapacity);
+ EXPECT_LE(res, n);
+ capacity->fetch_add(n - res);
+ }
+ } else if (what < 60) {
+ size_t n =
+ slab->Shrink(slab->GetCurrentVirtualCpuUnsafe(), cl,
+ absl::Uniform<int32_t>(rnd, 0, kStressCapacity) + 1);
+ capacity->fetch_add(n);
+ } else if (what < 70) {
+ size_t len = slab->Length(
+ absl::Uniform<int32_t>(rnd, 0, absl::base_internal::NumCPUs()), cl);
+ EXPECT_LE(len, kStressCapacity);
+ } else if (what < 80) {
+ size_t cap = slab->Capacity(
+ absl::Uniform<int32_t>(rnd, 0, absl::base_internal::NumCPUs()), cl);
+ EXPECT_LE(cap, kStressCapacity);
+ } else if (what < 90) {
+ struct Context {
+ std::vector<void*>* block;
+ std::atomic<size_t>* capacity;
+ };
+ Context ctx = {block, capacity};
+ int cpu = absl::Uniform<int32_t>(rnd, 0, absl::base_internal::NumCPUs());
+ if (mutexes->at(cpu).TryLock()) {
+ size_t to_shrink = absl::Uniform<int32_t>(rnd, 0, kStressCapacity) + 1;
+ size_t total_shrunk = slab->ShrinkOtherCache(
+ cpu, cl, to_shrink, &ctx,
+ [](void* arg, size_t cl, void** batch, size_t n) {
+ Context* ctx = static_cast<Context*>(arg);
+ EXPECT_LT(cl, kStressSlabs);
+ EXPECT_LE(n, kStressCapacity);
+ for (size_t i = 0; i < n; ++i) {
+ EXPECT_NE(batch[i], nullptr);
+ ctx->block->push_back(batch[i]);
+ }
+ });
+ EXPECT_LE(total_shrunk, to_shrink);
+ EXPECT_LE(0, total_shrunk);
+ capacity->fetch_add(total_shrunk);
+ mutexes->at(cpu).Unlock();
+ }
+ } else {
+ struct Context {
+ std::vector<void*>* block;
+ std::atomic<size_t>* capacity;
+ };
+ Context ctx = {block, capacity};
+ int cpu = absl::Uniform<int32_t>(rnd, 0, absl::base_internal::NumCPUs());
+ if (mutexes->at(cpu).TryLock()) {
+ slab->Drain(
+ cpu, &ctx,
+ [](void* arg, size_t cl, void** batch, size_t n, size_t cap) {
+ Context* ctx = static_cast<Context*>(arg);
+ EXPECT_LT(cl, kStressSlabs);
+ EXPECT_LE(n, kStressCapacity);
+ EXPECT_LE(cap, kStressCapacity);
+ for (size_t i = 0; i < n; ++i) {
+ EXPECT_NE(batch[i], nullptr);
+ ctx->block->push_back(batch[i]);
+ }
+ ctx->capacity->fetch_add(cap);
+ });
+ mutexes->at(cpu).Unlock();
+ }
+ }
+ }
+}
+
+static void* allocator(size_t bytes) {
+ void* ptr = malloc(bytes);
+ if (ptr) {
+ memset(ptr, 0, bytes);
+ }
+ return ptr;
+}
+
+TEST(TcmallocSlab, Stress) {
+ // The test creates 2 * NumCPUs() threads, each executing all possible
+ // operations on TcmallocSlab. After that we verify that no objects were
+ // lost or duplicated and that the total capacity is preserved.
+
+ if (!IsFast()) {
+ GTEST_SKIP() << "Need fast percpu. Skipping.";
+ return;
+ }
+
+ EXPECT_LE(kStressSlabs, kStressSlabs);
+ TcmallocSlab slab;
+ slab.Init(
+ allocator,
+ [](size_t cl) { return cl < kStressSlabs ? kStressCapacity : 0; }, false,
+ kShift);
+ std::vector<std::thread> threads;
+ const int n_threads = 2 * absl::base_internal::NumCPUs();
+
+ // Mutexes protect Drain operation on a CPU.
+ std::vector<absl::Mutex> mutexes(absl::base_internal::NumCPUs());
+ // Give each thread an initial set of local objects.
+ std::vector<std::vector<void*>> blocks(n_threads);
+ for (size_t i = 0; i < blocks.size(); ++i) {
+ for (size_t j = 0; j < kStressCapacity; ++j) {
+ blocks[i].push_back(reinterpret_cast<void*>(i * kStressCapacity + j + 1));
+ }
+ }
+ std::atomic<bool> stop(false);
+ // Total capacity shared between all size classes and all CPUs.
+ const int kTotalCapacity = blocks.size() * kStressCapacity * 3 / 4;
+ std::atomic<size_t> capacity(kTotalCapacity);
+ // Create threads and let them work for 5 seconds.
+ threads.reserve(n_threads);
+ for (size_t t = 0; t < n_threads; ++t) {
+ threads.push_back(std::thread(StressThread, t, &slab, &blocks[t], &mutexes,
+ &capacity, &stop));
+ }
+ absl::SleepFor(absl::Seconds(5));
+ stop = true;
+ for (auto& t : threads) {
+ t.join();
+ }
+ // Collect objects and capacity from all slabs.
+ std::set<void*> objects;
+ struct Context {
+ std::set<void*>* objects;
+ std::atomic<size_t>* capacity;
+ };
+ Context ctx = {&objects, &capacity};
+ for (int cpu = 0; cpu < absl::base_internal::NumCPUs(); ++cpu) {
+ slab.Drain(cpu, &ctx,
+ [](void* arg, size_t cl, void** batch, size_t n, size_t cap) {
+ Context* ctx = static_cast<Context*>(arg);
+ for (size_t i = 0; i < n; ++i) {
+ ctx->objects->insert(batch[i]);
+ }
+ ctx->capacity->fetch_add(cap);
+ });
+ for (size_t cl = 0; cl < kStressSlabs; ++cl) {
+ EXPECT_EQ(slab.Length(cpu, cl), 0);
+ EXPECT_EQ(slab.Capacity(cpu, cl), 0);
+ }
+ }
+ for (const auto& b : blocks) {
+ for (auto o : b) {
+ objects.insert(o);
+ }
+ }
+ EXPECT_EQ(objects.size(), blocks.size() * kStressCapacity);
+ EXPECT_EQ(capacity.load(), kTotalCapacity);
+ slab.Destroy(free);
+}
+
+TEST(TcmallocSlab, SMP) {
+ // For the other tests here to be meaningful, we need multiple cores.
+ ASSERT_GT(absl::base_internal::NumCPUs(), 1);
+}
+
+#if ABSL_INTERNAL_HAVE_ELF_SYMBOLIZE
+static int FilterElfHeader(struct dl_phdr_info* info, size_t size, void* data) {
+ *reinterpret_cast<uintptr_t*>(data) =
+ reinterpret_cast<uintptr_t>(info->dlpi_addr);
+ // No further iteration wanted.
+ return 1;
+}
+#endif
+
+TEST(TcmallocSlab, CriticalSectionMetadata) {
+// We cannot inhibit --gc-sections, except on GCC or Clang 9-or-newer.
+#if defined(__clang_major__) && __clang_major__ < 9
+ GTEST_SKIP() << "--gc-sections cannot be inhibited on this compiler.";
+#endif
+
+ // We expect that restartable sequence critical sections (rseq_cs) are in the
+ // __rseq_cs section (by convention, not hard requirement). Additionally, for
+ // each entry in that section, there should be a pointer to it in
+ // __rseq_cs_ptr_array.
+#if ABSL_INTERNAL_HAVE_ELF_SYMBOLIZE
+ uintptr_t relocation = 0;
+ dl_iterate_phdr(FilterElfHeader, &relocation);
+
+ int fd = tcmalloc_internal::signal_safe_open("/proc/self/exe", O_RDONLY);
+ ASSERT_NE(fd, -1);
+
+ const kernel_rseq_cs* cs_start = nullptr;
+ const kernel_rseq_cs* cs_end = nullptr;
+
+ const kernel_rseq_cs** cs_array_start = nullptr;
+ const kernel_rseq_cs** cs_array_end = nullptr;
+
+ absl::debugging_internal::ForEachSection(
+ fd, [&](const absl::string_view name, const ElfW(Shdr) & hdr) {
+ uintptr_t start = relocation + reinterpret_cast<uintptr_t>(hdr.sh_addr);
+ uintptr_t end =
+ relocation + reinterpret_cast<uintptr_t>(hdr.sh_addr + hdr.sh_size);
+
+ if (name == "__rseq_cs") {
+ EXPECT_EQ(cs_start, nullptr);
+ EXPECT_EQ(start % alignof(kernel_rseq_cs), 0);
+ EXPECT_EQ(end % alignof(kernel_rseq_cs), 0);
+ EXPECT_LT(start, end) << "__rseq_cs must not be empty";
+
+ cs_start = reinterpret_cast<const kernel_rseq_cs*>(start);
+ cs_end = reinterpret_cast<const kernel_rseq_cs*>(end);
+ } else if (name == "__rseq_cs_ptr_array") {
+ EXPECT_EQ(cs_array_start, nullptr);
+ EXPECT_EQ(start % alignof(kernel_rseq_cs*), 0);
+ EXPECT_EQ(end % alignof(kernel_rseq_cs*), 0);
+ EXPECT_LT(start, end) << "__rseq_cs_ptr_array must not be empty";
+
+ cs_array_start = reinterpret_cast<const kernel_rseq_cs**>(start);
+ cs_array_end = reinterpret_cast<const kernel_rseq_cs**>(end);
+ }
+
+ return true;
+ });
+
+ close(fd);
+
+ // The length of the array in multiples of rseq_cs should be the same as the
+ // length of the array of pointers.
+ ASSERT_EQ(cs_end - cs_start, cs_array_end - cs_array_start);
+
+ // The array should not be empty.
+ ASSERT_NE(cs_start, nullptr);
+
+ absl::flat_hash_set<const kernel_rseq_cs*> cs_pointers;
+ for (auto* ptr = cs_start; ptr != cs_end; ++ptr) {
+ cs_pointers.insert(ptr);
+ }
+
+ absl::flat_hash_set<const kernel_rseq_cs*> cs_array_pointers;
+ for (auto** ptr = cs_array_start; ptr != cs_array_end; ++ptr) {
+ // __rseq_cs_ptr_array should have no duplicates.
+ EXPECT_TRUE(cs_array_pointers.insert(*ptr).second);
+ }
+
+ EXPECT_THAT(cs_pointers, ::testing::ContainerEq(cs_array_pointers));
+#endif
+}
+
+static void BM_PushPop(benchmark::State& state) {
+ CHECK_CONDITION(IsFast());
+ RunOnSingleCpu([&](int this_cpu) {
+ const int kBatchSize = 32;
+ TcmallocSlab slab;
+
+#pragma GCC diagnostic ignored "-Wnonnull"
+ slab.Init(
+ allocator, [](size_t cl) -> size_t { return kBatchSize; }, false,
+ kShift);
+
+ CHECK_CONDITION(slab.Grow(this_cpu, 0, kBatchSize, kBatchSize) ==
+ kBatchSize);
+ void* batch[kBatchSize];
+ for (int i = 0; i < kBatchSize; i++) {
+ batch[i] = &batch[i];
+ }
+ for (auto _ : state) {
+ for (size_t x = 0; x < kBatchSize; x++) {
+ CHECK_CONDITION(slab.Push(0, batch[x], ExpectNoOverflow));
+ }
+ for (size_t x = 0; x < kBatchSize; x++) {
+ CHECK_CONDITION(slab.Pop(0, ExpectNoUnderflow) ==
+ batch[kBatchSize - x - 1]);
+ }
+ }
+ return true;
+ });
+}
+BENCHMARK(BM_PushPop);
+
+static void BM_PushPopBatch(benchmark::State& state) {
+ CHECK_CONDITION(IsFast());
+ RunOnSingleCpu([&](int this_cpu) {
+ const int kBatchSize = 32;
+ TcmallocSlab slab;
+ slab.Init(
+ allocator, [](size_t cl) -> size_t { return kBatchSize; }, false,
+ kShift);
+ CHECK_CONDITION(slab.Grow(this_cpu, 0, kBatchSize, kBatchSize) ==
+ kBatchSize);
+ void* batch[kBatchSize];
+ for (int i = 0; i < kBatchSize; i++) {
+ batch[i] = &batch[i];
+ }
+ for (auto _ : state) {
+ CHECK_CONDITION(slab.PushBatch(0, batch, kBatchSize) == kBatchSize);
+ CHECK_CONDITION(slab.PopBatch(0, batch, kBatchSize) == kBatchSize);
+ }
+ return true;
+ });
+}
+BENCHMARK(BM_PushPopBatch);
+
+} // namespace
+} // namespace percpu
+} // namespace subtle
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/proc_maps.cc b/contrib/libs/tcmalloc/tcmalloc/internal/proc_maps.cc
new file mode 100644
index 0000000000..5a5586cfff
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/proc_maps.cc
@@ -0,0 +1,171 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/internal/proc_maps.h"
+
+#include <fcntl.h>
+#include <sys/sysmacros.h>
+#include <unistd.h>
+
+#include <cstdio>
+#include <cstring>
+
+#include "absl/strings/str_format.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/internal/util.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+ProcMapsIterator::ProcMapsIterator(pid_t pid) { Init(pid, nullptr); }
+
+ProcMapsIterator::ProcMapsIterator(pid_t pid, Buffer* buffer) {
+ Init(pid, buffer);
+}
+
+void ProcMapsIterator::Init(pid_t pid, Buffer* buffer) {
+ if (pid == 0) {
+ pid = getpid();
+ }
+
+ pid_ = pid;
+ if (!buffer) {
+ // If the user didn't pass in any buffer storage, allocate it
+ // now. This is the normal case; the signal handler passes in a
+ // static buffer.
+ buffer = dynamic_buffer_ = new Buffer;
+ } else {
+ dynamic_buffer_ = nullptr;
+ }
+
+ ibuf_ = buffer->buf;
+
+ stext_ = etext_ = nextline_ = ibuf_;
+ ebuf_ = ibuf_ + Buffer::kBufSize - 1;
+ nextline_ = ibuf_;
+
+#if defined(__linux__)
+ // /maps exists in two places: /proc/pid/ and /proc/pid/task/tid (for each
+ // thread in the process). The only difference between these is that the
+ // "global" view (/proc/pid/maps) attempts to label each VMA that is the
+ // stack of a thread. This is nice to have, but not critical, and it scales
+ // quadratically. Use the main thread's "local" view to ensure adequate
+ // performance.
+ int path_length = absl::SNPrintF(ibuf_, Buffer::kBufSize,
+ "/proc/%d/task/%d/maps", pid, pid);
+ CHECK_CONDITION(path_length < Buffer::kBufSize);
+
+ // No error logging since this can be called from the crash dump
+ // handler at awkward moments. Users should call Valid() before
+ // using.
+ TCMALLOC_RETRY_ON_TEMP_FAILURE(fd_ = open(ibuf_, O_RDONLY));
+#else
+ fd_ = -1; // so Valid() is always false
+#endif
+}
+
+ProcMapsIterator::~ProcMapsIterator() {
+ // As it turns out, Linux guarantees that close() does in fact close a file
+ // descriptor even when the return value is EINTR. According to the notes in
+ // the manpage for close(2), this is widespread yet not fully portable, which
+ // is unfortunate. POSIX explicitly leaves this behavior as unspecified.
+ if (fd_ >= 0) close(fd_);
+ delete dynamic_buffer_;
+}
+
+bool ProcMapsIterator::Valid() const { return fd_ != -1; }
+
+bool ProcMapsIterator::NextExt(uint64_t* start, uint64_t* end, char** flags,
+ uint64_t* offset, int64_t* inode,
+ char** filename, dev_t* dev) {
+#if defined __linux__
+ do {
+ // Advance to the start of the next line
+ stext_ = nextline_;
+
+ // See if we have a complete line in the buffer already
+ nextline_ = static_cast<char*>(memchr(stext_, '\n', etext_ - stext_));
+ if (!nextline_) {
+ // Shift/fill the buffer so we do have a line
+ int count = etext_ - stext_;
+
+ // Move the current text to the start of the buffer
+ memmove(ibuf_, stext_, count);
+ stext_ = ibuf_;
+ etext_ = ibuf_ + count;
+
+ int nread = 0; // fill up buffer with text
+ while (etext_ < ebuf_) {
+ TCMALLOC_RETRY_ON_TEMP_FAILURE(nread =
+ read(fd_, etext_, ebuf_ - etext_));
+ if (nread > 0)
+ etext_ += nread;
+ else
+ break;
+ }
+
+ // Zero out remaining characters in buffer at EOF to avoid returning
+ // garbage from subsequent calls.
+ if (etext_ != ebuf_ && nread == 0) {
+ memset(etext_, 0, ebuf_ - etext_);
+ }
+ *etext_ = '\n'; // sentinel; safe because ibuf extends 1 char beyond ebuf
+ nextline_ = static_cast<char*>(memchr(stext_, '\n', etext_ + 1 - stext_));
+ }
+ *nextline_ = 0; // turn newline into nul
+ nextline_ += ((nextline_ < etext_) ? 1 : 0); // skip nul if not end of text
+ // stext_ now points at a nul-terminated line
+ unsigned long long tmpstart, tmpend, tmpoffset; // NOLINT
+ long long tmpinode, local_inode; // NOLINT
+ unsigned long long local_start, local_end, local_offset; // NOLINT
+ int major, minor;
+ unsigned filename_offset = 0;
+ // for now, assume all linuxes have the same format
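+ // An illustrative (not kernel-specific) line for reference:
+ //   7f3c0a000000-7f3c0a021000 r-xp 00000000 08:01 1048602 /usr/lib/libfoo.so
+ // i.e. start-end perms offset major:minor inode [filename]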
+ int para_num =
+ sscanf(stext_, "%llx-%llx %4s %llx %x:%x %lld %n",
+ start ? &local_start : &tmpstart, end ? &local_end : &tmpend,
+ flags_, offset ? &local_offset : &tmpoffset, &major, &minor,
+ inode ? &local_inode : &tmpinode, &filename_offset);
+
+ if (para_num != 7) continue;
+
+ if (start) *start = local_start;
+ if (end) *end = local_end;
+ if (offset) *offset = local_offset;
+ if (inode) *inode = local_inode;
+ // Depending on the Linux kernel being used, there may or may not be a space
+ // after the inode if there is no filename. sscanf will in such situations
+ // nondeterministically either fill in filename_offset or not (the results
+ // differ on multiple calls in the same run even with identical arguments).
+ // We don't want to wander off somewhere beyond the end of the string.
+ size_t stext_length = strlen(stext_);
+ if (filename_offset == 0 || filename_offset > stext_length)
+ filename_offset = stext_length;
+
+ // We found an entry
+ if (flags) *flags = flags_;
+ if (filename) *filename = stext_ + filename_offset;
+ if (dev) *dev = makedev(major, minor);
+
+ return true;
+ } while (etext_ > ibuf_);
+#endif
+
+ // We didn't find anything
+ return false;
+}
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/proc_maps.h b/contrib/libs/tcmalloc/tcmalloc/internal/proc_maps.h
new file mode 100644
index 0000000000..c5c763a1e8
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/proc_maps.h
@@ -0,0 +1,70 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TCMALLOC_INTERNAL_PROC_MAPS_H_
+#define TCMALLOC_INTERNAL_PROC_MAPS_H_
+
+#include <limits.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+#include "tcmalloc/internal/config.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+// A ProcMapsIterator abstracts access to /proc/maps for a given process.
+class ProcMapsIterator {
+ public:
+ struct Buffer {
+ static constexpr size_t kBufSize = PATH_MAX + 1024;
+ char buf[kBufSize];
+ };
+
+ // Create a new iterator for the specified pid. pid can be 0 for "self".
+ explicit ProcMapsIterator(pid_t pid);
+
+ // Create an iterator with specified storage (for use in signal handler).
+ // "buffer" should point to a ProcMapsIterator::Buffer buffer can be null in
+ // which case a buffer will be allocated.
+ ProcMapsIterator(pid_t pid, Buffer* buffer);
+
+ // Returns true if the iterator was successfully initialized.
+ bool Valid() const;
+
+ bool NextExt(uint64_t* start, uint64_t* end, char** flags, uint64_t* offset,
+ int64_t* inode, char** filename, dev_t* dev);
+
+ ~ProcMapsIterator();
+
+ private:
+ void Init(pid_t pid, Buffer* buffer);
+
+ char* ibuf_; // input buffer
+ char* stext_; // start of text
+ char* etext_; // end of text
+ char* nextline_; // start of next line
+ char* ebuf_; // end of buffer (1 char for a nul)
+ int fd_; // filehandle on /proc/*/maps
+ pid_t pid_;
+ char flags_[10];
+ Buffer* dynamic_buffer_; // dynamically-allocated Buffer
+};
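+
+// Illustrative usage sketch (not part of the original header):
+//
+//   ProcMapsIterator it(0);  // pid 0 means "self"
+//   if (it.Valid()) {
+//     uint64_t start, end, offset;
+//     int64_t inode;
+//     char *flags, *filename;
+//     dev_t dev;
+//     while (it.NextExt(&start, &end, &flags, &offset, &inode, &filename,
+//                       &dev)) {
+//       // [start, end) bounds one mapping; filename may be empty.
+//     }
+//   }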
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // TCMALLOC_INTERNAL_PROC_MAPS_H_
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/range_tracker.h b/contrib/libs/tcmalloc/tcmalloc/internal/range_tracker.h
new file mode 100644
index 0000000000..25b863934f
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/range_tracker.h
@@ -0,0 +1,503 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TCMALLOC_INTERNAL_RANGE_TRACKER_H_
+#define TCMALLOC_INTERNAL_RANGE_TRACKER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+#include <climits>
+#include <limits>
+#include <type_traits>
+
+#include "absl/numeric/bits.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/internal/optimization.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+// Keeps a bitmap of some fixed size (N bits).
+template <size_t N>
+class Bitmap {
+ public:
+ constexpr Bitmap() : bits_{} {}
+
+ size_t size() const { return N; }
+ bool GetBit(size_t i) const;
+
+ void SetBit(size_t i);
+ void ClearBit(size_t i);
+
+ // Returns the number of set bits [index, ..., index + n - 1].
+ size_t CountBits(size_t index, size_t n) const;
+
+ // Returns whether the bitmap is entirely zero or not.
+ bool IsZero() const;
+
+ // Equivalent to SetBit on bits [index, index + 1, ... index + n - 1].
+ void SetRange(size_t index, size_t n);
+ void ClearRange(size_t index, size_t n);
+
+ // Clears the lowest set bit. Special case is faster than more flexible code.
+ void ClearLowestBit();
+
+ // If there is at least one free range at or after <start>,
+ // put it in *index, *length and return true; else return false.
+ bool NextFreeRange(size_t start, size_t *index, size_t *length) const;
+
+ // Returns index of the first {true, false} bit >= index, or N if none.
+ size_t FindSet(size_t index) const;
+ size_t FindClear(size_t index) const;
+
+ // Returns index of the first {set, clear} bit at or before index, or -1 if none.
+ ssize_t FindSetBackwards(size_t index) const;
+ ssize_t FindClearBackwards(size_t index) const;
+
+ void Clear();
+
+ private:
+ static constexpr size_t kWordSize = sizeof(size_t) * 8;
+ static constexpr size_t kWords = (N + kWordSize - 1) / kWordSize;
+ static constexpr size_t kDeadBits = kWordSize * kWords - N;
+
+ size_t bits_[kWords];
+
+ size_t CountWordBits(size_t i, size_t from, size_t to) const;
+
+ template <bool Value>
+ void SetWordBits(size_t i, size_t from, size_t to);
+ template <bool Value>
+ void SetRangeValue(size_t index, size_t n);
+
+ template <bool Goal>
+ size_t FindValue(size_t index) const;
+ template <bool Goal>
+ ssize_t FindValueBackwards(size_t index) const;
+};
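+
+// Illustrative usage sketch (not part of the original header):
+//
+//   Bitmap<256> map;
+//   map.SetRange(10, 5);                      // sets bits 10..14
+//   size_t first_set = map.FindSet(0);        // == 10
+//   size_t first_clear = map.FindClear(10);   // == 15
+//   size_t index, length;
+//   if (map.NextFreeRange(0, &index, &length)) {
+//     // index == 0, length == 10: the clear run before the set range.
+//   }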
+
+// Tracks allocations in a range of items of fixed size. Supports
+// finding an unset range of a given length, while keeping track of
+// the largest remaining unmarked length.
+template <size_t N>
+class RangeTracker {
+ public:
+ constexpr RangeTracker()
+ : bits_{}, longest_free_(N), nused_(0), nallocs_(0) {}
+
+ size_t size() const;
+ // Number of bits marked
+ size_t used() const;
+ // Number of bits clear
+ size_t total_free() const;
+ // Longest contiguous range of clear bits.
+ size_t longest_free() const;
+ // Count of live allocations.
+ size_t allocs() const;
+
+ // REQUIRES: there is a free range of at least n bits
+ // (i.e. n <= longest_free())
+ // finds and marks n free bits, returning index of the first bit.
+ // Chooses by best fit.
+ size_t FindAndMark(size_t n);
+
+ // REQUIRES: the range [index, index + n) is fully marked, and
+ // was the returned value from a call to FindAndMark.
+ // Unmarks it.
+ void Unmark(size_t index, size_t n);
+ // If there is at least one free range at or after <start>,
+ // put it in *index, *length and return true; else return false.
+ bool NextFreeRange(size_t start, size_t *index, size_t *length) const;
+
+ void Clear();
+
+ private:
+ Bitmap<N> bits_;
+
+ // Computes the smallest unsigned type that can hold the constant N.
+ class UnsignedTypeFittingSize {
+ private:
+ static_assert(N <= std::numeric_limits<uint64_t>::max(),
+ "size_t more than 64 bits??");
+ template <typename T>
+ static constexpr bool Fit() {
+ return N <= std::numeric_limits<T>::max();
+ }
+ struct U32 {
+ using type =
+ typename std::conditional<Fit<uint32_t>(), uint32_t, uint64_t>::type;
+ };
+
+ struct U16 {
+ using type = typename std::conditional<Fit<uint16_t>(), uint16_t,
+ typename U32::type>::type;
+ };
+
+ struct U8 {
+ using type = typename std::conditional<Fit<uint8_t>(), uint8_t,
+ typename U16::type>::type;
+ };
+
+ public:
+ using type = typename U8::type;
+ };
+
+ // We keep various stats in the range [0, N]; make them as small as possible.
+ using Count = typename UnsignedTypeFittingSize::type;
+
+ Count longest_free_;
+ Count nused_;
+ Count nallocs_;
+};
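+
+// Illustrative usage sketch (not part of the original header):
+//
+//   RangeTracker<256> tracker;
+//   size_t idx = tracker.FindAndMark(8);  // best-fit; idx == 0 when empty
+//   // tracker.used() == 8, tracker.longest_free() == 248 at this point.
+//   tracker.Unmark(idx, 8);               // returns the range to the tracker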
+
+template <size_t N>
+inline size_t RangeTracker<N>::size() const {
+ return bits_.size();
+}
+
+template <size_t N>
+inline size_t RangeTracker<N>::used() const {
+ return nused_;
+}
+
+template <size_t N>
+inline size_t RangeTracker<N>::total_free() const {
+ return N - used();
+}
+
+template <size_t N>
+inline size_t RangeTracker<N>::longest_free() const {
+ return longest_free_;
+}
+
+template <size_t N>
+inline size_t RangeTracker<N>::allocs() const {
+ return nallocs_;
+}
+
+template <size_t N>
+inline size_t RangeTracker<N>::FindAndMark(size_t n) {
+ ASSERT(n > 0);
+
+ // We keep the two longest ranges in the bitmap since we might allocate
+ // from one.
+ size_t longest_len = 0;
+ size_t second_len = 0;
+
+ // the best (shortest) range we could use
+ // TODO(b/134691947): shortest? lowest-addressed?
+ size_t best_index = N;
+ size_t best_len = 2 * N;
+ // Iterate over free ranges:
+ size_t index = 0, len;
+
+ while (bits_.NextFreeRange(index, &index, &len)) {
+ if (len > longest_len) {
+ second_len = longest_len;
+ longest_len = len;
+ } else if (len > second_len) {
+ second_len = len;
+ }
+
+ if (len >= n && len < best_len) {
+ best_index = index;
+ best_len = len;
+ }
+
+ index += len;
+ }
+
+ CHECK_CONDITION(best_index < N);
+ bits_.SetRange(best_index, n);
+
+ if (best_len == longest_len) {
+ longest_len -= n;
+ if (longest_len < second_len) longest_len = second_len;
+ }
+
+ longest_free_ = longest_len;
+ nused_ += n;
+ nallocs_++;
+ return best_index;
+}
+
+// REQUIRES: the range [index, index + n) is fully marked.
+// Unmarks it.
+template <size_t N>
+inline void RangeTracker<N>::Unmark(size_t index, size_t n) {
+ ASSERT(bits_.FindClear(index) >= index + n);
+ bits_.ClearRange(index, n);
+ nused_ -= n;
+ nallocs_--;
+
+ // We just opened up a new free range--it might be the longest.
+ size_t lim = bits_.FindSet(index + n - 1);
+ index = bits_.FindSetBackwards(index) + 1;
+ n = lim - index;
+ if (n > longest_free()) {
+ longest_free_ = n;
+ }
+}
+
+// If there is at least one free range at or after <start>,
+// put it in *index, *length and return true; else return false.
+template <size_t N>
+inline bool RangeTracker<N>::NextFreeRange(size_t start, size_t *index,
+ size_t *length) const {
+ return bits_.NextFreeRange(start, index, length);
+}
+
+template <size_t N>
+inline void RangeTracker<N>::Clear() {
+ bits_.Clear();
+ nallocs_ = 0;
+ nused_ = 0;
+ longest_free_ = N;
+}
+
+// Counts the set bits [from, to) in the i-th word.
+template <size_t N>
+inline size_t Bitmap<N>::CountWordBits(size_t i, size_t from, size_t to) const {
+ ASSERT(from < kWordSize);
+ ASSERT(to <= kWordSize);
+ const size_t all_ones = ~static_cast<size_t>(0);
+ // how many bits are we counting?
+ const size_t n = to - from;
+ ASSERT(0 < n && n <= kWordSize);
+ const size_t mask = (all_ones >> (kWordSize - n)) << from;
+
+ ASSUME(i < kWords);
+ return absl::popcount(bits_[i] & mask);
+}
+
+// Set the bits [from, to) in the i-th word to Value.
+template <size_t N>
+template <bool Value>
+inline void Bitmap<N>::SetWordBits(size_t i, size_t from, size_t to) {
+ ASSERT(from < kWordSize);
+ ASSERT(to <= kWordSize);
+ const size_t all_ones = ~static_cast<size_t>(0);
+ // how many bits are we setting?
+ const size_t n = to - from;
+ ASSERT(n > 0 && n <= kWordSize);
+ const size_t mask = (all_ones >> (kWordSize - n)) << from;
+ ASSUME(i < kWords);
+ if (Value) {
+ bits_[i] |= mask;
+ } else {
+ bits_[i] &= ~mask;
+ }
+}
+
+template <size_t N>
+inline bool Bitmap<N>::GetBit(size_t i) const {
+ ASSERT(i < N);
+ size_t word = i / kWordSize;
+ size_t offset = i % kWordSize;
+ ASSUME(word < kWords);
+ return bits_[word] & (size_t{1} << offset);
+}
+
+template <size_t N>
+inline void Bitmap<N>::SetBit(size_t i) {
+ ASSERT(i < N);
+ size_t word = i / kWordSize;
+ size_t offset = i % kWordSize;
+ ASSUME(word < kWords);
+ bits_[word] |= (size_t{1} << offset);
+}
+
+template <size_t N>
+inline void Bitmap<N>::ClearBit(size_t i) {
+ ASSERT(i < N);
+ size_t word = i / kWordSize;
+ size_t offset = i % kWordSize;
+ ASSUME(word < kWords);
+ bits_[word] &= ~(size_t{1} << offset);
+}
+
+template <size_t N>
+inline size_t Bitmap<N>::CountBits(size_t index, size_t n) const {
+ ASSUME(index + n <= N);
+ size_t count = 0;
+ if (n == 0) {
+ return count;
+ }
+
+ size_t word = index / kWordSize;
+ size_t offset = index % kWordSize;
+ size_t k = std::min(offset + n, kWordSize);
+ count += CountWordBits(word, offset, k);
+ n -= k - offset;
+ while (n > 0) {
+ word++;
+ k = std::min(n, kWordSize);
+ count += CountWordBits(word, 0, k);
+ n -= k;
+ }
+
+ return count;
+}
+
+template <size_t N>
+inline bool Bitmap<N>::IsZero() const {
+ for (int i = 0; i < kWords; ++i) {
+ if (bits_[i] != 0) {
+ return false;
+ }
+ }
+ return true;
+}
+
+template <size_t N>
+inline void Bitmap<N>::SetRange(size_t index, size_t n) {
+ SetRangeValue<true>(index, n);
+}
+
+template <size_t N>
+inline void Bitmap<N>::ClearRange(size_t index, size_t n) {
+ SetRangeValue<false>(index, n);
+}
+
+template <size_t N>
+inline void Bitmap<N>::ClearLowestBit() {
+ for (int i = 0; i < kWords; ++i) {
+ if (bits_[i] != 0) {
+ bits_[i] &= bits_[i] - 1;
+ break;
+ }
+ }
+}
+
+template <size_t N>
+template <bool Value>
+inline void Bitmap<N>::SetRangeValue(size_t index, size_t n) {
+ ASSERT(index + n <= N);
+ size_t word = index / kWordSize;
+ size_t offset = index % kWordSize;
+ size_t k = offset + n;
+ if (k > kWordSize) k = kWordSize;
+ SetWordBits<Value>(word, offset, k);
+ n -= k - offset;
+ while (n > 0) {
+ word++;
+ k = n;
+ if (k > kWordSize) k = kWordSize;
+ SetWordBits<Value>(word, 0, k);
+ n -= k;
+ }
+}
+
+template <size_t N>
+inline bool Bitmap<N>::NextFreeRange(size_t start, size_t *index,
+ size_t *length) const {
+ if (start >= N) return false;
+ size_t i = FindClear(start);
+ if (i == N) return false;
+ size_t j = FindSet(i);
+ *index = i;
+ *length = j - i;
+ return true;
+}
+
+template <size_t N>
+inline size_t Bitmap<N>::FindSet(size_t index) const {
+ return FindValue<true>(index);
+}
+
+template <size_t N>
+inline size_t Bitmap<N>::FindClear(size_t index) const {
+ return FindValue<false>(index);
+}
+
+template <size_t N>
+inline ssize_t Bitmap<N>::FindSetBackwards(size_t index) const {
+ return FindValueBackwards<true>(index);
+}
+
+template <size_t N>
+inline ssize_t Bitmap<N>::FindClearBackwards(size_t index) const {
+ return FindValueBackwards<false>(index);
+}
+
+template <size_t N>
+inline void Bitmap<N>::Clear() {
+ for (int i = 0; i < kWords; ++i) {
+ bits_[i] = 0;
+ }
+}
+
+template <size_t N>
+template <bool Goal>
+inline size_t Bitmap<N>::FindValue(size_t index) const {
+ ASSERT(index < N);
+ size_t offset = index % kWordSize;
+ size_t word = index / kWordSize;
+ ASSUME(word < kWords);
+ size_t here = bits_[word];
+ if (!Goal) here = ~here;
+ size_t mask = ~static_cast<size_t>(0) << offset;
+ here &= mask;
+ while (here == 0) {
+ ++word;
+ if (word >= kWords) {
+ return N;
+ }
+ here = bits_[word];
+ if (!Goal) here = ~here;
+ }
+
+ word *= kWordSize;
+ ASSUME(here != 0);
+ size_t ret = absl::countr_zero(here) + word;
+ if (kDeadBits > 0) {
+ if (ret > N) ret = N;
+ }
+ return ret;
+}
+
+template <size_t N>
+template <bool Goal>
+inline ssize_t Bitmap<N>::FindValueBackwards(size_t index) const {
+ ASSERT(index < N);
+ size_t offset = index % kWordSize;
+ ssize_t word = index / kWordSize;
+ ASSUME(word < kWords);
+ size_t here = bits_[word];
+ if (!Goal) here = ~here;
+ size_t mask = (static_cast<size_t>(2) << offset) - 1;
+ here &= mask;
+ while (here == 0) {
+ --word;
+ if (word < 0) {
+ return -1;
+ }
+ here = bits_[word];
+ if (!Goal) here = ~here;
+ }
+
+ word *= kWordSize;
+ ASSUME(here != 0);
+ size_t ret = absl::bit_width(here) - 1 + word;
+ return ret;
+}
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // TCMALLOC_INTERNAL_RANGE_TRACKER_H_
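For orientation, a minimal usage sketch of the Bitmap<N> API defined in the header above (a sketch only, assuming this library's header and namespaces; the values in the comments follow from the member definitions shown):

#include "tcmalloc/internal/range_tracker.h"

using tcmalloc::tcmalloc_internal::Bitmap;

void BitmapSketch() {
  Bitmap<64> map;                          // all 64 bits start clear
  map.SetRange(8, 4);                      // sets bits [8, 12)
  size_t first_set = map.FindSet(0);       // == 8
  size_t first_clear = map.FindClear(8);   // == 12, first clear bit at or after 8
  size_t bits = map.CountBits(0, 64);      // == 4
  // Walk the clear ("free") ranges: yields [0, 8) and then [12, 64).
  size_t index = 0, len = 0;
  while (map.NextFreeRange(index, &index, &len)) {
    index += len;
  }
  (void)first_set;
  (void)first_clear;
  (void)bits;
}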
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/range_tracker_benchmark.cc b/contrib/libs/tcmalloc/tcmalloc/internal/range_tracker_benchmark.cc
new file mode 100644
index 0000000000..278fc9ef1e
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/range_tracker_benchmark.cc
@@ -0,0 +1,387 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <algorithm>
+#include <utility>
+#include <vector>
+
+#include "absl/base/attributes.h"
+#include "absl/random/distributions.h"
+#include "absl/random/random.h"
+#include "benchmark/benchmark.h"
+#include "tcmalloc/internal/range_tracker.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+namespace {
+
+template <size_t N>
+static void BM_MarkUnmark(benchmark::State& state) {
+ RangeTracker<N> range;
+ absl::BitGen rng;
+ std::vector<std::pair<size_t, size_t>> things;
+ while (range.used() < N / 2) {
+ size_t len =
+ absl::LogUniform<int32_t>(rng, 0, range.longest_free() - 1) + 1;
+ size_t i = range.FindAndMark(len);
+ things.push_back({i, len});
+ }
+
+ // only count successes :/
+ for (auto s : state) {
+ size_t index = absl::Uniform<int32_t>(rng, 0, things.size());
+ auto p = things[index];
+ range.Unmark(p.first, p.second);
+ size_t len =
+ absl::LogUniform<int32_t>(rng, 0, range.longest_free() - 1) + 1;
+ things[index] = {range.FindAndMark(len), len};
+ }
+
+ state.SetItemsProcessed(state.iterations());
+}
+
+BENCHMARK_TEMPLATE(BM_MarkUnmark, 256);
+BENCHMARK_TEMPLATE(BM_MarkUnmark, 256 * 32);
+
+template <size_t N, size_t K>
+static void BM_MarkUnmarkEmpty(benchmark::State& state) {
+ RangeTracker<N> range;
+ for (auto s : state) {
+ size_t index = range.FindAndMark(K);
+ benchmark::DoNotOptimize(index);
+ range.Unmark(index, K);
+ }
+
+ state.SetItemsProcessed(state.iterations());
+}
+
+BENCHMARK_TEMPLATE(BM_MarkUnmarkEmpty, 256, 1);
+BENCHMARK_TEMPLATE(BM_MarkUnmarkEmpty, 256 * 32, 1);
+BENCHMARK_TEMPLATE(BM_MarkUnmarkEmpty, 256, 128);
+BENCHMARK_TEMPLATE(BM_MarkUnmarkEmpty, 256 * 32, 256 * 16);
+BENCHMARK_TEMPLATE(BM_MarkUnmarkEmpty, 256, 256);
+BENCHMARK_TEMPLATE(BM_MarkUnmarkEmpty, 256 * 32, 256 * 32);
+
+template <size_t N>
+static void BM_MarkUnmarkChunks(benchmark::State& state) {
+ RangeTracker<N> range;
+ range.FindAndMark(N);
+ size_t index = 0;
+ absl::BitGen rng;
+ while (index < N) {
+ size_t len = absl::Uniform<int32_t>(rng, 0, 32) + 1;
+ len = std::min(len, N - index);
+ size_t drop = absl::Uniform<int32_t>(rng, 0, len);
+ if (drop > 0) {
+ range.Unmark(index, drop);
+ }
+ index += len;
+ }
+ size_t m = range.longest_free();
+ for (auto s : state) {
+ size_t index = range.FindAndMark(m);
+ benchmark::DoNotOptimize(index);
+ range.Unmark(index, m);
+ }
+
+ state.SetItemsProcessed(state.iterations());
+}
+
+BENCHMARK_TEMPLATE(BM_MarkUnmarkChunks, 64);
+BENCHMARK_TEMPLATE(BM_MarkUnmarkChunks, 256);
+BENCHMARK_TEMPLATE(BM_MarkUnmarkChunks, 256 * 32);
+
+template <size_t N>
+static void BM_FillOnes(benchmark::State& state) {
+ RangeTracker<N> range;
+ while (state.KeepRunningBatch(N)) {
+ state.PauseTiming();
+ range.Clear();
+ state.ResumeTiming();
+ for (size_t j = 0; j < N; ++j) {
+ benchmark::DoNotOptimize(range.FindAndMark(1));
+ }
+ }
+
+ state.SetItemsProcessed(N * state.iterations());
+}
+
+BENCHMARK_TEMPLATE(BM_FillOnes, 256);
+BENCHMARK_TEMPLATE(BM_FillOnes, 256 * 32);
+
+template <size_t N>
+static void BM_EmptyOnes(benchmark::State& state) {
+ RangeTracker<N> range;
+ while (state.KeepRunningBatch(N)) {
+ state.PauseTiming();
+ range.Clear();
+ range.FindAndMark(N);
+ state.ResumeTiming();
+ for (size_t j = 0; j < N; ++j) {
+ range.Unmark(j, 1);
+ }
+ }
+
+ state.SetItemsProcessed(N * state.iterations());
+}
+
+BENCHMARK_TEMPLATE(BM_EmptyOnes, 256);
+BENCHMARK_TEMPLATE(BM_EmptyOnes, 256 * 32);
+
+enum SearchDirection {
+ Forward,
+ Backward,
+};
+
+template <size_t N, bool Goal, SearchDirection Dir>
+ABSL_ATTRIBUTE_NOINLINE size_t ExamineDoFind(Bitmap<N>* map, size_t index) {
+ if (Dir == Forward) {
+ if (Goal) {
+ return map->FindSet(index);
+ } else {
+ return map->FindClear(index);
+ }
+ } else {
+ if (Goal) {
+ return map->FindSetBackwards(index);
+ } else {
+ return map->FindClearBackwards(index);
+ }
+ }
+}
+
+template <size_t N, bool Goal, SearchDirection Dir>
+ABSL_ATTRIBUTE_NOINLINE void DoSearchBenchmark(Bitmap<N>* map,
+ benchmark::State& state) {
+ if (Dir == Forward) {
+ size_t index = 0;
+ for (auto s : state) {
+ index = ExamineDoFind<N, Goal, Dir>(map, index);
+ benchmark::DoNotOptimize(index);
+ index++;
+ if (index >= N) index = 0;
+ }
+ } else {
+ ssize_t index = N - 1;
+ for (auto s : state) {
+ index = ExamineDoFind<N, Goal, Dir>(map, index);
+ benchmark::DoNotOptimize(index);
+ index--;
+ if (index < 0) index = N - 1;
+ }
+ }
+}
+
+template <size_t N, bool Goal, SearchDirection Dir>
+static void BM_FindEmpty(benchmark::State& state) {
+ Bitmap<N> set;
+ // Volatile set/clears prevent the compiler from const-propagating the whole
+ // search.
+ volatile size_t to_set = 0;
+ volatile size_t to_clear = 0;
+ set.SetBit(to_set);
+ set.ClearBit(to_clear);
+ DoSearchBenchmark<N, Goal, Dir>(&set, state);
+}
+
+BENCHMARK_TEMPLATE(BM_FindEmpty, 64, false, Forward);
+BENCHMARK_TEMPLATE(BM_FindEmpty, 64, false, Backward);
+BENCHMARK_TEMPLATE(BM_FindEmpty, 64, true, Forward);
+BENCHMARK_TEMPLATE(BM_FindEmpty, 64, true, Backward);
+BENCHMARK_TEMPLATE(BM_FindEmpty, 256, false, Forward);
+BENCHMARK_TEMPLATE(BM_FindEmpty, 256, false, Backward);
+BENCHMARK_TEMPLATE(BM_FindEmpty, 256, true, Forward);
+BENCHMARK_TEMPLATE(BM_FindEmpty, 256, true, Backward);
+BENCHMARK_TEMPLATE(BM_FindEmpty, 256 * 32, false, Forward);
+BENCHMARK_TEMPLATE(BM_FindEmpty, 256 * 32, false, Backward);
+BENCHMARK_TEMPLATE(BM_FindEmpty, 256 * 32, true, Forward);
+BENCHMARK_TEMPLATE(BM_FindEmpty, 256 * 32, true, Backward);
+
+template <size_t N, bool Goal, SearchDirection Dir>
+static void BM_FindLast(benchmark::State& state) {
+ Bitmap<N> set;
+ volatile size_t to_set = 0;
+ volatile size_t to_clear = 0;
+ set.SetBit(to_set);
+ set.ClearBit(to_clear);
+ set.SetBit(N - 1);
+ DoSearchBenchmark<N, Goal, Dir>(&set, state);
+}
+
+BENCHMARK_TEMPLATE(BM_FindLast, 64, false, Forward);
+BENCHMARK_TEMPLATE(BM_FindLast, 64, false, Backward);
+BENCHMARK_TEMPLATE(BM_FindLast, 64, true, Forward);
+BENCHMARK_TEMPLATE(BM_FindLast, 64, true, Backward);
+BENCHMARK_TEMPLATE(BM_FindLast, 256, false, Forward);
+BENCHMARK_TEMPLATE(BM_FindLast, 256, false, Backward);
+BENCHMARK_TEMPLATE(BM_FindLast, 256, true, Forward);
+BENCHMARK_TEMPLATE(BM_FindLast, 256, true, Backward);
+BENCHMARK_TEMPLATE(BM_FindLast, 256 * 32, false, Forward);
+BENCHMARK_TEMPLATE(BM_FindLast, 256 * 32, false, Backward);
+BENCHMARK_TEMPLATE(BM_FindLast, 256 * 32, true, Forward);
+BENCHMARK_TEMPLATE(BM_FindLast, 256 * 32, true, Backward);
+
+template <size_t N, bool Goal, SearchDirection Dir>
+static void BM_FindFull(benchmark::State& state) {
+ Bitmap<N> set;
+ set.SetRange(0, N);
+ volatile size_t to_set = 0;
+ volatile size_t to_clear = 0;
+ set.SetBit(to_set);
+ set.ClearBit(to_clear);
+ DoSearchBenchmark<N, Goal, Dir>(&set, state);
+}
+
+BENCHMARK_TEMPLATE(BM_FindFull, 64, false, Forward);
+BENCHMARK_TEMPLATE(BM_FindFull, 64, false, Backward);
+BENCHMARK_TEMPLATE(BM_FindFull, 64, true, Forward);
+BENCHMARK_TEMPLATE(BM_FindFull, 64, true, Backward);
+BENCHMARK_TEMPLATE(BM_FindFull, 256, false, Forward);
+BENCHMARK_TEMPLATE(BM_FindFull, 256, false, Backward);
+BENCHMARK_TEMPLATE(BM_FindFull, 256, true, Forward);
+BENCHMARK_TEMPLATE(BM_FindFull, 256, true, Backward);
+BENCHMARK_TEMPLATE(BM_FindFull, 256 * 32, false, Forward);
+BENCHMARK_TEMPLATE(BM_FindFull, 256 * 32, false, Backward);
+BENCHMARK_TEMPLATE(BM_FindFull, 256 * 32, true, Forward);
+BENCHMARK_TEMPLATE(BM_FindFull, 256 * 32, true, Backward);
+
+template <size_t N, bool Goal, SearchDirection Dir>
+static void BM_FindRandom(benchmark::State& state) {
+ Bitmap<N> set;
+ volatile size_t to_set = 0;
+ volatile size_t to_clear = 0;
+ set.SetBit(to_set);
+ set.ClearBit(to_clear);
+ absl::BitGen rng;
+ for (int i = 0; i < N; ++i) {
+ if (absl::Bernoulli(rng, 1.0 / 2)) set.SetBit(i);
+ }
+ DoSearchBenchmark<N, Goal, Dir>(&set, state);
+}
+
+BENCHMARK_TEMPLATE(BM_FindRandom, 64, false, Forward);
+BENCHMARK_TEMPLATE(BM_FindRandom, 64, false, Backward);
+BENCHMARK_TEMPLATE(BM_FindRandom, 64, true, Forward);
+BENCHMARK_TEMPLATE(BM_FindRandom, 64, true, Backward);
+BENCHMARK_TEMPLATE(BM_FindRandom, 256, false, Forward);
+BENCHMARK_TEMPLATE(BM_FindRandom, 256, false, Backward);
+BENCHMARK_TEMPLATE(BM_FindRandom, 256, true, Forward);
+BENCHMARK_TEMPLATE(BM_FindRandom, 256, true, Backward);
+BENCHMARK_TEMPLATE(BM_FindRandom, 256 * 32, false, Forward);
+BENCHMARK_TEMPLATE(BM_FindRandom, 256 * 32, false, Backward);
+BENCHMARK_TEMPLATE(BM_FindRandom, 256 * 32, true, Forward);
+BENCHMARK_TEMPLATE(BM_FindRandom, 256 * 32, true, Backward);
+
+template <size_t N>
+ABSL_ATTRIBUTE_NOINLINE size_t DoScanBenchmark(Bitmap<N>* set,
+ benchmark::State& state) {
+ size_t total = 0;
+ for (auto s : state) {
+ size_t index = 0, len;
+ while (set->NextFreeRange(index, &index, &len)) {
+ benchmark::DoNotOptimize(index);
+ benchmark::DoNotOptimize(len);
+ index += len;
+ total++;
+ }
+ }
+
+ return total;
+}
+
+template <size_t N>
+static void BM_ScanEmpty(benchmark::State& state) {
+ Bitmap<N> set;
+ volatile size_t to_set = 0;
+ volatile size_t to_clear = 0;
+ set.SetBit(to_set);
+ set.ClearBit(to_clear);
+ size_t total = DoScanBenchmark<N>(&set, state);
+ state.SetItemsProcessed(total);
+}
+
+BENCHMARK_TEMPLATE(BM_ScanEmpty, 64);
+BENCHMARK_TEMPLATE(BM_ScanEmpty, 256);
+BENCHMARK_TEMPLATE(BM_ScanEmpty, 256 * 32);
+
+template <size_t N>
+static void BM_ScanFull(benchmark::State& state) {
+ Bitmap<N> set;
+ volatile size_t to_set = 0;
+ volatile size_t to_clear = 0;
+ set.SetBit(to_set);
+ set.ClearBit(to_clear);
+ set.SetRange(0, N);
+
+ size_t total = DoScanBenchmark<N>(&set, state);
+ state.SetItemsProcessed(total);
+}
+
+BENCHMARK_TEMPLATE(BM_ScanFull, 64);
+BENCHMARK_TEMPLATE(BM_ScanFull, 256);
+BENCHMARK_TEMPLATE(BM_ScanFull, 256 * 32);
+
+template <size_t N>
+static void BM_ScanRandom(benchmark::State& state) {
+ Bitmap<N> set;
+ volatile size_t to_set = 0;
+ volatile size_t to_clear = 0;
+ set.SetBit(to_set);
+ set.ClearBit(to_clear);
+ absl::BitGen rng;
+ for (int i = 0; i < N; ++i) {
+ if (absl::Bernoulli(rng, 1.0 / 2)) set.SetBit(i);
+ }
+ size_t total = DoScanBenchmark<N>(&set, state);
+ state.SetItemsProcessed(total);
+}
+
+BENCHMARK_TEMPLATE(BM_ScanRandom, 64);
+BENCHMARK_TEMPLATE(BM_ScanRandom, 256);
+BENCHMARK_TEMPLATE(BM_ScanRandom, 256 * 32);
+
+template <size_t N>
+static void BM_ScanChunks(benchmark::State& state) {
+ Bitmap<N> set;
+ volatile size_t to_set = 0;
+ volatile size_t to_clear = 0;
+ set.SetBit(to_set);
+ set.ClearBit(to_clear);
+ absl::BitGen rng;
+ size_t index = 0;
+ while (index < N) {
+ // Paint ~half of a chunk of random size.
+ size_t len = absl::Uniform<int32_t>(rng, 0, 32) + 1;
+ len = std::min(len, N - index);
+ size_t mid = absl::Uniform<int32_t>(rng, 0, len) + index;
+ size_t ones = mid + 1;
+ size_t limit = index + len;
+ if (ones < limit) {
+ set.SetRange(ones, limit - ones);
+ }
+ index = limit;
+ }
+ size_t total = DoScanBenchmark<N>(&set, state);
+ state.SetItemsProcessed(total);
+}
+
+BENCHMARK_TEMPLATE(BM_ScanChunks, 64);
+BENCHMARK_TEMPLATE(BM_ScanChunks, 256);
+BENCHMARK_TEMPLATE(BM_ScanChunks, 256 * 32);
+
+} // namespace
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
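The file above only defines and registers the templated benchmark cases. A runner sketch, assuming the standard Google Benchmark entry point is used rather than a separately linked benchmark main:

#include "benchmark/benchmark.h"

// Expands to a main() that parses flags such as --benchmark_filter and runs
// every case registered via BENCHMARK_TEMPLATE above.
BENCHMARK_MAIN();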
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/range_tracker_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/range_tracker_test.cc
new file mode 100644
index 0000000000..4f9202e221
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/range_tracker_test.cc
@@ -0,0 +1,294 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/internal/range_tracker.h"
+
+#include <algorithm>
+#include <utility>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/base/attributes.h"
+#include "absl/container/fixed_array.h"
+#include "absl/random/distributions.h"
+#include "absl/random/random.h"
+
+namespace tcmalloc {
+namespace tcmalloc_internal {
+namespace {
+
+using testing::ElementsAre;
+using testing::Pair;
+
+class BitmapTest : public testing::Test {
+ protected:
+ template <size_t N>
+ std::vector<size_t> FindSetResults(const Bitmap<N> &map) {
+ return FindResults<N, true>(map);
+ }
+
+ template <size_t N>
+ std::vector<size_t> FindClearResults(const Bitmap<N> &map) {
+ return FindResults<N, false>(map);
+ }
+
+ template <size_t N, bool Value>
+ std::vector<size_t> FindResults(const Bitmap<N> &map) {
+ std::vector<size_t> results;
+ ssize_t last = -1;
+ for (size_t i = 0; i < N; ++i) {
+ ssize_t j = Value ? map.FindSet(i) : map.FindClear(i);
+ EXPECT_LE(last, j) << i;
+ EXPECT_LE(i, j) << i;
+ EXPECT_GE(N, j) << i;
+ if (last != j) {
+ results.push_back(j);
+ last = j;
+ }
+ }
+
+ return results;
+ }
+
+ template <size_t N>
+ std::vector<size_t> FindSetResultsBackwards(const Bitmap<N> &map) {
+ return FindResultsBackwards<N, true>(map);
+ }
+
+ template <size_t N>
+ std::vector<size_t> FindClearResultsBackwards(const Bitmap<N> &map) {
+ return FindResultsBackwards<N, false>(map);
+ }
+
+ template <size_t N, bool Value>
+ std::vector<size_t> FindResultsBackwards(const Bitmap<N> &map) {
+ std::vector<size_t> results;
+ ssize_t last = N;
+ for (ssize_t i = N - 1; i >= 0; --i) {
+ ssize_t j = Value ? map.FindSetBackwards(i) : map.FindClearBackwards(i);
+ EXPECT_GE(last, j) << i;
+ EXPECT_GE(i, j) << i;
+ EXPECT_LE(-1, j) << i;
+ if (last != j) {
+ results.push_back(j);
+ last = j;
+ }
+ }
+
+ return results;
+ }
+};
+
+TEST_F(BitmapTest, GetBitEmpty) {
+ Bitmap<253> map;
+ for (size_t i = 0; i < map.size(); ++i) {
+ EXPECT_EQ(map.GetBit(i), 0);
+ }
+}
+
+TEST_F(BitmapTest, CheckIsZero) {
+ Bitmap<253> map;
+ EXPECT_EQ(map.IsZero(), true);
+ for (size_t i = 0; i < map.size(); ++i) {
+ map.Clear();
+ EXPECT_EQ(map.IsZero(), true);
+ map.SetBit(i);
+ EXPECT_EQ(map.IsZero(), false);
+ }
+}
+
+TEST_F(BitmapTest, CheckClearLowestBit) {
+ Bitmap<253> map;
+ for (size_t i = 0; i < map.size(); ++i) {
+ map.SetBit(i);
+ }
+ for (size_t i = 0; i < map.size(); ++i) {
+ size_t index = map.FindSet(0);
+ EXPECT_EQ(index, i);
+ map.ClearLowestBit();
+ }
+}
+
+TEST_F(BitmapTest, GetBitOneSet) {
+ const size_t N = 251;
+ for (size_t s = 0; s < N; s++) {
+ Bitmap<N> map;
+ map.SetBit(s);
+ for (size_t i = 0; i < map.size(); ++i) {
+ EXPECT_EQ(map.GetBit(i), i == s ? 1 : 0);
+ }
+ }
+}
+
+TEST_F(BitmapTest, FindSet) {
+ Bitmap<253> map;
+ EXPECT_THAT(FindSetResults(map), ElementsAre(253));
+ EXPECT_THAT(FindSetResultsBackwards(map), ElementsAre(-1));
+ map.SetBit(7);
+ map.SetBit(14);
+ map.SetBit(15);
+ map.SetBit(63);
+ map.SetBit(128);
+ EXPECT_THAT(FindSetResults(map), ElementsAre(7, 14, 15, 63, 128, 253));
+ EXPECT_THAT(FindSetResultsBackwards(map),
+ ElementsAre(128, 63, 15, 14, 7, -1));
+ map.SetBit(195);
+ map.SetBit(196);
+ map.SetBit(251);
+ map.SetBit(252);
+ EXPECT_THAT(FindSetResults(map),
+ ElementsAre(7, 14, 15, 63, 128, 195, 196, 251, 252));
+ EXPECT_THAT(FindSetResultsBackwards(map),
+ ElementsAre(252, 251, 196, 195, 128, 63, 15, 14, 7, -1));
+ map.SetBit(0);
+ EXPECT_THAT(FindSetResultsBackwards(map),
+ ElementsAre(252, 251, 196, 195, 128, 63, 15, 14, 7, 0));
+}
+
+TEST_F(BitmapTest, FindClear) {
+ Bitmap<253> map;
+ map.SetRange(0, 253);
+ EXPECT_THAT(FindClearResults(map), ElementsAre(253));
+ EXPECT_THAT(FindClearResultsBackwards(map), ElementsAre(-1));
+
+ map.ClearBit(7);
+ map.ClearBit(14);
+ map.ClearBit(15);
+ map.ClearBit(63);
+ map.ClearBit(128);
+ EXPECT_THAT(FindClearResults(map), ElementsAre(7, 14, 15, 63, 128, 253));
+ EXPECT_THAT(FindClearResultsBackwards(map),
+ ElementsAre(128, 63, 15, 14, 7, -1));
+ map.ClearBit(195);
+ map.ClearBit(196);
+ map.ClearBit(251);
+ map.ClearBit(252);
+ EXPECT_THAT(FindClearResults(map),
+ ElementsAre(7, 14, 15, 63, 128, 195, 196, 251, 252));
+ EXPECT_THAT(FindClearResultsBackwards(map),
+ ElementsAre(252, 251, 196, 195, 128, 63, 15, 14, 7, -1));
+ map.ClearBit(0);
+ EXPECT_THAT(FindClearResultsBackwards(map),
+ ElementsAre(252, 251, 196, 195, 128, 63, 15, 14, 7, 0));
+}
+
+TEST_F(BitmapTest, CountBits) {
+ Bitmap<253> map;
+ map.SetRange(0, 253);
+ EXPECT_EQ(map.CountBits(0, 253), 253);
+ EXPECT_EQ(map.CountBits(8, 245), 245);
+ EXPECT_EQ(map.CountBits(0, 250), 250);
+
+ map.ClearBit(7);
+ map.ClearBit(14);
+ map.ClearBit(15);
+ map.ClearBit(63);
+ map.ClearBit(128);
+
+ EXPECT_EQ(map.CountBits(0, 253), 248);
+ EXPECT_EQ(map.CountBits(8, 245), 241);
+ EXPECT_EQ(map.CountBits(0, 250), 245);
+
+ map.ClearBit(195);
+ map.ClearBit(196);
+ map.ClearBit(251);
+ map.ClearBit(252);
+
+ EXPECT_EQ(map.CountBits(0, 253), 244);
+ EXPECT_EQ(map.CountBits(8, 245), 237);
+ EXPECT_EQ(map.CountBits(0, 250), 243);
+
+ map.ClearBit(0);
+
+ EXPECT_EQ(map.CountBits(0, 253), 243);
+ EXPECT_EQ(map.CountBits(8, 245), 237);
+ EXPECT_EQ(map.CountBits(0, 250), 242);
+}
+
+TEST_F(BitmapTest, CountBitsFuzz) {
+ static constexpr size_t kBits = 253;
+ absl::FixedArray<bool> truth(kBits);
+ Bitmap<kBits> map;
+
+ absl::BitGen rng;
+ for (int i = 0; i < kBits; i++) {
+ bool v = absl::Bernoulli(rng, 0.3);
+ truth[i] = v;
+ if (v) {
+ map.SetBit(i);
+ }
+ }
+
+ for (int i = 0; i < 100; i++) {
+ SCOPED_TRACE(i);
+
+ // Pick a random starting point and a length, use a naive loop against truth
+ // to calculate the expected bit count.
+ size_t start = absl::Uniform(rng, 0u, kBits);
+ size_t length = absl::Uniform(rng, 0u, kBits - start);
+
+ size_t expected = 0;
+ for (int j = 0; j < length; j++) {
+ if (truth[start + j]) {
+ expected++;
+ }
+ }
+
+ EXPECT_EQ(expected, map.CountBits(start, length));
+ }
+}
+
+class RangeTrackerTest : public ::testing::Test {
+ protected:
+ std::vector<std::pair<size_t, size_t>> FreeRanges() {
+ std::vector<std::pair<size_t, size_t>> ret;
+ size_t index = 0, len;
+ while (range_.NextFreeRange(index, &index, &len)) {
+ ret.push_back({index, len});
+ index += len;
+ }
+ return ret;
+ }
+ static constexpr size_t kBits = 1017;
+ RangeTracker<kBits> range_;
+};
+
+TEST_F(RangeTrackerTest, Trivial) {
+ EXPECT_EQ(kBits, range_.size());
+ EXPECT_EQ(0, range_.used());
+ EXPECT_EQ(kBits, range_.longest_free());
+ EXPECT_THAT(FreeRanges(), ElementsAre(Pair(0, kBits)));
+ ASSERT_EQ(0, range_.FindAndMark(kBits));
+ EXPECT_EQ(0, range_.longest_free());
+ EXPECT_EQ(kBits, range_.used());
+ EXPECT_THAT(FreeRanges(), ElementsAre());
+ range_.Unmark(0, 100);
+ EXPECT_EQ(100, range_.longest_free());
+ EXPECT_EQ(kBits - 100, range_.used());
+ EXPECT_THAT(FreeRanges(), ElementsAre(Pair(0, 100)));
+ // non-contiguous - shouldn't increase longest
+ range_.Unmark(200, 100);
+ EXPECT_EQ(100, range_.longest_free());
+ EXPECT_EQ(kBits - 200, range_.used());
+ EXPECT_THAT(FreeRanges(), ElementsAre(Pair(0, 100), Pair(200, 100)));
+ range_.Unmark(100, 100);
+ EXPECT_EQ(300, range_.longest_free());
+ EXPECT_EQ(kBits - 300, range_.used());
+ EXPECT_THAT(FreeRanges(), ElementsAre(Pair(0, 300)));
+}
+
+} // namespace
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
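A minimal RangeTracker<N> sketch mirroring the semantics exercised by the Trivial test above (illustrative only; nothing is assumed about FindAndMark's fit policy beyond returning the start index of the run it marked):

#include "tcmalloc/internal/range_tracker.h"

using tcmalloc::tcmalloc_internal::RangeTracker;

void RangeTrackerSketch() {
  RangeTracker<256> tracker;            // 256 slots, initially all free
  size_t a = tracker.FindAndMark(64);   // marks a 64-slot run, returns its start
  size_t b = tracker.FindAndMark(32);   // marks a further 32-slot run
  // tracker.used() == 96; tracker.longest_free() reports the largest
  // remaining contiguous free run.
  tracker.Unmark(a, 64);                // the first run becomes free again
  tracker.Unmark(b, 32);
  // tracker.used() == 0 once everything has been unmarked.
}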
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/timeseries_tracker.h b/contrib/libs/tcmalloc/tcmalloc/internal/timeseries_tracker.h
new file mode 100644
index 0000000000..f1b6d3375f
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/timeseries_tracker.h
@@ -0,0 +1,195 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef TCMALLOC_INTERNAL_TIMESERIES_TRACKER_H_
+#define TCMALLOC_INTERNAL_TIMESERIES_TRACKER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <atomic>
+#include <limits>
+
+#include "absl/base/internal/cycleclock.h"
+#include "absl/functional/function_ref.h"
+#include "absl/numeric/bits.h"
+#include "absl/numeric/int128.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "tcmalloc/internal/clock.h"
+#include "tcmalloc/internal/logging.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+// Aggregates a series of reported values of type S in a set of entries of type
+// T, one entry per epoch. This class factors out common functionality of
+// different time series trackers. S can be any type, T needs to implement:
+// Nil(), Report(S val), empty()
+template <typename T, typename S, size_t kEpochs = 16>
+class TimeSeriesTracker {
+ public:
+ enum SkipEntriesSetting { kSkipEmptyEntries, kDoNotSkipEmptyEntries };
+
+ explicit constexpr TimeSeriesTracker(Clock clock, absl::Duration w)
+ : window_(w), epoch_length_(window_ / kEpochs), clock_(clock) {
+ // See comment in GetCurrentEpoch().
+ auto d = static_cast<uint64_t>(absl::ToDoubleSeconds(epoch_length_) *
+ clock.freq());
+ div_precision_ = 63 + absl::bit_width(d);
+ epoch_ticks_m_ =
+ static_cast<uint64_t>(
+ (static_cast<absl::uint128>(1) << div_precision_) / d) +
+ 1;
+ }
+
+ bool Report(S val);
+
+ // Iterates over the time series, starting from the oldest entry. The callback
+ // receives the offset of the entry, its timestamp according to the clock and
+ // the entry itself. Offsets are relative to the beginning of the buffer.
+ void Iter(absl::FunctionRef<void(size_t, int64_t, const T&)> f,
+ SkipEntriesSetting skip_entries) const;
+
+ // Iterates over the last num_epochs data points (if -1, iterate to the
+ // oldest entry). Offsets are relative to the end of the buffer.
+ void IterBackwards(absl::FunctionRef<void(size_t, int64_t, const T&)> f,
+ int64_t num_epochs = -1) const;
+
+ // This retrieves a particular data point (if offset is outside the valid
+ // range, the default data point will be returned).
+ const T GetEpochAtOffset(size_t offset);
+
+ // Updates the time base to the current time. This is useful to report the
+ // most recent time window rather than the last time window that had any
+ // reported values.
+ void UpdateTimeBase() { UpdateClock(); }
+
+ private:
+ // Returns true if the tracker moved to a different epoch.
+ bool UpdateClock();
+
+ // Returns the current epoch based on the clock.
+ int64_t GetCurrentEpoch() {
+ // This is equivalent to
+ // `clock_.now() / (absl::ToDoubleSeconds(epoch_length_) * clock_.freq())`.
+ // We basically follow the technique from
+ // https://ridiculousfish.com/blog/posts/labor-of-division-episode-i.html,
+ // except that we use one fewer bit of precision than necessary to always
+ // get the correct answer if the numerator were a 64-bit unsigned number. In
+ // this case, because clock_.now() returns a signed 64-bit number (i.e. max
+ // is <2^63), it shouldn't cause a problem. This way, we don't need to
+ // handle overflow so it's simpler. See also:
+ // https://lemire.me/blog/2019/02/20/more-fun-with-fast-remainders-when-the-divisor-is-a-constant/.
+ return static_cast<int64_t>(static_cast<absl::uint128>(epoch_ticks_m_) *
+ clock_.now() >>
+ div_precision_);
+ }
+
+ const absl::Duration window_;
+ const absl::Duration epoch_length_;
+
+ T entries_[kEpochs]{};
+ size_t last_epoch_{0};
+ size_t current_epoch_{0};
+ // This is the magic constant from
+ // https://ridiculousfish.com/blog/posts/labor-of-division-episode-i.html.
+ uint64_t epoch_ticks_m_;
+ uint8_t div_precision_;
+
+ Clock clock_;
+};
+
+// Erases values from the window that are out of date; sets the current epoch
+// to the current location in the ringbuffer.
+template <class T, class S, size_t kEpochs>
+bool TimeSeriesTracker<T, S, kEpochs>::UpdateClock() {
+ const size_t epoch = GetCurrentEpoch();
+ // How many time steps did we take? (Since we only record kEpochs
+ // time steps, we can pretend it was at most that.)
+ size_t delta = epoch - last_epoch_;
+ delta = std::min(delta, kEpochs);
+ last_epoch_ = epoch;
+
+ if (delta == 0) {
+ return false;
+ }
+
+ // At each tick, we move our current location by one, to a new location
+ // that contains too-old data (which must be zeroed.)
+ for (size_t offset = 0; offset < delta; ++offset) {
+ current_epoch_++;
+ if (current_epoch_ == kEpochs) current_epoch_ = 0;
+ entries_[current_epoch_] = T::Nil();
+ }
+ return true;
+}
+
+template <class T, class S, size_t kEpochs>
+void TimeSeriesTracker<T, S, kEpochs>::Iter(
+ absl::FunctionRef<void(size_t, int64_t, const T&)> f,
+ SkipEntriesSetting skip_entries) const {
+ size_t j = current_epoch_ + 1;
+ if (j == kEpochs) j = 0;
+ int64_t timestamp =
+ (last_epoch_ - kEpochs) * absl::ToInt64Nanoseconds(epoch_length_);
+ for (int offset = 0; offset < kEpochs; offset++) {
+ timestamp += absl::ToInt64Nanoseconds(epoch_length_);
+ if (skip_entries == kDoNotSkipEmptyEntries || !entries_[j].empty()) {
+ f(offset, timestamp, entries_[j]);
+ }
+ j++;
+ if (j == kEpochs) j = 0;
+ }
+}
+
+template <class T, class S, size_t kEpochs>
+void TimeSeriesTracker<T, S, kEpochs>::IterBackwards(
+ absl::FunctionRef<void(size_t, int64_t, const T&)> f,
+ int64_t num_epochs) const {
+ // -1 means that we are outputting all epochs.
+ num_epochs = (num_epochs == -1) ? kEpochs : num_epochs;
+ size_t j = current_epoch_;
+ ASSERT(num_epochs <= kEpochs);
+ int64_t timestamp = last_epoch_ * absl::ToInt64Nanoseconds(epoch_length_);
+ for (size_t offset = 0; offset < num_epochs; ++offset) {
+ // This is deliberately int64_t and not a time unit, since clock_ is not
+ // guaranteed to be a real time base.
+ f(offset, timestamp, entries_[j]);
+ timestamp -= absl::ToInt64Nanoseconds(epoch_length_);
+ if (j == 0) j = kEpochs;
+ --j;
+ }
+}
+
+template <class T, class S, size_t kEpochs>
+const T TimeSeriesTracker<T, S, kEpochs>::GetEpochAtOffset(size_t offset) {
+ return (offset >= kEpochs)
+ ? T::Nil()
+ : entries_[(current_epoch_ + kEpochs - offset) % kEpochs];
+}
+
+template <class T, class S, size_t kEpochs>
+bool TimeSeriesTracker<T, S, kEpochs>::Report(S val) {
+ bool updated_clock = UpdateClock();
+ entries_[current_epoch_].Report(val);
+ return updated_clock;
+}
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // TCMALLOC_INTERNAL_TIMESERIES_TRACKER_H_
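The division trick in GetCurrentEpoch() above can be checked in isolation. A standalone sketch mirroring the precomputation of div_precision_ and epoch_ticks_m_ (the divisor 1000000 is an example value, not one taken from the library):

#include <cassert>
#include <cstdint>

#include "absl/numeric/bits.h"
#include "absl/numeric/int128.h"

// n / d computed via multiply-and-shift, exact for n < 2^63 per the precision
// argument in the header comment (the example d is not a power of two).
uint64_t FastDivByConstant(uint64_t n, uint64_t d) {
  const int p = 63 + absl::bit_width(d);                      // div_precision_
  const uint64_t m = static_cast<uint64_t>(
                         (static_cast<absl::uint128>(1) << p) / d) +
                     1;                                        // epoch_ticks_m_
  return static_cast<uint64_t>(static_cast<absl::uint128>(m) * n >> p);
}

void FastDivSketch() {
  const uint64_t d = 1000000;  // e.g. "clock ticks per epoch"
  for (uint64_t n : {uint64_t{0}, uint64_t{999999}, uint64_t{1000000},
                     uint64_t{1} << 62}) {
    assert(FastDivByConstant(n, d) == n / d);
  }
}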
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/timeseries_tracker_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/timeseries_tracker_test.cc
new file mode 100644
index 0000000000..1f75306161
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/timeseries_tracker_test.cc
@@ -0,0 +1,191 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/internal/timeseries_tracker.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+using ::testing::ElementsAre;
+
+namespace tcmalloc {
+namespace tcmalloc_internal {
+namespace {
+
+class TimeSeriesTrackerTest : public testing::Test {
+ public:
+ struct TestEntry {
+ static TestEntry Nil() { return TestEntry(); }
+
+ void Report(int n) { values_.push_back(n); }
+
+ bool empty() const { return values_.empty(); }
+
+ std::vector<int> values_;
+ };
+
+ protected:
+ void Advance(absl::Duration d) {
+ clock_ += absl::ToDoubleSeconds(d) * GetFakeClockFrequency();
+ }
+
+ static constexpr absl::Duration kDuration = absl::Seconds(2);
+
+ TimeSeriesTracker<TestEntry, int, 8> tracker_{
+ Clock{.now = FakeClock, .freq = GetFakeClockFrequency}, kDuration};
+
+ private:
+ static int64_t FakeClock() { return clock_; }
+
+ static double GetFakeClockFrequency() {
+ return absl::ToDoubleNanoseconds(absl::Seconds(2));
+ }
+
+ static int64_t clock_;
+};
+
+int64_t TimeSeriesTrackerTest::clock_{0};
+
+// Test that frequency conversion in the cycle clock works correctly
+TEST(TimeSeriesTest, CycleClock) {
+ TimeSeriesTracker<TimeSeriesTrackerTest::TestEntry, int, 100> tracker{
+ Clock{absl::base_internal::CycleClock::Now,
+ absl::base_internal::CycleClock::Frequency},
+ absl::Seconds(10)}; // 100ms epochs
+
+ tracker.Report(1);
+ absl::SleepFor(absl::Milliseconds(100));
+ tracker.Report(2);
+
+ // Iterate through entries skipping empty entries.
+ int num_timestamps = 0;
+ int offset_1, offset_2;
+ tracker.Iter(
+ [&](size_t offset, int64_t ts,
+ const TimeSeriesTrackerTest::TestEntry& e) {
+ ASSERT_LT(num_timestamps, 2);
+ if (num_timestamps == 0) {
+ offset_1 = offset;
+ EXPECT_THAT(e.values_, ElementsAre(1));
+ } else {
+ offset_2 = offset;
+ EXPECT_THAT(e.values_, ElementsAre(2));
+ }
+ num_timestamps++;
+ },
+ tracker.kSkipEmptyEntries);
+
+ // If we are near an epoch boundary, we may skip two epochs.
+ EXPECT_GE(offset_2 - offset_1, 1);
+ EXPECT_LE(offset_2 - offset_1, 2);
+}
+
+TEST_F(TimeSeriesTrackerTest, Works) {
+ const int64_t kEpochLength = absl::ToInt64Nanoseconds(kDuration) / 8;
+ Advance(kDuration);
+
+ tracker_.Report(1);
+ Advance(absl::Nanoseconds(1));
+ tracker_.Report(2);
+ Advance(kDuration / 4);
+ tracker_.Report(4);
+
+ // Iterate through entries skipping empty entries.
+ int num_timestamps = 0;
+ int offset_1, offset_2;
+ tracker_.Iter(
+ [&](size_t offset, int64_t ts, const TestEntry& e) {
+ ASSERT_LT(num_timestamps, 2);
+ if (num_timestamps == 0) {
+ offset_1 = offset;
+ EXPECT_EQ(absl::ToInt64Nanoseconds(kDuration), ts);
+ EXPECT_THAT(e.values_, ElementsAre(1, 2));
+ } else {
+ offset_2 = offset;
+ EXPECT_EQ(absl::ToInt64Nanoseconds(kDuration) +
+ absl::ToInt64Nanoseconds(kDuration) / 4,
+ ts);
+ EXPECT_THAT(e.values_, ElementsAre(4));
+ }
+ num_timestamps++;
+ },
+ tracker_.kSkipEmptyEntries);
+
+ EXPECT_EQ(2, num_timestamps);
+ EXPECT_EQ(offset_2 - offset_1, 2);
+
+ Advance(kDuration / 4);
+
+ // Iterate through entries not skipping empty entries.
+ int64_t expected_timestamp = absl::ToInt64Nanoseconds(kDuration) / 4;
+ num_timestamps = 0;
+
+ tracker_.Iter(
+ [&](size_t offset, int64_t ts, const TestEntry& e) {
+ expected_timestamp += kEpochLength;
+ ASSERT_LT(num_timestamps, 8);
+ EXPECT_EQ(expected_timestamp, ts);
+ num_timestamps++;
+ },
+ tracker_.kDoNotSkipEmptyEntries);
+
+ EXPECT_EQ(8, num_timestamps);
+
+ tracker_.Report(8);
+ Advance(kDuration / 4);
+ tracker_.Report(16);
+
+ // Iterate backwards.
+ num_timestamps = 0;
+ expected_timestamp =
+ 7 * absl::ToInt64Nanoseconds(kDuration) / 4; // Current time
+ tracker_.IterBackwards(
+ [&](size_t offset, int64_t ts, const TestEntry& e) {
+ ASSERT_LT(num_timestamps, 3);
+ EXPECT_EQ(num_timestamps, offset);
+ EXPECT_EQ(expected_timestamp, ts);
+ if (num_timestamps == 0) {
+ EXPECT_THAT(e.values_, ElementsAre(16));
+ } else if (num_timestamps == 1) {
+ EXPECT_TRUE(e.values_.empty());
+ } else {
+ EXPECT_THAT(e.values_, ElementsAre(8));
+ }
+ expected_timestamp -= kEpochLength;
+ num_timestamps++;
+ },
+ 3);
+
+ EXPECT_EQ(3, num_timestamps);
+
+ EXPECT_THAT(tracker_.GetEpochAtOffset(0).values_, ElementsAre(16));
+ EXPECT_THAT(tracker_.GetEpochAtOffset(2).values_, ElementsAre(8));
+ EXPECT_TRUE(tracker_.GetEpochAtOffset(3).empty());
+ EXPECT_TRUE(tracker_.GetEpochAtOffset(1000).empty());
+
+ // This should annihilate everything.
+ Advance(kDuration * 2);
+ tracker_.UpdateTimeBase();
+ tracker_.Iter(
+ [&](size_t offset, int64_t ts, const TestEntry& e) {
+ ASSERT_TRUE(false) << "Time series should be empty";
+ },
+ tracker_.kSkipEmptyEntries);
+
+ EXPECT_TRUE(tracker_.GetEpochAtOffset(1).empty());
+}
+
+} // namespace
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/util.cc b/contrib/libs/tcmalloc/tcmalloc/internal/util.cc
new file mode 100644
index 0000000000..ef705b02e3
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/util.cc
@@ -0,0 +1,195 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "tcmalloc/internal/util.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdarg.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <utility>
+
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "tcmalloc/internal/logging.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+int signal_safe_open(const char* path, int flags, ...) {
+ int fd;
+ va_list ap;
+
+ va_start(ap, flags);
+ mode_t mode = va_arg(ap, mode_t);
+ va_end(ap);
+
+ do {
+ fd = ((flags & O_CREAT) ? open(path, flags, mode) : open(path, flags));
+ } while (fd == -1 && errno == EINTR);
+
+ return fd;
+}
+
+int signal_safe_close(int fd) {
+ int rc;
+
+ do {
+ rc = close(fd);
+ } while (rc == -1 && errno == EINTR);
+
+ return rc;
+}
+
+ssize_t signal_safe_write(int fd, const char* buf, size_t count,
+ size_t* bytes_written) {
+ ssize_t rc;
+ size_t total_bytes = 0;
+
+ do {
+ rc = write(fd, buf + total_bytes, count - total_bytes);
+ if (rc > 0) total_bytes += rc;
+ } while ((rc > 0 && count > total_bytes) || (rc == -1 && errno == EINTR));
+
+ if (bytes_written != nullptr) *bytes_written = total_bytes;
+
+ return rc;
+}
+
+int signal_safe_poll(struct pollfd* fds, int nfds, absl::Duration timeout) {
+ int rc = 0;
+ absl::Duration elapsed = absl::ZeroDuration();
+
+ // We can't use gettimeofday since it's not async signal safe. We could use
+ // clock_gettime but that would require linking //base against librt.
+ // Fortunately, timeout is of sufficiently coarse granularity that we can just
+ // approximate it.
+ while ((elapsed <= timeout || timeout < absl::ZeroDuration()) && (rc == 0)) {
+ if (elapsed > absl::ZeroDuration())
+ ::absl::SleepFor(::absl::Milliseconds(1));
+ elapsed += absl::Milliseconds(1);
+ while ((rc = poll(fds, nfds, 0)) == -1 && errno == EINTR) {
+ }
+ }
+
+ return rc;
+}
+
+ssize_t signal_safe_read(int fd, char* buf, size_t count, size_t* bytes_read) {
+ ssize_t rc;
+ size_t total_bytes = 0;
+ struct pollfd pfd;
+
+ // poll is required for testing whether there is any data left on fd in the
+ // case of a signal interrupting a partial read. This is needed since this
+ // case is only defined to return the number of bytes read up to that point,
+ // with no indication whether more could have been read (up to count).
+ pfd.fd = fd;
+ pfd.events = POLL_IN;
+ pfd.revents = 0;
+
+ do {
+ rc = read(fd, buf + total_bytes, count - total_bytes);
+ if (rc > 0) total_bytes += rc;
+
+ if (rc == 0) break; // EOF
+ // try again if there's space to fill, no (non-interrupt) error,
+ // and data is available.
+ } while (total_bytes < count && (rc > 0 || errno == EINTR) &&
+ (signal_safe_poll(&pfd, 1, absl::ZeroDuration()) == 1 ||
+ total_bytes == 0));
+
+ if (bytes_read) *bytes_read = total_bytes;
+
+ if (rc != -1 || errno == EINTR)
+ rc = total_bytes; // return the cumulative bytes read
+ return rc;
+}
+
+std::vector<int> AllowedCpus() {
+ // We have no need for dynamically sized sets (currently >1024 CPUs for glibc)
+ // at the present time. We could change this in the future.
+ cpu_set_t allowed_cpus;
+ CHECK_CONDITION(sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus) ==
+ 0);
+ int n = CPU_COUNT(&allowed_cpus), c = 0;
+
+ std::vector<int> result(n);
+ for (int i = 0; i < CPU_SETSIZE && n; i++) {
+ if (CPU_ISSET(i, &allowed_cpus)) {
+ result[c++] = i;
+ n--;
+ }
+ }
+ CHECK_CONDITION(0 == n);
+
+ return result;
+}
+
+static cpu_set_t SpanToCpuSetT(absl::Span<int> mask) {
+ cpu_set_t result;
+ CPU_ZERO(&result);
+ for (int cpu : mask) {
+ CPU_SET(cpu, &result);
+ }
+ return result;
+}
+
+ScopedAffinityMask::ScopedAffinityMask(absl::Span<int> allowed_cpus) {
+ specified_cpus_ = SpanToCpuSetT(allowed_cpus);
+ // getaffinity should never fail.
+ CHECK_CONDITION(
+ sched_getaffinity(0, sizeof(original_cpus_), &original_cpus_) == 0);
+ // See destructor comments on setaffinity interactions. Tampered() will
+ // necessarily be true in this case.
+ sched_setaffinity(0, sizeof(specified_cpus_), &specified_cpus_);
+}
+
+ScopedAffinityMask::ScopedAffinityMask(int allowed_cpu) {
+ CPU_ZERO(&specified_cpus_);
+ CPU_SET(allowed_cpu, &specified_cpus_);
+
+ // getaffinity should never fail.
+ CHECK_CONDITION(
+ sched_getaffinity(0, sizeof(original_cpus_), &original_cpus_) == 0);
+ // See destructor comments on setaffinity interactions. Tampered() will
+ // necessarily be true in this case.
+ sched_setaffinity(0, sizeof(specified_cpus_), &specified_cpus_);
+}
+
+ScopedAffinityMask::~ScopedAffinityMask() {
+ // If something else has already reset our affinity, do not attempt to
+ // restrict towards our original mask. This is best-effort as the tampering
+ // may obviously occur during the destruction of *this.
+ if (!Tampered()) {
+ // Note: We do not assert success here, conflicts may restrict us from all
+ // 'original_cpus_'.
+ sched_setaffinity(0, sizeof(original_cpus_), &original_cpus_);
+ }
+}
+
+bool ScopedAffinityMask::Tampered() {
+ cpu_set_t current_cpus;
+ CHECK_CONDITION(sched_getaffinity(0, sizeof(current_cpus), &current_cpus) ==
+ 0);
+ return !CPU_EQUAL(&current_cpus, &specified_cpus_); // Mismatch => modified.
+}
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/util.h b/contrib/libs/tcmalloc/tcmalloc/internal/util.h
new file mode 100644
index 0000000000..b43e322257
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/util.h
@@ -0,0 +1,138 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TCMALLOC_INTERNAL_UTIL_H_
+#define TCMALLOC_INTERNAL_UTIL_H_
+
+#include <poll.h> // IWYU pragma: keep
+#include <sched.h>
+#include <signal.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/types.h>
+#include <time.h>
+
+#include <vector>
+
+#include "absl/base/internal/sysinfo.h"
+#include "absl/time/time.h"
+#include "absl/types/span.h"
+#include "tcmalloc/internal/config.h"
+
+#define TCMALLOC_RETRY_ON_TEMP_FAILURE(expression) \
+ (__extension__({ \
+ long int _temp_failure_retry_result; \
+ do _temp_failure_retry_result = (long int)(expression); \
+ while (_temp_failure_retry_result == -1L && errno == EINTR); \
+ _temp_failure_retry_result; \
+ }))
+
+// Useful internal utility functions. These calls are async-signal safe
+// provided the signal handler saves errno at entry and restores it before
+// return.
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+// signal_safe_open() - a wrapper for open(2) which ignores signals
+// Semantics equivalent to open(2):
+// returns a file-descriptor (>=0) on success, -1 on failure, error in errno
+int signal_safe_open(const char *path, int flags, ...);
+
+// signal_safe_close() - a wrapper for close(2) which ignores signals
+// Semantics equivalent to close(2):
+// returns 0 on success, -1 on failure, error in errno
+int signal_safe_close(int fd);
+
+// signal_safe_write() - a wrapper for write(2) which ignores signals
+// Semantics equivalent to write(2):
+// returns number of bytes written, -1 on failure, error in errno
+// additionally, (if not NULL) total bytes written in *bytes_written
+//
+// In the interrupted (EINTR) case, signal_safe_write will continue attempting
+// to write out buf. This means that in the
+// write -> interrupted by signal -> write -> error
+// case, it is possible for signal_safe_write to return -1 even though bytes
+// were flushed from the buffer by the first write. To handle this case the
+// optional bytes_written parameter is provided; when not NULL, it will always
+// contain the total bytes written before any error.
+ssize_t signal_safe_write(int fd, const char *buf, size_t count,
+ size_t *bytes_written);
+
+// signal_safe_read() - a wrapper for read(2) which ignores signals
+// Semantics equivalent to read(2):
+// returns number of bytes read, -1 on failure, error in errno
+// additionally, (if not NULL) total bytes read in *bytes_read
+//
+// In the interrupted (EINTR) case, signal_safe_read will continue attempting
+// to read into buf. This means that in the
+// read -> interrupted by signal -> read -> error
+// case, it is possible for signal_safe_read to return -1 even though bytes
+// were read by a previous read. To handle this case the optional bytes_read
+// parameter is provided; when not NULL, it will always contain the total bytes
+// read before any error.
+ssize_t signal_safe_read(int fd, char *buf, size_t count, size_t *bytes_read);
+
+// signal_safe_poll() - a wrapper for poll(2) which ignores signals
+// Semantics equivalent to poll(2):
+// Returns number of structures with non-zero revent fields.
+//
+// In the interrupted (EINTR) case, signal_safe_poll will continue attempting to
+// poll for data. Unlike ppoll/pselect, signal_safe_poll *ignores* signals
+// rather than attempting to re-enable them, which protects us from the
+// traditional races involved with the latter.
+int signal_safe_poll(struct ::pollfd *fds, int nfds, absl::Duration timeout);
+
+// Affinity helpers.
+
+// Returns a vector of the cpus the current thread is allowed to run on. There
+// are no guarantees that this will not change before, after, or even during
+// the call to AllowedCpus().
+std::vector<int> AllowedCpus();
+
+// Enacts a scoped affinity mask on the constructing thread. Attempts to
+// restore the original affinity mask on destruction.
+//
+// REQUIRES: For test-use only. Do not use this in production code.
+class ScopedAffinityMask {
+ public:
+ // When racing with an external restriction that has a zero-intersection with
+ // "allowed_cpus" we will construct, but immediately register as "Tampered()",
+ // without actual changes to affinity.
+ explicit ScopedAffinityMask(absl::Span<int> allowed_cpus);
+ explicit ScopedAffinityMask(int allowed_cpu);
+
+ // Restores original affinity iff our scoped affinity has not been externally
+ // modified (i.e. Tampered()). Otherwise, the updated affinity is preserved.
+ ~ScopedAffinityMask();
+
+ // Returns true if the affinity mask no longer matches what was set at point
+ // of construction.
+ //
+ // Note: This is instantaneous and not fool-proof. It's possible for an
+ // external affinity modification to subsequently align with our originally
+ // specified "allowed_cpus". In this case Tampered() will return false when
+ // time may have been spent executing previously on non-specified cpus.
+ bool Tampered();
+
+ private:
+ cpu_set_t original_cpus_, specified_cpus_;
+};
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // TCMALLOC_INTERNAL_UTIL_H_
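A small usage sketch for the signal-safe I/O helpers declared above (illustration only; the path is an arbitrary example and error handling is reduced to early returns):

#include <fcntl.h>

#include "tcmalloc/internal/util.h"

void ReadProcFileSketch() {
  using tcmalloc::tcmalloc_internal::signal_safe_close;
  using tcmalloc::tcmalloc_internal::signal_safe_open;
  using tcmalloc::tcmalloc_internal::signal_safe_read;

  int fd = signal_safe_open("/proc/self/statm", O_RDONLY);
  if (fd < 0) return;

  char buf[256];
  size_t bytes_read = 0;
  ssize_t rc = signal_safe_read(fd, buf, sizeof(buf), &bytes_read);
  // Even when rc == -1, bytes_read still reports what was read before the
  // error, per the contract documented above.
  (void)rc;
  signal_safe_close(fd);
}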
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal_malloc_extension.h b/contrib/libs/tcmalloc/tcmalloc/internal_malloc_extension.h
new file mode 100644
index 0000000000..66027418ed
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal_malloc_extension.h
@@ -0,0 +1,133 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Extra extensions exported by some malloc implementations. These
+// extensions are accessed through a virtual base class so an
+// application can link against a malloc that does not implement these
+// extensions, and it will get default versions that do nothing.
+
+#ifndef TCMALLOC_INTERNAL_MALLOC_EXTENSION_H_
+#define TCMALLOC_INTERNAL_MALLOC_EXTENSION_H_
+
+#include <string>
+
+#include "absl/base/attributes.h"
+#include "absl/functional/function_ref.h"
+#include "tcmalloc/malloc_extension.h"
+
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+// AllocationProfilingTokenAccessor and ProfileAccessor provide access to the
+// private constructors of AllocationProfilingToken and Profile that take a
+// pointer.
+class AllocationProfilingTokenAccessor {
+ public:
+ static MallocExtension::AllocationProfilingToken MakeToken(
+ std::unique_ptr<AllocationProfilingTokenBase> p) {
+ return MallocExtension::AllocationProfilingToken(std::move(p));
+ }
+};
+
+class ProfileAccessor {
+ public:
+ static Profile MakeProfile(std::unique_ptr<const ProfileBase> p) {
+ return Profile(std::move(p));
+ }
+};
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+
+#if ABSL_HAVE_ATTRIBUTE_WEAK && !defined(__APPLE__) && !defined(__EMSCRIPTEN__)
+
+extern "C" {
+
+ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_ForceCpuCacheActivation();
+
+ABSL_ATTRIBUTE_WEAK tcmalloc::AddressRegionFactory*
+MallocExtension_Internal_GetRegionFactory();
+ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_SetRegionFactory(
+ tcmalloc::AddressRegionFactory* factory);
+
+ABSL_ATTRIBUTE_WEAK const tcmalloc::tcmalloc_internal::ProfileBase*
+MallocExtension_Internal_SnapshotCurrent(tcmalloc::ProfileType type);
+
+ABSL_ATTRIBUTE_WEAK tcmalloc::tcmalloc_internal::AllocationProfilingTokenBase*
+MallocExtension_Internal_StartAllocationProfiling();
+
+ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_ActivateGuardedSampling();
+ABSL_ATTRIBUTE_WEAK tcmalloc::MallocExtension::Ownership
+MallocExtension_Internal_GetOwnership(const void* ptr);
+ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_GetMemoryLimit(
+ tcmalloc::MallocExtension::MemoryLimit* limit);
+ABSL_ATTRIBUTE_WEAK bool MallocExtension_Internal_GetNumericProperty(
+ const char* name_data, size_t name_size, size_t* value);
+ABSL_ATTRIBUTE_WEAK bool MallocExtension_Internal_GetPerCpuCachesActive();
+ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_DeactivatePerCpuCaches();
+ABSL_ATTRIBUTE_WEAK int32_t MallocExtension_Internal_GetMaxPerCpuCacheSize();
+ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_GetSkipSubreleaseInterval(
+ absl::Duration* ret);
+ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_GetProperties(
+ std::map<std::string, tcmalloc::MallocExtension::Property>* ret);
+ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_GetStats(std::string* ret);
+ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_SetMaxPerCpuCacheSize(
+ int32_t value);
+ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_SetSkipSubreleaseInterval(
+ absl::Duration value);
+ABSL_ATTRIBUTE_WEAK size_t MallocExtension_Internal_ReleaseCpuMemory(int cpu);
+ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_ReleaseMemoryToSystem(
+ size_t bytes);
+ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_SetMemoryLimit(
+ const tcmalloc::MallocExtension::MemoryLimit* limit);
+
+ABSL_ATTRIBUTE_WEAK size_t
+MallocExtension_Internal_GetAllocatedSize(const void* ptr);
+ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_MarkThreadBusy();
+ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_MarkThreadIdle();
+
+ABSL_ATTRIBUTE_WEAK int64_t MallocExtension_Internal_GetProfileSamplingRate();
+ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_SetProfileSamplingRate(
+ int64_t);
+
+ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_ProcessBackgroundActions();
+
+ABSL_ATTRIBUTE_WEAK tcmalloc::MallocExtension::BytesPerSecond
+MallocExtension_Internal_GetBackgroundReleaseRate();
+ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_SetBackgroundReleaseRate(
+ tcmalloc::MallocExtension::BytesPerSecond);
+
+ABSL_ATTRIBUTE_WEAK int64_t MallocExtension_Internal_GetGuardedSamplingRate();
+ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_SetGuardedSamplingRate(
+ int64_t);
+
+ABSL_ATTRIBUTE_WEAK int64_t
+MallocExtension_Internal_GetMaxTotalThreadCacheBytes();
+ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_SetMaxTotalThreadCacheBytes(
+ int64_t value);
+
+ABSL_ATTRIBUTE_WEAK void
+MallocExtension_EnableForkSupport();
+
+ABSL_ATTRIBUTE_WEAK void
+MallocExtension_SetSampleUserDataCallbacks(
+ tcmalloc::MallocExtension::CreateSampleUserDataCallback create,
+ tcmalloc::MallocExtension::CopySampleUserDataCallback copy,
+ tcmalloc::MallocExtension::DestroySampleUserDataCallback destroy);
+
+}
+
+#endif
+
+#endif // TCMALLOC_INTERNAL_MALLOC_EXTENSION_H_
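The weak declarations above are normally consumed behind a null check, so a binary linked against a malloc that lacks these hooks falls back to a default. A sketch of that guard pattern (illustrative, not code copied from this file):

#include "tcmalloc/internal_malloc_extension.h"

bool PerCpuCachesActiveOrFalse() {
#if ABSL_HAVE_ATTRIBUTE_WEAK && !defined(__APPLE__) && !defined(__EMSCRIPTEN__)
  // A weak symbol that was never defined resolves to nullptr.
  if (MallocExtension_Internal_GetPerCpuCachesActive == nullptr) {
    return false;
  }
  return MallocExtension_Internal_GetPerCpuCachesActive();
#else
  return false;
#endif
}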
diff --git a/contrib/libs/tcmalloc/tcmalloc/legacy_size_classes.cc b/contrib/libs/tcmalloc/tcmalloc/legacy_size_classes.cc
new file mode 100644
index 0000000000..5395252719
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/legacy_size_classes.cc
@@ -0,0 +1,711 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/common.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+
+namespace tcmalloc_internal {
+
+// <fixed> is fixed per-size-class overhead due to end-of-span fragmentation
+// and other factors. For instance, if we have a 96 byte size class, and use a
+// single 8KiB page, then we will hold 85 objects per span, and have 32 bytes
+// left over. There is also a fixed component of 48 bytes of TCMalloc metadata
+// per span. Together, the fixed overhead would be wasted/allocated =
+// (32 + 48) / (8192 - 32) ~= 0.98%.
+// There is also a dynamic component to overhead based on mismatches between the
+// number of bytes requested and the number of bytes provided by the size class.
+// Together they sum to the total overhead; for instance if you asked for a
+// 50-byte allocation that rounds up to a 64-byte size class, the dynamic
+// overhead would be 28%, and if <fixed> were 22% it would mean (on average)
+// 25 bytes of overhead for allocations of that size.
+
+// clang-format off
+#if defined(__cpp_aligned_new) && __STDCPP_DEFAULT_NEW_ALIGNMENT__ <= 8
+#if TCMALLOC_PAGE_SHIFT == 13
+static_assert(kMaxSize == 262144, "kMaxSize mismatch");
+static const int kCount = 86;
+static_assert(kCount <= kNumClasses);
+const int SizeMap::kLegacySizeClassesCount = kCount;
+const SizeClassInfo SizeMap::kLegacySizeClasses[SizeMap::kLegacySizeClassesCount] = {
+ // <bytes>, <pages>, <batch size> <fixed>
+ { 0, 0, 0}, // +Inf%
+ { 8, 1, 32}, // 0.59%
+ { 16, 1, 32}, // 0.59%
+ { 24, 1, 32}, // 0.68%
+ { 32, 1, 32}, // 0.59%
+ { 40, 1, 32}, // 0.98%
+ { 48, 1, 32}, // 0.98%
+ { 56, 1, 32}, // 0.78%
+ { 64, 1, 32}, // 0.59%
+ { 72, 1, 32}, // 1.28%
+ { 80, 1, 32}, // 0.98%
+ { 88, 1, 32}, // 0.68%
+ { 96, 1, 32}, // 0.98%
+ { 104, 1, 32}, // 1.58%
+ { 112, 1, 32}, // 0.78%
+ { 120, 1, 32}, // 0.98%
+ { 128, 1, 32}, // 0.59%
+ { 136, 1, 32}, // 0.98%
+ { 144, 1, 32}, // 2.18%
+ { 160, 1, 32}, // 0.98%
+ { 176, 1, 32}, // 1.78%
+ { 192, 1, 32}, // 2.18%
+ { 208, 1, 32}, // 1.58%
+ { 224, 1, 32}, // 2.18%
+ { 240, 1, 32}, // 0.98%
+ { 256, 1, 32}, // 0.59%
+ { 272, 1, 32}, // 0.98%
+ { 296, 1, 32}, // 3.10%
+ { 312, 1, 32}, // 1.58%
+ { 336, 1, 32}, // 2.18%
+ { 352, 1, 32}, // 1.78%
+ { 368, 1, 32}, // 1.78%
+ { 408, 1, 32}, // 0.98%
+ { 448, 1, 32}, // 2.18%
+ { 480, 1, 32}, // 0.98%
+ { 512, 1, 32}, // 0.59%
+ { 576, 1, 32}, // 2.18%
+ { 640, 1, 32}, // 7.29%
+ { 704, 1, 32}, // 6.40%
+ { 768, 1, 32}, // 7.29%
+ { 896, 1, 32}, // 2.18%
+ { 1024, 1, 32}, // 0.59%
+ { 1152, 2, 32}, // 1.88%
+ { 1280, 2, 32}, // 6.98%
+ { 1408, 2, 32}, // 6.10%
+ { 1536, 2, 32}, // 6.98%
+ { 1792, 2, 32}, // 1.88%
+ { 2048, 2, 32}, // 0.29%
+ { 2304, 2, 28}, // 1.88%
+ { 2688, 2, 24}, // 1.88%
+ { 2816, 3, 23}, // 9.30%
+ { 3200, 2, 20}, // 2.70%
+ { 3456, 3, 18}, // 1.79%
+ { 3584, 4, 18}, // 1.74%
+ { 4096, 2, 16}, // 0.29%
+ { 4736, 3, 13}, // 3.99%
+ { 5376, 2, 12}, // 1.88%
+ { 6144, 3, 10}, // 0.20%
+ { 6528, 4, 10}, // 0.54%
+ { 7168, 7, 9}, // 0.08%
+ { 8192, 2, 8}, // 0.29%
+ { 9472, 5, 6}, // 8.23%
+ { 10240, 4, 6}, // 6.82%
+ { 12288, 3, 5}, // 0.20%
+ { 13568, 5, 4}, // 0.75%
+ { 14336, 7, 4}, // 0.08%
+ { 16384, 2, 4}, // 0.29%
+ { 20480, 5, 3}, // 0.12%
+ { 24576, 3, 2}, // 0.20%
+ { 28672, 7, 2}, // 0.08%
+ { 32768, 4, 2}, // 0.15%
+ { 40960, 5, 2}, // 0.12%
+ { 49152, 6, 2}, // 0.10%
+ { 57344, 7, 2}, // 0.08%
+ { 65536, 8, 2}, // 0.07%
+ { 73728, 9, 2}, // 0.07%
+ { 81920, 10, 2}, // 0.06%
+ { 98304, 12, 2}, // 0.05%
+ { 114688, 14, 2}, // 0.04%
+ { 131072, 16, 2}, // 0.04%
+ { 147456, 18, 2}, // 0.03%
+ { 163840, 20, 2}, // 0.03%
+ { 180224, 22, 2}, // 0.03%
+ { 204800, 25, 2}, // 0.02%
+ { 237568, 29, 2}, // 0.02%
+ { 262144, 32, 2}, // 0.02%
+};
+#elif TCMALLOC_PAGE_SHIFT == 15
+static_assert(kMaxSize == 262144, "kMaxSize mismatch");
+static const int kCount = 78;
+static_assert(kCount <= kNumClasses);
+const int SizeMap::kLegacySizeClassesCount = kCount;
+const SizeClassInfo SizeMap::kLegacySizeClasses[SizeMap::kLegacySizeClassesCount] = {
+ // <bytes>, <pages>, <batch size> <fixed>
+ { 0, 0, 0}, // +Inf%
+ { 8, 1, 32}, // 0.15%
+ { 16, 1, 32}, // 0.15%
+ { 24, 1, 32}, // 0.17%
+ { 32, 1, 32}, // 0.15%
+ { 40, 1, 32}, // 0.17%
+ { 48, 1, 32}, // 0.24%
+ { 56, 1, 32}, // 0.17%
+ { 64, 1, 32}, // 0.15%
+ { 72, 1, 32}, // 0.17%
+ { 80, 1, 32}, // 0.29%
+ { 88, 1, 32}, // 0.24%
+ { 96, 1, 32}, // 0.24%
+ { 104, 1, 32}, // 0.17%
+ { 112, 1, 32}, // 0.34%
+ { 128, 1, 32}, // 0.15%
+ { 144, 1, 32}, // 0.39%
+ { 160, 1, 32}, // 0.54%
+ { 176, 1, 32}, // 0.24%
+ { 192, 1, 32}, // 0.54%
+ { 208, 1, 32}, // 0.49%
+ { 224, 1, 32}, // 0.34%
+ { 240, 1, 32}, // 0.54%
+ { 256, 1, 32}, // 0.15%
+ { 280, 1, 32}, // 0.17%
+ { 304, 1, 32}, // 0.89%
+ { 328, 1, 32}, // 1.06%
+ { 352, 1, 32}, // 0.24%
+ { 384, 1, 32}, // 0.54%
+ { 416, 1, 32}, // 1.13%
+ { 448, 1, 32}, // 0.34%
+ { 488, 1, 32}, // 0.37%
+ { 512, 1, 32}, // 0.15%
+ { 576, 1, 32}, // 1.74%
+ { 640, 1, 32}, // 0.54%
+ { 704, 1, 32}, // 1.33%
+ { 832, 1, 32}, // 1.13%
+ { 896, 1, 32}, // 1.74%
+ { 1024, 1, 32}, // 0.15%
+ { 1152, 1, 32}, // 1.74%
+ { 1280, 1, 32}, // 2.55%
+ { 1536, 1, 32}, // 1.74%
+ { 1792, 1, 32}, // 1.74%
+ { 2048, 1, 32}, // 0.15%
+ { 2176, 1, 30}, // 0.54%
+ { 2304, 1, 28}, // 1.74%
+ { 2688, 1, 24}, // 1.74%
+ { 2944, 1, 22}, // 1.33%
+ { 3200, 1, 20}, // 2.55%
+ { 3584, 1, 18}, // 1.74%
+ { 4096, 1, 16}, // 0.15%
+ { 4608, 1, 14}, // 1.74%
+ { 5376, 1, 12}, // 1.74%
+ { 6528, 1, 10}, // 0.54%
+ { 7168, 2, 9}, // 1.66%
+ { 8192, 1, 8}, // 0.15%
+ { 9344, 2, 7}, // 0.27%
+ { 10880, 1, 6}, // 0.54%
+ { 13952, 3, 4}, // 0.70%
+ { 16384, 1, 4}, // 0.15%
+ { 19072, 3, 3}, // 3.14%
+ { 21760, 2, 3}, // 0.47%
+ { 24576, 3, 2}, // 0.05%
+ { 28032, 6, 2}, // 0.22%
+ { 32768, 1, 2}, // 0.15%
+ { 38144, 5, 2}, // 7.41%
+ { 40960, 4, 2}, // 6.71%
+ { 49152, 3, 2}, // 0.05%
+ { 57344, 7, 2}, // 0.02%
+ { 65536, 2, 2}, // 0.07%
+ { 81920, 5, 2}, // 0.03%
+ { 98304, 3, 2}, // 0.05%
+ { 114688, 7, 2}, // 0.02%
+ { 131072, 4, 2}, // 0.04%
+ { 163840, 5, 2}, // 0.03%
+ { 196608, 6, 2}, // 0.02%
+ { 229376, 7, 2}, // 0.02%
+ { 262144, 8, 2}, // 0.02%
+};
+#elif TCMALLOC_PAGE_SHIFT == 18
+static_assert(kMaxSize == 262144, "kMaxSize mismatch");
+static const int kCount = 89;
+static_assert(kCount <= kNumClasses);
+const int SizeMap::kLegacySizeClassesCount = kCount;
+const SizeClassInfo SizeMap::kLegacySizeClasses[SizeMap::kLegacySizeClassesCount] = {
+ // <bytes>, <pages>, <batch size> <fixed>
+ { 0, 0, 0}, // +Inf%
+ { 8, 1, 32}, // 0.02%
+ { 16, 1, 32}, // 0.02%
+ { 24, 1, 32}, // 0.02%
+ { 32, 1, 32}, // 0.02%
+ { 40, 1, 32}, // 0.03%
+ { 48, 1, 32}, // 0.02%
+ { 56, 1, 32}, // 0.02%
+ { 64, 1, 32}, // 0.02%
+ { 72, 1, 32}, // 0.04%
+ { 80, 1, 32}, // 0.04%
+ { 88, 1, 32}, // 0.05%
+ { 96, 1, 32}, // 0.04%
+ { 104, 1, 32}, // 0.04%
+ { 112, 1, 32}, // 0.04%
+ { 128, 1, 32}, // 0.02%
+ { 144, 1, 32}, // 0.04%
+ { 160, 1, 32}, // 0.04%
+ { 176, 1, 32}, // 0.05%
+ { 192, 1, 32}, // 0.04%
+ { 208, 1, 32}, // 0.04%
+ { 240, 1, 32}, // 0.04%
+ { 256, 1, 32}, // 0.02%
+ { 304, 1, 32}, // 0.05%
+ { 336, 1, 32}, // 0.04%
+ { 360, 1, 32}, // 0.04%
+ { 408, 1, 32}, // 0.10%
+ { 456, 1, 32}, // 0.17%
+ { 512, 1, 32}, // 0.02%
+ { 576, 1, 32}, // 0.04%
+ { 640, 1, 32}, // 0.17%
+ { 704, 1, 32}, // 0.12%
+ { 768, 1, 32}, // 0.12%
+ { 832, 1, 32}, // 0.04%
+ { 896, 1, 32}, // 0.21%
+ { 1024, 1, 32}, // 0.02%
+ { 1152, 1, 32}, // 0.26%
+ { 1280, 1, 32}, // 0.41%
+ { 1536, 1, 32}, // 0.41%
+ { 1664, 1, 32}, // 0.36%
+ { 1792, 1, 32}, // 0.21%
+ { 1920, 1, 32}, // 0.41%
+ { 2048, 1, 32}, // 0.02%
+ { 2176, 1, 30}, // 0.41%
+ { 2304, 1, 28}, // 0.71%
+ { 2432, 1, 26}, // 0.76%
+ { 2560, 1, 25}, // 0.41%
+ { 2688, 1, 24}, // 0.56%
+ { 2816, 1, 23}, // 0.12%
+ { 2944, 1, 22}, // 0.07%
+ { 3072, 1, 21}, // 0.41%
+ { 3328, 1, 19}, // 1.00%
+ { 3584, 1, 18}, // 0.21%
+ { 3840, 1, 17}, // 0.41%
+ { 4096, 1, 16}, // 0.02%
+ { 4736, 1, 13}, // 0.66%
+ { 5504, 1, 11}, // 1.35%
+ { 6144, 1, 10}, // 1.61%
+ { 6528, 1, 10}, // 0.41%
+ { 6784, 1, 9}, // 1.71%
+ { 7168, 1, 9}, // 1.61%
+ { 7680, 1, 8}, // 0.41%
+ { 8192, 1, 8}, // 0.02%
+ { 8704, 1, 7}, // 0.41%
+ { 9344, 1, 7}, // 0.21%
+ { 10880, 1, 6}, // 0.41%
+ { 11904, 1, 5}, // 0.12%
+ { 13056, 1, 5}, // 0.41%
+ { 14464, 1, 4}, // 0.71%
+ { 16384, 1, 4}, // 0.02%
+ { 18688, 1, 3}, // 0.21%
+ { 21760, 1, 3}, // 0.41%
+ { 26112, 1, 2}, // 0.41%
+ { 29056, 1, 2}, // 0.26%
+ { 32768, 1, 2}, // 0.02%
+ { 37376, 1, 2}, // 0.21%
+ { 43648, 1, 2}, // 0.12%
+ { 52352, 1, 2}, // 0.17%
+ { 56064, 2, 2}, // 3.92%
+ { 65536, 1, 2}, // 0.02%
+ { 74880, 2, 2}, // 0.03%
+ { 87296, 1, 2}, // 0.12%
+ { 104832, 2, 2}, // 0.03%
+ { 112256, 3, 2}, // 0.09%
+ { 131072, 1, 2}, // 0.02%
+ { 149760, 3, 2}, // 5.03%
+ { 174720, 2, 2}, // 0.03%
+ { 209664, 4, 2}, // 0.03%
+ { 262144, 1, 2}, // 0.02%
+};
+#elif TCMALLOC_PAGE_SHIFT == 12
+static_assert(kMaxSize == 8192, "kMaxSize mismatch");
+static const int kCount = 46;
+static_assert(kCount <= kNumClasses);
+const int SizeMap::kLegacySizeClassesCount = kCount;
+const SizeClassInfo SizeMap::kLegacySizeClasses[SizeMap::kLegacySizeClassesCount] = {
+ // <bytes>, <pages>, <batch size> <fixed>
+ { 0, 0, 0}, // +Inf%
+ { 8, 1, 32}, // 1.17%
+ { 16, 1, 32}, // 1.17%
+ { 24, 1, 32}, // 1.57%
+ { 32, 1, 32}, // 1.17%
+ { 40, 1, 32}, // 1.57%
+ { 48, 1, 32}, // 1.57%
+ { 56, 1, 32}, // 1.37%
+ { 64, 1, 32}, // 1.17%
+ { 72, 1, 32}, // 2.78%
+ { 80, 1, 32}, // 1.57%
+ { 88, 1, 32}, // 2.37%
+ { 96, 1, 32}, // 2.78%
+ { 104, 1, 32}, // 2.17%
+ { 120, 1, 32}, // 1.57%
+ { 128, 1, 32}, // 1.17%
+ { 144, 1, 32}, // 2.78%
+ { 160, 1, 32}, // 3.60%
+ { 184, 1, 32}, // 2.37%
+ { 208, 1, 32}, // 4.86%
+ { 240, 1, 32}, // 1.57%
+ { 256, 1, 32}, // 1.17%
+ { 272, 1, 32}, // 1.57%
+ { 312, 1, 32}, // 2.17%
+ { 336, 1, 32}, // 2.78%
+ { 368, 1, 32}, // 2.37%
+ { 408, 1, 32}, // 1.57%
+ { 512, 1, 32}, // 1.17%
+ { 576, 2, 32}, // 2.18%
+ { 704, 2, 32}, // 6.40%
+ { 768, 2, 32}, // 7.29%
+ { 896, 2, 32}, // 2.18%
+ { 1024, 2, 32}, // 0.59%
+ { 1152, 3, 32}, // 7.08%
+ { 1280, 3, 32}, // 7.08%
+ { 1536, 3, 32}, // 0.39%
+ { 1792, 4, 32}, // 1.88%
+ { 2048, 4, 32}, // 0.29%
+ { 2304, 4, 28}, // 1.88%
+ { 2688, 4, 24}, // 1.88%
+ { 3456, 6, 18}, // 1.79%
+ { 4096, 4, 16}, // 0.29%
+ { 5376, 4, 12}, // 1.88%
+ { 6144, 3, 10}, // 0.39%
+ { 7168, 7, 9}, // 0.17%
+ { 8192, 4, 8}, // 0.29%
+};
+#else
+#error "Unsupported TCMALLOC_PAGE_SHIFT value!"
+#endif
+#else
+#if TCMALLOC_PAGE_SHIFT == 13
+static_assert(kMaxSize == 262144, "kMaxSize mismatch");
+static const int kCount = 86;
+static_assert(kCount <= kNumClasses);
+const int SizeMap::kLegacySizeClassesCount = kCount;
+const SizeClassInfo SizeMap::kLegacySizeClasses[SizeMap::kLegacySizeClassesCount] = {
+ // <bytes>, <pages>, <batch size> <fixed>
+ { 0, 0, 0}, // +Inf%
+ { 8, 1, 32}, // 0.59%
+ { 16, 1, 32}, // 0.59%
+ { 32, 1, 32}, // 0.59%
+ { 48, 1, 32}, // 0.98%
+ { 64, 1, 32}, // 0.59%
+ { 80, 1, 32}, // 0.98%
+ { 96, 1, 32}, // 0.98%
+ { 112, 1, 32}, // 0.78%
+ { 128, 1, 32}, // 0.59%
+ { 144, 1, 32}, // 2.18%
+ { 160, 1, 32}, // 0.98%
+ { 176, 1, 32}, // 1.78%
+ { 192, 1, 32}, // 2.18%
+ { 208, 1, 32}, // 1.58%
+ { 224, 1, 32}, // 2.18%
+ { 240, 1, 32}, // 0.98%
+ { 256, 1, 32}, // 0.59%
+ { 272, 1, 32}, // 0.98%
+ { 288, 1, 32}, // 2.18%
+ { 304, 1, 32}, // 4.25%
+ { 320, 1, 32}, // 3.00%
+ { 336, 1, 32}, // 2.18%
+ { 352, 1, 32}, // 1.78%
+ { 368, 1, 32}, // 1.78%
+ { 384, 1, 32}, // 2.18%
+ { 400, 1, 32}, // 3.00%
+ { 416, 1, 32}, // 4.25%
+ { 448, 1, 32}, // 2.18%
+ { 480, 1, 32}, // 0.98%
+ { 512, 1, 32}, // 0.59%
+ { 576, 1, 32}, // 2.18%
+ { 640, 1, 32}, // 7.29%
+ { 704, 1, 32}, // 6.40%
+ { 768, 1, 32}, // 7.29%
+ { 896, 1, 32}, // 2.18%
+ { 1024, 1, 32}, // 0.59%
+ { 1152, 2, 32}, // 1.88%
+ { 1280, 2, 32}, // 6.98%
+ { 1408, 2, 32}, // 6.10%
+ { 1536, 2, 32}, // 6.98%
+ { 1792, 2, 32}, // 1.88%
+ { 2048, 2, 32}, // 0.29%
+ { 2304, 2, 28}, // 1.88%
+ { 2688, 2, 24}, // 1.88%
+ { 2816, 3, 23}, // 9.30%
+ { 3200, 2, 20}, // 2.70%
+ { 3456, 3, 18}, // 1.79%
+ { 3584, 4, 18}, // 1.74%
+ { 4096, 2, 16}, // 0.29%
+ { 4736, 3, 13}, // 3.99%
+ { 5376, 2, 12}, // 1.88%
+ { 6144, 3, 10}, // 0.20%
+ { 6528, 4, 10}, // 0.54%
+ { 6784, 5, 9}, // 0.75%
+ { 7168, 7, 9}, // 0.08%
+ { 8192, 2, 8}, // 0.29%
+ { 9472, 5, 6}, // 8.23%
+ { 10240, 4, 6}, // 6.82%
+ { 12288, 3, 5}, // 0.20%
+ { 13568, 5, 4}, // 0.75%
+ { 14336, 7, 4}, // 0.08%
+ { 16384, 2, 4}, // 0.29%
+ { 20480, 5, 3}, // 0.12%
+ { 24576, 3, 2}, // 0.20%
+ { 28672, 7, 2}, // 0.08%
+ { 32768, 4, 2}, // 0.15%
+ { 40960, 5, 2}, // 0.12%
+ { 49152, 6, 2}, // 0.10%
+ { 57344, 7, 2}, // 0.08%
+ { 65536, 8, 2}, // 0.07%
+ { 73728, 9, 2}, // 0.07%
+ { 81920, 10, 2}, // 0.06%
+ { 90112, 11, 2}, // 0.05%
+ { 98304, 12, 2}, // 0.05%
+ { 106496, 13, 2}, // 0.05%
+ { 114688, 14, 2}, // 0.04%
+ { 131072, 16, 2}, // 0.04%
+ { 139264, 17, 2}, // 0.03%
+ { 155648, 19, 2}, // 0.03%
+ { 172032, 21, 2}, // 0.03%
+ { 188416, 23, 2}, // 0.03%
+ { 204800, 25, 2}, // 0.02%
+ { 221184, 27, 2}, // 0.02%
+ { 237568, 29, 2}, // 0.02%
+ { 262144, 32, 2}, // 0.02%
+};
+#elif TCMALLOC_PAGE_SHIFT == 15
+static_assert(kMaxSize == 262144, "kMaxSize mismatch");
+static const int kCount = 78;
+static_assert(kCount <= kNumClasses);
+const int SizeMap::kLegacySizeClassesCount = kCount;
+const SizeClassInfo SizeMap::kLegacySizeClasses[SizeMap::kLegacySizeClassesCount] = {
+ // <bytes>, <pages>, <batch size> <fixed>
+ { 0, 0, 0}, // +Inf%
+ { 8, 1, 32}, // 0.15%
+ { 16, 1, 32}, // 0.15%
+ { 32, 1, 32}, // 0.15%
+ { 48, 1, 32}, // 0.24%
+ { 64, 1, 32}, // 0.15%
+ { 80, 1, 32}, // 0.29%
+ { 96, 1, 32}, // 0.24%
+ { 112, 1, 32}, // 0.34%
+ { 128, 1, 32}, // 0.15%
+ { 144, 1, 32}, // 0.39%
+ { 160, 1, 32}, // 0.54%
+ { 176, 1, 32}, // 0.24%
+ { 192, 1, 32}, // 0.54%
+ { 208, 1, 32}, // 0.49%
+ { 224, 1, 32}, // 0.34%
+ { 240, 1, 32}, // 0.54%
+ { 256, 1, 32}, // 0.15%
+ { 272, 1, 32}, // 0.54%
+ { 288, 1, 32}, // 0.84%
+ { 304, 1, 32}, // 0.89%
+ { 320, 1, 32}, // 0.54%
+ { 336, 1, 32}, // 0.69%
+ { 352, 1, 32}, // 0.24%
+ { 384, 1, 32}, // 0.54%
+ { 416, 1, 32}, // 1.13%
+ { 448, 1, 32}, // 0.34%
+ { 480, 1, 32}, // 0.54%
+ { 512, 1, 32}, // 0.15%
+ { 576, 1, 32}, // 1.74%
+ { 640, 1, 32}, // 0.54%
+ { 704, 1, 32}, // 1.33%
+ { 768, 1, 32}, // 1.74%
+ { 832, 1, 32}, // 1.13%
+ { 896, 1, 32}, // 1.74%
+ { 1024, 1, 32}, // 0.15%
+ { 1152, 1, 32}, // 1.74%
+ { 1280, 1, 32}, // 2.55%
+ { 1408, 1, 32}, // 1.33%
+ { 1536, 1, 32}, // 1.74%
+ { 1792, 1, 32}, // 1.74%
+ { 2048, 1, 32}, // 0.15%
+ { 2176, 1, 30}, // 0.54%
+ { 2304, 1, 28}, // 1.74%
+ { 2432, 1, 26}, // 3.80%
+ { 2688, 1, 24}, // 1.74%
+ { 2944, 1, 22}, // 1.33%
+ { 3200, 1, 20}, // 2.55%
+ { 3584, 1, 18}, // 1.74%
+ { 4096, 1, 16}, // 0.15%
+ { 4608, 1, 14}, // 1.74%
+ { 5376, 1, 12}, // 1.74%
+ { 6528, 1, 10}, // 0.54%
+ { 7168, 2, 9}, // 1.66%
+ { 8192, 1, 8}, // 0.15%
+ { 9344, 2, 7}, // 0.27%
+ { 10880, 1, 6}, // 0.54%
+ { 13056, 2, 5}, // 0.47%
+ { 13952, 3, 4}, // 0.70%
+ { 16384, 1, 4}, // 0.15%
+ { 19072, 3, 3}, // 3.14%
+ { 21760, 2, 3}, // 0.47%
+ { 24576, 3, 2}, // 0.05%
+ { 28032, 6, 2}, // 0.22%
+ { 32768, 1, 2}, // 0.15%
+ { 38144, 5, 2}, // 7.41%
+ { 40960, 4, 2}, // 6.71%
+ { 49152, 3, 2}, // 0.05%
+ { 57344, 7, 2}, // 0.02%
+ { 65536, 2, 2}, // 0.07%
+ { 81920, 5, 2}, // 0.03%
+ { 98304, 3, 2}, // 0.05%
+ { 114688, 7, 2}, // 0.02%
+ { 131072, 4, 2}, // 0.04%
+ { 163840, 5, 2}, // 0.03%
+ { 196608, 6, 2}, // 0.02%
+ { 229376, 7, 2}, // 0.02%
+ { 262144, 8, 2}, // 0.02%
+};
+#elif TCMALLOC_PAGE_SHIFT == 18
+static_assert(kMaxSize == 262144, "kMaxSize mismatch");
+static const int kCount = 89;
+static_assert(kCount <= kNumClasses);
+const int SizeMap::kLegacySizeClassesCount = kCount;
+const SizeClassInfo SizeMap::kLegacySizeClasses[SizeMap::kLegacySizeClassesCount] = {
+ // <bytes>, <pages>, <batch size> <fixed>
+ { 0, 0, 0}, // +Inf%
+ { 8, 1, 32}, // 0.02%
+ { 16, 1, 32}, // 0.02%
+ { 32, 1, 32}, // 0.02%
+ { 48, 1, 32}, // 0.02%
+ { 64, 1, 32}, // 0.02%
+ { 80, 1, 32}, // 0.04%
+ { 96, 1, 32}, // 0.04%
+ { 112, 1, 32}, // 0.04%
+ { 128, 1, 32}, // 0.02%
+ { 144, 1, 32}, // 0.04%
+ { 160, 1, 32}, // 0.04%
+ { 176, 1, 32}, // 0.05%
+ { 192, 1, 32}, // 0.04%
+ { 208, 1, 32}, // 0.04%
+ { 240, 1, 32}, // 0.04%
+ { 256, 1, 32}, // 0.02%
+ { 304, 1, 32}, // 0.05%
+ { 336, 1, 32}, // 0.04%
+ { 368, 1, 32}, // 0.07%
+ { 416, 1, 32}, // 0.04%
+ { 464, 1, 32}, // 0.19%
+ { 512, 1, 32}, // 0.02%
+ { 576, 1, 32}, // 0.04%
+ { 640, 1, 32}, // 0.17%
+ { 704, 1, 32}, // 0.12%
+ { 768, 1, 32}, // 0.12%
+ { 832, 1, 32}, // 0.04%
+ { 896, 1, 32}, // 0.21%
+ { 1024, 1, 32}, // 0.02%
+ { 1152, 1, 32}, // 0.26%
+ { 1280, 1, 32}, // 0.41%
+ { 1408, 1, 32}, // 0.12%
+ { 1536, 1, 32}, // 0.41%
+ { 1664, 1, 32}, // 0.36%
+ { 1792, 1, 32}, // 0.21%
+ { 1920, 1, 32}, // 0.41%
+ { 2048, 1, 32}, // 0.02%
+ { 2176, 1, 30}, // 0.41%
+ { 2304, 1, 28}, // 0.71%
+ { 2432, 1, 26}, // 0.76%
+ { 2560, 1, 25}, // 0.41%
+ { 2688, 1, 24}, // 0.56%
+ { 2816, 1, 23}, // 0.12%
+ { 2944, 1, 22}, // 0.07%
+ { 3072, 1, 21}, // 0.41%
+ { 3200, 1, 20}, // 1.15%
+ { 3328, 1, 19}, // 1.00%
+ { 3584, 1, 18}, // 0.21%
+ { 3840, 1, 17}, // 0.41%
+ { 4096, 1, 16}, // 0.02%
+ { 4736, 1, 13}, // 0.66%
+ { 5504, 1, 11}, // 1.35%
+ { 6144, 1, 10}, // 1.61%
+ { 6528, 1, 10}, // 0.41%
+ { 6784, 1, 9}, // 1.71%
+ { 7168, 1, 9}, // 1.61%
+ { 7680, 1, 8}, // 0.41%
+ { 8192, 1, 8}, // 0.02%
+ { 8704, 1, 7}, // 0.41%
+ { 9344, 1, 7}, // 0.21%
+ { 10368, 1, 6}, // 1.15%
+ { 11392, 1, 5}, // 0.07%
+ { 12416, 1, 5}, // 0.56%
+ { 13696, 1, 4}, // 0.76%
+ { 14464, 1, 4}, // 0.71%
+ { 16384, 1, 4}, // 0.02%
+ { 17408, 1, 3}, // 0.41%
+ { 20096, 1, 3}, // 0.36%
+ { 21760, 1, 3}, // 0.41%
+ { 23808, 1, 2}, // 0.12%
+ { 26112, 1, 2}, // 0.41%
+ { 29056, 1, 2}, // 0.26%
+ { 32768, 1, 2}, // 0.02%
+ { 37376, 1, 2}, // 0.21%
+ { 43648, 1, 2}, // 0.12%
+ { 52352, 1, 2}, // 0.17%
+ { 56064, 2, 2}, // 3.92%
+ { 65536, 1, 2}, // 0.02%
+ { 74880, 2, 2}, // 0.03%
+ { 87296, 1, 2}, // 0.12%
+ { 104832, 2, 2}, // 0.03%
+ { 112256, 3, 2}, // 0.09%
+ { 131072, 1, 2}, // 0.02%
+ { 149760, 3, 2}, // 5.03%
+ { 174720, 2, 2}, // 0.03%
+ { 196608, 3, 2}, // 0.01%
+ { 209664, 4, 2}, // 0.03%
+ { 262144, 1, 2}, // 0.02%
+};
+#elif TCMALLOC_PAGE_SHIFT == 12
+static_assert(kMaxSize == 8192, "kMaxSize mismatch");
+static const int kCount = 46;
+static_assert(kCount <= kNumClasses);
+const int SizeMap::kLegacySizeClassesCount = kCount;
+const SizeClassInfo SizeMap::kLegacySizeClasses[SizeMap::kLegacySizeClassesCount] = {
+ // <bytes>, <pages>, <batch size> <fixed>
+ { 0, 0, 0}, // +Inf%
+ { 8, 1, 32}, // 1.17%
+ { 16, 1, 32}, // 1.17%
+ { 32, 1, 32}, // 1.17%
+ { 48, 1, 32}, // 1.57%
+ { 64, 1, 32}, // 1.17%
+ { 80, 1, 32}, // 1.57%
+ { 96, 1, 32}, // 2.78%
+ { 112, 1, 32}, // 2.78%
+ { 128, 1, 32}, // 1.17%
+ { 144, 1, 32}, // 2.78%
+ { 160, 1, 32}, // 3.60%
+ { 176, 1, 32}, // 2.37%
+ { 192, 1, 32}, // 2.78%
+ { 208, 1, 32}, // 4.86%
+ { 224, 1, 32}, // 2.78%
+ { 240, 1, 32}, // 1.57%
+ { 256, 1, 32}, // 1.17%
+ { 272, 1, 32}, // 1.57%
+ { 288, 1, 32}, // 2.78%
+ { 304, 1, 32}, // 4.86%
+ { 336, 1, 32}, // 2.78%
+ { 368, 1, 32}, // 2.37%
+ { 400, 1, 32}, // 3.60%
+ { 448, 1, 32}, // 2.78%
+ { 512, 1, 32}, // 1.17%
+ { 576, 2, 32}, // 2.18%
+ { 640, 2, 32}, // 7.29%
+ { 704, 2, 32}, // 6.40%
+ { 768, 2, 32}, // 7.29%
+ { 896, 2, 32}, // 2.18%
+ { 1024, 2, 32}, // 0.59%
+ { 1152, 3, 32}, // 7.08%
+ { 1280, 3, 32}, // 7.08%
+ { 1536, 3, 32}, // 0.39%
+ { 1792, 4, 32}, // 1.88%
+ { 2048, 4, 32}, // 0.29%
+ { 2304, 4, 28}, // 1.88%
+ { 2688, 4, 24}, // 1.88%
+ { 3200, 4, 20}, // 2.70%
+ { 3584, 7, 18}, // 0.17%
+ { 4096, 4, 16}, // 0.29%
+ { 5376, 4, 12}, // 1.88%
+ { 6144, 3, 10}, // 0.39%
+ { 7168, 7, 9}, // 0.17%
+ { 8192, 4, 8}, // 0.29%
+};
+#else
+#error "Unsupported TCMALLOC_PAGE_SHIFT value!"
+#endif
+#endif
+// clang-format on
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
diff --git a/contrib/libs/tcmalloc/tcmalloc/libc_override.h b/contrib/libs/tcmalloc/tcmalloc/libc_override.h
new file mode 100644
index 0000000000..89f8e4e5c8
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/libc_override.h
@@ -0,0 +1,39 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This .h file imports the code that causes tcmalloc to override libc
+// versions of malloc/free/new/delete/etc. That is, it provides the
+// logic that makes it so calls to malloc(10) go through tcmalloc,
+// rather than the default (libc) malloc.
+//
+// Every libc has its own way of doing this, and sometimes the compiler
+// matters too, so we have a different file for each libc, and often
+// for different compilers and OS's.
+
+#ifndef TCMALLOC_LIBC_OVERRIDE_H_
+#define TCMALLOC_LIBC_OVERRIDE_H_
+
+#include <features.h>
+
+#include "tcmalloc/tcmalloc.h"
+
+#if defined(__GLIBC__)
+#include "tcmalloc/libc_override_glibc.h"
+
+#else
+#include "tcmalloc/libc_override_redefine.h"
+
+#endif
+
+#endif // TCMALLOC_LIBC_OVERRIDE_H_
diff --git a/contrib/libs/tcmalloc/tcmalloc/libc_override_gcc_and_weak.h b/contrib/libs/tcmalloc/tcmalloc/libc_override_gcc_and_weak.h
new file mode 100644
index 0000000000..709bcb727f
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/libc_override_gcc_and_weak.h
@@ -0,0 +1,114 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Used to override malloc routines on systems that define the
+// memory allocation routines to be weak symbols in their libc
+// (almost all Unix-based systems are like this), on gcc, which
+// supports the 'alias' attribute.
+
+#ifndef TCMALLOC_LIBC_OVERRIDE_GCC_AND_WEAK_INL_H_
+#define TCMALLOC_LIBC_OVERRIDE_GCC_AND_WEAK_INL_H_
+
+#include <stddef.h>
+
+#include <new>
+
+#include "tcmalloc/tcmalloc.h"
+
+#ifndef __GNUC__
+#error libc_override_gcc_and_weak.h is for gcc distributions only.
+#endif
+
+// visibility("default") ensures that these symbols are always exported, even
+// with -fvisibility=hidden.
+#define TCMALLOC_ALIAS(tc_fn) \
+ __attribute__((alias(#tc_fn), visibility("default")))
+
+void* operator new(size_t size) noexcept(false)
+ TCMALLOC_ALIAS(TCMallocInternalNew);
+void operator delete(void* p) noexcept TCMALLOC_ALIAS(TCMallocInternalDelete);
+void operator delete(void* p, size_t size) noexcept
+ TCMALLOC_ALIAS(TCMallocInternalDeleteSized);
+void* operator new[](size_t size) noexcept(false)
+ TCMALLOC_ALIAS(TCMallocInternalNewArray);
+void operator delete[](void* p) noexcept
+ TCMALLOC_ALIAS(TCMallocInternalDeleteArray);
+void operator delete[](void* p, size_t size) noexcept
+ TCMALLOC_ALIAS(TCMallocInternalDeleteArraySized);
+void* operator new(size_t size, const std::nothrow_t& nt) noexcept
+ TCMALLOC_ALIAS(TCMallocInternalNewNothrow);
+void* operator new[](size_t size, const std::nothrow_t& nt) noexcept
+ TCMALLOC_ALIAS(TCMallocInternalNewArrayNothrow);
+void operator delete(void* p, const std::nothrow_t& nt) noexcept
+ TCMALLOC_ALIAS(TCMallocInternalDeleteNothrow);
+void operator delete[](void* p, const std::nothrow_t& nt) noexcept
+ TCMALLOC_ALIAS(TCMallocInternalDeleteArrayNothrow);
+
+void* operator new(size_t size, std::align_val_t alignment) noexcept(false)
+ TCMALLOC_ALIAS(TCMallocInternalNewAligned);
+void* operator new(size_t size, std::align_val_t alignment,
+ const std::nothrow_t&) noexcept
+ TCMALLOC_ALIAS(TCMallocInternalNewAligned_nothrow);
+void operator delete(void* p, std::align_val_t alignment) noexcept
+ TCMALLOC_ALIAS(TCMallocInternalDeleteAligned);
+void operator delete(void* p, std::align_val_t alignment,
+ const std::nothrow_t&) noexcept
+ TCMALLOC_ALIAS(TCMallocInternalDeleteAligned_nothrow);
+void operator delete(void* p, size_t size, std::align_val_t alignment) noexcept
+ TCMALLOC_ALIAS(TCMallocInternalDeleteSizedAligned);
+void* operator new[](size_t size, std::align_val_t alignment) noexcept(false)
+ TCMALLOC_ALIAS(TCMallocInternalNewArrayAligned);
+void* operator new[](size_t size, std::align_val_t alignment,
+ const std::nothrow_t&) noexcept
+ TCMALLOC_ALIAS(TCMallocInternalNewArrayAligned_nothrow);
+void operator delete[](void* p, std::align_val_t alignment) noexcept
+ TCMALLOC_ALIAS(TCMallocInternalDeleteArrayAligned);
+void operator delete[](void* p, std::align_val_t alignment,
+ const std::nothrow_t&) noexcept
+ TCMALLOC_ALIAS(TCMallocInternalDeleteArrayAligned_nothrow);
+void operator delete[](void* p, size_t size,
+                       std::align_val_t alignment) noexcept
+ TCMALLOC_ALIAS(TCMallocInternalDeleteArraySizedAligned);
+
+extern "C" {
+void* malloc(size_t size) noexcept TCMALLOC_ALIAS(TCMallocInternalMalloc);
+void free(void* ptr) noexcept TCMALLOC_ALIAS(TCMallocInternalFree);
+void sdallocx(void* ptr, size_t size, int flags) noexcept
+ TCMALLOC_ALIAS(TCMallocInternalSdallocx);
+void* realloc(void* ptr, size_t size) noexcept
+ TCMALLOC_ALIAS(TCMallocInternalRealloc);
+void* calloc(size_t n, size_t size) noexcept
+ TCMALLOC_ALIAS(TCMallocInternalCalloc);
+void cfree(void* ptr) noexcept TCMALLOC_ALIAS(TCMallocInternalCfree);
+void* memalign(size_t align, size_t s) noexcept
+ TCMALLOC_ALIAS(TCMallocInternalMemalign);
+void* aligned_alloc(size_t align, size_t s) noexcept
+ TCMALLOC_ALIAS(TCMallocInternalAlignedAlloc);
+void* valloc(size_t size) noexcept TCMALLOC_ALIAS(TCMallocInternalValloc);
+void* pvalloc(size_t size) noexcept TCMALLOC_ALIAS(TCMallocInternalPvalloc);
+int posix_memalign(void** r, size_t a, size_t s) noexcept
+ TCMALLOC_ALIAS(TCMallocInternalPosixMemalign);
+void malloc_stats(void) noexcept TCMALLOC_ALIAS(TCMallocInternalMallocStats);
+int mallopt(int cmd, int value) noexcept
+ TCMALLOC_ALIAS(TCMallocInternalMallOpt);
+#ifdef TCMALLOC_HAVE_STRUCT_MALLINFO
+struct mallinfo mallinfo(void) noexcept
+ TCMALLOC_ALIAS(TCMallocInternalMallocInfo);
+#endif
+size_t malloc_size(void* p) noexcept TCMALLOC_ALIAS(TCMallocInternalMallocSize);
+size_t malloc_usable_size(void* p) noexcept
+ TCMALLOC_ALIAS(TCMallocInternalMallocSize);
+} // extern "C"
+
+#endif // TCMALLOC_LIBC_OVERRIDE_GCC_AND_WEAK_INL_H_
diff --git a/contrib/libs/tcmalloc/tcmalloc/libc_override_glibc.h b/contrib/libs/tcmalloc/tcmalloc/libc_override_glibc.h
new file mode 100644
index 0000000000..8e23b6eb78
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/libc_override_glibc.h
@@ -0,0 +1,120 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Used to override malloc routines on systems that are using glibc.
+
+#ifndef TCMALLOC_LIBC_OVERRIDE_GLIBC_INL_H_
+#define TCMALLOC_LIBC_OVERRIDE_GLIBC_INL_H_
+
+#include <features.h>
+#include <stddef.h>
+
+#include "tcmalloc/tcmalloc.h"
+
+#ifndef __GLIBC__
+#error libc_override_glibc.h is for glibc distributions only.
+#endif
+
+// In glibc, the memory-allocation methods are weak symbols, so we can
+// just override them with our own. If we're using gcc, we can use
+// __attribute__((alias)) to do the overriding easily (exception:
+// Mach-O, which doesn't support aliases). Otherwise we have to use a
+// function call.
+#if !defined(__GNUC__) || defined(__MACH__)
+
+#include "libc_override_redefine.h"
+
+#else // #if !defined(__GNUC__) || defined(__MACH__)
+
+// If we get here, we're a gcc system, so do all the overriding we do
+// with gcc. This does the overriding of all the 'normal' memory
+// allocation.
+#include "libc_override_gcc_and_weak.h"
+
+// We also have to do some glibc-specific overriding. Some library
+// routines on RedHat 9 allocate memory using malloc() and free it
+// using __libc_free() (or vice-versa). Since we provide our own
+// implementations of malloc/free, we need to make sure that the
+// __libc_XXX variants (defined as part of glibc) also point to the
+// same implementations. Since it only matters for RedHat, we
+// do it inside the gcc #ifdef, since RedHat uses gcc.
+// TODO(b/134690953): only do this if we detect we're an old enough glibc?
+
+extern "C" {
+void* __libc_malloc(size_t size) noexcept
+ TCMALLOC_ALIAS(TCMallocInternalMalloc);
+void __libc_free(void* ptr) noexcept TCMALLOC_ALIAS(TCMallocInternalFree);
+void* __libc_realloc(void* ptr, size_t size) noexcept
+ TCMALLOC_ALIAS(TCMallocInternalRealloc);
+void* __libc_calloc(size_t n, size_t size) noexcept
+ TCMALLOC_ALIAS(TCMallocInternalCalloc);
+void __libc_cfree(void* ptr) noexcept TCMALLOC_ALIAS(TCMallocInternalCfree);
+void* __libc_memalign(size_t align, size_t s) noexcept
+ TCMALLOC_ALIAS(TCMallocInternalMemalign);
+void* __libc_valloc(size_t size) noexcept
+ TCMALLOC_ALIAS(TCMallocInternalValloc);
+void* __libc_pvalloc(size_t size) noexcept
+ TCMALLOC_ALIAS(TCMallocInternalPvalloc);
+int __posix_memalign(void** r, size_t a, size_t s) noexcept
+ TCMALLOC_ALIAS(TCMallocInternalPosixMemalign);
+} // extern "C"
+
+#endif // #if defined(__GNUC__) && !defined(__MACH__)
+
+// We also have to hook libc malloc. While our work with weak symbols
+// should make sure libc malloc is never called in most situations, it
+// can be worked around by shared libraries with the DEEPBIND
+// environment variable set. The below hooks libc to call our malloc
+// routines even in that situation. In other situations, this hook
+// should never be called.
+extern "C" {
+static void* glibc_override_malloc(size_t size, const void* caller) {
+ return TCMallocInternalMalloc(size);
+}
+static void* glibc_override_realloc(void* ptr, size_t size,
+ const void* caller) {
+ return TCMallocInternalRealloc(ptr, size);
+}
+static void glibc_override_free(void* ptr, const void* caller) {
+ TCMallocInternalFree(ptr);
+}
+static void* glibc_override_memalign(size_t align, size_t size,
+ const void* caller) {
+ return TCMallocInternalMemalign(align, size);
+}
+
+// We should be using __malloc_initialize_hook here. (See
+// http://swoolley.org/man.cgi/3/malloc_hook.) However, this causes weird
+// linker errors with programs that link with -static, so instead we just assign
+// the vars directly at static-constructor time. That should have the same
+// effect of making sure the hooks are set before the first malloc call the
+// program makes.
+
+// Glibc-2.14 and above make __malloc_hook and friends volatile
+#ifndef __MALLOC_HOOK_VOLATILE
+#define __MALLOC_HOOK_VOLATILE /**/
+#endif
+
+void* (*__MALLOC_HOOK_VOLATILE __malloc_hook)(size_t, const void*) =
+ &glibc_override_malloc;
+void* (*__MALLOC_HOOK_VOLATILE __realloc_hook)(void*, size_t, const void*) =
+ &glibc_override_realloc;
+void (*__MALLOC_HOOK_VOLATILE __free_hook)(void*,
+ const void*) = &glibc_override_free;
+void* (*__MALLOC_HOOK_VOLATILE __memalign_hook)(size_t, size_t, const void*) =
+ &glibc_override_memalign;
+
+} // extern "C"
+
+#endif // TCMALLOC_LIBC_OVERRIDE_GLIBC_INL_H_
diff --git a/contrib/libs/tcmalloc/tcmalloc/libc_override_redefine.h b/contrib/libs/tcmalloc/tcmalloc/libc_override_redefine.h
new file mode 100644
index 0000000000..b1655461c3
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/libc_override_redefine.h
@@ -0,0 +1,100 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Used on systems that don't have their own definition of
+// malloc/new/etc. (Typically this will be a Windows msvcrt.dll that
+// has been edited to remove the definitions.) We can just define our
+// own as normal functions.
+//
+// This should also work on systems where all the malloc routines
+// defined as weak symbols, and there's no support for aliasing.
+
+#ifndef TCMALLOC_LIBC_OVERRIDE_REDEFINE_H_
+#define TCMALLOC_LIBC_OVERRIDE_REDEFINE_H_
+
+#include <cstddef>
+#include <new>
+
+#include "tcmalloc/tcmalloc.h"
+
+void* operator new(size_t size) { return TCMallocInternalNew(size); }
+void operator delete(void* p) noexcept { TCMallocInternalDelete(p); }
+void* operator new[](size_t size) { return TCMallocInternalNewArray(size); }
+void operator delete[](void* p) noexcept { TCMallocInternalDeleteArray(p); }
+void* operator new(size_t size, const std::nothrow_t& nt) noexcept {
+ return TCMallocInternalNewNothrow(size, nt);
+}
+void* operator new[](size_t size, const std::nothrow_t& nt) noexcept {
+ return TCMallocInternalNewArrayNothrow(size, nt);
+}
+void operator delete(void* ptr, const std::nothrow_t& nt) noexcept {
+ return TCMallocInternalDeleteNothrow(ptr, nt);
+}
+void operator delete[](void* ptr, const std::nothrow_t& nt) noexcept {
+ return TCMallocInternalDeleteArrayNothrow(ptr, nt);
+}
+
+extern "C" {
+void* malloc(size_t s) { return TCMallocInternalMalloc(s); }
+void* calloc(size_t n, size_t s) { return TCMallocInternalCalloc(n, s); }
+void* realloc(void* p, size_t s) { return TCMallocInternalRealloc(p, s); }
+void free(void* p) { TCMallocInternalFree(p); }
+void* memalign(size_t a, size_t s) { return TCMallocInternalMemalign(a, s); }
+int posix_memalign(void** r, size_t a, size_t s) {
+ return TCMallocInternalPosixMemalign(r, a, s);
+}
+size_t malloc_usable_size(void* p) { return TCMallocInternalMallocSize(p); }
+
+// tcmalloc extension
+void sdallocx(void* p, size_t s, int flags) noexcept {
+ TCMallocInternalSdallocx(p, s, flags);
+}
+
+#if defined(__GLIBC__) || defined(__NEWLIB__)
+// SunOS extension
+void cfree(void* p) { TCMallocInternalCfree(p); }
+#endif
+
+#if defined(OS_MACOSX) || defined(__BIONIC__) || defined(__GLIBC__) || \
+ defined(__NEWLIB__) || defined(__UCLIBC__)
+// Obsolete memalign
+void* valloc(size_t s) { return TCMallocInternalValloc(s); }
+#endif
+
+#if defined(__BIONIC__) || defined(__GLIBC__) || defined(__NEWLIB__)
+// Obsolete memalign
+void* pvalloc(size_t s) { return TCMallocInternalPvalloc(s); }
+#endif
+
+#if defined(__GLIBC__) || defined(__NEWLIB__) || defined(__UCLIBC__)
+void malloc_stats(void) { TCMallocInternalMallocStats(); }
+#endif
+
+#if defined(__BIONIC__) || defined(__GLIBC__) || defined(__NEWLIB__) || \
+ defined(__UCLIBC__)
+int mallopt(int cmd, int v) { return TCMallocInternalMallOpt(cmd, v); }
+#endif
+
+#ifdef TCMALLOC_HAVE_STRUCT_MALLINFO
+struct mallinfo mallinfo(void) {
+ return TCMallocInternalMallocInfo();
+}
+#endif
+
+#if defined(__GLIBC__)
+size_t malloc_size(void* p) { return TCMallocInternalMallocSize(p); }
+#endif
+} // extern "C"
+
+#endif // TCMALLOC_LIBC_OVERRIDE_REDEFINE_H_
diff --git a/contrib/libs/tcmalloc/tcmalloc/malloc_extension.cc b/contrib/libs/tcmalloc/tcmalloc/malloc_extension.cc
new file mode 100644
index 0000000000..ad3205fcdc
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/malloc_extension.cc
@@ -0,0 +1,530 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/malloc_extension.h"
+
+#include <assert.h>
+#include <string.h>
+
+#include <atomic>
+#include <cstdlib>
+#include <memory>
+#include <new>
+#include <string>
+
+#include "absl/base/attributes.h"
+#include "absl/base/internal/low_level_alloc.h"
+#include "absl/memory/memory.h"
+#include "absl/time/time.h"
+#include "tcmalloc/internal/parameter_accessors.h"
+#include "tcmalloc/internal_malloc_extension.h"
+
+namespace tcmalloc {
+
+MallocExtension::AllocationProfilingToken::AllocationProfilingToken(
+ std::unique_ptr<tcmalloc_internal::AllocationProfilingTokenBase> impl)
+ : impl_(std::move(impl)) {}
+
+MallocExtension::AllocationProfilingToken::~AllocationProfilingToken() {}
+
+Profile MallocExtension::AllocationProfilingToken::Stop() && {
+ std::unique_ptr<tcmalloc_internal::AllocationProfilingTokenBase> p(
+ std::move(impl_));
+ if (!p) {
+ return Profile();
+ }
+ return std::move(*p).Stop();
+}
+
+Profile::Profile(std::unique_ptr<const tcmalloc_internal::ProfileBase> impl)
+ : impl_(std::move(impl)) {}
+
+Profile::~Profile() {}
+
+void Profile::Iterate(absl::FunctionRef<void(const Sample&)> f) const {
+ if (!impl_) {
+ return;
+ }
+
+ impl_->Iterate(f);
+}
+
+int64_t Profile::Period() const {
+ if (!impl_) {
+ return -1;
+ }
+
+ return impl_->Period();
+}
+
+ProfileType Profile::Type() const {
+ if (!impl_) {
+ return ProfileType::kDoNotUse;
+ }
+
+ return impl_->Type();
+}
+
+AddressRegion::~AddressRegion() {}
+
+AddressRegionFactory::~AddressRegionFactory() {}
+
+size_t AddressRegionFactory::GetStats(absl::Span<char> buffer) {
+ static_cast<void>(buffer);
+ return 0;
+}
+
+size_t AddressRegionFactory::GetStatsInPbtxt(absl::Span<char> buffer) {
+ static_cast<void>(buffer);
+ return 0;
+}
+
+static std::atomic<size_t> address_region_factory_internal_bytes_allocated(0);
+
+size_t AddressRegionFactory::InternalBytesAllocated() {
+ return address_region_factory_internal_bytes_allocated.load(
+ std::memory_order_relaxed);
+}
+
+void* AddressRegionFactory::MallocInternal(size_t size) {
+ // Use arena without malloc hooks to avoid HeapChecker reporting a leak.
+ static auto* arena =
+ absl::base_internal::LowLevelAlloc::NewArena(/*flags=*/0);
+ void* result =
+ absl::base_internal::LowLevelAlloc::AllocWithArena(size, arena);
+ if (result) {
+ address_region_factory_internal_bytes_allocated.fetch_add(
+ size, std::memory_order_relaxed);
+ }
+ return result;
+}
+
+#if !ABSL_HAVE_ATTRIBUTE_WEAK || defined(__APPLE__) || defined(__EMSCRIPTEN__)
+#define ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS 0
+#else
+#define ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS 1
+#endif
+
+std::string MallocExtension::GetStats() {
+ std::string ret;
+#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS
+ if (&MallocExtension_Internal_GetStats != nullptr) {
+ MallocExtension_Internal_GetStats(&ret);
+ }
+#endif
+ return ret;
+}
+
+void MallocExtension::ReleaseMemoryToSystem(size_t num_bytes) {
+#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS
+ if (&MallocExtension_Internal_ReleaseMemoryToSystem != nullptr) {
+ MallocExtension_Internal_ReleaseMemoryToSystem(num_bytes);
+ }
+#endif
+}
+
+AddressRegionFactory* MallocExtension::GetRegionFactory() {
+#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS
+ if (&MallocExtension_Internal_GetRegionFactory == nullptr) {
+ return nullptr;
+ }
+
+ return MallocExtension_Internal_GetRegionFactory();
+#else
+ return nullptr;
+#endif
+}
+
+void MallocExtension::SetRegionFactory(AddressRegionFactory* factory) {
+#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS
+ if (&MallocExtension_Internal_SetRegionFactory == nullptr) {
+ return;
+ }
+
+ MallocExtension_Internal_SetRegionFactory(factory);
+#endif
+ // Default implementation does nothing
+}
+
+Profile MallocExtension::SnapshotCurrent(tcmalloc::ProfileType type) {
+#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS
+ if (&MallocExtension_Internal_SnapshotCurrent == nullptr) {
+ return Profile();
+ }
+
+ return tcmalloc_internal::ProfileAccessor::MakeProfile(
+ std::unique_ptr<const tcmalloc_internal::ProfileBase>(
+ MallocExtension_Internal_SnapshotCurrent(type)));
+#else
+ return Profile();
+#endif
+}
+
+MallocExtension::AllocationProfilingToken
+MallocExtension::StartAllocationProfiling() {
+#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS
+ if (&MallocExtension_Internal_StartAllocationProfiling == nullptr) {
+ return {};
+ }
+
+ return tcmalloc_internal::AllocationProfilingTokenAccessor::MakeToken(
+ std::unique_ptr<tcmalloc_internal::AllocationProfilingTokenBase>(
+ MallocExtension_Internal_StartAllocationProfiling()));
+#else
+ return {};
+#endif
+}
+
+void MallocExtension::MarkThreadIdle() {
+#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS
+ if (&MallocExtension_Internal_MarkThreadIdle == nullptr) {
+ return;
+ }
+
+ MallocExtension_Internal_MarkThreadIdle();
+#endif
+}
+
+void MallocExtension::MarkThreadBusy() {
+#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS
+ if (&MallocExtension_Internal_MarkThreadBusy == nullptr) {
+ return;
+ }
+
+ MallocExtension_Internal_MarkThreadBusy();
+#endif
+}
+
+MallocExtension::MemoryLimit MallocExtension::GetMemoryLimit() {
+ MemoryLimit ret;
+#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS
+ if (&MallocExtension_Internal_GetMemoryLimit != nullptr) {
+ MallocExtension_Internal_GetMemoryLimit(&ret);
+ }
+#endif
+ return ret;
+}
+
+void MallocExtension::SetMemoryLimit(
+ const MallocExtension::MemoryLimit& limit) {
+#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS
+ if (&MallocExtension_Internal_SetMemoryLimit != nullptr) {
+ MallocExtension_Internal_SetMemoryLimit(&limit);
+ }
+#endif
+}
+
+int64_t MallocExtension::GetProfileSamplingRate() {
+#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS
+ if (&MallocExtension_Internal_GetProfileSamplingRate != nullptr) {
+ return MallocExtension_Internal_GetProfileSamplingRate();
+ }
+#endif
+ return -1;
+}
+
+void MallocExtension::SetProfileSamplingRate(int64_t rate) {
+#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS
+ if (&MallocExtension_Internal_SetProfileSamplingRate != nullptr) {
+ MallocExtension_Internal_SetProfileSamplingRate(rate);
+ }
+#endif
+ (void)rate;
+}
+
+int64_t MallocExtension::GetGuardedSamplingRate() {
+#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS
+ if (MallocExtension_Internal_GetGuardedSamplingRate == nullptr) {
+ return -1;
+ }
+
+ return MallocExtension_Internal_GetGuardedSamplingRate();
+#else
+ return -1;
+#endif
+}
+
+void MallocExtension::SetGuardedSamplingRate(int64_t rate) {
+#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS
+ if (MallocExtension_Internal_SetGuardedSamplingRate == nullptr) {
+ return;
+ }
+
+ MallocExtension_Internal_SetGuardedSamplingRate(rate);
+#else
+ (void)rate;
+#endif
+}
+
+void MallocExtension::ActivateGuardedSampling() {
+#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS
+ if (&MallocExtension_Internal_ActivateGuardedSampling != nullptr) {
+ MallocExtension_Internal_ActivateGuardedSampling();
+ }
+#endif
+}
+
+bool MallocExtension::PerCpuCachesActive() {
+#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS
+ if (MallocExtension_Internal_GetPerCpuCachesActive == nullptr) {
+ return false;
+ }
+
+ return MallocExtension_Internal_GetPerCpuCachesActive();
+#else
+ return false;
+#endif
+}
+
+void MallocExtension::DeactivatePerCpuCaches() {
+#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS
+ if (MallocExtension_Internal_DeactivatePerCpuCaches == nullptr) {
+ return;
+ }
+
+ MallocExtension_Internal_DeactivatePerCpuCaches();
+#endif
+}
+
+int32_t MallocExtension::GetMaxPerCpuCacheSize() {
+#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS
+ if (MallocExtension_Internal_GetMaxPerCpuCacheSize == nullptr) {
+ return -1;
+ }
+
+ return MallocExtension_Internal_GetMaxPerCpuCacheSize();
+#else
+ return -1;
+#endif
+}
+
+void MallocExtension::SetMaxPerCpuCacheSize(int32_t value) {
+#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS
+ if (MallocExtension_Internal_SetMaxPerCpuCacheSize == nullptr) {
+ return;
+ }
+
+ MallocExtension_Internal_SetMaxPerCpuCacheSize(value);
+#else
+ (void)value;
+#endif
+}
+
+int64_t MallocExtension::GetMaxTotalThreadCacheBytes() {
+#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS
+ if (MallocExtension_Internal_GetMaxTotalThreadCacheBytes == nullptr) {
+ return -1;
+ }
+
+ return MallocExtension_Internal_GetMaxTotalThreadCacheBytes();
+#else
+ return -1;
+#endif
+}
+
+void MallocExtension::SetMaxTotalThreadCacheBytes(int64_t value) {
+#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS
+ if (MallocExtension_Internal_SetMaxTotalThreadCacheBytes == nullptr) {
+ return;
+ }
+
+ MallocExtension_Internal_SetMaxTotalThreadCacheBytes(value);
+#else
+ (void)value;
+#endif
+}
+
+absl::Duration MallocExtension::GetSkipSubreleaseInterval() {
+#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS
+ if (MallocExtension_Internal_GetSkipSubreleaseInterval == nullptr) {
+ return absl::ZeroDuration();
+ }
+
+ absl::Duration value;
+ MallocExtension_Internal_GetSkipSubreleaseInterval(&value);
+ return value;
+#else
+ return absl::ZeroDuration();
+#endif
+}
+
+void MallocExtension::SetSkipSubreleaseInterval(absl::Duration value) {
+#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS
+ if (MallocExtension_Internal_SetSkipSubreleaseInterval == nullptr) {
+ return;
+ }
+
+ MallocExtension_Internal_SetSkipSubreleaseInterval(value);
+#else
+ (void)value;
+#endif
+}
+
+absl::optional<size_t> MallocExtension::GetNumericProperty(
+ absl::string_view property) {
+#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS
+ if (&MallocExtension_Internal_GetNumericProperty != nullptr) {
+ size_t value;
+ if (MallocExtension_Internal_GetNumericProperty(property.data(),
+ property.size(), &value)) {
+ return value;
+ }
+ }
+#endif
+ return absl::nullopt;
+}
+
+size_t MallocExtension::GetEstimatedAllocatedSize(size_t size) {
+ return nallocx(size, 0);
+}
+
+absl::optional<size_t> MallocExtension::GetAllocatedSize(const void* p) {
+#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS
+ if (MallocExtension_Internal_GetAllocatedSize != nullptr) {
+ return MallocExtension_Internal_GetAllocatedSize(p);
+ }
+#endif
+ return absl::nullopt;
+}
+
+MallocExtension::Ownership MallocExtension::GetOwnership(const void* p) {
+#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS
+ if (MallocExtension_Internal_GetOwnership != nullptr) {
+ return MallocExtension_Internal_GetOwnership(p);
+ }
+#endif
+ return MallocExtension::Ownership::kUnknown;
+}
+
+std::map<std::string, MallocExtension::Property>
+MallocExtension::GetProperties() {
+ std::map<std::string, MallocExtension::Property> ret;
+#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS
+ if (&MallocExtension_Internal_GetProperties != nullptr) {
+ MallocExtension_Internal_GetProperties(&ret);
+ }
+#endif
+ return ret;
+}
+
+size_t MallocExtension::ReleaseCpuMemory(int cpu) {
+#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS
+ if (MallocExtension_Internal_ReleaseCpuMemory != nullptr) {
+ return MallocExtension_Internal_ReleaseCpuMemory(cpu);
+ }
+#endif
+ return 0;
+}
+
+void MallocExtension::ProcessBackgroundActions() {
+#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS
+ if (NeedsProcessBackgroundActions()) {
+ MallocExtension_Internal_ProcessBackgroundActions();
+ }
+#endif
+}
+
+bool MallocExtension::NeedsProcessBackgroundActions() {
+#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS
+ return &MallocExtension_Internal_ProcessBackgroundActions != nullptr;
+#else
+ return false;
+#endif
+}
+
+MallocExtension::BytesPerSecond MallocExtension::GetBackgroundReleaseRate() {
+#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS
+ if (&MallocExtension_Internal_GetBackgroundReleaseRate != nullptr) {
+ return MallocExtension_Internal_GetBackgroundReleaseRate();
+ }
+#endif
+ return static_cast<MallocExtension::BytesPerSecond>(0);
+}
+
+void MallocExtension::SetBackgroundReleaseRate(BytesPerSecond rate) {
+#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS
+ if (&MallocExtension_Internal_SetBackgroundReleaseRate != nullptr) {
+ MallocExtension_Internal_SetBackgroundReleaseRate(rate);
+ }
+#endif
+}
+
+void MallocExtension::EnableForkSupport() {
+#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS
+ if (&MallocExtension_EnableForkSupport != nullptr) {
+ MallocExtension_EnableForkSupport();
+ }
+#endif
+}
+
+void MallocExtension::SetSampleUserDataCallbacks(
+ CreateSampleUserDataCallback create,
+ CopySampleUserDataCallback copy,
+ DestroySampleUserDataCallback destroy) {
+#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS
+ if (&MallocExtension_SetSampleUserDataCallbacks != nullptr) {
+ MallocExtension_SetSampleUserDataCallbacks(create, copy, destroy);
+ }
+#else
+ (void)create;
+ (void)copy;
+ (void)destroy;
+#endif
+}
+
+} // namespace tcmalloc
+
+// Default implementation just returns size. The expectation is that
+// the linked-in malloc implementation might provide an override of
+// this weak function with a better implementation.
+ABSL_ATTRIBUTE_WEAK ABSL_ATTRIBUTE_NOINLINE size_t nallocx(size_t size,
+ int) noexcept {
+ return size;
+}
+
+// Default implementation just frees memory. The expectation is that the
+// linked-in malloc implementation may provide an override that takes
+// advantage of the size hint (sized deallocation) instead of ignoring it.
+ABSL_ATTRIBUTE_WEAK ABSL_ATTRIBUTE_NOINLINE void sdallocx(void* ptr, size_t,
+ int) noexcept {
+ free(ptr);
+}
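+
+// Illustrative caller pattern (not part of this file): nallocx() and
+// sdallocx() are typically paired so the allocator can skip the size lookup
+// on deallocation, e.g.
+//   size_t cap = nallocx(n, 0);
+//   void* p = malloc(cap);
+//   ...
+//   sdallocx(p, cap, 0);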
+
+ABSL_ATTRIBUTE_WEAK ABSL_ATTRIBUTE_NOINLINE tcmalloc::sized_ptr_t
+tcmalloc_size_returning_operator_new(size_t size) {
+ return {::operator new(size), size};
+}
+
+ABSL_ATTRIBUTE_WEAK ABSL_ATTRIBUTE_NOINLINE tcmalloc::sized_ptr_t
+tcmalloc_size_returning_operator_new_nothrow(size_t size) noexcept {
+ void* p = ::operator new(size, std::nothrow);
+ return {p, p ? size : 0};
+}
+
+#if defined(_LIBCPP_VERSION) && defined(__cpp_aligned_new)
+
+ABSL_ATTRIBUTE_WEAK ABSL_ATTRIBUTE_NOINLINE tcmalloc::sized_ptr_t
+tcmalloc_size_returning_operator_new_aligned(size_t size,
+ std::align_val_t alignment) {
+ return {::operator new(size, alignment), size};
+}
+
+ABSL_ATTRIBUTE_WEAK ABSL_ATTRIBUTE_NOINLINE tcmalloc::sized_ptr_t
+tcmalloc_size_returning_operator_new_aligned_nothrow(
+ size_t size, std::align_val_t alignment) noexcept {
+ void* p = ::operator new(size, alignment, std::nothrow);
+ return {p, p ? size : 0};
+}
+
+#endif // _LIBCPP_VERSION && __cpp_aligned_new
diff --git a/contrib/libs/tcmalloc/tcmalloc/malloc_extension.h b/contrib/libs/tcmalloc/tcmalloc/malloc_extension.h
new file mode 100644
index 0000000000..fcbd347ca1
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/malloc_extension.h
@@ -0,0 +1,617 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// This file documents extensions supported by TCMalloc. These extensions
+// provide hooks for both surfacing telemetric data about TCMalloc's usage and
+// tuning the internal implementation of TCMalloc. The internal implementation
+// functions use weak linkage, allowing an application to link against the
+// extensions without always linking against TCMalloc.
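+//
+// A minimal sketch of how that weak linkage is consumed (mirroring
+// malloc_extension.cc; illustrative, not a definition made here):
+//   if (&MallocExtension_Internal_MarkThreadIdle != nullptr) {
+//     MallocExtension_Internal_MarkThreadIdle();  // TCMalloc is linked in
+//   }  // otherwise fall back to a no-op or a default return value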
+
+#ifndef TCMALLOC_MALLOC_EXTENSION_H_
+#define TCMALLOC_MALLOC_EXTENSION_H_
+
+#include <atomic>
+#include <cstddef>
+#include <cstdint>
+#include <functional>
+#include <limits>
+#include <map>
+#include <memory>
+#include <new>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "absl/base/attributes.h"
+#include "absl/base/macros.h"
+#include "absl/base/policy_checks.h"
+#include "absl/base/port.h"
+#include "absl/functional/function_ref.h"
+#include "absl/strings/string_view.h"
+#include "absl/time/time.h"
+#include "absl/types/optional.h"
+#include "absl/types/span.h"
+
+namespace tcmalloc {
+namespace tcmalloc_internal {
+class AllocationProfilingTokenAccessor;
+class AllocationProfilingTokenBase;
+class ProfileAccessor;
+class ProfileBase;
+} // namespace tcmalloc_internal
+
+enum class ProfileType {
+ // Approximation of current heap usage
+ kHeap,
+
+ // Fragmentation report
+ kFragmentation,
+
+ // Sample of objects that were live at a recent peak of total heap usage. The
+ // specifics of when exactly this profile is collected are subject to change.
+ kPeakHeap,
+
+ // Sample of objects allocated from the start of allocation profiling until
+ // the profile was terminated with Stop().
+ kAllocations,
+
+ // Only present to prevent switch statements without a default clause so that
+ // we can extend this enumeration without breaking code.
+ kDoNotUse,
+};
+
+class Profile final {
+ public:
+ Profile() = default;
+ Profile(Profile&&) = default;
+ Profile(const Profile&) = delete;
+
+ ~Profile();
+
+ Profile& operator=(Profile&&) = default;
+ Profile& operator=(const Profile&) = delete;
+
+ struct Sample {
+ static constexpr int kMaxStackDepth = 64;
+
+ int64_t sum;
+ int64_t count; // Total added with this <stack,requested_size,...>
+
+ size_t requested_size;
+ size_t requested_alignment;
+ size_t allocated_size;
+
+ int depth;
+ void* stack[kMaxStackDepth];
+
+ void* user_data;
+ };
+
+ void Iterate(absl::FunctionRef<void(const Sample&)> f) const;
+
+ int64_t Period() const;
+ ProfileType Type() const;
+
+ private:
+ explicit Profile(std::unique_ptr<const tcmalloc_internal::ProfileBase>);
+
+ std::unique_ptr<const tcmalloc_internal::ProfileBase> impl_;
+ friend class tcmalloc_internal::ProfileAccessor;
+};
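+
+// Illustrative use of Profile (an example, not part of the declarations;
+// SnapshotCurrent() is a MallocExtension method declared later in this file):
+//   tcmalloc::Profile p =
+//       tcmalloc::MallocExtension::SnapshotCurrent(tcmalloc::ProfileType::kHeap);
+//   p.Iterate([](const tcmalloc::Profile::Sample& s) {
+//     // Inspect s.sum, s.count, s.allocated_size, s.stack[0..s.depth), ...
+//   });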
+
+class AddressRegion {
+ public:
+ AddressRegion() {}
+ virtual ~AddressRegion();
+
+ // Allocates at least size bytes of memory from this region, aligned with
+  // alignment. Returns a pair containing a pointer to the start of the
+  // allocated memory and the actual size allocated. Returns {nullptr, 0} on
+  // failure.
+ //
+ // Alloc must return memory located within the address range given in the call
+ // to AddressRegionFactory::Create that created this AddressRegion.
+ virtual std::pair<void*, size_t> Alloc(size_t size, size_t alignment) = 0;
+};
+
+// Interface to a pluggable address region allocator.
+class AddressRegionFactory {
+ public:
+ enum class UsageHint {
+ kNormal, // Normal usage.
+ kInfrequentAllocation, // TCMalloc allocates from these regions less
+ // frequently than normal regions.
+ kInfrequent ABSL_DEPRECATED("Use kInfrequentAllocation") =
+ kInfrequentAllocation,
+ };
+
+ AddressRegionFactory() {}
+ virtual ~AddressRegionFactory();
+
+ // Returns an AddressRegion with the specified start address and size. hint
+ // indicates how the caller intends to use the returned region (helpful for
+ // deciding which regions to remap with hugepages, which regions should have
+ // pages prefaulted, etc.). The returned AddressRegion must never be deleted.
+ //
+ // The caller must have reserved size bytes of address space starting at
+ // start_addr with mmap(PROT_NONE) prior to calling this function (so it is
+ // safe for Create() to mmap(MAP_FIXED) over the specified address range).
+ // start_addr and size are always page-aligned.
+ virtual AddressRegion* Create(void* start_addr, size_t size,
+ UsageHint hint) = 0;
+
+ // Gets a human-readable description of the current state of the allocator.
+ //
+ // The state is stored in the provided buffer. The number of bytes used (or
+ // would have been required, had the buffer been of sufficient size) is
+ // returned.
+ virtual size_t GetStats(absl::Span<char> buffer);
+
+ // Gets a description of the current state of the allocator in pbtxt format.
+ //
+ // The state is stored in the provided buffer. The number of bytes used (or
+ // would have been required, had the buffer been of sufficient size) is
+ // returned.
+ virtual size_t GetStatsInPbtxt(absl::Span<char> buffer);
+
+ // Returns the total number of bytes allocated by MallocInternal().
+ static size_t InternalBytesAllocated();
+
+ protected:
+ // Dynamically allocates memory for use by AddressRegionFactory. Particularly
+ // useful for creating AddressRegions inside Create().
+ //
+ // This memory is never freed, so allocate sparingly.
+ static void* MallocInternal(size_t size);
+};
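+
+// A minimal sketch of a custom factory (illustrative only; MyRegion stands in
+// for a hypothetical AddressRegion subclass that parcels out the reserved
+// range):
+//   class MyFactory : public tcmalloc::AddressRegionFactory {
+//    public:
+//     tcmalloc::AddressRegion* Create(void* start, size_t size,
+//                                     UsageHint hint) override {
+//       void* mem = MallocInternal(sizeof(MyRegion));
+//       return new (mem) MyRegion(start, size);  // never deleted, per contract
+//     }
+//   };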
+
+class MallocExtension final {
+ public:
+ // Gets a human readable description of the current state of the malloc data
+ // structures.
+ //
+ // See https://github.com/google/tcmalloc/tree/master/docs/stats.md for how to interpret these
+ // statistics.
+ static std::string GetStats();
+
+ // -------------------------------------------------------------------
+ // Control operations for getting malloc implementation specific parameters.
+ // Some currently useful properties:
+ //
+ // generic
+ // -------
+ // "generic.current_allocated_bytes"
+ // Number of bytes currently allocated by application
+ //
+ // "generic.heap_size"
+ // Number of bytes in the heap ==
+ // current_allocated_bytes +
+ // fragmentation +
+ // freed (but not released to OS) memory regions
+ //
+ // tcmalloc
+ // --------
+ // "tcmalloc.max_total_thread_cache_bytes"
+ // Upper limit on total number of bytes stored across all
+ // per-thread caches. Default: 16MB.
+ //
+ // "tcmalloc.current_total_thread_cache_bytes"
+ // Number of bytes used across all thread caches.
+ //
+ // "tcmalloc.pageheap_free_bytes"
+ // Number of bytes in free, mapped pages in page heap. These
+ // bytes can be used to fulfill allocation requests. They
+ // always count towards virtual memory usage, and unless the
+ // underlying memory is swapped out by the OS, they also count
+ // towards physical memory usage.
+ //
+ // "tcmalloc.pageheap_unmapped_bytes"
+ // Number of bytes in free, unmapped pages in page heap.
+ // These are bytes that have been released back to the OS,
+ // possibly by one of the MallocExtension "Release" calls.
+ // They can be used to fulfill allocation requests, but
+ // typically incur a page fault. They always count towards
+ // virtual memory usage, and depending on the OS, typically
+ // do not count towards physical memory usage.
+ //
+ // "tcmalloc.per_cpu_caches_active"
+ // Whether tcmalloc is using per-CPU caches (1 or 0 respectively).
+ // -------------------------------------------------------------------
+
+ // Gets the named property's value or a nullopt if the property is not valid.
+ static absl::optional<size_t> GetNumericProperty(absl::string_view property);
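+  //
+  // Illustrative usage (an example, not part of the declaration above):
+  //   absl::optional<size_t> heap =
+  //       MallocExtension::GetNumericProperty("generic.heap_size");
+  //   if (heap.has_value()) { /* *heap is the heap size in bytes */ }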
+
+ // Marks the current thread as "idle". This function may optionally be called
+ // by threads as a hint to the malloc implementation that any thread-specific
+ // resources should be released. Note: this may be an expensive function, so
+ // it should not be called too often.
+ //
+ // Also, if the code that calls this function will go to sleep for a while, it
+ // should take care to not allocate anything between the call to this function
+ // and the beginning of the sleep.
+ static void MarkThreadIdle();
+
+ // Marks the current thread as "busy". This function should be called after
+ // MarkThreadIdle() if the thread will now do more work. If this method is
+ // not called, performance may suffer.
+ static void MarkThreadBusy();
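+  // Example pairing (illustrative; WaitForWork() is a hypothetical blocking
+  // call):
+  //   MallocExtension::MarkThreadIdle();
+  //   WaitForWork();  // no allocations between MarkThreadIdle() and the wait
+  //   MallocExtension::MarkThreadBusy();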
+
+ // Attempts to free any resources associated with cpu <cpu> (in the sense of
+ // only being usable from that CPU.) Returns the number of bytes previously
+ // assigned to "cpu" that were freed. Safe to call from any processor, not
+ // just <cpu>.
+ static size_t ReleaseCpuMemory(int cpu);
+
+ // Gets the region factory used by the malloc extension instance. Returns null
+ // for malloc implementations that do not support pluggable region factories.
+ static AddressRegionFactory* GetRegionFactory();
+
+  // Sets the region factory to the specified factory.
+  //
+  // Users can register their own region factories by doing:
+  //   factory = new MyOwnRegionFactory();
+  //   MallocExtension::SetRegionFactory(factory);
+  //
+  // It is up to users whether to fall back (recommended) to the default
+  // region factory (use GetRegionFactory() above) or not. The caller is
+  // responsible for any necessary locking.
+ static void SetRegionFactory(AddressRegionFactory* a);
+
+ // Tries to release at least num_bytes of free memory back to the OS for
+ // reuse.
+ //
+ // Depending on the state of the malloc implementation, more than num_bytes of
+ // memory may be released to the OS.
+ //
+ // This request may not be completely honored if:
+ // * The underlying malloc implementation does not support releasing memory to
+ // the OS.
+ // * There are not at least num_bytes of free memory cached, or free memory is
+ // fragmented in ways that keep it from being returned to the OS.
+ //
+ // Returning memory to the OS can hurt performance in two ways:
+ // * Parts of huge pages may be free and returning them to the OS requires
+ // breaking up the huge page they are located on. This can slow accesses to
+ // still-allocated memory due to increased TLB pressure for the working set.
+ // * If the memory is ultimately needed again, pages will need to be faulted
+ // back in.
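+  //
+  // Illustrative call (sketch; the 32 MiB figure is arbitrary), asking
+  // TCMalloc to return at least that much free memory to the OS, subject to
+  // the caveats above:
+  //   MallocExtension::ReleaseMemoryToSystem(32 << 20);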
+ static void ReleaseMemoryToSystem(size_t num_bytes);
+
+ struct MemoryLimit {
+ // Make a best effort attempt to prevent more than limit bytes of memory
+ // from being allocated by the system. In particular, if satisfying a given
+ // malloc call would require passing this limit, release as much memory to
+ // the OS as needed to stay under it if possible.
+ //
+ // If hard is set, crash if returning memory is unable to get below the
+ // limit.
+ //
+ // Note: limit=SIZE_T_MAX implies no limit.
+ size_t limit = std::numeric_limits<size_t>::max();
+ bool hard = false;
+
+ // Explicitly declare the ctor to put it in the google_malloc section.
+ MemoryLimit() = default;
+ };
+
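+  // Sketch of installing a soft 1 GiB limit using the setter below (the value
+  // is illustrative only):
+  //   MallocExtension::MemoryLimit mem_limit;
+  //   mem_limit.limit = size_t{1} << 30;
+  //   mem_limit.hard = false;
+  //   MallocExtension::SetMemoryLimit(mem_limit);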
+ static MemoryLimit GetMemoryLimit();
+ static void SetMemoryLimit(const MemoryLimit& limit);
+
+ // Gets the sampling rate. Returns a value < 0 if unknown.
+ static int64_t GetProfileSamplingRate();
+ // Sets the sampling rate for heap profiles. TCMalloc samples approximately
+ // every rate bytes allocated.
+ static void SetProfileSamplingRate(int64_t rate);
+
+ // Gets the guarded sampling rate. Returns a value < 0 if unknown.
+ static int64_t GetGuardedSamplingRate();
+ // Sets the guarded sampling rate for sampled allocations. TCMalloc samples
+ // approximately every rate bytes allocated, subject to implementation
+ // limitations in GWP-ASan.
+ //
+  // Guarded samples provide probabilistic protections against buffer underflow,
+ // overflow, and use-after-free when GWP-ASan is active (via calling
+ // ActivateGuardedSampling).
+ static void SetGuardedSamplingRate(int64_t rate);
+
+ // Switches TCMalloc to guard sampled allocations for underflow, overflow, and
+ // use-after-free according to the guarded sample parameter value.
+ static void ActivateGuardedSampling();
+
+ // Gets whether TCMalloc is using per-CPU caches.
+ static bool PerCpuCachesActive();
+
+ // Extension for unified agent.
+ //
+ // Should be removed in the future https://st.yandex-team.ru/UNIFIEDAGENT-321
+ static void DeactivatePerCpuCaches();
+
+ // Gets the current maximum cache size per CPU cache.
+ static int32_t GetMaxPerCpuCacheSize();
+ // Sets the maximum cache size per CPU cache. This is a per-core limit.
+ static void SetMaxPerCpuCacheSize(int32_t value);
+
+ // Gets the current maximum thread cache.
+ static int64_t GetMaxTotalThreadCacheBytes();
+ // Sets the maximum thread cache size. This is a whole-process limit.
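+  //
+  // For example (sketch; the 64 MiB value is arbitrary):
+  //   MallocExtension::SetMaxTotalThreadCacheBytes(64 << 20);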
+ static void SetMaxTotalThreadCacheBytes(int64_t value);
+
+ // Gets the delayed subrelease interval (0 if delayed subrelease is disabled)
+ static absl::Duration GetSkipSubreleaseInterval();
+ // Sets the delayed subrelease interval (0 to disable delayed subrelease)
+ static void SetSkipSubreleaseInterval(absl::Duration value);
+
+ // Returns the estimated number of bytes that will be allocated for a request
+ // of "size" bytes. This is an estimate: an allocation of "size" bytes may
+ // reserve more bytes, but will never reserve fewer.
+ static size_t GetEstimatedAllocatedSize(size_t size);
+
+ // Returns the actual number N of bytes reserved by tcmalloc for the pointer
+ // p. This number may be equal to or greater than the number of bytes
+ // requested when p was allocated.
+ //
+ // This function is just useful for statistics collection. The client must
+ // *not* read or write from the extra bytes that are indicated by this call.
+ //
+ // Example, suppose the client gets memory by calling
+ // p = malloc(10)
+ // and GetAllocatedSize(p) returns 16. The client must only use the first 10
+ // bytes p[0..9], and not attempt to read or write p[10..15].
+ //
+ // p must have been allocated by TCMalloc and must not be an interior pointer
+  // -- that is, must be exactly the pointer returned by malloc() et al., not
+ // some offset from that -- and should not have been freed yet. p may be
+ // null.
+ static absl::optional<size_t> GetAllocatedSize(const void* p);
+
+ // Returns
+ // * kOwned if TCMalloc allocated the memory pointed to by p, or
+ // * kNotOwned if allocated elsewhere or p is null.
+ //
+ // REQUIRES: p must be a value returned from a previous call to malloc(),
+ // calloc(), realloc(), memalign(), posix_memalign(), valloc(), pvalloc(),
+ // new, or new[], and must refer to memory that is currently allocated (so,
+ // for instance, you should not pass in a pointer after having called free()
+ // on it).
+ enum class Ownership { kUnknown = 0, kOwned, kNotOwned };
+ static Ownership GetOwnership(const void* p);
+
+ // Type used by GetProperties. See comment on GetProperties.
+ struct Property {
+ size_t value;
+ };
+
+ // Returns detailed statistics about the state of TCMalloc. The map is keyed
+ // by the name of the statistic.
+ //
+ // Common across malloc implementations:
+ // generic.bytes_in_use_by_app -- Bytes currently in use by application
+ // generic.physical_memory_used -- Overall (including malloc internals)
+ // generic.virtual_memory_used -- Overall (including malloc internals)
+ //
+ // Tcmalloc specific properties
+ // tcmalloc.cpu_free -- Bytes in per-cpu free-lists
+ // tcmalloc.thread_cache_free -- Bytes in per-thread free-lists
+ // tcmalloc.transfer_cache -- Bytes in cross-thread transfer caches
+ // tcmalloc.central_cache_free -- Bytes in central cache
+ // tcmalloc.page_heap_free -- Bytes in page heap
+ // tcmalloc.page_heap_unmapped -- Bytes in page heap (no backing phys. mem)
+ // tcmalloc.metadata_bytes -- Used by internal data structures
+ // tcmalloc.thread_cache_count -- Number of thread caches in use
+ // tcmalloc.experiment.NAME -- Experiment NAME is running if 1
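+  //
+  // Sketch of dumping every property (illustrative only):
+  //   for (const auto& kv : MallocExtension::GetProperties()) {
+  //     printf("%s = %zu\n", kv.first.c_str(), kv.second.value);
+  //   }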
+ static std::map<std::string, Property> GetProperties();
+
+ static Profile SnapshotCurrent(tcmalloc::ProfileType type);
+
+ // AllocationProfilingToken tracks an active profiling session started with
+ // StartAllocationProfiling. Profiling continues until Stop() is called.
+ class AllocationProfilingToken {
+ public:
+ AllocationProfilingToken() = default;
+ AllocationProfilingToken(AllocationProfilingToken&&) = default;
+ AllocationProfilingToken(const AllocationProfilingToken&) = delete;
+ ~AllocationProfilingToken();
+
+ AllocationProfilingToken& operator=(AllocationProfilingToken&&) = default;
+ AllocationProfilingToken& operator=(const AllocationProfilingToken&) =
+ delete;
+
+ // Finish the recording started by the corresponding call to
+    // StartAllocationProfiling, and return samples of calls to each function. If
+ // it is called more than once, subsequent calls will return an empty
+ // profile.
+ Profile Stop() &&;
+
+ private:
+ explicit AllocationProfilingToken(
+ std::unique_ptr<tcmalloc_internal::AllocationProfilingTokenBase>);
+
+ std::unique_ptr<tcmalloc_internal::AllocationProfilingTokenBase> impl_;
+ friend class tcmalloc_internal::AllocationProfilingTokenAccessor;
+ };
+
+ // Start recording a sample of allocation and deallocation calls. Returns
+ // null if the implementation does not support profiling.
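+  //
+  // Typical usage (sketch):
+  //   auto token = MallocExtension::StartAllocationProfiling();
+  //   // ... run the workload of interest ...
+  //   Profile profile = std::move(token).Stop();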
+ static AllocationProfilingToken StartAllocationProfiling();
+
+ // Runs housekeeping actions for the allocator off of the main allocation path
+ // of new/delete. As of 2020, this includes:
+ // * Inspecting the current CPU mask and releasing memory from inaccessible
+ // CPUs.
+ // * Releasing GetBackgroundReleaseRate() bytes per second from the page
+ // heap, if that many bytes are free, via ReleaseMemoryToSystem().
+ //
+ // When linked against TCMalloc, this method does not return.
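+  //
+  // One way to run it is on a dedicated thread, e.g. (sketch; assumes the
+  // caller has included <thread>):
+  //   std::thread bg(MallocExtension::ProcessBackgroundActions);
+  //   bg.detach();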
+ static void ProcessBackgroundActions();
+
+ // Return true if ProcessBackgroundActions should be called on this platform.
+ // Not all platforms need/support background actions. As of 2021 this
+ // includes Apple and Emscripten.
+ static bool NeedsProcessBackgroundActions();
+
+ // Specifies a rate in bytes per second.
+ //
+ // The enum is used to provide strong-typing for the value.
+ enum class BytesPerSecond : size_t {};
+
+ // Gets the current release rate (in bytes per second) from the page heap.
+ // Zero inhibits the release path.
+ static BytesPerSecond GetBackgroundReleaseRate();
+ // Specifies the release rate from the page heap. ProcessBackgroundActions
+ // must be called for this to be operative.
+ static void SetBackgroundReleaseRate(BytesPerSecond rate);
+
+ // Enables fork support.
+  // The allocator will continue to function correctly in the child after
+  // fork() is called.
+ static void EnableForkSupport();
+
+ using CreateSampleUserDataCallback = void*();
+ using CopySampleUserDataCallback = void*(void*);
+ using DestroySampleUserDataCallback = void(void*);
+
+  // Sets callbacks for lifetime control of custom user data attached to
+  // allocation samples.
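+  //
+  // Illustrative registration (sketch; the no-op callback bodies are purely
+  // hypothetical):
+  //   MallocExtension::SetSampleUserDataCallbacks(
+  //       +[]() -> void* { return nullptr; },
+  //       +[](void* p) -> void* { return p; },
+  //       +[](void*) {});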
+ static void SetSampleUserDataCallbacks(
+ CreateSampleUserDataCallback create,
+ CopySampleUserDataCallback copy,
+ DestroySampleUserDataCallback destroy);
+};
+
+} // namespace tcmalloc
+
+// The nallocx function allocates no memory, but it performs the same size
+// computation as the malloc function, and returns the real size of the
+// allocation that would result from the equivalent malloc function call.
+// The default weak implementation returns size unchanged, but tcmalloc
+// overrides it and returns the rounded-up size. See the following link for
+// details:
+// http://www.unix.com/man-page/freebsd/3/nallocx/
+extern "C" size_t nallocx(size_t size, int flags) noexcept;
+
+// The sdallocx function deallocates memory allocated by malloc or memalign. It
+// takes a size parameter to pass the original allocation size.
+//
+// The default weak implementation calls free(), but TCMalloc overrides it and
+// uses the size to improve deallocation performance.
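+//
+// Illustrative pairing with nallocx above (sketch; the 37-byte request is
+// arbitrary):
+//   void* p = malloc(37);
+//   size_t capacity = nallocx(37, 0);  // >= 37: what malloc(37) reserves.
+//   sdallocx(p, 37, 0);                // Free p with the original size hint.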
+extern "C" void sdallocx(void* ptr, size_t size, int flags) noexcept;
+
+namespace tcmalloc {
+
+// Pointer / capacity information as returned by
+// tcmalloc_size_returning_operator_new(). See
+// tcmalloc_size_returning_operator_new() for more information.
+struct sized_ptr_t {
+ void* p;
+ size_t n;
+};
+
+} // namespace tcmalloc
+
+// Allocates memory of at least the requested size.
+//
+// Returns a `sized_ptr_t` struct holding the allocated pointer, and the
+// capacity of the allocated memory, which may be larger than the requested
+// size.
+//
+// The returned pointer follows the alignment requirements of the standard new
+// operator. This function will terminate on failure, except for the APIs
+// accepting the std::nothrow parameter which will return {nullptr, 0} on
+// failure.
+//
+// The returned pointer must be freed calling the matching ::operator delete.
+//
+// If a sized operator delete is invoked, then the 'size' parameter passed to
+// delete must be greater than or equal to the original requested size, and
+// less than or equal to the capacity of the allocated memory as returned by
+// the `tcmalloc_size_returning_operator_new` method.
+//
+// If neither the original size nor the capacity is known, then the non-sized
+// operator delete can be invoked; however, this should be avoided, as it is
+// substantially less efficient.
+//
+// The default weak implementation allocates the memory using the corresponding
+// (matching) ::operator new(size_t, ...).
+//
+// This is a prototype API for the extension to C++ "size feedback in operator
+// new" proposal:
+// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2019/p0901r5.html
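+//
+// Usage sketch (illustrative only):
+//   tcmalloc::sized_ptr_t res = tcmalloc_size_returning_operator_new(24);
+//   // res.p points to at least 24 usable bytes; res.n is the capacity.
+//   ::operator delete(res.p, res.n);  // Sized delete with the capacity.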
+extern "C" {
+tcmalloc::sized_ptr_t tcmalloc_size_returning_operator_new(size_t size);
+tcmalloc::sized_ptr_t tcmalloc_size_returning_operator_new_nothrow(
+ size_t size) noexcept;
+
+// Aligned size returning new is only supported for libc++ because of issues
+// with libstdcxx.so linkage. See http://b/110969867 for background.
+#if defined(__cpp_aligned_new)
+
+// Identical to `tcmalloc_size_returning_operator_new` except that the returned
+// memory is aligned according to the `alignment` argument.
+tcmalloc::sized_ptr_t tcmalloc_size_returning_operator_new_aligned(
+ size_t size, std::align_val_t alignment);
+tcmalloc::sized_ptr_t tcmalloc_size_returning_operator_new_aligned_nothrow(
+ size_t size, std::align_val_t alignment) noexcept;
+
+#endif // __cpp_aligned_new
+
+} // extern "C"
+
+#ifndef MALLOCX_LG_ALIGN
+#define MALLOCX_LG_ALIGN(la) (la)
+#endif
+
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+// AllocationProfilingTokenBase tracks an on-going profiling session of sampled
+// allocations. The session ends when Stop() is called.
+//
+// This decouples the implementation details (of TCMalloc) from the interface,
+// allowing non-TCMalloc allocators (such as libc and sanitizers) to be provided
+// while allowing the library to compile and link.
+class AllocationProfilingTokenBase {
+ public:
+ // Explicitly declare the ctor to put it in the google_malloc section.
+ AllocationProfilingTokenBase() = default;
+
+ virtual ~AllocationProfilingTokenBase() = default;
+
+  // Finish the recording started during construction of this object.
+ //
+ // After the first call, Stop() will return an empty profile.
+ virtual Profile Stop() && = 0;
+};
+
+// ProfileBase contains a profile of allocations.
+//
+// This decouples the implementation details (of TCMalloc) from the interface,
+// allowing non-TCMalloc allocators (such as libc and sanitizers) to be provided
+// while allowing the library to compile and link.
+class ProfileBase {
+ public:
+ virtual ~ProfileBase() = default;
+
+ // For each sample in the profile, Iterate invokes the callback f on the
+ // sample.
+ virtual void Iterate(
+ absl::FunctionRef<void(const Profile::Sample&)> f) const = 0;
+
+ // The approximate interval between recorded samples of the event of interest.
+ // A period of 1 means every sample was recorded.
+ virtual int64_t Period() const = 0;
+
+ // The type of profile (live objects, allocated, etc.).
+ virtual ProfileType Type() const = 0;
+};
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+
+#endif // TCMALLOC_MALLOC_EXTENSION_H_
diff --git a/contrib/libs/tcmalloc/tcmalloc/malloc_extension_fuzz.cc b/contrib/libs/tcmalloc/tcmalloc/malloc_extension_fuzz.cc
new file mode 100644
index 0000000000..26335bdef8
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/malloc_extension_fuzz.cc
@@ -0,0 +1,42 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdint.h>
+
+#include <map>
+#include <string>
+
+#include "absl/types/optional.h"
+#include "tcmalloc/malloc_extension.h"
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* d, size_t size) {
+ using tcmalloc::MallocExtension;
+
+ const std::string property(reinterpret_cast<const char*>(d), size);
+ absl::optional<size_t> val = MallocExtension::GetNumericProperty(property);
+ if (!val.has_value()) {
+ // Rather than inspect the result of MallocExtension::GetProperties, we
+ // defer to the test in //tcmalloc/malloc_extension_test.cc to
+ // ensure that every key in GetProperties has a value returned by
+ // GetNumericProperty.
+ return 0;
+ }
+
+ std::map<std::string, MallocExtension::Property> properties =
+ MallocExtension::GetProperties();
+ if (properties.find(property) == properties.end()) {
+ __builtin_trap();
+ }
+ return 0;
+}
diff --git a/contrib/libs/tcmalloc/tcmalloc/malloc_extension_system_malloc_test.cc b/contrib/libs/tcmalloc/tcmalloc/malloc_extension_system_malloc_test.cc
new file mode 100644
index 0000000000..81e7afa010
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/malloc_extension_system_malloc_test.cc
@@ -0,0 +1,87 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// These tests assume TCMalloc is not linked in, and therefore the features
+// exposed by MallocExtension should be no-ops, but otherwise safe to call.
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/random/random.h"
+#include "tcmalloc/malloc_extension.h"
+
+namespace tcmalloc {
+namespace {
+
+TEST(MallocExtension, SnapshotCurrentIsEmpty) {
+ // Allocate memory to use the allocator.
+ absl::BitGen gen;
+ int bytes_remaining = 1 << 24;
+ std::vector<void*> ptrs;
+
+ while (bytes_remaining > 0) {
+ int size = absl::LogUniform<int>(gen, 0, 1 << 20);
+ ptrs.push_back(::operator new(size));
+ bytes_remaining -= size;
+ }
+
+ // All of the profiles should be empty.
+ ProfileType types[] = {
+ ProfileType::kHeap,
+ ProfileType::kFragmentation, ProfileType::kPeakHeap,
+ ProfileType::kAllocations,
+ };
+
+ for (auto t : types) {
+ SCOPED_TRACE(static_cast<int>(t));
+
+ Profile p = MallocExtension::SnapshotCurrent(t);
+ int samples = 0;
+ p.Iterate([&](const Profile::Sample&) { samples++; });
+
+ EXPECT_EQ(samples, 0);
+ }
+
+ for (void* ptr : ptrs) {
+ ::operator delete(ptr);
+ }
+}
+
+TEST(MallocExtension, AllocationProfile) {
+ auto token = MallocExtension::StartAllocationProfiling();
+
+ // Allocate memory to use the allocator.
+ absl::BitGen gen;
+ int bytes_remaining = 1 << 24;
+ std::vector<void*> ptrs;
+
+ while (bytes_remaining > 0) {
+ int size = absl::LogUniform<int>(gen, 0, 1 << 20);
+ ptrs.push_back(::operator new(size));
+ bytes_remaining -= size;
+ }
+
+ // Finish profiling and verify the profile is empty.
+ Profile p = std::move(token).Stop();
+ int samples = 0;
+ p.Iterate([&](const Profile::Sample&) { samples++; });
+
+ EXPECT_EQ(samples, 0);
+
+ for (void* ptr : ptrs) {
+ ::operator delete(ptr);
+ }
+}
+
+} // namespace
+} // namespace tcmalloc
diff --git a/contrib/libs/tcmalloc/tcmalloc/malloc_extension_test.cc b/contrib/libs/tcmalloc/tcmalloc/malloc_extension_test.cc
new file mode 100644
index 0000000000..5088806ff8
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/malloc_extension_test.cc
@@ -0,0 +1,67 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Test for TCMalloc implementation of MallocExtension
+
+#include "tcmalloc/malloc_extension.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/time/time.h"
+
+namespace tcmalloc {
+namespace {
+
+TEST(MallocExtension, BackgroundReleaseRate) {
+
+ // Mutate via MallocExtension.
+ MallocExtension::SetBackgroundReleaseRate(
+ MallocExtension::BytesPerSecond{100 << 20});
+
+ EXPECT_EQ(static_cast<size_t>(MallocExtension::GetBackgroundReleaseRate()),
+ 100 << 20);
+
+ // Disable release
+ MallocExtension::SetBackgroundReleaseRate(MallocExtension::BytesPerSecond{0});
+
+ EXPECT_EQ(static_cast<size_t>(MallocExtension::GetBackgroundReleaseRate()),
+ 0);
+}
+
+TEST(MallocExtension, SkipSubreleaseInterval) {
+
+ // Mutate via MallocExtension.
+ MallocExtension::SetSkipSubreleaseInterval(absl::Seconds(10));
+ EXPECT_EQ(MallocExtension::GetSkipSubreleaseInterval(), absl::Seconds(10));
+
+ // Disable skip subrelease
+ MallocExtension::SetSkipSubreleaseInterval(absl::ZeroDuration());
+ EXPECT_EQ(MallocExtension::GetSkipSubreleaseInterval(), absl::ZeroDuration());
+}
+
+TEST(MallocExtension, Properties) {
+ // Verify that every property under GetProperties also works with
+ // GetNumericProperty.
+ const auto properties = MallocExtension::GetProperties();
+ for (const auto& property : properties) {
+ absl::optional<size_t> scalar =
+ MallocExtension::GetNumericProperty(property.first);
+ // The value of the property itself may have changed, so just check that it
+ // is present.
+ EXPECT_THAT(scalar, testing::Ne(absl::nullopt)) << property.first;
+ }
+}
+
+} // namespace
+} // namespace tcmalloc
diff --git a/contrib/libs/tcmalloc/tcmalloc/mock_central_freelist.cc b/contrib/libs/tcmalloc/tcmalloc/mock_central_freelist.cc
new file mode 100644
index 0000000000..13308b947a
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/mock_central_freelist.cc
@@ -0,0 +1,64 @@
+// Copyright 2020 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/mock_central_freelist.h"
+
+#include "absl/base/internal/spinlock.h"
+#include "tcmalloc/internal/logging.h"
+
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+void MinimalFakeCentralFreeList::AllocateBatch(void** batch, int n) {
+ for (int i = 0; i < n; ++i) batch[i] = &batch[i];
+}
+
+void MinimalFakeCentralFreeList::FreeBatch(absl::Span<void*> batch) {
+ for (void* x : batch) CHECK_CONDITION(x != nullptr);
+}
+
+void MinimalFakeCentralFreeList::InsertRange(absl::Span<void*> batch) {
+ absl::base_internal::SpinLockHolder h(&lock_);
+ FreeBatch(batch);
+}
+
+int MinimalFakeCentralFreeList::RemoveRange(void** batch, int n) {
+ absl::base_internal::SpinLockHolder h(&lock_);
+ AllocateBatch(batch, n);
+ return n;
+}
+
+void FakeCentralFreeList::AllocateBatch(void** batch, int n) {
+ for (int i = 0; i < n; ++i) {
+ batch[i] = ::operator new(4);
+ }
+}
+
+void FakeCentralFreeList::FreeBatch(absl::Span<void*> batch) {
+ for (void* x : batch) {
+ ::operator delete(x);
+ }
+}
+
+void FakeCentralFreeList::InsertRange(absl::Span<void*> batch) {
+ FreeBatch(batch);
+}
+
+int FakeCentralFreeList::RemoveRange(void** batch, int n) {
+ AllocateBatch(batch, n);
+ return n;
+}
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
diff --git a/contrib/libs/tcmalloc/tcmalloc/mock_central_freelist.h b/contrib/libs/tcmalloc/tcmalloc/mock_central_freelist.h
new file mode 100644
index 0000000000..c2a56c0c60
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/mock_central_freelist.h
@@ -0,0 +1,89 @@
+// Copyright 2020 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TCMALLOC_MOCK_CENTRAL_FREELIST_H_
+#define TCMALLOC_MOCK_CENTRAL_FREELIST_H_
+
+#include <stddef.h>
+
+#include "gmock/gmock.h"
+#include "absl/base/internal/spinlock.h"
+#include "absl/types/span.h"
+
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+class FakeCentralFreeListBase {
+ public:
+ FakeCentralFreeListBase() {}
+ FakeCentralFreeListBase(const FakeCentralFreeListBase&) = delete;
+ FakeCentralFreeListBase& operator=(const FakeCentralFreeListBase&) = delete;
+
+ static constexpr void Init(size_t) {}
+};
+
+// CentralFreeList implementation that backs onto the system's malloc.
+//
+// Useful for unit tests and fuzz tests where identifying leaks and correctness
+// is important.
+class FakeCentralFreeList : public FakeCentralFreeListBase {
+ public:
+ void InsertRange(absl::Span<void*> batch);
+ int RemoveRange(void** batch, int N);
+
+ void AllocateBatch(void** batch, int n);
+ void FreeBatch(absl::Span<void*> batch);
+};
+
+// CentralFreeList implementation that does minimal work but no correctness
+// checking.
+//
+// Useful for benchmarks where you want to avoid unrelated expensive operations.
+class MinimalFakeCentralFreeList : public FakeCentralFreeListBase {
+ public:
+ void InsertRange(absl::Span<void*> batch);
+ int RemoveRange(void** batch, int N);
+
+ void AllocateBatch(void** batch, int n);
+ void FreeBatch(absl::Span<void*> batch);
+
+ private:
+ absl::base_internal::SpinLock lock_;
+};
+
+// CentralFreeList implementation that allows intercepting specific calls. By
+// default backs onto the system's malloc.
+//
+// Useful for intrusive unit tests that want to verify internal behavior.
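+//
+// E.g. (sketch), a test can set expectations on the mocked methods:
+//   MockCentralFreeList freelist;
+//   EXPECT_CALL(freelist, RemoveRange(testing::_, testing::_)).Times(1);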
+class RawMockCentralFreeList : public FakeCentralFreeList {
+ public:
+ RawMockCentralFreeList() : FakeCentralFreeList() {
+ ON_CALL(*this, InsertRange).WillByDefault([this](absl::Span<void*> batch) {
+ return static_cast<FakeCentralFreeList*>(this)->InsertRange(batch);
+ });
+ ON_CALL(*this, RemoveRange).WillByDefault([this](void** batch, int n) {
+ return static_cast<FakeCentralFreeList*>(this)->RemoveRange(batch, n);
+ });
+ }
+
+ MOCK_METHOD(void, InsertRange, (absl::Span<void*> batch));
+ MOCK_METHOD(int, RemoveRange, (void** batch, int N));
+};
+
+using MockCentralFreeList = testing::NiceMock<RawMockCentralFreeList>;
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+
+#endif // TCMALLOC_MOCK_CENTRAL_FREELIST_H_
diff --git a/contrib/libs/tcmalloc/tcmalloc/mock_transfer_cache.cc b/contrib/libs/tcmalloc/tcmalloc/mock_transfer_cache.cc
new file mode 100644
index 0000000000..b8b2bcf131
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/mock_transfer_cache.cc
@@ -0,0 +1,24 @@
+// Copyright 2020 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/mock_transfer_cache.h"
+
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+int FakeTransferCacheManager::DetermineSizeClassToEvict() { return 3; }
+bool FakeTransferCacheManager::ShrinkCache(int) { return true; }
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
diff --git a/contrib/libs/tcmalloc/tcmalloc/mock_transfer_cache.h b/contrib/libs/tcmalloc/tcmalloc/mock_transfer_cache.h
new file mode 100644
index 0000000000..5b5192f6dc
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/mock_transfer_cache.h
@@ -0,0 +1,310 @@
+// Copyright 2020 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TCMALLOC_MOCK_TRANSFER_CACHE_H_
+#define TCMALLOC_MOCK_TRANSFER_CACHE_H_
+
+#include <stddef.h>
+
+#include <algorithm>
+#include <memory>
+#include <random>
+
+#include "gmock/gmock.h"
+#include "absl/random/distributions.h"
+#include "absl/random/random.h"
+#include "tcmalloc/common.h"
+#include "tcmalloc/mock_central_freelist.h"
+#include "tcmalloc/transfer_cache_internals.h"
+
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+inline constexpr size_t kClassSize = 8;
+inline constexpr size_t kNumToMove = 32;
+inline constexpr int kSizeClass = 0;
+
+class FakeTransferCacheManagerBase {
+ public:
+ constexpr static size_t class_to_size(int size_class) { return kClassSize; }
+ constexpr static size_t num_objects_to_move(int size_class) {
+ // TODO(b/170732338): test with multiple different num_objects_to_move
+ return kNumToMove;
+ }
+ void* Alloc(size_t size) {
+ memory_.emplace_back(::operator new(size));
+ return memory_.back().get();
+ }
+ struct Free {
+ void operator()(void* b) { ::operator delete(b); }
+ };
+
+ private:
+ std::vector<std::unique_ptr<void, Free>> memory_;
+};
+
+// TransferCacheManager with basic stubs for everything.
+//
+// Useful for benchmarks where you want to avoid unrelated expensive operations.
+class FakeTransferCacheManager : public FakeTransferCacheManagerBase {
+ public:
+ int DetermineSizeClassToEvict();
+ bool ShrinkCache(int);
+};
+
+// TransferCacheManager which allows intercepting interesting methods.
+//
+// Useful for intrusive unit tests that want to verify internal behavior.
+class RawMockTransferCacheManager : public FakeTransferCacheManagerBase {
+ public:
+ RawMockTransferCacheManager() : FakeTransferCacheManagerBase() {
+    // We want single-threaded tests to be deterministic, so we use a
+    // deterministic generator. Because we don't know about the threading for
+    // our tests, we cannot keep the generator in a local variable.
+ ON_CALL(*this, ShrinkCache).WillByDefault([]() {
+ thread_local std::mt19937 gen{0};
+ return absl::Bernoulli(gen, 0.8);
+ });
+ ON_CALL(*this, GrowCache).WillByDefault([]() {
+ thread_local std::mt19937 gen{0};
+ return absl::Bernoulli(gen, 0.8);
+ });
+ ON_CALL(*this, DetermineSizeClassToEvict).WillByDefault([]() {
+ thread_local std::mt19937 gen{0};
+ return absl::Uniform<size_t>(gen, 1, kNumClasses);
+ });
+ }
+
+ MOCK_METHOD(int, DetermineSizeClassToEvict, ());
+ MOCK_METHOD(bool, ShrinkCache, (int size_class));
+ MOCK_METHOD(bool, GrowCache, (int size_class));
+};
+
+using MockTransferCacheManager = testing::NiceMock<RawMockTransferCacheManager>;
+
+// Wires up a largely functional TransferCache + TransferCacheManager +
+// MockCentralFreeList.
+//
+// By default, it fills allocations and responds sensibly. Because it backs
+// onto malloc/free, it will detect leaks and memory misuse when run in asan or
+// tsan.
+//
+// Exposes the underlying mocks to allow for more whitebox tests.
+//
+// Drains the cache and verifies that no data was lost in the destructor.
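+//
+// Usage sketch (illustrative; TransferCacheT is whatever transfer cache type
+// the test instantiates):
+//   FakeTransferCacheEnvironment<TransferCacheT> env;
+//   env.Insert(64);
+//   env.Remove(32);
+//   env.Drain();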
+template <typename TransferCacheT>
+class FakeTransferCacheEnvironment {
+ public:
+ using TransferCache = TransferCacheT;
+ using Manager = typename TransferCache::Manager;
+ using FreeList = typename TransferCache::FreeList;
+
+ static constexpr int kMaxObjectsToMove =
+ ::tcmalloc::tcmalloc_internal::kMaxObjectsToMove;
+ static constexpr int kBatchSize = Manager::num_objects_to_move(1);
+
+ FakeTransferCacheEnvironment() : manager_(), cache_(&manager_, 1) {}
+
+ ~FakeTransferCacheEnvironment() { Drain(); }
+
+ void Shrink() { cache_.ShrinkCache(kSizeClass); }
+ void Grow() { cache_.GrowCache(kSizeClass); }
+
+ void Insert(int n) {
+ std::vector<void*> bufs;
+ while (n > 0) {
+ int b = std::min(n, kBatchSize);
+ bufs.resize(b);
+ central_freelist().AllocateBatch(&bufs[0], b);
+ cache_.InsertRange(kSizeClass, absl::MakeSpan(bufs));
+ n -= b;
+ }
+ }
+
+ void Remove(int n) {
+ std::vector<void*> bufs;
+ while (n > 0) {
+ int b = std::min(n, kBatchSize);
+ bufs.resize(b);
+ int removed = cache_.RemoveRange(kSizeClass, &bufs[0], b);
+ // Ensure we make progress.
+ ASSERT_GT(removed, 0);
+ ASSERT_LE(removed, b);
+ central_freelist().FreeBatch({&bufs[0], static_cast<size_t>(removed)});
+ n -= removed;
+ }
+ }
+
+ void Drain() { Remove(cache_.tc_length()); }
+
+ void RandomlyPoke() {
+ absl::BitGen gen;
+ // We want a probabilistic steady state size:
+ // - grow/shrink balance on average
+ // - insert/remove balance on average
+ double choice = absl::Uniform(gen, 0.0, 1.0);
+ if (choice < 0.1) {
+ Shrink();
+ } else if (choice < 0.2) {
+ Grow();
+ } else if (choice < 0.3) {
+ cache_.HasSpareCapacity(kSizeClass);
+ } else if (choice < 0.65) {
+ Insert(absl::Uniform(gen, 1, kBatchSize));
+ } else {
+ Remove(absl::Uniform(gen, 1, kBatchSize));
+ }
+ }
+
+ TransferCache& transfer_cache() { return cache_; }
+
+ Manager& transfer_cache_manager() { return manager_; }
+
+ FreeList& central_freelist() { return cache_.freelist(); }
+
+ private:
+ Manager manager_;
+ TransferCache cache_;
+};
+
+// A fake transfer cache manager class which supports two size classes instead
+// of just the one. To make this work, we have to store the transfer caches
+// inside the cache manager, like in production code.
+template <typename FreeListT,
+ template <typename FreeList, typename Manager> class TransferCacheT>
+class TwoSizeClassManager : public FakeTransferCacheManagerBase {
+ public:
+ using FreeList = FreeListT;
+ using TransferCache = TransferCacheT<FreeList, TwoSizeClassManager>;
+
+ // This is 3 instead of 2 because we hard code cl == 0 to be invalid in many
+ // places. We only use cl 1 and 2 here.
+ static constexpr int kSizeClasses = 3;
+ static constexpr size_t kClassSize1 = 8;
+ static constexpr size_t kClassSize2 = 16;
+ static constexpr size_t kNumToMove1 = 32;
+ static constexpr size_t kNumToMove2 = 16;
+
+ TwoSizeClassManager() {
+ caches_.push_back(absl::make_unique<TransferCache>(this, 0));
+ caches_.push_back(absl::make_unique<TransferCache>(this, 1));
+ caches_.push_back(absl::make_unique<TransferCache>(this, 2));
+ }
+
+ constexpr static size_t class_to_size(int size_class) {
+ switch (size_class) {
+ case 1:
+ return kClassSize1;
+ case 2:
+ return kClassSize2;
+ default:
+ return 0;
+ }
+ }
+ constexpr static size_t num_objects_to_move(int size_class) {
+ switch (size_class) {
+ case 1:
+ return kNumToMove1;
+ case 2:
+ return kNumToMove2;
+ default:
+ return 0;
+ }
+ }
+
+ int DetermineSizeClassToEvict() { return evicting_from_; }
+
+ bool ShrinkCache(int size_class) {
+ return caches_[size_class]->ShrinkCache(size_class);
+ }
+
+ FreeList& central_freelist(int cl) { return caches_[cl]->freelist(); }
+
+ void InsertRange(int cl, absl::Span<void*> batch) {
+ caches_[cl]->InsertRange(cl, batch);
+ }
+
+ int RemoveRange(int cl, void** batch, int N) {
+ return caches_[cl]->RemoveRange(cl, batch, N);
+ }
+
+ bool HasSpareCapacity(int cl) { return caches_[cl]->HasSpareCapacity(cl); }
+
+ size_t tc_length(int cl) { return caches_[cl]->tc_length(); }
+
+ std::vector<std::unique_ptr<TransferCache>> caches_;
+
+ // From which size class to evict.
+ int evicting_from_ = 1;
+};
+
+template <template <typename FreeList, typename Manager> class TransferCacheT>
+class TwoSizeClassEnv {
+ public:
+ using FreeList = MockCentralFreeList;
+ using Manager = TwoSizeClassManager<FreeList, TransferCacheT>;
+ using TransferCache = typename Manager::TransferCache;
+
+ static constexpr int kMaxObjectsToMove =
+ ::tcmalloc::tcmalloc_internal::kMaxObjectsToMove;
+
+ explicit TwoSizeClassEnv() = default;
+
+ ~TwoSizeClassEnv() { Drain(); }
+
+ void Insert(int cl, int n) {
+ const size_t batch_size = Manager::num_objects_to_move(cl);
+ std::vector<void*> bufs;
+ while (n > 0) {
+ int b = std::min<int>(n, batch_size);
+ bufs.resize(b);
+ central_freelist(cl).AllocateBatch(&bufs[0], b);
+ manager_.InsertRange(cl, absl::MakeSpan(bufs));
+ n -= b;
+ }
+ }
+
+ void Remove(int cl, int n) {
+ const size_t batch_size = Manager::num_objects_to_move(cl);
+ std::vector<void*> bufs;
+ while (n > 0) {
+ const int b = std::min<int>(n, batch_size);
+ bufs.resize(b);
+ const int removed = manager_.RemoveRange(cl, &bufs[0], b);
+ // Ensure we make progress.
+ ASSERT_GT(removed, 0);
+ ASSERT_LE(removed, b);
+ central_freelist(cl).FreeBatch({&bufs[0], static_cast<size_t>(removed)});
+ n -= removed;
+ }
+ }
+
+ void Drain() {
+ for (int i = 0; i < Manager::kSizeClasses; ++i) {
+ Remove(i, manager_.tc_length(i));
+ }
+ }
+
+ Manager& transfer_cache_manager() { return manager_; }
+
+ FreeList& central_freelist(int cl) { return manager_.central_freelist(cl); }
+
+ private:
+ Manager manager_;
+};
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+
+#endif // TCMALLOC_MOCK_TRANSFER_CACHE_H_
diff --git a/contrib/libs/tcmalloc/tcmalloc/noruntime_size_classes.cc b/contrib/libs/tcmalloc/tcmalloc/noruntime_size_classes.cc
new file mode 100644
index 0000000000..c6dc90adcc
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/noruntime_size_classes.cc
@@ -0,0 +1,33 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/base/attributes.h"
+#include "tcmalloc/runtime_size_classes.h"
+#include "tcmalloc/size_class_info.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+// Default implementation doesn't load runtime size classes.
+// To enable runtime size classes, link with :runtime_size_classes.
+// This is in a separate library so that it doesn't get inlined inside common.cc
+ABSL_ATTRIBUTE_WEAK ABSL_ATTRIBUTE_NOINLINE int MaybeSizeClassesFromEnv(
+ int max_size, int max_classes, SizeClassInfo* parsed) {
+ return -1;
+}
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
diff --git a/contrib/libs/tcmalloc/tcmalloc/page_allocator.cc b/contrib/libs/tcmalloc/tcmalloc/page_allocator.cc
new file mode 100644
index 0000000000..e9599ef46a
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/page_allocator.cc
@@ -0,0 +1,196 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/page_allocator.h"
+
+#include <new>
+
+#include "tcmalloc/common.h"
+#include "tcmalloc/experiment.h"
+#include "tcmalloc/experiment_config.h"
+#include "tcmalloc/huge_page_aware_allocator.h"
+#include "tcmalloc/internal/environment.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/parameters.h"
+#include "tcmalloc/static_vars.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+int ABSL_ATTRIBUTE_WEAK default_want_hpaa();
+
+bool decide_want_hpaa() {
+#if defined(__PPC64__) && defined(TCMALLOC_SMALL_BUT_SLOW)
+  // In small-but-slow, we choose a kMinSystemAlloc size that is smaller than
+  // the hugepage size on PPC. If this situation changes, this static_assert
+  // will begin failing.
+ static_assert(kHugePageSize > kMinSystemAlloc,
+ "HPAA may now support PPC, update tests");
+ return false;
+#endif
+
+ const char *e =
+ tcmalloc::tcmalloc_internal::thread_safe_getenv("TCMALLOC_HPAA_CONTROL");
+ if (e) {
+ switch (e[0]) {
+ case '0':
+ if (kPageShift <= 12) {
+ return false;
+ }
+
+ if (default_want_hpaa != nullptr) {
+ int default_hpaa = default_want_hpaa();
+ if (default_hpaa < 0) {
+ return false;
+ }
+ }
+
+ Log(kLog, __FILE__, __LINE__,
+ "Runtime opt-out from HPAA requires building with "
+ "//tcmalloc:want_no_hpaa."
+ );
+ break;
+ case '1':
+ return true;
+ case '2':
+ return true;
+ default:
+ Crash(kCrash, __FILE__, __LINE__, "bad env var", e);
+ return false;
+ }
+ }
+
+ if (default_want_hpaa != nullptr) {
+ int default_hpaa = default_want_hpaa();
+ if (default_hpaa != 0) {
+ return default_hpaa > 0;
+ }
+ }
+
+#if defined(TCMALLOC_SMALL_BUT_SLOW)
+ // HPAA is neither small nor slow :)
+ return false;
+#else
+ return true;
+#endif
+}
+
+bool want_hpaa() {
+ static bool use = decide_want_hpaa();
+
+ return use;
+}
+
+PageAllocator::PageAllocator() {
+ const bool kUseHPAA = want_hpaa();
+ if (kUseHPAA) {
+ normal_impl_[0] =
+ new (&choices_[0].hpaa) HugePageAwareAllocator(MemoryTag::kNormal);
+ if (Static::numa_topology().numa_aware()) {
+ normal_impl_[1] =
+ new (&choices_[1].hpaa) HugePageAwareAllocator(MemoryTag::kNormalP1);
+ }
+ sampled_impl_ = new (&choices_[kNumaPartitions + 0].hpaa)
+ HugePageAwareAllocator(MemoryTag::kSampled);
+ alg_ = HPAA;
+ } else {
+ normal_impl_[0] = new (&choices_[0].ph) PageHeap(MemoryTag::kNormal);
+ if (Static::numa_topology().numa_aware()) {
+ normal_impl_[1] = new (&choices_[1].ph) PageHeap(MemoryTag::kNormalP1);
+ }
+ sampled_impl_ =
+ new (&choices_[kNumaPartitions + 0].ph) PageHeap(MemoryTag::kSampled);
+ alg_ = PAGE_HEAP;
+ }
+}
+
+void PageAllocator::ShrinkToUsageLimit() {
+ if (limit_ == std::numeric_limits<size_t>::max()) {
+ return;
+ }
+ BackingStats s = stats();
+ size_t backed = s.system_bytes - s.unmapped_bytes + Static::metadata_bytes();
+ if (backed <= limit_) {
+ // We're already fine.
+ return;
+ }
+
+ limit_hits_++;
+ const size_t overage = backed - limit_;
+ const Length pages = LengthFromBytes(overage + kPageSize - 1);
+ if (ShrinkHardBy(pages)) {
+ return;
+ }
+
+ // We're still not below limit.
+ if (limit_is_hard_) {
+ limit_ = std::numeric_limits<decltype(limit_)>::max();
+ Crash(
+ kCrash, __FILE__, __LINE__,
+ "Hit hard tcmalloc heap limit (e.g. --tcmalloc_heap_size_hard_limit). "
+ "Aborting.\nIt was most likely set to catch "
+ "allocations that would crash the process anyway. "
+ );
+ }
+
+ // Print logs once.
+ static bool warned = false;
+ if (warned) return;
+ warned = true;
+ Log(kLogWithStack, __FILE__, __LINE__, "Couldn't respect usage limit of ",
+ limit_, "and OOM is likely to follow.");
+}
+
+bool PageAllocator::ShrinkHardBy(Length pages) {
+ Length ret = ReleaseAtLeastNPages(pages);
+ if (alg_ == HPAA) {
+ if (pages <= ret) {
+ // We released target amount.
+ return true;
+ }
+
+ // At this point, we have no choice but to break up hugepages.
+ // However, if the client has turned off subrelease, and is using hard
+ // limits, then respect desire to do no subrelease ever.
+ if (limit_is_hard_ && !Parameters::hpaa_subrelease()) return false;
+
+ static bool warned_hugepages = false;
+ if (!warned_hugepages) {
+ Log(kLogWithStack, __FILE__, __LINE__, "Couldn't respect usage limit of ",
+ limit_, "without breaking hugepages - performance will drop");
+ warned_hugepages = true;
+ }
+ for (int partition = 0; partition < active_numa_partitions(); partition++) {
+ ret += static_cast<HugePageAwareAllocator *>(normal_impl_[partition])
+ ->ReleaseAtLeastNPagesBreakingHugepages(pages - ret);
+ if (ret >= pages) {
+ return true;
+ }
+ }
+
+ ret += static_cast<HugePageAwareAllocator *>(sampled_impl_)
+ ->ReleaseAtLeastNPagesBreakingHugepages(pages - ret);
+ }
+  // Return true if we got back under the limit.
+ return (pages <= ret);
+}
+
+size_t PageAllocator::active_numa_partitions() const {
+ return Static::numa_topology().active_partitions();
+}
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
diff --git a/contrib/libs/tcmalloc/tcmalloc/page_allocator.h b/contrib/libs/tcmalloc/tcmalloc/page_allocator.h
new file mode 100644
index 0000000000..611482f999
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/page_allocator.h
@@ -0,0 +1,241 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TCMALLOC_PAGE_ALLOCATOR_H_
+#define TCMALLOC_PAGE_ALLOCATOR_H_
+
+#include <inttypes.h>
+#include <stddef.h>
+
+#include <utility>
+
+#include "absl/base/thread_annotations.h"
+#include "tcmalloc/common.h"
+#include "tcmalloc/huge_page_aware_allocator.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/internal/optimization.h"
+#include "tcmalloc/page_allocator_interface.h"
+#include "tcmalloc/page_heap.h"
+#include "tcmalloc/pages.h"
+#include "tcmalloc/span.h"
+#include "tcmalloc/stats.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+class PageAllocator {
+ public:
+ PageAllocator();
+ ~PageAllocator() = delete;
+ // Allocate a run of "n" pages. Returns zero if out of memory.
+ // Caller should not pass "n == 0" -- instead, n should have
+ // been rounded up already.
+ //
+ // Any address in the returned Span is guaranteed to satisfy
+ // GetMemoryTag(addr) == "tag".
+ Span* New(Length n, MemoryTag tag) ABSL_LOCKS_EXCLUDED(pageheap_lock);
+
+ // As New, but the returned span is aligned to a <align>-page boundary.
+ // <align> must be a power of two.
+ Span* NewAligned(Length n, Length align, MemoryTag tag)
+ ABSL_LOCKS_EXCLUDED(pageheap_lock);
+
+ // Delete the span "[p, p+n-1]".
+ // REQUIRES: span was returned by earlier call to New() with the same value of
+ // "tag" and has not yet been deleted.
+ void Delete(Span* span, MemoryTag tag)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+
+ BackingStats stats() const ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+
+ void GetSmallSpanStats(SmallSpanStats* result)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+
+ void GetLargeSpanStats(LargeSpanStats* result)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+
+ // Try to release at least num_pages for reuse by the OS. Returns
+ // the actual number of pages released, which may be less than
+ // num_pages if there weren't enough pages to release. The result
+ // may also be larger than num_pages since page_heap might decide to
+ // release one large range instead of fragmenting it into two
+ // smaller released and unreleased ranges.
+ Length ReleaseAtLeastNPages(Length num_pages)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+
+ // Prints stats about the page heap to *out.
+ void Print(Printer* out, MemoryTag tag) ABSL_LOCKS_EXCLUDED(pageheap_lock);
+ void PrintInPbtxt(PbtxtRegion* region, MemoryTag tag)
+ ABSL_LOCKS_EXCLUDED(pageheap_lock);
+
+ void set_limit(size_t limit, bool is_hard) ABSL_LOCKS_EXCLUDED(pageheap_lock);
+ std::pair<size_t, bool> limit() const ABSL_LOCKS_EXCLUDED(pageheap_lock);
+ int64_t limit_hits() const ABSL_LOCKS_EXCLUDED(pageheap_lock);
+
+ // If we have a usage limit set, ensure we're not violating it from our latest
+ // allocation.
+ void ShrinkToUsageLimit() ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+
+ const PageAllocInfo& info(MemoryTag tag) const
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+
+ enum Algorithm {
+ PAGE_HEAP = 0,
+ HPAA = 1,
+ };
+
+ Algorithm algorithm() const { return alg_; }
+
+ private:
+ bool ShrinkHardBy(Length pages) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+
+ ABSL_ATTRIBUTE_RETURNS_NONNULL PageAllocatorInterface* impl(
+ MemoryTag tag) const;
+
+ size_t active_numa_partitions() const;
+
+ static constexpr size_t kNumHeaps = kNumaPartitions + 1;
+
+ union Choices {
+ Choices() : dummy(0) {}
+ ~Choices() {}
+ int dummy;
+ PageHeap ph;
+ HugePageAwareAllocator hpaa;
+ } choices_[kNumHeaps];
+ std::array<PageAllocatorInterface*, kNumaPartitions> normal_impl_;
+ PageAllocatorInterface* sampled_impl_;
+ Algorithm alg_;
+
+ bool limit_is_hard_{false};
+ // Max size of backed spans we will attempt to maintain.
+ size_t limit_{std::numeric_limits<size_t>::max()};
+ // The number of times the limit has been hit.
+ int64_t limit_hits_{0};
+};
+
+inline PageAllocatorInterface* PageAllocator::impl(MemoryTag tag) const {
+ switch (tag) {
+ case MemoryTag::kNormalP0:
+ return normal_impl_[0];
+ case MemoryTag::kNormalP1:
+ return normal_impl_[1];
+ case MemoryTag::kSampled:
+ return sampled_impl_;
+ default:
+ ASSUME(false);
+ __builtin_unreachable();
+ }
+}
+
+inline Span* PageAllocator::New(Length n, MemoryTag tag) {
+ return impl(tag)->New(n);
+}
+
+inline Span* PageAllocator::NewAligned(Length n, Length align, MemoryTag tag) {
+ return impl(tag)->NewAligned(n, align);
+}
+
+inline void PageAllocator::Delete(Span* span, MemoryTag tag) {
+ impl(tag)->Delete(span);
+}
+
+inline BackingStats PageAllocator::stats() const {
+ BackingStats ret = normal_impl_[0]->stats();
+ for (int partition = 1; partition < active_numa_partitions(); partition++) {
+ ret += normal_impl_[partition]->stats();
+ }
+ ret += sampled_impl_->stats();
+ return ret;
+}
+
+inline void PageAllocator::GetSmallSpanStats(SmallSpanStats* result) {
+ SmallSpanStats normal, sampled;
+ for (int partition = 0; partition < active_numa_partitions(); partition++) {
+ SmallSpanStats part_stats;
+ normal_impl_[partition]->GetSmallSpanStats(&part_stats);
+ normal += part_stats;
+ }
+ sampled_impl_->GetSmallSpanStats(&sampled);
+ *result = normal + sampled;
+}
+
+inline void PageAllocator::GetLargeSpanStats(LargeSpanStats* result) {
+ LargeSpanStats normal, sampled;
+ for (int partition = 0; partition < active_numa_partitions(); partition++) {
+ LargeSpanStats part_stats;
+ normal_impl_[partition]->GetLargeSpanStats(&part_stats);
+ normal += part_stats;
+ }
+ sampled_impl_->GetLargeSpanStats(&sampled);
+ *result = normal + sampled;
+}
+
+inline Length PageAllocator::ReleaseAtLeastNPages(Length num_pages) {
+ Length released;
+ for (int partition = 0; partition < active_numa_partitions(); partition++) {
+ released +=
+ normal_impl_[partition]->ReleaseAtLeastNPages(num_pages - released);
+ if (released >= num_pages) {
+ return released;
+ }
+ }
+
+ released += sampled_impl_->ReleaseAtLeastNPages(num_pages - released);
+ return released;
+}
+
+inline void PageAllocator::Print(Printer* out, MemoryTag tag) {
+ const absl::string_view label = MemoryTagToLabel(tag);
+ if (tag != MemoryTag::kNormal) {
+ out->printf("\n>>>>>>> Begin %s page allocator <<<<<<<\n", label);
+ }
+ impl(tag)->Print(out);
+ if (tag != MemoryTag::kNormal) {
+ out->printf(">>>>>>> End %s page allocator <<<<<<<\n", label);
+ }
+}
+
+inline void PageAllocator::PrintInPbtxt(PbtxtRegion* region, MemoryTag tag) {
+ PbtxtRegion pa = region->CreateSubRegion("page_allocator");
+ pa.PrintRaw("tag", MemoryTagToLabel(tag));
+ impl(tag)->PrintInPbtxt(&pa);
+}
+
+inline void PageAllocator::set_limit(size_t limit, bool is_hard) {
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+ limit_ = limit;
+ limit_is_hard_ = is_hard;
+}
+
+inline std::pair<size_t, bool> PageAllocator::limit() const {
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+ return {limit_, limit_is_hard_};
+}
+
+inline int64_t PageAllocator::limit_hits() const {
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+ return limit_hits_;
+}
+
+inline const PageAllocInfo& PageAllocator::info(MemoryTag tag) const {
+ return impl(tag)->info();
+}
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // TCMALLOC_PAGE_ALLOCATOR_H_
diff --git a/contrib/libs/tcmalloc/tcmalloc/page_allocator_interface.cc b/contrib/libs/tcmalloc/tcmalloc/page_allocator_interface.cc
new file mode 100644
index 0000000000..3173247acb
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/page_allocator_interface.cc
@@ -0,0 +1,89 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/page_allocator_interface.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "tcmalloc/internal/environment.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/internal/optimization.h"
+#include "tcmalloc/internal/util.h"
+#include "tcmalloc/static_vars.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+static int OpenLog(MemoryTag tag) {
+ const char *fname = [&]() {
+ switch (tag) {
+ case MemoryTag::kNormal:
+ return thread_safe_getenv("TCMALLOC_PAGE_LOG_FILE");
+ case MemoryTag::kNormalP1:
+ return thread_safe_getenv("TCMALLOC_PAGE_LOG_FILE_P1");
+ case MemoryTag::kSampled:
+ return thread_safe_getenv("TCMALLOC_SAMPLED_PAGE_LOG_FILE");
+ default:
+ ASSUME(false);
+ __builtin_unreachable();
+ }
+ }();
+
+ if (ABSL_PREDICT_TRUE(!fname)) return -1;
+
+ if (getuid() != geteuid() || getgid() != getegid()) {
+ Log(kLog, __FILE__, __LINE__, "Cannot take a pagetrace from setuid binary");
+ return -1;
+ }
+ char buf[PATH_MAX];
+ // Tag file with PID - handles forking children much better.
+ int pid = getpid();
+ // Blaze tests can output here for recovery of the output file
+ const char *test_dir = thread_safe_getenv("TEST_UNDECLARED_OUTPUTS_DIR");
+ if (test_dir) {
+ snprintf(buf, sizeof(buf), "%s/%s.%d", test_dir, fname, pid);
+ } else {
+ snprintf(buf, sizeof(buf), "%s.%d", fname, pid);
+ }
+ int fd =
+ signal_safe_open(buf, O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
+
+ if (fd < 0) {
+ Crash(kCrash, __FILE__, __LINE__, fd, errno, fname);
+ }
+
+ return fd;
+}
+
+PageAllocatorInterface::PageAllocatorInterface(const char *label, MemoryTag tag)
+ : PageAllocatorInterface(label, &Static::pagemap(), tag) {}
+
+PageAllocatorInterface::PageAllocatorInterface(const char *label, PageMap *map,
+ MemoryTag tag)
+ : info_(label, OpenLog(tag)), pagemap_(map), tag_(tag) {}
+
+PageAllocatorInterface::~PageAllocatorInterface() {
+ // This is part of tcmalloc statics - they must be immortal.
+ Crash(kCrash, __FILE__, __LINE__, "should never destroy this");
+}
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
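
The naming scheme applied by OpenLog() above can be seen in isolation below. This is a rough, self-contained sketch, not part of this patch; PageLogPath is a hypothetical helper, and the only assumptions are the environment variable names already used by OpenLog().

#include <cstdio>
#include <cstdlib>
#include <string>
#include <unistd.h>

// Hypothetical helper mirroring OpenLog(): the trace file is "<fname>.<pid>",
// optionally placed under TEST_UNDECLARED_OUTPUTS_DIR when running as a test.
std::string PageLogPath(const char* fname) {
  char buf[4096];
  const char* test_dir = std::getenv("TEST_UNDECLARED_OUTPUTS_DIR");
  if (test_dir != nullptr) {
    std::snprintf(buf, sizeof(buf), "%s/%s.%d", test_dir, fname,
                  static_cast<int>(getpid()));
  } else {
    std::snprintf(buf, sizeof(buf), "%s.%d", fname,
                  static_cast<int>(getpid()));
  }
  return buf;
}

int main() {
  // With TCMALLOC_PAGE_LOG_FILE=/tmp/pagelog and pid 1234 this prints
  // "/tmp/pagelog.1234".
  if (const char* fname = std::getenv("TCMALLOC_PAGE_LOG_FILE")) {
    std::printf("%s\n", PageLogPath(fname).c_str());
  }
  return 0;
}
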
diff --git a/contrib/libs/tcmalloc/tcmalloc/page_allocator_interface.h b/contrib/libs/tcmalloc/tcmalloc/page_allocator_interface.h
new file mode 100644
index 0000000000..cf1dc67897
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/page_allocator_interface.h
@@ -0,0 +1,97 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TCMALLOC_PAGE_ALLOCATOR_INTERFACE_H_
+#define TCMALLOC_PAGE_ALLOCATOR_INTERFACE_H_
+
+#include <stddef.h>
+
+#include <limits>
+#include <utility>
+
+#include "absl/base/internal/spinlock.h"
+#include "absl/base/thread_annotations.h"
+#include "tcmalloc/common.h"
+#include "tcmalloc/span.h"
+#include "tcmalloc/stats.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+class PageMap;
+
+class PageAllocatorInterface {
+ public:
+ PageAllocatorInterface(const char* label, MemoryTag tag);
+ // For testing: use a non-default pagemap.
+ PageAllocatorInterface(const char* label, PageMap* map, MemoryTag tag);
+ virtual ~PageAllocatorInterface();
+ // Allocate a run of "n" pages. Returns nullptr if out of memory.
+ // Caller should not pass "n == 0" -- instead, n should have
+ // been rounded up already.
+ virtual Span* New(Length n) ABSL_LOCKS_EXCLUDED(pageheap_lock) = 0;
+
+ // As New, but the returned span is aligned to a <align>-page boundary.
+ // <align> must be a power of two.
+ virtual Span* NewAligned(Length n, Length align)
+ ABSL_LOCKS_EXCLUDED(pageheap_lock) = 0;
+
+ // Delete the span "[p, p+n-1]".
+ // REQUIRES: span was returned by earlier call to New() and
+ // has not yet been deleted.
+ virtual void Delete(Span* span)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) = 0;
+
+ virtual BackingStats stats() const
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) = 0;
+
+ virtual void GetSmallSpanStats(SmallSpanStats* result)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) = 0;
+
+ virtual void GetLargeSpanStats(LargeSpanStats* result)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) = 0;
+
+ // Try to release at least num_pages for reuse by the OS. Returns
+ // the actual number of pages released, which may be less than
+ // num_pages if there weren't enough pages to release. The result
+ // may also be larger than num_pages since page_heap might decide to
+ // release one large range instead of fragmenting it into two
+ // smaller released and unreleased ranges.
+ virtual Length ReleaseAtLeastNPages(Length num_pages)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) = 0;
+
+ // Prints stats about the page heap to *out.
+ virtual void Print(Printer* out) ABSL_LOCKS_EXCLUDED(pageheap_lock) = 0;
+
+ // Prints stats about the page heap in pbtxt format.
+ //
+ // TODO(b/130249686): Remove this one and make `Print` print in pbtxt.
+ virtual void PrintInPbtxt(PbtxtRegion* region)
+ ABSL_LOCKS_EXCLUDED(pageheap_lock) = 0;
+
+ const PageAllocInfo& info() const { return info_; }
+
+ protected:
+ PageAllocInfo info_ ABSL_GUARDED_BY(pageheap_lock);
+ PageMap* pagemap_;
+
+ MemoryTag tag_; // The type of tagged memory this heap manages
+};
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // TCMALLOC_PAGE_ALLOCATOR_INTERFACE_H_
diff --git a/contrib/libs/tcmalloc/tcmalloc/page_allocator_test.cc b/contrib/libs/tcmalloc/tcmalloc/page_allocator_test.cc
new file mode 100644
index 0000000000..d302c085a9
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/page_allocator_test.cc
@@ -0,0 +1,145 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Tests for infrastructure common to page allocator implementations
+// (stats and logging).
+#include "tcmalloc/page_allocator.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <memory>
+#include <new>
+#include <string>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/base/internal/spinlock.h"
+#include "absl/memory/memory.h"
+#include "absl/strings/str_cat.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/malloc_extension.h"
+#include "tcmalloc/page_allocator_test_util.h"
+#include "tcmalloc/static_vars.h"
+#include "tcmalloc/stats.h"
+
+namespace tcmalloc {
+namespace tcmalloc_internal {
+namespace {
+
+class PageAllocatorTest : public testing::Test {
+ protected:
+ // Not in constructor so subclasses can mess about with environment
+ // variables.
+ void SetUp() override {
+ // If this test is not linked against TCMalloc, the global arena used for
+ // metadata will not be initialized.
+ Static::InitIfNecessary();
+
+ before_ = MallocExtension::GetRegionFactory();
+ extra_ = new ExtraRegionFactory(before_);
+ MallocExtension::SetRegionFactory(extra_);
+ void *p = malloc(sizeof(PageAllocator));
+ allocator_ = new (p) PageAllocator;
+ }
+ void TearDown() override {
+ MallocExtension::SetRegionFactory(before_);
+ delete extra_;
+ free(allocator_);
+ }
+
+ Span *New(Length n) { return allocator_->New(n, MemoryTag::kNormal); }
+ Span *NewAligned(Length n, Length align) {
+ return allocator_->NewAligned(n, align, MemoryTag::kNormal);
+ }
+ void Delete(Span *s) {
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+ allocator_->Delete(s, MemoryTag::kNormal);
+ }
+
+ Length Release(Length n) {
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+ return allocator_->ReleaseAtLeastNPages(n);
+ }
+
+ std::string Print() {
+ std::vector<char> buf(1024 * 1024);
+ Printer out(&buf[0], buf.size());
+ allocator_->Print(&out, MemoryTag::kNormal);
+
+ return std::string(&buf[0]);
+ }
+
+ PageAllocator *allocator_;
+ ExtraRegionFactory *extra_;
+ AddressRegionFactory *before_;
+};
+
+// We've already tested in stats_test that PageAllocInfo keeps good stats;
+// here we're just testing that we make the proper Record calls.
+TEST_F(PageAllocatorTest, Record) {
+ for (int i = 0; i < 15; ++i) {
+ Delete(New(Length(1)));
+ }
+
+ std::vector<Span *> spans;
+ for (int i = 0; i < 20; ++i) {
+ spans.push_back(New(Length(2)));
+ }
+
+ for (int i = 0; i < 25; ++i) {
+ Delete(NewAligned(Length(3), Length(2)));
+ }
+ {
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+ auto info = allocator_->info(MemoryTag::kNormal);
+
+ CHECK_CONDITION(15 == info.counts_for(Length(1)).nalloc);
+ CHECK_CONDITION(15 == info.counts_for(Length(1)).nfree);
+
+ CHECK_CONDITION(20 == info.counts_for(Length(2)).nalloc);
+ CHECK_CONDITION(0 == info.counts_for(Length(2)).nfree);
+
+ CHECK_CONDITION(25 == info.counts_for(Length(3)).nalloc);
+ CHECK_CONDITION(25 == info.counts_for(Length(3)).nfree);
+
+ for (auto i = Length(4); i <= kMaxPages; ++i) {
+ CHECK_CONDITION(0 == info.counts_for(i).nalloc);
+ CHECK_CONDITION(0 == info.counts_for(i).nfree);
+ }
+
+ const Length absurd =
+ Length(uintptr_t{1} << (kAddressBits - 1 - kPageShift));
+ for (Length i = kMaxPages + Length(1); i < absurd; i *= 2) {
+ CHECK_CONDITION(0 == info.counts_for(i).nalloc);
+ CHECK_CONDITION(0 == info.counts_for(i).nfree);
+ }
+ }
+ for (auto s : spans) Delete(s);
+}
+
+// And that we call the print method properly.
+TEST_F(PageAllocatorTest, PrintIt) {
+ Delete(New(Length(1)));
+ std::string output = Print();
+ EXPECT_THAT(output, testing::ContainsRegex("stats on allocation sizes"));
+}
+
+} // namespace
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
diff --git a/contrib/libs/tcmalloc/tcmalloc/page_allocator_test_util.h b/contrib/libs/tcmalloc/tcmalloc/page_allocator_test_util.h
new file mode 100644
index 0000000000..55f134bfdd
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/page_allocator_test_util.h
@@ -0,0 +1,79 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TCMALLOC_HUGE_PAGE_AWARE_ALLOCATOR_TEST_UTIL_H_
+#define TCMALLOC_HUGE_PAGE_AWARE_ALLOCATOR_TEST_UTIL_H_
+
+#include <tuple>
+#include <utility>
+
+#include "tcmalloc/malloc_extension.h"
+
+// TODO(b/116000878): Remove dependency on common.h if it causes ODR issues.
+#include "tcmalloc/common.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+// AddressRegion that adds some padding on either side of each
+// allocation. This prevents multiple PageAllocators in the system
+// from noticing one another's presence in the pagemap.
+class ExtraRegion : public AddressRegion {
+ public:
+ explicit ExtraRegion(AddressRegion *under) : under_(under) {}
+
+ std::pair<void *, size_t> Alloc(size_t size, size_t alignment) override {
+ size_t big = size + alignment + alignment;
+ // Can't pad if allocation is within 2 * alignment of region size.
+ if (big > kMinMmapAlloc) {
+ return under_->Alloc(size, alignment);
+ }
+ void *ptr;
+ size_t actual_size;
+ std::tie(ptr, actual_size) = under_->Alloc(big, alignment);
+ if (!ptr) return {nullptr, 0};
+ actual_size = actual_size - alignment * 2;
+ return {static_cast<char *>(ptr) + alignment, actual_size};
+ }
+
+ private:
+ AddressRegion *under_;
+};
+
+class ExtraRegionFactory : public AddressRegionFactory {
+ public:
+ explicit ExtraRegionFactory(AddressRegionFactory *under) : under_(under) {}
+
+ AddressRegion *Create(void *start, size_t size, UsageHint hint) override {
+ AddressRegion *underlying_region = under_->Create(start, size, hint);
+ CHECK_CONDITION(underlying_region);
+ void *region_space = MallocInternal(sizeof(ExtraRegion));
+ CHECK_CONDITION(region_space);
+ return new (region_space) ExtraRegion(underlying_region);
+ }
+
+ size_t GetStats(absl::Span<char> buffer) override {
+ return under_->GetStats(buffer);
+ }
+
+ private:
+ AddressRegionFactory *under_;
+};
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // TCMALLOC_HUGE_PAGE_AWARE_ALLOCATOR_TEST_UTIL_H_
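
The padding arithmetic in ExtraRegion::Alloc above is shown stand-alone below. This is a rough sketch using std::aligned_alloc as a stand-in for the underlying AddressRegion; PaddedAlloc and the sizes are illustrative, not part of the test utilities.

#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <utility>

// Hypothetical stand-alone version of the "pad both sides" trick: request
// size + 2 * alignment bytes and hand back only the middle, so the leading
// and trailing `alignment` bytes act as guard padding between neighbours.
std::pair<void*, size_t> PaddedAlloc(size_t size, size_t alignment) {
  const size_t big = size + 2 * alignment;
  void* raw = std::aligned_alloc(alignment, big);  // underlying allocation
  if (raw == nullptr) return {nullptr, 0};
  return {static_cast<char*>(raw) + alignment, big - 2 * alignment};
}

int main() {
  auto [p, n] = PaddedAlloc(4096, 4096);
  if (p == nullptr) return 1;
  std::printf("%zu usable bytes at %p, still 4 KiB aligned: %d\n", n, p,
              static_cast<int>(reinterpret_cast<uintptr_t>(p) % 4096 == 0));
  std::free(static_cast<char*>(p) - 4096);  // free the original block
  return 0;
}
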
diff --git a/contrib/libs/tcmalloc/tcmalloc/page_heap.cc b/contrib/libs/tcmalloc/tcmalloc/page_heap.cc
new file mode 100644
index 0000000000..c6b4c6dbd1
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/page_heap.cc
@@ -0,0 +1,528 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/page_heap.h"
+
+#include <stddef.h>
+
+#include <limits>
+
+#include "absl/base/internal/cycleclock.h"
+#include "absl/base/internal/spinlock.h"
+#include "absl/numeric/bits.h"
+#include "tcmalloc/common.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/page_heap_allocator.h"
+#include "tcmalloc/pagemap.h"
+#include "tcmalloc/pages.h"
+#include "tcmalloc/parameters.h"
+#include "tcmalloc/static_vars.h"
+#include "tcmalloc/system-alloc.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+// Helper function to record the span's boundary pages in the pagemap
+void PageHeap::RecordSpan(Span* span) {
+ pagemap_->Set(span->first_page(), span);
+ if (span->num_pages() > Length(1)) {
+ pagemap_->Set(span->last_page(), span);
+ }
+}
+
+PageHeap::PageHeap(MemoryTag tag) : PageHeap(&Static::pagemap(), tag) {}
+
+PageHeap::PageHeap(PageMap* map, MemoryTag tag)
+ : PageAllocatorInterface("PageHeap", map, tag),
+ // Start scavenging at kMaxPages list
+ release_index_(kMaxPages.raw_num()) {}
+
+Span* PageHeap::SearchFreeAndLargeLists(Length n, bool* from_returned) {
+ ASSERT(Check());
+ ASSERT(n > Length(0));
+
+ // Find first size >= n that has a non-empty list
+ for (Length s = n; s < kMaxPages; ++s) {
+ SpanList* ll = &free_[s.raw_num()].normal;
+ // If we're lucky, ll is non-empty, meaning it has a suitable span.
+ if (!ll->empty()) {
+ ASSERT(ll->first()->location() == Span::ON_NORMAL_FREELIST);
+ *from_returned = false;
+ return Carve(ll->first(), n);
+ }
+ // Alternatively, maybe there's a usable returned span.
+ ll = &free_[s.raw_num()].returned;
+ if (!ll->empty()) {
+ ASSERT(ll->first()->location() == Span::ON_RETURNED_FREELIST);
+ *from_returned = true;
+ return Carve(ll->first(), n);
+ }
+ }
+ // No luck in the free lists; our last chance is the list of large spans.
+ return AllocLarge(n, from_returned); // May be NULL
+}
+
+Span* PageHeap::AllocateSpan(Length n, bool* from_returned) {
+ ASSERT(Check());
+ Span* result = SearchFreeAndLargeLists(n, from_returned);
+ if (result != nullptr) return result;
+
+ // Grow the heap and try again.
+ if (!GrowHeap(n)) {
+ ASSERT(Check());
+ return nullptr;
+ }
+
+ result = SearchFreeAndLargeLists(n, from_returned);
+ // our new memory should be unbacked
+ ASSERT(*from_returned);
+ return result;
+}
+
+Span* PageHeap::New(Length n) {
+ ASSERT(n > Length(0));
+ bool from_returned;
+ Span* result;
+ {
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+ result = AllocateSpan(n, &from_returned);
+ if (result) Static::page_allocator().ShrinkToUsageLimit();
+ if (result) info_.RecordAlloc(result->first_page(), result->num_pages());
+ }
+
+ if (result != nullptr && from_returned) {
+ SystemBack(result->start_address(), result->bytes_in_span());
+ }
+
+ ASSERT(!result || GetMemoryTag(result->start_address()) == tag_);
+ return result;
+}
+
+static bool IsSpanBetter(Span* span, Span* best, Length n) {
+ if (span->num_pages() < n) {
+ return false;
+ }
+ if (best == nullptr) {
+ return true;
+ }
+ if (span->num_pages() < best->num_pages()) {
+ return true;
+ }
+ if (span->num_pages() > best->num_pages()) {
+ return false;
+ }
+ return span->first_page() < best->first_page();
+}
+
+// We could do slightly more efficient things here (we do some
+// unnecessary Carves in New) but it's not anywhere
+// close to a fast path, and is going to be replaced soon anyway, so
+// don't bother.
+Span* PageHeap::NewAligned(Length n, Length align) {
+ ASSERT(n > Length(0));
+ ASSERT(absl::has_single_bit(align.raw_num()));
+
+ if (align <= Length(1)) {
+ return New(n);
+ }
+
+ bool from_returned;
+ Span* span;
+ {
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+ Length extra = align - Length(1);
+ span = AllocateSpan(n + extra, &from_returned);
+ if (span == nullptr) return nullptr;
+ // <span> certainly contains an appropriately aligned region; find it
+ // and chop off the rest.
+ PageId p = span->first_page();
+ const Length mask = align - Length(1);
+ PageId aligned = PageId{(p.index() + mask.raw_num()) & ~mask.raw_num()};
+ ASSERT(aligned.index() % align.raw_num() == 0);
+ ASSERT(p <= aligned);
+ ASSERT(aligned + n <= p + span->num_pages());
+ // we have <extra> too many pages now, possibly all before, possibly all
+ // after, maybe both
+ Length before = aligned - p;
+ Length after = extra - before;
+ span->set_first_page(aligned);
+ span->set_num_pages(n);
+ RecordSpan(span);
+
+ const Span::Location loc =
+ from_returned ? Span::ON_RETURNED_FREELIST : Span::ON_NORMAL_FREELIST;
+ if (before > Length(0)) {
+ Span* extra = Span::New(p, before);
+ extra->set_location(loc);
+ RecordSpan(extra);
+ MergeIntoFreeList(extra);
+ }
+
+ if (after > Length(0)) {
+ Span* extra = Span::New(aligned + n, after);
+ extra->set_location(loc);
+ RecordSpan(extra);
+ MergeIntoFreeList(extra);
+ }
+
+ info_.RecordAlloc(aligned, n);
+ }
+
+ if (span != nullptr && from_returned) {
+ SystemBack(span->start_address(), span->bytes_in_span());
+ }
+
+ ASSERT(!span || GetMemoryTag(span->start_address()) == tag_);
+ return span;
+}
+
+Span* PageHeap::AllocLarge(Length n, bool* from_returned) {
+ // Find the best span (closest to n in size).
+ // The following loops implement address-ordered best-fit.
+ Span* best = nullptr;
+
+ // Search through normal list
+ for (Span* span : large_.normal) {
+ ASSERT(span->location() == Span::ON_NORMAL_FREELIST);
+ if (IsSpanBetter(span, best, n)) {
+ best = span;
+ *from_returned = false;
+ }
+ }
+
+ // Search through released list in case it has a better fit
+ for (Span* span : large_.returned) {
+ ASSERT(span->location() == Span::ON_RETURNED_FREELIST);
+ if (IsSpanBetter(span, best, n)) {
+ best = span;
+ *from_returned = true;
+ }
+ }
+
+ return best == nullptr ? nullptr : Carve(best, n);
+}
+
+Span* PageHeap::Carve(Span* span, Length n) {
+ ASSERT(n > Length(0));
+ ASSERT(span->location() != Span::IN_USE);
+ const Span::Location old_location = span->location();
+ RemoveFromFreeList(span);
+ span->set_location(Span::IN_USE);
+
+ const Length extra = span->num_pages() - n;
+ if (extra > Length(0)) {
+ Span* leftover = nullptr;
+ // Check if this span has another span on the right but not on the left.
+ // There is one special case we want to handle: if the heap grows down (as
+ // usually happens with the mmap allocator) and the user allocates lots of large
+ // persistent memory blocks (namely, kMinSystemAlloc + epsilon), then we
+ // want to return the last part of the span to user and push the beginning
+ // to the freelist.
+ // Otherwise system allocator would allocate 2 * kMinSystemAlloc, we return
+ // the first kMinSystemAlloc + epsilon to user and add the remaining
+ // kMinSystemAlloc - epsilon to the freelist. The remainder is not large
+ // enough to satisfy the next allocation request, so we allocate
+ // another 2 * kMinSystemAlloc from system and the process repeats wasting
+ // half of memory.
+ // If we return the last part to user, then the remainder will be merged
+ // with the next system allocation which will result in dense packing.
+ // There are no other known cases where span splitting strategy matters,
+ // so in other cases we return beginning to user.
+ if (pagemap_->GetDescriptor(span->first_page() - Length(1)) == nullptr &&
+ pagemap_->GetDescriptor(span->last_page() + Length(1)) != nullptr) {
+ leftover = Span::New(span->first_page(), extra);
+ span->set_first_page(span->first_page() + extra);
+ pagemap_->Set(span->first_page(), span);
+ } else {
+ leftover = Span::New(span->first_page() + n, extra);
+ }
+ leftover->set_location(old_location);
+ RecordSpan(leftover);
+ PrependToFreeList(leftover); // Skip coalescing - no candidates possible
+ leftover->set_freelist_added_time(span->freelist_added_time());
+ span->set_num_pages(n);
+ pagemap_->Set(span->last_page(), span);
+ }
+ ASSERT(Check());
+ return span;
+}
+
+void PageHeap::Delete(Span* span) {
+ ASSERT(GetMemoryTag(span->start_address()) == tag_);
+ info_.RecordFree(span->first_page(), span->num_pages());
+ ASSERT(Check());
+ ASSERT(span->location() == Span::IN_USE);
+ ASSERT(!span->sampled());
+ ASSERT(span->num_pages() > Length(0));
+ ASSERT(pagemap_->GetDescriptor(span->first_page()) == span);
+ ASSERT(pagemap_->GetDescriptor(span->last_page()) == span);
+ span->set_location(Span::ON_NORMAL_FREELIST);
+ MergeIntoFreeList(span); // Coalesces if possible
+ ASSERT(Check());
+}
+
+void PageHeap::MergeIntoFreeList(Span* span) {
+ ASSERT(span->location() != Span::IN_USE);
+ span->set_freelist_added_time(absl::base_internal::CycleClock::Now());
+
+ // Coalesce -- we guarantee that "p" != 0, so no bounds checking
+ // necessary. We do not bother resetting the stale pagemap
+ // entries for the pieces we are merging together because we only
+ // care about the pagemap entries for the boundaries.
+ //
+ // Note that only similar spans are merged together. For example,
+ // we do not coalesce "returned" spans with "normal" spans.
+ const PageId p = span->first_page();
+ const Length n = span->num_pages();
+ Span* prev = pagemap_->GetDescriptor(p - Length(1));
+ if (prev != nullptr && prev->location() == span->location()) {
+ // Merge preceding span into this span
+ ASSERT(prev->last_page() + Length(1) == p);
+ const Length len = prev->num_pages();
+ span->AverageFreelistAddedTime(prev);
+ RemoveFromFreeList(prev);
+ Span::Delete(prev);
+ span->set_first_page(span->first_page() - len);
+ span->set_num_pages(span->num_pages() + len);
+ pagemap_->Set(span->first_page(), span);
+ }
+ Span* next = pagemap_->GetDescriptor(p + n);
+ if (next != nullptr && next->location() == span->location()) {
+ // Merge next span into this span
+ ASSERT(next->first_page() == p + n);
+ const Length len = next->num_pages();
+ span->AverageFreelistAddedTime(next);
+ RemoveFromFreeList(next);
+ Span::Delete(next);
+ span->set_num_pages(span->num_pages() + len);
+ pagemap_->Set(span->last_page(), span);
+ }
+
+ PrependToFreeList(span);
+}
+
+void PageHeap::PrependToFreeList(Span* span) {
+ ASSERT(span->location() != Span::IN_USE);
+ SpanListPair* list = (span->num_pages() < kMaxPages)
+ ? &free_[span->num_pages().raw_num()]
+ : &large_;
+ if (span->location() == Span::ON_NORMAL_FREELIST) {
+ stats_.free_bytes += span->bytes_in_span();
+ list->normal.prepend(span);
+ } else {
+ stats_.unmapped_bytes += span->bytes_in_span();
+ list->returned.prepend(span);
+ }
+}
+
+void PageHeap::RemoveFromFreeList(Span* span) {
+ ASSERT(span->location() != Span::IN_USE);
+ if (span->location() == Span::ON_NORMAL_FREELIST) {
+ stats_.free_bytes -= span->bytes_in_span();
+ } else {
+ stats_.unmapped_bytes -= span->bytes_in_span();
+ }
+ span->RemoveFromList();
+}
+
+Length PageHeap::ReleaseLastNormalSpan(SpanListPair* slist) {
+ Span* s = slist->normal.last();
+ ASSERT(s->location() == Span::ON_NORMAL_FREELIST);
+ RemoveFromFreeList(s);
+
+ // We're dropping the very important and otherwise contended pageheap_lock
+ // around a call to a potentially very slow syscall to release pages. Those
+ // syscalls can be slow even with "advanced" things such as MADV_FREE{,ABLE}
+ // because they have to walk actual page tables, and we sometimes deal with
+ // large spans, which can take a lot of time. Plus Linux grabs the
+ // per-address-space mm_sem lock, which can be extremely contended at times.
+ // So it is best to avoid holding one contended lock while waiting for another.
+ //
+ // Note that we set the span location to in-use because our span could
+ // otherwise be found via the pagemap (e.g. in MergeIntoFreeList) while we're
+ // not holding the lock. By marking it in-use we prevent this possibility: the
+ // span is removed from the free list and marked "unmergeable", which
+ // guarantees safety during the unlocked release.
+ //
+ // Taking the span off the free list will make our stats reporting wrong if
+ // another thread happens to try to measure memory usage during the release,
+ // so we fix up the stats during the unlocked period.
+ stats_.free_bytes += s->bytes_in_span();
+ s->set_location(Span::IN_USE);
+ pageheap_lock.Unlock();
+
+ const Length n = s->num_pages();
+ SystemRelease(s->start_address(), s->bytes_in_span());
+
+ pageheap_lock.Lock();
+ stats_.free_bytes -= s->bytes_in_span();
+ s->set_location(Span::ON_RETURNED_FREELIST);
+ MergeIntoFreeList(s); // Coalesces if possible.
+ return n;
+}
+
+Length PageHeap::ReleaseAtLeastNPages(Length num_pages) {
+ Length released_pages;
+ Length prev_released_pages = Length::max() + Length(1);
+
+ // Round robin through the lists of free spans, releasing the last
+ // span in each list. Stop after releasing at least num_pages.
+ while (released_pages < num_pages) {
+ if (released_pages == prev_released_pages) {
+ // Last iteration of while loop made no progress.
+ break;
+ }
+ prev_released_pages = released_pages;
+
+ for (int i = 0; i < kMaxPages.raw_num() + 1 && released_pages < num_pages;
+ i++, release_index_++) {
+ if (release_index_ > kMaxPages.raw_num()) release_index_ = 0;
+ SpanListPair* slist = (release_index_ == kMaxPages.raw_num())
+ ? &large_
+ : &free_[release_index_];
+ if (!slist->normal.empty()) {
+ Length released_len = ReleaseLastNormalSpan(slist);
+ released_pages += released_len;
+ }
+ }
+ }
+ info_.RecordRelease(num_pages, released_pages);
+ return released_pages;
+}
+
+void PageHeap::GetSmallSpanStats(SmallSpanStats* result) {
+ for (int s = 0; s < kMaxPages.raw_num(); s++) {
+ result->normal_length[s] = free_[s].normal.length();
+ result->returned_length[s] = free_[s].returned.length();
+ }
+}
+
+void PageHeap::GetLargeSpanStats(LargeSpanStats* result) {
+ result->spans = 0;
+ result->normal_pages = Length(0);
+ result->returned_pages = Length(0);
+ for (Span* s : large_.normal) {
+ result->normal_pages += s->num_pages();
+ result->spans++;
+ }
+ for (Span* s : large_.returned) {
+ result->returned_pages += s->num_pages();
+ result->spans++;
+ }
+}
+
+bool PageHeap::GrowHeap(Length n) {
+ if (n > Length::max()) return false;
+ size_t actual_size;
+ void* ptr = SystemAlloc(n.in_bytes(), &actual_size, kPageSize, tag_);
+ if (ptr == nullptr) return false;
+ n = BytesToLengthFloor(actual_size);
+
+ stats_.system_bytes += actual_size;
+ const PageId p = PageIdContaining(ptr);
+ ASSERT(p > PageId{0});
+
+ // If we already have a lot of pages allocated, just preallocate a bunch of
+ // memory for the page map. This prevents fragmentation by pagemap metadata
+ // when a program keeps allocating and freeing large blocks.
+
+ // Make sure pagemap has entries for all of the new pages.
+ // Plus ensure one before and one after so coalescing code
+ // does not need bounds-checking.
+ if (pagemap_->Ensure(p - Length(1), n + Length(2))) {
+ // Pretend the new area is allocated and then return it to cause
+ // any necessary coalescing to occur.
+ Span* span = Span::New(p, n);
+ RecordSpan(span);
+ span->set_location(Span::ON_RETURNED_FREELIST);
+ MergeIntoFreeList(span);
+ ASSERT(Check());
+ return true;
+ } else {
+ // We could not allocate memory within the pagemap.
+ // Note the following leaks virtual memory, but at least it gets rid of
+ // the underlying physical memory.
+ SystemRelease(ptr, actual_size);
+ return false;
+ }
+}
+
+bool PageHeap::Check() {
+ ASSERT(free_[0].normal.empty());
+ ASSERT(free_[0].returned.empty());
+ return true;
+}
+
+void PageHeap::PrintInPbtxt(PbtxtRegion* region) {
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+ SmallSpanStats small;
+ GetSmallSpanStats(&small);
+ LargeSpanStats large;
+ GetLargeSpanStats(&large);
+
+ struct Helper {
+ static void RecordAges(PageAgeHistograms* ages, const SpanListPair& pair) {
+ for (const Span* s : pair.normal) {
+ ages->RecordRange(s->num_pages(), false, s->freelist_added_time());
+ }
+
+ for (const Span* s : pair.returned) {
+ ages->RecordRange(s->num_pages(), true, s->freelist_added_time());
+ }
+ }
+ };
+
+ PageAgeHistograms ages(absl::base_internal::CycleClock::Now());
+ for (int s = 0; s < kMaxPages.raw_num(); ++s) {
+ Helper::RecordAges(&ages, free_[s]);
+ }
+ Helper::RecordAges(&ages, large_);
+ PrintStatsInPbtxt(region, small, large, ages);
+ // We do not collect info_.PrintInPbtxt for now.
+}
+
+void PageHeap::Print(Printer* out) {
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+ SmallSpanStats small;
+ GetSmallSpanStats(&small);
+ LargeSpanStats large;
+ GetLargeSpanStats(&large);
+ PrintStats("PageHeap", out, stats_, small, large, true);
+
+ struct Helper {
+ static void RecordAges(PageAgeHistograms* ages, const SpanListPair& pair) {
+ for (const Span* s : pair.normal) {
+ ages->RecordRange(s->num_pages(), false, s->freelist_added_time());
+ }
+
+ for (const Span* s : pair.returned) {
+ ages->RecordRange(s->num_pages(), true, s->freelist_added_time());
+ }
+ }
+ };
+
+ PageAgeHistograms ages(absl::base_internal::CycleClock::Now());
+ for (int s = 0; s < kMaxPages.raw_num(); ++s) {
+ Helper::RecordAges(&ages, free_[s]);
+ }
+ Helper::RecordAges(&ages, large_);
+ ages.Print("PageHeap", out);
+
+ info_.Print(out);
+}
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
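
The trimming arithmetic in PageHeap::NewAligned above is easier to follow with concrete numbers. The sketch below is self-contained and illustrative only; the page numbers and alignment are made up.

#include <cstdint>
#include <cstdio>

int main() {
  // Ask for n pages aligned to `align` pages by over-allocating n + (align - 1)
  // pages and trimming, as NewAligned does.
  const uintptr_t p = 1003;      // first page of the over-allocated span
  const uintptr_t n = 8;         // pages actually requested
  const uintptr_t align = 4;     // must be a power of two
  const uintptr_t mask = align - 1;

  const uintptr_t aligned = (p + mask) & ~mask;  // 1004: next multiple of 4
  const uintptr_t before = aligned - p;          // 1 page split off in front
  const uintptr_t after = (align - 1) - before;  // 2 pages split off behind

  // The caller keeps pages [aligned, aligned + n); the `before` and `after`
  // leftovers go back onto the free lists.
  std::printf("aligned=%ju before=%ju after=%ju end=%ju\n",
              static_cast<uintmax_t>(aligned), static_cast<uintmax_t>(before),
              static_cast<uintmax_t>(after),
              static_cast<uintmax_t>(aligned + n));
  return 0;
}
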
diff --git a/contrib/libs/tcmalloc/tcmalloc/page_heap.h b/contrib/libs/tcmalloc/tcmalloc/page_heap.h
new file mode 100644
index 0000000000..86cf5d01df
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/page_heap.h
@@ -0,0 +1,161 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TCMALLOC_PAGE_HEAP_H_
+#define TCMALLOC_PAGE_HEAP_H_
+
+#include <stdint.h>
+
+#include "absl/base/thread_annotations.h"
+#include "tcmalloc/common.h"
+#include "tcmalloc/page_allocator_interface.h"
+#include "tcmalloc/span.h"
+#include "tcmalloc/stats.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+// -------------------------------------------------------------------------
+// Page-level allocator
+// * Eager coalescing
+//
+// Heap for page-level allocation. We allow allocating and freeing a
+// contiguous runs of pages (called a "span").
+// -------------------------------------------------------------------------
+
+class PageHeap final : public PageAllocatorInterface {
+ public:
+ explicit PageHeap(MemoryTag tag);
+ // for testing
+ PageHeap(PageMap* map, MemoryTag tag);
+ ~PageHeap() override = default;
+
+ // Allocate a run of "n" pages. Returns nullptr if out of memory.
+ // Caller should not pass "n == 0" -- instead, n should have
+ // been rounded up already.
+ // The returned memory is backed.
+ Span* New(Length n) ABSL_LOCKS_EXCLUDED(pageheap_lock) override;
+
+ // As New, but the returned span is aligned to a <align>-page boundary.
+ // <align> must be a power of two.
+ Span* NewAligned(Length n, Length align)
+ ABSL_LOCKS_EXCLUDED(pageheap_lock) override;
+
+ // Delete the span "[p, p+n-1]".
+ // REQUIRES: span was returned by earlier call to New() and
+ // has not yet been deleted.
+ void Delete(Span* span) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) override;
+
+ inline BackingStats stats() const
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) override {
+ return stats_;
+ }
+
+ void GetSmallSpanStats(SmallSpanStats* result)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) override;
+
+ void GetLargeSpanStats(LargeSpanStats* result)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) override;
+
+ // Try to release at least num_pages for reuse by the OS. Returns
+ // the actual number of pages released, which may be less than
+ // num_pages if there weren't enough pages to release. The result
+ // may also be larger than num_pages since page_heap might decide to
+ // release one large range instead of fragmenting it into two
+ // smaller released and unreleased ranges.
+ Length ReleaseAtLeastNPages(Length num_pages)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) override;
+
+ // Prints stats about the page heap to *out.
+ void Print(Printer* out) ABSL_LOCKS_EXCLUDED(pageheap_lock) override;
+
+ void PrintInPbtxt(PbtxtRegion* region)
+ ABSL_LOCKS_EXCLUDED(pageheap_lock) override;
+
+ private:
+ // We segregate spans of a given size into two circular linked
+ // lists: one for normal spans, and one for spans whose memory
+ // has been returned to the system.
+ struct SpanListPair {
+ SpanList normal;
+ SpanList returned;
+ };
+
+ // List of free spans of length >= kMaxPages
+ SpanListPair large_ ABSL_GUARDED_BY(pageheap_lock);
+
+ // Array mapping from span length to a doubly linked list of free spans
+ SpanListPair free_[kMaxPages.raw_num()] ABSL_GUARDED_BY(pageheap_lock);
+
+ // Statistics on system, free, and unmapped bytes
+ BackingStats stats_ ABSL_GUARDED_BY(pageheap_lock);
+
+ Span* SearchFreeAndLargeLists(Length n, bool* from_returned)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+
+ bool GrowHeap(Length n) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+
+ // REQUIRES: span->length >= n
+ // REQUIRES: span->location != IN_USE
+ // Remove span from its free list, and move any leftover part of
+ // span into appropriate free lists. Also update "span" to have
+ // length exactly "n" and mark it as non-free so it can be returned
+ // to the client. After all that, decrease free_pages_ by n and
+ // return span.
+ Span* Carve(Span* span, Length n)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+
+ // Allocate a large span of length == n. If successful, returns a
+ // span of exactly the specified length. Else, returns NULL.
+ Span* AllocLarge(Length n, bool* from_returned)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+
+ // Coalesce span with neighboring spans if possible, prepend to
+ // appropriate free list, and adjust stats.
+ void MergeIntoFreeList(Span* span)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+
+ // Prepends span to appropriate free list, and adjusts stats. You'll probably
+ // want to adjust span->freelist_added_time before/after calling this
+ // function.
+ void PrependToFreeList(Span* span)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+
+ // Removes span from its free list, and adjust stats.
+ void RemoveFromFreeList(Span* span)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+
+ // Release the last span on the normal portion of this list.
+ // Return the length of that span.
+ Length ReleaseLastNormalSpan(SpanListPair* slist)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+
+ // Do invariant testing.
+ bool Check() ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+
+ // Index of last free list where we released memory to the OS.
+ int release_index_ ABSL_GUARDED_BY(pageheap_lock);
+
+ Span* AllocateSpan(Length n, bool* from_returned)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+
+ void RecordSpan(Span* span) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+};
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // TCMALLOC_PAGE_HEAP_H_
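
A rough sketch of the free-list layout declared above: one exact-length list pair per size below kMaxPages, and a single shared pair for everything larger. The kMaxPages value and the ListFor helper are illustrative only, not tcmalloc's.

#include <array>
#include <cstddef>
#include <cstdio>
#include <list>

constexpr std::size_t kMaxPages = 256;  // assumed value, for illustration only
struct ListPair { std::list<int> normal, returned; };

std::array<ListPair, kMaxPages> free_lists;  // free_lists[n]: spans of exactly n pages
ListPair large;                              // spans of kMaxPages pages or more

// Mirrors the dispatch in PrependToFreeList: exact list below kMaxPages,
// otherwise the shared "large" list, which AllocLarge searches by best fit.
ListPair& ListFor(std::size_t num_pages) {
  return num_pages < kMaxPages ? free_lists[num_pages] : large;
}

int main() {
  std::printf("3-page span -> %s, 1000-page span -> %s\n",
              &ListFor(3) == &large ? "large list" : "exact list",
              &ListFor(1000) == &large ? "large list" : "exact list");
  return 0;
}
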
diff --git a/contrib/libs/tcmalloc/tcmalloc/page_heap_allocator.h b/contrib/libs/tcmalloc/tcmalloc/page_heap_allocator.h
new file mode 100644
index 0000000000..5d2bbfe92c
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/page_heap_allocator.h
@@ -0,0 +1,93 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TCMALLOC_PAGE_HEAP_ALLOCATOR_H_
+#define TCMALLOC_PAGE_HEAP_ALLOCATOR_H_
+
+#include <stddef.h>
+
+#include "absl/base/attributes.h"
+#include "absl/base/optimization.h"
+#include "absl/base/thread_annotations.h"
+#include "tcmalloc/arena.h"
+#include "tcmalloc/common.h"
+#include "tcmalloc/internal/logging.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+struct AllocatorStats {
+ // Number of allocated but unfreed objects
+ size_t in_use;
+ // Number of objects created (both free and allocated)
+ size_t total;
+};
+
+// Simple allocator for objects of a specified type. External locking
+// is required before accessing one of these objects.
+template <class T>
+class PageHeapAllocator {
+ public:
+ constexpr PageHeapAllocator()
+ : arena_(nullptr), free_list_(nullptr), stats_{0, 0} {}
+
+ // We use an explicit Init function because these variables are statically
+ // allocated and their constructors might not have run by the time some
+ // other static variable tries to allocate memory.
+ void Init(Arena* arena) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) {
+ arena_ = arena;
+ // Reserve some space at the beginning to avoid fragmentation.
+ Delete(New());
+ }
+
+ ABSL_ATTRIBUTE_RETURNS_NONNULL T* New()
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) {
+ // Consult free list
+ T* result = free_list_;
+ stats_.in_use++;
+ if (ABSL_PREDICT_FALSE(result == nullptr)) {
+ stats_.total++;
+ return reinterpret_cast<T*>(arena_->Alloc(sizeof(T)));
+ }
+ free_list_ = *(reinterpret_cast<T**>(free_list_));
+ return result;
+ }
+
+ void Delete(T* p) ABSL_ATTRIBUTE_NONNULL()
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) {
+ *(reinterpret_cast<void**>(p)) = free_list_;
+ free_list_ = p;
+ stats_.in_use--;
+ }
+
+ AllocatorStats stats() const ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) {
+ return stats_;
+ }
+
+ private:
+ // Arena from which to allocate memory
+ Arena* arena_;
+
+ // Free list of already carved objects
+ T* free_list_ ABSL_GUARDED_BY(pageheap_lock);
+
+ AllocatorStats stats_ ABSL_GUARDED_BY(pageheap_lock);
+};
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // TCMALLOC_PAGE_HEAP_ALLOCATOR_H_
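
The free-list trick used by PageHeapAllocator above (a freed object's own storage holds the link to the next free object) in a self-contained form. TinyFreeList is a simplified, hypothetical analogue without the arena or locking.

#include <cstdio>
#include <vector>

template <class T>
class TinyFreeList {
  static_assert(sizeof(T) >= sizeof(T*), "object must be able to hold a pointer");

 public:
  void Push(T* p) {
    *reinterpret_cast<T**>(p) = head_;  // reuse the object's bytes as the link
    head_ = p;
  }
  T* Pop() {
    T* result = head_;
    if (result != nullptr) head_ = *reinterpret_cast<T**>(result);
    return result;  // nullptr when empty; the real code falls back to the arena
  }

 private:
  T* head_ = nullptr;
};

struct Node { char payload[64]; };

int main() {
  std::vector<Node> storage(3);
  TinyFreeList<Node> fl;
  for (Node& n : storage) fl.Push(&n);  // "Delete" three objects
  while (Node* n = fl.Pop()) std::printf("%p\n", static_cast<void*>(n));
  return 0;
}
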
diff --git a/contrib/libs/tcmalloc/tcmalloc/page_heap_test.cc b/contrib/libs/tcmalloc/tcmalloc/page_heap_test.cc
new file mode 100644
index 0000000000..dc13a60cb7
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/page_heap_test.cc
@@ -0,0 +1,109 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/page_heap.h"
+
+#include <stddef.h>
+#include <stdlib.h>
+
+#include <memory>
+#include <new>
+
+#include "gtest/gtest.h"
+#include "absl/base/internal/spinlock.h"
+#include "absl/memory/memory.h"
+#include "tcmalloc/common.h"
+#include "tcmalloc/pagemap.h"
+#include "tcmalloc/static_vars.h"
+
+namespace tcmalloc {
+namespace tcmalloc_internal {
+namespace {
+
+// PageHeap expands by kMinSystemAlloc by default, so use this as the minimum
+// Span length to avoid getting more memory than expected.
+constexpr Length kMinSpanLength = BytesToLengthFloor(kMinSystemAlloc);
+
+void CheckStats(const PageHeap* ph, Length system_pages, Length free_pages,
+ Length unmapped_pages) ABSL_LOCKS_EXCLUDED(pageheap_lock) {
+ BackingStats stats;
+ {
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+ stats = ph->stats();
+ }
+
+ ASSERT_EQ(system_pages.in_bytes(), stats.system_bytes);
+ ASSERT_EQ(free_pages.in_bytes(), stats.free_bytes);
+ ASSERT_EQ(unmapped_pages.in_bytes(), stats.unmapped_bytes);
+}
+
+static void Delete(PageHeap* ph, Span* s) ABSL_LOCKS_EXCLUDED(pageheap_lock) {
+ {
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+ ph->Delete(s);
+ }
+}
+
+static Length Release(PageHeap* ph, Length n) {
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+ return ph->ReleaseAtLeastNPages(n);
+}
+
+class PageHeapTest : public ::testing::Test {
+ public:
+ PageHeapTest() {
+ // If this test is not linked against TCMalloc, the global arena used for
+ // metadata will not be initialized.
+ Static::InitIfNecessary();
+ }
+};
+
+// TODO(b/36484267): replace this test wholesale.
+TEST_F(PageHeapTest, Stats) {
+ auto pagemap = absl::make_unique<PageMap>();
+ void* memory = calloc(1, sizeof(PageHeap));
+ PageHeap* ph = new (memory) PageHeap(pagemap.get(), MemoryTag::kNormal);
+
+ // Empty page heap
+ CheckStats(ph, Length(0), Length(0), Length(0));
+
+ // Allocate a span 's1'
+ Span* s1 = ph->New(kMinSpanLength);
+ CheckStats(ph, kMinSpanLength, Length(0), Length(0));
+
+ // Allocate an aligned span 's2'
+ static const Length kHalf = kMinSpanLength / 2;
+ Span* s2 = ph->NewAligned(kHalf, kHalf);
+ ASSERT_EQ(s2->first_page().index() % kHalf.raw_num(), 0);
+ CheckStats(ph, kMinSpanLength * 2, Length(0), kHalf);
+
+ // Delete the old one
+ Delete(ph, s1);
+ CheckStats(ph, kMinSpanLength * 2, kMinSpanLength, kHalf);
+
+ // Release the space from there:
+ Length released = Release(ph, Length(1));
+ ASSERT_EQ(released, kMinSpanLength);
+ CheckStats(ph, kMinSpanLength * 2, Length(0), kHalf + kMinSpanLength);
+
+ // and delete the new one
+ Delete(ph, s2);
+ CheckStats(ph, kMinSpanLength * 2, kHalf, kHalf + kMinSpanLength);
+
+ free(memory);
+}
+
+} // namespace
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
diff --git a/contrib/libs/tcmalloc/tcmalloc/pagemap.cc b/contrib/libs/tcmalloc/tcmalloc/pagemap.cc
new file mode 100644
index 0000000000..25962302c3
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/pagemap.cc
@@ -0,0 +1,73 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/pagemap.h"
+
+#include <sys/mman.h>
+
+#include "tcmalloc/common.h"
+#include "tcmalloc/span.h"
+#include "tcmalloc/static_vars.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+void PageMap::RegisterSizeClass(Span* span, size_t sc) {
+ ASSERT(span->location() == Span::IN_USE);
+ const PageId first = span->first_page();
+ const PageId last = span->last_page();
+ ASSERT(GetDescriptor(first) == span);
+ for (PageId p = first; p <= last; ++p) {
+ map_.set_with_sizeclass(p.index(), span, sc);
+ }
+}
+
+void PageMap::UnregisterSizeClass(Span* span) {
+ ASSERT(span->location() == Span::IN_USE);
+ const PageId first = span->first_page();
+ const PageId last = span->last_page();
+ ASSERT(GetDescriptor(first) == span);
+ for (PageId p = first; p <= last; ++p) {
+ map_.clear_sizeclass(p.index());
+ }
+}
+
+void PageMap::MapRootWithSmallPages() {
+ constexpr size_t kHugePageMask = ~(kHugePageSize - 1);
+ uintptr_t begin = reinterpret_cast<uintptr_t>(map_.RootAddress());
+ // Round begin up to the nearest hugepage; this avoids causing memory before
+ // the start of the pagemap to become mapped onto small pages.
+ uintptr_t rbegin = (begin + kHugePageSize) & kHugePageMask;
+ size_t length = map_.RootSize();
+ // Round end down to the nearest hugepage; this avoids causing memory after
+ // the end of the pagemap to become mapped onto small pages.
+ size_t rend = (begin + length) & kHugePageMask;
+ // Since we have rounded the start up, and the end down, we also want to
+ // confirm that there is something left between them for us to modify.
+ // In the small-but-slow configuration, the root pagemap is less than a
+ // hugepage in size, so we will not end up forcing it onto small pages.
+ if (rend > rbegin) {
+ size_t rlength = rend - rbegin;
+ madvise(reinterpret_cast<void*>(rbegin), rlength, MADV_NOHUGEPAGE);
+ }
+}
+
+void* MetaDataAlloc(size_t bytes) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) {
+ return Static::arena().Alloc(bytes);
+}
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
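
The rounding in MapRootWithSmallPages above, shown with concrete numbers. This rough sketch assumes 2 MiB hugepages and a 64-bit address space, uses made-up addresses, and only prints the range that would be passed to madvise.

#include <cstdint>
#include <cstdio>

int main() {
  constexpr uintptr_t kHugePageSize = 2 << 20;  // assumed: 2 MiB hugepages
  constexpr uintptr_t kHugePageMask = ~(kHugePageSize - 1);

  const uintptr_t begin = 0x7f0000001000;  // example root-node address
  const uintptr_t length = 6 << 20;        // example root-node size (6 MiB)

  const uintptr_t rbegin = (begin + kHugePageSize) & kHugePageMask;  // round up
  const uintptr_t rend = (begin + length) & kHugePageMask;           // round down
  if (rend > rbegin) {
    // The real code calls madvise(rbegin, rend - rbegin, MADV_NOHUGEPAGE) here.
    std::printf("would madvise [%#jx, %#jx), %ju bytes\n",
                static_cast<uintmax_t>(rbegin), static_cast<uintmax_t>(rend),
                static_cast<uintmax_t>(rend - rbegin));
  }
  return 0;
}
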
diff --git a/contrib/libs/tcmalloc/tcmalloc/pagemap.h b/contrib/libs/tcmalloc/tcmalloc/pagemap.h
new file mode 100644
index 0000000000..0cafa8a38d
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/pagemap.h
@@ -0,0 +1,431 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// A data structure used by the caching malloc. It maps from page# to
+// a pointer that contains info about that page using a two-level array.
+//
+// The BITS parameter should be the number of bits required to hold
+// a page number. E.g., with 32 bit pointers and 8K pages (i.e.,
+// page offset fits in lower 13 bits), BITS == 19.
+//
+// A PageMap requires external synchronization, except for the get/sizeclass
+// methods (see explanation at top of tcmalloc.cc).
+
+#ifndef TCMALLOC_PAGEMAP_H_
+#define TCMALLOC_PAGEMAP_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "absl/base/attributes.h"
+#include "absl/base/thread_annotations.h"
+#include "tcmalloc/common.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/pages.h"
+#include "tcmalloc/span.h"
+#include "tcmalloc/static_vars.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+// Two-level radix tree
+typedef void* (*PagemapAllocator)(size_t);
+void* MetaDataAlloc(size_t bytes);
+
+template <int BITS, PagemapAllocator Allocator>
+class PageMap2 {
+ private:
+ // The leaf node (regardless of pointer size) always maps 2^15 entries;
+ // with 8K pages, this gives us 256MB mapped per leaf node.
+ static constexpr int kLeafBits = 15;
+ static constexpr int kLeafLength = 1 << kLeafBits;
+ static constexpr int kRootBits = (BITS >= kLeafBits) ? (BITS - kLeafBits) : 0;
+ // (1<<kRootBits) must not overflow an "int"
+ static_assert(kRootBits < sizeof(int) * 8 - 1, "kRootBits is too large");
+ static constexpr int kRootLength = 1 << kRootBits;
+
+ static constexpr size_t kLeafCoveredBytes = 1ul << (kLeafBits + kPageShift);
+ static_assert(kLeafCoveredBytes >= kHugePageSize, "leaf too small");
+ static constexpr size_t kLeafHugeBits =
+ (kLeafBits + kPageShift - kHugePageShift);
+ static constexpr size_t kLeafHugepages = kLeafCoveredBytes / kHugePageSize;
+ static_assert(kLeafHugepages == 1 << kLeafHugeBits, "sanity");
+ struct Leaf {
+ // We keep parallel arrays indexed by page number. One keeps the
+ // size class; another span pointers; the last hugepage-related
+ // information. The size class information is kept segregated
+ // since small object deallocations are so frequent and do not
+ // need the other information kept in a Span.
+ CompactSizeClass sizeclass[kLeafLength];
+ Span* span[kLeafLength];
+ void* hugepage[kLeafHugepages];
+ };
+
+ Leaf* root_[kRootLength]; // Top-level node
+ size_t bytes_used_;
+
+ public:
+ typedef uintptr_t Number;
+
+ constexpr PageMap2() : root_{}, bytes_used_(0) {}
+
+ // No locks required. See SYNCHRONIZATION explanation at top of tcmalloc.cc.
+ void* get(Number k) const ABSL_NO_THREAD_SAFETY_ANALYSIS {
+ const Number i1 = k >> kLeafBits;
+ const Number i2 = k & (kLeafLength - 1);
+ if ((k >> BITS) > 0 || root_[i1] == nullptr) {
+ return nullptr;
+ }
+ return root_[i1]->span[i2];
+ }
+
+ // No locks required. See SYNCHRONIZATION explanation at top of tcmalloc.cc.
+ // Requires that the span is known to already exist.
+ Span* get_existing(Number k) const ABSL_NO_THREAD_SAFETY_ANALYSIS {
+ const Number i1 = k >> kLeafBits;
+ const Number i2 = k & (kLeafLength - 1);
+ ASSERT((k >> BITS) == 0);
+ ASSERT(root_[i1] != nullptr);
+ return root_[i1]->span[i2];
+ }
+
+ // No locks required. See SYNCHRONIZATION explanation at top of tcmalloc.cc.
+ // REQUIRES: Must be a valid page number previously Ensure()d.
+ CompactSizeClass ABSL_ATTRIBUTE_ALWAYS_INLINE
+ sizeclass(Number k) const ABSL_NO_THREAD_SAFETY_ANALYSIS {
+ const Number i1 = k >> kLeafBits;
+ const Number i2 = k & (kLeafLength - 1);
+ ASSERT((k >> BITS) == 0);
+ ASSERT(root_[i1] != nullptr);
+ return root_[i1]->sizeclass[i2];
+ }
+
+ void set(Number k, Span* s) {
+ ASSERT(k >> BITS == 0);
+ const Number i1 = k >> kLeafBits;
+ const Number i2 = k & (kLeafLength - 1);
+ root_[i1]->span[i2] = s;
+ }
+
+ void set_with_sizeclass(Number k, Span* s, CompactSizeClass sc) {
+ ASSERT(k >> BITS == 0);
+ const Number i1 = k >> kLeafBits;
+ const Number i2 = k & (kLeafLength - 1);
+ Leaf* leaf = root_[i1];
+ leaf->span[i2] = s;
+ leaf->sizeclass[i2] = sc;
+ }
+
+ void clear_sizeclass(Number k) {
+ ASSERT(k >> BITS == 0);
+ const Number i1 = k >> kLeafBits;
+ const Number i2 = k & (kLeafLength - 1);
+ root_[i1]->sizeclass[i2] = 0;
+ }
+
+ void* get_hugepage(Number k) {
+ ASSERT(k >> BITS == 0);
+ const Number i1 = k >> kLeafBits;
+ const Number i2 = k & (kLeafLength - 1);
+ const Leaf* leaf = root_[i1];
+ ASSERT(leaf != nullptr);
+ return leaf->hugepage[i2 >> (kLeafBits - kLeafHugeBits)];
+ }
+
+ void set_hugepage(Number k, void* v) {
+ ASSERT(k >> BITS == 0);
+ const Number i1 = k >> kLeafBits;
+ const Number i2 = k & (kLeafLength - 1);
+ root_[i1]->hugepage[i2 >> (kLeafBits - kLeafHugeBits)] = v;
+ }
+
+ bool Ensure(Number start, size_t n) {
+ ASSERT(n > 0);
+ for (Number key = start; key <= start + n - 1;) {
+ const Number i1 = key >> kLeafBits;
+
+ // Check for overflow
+ if (i1 >= kRootLength) return false;
+
+ // Make 2nd level node if necessary
+ if (root_[i1] == nullptr) {
+ Leaf* leaf = reinterpret_cast<Leaf*>(Allocator(sizeof(Leaf)));
+ if (leaf == nullptr) return false;
+ bytes_used_ += sizeof(Leaf);
+ memset(leaf, 0, sizeof(*leaf));
+ root_[i1] = leaf;
+ }
+
+ // Advance key past whatever is covered by this leaf node
+ key = ((key >> kLeafBits) + 1) << kLeafBits;
+ }
+ return true;
+ }
+
+ size_t bytes_used() const {
+ // Account for size of root node, etc.
+ return bytes_used_ + sizeof(*this);
+ }
+
+ constexpr size_t RootSize() const { return sizeof(root_); }
+ const void* RootAddress() { return root_; }
+};
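
A rough sketch of the two-level index split PageMap2 performs on a page number; the page value is made up, and kLeafBits matches the constant above.

#include <cstdint>
#include <cstdio>

int main() {
  constexpr int kLeafBits = 15;  // as in PageMap2
  constexpr uintptr_t kLeafLength = uintptr_t{1} << kLeafBits;

  const uintptr_t page = 0x12345678;              // example page number
  const uintptr_t i1 = page >> kLeafBits;         // which leaf (root_[i1])
  const uintptr_t i2 = page & (kLeafLength - 1);  // slot inside that leaf

  std::printf("page %#jx -> root_[%ju], leaf slot %ju\n",
              static_cast<uintmax_t>(page), static_cast<uintmax_t>(i1),
              static_cast<uintmax_t>(i2));
  return 0;
}
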
+
+// Three-level radix tree
+// Currently only used for TCMALLOC_SMALL_BUT_SLOW
+template <int BITS, PagemapAllocator Allocator>
+class PageMap3 {
+ private:
+ // For x86 we currently have 48 usable bits; for POWER we have 46. With
+ // 4KiB page sizes (12 bits) we end up with 36 bits for x86 and 34 bits
+ // for POWER. So a leaf covers 4KiB * (1 << 12) = 16MiB, which is the huge
+ // page size for POWER.
+ static constexpr int kLeafBits = (BITS + 2) / 3; // Round up
+ static constexpr int kLeafLength = 1 << kLeafBits;
+ static constexpr int kMidBits = (BITS + 2) / 3; // Round up
+ static constexpr int kMidLength = 1 << kMidBits;
+ static constexpr int kRootBits = BITS - kLeafBits - kMidBits;
+ static_assert(kRootBits > 0, "Too many bits assigned to leaf and mid");
+ // (1<<kRootBits) must not overflow an "int"
+ static_assert(kRootBits < sizeof(int) * 8 - 1, "Root bits too large");
+ static constexpr int kRootLength = 1 << kRootBits;
+
+ static constexpr size_t kLeafCoveredBytes = size_t{1}
+ << (kLeafBits + kPageShift);
+ static_assert(kLeafCoveredBytes >= kHugePageSize, "leaf too small");
+ static constexpr size_t kLeafHugeBits =
+ (kLeafBits + kPageShift - kHugePageShift);
+ static constexpr size_t kLeafHugepages = kLeafCoveredBytes / kHugePageSize;
+ static_assert(kLeafHugepages == 1 << kLeafHugeBits, "sanity");
+ struct Leaf {
+ // We keep parallel arrays indexed by page number. One keeps the
+ // size class; another span pointers; the last hugepage-related
+ // information. The size class information is kept segregated
+ // since small object deallocations are so frequent and do not
+ // need the other information kept in a Span.
+ CompactSizeClass sizeclass[kLeafLength];
+ Span* span[kLeafLength];
+ void* hugepage[kLeafHugepages];
+ };
+
+ struct Node {
+ // Mid-level structure that holds pointers to leafs
+ Leaf* leafs[kMidLength];
+ };
+
+ Node* root_[kRootLength]; // Top-level node
+ size_t bytes_used_;
+
+ public:
+ typedef uintptr_t Number;
+
+ constexpr PageMap3() : root_{}, bytes_used_(0) {}
+
+ // No locks required. See SYNCHRONIZATION explanation at top of tcmalloc.cc.
+ void* get(Number k) const ABSL_NO_THREAD_SAFETY_ANALYSIS {
+ const Number i1 = k >> (kLeafBits + kMidBits);
+ const Number i2 = (k >> kLeafBits) & (kMidLength - 1);
+ const Number i3 = k & (kLeafLength - 1);
+ if ((k >> BITS) > 0 || root_[i1] == nullptr ||
+ root_[i1]->leafs[i2] == nullptr) {
+ return nullptr;
+ }
+ return root_[i1]->leafs[i2]->span[i3];
+ }
+
+ // No locks required. See SYNCHRONIZATION explanation at top of tcmalloc.cc.
+ // Requires that the span is known to already exist.
+ Span* get_existing(Number k) const ABSL_NO_THREAD_SAFETY_ANALYSIS {
+ const Number i1 = k >> (kLeafBits + kMidBits);
+ const Number i2 = (k >> kLeafBits) & (kMidLength - 1);
+ const Number i3 = k & (kLeafLength - 1);
+ ASSERT((k >> BITS) == 0);
+ ASSERT(root_[i1] != nullptr);
+ ASSERT(root_[i1]->leafs[i2] != nullptr);
+ return root_[i1]->leafs[i2]->span[i3];
+ }
+
+ // No locks required. See SYNCHRONIZATION explanation at top of tcmalloc.cc.
+ // REQUIRES: Must be a valid page number previously Ensure()d.
+ CompactSizeClass ABSL_ATTRIBUTE_ALWAYS_INLINE
+ sizeclass(Number k) const ABSL_NO_THREAD_SAFETY_ANALYSIS {
+ const Number i1 = k >> (kLeafBits + kMidBits);
+ const Number i2 = (k >> kLeafBits) & (kMidLength - 1);
+ const Number i3 = k & (kLeafLength - 1);
+ ASSERT((k >> BITS) == 0);
+ ASSERT(root_[i1] != nullptr);
+ ASSERT(root_[i1]->leafs[i2] != nullptr);
+ return root_[i1]->leafs[i2]->sizeclass[i3];
+ }
+
+ void set(Number k, Span* s) {
+ ASSERT(k >> BITS == 0);
+ const Number i1 = k >> (kLeafBits + kMidBits);
+ const Number i2 = (k >> kLeafBits) & (kMidLength - 1);
+ const Number i3 = k & (kLeafLength - 1);
+ root_[i1]->leafs[i2]->span[i3] = s;
+ }
+
+ void set_with_sizeclass(Number k, Span* s, CompactSizeClass sc) {
+ ASSERT(k >> BITS == 0);
+ const Number i1 = k >> (kLeafBits + kMidBits);
+ const Number i2 = (k >> kLeafBits) & (kMidLength - 1);
+ const Number i3 = k & (kLeafLength - 1);
+ Leaf* leaf = root_[i1]->leafs[i2];
+ leaf->span[i3] = s;
+ leaf->sizeclass[i3] = sc;
+ }
+
+ void clear_sizeclass(Number k) {
+ ASSERT(k >> BITS == 0);
+ const Number i1 = k >> (kLeafBits + kMidBits);
+ const Number i2 = (k >> kLeafBits) & (kMidLength - 1);
+ const Number i3 = k & (kLeafLength - 1);
+ root_[i1]->leafs[i2]->sizeclass[i3] = 0;
+ }
+
+ void* get_hugepage(Number k) {
+ ASSERT(k >> BITS == 0);
+ const Number i1 = k >> (kLeafBits + kMidBits);
+ const Number i2 = (k >> kLeafBits) & (kMidLength - 1);
+ const Number i3 = k & (kLeafLength - 1);
+ const Node* node = root_[i1];
+ ASSERT(node != nullptr);
+ const Leaf* leaf = node->leafs[i2];
+ ASSERT(leaf != nullptr);
+ return leaf->hugepage[i3 >> (kLeafBits - kLeafHugeBits)];
+ }
+
+ void set_hugepage(Number k, void* v) {
+ ASSERT(k >> BITS == 0);
+ const Number i1 = k >> (kLeafBits + kMidBits);
+ const Number i2 = (k >> kLeafBits) & (kMidLength - 1);
+ const Number i3 = k & (kLeafLength - 1);
+ root_[i1]->leafs[i2]->hugepage[i3 >> (kLeafBits - kLeafHugeBits)] = v;
+ }
+
+ bool Ensure(Number start, size_t n) {
+ for (Number key = start; key <= start + n - 1;) {
+ const Number i1 = key >> (kLeafBits + kMidBits);
+ const Number i2 = (key >> kLeafBits) & (kMidLength - 1);
+
+ // Check within root
+ if (i1 >= kRootLength) return false;
+
+ // Allocate Node if necessary
+ if (root_[i1] == nullptr) {
+ Node* node = reinterpret_cast<Node*>(Allocator(sizeof(Node)));
+ if (node == nullptr) return false;
+ bytes_used_ += sizeof(Node);
+ memset(node, 0, sizeof(*node));
+ root_[i1] = node;
+ }
+
+ // Allocate Leaf if necessary
+ if (root_[i1]->leafs[i2] == nullptr) {
+ Leaf* leaf = reinterpret_cast<Leaf*>(Allocator(sizeof(Leaf)));
+ if (leaf == nullptr) return false;
+ bytes_used_ += sizeof(Leaf);
+ memset(leaf, 0, sizeof(*leaf));
+ root_[i1]->leafs[i2] = leaf;
+ }
+
+ // Advance key past whatever is covered by this leaf node
+ key = ((key >> kLeafBits) + 1) << kLeafBits;
+ }
+ return true;
+ }
+
+ size_t bytes_used() const { return bytes_used_ + sizeof(*this); }
+
+ constexpr size_t RootSize() const { return sizeof(root_); }
+ const void* RootAddress() { return root_; }
+};
+
+class PageMap {
+ public:
+ constexpr PageMap() : map_{} {}
+
+ // Return the size class for p, or 0 if it is not known to tcmalloc
+ // or is a page containing large objects.
+ // No locks required. See SYNCHRONIZATION explanation at top of tcmalloc.cc.
+ CompactSizeClass sizeclass(PageId p) ABSL_NO_THREAD_SAFETY_ANALYSIS {
+ return map_.sizeclass(p.index());
+ }
+
+ void Set(PageId p, Span* span) { map_.set(p.index(), span); }
+
+ bool Ensure(PageId p, Length n) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) {
+ return map_.Ensure(p.index(), n.raw_num());
+ }
+
+ // Mark an allocated span as being used for small objects of the
+ // specified size-class.
+ // REQUIRES: span was returned by an earlier call to PageAllocator::New()
+ // and has not yet been deleted.
+ // Concurrent calls to this method are safe unless they mark the same span.
+ void RegisterSizeClass(Span* span, size_t sc);
+
+ // Mark an allocated span as being not used for any size-class.
+ // REQUIRES: span was returned by an earlier call to PageAllocator::New()
+ // and has not yet been deleted.
+ // Concurrent calls to this method are safe unless they mark the same span.
+ void UnregisterSizeClass(Span* span);
+
+ // Return the descriptor for the specified page. Returns NULL if
+ // this PageId was not allocated previously.
+ // No locks required. See SYNCHRONIZATION explanation at top of tcmalloc.cc.
+ inline Span* GetDescriptor(PageId p) const ABSL_NO_THREAD_SAFETY_ANALYSIS {
+ return reinterpret_cast<Span*>(map_.get(p.index()));
+ }
+
+ // Return the descriptor for the specified page.
+ // PageId must have been previously allocated.
+ // No locks required. See SYNCHRONIZATION explanation at top of tcmalloc.cc.
+ ABSL_ATTRIBUTE_RETURNS_NONNULL inline Span* GetExistingDescriptor(
+ PageId p) const ABSL_NO_THREAD_SAFETY_ANALYSIS {
+ Span* span = map_.get_existing(p.index());
+ ASSERT(span != nullptr);
+ return span;
+ }
+
+ size_t bytes() const ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) {
+ return map_.bytes_used();
+ }
+
+ void* GetHugepage(PageId p) { return map_.get_hugepage(p.index()); }
+
+ void SetHugepage(PageId p, void* v) { map_.set_hugepage(p.index(), v); }
+
+ // The PageMap root node can be quite large and sparsely used. If this
+ // gets mapped with hugepages we potentially end up holding a large
+ // amount of unused memory. So it is better to map the root node with
+ // small pages to minimise the amount of unused memory.
+ void MapRootWithSmallPages();
+
+ private:
+#ifdef TCMALLOC_USE_PAGEMAP3
+ PageMap3<kAddressBits - kPageShift, MetaDataAlloc> map_;
+#else
+ PageMap2<kAddressBits - kPageShift, MetaDataAlloc> map_;
+#endif
+};
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // TCMALLOC_PAGEMAP_H_
diff --git a/contrib/libs/tcmalloc/tcmalloc/pagemap_test.cc b/contrib/libs/tcmalloc/tcmalloc/pagemap_test.cc
new file mode 100644
index 0000000000..49ef5477d8
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/pagemap_test.cc
@@ -0,0 +1,166 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/pagemap.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include <algorithm>
+#include <cstdint>
+#include <new>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "absl/random/random.h"
+#include "tcmalloc/common.h"
+
+// Note: we leak memory every time a map is constructed, so do not
+// create too many maps.
+
+namespace tcmalloc {
+namespace tcmalloc_internal {
+namespace {
+
+// Pick span pointer to use for page numbered i
+Span* span(intptr_t i) { return reinterpret_cast<Span*>(i + 1); }
+
+// Pick sizeclass to use for page numbered i
+uint8_t sc(intptr_t i) { return i % 16; }
+
+class PageMapTest : public ::testing::TestWithParam<int> {
+ public:
+ PageMapTest() {
+ // Arrange to pass zero-filled memory as the backing store for map.
+ memset(storage, 0, sizeof(Map));
+ map = new (storage) Map();
+ }
+
+ ~PageMapTest() override {
+ for (void* ptr : *ptrs()) {
+ ::operator delete(ptr);
+ }
+ ptrs()->clear();
+ }
+
+ private:
+ static std::vector<void*>* ptrs() {
+ static std::vector<void*>* ret = new std::vector<void*>();
+ return ret;
+ }
+
+ static void* alloc(size_t n) {
+ void* ptr = ::operator new(n);
+ ptrs()->push_back(ptr);
+ return ptr;
+ }
+
+ public:
+ using Map = PageMap2<20, alloc>;
+ Map* map;
+
+ private:
+ alignas(Map) char storage[sizeof(Map)];
+};
+
+TEST_P(PageMapTest, Sequential) {
+ const intptr_t limit = GetParam();
+
+ for (intptr_t i = 0; i < limit; i++) {
+ map->Ensure(i, 1);
+ map->set(i, span(i));
+ ASSERT_EQ(map->get(i), span(i));
+
+ // Test size class handling
+ ASSERT_EQ(0, map->sizeclass(i));
+ map->set_with_sizeclass(i, span(i), sc(i));
+ ASSERT_EQ(sc(i), map->sizeclass(i));
+ }
+ for (intptr_t i = 0; i < limit; i++) {
+ ASSERT_EQ(map->get(i), span(i));
+ }
+}
+
+TEST_P(PageMapTest, Bulk) {
+ const intptr_t limit = GetParam();
+
+ map->Ensure(0, limit);
+ for (intptr_t i = 0; i < limit; i++) {
+ map->set(i, span(i));
+ ASSERT_EQ(map->get(i), span(i));
+ }
+ for (intptr_t i = 0; i < limit; i++) {
+ ASSERT_EQ(map->get(i), span(i));
+ }
+}
+
+TEST_P(PageMapTest, Overflow) {
+ const intptr_t kLimit = 1 << 20;
+ ASSERT_FALSE(map->Ensure(kLimit, kLimit + 1));
+}
+
+TEST_P(PageMapTest, RandomAccess) {
+ const intptr_t limit = GetParam();
+
+ std::vector<intptr_t> elements;
+ for (intptr_t i = 0; i < limit; i++) {
+ elements.push_back(i);
+ }
+ std::shuffle(elements.begin(), elements.end(), absl::BitGen());
+
+ for (intptr_t i = 0; i < limit; i++) {
+ map->Ensure(elements[i], 1);
+ map->set(elements[i], span(elements[i]));
+ ASSERT_EQ(map->get(elements[i]), span(elements[i]));
+ }
+ for (intptr_t i = 0; i < limit; i++) {
+ ASSERT_EQ(map->get(i), span(i));
+ }
+}
+
+INSTANTIATE_TEST_SUITE_P(Limits, PageMapTest, ::testing::Values(100, 1 << 20));
+
+// Surround pagemap with unused memory. This isolates it so that it does not
+// share pages with any other structures. This avoids the risk that adjacent
+// objects might cause it to be mapped in. The padding is of sufficient size
+// that this is true even if this structure is mapped with huge pages.
+static struct PaddedPageMap {
+ constexpr PaddedPageMap() : padding_before{}, pagemap{}, padding_after{} {}
+ uint64_t padding_before[kHugePageSize / sizeof(uint64_t)];
+ PageMap pagemap;
+ uint64_t padding_after[kHugePageSize / sizeof(uint64_t)];
+} padded_pagemap_;
+
+TEST(TestMemoryFootprint, Test) {
+ uint64_t pagesize = sysconf(_SC_PAGESIZE);
+ ASSERT_NE(pagesize, 0);
+ size_t pages = sizeof(PageMap) / pagesize + 1;
+ std::vector<unsigned char> present(pages);
+
+  // mincore() needs the address rounded down to the start of a page.
+ uint64_t basepage =
+ reinterpret_cast<uintptr_t>(&padded_pagemap_.pagemap) & ~(pagesize - 1);
+ ASSERT_EQ(mincore(reinterpret_cast<void*>(basepage), sizeof(PageMap),
+ present.data()),
+ 0);
+ for (int i = 0; i < pages; i++) {
+ EXPECT_EQ(present[i], 0);
+ }
+}
+
+} // namespace
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
diff --git a/contrib/libs/tcmalloc/tcmalloc/pages.h b/contrib/libs/tcmalloc/tcmalloc/pages.h
new file mode 100644
index 0000000000..e674c9c9c8
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/pages.h
@@ -0,0 +1,298 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TCMALLOC_PAGES_H_
+#define TCMALLOC_PAGES_H_
+
+#include <cmath>
+#include <string>
+
+#include "absl/strings/numbers.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/string_view.h"
+#include "tcmalloc/common.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/internal/optimization.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+// Type that can hold the length of a run of pages
+class Length {
+ public:
+ constexpr Length() : n_(0) {}
+ explicit constexpr Length(uintptr_t n) : n_(n) {}
+
+ constexpr Length(const Length&) = default;
+ constexpr Length& operator=(const Length&) = default;
+
+ constexpr size_t raw_num() const { return n_; }
+ constexpr size_t in_bytes() const { return n_ * kPageSize; }
+ double in_mib() const {
+ return std::ldexp(static_cast<double>(n_),
+ static_cast<int>(kPageShift) - 20);
+ }
+ constexpr Length in_pages() const { return *this; }
+
+ static constexpr Length min() { return Length(0); }
+ static constexpr Length max() {
+ return Length(std::numeric_limits<uintptr_t>::max() >> kPageShift);
+ }
+
+ constexpr Length& operator+=(Length rhs) {
+ n_ += rhs.n_;
+ return *this;
+ }
+
+ constexpr Length& operator-=(Length rhs) {
+ ASSERT(n_ >= rhs.n_);
+ n_ -= rhs.n_;
+ return *this;
+ }
+
+ constexpr Length& operator*=(size_t rhs) {
+ n_ *= rhs;
+ return *this;
+ }
+
+ constexpr Length& operator/=(size_t rhs) {
+ ASSERT(rhs != 0);
+ n_ /= rhs;
+ return *this;
+ }
+
+ constexpr Length& operator%=(Length rhs) {
+ ASSERT(rhs.n_ != 0);
+ n_ %= rhs.n_;
+ return *this;
+ }
+
+ friend constexpr bool operator<(Length lhs, Length rhs);
+ friend constexpr bool operator>(Length lhs, Length rhs);
+ friend constexpr bool operator<=(Length lhs, Length rhs);
+ friend constexpr bool operator>=(Length lhs, Length rhs);
+ friend constexpr bool operator==(Length lhs, Length rhs);
+ friend constexpr bool operator!=(Length lhs, Length rhs);
+
+ private:
+ uintptr_t n_;
+};
+
+inline bool AbslParseFlag(absl::string_view text, Length* l,
+ std::string* /* error */) {
+ uintptr_t n;
+ if (!absl::SimpleAtoi(text, &n)) {
+ return false;
+ }
+ *l = Length(n);
+ return true;
+}
+
+inline std::string AbslUnparseFlag(Length l) {
+ return absl::StrCat(l.raw_num());
+}
+
+// A single aligned page.
+class PageId {
+ public:
+ constexpr PageId() : pn_(0) {}
+ constexpr PageId(const PageId& p) = default;
+ constexpr PageId& operator=(const PageId& p) = default;
+
+ constexpr explicit PageId(uintptr_t pn) : pn_(pn) {}
+
+ void* start_addr() const {
+ return reinterpret_cast<void*>(pn_ << kPageShift);
+ }
+
+ uintptr_t start_uintptr() const { return pn_ << kPageShift; }
+
+ size_t index() const { return pn_; }
+
+ constexpr PageId& operator+=(Length rhs) {
+ pn_ += rhs.raw_num();
+ return *this;
+ }
+
+ constexpr PageId& operator-=(Length rhs) {
+ ASSERT(pn_ >= rhs.raw_num());
+ pn_ -= rhs.raw_num();
+ return *this;
+ }
+
+ private:
+ friend constexpr bool operator<(PageId lhs, PageId rhs);
+ friend constexpr bool operator>(PageId lhs, PageId rhs);
+ friend constexpr bool operator<=(PageId lhs, PageId rhs);
+ friend constexpr bool operator>=(PageId lhs, PageId rhs);
+ friend constexpr bool operator==(PageId lhs, PageId rhs);
+ friend constexpr bool operator!=(PageId lhs, PageId rhs);
+ friend constexpr Length operator-(PageId lhs, PageId rhs);
+
+ uintptr_t pn_;
+};
+
+TCMALLOC_ATTRIBUTE_CONST
+inline constexpr Length LengthFromBytes(size_t bytes) {
+ return Length(bytes >> kPageShift);
+}
+
+// Convert byte size into pages. This won't overflow, but may return
+// an unreasonably large value if bytes is huge enough.
+TCMALLOC_ATTRIBUTE_CONST
+inline constexpr Length BytesToLengthCeil(size_t bytes) {
+ return Length((bytes >> kPageShift) +
+ ((bytes & (kPageSize - 1)) > 0 ? 1 : 0));
+}
+
+TCMALLOC_ATTRIBUTE_CONST
+inline constexpr Length BytesToLengthFloor(size_t bytes) {
+ return Length(bytes >> kPageShift);
+}
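+
+// For example (illustrative only; these identities hold for any kPageShift):
+//   BytesToLengthCeil(kPageSize + 1) == Length(2)   // a partial page rounds up
+//   BytesToLengthFloor(kPageSize + 1) == Length(1)  // a partial page is dropped
+//   BytesToLengthCeil(0) == Length(0)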
+
+inline constexpr Length kMaxValidPages = Length::max();
+// For all span-lengths < kMaxPages we keep an exact-size list.
+inline constexpr Length kMaxPages = Length(1 << (20 - kPageShift));
+
+inline PageId& operator++(PageId& p) { // NOLINT(runtime/references)
+ return p += Length(1);
+}
+
+TCMALLOC_ATTRIBUTE_CONST
+inline constexpr bool operator<(PageId lhs, PageId rhs) {
+ return lhs.pn_ < rhs.pn_;
+}
+
+TCMALLOC_ATTRIBUTE_CONST
+inline constexpr bool operator>(PageId lhs, PageId rhs) {
+ return lhs.pn_ > rhs.pn_;
+}
+
+TCMALLOC_ATTRIBUTE_CONST
+inline constexpr bool operator<=(PageId lhs, PageId rhs) {
+ return lhs.pn_ <= rhs.pn_;
+}
+
+TCMALLOC_ATTRIBUTE_CONST
+inline constexpr bool operator>=(PageId lhs, PageId rhs) {
+ return lhs.pn_ >= rhs.pn_;
+}
+
+TCMALLOC_ATTRIBUTE_CONST
+inline constexpr bool operator==(PageId lhs, PageId rhs) {
+ return lhs.pn_ == rhs.pn_;
+}
+
+TCMALLOC_ATTRIBUTE_CONST
+inline constexpr bool operator!=(PageId lhs, PageId rhs) {
+ return lhs.pn_ != rhs.pn_;
+}
+
+TCMALLOC_ATTRIBUTE_CONST
+inline constexpr PageId operator+(PageId lhs, Length rhs) { return lhs += rhs; }
+
+TCMALLOC_ATTRIBUTE_CONST
+inline constexpr PageId operator+(Length lhs, PageId rhs) { return rhs += lhs; }
+
+TCMALLOC_ATTRIBUTE_CONST
+inline constexpr PageId operator-(PageId lhs, Length rhs) { return lhs -= rhs; }
+
+TCMALLOC_ATTRIBUTE_CONST
+inline constexpr Length operator-(PageId lhs, PageId rhs) {
+ ASSERT(lhs.pn_ >= rhs.pn_);
+ return Length(lhs.pn_ - rhs.pn_);
+}
+
+TCMALLOC_ATTRIBUTE_CONST
+inline PageId PageIdContaining(const void* p) {
+ return PageId(reinterpret_cast<uintptr_t>(p) >> kPageShift);
+}
+
+TCMALLOC_ATTRIBUTE_CONST
+inline constexpr bool operator<(Length lhs, Length rhs) {
+ return lhs.n_ < rhs.n_;
+}
+
+TCMALLOC_ATTRIBUTE_CONST
+inline constexpr bool operator>(Length lhs, Length rhs) {
+ return lhs.n_ > rhs.n_;
+}
+
+TCMALLOC_ATTRIBUTE_CONST
+inline constexpr bool operator<=(Length lhs, Length rhs) {
+ return lhs.n_ <= rhs.n_;
+}
+
+TCMALLOC_ATTRIBUTE_CONST
+inline constexpr bool operator>=(Length lhs, Length rhs) {
+ return lhs.n_ >= rhs.n_;
+}
+
+TCMALLOC_ATTRIBUTE_CONST
+inline constexpr bool operator==(Length lhs, Length rhs) {
+ return lhs.n_ == rhs.n_;
+}
+
+TCMALLOC_ATTRIBUTE_CONST
+inline constexpr bool operator!=(Length lhs, Length rhs) {
+ return lhs.n_ != rhs.n_;
+}
+
+inline Length& operator++(Length& l) { return l += Length(1); }
+
+inline Length& operator--(Length& l) { return l -= Length(1); }
+
+TCMALLOC_ATTRIBUTE_CONST
+inline constexpr Length operator+(Length lhs, Length rhs) {
+ return Length(lhs.raw_num() + rhs.raw_num());
+}
+
+TCMALLOC_ATTRIBUTE_CONST
+inline constexpr Length operator-(Length lhs, Length rhs) {
+ return Length(lhs.raw_num() - rhs.raw_num());
+}
+
+TCMALLOC_ATTRIBUTE_CONST
+inline constexpr Length operator*(Length lhs, size_t rhs) {
+ return Length(lhs.raw_num() * rhs);
+}
+
+TCMALLOC_ATTRIBUTE_CONST
+inline constexpr Length operator*(size_t lhs, Length rhs) {
+ return Length(lhs * rhs.raw_num());
+}
+
+TCMALLOC_ATTRIBUTE_CONST
+inline constexpr size_t operator/(Length lhs, Length rhs) {
+ return lhs.raw_num() / rhs.raw_num();
+}
+
+TCMALLOC_ATTRIBUTE_CONST
+inline constexpr Length operator/(Length lhs, size_t rhs) {
+ ASSERT(rhs != 0);
+ return Length(lhs.raw_num() / rhs);
+}
+
+TCMALLOC_ATTRIBUTE_CONST
+inline constexpr Length operator%(Length lhs, Length rhs) {
+ ASSERT(rhs.raw_num() != 0);
+ return Length(lhs.raw_num() % rhs.raw_num());
+}
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // TCMALLOC_PAGES_H_
diff --git a/contrib/libs/tcmalloc/tcmalloc/parameters.cc b/contrib/libs/tcmalloc/tcmalloc/parameters.cc
new file mode 100644
index 0000000000..3f8e6e1ef8
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/parameters.cc
@@ -0,0 +1,271 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "tcmalloc/parameters.h"
+
+#include "absl/time/time.h"
+#include "tcmalloc/common.h"
+#include "tcmalloc/experiment.h"
+#include "tcmalloc/experiment_config.h"
+#include "tcmalloc/huge_page_aware_allocator.h"
+#include "tcmalloc/malloc_extension.h"
+#include "tcmalloc/static_vars.h"
+#include "tcmalloc/thread_cache.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+// As decide_subrelease() is determined at runtime, we cannot require constant
+// initialization for the atomic. This avoids an initialization order fiasco.
+static std::atomic<bool>* hpaa_subrelease_ptr() {
+ static std::atomic<bool> v(decide_subrelease());
+ return &v;
+}
+
+// As skip_subrelease_interval_ns() is determined at runtime, we cannot require
+// constant initialization for the atomic. This avoids an initialization order
+// fiasco.
+static std::atomic<int64_t>& skip_subrelease_interval_ns() {
+ static std::atomic<int64_t> v(absl::ToInt64Nanoseconds(absl::Seconds(60)));
+ return v;
+}
+
+uint64_t Parameters::heap_size_hard_limit() {
+ size_t amount;
+ bool is_hard;
+ std::tie(amount, is_hard) = Static::page_allocator().limit();
+ if (!is_hard) {
+ amount = 0;
+ }
+ return amount;
+}
+
+void Parameters::set_heap_size_hard_limit(uint64_t value) {
+ TCMalloc_Internal_SetHeapSizeHardLimit(value);
+}
+
+bool Parameters::hpaa_subrelease() {
+ return hpaa_subrelease_ptr()->load(std::memory_order_relaxed);
+}
+
+void Parameters::set_hpaa_subrelease(bool value) {
+ TCMalloc_Internal_SetHPAASubrelease(value);
+}
+
+ABSL_CONST_INIT std::atomic<MallocExtension::BytesPerSecond>
+    Parameters::background_release_rate_(
+        MallocExtension::BytesPerSecond{0});
+ABSL_CONST_INIT std::atomic<int64_t> Parameters::guarded_sampling_rate_(
+ 50 * kDefaultProfileSamplingRate);
+ABSL_CONST_INIT std::atomic<bool> Parameters::shuffle_per_cpu_caches_enabled_(
+ false);
+ABSL_CONST_INIT std::atomic<bool>
+ Parameters::reclaim_idle_per_cpu_caches_enabled_(true);
+ABSL_CONST_INIT std::atomic<bool> Parameters::lazy_per_cpu_caches_enabled_(
+ true);
+ABSL_CONST_INIT std::atomic<int32_t> Parameters::max_per_cpu_cache_size_(
+ kMaxCpuCacheSize);
+ABSL_CONST_INIT std::atomic<int64_t> Parameters::max_total_thread_cache_bytes_(
+ kDefaultOverallThreadCacheSize);
+ABSL_CONST_INIT std::atomic<double>
+ Parameters::peak_sampling_heap_growth_fraction_(1.1);
+ABSL_CONST_INIT std::atomic<bool> Parameters::per_cpu_caches_enabled_(
+#if defined(TCMALLOC_DEPRECATED_PERTHREAD)
+ false
+#else
+ true
+#endif
+);
+
+ABSL_CONST_INIT std::atomic<int64_t> Parameters::profile_sampling_rate_(
+ kDefaultProfileSamplingRate);
+
+absl::Duration Parameters::filler_skip_subrelease_interval() {
+ return absl::Nanoseconds(
+ skip_subrelease_interval_ns().load(std::memory_order_relaxed));
+}
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+using tcmalloc::tcmalloc_internal::kLog;
+using tcmalloc::tcmalloc_internal::Log;
+using tcmalloc::tcmalloc_internal::Parameters;
+using tcmalloc::tcmalloc_internal::Static;
+
+extern "C" {
+
+int64_t MallocExtension_Internal_GetProfileSamplingRate() {
+ return Parameters::profile_sampling_rate();
+}
+
+void MallocExtension_Internal_SetProfileSamplingRate(int64_t value) {
+ Parameters::set_profile_sampling_rate(value);
+}
+
+int64_t MallocExtension_Internal_GetGuardedSamplingRate() {
+ return Parameters::guarded_sampling_rate();
+}
+
+void MallocExtension_Internal_SetGuardedSamplingRate(int64_t value) {
+ Parameters::set_guarded_sampling_rate(value);
+}
+
+int64_t MallocExtension_Internal_GetMaxTotalThreadCacheBytes() {
+ return Parameters::max_total_thread_cache_bytes();
+}
+
+void MallocExtension_Internal_SetMaxTotalThreadCacheBytes(int64_t value) {
+ Parameters::set_max_total_thread_cache_bytes(value);
+}
+
+void MallocExtension_Internal_GetSkipSubreleaseInterval(absl::Duration* ret) {
+ *ret = Parameters::filler_skip_subrelease_interval();
+}
+
+void MallocExtension_Internal_SetSkipSubreleaseInterval(absl::Duration value) {
+ Parameters::set_filler_skip_subrelease_interval(value);
+}
+
+tcmalloc::MallocExtension::BytesPerSecond
+MallocExtension_Internal_GetBackgroundReleaseRate() {
+ return Parameters::background_release_rate();
+}
+
+void MallocExtension_Internal_SetBackgroundReleaseRate(
+ tcmalloc::MallocExtension::BytesPerSecond rate) {
+ Parameters::set_background_release_rate(rate);
+}
+
+void TCMalloc_Internal_SetBackgroundReleaseRate(size_t value) {
+ Parameters::background_release_rate_.store(
+ static_cast<tcmalloc::MallocExtension::BytesPerSecond>(value));
+}
+
+uint64_t TCMalloc_Internal_GetHeapSizeHardLimit() {
+ return Parameters::heap_size_hard_limit();
+}
+
+bool TCMalloc_Internal_GetHPAASubrelease() {
+ return Parameters::hpaa_subrelease();
+}
+
+bool TCMalloc_Internal_GetShufflePerCpuCachesEnabled() {
+ return Parameters::shuffle_per_cpu_caches();
+}
+
+bool TCMalloc_Internal_GetReclaimIdlePerCpuCachesEnabled() {
+ return Parameters::reclaim_idle_per_cpu_caches();
+}
+
+bool TCMalloc_Internal_GetLazyPerCpuCachesEnabled() {
+ return Parameters::lazy_per_cpu_caches();
+}
+
+double TCMalloc_Internal_GetPeakSamplingHeapGrowthFraction() {
+ return Parameters::peak_sampling_heap_growth_fraction();
+}
+
+bool TCMalloc_Internal_GetPerCpuCachesEnabled() {
+ return Parameters::per_cpu_caches();
+}
+
+void TCMalloc_Internal_SetGuardedSamplingRate(int64_t v) {
+ Parameters::guarded_sampling_rate_.store(v, std::memory_order_relaxed);
+}
+
+// update_lock guards changes via SetHeapSizeHardLimit.
+ABSL_CONST_INIT static absl::base_internal::SpinLock update_lock(
+ absl::kConstInit, absl::base_internal::SCHEDULE_KERNEL_ONLY);
+
+void TCMalloc_Internal_SetHeapSizeHardLimit(uint64_t value) {
+ // Ensure that page allocator is set up.
+ Static::InitIfNecessary();
+
+ absl::base_internal::SpinLockHolder l(&update_lock);
+
+ size_t limit = std::numeric_limits<size_t>::max();
+ bool active = false;
+ if (value > 0) {
+ limit = value;
+ active = true;
+ }
+
+ bool currently_hard = Static::page_allocator().limit().second;
+ if (active || currently_hard) {
+ // Avoid resetting limit when current limit is soft.
+ Static::page_allocator().set_limit(limit, active /* is_hard */);
+ Log(kLog, __FILE__, __LINE__, "[tcmalloc] set page heap hard limit to",
+ limit, "bytes");
+ }
+}
+
+void TCMalloc_Internal_SetHPAASubrelease(bool v) {
+ tcmalloc::tcmalloc_internal::hpaa_subrelease_ptr()->store(
+ v, std::memory_order_relaxed);
+}
+
+void TCMalloc_Internal_SetShufflePerCpuCachesEnabled(bool v) {
+ Parameters::shuffle_per_cpu_caches_enabled_.store(v,
+ std::memory_order_relaxed);
+}
+
+void TCMalloc_Internal_SetReclaimIdlePerCpuCachesEnabled(bool v) {
+ Parameters::reclaim_idle_per_cpu_caches_enabled_.store(
+ v, std::memory_order_relaxed);
+}
+
+void TCMalloc_Internal_SetLazyPerCpuCachesEnabled(bool v) {
+ Parameters::lazy_per_cpu_caches_enabled_.store(v, std::memory_order_relaxed);
+}
+
+void TCMalloc_Internal_SetMaxPerCpuCacheSize(int32_t v) {
+ Parameters::max_per_cpu_cache_size_.store(v, std::memory_order_relaxed);
+}
+
+void TCMalloc_Internal_SetMaxTotalThreadCacheBytes(int64_t v) {
+ Parameters::max_total_thread_cache_bytes_.store(v, std::memory_order_relaxed);
+
+ absl::base_internal::SpinLockHolder l(
+ &tcmalloc::tcmalloc_internal::pageheap_lock);
+ tcmalloc::tcmalloc_internal::ThreadCache::set_overall_thread_cache_size(v);
+}
+
+void TCMalloc_Internal_SetPeakSamplingHeapGrowthFraction(double v) {
+ Parameters::peak_sampling_heap_growth_fraction_.store(
+ v, std::memory_order_relaxed);
+}
+
+void TCMalloc_Internal_SetPerCpuCachesEnabled(bool v) {
+ Parameters::per_cpu_caches_enabled_.store(v, std::memory_order_relaxed);
+}
+
+void TCMalloc_Internal_SetProfileSamplingRate(int64_t v) {
+ Parameters::profile_sampling_rate_.store(v, std::memory_order_relaxed);
+}
+
+void TCMalloc_Internal_GetHugePageFillerSkipSubreleaseInterval(
+ absl::Duration* v) {
+ *v = Parameters::filler_skip_subrelease_interval();
+}
+
+void TCMalloc_Internal_SetHugePageFillerSkipSubreleaseInterval(
+ absl::Duration v) {
+ tcmalloc::tcmalloc_internal::skip_subrelease_interval_ns().store(
+ absl::ToInt64Nanoseconds(v), std::memory_order_relaxed);
+}
+
+} // extern "C"
diff --git a/contrib/libs/tcmalloc/tcmalloc/parameters.h b/contrib/libs/tcmalloc/tcmalloc/parameters.h
new file mode 100644
index 0000000000..64893f0402
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/parameters.h
@@ -0,0 +1,152 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TCMALLOC_PARAMETERS_H_
+#define TCMALLOC_PARAMETERS_H_
+
+#include <atomic>
+#include <cmath>
+#include <string>
+
+#include "absl/base/internal/spinlock.h"
+#include "absl/time/time.h"
+#include "absl/types/optional.h"
+#include "tcmalloc/internal/config.h"
+#include "tcmalloc/internal/parameter_accessors.h"
+#include "tcmalloc/malloc_extension.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+class Parameters {
+ public:
+ static MallocExtension::BytesPerSecond background_release_rate() {
+ return background_release_rate_.load(std::memory_order_relaxed);
+ }
+
+ static void set_background_release_rate(
+ MallocExtension::BytesPerSecond value) {
+ TCMalloc_Internal_SetBackgroundReleaseRate(static_cast<size_t>(value));
+ }
+
+ static uint64_t heap_size_hard_limit();
+ static void set_heap_size_hard_limit(uint64_t value);
+
+ static bool hpaa_subrelease();
+ static void set_hpaa_subrelease(bool value);
+
+ static int64_t guarded_sampling_rate() {
+ return guarded_sampling_rate_.load(std::memory_order_relaxed);
+ }
+
+ static void set_guarded_sampling_rate(int64_t value) {
+ TCMalloc_Internal_SetGuardedSamplingRate(value);
+ }
+
+ static int32_t max_per_cpu_cache_size() {
+ return max_per_cpu_cache_size_.load(std::memory_order_relaxed);
+ }
+
+ static void set_max_per_cpu_cache_size(int32_t value) {
+ TCMalloc_Internal_SetMaxPerCpuCacheSize(value);
+ }
+
+ static int64_t max_total_thread_cache_bytes() {
+ return max_total_thread_cache_bytes_.load(std::memory_order_relaxed);
+ }
+
+ static void set_max_total_thread_cache_bytes(int64_t value) {
+ TCMalloc_Internal_SetMaxTotalThreadCacheBytes(value);
+ }
+
+ static double peak_sampling_heap_growth_fraction() {
+ return peak_sampling_heap_growth_fraction_.load(std::memory_order_relaxed);
+ }
+
+ static void set_peak_sampling_heap_growth_fraction(double value) {
+ TCMalloc_Internal_SetPeakSamplingHeapGrowthFraction(value);
+ }
+
+ static bool shuffle_per_cpu_caches() {
+ return shuffle_per_cpu_caches_enabled_.load(std::memory_order_relaxed);
+ }
+
+ static bool reclaim_idle_per_cpu_caches() {
+ return reclaim_idle_per_cpu_caches_enabled_.load(std::memory_order_relaxed);
+ }
+
+ static bool lazy_per_cpu_caches() {
+ return lazy_per_cpu_caches_enabled_.load(std::memory_order_relaxed);
+ }
+
+ static void set_lazy_per_cpu_caches(bool value) {
+ TCMalloc_Internal_SetLazyPerCpuCachesEnabled(value);
+ }
+
+ static bool per_cpu_caches() {
+ return per_cpu_caches_enabled_.load(std::memory_order_relaxed);
+ }
+
+ static void set_per_cpu_caches(bool value) {
+ TCMalloc_Internal_SetPerCpuCachesEnabled(value);
+ }
+
+ static int64_t profile_sampling_rate() {
+ return profile_sampling_rate_.load(std::memory_order_relaxed);
+ }
+
+ static void set_profile_sampling_rate(int64_t value) {
+ TCMalloc_Internal_SetProfileSamplingRate(value);
+ }
+
+ static void set_filler_skip_subrelease_interval(absl::Duration value) {
+ TCMalloc_Internal_SetHugePageFillerSkipSubreleaseInterval(value);
+ }
+
+ static absl::Duration filler_skip_subrelease_interval();
+
+ private:
+ friend void ::TCMalloc_Internal_SetBackgroundReleaseRate(size_t v);
+ friend void ::TCMalloc_Internal_SetGuardedSamplingRate(int64_t v);
+ friend void ::TCMalloc_Internal_SetHPAASubrelease(bool v);
+ friend void ::TCMalloc_Internal_SetShufflePerCpuCachesEnabled(bool v);
+ friend void ::TCMalloc_Internal_SetReclaimIdlePerCpuCachesEnabled(bool v);
+ friend void ::TCMalloc_Internal_SetLazyPerCpuCachesEnabled(bool v);
+ friend void ::TCMalloc_Internal_SetMaxPerCpuCacheSize(int32_t v);
+ friend void ::TCMalloc_Internal_SetMaxTotalThreadCacheBytes(int64_t v);
+ friend void ::TCMalloc_Internal_SetPeakSamplingHeapGrowthFraction(double v);
+ friend void ::TCMalloc_Internal_SetPerCpuCachesEnabled(bool v);
+ friend void ::TCMalloc_Internal_SetProfileSamplingRate(int64_t v);
+
+ friend void ::TCMalloc_Internal_SetHugePageFillerSkipSubreleaseInterval(
+ absl::Duration v);
+
+ static std::atomic<MallocExtension::BytesPerSecond> background_release_rate_;
+ static std::atomic<int64_t> guarded_sampling_rate_;
+ static std::atomic<bool> shuffle_per_cpu_caches_enabled_;
+ static std::atomic<bool> reclaim_idle_per_cpu_caches_enabled_;
+ static std::atomic<bool> lazy_per_cpu_caches_enabled_;
+ static std::atomic<int32_t> max_per_cpu_cache_size_;
+ static std::atomic<int64_t> max_total_thread_cache_bytes_;
+ static std::atomic<double> peak_sampling_heap_growth_fraction_;
+ static std::atomic<bool> per_cpu_caches_enabled_;
+ static std::atomic<int64_t> profile_sampling_rate_;
+};
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // TCMALLOC_PARAMETERS_H_
diff --git a/contrib/libs/tcmalloc/tcmalloc/peak_heap_tracker.cc b/contrib/libs/tcmalloc/tcmalloc/peak_heap_tracker.cc
new file mode 100644
index 0000000000..0dcc0df536
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/peak_heap_tracker.cc
@@ -0,0 +1,93 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/peak_heap_tracker.h"
+
+#include <stdio.h>
+
+#include "absl/base/internal/spinlock.h"
+#include "absl/memory/memory.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/page_heap_allocator.h"
+#include "tcmalloc/parameters.h"
+#include "tcmalloc/sampler.h"
+#include "tcmalloc/span.h"
+#include "tcmalloc/stack_trace_table.h"
+#include "tcmalloc/static_vars.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+bool PeakHeapTracker::IsNewPeak() {
+ return peak_sampled_heap_size_.value() == 0 ||
+ (static_cast<double>(Static::sampled_objects_size_.value()) /
+ peak_sampled_heap_size_.value() >
+ Parameters::peak_sampling_heap_growth_fraction());
+}
+
+void PeakHeapTracker::MaybeSaveSample() {
+ if (Parameters::peak_sampling_heap_growth_fraction() <= 0 || !IsNewPeak()) {
+ return;
+ }
+
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+
+  // Double-check in case another allocation was sampled (or a sampled
+  // allocation was freed) while we were waiting for the lock.
+ if (!IsNewPeak()) {
+ return;
+ }
+ peak_sampled_heap_size_.LossyAdd(Static::sampled_objects_size_.value() -
+ peak_sampled_heap_size_.value());
+
+ StackTrace *t = peak_sampled_span_stacks_, *next = nullptr;
+ while (t != nullptr) {
+ next = reinterpret_cast<StackTrace*>(t->stack[kMaxStackDepth - 1]);
+ Static::DestroySampleUserData(t->user_data);
+ Static::stacktrace_allocator().Delete(t);
+ t = next;
+ }
+
+ next = nullptr;
+ for (Span* s : Static::sampled_objects_) {
+ t = Static::stacktrace_allocator().New();
+
+ StackTrace* sampled_stack = s->sampled_stack();
+ *t = *sampled_stack;
+ t->user_data = Static::CopySampleUserData(sampled_stack->user_data);
+ if (t->depth == kMaxStackDepth) {
+ t->depth = kMaxStackDepth - 1;
+ }
+ t->stack[kMaxStackDepth - 1] = reinterpret_cast<void*>(next);
+ next = t;
+ }
+ peak_sampled_span_stacks_ = t;
+}
+
+std::unique_ptr<ProfileBase> PeakHeapTracker::DumpSample() const {
+ auto profile = absl::make_unique<StackTraceTable>(
+ ProfileType::kPeakHeap, Sampler::GetSamplePeriod(), true, true);
+
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+ for (StackTrace* t = peak_sampled_span_stacks_; t != nullptr;
+ t = reinterpret_cast<StackTrace*>(t->stack[kMaxStackDepth - 1])) {
+ profile->AddTrace(1.0, *t);
+ }
+ return profile;
+}
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
diff --git a/contrib/libs/tcmalloc/tcmalloc/peak_heap_tracker.h b/contrib/libs/tcmalloc/tcmalloc/peak_heap_tracker.h
new file mode 100644
index 0000000000..a9f071d1b5
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/peak_heap_tracker.h
@@ -0,0 +1,61 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TCMALLOC_PEAK_HEAP_TRACKER_H_
+#define TCMALLOC_PEAK_HEAP_TRACKER_H_
+
+#include "absl/base/thread_annotations.h"
+#include "tcmalloc/common.h"
+#include "tcmalloc/internal/atomic_stats_counter.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/malloc_extension.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+class PeakHeapTracker {
+ public:
+ constexpr PeakHeapTracker() : peak_sampled_span_stacks_(nullptr) {}
+
+ // Possibly save high-water-mark allocation stack traces for peak-heap
+ // profile. Should be called immediately after sampling an allocation. If
+ // the heap has grown by a sufficient amount since the last high-water-mark,
+ // it will save a copy of the sample profile.
+ void MaybeSaveSample() ABSL_LOCKS_EXCLUDED(pageheap_lock);
+
+ // Return the saved high-water-mark heap profile, if any.
+ std::unique_ptr<ProfileBase> DumpSample() const
+ ABSL_LOCKS_EXCLUDED(pageheap_lock);
+
+ size_t CurrentPeakSize() const { return peak_sampled_heap_size_.value(); }
+
+ private:
+  // Linked list of stack traces from sampled allocations, saved (from
+  // Static::sampled_objects_) when we allocate memory from the system. The
+  // linked-list pointer is stored in StackTrace::stack[kMaxStackDepth-1].
+ StackTrace* peak_sampled_span_stacks_;
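+
+  // For illustration, the list is walked the same way DumpSample() does:
+  //   for (StackTrace* t = peak_sampled_span_stacks_; t != nullptr;
+  //        t = reinterpret_cast<StackTrace*>(t->stack[kMaxStackDepth - 1])) {
+  //     ...  // visit *t
+  //   }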
+
+ // Sampled heap size last time peak_sampled_span_stacks_ was saved. Only
+ // written under pageheap_lock; may be read without it.
+ StatsCounter peak_sampled_heap_size_;
+
+ bool IsNewPeak();
+};
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // TCMALLOC_PEAK_HEAP_TRACKER_H_
diff --git a/contrib/libs/tcmalloc/tcmalloc/profile_test.cc b/contrib/libs/tcmalloc/tcmalloc/profile_test.cc
new file mode 100644
index 0000000000..0bd62cd428
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/profile_test.cc
@@ -0,0 +1,281 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stddef.h>
+
+#include <algorithm>
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <new>
+#include <set>
+#include <thread> // NOLINT(build/c++11)
+#include <utility>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/container/flat_hash_map.h"
+#include "absl/synchronization/blocking_counter.h"
+#include "benchmark/benchmark.h"
+#include "tcmalloc/internal/declarations.h"
+#include "tcmalloc/internal/linked_list.h"
+#include "tcmalloc/malloc_extension.h"
+#include "tcmalloc/testing/testutil.h"
+
+namespace tcmalloc {
+namespace {
+
+TEST(AllocationSampleTest, TokenAbuse) {
+ auto token = MallocExtension::StartAllocationProfiling();
+ void *ptr = ::operator new(512 * 1024 * 1024);
+ // TODO(b/183453911): Remove workaround for GCC 10.x deleting operator new,
+ // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94295.
+ benchmark::DoNotOptimize(ptr);
+ ::operator delete(ptr);
+  // Repeated claims should happily return an empty profile.
+ auto profile = std::move(token).Stop();
+ int count = 0;
+ profile.Iterate([&](const Profile::Sample &) { count++; });
+
+#if !defined(UNDEFINED_BEHAVIOR_SANITIZER)
+ // UBSan does not implement our profiling API, but running the test can
+ // validate the correctness of the new/delete pairs.
+ EXPECT_EQ(count, 1);
+#endif
+
+ auto profile2 = std::move(token).Stop(); // NOLINT: use-after-move intended
+ int count2 = 0;
+ profile2.Iterate([&](const Profile::Sample &) { count2++; });
+ EXPECT_EQ(count2, 0);
+
+ // Delete (on the scope ending) without Claim should also be OK.
+ { MallocExtension::StartAllocationProfiling(); }
+}
+
+// Verify that profiling sessions concurrent with allocations do not crash due
+// to mutating pointers accessed by the sampling code (b/143623146).
+TEST(AllocationSampleTest, RaceToClaim) {
+ MallocExtension::SetProfileSamplingRate(1 << 14);
+
+ absl::BlockingCounter counter(2);
+ std::atomic<bool> stop{false};
+
+ std::thread t1([&]() {
+ counter.DecrementCount();
+
+ while (!stop) {
+ auto token = MallocExtension::StartAllocationProfiling();
+ absl::SleepFor(absl::Microseconds(1));
+ auto profile = std::move(token).Stop();
+ }
+ });
+
+ std::thread t2([&]() {
+ counter.DecrementCount();
+
+ const int kNum = 1000000;
+ std::vector<void *> ptrs;
+ while (!stop) {
+ for (int i = 0; i < kNum; i++) {
+ ptrs.push_back(::operator new(1));
+ }
+ for (void *p : ptrs) {
+ sized_delete(p, 1);
+ }
+ ptrs.clear();
+ }
+ });
+
+ // Verify the threads are up and running before we start the clock.
+ counter.Wait();
+
+ absl::SleepFor(absl::Seconds(1));
+
+ stop.store(true);
+
+ t1.join();
+ t2.join();
+}
+
+TEST(AllocationSampleTest, SampleAccuracy) {
+ // Disable GWP-ASan, since it allocates different sizes than normal samples.
+ MallocExtension::SetGuardedSamplingRate(-1);
+
+  // Allocate about 512 MiB each of various sizes. For _some_ but not all
+  // sizes, delete the objects as we go--it shouldn't matter for the sample
+  // count.
+ static const size_t kTotalPerSize = 512 * 1024 * 1024;
+
+ // (object size, object alignment, keep objects)
+ struct Requests {
+ size_t size;
+ size_t alignment;
+ bool keep;
+ // objects we don't delete as we go
+ void *list = nullptr;
+ };
+ std::vector<Requests> sizes = {
+ {8, 0, false}, {16, 16, true}, {1024, 0, false},
+ {64 * 1024, 64, false}, {512 * 1024, 0, true}, {1024 * 1024, 128, true}};
+ std::set<size_t> sizes_expected;
+ for (auto s : sizes) {
+ sizes_expected.insert(s.size);
+ }
+ auto token = MallocExtension::StartAllocationProfiling();
+
+ // We use new/delete to allocate memory, as malloc returns objects aligned to
+ // std::max_align_t.
+ for (auto &s : sizes) {
+ for (size_t bytes = 0; bytes < kTotalPerSize; bytes += s.size) {
+ void *obj;
+ if (s.alignment > 0) {
+ obj = operator new(s.size, static_cast<std::align_val_t>(s.alignment));
+ } else {
+ obj = operator new(s.size);
+ }
+ if (s.keep) {
+ tcmalloc_internal::SLL_Push(&s.list, obj);
+ } else if (s.alignment > 0) {
+ operator delete(obj, static_cast<std::align_val_t>(s.alignment));
+ } else {
+ operator delete(obj);
+ }
+ }
+ }
+ auto profile = std::move(token).Stop();
+
+ // size -> bytes seen
+ absl::flat_hash_map<size_t, size_t> m;
+
+ // size -> alignment request
+ absl::flat_hash_map<size_t, size_t> alignment;
+
+ for (auto s : sizes) {
+ alignment[s.size] = s.alignment;
+ }
+
+ profile.Iterate([&](const tcmalloc::Profile::Sample &e) {
+ // Skip unexpected sizes. They may have been triggered by a background
+ // thread.
+ if (sizes_expected.find(e.allocated_size) == sizes_expected.end()) {
+ return;
+ }
+
+    // Don't check stack traces until we have evidence that they're broken;
+    // it's tedious and done fairly well elsewhere.
+ m[e.allocated_size] += e.sum;
+ EXPECT_EQ(alignment[e.requested_size], e.requested_alignment);
+ });
+
+#if !defined(UNDEFINED_BEHAVIOR_SANITIZER)
+ // UBSan does not implement our profiling API, but running the test can
+ // validate the correctness of the new/delete pairs.
+ size_t max_bytes = 0, min_bytes = std::numeric_limits<size_t>::max();
+ EXPECT_EQ(m.size(), sizes_expected.size());
+ for (auto seen : m) {
+ size_t bytes = seen.second;
+ min_bytes = std::min(min_bytes, bytes);
+ max_bytes = std::max(max_bytes, bytes);
+ }
+  // Hopefully we're in a fairly small range that contains our actual
+  // allocation.
+ // TODO(b/134690164): better statistical tests here.
+ EXPECT_GE((min_bytes * 3) / 2, max_bytes);
+ EXPECT_LE((min_bytes * 3) / 4, kTotalPerSize);
+ EXPECT_LE(kTotalPerSize, (max_bytes * 4) / 3);
+#endif
+
+ // Remove the objects we left alive
+ for (auto &s : sizes) {
+ while (s.list != nullptr) {
+ void *obj = tcmalloc_internal::SLL_Pop(&s.list);
+ if (s.alignment > 0) {
+ operator delete(obj, static_cast<std::align_val_t>(s.alignment));
+ } else {
+ operator delete(obj);
+ }
+ }
+ }
+}
+
+TEST(FragmentationzTest, Accuracy) {
+ // Disable GWP-ASan, since it allocates different sizes than normal samples.
+ MallocExtension::SetGuardedSamplingRate(-1);
+
+  // A fairly odd allocation size that will be rounded up to 128. This lets
+  // us find our record in the table.
+ static const size_t kItemSize = 115;
+ // allocate about 3.5 GiB:
+ static const size_t kNumItems = 32 * 1024 * 1024;
+
+ std::vector<std::unique_ptr<char[]>> keep;
+ std::vector<std::unique_ptr<char[]>> drop;
+ // hint expected sizes:
+ drop.reserve(kNumItems * 8 / 10);
+ keep.reserve(kNumItems * 2 / 10);
+
+ // We allocate many items, then free 80% of them "randomly". (To
+ // decrease noise and speed up, we just keep every 5th one exactly.)
+ for (int i = 0; i < kNumItems; ++i) {
+    // Ideally we should use malloc() here, for consistency; but unique_ptr
+    // doesn't come with a "free()" deleter, so use ::operator new instead.
+ (i % 5 == 0 ? keep : drop)
+ .push_back(std::unique_ptr<char[]>(
+ static_cast<char *>(::operator new[](kItemSize))));
+ }
+ drop.resize(0);
+
+  // There are at least 64 items per span here, so (8/10)^64 = 6.2e-7 ~= 0 is
+  // the probability we actually managed to free a page; every page is
+  // fragmented. We still have 20% or so of it allocated, so we should see 80%
+  // of it charged to these allocations as fragmentation.
+ auto profile = MallocExtension::SnapshotCurrent(ProfileType::kFragmentation);
+
+  // Pull out the fragmentationz entry corresponding to this allocation size.
+ size_t requested_size = 0;
+ size_t allocated_size = 0;
+ size_t sum = 0;
+ size_t count = 0;
+ profile.Iterate([&](const Profile::Sample &e) {
+ if (e.requested_size != kItemSize) return;
+
+ if (requested_size == 0) {
+ allocated_size = e.allocated_size;
+ requested_size = e.requested_size;
+ } else {
+      // We will usually have a single entry in the profile, but in builds
+      // without optimization our fast-path code causes the same call-site to
+      // have two different stack traces. Thus we expect and deal with a
+      // second entry for the same allocation.
+ EXPECT_EQ(requested_size, e.requested_size);
+ EXPECT_EQ(allocated_size, e.allocated_size);
+ }
+ sum += e.sum;
+ count += e.count;
+ });
+
+ double frag_bytes = sum;
+ double real_frag_bytes =
+ static_cast<double>(allocated_size * kNumItems) * 0.8;
+ // We should be pretty close with this much data:
+ // TODO(b/134690164): this is still slightly flaky (<1%) - why?
+ EXPECT_NEAR(real_frag_bytes, frag_bytes, real_frag_bytes * 0.15)
+ << " sum = " << sum << " allocated = " << allocated_size
+ << " requested = " << requested_size << " count = " << count;
+}
+
+} // namespace
+} // namespace tcmalloc
diff --git a/contrib/libs/tcmalloc/tcmalloc/realloc_test.cc b/contrib/libs/tcmalloc/tcmalloc/realloc_test.cc
new file mode 100644
index 0000000000..e0e6aba606
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/realloc_test.cc
@@ -0,0 +1,104 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Test realloc() functionality
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <algorithm>
+#include <utility>
+
+#include "gtest/gtest.h"
+#include "absl/random/random.h"
+#include "benchmark/benchmark.h"
+
+namespace tcmalloc {
+namespace {
+
+// Fill a buffer of the specified size with a predetermined pattern
+void Fill(unsigned char* buffer, int n) {
+ for (int i = 0; i < n; i++) {
+ buffer[i] = (i & 0xff);
+ }
+}
+
+// Check that the specified buffer has the predetermined pattern
+// generated by Fill()
+void ExpectValid(unsigned char* buffer, int n) {
+ for (int i = 0; i < n; i++) {
+ ASSERT_EQ((i & 0xff), buffer[i]);
+ }
+}
+
+// Return the next interesting size/delta to check. Returns -1 if no more.
+int NextSize(int size) {
+ if (size < 100) {
+ return size + 1;
+ } else if (size < 100000) {
+ // Find next power of two
+ int power = 1;
+ while (power < size) {
+ power <<= 1;
+ }
+
+ // Yield (power-1, power, power+1)
+ if (size < power - 1) {
+ return power - 1;
+ } else if (size == power - 1) {
+ return power;
+ } else {
+ assert(size == power);
+ return power + 1;
+ }
+ } else {
+ return -1;
+ }
+}
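+
+// For illustration, starting from 0 the sequence this yields is 0, 1, ..., 100,
+// then 127, 128, 129, 255, 256, 257, ..., 65535, 65536, 65537, 131071, and
+// finally -1 (stop).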
+
+TEST(ReallocTest, TestWithinCache) {
+ for (int src_size = 0; src_size >= 0; src_size = NextSize(src_size)) {
+ for (int dst_size = 0; dst_size >= 0; dst_size = NextSize(dst_size)) {
+ unsigned char* src = static_cast<unsigned char*>(malloc(src_size));
+ Fill(src, src_size);
+ unsigned char* dst = static_cast<unsigned char*>(realloc(src, dst_size));
+ ExpectValid(dst, std::min(src_size, dst_size));
+ Fill(dst, dst_size);
+ ExpectValid(dst, dst_size);
+ if (dst != nullptr) free(dst);
+ }
+ }
+}
+
+TEST(ReallocTest, AlignedAllocRealloc) {
+ std::pair<size_t, size_t> sizes[] = {{1024, 2048}, {512, 128}};
+
+ for (const auto& p : sizes) {
+ size_t src_size = p.first, dst_size = p.second;
+
+ auto src = static_cast<unsigned char*>(aligned_alloc(32, src_size));
+ Fill(src, src_size);
+ auto dst = static_cast<unsigned char*>(realloc(src, dst_size));
+ ExpectValid(dst, std::min(src_size, dst_size));
+ Fill(dst, dst_size);
+ ExpectValid(dst, dst_size);
+ if (dst != nullptr) free(dst);
+ }
+}
+
+} // namespace
+} // namespace tcmalloc
diff --git a/contrib/libs/tcmalloc/tcmalloc/runtime_size_classes.cc b/contrib/libs/tcmalloc/tcmalloc/runtime_size_classes.cc
new file mode 100644
index 0000000000..4bca6485ca
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/runtime_size_classes.cc
@@ -0,0 +1,81 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/runtime_size_classes.h"
+
+#include <string.h>
+
+#include "absl/base/attributes.h"
+#include "tcmalloc/internal/environment.h"
+#include "tcmalloc/internal/logging.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+namespace runtime_size_classes_internal {
+
+int ParseSizeClasses(absl::string_view env, int max_size, int max_classes,
+ SizeClassInfo* parsed) {
+ int c = 1;
+ int t = 0;
+ memset(parsed, 0, sizeof(parsed[0]) * max_classes);
+ for (char e : env) {
+ // TODO(b/120885588): replace with absl::from_chars, once it is fully
+ // implemented.
+ if ('0' <= e && e <= '9') {
+ int n = e - '0';
+ int v = 10 * parsed[c].Value(t) + n;
+ if (v > max_size) {
+ Log(kLog, __FILE__, __LINE__, "size class integer overflow", v, n);
+ return -3;
+ }
+ parsed[c].SetValue(t, v);
+ } else if (e == ';') {
+ // next size class
+ t = 0;
+ c++;
+ if (c >= max_classes) {
+ return c;
+ }
+ } else if (e == ',') {
+ t++;
+ if (t >= kSizeClassInfoMembers) {
+ Log(kLog, __FILE__, __LINE__, "size class too many commas", c);
+ return -1;
+ }
+ } else {
+ Log(kLog, __FILE__, __LINE__, "Delimiter not , or ;", c, e);
+ return -2;
+ }
+ }
+ // The size class [0, 0, 0] counts as a size class, but is not parsed.
+ return c + 1;
+}
+
+} // namespace runtime_size_classes_internal
+
+int ABSL_ATTRIBUTE_NOINLINE MaybeSizeClassesFromEnv(int max_size,
+ int max_classes,
+ SizeClassInfo* parsed) {
+ const char* e = thread_safe_getenv("TCMALLOC_SIZE_CLASSES");
+ if (!e) {
+ return 0;
+ }
+ return runtime_size_classes_internal::ParseSizeClasses(e, max_size,
+ max_classes, parsed);
+}
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
diff --git a/contrib/libs/tcmalloc/tcmalloc/runtime_size_classes.h b/contrib/libs/tcmalloc/tcmalloc/runtime_size_classes.h
new file mode 100644
index 0000000000..42c5aa8859
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/runtime_size_classes.h
@@ -0,0 +1,49 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Run-time specification of Size classes
+#ifndef TCMALLOC_RUNTIME_SIZE_CLASSES_H_
+#define TCMALLOC_RUNTIME_SIZE_CLASSES_H_
+
+#include "absl/strings/string_view.h"
+#include "tcmalloc/size_class_info.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+namespace runtime_size_classes_internal {
+
+// Set size classes from a string.
+// Format: "size,pages,num_to_move;"
+// Example: "8,1,32;16;32;40,1,16;128,2;256;512"
+// This function doesn't do validity checking. If a field is missing, its
+// value is set to zero.
+// The number of size classes parsed is returned.
+int ParseSizeClasses(absl::string_view env, int max_size, int max_classes,
+ SizeClassInfo* parsed);
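+
+// Usage sketch (illustrative; kMaxSize and kNumClasses are caller-provided
+// bounds, as in runtime_size_classes_test.cc):
+//   SizeClassInfo parsed[kNumClasses];
+//   int n = ParseSizeClasses("8,1,32;1024,2,16", kMaxSize, kNumClasses,
+//                            parsed);
+//   // n == 3: the implicit [0, 0, 0] class plus the two parsed entries.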
+
+} // namespace runtime_size_classes_internal
+
+// If the environment variable TCMALLOC_SIZE_CLASSES is defined, its value is
+// parsed using ParseSizeClasses and ApplySizeClassDefaults into parsed. The
+// number of size classes parsed is returned. On error, a negative value is
+// returned.
+int MaybeSizeClassesFromEnv(int max_size, int max_classes,
+ SizeClassInfo* parsed);
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // TCMALLOC_RUNTIME_SIZE_CLASSES_H_
diff --git a/contrib/libs/tcmalloc/tcmalloc/runtime_size_classes_fuzz.cc b/contrib/libs/tcmalloc/tcmalloc/runtime_size_classes_fuzz.cc
new file mode 100644
index 0000000000..89a111e3b8
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/runtime_size_classes_fuzz.cc
@@ -0,0 +1,30 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/strings/string_view.h"
+#include "tcmalloc/common.h"
+#include "tcmalloc/runtime_size_classes.h"
+#include "tcmalloc/size_class_info.h"
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* d, size_t size) {
+ absl::string_view env =
+ absl::string_view(reinterpret_cast<const char*>(d), size);
+
+ tcmalloc::tcmalloc_internal::SizeClassInfo
+ parsed[tcmalloc::tcmalloc_internal::kNumClasses];
+ tcmalloc::tcmalloc_internal::runtime_size_classes_internal::ParseSizeClasses(
+ env, tcmalloc::tcmalloc_internal::kMaxSize,
+ tcmalloc::tcmalloc_internal::kNumClasses, parsed);
+ return 0;
+}
diff --git a/contrib/libs/tcmalloc/tcmalloc/runtime_size_classes_test.cc b/contrib/libs/tcmalloc/tcmalloc/runtime_size_classes_test.cc
new file mode 100644
index 0000000000..6a8771f9e2
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/runtime_size_classes_test.cc
@@ -0,0 +1,114 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/runtime_size_classes.h"
+
+#include <stdlib.h>
+
+#include "gtest/gtest.h"
+
+namespace tcmalloc {
+namespace tcmalloc_internal {
+namespace {
+
+using runtime_size_classes_internal::ParseSizeClasses;
+
+constexpr int kNumClasses = 4;
+constexpr int kMaxSize = 1024 * 1024;
+
+TEST(RuntimeSizeClassesTest, EnvSingleFullClass) {
+ // Validate simple parsing.
+ SizeClassInfo parsed[kNumClasses];
+ EXPECT_EQ(ParseSizeClasses("8,1,32", kMaxSize, kNumClasses, parsed), 2);
+ EXPECT_EQ(parsed[1].size, 8);
+ EXPECT_EQ(parsed[1].pages, 1);
+ EXPECT_EQ(parsed[1].num_to_move, 32);
+
+ EXPECT_EQ(parsed[0].size, 0);
+ EXPECT_EQ(parsed[0].pages, 0);
+ EXPECT_EQ(parsed[0].num_to_move, 0);
+}
+
+TEST(RuntimeSizeClassesTest, EnvSingleSizeOnlyClass) {
+ // Validate simple parsing.
+ SizeClassInfo parsed[kNumClasses];
+ EXPECT_EQ(ParseSizeClasses("8,1,2", kMaxSize, kNumClasses, parsed), 2);
+ EXPECT_EQ(parsed[1].size, 8);
+ EXPECT_EQ(parsed[1].pages, 1);
+ EXPECT_EQ(parsed[1].num_to_move, 2);
+}
+
+TEST(RuntimeSizeClassesTest, EnvTwoFullClasses) {
+ // Validate two classes
+ SizeClassInfo parsed[kNumClasses];
+ EXPECT_EQ(ParseSizeClasses("8,1,32;1024,2,16", kMaxSize, kNumClasses, parsed),
+ 3);
+ EXPECT_EQ(parsed[1].size, 8);
+ EXPECT_EQ(parsed[1].pages, 1);
+ EXPECT_EQ(parsed[1].num_to_move, 32);
+
+ EXPECT_EQ(parsed[2].size, 1024);
+ EXPECT_EQ(parsed[2].pages, 2);
+ EXPECT_EQ(parsed[2].num_to_move, 16);
+}
+
+TEST(RuntimeSizeClassesTest, ParseArrayLimit) {
+ // Validate that the limit on the number of size classes is enforced.
+ SizeClassInfo parsed[kNumClasses] = {
+ {0, 0, 0},
+ {9, 9, 9},
+ {7, 7, 7},
+ };
+ EXPECT_EQ(ParseSizeClasses("8,1,32;1024,2,16", kMaxSize, 2, parsed), 2);
+
+ EXPECT_EQ(parsed[1].size, 8);
+ EXPECT_EQ(parsed[1].pages, 1);
+ EXPECT_EQ(parsed[1].num_to_move, 32);
+
+ EXPECT_EQ(parsed[2].size, 7);
+ EXPECT_EQ(parsed[2].pages, 7);
+ EXPECT_EQ(parsed[2].num_to_move, 7);
+}
+
+TEST(RuntimeSizeClassesTest, EnvBadDelimiter) {
+ // Invalid class sizes should be caught
+ SizeClassInfo parsed[kNumClasses];
+ EXPECT_EQ(ParseSizeClasses("8/4,16,3,1", kMaxSize, kNumClasses, parsed), -2);
+}
+
+TEST(RuntimeSizeClassesTest, EnvTooManyCommas) {
+ // Invalid class sizes should be caught
+ SizeClassInfo parsed[kNumClasses];
+ EXPECT_EQ(ParseSizeClasses("8,4,16,3", kMaxSize, kNumClasses, parsed), -1);
+}
+
+TEST(RuntimeSizeClassesTest, EnvIntOverflow) {
+ // Invalid class sizes should be caught
+ SizeClassInfo parsed[kNumClasses];
+ EXPECT_EQ(ParseSizeClasses("8,4,2147483648", kMaxSize, kNumClasses, parsed),
+ -3);
+}
+
+TEST(RuntimeSizeClassesTest, EnvVariableExamined) {
+ SizeClassInfo parsed[kNumClasses];
+ setenv("TCMALLOC_SIZE_CLASSES", "256,13,31", 1);
+ EXPECT_EQ(MaybeSizeClassesFromEnv(kMaxSize, kNumClasses, parsed), 2);
+ EXPECT_EQ(parsed[1].size, 256);
+ EXPECT_EQ(parsed[1].pages, 13);
+ EXPECT_EQ(parsed[1].num_to_move, 31);
+}
+
+} // namespace
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
diff --git a/contrib/libs/tcmalloc/tcmalloc/sampler.cc b/contrib/libs/tcmalloc/tcmalloc/sampler.cc
new file mode 100644
index 0000000000..5e89c9e830
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/sampler.cc
@@ -0,0 +1,206 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/sampler.h"
+
+#include <algorithm>
+#include <atomic>
+#include <cmath>
+#include <limits>
+
+#include "tcmalloc/common.h"
+#include "tcmalloc/parameters.h"
+#include "tcmalloc/static_vars.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+ssize_t Sampler::GetSamplePeriod() {
+ return Parameters::profile_sampling_rate();
+}
+
+// Run this before using your sampler
+ABSL_ATTRIBUTE_NOINLINE void Sampler::Init(uint64_t seed) {
+ ASSERT(seed != 0);
+
+ // do_malloc comes here without having initialized statics, and
+ // PickNextSamplingPoint uses data initialized in static vars.
+ Static::InitIfNecessary();
+
+ // Initialize PRNG
+ rnd_ = seed;
+ // Step it forward 20 times for good measure
+ for (int i = 0; i < 20; i++) {
+ rnd_ = NextRandom(rnd_);
+ }
+ // Initialize counters
+ true_bytes_until_sample_ = PickNextSamplingPoint();
+ if (Static::IsOnFastPath()) {
+ bytes_until_sample_ = true_bytes_until_sample_;
+ was_on_fast_path_ = true;
+ } else {
+ // Force the next allocation to hit the slow path.
+ ASSERT(bytes_until_sample_ == 0);
+ was_on_fast_path_ = false;
+ }
+ allocs_until_guarded_sample_ = PickNextGuardedSamplingPoint();
+}
+
+ssize_t Sampler::PickNextSamplingPoint() {
+ sample_period_ = GetSamplePeriod();
+ if (sample_period_ <= 0) {
+ // In this case, we don't want to sample ever, and the larger a
+ // value we put here, the longer until we hit the slow path
+ // again. However, we have to support the flag changing at
+ // runtime, so pick something reasonably large (to keep overhead
+ // low) but small enough that we'll eventually start to sample
+ // again.
+ return 128 << 20;
+ }
+ if (ABSL_PREDICT_FALSE(sample_period_ == 1)) {
+ // A sample period of 1, generally used only in tests due to its exorbitant
+ // cost, is a request for *every* allocation to be sampled.
+ return 1;
+ }
+ return GetGeometricVariable(sample_period_);
+}
+
+ssize_t Sampler::PickNextGuardedSamplingPoint() {
+ double guarded_sample_rate = Parameters::guarded_sampling_rate();
+ double profile_sample_rate = Parameters::profile_sampling_rate();
+ if (guarded_sample_rate < 0 || profile_sample_rate <= 0) {
+ // Guarded sampling is disabled but could be turned on at run time. So we
+ // return a sampling point (default mean=100) in case guarded sampling is
+ // later enabled. Since the flag is also checked in
+ // ShouldSampleGuardedAllocation(), guarded sampling is still guaranteed
+ // not to run until it is enabled.
+ return GetGeometricVariable(/*mean=*/100);
+ }
+ return GetGeometricVariable(
+ std::ceil(guarded_sample_rate / profile_sample_rate));
+}
+
+// Generates a geometric variable with the specified mean.
+// This is done by generating a random number between 0 and 1 and applying
+// the inverse cumulative distribution function for an exponential.
+// Specifically: Let m be the inverse of the sample period, then
+// the probability distribution function is m*exp(-mx) so the CDF is
+// p = 1 - exp(-mx), so
+// q = 1 - p = exp(-mx)
+// log_e(q) = -mx
+// -log_e(q)/m = x
+// log_2(q) * (-log_e(2) * 1/m) = x
+// In the code, q is actually in the range 1 to 2**26, hence the -26 below
+ssize_t Sampler::GetGeometricVariable(ssize_t mean) {
+ rnd_ = NextRandom(rnd_);
+ // Take the top 26 bits as the random number
+ // (This plus the 1<<58 sampling bound give a max possible step of
+ // 5194297183973780480 bytes.)
+ const uint64_t prng_mod_power = 48; // Number of bits in prng
+ // The uint32_t cast is to prevent a (hard-to-reproduce) NAN
+ // under piii debug for some binaries.
+ double q = static_cast<uint32_t>(rnd_ >> (prng_mod_power - 26)) + 1.0;
+ // Put the computed p-value through the CDF of a geometric.
+ double interval = (std::log2(q) - 26) * (-std::log(2.0) * mean);
+
+ // Very large values of interval overflow ssize_t. If we happen to hit this
+ // improbable condition, we simply cheat and clamp interval to the largest
+ // supported value. This is slightly tricky, since casting the maximum
+ // ssize_t value to a double rounds it up, and casting that rounded value
+ // back to an ssize_t will still overflow. Thus, we specifically need to
+ // use a ">=" condition here, rather than simply ">" as would be appropriate
+ // if the arithmetic were exact.
+ if (interval >= static_cast<double>(std::numeric_limits<ssize_t>::max()))
+ return std::numeric_limits<ssize_t>::max();
+ else
+ return static_cast<ssize_t>(interval);
+}
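+
+// A standalone restatement of the computation above (illustrative only, not
+// used by the allocator), convenient for checking the math offline:
+//
+//   #include <cmath>
+//   #include <cstdint>
+//   double GeometricStep(uint64_t rnd, double mean) {
+//     // Top 26 bits of the 48-bit PRNG state, shifted into [1, 2^26].
+//     double q = static_cast<uint32_t>(rnd >> (48 - 26)) + 1.0;
+//     return (std::log2(q) - 26) * (-std::log(2.0) * mean);
+//   }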
+
+size_t Sampler::RecordAllocationSlow(size_t k) {
+ static std::atomic<uint64_t> global_randomness;
+
+ if (ABSL_PREDICT_FALSE(!initialized_)) {
+ initialized_ = true;
+ uint64_t global_seed =
+ global_randomness.fetch_add(1, std::memory_order_relaxed);
+ Init(reinterpret_cast<uintptr_t>(this) ^ global_seed);
+ if (static_cast<size_t>(true_bytes_until_sample_) > k) {
+ true_bytes_until_sample_ -= k;
+ if (Static::IsOnFastPath()) {
+ bytes_until_sample_ -= k;
+ was_on_fast_path_ = true;
+ }
+ return 0;
+ }
+ }
+
+ if (ABSL_PREDICT_FALSE(true_bytes_until_sample_ > k)) {
+ // The last time we picked a sampling point, we were on the slow path. We
+    // don't want to sample yet since true_bytes_until_sample_ > k.
+ true_bytes_until_sample_ -= k;
+
+ if (ABSL_PREDICT_TRUE(Static::IsOnFastPath())) {
+ // We've moved from the slow path to the fast path since the last sampling
+ // point was picked.
+ bytes_until_sample_ = true_bytes_until_sample_;
+ true_bytes_until_sample_ = 0;
+ was_on_fast_path_ = true;
+ } else {
+ bytes_until_sample_ = 0;
+ was_on_fast_path_ = false;
+ }
+
+ return 0;
+ }
+
+ // Compute sampling weight (i.e. the number of bytes represented by this
+ // sample in expectation).
+ //
+ // Let k be the size of the allocation, p be the sample period
+ // (sample_period_), and f the number of bytes after which we decided to
+ // sample (either bytes_until_sample_ or true_bytes_until_sample_). On
+ // average, if we were to continue taking samples every p bytes, we would take
+ // (k - f) / p additional samples in this allocation, plus the one we are
+ // taking now, for 1 + (k - f) / p total samples. Multiplying by p, the mean
+ // number of bytes between samples, gives us a weight of p + k - f.
+ //
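+  // For example (with arbitrary numbers): if p = 512 KiB, k = 64 KiB, and
+  // f = 16 KiB, the weight is 512 KiB + 64 KiB - 16 KiB = 560 KiB; this one
+  // sample stands in for roughly 560 KiB of allocated bytes.
+  //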
+ size_t weight =
+ sample_period_ + k -
+ (was_on_fast_path_ ? bytes_until_sample_ : true_bytes_until_sample_);
+ const auto point = PickNextSamplingPoint();
+ if (ABSL_PREDICT_TRUE(Static::IsOnFastPath())) {
+ bytes_until_sample_ = point;
+ true_bytes_until_sample_ = 0;
+ was_on_fast_path_ = true;
+ } else {
+ bytes_until_sample_ = 0;
+ true_bytes_until_sample_ = point;
+ was_on_fast_path_ = false;
+ }
+ return GetSamplePeriod() <= 0 ? 0 : weight;
+}
+
+double AllocatedBytes(const StackTrace& stack, bool unsample) {
+ if (unsample) {
+ return static_cast<double>(stack.weight) * stack.allocated_size /
+ (stack.requested_size + 1);
+ } else {
+ return stack.allocated_size;
+ }
+}
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
diff --git a/contrib/libs/tcmalloc/tcmalloc/sampler.h b/contrib/libs/tcmalloc/tcmalloc/sampler.h
new file mode 100644
index 0000000000..d18dd44234
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/sampler.h
@@ -0,0 +1,298 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TCMALLOC_SAMPLER_H_
+#define TCMALLOC_SAMPLER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+#include "absl/base/attributes.h"
+#include "absl/base/optimization.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/parameters.h"
+#include "tcmalloc/static_vars.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+//-------------------------------------------------------------------
+// Sampler to decide when to create a sample trace for an allocation
+// Not thread safe: Each thread should have its own sampler object.
+// Caller must use external synchronization if used
+// from multiple threads.
+//
+// With 512K average sample step (the default):
+// the probability of sampling a 4K allocation is about 0.00778
+// the probability of sampling a 1MB allocation is about 0.865
+// the probability of sampling a 1GB allocation is about 1.00000
+// In general, the probability of sampling an allocation of size X,
+// given a flag value of Y (default 1M), is:
+// 1 - e^(-X/Y)
+//
+// With 128K average sample step:
+// the probability of sampling a 1MB allocation is about 0.99966
+// the probability of sampling a 1GB allocation is about 1.0
+// (about 1 - 2**(-26))
+// With 1M average sample step:
+// the probability of sampling a 4K allocation is about 0.00390
+// the probability of sampling a 1MB allocation is about 0.632
+// the probability of sampling a 1GB allocation is about 1.0
+//
+// The sampler works by representing memory as a long stream from
+// which allocations are taken. Some of the bytes in this stream are
+// marked and if an allocation includes a marked byte then it is
+// sampled. Bytes are marked according to a Poisson point process
+// with each byte being marked independently with probability
+// p = 1/profile_sampling_rate. This makes the probability
+// of sampling an allocation of X bytes equal to the CDF of
+// a geometric with mean profile_sampling_rate (i.e., the
+// probability that at least one byte in the range is marked). This
+// is accurately given by the CDF of the corresponding exponential
+// distribution: 1 - e^(-X/profile_sampling_rate).
+// Independence of the byte marking ensures independence of
+// the sampling of each allocation.
+//
+// This scheme is implemented by noting that, starting from any
+// fixed place, the number of bytes until the next marked byte
+// is geometrically distributed. This number is recorded as
+// bytes_until_sample_. Every allocation subtracts from this
+// number until it is less than 0. When this happens the current
+// allocation is sampled.
+//
+// When an allocation occurs, bytes_until_sample_ is reset to
+// a new independently sampled geometric number of bytes. The
+// memoryless property of the point process means that this may
+// be taken as the number of bytes after the end of the current
+// allocation until the next marked byte. This ensures that
+// very large allocations which would intersect many marked bytes
+// only result in a single call to PickNextSamplingPoint.
+//-------------------------------------------------------------------
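+
+// The probabilities quoted above follow directly from the exponential CDF.
+// A self-contained sketch (illustrative only, not part of the sampler API):
+//
+//   #include <cmath>
+//   // Probability that an allocation of x bytes is sampled when the
+//   // average sample step is y bytes.
+//   double SampleProbability(double x, double y) {
+//     return 1.0 - std::exp(-x / y);
+//   }
+//   // SampleProbability(4 << 10, 512 << 10)  ~= 0.00778
+//   // SampleProbability(1 << 20, 512 << 10)  ~= 0.865
+//   // SampleProbability(1 << 20, 1 << 20)    ~= 0.632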
+
+class SamplerTest;
+
+class Sampler {
+ public:
+ // Record allocation of "k" bytes. If the allocation needs to be sampled,
+ // return its sampling weight (i.e., the expected number of allocations of
+ // this size represented by this sample); otherwise return 0.
+ size_t RecordAllocation(size_t k);
+
+ // Same as above (but faster), except:
+ // a) REQUIRES(k < std::numeric_limits<ssize_t>::max())
+ // b) if this returns false, you must call RecordAllocation
+  //    to confirm whether sampling is truly needed.
+ //
+  // The point of this function is to deal only with the common case of no
+  // sampling and let the caller (which is on the malloc fast path)
+  // "escalate" to the fuller and slower logic only if necessary.
+ bool TryRecordAllocationFast(size_t k);
+
+ // If the guarded sampling point has been reached, selects a new sampling
+ // point and returns true. Otherwise returns false.
+ bool ShouldSampleGuardedAllocation();
+
+ // Returns the Sampler's cached Static::IsOnFastPath state. This may differ
+ // from a fresh computation due to activating per-CPU mode or the
+ // addition/removal of hooks.
+ bool IsOnFastPath() const;
+ void UpdateFastPathState();
+
+ // Generate a geometric with mean profile_sampling_rate.
+ //
+ // Remembers the value of sample_rate for use in reweighing the sample
+ // later (so that if the flag value changes before the next sample is taken,
+ // the next sample is still weighed properly).
+ ssize_t PickNextSamplingPoint();
+
+ // Generates a geometric with mean guarded_sample_rate.
+ ssize_t PickNextGuardedSamplingPoint();
+
+ // Returns the current sample period
+ static ssize_t GetSamplePeriod();
+
+ // The following are public for the purposes of testing
+ static uint64_t NextRandom(uint64_t rnd_); // Returns the next prng value
+
+ constexpr Sampler()
+ : bytes_until_sample_(0),
+ sample_period_(0),
+ true_bytes_until_sample_(0),
+ allocs_until_guarded_sample_(0),
+ rnd_(0),
+ initialized_(false),
+ was_on_fast_path_(false) {}
+
+ private:
+ // Bytes until we sample next.
+ //
+ // More specifically when bytes_until_sample_ is X, we can allocate
+ // X bytes without triggering sampling; on the (X+1)th allocated
+ // byte, the containing allocation will be sampled.
+ //
+ // Always non-negative with only very brief exceptions (see
+  // DecrementFast{,Finish}), so casting to size_t is ok.
+ ssize_t bytes_until_sample_;
+
+ // Saved copy of the sampling period from when we actually set
+ // (true_)bytes_until_sample_. This allows us to properly calculate the sample
+ // weight of the first sample after the sampling period is changed.
+ ssize_t sample_period_;
+
+ // true_bytes_until_sample_ tracks the sampling point when we are on the slow
+ // path when picking sampling points (!Static::IsOnFastPath()) up until we
+ // notice (due to another allocation) that this state has changed.
+ ssize_t true_bytes_until_sample_;
+
+ // Number of sampled allocations until we do a guarded allocation.
+ ssize_t allocs_until_guarded_sample_;
+
+ uint64_t rnd_; // Cheap random number generator
+ bool initialized_;
+ bool was_on_fast_path_;
+
+ private:
+ friend class SamplerTest;
+ // Initialize this sampler.
+ void Init(uint64_t seed);
+ size_t RecordAllocationSlow(size_t k);
+ ssize_t GetGeometricVariable(ssize_t mean);
+};
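+
+// A minimal usage sketch (illustrative only; the real callers live on the
+// malloc fast path and keep one Sampler per thread, as noted above):
+//
+//   Sampler sampler;  // thread-local in practice
+//   if (size_t weight = sampler.RecordAllocation(4096)) {
+//     // This allocation was picked for sampling; `weight` is its sampling
+//     // weight as described in RecordAllocation's comment.
+//   }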
+
+inline size_t Sampler::RecordAllocation(size_t k) {
+ // The first time we enter this function we expect bytes_until_sample_
+  // to be zero, and we must call RecordAllocationSlow() to ensure
+ // proper initialization of static vars.
+ ASSERT(Static::IsInited() || bytes_until_sample_ == 0);
+
+ // Avoid missampling 0.
+ k++;
+
+ // Note that we have to deal with arbitrarily large values of k
+ // here. Thus we're upcasting bytes_until_sample_ to unsigned rather
+ // than the other way around. And this is why this code cannot be
+ // merged with DecrementFast code below.
+ if (static_cast<size_t>(bytes_until_sample_) <= k) {
+ size_t result = RecordAllocationSlow(k);
+ ASSERT(Static::IsInited());
+ return result;
+ } else {
+ bytes_until_sample_ -= k;
+ ASSERT(Static::IsInited());
+ return 0;
+ }
+}
+
+inline bool ABSL_ATTRIBUTE_ALWAYS_INLINE
+Sampler::TryRecordAllocationFast(size_t k) {
+  // Avoid missampling 0. Callers pass in the requested size (which, per the
+  // assertion below, is non-negative at this point). Since subtracting 0 from
+ // bytes_until_sample_ is a no-op, we increment k by one and resolve the
+ // effect on the distribution in Sampler::Unsample.
+ k++;
+
+  // For efficiency reasons, we're testing bytes_until_sample_ after
+  // decrementing it by k. This allows the compiler to do sub <reg>, <mem>
+ // followed by conditional jump on sign. But it is correct only if k
+ // is actually smaller than largest ssize_t value. Otherwise
+ // converting k to signed value overflows.
+ //
+ // It would be great for generated code to be sub <reg>, <mem>
+ // followed by conditional jump on 'carry', which would work for
+  // arbitrary values of k, but there seems to be no way to express
+ // that in C++.
+ //
+ // Our API contract explicitly states that only small values of k
+ // are permitted. And thus it makes sense to assert on that.
+ ASSERT(static_cast<ssize_t>(k) > 0);
+
+ bytes_until_sample_ -= static_cast<ssize_t>(k);
+ if (ABSL_PREDICT_FALSE(bytes_until_sample_ <= 0)) {
+ // Note, we undo sampling counter update, since we're not actually
+ // handling slow path in the "needs sampling" case (calling
+ // RecordAllocationSlow to reset counter). And we do that in order
+ // to avoid non-tail calls in malloc fast-path. See also comments
+ // on declaration inside Sampler class.
+ //
+    // volatile is used here to improve the compiler's choice of
+    // instructions. We know that this path is very rare and that there
+    // is no need to keep the previous value of bytes_until_sample_ in a
+    // register. This helps the compiler generate a slightly more efficient
+    // sub <reg>, <mem> instruction for the subtraction above.
+ volatile ssize_t *ptr =
+ const_cast<volatile ssize_t *>(&bytes_until_sample_);
+ *ptr += k;
+ return false;
+ }
+ return true;
+}
+
+inline bool ABSL_ATTRIBUTE_ALWAYS_INLINE
+Sampler::ShouldSampleGuardedAllocation() {
+ if (Parameters::guarded_sampling_rate() < 0) return false;
+ allocs_until_guarded_sample_--;
+ if (ABSL_PREDICT_FALSE(allocs_until_guarded_sample_ < 0)) {
+ allocs_until_guarded_sample_ = PickNextGuardedSamplingPoint();
+ return true;
+ }
+ return false;
+}
+
+// Inline functions which are public for testing purposes
+
+// Returns the next prng value.
+// pRNG is: aX+b mod c with a = 0x5DEECE66D, b = 0xB, c = 1<<48
+// This is the rand48-family (drand48/lrand48) linear congruential generator.
+inline uint64_t Sampler::NextRandom(uint64_t rnd) {
+ const uint64_t prng_mult = UINT64_C(0x5DEECE66D);
+ const uint64_t prng_add = 0xB;
+ const uint64_t prng_mod_power = 48;
+ const uint64_t prng_mod_mask =
+ ~((~static_cast<uint64_t>(0)) << prng_mod_power);
+ return (prng_mult * rnd + prng_add) & prng_mod_mask;
+}
+
+inline bool Sampler::IsOnFastPath() const { return was_on_fast_path_; }
+
+inline void Sampler::UpdateFastPathState() {
+ const bool is_on_fast_path = Static::IsOnFastPath();
+ if (ABSL_PREDICT_TRUE(was_on_fast_path_ == is_on_fast_path)) {
+ return;
+ }
+
+ was_on_fast_path_ = is_on_fast_path;
+
+ if (is_on_fast_path) {
+ bytes_until_sample_ = true_bytes_until_sample_;
+ true_bytes_until_sample_ = 0;
+ } else {
+ true_bytes_until_sample_ = bytes_until_sample_;
+ bytes_until_sample_ = 0;
+ }
+}
+
+// If unsample is true, return the approximate number of bytes that would have
+// been allocated to obtain this sample. This is only accurate if the sample
+// period hasn't changed since the allocation(s) were made.
+//
+// If unsample is false, the caller will handle unsampling.
+double AllocatedBytes(const StackTrace &stack, bool unsample);
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // TCMALLOC_SAMPLER_H_
diff --git a/contrib/libs/tcmalloc/tcmalloc/size_class_info.h b/contrib/libs/tcmalloc/tcmalloc/size_class_info.h
new file mode 100644
index 0000000000..a424432b75
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/size_class_info.h
@@ -0,0 +1,79 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Specification of Size classes
+#ifndef TCMALLOC_size_class_info_H_
+#define TCMALLOC_size_class_info_H_
+
+#include <stddef.h>
+
+#include "tcmalloc/internal/logging.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+// The number of members in SizeClassInfo
+static constexpr int kSizeClassInfoMembers = 3;
+
+// Precomputed size class parameters.
+struct SizeClassInfo {
+ int Value(int index) const {
+ switch (index) {
+ case 0:
+ return size;
+ case 1:
+ return pages;
+ case 2:
+ return num_to_move;
+ }
+ CHECK_CONDITION(index < kSizeClassInfoMembers);
+ return 0;
+ }
+
+ void SetValue(int index, size_t v) {
+ switch (index) {
+ case 0:
+ size = v;
+ break;
+ case 1:
+ pages = v;
+ break;
+ case 2:
+ num_to_move = v;
+ break;
+ default:
+ CHECK_CONDITION(index < kSizeClassInfoMembers);
+ }
+ }
+
+ // Max size storable in that class
+ size_t size;
+
+ // Number of pages to allocate at a time
+ size_t pages;
+
+ // Number of objects to move between a per-thread list and a central list in
+ // one shot. We want this to be not too small so we can amortize the lock
+ // overhead for accessing the central list. Making it too big may temporarily
+ // cause unnecessary memory wastage in the per-thread free list until the
+ // scavenger cleans up the list.
+ size_t num_to_move;
+};
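+
+// For illustration (not an additional API): for SizeClassInfo{8, 1, 32},
+// Value(0) == 8, Value(1) == 1, and Value(2) == 32, mirroring the
+// size/pages/num_to_move fields.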
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // TCMALLOC_size_class_info_H_
diff --git a/contrib/libs/tcmalloc/tcmalloc/size_classes.cc b/contrib/libs/tcmalloc/tcmalloc/size_classes.cc
new file mode 100644
index 0000000000..f4b444994d
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/size_classes.cc
@@ -0,0 +1,711 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/common.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+
+namespace tcmalloc_internal {
+
+// <fixed> is fixed per-size-class overhead due to end-of-span fragmentation
+// and other factors. For instance, if we have a 96 byte size class, and use a
+// single 8KiB page, then we will hold 85 objects per span, and have 32 bytes
+// left over. There is also a fixed component of 48 bytes of TCMalloc metadata
+// per span. Together, the fixed overhead would be wasted/allocated =
+// (32 + 48) / (8192 - 32) ~= 0.98%.
+// There is also a dynamic component to overhead based on mismatches between the
+// number of bytes requested and the number of bytes provided by the size class.
+// Together they sum to the total overhead; for instance if you asked for a
+// 50-byte allocation that rounds up to a 64-byte size class, the dynamic
+// overhead would be 28%, and if <fixed> were 22% it would mean (on average)
+// 25 bytes of overhead for allocations of that size.
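+//
+// A standalone restatement of the fixed-overhead arithmetic (illustrative
+// only; the 48-byte per-span metadata figure is the one quoted above):
+//
+//   #include <cmath>
+//   double FixedOverhead(double object_size, double span_bytes) {
+//     double leftover = std::fmod(span_bytes, object_size);
+//     return (leftover + 48) / (span_bytes - leftover);
+//   }
+//   // FixedOverhead(96, 8192) ~= 0.0098, i.e. the 0.98% listed for the
+//   // 96-byte class in the 8KiB-page table below.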
+
+// clang-format off
+#if defined(__cpp_aligned_new) && __STDCPP_DEFAULT_NEW_ALIGNMENT__ <= 8
+#if TCMALLOC_PAGE_SHIFT == 13
+static_assert(kMaxSize == 262144, "kMaxSize mismatch");
+static const int kCount = 86;
+static_assert(kCount <= kNumClasses);
+const int SizeMap::kSizeClassesCount = kCount;
+const SizeClassInfo SizeMap::kSizeClasses[SizeMap::kSizeClassesCount] = {
+ // <bytes>, <pages>, <batch size> <fixed>
+ { 0, 0, 0}, // +Inf%
+ { 8, 1, 32}, // 0.59%
+ { 16, 1, 32}, // 0.59%
+ { 24, 1, 32}, // 0.68%
+ { 32, 1, 32}, // 0.59%
+ { 40, 1, 32}, // 0.98%
+ { 48, 1, 32}, // 0.98%
+ { 56, 1, 32}, // 0.78%
+ { 64, 1, 32}, // 0.59%
+ { 72, 1, 32}, // 1.28%
+ { 80, 1, 32}, // 0.98%
+ { 88, 1, 32}, // 0.68%
+ { 96, 1, 32}, // 0.98%
+ { 104, 1, 32}, // 1.58%
+ { 112, 1, 32}, // 0.78%
+ { 120, 1, 32}, // 0.98%
+ { 128, 1, 32}, // 0.59%
+ { 136, 1, 32}, // 0.98%
+ { 144, 1, 32}, // 2.18%
+ { 160, 1, 32}, // 0.98%
+ { 176, 1, 32}, // 1.78%
+ { 192, 1, 32}, // 2.18%
+ { 208, 1, 32}, // 1.58%
+ { 224, 1, 32}, // 2.18%
+ { 240, 1, 32}, // 0.98%
+ { 256, 1, 32}, // 0.59%
+ { 272, 1, 32}, // 0.98%
+ { 296, 1, 32}, // 3.10%
+ { 312, 1, 32}, // 1.58%
+ { 336, 1, 32}, // 2.18%
+ { 352, 1, 32}, // 1.78%
+ { 368, 1, 32}, // 1.78%
+ { 408, 1, 32}, // 0.98%
+ { 448, 1, 32}, // 2.18%
+ { 480, 1, 32}, // 0.98%
+ { 512, 1, 32}, // 0.59%
+ { 576, 1, 32}, // 2.18%
+ { 640, 1, 32}, // 7.29%
+ { 704, 1, 32}, // 6.40%
+ { 768, 1, 32}, // 7.29%
+ { 896, 1, 32}, // 2.18%
+ { 1024, 1, 32}, // 0.59%
+ { 1152, 2, 32}, // 1.88%
+ { 1280, 2, 32}, // 6.98%
+ { 1408, 2, 32}, // 6.10%
+ { 1536, 2, 32}, // 6.98%
+ { 1792, 2, 32}, // 1.88%
+ { 2048, 2, 32}, // 0.29%
+ { 2304, 2, 28}, // 1.88%
+ { 2688, 2, 24}, // 1.88%
+ { 2816, 3, 23}, // 9.30%
+ { 3200, 2, 20}, // 2.70%
+ { 3456, 3, 18}, // 1.79%
+ { 3584, 4, 18}, // 1.74%
+ { 4096, 1, 16}, // 0.29%
+ { 4736, 3, 13}, // 3.99%
+ { 5376, 2, 12}, // 1.88%
+ { 6144, 3, 10}, // 0.20%
+ { 6528, 4, 10}, // 0.54%
+ { 7168, 7, 9}, // 0.08%
+ { 8192, 1, 8}, // 0.29%
+ { 9472, 5, 6}, // 8.23%
+ { 10240, 4, 6}, // 6.82%
+ { 12288, 3, 5}, // 0.20%
+ { 13568, 5, 4}, // 0.75%
+ { 14336, 7, 4}, // 0.08%
+ { 16384, 2, 4}, // 0.29%
+ { 20480, 5, 3}, // 0.12%
+ { 24576, 3, 2}, // 0.20%
+ { 28672, 7, 2}, // 0.08%
+ { 32768, 4, 2}, // 0.15%
+ { 40960, 5, 2}, // 0.12%
+ { 49152, 6, 2}, // 0.10%
+ { 57344, 7, 2}, // 0.08%
+ { 65536, 8, 2}, // 0.07%
+ { 73728, 9, 2}, // 0.07%
+ { 81920, 10, 2}, // 0.06%
+ { 98304, 12, 2}, // 0.05%
+ { 114688, 14, 2}, // 0.04%
+ { 131072, 16, 2}, // 0.04%
+ { 147456, 18, 2}, // 0.03%
+ { 163840, 20, 2}, // 0.03%
+ { 180224, 22, 2}, // 0.03%
+ { 204800, 25, 2}, // 0.02%
+ { 237568, 29, 2}, // 0.02%
+ { 262144, 32, 2}, // 0.02%
+};
+#elif TCMALLOC_PAGE_SHIFT == 15
+static_assert(kMaxSize == 262144, "kMaxSize mismatch");
+static const int kCount = 78;
+static_assert(kCount <= kNumClasses);
+const int SizeMap::kSizeClassesCount = kCount;
+const SizeClassInfo SizeMap::kSizeClasses[SizeMap::kSizeClassesCount] = {
+ // <bytes>, <pages>, <batch size> <fixed>
+ { 0, 0, 0}, // +Inf%
+ { 8, 1, 32}, // 0.15%
+ { 16, 1, 32}, // 0.15%
+ { 24, 1, 32}, // 0.17%
+ { 32, 1, 32}, // 0.15%
+ { 40, 1, 32}, // 0.17%
+ { 48, 1, 32}, // 0.24%
+ { 56, 1, 32}, // 0.17%
+ { 64, 1, 32}, // 0.15%
+ { 72, 1, 32}, // 0.17%
+ { 80, 1, 32}, // 0.29%
+ { 88, 1, 32}, // 0.24%
+ { 96, 1, 32}, // 0.24%
+ { 104, 1, 32}, // 0.17%
+ { 112, 1, 32}, // 0.34%
+ { 128, 1, 32}, // 0.15%
+ { 144, 1, 32}, // 0.39%
+ { 160, 1, 32}, // 0.54%
+ { 176, 1, 32}, // 0.24%
+ { 192, 1, 32}, // 0.54%
+ { 208, 1, 32}, // 0.49%
+ { 224, 1, 32}, // 0.34%
+ { 240, 1, 32}, // 0.54%
+ { 256, 1, 32}, // 0.15%
+ { 280, 1, 32}, // 0.17%
+ { 304, 1, 32}, // 0.89%
+ { 328, 1, 32}, // 1.06%
+ { 352, 1, 32}, // 0.24%
+ { 384, 1, 32}, // 0.54%
+ { 416, 1, 32}, // 1.13%
+ { 448, 1, 32}, // 0.34%
+ { 488, 1, 32}, // 0.37%
+ { 512, 1, 32}, // 0.15%
+ { 576, 1, 32}, // 1.74%
+ { 640, 1, 32}, // 0.54%
+ { 704, 1, 32}, // 1.33%
+ { 832, 1, 32}, // 1.13%
+ { 896, 1, 32}, // 1.74%
+ { 1024, 1, 32}, // 0.15%
+ { 1152, 1, 32}, // 1.74%
+ { 1280, 1, 32}, // 2.55%
+ { 1536, 1, 32}, // 1.74%
+ { 1792, 1, 32}, // 1.74%
+ { 2048, 1, 32}, // 0.15%
+ { 2176, 1, 30}, // 0.54%
+ { 2304, 1, 28}, // 1.74%
+ { 2688, 1, 24}, // 1.74%
+ { 2944, 1, 22}, // 1.33%
+ { 3200, 1, 20}, // 2.55%
+ { 3584, 1, 18}, // 1.74%
+ { 4096, 1, 16}, // 0.15%
+ { 4608, 1, 14}, // 1.74%
+ { 5376, 1, 12}, // 1.74%
+ { 6528, 1, 10}, // 0.54%
+ { 7168, 2, 9}, // 1.66%
+ { 8192, 1, 8}, // 0.15%
+ { 9344, 2, 7}, // 0.27%
+ { 10880, 1, 6}, // 0.54%
+ { 13952, 3, 4}, // 0.70%
+ { 16384, 1, 4}, // 0.15%
+ { 19072, 3, 3}, // 3.14%
+ { 21760, 2, 3}, // 0.47%
+ { 24576, 3, 2}, // 0.05%
+ { 28032, 6, 2}, // 0.22%
+ { 32768, 1, 2}, // 0.15%
+ { 38144, 5, 2}, // 7.41%
+ { 40960, 4, 2}, // 6.71%
+ { 49152, 3, 2}, // 0.05%
+ { 57344, 7, 2}, // 0.02%
+ { 65536, 2, 2}, // 0.07%
+ { 81920, 5, 2}, // 0.03%
+ { 98304, 3, 2}, // 0.05%
+ { 114688, 7, 2}, // 0.02%
+ { 131072, 4, 2}, // 0.04%
+ { 163840, 5, 2}, // 0.03%
+ { 196608, 6, 2}, // 0.02%
+ { 229376, 7, 2}, // 0.02%
+ { 262144, 8, 2}, // 0.02%
+};
+#elif TCMALLOC_PAGE_SHIFT == 18
+static_assert(kMaxSize == 262144, "kMaxSize mismatch");
+static const int kCount = 89;
+static_assert(kCount <= kNumClasses);
+const int SizeMap::kSizeClassesCount = kCount;
+const SizeClassInfo SizeMap::kSizeClasses[SizeMap::kSizeClassesCount] = {
+ // <bytes>, <pages>, <batch size> <fixed>
+ { 0, 0, 0}, // +Inf%
+ { 8, 1, 32}, // 0.02%
+ { 16, 1, 32}, // 0.02%
+ { 24, 1, 32}, // 0.02%
+ { 32, 1, 32}, // 0.02%
+ { 40, 1, 32}, // 0.03%
+ { 48, 1, 32}, // 0.02%
+ { 56, 1, 32}, // 0.02%
+ { 64, 1, 32}, // 0.02%
+ { 72, 1, 32}, // 0.04%
+ { 80, 1, 32}, // 0.04%
+ { 88, 1, 32}, // 0.05%
+ { 96, 1, 32}, // 0.04%
+ { 104, 1, 32}, // 0.04%
+ { 112, 1, 32}, // 0.04%
+ { 128, 1, 32}, // 0.02%
+ { 144, 1, 32}, // 0.04%
+ { 160, 1, 32}, // 0.04%
+ { 176, 1, 32}, // 0.05%
+ { 192, 1, 32}, // 0.04%
+ { 208, 1, 32}, // 0.04%
+ { 240, 1, 32}, // 0.04%
+ { 256, 1, 32}, // 0.02%
+ { 304, 1, 32}, // 0.05%
+ { 336, 1, 32}, // 0.04%
+ { 360, 1, 32}, // 0.04%
+ { 408, 1, 32}, // 0.10%
+ { 456, 1, 32}, // 0.17%
+ { 512, 1, 32}, // 0.02%
+ { 576, 1, 32}, // 0.04%
+ { 640, 1, 32}, // 0.17%
+ { 704, 1, 32}, // 0.12%
+ { 768, 1, 32}, // 0.12%
+ { 832, 1, 32}, // 0.04%
+ { 896, 1, 32}, // 0.21%
+ { 1024, 1, 32}, // 0.02%
+ { 1152, 1, 32}, // 0.26%
+ { 1280, 1, 32}, // 0.41%
+ { 1536, 1, 32}, // 0.41%
+ { 1664, 1, 32}, // 0.36%
+ { 1792, 1, 32}, // 0.21%
+ { 1920, 1, 32}, // 0.41%
+ { 2048, 1, 32}, // 0.02%
+ { 2176, 1, 30}, // 0.41%
+ { 2304, 1, 28}, // 0.71%
+ { 2432, 1, 26}, // 0.76%
+ { 2560, 1, 25}, // 0.41%
+ { 2688, 1, 24}, // 0.56%
+ { 2816, 1, 23}, // 0.12%
+ { 2944, 1, 22}, // 0.07%
+ { 3072, 1, 21}, // 0.41%
+ { 3328, 1, 19}, // 1.00%
+ { 3584, 1, 18}, // 0.21%
+ { 3840, 1, 17}, // 0.41%
+ { 4096, 1, 16}, // 0.02%
+ { 4736, 1, 13}, // 0.66%
+ { 5504, 1, 11}, // 1.35%
+ { 6144, 1, 10}, // 1.61%
+ { 6528, 1, 10}, // 0.41%
+ { 6784, 1, 9}, // 1.71%
+ { 7168, 1, 9}, // 1.61%
+ { 7680, 1, 8}, // 0.41%
+ { 8192, 1, 8}, // 0.02%
+ { 8704, 1, 7}, // 0.41%
+ { 9344, 1, 7}, // 0.21%
+ { 10880, 1, 6}, // 0.41%
+ { 11904, 1, 5}, // 0.12%
+ { 13056, 1, 5}, // 0.41%
+ { 14464, 1, 4}, // 0.71%
+ { 16384, 1, 4}, // 0.02%
+ { 18688, 1, 3}, // 0.21%
+ { 21760, 1, 3}, // 0.41%
+ { 26112, 1, 2}, // 0.41%
+ { 29056, 1, 2}, // 0.26%
+ { 32768, 1, 2}, // 0.02%
+ { 37376, 1, 2}, // 0.21%
+ { 43648, 1, 2}, // 0.12%
+ { 52352, 1, 2}, // 0.17%
+ { 56064, 2, 2}, // 3.92%
+ { 65536, 1, 2}, // 0.02%
+ { 74880, 2, 2}, // 0.03%
+ { 87296, 1, 2}, // 0.12%
+ { 104832, 2, 2}, // 0.03%
+ { 112256, 3, 2}, // 0.09%
+ { 131072, 1, 2}, // 0.02%
+ { 149760, 3, 2}, // 5.03%
+ { 174720, 2, 2}, // 0.03%
+ { 209664, 4, 2}, // 0.03%
+ { 262144, 1, 2}, // 0.02%
+};
+#elif TCMALLOC_PAGE_SHIFT == 12
+static_assert(kMaxSize == 8192, "kMaxSize mismatch");
+static const int kCount = 46;
+static_assert(kCount <= kNumClasses);
+const int SizeMap::kSizeClassesCount = kCount;
+const SizeClassInfo SizeMap::kSizeClasses[SizeMap::kSizeClassesCount] = {
+ // <bytes>, <pages>, <batch size> <fixed>
+ { 0, 0, 0}, // +Inf%
+ { 8, 1, 32}, // 1.17%
+ { 16, 1, 32}, // 1.17%
+ { 24, 1, 32}, // 1.57%
+ { 32, 1, 32}, // 1.17%
+ { 40, 1, 32}, // 1.57%
+ { 48, 1, 32}, // 1.57%
+ { 56, 1, 32}, // 1.37%
+ { 64, 1, 32}, // 1.17%
+ { 72, 1, 32}, // 2.78%
+ { 80, 1, 32}, // 1.57%
+ { 88, 1, 32}, // 2.37%
+ { 96, 1, 32}, // 2.78%
+ { 104, 1, 32}, // 2.17%
+ { 120, 1, 32}, // 1.57%
+ { 128, 1, 32}, // 1.17%
+ { 144, 1, 32}, // 2.78%
+ { 160, 1, 32}, // 3.60%
+ { 184, 1, 32}, // 2.37%
+ { 208, 1, 32}, // 4.86%
+ { 240, 1, 32}, // 1.57%
+ { 256, 1, 32}, // 1.17%
+ { 272, 1, 32}, // 1.57%
+ { 312, 1, 32}, // 2.17%
+ { 336, 1, 32}, // 2.78%
+ { 368, 1, 32}, // 2.37%
+ { 408, 1, 32}, // 1.57%
+ { 512, 1, 32}, // 1.17%
+ { 576, 2, 32}, // 2.18%
+ { 704, 2, 32}, // 6.40%
+ { 768, 2, 32}, // 7.29%
+ { 896, 2, 32}, // 2.18%
+ { 1024, 2, 32}, // 0.59%
+ { 1152, 3, 32}, // 7.08%
+ { 1280, 3, 32}, // 7.08%
+ { 1536, 3, 32}, // 0.39%
+ { 1792, 4, 32}, // 1.88%
+ { 2048, 4, 32}, // 0.29%
+ { 2304, 4, 28}, // 1.88%
+ { 2688, 4, 24}, // 1.88%
+ { 3456, 6, 18}, // 1.79%
+ { 4096, 4, 16}, // 0.29%
+ { 5376, 4, 12}, // 1.88%
+ { 6144, 3, 10}, // 0.39%
+ { 7168, 7, 9}, // 0.17%
+ { 8192, 4, 8}, // 0.29%
+};
+#else
+#error "Unsupported TCMALLOC_PAGE_SHIFT value!"
+#endif
+#else
+#if TCMALLOC_PAGE_SHIFT == 13
+static_assert(kMaxSize == 262144, "kMaxSize mismatch");
+static const int kCount = 86;
+static_assert(kCount <= kNumClasses);
+const int SizeMap::kSizeClassesCount = kCount;
+const SizeClassInfo SizeMap::kSizeClasses[SizeMap::kSizeClassesCount] = {
+ // <bytes>, <pages>, <batch size> <fixed>
+ { 0, 0, 0}, // +Inf%
+ { 8, 1, 32}, // 0.59%
+ { 16, 1, 32}, // 0.59%
+ { 32, 1, 32}, // 0.59%
+ { 48, 1, 32}, // 0.98%
+ { 64, 1, 32}, // 0.59%
+ { 80, 1, 32}, // 0.98%
+ { 96, 1, 32}, // 0.98%
+ { 112, 1, 32}, // 0.78%
+ { 128, 1, 32}, // 0.59%
+ { 144, 1, 32}, // 2.18%
+ { 160, 1, 32}, // 0.98%
+ { 176, 1, 32}, // 1.78%
+ { 192, 1, 32}, // 2.18%
+ { 208, 1, 32}, // 1.58%
+ { 224, 1, 32}, // 2.18%
+ { 240, 1, 32}, // 0.98%
+ { 256, 1, 32}, // 0.59%
+ { 272, 1, 32}, // 0.98%
+ { 288, 1, 32}, // 2.18%
+ { 304, 1, 32}, // 4.25%
+ { 320, 1, 32}, // 3.00%
+ { 336, 1, 32}, // 2.18%
+ { 352, 1, 32}, // 1.78%
+ { 368, 1, 32}, // 1.78%
+ { 384, 1, 32}, // 2.18%
+ { 400, 1, 32}, // 3.00%
+ { 416, 1, 32}, // 4.25%
+ { 448, 1, 32}, // 2.18%
+ { 480, 1, 32}, // 0.98%
+ { 512, 1, 32}, // 0.59%
+ { 576, 1, 32}, // 2.18%
+ { 640, 1, 32}, // 7.29%
+ { 704, 1, 32}, // 6.40%
+ { 768, 1, 32}, // 7.29%
+ { 896, 1, 32}, // 2.18%
+ { 1024, 1, 32}, // 0.59%
+ { 1152, 2, 32}, // 1.88%
+ { 1280, 2, 32}, // 6.98%
+ { 1408, 2, 32}, // 6.10%
+ { 1536, 2, 32}, // 6.98%
+ { 1792, 2, 32}, // 1.88%
+ { 2048, 2, 32}, // 0.29%
+ { 2304, 2, 28}, // 1.88%
+ { 2688, 2, 24}, // 1.88%
+ { 2816, 3, 23}, // 9.30%
+ { 3200, 2, 20}, // 2.70%
+ { 3456, 3, 18}, // 1.79%
+ { 3584, 4, 18}, // 1.74%
+ { 4096, 1, 16}, // 0.29%
+ { 4736, 3, 13}, // 3.99%
+ { 5376, 2, 12}, // 1.88%
+ { 6144, 3, 10}, // 0.20%
+ { 6528, 4, 10}, // 0.54%
+ { 6784, 5, 9}, // 0.75%
+ { 7168, 7, 9}, // 0.08%
+ { 8192, 1, 8}, // 0.29%
+ { 9472, 5, 6}, // 8.23%
+ { 10240, 4, 6}, // 6.82%
+ { 12288, 3, 5}, // 0.20%
+ { 13568, 5, 4}, // 0.75%
+ { 14336, 7, 4}, // 0.08%
+ { 16384, 2, 4}, // 0.29%
+ { 20480, 5, 3}, // 0.12%
+ { 24576, 3, 2}, // 0.20%
+ { 28672, 7, 2}, // 0.08%
+ { 32768, 4, 2}, // 0.15%
+ { 40960, 5, 2}, // 0.12%
+ { 49152, 6, 2}, // 0.10%
+ { 57344, 7, 2}, // 0.08%
+ { 65536, 8, 2}, // 0.07%
+ { 73728, 9, 2}, // 0.07%
+ { 81920, 10, 2}, // 0.06%
+ { 90112, 11, 2}, // 0.05%
+ { 98304, 12, 2}, // 0.05%
+ { 106496, 13, 2}, // 0.05%
+ { 114688, 14, 2}, // 0.04%
+ { 131072, 16, 2}, // 0.04%
+ { 139264, 17, 2}, // 0.03%
+ { 155648, 19, 2}, // 0.03%
+ { 172032, 21, 2}, // 0.03%
+ { 188416, 23, 2}, // 0.03%
+ { 204800, 25, 2}, // 0.02%
+ { 221184, 27, 2}, // 0.02%
+ { 237568, 29, 2}, // 0.02%
+ { 262144, 32, 2}, // 0.02%
+};
+#elif TCMALLOC_PAGE_SHIFT == 15
+static_assert(kMaxSize == 262144, "kMaxSize mismatch");
+static const int kCount = 78;
+static_assert(kCount <= kNumClasses);
+const int SizeMap::kSizeClassesCount = kCount;
+const SizeClassInfo SizeMap::kSizeClasses[SizeMap::kSizeClassesCount] = {
+ // <bytes>, <pages>, <batch size> <fixed>
+ { 0, 0, 0}, // +Inf%
+ { 8, 1, 32}, // 0.15%
+ { 16, 1, 32}, // 0.15%
+ { 32, 1, 32}, // 0.15%
+ { 48, 1, 32}, // 0.24%
+ { 64, 1, 32}, // 0.15%
+ { 80, 1, 32}, // 0.29%
+ { 96, 1, 32}, // 0.24%
+ { 112, 1, 32}, // 0.34%
+ { 128, 1, 32}, // 0.15%
+ { 144, 1, 32}, // 0.39%
+ { 160, 1, 32}, // 0.54%
+ { 176, 1, 32}, // 0.24%
+ { 192, 1, 32}, // 0.54%
+ { 208, 1, 32}, // 0.49%
+ { 224, 1, 32}, // 0.34%
+ { 240, 1, 32}, // 0.54%
+ { 256, 1, 32}, // 0.15%
+ { 272, 1, 32}, // 0.54%
+ { 288, 1, 32}, // 0.84%
+ { 304, 1, 32}, // 0.89%
+ { 320, 1, 32}, // 0.54%
+ { 336, 1, 32}, // 0.69%
+ { 352, 1, 32}, // 0.24%
+ { 384, 1, 32}, // 0.54%
+ { 416, 1, 32}, // 1.13%
+ { 448, 1, 32}, // 0.34%
+ { 480, 1, 32}, // 0.54%
+ { 512, 1, 32}, // 0.15%
+ { 576, 1, 32}, // 1.74%
+ { 640, 1, 32}, // 0.54%
+ { 704, 1, 32}, // 1.33%
+ { 768, 1, 32}, // 1.74%
+ { 832, 1, 32}, // 1.13%
+ { 896, 1, 32}, // 1.74%
+ { 1024, 1, 32}, // 0.15%
+ { 1152, 1, 32}, // 1.74%
+ { 1280, 1, 32}, // 2.55%
+ { 1408, 1, 32}, // 1.33%
+ { 1536, 1, 32}, // 1.74%
+ { 1792, 1, 32}, // 1.74%
+ { 2048, 1, 32}, // 0.15%
+ { 2176, 1, 30}, // 0.54%
+ { 2304, 1, 28}, // 1.74%
+ { 2432, 1, 26}, // 3.80%
+ { 2688, 1, 24}, // 1.74%
+ { 2944, 1, 22}, // 1.33%
+ { 3200, 1, 20}, // 2.55%
+ { 3584, 1, 18}, // 1.74%
+ { 4096, 1, 16}, // 0.15%
+ { 4608, 1, 14}, // 1.74%
+ { 5376, 1, 12}, // 1.74%
+ { 6528, 1, 10}, // 0.54%
+ { 7168, 2, 9}, // 1.66%
+ { 8192, 1, 8}, // 0.15%
+ { 9344, 2, 7}, // 0.27%
+ { 10880, 1, 6}, // 0.54%
+ { 13056, 2, 5}, // 0.47%
+ { 13952, 3, 4}, // 0.70%
+ { 16384, 1, 4}, // 0.15%
+ { 19072, 3, 3}, // 3.14%
+ { 21760, 2, 3}, // 0.47%
+ { 24576, 3, 2}, // 0.05%
+ { 28032, 6, 2}, // 0.22%
+ { 32768, 1, 2}, // 0.15%
+ { 38144, 5, 2}, // 7.41%
+ { 40960, 4, 2}, // 6.71%
+ { 49152, 3, 2}, // 0.05%
+ { 57344, 7, 2}, // 0.02%
+ { 65536, 2, 2}, // 0.07%
+ { 81920, 5, 2}, // 0.03%
+ { 98304, 3, 2}, // 0.05%
+ { 114688, 7, 2}, // 0.02%
+ { 131072, 4, 2}, // 0.04%
+ { 163840, 5, 2}, // 0.03%
+ { 196608, 6, 2}, // 0.02%
+ { 229376, 7, 2}, // 0.02%
+ { 262144, 8, 2}, // 0.02%
+};
+#elif TCMALLOC_PAGE_SHIFT == 18
+static_assert(kMaxSize == 262144, "kMaxSize mismatch");
+static const int kCount = 89;
+static_assert(kCount <= kNumClasses);
+const int SizeMap::kSizeClassesCount = kCount;
+const SizeClassInfo SizeMap::kSizeClasses[SizeMap::kSizeClassesCount] = {
+ // <bytes>, <pages>, <batch size> <fixed>
+ { 0, 0, 0}, // +Inf%
+ { 8, 1, 32}, // 0.02%
+ { 16, 1, 32}, // 0.02%
+ { 32, 1, 32}, // 0.02%
+ { 48, 1, 32}, // 0.02%
+ { 64, 1, 32}, // 0.02%
+ { 80, 1, 32}, // 0.04%
+ { 96, 1, 32}, // 0.04%
+ { 112, 1, 32}, // 0.04%
+ { 128, 1, 32}, // 0.02%
+ { 144, 1, 32}, // 0.04%
+ { 160, 1, 32}, // 0.04%
+ { 176, 1, 32}, // 0.05%
+ { 192, 1, 32}, // 0.04%
+ { 208, 1, 32}, // 0.04%
+ { 240, 1, 32}, // 0.04%
+ { 256, 1, 32}, // 0.02%
+ { 304, 1, 32}, // 0.05%
+ { 336, 1, 32}, // 0.04%
+ { 368, 1, 32}, // 0.07%
+ { 416, 1, 32}, // 0.04%
+ { 464, 1, 32}, // 0.19%
+ { 512, 1, 32}, // 0.02%
+ { 576, 1, 32}, // 0.04%
+ { 640, 1, 32}, // 0.17%
+ { 704, 1, 32}, // 0.12%
+ { 768, 1, 32}, // 0.12%
+ { 832, 1, 32}, // 0.04%
+ { 896, 1, 32}, // 0.21%
+ { 1024, 1, 32}, // 0.02%
+ { 1152, 1, 32}, // 0.26%
+ { 1280, 1, 32}, // 0.41%
+ { 1408, 1, 32}, // 0.12%
+ { 1536, 1, 32}, // 0.41%
+ { 1664, 1, 32}, // 0.36%
+ { 1792, 1, 32}, // 0.21%
+ { 1920, 1, 32}, // 0.41%
+ { 2048, 1, 32}, // 0.02%
+ { 2176, 1, 30}, // 0.41%
+ { 2304, 1, 28}, // 0.71%
+ { 2432, 1, 26}, // 0.76%
+ { 2560, 1, 25}, // 0.41%
+ { 2688, 1, 24}, // 0.56%
+ { 2816, 1, 23}, // 0.12%
+ { 2944, 1, 22}, // 0.07%
+ { 3072, 1, 21}, // 0.41%
+ { 3200, 1, 20}, // 1.15%
+ { 3328, 1, 19}, // 1.00%
+ { 3584, 1, 18}, // 0.21%
+ { 3840, 1, 17}, // 0.41%
+ { 4096, 1, 16}, // 0.02%
+ { 4736, 1, 13}, // 0.66%
+ { 5504, 1, 11}, // 1.35%
+ { 6144, 1, 10}, // 1.61%
+ { 6528, 1, 10}, // 0.41%
+ { 6784, 1, 9}, // 1.71%
+ { 7168, 1, 9}, // 1.61%
+ { 7680, 1, 8}, // 0.41%
+ { 8192, 1, 8}, // 0.02%
+ { 8704, 1, 7}, // 0.41%
+ { 9344, 1, 7}, // 0.21%
+ { 10368, 1, 6}, // 1.15%
+ { 11392, 1, 5}, // 0.07%
+ { 12416, 1, 5}, // 0.56%
+ { 13696, 1, 4}, // 0.76%
+ { 14464, 1, 4}, // 0.71%
+ { 16384, 1, 4}, // 0.02%
+ { 17408, 1, 3}, // 0.41%
+ { 20096, 1, 3}, // 0.36%
+ { 21760, 1, 3}, // 0.41%
+ { 23808, 1, 2}, // 0.12%
+ { 26112, 1, 2}, // 0.41%
+ { 29056, 1, 2}, // 0.26%
+ { 32768, 1, 2}, // 0.02%
+ { 37376, 1, 2}, // 0.21%
+ { 43648, 1, 2}, // 0.12%
+ { 52352, 1, 2}, // 0.17%
+ { 56064, 2, 2}, // 3.92%
+ { 65536, 1, 2}, // 0.02%
+ { 74880, 2, 2}, // 0.03%
+ { 87296, 1, 2}, // 0.12%
+ { 104832, 2, 2}, // 0.03%
+ { 112256, 3, 2}, // 0.09%
+ { 131072, 1, 2}, // 0.02%
+ { 149760, 3, 2}, // 5.03%
+ { 174720, 2, 2}, // 0.03%
+ { 196608, 3, 2}, // 0.01%
+ { 209664, 4, 2}, // 0.03%
+ { 262144, 1, 2}, // 0.02%
+};
+#elif TCMALLOC_PAGE_SHIFT == 12
+static_assert(kMaxSize == 8192, "kMaxSize mismatch");
+static const int kCount = 46;
+static_assert(kCount <= kNumClasses);
+const int SizeMap::kSizeClassesCount = kCount;
+const SizeClassInfo SizeMap::kSizeClasses[SizeMap::kSizeClassesCount] = {
+ // <bytes>, <pages>, <batch size> <fixed>
+ { 0, 0, 0}, // +Inf%
+ { 8, 1, 32}, // 1.17%
+ { 16, 1, 32}, // 1.17%
+ { 32, 1, 32}, // 1.17%
+ { 48, 1, 32}, // 1.57%
+ { 64, 1, 32}, // 1.17%
+ { 80, 1, 32}, // 1.57%
+ { 96, 1, 32}, // 2.78%
+ { 112, 1, 32}, // 2.78%
+ { 128, 1, 32}, // 1.17%
+ { 144, 1, 32}, // 2.78%
+ { 160, 1, 32}, // 3.60%
+ { 176, 1, 32}, // 2.37%
+ { 192, 1, 32}, // 2.78%
+ { 208, 1, 32}, // 4.86%
+ { 224, 1, 32}, // 2.78%
+ { 240, 1, 32}, // 1.57%
+ { 256, 1, 32}, // 1.17%
+ { 272, 1, 32}, // 1.57%
+ { 288, 1, 32}, // 2.78%
+ { 304, 1, 32}, // 4.86%
+ { 336, 1, 32}, // 2.78%
+ { 368, 1, 32}, // 2.37%
+ { 400, 1, 32}, // 3.60%
+ { 448, 1, 32}, // 2.78%
+ { 512, 1, 32}, // 1.17%
+ { 576, 2, 32}, // 2.18%
+ { 640, 2, 32}, // 7.29%
+ { 704, 2, 32}, // 6.40%
+ { 768, 2, 32}, // 7.29%
+ { 896, 2, 32}, // 2.18%
+ { 1024, 2, 32}, // 0.59%
+ { 1152, 3, 32}, // 7.08%
+ { 1280, 3, 32}, // 7.08%
+ { 1536, 3, 32}, // 0.39%
+ { 1792, 4, 32}, // 1.88%
+ { 2048, 4, 32}, // 0.29%
+ { 2304, 4, 28}, // 1.88%
+ { 2688, 4, 24}, // 1.88%
+ { 3200, 4, 20}, // 2.70%
+ { 3584, 7, 18}, // 0.17%
+ { 4096, 4, 16}, // 0.29%
+ { 5376, 4, 12}, // 1.88%
+ { 6144, 3, 10}, // 0.39%
+ { 7168, 7, 9}, // 0.17%
+ { 8192, 4, 8}, // 0.29%
+};
+#else
+#error "Unsupported TCMALLOC_PAGE_SHIFT value!"
+#endif
+#endif
+// clang-format on
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
diff --git a/contrib/libs/tcmalloc/tcmalloc/size_classes_test.cc b/contrib/libs/tcmalloc/tcmalloc/size_classes_test.cc
new file mode 100644
index 0000000000..d66ce5b186
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/size_classes_test.cc
@@ -0,0 +1,469 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stddef.h>
+#include <stdlib.h>
+
+#include "gtest/gtest.h"
+#include "absl/random/random.h"
+#include "tcmalloc/common.h"
+#include "tcmalloc/size_class_info.h"
+#include "tcmalloc/span.h"
+#include "tcmalloc/tcmalloc_policy.h"
+
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+// Moved out of anonymous namespace so that it can be found by friend class in
+// span.h. This allows tests to access span internals so that we can
+// validate that scaling by a reciprocal correctly converts a pointer into
+// an offset within a span.
+class SpanTestPeer {
+ public:
+ static uint16_t CalcReciprocal(size_t size) {
+ return Span::CalcReciprocal(size);
+ }
+ static Span::ObjIdx TestOffsetToIdx(uintptr_t offset, size_t size,
+ uint16_t reciprocal) {
+ return Span::TestOffsetToIdx(offset, size, reciprocal);
+ }
+};
+
+namespace {
+
+size_t Alignment(size_t size) {
+ size_t ret = kAlignment;
+ if (size >= 1024) {
+ // SizeMap::ClassIndexMaybe requires 128-byte alignment for sizes >=1024.
+ ret = 128;
+ } else if (size >= 512) {
+ // Per //tcmalloc/span.h, we have 64 byte alignment for sizes
+ // >=512.
+ ret = 64;
+ } else if (size >= 8) {
+ ret = 8;
+ }
+
+ return ret;
+}
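+
+// For illustration, the helper above yields Alignment(8) == 8,
+// Alignment(512) == 64, and Alignment(1024) == 128, matching the
+// requirements cited in its comments.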
+
+class SizeClassesTest : public ::testing::Test {
+ protected:
+ SizeClassesTest() { m_.Init(); }
+
+ SizeMap m_;
+};
+
+TEST_F(SizeClassesTest, SmallClassesSinglePage) {
+ // Per //tcmalloc/span.h, the compressed index implementation
+ // added by cl/126729493 requires small size classes to be placed on a single
+ // page span so they can be addressed.
+ for (int c = 1; c < kNumClasses; c++) {
+ const size_t max_size_in_class = m_.class_to_size(c);
+ if (max_size_in_class >= SizeMap::kMultiPageSize) {
+ continue;
+ }
+ if (max_size_in_class == 0) {
+ continue;
+ }
+ EXPECT_EQ(m_.class_to_pages(c), 1) << max_size_in_class;
+ }
+}
+
+TEST_F(SizeClassesTest, SpanPages) {
+ for (int c = 1; c < kNumClasses; c++) {
+ const size_t max_size_in_class = m_.class_to_size(c);
+ if (max_size_in_class == 0) {
+ continue;
+ }
+ // A span of class_to_pages(c) must be able to hold at least one object.
+ EXPECT_GE(Length(m_.class_to_pages(c)).in_bytes(), max_size_in_class);
+ }
+}
+
+TEST_F(SizeClassesTest, ValidateSufficientBitmapCapacity) {
+ // Validate that all the objects in a span can fit into a bitmap.
+ // The cut-off for using a bitmap is kBitmapMinObjectSize, so it is
+ // theoretically possible that a span could exceed this threshold
+ // for object size and contain more than 64 objects.
+ for (int c = 1; c < kNumClasses; ++c) {
+ const size_t max_size_in_class = m_.class_to_size(c);
+ if (max_size_in_class >= kBitmapMinObjectSize) {
+ const size_t objects_per_span =
+ Length(m_.class_to_pages(c)).in_bytes() / m_.class_to_size(c);
+ // Span can hold at most 64 objects of this size.
+ EXPECT_LE(objects_per_span, 64);
+ }
+ }
+}
+
+TEST_F(SizeClassesTest, ValidateCorrectScalingByReciprocal) {
+ // Validate that multiplying by the reciprocal works for all size classes.
+ // When converting an offset within a span into an index we avoid a
+ // division operation by scaling by the reciprocal. The test ensures
+ // that this approach works for all objects in a span, for all object
+ // sizes.
+ for (int c = 1; c < kNumClasses; ++c) {
+ const size_t max_size_in_class = m_.class_to_size(c);
+ // Only test for sizes where object availability is recorded in a bitmap.
+ if (max_size_in_class < kBitmapMinObjectSize) {
+ continue;
+ }
+ size_t reciprocal = SpanTestPeer::CalcReciprocal(max_size_in_class);
+ const size_t objects_per_span =
+ Length(m_.class_to_pages(c)).in_bytes() / m_.class_to_size(c);
+ for (int index = 0; index < objects_per_span; index++) {
+ // Calculate the address of the object.
+ uintptr_t address = index * max_size_in_class;
+ // Calculate the index into the page using the reciprocal method.
+ int idx =
+ SpanTestPeer::TestOffsetToIdx(address, max_size_in_class, reciprocal);
+      // Check that the index maps back to the correct starting address.
+ ASSERT_EQ(address, idx * max_size_in_class);
+ }
+ }
+}
+
+TEST_F(SizeClassesTest, Aligned) {
+ // Validate that each size class is properly aligned.
+ for (int c = 1; c < kNumClasses; c++) {
+ const size_t max_size_in_class = m_.class_to_size(c);
+ size_t alignment = Alignment(max_size_in_class);
+
+ EXPECT_EQ(0, max_size_in_class % alignment) << max_size_in_class;
+ }
+}
+
+TEST_F(SizeClassesTest, Distinguishable) {
+ // Validate that the size to class lookup table is able to distinguish each
+ // size class from one another.
+ //
+ // ClassIndexMaybe provides 8 byte granularity below 1024 bytes and 128 byte
+ // granularity for larger sizes, so our chosen size classes cannot be any
+ // finer (otherwise they would map to the same entry in the lookup table).
+ //
+ // We don't check expanded size classes which are intentionally duplicated.
+ for (int partition = 0; partition < kNumaPartitions; partition++) {
+ for (int c = (partition * kNumBaseClasses) + 1;
+ c < (partition + 1) * kNumBaseClasses; c++) {
+ const size_t max_size_in_class = m_.class_to_size(c);
+ if (max_size_in_class == 0) {
+ continue;
+ }
+ const int class_index = m_.SizeClass(
+ CppPolicy().InNumaPartition(partition), max_size_in_class);
+
+ EXPECT_EQ(c, class_index) << max_size_in_class;
+ }
+ }
+}
+
+// This test is disabled until we use a different span size allocation
+// algorithm (such as the one in effect from cl/130150125 until cl/139955211).
+TEST_F(SizeClassesTest, DISABLED_WastedSpan) {
+  // Validate that each size class wastes less than (number of objects) *
+  // (alignment) bytes at the end of the span.
+ for (int c = 1; c < kNumClasses; c++) {
+ const size_t span_size = kPageSize * m_.class_to_pages(c);
+ const size_t max_size_in_class = m_.class_to_size(c);
+ const size_t alignment = Alignment(max_size_in_class);
+ const size_t n_objects = span_size / max_size_in_class;
+ const size_t waste = span_size - n_objects * max_size_in_class;
+
+ EXPECT_LT(waste, n_objects * alignment) << max_size_in_class;
+ }
+}
+
+TEST_F(SizeClassesTest, DoubleCheckedConsistency) {
+ // Validate that every size on [0, kMaxSize] maps to a size class that is
+ // neither too big nor too small.
+ for (size_t size = 0; size <= kMaxSize; size++) {
+ const int sc = m_.SizeClass(CppPolicy(), size);
+ EXPECT_GT(sc, 0) << size;
+ EXPECT_LT(sc, kNumClasses) << size;
+
+ if ((sc % kNumBaseClasses) > 1) {
+ EXPECT_GT(size, m_.class_to_size(sc - 1))
+ << "Allocating unnecessarily large class";
+ }
+
+ const size_t s = m_.class_to_size(sc);
+ EXPECT_LE(size, s);
+ EXPECT_NE(s, 0) << size;
+ }
+}
+
+TEST_F(SizeClassesTest, NumToMove) {
+ for (int c = 1; c < kNumClasses; c++) {
+ // For non-empty size classes, we should move at least 1 object to/from each
+ // layer of the caches.
+ const size_t max_size_in_class = m_.class_to_size(c);
+ if (max_size_in_class == 0) {
+ continue;
+ }
+ EXPECT_GT(m_.num_objects_to_move(c), 0) << max_size_in_class;
+ }
+}
+
+class TestingSizeMap : public SizeMap {
+ public:
+ TestingSizeMap() {}
+
+ bool ValidSizeClasses(int num_classes, const SizeClassInfo* parsed) {
+ return SizeMap::ValidSizeClasses(num_classes, parsed);
+ }
+
+ const SizeClassInfo* DefaultSizeClasses() const { return kSizeClasses; }
+ const int DefaultSizeClassesCount() const { return kSizeClassesCount; }
+};
+
+class RunTimeSizeClassesTest : public ::testing::Test {
+ protected:
+ RunTimeSizeClassesTest() {}
+
+ TestingSizeMap m_;
+};
+
+TEST_F(RunTimeSizeClassesTest, ExpandedSizeClasses) {
+ // Verify that none of the default size classes are considered expanded size
+ // classes.
+ for (int i = 0; i < kNumClasses; i++) {
+ EXPECT_EQ(i < (m_.DefaultSizeClassesCount() * kNumaPartitions),
+ !IsExpandedSizeClass(i))
+ << i;
+ }
+}
+
+TEST_F(RunTimeSizeClassesTest, ValidateClassSizeIncreases) {
+ SizeClassInfo parsed[] = {
+ {0, 0, 0},
+ {16, 1, 14},
+ {32, 1, 15},
+ {kMaxSize, 1, 15},
+ };
+ EXPECT_TRUE(m_.ValidSizeClasses(4, parsed));
+
+ parsed[2].size = 8; // Change 32 to 8
+ EXPECT_FALSE(m_.ValidSizeClasses(4, parsed));
+}
+
+TEST_F(RunTimeSizeClassesTest, ValidateClassSizeMax) {
+ SizeClassInfo parsed[] = {
+ {0, 0, 0},
+ {kMaxSize - 128, 1, 15},
+ };
+ // Last class must cover kMaxSize
+ EXPECT_FALSE(m_.ValidSizeClasses(2, parsed));
+
+  // Check that kMaxSize (256 KiB = 262144) is allowed.
+ parsed[1].size = kMaxSize;
+ EXPECT_TRUE(m_.ValidSizeClasses(2, parsed));
+ // But kMaxSize + 128 is not allowed
+ parsed[1].size = kMaxSize + 128;
+ EXPECT_FALSE(m_.ValidSizeClasses(2, parsed));
+}
+
+TEST_F(RunTimeSizeClassesTest, ValidateClassSizesAlignment) {
+ SizeClassInfo parsed[] = {
+ {0, 0, 0},
+ {8, 1, 14},
+ {kMaxSize, 1, 15},
+ };
+ EXPECT_TRUE(m_.ValidSizeClasses(3, parsed));
+ // Doesn't meet alignment requirements
+ parsed[1].size = 7;
+ EXPECT_FALSE(m_.ValidSizeClasses(3, parsed));
+
+ // Over 512, expect alignment of 64 bytes.
+ // 512 + 64 = 576
+ parsed[1].size = 576;
+ EXPECT_TRUE(m_.ValidSizeClasses(3, parsed));
+ // 512 + 8
+ parsed[1].size = 520;
+ EXPECT_FALSE(m_.ValidSizeClasses(3, parsed));
+
+ // Over 1024, expect alignment of 128 bytes.
+ // 1024 + 128 = 1152
+ parsed[1].size = 1024 + 128;
+ EXPECT_TRUE(m_.ValidSizeClasses(3, parsed));
+ // 1024 + 64 = 1088
+ parsed[1].size = 1024 + 64;
+ EXPECT_FALSE(m_.ValidSizeClasses(3, parsed));
+}
+
+TEST_F(RunTimeSizeClassesTest, ValidateBatchSize) {
+ SizeClassInfo parsed[] = {
+ {0, 0, 0},
+ {8, 1, kMaxObjectsToMove},
+ {kMaxSize, 1, 15},
+ };
+ EXPECT_TRUE(m_.ValidSizeClasses(3, parsed));
+
+ ++parsed[1].num_to_move;
+ EXPECT_FALSE(m_.ValidSizeClasses(3, parsed));
+}
+
+TEST_F(RunTimeSizeClassesTest, ValidatePageSize) {
+ SizeClassInfo parsed[] = {
+ {0, 0, 0},
+ {1024, 255, kMaxObjectsToMove},
+ {kMaxSize, 1, 15},
+ };
+ EXPECT_TRUE(m_.ValidSizeClasses(3, parsed));
+
+ parsed[1].pages = 256;
+ EXPECT_FALSE(m_.ValidSizeClasses(3, parsed));
+}
+
+TEST_F(RunTimeSizeClassesTest, ValidateDefaultSizeClasses) {
+ // The default size classes also need to be valid.
+ EXPECT_TRUE(m_.ValidSizeClasses(m_.DefaultSizeClassesCount(),
+ m_.DefaultSizeClasses()));
+}
+
+TEST_F(RunTimeSizeClassesTest, EnvVariableNotExamined) {
+ // Set a valid runtime size class environment variable
+ setenv("TCMALLOC_SIZE_CLASSES", "256,1,1", 1);
+ m_.Init();
+ // Without runtime_size_classes library linked, the environment variable
+ // should have no affect.
+ EXPECT_NE(m_.class_to_size(1), 256);
+}
+
+TEST(SizeMapTest, GetSizeClass) {
+ absl::BitGen rng;
+ constexpr int kTrials = 1000;
+
+ SizeMap m;
+  // Before m.Init(), GetSizeClass should always yield size class 0 or the
+  // equivalent in a non-zero NUMA partition.
+ for (int i = 0; i < kTrials; ++i) {
+ const size_t size = absl::LogUniform(rng, 0, 4 << 20);
+ uint32_t cl;
+ if (m.GetSizeClass(CppPolicy(), size, &cl)) {
+ EXPECT_EQ(cl % kNumBaseClasses, 0) << size;
+ EXPECT_LT(cl, kExpandedClassesStart) << size;
+ } else {
+      // We should only fail to look up the size class when size is outside of
+      // the size classes.
+ ASSERT_GT(size, kMaxSize);
+ }
+ }
+
+ // After m.Init(), GetSizeClass should return a size class.
+ m.Init();
+
+ for (int i = 0; i < kTrials; ++i) {
+ const size_t size = absl::LogUniform(rng, 0, 4 << 20);
+ uint32_t cl;
+ if (m.GetSizeClass(CppPolicy(), size, &cl)) {
+ const size_t mapped_size = m.class_to_size(cl);
+ // The size class needs to hold size.
+ ASSERT_GE(mapped_size, size);
+ } else {
+      // We should only fail to look up the size class when size is outside of
+      // the size classes.
+ ASSERT_GT(size, kMaxSize);
+ }
+ }
+}
+
+TEST(SizeMapTest, GetSizeClassWithAlignment) {
+ absl::BitGen rng;
+ constexpr int kTrials = 1000;
+
+ SizeMap m;
+  // Before m.Init(), GetSizeClass should always yield size class 0 or the
+  // equivalent in a non-zero NUMA partition.
+ for (int i = 0; i < kTrials; ++i) {
+ const size_t size = absl::LogUniform(rng, 0, 4 << 20);
+ const size_t alignment = 1 << absl::Uniform(rng, 0u, kHugePageShift);
+ uint32_t cl;
+ if (m.GetSizeClass(CppPolicy().AlignAs(alignment), size, &cl)) {
+ EXPECT_EQ(cl % kNumBaseClasses, 0) << size << " " << alignment;
+ EXPECT_LT(cl, kExpandedClassesStart) << size << " " << alignment;
+ } else if (alignment < kPageSize) {
+ // When alignment > kPageSize, we do not produce a size class.
+ // TODO(b/172060547): alignment == kPageSize could fit into the size
+ // classes too.
+ //
+      // We should only fail to look up the size class when size is large.
+ ASSERT_GT(size, kMaxSize) << alignment;
+ }
+ }
+
+ // After m.Init(), GetSizeClass should return a size class.
+ m.Init();
+
+ for (int i = 0; i < kTrials; ++i) {
+ const size_t size = absl::LogUniform(rng, 0, 4 << 20);
+ const size_t alignment = 1 << absl::Uniform(rng, 0u, kHugePageShift);
+ uint32_t cl;
+ if (m.GetSizeClass(CppPolicy().AlignAs(alignment), size, &cl)) {
+ const size_t mapped_size = m.class_to_size(cl);
+ // The size class needs to hold size.
+ ASSERT_GE(mapped_size, size);
+ // The size needs to be a multiple of alignment.
+ ASSERT_EQ(mapped_size % alignment, 0);
+ } else if (alignment < kPageSize) {
+ // When alignment > kPageSize, we do not produce a size class.
+ // TODO(b/172060547): alignment == kPageSize could fit into the size
+ // classes too.
+ //
+      // We should only fail to look up the size class when size is large.
+ ASSERT_GT(size, kMaxSize) << alignment;
+ }
+ }
+}
+
+TEST(SizeMapTest, SizeClass) {
+ absl::BitGen rng;
+ constexpr int kTrials = 1000;
+
+ SizeMap m;
+ // Before m.Init(), SizeClass should always return 0 or the equivalent in a
+ // non-zero NUMA partition.
+ for (int i = 0; i < kTrials; ++i) {
+ const size_t size = absl::LogUniform<size_t>(rng, 0u, kMaxSize);
+ const uint32_t cl = m.SizeClass(CppPolicy(), size);
+ EXPECT_EQ(cl % kNumBaseClasses, 0) << size;
+ EXPECT_LT(cl, kExpandedClassesStart) << size;
+ }
+
+ // After m.Init(), SizeClass should return a size class.
+ m.Init();
+
+ for (int i = 0; i < kTrials; ++i) {
+ const size_t size = absl::LogUniform<size_t>(rng, 0u, kMaxSize);
+ uint32_t cl = m.SizeClass(CppPolicy(), size);
+
+ const size_t mapped_size = m.class_to_size(cl);
+ // The size class needs to hold size.
+ ASSERT_GE(mapped_size, size);
+ }
+}
+
+TEST(SizeMapTest, Preinit) {
+ ABSL_CONST_INIT static SizeMap m;
+
+ for (int cl = 0; cl < kNumClasses; ++cl) {
+ EXPECT_EQ(m.class_to_size(cl), 0) << cl;
+ EXPECT_EQ(m.class_to_pages(cl), 0) << cl;
+ EXPECT_EQ(m.num_objects_to_move(cl), 0) << cl;
+ }
+}
+
+} // namespace
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
diff --git a/contrib/libs/tcmalloc/tcmalloc/size_classes_with_runtime_size_classes_test.cc b/contrib/libs/tcmalloc/tcmalloc/size_classes_with_runtime_size_classes_test.cc
new file mode 100644
index 0000000000..17badddac9
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/size_classes_with_runtime_size_classes_test.cc
@@ -0,0 +1,127 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdlib.h>
+
+#include <string>
+
+#include "gtest/gtest.h"
+#include "absl/strings/str_cat.h"
+#include "absl/strings/str_format.h"
+#include "tcmalloc/common.h"
+#include "tcmalloc/size_class_info.h"
+#include "tcmalloc/span.h"
+
+namespace tcmalloc {
+namespace tcmalloc_internal {
+namespace {
+
+class TestingSizeMap : public SizeMap {
+ public:
+ TestingSizeMap() {}
+
+ const SizeClassInfo* DefaultSizeClasses() const { return kSizeClasses; }
+ int DefaultSizeClassesCount() const { return kSizeClassesCount; }
+};
+
+class RunTimeSizeClassesTest : public ::testing::Test {
+ protected:
+ RunTimeSizeClassesTest() {}
+
+ TestingSizeMap m_;
+};
+
+// Convert size classes into a string that can be passed to ParseSizeClasses().
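+// For example, the classes {8,1,32}, {16,1,32} and {32,1,32} (the zero class
+// at index 0 is skipped) serialize to "8,1,32;16,1,32;32,1,32".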
+std::string SizeClassesToString(int num_classes, const SizeClassInfo* parsed) {
+ std::string result;
+ for (int c = 1; c < num_classes; c++) {
+ std::string one_size = absl::StrFormat(
+ "%d,%d,%d", parsed[c].size, parsed[c].pages, parsed[c].num_to_move);
+ if (c == 1) {
+ result = one_size;
+ } else {
+ absl::StrAppend(&result, ";", one_size);
+ }
+ }
+ return result;
+}
+
+std::string ModifiedSizeClassesString(int num_classes,
+ const SizeClassInfo* source) {
+ // Set a valid runtime size class environment variable, which
+ // is a modified version of the default class sizes.
+ SizeClassInfo parsed[kNumClasses];
+ for (int c = 0; c < num_classes; c++) {
+ parsed[c] = source[c];
+ }
+ // Change num_to_move to a different valid value so that
+ // loading from the ENV can be detected.
+ EXPECT_NE(parsed[1].num_to_move, 3);
+ parsed[1].num_to_move = 3;
+ return SizeClassesToString(num_classes, parsed);
+}
+
+TEST_F(RunTimeSizeClassesTest, EnvVariableExamined) {
+ std::string e = ModifiedSizeClassesString(m_.DefaultSizeClassesCount(),
+ m_.DefaultSizeClasses());
+ setenv("TCMALLOC_SIZE_CLASSES", e.c_str(), 1);
+ m_.Init();
+
+ // Confirm that the expected change is seen.
+ EXPECT_EQ(m_.num_objects_to_move(1), 3);
+}
+
+// TODO(b/122839049) - Remove this test after bug is fixed.
+TEST_F(RunTimeSizeClassesTest, ReducingSizeClassCountNotAllowed) {
+  // Try reducing the number of size classes by 1, which is expected to fail.
+ std::string e = ModifiedSizeClassesString(m_.DefaultSizeClassesCount() - 1,
+ m_.DefaultSizeClasses());
+ setenv("TCMALLOC_SIZE_CLASSES", e.c_str(), 1);
+ m_.Init();
+
+ // Confirm that the expected change is not seen.
+ EXPECT_EQ(m_.num_objects_to_move(1), m_.DefaultSizeClasses()[1].num_to_move);
+}
+
+// Convert the static classes to a string, parse that string via the
+// environment variable, and check that we get exactly the same results.
+// Note that if the environment variable were not read, this test would
+// still pass.
+TEST_F(RunTimeSizeClassesTest, EnvRealClasses) {
+ const int count = m_.DefaultSizeClassesCount();
+ std::string e = SizeClassesToString(count, m_.DefaultSizeClasses());
+ setenv("TCMALLOC_SIZE_CLASSES", e.c_str(), 1);
+ m_.Init();
+ // With the runtime_size_classes library linked, the environment variable
+ // will be parsed.
+
+ for (int c = 0; c < kNumClasses;) {
+ for (int end = c + count; c < end; c++) {
+ const SizeClassInfo& default_info =
+ m_.DefaultSizeClasses()[c % kNumBaseClasses];
+ EXPECT_EQ(m_.class_to_size(c), default_info.size) << c;
+ EXPECT_EQ(m_.class_to_pages(c), default_info.pages);
+ EXPECT_EQ(m_.num_objects_to_move(c), default_info.num_to_move);
+ }
+ for (; (c % kNumBaseClasses) != 0; c++) {
+ EXPECT_EQ(m_.class_to_size(c), 0);
+ EXPECT_EQ(m_.class_to_pages(c), 0);
+ EXPECT_EQ(m_.num_objects_to_move(c), 0);
+ }
+ }
+}
+
+} // namespace
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
diff --git a/contrib/libs/tcmalloc/tcmalloc/span.cc b/contrib/libs/tcmalloc/tcmalloc/span.cc
new file mode 100644
index 0000000000..87e6f29244
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/span.cc
@@ -0,0 +1,332 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/span.h"
+
+#include <stdint.h>
+
+#include <algorithm>
+
+#include "absl/base/optimization.h" // ABSL_INTERNAL_ASSUME
+#include "absl/numeric/bits.h"
+#include "tcmalloc/common.h"
+#include "tcmalloc/internal/atomic_stats_counter.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/page_heap_allocator.h"
+#include "tcmalloc/pagemap.h"
+#include "tcmalloc/pages.h"
+#include "tcmalloc/sampler.h"
+#include "tcmalloc/static_vars.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+void Span::Sample(StackTrace* stack) {
+ ASSERT(!sampled_ && stack);
+ sampled_ = 1;
+ sampled_stack_ = stack;
+ Static::sampled_objects_.prepend(this);
+
+ // The cast to value matches Unsample.
+ tcmalloc_internal::StatsCounter::Value allocated_bytes =
+ static_cast<tcmalloc_internal::StatsCounter::Value>(
+ AllocatedBytes(*stack, true));
+ // LossyAdd is ok: writes to sampled_objects_size_ guarded by pageheap_lock.
+ Static::sampled_objects_size_.LossyAdd(allocated_bytes);
+}
+
+StackTrace* Span::Unsample() {
+ if (!sampled_) {
+ return nullptr;
+ }
+ sampled_ = 0;
+ StackTrace* stack = sampled_stack_;
+ sampled_stack_ = nullptr;
+ RemoveFromList(); // from Static::sampled_objects_
+ // The cast to Value ensures no funny business happens during the negation if
+ // sizeof(size_t) != sizeof(Value).
+ tcmalloc_internal::StatsCounter::Value neg_allocated_bytes =
+ -static_cast<tcmalloc_internal::StatsCounter::Value>(
+ AllocatedBytes(*stack, true));
+ // LossyAdd is ok: writes to sampled_objects_size_ guarded by pageheap_lock.
+ Static::sampled_objects_size_.LossyAdd(neg_allocated_bytes);
+ return stack;
+}
+
+double Span::Fragmentation() const {
+ const size_t cl = Static::pagemap().sizeclass(first_page_);
+ if (cl == 0) {
+ // Avoid crashes in production mode code, but report in tests.
+ ASSERT(cl != 0);
+ return 0;
+ }
+ const size_t obj_size = Static::sizemap().class_to_size(cl);
+ const size_t span_objects = bytes_in_span() / obj_size;
+ const size_t live = allocated_;
+ if (live == 0) {
+ // Avoid crashes in production mode code, but report in tests.
+ ASSERT(live != 0);
+ return 0;
+ }
+  // Assume that all in-use objects in this span are spread evenly
+  // through the span, so charge the free space in the span evenly
+  // to each of the live objects.
+ // A note on units here: StackTraceTable::AddTrace(1, *t)
+ // represents usage (of whatever kind: heap space, allocation,
+ // fragmentation) of 1 object of size t->allocated_size.
+ // So we want to report here the number of objects we are "responsible"
+ // for pinning - NOT bytes.
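+  // For example, a span with capacity for 32 objects of which 8 are still
+  // live reports (32 - 8) / 8 = 3 free objects charged to each live object.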
+ return static_cast<double>(span_objects - live) / live;
+}
+
+void Span::AverageFreelistAddedTime(const Span* other) {
+ // Do this computation as floating-point to avoid overflowing our uint64_t.
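+  // For example, merging a 1-page span added at time 1000 with a 3-page span
+  // added at time 2000 yields (1000 * 1 + 2000 * 3) / (1 + 3) = 1750.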
+ freelist_added_time_ = static_cast<uint64_t>(
+ (static_cast<double>(freelist_added_time_) * num_pages_ +
+ static_cast<double>(other->freelist_added_time_) * other->num_pages_) /
+ (num_pages_ + other->num_pages_));
+}
+
+// Freelist organization.
+//
+// Partially full spans in CentralFreeList contain a list of free objects
+// (freelist). We could use the free objects as linked list nodes and form
+// a stack, but since the free objects are not likely to be cache-hot the
+// chain of dependent misses is very cache-unfriendly. The current
+// organization reduces number of cache misses during push/pop.
+//
+// Objects in the freelist are represented by 2-byte indices. The index is the
+// object's offset from the span start divided by a constant. For small objects
+// (<512 bytes) the divisor is 8; for larger objects it is 64. This allows all
+// indices to fit into 2 bytes.
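+// For example, with the small divisor of 8 an object starting 128 bytes into
+// the span is encoded as index 128 / 8 = 16, and with the large divisor of 64
+// an object starting 4096 bytes into the span is encoded as index
+// 4096 / 64 = 64.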
+//
+// The freelist has two components. First, we have a small array-based cache
+// (4 objects) embedded directly into the Span (cache_ and cache_size_). We can
+// access this without touching any objects themselves.
+//
+// The rest of the freelist is stored as arrays inside free objects themselves.
+// We can store object_size / 2 indexes in any object, but this is not always
+// sufficient to store the entire contents of a Span in a single object. So we
+// reserve the first index slot in an object to form a linked list. We use the
+// first object in that list (freelist_) as an array to push/pop from; any
+// subsequent objects in the list's arrays are guaranteed to be full.
+//
+// Graphically this can be depicted as follows:
+//
+// freelist_ embed_count_ cache_ cache_size_
+// Span: [ |idx| 4 |idx|idx|---|---| 2 ]
+// |
+// \/
+// [idx|idx|idx|idx|idx|---|---|---] 16-byte object
+// |
+// \/
+// [---|idx|idx|idx|idx|idx|idx|idx] 16-byte object
+//
+
+Span::ObjIdx Span::PtrToIdx(void* ptr, size_t size) const {
+ // Object index is an offset from span start divided by a power-of-two.
+  // The divisors are chosen so that
+ // (1) objects are aligned on the divisor,
+ // (2) index fits into 16 bits and
+ // (3) the index of the beginning of all objects is strictly less than
+ // kListEnd (note that we have 256K pages and multi-page spans).
+ // For example with 1M spans we need kMultiPageAlignment >= 16.
+ // An ASSERT in BuildFreelist() verifies a condition which implies (3).
+ uintptr_t p = reinterpret_cast<uintptr_t>(ptr);
+ uintptr_t off;
+ if (size <= SizeMap::kMultiPageSize) {
+ // Generally we need to load first_page_ to compute the offset.
+    // But first_page_ can be in a different cache line than the fields that
+    // we use in FreelistPush otherwise (cache_, cache_size_, freelist_).
+    // So we avoid loading first_page_ for smaller sizes that have one page per
+    // span; instead we compute the offset by taking the low kPageShift bits of
+    // the pointer.
+ ASSERT(PageIdContaining(ptr) == first_page_);
+ off = (p & (kPageSize - 1)) / kAlignment;
+ } else {
+ off = (p - first_page_.start_uintptr()) / SizeMap::kMultiPageAlignment;
+ }
+ ObjIdx idx = static_cast<ObjIdx>(off);
+ ASSERT(idx != kListEnd);
+ ASSERT(idx == off);
+ return idx;
+}
+
+Span::ObjIdx* Span::IdxToPtr(ObjIdx idx, size_t size) const {
+ ASSERT(idx != kListEnd);
+ uintptr_t off = first_page_.start_uintptr() +
+ (static_cast<uintptr_t>(idx)
+ << (size <= SizeMap::kMultiPageSize
+ ? kAlignmentShift
+ : SizeMap::kMultiPageAlignmentShift));
+ ObjIdx* ptr = reinterpret_cast<ObjIdx*>(off);
+ ASSERT(PtrToIdx(ptr, size) == idx);
+ return ptr;
+}
+
+Span::ObjIdx* Span::BitmapIdxToPtr(ObjIdx idx, size_t size) const {
+ uintptr_t off =
+ first_page_.start_uintptr() + (static_cast<uintptr_t>(idx) * size);
+ ObjIdx* ptr = reinterpret_cast<ObjIdx*>(off);
+ return ptr;
+}
+
+size_t Span::BitmapFreelistPopBatch(void** __restrict batch, size_t N,
+ size_t size) {
+#ifndef NDEBUG
+ size_t before = bitmap_.CountBits(0, 64);
+#endif // NDEBUG
+
+ size_t count = 0;
+  // We want to fill the batch with either N objects or the number of objects
+  // remaining in the span, whichever is smaller.
+ while (!bitmap_.IsZero() && count < N) {
+ size_t offset = bitmap_.FindSet(0);
+ ASSERT(offset < 64);
+ batch[count] = BitmapIdxToPtr(offset, size);
+ bitmap_.ClearLowestBit();
+ count++;
+ }
+
+#ifndef NDEBUG
+ size_t after = bitmap_.CountBits(0, 64);
+ ASSERT(after + count == before);
+ ASSERT(allocated_ + count == embed_count_ - after);
+#endif // NDEBUG
+ allocated_ += count;
+ return count;
+}
+
+size_t Span::FreelistPopBatch(void** __restrict batch, size_t N, size_t size) {
+ // Handle spans with 64 or fewer objects using a bitmap. We expect spans
+ // to frequently hold smaller objects.
+ if (ABSL_PREDICT_FALSE(size >= kBitmapMinObjectSize)) {
+ return BitmapFreelistPopBatch(batch, N, size);
+ }
+ if (ABSL_PREDICT_TRUE(size <= SizeMap::kMultiPageSize)) {
+ return FreelistPopBatchSized<Align::SMALL>(batch, N, size);
+ } else {
+ return FreelistPopBatchSized<Align::LARGE>(batch, N, size);
+ }
+}
+
+uint16_t Span::CalcReciprocal(size_t size) {
+ // Calculate scaling factor. We want to avoid dividing by the size of the
+ // object. Instead we'll multiply by a scaled version of the reciprocal.
+ // We divide kBitmapScalingDenominator by the object size, so later we can
+ // multiply by this reciprocal, and then divide this scaling factor out.
+ // TODO(djgove) These divides can be computed once at start up.
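+  // For example, a 4 KiB object size (large alignment, divisor 64) gets
+  // reciprocal = 65536 / (4096 / 64) = 1024, and OffsetToIdx() then recovers
+  // the object at offset 8192 as (8192 / 64 * 1024 + 32768) / 65536 = 2 with
+  // integer arithmetic.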
+ size_t reciprocal = 0;
+  // The spans hold objects up to kMaxSize, so the assumption below is safe.
+ ABSL_INTERNAL_ASSUME(size <= kMaxSize);
+ if (size <= SizeMap::kMultiPageSize) {
+ reciprocal = kBitmapScalingDenominator / (size >> kAlignmentShift);
+ } else {
+ reciprocal =
+ kBitmapScalingDenominator / (size >> SizeMap::kMultiPageAlignmentShift);
+ }
+ ASSERT(reciprocal < 65536);
+ return static_cast<uint16_t>(reciprocal);
+}
+
+void Span::BitmapBuildFreelist(size_t size, size_t count) {
+ // We are using a bitmap to indicate whether objects are used or not. The
+ // maximum capacity for the bitmap is 64 objects.
+ ASSERT(count <= 64);
+#ifndef NDEBUG
+ // For bitmap_ use embed_count_ to record objects per span.
+ embed_count_ = count;
+#endif // NDEBUG
+ reciprocal_ = CalcReciprocal(size);
+ allocated_ = 0;
+ bitmap_.Clear(); // bitmap_ can be non-zero from a previous use.
+ bitmap_.SetRange(0, count);
+ ASSERT(bitmap_.CountBits(0, 64) == count);
+}
+
+int Span::BuildFreelist(size_t size, size_t count, void** batch, int N) {
+ freelist_ = kListEnd;
+
+ if (size >= kBitmapMinObjectSize) {
+ BitmapBuildFreelist(size, count);
+ return BitmapFreelistPopBatch(batch, N, size);
+ }
+
+ // First, push as much as we can into the batch.
+ char* ptr = static_cast<char*>(start_address());
+ int result = N <= count ? N : count;
+ for (int i = 0; i < result; ++i) {
+ batch[i] = ptr;
+ ptr += size;
+ }
+ allocated_ = result;
+
+ ObjIdx idxStep = size / kAlignment;
+ // Valid objects are {0, idxStep, idxStep * 2, ..., idxStep * (count - 1)}.
+ if (size > SizeMap::kMultiPageSize) {
+ idxStep = size / SizeMap::kMultiPageAlignment;
+ }
+ ObjIdx idx = idxStep * result;
+
+ // Verify that the end of the useful portion of the span (and the beginning of
+ // the span waste) has an index that doesn't overflow or risk confusion with
+ // kListEnd. This is slightly stronger than we actually need (see comment in
+ // PtrToIdx for that) but rules out some bugs and weakening it wouldn't
+ // actually help. One example of the potential bugs that are ruled out is the
+ // possibility of idxEnd (below) overflowing.
+ ASSERT(count * idxStep < kListEnd);
+
+ // The index of the end of the useful portion of the span.
+ ObjIdx idxEnd = count * idxStep;
+
+ // Then, push as much as we can into the cache_.
+ int cache_size = 0;
+ for (; idx < idxEnd && cache_size < kCacheSize; idx += idxStep) {
+ cache_[cache_size] = idx;
+ cache_size++;
+ }
+ cache_size_ = cache_size;
+
+ // Now, build freelist and stack other objects onto freelist objects.
+  // Note: we take freelist objects from the beginning and stacked objects
+  // from the end. This has the nice property of not paging in the whole span
+  // at once and not draining the whole cache.
+ ObjIdx* host = nullptr; // cached first object on freelist
+ const size_t max_embed = size / sizeof(ObjIdx) - 1;
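+  // E.g. a 16-byte object can embed 16 / sizeof(ObjIdx) - 1 = 7 indices after
+  // the slot reserved for the freelist link, matching the diagram above.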
+ int embed_count = 0;
+ while (idx < idxEnd) {
+    // Check that no idx can be confused with kListEnd.
+ ASSERT(idx != kListEnd);
+ if (host && embed_count != max_embed) {
+ // Push onto first object on the freelist.
+ embed_count++;
+ idxEnd -= idxStep;
+ host[embed_count] = idxEnd;
+ } else {
+ // The first object is full, push new object onto freelist.
+ host = IdxToPtr(idx, size);
+ host[0] = freelist_;
+ freelist_ = idx;
+ embed_count = 0;
+ idx += idxStep;
+ }
+ }
+ embed_count_ = embed_count;
+ return result;
+}
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
diff --git a/contrib/libs/tcmalloc/tcmalloc/span.h b/contrib/libs/tcmalloc/tcmalloc/span.h
new file mode 100644
index 0000000000..c589709094
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/span.h
@@ -0,0 +1,589 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// A Span is a contiguous run of pages.
+
+#ifndef TCMALLOC_SPAN_H_
+#define TCMALLOC_SPAN_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "absl/base/thread_annotations.h"
+#include "absl/numeric/bits.h"
+#include "tcmalloc/common.h"
+#include "tcmalloc/internal/linked_list.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/internal/optimization.h"
+#include "tcmalloc/internal/range_tracker.h"
+#include "tcmalloc/pages.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+// The bitmap can hold at most 64 objects, so this determines the minimum
+// object size for which the bitmap representation works. This assumes that we
+// don't increase the number of pages at a point where the object count would
+// exceed 64.
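+// For example, with 8 KiB pages this is 8192 / 64 = 128 bytes; with 256 KiB
+// pages it is 4 KiB.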
+inline constexpr size_t kBitmapMinObjectSize = kPageSize / 64;
+
+// Denominator for bitmap scaling factor. The idea is that instead of dividing
+// by N we multiply by M = kBitmapScalingDenominator / N and round the resulting
+// value.
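+// For example, dividing by N = 16 becomes multiplying by M = 65536 / 16 = 4096
+// and then dividing by 65536; adding 65536 / 2 before the final division
+// rounds to the nearest integer, e.g. 100 / 16 maps to
+// (100 * 4096 + 32768) / 65536 = 6.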
+inline constexpr size_t kBitmapScalingDenominator = 65536;
+
+// Information kept for a span (a contiguous run of pages).
+//
+// Spans can be in different states. The current state determines the set of
+// methods that can be called on the span (and the active member in the union
+// below). States are:
+// - SMALL_OBJECT: the span holds multiple small objects.
+//   The span is owned by CentralFreeList and is generally on the
+//   CentralFreeList::nonempty_ list (unless it has no free objects).
+//   location_ == IN_USE.
+// - LARGE_OBJECT: the span holds a single large object.
+//   The span can be considered to be owned by the user until the object is
+//   freed. location_ == IN_USE.
+// - SAMPLED: the span holds a single sampled object.
+//   The span can be considered to be owned by the user until the object is
+//   freed. location_ == IN_USE && sampled_ == 1.
+// - ON_NORMAL_FREELIST: the span has no allocated objects, is owned by
+//   PageHeap, and is on the normal PageHeap list.
+//   location_ == ON_NORMAL_FREELIST.
+// - ON_RETURNED_FREELIST: the span has no allocated objects, is owned by
+//   PageHeap, and is on the returned PageHeap list.
+//   location_ == ON_RETURNED_FREELIST.
+class Span;
+typedef TList<Span> SpanList;
+
+class Span : public SpanList::Elem {
+ public:
+ // Allocator/deallocator for spans. Note that these functions are defined
+ // in static_vars.h, which is weird: see there for why.
+ static Span* New(PageId p, Length len)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+ static void Delete(Span* span) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+
+ // Remove this from the linked list in which it resides.
+ // REQUIRES: this span is on some list.
+ void RemoveFromList();
+
+  // Locations used to track which list a span resides on.
+ enum Location {
+ IN_USE, // not on PageHeap lists
+ ON_NORMAL_FREELIST, // on normal PageHeap list
+ ON_RETURNED_FREELIST, // on returned PageHeap list
+ };
+ Location location() const;
+ void set_location(Location loc);
+
+ // ---------------------------------------------------------------------------
+ // Support for sampled allocations.
+ // There is one-to-one correspondence between a sampled allocation and a span.
+ // ---------------------------------------------------------------------------
+
+ // Mark this span as sampling allocation at the stack. Sets state to SAMPLED.
+ void Sample(StackTrace* stack) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+
+ // Unmark this span as sampling an allocation.
+ // Returns stack trace previously passed to Sample,
+ // or nullptr if this is a non-sampling span.
+ // REQUIRES: this is a SAMPLED span.
+ StackTrace* Unsample() ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+
+ // Returns stack for the sampled allocation.
+ // pageheap_lock is not required, but caller either needs to hold the lock or
+ // ensure by some other means that the sampling state can't be changed
+ // concurrently.
+ // REQUIRES: this is a SAMPLED span.
+ StackTrace* sampled_stack() const;
+
+ // Is it a sampling span?
+ // For debug checks. pageheap_lock is not required, but caller needs to ensure
+ // that sampling state can't be changed concurrently.
+ bool sampled() const;
+
+ // ---------------------------------------------------------------------------
+ // Span memory range.
+ // ---------------------------------------------------------------------------
+
+ // Returns first page of the span.
+ PageId first_page() const;
+
+ // Returns the last page in the span.
+ PageId last_page() const;
+
+ // Sets span first page.
+ void set_first_page(PageId p);
+
+ // Returns start address of the span.
+ void* start_address() const;
+
+ // Returns number of pages in the span.
+ Length num_pages() const;
+
+ // Sets number of pages in the span.
+ void set_num_pages(Length len);
+
+ // Total memory bytes in the span.
+ size_t bytes_in_span() const;
+
+ // ---------------------------------------------------------------------------
+ // Age tracking (for free spans in PageHeap).
+ // ---------------------------------------------------------------------------
+
+ uint64_t freelist_added_time() const;
+ void set_freelist_added_time(uint64_t t);
+
+  // Sets this span's freelist-added time to the average of this span's and
+  // the other span's times, weighted by their sizes.
+  // REQUIRES: this is an ON_NORMAL_FREELIST or ON_RETURNED_FREELIST span.
+ void AverageFreelistAddedTime(const Span* other);
+
+ // Returns internal fragmentation of the span.
+ // REQUIRES: this is a SMALL_OBJECT span.
+ double Fragmentation() const;
+
+ // ---------------------------------------------------------------------------
+ // Freelist management.
+ // Used for spans in CentralFreelist to manage free objects.
+ // These methods REQUIRE a SMALL_OBJECT span.
+ // ---------------------------------------------------------------------------
+
+ // Indicates whether the object is considered large or small based on
+ // size > SizeMap::kMultiPageSize.
+ enum class Align { SMALL, LARGE };
+
+  // Indicates whether the Span is empty. The size is used to determine whether
+  // the span uses a compressed linked list of objects or a bitmap to hold
+  // available objects.
+ bool FreelistEmpty(size_t size) const;
+
+  // Pushes ptr onto the freelist unless the freelist becomes full,
+  // in which case it returns false.
+ bool FreelistPush(void* ptr, size_t size) {
+ ASSERT(allocated_ > 0);
+ if (ABSL_PREDICT_FALSE(allocated_ == 1)) {
+ return false;
+ }
+ allocated_--;
+ // Bitmaps are used to record object availability when there are fewer than
+ // 64 objects in a span.
+ if (ABSL_PREDICT_FALSE(size >= kBitmapMinObjectSize)) {
+ if (ABSL_PREDICT_TRUE(size <= SizeMap::kMultiPageSize)) {
+ return BitmapFreelistPush<Align::SMALL>(ptr, size);
+ } else {
+ return BitmapFreelistPush<Align::LARGE>(ptr, size);
+ }
+ }
+ if (ABSL_PREDICT_TRUE(size <= SizeMap::kMultiPageSize)) {
+ return FreelistPushSized<Align::SMALL>(ptr, size);
+ } else {
+ return FreelistPushSized<Align::LARGE>(ptr, size);
+ }
+ }
+
+ // Pops up to N objects from the freelist and returns them in the batch array.
+ // Returns number of objects actually popped.
+ size_t FreelistPopBatch(void** batch, size_t N, size_t size);
+
+ // Reset a Span object to track the range [p, p + n).
+ void Init(PageId p, Length n);
+
+ // Initialize freelist to contain all objects in the span.
+ // Pops up to N objects from the freelist and returns them in the batch array.
+ // Returns number of objects actually popped.
+ int BuildFreelist(size_t size, size_t count, void** batch, int N);
+
+ // Prefetch cacheline containing most important span information.
+ void Prefetch();
+
+ static constexpr size_t kCacheSize = 4;
+
+ private:
+ // See the comment on freelist organization in cc file.
+ typedef uint16_t ObjIdx;
+ static constexpr ObjIdx kListEnd = -1;
+
+ // Use uint16_t or uint8_t for 16 bit and 8 bit fields instead of bitfields.
+  // LLVM will generate widened load/store and bit-masking operations to access
+  // bitfields, and this hurts performance. The compiler flag
+  // -ffine-grained-bitfield-accesses can help if bitfields are used here, but
+  // it could potentially hurt performance in other cases, so it is not enabled
+  // by default. For more information, see b/35680381 and cl/199502226.
+ uint16_t allocated_; // Number of non-free objects
+ uint16_t embed_count_;
+  // For available objects stored as a compressed linked list, the index of
+  // the first object is recorded in freelist_. When a bitmap is used to
+ // represent available objects, the reciprocal of the object size is
+ // stored to enable conversion from the offset of an object within a
+ // span to the index of the object.
+ union {
+ uint16_t freelist_;
+ uint16_t reciprocal_;
+ };
+ uint8_t cache_size_;
+ uint8_t location_ : 2; // Is the span on a freelist, and if so, which?
+ uint8_t sampled_ : 1; // Sampled object?
+
+ union {
+ // Used only for spans in CentralFreeList (SMALL_OBJECT state).
+ // Embed cache of free objects.
+ ObjIdx cache_[kCacheSize];
+
+    // Used for spans in CentralFreeList with fewer than 64 objects.
+ // Each bit is set to one when the object is available, and zero
+ // when the object is used.
+ Bitmap<64> bitmap_{};
+
+ // Used only for sampled spans (SAMPLED state).
+ StackTrace* sampled_stack_;
+
+ // Used only for spans in PageHeap
+ // (ON_NORMAL_FREELIST or ON_RETURNED_FREELIST state).
+ // Time when this span was added to a freelist. Units: cycles. When a span
+ // is merged into this one, we set this to the average of now and the
+ // current freelist_added_time, weighted by the two spans' sizes.
+ uint64_t freelist_added_time_;
+ };
+
+ PageId first_page_; // Starting page number.
+ Length num_pages_; // Number of pages in span.
+
+ // Convert object pointer <-> freelist index.
+ ObjIdx PtrToIdx(void* ptr, size_t size) const;
+ ObjIdx* IdxToPtr(ObjIdx idx, size_t size) const;
+
+ // For bitmap'd spans conversion from an offset to an index is performed
+ // by multiplying by the scaled reciprocal of the object size.
+ static uint16_t CalcReciprocal(size_t size);
+
+ // Convert object pointer <-> freelist index for bitmap managed objects.
+ template <Align align>
+ ObjIdx BitmapPtrToIdx(void* ptr, size_t size) const;
+ ObjIdx* BitmapIdxToPtr(ObjIdx idx, size_t size) const;
+
+ // Helper function for converting a pointer to an index.
+ template <Align align>
+ static ObjIdx OffsetToIdx(uintptr_t offset, size_t size, uint16_t reciprocal);
+ // Helper function for testing round trips between pointers and indexes.
+ static ObjIdx TestOffsetToIdx(uintptr_t ptr, size_t size,
+ uint16_t reciprocal) {
+ if (size <= SizeMap::kMultiPageSize) {
+ return OffsetToIdx<Align::SMALL>(ptr, size, reciprocal);
+ } else {
+ return OffsetToIdx<Align::LARGE>(ptr, size, reciprocal);
+ }
+ }
+
+ template <Align align>
+ ObjIdx* IdxToPtrSized(ObjIdx idx, size_t size) const;
+
+ template <Align align>
+ ObjIdx PtrToIdxSized(void* ptr, size_t size) const;
+
+ template <Align align>
+ size_t FreelistPopBatchSized(void** __restrict batch, size_t N, size_t size);
+
+ template <Align align>
+ bool FreelistPushSized(void* ptr, size_t size);
+
+ // For spans containing 64 or fewer objects, indicate that the object at the
+ // index has been returned. Always returns true.
+ template <Align align>
+ bool BitmapFreelistPush(void* ptr, size_t size);
+
+ // A bitmap is used to indicate object availability for spans containing
+ // 64 or fewer objects.
+ void BitmapBuildFreelist(size_t size, size_t count);
+
+ // For spans with 64 or fewer objects populate batch with up to N objects.
+ // Returns number of objects actually popped.
+ size_t BitmapFreelistPopBatch(void** batch, size_t N, size_t size);
+
+  // Friend class to enable more in-depth testing of bitmap code.
+ friend class SpanTestPeer;
+};
+
+template <Span::Align align>
+Span::ObjIdx* Span::IdxToPtrSized(ObjIdx idx, size_t size) const {
+ ASSERT(idx != kListEnd);
+ static_assert(align == Align::LARGE || align == Align::SMALL);
+ uintptr_t off =
+ first_page_.start_uintptr() +
+ (static_cast<uintptr_t>(idx)
+ << (align == Align::SMALL ? kAlignmentShift
+ : SizeMap::kMultiPageAlignmentShift));
+ ObjIdx* ptr = reinterpret_cast<ObjIdx*>(off);
+ ASSERT(PtrToIdx(ptr, size) == idx);
+ return ptr;
+}
+
+template <Span::Align align>
+Span::ObjIdx Span::PtrToIdxSized(void* ptr, size_t size) const {
+ // Object index is an offset from span start divided by a power-of-two.
+  // The divisors are chosen so that
+ // (1) objects are aligned on the divisor,
+ // (2) index fits into 16 bits and
+ // (3) the index of the beginning of all objects is strictly less than
+ // kListEnd (note that we have 256K pages and multi-page spans).
+ // For example with 1M spans we need kMultiPageAlignment >= 16.
+ // An ASSERT in BuildFreelist() verifies a condition which implies (3).
+ uintptr_t p = reinterpret_cast<uintptr_t>(ptr);
+ uintptr_t off;
+ if (align == Align::SMALL) {
+ // Generally we need to load first_page_ to compute the offset.
+    // But first_page_ can be in a different cache line than the fields that
+    // we use in FreelistPush otherwise (cache_, cache_size_, freelist_).
+    // So we avoid loading first_page_ for smaller sizes that have one page per
+    // span; instead we compute the offset by taking the low kPageShift bits of
+    // the pointer.
+ ASSERT(PageIdContaining(ptr) == first_page_);
+ ASSERT(num_pages_ == Length(1));
+ off = (p & (kPageSize - 1)) / kAlignment;
+ } else {
+ off = (p - first_page_.start_uintptr()) / SizeMap::kMultiPageAlignment;
+ }
+ ObjIdx idx = static_cast<ObjIdx>(off);
+ ASSERT(idx != kListEnd);
+ ASSERT(idx == off);
+ ASSERT(IdxToPtr(idx, size) == ptr);
+ return idx;
+}
+
+template <Span::Align align>
+size_t Span::FreelistPopBatchSized(void** __restrict batch, size_t N,
+ size_t size) {
+ size_t result = 0;
+
+ // Pop from cache.
+ auto csize = cache_size_;
+ ASSUME(csize <= kCacheSize);
+ auto cache_reads = csize < N ? csize : N;
+ for (; result < cache_reads; result++) {
+ batch[result] = IdxToPtrSized<align>(cache_[csize - result - 1], size);
+ }
+
+ // Store this->cache_size_ one time.
+ cache_size_ = csize - result;
+
+ while (result < N) {
+ if (freelist_ == kListEnd) {
+ break;
+ }
+
+ ObjIdx* const host = IdxToPtrSized<align>(freelist_, size);
+ uint16_t embed_count = embed_count_;
+ ObjIdx current = host[embed_count];
+
+ size_t iter = embed_count;
+ if (result + embed_count > N) {
+ iter = N - result;
+ }
+ for (size_t i = 0; i < iter; i++) {
+ // Pop from the first object on freelist.
+ batch[result + i] = IdxToPtrSized<align>(host[embed_count - i], size);
+ }
+ embed_count -= iter;
+ result += iter;
+
+ // Update current for next cycle.
+ current = host[embed_count];
+
+ if (result == N) {
+ embed_count_ = embed_count;
+ break;
+ }
+
+ // The first object on the freelist is empty, pop it.
+ ASSERT(embed_count == 0);
+
+ batch[result] = host;
+ result++;
+
+ freelist_ = current;
+ embed_count_ = size / sizeof(ObjIdx) - 1;
+ }
+ allocated_ += result;
+ return result;
+}
+
+template <Span::Align align>
+bool Span::FreelistPushSized(void* ptr, size_t size) {
+ ObjIdx idx = PtrToIdxSized<align>(ptr, size);
+ if (cache_size_ != kCacheSize) {
+ // Have empty space in the cache, push there.
+ cache_[cache_size_] = idx;
+ cache_size_++;
+ } else if (ABSL_PREDICT_TRUE(freelist_ != kListEnd) &&
+ // -1 because the first slot is used by freelist link.
+ ABSL_PREDICT_TRUE(embed_count_ != size / sizeof(ObjIdx) - 1)) {
+ // Push onto the first object on freelist.
+ ObjIdx* host;
+ if (align == Align::SMALL) {
+ // Avoid loading first_page_ in this case (see the comment in PtrToIdx).
+ ASSERT(num_pages_ == Length(1));
+ host = reinterpret_cast<ObjIdx*>(
+ (reinterpret_cast<uintptr_t>(ptr) & ~(kPageSize - 1)) +
+ static_cast<uintptr_t>(freelist_) * kAlignment);
+ ASSERT(PtrToIdx(host, size) == freelist_);
+ } else {
+ host = IdxToPtrSized<align>(freelist_, size);
+ }
+ embed_count_++;
+ host[embed_count_] = idx;
+ } else {
+ // Push onto freelist.
+ *reinterpret_cast<ObjIdx*>(ptr) = freelist_;
+ freelist_ = idx;
+ embed_count_ = 0;
+ }
+ return true;
+}
+
+template <Span::Align align>
+Span::ObjIdx Span::OffsetToIdx(uintptr_t offset, size_t size,
+ uint16_t reciprocal) {
+ if (align == Align::SMALL) {
+ return static_cast<ObjIdx>(
+ // Add kBitmapScalingDenominator / 2 to round to nearest integer.
+ ((offset >> kAlignmentShift) * reciprocal +
+ kBitmapScalingDenominator / 2) /
+ kBitmapScalingDenominator);
+ } else {
+ return static_cast<ObjIdx>(
+ ((offset >> SizeMap::kMultiPageAlignmentShift) * reciprocal +
+ kBitmapScalingDenominator / 2) /
+ kBitmapScalingDenominator);
+ }
+}
+
+template <Span::Align align>
+Span::ObjIdx Span::BitmapPtrToIdx(void* ptr, size_t size) const {
+ uintptr_t p = reinterpret_cast<uintptr_t>(ptr);
+ uintptr_t off = static_cast<uint32_t>(p - first_page_.start_uintptr());
+ ObjIdx idx = OffsetToIdx<align>(off, size, reciprocal_);
+ ASSERT(BitmapIdxToPtr(idx, size) == ptr);
+ return idx;
+}
+
+template <Span::Align align>
+bool Span::BitmapFreelistPush(void* ptr, size_t size) {
+#ifndef NDEBUG
+ size_t before = bitmap_.CountBits(0, 64);
+#endif
+ // TODO(djgove) Conversions to offsets can be computed outside of lock.
+ ObjIdx idx = BitmapPtrToIdx<align>(ptr, size);
+ // Check that the object is not already returned.
+ ASSERT(bitmap_.GetBit(idx) == 0);
+ // Set the bit indicating where the object was returned.
+ bitmap_.SetBit(idx);
+#ifndef NDEBUG
+ size_t after = bitmap_.CountBits(0, 64);
+ ASSERT(before + 1 == after);
+ ASSERT(allocated_ == embed_count_ - after);
+#endif
+ return true;
+}
+
+inline Span::Location Span::location() const {
+ return static_cast<Location>(location_);
+}
+
+inline void Span::set_location(Location loc) {
+ location_ = static_cast<uint64_t>(loc);
+}
+
+inline StackTrace* Span::sampled_stack() const {
+ ASSERT(sampled_);
+ return sampled_stack_;
+}
+
+inline bool Span::sampled() const { return sampled_; }
+
+inline PageId Span::first_page() const { return first_page_; }
+
+inline PageId Span::last_page() const {
+ return first_page_ + num_pages_ - Length(1);
+}
+
+inline void Span::set_first_page(PageId p) { first_page_ = p; }
+
+inline void* Span::start_address() const { return first_page_.start_addr(); }
+
+inline Length Span::num_pages() const { return num_pages_; }
+
+inline void Span::set_num_pages(Length len) { num_pages_ = len; }
+
+inline size_t Span::bytes_in_span() const { return num_pages_.in_bytes(); }
+
+inline void Span::set_freelist_added_time(uint64_t t) {
+ freelist_added_time_ = t;
+}
+
+inline uint64_t Span::freelist_added_time() const {
+ return freelist_added_time_;
+}
+
+inline bool Span::FreelistEmpty(size_t size) const {
+ if (size < kBitmapMinObjectSize) {
+ return (cache_size_ == 0 && freelist_ == kListEnd);
+ } else {
+ return (bitmap_.IsZero());
+ }
+}
+
+inline void Span::RemoveFromList() { SpanList::Elem::remove(); }
+
+inline void Span::Prefetch() {
+ // The first 16 bytes of a Span are the next and previous pointers
+  // for when it is stored in a linked list. Since sizeof(Span) is
+  // 48 bytes, spans fit into 2 cache lines 50% of the time, with either
+  // the first 16 bytes or the last 16 bytes in a different cache line.
+  // Prefetch the cacheline that contains the most frequently accessed
+  // data by offsetting into the middle of the Span.
+#if defined(__GNUC__)
+#if __WORDSIZE == 32
+ // The Span fits in one cache line, so simply prefetch the base pointer.
+ static_assert(sizeof(Span) == 32, "Update span prefetch offset");
+ __builtin_prefetch(this, 0, 3);
+#else
+ // The Span can occupy two cache lines, so prefetch the cacheline with the
+ // most frequently accessed parts of the Span.
+ static_assert(sizeof(Span) == 48, "Update span prefetch offset");
+ __builtin_prefetch(&this->allocated_, 0, 3);
+#endif
+#endif
+}
+
+inline void Span::Init(PageId p, Length n) {
+#ifndef NDEBUG
+ // In debug mode we have additional checking of our list ops; these must be
+ // initialized.
+ new (this) Span();
+#endif
+ first_page_ = p;
+ num_pages_ = n;
+ location_ = IN_USE;
+ sampled_ = 0;
+}
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // TCMALLOC_SPAN_H_
diff --git a/contrib/libs/tcmalloc/tcmalloc/span_benchmark.cc b/contrib/libs/tcmalloc/tcmalloc/span_benchmark.cc
new file mode 100644
index 0000000000..6e4569dd83
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/span_benchmark.cc
@@ -0,0 +1,212 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <stdlib.h>
+
+#include <utility>
+#include <vector>
+
+#include "absl/base/internal/spinlock.h"
+#include "absl/random/random.h"
+#include "benchmark/benchmark.h"
+#include "tcmalloc/common.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/span.h"
+#include "tcmalloc/static_vars.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+namespace {
+
+class RawSpan {
+ public:
+ void Init(size_t cl) {
+ size_t size = Static::sizemap().class_to_size(cl);
+ auto npages = Length(Static::sizemap().class_to_pages(cl));
+ size_t objects_per_span = npages.in_bytes() / size;
+
+ void *mem;
+ int res = posix_memalign(&mem, kPageSize, npages.in_bytes());
+ CHECK_CONDITION(res == 0);
+ span_.set_first_page(PageIdContaining(mem));
+ span_.set_num_pages(npages);
+ span_.BuildFreelist(size, objects_per_span, nullptr, 0);
+ }
+
+ ~RawSpan() { free(span_.start_address()); }
+
+ Span &span() { return span_; }
+
+ private:
+ Span span_;
+};
+
+// BM_single_span repeatedly pushes and pops the same num_objects_to_move(cl)
+// objects from the span.
+void BM_single_span(benchmark::State &state) {
+ const int cl = state.range(0);
+
+ size_t size = Static::sizemap().class_to_size(cl);
+ size_t batch_size = Static::sizemap().num_objects_to_move(cl);
+ RawSpan raw_span;
+ raw_span.Init(cl);
+ Span &span = raw_span.span();
+
+ void *batch[kMaxObjectsToMove];
+
+ int64_t processed = 0;
+ while (state.KeepRunningBatch(batch_size)) {
+ int n = span.FreelistPopBatch(batch, batch_size, size);
+ processed += n;
+
+ for (int j = 0; j < n; j++) {
+ span.FreelistPush(batch[j], size);
+ }
+ }
+
+ state.SetItemsProcessed(processed);
+}
+
+// BM_single_span_fulldrain alternates between fully draining and filling the
+// span.
+void BM_single_span_fulldrain(benchmark::State &state) {
+ const int cl = state.range(0);
+
+ size_t size = Static::sizemap().class_to_size(cl);
+ size_t npages = Static::sizemap().class_to_pages(cl);
+ size_t batch_size = Static::sizemap().num_objects_to_move(cl);
+ size_t objects_per_span = npages * kPageSize / size;
+ RawSpan raw_span;
+ raw_span.Init(cl);
+ Span &span = raw_span.span();
+
+ std::vector<void *> objects(objects_per_span, nullptr);
+ size_t oindex = 0;
+
+ size_t processed = 0;
+ while (state.KeepRunningBatch(objects_per_span)) {
+ // Drain span
+ while (oindex < objects_per_span) {
+ size_t popped = span.FreelistPopBatch(&objects[oindex], batch_size, size);
+ oindex += popped;
+ processed += popped;
+ }
+
+ // Fill span
+ while (oindex > 0) {
+ void *p = objects[oindex - 1];
+ if (!span.FreelistPush(p, size)) {
+ break;
+ }
+
+ oindex--;
+ }
+ }
+
+ state.SetItemsProcessed(processed);
+}
+
+BENCHMARK(BM_single_span)
+ ->Arg(1)
+ ->Arg(2)
+ ->Arg(3)
+ ->Arg(4)
+ ->Arg(5)
+ ->Arg(7)
+ ->Arg(10)
+ ->Arg(12)
+ ->Arg(16)
+ ->Arg(20)
+ ->Arg(30)
+ ->Arg(40)
+ ->Arg(kNumClasses - 1);
+
+BENCHMARK(BM_single_span_fulldrain)
+ ->Arg(1)
+ ->Arg(2)
+ ->Arg(3)
+ ->Arg(4)
+ ->Arg(5)
+ ->Arg(7)
+ ->Arg(10)
+ ->Arg(12)
+ ->Arg(16)
+ ->Arg(20)
+ ->Arg(30)
+ ->Arg(40)
+ ->Arg(kNumClasses - 1);
+
+void BM_NewDelete(benchmark::State &state) {
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+ for (auto s : state) {
+ Span *sp = Span::New(PageId{0}, Length(1));
+ benchmark::DoNotOptimize(sp);
+ Span::Delete(sp);
+ }
+ state.SetItemsProcessed(state.iterations());
+}
+
+BENCHMARK(BM_NewDelete);
+
+void BM_multiple_spans(benchmark::State &state) {
+ const int cl = state.range(0);
+
+ // Should be large enough to cause cache misses
+ const int num_spans = 10000000;
+ std::vector<Span> spans(num_spans);
+ size_t size = Static::sizemap().class_to_size(cl);
+ size_t batch_size = Static::sizemap().num_objects_to_move(cl);
+ for (int i = 0; i < num_spans; i++) {
+ RawSpan raw_span;
+ raw_span.Init(cl);
+ spans[i] = raw_span.span();
+ }
+ absl::BitGen rng;
+
+ void *batch[kMaxObjectsToMove];
+
+ int64_t processed = 0;
+ while (state.KeepRunningBatch(batch_size)) {
+ int current_span = absl::Uniform(rng, 0, num_spans);
+ int n = spans[current_span].FreelistPopBatch(batch, batch_size, size);
+ processed += n;
+
+ for (int j = 0; j < n; j++) {
+ spans[current_span].FreelistPush(batch[j], size);
+ }
+ }
+
+ state.SetItemsProcessed(processed);
+}
+
+BENCHMARK(BM_multiple_spans)
+ ->Arg(1)
+ ->Arg(2)
+ ->Arg(3)
+ ->Arg(4)
+ ->Arg(5)
+ ->Arg(7)
+ ->Arg(10)
+ ->Arg(12)
+ ->Arg(16)
+ ->Arg(20)
+ ->Arg(30)
+ ->Arg(40)
+ ->Arg(kNumClasses - 1);
+
+} // namespace
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
diff --git a/contrib/libs/tcmalloc/tcmalloc/span_stats.h b/contrib/libs/tcmalloc/tcmalloc/span_stats.h
new file mode 100644
index 0000000000..8c0b40b0fd
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/span_stats.h
@@ -0,0 +1,50 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TCMALLOC_SPAN_STATS_H_
+#define TCMALLOC_SPAN_STATS_H_
+
+#include <stddef.h>
+
+#include "absl/base/macros.h"
+#include "tcmalloc/internal/config.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+struct SpanStats {
+ size_t num_spans_requested = 0;
+ size_t num_spans_returned = 0;
+  size_t obj_capacity = 0;  // upper bound on objects that could be live anywhere
+
+ size_t num_live_spans() {
+ if (num_spans_requested < num_spans_returned) {
+ return 0;
+ }
+ return num_spans_requested - num_spans_returned;
+ }
+
+ // Probability that a span will be returned
+ double prob_returned() {
+ if (ABSL_PREDICT_FALSE(num_spans_requested == 0)) return 0.0;
+ return static_cast<double>(num_spans_returned) / num_spans_requested;
+ }
+};
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // TCMALLOC_SPAN_STATS_H_
diff --git a/contrib/libs/tcmalloc/tcmalloc/span_test.cc b/contrib/libs/tcmalloc/tcmalloc/span_test.cc
new file mode 100644
index 0000000000..750f3cca26
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/span_test.cc
@@ -0,0 +1,191 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/span.h"
+
+#include <stdlib.h>
+
+#include <utility>
+#include <vector>
+
+#include "gtest/gtest.h"
+#include "absl/base/internal/spinlock.h"
+#include "absl/container/flat_hash_set.h"
+#include "absl/random/random.h"
+#include "tcmalloc/common.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/static_vars.h"
+
+namespace tcmalloc {
+namespace tcmalloc_internal {
+namespace {
+
+class RawSpan {
+ public:
+ void Init(size_t cl) {
+ size_t size = Static::sizemap().class_to_size(cl);
+ auto npages = Length(Static::sizemap().class_to_pages(cl));
+ size_t objects_per_span = npages.in_bytes() / size;
+
+ void *mem;
+ int res = posix_memalign(&mem, kPageSize, npages.in_bytes());
+ CHECK_CONDITION(res == 0);
+ span_.set_first_page(PageIdContaining(mem));
+ span_.set_num_pages(npages);
+ span_.BuildFreelist(size, objects_per_span, nullptr, 0);
+ }
+
+ ~RawSpan() { free(span_.start_address()); }
+
+ Span &span() { return span_; }
+
+ private:
+ Span span_;
+};
+
+class SpanTest : public testing::TestWithParam<size_t> {
+ protected:
+ size_t cl_;
+ size_t size_;
+ size_t npages_;
+ size_t batch_size_;
+ size_t objects_per_span_;
+ RawSpan raw_span_;
+
+ private:
+ void SetUp() override {
+ cl_ = GetParam();
+ size_ = Static::sizemap().class_to_size(cl_);
+ if (size_ == 0) {
+ GTEST_SKIP() << "Skipping empty size class.";
+ }
+
+ npages_ = Static::sizemap().class_to_pages(cl_);
+ batch_size_ = Static::sizemap().num_objects_to_move(cl_);
+ objects_per_span_ = npages_ * kPageSize / size_;
+
+ raw_span_.Init(cl_);
+ }
+
+ void TearDown() override {}
+};
+
+TEST_P(SpanTest, FreelistBasic) {
+ Span &span_ = raw_span_.span();
+
+ EXPECT_FALSE(span_.FreelistEmpty(size_));
+ void *batch[kMaxObjectsToMove];
+ size_t popped = 0;
+ size_t want = 1;
+ char *start = static_cast<char *>(span_.start_address());
+ std::vector<bool> objects(objects_per_span_);
+ for (size_t x = 0; x < 2; ++x) {
+ // Pop all objects in batches of varying size and ensure that we've got
+ // all objects.
+ for (;;) {
+ size_t n = span_.FreelistPopBatch(batch, want, size_);
+ popped += n;
+ EXPECT_EQ(span_.FreelistEmpty(size_), popped == objects_per_span_);
+ for (size_t i = 0; i < n; ++i) {
+ void *p = batch[i];
+ uintptr_t off = reinterpret_cast<char *>(p) - start;
+ EXPECT_LT(off, span_.bytes_in_span());
+ EXPECT_EQ(off % size_, 0);
+ size_t idx = off / size_;
+ EXPECT_FALSE(objects[idx]);
+ objects[idx] = true;
+ }
+ if (n < want) {
+ break;
+ }
+ ++want;
+ if (want > batch_size_) {
+ want = 1;
+ }
+ }
+ EXPECT_TRUE(span_.FreelistEmpty(size_));
+ EXPECT_EQ(span_.FreelistPopBatch(batch, 1, size_), 0);
+ EXPECT_EQ(popped, objects_per_span_);
+
+ // Push all objects back except the last one (which would not be pushed).
+ for (size_t idx = 0; idx < objects_per_span_ - 1; ++idx) {
+ EXPECT_TRUE(objects[idx]);
+ bool ok = span_.FreelistPush(start + idx * size_, size_);
+ EXPECT_TRUE(ok);
+ EXPECT_FALSE(span_.FreelistEmpty(size_));
+ objects[idx] = false;
+ --popped;
+ }
+    // On the last iteration, also try to push the final object; FreelistPush
+    // refuses the last outstanding object (returns false).
+ if (x == 1) {
+ bool ok =
+ span_.FreelistPush(start + (objects_per_span_ - 1) * size_, size_);
+ EXPECT_FALSE(ok);
+ }
+ }
+}
+
+TEST_P(SpanTest, FreelistRandomized) {
+ Span &span_ = raw_span_.span();
+
+ char *start = static_cast<char *>(span_.start_address());
+
+ // Do a bunch of random pushes/pops with random batch size.
+ absl::BitGen rng;
+ absl::flat_hash_set<void *> objects;
+ void *batch[kMaxObjectsToMove];
+ for (size_t x = 0; x < 10000; ++x) {
+ if (!objects.empty() && absl::Bernoulli(rng, 1.0 / 2)) {
+ void *p = *objects.begin();
+ if (span_.FreelistPush(p, size_)) {
+ objects.erase(objects.begin());
+ } else {
+ EXPECT_EQ(objects.size(), 1);
+ }
+ EXPECT_EQ(span_.FreelistEmpty(size_), objects_per_span_ == 1);
+ } else {
+ size_t want = absl::Uniform<int32_t>(rng, 0, batch_size_) + 1;
+ size_t n = span_.FreelistPopBatch(batch, want, size_);
+ if (n < want) {
+ EXPECT_TRUE(span_.FreelistEmpty(size_));
+ }
+ for (size_t i = 0; i < n; ++i) {
+ EXPECT_TRUE(objects.insert(batch[i]).second);
+ }
+ }
+ }
+  // Now pop everything that's there.
+ for (;;) {
+ size_t n = span_.FreelistPopBatch(batch, batch_size_, size_);
+ for (size_t i = 0; i < n; ++i) {
+ EXPECT_TRUE(objects.insert(batch[i]).second);
+ }
+ if (n < batch_size_) {
+ break;
+ }
+ }
+ // Check that we have collected all objects.
+ EXPECT_EQ(objects.size(), objects_per_span_);
+ for (void *p : objects) {
+ uintptr_t off = reinterpret_cast<char *>(p) - start;
+ EXPECT_LT(off, span_.bytes_in_span());
+ EXPECT_EQ(off % size_, 0);
+ }
+}
+
+INSTANTIATE_TEST_SUITE_P(All, SpanTest, testing::Range(size_t(1), kNumClasses));
+
+} // namespace
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
diff --git a/contrib/libs/tcmalloc/tcmalloc/stack_trace_table.cc b/contrib/libs/tcmalloc/tcmalloc/stack_trace_table.cc
new file mode 100644
index 0000000000..5b5741b6a8
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/stack_trace_table.cc
@@ -0,0 +1,155 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/stack_trace_table.h"
+
+#include <stddef.h>
+#include <string.h>
+
+#include "absl/base/internal/spinlock.h"
+#include "absl/hash/hash.h"
+#include "tcmalloc/common.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/page_heap_allocator.h"
+#include "tcmalloc/sampler.h"
+#include "tcmalloc/static_vars.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+bool StackTraceTable::Bucket::KeyEqual(uintptr_t h, const StackTrace& t) const {
+ // Do not merge entries with different sizes so that profiling tools
+ // can allow size-based analysis of the resulting profiles. Note
+ // that sizes being supplied here are already quantized (to either
+ // the size-class size for small objects, or a multiple of pages for
+ // big objects). So the number of distinct buckets kept per stack
+ // trace should be fairly small.
+ if (this->hash != h || this->trace.depth != t.depth ||
+ this->trace.requested_size != t.requested_size ||
+ this->trace.requested_alignment != t.requested_alignment ||
+ // These could theoretically differ due to e.g. memalign choices.
+      // Split the buckets just in case that happens (though it should be rare).
+ this->trace.allocated_size != t.allocated_size) {
+ return false;
+ }
+ for (int i = 0; i < t.depth; ++i) {
+ if (this->trace.stack[i] != t.stack[i]) {
+ return false;
+ }
+ }
+ return true;
+}
+
+StackTraceTable::StackTraceTable(ProfileType type, int64_t period, bool merge,
+ bool unsample)
+ : type_(type),
+ period_(period),
+ bucket_mask_(merge ? (1 << 14) - 1 : 0),
+ depth_total_(0),
+ table_(new Bucket*[num_buckets()]()),
+ bucket_total_(0),
+ merge_(merge),
+ error_(false),
+ unsample_(unsample) {
+ memset(table_, 0, num_buckets() * sizeof(Bucket*));
+}
+
+StackTraceTable::~StackTraceTable() {
+ {
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+ for (int i = 0; i < num_buckets(); ++i) {
+ Bucket* b = table_[i];
+ while (b != nullptr) {
+ Bucket* next = b->next;
+ Static::DestroySampleUserData(b->trace.user_data);
+ Static::bucket_allocator().Delete(b);
+ b = next;
+ }
+ }
+ }
+ delete[] table_;
+}
+
+void StackTraceTable::AddTrace(double count, const StackTrace& t) {
+ if (error_) {
+ return;
+ }
+
+ uintptr_t h = absl::Hash<StackTrace>()(t);
+
+ const int idx = h & bucket_mask_;
+
+ Bucket* b = merge_ ? table_[idx] : nullptr;
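+  // When merging is disabled, bucket_mask_ is 0 and the existing chain is
+  // ignored (b starts as nullptr), so every trace gets its own bucket, all
+  // chained at table_[0].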
+ while (b != nullptr && !b->KeyEqual(h, t)) {
+ b = b->next;
+ }
+ if (b != nullptr) {
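+    // Merge into the existing bucket: accumulate the count and total weight,
+    // and keep trace.weight as the count-weighted average, rounded to nearest.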
+ b->count += count;
+ b->total_weight += count * t.weight;
+ b->trace.weight = b->total_weight / b->count + 0.5;
+ } else {
+ depth_total_ += t.depth;
+ bucket_total_++;
+ b = Static::bucket_allocator().New();
+ b->hash = h;
+ b->trace = t;
+ b->trace.user_data = Static::CopySampleUserData(t.user_data);
+ b->count = count;
+ b->total_weight = t.weight * count;
+ b->next = table_[idx];
+ table_[idx] = b;
+ }
+}
+
+void StackTraceTable::Iterate(
+ absl::FunctionRef<void(const Profile::Sample&)> func) const {
+ if (error_) {
+ return;
+ }
+
+ for (int i = 0; i < num_buckets(); ++i) {
+ Bucket* b = table_[i];
+ while (b != nullptr) {
+ // Report total bytes that are a multiple of the object size.
+ size_t allocated_size = b->trace.allocated_size;
+ size_t requested_size = b->trace.requested_size;
+
+ uintptr_t bytes = b->count * AllocatedBytes(b->trace, unsample_) + 0.5;
+
+ Profile::Sample e;
+ // We want sum to be a multiple of allocated_size; pick the nearest
+ // multiple rather than always rounding up or down.
+ e.count = (bytes + allocated_size / 2) / allocated_size;
+ e.sum = e.count * allocated_size;
+ e.requested_size = requested_size;
+ e.requested_alignment = b->trace.requested_alignment;
+ e.allocated_size = allocated_size;
+
+ e.user_data = b->trace.user_data;
+
+ e.depth = b->trace.depth;
+ static_assert(kMaxStackDepth <= Profile::Sample::kMaxStackDepth,
+ "Profile stack size smaller than internal stack sizes");
+ memcpy(e.stack, b->trace.stack, sizeof(e.stack[0]) * e.depth);
+ func(e);
+
+ b = b->next;
+ }
+ }
+}
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
diff --git a/contrib/libs/tcmalloc/tcmalloc/stack_trace_table.h b/contrib/libs/tcmalloc/tcmalloc/stack_trace_table.h
new file mode 100644
index 0000000000..a5a4a03636
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/stack_trace_table.h
@@ -0,0 +1,97 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Utility class for coalescing sampled stack traces. Not thread-safe.
+
+#ifndef TCMALLOC_STACK_TRACE_TABLE_H_
+#define TCMALLOC_STACK_TRACE_TABLE_H_
+
+#include <stdint.h>
+
+#include <string>
+
+#include "absl/base/thread_annotations.h"
+#include "tcmalloc/common.h"
+#include "tcmalloc/internal_malloc_extension.h"
+#include "tcmalloc/malloc_extension.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+class StackTraceTable final : public ProfileBase {
+ public:
+ // If merge is true, traces with identical size and stack are merged
+ // together. Else they are kept distinct.
+ // If unsample is true, Iterate() will scale counts to report estimates
+ // of the true total assuming traces were added by the sampler.
+ // REQUIRES: L < pageheap_lock
+ StackTraceTable(ProfileType type, int64_t period, bool merge, bool unsample);
+
+ // REQUIRES: L < pageheap_lock
+ ~StackTraceTable() override;
+
+ // base::Profile methods.
+ void Iterate(
+ absl::FunctionRef<void(const Profile::Sample&)> func) const override;
+
+ int64_t Period() const override { return period_; }
+
+ ProfileType Type() const override { return type_; }
+
+ // Adds stack trace "t" to table with the specified count.
+ // The count is a floating point value to reduce rounding
+ // errors when accounting for sampling probabilities.
+ void AddTrace(double count, const StackTrace& t)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+
+ // Exposed for PageHeapAllocator
+ struct Bucket {
+ // Key
+ uintptr_t hash;
+ StackTrace trace;
+
+ // Payload
+ double count;
+ size_t total_weight;
+ Bucket* next;
+
+ bool KeyEqual(uintptr_t h, const StackTrace& t) const;
+ };
+
+ // For testing
+ int depth_total() const { return depth_total_; }
+ int bucket_total() const { return bucket_total_; }
+
+ private:
+ static constexpr int kHashTableSize = 1 << 14; // => table_ is 128k
+
+ ProfileType type_;
+ int64_t period_;
+ int bucket_mask_;
+ int depth_total_;
+ Bucket** table_;
+ int bucket_total_;
+ bool merge_;
+ bool error_;
+ bool unsample_;
+
+ int num_buckets() const { return bucket_mask_ + 1; }
+};
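+
+// A minimal usage sketch (mirroring stack_trace_table_test.cc); `trace` is
+// assumed to be an already-populated StackTrace:
+//
+//   StackTraceTable table(ProfileType::kHeap, /*period=*/1, /*merge=*/true,
+//                         /*unsample=*/false);
+//   {
+//     absl::base_internal::SpinLockHolder h(&pageheap_lock);
+//     table.AddTrace(1.0, trace);
+//   }
+//   table.Iterate([](const Profile::Sample& s) { /* consume the sample */ });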
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // TCMALLOC_STACK_TRACE_TABLE_H_
diff --git a/contrib/libs/tcmalloc/tcmalloc/stack_trace_table_test.cc b/contrib/libs/tcmalloc/tcmalloc/stack_trace_table_test.cc
new file mode 100644
index 0000000000..4579798906
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/stack_trace_table_test.cc
@@ -0,0 +1,389 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/stack_trace_table.h"
+
+#include <stddef.h>
+
+#include <algorithm>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/base/attributes.h"
+#include "absl/base/internal/spinlock.h"
+#include "absl/base/macros.h"
+#include "absl/debugging/stacktrace.h"
+#include "absl/strings/str_format.h"
+#include "absl/strings/str_join.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/static_vars.h"
+
+namespace tcmalloc {
+namespace tcmalloc_internal {
+namespace {
+
+// Rather than deal with heap allocating stack/tags, AllocationEntry contains
+// them inline.
+struct AllocationEntry {
+ int64_t sum;
+ int count;
+ size_t requested_size;
+ size_t requested_alignment;
+ size_t allocated_size;
+ int depth;
+ void* stack[64];
+
+ friend bool operator==(const AllocationEntry& x, const AllocationEntry& y);
+ friend bool operator!=(const AllocationEntry& x, const AllocationEntry& y) {
+ return !(x == y);
+ }
+
+ friend std::ostream& operator<<(std::ostream& os, const AllocationEntry& e) {
+ os << "sum = " << e.sum << "; ";
+ os << "count = " << e.count << "; ";
+
+ std::vector<std::string> ptrs;
+ for (int i = 0; i < e.depth; i++) {
+ ptrs.push_back(absl::StrFormat("%p", e.stack[i]));
+ }
+ os << "stack = [" << absl::StrJoin(ptrs, ", ") << "]; ";
+
+ os << "requested_size = " << e.requested_size << "; ";
+ os << "requested_alignment = " << e.requested_alignment << "; ";
+ os << "allocated_size = " << e.allocated_size << "; ";
+ return os;
+ }
+};
+
+inline bool operator==(const AllocationEntry& x, const AllocationEntry& y) {
+ if (x.sum != y.sum) {
+ return false;
+ }
+
+ if (x.count != y.count) {
+ return false;
+ }
+
+ if (x.depth != y.depth) {
+ return false;
+ }
+
+ if (x.depth > 0 && !std::equal(x.stack, x.stack + x.depth, y.stack)) {
+ return false;
+ }
+
+ if (x.requested_size != y.requested_size) {
+ return false;
+ }
+
+ if (x.requested_alignment != y.requested_alignment) {
+ return false;
+ }
+
+ if (x.allocated_size != y.allocated_size) {
+ return false;
+ }
+
+ return true;
+}
+
+void CheckTraces(const StackTraceTable& table,
+ std::initializer_list<AllocationEntry> expected) {
+ std::vector<AllocationEntry> actual;
+
+ table.Iterate([&](const Profile::Sample& e) {
+ AllocationEntry tmp;
+ tmp.sum = e.sum;
+ tmp.count = e.count;
+ tmp.depth = e.depth;
+ ASSERT_LE(tmp.depth, ABSL_ARRAYSIZE(tmp.stack));
+ std::copy(e.stack, e.stack + e.depth, tmp.stack);
+
+ tmp.requested_size = e.requested_size;
+ tmp.requested_alignment = e.requested_alignment;
+ tmp.allocated_size = e.allocated_size;
+
+ actual.push_back(tmp);
+ });
+
+ EXPECT_THAT(actual, testing::UnorderedElementsAreArray(expected));
+}
+
+void AddTrace(StackTraceTable* table, double count, const StackTrace& t) {
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+ table->AddTrace(count, t);
+}
+
+TEST(StackTraceTableTest, StackTraceTable) {
+ // If this test is not linked against TCMalloc, the global arena used for
+ // StackTraceTable's buckets will not be initialized.
+ Static::InitIfNecessary();
+
+ // Empty table
+ {
+ SCOPED_TRACE("empty");
+
+ StackTraceTable table(ProfileType::kHeap, 1, true, false);
+ EXPECT_EQ(0, table.depth_total());
+ EXPECT_EQ(0, table.bucket_total());
+
+ CheckTraces(table, {});
+ }
+
+ StackTrace t1 = {};
+ t1.requested_size = static_cast<uintptr_t>(512);
+ t1.requested_alignment = static_cast<uintptr_t>(16);
+ t1.allocated_size = static_cast<uintptr_t>(1024);
+ t1.depth = static_cast<uintptr_t>(2);
+ t1.stack[0] = reinterpret_cast<void*>(1);
+ t1.stack[1] = reinterpret_cast<void*>(2);
+ t1.weight = 2 << 20;
+
+ const AllocationEntry k1 = {
+ 1024,
+ 1,
+ 512,
+ 16,
+ 1024,
+ 2,
+ {reinterpret_cast<void*>(1), reinterpret_cast<void*>(2)},
+ };
+
+ StackTrace t2 = {};
+ t2.requested_size = static_cast<uintptr_t>(375);
+ t2.requested_alignment = static_cast<uintptr_t>(0);
+ t2.allocated_size = static_cast<uintptr_t>(512);
+ t2.depth = static_cast<uintptr_t>(2);
+ t2.stack[0] = reinterpret_cast<void*>(2);
+ t2.stack[1] = reinterpret_cast<void*>(1);
+ t2.weight = 1;
+
+ const AllocationEntry k2 = {
+ 512,
+ 1,
+ 375,
+ 0,
+ 512,
+ 2,
+ {reinterpret_cast<void*>(2), reinterpret_cast<void*>(1)},
+ };
+
+ // Table w/ just t1
+ {
+ SCOPED_TRACE("t1");
+
+ StackTraceTable table(ProfileType::kHeap, 1, true, false);
+ AddTrace(&table, 1.0, t1);
+ EXPECT_EQ(2, table.depth_total());
+ EXPECT_EQ(1, table.bucket_total());
+
+ CheckTraces(table, {k1});
+ }
+
+ // We made our last sample at t1.weight (2<<20 bytes). We sample according to
+ // t1.requested_size + 1 (513 bytes). Therefore we overweight the sample to
+ // construct the distribution.
+ //
+ // We rely on the profiling tests to verify that this correctly reconstructs
+ // the distribution (+/- an error tolerance)
+ const int t1_sampled_weight =
+ static_cast<double>(t1.weight) / (t1.requested_size + 1);
+ ASSERT_EQ(t1_sampled_weight, 4088);
+ const AllocationEntry k1_unsampled = {
+ t1_sampled_weight * 1024,
+ t1_sampled_weight,
+ 512,
+ 16,
+ 1024,
+ 2,
+ {reinterpret_cast<void*>(1), reinterpret_cast<void*>(2)},
+ };
+
+ // Table w/ just t1 (unsampled)
+ {
+ SCOPED_TRACE("t1 unsampled");
+
+ StackTraceTable table(ProfileType::kHeap, 1, true, true);
+ AddTrace(&table, 1.0, t1);
+ EXPECT_EQ(2, table.depth_total());
+ EXPECT_EQ(1, table.bucket_total());
+
+ CheckTraces(table, {k1_unsampled});
+ }
+
+ const AllocationEntry k1_merged = {
+ 2048,
+ 2,
+ 512,
+ 16,
+ 1024,
+ 2,
+ {reinterpret_cast<void*>(1), reinterpret_cast<void*>(2)},
+ };
+
+ // Table w/ 2x t1 (merge)
+ {
+ SCOPED_TRACE("2x t1 merge");
+
+ StackTraceTable table(ProfileType::kHeap, 1, true, false);
+ AddTrace(&table, 1.0, t1);
+ AddTrace(&table, 1.0, t1);
+ EXPECT_EQ(2, table.depth_total());
+ EXPECT_EQ(1, table.bucket_total());
+
+ CheckTraces(table, {k1_merged});
+ }
+
+ // Table w/ 2x t1 (no merge)
+ {
+ SCOPED_TRACE("2x t1 no merge");
+
+ StackTraceTable table(ProfileType::kHeap, 1, false, false);
+ AddTrace(&table, 1.0, t1);
+ AddTrace(&table, 1.0, t1);
+ EXPECT_EQ(4, table.depth_total());
+ EXPECT_EQ(2, table.bucket_total());
+
+ CheckTraces(table, {k1, k1});
+ }
+
+ const AllocationEntry k1_unsampled_merged = {
+ 2 * t1_sampled_weight * 1024,
+ 2 * t1_sampled_weight,
+ 512,
+ 16,
+ 1024,
+ 2,
+ {reinterpret_cast<void*>(1), reinterpret_cast<void*>(2)},
+ };
+
+ {
+ SCOPED_TRACE("2x t1 unsampled");
+
+ StackTraceTable table(ProfileType::kHeap, 1, true, true);
+ AddTrace(&table, 1.0, t1);
+ AddTrace(&table, 1.0, t1);
+ EXPECT_EQ(2, table.depth_total());
+ EXPECT_EQ(1, table.bucket_total());
+
+ CheckTraces(table, {k1_unsampled_merged});
+ }
+
+ // Table w/ t1, t2
+ {
+ SCOPED_TRACE("t1, t2");
+
+ StackTraceTable table(ProfileType::kHeap, 1, true, false);
+ AddTrace(&table, 1.0, t1);
+ AddTrace(&table, 1.0, t2);
+ EXPECT_EQ(4, table.depth_total());
+ EXPECT_EQ(2, table.bucket_total());
+ CheckTraces(table, {k1, k2});
+ }
+
+ // Table w/ 1.6 x t1, 1 x t2.
+  // Note that t1's 1.6 count will be rounded up to 2.0.
+ {
+ SCOPED_TRACE("1.6 t1, t2");
+
+ StackTraceTable table(ProfileType::kHeap, 1, true, false);
+ AddTrace(&table, 0.4, t1);
+ AddTrace(&table, 1.0, t2);
+ AddTrace(&table, 1.2, t1);
+ EXPECT_EQ(4, table.depth_total());
+ EXPECT_EQ(2, table.bucket_total());
+
+ const AllocationEntry scaled_k1 = {
+ 2048,
+ 2,
+ 512,
+ 16,
+ 1024,
+ 2,
+ {reinterpret_cast<void*>(1), reinterpret_cast<void*>(2)},
+ };
+
+ CheckTraces(table, {scaled_k1, k2});
+ }
+
+ // Same stack as t1, but w/ different size
+ StackTrace t3 = {};
+ t3.requested_size = static_cast<uintptr_t>(13);
+ t3.requested_alignment = static_cast<uintptr_t>(0);
+ t3.allocated_size = static_cast<uintptr_t>(17);
+ t3.depth = static_cast<uintptr_t>(2);
+ t3.stack[0] = reinterpret_cast<void*>(1);
+ t3.stack[1] = reinterpret_cast<void*>(2);
+ t3.weight = 1;
+
+ const AllocationEntry k3 = {
+ 17,
+ 1,
+ 13,
+ 0,
+ 17,
+ 2,
+ {reinterpret_cast<void*>(1), reinterpret_cast<void*>(2)},
+ };
+
+ // Table w/ t1, t3
+ {
+ SCOPED_TRACE("t1, t3");
+
+ StackTraceTable table(ProfileType::kHeap, 1, true, false);
+ AddTrace(&table, 1.0, t1);
+ AddTrace(&table, 1.0, t3);
+ EXPECT_EQ(4, table.depth_total());
+ EXPECT_EQ(2, table.bucket_total());
+
+ CheckTraces(table, {k1, k3});
+ }
+
+ // Same stack as t1, but w/ different alignment
+  StackTrace t4 = {};
+ t4.requested_size = static_cast<uintptr_t>(512);
+ t4.requested_alignment = static_cast<uintptr_t>(32);
+ t4.allocated_size = static_cast<uintptr_t>(1024);
+ t4.depth = static_cast<uintptr_t>(2);
+ t4.stack[0] = reinterpret_cast<void*>(1);
+ t4.stack[1] = reinterpret_cast<void*>(2);
+ t4.weight = 1;
+
+ const AllocationEntry k4 = {
+ 1024,
+ 1,
+ 512,
+ 32,
+ 1024,
+ 2,
+ {reinterpret_cast<void*>(1), reinterpret_cast<void*>(2)},
+ };
+
+ // Table w/ t1, t4
+ {
+ SCOPED_TRACE("t1, t4");
+
+ StackTraceTable table(ProfileType::kHeap, 1, true, false);
+ AddTrace(&table, 1.0, t1);
+ AddTrace(&table, 1.0, t4);
+ EXPECT_EQ(4, table.depth_total());
+ EXPECT_EQ(2, table.bucket_total());
+
+ CheckTraces(table, {k1, k4});
+ }
+}
+
+} // namespace
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
diff --git a/contrib/libs/tcmalloc/tcmalloc/static_vars.cc b/contrib/libs/tcmalloc/tcmalloc/static_vars.cc
new file mode 100644
index 0000000000..08a70de493
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/static_vars.cc
@@ -0,0 +1,138 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/static_vars.h"
+
+#include <stddef.h>
+
+#include <atomic>
+#include <new>
+
+#include "absl/base/attributes.h"
+#include "absl/base/const_init.h"
+#include "absl/base/internal/spinlock.h"
+#include "absl/base/macros.h"
+#include "tcmalloc/cpu_cache.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/internal/mincore.h"
+#include "tcmalloc/internal/numa.h"
+#include "tcmalloc/malloc_extension.h"
+#include "tcmalloc/pagemap.h"
+#include "tcmalloc/sampler.h"
+#include "tcmalloc/thread_cache.h"
+#include "tcmalloc/tracking.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+// Cacheline-align our SizeMap and CPUCache. They both have very hot arrays as
+// their first member variables, and aligning them reduces the number of cache
+// lines these arrays use.
+//
+// IF YOU ADD TO THIS LIST, ADD TO STATIC_VAR_SIZE TOO!
+ABSL_CONST_INIT absl::base_internal::SpinLock pageheap_lock(
+ absl::kConstInit, absl::base_internal::SCHEDULE_KERNEL_ONLY);
+ABSL_CONST_INIT Arena Static::arena_;
+ABSL_CONST_INIT SizeMap ABSL_CACHELINE_ALIGNED Static::sizemap_;
+ABSL_CONST_INIT TransferCacheManager Static::transfer_cache_;
+ABSL_CONST_INIT ShardedTransferCacheManager Static::sharded_transfer_cache_;
+ABSL_CONST_INIT CPUCache ABSL_CACHELINE_ALIGNED Static::cpu_cache_;
+ABSL_CONST_INIT PageHeapAllocator<Span> Static::span_allocator_;
+ABSL_CONST_INIT PageHeapAllocator<StackTrace> Static::stacktrace_allocator_;
+ABSL_CONST_INIT PageHeapAllocator<ThreadCache> Static::threadcache_allocator_;
+ABSL_CONST_INIT SpanList Static::sampled_objects_;
+ABSL_CONST_INIT tcmalloc_internal::StatsCounter Static::sampled_objects_size_;
+ABSL_CONST_INIT PeakHeapTracker Static::peak_heap_tracker_;
+ABSL_CONST_INIT PageHeapAllocator<StackTraceTable::Bucket>
+ Static::bucket_allocator_;
+ABSL_CONST_INIT std::atomic<bool> Static::inited_{false};
+ABSL_CONST_INIT bool Static::cpu_cache_active_ = false;
+ABSL_CONST_INIT bool Static::fork_support_enabled_ = false;
+ABSL_CONST_INIT Static::CreateSampleUserDataCallback*
+ Static::create_sample_user_data_callback_ = nullptr;
+ABSL_CONST_INIT Static::CopySampleUserDataCallback*
+ Static::copy_sample_user_data_callback_ = nullptr;
+ABSL_CONST_INIT Static::DestroySampleUserDataCallback*
+ Static::destroy_sample_user_data_callback_ = nullptr;
+ABSL_CONST_INIT Static::PageAllocatorStorage Static::page_allocator_;
+ABSL_CONST_INIT PageMap Static::pagemap_;
+ABSL_CONST_INIT absl::base_internal::SpinLock guarded_page_lock(
+ absl::kConstInit, absl::base_internal::SCHEDULE_KERNEL_ONLY);
+ABSL_CONST_INIT GuardedPageAllocator Static::guardedpage_allocator_;
+ABSL_CONST_INIT NumaTopology<kNumaPartitions, kNumBaseClasses>
+ Static::numa_topology_;
+
+size_t Static::metadata_bytes() {
+ // This is ugly and doesn't nicely account for e.g. alignment losses
+ // -- I'd like to put all the above in a struct and take that
+ // struct's size. But we can't due to linking issues.
+ const size_t static_var_size =
+ sizeof(pageheap_lock) + sizeof(arena_) + sizeof(sizemap_) +
+ sizeof(sharded_transfer_cache_) + sizeof(transfer_cache_) +
+ sizeof(cpu_cache_) + sizeof(span_allocator_) +
+ sizeof(stacktrace_allocator_) + sizeof(threadcache_allocator_) +
+ sizeof(sampled_objects_) + sizeof(bucket_allocator_) +
+ sizeof(inited_) + sizeof(cpu_cache_active_) + sizeof(page_allocator_) +
+ sizeof(pagemap_) + sizeof(sampled_objects_size_) +
+ sizeof(peak_heap_tracker_) + sizeof(guarded_page_lock) +
+ sizeof(guardedpage_allocator_) + sizeof(numa_topology_);
+
+ const size_t allocated = arena().bytes_allocated() +
+ AddressRegionFactory::InternalBytesAllocated();
+ return allocated + static_var_size;
+}
+
+size_t Static::pagemap_residence() {
+ // Determine residence of the root node of the pagemap.
+ size_t total = MInCore::residence(&pagemap_, sizeof(pagemap_));
+ return total;
+}
+
+ABSL_ATTRIBUTE_COLD ABSL_ATTRIBUTE_NOINLINE void Static::SlowInitIfNecessary() {
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+
+ // double-checked locking
+ if (!inited_.load(std::memory_order_acquire)) {
+ tracking::Init();
+ sizemap_.Init();
+ numa_topology_.Init();
+ span_allocator_.Init(&arena_);
+ span_allocator_.New(); // Reduce cache conflicts
+ span_allocator_.New(); // Reduce cache conflicts
+ stacktrace_allocator_.Init(&arena_);
+ bucket_allocator_.Init(&arena_);
+ // Do a bit of sanitizing: make sure central_cache is aligned properly
+ CHECK_CONDITION((sizeof(transfer_cache_) % ABSL_CACHELINE_SIZE) == 0);
+ transfer_cache_.Init();
+ sharded_transfer_cache_.Init();
+ new (page_allocator_.memory) PageAllocator;
+ threadcache_allocator_.Init(&arena_);
+ cpu_cache_active_ = false;
+ pagemap_.MapRootWithSmallPages();
+ guardedpage_allocator_.Init(/*max_alloced_pages=*/64, /*total_pages=*/128);
+ inited_.store(true, std::memory_order_release);
+
+ pageheap_lock.Unlock();
+ pthread_atfork(
+ TCMallocPreFork,
+ TCMallocPostFork,
+ TCMallocPostFork);
+ pageheap_lock.Lock();
+ }
+}
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
diff --git a/contrib/libs/tcmalloc/tcmalloc/static_vars.h b/contrib/libs/tcmalloc/tcmalloc/static_vars.h
new file mode 100644
index 0000000000..be68edc189
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/static_vars.h
@@ -0,0 +1,262 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Static variables shared by multiple classes.
+
+#ifndef TCMALLOC_STATIC_VARS_H_
+#define TCMALLOC_STATIC_VARS_H_
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include <atomic>
+
+#include "absl/base/attributes.h"
+#include "absl/base/optimization.h"
+#include "absl/base/thread_annotations.h"
+#include "tcmalloc/arena.h"
+#include "tcmalloc/central_freelist.h"
+#include "tcmalloc/common.h"
+#include "tcmalloc/guarded_page_allocator.h"
+#include "tcmalloc/internal/atomic_stats_counter.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/internal/numa.h"
+#include "tcmalloc/internal/percpu.h"
+#include "tcmalloc/page_allocator.h"
+#include "tcmalloc/page_heap.h"
+#include "tcmalloc/page_heap_allocator.h"
+#include "tcmalloc/peak_heap_tracker.h"
+#include "tcmalloc/span.h"
+#include "tcmalloc/stack_trace_table.h"
+#include "tcmalloc/transfer_cache.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+class CPUCache;
+class PageMap;
+class ThreadCache;
+
+void TCMallocPreFork();
+void TCMallocPostFork();
+
+class Static {
+ public:
+ // True if InitIfNecessary() has run to completion.
+ static bool IsInited();
+ // Must be called before calling any of the accessors below.
+ // Safe to call multiple times.
+ static void InitIfNecessary();
+
+ // Central cache.
+ static const CentralFreeList& central_freelist(int size_class) {
+ return transfer_cache().central_freelist(size_class);
+ }
+ // Central cache -- an array of free-lists, one per size-class.
+ // We have a separate lock per free-list to reduce contention.
+ static TransferCacheManager& transfer_cache() { return transfer_cache_; }
+
+ // A per-cache domain TransferCache.
+ static ShardedTransferCacheManager& sharded_transfer_cache() {
+ return sharded_transfer_cache_;
+ }
+
+ static SizeMap& sizemap() { return sizemap_; }
+
+ static CPUCache& cpu_cache() { return cpu_cache_; }
+
+ static PeakHeapTracker& peak_heap_tracker() { return peak_heap_tracker_; }
+
+ static NumaTopology<kNumaPartitions, kNumBaseClasses>& numa_topology() {
+ return numa_topology_;
+ }
+
+ //////////////////////////////////////////////////////////////////////
+ // In addition to the explicit initialization comment, the variables below
+ // must be protected by pageheap_lock.
+
+ static Arena& arena() { return arena_; }
+
+ // Page-level allocator.
+ static PageAllocator& page_allocator() {
+ return *reinterpret_cast<PageAllocator*>(page_allocator_.memory);
+ }
+
+ static PageMap& pagemap() { return pagemap_; }
+
+ static GuardedPageAllocator& guardedpage_allocator() {
+ return guardedpage_allocator_;
+ }
+
+ static PageHeapAllocator<Span>& span_allocator() { return span_allocator_; }
+
+ static PageHeapAllocator<StackTrace>& stacktrace_allocator() {
+ return stacktrace_allocator_;
+ }
+
+ static PageHeapAllocator<ThreadCache>& threadcache_allocator() {
+ return threadcache_allocator_;
+ }
+
+ // State kept for sampled allocations (/heapz support). The StatsCounter is
+ // only written while holding pageheap_lock, so writes can safely use
+ // LossyAdd and reads do not require locking.
+ static SpanList sampled_objects_ ABSL_GUARDED_BY(pageheap_lock);
+ ABSL_CONST_INIT static tcmalloc_internal::StatsCounter sampled_objects_size_;
+
+ static PageHeapAllocator<StackTraceTable::Bucket>& bucket_allocator() {
+ return bucket_allocator_;
+ }
+
+ static bool ABSL_ATTRIBUTE_ALWAYS_INLINE CPUCacheActive() {
+ return cpu_cache_active_;
+ }
+ static void ActivateCPUCache() { cpu_cache_active_ = true; }
+ static void DeactivateCPUCache() { cpu_cache_active_ = false; }
+
+ static bool ForkSupportEnabled() { return fork_support_enabled_; }
+ static void EnableForkSupport() { fork_support_enabled_ = true; }
+
+ using CreateSampleUserDataCallback = void*();
+ using CopySampleUserDataCallback = void*(void*);
+ using DestroySampleUserDataCallback = void(void*);
+
+ static void SetSampleUserDataCallbacks(
+ CreateSampleUserDataCallback create,
+ CopySampleUserDataCallback copy,
+ DestroySampleUserDataCallback destroy) {
+ create_sample_user_data_callback_ = create;
+ copy_sample_user_data_callback_ = copy;
+ destroy_sample_user_data_callback_ = destroy;
+ }
+
+ static void* CreateSampleUserData() {
+ if (create_sample_user_data_callback_)
+ return create_sample_user_data_callback_();
+ return nullptr;
+ }
+ static void* CopySampleUserData(void* user_data) {
+ if (copy_sample_user_data_callback_)
+ return copy_sample_user_data_callback_(user_data);
+ return nullptr;
+ }
+ static void DestroySampleUserData(void* user_data) {
+ if (destroy_sample_user_data_callback_)
+ destroy_sample_user_data_callback_(user_data);
+ }
+
+ static bool ABSL_ATTRIBUTE_ALWAYS_INLINE IsOnFastPath() {
+ return
+#ifndef TCMALLOC_DEPRECATED_PERTHREAD
+ // When the per-cpu cache is enabled, and the thread's current cpu
+ // variable is initialized we will try to allocate from the per-cpu
+ // cache. If something fails, we bail out to the full malloc.
+ // Checking the current cpu variable here allows us to remove it from
+ // the fast-path, since we will fall back to the slow path until this
+ // variable is initialized.
+ CPUCacheActive() & subtle::percpu::IsFastNoInit();
+#else
+ !CPUCacheActive();
+#endif
+ }
+
+ static size_t metadata_bytes() ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+
+ // The root of the pagemap is potentially a large poorly utilized
+ // structure, so figure out how much of it is actually resident.
+ static size_t pagemap_residence();
+
+ private:
+#if defined(__clang__)
+ __attribute__((preserve_most))
+#endif
+ static void
+ SlowInitIfNecessary();
+
+ // These static variables require explicit initialization. We cannot
+ // count on their constructors to do any initialization because other
+ // static variables may try to allocate memory before these variables
+ // can run their constructors.
+
+ ABSL_CONST_INIT static Arena arena_;
+ static SizeMap sizemap_;
+ ABSL_CONST_INIT static TransferCacheManager transfer_cache_;
+ ABSL_CONST_INIT static ShardedTransferCacheManager sharded_transfer_cache_;
+ static CPUCache cpu_cache_;
+ ABSL_CONST_INIT static GuardedPageAllocator guardedpage_allocator_;
+ static PageHeapAllocator<Span> span_allocator_;
+ static PageHeapAllocator<StackTrace> stacktrace_allocator_;
+ static PageHeapAllocator<ThreadCache> threadcache_allocator_;
+ static PageHeapAllocator<StackTraceTable::Bucket> bucket_allocator_;
+ ABSL_CONST_INIT static std::atomic<bool> inited_;
+ static bool cpu_cache_active_;
+ static bool fork_support_enabled_;
+ static CreateSampleUserDataCallback* create_sample_user_data_callback_;
+ static CopySampleUserDataCallback* copy_sample_user_data_callback_;
+ static DestroySampleUserDataCallback* destroy_sample_user_data_callback_;
+ ABSL_CONST_INIT static PeakHeapTracker peak_heap_tracker_;
+ ABSL_CONST_INIT static NumaTopology<kNumaPartitions, kNumBaseClasses>
+ numa_topology_;
+
+ // PageHeap uses a constructor for initialization. Like the members above,
+ // we can't depend on initialization order, so pageheap is new'd
+ // into this buffer.
+ union PageAllocatorStorage {
+ constexpr PageAllocatorStorage() : extra(0) {}
+
+ char memory[sizeof(PageAllocator)];
+ uintptr_t extra; // To force alignment
+ };
+
+ static PageAllocatorStorage page_allocator_;
+ static PageMap pagemap_;
+};
+
+inline bool Static::IsInited() {
+ return inited_.load(std::memory_order_acquire);
+}
+
+inline void Static::InitIfNecessary() {
+ if (ABSL_PREDICT_FALSE(!IsInited())) {
+ SlowInitIfNecessary();
+ }
+}
+
+// Why are these functions here? Because we want to inline them, but they
+// need access to Static::span_allocator. Putting them in span.h would lead
+// to nasty dependency loops. Since anything that needs them certainly
+// includes static_vars.h, this is a perfectly good compromise.
+// TODO(b/134687001): move span_allocator to Span, getting rid of the need for
+// this.
+inline Span* Span::New(PageId p, Length len) {
+ Span* result = Static::span_allocator().New();
+ result->Init(p, len);
+ return result;
+}
+
+inline void Span::Delete(Span* span) {
+#ifndef NDEBUG
+ // In debug mode, trash the contents of deleted Spans
+ memset(static_cast<void*>(span), 0x3f, sizeof(*span));
+#endif
+ Static::span_allocator().Delete(span);
+}
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // TCMALLOC_STATIC_VARS_H_
diff --git a/contrib/libs/tcmalloc/tcmalloc/stats.cc b/contrib/libs/tcmalloc/tcmalloc/stats.cc
new file mode 100644
index 0000000000..bb553ee5cd
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/stats.cc
@@ -0,0 +1,553 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/stats.h"
+
+#include <inttypes.h>
+#include <string.h>
+#include <sys/time.h>
+#include <time.h>
+
+#include <algorithm>
+#include <cstdint>
+#include <limits>
+
+#include "absl/base/dynamic_annotations.h"
+#include "absl/base/internal/cycleclock.h"
+#include "absl/base/macros.h"
+#include "absl/numeric/bits.h"
+#include "absl/strings/string_view.h"
+#include "absl/time/time.h"
+#include "tcmalloc/common.h"
+#include "tcmalloc/huge_pages.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/internal/util.h"
+#include "tcmalloc/pages.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+static double BytesToMiB(size_t bytes) {
+ const double MiB = 1048576.0;
+ return bytes / MiB;
+}
+
+static double PagesToMiB(uint64_t pages) {
+ return BytesToMiB(pages * kPageSize);
+}
+
+// For example, PrintRightAdjustedWithPrefix(out, ">=", 42, 6) prints " >=42".
+static void PrintRightAdjustedWithPrefix(Printer *out, const char *prefix,
+ Length num, int width) {
+ width -= strlen(prefix);
+ int num_tmp = num.raw_num();
+ for (int i = 0; i < width - 1; i++) {
+ num_tmp /= 10;
+ if (num_tmp == 0) {
+ out->printf(" ");
+ }
+ }
+ out->printf("%s%zu", prefix, num.raw_num());
+}
+
+void PrintStats(const char *label, Printer *out, const BackingStats &backing,
+ const SmallSpanStats &small, const LargeSpanStats &large,
+ bool everything) {
+ size_t nonempty_sizes = 0;
+ for (int i = 0; i < kMaxPages.raw_num(); ++i) {
+ const size_t norm = small.normal_length[i];
+ const size_t ret = small.returned_length[i];
+ if (norm + ret > 0) nonempty_sizes++;
+ }
+
+ out->printf("------------------------------------------------\n");
+ out->printf("%s: %zu sizes; %6.1f MiB free; %6.1f MiB unmapped\n", label,
+ nonempty_sizes, BytesToMiB(backing.free_bytes),
+ BytesToMiB(backing.unmapped_bytes));
+ out->printf("------------------------------------------------\n");
+
+ Length cum_normal_pages, cum_returned_pages, cum_total_pages;
+ if (!everything) return;
+
+ for (size_t i = 0; i < kMaxPages.raw_num(); ++i) {
+ const size_t norm = small.normal_length[i];
+ const size_t ret = small.returned_length[i];
+ const size_t total = norm + ret;
+ if (total == 0) continue;
+ const Length norm_pages = Length(norm * i);
+ const Length ret_pages = Length(ret * i);
+ const Length total_pages = norm_pages + ret_pages;
+ cum_normal_pages += norm_pages;
+ cum_returned_pages += ret_pages;
+ cum_total_pages += total_pages;
+ out->printf(
+ "%6zu pages * %6zu spans ~ %6.1f MiB; %6.1f MiB cum"
+ "; unmapped: %6.1f MiB; %6.1f MiB cum\n",
+ i, total, total_pages.in_mib(), cum_total_pages.in_mib(),
+ ret_pages.in_mib(), cum_returned_pages.in_mib());
+ }
+
+ cum_normal_pages += large.normal_pages;
+ cum_returned_pages += large.returned_pages;
+ const Length large_total_pages = large.normal_pages + large.returned_pages;
+ cum_total_pages += large_total_pages;
+ PrintRightAdjustedWithPrefix(out, ">=", kMaxPages, 6);
+ out->printf(
+ " large * %6zu spans ~ %6.1f MiB; %6.1f MiB cum"
+ "; unmapped: %6.1f MiB; %6.1f MiB cum\n",
+ static_cast<size_t>(large.spans), large_total_pages.in_mib(),
+ cum_total_pages.in_mib(), large.returned_pages.in_mib(),
+ cum_returned_pages.in_mib());
+}
+
+struct HistBucket {
+ uint64_t min_sec;
+ const char *label;
+};
+
+static const HistBucket kSpanAgeHistBuckets[] = {
+ // clang-format off
+ {0, "<1s"},
+ {1, "1s"},
+ {30, "30s"},
+ {1 * 60, "1m"},
+ {30 * 60, "30m"},
+ {1 * 60 * 60, "1h"},
+ {8 * 60 * 60, "8+h"},
+ // clang-format on
+};
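+
+// For example, HistBucketIndex() (below) maps an age of 45 seconds to index 2,
+// the "30s" bucket, since 45 >= 30 but 45 < 60.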
+
+struct PageHeapEntry {
+ int64_t span_size; // bytes
+ int64_t present; // bytes
+ int64_t released; // bytes
+ int64_t num_spans;
+ double avg_live_age_secs;
+ double avg_released_age_secs;
+ int64_t live_age_hist_bytes[PageAgeHistograms::kNumBuckets] = {0, 0, 0, 0,
+ 0, 0, 0};
+ int64_t released_age_hist_bytes[PageAgeHistograms::kNumBuckets] = {0, 0, 0, 0,
+ 0, 0, 0};
+
+ void PrintInPbtxt(PbtxtRegion *parent,
+ absl::string_view sub_region_name) const;
+};
+
+void PageHeapEntry::PrintInPbtxt(PbtxtRegion *parent,
+ absl::string_view sub_region_name) const {
+ auto page_heap = parent->CreateSubRegion(sub_region_name);
+ page_heap.PrintI64("span_size", span_size);
+ page_heap.PrintI64("present", present);
+ page_heap.PrintI64("released", released);
+ page_heap.PrintI64("num_spans", num_spans);
+ page_heap.PrintDouble("avg_live_age_secs", avg_live_age_secs);
+ page_heap.PrintDouble("avg_released_age_secs", avg_released_age_secs);
+
+ for (int j = 0; j < PageAgeHistograms::kNumBuckets; j++) {
+ uint64_t min_age_secs = kSpanAgeHistBuckets[j].min_sec;
+ uint64_t max_age_secs = j != PageAgeHistograms::kNumBuckets - 1
+ ? kSpanAgeHistBuckets[j + 1].min_sec
+ : INT_MAX;
+ if (live_age_hist_bytes[j] != 0) {
+ auto live_age_hist = page_heap.CreateSubRegion("live_age_hist");
+ live_age_hist.PrintI64("bytes", live_age_hist_bytes[j]);
+ live_age_hist.PrintI64("min_age_secs", min_age_secs);
+ live_age_hist.PrintI64("max_age_secs", max_age_secs);
+ }
+ if (released_age_hist_bytes[j] != 0) {
+ auto released_age_hist = page_heap.CreateSubRegion("released_age_hist");
+ released_age_hist.PrintI64("bytes", released_age_hist_bytes[j]);
+ released_age_hist.PrintI64("min_age_secs", min_age_secs);
+ released_age_hist.PrintI64("max_age_secs", max_age_secs);
+ }
+ }
+}
+
+void PrintStatsInPbtxt(PbtxtRegion *region, const SmallSpanStats &small,
+ const LargeSpanStats &large,
+ const PageAgeHistograms &ages) {
+ // Print for small pages.
+ for (auto i = Length(0); i < kMaxPages; ++i) {
+ const size_t norm = small.normal_length[i.raw_num()];
+ const size_t ret = small.returned_length[i.raw_num()];
+ const size_t total = norm + ret;
+ if (total == 0) continue;
+ const Length norm_pages = norm * i;
+ const Length ret_pages = ret * i;
+ PageHeapEntry entry;
+ entry.span_size = i.in_bytes();
+ entry.present = norm_pages.in_bytes();
+ entry.released = ret_pages.in_bytes();
+ entry.num_spans = total;
+
+ // Histogram is only collected for pages < ages.kNumSize.
+ if (i < Length(PageAgeHistograms::kNumSizes)) {
+ entry.avg_live_age_secs =
+ ages.GetSmallHistogram(/*released=*/false, i)->avg_age();
+ entry.avg_released_age_secs =
+ ages.GetSmallHistogram(/*released=*/true, i)->avg_age();
+ for (int j = 0; j < ages.kNumBuckets; j++) {
+ entry.live_age_hist_bytes[j] =
+ ages.GetSmallHistogram(/*released=*/false, i)->pages_in_bucket(j) *
+ kPageSize;
+ entry.released_age_hist_bytes[j] =
+ ages.GetSmallHistogram(/*released=*/true, i)->pages_in_bucket(j) *
+ kPageSize;
+ }
+ }
+ entry.PrintInPbtxt(region, "page_heap");
+ }
+
+ // Print for large page.
+ {
+ PageHeapEntry entry;
+ entry.span_size = -1;
+ entry.num_spans = large.spans;
+ entry.present = large.normal_pages.in_bytes();
+ entry.released = large.returned_pages.in_bytes();
+ entry.avg_live_age_secs =
+ ages.GetLargeHistogram(/*released=*/false)->avg_age();
+ entry.avg_released_age_secs =
+ ages.GetLargeHistogram(/*released=*/true)->avg_age();
+ for (int j = 0; j < ages.kNumBuckets; j++) {
+ entry.live_age_hist_bytes[j] =
+ ages.GetLargeHistogram(/*released=*/false)->pages_in_bucket(j) *
+ kPageSize;
+ entry.released_age_hist_bytes[j] =
+ ages.GetLargeHistogram(/*released=*/true)->pages_in_bucket(j) *
+ kPageSize;
+ }
+ entry.PrintInPbtxt(region, "page_heap");
+ }
+
+ region->PrintI64("min_large_span_size", kMaxPages.raw_num());
+}
+
+static int HistBucketIndex(double age_exact) {
+ uint64_t age_secs = age_exact; // truncate to seconds
+ for (int i = 0; i < ABSL_ARRAYSIZE(kSpanAgeHistBuckets) - 1; i++) {
+ if (age_secs < kSpanAgeHistBuckets[i + 1].min_sec) {
+ return i;
+ }
+ }
+ return ABSL_ARRAYSIZE(kSpanAgeHistBuckets) - 1;
+}
+
+PageAgeHistograms::PageAgeHistograms(int64_t now)
+ : now_(now), freq_(absl::base_internal::CycleClock::Frequency()) {
+ static_assert(
+ PageAgeHistograms::kNumBuckets == ABSL_ARRAYSIZE(kSpanAgeHistBuckets),
+ "buckets don't match constant in header");
+}
+
+void PageAgeHistograms::RecordRange(Length pages, bool released, int64_t when) {
+ double age = std::max(0.0, (now_ - when) / freq_);
+ (released ? returned_ : live_).Record(pages, age);
+}
+
+void PageAgeHistograms::PerSizeHistograms::Record(Length pages, double age) {
+ (pages < kLargeSize ? GetSmall(pages) : GetLarge())->Record(pages, age);
+ total.Record(pages, age);
+}
+
+static uint32_t SaturatingAdd(uint32_t x, uint32_t y) {
+ uint32_t z = x + y;
+ if (z < x) z = std::numeric_limits<uint32_t>::max();
+ return z;
+}
+
+void PageAgeHistograms::Histogram::Record(Length pages, double age) {
+ size_t bucket = HistBucketIndex(age);
+ buckets_[bucket] = SaturatingAdd(buckets_[bucket], pages.raw_num());
+ total_pages_ += pages;
+ total_age_ += pages.raw_num() * age;
+}
+
+void PageAgeHistograms::Print(const char *label, Printer *out) const {
+ out->printf("------------------------------------------------\n");
+ out->printf(
+ "%s cache entry age (count of pages in spans of "
+ "a given size that have been idle for up to the given period of time)\n",
+ label);
+ out->printf("------------------------------------------------\n");
+ out->printf(" ");
+ // Print out the table header. All columns have width 8 chars.
+ out->printf(" mean");
+ for (int b = 0; b < kNumBuckets; b++) {
+ out->printf("%8s", kSpanAgeHistBuckets[b].label);
+ }
+ out->printf("\n");
+
+ live_.Print("Live span", out);
+ out->printf("\n");
+ returned_.Print("Unmapped span", out);
+}
+
+static void PrintLineHeader(Printer *out, const char *kind, const char *prefix,
+ Length num) {
+ // Print the beginning of the line, e.g. "Live span, >=128 pages: ". The
+ // span size ("128" in the example) is padded such that it plus the span
+ // prefix ("Live") plus the span size prefix (">=") is kHeaderExtraChars wide.
+ const int kHeaderExtraChars = 19;
+ const int span_size_width =
+ std::max<int>(0, kHeaderExtraChars - strlen(kind));
+ out->printf("%s, ", kind);
+ PrintRightAdjustedWithPrefix(out, prefix, num, span_size_width);
+ out->printf(" pages: ");
+}
+
+void PageAgeHistograms::PerSizeHistograms::Print(const char *kind,
+ Printer *out) const {
+ out->printf("%-15s TOTAL PAGES: ", kind);
+ total.Print(out);
+
+ for (auto l = Length(1); l < Length(kNumSizes); ++l) {
+ const Histogram *here = &small[l.raw_num() - 1];
+ if (here->empty()) continue;
+ PrintLineHeader(out, kind, "", l);
+ here->Print(out);
+ }
+
+ if (!large.empty()) {
+ PrintLineHeader(out, kind, ">=", Length(kNumSizes));
+ large.Print(out);
+ }
+}
+
+void PageAgeHistograms::Histogram::Print(Printer *out) const {
+ const double mean = avg_age();
+ out->printf(" %7.1f", mean);
+ for (int b = 0; b < kNumBuckets; ++b) {
+ out->printf(" %7" PRIu32, buckets_[b]);
+ }
+
+ out->printf("\n");
+}
+
+void PageAllocInfo::Print(Printer *out) const {
+ int64_t ticks = TimeTicks();
+ double hz = freq_ / ticks;
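+  // ticks / freq_ is the elapsed time in seconds since construction, so
+  // hz == 1 / elapsed and multiplying an event count by hz gives a rate per
+  // second.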
+ out->printf("%s: stats on allocation sizes\n", label_);
+ out->printf("%s: %zu pages live small allocation\n", label_,
+ total_small_.raw_num());
+ out->printf("%s: %zu pages of slack on large allocations\n", label_,
+ total_slack_.raw_num());
+ out->printf("%s: largest seen allocation %zu pages\n", label_,
+ largest_seen_.raw_num());
+ out->printf("%s: per-size information:\n", label_);
+
+ auto print_counts = [this, hz, out](const Counts &c, Length nmin,
+ Length nmax) {
+ const size_t a = c.nalloc;
+ const size_t f = c.nfree;
+ const Length a_pages = c.alloc_size;
+ const Length f_pages = c.free_size;
+ if (a == 0) return;
+ const size_t live = a - f;
+ const double live_mib = (a_pages - f_pages).in_mib();
+ const double rate_hz = a * hz;
+ const double mib_hz = a_pages.in_mib() * hz;
+ if (nmin == nmax) {
+ out->printf("%s: %21zu page info: ", label_, nmin.raw_num());
+ } else {
+ out->printf("%s: [ %7zu , %7zu ] page info: ", label_, nmin.raw_num(),
+ nmax.raw_num());
+ }
+ out->printf(
+ "%10zu / %10zu a/f, %8zu (%6.1f MiB) live, "
+ "%8.3g allocs/s (%6.1f MiB/s)\n",
+ a, f, live, live_mib, rate_hz, mib_hz);
+ };
+
+ for (auto i = Length(0); i < kMaxPages; ++i) {
+ const Length n = i + Length(1);
+ print_counts(small_[i.raw_num()], n, n);
+ }
+
+ for (int i = 0; i < kAddressBits - kPageShift; ++i) {
+ const Length nmax = Length(uintptr_t{1} << i);
+ const Length nmin = nmax / 2 + Length(1);
+ print_counts(large_[i], nmin, nmax);
+ }
+}
+
+void PageAllocInfo::PrintInPbtxt(PbtxtRegion *region,
+ absl::string_view stat_name) const {
+ int64_t ticks = TimeTicks();
+ double hz = freq_ / ticks;
+ region->PrintI64("num_small_allocation_pages", total_small_.raw_num());
+ region->PrintI64("num_slack_pages", total_slack_.raw_num());
+ region->PrintI64("largest_allocation_pages", largest_seen_.raw_num());
+
+ auto print_counts = [hz, region, &stat_name](const Counts &c, Length nmin,
+ Length nmax) {
+ const size_t a = c.nalloc;
+ const size_t f = c.nfree;
+ const Length a_pages = c.alloc_size;
+ const Length f_pages = c.free_size;
+ if (a == 0) return;
+ const int64_t live_bytes = (a_pages - f_pages).in_bytes();
+ const double rate_hz = a * hz;
+ const double bytes_hz = static_cast<double>(a_pages.in_bytes()) * hz;
+ auto stat = region->CreateSubRegion(stat_name);
+ stat.PrintI64("min_span_pages", nmin.raw_num());
+ stat.PrintI64("max_span_pages", nmax.raw_num());
+ stat.PrintI64("num_spans_allocated", a);
+ stat.PrintI64("num_spans_freed", f);
+ stat.PrintI64("live_bytes", live_bytes);
+ stat.PrintDouble("spans_allocated_per_second", rate_hz);
+ stat.PrintI64("bytes_allocated_per_second", static_cast<int64_t>(bytes_hz));
+ };
+
+ for (auto i = Length(0); i < kMaxPages; ++i) {
+ const Length n = i + Length(1);
+ print_counts(small_[i.raw_num()], n, n);
+ }
+
+ for (int i = 0; i < kAddressBits - kPageShift; ++i) {
+ const Length nmax = Length(uintptr_t(1) << i);
+ const Length nmin = nmax / 2 + Length(1);
+ print_counts(large_[i], nmin, nmax);
+ }
+}
+
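+// Rounds `value` up to a multiple of `alignment`; assumes `alignment` is a
+// power of two (which kPagesPerHugePage is).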
+static Length RoundUp(Length value, Length alignment) {
+ return Length((value.raw_num() + alignment.raw_num() - 1) &
+ ~(alignment.raw_num() - 1));
+}
+
+void PageAllocInfo::RecordAlloc(PageId p, Length n) {
+ if (ABSL_PREDICT_FALSE(log_on())) {
+ int64_t t = TimeTicks();
+ LogAlloc(t, p, n);
+ }
+
+ static_assert(kMaxPages.in_bytes() == 1024 * 1024, "threshold changed?");
+ static_assert(kMaxPages < kPagesPerHugePage, "there should be slack");
+ largest_seen_ = std::max(largest_seen_, n);
+ if (n <= kMaxPages) {
+ total_small_ += n;
+ small_[(n - Length(1)).raw_num()].Alloc(n);
+ } else {
+ Length slack = RoundUp(n, kPagesPerHugePage) - n;
+ total_slack_ += slack;
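+    // Index large allocations by ceil(log2(pages)): bit_width(n - 1) is the
+    // smallest i with n <= 2^i.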
+ size_t i = absl::bit_width(n.raw_num() - 1);
+ large_[i].Alloc(n);
+ }
+}
+
+void PageAllocInfo::RecordFree(PageId p, Length n) {
+ if (ABSL_PREDICT_FALSE(log_on())) {
+ int64_t t = TimeTicks();
+ LogFree(t, p, n);
+ }
+
+ if (n <= kMaxPages) {
+ total_small_ -= n;
+ small_[n.raw_num() - 1].Free(n);
+ } else {
+ Length slack = RoundUp(n, kPagesPerHugePage) - n;
+ total_slack_ -= slack;
+ size_t i = absl::bit_width(n.raw_num() - 1);
+ large_[i].Free(n);
+ }
+}
+
+void PageAllocInfo::RecordRelease(Length n, Length got) {
+ if (ABSL_PREDICT_FALSE(log_on())) {
+ int64_t t = TimeTicks();
+ LogRelease(t, n);
+ }
+}
+
+const PageAllocInfo::Counts &PageAllocInfo::counts_for(Length n) const {
+ if (n <= kMaxPages) {
+ return small_[n.raw_num() - 1];
+ }
+ size_t i = absl::bit_width(n.raw_num() - 1);
+ return large_[i];
+}
+
+// Our current format is really simple. We have an eight-byte version
+// number as a header (currently = 1). We then follow up with a sequence
+// of fixed-size events, each 16 bytes:
+// - 8 byte "id" (really returned page)
+// - 4 byte size (in kib, for compatibility)
+// (this gets us to 4 TiB; anything larger is reported truncated)
+// - 4 bytes for when (ms since last event) + what
+// We shift up the when by 8 bits, and store what the event is in
+// low 8 bits. (Currently just 0=alloc, 1=free, 2=Release.)
+// This truncates time deltas to 2^24 ms ~= 4 hours.
+// This could be compressed further. (As is, it compresses well
+// with gzip.)
+// All values are host-order.
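+//
+// For example, an 8 KiB allocation at page index 0x1234, occurring 50 ms after
+// the previous event, is written as Entry{.id = 0x1234, .kib = 8,
+// .whenwhat = (50 << 8) | 0}.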
+
+struct Entry {
+ uint64_t id;
+ uint32_t kib;
+ uint32_t whenwhat;
+};
+
+using tcmalloc::tcmalloc_internal::signal_safe_write;
+
+void PageAllocInfo::Write(uint64_t when, uint8_t what, PageId p, Length n) {
+ static_assert(sizeof(Entry) == 16, "bad sizing");
+ Entry e;
+ // Round the time to ms *before* computing deltas, because this produces more
+ // accurate results in the long run.
+
+ // Consider events that occur at absolute time 0.7ms and 50ms. If
+ // we take deltas first, we say the first event occurred at +0.7 =
+ // 0ms and the second event occurred at +49.3ms = 49ms.
+ // Rounding first produces 0 and 50.
+ const uint64_t ms = when * 1000 / freq_;
+ uint64_t delta_ms = ms - last_ms_;
+ last_ms_ = ms;
+ // clamping
+ if (delta_ms >= 1 << 24) {
+ delta_ms = (1 << 24) - 1;
+ }
+ e.whenwhat = delta_ms << 8 | what;
+ e.id = p.index();
+ size_t bytes = n.in_bytes();
+ static const size_t KiB = 1024;
+ static const size_t kMaxRep = std::numeric_limits<uint32_t>::max() * KiB;
+ if (bytes > kMaxRep) {
+ bytes = kMaxRep;
+ }
+ e.kib = bytes / KiB;
+ const char *ptr = reinterpret_cast<const char *>(&e);
+ const size_t len = sizeof(Entry);
+ CHECK_CONDITION(len == signal_safe_write(fd_, ptr, len, nullptr));
+}
+
+PageAllocInfo::PageAllocInfo(const char *label, int log_fd)
+ : label_(label), fd_(log_fd) {
+ if (ABSL_PREDICT_FALSE(log_on())) {
+ // version 1 of the format, in case we change things up
+ uint64_t header = 1;
+ const char *ptr = reinterpret_cast<const char *>(&header);
+ const size_t len = sizeof(header);
+ CHECK_CONDITION(len == signal_safe_write(fd_, ptr, len, nullptr));
+ }
+}
+
+int64_t PageAllocInfo::TimeTicks() const {
+ return absl::base_internal::CycleClock::Now() - baseline_ticks_;
+}
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
diff --git a/contrib/libs/tcmalloc/tcmalloc/stats.h b/contrib/libs/tcmalloc/tcmalloc/stats.h
new file mode 100644
index 0000000000..19070d867d
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/stats.h
@@ -0,0 +1,271 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TCMALLOC_STATS_H_
+#define TCMALLOC_STATS_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "absl/base/internal/cycleclock.h"
+#include "absl/strings/string_view.h"
+#include "tcmalloc/common.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/pages.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+struct BackingStats {
+ BackingStats() : system_bytes(0), free_bytes(0), unmapped_bytes(0) {}
+ uint64_t system_bytes; // Total bytes allocated from system
+ uint64_t free_bytes; // Total bytes on normal freelists
+ uint64_t unmapped_bytes; // Total bytes on returned freelists
+
+ BackingStats& operator+=(BackingStats rhs) {
+ system_bytes += rhs.system_bytes;
+ free_bytes += rhs.free_bytes;
+ unmapped_bytes += rhs.unmapped_bytes;
+ return *this;
+ }
+};
+
+inline BackingStats operator+(BackingStats lhs, BackingStats rhs) {
+ return lhs += rhs;
+}
+
+struct SmallSpanStats {
+ constexpr SmallSpanStats() = default;
+
+ // For each free list of small spans, the length (in spans) of the
+ // normal and returned free lists for that size.
+ int64_t normal_length[kMaxPages.raw_num()] = {0};
+ int64_t returned_length[kMaxPages.raw_num()] = {0};
+
+ SmallSpanStats& operator+=(SmallSpanStats rhs) {
+ for (size_t i = 0; i < kMaxPages.raw_num(); ++i) {
+ normal_length[i] += rhs.normal_length[i];
+ returned_length[i] += rhs.returned_length[i];
+ }
+ return *this;
+ }
+};
+
+inline SmallSpanStats operator+(SmallSpanStats lhs, SmallSpanStats rhs) {
+ return lhs += rhs;
+}
+
+// Stats for free large spans (i.e., spans with more than kMaxPages pages).
+struct LargeSpanStats {
+ size_t spans = 0; // Number of such spans
+ Length normal_pages; // Combined page length of normal large spans
+ Length returned_pages; // Combined page length of unmapped spans
+
+ LargeSpanStats& operator+=(LargeSpanStats rhs) {
+ spans += rhs.spans;
+ normal_pages += rhs.normal_pages;
+ returned_pages += rhs.returned_pages;
+ return *this;
+ }
+};
+
+inline LargeSpanStats operator+(LargeSpanStats lhs, LargeSpanStats rhs) {
+ return lhs += rhs;
+}
+
+void PrintStats(const char* label, Printer* out, const BackingStats& backing,
+ const SmallSpanStats& small, const LargeSpanStats& large,
+ bool everything);
+
+class PageAgeHistograms {
+ public:
+ // <now> assumed to be taken from absl::base_internal::CycleClock::Now (done
+ // like this for tests)
+ explicit PageAgeHistograms(int64_t now);
+
+ // <when> = absl::base_internal::CycleClock::Now() when the span was last
+ // changed.
+ void RecordRange(Length pages, bool released, int64_t when);
+
+ void Print(const char* label, Printer* out) const;
+
+ static constexpr size_t kNumBuckets = 7;
+ static constexpr size_t kNumSizes = 64;
+
+ static constexpr Length kLargeSize = Length(kNumSizes);
+ class Histogram {
+ public:
+ void Record(Length pages, double age);
+ void Print(Printer* out) const;
+
+ uint32_t pages_in_bucket(size_t i) const { return buckets_[i]; }
+
+ Length total() const { return total_pages_; }
+
+ double avg_age() const {
+ return empty() ? 0.0 : total_age_ / total_pages_.raw_num();
+ }
+
+ bool empty() const { return total_pages_ == Length(0); }
+
+ private:
+    // Total number of pages in this bucket. We are actually somewhat space
+    // constrained, so it's important to _not_ use a 64-bit counter here. This
+    // comfortably supports terabytes of RAM, and just in case, we update the
+    // counts with saturating arithmetic.
+ uint32_t buckets_[kNumBuckets] = {0};
+
+ Length total_pages_;
+ double total_age_ = 0;
+ };
+
+ const Histogram* GetSmallHistogram(bool released, Length n) const {
+ if (released) {
+ return returned_.GetSmall(n);
+ } else {
+ return live_.GetSmall(n);
+ }
+ }
+
+ const Histogram* GetLargeHistogram(bool released) const {
+ if (released) {
+ return returned_.GetLarge();
+ } else {
+ return live_.GetLarge();
+ }
+ }
+
+ const Histogram* GetTotalHistogram(bool released) {
+ if (released) {
+ return returned_.GetTotal();
+ } else {
+ return live_.GetTotal();
+ }
+ }
+
+ private:
+ struct PerSizeHistograms {
+ void Record(Length pages, double age);
+ void Print(const char* kind, Printer* out) const;
+
+ Histogram* GetSmall(Length n) {
+ CHECK_CONDITION(n.raw_num() < kNumSizes);
+ return &small[n.raw_num() - 1];
+ }
+ const Histogram* GetSmall(Length n) const {
+ CHECK_CONDITION(n.raw_num() < kNumSizes);
+ return &small[n.raw_num() - 1];
+ }
+
+ Histogram* GetLarge() { return &large; }
+ const Histogram* GetLarge() const { return &large; }
+
+ Histogram* GetTotal() { return &total; }
+
+ Histogram small[kNumSizes - 1];
+ Histogram large;
+ Histogram total;
+ };
+
+ const int64_t now_;
+ const double freq_;
+
+ PerSizeHistograms live_;
+ PerSizeHistograms returned_;
+};
+
+void PrintStatsInPbtxt(PbtxtRegion* region, const SmallSpanStats& small,
+ const LargeSpanStats& large,
+ const PageAgeHistograms& ages);
+
+class PageAllocInfo {
+ private:
+ struct Counts;
+
+ public:
+ // If log_fd >= 0, dump a page trace to it as record events come in.
+ PageAllocInfo(const char* label, int log_fd);
+
+ // Subclasses are responsible for calling these methods when
+ // the relevant actions occur
+ void RecordAlloc(PageId p, Length n);
+ void RecordFree(PageId p, Length n);
+ void RecordRelease(Length n, Length got);
+ // And invoking this in their Print() implementation.
+ void Print(Printer* out) const;
+ void PrintInPbtxt(PbtxtRegion* region, absl::string_view stat_name) const;
+
+ // Total size of allocations < 1 MiB
+ Length small() const { return total_small_; }
+  // We define the "slack" of an allocation as the difference between its size
+  // and the next hugepage multiple (i.e., how much would go unused if we
+  // allocated it as an aligned hugepage and didn't use the rest).
+ // Return the total slack of all non-small allocations.
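+  // For example, with 2 MiB hugepages and 8 KiB pages, a 5 MiB allocation is
+  // rounded up to 6 MiB and so contributes 1 MiB (128 pages) of slack.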
+ Length slack() const { return total_slack_; }
+
+ const Counts& counts_for(Length n) const;
+
+ // Returns (approximate) CycleClock ticks since class instantiation.
+ int64_t TimeTicks() const;
+
+ private:
+ Length total_small_;
+ Length total_slack_;
+
+ Length largest_seen_;
+
+  // How many allocs/frees have we seen (for some size range)?
+ struct Counts {
+ // raw counts
+ size_t nalloc{0}, nfree{0};
+    // and total sizes (needed if this struct tracks a nontrivial range)
+ Length alloc_size;
+ Length free_size;
+
+ void Alloc(Length n) {
+ nalloc++;
+ alloc_size += n;
+ }
+ void Free(Length n) {
+ nfree++;
+ free_size += n;
+ }
+ };
+
+ // Indexed by exact length
+ Counts small_[kMaxPages.raw_num()];
+ // Indexed by power-of-two-buckets
+ Counts large_[kAddressBits - kPageShift];
+ const char* label_;
+
+ const int64_t baseline_ticks_{absl::base_internal::CycleClock::Now()};
+ const double freq_{absl::base_internal::CycleClock::Frequency()};
+
+ // State for page trace logging.
+ const int fd_;
+ uint64_t last_ms_{0};
+ void Write(uint64_t when, uint8_t what, PageId p, Length n);
+ bool log_on() const { return fd_ >= 0; }
+ void LogAlloc(int64_t when, PageId p, Length n) { Write(when, 0, p, n); }
+ void LogFree(int64_t when, PageId p, Length n) { Write(when, 1, p, n); }
+ void LogRelease(int64_t when, Length n) { Write(when, 2, PageId{0}, n); }
+};
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // TCMALLOC_STATS_H_
diff --git a/contrib/libs/tcmalloc/tcmalloc/stats_test.cc b/contrib/libs/tcmalloc/tcmalloc/stats_test.cc
new file mode 100644
index 0000000000..733fcc9534
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/stats_test.cc
@@ -0,0 +1,268 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/stats.h"
+
+#include <limits>
+#include <string>
+
+#include "gtest/gtest.h"
+#include "absl/base/internal/cycleclock.h"
+#include "absl/time/clock.h"
+#include "absl/time/time.h"
+#include "tcmalloc/huge_pages.h"
+
+namespace tcmalloc {
+namespace tcmalloc_internal {
+namespace {
+
+class PrintTest : public ::testing::Test {
+ protected:
+ static constexpr size_t kBufferSize = 256 * 1024;
+ char buf_[kBufferSize];
+
+ void ExpectStats(const BackingStats &back, const SmallSpanStats &small,
+ const LargeSpanStats &large, const std::string &expected) {
+ Printer out(&buf_[0], kBufferSize);
+ PrintStats("PrintTest", &out, back, small, large, true);
+ EXPECT_EQ(expected, buf_);
+ }
+
+ BackingStats Backing(size_t system, size_t free, size_t unmapped) {
+ BackingStats stat;
+ stat.system_bytes = system;
+ stat.free_bytes = free;
+ stat.unmapped_bytes = unmapped;
+
+ return stat;
+ }
+};
+
+TEST_F(PrintTest, Empty) {
+ ExpectStats(Backing(0, 0, 0), {{}, {}}, // small
+ {0, Length(0), Length(0)}, // large
+ // clang-format off
+R"LIT(------------------------------------------------
+PrintTest: 0 sizes; 0.0 MiB free; 0.0 MiB unmapped
+------------------------------------------------
+ >=128 large * 0 spans ~ 0.0 MiB; 0.0 MiB cum; unmapped: 0.0 MiB; 0.0 MiB cum
+)LIT");
+ // clang-format on
+}
+
+TEST_F(PrintTest, ManySizes) {
+ ExpectStats(Backing(987654321, 1900 * 1000, 67 * 1000 * 1000),
+ {{0, 100, 0, 250, 0, 0, 0, 0, 0, 51},
+ {0, 0, 300, 400, 0, 0, 0, 0, 0, 27}}, // small
+ {2, Length(100000), Length(2000)}, // large
+ // clang-format off
+R"LIT(------------------------------------------------
+PrintTest: 4 sizes; 1.8 MiB free; 63.9 MiB unmapped
+------------------------------------------------
+ 1 pages * 100 spans ~ 0.8 MiB; 0.8 MiB cum; unmapped: 0.0 MiB; 0.0 MiB cum
+ 2 pages * 300 spans ~ 4.7 MiB; 5.5 MiB cum; unmapped: 4.7 MiB; 4.7 MiB cum
+ 3 pages * 650 spans ~ 15.2 MiB; 20.7 MiB cum; unmapped: 9.4 MiB; 14.1 MiB cum
+ 9 pages * 78 spans ~ 5.5 MiB; 26.2 MiB cum; unmapped: 1.9 MiB; 16.0 MiB cum
+ >=128 large * 2 spans ~ 796.9 MiB; 823.1 MiB cum; unmapped: 15.6 MiB; 31.6 MiB cum
+)LIT");
+ // clang-format on
+}
+
+class AgeTest : public testing::Test {
+ protected:
+ static constexpr size_t kBufferSize = 256 * 1024;
+ char buf_[kBufferSize];
+
+ static constexpr int64_t kNow = 1000ll * 1000 * 1000 * 1000;
+
+  // Returns the "when" value that yields a computed age of <age>.
+ int64_t WhenForAge(double age) {
+ static double freq = absl::base_internal::CycleClock::Frequency();
+ // age = (now - when) / freq
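+    // For example, WhenForAge(2.0) returns kNow - 2 * freq, i.e. a timestamp
+    // two seconds before kNow.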
+ return kNow - freq * age;
+ }
+
+ void ExpectAges(const PageAgeHistograms &ages, const std::string &expected) {
+ Printer out(&buf_[0], kBufferSize);
+ ages.Print("AgeTest", &out);
+ std::string got = buf_;
+ EXPECT_EQ(expected, got);
+ }
+};
+
+TEST_F(AgeTest, Basic) {
+ PageAgeHistograms ages(kNow);
+ ages.RecordRange(Length(1), false, WhenForAge(0.5));
+ ages.RecordRange(Length(1), false, WhenForAge(1.2));
+ ages.RecordRange(Length(1), false, WhenForAge(3.7));
+
+ ages.RecordRange(Length(3), false, WhenForAge(60 * 60 * 10));
+
+ for (int i = 0; i < 10; ++i) {
+ ages.RecordRange(Length(2), true, WhenForAge(0.1));
+ }
+ ages.RecordRange(Length(2), true, WhenForAge(10 * 60 + 5));
+
+ ages.RecordRange(Length(200), true, WhenForAge(10 * 60));
+ // clang-format off
+ const char kExpected[] =
+R"LIT(------------------------------------------------
+AgeTest cache entry age (count of pages in spans of a given size that have been idle for up to the given period of time)
+------------------------------------------------
+ mean <1s 1s 30s 1m 30m 1h 8+h
+Live span TOTAL PAGES: 18000.9 1 2 0 0 0 0 3
+Live span, 1 pages: 1.8 1 2 0 0 0 0 0
+Live span, 3 pages: 36000.0 0 0 0 0 0 0 3
+
+Unmapped span TOTAL PAGES: 546.0 20 0 0 202 0 0 0
+Unmapped span, 2 pages: 55.1 20 0 0 2 0 0 0
+Unmapped span, >=64 pages: 600.0 0 0 0 200 0 0 0
+)LIT";
+ // clang-format on
+ ExpectAges(ages, kExpected);
+}
+
+TEST_F(AgeTest, Overflow) {
+ PageAgeHistograms ages(kNow);
+ const Length too_big = Length(4 * (std::numeric_limits<uint32_t>::max() / 5));
+ ages.RecordRange(too_big, false, WhenForAge(0.5));
+ ages.RecordRange(too_big, false, WhenForAge(0.5));
+
+ // clang-format off
+ const char kExpected[] =
+R"LIT(------------------------------------------------
+AgeTest cache entry age (count of pages in spans of a given size that have been idle for up to the given period of time)
+------------------------------------------------
+ mean <1s 1s 30s 1m 30m 1h 8+h
+Live span TOTAL PAGES: 0.5 4294967295 0 0 0 0 0 0
+Live span, >=64 pages: 0.5 4294967295 0 0 0 0 0 0
+
+Unmapped span TOTAL PAGES: 0.0 0 0 0 0 0 0 0
+)LIT";
+ // clang-format on
+ ExpectAges(ages, kExpected);
+}
+
+TEST_F(AgeTest, ManySizes) {
+ PageAgeHistograms ages(kNow);
+ const Length N = PageAgeHistograms::kLargeSize;
+ for (auto i = Length(1); i <= N; ++i) {
+ ages.RecordRange(i, false, WhenForAge(i.raw_num() * 3));
+ }
+
+ for (auto i = Length(1); i < N; ++i) {
+ auto hist = ages.GetSmallHistogram(false, i);
+ EXPECT_EQ(i, hist->total());
+ EXPECT_FLOAT_EQ(i.raw_num() * 3, hist->avg_age());
+ }
+
+ auto large = ages.GetLargeHistogram(false);
+ EXPECT_EQ(N, large->total());
+ EXPECT_FLOAT_EQ(N.raw_num() * 3, large->avg_age());
+
+ auto total = ages.GetTotalHistogram(false);
+  // sum_{i = 1}^N i = N(N+1)/2
+  EXPECT_EQ(N.raw_num() * (N.raw_num() + 1) / 2, total->total().raw_num());
+  // sum_{i = 1}^N 3 * i * i = N(N+1)(2N+1)/2;
+  // dividing by the page total above gives (2N+1)
+ EXPECT_FLOAT_EQ(2 * N.raw_num() + 1, total->avg_age());
+}
+
+TEST(PageAllocInfo, Small) {
+ PageAllocInfo info("", -1);
+ static_assert(kMaxPages >= Length(4), "odd config");
+
+ info.RecordAlloc(PageId{0}, Length(2));
+ info.RecordAlloc(PageId{0}, Length(2));
+ info.RecordAlloc(PageId{0}, Length(2));
+
+ info.RecordAlloc(PageId{0}, Length(3));
+ info.RecordAlloc(PageId{0}, Length(3));
+
+ info.RecordFree(PageId{0}, Length(3));
+
+ auto c2 = info.counts_for(Length(2));
+ EXPECT_EQ(3, c2.nalloc);
+ EXPECT_EQ(0, c2.nfree);
+ EXPECT_EQ(Length(6), c2.alloc_size);
+ EXPECT_EQ(Length(0), c2.free_size);
+
+ auto c3 = info.counts_for(Length(3));
+ EXPECT_EQ(2, c3.nalloc);
+ EXPECT_EQ(1, c3.nfree);
+ EXPECT_EQ(Length(6), c3.alloc_size);
+ EXPECT_EQ(Length(3), c3.free_size);
+
+ EXPECT_EQ(Length(3 * 2 + (2 - 1) * 3), info.small());
+ EXPECT_EQ(Length(0), info.slack());
+}
+
+TEST(PageAllocInfo, Large) {
+ PageAllocInfo info("", -1);
+ static_assert(kPagesPerHugePage > kMaxPages, "odd config");
+
+ // These three should be aggregated
+ Length slack;
+ info.RecordAlloc(PageId{0}, kMaxPages + Length(1));
+ slack += kPagesPerHugePage - kMaxPages - Length(1);
+ info.RecordAlloc(PageId{0}, kMaxPages * 3 / 2);
+ slack += kPagesPerHugePage - kMaxPages * 3 / 2;
+ info.RecordAlloc(PageId{0}, kMaxPages * 2);
+ slack += kPagesPerHugePage - kMaxPages * 2;
+
+ // This shouldn't
+ const Length larger = kMaxPages * 2 + Length(1);
+ info.RecordAlloc(PageId{0}, larger);
+ slack +=
+ (kPagesPerHugePage - (larger % kPagesPerHugePage)) % kPagesPerHugePage;
+
+ auto c1 = info.counts_for(kMaxPages + Length(1));
+ EXPECT_EQ(3, c1.nalloc);
+ EXPECT_EQ(0, c1.nfree);
+ EXPECT_EQ(kMaxPages * 9 / 2 + Length(1), c1.alloc_size);
+ EXPECT_EQ(Length(0), c1.free_size);
+
+ auto c2 = info.counts_for(kMaxPages * 2 + Length(1));
+ EXPECT_EQ(1, c2.nalloc);
+ EXPECT_EQ(0, c2.nfree);
+ EXPECT_EQ(kMaxPages * 2 + Length(1), c2.alloc_size);
+ EXPECT_EQ(Length(0), c2.free_size);
+
+ EXPECT_EQ(Length(0), info.small());
+ EXPECT_EQ(slack, info.slack());
+}
+
+TEST(ClockTest, ClockTicks) {
+ // It's a bit ironic to test this clock against other clocks since
+ // this exists because we don't trust other clocks. But hopefully
+ // no one is using libfaketime on this binary, and of course we
+ // don't care about signal safety, just ticking.
+ const absl::Time before = absl::Now();
+ const double b = absl::base_internal::CycleClock::Now() /
+ absl::base_internal::CycleClock::Frequency();
+ static const absl::Duration kDur = absl::Milliseconds(500);
+ absl::SleepFor(kDur);
+ const double a = absl::base_internal::CycleClock::Now() /
+ absl::base_internal::CycleClock::Frequency();
+ const absl::Time after = absl::Now();
+
+ const absl::Duration actual = (after - before);
+ const absl::Duration measured = absl::Seconds(a - b);
+ EXPECT_LE(actual * 0.99, measured) << actual;
+ EXPECT_GE(actual * 1.01, measured) << actual;
+}
+
+} // namespace
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
diff --git a/contrib/libs/tcmalloc/tcmalloc/system-alloc.cc b/contrib/libs/tcmalloc/tcmalloc/system-alloc.cc
new file mode 100644
index 0000000000..b079c9c966
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/system-alloc.cc
@@ -0,0 +1,623 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/system-alloc.h"
+
+#include <asm/unistd.h>
+#include <errno.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include <algorithm>
+#include <atomic>
+#include <new>
+#include <tuple>
+#include <type_traits>
+#include <utility>
+
+#include "absl/base/attributes.h"
+#include "absl/base/const_init.h"
+#include "absl/base/internal/spinlock.h"
+#include "absl/base/macros.h"
+#include "absl/base/optimization.h"
+#include "absl/types/optional.h"
+#include "tcmalloc/common.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/internal/optimization.h"
+#include "tcmalloc/malloc_extension.h"
+#include "tcmalloc/sampler.h"
+
+// On systems (like freebsd) that don't define MAP_ANONYMOUS, use the old
+// form of the name instead.
+#ifndef MAP_ANONYMOUS
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+
+// Solaris has a bug where it doesn't declare madvise() for C++.
+// http://www.opensolaris.org/jive/thread.jspa?threadID=21035&tstart=0
+#if defined(__sun) && defined(__SVR4)
+#include <sys/types.h>
+extern "C" int madvise(caddr_t, size_t, int);
+#endif
+
+#ifdef __linux__
+#include <linux/mempolicy.h>
+#endif
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+namespace {
+
+// Check that no bit is set at position ADDRESS_BITS or higher.
+template <int ADDRESS_BITS>
+void CheckAddressBits(uintptr_t ptr) {
+ ASSERT((ptr >> ADDRESS_BITS) == 0);
+}
+
+// Specialize for the bit width of a pointer to avoid undefined shift.
+template <>
+ABSL_ATTRIBUTE_UNUSED void CheckAddressBits<8 * sizeof(void*)>(uintptr_t ptr) {}
+
+static_assert(kAddressBits <= 8 * sizeof(void*),
+ "kAddressBits must be smaller than the pointer size");
+
+// Structure for discovering alignment
+union MemoryAligner {
+ void* p;
+ double d;
+ size_t s;
+} ABSL_CACHELINE_ALIGNED;
+
+static_assert(sizeof(MemoryAligner) < kMinSystemAlloc,
+ "hugepage alignment too small");
+
+ABSL_CONST_INIT absl::base_internal::SpinLock spinlock(
+ absl::kConstInit, absl::base_internal::SCHEDULE_KERNEL_ONLY);
+
+// Page size is initialized on demand
+size_t pagesize = 0;
+size_t preferred_alignment = 0;
+
+// The current region factory.
+AddressRegionFactory* region_factory = nullptr;
+
+// Rounds size down to a multiple of alignment.
+size_t RoundDown(const size_t size, const size_t alignment) {
+ // Checks that the alignment has only one bit set.
+ ASSERT(absl::has_single_bit(alignment));
+ return (size) & ~(alignment - 1);
+}
+
+// Rounds size up to a multiple of alignment.
+size_t RoundUp(const size_t size, const size_t alignment) {
+ return RoundDown(size + alignment - 1, alignment);
+}
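+// For example, with a power-of-two alignment of 4096:
+//   RoundDown(5000, 4096) == 4096 and RoundUp(5000, 4096) == 8192.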
+
+class MmapRegion final : public AddressRegion {
+ public:
+ MmapRegion(uintptr_t start, size_t size, AddressRegionFactory::UsageHint hint)
+ : start_(start), free_size_(size), hint_(hint) {}
+ std::pair<void*, size_t> Alloc(size_t size, size_t alignment) override;
+ ~MmapRegion() override = default;
+
+ private:
+ const uintptr_t start_;
+ size_t free_size_;
+ const AddressRegionFactory::UsageHint hint_;
+};
+
+class MmapRegionFactory final : public AddressRegionFactory {
+ public:
+ AddressRegion* Create(void* start, size_t size, UsageHint hint) override;
+ size_t GetStats(absl::Span<char> buffer) override;
+ size_t GetStatsInPbtxt(absl::Span<char> buffer) override;
+ ~MmapRegionFactory() override = default;
+
+ private:
+ std::atomic<size_t> bytes_reserved_{0};
+};
+std::aligned_storage<sizeof(MmapRegionFactory),
+ alignof(MmapRegionFactory)>::type mmap_space;
+
+class RegionManager {
+ public:
+ std::pair<void*, size_t> Alloc(size_t size, size_t alignment, MemoryTag tag);
+
+ void DiscardMappedRegions() {
+ std::fill(normal_region_.begin(), normal_region_.end(), nullptr);
+ sampled_region_ = nullptr;
+ }
+
+ private:
+  // Checks that there is sufficient space available in the reserved region
+  // for the next allocation; if not, reserves a new region.
+  // Then returns a pointer to the newly allocated memory.
+ std::pair<void*, size_t> Allocate(size_t size, size_t alignment,
+ MemoryTag tag);
+
+ std::array<AddressRegion*, kNumaPartitions> normal_region_{{nullptr}};
+ AddressRegion* sampled_region_{nullptr};
+};
+std::aligned_storage<sizeof(RegionManager), alignof(RegionManager)>::type
+ region_manager_space;
+RegionManager* region_manager = nullptr;
+
+std::pair<void*, size_t> MmapRegion::Alloc(size_t request_size,
+ size_t alignment) {
+ // Align on kMinSystemAlloc boundaries to reduce external fragmentation for
+ // future allocations.
+ size_t size = RoundUp(request_size, kMinSystemAlloc);
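+  // RoundUp() wraps around for request sizes within kMinSystemAlloc of
+  // SIZE_MAX; a result smaller than the request signals that overflow.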
+ if (size < request_size) return {nullptr, 0};
+ alignment = std::max(alignment, preferred_alignment);
+
+ // Tries to allocate size bytes from the end of [start_, start_ + free_size_),
+ // aligned to alignment.
+ uintptr_t end = start_ + free_size_;
+ uintptr_t result = end - size;
+ if (result > end) return {nullptr, 0}; // Underflow.
+ result &= ~(alignment - 1);
+ if (result < start_) return {nullptr, 0}; // Out of memory in region.
+ size_t actual_size = end - result;
+
+ ASSERT(result % pagesize == 0);
+ void* result_ptr = reinterpret_cast<void*>(result);
+ if (mprotect(result_ptr, actual_size, PROT_READ | PROT_WRITE) != 0) {
+ Log(kLogWithStack, __FILE__, __LINE__,
+ "mprotect() region failed (ptr, size, error)", result_ptr, actual_size,
+ strerror(errno));
+ return {nullptr, 0};
+ }
+ (void)hint_;
+ free_size_ -= actual_size;
+ return {result_ptr, actual_size};
+}
+
+AddressRegion* MmapRegionFactory::Create(void* start, size_t size,
+ UsageHint hint) {
+ void* region_space = MallocInternal(sizeof(MmapRegion));
+ if (!region_space) return nullptr;
+ bytes_reserved_.fetch_add(size, std::memory_order_relaxed);
+ return new (region_space)
+ MmapRegion(reinterpret_cast<uintptr_t>(start), size, hint);
+}
+
+size_t MmapRegionFactory::GetStats(absl::Span<char> buffer) {
+ Printer printer(buffer.data(), buffer.size());
+ size_t allocated = bytes_reserved_.load(std::memory_order_relaxed);
+ constexpr double MiB = 1048576.0;
+ printer.printf("MmapSysAllocator: %zu bytes (%.1f MiB) reserved\n", allocated,
+ allocated / MiB);
+
+ return printer.SpaceRequired();
+}
+
+size_t MmapRegionFactory::GetStatsInPbtxt(absl::Span<char> buffer) {
+ Printer printer(buffer.data(), buffer.size());
+ size_t allocated = bytes_reserved_.load(std::memory_order_relaxed);
+ printer.printf("mmap_sys_allocator: %lld\n", allocated);
+
+ return printer.SpaceRequired();
+}
+
+static AddressRegionFactory::UsageHint TagToHint(MemoryTag tag) {
+ using UsageHint = AddressRegionFactory::UsageHint;
+ switch (tag) {
+ case MemoryTag::kNormal:
+ case MemoryTag::kNormalP1:
+ return UsageHint::kNormal;
+ break;
+ case MemoryTag::kSampled:
+ return UsageHint::kInfrequentAllocation;
+ break;
+ default:
+ ASSUME(false);
+ __builtin_unreachable();
+ }
+}
+
+std::pair<void*, size_t> RegionManager::Alloc(size_t request_size,
+ size_t alignment,
+ const MemoryTag tag) {
+ constexpr uintptr_t kTagFree = uintptr_t{1} << kTagShift;
+
+ // We do not support size or alignment larger than kTagFree.
+ // TODO(b/141325493): Handle these large allocations.
+ if (request_size > kTagFree || alignment > kTagFree) return {nullptr, 0};
+
+  // If we are dealing with large sizes or large alignments, we do not
+  // want to throw away the existing reserved region, so instead we
+  // allocate from a new region created specifically for the request.
+ if (request_size > kMinMmapAlloc || alignment > kMinMmapAlloc) {
+ // Align on kMinSystemAlloc boundaries to reduce external fragmentation for
+ // future allocations.
+ size_t size = RoundUp(request_size, kMinSystemAlloc);
+ if (size < request_size) return {nullptr, 0};
+ alignment = std::max(alignment, preferred_alignment);
+ void* ptr = MmapAligned(size, alignment, tag);
+ if (!ptr) return {nullptr, 0};
+
+ const auto region_type = TagToHint(tag);
+ AddressRegion* region = region_factory->Create(ptr, size, region_type);
+ if (!region) {
+ munmap(ptr, size);
+ return {nullptr, 0};
+ }
+ std::pair<void*, size_t> result = region->Alloc(size, alignment);
+ if (result.first != nullptr) {
+ ASSERT(result.first == ptr);
+ ASSERT(result.second == size);
+ } else {
+ ASSERT(result.second == 0);
+ }
+ return result;
+ }
+ return Allocate(request_size, alignment, tag);
+}
+
+std::pair<void*, size_t> RegionManager::Allocate(size_t size, size_t alignment,
+ const MemoryTag tag) {
+ AddressRegion*& region = *[&]() {
+ switch (tag) {
+ case MemoryTag::kNormal:
+ return &normal_region_[0];
+ case MemoryTag::kNormalP1:
+ return &normal_region_[1];
+ case MemoryTag::kSampled:
+ return &sampled_region_;
+ default:
+ ASSUME(false);
+ __builtin_unreachable();
+ }
+ }();
+  // For sizes that fit in our reserved range, first check whether we can
+  // satisfy the request from what we already have available.
+ if (region) {
+ std::pair<void*, size_t> result = region->Alloc(size, alignment);
+ if (result.first) return result;
+ }
+
+ // Allocation failed so we need to reserve more memory.
+ // Reserve new region and try allocation again.
+ void* ptr = MmapAligned(kMinMmapAlloc, kMinMmapAlloc, tag);
+ if (!ptr) return {nullptr, 0};
+
+ const auto region_type = TagToHint(tag);
+ region = region_factory->Create(ptr, kMinMmapAlloc, region_type);
+ if (!region) {
+ munmap(ptr, kMinMmapAlloc);
+ return {nullptr, 0};
+ }
+ return region->Alloc(size, alignment);
+}
+
+void InitSystemAllocatorIfNecessary() {
+ if (region_factory) return;
+ pagesize = getpagesize();
+  // Sets the preferred alignment to the larger of the system page size
+  // (the alignment mmap() naturally provides) and our minimum allocation
+  // size. The minimum allocation size is usually a multiple of the page
+  // size, but this need not be true for SMALL_BUT_SLOW where we do not
+  // allocate in units of huge pages.
+ preferred_alignment = std::max(pagesize, kMinSystemAlloc);
+ region_manager = new (&region_manager_space) RegionManager();
+ region_factory = new (&mmap_space) MmapRegionFactory();
+}
+
+// Binds the memory region spanning `size` bytes starting from `base` to the
+// NUMA nodes assigned to `partition`. On failure, logs a warning in advisory
+// bind mode and crashes in strict bind mode.
+void BindMemory(void* const base, const size_t size, const size_t partition) {
+ auto& topology = Static::numa_topology();
+
+ // If NUMA awareness is unavailable or disabled, or the user requested that
+ // we don't bind memory then do nothing.
+ const NumaBindMode bind_mode = topology.bind_mode();
+ if (!topology.numa_aware() || bind_mode == NumaBindMode::kNone) {
+ return;
+ }
+
+ const uint64_t nodemask = topology.GetPartitionNodes(partition);
+ int err =
+ syscall(__NR_mbind, base, size, MPOL_BIND | MPOL_F_STATIC_NODES,
+ &nodemask, sizeof(nodemask) * 8, MPOL_MF_STRICT | MPOL_MF_MOVE);
+ if (err == 0) {
+ return;
+ }
+
+ if (bind_mode == NumaBindMode::kAdvisory) {
+ Log(kLogWithStack, __FILE__, __LINE__, "Warning: Unable to mbind memory",
+ err, base, nodemask);
+ return;
+ }
+
+ ASSERT(bind_mode == NumaBindMode::kStrict);
+ Crash(kCrash, __FILE__, __LINE__, "Unable to mbind memory", err, base,
+ nodemask);
+}
+
+ABSL_CONST_INIT std::atomic<int> system_release_errors = ATOMIC_VAR_INIT(0);
+
+} // namespace
+
+void AcquireSystemAllocLock() {
+ spinlock.Lock();
+}
+
+void ReleaseSystemAllocLock() {
+ spinlock.Unlock();
+}
+
+void* SystemAlloc(size_t bytes, size_t* actual_bytes, size_t alignment,
+ const MemoryTag tag) {
+  // If default alignment is set, request at least the minimum alignment
+  // provided by the system.
+ alignment = std::max(alignment, pagesize);
+
+ // Discard requests that overflow
+ if (bytes + alignment < bytes) return nullptr;
+
+ // This may return significantly more memory than "bytes" by default, so
+ // require callers to know the true amount allocated.
+ ASSERT(actual_bytes != nullptr);
+
+ absl::base_internal::SpinLockHolder lock_holder(&spinlock);
+
+ InitSystemAllocatorIfNecessary();
+
+ void* result = nullptr;
+ std::tie(result, *actual_bytes) =
+ region_manager->Alloc(bytes, alignment, tag);
+
+ if (result != nullptr) {
+ CheckAddressBits<kAddressBits>(reinterpret_cast<uintptr_t>(result) +
+ *actual_bytes - 1);
+ ASSERT(GetMemoryTag(result) == tag);
+ }
+ return result;
+}
+
+static bool ReleasePages(void* start, size_t length) {
+ int ret;
+ // Note -- ignoring most return codes, because if this fails it
+ // doesn't matter...
+ // Moreover, MADV_REMOVE *will* fail (with EINVAL) on anonymous memory,
+ // but that's harmless.
+#ifdef MADV_REMOVE
+ // MADV_REMOVE deletes any backing storage for non-anonymous memory
+ // (tmpfs).
+ do {
+ ret = madvise(start, length, MADV_REMOVE);
+ } while (ret == -1 && errno == EAGAIN);
+
+ if (ret == 0) {
+ return true;
+ }
+#endif
+#ifdef MADV_DONTNEED
+ // MADV_DONTNEED drops page table info and any anonymous pages.
+ do {
+ ret = madvise(start, length, MADV_DONTNEED);
+ } while (ret == -1 && errno == EAGAIN);
+
+ if (ret == 0) {
+ return true;
+ }
+#endif
+
+ return false;
+}
+
+int SystemReleaseErrors() {
+ return system_release_errors.load(std::memory_order_relaxed);
+}
+
+void SystemRelease(void* start, size_t length) {
+ int saved_errno = errno;
+#if defined(MADV_DONTNEED) || defined(MADV_REMOVE)
+ const size_t pagemask = pagesize - 1;
+
+ size_t new_start = reinterpret_cast<size_t>(start);
+ size_t end = new_start + length;
+ size_t new_end = end;
+
+ // Round up the starting address and round down the ending address
+ // to be page aligned:
+ new_start = (new_start + pagesize - 1) & ~pagemask;
+ new_end = new_end & ~pagemask;
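+  // For example, with 4 KiB pages, a request covering [0x1100, 0x4100)
+  // shrinks to the fully covered pages [0x2000, 0x4000).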
+
+ ASSERT((new_start & pagemask) == 0);
+ ASSERT((new_end & pagemask) == 0);
+ ASSERT(new_start >= reinterpret_cast<size_t>(start));
+ ASSERT(new_end <= end);
+
+ if (new_end > new_start) {
+ void* new_ptr = reinterpret_cast<void*>(new_start);
+ size_t new_length = new_end - new_start;
+
+ if (!ReleasePages(new_ptr, new_length)) {
+ // Try unlocking.
+ int ret;
+ do {
+ ret = munlock(reinterpret_cast<char*>(new_start), new_end - new_start);
+ } while (ret == -1 && errno == EAGAIN);
+
+ if (ret != 0 || !ReleasePages(new_ptr, new_length)) {
+ // If we fail to munlock *or* fail our second attempt at madvise,
+ // increment our failure count.
+ system_release_errors.fetch_add(1, std::memory_order_relaxed);
+ }
+ }
+ }
+#endif
+ errno = saved_errno;
+}
+
+void SystemBack(void* start, size_t length) {
+ // TODO(b/134694141): use madvise when we have better support for that;
+ // taking faults is not free.
+
+ // TODO(b/134694141): enable this, if we can avoid causing trouble for apps
+ // that routinely make large mallocs they never touch (sigh).
+ return;
+
+ // Strictly speaking, not everything uses 4K pages. However, we're
+ // not asking the OS for anything actually page-related, just taking
+ // a fault on every "page". If the real page size is bigger, we do
+ // a few extra reads; this is not worth worrying about.
+ static const size_t kHardwarePageSize = 4 * 1024;
+ CHECK_CONDITION(reinterpret_cast<intptr_t>(start) % kHardwarePageSize == 0);
+ CHECK_CONDITION(length % kHardwarePageSize == 0);
+ const size_t num_pages = length / kHardwarePageSize;
+
+ struct PageStruct {
+ volatile size_t data[kHardwarePageSize / sizeof(size_t)];
+ };
+ CHECK_CONDITION(sizeof(PageStruct) == kHardwarePageSize);
+
+ PageStruct* ps = reinterpret_cast<PageStruct*>(start);
+ PageStruct* limit = ps + num_pages;
+ for (; ps < limit; ++ps) {
+ ps->data[0] = 0;
+ }
+}
+
+AddressRegionFactory* GetRegionFactory() {
+ absl::base_internal::SpinLockHolder lock_holder(&spinlock);
+ InitSystemAllocatorIfNecessary();
+ return region_factory;
+}
+
+void SetRegionFactory(AddressRegionFactory* factory) {
+ absl::base_internal::SpinLockHolder lock_holder(&spinlock);
+ InitSystemAllocatorIfNecessary();
+ region_manager->DiscardMappedRegions();
+ region_factory = factory;
+}
+
+static uintptr_t RandomMmapHint(size_t size, size_t alignment,
+ const MemoryTag tag) {
+ // Rely on kernel's mmap randomization to seed our RNG.
+ static uintptr_t rnd = []() {
+ void* seed =
+ mmap(nullptr, kPageSize, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (seed == MAP_FAILED) {
+ Crash(kCrash, __FILE__, __LINE__,
+ "Initial mmap() reservation failed (size)", kPageSize);
+ }
+ munmap(seed, kPageSize);
+ return reinterpret_cast<uintptr_t>(seed);
+ }();
+
+ // Mask out bits that cannot be used by the hardware, mask out the top
+ // "usable" bit since it is reserved for kernel use, and also mask out the
+ // next top bit to significantly reduce collisions with mappings that tend to
+ // be placed in the upper half of the address space (e.g., stack, executable,
+ // kernel-placed mmaps). See b/139357826.
+ //
+ // TODO(b/124707070): Remove this #ifdef
+#if defined(MEMORY_SANITIZER) || defined(THREAD_SANITIZER)
+ // MSan and TSan use up all of the lower address space, so we allow use of
+ // mid-upper address space when they're active. This only matters for
+ // TCMalloc-internal tests, since sanitizers install their own malloc/free.
+ constexpr uintptr_t kAddrMask = (uintptr_t{3} << (kAddressBits - 3)) - 1;
+#else
+ constexpr uintptr_t kAddrMask = (uintptr_t{3} << (kAddressBits - 3)) - 1;
+#endif
+
+ // Ensure alignment >= size so we're guaranteed the full mapping has the same
+ // tag.
+ alignment = absl::bit_ceil(std::max(alignment, size));
+
+ rnd = Sampler::NextRandom(rnd);
+ uintptr_t addr = rnd & kAddrMask & ~(alignment - 1) & ~kTagMask;
+ addr |= static_cast<uintptr_t>(tag) << kTagShift;
+ ASSERT(GetMemoryTag(reinterpret_cast<const void*>(addr)) == tag);
+ return addr;
+}
+
+void* MmapAligned(size_t size, size_t alignment, const MemoryTag tag) {
+ ASSERT(size <= kTagMask);
+ ASSERT(alignment <= kTagMask);
+
+ static uintptr_t next_sampled_addr = 0;
+ static std::array<uintptr_t, kNumaPartitions> next_normal_addr = {0};
+
+ absl::optional<int> numa_partition;
+ uintptr_t& next_addr = *[&]() {
+ switch (tag) {
+ case MemoryTag::kSampled:
+ return &next_sampled_addr;
+ case MemoryTag::kNormalP0:
+ numa_partition = 0;
+ return &next_normal_addr[0];
+ case MemoryTag::kNormalP1:
+ numa_partition = 1;
+ return &next_normal_addr[1];
+ default:
+ ASSUME(false);
+ __builtin_unreachable();
+ }
+ }();
+
+ if (!next_addr || next_addr & (alignment - 1) ||
+ GetMemoryTag(reinterpret_cast<void*>(next_addr)) != tag ||
+ GetMemoryTag(reinterpret_cast<void*>(next_addr + size - 1)) != tag) {
+ next_addr = RandomMmapHint(size, alignment, tag);
+ }
+ void* hint;
+ for (int i = 0; i < 1000; ++i) {
+ hint = reinterpret_cast<void*>(next_addr);
+ ASSERT(GetMemoryTag(hint) == tag);
+ // TODO(b/140190055): Use MAP_FIXED_NOREPLACE once available.
+ void* result =
+ mmap(hint, size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (result == hint) {
+ if (numa_partition.has_value()) {
+ BindMemory(result, size, *numa_partition);
+ }
+ // Attempt to keep the next mmap contiguous in the common case.
+ next_addr += size;
+ CHECK_CONDITION(kAddressBits == std::numeric_limits<uintptr_t>::digits ||
+ next_addr <= uintptr_t{1} << kAddressBits);
+
+ ASSERT((reinterpret_cast<uintptr_t>(result) & (alignment - 1)) == 0);
+ return result;
+ }
+ if (result == MAP_FAILED) {
+ Log(kLogWithStack, __FILE__, __LINE__,
+ "mmap() reservation failed (hint, size, error)", hint, size,
+ strerror(errno));
+ return nullptr;
+ }
+ if (int err = munmap(result, size)) {
+ Log(kLogWithStack, __FILE__, __LINE__, "munmap() failed");
+ ASSERT(err == 0);
+ }
+ next_addr = RandomMmapHint(size, alignment, tag);
+ }
+
+ Log(kLogWithStack, __FILE__, __LINE__,
+ "MmapAligned() failed - unable to allocate with tag (hint, size, "
+ "alignment) - is something limiting address placement?",
+ hint, size, alignment);
+ return nullptr;
+}
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
diff --git a/contrib/libs/tcmalloc/tcmalloc/system-alloc.h b/contrib/libs/tcmalloc/tcmalloc/system-alloc.h
new file mode 100644
index 0000000000..3d1e7fd60b
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/system-alloc.h
@@ -0,0 +1,91 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Routine that uses sbrk/mmap to allocate memory from the system.
+// Useful for implementing malloc.
+
+#ifndef TCMALLOC_SYSTEM_ALLOC_H_
+#define TCMALLOC_SYSTEM_ALLOC_H_
+
+#include <stddef.h>
+
+#include "tcmalloc/common.h"
+#include "tcmalloc/malloc_extension.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+// REQUIRES: "alignment" is a power of two or "0" to indicate default alignment
+// REQUIRES: "alignment" and "size" <= kTagMask
+//
+// Allocate and return "bytes" of zeroed memory. The allocator may optionally
+// return more bytes than asked for (i.e. return an entire "huge" page). The
+// length of the returned memory area is stored in *actual_bytes.
+//
+// The returned pointer is a multiple of "alignment" if non-zero. The
+// returned pointer will always be aligned suitably for holding a
+// void*, double, or size_t. In addition, if this platform defines
+// ABSL_CACHELINE_ALIGNED, the return pointer will always be cacheline
+// aligned.
+//
+// The returned pointer is guaranteed to satisfy GetMemoryTag(ptr) == "tag".
+//
+// Returns nullptr when out of memory.
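+//
+// Illustrative use (sketch only, not part of the contract):
+//   size_t actual = 0;
+//   void *p = SystemAlloc(1 << 20, &actual, /*alignment=*/0,
+//                         MemoryTag::kNormal);
+//   // On success, p != nullptr and [p, p + actual) is usable, actual >= 1 MiB.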
+void *SystemAlloc(size_t bytes, size_t *actual_bytes, size_t alignment,
+ MemoryTag tag);
+
+// Returns the number of times we failed to give pages back to the OS after a
+// call to SystemRelease.
+int SystemReleaseErrors();
+
+void AcquireSystemAllocLock();
+void ReleaseSystemAllocLock();
+
+// This call is a hint to the operating system that the pages
+// contained in the specified range of memory will not be used for a
+// while, and can be released for use by other processes or the OS.
+// Pages which are released in this way may be destroyed (zeroed) by
+// the OS. The benefit of this function is that it frees memory for
+// use by the system; the cost is that the pages are faulted back into
+// the address space the next time they are touched, which can impact
+// performance. (Only pages fully covered by the memory region will
+// be released; partial pages will not.)
+void SystemRelease(void *start, size_t length);
+
+// This call is the inverse of SystemRelease: the pages in this range
+// are in use and should be faulted in. (In principle this is a
+// best-effort hint, but in practice we will unconditionally fault the
+// range.)
+// REQUIRES: [start, start + length) is a range aligned to 4KiB boundaries.
+void SystemBack(void *start, size_t length);
+
+// Returns the current address region factory.
+AddressRegionFactory *GetRegionFactory();
+
+// Sets the current address region factory to factory.
+void SetRegionFactory(AddressRegionFactory *factory);
+
+// Uses mmap() to reserve a region of memory of the requested size and
+// alignment, with the bits specified by kTagMask set according to tag.
+//
+// REQUIRES: pagesize <= alignment <= kTagMask
+// REQUIRES: size <= kTagMask
+void *MmapAligned(size_t size, size_t alignment, MemoryTag tag);
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // TCMALLOC_SYSTEM_ALLOC_H_
diff --git a/contrib/libs/tcmalloc/tcmalloc/system-alloc_test.cc b/contrib/libs/tcmalloc/tcmalloc/system-alloc_test.cc
new file mode 100644
index 0000000000..496bd048ee
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/system-alloc_test.cc
@@ -0,0 +1,147 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/system-alloc.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+
+#include <algorithm>
+#include <limits>
+#include <new>
+#include <utility>
+
+#include "gtest/gtest.h"
+#include "absl/strings/str_format.h"
+#include "benchmark/benchmark.h"
+#include "tcmalloc/common.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/malloc_extension.h"
+
+namespace tcmalloc {
+namespace tcmalloc_internal {
+namespace {
+
+class MmapAlignedTest : public testing::TestWithParam<size_t> {
+ protected:
+ void MmapAndCheck(size_t size, size_t alignment) {
+ SCOPED_TRACE(absl::StrFormat("size = %u, alignment = %u", size, alignment));
+
+ for (MemoryTag tag : {MemoryTag::kNormal, MemoryTag::kSampled}) {
+ SCOPED_TRACE(static_cast<unsigned int>(tag));
+
+ void* p = MmapAligned(size, alignment, tag);
+ EXPECT_NE(p, nullptr);
+ EXPECT_EQ(reinterpret_cast<uintptr_t>(p) % alignment, 0);
+ EXPECT_EQ(IsTaggedMemory(p), tag == MemoryTag::kSampled);
+ EXPECT_EQ(GetMemoryTag(p), tag);
+ EXPECT_EQ(GetMemoryTag(static_cast<char*>(p) + size - 1), tag);
+ EXPECT_EQ(munmap(p, size), 0);
+ }
+ }
+};
+INSTANTIATE_TEST_SUITE_P(VariedAlignment, MmapAlignedTest,
+ testing::Values(kPageSize, kMinSystemAlloc,
+ kMinMmapAlloc,
+ uintptr_t{1} << kTagShift));
+
+TEST_P(MmapAlignedTest, CorrectAlignmentAndTag) {
+ MmapAndCheck(kMinSystemAlloc, GetParam());
+}
+
+// Ensure mmap sizes near kTagMask still have the correct tag at the beginning
+// and end of the mapping.
+TEST_F(MmapAlignedTest, LargeSizeSmallAlignment) {
+ MmapAndCheck(uintptr_t{1} << kTagShift, kPageSize);
+}
+
+// Was SimpleRegion::Alloc invoked at least once?
+static bool simple_region_alloc_invoked = false;
+
+class SimpleRegion : public AddressRegion {
+ public:
+ SimpleRegion(uintptr_t start, size_t size)
+ : start_(start), free_size_(size) {}
+
+ std::pair<void*, size_t> Alloc(size_t size, size_t alignment) override {
+ simple_region_alloc_invoked = true;
+ uintptr_t result = (start_ + free_size_ - size) & ~(alignment - 1);
+ if (result < start_ || result >= start_ + free_size_) return {nullptr, 0};
+ size_t actual_size = start_ + free_size_ - result;
+ free_size_ -= actual_size;
+ void* ptr = reinterpret_cast<void*>(result);
+ int err = mprotect(ptr, actual_size, PROT_READ | PROT_WRITE);
+ CHECK_CONDITION(err == 0);
+ return {ptr, actual_size};
+ }
+
+ private:
+ uintptr_t start_;
+ size_t free_size_;
+};
+
+class SimpleRegionFactory : public AddressRegionFactory {
+ public:
+ AddressRegion* Create(void* start, size_t size, UsageHint hint) override {
+ void* region_space = MallocInternal(sizeof(SimpleRegion));
+ CHECK_CONDITION(region_space != nullptr);
+ return new (region_space)
+ SimpleRegion(reinterpret_cast<uintptr_t>(start), size);
+ }
+};
+SimpleRegionFactory f;
+
+TEST(Basic, InvokedTest) {
+ MallocExtension::SetRegionFactory(&f);
+
+ // An allocation size that is likely to trigger the system allocator.
+ void* ptr = ::operator new(kMinSystemAlloc);
+ // TODO(b/183453911): Remove workaround for GCC 10.x deleting operator new,
+ // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94295.
+ benchmark::DoNotOptimize(ptr);
+ ::operator delete(ptr);
+
+ // Make sure that our allocator was invoked.
+ ASSERT_TRUE(simple_region_alloc_invoked);
+}
+
+TEST(Basic, RetryFailTest) {
+  // Check that the allocator still works after a failed allocation.
+ //
+ // There is no way to call malloc and guarantee it will fail. malloc takes a
+ // size_t parameter and the C++ standard does not constrain the size of
+ // size_t. For example, consider an implementation where size_t is 32 bits
+ // and pointers are 64 bits.
+ //
+ // It is likely, though, that sizeof(size_t) == sizeof(void*). In that case,
+ // the first allocation here might succeed but the second allocation must
+ // fail.
+ //
+ // If the second allocation succeeds, you will have to rewrite or
+ // disable this test.
+ const size_t kHugeSize = std::numeric_limits<size_t>::max() / 2;
+ void* p1 = malloc(kHugeSize);
+ void* p2 = malloc(kHugeSize);
+ ASSERT_EQ(p2, nullptr);
+ if (p1 != nullptr) free(p1);
+
+ void* q = malloc(1024);
+ ASSERT_NE(q, nullptr);
+ free(q);
+}
+
+} // namespace
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
diff --git a/contrib/libs/tcmalloc/tcmalloc/tcmalloc.cc b/contrib/libs/tcmalloc/tcmalloc/tcmalloc.cc
new file mode 100644
index 0000000000..8e62ba91b9
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/tcmalloc.cc
@@ -0,0 +1,2441 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// tcmalloc is a fast malloc implementation. See
+// https://github.com/google/tcmalloc/tree/master/docs/design.md for a high-level description of
+// how this malloc works.
+//
+// SYNCHRONIZATION
+// 1. The thread-/cpu-specific lists are accessed without acquiring any locks.
+// This is safe because each such list is only accessed by one thread/cpu at
+// a time.
+// 2. We have a lock per central free-list, and hold it while manipulating
+// the central free list for a particular size.
+// 3. The central page allocator is protected by "pageheap_lock".
+// 4. The pagemap (which maps from page-number to descriptor),
+// can be read without holding any locks, and written while holding
+// the "pageheap_lock".
+//
+// This multi-threaded access to the pagemap is safe for fairly
+// subtle reasons. We basically assume that when an object X is
+// allocated by thread A and deallocated by thread B, there must
+// have been appropriate synchronization in the handoff of object
+// X from thread A to thread B.
+//
+// PAGEMAP
+// -------
+// Page map contains a mapping from page id to Span.
+//
+// If Span s occupies pages [p..q],
+// pagemap[p] == s
+// pagemap[q] == s
+// pagemap[p+1..q-1] are undefined
+// pagemap[p-1] and pagemap[q+1] are defined:
+// NULL if the corresponding page is not yet in the address space.
+// Otherwise it points to a Span. This span may be free
+//         or allocated. If free, it is in one of the pageheap's freelists.
+
+#include "tcmalloc/tcmalloc.h"
+
+#include <errno.h>
+#include <inttypes.h>
+#include <sched.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <algorithm>
+#include <atomic>
+#include <cstddef>
+#include <limits>
+#include <map>
+#include <memory>
+#include <new>
+#include <string>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+#include "absl/base/attributes.h"
+#include "absl/base/config.h"
+#include "absl/base/const_init.h"
+#include "absl/base/dynamic_annotations.h"
+#include "absl/base/internal/spinlock.h"
+#include "absl/base/internal/sysinfo.h"
+#include "absl/base/macros.h"
+#include "absl/base/optimization.h"
+#include "absl/base/thread_annotations.h"
+#include "absl/debugging/stacktrace.h"
+#include "absl/memory/memory.h"
+#include "absl/numeric/bits.h"
+#include "absl/strings/match.h"
+#include "absl/strings/numbers.h"
+#include "absl/strings/strip.h"
+#include "tcmalloc/central_freelist.h"
+#include "tcmalloc/common.h"
+#include "tcmalloc/cpu_cache.h"
+#include "tcmalloc/experiment.h"
+#include "tcmalloc/guarded_page_allocator.h"
+#include "tcmalloc/internal/linked_list.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/internal/memory_stats.h"
+#include "tcmalloc/internal/optimization.h"
+#include "tcmalloc/internal/percpu.h"
+#include "tcmalloc/internal_malloc_extension.h"
+#include "tcmalloc/malloc_extension.h"
+#include "tcmalloc/page_allocator.h"
+#include "tcmalloc/page_heap.h"
+#include "tcmalloc/page_heap_allocator.h"
+#include "tcmalloc/pagemap.h"
+#include "tcmalloc/pages.h"
+#include "tcmalloc/parameters.h"
+#include "tcmalloc/sampler.h"
+#include "tcmalloc/span.h"
+#include "tcmalloc/stack_trace_table.h"
+#include "tcmalloc/static_vars.h"
+#include "tcmalloc/stats.h"
+#include "tcmalloc/system-alloc.h"
+#include "tcmalloc/tcmalloc_policy.h"
+#include "tcmalloc/thread_cache.h"
+#include "tcmalloc/tracking.h"
+#include "tcmalloc/transfer_cache.h"
+#include "tcmalloc/transfer_cache_stats.h"
+
+#if defined(TCMALLOC_HAVE_STRUCT_MALLINFO)
+#include <malloc.h>
+#endif
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+// ----------------------- IMPLEMENTATION -------------------------------
+
+// Extract interesting stats
+struct TCMallocStats {
+ uint64_t thread_bytes; // Bytes in thread caches
+ uint64_t central_bytes; // Bytes in central cache
+ uint64_t transfer_bytes; // Bytes in central transfer cache
+ uint64_t metadata_bytes; // Bytes alloced for metadata
+ uint64_t sharded_transfer_bytes; // Bytes in per-CCX cache
+ uint64_t per_cpu_bytes; // Bytes in per-CPU cache
+ uint64_t pagemap_root_bytes_res; // Resident bytes of pagemap root node
+ uint64_t percpu_metadata_bytes_res; // Resident bytes of the per-CPU metadata
+ AllocatorStats tc_stats; // ThreadCache objects
+ AllocatorStats span_stats; // Span objects
+ AllocatorStats stack_stats; // StackTrace objects
+ AllocatorStats bucket_stats; // StackTraceTable::Bucket objects
+ size_t pagemap_bytes; // included in metadata bytes
+ size_t percpu_metadata_bytes; // included in metadata bytes
+ BackingStats pageheap; // Stats from page heap
+
+ // Explicitly declare the ctor to put it in the google_malloc section.
+ TCMallocStats() = default;
+};
+
+// Get stats into "r". Also, if class_count != NULL, class_count[k]
+// will be set to the total number of objects of size class k in the
+// central cache, transfer cache, and per-thread and per-CPU caches.
+// If small_spans is non-NULL, it is filled. Same for large_spans.
+// The boolean report_residence determines whether residence information
+// should be captured or not. Residence info requires a potentially
+// costly OS call, and is not necessary in all situations.
+static void ExtractStats(TCMallocStats* r, uint64_t* class_count,
+ SpanStats* span_stats, SmallSpanStats* small_spans,
+ LargeSpanStats* large_spans,
+ TransferCacheStats* tc_stats, bool report_residence) {
+ r->central_bytes = 0;
+ r->transfer_bytes = 0;
+ for (int cl = 0; cl < kNumClasses; ++cl) {
+ const size_t length = Static::central_freelist(cl).length();
+ const size_t tc_length = Static::transfer_cache().tc_length(cl);
+ const size_t cache_overhead = Static::central_freelist(cl).OverheadBytes();
+ const size_t size = Static::sizemap().class_to_size(cl);
+ r->central_bytes += (size * length) + cache_overhead;
+ r->transfer_bytes += (size * tc_length);
+ if (class_count) {
+ // Sum the lengths of all per-class freelists, except the per-thread
+ // freelists, which get counted when we call GetThreadStats(), below.
+ class_count[cl] = length + tc_length;
+ if (UsePerCpuCache()) {
+ class_count[cl] += Static::cpu_cache().TotalObjectsOfClass(cl);
+ }
+ }
+ if (span_stats) {
+ span_stats[cl] = Static::central_freelist(cl).GetSpanStats();
+ }
+ if (tc_stats) {
+ tc_stats[cl] = Static::transfer_cache().GetHitRateStats(cl);
+ }
+ }
+
+ // Add stats from per-thread heaps
+ r->thread_bytes = 0;
+ { // scope
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+ ThreadCache::GetThreadStats(&r->thread_bytes, class_count);
+ r->tc_stats = ThreadCache::HeapStats();
+ r->span_stats = Static::span_allocator().stats();
+ r->stack_stats = Static::stacktrace_allocator().stats();
+ r->bucket_stats = Static::bucket_allocator().stats();
+ r->metadata_bytes = Static::metadata_bytes();
+ r->pagemap_bytes = Static::pagemap().bytes();
+ r->pageheap = Static::page_allocator().stats();
+ if (small_spans != nullptr) {
+ Static::page_allocator().GetSmallSpanStats(small_spans);
+ }
+ if (large_spans != nullptr) {
+ Static::page_allocator().GetLargeSpanStats(large_spans);
+ }
+ }
+ // We can access the pagemap without holding the pageheap_lock since it
+ // is static data, and we are only taking address and size which are
+ // constants.
+ if (report_residence) {
+ auto resident_bytes = Static::pagemap_residence();
+ r->pagemap_root_bytes_res = resident_bytes;
+ ASSERT(r->metadata_bytes >= r->pagemap_bytes);
+ r->metadata_bytes = r->metadata_bytes - r->pagemap_bytes + resident_bytes;
+ } else {
+ r->pagemap_root_bytes_res = 0;
+ }
+
+ r->per_cpu_bytes = 0;
+ r->sharded_transfer_bytes = 0;
+ r->percpu_metadata_bytes_res = 0;
+ r->percpu_metadata_bytes = 0;
+ if (UsePerCpuCache()) {
+ r->per_cpu_bytes = Static::cpu_cache().TotalUsedBytes();
+ r->sharded_transfer_bytes = Static::sharded_transfer_cache().TotalBytes();
+
+ if (report_residence) {
+ auto percpu_metadata = Static::cpu_cache().MetadataMemoryUsage();
+ r->percpu_metadata_bytes_res = percpu_metadata.resident_size;
+ r->percpu_metadata_bytes = percpu_metadata.virtual_size;
+
+ ASSERT(r->metadata_bytes >= r->percpu_metadata_bytes);
+ r->metadata_bytes = r->metadata_bytes - r->percpu_metadata_bytes +
+ r->percpu_metadata_bytes_res;
+ }
+ }
+}
+
+static void ExtractTCMallocStats(TCMallocStats* r, bool report_residence) {
+ ExtractStats(r, nullptr, nullptr, nullptr, nullptr, nullptr,
+ report_residence);
+}
+
+// Because different fields of stats are computed from state protected
+// by different locks, they may be inconsistent. Prevent underflow
+// when subtracting to avoid gigantic results.
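+// For example, StatSub(5, 7) is 0 rather than a value near 2^64.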
+static uint64_t StatSub(uint64_t a, uint64_t b) {
+ return (a >= b) ? (a - b) : 0;
+}
+
+// Return approximate number of bytes in use by app.
+static uint64_t InUseByApp(const TCMallocStats& stats) {
+ return StatSub(stats.pageheap.system_bytes,
+ stats.thread_bytes + stats.central_bytes +
+ stats.transfer_bytes + stats.per_cpu_bytes +
+ stats.sharded_transfer_bytes + stats.pageheap.free_bytes +
+ stats.pageheap.unmapped_bytes);
+}
+
+static uint64_t VirtualMemoryUsed(const TCMallocStats& stats) {
+ return stats.pageheap.system_bytes + stats.metadata_bytes;
+}
+
+static uint64_t PhysicalMemoryUsed(const TCMallocStats& stats) {
+ return StatSub(VirtualMemoryUsed(stats), stats.pageheap.unmapped_bytes);
+}
+
+// The number of bytes either in use by the app or fragmented so that
+// it cannot be (arbitrarily) reused.
+static uint64_t RequiredBytes(const TCMallocStats& stats) {
+ return StatSub(PhysicalMemoryUsed(stats), stats.pageheap.free_bytes);
+}
+
+static int CountAllowedCpus() {
+ cpu_set_t allowed_cpus;
+ if (sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus) != 0) {
+ return 0;
+ }
+
+ return CPU_COUNT(&allowed_cpus);
+}
+
+// WRITE stats to "out"
+static void DumpStats(Printer* out, int level) {
+ TCMallocStats stats;
+ uint64_t class_count[kNumClasses];
+ SpanStats span_stats[kNumClasses];
+ TransferCacheStats tc_stats[kNumClasses];
+ if (level >= 2) {
+ ExtractStats(&stats, class_count, span_stats, nullptr, nullptr, tc_stats,
+ true);
+ } else {
+ ExtractTCMallocStats(&stats, true);
+ }
+
+ static const double MiB = 1048576.0;
+
+ out->printf(
+ "See https://github.com/google/tcmalloc/tree/master/docs/stats.md for an explanation of "
+ "this page\n");
+
+ const uint64_t virtual_memory_used = VirtualMemoryUsed(stats);
+ const uint64_t physical_memory_used = PhysicalMemoryUsed(stats);
+ const uint64_t bytes_in_use_by_app = InUseByApp(stats);
+
+#ifdef TCMALLOC_SMALL_BUT_SLOW
+ out->printf("NOTE: SMALL MEMORY MODEL IS IN USE, PERFORMANCE MAY SUFFER.\n");
+#endif
+ // clang-format off
+ // Avoid clang-format complaining about the way that this text is laid out.
+ out->printf(
+ "------------------------------------------------\n"
+ "MALLOC: %12" PRIu64 " (%7.1f MiB) Bytes in use by application\n"
+ "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in page heap freelist\n"
+ "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in central cache freelist\n"
+ "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in per-CPU cache freelist\n"
+ "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in Sharded cache freelist\n"
+ "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in transfer cache freelist\n"
+ "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in thread cache freelists\n"
+ "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in malloc metadata\n"
+ "MALLOC: ------------\n"
+ "MALLOC: = %12" PRIu64 " (%7.1f MiB) Actual memory used (physical + swap)\n"
+ "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes released to OS (aka unmapped)\n"
+ "MALLOC: ------------\n"
+ "MALLOC: = %12" PRIu64 " (%7.1f MiB) Virtual address space used\n"
+ "MALLOC:\n"
+ "MALLOC: %12" PRIu64 " Spans in use\n"
+ "MALLOC: %12" PRIu64 " (%7.1f MiB) Spans created\n"
+ "MALLOC: %12" PRIu64 " Thread heaps in use\n"
+ "MALLOC: %12" PRIu64 " (%7.1f MiB) Thread heaps created\n"
+ "MALLOC: %12" PRIu64 " Stack traces in use\n"
+ "MALLOC: %12" PRIu64 " (%7.1f MiB) Stack traces created\n"
+ "MALLOC: %12" PRIu64 " Table buckets in use\n"
+ "MALLOC: %12" PRIu64 " (%7.1f MiB) Table buckets created\n"
+ "MALLOC: %12" PRIu64 " (%7.1f MiB) Pagemap bytes used\n"
+ "MALLOC: %12" PRIu64 " (%7.1f MiB) Pagemap root resident bytes\n"
+ "MALLOC: %12" PRIu64 " (%7.1f MiB) per-CPU slab bytes used\n"
+ "MALLOC: %12" PRIu64 " (%7.1f MiB) per-CPU slab resident bytes\n"
+ "MALLOC: %12" PRIu64 " Tcmalloc page size\n"
+ "MALLOC: %12" PRIu64 " Tcmalloc hugepage size\n"
+ "MALLOC: %12" PRIu64 " CPUs Allowed in Mask\n",
+ bytes_in_use_by_app, bytes_in_use_by_app / MiB,
+ stats.pageheap.free_bytes, stats.pageheap.free_bytes / MiB,
+ stats.central_bytes, stats.central_bytes / MiB,
+ stats.per_cpu_bytes, stats.per_cpu_bytes / MiB,
+ stats.sharded_transfer_bytes, stats.sharded_transfer_bytes / MiB,
+ stats.transfer_bytes, stats.transfer_bytes / MiB,
+ stats.thread_bytes, stats.thread_bytes / MiB,
+ stats.metadata_bytes, stats.metadata_bytes / MiB,
+ physical_memory_used, physical_memory_used / MiB,
+ stats.pageheap.unmapped_bytes, stats.pageheap.unmapped_bytes / MiB,
+ virtual_memory_used, virtual_memory_used / MiB,
+ uint64_t(stats.span_stats.in_use),
+ uint64_t(stats.span_stats.total),
+ (stats.span_stats.total * sizeof(Span)) / MiB,
+ uint64_t(stats.tc_stats.in_use),
+ uint64_t(stats.tc_stats.total),
+ (stats.tc_stats.total * sizeof(ThreadCache)) / MiB,
+ uint64_t(stats.stack_stats.in_use),
+ uint64_t(stats.stack_stats.total),
+ (stats.stack_stats.total * sizeof(StackTrace)) / MiB,
+ uint64_t(stats.bucket_stats.in_use),
+ uint64_t(stats.bucket_stats.total),
+ (stats.bucket_stats.total * sizeof(StackTraceTable::Bucket)) / MiB,
+ uint64_t(stats.pagemap_bytes),
+ stats.pagemap_bytes / MiB,
+ stats.pagemap_root_bytes_res, stats.pagemap_root_bytes_res / MiB,
+ uint64_t(stats.percpu_metadata_bytes),
+ stats.percpu_metadata_bytes / MiB,
+ stats.percpu_metadata_bytes_res, stats.percpu_metadata_bytes_res / MiB,
+ uint64_t(kPageSize),
+ uint64_t(kHugePageSize),
+ CountAllowedCpus());
+ // clang-format on
+
+ PrintExperiments(out);
+ out->printf(
+ "MALLOC SAMPLED PROFILES: %zu bytes (current), %zu bytes (peak)\n",
+ static_cast<size_t>(Static::sampled_objects_size_.value()),
+ Static::peak_heap_tracker().CurrentPeakSize());
+
+ MemoryStats memstats;
+ if (GetMemoryStats(&memstats)) {
+ uint64_t rss = memstats.rss;
+ uint64_t vss = memstats.vss;
+ // clang-format off
+ out->printf(
+ "\n"
+ "Total process stats (inclusive of non-malloc sources):\n"
+ "TOTAL: %12" PRIu64 " (%7.1f MiB) Bytes resident (physical memory used)\n"
+ "TOTAL: %12" PRIu64 " (%7.1f MiB) Bytes mapped (virtual memory used)\n",
+ rss, rss / MiB, vss, vss / MiB);
+ // clang-format on
+ }
+
+ out->printf(
+ "------------------------------------------------\n"
+ "Call ReleaseMemoryToSystem() to release freelist memory to the OS"
+ " (via madvise()).\n"
+ "Bytes released to the OS take up virtual address space"
+ " but no physical memory.\n");
+ if (level >= 2) {
+ out->printf("------------------------------------------------\n");
+ out->printf("Total size of freelists for per-thread and per-CPU caches,\n");
+ out->printf("transfer cache, and central cache, as well as number of\n");
+ out->printf("live pages, returned/requested spans by size class\n");
+ out->printf("------------------------------------------------\n");
+
+ uint64_t cumulative = 0;
+ for (int cl = 1; cl < kNumClasses; ++cl) {
+ uint64_t class_bytes =
+ class_count[cl] * Static::sizemap().class_to_size(cl);
+
+ cumulative += class_bytes;
+ // clang-format off
+ out->printf(
+ "class %3d [ %8zu bytes ] : %8" PRIu64 " objs; %5.1f MiB; %5.1f cum MiB; "
+ "%8" PRIu64 " live pages; spans: %6zu ret / %6zu req = %5.4f;\n",
+ cl, Static::sizemap().class_to_size(cl), class_count[cl],
+ class_bytes / MiB, cumulative / MiB,
+ span_stats[cl].num_live_spans()*Static::sizemap().class_to_pages(cl),
+ span_stats[cl].num_spans_returned, span_stats[cl].num_spans_requested,
+ span_stats[cl].prob_returned());
+ // clang-format on
+ }
+
+ out->printf("------------------------------------------------\n");
+ out->printf("Transfer cache implementation: %s\n",
+ TransferCacheImplementationToLabel(
+ Static::transfer_cache().implementation()));
+
+ out->printf("------------------------------------------------\n");
+ out->printf("Transfer cache insert/remove hits/misses by size class\n");
+ for (int cl = 1; cl < kNumClasses; ++cl) {
+ out->printf(
+ "class %3d [ %8zu bytes ] : %8" PRIu64 " insert hits; %8" PRIu64
+ " insert misses (%8lu partial); %8" PRIu64 " remove hits; %8" PRIu64
+ " remove misses (%8lu partial);\n",
+ cl, Static::sizemap().class_to_size(cl), tc_stats[cl].insert_hits,
+ tc_stats[cl].insert_misses, tc_stats[cl].insert_non_batch_misses,
+ tc_stats[cl].remove_hits, tc_stats[cl].remove_misses,
+ tc_stats[cl].remove_non_batch_misses);
+ }
+
+ if (UsePerCpuCache()) {
+ Static::cpu_cache().Print(out);
+ }
+
+ Static::page_allocator().Print(out, MemoryTag::kNormal);
+ if (Static::numa_topology().active_partitions() > 1) {
+ Static::page_allocator().Print(out, MemoryTag::kNormalP1);
+ }
+ Static::page_allocator().Print(out, MemoryTag::kSampled);
+ tracking::Print(out);
+ Static::guardedpage_allocator().Print(out);
+
+ uint64_t limit_bytes;
+ bool is_hard;
+ std::tie(limit_bytes, is_hard) = Static::page_allocator().limit();
+ out->printf("PARAMETER desired_usage_limit_bytes %" PRIu64 " %s\n",
+ limit_bytes, is_hard ? "(hard)" : "");
+ out->printf("Number of times limit was hit: %lld\n",
+ Static::page_allocator().limit_hits());
+
+ out->printf("PARAMETER tcmalloc_per_cpu_caches %d\n",
+ Parameters::per_cpu_caches() ? 1 : 0);
+ out->printf("PARAMETER tcmalloc_max_per_cpu_cache_size %d\n",
+ Parameters::max_per_cpu_cache_size());
+ out->printf("PARAMETER tcmalloc_max_total_thread_cache_bytes %lld\n",
+ Parameters::max_total_thread_cache_bytes());
+ out->printf("PARAMETER malloc_release_bytes_per_sec %llu\n",
+ Parameters::background_release_rate());
+ out->printf(
+ "PARAMETER tcmalloc_skip_subrelease_interval %s\n",
+ absl::FormatDuration(Parameters::filler_skip_subrelease_interval()));
+ out->printf("PARAMETER flat vcpus %d\n",
+ subtle::percpu::UsingFlatVirtualCpus() ? 1 : 0);
+ }
+}
+
+namespace {
+
+/*static*/ void DumpStatsInPbtxt(Printer* out, int level) {
+ TCMallocStats stats;
+ uint64_t class_count[kNumClasses];
+ SpanStats span_stats[kNumClasses];
+ TransferCacheStats tc_stats[kNumClasses];
+ if (level >= 2) {
+ ExtractStats(&stats, class_count, span_stats, nullptr, nullptr, tc_stats,
+ true);
+ } else {
+ ExtractTCMallocStats(&stats, true);
+ }
+
+ const uint64_t bytes_in_use_by_app = InUseByApp(stats);
+ const uint64_t virtual_memory_used = VirtualMemoryUsed(stats);
+ const uint64_t physical_memory_used = PhysicalMemoryUsed(stats);
+
+ PbtxtRegion region(out, kTop, /*indent=*/0);
+ region.PrintI64("in_use_by_app", bytes_in_use_by_app);
+ region.PrintI64("page_heap_freelist", stats.pageheap.free_bytes);
+ region.PrintI64("central_cache_freelist", stats.central_bytes);
+ region.PrintI64("per_cpu_cache_freelist", stats.per_cpu_bytes);
+ region.PrintI64("sharded_transfer_cache_freelist",
+ stats.sharded_transfer_bytes);
+ region.PrintI64("transfer_cache_freelist", stats.transfer_bytes);
+ region.PrintI64("thread_cache_freelists", stats.thread_bytes);
+ region.PrintI64("malloc_metadata", stats.metadata_bytes);
+ region.PrintI64("actual_mem_used", physical_memory_used);
+ region.PrintI64("unmapped", stats.pageheap.unmapped_bytes);
+ region.PrintI64("virtual_address_space_used", virtual_memory_used);
+ region.PrintI64("num_spans", uint64_t(stats.span_stats.in_use));
+ region.PrintI64("num_spans_created", uint64_t(stats.span_stats.total));
+ region.PrintI64("num_thread_heaps", uint64_t(stats.tc_stats.in_use));
+ region.PrintI64("num_thread_heaps_created", uint64_t(stats.tc_stats.total));
+ region.PrintI64("num_stack_traces", uint64_t(stats.stack_stats.in_use));
+ region.PrintI64("num_stack_traces_created",
+ uint64_t(stats.stack_stats.total));
+ region.PrintI64("num_table_buckets", uint64_t(stats.bucket_stats.in_use));
+ region.PrintI64("num_table_buckets_created",
+ uint64_t(stats.bucket_stats.total));
+ region.PrintI64("pagemap_size", uint64_t(stats.pagemap_bytes));
+ region.PrintI64("pagemap_root_residence", stats.pagemap_root_bytes_res);
+ region.PrintI64("percpu_slab_size", stats.percpu_metadata_bytes);
+ region.PrintI64("percpu_slab_residence", stats.percpu_metadata_bytes_res);
+ region.PrintI64("tcmalloc_page_size", uint64_t(kPageSize));
+ region.PrintI64("tcmalloc_huge_page_size", uint64_t(kHugePageSize));
+ region.PrintI64("cpus_allowed", CountAllowedCpus());
+
+ {
+ auto sampled_profiles = region.CreateSubRegion("sampled_profiles");
+ sampled_profiles.PrintI64("current_bytes",
+ Static::sampled_objects_size_.value());
+ sampled_profiles.PrintI64("peak_bytes",
+ Static::peak_heap_tracker().CurrentPeakSize());
+ }
+
+ // Print total process stats (inclusive of non-malloc sources).
+ MemoryStats memstats;
+ if (GetMemoryStats(&memstats)) {
+ region.PrintI64("total_resident", uint64_t(memstats.rss));
+ region.PrintI64("total_mapped", uint64_t(memstats.vss));
+ }
+
+ if (level >= 2) {
+ {
+ for (int cl = 1; cl < kNumClasses; ++cl) {
+ uint64_t class_bytes =
+ class_count[cl] * Static::sizemap().class_to_size(cl);
+ PbtxtRegion entry = region.CreateSubRegion("freelist");
+ entry.PrintI64("sizeclass", Static::sizemap().class_to_size(cl));
+ entry.PrintI64("bytes", class_bytes);
+ entry.PrintI64("num_spans_requested",
+ span_stats[cl].num_spans_requested);
+ entry.PrintI64("num_spans_returned", span_stats[cl].num_spans_returned);
+ entry.PrintI64("obj_capacity", span_stats[cl].obj_capacity);
+ }
+ }
+
+ {
+ for (int cl = 1; cl < kNumClasses; ++cl) {
+ PbtxtRegion entry = region.CreateSubRegion("transfer_cache");
+ entry.PrintI64("sizeclass", Static::sizemap().class_to_size(cl));
+ entry.PrintI64("insert_hits", tc_stats[cl].insert_hits);
+ entry.PrintI64("insert_misses", tc_stats[cl].insert_misses);
+ entry.PrintI64("insert_non_batch_misses",
+ tc_stats[cl].insert_non_batch_misses);
+ entry.PrintI64("remove_hits", tc_stats[cl].remove_hits);
+ entry.PrintI64("remove_misses", tc_stats[cl].remove_misses);
+ entry.PrintI64("remove_non_batch_misses",
+ tc_stats[cl].remove_non_batch_misses);
+ }
+ }
+
+ region.PrintRaw("transfer_cache_implementation",
+ TransferCacheImplementationToLabel(
+ Static::transfer_cache().implementation()));
+
+ if (UsePerCpuCache()) {
+ Static::cpu_cache().PrintInPbtxt(&region);
+ }
+ }
+ Static::page_allocator().PrintInPbtxt(&region, MemoryTag::kNormal);
+ if (Static::numa_topology().active_partitions() > 1) {
+ Static::page_allocator().PrintInPbtxt(&region, MemoryTag::kNormalP1);
+ }
+ Static::page_allocator().PrintInPbtxt(&region, MemoryTag::kSampled);
+ // We do not collect tracking information in pbtxt.
+
+ size_t limit_bytes;
+ bool is_hard;
+ std::tie(limit_bytes, is_hard) = Static::page_allocator().limit();
+ region.PrintI64("desired_usage_limit_bytes", limit_bytes);
+ region.PrintBool("hard_limit", is_hard);
+ region.PrintI64("limit_hits", Static::page_allocator().limit_hits());
+
+ {
+ auto gwp_asan = region.CreateSubRegion("gwp_asan");
+ Static::guardedpage_allocator().PrintInPbtxt(&gwp_asan);
+ }
+
+ region.PrintI64("memory_release_failures", SystemReleaseErrors());
+
+ region.PrintBool("tcmalloc_per_cpu_caches", Parameters::per_cpu_caches());
+ region.PrintI64("tcmalloc_max_per_cpu_cache_size",
+ Parameters::max_per_cpu_cache_size());
+ region.PrintI64("tcmalloc_max_total_thread_cache_bytes",
+ Parameters::max_total_thread_cache_bytes());
+ region.PrintI64("malloc_release_bytes_per_sec",
+ static_cast<int64_t>(Parameters::background_release_rate()));
+ region.PrintI64(
+ "tcmalloc_skip_subrelease_interval_ns",
+ absl::ToInt64Nanoseconds(Parameters::filler_skip_subrelease_interval()));
+ region.PrintRaw("percpu_vcpu_type",
+ subtle::percpu::UsingFlatVirtualCpus() ? "FLAT" : "NONE");
+}
+
+} // namespace
+
+// Gets a human-readable description of the current state of the malloc data
+// structures, in pbtxt format. The output is written as a null-terminated
+// string into a prefix of "buffer[0,buffer_length-1]". Returns the number of
+// bytes written to `buffer`.
+//
+// REQUIRES: buffer_length > 0.
+//
+// TODO(b/130249686): This is NOT YET ready to use.
+extern "C" ABSL_ATTRIBUTE_UNUSED int MallocExtension_Internal_GetStatsInPbtxt(
+ char* buffer, int buffer_length) {
+ ASSERT(buffer_length > 0);
+ Printer printer(buffer, buffer_length);
+
+  // Print only level-one stats unless plenty of buffer space is available.
+ if (buffer_length < 10000) {
+ DumpStatsInPbtxt(&printer, 1);
+ } else {
+ DumpStatsInPbtxt(&printer, 2);
+ }
+
+ size_t required = printer.SpaceRequired();
+
+ if (buffer_length > required) {
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+ required += GetRegionFactory()->GetStatsInPbtxt(
+ absl::Span<char>(buffer + required, buffer_length - required));
+ }
+
+ return required;
+}
+
+static void PrintStats(int level) {
+ const int kBufferSize = (TCMALLOC_HAVE_TRACKING ? 2 << 20 : 64 << 10);
+ char* buffer = new char[kBufferSize];
+ Printer printer(buffer, kBufferSize);
+ DumpStats(&printer, level);
+ (void)write(STDERR_FILENO, buffer, strlen(buffer));
+ delete[] buffer;
+}
+
+// This function computes a profile that maps a live stack trace to
+// the number of bytes of central-cache memory pinned by an allocation
+// at that stack trace.
+static std::unique_ptr<const ProfileBase> DumpFragmentationProfile() {
+ auto profile = absl::make_unique<StackTraceTable>(ProfileType::kFragmentation,
+ 1, true, true);
+
+ {
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+ for (Span* s : Static::sampled_objects_) {
+ // Compute fragmentation to charge to this sample:
+ StackTrace* const t = s->sampled_stack();
+ if (t->proxy == nullptr) {
+ // There is just one object per-span, and neighboring spans
+ // can be released back to the system, so we charge no
+ // fragmentation to this sampled object.
+ continue;
+ }
+
+ // Fetch the span on which the proxy lives so we can examine its
+ // co-residents.
+ const PageId p = PageIdContaining(t->proxy);
+ Span* span = Static::pagemap().GetDescriptor(p);
+ if (span == nullptr) {
+ // Avoid crashes in production mode code, but report in tests.
+ ASSERT(span != nullptr);
+ continue;
+ }
+
+ const double frag = span->Fragmentation();
+ if (frag > 0) {
+ profile->AddTrace(frag, *t);
+ }
+ }
+ }
+ return profile;
+}
+
+// If <unsample> is true, the caller expects a profile where sampling has been
+// compensated for (that is, it reports 8000 16-byte objects iff we believe the
+// program has that many live objects). Otherwise, do not adjust for sampling
+// (the caller will do the adjustment itself).
+static std::unique_ptr<const ProfileBase> DumpHeapProfile(bool unsample) {
+ auto profile = absl::make_unique<StackTraceTable>(
+ ProfileType::kHeap, Sampler::GetSamplePeriod(), true, unsample);
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+ for (Span* s : Static::sampled_objects_) {
+ profile->AddTrace(1.0, *s->sampled_stack());
+ }
+ return profile;
+}
+
+class AllocationSampleList;
+
+class AllocationSample final : public AllocationProfilingTokenBase {
+ public:
+ AllocationSample();
+ ~AllocationSample() override;
+
+ Profile Stop() && override;
+
+ private:
+ std::unique_ptr<StackTraceTable> mallocs_;
+ AllocationSample* next ABSL_GUARDED_BY(pageheap_lock);
+ friend class AllocationSampleList;
+};
+
+class AllocationSampleList {
+ public:
+ void Add(AllocationSample* as) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) {
+ as->next = first_;
+ first_ = as;
+ }
+
+  // This list is very short and we're nowhere near a hot path; just walk it
+  // linearly.
+ void Remove(AllocationSample* as)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) {
+ AllocationSample** link = &first_;
+ AllocationSample* cur = first_;
+ while (cur != as) {
+ CHECK_CONDITION(cur != nullptr);
+ link = &cur->next;
+ cur = cur->next;
+ }
+ *link = as->next;
+ }
+
+ void ReportMalloc(const struct StackTrace& sample)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) {
+ AllocationSample* cur = first_;
+ while (cur != nullptr) {
+ cur->mallocs_->AddTrace(1.0, sample);
+ cur = cur->next;
+ }
+ }
+
+ private:
+ AllocationSample* first_;
+} allocation_samples_ ABSL_GUARDED_BY(pageheap_lock);
+
+AllocationSample::AllocationSample() {
+ mallocs_ = absl::make_unique<StackTraceTable>(
+ ProfileType::kAllocations, Sampler::GetSamplePeriod(), true, true);
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+ allocation_samples_.Add(this);
+}
+
+AllocationSample::~AllocationSample() {
+ if (mallocs_ == nullptr) {
+ return;
+ }
+
+  // The sample was deleted before its profile was stopped; unregister it here.
+ {
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+ allocation_samples_.Remove(this);
+ }
+}
+
+Profile AllocationSample::Stop() && ABSL_LOCKS_EXCLUDED(pageheap_lock) {
+  // We need to remove ourselves from the allocation_samples_ list before we
+  // mutate mallocs_.
+ if (mallocs_) {
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+ allocation_samples_.Remove(this);
+ }
+ return ProfileAccessor::MakeProfile(std::move(mallocs_));
+}
+
+extern "C" void MallocExtension_Internal_GetStats(std::string* ret) {
+ for (size_t shift = 17; shift < 22; shift++) {
+ const size_t size = 1 << shift;
+ // Double ret's size until we succeed in writing the buffer without
+ // truncation.
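+    // (For reference: the sizes tried are 2^17 - 1 through 2^21 - 1 bytes,
+    // i.e. roughly 128 KiB up to 2 MiB.)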
+ //
+ // TODO(b/142931922): printer only writes data and does not read it.
+ // Leverage https://wg21.link/P1072 when it is standardized.
+ ret->resize(size - 1);
+
+ size_t written_size = TCMalloc_Internal_GetStats(&*ret->begin(), size - 1);
+ if (written_size < size - 1) {
+ // We did not truncate.
+ ret->resize(written_size);
+ break;
+ }
+ }
+}
+
+extern "C" size_t TCMalloc_Internal_GetStats(char* buffer,
+ size_t buffer_length) {
+ Printer printer(buffer, buffer_length);
+ if (buffer_length < 10000) {
+ DumpStats(&printer, 1);
+ } else {
+ DumpStats(&printer, 2);
+ }
+
+ printer.printf("\nLow-level allocator stats:\n");
+ printer.printf("Memory Release Failures: %d\n", SystemReleaseErrors());
+
+ size_t n = printer.SpaceRequired();
+
+ size_t bytes_remaining = buffer_length > n ? buffer_length - n : 0;
+ if (bytes_remaining > 0) {
+ n += GetRegionFactory()->GetStats(
+ absl::Span<char>(buffer + n, bytes_remaining));
+ }
+
+ return n;
+}
+
+extern "C" const ProfileBase* MallocExtension_Internal_SnapshotCurrent(
+ ProfileType type) {
+ switch (type) {
+ case ProfileType::kHeap:
+ return DumpHeapProfile(true).release();
+ case ProfileType::kFragmentation:
+ return DumpFragmentationProfile().release();
+ case ProfileType::kPeakHeap:
+ return Static::peak_heap_tracker().DumpSample().release();
+ default:
+ return nullptr;
+ }
+}
+
+extern "C" AllocationProfilingTokenBase*
+MallocExtension_Internal_StartAllocationProfiling() {
+ return new AllocationSample();
+}
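+
+// A sketch of how this token is typically driven through the public
+// MallocExtension wrapper (illustrative only; RunWorkload() is a placeholder,
+// and which Sample fields get inspected is up to the caller):
+//
+//   auto token = tcmalloc::MallocExtension::StartAllocationProfiling();
+//   RunWorkload();  // allocations made here are sampled into the token
+//   tcmalloc::Profile profile = std::move(token).Stop();
+//   profile.Iterate([](const tcmalloc::Profile::Sample& sample) {
+//     // e.g. aggregate sample.sum by stack trace
+//   });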
+
+bool GetNumericProperty(const char* name_data, size_t name_size,
+ size_t* value) {
+ ASSERT(name_data != nullptr);
+ ASSERT(value != nullptr);
+ const absl::string_view name(name_data, name_size);
+
+ // This is near the top since ReleasePerCpuMemoryToOS() calls it frequently.
+ if (name == "tcmalloc.per_cpu_caches_active") {
+ *value = Static::CPUCacheActive();
+ return true;
+ }
+
+ if (name == "generic.virtual_memory_used") {
+ TCMallocStats stats;
+ ExtractTCMallocStats(&stats, false);
+ *value = VirtualMemoryUsed(stats);
+ return true;
+ }
+
+ if (name == "generic.physical_memory_used") {
+ TCMallocStats stats;
+ ExtractTCMallocStats(&stats, false);
+ *value = PhysicalMemoryUsed(stats);
+ return true;
+ }
+
+ if (name == "generic.current_allocated_bytes" ||
+ name == "generic.bytes_in_use_by_app") {
+ TCMallocStats stats;
+ ExtractTCMallocStats(&stats, false);
+ *value = InUseByApp(stats);
+ return true;
+ }
+
+ if (name == "generic.heap_size") {
+ absl::base_internal::SpinLockHolder l(&pageheap_lock);
+ BackingStats stats = Static::page_allocator().stats();
+ *value = stats.system_bytes - stats.unmapped_bytes;
+ return true;
+ }
+
+ if (name == "tcmalloc.central_cache_free") {
+ TCMallocStats stats;
+ ExtractTCMallocStats(&stats, false);
+ *value = stats.central_bytes;
+ return true;
+ }
+
+ if (name == "tcmalloc.cpu_free") {
+ TCMallocStats stats;
+ ExtractTCMallocStats(&stats, false);
+ *value = stats.per_cpu_bytes;
+ return true;
+ }
+
+ if (name == "tcmalloc.sharded_transfer_cache_free") {
+ TCMallocStats stats;
+ ExtractTCMallocStats(&stats, false);
+ *value = stats.sharded_transfer_bytes;
+ return true;
+ }
+
+ if (name == "tcmalloc.slack_bytes") {
+ // Kept for backwards compatibility. Now defined externally as:
+ // pageheap_free_bytes + pageheap_unmapped_bytes.
+ absl::base_internal::SpinLockHolder l(&pageheap_lock);
+ BackingStats stats = Static::page_allocator().stats();
+ *value = stats.free_bytes + stats.unmapped_bytes;
+ return true;
+ }
+
+ if (name == "tcmalloc.pageheap_free_bytes" ||
+ name == "tcmalloc.page_heap_free") {
+ absl::base_internal::SpinLockHolder l(&pageheap_lock);
+ *value = Static::page_allocator().stats().free_bytes;
+ return true;
+ }
+
+ if (name == "tcmalloc.pageheap_unmapped_bytes" ||
+ name == "tcmalloc.page_heap_unmapped") {
+ absl::base_internal::SpinLockHolder l(&pageheap_lock);
+ *value = Static::page_allocator().stats().unmapped_bytes;
+ return true;
+ }
+
+ if (name == "tcmalloc.page_algorithm") {
+ absl::base_internal::SpinLockHolder l(&pageheap_lock);
+ *value = Static::page_allocator().algorithm();
+ return true;
+ }
+
+ if (name == "tcmalloc.max_total_thread_cache_bytes") {
+ absl::base_internal::SpinLockHolder l(&pageheap_lock);
+ *value = ThreadCache::overall_thread_cache_size();
+ return true;
+ }
+
+ if (name == "tcmalloc.current_total_thread_cache_bytes" ||
+ name == "tcmalloc.thread_cache_free") {
+ TCMallocStats stats;
+ ExtractTCMallocStats(&stats, false);
+ *value = stats.thread_bytes;
+ return true;
+ }
+
+ if (name == "tcmalloc.thread_cache_count") {
+ TCMallocStats stats;
+ ExtractTCMallocStats(&stats, false);
+ *value = stats.tc_stats.in_use;
+ return true;
+ }
+
+ if (name == "tcmalloc.local_bytes") {
+ TCMallocStats stats;
+ ExtractTCMallocStats(&stats, false);
+    *value =
+        stats.thread_bytes + stats.per_cpu_bytes + stats.sharded_transfer_bytes;
+ return true;
+ }
+
+ if (name == "tcmalloc.external_fragmentation_bytes") {
+ TCMallocStats stats;
+ ExtractTCMallocStats(&stats, false);
+ *value = (stats.pageheap.free_bytes + stats.central_bytes +
+ stats.per_cpu_bytes + stats.sharded_transfer_bytes +
+ stats.transfer_bytes + stats.thread_bytes + stats.metadata_bytes);
+ return true;
+ }
+
+ if (name == "tcmalloc.metadata_bytes") {
+ TCMallocStats stats;
+ ExtractTCMallocStats(&stats, true);
+ *value = stats.metadata_bytes;
+ return true;
+ }
+
+ if (name == "tcmalloc.transfer_cache_free") {
+ TCMallocStats stats;
+ ExtractTCMallocStats(&stats, false);
+ *value = stats.transfer_bytes;
+ return true;
+ }
+
+ bool want_hard_limit = (name == "tcmalloc.hard_usage_limit_bytes");
+ if (want_hard_limit || name == "tcmalloc.desired_usage_limit_bytes") {
+ size_t amount;
+ bool is_hard;
+ std::tie(amount, is_hard) = Static::page_allocator().limit();
+ if (want_hard_limit != is_hard) {
+ amount = std::numeric_limits<size_t>::max();
+ }
+ *value = amount;
+ return true;
+ }
+
+ if (name == "tcmalloc.required_bytes") {
+ TCMallocStats stats;
+ ExtractTCMallocStats(&stats, false);
+ *value = RequiredBytes(stats);
+ return true;
+ }
+
+ const absl::string_view kExperimentPrefix = "tcmalloc.experiment.";
+ if (absl::StartsWith(name, kExperimentPrefix)) {
+ absl::optional<Experiment> exp =
+ FindExperimentByName(absl::StripPrefix(name, kExperimentPrefix));
+ if (exp.has_value()) {
+ *value = IsExperimentActive(*exp) ? 1 : 0;
+ return true;
+ }
+ }
+
+ return false;
+}
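+
+// A sketch of how this dispatcher is usually reached from application code,
+// assuming the standard MallocExtension wrapper (illustrative only):
+//
+//   absl::optional<size_t> in_use =
+//       tcmalloc::MallocExtension::GetNumericProperty(
+//           "generic.current_allocated_bytes");
+//   if (in_use.has_value()) {
+//     // *in_use is the number of bytes currently in use by the application.
+//   }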
+
+MallocExtension::Ownership GetOwnership(const void* ptr) {
+ const PageId p = PageIdContaining(ptr);
+ return Static::pagemap().GetDescriptor(p)
+ ? MallocExtension::Ownership::kOwned
+ : MallocExtension::Ownership::kNotOwned;
+}
+
+extern "C" bool MallocExtension_Internal_GetNumericProperty(
+ const char* name_data, size_t name_size, size_t* value) {
+ return GetNumericProperty(name_data, name_size, value);
+}
+
+extern "C" void MallocExtension_Internal_GetMemoryLimit(
+ MallocExtension::MemoryLimit* limit) {
+ ASSERT(limit != nullptr);
+
+ std::tie(limit->limit, limit->hard) = Static::page_allocator().limit();
+}
+
+extern "C" void MallocExtension_Internal_SetMemoryLimit(
+ const MallocExtension::MemoryLimit* limit) {
+ ASSERT(limit != nullptr);
+
+ if (!limit->hard) {
+ Parameters::set_heap_size_hard_limit(0);
+ Static::page_allocator().set_limit(limit->limit, false /* !hard */);
+ } else {
+ Parameters::set_heap_size_hard_limit(limit->limit);
+ }
+}
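+
+// For illustration (a sketch based on the MemoryLimit fields used above): a
+// caller setting an 8 GiB hard limit through the public wrapper would do
+// something like
+//
+//   tcmalloc::MallocExtension::MemoryLimit limit;
+//   limit.limit = 8ull << 30;
+//   limit.hard = true;
+//   tcmalloc::MallocExtension::SetMemoryLimit(limit);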
+
+extern "C" void MallocExtension_Internal_MarkThreadIdle() {
+ ThreadCache::BecomeIdle();
+}
+
+extern "C" AddressRegionFactory* MallocExtension_Internal_GetRegionFactory() {
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+ return GetRegionFactory();
+}
+
+extern "C" void MallocExtension_Internal_SetRegionFactory(
+ AddressRegionFactory* factory) {
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+ SetRegionFactory(factory);
+}
+
+// ReleaseMemoryToSystem drops the page heap lock while actually calling into
+// the kernel to release pages. To avoid confusing ourselves with the
+// extra_bytes_released handling, use a separate lock just for the release.
+ABSL_CONST_INIT static absl::base_internal::SpinLock release_lock(
+ absl::kConstInit, absl::base_internal::SCHEDULE_KERNEL_ONLY);
+
+extern "C" void MallocExtension_Internal_ReleaseMemoryToSystem(
+ size_t num_bytes) {
+  // ReleaseMemoryToSystem() might release more than the requested bytes because
+  // the page heap releases at the span granularity, and spans are of wildly
+  // different sizes. This keeps track of the extra bytes released so that
+  // the app can periodically call ReleaseMemoryToSystem() to release
+  // memory at a constant rate.
+ ABSL_CONST_INIT static size_t extra_bytes_released;
+
+ absl::base_internal::SpinLockHolder rh(&release_lock);
+
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+ if (num_bytes <= extra_bytes_released) {
+ // We released too much on a prior call, so don't release any
+ // more this time.
+ extra_bytes_released = extra_bytes_released - num_bytes;
+ num_bytes = 0;
+ } else {
+ num_bytes = num_bytes - extra_bytes_released;
+ }
+
+ Length num_pages;
+ if (num_bytes > 0) {
+ // A sub-page size request may round down to zero. Assume the caller wants
+ // some memory released.
+ num_pages = BytesToLengthCeil(num_bytes);
+ ASSERT(num_pages > Length(0));
+ } else {
+ num_pages = Length(0);
+ }
+ size_t bytes_released =
+ Static::page_allocator().ReleaseAtLeastNPages(num_pages).in_bytes();
+ if (bytes_released > num_bytes) {
+ extra_bytes_released = bytes_released - num_bytes;
+ } else {
+ // The PageHeap wasn't able to release num_bytes. Don't try to compensate
+ // with a big release next time.
+ extra_bytes_released = 0;
+ }
+}
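+
+// A sketch of the "constant rate" usage mentioned above (names and numbers
+// are illustrative, not part of this file): a background thread releasing
+// roughly rate_bytes_per_sec could do
+//
+//   while (true) {
+//     absl::SleepFor(absl::Seconds(1));
+//     tcmalloc::MallocExtension::ReleaseMemoryToSystem(rate_bytes_per_sec);
+//   }
+//
+// Over-releases on one call are credited against later calls through
+// extra_bytes_released, so the long-run release rate stays near the target.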
+
+extern "C" void MallocExtension_EnableForkSupport() {
+ Static::EnableForkSupport();
+}
+
+void TCMallocPreFork() {
+ if (!Static::ForkSupportEnabled()) {
+ return;
+ }
+
+ if (Static::CPUCacheActive()) {
+ Static::cpu_cache().AcquireInternalLocks();
+ }
+ Static::transfer_cache().AcquireInternalLocks();
+ guarded_page_lock.Lock();
+ release_lock.Lock();
+ pageheap_lock.Lock();
+ AcquireSystemAllocLock();
+}
+
+void TCMallocPostFork() {
+ if (!Static::ForkSupportEnabled()) {
+ return;
+ }
+
+ ReleaseSystemAllocLock();
+ pageheap_lock.Unlock();
+ guarded_page_lock.Unlock();
+ release_lock.Unlock();
+ Static::transfer_cache().ReleaseInternalLocks();
+ if (Static::CPUCacheActive()) {
+ Static::cpu_cache().ReleaseInternalLocks();
+ }
+}
+
+extern "C" void MallocExtension_SetSampleUserDataCallbacks(
+ MallocExtension::CreateSampleUserDataCallback create,
+ MallocExtension::CopySampleUserDataCallback copy,
+ MallocExtension::DestroySampleUserDataCallback destroy) {
+ Static::SetSampleUserDataCallbacks(create, copy, destroy);
+}
+
+// nallocx slow path.
+// Moved to a separate function because size_class_with_alignment is not
+// inlined, which would make nallocx a non-leaf function with a stack frame
+// and stack spills. ABSL_ATTRIBUTE_ALWAYS_INLINE does not work on
+// size_class_with_alignment; the compiler complains that it cannot inline
+// the function at some call sites.
+static ABSL_ATTRIBUTE_NOINLINE size_t nallocx_slow(size_t size, int flags) {
+ Static::InitIfNecessary();
+ size_t align = static_cast<size_t>(1ull << (flags & 0x3f));
+ uint32_t cl;
+ if (ABSL_PREDICT_TRUE(Static::sizemap().GetSizeClass(
+ CppPolicy().AlignAs(align), size, &cl))) {
+ ASSERT(cl != 0);
+ return Static::sizemap().class_to_size(cl);
+ } else {
+ return BytesToLengthCeil(size).in_bytes();
+ }
+}
+
+// The nallocx function allocates no memory, but it performs the same size
+// computation as the malloc function, and returns the real size of the
+// allocation that would result from the equivalent malloc function call.
+// nallocx is a malloc extension originally implemented by jemalloc:
+// http://www.unix.com/man-page/freebsd/3/nallocx/
+extern "C" size_t nallocx(size_t size, int flags) noexcept {
+ if (ABSL_PREDICT_FALSE(!Static::IsInited() || flags != 0)) {
+ return nallocx_slow(size, flags);
+ }
+ uint32_t cl;
+ if (ABSL_PREDICT_TRUE(
+ Static::sizemap().GetSizeClass(CppPolicy(), size, &cl))) {
+ ASSERT(cl != 0);
+ return Static::sizemap().class_to_size(cl);
+ } else {
+ return BytesToLengthCeil(size).in_bytes();
+ }
+}
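+
+// Illustrative use (a sketch, not part of this file): nallocx lets a caller
+// size a buffer to the capacity malloc would hand back anyway.
+//
+//   size_t want = 100;
+//   size_t real = nallocx(want, 0);                // real >= want
+//   char* buf = static_cast<char*>(malloc(real));  // no slack left unused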
+
+extern "C" MallocExtension::Ownership MallocExtension_Internal_GetOwnership(
+ const void* ptr) {
+ return GetOwnership(ptr);
+}
+
+extern "C" void MallocExtension_Internal_GetProperties(
+ std::map<std::string, MallocExtension::Property>* result) {
+ TCMallocStats stats;
+ ExtractTCMallocStats(&stats, true);
+
+ const uint64_t virtual_memory_used = VirtualMemoryUsed(stats);
+ const uint64_t physical_memory_used = PhysicalMemoryUsed(stats);
+ const uint64_t bytes_in_use_by_app = InUseByApp(stats);
+
+ result->clear();
+ // Virtual Memory Used
+ (*result)["generic.virtual_memory_used"].value = virtual_memory_used;
+ // Physical Memory used
+ (*result)["generic.physical_memory_used"].value = physical_memory_used;
+ // Bytes in use By App
+ (*result)["generic.bytes_in_use_by_app"].value = bytes_in_use_by_app;
+ // Page Heap Free
+ (*result)["tcmalloc.page_heap_free"].value = stats.pageheap.free_bytes;
+ // Metadata Bytes
+ (*result)["tcmalloc.metadata_bytes"].value = stats.metadata_bytes;
+ // Heaps in Use
+ (*result)["tcmalloc.thread_cache_count"].value = stats.tc_stats.in_use;
+ // Central Cache Free List
+ (*result)["tcmalloc.central_cache_free"].value = stats.central_bytes;
+ // Transfer Cache Free List
+ (*result)["tcmalloc.transfer_cache_free"].value = stats.transfer_bytes;
+ // Per CPU Cache Free List
+ (*result)["tcmalloc.cpu_free"].value = stats.per_cpu_bytes;
+ (*result)["tcmalloc.sharded_transfer_cache_free"].value =
+ stats.sharded_transfer_bytes;
+ (*result)["tcmalloc.per_cpu_caches_active"].value = Static::CPUCacheActive();
+ // Thread Cache Free List
+ (*result)["tcmalloc.thread_cache_free"].value = stats.thread_bytes;
+ // Page Unmapped
+ (*result)["tcmalloc.pageheap_unmapped_bytes"].value =
+ stats.pageheap.unmapped_bytes;
+ (*result)["tcmalloc.page_heap_unmapped"].value =
+ stats.pageheap.unmapped_bytes;
+
+ (*result)["tcmalloc.page_algorithm"].value =
+ Static::page_allocator().algorithm();
+
+ FillExperimentProperties(result);
+ tracking::GetProperties(result);
+}
+
+extern "C" size_t MallocExtension_Internal_ReleaseCpuMemory(int cpu) {
+ size_t bytes = 0;
+ if (Static::CPUCacheActive()) {
+ bytes = Static::cpu_cache().Reclaim(cpu);
+ }
+ return bytes;
+}
+
+//-------------------------------------------------------------------
+// Helpers for the exported routines below
+//-------------------------------------------------------------------
+
+#ifdef ABSL_HAVE_TLS
+// See the comment on ThreadCache::thread_local_data_ regarding
+// ABSL_ATTRIBUTE_INITIAL_EXEC.
+__thread Sampler thread_sampler_ ABSL_ATTRIBUTE_INITIAL_EXEC;
+
+inline Sampler* GetThreadSampler() { return &thread_sampler_; }
+
+#else
+
+inline Sampler* GetThreadSampler() {
+ ThreadCache* heap = ThreadCache::GetCache();
+ return heap->GetSampler();
+}
+
+#endif
+
+enum class Hooks { RUN, NO };
+
+static void FreeSmallSlow(void* ptr, size_t cl);
+
+namespace {
+
+// Sets `*psize` to `size`.
+inline void SetCapacity(size_t size, std::nullptr_t) {}
+inline void SetCapacity(size_t size, size_t* psize) { *psize = size; }
+
+// Sets `*psize` to the size for the size class in `cl`.
+inline void SetClassCapacity(size_t size, std::nullptr_t) {}
+inline void SetClassCapacity(uint32_t cl, size_t* psize) {
+ *psize = Static::sizemap().class_to_size(cl);
+}
+
+// Sets `*psize` to the size for the size class in `cl` if `ptr` is not null,
+// else `*psize` is set to 0. This method is overloaded for `nullptr_t` below,
+// allowing the compiler to optimize code between regular and size returning
+// allocation operations.
+inline void SetClassCapacity(const void*, uint32_t, std::nullptr_t) {}
+inline void SetClassCapacity(const void* ptr, uint32_t cl, size_t* psize) {
+ if (ABSL_PREDICT_TRUE(ptr != nullptr)) {
+ *psize = Static::sizemap().class_to_size(cl);
+ } else {
+ *psize = 0;
+ }
+}
+
+// Sets `*psize` to the size in pages corresponding to the requested size in
+// `size` if `ptr` is not null, else `*psize` is set to 0. This method is
+// overloaded for `nullptr_t` below, allowing the compiler to optimize code
+// between regular and size returning allocation operations.
+inline void SetPagesCapacity(const void*, size_t, std::nullptr_t) {}
+inline void SetPagesCapacity(const void* ptr, size_t size, size_t* psize) {
+ if (ABSL_PREDICT_TRUE(ptr != nullptr)) {
+ *psize = BytesToLengthCeil(size).in_bytes();
+ } else {
+ *psize = 0;
+ }
+}
+
+} // namespace
+
+// In the free fast path we handle delete hooks by delegating work to a slower
+// function that both performs the delete hook calls and does the free. This is
+// done so that the free fast path only does tail calls, which allows the
+// compiler to avoid generating a costly prologue/epilogue for the fast path.
+template <void F(void*, size_t), Hooks hooks_state>
+static ABSL_ATTRIBUTE_SECTION(google_malloc) void invoke_delete_hooks_and_free(
+ void* ptr, size_t t) {
+ // Refresh the fast path state.
+ GetThreadSampler()->UpdateFastPathState();
+ return F(ptr, t);
+}
+
+template <void F(void*, PageId), Hooks hooks_state>
+static ABSL_ATTRIBUTE_SECTION(google_malloc) void invoke_delete_hooks_and_free(
+ void* ptr, PageId p) {
+ // Refresh the fast path state.
+ GetThreadSampler()->UpdateFastPathState();
+ return F(ptr, p);
+}
+
+// Helper for do_free_with_cl
+template <Hooks hooks_state>
+static inline ABSL_ATTRIBUTE_ALWAYS_INLINE void FreeSmall(void* ptr,
+ size_t cl) {
+ if (ABSL_PREDICT_FALSE(!GetThreadSampler()->IsOnFastPath())) {
+ // Take the slow path.
+ invoke_delete_hooks_and_free<FreeSmallSlow, hooks_state>(ptr, cl);
+ return;
+ }
+
+#ifndef TCMALLOC_DEPRECATED_PERTHREAD
+ // The CPU Cache is enabled, so we're able to take the fastpath.
+ ASSERT(Static::CPUCacheActive());
+ ASSERT(subtle::percpu::IsFastNoInit());
+
+ Static::cpu_cache().Deallocate(ptr, cl);
+#else // TCMALLOC_DEPRECATED_PERTHREAD
+ ThreadCache* cache = ThreadCache::GetCacheIfPresent();
+
+ // IsOnFastPath does not track whether or not we have an active ThreadCache on
+ // this thread, so we need to check cache for nullptr.
+ if (ABSL_PREDICT_FALSE(cache == nullptr)) {
+ FreeSmallSlow(ptr, cl);
+ return;
+ }
+
+ cache->Deallocate(ptr, cl);
+#endif // TCMALLOC_DEPRECATED_PERTHREAD
+}
+
+// This helper function is used when FreeSmall (defined above) hits the case
+// of the thread state not being in per-cpu mode, or the case of there being
+// no thread cache. This happens when the thread state is not yet properly
+// initialized with a real thread cache or with per-cpu mode, or when the
+// thread state has already been destroyed as part of thread termination.
+//
+// We explicitly prevent inlining it to keep it out of the fast path, so that
+// the fast path only has a tail call and therefore doesn't need a function
+// prologue/epilogue.
+ABSL_ATTRIBUTE_NOINLINE
+static void FreeSmallSlow(void* ptr, size_t cl) {
+ if (ABSL_PREDICT_TRUE(UsePerCpuCache())) {
+ Static::cpu_cache().Deallocate(ptr, cl);
+ } else if (ThreadCache* cache = ThreadCache::GetCacheIfPresent()) {
+ // TODO(b/134691947): If we reach this path from the ThreadCache fastpath,
+ // we've already checked that UsePerCpuCache is false and cache == nullptr.
+ // Consider optimizing this.
+ cache->Deallocate(ptr, cl);
+ } else {
+    // This thread doesn't have a thread cache yet, or it has already been
+    // destroyed. Delete directly into the central cache.
+ Static::transfer_cache().InsertRange(cl, absl::Span<void*>(&ptr, 1));
+ }
+}
+
+namespace {
+
+// If this allocation can be guarded, and if it's time to do a guarded sample,
+// returns a guarded allocation Span. Otherwise returns nullptr.
+static void* TrySampleGuardedAllocation(size_t size, size_t alignment,
+ Length num_pages) {
+ if (num_pages == Length(1) &&
+ GetThreadSampler()->ShouldSampleGuardedAllocation()) {
+ // The num_pages == 1 constraint ensures that size <= kPageSize. And since
+ // alignments above kPageSize cause cl == 0, we're also guaranteed
+ // alignment <= kPageSize
+ //
+ // In all cases kPageSize <= GPA::page_size_, so Allocate's preconditions
+ // are met.
+ return Static::guardedpage_allocator().Allocate(size, alignment);
+ }
+ return nullptr;
+}
+
+// Performs sampling for an allocation that has already occurred.
+//
+// For very small object sizes, the object is used as a 'proxy' and a full
+// page marked as sampled is allocated instead.
+//
+// For medium-sized objects that have a single instance per span, the object
+// is simply freed and a fresh page span is allocated to represent the sample.
+//
+// For large objects (i.e. allocated with do_malloc_pages) the object is
+// fully reused and its span is marked as sampled.
+//
+// Note that do_free_with_size assumes sampled objects have
+// page-aligned addresses. Please change both functions if you need to
+// invalidate that assumption.
+//
+// Note that cl might not match requested_size in the case of memalign,
+// i.e. when a larger-than-requested allocation is done to satisfy the
+// alignment constraint.
+//
+// In case of an out-of-memory condition when allocating the span or the
+// stacktrace struct, this function simply cheats and returns the original
+// object as if no sampling had been requested.
+static void* SampleifyAllocation(size_t requested_size, size_t weight,
+ size_t requested_alignment, size_t cl,
+ void* obj, Span* span, size_t* capacity) {
+ CHECK_CONDITION((cl != 0 && obj != nullptr && span == nullptr) ||
+ (cl == 0 && obj == nullptr && span != nullptr));
+
+ void* proxy = nullptr;
+ void* guarded_alloc = nullptr;
+ size_t allocated_size;
+
+ // requested_alignment = 1 means 'small size table alignment was used'
+ // Historically this is reported as requested_alignment = 0
+ if (requested_alignment == 1) {
+ requested_alignment = 0;
+ }
+
+ if (cl != 0) {
+ ASSERT(cl == Static::pagemap().sizeclass(PageIdContaining(obj)));
+
+ allocated_size = Static::sizemap().class_to_size(cl);
+
+ // If the caller didn't provide a span, allocate one:
+ Length num_pages = BytesToLengthCeil(allocated_size);
+ if ((guarded_alloc = TrySampleGuardedAllocation(
+ requested_size, requested_alignment, num_pages))) {
+ ASSERT(IsSampledMemory(guarded_alloc));
+ const PageId p = PageIdContaining(guarded_alloc);
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+ span = Span::New(p, num_pages);
+ Static::pagemap().Set(p, span);
+      // If we report capacity back from a size-returning allocation, we cannot
+      // report the allocated_size, as we guard the allocation at 'requested_size',
+      // and we maintain the invariant that GetAllocatedSize() must match the
+      // returned size from size-returning allocations. So in that case, we
+      // report the requested size for both capacity and GetAllocatedSize().
+ if (capacity) allocated_size = requested_size;
+ } else if ((span = Static::page_allocator().New(
+ num_pages, MemoryTag::kSampled)) == nullptr) {
+ if (capacity) *capacity = allocated_size;
+ return obj;
+ }
+
+ size_t span_size = Length(Static::sizemap().class_to_pages(cl)).in_bytes();
+ size_t objects_per_span = span_size / allocated_size;
+
+ if (objects_per_span != 1) {
+ ASSERT(objects_per_span > 1);
+ proxy = obj;
+ obj = nullptr;
+ }
+ } else {
+ // Set allocated_size to the exact size for a page allocation.
+ // NOTE: if we introduce gwp-asan sampling / guarded allocations
+ // for page allocations, then we need to revisit do_malloc_pages as
+ // the current assumption is that only class sized allocs are sampled
+ // for gwp-asan.
+ allocated_size = span->bytes_in_span();
+ }
+ if (capacity) *capacity = allocated_size;
+
+ ASSERT(span != nullptr);
+
+ // Grab the stack trace outside the heap lock
+ StackTrace tmp;
+ tmp.proxy = proxy;
+ tmp.depth = absl::GetStackTrace(tmp.stack, kMaxStackDepth, 1);
+ tmp.requested_size = requested_size;
+ tmp.requested_alignment = requested_alignment;
+ tmp.allocated_size = allocated_size;
+ tmp.weight = weight;
+ tmp.user_data = Static::CreateSampleUserData();
+
+ {
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+ // Allocate stack trace
+ StackTrace* stack = Static::stacktrace_allocator().New();
+ allocation_samples_.ReportMalloc(tmp);
+ *stack = tmp;
+ span->Sample(stack);
+ }
+
+ Static::peak_heap_tracker().MaybeSaveSample();
+
+ if (obj != nullptr) {
+#if TCMALLOC_HAVE_TRACKING
+ // We delete directly into central cache to avoid tracking this as
+ // purely internal deletion. We've already (correctly) tracked
+ // this allocation as either malloc hit or malloc miss, and we
+ // must not count anything else for this allocation.
+ //
+ // TODO(b/158678747): As of cl/315283185, we may occasionally see a hit in
+ // the TransferCache here. Prior to that CL, we always forced a miss. Both
+ // of these may artificially skew our tracking data.
+ Static::transfer_cache().InsertRange(cl, absl::Span<void*>(&obj, 1));
+#else
+ // We are not maintaining precise statistics on malloc hit/miss rates at our
+ // cache tiers. We can deallocate into our ordinary cache.
+ ASSERT(cl != 0);
+ FreeSmallSlow(obj, cl);
+#endif
+ }
+ return guarded_alloc ? guarded_alloc : span->start_address();
+}
+
+// ShouldSampleAllocation() is called when an allocation of the given requested
+// size is in progress. It returns the sampling weight of the allocation if it
+// should be "sampled," and 0 otherwise. See SampleifyAllocation().
+//
+// Sampling is done based on requested sizes and later unskewed during profile
+// generation.
+inline size_t ShouldSampleAllocation(size_t size) {
+ return GetThreadSampler()->RecordAllocation(size);
+}
+
+template <typename Policy>
+inline void* do_malloc_pages(Policy policy, size_t size) {
+ // Page allocator does not deal well with num_pages = 0.
+ Length num_pages = std::max<Length>(BytesToLengthCeil(size), Length(1));
+
+ MemoryTag tag = MemoryTag::kNormal;
+ if (Static::numa_topology().numa_aware()) {
+ tag = NumaNormalTag(policy.numa_partition());
+ }
+ const size_t alignment = policy.align();
+ Span* span = Static::page_allocator().NewAligned(
+ num_pages, BytesToLengthCeil(alignment), tag);
+
+ if (span == nullptr) {
+ return nullptr;
+ }
+
+ void* result = span->start_address();
+ ASSERT(
+ tag == GetMemoryTag(span->start_address()));
+
+ if (size_t weight = ShouldSampleAllocation(size)) {
+ CHECK_CONDITION(result == SampleifyAllocation(size, weight, alignment, 0,
+ nullptr, span, nullptr));
+ }
+
+ return result;
+}
+
+template <typename Policy, typename CapacityPtr>
+inline void* ABSL_ATTRIBUTE_ALWAYS_INLINE AllocSmall(Policy policy, size_t cl,
+ size_t size,
+ CapacityPtr capacity) {
+ ASSERT(cl != 0);
+ void* result;
+
+ if (UsePerCpuCache()) {
+ result = Static::cpu_cache().Allocate<Policy::handle_oom>(cl);
+ } else {
+ result = ThreadCache::GetCache()->Allocate<Policy::handle_oom>(cl);
+ }
+
+ if (!Policy::can_return_nullptr()) {
+ ASSUME(result != nullptr);
+ }
+
+ if (ABSL_PREDICT_FALSE(result == nullptr)) {
+ SetCapacity(0, capacity);
+ return nullptr;
+ }
+ size_t weight;
+ if (ABSL_PREDICT_FALSE(weight = ShouldSampleAllocation(size))) {
+ return SampleifyAllocation(size, weight, policy.align(), cl, result,
+ nullptr, capacity);
+ }
+ SetClassCapacity(cl, capacity);
+ return result;
+}
+
+// Handles freeing an object that doesn't have a size class, i.e. one that
+// is either large or sampled. We explicitly prevent inlining it to
+// keep it out of the fast path. This helps avoid an expensive
+// prologue/epilogue for the fast-path freeing functions.
+ABSL_ATTRIBUTE_NOINLINE
+static void do_free_pages(void* ptr, const PageId p) {
+ void* proxy = nullptr;
+ size_t size;
+ bool notify_sampled_alloc = false;
+
+ Span* span = Static::pagemap().GetExistingDescriptor(p);
+ ASSERT(span != nullptr);
+ // Prefetch now to avoid a stall accessing *span while under the lock.
+ span->Prefetch();
+ {
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+ ASSERT(span->first_page() == p);
+ if (StackTrace* st = span->Unsample()) {
+ proxy = st->proxy;
+ size = st->allocated_size;
+ if (proxy == nullptr && size <= kMaxSize) {
+ tracking::Report(kFreeMiss,
+ Static::sizemap().SizeClass(
+ CppPolicy().InSameNumaPartitionAs(ptr), size),
+ 1);
+ }
+ notify_sampled_alloc = true;
+ Static::DestroySampleUserData(st->user_data);
+ Static::stacktrace_allocator().Delete(st);
+ }
+ if (IsSampledMemory(ptr)) {
+ if (Static::guardedpage_allocator().PointerIsMine(ptr)) {
+ // Release lock while calling Deallocate() since it does a system call.
+ pageheap_lock.Unlock();
+ Static::guardedpage_allocator().Deallocate(ptr);
+ pageheap_lock.Lock();
+ Span::Delete(span);
+ } else {
+ ASSERT(reinterpret_cast<uintptr_t>(ptr) % kPageSize == 0);
+ Static::page_allocator().Delete(span, MemoryTag::kSampled);
+ }
+ } else if (kNumaPartitions != 1) {
+ ASSERT(reinterpret_cast<uintptr_t>(ptr) % kPageSize == 0);
+ Static::page_allocator().Delete(span, GetMemoryTag(ptr));
+ } else {
+ ASSERT(reinterpret_cast<uintptr_t>(ptr) % kPageSize == 0);
+ Static::page_allocator().Delete(span, MemoryTag::kNormal);
+ }
+ }
+
+ if (notify_sampled_alloc) {
+ }
+
+ if (proxy) {
+ const auto policy = CppPolicy().InSameNumaPartitionAs(proxy);
+ const size_t cl = Static::sizemap().SizeClass(policy, size);
+ FreeSmall<Hooks::NO>(proxy, cl);
+ }
+}
+
+#ifndef NDEBUG
+static size_t GetSizeClass(void* ptr) {
+ const PageId p = PageIdContaining(ptr);
+ return Static::pagemap().sizeclass(p);
+}
+#endif
+
+// Helper for object deletion (free, delete, etc.). Inputs:
+//   ptr is the object to be freed
+//   cl is the size class of that object, or 0 if it's unknown
+//   have_cl is true iff cl is known and is non-0.
+//
+// Note that since have_cl is a compile-time constant, a sufficiently clever
+// compiler would not need it: it could infer that GetSizeClass never
+// produces a 0 cl, and so it would know that call sites passing an explicit 0
+// are the "have_cl" case and the others are the "!have_cl" case. But we
+// certainly don't have such a compiler. See also do_free_with_size below.
+template <bool have_cl, Hooks hooks_state>
+inline ABSL_ATTRIBUTE_ALWAYS_INLINE void do_free_with_cl(void* ptr, size_t cl) {
+ // !have_cl -> cl == 0
+ ASSERT(have_cl || cl == 0);
+
+ const PageId p = PageIdContaining(ptr);
+
+  // If we have_cl, then we've excluded the ptr == nullptr case. See the
+  // comment in do_free_with_size. Thus we only bother testing for nullptr
+  // in the non-sized case.
+  //
+  // Thus: ptr == nullptr -> !have_cl
+ ASSERT(ptr != nullptr || !have_cl);
+ if (!have_cl && ABSL_PREDICT_FALSE(ptr == nullptr)) {
+ return;
+ }
+
+ // ptr must be a result of a previous malloc/memalign/... call, and
+ // therefore static initialization must have already occurred.
+ ASSERT(Static::IsInited());
+
+ if (!have_cl) {
+ cl = Static::pagemap().sizeclass(p);
+ }
+ if (have_cl || ABSL_PREDICT_TRUE(cl != 0)) {
+ ASSERT(cl == GetSizeClass(ptr));
+ ASSERT(ptr != nullptr);
+ ASSERT(!Static::pagemap().GetExistingDescriptor(p)->sampled());
+ FreeSmall<hooks_state>(ptr, cl);
+ } else {
+ invoke_delete_hooks_and_free<do_free_pages, hooks_state>(ptr, p);
+ }
+}
+
+inline ABSL_ATTRIBUTE_ALWAYS_INLINE void do_free(void* ptr) {
+ return do_free_with_cl<false, Hooks::RUN>(ptr, 0);
+}
+
+void do_free_no_hooks(void* ptr) {
+ return do_free_with_cl<false, Hooks::NO>(ptr, 0);
+}
+
+template <typename AlignPolicy>
+bool CorrectSize(void* ptr, size_t size, AlignPolicy align);
+
+bool CorrectAlignment(void* ptr, std::align_val_t alignment);
+
+inline ABSL_ATTRIBUTE_ALWAYS_INLINE void FreePages(void* ptr) {
+ const PageId p = PageIdContaining(ptr);
+ invoke_delete_hooks_and_free<do_free_pages, Hooks::RUN>(ptr, p);
+}
+
+template <typename AlignPolicy>
+inline ABSL_ATTRIBUTE_ALWAYS_INLINE void do_free_with_size(void* ptr,
+ size_t size,
+ AlignPolicy align) {
+ ASSERT(CorrectSize(ptr, size, align));
+ ASSERT(CorrectAlignment(ptr, static_cast<std::align_val_t>(align.align())));
+
+ // This is an optimized path that may be taken if the binary is compiled
+ // with -fsized-delete. We attempt to discover the size class cheaply
+ // without any cache misses by doing a plain computation that
+ // maps from size to size-class.
+ //
+  // The optimized path doesn't work with sampled objects, whose deletions
+  // trigger more operations and require visiting metadata.
+ if (ABSL_PREDICT_FALSE(IsSampledMemory(ptr))) {
+    // We don't know the true size class of ptr.
+ if (ptr == nullptr) return;
+ return FreePages(ptr);
+ }
+
+  // At this point, since ptr's tag bit is 1, it cannot be nullptr either.
+  // Thus all code below may rely on ptr != nullptr. In particular, since
+  // we're the only caller of do_free_with_cl with have_cl == true, have_cl
+  // implies ptr != nullptr.
+ ASSERT(ptr != nullptr);
+
+ uint32_t cl;
+ if (ABSL_PREDICT_FALSE(!Static::sizemap().GetSizeClass(
+ CppPolicy().AlignAs(align.align()).InSameNumaPartitionAs(ptr), size,
+ &cl))) {
+ // We couldn't calculate the size class, which means size > kMaxSize.
+ ASSERT(size > kMaxSize || align.align() > alignof(std::max_align_t));
+ static_assert(kMaxSize >= kPageSize, "kMaxSize must be at least kPageSize");
+ return FreePages(ptr);
+ }
+
+ return do_free_with_cl<true, Hooks::RUN>(ptr, cl);
+}
+
+inline size_t GetSize(const void* ptr) {
+ if (ptr == nullptr) return 0;
+ const PageId p = PageIdContaining(ptr);
+ size_t cl = Static::pagemap().sizeclass(p);
+ if (cl != 0) {
+ return Static::sizemap().class_to_size(cl);
+ } else {
+ const Span* span = Static::pagemap().GetExistingDescriptor(p);
+ if (span->sampled()) {
+ if (Static::guardedpage_allocator().PointerIsMine(ptr)) {
+ return Static::guardedpage_allocator().GetRequestedSize(ptr);
+ }
+ return span->sampled_stack()->allocated_size;
+ } else {
+ return span->bytes_in_span();
+ }
+ }
+}
+
+// Checks that an asserted object size for <ptr> is valid.
+template <typename AlignPolicy>
+bool CorrectSize(void* ptr, size_t size, AlignPolicy align) {
+ // size == 0 means we got no hint from sized delete, so we certainly don't
+ // have an incorrect one.
+ if (size == 0) return true;
+ if (ptr == nullptr) return true;
+ uint32_t cl = 0;
+  // Round up the passed-in size to how much tcmalloc allocates for that size.
+ if (Static::guardedpage_allocator().PointerIsMine(ptr)) {
+ size = Static::guardedpage_allocator().GetRequestedSize(ptr);
+ } else if (Static::sizemap().GetSizeClass(CppPolicy().AlignAs(align.align()),
+ size, &cl)) {
+ size = Static::sizemap().class_to_size(cl);
+ } else {
+ size = BytesToLengthCeil(size).in_bytes();
+ }
+ size_t actual = GetSize(ptr);
+ if (ABSL_PREDICT_TRUE(actual == size)) return true;
+ Log(kLog, __FILE__, __LINE__, "size check failed", actual, size, cl);
+ return false;
+}
+
+// Checks that an asserted object <ptr> has <align> alignment.
+bool CorrectAlignment(void* ptr, std::align_val_t alignment) {
+ size_t align = static_cast<size_t>(alignment);
+ ASSERT(absl::has_single_bit(align));
+ return ((reinterpret_cast<uintptr_t>(ptr) & (align - 1)) == 0);
+}
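+
+// For example (illustrative addresses, not from the original): with
+// alignment == 64 the mask is 0x3f, so a pointer at 0x7f0000001040 passes
+// while one at 0x7f0000001048 does not.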
+
+// Helpers for use by exported routines below or inside debugallocation.cc:
+
+inline void do_malloc_stats() { PrintStats(1); }
+
+inline int do_mallopt(int cmd, int value) {
+ return 1; // Indicates error
+}
+
+#ifdef TCMALLOC_HAVE_STRUCT_MALLINFO
+inline struct mallinfo do_mallinfo() {
+ TCMallocStats stats;
+ ExtractTCMallocStats(&stats, false);
+
+ // Just some of the fields are filled in.
+ struct mallinfo info;
+ memset(&info, 0, sizeof(info));
+
+  // Unfortunately, the struct contains "int" fields, so some of the
+  // size values will be truncated.
+ info.arena = static_cast<int>(stats.pageheap.system_bytes);
+ info.fsmblks = static_cast<int>(stats.thread_bytes + stats.central_bytes +
+ stats.transfer_bytes);
+ info.fordblks = static_cast<int>(stats.pageheap.free_bytes +
+ stats.pageheap.unmapped_bytes);
+ info.uordblks = static_cast<int>(InUseByApp(stats));
+
+ return info;
+}
+#endif // TCMALLOC_HAVE_STRUCT_MALLINFO
+
+} // namespace
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+using tcmalloc::tcmalloc_internal::AllocSmall;
+using tcmalloc::tcmalloc_internal::CppPolicy;
+using tcmalloc::tcmalloc_internal::do_free_no_hooks;
+#ifdef TCMALLOC_HAVE_STRUCT_MALLINFO
+using tcmalloc::tcmalloc_internal::do_mallinfo;
+#endif
+using tcmalloc::tcmalloc_internal::do_malloc_pages;
+using tcmalloc::tcmalloc_internal::do_malloc_stats;
+using tcmalloc::tcmalloc_internal::do_mallopt;
+using tcmalloc::tcmalloc_internal::GetThreadSampler;
+using tcmalloc::tcmalloc_internal::MallocPolicy;
+using tcmalloc::tcmalloc_internal::SetClassCapacity;
+using tcmalloc::tcmalloc_internal::SetPagesCapacity;
+using tcmalloc::tcmalloc_internal::Static;
+using tcmalloc::tcmalloc_internal::UsePerCpuCache;
+
+#ifdef TCMALLOC_DEPRECATED_PERTHREAD
+using tcmalloc::tcmalloc_internal::ThreadCache;
+#endif // TCMALLOC_DEPRECATED_PERTHREAD
+
+// Slow path implementation.
+// This function is used by `fast_alloc` if the allocation requires page-sized
+// allocations or some complex logic, such as initialization, invoking
+// new/delete hooks, or sampling.
+//
+// TODO(b/130771275): This function is marked as static, rather than appearing
+// in the anonymous namespace, to workaround incomplete heapz filtering.
+template <typename Policy, typename CapacityPtr = std::nullptr_t>
+static void* ABSL_ATTRIBUTE_SECTION(google_malloc)
+ slow_alloc(Policy policy, size_t size, CapacityPtr capacity = nullptr) {
+ Static::InitIfNecessary();
+ GetThreadSampler()->UpdateFastPathState();
+ void* p;
+ uint32_t cl;
+ bool is_small = Static::sizemap().GetSizeClass(policy, size, &cl);
+ if (ABSL_PREDICT_TRUE(is_small)) {
+ p = AllocSmall(policy, cl, size, capacity);
+ } else {
+ p = do_malloc_pages(policy, size);
+ // Set capacity to the exact size for a page allocation.
+ // This needs to be revisited if we introduce gwp-asan
+ // sampling / guarded allocations to do_malloc_pages().
+ SetPagesCapacity(p, size, capacity);
+ if (ABSL_PREDICT_FALSE(p == nullptr)) {
+ return Policy::handle_oom(size);
+ }
+ }
+ if (Policy::invoke_hooks()) {
+ }
+ return p;
+}
+
+template <typename Policy, typename CapacityPtr = std::nullptr_t>
+static inline void* ABSL_ATTRIBUTE_ALWAYS_INLINE
+fast_alloc(Policy policy, size_t size, CapacityPtr capacity = nullptr) {
+  // If size is larger than kMaxSize, it's not fast-path anymore. In
+  // that case, GetSizeClass will return false, and we'll delegate to the slow
+  // path. If malloc is not yet initialized, we may end up with cl == 0
+  // (regardless of size), but in this case we also delegate to the slow
+  // path via the fast-path check further down.
+ uint32_t cl;
+ bool is_small = Static::sizemap().GetSizeClass(policy, size, &cl);
+ if (ABSL_PREDICT_FALSE(!is_small)) {
+ return slow_alloc(policy, size, capacity);
+ }
+
+ // When using per-thread caches, we have to check for the presence of the
+ // cache for this thread before we try to sample, as slow_alloc will
+ // also try to sample the allocation.
+#ifdef TCMALLOC_DEPRECATED_PERTHREAD
+ ThreadCache* const cache = ThreadCache::GetCacheIfPresent();
+ if (ABSL_PREDICT_FALSE(cache == nullptr)) {
+ return slow_alloc(policy, size, capacity);
+ }
+#endif
+ // TryRecordAllocationFast() returns true if no extra logic is required, e.g.:
+ // - this allocation does not need to be sampled
+ // - no new/delete hooks need to be invoked
+ // - no need to initialize thread globals, data or caches.
+ // The method updates 'bytes until next sample' thread sampler counters.
+ if (ABSL_PREDICT_FALSE(!GetThreadSampler()->TryRecordAllocationFast(size))) {
+ return slow_alloc(policy, size, capacity);
+ }
+
+ // Fast path implementation for allocating small size memory.
+ // This code should only be reached if all of the below conditions are met:
+ // - the size does not exceed the maximum size (size class > 0)
+ // - cpu / thread cache data has been initialized.
+ // - the allocation is not subject to sampling / gwp-asan.
+ // - no new/delete hook is installed and required to be called.
+ ASSERT(cl != 0);
+ void* ret;
+#ifndef TCMALLOC_DEPRECATED_PERTHREAD
+ // The CPU cache should be ready.
+ ret = Static::cpu_cache().Allocate<Policy::handle_oom>(cl);
+#else // !defined(TCMALLOC_DEPRECATED_PERTHREAD)
+ // The ThreadCache should be ready.
+ ASSERT(cache != nullptr);
+ ret = cache->Allocate<Policy::handle_oom>(cl);
+#endif // TCMALLOC_DEPRECATED_PERTHREAD
+ if (!Policy::can_return_nullptr()) {
+ ASSUME(ret != nullptr);
+ }
+ SetClassCapacity(ret, cl, capacity);
+ return ret;
+}
+
+using tcmalloc::tcmalloc_internal::GetOwnership;
+using tcmalloc::tcmalloc_internal::GetSize;
+
+extern "C" size_t MallocExtension_Internal_GetAllocatedSize(const void* ptr) {
+ ASSERT(!ptr ||
+ GetOwnership(ptr) != tcmalloc::MallocExtension::Ownership::kNotOwned);
+ return GetSize(ptr);
+}
+
+extern "C" void MallocExtension_Internal_MarkThreadBusy() {
+ // Allocate to force the creation of a thread cache, but avoid
+ // invoking any hooks.
+ Static::InitIfNecessary();
+
+ if (UsePerCpuCache()) {
+ return;
+ }
+
+ do_free_no_hooks(slow_alloc(CppPolicy().Nothrow().WithoutHooks(), 0));
+}
+
+//-------------------------------------------------------------------
+// Exported routines
+//-------------------------------------------------------------------
+
+using tcmalloc::tcmalloc_internal::AlignAsPolicy;
+using tcmalloc::tcmalloc_internal::CorrectAlignment;
+using tcmalloc::tcmalloc_internal::CorrectSize;
+using tcmalloc::tcmalloc_internal::DefaultAlignPolicy;
+using tcmalloc::tcmalloc_internal::do_free;
+using tcmalloc::tcmalloc_internal::do_free_with_size;
+
+// depends on TCMALLOC_HAVE_STRUCT_MALLINFO, so needs to come after that.
+#include "tcmalloc/libc_override.h"
+
+extern "C" ABSL_CACHELINE_ALIGNED void* TCMallocInternalMalloc(
+ size_t size) noexcept {
+ // Use TCMallocInternalMemalign to avoid requiring size %
+ // alignof(std::max_align_t) == 0. TCMallocInternalAlignedAlloc enforces this
+ // property.
+ return TCMallocInternalMemalign(alignof(std::max_align_t), size);
+}
+
+extern "C" ABSL_CACHELINE_ALIGNED void* TCMallocInternalNew(size_t size) {
+ return fast_alloc(CppPolicy(), size);
+}
+
+extern "C" ABSL_ATTRIBUTE_SECTION(google_malloc) tcmalloc::sized_ptr_t
+ tcmalloc_size_returning_operator_new(size_t size) {
+ size_t capacity;
+ void* p = fast_alloc(CppPolicy(), size, &capacity);
+ return {p, capacity};
+}
+
+extern "C" ABSL_CACHELINE_ALIGNED void* TCMallocInternalMalloc_aligned(
+ size_t size, std::align_val_t alignment) noexcept {
+ return fast_alloc(MallocPolicy().AlignAs(alignment), size);
+}
+
+extern "C" ABSL_CACHELINE_ALIGNED void* TCMallocInternalNewAligned(
+ size_t size, std::align_val_t alignment) {
+ return fast_alloc(CppPolicy().AlignAs(alignment), size);
+}
+
+#ifdef TCMALLOC_ALIAS
+extern "C" void* TCMallocInternalNewAligned_nothrow(
+ size_t size, std::align_val_t alignment, const std::nothrow_t& nt) noexcept
+ // Note: we use malloc rather than new, as we are allowed to return nullptr.
+ // The latter crashes in that case.
+ TCMALLOC_ALIAS(TCMallocInternalMalloc_aligned);
+#else
+extern "C" ABSL_ATTRIBUTE_SECTION(
+ google_malloc) void* TCMallocInternalNewAligned_nothrow(size_t size,
+ std::align_val_t
+ alignment,
+ const std::nothrow_t&
+ nt) noexcept {
+ return fast_alloc(CppPolicy().Nothrow().AlignAs(alignment), size);
+}
+#endif // TCMALLOC_ALIAS
+
+extern "C" ABSL_CACHELINE_ALIGNED void TCMallocInternalFree(
+ void* ptr) noexcept {
+ do_free(ptr);
+}
+
+extern "C" void TCMallocInternalSdallocx(void* ptr, size_t size,
+ int flags) noexcept {
+ size_t alignment = alignof(std::max_align_t);
+
+ if (ABSL_PREDICT_FALSE(flags != 0)) {
+ ASSERT((flags & ~0x3f) == 0);
+ alignment = static_cast<size_t>(1ull << (flags & 0x3f));
+ }
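+  // Example (illustrative): flags == 4 encodes a 16-byte alignment, since
+  // 1 << 4 == 16.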
+
+ return do_free_with_size(ptr, size, AlignAsPolicy(alignment));
+}
+
+extern "C" void* TCMallocInternalCalloc(size_t n, size_t elem_size) noexcept {
+ // Overflow check
+ const size_t size = n * elem_size;
+ if (elem_size != 0 && size / elem_size != n) {
+ return MallocPolicy::handle_oom(std::numeric_limits<size_t>::max());
+ }
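+  // Example (illustrative, 64-bit): with n and elem_size both 1ull << 33 the
+  // product wraps to size == 0, and size / elem_size != n detects the overflow.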
+ void* result = fast_alloc(MallocPolicy(), size);
+ if (result != nullptr) {
+ memset(result, 0, size);
+ }
+ return result;
+}
+
+// Here and below we use TCMALLOC_ALIAS (if supported) to make
+// identical functions aliases. This saves space in L1 instruction
+// cache. As of now it saves ~9K.
+extern "C" void TCMallocInternalCfree(void* ptr) noexcept
+#ifdef TCMALLOC_ALIAS
+ TCMALLOC_ALIAS(TCMallocInternalFree);
+#else
+{
+ do_free(ptr);
+}
+#endif // TCMALLOC_ALIAS
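+// (Sketch of the mechanism, assumed rather than quoted from the macro's
+// actual definition elsewhere in this library: on supporting toolchains
+// TCMALLOC_ALIAS(fn) expands to roughly __attribute__((alias(#fn))), so
+// TCMallocInternalCfree above shares its definition with TCMallocInternalFree.)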
+
+static inline ABSL_ATTRIBUTE_ALWAYS_INLINE void* do_realloc(void* old_ptr,
+ size_t new_size) {
+ Static::InitIfNecessary();
+ // Get the size of the old entry
+ const size_t old_size = GetSize(old_ptr);
+
+ // Reallocate if the new size is larger than the old size,
+ // or if the new size is significantly smaller than the old size.
+ // We do hysteresis to avoid resizing ping-pongs:
+ // . If we need to grow, grow to max(new_size, old_size * 1.X)
+ // . Don't shrink unless new_size < old_size * 0.Y
+ // X and Y trade-off time for wasted space. For now we do 1.25 and 0.5.
+ const size_t min_growth = std::min(
+ old_size / 4,
+ std::numeric_limits<size_t>::max() - old_size); // Avoid overflow.
+ const size_t lower_bound_to_grow = old_size + min_growth;
+ const size_t upper_bound_to_shrink = old_size / 2;
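+  // Worked example (illustrative numbers): for old_size = 1000,
+  // lower_bound_to_grow = 1250 and upper_bound_to_shrink = 500. A new_size
+  // of 1100 reallocates (requesting 1250 to absorb further growth), 600
+  // keeps the existing block, and 400 reallocates down to 400 bytes.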
+ if ((new_size > old_size) || (new_size < upper_bound_to_shrink)) {
+ // Need to reallocate.
+ void* new_ptr = nullptr;
+
+ if (new_size > old_size && new_size < lower_bound_to_grow) {
+      // Avoid fast_alloc() reporting a hook with the lower bound size, as the
+      // expectation for pointer-returning allocation functions is that malloc
+      // hooks are invoked with the requested_size.
+ new_ptr = fast_alloc(MallocPolicy().Nothrow().WithoutHooks(),
+ lower_bound_to_grow);
+ }
+ if (new_ptr == nullptr) {
+      // Either new_size is not a tiny increment, or the last fast_alloc
+      // failed.
+ new_ptr = fast_alloc(MallocPolicy(), new_size);
+ }
+ if (new_ptr == nullptr) {
+ return nullptr;
+ }
+ memcpy(new_ptr, old_ptr, ((old_size < new_size) ? old_size : new_size));
+ // We could use a variant of do_free() that leverages the fact
+ // that we already know the sizeclass of old_ptr. The benefit
+ // would be small, so don't bother.
+ do_free(old_ptr);
+ return new_ptr;
+ } else {
+ return old_ptr;
+ }
+}
+
+extern "C" void* TCMallocInternalRealloc(void* old_ptr,
+ size_t new_size) noexcept {
+ if (old_ptr == NULL) {
+ return fast_alloc(MallocPolicy(), new_size);
+ }
+ if (new_size == 0) {
+ do_free(old_ptr);
+ return NULL;
+ }
+ return do_realloc(old_ptr, new_size);
+}
+
+extern "C" void* TCMallocInternalNewNothrow(size_t size,
+ const std::nothrow_t&) noexcept {
+ return fast_alloc(CppPolicy().Nothrow(), size);
+}
+
+extern "C" tcmalloc::sized_ptr_t tcmalloc_size_returning_operator_new_nothrow(
+ size_t size) noexcept {
+ size_t capacity;
+ void* p = fast_alloc(CppPolicy().Nothrow(), size, &capacity);
+ return {p, capacity};
+}
+
+extern "C" ABSL_CACHELINE_ALIGNED void TCMallocInternalDelete(void* p) noexcept
+#ifdef TCMALLOC_ALIAS
+ TCMALLOC_ALIAS(TCMallocInternalFree);
+#else
+{
+ do_free(p);
+}
+#endif // TCMALLOC_ALIAS
+
+extern "C" void TCMallocInternalDeleteAligned(
+ void* p, std::align_val_t alignment) noexcept
+#if defined(TCMALLOC_ALIAS) && defined(NDEBUG)
+ TCMALLOC_ALIAS(TCMallocInternalDelete);
+#else
+{
+ // Note: The aligned delete/delete[] implementations differ slightly from
+ // their respective aliased implementations to take advantage of checking the
+ // passed-in alignment.
+ ASSERT(CorrectAlignment(p, alignment));
+ return TCMallocInternalDelete(p);
+}
+#endif
+
+extern "C" ABSL_CACHELINE_ALIGNED void TCMallocInternalDeleteSized(
+ void* p, size_t size) noexcept {
+ ASSERT(CorrectSize(p, size, DefaultAlignPolicy()));
+ do_free_with_size(p, size, DefaultAlignPolicy());
+}
+
+extern "C" void TCMallocInternalDeleteSizedAligned(
+ void* p, size_t t, std::align_val_t alignment) noexcept {
+ return do_free_with_size(p, t, AlignAsPolicy(alignment));
+}
+
+extern "C" void TCMallocInternalDeleteArraySized(void* p, size_t size) noexcept
+#ifdef TCMALLOC_ALIAS
+ TCMALLOC_ALIAS(TCMallocInternalDeleteSized);
+#else
+{
+ do_free_with_size(p, size, DefaultAlignPolicy());
+}
+#endif
+
+extern "C" void TCMallocInternalDeleteArraySizedAligned(
+ void* p, size_t t, std::align_val_t alignment) noexcept
+#ifdef TCMALLOC_ALIAS
+ TCMALLOC_ALIAS(TCMallocInternalDeleteSizedAligned);
+#else
+{
+ return TCMallocInternalDeleteSizedAligned(p, t, alignment);
+}
+#endif
+
+// Standard C++ library implementations define and use this
+// (via ::operator delete(ptr, nothrow)).
+// But it's really the same as normal delete, so we just do the same thing.
+extern "C" void TCMallocInternalDeleteNothrow(void* p,
+ const std::nothrow_t&) noexcept
+#ifdef TCMALLOC_ALIAS
+ TCMALLOC_ALIAS(TCMallocInternalFree);
+#else
+{
+ do_free(p);
+}
+#endif // TCMALLOC_ALIAS
+
+#if defined(TCMALLOC_ALIAS) && defined(NDEBUG)
+extern "C" void TCMallocInternalDeleteAligned_nothrow(
+ void* p, std::align_val_t alignment, const std::nothrow_t& nt) noexcept
+ TCMALLOC_ALIAS(TCMallocInternalDelete);
+#else
+extern "C" ABSL_ATTRIBUTE_SECTION(
+ google_malloc) void TCMallocInternalDeleteAligned_nothrow(void* p,
+ std::align_val_t
+ alignment,
+ const std::nothrow_t&
+ nt) noexcept {
+ ASSERT(CorrectAlignment(p, alignment));
+ return TCMallocInternalDelete(p);
+}
+#endif
+
+extern "C" void* TCMallocInternalNewArray(size_t size)
+#ifdef TCMALLOC_ALIAS
+ TCMALLOC_ALIAS(TCMallocInternalNew);
+#else
+{
+ return fast_alloc(CppPolicy().WithoutHooks(), size);
+}
+#endif // TCMALLOC_ALIAS
+
+extern "C" void* TCMallocInternalNewArrayAligned(size_t size,
+ std::align_val_t alignment)
+#if defined(TCMALLOC_ALIAS) && defined(NDEBUG)
+ TCMALLOC_ALIAS(TCMallocInternalNewAligned);
+#else
+{
+ return TCMallocInternalNewAligned(size, alignment);
+}
+#endif
+
+extern "C" void* TCMallocInternalNewArrayNothrow(size_t size,
+ const std::nothrow_t&) noexcept
+#ifdef TCMALLOC_ALIAS
+ TCMALLOC_ALIAS(TCMallocInternalNewNothrow);
+#else
+{
+ return fast_alloc(CppPolicy().Nothrow(), size);
+}
+#endif // TCMALLOC_ALIAS
+
+// Note: we use malloc rather than new, as we are allowed to return nullptr.
+// The latter crashes in that case.
+#if defined(TCMALLOC_ALIAS) && defined(NDEBUG)
+extern "C" void* TCMallocInternalNewArrayAligned_nothrow(
+ size_t size, std::align_val_t alignment, const std::nothrow_t&) noexcept
+ TCMALLOC_ALIAS(TCMallocInternalMalloc_aligned);
+#else
+extern "C" ABSL_ATTRIBUTE_SECTION(
+ google_malloc) void* TCMallocInternalNewArrayAligned_nothrow(size_t size,
+ std::align_val_t
+ alignment,
+ const std::
+ nothrow_t&) noexcept {
+ return TCMallocInternalMalloc_aligned(size, alignment);
+}
+#endif
+
+extern "C" void TCMallocInternalDeleteArray(void* p) noexcept
+#ifdef TCMALLOC_ALIAS
+ TCMALLOC_ALIAS(TCMallocInternalFree);
+#else
+{
+ do_free(p);
+}
+#endif // TCMALLOC_ALIAS
+
+extern "C" void TCMallocInternalDeleteArrayAligned(
+ void* p, std::align_val_t alignment) noexcept
+#if defined(TCMALLOC_ALIAS) && defined(NDEBUG)
+ TCMALLOC_ALIAS(TCMallocInternalDelete);
+#else
+{
+ ASSERT(CorrectAlignment(p, alignment));
+ return TCMallocInternalDelete(p);
+}
+#endif
+
+extern "C" void TCMallocInternalDeleteArrayNothrow(
+ void* p, const std::nothrow_t&) noexcept
+#ifdef TCMALLOC_ALIAS
+ TCMALLOC_ALIAS(TCMallocInternalFree);
+#else
+{
+ do_free(p);
+}
+#endif // TCMALLOC_ALIAS
+
+#if defined(TCMALLOC_ALIAS) && defined(NDEBUG)
+extern "C" void TCMallocInternalDeleteArrayAligned_nothrow(
+ void* p, std::align_val_t alignment, const std::nothrow_t&) noexcept
+ TCMALLOC_ALIAS(TCMallocInternalDelete);
+#else
+extern "C" ABSL_ATTRIBUTE_SECTION(
+ google_malloc) void TCMallocInternalDeleteArrayAligned_nothrow(void* p,
+ std::align_val_t
+ alignment,
+ const std::
+ nothrow_t&) noexcept {
+ ASSERT(CorrectAlignment(p, alignment));
+ return TCMallocInternalDelete(p);
+}
+#endif
+
+extern "C" void* TCMallocInternalMemalign(size_t align, size_t size) noexcept {
+ ASSERT(absl::has_single_bit(align));
+ return fast_alloc(MallocPolicy().AlignAs(align), size);
+}
+
+extern "C" void* TCMallocInternalAlignedAlloc(size_t align,
+ size_t size) noexcept
+#if defined(TCMALLOC_ALIAS) && defined(NDEBUG)
+ TCMALLOC_ALIAS(TCMallocInternalMemalign);
+#else
+{
+ // aligned_alloc is memalign, but with the requirement that:
+ // align be a power of two (like memalign)
+ // size be a multiple of align (for the time being).
+ ASSERT(align != 0);
+ ASSERT(size % align == 0);
+
+ return TCMallocInternalMemalign(align, size);
+}
+#endif
+
+extern "C" int TCMallocInternalPosixMemalign(void** result_ptr, size_t align,
+ size_t size) noexcept {
+ if (((align % sizeof(void*)) != 0) || !absl::has_single_bit(align)) {
+ return EINVAL;
+ }
+ void* result = fast_alloc(MallocPolicy().Nothrow().AlignAs(align), size);
+ if (result == NULL) {
+ return ENOMEM;
+ } else {
+ *result_ptr = result;
+ return 0;
+ }
+}
+
+static size_t pagesize = 0;
+
+extern "C" void* TCMallocInternalValloc(size_t size) noexcept {
+ // Allocate page-aligned object of length >= size bytes
+ if (pagesize == 0) pagesize = getpagesize();
+ return fast_alloc(MallocPolicy().Nothrow().AlignAs(pagesize), size);
+}
+
+extern "C" void* TCMallocInternalPvalloc(size_t size) noexcept {
+ // Round up size to a multiple of pagesize
+ if (pagesize == 0) pagesize = getpagesize();
+ if (size == 0) { // pvalloc(0) should allocate one page, according to
+ size = pagesize; // http://man.free4web.biz/man3/libmpatrol.3.html
+ }
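+  // Example (illustrative, 4 KiB pages): a size of 1 rounds up to 4096 and a
+  // size of 4097 rounds up to 8192.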
+ size = (size + pagesize - 1) & ~(pagesize - 1);
+ return fast_alloc(MallocPolicy().Nothrow().AlignAs(pagesize), size);
+}
+
+extern "C" void TCMallocInternalMallocStats(void) noexcept {
+ do_malloc_stats();
+}
+
+extern "C" int TCMallocInternalMallOpt(int cmd, int value) noexcept {
+ return do_mallopt(cmd, value);
+}
+
+#ifdef TCMALLOC_HAVE_STRUCT_MALLINFO
+extern "C" struct mallinfo TCMallocInternalMallocInfo(void) noexcept {
+ return do_mallinfo();
+}
+#endif
+
+extern "C" size_t TCMallocInternalMallocSize(void* ptr) noexcept {
+ ASSERT(GetOwnership(ptr) != tcmalloc::MallocExtension::Ownership::kNotOwned);
+ return GetSize(ptr);
+}
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+namespace {
+
+// The constructor allocates an object to ensure that initialization
+// runs before main(), and therefore we do not have a chance to become
+// multi-threaded before initialization. We also create the TSD key
+// here. Presumably by the time this constructor runs, glibc is in
+// good enough shape to handle pthread_key_create().
+class TCMallocGuard {
+ public:
+ TCMallocGuard() {
+ TCMallocInternalFree(TCMallocInternalMalloc(1));
+ ThreadCache::InitTSD();
+ TCMallocInternalFree(TCMallocInternalMalloc(1));
+ }
+};
+
+static TCMallocGuard module_enter_exit_hook;
+
+} // namespace
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
diff --git a/contrib/libs/tcmalloc/tcmalloc/tcmalloc.h b/contrib/libs/tcmalloc/tcmalloc/tcmalloc.h
new file mode 100644
index 0000000000..1a8eeb4157
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/tcmalloc.h
@@ -0,0 +1,126 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This is the exported interface from tcmalloc. For most users,
+// tcmalloc just overrides existing libc functionality, and thus this
+// .h file isn't needed. But we also provide the tcmalloc allocation
+// routines through their own, dedicated name -- so people can wrap
+// their own malloc functions around the tcmalloc routines if they wish.
+// These are exported here.
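+//
+// A minimal wrapping sketch (illustrative; the wrapper names are hypothetical,
+// while the TCMallocInternal* entry points are declared below):
+//
+//   void* my_malloc(size_t n) { return TCMallocInternalMalloc(n); }
+//   void my_free(void* p) { TCMallocInternalFree(p); }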
+
+#ifndef TCMALLOC_TCMALLOC_H_
+#define TCMALLOC_TCMALLOC_H_
+
+#include <malloc.h>
+#include <stddef.h>
+
+#include "absl/base/attributes.h"
+#include "tcmalloc/internal/config.h"
+#include "tcmalloc/internal/declarations.h"
+
+// __THROW is defined in glibc systems. It means, counter-intuitively,
+// "This function will never throw an exception." It's an optional
+// optimization tool, but we may need to use it to match glibc prototypes.
+#ifndef __THROW // I guess we're not on a glibc system
+#define __THROW __attribute__((__nothrow__))
+#endif
+
+#ifdef __cplusplus
+
+extern "C" {
+#endif
+void* TCMallocInternalMalloc(size_t size) __THROW
+ ABSL_ATTRIBUTE_SECTION(google_malloc);
+void TCMallocInternalFree(void* ptr) __THROW
+ ABSL_ATTRIBUTE_SECTION(google_malloc);
+void TCMallocInternalSdallocx(void* ptr, size_t size, int flags) __THROW
+ ABSL_ATTRIBUTE_SECTION(google_malloc);
+void* TCMallocInternalRealloc(void* ptr, size_t size) __THROW
+ ABSL_ATTRIBUTE_SECTION(google_malloc);
+void* TCMallocInternalCalloc(size_t n, size_t size) __THROW
+ ABSL_ATTRIBUTE_SECTION(google_malloc);
+void TCMallocInternalCfree(void* ptr) __THROW
+ ABSL_ATTRIBUTE_SECTION(google_malloc);
+
+void* TCMallocInternalAlignedAlloc(size_t align, size_t __size) __THROW
+ ABSL_ATTRIBUTE_SECTION(google_malloc);
+void* TCMallocInternalMemalign(size_t align, size_t __size) __THROW
+ ABSL_ATTRIBUTE_SECTION(google_malloc);
+int TCMallocInternalPosixMemalign(void** ptr, size_t align, size_t size) __THROW
+ ABSL_ATTRIBUTE_SECTION(google_malloc);
+void* TCMallocInternalValloc(size_t __size) __THROW
+ ABSL_ATTRIBUTE_SECTION(google_malloc);
+void* TCMallocInternalPvalloc(size_t __size) __THROW
+ ABSL_ATTRIBUTE_SECTION(google_malloc);
+
+void TCMallocInternalMallocStats(void) __THROW
+ ABSL_ATTRIBUTE_SECTION(google_malloc);
+int TCMallocInternalMallOpt(int cmd, int value) __THROW
+ ABSL_ATTRIBUTE_SECTION(google_malloc);
+#if defined(TCMALLOC_HAVE_STRUCT_MALLINFO)
+struct mallinfo TCMallocInternalMallocInfo(void) __THROW
+ ABSL_ATTRIBUTE_SECTION(google_malloc);
+#endif
+
+// This is an alias for MallocExtension::GetAllocatedSize().
+// It is equivalent to
+// OS X: malloc_size()
+// glibc: malloc_usable_size()
+// Windows: _msize()
+size_t TCMallocInternalMallocSize(void* ptr) __THROW
+ ABSL_ATTRIBUTE_SECTION(google_malloc);
+
+#ifdef __cplusplus
+void* TCMallocInternalNew(size_t size) ABSL_ATTRIBUTE_SECTION(google_malloc);
+void* TCMallocInternalNewAligned(size_t size, std::align_val_t alignment)
+ ABSL_ATTRIBUTE_SECTION(google_malloc);
+void* TCMallocInternalNewNothrow(size_t size, const std::nothrow_t&) __THROW
+ ABSL_ATTRIBUTE_SECTION(google_malloc);
+void TCMallocInternalDelete(void* p) __THROW
+ ABSL_ATTRIBUTE_SECTION(google_malloc);
+void TCMallocInternalDeleteAligned(void* p, std::align_val_t alignment) __THROW
+ ABSL_ATTRIBUTE_SECTION(google_malloc);
+void TCMallocInternalDeleteSized(void* p, size_t size) __THROW
+ ABSL_ATTRIBUTE_SECTION(google_malloc);
+void TCMallocInternalDeleteSizedAligned(void* p, size_t t,
+ std::align_val_t alignment) __THROW
+ ABSL_ATTRIBUTE_SECTION(google_malloc);
+void TCMallocInternalDeleteNothrow(void* p, const std::nothrow_t&) __THROW
+ ABSL_ATTRIBUTE_SECTION(google_malloc);
+void* TCMallocInternalNewArray(size_t size)
+ ABSL_ATTRIBUTE_SECTION(google_malloc);
+void* TCMallocInternalNewArrayAligned(size_t size, std::align_val_t alignment)
+ ABSL_ATTRIBUTE_SECTION(google_malloc);
+void* TCMallocInternalNewArrayNothrow(size_t size,
+ const std::nothrow_t&) __THROW
+ ABSL_ATTRIBUTE_SECTION(google_malloc);
+void TCMallocInternalDeleteArray(void* p) __THROW
+ ABSL_ATTRIBUTE_SECTION(google_malloc);
+void TCMallocInternalDeleteArrayAligned(void* p,
+ std::align_val_t alignment) __THROW
+ ABSL_ATTRIBUTE_SECTION(google_malloc);
+void TCMallocInternalDeleteArraySized(void* p, size_t size) __THROW
+ ABSL_ATTRIBUTE_SECTION(google_malloc);
+void TCMallocInternalDeleteArraySizedAligned(void* p, size_t t,
+ std::align_val_t alignment) __THROW
+ ABSL_ATTRIBUTE_SECTION(google_malloc);
+void TCMallocInternalDeleteArrayNothrow(void* p, const std::nothrow_t&) __THROW
+ ABSL_ATTRIBUTE_SECTION(google_malloc);
+}
+#endif
+
+void TCMallocInternalAcquireLocks();
+void TCMallocInternalReleaseLocks();
+
+#endif // TCMALLOC_TCMALLOC_H_
diff --git a/contrib/libs/tcmalloc/tcmalloc/tcmalloc_large_test.cc b/contrib/libs/tcmalloc/tcmalloc/tcmalloc_large_test.cc
new file mode 100644
index 0000000000..f940120f46
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/tcmalloc_large_test.cc
@@ -0,0 +1,204 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This is a unit test for large allocations in malloc and friends.
+// "Large" means "so large that they overflow the address space".
+// For 32 bits, this means allocations near 2^32 bytes and 2^31 bytes.
+// For 64 bits, this means allocations near 2^64 bytes and 2^63 bytes.
+
+#include <errno.h>
+#include <stddef.h>
+#include <stdlib.h>
+
+#include <utility>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/container/flat_hash_set.h"
+#include "absl/container/node_hash_set.h"
+#include "tcmalloc/common.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/malloc_extension.h"
+
+namespace tcmalloc {
+namespace tcmalloc_internal {
+namespace {
+
+// Alloc a size that should always fail.
+void TryAllocExpectFail(size_t size) {
+ void* p1 = malloc(size);
+ ASSERT_EQ(p1, nullptr);
+
+ void* p2 = malloc(1);
+ ASSERT_NE(p2, nullptr);
+
+ void* p3 = realloc(p2, size);
+ ASSERT_EQ(p3, nullptr);
+
+ free(p2);
+}
+
+// Alloc a size that might work and might fail.
+// If it does work, touch some pages.
+
+void TryAllocMightFail(size_t size) {
+ unsigned char* p = static_cast<unsigned char*>(malloc(size));
+ if (p != nullptr) {
+ unsigned char volatile* vp = p; // prevent optimizations
+ static const size_t kPoints = 1024;
+
+ for (size_t i = 0; i < kPoints; ++i) {
+ vp[i * (size / kPoints)] = static_cast<unsigned char>(i);
+ }
+
+ for (size_t i = 0; i < kPoints; ++i) {
+ ASSERT_EQ(vp[i * (size / kPoints)], static_cast<unsigned char>(i));
+ }
+
+ vp[size - 1] = 'M';
+ ASSERT_EQ(vp[size - 1], 'M');
+ } else {
+ ASSERT_EQ(errno, ENOMEM);
+ }
+
+ free(p);
+}
+
+class NoErrnoRegion final : public AddressRegion {
+ public:
+ explicit NoErrnoRegion(AddressRegion* underlying) : underlying_(underlying) {}
+
+ std::pair<void*, size_t> Alloc(size_t size, size_t alignment) override {
+ std::pair<void*, size_t> result = underlying_->Alloc(size, alignment);
+ errno = 0;
+ return result;
+ }
+
+ private:
+ AddressRegion* underlying_;
+};
+
+class NoErrnoRegionFactory final : public AddressRegionFactory {
+ public:
+ explicit NoErrnoRegionFactory(AddressRegionFactory* underlying)
+ : underlying_(underlying) {}
+ ~NoErrnoRegionFactory() override {}
+
+ AddressRegion* Create(void* start, size_t size, UsageHint hint) override {
+ AddressRegion* underlying_region = underlying_->Create(start, size, hint);
+ CHECK_CONDITION(underlying_region != nullptr);
+ void* region_space = MallocInternal(sizeof(NoErrnoRegion));
+ CHECK_CONDITION(region_space != nullptr);
+ return new (region_space) NoErrnoRegion(underlying_region);
+ }
+
+ // Get a human-readable description of the current state of the
+ // allocator.
+ size_t GetStats(absl::Span<char> buffer) override {
+ return underlying_->GetStats(buffer);
+ }
+
+ private:
+ AddressRegionFactory* const underlying_;
+};
+
+class LargeAllocationTest : public ::testing::Test {
+ public:
+ LargeAllocationTest() {
+ old_ = MallocExtension::GetRegionFactory();
+ MallocExtension::SetRegionFactory(new NoErrnoRegionFactory(old_));
+
+ // Grab some memory so that some later allocations are guaranteed to fail.
+ small_ = ::operator new(4 << 20);
+ }
+
+ ~LargeAllocationTest() override {
+ ::operator delete(small_);
+
+ auto* current = MallocExtension::GetRegionFactory();
+
+ MallocExtension::SetRegionFactory(old_);
+ delete current;
+ }
+
+ private:
+ AddressRegionFactory* old_;
+ void* small_;
+};
+
+// Allocate some 1-byte objects. They had better be unique. One byte is not
+// large, but it exercises some paths related to the large-allocation code.
+TEST_F(LargeAllocationTest, UniqueAddresses) {
+ constexpr int kZeroTimes = 1024;
+
+ absl::flat_hash_set<void*> ptrs;
+ for (int i = 0; i < kZeroTimes; ++i) {
+ void* p = malloc(1);
+ ASSERT_NE(p, nullptr);
+ EXPECT_THAT(ptrs, ::testing::Not(::testing::Contains(p)));
+ ptrs.insert(p);
+ }
+
+ for (auto* p : ptrs) {
+ free(p);
+ }
+}
+
+TEST_F(LargeAllocationTest, MaxSize) {
+ // Test sizes up near the maximum size_t. These allocations test the
+ // wrap-around code.
+ constexpr size_t zero = 0;
+ constexpr size_t kMinusNTimes = 16384;
+ for (size_t i = 1; i < kMinusNTimes; ++i) {
+ TryAllocExpectFail(zero - i);
+ }
+}
+
+TEST_F(LargeAllocationTest, NearMaxSize) {
+ // Test sizes a bit smaller. The small malloc above guarantees that all these
+ // return nullptr.
+ constexpr size_t zero = 0;
+ constexpr size_t kMinusMBMinusNTimes = 16384;
+ for (size_t i = 0; i < kMinusMBMinusNTimes; ++i) {
+ TryAllocExpectFail(zero - 1048576 - i);
+ }
+}
+
+TEST_F(LargeAllocationTest, Half) {
+ // Test sizes at half of size_t.
+ // These might or might not fail to allocate.
+ constexpr size_t kHalfPlusMinusTimes = 64;
+ constexpr size_t half = std::numeric_limits<size_t>::max() / 2 + 1;
+ for (size_t i = 0; i < kHalfPlusMinusTimes; ++i) {
+ TryAllocMightFail(half - i);
+ TryAllocMightFail(half + i);
+ }
+}
+
+TEST_F(LargeAllocationTest, NearMaxAddressBits) {
+ // Tests sizes near the maximum address space size.
+ // For -1 <= i < 5, we expect all allocations to fail. For -6 <= i < -1, the
+ // allocation might succeed but create so much pagemap metadata that we exceed
+ // test memory limits and OOM. So we skip that range.
+ for (int i = -10; i < -6; ++i) {
+ TryAllocMightFail(size_t{1} << (kAddressBits + i));
+ }
+ for (int i = -1; i < 5; ++i) {
+ TryAllocExpectFail(size_t{1} << (kAddressBits + i));
+ }
+}
+
+} // namespace
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
diff --git a/contrib/libs/tcmalloc/tcmalloc/tcmalloc_policy.h b/contrib/libs/tcmalloc/tcmalloc/tcmalloc_policy.h
new file mode 100644
index 0000000000..d81f8f3be0
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/tcmalloc_policy.h
@@ -0,0 +1,260 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file defines policies used when allocating memory.
+//
+// An allocation policy encapsulates four policies:
+//
+// - Out of memory policy.
+// Dictates how to handle OOM conditions.
+//
+// struct OomPolicyTemplate {
+// // Invoked when we failed to allocate memory
+// // Must either terminate, throw, or return nullptr
+// static void* handle_oom(size_t size);
+// };
+//
+// - Alignment policy
+// Dictates alignment to use for an allocation.
+// Must be trivially copyable.
+//
+// struct AlignPolicyTemplate {
+// // Returns the alignment to use for the memory allocation,
+// // or 1 to use small allocation table alignments (8 bytes)
+//     // The returned value must be a non-zero power of 2.
+// size_t align() const;
+// };
+//
+// - Hook invocation policy
+//   Dictates the invocation of allocation hooks.
+//
+// struct HooksPolicyTemplate {
+// // Returns true if allocation hooks must be invoked.
+// static bool invoke_hooks();
+// };
+//
+// - NUMA partition policy
+// When NUMA awareness is enabled this dictates which NUMA partition we will
+// allocate memory from. Must be trivially copyable.
+//
+// struct NumaPartitionPolicyTemplate {
+// // Returns the NUMA partition to allocate from.
+// size_t partition() const;
+//
+// // Returns the NUMA partition to allocate from multiplied by
+// // kNumBaseClasses - i.e. the first size class that corresponds to the
+// // NUMA partition to allocate from.
+// size_t scaled_partition() const;
+// };
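+//
+// Illustrative composition (mirroring call sites in tcmalloc.cc; `size` and
+// `alignment` are hypothetical variables):
+//
+//   // operator new semantics with user alignment, returning nullptr on OOM:
+//   fast_alloc(CppPolicy().Nothrow().AlignAs(alignment), size);
+//
+//   // malloc semantics pinned to NUMA partition 0:
+//   fast_alloc(MallocPolicy().InNumaPartition(0), size);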
+
+#ifndef TCMALLOC_TCMALLOC_POLICY_H_
+#define TCMALLOC_TCMALLOC_POLICY_H_
+
+#include <errno.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <cstddef>
+
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/internal/numa.h"
+#include "tcmalloc/internal/percpu.h"
+#include "tcmalloc/static_vars.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+// NullOomPolicy: returns nullptr
+struct NullOomPolicy {
+ static inline constexpr void* handle_oom(size_t size) { return nullptr; }
+
+ static constexpr bool can_return_nullptr() { return true; }
+};
+
+// MallocOomPolicy: sets errno to ENOMEM and returns nullptr
+struct MallocOomPolicy {
+ static inline void* handle_oom(size_t size) {
+ errno = ENOMEM;
+ return nullptr;
+ }
+
+ static constexpr bool can_return_nullptr() { return true; }
+};
+
+// CppOomPolicy: terminates the program
+struct CppOomPolicy {
+ static ABSL_ATTRIBUTE_NOINLINE ABSL_ATTRIBUTE_NORETURN void* handle_oom(
+ size_t size) {
+ Crash(kCrashWithStats, __FILE__, __LINE__,
+ "Unable to allocate (new failed)", size);
+ __builtin_unreachable();
+ }
+
+ static constexpr bool can_return_nullptr() { return false; }
+};
+
+// DefaultAlignPolicy: use default small size table based allocation
+struct DefaultAlignPolicy {
+ // Important: the value here is explicitly '1' to indicate that the used
+ // alignment is the default alignment of the size tables in tcmalloc.
+ // The constexpr value of 1 will optimize out the alignment checks and
+ // iterations in the GetSizeClass() calls for default aligned allocations.
+ static constexpr size_t align() { return 1; }
+};
+
+// MallocAlignPolicy: use std::max_align_t allocation
+struct MallocAlignPolicy {
+ static constexpr size_t align() { return alignof(std::max_align_t); }
+};
+
+// AlignAsPolicy: use user provided alignment
+class AlignAsPolicy {
+ public:
+ AlignAsPolicy() = delete;
+ explicit constexpr AlignAsPolicy(size_t value) : value_(value) {}
+ explicit constexpr AlignAsPolicy(std::align_val_t value)
+ : AlignAsPolicy(static_cast<size_t>(value)) {}
+
+ size_t constexpr align() const { return value_; }
+
+ private:
+ size_t value_;
+};
+
+// InvokeHooksPolicy: invoke memory allocation hooks
+struct InvokeHooksPolicy {
+ static constexpr bool invoke_hooks() { return true; }
+};
+
+// NoHooksPolicy: do not invoke memory allocation hooks
+struct NoHooksPolicy {
+ static constexpr bool invoke_hooks() { return false; }
+};
+
+// Use a fixed NUMA partition.
+class FixedNumaPartitionPolicy {
+ public:
+ explicit constexpr FixedNumaPartitionPolicy(size_t partition)
+ : partition_(partition) {}
+
+ size_t constexpr partition() const { return partition_; }
+
+ size_t constexpr scaled_partition() const {
+ return partition_ * kNumBaseClasses;
+ }
+
+ private:
+ size_t partition_;
+};
+
+// Use the NUMA partition which the executing CPU is local to.
+struct LocalNumaPartitionPolicy {
+ // Note that the partition returned may change between calls if the executing
+ // thread migrates between NUMA nodes & partitions. Users of this function
+ // should not rely upon multiple invocations returning the same partition.
+ size_t partition() const {
+ return Static::numa_topology().GetCurrentPartition();
+ }
+ size_t scaled_partition() const {
+ return Static::numa_topology().GetCurrentScaledPartition();
+ }
+};
+
+// TCMallocPolicy defines the compound policy object containing the OOM,
+// alignment, hooks and NUMA partition policies.
+// It is trivially constructible, copyable and destructible.
+template <typename OomPolicy = CppOomPolicy,
+ typename AlignPolicy = DefaultAlignPolicy,
+ typename HooksPolicy = InvokeHooksPolicy,
+ typename NumaPolicy = LocalNumaPartitionPolicy>
+class TCMallocPolicy {
+ public:
+ constexpr TCMallocPolicy() = default;
+ explicit constexpr TCMallocPolicy(AlignPolicy align, NumaPolicy numa)
+ : align_(align), numa_(numa) {}
+
+ // OOM policy
+ static void* handle_oom(size_t size) { return OomPolicy::handle_oom(size); }
+
+ // Alignment policy
+ constexpr size_t align() const { return align_.align(); }
+
+ // NUMA partition
+ constexpr size_t numa_partition() const { return numa_.partition(); }
+
+ // NUMA partition multiplied by kNumBaseClasses
+ constexpr size_t scaled_numa_partition() const {
+ return numa_.scaled_partition();
+ }
+
+ // Hooks policy
+ static constexpr bool invoke_hooks() { return HooksPolicy::invoke_hooks(); }
+
+ // Returns this policy aligned as 'align'
+ template <typename align_t>
+ constexpr TCMallocPolicy<OomPolicy, AlignAsPolicy, HooksPolicy, NumaPolicy>
+ AlignAs(
+ align_t align) const {
+ return TCMallocPolicy<OomPolicy, AlignAsPolicy, HooksPolicy, NumaPolicy>(
+ AlignAsPolicy{align}, numa_);
+ }
+
+ // Returns this policy with a nullptr OOM policy.
+ constexpr TCMallocPolicy<NullOomPolicy, AlignPolicy, HooksPolicy,
+ NumaPolicy> Nothrow()
+ const {
+ return TCMallocPolicy<NullOomPolicy, AlignPolicy, HooksPolicy,
+ NumaPolicy>(align_, numa_);
+ }
+
+ // Returns this policy with NewAllocHook invocations disabled.
+ constexpr TCMallocPolicy<OomPolicy, AlignPolicy, NoHooksPolicy, NumaPolicy>
+ WithoutHooks()
+ const {
+ return TCMallocPolicy<OomPolicy, AlignPolicy, NoHooksPolicy,
+ NumaPolicy>(align_, numa_);
+ }
+
+ // Returns this policy with a fixed NUMA partition.
+ constexpr TCMallocPolicy<OomPolicy, AlignPolicy, NoHooksPolicy,
+ FixedNumaPartitionPolicy> InNumaPartition(size_t partition) const {
+ return TCMallocPolicy<OomPolicy, AlignPolicy, NoHooksPolicy,
+ FixedNumaPartitionPolicy>(
+ align_, FixedNumaPartitionPolicy{partition});
+ }
+
+ // Returns this policy with a fixed NUMA partition matching that of the
+ // previously allocated `ptr`.
+ constexpr auto InSameNumaPartitionAs(void* ptr) const {
+ return InNumaPartition(NumaPartitionFromPointer(ptr));
+ }
+
+ static constexpr bool can_return_nullptr() {
+ return OomPolicy::can_return_nullptr();
+ }
+
+ private:
+ AlignPolicy align_;
+ NumaPolicy numa_;
+};
+
+using CppPolicy = TCMallocPolicy<CppOomPolicy, DefaultAlignPolicy>;
+using MallocPolicy = TCMallocPolicy<MallocOomPolicy, MallocAlignPolicy>;
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // TCMALLOC_TCMALLOC_POLICY_H_
diff --git a/contrib/libs/tcmalloc/tcmalloc/thread_cache.cc b/contrib/libs/tcmalloc/tcmalloc/thread_cache.cc
new file mode 100644
index 0000000000..89cc779af1
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/thread_cache.cc
@@ -0,0 +1,417 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/thread_cache.h"
+
+#include <algorithm>
+
+#include "absl/base/internal/spinlock.h"
+#include "absl/base/macros.h"
+#include "tcmalloc/transfer_cache.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+size_t ThreadCache::per_thread_cache_size_ = kMaxThreadCacheSize;
+size_t ThreadCache::overall_thread_cache_size_ = kDefaultOverallThreadCacheSize;
+int64_t ThreadCache::unclaimed_cache_space_ = kDefaultOverallThreadCacheSize;
+ThreadCache* ThreadCache::thread_heaps_ = nullptr;
+int ThreadCache::thread_heap_count_ = 0;
+ThreadCache* ThreadCache::next_memory_steal_ = nullptr;
+#ifdef ABSL_HAVE_TLS
+__thread ThreadCache* ThreadCache::thread_local_data_
+ ABSL_ATTRIBUTE_INITIAL_EXEC = nullptr;
+#endif
+ABSL_CONST_INIT bool ThreadCache::tsd_inited_ = false;
+pthread_key_t ThreadCache::heap_key_;
+
+void ThreadCache::Init(pthread_t tid) {
+ size_ = 0;
+
+ max_size_ = 0;
+ IncreaseCacheLimitLocked();
+ if (max_size_ == 0) {
+ // There isn't enough memory to go around. Just give the minimum to
+ // this thread.
+ max_size_ = kMinThreadCacheSize;
+
+ // Take unclaimed_cache_space_ negative.
+ unclaimed_cache_space_ -= kMinThreadCacheSize;
+ ASSERT(unclaimed_cache_space_ < 0);
+ }
+
+ next_ = nullptr;
+ prev_ = nullptr;
+ tid_ = tid;
+ in_setspecific_ = false;
+ for (size_t cl = 0; cl < kNumClasses; ++cl) {
+ list_[cl].Init();
+ }
+}
+
+void ThreadCache::Cleanup() {
+ // Put unused memory back into central cache
+ for (int cl = 0; cl < kNumClasses; ++cl) {
+ if (list_[cl].length() > 0) {
+ ReleaseToCentralCache(&list_[cl], cl, list_[cl].length());
+ }
+ }
+}
+
+// Remove some objects of class "cl" from central cache and add to thread heap.
+// On success, return the first object for immediate use; otherwise return NULL.
+void* ThreadCache::FetchFromCentralCache(size_t cl, size_t byte_size) {
+ FreeList* list = &list_[cl];
+ ASSERT(list->empty());
+ const int batch_size = Static::sizemap().num_objects_to_move(cl);
+
+ const int num_to_move = std::min<int>(list->max_length(), batch_size);
+ void* batch[kMaxObjectsToMove];
+ int fetch_count =
+ Static::transfer_cache().RemoveRange(cl, batch, num_to_move);
+ if (fetch_count == 0) {
+ return nullptr;
+ }
+
+ if (--fetch_count > 0) {
+ size_ += byte_size * fetch_count;
+ list->PushBatch(fetch_count, batch + 1);
+ }
+
+ // Increase max length slowly up to batch_size. After that,
+ // increase by batch_size in one shot so that the length is a
+ // multiple of batch_size.
+ if (list->max_length() < batch_size) {
+ list->set_max_length(list->max_length() + 1);
+ } else {
+ // Don't let the list get too long. In 32 bit builds, the length
+ // is represented by a 16 bit int, so we need to watch out for
+ // integer overflow.
+ int new_length = std::min<int>(list->max_length() + batch_size,
+ kMaxDynamicFreeListLength);
+ // The list's max_length must always be a multiple of batch_size,
+ // and kMaxDynamicFreeListLength is not necessarily a multiple
+ // of batch_size.
+ new_length -= new_length % batch_size;
+ ASSERT(new_length % batch_size == 0);
+ list->set_max_length(new_length);
+ }
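+  // (Illustration: with a batch_size of 32, max_length grows 1, 2, ..., 32 on
+  // successive refills, then by whole batches -- 64, 96, ... -- capped at
+  // kMaxDynamicFreeListLength rounded down to a multiple of 32.)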
+ return batch[0];
+}
+
+void ThreadCache::ListTooLong(FreeList* list, size_t cl) {
+ const int batch_size = Static::sizemap().num_objects_to_move(cl);
+ ReleaseToCentralCache(list, cl, batch_size);
+
+ // If the list is too long, we need to transfer some number of
+ // objects to the central cache. Ideally, we would transfer
+ // num_objects_to_move, so the code below tries to make max_length
+ // converge on num_objects_to_move.
+
+ if (list->max_length() < batch_size) {
+ // Slow start the max_length so we don't overreserve.
+ list->set_max_length(list->max_length() + 1);
+ } else if (list->max_length() > batch_size) {
+ // If we consistently go over max_length, shrink max_length. If we don't
+ // shrink it, some amount of memory will always stay in this freelist.
+ list->set_length_overages(list->length_overages() + 1);
+ if (list->length_overages() > kMaxOverages) {
+ ASSERT(list->max_length() > batch_size);
+ list->set_max_length(list->max_length() - batch_size);
+ list->set_length_overages(0);
+ }
+ }
+}
+
+// Remove some objects of class "cl" from thread heap and add to central cache
+void ThreadCache::ReleaseToCentralCache(FreeList* src, size_t cl, int N) {
+ ASSERT(src == &list_[cl]);
+ if (N > src->length()) N = src->length();
+ size_t delta_bytes = N * Static::sizemap().class_to_size(cl);
+
+ // We return prepackaged chains of the correct size to the central cache.
+ void* batch[kMaxObjectsToMove];
+ int batch_size = Static::sizemap().num_objects_to_move(cl);
+ while (N > batch_size) {
+ src->PopBatch(batch_size, batch);
+ static_assert(ABSL_ARRAYSIZE(batch) >= kMaxObjectsToMove,
+ "not enough space in batch");
+ Static::transfer_cache().InsertRange(cl,
+ absl::Span<void*>(batch, batch_size));
+ N -= batch_size;
+ }
+ src->PopBatch(N, batch);
+ static_assert(ABSL_ARRAYSIZE(batch) >= kMaxObjectsToMove,
+ "not enough space in batch");
+ Static::transfer_cache().InsertRange(cl, absl::Span<void*>(batch, N));
+ size_ -= delta_bytes;
+}
+
+// Release idle memory to the central cache
+void ThreadCache::Scavenge() {
+ // If the low-water mark for the free list is L, it means we would
+ // not have had to allocate anything from the central cache even if
+ // we had reduced the free list size by L. We aim to get closer to
+ // that situation by dropping L/2 nodes from the free list. This
+ // may not release much memory, but if so we will call scavenge again
+ // pretty soon and the low-water marks will be high on that call.
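+  // (Illustration: if a size class's list never dropped below 8 entries since
+  // the last scavenge, its low-water mark is 8 and 4 entries are released
+  // below.)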
+ for (int cl = 0; cl < kNumClasses; cl++) {
+ FreeList* list = &list_[cl];
+ const int lowmark = list->lowwatermark();
+ if (lowmark > 0) {
+ const int drop = (lowmark > 1) ? lowmark / 2 : 1;
+ ReleaseToCentralCache(list, cl, drop);
+
+ // Shrink the max length if it isn't used. Only shrink down to
+ // batch_size -- if the thread was active enough to get the max_length
+ // above batch_size, it will likely be that active again. If
+      // max_length shrinks below batch_size, the thread will have to
+ // go through the slow-start behavior again. The slow-start is useful
+ // mainly for threads that stay relatively idle for their entire
+ // lifetime.
+ const int batch_size = Static::sizemap().num_objects_to_move(cl);
+ if (list->max_length() > batch_size) {
+ list->set_max_length(
+ std::max<int>(list->max_length() - batch_size, batch_size));
+ }
+ }
+ list->clear_lowwatermark();
+ }
+
+ IncreaseCacheLimit();
+}
+
+void ThreadCache::DeallocateSlow(void* ptr, FreeList* list, size_t cl) {
+ tracking::Report(kFreeMiss, cl, 1);
+ if (ABSL_PREDICT_FALSE(list->length() > list->max_length())) {
+ tracking::Report(kFreeTruncations, cl, 1);
+ ListTooLong(list, cl);
+ }
+ if (size_ >= max_size_) {
+ tracking::Report(kFreeScavenges, cl, 1);
+ Scavenge();
+ }
+}
+
+void ThreadCache::IncreaseCacheLimit() {
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+ IncreaseCacheLimitLocked();
+}
+
+void ThreadCache::IncreaseCacheLimitLocked() {
+ if (unclaimed_cache_space_ > 0) {
+ // Possibly make unclaimed_cache_space_ negative.
+ unclaimed_cache_space_ -= kStealAmount;
+ max_size_ += kStealAmount;
+ return;
+ }
+ // Don't hold pageheap_lock too long. Try to steal from 10 other
+ // threads before giving up. The i < 10 condition also prevents an
+ // infinite loop in case none of the existing thread heaps are
+ // suitable places to steal from.
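+  // (A successful steal below moves kStealAmount bytes of budget from the
+  // victim's max_size_ to ours and ends the search; this cache itself and
+  // victims already at or below kMinThreadCacheSize are skipped.)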
+ for (int i = 0; i < 10; ++i, next_memory_steal_ = next_memory_steal_->next_) {
+ // Reached the end of the linked list. Start at the beginning.
+ if (next_memory_steal_ == nullptr) {
+ ASSERT(thread_heaps_ != nullptr);
+ next_memory_steal_ = thread_heaps_;
+ }
+ if (next_memory_steal_ == this ||
+ next_memory_steal_->max_size_ <= kMinThreadCacheSize) {
+ continue;
+ }
+ next_memory_steal_->max_size_ -= kStealAmount;
+ max_size_ += kStealAmount;
+
+ next_memory_steal_ = next_memory_steal_->next_;
+ return;
+ }
+}
+
+void ThreadCache::InitTSD() {
+ ASSERT(!tsd_inited_);
+ pthread_key_create(&heap_key_, DestroyThreadCache);
+ tsd_inited_ = true;
+}
+
+ThreadCache* ThreadCache::CreateCacheIfNecessary() {
+ // Initialize per-thread data if necessary
+ Static::InitIfNecessary();
+ ThreadCache* heap = nullptr;
+
+#ifdef ABSL_HAVE_TLS
+ const bool maybe_reentrant = !tsd_inited_;
+ // If we have set up our TLS, we can avoid a scan of the thread_heaps_ list.
+ if (tsd_inited_) {
+ if (thread_local_data_) {
+ return thread_local_data_;
+ }
+ }
+#else
+ const bool maybe_reentrant = true;
+#endif
+
+ {
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+ const pthread_t me = pthread_self();
+
+    // This may be a recursive malloc call from pthread_setspecific().
+ // In that case, the heap for this thread has already been created
+ // and added to the linked list. So we search for that first.
+ if (maybe_reentrant) {
+ for (ThreadCache* h = thread_heaps_; h != nullptr; h = h->next_) {
+ if (h->tid_ == me) {
+ heap = h;
+ break;
+ }
+ }
+ }
+
+ if (heap == nullptr) {
+ heap = NewHeap(me);
+ }
+ }
+
+ // We call pthread_setspecific() outside the lock because it may
+ // call malloc() recursively. We check for the recursive call using
+ // the "in_setspecific_" flag so that we can avoid calling
+ // pthread_setspecific() if we are already inside pthread_setspecific().
+ if (!heap->in_setspecific_ && tsd_inited_) {
+ heap->in_setspecific_ = true;
+#ifdef ABSL_HAVE_TLS
+ // Also keep a copy in __thread for faster retrieval
+ thread_local_data_ = heap;
+#endif
+ pthread_setspecific(heap_key_, heap);
+ heap->in_setspecific_ = false;
+ }
+ return heap;
+}
+
+ThreadCache* ThreadCache::NewHeap(pthread_t tid) {
+ // Create the heap and add it to the linked list
+ ThreadCache* heap = Static::threadcache_allocator().New();
+ heap->Init(tid);
+ heap->next_ = thread_heaps_;
+ heap->prev_ = nullptr;
+ if (thread_heaps_ != nullptr) {
+ thread_heaps_->prev_ = heap;
+ } else {
+    // This is the only thread heap at the moment.
+ ASSERT(next_memory_steal_ == nullptr);
+ next_memory_steal_ = heap;
+ }
+ thread_heaps_ = heap;
+ thread_heap_count_++;
+ return heap;
+}
+
+void ThreadCache::BecomeIdle() {
+ if (!tsd_inited_) return; // No caches yet
+ ThreadCache* heap = GetCacheIfPresent();
+ if (heap == nullptr) return; // No thread cache to remove
+ if (heap->in_setspecific_) return; // Do not disturb the active caller
+
+ heap->in_setspecific_ = true;
+ pthread_setspecific(heap_key_, nullptr);
+#ifdef ABSL_HAVE_TLS
+ // Also update the copy in __thread
+ thread_local_data_ = nullptr;
+#endif
+ heap->in_setspecific_ = false;
+ if (GetCacheIfPresent() == heap) {
+ // Somehow heap got reinstated by a recursive call to malloc
+ // from pthread_setspecific. We give up in this case.
+ return;
+ }
+
+ // We can now get rid of the heap
+ DeleteCache(heap);
+}
+
+void ThreadCache::DestroyThreadCache(void* ptr) {
+ // Note that "ptr" cannot be NULL since pthread promises not
+ // to invoke the destructor on NULL values, but for safety,
+ // we check anyway.
+ if (ptr != nullptr) {
+#ifdef ABSL_HAVE_TLS
+ thread_local_data_ = nullptr;
+#endif
+ DeleteCache(reinterpret_cast<ThreadCache*>(ptr));
+ }
+}
+
+void ThreadCache::DeleteCache(ThreadCache* heap) {
+ // Remove all memory from heap
+ heap->Cleanup();
+
+ // Remove from linked list
+ absl::base_internal::SpinLockHolder h(&pageheap_lock);
+ if (heap->next_ != nullptr) heap->next_->prev_ = heap->prev_;
+ if (heap->prev_ != nullptr) heap->prev_->next_ = heap->next_;
+ if (thread_heaps_ == heap) thread_heaps_ = heap->next_;
+ thread_heap_count_--;
+
+ if (next_memory_steal_ == heap) next_memory_steal_ = heap->next_;
+ if (next_memory_steal_ == nullptr) next_memory_steal_ = thread_heaps_;
+ unclaimed_cache_space_ += heap->max_size_;
+
+ Static::threadcache_allocator().Delete(heap);
+}
+
+void ThreadCache::RecomputePerThreadCacheSize() {
+ // Divide available space across threads
+ int n = thread_heap_count_ > 0 ? thread_heap_count_ : 1;
+ size_t space = overall_thread_cache_size_ / n;
+
+ // Limit to allowed range
+ if (space < kMinThreadCacheSize) space = kMinThreadCacheSize;
+ if (space > kMaxThreadCacheSize) space = kMaxThreadCacheSize;
+
+ double ratio = space / std::max<double>(1, per_thread_cache_size_);
+ size_t claimed = 0;
+ for (ThreadCache* h = thread_heaps_; h != nullptr; h = h->next_) {
+ // Increasing the total cache size should not circumvent the
+ // slow-start growth of max_size_.
+ if (ratio < 1.0) {
+ h->max_size_ *= ratio;
+ }
+ claimed += h->max_size_;
+ }
+ unclaimed_cache_space_ = overall_thread_cache_size_ - claimed;
+ per_thread_cache_size_ = space;
+}
+
+void ThreadCache::GetThreadStats(uint64_t* total_bytes, uint64_t* class_count) {
+ for (ThreadCache* h = thread_heaps_; h != nullptr; h = h->next_) {
+ *total_bytes += h->Size();
+ if (class_count) {
+ for (int cl = 0; cl < kNumClasses; ++cl) {
+ class_count[cl] += h->freelist_length(cl);
+ }
+ }
+ }
+}
+
+void ThreadCache::set_overall_thread_cache_size(size_t new_size) {
+ // Clip the value to a reasonable minimum
+ if (new_size < kMinThreadCacheSize) new_size = kMinThreadCacheSize;
+ overall_thread_cache_size_ = new_size;
+
+ RecomputePerThreadCacheSize();
+}
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
diff --git a/contrib/libs/tcmalloc/tcmalloc/thread_cache.h b/contrib/libs/tcmalloc/tcmalloc/thread_cache.h
new file mode 100644
index 0000000000..ae6cef869f
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/thread_cache.h
@@ -0,0 +1,345 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TCMALLOC_THREAD_CACHE_H_
+#define TCMALLOC_THREAD_CACHE_H_
+
+#include <pthread.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+#include "absl/base/attributes.h"
+#include "absl/base/config.h"
+#include "absl/base/optimization.h"
+#include "absl/base/thread_annotations.h"
+#include "tcmalloc/common.h"
+#include "tcmalloc/internal/linked_list.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/page_heap_allocator.h"
+#include "tcmalloc/sampler.h"
+#include "tcmalloc/static_vars.h"
+#include "tcmalloc/tracking.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+//-------------------------------------------------------------------
+// Data kept per thread
+//-------------------------------------------------------------------
+
+class ThreadCache {
+ public:
+ void Init(pthread_t tid) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+ void Cleanup();
+
+ // Accessors (mostly just for printing stats)
+ int freelist_length(size_t cl) const { return list_[cl].length(); }
+
+ // Total byte size in cache
+ size_t Size() const { return size_; }
+
+ // Allocate an object of the given size class. When allocation fails
+ // (from this cache and after running FetchFromCentralCache),
+ // OOMHandler(size) is called and its return value is
+ // returned from Allocate. OOMHandler is used to parameterize
+ // out-of-memory handling (raising exception, returning nullptr,
+ // calling new_handler or anything else). "Passing" OOMHandler in
+  // this way allows Allocate to be used in tail-call position in the
+  // fast path, making Allocate tail-call the slow-path code.
+ template <void* OOMHandler(size_t)>
+ void* Allocate(size_t cl);
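+  //
+  // Call-site sketch (illustrative; `Policy` stands for one of the OOM
+  // policies in tcmalloc_policy.h and `cl` for a nonzero size class):
+  //   void* p = cache->Allocate<Policy::handle_oom>(cl);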
+
+ void Deallocate(void* ptr, size_t cl);
+
+ void Scavenge();
+
+ Sampler* GetSampler();
+
+ static void InitTSD();
+ static ThreadCache* GetCache();
+ static ThreadCache* GetCacheIfPresent();
+ static ThreadCache* CreateCacheIfNecessary();
+ static void BecomeIdle();
+
+  // Returns stats on total thread caches created/used.
+ static inline AllocatorStats HeapStats()
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+
+ // Adds to *total_bytes the total number of bytes used by all thread heaps.
+ // Also, if class_count is not NULL, it must be an array of size kNumClasses,
+ // and this function will increment each element of class_count by the number
+ // of items in all thread-local freelists of the corresponding size class.
+ static void GetThreadStats(uint64_t* total_bytes, uint64_t* class_count)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+
+ // Sets the total thread cache size to new_size, recomputing the
+ // individual thread cache sizes as necessary.
+ static void set_overall_thread_cache_size(size_t new_size)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+
+ static size_t overall_thread_cache_size()
+ ABSL_SHARED_LOCKS_REQUIRED(pageheap_lock) {
+ return overall_thread_cache_size_;
+ }
+
+ template <void* OOMHandler(size_t)>
+ void* ABSL_ATTRIBUTE_NOINLINE AllocateSlow(size_t cl, size_t allocated_size) {
+ tracking::Report(kMallocMiss, cl, 1);
+ void* ret = FetchFromCentralCache(cl, allocated_size);
+ if (ABSL_PREDICT_TRUE(ret != nullptr)) {
+ return ret;
+ }
+ return OOMHandler(allocated_size);
+ }
+
+ private:
+ // We inherit rather than include the list as a data structure to reduce
+ // compiler padding. Without inheritance, the compiler pads the list
+ // structure and then adds it as a member, even though we could fit everything
+ // without padding.
+ class FreeList : public LinkedList {
+ private:
+ uint32_t lowater_; // Low water mark for list length.
+ uint32_t max_length_; // Dynamic max list length based on usage.
+ // Tracks the number of times a deallocation has caused
+ // length_ > max_length_. After the kMaxOverages'th time, max_length_
+ // shrinks and length_overages_ is reset to zero.
+ uint32_t length_overages_;
+
+    // This extra unused field pads FreeList to 32 bytes on 64-bit machines,
+    // helping the compiler generate faster code for indexing the array of
+    // lists.
+ void* ABSL_ATTRIBUTE_UNUSED extra_;
+
+ public:
+ void Init() {
+ LinkedList::Init();
+ lowater_ = 0;
+ max_length_ = 1;
+ length_overages_ = 0;
+ }
+
+ // Return the maximum length of the list.
+ size_t max_length() const { return max_length_; }
+
+    // Set the maximum length of the list. If 'new_max' < length(), the
+    // client is responsible for removing objects from the list.
+ void set_max_length(size_t new_max) { max_length_ = new_max; }
+
+ // Return the number of times that length() has gone over max_length().
+ size_t length_overages() const { return length_overages_; }
+
+ void set_length_overages(size_t new_count) { length_overages_ = new_count; }
+
+ // Low-water mark management
+ int lowwatermark() const { return lowater_; }
+ void clear_lowwatermark() { lowater_ = length(); }
+
+ ABSL_ATTRIBUTE_ALWAYS_INLINE bool TryPop(void** ret) {
+ bool out = LinkedList::TryPop(ret);
+ if (ABSL_PREDICT_TRUE(out) && ABSL_PREDICT_FALSE(length() < lowater_)) {
+ lowater_ = length();
+ }
+ return out;
+ }
+
+ void PopBatch(int N, void** batch) {
+ LinkedList::PopBatch(N, batch);
+ if (length() < lowater_) lowater_ = length();
+ }
+ };
+
+// We've deliberately introduced an unused extra_ field into FreeList to pad
+// its size. Let's ensure that it is still working as intended.
+#ifdef _LP64
+ static_assert(sizeof(FreeList) == 32, "Freelist size has changed");
+#endif
+
+ // Gets and returns an object from the central cache, and, if possible,
+ // also adds some objects of that size class to this thread cache.
+ void* FetchFromCentralCache(size_t cl, size_t byte_size);
+
+ // Releases some number of items from src. Adjusts the list's max_length
+ // to eventually converge on num_objects_to_move(cl).
+ void ListTooLong(FreeList* list, size_t cl);
+
+ void DeallocateSlow(void* ptr, FreeList* list, size_t cl);
+
+ // Releases N items from this thread cache.
+ void ReleaseToCentralCache(FreeList* src, size_t cl, int N);
+
+ // Increase max_size_ by reducing unclaimed_cache_space_ or by
+ // reducing the max_size_ of some other thread. In both cases,
+ // the delta is kStealAmount.
+ void IncreaseCacheLimit();
+
+ // Same as above but called with pageheap_lock held.
+ void IncreaseCacheLimitLocked() ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+
+ // If TLS is available, we also store a copy of the per-thread object
+ // in a __thread variable since __thread variables are faster to read
+ // than pthread_getspecific(). We still need pthread_setspecific()
+ // because __thread variables provide no way to run cleanup code when
+ // a thread is destroyed.
+ //
+ // We also give a hint to the compiler to use the "initial exec" TLS
+ // model. This is faster than the default TLS model, at the cost that
+ // you cannot dlopen this library. (To see the difference, look at
+ // the CPU use of __tls_get_addr with and without this attribute.)
+ //
+ // Since using dlopen on a malloc replacement is asking for trouble in any
+ // case, that's a good tradeoff for us.
+#ifdef ABSL_HAVE_TLS
+ static __thread ThreadCache* thread_local_data_ ABSL_ATTRIBUTE_INITIAL_EXEC;
+#endif
+
+ // Thread-specific key. Initialization here is somewhat tricky
+ // because some Linux startup code invokes malloc() before it
+  // is in a good enough state to handle pthread_key_create().
+ // Therefore, we use TSD keys only after tsd_inited is set to true.
+ // Until then, we use a slow path to get the heap object.
+ static bool tsd_inited_;
+ static pthread_key_t heap_key_;
+
+ // Linked list of heap objects.
+ static ThreadCache* thread_heaps_ ABSL_GUARDED_BY(pageheap_lock);
+ static int thread_heap_count_ ABSL_GUARDED_BY(pageheap_lock);
+
+  // A pointer to one of the objects in thread_heaps_. Represents the next
+  // ThreadCache from which a thread over its max_size_ should steal its
+  // memory limit. We round-robin through all of the objects in thread_heaps_.
+ static ThreadCache* next_memory_steal_ ABSL_GUARDED_BY(pageheap_lock);
+
+ // Overall thread cache size.
+ static size_t overall_thread_cache_size_ ABSL_GUARDED_BY(pageheap_lock);
+
+ // Global per-thread cache size.
+ static size_t per_thread_cache_size_ ABSL_GUARDED_BY(pageheap_lock);
+
+ // Represents overall_thread_cache_size_ minus the sum of max_size_
+ // across all ThreadCaches. We use int64_t even in 32-bit builds because
+ // with enough ThreadCaches, this number can get smaller than -2^31.
+ static int64_t unclaimed_cache_space_ ABSL_GUARDED_BY(pageheap_lock);
+
+ // This class is laid out with the most frequently used fields
+ // first so that hot elements are placed on the same cache line.
+
+ FreeList list_[kNumClasses]; // Array indexed by size-class
+
+ size_t size_; // Combined size of data
+ size_t max_size_; // size_ > max_size_ --> Scavenge()
+
+#ifndef ABSL_HAVE_TLS
+ // We sample allocations, biased by the size of the allocation.
+  // If we have TLS, then we use the sampler defined in tcmalloc.cc.
+ Sampler sampler_;
+#endif
+
+ pthread_t tid_;
+ bool in_setspecific_;
+
+ // Allocate a new heap.
+ static ThreadCache* NewHeap(pthread_t tid)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+
+ // Use only as pthread thread-specific destructor function.
+ static void DestroyThreadCache(void* ptr);
+
+ static void DeleteCache(ThreadCache* heap);
+ static void RecomputePerThreadCacheSize()
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+
+ public:
+ // All ThreadCache objects are kept in a linked list (for stats collection)
+ ThreadCache* next_;
+ ThreadCache* prev_;
+
+ private:
+#ifdef ABSL_CACHELINE_SIZE
+ // Ensure that two instances of this class are never on the same cache line.
+ // This is critical for performance, as false sharing would negate many of
+ // the benefits of a per-thread cache.
+ char padding_[ABSL_CACHELINE_SIZE];
+#endif
+};
+
+inline AllocatorStats ThreadCache::HeapStats() {
+ return Static::threadcache_allocator().stats();
+}
+
+#ifndef ABSL_HAVE_TLS
+inline Sampler* ThreadCache::GetSampler() { return &sampler_; }
+#endif
+
+template <void* OOMHandler(size_t)>
+inline void* ABSL_ATTRIBUTE_ALWAYS_INLINE ThreadCache::Allocate(size_t cl) {
+ const size_t allocated_size = Static::sizemap().class_to_size(cl);
+
+ FreeList* list = &list_[cl];
+ void* ret;
+ if (ABSL_PREDICT_TRUE(list->TryPop(&ret))) {
+ tracking::Report(kMallocHit, cl, 1);
+ size_ -= allocated_size;
+ return ret;
+ }
+
+ return AllocateSlow<OOMHandler>(cl, allocated_size);
+}
+
+inline void ABSL_ATTRIBUTE_ALWAYS_INLINE ThreadCache::Deallocate(void* ptr,
+ size_t cl) {
+ FreeList* list = &list_[cl];
+ size_ += Static::sizemap().class_to_size(cl);
+ ssize_t size_headroom = max_size_ - size_ - 1;
+
+ list->Push(ptr);
+ ssize_t list_headroom =
+ static_cast<ssize_t>(list->max_length()) - list->length();
+
+  // Two relatively uncommon cases require further work; in the common case
+  // we are done, and the bitwise-or trick that follows detects both uncommon
+  // cases with a single branch.
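+  // (Both headrooms are signed: if either is negative its sign bit is set,
+  // and the sign bit survives the bitwise OR, so a single "< 0" test covers
+  // both conditions.)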
+ if ((list_headroom | size_headroom) < 0) {
+ DeallocateSlow(ptr, list, cl);
+ } else {
+ tracking::Report(kFreeHit, cl, 1);
+ }
+}
+
+inline ThreadCache* ABSL_ATTRIBUTE_ALWAYS_INLINE
+ThreadCache::GetCacheIfPresent() {
+#ifdef ABSL_HAVE_TLS
+ // __thread is faster
+ return thread_local_data_;
+#else
+ return tsd_inited_
+ ? reinterpret_cast<ThreadCache*>(pthread_getspecific(heap_key_))
+ : nullptr;
+#endif
+}
+
+inline ThreadCache* ThreadCache::GetCache() {
+ ThreadCache* tc = GetCacheIfPresent();
+ return (ABSL_PREDICT_TRUE(tc != nullptr)) ? tc : CreateCacheIfNecessary();
+}
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // TCMALLOC_THREAD_CACHE_H_
diff --git a/contrib/libs/tcmalloc/tcmalloc/thread_cache_test.cc b/contrib/libs/tcmalloc/tcmalloc/thread_cache_test.cc
new file mode 100644
index 0000000000..5b2d10b2ac
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/thread_cache_test.cc
@@ -0,0 +1,132 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <errno.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include <string>
+#include <thread> // NOLINT(build/c++11)
+
+#include "gtest/gtest.h"
+#include "absl/strings/str_cat.h"
+#include "benchmark/benchmark.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/internal/memory_stats.h"
+#include "tcmalloc/internal/parameter_accessors.h"
+#include "tcmalloc/malloc_extension.h"
+
+namespace tcmalloc {
+namespace {
+
+int64_t MemoryUsageSlow(pid_t pid) {
+ int64_t ret = 0;
+
+ FILE *f =
+ fopen(absl::StrCat("/proc/", pid, "/task/", pid, "/smaps").c_str(), "r");
+ CHECK_CONDITION(f != nullptr);
+
+ char buf[BUFSIZ];
+ while (fgets(buf, sizeof(buf), f) != nullptr) {
+ size_t rss;
+ if (sscanf(buf, "Rss: %zu kB", &rss) == 1) ret += rss;
+ }
+ CHECK_CONDITION(feof(f));
+ fclose(f);
+
+ // Rss is reported in KiB
+ ret *= 1024;
+
+ // A sanity check: our return value should be in the same ballpark as
+ // GetMemoryStats.
+ tcmalloc::tcmalloc_internal::MemoryStats stats;
+ CHECK_CONDITION(tcmalloc::tcmalloc_internal::GetMemoryStats(&stats));
+ EXPECT_GE(ret, 0.9 * stats.rss);
+ EXPECT_LE(ret, 1.1 * stats.rss);
+
+ return ret;
+}
+
+class ThreadCacheTest : public ::testing::Test {
+ public:
+ ThreadCacheTest() {
+ // Explicitly disable guarded allocations for this test. For aggressive
+ // sample rates on PPC (with 64KB pages), RSS grows quickly due to
+ // page-sized allocations that we don't release.
+ MallocExtension::SetGuardedSamplingRate(-1);
+ }
+};
+
+// Make sure that creating and destroying many mallocing threads
+// does not leak memory.
+TEST_F(ThreadCacheTest, NoLeakOnThreadDestruction) {
+ // Test only valid in per-thread mode
+ ASSERT_FALSE(MallocExtension::PerCpuCachesActive());
+
+ // Force a small sample to initialize tagged page allocator.
+ constexpr int64_t kAlloc = 8192;
+ const int64_t num_allocs =
+ 32 * MallocExtension::GetProfileSamplingRate() / kAlloc;
+ for (int64_t i = 0; i < num_allocs; ++i) {
+ ::operator delete(::operator new(kAlloc));
+ }
+
+  // Prefault and mlock the currently mapped address space. This prevents
+  // minor faults during the test from showing up as an apparent memory leak
+  // via RSS growth.
+ //
+  // Previously, we tried to mlock only file-backed mappings, but page faults
+  // for .bss (from small pages on PPC or from hugepages on all platforms)
+  // also caused test flakiness.
+ //
+  // We do *not* apply MCL_FUTURE, so that allocations made during the test
+  // run can still be released.
+ if (mlockall(MCL_CURRENT) != 0) {
+ GTEST_SKIP();
+ }
+ const int64_t start_size = MemoryUsageSlow(getpid());
+ ASSERT_GT(start_size, 0);
+
+ static const size_t kThreads = 16 * 1024;
+
+ for (int i = 0; i < kThreads; ++i) {
+ std::thread t([]() {
+ void *p = calloc(1024, 1);
+ benchmark::DoNotOptimize(p);
+ free(p);
+ });
+
+ t.join();
+ }
+ const int64_t end_size = MemoryUsageSlow(getpid());
+
+ // Flush the page heap. Our allocations may have been retained.
+ if (TCMalloc_Internal_SetHugePageFillerSkipSubreleaseInterval != nullptr) {
+ TCMalloc_Internal_SetHugePageFillerSkipSubreleaseInterval(
+ absl::ZeroDuration());
+ }
+ MallocExtension::ReleaseMemoryToSystem(std::numeric_limits<size_t>::max());
+
+ // This will detect a leak rate of 12 bytes per thread, which is well under 1%
+ // of the allocation done.
+ EXPECT_GE(192 * 1024, end_size - start_size)
+ << "Before: " << start_size << " After: " << end_size;
+}
+
+} // namespace
+} // namespace tcmalloc
diff --git a/contrib/libs/tcmalloc/tcmalloc/tracking.h b/contrib/libs/tcmalloc/tcmalloc/tracking.h
new file mode 100644
index 0000000000..68d4c59b9c
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/tracking.h
@@ -0,0 +1,109 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TCMALLOC_TRACKING_H_
+#define TCMALLOC_TRACKING_H_
+// Optional support for tracking various stats in tcmalloc. For each
+// sizeclass, we track:
+// * # of mallocs
+// * ...that hit the fast path
+// * # of frees
+// * ...that hit the fast path
+//
+// both on each CPU and on each thread.
+//
+// If disabled (TCMALLOC_TRACK_ALLOCS not defined), it has no runtime cost in
+// time or space.
+//
+// If enabled and an implementation is provided, we issue calls to record
+// various statistics about cache hit rates.
+
+#include <stddef.h>
+#include <sys/types.h>
+
+#include <map>
+#include <string>
+
+#include "absl/base/internal/per_thread_tls.h"
+#include "absl/base/internal/spinlock.h"
+#include "tcmalloc/common.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/internal/percpu.h"
+#include "tcmalloc/malloc_extension.h"
+
+// Uncomment here or pass --copt=-DTCMALLOC_TRACK_ALLOCS at build time if you
+// want tracking.
+#ifndef TCMALLOC_TRACK_ALLOCS
+// #define TCMALLOC_TRACK_ALLOCS
+#endif
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+#if 1
+#define TCMALLOC_HAVE_TRACKING 0
+#endif
+
+// We track various kinds of events on each thread and each cpu. Each
+// event is broken down by sizeclass where it happened.
+// To track a new event, add an enum value here, insert calls to
+// tracking::Report() where the event occurs, and add a printable name
+// for the event to kTrackingStatNames (in tracking.cc). Optionally,
+// print the stat in State::Print.
+enum TrackingStat {
+ kMallocHit = 0, // malloc that took the fast path
+ kMallocMiss = 1, // malloc that didn't
+ kFreeHit = 2, // ibid. for free
+ kFreeMiss = 3,
+ kFreeScavenges = 4, // # of frees that leads to scavenge
+ kFreeTruncations = 5, // # of frees that leads to list truncation
+ kTCInsertHit = 6, // # of times the returned object list hits transfer cache.
+ kTCInsertMiss = 7, // # of times the object list misses the transfer cache.
+ kTCRemoveHit = 8, // # of times object list fetching hits transfer cache.
+ kTCRemoveMiss = 9, // # of times object list fetching misses transfer cache.
+ kTCElementsPlunder = 10, // # of elements plundered from the transfer cache.
+ kNumTrackingStats = 11,
+};
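+
+// As an illustration (not additional API surface): a fast-path allocation hit
+// for size class `cl` is recorded as
+//
+//   tracking::Report(kMallocHit, cl, 1);
+//
+// which compiles to a no-op below unless TCMALLOC_HAVE_TRACKING is non-zero.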
+
+namespace tracking {
+
+// Report <count> occurrences of <stat> associated with sizeclass <cl>.
+void Report(TrackingStat stat, size_t cl, ssize_t count);
+
+// Dump all tracking data to <out>. We could support various other
+// mechanisms for data delivery without too much trouble...
+void Print(Printer* out);
+
+// Call on startup during tcmalloc initialization.
+void Init();
+
+// Fill <result> with information for each stat type (broken down by
+// sizeclass if level == kDetailed.)
+void GetProperties(std::map<std::string, MallocExtension::Property>* result);
+
+#if !TCMALLOC_HAVE_TRACKING
+// no tracking, these are all no-ops
+inline void Report(TrackingStat stat, size_t cl, ssize_t count) {}
+inline void Print(Printer* out) {}
+inline void Init() {}
+inline void GetProperties(
+ std::map<std::string, MallocExtension::Property>* result) {}
+#endif
+
+} // namespace tracking
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // TCMALLOC_TRACKING_H_
diff --git a/contrib/libs/tcmalloc/tcmalloc/transfer_cache.cc b/contrib/libs/tcmalloc/tcmalloc/transfer_cache.cc
new file mode 100644
index 0000000000..efde485288
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/transfer_cache.cc
@@ -0,0 +1,162 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/transfer_cache.h"
+
+#include <fcntl.h>
+#include <string.h>
+
+#include <algorithm>
+#include <atomic>
+
+#include "absl/base/attributes.h"
+#include "tcmalloc/common.h"
+#include "tcmalloc/experiment.h"
+#include "tcmalloc/guarded_page_allocator.h"
+#include "tcmalloc/internal/cache_topology.h"
+#include "tcmalloc/internal/environment.h"
+#include "tcmalloc/internal/linked_list.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/internal/optimization.h"
+#include "tcmalloc/internal/util.h"
+#include "tcmalloc/static_vars.h"
+#include "tcmalloc/tracking.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+absl::string_view TransferCacheImplementationToLabel(
+ TransferCacheImplementation type) {
+ switch (type) {
+ case TransferCacheImplementation::Legacy:
+ return "LEGACY";
+ case TransferCacheImplementation::None:
+ return "NO_TRANSFERCACHE";
+ case TransferCacheImplementation::Ring:
+ return "RING";
+ default:
+ ASSUME(false);
+ }
+}
+
+#ifndef TCMALLOC_SMALL_BUT_SLOW
+
+size_t StaticForwarder::class_to_size(int size_class) {
+ return Static::sizemap().class_to_size(size_class);
+}
+size_t StaticForwarder::num_objects_to_move(int size_class) {
+ return Static::sizemap().num_objects_to_move(size_class);
+}
+void *StaticForwarder::Alloc(size_t size, int alignment) {
+ return Static::arena().Alloc(size, alignment);
+}
+
+void ShardedTransferCacheManager::Init() {
+ if (!IsExperimentActive(
+ Experiment::TEST_ONLY_TCMALLOC_SHARDED_TRANSFER_CACHE)) {
+ return;
+ }
+ num_shards_ = BuildCpuToL3CacheMap(l3_cache_index_);
+ cache_ = reinterpret_cast<Cache *>(Static::arena().Alloc(
+ sizeof(Cache) * kNumClasses * num_shards_, ABSL_CACHELINE_SIZE));
+ ASSERT(cache_ != nullptr);
+ for (int shard = 0; shard < num_shards_; ++shard) {
+ for (int cl = 0; cl < kNumClasses; ++cl) {
+ const int index = shard * kNumClasses + cl;
+ const int size_per_object = Static::sizemap().class_to_size(cl);
+ static constexpr int k12MB = 12 << 20;
+ static constexpr int min_size = 4096;
+ const int use_this_size_class = size_per_object >= min_size;
+ const int capacity = use_this_size_class ? k12MB / size_per_object : 0;
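+      // For example, a hypothetical 4096-byte size class gets
+      // (12 << 20) / 4096 == 3072 cached objects per shard, while classes
+      // smaller than min_size are not cached here at all (capacity == 0).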
+ active_for_class_[cl] = use_this_size_class;
+ new (&cache_[index].tc)
+ TransferCache(nullptr, capacity > 0 ? cl : 0, {capacity, capacity});
+ cache_[index].tc.freelist().Init(cl);
+ }
+ }
+}
+
+size_t ShardedTransferCacheManager::TotalBytes() {
+ if (cache_ == nullptr) return 0;
+ size_t out = 0;
+ for (int shard = 0; shard < num_shards_; ++shard) {
+ for (int cl = 0; cl < kNumClasses; ++cl) {
+ const int bytes_per_entry = Static::sizemap().class_to_size(cl);
+ if (bytes_per_entry <= 0) continue;
+ const int index = shard * kNumClasses + cl;
+ out += cache_[index].tc.tc_length() * bytes_per_entry;
+ }
+ }
+ return out;
+}
+
+void ShardedTransferCacheManager::BackingTransferCache::InsertRange(
+ absl::Span<void *> batch) const {
+ Static::transfer_cache().InsertRange(size_class_, batch);
+}
+
+ABSL_MUST_USE_RESULT int
+ShardedTransferCacheManager::BackingTransferCache::RemoveRange(void **batch,
+ int n) const {
+ return Static::transfer_cache().RemoveRange(size_class_, batch, n);
+}
+
+TransferCacheImplementation TransferCacheManager::ChooseImplementation() {
+ // Prefer ring, if we're forcing it on.
+ if (IsExperimentActive(
+ Experiment::TEST_ONLY_TCMALLOC_RING_BUFFER_TRANSFER_CACHE)) {
+ return TransferCacheImplementation::Ring;
+ }
+
+ // Consider opt-outs
+ const char *e = thread_safe_getenv("TCMALLOC_INTERNAL_TRANSFERCACHE_CONTROL");
+ if (e) {
+ if (e[0] == '0') {
+ return TransferCacheImplementation::Legacy;
+ }
+ if (e[0] == '1') {
+ return TransferCacheImplementation::Ring;
+ }
+ Crash(kCrash, __FILE__, __LINE__, "bad env var", e);
+ }
+
+ // Otherwise, default to ring.
+ return TransferCacheImplementation::Ring;
+}
+
+int TransferCacheManager::DetermineSizeClassToEvict() {
+ int t = next_to_evict_.load(std::memory_order_relaxed);
+ if (t >= kNumClasses) t = 1;
+ next_to_evict_.store(t + 1, std::memory_order_relaxed);
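+  // next_to_evict_ simply cycles through [1, kNumClasses); relaxed atomics
+  // are enough here because a racy duplicate pick only means two callers
+  // try to shrink the same size class, which is harmless.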
+
+ // Ask nicely first.
+ if (implementation_ == TransferCacheImplementation::Ring) {
+ if (cache_[t].rbtc.HasSpareCapacity(t)) return t;
+ } else {
+ if (cache_[t].tc.HasSpareCapacity(t)) return t;
+ }
+
+ // But insist on the second try.
+ t = next_to_evict_.load(std::memory_order_relaxed);
+ if (t >= kNumClasses) t = 1;
+ next_to_evict_.store(t + 1, std::memory_order_relaxed);
+ return t;
+}
+
+#endif
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
diff --git a/contrib/libs/tcmalloc/tcmalloc/transfer_cache.h b/contrib/libs/tcmalloc/tcmalloc/transfer_cache.h
new file mode 100644
index 0000000000..8b47eefafb
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/transfer_cache.h
@@ -0,0 +1,341 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TCMALLOC_TRANSFER_CACHE_H_
+#define TCMALLOC_TRANSFER_CACHE_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <atomic>
+#include <limits>
+#include <utility>
+
+#include "absl/base/attributes.h"
+#include "absl/base/const_init.h"
+#include "absl/base/internal/spinlock.h"
+#include "absl/base/macros.h"
+#include "absl/base/thread_annotations.h"
+#include "absl/types/span.h"
+#include "tcmalloc/central_freelist.h"
+#include "tcmalloc/common.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/transfer_cache_stats.h"
+
+#ifndef TCMALLOC_SMALL_BUT_SLOW
+#include "tcmalloc/transfer_cache_internals.h"
+#endif
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+enum class TransferCacheImplementation {
+ Legacy,
+ None,
+ Ring,
+};
+
+absl::string_view TransferCacheImplementationToLabel(
+ TransferCacheImplementation type);
+
+#ifndef TCMALLOC_SMALL_BUT_SLOW
+
+class StaticForwarder {
+ public:
+ static size_t class_to_size(int size_class);
+ static size_t num_objects_to_move(int size_class);
+ static void *Alloc(size_t size, int alignment = kAlignment)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+};
+
+// This transfer-cache is set up to be sharded per L3 cache. It is backed by
+// the non-sharded "normal" TransferCacheManager.
+class ShardedTransferCacheManager {
+ public:
+ constexpr ShardedTransferCacheManager() {}
+
+ void Init() ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock);
+
+ bool should_use(int cl) const { return active_for_class_[cl]; }
+
+ size_t TotalBytes();
+
+ void *Pop(int cl) {
+ void *batch[1];
+ const int got = cache_[get_index(cl)].tc.RemoveRange(cl, batch, 1);
+ return got == 1 ? batch[0] : nullptr;
+ }
+
+ void Push(int cl, void *ptr) {
+ cache_[get_index(cl)].tc.InsertRange(cl, {&ptr, 1});
+ }
+
+  // Caches that have not been touched since the last call return all of
+  // their objects to the non-sharded TransferCache.
+ void Plunder() {
+ if (cache_ == nullptr || num_shards_ == 0) return;
+ for (int i = 0; i < num_shards_ * kNumClasses; ++i) {
+ cache_[i].tc.TryPlunder(cache_[i].tc.freelist().size_class());
+ }
+ }
+
+ private:
+ // The Manager is set up so that stealing is disabled for this TransferCache.
+ class Manager : public StaticForwarder {
+ public:
+ static constexpr int DetermineSizeClassToEvict() { return -1; }
+ static constexpr bool MakeCacheSpace(int) { return false; }
+ static constexpr bool ShrinkCache(int) { return false; }
+ };
+
+ // Forwards calls to the unsharded TransferCache.
+ class BackingTransferCache {
+ public:
+ void Init(int cl) { size_class_ = cl; }
+ void InsertRange(absl::Span<void *> batch) const;
+ ABSL_MUST_USE_RESULT int RemoveRange(void **batch, int n) const;
+ int size_class() const { return size_class_; }
+
+ private:
+ int size_class_ = -1;
+ };
+
+ using TransferCache =
+ internal_transfer_cache::RingBufferTransferCache<BackingTransferCache,
+ Manager>;
+
+ union Cache {
+ constexpr Cache() : dummy(false) {}
+ ~Cache() {}
+ TransferCache tc;
+ bool dummy;
+ };
+
+ int get_index(int cl) {
+ const int cpu = tcmalloc::tcmalloc_internal::subtle::percpu::RseqCpuId();
+ ASSERT(cpu < 256);
+ ASSERT(cpu >= 0);
+ return get_index(cpu, cl);
+ }
+
+ int get_index(int cpu, int cl) {
+ const int shard = l3_cache_index_[cpu];
+ ASSERT(shard < num_shards_);
+ const int index = shard * kNumClasses + cl;
+ ASSERT(index < num_shards_ * kNumClasses);
+ return index;
+ }
+
+ // Mapping from cpu to the L3 cache used.
+ uint8_t l3_cache_index_[CPU_SETSIZE] = {0};
+
+ Cache *cache_ = nullptr;
+ int num_shards_ = 0;
+ bool active_for_class_[kNumClasses] = {false};
+};
+
+class TransferCacheManager : public StaticForwarder {
+ template <typename CentralFreeList, typename Manager>
+ friend class internal_transfer_cache::TransferCache;
+ using TransferCache =
+ internal_transfer_cache::TransferCache<tcmalloc_internal::CentralFreeList,
+ TransferCacheManager>;
+
+ template <typename CentralFreeList, typename Manager>
+ friend class internal_transfer_cache::RingBufferTransferCache;
+ using RingBufferTransferCache =
+ internal_transfer_cache::RingBufferTransferCache<
+ tcmalloc_internal::CentralFreeList, TransferCacheManager>;
+
+ public:
+ constexpr TransferCacheManager() : next_to_evict_(1) {}
+
+ TransferCacheManager(const TransferCacheManager &) = delete;
+ TransferCacheManager &operator=(const TransferCacheManager &) = delete;
+
+ void Init() ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) {
+ implementation_ = ChooseImplementation();
+ for (int i = 0; i < kNumClasses; ++i) {
+ if (implementation_ == TransferCacheImplementation::Ring) {
+ new (&cache_[i].rbtc) RingBufferTransferCache(this, i);
+ } else {
+ new (&cache_[i].tc) TransferCache(this, i);
+ }
+ }
+ }
+
+ void AcquireInternalLocks() {
+ for (int i = 0; i < kNumClasses; ++i) {
+ if (implementation_ == TransferCacheImplementation::Ring) {
+ cache_[i].rbtc.AcquireInternalLocks();
+ } else {
+ cache_[i].tc.AcquireInternalLocks();
+ }
+ }
+ }
+
+ void ReleaseInternalLocks() {
+ for (int i = 0; i < kNumClasses; ++i) {
+ if (implementation_ == TransferCacheImplementation::Ring) {
+ cache_[i].rbtc.ReleaseInternalLocks();
+ } else {
+ cache_[i].tc.ReleaseInternalLocks();
+ }
+ }
+ }
+
+ void InsertRange(int size_class, absl::Span<void *> batch) {
+ if (implementation_ == TransferCacheImplementation::Ring) {
+ cache_[size_class].rbtc.InsertRange(size_class, batch);
+ } else {
+ cache_[size_class].tc.InsertRange(size_class, batch);
+ }
+ }
+
+ ABSL_MUST_USE_RESULT int RemoveRange(int size_class, void **batch, int n) {
+ if (implementation_ == TransferCacheImplementation::Ring) {
+ return cache_[size_class].rbtc.RemoveRange(size_class, batch, n);
+ } else {
+ return cache_[size_class].tc.RemoveRange(size_class, batch, n);
+ }
+ }
+
+  // Caches that have not been modified since the last call to this method
+  // return all of their objects to the freelist.
+ void Plunder() {
+ for (int i = 0; i < kNumClasses; ++i) {
+ if (implementation_ == TransferCacheImplementation::Ring) {
+ cache_[i].rbtc.TryPlunder(i);
+ } else {
+ cache_[i].tc.TryPlunder(i);
+ }
+ }
+ }
+
+ // This is not const because the underlying ring-buffer transfer cache
+ // function requires acquiring a lock.
+ size_t tc_length(int size_class) {
+ if (implementation_ == TransferCacheImplementation::Ring) {
+ return cache_[size_class].rbtc.tc_length();
+ } else {
+ return cache_[size_class].tc.tc_length();
+ }
+ }
+
+ TransferCacheStats GetHitRateStats(int size_class) const {
+ if (implementation_ == TransferCacheImplementation::Ring) {
+ return cache_[size_class].rbtc.GetHitRateStats();
+ } else {
+ return cache_[size_class].tc.GetHitRateStats();
+ }
+ }
+
+ const CentralFreeList &central_freelist(int size_class) const {
+ if (implementation_ == TransferCacheImplementation::Ring) {
+ return cache_[size_class].rbtc.freelist();
+ } else {
+ return cache_[size_class].tc.freelist();
+ }
+ }
+
+ TransferCacheImplementation implementation() const { return implementation_; }
+
+ private:
+ static TransferCacheImplementation ChooseImplementation();
+
+ int DetermineSizeClassToEvict();
+ bool ShrinkCache(int size_class) {
+ if (implementation_ == TransferCacheImplementation::Ring) {
+ return cache_[size_class].rbtc.ShrinkCache(size_class);
+ } else {
+ return cache_[size_class].tc.ShrinkCache(size_class);
+ }
+ }
+
+ TransferCacheImplementation implementation_ =
+ TransferCacheImplementation::Legacy;
+ std::atomic<int32_t> next_to_evict_;
+ union Cache {
+ constexpr Cache() : dummy(false) {}
+ ~Cache() {}
+
+ TransferCache tc;
+ RingBufferTransferCache rbtc;
+ bool dummy;
+ };
+ Cache cache_[kNumClasses];
+} ABSL_CACHELINE_ALIGNED;
+
+#else
+
+// For the small memory model, the transfer cache is not used.
+class TransferCacheManager {
+ public:
+ constexpr TransferCacheManager() : freelist_() {}
+ TransferCacheManager(const TransferCacheManager &) = delete;
+ TransferCacheManager &operator=(const TransferCacheManager &) = delete;
+
+ void Init() ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) {
+ for (int i = 0; i < kNumClasses; ++i) {
+ freelist_[i].Init(i);
+ }
+ }
+
+ void InsertRange(int size_class, absl::Span<void *> batch) {
+ freelist_[size_class].InsertRange(batch);
+ }
+
+ ABSL_MUST_USE_RESULT int RemoveRange(int size_class, void **batch, int n) {
+ return freelist_[size_class].RemoveRange(batch, n);
+ }
+
+ static constexpr size_t tc_length(int size_class) { return 0; }
+
+ static constexpr TransferCacheStats GetHitRateStats(int size_class) {
+ return {0, 0, 0, 0};
+ }
+
+ const CentralFreeList &central_freelist(int size_class) const {
+ return freelist_[size_class];
+ }
+
+ TransferCacheImplementation implementation() const {
+ return TransferCacheImplementation::None;
+ }
+
+ void AcquireInternalLocks() {}
+ void ReleaseInternalLocks() {}
+
+ private:
+ CentralFreeList freelist_[kNumClasses];
+} ABSL_CACHELINE_ALIGNED;
+
+// A trivial no-op implementation.
+struct ShardedTransferCacheManager {
+ static constexpr void Init() {}
+ static constexpr bool should_use(int cl) { return false; }
+ static constexpr void *Pop(int cl) { return nullptr; }
+ static constexpr void Push(int cl, void *ptr) {}
+ static constexpr size_t TotalBytes() { return 0; }
+ static constexpr void Plunder() {}
+};
+
+#endif
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif // TCMALLOC_TRANSFER_CACHE_H_
diff --git a/contrib/libs/tcmalloc/tcmalloc/transfer_cache_benchmark.cc b/contrib/libs/tcmalloc/tcmalloc/transfer_cache_benchmark.cc
new file mode 100644
index 0000000000..70b1dcffc1
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/transfer_cache_benchmark.cc
@@ -0,0 +1,149 @@
+// Copyright 2020 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <atomic>
+
+#include "absl/types/optional.h"
+#include "benchmark/benchmark.h"
+#include "tcmalloc/central_freelist.h"
+#include "tcmalloc/common.h"
+#include "tcmalloc/mock_central_freelist.h"
+#include "tcmalloc/mock_transfer_cache.h"
+#include "tcmalloc/transfer_cache_internals.h"
+#include "tcmalloc/transfer_cache_stats.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+namespace {
+
+using TransferCacheEnv =
+ FakeTransferCacheEnvironment<internal_transfer_cache::TransferCache<
+ MinimalFakeCentralFreeList, FakeTransferCacheManager>>;
+using RingBufferTransferCacheEnv = FakeTransferCacheEnvironment<
+ internal_transfer_cache::RingBufferTransferCache<MinimalFakeCentralFreeList,
+ FakeTransferCacheManager>>;
+static constexpr int kSizeClass = 0;
+
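+// BM_CrossThread ping-pongs batches between two transfer caches that share a
+// single manager: even-indexed benchmark threads drain cache 0 and fill
+// cache 1, odd-indexed threads do the reverse, exercising the contended
+// insert/remove paths.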
+template <typename Env>
+void BM_CrossThread(benchmark::State& state) {
+ using Cache = typename Env::TransferCache;
+ const int kBatchSize = Env::kBatchSize;
+ const int kMaxObjectsToMove = Env::kMaxObjectsToMove;
+ void* batch[kMaxObjectsToMove];
+
+ struct CrossThreadState {
+ CrossThreadState() : m{}, c{Cache(&m, 1), Cache(&m, 1)} {}
+ FakeTransferCacheManager m;
+ Cache c[2];
+ };
+
+ static CrossThreadState* s = nullptr;
+ if (state.thread_index == 0) {
+ s = new CrossThreadState();
+ for (int i = 0; i < ::tcmalloc::tcmalloc_internal::internal_transfer_cache::
+ kInitialCapacityInBatches /
+ 2;
+ ++i) {
+ for (Cache& c : s->c) {
+ c.freelist().AllocateBatch(batch, kBatchSize);
+ c.InsertRange(kSizeClass, {batch, kBatchSize});
+ }
+ }
+ }
+
+ int src = state.thread_index % 2;
+ int dst = (src + 1) % 2;
+ for (auto iter : state) {
+ benchmark::DoNotOptimize(batch);
+ (void)s->c[src].RemoveRange(kSizeClass, batch, kBatchSize);
+ benchmark::DoNotOptimize(batch);
+ s->c[dst].InsertRange(kSizeClass, {batch, kBatchSize});
+ benchmark::DoNotOptimize(batch);
+ }
+ if (state.thread_index == 0) {
+ TransferCacheStats stats{};
+ for (Cache& c : s->c) {
+ TransferCacheStats other = c.GetHitRateStats();
+ stats.insert_hits += other.insert_hits;
+ stats.insert_misses += other.insert_misses;
+ stats.remove_hits += other.remove_hits;
+ stats.remove_misses += other.remove_misses;
+ }
+
+ state.counters["insert_hit_ratio"] =
+ static_cast<double>(stats.insert_hits) /
+ (stats.insert_hits + stats.insert_misses);
+ state.counters["remove_hit_ratio"] =
+ static_cast<double>(stats.remove_hits) /
+ (stats.remove_hits + stats.remove_misses);
+ delete s;
+ s = nullptr;
+ }
+}
+
+template <typename Env>
+void BM_InsertRange(benchmark::State& state) {
+ const int kBatchSize = Env::kBatchSize;
+ const int kMaxObjectsToMove = Env::kMaxObjectsToMove;
+
+  // Use absl::optional for precise control over when destruction occurs,
+  // since we want to keep the dtor out of the timed region.
+ absl::optional<Env> e;
+ void* batch[kMaxObjectsToMove];
+ for (auto iter : state) {
+ state.PauseTiming();
+ e.emplace();
+ e->central_freelist().AllocateBatch(batch, kBatchSize);
+ benchmark::DoNotOptimize(e);
+ benchmark::DoNotOptimize(batch);
+ state.ResumeTiming();
+
+ e->transfer_cache().InsertRange(kSizeClass, {batch, kBatchSize});
+ }
+}
+
+template <typename Env>
+void BM_RemoveRange(benchmark::State& state) {
+ const int kBatchSize = Env::kBatchSize;
+ const int kMaxObjectsToMove = Env::kMaxObjectsToMove;
+
+  // Use absl::optional for precise control over when destruction occurs,
+  // since we want to keep the dtor out of the timed region.
+ absl::optional<Env> e;
+ void* batch[kMaxObjectsToMove];
+ for (auto iter : state) {
+ state.PauseTiming();
+ e.emplace();
+ e->Insert(kBatchSize);
+ benchmark::DoNotOptimize(e);
+ state.ResumeTiming();
+
+ (void)e->transfer_cache().RemoveRange(kSizeClass, batch, kBatchSize);
+ benchmark::DoNotOptimize(batch);
+ }
+}
+
+BENCHMARK_TEMPLATE(BM_CrossThread, TransferCacheEnv)->ThreadRange(2, 64);
+BENCHMARK_TEMPLATE(BM_CrossThread, RingBufferTransferCacheEnv)
+ ->ThreadRange(2, 64);
+BENCHMARK_TEMPLATE(BM_InsertRange, TransferCacheEnv);
+BENCHMARK_TEMPLATE(BM_InsertRange, RingBufferTransferCacheEnv);
+BENCHMARK_TEMPLATE(BM_RemoveRange, TransferCacheEnv);
+BENCHMARK_TEMPLATE(BM_RemoveRange, RingBufferTransferCacheEnv);
+
+} // namespace
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
diff --git a/contrib/libs/tcmalloc/tcmalloc/transfer_cache_fuzz.cc b/contrib/libs/tcmalloc/tcmalloc/transfer_cache_fuzz.cc
new file mode 100644
index 0000000000..a31b06135e
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/transfer_cache_fuzz.cc
@@ -0,0 +1,73 @@
+// Copyright 2020 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <cstddef>
+#include <cstdint>
+
+#include "tcmalloc/mock_central_freelist.h"
+#include "tcmalloc/mock_transfer_cache.h"
+#include "tcmalloc/transfer_cache_internals.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace {
+
+using TransferCache = tcmalloc_internal::internal_transfer_cache::TransferCache<
+ tcmalloc_internal::MockCentralFreeList,
+ tcmalloc_internal::MockTransferCacheManager>;
+using TransferCacheEnv =
+ tcmalloc_internal::FakeTransferCacheEnvironment<TransferCache>;
+
+using RingBufferTransferCache =
+ tcmalloc_internal::internal_transfer_cache::RingBufferTransferCache<
+ tcmalloc_internal::MockCentralFreeList,
+ tcmalloc_internal::MockTransferCacheManager>;
+using RingBufferTransferCacheEnv =
+ tcmalloc_internal::FakeTransferCacheEnvironment<RingBufferTransferCache>;
+
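+// Each input byte drives one action on the fake cache environment: 0 grows
+// the cache, 1 shrinks it, and any other value consumes the following byte
+// as a batch size (mod 32), inserting if the selector byte was odd and
+// removing otherwise.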
+template <typename Env>
+int RunFuzzer(const uint8_t *data, size_t size) {
+ Env env;
+ for (int i = 0; i < size; ++i) {
+ switch (data[i] % 10) {
+ case 0:
+ env.Grow();
+ break;
+ case 1:
+ env.Shrink();
+ break;
+ default:
+ if (++i < size) {
+ int batch = data[i] % 32;
+ if (data[i - 1] % 2) {
+ env.Insert(batch);
+ } else {
+ env.Remove(batch);
+ }
+ }
+ break;
+ }
+ }
+ return 0;
+}
+
+} // namespace
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+ tcmalloc::RunFuzzer<tcmalloc::TransferCacheEnv>(data, size);
+ tcmalloc::RunFuzzer<tcmalloc::RingBufferTransferCacheEnv>(data, size);
+ return 0;
+}
diff --git a/contrib/libs/tcmalloc/tcmalloc/transfer_cache_internals.h b/contrib/libs/tcmalloc/tcmalloc/transfer_cache_internals.h
new file mode 100644
index 0000000000..26d18fd99d
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/transfer_cache_internals.h
@@ -0,0 +1,896 @@
+// Copyright 2020 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TCMALLOC_TRANSFER_CACHE_INTERNAL_H_
+#define TCMALLOC_TRANSFER_CACHE_INTERNAL_H_
+
+#include <sched.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <cmath>
+#include <limits>
+
+#include "absl/numeric/bits.h"
+#include "tcmalloc/internal/config.h"
+
+#ifdef __x86_64__
+#include <emmintrin.h>
+#include <xmmintrin.h>
+#endif
+
+#include <algorithm>
+#include <atomic>
+#include <cstddef>
+#include <utility>
+
+#include "absl/base/attributes.h"
+#include "absl/base/casts.h"
+#include "absl/base/const_init.h"
+#include "absl/base/internal/spinlock.h"
+#include "absl/base/macros.h"
+#include "absl/base/optimization.h"
+#include "absl/base/thread_annotations.h"
+#include "absl/synchronization/internal/futex.h"
+#include "absl/types/optional.h"
+#include "absl/types/span.h"
+#include "tcmalloc/central_freelist.h"
+#include "tcmalloc/common.h"
+#include "tcmalloc/experiment.h"
+#include "tcmalloc/internal/atomic_stats_counter.h"
+#include "tcmalloc/internal/logging.h"
+#include "tcmalloc/tracking.h"
+#include "tcmalloc/transfer_cache_stats.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc::tcmalloc_internal::internal_transfer_cache {
+
+struct alignas(8) SizeInfo {
+ int32_t used;
+ int32_t capacity;
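+  // Note: alignas(8) keeps both 32-bit fields in one 8-byte word, so
+  // std::atomic<SizeInfo> can (on typical 64-bit targets) load and store
+  // used and capacity together without a lock.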
+};
+static constexpr int kMaxCapacityInBatches = 64;
+static constexpr int kInitialCapacityInBatches = 16;
+
+// TransferCache is used to cache transfers of
+// sizemap.num_objects_to_move(size_class) back and forth between
+// thread caches and the central cache for a given size class.
+template <typename CentralFreeList, typename TransferCacheManager>
+class TransferCache {
+ public:
+ using Manager = TransferCacheManager;
+ using FreeList = CentralFreeList;
+
+ TransferCache(Manager *owner, int cl)
+ : TransferCache(owner, cl, CapacityNeeded(cl)) {}
+
+ struct Capacity {
+ int capacity;
+ int max_capacity;
+ };
+
+ TransferCache(Manager *owner, int cl, Capacity capacity)
+ : owner_(owner),
+ lock_(absl::kConstInit, absl::base_internal::SCHEDULE_KERNEL_ONLY),
+ max_capacity_(capacity.max_capacity),
+ slot_info_(SizeInfo({0, capacity.capacity})),
+ low_water_mark_(std::numeric_limits<int>::max()),
+ slots_(nullptr),
+ freelist_do_not_access_directly_() {
+ freelist().Init(cl);
+ slots_ = max_capacity_ != 0 ? reinterpret_cast<void **>(owner_->Alloc(
+ max_capacity_ * sizeof(void *)))
+ : nullptr;
+ }
+
+ TransferCache(const TransferCache &) = delete;
+ TransferCache &operator=(const TransferCache &) = delete;
+
+ // Compute initial and max capacity that we should configure this cache for.
+ static Capacity CapacityNeeded(size_t cl) {
+ // We need at least 2 slots to store list head and tail.
+ static_assert(kMinObjectsToMove >= 2);
+
+ const size_t bytes = Manager::class_to_size(cl);
+ if (cl <= 0 || bytes <= 0) return {0, 0};
+
+ // Limit the maximum size of the cache based on the size class. If this
+ // is not done, large size class objects will consume a lot of memory if
+ // they just sit in the transfer cache.
+ const size_t objs_to_move = Manager::num_objects_to_move(cl);
+ ASSERT(objs_to_move > 0);
+
+ // Starting point for the maximum number of entries in the transfer cache.
+    // The actual maximum for a given size class may be lower than this
+    // starting value.
+ int max_capacity = kMaxCapacityInBatches * objs_to_move;
+ // A transfer cache freelist can have anywhere from 0 to
+    // max_capacity_ slots to put linked-list chains into.
+ int capacity = kInitialCapacityInBatches * objs_to_move;
+
+ // Limit each size class cache to at most 1MB of objects or one entry,
+ // whichever is greater. Total transfer cache memory used across all
+ // size classes then can't be greater than approximately
+ // 1MB * kMaxNumTransferEntries.
+ max_capacity = std::min<int>(
+ max_capacity,
+ std::max<int>(objs_to_move,
+ (1024 * 1024) / (bytes * objs_to_move) * objs_to_move));
+ capacity = std::min(capacity, max_capacity);
+
+ return {capacity, max_capacity};
+ }
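+
+  // Worked example with made-up numbers (real values come from the size map):
+  // for bytes == 4096 and objs_to_move == 32, the starting points are
+  // max_capacity = 64 * 32 = 2048 and capacity = 16 * 32 = 512; the 1MB cap
+  // yields (1024 * 1024) / (4096 * 32) * 32 = 256, so CapacityNeeded()
+  // returns {capacity = 256, max_capacity = 256}, i.e. about 1MB of objects.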
+
+ // This transfercache implementation does not deal well with non-batch sized
+ // inserts and removes.
+ static constexpr bool IsFlexible() { return false; }
+
+ // These methods all do internal locking.
+
+ // Insert the specified batch into the transfer cache. N is the number of
+ // elements in the range. RemoveRange() is the opposite operation.
+ void InsertRange(int size_class, absl::Span<void *> batch)
+ ABSL_LOCKS_EXCLUDED(lock_) {
+ const int N = batch.size();
+ const int B = Manager::num_objects_to_move(size_class);
+ ASSERT(0 < N && N <= B);
+ auto info = slot_info_.load(std::memory_order_relaxed);
+ if (N == B) {
+ if (info.used + N <= max_capacity_) {
+ absl::base_internal::SpinLockHolder h(&lock_);
+ if (MakeCacheSpace(size_class, N)) {
+ // MakeCacheSpace can drop the lock, so refetch
+ info = slot_info_.load(std::memory_order_relaxed);
+ info.used += N;
+ SetSlotInfo(info);
+
+ void **entry = GetSlot(info.used - N);
+ memcpy(entry, batch.data(), sizeof(void *) * N);
+ tracking::Report(kTCInsertHit, size_class, 1);
+ insert_hits_.LossyAdd(1);
+ return;
+ }
+ }
+
+ insert_misses_.Add(1);
+ } else {
+ insert_non_batch_misses_.Add(1);
+ }
+
+ tracking::Report(kTCInsertMiss, size_class, 1);
+ freelist().InsertRange(batch);
+ }
+
+ // Returns the actual number of fetched elements and stores elements in the
+ // batch.
+ ABSL_MUST_USE_RESULT int RemoveRange(int size_class, void **batch, int N)
+ ABSL_LOCKS_EXCLUDED(lock_) {
+ ASSERT(N > 0);
+ const int B = Manager::num_objects_to_move(size_class);
+ auto info = slot_info_.load(std::memory_order_relaxed);
+ if (N == B) {
+ if (info.used >= N) {
+ absl::base_internal::SpinLockHolder h(&lock_);
+ // Refetch with the lock
+ info = slot_info_.load(std::memory_order_relaxed);
+ if (info.used >= N) {
+ info.used -= N;
+ SetSlotInfo(info);
+ void **entry = GetSlot(info.used);
+ memcpy(batch, entry, sizeof(void *) * N);
+ tracking::Report(kTCRemoveHit, size_class, 1);
+ remove_hits_.LossyAdd(1);
+ low_water_mark_.store(
+ std::min(low_water_mark_.load(std::memory_order_acquire),
+ info.used),
+ std::memory_order_release);
+ return N;
+ }
+ }
+
+ remove_misses_.Add(1);
+ } else {
+ remove_non_batch_misses_.Add(1);
+ }
+ low_water_mark_.store(0, std::memory_order_release);
+
+ tracking::Report(kTCRemoveMiss, size_class, 1);
+ return freelist().RemoveRange(batch, N);
+ }
+
+ // If this object has not been touched since the last attempt, then
+ // return all objects to 'freelist()'.
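+  // "Touched" is approximated with low_water_mark_: RemoveRange() lowers it
+  // to the smallest slot_info_.used it observes, and TryPlunder() resets the
+  // mark and then returns roughly that many objects to freelist() in
+  // batch-sized chunks.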
+ void TryPlunder(int size_class) ABSL_LOCKS_EXCLUDED(lock_) {
+ if (max_capacity_ == 0) return;
+ int low_water_mark = low_water_mark_.load(std::memory_order_acquire);
+ low_water_mark_.store(std::numeric_limits<int>::max(),
+ std::memory_order_release);
+ while (low_water_mark > 0) {
+ if (!lock_.TryLock()) return;
+ if (low_water_mark_.load(std::memory_order_acquire) !=
+ std::numeric_limits<int>::max()) {
+ lock_.Unlock();
+ return;
+ }
+ const int B = Manager::num_objects_to_move(size_class);
+ SizeInfo info = GetSlotInfo();
+ if (info.used == 0) {
+ lock_.Unlock();
+ return;
+ }
+ const size_t num_to_move = std::min(B, info.used);
+ void *buf[kMaxObjectsToMove];
+ void **const entry = GetSlot(info.used - B);
+ memcpy(buf, entry, sizeof(void *) * B);
+ info.used -= num_to_move;
+ low_water_mark -= num_to_move;
+ SetSlotInfo(info);
+ lock_.Unlock();
+ tracking::Report(kTCElementsPlunder, size_class, num_to_move);
+ freelist().InsertRange({buf, num_to_move});
+ }
+ }
+
+  // Returns the number of free objects in the transfer cache.
+ size_t tc_length() const {
+ return static_cast<size_t>(slot_info_.load(std::memory_order_relaxed).used);
+ }
+
+ // Returns the number of transfer cache insert/remove hits/misses.
+ TransferCacheStats GetHitRateStats() const ABSL_LOCKS_EXCLUDED(lock_) {
+ TransferCacheStats stats;
+
+ stats.insert_hits = insert_hits_.value();
+ stats.remove_hits = remove_hits_.value();
+ stats.insert_misses = insert_misses_.value();
+ stats.insert_non_batch_misses = insert_non_batch_misses_.value();
+ stats.remove_misses = remove_misses_.value();
+ stats.remove_non_batch_misses = remove_non_batch_misses_.value();
+
+ // For performance reasons, we only update a single atomic as part of the
+ // actual allocation operation. For reporting, we keep reporting all
+ // misses together and separately break-out how many of those misses were
+ // non-batch sized.
+ stats.insert_misses += stats.insert_non_batch_misses;
+ stats.remove_misses += stats.remove_non_batch_misses;
+
+ return stats;
+ }
+
+ SizeInfo GetSlotInfo() const {
+ return slot_info_.load(std::memory_order_relaxed);
+ }
+
+ // REQUIRES: lock is held.
+ // Tries to make room for N elements. If the cache is full it will try to
+ // expand it at the cost of some other cache size. Return false if there is
+ // no space.
+ bool MakeCacheSpace(int size_class, int N)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) {
+ auto info = slot_info_.load(std::memory_order_relaxed);
+ // Is there room in the cache?
+ if (info.used + N <= info.capacity) return true;
+    // Can we expand this cache?
+ if (info.capacity + N > max_capacity_) return false;
+
+ int to_evict = owner_->DetermineSizeClassToEvict();
+ if (to_evict == size_class) return false;
+
+ // Release the held lock before the other instance tries to grab its lock.
+ lock_.Unlock();
+ bool made_space = owner_->ShrinkCache(to_evict);
+ lock_.Lock();
+
+ if (!made_space) return false;
+
+    // Succeeded in evicting, we're going to make our cache larger. However, we
+    // may have dropped and re-acquired the lock, so slot_info_ may have
+    // changed. Therefore, check again that it is still OK to increase the
+    // capacity.
+ info = slot_info_.load(std::memory_order_relaxed);
+ if (info.capacity + N > max_capacity_) return false;
+ info.capacity += N;
+ SetSlotInfo(info);
+ return true;
+ }
+
+ bool HasSpareCapacity(int size_class) const {
+ int n = Manager::num_objects_to_move(size_class);
+ auto info = GetSlotInfo();
+ return info.capacity - info.used >= n;
+ }
+
+ // Takes lock_ and invokes MakeCacheSpace() on this cache. Returns true if it
+ // succeeded at growing the cache by a batch size.
+ bool GrowCache(int size_class) ABSL_LOCKS_EXCLUDED(lock_) {
+ absl::base_internal::SpinLockHolder h(&lock_);
+ return MakeCacheSpace(size_class, Manager::num_objects_to_move(size_class));
+ }
+
+ // REQUIRES: lock_ is *not* held.
+ // Tries to shrink the Cache. Return false if it failed to shrink the cache.
+ // Decreases cache_slots_ on success.
+ bool ShrinkCache(int size_class) ABSL_LOCKS_EXCLUDED(lock_) {
+ int N = Manager::num_objects_to_move(size_class);
+
+ void *to_free[kMaxObjectsToMove];
+ int num_to_free;
+ {
+ absl::base_internal::SpinLockHolder h(&lock_);
+ auto info = slot_info_.load(std::memory_order_relaxed);
+ if (info.capacity == 0) return false;
+ if (info.capacity < N) return false;
+
+ N = std::min(N, info.capacity);
+ int unused = info.capacity - info.used;
+ if (N <= unused) {
+ info.capacity -= N;
+ SetSlotInfo(info);
+ return true;
+ }
+
+ num_to_free = N - unused;
+ info.capacity -= N;
+ info.used -= num_to_free;
+ SetSlotInfo(info);
+
+ // Our internal slot array may get overwritten as soon as we drop the
+ // lock, so copy the items to free to an on stack buffer.
+ memcpy(to_free, GetSlot(info.used), sizeof(void *) * num_to_free);
+ }
+
+ // Access the freelist without holding the lock.
+ freelist().InsertRange({to_free, static_cast<uint64_t>(num_to_free)});
+ return true;
+ }
+
+ // This is a thin wrapper for the CentralFreeList. It is intended to ensure
+ // that we are not holding lock_ when we access it.
+ ABSL_ATTRIBUTE_ALWAYS_INLINE FreeList &freelist() ABSL_LOCKS_EXCLUDED(lock_) {
+ return freelist_do_not_access_directly_;
+ }
+
+  // The const version of the wrapper, needed to read stats from a const cache.
+ ABSL_ATTRIBUTE_ALWAYS_INLINE const FreeList &freelist() const
+ ABSL_LOCKS_EXCLUDED(lock_) {
+ return freelist_do_not_access_directly_;
+ }
+
+  void AcquireInternalLocks() {
+    freelist().AcquireInternalLocks();
+    lock_.Lock();
+  }
+
+  void ReleaseInternalLocks() {
+    lock_.Unlock();
+    freelist().ReleaseInternalLocks();
+  }
+
+ private:
+ // Returns first object of the i-th slot.
+ void **GetSlot(size_t i) ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) {
+ return slots_ + i;
+ }
+
+ void SetSlotInfo(SizeInfo info) {
+ ASSERT(0 <= info.used);
+ ASSERT(info.used <= info.capacity);
+ ASSERT(info.capacity <= max_capacity_);
+ slot_info_.store(info, std::memory_order_relaxed);
+ }
+
+ Manager *const owner_;
+
+  // This lock protects all the data members. slot_info_ and the stats
+  // counters may be read without holding the lock.
+ absl::base_internal::SpinLock lock_;
+
+ // Maximum size of the cache.
+ const int32_t max_capacity_;
+
+ // insert_hits_ and remove_hits_ are logically guarded by lock_ for mutations
+ // and use LossyAdd, but the thread annotations cannot indicate that we do not
+ // need a lock for reads.
+ StatsCounter insert_hits_;
+ StatsCounter remove_hits_;
+ // Miss counters do not hold lock_, so they use Add.
+ StatsCounter insert_misses_;
+ StatsCounter insert_non_batch_misses_;
+ StatsCounter remove_misses_;
+ StatsCounter remove_non_batch_misses_;
+
+ // Number of currently used and available cached entries in slots_. This
+ // variable is updated under a lock but can be read without one.
+  // INVARIANT: [0 <= slot_info_.used <= slot_info_.capacity <= max_capacity_]
+ std::atomic<SizeInfo> slot_info_;
+
+ // Lowest value of "slot_info_.used" since last call to TryPlunder. All
+ // elements not used for a full cycle (2 seconds) are unlikely to get used
+ // again.
+ std::atomic<int> low_water_mark_;
+
+ // Pointer to array of free objects. Use GetSlot() to get pointers to
+ // entries.
+ void **slots_ ABSL_GUARDED_BY(lock_);
+
+ FreeList freelist_do_not_access_directly_;
+} ABSL_CACHELINE_ALIGNED;
+
+struct RingBufferSizeInfo {
+ // The starting index of data stored in the ring buffer.
+ int32_t start;
+ // How many elements are stored.
+ int32_t used;
+ // How many elements are allowed to be stored at most.
+ int32_t capacity;
+};
+
+// RingBufferTransferCache is a transfer cache which stores cache entries in a
+// ring buffer instead of a stack.
+template <typename CentralFreeList, typename TransferCacheManager>
+class RingBufferTransferCache {
+ public:
+ using Manager = TransferCacheManager;
+ using FreeList = CentralFreeList;
+
+ RingBufferTransferCache(Manager *owner, int cl)
+ : RingBufferTransferCache(owner, cl, CapacityNeeded(cl)) {}
+
+ RingBufferTransferCache(
+ Manager *owner, int cl,
+ typename TransferCache<CentralFreeList, TransferCacheManager>::Capacity
+ capacity)
+ : lock_(absl::kConstInit, absl::base_internal::SCHEDULE_KERNEL_ONLY),
+ slot_info_(RingBufferSizeInfo({0, 0, capacity.capacity})),
+ max_capacity_(capacity.max_capacity),
+ freelist_do_not_access_directly_(),
+ owner_(owner) {
+ freelist().Init(cl);
+ if (max_capacity_ == 0) {
+ // We don't allocate a buffer. Set slots_bitmask_ to 0 to prevent UB.
+ slots_bitmask_ = 0;
+ } else {
+ const size_t slots_size = absl::bit_ceil<size_t>(max_capacity_);
+ ASSERT(slots_size >= max_capacity_);
+ ASSERT(slots_size < max_capacity_ * 2);
+ slots_ =
+ reinterpret_cast<void **>(owner_->Alloc(slots_size * sizeof(void *)));
+ slots_bitmask_ = slots_size - 1;
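+      // Because slots_size is a power of two, a logical ring-buffer index
+      // wraps with "index & slots_bitmask_" instead of a modulo.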
+ }
+ }
+
+ RingBufferTransferCache(const RingBufferTransferCache &) = delete;
+ RingBufferTransferCache &operator=(const RingBufferTransferCache &) = delete;
+
+ // This transfercache implementation handles non-batch sized
+ // inserts and removes efficiently.
+ static constexpr bool IsFlexible() { return true; }
+
+ // These methods all do internal locking.
+
+  void AcquireInternalLocks() {
+    freelist().AcquireInternalLocks();
+    lock_.Lock();
+  }
+
+  void ReleaseInternalLocks() {
+    lock_.Unlock();
+    freelist().ReleaseInternalLocks();
+  }
+
+ // Insert the specified batch into the transfer cache. N is the number of
+ // elements in the range. RemoveRange() is the opposite operation.
+ void InsertRange(int size_class, absl::Span<void *> batch)
+ ABSL_LOCKS_EXCLUDED(lock_) {
+ const int N = batch.size();
+ const int B = Manager::num_objects_to_move(size_class);
+ ASSERT(0 < N && N <= B);
+ void *to_free_buf[kMaxObjectsToMove];
+ int to_free_num = 0;
+
+ {
+ absl::base_internal::SpinLockHolder h(&lock_);
+ RingBufferSizeInfo info = GetSlotInfo();
+ if (info.used + N <= max_capacity_) {
+ const bool cache_grown = MakeCacheSpace(size_class, N);
+ // MakeCacheSpace can drop the lock, so refetch
+ info = GetSlotInfo();
+ if (cache_grown) {
+ CopyIntoEnd(batch.data(), N, info);
+ SetSlotInfo(info);
+ tracking::Report(kTCInsertHit, size_class, 1);
+ insert_hits_.LossyAdd(1);
+ return;
+ }
+ }
+
+ // If we arrive here, this means that there is not enough capacity in the
+ // current cache to include the new items, and we cannot grow it.
+
+      // We want to return up to `B` items, drawn from the transfer cache and
+      // from the batch currently being inserted.
+ const int returned_from_cache = std::min<int>(B, info.used);
+ if (returned_from_cache > 0) {
+ CopyOutOfStart(to_free_buf, returned_from_cache, info);
+ }
+ to_free_num = returned_from_cache;
+ if (info.used > 0) {
+ // We didn't have to return the whole cache. This means we can copy
+ // in all of the inserted items.
+ ASSERT(info.used + N <= info.capacity);
+ CopyIntoEnd(batch.data(), N, info);
+ } else {
+ // The transfercache is empty. We might still not have enough capacity
+ // to store all of the inserted items though.
+ const int to_insert_start = std::max(0, N - info.capacity);
+ ASSERT(returned_from_cache + to_insert_start <= B);
+ if (to_insert_start > 0) {
+ // We also want to return some of the inserted items in this case.
+ memcpy(to_free_buf + to_free_num, batch.data(),
+ to_insert_start * sizeof(void *));
+ to_free_num += to_insert_start;
+ }
+ // This is only false if info.capacity is 0.
+ if (ABSL_PREDICT_TRUE(N > to_insert_start)) {
+ CopyIntoEnd(batch.data() + to_insert_start, N - to_insert_start,
+ info);
+ }
+ }
+ SetSlotInfo(info);
+ }
+ // It can work out that we manage to insert all items into the cache after
+ // all.
+ if (to_free_num > 0) {
+ ASSERT(to_free_num <= kMaxObjectsToMove);
+ ASSERT(to_free_num <= B);
+ insert_misses_.Add(1);
+ tracking::Report(kTCInsertMiss, size_class, 1);
+ freelist().InsertRange(absl::Span<void *>(to_free_buf, to_free_num));
+ }
+ }
+
+ // Returns the actual number of fetched elements and stores elements in the
+ // batch. This might return less than N if the transfercache is non-empty but
+ // contains fewer elements than N. It is guaranteed to return at least 1 as
+ // long as either the transfercache or the free list are not empty.
+ ABSL_MUST_USE_RESULT int RemoveRange(int size_class, void **batch, int N)
+ ABSL_LOCKS_EXCLUDED(lock_) {
+ ASSERT(N > 0);
+
+ {
+ absl::base_internal::SpinLockHolder h(&lock_);
+ RingBufferSizeInfo info = GetSlotInfo();
+ if (info.used > 0) {
+ // Return up to however much we have in our local cache.
+ const int copied = std::min<int>(N, info.used);
+ CopyOutOfEnd(batch, copied, info);
+ SetSlotInfo(info);
+ tracking::Report(kTCRemoveHit, size_class, 1);
+ remove_hits_.LossyAdd(1);
+ low_water_mark_ = std::min(low_water_mark_, info.used);
+ return copied;
+ }
+ low_water_mark_ = 0;
+ }
+
+ remove_misses_.Add(1);
+ tracking::Report(kTCRemoveMiss, size_class, 1);
+ return freelist().RemoveRange(batch, N);
+ }
+
+ // Return all objects not touched since last call to this function.
+ void TryPlunder(int size_class) ABSL_LOCKS_EXCLUDED(lock_) {
+ if (max_capacity_ == 0) return;
+ // If the lock is being held, someone is modifying the cache.
+ if (!lock_.TryLock()) return;
+ int low_water_mark = low_water_mark_;
+ low_water_mark_ = std::numeric_limits<int>::max();
+ const int B = Manager::num_objects_to_move(size_class);
+ while (slot_info_.used > 0 && low_water_mark >= B &&
+ (low_water_mark_ == std::numeric_limits<int>::max())) {
+ const size_t num_to_move(std::min(B, slot_info_.used));
+ void *buf[kMaxObjectsToMove];
+ CopyOutOfEnd(buf, num_to_move, slot_info_);
+ low_water_mark -= num_to_move;
+ lock_.Unlock();
+ freelist().InsertRange({buf, num_to_move});
+ tracking::Report(kTCElementsPlunder, size_class, num_to_move);
+ // If someone is starting to use the cache, stop doing this.
+ if (!lock_.TryLock()) {
+ return;
+ }
+ }
+ lock_.Unlock();
+ }
+
+ // Returns the number of free objects in the transfer cache.
+ size_t tc_length() ABSL_LOCKS_EXCLUDED(lock_) {
+ absl::base_internal::SpinLockHolder h(&lock_);
+ return static_cast<size_t>(GetSlotInfo().used);
+ }
+
+ // Returns the number of transfer cache insert/remove hits/misses.
+ TransferCacheStats GetHitRateStats() const ABSL_LOCKS_EXCLUDED(lock_) {
+ TransferCacheStats stats;
+
+ stats.insert_hits = insert_hits_.value();
+ stats.remove_hits = remove_hits_.value();
+ stats.insert_misses = insert_misses_.value();
+ stats.insert_non_batch_misses = 0;
+ stats.remove_misses = remove_misses_.value();
+ stats.remove_non_batch_misses = 0;
+
+ return stats;
+ }
+
+ RingBufferSizeInfo GetSlotInfo() const ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) {
+ return slot_info_;
+ }
+
+ // REQUIRES: lock is held.
+ // Tries to make room for N elements. If the cache is full it will try to
+ // expand it at the cost of some other cache size. Return false if there is
+ // no space.
+ bool MakeCacheSpace(int size_class, int N)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) {
+ // Increase capacity in number of batches, as we do when reducing capacity.
+ const int B = Manager::num_objects_to_move(size_class);
+ ASSERT(B >= N);
+
+ auto info = GetSlotInfo();
+ // Is there room in the cache?
+ if (info.used + N <= info.capacity) return true;
+    // Can we expand this cache?
+ if (info.capacity + B > max_capacity_) return false;
+
+ // Release the held lock before the other instance tries to grab its lock.
+ lock_.Unlock();
+ int to_evict = owner_->DetermineSizeClassToEvict();
+ if (to_evict == size_class) {
+ lock_.Lock();
+ return false;
+ }
+ bool made_space = owner_->ShrinkCache(to_evict);
+ lock_.Lock();
+
+ if (!made_space) return false;
+
+ // Succeeded in evicting, we're going to make our cache larger. However, we
+ // have dropped and re-acquired the lock, so slot_info_ may have
+ // changed. Therefore, check and verify that it is still OK to increase the
+ // cache size.
+ info = GetSlotInfo();
+ if (info.capacity + B > max_capacity_) return false;
+ info.capacity += B;
+ SetSlotInfo(info);
+ return true;
+ }
+
+ bool HasSpareCapacity(int size_class) ABSL_LOCKS_EXCLUDED(lock_) {
+ const int n = Manager::num_objects_to_move(size_class);
+ absl::base_internal::SpinLockHolder h(&lock_);
+ const auto info = GetSlotInfo();
+ return info.capacity - info.used >= n;
+ }
+
+ // Takes lock_ and invokes MakeCacheSpace() on this cache. Returns true if it
+ // succeeded at growing the cache by a batch size.
+ bool GrowCache(int size_class) ABSL_LOCKS_EXCLUDED(lock_) {
+ absl::base_internal::SpinLockHolder h(&lock_);
+ return MakeCacheSpace(size_class, Manager::num_objects_to_move(size_class));
+ }
+
+ // REQUIRES: lock_ is *not* held.
+ // Tries to shrink the cache. Returns false if it failed to shrink the cache.
+ // Decreases slot_info_.capacity on success.
+ bool ShrinkCache(int size_class) ABSL_LOCKS_EXCLUDED(lock_) {
+ const int N = Manager::num_objects_to_move(size_class);
+
+ void *to_free[kMaxObjectsToMove];
+ int num_to_free;
+ {
+ absl::base_internal::SpinLockHolder h(&lock_);
+ auto info = GetSlotInfo();
+ if (info.capacity == 0) return false;
+ if (info.capacity < N) return false;
+
+ const int unused = info.capacity - info.used;
+ if (N <= unused) {
+ info.capacity -= N;
+ SetSlotInfo(info);
+ return true;
+ }
+
+ num_to_free = N - unused;
+
+ // Remove from the beginning of the buffer, which holds the oldest entries.
+ // Our internal slot array may get overwritten as soon as we drop the
+ // lock, so copy the items to be freed into an on-stack buffer.
+ CopyOutOfStart(to_free, num_to_free, info);
+ low_water_mark_ = info.used;
+ info.capacity -= N;
+ SetSlotInfo(info);
+ }
+
+ // Access the freelist without holding the lock.
+ freelist().InsertRange({to_free, static_cast<uint64_t>(num_to_free)});
+ return true;
+ }
+
+ // This is a thin wrapper for the CentralFreeList. It is intended to ensure
+ // that we are not holding lock_ when we access it.
+ ABSL_ATTRIBUTE_ALWAYS_INLINE FreeList &freelist() ABSL_LOCKS_EXCLUDED(lock_) {
+ return freelist_do_not_access_directly_;
+ }
+
+ // The const version of the wrapper, needed to call stats on the free list.
+ ABSL_ATTRIBUTE_ALWAYS_INLINE const FreeList &freelist() const
+ ABSL_LOCKS_EXCLUDED(lock_) {
+ return freelist_do_not_access_directly_;
+ }
+
+ private:
+ // Due to decreased downward pressure, the ring buffer based transfer cache
+ // contains on average more bytes than the legacy implementation.
+ // To counteract this, decrease the capacity (but not max capacity).
+ // TODO(b/161927252): Revisit TransferCache rebalancing strategy
+ static typename TransferCache<CentralFreeList, TransferCacheManager>::Capacity
+ CapacityNeeded(int cl) {
+ auto capacity =
+ TransferCache<CentralFreeList, TransferCacheManager>::CapacityNeeded(
+ cl);
+ const int N = Manager::num_objects_to_move(cl);
+ if (N == 0) return {0, 0};
+ ASSERT(capacity.capacity % N == 0);
+ // We still want capacity to be in multiples of batches.
+ const int capacity_in_batches = capacity.capacity / N;
+ // This factor was found by trial and error.
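+ // For example, a size class whose legacy capacity is 16 batches ends up
+ // with ceil(16 / 1.5) = 11 batches here.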
+ const int new_batches =
+ static_cast<int>(std::ceil(capacity_in_batches / 1.5));
+ capacity.capacity = new_batches * N;
+ return capacity;
+ }
+
+ // Converts a logical index (i.e. i-th element stored in the ring buffer) into
+ // a physical index into slots_.
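+ // Since the slot array size is a power of two, masking with slots_bitmask_
+ // is equivalent to a modulus: e.g. with 64 slots (bitmask 63), start 60 and
+ // i 6 map to physical index (60 + 6) & 63 == 2.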
+ size_t GetSlotIndex(size_t start, size_t i) const {
+ return (start + i) & slots_bitmask_;
+ }
+
+ // Copies N elements from source to the end of the ring buffer. It updates
+ // `info`; be sure to call SetSlotInfo() to save the modifications.
+ // N has to be > 0.
+ void CopyIntoEnd(void *const *source, size_t N, RingBufferSizeInfo &info)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) {
+ ASSERT(N > 0);
+ ASSERT(info.used + N <= info.capacity);
+ const size_t begin = GetSlotIndex(info.start, info.used);
+ const size_t end = GetSlotIndex(info.start, info.used + N);
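+ // If the copied region crosses the physical end of slots_, `end` wraps to a
+ // smaller index than `begin`; `end == 0` means the copy finishes exactly at
+ // the array boundary, so no split is needed.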
+ if (ABSL_PREDICT_FALSE(end < begin && end != 0)) {
+ // We wrap around the buffer.
+ memcpy(slots_ + begin, source, sizeof(void *) * (N - end));
+ memcpy(slots_, source + (N - end), sizeof(void *) * end);
+ } else {
+ memcpy(slots_ + begin, source, sizeof(void *) * N);
+ }
+ info.used += N;
+ }
+
+ // Copies N elements stored in slots_ starting at the given logical index
+ // into target. Does not update slot_info_.
+ // N has to be > 0.
+ // You should use CopyOutOfEnd or CopyOutOfStart instead in most cases.
+ void CopyOutOfSlots(void **target, size_t N, size_t start, size_t index) const
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) {
+ ASSERT(N > 0);
+ const size_t begin = GetSlotIndex(start, index);
+ const size_t end = GetSlotIndex(start, index + N);
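+ // Same wraparound test as in CopyIntoEnd: a wrapped region is copied out in
+ // two pieces.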
+ if (ABSL_PREDICT_FALSE(end < begin && end != 0)) {
+ // We wrap around the buffer.
+ memcpy(target, slots_ + begin, sizeof(void *) * (N - end));
+ memcpy(target + (N - end), slots_, sizeof(void *) * end);
+ } else {
+ memcpy(target, slots_ + begin, sizeof(void *) * N);
+ }
+ }
+
+ // Copies N elements from the start of the ring buffer into target. Updates
+ // `info`; be sure to call SetSlotInfo() to save the modifications.
+ // N has to be > 0.
+ void CopyOutOfStart(void **target, size_t N, RingBufferSizeInfo &info)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) {
+ ASSERT(N > 0);
+ ASSERT(N <= info.used);
+ CopyOutOfSlots(target, N, info.start, 0);
+ info.used -= N;
+ if (info.used == 0) {
+ // This makes it less likely that we will have to do copies that wrap
+ // around in the immediate future.
+ info.start = 0;
+ } else {
+ info.start = (info.start + N) & slots_bitmask_;
+ }
+ }
+
+ // Copies N elements from the end of the ring buffer into target. Updates
+ // `info`; be sure to call SetSlotInfo() to save the modifications.
+ // N has to be > 0.
+ void CopyOutOfEnd(void **target, size_t N, RingBufferSizeInfo &info)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) {
+ ASSERT(N > 0);
+ ASSERT(N <= info.used);
+ info.used -= N;
+ CopyOutOfSlots(target, N, info.start, info.used);
+ if (info.used == 0) {
+ // This makes it less likely that we will have to do copies that wrap
+ // around in the immediate future.
+ info.start = 0;
+ }
+ }
+
+ void SetSlotInfo(RingBufferSizeInfo info)
+ ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) {
+ ASSERT(0 <= info.start);
+ ASSERT((info.start & slots_bitmask_) == info.start);
+ ASSERT(0 <= info.used);
+ ASSERT(info.used <= info.capacity);
+ ASSERT(info.capacity <= max_capacity_);
+ slot_info_ = info;
+ }
+
+ // Pointer to array of free objects.
+ void **slots_ ABSL_GUARDED_BY(lock_);
+
+ // This lock protects all the data members. The hit/miss counters below may
+ // be read without holding the lock.
+ absl::base_internal::SpinLock lock_;
+
+ // Number of currently used and available cached entries in slots_. Use
+ // GetSlotInfo() to read this.
+ // INVARIANT: [0 <= slot_info_.used <= slot_info_.capacity <= max_capacity_]
+ RingBufferSizeInfo slot_info_ ABSL_GUARDED_BY(lock_);
+
+ // Lowest value of "slot_info_.used" since last call to TryPlunder. All
+ // elements not used for a full cycle (2 seconds) are unlikely to get used
+ // again.
+ int low_water_mark_ ABSL_GUARDED_BY(lock_) = std::numeric_limits<int>::max();
+
+ // Maximum size of the cache.
+ const int32_t max_capacity_;
+ // This is a bitmask used instead of a modulus in the ringbuffer index
+ // calculations. This is 1 smaller than the size of slots_ which itself has
+ // the size of `absl::bit_ceil(max_capacity_)`, i.e. the smallest power of two
+ // >= max_capacity_.
+ size_t slots_bitmask_;
+
+ // insert_hits_ and remove_hits_ are logically guarded by lock_ for mutations
+ // and use LossyAdd, but the thread annotations cannot indicate that we do not
+ // need a lock for reads.
+ StatsCounter insert_hits_;
+ StatsCounter remove_hits_;
+ // Miss counters do not hold lock_, so they use Add.
+ StatsCounter insert_misses_;
+ StatsCounter remove_misses_;
+
+ FreeList freelist_do_not_access_directly_;
+ Manager *const owner_;
+} ABSL_CACHELINE_ALIGNED;
+
+} // namespace tcmalloc::tcmalloc_internal::internal_transfer_cache
+GOOGLE_MALLOC_SECTION_END
+
+#endif // TCMALLOC_TRANSFER_CACHE_INTERNAL_H_
diff --git a/contrib/libs/tcmalloc/tcmalloc/transfer_cache_stats.h b/contrib/libs/tcmalloc/tcmalloc/transfer_cache_stats.h
new file mode 100644
index 0000000000..fdc8fba53c
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/transfer_cache_stats.h
@@ -0,0 +1,35 @@
+// Copyright 2020 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TCMALLOC_TRANSFER_CACHE_STATS_H_
+#define TCMALLOC_TRANSFER_CACHE_STATS_H_
+
+#include <stddef.h>
+
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
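+// Counters for transfer cache activity: hits are requests served from the
+// cache itself, misses are requests passed through to the CentralFreeList,
+// and the *_non_batch_misses fields count the subset of misses whose size was
+// not a full batch (where tracked).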
+struct TransferCacheStats {
+ size_t insert_hits;
+ size_t insert_misses;
+ size_t insert_non_batch_misses;
+ size_t remove_hits;
+ size_t remove_misses;
+ size_t remove_non_batch_misses;
+};
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+
+#endif // TCMALLOC_TRANSFER_CACHE_STATS_H_
diff --git a/contrib/libs/tcmalloc/tcmalloc/transfer_cache_test.cc b/contrib/libs/tcmalloc/tcmalloc/transfer_cache_test.cc
new file mode 100644
index 0000000000..4531f7a921
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/transfer_cache_test.cc
@@ -0,0 +1,625 @@
+// Copyright 2020 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/transfer_cache.h"
+
+#include <algorithm>
+#include <atomic>
+#include <cmath>
+#include <cstring>
+#include <random>
+#include <thread>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "absl/base/internal/spinlock.h"
+#include "absl/random/distributions.h"
+#include "absl/random/random.h"
+#include "absl/time/clock.h"
+#include "absl/types/span.h"
+#include "tcmalloc/central_freelist.h"
+#include "tcmalloc/common.h"
+#include "tcmalloc/mock_central_freelist.h"
+#include "tcmalloc/mock_transfer_cache.h"
+#include "tcmalloc/static_vars.h"
+#include "tcmalloc/testing/thread_manager.h"
+#include "tcmalloc/transfer_cache_internals.h"
+
+namespace tcmalloc {
+namespace tcmalloc_internal {
+namespace {
+
+static constexpr int kSizeClass = 0;
+
+template <typename Env>
+using TransferCacheTest = ::testing::Test;
+TYPED_TEST_SUITE_P(TransferCacheTest);
+
+TYPED_TEST_P(TransferCacheTest, IsolatedSmoke) {
+ const int batch_size = TypeParam::kBatchSize;
+ TypeParam e;
+ EXPECT_CALL(e.central_freelist(), InsertRange)
+ .Times(e.transfer_cache().IsFlexible() ? 0 : 1);
+ EXPECT_CALL(e.central_freelist(), RemoveRange)
+ .Times(e.transfer_cache().IsFlexible() ? 0 : 1);
+
+ EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_hits, 0);
+ EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_misses, 0);
+ EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_non_batch_misses, 0);
+ EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_hits, 0);
+ EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_misses, 0);
+ EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_non_batch_misses, 0);
+
+ e.Insert(batch_size);
+ EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_hits, 1);
+ e.Insert(batch_size);
+ EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_hits, 2);
+ e.Insert(batch_size - 1);
+ if (e.transfer_cache().IsFlexible()) {
+ EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_hits, 3);
+ EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_misses, 0);
+ EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_non_batch_misses, 0);
+ } else {
+ EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_hits, 2);
+ EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_misses, 1);
+ EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_non_batch_misses, 1);
+ }
+ e.Remove(batch_size);
+ EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_hits, 1);
+ e.Remove(batch_size);
+ EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_hits, 2);
+ e.Remove(batch_size - 1);
+ if (e.transfer_cache().IsFlexible()) {
+ EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_hits, 3);
+ EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_misses, 0);
+ EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_non_batch_misses, 0);
+ } else {
+ EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_hits, 2);
+ EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_misses, 1);
+ EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_non_batch_misses, 1);
+ }
+}
+
+TYPED_TEST_P(TransferCacheTest, ReadStats) {
+ const int batch_size = TypeParam::kBatchSize;
+ TypeParam e;
+ EXPECT_CALL(e.central_freelist(), InsertRange).Times(0);
+ EXPECT_CALL(e.central_freelist(), RemoveRange).Times(0);
+
+ // Ensure there is at least one insert hit/remove hit, so we can assert a
+ // non-tautology in t2.
+ e.Insert(batch_size);
+ e.Remove(batch_size);
+
+ EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_hits, 1);
+ EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_misses, 0);
+ EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_non_batch_misses, 0);
+ EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_hits, 1);
+ EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_misses, 0);
+ EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_non_batch_misses, 0);
+
+ std::atomic<bool> stop{false};
+
+ std::thread t1([&]() {
+ while (!stop.load(std::memory_order_acquire)) {
+ e.Insert(batch_size);
+ e.Remove(batch_size);
+ }
+ });
+
+ std::thread t2([&]() {
+ while (!stop.load(std::memory_order_acquire)) {
+ auto stats = e.transfer_cache().GetHitRateStats();
+ CHECK_CONDITION(stats.insert_hits >= 1);
+ CHECK_CONDITION(stats.insert_misses == 0);
+ CHECK_CONDITION(stats.insert_non_batch_misses == 0);
+ CHECK_CONDITION(stats.remove_hits >= 1);
+ CHECK_CONDITION(stats.remove_misses == 0);
+ CHECK_CONDITION(stats.remove_non_batch_misses == 0);
+ }
+ });
+
+ absl::SleepFor(absl::Seconds(1));
+ stop.store(true, std::memory_order_release);
+
+ t1.join();
+ t2.join();
+}
+
+TYPED_TEST_P(TransferCacheTest, SingleItemSmoke) {
+ const int batch_size = TypeParam::kBatchSize;
+ if (batch_size == 1) {
+ GTEST_SKIP() << "skipping trivial batch size";
+ }
+ TypeParam e;
+ const int actions = e.transfer_cache().IsFlexible() ? 2 : 0;
+ EXPECT_CALL(e.central_freelist(), InsertRange).Times(2 - actions);
+ EXPECT_CALL(e.central_freelist(), RemoveRange).Times(2 - actions);
+
+ e.Insert(1);
+ e.Insert(1);
+ EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_hits, actions);
+ e.Remove(1);
+ e.Remove(1);
+ EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_hits, actions);
+}
+
+TYPED_TEST_P(TransferCacheTest, FetchesFromFreelist) {
+ const int batch_size = TypeParam::kBatchSize;
+ TypeParam e;
+ EXPECT_CALL(e.central_freelist(), InsertRange).Times(0);
+ EXPECT_CALL(e.central_freelist(), RemoveRange).Times(1);
+ e.Remove(batch_size);
+ EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_misses, 1);
+}
+
+TYPED_TEST_P(TransferCacheTest, PartialFetchFromFreelist) {
+ const int batch_size = TypeParam::kBatchSize;
+ TypeParam e;
+ EXPECT_CALL(e.central_freelist(), InsertRange).Times(0);
+ EXPECT_CALL(e.central_freelist(), RemoveRange)
+ .Times(2)
+ .WillOnce([&](void** batch, int n) {
+ int returned = static_cast<FakeCentralFreeList&>(e.central_freelist())
+ .RemoveRange(batch, std::min(batch_size / 2, n));
+ // Overwrite the elements of batch that were not populated by
+ // RemoveRange.
+ memset(batch + returned, 0x3f, sizeof(*batch) * (n - returned));
+ return returned;
+ });
+ e.Remove(batch_size);
+ EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_misses, 2);
+}
+
+TYPED_TEST_P(TransferCacheTest, EvictsOtherCaches) {
+ const int batch_size = TypeParam::kBatchSize;
+ TypeParam e;
+
+ EXPECT_CALL(e.transfer_cache_manager(), ShrinkCache).WillOnce([]() {
+ return true;
+ });
+ EXPECT_CALL(e.central_freelist(), InsertRange).Times(0);
+
+ while (e.transfer_cache().HasSpareCapacity(kSizeClass)) {
+ e.Insert(batch_size);
+ }
+ size_t old_hits = e.transfer_cache().GetHitRateStats().insert_hits;
+ e.Insert(batch_size);
+ EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_hits, old_hits + 1);
+ EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_misses, 0);
+}
+
+TYPED_TEST_P(TransferCacheTest, EvictsOtherCachesFlex) {
+ const int batch_size = TypeParam::kBatchSize;
+ TypeParam e;
+
+ EXPECT_CALL(e.transfer_cache_manager(), ShrinkCache).WillRepeatedly([]() {
+ return true;
+ });
+ if (e.transfer_cache().IsFlexible()) {
+ EXPECT_CALL(e.central_freelist(), InsertRange).Times(0);
+ } else {
+ EXPECT_CALL(e.central_freelist(), InsertRange).Times(batch_size - 1);
+ }
+ EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_hits, 0);
+ EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_misses, 0);
+
+ int total = 0;
+ for (int i = 1; i <= batch_size; i++) {
+ e.Insert(i);
+ total += i;
+ }
+
+ if (e.transfer_cache().IsFlexible()) {
+ EXPECT_EQ(e.transfer_cache().tc_length(), total);
+ EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_hits, batch_size);
+ EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_misses, 0);
+ } else {
+ EXPECT_EQ(e.transfer_cache().tc_length(), 1 * batch_size);
+ EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_hits, 1);
+ EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_misses,
+ batch_size - 1);
+ }
+}
+
+// Similar to EvictsOtherCachesFlex, but with full cache.
+TYPED_TEST_P(TransferCacheTest, FullCacheFlex) {
+ const int batch_size = TypeParam::kBatchSize;
+ TypeParam e;
+
+ EXPECT_CALL(e.transfer_cache_manager(), ShrinkCache).WillRepeatedly([]() {
+ return true;
+ });
+ if (e.transfer_cache().IsFlexible()) {
+ EXPECT_CALL(e.central_freelist(), InsertRange).Times(0);
+ } else {
+ EXPECT_CALL(e.central_freelist(), InsertRange)
+ .Times(testing::AtLeast(batch_size));
+ }
+
+ while (e.transfer_cache().HasSpareCapacity(kSizeClass)) {
+ e.Insert(batch_size);
+ }
+ for (int i = 1; i < batch_size + 2; i++) {
+ e.Insert(i);
+ }
+}
+
+TYPED_TEST_P(TransferCacheTest, PushesToFreelist) {
+ const int batch_size = TypeParam::kBatchSize;
+ TypeParam e;
+
+ EXPECT_CALL(e.transfer_cache_manager(), ShrinkCache).WillOnce([]() {
+ return false;
+ });
+ EXPECT_CALL(e.central_freelist(), InsertRange).Times(1);
+
+ while (e.transfer_cache().HasSpareCapacity(kSizeClass)) {
+ e.Insert(batch_size);
+ }
+ size_t old_hits = e.transfer_cache().GetHitRateStats().insert_hits;
+ e.Insert(batch_size);
+ EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_hits, old_hits);
+ EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_misses, 1);
+}
+
+TYPED_TEST_P(TransferCacheTest, WrappingWorks) {
+ const int batch_size = TypeParam::kBatchSize;
+
+ TypeParam env;
+ EXPECT_CALL(env.transfer_cache_manager(), ShrinkCache).Times(0);
+
+ while (env.transfer_cache().HasSpareCapacity(kSizeClass)) {
+ env.Insert(batch_size);
+ }
+ for (int i = 0; i < 100; ++i) {
+ env.Remove(batch_size);
+ env.Insert(batch_size);
+ }
+}
+
+TYPED_TEST_P(TransferCacheTest, WrappingFlex) {
+ const int batch_size = TypeParam::kBatchSize;
+
+ TypeParam env;
+ EXPECT_CALL(env.transfer_cache_manager(), ShrinkCache).Times(0);
+ if (env.transfer_cache().IsFlexible()) {
+ EXPECT_CALL(env.central_freelist(), InsertRange).Times(0);
+ EXPECT_CALL(env.central_freelist(), RemoveRange).Times(0);
+ }
+
+ while (env.transfer_cache().HasSpareCapacity(kSizeClass)) {
+ env.Insert(batch_size);
+ }
+ for (int i = 0; i < 100; ++i) {
+ for (size_t size = 1; size < batch_size + 2; size++) {
+ env.Remove(size);
+ env.Insert(size);
+ }
+ }
+}
+
+TYPED_TEST_P(TransferCacheTest, Plunder) {
+ TypeParam env;
+ // EXPECT_CALL(env.central_freelist(), RemoveRange).Times(0);
+ // EXPECT_CALL(env.central_freelist(), InsertRange).Times(1);
+ // Fill in some elements.
+ env.Insert(TypeParam::kBatchSize);
+ env.Insert(TypeParam::kBatchSize);
+ ASSERT_EQ(env.transfer_cache().tc_length(), 2 * TypeParam::kBatchSize);
+ // All these elements will be plundered.
+ env.transfer_cache().TryPlunder(kSizeClass);
+ ASSERT_EQ(env.transfer_cache().tc_length(), 0);
+
+ env.Insert(TypeParam::kBatchSize);
+ env.Insert(TypeParam::kBatchSize);
+ ASSERT_EQ(env.transfer_cache().tc_length(), 2 * TypeParam::kBatchSize);
+
+ void* buf[TypeParam::kBatchSize];
+ // -1 +1: this sets the low_water_mark (the lowest end-state after a
+ // call to RemoveRange) to 1 batch.
+ (void)env.transfer_cache().RemoveRange(kSizeClass, buf,
+ TypeParam::kBatchSize);
+ env.transfer_cache().InsertRange(kSizeClass, {buf, TypeParam::kBatchSize});
+ ASSERT_EQ(env.transfer_cache().tc_length(), 2 * TypeParam::kBatchSize);
+ // The low water mark is one batch, so exactly one batch gets plundered and
+ // one batch remains in the cache.
+ env.transfer_cache().TryPlunder(kSizeClass);
+ ASSERT_EQ(env.transfer_cache().tc_length(), TypeParam::kBatchSize);
+ // If we plunder again immediately, the low_water_mark is at maxint, and
+ // everything gets plundered.
+ env.transfer_cache().TryPlunder(kSizeClass);
+ ASSERT_EQ(env.transfer_cache().tc_length(), 0);
+}
+
+// PickCoprimeBatchSize picks the largest batch size <= max_batch_size that
+// does not evenly divide 2^32, so that a sequence of such batches eventually
+// straddles the wraparound point of the ring buffer indices. Choosing the
+// largest such value minimizes the number of insert/remove iterations needed.
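+// For example, if max_batch_size were 32 (a hypothetical value), this would
+// return 31, since 2^32 % 31 != 0.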
+static size_t PickCoprimeBatchSize(size_t max_batch_size) {
+ while (max_batch_size > 1) {
+ if ((size_t{1} << 32) % max_batch_size != 0) {
+ return max_batch_size;
+ }
+ max_batch_size--;
+ }
+
+ return max_batch_size;
+}
+
+TEST(RingBufferTest, b172283201) {
+ // This test is designed to exercise the wraparound behavior for the
+ // RingBufferTransferCache, which manages its indices in uint32_t's. Because
+ // it uses a non-standard batch size (kBatchSize) as part of
+ // PickCoprimeBatchSize, it triggers a TransferCache miss to the
+ // CentralFreeList, which is uninteresting for exercising b/172283201.
+
+ // For performance reasons, limit to optimized builds.
+#if !defined(NDEBUG)
+ GTEST_SKIP() << "skipping long running test on debug build";
+#elif defined(THREAD_SANITIZER)
+ // This test is single threaded, so thread sanitizer will not be useful.
+ GTEST_SKIP() << "skipping under thread sanitizer, which slows test execution";
+#endif
+
+ using EnvType = FakeTransferCacheEnvironment<
+ internal_transfer_cache::RingBufferTransferCache<
+ MockCentralFreeList, MockTransferCacheManager>>;
+ EnvType env;
+
+ // We pick the largest value <= EnvType::kBatchSize to use as a batch size
+ // that does not evenly divide 2^32. This ensures that when we
+ // encounter a wraparound, the last operation actually spans both ends of the
+ // buffer.
+ const size_t batch_size = PickCoprimeBatchSize(EnvType::kBatchSize);
+ ASSERT_GT(batch_size, 0);
+ ASSERT_NE((size_t{1} << 32) % batch_size, 0) << batch_size;
+ // For ease of comparison, allocate a buffer of chars. We will use these to
+ // generate unique addresses. Since we assert that we will never miss in the
+ // TransferCache and go to the CentralFreeList, these do not need to be valid
+ // objects for deallocation.
+ std::vector<char> buffer(batch_size);
+ std::vector<void*> pointers;
+ pointers.reserve(batch_size);
+ for (size_t i = 0; i < batch_size; i++) {
+ pointers.push_back(&buffer[i]);
+ }
+
+ // To produce wraparound in the RingBufferTransferCache, we fill up the cache
+ // completely and then keep inserting new elements. This makes the cache
+ // return old elements to the freelist and eventually wrap around.
+ EXPECT_CALL(env.central_freelist(), RemoveRange).Times(0);
+ // We do return items to the freelist, don't try to actually free them.
+ ON_CALL(env.central_freelist(), InsertRange).WillByDefault(testing::Return());
+ ON_CALL(env.transfer_cache_manager(), DetermineSizeClassToEvict)
+ .WillByDefault(testing::Return(kSizeClass));
+
+ // First fill up the cache to its capacity.
+
+ while (env.transfer_cache().HasSpareCapacity(kSizeClass) ||
+ env.transfer_cache().GrowCache(kSizeClass)) {
+ env.transfer_cache().InsertRange(kSizeClass, absl::MakeSpan(pointers));
+ }
+
+ // The current size of the transfer cache is close to its capacity. Insert
+ // enough batches to make sure we wrap around twice (1 batch size should wrap
+ // around as we are full currently, then insert the same number of items
+ // again, then one more wrap around).
+ const size_t kObjects = env.transfer_cache().tc_length() + 2 * batch_size;
+
+ // From now on, calls to InsertRange() should result in a corresponding call
+ // to the freelist whenever the cache is full. This doesn't happen on every
+ // call, as we return up to num_to_move (i.e. kBatchSize) items to the free
+ // list in one batch.
+ EXPECT_CALL(env.central_freelist(),
+ InsertRange(testing::SizeIs(EnvType::kBatchSize)))
+ .Times(testing::AnyNumber());
+ for (size_t i = 0; i < kObjects; i += batch_size) {
+ env.transfer_cache().InsertRange(kSizeClass, absl::MakeSpan(pointers));
+ }
+ // Manually drain the items in the transfercache, otherwise the destructor
+ // will try to free them.
+ std::vector<void*> to_free(batch_size);
+ size_t N = env.transfer_cache().tc_length();
+ while (N > 0) {
+ const size_t to_remove = std::min(N, batch_size);
+ const size_t removed =
+ env.transfer_cache().RemoveRange(kSizeClass, to_free.data(), to_remove);
+ ASSERT_THAT(removed, testing::Le(to_remove));
+ ASSERT_THAT(removed, testing::Gt(0));
+ N -= removed;
+ }
+ ASSERT_EQ(env.transfer_cache().tc_length(), 0);
+}
+
+REGISTER_TYPED_TEST_SUITE_P(TransferCacheTest, IsolatedSmoke, ReadStats,
+ FetchesFromFreelist, PartialFetchFromFreelist,
+ EvictsOtherCaches, PushesToFreelist, WrappingWorks,
+ SingleItemSmoke, EvictsOtherCachesFlex,
+ FullCacheFlex, WrappingFlex, Plunder);
+template <typename Env>
+using FuzzTest = ::testing::Test;
+TYPED_TEST_SUITE_P(FuzzTest);
+
+TYPED_TEST_P(FuzzTest, MultiThreadedUnbiased) {
+ TypeParam env;
+ ThreadManager threads;
+ threads.Start(10, [&](int) { env.RandomlyPoke(); });
+
+ auto start = absl::Now();
+ while (start + absl::Seconds(0.3) > absl::Now()) env.RandomlyPoke();
+ threads.Stop();
+}
+
+TYPED_TEST_P(FuzzTest, MultiThreadedBiasedInsert) {
+ const int batch_size = TypeParam::kBatchSize;
+
+ TypeParam env;
+ ThreadManager threads;
+ threads.Start(10, [&](int) { env.RandomlyPoke(); });
+
+ auto start = absl::Now();
+ while (start + absl::Seconds(5) > absl::Now()) env.Insert(batch_size);
+ threads.Stop();
+}
+
+TYPED_TEST_P(FuzzTest, MultiThreadedBiasedRemove) {
+ const int batch_size = TypeParam::kBatchSize;
+
+ TypeParam env;
+ ThreadManager threads;
+ threads.Start(10, [&](int) { env.RandomlyPoke(); });
+
+ auto start = absl::Now();
+ while (start + absl::Seconds(5) > absl::Now()) env.Remove(batch_size);
+ threads.Stop();
+}
+
+TYPED_TEST_P(FuzzTest, MultiThreadedBiasedShrink) {
+ TypeParam env;
+ ThreadManager threads;
+ threads.Start(10, [&](int) { env.RandomlyPoke(); });
+
+ auto start = absl::Now();
+ while (start + absl::Seconds(5) > absl::Now()) env.Shrink();
+ threads.Stop();
+}
+
+TYPED_TEST_P(FuzzTest, MultiThreadedBiasedGrow) {
+ TypeParam env;
+ ThreadManager threads;
+ threads.Start(10, [&](int) { env.RandomlyPoke(); });
+
+ auto start = absl::Now();
+ while (start + absl::Seconds(5) > absl::Now()) env.Grow();
+ threads.Stop();
+}
+
+REGISTER_TYPED_TEST_SUITE_P(FuzzTest, MultiThreadedUnbiased,
+ MultiThreadedBiasedInsert,
+ MultiThreadedBiasedRemove, MultiThreadedBiasedGrow,
+ MultiThreadedBiasedShrink);
+
+namespace unit_tests {
+using Env = FakeTransferCacheEnvironment<internal_transfer_cache::TransferCache<
+ MockCentralFreeList, MockTransferCacheManager>>;
+INSTANTIATE_TYPED_TEST_SUITE_P(TransferCache, TransferCacheTest,
+ ::testing::Types<Env>);
+
+using RingBufferEnv = FakeTransferCacheEnvironment<
+ internal_transfer_cache::RingBufferTransferCache<MockCentralFreeList,
+ MockTransferCacheManager>>;
+INSTANTIATE_TYPED_TEST_SUITE_P(RingBuffer, TransferCacheTest,
+ ::testing::Types<RingBufferEnv>);
+} // namespace unit_tests
+
+namespace fuzz_tests {
+// Use the FakeCentralFreeList instead of the MockCentralFreeList for fuzz tests
+// as it avoids the overheads of mocks and allows more iterations of the fuzzing
+// itself.
+using Env = FakeTransferCacheEnvironment<internal_transfer_cache::TransferCache<
+ MockCentralFreeList, MockTransferCacheManager>>;
+INSTANTIATE_TYPED_TEST_SUITE_P(TransferCache, FuzzTest, ::testing::Types<Env>);
+
+using RingBufferEnv = FakeTransferCacheEnvironment<
+ internal_transfer_cache::RingBufferTransferCache<MockCentralFreeList,
+ MockTransferCacheManager>>;
+INSTANTIATE_TYPED_TEST_SUITE_P(RingBuffer, FuzzTest,
+ ::testing::Types<RingBufferEnv>);
+} // namespace fuzz_tests
+
+namespace leak_tests {
+
+template <typename Env>
+using TwoSizeClassTest = ::testing::Test;
+TYPED_TEST_SUITE_P(TwoSizeClassTest);
+
+TYPED_TEST_P(TwoSizeClassTest, NoLeaks) {
+ TypeParam env;
+
+ // The point of this test is to see that adding "random" amounts of
+ // allocations to the transfer caches behaves correctly, even in the case that
+ // there are multiple size classes interacting by stealing from each other.
+
+ // Fill all caches to their maximum without starting to steal from each other.
+ for (int cl = 1; cl < TypeParam::Manager::kSizeClasses; ++cl) {
+ const size_t batch_size = TypeParam::Manager::num_objects_to_move(cl);
+ while (env.transfer_cache_manager().HasSpareCapacity(cl)) {
+ env.Insert(cl, batch_size);
+ }
+ }
+
+ // Count the number of batches currently in the cache.
+ auto count_batches = [&env]() {
+ int batch_count = 0;
+ for (int cl = 1; cl < TypeParam::Manager::kSizeClasses; ++cl) {
+ const size_t batch_size = TypeParam::Manager::num_objects_to_move(cl);
+ batch_count += env.transfer_cache_manager().tc_length(cl) / batch_size;
+ }
+ return batch_count;
+ };
+
+ absl::BitGen bitgen;
+ const int max_batches = count_batches();
+ int expected_batches = max_batches;
+ for (int i = 0; i < 100; ++i) {
+ {
+ // First remove.
+ const int cl =
+ absl::Uniform<int>(bitgen, 1, TypeParam::Manager::kSizeClasses);
+ const size_t batch_size = TypeParam::Manager::num_objects_to_move(cl);
+ if (env.transfer_cache_manager().tc_length(cl) >= batch_size) {
+ env.Remove(cl, batch_size);
+ --expected_batches;
+ }
+ const int current_batches = count_batches();
+ EXPECT_EQ(current_batches, expected_batches) << "iteration " << i;
+ }
+ {
+ // Then add in another size class.
+ const int cl =
+ absl::Uniform<int>(bitgen, 1, TypeParam::Manager::kSizeClasses);
+ // Evict from the "next" size class, skipping 0.
+ // This makes sure we are always evicting from somewhere if at all
+ // possible.
+ env.transfer_cache_manager().evicting_from_ =
+ 1 + cl % (TypeParam::Manager::kSizeClasses - 1);
+ if (expected_batches < max_batches) {
+ const size_t batch_size = TypeParam::Manager::num_objects_to_move(cl);
+ env.Insert(cl, batch_size);
+ ++expected_batches;
+ }
+ const int current_batches = count_batches();
+ EXPECT_EQ(current_batches, expected_batches) << "iteration " << i;
+ }
+ }
+}
+
+REGISTER_TYPED_TEST_SUITE_P(TwoSizeClassTest, NoLeaks);
+
+using TwoTransferCacheEnv =
+ TwoSizeClassEnv<internal_transfer_cache::TransferCache>;
+INSTANTIATE_TYPED_TEST_SUITE_P(TransferCache, TwoSizeClassTest,
+ ::testing::Types<TwoTransferCacheEnv>);
+
+using TwoRingBufferEnv =
+ TwoSizeClassEnv<internal_transfer_cache::RingBufferTransferCache>;
+INSTANTIATE_TYPED_TEST_SUITE_P(RingBuffer, TwoSizeClassTest,
+ ::testing::Types<TwoRingBufferEnv>);
+
+} // namespace leak_tests
+
+} // namespace
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
diff --git a/contrib/libs/tcmalloc/tcmalloc/want_hpaa.cc b/contrib/libs/tcmalloc/tcmalloc/want_hpaa.cc
new file mode 100644
index 0000000000..b488ceb54f
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/want_hpaa.cc
@@ -0,0 +1,30 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/base/attributes.h"
+#include "tcmalloc/internal/config.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+// If linked into a binary, this overrides page_allocator.cc and forces HPAA
+// on and subrelease off.
+ABSL_ATTRIBUTE_UNUSED int default_want_hpaa() { return 1; }
+
+ABSL_ATTRIBUTE_UNUSED int default_subrelease() { return -1; }
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
diff --git a/contrib/libs/tcmalloc/tcmalloc/want_hpaa_subrelease.cc b/contrib/libs/tcmalloc/tcmalloc/want_hpaa_subrelease.cc
new file mode 100644
index 0000000000..323cce40ed
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/want_hpaa_subrelease.cc
@@ -0,0 +1,30 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/base/attributes.h"
+#include "tcmalloc/internal/config.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+// If linked into a binary, this overrides page_allocator.cc and forces HPAA
+// on and subrelease on.
+ABSL_ATTRIBUTE_UNUSED int default_want_hpaa() { return 1; }
+
+ABSL_ATTRIBUTE_UNUSED int default_subrelease() { return 1; }
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
diff --git a/contrib/libs/tcmalloc/tcmalloc/want_legacy_spans.cc b/contrib/libs/tcmalloc/tcmalloc/want_legacy_spans.cc
new file mode 100644
index 0000000000..28580e13ed
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/want_legacy_spans.cc
@@ -0,0 +1,28 @@
+// Copyright 2020 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/base/attributes.h"
+#include "tcmalloc/internal/config.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+// If linked into a binary, this overrides common.cc and
+// forces the old span sizes.
+ABSL_ATTRIBUTE_UNUSED int default_want_legacy_spans() { return 1; }
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
diff --git a/contrib/libs/tcmalloc/tcmalloc/want_no_hpaa.cc b/contrib/libs/tcmalloc/tcmalloc/want_no_hpaa.cc
new file mode 100644
index 0000000000..e23d93d9ce
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/want_no_hpaa.cc
@@ -0,0 +1,30 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/base/attributes.h"
+#include "tcmalloc/internal/config.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+// If linked into a binary, this overrides page_allocator.cc and
+// forces HPAA off and subrelease off.
+ABSL_ATTRIBUTE_UNUSED int default_want_hpaa() { return -1; }
+
+ABSL_ATTRIBUTE_UNUSED int default_subrelease() { return -1; }
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
diff --git a/contrib/libs/tcmalloc/tcmalloc/want_numa_aware.cc b/contrib/libs/tcmalloc/tcmalloc/want_numa_aware.cc
new file mode 100644
index 0000000000..3f0519dd50
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/want_numa_aware.cc
@@ -0,0 +1,28 @@
+// Copyright 2021 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/base/attributes.h"
+#include "tcmalloc/internal/config.h"
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+// When linked into a binary this overrides the weak implementation in numa.cc
+// and causes TCMalloc to enable NUMA awareness by default.
+ABSL_ATTRIBUTE_UNUSED bool default_want_numa_aware() { return true; }
+
+} // namespace tcmalloc_internal
+} // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
diff --git a/contrib/libs/tcmalloc/ya.make b/contrib/libs/tcmalloc/ya.make
new file mode 100644
index 0000000000..54701b1b77
--- /dev/null
+++ b/contrib/libs/tcmalloc/ya.make
@@ -0,0 +1,38 @@
+LIBRARY()
+
+LICENSE(Apache-2.0)
+
+LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
+
+OWNER(
+ ayles
+ prime
+ g:cpp-contrib
+)
+
+# https://github.com/google/tcmalloc
+VERSION(2021-10-04-45c59ccbc062ac96d83710205033c656e490d376)
+
+SRCS(
+ # Options
+ tcmalloc/want_hpaa.cc
+)
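+# Linking want_hpaa.cc above overrides the default in page_allocator.cc and
+# forces HPAA on; see the comment in that source file.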
+
+INCLUDE(common.inc)
+
+CFLAGS(
+ -DTCMALLOC_256K_PAGES
+)
+
+END()
+
+IF (NOT DLL_FOR)
+ RECURSE(
+ default
+ dynamic
+ malloc_extension
+ numa_256k
+ numa_large_pages
+ slow_but_small
+ )
+ENDIF()