| field | value | date |
|---|---|---|
| author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300 |
| committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300 |
| commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
| tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /contrib/libs/tcmalloc | |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'contrib/libs/tcmalloc')
199 files changed, 53413 insertions, 0 deletions
diff --git a/contrib/libs/tcmalloc/.yandex_meta/devtools.copyrights.report b/contrib/libs/tcmalloc/.yandex_meta/devtools.copyrights.report new file mode 100644 index 0000000000..f5c52b8f16 --- /dev/null +++ b/contrib/libs/tcmalloc/.yandex_meta/devtools.copyrights.report @@ -0,0 +1,255 @@ +# File format ($ symbol means the beginning of a line): +# +# $ # this message +# $ # ======================= +# $ # comments (all commentaries should starts with some number of spaces and # symbol) +# ${action} {license id} {license text hash} +# $BELONGS ./ya/make/file/relative/path/1/ya.make ./ya/make/2/ya.make +# ${all_file_action} filename +# $ # user commentaries (many lines) +# $ generated description - files with this license, license text... (some number of lines that starts with some number of spaces, do not modify) +# ${action} {license spdx} {license text hash} +# $BELONGS ./ya/make/file/relative/path/3/ya.make +# ${all_file_action} filename +# $ # user commentaries +# $ generated description +# $ ... +# +# You can modify action, all_file_action and add commentaries +# Available actions: +# keep - keep license in contrib and use in credits +# skip - skip license +# remove - remove all files with this license +# rename - save license text/links into licenses texts file, but not store SPDX into LINCENSE macro. You should store correct license id into devtools.license.spdx.txt file +# +# {all file action} records will be generated when license text contains filename that exists on filesystem (in contrib directory) +# We suppose that that files can contain some license info +# Available all file actions: +# FILE_IGNORE - ignore file (do nothing) +# FILE_INCLUDE - include all file data into licenses text file +# ======================= + +KEEP COPYRIGHT_SERVICE_LABEL 279545394b5ad4b6b26c0686ac5f9921 +BELONGS ya.make + License text: + // Copyright 2019 The TCMalloc Authors + Scancode info: + Original SPDX id: COPYRIGHT_SERVICE_LABEL + Score : 100.00 + Match type : COPYRIGHT + Files with this license: + tcmalloc/BUILD [1:1] + tcmalloc/arena.cc [1:1] + tcmalloc/arena.h [1:1] + tcmalloc/background.cc [1:1] + tcmalloc/central_freelist.cc [1:1] + tcmalloc/central_freelist.h [1:1] + tcmalloc/central_freelist_test.cc [1:1] + tcmalloc/common.cc [1:1] + tcmalloc/common.h [1:1] + tcmalloc/cpu_cache.cc [1:1] + tcmalloc/cpu_cache.h [1:1] + tcmalloc/cpu_cache_test.cc [1:1] + tcmalloc/experiment.cc [1:1] + tcmalloc/experiment.h [1:1] + tcmalloc/experiment_config.h [1:1] + tcmalloc/experiment_config_test.cc [1:1] + tcmalloc/experiment_fuzz.cc [1:1] + tcmalloc/experimental_56_size_class.cc [1:1] + tcmalloc/experimental_pow2_below64_size_class.cc [1:1] + tcmalloc/experimental_pow2_size_class.cc [1:1] + tcmalloc/guarded_page_allocator.cc [1:1] + tcmalloc/guarded_page_allocator.h [1:1] + tcmalloc/guarded_page_allocator_benchmark.cc [1:1] + tcmalloc/guarded_page_allocator_test.cc [1:1] + tcmalloc/heap_profiling_test.cc [1:1] + tcmalloc/huge_address_map.cc [1:1] + tcmalloc/huge_address_map.h [1:1] + tcmalloc/huge_address_map_test.cc [1:1] + tcmalloc/huge_allocator.cc [1:1] + tcmalloc/huge_allocator.h [1:1] + tcmalloc/huge_allocator_test.cc [1:1] + tcmalloc/huge_cache.cc [1:1] + tcmalloc/huge_cache.h [1:1] + tcmalloc/huge_cache_test.cc [1:1] + tcmalloc/huge_page_aware_allocator.cc [1:1] + tcmalloc/huge_page_aware_allocator.h [1:1] + tcmalloc/huge_page_aware_allocator_test.cc [1:1] + tcmalloc/huge_page_filler.h [1:1] + tcmalloc/huge_page_filler_test.cc [1:1] + tcmalloc/huge_pages.h [1:1] + tcmalloc/huge_region.h [1:1] + 
tcmalloc/huge_region_test.cc [1:1] + tcmalloc/internal/atomic_danger.h [1:1] + tcmalloc/internal/atomic_stats_counter.h [1:1] + tcmalloc/internal/bits.h [1:1] + tcmalloc/internal/bits_test.cc [1:1] + tcmalloc/internal/config.h [1:1] + tcmalloc/internal/declarations.h [1:1] + tcmalloc/internal/environment.cc [1:1] + tcmalloc/internal/environment.h [1:1] + tcmalloc/internal/environment_test.cc [1:1] + tcmalloc/internal/lifetime_predictions_test.cc [1:1] + tcmalloc/internal/lifetime_tracker_test.cc [1:1] + tcmalloc/internal/linked_list.h [1:1] + tcmalloc/internal/linked_list_benchmark.cc [1:1] + tcmalloc/internal/linked_list_test.cc [1:1] + tcmalloc/internal/linux_syscall_support.h [1:1] + tcmalloc/internal/logging.cc [1:1] + tcmalloc/internal/logging.h [1:1] + tcmalloc/internal/logging_test.cc [1:1] + tcmalloc/internal/memory_stats.cc [1:1] + tcmalloc/internal/memory_stats.h [1:1] + tcmalloc/internal/memory_stats_test.cc [1:1] + tcmalloc/internal/mincore.cc [1:1] + tcmalloc/internal/mincore.h [1:1] + tcmalloc/internal/mincore_benchmark.cc [1:1] + tcmalloc/internal/mincore_test.cc [1:1] + tcmalloc/internal/mock_span.h [1:1] + tcmalloc/internal/parameter_accessors.h [1:1] + tcmalloc/internal/percpu.cc [1:1] + tcmalloc/internal/percpu.h [1:1] + tcmalloc/internal/percpu_rseq_asm.S [1:1] + tcmalloc/internal/percpu_rseq_ppc.S [2:2] + tcmalloc/internal/percpu_rseq_unsupported.cc [1:1] + tcmalloc/internal/percpu_rseq_x86_64.S [2:2] + tcmalloc/internal/percpu_tcmalloc.h [1:1] + tcmalloc/internal/percpu_tcmalloc_test.cc [1:1] + tcmalloc/internal/proc_maps.cc [1:1] + tcmalloc/internal/proc_maps.h [1:1] + tcmalloc/internal/range_tracker.h [1:1] + tcmalloc/internal/range_tracker_benchmark.cc [1:1] + tcmalloc/internal/range_tracker_test.cc [1:1] + tcmalloc/internal/timeseries_tracker.h [1:1] + tcmalloc/internal/timeseries_tracker_test.cc [1:1] + tcmalloc/internal/util.cc [1:1] + tcmalloc/internal/util.h [1:1] + tcmalloc/internal_malloc_extension.h [1:1] + tcmalloc/legacy_size_classes.cc [1:1] + tcmalloc/libc_override.h [1:1] + tcmalloc/libc_override_gcc_and_weak.h [1:1] + tcmalloc/libc_override_glibc.h [1:1] + tcmalloc/libc_override_redefine.h [1:1] + tcmalloc/malloc_extension.cc [1:1] + tcmalloc/malloc_extension.h [1:1] + tcmalloc/malloc_extension_fuzz.cc [1:1] + tcmalloc/malloc_extension_system_malloc_test.cc [1:1] + tcmalloc/malloc_extension_test.cc [1:1] + tcmalloc/noruntime_size_classes.cc [1:1] + tcmalloc/page_allocator.cc [1:1] + tcmalloc/page_allocator.h [1:1] + tcmalloc/page_allocator_interface.cc [1:1] + tcmalloc/page_allocator_interface.h [1:1] + tcmalloc/page_allocator_test.cc [1:1] + tcmalloc/page_allocator_test_util.h [1:1] + tcmalloc/page_heap.cc [1:1] + tcmalloc/page_heap.h [1:1] + tcmalloc/page_heap_allocator.h [1:1] + tcmalloc/page_heap_test.cc [1:1] + tcmalloc/pagemap.cc [1:1] + tcmalloc/pagemap.h [1:1] + tcmalloc/pagemap_test.cc [1:1] + tcmalloc/pages.h [1:1] + tcmalloc/parameters.cc [1:1] + tcmalloc/parameters.h [1:1] + tcmalloc/peak_heap_tracker.cc [1:1] + tcmalloc/peak_heap_tracker.h [1:1] + tcmalloc/profile_test.cc [1:1] + tcmalloc/realloc_test.cc [1:1] + tcmalloc/runtime_size_classes.cc [1:1] + tcmalloc/runtime_size_classes.h [1:1] + tcmalloc/runtime_size_classes_fuzz.cc [1:1] + tcmalloc/runtime_size_classes_test.cc [1:1] + tcmalloc/sampler.cc [1:1] + tcmalloc/sampler.h [1:1] + tcmalloc/size_class_info.h [1:1] + tcmalloc/size_classes.cc [1:1] + tcmalloc/size_classes_test.cc [1:1] + tcmalloc/size_classes_with_runtime_size_classes_test.cc [1:1] + tcmalloc/span.cc [1:1] + 
tcmalloc/span.h [1:1] + tcmalloc/span_benchmark.cc [1:1] + tcmalloc/span_stats.h [1:1] + tcmalloc/span_test.cc [1:1] + tcmalloc/stack_trace_table.cc [1:1] + tcmalloc/stack_trace_table.h [1:1] + tcmalloc/stack_trace_table_test.cc [1:1] + tcmalloc/static_vars.cc [1:1] + tcmalloc/static_vars.h [1:1] + tcmalloc/stats.cc [1:1] + tcmalloc/stats.h [1:1] + tcmalloc/stats_test.cc [1:1] + tcmalloc/system-alloc.cc [1:1] + tcmalloc/system-alloc.h [1:1] + tcmalloc/system-alloc_test.cc [1:1] + tcmalloc/tcmalloc.cc [1:1] + tcmalloc/tcmalloc.h [1:1] + tcmalloc/tcmalloc_large_test.cc [1:1] + tcmalloc/tcmalloc_policy.h [1:1] + tcmalloc/thread_cache.cc [1:1] + tcmalloc/thread_cache.h [1:1] + tcmalloc/thread_cache_test.cc [1:1] + tcmalloc/tracking.h [1:1] + tcmalloc/transfer_cache.cc [1:1] + tcmalloc/transfer_cache.h [1:1] + tcmalloc/want_hpaa.cc [1:1] + tcmalloc/want_hpaa_subrelease.cc [1:1] + tcmalloc/want_no_hpaa.cc [1:1] + +KEEP COPYRIGHT_SERVICE_LABEL 2f85f99f6e6cdec04f6948d273430658 +BELONGS ya.make + License text: + // Copyright 2021 The TCMalloc Authors + Scancode info: + Original SPDX id: COPYRIGHT_SERVICE_LABEL + Score : 100.00 + Match type : COPYRIGHT + Files with this license: + tcmalloc/arena_test.cc [1:1] + tcmalloc/central_freelist_benchmark.cc [1:1] + tcmalloc/internal/cache_topology.cc [1:1] + tcmalloc/internal/cache_topology.h [1:1] + tcmalloc/internal/cache_topology_test.cc [1:1] + tcmalloc/internal/clock.h [1:1] + tcmalloc/internal/logging_test_helper.cc [1:1] + tcmalloc/internal/numa.cc [1:1] + tcmalloc/internal/numa.h [1:1] + tcmalloc/internal/numa_test.cc [1:1] + tcmalloc/want_numa_aware.cc [1:1] + +KEEP COPYRIGHT_SERVICE_LABEL 62f2df7d02ddf07de59d1a4e25e663aa +BELONGS ya.make + License text: + // Copyright 2020 The TCMalloc Authors + Scancode info: + Original SPDX id: COPYRIGHT_SERVICE_LABEL + Score : 100.00 + Match type : COPYRIGHT + Files with this license: + tcmalloc/internal/lifetime_predictions.h [1:1] + tcmalloc/internal/lifetime_tracker.h [1:1] + tcmalloc/internal/optimization.h [1:1] + tcmalloc/internal/percpu_rseq_aarch64.S [2:2] + tcmalloc/mock_central_freelist.cc [1:1] + tcmalloc/mock_central_freelist.h [1:1] + tcmalloc/mock_transfer_cache.cc [1:1] + tcmalloc/mock_transfer_cache.h [1:1] + tcmalloc/transfer_cache_benchmark.cc [1:1] + tcmalloc/transfer_cache_fuzz.cc [1:1] + tcmalloc/transfer_cache_internals.h [1:1] + tcmalloc/transfer_cache_stats.h [1:1] + tcmalloc/transfer_cache_test.cc [1:1] + tcmalloc/want_legacy_spans.cc [1:1] + +KEEP COPYRIGHT_SERVICE_LABEL b7c6499c855f04bbe7161fc4de3a41d6 +BELONGS ya.make + License text: + Delete(c); + SmallSpanStats small; + LargeSpanStats large; + Scancode info: + Original SPDX id: COPYRIGHT_SERVICE_LABEL + Score : 100.00 + Match type : COPYRIGHT + Files with this license: + tcmalloc/huge_region_test.cc [433:435] diff --git a/contrib/libs/tcmalloc/.yandex_meta/devtools.licenses.report b/contrib/libs/tcmalloc/.yandex_meta/devtools.licenses.report new file mode 100644 index 0000000000..29c5c149ce --- /dev/null +++ b/contrib/libs/tcmalloc/.yandex_meta/devtools.licenses.report @@ -0,0 +1,331 @@ +# File format ($ symbol means the beginning of a line): +# +# $ # this message +# $ # ======================= +# $ # comments (all commentaries should starts with some number of spaces and # symbol) +# ${action} {license spdx} {license text hash} +# $BELONGS ./ya/make/file/relative/path/1/ya.make ./ya/make/2/ya.make +# ${all_file_action} filename +# $ # user commentaries (many lines) +# $ generated description - files with this license, license 
text... (some number of lines that starts with some number of spaces, do not modify) +# ${action} {license spdx} {license text hash} +# $BELONGS ./ya/make/file/relative/path/3/ya.make +# ${all_file_action} filename +# $ # user commentaries +# $ generated description +# $ ... +# +# You can modify action, all_file_action and add commentaries +# Available actions: +# keep - keep license in contrib and use in credits +# skip - skip license +# remove - remove all files with this license +# rename - save license text/links into licenses texts file, but not store SPDX into LINCENSE macro. You should store correct license id into devtools.license.spdx.txt file +# +# {all file action} records will be generated when license text contains filename that exists on filesystem (in contrib directory) +# We suppose that that files can contain some license info +# Available all file actions: +# FILE_IGNORE - ignore file (do nothing) +# FILE_INCLUDE - include all file data into licenses text file +# ======================= + +KEEP Apache-2.0 0e8699c5f5ea602534a6558430df2b8d +BELONGS ya.make + Note: matched license text is too long. Read it in the source files. + Scancode info: + Original SPDX id: Apache-2.0 + Score : 100.00 + Match type : NOTICE + Links : http://www.apache.org/licenses/, http://www.apache.org/licenses/LICENSE-2.0, https://spdx.org/licenses/Apache-2.0 + Files with this license: + tcmalloc/arena.cc [3:13] + tcmalloc/arena.h [3:13] + tcmalloc/arena_test.cc [3:13] + tcmalloc/background.cc [3:13] + tcmalloc/central_freelist.cc [3:13] + tcmalloc/central_freelist.h [3:13] + tcmalloc/central_freelist_benchmark.cc [3:13] + tcmalloc/central_freelist_test.cc [3:13] + tcmalloc/common.cc [3:13] + tcmalloc/common.h [3:13] + tcmalloc/cpu_cache.cc [3:13] + tcmalloc/cpu_cache.h [3:13] + tcmalloc/cpu_cache_test.cc [3:13] + tcmalloc/experiment.cc [3:13] + tcmalloc/experiment.h [3:13] + tcmalloc/experiment_config.h [3:13] + tcmalloc/experiment_config_test.cc [3:13] + tcmalloc/experiment_fuzz.cc [3:13] + tcmalloc/experimental_56_size_class.cc [3:13] + tcmalloc/experimental_pow2_below64_size_class.cc [3:13] + tcmalloc/experimental_pow2_size_class.cc [3:13] + tcmalloc/guarded_page_allocator.cc [3:13] + tcmalloc/guarded_page_allocator.h [3:13] + tcmalloc/guarded_page_allocator_benchmark.cc [3:13] + tcmalloc/guarded_page_allocator_test.cc [3:13] + tcmalloc/heap_profiling_test.cc [3:13] + tcmalloc/huge_address_map.cc [3:13] + tcmalloc/huge_address_map.h [3:13] + tcmalloc/huge_address_map_test.cc [3:13] + tcmalloc/huge_allocator.cc [3:13] + tcmalloc/huge_allocator.h [3:13] + tcmalloc/huge_allocator_test.cc [3:13] + tcmalloc/huge_cache.cc [3:13] + tcmalloc/huge_cache.h [3:13] + tcmalloc/huge_cache_test.cc [3:13] + tcmalloc/huge_page_aware_allocator.cc [3:13] + tcmalloc/huge_page_aware_allocator.h [3:13] + tcmalloc/huge_page_aware_allocator_test.cc [3:13] + tcmalloc/huge_page_filler.h [3:13] + tcmalloc/huge_page_filler_test.cc [3:13] + tcmalloc/huge_pages.h [3:13] + tcmalloc/huge_region.h [3:13] + tcmalloc/huge_region_test.cc [3:13] + tcmalloc/internal/atomic_danger.h [3:13] + tcmalloc/internal/atomic_stats_counter.h [3:13] + tcmalloc/internal/bits.h [3:13] + tcmalloc/internal/bits_test.cc [3:13] + tcmalloc/internal/cache_topology.cc [3:13] + tcmalloc/internal/cache_topology.h [3:13] + tcmalloc/internal/cache_topology_test.cc [3:13] + tcmalloc/internal/clock.h [3:13] + tcmalloc/internal/config.h [3:13] + tcmalloc/internal/declarations.h [3:13] + tcmalloc/internal/environment.cc [3:13] + tcmalloc/internal/environment.h 
[3:13] + tcmalloc/internal/environment_test.cc [3:13] + tcmalloc/internal/lifetime_predictions.h [3:13] + tcmalloc/internal/lifetime_predictions_test.cc [3:13] + tcmalloc/internal/lifetime_tracker.h [3:13] + tcmalloc/internal/lifetime_tracker_test.cc [3:13] + tcmalloc/internal/linked_list.h [3:13] + tcmalloc/internal/linked_list_benchmark.cc [3:13] + tcmalloc/internal/linked_list_test.cc [3:13] + tcmalloc/internal/linux_syscall_support.h [3:13] + tcmalloc/internal/logging.cc [3:13] + tcmalloc/internal/logging.h [3:13] + tcmalloc/internal/logging_test.cc [3:13] + tcmalloc/internal/logging_test_helper.cc [3:13] + tcmalloc/internal/memory_stats.cc [3:13] + tcmalloc/internal/memory_stats.h [3:13] + tcmalloc/internal/memory_stats_test.cc [3:13] + tcmalloc/internal/mincore.cc [3:13] + tcmalloc/internal/mincore.h [3:13] + tcmalloc/internal/mincore_benchmark.cc [3:13] + tcmalloc/internal/mincore_test.cc [3:13] + tcmalloc/internal/mock_span.h [3:13] + tcmalloc/internal/numa.cc [3:13] + tcmalloc/internal/numa.h [3:13] + tcmalloc/internal/numa_test.cc [3:13] + tcmalloc/internal/optimization.h [3:13] + tcmalloc/internal/parameter_accessors.h [3:13] + tcmalloc/internal/percpu.cc [3:13] + tcmalloc/internal/percpu.h [3:13] + tcmalloc/internal/percpu_rseq_asm.S [3:13] + tcmalloc/internal/percpu_rseq_unsupported.cc [3:13] + tcmalloc/internal/percpu_tcmalloc.h [3:13] + tcmalloc/internal/percpu_tcmalloc_test.cc [3:13] + tcmalloc/internal/proc_maps.cc [3:13] + tcmalloc/internal/proc_maps.h [3:13] + tcmalloc/internal/range_tracker.h [3:13] + tcmalloc/internal/range_tracker_benchmark.cc [3:13] + tcmalloc/internal/range_tracker_test.cc [3:13] + tcmalloc/internal/timeseries_tracker.h [3:13] + tcmalloc/internal/timeseries_tracker_test.cc [3:13] + tcmalloc/internal/util.cc [3:13] + tcmalloc/internal/util.h [3:13] + tcmalloc/internal_malloc_extension.h [3:13] + tcmalloc/legacy_size_classes.cc [3:13] + tcmalloc/libc_override.h [3:13] + tcmalloc/libc_override_gcc_and_weak.h [3:13] + tcmalloc/libc_override_glibc.h [3:13] + tcmalloc/libc_override_redefine.h [3:13] + tcmalloc/malloc_extension.cc [3:13] + tcmalloc/malloc_extension.h [3:13] + tcmalloc/malloc_extension_fuzz.cc [3:13] + tcmalloc/malloc_extension_system_malloc_test.cc [3:13] + tcmalloc/malloc_extension_test.cc [3:13] + tcmalloc/mock_central_freelist.cc [3:13] + tcmalloc/mock_central_freelist.h [3:13] + tcmalloc/mock_transfer_cache.cc [3:13] + tcmalloc/mock_transfer_cache.h [3:13] + tcmalloc/noruntime_size_classes.cc [3:13] + tcmalloc/page_allocator.cc [3:13] + tcmalloc/page_allocator.h [3:13] + tcmalloc/page_allocator_interface.cc [3:13] + tcmalloc/page_allocator_interface.h [3:13] + tcmalloc/page_allocator_test.cc [3:13] + tcmalloc/page_allocator_test_util.h [3:13] + tcmalloc/page_heap.cc [3:13] + tcmalloc/page_heap.h [3:13] + tcmalloc/page_heap_allocator.h [3:13] + tcmalloc/page_heap_test.cc [3:13] + tcmalloc/pagemap.cc [3:13] + tcmalloc/pagemap.h [3:13] + tcmalloc/pagemap_test.cc [3:13] + tcmalloc/pages.h [3:13] + tcmalloc/parameters.cc [3:13] + tcmalloc/parameters.h [3:13] + tcmalloc/peak_heap_tracker.cc [3:13] + tcmalloc/peak_heap_tracker.h [3:13] + tcmalloc/profile_test.cc [3:13] + tcmalloc/realloc_test.cc [3:13] + tcmalloc/runtime_size_classes.cc [3:13] + tcmalloc/runtime_size_classes.h [3:13] + tcmalloc/runtime_size_classes_fuzz.cc [3:13] + tcmalloc/runtime_size_classes_test.cc [3:13] + tcmalloc/sampler.cc [3:13] + tcmalloc/sampler.h [3:13] + tcmalloc/size_class_info.h [3:13] + tcmalloc/size_classes.cc [3:13] + tcmalloc/size_classes_test.cc [3:13] + 
tcmalloc/size_classes_with_runtime_size_classes_test.cc [3:13] + tcmalloc/span.cc [3:13] + tcmalloc/span.h [3:13] + tcmalloc/span_benchmark.cc [3:13] + tcmalloc/span_stats.h [3:13] + tcmalloc/span_test.cc [3:13] + tcmalloc/stack_trace_table.cc [3:13] + tcmalloc/stack_trace_table.h [3:13] + tcmalloc/stack_trace_table_test.cc [3:13] + tcmalloc/static_vars.cc [3:13] + tcmalloc/static_vars.h [3:13] + tcmalloc/stats.cc [3:13] + tcmalloc/stats.h [3:13] + tcmalloc/stats_test.cc [3:13] + tcmalloc/system-alloc.cc [3:13] + tcmalloc/system-alloc.h [3:13] + tcmalloc/system-alloc_test.cc [3:13] + tcmalloc/tcmalloc.cc [3:13] + tcmalloc/tcmalloc.h [3:13] + tcmalloc/tcmalloc_large_test.cc [3:13] + tcmalloc/tcmalloc_policy.h [3:13] + tcmalloc/thread_cache.cc [3:13] + tcmalloc/thread_cache.h [3:13] + tcmalloc/thread_cache_test.cc [3:13] + tcmalloc/tracking.h [3:13] + tcmalloc/transfer_cache.cc [3:13] + tcmalloc/transfer_cache.h [3:13] + tcmalloc/transfer_cache_benchmark.cc [3:13] + tcmalloc/transfer_cache_fuzz.cc [3:13] + tcmalloc/transfer_cache_internals.h [3:13] + tcmalloc/transfer_cache_stats.h [3:13] + tcmalloc/transfer_cache_test.cc [3:13] + tcmalloc/want_hpaa.cc [3:13] + tcmalloc/want_hpaa_subrelease.cc [3:13] + tcmalloc/want_legacy_spans.cc [3:13] + tcmalloc/want_no_hpaa.cc [3:13] + tcmalloc/want_numa_aware.cc [3:13] + +KEEP Apache-2.0 24be4e5673a9c71cdba851c53ed9677b +BELONGS ya.make + Note: matched license text is too long. Read it in the source files. + Scancode info: + Original SPDX id: Apache-2.0 + Score : 100.00 + Match type : NOTICE + Links : http://www.apache.org/licenses/, http://www.apache.org/licenses/LICENSE-2.0, https://spdx.org/licenses/Apache-2.0 + Files with this license: + tcmalloc/internal/percpu_rseq_aarch64.S [4:14] + +KEEP Apache-2.0 34ef0c6d1296bad9c0b8ea4447611e19 +BELONGS ya.make + Note: matched license text is too long. Read it in the source files. + Scancode info: + Original SPDX id: Apache-2.0 + Score : 100.00 + Match type : NOTICE + Links : http://www.apache.org/licenses/, http://www.apache.org/licenses/LICENSE-2.0, https://spdx.org/licenses/Apache-2.0 + Files with this license: + tcmalloc/BUILD [3:13] + +KEEP Apache-2.0 566444825cbcc83578050639168bd08f +BELONGS ya.make +FILE_INCLUDE LICENSE found in files: README.md at line 42 + License text: + The TCMalloc library is licensed under the terms of the Apache + license. See LICENSE for more information. + Scancode info: + Original SPDX id: Apache-2.0 + Score : 90.00 + Match type : NOTICE + Links : http://www.apache.org/licenses/, http://www.apache.org/licenses/LICENSE-2.0, https://spdx.org/licenses/Apache-2.0 + Files with this license: + README.md [41:42] + +SKIP LicenseRef-scancode-generic-cla 5d780ffa423067f23c6a123ae33e7c18 +BELONGS ya.make + License text: + \## Contributor License Agreement + Scancode info: + Original SPDX id: LicenseRef-scancode-generic-cla + Score : 16.00 + Match type : NOTICE + Links : https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/generic-cla.LICENSE + Files with this license: + CONTRIBUTING.md [9:9] + +KEEP Apache-2.0 7007f7032a612d02b590073b4f7e5b25 +BELONGS ya.make + Note: matched license text is too long. Read it in the source files. 
+ Scancode info: + Original SPDX id: Apache-2.0 + Score : 100.00 + Match type : NOTICE + Links : http://www.apache.org/licenses/, http://www.apache.org/licenses/LICENSE-2.0, https://spdx.org/licenses/Apache-2.0 + Files with this license: + tcmalloc/internal/percpu_rseq_ppc.S [4:14] + tcmalloc/internal/percpu_rseq_x86_64.S [4:14] + +SKIP LicenseRef-scancode-generic-cla 979d7de2e3ff119ee2c22c7efbec766d +BELONGS ya.make + License text: + Contributions to this project must be accompanied by a Contributor License + Agreement. You (or your employer) retain the copyright to your contribution; + Scancode info: + Original SPDX id: LicenseRef-scancode-generic-cla + Score : 16.00 + Match type : NOTICE + Links : https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/generic-cla.LICENSE + Files with this license: + CONTRIBUTING.md [11:12] + +KEEP Apache-2.0 cac6cbe8ed5a3da569f7c01e4e486688 +BELONGS ya.make + Note: matched license text is too long. Read it in the source files. + Scancode info: + Original SPDX id: Apache-2.0 + Score : 100.00 + Match type : TEXT + Links : http://www.apache.org/licenses/, http://www.apache.org/licenses/LICENSE-2.0, https://spdx.org/licenses/Apache-2.0 + Files with this license: + LICENSE [2:202] + +SKIP LicenseRef-scancode-other-permissive cd348406a46a4c91e9edaa5be5e9c074 +BELONGS ya.make +FILE_INCLUDE LICENSE found in files: README.md at line 42 + # File LICENSES allready included + License text: + license. See LICENSE for more information. + Scancode info: + Original SPDX id: LicenseRef-scancode-unknown-license-reference + Score : 100.00 + Match type : REFERENCE + Links : https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/unknown-license-reference.LICENSE + Files with this license: + README.md [42:42] + +KEEP Apache-2.0 e0f9a998414a9ae203fd34f4452d4dbc +BELONGS ya.make +FILE_INCLUDE LICENSE found in files: README.md at line 42 + License text: + \## License + The TCMalloc library is licensed under the terms of the Apache + license. See LICENSE for more information. + Scancode info: + Original SPDX id: MIT + Score : 52.63 + Match type : NOTICE + Links : http://opensource.org/licenses/mit-license.php, https://spdx.org/licenses/MIT + Files with this license: + README.md [39:42] diff --git a/contrib/libs/tcmalloc/.yandex_meta/licenses.list.txt b/contrib/libs/tcmalloc/.yandex_meta/licenses.list.txt new file mode 100644 index 0000000000..a5a0b42768 --- /dev/null +++ b/contrib/libs/tcmalloc/.yandex_meta/licenses.list.txt @@ -0,0 +1,492 @@ +====================Apache-2.0==================== + Apache License + Version 2.0, January 2004 + https://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +====================Apache-2.0==================== + * Licensed under the Apache License, Version 2.0 (the "License") + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + + +====================Apache-2.0==================== + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + + +====================Apache-2.0==================== +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +====================Apache-2.0==================== +## License + +The TCMalloc library is licensed under the terms of the Apache +license. See LICENSE for more information. + + +====================Apache-2.0==================== +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +====================Apache-2.0==================== +The TCMalloc library is licensed under the terms of the Apache +license. See LICENSE for more information. + + +====================COPYRIGHT==================== + Delete(c); + SmallSpanStats small; + LargeSpanStats large; + + +====================COPYRIGHT==================== +// Copyright 2019 The TCMalloc Authors + + +====================COPYRIGHT==================== +// Copyright 2020 The TCMalloc Authors + + +====================COPYRIGHT==================== +// Copyright 2021 The TCMalloc Authors + + +====================File: LICENSE==================== + + Apache License + Version 2.0, January 2004 + https://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. 
+ + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/contrib/libs/tcmalloc/CONTRIBUTING.md b/contrib/libs/tcmalloc/CONTRIBUTING.md new file mode 100644 index 0000000000..d10cc0d08f --- /dev/null +++ b/contrib/libs/tcmalloc/CONTRIBUTING.md @@ -0,0 +1,74 @@ +# How to Contribute to TCMalloc + +We'd love to accept your patches and contributions to this project. There are +just a few small guidelines you need to follow. + +NOTE: If you are new to GitHub, please start by reading [Pull Request +howto](https://help.github.com/articles/about-pull-requests/) + +## Contributor License Agreement + +Contributions to this project must be accompanied by a Contributor License +Agreement. You (or your employer) retain the copyright to your contribution; +this simply gives us permission to use and redistribute your contributions as +part of the project. Head over to <https://cla.developers.google.com/> to see +your current agreements on file or to sign a new one. + +You generally only need to submit a CLA once, so if you've already submitted one +(even if it was for a different project), you probably don't need to do it +again. + +## Guidelines for Pull Requests + +* All submissions, including submissions by project members, require review. + We use GitHub pull requests for this purpose. Consult + [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more + information on using pull requests. + +* If you are a Googler, it is preferable to first create an internal CL and + have it reviewed and submitted. The code propagation process will deliver + the change to GitHub. + +* Create **small PRs** that are narrowly focused on **addressing a single concern**. + When PRs try to fix several things at a time, if only one fix is considered + acceptable, nothing gets merged and both author's & review's time is wasted. + Create more PRs to address different concerns and everyone will be happy. + +* Provide a good **PR description** as a record of **what** change is being + made and **why** it was made. Link to a GitHub issue if it exists. + +* Don't fix code style and formatting unless you are already changing that line + to address an issue. Formatting of modified lines may be done using + `git clang-format`. PRs with irrelevant changes won't be merged. If you do + want to fix formatting or style, do that in a separate PR. + +* Unless your PR is trivial, you should expect there will be reviewer comments + that you'll need to address before merging. We expect you to be reasonably + responsive to those comments, otherwise the PR will be closed after 2-3 weeks + of inactivity. + +* Maintain **clean commit history** and use **meaningful commit messages**. + PRs with messy commit history are difficult to review and won't be merged. + Use `rebase -i upstream/master` to curate your commit history and/or to + bring in latest changes from master (but avoid rebasing in the middle of a + code review). + +* Keep your PR up to date with upstream/master (if there are merge conflicts, + we can't really merge your change). + +* **All tests need to be passing** before your change can be merged. We + recommend you **run tests locally** (see below) + +* Exceptions to the rules can be made if there's a compelling reason for doing + so. That is - the rules are here to serve us, not the other way around, and + the rules need to be serving their intended purpose to be valuable. + +## TCMalloc Committers + +The current members of the TCMalloc engineering team are the only committers at +present. 
+ +## Community Guidelines + +This project follows +[Google's Open Source Community Guidelines](https://opensource.google.com/conduct/). diff --git a/contrib/libs/tcmalloc/LICENSE b/contrib/libs/tcmalloc/LICENSE new file mode 100644 index 0000000000..62589edd12 --- /dev/null +++ b/contrib/libs/tcmalloc/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + https://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. 
We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/contrib/libs/tcmalloc/README.md b/contrib/libs/tcmalloc/README.md new file mode 100644 index 0000000000..c848b4838d --- /dev/null +++ b/contrib/libs/tcmalloc/README.md @@ -0,0 +1,44 @@ +# TCMalloc + +This repository contains the TCMalloc C++ code. + +TCMalloc is Google's customized implementation of C's `malloc()` and C++'s +`operator new` used for memory allocation within our C and C++ code. TCMalloc is +a fast, multi-threaded malloc implementation. + +## Building TCMalloc + +[Bazel](https://bazel.build) is the official build system for TCMalloc. + +The [TCMalloc Platforms Guide](docs/platforms.md) contains information on +platform support for TCMalloc. + +## Documentation + +All users of TCMalloc should consult the following documentation resources: + +* The [TCMalloc Quickstart](docs/quickstart.md) covers downloading, installing, + building, and testing TCMalloc, including incorporating within your codebase. +* The [TCMalloc Overview](docs/overview.md) covers the basic architecture of + TCMalloc, and how that may affect configuration choices. +* The [TCMalloc Reference](docs/reference.md) covers the C and C++ TCMalloc API + endpoints. + +More advanced usages of TCMalloc may find the following documentation useful: + +* The [TCMalloc Tuning Guide](docs/tuning.md) covers the configuration choices + in more depth, and also illustrates other ways to customize TCMalloc. This + also covers important operating system-level properties for improving TCMalloc + performance. +* The [TCMalloc Design Doc](docs/design.md) covers how TCMalloc works + underneath the hood, and why certain design choices were made. Most developers + will not need this level of implementation detail. +* The [TCMalloc Compatibility Guide](docs/compatibility.md) which documents our + expectations for how our APIs are used. + +## License + +The TCMalloc library is licensed under the terms of the Apache +license. See LICENSE for more information. + +Disclaimer: This is not an officially supported Google product. 
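For illustration only (not part of the upstream README): the sketch below assumes the binary is linked against one of the TCMalloc libraries added in this change, for example via Bazel's `malloc = "//tcmalloc"` attribute mentioned later in the BUILD file, or one of the `contrib/libs/tcmalloc/*` ya.make variants below. Allocation call sites need no changes; `tcmalloc/malloc_extension.h` additionally exposes runtime introspection (see docs/reference.md).

```cpp
// Minimal sketch, assuming TCMalloc is linked into the binary; no call-site
// changes are required for malloc()/operator new to be served by TCMalloc.
#include <cstdlib>
#include <memory>

#include "tcmalloc/malloc_extension.h"

int main() {
  auto buffer = std::make_unique<char[]>(1 << 20);  // served by TCMalloc's operator new[]
  buffer[0] = 'x';

  void* raw = std::malloc(4096);                    // served by TCMalloc's malloc()
  std::free(raw);

  // MallocExtension reports allocator state at runtime, e.g. whether the
  // per-CPU caches described in docs/tuning.md are active.
  return tcmalloc::MallocExtension::PerCpuCachesActive() ? 0 : 1;
}
```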
diff --git a/contrib/libs/tcmalloc/common.inc b/contrib/libs/tcmalloc/common.inc new file mode 100644 index 0000000000..077942c387 --- /dev/null +++ b/contrib/libs/tcmalloc/common.inc @@ -0,0 +1,58 @@ +GLOBAL_SRCS( + # TCMalloc + tcmalloc/tcmalloc.cc + + # Common Sources + tcmalloc/arena.cc + tcmalloc/background.cc + tcmalloc/central_freelist.cc + tcmalloc/common.cc + tcmalloc/cpu_cache.cc + tcmalloc/experimental_pow2_below64_size_class.cc + tcmalloc/experimental_pow2_size_class.cc + tcmalloc/legacy_size_classes.cc + tcmalloc/guarded_page_allocator.cc + tcmalloc/huge_address_map.cc + tcmalloc/huge_allocator.cc + tcmalloc/huge_cache.cc + tcmalloc/huge_page_aware_allocator.cc + tcmalloc/page_allocator.cc + tcmalloc/page_allocator_interface.cc + tcmalloc/page_heap.cc + tcmalloc/pagemap.cc + tcmalloc/parameters.cc + tcmalloc/peak_heap_tracker.cc + tcmalloc/sampler.cc + tcmalloc/size_classes.cc + tcmalloc/span.cc + tcmalloc/stack_trace_table.cc + tcmalloc/static_vars.cc + tcmalloc/stats.cc + tcmalloc/system-alloc.cc + tcmalloc/thread_cache.cc + tcmalloc/transfer_cache.cc + + # Common deps + tcmalloc/experiment.cc + tcmalloc/noruntime_size_classes.cc + + # Internal libraries + tcmalloc/internal/cache_topology.cc + tcmalloc/internal/environment.cc + tcmalloc/internal/logging.cc + tcmalloc/internal/memory_stats.cc + tcmalloc/internal/mincore.cc + tcmalloc/internal/numa.cc + tcmalloc/internal/percpu.cc + tcmalloc/internal/percpu_rseq_asm.S + tcmalloc/internal/percpu_rseq_unsupported.cc + tcmalloc/internal/util.cc +) + +PEERDIR( + contrib/restricted/abseil-cpp + contrib/libs/tcmalloc/malloc_extension +) + +NO_UTIL() +NO_COMPILER_WARNINGS() diff --git a/contrib/libs/tcmalloc/default/ya.make b/contrib/libs/tcmalloc/default/ya.make new file mode 100644 index 0000000000..b69b077e19 --- /dev/null +++ b/contrib/libs/tcmalloc/default/ya.make @@ -0,0 +1,22 @@ +LIBRARY() + +WITHOUT_LICENSE_TEXTS() + +LICENSE(Apache-2.0) + +OWNER( + ayles + prime + g:cpp-contrib +) + +SRCDIR(contrib/libs/tcmalloc) + +INCLUDE(../common.inc) + +GLOBAL_SRCS( + # Options + tcmalloc/want_hpaa.cc +) + +END() diff --git a/contrib/libs/tcmalloc/dynamic/ya.make b/contrib/libs/tcmalloc/dynamic/ya.make new file mode 100644 index 0000000000..72f91dfc81 --- /dev/null +++ b/contrib/libs/tcmalloc/dynamic/ya.make @@ -0,0 +1,2 @@ +DLL_FOR(contrib/libs/tcmalloc tcmalloc) +OWNER(g:contrib) diff --git a/contrib/libs/tcmalloc/malloc_extension/ya.make b/contrib/libs/tcmalloc/malloc_extension/ya.make new file mode 100644 index 0000000000..c9a07c2454 --- /dev/null +++ b/contrib/libs/tcmalloc/malloc_extension/ya.make @@ -0,0 +1,37 @@ +LIBRARY() + +WITHOUT_LICENSE_TEXTS() + +LICENSE(Apache-2.0) + +OWNER( + prime + g:cpp-contrib +) + +NO_UTIL() + +NO_COMPILER_WARNINGS() + +# https://github.com/google/tcmalloc +VERSION(2020-11-23-a643d89610317be1eff9f7298104eef4c987d8d5) + +SRCDIR(contrib/libs/tcmalloc) + +SRCS( + tcmalloc/malloc_extension.cc +) + +PEERDIR( + contrib/restricted/abseil-cpp +) + +ADDINCL( + GLOBAL contrib/libs/tcmalloc +) + +CFLAGS( + -DTCMALLOC_256K_PAGES +) + +END() diff --git a/contrib/libs/tcmalloc/numa_256k/ya.make b/contrib/libs/tcmalloc/numa_256k/ya.make new file mode 100644 index 0000000000..ffede5df8b --- /dev/null +++ b/contrib/libs/tcmalloc/numa_256k/ya.make @@ -0,0 +1,28 @@ +LIBRARY() + +WITHOUT_LICENSE_TEXTS() + +LICENSE(Apache-2.0) + +OWNER( + ayles + prime + g:cpp-contrib +) + +SRCDIR(contrib/libs/tcmalloc) + +INCLUDE(../common.inc) + +GLOBAL_SRCS( + # Options + tcmalloc/want_hpaa.cc + tcmalloc/want_numa_aware.cc +) + 
+CFLAGS( + -DTCMALLOC_256K_PAGES + -DTCMALLOC_NUMA_AWARE +) + +END() diff --git a/contrib/libs/tcmalloc/numa_large_pages/ya.make b/contrib/libs/tcmalloc/numa_large_pages/ya.make new file mode 100644 index 0000000000..f39c1e15ba --- /dev/null +++ b/contrib/libs/tcmalloc/numa_large_pages/ya.make @@ -0,0 +1,28 @@ +LIBRARY() + +WITHOUT_LICENSE_TEXTS() + +LICENSE(Apache-2.0) + +OWNER( + ayles + prime + g:cpp-contrib +) + +SRCDIR(contrib/libs/tcmalloc) + +INCLUDE(../common.inc) + +GLOBAL_SRCS( + # Options + tcmalloc/want_hpaa.cc + tcmalloc/want_numa_aware.cc +) + +CFLAGS( + -DTCMALLOC_LARGE_PAGES + -DTCMALLOC_NUMA_AWARE +) + +END() diff --git a/contrib/libs/tcmalloc/patches/fork.patch b/contrib/libs/tcmalloc/patches/fork.patch new file mode 100644 index 0000000000..2503394431 --- /dev/null +++ b/contrib/libs/tcmalloc/patches/fork.patch @@ -0,0 +1,310 @@ +--- contrib/libs/tcmalloc/tcmalloc/central_freelist.h (index) ++++ contrib/libs/tcmalloc/tcmalloc/central_freelist.h (working tree) +@@ -70,6 +70,14 @@ class CentralFreeList { + + SpanStats GetSpanStats() const; + ++ void AcquireInternalLocks() { ++ lock_.Lock(); ++ } ++ ++ void ReleaseInternalLocks() { ++ lock_.Unlock(); ++ } ++ + private: + // Release an object to spans. + // Returns object's span if it become completely free. +--- contrib/libs/tcmalloc/tcmalloc/cpu_cache.cc (index) ++++ contrib/libs/tcmalloc/tcmalloc/cpu_cache.cc (working tree) +@@ -1031,6 +1031,20 @@ void CPUCache::PrintInPbtxt(PbtxtRegion *region) const { + } + } + ++void CPUCache::AcquireInternalLocks() { ++ for (int cpu = 0, num_cpus = absl::base_internal::NumCPUs(); cpu < num_cpus; ++ ++cpu) { ++ resize_[cpu].lock.Lock(); ++ } ++} ++ ++void CPUCache::ReleaseInternalLocks() { ++ for (int cpu = 0, num_cpus = absl::base_internal::NumCPUs(); cpu < num_cpus; ++ ++cpu) { ++ resize_[cpu].lock.Unlock(); ++ } ++} ++ + void CPUCache::PerClassResizeInfo::Init() { + state_.store(0, std::memory_order_relaxed); + } +--- contrib/libs/tcmalloc/tcmalloc/cpu_cache.h (index) ++++ contrib/libs/tcmalloc/tcmalloc/cpu_cache.h (working tree) +@@ -164,6 +164,9 @@ class CPUCache { + void Print(Printer* out) const; + void PrintInPbtxt(PbtxtRegion* region) const; + ++ void AcquireInternalLocks(); ++ void ReleaseInternalLocks(); ++ + private: + // Per-size-class freelist resizing info. + class PerClassResizeInfo { +--- contrib/libs/tcmalloc/tcmalloc/internal_malloc_extension.h (index) ++++ contrib/libs/tcmalloc/tcmalloc/internal_malloc_extension.h (working tree) +@@ -116,6 +116,10 @@ ABSL_ATTRIBUTE_WEAK int64_t + MallocExtension_Internal_GetMaxTotalThreadCacheBytes(); + ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_SetMaxTotalThreadCacheBytes( + int64_t value); ++ ++ABSL_ATTRIBUTE_WEAK void ++MallocExtension_EnableForkSupport(); ++ + } + + #endif +--- contrib/libs/tcmalloc/tcmalloc/malloc_extension.cc (index) ++++ contrib/libs/tcmalloc/tcmalloc/malloc_extension.cc (working tree) +@@ -460,6 +460,14 @@ void MallocExtension::SetBackgroundReleaseRate(BytesPerSecond rate) { + #endif + } + ++void MallocExtension::EnableForkSupport() { ++#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS ++ if (&MallocExtension_EnableForkSupport != nullptr) { ++ MallocExtension_EnableForkSupport(); ++ } ++#endif ++} ++ + } // namespace tcmalloc + + // Default implementation just returns size. 
The expectation is that +--- contrib/libs/tcmalloc/tcmalloc/malloc_extension.h (index) ++++ contrib/libs/tcmalloc/tcmalloc/malloc_extension.h (working tree) +@@ -468,6 +468,10 @@ class MallocExtension final { + // Specifies the release rate from the page heap. ProcessBackgroundActions + // must be called for this to be operative. + static void SetBackgroundReleaseRate(BytesPerSecond rate); ++ ++ // Enables fork support. ++ // Allocator will continue to function correctly in the child, after calling fork(). ++ static void EnableForkSupport(); + }; + + } // namespace tcmalloc +--- contrib/libs/tcmalloc/tcmalloc/static_vars.cc (index) ++++ contrib/libs/tcmalloc/tcmalloc/static_vars.cc (working tree) +@@ -59,6 +59,7 @@ ABSL_CONST_INIT PageHeapAllocator<StackTraceTable::Bucket> + Static::bucket_allocator_; + ABSL_CONST_INIT std::atomic<bool> Static::inited_{false}; + ABSL_CONST_INIT bool Static::cpu_cache_active_ = false; ++ABSL_CONST_INIT bool Static::fork_support_enabled_ = false; + ABSL_CONST_INIT Static::PageAllocatorStorage Static::page_allocator_; + ABSL_CONST_INIT PageMap Static::pagemap_; + ABSL_CONST_INIT absl::base_internal::SpinLock guarded_page_lock( +@@ -116,6 +117,13 @@ ABSL_ATTRIBUTE_COLD ABSL_ATTRIBUTE_NOINLINE void Static::SlowInitIfNecessary() { + pagemap_.MapRootWithSmallPages(); + guardedpage_allocator_.Init(/*max_alloced_pages=*/64, /*total_pages=*/128); + inited_.store(true, std::memory_order_release); ++ ++ pageheap_lock.Unlock(); ++ pthread_atfork( ++ TCMallocPreFork, ++ TCMallocPostFork, ++ TCMallocPostFork); ++ pageheap_lock.Lock(); + } + } + +--- contrib/libs/tcmalloc/tcmalloc/static_vars.h (index) ++++ contrib/libs/tcmalloc/tcmalloc/static_vars.h (working tree) +@@ -50,6 +50,9 @@ class CPUCache; + class PageMap; + class ThreadCache; + ++void TCMallocPreFork(); ++void TCMallocPostFork(); ++ + class Static { + public: + // True if InitIfNecessary() has run to completion. +@@ -124,6 +127,9 @@ class Static { + static void ActivateCPUCache() { cpu_cache_active_ = true; } + static void DeactivateCPUCache() { cpu_cache_active_ = false; } + ++ static bool ForkSupportEnabled() { return fork_support_enabled_; } ++ static void EnableForkSupport() { fork_support_enabled_ = true; } ++ + static bool ABSL_ATTRIBUTE_ALWAYS_INLINE IsOnFastPath() { + return + #ifndef TCMALLOC_DEPRECATED_PERTHREAD +@@ -169,6 +175,7 @@ class Static { + static PageHeapAllocator<StackTraceTable::Bucket> bucket_allocator_; + ABSL_CONST_INIT static std::atomic<bool> inited_; + static bool cpu_cache_active_; ++ static bool fork_support_enabled_; + ABSL_CONST_INIT static PeakHeapTracker peak_heap_tracker_; + ABSL_CONST_INIT static NumaTopology<kNumaPartitions, kNumBaseClasses> + numa_topology_; +--- contrib/libs/tcmalloc/tcmalloc/system-alloc.cc (index) ++++ contrib/libs/tcmalloc/tcmalloc/system-alloc.cc (working tree) +@@ -354,6 +354,14 @@ ABSL_CONST_INIT std::atomic<int> system_release_errors = ATOMIC_VAR_INIT(0); + + } // namespace + ++void AcquireSystemAllocLock() { ++ spinlock.Lock(); ++} ++ ++void ReleaseSystemAllocLock() { ++ spinlock.Unlock(); ++} ++ + void* SystemAlloc(size_t bytes, size_t* actual_bytes, size_t alignment, + const MemoryTag tag) { + // If default alignment is set request the minimum alignment provided by +--- contrib/libs/tcmalloc/tcmalloc/system-alloc.h (index) ++++ contrib/libs/tcmalloc/tcmalloc/system-alloc.h (working tree) +@@ -50,6 +50,9 @@ void *SystemAlloc(size_t bytes, size_t *actual_bytes, size_t alignment, + // call to SystemRelease. 
+ int SystemReleaseErrors(); + ++void AcquireSystemAllocLock(); ++void ReleaseSystemAllocLock(); ++ + // This call is a hint to the operating system that the pages + // contained in the specified range of memory will not be used for a + // while, and can be released for use by other processes or the OS. +--- contrib/libs/tcmalloc/tcmalloc/tcmalloc.cc (index) ++++ contrib/libs/tcmalloc/tcmalloc/tcmalloc.cc (working tree) +@@ -1117,6 +1117,40 @@ extern "C" void MallocExtension_Internal_ReleaseMemoryToSystem( + } + } + ++extern "C" void MallocExtension_EnableForkSupport() { ++ Static::EnableForkSupport(); ++} ++ ++void TCMallocPreFork() { ++ if (!Static::ForkSupportEnabled()) { ++ return; ++ } ++ ++ if (Static::CPUCacheActive()) { ++ Static::cpu_cache().AcquireInternalLocks(); ++ } ++ Static::transfer_cache().AcquireInternalLocks(); ++ guarded_page_lock.Lock(); ++ release_lock.Lock(); ++ pageheap_lock.Lock(); ++ AcquireSystemAllocLock(); ++} ++ ++void TCMallocPostFork() { ++ if (!Static::ForkSupportEnabled()) { ++ return; ++ } ++ ++ ReleaseSystemAllocLock(); ++ pageheap_lock.Unlock(); ++ guarded_page_lock.Unlock(); ++ release_lock.Unlock(); ++ Static::transfer_cache().ReleaseInternalLocks(); ++ if (Static::CPUCacheActive()) { ++ Static::cpu_cache().ReleaseInternalLocks(); ++ } ++} ++ + // nallocx slow path. + // Moved to a separate function because size_class_with_alignment is not inlined + // which would cause nallocx to become non-leaf function with stack frame and +--- contrib/libs/tcmalloc/tcmalloc/tcmalloc.h (index) ++++ contrib/libs/tcmalloc/tcmalloc/tcmalloc.h (working tree) +@@ -120,4 +120,7 @@ void TCMallocInternalDeleteArrayNothrow(void* p, const std::nothrow_t&) __THROW + } + #endif + ++void TCMallocInternalAcquireLocks(); ++void TCMallocInternalReleaseLocks(); ++ + #endif // TCMALLOC_TCMALLOC_H_ +--- contrib/libs/tcmalloc/tcmalloc/transfer_cache.h (index) ++++ contrib/libs/tcmalloc/tcmalloc/transfer_cache.h (working tree) +@@ -176,6 +176,26 @@ class TransferCacheManager : public StaticForwarder { + } + } + ++ void AcquireInternalLocks() { ++ for (int i = 0; i < kNumClasses; ++i) { ++ if (implementation_ == TransferCacheImplementation::Ring) { ++ cache_[i].rbtc.AcquireInternalLocks(); ++ } else { ++ cache_[i].tc.AcquireInternalLocks(); ++ } ++ } ++ } ++ ++ void ReleaseInternalLocks() { ++ for (int i = 0; i < kNumClasses; ++i) { ++ if (implementation_ == TransferCacheImplementation::Ring) { ++ cache_[i].rbtc.ReleaseInternalLocks(); ++ } else { ++ cache_[i].tc.ReleaseInternalLocks(); ++ } ++ } ++ } ++ + void InsertRange(int size_class, absl::Span<void *> batch) { + if (implementation_ == TransferCacheImplementation::Ring) { + cache_[size_class].rbtc.InsertRange(size_class, batch); +@@ -295,6 +315,9 @@ class TransferCacheManager { + return TransferCacheImplementation::None; + } + ++ void AcquireInternalLocks() {} ++ void ReleaseInternalLocks() {} ++ + private: + CentralFreeList freelist_[kNumClasses]; + } ABSL_CACHELINE_ALIGNED; +--- contrib/libs/tcmalloc/tcmalloc/transfer_cache_internals.h (index) ++++ contrib/libs/tcmalloc/tcmalloc/transfer_cache_internals.h (working tree) +@@ -366,6 +366,18 @@ class TransferCache { + return freelist_do_not_access_directly_; + } + ++ void AcquireInternalLocks() ++ { ++ freelist().AcquireInternalLocks(); ++ lock_.Lock(); ++ } ++ ++ void ReleaseInternalLocks() ++ { ++ lock_.Unlock(); ++ freelist().ReleaseInternalLocks(); ++ } ++ + private: + // Returns first object of the i-th slot. 
+ void **GetSlot(size_t i) ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) { +@@ -468,6 +480,18 @@ class RingBufferTransferCache { + + // These methods all do internal locking. + ++ void AcquireInternalLocks() ++ { ++ freelist().AcquireInternalLocks(); ++ lock_.Lock(); ++ } ++ ++ void ReleaseInternalLocks() ++ { ++ lock_.Unlock(); ++ freelist().ReleaseInternalLocks(); ++ } ++ + // Insert the specified batch into the transfer cache. N is the number of + // elements in the range. RemoveRange() is the opposite operation. + void InsertRange(int size_class, absl::Span<void *> batch) diff --git a/contrib/libs/tcmalloc/patches/userdata.patch b/contrib/libs/tcmalloc/patches/userdata.patch new file mode 100644 index 0000000000..83373cebfe --- /dev/null +++ b/contrib/libs/tcmalloc/patches/userdata.patch @@ -0,0 +1,220 @@ +--- contrib/libs/tcmalloc/tcmalloc/internal/logging.h (index) ++++ contrib/libs/tcmalloc/tcmalloc/internal/logging.h (working tree) +@@ -67,6 +67,8 @@ struct StackTrace { + // between the previous sample and this one + size_t weight; + ++ void* user_data; ++ + template <typename H> + friend H AbslHashValue(H h, const StackTrace& t) { + // As we use StackTrace as a key-value node in StackTraceTable, we only +--- contrib/libs/tcmalloc/tcmalloc/internal_malloc_extension.h (index) ++++ contrib/libs/tcmalloc/tcmalloc/internal_malloc_extension.h (working tree) +@@ -120,6 +120,12 @@ ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_SetMaxTotalThreadCacheBytes( + ABSL_ATTRIBUTE_WEAK void + MallocExtension_EnableForkSupport(); + ++ABSL_ATTRIBUTE_WEAK void ++MallocExtension_SetSampleUserDataCallbacks( ++ tcmalloc::MallocExtension::CreateSampleUserDataCallback create, ++ tcmalloc::MallocExtension::CopySampleUserDataCallback copy, ++ tcmalloc::MallocExtension::DestroySampleUserDataCallback destroy); ++ + } + + #endif +--- contrib/libs/tcmalloc/tcmalloc/malloc_extension.cc (index) ++++ contrib/libs/tcmalloc/tcmalloc/malloc_extension.cc (working tree) +@@ -468,6 +468,21 @@ void MallocExtension::EnableForkSupport() { + #endif + } + ++void MallocExtension::SetSampleUserDataCallbacks( ++ CreateSampleUserDataCallback create, ++ CopySampleUserDataCallback copy, ++ DestroySampleUserDataCallback destroy) { ++#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS ++ if (&MallocExtension_SetSampleUserDataCallbacks != nullptr) { ++ MallocExtension_SetSampleUserDataCallbacks(create, copy, destroy); ++ } ++#else ++ (void)create; ++ (void)copy; ++ (void)destroy; ++#endif ++} ++ + } // namespace tcmalloc + + // Default implementation just returns size. The expectation is that +--- contrib/libs/tcmalloc/tcmalloc/malloc_extension.h (index) ++++ contrib/libs/tcmalloc/tcmalloc/malloc_extension.h (working tree) +@@ -94,6 +94,8 @@ class Profile final { + + int depth; + void* stack[kMaxStackDepth]; ++ ++ void* user_data; + }; + + void Iterate(absl::FunctionRef<void(const Sample&)> f) const; +@@ -472,6 +474,16 @@ class MallocExtension final { + // Enables fork support. + // Allocator will continue to function correctly in the child, after calling fork(). 
+ static void EnableForkSupport(); ++ ++ using CreateSampleUserDataCallback = void*(); ++ using CopySampleUserDataCallback = void*(void*); ++ using DestroySampleUserDataCallback = void(void*); ++ ++ // Sets callbacks for lifetime control of custom user data attached to allocation samples ++ static void SetSampleUserDataCallbacks( ++ CreateSampleUserDataCallback create, ++ CopySampleUserDataCallback copy, ++ DestroySampleUserDataCallback destroy); + }; + + } // namespace tcmalloc +--- contrib/libs/tcmalloc/tcmalloc/peak_heap_tracker.cc (index) ++++ contrib/libs/tcmalloc/tcmalloc/peak_heap_tracker.cc (working tree) +@@ -55,6 +55,7 @@ void PeakHeapTracker::MaybeSaveSample() { + StackTrace *t = peak_sampled_span_stacks_, *next = nullptr; + while (t != nullptr) { + next = reinterpret_cast<StackTrace*>(t->stack[kMaxStackDepth - 1]); ++ Static::DestroySampleUserData(t->user_data); + Static::stacktrace_allocator().Delete(t); + t = next; + } +@@ -63,7 +64,9 @@ void PeakHeapTracker::MaybeSaveSample() { + for (Span* s : Static::sampled_objects_) { + t = Static::stacktrace_allocator().New(); + +- *t = *s->sampled_stack(); ++ StackTrace* sampled_stack = s->sampled_stack(); ++ *t = *sampled_stack; ++ t->user_data = Static::CopySampleUserData(sampled_stack->user_data); + if (t->depth == kMaxStackDepth) { + t->depth = kMaxStackDepth - 1; + } +--- contrib/libs/tcmalloc/tcmalloc/stack_trace_table.cc (index) ++++ contrib/libs/tcmalloc/tcmalloc/stack_trace_table.cc (working tree) +@@ -73,6 +73,7 @@ StackTraceTable::~StackTraceTable() { + Bucket* b = table_[i]; + while (b != nullptr) { + Bucket* next = b->next; ++ Static::DestroySampleUserData(b->trace.user_data); + Static::bucket_allocator().Delete(b); + b = next; + } +@@ -104,6 +105,7 @@ void StackTraceTable::AddTrace(double count, const StackTrace& t) { + b = Static::bucket_allocator().New(); + b->hash = h; + b->trace = t; ++ b->trace.user_data = Static::CopySampleUserData(t.user_data); + b->count = count; + b->total_weight = t.weight * count; + b->next = table_[idx]; +@@ -135,6 +137,8 @@ void StackTraceTable::Iterate( + e.requested_alignment = b->trace.requested_alignment; + e.allocated_size = allocated_size; + ++ e.user_data = b->trace.user_data; ++ + e.depth = b->trace.depth; + static_assert(kMaxStackDepth <= Profile::Sample::kMaxStackDepth, + "Profile stack size smaller than internal stack sizes"); +--- contrib/libs/tcmalloc/tcmalloc/static_vars.cc (index) ++++ contrib/libs/tcmalloc/tcmalloc/static_vars.cc (working tree) +@@ -60,6 +60,12 @@ ABSL_CONST_INIT PageHeapAllocator<StackTraceTable::Bucket> + ABSL_CONST_INIT std::atomic<bool> Static::inited_{false}; + ABSL_CONST_INIT bool Static::cpu_cache_active_ = false; + ABSL_CONST_INIT bool Static::fork_support_enabled_ = false; ++ABSL_CONST_INIT Static::CreateSampleUserDataCallback* ++ Static::create_sample_user_data_callback_ = nullptr; ++ABSL_CONST_INIT Static::CopySampleUserDataCallback* ++ Static::copy_sample_user_data_callback_ = nullptr; ++ABSL_CONST_INIT Static::DestroySampleUserDataCallback* ++ Static::destroy_sample_user_data_callback_ = nullptr; + ABSL_CONST_INIT Static::PageAllocatorStorage Static::page_allocator_; + ABSL_CONST_INIT PageMap Static::pagemap_; + ABSL_CONST_INIT absl::base_internal::SpinLock guarded_page_lock( +--- contrib/libs/tcmalloc/tcmalloc/static_vars.h (index) ++++ contrib/libs/tcmalloc/tcmalloc/static_vars.h (working tree) +@@ -130,6 +130,34 @@ class Static { + static bool ForkSupportEnabled() { return fork_support_enabled_; } + static void EnableForkSupport() { 
fork_support_enabled_ = true; } + ++ using CreateSampleUserDataCallback = void*(); ++ using CopySampleUserDataCallback = void*(void*); ++ using DestroySampleUserDataCallback = void(void*); ++ ++ static void SetSampleUserDataCallbacks( ++ CreateSampleUserDataCallback create, ++ CopySampleUserDataCallback copy, ++ DestroySampleUserDataCallback destroy) { ++ create_sample_user_data_callback_ = create; ++ copy_sample_user_data_callback_ = copy; ++ destroy_sample_user_data_callback_ = destroy; ++ } ++ ++ static void* CreateSampleUserData() { ++ if (create_sample_user_data_callback_) ++ return create_sample_user_data_callback_(); ++ return nullptr; ++ } ++ static void* CopySampleUserData(void* user_data) { ++ if (copy_sample_user_data_callback_) ++ return copy_sample_user_data_callback_(user_data); ++ return nullptr; ++ } ++ static void DestroySampleUserData(void* user_data) { ++ if (destroy_sample_user_data_callback_) ++ destroy_sample_user_data_callback_(user_data); ++ } ++ + static bool ABSL_ATTRIBUTE_ALWAYS_INLINE IsOnFastPath() { + return + #ifndef TCMALLOC_DEPRECATED_PERTHREAD +@@ -176,6 +204,9 @@ class Static { + ABSL_CONST_INIT static std::atomic<bool> inited_; + static bool cpu_cache_active_; + static bool fork_support_enabled_; ++ static CreateSampleUserDataCallback* create_sample_user_data_callback_; ++ static CopySampleUserDataCallback* copy_sample_user_data_callback_; ++ static DestroySampleUserDataCallback* destroy_sample_user_data_callback_; + ABSL_CONST_INIT static PeakHeapTracker peak_heap_tracker_; + ABSL_CONST_INIT static NumaTopology<kNumaPartitions, kNumBaseClasses> + numa_topology_; +--- contrib/libs/tcmalloc/tcmalloc/tcmalloc.cc (index) ++++ contrib/libs/tcmalloc/tcmalloc/tcmalloc.cc (working tree) +@@ -1151,6 +1151,13 @@ void TCMallocPostFork() { + } + } + ++extern "C" void MallocExtension_SetSampleUserDataCallbacks( ++ MallocExtension::CreateSampleUserDataCallback create, ++ MallocExtension::CopySampleUserDataCallback copy, ++ MallocExtension::DestroySampleUserDataCallback destroy) { ++ Static::SetSampleUserDataCallbacks(create, copy, destroy); ++} ++ + // nallocx slow path. 
+ // Moved to a separate function because size_class_with_alignment is not inlined + // which would cause nallocx to become non-leaf function with stack frame and +@@ -1500,6 +1507,7 @@ static void* SampleifyAllocation(size_t requested_size, size_t weight, + tmp.requested_alignment = requested_alignment; + tmp.allocated_size = allocated_size; + tmp.weight = weight; ++ tmp.user_data = Static::CreateSampleUserData(); + + { + absl::base_internal::SpinLockHolder h(&pageheap_lock); +@@ -1629,6 +1637,7 @@ static void do_free_pages(void* ptr, const PageId p) { + 1); + } + notify_sampled_alloc = true; ++ Static::DestroySampleUserData(st->user_data); + Static::stacktrace_allocator().Delete(st); + } + if (IsSampledMemory(ptr)) { diff --git a/contrib/libs/tcmalloc/patches/yandex.patch b/contrib/libs/tcmalloc/patches/yandex.patch new file mode 100644 index 0000000000..12d11f2dad --- /dev/null +++ b/contrib/libs/tcmalloc/patches/yandex.patch @@ -0,0 +1,91 @@ +commit ab4069ebdd376db4d32c29e1a2414565ec849249 +author: prime +date: 2021-10-07T14:52:42+03:00 + + Apply yandex patches + +--- contrib/libs/tcmalloc/tcmalloc/cpu_cache.cc (5096009d22199137186c9a972bc88409d8ebd513) ++++ contrib/libs/tcmalloc/tcmalloc/cpu_cache.cc (ab4069ebdd376db4d32c29e1a2414565ec849249) +@@ -1112,6 +1112,11 @@ extern "C" bool MallocExtension_Internal_GetPerCpuCachesActive() { + return tcmalloc::tcmalloc_internal::Static::CPUCacheActive(); + } + ++extern "C" void MallocExtension_Internal_DeactivatePerCpuCaches() { ++ tcmalloc::tcmalloc_internal::Parameters::set_per_cpu_caches(false); ++ tcmalloc::tcmalloc_internal::Static::DeactivateCPUCache(); ++} ++ + extern "C" int32_t MallocExtension_Internal_GetMaxPerCpuCacheSize() { + return tcmalloc::tcmalloc_internal::Parameters::max_per_cpu_cache_size(); + } +--- contrib/libs/tcmalloc/tcmalloc/internal_malloc_extension.h (5096009d22199137186c9a972bc88409d8ebd513) ++++ contrib/libs/tcmalloc/tcmalloc/internal_malloc_extension.h (ab4069ebdd376db4d32c29e1a2414565ec849249) +@@ -75,6 +75,7 @@ ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_GetMemoryLimit( + ABSL_ATTRIBUTE_WEAK bool MallocExtension_Internal_GetNumericProperty( + const char* name_data, size_t name_size, size_t* value); + ABSL_ATTRIBUTE_WEAK bool MallocExtension_Internal_GetPerCpuCachesActive(); ++ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_DeactivatePerCpuCaches(); + ABSL_ATTRIBUTE_WEAK int32_t MallocExtension_Internal_GetMaxPerCpuCacheSize(); + ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_GetSkipSubreleaseInterval( + absl::Duration* ret); +--- contrib/libs/tcmalloc/tcmalloc/malloc_extension.cc (5096009d22199137186c9a972bc88409d8ebd513) ++++ contrib/libs/tcmalloc/tcmalloc/malloc_extension.cc (ab4069ebdd376db4d32c29e1a2414565ec849249) +@@ -287,6 +287,16 @@ bool MallocExtension::PerCpuCachesActive() { + #endif + } + ++void MallocExtension::DeactivatePerCpuCaches() { ++#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS ++ if (MallocExtension_Internal_DeactivatePerCpuCaches == nullptr) { ++ return; ++ } ++ ++ MallocExtension_Internal_DeactivatePerCpuCaches(); ++#endif ++} ++ + int32_t MallocExtension::GetMaxPerCpuCacheSize() { + #if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (MallocExtension_Internal_GetMaxPerCpuCacheSize == nullptr) { +--- contrib/libs/tcmalloc/tcmalloc/malloc_extension.h (5096009d22199137186c9a972bc88409d8ebd513) ++++ contrib/libs/tcmalloc/tcmalloc/malloc_extension.h (ab4069ebdd376db4d32c29e1a2414565ec849249) +@@ -329,6 +329,11 @@ class MallocExtension final { + // Gets whether TCMalloc is 
using per-CPU caches. + static bool PerCpuCachesActive(); + ++ // Extension for unified agent. ++ // ++ // Should be removed in the future https://st.yandex-team.ru/UNIFIEDAGENT-321 ++ static void DeactivatePerCpuCaches(); ++ + // Gets the current maximum cache size per CPU cache. + static int32_t GetMaxPerCpuCacheSize(); + // Sets the maximum cache size per CPU cache. This is a per-core limit. +--- contrib/libs/tcmalloc/tcmalloc/static_vars.h (5096009d22199137186c9a972bc88409d8ebd513) ++++ contrib/libs/tcmalloc/tcmalloc/static_vars.h (ab4069ebdd376db4d32c29e1a2414565ec849249) +@@ -122,6 +122,7 @@ class Static { + return cpu_cache_active_; + } + static void ActivateCPUCache() { cpu_cache_active_ = true; } ++ static void DeactivateCPUCache() { cpu_cache_active_ = false; } + + static bool ABSL_ATTRIBUTE_ALWAYS_INLINE IsOnFastPath() { + return +--- contrib/libs/tcmalloc/tcmalloc/tcmalloc.cc (5096009d22199137186c9a972bc88409d8ebd513) ++++ contrib/libs/tcmalloc/tcmalloc/tcmalloc.cc (ab4069ebdd376db4d32c29e1a2414565ec849249) +@@ -2210,14 +2210,7 @@ extern "C" void* TCMallocInternalNewArray(size_t size) + TCMALLOC_ALIAS(TCMallocInternalNew); + #else + { +- void* p = fast_alloc(CppPolicy().WithoutHooks(), size); +- // We keep this next instruction out of fast_alloc for a reason: when +- // it's in, and new just calls fast_alloc, the optimizer may fold the +- // new call into fast_alloc, which messes up our whole section-based +- // stacktracing (see ABSL_ATTRIBUTE_SECTION, above). This ensures fast_alloc +- // isn't the last thing this fn calls, and prevents the folding. +- MallocHook::InvokeNewHook(p, size); +- return p; ++ return fast_alloc(CppPolicy().WithoutHooks(), size); + } + #endif // TCMALLOC_ALIAS + diff --git a/contrib/libs/tcmalloc/slow_but_small/ya.make b/contrib/libs/tcmalloc/slow_but_small/ya.make new file mode 100644 index 0000000000..ddcb157d30 --- /dev/null +++ b/contrib/libs/tcmalloc/slow_but_small/ya.make @@ -0,0 +1,21 @@ +LIBRARY() + +WITHOUT_LICENSE_TEXTS() + +LICENSE(Apache-2.0) + +OWNER( + ayles + prime + g:cpp-contrib +) + +SRCDIR(contrib/libs/tcmalloc) + +INCLUDE(../common.inc) + +CFLAGS( + -DTCMALLOC_SMALL_BUT_SLOW +) + +END() diff --git a/contrib/libs/tcmalloc/tcmalloc/BUILD b/contrib/libs/tcmalloc/tcmalloc/BUILD new file mode 100644 index 0000000000..e618b85eec --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/BUILD @@ -0,0 +1,1316 @@ +# Copyright 2019 The TCMalloc Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Description: +# +# tcmalloc is a fast malloc implementation. See +# https://github.com/google/tcmalloc/tree/master/docs/design.md for a high-level description of +# how this malloc works. 
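For reference, the patches above (fork.patch, userdata.patch, yandex.patch) introduce three optional `MallocExtension` entry points. The sketch below only exercises the signatures those patches add; the callback bodies, the 64-byte tag, and the surrounding `fork()` flow are hypothetical and not part of the patches. Note that fork support works through `pthread_atfork` handlers gated by `EnableForkSupport()`, so it must be enabled before the first `fork()` for the pre/post handlers to take effect.

```cpp
// Illustrative sketch of the extension points added by the patches above.
// Only the MallocExtension method names and signatures come from the patches;
// the callbacks and control flow here are hypothetical.
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

#include <cstdlib>
#include <cstring>

#include "tcmalloc/malloc_extension.h"

// Sample user-data callbacks (userdata.patch): create/copy/destroy an opaque
// tag that TCMalloc attaches to sampled allocations.
static void* CreateTag() { return std::calloc(1, 64); }
static void* CopyTag(void* tag) {
  return tag ? std::memcpy(std::malloc(64), tag, 64) : nullptr;
}
static void DestroyTag(void* tag) { std::free(tag); }

int main() {
  // userdata.patch: register lifetime callbacks for per-sample user data.
  tcmalloc::MallocExtension::SetSampleUserDataCallbacks(CreateTag, CopyTag,
                                                        DestroyTag);

  // yandex.patch: per-CPU caches can be switched off at runtime if desired.
  tcmalloc::MallocExtension::DeactivatePerCpuCaches();

  // fork.patch: take TCMalloc's internal locks around fork() so the child
  // inherits a consistent allocator state.
  tcmalloc::MallocExtension::EnableForkSupport();
  pid_t pid = fork();
  if (pid == 0) {
    void* p = std::malloc(128);  // safe in the child once fork support is enabled
    std::free(p);
    _exit(0);
  }
  if (pid > 0) waitpid(pid, nullptr, 0);
  return 0;
}
```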
+ +load("@rules_fuzzing//fuzzing:cc_defs.bzl", "cc_fuzz_test") +load("//tcmalloc:copts.bzl", "TCMALLOC_DEFAULT_COPTS") +load("//tcmalloc:variants.bzl", "create_tcmalloc_benchmark", "create_tcmalloc_testsuite") + +package(default_visibility = ["//visibility:private"]) + +licenses(["notice"]) + +exports_files(["LICENSE"]) + +config_setting( + name = "llvm", + flag_values = { + "@bazel_tools//tools/cpp:compiler": "clang", + }, + visibility = [ + "//tcmalloc/internal:__subpackages__", + "//tcmalloc/testing:__subpackages__", + ], +) + +cc_library( + name = "experiment", + srcs = ["experiment.cc"], + hdrs = [ + "experiment.h", + "experiment_config.h", + ], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":malloc_extension", + "//tcmalloc/internal:environment", + "//tcmalloc/internal:logging", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:optional", + ], +) + +# Dependencies required by :tcmalloc and its variants. Since :common is built +# several different ways, it should not be included on this list. +tcmalloc_deps = [ + ":experiment", + ":malloc_extension", + "@com_google_absl//absl/base", + "@com_google_absl//absl/base:config", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/base:dynamic_annotations", + "@com_google_absl//absl/debugging:leak_check", + "@com_google_absl//absl/debugging:stacktrace", + "@com_google_absl//absl/debugging:symbolize", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/numeric:bits", + "//tcmalloc/internal:config", + "//tcmalloc/internal:declarations", + "//tcmalloc/internal:linked_list", + "//tcmalloc/internal:logging", + "//tcmalloc/internal:memory_stats", + "//tcmalloc/internal:optimization", + "//tcmalloc/internal:percpu", +] + +# This library provides tcmalloc always +cc_library( + name = "tcmalloc", + srcs = [ + "libc_override.h", + "libc_override_gcc_and_weak.h", + "libc_override_glibc.h", + "libc_override_redefine.h", + "tcmalloc.cc", + "tcmalloc.h", + ], + copts = TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + visibility = ["//visibility:public"], + deps = tcmalloc_deps + [ + ":common", + ], + alwayslink = 1, +) + +# Provides tcmalloc always; use per-thread mode. +cc_library( + name = "tcmalloc_deprecated_perthread", + srcs = [ + "libc_override.h", + "libc_override_gcc_and_weak.h", + "libc_override_glibc.h", + "libc_override_redefine.h", + "tcmalloc.cc", + "tcmalloc.h", + ], + copts = ["-DTCMALLOC_DEPRECATED_PERTHREAD"] + TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + visibility = [ + "//tcmalloc/internal:__pkg__", + "//tcmalloc/testing:__pkg__", + ], + deps = tcmalloc_deps + [ + ":common_deprecated_perthread", + ], + alwayslink = 1, +) + +# An opt tcmalloc build with ASSERTs forced on (by turning off +# NDEBUG). Useful for tracking down crashes in production binaries. +# To use add malloc = "//tcmalloc:opt_with_assertions" in your +# target's build rule. 
+cc_library( + name = "opt_with_assertions", + srcs = [ + "libc_override.h", + "libc_override_gcc_and_weak.h", + "libc_override_glibc.h", + "libc_override_redefine.h", + "tcmalloc.cc", + "tcmalloc.h", + ], + copts = [ + "-O2", + "-UNDEBUG", + ] + TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + visibility = ["//visibility:public"], + deps = tcmalloc_deps + [ + ":common", + ], + alwayslink = 1, +) + +cc_library( + name = "size_class_info", + hdrs = ["size_class_info.h"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + "//tcmalloc/internal:logging", + ], +) + +# List of common source files used by the various tcmalloc libraries. +common_srcs = [ + "arena.cc", + "arena.h", + "background.cc", + "central_freelist.cc", + "central_freelist.h", + "common.cc", + "common.h", + "cpu_cache.cc", + "cpu_cache.h", + "experimental_pow2_below64_size_class.cc", + "experimental_pow2_size_class.cc", + "legacy_size_classes.cc", + "guarded_page_allocator.h", + "guarded_page_allocator.cc", + "huge_address_map.cc", + "huge_allocator.cc", + "huge_allocator.h", + "huge_cache.cc", + "huge_cache.h", + "huge_region.h", + "huge_page_aware_allocator.cc", + "huge_page_aware_allocator.h", + "huge_page_filler.h", + "huge_pages.h", + "page_allocator.cc", + "page_allocator.h", + "page_allocator_interface.cc", + "page_allocator_interface.h", + "page_heap.cc", + "page_heap.h", + "page_heap_allocator.h", + "pagemap.cc", + "pagemap.h", + "parameters.cc", + "peak_heap_tracker.cc", + "sampler.cc", + "sampler.h", + "size_classes.cc", + "span.cc", + "span.h", + "span_stats.h", + "stack_trace_table.cc", + "stack_trace_table.h", + "static_vars.cc", + "static_vars.h", + "stats.cc", + "system-alloc.cc", + "system-alloc.h", + "thread_cache.cc", + "thread_cache.h", + "tracking.h", + "transfer_cache_stats.h", + "transfer_cache.cc", + "transfer_cache.h", + "transfer_cache_internals.h", +] + +common_hdrs = [ + "arena.h", + "central_freelist.h", + "common.h", + "cpu_cache.h", + "guarded_page_allocator.h", + "huge_address_map.h", + "huge_allocator.h", + "tcmalloc_policy.h", + "huge_cache.h", + "huge_page_filler.h", + "huge_pages.h", + "huge_region.h", + "huge_page_aware_allocator.h", + "page_allocator.h", + "page_allocator_interface.h", + "page_heap.h", + "page_heap_allocator.h", + "pages.h", + "pagemap.h", + "parameters.h", + "peak_heap_tracker.h", + "sampler.h", + "span.h", + "span_stats.h", + "stack_trace_table.h", + "stats.h", + "static_vars.h", + "system-alloc.h", + "thread_cache.h", + "tracking.h", + "transfer_cache_stats.h", + "transfer_cache.h", + "transfer_cache_internals.h", +] + +common_deps = [ + ":experiment", + ":malloc_extension", + ":noruntime_size_classes", + ":size_class_info", + "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/base", + "@com_google_absl//absl/base:config", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/base:dynamic_annotations", + "@com_google_absl//absl/container:fixed_array", + "@com_google_absl//absl/debugging:debugging_internal", + "@com_google_absl//absl/debugging:stacktrace", + "@com_google_absl//absl/debugging:symbolize", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/hash:hash", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:str_format", + "@com_google_absl//absl/time", + "@com_google_absl//absl/types:optional", + "@com_google_absl//absl/types:span", + "//tcmalloc/internal:atomic_stats_counter", + "@com_google_absl//absl/numeric:bits", + "//tcmalloc/internal:config", + 
"//tcmalloc/internal:declarations", + "//tcmalloc/internal:environment", + "//tcmalloc/internal:linked_list", + "//tcmalloc/internal:logging", + "//tcmalloc/internal:mincore", + "//tcmalloc/internal:numa", + "//tcmalloc/internal:cache_topology", + "//tcmalloc/internal:optimization", + "//tcmalloc/internal:parameter_accessors", + "//tcmalloc/internal:percpu", + "//tcmalloc/internal:percpu_tcmalloc", + "//tcmalloc/internal:range_tracker", + "//tcmalloc/internal:timeseries_tracker", + "//tcmalloc/internal:util", +] + +cc_library( + name = "common", + srcs = common_srcs, + hdrs = common_hdrs, + copts = TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + visibility = ["//tcmalloc:tcmalloc_tests"], + deps = common_deps, + alwayslink = 1, +) + +cc_library( + name = "common_deprecated_perthread", + srcs = common_srcs, + hdrs = common_hdrs, + copts = ["-DTCMALLOC_DEPRECATED_PERTHREAD"] + TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + deps = common_deps, + alwayslink = 1, +) + +# TEMPORARY. WILL BE REMOVED. +# Add a dep to this if you want your binary to use hugepage-aware +# allocator. +cc_library( + name = "want_hpaa", + srcs = ["want_hpaa.cc"], + copts = ["-g0"] + TCMALLOC_DEFAULT_COPTS, + visibility = ["//visibility:public"], + deps = [ + "//tcmalloc/internal:config", + "@com_google_absl//absl/base:core_headers", + ], + alwayslink = 1, +) + +# TEMPORARY. WILL BE REMOVED. +# Add a dep to this if you want your binary to use hugepage-aware +# allocator with hpaa_subrelease=true. +cc_library( + name = "want_hpaa_subrelease", + srcs = ["want_hpaa_subrelease.cc"], + copts = ["-g0"] + TCMALLOC_DEFAULT_COPTS, + visibility = ["//visibility:public"], + deps = [ + "//tcmalloc/internal:config", + "@com_google_absl//absl/base:core_headers", + ], + alwayslink = 1, +) + +# TEMPORARY. WILL BE REMOVED. +# Add a dep to this if you want your binary to not use hugepage-aware +# allocator. +cc_library( + name = "want_no_hpaa", + srcs = ["want_no_hpaa.cc"], + copts = ["-g0"] + TCMALLOC_DEFAULT_COPTS, + visibility = ["//tcmalloc/testing:__pkg__"], + deps = [ + "//tcmalloc/internal:config", + "@com_google_absl//absl/base:core_headers", + ], + alwayslink = 1, +) + +# TEMPORARY. WILL BE REMOVED. +# Add a dep to this if you want your binary to use old span sizes. +cc_library( + name = "want_legacy_spans", + srcs = ["want_legacy_spans.cc"], + copts = ["-g0"] + TCMALLOC_DEFAULT_COPTS, + visibility = ["//tcmalloc/testing:__pkg__"], + deps = [ + "//tcmalloc/internal:config", + "@com_google_absl//absl/base:core_headers", + ], + alwayslink = 1, +) + +# Add a dep to this if you want your binary to enable NUMA awareness by +# default. 
+cc_library( + name = "want_numa_aware", + srcs = ["want_numa_aware.cc"], + copts = ["-g0"] + TCMALLOC_DEFAULT_COPTS, + visibility = [ + "//tcmalloc:__pkg__", + "//tcmalloc/internal:__pkg__", + "//tcmalloc/testing:__pkg__", + ], + deps = [ + "//tcmalloc/internal:config", + "@com_google_absl//absl/base:core_headers", + ], + alwayslink = 1, +) + +cc_library( + name = "runtime_size_classes", + srcs = ["runtime_size_classes.cc"], + hdrs = ["runtime_size_classes.h"], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = ["//visibility:private"], + deps = [ + ":size_class_info", + "//tcmalloc/internal:environment", + "//tcmalloc/internal:logging", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/strings", + ], + alwayslink = 1, +) + +cc_library( + name = "noruntime_size_classes", + srcs = ["noruntime_size_classes.cc"], + hdrs = ["runtime_size_classes.h"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":size_class_info", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/strings", + ], + alwayslink = 1, +) + +# TCMalloc with large pages is usually faster but fragmentation is higher. See +# https://github.com/google/tcmalloc/tree/master/docs/tuning.md for more details. +cc_library( + name = "tcmalloc_large_pages", + srcs = [ + "libc_override.h", + "libc_override_gcc_and_weak.h", + "libc_override_glibc.h", + "libc_override_redefine.h", + "tcmalloc.cc", + "tcmalloc.h", + ], + copts = ["-DTCMALLOC_LARGE_PAGES"] + TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + visibility = ["//visibility:public"], + deps = tcmalloc_deps + [ + ":common_large_pages", + ], + alwayslink = 1, +) + +cc_library( + name = "common_large_pages", + srcs = common_srcs, + hdrs = common_hdrs, + copts = ["-DTCMALLOC_LARGE_PAGES"] + TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + visibility = ["//tcmalloc:tcmalloc_tests"], + deps = common_deps, + alwayslink = 1, +) + +# TCMalloc with 256k pages is usually faster but fragmentation is higher. See +# https://github.com/google/tcmalloc/tree/master/docs/tuning.md for more details. 
+cc_library( + name = "tcmalloc_256k_pages", + srcs = [ + "libc_override.h", + "libc_override_gcc_and_weak.h", + "libc_override_glibc.h", + "libc_override_redefine.h", + "tcmalloc.cc", + "tcmalloc.h", + ], + copts = ["-DTCMALLOC_256K_PAGES"] + TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + visibility = ["//visibility:public"], + deps = tcmalloc_deps + [ + ":common_256k_pages", + ], + alwayslink = 1, +) + +cc_library( + name = "common_256k_pages", + srcs = common_srcs, + hdrs = common_hdrs, + copts = ["-DTCMALLOC_256K_PAGES"] + TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + visibility = ["//tcmalloc:tcmalloc_tests"], + deps = common_deps, + alwayslink = 1, +) + +cc_library( + name = "tcmalloc_256k_pages_and_numa", + srcs = [ + "libc_override.h", + "libc_override_gcc_and_weak.h", + "libc_override_glibc.h", + "libc_override_redefine.h", + "tcmalloc.cc", + "tcmalloc.h", + ], + copts = [ + "-DTCMALLOC_256K_PAGES", + "-DTCMALLOC_NUMA_AWARE", + ] + TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + visibility = ["//tcmalloc/testing:__pkg__"], + deps = tcmalloc_deps + [ + ":common_256k_pages_and_numa", + ], + alwayslink = 1, +) + +cc_library( + name = "common_256k_pages_and_numa", + srcs = common_srcs, + hdrs = common_hdrs, + copts = [ + "-DTCMALLOC_256K_PAGES", + "-DTCMALLOC_NUMA_AWARE", + ] + TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + visibility = ["//tcmalloc:tcmalloc_tests"], + deps = common_deps, + alwayslink = 1, +) + +# TCMalloc small-but-slow is a a version of TCMalloc that chooses to minimize +# fragmentation at a *severe* cost to performance. It should be used by +# applications that have significant memory constraints, but don't need to +# frequently allocate/free objects. +# +# See https://github.com/google/tcmalloc/tree/master/docs/tuning.md for more details. +cc_library( + name = "tcmalloc_small_but_slow", + srcs = [ + "libc_override.h", + "libc_override_gcc_and_weak.h", + "libc_override_glibc.h", + "libc_override_redefine.h", + "tcmalloc.cc", + "tcmalloc.h", + ], + copts = ["-DTCMALLOC_SMALL_BUT_SLOW"] + TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + visibility = ["//visibility:public"], + deps = tcmalloc_deps + [ + ":common_small_but_slow", + ], + alwayslink = 1, +) + +cc_library( + name = "common_small_but_slow", + srcs = common_srcs, + hdrs = common_hdrs, + copts = ["-DTCMALLOC_SMALL_BUT_SLOW"] + TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + visibility = ["//tcmalloc:tcmalloc_tests"], + deps = common_deps, + alwayslink = 1, +) + +# TCMalloc with NUMA awareness compiled in. Note that by default NUMA awareness +# will still be disabled at runtime - this default can be changed by adding a +# dependency upon want_numa_aware, or overridden by setting the +# TCMALLOC_NUMA_AWARE environment variable. +cc_library( + name = "tcmalloc_numa_aware", + srcs = [ + "libc_override.h", + "libc_override_gcc_and_weak.h", + "libc_override_glibc.h", + "libc_override_redefine.h", + "tcmalloc.cc", + "tcmalloc.h", + ], + copts = ["-DTCMALLOC_NUMA_AWARE"] + TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + visibility = ["//tcmalloc/testing:__pkg__"], + deps = tcmalloc_deps + [ + ":common_numa_aware", + ], + alwayslink = 1, +) + +cc_library( + name = "common_numa_aware", + srcs = common_srcs, + hdrs = common_hdrs, + copts = ["-DTCMALLOC_NUMA_AWARE"] + TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + visibility = ["//tcmalloc:tcmalloc_tests"], + deps = common_deps, + alwayslink = 1, +) + +# Export some header files to //tcmalloc/testing/... 
+package_group( + name = "tcmalloc_tests", + packages = [ + "//tcmalloc/...", + ], +) + +cc_library( + name = "headers_for_tests", + srcs = [ + "arena.h", + "central_freelist.h", + "guarded_page_allocator.h", + "huge_address_map.h", + "huge_allocator.h", + "huge_cache.h", + "huge_page_aware_allocator.h", + "huge_page_filler.h", + "huge_pages.h", + "huge_region.h", + "page_allocator.h", + "page_allocator_interface.h", + "page_heap.h", + "page_heap_allocator.h", + "pagemap.h", + "parameters.h", + "peak_heap_tracker.h", + "span_stats.h", + "stack_trace_table.h", + "tracking.h", + "transfer_cache.h", + "transfer_cache_internals.h", + "transfer_cache_stats.h", + ], + hdrs = [ + "common.h", + "pages.h", + "sampler.h", + "size_class_info.h", + "span.h", + "static_vars.h", + "stats.h", + "system-alloc.h", + ], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = ["//tcmalloc:tcmalloc_tests"], + deps = common_deps, +) + +cc_library( + name = "mock_central_freelist", + testonly = 1, + srcs = ["mock_central_freelist.cc"], + hdrs = ["mock_central_freelist.h"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":common", + "//tcmalloc/internal:logging", + "@com_google_absl//absl/base", + "@com_google_absl//absl/types:span", + "@com_google_googletest//:gtest", + ], +) + +cc_library( + name = "page_allocator_test_util", + testonly = 1, + srcs = [ + "page_allocator_test_util.h", + ], + hdrs = ["page_allocator_test_util.h"], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = ["//tcmalloc:tcmalloc_tests"], + deps = [ + ":common", + ":malloc_extension", + ], +) + +cc_test( + name = "page_heap_test", + srcs = ["page_heap_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":common", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base", + "@com_google_absl//absl/memory", + "@com_google_googletest//:gtest_main", + ], +) + +cc_library( + name = "mock_transfer_cache", + testonly = 1, + srcs = ["mock_transfer_cache.cc"], + hdrs = ["mock_transfer_cache.h"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":common", + ":mock_central_freelist", + "@com_google_absl//absl/random", + "@com_google_absl//absl/random:distributions", + "@com_google_googletest//:gtest", + ], +) + +cc_fuzz_test( + name = "transfer_cache_fuzz", + testonly = 1, + srcs = ["transfer_cache_fuzz.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + tags = [ + "noasan", + "nomsan", + "notsan", + ], + deps = [ + ":common", + ":mock_central_freelist", + ":mock_transfer_cache", + ], +) + +cc_test( + name = "arena_test", + timeout = "moderate", + srcs = ["arena_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":common", + "@com_github_google_benchmark//:benchmark", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "transfer_cache_test", + timeout = "moderate", + srcs = ["transfer_cache_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + shard_count = 3, + deps = [ + ":common", + ":mock_central_freelist", + ":mock_transfer_cache", + "//tcmalloc/testing:thread_manager", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base", + "@com_google_absl//absl/random", + "@com_google_absl//absl/random:distributions", + "@com_google_absl//absl/time", + "@com_google_absl//absl/types:span", + "@com_google_googletest//:gtest_main", + ], +) + +create_tcmalloc_benchmark( + name = "transfer_cache_benchmark", + srcs = ["transfer_cache_benchmark.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc", + deps = [ + ":common", + ":mock_central_freelist", + ":mock_transfer_cache", + 
"@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/types:optional", + ], +) + +cc_test( + name = "huge_cache_test", + srcs = ["huge_cache_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":common", + "//tcmalloc/internal:logging", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/random", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "huge_allocator_test", + srcs = ["huge_allocator_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":common", + "//tcmalloc/internal:logging", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/random", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "huge_page_filler_test", + timeout = "long", + srcs = ["huge_page_filler_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + deps = [ + ":common", + "//tcmalloc/internal:logging", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/base", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/random", + "@com_google_absl//absl/random:distributions", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "huge_page_aware_allocator_test", + srcs = ["huge_page_aware_allocator_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + malloc = "//tcmalloc", + tags = [ + ], + deps = [ + ":common", + ":malloc_extension", + ":page_allocator_test_util", + "//tcmalloc/internal:logging", + "//tcmalloc/testing:thread_manager", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/flags:flag", + "@com_google_absl//absl/random", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:str_format", + "@com_google_absl//absl/synchronization", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "huge_region_test", + srcs = ["huge_region_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":common", + "//tcmalloc/internal:logging", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base", + "@com_google_absl//absl/random", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest_main", + ], +) + +create_tcmalloc_benchmark( + name = "guarded_page_allocator_benchmark", + srcs = ["guarded_page_allocator_benchmark.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc", + deps = [ + ":common", + "//tcmalloc/internal:logging", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base", + ], +) + +cc_test( + name = "guarded_page_allocator_test", + srcs = ["guarded_page_allocator_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc", + deps = [ + ":common", + "//tcmalloc/internal:logging", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base", + 
"@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/numeric:bits", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "pagemap_test", + srcs = ["pagemap_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":common", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/random", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "realloc_test", + srcs = ["realloc_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc", + deps = [ + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/random", + "@com_google_absl//absl/random:distributions", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "stack_trace_table_test", + srcs = ["stack_trace_table_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":common", + "//tcmalloc/internal:logging", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/debugging:stacktrace", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:str_format", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "system-alloc_test", + srcs = ["system-alloc_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc", + tags = ["nosan"], + deps = [ + ":common", + ":malloc_extension", + "//tcmalloc/internal:logging", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/strings:str_format", + "@com_google_googletest//:gtest_main", + ], +) + +# This test has been named "large" since before tests were s/m/l. +# The "large" refers to large allocation sizes. 
+cc_test( + name = "tcmalloc_large_test", + size = "small", + timeout = "moderate", + srcs = ["tcmalloc_large_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc", + tags = [ + "noasan", + "noubsan", + ], + deps = [ + ":common", + ":malloc_extension", + "//tcmalloc/internal:logging", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/container:node_hash_set", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "malloc_extension_system_malloc_test", + srcs = ["malloc_extension_system_malloc_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc/internal:system_malloc", + deps = [ + ":malloc_extension", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/random", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "malloc_extension_test", + srcs = ["malloc_extension_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc", + tags = [ + "nosan", + ], + deps = [ + ":malloc_extension", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest_main", + ], +) + +cc_fuzz_test( + name = "malloc_extension_fuzz", + testonly = 1, + srcs = ["malloc_extension_fuzz.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + tags = [ + "noasan", + "nomsan", + "notsan", + ], + deps = [ + ":malloc_extension", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:optional", + ], +) + +cc_test( + name = "page_allocator_test", + srcs = ["page_allocator_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + deps = [ + ":common", + ":malloc_extension", + ":page_allocator_test_util", + "//tcmalloc/internal:logging", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "profile_test", + size = "medium", + timeout = "long", + srcs = ["profile_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + flaky = 1, # TODO(b/134690164) + linkstatic = 1, + malloc = "//tcmalloc", + shard_count = 2, + tags = [ + "noasan", + "nomsan", + "notsan", + ], + deps = [ + ":malloc_extension", + "//tcmalloc/internal:declarations", + "//tcmalloc/internal:linked_list", + "//tcmalloc/testing:testutil", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/container:flat_hash_map", + "@com_google_absl//absl/synchronization", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "thread_cache_test", + size = "medium", + srcs = ["thread_cache_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + malloc = "//tcmalloc:tcmalloc_deprecated_perthread", + tags = [ + "nosan", + ], + deps = [ + ":malloc_extension", + "//tcmalloc/internal:logging", + "//tcmalloc/internal:memory_stats", + "//tcmalloc/internal:parameter_accessors", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/strings", + "@com_google_googletest//:gtest_main", + ], +) + +create_tcmalloc_testsuite( + name = "size_classes_test", + srcs = ["size_classes_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":size_class_info", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/random", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = 
"size_classes_test_with_runtime_size_classes", + srcs = ["size_classes_with_runtime_size_classes_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + malloc = "//tcmalloc", + deps = [ + ":common", + ":runtime_size_classes", + ":size_class_info", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/strings:str_format", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "heap_profiling_test", + srcs = ["heap_profiling_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc", + tags = [ + "nosan", + ], + deps = [ + ":common", + ":malloc_extension", + "//tcmalloc/internal:logging", + "//tcmalloc/internal:parameter_accessors", + "@com_github_google_benchmark//:benchmark", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "runtime_size_classes_test", + srcs = ["runtime_size_classes_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + linkstatic = 1, + malloc = "//tcmalloc", + deps = [ + ":runtime_size_classes", + "@com_github_google_benchmark//:benchmark", + "@com_google_googletest//:gtest_main", + ], +) + +create_tcmalloc_testsuite( + name = "span_test", + srcs = ["span_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + "//tcmalloc/internal:logging", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base", + "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/random", + "@com_google_googletest//:gtest_main", + ], +) + +create_tcmalloc_benchmark( + name = "span_benchmark", + srcs = ["span_benchmark.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + malloc = ":tcmalloc", + deps = [ + ":common", + "//tcmalloc/internal:logging", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base", + "@com_google_absl//absl/random", + ], +) + +cc_test( + name = "stats_test", + srcs = ["stats_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc", + deps = [ + ":common", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/base", + "@com_google_absl//absl/time", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( + name = "huge_address_map_test", + srcs = ["huge_address_map_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":common", + "@com_github_google_benchmark//:benchmark", + "@com_google_googletest//:gtest_main", + ], +) + +cc_library( + name = "malloc_extension", + srcs = ["malloc_extension.cc"], + hdrs = [ + "internal_malloc_extension.h", + "malloc_extension.h", + ], + copts = TCMALLOC_DEFAULT_COPTS, + visibility = [ + "//visibility:public", + ], + deps = [ + "//tcmalloc/internal:parameter_accessors", + "@com_google_absl//absl/base:config", + "@com_google_absl//absl/base:core_headers", + "@com_google_absl//absl/base:malloc_internal", + "@com_google_absl//absl/functional:function_ref", + "@com_google_absl//absl/memory", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/time", + "@com_google_absl//absl/types:optional", + "@com_google_absl//absl/types:span", + ], +) + +cc_test( + name = "experiment_config_test", + srcs = ["experiment_config_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":experiment", + "@com_github_google_benchmark//:benchmark", + "@com_google_googletest//:gtest_main", + ], +) + +cc_fuzz_test( + name = "experiment_fuzz", + testonly = 1, + srcs = ["experiment_fuzz.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":experiment", + "@com_google_absl//absl/strings", + ], +) + +cc_fuzz_test( + name = "runtime_size_classes_fuzz", + testonly = 1, + 
srcs = ["runtime_size_classes_fuzz.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + ":common", + ":runtime_size_classes", + ":size_class_info", + "@com_google_absl//absl/strings", + ], +) + +cc_test( + name = "cpu_cache_test", + srcs = ["cpu_cache_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + malloc = ":tcmalloc_deprecated_perthread", + tags = [ + # TODO(b/193887621): Add TSan annotations to CPUCache and/or add + # atomics to PageMap + "notsan", + ], + deps = [ + ":common_deprecated_perthread", + "//tcmalloc/internal:optimization", + "//tcmalloc/internal:util", + "//tcmalloc/testing:testutil", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/random", + "@com_google_absl//absl/random:seed_sequences", + "@com_google_googletest//:gtest_main", + ], +) + +create_tcmalloc_testsuite( + name = "central_freelist_test", + srcs = ["central_freelist_test.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + deps = [ + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/random", + "@com_google_googletest//:gtest_main", + ], +) + +create_tcmalloc_benchmark( + name = "central_freelist_benchmark", + srcs = ["central_freelist_benchmark.cc"], + copts = TCMALLOC_DEFAULT_COPTS, + malloc = "//tcmalloc", + deps = [ + ":common", + "@com_github_google_benchmark//:benchmark", + "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/random", + "@com_google_absl//absl/types:optional", + ], +) diff --git a/contrib/libs/tcmalloc/tcmalloc/arena.cc b/contrib/libs/tcmalloc/tcmalloc/arena.cc new file mode 100644 index 0000000000..5ba1a65bf3 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/arena.cc @@ -0,0 +1,78 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/arena.h" + +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/static_vars.h" +#include "tcmalloc/system-alloc.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +void* Arena::Alloc(size_t bytes, int alignment) { + ASSERT(alignment > 0); + { // First we need to move up to the correct alignment. + const int misalignment = + reinterpret_cast<uintptr_t>(free_area_) % alignment; + const int alignment_bytes = + misalignment != 0 ? alignment - misalignment : 0; + free_area_ += alignment_bytes; + free_avail_ -= alignment_bytes; + bytes_allocated_ += alignment_bytes; + } + char* result; + if (free_avail_ < bytes) { + size_t ask = bytes > kAllocIncrement ? bytes : kAllocIncrement; + size_t actual_size; + // TODO(b/171081864): Arena allocations should be made relatively + // infrequently. Consider tagging this memory with sampled objects which + // are also infrequently allocated. + // + // In the meantime it is important that we use the current NUMA partition + // rather than always using a particular one because it's possible that any + // single partition we choose might only contain nodes that the process is + // unable to allocate from due to cgroup restrictions. 
+ MemoryTag tag; + const auto& numa_topology = Static::numa_topology(); + if (numa_topology.numa_aware()) { + tag = NumaNormalTag(numa_topology.GetCurrentPartition()); + } else { + tag = MemoryTag::kNormal; + } + free_area_ = + reinterpret_cast<char*>(SystemAlloc(ask, &actual_size, kPageSize, tag)); + if (ABSL_PREDICT_FALSE(free_area_ == nullptr)) { + Crash(kCrash, __FILE__, __LINE__, + "FATAL ERROR: Out of memory trying to allocate internal tcmalloc " + "data (bytes, object-size); is something preventing mmap from " + "succeeding (sandbox, VSS limitations)?", + kAllocIncrement, bytes); + } + SystemBack(free_area_, actual_size); + free_avail_ = actual_size; + } + + ASSERT(reinterpret_cast<uintptr_t>(free_area_) % alignment == 0); + result = free_area_; + free_area_ += bytes; + free_avail_ -= bytes; + bytes_allocated_ += bytes; + return reinterpret_cast<void*>(result); +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/arena.h b/contrib/libs/tcmalloc/tcmalloc/arena.h new file mode 100644 index 0000000000..0655253540 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/arena.h @@ -0,0 +1,68 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_ARENA_H_ +#define TCMALLOC_ARENA_H_ + +#include <stddef.h> +#include <stdint.h> + +#include "absl/base/attributes.h" +#include "absl/base/thread_annotations.h" +#include "tcmalloc/common.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// Arena allocation; designed for use by tcmalloc internal data structures like +// spans, profiles, etc. Always expands. +class Arena { + public: + constexpr Arena() + : free_area_(nullptr), free_avail_(0), bytes_allocated_(0) {} + + // Return a properly aligned byte array of length "bytes". Crashes if + // allocation fails. Requires pageheap_lock is held. + ABSL_ATTRIBUTE_RETURNS_NONNULL void* Alloc(size_t bytes, + int alignment = kAlignment) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Returns the total number of bytes allocated from this arena. Requires + // pageheap_lock is held. 
+ uint64_t bytes_allocated() const + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { + return bytes_allocated_; + } + + private: + // How much to allocate from system at a time + static constexpr int kAllocIncrement = 128 << 10; + + // Free area from which to carve new objects + char* free_area_ ABSL_GUARDED_BY(pageheap_lock); + size_t free_avail_ ABSL_GUARDED_BY(pageheap_lock); + + // Total number of bytes allocated from this arena + uint64_t bytes_allocated_ ABSL_GUARDED_BY(pageheap_lock); + + Arena(const Arena&) = delete; + Arena& operator=(const Arena&) = delete; +}; + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_ARENA_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/arena_test.cc b/contrib/libs/tcmalloc/tcmalloc/arena_test.cc new file mode 100644 index 0000000000..2fb728cac9 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/arena_test.cc @@ -0,0 +1,38 @@ +// Copyright 2021 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/arena.h" + +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +TEST(Arena, AlignedAlloc) { + Arena arena; + absl::base_internal::SpinLockHolder h(&pageheap_lock); + EXPECT_EQ(reinterpret_cast<uintptr_t>(arena.Alloc(64, 64)) % 64, 0); + EXPECT_EQ(reinterpret_cast<uintptr_t>(arena.Alloc(7)) % 8, 0); + EXPECT_EQ(reinterpret_cast<uintptr_t>(arena.Alloc(128, 64)) % 64, 0); + for (int alignment = 1; alignment < 100; ++alignment) { + EXPECT_EQ( + reinterpret_cast<uintptr_t>(arena.Alloc(7, alignment)) % alignment, 0); + } +} + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/background.cc b/contrib/libs/tcmalloc/tcmalloc/background.cc new file mode 100644 index 0000000000..ec57c03901 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/background.cc @@ -0,0 +1,182 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
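+// Background actions for tcmalloc: release memory to the OS at a steady rate
+// and periodically reclaim or shuffle idle per-CPU caches (implemented in
+// MallocExtension_Internal_ProcessBackgroundActions below).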
+ +#include <errno.h> + +#include "absl/base/internal/sysinfo.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "tcmalloc/cpu_cache.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/percpu.h" +#include "tcmalloc/internal_malloc_extension.h" +#include "tcmalloc/malloc_extension.h" +#include "tcmalloc/parameters.h" +#include "tcmalloc/static_vars.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +// Called by MallocExtension_Internal_ProcessBackgroundActions. +// +// We use a simple heuristic here: +// We keep track of the set of CPUs that we are allowed to run on. Whenever a +// CPU is removed from this list, the next call to this routine will detect the +// disappearance and call ReleaseCpuMemory on it. +// +// Note that this heuristic _explicitly_ does not reclaim from isolated cores +// that this process may have set up specific affinities for -- as this thread +// will never have been allowed to run there. +cpu_set_t prev_allowed_cpus; +void ReleasePerCpuMemoryToOS() { + cpu_set_t allowed_cpus; + + // Only attempt reclaim when per-CPU caches are in use. While + // ReleaseCpuMemory() itself is usually a no-op otherwise, we are experiencing + // failures in non-permissive sandboxes due to calls made to + // sched_getaffinity() below. It is expected that a runtime environment + // supporting per-CPU allocations supports sched_getaffinity(). + // See b/27247854. + if (!MallocExtension::PerCpuCachesActive()) { + return; + } + + if (subtle::percpu::UsingFlatVirtualCpus()) { + // Our (real) CPU mask does not provide useful information about the state + // of our virtual CPU set. + return; + } + + // This can only fail due to a sandbox or similar intercepting the syscall. + if (sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus)) { + // We log periodically as start-up errors are frequently ignored and this is + // something we do want clients to fix if they are experiencing it. + Log(kLog, __FILE__, __LINE__, + "Unexpected sched_getaffinity() failure; errno ", errno); + return; + } + + // Note: This is technically not correct in the presence of hotplug (it is + // not guaranteed that NumCPUs() is an upper bound on CPU-number). It is + // currently safe for Google systems. + const int num_cpus = absl::base_internal::NumCPUs(); + for (int cpu = 0; cpu < num_cpus; cpu++) { + if (CPU_ISSET(cpu, &prev_allowed_cpus) && !CPU_ISSET(cpu, &allowed_cpus)) { + // This is a CPU present in the old mask, but not the new. Reclaim. + MallocExtension::ReleaseCpuMemory(cpu); + } + } + + // Update cached runnable CPUs for next iteration. + memcpy(&prev_allowed_cpus, &allowed_cpus, sizeof(cpu_set_t)); +} + +void ShuffleCpuCaches() { + if (!MallocExtension::PerCpuCachesActive()) { + return; + } + + // Shuffle per-cpu caches + Static::cpu_cache().ShuffleCpuCaches(); +} + +// Reclaims per-cpu caches. The CPU mask used in ReleasePerCpuMemoryToOS does +// not provide useful information about virtual CPU state and hence, does not +// reclaim memory when virtual CPUs are enabled. +// +// Here, we use heuristics that are based on cache usage and misses, to +// determine if the caches have been recently inactive and if they may be +// reclaimed. +void ReclaimIdleCpuCaches() { + // Attempts reclaim only when per-CPU caches are in use. 
+ if (!MallocExtension::PerCpuCachesActive()) { + return; + } + + Static::cpu_cache().TryReclaimingCaches(); +} + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +// Release memory to the system at a constant rate. +void MallocExtension_Internal_ProcessBackgroundActions() { + tcmalloc::MallocExtension::MarkThreadIdle(); + + // Initialize storage for ReleasePerCpuMemoryToOS(). + CPU_ZERO(&tcmalloc::tcmalloc_internal::prev_allowed_cpus); + + absl::Time prev_time = absl::Now(); + constexpr absl::Duration kSleepTime = absl::Seconds(1); + + // Reclaim inactive per-cpu caches once per kCpuCacheReclaimPeriod. + // + // We use a longer 30 sec reclaim period to make sure that caches are indeed + // idle. Reclaim drains entire cache, as opposed to cache shuffle for instance + // that only shrinks a cache by a few objects at a time. So, we might have + // larger performance degradation if we use a shorter reclaim interval and + // drain caches that weren't supposed to. + constexpr absl::Duration kCpuCacheReclaimPeriod = absl::Seconds(30); + absl::Time last_reclaim = absl::Now(); + + // Shuffle per-cpu caches once per kCpuCacheShufflePeriod secs. + constexpr absl::Duration kCpuCacheShufflePeriod = absl::Seconds(5); + absl::Time last_shuffle = absl::Now(); + + while (true) { + absl::Time now = absl::Now(); + const ssize_t bytes_to_release = + static_cast<size_t>(tcmalloc::tcmalloc_internal::Parameters:: + background_release_rate()) * + absl::ToDoubleSeconds(now - prev_time); + if (bytes_to_release > 0) { // may be negative if time goes backwards + tcmalloc::MallocExtension::ReleaseMemoryToSystem(bytes_to_release); + } + + const bool reclaim_idle_per_cpu_caches = + tcmalloc::tcmalloc_internal::Parameters::reclaim_idle_per_cpu_caches(); + + // If enabled, we use heuristics to determine if the per-cpu caches are + // inactive. If disabled, we use a more conservative approach, that uses + // allowed cpu masks, to reclaim cpu caches. + if (reclaim_idle_per_cpu_caches) { + // Try to reclaim per-cpu caches once every kCpuCacheReclaimPeriod + // when enabled. + if (now - last_reclaim >= kCpuCacheReclaimPeriod) { + tcmalloc::tcmalloc_internal::ReclaimIdleCpuCaches(); + last_reclaim = now; + } + } else { + tcmalloc::tcmalloc_internal::ReleasePerCpuMemoryToOS(); + } + + const bool shuffle_per_cpu_caches = + tcmalloc::tcmalloc_internal::Parameters::shuffle_per_cpu_caches(); + + if (shuffle_per_cpu_caches) { + if (now - last_shuffle >= kCpuCacheShufflePeriod) { + tcmalloc::tcmalloc_internal::ShuffleCpuCaches(); + last_shuffle = now; + } + } + + tcmalloc::tcmalloc_internal::Static().sharded_transfer_cache().Plunder(); + prev_time = now; + absl::SleepFor(kSleepTime); + } +} diff --git a/contrib/libs/tcmalloc/tcmalloc/central_freelist.cc b/contrib/libs/tcmalloc/tcmalloc/central_freelist.cc new file mode 100644 index 0000000000..8620e228a1 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/central_freelist.cc @@ -0,0 +1,218 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/central_freelist.h" + +#include <stdint.h> + +#include "tcmalloc/internal/linked_list.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/optimization.h" +#include "tcmalloc/page_heap.h" +#include "tcmalloc/pagemap.h" +#include "tcmalloc/pages.h" +#include "tcmalloc/static_vars.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +static MemoryTag MemoryTagFromSizeClass(size_t cl) { + if (!Static::numa_topology().numa_aware()) { + return MemoryTag::kNormal; + } + return NumaNormalTag(cl / kNumBaseClasses); +} + +// Like a constructor and hence we disable thread safety analysis. +void CentralFreeList::Init(size_t cl) ABSL_NO_THREAD_SAFETY_ANALYSIS { + size_class_ = cl; + object_size_ = Static::sizemap().class_to_size(cl); + pages_per_span_ = Length(Static::sizemap().class_to_pages(cl)); + objects_per_span_ = + pages_per_span_.in_bytes() / (object_size_ ? object_size_ : 1); +} + +static Span* MapObjectToSpan(void* object) { + const PageId p = PageIdContaining(object); + Span* span = Static::pagemap().GetExistingDescriptor(p); + return span; +} + +Span* CentralFreeList::ReleaseToSpans(void* object, Span* span, + size_t object_size) { + if (ABSL_PREDICT_FALSE(span->FreelistEmpty(object_size))) { + nonempty_.prepend(span); + } + + if (ABSL_PREDICT_TRUE(span->FreelistPush(object, object_size))) { + return nullptr; + } + span->RemoveFromList(); // from nonempty_ + return span; +} + +void CentralFreeList::InsertRange(absl::Span<void*> batch) { + CHECK_CONDITION(!batch.empty() && batch.size() <= kMaxObjectsToMove); + Span* spans[kMaxObjectsToMove]; + // Safe to store free spans into freed up space in span array. + Span** free_spans = spans; + int free_count = 0; + + // Prefetch Span objects to reduce cache misses. + for (int i = 0; i < batch.size(); ++i) { + Span* span = MapObjectToSpan(batch[i]); + ASSERT(span != nullptr); + span->Prefetch(); + spans[i] = span; + } + + // First, release all individual objects into spans under our mutex + // and collect spans that become completely free. + { + // Use local copy of variable to ensure that it is not reloaded. + size_t object_size = object_size_; + absl::base_internal::SpinLockHolder h(&lock_); + for (int i = 0; i < batch.size(); ++i) { + Span* span = ReleaseToSpans(batch[i], spans[i], object_size); + if (ABSL_PREDICT_FALSE(span)) { + free_spans[free_count] = span; + free_count++; + } + } + + RecordMultiSpansDeallocated(free_count); + UpdateObjectCounts(batch.size()); + } + + // Then, release all free spans into page heap under its mutex. + if (ABSL_PREDICT_FALSE(free_count)) { + // Unregister size class doesn't require holding any locks. + for (int i = 0; i < free_count; ++i) { + Span* const free_span = free_spans[i]; + ASSERT(IsNormalMemory(free_span->start_address()) + ); + Static::pagemap().UnregisterSizeClass(free_span); + + // Before taking pageheap_lock, prefetch the PageTrackers these spans are + // on. + // + // Small-but-slow does not use the HugePageAwareAllocator (by default), so + // do not prefetch on this config. +#ifndef TCMALLOC_SMALL_BUT_SLOW + const PageId p = free_span->first_page(); + + // In huge_page_filler.h, we static_assert that PageTracker's key elements + // for deallocation are within the first two cachelines. + void* pt = Static::pagemap().GetHugepage(p); + // Prefetch for writing, as we will issue stores to the PageTracker + // instance. 
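+ // (__builtin_prefetch(addr, rw, locality): rw=1 prefetches for write,
+ // locality=3 requests high temporal locality.)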
+ __builtin_prefetch(pt, 1, 3); + __builtin_prefetch( + reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(pt) + + ABSL_CACHELINE_SIZE), + 1, 3); +#endif // TCMALLOC_SMALL_BUT_SLOW + } + + const MemoryTag tag = MemoryTagFromSizeClass(size_class_); + absl::base_internal::SpinLockHolder h(&pageheap_lock); + for (int i = 0; i < free_count; ++i) { + Span* const free_span = free_spans[i]; + ASSERT(tag == GetMemoryTag(free_span->start_address())); + Static::page_allocator().Delete(free_span, tag); + } + } +} + +int CentralFreeList::RemoveRange(void** batch, int N) { + ASSUME(N > 0); + // Use local copy of variable to ensure that it is not reloaded. + size_t object_size = object_size_; + int result = 0; + absl::base_internal::SpinLockHolder h(&lock_); + if (ABSL_PREDICT_FALSE(nonempty_.empty())) { + result = Populate(batch, N); + } else { + do { + Span* span = nonempty_.first(); + int here = + span->FreelistPopBatch(batch + result, N - result, object_size); + ASSERT(here > 0); + if (span->FreelistEmpty(object_size)) { + span->RemoveFromList(); // from nonempty_ + } + result += here; + } while (result < N && !nonempty_.empty()); + } + UpdateObjectCounts(-result); + return result; +} + +// Fetch memory from the system and add to the central cache freelist. +int CentralFreeList::Populate(void** batch, + int N) ABSL_NO_THREAD_SAFETY_ANALYSIS { + // Release central list lock while operating on pageheap + // Note, this could result in multiple calls to populate each allocating + // a new span and the pushing those partially full spans onto nonempty. + lock_.Unlock(); + + const MemoryTag tag = MemoryTagFromSizeClass(size_class_); + Span* span = Static::page_allocator().New(pages_per_span_, tag); + if (ABSL_PREDICT_FALSE(span == nullptr)) { + Log(kLog, __FILE__, __LINE__, "tcmalloc: allocation failed", + pages_per_span_.in_bytes()); + lock_.Lock(); + return 0; + } + ASSERT(tag == GetMemoryTag(span->start_address())); + ASSERT(span->num_pages() == pages_per_span_); + + Static::pagemap().RegisterSizeClass(span, size_class_); + size_t objects_per_span = objects_per_span_; + int result = span->BuildFreelist(object_size_, objects_per_span, batch, N); + ASSERT(result > 0); + // This is a cheaper check than using FreelistEmpty(). 
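+ // (BuildFreelist moved up to N objects into batch; if it handed out every
+ // object in the span, the span's freelist is now empty.)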
+ bool span_empty = result == objects_per_span; + + lock_.Lock(); + if (!span_empty) { + nonempty_.prepend(span); + } + RecordSpanAllocated(); + return result; +} + +size_t CentralFreeList::OverheadBytes() const { + if (ABSL_PREDICT_FALSE(object_size_ == 0)) { + return 0; + } + const size_t overhead_per_span = pages_per_span_.in_bytes() % object_size_; + return num_spans() * overhead_per_span; +} + +SpanStats CentralFreeList::GetSpanStats() const { + SpanStats stats; + if (ABSL_PREDICT_FALSE(objects_per_span_ == 0)) { + return stats; + } + stats.num_spans_requested = static_cast<size_t>(num_spans_requested_.value()); + stats.num_spans_returned = static_cast<size_t>(num_spans_returned_.value()); + stats.obj_capacity = stats.num_live_spans() * objects_per_span_; + return stats; +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/central_freelist.h b/contrib/libs/tcmalloc/tcmalloc/central_freelist.h new file mode 100644 index 0000000000..266f184d6b --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/central_freelist.h @@ -0,0 +1,142 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_CENTRAL_FREELIST_H_ +#define TCMALLOC_CENTRAL_FREELIST_H_ + +#include <stddef.h> + +#include <cstddef> + +#include "absl/base/attributes.h" +#include "absl/base/const_init.h" +#include "absl/base/internal/spinlock.h" +#include "absl/base/macros.h" +#include "absl/base/thread_annotations.h" +#include "tcmalloc/internal/atomic_stats_counter.h" +#include "tcmalloc/span.h" +#include "tcmalloc/span_stats.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// Data kept per size-class in central cache. +class CentralFreeList { + public: + constexpr CentralFreeList() + : lock_(absl::kConstInit, absl::base_internal::SCHEDULE_KERNEL_ONLY), + size_class_(0), + object_size_(0), + objects_per_span_(0), + pages_per_span_(0), + nonempty_() {} + + CentralFreeList(const CentralFreeList&) = delete; + CentralFreeList& operator=(const CentralFreeList&) = delete; + + void Init(size_t cl) ABSL_LOCKS_EXCLUDED(lock_); + + // These methods all do internal locking. + + // Insert batch into the central freelist. + // REQUIRES: batch.size() > 0 && batch.size() <= kMaxObjectsToMove. + void InsertRange(absl::Span<void*> batch) ABSL_LOCKS_EXCLUDED(lock_); + + // Fill a prefix of batch[0..N-1] with up to N elements removed from central + // freelist. Return the number of elements removed. + ABSL_MUST_USE_RESULT int RemoveRange(void** batch, int N) + ABSL_LOCKS_EXCLUDED(lock_); + + // Returns the number of free objects in cache. + size_t length() const { return static_cast<size_t>(counter_.value()); } + + // Returns the memory overhead (internal fragmentation) attributable + // to the freelist. 
This is memory lost when the size of elements + // in a freelist doesn't exactly divide the page-size (an 8192-byte + // page full of 5-byte objects would have 2 bytes memory overhead). + size_t OverheadBytes() const; + + SpanStats GetSpanStats() const; + + void AcquireInternalLocks() { + lock_.Lock(); + } + + void ReleaseInternalLocks() { + lock_.Unlock(); + } + + private: + // Release an object to spans. + // Returns object's span if it become completely free. + Span* ReleaseToSpans(void* object, Span* span, size_t object_size) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_); + + // Populate cache by fetching from the page heap. + // May temporarily release lock_. + // Fill a prefix of batch[0..N-1] with up to N elements removed from central + // freelist. Returns the number of elements removed. + int Populate(void** batch, int N) ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_); + + // This lock protects all the mutable data members. + absl::base_internal::SpinLock lock_; + + size_t size_class_; // My size class (immutable after Init()) + size_t object_size_; + size_t objects_per_span_; + Length pages_per_span_; + + size_t num_spans() const { + size_t requested = num_spans_requested_.value(); + size_t returned = num_spans_returned_.value(); + if (requested < returned) return 0; + return (requested - returned); + } + + void RecordSpanAllocated() ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) { + counter_.LossyAdd(objects_per_span_); + num_spans_requested_.LossyAdd(1); + } + + void RecordMultiSpansDeallocated(size_t num_spans_returned) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) { + counter_.LossyAdd(-num_spans_returned * objects_per_span_); + num_spans_returned_.LossyAdd(num_spans_returned); + } + + void UpdateObjectCounts(int num) ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) { + counter_.LossyAdd(num); + } + + // The followings are kept as a StatsCounter so that they can read without + // acquiring a lock. Updates to these variables are guarded by lock_ + // so writes are performed using LossyAdd for speed, the lock still + // guarantees accuracy. + + // Num free objects in cache entry + StatsCounter counter_; + + StatsCounter num_spans_requested_; + StatsCounter num_spans_returned_; + + // Dummy header for non-empty spans + SpanList nonempty_ ABSL_GUARDED_BY(lock_); +}; + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_CENTRAL_FREELIST_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/central_freelist_benchmark.cc b/contrib/libs/tcmalloc/tcmalloc/central_freelist_benchmark.cc new file mode 100644 index 0000000000..a80d580753 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/central_freelist_benchmark.cc @@ -0,0 +1,198 @@ +// Copyright 2021 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
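+// Microbenchmarks for CentralFreeList: BM_Populate measures span population,
+// BM_MixAndReturn measures mixed remove/insert traffic, and BM_SpanReuse
+// exercises span handling without touching the pageheap.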
+ +#include <algorithm> +#include <cstddef> +#include <cstdint> +#include <vector> + +#include "absl/algorithm/container.h" +#include "absl/random/random.h" +#include "benchmark/benchmark.h" +#include "tcmalloc/central_freelist.h" +#include "tcmalloc/common.h" +#include "tcmalloc/static_vars.h" +#include "tcmalloc/tcmalloc_policy.h" + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +// This benchmark measures how long it takes to populate multiple +// spans. The spans are freed in the same order as they were populated +// to minimize the time it takes to free them. +void BM_Populate(benchmark::State& state) { + size_t object_size = state.range(0); + size_t cl = Static::sizemap().SizeClass(CppPolicy(), object_size); + int batch_size = Static::sizemap().num_objects_to_move(cl); + int num_objects = 64 * 1024 * 1024 / object_size; + CentralFreeList cfl; + // Initialize the span to contain the appropriate size of object. + cfl.Init(cl); + + // Allocate an array large enough to hold 64 MiB of objects. + std::vector<void*> buffer(num_objects); + int64_t items_processed = 0; + absl::BitGen rnd; + + for (auto s : state) { + int index = 0; + // The cost of fetching objects will include the cost of fetching and + // populating the span. + while (index < num_objects) { + int count = std::min(batch_size, num_objects - index); + int got = cfl.RemoveRange(&buffer[index], count); + index += got; + } + + // Don't include the cost of returning the objects to the span, and the + // span to the pageheap. + state.PauseTiming(); + index = 0; + while (index < num_objects) { + uint64_t count = std::min(batch_size, num_objects - index); + cfl.InsertRange({&buffer[index], count}); + index += count; + } + items_processed += index; + state.ResumeTiming(); + } + state.SetItemsProcessed(items_processed); +} +BENCHMARK(BM_Populate) + ->DenseRange(8, 64, 16) + ->DenseRange(64, 1024, 64) + ->DenseRange(4096, 28 * 1024, 4096) + ->DenseRange(32 * 1024, 256 * 1024, 32 * 1024); + +// This benchmark fills a large array with objects, shuffles the objects +// and then returns them. +// This should be relatively representative of what happens at runtime. +// Fetching objects from the CFL is usually done in batches, but returning +// them is usually done spread over many active spans. +void BM_MixAndReturn(benchmark::State& state) { + size_t object_size = state.range(0); + size_t cl = Static::sizemap().SizeClass(CppPolicy(), object_size); + int batch_size = Static::sizemap().num_objects_to_move(cl); + int num_objects = 64 * 1024 * 1024 / object_size; + CentralFreeList cfl; + // Initialize the span to contain the appropriate size of object. + cfl.Init(cl); + + // Allocate an array large enough to hold 64 MiB of objects. + std::vector<void*> buffer(num_objects); + int64_t items_processed = 0; + absl::BitGen rnd; + + for (auto s : state) { + int index = 0; + while (index < num_objects) { + int count = std::min(batch_size, num_objects - index); + int got = cfl.RemoveRange(&buffer[index], count); + index += got; + } + + state.PauseTiming(); + // Shuffle the vector so that we don't return the objects in the same + // order as they were allocated. 
+ absl::c_shuffle(buffer, rnd); + state.ResumeTiming(); + + index = 0; + while (index < num_objects) { + unsigned int count = std::min(batch_size, num_objects - index); + cfl.InsertRange({&buffer[index], count}); + index += count; + } + items_processed += index; + } + state.SetItemsProcessed(items_processed); +} +BENCHMARK(BM_MixAndReturn) + ->DenseRange(8, 64, 16) + ->DenseRange(64, 1024, 64) + ->DenseRange(4096, 28 * 1024, 4096) + ->DenseRange(32 * 1024, 256 * 1024, 32 * 1024); + +// This benchmark holds onto half the allocated objects so that (except for +// single object spans) spans are never allocated or freed during the +// benchmark run. This evaluates the performance of just the span handling +// code, and avoids timing the pageheap code. +void BM_SpanReuse(benchmark::State& state) { + size_t object_size = state.range(0); + size_t cl = Static::sizemap().SizeClass(CppPolicy(), object_size); + int batch_size = Static::sizemap().num_objects_to_move(cl); + int num_objects = 64 * 1024 * 1024 / object_size; + CentralFreeList cfl; + // Initialize the span to contain the appropriate size of object. + cfl.Init(cl); + + // Array used to hold onto half of the objects + std::vector<void*> held_objects(2 * num_objects); + // Request twice the objects we need + for (int index = 0; index < 2 * num_objects;) { + int count = std::min(batch_size, 2 * num_objects - index); + int got = cfl.RemoveRange(&held_objects[index], count); + index += got; + } + + // Return half of the objects. This will stop the spans from being + // returned to the pageheap. So future operations will not touch the + // pageheap. + for (int index = 0; index < 2 * num_objects; index += 2) { + cfl.InsertRange({&held_objects[index], 1}); + } + // Allocate an array large enough to hold 64 MiB of objects. + std::vector<void*> buffer(num_objects); + int64_t items_processed = 0; + absl::BitGen rnd; + + for (auto s : state) { + int index = 0; + while (index < num_objects) { + int count = std::min(batch_size, num_objects - index); + int got = cfl.RemoveRange(&buffer[index], count); + index += got; + } + + state.PauseTiming(); + // Shuffle the vector so that we don't return the objects in the same + // order as they were allocated. + absl::c_shuffle(buffer, rnd); + state.ResumeTiming(); + + index = 0; + while (index < num_objects) { + uint64_t count = std::min(batch_size, num_objects - index); + cfl.InsertRange({&buffer[index], count}); + index += count; + } + items_processed += index; + } + state.SetItemsProcessed(items_processed); + + // Return the other half of the objects. + for (int index = 1; index < 2 * num_objects; index += 2) { + cfl.InsertRange({&held_objects[index], 1}); + } +} +// Want to avoid benchmarking spans where there is a single object per span. +BENCHMARK(BM_SpanReuse) + ->DenseRange(8, 64, 16) + ->DenseRange(64, 1024, 64) + ->DenseRange(1024, 4096, 512); + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/central_freelist_test.cc b/contrib/libs/tcmalloc/tcmalloc/central_freelist_test.cc new file mode 100644 index 0000000000..de5960120d --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/central_freelist_test.cc @@ -0,0 +1,121 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/central_freelist.h" + +#include <algorithm> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/random/random.h" +#include "tcmalloc/common.h" +#include "tcmalloc/static_vars.h" + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +// TODO(b/162552708) Mock out the page heap to interact with CFL instead +class CFLTest : public testing::TestWithParam<size_t> { + protected: + size_t cl_; + size_t batch_size_; + size_t objects_per_span_; + CentralFreeList cfl_; + + private: + void SetUp() override { + cl_ = GetParam(); + size_t object_size = Static::sizemap().class_to_size(cl_); + if (object_size == 0) { + GTEST_SKIP() << "Skipping empty size class."; + } + + auto pages_per_span = Length(Static::sizemap().class_to_pages(cl_)); + batch_size_ = Static::sizemap().num_objects_to_move(cl_); + objects_per_span_ = pages_per_span.in_bytes() / object_size; + cfl_.Init(cl_); + } + + void TearDown() override { EXPECT_EQ(cfl_.length(), 0); } +}; + +TEST_P(CFLTest, SingleBatch) { + void* batch[kMaxObjectsToMove]; + uint64_t got = cfl_.RemoveRange(batch, batch_size_); + ASSERT_GT(got, 0); + cfl_.InsertRange({batch, got}); + SpanStats stats = cfl_.GetSpanStats(); + EXPECT_EQ(stats.num_spans_requested, 1); + EXPECT_EQ(stats.num_spans_returned, 1); + EXPECT_EQ(stats.obj_capacity, 0); +} + +TEST_P(CFLTest, MultipleSpans) { + std::vector<void*> all_objects; + + const size_t num_spans = 10; + + // Request num_spans spans + void* batch[kMaxObjectsToMove]; + const int num_objects_to_fetch = num_spans * objects_per_span_; + int total_fetched = 0; + while (total_fetched < num_objects_to_fetch) { + size_t n = num_objects_to_fetch - total_fetched; + int got = cfl_.RemoveRange(batch, std::min(n, batch_size_)); + for (int i = 0; i < got; ++i) { + all_objects.push_back(batch[i]); + } + total_fetched += got; + } + + SpanStats stats = cfl_.GetSpanStats(); + EXPECT_EQ(stats.num_spans_requested, num_spans); + EXPECT_EQ(stats.num_spans_returned, 0); + + EXPECT_EQ(all_objects.size(), num_objects_to_fetch); + + // Shuffle + absl::BitGen rng; + std::shuffle(all_objects.begin(), all_objects.end(), rng); + + // Return all + int total_returned = 0; + bool checked_half = false; + while (total_returned < num_objects_to_fetch) { + uint64_t size_to_pop = + std::min(all_objects.size() - total_returned, batch_size_); + for (int i = 0; i < size_to_pop; ++i) { + batch[i] = all_objects[i + total_returned]; + } + total_returned += size_to_pop; + cfl_.InsertRange({batch, size_to_pop}); + // sanity check + if (!checked_half && total_returned >= (num_objects_to_fetch / 2)) { + stats = cfl_.GetSpanStats(); + EXPECT_GT(stats.num_spans_requested, stats.num_spans_returned); + EXPECT_NE(stats.obj_capacity, 0); + checked_half = true; + } + } + + stats = cfl_.GetSpanStats(); + EXPECT_EQ(stats.num_spans_requested, stats.num_spans_returned); + EXPECT_EQ(stats.obj_capacity, 0); +} + +INSTANTIATE_TEST_SUITE_P(All, CFLTest, testing::Range(size_t(1), kNumClasses)); +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git 
a/contrib/libs/tcmalloc/tcmalloc/common.cc b/contrib/libs/tcmalloc/tcmalloc/common.cc new file mode 100644 index 0000000000..38443040ca --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/common.cc @@ -0,0 +1,204 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/common.h" + +#include "tcmalloc/experiment.h" +#include "tcmalloc/internal/environment.h" +#include "tcmalloc/internal/optimization.h" +#include "tcmalloc/pages.h" +#include "tcmalloc/runtime_size_classes.h" +#include "tcmalloc/sampler.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +absl::string_view MemoryTagToLabel(MemoryTag tag) { + switch (tag) { + case MemoryTag::kNormal: + return "NORMAL"; + case MemoryTag::kNormalP1: + return "NORMAL_P1"; + case MemoryTag::kSampled: + return "SAMPLED"; + default: + ASSUME(false); + } +} + +// Load sizes classes from environment variable if present +// and valid, then returns True. If not found or valid, returns +// False. +bool SizeMap::MaybeRunTimeSizeClasses() { + SizeClassInfo parsed[kNumClasses]; + int num_classes = MaybeSizeClassesFromEnv(kMaxSize, kNumClasses, parsed); + if (!ValidSizeClasses(num_classes, parsed)) { + return false; + } + + if (num_classes != kSizeClassesCount) { + // TODO(b/122839049) - Add tests for num_classes < kSizeClassesCount before + // allowing that case. + Log(kLog, __FILE__, __LINE__, "Can't change the number of size classes", + num_classes, kSizeClassesCount); + return false; + } + + SetSizeClasses(num_classes, parsed); + Log(kLog, __FILE__, __LINE__, "Loaded valid Runtime Size classes"); + return true; +} + +void SizeMap::SetSizeClasses(int num_classes, const SizeClassInfo* parsed) { + class_to_size_[0] = 0; + class_to_pages_[0] = 0; + num_objects_to_move_[0] = 0; + + for (int c = 1; c < num_classes; c++) { + class_to_size_[c] = parsed[c].size; + class_to_pages_[c] = parsed[c].pages; + num_objects_to_move_[c] = parsed[c].num_to_move; + } + + // Fill any unspecified size classes with 0. + for (int x = num_classes; x < kNumBaseClasses; x++) { + class_to_size_[x] = 0; + class_to_pages_[x] = 0; + num_objects_to_move_[x] = 0; + } + + // Copy selected size classes into the upper registers. + for (int i = 1; i < (kNumClasses / kNumBaseClasses); i++) { + std::copy(&class_to_size_[0], &class_to_size_[kNumBaseClasses], + &class_to_size_[kNumBaseClasses * i]); + std::copy(&class_to_pages_[0], &class_to_pages_[kNumBaseClasses], + &class_to_pages_[kNumBaseClasses * i]); + std::copy(&num_objects_to_move_[0], &num_objects_to_move_[kNumBaseClasses], + &num_objects_to_move_[kNumBaseClasses * i]); + } +} + +// Return true if all size classes meet the requirements for alignment +// ordering and min and max values. 
+bool SizeMap::ValidSizeClasses(int num_classes, const SizeClassInfo* parsed) { + if (num_classes <= 0) { + return false; + } + if (kHasExpandedClasses && num_classes > kNumBaseClasses) { + num_classes = kNumBaseClasses; + } + + for (int c = 1; c < num_classes; c++) { + size_t class_size = parsed[c].size; + size_t pages = parsed[c].pages; + size_t num_objects_to_move = parsed[c].num_to_move; + // Each size class must be larger than the previous size class. + if (class_size <= parsed[c - 1].size) { + Log(kLog, __FILE__, __LINE__, "Non-increasing size class", c, + parsed[c - 1].size, class_size); + return false; + } + if (class_size > kMaxSize) { + Log(kLog, __FILE__, __LINE__, "size class too big", c, class_size, + kMaxSize); + return false; + } + // Check required alignment + size_t alignment = 128; + if (class_size <= kMultiPageSize) { + alignment = kAlignment; + } else if (class_size <= SizeMap::kMaxSmallSize) { + alignment = kMultiPageAlignment; + } + if ((class_size & (alignment - 1)) != 0) { + Log(kLog, __FILE__, __LINE__, "Not aligned properly", c, class_size, + alignment); + return false; + } + if (class_size <= kMultiPageSize && pages != 1) { + Log(kLog, __FILE__, __LINE__, "Multiple pages not allowed", class_size, + pages, kMultiPageSize); + return false; + } + if (pages >= 256) { + Log(kLog, __FILE__, __LINE__, "pages limited to 255", pages); + return false; + } + if (num_objects_to_move > kMaxObjectsToMove) { + Log(kLog, __FILE__, __LINE__, "num objects to move too large", + num_objects_to_move, kMaxObjectsToMove); + return false; + } + } + // Last size class must be able to hold kMaxSize. + if (parsed[num_classes - 1].size < kMaxSize) { + Log(kLog, __FILE__, __LINE__, "last class doesn't cover kMaxSize", + num_classes - 1, parsed[num_classes - 1].size, kMaxSize); + return false; + } + return true; +} + +int ABSL_ATTRIBUTE_WEAK default_want_legacy_spans(); + +// Initialize the mapping arrays +void SizeMap::Init() { + // Do some sanity checking on add_amount[]/shift_amount[]/class_array[] + if (ClassIndex(0) != 0) { + Crash(kCrash, __FILE__, __LINE__, "Invalid class index for size 0", + ClassIndex(0)); + } + if (ClassIndex(kMaxSize) >= sizeof(class_array_)) { + Crash(kCrash, __FILE__, __LINE__, "Invalid class index for kMaxSize", + ClassIndex(kMaxSize)); + } + + static_assert(kAlignment <= 16, "kAlignment is too large"); + + if (IsExperimentActive(Experiment::TEST_ONLY_TCMALLOC_POW2_SIZECLASS)) { + SetSizeClasses(kExperimentalPow2SizeClassesCount, + kExperimentalPow2SizeClasses); + } else if (IsExperimentActive( + Experiment::TEST_ONLY_TCMALLOC_POW2_BELOW64_SIZECLASS)) { + SetSizeClasses(kExperimentalPow2Below64SizeClassesCount, + kExperimentalPow2Below64SizeClasses); + } else { + if (default_want_legacy_spans != nullptr && + default_want_legacy_spans() > 0 + ) { + SetSizeClasses(kLegacySizeClassesCount, kLegacySizeClasses); + } else { + SetSizeClasses(kSizeClassesCount, kSizeClasses); + } + } + MaybeRunTimeSizeClasses(); + + int next_size = 0; + for (int c = 1; c < kNumClasses; c++) { + const int max_size_in_class = class_to_size_[c]; + + for (int s = next_size; s <= max_size_in_class; s += kAlignment) { + class_array_[ClassIndex(s)] = c; + } + next_size = max_size_in_class + kAlignment; + if (next_size > kMaxSize) { + break; + } + } +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/common.h b/contrib/libs/tcmalloc/tcmalloc/common.h new file mode 100644 index 0000000000..d44811c726 --- /dev/null 
+++ b/contrib/libs/tcmalloc/tcmalloc/common.h @@ -0,0 +1,524 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Common definitions for tcmalloc code. + +#ifndef TCMALLOC_COMMON_H_ +#define TCMALLOC_COMMON_H_ + +#include <stddef.h> +#include <stdint.h> + +#include <limits> +#include <type_traits> + +#include "absl/base/attributes.h" +#include "absl/base/dynamic_annotations.h" +#include "absl/base/internal/spinlock.h" +#include "absl/base/macros.h" +#include "absl/base/optimization.h" +#include "absl/numeric/bits.h" +#include "absl/strings/string_view.h" +#include "absl/types/span.h" +#include "tcmalloc/experiment.h" +#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/optimization.h" +#include "tcmalloc/size_class_info.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +//------------------------------------------------------------------- +// Configuration +//------------------------------------------------------------------- + +// There are four different models for tcmalloc which are created by defining a +// set of constant variables differently: +// +// DEFAULT: +// The default configuration strives for good performance while trying to +// minimize fragmentation. It uses a smaller page size to reduce +// fragmentation, but allocates per-thread and per-cpu capacities similar to +// TCMALLOC_LARGE_PAGES / TCMALLOC_256K_PAGES. +// +// TCMALLOC_LARGE_PAGES: +// Larger page sizes increase the bookkeeping granularity used by TCMalloc for +// its allocations. This can reduce PageMap size and traffic to the +// innermost cache (the page heap), but can increase memory footprints. As +// TCMalloc will not reuse a page for a different allocation size until the +// entire page is deallocated, this can be a source of increased memory +// fragmentation. +// +// Historically, larger page sizes improved lookup performance for the +// pointer-to-size lookup in the PageMap that was part of the critical path. +// With most deallocations leveraging C++14's sized delete feature +// (https://isocpp.org/files/papers/n3778.html), this optimization is less +// significant. +// +// TCMALLOC_256K_PAGES +// This configuration uses an even larger page size (256KB) as the unit of +// accounting granularity. +// +// TCMALLOC_SMALL_BUT_SLOW: +// Used for situations where minimizing the memory footprint is the most +// desirable attribute, even at the cost of performance. +// +// The constants that vary between models are: +// +// kPageShift - Shift amount used to compute the page size. +// kNumBaseClasses - Number of size classes serviced by bucket allocators +// kMaxSize - Maximum size serviced by bucket allocators (thread/cpu/central) +// kMinThreadCacheSize - The minimum size in bytes of each ThreadCache. +// kMaxThreadCacheSize - The maximum size in bytes of each ThreadCache. 
+// kDefaultOverallThreadCacheSize - The maximum combined size in bytes of all +// ThreadCaches for an executable. +// kStealAmount - The number of bytes one ThreadCache will steal from another +// when the first ThreadCache is forced to Scavenge(), delaying the next +// call to Scavenge for this thread. + +// Older configurations had their own customized macros. Convert them into +// a page-shift parameter that is checked below. + +#ifndef TCMALLOC_PAGE_SHIFT +#ifdef TCMALLOC_SMALL_BUT_SLOW +#define TCMALLOC_PAGE_SHIFT 12 +#define TCMALLOC_USE_PAGEMAP3 +#elif defined(TCMALLOC_256K_PAGES) +#define TCMALLOC_PAGE_SHIFT 18 +#elif defined(TCMALLOC_LARGE_PAGES) +#define TCMALLOC_PAGE_SHIFT 15 +#else +#define TCMALLOC_PAGE_SHIFT 13 +#endif +#else +#error "TCMALLOC_PAGE_SHIFT is an internal macro!" +#endif + +#if TCMALLOC_PAGE_SHIFT == 12 +inline constexpr size_t kPageShift = 12; +inline constexpr size_t kNumBaseClasses = 46; +inline constexpr bool kHasExpandedClasses = false; +inline constexpr size_t kMaxSize = 8 << 10; +inline constexpr size_t kMinThreadCacheSize = 4 * 1024; +inline constexpr size_t kMaxThreadCacheSize = 64 * 1024; +inline constexpr size_t kMaxCpuCacheSize = 20 * 1024; +inline constexpr size_t kDefaultOverallThreadCacheSize = kMaxThreadCacheSize; +inline constexpr size_t kStealAmount = kMinThreadCacheSize; +inline constexpr size_t kDefaultProfileSamplingRate = 1 << 19; +inline constexpr size_t kMinPages = 2; +#elif TCMALLOC_PAGE_SHIFT == 15 +inline constexpr size_t kPageShift = 15; +inline constexpr size_t kNumBaseClasses = 78; +inline constexpr bool kHasExpandedClasses = true; +inline constexpr size_t kMaxSize = 256 * 1024; +inline constexpr size_t kMinThreadCacheSize = kMaxSize * 2; +inline constexpr size_t kMaxThreadCacheSize = 4 << 20; +inline constexpr size_t kMaxCpuCacheSize = 3 * 1024 * 1024; +inline constexpr size_t kDefaultOverallThreadCacheSize = + 8u * kMaxThreadCacheSize; +inline constexpr size_t kStealAmount = 1 << 16; +inline constexpr size_t kDefaultProfileSamplingRate = 1 << 21; +inline constexpr size_t kMinPages = 8; +#elif TCMALLOC_PAGE_SHIFT == 18 +inline constexpr size_t kPageShift = 18; +inline constexpr size_t kNumBaseClasses = 89; +inline constexpr bool kHasExpandedClasses = true; +inline constexpr size_t kMaxSize = 256 * 1024; +inline constexpr size_t kMinThreadCacheSize = kMaxSize * 2; +inline constexpr size_t kMaxThreadCacheSize = 4 << 20; +inline constexpr size_t kMaxCpuCacheSize = 3 * 1024 * 1024; +inline constexpr size_t kDefaultOverallThreadCacheSize = + 8u * kMaxThreadCacheSize; +inline constexpr size_t kStealAmount = 1 << 16; +inline constexpr size_t kDefaultProfileSamplingRate = 1 << 21; +inline constexpr size_t kMinPages = 8; +#elif TCMALLOC_PAGE_SHIFT == 13 +inline constexpr size_t kPageShift = 13; +inline constexpr size_t kNumBaseClasses = 86; +inline constexpr bool kHasExpandedClasses = true; +inline constexpr size_t kMaxSize = 256 * 1024; +inline constexpr size_t kMinThreadCacheSize = kMaxSize * 2; +inline constexpr size_t kMaxThreadCacheSize = 4 << 20; +inline constexpr size_t kMaxCpuCacheSize = 3 * 1024 * 1024; +inline constexpr size_t kDefaultOverallThreadCacheSize = + 8u * kMaxThreadCacheSize; +inline constexpr size_t kStealAmount = 1 << 16; +inline constexpr size_t kDefaultProfileSamplingRate = 1 << 21; +inline constexpr size_t kMinPages = 8; +#else +#error "Unsupported TCMALLOC_PAGE_SHIFT value!" +#endif + +// Sanitizers constrain the memory layout which causes problems with the +// enlarged tags required to represent NUMA partitions. 
Disable NUMA awareness +// to avoid failing to mmap memory. +#if defined(TCMALLOC_NUMA_AWARE) && !defined(MEMORY_SANITIZER) && \ + !defined(THREAD_SANITIZER) +inline constexpr size_t kNumaPartitions = 2; +#else +inline constexpr size_t kNumaPartitions = 1; +#endif + +// We have copies of kNumBaseClasses size classes for each NUMA node, followed +// by any expanded classes. +inline constexpr size_t kExpandedClassesStart = + kNumBaseClasses * kNumaPartitions; +inline constexpr size_t kNumClasses = + kExpandedClassesStart + (kHasExpandedClasses ? kNumBaseClasses : 0); + +// Size classes are often stored as uint32_t values, but there are some +// situations where we need to store a size class with as compact a +// representation as possible (e.g. in PageMap). Here we determine the integer +// type to use in these situations - i.e. the smallest integer type large +// enough to store values in the range [0,kNumClasses). +constexpr size_t kMaxClass = kNumClasses - 1; +using CompactSizeClass = + std::conditional_t<kMaxClass <= std::numeric_limits<uint8_t>::max(), + uint8_t, uint16_t>; + +// ~64K classes ought to be enough for anybody, but let's be sure. +static_assert(kMaxClass <= std::numeric_limits<CompactSizeClass>::max()); + +// Minimum/maximum number of batches in TransferCache per size class. +// Actual numbers depends on a number of factors, see TransferCache::Init +// for details. +inline constexpr size_t kMinObjectsToMove = 2; +inline constexpr size_t kMaxObjectsToMove = 128; + +inline constexpr size_t kPageSize = 1 << kPageShift; +// Verify that the page size used is at least 8x smaller than the maximum +// element size in the thread cache. This guarantees at most 12.5% internal +// fragmentation (1/8). When page size is 256k (kPageShift == 18), the benefit +// of increasing kMaxSize to be multiple of kPageSize is unclear. Object size +// profile data indicates that the number of simultaneously live objects (of +// size >= 256k) tends to be very small. Keeping those objects as 'large' +// objects won't cause too much memory waste, while heap memory reuse can be +// improved. Increasing kMaxSize to be too large has another bad side effect -- +// the thread cache pressure is increased, which will in turn increase traffic +// between central cache and thread cache, leading to performance degradation. +static_assert((kMaxSize / kPageSize) >= kMinPages || kPageShift >= 18, + "Ratio of kMaxSize / kPageSize is too small"); + +inline constexpr size_t kAlignment = 8; +// log2 (kAlignment) +inline constexpr size_t kAlignmentShift = absl::bit_width(kAlignment - 1u); + +// The number of times that a deallocation can cause a freelist to +// go over its max_length() before shrinking max_length(). +inline constexpr int kMaxOverages = 3; + +// Maximum length we allow a per-thread free-list to have before we +// move objects from it into the corresponding central free-list. We +// want this big to avoid locking the central free-list too often. It +// should not hurt to make this list somewhat big because the +// scavenging code will shrink it down when its contents are not in use. +inline constexpr int kMaxDynamicFreeListLength = 8192; + +enum class MemoryTag : uint8_t { + // Sampled, infrequently allocated + kSampled = 0x0, + // Not sampled, NUMA partition 0 + kNormalP0 = 0x1, + // Not sampled, NUMA partition 1 + kNormalP1 = (kNumaPartitions > 1) ? 
0x2 : 0xff, + // Not sampled + kNormal = kNormalP0, +}; + +inline constexpr uintptr_t kTagShift = std::min(kAddressBits - 4, 42); +inline constexpr uintptr_t kTagMask = uintptr_t{0x3} << kTagShift; + +// Returns true if ptr is tagged. +ABSL_DEPRECATED("Replace with specific tests") +inline bool IsTaggedMemory(const void* ptr) { + return (reinterpret_cast<uintptr_t>(ptr) & kTagMask) == 0; +} + +inline bool IsSampledMemory(const void* ptr) { + constexpr uintptr_t kSampledNormalMask = kNumaPartitions > 1 ? 0x3 : 0x1; + + static_assert(static_cast<uintptr_t>(MemoryTag::kNormalP0) & + kSampledNormalMask); + static_assert(static_cast<uintptr_t>(MemoryTag::kNormalP1) & + kSampledNormalMask); + + const uintptr_t tag = + (reinterpret_cast<uintptr_t>(ptr) & kTagMask) >> kTagShift; + return (tag & kSampledNormalMask) == + static_cast<uintptr_t>(MemoryTag::kSampled); +} + +inline bool IsNormalMemory(const void* ptr) { return !IsSampledMemory(ptr); } + +inline MemoryTag GetMemoryTag(const void* ptr) { + return static_cast<MemoryTag>((reinterpret_cast<uintptr_t>(ptr) & kTagMask) >> + kTagShift); +} + +absl::string_view MemoryTagToLabel(MemoryTag tag); + +inline constexpr bool IsExpandedSizeClass(unsigned cl) { + return kHasExpandedClasses && (cl >= kExpandedClassesStart); +} + +#if !defined(TCMALLOC_SMALL_BUT_SLOW) && __SIZEOF_POINTER__ != 4 +// Always allocate at least a huge page +inline constexpr size_t kMinSystemAlloc = kHugePageSize; +inline constexpr size_t kMinMmapAlloc = 1 << 30; // mmap() in 1GiB ranges. +#else +// Allocate in units of 2MiB. This is the size of a huge page for x86, but +// not for Power. +inline constexpr size_t kMinSystemAlloc = 2 << 20; +// mmap() in units of 32MiB. This is a multiple of huge page size for +// both x86 (2MiB) and Power (16MiB) +inline constexpr size_t kMinMmapAlloc = 32 << 20; +#endif + +static_assert(kMinMmapAlloc % kMinSystemAlloc == 0, + "Minimum mmap allocation size is not a multiple of" + " minimum system allocation size"); + +inline MemoryTag NumaNormalTag(size_t numa_partition) { + switch (numa_partition) { + case 0: + return MemoryTag::kNormalP0; + case 1: + return MemoryTag::kNormalP1; + default: + ASSUME(false); + __builtin_unreachable(); + } +} + +inline size_t NumaPartitionFromPointer(void* ptr) { + if constexpr (kNumaPartitions == 1) { + return 0; + } + + switch (GetMemoryTag(ptr)) { + case MemoryTag::kNormalP1: + return 1; + default: + return 0; + } +} + +// Size-class information + mapping +class SizeMap { + public: + // All size classes <= 512 in all configs always have 1 page spans. + static constexpr size_t kMultiPageSize = 512; + // Min alignment for all size classes > kMultiPageSize in all configs. + static constexpr size_t kMultiPageAlignment = 64; + // log2 (kMultiPageAlignment) + static constexpr size_t kMultiPageAlignmentShift = + absl::bit_width(kMultiPageAlignment - 1u); + + private: + //------------------------------------------------------------------- + // Mapping from size to size_class and vice versa + //------------------------------------------------------------------- + + // Sizes <= 1024 have an alignment >= 8. So for such sizes we have an + // array indexed by ceil(size/8). Sizes > 1024 have an alignment >= 128. + // So for these larger sizes we have an array indexed by ceil(size/128). + // + // We flatten both logical arrays into one physical array and use + // arithmetic to compute an appropriate index. The constants used by + // ClassIndex() were selected to make the flattening work. 
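+  // An illustrative check of that arithmetic: for s > 1024 the expression
+  // (s + 127 + (120 << 7)) >> 7 equals ceil(s / 128) + 120, so s = 1025 maps
+  // to (1025 + 127 + 15360) >> 7 = 129, exactly one past
+  // ClassIndex(1024) == (1024 + 7) >> 3 == 128. The 120 << 7 bias is what
+  // stitches the two logical arrays together without a gap.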
+ // + // Examples: + // Size Expression Index + // ------------------------------------------------------- + // 0 (0 + 7) / 8 0 + // 1 (1 + 7) / 8 1 + // ... + // 1024 (1024 + 7) / 8 128 + // 1025 (1025 + 127 + (120<<7)) / 128 129 + // ... + // 32768 (32768 + 127 + (120<<7)) / 128 376 + static constexpr int kMaxSmallSize = 1024; + static constexpr size_t kClassArraySize = + ((kMaxSize + 127 + (120 << 7)) >> 7) + 1; + + // Batch size is the number of objects to move at once. + typedef unsigned char BatchSize; + + // class_array_ is accessed on every malloc, so is very hot. We make it the + // first member so that it inherits the overall alignment of a SizeMap + // instance. In particular, if we create a SizeMap instance that's cache-line + // aligned, this member is also aligned to the width of a cache line. + CompactSizeClass + class_array_[kClassArraySize * (kHasExpandedClasses ? 2 : 1)] = {0}; + + // Number of objects to move between a per-thread list and a central + // list in one shot. We want this to be not too small so we can + // amortize the lock overhead for accessing the central list. Making + // it too big may temporarily cause unnecessary memory wastage in the + // per-thread free list until the scavenger cleans up the list. + BatchSize num_objects_to_move_[kNumClasses] = {0}; + + // If size is no more than kMaxSize, compute index of the + // class_array[] entry for it, putting the class index in output + // parameter idx and returning true. Otherwise return false. + static inline bool ABSL_ATTRIBUTE_ALWAYS_INLINE + ClassIndexMaybe(size_t s, uint32_t* idx) { + if (ABSL_PREDICT_TRUE(s <= kMaxSmallSize)) { + *idx = (static_cast<uint32_t>(s) + 7) >> 3; + return true; + } else if (s <= kMaxSize) { + *idx = (static_cast<uint32_t>(s) + 127 + (120 << 7)) >> 7; + return true; + } + return false; + } + + static inline size_t ClassIndex(size_t s) { + uint32_t ret; + CHECK_CONDITION(ClassIndexMaybe(s, &ret)); + return ret; + } + + // Mapping from size class to number of pages to allocate at a time + unsigned char class_to_pages_[kNumClasses] = {0}; + + // Mapping from size class to max size storable in that class + uint32_t class_to_size_[kNumClasses] = {0}; + + // If environment variable defined, use it to override sizes classes. + // Returns true if all classes defined correctly. + bool MaybeRunTimeSizeClasses(); + + protected: + // Set the give size classes to be used by TCMalloc. + void SetSizeClasses(int num_classes, const SizeClassInfo* parsed); + + // Check that the size classes meet all requirements. + bool ValidSizeClasses(int num_classes, const SizeClassInfo* parsed); + + // Definition of size class that is set in size_classes.cc + static const SizeClassInfo kSizeClasses[]; + static const int kSizeClassesCount; + + static const SizeClassInfo kExperimentalPow2Below64SizeClasses[]; + static const int kExperimentalPow2Below64SizeClassesCount; + // kExperimentalPowBelow64SizeClassesCount + static const SizeClassInfo kExperimentalPow2SizeClasses[]; + static const int kExperimentalPow2SizeClassesCount; + + // Definition of size class that is set in size_classes.cc + static const SizeClassInfo kLegacySizeClasses[]; + static const int kLegacySizeClassesCount; + + public: + // constexpr constructor to guarantee zero-initialization at compile-time. We + // rely on Init() to populate things. + constexpr SizeMap() = default; + + // Initialize the mapping arrays + void Init(); + + // Returns the size class for size `size` respecting the alignment + // requirements of `policy`. 
+ // + // Returns true on success. Returns false if either: + // - the size exceeds the maximum size class size. + // - the align size is greater or equal to the default page size + // - no matching properly aligned size class is available + // + // Requires that policy.align() returns a non-zero power of 2. + // + // When policy.align() = 1 the default alignment of the size table will be + // used. If policy.align() is constexpr 1 (e.g. when using + // DefaultAlignPolicy) then alignment-related code will optimize away. + // + // TODO(b/171978365): Replace the output parameter with returning + // absl::optional<uint32_t>. + template <typename Policy> + inline bool ABSL_ATTRIBUTE_ALWAYS_INLINE GetSizeClass(Policy policy, + size_t size, + uint32_t* cl) { + const size_t align = policy.align(); + ASSERT(absl::has_single_bit(align)); + + if (ABSL_PREDICT_FALSE(align >= kPageSize)) { + // TODO(b/172060547): Consider changing this to align > kPageSize. + ABSL_ANNOTATE_MEMORY_IS_UNINITIALIZED(cl, sizeof(*cl)); + return false; + } + + uint32_t idx; + if (ABSL_PREDICT_FALSE(!ClassIndexMaybe(size, &idx))) { + ABSL_ANNOTATE_MEMORY_IS_UNINITIALIZED(cl, sizeof(*cl)); + return false; + } + *cl = class_array_[idx] + policy.scaled_numa_partition(); + + // Predict that size aligned allocs most often directly map to a proper + // size class, i.e., multiples of 32, 64, etc, matching our class sizes. + const size_t mask = (align - 1); + do { + if (ABSL_PREDICT_TRUE((class_to_size(*cl) & mask) == 0)) { + return true; + } + } while ((++*cl % kNumBaseClasses) != 0); + + ABSL_ANNOTATE_MEMORY_IS_UNINITIALIZED(cl, sizeof(*cl)); + return false; + } + + // Returns size class for given size, or 0 if this instance has not been + // initialized yet. REQUIRES: size <= kMaxSize. + template <typename Policy> + inline size_t ABSL_ATTRIBUTE_ALWAYS_INLINE SizeClass(Policy policy, + size_t size) { + ASSERT(size <= kMaxSize); + uint32_t ret = 0; + GetSizeClass(policy, size, &ret); + return ret; + } + + // Get the byte-size for a specified class. REQUIRES: cl <= kNumClasses. + inline size_t ABSL_ATTRIBUTE_ALWAYS_INLINE class_to_size(size_t cl) { + ASSERT(cl < kNumClasses); + return class_to_size_[cl]; + } + + // Mapping from size class to number of pages to allocate at a time + inline size_t class_to_pages(size_t cl) { + ASSERT(cl < kNumClasses); + return class_to_pages_[cl]; + } + + // Number of objects to move between a per-thread list and a central + // list in one shot. We want this to be not too small so we can + // amortize the lock overhead for accessing the central list. Making + // it too big may temporarily cause unnecessary memory wastage in the + // per-thread free list until the scavenger cleans up the list. + inline SizeMap::BatchSize num_objects_to_move(size_t cl) { + ASSERT(cl < kNumClasses); + return num_objects_to_move_[cl]; + } +}; + +// Linker initialized, so this lock can be accessed at any time. +extern absl::base_internal::SpinLock pageheap_lock; + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_COMMON_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/cpu_cache.cc b/contrib/libs/tcmalloc/tcmalloc/cpu_cache.cc new file mode 100644 index 0000000000..8ae02b38e9 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/cpu_cache.cc @@ -0,0 +1,1140 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/cpu_cache.h" + +#include <stdlib.h> +#include <string.h> + +#include <algorithm> +#include <atomic> + +#include "absl/base/dynamic_annotations.h" +#include "absl/base/internal/spinlock.h" +#include "absl/base/internal/sysinfo.h" +#include "absl/base/macros.h" +#include "absl/base/thread_annotations.h" +#include "absl/container/fixed_array.h" +#include "tcmalloc/arena.h" +#include "tcmalloc/common.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal_malloc_extension.h" +#include "tcmalloc/parameters.h" +#include "tcmalloc/static_vars.h" +#include "tcmalloc/transfer_cache.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +static cpu_set_t FillActiveCpuMask() { + cpu_set_t allowed_cpus; + if (sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus) != 0) { + CPU_ZERO(&allowed_cpus); + } + +#ifdef PERCPU_USE_RSEQ + const bool real_cpus = !subtle::percpu::UsingFlatVirtualCpus(); +#else + const bool real_cpus = true; +#endif + + if (real_cpus) { + return allowed_cpus; + } + + const int virtual_cpu_count = CPU_COUNT(&allowed_cpus); + CPU_ZERO(&allowed_cpus); + for (int cpu = 0; cpu < virtual_cpu_count; ++cpu) { + CPU_SET(cpu, &allowed_cpus); + } + return allowed_cpus; +} + +// MaxCapacity() determines how we distribute memory in the per-cpu cache +// to the various class sizes. +static size_t MaxCapacity(size_t cl) { + // The number of size classes that are commonly used and thus should be + // allocated more slots in the per-cpu cache. + static constexpr size_t kNumSmall = 10; + + // The memory used for each per-CPU slab is the sum of: + // sizeof(std::atomic<int64_t>) * kNumClasses + // sizeof(void*) * (kSmallObjectDepth + 1) * kNumSmall + // sizeof(void*) * (kLargeObjectDepth + 1) * kNumLarge + // + // Class size 0 has MaxCapacity() == 0, which is the reason for using + // kNumClasses - 1 above instead of kNumClasses. + // + // Each Size class region in the slab is preceded by one padding pointer that + // points to itself, because prefetch instructions of invalid pointers are + // slow. That is accounted for by the +1 for object depths. +#if defined(TCMALLOC_SMALL_BUT_SLOW) + // With SMALL_BUT_SLOW we have 4KiB of per-cpu slab and 46 class sizes we + // allocate: + // == 8 * 46 + 8 * ((16 + 1) * 10 + (6 + 1) * 35) = 4038 bytes of 4096 + static const uint16_t kSmallObjectDepth = 16; + static const uint16_t kLargeObjectDepth = 6; +#else + // We allocate 256KiB per-cpu for pointers to cached per-cpu memory. 
+ // Each 256KiB is a subtle::percpu::TcmallocSlab::Slabs + // Max(kNumClasses) is 89, so the maximum footprint per CPU is: + // 89 * 8 + 8 * ((2048 + 1) * 10 + (152 + 1) * 78 + 88) = 254 KiB + static const uint16_t kSmallObjectDepth = 2048; + static const uint16_t kLargeObjectDepth = 152; +#endif + if (cl == 0 || cl >= kNumClasses) return 0; + + if (Static::sharded_transfer_cache().should_use(cl)) { + return 0; + } + + if (Static::sizemap().class_to_size(cl) == 0) { + return 0; + } + + if (!IsExpandedSizeClass(cl) && (cl % kNumBaseClasses) <= kNumSmall) { + // Small object sizes are very heavily used and need very deep caches for + // good performance (well over 90% of malloc calls are for cl <= 10.) + return kSmallObjectDepth; + } + + if (IsExpandedSizeClass(cl)) { + return 0; + } + + return kLargeObjectDepth; +} + +static void *SlabAlloc(size_t size) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { + return Static::arena().Alloc(size); +} + +void CPUCache::Activate(ActivationMode mode) { + ASSERT(Static::IsInited()); + int num_cpus = absl::base_internal::NumCPUs(); + + size_t per_cpu_shift = kPerCpuShift; + const auto &topology = Static::numa_topology(); + if (topology.numa_aware()) { + per_cpu_shift += absl::bit_ceil(topology.active_partitions() - 1); + } + + const size_t kBytesAvailable = (1 << per_cpu_shift); + size_t bytes_required = sizeof(std::atomic<int64_t>) * kNumClasses; + + // Deal with size classes that correspond only to NUMA partitions that are in + // use. If NUMA awareness is disabled then we may have a smaller shift than + // would suffice for all of the unused size classes. + for (int cl = 0; + cl < Static::numa_topology().active_partitions() * kNumBaseClasses; + ++cl) { + const uint16_t mc = MaxCapacity(cl); + max_capacity_[cl] = mc; + bytes_required += sizeof(void *) * mc; + } + + // Deal with expanded size classes. + for (int cl = kExpandedClassesStart; cl < kNumClasses; ++cl) { + const uint16_t mc = MaxCapacity(cl); + max_capacity_[cl] = mc; + bytes_required += sizeof(void *) * mc; + } + + // As we may make certain size classes no-ops by selecting "0" at runtime, + // using a compile-time calculation overestimates the worst-case memory usage. + if (ABSL_PREDICT_FALSE(bytes_required > kBytesAvailable)) { + Crash(kCrash, __FILE__, __LINE__, "per-CPU memory exceeded, have ", + kBytesAvailable, " need ", bytes_required); + } + + absl::base_internal::SpinLockHolder h(&pageheap_lock); + + resize_ = reinterpret_cast<ResizeInfo *>( + Static::arena().Alloc(sizeof(ResizeInfo) * num_cpus)); + lazy_slabs_ = Parameters::lazy_per_cpu_caches(); + + auto max_cache_size = Parameters::max_per_cpu_cache_size(); + + for (int cpu = 0; cpu < num_cpus; ++cpu) { + for (int cl = 1; cl < kNumClasses; ++cl) { + resize_[cpu].per_class[cl].Init(); + } + resize_[cpu].available.store(max_cache_size, std::memory_order_relaxed); + resize_[cpu].capacity.store(max_cache_size, std::memory_order_relaxed); + resize_[cpu].last_steal.store(1, std::memory_order_relaxed); + } + + freelist_.Init(SlabAlloc, MaxCapacityHelper, lazy_slabs_, per_cpu_shift); + if (mode == ActivationMode::FastPathOn) { + Static::ActivateCPUCache(); + } +} + +// Fetch more items from the central cache, refill our local cache, +// and try to grow it if necessary. +// +// This is complicated by the fact that we can only tweak the cache on +// our current CPU and we might get migrated whenever (in fact, we +// might already have been migrated since failing to get memory...) 
+// +// So make sure only to make changes to one CPU's cache; at all times, +// it must be safe to find ourselves migrated (at which point we atomically +// return memory to the correct CPU.) +void *CPUCache::Refill(int cpu, size_t cl) { + const size_t batch_length = Static::sizemap().num_objects_to_move(cl); + + // UpdateCapacity can evict objects from other size classes as it tries to + // increase capacity of this size class. The objects are returned in + // to_return, we insert them into transfer cache at the end of function + // (to increase possibility that we stay on the current CPU as we are + // refilling the list). + ObjectsToReturn to_return; + const size_t target = + UpdateCapacity(cpu, cl, batch_length, false, &to_return); + + // Refill target objects in batch_length batches. + size_t total = 0; + size_t got; + size_t i; + void *result = nullptr; + void *batch[kMaxObjectsToMove]; + do { + const size_t want = std::min(batch_length, target - total); + got = Static::transfer_cache().RemoveRange(cl, batch, want); + if (got == 0) { + break; + } + total += got; + i = got; + if (result == nullptr) { + i--; + result = batch[i]; + } + if (i) { + i -= freelist_.PushBatch(cl, batch, i); + if (i != 0) { + static_assert(ABSL_ARRAYSIZE(batch) >= kMaxObjectsToMove, + "not enough space in batch"); + Static::transfer_cache().InsertRange(cl, absl::Span<void *>(batch, i)); + } + } + } while (got == batch_length && i == 0 && total < target && + cpu == freelist_.GetCurrentVirtualCpuUnsafe()); + + for (int i = to_return.count; i < kMaxToReturn; ++i) { + Static::transfer_cache().InsertRange( + to_return.cl[i], absl::Span<void *>(&(to_return.obj[i]), 1)); + } + + return result; +} + +size_t CPUCache::UpdateCapacity(int cpu, size_t cl, size_t batch_length, + bool overflow, ObjectsToReturn *to_return) { + // Freelist size balancing strategy: + // - We grow a size class only on overflow/underflow. + // - We shrink size classes in Steal as it scans all size classes. + // - If overflows/underflows happen on a size class, we want to grow its + // capacity to at least 2 * batch_length. It enables usage of the + // transfer cache and leaves the list half-full after we insert/remove + // a batch from the transfer cache. + // - We increase capacity beyond 2 * batch_length only when an overflow is + // followed by an underflow. That's the only case when we could benefit + // from larger capacity -- the overflow and the underflow would collapse. + // + // Note: we can't understand when we have a perfectly-sized list, because for + // a perfectly-sized list we don't hit any slow paths which looks the same as + // inactive list. Eventually we will shrink a perfectly-sized list a bit and + // then it will grow back. This won't happen very frequently for the most + // important small sizes, because we will need several ticks before we shrink + // it again. Also we will shrink it by 1, but grow by a batch. So we should + // have lots of time until we need to grow it again. + + const size_t max_capacity = max_capacity_[cl]; + size_t capacity = freelist_.Capacity(cpu, cl); + // We assert that the return value, target, is non-zero, so starting from an + // initial capacity of zero means we may be populating this core for the + // first time. 
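+  // (Illustrative note on the call-once below, using a hypothetical cpu
+  // number: the very first Refill or Overflow miss on, say, cpu 7 runs
+  // InitCPU for that cpu (under resize_[7].lock when lazy slabs are enabled)
+  // and marks the cpu populated; every later miss on cpu 7 falls straight
+  // through the call-once.)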
+ absl::base_internal::LowLevelCallOnce( + &resize_[cpu].initialized, + [](CPUCache *cache, int cpu) { + if (cache->lazy_slabs_) { + absl::base_internal::SpinLockHolder h(&cache->resize_[cpu].lock); + cache->freelist_.InitCPU(cpu, MaxCapacityHelper); + } + + // While we could unconditionally store, a lazy slab population + // implementation will require evaluating a branch. + cache->resize_[cpu].populated.store(true, std::memory_order_relaxed); + }, + this, cpu); + const bool grow_by_one = capacity < 2 * batch_length; + uint32_t successive = 0; + bool grow_by_batch = + resize_[cpu].per_class[cl].Update(overflow, grow_by_one, &successive); + if ((grow_by_one || grow_by_batch) && capacity != max_capacity) { + size_t increase = 1; + if (grow_by_batch) { + increase = std::min(batch_length, max_capacity - capacity); + } else if (!overflow && capacity < batch_length) { + // On underflow we want to grow to at least batch size, because that's + // what we want to request from transfer cache. + increase = batch_length - capacity; + } + Grow(cpu, cl, increase, to_return); + capacity = freelist_.Capacity(cpu, cl); + } + // Calculate number of objects to return/request from transfer cache. + // Generally we prefer to transfer a single batch, because transfer cache + // handles it efficiently. Except for 2 special cases: + size_t target = batch_length; + // "capacity + 1" because on overflow we already have one object from caller, + // so we can return a whole batch even if capacity is one less. Similarly, + // on underflow we need to return one object to caller, so we can request + // a whole batch even if capacity is one less. + if ((capacity + 1) < batch_length) { + // If we don't have a full batch, return/request just half. We are missing + // transfer cache anyway, and cost of insertion into central freelist is + // ~O(number of objects). + target = std::max<size_t>(1, (capacity + 1) / 2); + } else if (successive > 0 && capacity >= 3 * batch_length) { + // If the freelist is large and we are hitting series of overflows or + // underflows, return/request several batches at once. On the first overflow + // we return 1 batch, on the second -- 2, on the third -- 4 and so on up to + // half of the batches we have. We do this to save on the cost of hitting + // malloc/free slow path, reduce instruction cache pollution, avoid cache + // misses when accessing transfer/central caches, etc. + size_t num_batches = + std::min<size_t>(1 << std::min<uint32_t>(successive, 10), + ((capacity / batch_length) + 1) / 2); + target = num_batches * batch_length; + } + ASSERT(target != 0); + return target; +} + +void CPUCache::Grow(int cpu, size_t cl, size_t desired_increase, + ObjectsToReturn *to_return) { + const size_t size = Static::sizemap().class_to_size(cl); + const size_t desired_bytes = desired_increase * size; + size_t acquired_bytes; + + // First, there might be unreserved slack. Take what we can. + size_t before, after; + do { + before = resize_[cpu].available.load(std::memory_order_relaxed); + acquired_bytes = std::min(before, desired_bytes); + after = before - acquired_bytes; + } while (!resize_[cpu].available.compare_exchange_strong( + before, after, std::memory_order_relaxed, std::memory_order_relaxed)); + + if (acquired_bytes < desired_bytes) { + acquired_bytes += Steal(cpu, cl, desired_bytes - acquired_bytes, to_return); + } + + // We have all the memory we could reserve. Time to actually do the growth. 
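+  // (Worked numbers for the steps below, purely illustrative: if we reserved
+  // 4096 bytes for a 256-byte size class but only 8 more objects were
+  // requested, actual_increase is capped at 8, and if the freelist also
+  // accepts 8, the 2048 unused bytes are returned to this cpu's slack.)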
+ + // We might have gotten more than we wanted (stealing from larger sizeclasses) + // so don't grow _too_ much. + size_t actual_increase = acquired_bytes / size; + actual_increase = std::min(actual_increase, desired_increase); + // Remember, Grow may not give us all we ask for. + size_t increase = freelist_.Grow(cpu, cl, actual_increase, max_capacity_[cl]); + size_t increased_bytes = increase * size; + if (increased_bytes < acquired_bytes) { + // return whatever we didn't use to the slack. + size_t unused = acquired_bytes - increased_bytes; + resize_[cpu].available.fetch_add(unused, std::memory_order_relaxed); + } +} + +void CPUCache::TryReclaimingCaches() { + const int num_cpus = absl::base_internal::NumCPUs(); + + for (int cpu = 0; cpu < num_cpus; ++cpu) { + // Nothing to reclaim if the cpu is not populated. + if (!HasPopulated(cpu)) { + continue; + } + + uint64_t used_bytes = UsedBytes(cpu); + uint64_t prev_used_bytes = + resize_[cpu].reclaim_used_bytes.load(std::memory_order_relaxed); + + // Get reclaim miss and used bytes stats that were captured at the end of + // the previous interval. + const CpuCacheMissStats miss_stats = GetReclaimCacheMissStats(cpu); + uint64_t misses = + uint64_t{miss_stats.underflows} + uint64_t{miss_stats.overflows}; + + // Reclaim the cache if the number of used bytes and total number of misses + // stayed constant since the last interval. + if (used_bytes != 0 && used_bytes == prev_used_bytes && misses == 0) { + Reclaim(cpu); + } + + // Takes a snapshot of used bytes in the cache at the end of this interval + // so that we can calculate if cache usage changed in the next interval. + // + // Reclaim occurs on a single thread. So, the relaxed store to used_bytes + // is safe. + resize_[cpu].reclaim_used_bytes.store(used_bytes, + std::memory_order_relaxed); + } +} + +void CPUCache::ShuffleCpuCaches() { + // Knobs that we can potentially tune depending on the workloads. + constexpr double kBytesToStealPercent = 5.0; + constexpr int kMaxNumStealCpus = 5; + + const int num_cpus = absl::base_internal::NumCPUs(); + absl::FixedArray<std::pair<int, uint64_t>> misses(num_cpus); + + // Record the cumulative misses for the caches so that we can select the + // caches with the highest misses as the candidates to steal the cache for. + int max_populated_cpu = -1; + int num_populated_cpus = 0; + for (int cpu = 0; cpu < num_cpus; ++cpu) { + if (!HasPopulated(cpu)) { + continue; + } + const CpuCacheMissStats miss_stats = GetIntervalCacheMissStats(cpu); + misses[num_populated_cpus] = { + cpu, uint64_t{miss_stats.underflows} + uint64_t{miss_stats.overflows}}; + max_populated_cpu = cpu; + ++num_populated_cpus; + } + if (max_populated_cpu == -1) { + return; + } + + // Sorts misses to identify cpus with highest misses. + // + // TODO(vgogte): We can potentially sort the entire misses array and use that + // in StealFromOtherCache to determine cpus to steal from. That is, [0, + // num_dest_cpus) may be the destination cpus and [num_dest_cpus, num_cpus) + // may be cpus we may steal from. We can iterate through the array in a + // descending order to steal from them. The upside of this mechanism is that + // we would be able to do a more fair stealing, starting with cpus with lowest + // misses. The downside of this mechanism is that we would have to sort the + // entire misses array. This might be compute intensive on servers with high + // number of cpus (eg. Rome, Milan). We need to investigate the compute + // required to implement this. 
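+  // Illustrative selection example with hypothetical miss counts: given
+  // interval misses {cpu0: 10, cpu3: 50, cpu5: 50, cpu7: 20} and
+  // kMaxNumStealCpus = 5, the partial sort below orders the destinations as
+  // cpu3, cpu5, cpu7, cpu0 (highest misses first, ties broken by the lower
+  // cpu id), and each destination then tries to steal about 5% of the
+  // per-cpu cache limit from the other caches.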
+ const int num_dest_cpus = std::min(num_populated_cpus, kMaxNumStealCpus); + std::partial_sort(misses.begin(), misses.begin() + num_dest_cpus, + misses.end(), + [](std::pair<int, uint64_t> a, std::pair<int, uint64_t> b) { + if (a.second == b.second) { + return a.first < b.first; + } + return a.second > b.second; + }); + + // Try to steal kBytesToStealPercent percentage of max_per_cpu_cache_size for + // each destination cpu cache. + size_t to_steal = + kBytesToStealPercent / 100.0 * Parameters::max_per_cpu_cache_size(); + for (int i = 0; i < num_dest_cpus; ++i) { + StealFromOtherCache(misses[i].first, max_populated_cpu, to_steal); + } + + // Takes a snapshot of underflows and overflows at the end of this interval + // so that we can calculate the misses that occurred in the next interval. + for (int cpu = 0; cpu < num_cpus; ++cpu) { + size_t underflows = + resize_[cpu].total_underflows.load(std::memory_order_relaxed); + size_t overflows = + resize_[cpu].total_overflows.load(std::memory_order_relaxed); + + // Shuffle occurs on a single thread. So, the relaxed stores to + // prev_underflow and pre_overflow counters are safe. + resize_[cpu].shuffle_underflows.store(underflows, + std::memory_order_relaxed); + resize_[cpu].shuffle_overflows.store(overflows, std::memory_order_relaxed); + } +} + +static void ShrinkHandler(void *arg, size_t cl, void **batch, size_t count) { + const size_t batch_length = Static::sizemap().num_objects_to_move(cl); + for (size_t i = 0; i < count; i += batch_length) { + size_t n = std::min(batch_length, count - i); + Static::transfer_cache().InsertRange(cl, absl::Span<void *>(batch + i, n)); + } +} + +void CPUCache::StealFromOtherCache(int cpu, int max_populated_cpu, + size_t bytes) { + constexpr double kCacheMissThreshold = 0.80; + + const CpuCacheMissStats dest_misses = GetIntervalCacheMissStats(cpu); + + // If both underflows and overflows are 0, we should not need to steal. + if (dest_misses.underflows == 0 && dest_misses.overflows == 0) return; + + size_t acquired = 0; + + // We use last_cpu_cache_steal_ as a hint to start our search for cpu ids to + // steal from so that we can iterate through the cpus in a nice round-robin + // fashion. + int src_cpu = std::min(last_cpu_cache_steal_.load(std::memory_order_relaxed), + max_populated_cpu); + + // We iterate through max_populate_cpus number of cpus to steal from. + // max_populate_cpus records the max cpu id that has been populated. Note + // that, any intermediate changes since the max_populated_cpus was measured + // may have populated higher cpu ids, but we do not include those in the + // search. The approximation prevents us from doing another pass through the + // cpus to just find the latest populated cpu id. + // + // We break from the loop once we iterate through all the cpus once, or if the + // total number of acquired bytes is higher than or equal to the desired bytes + // we want to steal. + for (int cpu_offset = 1; cpu_offset <= max_populated_cpu && acquired < bytes; + ++cpu_offset) { + if (--src_cpu < 0) { + src_cpu = max_populated_cpu; + } + ASSERT(0 <= src_cpu); + ASSERT(src_cpu <= max_populated_cpu); + + // We do not steal from the same CPU. Maybe we can explore combining this + // with stealing from the same CPU later. + if (src_cpu == cpu) continue; + + // We do not steal from the cache that hasn't been populated yet. + if (!HasPopulated(src_cpu)) continue; + + // We do not steal from cache that has capacity less than our lower + // capacity threshold. 
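+    // (Worked example, assuming the default 3 MiB per-cpu limit: with
+    // kCacheCapacityThreshold = 0.20 the cutoff is roughly 614 KiB, so a
+    // source cache holding less than that is skipped.)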
+ if (Capacity(src_cpu) < + kCacheCapacityThreshold * Parameters::max_per_cpu_cache_size()) + continue; + + const CpuCacheMissStats src_misses = GetIntervalCacheMissStats(src_cpu); + + // If underflows and overflows from the source cpu are higher, we do not + // steal from that cache. We consider the cache as a candidate to steal from + // only when its misses are lower than 0.8x that of the dest cache. + if (src_misses.underflows > kCacheMissThreshold * dest_misses.underflows || + src_misses.overflows > kCacheMissThreshold * dest_misses.overflows) + continue; + + size_t start_cl = + resize_[src_cpu].last_steal.load(std::memory_order_relaxed); + + ASSERT(start_cl < kNumClasses); + ASSERT(0 < start_cl); + size_t source_cl = start_cl; + for (size_t offset = 1; offset < kNumClasses; ++offset) { + source_cl = start_cl + offset; + if (source_cl >= kNumClasses) { + source_cl -= kNumClasses - 1; + } + ASSERT(0 < source_cl); + ASSERT(source_cl < kNumClasses); + + const size_t capacity = freelist_.Capacity(src_cpu, source_cl); + if (capacity == 0) { + // Nothing to steal. + continue; + } + const size_t length = freelist_.Length(src_cpu, source_cl); + + // TODO(vgogte): Currently, scoring is similar to stealing from the + // same cpu in CpuCache::Steal(). Revisit this later to tune the + // knobs. + const size_t batch_length = + Static::sizemap().num_objects_to_move(source_cl); + size_t size = Static::sizemap().class_to_size(source_cl); + + // Clock-like algorithm to prioritize size classes for shrinking. + // + // Each size class has quiescent ticks counter which is incremented as we + // pass it, the counter is reset to 0 in UpdateCapacity on grow. + // If the counter value is 0, then we've just tried to grow the size + // class, so it makes little sense to shrink it back. The higher counter + // value the longer ago we grew the list and the more probable it is that + // the full capacity is unused. + // + // Then, we calculate "shrinking score", the higher the score the less we + // we want to shrink this size class. The score is considerably skewed + // towards larger size classes: smaller classes are usually used more + // actively and we also benefit less from shrinking smaller classes (steal + // less capacity). Then, we also avoid shrinking full freelists as we will + // need to evict an object and then go to the central freelist to return + // it. Then, we also avoid shrinking freelists that are just above batch + // size, because shrinking them will disable transfer cache. + // + // Finally, we shrink if the ticks counter is >= the score. + uint32_t qticks = resize_[src_cpu].per_class[source_cl].Tick(); + uint32_t score = 0; + // Note: the following numbers are based solely on intuition, common sense + // and benchmarking results. + if (size <= 144) { + score = 2 + (length >= capacity) + + (length >= batch_length && length < 2 * batch_length); + } else if (size <= 1024) { + score = 1 + (length >= capacity) + + (length >= batch_length && length < 2 * batch_length); + } else if (size <= (64 << 10)) { + score = (length >= capacity); + } + if (score > qticks) { + continue; + } + + // Finally, try to shrink (can fail if we were migrated). + // We always shrink by 1 object. The idea is that inactive lists will be + // shrunk to zero eventually anyway (or they just would not grow in the + // first place), but for active lists it does not make sense to + // aggressively shuffle capacity all the time. 
+ // + // If the list is full, ShrinkOtherCache first tries to pop enough items + // to make space and then shrinks the capacity. + // TODO(vgogte): Maybe we can steal more from a single list to avoid + // frequent locking overhead. + { + absl::base_internal::SpinLockHolder h(&resize_[src_cpu].lock); + if (freelist_.ShrinkOtherCache(src_cpu, source_cl, 1, nullptr, + ShrinkHandler) == 1) { + acquired += size; + resize_[src_cpu].capacity.fetch_sub(size, std::memory_order_relaxed); + } + } + + if (acquired >= bytes) { + break; + } + } + resize_[cpu].last_steal.store(source_cl, std::memory_order_relaxed); + } + // Record the last cpu id we stole from, which would provide a hint to the + // next time we iterate through the cpus for stealing. + last_cpu_cache_steal_.store(src_cpu, std::memory_order_relaxed); + + // Increment the capacity of the destination cpu cache by the amount of bytes + // acquired from source caches. + if (acquired) { + size_t before = resize_[cpu].available.load(std::memory_order_relaxed); + size_t bytes_with_stolen; + do { + bytes_with_stolen = before + acquired; + } while (!resize_[cpu].available.compare_exchange_weak( + before, bytes_with_stolen, std::memory_order_relaxed, + std::memory_order_relaxed)); + resize_[cpu].capacity.fetch_add(acquired, std::memory_order_relaxed); + } +} + +// There are rather a lot of policy knobs we could tweak here. +size_t CPUCache::Steal(int cpu, size_t dest_cl, size_t bytes, + ObjectsToReturn *to_return) { + // Steal from other sizeclasses. Try to go in a nice circle. + // Complicated by sizeclasses actually being 1-indexed. + size_t acquired = 0; + size_t start = resize_[cpu].last_steal.load(std::memory_order_relaxed); + ASSERT(start < kNumClasses); + ASSERT(0 < start); + size_t source_cl = start; + for (size_t offset = 1; offset < kNumClasses; ++offset) { + source_cl = start + offset; + if (source_cl >= kNumClasses) { + source_cl -= kNumClasses - 1; + } + ASSERT(0 < source_cl); + ASSERT(source_cl < kNumClasses); + // Decide if we want to steal source_cl. + if (source_cl == dest_cl) { + // First, no sense in picking your own pocket. + continue; + } + const size_t capacity = freelist_.Capacity(cpu, source_cl); + if (capacity == 0) { + // Nothing to steal. + continue; + } + const size_t length = freelist_.Length(cpu, source_cl); + const size_t batch_length = + Static::sizemap().num_objects_to_move(source_cl); + size_t size = Static::sizemap().class_to_size(source_cl); + + // Clock-like algorithm to prioritize size classes for shrinking. + // + // Each size class has quiescent ticks counter which is incremented as we + // pass it, the counter is reset to 0 in UpdateCapacity on grow. + // If the counter value is 0, then we've just tried to grow the size class, + // so it makes little sense to shrink it back. The higher counter value + // the longer ago we grew the list and the more probable it is that + // the full capacity is unused. + // + // Then, we calculate "shrinking score", the higher the score the less we + // we want to shrink this size class. The score is considerably skewed + // towards larger size classes: smaller classes are usually used more + // actively and we also benefit less from shrinking smaller classes (steal + // less capacity). Then, we also avoid shrinking full freelists as we will + // need to evict an object and then go to the central freelist to return it. + // Then, we also avoid shrinking freelists that are just above batch size, + // because shrinking them will disable transfer cache. 
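+    // As an illustrative reading of this scoring (hypothetical sizes): a
+    // 128-byte class whose list is full and holds between one and two
+    // batches scores 2 + 1 + 1 = 4 and so survives several quiescent ticks,
+    // while a 32 KiB class that is not full scores 0 and may be shrunk on
+    // the very next pass.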
+ // + // Finally, we shrink if the ticks counter is >= the score. + uint32_t qticks = resize_[cpu].per_class[source_cl].Tick(); + uint32_t score = 0; + // Note: the following numbers are based solely on intuition, common sense + // and benchmarking results. + if (size <= 144) { + score = 2 + (length >= capacity) + + (length >= batch_length && length < 2 * batch_length); + } else if (size <= 1024) { + score = 1 + (length >= capacity) + + (length >= batch_length && length < 2 * batch_length); + } else if (size <= (64 << 10)) { + score = (length >= capacity); + } + if (score > qticks) { + continue; + } + + if (length >= capacity) { + // The list is full, need to evict an object to shrink it. + if (to_return == nullptr) { + continue; + } + if (to_return->count == 0) { + // Can't steal any more because the to_return set is full. + break; + } + void *obj = freelist_.Pop(source_cl, NoopUnderflow); + if (obj) { + --to_return->count; + to_return->cl[to_return->count] = source_cl; + to_return->obj[to_return->count] = obj; + } + } + + // Finally, try to shrink (can fail if we were migrated). + // We always shrink by 1 object. The idea is that inactive lists will be + // shrunk to zero eventually anyway (or they just would not grow in the + // first place), but for active lists it does not make sense to aggressively + // shuffle capacity all the time. + if (freelist_.Shrink(cpu, source_cl, 1) == 1) { + acquired += size; + } + + if (cpu != freelist_.GetCurrentVirtualCpuUnsafe() || acquired >= bytes) { + // can't steal any more or don't need to + break; + } + } + // update the hint + resize_[cpu].last_steal.store(source_cl, std::memory_order_relaxed); + return acquired; +} + +int CPUCache::Overflow(void *ptr, size_t cl, int cpu) { + const size_t batch_length = Static::sizemap().num_objects_to_move(cl); + const size_t target = UpdateCapacity(cpu, cl, batch_length, true, nullptr); + // Return target objects in batch_length batches. 
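+  // (Illustrative walk-through with hypothetical numbers: given batch_length
+  // 32 and target 64, the loop below sends the overflowing pointer plus up
+  // to 63 objects popped from the per-cpu list to the transfer cache in two
+  // 32-object InsertRange calls, stopping early if the list runs dry or the
+  // thread migrates to another cpu.)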
+ size_t total = 0; + size_t count = 1; + void *batch[kMaxObjectsToMove]; + batch[0] = ptr; + do { + size_t want = std::min(batch_length, target - total); + if (count < want) { + count += freelist_.PopBatch(cl, batch + count, want - count); + } + if (!count) break; + + total += count; + static_assert(ABSL_ARRAYSIZE(batch) >= kMaxObjectsToMove, + "not enough space in batch"); + Static::transfer_cache().InsertRange(cl, absl::Span<void *>(batch, count)); + if (count != batch_length) break; + count = 0; + } while (total < target && cpu == freelist_.GetCurrentVirtualCpuUnsafe()); + tracking::Report(kFreeTruncations, cl, 1); + return 1; +} + +uint64_t CPUCache::Allocated(int target_cpu) const { + ASSERT(target_cpu >= 0); + if (!HasPopulated(target_cpu)) { + return 0; + } + + uint64_t total = 0; + for (int cl = 1; cl < kNumClasses; cl++) { + int size = Static::sizemap().class_to_size(cl); + total += size * freelist_.Capacity(target_cpu, cl); + } + return total; +} + +uint64_t CPUCache::UsedBytes(int target_cpu) const { + ASSERT(target_cpu >= 0); + if (!HasPopulated(target_cpu)) { + return 0; + } + + uint64_t total = 0; + for (int cl = 1; cl < kNumClasses; cl++) { + int size = Static::sizemap().class_to_size(cl); + total += size * freelist_.Length(target_cpu, cl); + } + return total; +} + +bool CPUCache::HasPopulated(int target_cpu) const { + ASSERT(target_cpu >= 0); + return resize_[target_cpu].populated.load(std::memory_order_relaxed); +} + +PerCPUMetadataState CPUCache::MetadataMemoryUsage() const { + return freelist_.MetadataMemoryUsage(); +} + +uint64_t CPUCache::TotalUsedBytes() const { + uint64_t total = 0; + for (int cpu = 0, num_cpus = absl::base_internal::NumCPUs(); cpu < num_cpus; + ++cpu) { + total += UsedBytes(cpu); + } + return total; +} + +uint64_t CPUCache::TotalObjectsOfClass(size_t cl) const { + ASSERT(cl < kNumClasses); + uint64_t total_objects = 0; + if (cl > 0) { + for (int cpu = 0, n = absl::base_internal::NumCPUs(); cpu < n; cpu++) { + if (!HasPopulated(cpu)) { + continue; + } + total_objects += freelist_.Length(cpu, cl); + } + } + return total_objects; +} + +uint64_t CPUCache::Unallocated(int cpu) const { + return resize_[cpu].available.load(std::memory_order_relaxed); +} + +uint64_t CPUCache::Capacity(int cpu) const { + return resize_[cpu].capacity.load(std::memory_order_relaxed); +} + +uint64_t CPUCache::CacheLimit() const { + return Parameters::max_per_cpu_cache_size(); +} + +struct DrainContext { + std::atomic<size_t> *available; + uint64_t bytes; +}; + +static void DrainHandler(void *arg, size_t cl, void **batch, size_t count, + size_t cap) { + DrainContext *ctx = static_cast<DrainContext *>(arg); + const size_t size = Static::sizemap().class_to_size(cl); + const size_t batch_length = Static::sizemap().num_objects_to_move(cl); + ctx->bytes += count * size; + // Drain resets capacity to 0, so return the allocated capacity to that + // CPU's slack. + ctx->available->fetch_add(cap * size, std::memory_order_relaxed); + for (size_t i = 0; i < count; i += batch_length) { + size_t n = std::min(batch_length, count - i); + Static::transfer_cache().InsertRange(cl, absl::Span<void *>(batch + i, n)); + } +} + +uint64_t CPUCache::Reclaim(int cpu) { + absl::base_internal::SpinLockHolder h(&resize_[cpu].lock); + + // If we haven't populated this core, freelist_.Drain() will touch the memory + // (for writing) as part of its locking process. Avoid faulting new pages as + // part of a release process. 
+ if (!resize_[cpu].populated.load(std::memory_order_relaxed)) { + return 0; + } + + DrainContext ctx{&resize_[cpu].available, 0}; + freelist_.Drain(cpu, &ctx, DrainHandler); + + // Record that the reclaim occurred for this CPU. + resize_[cpu].num_reclaims.store( + resize_[cpu].num_reclaims.load(std::memory_order_relaxed) + 1, + std::memory_order_relaxed); + return ctx.bytes; +} + +uint64_t CPUCache::GetNumReclaims(int cpu) const { + return resize_[cpu].num_reclaims.load(std::memory_order_relaxed); +} + +void CPUCache::RecordCacheMissStat(const int cpu, const bool is_malloc) { + CPUCache &cpu_cache = Static::cpu_cache(); + if (is_malloc) { + cpu_cache.resize_[cpu].total_underflows.fetch_add( + 1, std::memory_order_relaxed); + } else { + cpu_cache.resize_[cpu].total_overflows.fetch_add(1, + std::memory_order_relaxed); + } +} + +CPUCache::CpuCacheMissStats CPUCache::GetReclaimCacheMissStats(int cpu) const { + CpuCacheMissStats stats; + size_t total_underflows = + resize_[cpu].total_underflows.load(std::memory_order_relaxed); + size_t prev_reclaim_underflows = + resize_[cpu].reclaim_underflows.load(std::memory_order_relaxed); + // Takes a snapshot of underflows at the end of this interval so that we can + // calculate the misses that occurred in the next interval. + // + // Reclaim occurs on a single thread. So, a relaxed store to the reclaim + // underflow stat is safe. + resize_[cpu].reclaim_underflows.store(total_underflows, + std::memory_order_relaxed); + + // In case of a size_t overflow, we wrap around to 0. + stats.underflows = total_underflows > prev_reclaim_underflows + ? total_underflows - prev_reclaim_underflows + : 0; + + size_t total_overflows = + resize_[cpu].total_overflows.load(std::memory_order_relaxed); + size_t prev_reclaim_overflows = + resize_[cpu].reclaim_overflows.load(std::memory_order_relaxed); + // Takes a snapshot of overflows at the end of this interval so that we can + // calculate the misses that occurred in the next interval. + // + // Reclaim occurs on a single thread. So, a relaxed store to the reclaim + // overflow stat is safe. + resize_[cpu].reclaim_overflows.store(total_overflows, + std::memory_order_relaxed); + + // In case of a size_t overflow, we wrap around to 0. + stats.overflows = total_overflows > prev_reclaim_overflows + ? total_overflows - prev_reclaim_overflows + : 0; + + return stats; +} + +CPUCache::CpuCacheMissStats CPUCache::GetIntervalCacheMissStats(int cpu) const { + CpuCacheMissStats stats; + size_t total_underflows = + resize_[cpu].total_underflows.load(std::memory_order_relaxed); + size_t shuffle_underflows = + resize_[cpu].shuffle_underflows.load(std::memory_order_relaxed); + // In case of a size_t overflow, we wrap around to 0. + stats.underflows = total_underflows > shuffle_underflows + ? total_underflows - shuffle_underflows + : 0; + + size_t total_overflows = + resize_[cpu].total_overflows.load(std::memory_order_relaxed); + size_t shuffle_overflows = + resize_[cpu].shuffle_overflows.load(std::memory_order_relaxed); + // In case of a size_t overflow, we wrap around to 0. + stats.overflows = total_overflows > shuffle_overflows + ? 
total_overflows - shuffle_overflows + : 0; + + return stats; +} + +CPUCache::CpuCacheMissStats CPUCache::GetTotalCacheMissStats(int cpu) const { + CpuCacheMissStats stats; + stats.underflows = + resize_[cpu].total_underflows.load(std::memory_order_relaxed); + stats.overflows = + resize_[cpu].total_overflows.load(std::memory_order_relaxed); + return stats; +} + +void CPUCache::Print(Printer *out) const { + out->printf("------------------------------------------------\n"); + out->printf("Bytes in per-CPU caches (per cpu limit: %" PRIu64 " bytes)\n", + Static::cpu_cache().CacheLimit()); + out->printf("------------------------------------------------\n"); + + const cpu_set_t allowed_cpus = FillActiveCpuMask(); + + for (int cpu = 0, num_cpus = absl::base_internal::NumCPUs(); cpu < num_cpus; + ++cpu) { + static constexpr double MiB = 1048576.0; + + uint64_t rbytes = UsedBytes(cpu); + bool populated = HasPopulated(cpu); + uint64_t unallocated = Unallocated(cpu); + out->printf("cpu %3d: %12" PRIu64 + " bytes (%7.1f MiB) with" + "%12" PRIu64 " bytes unallocated %s%s\n", + cpu, rbytes, rbytes / MiB, unallocated, + CPU_ISSET(cpu, &allowed_cpus) ? " active" : "", + populated ? " populated" : ""); + } + + out->printf("------------------------------------------------\n"); + out->printf("Number of per-CPU cache underflows, overflows and reclaims\n"); + out->printf("------------------------------------------------\n"); + for (int cpu = 0, num_cpus = absl::base_internal::NumCPUs(); cpu < num_cpus; + ++cpu) { + CpuCacheMissStats miss_stats = GetTotalCacheMissStats(cpu); + uint64_t reclaims = GetNumReclaims(cpu); + out->printf( + "cpu %3d:" + "%12" PRIu64 + " underflows," + "%12" PRIu64 + " overflows," + "%12" PRIu64 " reclaims\n", + cpu, miss_stats.underflows, miss_stats.overflows, reclaims); + } +} + +void CPUCache::PrintInPbtxt(PbtxtRegion *region) const { + const cpu_set_t allowed_cpus = FillActiveCpuMask(); + + for (int cpu = 0, num_cpus = absl::base_internal::NumCPUs(); cpu < num_cpus; + ++cpu) { + PbtxtRegion entry = region->CreateSubRegion("cpu_cache"); + uint64_t rbytes = UsedBytes(cpu); + bool populated = HasPopulated(cpu); + uint64_t unallocated = Unallocated(cpu); + CpuCacheMissStats miss_stats = GetTotalCacheMissStats(cpu); + uint64_t reclaims = GetNumReclaims(cpu); + entry.PrintI64("cpu", uint64_t(cpu)); + entry.PrintI64("used", rbytes); + entry.PrintI64("unused", unallocated); + entry.PrintBool("active", CPU_ISSET(cpu, &allowed_cpus)); + entry.PrintBool("populated", populated); + entry.PrintI64("underflows", miss_stats.underflows); + entry.PrintI64("overflows", miss_stats.overflows); + entry.PrintI64("reclaims", reclaims); + } +} + +void CPUCache::AcquireInternalLocks() { + for (int cpu = 0, num_cpus = absl::base_internal::NumCPUs(); cpu < num_cpus; + ++cpu) { + resize_[cpu].lock.Lock(); + } +} + +void CPUCache::ReleaseInternalLocks() { + for (int cpu = 0, num_cpus = absl::base_internal::NumCPUs(); cpu < num_cpus; + ++cpu) { + resize_[cpu].lock.Unlock(); + } +} + +void CPUCache::PerClassResizeInfo::Init() { + state_.store(0, std::memory_order_relaxed); +} + +bool CPUCache::PerClassResizeInfo::Update(bool overflow, bool grow, + uint32_t *successive) { + int32_t raw = state_.load(std::memory_order_relaxed); + State state; + memcpy(&state, &raw, sizeof(state)); + const bool overflow_then_underflow = !overflow && state.overflow; + grow |= overflow_then_underflow; + // Reset quiescent ticks for Steal clock algorithm if we are going to grow. 
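+  // (Example of the transition below: if the previous miss was an overflow
+  // and this one is an underflow, Update reports the overflow-then-underflow
+  // pattern so the caller can grow capacity by up to a whole batch, zeroes
+  // quiescent_ticks, and restarts the successive counter at 0.)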
+ State new_state; + new_state.overflow = overflow; + new_state.quiescent_ticks = grow ? 0 : state.quiescent_ticks; + new_state.successive = overflow == state.overflow ? state.successive + 1 : 0; + memcpy(&raw, &new_state, sizeof(raw)); + state_.store(raw, std::memory_order_relaxed); + *successive = new_state.successive; + return overflow_then_underflow; +} + +uint32_t CPUCache::PerClassResizeInfo::Tick() { + int32_t raw = state_.load(std::memory_order_relaxed); + State state; + memcpy(&state, &raw, sizeof(state)); + state.quiescent_ticks++; + memcpy(&raw, &state, sizeof(raw)); + state_.store(raw, std::memory_order_relaxed); + return state.quiescent_ticks - 1; +} + +#ifdef ABSL_HAVE_THREAD_SANITIZER +extern "C" int RunningOnValgrind(); +#endif + +static void ActivatePerCPUCaches() { + if (tcmalloc::tcmalloc_internal::Static::CPUCacheActive()) { + // Already active. + return; + } + +#ifdef ABSL_HAVE_THREAD_SANITIZER + // RunningOnValgrind is a proxy for "is something intercepting malloc." + // + // If Valgrind, et. al., are in use, TCMalloc isn't in use and we shouldn't + // activate our per-CPU caches. + if (RunningOnValgrind()) { + return; + } +#endif + if (Parameters::per_cpu_caches() && subtle::percpu::IsFast()) { + Static::InitIfNecessary(); + Static::cpu_cache().Activate(CPUCache::ActivationMode::FastPathOn); + // no need for this thread cache anymore, I guess. + ThreadCache::BecomeIdle(); + // If there's a problem with this code, let's notice it right away: + ::operator delete(::operator new(1)); + } +} + +class PerCPUInitializer { + public: + PerCPUInitializer() { + ActivatePerCPUCaches(); + } +}; +static PerCPUInitializer module_enter_exit; + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +extern "C" void TCMalloc_Internal_ForceCpuCacheActivation() { + tcmalloc::tcmalloc_internal::ActivatePerCPUCaches(); +} + +extern "C" bool MallocExtension_Internal_GetPerCpuCachesActive() { + return tcmalloc::tcmalloc_internal::Static::CPUCacheActive(); +} + +extern "C" void MallocExtension_Internal_DeactivatePerCpuCaches() { + tcmalloc::tcmalloc_internal::Parameters::set_per_cpu_caches(false); + tcmalloc::tcmalloc_internal::Static::DeactivateCPUCache(); +} + +extern "C" int32_t MallocExtension_Internal_GetMaxPerCpuCacheSize() { + return tcmalloc::tcmalloc_internal::Parameters::max_per_cpu_cache_size(); +} + +extern "C" void MallocExtension_Internal_SetMaxPerCpuCacheSize(int32_t value) { + tcmalloc::tcmalloc_internal::Parameters::set_max_per_cpu_cache_size(value); +} diff --git a/contrib/libs/tcmalloc/tcmalloc/cpu_cache.h b/contrib/libs/tcmalloc/tcmalloc/cpu_cache.h new file mode 100644 index 0000000000..dab7d18910 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/cpu_cache.h @@ -0,0 +1,390 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef TCMALLOC_CPU_CACHE_H_ +#define TCMALLOC_CPU_CACHE_H_ + +#include <stddef.h> +#include <stdint.h> + +#include <atomic> + +#include "absl/base/attributes.h" +#include "absl/base/call_once.h" +#include "absl/base/internal/spinlock.h" +#include "absl/base/optimization.h" +#include "tcmalloc/common.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/percpu.h" +#include "tcmalloc/internal/percpu_tcmalloc.h" +#include "tcmalloc/static_vars.h" +#include "tcmalloc/thread_cache.h" +#include "tcmalloc/tracking.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +class CPUCache { + public: + constexpr CPUCache() = default; + + enum class ActivationMode { + FastPathOn, + FastPathOffTestOnly, + }; + + // tcmalloc explicitly initializes its global state (to be safe for + // use in global constructors) so our constructor must be trivial; + // do all initialization here instead. + void Activate(ActivationMode mode); + + // Allocate an object of the given size class. When allocation fails + // (from this cache and after running Refill), OOMHandler(size) is + // called and its return value is returned from + // Allocate. OOMHandler is used to parameterize out-of-memory + // handling (raising exception, returning nullptr, calling + // new_handler or anything else). "Passing" OOMHandler in this way + // allows Allocate to be used in tail-call position in fast-path, + // making Allocate use jump (tail-call) to slow path code. + template <void* OOMHandler(size_t)> + void* Allocate(size_t cl); + + // Free an object of the given class. + void Deallocate(void* ptr, size_t cl); + + // Give the number of bytes in <cpu>'s cache + uint64_t UsedBytes(int cpu) const; + + // Give the allocated number of bytes in <cpu>'s cache + uint64_t Allocated(int cpu) const; + + // Whether <cpu>'s cache has ever been populated with objects + bool HasPopulated(int cpu) const; + + PerCPUMetadataState MetadataMemoryUsage() const; + + // Give the number of bytes used in all cpu caches. + uint64_t TotalUsedBytes() const; + + // Give the number of objects of a given class in all cpu caches. + uint64_t TotalObjectsOfClass(size_t cl) const; + + // Give the number of bytes unallocated to any sizeclass in <cpu>'s cache. + uint64_t Unallocated(int cpu) const; + + // Gives the total capacity of <cpu>'s cache in bytes. + // + // The total capacity of <cpu>'s cache should be equal to the sum of allocated + // and unallocated bytes for that cache. + uint64_t Capacity(int cpu) const; + + // Give the per-cpu limit of cache size. + uint64_t CacheLimit() const; + + // Shuffles per-cpu caches using the number of underflows and overflows that + // occurred in the prior interval. It selects the top per-cpu caches + // with highest misses as candidates, iterates through the other per-cpu + // caches to steal capacity from them and adds the stolen bytes to the + // available capacity of the per-cpu caches. May be called from any processor. + // + // TODO(vgogte): There are quite a few knobs that we can play around with in + // ShuffleCpuCaches. + void ShuffleCpuCaches(); + + // Sets the lower limit on the capacity that can be stolen from the cpu cache. + static constexpr double kCacheCapacityThreshold = 0.20; + + // Tries to steal <bytes> for the destination <cpu>. It iterates through the + // the set of populated cpu caches and steals the bytes from them. 
A cpu is + // considered a good candidate to steal from if: + // (1) the cache is populated + // (2) the numbers of underflows and overflows are both less than 0.8x those + // of the destination per-cpu cache + // (3) source cpu is not the same as the destination cpu + // (4) capacity of the source cpu/cl is non-zero + // + // For a given source cpu, we iterate through the size classes to steal from + // them. Currently, we use a similar clock-like algorithm from Steal() to + // identify the cl to steal from. + void StealFromOtherCache(int cpu, int max_populated_cpu, size_t bytes); + + // Tries to reclaim inactive per-CPU caches. It iterates through the set of + // populated cpu caches and reclaims the caches that: + // (1) had same number of used bytes since the last interval, + // (2) had no change in the number of misses since the last interval. + void TryReclaimingCaches(); + + // Empty out the cache on <cpu>; move all objects to the central + // cache. (If other threads run concurrently on that cpu, we can't + // guarantee it will be fully empty on return, but if the cpu is + // unused, this will eliminate stranded memory.) Returns the number + // of bytes we sent back. This function is thread safe. + uint64_t Reclaim(int cpu); + + // Reports number of times the <cpu> has been reclaimed. + uint64_t GetNumReclaims(int cpu) const; + + // Determine number of bits we should use for allocating per-cpu cache + // The amount of per-cpu cache is 2 ^ kPerCpuShift +#if defined(TCMALLOC_SMALL_BUT_SLOW) + static const size_t kPerCpuShift = 12; +#else + static constexpr size_t kPerCpuShift = 18; +#endif + + struct CpuCacheMissStats { + size_t underflows; + size_t overflows; + }; + + // Reports total cache underflows and overflows for <cpu>. + CpuCacheMissStats GetTotalCacheMissStats(int cpu) const; + + // Reports the cache underflows and overflows for <cpu> that were recorded at + // the end of the previous interval. It also records current underflows and + // overflows in the reclaim underflow and overflow stats. + CpuCacheMissStats GetReclaimCacheMissStats(int cpu) const; + + // Reports cache underflows and overflows for <cpu> this interval. + CpuCacheMissStats GetIntervalCacheMissStats(int cpu) const; + + // Report statistics + void Print(Printer* out) const; + void PrintInPbtxt(PbtxtRegion* region) const; + + void AcquireInternalLocks(); + void ReleaseInternalLocks(); + + private: + // Per-size-class freelist resizing info. + class PerClassResizeInfo { + public: + void Init(); + // Updates info on overflow/underflow. + // <overflow> says if it's overflow or underflow. + // <grow> is caller approximation of whether we want to grow capacity. + // <successive> will contain number of successive overflows/underflows. + // Returns if capacity needs to be grown aggressively (i.e. by batch size). + bool Update(bool overflow, bool grow, uint32_t* successive); + uint32_t Tick(); + + private: + std::atomic<int32_t> state_; + // state_ layout: + struct State { + // last overflow/underflow? + uint32_t overflow : 1; + // number of times Steal checked this class since the last grow + uint32_t quiescent_ticks : 15; + // number of successive overflows/underflows + uint32_t successive : 16; + }; + static_assert(sizeof(State) == sizeof(std::atomic<int32_t>), + "size mismatch"); + }; + + subtle::percpu::TcmallocSlab<kNumClasses> freelist_; + + struct ResizeInfoUnpadded { + // cache space on this CPU we're not using. Modify atomically; + // we don't want to lose space. 
+ std::atomic<size_t> available; + // this is just a hint + std::atomic<size_t> last_steal; + // Track whether we have initialized this CPU. + absl::once_flag initialized; + // Track whether we have ever populated this CPU. + std::atomic<bool> populated; + // For cross-cpu operations. + absl::base_internal::SpinLock lock; + PerClassResizeInfo per_class[kNumClasses]; + // tracks number of underflows on allocate. + std::atomic<size_t> total_underflows; + // tracks number of overflows on deallocate. + std::atomic<size_t> total_overflows; + // tracks number of underflows recorded as of the end of the last shuffle + // interval. + std::atomic<size_t> shuffle_underflows; + // tracks number of overflows recorded as of the end of the last shuffle + // interval. + std::atomic<size_t> shuffle_overflows; + // total cache space available on this CPU. This tracks the total + // allocated and unallocated bytes on this CPU cache. + std::atomic<size_t> capacity; + // Number of underflows as of the end of the last resize interval. + std::atomic<size_t> reclaim_underflows; + // Number of overflows as of the end of the last resize interval. + std::atomic<size_t> reclaim_overflows; + // Used bytes in the cache as of the end of the last resize interval. + std::atomic<uint64_t> reclaim_used_bytes; + // Tracks number of times this CPU has been reclaimed. + std::atomic<size_t> num_reclaims; + }; + struct ResizeInfo : ResizeInfoUnpadded { + char pad[ABSL_CACHELINE_SIZE - + sizeof(ResizeInfoUnpadded) % ABSL_CACHELINE_SIZE]; + }; + // Tracking data for each CPU's cache resizing efforts. + ResizeInfo* resize_ = nullptr; + + // Track whether we are lazily initializing slabs. We cannot use the latest + // value in Parameters, as it can change after initialization. + bool lazy_slabs_ = false; + // The maximum capacity of each size class within the slab. + uint16_t max_capacity_[kNumClasses] = {0}; + + // Provides a hint to StealFromOtherCache() so that we can steal from the + // caches in a round-robin fashion. + std::atomic<int> last_cpu_cache_steal_ = 0; + + // Return a set of objects to be returned to the Transfer Cache. + static constexpr int kMaxToReturn = 16; + struct ObjectsToReturn { + // The number of slots available for storing objects. + int count = kMaxToReturn; + // The size class of the returned object. kNumClasses is the + // largest value that needs to be stored in cl. + CompactSizeClass cl[kMaxToReturn]; + void* obj[kMaxToReturn]; + }; + + static size_t MaxCapacityHelper(size_t cl) { + CPUCache& cpu_cache = Static::cpu_cache(); + // Heuristic that the CPUCache has been activated. + ASSERT(cpu_cache.resize_ != nullptr); + return cpu_cache.max_capacity_[cl]; + } + + void* Refill(int cpu, size_t cl); + + // This is called after finding a full freelist when attempting to push <ptr> + // on the freelist for sizeclass <cl>. The last arg should indicate which + // CPU's list was full. Returns 1. + int Overflow(void* ptr, size_t cl, int cpu); + + // Called on <cl> freelist overflow/underflow on <cpu> to balance cache + // capacity between size classes. Returns number of objects to return/request + // from transfer cache. <to_return> will contain objects that need to be + // freed. + size_t UpdateCapacity(int cpu, size_t cl, size_t batch_length, bool overflow, + ObjectsToReturn* to_return); + + // Tries to obtain up to <desired_increase> bytes of freelist space on <cpu> + // for <cl> from other <cls>. <to_return> will contain objects that need to be + // freed. 
+ void Grow(int cpu, size_t cl, size_t desired_increase, + ObjectsToReturn* to_return); + + // Tries to steal <bytes> for <cl> on <cpu> from other size classes on that + // CPU. Returns acquired bytes. <to_return> will contain objects that need to + // be freed. + size_t Steal(int cpu, size_t cl, size_t bytes, ObjectsToReturn* to_return); + + // Records a cache underflow or overflow on <cpu>, increments underflow or + // overflow by 1. + // <is_malloc> determines whether the associated count corresponds to an + // underflow or overflow. + void RecordCacheMissStat(const int cpu, const bool is_malloc); + + static void* NoopUnderflow(int cpu, size_t cl) { return nullptr; } + static int NoopOverflow(int cpu, size_t cl, void* item) { return -1; } +}; + +template <void* OOMHandler(size_t)> +inline void* ABSL_ATTRIBUTE_ALWAYS_INLINE CPUCache::Allocate(size_t cl) { + ASSERT(cl > 0); + + tracking::Report(kMallocHit, cl, 1); + struct Helper { + static void* ABSL_ATTRIBUTE_NOINLINE Underflow(int cpu, size_t cl) { + // we've optimistically reported hit in Allocate, lets undo it and + // report miss instead. + tracking::Report(kMallocHit, cl, -1); + void* ret = nullptr; + if (Static::sharded_transfer_cache().should_use(cl)) { + ret = Static::sharded_transfer_cache().Pop(cl); + } else { + tracking::Report(kMallocMiss, cl, 1); + CPUCache& cache = Static::cpu_cache(); + cache.RecordCacheMissStat(cpu, true); + ret = cache.Refill(cpu, cl); + } + if (ABSL_PREDICT_FALSE(ret == nullptr)) { + size_t size = Static::sizemap().class_to_size(cl); + return OOMHandler(size); + } + return ret; + } + }; + return freelist_.Pop(cl, &Helper::Underflow); +} + +inline void ABSL_ATTRIBUTE_ALWAYS_INLINE CPUCache::Deallocate(void* ptr, + size_t cl) { + ASSERT(cl > 0); + tracking::Report(kFreeHit, cl, 1); // Be optimistic; correct later if needed. + + struct Helper { + static int ABSL_ATTRIBUTE_NOINLINE Overflow(int cpu, size_t cl, void* ptr) { + // When we reach here we've already optimistically bumped FreeHits. + // Fix that. + tracking::Report(kFreeHit, cl, -1); + if (Static::sharded_transfer_cache().should_use(cl)) { + Static::sharded_transfer_cache().Push(cl, ptr); + return 1; + } + tracking::Report(kFreeMiss, cl, 1); + CPUCache& cache = Static::cpu_cache(); + cache.RecordCacheMissStat(cpu, false); + return cache.Overflow(ptr, cl, cpu); + } + }; + freelist_.Push(cl, ptr, Helper::Overflow); +} + +inline bool UsePerCpuCache() { + // We expect a fast path of per-CPU caches being active and the thread being + // registered with rseq. + if (ABSL_PREDICT_FALSE(!Static::CPUCacheActive())) { + return false; + } + + if (ABSL_PREDICT_TRUE(subtle::percpu::IsFastNoInit())) { + return true; + } + + // When rseq is not registered, use this transition edge to shutdown the + // thread cache for this thread. + // + // We call IsFast() on every non-fastpath'd malloc or free since IsFast() has + // the side-effect of initializing the per-thread state needed for "unsafe" + // per-cpu operations in case this is the first time a new thread is calling + // into tcmalloc. + // + // If the per-CPU cache for a thread is not initialized, we push ourselves + // onto the slow path (if !defined(TCMALLOC_DEPRECATED_PERTHREAD)) until this + // occurs. See fast_alloc's use of TryRecordAllocationFast. 
+ if (ABSL_PREDICT_TRUE(subtle::percpu::IsFast())) { + ThreadCache::BecomeIdle(); + return true; + } + + return false; +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END +#endif // TCMALLOC_CPU_CACHE_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/cpu_cache_test.cc b/contrib/libs/tcmalloc/tcmalloc/cpu_cache_test.cc new file mode 100644 index 0000000000..fd4282b9c3 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/cpu_cache_test.cc @@ -0,0 +1,599 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/cpu_cache.h" + +#include <thread> // NOLINT(build/c++11) + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/random/random.h" +#include "absl/random/seed_sequences.h" +#include "tcmalloc/common.h" +#include "tcmalloc/internal/optimization.h" +#include "tcmalloc/internal/util.h" +#include "tcmalloc/parameters.h" +#include "tcmalloc/static_vars.h" +#include "tcmalloc/testing/testutil.h" + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +constexpr size_t kStressSlabs = 4; +void* OOMHandler(size_t) { return nullptr; } + +TEST(CpuCacheTest, Metadata) { + if (!subtle::percpu::IsFast()) { + return; + } + + const int num_cpus = absl::base_internal::NumCPUs(); + + CPUCache& cache = Static::cpu_cache(); + // Since this test allocates memory, avoid activating the real fast path to + // minimize allocations against the per-CPU cache. + cache.Activate(CPUCache::ActivationMode::FastPathOffTestOnly); + + PerCPUMetadataState r = cache.MetadataMemoryUsage(); + EXPECT_EQ(r.virtual_size, num_cpus << CPUCache::kPerCpuShift); + if (Parameters::lazy_per_cpu_caches()) { + EXPECT_EQ(r.resident_size, 0); + } else { + EXPECT_EQ(r.resident_size, r.virtual_size); + } + + auto count_cores = [&]() { + int populated_cores = 0; + for (int i = 0; i < num_cpus; i++) { + if (cache.HasPopulated(i)) { + populated_cores++; + } + } + return populated_cores; + }; + + EXPECT_EQ(0, count_cores()); + + int allowed_cpu_id; + const size_t kSizeClass = 3; + const size_t num_to_move = Static::sizemap().num_objects_to_move(kSizeClass); + const size_t virtual_cpu_id_offset = subtle::percpu::UsingFlatVirtualCpus() + ? offsetof(kernel_rseq, vcpu_id) + : offsetof(kernel_rseq, cpu_id); + void* ptr; + { + // Restrict this thread to a single core while allocating and processing the + // slow path. + // + // TODO(b/151313823): Without this restriction, we may access--for reading + // only--other slabs if we end up being migrated. These may cause huge + // pages to be faulted for those cores, leading to test flakiness. 
+ tcmalloc_internal::ScopedAffinityMask mask( + tcmalloc_internal::AllowedCpus()[0]); + allowed_cpu_id = + subtle::percpu::GetCurrentVirtualCpuUnsafe(virtual_cpu_id_offset); + + ptr = cache.Allocate<OOMHandler>(kSizeClass); + + if (mask.Tampered() || + allowed_cpu_id != + subtle::percpu::GetCurrentVirtualCpuUnsafe(virtual_cpu_id_offset)) { + return; + } + } + EXPECT_NE(ptr, nullptr); + EXPECT_EQ(1, count_cores()); + + r = cache.MetadataMemoryUsage(); + EXPECT_EQ(r.virtual_size, num_cpus << CPUCache::kPerCpuShift); + if (Parameters::lazy_per_cpu_caches()) { + // We expect to fault in a single core, but we may end up faulting an + // entire hugepage worth of memory + const size_t core_slab_size = r.virtual_size / num_cpus; + const size_t upper_bound = + ((core_slab_size + kHugePageSize - 1) & ~(kHugePageSize - 1)); + + // A single core may be less than the full slab (core_slab_size), since we + // do not touch every page within the slab. + EXPECT_GT(r.resident_size, 0); + EXPECT_LE(r.resident_size, upper_bound) << count_cores(); + + // This test is much more sensitive to implementation details of the per-CPU + // cache. It may need to be updated from time to time. These numbers were + // calculated by MADV_NOHUGEPAGE'ing the memory used for the slab and + // measuring the resident size. + // + // TODO(ckennelly): Allow CPUCache::Activate to accept a specific arena + // allocator, so we can MADV_NOHUGEPAGE the backing store in testing for + // more precise measurements. + switch (CPUCache::kPerCpuShift) { + case 12: + EXPECT_GE(r.resident_size, 4096); + break; + case 18: + EXPECT_GE(r.resident_size, 110592); + break; + default: + ASSUME(false); + break; + }; + + // Read stats from the CPU caches. This should not impact resident_size. + const size_t max_cpu_cache_size = Parameters::max_per_cpu_cache_size(); + size_t total_used_bytes = 0; + for (int cpu = 0; cpu < num_cpus; ++cpu) { + size_t used_bytes = cache.UsedBytes(cpu); + total_used_bytes += used_bytes; + + if (cpu == allowed_cpu_id) { + EXPECT_GT(used_bytes, 0); + EXPECT_TRUE(cache.HasPopulated(cpu)); + } else { + EXPECT_EQ(used_bytes, 0); + EXPECT_FALSE(cache.HasPopulated(cpu)); + } + + EXPECT_LE(cache.Unallocated(cpu), max_cpu_cache_size); + EXPECT_EQ(cache.Capacity(cpu), max_cpu_cache_size); + EXPECT_EQ(cache.Allocated(cpu) + cache.Unallocated(cpu), + cache.Capacity(cpu)); + } + + for (int cl = 0; cl < kNumClasses; ++cl) { + // This is sensitive to the current growth policies of CPUCache. It may + // require updating from time-to-time. + EXPECT_EQ(cache.TotalObjectsOfClass(cl), + (cl == kSizeClass ? num_to_move - 1 : 0)) + << cl; + } + EXPECT_EQ(cache.TotalUsedBytes(), total_used_bytes); + + PerCPUMetadataState post_stats = cache.MetadataMemoryUsage(); + // Confirm stats are within expected bounds. + EXPECT_GT(post_stats.resident_size, 0); + EXPECT_LE(post_stats.resident_size, upper_bound) << count_cores(); + // Confirm stats are unchanged. + EXPECT_EQ(r.resident_size, post_stats.resident_size); + } else { + EXPECT_EQ(r.resident_size, r.virtual_size); + } + + // Tear down. + // + // TODO(ckennelly): We're interacting with the real TransferCache. 
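+  // Return the test allocation, then reclaim every CPU's cache so that any
+  // cached objects are flushed back to the central cache before other tests
+  // run.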
+ cache.Deallocate(ptr, kSizeClass); + + for (int i = 0; i < num_cpus; i++) { + cache.Reclaim(i); + } +} + +TEST(CpuCacheTest, CacheMissStats) { + if (!subtle::percpu::IsFast()) { + return; + } + + const int num_cpus = absl::base_internal::NumCPUs(); + + CPUCache& cache = Static::cpu_cache(); + // Since this test allocates memory, avoid activating the real fast path to + // minimize allocations against the per-CPU cache. + cache.Activate(CPUCache::ActivationMode::FastPathOffTestOnly); + + // The number of underflows and overflows must be zero for all the caches. + for (int cpu = 0; cpu < num_cpus; ++cpu) { + CPUCache::CpuCacheMissStats total_misses = + cache.GetTotalCacheMissStats(cpu); + CPUCache::CpuCacheMissStats interval_misses = + cache.GetIntervalCacheMissStats(cpu); + EXPECT_EQ(total_misses.underflows, 0); + EXPECT_EQ(total_misses.overflows, 0); + EXPECT_EQ(interval_misses.underflows, 0); + EXPECT_EQ(interval_misses.overflows, 0); + } + + int allowed_cpu_id; + const size_t kSizeClass = 3; + const size_t virtual_cpu_id_offset = subtle::percpu::UsingFlatVirtualCpus() + ? offsetof(kernel_rseq, vcpu_id) + : offsetof(kernel_rseq, cpu_id); + void* ptr; + { + // Restrict this thread to a single core while allocating and processing the + // slow path. + // + // TODO(b/151313823): Without this restriction, we may access--for reading + // only--other slabs if we end up being migrated. These may cause huge + // pages to be faulted for those cores, leading to test flakiness. + tcmalloc_internal::ScopedAffinityMask mask( + tcmalloc_internal::AllowedCpus()[0]); + allowed_cpu_id = + subtle::percpu::GetCurrentVirtualCpuUnsafe(virtual_cpu_id_offset); + + ptr = cache.Allocate<OOMHandler>(kSizeClass); + + if (mask.Tampered() || + allowed_cpu_id != + subtle::percpu::GetCurrentVirtualCpuUnsafe(virtual_cpu_id_offset)) { + return; + } + } + + for (int cpu = 0; cpu < num_cpus; ++cpu) { + CPUCache::CpuCacheMissStats total_misses = + cache.GetTotalCacheMissStats(cpu); + CPUCache::CpuCacheMissStats interval_misses = + cache.GetIntervalCacheMissStats(cpu); + if (cpu == allowed_cpu_id) { + EXPECT_EQ(total_misses.underflows, 1); + EXPECT_EQ(interval_misses.underflows, 1); + } else { + EXPECT_EQ(total_misses.underflows, 0); + EXPECT_EQ(interval_misses.underflows, 0); + } + EXPECT_EQ(total_misses.overflows, 0); + EXPECT_EQ(interval_misses.overflows, 0); + } + + // Tear down. + // + // TODO(ckennelly): We're interacting with the real TransferCache. 
+ cache.Deallocate(ptr, kSizeClass); + + for (int i = 0; i < num_cpus; i++) { + cache.Reclaim(i); + } +} + +static void ShuffleThread(const std::atomic<bool>& stop) { + if (!subtle::percpu::IsFast()) { + return; + } + + CPUCache& cache = Static::cpu_cache(); + // Wake up every 10ms to shuffle the caches so that we can allow misses to + // accumulate during that interval + while (!stop) { + cache.ShuffleCpuCaches(); + absl::SleepFor(absl::Milliseconds(10)); + } +} + +static void StressThread(size_t thread_id, const std::atomic<bool>& stop) { + if (!subtle::percpu::IsFast()) { + return; + } + + CPUCache& cache = Static::cpu_cache(); + std::vector<std::pair<size_t, void*>> blocks; + absl::BitGen rnd; + while (!stop) { + const int what = absl::Uniform<int32_t>(rnd, 0, 2); + if (what) { + // Allocate an object for a class + size_t cl = absl::Uniform<int32_t>(rnd, 1, kStressSlabs + 1); + void* ptr = cache.Allocate<OOMHandler>(cl); + blocks.emplace_back(std::make_pair(cl, ptr)); + } else { + // Deallocate an object for a class + if (!blocks.empty()) { + cache.Deallocate(blocks.back().second, blocks.back().first); + blocks.pop_back(); + } + } + } + + // Cleaup. Deallocate rest of the allocated memory. + for (int i = 0; i < blocks.size(); i++) { + cache.Deallocate(blocks[i].second, blocks[i].first); + } +} + +TEST(CpuCacheTest, StealCpuCache) { + if (!subtle::percpu::IsFast()) { + return; + } + + CPUCache& cache = Static::cpu_cache(); + // Since this test allocates memory, avoid activating the real fast path to + // minimize allocations against the per-CPU cache. + cache.Activate(CPUCache::ActivationMode::FastPathOffTestOnly); + + std::vector<std::thread> threads; + std::thread shuffle_thread; + const int n_threads = absl::base_internal::NumCPUs(); + std::atomic<bool> stop(false); + + for (size_t t = 0; t < n_threads; ++t) { + threads.push_back(std::thread(StressThread, t, std::ref(stop))); + } + shuffle_thread = std::thread(ShuffleThread, std::ref(stop)); + + absl::SleepFor(absl::Seconds(5)); + stop = true; + for (auto& t : threads) { + t.join(); + } + shuffle_thread.join(); + + // Check that the total capacity is preserved after the shuffle. + size_t capacity = 0; + const int num_cpus = absl::base_internal::NumCPUs(); + const size_t kTotalCapacity = num_cpus * Parameters::max_per_cpu_cache_size(); + for (int cpu = 0; cpu < num_cpus; ++cpu) { + EXPECT_EQ(cache.Allocated(cpu) + cache.Unallocated(cpu), + cache.Capacity(cpu)); + capacity += cache.Capacity(cpu); + } + EXPECT_EQ(capacity, kTotalCapacity); + + for (int cpu = 0; cpu < num_cpus; ++cpu) { + cache.Reclaim(cpu); + } +} + +// Runs a single allocate and deallocate operation to warm up the cache. Once a +// few objects are allocated in the cold cache, we can shuffle cpu caches to +// steal that capacity from the cold cache to the hot cache. +static void ColdCacheOperations(int cpu_id, size_t size_class) { + // Temporarily fake being on the given CPU. + ScopedFakeCpuId fake_cpu_id(cpu_id); + + CPUCache& cache = Static::cpu_cache(); +#if TCMALLOC_PERCPU_USE_RSEQ + if (subtle::percpu::UsingFlatVirtualCpus()) { + subtle::percpu::__rseq_abi.vcpu_id = cpu_id; + } +#endif + + void* ptr = cache.Allocate<OOMHandler>(size_class); + cache.Deallocate(ptr, size_class); +} + +// Runs multiple allocate and deallocate operation on the cpu cache to collect +// misses. Once we collect enough misses on this cache, we can shuffle cpu +// caches to steal capacity from colder caches to the hot cache. 
+static void HotCacheOperations(int cpu_id) { + // Temporarily fake being on the given CPU. + ScopedFakeCpuId fake_cpu_id(cpu_id); + + CPUCache& cache = Static::cpu_cache(); +#if TCMALLOC_PERCPU_USE_RSEQ + if (subtle::percpu::UsingFlatVirtualCpus()) { + subtle::percpu::__rseq_abi.vcpu_id = cpu_id; + } +#endif + + // Allocate and deallocate objects to make sure we have enough misses on the + // cache. This will make sure we have sufficient disparity in misses between + // the hotter and colder cache, and that we may be able to steal bytes from + // the colder cache. + for (size_t cl = 1; cl <= kStressSlabs; ++cl) { + void* ptr = cache.Allocate<OOMHandler>(cl); + cache.Deallocate(ptr, cl); + } + + // We reclaim the cache to reset it so that we record underflows/overflows the + // next time we allocate and deallocate objects. Without reclaim, the cache + // would stay warmed up and it would take more time to drain the colder cache. + cache.Reclaim(cpu_id); +} + +TEST(CpuCacheTest, ColdHotCacheShuffleTest) { + if (!subtle::percpu::IsFast()) { + return; + } + + CPUCache& cache = Static::cpu_cache(); + // Since this test allocates memory, avoid activating the real fast path to + // minimize allocations against the per-CPU cache. + cache.Activate(CPUCache::ActivationMode::FastPathOffTestOnly); + + constexpr int hot_cpu_id = 0; + constexpr int cold_cpu_id = 1; + + const size_t max_cpu_cache_size = Parameters::max_per_cpu_cache_size(); + + // Empirical tests suggest that we should be able to steal all the steal-able + // capacity from colder cache in < 100 tries. Keeping enough buffer here to + // make sure we steal from colder cache, while at the same time avoid timeouts + // if something goes bad. + constexpr int kMaxStealTries = 1000; + + // We allocate and deallocate a single highest cl object. + // This makes sure that we have a single large object in the cache that faster + // cache can steal. + const size_t size_class = kNumClasses - 1; + + for (int num_tries = 0; + num_tries < kMaxStealTries && + cache.Capacity(cold_cpu_id) > + CPUCache::kCacheCapacityThreshold * max_cpu_cache_size; + ++num_tries) { + ColdCacheOperations(cold_cpu_id, size_class); + HotCacheOperations(hot_cpu_id); + cache.ShuffleCpuCaches(); + + // Check that the capacity is preserved. + EXPECT_EQ(cache.Allocated(cold_cpu_id) + cache.Unallocated(cold_cpu_id), + cache.Capacity(cold_cpu_id)); + EXPECT_EQ(cache.Allocated(hot_cpu_id) + cache.Unallocated(hot_cpu_id), + cache.Capacity(hot_cpu_id)); + } + + size_t cold_cache_capacity = cache.Capacity(cold_cpu_id); + size_t hot_cache_capacity = cache.Capacity(hot_cpu_id); + + // Check that we drained cold cache to the lower capacity limit. + // We also keep some tolerance, up to the largest class size, below the lower + // capacity threshold that we can drain cold cache to. + EXPECT_GT(cold_cache_capacity, + CPUCache::kCacheCapacityThreshold * max_cpu_cache_size - + Static::sizemap().class_to_size(kNumClasses - 1)); + + // Check that we have at least stolen some capacity. + EXPECT_GT(hot_cache_capacity, max_cpu_cache_size); + + // Perform a few more shuffles to make sure that lower cache capacity limit + // has been reached for the cold cache. A few more shuffles should not + // change the capacity of either of the caches. + for (int i = 0; i < 100; ++i) { + ColdCacheOperations(cold_cpu_id, size_class); + HotCacheOperations(hot_cpu_id); + cache.ShuffleCpuCaches(); + + // Check that the capacity is preserved. 
+ EXPECT_EQ(cache.Allocated(cold_cpu_id) + cache.Unallocated(cold_cpu_id), + cache.Capacity(cold_cpu_id)); + EXPECT_EQ(cache.Allocated(hot_cpu_id) + cache.Unallocated(hot_cpu_id), + cache.Capacity(hot_cpu_id)); + } + + // Check that the capacity of cold and hot caches is same as before. + EXPECT_EQ(cache.Capacity(cold_cpu_id), cold_cache_capacity); + EXPECT_EQ(cache.Capacity(hot_cpu_id), hot_cache_capacity); + + // Make sure that the total capacity is preserved. + EXPECT_EQ(cache.Capacity(cold_cpu_id) + cache.Capacity(hot_cpu_id), + 2 * max_cpu_cache_size); + + // Reclaim caches. + const int num_cpus = absl::base_internal::NumCPUs(); + for (int cpu = 0; cpu < num_cpus; ++cpu) { + cache.Reclaim(cpu); + } +} + +TEST(CpuCacheTest, ReclaimCpuCache) { + if (!subtle::percpu::IsFast()) { + return; + } + + CPUCache& cache = Static::cpu_cache(); + // Since this test allocates memory, avoid activating the real fast path to + // minimize allocations against the per-CPU cache. + cache.Activate(CPUCache::ActivationMode::FastPathOffTestOnly); + + // The number of underflows and overflows must be zero for all the caches. + const int num_cpus = absl::base_internal::NumCPUs(); + for (int cpu = 0; cpu < num_cpus; ++cpu) { + SCOPED_TRACE(absl::StrFormat("Failed CPU: %d", cpu)); + // Check that reclaim miss metrics are reset. + CPUCache::CpuCacheMissStats reclaim_misses = + cache.GetReclaimCacheMissStats(cpu); + EXPECT_EQ(reclaim_misses.underflows, 0); + EXPECT_EQ(reclaim_misses.overflows, 0); + + // None of the caches should have been reclaimed yet. + EXPECT_EQ(cache.GetNumReclaims(cpu), 0); + + // Check that caches are empty. + uint64_t used_bytes = cache.UsedBytes(cpu); + EXPECT_EQ(used_bytes, 0); + } + + const size_t kSizeClass = 3; + + // We chose a different size class here so that we can populate different size + // class slots and change the number of bytes used by the busy cache later in + // our test. + const size_t kBusySizeClass = 4; + + // Perform some operations to warm up caches and make sure they are populated. + for (int cpu = 0; cpu < num_cpus; ++cpu) { + SCOPED_TRACE(absl::StrFormat("Failed CPU: %d", cpu)); + ColdCacheOperations(cpu, kSizeClass); + EXPECT_TRUE(cache.HasPopulated(cpu)); + } + + for (int cpu = 0; cpu < num_cpus; ++cpu) { + SCOPED_TRACE(absl::StrFormat("Failed CPU: %d", cpu)); + CPUCache::CpuCacheMissStats misses_last_interval = + cache.GetReclaimCacheMissStats(cpu); + CPUCache::CpuCacheMissStats total_misses = + cache.GetTotalCacheMissStats(cpu); + + // Misses since the last reclaim (i.e. since we initialized the caches) + // should match the total miss metrics. + EXPECT_EQ(misses_last_interval.underflows, total_misses.underflows); + EXPECT_EQ(misses_last_interval.overflows, total_misses.overflows); + + // Caches should have non-zero used bytes. + EXPECT_GT(cache.UsedBytes(cpu), 0); + } + + cache.TryReclaimingCaches(); + + // Miss metrics since the last interval were non-zero and the change in used + // bytes was non-zero, so none of the caches should get reclaimed. + for (int cpu = 0; cpu < num_cpus; ++cpu) { + SCOPED_TRACE(absl::StrFormat("Failed CPU: %d", cpu)); + // As no cache operations were performed since the last reclaim + // operation, the reclaim misses captured during the last interval (i.e. + // since the last reclaim) should be zero. 
+ CPUCache::CpuCacheMissStats reclaim_misses = + cache.GetReclaimCacheMissStats(cpu); + EXPECT_EQ(reclaim_misses.underflows, 0); + EXPECT_EQ(reclaim_misses.overflows, 0); + + // None of the caches should have been reclaimed as the caches were + // accessed in the previous interval. + EXPECT_EQ(cache.GetNumReclaims(cpu), 0); + + // Caches should not have been reclaimed; used bytes should be non-zero. + EXPECT_GT(cache.UsedBytes(cpu), 0); + } + + absl::BitGen rnd; + const int busy_cpu = + absl::Uniform<int32_t>(rnd, 0, absl::base_internal::NumCPUs()); + const size_t prev_used = cache.UsedBytes(busy_cpu); + ColdCacheOperations(busy_cpu, kBusySizeClass); + EXPECT_GT(cache.UsedBytes(busy_cpu), prev_used); + + // Try reclaiming caches again. + cache.TryReclaimingCaches(); + + // All caches, except the busy cpu cache against which we performed some + // operations in the previous interval, should have been reclaimed exactly + // once. + for (int cpu = 0; cpu < num_cpus; ++cpu) { + SCOPED_TRACE(absl::StrFormat("Failed CPU: %d", cpu)); + if (cpu == busy_cpu) { + EXPECT_GT(cache.UsedBytes(cpu), 0); + EXPECT_EQ(cache.GetNumReclaims(cpu), 0); + } else { + EXPECT_EQ(cache.UsedBytes(cpu), 0); + EXPECT_EQ(cache.GetNumReclaims(cpu), 1); + } + } + + // Try reclaiming caches again. + cache.TryReclaimingCaches(); + + // All caches, including the busy cache, should have been reclaimed this + // time. Note that the caches that were reclaimed in the previous interval + // should not be reclaimed again and the number of reclaims reported for them + // should still be one. + for (int cpu = 0; cpu < num_cpus; ++cpu) { + SCOPED_TRACE(absl::StrFormat("Failed CPU: %d", cpu)); + EXPECT_EQ(cache.UsedBytes(cpu), 0); + EXPECT_EQ(cache.GetNumReclaims(cpu), 1); + } +} + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/experiment.cc b/contrib/libs/tcmalloc/tcmalloc/experiment.cc new file mode 100644 index 0000000000..1c425fbf9e --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/experiment.cc @@ -0,0 +1,162 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "tcmalloc/experiment.h" + +#include <string.h> + +#include "absl/base/macros.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" +#include "tcmalloc/internal/environment.h" +#include "tcmalloc/internal/logging.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +const char kDelimiter = ','; +const char kExperiments[] = "BORG_EXPERIMENTS"; +const char kDisableExperiments[] = "BORG_DISABLE_EXPERIMENTS"; +constexpr absl::string_view kEnableAll = "enable-all-known-experiments"; +constexpr absl::string_view kDisableAll = "all"; + +bool LookupExperimentID(absl::string_view label, Experiment* exp) { + for (auto config : experiments) { + if (config.name == label) { + *exp = config.id; + return true; + } + } + + return false; +} + +const bool* GetSelectedExperiments() { + static bool by_id[kNumExperiments]; + + static const bool* status = [&]() { + const char* active_experiments = thread_safe_getenv(kExperiments); + const char* disabled_experiments = thread_safe_getenv(kDisableExperiments); + return SelectExperiments(by_id, + active_experiments ? active_experiments : "", + disabled_experiments ? disabled_experiments : ""); + }(); + return status; +} + +template <typename F> +void ParseExperiments(absl::string_view labels, F f) { + absl::string_view::size_type pos = 0; + do { + absl::string_view token; + auto end = labels.find(kDelimiter, pos); + if (end == absl::string_view::npos) { + token = labels.substr(pos); + pos = end; + } else { + token = labels.substr(pos, end - pos); + pos = end + 1; + } + + f(token); + } while (pos != absl::string_view::npos); +} + +} // namespace + +const bool* SelectExperiments(bool* buffer, absl::string_view active, + absl::string_view disabled) { + memset(buffer, 0, sizeof(*buffer) * kNumExperiments); + + if (active == kEnableAll) { + std::fill(buffer, buffer + kNumExperiments, true); + } + + ParseExperiments(active, [buffer](absl::string_view token) { + Experiment id; + if (LookupExperimentID(token, &id)) { + buffer[static_cast<int>(id)] = true; + } + }); + + if (disabled == kDisableAll) { + memset(buffer, 0, sizeof(*buffer) * kNumExperiments); + } + + ParseExperiments(disabled, [buffer](absl::string_view token) { + Experiment id; + if (LookupExperimentID(token, &id)) { + buffer[static_cast<int>(id)] = false; + } + }); + + return buffer; +} + +void PrintExperiments(Printer* printer) { + // Index experiments by their positions in the experiments array, rather than + // by experiment ID. + static bool active[ABSL_ARRAYSIZE(experiments)]; + static const bool* status = []() { + memset(active, 0, sizeof(active)); + const bool* by_id = GetSelectedExperiments(); + + for (int i = 0; i < ABSL_ARRAYSIZE(experiments); i++) { + const auto& config = experiments[i]; + active[i] = by_id[static_cast<int>(config.id)]; + } + + return active; + }(); + + printer->printf("MALLOC EXPERIMENTS:"); + for (int i = 0; i < ABSL_ARRAYSIZE(experiments); i++) { + const char* value = status[i] ? "1" : "0"; + printer->printf(" %s=%s", experiments[i].name, value); + } + + printer->printf("\n"); +} + +void FillExperimentProperties( + std::map<std::string, MallocExtension::Property>* result) { + for (const auto& config : experiments) { + (*result)[absl::StrCat("tcmalloc.experiment.", config.name)].value = + IsExperimentActive(config.id) ? 
1 : 0; + } +} + +} // namespace tcmalloc_internal + +bool IsExperimentActive(Experiment exp) { + ASSERT(static_cast<int>(exp) >= 0); + ASSERT(exp < Experiment::kMaxExperimentID); + + return tcmalloc_internal::GetSelectedExperiments()[static_cast<int>(exp)]; +} + +absl::optional<Experiment> FindExperimentByName(absl::string_view name) { + for (const auto& config : experiments) { + if (name == config.name) { + return config.id; + } + } + + return absl::nullopt; +} + +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/experiment.h b/contrib/libs/tcmalloc/tcmalloc/experiment.h new file mode 100644 index 0000000000..90b3049df1 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/experiment.h @@ -0,0 +1,71 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_EXPERIMENT_H_ +#define TCMALLOC_EXPERIMENT_H_ + +#include <stddef.h> + +#include <map> +#include <string> + +#include "absl/strings/string_view.h" +#include "absl/types/optional.h" +#include "tcmalloc/experiment_config.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/malloc_extension.h" + +// TCMalloc Experiment Controller +// +// This consumes environment variables to decide whether to activate experiments +// to control TCMalloc behavior. It avoids memory allocations when making +// experiment decisions to allow experiments to be used in critical TCMalloc +// initialization paths. +// +// If an experiment is causing difficulty, all experiments can be disabled by +// setting the environment variable: +// BORG_DISABLE_EXPERIMENTS=all *or* +// BORG_DISABLE_EXPERIMENTS=BAD_EXPERIMENT_LABEL + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +constexpr size_t kNumExperiments = + static_cast<size_t>(Experiment::kMaxExperimentID); + +// SelectExperiments parses the experiments enumerated by active and disabled +// and updates buffer[experiment_id] accordingly. +// +// buffer must be sized for kMaxExperimentID entries. +// +// This is exposed for testing purposes only. +const bool* SelectExperiments(bool* buffer, absl::string_view active, + absl::string_view disabled); + +void FillExperimentProperties( + std::map<std::string, MallocExtension::Property>* result); + +void PrintExperiments(Printer* printer); + +} // namespace tcmalloc_internal + +bool IsExperimentActive(Experiment exp); + +absl::optional<Experiment> FindExperimentByName(absl::string_view name); + +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_EXPERIMENT_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/experiment_config.h b/contrib/libs/tcmalloc/tcmalloc/experiment_config.h new file mode 100644 index 0000000000..294c0374e4 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/experiment_config.h @@ -0,0 +1,51 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_EXPERIMENT_CONFIG_H_ +#define TCMALLOC_EXPERIMENT_CONFIG_H_ + +#include "absl/strings/string_view.h" + +// Autogenerated by experiments_proto_test --experiments_generate_config=true +namespace tcmalloc { + +enum class Experiment : int { + TCMALLOC_TEMERAIRE, + TCMALLOC_SANS_56_SIZECLASS, + TEST_ONLY_TCMALLOC_POW2_SIZECLASS, + TEST_ONLY_TCMALLOC_POW2_BELOW64_SIZECLASS, + TEST_ONLY_TCMALLOC_RING_BUFFER_TRANSFER_CACHE, + TEST_ONLY_TCMALLOC_SHARDED_TRANSFER_CACHE, + kMaxExperimentID, +}; + +struct ExperimentConfig { + Experiment id; + absl::string_view name; +}; + +// clang-format off +inline constexpr ExperimentConfig experiments[] = { + {Experiment::TCMALLOC_TEMERAIRE, "TCMALLOC_TEMERAIRE"}, + {Experiment::TCMALLOC_SANS_56_SIZECLASS, "TCMALLOC_SANS_56_SIZECLASS"}, + {Experiment::TEST_ONLY_TCMALLOC_POW2_SIZECLASS, "TEST_ONLY_TCMALLOC_POW2_SIZECLASS"}, + {Experiment::TEST_ONLY_TCMALLOC_POW2_BELOW64_SIZECLASS, "TEST_ONLY_TCMALLOC_POW2_BELOW64_SIZECLASS"}, + {Experiment::TEST_ONLY_TCMALLOC_RING_BUFFER_TRANSFER_CACHE, "TEST_ONLY_TCMALLOC_RING_BUFFER_TRANSFER_CACHE"}, + {Experiment::TEST_ONLY_TCMALLOC_SHARDED_TRANSFER_CACHE, "TEST_ONLY_TCMALLOC_SHARDED_TRANSFER_CACHE"}, +}; +// clang-format on + +} // namespace tcmalloc + +#endif // TCMALLOC_EXPERIMENT_CONFIG_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/experiment_config_test.cc b/contrib/libs/tcmalloc/tcmalloc/experiment_config_test.cc new file mode 100644 index 0000000000..24da9e64aa --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/experiment_config_test.cc @@ -0,0 +1,31 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/experiment_config.h" + +#include "gtest/gtest.h" + +namespace tcmalloc { +namespace { + +// Verify IDs are non-negative and strictly less than kMaxExperimentID. +TEST(ExperimentConfigTest, ValidateIDs) { + for (const auto& exp : experiments) { + ASSERT_LE(0, static_cast<int>(exp.id)); + ASSERT_LT(exp.id, Experiment::kMaxExperimentID); + } +} + +} // namespace +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/experiment_fuzz.cc b/contrib/libs/tcmalloc/tcmalloc/experiment_fuzz.cc new file mode 100644 index 0000000000..2a7afe9b85 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/experiment_fuzz.cc @@ -0,0 +1,38 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <stddef.h> +#include <stdint.h> +#include <string.h> + +#include "absl/strings/string_view.h" +#include "tcmalloc/experiment.h" + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t* d, size_t size) { + const char* data = reinterpret_cast<const char*>(d); + + bool buffer[tcmalloc::tcmalloc_internal::kNumExperiments]; + absl::string_view active, disabled; + + const char* split = static_cast<const char*>(memchr(data, ';', size)); + if (split == nullptr) { + active = absl::string_view(data, size); + } else { + active = absl::string_view(data, split - data); + disabled = absl::string_view(split + 1, size - (split - data + 1)); + } + + tcmalloc::tcmalloc_internal::SelectExperiments(buffer, active, disabled); + return 0; +} diff --git a/contrib/libs/tcmalloc/tcmalloc/experimental_56_size_class.cc b/contrib/libs/tcmalloc/tcmalloc/experimental_56_size_class.cc new file mode 100644 index 0000000000..c582cdb9ba --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/experimental_56_size_class.cc @@ -0,0 +1,706 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/common.h" + +namespace tcmalloc { + +// <fixed> is fixed per-size-class overhead due to end-of-span fragmentation +// and other factors. For instance, if we have a 96 byte size class, and use a +// single 8KiB page, then we will hold 85 objects per span, and have 32 bytes +// left over. There is also a fixed component of 48 bytes of TCMalloc metadata +// per span. Together, the fixed overhead would be wasted/allocated = +// (32 + 48) / (8192 - 32) ~= 0.98%. +// There is also a dynamic component to overhead based on mismatches between the +// number of bytes requested and the number of bytes provided by the size class. +// Together they sum to the total overhead; for instance if you asked for a +// 50-byte allocation that rounds up to a 64-byte size class, the dynamic +// overhead would be 28%, and if <fixed> were 22% it would mean (on average) +// 25 bytes of overhead for allocations of that size. 
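+//
+// As an illustrative check of the arithmetic above (these numbers describe
+// the hypothetical 50-byte request served from a 64-byte class in the
+// example, not any entry in the tables below): the dynamic component is
+// (64 - 50) / 50 = 28%, and together with a 22% fixed component the total
+// overhead is 50% of the 50 requested bytes, i.e. 25 bytes on average.
+static_assert((64 - 50) * 100 / 50 == 28, "dynamic overhead in the example");
+static_assert((28 + 22) * 50 / 100 == 25, "average overhead in the example");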
+ +// clang-format off +#if defined(__cpp_aligned_new) && __STDCPP_DEFAULT_NEW_ALIGNMENT__ <= 8 +#if TCMALLOC_PAGE_SHIFT == 13 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +static const int kCount = 86; +static_assert(kCount <= kNumClasses); +const int SizeMap::kExperimentalSizeClassesCount = kCount; +const SizeClassInfo SizeMap::kExperimentalSizeClasses[SizeMap::kExperimentalSizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.59% + { 16, 1, 32}, // 0.59% + { 24, 1, 32}, // 0.68% + { 32, 1, 32}, // 0.59% + { 40, 1, 32}, // 0.98% + { 48, 1, 32}, // 0.98% + { 64, 1, 32}, // 0.59% + { 72, 1, 32}, // 1.28% + { 80, 1, 32}, // 0.98% + { 88, 1, 32}, // 0.68% + { 96, 1, 32}, // 0.98% + { 104, 1, 32}, // 1.58% + { 112, 1, 32}, // 0.78% + { 120, 1, 32}, // 0.98% + { 128, 1, 32}, // 0.59% + { 136, 1, 32}, // 0.98% + { 144, 1, 32}, // 2.18% + { 160, 1, 32}, // 0.98% + { 176, 1, 32}, // 1.78% + { 184, 1, 32}, // 1.78% + { 192, 1, 32}, // 2.18% + { 208, 1, 32}, // 1.58% + { 224, 1, 32}, // 2.18% + { 240, 1, 32}, // 0.98% + { 256, 1, 32}, // 0.59% + { 272, 1, 32}, // 0.98% + { 288, 1, 32}, // 2.18% + { 312, 1, 32}, // 1.58% + { 336, 1, 32}, // 2.18% + { 352, 1, 32}, // 1.78% + { 384, 1, 32}, // 2.18% + { 408, 1, 32}, // 0.98% + { 424, 1, 32}, // 2.28% + { 448, 1, 32}, // 2.18% + { 480, 1, 32}, // 0.98% + { 512, 1, 32}, // 0.59% + { 576, 1, 32}, // 2.18% + { 640, 1, 32}, // 7.29% + { 704, 1, 32}, // 6.40% + { 768, 1, 32}, // 7.29% + { 896, 1, 32}, // 2.18% + { 1024, 1, 32}, // 0.59% + { 1152, 2, 32}, // 1.88% + { 1280, 2, 32}, // 6.98% + { 1408, 2, 32}, // 6.10% + { 1536, 2, 32}, // 6.98% + { 1792, 2, 32}, // 1.88% + { 2048, 2, 32}, // 0.29% + { 2304, 2, 28}, // 1.88% + { 2688, 2, 24}, // 1.88% + { 2816, 3, 23}, // 9.30% + { 3200, 2, 20}, // 2.70% + { 3456, 3, 18}, // 1.79% + { 3584, 4, 18}, // 1.74% + { 4096, 2, 16}, // 0.29% + { 4736, 3, 13}, // 3.99% + { 5376, 2, 12}, // 1.88% + { 6144, 3, 10}, // 0.20% + { 6528, 4, 10}, // 0.54% + { 7168, 7, 9}, // 0.08% + { 8192, 2, 8}, // 0.29% + { 9472, 5, 6}, // 8.23% + { 10240, 4, 6}, // 6.82% + { 12288, 3, 5}, // 0.20% + { 14336, 7, 4}, // 0.08% + { 16384, 2, 4}, // 0.29% + { 20480, 5, 3}, // 0.12% + { 24576, 3, 2}, // 0.20% + { 28672, 7, 2}, // 0.08% + { 32768, 4, 2}, // 0.15% + { 40960, 5, 2}, // 0.12% + { 49152, 6, 2}, // 0.10% + { 57344, 7, 2}, // 0.08% + { 65536, 8, 2}, // 0.07% + { 73728, 9, 2}, // 0.07% + { 81920, 10, 2}, // 0.06% + { 98304, 12, 2}, // 0.05% + { 106496, 13, 2}, // 0.05% + { 131072, 16, 2}, // 0.04% + { 147456, 18, 2}, // 0.03% + { 163840, 20, 2}, // 0.03% + { 180224, 22, 2}, // 0.03% + { 204800, 25, 2}, // 0.02% + { 229376, 28, 2}, // 0.02% + { 262144, 32, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 15 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +static const int kCount = 78; +static_assert(kCount <= kNumClasses); +const int SizeMap::kExperimentalSizeClassesCount = kCount; +const SizeClassInfo SizeMap::kExperimentalSizeClasses[SizeMap::kExperimentalSizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.15% + { 16, 1, 32}, // 0.15% + { 24, 1, 32}, // 0.17% + { 32, 1, 32}, // 0.15% + { 40, 1, 32}, // 0.17% + { 48, 1, 32}, // 0.24% + { 64, 1, 32}, // 0.15% + { 72, 1, 32}, // 0.17% + { 80, 1, 32}, // 0.29% + { 88, 1, 32}, // 0.24% + { 96, 1, 32}, // 0.24% + { 104, 1, 32}, // 0.17% + { 112, 1, 32}, // 0.34% + { 120, 1, 32}, // 0.17% + { 128, 1, 32}, // 0.15% + { 144, 1, 32}, // 0.39% + { 160, 1, 32}, // 
0.54% + { 176, 1, 32}, // 0.24% + { 192, 1, 32}, // 0.54% + { 208, 1, 32}, // 0.49% + { 224, 1, 32}, // 0.34% + { 240, 1, 32}, // 0.54% + { 256, 1, 32}, // 0.15% + { 280, 1, 32}, // 0.17% + { 304, 1, 32}, // 0.89% + { 336, 1, 32}, // 0.69% + { 368, 1, 32}, // 0.20% + { 416, 1, 32}, // 1.13% + { 456, 1, 32}, // 1.36% + { 488, 1, 32}, // 0.37% + { 512, 1, 32}, // 0.15% + { 576, 1, 32}, // 1.74% + { 640, 1, 32}, // 0.54% + { 704, 1, 32}, // 1.33% + { 768, 1, 32}, // 1.74% + { 832, 1, 32}, // 1.13% + { 896, 1, 32}, // 1.74% + { 1024, 1, 32}, // 0.15% + { 1152, 1, 32}, // 1.74% + { 1280, 1, 32}, // 2.55% + { 1408, 1, 32}, // 1.33% + { 1664, 1, 32}, // 3.80% + { 2048, 1, 32}, // 0.15% + { 2176, 1, 30}, // 0.54% + { 2304, 1, 28}, // 1.74% + { 2432, 1, 26}, // 3.80% + { 2688, 1, 24}, // 1.74% + { 2944, 1, 22}, // 1.33% + { 3200, 1, 20}, // 2.55% + { 3584, 1, 18}, // 1.74% + { 4096, 1, 16}, // 0.15% + { 4608, 1, 14}, // 1.74% + { 5376, 1, 12}, // 1.74% + { 6528, 1, 10}, // 0.54% + { 8192, 1, 8}, // 0.15% + { 9344, 2, 7}, // 0.27% + { 10880, 1, 6}, // 0.54% + { 13056, 2, 5}, // 0.47% + { 13952, 3, 4}, // 0.70% + { 16384, 1, 4}, // 0.15% + { 19072, 3, 3}, // 3.14% + { 21760, 2, 3}, // 0.47% + { 24576, 3, 2}, // 0.05% + { 28032, 6, 2}, // 0.22% + { 32768, 1, 2}, // 0.15% + { 40960, 4, 2}, // 6.71% + { 49152, 3, 2}, // 0.05% + { 57344, 7, 2}, // 0.02% + { 65536, 2, 2}, // 0.07% + { 81920, 5, 2}, // 0.03% + { 98304, 3, 2}, // 0.05% + { 114688, 7, 2}, // 0.02% + { 131072, 4, 2}, // 0.04% + { 163840, 5, 2}, // 0.03% + { 196608, 6, 2}, // 0.02% + { 229376, 7, 2}, // 0.02% + { 262144, 8, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 18 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +static const int kCount = 89; +static_assert(kCount <= kNumClasses); +const int SizeMap::kExperimentalSizeClassesCount = kCount; +const SizeClassInfo SizeMap::kExperimentalSizeClasses[SizeMap::kExperimentalSizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.02% + { 16, 1, 32}, // 0.02% + { 24, 1, 32}, // 0.02% + { 32, 1, 32}, // 0.02% + { 40, 1, 32}, // 0.03% + { 48, 1, 32}, // 0.02% + { 64, 1, 32}, // 0.02% + { 72, 1, 32}, // 0.04% + { 80, 1, 32}, // 0.04% + { 88, 1, 32}, // 0.05% + { 96, 1, 32}, // 0.04% + { 112, 1, 32}, // 0.04% + { 128, 1, 32}, // 0.02% + { 144, 1, 32}, // 0.04% + { 160, 1, 32}, // 0.04% + { 176, 1, 32}, // 0.05% + { 192, 1, 32}, // 0.04% + { 216, 1, 32}, // 0.07% + { 240, 1, 32}, // 0.04% + { 256, 1, 32}, // 0.02% + { 288, 1, 32}, // 0.04% + { 312, 1, 32}, // 0.04% + { 344, 1, 32}, // 0.02% + { 360, 1, 32}, // 0.04% + { 416, 1, 32}, // 0.04% + { 464, 1, 32}, // 0.19% + { 512, 1, 32}, // 0.02% + { 576, 1, 32}, // 0.04% + { 640, 1, 32}, // 0.17% + { 704, 1, 32}, // 0.12% + { 832, 1, 32}, // 0.04% + { 1024, 1, 32}, // 0.02% + { 1152, 1, 32}, // 0.26% + { 1280, 1, 32}, // 0.41% + { 1408, 1, 32}, // 0.12% + { 1664, 1, 32}, // 0.36% + { 1792, 1, 32}, // 0.21% + { 1920, 1, 32}, // 0.41% + { 2048, 1, 32}, // 0.02% + { 2176, 1, 30}, // 0.41% + { 2304, 1, 28}, // 0.71% + { 2432, 1, 26}, // 0.76% + { 2688, 1, 24}, // 0.56% + { 2944, 1, 22}, // 0.07% + { 3072, 1, 21}, // 0.41% + { 3328, 1, 19}, // 1.00% + { 3584, 1, 18}, // 0.21% + { 3840, 1, 17}, // 0.41% + { 4096, 1, 16}, // 0.02% + { 4608, 1, 14}, // 1.61% + { 5120, 1, 12}, // 0.41% + { 5504, 1, 11}, // 1.35% + { 5760, 1, 11}, // 1.15% + { 6144, 1, 10}, // 1.61% + { 6656, 1, 9}, // 1.00% + { 7168, 1, 9}, // 1.61% + { 7680, 1, 8}, // 0.41% + { 8192, 1, 8}, // 0.02% + { 9344, 1, 7}, // 0.21% + { 9984, 
1, 6}, // 1.00% + { 10880, 1, 6}, // 0.41% + { 11904, 1, 5}, // 0.12% + { 13056, 1, 5}, // 0.41% + { 14464, 1, 4}, // 0.71% + { 16384, 1, 4}, // 0.02% + { 17408, 1, 3}, // 0.41% + { 20096, 1, 3}, // 0.36% + { 21760, 1, 3}, // 0.41% + { 23808, 1, 2}, // 0.12% + { 26112, 1, 2}, // 0.41% + { 29056, 1, 2}, // 0.26% + { 32768, 1, 2}, // 0.02% + { 37376, 1, 2}, // 0.21% + { 43648, 1, 2}, // 0.12% + { 45568, 2, 2}, // 4.61% + { 52352, 1, 2}, // 0.17% + { 56064, 2, 2}, // 3.92% + { 65536, 1, 2}, // 0.02% + { 74880, 2, 2}, // 0.03% + { 87296, 1, 2}, // 0.12% + { 104832, 2, 2}, // 0.03% + { 112256, 3, 2}, // 0.09% + { 131072, 1, 2}, // 0.02% + { 149760, 3, 2}, // 5.03% + { 174720, 2, 2}, // 0.03% + { 196608, 3, 2}, // 0.01% + { 209664, 4, 2}, // 0.03% + { 262144, 1, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 12 +static_assert(kMaxSize == 8192, "kMaxSize mismatch"); +static const int kCount = 46; +static_assert(kCount <= kNumClasses); +const int SizeMap::kExperimentalSizeClassesCount = kCount; +const SizeClassInfo SizeMap::kExperimentalSizeClasses[SizeMap::kExperimentalSizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 1.17% + { 16, 1, 32}, // 1.17% + { 24, 1, 32}, // 1.57% + { 32, 1, 32}, // 1.17% + { 40, 1, 32}, // 1.57% + { 48, 1, 32}, // 1.57% + { 64, 1, 32}, // 1.17% + { 72, 1, 32}, // 2.78% + { 80, 1, 32}, // 1.57% + { 88, 1, 32}, // 2.37% + { 96, 1, 32}, // 2.78% + { 104, 1, 32}, // 2.17% + { 112, 1, 32}, // 2.78% + { 128, 1, 32}, // 1.17% + { 144, 1, 32}, // 2.78% + { 160, 1, 32}, // 3.60% + { 176, 1, 32}, // 2.37% + { 192, 1, 32}, // 2.78% + { 208, 1, 32}, // 4.86% + { 240, 1, 32}, // 1.57% + { 256, 1, 32}, // 1.17% + { 272, 1, 32}, // 1.57% + { 312, 1, 32}, // 2.17% + { 336, 1, 32}, // 2.78% + { 368, 1, 32}, // 2.37% + { 408, 1, 32}, // 1.57% + { 448, 1, 32}, // 2.78% + { 512, 1, 32}, // 1.17% + { 576, 2, 32}, // 2.18% + { 640, 2, 32}, // 7.29% + { 768, 2, 32}, // 7.29% + { 896, 2, 32}, // 2.18% + { 1024, 2, 32}, // 0.59% + { 1152, 3, 32}, // 7.08% + { 1280, 3, 32}, // 7.08% + { 1536, 3, 32}, // 0.39% + { 2048, 4, 32}, // 0.29% + { 2304, 4, 28}, // 1.88% + { 2688, 4, 24}, // 1.88% + { 3200, 4, 20}, // 2.70% + { 4096, 4, 16}, // 0.29% + { 4736, 5, 13}, // 8.36% + { 6144, 3, 10}, // 0.39% + { 7168, 7, 9}, // 0.17% + { 8192, 4, 8}, // 0.29% +}; +#else +#error "Unsupported TCMALLOC_PAGE_SHIFT value!" 
+#endif +#else +#if TCMALLOC_PAGE_SHIFT == 13 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +static const int kCount = 86; +static_assert(kCount <= kNumClasses); +const int SizeMap::kExperimentalSizeClassesCount = kCount; +const SizeClassInfo SizeMap::kExperimentalSizeClasses[SizeMap::kExperimentalSizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.59% + { 16, 1, 32}, // 0.59% + { 32, 1, 32}, // 0.59% + { 48, 1, 32}, // 0.98% + { 64, 1, 32}, // 0.59% + { 80, 1, 32}, // 0.98% + { 96, 1, 32}, // 0.98% + { 112, 1, 32}, // 0.78% + { 128, 1, 32}, // 0.59% + { 144, 1, 32}, // 2.18% + { 160, 1, 32}, // 0.98% + { 176, 1, 32}, // 1.78% + { 192, 1, 32}, // 2.18% + { 208, 1, 32}, // 1.58% + { 224, 1, 32}, // 2.18% + { 240, 1, 32}, // 0.98% + { 256, 1, 32}, // 0.59% + { 272, 1, 32}, // 0.98% + { 288, 1, 32}, // 2.18% + { 304, 1, 32}, // 4.25% + { 320, 1, 32}, // 3.00% + { 336, 1, 32}, // 2.18% + { 352, 1, 32}, // 1.78% + { 368, 1, 32}, // 1.78% + { 384, 1, 32}, // 2.18% + { 400, 1, 32}, // 3.00% + { 416, 1, 32}, // 4.25% + { 448, 1, 32}, // 2.18% + { 480, 1, 32}, // 0.98% + { 512, 1, 32}, // 0.59% + { 576, 1, 32}, // 2.18% + { 640, 1, 32}, // 7.29% + { 704, 1, 32}, // 6.40% + { 768, 1, 32}, // 7.29% + { 896, 1, 32}, // 2.18% + { 1024, 1, 32}, // 0.59% + { 1152, 2, 32}, // 1.88% + { 1280, 2, 32}, // 6.98% + { 1408, 2, 32}, // 6.10% + { 1536, 2, 32}, // 6.98% + { 1792, 2, 32}, // 1.88% + { 2048, 2, 32}, // 0.29% + { 2304, 2, 28}, // 1.88% + { 2688, 2, 24}, // 1.88% + { 2816, 3, 23}, // 9.30% + { 3200, 2, 20}, // 2.70% + { 3456, 3, 18}, // 1.79% + { 3584, 4, 18}, // 1.74% + { 4096, 2, 16}, // 0.29% + { 4736, 3, 13}, // 3.99% + { 5376, 2, 12}, // 1.88% + { 6144, 3, 10}, // 0.20% + { 6528, 4, 10}, // 0.54% + { 6784, 5, 9}, // 0.75% + { 7168, 7, 9}, // 0.08% + { 8192, 2, 8}, // 0.29% + { 9472, 5, 6}, // 8.23% + { 10240, 4, 6}, // 6.82% + { 12288, 3, 5}, // 0.20% + { 13568, 5, 4}, // 0.75% + { 14336, 7, 4}, // 0.08% + { 16384, 2, 4}, // 0.29% + { 20480, 5, 3}, // 0.12% + { 24576, 3, 2}, // 0.20% + { 28672, 7, 2}, // 0.08% + { 32768, 4, 2}, // 0.15% + { 40960, 5, 2}, // 0.12% + { 49152, 6, 2}, // 0.10% + { 57344, 7, 2}, // 0.08% + { 65536, 8, 2}, // 0.07% + { 73728, 9, 2}, // 0.07% + { 81920, 10, 2}, // 0.06% + { 90112, 11, 2}, // 0.05% + { 98304, 12, 2}, // 0.05% + { 106496, 13, 2}, // 0.05% + { 122880, 15, 2}, // 0.04% + { 131072, 16, 2}, // 0.04% + { 139264, 17, 2}, // 0.03% + { 155648, 19, 2}, // 0.03% + { 163840, 20, 2}, // 0.03% + { 180224, 22, 2}, // 0.03% + { 204800, 25, 2}, // 0.02% + { 221184, 27, 2}, // 0.02% + { 237568, 29, 2}, // 0.02% + { 262144, 32, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 15 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +static const int kCount = 78; +static_assert(kCount <= kNumClasses); +const int SizeMap::kExperimentalSizeClassesCount = kCount; +const SizeClassInfo SizeMap::kExperimentalSizeClasses[SizeMap::kExperimentalSizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.15% + { 16, 1, 32}, // 0.15% + { 32, 1, 32}, // 0.15% + { 48, 1, 32}, // 0.24% + { 64, 1, 32}, // 0.15% + { 80, 1, 32}, // 0.29% + { 96, 1, 32}, // 0.24% + { 112, 1, 32}, // 0.34% + { 128, 1, 32}, // 0.15% + { 144, 1, 32}, // 0.39% + { 160, 1, 32}, // 0.54% + { 176, 1, 32}, // 0.24% + { 192, 1, 32}, // 0.54% + { 208, 1, 32}, // 0.49% + { 224, 1, 32}, // 0.34% + { 240, 1, 32}, // 0.54% + { 256, 1, 32}, // 0.15% + { 272, 1, 32}, // 0.54% + { 288, 1, 32}, // 0.84% + { 
304, 1, 32}, // 0.89% + { 336, 1, 32}, // 0.69% + { 368, 1, 32}, // 0.20% + { 416, 1, 32}, // 1.13% + { 448, 1, 32}, // 0.34% + { 480, 1, 32}, // 0.54% + { 512, 1, 32}, // 0.15% + { 576, 1, 32}, // 1.74% + { 640, 1, 32}, // 0.54% + { 704, 1, 32}, // 1.33% + { 768, 1, 32}, // 1.74% + { 832, 1, 32}, // 1.13% + { 896, 1, 32}, // 1.74% + { 1024, 1, 32}, // 0.15% + { 1152, 1, 32}, // 1.74% + { 1280, 1, 32}, // 2.55% + { 1408, 1, 32}, // 1.33% + { 1536, 1, 32}, // 1.74% + { 1664, 1, 32}, // 3.80% + { 1920, 1, 32}, // 0.54% + { 2048, 1, 32}, // 0.15% + { 2176, 1, 30}, // 0.54% + { 2304, 1, 28}, // 1.74% + { 2432, 1, 26}, // 3.80% + { 2688, 1, 24}, // 1.74% + { 2944, 1, 22}, // 1.33% + { 3200, 1, 20}, // 2.55% + { 3584, 1, 18}, // 1.74% + { 4096, 1, 16}, // 0.15% + { 4608, 1, 14}, // 1.74% + { 5376, 1, 12}, // 1.74% + { 5632, 2, 11}, // 5.86% + { 6528, 1, 10}, // 0.54% + { 7168, 2, 9}, // 1.66% + { 8192, 1, 8}, // 0.15% + { 9344, 2, 7}, // 0.27% + { 10880, 1, 6}, // 0.54% + { 13056, 2, 5}, // 0.47% + { 13952, 3, 4}, // 0.70% + { 16384, 1, 4}, // 0.15% + { 19072, 3, 3}, // 3.14% + { 21760, 2, 3}, // 0.47% + { 24576, 3, 2}, // 0.05% + { 28032, 6, 2}, // 0.22% + { 32768, 1, 2}, // 0.15% + { 38144, 5, 2}, // 7.41% + { 40960, 4, 2}, // 6.71% + { 49152, 3, 2}, // 0.05% + { 57344, 7, 2}, // 0.02% + { 65536, 2, 2}, // 0.07% + { 81920, 5, 2}, // 0.03% + { 98304, 3, 2}, // 0.05% + { 114688, 7, 2}, // 0.02% + { 131072, 4, 2}, // 0.04% + { 163840, 5, 2}, // 0.03% + { 196608, 6, 2}, // 0.02% + { 229376, 7, 2}, // 0.02% + { 262144, 8, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 18 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +static const int kCount = 89; +static_assert(kCount <= kNumClasses); +const int SizeMap::kExperimentalSizeClassesCount = kCount; +const SizeClassInfo SizeMap::kExperimentalSizeClasses[SizeMap::kExperimentalSizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.02% + { 16, 1, 32}, // 0.02% + { 32, 1, 32}, // 0.02% + { 48, 1, 32}, // 0.02% + { 64, 1, 32}, // 0.02% + { 80, 1, 32}, // 0.04% + { 96, 1, 32}, // 0.04% + { 112, 1, 32}, // 0.04% + { 128, 1, 32}, // 0.02% + { 144, 1, 32}, // 0.04% + { 160, 1, 32}, // 0.04% + { 176, 1, 32}, // 0.05% + { 192, 1, 32}, // 0.04% + { 224, 1, 32}, // 0.04% + { 240, 1, 32}, // 0.04% + { 256, 1, 32}, // 0.02% + { 288, 1, 32}, // 0.04% + { 320, 1, 32}, // 0.04% + { 352, 1, 32}, // 0.12% + { 368, 1, 32}, // 0.07% + { 416, 1, 32}, // 0.04% + { 464, 1, 32}, // 0.19% + { 512, 1, 32}, // 0.02% + { 576, 1, 32}, // 0.04% + { 640, 1, 32}, // 0.17% + { 704, 1, 32}, // 0.12% + { 832, 1, 32}, // 0.04% + { 896, 1, 32}, // 0.21% + { 1024, 1, 32}, // 0.02% + { 1152, 1, 32}, // 0.26% + { 1280, 1, 32}, // 0.41% + { 1408, 1, 32}, // 0.12% + { 1536, 1, 32}, // 0.41% + { 1664, 1, 32}, // 0.36% + { 1792, 1, 32}, // 0.21% + { 1920, 1, 32}, // 0.41% + { 2048, 1, 32}, // 0.02% + { 2176, 1, 30}, // 0.41% + { 2304, 1, 28}, // 0.71% + { 2432, 1, 26}, // 0.76% + { 2688, 1, 24}, // 0.56% + { 2944, 1, 22}, // 0.07% + { 3072, 1, 21}, // 0.41% + { 3328, 1, 19}, // 1.00% + { 3584, 1, 18}, // 0.21% + { 3840, 1, 17}, // 0.41% + { 4096, 1, 16}, // 0.02% + { 4608, 1, 14}, // 1.61% + { 5120, 1, 12}, // 0.41% + { 5504, 1, 11}, // 1.35% + { 5760, 1, 11}, // 1.15% + { 6144, 1, 10}, // 1.61% + { 6528, 1, 10}, // 0.41% + { 7040, 1, 9}, // 0.66% + { 7168, 1, 9}, // 1.61% + { 7680, 1, 8}, // 0.41% + { 8192, 1, 8}, // 0.02% + { 8704, 1, 7}, // 0.41% + { 9344, 1, 7}, // 0.21% + { 9984, 1, 6}, // 1.00% + { 10880, 1, 6}, // 0.41% + { 
11904, 1, 5}, // 0.12% + { 13056, 1, 5}, // 0.41% + { 14464, 1, 4}, // 0.71% + { 16384, 1, 4}, // 0.02% + { 17408, 1, 3}, // 0.41% + { 20096, 1, 3}, // 0.36% + { 21760, 1, 3}, // 0.41% + { 23808, 1, 2}, // 0.12% + { 26112, 1, 2}, // 0.41% + { 29056, 1, 2}, // 0.26% + { 32768, 1, 2}, // 0.02% + { 37376, 1, 2}, // 0.21% + { 43648, 1, 2}, // 0.12% + { 45568, 2, 2}, // 4.61% + { 52352, 1, 2}, // 0.17% + { 56064, 2, 2}, // 3.92% + { 65536, 1, 2}, // 0.02% + { 74880, 2, 2}, // 0.03% + { 87296, 1, 2}, // 0.12% + { 104832, 2, 2}, // 0.03% + { 112256, 3, 2}, // 0.09% + { 131072, 1, 2}, // 0.02% + { 149760, 3, 2}, // 5.03% + { 174720, 2, 2}, // 0.03% + { 196608, 3, 2}, // 0.01% + { 209664, 4, 2}, // 0.03% + { 262144, 1, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 12 +static_assert(kMaxSize == 8192, "kMaxSize mismatch"); +static const int kCount = 46; +static_assert(kCount <= kNumClasses); +const int SizeMap::kExperimentalSizeClassesCount = kCount; +const SizeClassInfo SizeMap::kExperimentalSizeClasses[SizeMap::kExperimentalSizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 1.17% + { 16, 1, 32}, // 1.17% + { 32, 1, 32}, // 1.17% + { 48, 1, 32}, // 1.57% + { 64, 1, 32}, // 1.17% + { 80, 1, 32}, // 1.57% + { 96, 1, 32}, // 2.78% + { 112, 1, 32}, // 2.78% + { 128, 1, 32}, // 1.17% + { 144, 1, 32}, // 2.78% + { 160, 1, 32}, // 3.60% + { 176, 1, 32}, // 2.37% + { 192, 1, 32}, // 2.78% + { 208, 1, 32}, // 4.86% + { 224, 1, 32}, // 2.78% + { 240, 1, 32}, // 1.57% + { 256, 1, 32}, // 1.17% + { 272, 1, 32}, // 1.57% + { 288, 1, 32}, // 2.78% + { 304, 1, 32}, // 4.86% + { 336, 1, 32}, // 2.78% + { 368, 1, 32}, // 2.37% + { 400, 1, 32}, // 3.60% + { 448, 1, 32}, // 2.78% + { 512, 1, 32}, // 1.17% + { 576, 2, 32}, // 2.18% + { 640, 2, 32}, // 7.29% + { 704, 2, 32}, // 6.40% + { 768, 2, 32}, // 7.29% + { 896, 2, 32}, // 2.18% + { 1024, 2, 32}, // 0.59% + { 1152, 3, 32}, // 7.08% + { 1280, 3, 32}, // 7.08% + { 1536, 3, 32}, // 0.39% + { 1792, 4, 32}, // 1.88% + { 2048, 4, 32}, // 0.29% + { 2304, 4, 28}, // 1.88% + { 2688, 4, 24}, // 1.88% + { 3200, 4, 20}, // 2.70% + { 3584, 7, 18}, // 0.17% + { 4096, 4, 16}, // 0.29% + { 4736, 5, 13}, // 8.36% + { 6144, 3, 10}, // 0.39% + { 7168, 7, 9}, // 0.17% + { 8192, 4, 8}, // 0.29% +}; +#else +#error "Unsupported TCMALLOC_PAGE_SHIFT value!" +#endif +#endif +// clang-format on + +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/experimental_pow2_below64_size_class.cc b/contrib/libs/tcmalloc/tcmalloc/experimental_pow2_below64_size_class.cc new file mode 100755 index 0000000000..c6769f450e --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/experimental_pow2_below64_size_class.cc @@ -0,0 +1,679 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
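The <fixed> column used throughout these size-class tables is explained in the comment near the top of the file below: end-of-span waste plus roughly 48 bytes of per-span TCMalloc metadata, divided by the bytes actually served from the span. As a quick sanity check of that arithmetic only (an illustrative standalone sketch, not part of the vendored sources), the quoted ~0.98% figure for a 96-byte class on an 8 KiB page works out as follows:

#include <cstdio>

int main() {
  // Assumptions taken from the upstream comment: an 8 KiB span, 96-byte
  // objects, and ~48 bytes of per-span TCMalloc metadata.
  const double kSpanBytes = 8192.0;
  const double kObjectBytes = 96.0;
  const double kSpanMetadataBytes = 48.0;
  const int objects_per_span = static_cast<int>(kSpanBytes / kObjectBytes);  // 85
  const double leftover = kSpanBytes - objects_per_span * kObjectBytes;      // 32 bytes left over
  const double fixed = (leftover + kSpanMetadataBytes) / (kSpanBytes - leftover);
  std::printf("fixed overhead ~= %.2f%%\n", 100.0 * fixed);  // prints ~0.98%
  return 0;
}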
+ +#include "tcmalloc/common.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { + +namespace tcmalloc_internal { + +// <fixed> is fixed per-size-class overhead due to end-of-span fragmentation +// and other factors. For instance, if we have a 96 byte size class, and use a +// single 8KiB page, then we will hold 85 objects per span, and have 32 bytes +// left over. There is also a fixed component of 48 bytes of TCMalloc metadata +// per span. Together, the fixed overhead would be wasted/allocated = +// (32 + 48) / (8192 - 32) ~= 0.98%. +// There is also a dynamic component to overhead based on mismatches between the +// number of bytes requested and the number of bytes provided by the size class. +// Together they sum to the total overhead; for instance if you asked for a +// 50-byte allocation that rounds up to a 64-byte size class, the dynamic +// overhead would be 28%, and if <fixed> were 22% it would mean (on average) +// 25 bytes of overhead for allocations of that size. + +// clang-format off +#if defined(__cpp_aligned_new) && __STDCPP_DEFAULT_NEW_ALIGNMENT__ <= 8 +#if TCMALLOC_PAGE_SHIFT == 13 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +static const int kCount = 82; +static_assert(kCount <= kNumClasses); +const int SizeMap::kExperimentalPow2Below64SizeClassesCount = kCount; +const SizeClassInfo SizeMap::kExperimentalPow2Below64SizeClasses[SizeMap::kExperimentalPow2Below64SizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.59% + { 16, 1, 32}, // 0.59% + { 32, 1, 32}, // 0.59% + { 64, 1, 32}, // 0.59% + { 72, 1, 32}, // 1.28% + { 80, 1, 32}, // 0.98% + { 88, 1, 32}, // 0.68% + { 96, 1, 32}, // 0.98% + { 104, 1, 32}, // 1.58% + { 112, 1, 32}, // 0.78% + { 120, 1, 32}, // 0.98% + { 128, 1, 32}, // 0.59% + { 136, 1, 32}, // 0.98% + { 144, 1, 32}, // 2.18% + { 160, 1, 32}, // 0.98% + { 176, 1, 32}, // 1.78% + { 192, 1, 32}, // 2.18% + { 208, 1, 32}, // 1.58% + { 224, 1, 32}, // 2.18% + { 240, 1, 32}, // 0.98% + { 256, 1, 32}, // 0.59% + { 272, 1, 32}, // 0.98% + { 296, 1, 32}, // 3.10% + { 312, 1, 32}, // 1.58% + { 336, 1, 32}, // 2.18% + { 352, 1, 32}, // 1.78% + { 368, 1, 32}, // 1.78% + { 408, 1, 32}, // 0.98% + { 448, 1, 32}, // 2.18% + { 480, 1, 32}, // 0.98% + { 512, 1, 32}, // 0.59% + { 576, 1, 32}, // 2.18% + { 640, 1, 32}, // 7.29% + { 704, 1, 32}, // 6.40% + { 768, 1, 32}, // 7.29% + { 896, 1, 32}, // 2.18% + { 1024, 1, 32}, // 0.59% + { 1152, 2, 32}, // 1.88% + { 1280, 2, 32}, // 6.98% + { 1408, 2, 32}, // 6.10% + { 1536, 2, 32}, // 6.98% + { 1792, 2, 32}, // 1.88% + { 2048, 2, 32}, // 0.29% + { 2304, 2, 28}, // 1.88% + { 2688, 2, 24}, // 1.88% + { 2816, 3, 23}, // 9.30% + { 3200, 2, 20}, // 2.70% + { 3456, 3, 18}, // 1.79% + { 3584, 4, 18}, // 1.74% + { 4096, 1, 16}, // 0.29% + { 4736, 3, 13}, // 3.99% + { 5376, 2, 12}, // 1.88% + { 6144, 3, 10}, // 0.20% + { 6528, 4, 10}, // 0.54% + { 7168, 7, 9}, // 0.08% + { 8192, 1, 8}, // 0.29% + { 9472, 5, 6}, // 8.23% + { 10240, 4, 6}, // 6.82% + { 12288, 3, 5}, // 0.20% + { 13568, 5, 4}, // 0.75% + { 14336, 7, 4}, // 0.08% + { 16384, 2, 4}, // 0.29% + { 20480, 5, 3}, // 0.12% + { 24576, 3, 2}, // 0.20% + { 28672, 7, 2}, // 0.08% + { 32768, 4, 2}, // 0.15% + { 40960, 5, 2}, // 0.12% + { 49152, 6, 2}, // 0.10% + { 57344, 7, 2}, // 0.08% + { 65536, 8, 2}, // 0.07% + { 73728, 9, 2}, // 0.07% + { 81920, 10, 2}, // 0.06% + { 98304, 12, 2}, // 0.05% + { 114688, 14, 2}, // 0.04% + { 131072, 16, 2}, // 0.04% + { 147456, 18, 2}, // 0.03% + { 163840, 20, 2}, // 0.03% + { 
180224, 22, 2}, // 0.03% + { 204800, 25, 2}, // 0.02% + { 237568, 29, 2}, // 0.02% + { 262144, 32, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 15 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +static const int kCount = 74; +static_assert(kCount <= kNumClasses); +const int SizeMap::kExperimentalPow2Below64SizeClassesCount = kCount; +const SizeClassInfo SizeMap::kExperimentalPow2Below64SizeClasses[SizeMap::kExperimentalPow2Below64SizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.15% + { 16, 1, 32}, // 0.15% + { 32, 1, 32}, // 0.15% + { 64, 1, 32}, // 0.15% + { 72, 1, 32}, // 0.17% + { 80, 1, 32}, // 0.29% + { 88, 1, 32}, // 0.24% + { 96, 1, 32}, // 0.24% + { 104, 1, 32}, // 0.17% + { 112, 1, 32}, // 0.34% + { 128, 1, 32}, // 0.15% + { 144, 1, 32}, // 0.39% + { 160, 1, 32}, // 0.54% + { 176, 1, 32}, // 0.24% + { 192, 1, 32}, // 0.54% + { 208, 1, 32}, // 0.49% + { 224, 1, 32}, // 0.34% + { 240, 1, 32}, // 0.54% + { 256, 1, 32}, // 0.15% + { 280, 1, 32}, // 0.17% + { 304, 1, 32}, // 0.89% + { 328, 1, 32}, // 1.06% + { 352, 1, 32}, // 0.24% + { 384, 1, 32}, // 0.54% + { 416, 1, 32}, // 1.13% + { 448, 1, 32}, // 0.34% + { 488, 1, 32}, // 0.37% + { 512, 1, 32}, // 0.15% + { 576, 1, 32}, // 1.74% + { 640, 1, 32}, // 0.54% + { 704, 1, 32}, // 1.33% + { 832, 1, 32}, // 1.13% + { 896, 1, 32}, // 1.74% + { 1024, 1, 32}, // 0.15% + { 1152, 1, 32}, // 1.74% + { 1280, 1, 32}, // 2.55% + { 1536, 1, 32}, // 1.74% + { 1792, 1, 32}, // 1.74% + { 2048, 1, 32}, // 0.15% + { 2176, 1, 30}, // 0.54% + { 2304, 1, 28}, // 1.74% + { 2688, 1, 24}, // 1.74% + { 2944, 1, 22}, // 1.33% + { 3200, 1, 20}, // 2.55% + { 3584, 1, 18}, // 1.74% + { 4096, 1, 16}, // 0.15% + { 4608, 1, 14}, // 1.74% + { 5376, 1, 12}, // 1.74% + { 6528, 1, 10}, // 0.54% + { 7168, 2, 9}, // 1.66% + { 8192, 1, 8}, // 0.15% + { 9344, 2, 7}, // 0.27% + { 10880, 1, 6}, // 0.54% + { 13952, 3, 4}, // 0.70% + { 16384, 1, 4}, // 0.15% + { 19072, 3, 3}, // 3.14% + { 21760, 2, 3}, // 0.47% + { 24576, 3, 2}, // 0.05% + { 28032, 6, 2}, // 0.22% + { 32768, 1, 2}, // 0.15% + { 38144, 5, 2}, // 7.41% + { 40960, 4, 2}, // 6.71% + { 49152, 3, 2}, // 0.05% + { 57344, 7, 2}, // 0.02% + { 65536, 2, 2}, // 0.07% + { 81920, 5, 2}, // 0.03% + { 98304, 3, 2}, // 0.05% + { 114688, 7, 2}, // 0.02% + { 131072, 4, 2}, // 0.04% + { 163840, 5, 2}, // 0.03% + { 196608, 6, 2}, // 0.02% + { 229376, 7, 2}, // 0.02% + { 262144, 8, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 18 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +static const int kCount = 85; +static_assert(kCount <= kNumClasses); +const int SizeMap::kExperimentalPow2Below64SizeClassesCount = kCount; +const SizeClassInfo SizeMap::kExperimentalPow2Below64SizeClasses[SizeMap::kExperimentalPow2Below64SizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.02% + { 16, 1, 32}, // 0.02% + { 32, 1, 32}, // 0.02% + { 64, 1, 32}, // 0.02% + { 72, 1, 32}, // 0.04% + { 80, 1, 32}, // 0.04% + { 88, 1, 32}, // 0.05% + { 96, 1, 32}, // 0.04% + { 104, 1, 32}, // 0.04% + { 112, 1, 32}, // 0.04% + { 128, 1, 32}, // 0.02% + { 144, 1, 32}, // 0.04% + { 160, 1, 32}, // 0.04% + { 176, 1, 32}, // 0.05% + { 192, 1, 32}, // 0.04% + { 208, 1, 32}, // 0.04% + { 240, 1, 32}, // 0.04% + { 256, 1, 32}, // 0.02% + { 304, 1, 32}, // 0.05% + { 336, 1, 32}, // 0.04% + { 360, 1, 32}, // 0.04% + { 408, 1, 32}, // 0.10% + { 456, 1, 32}, // 0.17% + { 512, 1, 32}, // 0.02% + { 576, 1, 32}, // 0.04% + { 640, 1, 32}, // 
0.17% + { 704, 1, 32}, // 0.12% + { 768, 1, 32}, // 0.12% + { 832, 1, 32}, // 0.04% + { 896, 1, 32}, // 0.21% + { 1024, 1, 32}, // 0.02% + { 1152, 1, 32}, // 0.26% + { 1280, 1, 32}, // 0.41% + { 1536, 1, 32}, // 0.41% + { 1664, 1, 32}, // 0.36% + { 1792, 1, 32}, // 0.21% + { 1920, 1, 32}, // 0.41% + { 2048, 1, 32}, // 0.02% + { 2176, 1, 30}, // 0.41% + { 2304, 1, 28}, // 0.71% + { 2432, 1, 26}, // 0.76% + { 2560, 1, 25}, // 0.41% + { 2688, 1, 24}, // 0.56% + { 2816, 1, 23}, // 0.12% + { 2944, 1, 22}, // 0.07% + { 3072, 1, 21}, // 0.41% + { 3328, 1, 19}, // 1.00% + { 3584, 1, 18}, // 0.21% + { 3840, 1, 17}, // 0.41% + { 4096, 1, 16}, // 0.02% + { 4736, 1, 13}, // 0.66% + { 5504, 1, 11}, // 1.35% + { 6144, 1, 10}, // 1.61% + { 6528, 1, 10}, // 0.41% + { 6784, 1, 9}, // 1.71% + { 7168, 1, 9}, // 1.61% + { 7680, 1, 8}, // 0.41% + { 8192, 1, 8}, // 0.02% + { 8704, 1, 7}, // 0.41% + { 9344, 1, 7}, // 0.21% + { 10880, 1, 6}, // 0.41% + { 11904, 1, 5}, // 0.12% + { 13056, 1, 5}, // 0.41% + { 14464, 1, 4}, // 0.71% + { 16384, 1, 4}, // 0.02% + { 18688, 1, 3}, // 0.21% + { 21760, 1, 3}, // 0.41% + { 26112, 1, 2}, // 0.41% + { 29056, 1, 2}, // 0.26% + { 32768, 1, 2}, // 0.02% + { 37376, 1, 2}, // 0.21% + { 43648, 1, 2}, // 0.12% + { 52352, 1, 2}, // 0.17% + { 56064, 2, 2}, // 3.92% + { 65536, 1, 2}, // 0.02% + { 74880, 2, 2}, // 0.03% + { 87296, 1, 2}, // 0.12% + { 104832, 2, 2}, // 0.03% + { 112256, 3, 2}, // 0.09% + { 131072, 1, 2}, // 0.02% + { 149760, 3, 2}, // 5.03% + { 174720, 2, 2}, // 0.03% + { 209664, 4, 2}, // 0.03% + { 262144, 1, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 12 +static_assert(kMaxSize == 8192, "kMaxSize mismatch"); +static const int kCount = 42; +static_assert(kCount <= kNumClasses); +const int SizeMap::kExperimentalPow2Below64SizeClassesCount = kCount; +const SizeClassInfo SizeMap::kExperimentalPow2Below64SizeClasses[SizeMap::kExperimentalPow2Below64SizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 1.17% + { 16, 1, 32}, // 1.17% + { 32, 1, 32}, // 1.17% + { 64, 1, 32}, // 1.17% + { 72, 1, 32}, // 2.78% + { 80, 1, 32}, // 1.57% + { 88, 1, 32}, // 2.37% + { 96, 1, 32}, // 2.78% + { 104, 1, 32}, // 2.17% + { 120, 1, 32}, // 1.57% + { 128, 1, 32}, // 1.17% + { 144, 1, 32}, // 2.78% + { 160, 1, 32}, // 3.60% + { 184, 1, 32}, // 2.37% + { 208, 1, 32}, // 4.86% + { 240, 1, 32}, // 1.57% + { 256, 1, 32}, // 1.17% + { 272, 1, 32}, // 1.57% + { 312, 1, 32}, // 2.17% + { 336, 1, 32}, // 2.78% + { 368, 1, 32}, // 2.37% + { 408, 1, 32}, // 1.57% + { 512, 1, 32}, // 1.17% + { 576, 2, 32}, // 2.18% + { 704, 2, 32}, // 6.40% + { 768, 2, 32}, // 7.29% + { 896, 2, 32}, // 2.18% + { 1024, 2, 32}, // 0.59% + { 1152, 3, 32}, // 7.08% + { 1280, 3, 32}, // 7.08% + { 1536, 3, 32}, // 0.39% + { 1792, 4, 32}, // 1.88% + { 2048, 4, 32}, // 0.29% + { 2304, 4, 28}, // 1.88% + { 2688, 4, 24}, // 1.88% + { 3456, 6, 18}, // 1.79% + { 4096, 4, 16}, // 0.29% + { 5376, 4, 12}, // 1.88% + { 6144, 3, 10}, // 0.39% + { 7168, 7, 9}, // 0.17% + { 8192, 4, 8}, // 0.29% +}; +#else +#error "Unsupported TCMALLOC_PAGE_SHIFT value!" 
+#endif +#else +#if TCMALLOC_PAGE_SHIFT == 13 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +static const int kCount = 82; +static_assert(kCount <= kNumClasses); +const int SizeMap::kExperimentalPow2Below64SizeClassesCount = kCount; +const SizeClassInfo SizeMap::kExperimentalPow2Below64SizeClasses[SizeMap::kExperimentalPow2Below64SizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.59% + { 16, 1, 32}, // 0.59% + { 32, 1, 32}, // 0.59% + { 64, 1, 32}, // 0.59% + { 80, 1, 32}, // 0.98% + { 96, 1, 32}, // 0.98% + { 112, 1, 32}, // 0.78% + { 128, 1, 32}, // 0.59% + { 144, 1, 32}, // 2.18% + { 160, 1, 32}, // 0.98% + { 176, 1, 32}, // 1.78% + { 192, 1, 32}, // 2.18% + { 208, 1, 32}, // 1.58% + { 224, 1, 32}, // 2.18% + { 240, 1, 32}, // 0.98% + { 256, 1, 32}, // 0.59% + { 272, 1, 32}, // 0.98% + { 288, 1, 32}, // 2.18% + { 304, 1, 32}, // 4.25% + { 320, 1, 32}, // 3.00% + { 336, 1, 32}, // 2.18% + { 352, 1, 32}, // 1.78% + { 368, 1, 32}, // 1.78% + { 384, 1, 32}, // 2.18% + { 400, 1, 32}, // 3.00% + { 416, 1, 32}, // 4.25% + { 448, 1, 32}, // 2.18% + { 480, 1, 32}, // 0.98% + { 512, 1, 32}, // 0.59% + { 576, 1, 32}, // 2.18% + { 640, 1, 32}, // 7.29% + { 704, 1, 32}, // 6.40% + { 768, 1, 32}, // 7.29% + { 896, 1, 32}, // 2.18% + { 1024, 1, 32}, // 0.59% + { 1152, 2, 32}, // 1.88% + { 1280, 2, 32}, // 6.98% + { 1408, 2, 32}, // 6.10% + { 1536, 2, 32}, // 6.98% + { 1792, 2, 32}, // 1.88% + { 2048, 2, 32}, // 0.29% + { 2304, 2, 28}, // 1.88% + { 2688, 2, 24}, // 1.88% + { 2816, 3, 23}, // 9.30% + { 3200, 2, 20}, // 2.70% + { 3456, 3, 18}, // 1.79% + { 3584, 4, 18}, // 1.74% + { 4096, 1, 16}, // 0.29% + { 4736, 3, 13}, // 3.99% + { 5376, 2, 12}, // 1.88% + { 6144, 3, 10}, // 0.20% + { 6528, 4, 10}, // 0.54% + { 7168, 7, 9}, // 0.08% + { 8192, 1, 8}, // 0.29% + { 9472, 5, 6}, // 8.23% + { 10240, 4, 6}, // 6.82% + { 12288, 3, 5}, // 0.20% + { 13568, 5, 4}, // 0.75% + { 14336, 7, 4}, // 0.08% + { 16384, 2, 4}, // 0.29% + { 20480, 5, 3}, // 0.12% + { 24576, 3, 2}, // 0.20% + { 28672, 7, 2}, // 0.08% + { 32768, 4, 2}, // 0.15% + { 40960, 5, 2}, // 0.12% + { 49152, 6, 2}, // 0.10% + { 57344, 7, 2}, // 0.08% + { 65536, 8, 2}, // 0.07% + { 73728, 9, 2}, // 0.07% + { 81920, 10, 2}, // 0.06% + { 90112, 11, 2}, // 0.05% + { 98304, 12, 2}, // 0.05% + { 106496, 13, 2}, // 0.05% + { 114688, 14, 2}, // 0.04% + { 131072, 16, 2}, // 0.04% + { 147456, 18, 2}, // 0.03% + { 163840, 20, 2}, // 0.03% + { 180224, 22, 2}, // 0.03% + { 204800, 25, 2}, // 0.02% + { 237568, 29, 2}, // 0.02% + { 262144, 32, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 15 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +static const int kCount = 74; +static_assert(kCount <= kNumClasses); +const int SizeMap::kExperimentalPow2Below64SizeClassesCount = kCount; +const SizeClassInfo SizeMap::kExperimentalPow2Below64SizeClasses[SizeMap::kExperimentalPow2Below64SizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.15% + { 16, 1, 32}, // 0.15% + { 32, 1, 32}, // 0.15% + { 64, 1, 32}, // 0.15% + { 80, 1, 32}, // 0.29% + { 96, 1, 32}, // 0.24% + { 112, 1, 32}, // 0.34% + { 128, 1, 32}, // 0.15% + { 144, 1, 32}, // 0.39% + { 160, 1, 32}, // 0.54% + { 176, 1, 32}, // 0.24% + { 192, 1, 32}, // 0.54% + { 208, 1, 32}, // 0.49% + { 224, 1, 32}, // 0.34% + { 240, 1, 32}, // 0.54% + { 256, 1, 32}, // 0.15% + { 272, 1, 32}, // 0.54% + { 288, 1, 32}, // 0.84% + { 304, 1, 32}, // 0.89% + { 320, 1, 32}, // 0.54% + { 352, 1, 32}, // 
0.24% + { 384, 1, 32}, // 0.54% + { 416, 1, 32}, // 1.13% + { 448, 1, 32}, // 0.34% + { 480, 1, 32}, // 0.54% + { 512, 1, 32}, // 0.15% + { 576, 1, 32}, // 1.74% + { 640, 1, 32}, // 0.54% + { 704, 1, 32}, // 1.33% + { 768, 1, 32}, // 1.74% + { 832, 1, 32}, // 1.13% + { 896, 1, 32}, // 1.74% + { 1024, 1, 32}, // 0.15% + { 1152, 1, 32}, // 1.74% + { 1280, 1, 32}, // 2.55% + { 1408, 1, 32}, // 1.33% + { 1536, 1, 32}, // 1.74% + { 1792, 1, 32}, // 1.74% + { 2048, 1, 32}, // 0.15% + { 2176, 1, 30}, // 0.54% + { 2304, 1, 28}, // 1.74% + { 2688, 1, 24}, // 1.74% + { 2944, 1, 22}, // 1.33% + { 3200, 1, 20}, // 2.55% + { 3584, 1, 18}, // 1.74% + { 4096, 1, 16}, // 0.15% + { 4608, 1, 14}, // 1.74% + { 5376, 1, 12}, // 1.74% + { 6528, 1, 10}, // 0.54% + { 7168, 2, 9}, // 1.66% + { 8192, 1, 8}, // 0.15% + { 9344, 2, 7}, // 0.27% + { 10880, 1, 6}, // 0.54% + { 13952, 3, 4}, // 0.70% + { 16384, 1, 4}, // 0.15% + { 19072, 3, 3}, // 3.14% + { 21760, 2, 3}, // 0.47% + { 24576, 3, 2}, // 0.05% + { 28032, 6, 2}, // 0.22% + { 32768, 1, 2}, // 0.15% + { 38144, 5, 2}, // 7.41% + { 40960, 4, 2}, // 6.71% + { 49152, 3, 2}, // 0.05% + { 57344, 7, 2}, // 0.02% + { 65536, 2, 2}, // 0.07% + { 81920, 5, 2}, // 0.03% + { 98304, 3, 2}, // 0.05% + { 114688, 7, 2}, // 0.02% + { 131072, 4, 2}, // 0.04% + { 163840, 5, 2}, // 0.03% + { 196608, 6, 2}, // 0.02% + { 229376, 7, 2}, // 0.02% + { 262144, 8, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 18 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +static const int kCount = 85; +static_assert(kCount <= kNumClasses); +const int SizeMap::kExperimentalPow2Below64SizeClassesCount = kCount; +const SizeClassInfo SizeMap::kExperimentalPow2Below64SizeClasses[SizeMap::kExperimentalPow2Below64SizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.02% + { 16, 1, 32}, // 0.02% + { 32, 1, 32}, // 0.02% + { 64, 1, 32}, // 0.02% + { 80, 1, 32}, // 0.04% + { 96, 1, 32}, // 0.04% + { 112, 1, 32}, // 0.04% + { 128, 1, 32}, // 0.02% + { 144, 1, 32}, // 0.04% + { 160, 1, 32}, // 0.04% + { 176, 1, 32}, // 0.05% + { 192, 1, 32}, // 0.04% + { 208, 1, 32}, // 0.04% + { 240, 1, 32}, // 0.04% + { 256, 1, 32}, // 0.02% + { 304, 1, 32}, // 0.05% + { 336, 1, 32}, // 0.04% + { 368, 1, 32}, // 0.07% + { 416, 1, 32}, // 0.04% + { 464, 1, 32}, // 0.19% + { 512, 1, 32}, // 0.02% + { 576, 1, 32}, // 0.04% + { 640, 1, 32}, // 0.17% + { 704, 1, 32}, // 0.12% + { 768, 1, 32}, // 0.12% + { 832, 1, 32}, // 0.04% + { 896, 1, 32}, // 0.21% + { 1024, 1, 32}, // 0.02% + { 1152, 1, 32}, // 0.26% + { 1280, 1, 32}, // 0.41% + { 1408, 1, 32}, // 0.12% + { 1536, 1, 32}, // 0.41% + { 1664, 1, 32}, // 0.36% + { 1792, 1, 32}, // 0.21% + { 1920, 1, 32}, // 0.41% + { 2048, 1, 32}, // 0.02% + { 2176, 1, 30}, // 0.41% + { 2304, 1, 28}, // 0.71% + { 2432, 1, 26}, // 0.76% + { 2560, 1, 25}, // 0.41% + { 2688, 1, 24}, // 0.56% + { 2816, 1, 23}, // 0.12% + { 2944, 1, 22}, // 0.07% + { 3072, 1, 21}, // 0.41% + { 3200, 1, 20}, // 1.15% + { 3328, 1, 19}, // 1.00% + { 3584, 1, 18}, // 0.21% + { 3840, 1, 17}, // 0.41% + { 4096, 1, 16}, // 0.02% + { 4736, 1, 13}, // 0.66% + { 5504, 1, 11}, // 1.35% + { 6144, 1, 10}, // 1.61% + { 6528, 1, 10}, // 0.41% + { 6784, 1, 9}, // 1.71% + { 7168, 1, 9}, // 1.61% + { 7680, 1, 8}, // 0.41% + { 8192, 1, 8}, // 0.02% + { 8704, 1, 7}, // 0.41% + { 9344, 1, 7}, // 0.21% + { 10368, 1, 6}, // 1.15% + { 11392, 1, 5}, // 0.07% + { 12416, 1, 5}, // 0.56% + { 13696, 1, 4}, // 0.76% + { 14464, 1, 4}, // 0.71% + { 16384, 1, 4}, // 0.02% + { 18688, 1, 
3}, // 0.21% + { 21760, 1, 3}, // 0.41% + { 26112, 1, 2}, // 0.41% + { 29056, 1, 2}, // 0.26% + { 32768, 1, 2}, // 0.02% + { 37376, 1, 2}, // 0.21% + { 43648, 1, 2}, // 0.12% + { 52352, 1, 2}, // 0.17% + { 56064, 2, 2}, // 3.92% + { 65536, 1, 2}, // 0.02% + { 74880, 2, 2}, // 0.03% + { 87296, 1, 2}, // 0.12% + { 104832, 2, 2}, // 0.03% + { 112256, 3, 2}, // 0.09% + { 131072, 1, 2}, // 0.02% + { 149760, 3, 2}, // 5.03% + { 174720, 2, 2}, // 0.03% + { 209664, 4, 2}, // 0.03% + { 262144, 1, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 12 +static_assert(kMaxSize == 8192, "kMaxSize mismatch"); +static const int kCount = 42; +static_assert(kCount <= kNumClasses); +const int SizeMap::kExperimentalPow2Below64SizeClassesCount = kCount; +const SizeClassInfo SizeMap::kExperimentalPow2Below64SizeClasses[SizeMap::kExperimentalPow2Below64SizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 1.17% + { 16, 1, 32}, // 1.17% + { 32, 1, 32}, // 1.17% + { 64, 1, 32}, // 1.17% + { 80, 1, 32}, // 1.57% + { 96, 1, 32}, // 2.78% + { 112, 1, 32}, // 2.78% + { 128, 1, 32}, // 1.17% + { 144, 1, 32}, // 2.78% + { 160, 1, 32}, // 3.60% + { 176, 1, 32}, // 2.37% + { 192, 1, 32}, // 2.78% + { 208, 1, 32}, // 4.86% + { 240, 1, 32}, // 1.57% + { 256, 1, 32}, // 1.17% + { 272, 1, 32}, // 1.57% + { 304, 1, 32}, // 4.86% + { 336, 1, 32}, // 2.78% + { 368, 1, 32}, // 2.37% + { 400, 1, 32}, // 3.60% + { 448, 1, 32}, // 2.78% + { 512, 1, 32}, // 1.17% + { 576, 2, 32}, // 2.18% + { 640, 2, 32}, // 7.29% + { 704, 2, 32}, // 6.40% + { 768, 2, 32}, // 7.29% + { 896, 2, 32}, // 2.18% + { 1024, 2, 32}, // 0.59% + { 1152, 3, 32}, // 7.08% + { 1280, 3, 32}, // 7.08% + { 1536, 3, 32}, // 0.39% + { 1792, 4, 32}, // 1.88% + { 2048, 4, 32}, // 0.29% + { 2304, 4, 28}, // 1.88% + { 2688, 4, 24}, // 1.88% + { 3456, 6, 18}, // 1.79% + { 4096, 4, 16}, // 0.29% + { 5376, 4, 12}, // 1.88% + { 6144, 3, 10}, // 0.39% + { 7168, 7, 9}, // 0.17% + { 8192, 4, 8}, // 0.29% +}; +#else +#error "Unsupported TCMALLOC_PAGE_SHIFT value!" +#endif +#endif +// clang-format on + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/experimental_pow2_size_class.cc b/contrib/libs/tcmalloc/tcmalloc/experimental_pow2_size_class.cc new file mode 100755 index 0000000000..1e6da051ca --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/experimental_pow2_size_class.cc @@ -0,0 +1,239 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/common.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { + +namespace tcmalloc_internal { + +// <fixed> is fixed per-size-class overhead due to end-of-span fragmentation +// and other factors. For instance, if we have a 96 byte size class, and use a +// single 8KiB page, then we will hold 85 objects per span, and have 32 bytes +// left over. There is also a fixed component of 48 bytes of TCMalloc metadata +// per span. 
Together, the fixed overhead would be wasted/allocated = +// (32 + 48) / (8192 - 32) ~= 0.98%. +// There is also a dynamic component to overhead based on mismatches between the +// number of bytes requested and the number of bytes provided by the size class. +// Together they sum to the total overhead; for instance if you asked for a +// 50-byte allocation that rounds up to a 64-byte size class, the dynamic +// overhead would be 28%, and if <fixed> were 22% it would mean (on average) +// 25 bytes of overhead for allocations of that size. + +// clang-format off +#if defined(__cpp_aligned_new) && __STDCPP_DEFAULT_NEW_ALIGNMENT__ <= 8 +#if TCMALLOC_PAGE_SHIFT == 13 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +static const int kCount = 17; +static_assert(kCount <= kNumClasses); +const int SizeMap::kExperimentalPow2SizeClassesCount = kCount; +const SizeClassInfo SizeMap::kExperimentalPow2SizeClasses[SizeMap::kExperimentalPow2SizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.59% + { 16, 1, 32}, // 0.59% + { 32, 1, 32}, // 0.59% + { 64, 1, 32}, // 0.59% + { 128, 1, 32}, // 0.59% + { 256, 1, 32}, // 0.59% + { 512, 1, 32}, // 0.59% + { 1024, 1, 32}, // 0.59% + { 2048, 2, 32}, // 0.29% + { 4096, 1, 16}, // 0.29% + { 8192, 1, 8}, // 0.29% + { 16384, 2, 4}, // 0.29% + { 32768, 4, 2}, // 0.15% + { 65536, 8, 2}, // 0.07% + { 131072, 16, 2}, // 0.04% + { 262144, 32, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 15 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +static const int kCount = 17; +static_assert(kCount <= kNumClasses); +const int SizeMap::kExperimentalPow2SizeClassesCount = kCount; +const SizeClassInfo SizeMap::kExperimentalPow2SizeClasses[SizeMap::kExperimentalPow2SizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.15% + { 16, 1, 32}, // 0.15% + { 32, 1, 32}, // 0.15% + { 64, 1, 32}, // 0.15% + { 128, 1, 32}, // 0.15% + { 256, 1, 32}, // 0.15% + { 512, 1, 32}, // 0.15% + { 1024, 1, 32}, // 0.15% + { 2048, 1, 32}, // 0.15% + { 4096, 1, 16}, // 0.15% + { 8192, 1, 8}, // 0.15% + { 16384, 1, 4}, // 0.15% + { 32768, 1, 2}, // 0.15% + { 65536, 2, 2}, // 0.07% + { 131072, 4, 2}, // 0.04% + { 262144, 8, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 18 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +static const int kCount = 17; +static_assert(kCount <= kNumClasses); +const int SizeMap::kExperimentalPow2SizeClassesCount = kCount; +const SizeClassInfo SizeMap::kExperimentalPow2SizeClasses[SizeMap::kExperimentalPow2SizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.02% + { 16, 1, 32}, // 0.02% + { 32, 1, 32}, // 0.02% + { 64, 1, 32}, // 0.02% + { 128, 1, 32}, // 0.02% + { 256, 1, 32}, // 0.02% + { 512, 1, 32}, // 0.02% + { 1024, 1, 32}, // 0.02% + { 2048, 1, 32}, // 0.02% + { 4096, 1, 16}, // 0.02% + { 8192, 1, 8}, // 0.02% + { 16384, 1, 4}, // 0.02% + { 32768, 1, 2}, // 0.02% + { 65536, 1, 2}, // 0.02% + { 131072, 1, 2}, // 0.02% + { 262144, 1, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 12 +static_assert(kMaxSize == 8192, "kMaxSize mismatch"); +static const int kCount = 12; +static_assert(kCount <= kNumClasses); +const int SizeMap::kExperimentalPow2SizeClassesCount = kCount; +const SizeClassInfo SizeMap::kExperimentalPow2SizeClasses[SizeMap::kExperimentalPow2SizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 1.17% + { 16, 1, 32}, // 1.17% 
+ { 32, 1, 32}, // 1.17% + { 64, 1, 32}, // 1.17% + { 128, 1, 32}, // 1.17% + { 256, 1, 32}, // 1.17% + { 512, 1, 32}, // 1.17% + { 1024, 2, 32}, // 0.59% + { 2048, 4, 32}, // 0.29% + { 4096, 4, 16}, // 0.29% + { 8192, 4, 8}, // 0.29% +}; +#else +#error "Unsupported TCMALLOC_PAGE_SHIFT value!" +#endif +#else +#if TCMALLOC_PAGE_SHIFT == 13 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +static const int kCount = 17; +static_assert(kCount <= kNumClasses); +const int SizeMap::kExperimentalPow2SizeClassesCount = kCount; +const SizeClassInfo SizeMap::kExperimentalPow2SizeClasses[SizeMap::kExperimentalPow2SizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.59% + { 16, 1, 32}, // 0.59% + { 32, 1, 32}, // 0.59% + { 64, 1, 32}, // 0.59% + { 128, 1, 32}, // 0.59% + { 256, 1, 32}, // 0.59% + { 512, 1, 32}, // 0.59% + { 1024, 1, 32}, // 0.59% + { 2048, 2, 32}, // 0.29% + { 4096, 1, 16}, // 0.29% + { 8192, 1, 8}, // 0.29% + { 16384, 2, 4}, // 0.29% + { 32768, 4, 2}, // 0.15% + { 65536, 8, 2}, // 0.07% + { 131072, 16, 2}, // 0.04% + { 262144, 32, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 15 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +static const int kCount = 17; +static_assert(kCount <= kNumClasses); +const int SizeMap::kExperimentalPow2SizeClassesCount = kCount; +const SizeClassInfo SizeMap::kExperimentalPow2SizeClasses[SizeMap::kExperimentalPow2SizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.15% + { 16, 1, 32}, // 0.15% + { 32, 1, 32}, // 0.15% + { 64, 1, 32}, // 0.15% + { 128, 1, 32}, // 0.15% + { 256, 1, 32}, // 0.15% + { 512, 1, 32}, // 0.15% + { 1024, 1, 32}, // 0.15% + { 2048, 1, 32}, // 0.15% + { 4096, 1, 16}, // 0.15% + { 8192, 1, 8}, // 0.15% + { 16384, 1, 4}, // 0.15% + { 32768, 1, 2}, // 0.15% + { 65536, 2, 2}, // 0.07% + { 131072, 4, 2}, // 0.04% + { 262144, 8, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 18 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +static const int kCount = 17; +static_assert(kCount <= kNumClasses); +const int SizeMap::kExperimentalPow2SizeClassesCount = kCount; +const SizeClassInfo SizeMap::kExperimentalPow2SizeClasses[SizeMap::kExperimentalPow2SizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.02% + { 16, 1, 32}, // 0.02% + { 32, 1, 32}, // 0.02% + { 64, 1, 32}, // 0.02% + { 128, 1, 32}, // 0.02% + { 256, 1, 32}, // 0.02% + { 512, 1, 32}, // 0.02% + { 1024, 1, 32}, // 0.02% + { 2048, 1, 32}, // 0.02% + { 4096, 1, 16}, // 0.02% + { 8192, 1, 8}, // 0.02% + { 16384, 1, 4}, // 0.02% + { 32768, 1, 2}, // 0.02% + { 65536, 1, 2}, // 0.02% + { 131072, 1, 2}, // 0.02% + { 262144, 1, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 12 +static_assert(kMaxSize == 8192, "kMaxSize mismatch"); +static const int kCount = 12; +static_assert(kCount <= kNumClasses); +const int SizeMap::kExperimentalPow2SizeClassesCount = kCount; +const SizeClassInfo SizeMap::kExperimentalPow2SizeClasses[SizeMap::kExperimentalPow2SizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 1.17% + { 16, 1, 32}, // 1.17% + { 32, 1, 32}, // 1.17% + { 64, 1, 32}, // 1.17% + { 128, 1, 32}, // 1.17% + { 256, 1, 32}, // 1.17% + { 512, 1, 32}, // 1.17% + { 1024, 2, 32}, // 0.59% + { 2048, 4, 32}, // 0.29% + { 4096, 4, 16}, // 0.29% + { 8192, 4, 8}, // 0.29% +}; +#else +#error "Unsupported TCMALLOC_PAGE_SHIFT value!" 
+#endif +#endif +// clang-format on + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator.cc b/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator.cc new file mode 100644 index 0000000000..cc02ed7a05 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator.cc @@ -0,0 +1,562 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/guarded_page_allocator.h" + +#include <fcntl.h> +#include <string.h> +#include <sys/mman.h> +#include <unistd.h> + +#include <algorithm> +#include <array> +#include <cmath> +#include <csignal> +#include <tuple> +#include <utility> + +#include "absl/base/call_once.h" +#include "absl/base/internal/spinlock.h" +#include "absl/base/internal/sysinfo.h" +#include "absl/debugging/stacktrace.h" +#include "absl/numeric/bits.h" +#include "absl/strings/string_view.h" +#include "tcmalloc/common.h" +#include "tcmalloc/internal/environment.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/util.h" +#include "tcmalloc/pagemap.h" +#include "tcmalloc/sampler.h" +#include "tcmalloc/static_vars.h" +#include "tcmalloc/system-alloc.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +const size_t GuardedPageAllocator::kMagicSize; // NOLINT + +void GuardedPageAllocator::Init(size_t max_alloced_pages, size_t total_pages) { + CHECK_CONDITION(max_alloced_pages > 0); + CHECK_CONDITION(max_alloced_pages <= total_pages); + CHECK_CONDITION(total_pages <= kGpaMaxPages); + max_alloced_pages_ = max_alloced_pages; + total_pages_ = total_pages; + + // If the system page size is larger than kPageSize, we need to use the + // system page size for this allocator since mprotect operates on full pages + // only. This case happens on PPC. + page_size_ = std::max(kPageSize, static_cast<size_t>(getpagesize())); + ASSERT(page_size_ % kPageSize == 0); + + rand_ = reinterpret_cast<uint64_t>(this); // Initialize RNG seed. + MapPages(); +} + +void GuardedPageAllocator::Destroy() { + absl::base_internal::SpinLockHolder h(&guarded_page_lock); + if (initialized_) { + size_t len = pages_end_addr_ - pages_base_addr_; + int err = munmap(reinterpret_cast<void *>(pages_base_addr_), len); + ASSERT(err != -1); + (void)err; + initialized_ = false; + } +} + +void *GuardedPageAllocator::Allocate(size_t size, size_t alignment) { + if (size == 0) return nullptr; + ssize_t free_slot = ReserveFreeSlot(); + if (free_slot == -1) return nullptr; // All slots are reserved. 
+ + ASSERT(size <= page_size_); + ASSERT(alignment <= page_size_); + ASSERT(alignment == 0 || absl::has_single_bit(alignment)); + void *result = reinterpret_cast<void *>(SlotToAddr(free_slot)); + if (mprotect(result, page_size_, PROT_READ | PROT_WRITE) == -1) { + ASSERT(false && "mprotect failed"); + absl::base_internal::SpinLockHolder h(&guarded_page_lock); + num_failed_allocations_++; + FreeSlot(free_slot); + return nullptr; + } + + // Place some allocations at end of page for better overflow detection. + MaybeRightAlign(free_slot, size, alignment, &result); + + // Record stack trace. + SlotMetadata &d = data_[free_slot]; + d.dealloc_trace.depth = 0; + d.alloc_trace.depth = absl::GetStackTrace(d.alloc_trace.stack, kMaxStackDepth, + /*skip_count=*/3); + d.alloc_trace.tid = absl::base_internal::GetTID(); + d.requested_size = size; + d.allocation_start = reinterpret_cast<uintptr_t>(result); + + ASSERT(!alignment || d.allocation_start % alignment == 0); + return result; +} + +void GuardedPageAllocator::Deallocate(void *ptr) { + ASSERT(PointerIsMine(ptr)); + const uintptr_t page_addr = GetPageAddr(reinterpret_cast<uintptr_t>(ptr)); + size_t slot = AddrToSlot(page_addr); + + absl::base_internal::SpinLockHolder h(&guarded_page_lock); + if (IsFreed(slot)) { + double_free_detected_ = true; + } else if (WriteOverflowOccurred(slot)) { + write_overflow_detected_ = true; + } + + CHECK_CONDITION(mprotect(reinterpret_cast<void *>(page_addr), page_size_, + PROT_NONE) != -1); + + if (write_overflow_detected_ || double_free_detected_) { + *reinterpret_cast<char *>(ptr) = 'X'; // Trigger SEGV handler. + CHECK_CONDITION(false); // Unreachable. + } + + // Record stack trace. + GpaStackTrace &trace = data_[slot].dealloc_trace; + trace.depth = absl::GetStackTrace(trace.stack, kMaxStackDepth, + /*skip_count=*/2); + trace.tid = absl::base_internal::GetTID(); + + FreeSlot(slot); +} + +size_t GuardedPageAllocator::GetRequestedSize(const void *ptr) const { + ASSERT(PointerIsMine(ptr)); + size_t slot = AddrToSlot(GetPageAddr(reinterpret_cast<uintptr_t>(ptr))); + return data_[slot].requested_size; +} + +std::pair<off_t, size_t> GuardedPageAllocator::GetAllocationOffsetAndSize( + const void *ptr) const { + ASSERT(PointerIsMine(ptr)); + const uintptr_t addr = reinterpret_cast<uintptr_t>(ptr); + const size_t slot = GetNearestSlot(addr); + return {addr - data_[slot].allocation_start, data_[slot].requested_size}; +} + +GuardedPageAllocator::ErrorType GuardedPageAllocator::GetStackTraces( + const void *ptr, GpaStackTrace *alloc_trace, + GpaStackTrace *dealloc_trace) const { + ASSERT(PointerIsMine(ptr)); + const uintptr_t addr = reinterpret_cast<uintptr_t>(ptr); + size_t slot = GetNearestSlot(addr); + *alloc_trace = data_[slot].alloc_trace; + *dealloc_trace = data_[slot].dealloc_trace; + return GetErrorType(addr, data_[slot]); +} + +// We take guarded samples during periodic profiling samples. Computes the +// mean number of profiled samples made for every guarded sample. 
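// Illustrative arithmetic for the chained rate described above (the rates are
// assumed example values, not upstream defaults): with a guarded sampling rate
// of 100 MiB and a profile sampling rate of 2 MiB, GetChainedRate() returns
// ceil(100 / 2) = 50, i.e. roughly one guarded sample for every 50 profiled
// samples; a negative guarded rate is passed through unchanged.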
+static int GetChainedRate() { + auto guarded_rate = Parameters::guarded_sampling_rate(); + auto sample_rate = Parameters::profile_sampling_rate(); + if (guarded_rate < 0 || sample_rate <= 0) { + return guarded_rate; + } else { + return std::ceil(static_cast<double>(guarded_rate) / + static_cast<double>(sample_rate)); + } +} + +void GuardedPageAllocator::Print(Printer *out) { + absl::base_internal::SpinLockHolder h(&guarded_page_lock); + out->printf( + "\n" + "------------------------------------------------\n" + "GWP-ASan Status\n" + "------------------------------------------------\n" + "Successful Allocations: %zu\n" + "Failed Allocations: %zu\n" + "Slots Currently Allocated: %zu\n" + "Slots Currently Quarantined: %zu\n" + "Maximum Slots Allocated: %zu / %zu\n" + "PARAMETER tcmalloc_guarded_sample_parameter %d\n", + num_allocation_requests_ - num_failed_allocations_, + num_failed_allocations_, num_alloced_pages_, + total_pages_ - num_alloced_pages_, num_alloced_pages_max_, + max_alloced_pages_, GetChainedRate()); +} + +void GuardedPageAllocator::PrintInPbtxt(PbtxtRegion *gwp_asan) const { + absl::base_internal::SpinLockHolder h(&guarded_page_lock); + gwp_asan->PrintI64("successful_allocations", + num_allocation_requests_ - num_failed_allocations_); + gwp_asan->PrintI64("failed_allocations", num_failed_allocations_); + gwp_asan->PrintI64("current_slots_allocated", num_alloced_pages_); + gwp_asan->PrintI64("current_slots_quarantined", + total_pages_ - num_alloced_pages_); + gwp_asan->PrintI64("max_slots_allocated", num_alloced_pages_max_); + gwp_asan->PrintI64("allocated_slot_limit", max_alloced_pages_); + gwp_asan->PrintI64("tcmalloc_guarded_sample_parameter", GetChainedRate()); +} + +// Maps 2 * total_pages_ + 1 pages so that there are total_pages_ unique pages +// we can return from Allocate with guard pages before and after them. +void GuardedPageAllocator::MapPages() { + absl::base_internal::SpinLockHolder h(&guarded_page_lock); + ASSERT(!first_page_addr_); + ASSERT(page_size_ % getpagesize() == 0); + size_t len = (2 * total_pages_ + 1) * page_size_; + auto base_addr = reinterpret_cast<uintptr_t>( + MmapAligned(len, page_size_, MemoryTag::kSampled)); + ASSERT(base_addr); + if (!base_addr) return; + + // Tell TCMalloc's PageMap about the memory we own. + const PageId page = PageIdContaining(reinterpret_cast<void *>(base_addr)); + const Length page_len = BytesToLengthFloor(len); + if (!Static::pagemap().Ensure(page, page_len)) { + ASSERT(false && "Failed to notify page map of page-guarded memory."); + return; + } + + // Allocate memory for slot metadata. + data_ = reinterpret_cast<SlotMetadata *>( + Static::arena().Alloc(sizeof(*data_) * total_pages_)); + for (size_t i = 0; i < total_pages_; ++i) { + new (&data_[i]) SlotMetadata; + } + + pages_base_addr_ = base_addr; + pages_end_addr_ = pages_base_addr_ + len; + + // Align first page to page_size_. + first_page_addr_ = GetPageAddr(pages_base_addr_ + page_size_); + + std::fill_n(free_pages_, total_pages_, true); + initialized_ = true; +} + +// Selects a random slot in O(total_pages_) time. 
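// Worked example with illustrative values: if total_pages_ == 6 and
// free_pages_ == {1, 0, 1, 1, 0, 1}, then num_free_pages == 4; a draw of
// rand_ % num_free_pages == 2 makes GetIthFreeSlot(2) scan the bitmap and
// return slot 3, the third free slot counting from zero.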
+ssize_t GuardedPageAllocator::ReserveFreeSlot() { + absl::base_internal::SpinLockHolder h(&guarded_page_lock); + if (!initialized_ || !allow_allocations_) return -1; + num_allocation_requests_++; + if (num_alloced_pages_ == max_alloced_pages_) { + num_failed_allocations_++; + return -1; + } + + rand_ = Sampler::NextRandom(rand_); + size_t num_free_pages = total_pages_ - num_alloced_pages_; + size_t slot = GetIthFreeSlot(rand_ % num_free_pages); + ASSERT(free_pages_[slot]); + free_pages_[slot] = false; + num_alloced_pages_++; + num_alloced_pages_max_ = std::max(num_alloced_pages_, num_alloced_pages_max_); + return slot; +} + +size_t GuardedPageAllocator::GetIthFreeSlot(size_t ith_free_slot) { + ASSERT(ith_free_slot < total_pages_ - num_alloced_pages_); + for (size_t free_slot_count = 0, j = 0;; j++) { + if (free_pages_[j]) { + if (free_slot_count == ith_free_slot) return j; + free_slot_count++; + } + } +} + +void GuardedPageAllocator::FreeSlot(size_t slot) { + ASSERT(slot < total_pages_); + ASSERT(!free_pages_[slot]); + free_pages_[slot] = true; + num_alloced_pages_--; +} + +uintptr_t GuardedPageAllocator::GetPageAddr(uintptr_t addr) const { + const uintptr_t addr_mask = ~(page_size_ - 1ULL); + return addr & addr_mask; +} + +uintptr_t GuardedPageAllocator::GetNearestValidPage(uintptr_t addr) const { + if (addr < first_page_addr_) return first_page_addr_; + const uintptr_t last_page_addr = + first_page_addr_ + 2 * (total_pages_ - 1) * page_size_; + if (addr > last_page_addr) return last_page_addr; + uintptr_t offset = addr - first_page_addr_; + + // If addr is already on a valid page, just return addr. + if ((offset / page_size_) % 2 == 0) return addr; + + // ptr points to a guard page, so get nearest valid page. + const size_t kHalfPageSize = page_size_ / 2; + if ((offset / kHalfPageSize) % 2 == 0) { + return addr - kHalfPageSize; // Round down. + } + return addr + kHalfPageSize; // Round up. 
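  // Layout reminder (illustrative): valid pages sit at
  // first_page_addr_ + 2 * k * page_size_ and alternate with guard pages, so
  // an address inside a guard page is shifted by half a page toward whichever
  // neighboring valid page is closer.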
+} + +size_t GuardedPageAllocator::GetNearestSlot(uintptr_t addr) const { + return AddrToSlot(GetPageAddr(GetNearestValidPage(addr))); +} + +bool GuardedPageAllocator::IsFreed(size_t slot) const { + return free_pages_[slot]; +} + +bool GuardedPageAllocator::WriteOverflowOccurred(size_t slot) const { + if (!ShouldRightAlign(slot)) return false; + uint8_t magic = GetWriteOverflowMagic(slot); + uintptr_t alloc_end = + data_[slot].allocation_start + data_[slot].requested_size; + uintptr_t page_end = SlotToAddr(slot) + page_size_; + uintptr_t magic_end = std::min(page_end, alloc_end + kMagicSize); + for (uintptr_t p = alloc_end; p < magic_end; ++p) { + if (*reinterpret_cast<uint8_t *>(p) != magic) return true; + } + return false; +} + +GuardedPageAllocator::ErrorType GuardedPageAllocator::GetErrorType( + uintptr_t addr, const SlotMetadata &d) const { + if (!d.allocation_start) return ErrorType::kUnknown; + if (double_free_detected_) return ErrorType::kDoubleFree; + if (write_overflow_detected_) return ErrorType::kBufferOverflowOnDealloc; + if (d.dealloc_trace.depth) return ErrorType::kUseAfterFree; + if (addr < d.allocation_start) return ErrorType::kBufferUnderflow; + if (addr >= d.allocation_start + d.requested_size) { + return ErrorType::kBufferOverflow; + } + return ErrorType::kUnknown; +} + +uintptr_t GuardedPageAllocator::SlotToAddr(size_t slot) const { + ASSERT(slot < total_pages_); + return first_page_addr_ + 2 * slot * page_size_; +} + +size_t GuardedPageAllocator::AddrToSlot(uintptr_t addr) const { + uintptr_t offset = addr - first_page_addr_; + ASSERT(offset % page_size_ == 0); + ASSERT((offset / page_size_) % 2 == 0); + int slot = offset / page_size_ / 2; + ASSERT(slot >= 0 && slot < total_pages_); + return slot; +} + +void GuardedPageAllocator::MaybeRightAlign(size_t slot, size_t size, + size_t alignment, void **ptr) { + if (!ShouldRightAlign(slot)) return; + uintptr_t adjusted_ptr = + reinterpret_cast<uintptr_t>(*ptr) + page_size_ - size; + + // If alignment == 0, the necessary alignment is never larger than the size + // rounded up to the next power of 2. We use this fact to minimize alignment + // padding between the end of small allocations and their guard pages. + // + // For allocations larger than the greater of kAlignment and + // __STDCPP_DEFAULT_NEW_ALIGNMENT__, we're safe aligning to that value. + size_t default_alignment = + std::min(absl::bit_ceil(size), + std::max(kAlignment, + static_cast<size_t>(__STDCPP_DEFAULT_NEW_ALIGNMENT__))); + + // Ensure valid alignment. + alignment = std::max(alignment, default_alignment); + uintptr_t alignment_padding = adjusted_ptr & (alignment - 1); + adjusted_ptr -= alignment_padding; + + // Write magic bytes in alignment padding to detect small overflow writes. + size_t magic_size = std::min(alignment_padding, kMagicSize); + memset(reinterpret_cast<void *>(adjusted_ptr + size), + GetWriteOverflowMagic(slot), magic_size); + *ptr = reinterpret_cast<void *>(adjusted_ptr); +} + +// If this failure occurs during "bazel test", writes a warning for Bazel to +// display. +static void RecordBazelWarning(absl::string_view error) { + const char *warning_file = thread_safe_getenv("TEST_WARNINGS_OUTPUT_FILE"); + if (!warning_file) return; // Not a bazel test. 
+ + constexpr char warning[] = "GWP-ASan error detected: "; + int fd = open(warning_file, O_CREAT | O_WRONLY | O_APPEND, 0644); + if (fd == -1) return; + (void)write(fd, warning, sizeof(warning) - 1); + (void)write(fd, error.data(), error.size()); + (void)write(fd, "\n", 1); + close(fd); +} + +// If this failure occurs during a gUnit test, writes an XML file describing the +// error type. Note that we cannot use ::testing::Test::RecordProperty() +// because it doesn't write the XML file if a test crashes (which we're about to +// do here). So we write directly to the XML file instead. +// +static void RecordTestFailure(absl::string_view error) { + const char *xml_file = thread_safe_getenv("XML_OUTPUT_FILE"); + if (!xml_file) return; // Not a gUnit test. + + // Record test failure for Sponge. + constexpr char xml_text_header[] = + "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" + "<testsuites><testsuite><testcase>" + " <properties>" + " <property name=\"gwp-asan-report\" value=\""; + constexpr char xml_text_footer[] = + "\"/>" + " </properties>" + " <failure message=\"MemoryError\">" + " GWP-ASan detected a memory error. See the test log for full report." + " </failure>" + "</testcase></testsuite></testsuites>"; + + int fd = open(xml_file, O_CREAT | O_WRONLY | O_TRUNC, 0644); + if (fd == -1) return; + (void)write(fd, xml_text_header, sizeof(xml_text_header) - 1); + (void)write(fd, error.data(), error.size()); + (void)write(fd, xml_text_footer, sizeof(xml_text_footer) - 1); + close(fd); +} +// +// If this crash occurs in a test, records test failure summaries. +// +// error contains the type of error to record. +static void RecordCrash(absl::string_view error) { + + RecordBazelWarning(error); + RecordTestFailure(error); +} + +static void PrintStackTrace(void **stack_frames, size_t depth) { + for (size_t i = 0; i < depth; ++i) { + Log(kLog, __FILE__, __LINE__, " @ ", stack_frames[i]); + } +} + +static void PrintStackTraceFromSignalHandler(void *context) { + void *stack_frames[kMaxStackDepth]; + size_t depth = absl::GetStackTraceWithContext(stack_frames, kMaxStackDepth, 1, + context, nullptr); + PrintStackTrace(stack_frames, depth); +} + +// A SEGV handler that prints stack traces for the allocation and deallocation +// of relevant memory as well as the location of the memory error. 
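// Note: this handler is not installed here; it is registered lazily via
// sigaction() in MallocExtension_Internal_ActivateGuardedSampling() below and
// chains to any previously installed SIGSEGV handler through
// HandleSegvAndForward() and ForwardSignal().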
+static void SegvHandler(int signo, siginfo_t *info, void *context) { + if (signo != SIGSEGV) return; + void *fault = info->si_addr; + if (!Static::guardedpage_allocator().PointerIsMine(fault)) return; + GuardedPageAllocator::GpaStackTrace alloc_trace, dealloc_trace; + GuardedPageAllocator::ErrorType error = + Static::guardedpage_allocator().GetStackTraces(fault, &alloc_trace, + &dealloc_trace); + if (error == GuardedPageAllocator::ErrorType::kUnknown) return; + pid_t current_thread = absl::base_internal::GetTID(); + off_t offset; + size_t size; + std::tie(offset, size) = + Static::guardedpage_allocator().GetAllocationOffsetAndSize(fault); + + Log(kLog, __FILE__, __LINE__, + "*** GWP-ASan " + "(https://google.github.io/tcmalloc/gwp-asan.html) " + "has detected a memory error ***"); + Log(kLog, __FILE__, __LINE__, ">>> Access at offset", offset, + "into buffer of length", size); + Log(kLog, __FILE__, __LINE__, + "Error originates from memory allocated in thread", alloc_trace.tid, + "at:"); + PrintStackTrace(alloc_trace.stack, alloc_trace.depth); + + switch (error) { + case GuardedPageAllocator::ErrorType::kUseAfterFree: + Log(kLog, __FILE__, __LINE__, "The memory was freed in thread", + dealloc_trace.tid, "at:"); + PrintStackTrace(dealloc_trace.stack, dealloc_trace.depth); + Log(kLog, __FILE__, __LINE__, "Use-after-free occurs in thread", + current_thread, "at:"); + RecordCrash("use-after-free"); + break; + case GuardedPageAllocator::ErrorType::kBufferUnderflow: + Log(kLog, __FILE__, __LINE__, "Buffer underflow occurs in thread", + current_thread, "at:"); + RecordCrash("buffer-underflow"); + break; + case GuardedPageAllocator::ErrorType::kBufferOverflow: + Log(kLog, __FILE__, __LINE__, "Buffer overflow occurs in thread", + current_thread, "at:"); + RecordCrash("buffer-overflow"); + break; + case GuardedPageAllocator::ErrorType::kDoubleFree: + Log(kLog, __FILE__, __LINE__, "The memory was freed in thread", + dealloc_trace.tid, "at:"); + PrintStackTrace(dealloc_trace.stack, dealloc_trace.depth); + Log(kLog, __FILE__, __LINE__, "Double free occurs in thread", + current_thread, "at:"); + RecordCrash("double-free"); + break; + case GuardedPageAllocator::ErrorType::kBufferOverflowOnDealloc: + Log(kLog, __FILE__, __LINE__, + "Buffer overflow (write) detected in thread", current_thread, + "at free:"); + RecordCrash("buffer-overflow-detected-at-free"); + break; + case GuardedPageAllocator::ErrorType::kUnknown: + Crash(kCrash, __FILE__, __LINE__, "Unexpected ErrorType::kUnknown"); + } + PrintStackTraceFromSignalHandler(context); + if (error == GuardedPageAllocator::ErrorType::kBufferOverflowOnDealloc) { + Log(kLog, __FILE__, __LINE__, + "*** Try rerunning with --config=asan to get stack trace of overflow " + "***"); + } +} + +static struct sigaction old_sa; + +static void ForwardSignal(int signo, siginfo_t *info, void *context) { + if (old_sa.sa_flags & SA_SIGINFO) { + old_sa.sa_sigaction(signo, info, context); + } else if (old_sa.sa_handler == SIG_DFL) { + // No previous handler registered. Re-raise signal for core dump. + int err = sigaction(signo, &old_sa, nullptr); + if (err == -1) { + Log(kLog, __FILE__, __LINE__, "Couldn't restore previous sigaction!"); + } + raise(signo); + } else if (old_sa.sa_handler == SIG_IGN) { + return; // Previous sigaction ignored signal, so do the same. 
+ } else { + old_sa.sa_handler(signo); + } +} + +static void HandleSegvAndForward(int signo, siginfo_t *info, void *context) { + SegvHandler(signo, info, context); + ForwardSignal(signo, info, context); +} + +extern "C" void MallocExtension_Internal_ActivateGuardedSampling() { + static absl::once_flag flag; + absl::call_once(flag, []() { + struct sigaction action = {}; + action.sa_sigaction = HandleSegvAndForward; + sigemptyset(&action.sa_mask); + action.sa_flags = SA_SIGINFO; + sigaction(SIGSEGV, &action, &old_sa); + Static::guardedpage_allocator().AllowAllocations(); + }); +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator.h b/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator.h new file mode 100644 index 0000000000..e5a6118c08 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator.h @@ -0,0 +1,311 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_GUARDED_PAGE_ALLOCATOR_H_ +#define TCMALLOC_GUARDED_PAGE_ALLOCATOR_H_ + +#include <stddef.h> +#include <stdint.h> +#include <sys/types.h> + +#include <utility> + +#include "absl/base/attributes.h" +#include "absl/base/internal/spinlock.h" +#include "absl/base/thread_annotations.h" +#include "tcmalloc/common.h" +#include "tcmalloc/internal/logging.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +ABSL_CONST_INIT extern absl::base_internal::SpinLock guarded_page_lock; + +// An allocator that gives each allocation a new region, with guard pages on +// either side of the allocated region. If a buffer is overflowed to the next +// guard page or underflowed to the previous guard page, a segfault occurs. +// After an allocation is freed, the underlying page is marked as inaccessible, +// and any future accesses to it will also cause segfaults until the page is +// reallocated. +// +// Is safe to use with static storage duration and is thread safe with the +// exception of calls to Init() and Destroy() (see corresponding function +// comments). +// +// SYNCHRONIZATION +// Requires the SpinLock guarded_page_lock to be defined externally. This is +// required so that this class may be instantiated with static storage +// duration. The lock is held by this class during initialization and when +// accessing the internal free page map. +// +// Example: +// ABSL_CONST_INIT absl::base_internal::SpinLock +// guarded_page_lock(absl::kConstInit, +// absl::base_internal::SCHEDULE_KERNEL_ONLY); +// ABSL_CONST_INIT GuardedPageAllocator gpa; +// +// void foo() { +// char *buf = reinterpret_cast<char *>(gpa.Allocate(8000, 1)); +// buf[0] = 'A'; // OK. No segfault occurs. +// memset(buf, 'A', 8000); // OK. No segfault occurs. +// buf[-300] = 'A'; // Segfault! +// buf[9000] = 'A'; // Segfault! +// gpa.Deallocate(buf); +// buf[0] = 'B'; // Segfault! +// } +// +// int main() { +// // Call Init() only once. 
+// gpa.Init(64, GuardedPageAllocator::kGpaMaxPages); +// gpa.AllowAllocations(); +// for (int i = 0; i < 1000; i++) foo(); +// return 0; +// } +class GuardedPageAllocator { + public: + struct GpaStackTrace { + void *stack[kMaxStackDepth]; + size_t depth = 0; + pid_t tid = 0; + }; + + // Maximum number of pages this class can allocate. + static constexpr size_t kGpaMaxPages = 512; + + enum class ErrorType { + kUseAfterFree, + kBufferUnderflow, + kBufferOverflow, + kDoubleFree, + kBufferOverflowOnDealloc, + kUnknown, + }; + + constexpr GuardedPageAllocator() + : free_pages_{}, + num_alloced_pages_(0), + num_alloced_pages_max_(0), + num_allocation_requests_(0), + num_failed_allocations_(0), + data_(nullptr), + pages_base_addr_(0), + pages_end_addr_(0), + first_page_addr_(0), + max_alloced_pages_(0), + total_pages_(0), + page_size_(0), + rand_(0), + initialized_(false), + allow_allocations_(false), + double_free_detected_(false), + write_overflow_detected_(false) {} + + GuardedPageAllocator(const GuardedPageAllocator &) = delete; + GuardedPageAllocator &operator=(const GuardedPageAllocator &) = delete; + + ~GuardedPageAllocator() = default; + + // Configures this allocator to allocate up to max_alloced_pages pages at a + // time from a pool of total_pages pages, where: + // 1 <= max_alloced_pages <= total_pages <= kGpaMaxPages + // + // This method should be called non-concurrently and only once to complete + // initialization. Dynamic initialization is deliberately done here and not + // in the constructor, thereby allowing the constructor to be constexpr and + // avoiding static initialization order issues. + void Init(size_t max_alloced_pages, size_t total_pages) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Unmaps memory allocated by this class. + // + // This method should be called non-concurrently and only once to complete + // destruction. Destruction is deliberately done here and not in the + // destructor, thereby allowing the destructor to be trivial (i.e. a no-op) + // and avoiding use-after-destruction issues for static/global instances. + void Destroy(); + + // On success, returns a pointer to size bytes of page-guarded memory, aligned + // to alignment. On failure, returns nullptr. The returned pointer is + // guaranteed to be tagged. Failure can occur if memory could not be mapped + // or protected, if all guarded pages are already allocated, or if size is 0. + // + // Precondition: size and alignment <= page_size_ + // Precondition: alignment is 0 or a power of 2 + void *Allocate(size_t size, size_t alignment) + ABSL_LOCKS_EXCLUDED(guarded_page_lock); + + // Deallocates memory pointed to by ptr. ptr must have been previously + // returned by a call to Allocate. + void Deallocate(void *ptr) ABSL_LOCKS_EXCLUDED(guarded_page_lock); + + // Returns the size requested when ptr was allocated. ptr must have been + // previously returned by a call to Allocate. + size_t GetRequestedSize(const void *ptr) const; + + // Returns ptr's offset from the beginning of its allocation along with the + // allocation's size. + std::pair<off_t, size_t> GetAllocationOffsetAndSize(const void *ptr) const; + + // Records stack traces in alloc_trace and dealloc_trace for the page nearest + // to ptr. alloc_trace is the trace at the time the page was allocated. If + // the page is still allocated, dealloc_trace->depth will be 0. If the page + // has been deallocated, dealloc_trace is the trace at the time the page was + // deallocated. + // + // Returns the likely error type for an access at ptr. 
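+  // (The result is one of kUseAfterFree, kBufferUnderflow, kBufferOverflow,
+  // kDoubleFree, kBufferOverflowOnDealloc, or kUnknown; see ErrorType above.)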
+ // + // Requires that ptr points to memory mapped by this class. + ErrorType GetStackTraces(const void *ptr, GpaStackTrace *alloc_trace, + GpaStackTrace *dealloc_trace) const; + + // Writes a human-readable summary of GuardedPageAllocator's internal state to + // *out. + void Print(Printer *out) ABSL_LOCKS_EXCLUDED(guarded_page_lock); + void PrintInPbtxt(PbtxtRegion *gwp_asan) const + ABSL_LOCKS_EXCLUDED(guarded_page_lock); + + // Returns true if ptr points to memory managed by this class. + inline bool ABSL_ATTRIBUTE_ALWAYS_INLINE + PointerIsMine(const void *ptr) const { + uintptr_t addr = reinterpret_cast<uintptr_t>(ptr); + return pages_base_addr_ <= addr && addr < pages_end_addr_; + } + + // Allows Allocate() to start returning allocations. + void AllowAllocations() ABSL_LOCKS_EXCLUDED(guarded_page_lock) { + absl::base_internal::SpinLockHolder h(&guarded_page_lock); + allow_allocations_ = true; + } + + private: + // Structure for storing data about a slot. + struct SlotMetadata { + GpaStackTrace alloc_trace; + GpaStackTrace dealloc_trace; + size_t requested_size = 0; + uintptr_t allocation_start = 0; + }; + + // Max number of magic bytes we use to detect write-overflows at deallocation. + static constexpr size_t kMagicSize = 32; + + // Maps pages into memory. + void MapPages() ABSL_LOCKS_EXCLUDED(guarded_page_lock) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Reserves and returns a slot randomly selected from the free slots in + // free_pages_. Returns -1 if no slots available, or if AllowAllocations() + // hasn't been called yet. + ssize_t ReserveFreeSlot() ABSL_LOCKS_EXCLUDED(guarded_page_lock); + + // Returns the i-th free slot of free_pages_. i must be in the range [0, + // total_pages_ - num_alloced_pages_). + size_t GetIthFreeSlot(size_t i) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(guarded_page_lock); + + // Marks the specified slot as unreserved. + void FreeSlot(size_t slot) ABSL_EXCLUSIVE_LOCKS_REQUIRED(guarded_page_lock); + + // Returns the address of the page that addr resides on. + uintptr_t GetPageAddr(uintptr_t addr) const; + + // Returns an address somewhere on the valid page nearest to addr. + uintptr_t GetNearestValidPage(uintptr_t addr) const; + + // Returns the slot number for the page nearest to addr. + size_t GetNearestSlot(uintptr_t addr) const; + + // Returns true if the specified slot has already been freed. + bool IsFreed(size_t slot) const + ABSL_EXCLUSIVE_LOCKS_REQUIRED(guarded_page_lock); + + // Returns true if magic bytes for slot were overwritten. + bool WriteOverflowOccurred(size_t slot) const; + + // Returns the likely error type for the given access address and metadata + // associated with the nearest slot. + ErrorType GetErrorType(uintptr_t addr, const SlotMetadata &d) const; + + // Magic constant used for detecting write-overflows at deallocation time. + static uint8_t GetWriteOverflowMagic(size_t slot) { + // Only even slots get magic bytes, so use slot / 2 for more unique magics. + return uint8_t{0xcd} * static_cast<uint8_t>(slot / 2); + } + + // Returns true if slot should be right aligned. + static bool ShouldRightAlign(size_t slot) { return slot % 2 == 0; } + + // If slot is marked for right alignment, moves the allocation in *ptr to the + // right end of the slot, maintaining the specified size and alignment. Magic + // bytes are written in any alignment padding. 
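+  //
+  // Example (illustrative, assuming a 4 KiB slot): for size = 13 and
+  // alignment = 8, the allocation is moved to the largest 8-aligned address
+  // that still leaves 13 bytes before the trailing guard page; the few bytes
+  // between the end of the object and the guard page are filled with
+  // GetWriteOverflowMagic(slot) so WriteOverflowOccurred() can flag small
+  // overwrites when the slot is deallocated.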
+ void MaybeRightAlign(size_t slot, size_t size, size_t alignment, void **ptr); + + uintptr_t SlotToAddr(size_t slot) const; + size_t AddrToSlot(uintptr_t addr) const; + + // Maps each bool to one page. + // true: Free. false: Reserved. + bool free_pages_[kGpaMaxPages] ABSL_GUARDED_BY(guarded_page_lock); + + // Number of currently-allocated pages. + size_t num_alloced_pages_ ABSL_GUARDED_BY(guarded_page_lock); + + // The high-water mark for num_alloced_pages_. + size_t num_alloced_pages_max_ ABSL_GUARDED_BY(guarded_page_lock); + + // Number of calls to Allocate. + size_t num_allocation_requests_ ABSL_GUARDED_BY(guarded_page_lock); + + // Number of times Allocate has failed. + size_t num_failed_allocations_ ABSL_GUARDED_BY(guarded_page_lock); + + // A dynamically-allocated array of stack trace data captured when each page + // is allocated/deallocated. Printed by the SEGV handler when a memory error + // is detected. + SlotMetadata *data_; + + uintptr_t pages_base_addr_; // Points to start of mapped region. + uintptr_t pages_end_addr_; // Points to the end of mapped region. + uintptr_t first_page_addr_; // Points to first page returnable by Allocate. + size_t max_alloced_pages_; // Max number of pages to allocate at once. + size_t total_pages_; // Size of the page pool to allocate from. + size_t page_size_; // Size of pages we allocate. + uint64_t rand_; // RNG seed. + + // True if this object has been fully initialized. + bool initialized_ ABSL_GUARDED_BY(guarded_page_lock); + + // Flag to control whether we can return allocations or not. + bool allow_allocations_ ABSL_GUARDED_BY(guarded_page_lock); + + // Set to true if a double free has occurred. + bool double_free_detected_; + + // Set to true if a write overflow was detected on deallocation. + bool write_overflow_detected_; + + friend struct ConstexprCheck; +}; + +struct ConstexprCheck { + static_assert(GuardedPageAllocator().rand_ || true, + "GuardedPageAllocator must have a constexpr constructor"); +}; + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_GUARDED_PAGE_ALLOCATOR_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator_benchmark.cc b/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator_benchmark.cc new file mode 100644 index 0000000000..fb6d0ea265 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator_benchmark.cc @@ -0,0 +1,60 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <unistd.h> + +#include "absl/base/internal/spinlock.h" +#include "benchmark/benchmark.h" +#include "tcmalloc/guarded_page_allocator.h" +#include "tcmalloc/internal/logging.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +static constexpr size_t kMaxGpaPages = GuardedPageAllocator::kGpaMaxPages; + +// Size of pages used by GuardedPageAllocator. 
+static size_t PageSize() { + static const size_t page_size = + std::max(kPageSize, static_cast<size_t>(getpagesize())); + return page_size; +} + +void BM_AllocDealloc(benchmark::State& state) { + static GuardedPageAllocator* gpa = []() { + auto gpa = new GuardedPageAllocator; + absl::base_internal::SpinLockHolder h(&pageheap_lock); + gpa->Init(kMaxGpaPages, kMaxGpaPages); + gpa->AllowAllocations(); + return gpa; + }(); + size_t alloc_size = state.range(0); + for (auto _ : state) { + char* ptr = reinterpret_cast<char*>(gpa->Allocate(alloc_size, 0)); + CHECK_CONDITION(ptr != nullptr); + ptr[0] = 'X'; // Page fault first page. + ptr[alloc_size - 1] = 'X'; // Page fault last page. + gpa->Deallocate(ptr); + } +} + +BENCHMARK(BM_AllocDealloc)->Range(1, PageSize()); +BENCHMARK(BM_AllocDealloc)->Arg(1)->ThreadRange(1, kMaxGpaPages); + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator_test.cc b/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator_test.cc new file mode 100644 index 0000000000..0d603de690 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/guarded_page_allocator_test.cc @@ -0,0 +1,243 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/guarded_page_allocator.h" + +#include <stdio.h> +#include <string.h> +#include <unistd.h> + +#include <algorithm> +#include <memory> +#include <set> +#include <string> +#include <thread> // NOLINT(build/c++11) +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/casts.h" +#include "absl/base/internal/spinlock.h" +#include "absl/base/internal/sysinfo.h" +#include "absl/container/flat_hash_set.h" +#include "absl/memory/memory.h" +#include "absl/numeric/bits.h" +#include "absl/strings/str_cat.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "tcmalloc/common.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/static_vars.h" + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +static constexpr size_t kMaxGpaPages = GuardedPageAllocator::kGpaMaxPages; + +// Size of pages used by GuardedPageAllocator. 
+static size_t PageSize() { + static const size_t page_size = + std::max(kPageSize, static_cast<size_t>(getpagesize())); + return page_size; +} + +class GuardedPageAllocatorTest : public testing::Test { + protected: + GuardedPageAllocatorTest() { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + gpa_.Init(kMaxGpaPages, kMaxGpaPages); + gpa_.AllowAllocations(); + } + + explicit GuardedPageAllocatorTest(size_t num_pages) { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + gpa_.Init(num_pages, kMaxGpaPages); + gpa_.AllowAllocations(); + } + + ~GuardedPageAllocatorTest() override { gpa_.Destroy(); } + + GuardedPageAllocator gpa_; +}; + +class GuardedPageAllocatorParamTest + : public GuardedPageAllocatorTest, + public testing::WithParamInterface<size_t> { + protected: + GuardedPageAllocatorParamTest() : GuardedPageAllocatorTest(GetParam()) {} +}; + +TEST_F(GuardedPageAllocatorTest, SingleAllocDealloc) { + char *buf = reinterpret_cast<char *>(gpa_.Allocate(PageSize(), 0)); + EXPECT_NE(buf, nullptr); + EXPECT_TRUE(gpa_.PointerIsMine(buf)); + memset(buf, 'A', PageSize()); + EXPECT_DEATH(buf[-1] = 'A', ""); + EXPECT_DEATH(buf[PageSize()] = 'A', ""); + gpa_.Deallocate(buf); + EXPECT_DEATH(buf[0] = 'B', ""); + EXPECT_DEATH(buf[PageSize() / 2] = 'B', ""); + EXPECT_DEATH(buf[PageSize() - 1] = 'B', ""); +} + +TEST_F(GuardedPageAllocatorTest, NoAlignmentProvided) { + constexpr size_t kLargeObjectAlignment = std::max( + kAlignment, static_cast<size_t>(__STDCPP_DEFAULT_NEW_ALIGNMENT__)); + + for (size_t base_size = 1; base_size <= 64; base_size <<= 1) { + for (size_t size : {base_size, base_size + 1}) { + SCOPED_TRACE(size); + + constexpr int kElements = 10; + std::array<void *, kElements> ptrs; + + // Make several allocation attempts to encounter left/right-alignment in + // the guarded region. 
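+      // (Slots alternate between left- and right-aligned placement;
+      // ShouldRightAlign() is true only for even slots, so several attempts
+      // should exercise both cases.)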
+ for (int i = 0; i < kElements; i++) { + ptrs[i] = gpa_.Allocate(size, 0); + EXPECT_NE(ptrs[i], nullptr); + EXPECT_TRUE(gpa_.PointerIsMine(ptrs[i])); + + size_t observed_alignment = + 1 << absl::countr_zero(absl::bit_cast<uintptr_t>(ptrs[i])); + EXPECT_GE(observed_alignment, std::min(size, kLargeObjectAlignment)); + } + + for (void *ptr : ptrs) { + gpa_.Deallocate(ptr); + } + } + } +} + +TEST_F(GuardedPageAllocatorTest, AllocDeallocAligned) { + for (size_t align = 1; align <= PageSize(); align <<= 1) { + constexpr size_t alloc_size = 1; + void *p = gpa_.Allocate(alloc_size, align); + EXPECT_NE(p, nullptr); + EXPECT_TRUE(gpa_.PointerIsMine(p)); + EXPECT_EQ(reinterpret_cast<uintptr_t>(p) % align, 0); + } +} + +TEST_P(GuardedPageAllocatorParamTest, AllocDeallocAllPages) { + size_t num_pages = GetParam(); + char *bufs[kMaxGpaPages]; + for (size_t i = 0; i < num_pages; i++) { + bufs[i] = reinterpret_cast<char *>(gpa_.Allocate(1, 0)); + EXPECT_NE(bufs[i], nullptr); + EXPECT_TRUE(gpa_.PointerIsMine(bufs[i])); + } + EXPECT_EQ(gpa_.Allocate(1, 0), nullptr); + gpa_.Deallocate(bufs[0]); + bufs[0] = reinterpret_cast<char *>(gpa_.Allocate(1, 0)); + EXPECT_NE(bufs[0], nullptr); + EXPECT_TRUE(gpa_.PointerIsMine(bufs[0])); + for (size_t i = 0; i < num_pages; i++) { + bufs[i][0] = 'A'; + gpa_.Deallocate(bufs[i]); + } +} +INSTANTIATE_TEST_SUITE_P(VaryNumPages, GuardedPageAllocatorParamTest, + testing::Values(1, kMaxGpaPages / 2, kMaxGpaPages)); + +TEST_F(GuardedPageAllocatorTest, PointerIsMine) { + void *buf = gpa_.Allocate(1, 0); + int stack_var; + auto malloc_ptr = absl::make_unique<char>(); + EXPECT_TRUE(gpa_.PointerIsMine(buf)); + EXPECT_FALSE(gpa_.PointerIsMine(&stack_var)); + EXPECT_FALSE(gpa_.PointerIsMine(malloc_ptr.get())); +} + +TEST_F(GuardedPageAllocatorTest, Print) { + char buf[1024] = {}; + Printer out(buf, sizeof(buf)); + gpa_.Print(&out); + EXPECT_THAT(buf, testing::ContainsRegex("GWP-ASan Status")); +} + +// Test that no pages are double-allocated or left unallocated, and that no +// extra pages are allocated when there's concurrent calls to Allocate(). +TEST_F(GuardedPageAllocatorTest, ThreadedAllocCount) { + constexpr size_t kNumThreads = 2; + void *allocations[kNumThreads][kMaxGpaPages]; + { + std::vector<std::thread> threads; + threads.reserve(kNumThreads); + for (size_t i = 0; i < kNumThreads; i++) { + threads.push_back(std::thread([this, &allocations, i]() { + for (size_t j = 0; j < kMaxGpaPages; j++) { + allocations[i][j] = gpa_.Allocate(1, 0); + } + })); + } + + for (auto &t : threads) { + t.join(); + } + } + absl::flat_hash_set<void *> allocations_set; + for (size_t i = 0; i < kNumThreads; i++) { + for (size_t j = 0; j < kMaxGpaPages; j++) { + allocations_set.insert(allocations[i][j]); + } + } + allocations_set.erase(nullptr); + EXPECT_EQ(allocations_set.size(), kMaxGpaPages); +} + +// Test that allocator remains in consistent state under high contention and +// doesn't double-allocate pages or fail to deallocate pages. +TEST_F(GuardedPageAllocatorTest, ThreadedHighContention) { + const size_t kNumThreads = 4 * absl::base_internal::NumCPUs(); + { + std::vector<std::thread> threads; + threads.reserve(kNumThreads); + for (size_t i = 0; i < kNumThreads; i++) { + threads.push_back(std::thread([this]() { + char *buf; + while ((buf = reinterpret_cast<char *>(gpa_.Allocate(1, 0))) == + nullptr) { + absl::SleepFor(absl::Nanoseconds(5000)); + } + + // Verify that no other thread has access to this page. 
+ EXPECT_EQ(buf[0], 0); + + // Mark this page and allow some time for another thread to potentially + // gain access to this page. + buf[0] = 'A'; + absl::SleepFor(absl::Nanoseconds(5000)); + + // Unmark this page and deallocate. + buf[0] = 0; + gpa_.Deallocate(buf); + })); + } + + for (auto &t : threads) { + t.join(); + } + } + // Verify all pages have been deallocated now that all threads are done. + for (size_t i = 0; i < kMaxGpaPages; i++) { + EXPECT_NE(gpa_.Allocate(1, 0), nullptr); + } +} + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/heap_profiling_test.cc b/contrib/libs/tcmalloc/tcmalloc/heap_profiling_test.cc new file mode 100644 index 0000000000..5c2473ffed --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/heap_profiling_test.cc @@ -0,0 +1,122 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <stdint.h> +#include <stdlib.h> + +#include <memory> +#include <new> + +#include "gtest/gtest.h" +#include "benchmark/benchmark.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/parameter_accessors.h" +#include "tcmalloc/malloc_extension.h" +#include "tcmalloc/static_vars.h" + +namespace tcmalloc { +namespace { + +int64_t ProfileSize(ProfileType type) { + int64_t total = 0; + + MallocExtension::SnapshotCurrent(type).Iterate( + [&](const Profile::Sample &e) { total += e.sum; }); + return total; +} + +class ScopedPeakGrowthFraction { + public: + explicit ScopedPeakGrowthFraction(double temporary_value) + : previous_(TCMalloc_Internal_GetPeakSamplingHeapGrowthFraction()) { + TCMalloc_Internal_SetPeakSamplingHeapGrowthFraction(temporary_value); + } + + ~ScopedPeakGrowthFraction() { + TCMalloc_Internal_SetPeakSamplingHeapGrowthFraction(previous_); + } + + private: + double previous_; +}; + +TEST(HeapProfilingTest, PeakHeapTracking) { + // Adjust high watermark threshold for our scenario, to be independent of + // changes to the default. As we use a random value for choosing our next + // sampling point, we may overweight some allocations above their true size. + ScopedPeakGrowthFraction s(1.25); + + int64_t start_peak_sz = ProfileSize(ProfileType::kPeakHeap); + + // make a large allocation to force a new peak heap sample + // (total live: 50MiB) + void *first = ::operator new(50 << 20); + // TODO(b/183453911): Remove workaround for GCC 10.x deleting operator new, + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94295. 
+ benchmark::DoNotOptimize(first); + int64_t peak_after_first = ProfileSize(ProfileType::kPeakHeap); + EXPECT_NEAR(peak_after_first, start_peak_sz + (50 << 20), 10 << 20); + + // a small allocation shouldn't increase the peak + // (total live: 54MiB) + void *second = ::operator new(4 << 20); + benchmark::DoNotOptimize(second); + int64_t peak_after_second = ProfileSize(ProfileType::kPeakHeap); + EXPECT_EQ(peak_after_second, peak_after_first); + + // but a large one should + // (total live: 254MiB) + void *third = ::operator new(200 << 20); + benchmark::DoNotOptimize(third); + int64_t peak_after_third = ProfileSize(ProfileType::kPeakHeap); + EXPECT_NEAR(peak_after_third, peak_after_second + (200 << 20), 10 << 20); + + // freeing everything shouldn't affect the peak + // (total live: 0MiB) + ::operator delete(first); + EXPECT_EQ(ProfileSize(ProfileType::kPeakHeap), peak_after_third); + + ::operator delete(second); + EXPECT_EQ(ProfileSize(ProfileType::kPeakHeap), peak_after_third); + + ::operator delete(third); + EXPECT_EQ(ProfileSize(ProfileType::kPeakHeap), peak_after_third); + + // going back up less than previous peak shouldn't affect the peak + // (total live: 200MiB) + void *fourth = ::operator new(100 << 20); + benchmark::DoNotOptimize(fourth); + void *fifth = ::operator new(100 << 20); + benchmark::DoNotOptimize(fifth); + EXPECT_EQ(ProfileSize(ProfileType::kPeakHeap), peak_after_third); + + // passing the old peak significantly, even with many small allocations, + // should generate a new one + // (total live: 200MiB + 256MiB = 456MiB, 80% over the 254MiB peak) + void *bitsy[1 << 10]; + for (int i = 0; i < 1 << 10; i++) { + bitsy[i] = ::operator new(1 << 18); + benchmark::DoNotOptimize(bitsy[i]); + } + EXPECT_GT(ProfileSize(ProfileType::kPeakHeap), peak_after_third); + + ::operator delete(fourth); + ::operator delete(fifth); + for (int i = 0; i < 1 << 10; i++) { + ::operator delete(bitsy[i]); + } +} + +} // namespace +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_address_map.cc b/contrib/libs/tcmalloc/tcmalloc/huge_address_map.cc new file mode 100644 index 0000000000..898c6d934a --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/huge_address_map.cc @@ -0,0 +1,374 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "tcmalloc/huge_address_map.h" + +#include <stdlib.h> + +#include <algorithm> +#include <new> + +#include "absl/base/internal/cycleclock.h" +#include "tcmalloc/internal/logging.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +const HugeAddressMap::Node *HugeAddressMap::Node::next() const { + const Node *n = right_; + if (n) { + while (n->left_) n = n->left_; + return n; + } + + n = parent_; + const Node *last = this; + while (n) { + if (n->left_ == last) return n; + last = n; + n = n->parent_; + } + + return nullptr; +} + +HugeAddressMap::Node *HugeAddressMap::Node::next() { + const Node *n = static_cast<const Node *>(this)->next(); + return const_cast<Node *>(n); +} + +void HugeAddressMap::Node::Check(size_t *num_nodes, HugeLength *size) const { + HugeLength longest = range_.len(); + *num_nodes += 1; + *size += range_.len(); + + if (left_) { + // tree + CHECK_CONDITION(left_->range_.start() < range_.start()); + // disjoint + CHECK_CONDITION(left_->range_.end_addr() < range_.start_addr()); + // well-formed + CHECK_CONDITION(left_->parent_ == this); + // heap + CHECK_CONDITION(left_->prio_ <= prio_); + left_->Check(num_nodes, size); + if (left_->longest_ > longest) longest = left_->longest_; + } + + if (right_) { + // tree + CHECK_CONDITION(right_->range_.start() > range_.start()); + // disjoint + CHECK_CONDITION(right_->range_.start_addr() > range_.end_addr()); + // well-formed + CHECK_CONDITION(right_->parent_ == this); + // heap + CHECK_CONDITION(right_->prio_ <= prio_); + right_->Check(num_nodes, size); + if (right_->longest_ > longest) longest = right_->longest_; + } + + CHECK_CONDITION(longest_ == longest); +} + +const HugeAddressMap::Node *HugeAddressMap::first() const { + const Node *n = root(); + if (!n) return nullptr; + const Node *left = n->left_; + while (left) { + n = left; + left = n->left_; + } + + return n; +} + +HugeAddressMap::Node *HugeAddressMap::first() { + const Node *f = static_cast<const HugeAddressMap *>(this)->first(); + return const_cast<Node *>(f); +} + +void HugeAddressMap::Check() { + size_t nodes = 0; + HugeLength size = NHugePages(0); + if (root_) { + CHECK_CONDITION(root_->parent_ == nullptr); + root_->Check(&nodes, &size); + } + CHECK_CONDITION(nodes == nranges()); + CHECK_CONDITION(size == total_mapped()); + CHECK_CONDITION(total_nodes_ == used_nodes_ + freelist_size_); +} + +size_t HugeAddressMap::nranges() const { return used_nodes_; } + +HugeLength HugeAddressMap::total_mapped() const { return total_size_; } + +void HugeAddressMap::Print(Printer *out) const { + out->printf("HugeAddressMap: treap %zu / %zu nodes used / created\n", + used_nodes_, total_nodes_); + const size_t longest = root_ ? root_->longest_.raw_num() : 0; + out->printf("HugeAddressMap: %zu contiguous hugepages available\n", longest); +} + +void HugeAddressMap::PrintInPbtxt(PbtxtRegion *hpaa) const { + hpaa->PrintI64("num_huge_address_map_treap_nodes_used", used_nodes_); + hpaa->PrintI64("num_huge_address_map_treap_nodes_created", total_nodes_); + const size_t longest = root_ ? root_->longest_.in_bytes() : 0; + hpaa->PrintI64("contiguous_free_bytes", longest); +} + +HugeAddressMap::Node *HugeAddressMap::Predecessor(HugePage p) { + Node *n = root(); + Node *best = nullptr; + while (n) { + HugeRange here = n->range_; + if (here.contains(p)) return n; + if (p < here.start()) { + // p comes before here: + // our predecessor isn't here, nor in the right subtree. 
+ n = n->left_; + } else { + // p comes after here: + // here is a valid candidate, and the right subtree might have better. + best = n; + n = n->right_; + } + } + + return best; +} + +void HugeAddressMap::Merge(Node *b, HugeRange r, Node *a) { + auto merge_when = [](HugeRange x, int64_t x_when, HugeRange y, + int64_t y_when) { + // avoid overflow with floating-point + const size_t x_len = x.len().raw_num(); + const size_t y_len = y.len().raw_num(); + const double x_weight = static_cast<double>(x_len) * x_when; + const double y_weight = static_cast<double>(y_len) * y_when; + return static_cast<int64_t>((x_weight + y_weight) / (x_len + y_len)); + }; + + int64_t when = absl::base_internal::CycleClock::Now(); + // Two way merges are easy. + if (a == nullptr) { + b->when_ = merge_when(b->range_, b->when(), r, when); + b->range_ = Join(b->range_, r); + FixLongest(b); + return; + } else if (b == nullptr) { + a->when_ = merge_when(r, when, a->range_, a->when()); + a->range_ = Join(r, a->range_); + FixLongest(a); + return; + } + + // Three way merge: slightly harder. We must remove one node + // (arbitrarily picking next). + HugeRange partial = Join(r, a->range_); + int64_t partial_when = merge_when(r, when, a->range_, a->when()); + HugeRange full = Join(b->range_, partial); + int64_t full_when = merge_when(b->range_, b->when(), partial, partial_when); + // Removing a will reduce total_size_ by that length, but since we're merging + // we actually don't change lengths at all; undo that. + total_size_ += a->range_.len(); + Remove(a); + b->range_ = full; + b->when_ = full_when; + FixLongest(b); +} + +void HugeAddressMap::Insert(HugeRange r) { + total_size_ += r.len(); + // First, try to merge if necessary. Note there are three possibilities: + // we might need to merge before with r, r with after, or all three together. + Node *before = Predecessor(r.start()); + CHECK_CONDITION(!before || !before->range_.intersects(r)); + Node *after = before ? before->next() : first(); + CHECK_CONDITION(!after || !after->range_.intersects(r)); + if (before && before->range_.precedes(r)) { + if (after && r.precedes(after->range_)) { + Merge(before, r, after); + } else { + Merge(before, r, nullptr); + } + return; + } else if (after && r.precedes(after->range_)) { + Merge(nullptr, r, after); + return; + } + CHECK_CONDITION(!before || !before->range_.precedes(r)); + CHECK_CONDITION(!after || !r.precedes(after->range_)); + // No merging possible; just add a new node. + Node *n = Get(r); + Node *curr = root(); + Node *parent = nullptr; + Node **link = &root_; + // Walk down the tree to our correct location + while (curr != nullptr && curr->prio_ >= n->prio_) { + curr->longest_ = std::max(curr->longest_, r.len()); + parent = curr; + if (curr->range_.start() < r.start()) { + link = &curr->right_; + curr = curr->right_; + } else { + link = &curr->left_; + curr = curr->left_; + } + } + *link = n; + n->parent_ = parent; + n->left_ = n->right_ = nullptr; + n->longest_ = r.len(); + if (curr) { + HugePage p = r.start(); + // We need to split the treap at curr into n's children. + // This will be two treaps: one less than p, one greater, and has + // a nice recursive structure. 
+ Node **less = &n->left_; + Node *lp = n; + Node **more = &n->right_; + Node *mp = n; + while (curr) { + if (curr->range_.start() < p) { + *less = curr; + curr->parent_ = lp; + less = &curr->right_; + lp = curr; + curr = curr->right_; + } else { + *more = curr; + curr->parent_ = mp; + more = &curr->left_; + mp = curr; + curr = curr->left_; + } + } + *more = *less = nullptr; + // We ripped apart the tree along these two paths--fix longest pointers. + FixLongest(lp); + FixLongest(mp); + } +} + +void HugeAddressMap::Node::FixLongest() { + const HugeLength l = left_ ? left_->longest_ : NHugePages(0); + const HugeLength r = right_ ? right_->longest_ : NHugePages(0); + const HugeLength c = range_.len(); + const HugeLength new_longest = std::max({l, r, c}); + longest_ = new_longest; +} + +void HugeAddressMap::FixLongest(HugeAddressMap::Node *n) { + while (n) { + n->FixLongest(); + n = n->parent_; + } +} + +void HugeAddressMap::Remove(HugeAddressMap::Node *n) { + total_size_ -= n->range_.len(); + // We need to merge the left and right children of n into one + // treap, then glue it into place wherever n was. + Node **link; + Node *parent = n->parent_; + Node *top = n->left_; + Node *bottom = n->right_; + + const HugeLength child_longest = + std::max(top ? top->longest_ : NHugePages(0), + bottom ? bottom->longest_ : NHugePages(0)); + if (!parent) { + link = &root_; + } else { + // Account for the removed child--might change longests. + // Easiest way: update this subtree to ignore the removed node, + // then fix the chain of parents. + n->longest_ = child_longest; + FixLongest(parent); + if (parent->range_.start() > n->range_.start()) { + link = &parent->left_; + } else { + link = &parent->right_; + } + } + + // A routine op we'll need a lot: given two (possibly null) + // children, put the root-ier one into top. + auto reorder_maybe = [](Node **top, Node **bottom) { + Node *b = *bottom, *t = *top; + if (b && (!t || t->prio_ < b->prio_)) { + *bottom = t; + *top = b; + } + }; + + reorder_maybe(&top, &bottom); + // if we have two treaps to merge (top is always non-null if bottom is) + // Invariant: top, bottom are two valid (longest included) + // treaps. parent (and all above/elsewhere) have the correct longest + // values, though parent does not have the correct children (will be the + // merged value of top and bottom.) + while (bottom) { + *link = top; + top->parent_ = parent; + // We're merging bottom into top, so top might contain a longer + // chunk than it thinks. 
+ top->longest_ = std::max(top->longest_, bottom->longest_); + parent = top; + if (bottom->range_.start() < top->range_.start()) { + link = &top->left_; + top = top->left_; + } else { + link = &top->right_; + top = top->right_; + } + reorder_maybe(&top, &bottom); + } + *link = top; + if (top) top->parent_ = parent; + Put(n); +} + +void HugeAddressMap::Put(Node *n) { + freelist_size_++; + used_nodes_--; + n->left_ = freelist_; + freelist_ = n; +} + +HugeAddressMap::Node *HugeAddressMap::Get(HugeRange r) { + CHECK_CONDITION((freelist_ == nullptr) == (freelist_size_ == 0)); + used_nodes_++; + int prio = rand_r(&seed_); + if (freelist_size_ == 0) { + total_nodes_++; + Node *ret = reinterpret_cast<Node *>(meta_(sizeof(Node))); + return new (ret) Node(r, prio); + } + + freelist_size_--; + Node *ret = freelist_; + freelist_ = ret->left_; + return new (ret) Node(r, prio); +} + +HugeAddressMap::Node::Node(HugeRange r, int prio) + : range_(r), prio_(prio), when_(absl::base_internal::CycleClock::Now()) {} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_address_map.h b/contrib/libs/tcmalloc/tcmalloc/huge_address_map.h new file mode 100644 index 0000000000..3c71f19a3f --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/huge_address_map.h @@ -0,0 +1,148 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_HUGE_ADDRESS_MAP_H_ +#define TCMALLOC_HUGE_ADDRESS_MAP_H_ +#include <stddef.h> +#include <stdint.h> + +#include "tcmalloc/huge_pages.h" +#include "tcmalloc/internal/logging.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// Maintains a set of disjoint HugeRanges, merging adjacent ranges into one. +// Exposes a balanced (somehow) binary tree of free ranges on address, +// augmented with the largest range in each subtree (this allows fairly simple +// allocation algorithms from the contained ranges. +// +// This class scales well and is *reasonably* performant, but it is not intended +// for use on extremely hot paths. +// TODO(b/134688982): extend to support other range-like types? +class HugeAddressMap { + public: + typedef void *(*MetadataAllocFunction)(size_t bytes); + explicit constexpr HugeAddressMap(MetadataAllocFunction meta); + + // IMPORTANT: DESTROYING A HUGE ADDRESS MAP DOES NOT MAKE ANY ATTEMPT + // AT FREEING ALLOCATED METADATA. + ~HugeAddressMap() = default; + + class Node { + public: + // the range stored at this point + HugeRange range() const; + // Tree structure + Node *left(); + Node *right(); + // Iterate to the next node in address order + const Node *next() const; + Node *next(); + // when were this node's content added (in + // absl::base_internal::CycleClock::Now units)? + int64_t when() const; + + // What is the length of the longest range in the subtree rooted here? 
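+    // (HugeAllocator::Find() uses this augmentation to skip subtrees whose
+    // longest() is smaller than the requested length.)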
+ HugeLength longest() const; + + private: + Node(HugeRange r, int prio); + friend class HugeAddressMap; + HugeRange range_; + int prio_; // chosen randomly + Node *left_, *right_; + Node *parent_; + HugeLength longest_; + int64_t when_; + // Expensive, recursive consistency check. + // Accumulates node count and range sizes into passed arguments. + void Check(size_t *num_nodes, HugeLength *size) const; + + // We've broken longest invariants somehow; fix them here. + void FixLongest(); + }; + + // Get root of the tree. + Node *root(); + const Node *root() const; + + // Get lowest-addressed node + const Node *first() const; + Node *first(); + + // Returns the highest-addressed range that does not lie completely + // after p (if any). + Node *Predecessor(HugePage p); + + // Expensive consistency check. + void Check(); + + // Statistics + size_t nranges() const; + HugeLength total_mapped() const; + void Print(Printer *out) const; + void PrintInPbtxt(PbtxtRegion *hpaa) const; + + // Add <r> to the map, merging with adjacent ranges as needed. + void Insert(HugeRange r); + + // Delete n from the map. + void Remove(Node *n); + + private: + // our tree + Node *root_{nullptr}; + size_t used_nodes_{0}; + HugeLength total_size_{NHugePages(0)}; + + // cache of unused nodes + Node *freelist_{nullptr}; + size_t freelist_size_{0}; + // How we get more + MetadataAllocFunction meta_; + Node *Get(HugeRange r); + void Put(Node *n); + + size_t total_nodes_{0}; + + void Merge(Node *b, HugeRange r, Node *a); + void FixLongest(Node *n); + // Note that we always use the same seed, currently; this isn't very random. + // In practice we're not worried about adversarial input and this works well + // enough. + unsigned int seed_{0}; +}; + +inline constexpr HugeAddressMap::HugeAddressMap(MetadataAllocFunction meta) + : meta_(meta) {} + +inline HugeRange HugeAddressMap::Node::range() const { return range_; } +inline HugeAddressMap::Node *HugeAddressMap::Node::left() { return left_; } +inline HugeAddressMap::Node *HugeAddressMap::Node::right() { return right_; } + +inline int64_t HugeAddressMap::Node::when() const { return when_; } +inline HugeLength HugeAddressMap::Node::longest() const { return longest_; } + +inline HugeAddressMap::Node *HugeAddressMap::root() { return root_; } +inline const HugeAddressMap::Node *HugeAddressMap::root() const { + return root_; +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_HUGE_ADDRESS_MAP_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_address_map_test.cc b/contrib/libs/tcmalloc/tcmalloc/huge_address_map_test.cc new file mode 100644 index 0000000000..455cd63809 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/huge_address_map_test.cc @@ -0,0 +1,85 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "tcmalloc/huge_address_map.h" + +#include <stdlib.h> + +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +class HugeAddressMapTest : public ::testing::Test { + protected: + HugeAddressMapTest() : map_(MallocMetadata) { metadata_allocs_.clear(); } + + ~HugeAddressMapTest() override { + for (void* p : metadata_allocs_) { + free(p); + } + } + + std::vector<HugeRange> Contents() { + std::vector<HugeRange> ret; + auto node = map_.first(); + while (node) { + ret.push_back(node->range()); + node = node->next(); + } + + return ret; + } + + HugePage hp(size_t i) { return {i}; } + HugeLength hl(size_t i) { return NHugePages(i); } + + HugeAddressMap map_; + + private: + static void* MallocMetadata(size_t size) { + void* ptr = malloc(size); + metadata_allocs_.push_back(ptr); + return ptr; + } + + static std::vector<void*> metadata_allocs_; +}; + +std::vector<void*> HugeAddressMapTest::metadata_allocs_; + +// This test verifies that HugeAddressMap merges properly. +TEST_F(HugeAddressMapTest, Merging) { + const HugeRange r1 = HugeRange::Make(hp(0), hl(1)); + const HugeRange r2 = HugeRange::Make(hp(1), hl(1)); + const HugeRange r3 = HugeRange::Make(hp(2), hl(1)); + const HugeRange all = Join(r1, Join(r2, r3)); + map_.Insert(r1); + map_.Check(); + EXPECT_THAT(Contents(), testing::ElementsAre(r1)); + map_.Insert(r3); + map_.Check(); + EXPECT_THAT(Contents(), testing::ElementsAre(r1, r3)); + map_.Insert(r2); + map_.Check(); + EXPECT_THAT(Contents(), testing::ElementsAre(all)); +} + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_allocator.cc b/contrib/libs/tcmalloc/tcmalloc/huge_allocator.cc new file mode 100644 index 0000000000..c77f4522ad --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/huge_allocator.cc @@ -0,0 +1,175 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/huge_allocator.h" + +#include <string.h> + +#include "tcmalloc/huge_address_map.h" +#include "tcmalloc/internal/logging.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +void HugeAllocator::Print(Printer *out) { + out->printf("HugeAllocator: contiguous, unbacked hugepage(s)\n"); + free_.Print(out); + out->printf( + "HugeAllocator: %zu requested - %zu in use = %zu hugepages free\n", + from_system_.raw_num(), in_use_.raw_num(), + (from_system_ - in_use_).raw_num()); +} + +void HugeAllocator::PrintInPbtxt(PbtxtRegion *hpaa) const { + free_.PrintInPbtxt(hpaa); + hpaa->PrintI64("num_total_requested_huge_pages", from_system_.raw_num()); + hpaa->PrintI64("num_in_use_huge_pages", in_use_.raw_num()); +} + +HugeAddressMap::Node *HugeAllocator::Find(HugeLength n) { + HugeAddressMap::Node *curr = free_.root(); + // invariant: curr != nullptr && curr->longest >= n + // we favor smaller gaps and lower nodes and lower addresses, in that + // order. 
The net effect is that we are neither a best-fit nor a + // lowest-address allocator but vaguely close to both. + HugeAddressMap::Node *best = nullptr; + while (curr && curr->longest() >= n) { + if (curr->range().len() >= n) { + if (!best || best->range().len() > curr->range().len()) { + best = curr; + } + } + + // Either subtree could contain a better fit and we don't want to + // search the whole tree. Pick a reasonable child to look at. + auto left = curr->left(); + auto right = curr->right(); + if (!left || left->longest() < n) { + curr = right; + continue; + } + + if (!right || right->longest() < n) { + curr = left; + continue; + } + + // Here, we have a nontrivial choice. + if (left->range().len() == right->range().len()) { + if (left->longest() <= right->longest()) { + curr = left; + } else { + curr = right; + } + } else if (left->range().len() < right->range().len()) { + // Here, the longest range in both children is the same...look + // in the subtree with the smaller root, as that's slightly + // more likely to be our best. + curr = left; + } else { + curr = right; + } + } + return best; +} + +void HugeAllocator::CheckFreelist() { + free_.Check(); + size_t num_nodes = free_.nranges(); + HugeLength n = free_.total_mapped(); + free_.Check(); + CHECK_CONDITION(n == from_system_ - in_use_); + LargeSpanStats large; + AddSpanStats(nullptr, &large, nullptr); + CHECK_CONDITION(num_nodes == large.spans); + CHECK_CONDITION(n.in_pages() == large.returned_pages); +} + +HugeRange HugeAllocator::AllocateRange(HugeLength n) { + if (n.overflows()) return HugeRange::Nil(); + size_t actual; + size_t bytes = n.in_bytes(); + size_t align = kHugePageSize; + void *ptr = allocate_(bytes, &actual, align); + if (ptr == nullptr) { + // OOM... + return HugeRange::Nil(); + } + CHECK_CONDITION(ptr != nullptr); + // It's possible for a request to return extra hugepages. 
+ CHECK_CONDITION(actual % kHugePageSize == 0); + n = HLFromBytes(actual); + from_system_ += n; + return HugeRange::Make(HugePageContaining(ptr), n); +} + +HugeRange HugeAllocator::Get(HugeLength n) { + CHECK_CONDITION(n > NHugePages(0)); + auto *node = Find(n); + if (!node) { + // Get more memory, then "delete" it + HugeRange r = AllocateRange(n); + if (!r.valid()) return r; + in_use_ += r.len(); + Release(r); + node = Find(n); + CHECK_CONDITION(node != nullptr); + } + in_use_ += n; + + HugeRange r = node->range(); + free_.Remove(node); + if (r.len() > n) { + HugeLength before = r.len(); + HugeRange extra = HugeRange::Make(r.start() + n, before - n); + r = HugeRange::Make(r.start(), n); + ASSERT(r.precedes(extra)); + ASSERT(r.len() + extra.len() == before); + in_use_ += extra.len(); + Release(extra); + } else { + // Release does this for us + DebugCheckFreelist(); + } + + return r; +} + +void HugeAllocator::Release(HugeRange r) { + in_use_ -= r.len(); + + free_.Insert(r); + DebugCheckFreelist(); +} + +void HugeAllocator::AddSpanStats(SmallSpanStats *small, LargeSpanStats *large, + PageAgeHistograms *ages) const { + for (const HugeAddressMap::Node *node = free_.first(); node != nullptr; + node = node->next()) { + HugeLength n = node->range().len(); + if (large != nullptr) { + large->spans++; + large->returned_pages += n.in_pages(); + } + + if (ages != nullptr) { + ages->RecordRange(n.in_pages(), true, node->when()); + } + } +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_allocator.h b/contrib/libs/tcmalloc/tcmalloc/huge_allocator.h new file mode 100644 index 0000000000..6242805c49 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/huge_allocator.h @@ -0,0 +1,108 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Tracking information for the available range of hugepages, +// and a basic allocator for unmapped hugepages. +#ifndef TCMALLOC_HUGE_ALLOCATOR_H_ +#define TCMALLOC_HUGE_ALLOCATOR_H_ + +#include <stddef.h> + +#include "tcmalloc/common.h" +#include "tcmalloc/huge_address_map.h" +#include "tcmalloc/huge_pages.h" +#include "tcmalloc/stats.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// these typedefs allow replacement of tcmalloc::System* for tests. +typedef void *(*MemoryAllocFunction)(size_t bytes, size_t *actual, + size_t align); +typedef void *(*MetadataAllocFunction)(size_t bytes); + +// This tracks available ranges of hugepages and fulfills requests for +// usable memory, allocating more from the system as needed. All +// hugepages are treated as (and assumed to be) unbacked. +class HugeAllocator { + public: + constexpr HugeAllocator(MemoryAllocFunction allocate, + MetadataAllocFunction meta_allocate) + : free_(meta_allocate), allocate_(allocate) {} + + // Obtain a range of n unbacked hugepages, distinct from all other + // calls to Get (other than those that have been Released.) 
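+  //
+  // Returns HugeRange::Nil() (for which valid() is false) if the underlying
+  // allocator could not provide more memory.
+  //
+  // Typical use (illustrative sketch):
+  //
+  //   HugeRange r = allocator.Get(NHugePages(4));
+  //   if (r.valid()) {
+  //     ...  // back and use the hugepages in r
+  //     allocator.Release(r);  // or Release() a subrange of r
+  //   }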
+ HugeRange Get(HugeLength n); + + // Returns a range of hugepages for reuse by subsequent Gets(). + // REQUIRES: <r> is the return value (or a subrange thereof) of a previous + // call to Get(); neither <r> nor any overlapping range has been released + // since that Get(). + void Release(HugeRange r); + + // Total memory requested from the system, whether in use or not, + HugeLength system() const { return from_system_; } + // Unused memory in the allocator. + HugeLength size() const { return from_system_ - in_use_; } + + void AddSpanStats(SmallSpanStats *small, LargeSpanStats *large, + PageAgeHistograms *ages) const; + + BackingStats stats() const { + BackingStats s; + s.system_bytes = system().in_bytes(); + s.free_bytes = 0; + s.unmapped_bytes = size().in_bytes(); + return s; + } + + void Print(Printer *out); + void PrintInPbtxt(PbtxtRegion *hpaa) const; + + private: + // We're constrained in several ways by existing code. Hard requirements: + // * no radix tree or similar O(address space) external space tracking + // * support sub releasing + // * low metadata overhead + // * no pre-allocation. + // * reasonable space overhead + // + // We use a treap ordered on addresses to track. This isn't the most + // efficient thing ever but we're about to hit 100usec+/hugepage + // backing costs if we've gotten this far; the last few bits of performance + // don't matter, and most of the simple ideas can't hit all of the above + // requirements. + HugeAddressMap free_; + HugeAddressMap::Node *Find(HugeLength n); + + void CheckFreelist(); + void DebugCheckFreelist() { +#ifndef NDEBUG + CheckFreelist(); +#endif + } + + HugeLength from_system_{NHugePages(0)}; + HugeLength in_use_{NHugePages(0)}; + + MemoryAllocFunction allocate_; + HugeRange AllocateRange(HugeLength n); +}; + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_HUGE_ALLOCATOR_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_allocator_test.cc b/contrib/libs/tcmalloc/tcmalloc/huge_allocator_test.cc new file mode 100644 index 0000000000..150075b88e --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/huge_allocator_test.cc @@ -0,0 +1,449 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/huge_allocator.h" + +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#include <algorithm> +#include <memory> +#include <utility> +#include <vector> + +#include "gtest/gtest.h" +#include "absl/base/internal/cycleclock.h" +#include "absl/random/random.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "tcmalloc/huge_pages.h" +#include "tcmalloc/internal/logging.h" + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +class HugeAllocatorTest : public testing::TestWithParam<bool> { + private: + // Use a tiny fraction of actual size so we can test aggressively. 
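+  // (Each fake hugepage is backed by a single size_t word in backing_,
+  // indexed by HugePage::index(), so MarkPages()/CheckPages() can tag and
+  // verify ranges without touching real memory.)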
+ static void *AllocateFake(size_t bytes, size_t *actual, size_t align); + + static constexpr size_t kMaxBacking = 1024 * 1024; + // This isn't super good form but we'll never have more than one HAT + // extant at once. + static std::vector<size_t> backing_; + + // We use actual malloc for metadata allocations, but we track them so they + // can be deleted. + static void *MallocMetadata(size_t size); + static std::vector<void *> metadata_allocs_; + static size_t metadata_bytes_; + static bool should_overallocate_; + static HugeLength huge_pages_requested_; + static HugeLength huge_pages_received_; + + protected: + HugeLength HugePagesRequested() { return huge_pages_requested_; } + HugeLength HugePagesReceived() { return huge_pages_received_; } + + HugeAllocatorTest() { + should_overallocate_ = GetParam(); + huge_pages_requested_ = NHugePages(0); + huge_pages_received_ = NHugePages(0); + // We don't use the first few bytes, because things might get weird + // given zero pointers. + backing_.resize(1024); + metadata_bytes_ = 0; + } + + ~HugeAllocatorTest() override { + for (void *p : metadata_allocs_) { + free(p); + } + metadata_allocs_.clear(); + backing_.clear(); + } + + size_t *GetActual(HugePage p) { return &backing_[p.index()]; } + + // We're dealing with a lot of memory, so we don't want to do full memset + // and then check every byte for corruption. So set the first and last + // byte in each page... + void CheckPages(HugeRange r, size_t c) { + for (HugePage p = r.first; p < r.first + r.n; ++p) { + EXPECT_EQ(c, *GetActual(p)); + } + } + + void MarkPages(HugeRange r, size_t c) { + for (HugePage p = r.first; p < r.first + r.n; ++p) { + *GetActual(p) = c; + } + } + + void CheckStats(HugeLength expected_use) { + const HugeLength received = HugePagesReceived(); + EXPECT_EQ(received, allocator_.system()); + HugeLength used = received - allocator_.size(); + EXPECT_EQ(used, expected_use); + } + + HugeAllocator allocator_{AllocateFake, MallocMetadata}; +}; + +// Use a tiny fraction of actual size so we can test aggressively. +void *HugeAllocatorTest::AllocateFake(size_t bytes, size_t *actual, + size_t align) { + CHECK_CONDITION(bytes % kHugePageSize == 0); + CHECK_CONDITION(align % kHugePageSize == 0); + HugeLength req = HLFromBytes(bytes); + huge_pages_requested_ += req; + // Test the case where our sys allocator provides too much. + if (should_overallocate_) ++req; + huge_pages_received_ += req; + *actual = req.in_bytes(); + // we'll actually provide hidden backing, one word per hugepage. + bytes = req / NHugePages(1); + align /= kHugePageSize; + size_t index = backing_.size(); + if (index % align != 0) { + index += (align - (index & align)); + } + if (index + bytes > kMaxBacking) return nullptr; + backing_.resize(index + bytes); + void *ptr = reinterpret_cast<void *>(index * kHugePageSize); + return ptr; +} + +// We use actual malloc for metadata allocations, but we track them so they +// can be deleted. 
+void *HugeAllocatorTest::MallocMetadata(size_t size) { + metadata_bytes_ += size; + void *ptr = malloc(size); + metadata_allocs_.push_back(ptr); + return ptr; +} + +std::vector<size_t> HugeAllocatorTest::backing_; +std::vector<void *> HugeAllocatorTest::metadata_allocs_; +size_t HugeAllocatorTest::metadata_bytes_; +bool HugeAllocatorTest::should_overallocate_; +HugeLength HugeAllocatorTest::huge_pages_requested_; +HugeLength HugeAllocatorTest::huge_pages_received_; + +TEST_P(HugeAllocatorTest, Basic) { + std::vector<std::pair<HugeRange, size_t>> allocs; + absl::BitGen rng; + size_t label = 0; + HugeLength total = NHugePages(0); + static const size_t kSize = 1000; + HugeLength peak = total; + for (int i = 0; i < kSize; ++i) { + HugeLength len = + NHugePages(absl::LogUniform<int32_t>(rng, 0, (1 << 12) - 1) + 1); + auto r = allocator_.Get(len); + ASSERT_TRUE(r.valid()); + total += len; + peak = std::max(peak, total); + CheckStats(total); + MarkPages(r, label); + allocs.push_back({r, label}); + label++; + } + + for (int i = 0; i < 1000 * 25; ++i) { + size_t index = absl::Uniform<int32_t>(rng, 0, kSize); + std::swap(allocs[index], allocs[kSize - 1]); + auto p = allocs[kSize - 1]; + CheckPages(p.first, p.second); + total -= p.first.len(); + allocator_.Release(p.first); + CheckStats(total); + + HugeLength len = + NHugePages(absl::LogUniform<int32_t>(rng, 0, (1 << 12) - 1) + 1); + auto r = allocator_.Get(len); + ASSERT_TRUE(r.valid()); + ASSERT_EQ(r.len(), len); + total += len; + peak = std::max(peak, total); + CheckStats(total); + MarkPages(r, label); + allocs[kSize - 1] = {r, label}; + label++; + } + for (auto p : allocs) { + CheckPages(p.first, p.second); + allocator_.Release(p.first); + } +} + +// Check that releasing small chunks of allocations works OK. +TEST_P(HugeAllocatorTest, Subrelease) { + size_t label = 1; + const HugeLength kLen = NHugePages(8); + const HugeLength kTotal = kLen * (kLen / NHugePages(1) - 1); + for (int i = 0; i < 100; ++i) { + std::vector<std::pair<HugeRange, size_t>> allocs; + // get allocs of kLen and release different sized sub-chunks of them - + // make sure that doesn't break anything else. + for (HugeLength j = NHugePages(1); j < kLen; ++j) { + auto r = allocator_.Get(kLen); + ASSERT_TRUE(r.valid()); + MarkPages(r, label); + allocator_.Release({r.start(), j}); + allocs.push_back({{r.start() + j, kLen - j}, label}); + label++; + } + EXPECT_EQ(kTotal, HugePagesRequested()); + for (auto p : allocs) { + CheckPages(p.first, p.second); + allocator_.Release(p.first); + } + } +} + +// Does subreleasing work OK for absurdly large allocations? 
+TEST_P(HugeAllocatorTest, SubreleaseLarge) { + absl::BitGen rng; + std::vector<std::pair<HugeRange, size_t>> allocs; + size_t label = 1; + const HugeLength kLimit = HLFromBytes(1024ul * 1024 * 1024 * 1024); + for (HugeLength n = NHugePages(2); n < kLimit; n *= 2) { + auto r = allocator_.Get(n); + ASSERT_TRUE(r.valid()); + MarkPages(r, label); + // chunk of less than half + HugeLength chunk = + NHugePages(absl::Uniform<int32_t>(rng, 0, n / NHugePages(2)) + 1); + allocator_.Release({r.start(), chunk}); + allocs.push_back({{r.start() + chunk, n - chunk}, label}); + label++; + } + // reuse the released space + const HugeLength total = HugePagesRequested(); + while (total == HugePagesRequested()) { + HugeLength n = + NHugePages(absl::LogUniform<int32_t>(rng, 0, (1 << 8) - 1) + 1); + auto r = allocator_.Get(n); + ASSERT_TRUE(r.valid()); + MarkPages(r, label); + allocs.push_back({r, label}); + label++; + } + for (auto p : allocs) { + CheckPages(p.first, p.second); + allocator_.Release(p.first); + } +} + +// We don't care *that* much about vaddress space, but let's not be crazy. +// Don't fill tiny requests from big spaces. +TEST_P(HugeAllocatorTest, Fragmentation) { + // Prime the pump with some random allocations. + absl::BitGen rng; + + std::vector<HugeRange> free; + constexpr int kSlots = 50; + + // Plan to insert a large allocation at the big_slot'th index, then free it + // during the initial priming step (so we have at least a contiguous region of + // at least big hugepages). + HugeLength big = NHugePages(8); + const int big_slot = absl::Uniform(rng, 0, kSlots); + + for (int i = 0; i < kSlots; ++i) { + if (i == big_slot) { + auto r = allocator_.Get(big); + ASSERT_TRUE(r.valid()); + free.push_back(r); + } + + auto r = allocator_.Get(NHugePages(1)); + ASSERT_TRUE(r.valid()); + if (absl::Bernoulli(rng, 1.0 / 2)) { + free.push_back(r); + } + } + size_t slots = free.size() - 1; + for (auto r : free) { + allocator_.Release(r); + } + free.clear(); + static const size_t kReps = 5; + for (int i = 0; i < kReps; ++i) { + SCOPED_TRACE(i); + + // Ensure we have a range of this size. + HugeRange r = allocator_.Get(big); + ASSERT_TRUE(r.valid()); + if (NHugePages(slots) > allocator_.size()) { + // We should also have slots pages left over after allocating big + for (int i = 0; i < slots; ++i) { + HugeRange f = allocator_.Get(NHugePages(1)); + ASSERT_TRUE(f.valid()); + free.push_back(f); + } + for (auto f : free) { + allocator_.Release(f); + } + free.clear(); + } + allocator_.Release(r); + // We should definitely have at least this many small spaces... + for (int i = 0; i < slots; ++i) { + r = allocator_.Get(NHugePages(1)); + ASSERT_TRUE(r.valid()); + free.push_back(r); + } + // that don't interfere with the available big space. + auto before = allocator_.system(); + r = allocator_.Get(big); + ASSERT_TRUE(r.valid()); + EXPECT_EQ(before, allocator_.system()); + allocator_.Release(r); + for (auto r : free) { + allocator_.Release(r); + } + free.clear(); + slots += big.raw_num(); + big += big; + } +} + +// Check that we only request as much as we actually need from the system. 
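+// (Nothing in this test is ever released and each Get() is strictly larger
+// than the last, so every request has to be passed through to the system and
+// HugePagesRequested() should track the running total exactly.)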
+TEST_P(HugeAllocatorTest, Frugal) { + HugeLength total = NHugePages(0); + static const size_t kSize = 1000; + for (int i = 1; i < kSize; ++i) { + HugeLength len = NHugePages(i); + // toss the range, we ain't using it + ASSERT_TRUE(allocator_.Get(len).valid()); + + total += len; + CheckStats(total); + EXPECT_EQ(total, HugePagesRequested()); + } +} + +TEST_P(HugeAllocatorTest, Stats) { + struct Helper { + static void Stats(const HugeAllocator *huge, size_t *num_spans, + Length *pages, absl::Duration *avg_age) { + SmallSpanStats small; + LargeSpanStats large; + PageAgeHistograms ages(absl::base_internal::CycleClock::Now()); + huge->AddSpanStats(&small, &large, &ages); + for (auto i = Length(0); i < kMaxPages; ++i) { + EXPECT_EQ(0, small.normal_length[i.raw_num()]); + EXPECT_EQ(0, small.returned_length[i.raw_num()]); + } + *num_spans = large.spans; + EXPECT_EQ(Length(0), large.normal_pages); + *pages = large.returned_pages; + const PageAgeHistograms::Histogram *hist = ages.GetTotalHistogram(true); + *avg_age = absl::Seconds(hist->avg_age()); + } + }; + + if (GetParam()) { + // Ensure overallocation doesn't skew our measurements below. + allocator_.Release(allocator_.Get(NHugePages(7))); + } + const HugeRange r = allocator_.Get(NHugePages(8)); + ASSERT_TRUE(r.valid()); + const HugePage p = r.start(); + // Break it into 3 ranges, separated by one-page regions, + // so we can easily track the internal state in stats. + const HugeRange r1 = {p, NHugePages(1)}; + const HugeRange b1 = {p + NHugePages(1), NHugePages(1)}; + const HugeRange r2 = {p + NHugePages(2), NHugePages(2)}; + const HugeRange b2 = {p + NHugePages(4), NHugePages(1)}; + const HugeRange r3 = {p + NHugePages(5), NHugePages(3)}; + + size_t num_spans; + Length pages; + absl::Duration avg_age; + + Helper::Stats(&allocator_, &num_spans, &pages, &avg_age); + EXPECT_EQ(0, num_spans); + EXPECT_EQ(Length(0), pages); + EXPECT_EQ(absl::ZeroDuration(), avg_age); + + allocator_.Release(r1); + constexpr absl::Duration kDelay = absl::Milliseconds(500); + absl::SleepFor(kDelay); + Helper::Stats(&allocator_, &num_spans, &pages, &avg_age); + EXPECT_EQ(1, num_spans); + EXPECT_EQ(NHugePages(1).in_pages(), pages); + // We can only do >= testing, because we might be arbitrarily delayed. + // Since avg_age is computed in floating point, we may have round-off from + // TCMalloc's internal use of absl::base_internal::CycleClock down through + // computing the average age of the spans. kEpsilon allows for a tiny amount + // of slop. 
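+ // (For example, after r2 is released below, the free spans are 1 hugepage
+ // aged ~0.75s and 2 hugepages aged ~0.25s, so the page-weighted average age
+ // should be at least (0.75*1 + 0.25*2) / (1 + 2) ~= 0.42s, minus kEpsilon.)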
+ constexpr absl::Duration kEpsilon = absl::Microseconds(200); + EXPECT_LE(kDelay - kEpsilon, avg_age); + + allocator_.Release(r2); + absl::SleepFor(absl::Milliseconds(250)); + Helper::Stats(&allocator_, &num_spans, &pages, &avg_age); + EXPECT_EQ(2, num_spans); + EXPECT_EQ(NHugePages(3).in_pages(), pages); + EXPECT_LE( + (absl::Seconds(0.75) * 1 + absl::Seconds(0.25) * 2) / (1 + 2) - kEpsilon, + avg_age); + + allocator_.Release(r3); + absl::SleepFor(absl::Milliseconds(125)); + Helper::Stats(&allocator_, &num_spans, &pages, &avg_age); + EXPECT_EQ(3, num_spans); + EXPECT_EQ(NHugePages(6).in_pages(), pages); + EXPECT_LE((absl::Seconds(0.875) * 1 + absl::Seconds(0.375) * 2 + + absl::Seconds(0.125) * 3) / + (1 + 2 + 3) - + kEpsilon, + avg_age); + + allocator_.Release(b1); + allocator_.Release(b2); + absl::SleepFor(absl::Milliseconds(100)); + Helper::Stats(&allocator_, &num_spans, &pages, &avg_age); + EXPECT_EQ(1, num_spans); + EXPECT_EQ(NHugePages(8).in_pages(), pages); + EXPECT_LE((absl::Seconds(0.975) * 1 + absl::Seconds(0.475) * 2 + + absl::Seconds(0.225) * 3 + absl::Seconds(0.1) * 2) / + (1 + 2 + 3 + 2) - + kEpsilon, + avg_age); +} + +// Make sure we're well-behaved in the presence of OOM (and that we do +// OOM at some point...) +TEST_P(HugeAllocatorTest, OOM) { + HugeLength n = NHugePages(1); + while (allocator_.Get(n).valid()) { + n *= 2; + } +} + +INSTANTIATE_TEST_SUITE_P( + NormalOverAlloc, HugeAllocatorTest, testing::Values(false, true), + +[](const testing::TestParamInfo<bool> &info) { + return info.param ? "overallocates" : "normal"; + }); + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_cache.cc b/contrib/libs/tcmalloc/tcmalloc/huge_cache.cc new file mode 100644 index 0000000000..0d25da2983 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/huge_cache.cc @@ -0,0 +1,494 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "tcmalloc/huge_cache.h" + +#include <tuple> + +#include "absl/time/time.h" +#include "tcmalloc/common.h" +#include "tcmalloc/huge_address_map.h" +#include "tcmalloc/huge_pages.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/stats.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +template <size_t kEpochs> +void MinMaxTracker<kEpochs>::Report(HugeLength val) { + timeseries_.Report(val); +} + +template <size_t kEpochs> +HugeLength MinMaxTracker<kEpochs>::MaxOverTime(absl::Duration t) const { + HugeLength m = NHugePages(0); + size_t num_epochs = ceil(absl::FDivDuration(t, kEpochLength)); + timeseries_.IterBackwards([&](size_t offset, int64_t ts, + const Extrema &e) { m = std::max(m, e.max); }, + num_epochs); + return m; +} + +template <size_t kEpochs> +HugeLength MinMaxTracker<kEpochs>::MinOverTime(absl::Duration t) const { + HugeLength m = kMaxVal; + size_t num_epochs = ceil(absl::FDivDuration(t, kEpochLength)); + timeseries_.IterBackwards([&](size_t offset, int64_t ts, + const Extrema &e) { m = std::min(m, e.min); }, + num_epochs); + return m; +} + +template <size_t kEpochs> +void MinMaxTracker<kEpochs>::Print(Printer *out) const { + // Prints timestamp:min_pages:max_pages for each window with records. + // Timestamp == kEpochs - 1 is the most recent measurement. + const int64_t millis = absl::ToInt64Milliseconds(kEpochLength); + out->printf("\nHugeCache: window %lldms * %zu", millis, kEpochs); + int written = 0; + timeseries_.Iter( + [&](size_t offset, int64_t ts, const Extrema &e) { + if ((written++) % 100 == 0) + out->printf("\nHugeCache: Usage timeseries "); + out->printf("%zu:%zu:%zd,", offset, e.min.raw_num(), e.max.raw_num()); + }, + timeseries_.kSkipEmptyEntries); + out->printf("\n"); +} + +template <size_t kEpochs> +void MinMaxTracker<kEpochs>::PrintInPbtxt(PbtxtRegion *hpaa) const { + // Prints content of each non-empty epoch, from oldest to most recent data + auto huge_cache_history = hpaa->CreateSubRegion("huge_cache_history"); + huge_cache_history.PrintI64("window_ms", + absl::ToInt64Milliseconds(kEpochLength)); + huge_cache_history.PrintI64("epochs", kEpochs); + + timeseries_.Iter( + [&](size_t offset, int64_t ts, const Extrema &e) { + auto m = huge_cache_history.CreateSubRegion("measurements"); + m.PrintI64("epoch", offset); + m.PrintI64("min_bytes", e.min.in_bytes()); + m.PrintI64("max_bytes", e.max.in_bytes()); + }, + timeseries_.kSkipEmptyEntries); +} + +template <size_t kEpochs> +bool MinMaxTracker<kEpochs>::Extrema::operator==(const Extrema &other) const { + return (other.max == max) && (other.min == min); +} + +// Explicit instantiations of template +template class MinMaxTracker<>; +template class MinMaxTracker<600>; + +// The logic for actually allocating from the cache or backing, and keeping +// the hit rates specified. +HugeRange HugeCache::DoGet(HugeLength n, bool *from_released) { + auto *node = Find(n); + if (!node) { + misses_++; + weighted_misses_ += n.raw_num(); + HugeRange res = allocator_->Get(n); + if (res.valid()) { + *from_released = true; + } + + return res; + } + hits_++; + weighted_hits_ += n.raw_num(); + *from_released = false; + size_ -= n; + UpdateSize(size()); + HugeRange result, leftover; + // Put back whatever we have left (or nothing, if it's exact.) 
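+ // For example, if the best-fit node covers hugepages [100, 108) and n is 3,
+ // Split() yields result = [100, 103) and leftover = [103, 108); the leftover
+ // is reinserted below so later Gets can reuse it.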
+ std::tie(result, leftover) = Split(node->range(), n); + cache_.Remove(node); + if (leftover.valid()) { + cache_.Insert(leftover); + } + return result; +} + +void HugeCache::MaybeGrowCacheLimit(HugeLength missed) { + // Our goal is to make the cache size = the largest "brief dip." + // + // A "dip" being a case where usage shrinks, then increases back up + // to previous levels (at least partially). + // + // "brief" is "returns to normal usage in < kCacheTime." (In + // other words, we ideally want to be willing to cache memory for + // kCacheTime before expecting it to be used again--we are loose + // on the timing..) + // + // The interesting part is finding those dips. + + // This is the downward slope: we lost some usage. (This in theory could + // be as much as 2 * kCacheTime old, which is fine.) + const HugeLength shrink = off_peak_tracker_.MaxOverTime(kCacheTime); + + // This is the upward slope: we are coming back up. + const HugeLength grow = usage_ - usage_tracker_.MinOverTime(kCacheTime); + + // Ideally we now know that we dipped down by some amount, then came + // up. Sadly our stats aren't quite good enough to guarantee things + // happened in the proper order. Suppose our usage takes the + // following path (in essentially zero time): + // 0, 10000, 5000, 5500. + // + // Clearly the proven dip here is 500. But we'll compute shrink = 5000, + // grow = 5500--we'd prefer to measure from a min *after* that shrink. + // + // It's difficult to ensure this, and hopefully this case is rare. + // TODO(b/134690209): figure out if we can solve that problem. + const HugeLength dip = std::min(shrink, grow); + + // Fragmentation: we may need to cache a little more than the actual + // usage jump. 10% seems to be a reasonable addition that doesn't waste + // much space, but gets good performance on tests. + const HugeLength slack = dip / 10; + + const HugeLength lim = dip + slack; + + if (lim > limit()) { + last_limit_change_ = clock_.now(); + limit_ = lim; + } +} + +void HugeCache::IncUsage(HugeLength n) { + usage_ += n; + usage_tracker_.Report(usage_); + detailed_tracker_.Report(usage_); + off_peak_tracker_.Report(NHugePages(0)); + if (size() + usage() > max_rss_) max_rss_ = size() + usage(); +} + +void HugeCache::DecUsage(HugeLength n) { + usage_ -= n; + usage_tracker_.Report(usage_); + detailed_tracker_.Report(usage_); + const HugeLength max = usage_tracker_.MaxOverTime(kCacheTime); + ASSERT(max >= usage_); + const HugeLength off_peak = max - usage_; + off_peak_tracker_.Report(off_peak); + if (size() + usage() > max_rss_) max_rss_ = size() + usage(); +} + +void HugeCache::UpdateSize(HugeLength size) { + size_tracker_.Report(size); + if (size > max_size_) max_size_ = size; + if (size + usage() > max_rss_) max_rss_ = size + usage(); + + // TODO(b/134691947): moving this inside the MinMaxTracker would save one call + // to clock_.now() but all MinMaxTrackers would track regret instead. + int64_t now = clock_.now(); + if (now > last_regret_update_) { + regret_ += size.raw_num() * (now - last_regret_update_); + last_regret_update_ = now; + } +} + +HugeRange HugeCache::Get(HugeLength n, bool *from_released) { + HugeRange r = DoGet(n, from_released); + // failure to get a range should "never" "never" happen (VSS limits + // or wildly incorrect allocation sizes only...) Don't deal with + // this case for cache size accounting. 
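+ // (If DoGet() did fail, the returned range is expected to have zero length,
+ // so the usage accounting below adds nothing.)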
+ IncUsage(r.len()); + + const bool miss = r.valid() && *from_released; + if (miss) MaybeGrowCacheLimit(n); + return r; +} + +void HugeCache::Release(HugeRange r) { + DecUsage(r.len()); + + cache_.Insert(r); + size_ += r.len(); + if (size_ <= limit()) { + fills_++; + } else { + overflows_++; + } + + // Shrink the limit, if we're going to do it, before we shrink to + // the max size. (This could reduce the number of regions we break + // in half to avoid overshrinking.) + if ((clock_.now() - last_limit_change_) > (cache_time_ticks_ * 2)) { + total_fast_unbacked_ += MaybeShrinkCacheLimit(); + } + total_fast_unbacked_ += ShrinkCache(limit()); + + UpdateSize(size()); +} + +void HugeCache::ReleaseUnbacked(HugeRange r) { + DecUsage(r.len()); + // No point in trying to cache it, just hand it back. + allocator_->Release(r); +} + +HugeLength HugeCache::MaybeShrinkCacheLimit() { + last_limit_change_ = clock_.now(); + + const HugeLength min = size_tracker_.MinOverTime(kCacheTime * 2); + // If cache size has gotten down to at most 20% of max, we assume + // we're close enough to the optimal size--we don't want to fiddle + // too much/too often unless we have large gaps in usage. + if (min < limit() / 5) return NHugePages(0); + + // Take away half of the unused portion. + HugeLength drop = std::max(min / 2, NHugePages(1)); + limit_ = std::max(limit() <= drop ? NHugePages(0) : limit() - drop, + MinCacheLimit()); + return ShrinkCache(limit()); +} + +HugeLength HugeCache::ShrinkCache(HugeLength target) { + HugeLength removed = NHugePages(0); + while (size_ > target) { + // Remove smallest-ish nodes, to avoid fragmentation where possible. + auto *node = Find(NHugePages(1)); + CHECK_CONDITION(node); + HugeRange r = node->range(); + cache_.Remove(node); + // Suppose we're 10 MiB over target but the smallest available node + // is 100 MiB. Don't go overboard--split up the range. + // In particular - this prevents disastrous results if we've decided + // the cache should be 99 MiB but the actual hot usage is 100 MiB + // (and it is unfragmented). + const HugeLength delta = size() - target; + if (r.len() > delta) { + HugeRange to_remove, leftover; + std::tie(to_remove, leftover) = Split(r, delta); + ASSERT(leftover.valid()); + cache_.Insert(leftover); + r = to_remove; + } + + size_ -= r.len(); + // Note, actual unback implementation is temporarily dropping and + // re-acquiring the page heap lock here. + unback_(r.start_addr(), r.byte_len()); + allocator_->Release(r); + removed += r.len(); + } + + return removed; +} + +HugeLength HugeCache::ReleaseCachedPages(HugeLength n) { + // This is a good time to check: is our cache going persistently unused? + HugeLength released = MaybeShrinkCacheLimit(); + + if (released < n) { + n -= released; + const HugeLength target = n > size() ? 
NHugePages(0) : size() - n; + released += ShrinkCache(target); + } + + UpdateSize(size()); + total_periodic_unbacked_ += released; + return released; +} + +void HugeCache::AddSpanStats(SmallSpanStats *small, LargeSpanStats *large, + PageAgeHistograms *ages) const { + static_assert(kPagesPerHugePage >= kMaxPages); + for (const HugeAddressMap::Node *node = cache_.first(); node != nullptr; + node = node->next()) { + HugeLength n = node->range().len(); + if (large != nullptr) { + large->spans++; + large->normal_pages += n.in_pages(); + } + + if (ages != nullptr) { + ages->RecordRange(n.in_pages(), false, node->when()); + } + } +} + +HugeAddressMap::Node *HugeCache::Find(HugeLength n) { + HugeAddressMap::Node *curr = cache_.root(); + // invariant: curr != nullptr && curr->longest >= n + // we favor smaller gaps and lower nodes and lower addresses, in that + // order. The net effect is that we are neither a best-fit nor a + // lowest-address allocator but vaguely close to both. + HugeAddressMap::Node *best = nullptr; + while (curr && curr->longest() >= n) { + if (curr->range().len() >= n) { + if (!best || best->range().len() > curr->range().len()) { + best = curr; + } + } + + // Either subtree could contain a better fit and we don't want to + // search the whole tree. Pick a reasonable child to look at. + auto left = curr->left(); + auto right = curr->right(); + if (!left || left->longest() < n) { + curr = right; + continue; + } + + if (!right || right->longest() < n) { + curr = left; + continue; + } + + // Here, we have a nontrivial choice. + if (left->range().len() == right->range().len()) { + if (left->longest() <= right->longest()) { + curr = left; + } else { + curr = right; + } + } else if (left->range().len() < right->range().len()) { + // Here, the longest range in both children is the same...look + // in the subtree with the smaller root, as that's slightly + // more likely to be our best. 
+ curr = left; + } else { + curr = right; + } + } + return best; +} + +void HugeCache::Print(Printer *out) { + const int64_t millis = absl::ToInt64Milliseconds(kCacheTime); + out->printf( + "HugeCache: contains unused, backed hugepage(s) " + "(kCacheTime = %lldms)\n", + millis); + // a / (a + b), avoiding division by zero + auto safe_ratio = [](double a, double b) { + const double total = a + b; + if (total == 0) return 0.0; + return a / total; + }; + + const double hit_rate = safe_ratio(hits_, misses_); + const double overflow_rate = safe_ratio(overflows_, fills_); + + out->printf( + "HugeCache: %zu / %zu hugepages cached / cache limit " + "(%.3f hit rate, %.3f overflow rate)\n", + size_.raw_num(), limit().raw_num(), hit_rate, overflow_rate); + out->printf("HugeCache: %zu MiB fast unbacked, %zu MiB periodic\n", + total_fast_unbacked_.in_bytes() / 1024 / 1024, + total_periodic_unbacked_.in_bytes() / 1024 / 1024); + UpdateSize(size()); + out->printf( + "HugeCache: %zu MiB*s cached since startup\n", + NHugePages(regret_).in_mib() / static_cast<size_t>(clock_.freq())); + + usage_tracker_.Report(usage_); + const HugeLength usage_min = usage_tracker_.MinOverTime(kCacheTime); + const HugeLength usage_max = usage_tracker_.MaxOverTime(kCacheTime); + out->printf( + "HugeCache: recent usage range: %zu min - %zu curr - %zu max MiB\n", + usage_min.in_mib(), usage_.in_mib(), usage_max.in_mib()); + + const HugeLength off_peak = usage_max - usage_; + off_peak_tracker_.Report(off_peak); + const HugeLength off_peak_min = off_peak_tracker_.MinOverTime(kCacheTime); + const HugeLength off_peak_max = off_peak_tracker_.MaxOverTime(kCacheTime); + out->printf( + "HugeCache: recent offpeak range: %zu min - %zu curr - %zu max MiB\n", + off_peak_min.in_mib(), off_peak.in_mib(), off_peak_max.in_mib()); + + const HugeLength cache_min = size_tracker_.MinOverTime(kCacheTime); + const HugeLength cache_max = size_tracker_.MaxOverTime(kCacheTime); + out->printf( + "HugeCache: recent cache range: %zu min - %zu curr - %zu max MiB\n", + cache_min.in_mib(), size_.in_mib(), cache_max.in_mib()); + + detailed_tracker_.Print(out); +} + +void HugeCache::PrintInPbtxt(PbtxtRegion *hpaa) { + hpaa->PrintI64("huge_cache_time_const", + absl::ToInt64Milliseconds(kCacheTime)); + + // a / (a + b), avoiding division by zero + auto safe_ratio = [](double a, double b) { + const double total = a + b; + if (total == 0) return 0.0; + return a / total; + }; + + const double hit_rate = safe_ratio(hits_, misses_); + const double overflow_rate = safe_ratio(overflows_, fills_); + + // number of bytes in HugeCache + hpaa->PrintI64("cached_huge_page_bytes", size_.in_bytes()); + // max allowed bytes in HugeCache + hpaa->PrintI64("max_cached_huge_page_bytes", limit().in_bytes()); + // lifetime cache hit rate + hpaa->PrintDouble("huge_cache_hit_rate", hit_rate); + // lifetime cache overflow rate + hpaa->PrintDouble("huge_cache_overflow_rate", overflow_rate); + // bytes eagerly unbacked by HugeCache + hpaa->PrintI64("fast_unbacked_bytes", total_fast_unbacked_.in_bytes()); + // bytes unbacked by periodic releaser thread + hpaa->PrintI64("periodic_unbacked_bytes", + total_periodic_unbacked_.in_bytes()); + UpdateSize(size()); + // memory cached since startup (in MiB*s) + hpaa->PrintI64("huge_cache_regret", NHugePages(regret_).in_mib() / + static_cast<size_t>(clock_.freq())); + + usage_tracker_.Report(usage_); + const HugeLength usage_min = usage_tracker_.MinOverTime(kCacheTime); + const HugeLength usage_max = usage_tracker_.MaxOverTime(kCacheTime); + { + auto 
usage_stats = hpaa->CreateSubRegion("huge_cache_usage_stats"); + usage_stats.PrintI64("min_bytes", usage_min.in_bytes()); + usage_stats.PrintI64("current_bytes", usage_.in_bytes()); + usage_stats.PrintI64("max_bytes", usage_max.in_bytes()); + } + + const HugeLength off_peak = usage_max - usage_; + off_peak_tracker_.Report(off_peak); + const HugeLength off_peak_min = off_peak_tracker_.MinOverTime(kCacheTime); + const HugeLength off_peak_max = off_peak_tracker_.MaxOverTime(kCacheTime); + { + auto usage_stats = hpaa->CreateSubRegion("huge_cache_offpeak_stats"); + usage_stats.PrintI64("min_bytes", off_peak_min.in_bytes()); + usage_stats.PrintI64("current_bytes", off_peak.in_bytes()); + usage_stats.PrintI64("max_bytes", off_peak_max.in_bytes()); + } + + const HugeLength cache_min = size_tracker_.MinOverTime(kCacheTime); + const HugeLength cache_max = size_tracker_.MaxOverTime(kCacheTime); + { + auto usage_stats = hpaa->CreateSubRegion("huge_cache_cache_stats"); + usage_stats.PrintI64("min_bytes", cache_min.in_bytes()); + usage_stats.PrintI64("current_bytes", size_.in_bytes()); + usage_stats.PrintI64("max_bytes", cache_max.in_bytes()); + } + + detailed_tracker_.PrintInPbtxt(hpaa); +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_cache.h b/contrib/libs/tcmalloc/tcmalloc/huge_cache.h new file mode 100644 index 0000000000..2ffda26cb2 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/huge_cache.h @@ -0,0 +1,228 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Wrapping interface for HugeAllocator that handles backing and +// unbacking, including a hot cache of backed single hugepages. +#ifndef TCMALLOC_HUGE_CACHE_H_ +#define TCMALLOC_HUGE_CACHE_H_ +#include <stddef.h> +#include <stdint.h> + +#include <algorithm> +#include <limits> + +#include "absl/time/time.h" +#include "tcmalloc/common.h" +#include "tcmalloc/experiment.h" +#include "tcmalloc/experiment_config.h" +#include "tcmalloc/huge_allocator.h" +#include "tcmalloc/huge_pages.h" +#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/timeseries_tracker.h" +#include "tcmalloc/stats.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +typedef void (*MemoryModifyFunction)(void *start, size_t len); + +// Track the extreme values of a HugeLength value over the past +// kWindow (time ranges approximate.) +template <size_t kEpochs = 16> +class MinMaxTracker { + public: + explicit constexpr MinMaxTracker(Clock clock, absl::Duration w) + : kEpochLength(w / kEpochs), timeseries_(clock, w) {} + + void Report(HugeLength val); + void Print(Printer *out) const; + void PrintInPbtxt(PbtxtRegion *hpaa) const; + + // If t < kEpochLength, these functions return statistics for last epoch. The + // granularity is kEpochLength (rounded up). 
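+ // (e.g. for a tracker constructed over a 2s window with the default 16
+ // epochs, kEpochLength is 125ms, so MaxOverTime(absl::Milliseconds(200))
+ // scans ceil(200 / 125) = 2 epochs.)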
+ HugeLength MaxOverTime(absl::Duration t) const; + HugeLength MinOverTime(absl::Duration t) const; + + private: + const absl::Duration kEpochLength; + + static constexpr HugeLength kMaxVal = + NHugePages(std::numeric_limits<size_t>::max()); + struct Extrema { + HugeLength min, max; + + static Extrema Nil() { + Extrema e; + e.max = NHugePages(0); + e.min = kMaxVal; + return e; + } + + void Report(HugeLength n) { + max = std::max(max, n); + min = std::min(min, n); + } + + bool empty() const { return (*this == Nil()); } + + bool operator==(const Extrema &other) const; + }; + + TimeSeriesTracker<Extrema, HugeLength, kEpochs> timeseries_; +}; + +// Explicit instantiations are defined in huge_cache.cc. +extern template class MinMaxTracker<>; +extern template class MinMaxTracker<600>; + +template <size_t kEpochs> +constexpr HugeLength MinMaxTracker<kEpochs>::kMaxVal; + +class HugeCache { + public: + // For use in production + HugeCache(HugeAllocator *allocator, MetadataAllocFunction meta_allocate, + MemoryModifyFunction unback) + : HugeCache(allocator, meta_allocate, unback, + Clock{.now = absl::base_internal::CycleClock::Now, + .freq = absl::base_internal::CycleClock::Frequency}) {} + + // For testing with mock clock + HugeCache(HugeAllocator *allocator, MetadataAllocFunction meta_allocate, + MemoryModifyFunction unback, Clock clock) + : allocator_(allocator), + cache_(meta_allocate), + clock_(clock), + cache_time_ticks_(clock_.freq() * absl::ToDoubleSeconds(kCacheTime)), + nanoseconds_per_tick_(absl::ToInt64Nanoseconds(absl::Seconds(1)) / + clock_.freq()), + last_limit_change_(clock.now()), + last_regret_update_(clock.now()), + detailed_tracker_(clock, absl::Minutes(10)), + usage_tracker_(clock, kCacheTime * 2), + off_peak_tracker_(clock, kCacheTime * 2), + size_tracker_(clock, kCacheTime * 2), + unback_(unback) {} + // Allocate a usable set of <n> contiguous hugepages. Try to give out + // memory that's currently backed from the kernel if we have it available. + // *from_released is set to false if the return range is already backed; + // otherwise, it is set to true (and the caller should back it.) + HugeRange Get(HugeLength n, bool *from_released); + + // Deallocate <r> (assumed to be backed by the kernel.) + void Release(HugeRange r); + // As Release, but the range is assumed to _not_ be backed. + void ReleaseUnbacked(HugeRange r); + + // Release to the system up to <n> hugepages of cache contents; returns + // the number of hugepages released. + HugeLength ReleaseCachedPages(HugeLength n); + + // Backed memory available. + HugeLength size() const { return size_; } + // Total memory cached (in HugeLength * nanoseconds) + uint64_t regret() const { return regret_ * nanoseconds_per_tick_; } + // Current limit for how much backed memory we'll cache. + HugeLength limit() const { return limit_; } + // Sum total of unreleased requests. + HugeLength usage() const { return usage_; } + + void AddSpanStats(SmallSpanStats *small, LargeSpanStats *large, + PageAgeHistograms *ages) const; + + BackingStats stats() const { + BackingStats s; + s.system_bytes = (usage() + size()).in_bytes(); + s.free_bytes = size().in_bytes(); + s.unmapped_bytes = 0; + return s; + } + + void Print(Printer *out); + void PrintInPbtxt(PbtxtRegion *hpaa); + + private: + HugeAllocator *allocator_; + + // We just cache-missed a request for <missed> pages; + // should we grow? + void MaybeGrowCacheLimit(HugeLength missed); + // Check if the cache seems consistently too big. 
Returns the + // number of pages *evicted* (not the change in limit). + HugeLength MaybeShrinkCacheLimit(); + + // Ensure the cache contains at most <target> hugepages, + // returning the number removed. + HugeLength ShrinkCache(HugeLength target); + + HugeRange DoGet(HugeLength n, bool *from_released); + + HugeAddressMap::Node *Find(HugeLength n); + + HugeAddressMap cache_; + HugeLength size_{NHugePages(0)}; + + HugeLength limit_{NHugePages(10)}; + const absl::Duration kCacheTime = absl::Seconds(1); + + size_t hits_{0}; + size_t misses_{0}; + size_t fills_{0}; + size_t overflows_{0}; + uint64_t weighted_hits_{0}; + uint64_t weighted_misses_{0}; + + // Sum(size of Gets) - Sum(size of Releases), i.e. amount of backed + // hugepages our user currently wants to have. + void IncUsage(HugeLength n); + void DecUsage(HugeLength n); + HugeLength usage_{NHugePages(0)}; + + // This is CycleClock, except overridable for tests. + Clock clock_; + const int64_t cache_time_ticks_; + const double nanoseconds_per_tick_; + + int64_t last_limit_change_; + + // 10 hugepages is a good baseline for our cache--easily wiped away + // by periodic release, and not that much memory on any real server. + // However, we can go below it if we haven't used that much for 30 seconds. + HugeLength MinCacheLimit() const { return NHugePages(10); } + + uint64_t regret_{0}; // overflows if we cache 585 hugepages for 1 year + int64_t last_regret_update_; + void UpdateSize(HugeLength size); + + MinMaxTracker<600> detailed_tracker_; + + MinMaxTracker<> usage_tracker_; + MinMaxTracker<> off_peak_tracker_; + MinMaxTracker<> size_tracker_; + HugeLength max_size_{NHugePages(0)}; + HugeLength max_rss_{NHugePages(0)}; + + HugeLength total_fast_unbacked_{NHugePages(0)}; + HugeLength total_periodic_unbacked_{NHugePages(0)}; + + MemoryModifyFunction unback_; +}; + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_HUGE_CACHE_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_cache_test.cc b/contrib/libs/tcmalloc/tcmalloc/huge_cache_test.cc new file mode 100644 index 0000000000..2699b44303 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/huge_cache_test.cc @@ -0,0 +1,563 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/huge_cache.h" + +#include <stdlib.h> +#include <string.h> + +#include <memory> +#include <random> +#include <tuple> +#include <utility> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/internal/cycleclock.h" +#include "absl/memory/memory.h" +#include "absl/random/random.h" +#include "absl/strings/str_cat.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "tcmalloc/huge_pages.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/stats.h" + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +class HugeCacheTest : public testing::Test { + private: + // Allow tests to modify the clock used by the cache. 
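+ // Advance() (below) simply bumps clock_offset_, so GetClock() appears to
+ // run ahead of the real CycleClock without the test actually sleeping.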
+ static int64_t clock_offset_; + static double GetClockFrequency() { + return absl::base_internal::CycleClock::Frequency(); + } + static int64_t GetClock() { + return absl::base_internal::CycleClock::Now() + + clock_offset_ * GetClockFrequency() / + absl::ToDoubleNanoseconds(absl::Seconds(1)); + } + + // Use a tiny fraction of actual size so we can test aggressively. + static void* AllocateFake(size_t bytes, size_t* actual, size_t align) { + if (bytes % kHugePageSize != 0) { + Crash(kCrash, __FILE__, __LINE__, "not aligned", bytes, kHugePageSize); + } + if (align % kHugePageSize != 0) { + Crash(kCrash, __FILE__, __LINE__, "not aligned", align, kHugePageSize); + } + *actual = bytes; + // we'll actually provide hidden backing, one word per hugepage. + bytes /= kHugePageSize; + align /= kHugePageSize; + size_t index = backing.size(); + if (index % align != 0) { + index += (align - (index & align)); + } + backing.resize(index + bytes); + void* ptr = reinterpret_cast<void*>(index * kHugePageSize); + return ptr; + } + // This isn't super good form but we'll never have more than one HAT + // extant at once. + static std::vector<size_t> backing; + + // We use actual malloc for metadata allocations, but we track them so they + // can be deleted. (TODO make this an arena if we care, which I doubt) + static void* MallocMetadata(size_t size) { + metadata_bytes += size; + void* ptr = calloc(size, 1); + metadata_allocs.push_back(ptr); + return ptr; + } + static std::vector<void*> metadata_allocs; + static size_t metadata_bytes; + + // This is wordy, but necessary for mocking: + class BackingInterface { + public: + virtual void Unback(void* p, size_t len) = 0; + virtual ~BackingInterface() {} + }; + + class MockBackingInterface : public BackingInterface { + public: + MOCK_METHOD2(Unback, void(void* p, size_t len)); + }; + + static void MockUnback(void* p, size_t len) { mock_->Unback(p, len); } + + protected: + static std::unique_ptr<testing::NiceMock<MockBackingInterface>> mock_; + + HugeCacheTest() { + // We don't use the first few bytes, because things might get weird + // given zero pointers. + backing.resize(1024); + metadata_bytes = 0; + mock_ = absl::make_unique<testing::NiceMock<MockBackingInterface>>(); + } + + ~HugeCacheTest() override { + for (void* p : metadata_allocs) { + free(p); + } + metadata_allocs.clear(); + backing.clear(); + mock_.reset(nullptr); + + clock_offset_ = 0; + } + + void Advance(absl::Duration d) { + clock_offset_ += absl::ToInt64Nanoseconds(d); + } + + HugeAllocator alloc_{AllocateFake, MallocMetadata}; + HugeCache cache_{&alloc_, MallocMetadata, MockUnback, + Clock{.now = GetClock, .freq = GetClockFrequency}}; +}; + +std::vector<size_t> HugeCacheTest::backing; +std::vector<void*> HugeCacheTest::metadata_allocs; +size_t HugeCacheTest::metadata_bytes; +std::unique_ptr<testing::NiceMock<HugeCacheTest::MockBackingInterface>> + HugeCacheTest::mock_; + +int64_t HugeCacheTest::clock_offset_ = 0; + +TEST_F(HugeCacheTest, Basic) { + bool from; + for (int i = 0; i < 100 * 1000; ++i) { + cache_.Release(cache_.Get(NHugePages(1), &from)); + } +} + +TEST_F(HugeCacheTest, Backing) { + bool from; + cache_.Release(cache_.Get(NHugePages(4), &from)); + EXPECT_TRUE(from); + // We should be able to split up a large range... + HugeRange r1 = cache_.Get(NHugePages(3), &from); + EXPECT_FALSE(from); + HugeRange r2 = cache_.Get(NHugePages(1), &from); + EXPECT_FALSE(from); + + // and then merge it back. 
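+ // (Releasing the two adjacent pieces lets the cache coalesce them, so the
+ // 4-hugepage Get below comes back already backed, i.e. from == false.)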
+ cache_.Release(r1); + cache_.Release(r2); + HugeRange r = cache_.Get(NHugePages(4), &from); + EXPECT_FALSE(from); + cache_.Release(r); +} + +TEST_F(HugeCacheTest, Release) { + bool from; + const HugeLength one = NHugePages(1); + cache_.Release(cache_.Get(NHugePages(5), &from)); + HugeRange r1, r2, r3, r4, r5; + r1 = cache_.Get(one, &from); + r2 = cache_.Get(one, &from); + r3 = cache_.Get(one, &from); + r4 = cache_.Get(one, &from); + r5 = cache_.Get(one, &from); + cache_.Release(r1); + cache_.Release(r2); + cache_.Release(r3); + cache_.Release(r4); + cache_.Release(r5); + + r1 = cache_.Get(one, &from); + ASSERT_EQ(false, from); + r2 = cache_.Get(one, &from); + ASSERT_EQ(false, from); + r3 = cache_.Get(one, &from); + ASSERT_EQ(false, from); + r4 = cache_.Get(one, &from); + ASSERT_EQ(false, from); + r5 = cache_.Get(one, &from); + ASSERT_EQ(false, from); + cache_.Release(r1); + cache_.Release(r2); + cache_.Release(r5); + + ASSERT_EQ(NHugePages(3), cache_.size()); + EXPECT_CALL(*mock_, Unback(r5.start_addr(), kHugePageSize * 1)).Times(1); + EXPECT_EQ(NHugePages(1), cache_.ReleaseCachedPages(NHugePages(1))); + cache_.Release(r3); + cache_.Release(r4); + + EXPECT_CALL(*mock_, Unback(r1.start_addr(), 4 * kHugePageSize)).Times(1); + EXPECT_EQ(NHugePages(4), cache_.ReleaseCachedPages(NHugePages(200))); +} + +TEST_F(HugeCacheTest, Regret) { + bool from; + HugeRange r = cache_.Get(NHugePages(20), &from); + cache_.Release(r); + HugeLength cached = cache_.size(); + absl::Duration d = absl::Seconds(20); + Advance(d); + char buf[512]; + Printer out(buf, 512); + cache_.Print(&out); // To update the regret + uint64_t expected_regret = absl::ToInt64Nanoseconds(d) * cached.raw_num(); + // Not exactly accurate since the mock clock advances with real time, and + // when we measure regret will be updated. 
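+ // (Both sides are in hugepage-nanoseconds: regret_ accumulates
+ // hugepages * clock ticks and regret() scales that by nanoseconds_per_tick_.)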
+ EXPECT_NEAR(cache_.regret(), expected_regret, expected_regret / 1000); + EXPECT_GE(cache_.regret(), expected_regret); +} + +TEST_F(HugeCacheTest, Stats) { + bool from; + HugeRange r = cache_.Get(NHugePages(1 + 1 + 2 + 1 + 3), &from); + HugeRange r1, r2, r3, spacer1, spacer2; + std::tie(r1, spacer1) = Split(r, NHugePages(1)); + std::tie(spacer1, r2) = Split(spacer1, NHugePages(1)); + std::tie(r2, spacer2) = Split(r2, NHugePages(2)); + std::tie(spacer2, r3) = Split(spacer2, NHugePages(1)); + cache_.Release(r1); + cache_.Release(r2); + cache_.Release(r3); + + ASSERT_EQ(NHugePages(6), cache_.size()); + r1 = cache_.Get(NHugePages(1), &from); + ASSERT_EQ(false, from); + r2 = cache_.Get(NHugePages(2), &from); + ASSERT_EQ(false, from); + r3 = cache_.Get(NHugePages(3), &from); + ASSERT_EQ(false, from); + + struct Helper { + static void Stat(const HugeCache& cache, size_t* spans, + Length* pages_backed, Length* pages_unbacked, + double* avg_age) { + PageAgeHistograms ages(absl::base_internal::CycleClock::Now()); + LargeSpanStats large; + cache.AddSpanStats(nullptr, &large, &ages); + + const PageAgeHistograms::Histogram* hist = ages.GetTotalHistogram(false); + *spans = large.spans; + *pages_backed = large.normal_pages; + *pages_unbacked = large.returned_pages; + *avg_age = hist->avg_age(); + } + }; + + double avg_age; + size_t spans; + Length pages_backed; + Length pages_unbacked; + + cache_.Release(r1); + absl::SleepFor(absl::Microseconds(5000)); + Helper::Stat(cache_, &spans, &pages_backed, &pages_unbacked, &avg_age); + EXPECT_EQ(Length(0), pages_unbacked); + EXPECT_EQ(1, spans); + EXPECT_EQ(NHugePages(1).in_pages(), pages_backed); + EXPECT_LE(0.005, avg_age); + + cache_.Release(r2); + absl::SleepFor(absl::Microseconds(2500)); + Helper::Stat(cache_, &spans, &pages_backed, &pages_unbacked, &avg_age); + EXPECT_EQ(Length(0), pages_unbacked); + EXPECT_EQ(2, spans); + EXPECT_EQ(NHugePages(3).in_pages(), pages_backed); + EXPECT_LE((0.0075 * 1 + 0.0025 * 2) / (1 + 2), avg_age); + + cache_.Release(r3); + absl::SleepFor(absl::Microseconds(1250)); + Helper::Stat(cache_, &spans, &pages_backed, &pages_unbacked, &avg_age); + EXPECT_EQ(Length(0), pages_unbacked); + EXPECT_EQ(3, spans); + EXPECT_EQ(NHugePages(6).in_pages(), pages_backed); + EXPECT_LE((0.00875 * 1 + 0.00375 * 2 + 0.00125 * 3) / (1 + 2 + 3), avg_age); +} + +static double Frac(HugeLength num, HugeLength denom) { + return static_cast<double>(num.raw_num()) / denom.raw_num(); +} + +TEST_F(HugeCacheTest, Growth) { + bool released; + absl::BitGen rng; + // fragmentation is a bit of a challenge + std::uniform_int_distribution<size_t> sizes(1, 5); + // fragment the cache badly. + std::vector<HugeRange> keep; + std::vector<HugeRange> drop; + for (int i = 0; i < 1000; ++i) { + auto& l = std::bernoulli_distribution()(rng) ? keep : drop; + l.push_back(cache_.Get(NHugePages(sizes(rng)), &released)); + } + + for (auto r : drop) { + cache_.Release(r); + } + + // See the TODO in HugeCache::MaybeGrowCache; without this delay, + // the above fragmentation plays merry havoc with our instrumentation. + Advance(absl::Seconds(30)); + + // Test that our cache can grow to fit a working set. + HugeLength hot_set_sizes[] = {NHugePages(5), NHugePages(10), NHugePages(100), + NHugePages(10000)}; + + for (const HugeLength hot : hot_set_sizes) { + SCOPED_TRACE(absl::StrCat("cache size = ", hot.in_bytes() / 1024.0 / 1024.0, + " MiB")); + // Exercise the cache allocating about <hot> worth of data. After + // a brief warmup phase, we should do this without needing to back much. 
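+ // (alloc() below returns {hugepages that needed fresh backing, total
+ // hugepages handed out} for one pass over the working set.)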
+ auto alloc = [&]() -> std::pair<HugeLength, HugeLength> { + HugeLength got = NHugePages(0); + HugeLength needed_backing = NHugePages(0); + std::vector<HugeRange> items; + while (got < hot) { + HugeLength rest = hot - got; + HugeLength l = std::min(rest, NHugePages(sizes(rng))); + got += l; + items.push_back(cache_.Get(l, &released)); + if (released) needed_backing += l; + } + for (auto r : items) { + cache_.Release(r); + } + return {needed_backing, got}; + }; + + // warmup - we're allowed to incur misses and be too big. + for (int i = 0; i < 2; ++i) { + alloc(); + } + + HugeLength needed_backing = NHugePages(0); + HugeLength total = NHugePages(0); + for (int i = 0; i < 16; ++i) { + auto r = alloc(); + needed_backing += r.first; + total += r.second; + // Cache shouldn't have just grown arbitrarily + const HugeLength cached = cache_.size(); + // Allow us 10% slop, but don't get out of bed for tiny caches anyway. + const double ratio = Frac(cached, hot); + SCOPED_TRACE( + absl::StrCat(cached.raw_num(), "hps ", Frac(r.first, r.second))); + if (ratio > 1 && cached > NHugePages(16)) { + EXPECT_LE(ratio, 1.1); + } + } + // approximately, given the randomized sizing... + + const double ratio = Frac(needed_backing, total); + EXPECT_LE(ratio, 0.2); + } +} + +// If we repeatedly grow and shrink, but do so very slowly, we should *not* +// cache the large variation. +TEST_F(HugeCacheTest, SlowGrowthUncached) { + absl::BitGen rng; + std::uniform_int_distribution<size_t> sizes(1, 10); + for (int i = 0; i < 20; ++i) { + std::vector<HugeRange> rs; + for (int j = 0; j < 20; ++j) { + Advance(absl::Milliseconds(600)); + bool released; + rs.push_back(cache_.Get(NHugePages(sizes(rng)), &released)); + } + HugeLength max_cached = NHugePages(0); + for (auto r : rs) { + Advance(absl::Milliseconds(600)); + cache_.Release(r); + max_cached = std::max(max_cached, cache_.size()); + } + EXPECT_GE(NHugePages(10), max_cached); + } +} + +// If very rarely we have a huge increase in usage, it shouldn't be cached. +TEST_F(HugeCacheTest, SpikesUncached) { + absl::BitGen rng; + std::uniform_int_distribution<size_t> sizes(1, 10); + for (int i = 0; i < 20; ++i) { + std::vector<HugeRange> rs; + for (int j = 0; j < 2000; ++j) { + bool released; + rs.push_back(cache_.Get(NHugePages(sizes(rng)), &released)); + } + HugeLength max_cached = NHugePages(0); + for (auto r : rs) { + cache_.Release(r); + max_cached = std::max(max_cached, cache_.size()); + } + EXPECT_GE(NHugePages(10), max_cached); + Advance(absl::Seconds(30)); + } +} + +// If very rarely we have a huge *decrease* in usage, it *should* be cached. +TEST_F(HugeCacheTest, DipsCached) { + absl::BitGen rng; + std::uniform_int_distribution<size_t> sizes(1, 10); + for (int i = 0; i < 20; ++i) { + std::vector<HugeRange> rs; + HugeLength got = NHugePages(0); + HugeLength uncached = NHugePages(0); + for (int j = 0; j < 2000; ++j) { + bool released; + HugeLength n = NHugePages(sizes(rng)); + rs.push_back(cache_.Get(n, &released)); + got += n; + if (released) uncached += n; + } + // Most of our time is at high usage... + Advance(absl::Seconds(30)); + // Now immediately release and reallocate. + for (auto r : rs) { + cache_.Release(r); + } + + // warmup + if (i >= 2) { + EXPECT_GE(0.06, Frac(uncached, got)); + } + } +} + +// Suppose in a previous era of behavior we needed a giant cache, +// but now we don't. Do we figure this out promptly? 
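+// (The test first builds up well over 10000 cached hugepages, then checks
+// that a much smaller steady-state working set pulls the cache limit back
+// under 25 hugepages within ~30 simulated seconds.)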
+TEST_F(HugeCacheTest, Shrink) { + absl::BitGen rng; + std::uniform_int_distribution<size_t> sizes(1, 10); + for (int i = 0; i < 20; ++i) { + std::vector<HugeRange> rs; + for (int j = 0; j < 2000; ++j) { + HugeLength n = NHugePages(sizes(rng)); + bool released; + rs.push_back(cache_.Get(n, &released)); + } + for (auto r : rs) { + cache_.Release(r); + } + } + + ASSERT_LE(NHugePages(10000), cache_.size()); + + for (int i = 0; i < 30; ++i) { + // New working set <= 20 pages. + Advance(absl::Seconds(1)); + + // And do some work. + for (int j = 0; j < 100; ++j) { + bool released; + HugeRange r1 = cache_.Get(NHugePages(sizes(rng)), &released); + HugeRange r2 = cache_.Get(NHugePages(sizes(rng)), &released); + cache_.Release(r1); + cache_.Release(r2); + } + } + + ASSERT_GE(NHugePages(25), cache_.limit()); +} + +TEST_F(HugeCacheTest, Usage) { + bool released; + + auto r1 = cache_.Get(NHugePages(10), &released); + EXPECT_EQ(NHugePages(10), cache_.usage()); + + auto r2 = cache_.Get(NHugePages(100), &released); + EXPECT_EQ(NHugePages(110), cache_.usage()); + + cache_.Release(r1); + EXPECT_EQ(NHugePages(100), cache_.usage()); + + // Pretend we unbacked this. + cache_.ReleaseUnbacked(r2); + EXPECT_EQ(NHugePages(0), cache_.usage()); +} + +class MinMaxTrackerTest : public testing::Test { + protected: + void Advance(absl::Duration d) { + clock_ += absl::ToDoubleSeconds(d) * GetFakeClockFrequency(); + } + + static int64_t FakeClock() { return clock_; } + + static double GetFakeClockFrequency() { + return absl::ToDoubleNanoseconds(absl::Seconds(2)); + } + + private: + static int64_t clock_; +}; + +int64_t MinMaxTrackerTest::clock_{0}; + +TEST_F(MinMaxTrackerTest, Works) { + const absl::Duration kDuration = absl::Seconds(2); + MinMaxTracker<> tracker{ + Clock{.now = FakeClock, .freq = GetFakeClockFrequency}, kDuration}; + + tracker.Report(NHugePages(0)); + EXPECT_EQ(NHugePages(0), tracker.MaxOverTime(kDuration)); + EXPECT_EQ(NHugePages(0), tracker.MinOverTime(kDuration)); + + tracker.Report(NHugePages(10)); + EXPECT_EQ(NHugePages(10), tracker.MaxOverTime(kDuration)); + EXPECT_EQ(NHugePages(0), tracker.MinOverTime(kDuration)); + + tracker.Report(NHugePages(5)); + EXPECT_EQ(NHugePages(10), tracker.MaxOverTime(kDuration)); + EXPECT_EQ(NHugePages(0), tracker.MinOverTime(kDuration)); + + tracker.Report(NHugePages(100)); + EXPECT_EQ(NHugePages(100), tracker.MaxOverTime(kDuration)); + EXPECT_EQ(NHugePages(0), tracker.MinOverTime(kDuration)); + + // Some tests for advancing time + Advance(kDuration / 3); + tracker.Report(NHugePages(2)); + EXPECT_EQ(NHugePages(2), tracker.MaxOverTime(absl::Nanoseconds(1))); + EXPECT_EQ(NHugePages(100), tracker.MaxOverTime(kDuration / 2)); + EXPECT_EQ(NHugePages(100), tracker.MaxOverTime(kDuration)); + EXPECT_EQ(NHugePages(2), tracker.MinOverTime(absl::Nanoseconds(1))); + EXPECT_EQ(NHugePages(0), tracker.MinOverTime(kDuration / 2)); + EXPECT_EQ(NHugePages(0), tracker.MinOverTime(kDuration)); + + Advance(kDuration / 3); + tracker.Report(NHugePages(5)); + EXPECT_EQ(NHugePages(5), tracker.MaxOverTime(absl::Nanoseconds(1))); + EXPECT_EQ(NHugePages(5), tracker.MaxOverTime(kDuration / 2)); + EXPECT_EQ(NHugePages(100), tracker.MaxOverTime(kDuration)); + EXPECT_EQ(NHugePages(5), tracker.MinOverTime(absl::Nanoseconds(1))); + EXPECT_EQ(NHugePages(2), tracker.MinOverTime(kDuration / 2)); + EXPECT_EQ(NHugePages(0), tracker.MinOverTime(kDuration)); + + // This should annihilate everything. 
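+ // (Advancing two full windows ages out every earlier epoch, so only the
+ // fresh report of 1 hugepage remains visible to Min/MaxOverTime.)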
+ Advance(kDuration * 2); + tracker.Report(NHugePages(1)); + EXPECT_EQ(NHugePages(1), tracker.MaxOverTime(absl::Nanoseconds(1))); + EXPECT_EQ(NHugePages(1), tracker.MinOverTime(absl::Nanoseconds(1))); + EXPECT_EQ(NHugePages(1), tracker.MaxOverTime(kDuration)); + EXPECT_EQ(NHugePages(1), tracker.MinOverTime(kDuration)); +} + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_page_aware_allocator.cc b/contrib/libs/tcmalloc/tcmalloc/huge_page_aware_allocator.cc new file mode 100644 index 0000000000..e662456df6 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/huge_page_aware_allocator.cc @@ -0,0 +1,676 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/huge_page_aware_allocator.h" + +#include <stdint.h> +#include <string.h> + +#include <new> + +#include "absl/base/internal/cycleclock.h" +#include "absl/base/internal/spinlock.h" +#include "absl/time/time.h" +#include "tcmalloc/common.h" +#include "tcmalloc/experiment.h" +#include "tcmalloc/experiment_config.h" +#include "tcmalloc/huge_allocator.h" +#include "tcmalloc/huge_pages.h" +#include "tcmalloc/internal/environment.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/optimization.h" +#include "tcmalloc/pagemap.h" +#include "tcmalloc/parameters.h" +#include "tcmalloc/span.h" +#include "tcmalloc/static_vars.h" +#include "tcmalloc/stats.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +bool decide_want_hpaa(); +ABSL_ATTRIBUTE_WEAK int default_want_hpaa(); +ABSL_ATTRIBUTE_WEAK int default_subrelease(); + +bool decide_subrelease() { + if (!decide_want_hpaa()) { + // Subrelease is off if HPAA is off. + return false; + } + + const char *e = thread_safe_getenv("TCMALLOC_HPAA_CONTROL"); + if (e) { + switch (e[0]) { + case '0': + if (kPageShift <= 12) { + return false; + } + + if (default_want_hpaa != nullptr) { + int default_hpaa = default_want_hpaa(); + if (default_hpaa < 0) { + return false; + } + } + + Log(kLog, __FILE__, __LINE__, + "Runtime opt-out from HPAA requires building with " + "//tcmalloc:want_no_hpaa." 
+ ); + break; + case '1': + return false; + case '2': + return true; + default: + Crash(kCrash, __FILE__, __LINE__, "bad env var", e); + return false; + } + } + + if (default_subrelease != nullptr) { + const int decision = default_subrelease(); + if (decision != 0) { + return decision > 0; + } + } + + if (tcmalloc::IsExperimentActive(tcmalloc::Experiment::TCMALLOC_TEMERAIRE)) { + return false; + } + + return true; +} + +FillerPartialRerelease decide_partial_rerelease() { + const char *e = thread_safe_getenv("TCMALLOC_PARTIAL_RELEASE_CONTROL"); + if (e) { + if (e[0] == '0') { + return FillerPartialRerelease::Return; + } + if (e[0] == '1') { + return FillerPartialRerelease::Retain; + } + Crash(kCrash, __FILE__, __LINE__, "bad env var", e); + } + + return FillerPartialRerelease::Retain; +} + +// Some notes: locking discipline here is a bit funny, because +// we want to *not* hold the pageheap lock while backing memory. + +// We have here a collection of slightly different allocators each +// optimized for slightly different purposes. This file has two main purposes: +// - pick the right one for a given allocation +// - provide enough data to figure out what we picked last time! + +HugePageAwareAllocator::HugePageAwareAllocator(MemoryTag tag) + : PageAllocatorInterface("HugePageAware", tag), + filler_(decide_partial_rerelease()), + alloc_( + [](MemoryTag tag) { + // TODO(ckennelly): Remove the template parameter. + switch (tag) { + case MemoryTag::kNormal: + return AllocAndReport<MemoryTag::kNormal>; + case MemoryTag::kNormalP1: + return AllocAndReport<MemoryTag::kNormalP1>; + case MemoryTag::kSampled: + return AllocAndReport<MemoryTag::kSampled>; + default: + ASSUME(false); + __builtin_unreachable(); + } + }(tag), + MetaDataAlloc), + cache_(HugeCache{&alloc_, MetaDataAlloc, UnbackWithoutLock}) { + tracker_allocator_.Init(&Static::arena()); + region_allocator_.Init(&Static::arena()); +} + +HugePageAwareAllocator::FillerType::Tracker *HugePageAwareAllocator::GetTracker( + HugePage p) { + void *v = Static::pagemap().GetHugepage(p.first_page()); + FillerType::Tracker *pt = reinterpret_cast<FillerType::Tracker *>(v); + ASSERT(pt == nullptr || pt->location() == p); + return pt; +} + +void HugePageAwareAllocator::SetTracker( + HugePage p, HugePageAwareAllocator::FillerType::Tracker *pt) { + Static::pagemap().SetHugepage(p.first_page(), pt); +} + +PageId HugePageAwareAllocator::AllocAndContribute(HugePage p, Length n, + bool donated) { + CHECK_CONDITION(p.start_addr() != nullptr); + FillerType::Tracker *pt = tracker_allocator_.New(); + new (pt) FillerType::Tracker(p, absl::base_internal::CycleClock::Now()); + ASSERT(pt->longest_free_range() >= n); + PageId page = pt->Get(n).page; + ASSERT(page == p.first_page()); + SetTracker(p, pt); + filler_.Contribute(pt, donated); + return page; +} + +PageId HugePageAwareAllocator::RefillFiller(Length n, bool *from_released) { + HugeRange r = cache_.Get(NHugePages(1), from_released); + if (!r.valid()) return PageId{0}; + // This is duplicate to Finalize, but if we need to break up + // hugepages to get to our usage limit it would be very bad to break + // up what's left of r after we allocate from there--while r is + // mostly empty, clearly what's left in the filler is too fragmented + // to be very useful, and we would rather release those + // pages. Otherwise, we're nearly guaranteed to release r (if n + // isn't very large), and the next allocation will just repeat this + // process. 
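+ // In short: enforce the usage limit now, while r is still a mostly-empty
+ // hugepage, and only then carve the n-page allocation out of it below.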
+ Static::page_allocator().ShrinkToUsageLimit(); + return AllocAndContribute(r.start(), n, /*donated=*/false); +} + +Span *HugePageAwareAllocator::Finalize(Length n, PageId page) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { + ASSERT(page != PageId{0}); + Span *ret = Span::New(page, n); + Static::pagemap().Set(page, ret); + ASSERT(!ret->sampled()); + info_.RecordAlloc(page, n); + Static::page_allocator().ShrinkToUsageLimit(); + return ret; +} + +// For anything <= half a huge page, we will unconditionally use the filler +// to pack it into a single page. If we need another page, that's fine. +Span *HugePageAwareAllocator::AllocSmall(Length n, bool *from_released) { + auto [pt, page] = filler_.TryGet(n); + if (ABSL_PREDICT_TRUE(pt != nullptr)) { + *from_released = false; + return Finalize(n, page); + } + + page = RefillFiller(n, from_released); + if (ABSL_PREDICT_FALSE(page == PageId{0})) { + return nullptr; + } + return Finalize(n, page); +} + +Span *HugePageAwareAllocator::AllocLarge(Length n, bool *from_released) { + // If it's an exact page multiple, just pull it from pages directly. + HugeLength hl = HLFromPages(n); + if (hl.in_pages() == n) { + return AllocRawHugepages(n, from_released); + } + + PageId page; + // If we fit in a single hugepage, try the Filler first. + if (n < kPagesPerHugePage) { + auto [pt, page] = filler_.TryGet(n); + if (ABSL_PREDICT_TRUE(pt != nullptr)) { + *from_released = false; + return Finalize(n, page); + } + } + + // If we're using regions in this binary (see below comment), is + // there currently available space there? + if (regions_.MaybeGet(n, &page, from_released)) { + return Finalize(n, page); + } + + // We have two choices here: allocate a new region or go to + // hugepages directly (hoping that slack will be filled by small + // allocation.) The second strategy is preferrable, as it's + // typically faster and usually more space efficient, but it's sometimes + // catastrophic. + // + // See https://github.com/google/tcmalloc/tree/master/docs/regions-are-not-optional.md + // + // So test directly if we're in the bad case--almost no binaries are. + // If not, just fall back to direct allocation (and hope we do hit that case!) + const Length slack = info_.slack(); + // Don't bother at all until the binary is reasonably sized + if (slack < HLFromBytes(64 * 1024 * 1024).in_pages()) { + return AllocRawHugepages(n, from_released); + } + + // In the vast majority of binaries, we have many small allocations which + // will nicely fill slack. (Fleetwide, the average ratio is 15:1; only + // a handful of binaries fall below 1:1.) + const Length small = info_.small(); + if (slack < small) { + return AllocRawHugepages(n, from_released); + } + + // We couldn't allocate a new region. They're oversized, so maybe we'd get + // lucky with a smaller request? + if (!AddRegion()) { + return AllocRawHugepages(n, from_released); + } + + CHECK_CONDITION(regions_.MaybeGet(n, &page, from_released)); + return Finalize(n, page); +} + +Span *HugePageAwareAllocator::AllocEnormous(Length n, bool *from_released) { + return AllocRawHugepages(n, from_released); +} + +Span *HugePageAwareAllocator::AllocRawHugepages(Length n, bool *from_released) { + HugeLength hl = HLFromPages(n); + + HugeRange r = cache_.Get(hl, from_released); + if (!r.valid()) return nullptr; + + // We now have a huge page range that covers our request. There + // might be some slack in it if n isn't a multiple of + // kPagesPerHugePage. 
Add the hugepage with slack to the filler, + // pretending the non-slack portion is a smaller allocation. + Length total = hl.in_pages(); + Length slack = total - n; + HugePage first = r.start(); + SetTracker(first, nullptr); + HugePage last = first + r.len() - NHugePages(1); + if (slack == Length(0)) { + SetTracker(last, nullptr); + return Finalize(total, r.start().first_page()); + } + + ++donated_huge_pages_; + + Length here = kPagesPerHugePage - slack; + ASSERT(here > Length(0)); + AllocAndContribute(last, here, /*donated=*/true); + return Finalize(n, r.start().first_page()); +} + +static void BackSpan(Span *span) { + SystemBack(span->start_address(), span->bytes_in_span()); +} + +// public +Span *HugePageAwareAllocator::New(Length n) { + CHECK_CONDITION(n > Length(0)); + bool from_released; + Span *s = LockAndAlloc(n, &from_released); + if (s) { + // Prefetch for writing, as we anticipate using the memory soon. + __builtin_prefetch(s->start_address(), 1, 3); + if (from_released) BackSpan(s); + } + ASSERT(!s || GetMemoryTag(s->start_address()) == tag_); + return s; +} + +Span *HugePageAwareAllocator::LockAndAlloc(Length n, bool *from_released) { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + // Our policy depends on size. For small things, we will pack them + // into single hugepages. + if (n <= kPagesPerHugePage / 2) { + return AllocSmall(n, from_released); + } + + // For anything too big for the filler, we use either a direct hugepage + // allocation, or possibly the regions if we are worried about slack. + if (n <= HugeRegion::size().in_pages()) { + return AllocLarge(n, from_released); + } + + // In the worst case, we just fall back to directly allocating a run + // of hugepages. + return AllocEnormous(n, from_released); +} + +// public +Span *HugePageAwareAllocator::NewAligned(Length n, Length align) { + if (align <= Length(1)) { + return New(n); + } + + // we can do better than this, but... + // TODO(b/134690769): support higher align. + CHECK_CONDITION(align <= kPagesPerHugePage); + bool from_released; + Span *s; + { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + s = AllocRawHugepages(n, &from_released); + } + if (s && from_released) BackSpan(s); + ASSERT(!s || GetMemoryTag(s->start_address()) == tag_); + return s; +} + +void HugePageAwareAllocator::DeleteFromHugepage(FillerType::Tracker *pt, + PageId p, Length n) { + if (ABSL_PREDICT_TRUE(filler_.Put(pt, p, n) == nullptr)) return; + if (pt->donated()) { + --donated_huge_pages_; + } + ReleaseHugepage(pt); +} + +bool HugePageAwareAllocator::AddRegion() { + HugeRange r = alloc_.Get(HugeRegion::size()); + if (!r.valid()) return false; + HugeRegion *region = region_allocator_.New(); + new (region) HugeRegion(r, SystemRelease); + regions_.Contribute(region); + return true; +} + +void HugePageAwareAllocator::Delete(Span *span) { + ASSERT(!span || GetMemoryTag(span->start_address()) == tag_); + PageId p = span->first_page(); + HugePage hp = HugePageContaining(p); + Length n = span->num_pages(); + info_.RecordFree(p, n); + + Span::Delete(span); + + // The tricky part, as with so many allocators: where did we come from? + // There are several possibilities. + FillerType::Tracker *pt = GetTracker(hp); + // a) We got packed by the filler onto a single hugepage - return our + // allocation to that hugepage in the filler. 
+ if (ABSL_PREDICT_TRUE(pt != nullptr)) { + ASSERT(hp == HugePageContaining(p + n - Length(1))); + DeleteFromHugepage(pt, p, n); + return; + } + + // b) We got put into a region, possibly crossing hugepages - + // return our allocation to the region. + if (regions_.MaybePut(p, n)) return; + + // c) we came straight from the HugeCache - return straight there. (We + // might have had slack put into the filler - if so, return that virtual + // allocation to the filler too!) + ASSERT(n >= kPagesPerHugePage); + HugeLength hl = HLFromPages(n); + HugePage last = hp + hl - NHugePages(1); + Length slack = hl.in_pages() - n; + if (slack == Length(0)) { + ASSERT(GetTracker(last) == nullptr); + } else { + pt = GetTracker(last); + CHECK_CONDITION(pt != nullptr); + // We put the slack into the filler (see AllocEnormous.) + // Handle this page separately as a virtual allocation + // onto the last hugepage. + PageId virt = last.first_page(); + Length virt_len = kPagesPerHugePage - slack; + pt = filler_.Put(pt, virt, virt_len); + // We may have used the slack, which would prevent us from returning + // the entire range now. If filler returned a Tracker, we are fully empty. + if (pt == nullptr) { + // Last page isn't empty -- pretend the range was shorter. + --hl; + } else { + // Last page was empty - but if we sub-released it, we still + // have to split it off and release it independently.) + if (pt->released()) { + --hl; + ReleaseHugepage(pt); + } else { + // Get rid of the tracker *object*, but not the *hugepage* + // (which is still part of our range.) We were able to reclaim the + // contributed slack. + --donated_huge_pages_; + SetTracker(pt->location(), nullptr); + tracker_allocator_.Delete(pt); + } + } + } + cache_.Release({hp, hl}); +} + +void HugePageAwareAllocator::ReleaseHugepage(FillerType::Tracker *pt) { + ASSERT(pt->used_pages() == Length(0)); + HugeRange r = {pt->location(), NHugePages(1)}; + SetTracker(pt->location(), nullptr); + + if (pt->released()) { + cache_.ReleaseUnbacked(r); + } else { + cache_.Release(r); + } + + tracker_allocator_.Delete(pt); +} + +// public +BackingStats HugePageAwareAllocator::stats() const { + BackingStats stats = alloc_.stats(); + const auto actual_system = stats.system_bytes; + stats += cache_.stats(); + stats += filler_.stats(); + stats += regions_.stats(); + // the "system" (total managed) byte count is wildly double counted, + // since it all comes from HugeAllocator but is then managed by + // cache/regions/filler. Adjust for that. + stats.system_bytes = actual_system; + return stats; +} + +// public +void HugePageAwareAllocator::GetSmallSpanStats(SmallSpanStats *result) { + GetSpanStats(result, nullptr, nullptr); +} + +// public +void HugePageAwareAllocator::GetLargeSpanStats(LargeSpanStats *result) { + GetSpanStats(nullptr, result, nullptr); +} + +void HugePageAwareAllocator::GetSpanStats(SmallSpanStats *small, + LargeSpanStats *large, + PageAgeHistograms *ages) { + if (small != nullptr) { + *small = SmallSpanStats(); + } + if (large != nullptr) { + *large = LargeSpanStats(); + } + + alloc_.AddSpanStats(small, large, ages); + filler_.AddSpanStats(small, large, ages); + regions_.AddSpanStats(small, large, ages); + cache_.AddSpanStats(small, large, ages); +} + +// public +Length HugePageAwareAllocator::ReleaseAtLeastNPages(Length num_pages) { + Length released; + released += cache_.ReleaseCachedPages(HLFromPages(num_pages)).in_pages(); + + // This is our long term plan but in current state will lead to insufficent + // THP coverage. 
It is however very useful to have the ability to turn this on + // for testing. + // TODO(b/134690769): make this work, remove the flag guard. + if (Parameters::hpaa_subrelease()) { + if (released < num_pages) { + released += filler_.ReleasePages( + num_pages - released, Parameters::filler_skip_subrelease_interval(), + /*hit_limit*/ false); + } + } + + // TODO(b/134690769): + // - perhaps release region? + // - refuse to release if we're too close to zero? + info_.RecordRelease(num_pages, released); + return released; +} + +static double BytesToMiB(size_t bytes) { + const double MiB = 1048576.0; + return bytes / MiB; +} + +static void BreakdownStats(Printer *out, const BackingStats &s, + const char *label) { + out->printf("%s %6.1f MiB used, %6.1f MiB free, %6.1f MiB unmapped\n", label, + BytesToMiB(s.system_bytes - s.free_bytes - s.unmapped_bytes), + BytesToMiB(s.free_bytes), BytesToMiB(s.unmapped_bytes)); +} + +static void BreakdownStatsInPbtxt(PbtxtRegion *hpaa, const BackingStats &s, + const char *key) { + auto usage = hpaa->CreateSubRegion(key); + usage.PrintI64("used", s.system_bytes - s.free_bytes - s.unmapped_bytes); + usage.PrintI64("free", s.free_bytes); + usage.PrintI64("unmapped", s.unmapped_bytes); +} + +// public +void HugePageAwareAllocator::Print(Printer *out) { Print(out, true); } + +void HugePageAwareAllocator::Print(Printer *out, bool everything) { + SmallSpanStats small; + LargeSpanStats large; + BackingStats bstats; + PageAgeHistograms ages(absl::base_internal::CycleClock::Now()); + absl::base_internal::SpinLockHolder h(&pageheap_lock); + bstats = stats(); + GetSpanStats(&small, &large, &ages); + PrintStats("HugePageAware", out, bstats, small, large, everything); + out->printf( + "\nHuge page aware allocator components:\n" + "------------------------------------------------\n"); + out->printf("HugePageAware: breakdown of used / free / unmapped space:\n"); + + auto fstats = filler_.stats(); + BreakdownStats(out, fstats, "HugePageAware: filler"); + + auto rstats = regions_.stats(); + BreakdownStats(out, rstats, "HugePageAware: region"); + + auto cstats = cache_.stats(); + // Everything in the filler came from the cache - + // adjust the totals so we see the amount used by the mutator. + cstats.system_bytes -= fstats.system_bytes; + BreakdownStats(out, cstats, "HugePageAware: cache "); + + auto astats = alloc_.stats(); + // Everything in *all* components came from here - + // so again adjust the totals. + astats.system_bytes -= (fstats + rstats + cstats).system_bytes; + BreakdownStats(out, astats, "HugePageAware: alloc "); + out->printf("\n"); + + out->printf("HugePageAware: filler donations %zu\n", + donated_huge_pages_.raw_num()); + + // Component debug output + // Filler is by far the most important; print (some) of it + // unconditionally. + filler_.Print(out, everything); + out->printf("\n"); + if (everything) { + regions_.Print(out); + out->printf("\n"); + cache_.Print(out); + out->printf("\n"); + alloc_.Print(out); + out->printf("\n"); + + // Use statistics + info_.Print(out); + + // and age tracking. + ages.Print("HugePageAware", out); + } + + out->printf("PARAMETER hpaa_subrelease %d\n", + Parameters::hpaa_subrelease() ? 
1 : 0); +} + +void HugePageAwareAllocator::PrintInPbtxt(PbtxtRegion *region) { + SmallSpanStats small; + LargeSpanStats large; + PageAgeHistograms ages(absl::base_internal::CycleClock::Now()); + absl::base_internal::SpinLockHolder h(&pageheap_lock); + GetSpanStats(&small, &large, &ages); + PrintStatsInPbtxt(region, small, large, ages); + { + auto hpaa = region->CreateSubRegion("huge_page_allocator"); + hpaa.PrintBool("using_hpaa", true); + hpaa.PrintBool("using_hpaa_subrelease", Parameters::hpaa_subrelease()); + + // Fill HPAA Usage + auto fstats = filler_.stats(); + BreakdownStatsInPbtxt(&hpaa, fstats, "filler_usage"); + + auto rstats = regions_.stats(); + BreakdownStatsInPbtxt(&hpaa, rstats, "region_usage"); + + auto cstats = cache_.stats(); + // Everything in the filler came from the cache - + // adjust the totals so we see the amount used by the mutator. + cstats.system_bytes -= fstats.system_bytes; + BreakdownStatsInPbtxt(&hpaa, cstats, "cache_usage"); + + auto astats = alloc_.stats(); + // Everything in *all* components came from here - + // so again adjust the totals. + astats.system_bytes -= (fstats + rstats + cstats).system_bytes; + BreakdownStatsInPbtxt(&hpaa, astats, "alloc_usage"); + + filler_.PrintInPbtxt(&hpaa); + regions_.PrintInPbtxt(&hpaa); + cache_.PrintInPbtxt(&hpaa); + alloc_.PrintInPbtxt(&hpaa); + + // Use statistics + info_.PrintInPbtxt(&hpaa, "hpaa_stat"); + + hpaa.PrintI64("filler_donated_huge_pages", donated_huge_pages_.raw_num()); + } +} + +template <MemoryTag tag> +void *HugePageAwareAllocator::AllocAndReport(size_t bytes, size_t *actual, + size_t align) { + void *p = SystemAlloc(bytes, actual, align, tag); + if (p == nullptr) return p; + const PageId page = PageIdContaining(p); + const Length page_len = BytesToLengthFloor(*actual); + Static::pagemap().Ensure(page, page_len); + return p; +} + +void *HugePageAwareAllocator::MetaDataAlloc(size_t bytes) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { + return Static::arena().Alloc(bytes); +} + +Length HugePageAwareAllocator::ReleaseAtLeastNPagesBreakingHugepages(Length n) { + // We desparately need to release memory, and are willing to + // compromise on hugepage usage. That means releasing from the filler. + return filler_.ReleasePages(n, absl::ZeroDuration(), /*hit_limit*/ true); +} + +void HugePageAwareAllocator::UnbackWithoutLock(void *start, size_t length) { + pageheap_lock.Unlock(); + SystemRelease(start, length); + pageheap_lock.Lock(); +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_page_aware_allocator.h b/contrib/libs/tcmalloc/tcmalloc/huge_page_aware_allocator.h new file mode 100644 index 0000000000..c36a1e515e --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/huge_page_aware_allocator.h @@ -0,0 +1,175 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
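+//
+// A brief sketch (informal summary, derived from LockAndAlloc() in
+// huge_page_aware_allocator.cc) of how this allocator routes requests by
+// size: requests of at most kPagesPerHugePage / 2 are packed into single
+// hugepages via the filler (AllocSmall); requests up to
+// HugeRegion::size().in_pages() go through AllocLarge (filler, regions, or
+// raw hugepages depending on fit and slack); anything larger is handled by
+// AllocEnormous as a raw run of hugepages.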
+ +#ifndef TCMALLOC_HUGE_PAGE_AWARE_ALLOCATOR_H_ +#define TCMALLOC_HUGE_PAGE_AWARE_ALLOCATOR_H_ + +#include <stddef.h> + +#include "absl/base/thread_annotations.h" +#include "tcmalloc/arena.h" +#include "tcmalloc/common.h" +#include "tcmalloc/huge_allocator.h" +#include "tcmalloc/huge_cache.h" +#include "tcmalloc/huge_page_filler.h" +#include "tcmalloc/huge_pages.h" +#include "tcmalloc/huge_region.h" +#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/page_allocator_interface.h" +#include "tcmalloc/page_heap_allocator.h" +#include "tcmalloc/span.h" +#include "tcmalloc/stats.h" +#include "tcmalloc/system-alloc.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +bool decide_subrelease(); + +// An implementation of the PageAllocator interface that is hugepage-efficent. +// Attempts to pack allocations into full hugepages wherever possible, +// and aggressively returns empty ones to the system. +class HugePageAwareAllocator final : public PageAllocatorInterface { + public: + explicit HugePageAwareAllocator(MemoryTag tag); + ~HugePageAwareAllocator() override = default; + + // Allocate a run of "n" pages. Returns zero if out of memory. + // Caller should not pass "n == 0" -- instead, n should have + // been rounded up already. + Span* New(Length n) ABSL_LOCKS_EXCLUDED(pageheap_lock) override; + + // As New, but the returned span is aligned to a <align>-page boundary. + // <align> must be a power of two. + Span* NewAligned(Length n, Length align) + ABSL_LOCKS_EXCLUDED(pageheap_lock) override; + + // Delete the span "[p, p+n-1]". + // REQUIRES: span was returned by earlier call to New() and + // has not yet been deleted. + void Delete(Span* span) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) override; + + BackingStats stats() const + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) override; + + void GetSmallSpanStats(SmallSpanStats* result) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) override; + + void GetLargeSpanStats(LargeSpanStats* result) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) override; + + // Try to release at least num_pages for reuse by the OS. Returns + // the actual number of pages released, which may be less than + // num_pages if there weren't enough pages to release. The result + // may also be larger than num_pages since page_heap might decide to + // release one large range instead of fragmenting it into two + // smaller released and unreleased ranges. + Length ReleaseAtLeastNPages(Length num_pages) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) override; + + Length ReleaseAtLeastNPagesBreakingHugepages(Length n) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Prints stats about the page heap to *out. + void Print(Printer* out) ABSL_LOCKS_EXCLUDED(pageheap_lock) override; + + // Print stats to *out, excluding long/likely uninteresting things + // unless <everything> is true. + void Print(Printer* out, bool everything) ABSL_LOCKS_EXCLUDED(pageheap_lock); + + void PrintInPbtxt(PbtxtRegion* region) + ABSL_LOCKS_EXCLUDED(pageheap_lock) override; + + HugeLength DonatedHugePages() const + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { + return donated_huge_pages_; + } + + const HugeCache* cache() const { return &cache_; } + + private: + typedef HugePageFiller<PageTracker<SystemRelease>> FillerType; + FillerType filler_; + + // Calls SystemRelease, but with dropping of pageheap_lock around the call. 
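+  // (This function is passed to the HugeCache in the constructor, so that
+  // unbacking memory never happens while pageheap_lock is held; see
+  // UnbackWithoutLock in huge_page_aware_allocator.cc.)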
+ static void UnbackWithoutLock(void* start, size_t length) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + HugeRegionSet<HugeRegion> regions_; + + PageHeapAllocator<FillerType::Tracker> tracker_allocator_; + PageHeapAllocator<HugeRegion> region_allocator_; + + FillerType::Tracker* GetTracker(HugePage p); + + void SetTracker(HugePage p, FillerType::Tracker* pt); + + template <MemoryTag tag> + static void* AllocAndReport(size_t bytes, size_t* actual, size_t align) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + static void* MetaDataAlloc(size_t bytes); + HugeAllocator alloc_; + HugeCache cache_; + + // donated_huge_pages_ measures the number of huge pages contributed to the + // filler from left overs of large huge page allocations. When the large + // allocation is deallocated, we decrement this count *if* we were able to + // fully reassemble the address range (that is, the partial hugepage did not + // get stuck in the filler). + HugeLength donated_huge_pages_ ABSL_GUARDED_BY(pageheap_lock); + + void GetSpanStats(SmallSpanStats* small, LargeSpanStats* large, + PageAgeHistograms* ages); + + PageId RefillFiller(Length n, bool* from_released) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Allocate the first <n> from p, and contribute the rest to the filler. If + // "donated" is true, the contribution will be marked as coming from the + // tail of a multi-hugepage alloc. Returns the allocated section. + PageId AllocAndContribute(HugePage p, Length n, bool donated) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + // Helpers for New(). + + Span* LockAndAlloc(Length n, bool* from_released); + + Span* AllocSmall(Length n, bool* from_released) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + Span* AllocLarge(Length n, bool* from_released) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + Span* AllocEnormous(Length n, bool* from_released) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + Span* AllocRawHugepages(Length n, bool* from_released) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + bool AddRegion() ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + void ReleaseHugepage(FillerType::Tracker* pt) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + // Return an allocation from a single hugepage. + void DeleteFromHugepage(FillerType::Tracker* pt, PageId p, Length n) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Finish an allocation request - give it a span and mark it in the pagemap. + Span* Finalize(Length n, PageId page); +}; + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_HUGE_PAGE_AWARE_ALLOCATOR_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_page_aware_allocator_test.cc b/contrib/libs/tcmalloc/tcmalloc/huge_page_aware_allocator_test.cc new file mode 100644 index 0000000000..83ae930e44 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/huge_page_aware_allocator_test.cc @@ -0,0 +1,957 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
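+//
+// This file covers fuzz and multithreaded allocation tests, donation
+// accounting checks, and (in StatTest below) validation of the reported
+// BackingStats against actual residency measured with mincore().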
+ +#include "tcmalloc/huge_page_aware_allocator.h" + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <unistd.h> + +#include <algorithm> +#include <cstdint> +#include <limits> +#include <new> +#include <string> +#include <thread> // NOLINT(build/c++11) +#include <utility> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/attributes.h" +#include "absl/base/const_init.h" +#include "absl/base/internal/spinlock.h" +#include "absl/base/internal/sysinfo.h" +#include "absl/container/flat_hash_map.h" +#include "absl/flags/flag.h" +#include "absl/random/random.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_format.h" +#include "absl/strings/str_split.h" +#include "absl/synchronization/barrier.h" +#include "absl/synchronization/mutex.h" +#include "absl/time/time.h" +#include "benchmark/benchmark.h" +#include "tcmalloc/common.h" +#include "tcmalloc/huge_pages.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/malloc_extension.h" +#include "tcmalloc/page_allocator_test_util.h" +#include "tcmalloc/parameters.h" +#include "tcmalloc/span.h" +#include "tcmalloc/static_vars.h" +#include "tcmalloc/stats.h" +#include "tcmalloc/system-alloc.h" +#include "tcmalloc/testing/thread_manager.h" + +ABSL_FLAG(std::string, tracefile, "", "file to pull trace from"); +ABSL_FLAG(uint64_t, limit, 0, ""); +ABSL_FLAG(bool, always_check_usage, false, "enable expensive memory checks"); + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +using testing::HasSubstr; + +class HugePageAwareAllocatorTest : public ::testing::Test { + protected: + HugePageAwareAllocatorTest() : rng_() { + before_ = MallocExtension::GetRegionFactory(); + extra_ = new ExtraRegionFactory(before_); + MallocExtension::SetRegionFactory(extra_); + + // HugePageAwareAllocator can't be destroyed cleanly, so we store a pointer + // to one and construct in place. + void* p = malloc(sizeof(HugePageAwareAllocator)); + allocator_ = new (p) HugePageAwareAllocator(MemoryTag::kNormal); + } + + ~HugePageAwareAllocatorTest() override { + CHECK_CONDITION(ids_.empty()); + CHECK_CONDITION(total_ == Length(0)); + // We end up leaking both the backing allocations and the metadata. + // The backing allocations are unmapped--it's silly, but not + // costing us muchin a 64-bit address space. + // The metadata is real memory, but there's barely any of it. + // It'd be very complicated to rebuild the allocator to support + // teardown, so we just put up with it. 
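+    // Before leaking everything, sanity-check the final stats: with all test
+    // allocations gone, every byte the allocator manages should be either
+    // free or unmapped (free_bytes + unmapped_bytes == system_bytes), which
+    // is what the block below verifies.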
+ { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + auto stats = allocator_->stats(); + if (stats.free_bytes + stats.unmapped_bytes != stats.system_bytes) { + Crash(kCrash, __FILE__, __LINE__, stats.free_bytes, + stats.unmapped_bytes, "!=", stats.system_bytes); + } + } + + free(allocator_); + + MallocExtension::SetRegionFactory(before_); + delete extra_; + } + + void CheckStats() { + size_t actual_used_bytes = total_.in_bytes(); + BackingStats stats; + { + absl::base_internal::SpinLockHolder h2(&pageheap_lock); + stats = allocator_->stats(); + } + uint64_t used_bytes = + stats.system_bytes - stats.free_bytes - stats.unmapped_bytes; + ASSERT_EQ(used_bytes, actual_used_bytes); + } + + uint64_t GetFreeBytes() { + BackingStats stats; + { + absl::base_internal::SpinLockHolder h2(&pageheap_lock); + stats = allocator_->stats(); + } + return stats.free_bytes; + } + + Span* AllocatorNew(Length n) { return allocator_->New(n); } + + void AllocatorDelete(Span* s) { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + allocator_->Delete(s); + } + + Span* New(Length n) { + absl::base_internal::SpinLockHolder h(&lock_); + Span* span = AllocatorNew(n); + CHECK_CONDITION(span != nullptr); + EXPECT_GE(span->num_pages(), n); + const size_t id = next_id_++; + total_ += n; + CheckStats(); + // and distinct spans... + CHECK_CONDITION(ids_.insert({span, id}).second); + return span; + } + + void Delete(Span* span) { + Length n = span->num_pages(); + { + absl::base_internal::SpinLockHolder h(&lock_); + auto i = ids_.find(span); + CHECK_CONDITION(i != ids_.end()); + const size_t id = i->second; + ids_.erase(i); + AllocatorDelete(span); + total_ -= n; + CheckStats(); + } + } + + // Mostly small things, some large ones. + Length RandomAllocSize() { + // TODO(b/128521238): scalable RNG + absl::base_internal::SpinLockHolder h(&lock_); + if (absl::Bernoulli(rng_, 1.0 / 1000)) { + Length n = + Length(1024) * (1 + absl::LogUniform<int32_t>(rng_, 0, (1 << 8) - 1)); + n += Length(absl::Uniform<int32_t>(rng_, 0, 1024)); + return n; + } + return Length(1 + absl::LogUniform<int32_t>(rng_, 0, (1 << 9) - 1)); + } + + Length ReleasePages(Length k) { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + return allocator_->ReleaseAtLeastNPages(k); + } + + std::string Print() { + std::string ret; + const size_t kSize = 1 << 20; + ret.resize(kSize); + Printer p(&ret[0], kSize); + allocator_->Print(&p); + ret.erase(p.SpaceRequired()); + return ret; + } + + std::string PrintInPbTxt() { + std::string ret; + const size_t kSize = 1 << 20; + ret.resize(kSize); + Printer p(&ret[0], kSize); + { + PbtxtRegion region(&p, kNested, 0); + allocator_->PrintInPbtxt(®ion); + } + ret.erase(p.SpaceRequired()); + return ret; + } + + HugePageAwareAllocator* allocator_; + ExtraRegionFactory* extra_; + AddressRegionFactory* before_; + absl::BitGen rng_; + absl::base_internal::SpinLock lock_; + absl::flat_hash_map<Span*, size_t> ids_; + size_t next_id_{0}; + Length total_; +}; + +TEST_F(HugePageAwareAllocatorTest, Fuzz) { + std::vector<Span*> allocs; + for (int i = 0; i < 5000; ++i) { + Length n = RandomAllocSize(); + allocs.push_back(New(n)); + } + static const size_t kReps = 50 * 1000; + for (int i = 0; i < kReps; ++i) { + SCOPED_TRACE(absl::StrFormat("%d reps, %d pages", i, total_.raw_num())); + size_t index = absl::Uniform<int32_t>(rng_, 0, allocs.size()); + Span* old = allocs[index]; + Delete(old); + Length n = RandomAllocSize(); + allocs[index] = New(n); + } + + for (auto s : allocs) { + Delete(s); + } +} + +// Prevent 
regression of the fragmentation problem that was reported in +// b/63301358, reproduced in CL/161345659 and (partially) fixed in CL/161305971. +TEST_F(HugePageAwareAllocatorTest, JustUnderMultipleOfHugepages) { + std::vector<Span*> big_allocs, small_allocs; + // Trigger creation of a hugepage with more than one allocation and plenty of + // free space. + small_allocs.push_back(New(Length(1))); + small_allocs.push_back(New(Length(10))); + // Limit iterations so that the huge page with the small allocs doesn't fill + // up. + size_t n_iter = (kPagesPerHugePage - Length(2)).raw_num(); + // Also limit memory usage to ~1 GB. + n_iter = std::min((1 << 30) / (2 * kHugePageSize), n_iter); + for (int i = 0; i < n_iter; ++i) { + Length n = 2 * kPagesPerHugePage - Length(1); + big_allocs.push_back(New(n)); + small_allocs.push_back(New(Length(1))); + } + for (auto* span : big_allocs) { + Delete(span); + } + // We should have one hugepage that's full of small allocations and a bunch + // of empty hugepages. The HugeCache will keep some of the empty hugepages + // backed so free space should drop to a small multiple of the huge page size. + EXPECT_LE(GetFreeBytes(), 20 * kHugePageSize); + for (auto* span : small_allocs) { + Delete(span); + } +} + +TEST_F(HugePageAwareAllocatorTest, Multithreaded) { + static const size_t kThreads = 16; + std::vector<std::thread> threads; + threads.reserve(kThreads); + absl::Barrier b1(kThreads); + absl::Barrier b2(kThreads); + for (int i = 0; i < kThreads; ++i) { + threads.push_back(std::thread([this, &b1, &b2]() { + absl::BitGen rng; + std::vector<Span*> allocs; + for (int i = 0; i < 150; ++i) { + Length n = RandomAllocSize(); + allocs.push_back(New(n)); + } + b1.Block(); + static const size_t kReps = 4 * 1000; + for (int i = 0; i < kReps; ++i) { + size_t index = absl::Uniform<int32_t>(rng, 0, allocs.size()); + Delete(allocs[index]); + Length n = RandomAllocSize(); + allocs[index] = New(n); + } + b2.Block(); + for (auto s : allocs) { + Delete(s); + } + })); + } + + for (auto& t : threads) { + t.join(); + } +} + +TEST_F(HugePageAwareAllocatorTest, ReleasingLarge) { + // Ensure the HugeCache has some free items: + Delete(New(kPagesPerHugePage)); + ASSERT_LE(kPagesPerHugePage, ReleasePages(kPagesPerHugePage)); +} + +TEST_F(HugePageAwareAllocatorTest, ReleasingSmall) { + const bool old_subrelease = Parameters::hpaa_subrelease(); + Parameters::set_hpaa_subrelease(true); + + const absl::Duration old_skip_subrelease = + Parameters::filler_skip_subrelease_interval(); + Parameters::set_filler_skip_subrelease_interval(absl::ZeroDuration()); + + std::vector<Span*> live, dead; + static const size_t N = kPagesPerHugePage.raw_num() * 128; + for (int i = 0; i < N; ++i) { + Span* span = New(Length(1)); + ((i % 2 == 0) ? live : dead).push_back(span); + } + + for (auto d : dead) { + Delete(d); + } + + EXPECT_EQ(kPagesPerHugePage / 2, ReleasePages(Length(1))); + + for (auto l : live) { + Delete(l); + } + + Parameters::set_hpaa_subrelease(old_subrelease); + Parameters::set_filler_skip_subrelease_interval(old_skip_subrelease); +} + +TEST_F(HugePageAwareAllocatorTest, DonatedHugePages) { + // This test verifies that we accurately measure the amount of RAM that we + // donate to the huge page filler when making large allocations, including + // those kept alive after we deallocate. 
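+  // Worked example of the arithmetic below: kLargeSize spans two hugepages
+  // minus kSlack pages, so the tail hugepage carries kSlack pages of slack
+  // that get donated to the filler.  Hence we expect info().slack() == kSlack
+  // and DonatedHugePages() == NHugePages(1) right after the first New().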
+ static constexpr Length kSlack = Length(2); + static constexpr Length kLargeSize = 2 * kPagesPerHugePage - kSlack; + static constexpr Length kSmallSize = Length(1); + + Span* large1 = New(kLargeSize); + Length slack; + HugeLength donated_huge_pages; + { + absl::base_internal::SpinLockHolder l(&pageheap_lock); + slack = allocator_->info().slack(); + donated_huge_pages = allocator_->DonatedHugePages(); + } + EXPECT_EQ(slack, kSlack); + EXPECT_EQ(donated_huge_pages, NHugePages(1)); + + EXPECT_THAT(Print(), HasSubstr("filler donations 1")); + EXPECT_THAT(PrintInPbTxt(), HasSubstr("filler_donated_huge_pages: 1")); + + // Make a small allocation and then free the large allocation. Slack should + // fall, but we've kept alive our donation to the filler. + Span* small = New(kSmallSize); + Delete(large1); + { + absl::base_internal::SpinLockHolder l(&pageheap_lock); + slack = allocator_->info().slack(); + donated_huge_pages = allocator_->DonatedHugePages(); + } + EXPECT_EQ(slack, Length(0)); + EXPECT_EQ(donated_huge_pages, NHugePages(1)); + + EXPECT_THAT(Print(), HasSubstr("filler donations 1")); + EXPECT_THAT(PrintInPbTxt(), HasSubstr("filler_donated_huge_pages: 1")); + + // Make another large allocation. The number of donated huge pages should + // continue to increase. + Span* large2 = New(kLargeSize); + { + absl::base_internal::SpinLockHolder l(&pageheap_lock); + slack = allocator_->info().slack(); + donated_huge_pages = allocator_->DonatedHugePages(); + } + EXPECT_EQ(slack, kSlack); + EXPECT_EQ(donated_huge_pages, NHugePages(2)); + + EXPECT_THAT(Print(), HasSubstr("filler donations 2")); + EXPECT_THAT(PrintInPbTxt(), HasSubstr("filler_donated_huge_pages: 2")); + + // Deallocating the small allocation does not reduce the number of donations, + // as we were unable to reassemble the VSS for large1. + Delete(small); + { + absl::base_internal::SpinLockHolder l(&pageheap_lock); + slack = allocator_->info().slack(); + donated_huge_pages = allocator_->DonatedHugePages(); + } + EXPECT_EQ(slack, kSlack); + EXPECT_EQ(donated_huge_pages, NHugePages(2)); + + EXPECT_THAT(Print(), HasSubstr("filler donations 2")); + EXPECT_THAT(PrintInPbTxt(), HasSubstr("filler_donated_huge_pages: 2")); + + // Deallocating everything should return slack to 0 and allow large2's + // contiguous VSS to be reassembled. + Delete(large2); + { + absl::base_internal::SpinLockHolder l(&pageheap_lock); + slack = allocator_->info().slack(); + donated_huge_pages = allocator_->DonatedHugePages(); + } + EXPECT_EQ(slack, Length(0)); + EXPECT_EQ(donated_huge_pages, NHugePages(1)); + + EXPECT_THAT(Print(), HasSubstr("filler donations 1")); + EXPECT_THAT(PrintInPbTxt(), HasSubstr("filler_donated_huge_pages: 1")); +} + +TEST_F(HugePageAwareAllocatorTest, PageMapInterference) { + // This test manipulates the test HugePageAwareAllocator while making + // allocations/deallocations that interact with the real PageAllocator. The + // two share a global PageMap. + // + // If this test begins failing, the two are likely conflicting by violating + // invariants in the PageMap. + std::vector<Span*> allocs; + + for (int i : {10, 20, 30}) { + auto n = Length(i << 7); + allocs.push_back(New(n)); + } + + for (auto* a : allocs) { + Delete(a); + } + + allocs.clear(); + + // Do the same, but allocate something on the real page heap. 
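+  // (The ::operator new(1 << 20) calls below are large enough to be served by
+  // the real page-level allocator, which shares the global PageMap with the
+  // test allocator; see the comment at the top of this test.)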
+ for (int i : {10, 20, 30}) { + auto n = Length(i << 7); + allocs.push_back(New(n)); + + ::operator delete(::operator new(1 << 20)); + } + + for (auto* a : allocs) { + Delete(a); + } +} + +TEST_F(HugePageAwareAllocatorTest, LargeSmall) { + const int kIters = 2000; + const Length kSmallPages = Length(1); + // Large block must be larger than 1 huge page. + const Length kLargePages = 2 * kPagesPerHugePage - kSmallPages; + std::vector<Span*> small_allocs; + + // Repeatedly allocate large and small allocations that fit into a multiple of + // huge pages. The large allocations are short lived and the small + // allocations are long-lived. We want to refrain from growing the heap size + // without bound, keeping many huge pages alive because of the small + // allocations. + for (int i = 0; i < kIters; i++) { + Span* large = New(kLargePages); + ASSERT_NE(large, nullptr); + Span* small = New(kSmallPages); + ASSERT_NE(small, nullptr); + + small_allocs.push_back(small); + Delete(large); + } + + BackingStats stats; + { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + stats = allocator_->stats(); + } + + constexpr size_t kBufferSize = 1024 * 1024; + char buffer[kBufferSize]; + Printer printer(buffer, kBufferSize); + allocator_->Print(&printer); + // Verify that we have less free memory than we allocated in total. We have + // to account for bytes tied up in the cache. + EXPECT_LE(stats.free_bytes - allocator_->cache()->size().in_bytes(), + kSmallPages.in_bytes() * kIters) + << buffer; + + for (Span* small : small_allocs) { + Delete(small); + } +} + +// Tests an edge case in hugepage donation behavior. +TEST_F(HugePageAwareAllocatorTest, DonatedPageLists) { + const Length kSmallPages = Length(1); + // Large block must be larger than 1 huge page. + const Length kLargePages = 2 * kPagesPerHugePage - 2 * kSmallPages; + + Span* large = New(kLargePages); + ASSERT_NE(large, nullptr); + + // Allocating small1 moves the backing huge page off of the donated pages + // list. + Span* small1 = New(kSmallPages); + ASSERT_NE(small1, nullptr); + // This delete needs to have put the origin PageTracker back onto the right + // free list. + Delete(small1); + + // This otherwise fails. + Span* small2 = New(kSmallPages); + ASSERT_NE(small2, nullptr); + Delete(small2); + + // Clean up. + Delete(large); +} + +TEST_F(HugePageAwareAllocatorTest, DonationAccounting) { + const Length kSmallPages = Length(2); + const Length kOneHugePageDonation = kPagesPerHugePage - kSmallPages; + const Length kMultipleHugePagesDonation = 3 * kPagesPerHugePage - kSmallPages; + + // Each of these allocations should count as one donation, but only if they + // are actually being reused. + Span* large = New(kOneHugePageDonation); + ASSERT_NE(large, nullptr); + + // This allocation ensures that the donation is not counted. + Span* small = New(kSmallPages); + ASSERT_NE(small, nullptr); + + Span* large2 = New(kMultipleHugePagesDonation); + ASSERT_NE(large2, nullptr); + + // This allocation ensures that the donation is not counted. + Span* small2 = New(kSmallPages); + ASSERT_NE(small2, nullptr); + + Span* large3 = New(kOneHugePageDonation); + ASSERT_NE(large3, nullptr); + + Span* large4 = New(kMultipleHugePagesDonation); + ASSERT_NE(large4, nullptr); + + // Clean up. + Delete(large); + Delete(large2); + Delete(large3); + Delete(large4); + Delete(small); + Delete(small2); + + // Check donation count. 
+ absl::base_internal::SpinLockHolder h(&pageheap_lock); + CHECK_CONDITION(NHugePages(2) == allocator_->DonatedHugePages()); +} + +// We'd like to test OOM behavior but this, err, OOMs. :) +// (Usable manually in controlled environments. +TEST_F(HugePageAwareAllocatorTest, DISABLED_OOM) { + std::vector<Span*> objs; + auto n = Length(1); + while (true) { + Span* s = New(n); + if (!s) break; + objs.push_back(s); + n *= 2; + } + for (auto s : objs) { + Delete(s); + } +} + +struct MemoryBytes { + uint64_t virt; + uint64_t phys; +}; + +int64_t pagesize = getpagesize(); + +static size_t BytesInCore(void* p, size_t len) { + static const size_t kBufSize = 1024; + unsigned char buf[kBufSize]; + const size_t kChunk = pagesize * kBufSize; + size_t resident = 0; + while (len > 0) { + // We call mincore in bounded size chunks (though typically one + // chunk will cover an entire request.) + const size_t chunk_len = std::min(kChunk, len); + if (mincore(p, chunk_len, buf) != 0) { + Crash(kCrash, __FILE__, __LINE__, "mincore failed, errno", errno); + } + const size_t lim = chunk_len / pagesize; + for (size_t i = 0; i < lim; ++i) { + if (buf[i] & 1) resident += pagesize; + } + len -= chunk_len; + p = static_cast<char*>(p) + chunk_len; + } + + return resident; +} + +// Is any page of this hugepage resident? +bool HugePageResident(HugePage p) { + return BytesInCore(p.start_addr(), kHugePageSize) > 0; +} + +void Touch(PageId p) { + // a tcmalloc-page may contain more than an actual kernel page + volatile char* base = reinterpret_cast<char*>(p.start_addr()); + static size_t kActualPages = std::max<size_t>(kPageSize / pagesize, 1); + for (int i = 0; i < kActualPages; ++i) { + base[i * pagesize] = 1; + } +} + +// Fault an entire hugepage, as if THP chose to do so on an entirely +// empty hugepage. (In real life, this will usually, but not always, +// happen: we make sure it does so our accounting is accurate.) +void Touch(HugePage hp) { + PageId p = hp.first_page(); + const PageId lim = p + kPagesPerHugePage; + while (p < lim) { + Touch(p); + ++p; + } +} + +// Fault in memory across a span (SystemBack doesn't always do this.) +void TouchTHP(Span* s) { + PageId p = s->first_page(); + PageId lim = s->last_page(); + HugePage last = HugePageContaining(nullptr); + while (p <= lim) { + HugePage hp = HugePageContaining(p); + // Suppose that we are touching a hugepage for the first time (it + // is entirely non-resident.) The page fault we take will usually + // be promoted to a full transparent hugepage, and our accounting + // assumes this is true. But we can't actually guarantee that + // (the kernel won't wait if memory is too fragmented.) Do it ourselves + // by hand, to ensure our mincore() calculations return the right answers. + if (hp != last && !HugePageResident(hp)) { + last = hp; + Touch(hp); + } + + // Regardless of whether we've optimistically faulted in a + // hugepage, we also touch each page in the span. 
+ Touch(p); + ++p; + } +} + +// Similar to above but much more careful about touching memory / mallocing +// and without the validation +class StatTest : public testing::Test { + protected: + StatTest() : rng_() {} + + class RegionFactory; + + class Region : public AddressRegion { + public: + Region(AddressRegion* underlying, RegionFactory* factory) + : underlying_(underlying), factory_(factory) {} + + std::pair<void*, size_t> Alloc(size_t size, size_t alignment) override { + std::pair<void*, size_t> ret = underlying_->Alloc(size, alignment); + if (!ret.first) return {nullptr, 0}; + + // we only support so many allocations here for simplicity + CHECK_CONDITION(factory_->n_ < factory_->kNumAllocs); + // Anything coming from the test allocator will request full + // alignment. Metadata allocations will not. Since we can't + // control the backing of metadata allocations, elide them. + // TODO(b/128521238): this is not a good way to do this. + if (alignment >= kHugePageSize) { + factory_->allocs_[factory_->n_] = ret; + factory_->n_++; + } + return ret; + } + + private: + AddressRegion* underlying_; + RegionFactory* factory_; + }; + + class RegionFactory : public AddressRegionFactory { + public: + explicit RegionFactory(AddressRegionFactory* underlying) + : underlying_(underlying), n_(0) {} + + AddressRegion* Create(void* start, size_t size, UsageHint hint) override { + AddressRegion* underlying_region = underlying_->Create(start, size, hint); + CHECK_CONDITION(underlying_region); + void* region_space = MallocInternal(sizeof(Region)); + CHECK_CONDITION(region_space); + return new (region_space) Region(underlying_region, this); + } + + size_t GetStats(absl::Span<char> buffer) override { + return underlying_->GetStats(buffer); + } + + MemoryBytes Memory() { + MemoryBytes b = {0, 0}; + for (int i = 0; i < n_; ++i) { + void* p = allocs_[i].first; + size_t len = allocs_[i].second; + b.virt += len; + b.phys += BytesInCore(p, len); + } + + return b; + } + + AddressRegionFactory* underlying() const { return underlying_; } + + private: + friend class Region; + AddressRegionFactory* underlying_; + + static constexpr size_t kNumAllocs = 1000; + size_t n_; + std::pair<void*, size_t> allocs_[kNumAllocs]; + }; + + // Carefully get memory usage without touching anything. + MemoryBytes GetSystemBytes() { return replacement_region_factory_.Memory(); } + + // This is essentially a test case set up, but run manually - + // we can't guarantee gunit won't malloc between. + void PrepTest() { + memset(buf, 0, sizeof(buf)); + MallocExtension::ReleaseMemoryToSystem(std::numeric_limits<size_t>::max()); + SetRegionFactory(&replacement_region_factory_); + alloc = new (buf) HugePageAwareAllocator(MemoryTag::kNormal); + } + + ~StatTest() override { + SetRegionFactory(replacement_region_factory_.underlying()); + } + + BackingStats Stats() { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + BackingStats stats = alloc->stats(); + return stats; + } + + // Use bigger allocs here to ensure growth: + Length RandomAllocSize() { + // Since we touch all of the pages, try to avoid OOM'ing by limiting the + // number of big allocations. 
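+    // For scale: 4096 tcmalloc pages is about 32 MiB assuming the default
+    // 8 KiB tcmalloc page size (an assumption, not a value from this file);
+    // the cap is approximate because big_allocs_ is only checked before
+    // adding the next allocation.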
+ const Length kMaxBigAllocs = Length(4096); + + if (big_allocs_ < kMaxBigAllocs && absl::Bernoulli(rng_, 1.0 / 50)) { + auto n = + Length(1024 * (1 + absl::LogUniform<int32_t>(rng_, 0, (1 << 9) - 1))); + n += Length(absl::Uniform<int32_t>(rng_, 0, 1024)); + big_allocs_ += n; + return n; + } + return Length(1 + absl::LogUniform<int32_t>(rng_, 0, (1 << 10) - 1)); + } + + Span* Alloc(Length n) { + Span* span = alloc->New(n); + TouchTHP(span); + if (n > span->num_pages()) { + Crash(kCrash, __FILE__, __LINE__, n.raw_num(), + "not <=", span->num_pages().raw_num()); + } + n = span->num_pages(); + if (n > longest_) longest_ = n; + total_ += n; + if (total_ > peak_) peak_ = total_; + return span; + } + + void Free(Span* s) { + Length n = s->num_pages(); + total_ -= n; + { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + alloc->Delete(s); + } + } + + void CheckStats() { + MemoryBytes here = GetSystemBytes(); + BackingStats stats = Stats(); + SmallSpanStats small; + LargeSpanStats large; + { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + alloc->GetSmallSpanStats(&small); + alloc->GetLargeSpanStats(&large); + } + + size_t span_stats_free_bytes = 0, span_stats_released_bytes = 0; + for (auto i = Length(0); i < kMaxPages; ++i) { + span_stats_free_bytes += i.in_bytes() * small.normal_length[i.raw_num()]; + span_stats_released_bytes += + i.in_bytes() * small.returned_length[i.raw_num()]; + } + span_stats_free_bytes += large.normal_pages.in_bytes(); + span_stats_released_bytes += large.returned_pages.in_bytes(); + +#ifndef __ppc__ + const size_t alloced_bytes = total_.in_bytes(); +#endif + ASSERT_EQ(here.virt, stats.system_bytes); +#ifndef __ppc__ + const size_t actual_unmapped = here.virt - here.phys; +#endif + // TODO(b/122551676): On PPC, our release granularity may be smaller than + // the system page size, so we may not actually unmap memory that we expect. + // Pending using the return value of madvise, relax this constraint. +#ifndef __ppc__ + ASSERT_EQ(actual_unmapped, stats.unmapped_bytes); + ASSERT_EQ(here.phys, stats.free_bytes + alloced_bytes); + ASSERT_EQ(alloced_bytes, + stats.system_bytes - stats.free_bytes - stats.unmapped_bytes); +#endif + ASSERT_EQ(stats.free_bytes, span_stats_free_bytes); + ASSERT_EQ(stats.unmapped_bytes, span_stats_released_bytes); + } + + char buf[sizeof(HugePageAwareAllocator)]; + HugePageAwareAllocator* alloc; + RegionFactory replacement_region_factory_{GetRegionFactory()}; + absl::BitGen rng_; + + Length total_; + Length longest_; + Length peak_; + Length big_allocs_; +}; + +TEST_F(StatTest, Basic) { + static const size_t kNumAllocs = 500; + Span* allocs[kNumAllocs]; + + const bool always_check_usage = absl::GetFlag(FLAGS_always_check_usage); + + PrepTest(); + // DO NOT MALLOC ANYTHING BELOW THIS LINE! WE'RE TRYING TO CAREFULLY COUNT + // ALLOCATIONS. + // (note we can't stop background threads, but hopefully they're idle enough.) 
+ + for (int i = 0; i < kNumAllocs; ++i) { + Length k = RandomAllocSize(); + allocs[i] = Alloc(k); + // stats are expensive, don't always check + if (i % 10 != 0 && !always_check_usage) continue; + CheckStats(); + } + + static const size_t kReps = 1000; + for (int i = 0; i < kReps; ++i) { + size_t index = absl::Uniform<int32_t>(rng_, 0, kNumAllocs); + + Free(allocs[index]); + Length k = RandomAllocSize(); + allocs[index] = Alloc(k); + + if (absl::Bernoulli(rng_, 1.0 / 3)) { + Length pages(absl::LogUniform<int32_t>(rng_, 0, (1 << 10) - 1) + 1); + absl::base_internal::SpinLockHolder h(&pageheap_lock); + alloc->ReleaseAtLeastNPages(pages); + } + + // stats are expensive, don't always check + if (i % 10 != 0 && !always_check_usage) continue; + CheckStats(); + } + + for (int i = 0; i < kNumAllocs; ++i) { + Free(allocs[i]); + if (i % 10 != 0 && !always_check_usage) continue; + CheckStats(); + } + + { + CheckStats(); + pageheap_lock.Lock(); + auto final_stats = alloc->stats(); + pageheap_lock.Unlock(); + ASSERT_EQ(final_stats.free_bytes + final_stats.unmapped_bytes, + final_stats.system_bytes); + } + + // test over, malloc all you like +} + +TEST_F(HugePageAwareAllocatorTest, ParallelRelease) { + ThreadManager threads; + constexpr int kThreads = 10; + + struct ABSL_CACHELINE_ALIGNED Metadata { + absl::BitGen rng; + std::vector<Span*> spans; + }; + + std::vector<Metadata> metadata; + metadata.resize(kThreads); + + threads.Start(kThreads, [&](int thread_id) { + Metadata& m = metadata[thread_id]; + + if (thread_id == 0) { + ReleasePages(Length(absl::Uniform(m.rng, 1, 1 << 10))); + return; + } else if (thread_id == 1) { + benchmark::DoNotOptimize(Print()); + return; + } + + if (absl::Bernoulli(m.rng, 0.6) || m.spans.empty()) { + Span* s = AllocatorNew(Length(absl::LogUniform(m.rng, 1, 1 << 10))); + CHECK_CONDITION(s != nullptr); + + // Touch the contents of the buffer. We later use it to verify we are the + // only thread manipulating the Span, for example, if another thread + // madvise DONTNEED'd the contents and zero'd them. + const uintptr_t key = reinterpret_cast<uintptr_t>(s) ^ thread_id; + *reinterpret_cast<uintptr_t*>(s->start_address()) = key; + + m.spans.push_back(s); + } else { + size_t index = absl::Uniform<size_t>(m.rng, 0, m.spans.size()); + + Span* back = m.spans.back(); + Span* s = m.spans[index]; + m.spans[index] = back; + m.spans.pop_back(); + + const uintptr_t key = reinterpret_cast<uintptr_t>(s) ^ thread_id; + EXPECT_EQ(*reinterpret_cast<uintptr_t*>(s->start_address()), key); + + AllocatorDelete(s); + } + }); + + absl::SleepFor(absl::Seconds(1)); + + threads.Stop(); + + for (auto& m : metadata) { + for (Span* s : m.spans) { + AllocatorDelete(s); + } + } +} + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_page_filler.h b/contrib/libs/tcmalloc/tcmalloc/huge_page_filler.h new file mode 100644 index 0000000000..2f72b43881 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/huge_page_filler.h @@ -0,0 +1,2113 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_HUGE_PAGE_FILLER_H_ +#define TCMALLOC_HUGE_PAGE_FILLER_H_ + +#include <stddef.h> +#include <stdint.h> + +#include <algorithm> +#include <limits> + +#include "absl/algorithm/container.h" +#include "absl/base/internal/cycleclock.h" +#include "absl/time/time.h" +#include "tcmalloc/common.h" +#include "tcmalloc/huge_allocator.h" +#include "tcmalloc/huge_cache.h" +#include "tcmalloc/huge_pages.h" +#include "tcmalloc/internal/linked_list.h" +#include "tcmalloc/internal/optimization.h" +#include "tcmalloc/internal/range_tracker.h" +#include "tcmalloc/internal/timeseries_tracker.h" +#include "tcmalloc/span.h" +#include "tcmalloc/stats.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// This and the following classes implement the adaptive hugepage subrelease +// mechanism and realized fragmentation metric described in "Adaptive Hugepage +// Subrelease for Non-moving Memory Allocators in Warehouse-Scale Computers" +// (ISMM 2021). + +// Tracks correctness of skipped subrelease decisions over time. +template <size_t kEpochs = 16> +class SkippedSubreleaseCorrectnessTracker { + public: + struct SkippedSubreleaseDecision { + Length pages; // Number of pages we skipped subreleasing. + size_t count; // Number of times we skipped a subrelease. + + SkippedSubreleaseDecision() : pages(0), count(0) {} + explicit SkippedSubreleaseDecision(Length pages) : pages(pages), count(1) {} + explicit SkippedSubreleaseDecision(Length pages, size_t count) + : pages(pages), count(count) {} + + SkippedSubreleaseDecision& operator+=(SkippedSubreleaseDecision rhs) { + pages += rhs.pages; + count += rhs.count; + return *this; + } + + static SkippedSubreleaseDecision Zero() { + return SkippedSubreleaseDecision(); + } + }; + + explicit constexpr SkippedSubreleaseCorrectnessTracker(Clock clock, + absl::Duration w) + : window_(w), + epoch_length_(window_ / kEpochs), + last_confirmed_peak_(0), + tracker_(clock, w) {} + + // Not copyable or movable + SkippedSubreleaseCorrectnessTracker( + const SkippedSubreleaseCorrectnessTracker&) = delete; + SkippedSubreleaseCorrectnessTracker& operator=( + const SkippedSubreleaseCorrectnessTracker&) = delete; + + void ReportSkippedSubreleasePages( + Length skipped_pages, Length peak_pages, + absl::Duration expected_time_until_next_peak) { + total_skipped_ += SkippedSubreleaseDecision(skipped_pages); + pending_skipped_ += SkippedSubreleaseDecision(skipped_pages); + + SkippedSubreleaseUpdate update; + update.decision = SkippedSubreleaseDecision(skipped_pages); + update.num_pages_at_decision = peak_pages; + update.correctness_interval_epochs = + expected_time_until_next_peak / epoch_length_; + tracker_.Report(update); + } + + void ReportUpdatedPeak(Length current_peak) { + // Record this peak for the current epoch (so we don't double-count correct + // predictions later) and advance the tracker. + SkippedSubreleaseUpdate update; + update.confirmed_peak = current_peak; + if (tracker_.Report(update)) { + // Also keep track of the largest peak we have confirmed this epoch. + last_confirmed_peak_ = Length(0); + } + + // Recompute currently pending decisions. + pending_skipped_ = SkippedSubreleaseDecision::Zero(); + + Length largest_peak_already_confirmed = last_confirmed_peak_; + + tracker_.IterBackwards( + [&](size_t offset, int64_t ts, const SkippedSubreleaseEntry& e) { + // Do not clear any decisions in the current epoch. 
+ if (offset == 0) { + return; + } + + if (e.decisions.count > 0 && + e.max_num_pages_at_decision > largest_peak_already_confirmed && + offset <= e.correctness_interval_epochs) { + if (e.max_num_pages_at_decision <= current_peak) { + // We can confirm a subrelease decision as correct and it had not + // been confirmed correct by an earlier peak yet. + correctly_skipped_ += e.decisions; + } else { + pending_skipped_ += e.decisions; + } + } + + // Did we clear any earlier decisions based on a peak in this epoch? + // Keep track of the peak, so we do not clear them again. + largest_peak_already_confirmed = + std::max(largest_peak_already_confirmed, e.max_confirmed_peak); + }, + -1); + + last_confirmed_peak_ = std::max(last_confirmed_peak_, current_peak); + } + + inline SkippedSubreleaseDecision total_skipped() const { + return total_skipped_; + } + + inline SkippedSubreleaseDecision correctly_skipped() const { + return correctly_skipped_; + } + + inline SkippedSubreleaseDecision pending_skipped() const { + return pending_skipped_; + } + + private: + struct SkippedSubreleaseUpdate { + // A subrelease decision that was made at this time step: How much did we + // decide not to release? + SkippedSubreleaseDecision decision; + + // What does our future demand have to be for this to be correct? If there + // were multiple subrelease decisions in the same epoch, use the max. + Length num_pages_at_decision; + + // How long from the time of the decision do we have before the decision + // will be determined incorrect? + int64_t correctness_interval_epochs = 0; + + // At this time step, we confirmed a demand peak at this level, which means + // all subrelease decisions in earlier time steps that had peak_demand_pages + // <= this confirmed_peak were confirmed correct and don't need to be + // considered again in the future. + Length confirmed_peak; + }; + + struct SkippedSubreleaseEntry { + SkippedSubreleaseDecision decisions = SkippedSubreleaseDecision::Zero(); + Length max_num_pages_at_decision; + int64_t correctness_interval_epochs = 0; + Length max_confirmed_peak; + + static SkippedSubreleaseEntry Nil() { return SkippedSubreleaseEntry(); } + + void Report(SkippedSubreleaseUpdate e) { + decisions += e.decision; + correctness_interval_epochs = + std::max(correctness_interval_epochs, e.correctness_interval_epochs); + max_num_pages_at_decision = + std::max(max_num_pages_at_decision, e.num_pages_at_decision); + max_confirmed_peak = std::max(max_confirmed_peak, e.confirmed_peak); + } + }; + + const absl::Duration window_; + const absl::Duration epoch_length_; + + // The largest peak we processed this epoch. This is required to avoid us + // double-counting correctly predicted decisions. 
+ Length last_confirmed_peak_; + + SkippedSubreleaseDecision total_skipped_; + SkippedSubreleaseDecision correctly_skipped_; + SkippedSubreleaseDecision pending_skipped_; + + TimeSeriesTracker<SkippedSubreleaseEntry, SkippedSubreleaseUpdate, kEpochs> + tracker_; +}; + +struct SubreleaseStats { + Length total_pages_subreleased; // cumulative since startup + Length num_pages_subreleased; + HugeLength total_hugepages_broken{NHugePages(0)}; // cumulative since startup + HugeLength num_hugepages_broken{NHugePages(0)}; + + bool is_limit_hit = false; + // Keep these limit-related stats cumulative since startup only + Length total_pages_subreleased_due_to_limit; + HugeLength total_hugepages_broken_due_to_limit{NHugePages(0)}; + + void reset() { + total_pages_subreleased += num_pages_subreleased; + total_hugepages_broken += num_hugepages_broken; + num_pages_subreleased = Length(0); + num_hugepages_broken = NHugePages(0); + } + + // Must be called at the beginning of each subrelease request + void set_limit_hit(bool value) { is_limit_hit = value; } + + // This only has a well-defined meaning within ReleaseCandidates where + // set_limit_hit() has been called earlier. Do not use anywhere else. + bool limit_hit() { return is_limit_hit; } +}; + +// Track filler statistics over a time window. +template <size_t kEpochs = 16> +class FillerStatsTracker { + public: + enum Type { kRegular, kDonated, kPartialReleased, kReleased, kNumTypes }; + + struct FillerStats { + Length num_pages; + Length free_pages; + Length unmapped_pages; + Length used_pages_in_subreleased_huge_pages; + HugeLength huge_pages[kNumTypes]; + Length num_pages_subreleased; + HugeLength num_hugepages_broken = NHugePages(0); + + HugeLength total_huge_pages() const { + HugeLength total_huge_pages; + for (int i = 0; i < kNumTypes; i++) { + total_huge_pages += huge_pages[i]; + } + return total_huge_pages; + } + }; + + struct NumberOfFreePages { + Length free; + Length free_backed; + }; + + explicit constexpr FillerStatsTracker(Clock clock, absl::Duration w, + absl::Duration summary_interval) + : summary_interval_(summary_interval), + window_(w), + epoch_length_(window_ / kEpochs), + tracker_(clock, w), + skipped_subrelease_correctness_(clock, w) {} + + // Not copyable or movable + FillerStatsTracker(const FillerStatsTracker&) = delete; + FillerStatsTracker& operator=(const FillerStatsTracker&) = delete; + + void Report(const FillerStats stats) { + if (ABSL_PREDICT_FALSE(tracker_.Report(stats))) { + if (ABSL_PREDICT_FALSE(pending_skipped().count > 0)) { + // Consider the peak within the just completed epoch to confirm the + // correctness of any recent subrelease decisions. + skipped_subrelease_correctness_.ReportUpdatedPeak(std::max( + stats.num_pages, + tracker_.GetEpochAtOffset(1).stats[kStatsAtMaxDemand].num_pages)); + } + } + } + + void Print(Printer* out) const; + void PrintInPbtxt(PbtxtRegion* hpaa) const; + + // Calculates recent peaks for skipping subrelease decisions. If our allocated + // memory is below the demand peak within the last peak_interval, we stop + // subreleasing. If our demand is going above that peak again within another + // peak_interval, we report that we made the correct decision. 
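+  //
+  // For illustration (a sketch based on the instantiation below): the
+  // HugePageFiller uses FillerStatsTracker<600> with a 10-minute window, so
+  // each epoch covers one second and
+  //
+  //   tracker.GetRecentPeak(absl::Seconds(30))
+  //
+  // scans the 30 most recent epochs and returns the at-max-demand snapshot
+  // from the epoch with the highest num_pages in that span (all-zero stats if
+  // every scanned epoch is empty).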
+ FillerStats GetRecentPeak(absl::Duration peak_interval) { + last_peak_interval_ = peak_interval; + FillerStats recent_peak; + Length max_demand_pages; + + int64_t num_epochs = peak_interval / epoch_length_; + tracker_.IterBackwards( + [&](size_t offset, int64_t ts, const FillerStatsEntry& e) { + if (!e.empty()) { + // Identify the maximum number of demand pages we have seen within + // the time interval. + if (e.stats[kStatsAtMaxDemand].num_pages > max_demand_pages) { + recent_peak = e.stats[kStatsAtMaxDemand]; + max_demand_pages = recent_peak.num_pages; + } + } + }, + num_epochs); + + return recent_peak; + } + + void ReportSkippedSubreleasePages( + Length pages, Length peak_pages, + absl::Duration expected_time_until_next_peak) { + if (pages == Length(0)) { + return; + } + + skipped_subrelease_correctness_.ReportSkippedSubreleasePages( + pages, peak_pages, expected_time_until_next_peak); + } + + inline typename SkippedSubreleaseCorrectnessTracker< + kEpochs>::SkippedSubreleaseDecision + total_skipped() const { + return skipped_subrelease_correctness_.total_skipped(); + } + + inline typename SkippedSubreleaseCorrectnessTracker< + kEpochs>::SkippedSubreleaseDecision + correctly_skipped() const { + return skipped_subrelease_correctness_.correctly_skipped(); + } + + inline typename SkippedSubreleaseCorrectnessTracker< + kEpochs>::SkippedSubreleaseDecision + pending_skipped() const { + return skipped_subrelease_correctness_.pending_skipped(); + } + + // Returns the minimum number of free pages throughout the tracker period. + // The first value of the pair is the number of all free pages, the second + // value contains only the backed ones. + NumberOfFreePages min_free_pages(absl::Duration w) const { + NumberOfFreePages mins; + mins.free = Length::max(); + mins.free_backed = Length::max(); + + int64_t num_epochs = std::clamp(w / epoch_length_, int64_t{0}, + static_cast<int64_t>(kEpochs)); + + tracker_.IterBackwards( + [&](size_t offset, int64_t ts, const FillerStatsEntry& e) { + if (!e.empty()) { + mins.free = std::min(mins.free, e.min_free_pages); + mins.free_backed = + std::min(mins.free_backed, e.min_free_backed_pages); + } + }, + num_epochs); + mins.free = (mins.free == Length::max()) ? Length(0) : mins.free; + mins.free_backed = + (mins.free_backed == Length::max()) ? Length(0) : mins.free_backed; + return mins; + } + + private: + // We collect filler statistics at four "interesting points" within each time + // step: at min/max demand of pages and at min/max use of hugepages. This + // allows us to approximate the envelope of the different metrics. + enum StatsType { + kStatsAtMinDemand, + kStatsAtMaxDemand, + kStatsAtMinHugePages, + kStatsAtMaxHugePages, + kNumStatsTypes + }; + + struct FillerStatsEntry { + // Collect filler stats at "interesting points" (minimum/maximum page demand + // and at minimum/maximum usage of huge pages). 
+ FillerStats stats[kNumStatsTypes] = {}; + static constexpr Length kDefaultValue = Length::max(); + Length min_free_pages = kDefaultValue; + Length min_free_backed_pages = kDefaultValue; + Length num_pages_subreleased; + HugeLength num_hugepages_broken = NHugePages(0); + + static FillerStatsEntry Nil() { return FillerStatsEntry(); } + + void Report(FillerStats e) { + if (empty()) { + for (int i = 0; i < kNumStatsTypes; i++) { + stats[i] = e; + } + } + + if (e.num_pages < stats[kStatsAtMinDemand].num_pages) { + stats[kStatsAtMinDemand] = e; + } + + if (e.num_pages > stats[kStatsAtMaxDemand].num_pages) { + stats[kStatsAtMaxDemand] = e; + } + + if (e.total_huge_pages() < + stats[kStatsAtMinHugePages].total_huge_pages()) { + stats[kStatsAtMinHugePages] = e; + } + + if (e.total_huge_pages() > + stats[kStatsAtMaxHugePages].total_huge_pages()) { + stats[kStatsAtMaxHugePages] = e; + } + + min_free_pages = + std::min(min_free_pages, e.free_pages + e.unmapped_pages); + min_free_backed_pages = std::min(min_free_backed_pages, e.free_pages); + + // Subrelease stats + num_pages_subreleased += e.num_pages_subreleased; + num_hugepages_broken += e.num_hugepages_broken; + } + + bool empty() const { return min_free_pages == kDefaultValue; } + }; + + // The tracker reports pages that have been free for at least this interval, + // as well as peaks within this interval. + const absl::Duration summary_interval_; + + const absl::Duration window_; + const absl::Duration epoch_length_; + + TimeSeriesTracker<FillerStatsEntry, FillerStats, kEpochs> tracker_; + SkippedSubreleaseCorrectnessTracker<kEpochs> skipped_subrelease_correctness_; + + // Records the last peak_interval value, for reporting and debugging only. + absl::Duration last_peak_interval_; +}; + +// Evaluate a/b, avoiding division by zero +inline double safe_div(double a, double b) { + if (b == 0) { + return 0.; + } else { + return a / b; + } +} + +inline double safe_div(Length a, Length b) { + return safe_div(a.raw_num(), b.raw_num()); +} + +template <size_t kEpochs> +void FillerStatsTracker<kEpochs>::Print(Printer* out) const { + NumberOfFreePages free_pages = min_free_pages(summary_interval_); + out->printf("HugePageFiller: time series over %d min interval\n\n", + absl::ToInt64Minutes(summary_interval_)); + + // Realized fragmentation is equivalent to backed minimum free pages over a + // 5-min interval. It is printed for convenience but not included in pbtxt. 
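+  //
+  // For example (hypothetical numbers, assuming the default 8 KiB page): a
+  // minimum of 537 backed free pages over the interval is reported below as
+  // "realized fragmentation: 4.2 MiB".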
+ out->printf("HugePageFiller: realized fragmentation: %.1f MiB\n", + free_pages.free_backed.in_mib()); + out->printf("HugePageFiller: minimum free pages: %zu (%zu backed)\n", + free_pages.free.raw_num(), free_pages.free_backed.raw_num()); + + FillerStatsEntry at_peak_demand; + FillerStatsEntry at_peak_hps; + + tracker_.IterBackwards( + [&](size_t offset, int64_t ts, const FillerStatsEntry& e) { + if (!e.empty()) { + if (at_peak_demand.empty() || + at_peak_demand.stats[kStatsAtMaxDemand].num_pages < + e.stats[kStatsAtMaxDemand].num_pages) { + at_peak_demand = e; + } + + if (at_peak_hps.empty() || + at_peak_hps.stats[kStatsAtMaxHugePages].total_huge_pages() < + e.stats[kStatsAtMaxHugePages].total_huge_pages()) { + at_peak_hps = e; + } + } + }, + summary_interval_ / epoch_length_); + + out->printf( + "HugePageFiller: at peak demand: %zu pages (and %zu free, %zu unmapped)\n" + "HugePageFiller: at peak demand: %zu hps (%zu regular, %zu donated, " + "%zu partial, %zu released)\n", + at_peak_demand.stats[kStatsAtMaxDemand].num_pages.raw_num(), + at_peak_demand.stats[kStatsAtMaxDemand].free_pages.raw_num(), + at_peak_demand.stats[kStatsAtMaxDemand].unmapped_pages.raw_num(), + at_peak_demand.stats[kStatsAtMaxDemand].total_huge_pages().raw_num(), + at_peak_demand.stats[kStatsAtMaxDemand].huge_pages[kRegular].raw_num(), + at_peak_demand.stats[kStatsAtMaxDemand].huge_pages[kDonated].raw_num(), + at_peak_demand.stats[kStatsAtMaxDemand] + .huge_pages[kPartialReleased] + .raw_num(), + at_peak_demand.stats[kStatsAtMaxDemand].huge_pages[kReleased].raw_num()); + + out->printf( + "HugePageFiller: at peak hps: %zu pages (and %zu free, %zu unmapped)\n" + "HugePageFiller: at peak hps: %zu hps (%zu regular, %zu donated, " + "%zu partial, %zu released)\n", + at_peak_hps.stats[kStatsAtMaxDemand].num_pages.raw_num(), + at_peak_hps.stats[kStatsAtMaxDemand].free_pages.raw_num(), + at_peak_hps.stats[kStatsAtMaxDemand].unmapped_pages.raw_num(), + at_peak_hps.stats[kStatsAtMaxDemand].total_huge_pages().raw_num(), + at_peak_hps.stats[kStatsAtMaxDemand].huge_pages[kRegular].raw_num(), + at_peak_hps.stats[kStatsAtMaxDemand].huge_pages[kDonated].raw_num(), + at_peak_hps.stats[kStatsAtMaxDemand] + .huge_pages[kPartialReleased] + .raw_num(), + at_peak_hps.stats[kStatsAtMaxDemand].huge_pages[kReleased].raw_num()); + + out->printf( + "\nHugePageFiller: Since the start of the execution, %zu subreleases (%zu" + " pages) were skipped due to recent (%llds) peaks.\n", + total_skipped().count, total_skipped().pages.raw_num(), + static_cast<long long>(absl::ToInt64Seconds(last_peak_interval_))); + + Length skipped_pages = total_skipped().pages - pending_skipped().pages; + double correctly_skipped_pages_percentage = + safe_div(100.0 * correctly_skipped().pages, skipped_pages); + + size_t skipped_count = total_skipped().count - pending_skipped().count; + double correctly_skipped_count_percentage = + safe_div(100.0 * correctly_skipped().count, skipped_count); + + out->printf( + "HugePageFiller: %.4f%% of decisions confirmed correct, %zu " + "pending (%.4f%% of pages, %zu pending).\n", + correctly_skipped_count_percentage, pending_skipped().count, + correctly_skipped_pages_percentage, pending_skipped().pages.raw_num()); + + // Print subrelease stats + Length total_subreleased; + HugeLength total_broken = NHugePages(0); + tracker_.Iter( + [&](size_t offset, int64_t ts, const FillerStatsEntry& e) { + total_subreleased += e.num_pages_subreleased; + total_broken += e.num_hugepages_broken; + }, + tracker_.kSkipEmptyEntries); + out->printf( + 
"HugePageFiller: Subrelease stats last %d min: total " + "%zu pages subreleased, %zu hugepages broken\n", + static_cast<int64_t>(absl::ToInt64Minutes(window_)), + total_subreleased.raw_num(), total_broken.raw_num()); +} + +template <size_t kEpochs> +void FillerStatsTracker<kEpochs>::PrintInPbtxt(PbtxtRegion* hpaa) const { + { + auto skip_subrelease = hpaa->CreateSubRegion("filler_skipped_subrelease"); + skip_subrelease.PrintI64("skipped_subrelease_interval_ms", + absl::ToInt64Milliseconds(last_peak_interval_)); + skip_subrelease.PrintI64("skipped_subrelease_pages", + total_skipped().pages.raw_num()); + skip_subrelease.PrintI64("correctly_skipped_subrelease_pages", + correctly_skipped().pages.raw_num()); + skip_subrelease.PrintI64("pending_skipped_subrelease_pages", + pending_skipped().pages.raw_num()); + skip_subrelease.PrintI64("skipped_subrelease_count", total_skipped().count); + skip_subrelease.PrintI64("correctly_skipped_subrelease_count", + correctly_skipped().count); + skip_subrelease.PrintI64("pending_skipped_subrelease_count", + pending_skipped().count); + } + + auto filler_stats = hpaa->CreateSubRegion("filler_stats_timeseries"); + filler_stats.PrintI64("window_ms", absl::ToInt64Milliseconds(epoch_length_)); + filler_stats.PrintI64("epochs", kEpochs); + + NumberOfFreePages free_pages = min_free_pages(summary_interval_); + filler_stats.PrintI64("min_free_pages_interval_ms", + absl::ToInt64Milliseconds(summary_interval_)); + filler_stats.PrintI64("min_free_pages", free_pages.free.raw_num()); + filler_stats.PrintI64("min_free_backed_pages", + free_pages.free_backed.raw_num()); + + static const char* labels[kNumStatsTypes] = { + "at_minimum_demand", "at_maximum_demand", "at_minimum_huge_pages", + "at_maximum_huge_pages"}; + + tracker_.Iter( + [&](size_t offset, int64_t ts, const FillerStatsEntry& e) { + auto region = filler_stats.CreateSubRegion("measurements"); + region.PrintI64("epoch", offset); + region.PrintI64("timestamp_ms", + absl::ToInt64Milliseconds(absl::Nanoseconds(ts))); + region.PrintI64("min_free_pages", e.min_free_pages.raw_num()); + region.PrintI64("min_free_backed_pages", + e.min_free_backed_pages.raw_num()); + region.PrintI64("num_pages_subreleased", + e.num_pages_subreleased.raw_num()); + region.PrintI64("num_hugepages_broken", + e.num_hugepages_broken.raw_num()); + for (int i = 0; i < kNumStatsTypes; i++) { + auto m = region.CreateSubRegion(labels[i]); + FillerStats stats = e.stats[i]; + m.PrintI64("num_pages", stats.num_pages.raw_num()); + m.PrintI64("regular_huge_pages", + stats.huge_pages[kRegular].raw_num()); + m.PrintI64("donated_huge_pages", + stats.huge_pages[kDonated].raw_num()); + m.PrintI64("partial_released_huge_pages", + stats.huge_pages[kPartialReleased].raw_num()); + m.PrintI64("released_huge_pages", + stats.huge_pages[kReleased].raw_num()); + m.PrintI64("used_pages_in_subreleased_huge_pages", + stats.used_pages_in_subreleased_huge_pages.raw_num()); + } + }, + tracker_.kSkipEmptyEntries); +} + +// PageTracker keeps track of the allocation status of every page in a HugePage. +// It allows allocation and deallocation of a contiguous run of pages. +// +// Its mutating methods are annotated as requiring the pageheap_lock, in order +// to support unlocking the page heap lock in a dynamic annotation-friendly way. 
+template <MemoryModifyFunction Unback> +class PageTracker : public TList<PageTracker<Unback>>::Elem { + public: + static void UnbackImpl(void* p, size_t size) { Unback(p, size); } + + constexpr PageTracker(HugePage p, uint64_t when) + : location_(p), + released_count_(0), + donated_(false), + unbroken_(true), + free_{} { + init_when(when); + +#ifndef __ppc64__ +#if defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Winvalid-offsetof" +#endif + // Verify fields are structured so commonly accessed members (as part of + // Put) are on the first two cache lines. This allows the CentralFreeList + // to accelerate deallocations by prefetching PageTracker instances before + // taking the pageheap_lock. + // + // On PPC64, kHugePageSize / kPageSize is typically ~2K (16MB / 8KB), + // requiring 512 bytes for representing free_. While its cache line size is + // larger, the entirety of free_ will not fit on two cache lines. + static_assert( + offsetof(PageTracker<Unback>, location_) + sizeof(location_) <= + 2 * ABSL_CACHELINE_SIZE, + "location_ should fall within the first two cachelines of " + "PageTracker."); + static_assert(offsetof(PageTracker<Unback>, when_numerator_) + + sizeof(when_numerator_) <= + 2 * ABSL_CACHELINE_SIZE, + "when_numerator_ should fall within the first two cachelines " + "of PageTracker."); + static_assert(offsetof(PageTracker<Unback>, when_denominator_) + + sizeof(when_denominator_) <= + 2 * ABSL_CACHELINE_SIZE, + "when_denominator_ should fall within the first two " + "cachelines of PageTracker."); + static_assert( + offsetof(PageTracker<Unback>, donated_) + sizeof(donated_) <= + 2 * ABSL_CACHELINE_SIZE, + "donated_ should fall within the first two cachelines of PageTracker."); + static_assert( + offsetof(PageTracker<Unback>, free_) + sizeof(free_) <= + 2 * ABSL_CACHELINE_SIZE, + "free_ should fall within the first two cachelines of PageTracker."); +#if defined(__GNUC__) +#pragma GCC diagnostic pop +#endif +#endif // __ppc64__ + } + + struct PageAllocation { + PageId page; + Length previously_unbacked; + }; + + // REQUIRES: there's a free range of at least n pages + // + // Returns a PageId i and a count of previously unbacked pages in the range + // [i, i+n) in previously_unbacked. + PageAllocation Get(Length n) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // REQUIRES: p was the result of a previous call to Get(n) + void Put(PageId p, Length n) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Returns true if any unused pages have been returned-to-system. + bool released() const { return released_count_ > 0; } + + // Was this tracker donated from the tail of a multi-hugepage allocation? + // Only up-to-date when the tracker is on a TrackerList in the Filler; + // otherwise the value is meaningless. + bool donated() const { return donated_; } + // Set/reset the donated flag. The donated status is lost, for instance, + // when further allocations are made on the tracker. + void set_donated(bool status) { donated_ = status; } + + // These statistics help us measure the fragmentation of a hugepage and + // the desirability of allocating from this hugepage. 
+ Length longest_free_range() const { return Length(free_.longest_free()); } + size_t nallocs() const { return free_.allocs(); } + Length used_pages() const { return Length(free_.used()); } + Length released_pages() const { return Length(released_count_); } + Length free_pages() const; + bool empty() const; + + bool unbroken() const { return unbroken_; } + + // Returns the hugepage whose availability is being tracked. + HugePage location() const { return location_; } + + // Return all unused pages to the system, mark future frees to do same. + // Returns the count of pages unbacked. + Length ReleaseFree() ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Return this allocation to the system, if policy warrants it. + // + // As of 3/2020 our policy is to rerelease: Once we break a hugepage by + // returning a fraction of it, we return *anything* unused. This simplifies + // tracking. + // + // TODO(b/141550014): Make retaining the default/sole policy. + void MaybeRelease(PageId p, Length n) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { + if (released_count_ == 0) { + return; + } + + // Mark pages as released. + Length index = p - location_.first_page(); + ASSERT(released_by_page_.CountBits(index.raw_num(), n.raw_num()) == 0); + released_by_page_.SetRange(index.raw_num(), n.raw_num()); + released_count_ += n.raw_num(); + ASSERT(released_by_page_.CountBits(0, kPagesPerHugePage.raw_num()) == + released_count_); + + // TODO(b/122551676): If release fails, we should not SetRange above. + ReleasePagesWithoutLock(p, n); + } + + void AddSpanStats(SmallSpanStats* small, LargeSpanStats* large, + PageAgeHistograms* ages) const; + + private: + void init_when(uint64_t w) { + const Length before = Length(free_.total_free()); + when_numerator_ = w * before.raw_num(); + when_denominator_ = before.raw_num(); + } + + HugePage location_; + // We keep track of an average time weighted by Length::raw_num. In order to + // avoid doing division on fast path, store the numerator and denominator and + // only do the division when we need the average. + uint64_t when_numerator_; + uint64_t when_denominator_; + + // Cached value of released_by_page_.CountBits(0, kPagesPerHugePages) + // + // TODO(b/151663108): Logically, this is guarded by pageheap_lock. + uint16_t released_count_; + bool donated_; + bool unbroken_; + + RangeTracker<kPagesPerHugePage.raw_num()> free_; + // Bitmap of pages based on them being released to the OS. + // * Not yet released pages are unset (considered "free") + // * Released pages are set. + // + // Before releasing any locks to release memory to the OS, we mark the bitmap. + // + // Once released, a huge page is considered released *until* free_ is + // exhausted and no pages released_by_page_ are set. We may have up to + // kPagesPerHugePage-1 parallel subreleases in-flight. + // + // TODO(b/151663108): Logically, this is guarded by pageheap_lock. 
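+  //
+  // Worked example (illustrative sketch, using a hypothetical 8-page hugepage
+  // for brevity): suppose pages 0-2 are allocated, pages 3-7 are free, and
+  // pages 5-7 have been subreleased. Then free_ holds the free range [3, 8),
+  // released_by_page_ has bits 5-7 set, and released_count_ == 3. If a later
+  // Get(Length(4)) is satisfied from pages 3-6, CountBits(3, 4) == 2 reports
+  // pages 5 and 6 as previously_unbacked, their bits are cleared, and
+  // released_count_ drops to 1.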
+ Bitmap<kPagesPerHugePage.raw_num()> released_by_page_; + + static_assert(kPagesPerHugePage.raw_num() < + std::numeric_limits<uint16_t>::max(), + "nallocs must be able to support kPagesPerHugePage!"); + + void ReleasePages(PageId p, Length n) { + void* ptr = p.start_addr(); + size_t byte_len = n.in_bytes(); + Unback(ptr, byte_len); + unbroken_ = false; + } + + void ReleasePagesWithoutLock(PageId p, Length n) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { + pageheap_lock.Unlock(); + + void* ptr = p.start_addr(); + size_t byte_len = n.in_bytes(); + Unback(ptr, byte_len); + + pageheap_lock.Lock(); + unbroken_ = false; + } +}; + +enum class FillerPartialRerelease : bool { + // Once we break a hugepage by returning a fraction of it, we return + // *anything* unused. This simplifies tracking. + // + // As of 2/2020, this is the default behavior. + Return, + // When releasing a page onto an already-released huge page, retain the page + // rather than releasing it back to the OS. This can reduce minor page + // faults for hot pages. + // + // TODO(b/141550014, b/122551676): Make this the default behavior. + Retain, +}; + +// This tracks a set of unfilled hugepages, and fulfills allocations +// with a goal of filling some hugepages as tightly as possible and emptying +// out the remainder. +template <class TrackerType> +class HugePageFiller { + public: + explicit HugePageFiller(FillerPartialRerelease partial_rerelease); + HugePageFiller(FillerPartialRerelease partial_rerelease, Clock clock); + + typedef TrackerType Tracker; + + struct TryGetResult { + TrackerType* pt; + PageId page; + }; + + // Our API is simple, but note that it does not include an unconditional + // allocation, only a "try"; we expect callers to allocate new hugepages if + // needed. This simplifies using it in a few different contexts (and improves + // the testing story - no dependencies.) + // + // On failure, returns nullptr/PageId{0}. + TryGetResult TryGet(Length n) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Marks [p, p + n) as usable by new allocations into *pt; returns pt + // if that hugepage is now empty (nullptr otherwise.) + // REQUIRES: pt is owned by this object (has been Contribute()), and + // {pt, p, n} was the result of a previous TryGet. + TrackerType* Put(TrackerType* pt, PageId p, Length n) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Contributes a tracker to the filler. If "donated," then the tracker is + // marked as having come from the tail of a multi-hugepage allocation, which + // causes it to be treated slightly differently. + void Contribute(TrackerType* pt, bool donated); + + HugeLength size() const { return size_; } + + // Useful statistics + Length pages_allocated() const { return allocated_; } + Length used_pages() const { return allocated_; } + Length unmapped_pages() const { return unmapped_; } + Length free_pages() const; + Length used_pages_in_released() const { return n_used_released_; } + Length used_pages_in_partial_released() const { + return n_used_partial_released_; + } + Length used_pages_in_any_subreleased() const { + return n_used_released_ + n_used_partial_released_; + } + + // Fraction of used pages that are on non-released hugepages and + // thus could be backed by kernel hugepages. (Of course, we can't + // guarantee that the kernel had available 2-mib regions of physical + // memory--so this being 1 doesn't mean that everything actually + // *is* hugepage-backed!) 
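+  // For example (hypothetical numbers, following the description above): if
+  // 900 of the 1000 currently used pages sit on hugepages that have not been
+  // subreleased, hugepage_frac() is 0.9.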
+ double hugepage_frac() const; + + // Returns the amount of memory to release if all remaining options of + // releasing memory involve subreleasing pages. + Length GetDesiredSubreleasePages(Length desired, Length total_released, + absl::Duration peak_interval) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Tries to release desired pages by iteratively releasing from the emptiest + // possible hugepage and releasing its free memory to the system. Return the + // number of pages actually released. + Length ReleasePages(Length desired, + absl::Duration skip_subrelease_after_peaks_interval, + bool hit_limit) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + void AddSpanStats(SmallSpanStats* small, LargeSpanStats* large, + PageAgeHistograms* ages) const; + + BackingStats stats() const; + SubreleaseStats subrelease_stats() const { return subrelease_stats_; } + void Print(Printer* out, bool everything) const; + void PrintInPbtxt(PbtxtRegion* hpaa) const; + + private: + typedef TList<TrackerType> TrackerList; + + // This class wraps an array of N TrackerLists and a Bitmap storing which + // elements are non-empty. + template <size_t N> + class HintedTrackerLists { + public: + HintedTrackerLists() : nonempty_{}, size_(NHugePages(0)) {} + + // Removes a TrackerType from the first non-empty freelist with index at + // least n and returns it. Returns nullptr if there is none. + TrackerType* GetLeast(const size_t n) { + ASSERT(n < N); + size_t i = nonempty_.FindSet(n); + if (i == N) { + return nullptr; + } + ASSERT(!lists_[i].empty()); + TrackerType* pt = lists_[i].first(); + if (lists_[i].remove(pt)) { + nonempty_.ClearBit(i); + } + --size_; + return pt; + } + void Add(TrackerType* pt, const size_t i) { + ASSERT(i < N); + ASSERT(pt != nullptr); + lists_[i].prepend(pt); + nonempty_.SetBit(i); + ++size_; + } + void Remove(TrackerType* pt, const size_t i) { + ASSERT(i < N); + ASSERT(pt != nullptr); + if (lists_[i].remove(pt)) { + nonempty_.ClearBit(i); + } + --size_; + } + const TrackerList& operator[](const size_t n) const { + ASSERT(n < N); + return lists_[n]; + } + HugeLength size() const { return size_; } + bool empty() const { return size().raw_num() == 0; } + // Runs a functor on all HugePages in the TrackerLists. + // This method is const but the Functor gets passed a non-const pointer. + // This quirk is inherited from TrackerList. + template <typename Functor> + void Iter(const Functor& func, size_t start) const { + size_t i = nonempty_.FindSet(start); + while (i < N) { + auto& list = lists_[i]; + ASSERT(!list.empty()); + for (TrackerType* pt : list) { + func(pt); + } + i++; + if (i < N) i = nonempty_.FindSet(i); + } + } + + private: + TrackerList lists_[N]; + Bitmap<N> nonempty_; + HugeLength size_; + }; + + SubreleaseStats subrelease_stats_; + + // We group hugepages first by longest-free (as a measure of fragmentation), + // then into 8 chunks inside there by desirability of allocation. + static constexpr size_t kChunks = 8; + // Which chunk should this hugepage be in? + // This returns the largest possible value kChunks-1 iff pt has a single + // allocation. + size_t IndexFor(TrackerType* pt); + // Returns index for regular_alloc_. + static size_t ListFor(Length longest, size_t chunk); + static constexpr size_t kNumLists = kPagesPerHugePage.raw_num() * kChunks; + + HintedTrackerLists<kNumLists> regular_alloc_; + HintedTrackerLists<kPagesPerHugePage.raw_num()> donated_alloc_; + // Partially released ones that we are trying to release. 
+ // + // When FillerPartialRerelease == Return: + // regular_alloc_partial_released_ is empty and n_used_partial_released_ is + // 0. + // + // When FillerPartialRerelease == Retain: + // regular_alloc_partial_released_ contains huge pages that are partially + // allocated, partially free, and partially returned to the OS. + // n_used_partial_released_ is the number of pages which have been allocated + // of the set. + // + // regular_alloc_released_: This list contains huge pages whose pages are + // either allocated or returned to the OS. There are no pages that are free, + // but not returned to the OS. n_used_released_ contains the number of + // pages in those huge pages that are not free (i.e., allocated). + Length n_used_partial_released_; + Length n_used_released_; + HintedTrackerLists<kNumLists> regular_alloc_partial_released_; + HintedTrackerLists<kNumLists> regular_alloc_released_; + + // RemoveFromFillerList pt from the appropriate HintedTrackerList. + void RemoveFromFillerList(TrackerType* pt); + // Put pt in the appropriate HintedTrackerList. + void AddToFillerList(TrackerType* pt); + // Like AddToFillerList(), but for use when donating from the tail of a + // multi-hugepage allocation. + void DonateToFillerList(TrackerType* pt); + + // CompareForSubrelease identifies the worse candidate for subrelease, between + // the choice of huge pages a and b. + static bool CompareForSubrelease(TrackerType* a, TrackerType* b) { + ASSERT(a != nullptr); + ASSERT(b != nullptr); + + return a->used_pages() < b->used_pages(); + } + + // SelectCandidates identifies the candidates.size() best candidates in the + // given tracker list. + // + // To support gathering candidates from multiple tracker lists, + // current_candidates is nonzero. + template <size_t N> + static int SelectCandidates(absl::Span<TrackerType*> candidates, + int current_candidates, + const HintedTrackerLists<N>& tracker_list, + size_t tracker_start); + + // Release desired pages from the page trackers in candidates. Returns the + // number of pages released. + Length ReleaseCandidates(absl::Span<TrackerType*> candidates, Length desired) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + HugeLength size_; + + Length allocated_; + Length unmapped_; + + // How much have we eagerly unmapped (in already released hugepages), but + // not reported to ReleasePages calls? + Length unmapping_unaccounted_; + + FillerPartialRerelease partial_rerelease_; + + // Functionality related to time series tracking. + void UpdateFillerStatsTracker(); + using StatsTrackerType = FillerStatsTracker<600>; + StatsTrackerType fillerstats_tracker_; +}; + +template <MemoryModifyFunction Unback> +inline typename PageTracker<Unback>::PageAllocation PageTracker<Unback>::Get( + Length n) { + size_t index = free_.FindAndMark(n.raw_num()); + + ASSERT(released_by_page_.CountBits(0, kPagesPerHugePage.raw_num()) == + released_count_); + + size_t unbacked = 0; + // If release_count_ == 0, CountBits will return 0 and ClearRange will be a + // no-op (but will touch cachelines) due to the invariants guaranteed by + // CountBits() == released_count_. + // + // This is a performance optimization, not a logical requirement. 
+ if (ABSL_PREDICT_FALSE(released_count_ > 0)) { + unbacked = released_by_page_.CountBits(index, n.raw_num()); + released_by_page_.ClearRange(index, n.raw_num()); + ASSERT(released_count_ >= unbacked); + released_count_ -= unbacked; + } + + ASSERT(released_by_page_.CountBits(0, kPagesPerHugePage.raw_num()) == + released_count_); + return PageAllocation{location_.first_page() + Length(index), + Length(unbacked)}; +} + +template <MemoryModifyFunction Unback> +inline void PageTracker<Unback>::Put(PageId p, Length n) { + Length index = p - location_.first_page(); + free_.Unmark(index.raw_num(), n.raw_num()); + + when_numerator_ += n.raw_num() * absl::base_internal::CycleClock::Now(); + when_denominator_ += n.raw_num(); +} + +template <MemoryModifyFunction Unback> +inline Length PageTracker<Unback>::ReleaseFree() { + size_t count = 0; + size_t index = 0; + size_t n; + // For purposes of tracking, pages which are not yet released are "free" in + // the released_by_page_ bitmap. We subrelease these pages in an iterative + // process: + // + // 1. Identify the next range of still backed pages. + // 2. Iterate on the free_ tracker within this range. For any free range + // found, mark these as unbacked. + // 3. Release the subrange to the OS. + while (released_by_page_.NextFreeRange(index, &index, &n)) { + size_t free_index; + size_t free_n; + + // Check for freed pages in this unreleased region. + if (free_.NextFreeRange(index, &free_index, &free_n) && + free_index < index + n) { + // If there is a free range which overlaps with [index, index+n), release + // it. + size_t end = std::min(free_index + free_n, index + n); + + // In debug builds, verify [free_index, end) is backed. + size_t length = end - free_index; + ASSERT(released_by_page_.CountBits(free_index, length) == 0); + // Mark pages as released. Amortize the update to release_count_. + released_by_page_.SetRange(free_index, length); + + PageId p = location_.first_page() + Length(free_index); + // TODO(b/122551676): If release fails, we should not SetRange above. + ReleasePages(p, Length(length)); + + index = end; + count += length; + } else { + // [index, index+n) did not have an overlapping range in free_, move to + // the next backed range of pages. + index += n; + } + } + + released_count_ += count; + ASSERT(Length(released_count_) <= kPagesPerHugePage); + ASSERT(released_by_page_.CountBits(0, kPagesPerHugePage.raw_num()) == + released_count_); + init_when(absl::base_internal::CycleClock::Now()); + return Length(count); +} + +template <MemoryModifyFunction Unback> +inline void PageTracker<Unback>::AddSpanStats(SmallSpanStats* small, + LargeSpanStats* large, + PageAgeHistograms* ages) const { + size_t index = 0, n; + + uint64_t w = when_denominator_ == 0 ? when_numerator_ + : when_numerator_ / when_denominator_; + while (free_.NextFreeRange(index, &index, &n)) { + bool is_released = released_by_page_.GetBit(index); + // Find the last bit in the run with the same state (set or cleared) as + // index. + size_t end; + if (index >= kPagesPerHugePage.raw_num() - 1) { + end = kPagesPerHugePage.raw_num(); + } else { + end = is_released ? 
released_by_page_.FindClear(index + 1) + : released_by_page_.FindSet(index + 1); + } + n = std::min(end - index, n); + ASSERT(n > 0); + + if (n < kMaxPages.raw_num()) { + if (small != nullptr) { + if (is_released) { + small->returned_length[n]++; + } else { + small->normal_length[n]++; + } + } + } else { + if (large != nullptr) { + large->spans++; + if (is_released) { + large->returned_pages += Length(n); + } else { + large->normal_pages += Length(n); + } + } + } + + if (ages) { + ages->RecordRange(Length(n), is_released, w); + } + index += n; + } +} + +template <MemoryModifyFunction Unback> +inline bool PageTracker<Unback>::empty() const { + return free_.used() == 0; +} + +template <MemoryModifyFunction Unback> +inline Length PageTracker<Unback>::free_pages() const { + return kPagesPerHugePage - used_pages(); +} + +template <class TrackerType> +inline HugePageFiller<TrackerType>::HugePageFiller( + FillerPartialRerelease partial_rerelease) + : HugePageFiller( + partial_rerelease, + Clock{.now = absl::base_internal::CycleClock::Now, + .freq = absl::base_internal::CycleClock::Frequency}) {} + +// For testing with mock clock +template <class TrackerType> +inline HugePageFiller<TrackerType>::HugePageFiller( + FillerPartialRerelease partial_rerelease, Clock clock) + : size_(NHugePages(0)), + partial_rerelease_(partial_rerelease), + fillerstats_tracker_(clock, absl::Minutes(10), absl::Minutes(5)) {} + +template <class TrackerType> +inline typename HugePageFiller<TrackerType>::TryGetResult +HugePageFiller<TrackerType>::TryGet(Length n) { + ASSERT(n > Length(0)); + + // How do we choose which hugepage to allocate from (among those with + // a free range of at least n?) Our goal is to be as space-efficient + // as possible, which leads to two priorities: + // + // (1) avoid fragmentation; keep free ranges in a hugepage as long + // as possible. This maintains our ability to satisfy large + // requests without allocating new hugepages + // (2) fill mostly-full hugepages more; let mostly-empty hugepages + // empty out. This lets us recover totally empty hugepages (and + // return them to the OS.) + // + // In practice, avoiding fragmentation is by far more important: + // space usage can explode if we don't jealously guard large free ranges. + // + // Our primary measure of fragmentation of a hugepage by a proxy measure: the + // longest free range it contains. If this is short, any free space is + // probably fairly fragmented. It also allows us to instantly know if a + // hugepage can support a given allocation. + // + // We quantize the number of allocations in a hugepage (chunked + // logarithmically.) We favor allocating from hugepages with many allocations + // already present, which helps with (2) above. Note that using the number of + // allocations works substantially better than the number of allocated pages; + // to first order allocations of any size are about as likely to be freed, and + // so (by simple binomial probability distributions) we're more likely to + // empty out a hugepage with 2 5-page allocations than one with 5 1-pages. + // + // The above suggests using the hugepage with the shortest longest empty + // range, breaking ties in favor of fewest number of allocations. This works + // well for most workloads but caused bad page heap fragmentation for some: + // b/63301358 and b/138618726. 
The intuition for what went wrong is + // that although the tail of large allocations is donated to the Filler (see + // HugePageAwareAllocator::AllocRawHugepages) for use, we don't actually + // want to use them until the regular Filler hugepages are used up. That + // way, they can be reassembled as a single large hugepage range if the + // large allocation is freed. + // Some workloads can tickle this discrepancy a lot, because they have a lot + // of large, medium-lifetime allocations. To fix this we treat hugepages + // that are freshly donated as less preferable than hugepages that have been + // already used for small allocations, regardless of their longest_free_range. + // + // Overall our allocation preference is: + // - We prefer allocating from used freelists rather than freshly donated + // - We prefer donated pages over previously released hugepages ones. + // - Among donated freelists we prefer smaller longest_free_range + // - Among used freelists we prefer smaller longest_free_range + // with ties broken by (quantized) alloc counts + // + // We group hugepages by longest_free_range and quantized alloc count and + // store each group in a TrackerList. All freshly-donated groups are stored + // in a "donated" array and the groups with (possibly prior) small allocs are + // stored in a "regular" array. Each of these arrays is encapsulated in a + // HintedTrackerLists object, which stores the array together with a bitmap to + // quickly find non-empty lists. The lists are ordered to satisfy the + // following two useful properties: + // + // - later (nonempty) freelists can always fulfill requests that + // earlier ones could. + // - earlier freelists, by the above criteria, are preferred targets + // for allocation. + // + // So all we have to do is find the first nonempty freelist in the regular + // HintedTrackerList that *could* support our allocation, and it will be our + // best choice. If there is none we repeat with the donated HintedTrackerList. + ASSUME(n < kPagesPerHugePage); + TrackerType* pt; + + bool was_released = false; + do { + pt = regular_alloc_.GetLeast(ListFor(n, 0)); + if (pt) { + ASSERT(!pt->donated()); + break; + } + pt = donated_alloc_.GetLeast(n.raw_num()); + if (pt) { + break; + } + if (partial_rerelease_ == FillerPartialRerelease::Retain) { + pt = regular_alloc_partial_released_.GetLeast(ListFor(n, 0)); + if (pt) { + ASSERT(!pt->donated()); + was_released = true; + ASSERT(n_used_partial_released_ >= pt->used_pages()); + n_used_partial_released_ -= pt->used_pages(); + break; + } + } + pt = regular_alloc_released_.GetLeast(ListFor(n, 0)); + if (pt) { + ASSERT(!pt->donated()); + was_released = true; + ASSERT(n_used_released_ >= pt->used_pages()); + n_used_released_ -= pt->used_pages(); + break; + } + + return {nullptr, PageId{0}}; + } while (false); + ASSUME(pt != nullptr); + ASSERT(pt->longest_free_range() >= n); + const auto page_allocation = pt->Get(n); + AddToFillerList(pt); + allocated_ += n; + + ASSERT(was_released || page_allocation.previously_unbacked == Length(0)); + (void)was_released; + ASSERT(unmapped_ >= page_allocation.previously_unbacked); + unmapped_ -= page_allocation.previously_unbacked; + // We're being used for an allocation, so we are no longer considered + // donated by this point. + ASSERT(!pt->donated()); + UpdateFillerStatsTracker(); + return {pt, page_allocation.page}; +} + +// Marks [p, p + n) as usable by new allocations into *pt; returns pt +// if that hugepage is now empty (nullptr otherwise.) 
+// REQUIRES: pt is owned by this object (has been Contribute()), and
+// {pt, p, n} was the result of a previous TryGet.
+template <class TrackerType>
+inline TrackerType* HugePageFiller<TrackerType>::Put(TrackerType* pt, PageId p,
+                                                     Length n) {
+  // Consider releasing [p, p+n). We do this here:
+  // * To unback the memory before we mark it as free. When partially
+  //   unbacking, we release the pageheap_lock. Another thread could see the
+  //   "free" memory and begin using it before we retake the lock.
+  // * To maintain the invariant that
+  //     pt->released() => regular_alloc_released_.size() > 0 ||
+  //                       regular_alloc_partial_released_.size() > 0
+  //   We do this before removing pt from our lists, since another thread may
+  //   encounter our post-RemoveFromFillerList() update to
+  //   regular_alloc_released_.size() and regular_alloc_partial_released_.size()
+  //   while encountering pt.
+  if (partial_rerelease_ == FillerPartialRerelease::Return) {
+    pt->MaybeRelease(p, n);
+  }
+
+  RemoveFromFillerList(pt);
+
+  pt->Put(p, n);
+
+  allocated_ -= n;
+  if (partial_rerelease_ == FillerPartialRerelease::Return && pt->released()) {
+    unmapped_ += n;
+    unmapping_unaccounted_ += n;
+  }
+
+  if (pt->longest_free_range() == kPagesPerHugePage) {
+    --size_;
+    if (pt->released()) {
+      const Length free_pages = pt->free_pages();
+      const Length released_pages = pt->released_pages();
+      ASSERT(free_pages >= released_pages);
+      ASSERT(unmapped_ >= released_pages);
+      unmapped_ -= released_pages;
+
+      if (free_pages > released_pages) {
+        // We should only see a difference between free pages and released pages
+        // when we retain returned pages.
+        ASSERT(partial_rerelease_ == FillerPartialRerelease::Retain);
+
+        // pt is partially released. As the rest of the hugepage-aware
+        // allocator works in terms of whole hugepages, we need to release the
+        // rest of the hugepage. This simplifies subsequent accounting by
+        // allowing us to work with hugepage-granularity, rather than needing to
+        // retain pt's state indefinitely.
+        pageheap_lock.Unlock();
+        TrackerType::UnbackImpl(pt->location().start_addr(), kHugePageSize);
+        pageheap_lock.Lock();
+
+        unmapping_unaccounted_ += free_pages - released_pages;
+      }
+    }
+
+    UpdateFillerStatsTracker();
+    return pt;
+  }
+  AddToFillerList(pt);
+  UpdateFillerStatsTracker();
+  return nullptr;
+}
+
+template <class TrackerType>
+inline void HugePageFiller<TrackerType>::Contribute(TrackerType* pt,
+                                                    bool donated) {
+  // A contributed huge page should not yet be subreleased.
+  ASSERT(pt->released_pages() == Length(0));
+
+  allocated_ += pt->used_pages();
+  if (donated) {
+    DonateToFillerList(pt);
+  } else {
+    AddToFillerList(pt);
+  }
+  ++size_;
+  UpdateFillerStatsTracker();
+}
+
+template <class TrackerType>
+template <size_t N>
+inline int HugePageFiller<TrackerType>::SelectCandidates(
+    absl::Span<TrackerType*> candidates, int current_candidates,
+    const HintedTrackerLists<N>& tracker_list, size_t tracker_start) {
+  auto PushCandidate = [&](TrackerType* pt) {
+    // If we have few candidates, we can avoid creating a heap.
+    //
+    // In ReleaseCandidates(), we unconditionally sort the list and linearly
+    // iterate through it--rather than pop_heap repeatedly--so we only need the
+    // heap for creating a bounded-size priority queue.
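+    //
+    // Bounded-heap behavior, for illustration (hypothetical used_pages()
+    // values): with candidates.size() == 3 and trackers arriving with 9, 4,
+    // 7, 2 and 8 used pages, the heap first fills with {9, 4, 7}; the tracker
+    // with 2 used pages then evicts the one with 9, and the one with 8 is
+    // rejected, leaving {2, 4, 7} -- the three emptiest hugepages, which
+    // ReleaseCandidates() later sorts and releases in that order.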
+ if (current_candidates < candidates.size()) { + candidates[current_candidates] = pt; + current_candidates++; + + if (current_candidates == candidates.size()) { + std::make_heap(candidates.begin(), candidates.end(), + CompareForSubrelease); + } + return; + } + + // Consider popping the worst candidate from our list. + if (CompareForSubrelease(candidates[0], pt)) { + // pt is worse than the current worst. + return; + } + + std::pop_heap(candidates.begin(), candidates.begin() + current_candidates, + CompareForSubrelease); + candidates[current_candidates - 1] = pt; + std::push_heap(candidates.begin(), candidates.begin() + current_candidates, + CompareForSubrelease); + }; + + tracker_list.Iter(PushCandidate, tracker_start); + + return current_candidates; +} + +template <class TrackerType> +inline Length HugePageFiller<TrackerType>::ReleaseCandidates( + absl::Span<TrackerType*> candidates, Length target) { + absl::c_sort(candidates, CompareForSubrelease); + + Length total_released; + HugeLength total_broken = NHugePages(0); +#ifndef NDEBUG + Length last; +#endif + for (int i = 0; i < candidates.size() && total_released < target; i++) { + TrackerType* best = candidates[i]; + ASSERT(best != nullptr); + +#ifndef NDEBUG + // Double check that our sorting criteria were applied correctly. + ASSERT(last <= best->used_pages()); + last = best->used_pages(); +#endif + + if (best->unbroken()) { + ++total_broken; + } + RemoveFromFillerList(best); + Length ret = best->ReleaseFree(); + unmapped_ += ret; + ASSERT(unmapped_ >= best->released_pages()); + total_released += ret; + AddToFillerList(best); + } + + subrelease_stats_.num_pages_subreleased += total_released; + subrelease_stats_.num_hugepages_broken += total_broken; + + // Keep separate stats if the on going release is triggered by reaching + // tcmalloc limit + if (subrelease_stats_.limit_hit()) { + subrelease_stats_.total_pages_subreleased_due_to_limit += total_released; + subrelease_stats_.total_hugepages_broken_due_to_limit += total_broken; + } + return total_released; +} + +template <class TrackerType> +inline Length HugePageFiller<TrackerType>::GetDesiredSubreleasePages( + Length desired, Length total_released, absl::Duration peak_interval) { + // Don't subrelease pages if it wouldn't push you under the latest peak. + // This is a bit subtle: We want the current *mapped* pages not to be below + // the recent *demand* peak, i.e., if we have a large amount of free memory + // right now but demand is below a recent peak, we still want to subrelease. + ASSERT(total_released < desired); + + if (peak_interval == absl::ZeroDuration()) { + return desired; + } + + UpdateFillerStatsTracker(); + Length demand_at_peak = + fillerstats_tracker_.GetRecentPeak(peak_interval).num_pages; + Length current_pages = used_pages() + free_pages(); + + if (demand_at_peak != Length(0)) { + Length new_desired; + if (demand_at_peak >= current_pages) { + new_desired = total_released; + } else { + new_desired = total_released + (current_pages - demand_at_peak); + } + + if (new_desired >= desired) { + return desired; + } + + // Report the amount of memory that we didn't release due to this + // mechanism, but never more than free_pages, since we would not have + // been able to release that much memory with or without this mechanism + // (i.e., reporting more would be confusing). 
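+    //
+    // Numeric example (hypothetical values): with desired = 100,
+    // total_released = 10, current_pages = 900 and a recent demand_at_peak of
+    // 850, new_desired = 10 + (900 - 850) = 60. Since 60 < 100, the release
+    // target drops to 60 and we report min(free_pages(), 40) pages as skipped.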
+    Length skipped_pages = std::min(free_pages(), (desired - new_desired));
+    fillerstats_tracker_.ReportSkippedSubreleasePages(
+        skipped_pages, current_pages, peak_interval);
+    return new_desired;
+  }
+
+  return desired;
+}
+
+// Tries to release desired pages by iteratively releasing from the emptiest
+// possible hugepage and releasing its free memory to the system. Return the
+// number of pages actually released.
+template <class TrackerType>
+inline Length HugePageFiller<TrackerType>::ReleasePages(
+    Length desired, absl::Duration skip_subrelease_after_peaks_interval,
+    bool hit_limit) {
+  Length total_released;
+
+  // We also do eager release, once we've called this at least once:
+  // claim credit for anything that gets done.
+  if (unmapping_unaccounted_.raw_num() > 0) {
+    // TODO(ckennelly): This may overshoot in releasing more than desired
+    // pages.
+    Length n = unmapping_unaccounted_;
+    unmapping_unaccounted_ = Length(0);
+    subrelease_stats_.num_pages_subreleased += n;
+
+    if (n >= desired) {
+      return n;
+    }
+
+    total_released += n;
+  }
+
+  if (skip_subrelease_after_peaks_interval != absl::ZeroDuration()) {
+    desired = GetDesiredSubreleasePages(desired, total_released,
+                                        skip_subrelease_after_peaks_interval);
+    if (desired <= total_released) {
+      return total_released;
+    }
+  }
+
+  subrelease_stats_.set_limit_hit(hit_limit);
+
+  // Optimize for releasing up to a huge page worth of small pages (scattered
+  // over many parts of the filler). Since we hold pageheap_lock, we cannot
+  // allocate here.
+  constexpr size_t kCandidates = kPagesPerHugePage.raw_num();
+  using CandidateArray = std::array<TrackerType*, kCandidates>;
+
+  if (partial_rerelease_ == FillerPartialRerelease::Retain) {
+    while (total_released < desired) {
+      CandidateArray candidates;
+      // We can skip the first kChunks lists as they are known to be 100% full.
+      // (Those lists are likely to be long.)
+      //
+      // We do not examine the regular_alloc_released_ lists, as they only
+      // contain completely released pages.
+      int n_candidates =
+          SelectCandidates(absl::MakeSpan(candidates), 0,
+                           regular_alloc_partial_released_, kChunks);
+
+      Length released =
+          ReleaseCandidates(absl::MakeSpan(candidates.data(), n_candidates),
+                            desired - total_released);
+      if (released == Length(0)) {
+        break;
+      }
+      total_released += released;
+    }
+  }
+
+  // Only consider breaking up a hugepage if there are no partially released
+  // pages.
+  while (total_released < desired) {
+    CandidateArray candidates;
+    int n_candidates = SelectCandidates(absl::MakeSpan(candidates), 0,
+                                        regular_alloc_, kChunks);
+    // TODO(b/138864853): Perhaps remove donated_alloc_ from here, it's not a
+    // great candidate for partial release.
+    n_candidates = SelectCandidates(absl::MakeSpan(candidates), n_candidates,
+                                    donated_alloc_, 0);
+
+    Length released =
+        ReleaseCandidates(absl::MakeSpan(candidates.data(), n_candidates),
+                          desired - total_released);
+    if (released == Length(0)) {
+      break;
+    }
+    total_released += released;
+  }
+
+  return total_released;
+}
+
+template <class TrackerType>
+inline void HugePageFiller<TrackerType>::AddSpanStats(
+    SmallSpanStats* small, LargeSpanStats* large,
+    PageAgeHistograms* ages) const {
+  auto loop = [&](const TrackerType* pt) {
+    pt->AddSpanStats(small, large, ages);
+  };
+  // We can skip the first kChunks lists as they are known to be 100% full.
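+  // (A hugepage on a 100% full list has longest_free_range() == 0 and thus no
+  // free spans to report, which is why skipping those lists is safe.)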
+ regular_alloc_.Iter(loop, kChunks); + donated_alloc_.Iter(loop, 0); + + if (partial_rerelease_ == FillerPartialRerelease::Retain) { + regular_alloc_partial_released_.Iter(loop, 0); + } else { + ASSERT(regular_alloc_partial_released_.empty()); + ASSERT(n_used_partial_released_ == Length(0)); + } + regular_alloc_released_.Iter(loop, 0); +} + +template <class TrackerType> +inline BackingStats HugePageFiller<TrackerType>::stats() const { + BackingStats s; + s.system_bytes = size_.in_bytes(); + s.free_bytes = free_pages().in_bytes(); + s.unmapped_bytes = unmapped_pages().in_bytes(); + return s; +} + +namespace huge_page_filler_internal { +// Computes some histograms of fullness. Because nearly empty/full huge pages +// are much more interesting, we calculate 4 buckets at each of the beginning +// and end of size one, and then divide the overall space by 16 to have 16 +// (mostly) even buckets in the middle. +class UsageInfo { + public: + enum Type { kRegular, kDonated, kPartialReleased, kReleased, kNumTypes }; + + UsageInfo() { + size_t i; + for (i = 0; i <= 4 && i < kPagesPerHugePage.raw_num(); ++i) { + bucket_bounds_[buckets_size_] = i; + buckets_size_++; + } + if (i < kPagesPerHugePage.raw_num() - 4) { + // Because kPagesPerHugePage is a power of two, it must be at least 16 + // to get inside this "if" - either i=5 and kPagesPerHugePage=8 and + // the test fails, or kPagesPerHugePage <= 4 and the test fails. + ASSERT(kPagesPerHugePage >= Length(16)); + constexpr int step = kPagesPerHugePage.raw_num() / 16; + // We want to move in "step"-sized increments, aligned every "step". + // So first we have to round i up to the nearest step boundary. This + // logic takes advantage of step being a power of two, so step-1 is + // all ones in the low-order bits. + i = ((i - 1) | (step - 1)) + 1; + for (; i < kPagesPerHugePage.raw_num() - 4; i += step) { + bucket_bounds_[buckets_size_] = i; + buckets_size_++; + } + i = kPagesPerHugePage.raw_num() - 4; + } + for (; i < kPagesPerHugePage.raw_num(); ++i) { + bucket_bounds_[buckets_size_] = i; + buckets_size_++; + } + CHECK_CONDITION(buckets_size_ <= kBucketCapacity); + } + + template <class TrackerType> + void Record(const TrackerType* pt, Type which) { + const Length free = kPagesPerHugePage - pt->used_pages(); + const Length lf = pt->longest_free_range(); + const size_t nalloc = pt->nallocs(); + // This is a little annoying as our buckets *have* to differ; + // nalloc is in [1,256], free_pages and longest_free are in [0, 255]. + free_page_histo_[which][BucketNum(free.raw_num())]++; + longest_free_histo_[which][BucketNum(lf.raw_num())]++; + nalloc_histo_[which][BucketNum(nalloc - 1)]++; + } + + void Print(Printer* out) { + PrintHisto(out, free_page_histo_[kRegular], + "# of regular hps with a<= # of free pages <b", 0); + PrintHisto(out, free_page_histo_[kDonated], + "# of donated hps with a<= # of free pages <b", 0); + PrintHisto(out, free_page_histo_[kPartialReleased], + "# of partial released hps with a<= # of free pages <b", 0); + PrintHisto(out, free_page_histo_[kReleased], + "# of released hps with a<= # of free pages <b", 0); + // For donated huge pages, number of allocs=1 and longest free range = + // number of free pages, so it isn't useful to show the next two. 
+ PrintHisto(out, longest_free_histo_[kRegular], + "# of regular hps with a<= longest free range <b", 0); + PrintHisto(out, longest_free_histo_[kPartialReleased], + "# of partial released hps with a<= longest free range <b", 0); + PrintHisto(out, longest_free_histo_[kReleased], + "# of released hps with a<= longest free range <b", 0); + PrintHisto(out, nalloc_histo_[kRegular], + "# of regular hps with a<= # of allocations <b", 1); + PrintHisto(out, nalloc_histo_[kPartialReleased], + "# of partial released hps with a<= # of allocations <b", 1); + PrintHisto(out, nalloc_histo_[kReleased], + "# of released hps with a<= # of allocations <b", 1); + } + + void Print(PbtxtRegion* hpaa) { + static constexpr absl::string_view kTrackerTypes[kNumTypes] = { + "REGULAR", "DONATED", "PARTIAL", "RELEASED"}; + for (int i = 0; i < kNumTypes; ++i) { + PbtxtRegion scoped = hpaa->CreateSubRegion("filler_tracker"); + scoped.PrintRaw("type", kTrackerTypes[i]); + PrintHisto(&scoped, free_page_histo_[i], "free_pages_histogram", 0); + PrintHisto(&scoped, longest_free_histo_[i], + "longest_free_range_histogram", 0); + PrintHisto(&scoped, nalloc_histo_[i], "allocations_histogram", 1); + } + } + + private: + // Maximum of 4 buckets at the start and end, and 16 in the middle. + static constexpr size_t kBucketCapacity = 4 + 16 + 4; + using Histo = size_t[kBucketCapacity]; + + int BucketNum(size_t page) { + auto it = + std::upper_bound(bucket_bounds_, bucket_bounds_ + buckets_size_, page); + CHECK_CONDITION(it != bucket_bounds_); + return it - bucket_bounds_ - 1; + } + + void PrintHisto(Printer* out, Histo h, const char blurb[], size_t offset) { + out->printf("\nHugePageFiller: %s", blurb); + for (size_t i = 0; i < buckets_size_; ++i) { + if (i % 6 == 0) { + out->printf("\nHugePageFiller:"); + } + out->printf(" <%3zu<=%6zu", bucket_bounds_[i] + offset, h[i]); + } + out->printf("\n"); + } + + void PrintHisto(PbtxtRegion* hpaa, Histo h, const char key[], size_t offset) { + for (size_t i = 0; i < buckets_size_; ++i) { + auto hist = hpaa->CreateSubRegion(key); + hist.PrintI64("lower_bound", bucket_bounds_[i] + offset); + hist.PrintI64("upper_bound", + (i == buckets_size_ - 1 ? bucket_bounds_[i] + : bucket_bounds_[i + 1] - 1) + + offset); + hist.PrintI64("value", h[i]); + } + } + + // Arrays, because they are split per alloc type. + Histo free_page_histo_[kNumTypes]{}; + Histo longest_free_histo_[kNumTypes]{}; + Histo nalloc_histo_[kNumTypes]{}; + size_t bucket_bounds_[kBucketCapacity]; + int buckets_size_ = 0; +}; +} // namespace huge_page_filler_internal + +template <class TrackerType> +inline void HugePageFiller<TrackerType>::Print(Printer* out, + bool everything) const { + out->printf("HugePageFiller: densely pack small requests into hugepages\n"); + + HugeLength nrel = + regular_alloc_released_.size() + regular_alloc_partial_released_.size(); + HugeLength nfull = NHugePages(0); + + // note kChunks, not kNumLists here--we're iterating *full* lists. + for (size_t chunk = 0; chunk < kChunks; ++chunk) { + nfull += NHugePages( + regular_alloc_[ListFor(/*longest=*/Length(0), chunk)].length()); + } + // A donated alloc full list is impossible because it would have never been + // donated in the first place. (It's an even hugepage.) + ASSERT(donated_alloc_[0].empty()); + // Evaluate a/b, avoiding division by zero + const auto safe_div = [](Length a, Length b) { + return b == Length(0) ? 0. 
+ : static_cast<double>(a.raw_num()) / + static_cast<double>(b.raw_num()); + }; + const HugeLength n_partial = size() - nrel - nfull; + const HugeLength n_nonfull = + n_partial + regular_alloc_partial_released_.size(); + out->printf( + "HugePageFiller: %zu total, %zu full, %zu partial, %zu released " + "(%zu partially), 0 quarantined\n", + size().raw_num(), nfull.raw_num(), n_partial.raw_num(), nrel.raw_num(), + regular_alloc_partial_released_.size().raw_num()); + out->printf("HugePageFiller: %zu pages free in %zu hugepages, %.4f free\n", + free_pages().raw_num(), size().raw_num(), + safe_div(free_pages(), size().in_pages())); + + ASSERT(free_pages() <= n_nonfull.in_pages()); + out->printf("HugePageFiller: among non-fulls, %.4f free\n", + safe_div(free_pages(), n_nonfull.in_pages())); + + out->printf( + "HugePageFiller: %zu used pages in subreleased hugepages (%zu of them in " + "partially released)\n", + used_pages_in_any_subreleased().raw_num(), + used_pages_in_partial_released().raw_num()); + + out->printf( + "HugePageFiller: %zu hugepages partially released, %.4f released\n", + nrel.raw_num(), safe_div(unmapped_pages(), nrel.in_pages())); + out->printf("HugePageFiller: %.4f of used pages hugepageable\n", + hugepage_frac()); + + // Subrelease + out->printf( + "HugePageFiller: Since startup, %zu pages subreleased, %zu hugepages " + "broken, (%zu pages, %zu hugepages due to reaching tcmalloc limit)\n", + subrelease_stats_.total_pages_subreleased.raw_num(), + subrelease_stats_.total_hugepages_broken.raw_num(), + subrelease_stats_.total_pages_subreleased_due_to_limit.raw_num(), + subrelease_stats_.total_hugepages_broken_due_to_limit.raw_num()); + + if (!everything) return; + + // Compute some histograms of fullness. + using huge_page_filler_internal::UsageInfo; + UsageInfo usage; + regular_alloc_.Iter( + [&](const TrackerType* pt) { usage.Record(pt, UsageInfo::kRegular); }, 0); + donated_alloc_.Iter( + [&](const TrackerType* pt) { usage.Record(pt, UsageInfo::kDonated); }, 0); + if (partial_rerelease_ == FillerPartialRerelease::Retain) { + regular_alloc_partial_released_.Iter( + [&](const TrackerType* pt) { + usage.Record(pt, UsageInfo::kPartialReleased); + }, + 0); + } else { + ASSERT(regular_alloc_partial_released_.empty()); + ASSERT(n_used_partial_released_.raw_num() == 0); + } + regular_alloc_released_.Iter( + [&](const TrackerType* pt) { usage.Record(pt, UsageInfo::kReleased); }, + 0); + + out->printf("\n"); + out->printf("HugePageFiller: fullness histograms\n"); + usage.Print(out); + + out->printf("\n"); + fillerstats_tracker_.Print(out); +} + +template <class TrackerType> +inline void HugePageFiller<TrackerType>::PrintInPbtxt(PbtxtRegion* hpaa) const { + HugeLength nrel = + regular_alloc_released_.size() + regular_alloc_partial_released_.size(); + HugeLength nfull = NHugePages(0); + + // note kChunks, not kNumLists here--we're iterating *full* lists. + for (size_t chunk = 0; chunk < kChunks; ++chunk) { + nfull += NHugePages( + regular_alloc_[ListFor(/*longest=*/Length(0), chunk)].length()); + } + // A donated alloc full list is impossible because it would have never been + // donated in the first place. (It's an even hugepage.) + ASSERT(donated_alloc_[0].empty()); + // Evaluate a/b, avoiding division by zero + const auto safe_div = [](Length a, Length b) { + return b == Length(0) ? 
0 + : static_cast<double>(a.raw_num()) / + static_cast<double>(b.raw_num()); + }; + const HugeLength n_partial = size() - nrel - nfull; + hpaa->PrintI64("filler_full_huge_pages", nfull.raw_num()); + hpaa->PrintI64("filler_partial_huge_pages", n_partial.raw_num()); + hpaa->PrintI64("filler_released_huge_pages", nrel.raw_num()); + hpaa->PrintI64("filler_partially_released_huge_pages", + regular_alloc_partial_released_.size().raw_num()); + hpaa->PrintI64("filler_free_pages", free_pages().raw_num()); + hpaa->PrintI64("filler_used_pages_in_subreleased", + used_pages_in_any_subreleased().raw_num()); + hpaa->PrintI64("filler_used_pages_in_partial_released", + used_pages_in_partial_released().raw_num()); + hpaa->PrintI64( + "filler_unmapped_bytes", + static_cast<uint64_t>(nrel.raw_num() * + safe_div(unmapped_pages(), nrel.in_pages()))); + hpaa->PrintI64( + "filler_hugepageable_used_bytes", + static_cast<uint64_t>(hugepage_frac() * + static_cast<double>(allocated_.in_bytes()))); + hpaa->PrintI64("filler_num_pages_subreleased", + subrelease_stats_.total_pages_subreleased.raw_num()); + hpaa->PrintI64("filler_num_hugepages_broken", + subrelease_stats_.total_hugepages_broken.raw_num()); + hpaa->PrintI64( + "filler_num_pages_subreleased_due_to_limit", + subrelease_stats_.total_pages_subreleased_due_to_limit.raw_num()); + hpaa->PrintI64( + "filler_num_hugepages_broken_due_to_limit", + subrelease_stats_.total_hugepages_broken_due_to_limit.raw_num()); + // Compute some histograms of fullness. + using huge_page_filler_internal::UsageInfo; + UsageInfo usage; + regular_alloc_.Iter( + [&](const TrackerType* pt) { usage.Record(pt, UsageInfo::kRegular); }, 0); + donated_alloc_.Iter( + [&](const TrackerType* pt) { usage.Record(pt, UsageInfo::kDonated); }, 0); + if (partial_rerelease_ == FillerPartialRerelease::Retain) { + regular_alloc_partial_released_.Iter( + [&](const TrackerType* pt) { + usage.Record(pt, UsageInfo::kPartialReleased); + }, + 0); + } else { + ASSERT(regular_alloc_partial_released_.empty()); + ASSERT(n_used_partial_released_ == Length(0)); + } + regular_alloc_released_.Iter( + [&](const TrackerType* pt) { usage.Record(pt, UsageInfo::kReleased); }, + 0); + + usage.Print(hpaa); + + fillerstats_tracker_.PrintInPbtxt(hpaa); +} + +template <class TrackerType> +inline void HugePageFiller<TrackerType>::UpdateFillerStatsTracker() { + StatsTrackerType::FillerStats stats; + stats.num_pages = allocated_; + stats.free_pages = free_pages(); + stats.unmapped_pages = unmapped_pages(); + stats.used_pages_in_subreleased_huge_pages = + n_used_partial_released_ + n_used_released_; + stats.huge_pages[StatsTrackerType::kRegular] = regular_alloc_.size(); + stats.huge_pages[StatsTrackerType::kDonated] = donated_alloc_.size(); + stats.huge_pages[StatsTrackerType::kPartialReleased] = + regular_alloc_partial_released_.size(); + stats.huge_pages[StatsTrackerType::kReleased] = + regular_alloc_released_.size(); + stats.num_pages_subreleased = subrelease_stats_.num_pages_subreleased; + stats.num_hugepages_broken = subrelease_stats_.num_hugepages_broken; + fillerstats_tracker_.Report(stats); + subrelease_stats_.reset(); +} + +template <class TrackerType> +inline size_t HugePageFiller<TrackerType>::IndexFor(TrackerType* pt) { + ASSERT(!pt->empty()); + // Prefer to allocate from hugepages with many allocations already present; + // spaced logarithmically. + const size_t na = pt->nallocs(); + // This equals 63 - ceil(log2(na)) + // (or 31 if size_t is 4 bytes, etc.) 
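+  // Worked example, assuming a 64-bit size_t: na=1 gives clzl(1) = 63, which
+  // the clamping below maps to chunk kChunks - 1; na=2 gives clzl(3) = 62 and
+  // chunk kChunks - 2; na in (2,4] gives clzl = 61 and chunk kChunks - 3; and
+  // so on, until the index clamps at 0 for sufficiently large na.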
+ const size_t neg_ceil_log = __builtin_clzl(2 * na - 1); + + // We want the same spread as neg_ceil_log, but spread over [0, + // kChunks) (clamped at the left edge) instead of [0, 64). So subtract off + // the difference (computed by forcing na=1 to kChunks - 1.) + const size_t kOffset = __builtin_clzl(1) - (kChunks - 1); + const size_t i = std::max(neg_ceil_log, kOffset) - kOffset; + ASSERT(i < kChunks); + return i; +} + +template <class TrackerType> +inline size_t HugePageFiller<TrackerType>::ListFor(const Length longest, + const size_t chunk) { + ASSERT(chunk < kChunks); + ASSERT(longest < kPagesPerHugePage); + return longest.raw_num() * kChunks + chunk; +} + +template <class TrackerType> +inline void HugePageFiller<TrackerType>::RemoveFromFillerList(TrackerType* pt) { + Length longest = pt->longest_free_range(); + ASSERT(longest < kPagesPerHugePage); + + if (pt->donated()) { + donated_alloc_.Remove(pt, longest.raw_num()); + } else { + size_t chunk = IndexFor(pt); + size_t i = ListFor(longest, chunk); + if (!pt->released()) { + regular_alloc_.Remove(pt, i); + } else if (partial_rerelease_ == FillerPartialRerelease::Return || + pt->free_pages() <= pt->released_pages()) { + regular_alloc_released_.Remove(pt, i); + ASSERT(n_used_released_ >= pt->used_pages()); + n_used_released_ -= pt->used_pages(); + } else { + regular_alloc_partial_released_.Remove(pt, i); + ASSERT(n_used_partial_released_ >= pt->used_pages()); + n_used_partial_released_ -= pt->used_pages(); + } + } +} + +template <class TrackerType> +inline void HugePageFiller<TrackerType>::AddToFillerList(TrackerType* pt) { + size_t chunk = IndexFor(pt); + Length longest = pt->longest_free_range(); + ASSERT(longest < kPagesPerHugePage); + + // Once a donated alloc is used in any way, it degenerates into being a + // regular alloc. This allows the algorithm to keep using it (we had to be + // desperate to use it in the first place), and thus preserves the other + // donated allocs. + pt->set_donated(false); + + size_t i = ListFor(longest, chunk); + if (!pt->released()) { + regular_alloc_.Add(pt, i); + } else if (partial_rerelease_ == FillerPartialRerelease::Return || + pt->free_pages() == pt->released_pages()) { + regular_alloc_released_.Add(pt, i); + n_used_released_ += pt->used_pages(); + } else { + ASSERT(partial_rerelease_ == FillerPartialRerelease::Retain); + regular_alloc_partial_released_.Add(pt, i); + n_used_partial_released_ += pt->used_pages(); + } +} + +template <class TrackerType> +inline void HugePageFiller<TrackerType>::DonateToFillerList(TrackerType* pt) { + Length longest = pt->longest_free_range(); + ASSERT(longest < kPagesPerHugePage); + + // We should never be donating already-released trackers! + ASSERT(!pt->released()); + pt->set_donated(true); + + donated_alloc_.Add(pt, longest.raw_num()); +} + +template <class TrackerType> +inline double HugePageFiller<TrackerType>::hugepage_frac() const { + // How many of our used pages are on non-huge pages? Since + // everything on a released hugepage is either used or released, + // just the difference: + const Length nrel = regular_alloc_released_.size().in_pages(); + const Length used = used_pages(); + const Length unmapped = unmapped_pages(); + ASSERT(n_used_partial_released_ <= + regular_alloc_partial_released_.size().in_pages()); + const Length used_on_rel = (nrel >= unmapped ? nrel - unmapped : Length(0)) + + n_used_partial_released_; + ASSERT(used >= used_on_rel); + const Length used_on_huge = used - used_on_rel; + + const Length denom = used > Length(0) ? 
used : Length(1); + const double ret = + static_cast<double>(used_on_huge.raw_num()) / denom.raw_num(); + ASSERT(ret >= 0); + ASSERT(ret <= 1); + return std::clamp<double>(ret, 0, 1); +} + +// Helper for stat functions. +template <class TrackerType> +inline Length HugePageFiller<TrackerType>::free_pages() const { + return size().in_pages() - used_pages() - unmapped_pages(); +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_HUGE_PAGE_FILLER_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_page_filler_test.cc b/contrib/libs/tcmalloc/tcmalloc/huge_page_filler_test.cc new file mode 100644 index 0000000000..9879d41d79 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/huge_page_filler_test.cc @@ -0,0 +1,3799 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/huge_page_filler.h" + +#include <stdio.h> +#include <string.h> +#include <sys/mman.h> + +#include <cstdint> +#include <iterator> +#include <memory> +#include <new> +#include <random> +#include <string> +#include <thread> // NOLINT(build/c++11) +#include <utility> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/algorithm/container.h" +#include "absl/base/internal/sysinfo.h" +#include "absl/base/thread_annotations.h" +#include "absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_set.h" +#include "absl/flags/flag.h" +#include "absl/memory/memory.h" +#include "absl/random/bernoulli_distribution.h" +#include "absl/random/random.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_join.h" +#include "absl/synchronization/blocking_counter.h" +#include "absl/synchronization/mutex.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "benchmark/benchmark.h" +#include "tcmalloc/common.h" +#include "tcmalloc/huge_pages.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/pages.h" +#include "tcmalloc/stats.h" + +using tcmalloc::tcmalloc_internal::Length; + +ABSL_FLAG(Length, page_tracker_defrag_lim, Length(32), + "Max allocation size for defrag test"); + +ABSL_FLAG(Length, frag_req_limit, Length(32), + "request size limit for frag test"); +ABSL_FLAG(Length, frag_size, Length(512 * 1024), + "target number of pages for frag test"); +ABSL_FLAG(uint64_t, frag_iters, 10 * 1000 * 1000, "iterations for frag test"); + +ABSL_FLAG(double, release_until, 0.01, + "fraction of used we target in pageheap"); +ABSL_FLAG(uint64_t, bytes, 1024 * 1024 * 1024, "baseline usage"); +ABSL_FLAG(double, growth_factor, 2.0, "growth over baseline"); + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +// This is an arbitrary distribution taken from page requests from +// an empirical driver test. It seems realistic enough. We trim it to +// [1, last]. 
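+// Index i of the weight vector below is a request size in pages, so a draw
+// from the returned distribution is a page count picked in proportion to how
+// often that size appeared in the trace. Typical use (as in the Defrag and
+// Fragmentation tests below):
+//   absl::BitGen rng;
+//   auto dist = EmpiricalDistribution(Length(32));
+//   Length n = Length(dist(rng));  // in [1, 32]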
+// +std::discrete_distribution<size_t> EmpiricalDistribution(Length last) { + std::vector<size_t> page_counts = []() { + std::vector<size_t> ret(12289); + ret[1] = 375745576; + ret[2] = 59737961; + ret[3] = 35549390; + ret[4] = 43896034; + ret[5] = 17484968; + ret[6] = 15830888; + ret[7] = 9021717; + ret[8] = 208779231; + ret[9] = 3775073; + ret[10] = 25591620; + ret[11] = 2483221; + ret[12] = 3595343; + ret[13] = 2232402; + ret[16] = 17639345; + ret[21] = 4215603; + ret[25] = 4212756; + ret[28] = 760576; + ret[30] = 2166232; + ret[32] = 3021000; + ret[40] = 1186302; + ret[44] = 479142; + ret[48] = 570030; + ret[49] = 101262; + ret[55] = 592333; + ret[57] = 236637; + ret[64] = 785066; + ret[65] = 44700; + ret[73] = 539659; + ret[80] = 342091; + ret[96] = 488829; + ret[97] = 504; + ret[113] = 242921; + ret[128] = 157206; + ret[129] = 145; + ret[145] = 117191; + ret[160] = 91818; + ret[192] = 67824; + ret[193] = 144; + ret[225] = 40711; + ret[256] = 38569; + ret[257] = 1; + ret[297] = 21738; + ret[320] = 13510; + ret[384] = 19499; + ret[432] = 13856; + ret[490] = 9849; + ret[512] = 3024; + ret[640] = 3655; + ret[666] = 3963; + ret[715] = 2376; + ret[768] = 288; + ret[1009] = 6389; + ret[1023] = 2788; + ret[1024] = 144; + ret[1280] = 1656; + ret[1335] = 2592; + ret[1360] = 3024; + ret[1536] = 432; + ret[2048] = 288; + ret[2560] = 72; + ret[3072] = 360; + ret[12288] = 216; + return ret; + }(); + + Length lim = last; + auto i = page_counts.begin(); + // remember lim might be too big (in which case we use the whole + // vector...) + + auto j = page_counts.size() > lim.raw_num() ? i + (lim.raw_num() + 1) + : page_counts.end(); + + return std::discrete_distribution<size_t>(i, j); +} + +class PageTrackerTest : public testing::Test { + protected: + PageTrackerTest() + : // an unlikely magic page + huge_(HugePageContaining(reinterpret_cast<void*>(0x1abcde200000))), + tracker_(huge_, absl::base_internal::CycleClock::Now()) {} + + ~PageTrackerTest() override { mock_.VerifyAndClear(); } + + struct PAlloc { + PageId p; + Length n; + }; + + void Mark(PAlloc a, size_t mark) { + EXPECT_LE(huge_.first_page(), a.p); + size_t index = (a.p - huge_.first_page()).raw_num(); + size_t end = index + a.n.raw_num(); + EXPECT_LE(end, kPagesPerHugePage.raw_num()); + for (; index < end; ++index) { + marks_[index] = mark; + } + } + + class MockUnbackInterface { + public: + void Unback(void* p, size_t len) { + CHECK_CONDITION(actual_index_ < kMaxCalls); + actual_[actual_index_] = {p, len}; + ++actual_index_; + } + + void Expect(void* p, size_t len) { + CHECK_CONDITION(expected_index_ < kMaxCalls); + expected_[expected_index_] = {p, len}; + ++expected_index_; + } + + void VerifyAndClear() { + EXPECT_EQ(expected_index_, actual_index_); + for (size_t i = 0, n = std::min(expected_index_, actual_index_); i < n; + ++i) { + EXPECT_EQ(expected_[i].ptr, actual_[i].ptr); + EXPECT_EQ(expected_[i].len, actual_[i].len); + } + expected_index_ = 0; + actual_index_ = 0; + } + + private: + struct CallArgs { + void* ptr{nullptr}; + size_t len{0}; + }; + + static constexpr size_t kMaxCalls = 10; + CallArgs expected_[kMaxCalls] = {}; + CallArgs actual_[kMaxCalls] = {}; + size_t expected_index_{0}; + size_t actual_index_{0}; + }; + + static void MockUnback(void* p, size_t len); + + typedef PageTracker<MockUnback> TestPageTracker; + + // strict because release calls should only happen when we ask + static MockUnbackInterface mock_; + + void Check(PAlloc a, size_t mark) { + EXPECT_LE(huge_.first_page(), a.p); + size_t index = (a.p - 
huge_.first_page()).raw_num(); + size_t end = index + a.n.raw_num(); + EXPECT_LE(end, kPagesPerHugePage.raw_num()); + for (; index < end; ++index) { + EXPECT_EQ(mark, marks_[index]); + } + } + size_t marks_[kPagesPerHugePage.raw_num()]; + HugePage huge_; + TestPageTracker tracker_; + + void ExpectPages(PAlloc a) { + void* ptr = a.p.start_addr(); + size_t bytes = a.n.in_bytes(); + mock_.Expect(ptr, bytes); + } + + PAlloc Get(Length n) { + absl::base_internal::SpinLockHolder l(&pageheap_lock); + PageId p = tracker_.Get(n).page; + return {p, n}; + } + + void Put(PAlloc a) { + absl::base_internal::SpinLockHolder l(&pageheap_lock); + tracker_.Put(a.p, a.n); + } + + Length ReleaseFree() { + absl::base_internal::SpinLockHolder l(&pageheap_lock); + return tracker_.ReleaseFree(); + } + + void MaybeRelease(PAlloc a) { + absl::base_internal::SpinLockHolder l(&pageheap_lock); + tracker_.MaybeRelease(a.p, a.n); + } +}; + +void PageTrackerTest::MockUnback(void* p, size_t len) { mock_.Unback(p, len); } + +PageTrackerTest::MockUnbackInterface PageTrackerTest::mock_; + +TEST_F(PageTrackerTest, AllocSane) { + Length free = kPagesPerHugePage; + auto n = Length(1); + std::vector<PAlloc> allocs; + // This should work without fragmentation. + while (n <= free) { + ASSERT_LE(n, tracker_.longest_free_range()); + EXPECT_EQ(kPagesPerHugePage - free, tracker_.used_pages()); + EXPECT_EQ(free, tracker_.free_pages()); + PAlloc a = Get(n); + Mark(a, n.raw_num()); + allocs.push_back(a); + free -= n; + ++n; + } + + // All should be distinct + for (auto alloc : allocs) { + Check(alloc, alloc.n.raw_num()); + } +} + +TEST_F(PageTrackerTest, ReleasingReturn) { + static const Length kAllocSize = kPagesPerHugePage / 4; + PAlloc a1 = Get(kAllocSize - Length(3)); + PAlloc a2 = Get(kAllocSize); + PAlloc a3 = Get(kAllocSize + Length(1)); + PAlloc a4 = Get(kAllocSize + Length(2)); + + Put(a2); + Put(a4); + // We now have a hugepage that looks like [alloced] [free] [alloced] [free]. + // The free parts should be released when we mark the hugepage as such, + // but not the allocated parts. + ExpectPages(a2); + ExpectPages(a4); + ReleaseFree(); + mock_.VerifyAndClear(); + + // Now we return the other parts, and they *should* get released. + ExpectPages(a1); + ExpectPages(a3); + + MaybeRelease(a1); + Put(a1); + + MaybeRelease(a3); + Put(a3); +} + +TEST_F(PageTrackerTest, ReleasingRetain) { + static const Length kAllocSize = kPagesPerHugePage / 4; + PAlloc a1 = Get(kAllocSize - Length(3)); + PAlloc a2 = Get(kAllocSize); + PAlloc a3 = Get(kAllocSize + Length(1)); + PAlloc a4 = Get(kAllocSize + Length(2)); + + Put(a2); + Put(a4); + // We now have a hugepage that looks like [alloced] [free] [alloced] [free]. + // The free parts should be released when we mark the hugepage as such, + // but not the allocated parts. + ExpectPages(a2); + ExpectPages(a4); + ReleaseFree(); + mock_.VerifyAndClear(); + + // Now we return the other parts, and they shouldn't get released. + Put(a1); + Put(a3); + + mock_.VerifyAndClear(); + + // But they will if we ReleaseFree. 
+ ExpectPages(a1); + ExpectPages(a3); + ReleaseFree(); + mock_.VerifyAndClear(); +} + +TEST_F(PageTrackerTest, Defrag) { + absl::BitGen rng; + const Length N = absl::GetFlag(FLAGS_page_tracker_defrag_lim); + auto dist = EmpiricalDistribution(N); + + std::vector<PAlloc> allocs; + + std::vector<PAlloc> doomed; + while (tracker_.longest_free_range() > Length(0)) { + Length n; + do { + n = Length(dist(rng)); + } while (n > tracker_.longest_free_range()); + PAlloc a = Get(n); + (absl::Bernoulli(rng, 1.0 / 2) ? allocs : doomed).push_back(a); + } + + for (auto d : doomed) { + Put(d); + } + + static const size_t kReps = 250 * 1000; + + std::vector<double> frag_samples; + std::vector<Length> longest_free_samples; + frag_samples.reserve(kReps); + longest_free_samples.reserve(kReps); + for (size_t i = 0; i < kReps; ++i) { + const Length free = kPagesPerHugePage - tracker_.used_pages(); + // Ideally, we'd like all of our free space to stay in a single + // nice little run. + const Length longest = tracker_.longest_free_range(); + double frag = free > Length(0) + ? static_cast<double>(longest.raw_num()) / free.raw_num() + : 1; + + if (i % (kReps / 25) == 0) { + printf("free = %zu longest = %zu frag = %f\n", free.raw_num(), + longest.raw_num(), frag); + } + frag_samples.push_back(frag); + longest_free_samples.push_back(longest); + + // Randomly grow or shrink (picking the only safe option when we're either + // full or empty.) + if (tracker_.longest_free_range() == Length(0) || + (absl::Bernoulli(rng, 1.0 / 2) && !allocs.empty())) { + size_t index = absl::Uniform<int32_t>(rng, 0, allocs.size()); + std::swap(allocs[index], allocs.back()); + Put(allocs.back()); + allocs.pop_back(); + } else { + Length n; + do { + n = Length(dist(rng)); + } while (n > tracker_.longest_free_range()); + allocs.push_back(Get(n)); + } + } + + std::sort(frag_samples.begin(), frag_samples.end()); + std::sort(longest_free_samples.begin(), longest_free_samples.end()); + + { + const double p10 = frag_samples[kReps * 10 / 100]; + const double p25 = frag_samples[kReps * 25 / 100]; + const double p50 = frag_samples[kReps * 50 / 100]; + const double p75 = frag_samples[kReps * 75 / 100]; + const double p90 = frag_samples[kReps * 90 / 100]; + printf("Fragmentation quantiles:\n"); + printf("p10: %f p25: %f p50: %f p75: %f p90: %f\n", p10, p25, p50, p75, + p90); + // We'd like to prety consistently rely on (75% of the time) reasonable + // defragmentation (50% of space is fully usable...) + // ...but we currently can't hit that mark consistently. + // The situation is worse on ppc with larger huge pages: + // pass rate for test is ~50% at 0.20. Reducing from 0.2 to 0.07. + // TODO(b/127466107) figure out a better solution. + EXPECT_GE(p25, 0.07); + } + + { + const Length p10 = longest_free_samples[kReps * 10 / 100]; + const Length p25 = longest_free_samples[kReps * 25 / 100]; + const Length p50 = longest_free_samples[kReps * 50 / 100]; + const Length p75 = longest_free_samples[kReps * 75 / 100]; + const Length p90 = longest_free_samples[kReps * 90 / 100]; + printf("Longest free quantiles:\n"); + printf("p10: %zu p25: %zu p50: %zu p75: %zu p90: %zu\n", p10.raw_num(), + p25.raw_num(), p50.raw_num(), p75.raw_num(), p90.raw_num()); + // Similarly, we'd really like for there usually (p25) to be a space + // for a large allocation (N - note that we've cooked the books so that + // the page tracker is going to be something like half empty (ish) and N + // is small, so that should be doable.) + // ...but, of course, it isn't. 
+ EXPECT_GE(p25, Length(4)); + } + + for (auto a : allocs) { + Put(a); + } +} + +TEST_F(PageTrackerTest, Stats) { + struct Helper { + static void Stat(const TestPageTracker& tracker, + std::vector<Length>* small_backed, + std::vector<Length>* small_unbacked, LargeSpanStats* large, + double* avg_age_backed, double* avg_age_unbacked) { + SmallSpanStats small; + *large = LargeSpanStats(); + PageAgeHistograms ages(absl::base_internal::CycleClock::Now()); + tracker.AddSpanStats(&small, large, &ages); + small_backed->clear(); + small_unbacked->clear(); + for (auto i = Length(0); i < kMaxPages; ++i) { + for (int j = 0; j < small.normal_length[i.raw_num()]; ++j) { + small_backed->push_back(i); + } + + for (int j = 0; j < small.returned_length[i.raw_num()]; ++j) { + small_unbacked->push_back(i); + } + } + + *avg_age_backed = ages.GetTotalHistogram(false)->avg_age(); + *avg_age_unbacked = ages.GetTotalHistogram(true)->avg_age(); + } + }; + + LargeSpanStats large; + std::vector<Length> small_backed, small_unbacked; + double avg_age_backed, avg_age_unbacked; + + const PageId p = Get(kPagesPerHugePage).p; + const PageId end = p + kPagesPerHugePage; + PageId next = p; + Put({next, kMaxPages + Length(1)}); + next += kMaxPages + Length(1); + + absl::SleepFor(absl::Milliseconds(10)); + Helper::Stat(tracker_, &small_backed, &small_unbacked, &large, + &avg_age_backed, &avg_age_unbacked); + EXPECT_THAT(small_backed, testing::ElementsAre()); + EXPECT_THAT(small_unbacked, testing::ElementsAre()); + EXPECT_EQ(1, large.spans); + EXPECT_EQ(kMaxPages + Length(1), large.normal_pages); + EXPECT_EQ(Length(0), large.returned_pages); + EXPECT_LE(0.01, avg_age_backed); + + ++next; + Put({next, Length(1)}); + next += Length(1); + absl::SleepFor(absl::Milliseconds(20)); + Helper::Stat(tracker_, &small_backed, &small_unbacked, &large, + &avg_age_backed, &avg_age_unbacked); + EXPECT_THAT(small_backed, testing::ElementsAre(Length(1))); + EXPECT_THAT(small_unbacked, testing::ElementsAre()); + EXPECT_EQ(1, large.spans); + EXPECT_EQ(kMaxPages + Length(1), large.normal_pages); + EXPECT_EQ(Length(0), large.returned_pages); + EXPECT_LE(((kMaxPages + Length(1)).raw_num() * 0.03 + 1 * 0.02) / + (kMaxPages + Length(2)).raw_num(), + avg_age_backed); + EXPECT_EQ(0, avg_age_unbacked); + + ++next; + Put({next, Length(2)}); + next += Length(2); + absl::SleepFor(absl::Milliseconds(30)); + Helper::Stat(tracker_, &small_backed, &small_unbacked, &large, + &avg_age_backed, &avg_age_unbacked); + EXPECT_THAT(small_backed, testing::ElementsAre(Length(1), Length(2))); + EXPECT_THAT(small_unbacked, testing::ElementsAre()); + EXPECT_EQ(1, large.spans); + EXPECT_EQ(kMaxPages + Length(1), large.normal_pages); + EXPECT_EQ(Length(0), large.returned_pages); + EXPECT_LE(((kMaxPages + Length(1)).raw_num() * 0.06 + 1 * 0.05 + 2 * 0.03) / + (kMaxPages + Length(4)).raw_num(), + avg_age_backed); + EXPECT_EQ(0, avg_age_unbacked); + + ++next; + Put({next, Length(3)}); + next += Length(3); + ASSERT_LE(next, end); + absl::SleepFor(absl::Milliseconds(40)); + Helper::Stat(tracker_, &small_backed, &small_unbacked, &large, + &avg_age_backed, &avg_age_unbacked); + EXPECT_THAT(small_backed, + testing::ElementsAre(Length(1), Length(2), Length(3))); + EXPECT_THAT(small_unbacked, testing::ElementsAre()); + EXPECT_EQ(1, large.spans); + EXPECT_EQ(kMaxPages + Length(1), large.normal_pages); + EXPECT_EQ(Length(0), large.returned_pages); + EXPECT_LE(((kMaxPages + Length(1)).raw_num() * 0.10 + 1 * 0.09 + 2 * 0.07 + + 3 * 0.04) / + (kMaxPages + Length(7)).raw_num(), + 
avg_age_backed); + EXPECT_EQ(0, avg_age_unbacked); + + ExpectPages({p, kMaxPages + Length(1)}); + ExpectPages({p + kMaxPages + Length(2), Length(1)}); + ExpectPages({p + kMaxPages + Length(4), Length(2)}); + ExpectPages({p + kMaxPages + Length(7), Length(3)}); + EXPECT_EQ(kMaxPages + Length(7), ReleaseFree()); + absl::SleepFor(absl::Milliseconds(100)); + Helper::Stat(tracker_, &small_backed, &small_unbacked, &large, + &avg_age_backed, &avg_age_unbacked); + EXPECT_THAT(small_backed, testing::ElementsAre()); + EXPECT_THAT(small_unbacked, + testing::ElementsAre(Length(1), Length(2), Length(3))); + EXPECT_EQ(1, large.spans); + EXPECT_EQ(Length(0), large.normal_pages); + EXPECT_EQ(kMaxPages + Length(1), large.returned_pages); + EXPECT_EQ(0, avg_age_backed); + EXPECT_LE(0.1, avg_age_unbacked); +} + +TEST_F(PageTrackerTest, b151915873) { + // This test verifies, while generating statistics for the huge page, that we + // do not go out-of-bounds in our bitmaps (b/151915873). + + // While the PageTracker relies on FindAndMark to decide which pages to hand + // out, we do not specify where in the huge page we get our allocations. + // Allocate single pages and then use their returned addresses to create the + // desired pattern in the bitmaps, namely: + // + // | | kPagesPerHugePage - 2 | kPagesPerHugePages - 1 | + // | .... | not free | free | + // + // This causes AddSpanStats to try index = kPagesPerHugePage - 1, n=1. We + // need to not overflow FindClear/FindSet. + + std::vector<PAlloc> allocs; + allocs.reserve(kPagesPerHugePage.raw_num()); + for (int i = 0; i < kPagesPerHugePage.raw_num(); i++) { + allocs.push_back(Get(Length(1))); + } + + std::sort(allocs.begin(), allocs.end(), + [](const PAlloc& a, const PAlloc& b) { return a.p < b.p; }); + + Put(allocs.back()); + allocs.erase(allocs.begin() + allocs.size() - 1); + + ASSERT_EQ(tracker_.used_pages(), kPagesPerHugePage - Length(1)); + + SmallSpanStats small; + LargeSpanStats large; + PageAgeHistograms ages(absl::base_internal::CycleClock::Now()); + + tracker_.AddSpanStats(&small, &large, &ages); + + EXPECT_EQ(small.normal_length[1], 1); + EXPECT_THAT(0, + testing::AllOfArray(&small.normal_length[2], + &small.normal_length[kMaxPages.raw_num()])); +} + +class BlockingUnback { + public: + static void Unback(void* p, size_t len) { + if (!mu_) { + return; + } + + if (counter) { + counter->DecrementCount(); + } + + mu_->Lock(); + mu_->Unlock(); + } + + static void set_lock(absl::Mutex* mu) { mu_ = mu; } + + static absl::BlockingCounter* counter; + + private: + static thread_local absl::Mutex* mu_; +}; + +thread_local absl::Mutex* BlockingUnback::mu_ = nullptr; +absl::BlockingCounter* BlockingUnback::counter = nullptr; + +class FillerTest : public testing::TestWithParam<FillerPartialRerelease> { + protected: + // Allow tests to modify the clock used by the cache. + static int64_t FakeClock() { return clock_; } + static double GetFakeClockFrequency() { + return absl::ToDoubleNanoseconds(absl::Seconds(2)); + } + static void Advance(absl::Duration d) { + clock_ += absl::ToDoubleSeconds(d) * GetFakeClockFrequency(); + } + static void ResetClock() { clock_ = 1234; } + + static void Unback(void* p, size_t len) {} + + // Our templating approach lets us directly override certain functions + // and have mocks without virtualization. It's a bit funky but works. + typedef PageTracker<BlockingUnback::Unback> FakeTracker; + + // We have backing of one word per (normal-sized) page for our "hugepages". 
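+  // GetBacking() below carves hugepage-aligned runs out of this vector and
+  // GetFakePage() maps a PageId back to its slot, giving MarkRange() and
+  // CheckRange() one word per page to stamp and verify.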
+ std::vector<size_t> backing_; + // This is space efficient enough that we won't bother recycling pages. + HugePage GetBacking() { + intptr_t i = backing_.size(); + backing_.resize(i + kPagesPerHugePage.raw_num()); + intptr_t addr = i << kPageShift; + CHECK_CONDITION(addr % kHugePageSize == 0); + return HugePageContaining(reinterpret_cast<void*>(addr)); + } + + size_t* GetFakePage(PageId p) { return &backing_[p.index()]; } + + void MarkRange(PageId p, Length n, size_t mark) { + for (auto i = Length(0); i < n; ++i) { + *GetFakePage(p + i) = mark; + } + } + + void CheckRange(PageId p, Length n, size_t mark) { + for (auto i = Length(0); i < n; ++i) { + EXPECT_EQ(mark, *GetFakePage(p + i)); + } + } + + HugePageFiller<FakeTracker> filler_; + + FillerTest() + : filler_(GetParam(), + Clock{.now = FakeClock, .freq = GetFakeClockFrequency}) { + ResetClock(); + } + + ~FillerTest() override { EXPECT_EQ(NHugePages(0), filler_.size()); } + + struct PAlloc { + FakeTracker* pt; + PageId p; + Length n; + size_t mark; + }; + + void Mark(const PAlloc& alloc) { MarkRange(alloc.p, alloc.n, alloc.mark); } + + void Check(const PAlloc& alloc) { CheckRange(alloc.p, alloc.n, alloc.mark); } + + size_t next_mark_{0}; + + HugeLength hp_contained_{NHugePages(0)}; + Length total_allocated_{0}; + + absl::InsecureBitGen gen_; + + void CheckStats() { + EXPECT_EQ(hp_contained_, filler_.size()); + auto stats = filler_.stats(); + const uint64_t freelist_bytes = stats.free_bytes + stats.unmapped_bytes; + const uint64_t used_bytes = stats.system_bytes - freelist_bytes; + EXPECT_EQ(total_allocated_.in_bytes(), used_bytes); + EXPECT_EQ((hp_contained_.in_pages() - total_allocated_).in_bytes(), + freelist_bytes); + } + PAlloc AllocateRaw(Length n, bool donated = false) { + EXPECT_LT(n, kPagesPerHugePage); + PAlloc ret; + ret.n = n; + ret.pt = nullptr; + ret.mark = ++next_mark_; + if (!donated) { // Donated means always create a new hugepage + absl::base_internal::SpinLockHolder l(&pageheap_lock); + auto [pt, page] = filler_.TryGet(n); + ret.pt = pt; + ret.p = page; + } + if (ret.pt == nullptr) { + ret.pt = + new FakeTracker(GetBacking(), absl::base_internal::CycleClock::Now()); + { + absl::base_internal::SpinLockHolder l(&pageheap_lock); + ret.p = ret.pt->Get(n).page; + } + filler_.Contribute(ret.pt, donated); + ++hp_contained_; + } + + total_allocated_ += n; + return ret; + } + + PAlloc Allocate(Length n, bool donated = false) { + CHECK_CONDITION(n <= kPagesPerHugePage); + PAlloc ret = AllocateRaw(n, donated); + ret.n = n; + Mark(ret); + CheckStats(); + return ret; + } + + // Returns true iff the filler returned an empty hugepage. 
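+  // (In that case the tracker allocated in AllocateRaw() is deleted here and
+  // hp_contained_ is decremented so CheckStats() stays consistent.)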
+ bool DeleteRaw(const PAlloc& p) { + FakeTracker* pt; + { + absl::base_internal::SpinLockHolder l(&pageheap_lock); + pt = filler_.Put(p.pt, p.p, p.n); + } + total_allocated_ -= p.n; + if (pt != nullptr) { + EXPECT_EQ(kPagesPerHugePage, pt->longest_free_range()); + EXPECT_TRUE(pt->empty()); + --hp_contained_; + delete pt; + return true; + } + + return false; + } + + // Returns true iff the filler returned an empty hugepage + bool Delete(const PAlloc& p) { + Check(p); + bool r = DeleteRaw(p); + CheckStats(); + return r; + } + + Length ReleasePages(Length desired, absl::Duration d = absl::ZeroDuration()) { + absl::base_internal::SpinLockHolder l(&pageheap_lock); + return filler_.ReleasePages(desired, d, false); + } + + Length HardReleasePages(Length desired) { + absl::base_internal::SpinLockHolder l(&pageheap_lock); + return filler_.ReleasePages(desired, absl::ZeroDuration(), true); + } + + // Generates an "interesting" pattern of allocations that highlights all the + // various features of our stats. + std::vector<PAlloc> GenerateInterestingAllocs(); + + private: + static int64_t clock_; +}; + +int64_t FillerTest::clock_{1234}; + +TEST_P(FillerTest, Density) { + absl::BitGen rng; + // Start with a really annoying setup: some hugepages half + // empty (randomly) + std::vector<PAlloc> allocs; + std::vector<PAlloc> doomed_allocs; + static const HugeLength kNumHugePages = NHugePages(64); + for (auto i = Length(0); i < kNumHugePages.in_pages(); ++i) { + ASSERT_EQ(i, filler_.pages_allocated()); + if (absl::Bernoulli(rng, 1.0 / 2)) { + allocs.push_back(Allocate(Length(1))); + } else { + doomed_allocs.push_back(Allocate(Length(1))); + } + } + for (auto d : doomed_allocs) { + Delete(d); + } + EXPECT_EQ(kNumHugePages, filler_.size()); + // We want a good chance of touching ~every allocation. + size_t n = allocs.size(); + // Now, randomly add and delete to the allocations. + // We should converge to full and empty pages. + for (int j = 0; j < 6; j++) { + absl::c_shuffle(allocs, rng); + + for (int i = 0; i < n; ++i) { + Delete(allocs[i]); + allocs[i] = Allocate(Length(1)); + ASSERT_EQ(Length(n), filler_.pages_allocated()); + } + } + + EXPECT_GE(allocs.size() / kPagesPerHugePage.raw_num() + 1, + filler_.size().raw_num()); + + // clean up, check for failures + for (auto a : allocs) { + Delete(a); + ASSERT_EQ(Length(--n), filler_.pages_allocated()); + } +} + +TEST_P(FillerTest, Release) { + static const Length kAlloc = kPagesPerHugePage / 2; + PAlloc p1 = Allocate(kAlloc - Length(1)); + PAlloc p2 = Allocate(kAlloc + Length(1)); + + PAlloc p3 = Allocate(kAlloc - Length(2)); + PAlloc p4 = Allocate(kAlloc + Length(2)); + // We have two hugepages, both full: nothing to release. + ASSERT_EQ(Length(0), ReleasePages(kMaxValidPages)); + Delete(p1); + Delete(p3); + // Now we should see the p1 hugepage - emptier - released. + ASSERT_EQ(kAlloc - Length(1), ReleasePages(kAlloc - Length(1))); + EXPECT_EQ(kAlloc - Length(1), filler_.unmapped_pages()); + ASSERT_TRUE(p1.pt->released()); + ASSERT_FALSE(p3.pt->released()); + + // We expect to reuse p1.pt. 
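+  // (p3.pt has only kAlloc - 2 contiguous free pages, so the kAlloc - 1 page
+  // request below cannot land there; it should be served from the released
+  // p1.pt rather than from a brand-new hugepage.)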
+ PAlloc p5 = Allocate(kAlloc - Length(1)); + ASSERT_TRUE(p1.pt == p5.pt || p3.pt == p5.pt); + + Delete(p2); + Delete(p4); + Delete(p5); +} + +TEST_P(FillerTest, Fragmentation) { + absl::BitGen rng; + auto dist = EmpiricalDistribution(absl::GetFlag(FLAGS_frag_req_limit)); + + std::vector<PAlloc> allocs; + Length total; + while (total < absl::GetFlag(FLAGS_frag_size)) { + auto n = Length(dist(rng)); + total += n; + allocs.push_back(AllocateRaw(n)); + } + + double max_slack = 0.0; + const size_t kReps = absl::GetFlag(FLAGS_frag_iters); + for (size_t i = 0; i < kReps; ++i) { + auto stats = filler_.stats(); + double slack = static_cast<double>(stats.free_bytes) / stats.system_bytes; + + max_slack = std::max(slack, max_slack); + if (i % (kReps / 40) == 0) { + printf("%zu events: %zu allocs totalling %zu slack %f\n", i, + allocs.size(), total.raw_num(), slack); + } + if (absl::Bernoulli(rng, 1.0 / 2)) { + size_t index = absl::Uniform<int32_t>(rng, 0, allocs.size()); + std::swap(allocs[index], allocs.back()); + DeleteRaw(allocs.back()); + total -= allocs.back().n; + allocs.pop_back(); + } else { + auto n = Length(dist(rng)); + allocs.push_back(AllocateRaw(n)); + total += n; + } + } + + EXPECT_LE(max_slack, 0.05); + + for (auto a : allocs) { + DeleteRaw(a); + } +} + +TEST_P(FillerTest, PrintFreeRatio) { + // This test is sensitive to the number of pages per hugepage, as we are + // printing raw stats. + if (kPagesPerHugePage != Length(256)) { + GTEST_SKIP(); + } + + // Allocate two huge pages, release one, verify that we do not get an invalid + // (>1.) ratio of free : non-fulls. + + // First huge page + PAlloc a1 = Allocate(kPagesPerHugePage / 2); + PAlloc a2 = Allocate(kPagesPerHugePage / 2); + + // Second huge page + constexpr Length kQ = kPagesPerHugePage / 4; + + PAlloc a3 = Allocate(kQ); + PAlloc a4 = Allocate(kQ); + PAlloc a5 = Allocate(kQ); + PAlloc a6 = Allocate(kQ); + + Delete(a6); + + ReleasePages(kQ); + + Delete(a5); + + std::string buffer(1024 * 1024, '\0'); + { + Printer printer(&*buffer.begin(), buffer.size()); + filler_.Print(&printer, /*everything=*/true); + buffer.erase(printer.SpaceRequired()); + } + + if (GetParam() == FillerPartialRerelease::Retain) { + EXPECT_THAT( + buffer, + testing::StartsWith( + R"(HugePageFiller: densely pack small requests into hugepages +HugePageFiller: 2 total, 1 full, 0 partial, 1 released (1 partially), 0 quarantined +HugePageFiller: 64 pages free in 2 hugepages, 0.1250 free +HugePageFiller: among non-fulls, 0.2500 free +HugePageFiller: 128 used pages in subreleased hugepages (128 of them in partially released) +HugePageFiller: 1 hugepages partially released, 0.2500 released +HugePageFiller: 0.6667 of used pages hugepageable)")); + } else { + EXPECT_THAT( + buffer, + testing::StartsWith( + R"(HugePageFiller: densely pack small requests into hugepages +HugePageFiller: 2 total, 1 full, 0 partial, 1 released (0 partially), 0 quarantined +HugePageFiller: 0 pages free in 2 hugepages, 0.0000 free +HugePageFiller: among non-fulls, 0.0000 free +HugePageFiller: 128 used pages in subreleased hugepages (0 of them in partially released) +HugePageFiller: 1 hugepages partially released, 0.5000 released +HugePageFiller: 0.6667 of used pages hugepageable)")); + } + + // Cleanup remaining allocs. 
+ Delete(a1); + Delete(a2); + Delete(a3); + Delete(a4); +} + +static double BytesToMiB(size_t bytes) { return bytes / (1024.0 * 1024.0); } + +using testing::AnyOf; +using testing::Eq; +using testing::StrEq; + +TEST_P(FillerTest, HugePageFrac) { + // I don't actually care which we get, both are + // reasonable choices, but don't report a NaN/complain + // about divide by 0s/ give some bogus number for empty. + EXPECT_THAT(filler_.hugepage_frac(), AnyOf(Eq(0), Eq(1))); + static const Length kQ = kPagesPerHugePage / 4; + // These are all on one page: + auto a1 = Allocate(kQ); + auto a2 = Allocate(kQ); + auto a3 = Allocate(kQ - Length(1)); + auto a4 = Allocate(kQ + Length(1)); + + // As are these: + auto a5 = Allocate(kPagesPerHugePage - kQ); + auto a6 = Allocate(kQ); + + EXPECT_EQ(1, filler_.hugepage_frac()); + // Free space doesn't affect it... + Delete(a4); + Delete(a6); + + EXPECT_EQ(1, filler_.hugepage_frac()); + + // Releasing the hugepage does. + ASSERT_EQ(kQ + Length(1), ReleasePages(kQ + Length(1))); + EXPECT_EQ((3.0 * kQ.raw_num()) / (6.0 * kQ.raw_num() - 1.0), + filler_.hugepage_frac()); + + // Check our arithmetic in a couple scenarios. + + // 2 kQs on the release and 3 on the hugepage + Delete(a2); + EXPECT_EQ((3.0 * kQ.raw_num()) / (5.0 * kQ.raw_num() - 1), + filler_.hugepage_frac()); + // This releases the free page on the partially released hugepage. + ASSERT_EQ(kQ, ReleasePages(kQ)); + EXPECT_EQ((3.0 * kQ.raw_num()) / (5.0 * kQ.raw_num() - 1), + filler_.hugepage_frac()); + + // just-over-1 kQ on the release and 3 on the hugepage + Delete(a3); + EXPECT_EQ((3 * kQ.raw_num()) / (4.0 * kQ.raw_num()), filler_.hugepage_frac()); + // This releases the free page on the partially released hugepage. + ASSERT_EQ(kQ - Length(1), ReleasePages(kQ - Length(1))); + EXPECT_EQ((3 * kQ.raw_num()) / (4.0 * kQ.raw_num()), filler_.hugepage_frac()); + + // All huge! + Delete(a1); + EXPECT_EQ(1, filler_.hugepage_frac()); + + Delete(a5); +} + +// Repeatedly grow from FLAG_bytes to FLAG_bytes * growth factor, then shrink +// back down by random deletion. Then release partial hugepages until +// pageheap is bounded by some fraction of usage. +// Measure the effective hugepage fraction at peak and baseline usage, +// and the blowup in VSS footprint. +// +// This test is a tool for analyzing parameters -- not intended as an actual +// unit test. 
+TEST_P(FillerTest, DISABLED_ReleaseFrac) { + absl::BitGen rng; + const Length baseline = LengthFromBytes(absl::GetFlag(FLAGS_bytes)); + const Length peak = baseline * absl::GetFlag(FLAGS_growth_factor); + const Length free_target = baseline * absl::GetFlag(FLAGS_release_until); + + std::vector<PAlloc> allocs; + while (filler_.used_pages() < baseline) { + allocs.push_back(AllocateRaw(Length(1))); + } + + while (true) { + while (filler_.used_pages() < peak) { + allocs.push_back(AllocateRaw(Length(1))); + } + const double peak_frac = filler_.hugepage_frac(); + // VSS + const size_t footprint = filler_.size().in_bytes(); + + std::shuffle(allocs.begin(), allocs.end(), rng); + + size_t limit = allocs.size(); + while (filler_.used_pages() > baseline) { + --limit; + DeleteRaw(allocs[limit]); + } + allocs.resize(limit); + while (filler_.free_pages() > free_target) { + ReleasePages(kMaxValidPages); + } + const double baseline_frac = filler_.hugepage_frac(); + + printf("%.3f %.3f %6.1f MiB\n", peak_frac, baseline_frac, + BytesToMiB(footprint)); + } +} + +TEST_P(FillerTest, ReleaseAccounting) { + const Length N = kPagesPerHugePage; + auto big = Allocate(N - Length(2)); + auto tiny1 = Allocate(Length(1)); + auto tiny2 = Allocate(Length(1)); + auto half1 = Allocate(N / 2); + auto half2 = Allocate(N / 2); + + Delete(half1); + Delete(big); + + ASSERT_EQ(NHugePages(2), filler_.size()); + + // We should pick the [empty big][full tiny] hugepage here. + EXPECT_EQ(N - Length(2), ReleasePages(N - Length(2))); + EXPECT_EQ(N - Length(2), filler_.unmapped_pages()); + // This shouldn't trigger a release + Delete(tiny1); + if (GetParam() == FillerPartialRerelease::Retain) { + EXPECT_EQ(N - Length(2), filler_.unmapped_pages()); + // Until we call ReleasePages() + EXPECT_EQ(Length(1), ReleasePages(Length(1))); + } + EXPECT_EQ(N - Length(1), filler_.unmapped_pages()); + + // As should this, but this will drop the whole hugepage + Delete(tiny2); + EXPECT_EQ(Length(0), filler_.unmapped_pages()); + EXPECT_EQ(NHugePages(1), filler_.size()); + + // This shouldn't trigger any release: we just claim credit for the + // releases we did automatically on tiny2. + if (GetParam() == FillerPartialRerelease::Retain) { + EXPECT_EQ(Length(1), ReleasePages(Length(1))); + } else { + EXPECT_EQ(Length(2), ReleasePages(Length(2))); + } + EXPECT_EQ(Length(0), filler_.unmapped_pages()); + EXPECT_EQ(NHugePages(1), filler_.size()); + + // Check subrelease stats + EXPECT_EQ(N / 2, filler_.used_pages()); + EXPECT_EQ(Length(0), filler_.used_pages_in_any_subreleased()); + EXPECT_EQ(Length(0), filler_.used_pages_in_partial_released()); + EXPECT_EQ(Length(0), filler_.used_pages_in_released()); + + // Now we pick the half/half hugepage + EXPECT_EQ(N / 2, ReleasePages(kMaxValidPages)); + EXPECT_EQ(N / 2, filler_.unmapped_pages()); + + // Check subrelease stats + EXPECT_EQ(N / 2, filler_.used_pages()); + EXPECT_EQ(N / 2, filler_.used_pages_in_any_subreleased()); + EXPECT_EQ(Length(0), filler_.used_pages_in_partial_released()); + EXPECT_EQ(N / 2, filler_.used_pages_in_released()); + + // Check accounting for partially released hugepages with partial rerelease + if (GetParam() == FillerPartialRerelease::Retain) { + // Allocating and deallocating a small object causes the page to turn from + // a released hugepage into a partially released hugepage. 
+ auto tiny3 = Allocate(Length(1)); + auto tiny4 = Allocate(Length(1)); + Delete(tiny4); + EXPECT_EQ(N / 2 + Length(1), filler_.used_pages()); + EXPECT_EQ(N / 2 + Length(1), filler_.used_pages_in_any_subreleased()); + EXPECT_EQ(N / 2 + Length(1), filler_.used_pages_in_partial_released()); + EXPECT_EQ(Length(0), filler_.used_pages_in_released()); + Delete(tiny3); + } + + Delete(half2); + EXPECT_EQ(NHugePages(0), filler_.size()); + EXPECT_EQ(Length(0), filler_.unmapped_pages()); +} + +TEST_P(FillerTest, ReleaseWithReuse) { + const Length N = kPagesPerHugePage; + auto half = Allocate(N / 2); + auto tiny1 = Allocate(N / 4); + auto tiny2 = Allocate(N / 4); + + Delete(half); + + ASSERT_EQ(NHugePages(1), filler_.size()); + + // We should be able to release the pages from half1. + EXPECT_EQ(N / 2, ReleasePages(kMaxValidPages)); + EXPECT_EQ(N / 2, filler_.unmapped_pages()); + + // Release tiny1, release more. + Delete(tiny1); + + EXPECT_EQ(N / 4, ReleasePages(kMaxValidPages)); + EXPECT_EQ(3 * N / 4, filler_.unmapped_pages()); + + // Repopulate, confirm we can't release anything and unmapped pages goes to 0. + tiny1 = Allocate(N / 4); + EXPECT_EQ(Length(0), ReleasePages(kMaxValidPages)); + EXPECT_EQ(N / 2, filler_.unmapped_pages()); + + // Continue repopulating. + half = Allocate(N / 2); + EXPECT_EQ(Length(0), ReleasePages(kMaxValidPages)); + EXPECT_EQ(Length(0), filler_.unmapped_pages()); + EXPECT_EQ(NHugePages(1), filler_.size()); + + // Release everything and cleanup. + Delete(half); + Delete(tiny1); + Delete(tiny2); + EXPECT_EQ(NHugePages(0), filler_.size()); + EXPECT_EQ(Length(0), filler_.unmapped_pages()); +} + +TEST_P(FillerTest, AvoidArbitraryQuarantineVMGrowth) { + const Length N = kPagesPerHugePage; + // Guarantee we have a ton of released pages go empty. + for (int i = 0; i < 10 * 1000; ++i) { + auto half1 = Allocate(N / 2); + auto half2 = Allocate(N / 2); + Delete(half1); + ASSERT_EQ(N / 2, ReleasePages(N / 2)); + Delete(half2); + } + + auto s = filler_.stats(); + EXPECT_GE(1024 * 1024 * 1024, s.system_bytes); +} + +TEST_P(FillerTest, StronglyPreferNonDonated) { + // We donate several huge pages of varying fullnesses. Then we make several + // allocations that would be perfect fits for the donated hugepages, *after* + // making one allocation that won't fit, to ensure that a huge page is + // contributed normally. Finally, we verify that we can still get the + // donated huge pages back. (I.e. they weren't used.) + std::vector<PAlloc> donated; + ASSERT_GE(kPagesPerHugePage, Length(10)); + for (auto i = Length(1); i <= Length(3); ++i) { + donated.push_back(Allocate(kPagesPerHugePage - i, /*donated=*/true)); + } + + std::vector<PAlloc> regular; + for (auto i = Length(4); i >= Length(1); --i) { + regular.push_back(Allocate(i)); + } + + for (const PAlloc& alloc : donated) { + // All the donated huge pages should be freeable. + EXPECT_TRUE(Delete(alloc)); + } + + for (const PAlloc& alloc : regular) { + Delete(alloc); + } +} + +TEST_P(FillerTest, ParallelUnlockingSubrelease) { + if (GetParam() == FillerPartialRerelease::Retain) { + // When rerelease happens without going to Unback(), this test + // (intentionally) deadlocks, as we never receive the call. + return; + } + + // Verify that we can deallocate a partial huge page and successfully unlock + // the pageheap_lock without introducing race conditions around the metadata + // for PageTracker::released_. + // + // Currently, HPAA unbacks *all* subsequent deallocations to a huge page once + // we have broken up *any* part of it. 
+ // + // If multiple deallocations are in-flight, we need to leave sufficient + // breadcrumbs to ourselves (PageTracker::releasing_ is a Length, not a bool) + // so that one deallocation completing does not have us "forget" that another + // deallocation is about to unback other parts of the hugepage. + // + // If PageTracker::releasing_ were a bool, the completion of "t1" and + // subsequent reallocation of "a2" in this test would mark the entirety of the + // page as full, so we would choose to *not* unback a2 (when deallocated) or + // a3 (when deallocated by t3). + constexpr Length N = kPagesPerHugePage; + + auto a1 = AllocateRaw(N / 2); + auto a2 = AllocateRaw(Length(1)); + auto a3 = AllocateRaw(Length(1)); + + // Trigger subrelease. The filler now has a partial hugepage, so subsequent + // calls to Delete() will cause us to unback the remainder of it. + EXPECT_GT(ReleasePages(kMaxValidPages), Length(0)); + + auto m1 = absl::make_unique<absl::Mutex>(); + auto m2 = absl::make_unique<absl::Mutex>(); + + m1->Lock(); + m2->Lock(); + + absl::BlockingCounter counter(2); + BlockingUnback::counter = &counter; + + std::thread t1([&]() { + BlockingUnback::set_lock(m1.get()); + + DeleteRaw(a2); + }); + + std::thread t2([&]() { + BlockingUnback::set_lock(m2.get()); + + DeleteRaw(a3); + }); + + // Wait for t1 and t2 to block. + counter.Wait(); + + // At this point, t1 and t2 are blocked (as if they were on a long-running + // syscall) on "unback" (m1 and m2, respectively). pageheap_lock is not held. + // + // Allocating a4 will complete the hugepage, but we have on-going releaser + // threads. + auto a4 = AllocateRaw((N / 2) - Length(2)); + EXPECT_EQ(NHugePages(1), filler_.size()); + + // Let one of the threads proceed. The huge page consists of: + // * a1 (N/2 ): Allocated + // * a2 ( 1): Unbacked + // * a3 ( 1): Unbacking (blocked on m2) + // * a4 (N/2-2): Allocated + m1->Unlock(); + t1.join(); + + // Reallocate a2. We should still consider the huge page partially backed for + // purposes of subreleasing. + a2 = AllocateRaw(Length(1)); + EXPECT_EQ(NHugePages(1), filler_.size()); + DeleteRaw(a2); + + // Let the other thread proceed. The huge page consists of: + // * a1 (N/2 ): Allocated + // * a2 ( 1): Unbacked + // * a3 ( 1): Unbacked + // * a4 (N/2-2): Allocated + m2->Unlock(); + t2.join(); + + EXPECT_EQ(filler_.used_pages(), N - Length(2)); + EXPECT_EQ(filler_.unmapped_pages(), Length(2)); + EXPECT_EQ(filler_.free_pages(), Length(0)); + + // Clean up. + DeleteRaw(a1); + DeleteRaw(a4); + + BlockingUnback::counter = nullptr; +} + +TEST_P(FillerTest, SkipSubrelease) { + // This test is sensitive to the number of pages per hugepage, as we are + // printing raw stats. + if (kPagesPerHugePage != Length(256)) { + GTEST_SKIP(); + } + + // Generate a peak, wait for time interval a, generate a trough, subrelease, + // wait for time interval b, generate another peak. + const auto peak_trough_peak = [&](absl::Duration a, absl::Duration b, + absl::Duration peak_interval, + bool expected_subrelease) { + const Length N = kPagesPerHugePage; + PAlloc half = Allocate(N / 2); + PAlloc tiny1 = Allocate(N / 4); + PAlloc tiny2 = Allocate(N / 4); + + // To force a peak, we allocate 3/4 and 1/4 of a huge page. This is + // necessary after we delete `half` below, as a half huge page for the peak + // would fill into the gap previously occupied by it. 
+ PAlloc peak1a = Allocate(3 * N / 4); + PAlloc peak1b = Allocate(N / 4); + EXPECT_EQ(filler_.used_pages(), 2 * N); + Delete(peak1a); + Delete(peak1b); + Advance(a); + + Delete(half); + + EXPECT_EQ(expected_subrelease ? N / 2 : Length(0), + ReleasePages(10 * N, peak_interval)); + + Advance(b); + + PAlloc peak2a = Allocate(3 * N / 4); + PAlloc peak2b = Allocate(N / 4); + + PAlloc peak3a = Allocate(3 * N / 4); + PAlloc peak3b = Allocate(N / 4); + + Delete(tiny1); + Delete(tiny2); + Delete(peak2a); + Delete(peak2b); + Delete(peak3a); + Delete(peak3b); + + EXPECT_EQ(filler_.used_pages(), Length(0)); + EXPECT_EQ(filler_.unmapped_pages(), Length(0)); + EXPECT_EQ(filler_.free_pages(), Length(0)); + + EXPECT_EQ(expected_subrelease ? N / 2 : Length(0), ReleasePages(10 * N)); + }; + + { + SCOPED_TRACE("peak-trough-peak 1"); + peak_trough_peak(absl::Minutes(2), absl::Minutes(2), absl::Minutes(3), + false); + } + + Advance(absl::Minutes(30)); + + { + SCOPED_TRACE("peak-trough-peak 2"); + peak_trough_peak(absl::Minutes(2), absl::Minutes(7), absl::Minutes(3), + false); + } + + Advance(absl::Minutes(30)); + + { + SCOPED_TRACE("peak-trough-peak 3"); + peak_trough_peak(absl::Minutes(5), absl::Minutes(3), absl::Minutes(2), + true); + } + + Advance(absl::Minutes(30)); + + // This captures a corner case: If we hit another peak immediately after a + // subrelease decision (in the same time series epoch), do not count this as + // a correct subrelease decision. + { + SCOPED_TRACE("peak-trough-peak 4"); + peak_trough_peak(absl::Milliseconds(10), absl::Milliseconds(10), + absl::Minutes(2), false); + } + + Advance(absl::Minutes(30)); + + // Ensure that the tracker is updated. + auto tiny = Allocate(Length(1)); + Delete(tiny); + + std::string buffer(1024 * 1024, '\0'); + { + Printer printer(&*buffer.begin(), buffer.size()); + filler_.Print(&printer, true); + } + buffer.resize(strlen(buffer.c_str())); + + EXPECT_THAT(buffer, testing::HasSubstr(R"( +HugePageFiller: Since the start of the execution, 4 subreleases (512 pages) were skipped due to recent (120s) peaks. +HugePageFiller: 25.0000% of decisions confirmed correct, 0 pending (25.0000% of pages, 0 pending). +)")); +} + +class FillerStatsTrackerTest : public testing::Test { + private: + static int64_t clock_; + static int64_t FakeClock() { return clock_; } + static double GetFakeClockFrequency() { + return absl::ToDoubleNanoseconds(absl::Seconds(2)); + } + + protected: + static constexpr absl::Duration kWindow = absl::Minutes(10); + + using StatsTrackerType = FillerStatsTracker<16>; + StatsTrackerType tracker_{ + Clock{.now = FakeClock, .freq = GetFakeClockFrequency}, kWindow, + absl::Minutes(5)}; + + void Advance(absl::Duration d) { + clock_ += static_cast<int64_t>(absl::ToDoubleSeconds(d) * + GetFakeClockFrequency()); + } + + // Generates four data points for the tracker that represent "interesting" + // points (i.e., min/max pages demand, min/max hugepages). + void GenerateInterestingPoints(Length num_pages, HugeLength num_hugepages, + Length num_free_pages); + + // Generates a data point with a particular amount of demand pages, while + // ignoring the specific number of hugepages. 
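+  // (Its definition below pins all four hugepage categories to NHugePages(1)
+  // and zeroes out the unmapped and subreleased-page counts, so successive
+  // calls differ only in their demand and free-page values.)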
+ void GenerateDemandPoint(Length num_pages, Length num_free_pages); +}; + +int64_t FillerStatsTrackerTest::clock_{0}; + +void FillerStatsTrackerTest::GenerateInterestingPoints(Length num_pages, + HugeLength num_hugepages, + Length num_free_pages) { + for (int i = 0; i <= 1; ++i) { + for (int j = 0; j <= 1; ++j) { + StatsTrackerType::FillerStats stats; + stats.num_pages = num_pages + Length((i == 0) ? 4 : 8 * j); + stats.free_pages = num_free_pages + Length(10 * i + j); + stats.unmapped_pages = Length(10); + stats.used_pages_in_subreleased_huge_pages = num_pages; + stats.huge_pages[StatsTrackerType::kRegular] = + num_hugepages + ((i == 1) ? NHugePages(4) : NHugePages(8) * j); + stats.huge_pages[StatsTrackerType::kDonated] = num_hugepages; + stats.huge_pages[StatsTrackerType::kPartialReleased] = NHugePages(i); + stats.huge_pages[StatsTrackerType::kReleased] = NHugePages(j); + tracker_.Report(stats); + } + } +} + +void FillerStatsTrackerTest::GenerateDemandPoint(Length num_pages, + Length num_free_pages) { + HugeLength hp = NHugePages(1); + StatsTrackerType::FillerStats stats; + stats.num_pages = num_pages; + stats.free_pages = num_free_pages; + stats.unmapped_pages = Length(0); + stats.used_pages_in_subreleased_huge_pages = Length(0); + stats.huge_pages[StatsTrackerType::kRegular] = hp; + stats.huge_pages[StatsTrackerType::kDonated] = hp; + stats.huge_pages[StatsTrackerType::kPartialReleased] = hp; + stats.huge_pages[StatsTrackerType::kReleased] = hp; + tracker_.Report(stats); +} + +// Tests that the tracker aggregates all data correctly. The output is tested by +// comparing the text output of the tracker. While this is a bit verbose, it is +// much cleaner than extracting and comparing all data manually. +TEST_F(FillerStatsTrackerTest, Works) { + // Ensure that the beginning (when free pages are 0) is outside the 5-min + // window the instrumentation is recording. + GenerateInterestingPoints(Length(1), NHugePages(1), Length(1)); + Advance(absl::Minutes(5)); + + GenerateInterestingPoints(Length(100), NHugePages(5), Length(200)); + + Advance(absl::Minutes(1)); + + GenerateInterestingPoints(Length(200), NHugePages(10), Length(100)); + + Advance(absl::Minutes(1)); + + // Test text output (time series summary). + { + std::string buffer(1024 * 1024, '\0'); + Printer printer(&*buffer.begin(), buffer.size()); + { + tracker_.Print(&printer); + buffer.erase(printer.SpaceRequired()); + } + + EXPECT_THAT(buffer, StrEq(R"(HugePageFiller: time series over 5 min interval + +HugePageFiller: realized fragmentation: 0.8 MiB +HugePageFiller: minimum free pages: 110 (100 backed) +HugePageFiller: at peak demand: 208 pages (and 111 free, 10 unmapped) +HugePageFiller: at peak demand: 26 hps (14 regular, 10 donated, 1 partial, 1 released) +HugePageFiller: at peak hps: 208 pages (and 111 free, 10 unmapped) +HugePageFiller: at peak hps: 26 hps (14 regular, 10 donated, 1 partial, 1 released) + +HugePageFiller: Since the start of the execution, 0 subreleases (0 pages) were skipped due to recent (0s) peaks. +HugePageFiller: 0.0000% of decisions confirmed correct, 0 pending (0.0000% of pages, 0 pending). +HugePageFiller: Subrelease stats last 10 min: total 0 pages subreleased, 0 hugepages broken +)")); + } + + // Test pbtxt output (full time series). 
+ { + std::string buffer(1024 * 1024, '\0'); + Printer printer(&*buffer.begin(), buffer.size()); + { + PbtxtRegion region(&printer, kTop, /*indent=*/0); + tracker_.PrintInPbtxt(®ion); + } + buffer.erase(printer.SpaceRequired()); + + EXPECT_THAT(buffer, StrEq(R"( + filler_skipped_subrelease { + skipped_subrelease_interval_ms: 0 + skipped_subrelease_pages: 0 + correctly_skipped_subrelease_pages: 0 + pending_skipped_subrelease_pages: 0 + skipped_subrelease_count: 0 + correctly_skipped_subrelease_count: 0 + pending_skipped_subrelease_count: 0 + } + filler_stats_timeseries { + window_ms: 37500 + epochs: 16 + min_free_pages_interval_ms: 300000 + min_free_pages: 110 + min_free_backed_pages: 100 + measurements { + epoch: 6 + timestamp_ms: 0 + min_free_pages: 11 + min_free_backed_pages: 1 + num_pages_subreleased: 0 + num_hugepages_broken: 0 + at_minimum_demand { + num_pages: 1 + regular_huge_pages: 5 + donated_huge_pages: 1 + partial_released_huge_pages: 1 + released_huge_pages: 0 + used_pages_in_subreleased_huge_pages: 1 + } + at_maximum_demand { + num_pages: 9 + regular_huge_pages: 5 + donated_huge_pages: 1 + partial_released_huge_pages: 1 + released_huge_pages: 1 + used_pages_in_subreleased_huge_pages: 1 + } + at_minimum_huge_pages { + num_pages: 5 + regular_huge_pages: 1 + donated_huge_pages: 1 + partial_released_huge_pages: 0 + released_huge_pages: 0 + used_pages_in_subreleased_huge_pages: 1 + } + at_maximum_huge_pages { + num_pages: 5 + regular_huge_pages: 9 + donated_huge_pages: 1 + partial_released_huge_pages: 0 + released_huge_pages: 1 + used_pages_in_subreleased_huge_pages: 1 + } + } + measurements { + epoch: 14 + timestamp_ms: 300000 + min_free_pages: 210 + min_free_backed_pages: 200 + num_pages_subreleased: 0 + num_hugepages_broken: 0 + at_minimum_demand { + num_pages: 100 + regular_huge_pages: 9 + donated_huge_pages: 5 + partial_released_huge_pages: 1 + released_huge_pages: 0 + used_pages_in_subreleased_huge_pages: 100 + } + at_maximum_demand { + num_pages: 108 + regular_huge_pages: 9 + donated_huge_pages: 5 + partial_released_huge_pages: 1 + released_huge_pages: 1 + used_pages_in_subreleased_huge_pages: 100 + } + at_minimum_huge_pages { + num_pages: 104 + regular_huge_pages: 5 + donated_huge_pages: 5 + partial_released_huge_pages: 0 + released_huge_pages: 0 + used_pages_in_subreleased_huge_pages: 100 + } + at_maximum_huge_pages { + num_pages: 104 + regular_huge_pages: 13 + donated_huge_pages: 5 + partial_released_huge_pages: 0 + released_huge_pages: 1 + used_pages_in_subreleased_huge_pages: 100 + } + } + measurements { + epoch: 15 + timestamp_ms: 337500 + min_free_pages: 110 + min_free_backed_pages: 100 + num_pages_subreleased: 0 + num_hugepages_broken: 0 + at_minimum_demand { + num_pages: 200 + regular_huge_pages: 14 + donated_huge_pages: 10 + partial_released_huge_pages: 1 + released_huge_pages: 0 + used_pages_in_subreleased_huge_pages: 200 + } + at_maximum_demand { + num_pages: 208 + regular_huge_pages: 14 + donated_huge_pages: 10 + partial_released_huge_pages: 1 + released_huge_pages: 1 + used_pages_in_subreleased_huge_pages: 200 + } + at_minimum_huge_pages { + num_pages: 204 + regular_huge_pages: 10 + donated_huge_pages: 10 + partial_released_huge_pages: 0 + released_huge_pages: 0 + used_pages_in_subreleased_huge_pages: 200 + } + at_maximum_huge_pages { + num_pages: 204 + regular_huge_pages: 18 + donated_huge_pages: 10 + partial_released_huge_pages: 0 + released_huge_pages: 1 + used_pages_in_subreleased_huge_pages: 200 + } + } + } +)")); + } +} + +TEST_F(FillerStatsTrackerTest, 
InvalidDurations) { + // These should not crash. + tracker_.min_free_pages(absl::InfiniteDuration()); + tracker_.min_free_pages(kWindow + absl::Seconds(1)); + tracker_.min_free_pages(-(kWindow + absl::Seconds(1))); + tracker_.min_free_pages(-absl::InfiniteDuration()); +} + +TEST_F(FillerStatsTrackerTest, ComputeRecentPeaks) { + GenerateDemandPoint(Length(3000), Length(1000)); + Advance(absl::Minutes(1.25)); + GenerateDemandPoint(Length(1500), Length(0)); + Advance(absl::Minutes(1)); + GenerateDemandPoint(Length(100), Length(2000)); + Advance(absl::Minutes(1)); + GenerateDemandPoint(Length(200), Length(3000)); + + GenerateDemandPoint(Length(200), Length(3000)); + FillerStatsTracker<>::FillerStats stats = + tracker_.GetRecentPeak(absl::Minutes(3)); + EXPECT_EQ(stats.num_pages, Length(1500)); + EXPECT_EQ(stats.free_pages, Length(0)); + + FillerStatsTracker<>::FillerStats stats2 = + tracker_.GetRecentPeak(absl::Minutes(5)); + EXPECT_EQ(stats2.num_pages, Length(3000)); + EXPECT_EQ(stats2.free_pages, Length(1000)); + + Advance(absl::Minutes(4)); + GenerateDemandPoint(Length(200), Length(3000)); + + FillerStatsTracker<>::FillerStats stats3 = + tracker_.GetRecentPeak(absl::Minutes(4)); + EXPECT_EQ(stats3.num_pages, Length(200)); + EXPECT_EQ(stats3.free_pages, Length(3000)); + + Advance(absl::Minutes(5)); + GenerateDemandPoint(Length(200), Length(3000)); + + FillerStatsTracker<>::FillerStats stats4 = + tracker_.GetRecentPeak(absl::Minutes(5)); + EXPECT_EQ(stats4.num_pages, Length(200)); + EXPECT_EQ(stats4.free_pages, Length(3000)); +} + +TEST_F(FillerStatsTrackerTest, TrackCorrectSubreleaseDecisions) { + // First peak (large) + GenerateDemandPoint(Length(1000), Length(1000)); + + // Incorrect subrelease: Subrelease to 1000 + Advance(absl::Minutes(1)); + GenerateDemandPoint(Length(100), Length(1000)); + tracker_.ReportSkippedSubreleasePages(Length(900), Length(1000), + absl::Minutes(3)); + + // Second peak (small) + Advance(absl::Minutes(1)); + GenerateDemandPoint(Length(500), Length(1000)); + + EXPECT_EQ(tracker_.total_skipped().pages, Length(900)); + EXPECT_EQ(tracker_.total_skipped().count, 1); + EXPECT_EQ(tracker_.correctly_skipped().pages, Length(0)); + EXPECT_EQ(tracker_.correctly_skipped().count, 0); + EXPECT_EQ(tracker_.pending_skipped().pages, Length(900)); + EXPECT_EQ(tracker_.pending_skipped().count, 1); + + // Correct subrelease: Subrelease to 500 + Advance(absl::Minutes(1)); + GenerateDemandPoint(Length(500), Length(100)); + tracker_.ReportSkippedSubreleasePages(Length(50), Length(550), + absl::Minutes(3)); + GenerateDemandPoint(Length(500), Length(50)); + tracker_.ReportSkippedSubreleasePages(Length(50), Length(500), + absl::Minutes(3)); + GenerateDemandPoint(Length(500), Length(0)); + + EXPECT_EQ(tracker_.total_skipped().pages, Length(1000)); + EXPECT_EQ(tracker_.total_skipped().count, 3); + EXPECT_EQ(tracker_.correctly_skipped().pages, Length(0)); + EXPECT_EQ(tracker_.correctly_skipped().count, 0); + EXPECT_EQ(tracker_.pending_skipped().pages, Length(1000)); + EXPECT_EQ(tracker_.pending_skipped().count, 3); + + // Third peak (large, too late for first peak) + Advance(absl::Minutes(1)); + GenerateDemandPoint(Length(1100), Length(1000)); + + Advance(absl::Minutes(5)); + GenerateDemandPoint(Length(1100), Length(1000)); + + EXPECT_EQ(tracker_.total_skipped().pages, Length(1000)); + EXPECT_EQ(tracker_.total_skipped().count, 3); + EXPECT_EQ(tracker_.correctly_skipped().pages, Length(100)); + EXPECT_EQ(tracker_.correctly_skipped().count, 2); + EXPECT_EQ(tracker_.pending_skipped().pages, 
Length(0)); + EXPECT_EQ(tracker_.pending_skipped().count, 0); +} + +TEST_F(FillerStatsTrackerTest, SubreleaseCorrectnessWithChangingIntervals) { + // First peak (large) + GenerateDemandPoint(Length(1000), Length(1000)); + + Advance(absl::Minutes(1)); + GenerateDemandPoint(Length(100), Length(1000)); + + tracker_.ReportSkippedSubreleasePages(Length(50), Length(1000), + absl::Minutes(4)); + Advance(absl::Minutes(1)); + + // With two correctness intervals in the same epoch, take the maximum + tracker_.ReportSkippedSubreleasePages(Length(100), Length(1000), + absl::Minutes(1)); + tracker_.ReportSkippedSubreleasePages(Length(200), Length(1000), + absl::Minutes(7)); + + Advance(absl::Minutes(5)); + GenerateDemandPoint(Length(1100), Length(1000)); + Advance(absl::Minutes(10)); + GenerateDemandPoint(Length(1100), Length(1000)); + + EXPECT_EQ(tracker_.total_skipped().pages, Length(350)); + EXPECT_EQ(tracker_.total_skipped().count, 3); + EXPECT_EQ(tracker_.correctly_skipped().pages, Length(300)); + EXPECT_EQ(tracker_.correctly_skipped().count, 2); + EXPECT_EQ(tracker_.pending_skipped().pages, Length(0)); + EXPECT_EQ(tracker_.pending_skipped().count, 0); +} + +std::vector<FillerTest::PAlloc> FillerTest::GenerateInterestingAllocs() { + PAlloc a = Allocate(Length(1)); + EXPECT_EQ(ReleasePages(kMaxValidPages), kPagesPerHugePage - Length(1)); + Delete(a); + // Get the report on the released page + EXPECT_EQ(ReleasePages(kMaxValidPages), Length(1)); + + // Use a maximally-suboptimal pattern to get lots of hugepages into the + // filler. + std::vector<PAlloc> result; + static_assert(kPagesPerHugePage > Length(7), + "Not enough pages per hugepage!"); + for (auto i = Length(0); i < Length(7); ++i) { + result.push_back(Allocate(kPagesPerHugePage - i - Length(1))); + } + + // Get two released hugepages. + EXPECT_EQ(ReleasePages(Length(7)), Length(7)); + EXPECT_EQ(ReleasePages(Length(6)), Length(6)); + + // Fill some of the remaining pages with small allocations. + for (int i = 0; i < 9; ++i) { + result.push_back(Allocate(Length(1))); + } + + // Finally, donate one hugepage. + result.push_back(Allocate(Length(1), /*donated=*/true)); + return result; +} + +// Test the output of Print(). This is something of a change-detector test, +// but that's not all bad in this case. +TEST_P(FillerTest, Print) { + if (kPagesPerHugePage != Length(256)) { + // The output is hardcoded on this assumption, and dynamically calculating + // it would be way too much of a pain. 
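+    // (256 pages per hugepage corresponds to the common configuration of
+    // 8 KiB pages and 2 MiB hugepages.)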
+ return; + } + auto allocs = GenerateInterestingAllocs(); + + std::string buffer(1024 * 1024, '\0'); + { + Printer printer(&*buffer.begin(), buffer.size()); + filler_.Print(&printer, /*everything=*/true); + buffer.erase(printer.SpaceRequired()); + } + + EXPECT_THAT( + buffer, + StrEq(R"(HugePageFiller: densely pack small requests into hugepages +HugePageFiller: 8 total, 3 full, 3 partial, 2 released (0 partially), 0 quarantined +HugePageFiller: 261 pages free in 8 hugepages, 0.1274 free +HugePageFiller: among non-fulls, 0.3398 free +HugePageFiller: 499 used pages in subreleased hugepages (0 of them in partially released) +HugePageFiller: 2 hugepages partially released, 0.0254 released +HugePageFiller: 0.7187 of used pages hugepageable +HugePageFiller: Since startup, 269 pages subreleased, 3 hugepages broken, (0 pages, 0 hugepages due to reaching tcmalloc limit) + +HugePageFiller: fullness histograms + +HugePageFiller: # of regular hps with a<= # of free pages <b +HugePageFiller: < 0<= 3 < 1<= 1 < 2<= 0 < 3<= 0 < 4<= 1 < 16<= 0 +HugePageFiller: < 32<= 0 < 48<= 0 < 64<= 0 < 80<= 0 < 96<= 0 <112<= 0 +HugePageFiller: <128<= 0 <144<= 0 <160<= 0 <176<= 0 <192<= 0 <208<= 0 +HugePageFiller: <224<= 0 <240<= 0 <252<= 0 <253<= 0 <254<= 0 <255<= 0 + +HugePageFiller: # of donated hps with a<= # of free pages <b +HugePageFiller: < 0<= 0 < 1<= 0 < 2<= 0 < 3<= 0 < 4<= 0 < 16<= 0 +HugePageFiller: < 32<= 0 < 48<= 0 < 64<= 0 < 80<= 0 < 96<= 0 <112<= 0 +HugePageFiller: <128<= 0 <144<= 0 <160<= 0 <176<= 0 <192<= 0 <208<= 0 +HugePageFiller: <224<= 0 <240<= 0 <252<= 0 <253<= 0 <254<= 0 <255<= 1 + +HugePageFiller: # of partial released hps with a<= # of free pages <b +HugePageFiller: < 0<= 0 < 1<= 0 < 2<= 0 < 3<= 0 < 4<= 0 < 16<= 0 +HugePageFiller: < 32<= 0 < 48<= 0 < 64<= 0 < 80<= 0 < 96<= 0 <112<= 0 +HugePageFiller: <128<= 0 <144<= 0 <160<= 0 <176<= 0 <192<= 0 <208<= 0 +HugePageFiller: <224<= 0 <240<= 0 <252<= 0 <253<= 0 <254<= 0 <255<= 0 + +HugePageFiller: # of released hps with a<= # of free pages <b +HugePageFiller: < 0<= 0 < 1<= 0 < 2<= 0 < 3<= 0 < 4<= 2 < 16<= 0 +HugePageFiller: < 32<= 0 < 48<= 0 < 64<= 0 < 80<= 0 < 96<= 0 <112<= 0 +HugePageFiller: <128<= 0 <144<= 0 <160<= 0 <176<= 0 <192<= 0 <208<= 0 +HugePageFiller: <224<= 0 <240<= 0 <252<= 0 <253<= 0 <254<= 0 <255<= 0 + +HugePageFiller: # of regular hps with a<= longest free range <b +HugePageFiller: < 0<= 3 < 1<= 1 < 2<= 0 < 3<= 0 < 4<= 1 < 16<= 0 +HugePageFiller: < 32<= 0 < 48<= 0 < 64<= 0 < 80<= 0 < 96<= 0 <112<= 0 +HugePageFiller: <128<= 0 <144<= 0 <160<= 0 <176<= 0 <192<= 0 <208<= 0 +HugePageFiller: <224<= 0 <240<= 0 <252<= 0 <253<= 0 <254<= 0 <255<= 0 + +HugePageFiller: # of partial released hps with a<= longest free range <b +HugePageFiller: < 0<= 0 < 1<= 0 < 2<= 0 < 3<= 0 < 4<= 0 < 16<= 0 +HugePageFiller: < 32<= 0 < 48<= 0 < 64<= 0 < 80<= 0 < 96<= 0 <112<= 0 +HugePageFiller: <128<= 0 <144<= 0 <160<= 0 <176<= 0 <192<= 0 <208<= 0 +HugePageFiller: <224<= 0 <240<= 0 <252<= 0 <253<= 0 <254<= 0 <255<= 0 + +HugePageFiller: # of released hps with a<= longest free range <b +HugePageFiller: < 0<= 0 < 1<= 0 < 2<= 0 < 3<= 0 < 4<= 2 < 16<= 0 +HugePageFiller: < 32<= 0 < 48<= 0 < 64<= 0 < 80<= 0 < 96<= 0 <112<= 0 +HugePageFiller: <128<= 0 <144<= 0 <160<= 0 <176<= 0 <192<= 0 <208<= 0 +HugePageFiller: <224<= 0 <240<= 0 <252<= 0 <253<= 0 <254<= 0 <255<= 0 + +HugePageFiller: # of regular hps with a<= # of allocations <b +HugePageFiller: < 1<= 1 < 2<= 1 < 3<= 1 < 4<= 2 < 5<= 0 < 17<= 0 +HugePageFiller: < 33<= 0 < 49<= 0 < 65<= 0 < 81<= 0 < 97<= 0 <113<= 0 
+HugePageFiller: <129<= 0 <145<= 0 <161<= 0 <177<= 0 <193<= 0 <209<= 0 +HugePageFiller: <225<= 0 <241<= 0 <253<= 0 <254<= 0 <255<= 0 <256<= 0 + +HugePageFiller: # of partial released hps with a<= # of allocations <b +HugePageFiller: < 1<= 0 < 2<= 0 < 3<= 0 < 4<= 0 < 5<= 0 < 17<= 0 +HugePageFiller: < 33<= 0 < 49<= 0 < 65<= 0 < 81<= 0 < 97<= 0 <113<= 0 +HugePageFiller: <129<= 0 <145<= 0 <161<= 0 <177<= 0 <193<= 0 <209<= 0 +HugePageFiller: <225<= 0 <241<= 0 <253<= 0 <254<= 0 <255<= 0 <256<= 0 + +HugePageFiller: # of released hps with a<= # of allocations <b +HugePageFiller: < 1<= 2 < 2<= 0 < 3<= 0 < 4<= 0 < 5<= 0 < 17<= 0 +HugePageFiller: < 33<= 0 < 49<= 0 < 65<= 0 < 81<= 0 < 97<= 0 <113<= 0 +HugePageFiller: <129<= 0 <145<= 0 <161<= 0 <177<= 0 <193<= 0 <209<= 0 +HugePageFiller: <225<= 0 <241<= 0 <253<= 0 <254<= 0 <255<= 0 <256<= 0 + +HugePageFiller: time series over 5 min interval + +HugePageFiller: realized fragmentation: 0.0 MiB +HugePageFiller: minimum free pages: 0 (0 backed) +HugePageFiller: at peak demand: 1774 pages (and 261 free, 13 unmapped) +HugePageFiller: at peak demand: 8 hps (5 regular, 1 donated, 0 partial, 2 released) +HugePageFiller: at peak hps: 1774 pages (and 261 free, 13 unmapped) +HugePageFiller: at peak hps: 8 hps (5 regular, 1 donated, 0 partial, 2 released) + +HugePageFiller: Since the start of the execution, 0 subreleases (0 pages) were skipped due to recent (0s) peaks. +HugePageFiller: 0.0000% of decisions confirmed correct, 0 pending (0.0000% of pages, 0 pending). +HugePageFiller: Subrelease stats last 10 min: total 269 pages subreleased, 3 hugepages broken +)")); + for (const auto& alloc : allocs) { + Delete(alloc); + } +} + +// Test the output of PrintInPbtxt(). This is something of a change-detector +// test, but that's not all bad in this case. +TEST_P(FillerTest, PrintInPbtxt) { + if (kPagesPerHugePage != Length(256)) { + // The output is hardcoded on this assumption, and dynamically calculating + // it would be way too much of a pain. 
+ return; + } + auto allocs = GenerateInterestingAllocs(); + + std::string buffer(1024 * 1024, '\0'); + Printer printer(&*buffer.begin(), buffer.size()); + { + PbtxtRegion region(&printer, kTop, /*indent=*/0); + filler_.PrintInPbtxt(®ion); + } + buffer.erase(printer.SpaceRequired()); + + EXPECT_THAT(buffer, StrEq(R"( + filler_full_huge_pages: 3 + filler_partial_huge_pages: 3 + filler_released_huge_pages: 2 + filler_partially_released_huge_pages: 0 + filler_free_pages: 261 + filler_used_pages_in_subreleased: 499 + filler_used_pages_in_partial_released: 0 + filler_unmapped_bytes: 0 + filler_hugepageable_used_bytes: 10444800 + filler_num_pages_subreleased: 269 + filler_num_hugepages_broken: 3 + filler_num_pages_subreleased_due_to_limit: 0 + filler_num_hugepages_broken_due_to_limit: 0 + filler_tracker { + type: REGULAR + free_pages_histogram { + lower_bound: 0 + upper_bound: 0 + value: 3 + } + free_pages_histogram { + lower_bound: 1 + upper_bound: 1 + value: 1 + } + free_pages_histogram { + lower_bound: 2 + upper_bound: 2 + value: 0 + } + free_pages_histogram { + lower_bound: 3 + upper_bound: 3 + value: 0 + } + free_pages_histogram { + lower_bound: 4 + upper_bound: 15 + value: 1 + } + free_pages_histogram { + lower_bound: 16 + upper_bound: 31 + value: 0 + } + free_pages_histogram { + lower_bound: 32 + upper_bound: 47 + value: 0 + } + free_pages_histogram { + lower_bound: 48 + upper_bound: 63 + value: 0 + } + free_pages_histogram { + lower_bound: 64 + upper_bound: 79 + value: 0 + } + free_pages_histogram { + lower_bound: 80 + upper_bound: 95 + value: 0 + } + free_pages_histogram { + lower_bound: 96 + upper_bound: 111 + value: 0 + } + free_pages_histogram { + lower_bound: 112 + upper_bound: 127 + value: 0 + } + free_pages_histogram { + lower_bound: 128 + upper_bound: 143 + value: 0 + } + free_pages_histogram { + lower_bound: 144 + upper_bound: 159 + value: 0 + } + free_pages_histogram { + lower_bound: 160 + upper_bound: 175 + value: 0 + } + free_pages_histogram { + lower_bound: 176 + upper_bound: 191 + value: 0 + } + free_pages_histogram { + lower_bound: 192 + upper_bound: 207 + value: 0 + } + free_pages_histogram { + lower_bound: 208 + upper_bound: 223 + value: 0 + } + free_pages_histogram { + lower_bound: 224 + upper_bound: 239 + value: 0 + } + free_pages_histogram { + lower_bound: 240 + upper_bound: 251 + value: 0 + } + free_pages_histogram { + lower_bound: 252 + upper_bound: 252 + value: 0 + } + free_pages_histogram { + lower_bound: 253 + upper_bound: 253 + value: 0 + } + free_pages_histogram { + lower_bound: 254 + upper_bound: 254 + value: 0 + } + free_pages_histogram { + lower_bound: 255 + upper_bound: 255 + value: 0 + } + longest_free_range_histogram { + lower_bound: 0 + upper_bound: 0 + value: 3 + } + longest_free_range_histogram { + lower_bound: 1 + upper_bound: 1 + value: 1 + } + longest_free_range_histogram { + lower_bound: 2 + upper_bound: 2 + value: 0 + } + longest_free_range_histogram { + lower_bound: 3 + upper_bound: 3 + value: 0 + } + longest_free_range_histogram { + lower_bound: 4 + upper_bound: 15 + value: 1 + } + longest_free_range_histogram { + lower_bound: 16 + upper_bound: 31 + value: 0 + } + longest_free_range_histogram { + lower_bound: 32 + upper_bound: 47 + value: 0 + } + longest_free_range_histogram { + lower_bound: 48 + upper_bound: 63 + value: 0 + } + longest_free_range_histogram { + lower_bound: 64 + upper_bound: 79 + value: 0 + } + longest_free_range_histogram { + lower_bound: 80 + upper_bound: 95 + value: 0 + } + longest_free_range_histogram { + lower_bound: 96 + 
upper_bound: 111 + value: 0 + } + longest_free_range_histogram { + lower_bound: 112 + upper_bound: 127 + value: 0 + } + longest_free_range_histogram { + lower_bound: 128 + upper_bound: 143 + value: 0 + } + longest_free_range_histogram { + lower_bound: 144 + upper_bound: 159 + value: 0 + } + longest_free_range_histogram { + lower_bound: 160 + upper_bound: 175 + value: 0 + } + longest_free_range_histogram { + lower_bound: 176 + upper_bound: 191 + value: 0 + } + longest_free_range_histogram { + lower_bound: 192 + upper_bound: 207 + value: 0 + } + longest_free_range_histogram { + lower_bound: 208 + upper_bound: 223 + value: 0 + } + longest_free_range_histogram { + lower_bound: 224 + upper_bound: 239 + value: 0 + } + longest_free_range_histogram { + lower_bound: 240 + upper_bound: 251 + value: 0 + } + longest_free_range_histogram { + lower_bound: 252 + upper_bound: 252 + value: 0 + } + longest_free_range_histogram { + lower_bound: 253 + upper_bound: 253 + value: 0 + } + longest_free_range_histogram { + lower_bound: 254 + upper_bound: 254 + value: 0 + } + longest_free_range_histogram { + lower_bound: 255 + upper_bound: 255 + value: 0 + } + allocations_histogram { + lower_bound: 1 + upper_bound: 1 + value: 1 + } + allocations_histogram { + lower_bound: 2 + upper_bound: 2 + value: 1 + } + allocations_histogram { + lower_bound: 3 + upper_bound: 3 + value: 1 + } + allocations_histogram { + lower_bound: 4 + upper_bound: 4 + value: 2 + } + allocations_histogram { + lower_bound: 5 + upper_bound: 16 + value: 0 + } + allocations_histogram { + lower_bound: 17 + upper_bound: 32 + value: 0 + } + allocations_histogram { + lower_bound: 33 + upper_bound: 48 + value: 0 + } + allocations_histogram { + lower_bound: 49 + upper_bound: 64 + value: 0 + } + allocations_histogram { + lower_bound: 65 + upper_bound: 80 + value: 0 + } + allocations_histogram { + lower_bound: 81 + upper_bound: 96 + value: 0 + } + allocations_histogram { + lower_bound: 97 + upper_bound: 112 + value: 0 + } + allocations_histogram { + lower_bound: 113 + upper_bound: 128 + value: 0 + } + allocations_histogram { + lower_bound: 129 + upper_bound: 144 + value: 0 + } + allocations_histogram { + lower_bound: 145 + upper_bound: 160 + value: 0 + } + allocations_histogram { + lower_bound: 161 + upper_bound: 176 + value: 0 + } + allocations_histogram { + lower_bound: 177 + upper_bound: 192 + value: 0 + } + allocations_histogram { + lower_bound: 193 + upper_bound: 208 + value: 0 + } + allocations_histogram { + lower_bound: 209 + upper_bound: 224 + value: 0 + } + allocations_histogram { + lower_bound: 225 + upper_bound: 240 + value: 0 + } + allocations_histogram { + lower_bound: 241 + upper_bound: 252 + value: 0 + } + allocations_histogram { + lower_bound: 253 + upper_bound: 253 + value: 0 + } + allocations_histogram { + lower_bound: 254 + upper_bound: 254 + value: 0 + } + allocations_histogram { + lower_bound: 255 + upper_bound: 255 + value: 0 + } + allocations_histogram { + lower_bound: 256 + upper_bound: 256 + value: 0 + } + } + filler_tracker { + type: DONATED + free_pages_histogram { + lower_bound: 0 + upper_bound: 0 + value: 0 + } + free_pages_histogram { + lower_bound: 1 + upper_bound: 1 + value: 0 + } + free_pages_histogram { + lower_bound: 2 + upper_bound: 2 + value: 0 + } + free_pages_histogram { + lower_bound: 3 + upper_bound: 3 + value: 0 + } + free_pages_histogram { + lower_bound: 4 + upper_bound: 15 + value: 0 + } + free_pages_histogram { + lower_bound: 16 + upper_bound: 31 + value: 0 + } + free_pages_histogram { + lower_bound: 32 + 
upper_bound: 47 + value: 0 + } + free_pages_histogram { + lower_bound: 48 + upper_bound: 63 + value: 0 + } + free_pages_histogram { + lower_bound: 64 + upper_bound: 79 + value: 0 + } + free_pages_histogram { + lower_bound: 80 + upper_bound: 95 + value: 0 + } + free_pages_histogram { + lower_bound: 96 + upper_bound: 111 + value: 0 + } + free_pages_histogram { + lower_bound: 112 + upper_bound: 127 + value: 0 + } + free_pages_histogram { + lower_bound: 128 + upper_bound: 143 + value: 0 + } + free_pages_histogram { + lower_bound: 144 + upper_bound: 159 + value: 0 + } + free_pages_histogram { + lower_bound: 160 + upper_bound: 175 + value: 0 + } + free_pages_histogram { + lower_bound: 176 + upper_bound: 191 + value: 0 + } + free_pages_histogram { + lower_bound: 192 + upper_bound: 207 + value: 0 + } + free_pages_histogram { + lower_bound: 208 + upper_bound: 223 + value: 0 + } + free_pages_histogram { + lower_bound: 224 + upper_bound: 239 + value: 0 + } + free_pages_histogram { + lower_bound: 240 + upper_bound: 251 + value: 0 + } + free_pages_histogram { + lower_bound: 252 + upper_bound: 252 + value: 0 + } + free_pages_histogram { + lower_bound: 253 + upper_bound: 253 + value: 0 + } + free_pages_histogram { + lower_bound: 254 + upper_bound: 254 + value: 0 + } + free_pages_histogram { + lower_bound: 255 + upper_bound: 255 + value: 1 + } + longest_free_range_histogram { + lower_bound: 0 + upper_bound: 0 + value: 0 + } + longest_free_range_histogram { + lower_bound: 1 + upper_bound: 1 + value: 0 + } + longest_free_range_histogram { + lower_bound: 2 + upper_bound: 2 + value: 0 + } + longest_free_range_histogram { + lower_bound: 3 + upper_bound: 3 + value: 0 + } + longest_free_range_histogram { + lower_bound: 4 + upper_bound: 15 + value: 0 + } + longest_free_range_histogram { + lower_bound: 16 + upper_bound: 31 + value: 0 + } + longest_free_range_histogram { + lower_bound: 32 + upper_bound: 47 + value: 0 + } + longest_free_range_histogram { + lower_bound: 48 + upper_bound: 63 + value: 0 + } + longest_free_range_histogram { + lower_bound: 64 + upper_bound: 79 + value: 0 + } + longest_free_range_histogram { + lower_bound: 80 + upper_bound: 95 + value: 0 + } + longest_free_range_histogram { + lower_bound: 96 + upper_bound: 111 + value: 0 + } + longest_free_range_histogram { + lower_bound: 112 + upper_bound: 127 + value: 0 + } + longest_free_range_histogram { + lower_bound: 128 + upper_bound: 143 + value: 0 + } + longest_free_range_histogram { + lower_bound: 144 + upper_bound: 159 + value: 0 + } + longest_free_range_histogram { + lower_bound: 160 + upper_bound: 175 + value: 0 + } + longest_free_range_histogram { + lower_bound: 176 + upper_bound: 191 + value: 0 + } + longest_free_range_histogram { + lower_bound: 192 + upper_bound: 207 + value: 0 + } + longest_free_range_histogram { + lower_bound: 208 + upper_bound: 223 + value: 0 + } + longest_free_range_histogram { + lower_bound: 224 + upper_bound: 239 + value: 0 + } + longest_free_range_histogram { + lower_bound: 240 + upper_bound: 251 + value: 0 + } + longest_free_range_histogram { + lower_bound: 252 + upper_bound: 252 + value: 0 + } + longest_free_range_histogram { + lower_bound: 253 + upper_bound: 253 + value: 0 + } + longest_free_range_histogram { + lower_bound: 254 + upper_bound: 254 + value: 0 + } + longest_free_range_histogram { + lower_bound: 255 + upper_bound: 255 + value: 1 + } + allocations_histogram { + lower_bound: 1 + upper_bound: 1 + value: 1 + } + allocations_histogram { + lower_bound: 2 + upper_bound: 2 + value: 0 + } + 
allocations_histogram { + lower_bound: 3 + upper_bound: 3 + value: 0 + } + allocations_histogram { + lower_bound: 4 + upper_bound: 4 + value: 0 + } + allocations_histogram { + lower_bound: 5 + upper_bound: 16 + value: 0 + } + allocations_histogram { + lower_bound: 17 + upper_bound: 32 + value: 0 + } + allocations_histogram { + lower_bound: 33 + upper_bound: 48 + value: 0 + } + allocations_histogram { + lower_bound: 49 + upper_bound: 64 + value: 0 + } + allocations_histogram { + lower_bound: 65 + upper_bound: 80 + value: 0 + } + allocations_histogram { + lower_bound: 81 + upper_bound: 96 + value: 0 + } + allocations_histogram { + lower_bound: 97 + upper_bound: 112 + value: 0 + } + allocations_histogram { + lower_bound: 113 + upper_bound: 128 + value: 0 + } + allocations_histogram { + lower_bound: 129 + upper_bound: 144 + value: 0 + } + allocations_histogram { + lower_bound: 145 + upper_bound: 160 + value: 0 + } + allocations_histogram { + lower_bound: 161 + upper_bound: 176 + value: 0 + } + allocations_histogram { + lower_bound: 177 + upper_bound: 192 + value: 0 + } + allocations_histogram { + lower_bound: 193 + upper_bound: 208 + value: 0 + } + allocations_histogram { + lower_bound: 209 + upper_bound: 224 + value: 0 + } + allocations_histogram { + lower_bound: 225 + upper_bound: 240 + value: 0 + } + allocations_histogram { + lower_bound: 241 + upper_bound: 252 + value: 0 + } + allocations_histogram { + lower_bound: 253 + upper_bound: 253 + value: 0 + } + allocations_histogram { + lower_bound: 254 + upper_bound: 254 + value: 0 + } + allocations_histogram { + lower_bound: 255 + upper_bound: 255 + value: 0 + } + allocations_histogram { + lower_bound: 256 + upper_bound: 256 + value: 0 + } + } + filler_tracker { + type: PARTIAL + free_pages_histogram { + lower_bound: 0 + upper_bound: 0 + value: 0 + } + free_pages_histogram { + lower_bound: 1 + upper_bound: 1 + value: 0 + } + free_pages_histogram { + lower_bound: 2 + upper_bound: 2 + value: 0 + } + free_pages_histogram { + lower_bound: 3 + upper_bound: 3 + value: 0 + } + free_pages_histogram { + lower_bound: 4 + upper_bound: 15 + value: 0 + } + free_pages_histogram { + lower_bound: 16 + upper_bound: 31 + value: 0 + } + free_pages_histogram { + lower_bound: 32 + upper_bound: 47 + value: 0 + } + free_pages_histogram { + lower_bound: 48 + upper_bound: 63 + value: 0 + } + free_pages_histogram { + lower_bound: 64 + upper_bound: 79 + value: 0 + } + free_pages_histogram { + lower_bound: 80 + upper_bound: 95 + value: 0 + } + free_pages_histogram { + lower_bound: 96 + upper_bound: 111 + value: 0 + } + free_pages_histogram { + lower_bound: 112 + upper_bound: 127 + value: 0 + } + free_pages_histogram { + lower_bound: 128 + upper_bound: 143 + value: 0 + } + free_pages_histogram { + lower_bound: 144 + upper_bound: 159 + value: 0 + } + free_pages_histogram { + lower_bound: 160 + upper_bound: 175 + value: 0 + } + free_pages_histogram { + lower_bound: 176 + upper_bound: 191 + value: 0 + } + free_pages_histogram { + lower_bound: 192 + upper_bound: 207 + value: 0 + } + free_pages_histogram { + lower_bound: 208 + upper_bound: 223 + value: 0 + } + free_pages_histogram { + lower_bound: 224 + upper_bound: 239 + value: 0 + } + free_pages_histogram { + lower_bound: 240 + upper_bound: 251 + value: 0 + } + free_pages_histogram { + lower_bound: 252 + upper_bound: 252 + value: 0 + } + free_pages_histogram { + lower_bound: 253 + upper_bound: 253 + value: 0 + } + free_pages_histogram { + lower_bound: 254 + upper_bound: 254 + value: 0 + } + free_pages_histogram { + 
lower_bound: 255 + upper_bound: 255 + value: 0 + } + longest_free_range_histogram { + lower_bound: 0 + upper_bound: 0 + value: 0 + } + longest_free_range_histogram { + lower_bound: 1 + upper_bound: 1 + value: 0 + } + longest_free_range_histogram { + lower_bound: 2 + upper_bound: 2 + value: 0 + } + longest_free_range_histogram { + lower_bound: 3 + upper_bound: 3 + value: 0 + } + longest_free_range_histogram { + lower_bound: 4 + upper_bound: 15 + value: 0 + } + longest_free_range_histogram { + lower_bound: 16 + upper_bound: 31 + value: 0 + } + longest_free_range_histogram { + lower_bound: 32 + upper_bound: 47 + value: 0 + } + longest_free_range_histogram { + lower_bound: 48 + upper_bound: 63 + value: 0 + } + longest_free_range_histogram { + lower_bound: 64 + upper_bound: 79 + value: 0 + } + longest_free_range_histogram { + lower_bound: 80 + upper_bound: 95 + value: 0 + } + longest_free_range_histogram { + lower_bound: 96 + upper_bound: 111 + value: 0 + } + longest_free_range_histogram { + lower_bound: 112 + upper_bound: 127 + value: 0 + } + longest_free_range_histogram { + lower_bound: 128 + upper_bound: 143 + value: 0 + } + longest_free_range_histogram { + lower_bound: 144 + upper_bound: 159 + value: 0 + } + longest_free_range_histogram { + lower_bound: 160 + upper_bound: 175 + value: 0 + } + longest_free_range_histogram { + lower_bound: 176 + upper_bound: 191 + value: 0 + } + longest_free_range_histogram { + lower_bound: 192 + upper_bound: 207 + value: 0 + } + longest_free_range_histogram { + lower_bound: 208 + upper_bound: 223 + value: 0 + } + longest_free_range_histogram { + lower_bound: 224 + upper_bound: 239 + value: 0 + } + longest_free_range_histogram { + lower_bound: 240 + upper_bound: 251 + value: 0 + } + longest_free_range_histogram { + lower_bound: 252 + upper_bound: 252 + value: 0 + } + longest_free_range_histogram { + lower_bound: 253 + upper_bound: 253 + value: 0 + } + longest_free_range_histogram { + lower_bound: 254 + upper_bound: 254 + value: 0 + } + longest_free_range_histogram { + lower_bound: 255 + upper_bound: 255 + value: 0 + } + allocations_histogram { + lower_bound: 1 + upper_bound: 1 + value: 0 + } + allocations_histogram { + lower_bound: 2 + upper_bound: 2 + value: 0 + } + allocations_histogram { + lower_bound: 3 + upper_bound: 3 + value: 0 + } + allocations_histogram { + lower_bound: 4 + upper_bound: 4 + value: 0 + } + allocations_histogram { + lower_bound: 5 + upper_bound: 16 + value: 0 + } + allocations_histogram { + lower_bound: 17 + upper_bound: 32 + value: 0 + } + allocations_histogram { + lower_bound: 33 + upper_bound: 48 + value: 0 + } + allocations_histogram { + lower_bound: 49 + upper_bound: 64 + value: 0 + } + allocations_histogram { + lower_bound: 65 + upper_bound: 80 + value: 0 + } + allocations_histogram { + lower_bound: 81 + upper_bound: 96 + value: 0 + } + allocations_histogram { + lower_bound: 97 + upper_bound: 112 + value: 0 + } + allocations_histogram { + lower_bound: 113 + upper_bound: 128 + value: 0 + } + allocations_histogram { + lower_bound: 129 + upper_bound: 144 + value: 0 + } + allocations_histogram { + lower_bound: 145 + upper_bound: 160 + value: 0 + } + allocations_histogram { + lower_bound: 161 + upper_bound: 176 + value: 0 + } + allocations_histogram { + lower_bound: 177 + upper_bound: 192 + value: 0 + } + allocations_histogram { + lower_bound: 193 + upper_bound: 208 + value: 0 + } + allocations_histogram { + lower_bound: 209 + upper_bound: 224 + value: 0 + } + allocations_histogram { + lower_bound: 225 + upper_bound: 240 + value: 0 + 
} + allocations_histogram { + lower_bound: 241 + upper_bound: 252 + value: 0 + } + allocations_histogram { + lower_bound: 253 + upper_bound: 253 + value: 0 + } + allocations_histogram { + lower_bound: 254 + upper_bound: 254 + value: 0 + } + allocations_histogram { + lower_bound: 255 + upper_bound: 255 + value: 0 + } + allocations_histogram { + lower_bound: 256 + upper_bound: 256 + value: 0 + } + } + filler_tracker { + type: RELEASED + free_pages_histogram { + lower_bound: 0 + upper_bound: 0 + value: 0 + } + free_pages_histogram { + lower_bound: 1 + upper_bound: 1 + value: 0 + } + free_pages_histogram { + lower_bound: 2 + upper_bound: 2 + value: 0 + } + free_pages_histogram { + lower_bound: 3 + upper_bound: 3 + value: 0 + } + free_pages_histogram { + lower_bound: 4 + upper_bound: 15 + value: 2 + } + free_pages_histogram { + lower_bound: 16 + upper_bound: 31 + value: 0 + } + free_pages_histogram { + lower_bound: 32 + upper_bound: 47 + value: 0 + } + free_pages_histogram { + lower_bound: 48 + upper_bound: 63 + value: 0 + } + free_pages_histogram { + lower_bound: 64 + upper_bound: 79 + value: 0 + } + free_pages_histogram { + lower_bound: 80 + upper_bound: 95 + value: 0 + } + free_pages_histogram { + lower_bound: 96 + upper_bound: 111 + value: 0 + } + free_pages_histogram { + lower_bound: 112 + upper_bound: 127 + value: 0 + } + free_pages_histogram { + lower_bound: 128 + upper_bound: 143 + value: 0 + } + free_pages_histogram { + lower_bound: 144 + upper_bound: 159 + value: 0 + } + free_pages_histogram { + lower_bound: 160 + upper_bound: 175 + value: 0 + } + free_pages_histogram { + lower_bound: 176 + upper_bound: 191 + value: 0 + } + free_pages_histogram { + lower_bound: 192 + upper_bound: 207 + value: 0 + } + free_pages_histogram { + lower_bound: 208 + upper_bound: 223 + value: 0 + } + free_pages_histogram { + lower_bound: 224 + upper_bound: 239 + value: 0 + } + free_pages_histogram { + lower_bound: 240 + upper_bound: 251 + value: 0 + } + free_pages_histogram { + lower_bound: 252 + upper_bound: 252 + value: 0 + } + free_pages_histogram { + lower_bound: 253 + upper_bound: 253 + value: 0 + } + free_pages_histogram { + lower_bound: 254 + upper_bound: 254 + value: 0 + } + free_pages_histogram { + lower_bound: 255 + upper_bound: 255 + value: 0 + } + longest_free_range_histogram { + lower_bound: 0 + upper_bound: 0 + value: 0 + } + longest_free_range_histogram { + lower_bound: 1 + upper_bound: 1 + value: 0 + } + longest_free_range_histogram { + lower_bound: 2 + upper_bound: 2 + value: 0 + } + longest_free_range_histogram { + lower_bound: 3 + upper_bound: 3 + value: 0 + } + longest_free_range_histogram { + lower_bound: 4 + upper_bound: 15 + value: 2 + } + longest_free_range_histogram { + lower_bound: 16 + upper_bound: 31 + value: 0 + } + longest_free_range_histogram { + lower_bound: 32 + upper_bound: 47 + value: 0 + } + longest_free_range_histogram { + lower_bound: 48 + upper_bound: 63 + value: 0 + } + longest_free_range_histogram { + lower_bound: 64 + upper_bound: 79 + value: 0 + } + longest_free_range_histogram { + lower_bound: 80 + upper_bound: 95 + value: 0 + } + longest_free_range_histogram { + lower_bound: 96 + upper_bound: 111 + value: 0 + } + longest_free_range_histogram { + lower_bound: 112 + upper_bound: 127 + value: 0 + } + longest_free_range_histogram { + lower_bound: 128 + upper_bound: 143 + value: 0 + } + longest_free_range_histogram { + lower_bound: 144 + upper_bound: 159 + value: 0 + } + longest_free_range_histogram { + lower_bound: 160 + upper_bound: 175 + value: 0 + } + 
longest_free_range_histogram { + lower_bound: 176 + upper_bound: 191 + value: 0 + } + longest_free_range_histogram { + lower_bound: 192 + upper_bound: 207 + value: 0 + } + longest_free_range_histogram { + lower_bound: 208 + upper_bound: 223 + value: 0 + } + longest_free_range_histogram { + lower_bound: 224 + upper_bound: 239 + value: 0 + } + longest_free_range_histogram { + lower_bound: 240 + upper_bound: 251 + value: 0 + } + longest_free_range_histogram { + lower_bound: 252 + upper_bound: 252 + value: 0 + } + longest_free_range_histogram { + lower_bound: 253 + upper_bound: 253 + value: 0 + } + longest_free_range_histogram { + lower_bound: 254 + upper_bound: 254 + value: 0 + } + longest_free_range_histogram { + lower_bound: 255 + upper_bound: 255 + value: 0 + } + allocations_histogram { + lower_bound: 1 + upper_bound: 1 + value: 2 + } + allocations_histogram { + lower_bound: 2 + upper_bound: 2 + value: 0 + } + allocations_histogram { + lower_bound: 3 + upper_bound: 3 + value: 0 + } + allocations_histogram { + lower_bound: 4 + upper_bound: 4 + value: 0 + } + allocations_histogram { + lower_bound: 5 + upper_bound: 16 + value: 0 + } + allocations_histogram { + lower_bound: 17 + upper_bound: 32 + value: 0 + } + allocations_histogram { + lower_bound: 33 + upper_bound: 48 + value: 0 + } + allocations_histogram { + lower_bound: 49 + upper_bound: 64 + value: 0 + } + allocations_histogram { + lower_bound: 65 + upper_bound: 80 + value: 0 + } + allocations_histogram { + lower_bound: 81 + upper_bound: 96 + value: 0 + } + allocations_histogram { + lower_bound: 97 + upper_bound: 112 + value: 0 + } + allocations_histogram { + lower_bound: 113 + upper_bound: 128 + value: 0 + } + allocations_histogram { + lower_bound: 129 + upper_bound: 144 + value: 0 + } + allocations_histogram { + lower_bound: 145 + upper_bound: 160 + value: 0 + } + allocations_histogram { + lower_bound: 161 + upper_bound: 176 + value: 0 + } + allocations_histogram { + lower_bound: 177 + upper_bound: 192 + value: 0 + } + allocations_histogram { + lower_bound: 193 + upper_bound: 208 + value: 0 + } + allocations_histogram { + lower_bound: 209 + upper_bound: 224 + value: 0 + } + allocations_histogram { + lower_bound: 225 + upper_bound: 240 + value: 0 + } + allocations_histogram { + lower_bound: 241 + upper_bound: 252 + value: 0 + } + allocations_histogram { + lower_bound: 253 + upper_bound: 253 + value: 0 + } + allocations_histogram { + lower_bound: 254 + upper_bound: 254 + value: 0 + } + allocations_histogram { + lower_bound: 255 + upper_bound: 255 + value: 0 + } + allocations_histogram { + lower_bound: 256 + upper_bound: 256 + value: 0 + } + } + filler_skipped_subrelease { + skipped_subrelease_interval_ms: 0 + skipped_subrelease_pages: 0 + correctly_skipped_subrelease_pages: 0 + pending_skipped_subrelease_pages: 0 + skipped_subrelease_count: 0 + correctly_skipped_subrelease_count: 0 + pending_skipped_subrelease_count: 0 + } + filler_stats_timeseries { + window_ms: 1000 + epochs: 600 + min_free_pages_interval_ms: 300000 + min_free_pages: 0 + min_free_backed_pages: 0 + measurements { + epoch: 599 + timestamp_ms: 0 + min_free_pages: 0 + min_free_backed_pages: 0 + num_pages_subreleased: 269 + num_hugepages_broken: 3 + at_minimum_demand { + num_pages: 0 + regular_huge_pages: 0 + donated_huge_pages: 0 + partial_released_huge_pages: 0 + released_huge_pages: 0 + used_pages_in_subreleased_huge_pages: 0 + } + at_maximum_demand { + num_pages: 1774 + regular_huge_pages: 5 + donated_huge_pages: 1 + partial_released_huge_pages: 0 + released_huge_pages: 
2 + used_pages_in_subreleased_huge_pages: 499 + } + at_minimum_huge_pages { + num_pages: 0 + regular_huge_pages: 0 + donated_huge_pages: 0 + partial_released_huge_pages: 0 + released_huge_pages: 0 + used_pages_in_subreleased_huge_pages: 0 + } + at_maximum_huge_pages { + num_pages: 1774 + regular_huge_pages: 5 + donated_huge_pages: 1 + partial_released_huge_pages: 0 + released_huge_pages: 2 + used_pages_in_subreleased_huge_pages: 499 + } + } + } +)")); + for (const auto& alloc : allocs) { + Delete(alloc); + } +} + +// Testing subrelase stats: ensure that the cumulative number of released +// pages and broken hugepages is no less than those of the last 10 mins +TEST_P(FillerTest, CheckSubreleaseStats) { + // Get lots of hugepages into the filler. + Advance(absl::Minutes(1)); + std::vector<PAlloc> result; + static_assert(kPagesPerHugePage > Length(10), + "Not enough pages per hugepage!"); + for (int i = 0; i < 10; ++i) { + result.push_back(Allocate(kPagesPerHugePage - Length(i + 1))); + } + + // Breaking up 2 hugepages, releasing 19 pages due to reaching limit, + EXPECT_EQ(HardReleasePages(Length(10)), Length(10)); + EXPECT_EQ(HardReleasePages(Length(9)), Length(9)); + + Advance(absl::Minutes(1)); + SubreleaseStats subrelease = filler_.subrelease_stats(); + EXPECT_EQ(subrelease.total_pages_subreleased, Length(0)); + EXPECT_EQ(subrelease.total_hugepages_broken.raw_num(), 0); + EXPECT_EQ(subrelease.num_pages_subreleased, Length(19)); + EXPECT_EQ(subrelease.num_hugepages_broken.raw_num(), 2); + EXPECT_EQ(subrelease.total_pages_subreleased_due_to_limit, Length(19)); + EXPECT_EQ(subrelease.total_hugepages_broken_due_to_limit.raw_num(), 2); + + // Do some work so that the timeseries updates its stats + for (int i = 0; i < 5; ++i) { + result.push_back(Allocate(Length(1))); + } + subrelease = filler_.subrelease_stats(); + EXPECT_EQ(subrelease.total_pages_subreleased, Length(19)); + EXPECT_EQ(subrelease.total_hugepages_broken.raw_num(), 2); + EXPECT_EQ(subrelease.num_pages_subreleased, Length(0)); + EXPECT_EQ(subrelease.num_hugepages_broken.raw_num(), 0); + EXPECT_EQ(subrelease.total_pages_subreleased_due_to_limit, Length(19)); + EXPECT_EQ(subrelease.total_hugepages_broken_due_to_limit.raw_num(), 2); + + // Breaking up 3 hugepages, releasing 21 pages (background thread) + EXPECT_EQ(ReleasePages(Length(8)), Length(8)); + EXPECT_EQ(ReleasePages(Length(7)), Length(7)); + EXPECT_EQ(ReleasePages(Length(6)), Length(6)); + + subrelease = filler_.subrelease_stats(); + EXPECT_EQ(subrelease.total_pages_subreleased, Length(19)); + EXPECT_EQ(subrelease.total_hugepages_broken.raw_num(), 2); + EXPECT_EQ(subrelease.num_pages_subreleased, Length(21)); + EXPECT_EQ(subrelease.num_hugepages_broken.raw_num(), 3); + EXPECT_EQ(subrelease.total_pages_subreleased_due_to_limit, Length(19)); + EXPECT_EQ(subrelease.total_hugepages_broken_due_to_limit.raw_num(), 2); + + Advance(absl::Minutes(10)); // This forces timeseries to wrap + // Do some work + for (int i = 0; i < 5; ++i) { + result.push_back(Allocate(Length(1))); + } + subrelease = filler_.subrelease_stats(); + EXPECT_EQ(subrelease.total_pages_subreleased, Length(40)); + EXPECT_EQ(subrelease.total_hugepages_broken.raw_num(), 5); + EXPECT_EQ(subrelease.num_pages_subreleased, Length(0)); + EXPECT_EQ(subrelease.num_hugepages_broken.raw_num(), 0); + EXPECT_EQ(subrelease.total_pages_subreleased_due_to_limit, Length(19)); + EXPECT_EQ(subrelease.total_hugepages_broken_due_to_limit.raw_num(), 2); + + std::string buffer(1024 * 1024, '\0'); + { + Printer printer(&*buffer.begin(), 
buffer.size()); + filler_.Print(&printer, /*everything=*/true); + buffer.erase(printer.SpaceRequired()); + } + + ASSERT_THAT( + buffer, + testing::HasSubstr( + "HugePageFiller: Since startup, 40 pages subreleased, 5 hugepages " + "broken, (19 pages, 2 hugepages due to reaching tcmalloc " + "limit)")); + ASSERT_THAT(buffer, testing::EndsWith( + "HugePageFiller: Subrelease stats last 10 min: total " + "21 pages subreleased, 3 hugepages broken\n")); + + for (const auto& alloc : result) { + Delete(alloc); + } +} + +TEST_P(FillerTest, ConstantBrokenHugePages) { + // Get and Fill up many huge pages + const HugeLength kHugePages = NHugePages(10 * kPagesPerHugePage.raw_num()); + + absl::BitGen rng; + std::vector<PAlloc> alloc; + alloc.reserve(kHugePages.raw_num()); + std::vector<PAlloc> dead; + dead.reserve(kHugePages.raw_num()); + std::vector<PAlloc> alloc_small; + alloc_small.reserve(kHugePages.raw_num() + 2); + + for (HugeLength i; i < kHugePages; ++i) { + auto size = + Length(absl::Uniform<size_t>(rng, 2, kPagesPerHugePage.raw_num() - 1)); + alloc_small.push_back(Allocate(Length(1))); + alloc.push_back(Allocate(size - Length(1))); + dead.push_back(Allocate(kPagesPerHugePage - size)); + } + ASSERT_EQ(filler_.size(), kHugePages); + + for (int i = 0; i < 2; ++i) { + for (auto& a : dead) { + Delete(a); + } + ReleasePages(filler_.free_pages()); + ASSERT_EQ(filler_.free_pages(), Length(0)); + alloc_small.push_back( + Allocate(Length(1))); // To force subrelease stats to update + + std::string buffer(1024 * 1024, '\0'); + { + Printer printer(&*buffer.begin(), buffer.size()); + filler_.Print(&printer, /*everything=*/false); + buffer.erase(printer.SpaceRequired()); + } + + ASSERT_THAT(buffer, testing::HasSubstr(absl::StrCat(kHugePages.raw_num(), + " hugepages broken"))); + if (i == 1) { + // Number of pages in alloc_small + ASSERT_THAT(buffer, testing::HasSubstr(absl::StrCat( + kHugePages.raw_num() + 2, + " used pages in subreleased hugepages"))); + // Sum of pages in alloc and dead + ASSERT_THAT(buffer, + testing::HasSubstr(absl::StrCat( + kHugePages.raw_num() * kPagesPerHugePage.raw_num() - + kHugePages.raw_num(), + " pages subreleased"))); + } + + dead.swap(alloc); + alloc.clear(); + } + + // Clean up + for (auto& a : alloc_small) { + Delete(a); + } +} + +// Confirms that a timeseries that contains every epoch does not exceed the +// expected buffer capacity of 1 MiB. +TEST_P(FillerTest, CheckBufferSize) { + const int kEpochs = 600; + const absl::Duration kEpochLength = absl::Seconds(1); + + PAlloc big = Allocate(kPagesPerHugePage - Length(4)); + + for (int i = 0; i < kEpochs; i += 2) { + auto tiny = Allocate(Length(2)); + Advance(kEpochLength); + Delete(tiny); + Advance(kEpochLength); + } + + Delete(big); + + std::string buffer(1024 * 1024, '\0'); + Printer printer(&*buffer.begin(), buffer.size()); + { + PbtxtRegion region(&printer, kTop, /*indent=*/0); + filler_.PrintInPbtxt(®ion); + } + + // We assume a maximum buffer size of 1 MiB. When increasing this size, ensure + // that all places processing mallocz protos get updated as well. + size_t buffer_size = printer.SpaceRequired(); + printf("HugePageFiller buffer size: %zu\n", buffer_size); + EXPECT_LE(buffer_size, 1024 * 1024); +} + +TEST_P(FillerTest, ReleasePriority) { + // Fill up many huge pages (>> kPagesPerHugePage). This relies on an + // implementation detail of ReleasePages buffering up at most + // kPagesPerHugePage as potential release candidates. 
+ const HugeLength kHugePages = NHugePages(10 * kPagesPerHugePage.raw_num()); + + // We will ensure that we fill full huge pages, then deallocate some parts of + // those to provide space for subrelease. + absl::BitGen rng; + std::vector<PAlloc> alloc; + alloc.reserve(kHugePages.raw_num()); + std::vector<PAlloc> dead; + dead.reserve(kHugePages.raw_num()); + + absl::flat_hash_set<FakeTracker*> unique_pages; + unique_pages.reserve(kHugePages.raw_num()); + + for (HugeLength i; i < kHugePages; ++i) { + Length size(absl::Uniform<size_t>(rng, 1, kPagesPerHugePage.raw_num() - 1)); + + PAlloc a = Allocate(size); + unique_pages.insert(a.pt); + alloc.push_back(a); + dead.push_back(Allocate(kPagesPerHugePage - size)); + } + + ASSERT_EQ(filler_.size(), kHugePages); + + for (auto& a : dead) { + Delete(a); + } + + // As of 5/2020, our release priority is to subrelease huge pages with the + // fewest used pages. Bucket unique_pages by that used_pages(). + std::vector<std::vector<FakeTracker*>> ordered(kPagesPerHugePage.raw_num()); + for (auto* pt : unique_pages) { + // None of these should be released yet. + EXPECT_FALSE(pt->released()); + ordered[pt->used_pages().raw_num()].push_back(pt); + } + + // Iteratively release random amounts of free memory--until all free pages + // become unmapped pages--and validate that we followed the expected release + // priority. + Length free_pages; + while ((free_pages = filler_.free_pages()) > Length(0)) { + Length to_release(absl::LogUniform<size_t>(rng, 1, free_pages.raw_num())); + Length released = ReleasePages(to_release); + ASSERT_LE(released, free_pages); + + // Iterate through each element of ordered. If any trackers are released, + // all previous trackers must be released. + bool previous_all_released = true; + for (auto l = Length(0); l < kPagesPerHugePage; ++l) { + bool any_released = false; + bool all_released = true; + + for (auto* pt : ordered[l.raw_num()]) { + bool released = pt->released(); + + any_released |= released; + all_released &= released; + } + + if (any_released) { + EXPECT_TRUE(previous_all_released) << [&]() { + // On mismatch, print the bitmap of released states on l-1/l. + std::vector<bool> before; + if (l > Length(0)) { + before.reserve(ordered[l.raw_num() - 1].size()); + for (auto* pt : ordered[l.raw_num() - 1]) { + before.push_back(pt->released()); + } + } + + std::vector<bool> after; + after.reserve(ordered[l.raw_num()].size()); + for (auto* pt : ordered[l.raw_num()]) { + after.push_back(pt->released()); + } + + return absl::StrCat("before = {", absl::StrJoin(before, ";"), + "}\nafter = {", absl::StrJoin(after, ";"), "}"); + }(); + } + + previous_all_released = all_released; + } + } + + // All huge pages should be released. + for (auto* pt : unique_pages) { + EXPECT_TRUE(pt->released()); + } + + for (auto& a : alloc) { + Delete(a); + } +} + +INSTANTIATE_TEST_SUITE_P(All, FillerTest, + testing::Values(FillerPartialRerelease::Return, + FillerPartialRerelease::Retain)); + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_pages.h b/contrib/libs/tcmalloc/tcmalloc/huge_pages.h new file mode 100644 index 0000000000..4498994f75 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/huge_pages.h @@ -0,0 +1,343 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Helpers for nicely typed interfaces that pass around refs to large +// ranges. You probably don't want to store HugeRanges long term +// (nothing will break, but that's not what they're efficient for.) +#ifndef TCMALLOC_HUGE_PAGES_H_ +#define TCMALLOC_HUGE_PAGES_H_ + +#include <stddef.h> +#include <stdint.h> + +#include <cmath> +#include <limits> +#include <ostream> +#include <utility> + +#include "tcmalloc/common.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/optimization.h" +#include "tcmalloc/pages.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +inline constexpr Length kPagesPerHugePage = + Length(1 << (kHugePageShift - kPageShift)); + +// A single aligned huge page. +struct HugePage { + void *start_addr() const { + ASSERT(pn <= kMaxPageNumber); + return reinterpret_cast<void *>(pn << kHugePageShift); + } + + PageId first_page() const { + ASSERT(pn <= kMaxPageNumber); + return PageId(pn << (kHugePageShift - kPageShift)); + } + + size_t index() const { + ASSERT(pn <= kMaxPageNumber); + return pn; + } + + static constexpr uintptr_t kMaxPageNumber = + std::numeric_limits<uintptr_t>::max() >> kHugePageShift; + + uintptr_t pn; +}; + +struct HugeLength { + size_t n; + + constexpr HugeLength() : n(0) {} + explicit HugeLength(double x) : n(ceil(x)) { ASSERT(x >= 0); } + constexpr size_t raw_num() const { return n; } + constexpr size_t in_bytes() const { return n * kHugePageSize; } + constexpr size_t in_mib() const { + static_assert(kHugePageSize >= 1024 * 1024, "tiny hugepages?"); + return n * (kHugePageSize / 1024 / 1024); + } + constexpr Length in_pages() const { return n * kPagesPerHugePage; } + + // It is possible to have a HugeLength that corresponds to more + // bytes than can be addressed (i.e. > size_t.) Check for that. + bool overflows() const; + + private: + explicit constexpr HugeLength(size_t x) : n(x) {} + friend constexpr HugeLength NHugePages(size_t n); +}; + +// Literal constructors (made explicit to avoid accidental uses when +// another unit was meant.) +TCMALLOC_ATTRIBUTE_CONST +inline constexpr HugeLength NHugePages(size_t n) { return HugeLength(n); } + +TCMALLOC_ATTRIBUTE_CONST +inline constexpr HugeLength HLFromBytes(size_t bytes) { + return NHugePages(bytes / kHugePageSize); +} + +// Rounds *up* to the nearest hugepage. 
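+// For example, HLFromPages(Length(1)) yields NHugePages(1), while
+// HLFromPages(kPagesPerHugePage + Length(1)) yields NHugePages(2).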
+TCMALLOC_ATTRIBUTE_CONST +inline constexpr HugeLength HLFromPages(Length pages) { + return NHugePages((pages + kPagesPerHugePage - Length(1)) / + kPagesPerHugePage); +} + +inline HugeLength &operator++(HugeLength &len) { // NOLINT(runtime/references) + len.n++; + return len; +} + +inline HugePage &operator++(HugePage &p) { // NOLINT(runtime/references) + ASSERT(p.pn + 1 <= HugePage::kMaxPageNumber); + p.pn++; + return p; +} + +inline HugeLength &operator--(HugeLength &len) { // NOLINT(runtime/references) + ASSERT(len.n >= 1); + len.n--; + return len; +} + +inline constexpr bool operator<(HugeLength lhs, HugeLength rhs) { + return lhs.n < rhs.n; +} + +TCMALLOC_ATTRIBUTE_CONST +inline constexpr bool operator>(HugeLength lhs, HugeLength rhs) { + return lhs.n > rhs.n; +} + +TCMALLOC_ATTRIBUTE_CONST +inline constexpr bool operator<=(HugeLength lhs, HugeLength rhs) { + return lhs.n <= rhs.n; +} + +TCMALLOC_ATTRIBUTE_CONST +inline constexpr bool operator<(HugePage lhs, HugePage rhs) { + return lhs.pn < rhs.pn; +} + +TCMALLOC_ATTRIBUTE_CONST +inline constexpr bool operator>(HugePage lhs, HugePage rhs) { + return lhs.pn > rhs.pn; +} + +TCMALLOC_ATTRIBUTE_CONST +inline constexpr bool operator>=(HugeLength lhs, HugeLength rhs) { + return lhs.n >= rhs.n; +} + +TCMALLOC_ATTRIBUTE_CONST +inline constexpr bool operator<=(HugePage lhs, HugePage rhs) { + return lhs.pn <= rhs.pn; +} + +TCMALLOC_ATTRIBUTE_CONST +inline constexpr bool operator>=(HugePage lhs, HugePage rhs) { + return lhs.pn >= rhs.pn; +} + +TCMALLOC_ATTRIBUTE_CONST +inline constexpr bool operator==(HugePage lhs, HugePage rhs) { + return lhs.pn == rhs.pn; +} + +TCMALLOC_ATTRIBUTE_CONST +inline constexpr bool operator!=(HugePage lhs, HugePage rhs) { + return !(lhs == rhs); +} + +TCMALLOC_ATTRIBUTE_CONST +inline constexpr bool operator==(HugeLength lhs, HugeLength rhs) { + return lhs.n == rhs.n; +} + +TCMALLOC_ATTRIBUTE_CONST +inline constexpr bool operator!=(HugeLength lhs, HugeLength rhs) { + return lhs.n != rhs.n; +} + +TCMALLOC_ATTRIBUTE_CONST +inline constexpr size_t operator/(HugeLength lhs, HugeLength rhs) { + return lhs.n / rhs.n; +} + +TCMALLOC_ATTRIBUTE_CONST +inline constexpr HugeLength operator*(HugeLength lhs, size_t rhs) { + return NHugePages(lhs.n * rhs); +} + +TCMALLOC_ATTRIBUTE_CONST +inline constexpr HugeLength operator/(HugeLength lhs, size_t rhs) { + return NHugePages(lhs.n / rhs); +} + +inline HugeLength &operator*=(HugeLength &lhs, size_t rhs) { + lhs.n *= rhs; + return lhs; +} + +TCMALLOC_ATTRIBUTE_CONST +inline constexpr HugeLength operator%(HugeLength lhs, HugeLength rhs) { + return NHugePages(lhs.n % rhs.n); +} + +TCMALLOC_ATTRIBUTE_CONST +inline constexpr HugePage operator+(HugePage lhs, HugeLength rhs) { + ASSERT(lhs.pn + rhs.n <= HugePage::kMaxPageNumber); + return HugePage{lhs.pn + rhs.n}; +} + +TCMALLOC_ATTRIBUTE_CONST +inline constexpr HugePage operator+(HugeLength lhs, HugePage rhs) { + return rhs + lhs; +} + +TCMALLOC_ATTRIBUTE_CONST +inline constexpr HugePage operator-(HugePage lhs, HugeLength rhs) { + return ASSERT(lhs.pn >= rhs.n), HugePage{lhs.pn - rhs.n}; +} + +TCMALLOC_ATTRIBUTE_CONST +inline constexpr HugeLength operator-(HugePage lhs, HugePage rhs) { + return ASSERT(lhs.pn >= rhs.pn), NHugePages(lhs.pn - rhs.pn); +} + +inline HugePage &operator+=(HugePage &lhs, HugeLength rhs) { + ASSERT(lhs.pn + rhs.n <= HugePage::kMaxPageNumber); + lhs.pn += rhs.n; + return lhs; +} + +TCMALLOC_ATTRIBUTE_CONST +inline constexpr HugeLength operator+(HugeLength lhs, HugeLength rhs) { + return NHugePages(lhs.n + rhs.n); 
+} + +inline HugeLength &operator+=(HugeLength &lhs, HugeLength rhs) { + lhs.n += rhs.n; + return lhs; +} + +TCMALLOC_ATTRIBUTE_CONST +inline constexpr HugeLength operator-(HugeLength lhs, HugeLength rhs) { + return ASSERT(lhs.n >= rhs.n), NHugePages(lhs.n - rhs.n); +} + +inline HugeLength &operator-=(HugeLength &lhs, HugeLength rhs) { + ASSERT(lhs.n >= rhs.n); + lhs.n -= rhs.n; + return lhs; +} + +inline bool HugeLength::overflows() const { + return *this > HLFromBytes(std::numeric_limits<size_t>::max()); +} + +inline void PrintTo(const HugeLength &n, ::std::ostream *os) { + *os << n.raw_num() << "hps"; +} + +TCMALLOC_ATTRIBUTE_CONST +inline HugePage HugePageContaining(PageId p) { + return {p.index() >> (kHugePageShift - kPageShift)}; +} + +TCMALLOC_ATTRIBUTE_CONST +inline HugePage HugePageContaining(void *p) { + return HugePageContaining(PageIdContaining(p)); +} + +// A set of contiguous huge pages. +struct HugeRange { + void *start_addr() const { return first.start_addr(); } + void *end_addr() const { return (first + n).start_addr(); } + size_t byte_len() const { + return static_cast<char *>(end_addr()) - static_cast<char *>(start_addr()); + } + + // Assume any range starting at 0 is bogus. + bool valid() const { return first.start_addr() != nullptr; } + + constexpr HugePage start() const { return first; } + + constexpr HugeLength len() const { return n; } + + HugePage operator[](HugeLength i) const { return first + i; } + + template <typename H> + friend H AbslHashValue(H h, const HugeRange &r) { + return H::combine(std::move(h), r.start().start_addr(), r.len().raw_num()); + } + + bool contains(PageId p) const { return contains(HugePageContaining(p)); } + bool contains(HugePage p) const { return p >= first && (p - first) < n; } + bool contains(HugeRange r) const { + return r.first >= first && (r.first + r.n) <= (first + n); + } + + bool intersects(HugeRange r) const { + return r.contains(start()) || contains(r.start()); + } + + // True iff r is our immediate successor (i.e. this + r is one large + // (non-overlapping) range.) + bool precedes(HugeRange r) const { return end_addr() == r.start_addr(); } + + static HugeRange Nil() { + return {HugePageContaining(nullptr), NHugePages(0)}; + } + + static HugeRange Make(HugePage p, HugeLength n) { return {p, n}; } + + HugePage first; + HugeLength n; +}; + +inline constexpr bool operator==(HugeRange lhs, HugeRange rhs) { + return lhs.start() == rhs.start() && lhs.len() == rhs.len(); +} + +// REQUIRES: a and b are disjoint but adjacent (in that order) + +inline HugeRange Join(HugeRange a, HugeRange b) { + CHECK_CONDITION(a.precedes(b)); + return {a.start(), a.len() + b.len()}; +} + +// REQUIRES r.len() >= n +// Splits r into two ranges, one of length n. The other is either the rest +// of the space (if any) or Nil. 
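+// For example, splitting a range of NHugePages(4) with n = NHugePages(1)
+// yields a 1-hugepage range followed by the remaining 3-hugepage range;
+// splitting with n == r.len() yields {r, HugeRange::Nil()}.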
+inline std::pair<HugeRange, HugeRange> Split(HugeRange r, HugeLength n) { + ASSERT(r.len() >= n); + if (r.len() > n) { + return {HugeRange::Make(r.start(), n), + HugeRange::Make(r.start() + n, r.len() - n)}; + } else { + return {r, HugeRange::Nil()}; + } +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END +#endif // TCMALLOC_HUGE_PAGES_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_region.h b/contrib/libs/tcmalloc/tcmalloc/huge_region.h new file mode 100644 index 0000000000..0262c007b2 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/huge_region.h @@ -0,0 +1,551 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_HUGE_REGION_H_ +#define TCMALLOC_HUGE_REGION_H_ +#include <stddef.h> +#include <stdint.h> + +#include <algorithm> + +#include "absl/base/internal/cycleclock.h" +#include "tcmalloc/huge_allocator.h" +#include "tcmalloc/huge_page_filler.h" +#include "tcmalloc/internal/linked_list.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/pages.h" +#include "tcmalloc/stats.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// Track allocations from a fixed-size multiple huge page region. +// Similar to PageTracker but a few important differences: +// - crosses multiple hugepages +// - backs region on demand +// - supports breaking up the partially-allocated region for use elsewhere +// +// This is intended to help with fast allocation of regions too large +// for HugePageFiller, but too small to round to a full hugepage; both +// lengths that do fit in a hugepage, but often wouldn't fit in +// available gaps (1.75 MiB), and lengths that don't fit, but would +// introduce unacceptable fragmentation (2.1 MiB). +// +class HugeRegion : public TList<HugeRegion>::Elem { + public: + // We could template this if there was any need. + static constexpr HugeLength kRegionSize = HLFromBytes(1024 * 1024 * 1024); + static constexpr size_t kNumHugePages = kRegionSize.raw_num(); + static constexpr HugeLength size() { return kRegionSize; } + + // REQUIRES: r.len() == size(); r unbacked. + HugeRegion(HugeRange r, MemoryModifyFunction unback); + HugeRegion() = delete; + + // If available, return a range of n free pages, setting *from_released = + // true iff the returned range is currently unbacked. + // Returns false if no range available. + bool MaybeGet(Length n, PageId *p, bool *from_released); + + // Return [p, p + n) for new allocations. + // If release=true, release any hugepages made empty as a result. + // REQUIRES: [p, p + n) was the result of a previous MaybeGet. + void Put(PageId p, Length n, bool release); + + // Release any hugepages that are unused but backed. + HugeLength Release(); + + // Is p located in this region? 
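+  // (A constant-time range check against this region's fixed hugepage span.)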
+ bool contains(PageId p) { return location_.contains(p); } + + // Stats + Length used_pages() const { return Length(tracker_.used()); } + Length free_pages() const { + return size().in_pages() - unmapped_pages() - used_pages(); + } + Length unmapped_pages() const { return (size() - nbacked_).in_pages(); } + + void AddSpanStats(SmallSpanStats *small, LargeSpanStats *large, + PageAgeHistograms *ages) const; + + HugeLength backed() const; + + void Print(Printer *out) const; + void PrintInPbtxt(PbtxtRegion *detail) const; + + BackingStats stats() const; + + // We don't define this as operator< because it's a rather specialized order. + bool BetterToAllocThan(const HugeRegion *rhs) const { + return longest_free() < rhs->longest_free(); + } + + void prepend_it(HugeRegion *other) { this->prepend(other); } + + void append_it(HugeRegion *other) { this->append(other); } + + private: + RangeTracker<kRegionSize.in_pages().raw_num()> tracker_; + + HugeRange location_; + + static int64_t AverageWhens(Length a, int64_t a_when, Length b, + int64_t b_when) { + const double aw = static_cast<double>(a.raw_num()) * a_when; + const double bw = static_cast<double>(b.raw_num()) * b_when; + return static_cast<int64_t>((aw + bw) / (a.raw_num() + b.raw_num())); + } + + Length longest_free() const { return Length(tracker_.longest_free()); } + + // Adjust counts of allocs-per-hugepage for [p, p + n) being added/removed. + + // *from_released is set to true iff [p, p + n) is currently unbacked + void Inc(PageId p, Length n, bool *from_released); + // If release is true, unback any hugepage that becomes empty. + void Dec(PageId p, Length n, bool release); + + void UnbackHugepages(bool should[kNumHugePages]); + + // How many pages are used in each hugepage? + Length pages_used_[kNumHugePages]; + // Is this hugepage backed? + bool backed_[kNumHugePages]; + HugeLength nbacked_; + int64_t whens_[kNumHugePages]; + HugeLength total_unbacked_{NHugePages(0)}; + + MemoryModifyFunction unback_; +}; + +// Manage a set of regions from which we allocate. +// Strategy: Allocate from the most fragmented region that fits. +template <typename Region> +class HugeRegionSet { + public: + HugeRegionSet() : n_(0) {} + + // If available, return a range of n free pages, setting *from_released = + // true iff the returned range is currently unbacked. + // Returns false if no range available. + bool MaybeGet(Length n, PageId *page, bool *from_released); + + // Return an allocation to a region (if one matches!) + bool MaybePut(PageId p, Length n); + + // Add region to the set. + void Contribute(Region *region); + + // Unback any totally unused hugepages; return the number of pages + // we managed to release. + HugeLength Release(); + + void Print(Printer *out) const; + void PrintInPbtxt(PbtxtRegion *hpaa) const; + void AddSpanStats(SmallSpanStats *small, LargeSpanStats *large, + PageAgeHistograms *ages) const; + BackingStats stats() const; + + private: + void Fix(Region *r) { + // We've changed r's fragmentation--move it through the list to the + // correct home (if needed). + Rise(r); + Fall(r); + } + + // Check if r has to move forward in the list. 
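+  // ("Forward" means toward the front of the list, which is kept sorted by
+  // increasing longest free range; allocation scans from the front, so the
+  // most fragmented region that can satisfy a request is preferred.)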
+ void Rise(Region *r) { + auto prev = list_.at(r); + --prev; + if (prev == list_.end()) return; // we're at the front + if (!r->BetterToAllocThan(*prev)) return; // we're far enough forward + list_.remove(r); + for (auto iter = prev; iter != list_.end(); --iter) { + if (!r->BetterToAllocThan(*iter)) { + iter->append_it(r); + return; + } + } + list_.prepend(r); + } + + // Check if r has to move backward in the list. + void Fall(Region *r) { + auto next = list_.at(r); + ++next; + if (next == list_.end()) return; // we're at the back + if (!next->BetterToAllocThan(r)) return; // we're far enough back + list_.remove(r); + for (auto iter = next; iter != list_.end(); ++iter) { + if (!iter->BetterToAllocThan(r)) { + iter->prepend_it(r); + return; + } + } + list_.append(r); + } + + // Add r in its sorted place. + void AddToList(Region *r) { + for (Region *curr : list_) { + if (r->BetterToAllocThan(curr)) { + curr->prepend_it(r); + return; + } + } + + // Note this handles the empty-list case + list_.append(r); + } + + size_t n_; + // Sorted by longest_free increasing. + TList<Region> list_; +}; + +// REQUIRES: r.len() == size(); r unbacked. +inline HugeRegion::HugeRegion(HugeRange r, MemoryModifyFunction unback) + : tracker_{}, + location_(r), + pages_used_{}, + backed_{}, + nbacked_(NHugePages(0)), + unback_(unback) { + int64_t now = absl::base_internal::CycleClock::Now(); + for (int i = 0; i < kNumHugePages; ++i) { + whens_[i] = now; + // These are already 0 but for clarity... + pages_used_[i] = Length(0); + backed_[i] = false; + } +} + +inline bool HugeRegion::MaybeGet(Length n, PageId *p, bool *from_released) { + if (n > longest_free()) return false; + auto index = Length(tracker_.FindAndMark(n.raw_num())); + + PageId page = location_.start().first_page() + index; + *p = page; + + // the last hugepage we touch + Inc(page, n, from_released); + return true; +} + +// If release=true, release any hugepages made empty as a result. +inline void HugeRegion::Put(PageId p, Length n, bool release) { + Length index = p - location_.start().first_page(); + tracker_.Unmark(index.raw_num(), n.raw_num()); + + Dec(p, n, release); +} + +// Release any hugepages that are unused but backed. +inline HugeLength HugeRegion::Release() { + HugeLength r = NHugePages(0); + bool should_unback_[kNumHugePages] = {}; + for (size_t i = 0; i < kNumHugePages; ++i) { + if (backed_[i] && pages_used_[i] == Length(0)) { + should_unback_[i] = true; + ++r; + } + } + UnbackHugepages(should_unback_); + return r; +} + +inline void HugeRegion::AddSpanStats(SmallSpanStats *small, + LargeSpanStats *large, + PageAgeHistograms *ages) const { + size_t index = 0, n; + Length f, u; + // This is complicated a bit by the backed/unbacked status of pages. + while (tracker_.NextFreeRange(index, &index, &n)) { + // [index, index + n) is an *unused* range. As it may cross + // hugepages, we may need to truncate it so it is either a + // *free* or a *released* range, and compute a reasonable value + // for its "when". 
+ PageId p = location_.start().first_page() + Length(index); + const HugePage hp = HugePageContaining(p); + size_t i = (hp - location_.start()) / NHugePages(1); + const bool backed = backed_[i]; + Length truncated; + int64_t when = 0; + while (n > 0 && backed_[i] == backed) { + const PageId lim = (location_.start() + NHugePages(i + 1)).first_page(); + Length here = std::min(Length(n), lim - p); + when = AverageWhens(truncated, when, here, whens_[i]); + truncated += here; + n -= here.raw_num(); + p += here; + i++; + ASSERT(i < kNumHugePages || n == 0); + } + n = truncated.raw_num(); + const bool released = !backed; + if (released) { + u += Length(n); + } else { + f += Length(n); + } + if (Length(n) < kMaxPages) { + if (small != nullptr) { + if (released) { + small->returned_length[n]++; + } else { + small->normal_length[n]++; + } + } + } else { + if (large != nullptr) { + large->spans++; + if (released) { + large->returned_pages += Length(n); + } else { + large->normal_pages += Length(n); + } + } + } + + if (ages != nullptr) { + ages->RecordRange(Length(n), released, when); + } + index += n; + } + CHECK_CONDITION(f == free_pages()); + CHECK_CONDITION(u == unmapped_pages()); +} + +inline HugeLength HugeRegion::backed() const { + HugeLength b; + for (int i = 0; i < kNumHugePages; ++i) { + if (backed_[i]) { + ++b; + } + } + + return b; +} + +inline void HugeRegion::Print(Printer *out) const { + const size_t kib_used = used_pages().in_bytes() / 1024; + const size_t kib_free = free_pages().in_bytes() / 1024; + const size_t kib_longest_free = longest_free().in_bytes() / 1024; + const HugeLength unbacked = size() - backed(); + const size_t mib_unbacked = unbacked.in_mib(); + out->printf( + "HugeRegion: %zu KiB used, %zu KiB free, " + "%zu KiB contiguous space, %zu MiB unbacked, " + "%zu MiB unbacked lifetime\n", + kib_used, kib_free, kib_longest_free, mib_unbacked, + total_unbacked_.in_bytes() / 1024 / 1024); +} + +inline void HugeRegion::PrintInPbtxt(PbtxtRegion *detail) const { + detail->PrintI64("used_bytes", used_pages().in_bytes()); + detail->PrintI64("free_bytes", free_pages().in_bytes()); + detail->PrintI64("longest_free_range_bytes", longest_free().in_bytes()); + const HugeLength unbacked = size() - backed(); + detail->PrintI64("unbacked_bytes", unbacked.in_bytes()); + detail->PrintI64("total_unbacked_bytes", total_unbacked_.in_bytes()); +} + +inline BackingStats HugeRegion::stats() const { + BackingStats s; + s.system_bytes = location_.len().in_bytes(); + s.free_bytes = free_pages().in_bytes(); + s.unmapped_bytes = unmapped_pages().in_bytes(); + return s; +} + +inline void HugeRegion::Inc(PageId p, Length n, bool *from_released) { + bool should_back = false; + const int64_t now = absl::base_internal::CycleClock::Now(); + while (n > Length(0)) { + const HugePage hp = HugePageContaining(p); + const size_t i = (hp - location_.start()) / NHugePages(1); + const PageId lim = (hp + NHugePages(1)).first_page(); + Length here = std::min(n, lim - p); + if (pages_used_[i] == Length(0) && !backed_[i]) { + backed_[i] = true; + should_back = true; + ++nbacked_; + whens_[i] = now; + } + pages_used_[i] += here; + ASSERT(pages_used_[i] <= kPagesPerHugePage); + p += here; + n -= here; + } + *from_released = should_back; +} + +inline void HugeRegion::Dec(PageId p, Length n, bool release) { + const int64_t now = absl::base_internal::CycleClock::Now(); + bool should_unback_[kNumHugePages] = {}; + while (n > Length(0)) { + const HugePage hp = HugePageContaining(p); + const size_t i = (hp - location_.start()) / 
NHugePages(1); + const PageId lim = (hp + NHugePages(1)).first_page(); + Length here = std::min(n, lim - p); + ASSERT(here > Length(0)); + ASSERT(pages_used_[i] >= here); + ASSERT(backed_[i]); + whens_[i] = + AverageWhens(here, now, kPagesPerHugePage - pages_used_[i], whens_[i]); + pages_used_[i] -= here; + if (pages_used_[i] == Length(0)) { + should_unback_[i] = true; + } + p += here; + n -= here; + } + if (release) { + UnbackHugepages(should_unback_); + } +} + +inline void HugeRegion::UnbackHugepages(bool should[kNumHugePages]) { + const int64_t now = absl::base_internal::CycleClock::Now(); + size_t i = 0; + while (i < kNumHugePages) { + if (!should[i]) { + i++; + continue; + } + size_t j = i; + while (j < kNumHugePages && should[j]) { + backed_[j] = false; + whens_[j] = now; + j++; + } + + HugeLength hl = NHugePages(j - i); + nbacked_ -= hl; + HugePage p = location_.start() + NHugePages(i); + unback_(p.start_addr(), hl.in_bytes()); + total_unbacked_ += hl; + i = j; + } +} + +// If available, return a range of n free pages, setting *from_released = +// true iff the returned range is currently unbacked. +// Returns false if no range available. +template <typename Region> +inline bool HugeRegionSet<Region>::MaybeGet(Length n, PageId *page, + bool *from_released) { + for (Region *region : list_) { + if (region->MaybeGet(n, page, from_released)) { + Fix(region); + return true; + } + } + return false; +} + +// Return an allocation to a region (if one matches!) +template <typename Region> +inline bool HugeRegionSet<Region>::MaybePut(PageId p, Length n) { + for (Region *region : list_) { + if (region->contains(p)) { + region->Put(p, n, true); + Fix(region); + return true; + } + } + + return false; +} + +// Add region to the set. +template <typename Region> +inline void HugeRegionSet<Region>::Contribute(Region *region) { + n_++; + AddToList(region); +} + +// Unback any totally unused hugepages; return the number of pages +// we managed to release. +template <typename Region> +inline HugeLength HugeRegionSet<Region>::Release() { + HugeLength hl = NHugePages(0); + for (Region *region : list_) { + hl += region->Release(); + } + + return hl; +} + +template <typename Region> +inline void HugeRegionSet<Region>::Print(Printer *out) const { + out->printf("HugeRegionSet: 1 MiB+ allocations best-fit into %zu MiB slabs\n", + Region::size().in_bytes() / 1024 / 1024); + out->printf("HugeRegionSet: %zu total regions\n", n_); + Length total_free; + HugeLength total_backed = NHugePages(0); + + for (Region *region : list_) { + region->Print(out); + total_free += region->free_pages(); + total_backed += region->backed(); + } + + out->printf("HugeRegionSet: %zu hugepages backed out of %zu total\n", + total_backed.raw_num(), Region::size().raw_num() * n_); + + const Length in_pages = total_backed.in_pages(); + out->printf("HugeRegionSet: %zu pages free in backed region, %.4f free\n", + total_free.raw_num(), + in_pages > Length(0) ? 
static_cast<double>(total_free.raw_num()) / + static_cast<double>(in_pages.raw_num()) + : 0.0); +} + +template <typename Region> +inline void HugeRegionSet<Region>::PrintInPbtxt(PbtxtRegion *hpaa) const { + hpaa->PrintI64("min_huge_region_alloc_size", 1024 * 1024); + hpaa->PrintI64("huge_region_size", Region::size().in_bytes()); + for (Region *region : list_) { + auto detail = hpaa->CreateSubRegion("huge_region_details"); + region->PrintInPbtxt(&detail); + } +} + +template <typename Region> +inline void HugeRegionSet<Region>::AddSpanStats(SmallSpanStats *small, + LargeSpanStats *large, + PageAgeHistograms *ages) const { + for (Region *region : list_) { + region->AddSpanStats(small, large, ages); + } +} + +template <typename Region> +inline BackingStats HugeRegionSet<Region>::stats() const { + BackingStats stats; + for (Region *region : list_) { + stats += region->stats(); + } + + return stats; +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_HUGE_REGION_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/huge_region_test.cc b/contrib/libs/tcmalloc/tcmalloc/huge_region_test.cc new file mode 100644 index 0000000000..4370b92762 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/huge_region_test.cc @@ -0,0 +1,565 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/huge_region.h" + +#include <stdio.h> +#include <string.h> + +#include <algorithm> +#include <memory> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/random/random.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "tcmalloc/common.h" +#include "tcmalloc/huge_pages.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/stats.h" + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +using testing::NiceMock; +using testing::StrictMock; + +class HugeRegionTest : public ::testing::Test { + protected: + HugeRegionTest() + : // an unlikely magic page + p_(HugePageContaining(reinterpret_cast<void *>(0x1faced200000))), + region_({p_, region_.size()}, MockUnback) { + // we usually don't care about backing calls, unless testing that + // specifically. 
+    mock_ = absl::make_unique<NiceMock<MockBackingInterface>>();
+  }
+
+  ~HugeRegionTest() override { mock_.reset(nullptr); }
+
+  // This is wordy, but necessary for mocking:
+  class BackingInterface {
+   public:
+    virtual void Unback(void *p, size_t len) = 0;
+    virtual ~BackingInterface() {}
+  };
+
+  class MockBackingInterface : public BackingInterface {
+   public:
+    MOCK_METHOD2(Unback, void(void *p, size_t len));
+  };
+
+  static std::unique_ptr<MockBackingInterface> mock_;
+
+  static void MockUnback(void *p, size_t len) { mock_->Unback(p, len); }
+
+  void CheckMock() { testing::Mock::VerifyAndClearExpectations(mock_.get()); }
+
+  void ExpectUnback(HugeRange r) {
+    void *ptr = r.start_addr();
+    size_t bytes = r.byte_len();
+    EXPECT_CALL(*mock_, Unback(ptr, bytes)).Times(1);
+  }
+
+  struct Alloc {
+    PageId p;
+    Length n;
+    size_t mark;
+  };
+
+  HugePage p_;
+  typedef HugeRegion Region;
+  Region region_;
+  size_t next_mark_{0};
+  size_t marks_[Region::size().in_pages().raw_num()];
+
+  void Mark(Alloc a) {
+    EXPECT_LE(p_.first_page(), a.p);
+    size_t index = (a.p - p_.first_page()).raw_num();
+    size_t end = index + a.n.raw_num();
+    EXPECT_LE(end, region_.size().in_pages().raw_num());
+    for (; index < end; ++index) {
+      marks_[index] = a.mark;
+    }
+  }
+
+  void Check(Alloc a) {
+    EXPECT_LE(p_.first_page(), a.p);
+    size_t index = (a.p - p_.first_page()).raw_num();
+    size_t end = index + a.n.raw_num();
+    EXPECT_LE(end, region_.size().in_pages().raw_num());
+    for (; index < end; ++index) {
+      EXPECT_EQ(a.mark, marks_[index]);
+    }
+  }
+
+  Alloc Allocate(Length n) {
+    bool from_released;
+    return Allocate(n, &from_released);
+  }
+
+  Alloc Allocate(Length n, bool *from_released) {
+    Alloc ret;
+    CHECK_CONDITION(region_.MaybeGet(n, &ret.p, from_released));
+    ret.n = n;
+    ret.mark = ++next_mark_;
+    Mark(ret);
+    return ret;
+  }
+
+  void Delete(Alloc a) {
+    Check(a);
+    region_.Put(a.p, a.n, false);
+  }
+
+  void DeleteUnback(Alloc a) {
+    Check(a);
+    region_.Put(a.p, a.n, true);
+  }
+};
+
+std::unique_ptr<HugeRegionTest::MockBackingInterface> HugeRegionTest::mock_;
+
+TEST_F(HugeRegionTest, Basic) {
+  Length total;
+  std::vector<Alloc> allocs;
+  for (Length n(1); total + n < region_.size().in_pages(); ++n) {
+    allocs.push_back(Allocate(n));
+    total += n;
+    EXPECT_EQ(total, region_.used_pages());
+  }
+
+  // Free every other alloc
+  std::vector<Length> lengths;
+  std::vector<Alloc> new_allocs;
+  for (int j = 0; j < allocs.size(); ++j) {
+    if (j % 2 == 0) {
+      new_allocs.push_back(allocs[j]);
+      continue;
+    }
+    Length n = allocs[j].n;
+    Delete(allocs[j]);
+    total -= n;
+    EXPECT_EQ(total, region_.used_pages());
+    lengths.push_back(n);
+  }
+  allocs.swap(new_allocs);
+  // and reallocate them in a random order:
+  std::shuffle(lengths.begin(), lengths.end(), absl::BitGen());
+  // This should fit, since the allocator is best-fit
+  // and we have unique gaps of each size.
+ for (auto n : lengths) { + allocs.push_back(Allocate(n)); + total += n; + EXPECT_EQ(total, region_.used_pages()); + } + + for (auto a : allocs) { + Delete(a); + } +} + +TEST_F(HugeRegionTest, ReqsBacking) { + const Length n = kPagesPerHugePage; + std::vector<Alloc> allocs; + // should back the first page + bool from_released; + allocs.push_back(Allocate(n - Length(1), &from_released)); + EXPECT_TRUE(from_released); + // nothing + allocs.push_back(Allocate(Length(1), &from_released)); + EXPECT_FALSE(from_released); + // second page + allocs.push_back(Allocate(Length(1), &from_released)); + EXPECT_TRUE(from_released); + // third, fourth, fifth + allocs.push_back(Allocate(3 * n, &from_released)); + EXPECT_TRUE(from_released); + + for (auto a : allocs) { + Delete(a); + } +} + +TEST_F(HugeRegionTest, Release) { + mock_ = absl::make_unique<StrictMock<MockBackingInterface>>(); + const Length n = kPagesPerHugePage; + bool from_released; + auto a = Allocate(n * 4 - Length(1), &from_released); + EXPECT_TRUE(from_released); + + auto b = Allocate(n * 3, &from_released); + EXPECT_TRUE(from_released); + + auto c = Allocate(n * 5 + Length(1), &from_released); + EXPECT_TRUE(from_released); + + auto d = Allocate(n * 2, &from_released); + EXPECT_TRUE(from_released); + + auto e = Allocate(n / 2, &from_released); + EXPECT_TRUE(from_released); + auto f = Allocate(n / 2, &from_released); + EXPECT_FALSE(from_released); + + // Don't unback the first or last hugepage this touches -- since they + // overlap with others. + Delete(b); + ExpectUnback({p_ + NHugePages(4), NHugePages(2)}); + EXPECT_EQ(NHugePages(2), region_.Release()); + CheckMock(); + + // Now we're on exact boundaries so we should unback the whole range. + Delete(d); + ExpectUnback({p_ + NHugePages(12), NHugePages(2)}); + EXPECT_EQ(NHugePages(2), region_.Release()); + CheckMock(); + + Delete(a); + ExpectUnback({p_ + NHugePages(0), NHugePages(4)}); + EXPECT_EQ(NHugePages(4), region_.Release()); + CheckMock(); + + // Should work just as well with aggressive Put(): + ExpectUnback({p_ + NHugePages(6), NHugePages(6)}); + DeleteUnback(c); + CheckMock(); + + // And this _shouldn't_ do anything (page still in use) + DeleteUnback(e); + // But this should: + ExpectUnback({p_ + NHugePages(14), NHugePages(1)}); + DeleteUnback(f); + CheckMock(); +} + +TEST_F(HugeRegionTest, Reback) { + mock_ = absl::make_unique<StrictMock<MockBackingInterface>>(); + const Length n = kPagesPerHugePage / 4; + bool from_released; + // Even in back/unback cycles we should still call the functions + // on every transition. 
+ for (int i = 0; i < 20; ++i) { + std::vector<Alloc> allocs; + allocs.push_back(Allocate(n, &from_released)); + EXPECT_TRUE(from_released); + allocs.push_back(Allocate(n, &from_released)); + EXPECT_FALSE(from_released); + allocs.push_back(Allocate(n, &from_released)); + EXPECT_FALSE(from_released); + allocs.push_back(Allocate(n, &from_released)); + EXPECT_FALSE(from_released); + + std::shuffle(allocs.begin(), allocs.end(), absl::BitGen()); + DeleteUnback(allocs[0]); + DeleteUnback(allocs[1]); + DeleteUnback(allocs[2]); + + ExpectUnback({p_, NHugePages(1)}); + DeleteUnback(allocs[3]); + CheckMock(); + } +} + +TEST_F(HugeRegionTest, Stats) { + const Length kLen = region_.size().in_pages(); + const size_t kBytes = kLen.in_bytes(); + struct Helper { + static void Stat(const Region ®ion, std::vector<Length> *small_backed, + std::vector<Length> *small_unbacked, LargeSpanStats *large, + BackingStats *stats, double *avg_age_backed, + double *avg_age_unbacked) { + SmallSpanStats small; + *large = LargeSpanStats(); + PageAgeHistograms ages(absl::base_internal::CycleClock::Now()); + region.AddSpanStats(&small, large, &ages); + small_backed->clear(); + small_unbacked->clear(); + for (auto i = Length(0); i < kMaxPages; ++i) { + for (int j = 0; j < small.normal_length[i.raw_num()]; ++j) { + small_backed->push_back(i); + } + + for (int j = 0; j < small.returned_length[i.raw_num()]; ++j) { + small_unbacked->push_back(i); + } + } + + *stats = region.stats(); + + *avg_age_backed = ages.GetTotalHistogram(false)->avg_age(); + *avg_age_unbacked = ages.GetTotalHistogram(true)->avg_age(); + } + }; + + LargeSpanStats large; + std::vector<Length> small_backed, small_unbacked; + BackingStats stats; + double avg_age_backed, avg_age_unbacked; + + absl::SleepFor(absl::Milliseconds(10)); + Helper::Stat(region_, &small_backed, &small_unbacked, &large, &stats, + &avg_age_backed, &avg_age_unbacked); + EXPECT_THAT(small_backed, testing::ElementsAre()); + EXPECT_THAT(small_unbacked, testing::ElementsAre()); + EXPECT_EQ(1, large.spans); + EXPECT_EQ(Length(0), large.normal_pages); + EXPECT_EQ(kLen, large.returned_pages); + EXPECT_EQ(kBytes, stats.system_bytes); + EXPECT_EQ(0, stats.free_bytes); + EXPECT_EQ(kBytes, stats.unmapped_bytes); + EXPECT_LE(0.01, avg_age_unbacked); + EXPECT_EQ(0, avg_age_backed); + + // We don't, in production, use small allocations from the region, but + // the API supports it, so test it here. 
+ Alloc a = Allocate(Length(1)); + Allocate(Length(1)); + Alloc b = Allocate(Length(2)); + Alloc barrier = Allocate(Length(1)); + Alloc c = Allocate(Length(3)); + Allocate(Length(1)); + const Length slack = kPagesPerHugePage - Length(9); + + absl::SleepFor(absl::Milliseconds(20)); + Helper::Stat(region_, &small_backed, &small_unbacked, &large, &stats, + &avg_age_backed, &avg_age_unbacked); + EXPECT_THAT(small_backed, testing::ElementsAre()); + EXPECT_THAT(small_unbacked, testing::ElementsAre()); + EXPECT_EQ(2, large.spans); + EXPECT_EQ(slack, large.normal_pages); + EXPECT_EQ(kLen - kPagesPerHugePage, large.returned_pages); + EXPECT_EQ(kBytes, stats.system_bytes); + EXPECT_EQ(slack.in_bytes(), stats.free_bytes); + EXPECT_EQ((region_.size() - NHugePages(1)).in_bytes(), stats.unmapped_bytes); + EXPECT_LE(0.02, avg_age_backed); + EXPECT_LE(0.03, avg_age_unbacked); + + Delete(a); + absl::SleepFor(absl::Milliseconds(30)); + Helper::Stat(region_, &small_backed, &small_unbacked, &large, &stats, + &avg_age_backed, &avg_age_unbacked); + EXPECT_THAT(small_backed, testing::ElementsAre(Length(1))); + EXPECT_THAT(small_unbacked, testing::ElementsAre()); + EXPECT_EQ(2, large.spans); + EXPECT_EQ(slack, large.normal_pages); + EXPECT_EQ(kLen - kPagesPerHugePage, large.returned_pages); + EXPECT_EQ(kBytes, stats.system_bytes); + EXPECT_EQ((slack + Length(1)).in_bytes(), stats.free_bytes); + EXPECT_EQ((region_.size() - NHugePages(1)).in_bytes(), stats.unmapped_bytes); + EXPECT_LE((slack.raw_num() * 0.05 + 1 * 0.03) / (slack.raw_num() + 1), + avg_age_backed); + EXPECT_LE(0.06, avg_age_unbacked); + + Delete(b); + absl::SleepFor(absl::Milliseconds(40)); + Helper::Stat(region_, &small_backed, &small_unbacked, &large, &stats, + &avg_age_backed, &avg_age_unbacked); + EXPECT_THAT(small_backed, testing::ElementsAre(Length(1), Length(2))); + EXPECT_THAT(small_unbacked, testing::ElementsAre()); + EXPECT_EQ(2, large.spans); + EXPECT_EQ(slack, large.normal_pages); + EXPECT_EQ(kLen - kPagesPerHugePage, large.returned_pages); + EXPECT_EQ(kBytes, stats.system_bytes); + EXPECT_EQ((slack + Length(3)).in_bytes(), stats.free_bytes); + EXPECT_EQ((region_.size() - NHugePages(1)).in_bytes(), stats.unmapped_bytes); + EXPECT_LE( + (slack.raw_num() * 0.09 + 1 * 0.07 + 2 * 0.04) / (slack.raw_num() + 3), + avg_age_backed); + EXPECT_LE(0.10, avg_age_unbacked); + + Delete(c); + absl::SleepFor(absl::Milliseconds(50)); + Helper::Stat(region_, &small_backed, &small_unbacked, &large, &stats, + &avg_age_backed, &avg_age_unbacked); + EXPECT_THAT(small_backed, + testing::ElementsAre(Length(1), Length(2), Length(3))); + EXPECT_THAT(small_unbacked, testing::ElementsAre()); + EXPECT_EQ(2, large.spans); + EXPECT_EQ(slack, large.normal_pages); + EXPECT_EQ(kLen - kPagesPerHugePage, large.returned_pages); + EXPECT_EQ(kBytes, stats.system_bytes); + EXPECT_EQ((slack + Length(6)).in_bytes(), stats.free_bytes); + EXPECT_EQ((region_.size() - NHugePages(1)).in_bytes(), stats.unmapped_bytes); + EXPECT_LE((slack.raw_num() * 0.14 + 1 * 0.12 + 2 * 0.09 + 3 * 0.05) / + (slack.raw_num() + 6), + avg_age_backed); + EXPECT_LE(0.15, avg_age_unbacked); + + Delete(barrier); + absl::SleepFor(absl::Milliseconds(60)); + Helper::Stat(region_, &small_backed, &small_unbacked, &large, &stats, + &avg_age_backed, &avg_age_unbacked); + EXPECT_THAT(small_backed, testing::ElementsAre(Length(1), Length(6))); + EXPECT_THAT(small_unbacked, testing::ElementsAre()); + EXPECT_EQ(2, large.spans); + EXPECT_EQ(slack, large.normal_pages); + EXPECT_EQ(kLen - kPagesPerHugePage, 
large.returned_pages); + EXPECT_EQ(kBytes, stats.system_bytes); + EXPECT_EQ((slack + Length(7)).in_bytes(), stats.free_bytes); + EXPECT_EQ((region_.size() - NHugePages(1)).in_bytes(), stats.unmapped_bytes); + EXPECT_LE( + (slack.raw_num() * 0.20 + 1 * 0.18 + 2 * 0.15 + 3 * 0.11 + 1 * 0.06) / + (slack.raw_num() + 7), + avg_age_backed); + EXPECT_LE(0.21, avg_age_unbacked); +} + +// Test that free regions are broken down properly when they cross +// page boundaries that change the backed/unbacked state. +TEST_F(HugeRegionTest, StatBreakdown) { + const Length n = kPagesPerHugePage; + Alloc a = Allocate(n / 4); + Alloc b = Allocate(n * 3 + n / 3); + Alloc c = Allocate((n - n / 3 - n / 4) + n * 5 + n / 5); + Alloc d = Allocate(n - (n / 5) - Length(1)); + // This unbacks the middle 2 hugepages, but not the beginning or + // trailing region + DeleteUnback(b); + Delete(c); + SmallSpanStats small; + LargeSpanStats large; + region_.AddSpanStats(&small, &large, nullptr); + // Backed beginning of hugepage 0, unbacked range in middle of b, + // long backed range from c, unbacked tail of allocation. + EXPECT_EQ(4, large.spans); + // Tail end of A's page, B/C combined page + all of C. + EXPECT_EQ((n - n / 4) + n * 6 + (n / 5), large.normal_pages); + // The above fill up 10 total pages. + EXPECT_EQ(2 * n + (Region::size().raw_num() - 10) * n, large.returned_pages); + EXPECT_EQ(1, small.normal_length[1]); + + EXPECT_EQ(Length(1) + large.normal_pages + large.returned_pages + + region_.used_pages(), + Region::size().in_pages()); + Delete(a); + Delete(d); +} + +static void NilUnback(void *p, size_t bytes) {} + +class HugeRegionSetTest : public testing::Test { + protected: + typedef HugeRegion Region; + + HugeRegionSetTest() { next_ = HugePageContaining(nullptr); } + + std::unique_ptr<Region> GetRegion() { + // These regions are backed by "real" memory, but we don't touch it. + std::unique_ptr<Region> r(new Region({next_, Region::size()}, NilUnback)); + next_ += Region::size(); + return r; + } + + HugeRegionSet<Region> set_; + HugePage next_; + + struct Alloc { + PageId p; + Length n; + }; +}; + +TEST_F(HugeRegionSetTest, Set) { + absl::BitGen rng; + PageId p; + constexpr Length kSize = kPagesPerHugePage + Length(1); + bool from_released; + ASSERT_FALSE(set_.MaybeGet(Length(1), &p, &from_released)); + auto r1 = GetRegion(); + auto r2 = GetRegion(); + auto r3 = GetRegion(); + auto r4 = GetRegion(); + set_.Contribute(r1.get()); + set_.Contribute(r2.get()); + set_.Contribute(r3.get()); + set_.Contribute(r4.get()); + + std::vector<Alloc> allocs; + std::vector<Alloc> doomed; + + while (set_.MaybeGet(kSize, &p, &from_released)) { + allocs.push_back({p, kSize}); + } + + // Define a random set by shuffling, then move half of the allocations into + // doomed. + std::shuffle(allocs.begin(), allocs.end(), rng); + doomed.insert(doomed.begin(), allocs.begin() + allocs.size() / 2, + allocs.end()); + allocs.erase(allocs.begin() + allocs.size() / 2, allocs.end()); + + for (auto d : doomed) { + ASSERT_TRUE(set_.MaybePut(d.p, d.n)); + } + + for (size_t i = 0; i < 100 * 1000; ++i) { + const size_t N = allocs.size(); + size_t index = absl::Uniform<int32_t>(rng, 0, N); + std::swap(allocs[index], allocs[N - 1]); + auto a = allocs.back(); + ASSERT_TRUE(set_.MaybePut(a.p, a.n)); + allocs.pop_back(); + ASSERT_TRUE(set_.MaybeGet(kSize, &p, &from_released)); + allocs.push_back({p, kSize}); + } + + // Random traffic should have defragmented our allocations into full + // and empty regions, and released the empty ones. 
Annoyingly, we don't + // know which region is which, so we have to do a bit of silliness: + std::vector<Region *> regions = {r1.get(), r2.get(), r3.get(), r4.get()}; + std::sort(regions.begin(), regions.end(), + [](const Region *a, const Region *b) -> bool { + return a->used_pages() > b->used_pages(); + }); + + for (int i = 0; i < regions.size(); i++) { + Log(kLog, __FILE__, __LINE__, i, regions[i]->used_pages().raw_num(), + regions[i]->free_pages().raw_num(), + regions[i]->unmapped_pages().raw_num()); + } + // Now first two should be "full" (ish) + EXPECT_LE(Region::size().in_pages().raw_num() * 0.9, + regions[0]->used_pages().raw_num()); + EXPECT_LE(Region::size().in_pages().raw_num() * 0.9, + regions[1]->used_pages().raw_num()); + // and last two "empty" (ish.) + EXPECT_LE(Region::size().in_pages().raw_num() * 0.9, + regions[2]->unmapped_pages().raw_num()); + EXPECT_LE(Region::size().in_pages().raw_num() * 0.9, + regions[3]->unmapped_pages().raw_num()); + + // Check the stats line up. + auto stats = set_.stats(); + auto raw = r1->stats(); + raw += r2->stats(); + raw += r3->stats(); + raw += r4->stats(); + EXPECT_EQ(raw.system_bytes, stats.system_bytes); + EXPECT_EQ(raw.unmapped_bytes, stats.unmapped_bytes); + EXPECT_EQ(raw.free_bytes, stats.free_bytes); + + // Print out the stats for inspection of formats. + std::vector<char> buf(64 * 1024); + Printer out(&buf[0], buf.size()); + set_.Print(&out); + printf("%s\n", &buf[0]); +} + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/atomic_danger.h b/contrib/libs/tcmalloc/tcmalloc/internal/atomic_danger.h new file mode 100644 index 0000000000..49c95d66cb --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/atomic_danger.h @@ -0,0 +1,60 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// The routines exported by this module are subtle and dangerous. + +#ifndef TCMALLOC_INTERNAL_ATOMIC_DANGER_H_ +#define TCMALLOC_INTERNAL_ATOMIC_DANGER_H_ + +#include <atomic> +#include <type_traits> + +#include "tcmalloc/internal/config.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { +namespace atomic_danger { + +// Casts the address of a std::atomic<IntType> to the address of an IntType. +// +// This is almost certainly not the function you are looking for! It is +// undefined behavior, as the object under a std::atomic<int> isn't +// fundamentally an int. This function is intended for passing the address of an +// atomic integer to syscalls or for assembly interpretation. 
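+// (For instance, a futex(2) call, which expects a plain int* for the futex
+// word rather than a std::atomic<int>*.)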
+// +// Callers should be migrated if C++ standardizes a better way to do this: +// * http://wg21.link/n4013 (Atomic operations on non-atomic data) +// * http://wg21.link/p0019 (Atomic Ref, merged into C++20) +// * http://wg21.link/p1478 (Byte-wise atomic memcpy) +template <typename IntType> +IntType* CastToIntegral(std::atomic<IntType>* atomic_for_syscall) { + static_assert(std::is_integral<IntType>::value, + "CastToIntegral must be instantiated with an integral type."); +#if __cpp_lib_atomic_is_always_lock_free >= 201603 + static_assert(std::atomic<IntType>::is_always_lock_free, + "CastToIntegral must be instantiated with a lock-free type."); +#else + static_assert(__atomic_always_lock_free(sizeof(IntType), + nullptr /* typical alignment */), + "CastToIntegral must be instantiated with a lock-free type."); +#endif + return reinterpret_cast<IntType*>(atomic_for_syscall); +} +} // namespace atomic_danger +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_INTERNAL_ATOMIC_DANGER_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/atomic_stats_counter.h b/contrib/libs/tcmalloc/tcmalloc/internal/atomic_stats_counter.h new file mode 100644 index 0000000000..da7f30646d --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/atomic_stats_counter.h @@ -0,0 +1,74 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_INTERNAL_ATOMIC_STATS_COUNTER_H_ +#define TCMALLOC_INTERNAL_ATOMIC_STATS_COUNTER_H_ + +#include <atomic> + +#include "absl/base/macros.h" +#include "tcmalloc/internal/config.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +class StatsCounter { + public: + constexpr StatsCounter() : value_(0) {} + StatsCounter(const StatsCounter&) = delete; + StatsCounter& operator=(const StatsCounter&) = delete; + + ~StatsCounter() = default; + + using Value = int64_t; + + // Add "increment" to this statistics counter. + // "increment" may take any value, including negative ones. + // Counts are not lost in the face of concurrent uses of Add(). + // Counts added by this call may be lost in the face of concurrent calls + // by other calls, such as Clear() or LossyAdd(). + // This call is suitable for maintaining statistics. It is not suitable + // for other purposes; in particular, it should not be used for + // data synchronization, generating sequence numbers, or reference counting. + void Add(Value increment) { + // As always, clients may not assume properties implied by the + // implementation, which may change. + this->value_.fetch_add(increment, std::memory_order_relaxed); + } + + // Return the current value of the counter. + Value value() const { return this->value_.load(std::memory_order_relaxed); } + + // Add "increment" to this lossy statistics counter. Counts (including those + // added by other calls) _may be lost_ if this call is used concurrently with + // other calls to LossyAdd() or Add(). 
This call is suitable for maintaining + // statistics where performance is more important than not losing counts. It + // is not suitable for other purposes; in particular, it should not be used + // for data synchronization, generating sequence numbers, or reference + // counting. + void LossyAdd(Value increment) { + this->value_.store(this->value_.load(std::memory_order_relaxed) + increment, + std::memory_order_relaxed); + } + + private: + std::atomic<Value> value_; +}; + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_INTERNAL_ATOMIC_STATS_COUNTER_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/bits.h b/contrib/libs/tcmalloc/tcmalloc/internal/bits.h new file mode 100644 index 0000000000..80ca17085c --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/bits.h @@ -0,0 +1,82 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_INTERNAL_BITS_H_ +#define TCMALLOC_INTERNAL_BITS_H_ + +#include <cstdint> +#include <type_traits> + +#include "tcmalloc/internal/logging.h" + +namespace tcmalloc { +namespace tcmalloc_internal { + +class Bits { + public: + // Returns true if a value is zero or a power of two. + template <typename T> + static constexpr + typename std::enable_if<std::is_unsigned<T>::value, bool>::type + IsZeroOrPow2(T n) { + return (n & (n - 1)) == 0; + } + + // Returns true if a value is a power of two. 
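+  // For example, IsPow2(4u) is true, while IsPow2(0u) and IsPow2(6u) are
+  // false.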
+ template <typename T> + static constexpr + typename std::enable_if<std::is_unsigned<T>::value, bool>::type + IsPow2(T n) { + return n != 0 && (n & (n - 1)) == 0; + } + + template <typename T> + static constexpr typename std::enable_if<std::is_unsigned<T>::value, T>::type + Log2Floor(T n) { + if (n == 0) { + return -1; + } + + if (sizeof(T) <= sizeof(unsigned int)) { + return std::numeric_limits<T>::digits - 1 - __builtin_clz(n); + } else if (sizeof(T) <= sizeof(unsigned long)) { + return std::numeric_limits<T>::digits - 1 - __builtin_clzl(n); + } else { + static_assert(sizeof(T) <= sizeof(unsigned long long)); + return std::numeric_limits<T>::digits - 1 - __builtin_clzll(n); + } + } + + template <typename T> + static constexpr typename std::enable_if<std::is_unsigned<T>::value, T>::type + Log2Ceiling(T n) { + T floor = Log2Floor(n); + if (IsZeroOrPow2(n)) + return floor; + else + return floor + 1; + } + + template <typename T> + static constexpr typename std::enable_if<std::is_unsigned<T>::value, T>::type + RoundUpToPow2(T n) { + if (n == 0) return 1; + return T{1} << Log2Ceiling(n); + } +}; + +} // namespace tcmalloc_internal +} // namespace tcmalloc + +#endif // TCMALLOC_INTERNAL_BITS_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/bits_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/bits_test.cc new file mode 100644 index 0000000000..0589b314d2 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/bits_test.cc @@ -0,0 +1,104 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "tcmalloc/internal/bits.h" + +#include <cstdint> +#include <limits> +#include <memory> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/random/random.h" + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +TEST(BitsTest, Log2EdgeCases) { + EXPECT_EQ(-1, Bits::Log2Floor(0u)); + EXPECT_EQ(-1, Bits::Log2Ceiling(0u)); + + for (int i = 0; i < 32; i++) { + uint32_t n = 1U << i; + EXPECT_EQ(i, Bits::Log2Floor(n)); + EXPECT_EQ(i, Bits::Log2Ceiling(n)); + if (n > 2) { + EXPECT_EQ(i - 1, Bits::Log2Floor(n - 1)); + EXPECT_EQ(i, Bits::Log2Floor(n + 1)); + EXPECT_EQ(i, Bits::Log2Ceiling(n - 1)); + EXPECT_EQ(i + 1, Bits::Log2Ceiling(n + 1)); + } + } + + EXPECT_EQ(Bits::Log2Ceiling(uint64_t{0x40000000000}), 42); + EXPECT_EQ(Bits::Log2Floor(uint64_t{0x40000000000}), 42); +} + +TEST(BitsTest, Log2Random) { + absl::BitGen random; + + const int kNumIterations = 10000; + for (int i = 0; i < kNumIterations; i++) { + int maxbit = -1; + uint32_t n = 0; + while (!absl::Bernoulli(random, 1.0 / 32)) { + int bit = absl::Uniform<int32_t>(random, 0, 32); + n |= (1U << bit); + maxbit = std::max(bit, maxbit); + } + EXPECT_EQ(maxbit, Bits::Log2Floor(n)); + } +} + +TEST(BitsTest, IsZeroOrPow2) { + EXPECT_TRUE(Bits::IsZeroOrPow2(0u)); + EXPECT_TRUE(Bits::IsZeroOrPow2(1u)); + EXPECT_TRUE(Bits::IsZeroOrPow2(2u)); + EXPECT_FALSE(Bits::IsZeroOrPow2(3u)); + EXPECT_TRUE(Bits::IsZeroOrPow2(4u)); + EXPECT_FALSE(Bits::IsZeroOrPow2(1337u)); + EXPECT_TRUE(Bits::IsZeroOrPow2(65536u)); + EXPECT_FALSE(Bits::IsZeroOrPow2(std::numeric_limits<uint32_t>::max())); + EXPECT_TRUE(Bits::IsZeroOrPow2(uint32_t{1} << 31)); +} + +TEST(BitsTest, IsPow2) { + EXPECT_FALSE(Bits::IsPow2(0u)); + EXPECT_TRUE(Bits::IsPow2(1u)); + EXPECT_TRUE(Bits::IsPow2(2u)); + EXPECT_FALSE(Bits::IsPow2(3u)); + EXPECT_TRUE(Bits::IsPow2(4u)); + EXPECT_FALSE(Bits::IsPow2(1337u)); + EXPECT_TRUE(Bits::IsPow2(65536u)); + EXPECT_FALSE(Bits::IsPow2(std::numeric_limits<uint32_t>::max())); + EXPECT_TRUE(Bits::IsPow2(uint32_t{1} << 31)); +} + +TEST(BitsTest, RoundUpToPow2) { + EXPECT_EQ(Bits::RoundUpToPow2(0u), 1); + EXPECT_EQ(Bits::RoundUpToPow2(1u), 1); + EXPECT_EQ(Bits::RoundUpToPow2(2u), 2); + EXPECT_EQ(Bits::RoundUpToPow2(3u), 4); + EXPECT_EQ(Bits::RoundUpToPow2(4u), 4); + EXPECT_EQ(Bits::RoundUpToPow2(1337u), 2048); + EXPECT_EQ(Bits::RoundUpToPow2(65536u), 65536); + EXPECT_EQ(Bits::RoundUpToPow2(65536u - 1337u), 65536); + EXPECT_EQ(Bits::RoundUpToPow2(uint64_t{0x40000000000}), + uint64_t{0x40000000000}); +} + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/cache_topology.cc b/contrib/libs/tcmalloc/tcmalloc/internal/cache_topology.cc new file mode 100644 index 0000000000..12a1709b34 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/cache_topology.cc @@ -0,0 +1,88 @@ +// Copyright 2021 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "tcmalloc/internal/cache_topology.h" + +#include <fcntl.h> +#include <string.h> + +#include "absl/strings/numbers.h" +#include "absl/strings/string_view.h" +#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/util.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +namespace { +int OpenSysfsCacheList(size_t cpu) { + char path[PATH_MAX]; + snprintf(path, sizeof(path), + "/sys/devices/system/cpu/cpu%zu/cache/index3/shared_cpu_list", cpu); + return signal_safe_open(path, O_RDONLY | O_CLOEXEC); +} +} // namespace + +int BuildCpuToL3CacheMap_FindFirstNumberInBuf(absl::string_view current) { + // Remove all parts coming after a dash or comma. + const size_t dash = current.find('-'); + if (dash != absl::string_view::npos) current = current.substr(0, dash); + const size_t comma = current.find(','); + if (comma != absl::string_view::npos) current = current.substr(0, comma); + + int first_cpu; + CHECK_CONDITION(absl::SimpleAtoi(current, &first_cpu)); + CHECK_CONDITION(first_cpu < CPU_SETSIZE); + return first_cpu; +} + +int BuildCpuToL3CacheMap(uint8_t l3_cache_index[CPU_SETSIZE]) { + int index = 0; + // Set to a sane value. + memset(l3_cache_index, 0, CPU_SETSIZE); + for (int cpu = 0; cpu < CPU_SETSIZE; ++cpu) { + const int fd = OpenSysfsCacheList(cpu); + if (fd == -1) { + // At some point we reach the number of CPU on the system, and + // we should exit. We verify that there was no other problem. + CHECK_CONDITION(errno == ENOENT); + return index; + } + // The file contains something like: + // 0-11,22-33 + // we are looking for the first number in that file. + char buf[10]; + const size_t bytes_read = + signal_safe_read(fd, buf, 10, /*bytes_read=*/nullptr); + signal_safe_close(fd); + CHECK_CONDITION(bytes_read >= 0); + + const int first_cpu = + BuildCpuToL3CacheMap_FindFirstNumberInBuf({buf, bytes_read}); + CHECK_CONDITION(first_cpu < CPU_SETSIZE); + CHECK_CONDITION(first_cpu <= cpu); + if (cpu == first_cpu) { + l3_cache_index[cpu] = index++; + } else { + l3_cache_index[cpu] = l3_cache_index[first_cpu]; + } + } + return index; +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/cache_topology.h b/contrib/libs/tcmalloc/tcmalloc/internal/cache_topology.h new file mode 100644 index 0000000000..292f175470 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/cache_topology.h @@ -0,0 +1,36 @@ +// Copyright 2021 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_INTERNAL_CACHE_TOPOLOGY_H_ +#define TCMALLOC_INTERNAL_CACHE_TOPOLOGY_H_ + +#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/util.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// Build a mapping from cpuid to the index of the L3 cache used by that cpu. +// Returns the number of caches detected. 
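+// For example, if sysfs reports that CPUs 0-11 and 22-33 share one L3 cache,
+// all of those CPUs receive the same l3_cache_index entry.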
+int BuildCpuToL3CacheMap(uint8_t l3_cache_index[CPU_SETSIZE]);
+
+// Helper function exposed to permit testing it.
+int BuildCpuToL3CacheMap_FindFirstNumberInBuf(absl::string_view current);
+
+}  // namespace tcmalloc_internal
+}  // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif  // TCMALLOC_INTERNAL_CACHE_TOPOLOGY_H_
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/cache_topology_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/cache_topology_test.cc
new file mode 100644
index 0000000000..927ecace94
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/cache_topology_test.cc
@@ -0,0 +1,51 @@
+// Copyright 2021 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "tcmalloc/internal/cache_topology.h"
+
+#include <sched.h>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace {
+
+TEST(CacheTopology, ComputesSomethingReasonable) {
+  // This test verifies that each L3 cache serves the same number of CPUs. This
+  // is not a strict requirement for the correct operation of this code, but a
+  // sign of sanity.
+  uint8_t l3_cache_index[CPU_SETSIZE];
+  const int num_nodes =
+      tcmalloc::tcmalloc_internal::BuildCpuToL3CacheMap(l3_cache_index);
+  EXPECT_EQ(absl::base_internal::NumCPUs() % num_nodes, 0);
+  ASSERT_GT(num_nodes, 0);
+  static const int kMaxNodes = 256 / 8;
+  int count_per_node[kMaxNodes] = {0};
+  for (int i = 0; i < absl::base_internal::NumCPUs(); ++i) {
+    count_per_node[l3_cache_index[i]]++;
+  }
+  for (int i = 0; i < num_nodes; ++i) {
+    EXPECT_EQ(count_per_node[i], absl::base_internal::NumCPUs() / num_nodes);
+  }
+}
+
+TEST(CacheTopology, FindFirstNumberInBuf) {
+  using tcmalloc::tcmalloc_internal::BuildCpuToL3CacheMap_FindFirstNumberInBuf;
+  EXPECT_EQ(7, BuildCpuToL3CacheMap_FindFirstNumberInBuf("7,-787"));
+  EXPECT_EQ(5, BuildCpuToL3CacheMap_FindFirstNumberInBuf("5"));
+  EXPECT_EQ(5, BuildCpuToL3CacheMap_FindFirstNumberInBuf("5-9"));
+  EXPECT_EQ(5, BuildCpuToL3CacheMap_FindFirstNumberInBuf("5,9"));
+}
+
+}  // namespace
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/clock.h b/contrib/libs/tcmalloc/tcmalloc/internal/clock.h
new file mode 100644
index 0000000000..65c765203c
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/clock.h
@@ -0,0 +1,41 @@
+// Copyright 2021 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#ifndef TCMALLOC_INTERNAL_CLOCK_H_ +#define TCMALLOC_INTERNAL_CLOCK_H_ + +#include <stdint.h> + +#include "tcmalloc/internal/config.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// Represents an abstract clock. The now and freq functions are analogous to +// CycleClock::Now and CycleClock::Frequency, which will be the most commonly +// used implementations. Tests can use this interface to mock out the clock. +struct Clock { + // Returns the current time in ticks (relative to an arbitrary time base). + int64_t (*now)(); + + // Returns the number of ticks per second. + double (*freq)(); +}; + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_INTERNAL_CLOCK_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/config.h b/contrib/libs/tcmalloc/tcmalloc/internal/config.h new file mode 100644 index 0000000000..73dbab06aa --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/config.h @@ -0,0 +1,136 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_INTERNAL_CONFIG_H_ +#define TCMALLOC_INTERNAL_CONFIG_H_ + +#include <stddef.h> + +#include "absl/base/policy_checks.h" + +// TCMALLOC_HAVE_SCHED_GETCPU is defined when the system implements +// sched_getcpu(3) as by glibc and it's imitators. +#if defined(__linux__) || defined(__ros__) +#define TCMALLOC_HAVE_SCHED_GETCPU 1 +#else +#undef TCMALLOC_HAVE_SCHED_GETCPU +#endif + +// TCMALLOC_HAVE_STRUCT_MALLINFO is defined when we know that the system has +// `struct mallinfo` available. +// +// The FreeBSD libc, and subsequently macOS, does not provide the `mallopt` +// interfaces. We know that bionic, glibc (and variants), newlib, and uclibc do +// provide the `mallopt` interface. The musl libc is known to not provide the +// interface, nor does it provide a macro for checking. As a result, we +// conservatively state that `struct mallinfo` is only available on these +// environments. +#if !defined(OS_FREEBSD) && !defined(OS_MACOSX) && \ + (defined(__BIONIC__) || defined(__GLIBC__) || defined(__NEWLIB__) || \ + defined(__UCLIBC__)) +#define TCMALLOC_HAVE_STRUCT_MALLINFO 1 +#else +#undef TCMALLOC_HAVE_STRUCT_MALLINFO +#endif + +// When possible, name the text section as google_malloc. This macro should not +// be added to header files as that may move unrelated code to google_malloc +// section. +#if defined(__clang__) && defined(__linux__) +#define GOOGLE_MALLOC_SECTION_BEGIN \ + _Pragma("clang section text = \"google_malloc\"") +#define GOOGLE_MALLOC_SECTION_END _Pragma("clang section text = \"\"") +#else +#define GOOGLE_MALLOC_SECTION_BEGIN +#define GOOGLE_MALLOC_SECTION_END +#endif + +#if defined(__GNUC__) && !defined(__clang__) +#if __GNUC__ < 9 || (__GNUC__ == 9 && __GNUC_MINOR__ < 2) +#error "GCC 9.2 or higher is required." +#endif +#endif + +#if defined(__clang__) +#if __clang_major__ < 9 +#error "Clang 9 or higher is required." 
+#endif
+#endif
+
+#if !defined(__x86_64__) && !defined(__ppc64__) && !defined(__arm__) && \
+    !defined(__aarch64__) && !defined(__riscv)
+#error "Unsupported architecture."
+#endif
+
+#if !defined(__cplusplus) || __cplusplus < 201703L
+#error "TCMalloc requires C++17 or later."
+#else
+// Also explicitly use some C++17-only syntax, so that compilations restricted
+// to an earlier standard (or warning with flags like `-Wc++14-compat`) are
+// detected.
+namespace tcmalloc::google3_requires_cpp17_or_later {}
+#endif
+
+GOOGLE_MALLOC_SECTION_BEGIN
+namespace tcmalloc {
+namespace tcmalloc_internal {
+
+#if defined __x86_64__
+// All current and planned x86_64 processors only look at the lower 48 bits
+// in virtual to physical address translation.  The top 16 are thus unused.
+// TODO(b/134686025): Under what operating systems can we increase it safely to
+// 17? This lets us use smaller page maps.  On first allocation, a 36-bit page
+// map uses only 96 KB instead of the 4.5 MB used by a 52-bit page map.
+inline constexpr int kAddressBits =
+    (sizeof(void*) < 8 ? (8 * sizeof(void*)) : 48);
+#elif defined __powerpc64__ && defined __linux__
+// Linux (4.12 and above) on powerpc64 supports 128TB user virtual address space
+// by default, and up to 512TB if user space opts in by specifying a hint in mmap.
+// See comments in arch/powerpc/include/asm/processor.h
+// and arch/powerpc/mm/mmap.c.
+inline constexpr int kAddressBits =
+    (sizeof(void*) < 8 ? (8 * sizeof(void*)) : 49);
+#elif defined __aarch64__ && defined __linux__
+// According to Documentation/arm64/memory.txt of kernel 3.16,
+// AARCH64 kernel supports 48-bit virtual addresses for both user and kernel.
+inline constexpr int kAddressBits =
+    (sizeof(void*) < 8 ? (8 * sizeof(void*)) : 48);
+#elif defined __riscv && defined __linux__
+inline constexpr int kAddressBits =
+    (sizeof(void *) < 8 ? (8 * sizeof(void *)) : 48);
+#else
+inline constexpr int kAddressBits = 8 * sizeof(void*);
+#endif
+
+#if defined(__x86_64__)
+// x86 has 2 MiB huge pages
+static constexpr size_t kHugePageShift = 21;
+#elif defined(__PPC64__)
+static constexpr size_t kHugePageShift = 24;
+#elif defined __aarch64__ && defined __linux__
+static constexpr size_t kHugePageShift = 21;
+#elif defined __riscv && defined __linux__
+static constexpr size_t kHugePageShift = 21;
+#else
+// ...whatever, guess something big-ish
+static constexpr size_t kHugePageShift = 21;
+#endif
+
+static constexpr size_t kHugePageSize = static_cast<size_t>(1)
+                                        << kHugePageShift;
+
+}  // namespace tcmalloc_internal
+}  // namespace tcmalloc
+GOOGLE_MALLOC_SECTION_END
+
+#endif  // TCMALLOC_INTERNAL_CONFIG_H_
diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/declarations.h b/contrib/libs/tcmalloc/tcmalloc/internal/declarations.h
new file mode 100644
index 0000000000..b82a3ce9e5
--- /dev/null
+++ b/contrib/libs/tcmalloc/tcmalloc/internal/declarations.h
@@ -0,0 +1,42 @@
+// Copyright 2019 The TCMalloc Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
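Illustrative sketch (not part of the imported diff): a compile-time check of what the config.h constants above work out to on x86-64, following directly from the definitions in this hunk.

    #include "tcmalloc/internal/config.h"

    #if defined(__x86_64__)
    // 2 MiB huge pages (1 << 21 bytes) and a 48-bit user virtual address space.
    static_assert(tcmalloc::tcmalloc_internal::kHugePageShift == 21, "");
    static_assert(tcmalloc::tcmalloc_internal::kHugePageSize == (1u << 21),
                  "kHugePageSize is 2 MiB on x86-64");
    static_assert(tcmalloc::tcmalloc_internal::kAddressBits == 48,
                  "only the low 48 virtual address bits are translated");
    #endif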
+ +// These declarations are for internal use, allowing us to have access to +// allocation functions whose declarations are not provided by the standard +// library. +#ifndef TCMALLOC_INTERNAL_DECLARATIONS_H_ +#define TCMALLOC_INTERNAL_DECLARATIONS_H_ + +#include <cstddef> +#include <new> + +namespace std { +enum class align_val_t : size_t; +} // namespace std + +void* operator new(std::size_t, std::align_val_t); +void* operator new(std::size_t, std::align_val_t, + const std::nothrow_t&) noexcept; +void* operator new[](std::size_t, std::align_val_t); +void* operator new[](std::size_t, std::align_val_t, + const std::nothrow_t&) noexcept; + +void operator delete(void*, std::align_val_t) noexcept; +void operator delete(void*, std::size_t) noexcept; +void operator delete(void*, std::size_t, std::align_val_t) noexcept; +void operator delete[](void*, std::align_val_t) noexcept; +void operator delete[](void*, std::size_t) noexcept; +void operator delete[](void*, std::size_t, std::align_val_t) noexcept; + +#endif // TCMALLOC_INTERNAL_DECLARATIONS_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/environment.cc b/contrib/libs/tcmalloc/tcmalloc/internal/environment.cc new file mode 100644 index 0000000000..e786dd7a96 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/environment.cc @@ -0,0 +1,45 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "tcmalloc/internal/environment.h" + +#include <string.h> + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// POSIX provides the **environ array which contains environment variables in a +// linear array, terminated by a NULL string. This array is only perturbed when +// the environment is changed (which is inherently unsafe) so it's safe to +// return a const pointer into it. +// e.g. { "SHELL=/bin/bash", "MY_ENV_VAR=1", "" } +extern "C" char** environ; +const char* thread_safe_getenv(const char* env_var) { + int var_len = strlen(env_var); + + char** envv = environ; + if (!envv) { + return nullptr; + } + + for (; *envv != nullptr; envv++) + if (strncmp(*envv, env_var, var_len) == 0 && (*envv)[var_len] == '=') + return *envv + var_len + 1; // skip over the '=' + + return nullptr; +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/environment.h b/contrib/libs/tcmalloc/tcmalloc/internal/environment.h new file mode 100644 index 0000000000..f54840e8d7 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/environment.h @@ -0,0 +1,42 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_INTERNAL_ENVIRONMENT_H_ +#define TCMALLOC_INTERNAL_ENVIRONMENT_H_ + +#include "tcmalloc/internal/config.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// WARNING ******************************************************************** +// getenv(2) can only be safely used in the absence of calls which perturb the +// environment (e.g. putenv/setenv/clearenv). The use of such calls is +// strictly thread-hostile since these calls do *NOT* synchronize and there is +// *NO* thread-safe way in which the POSIX **environ array may be queried about +// modification. +// **************************************************************************** +// The default getenv(2) is not guaranteed to be thread-safe as there are no +// semantics specifying the implementation of the result buffer. The result +// from thread_safe_getenv() may be safely queried in a multi-threaded context. +// If you have explicit synchronization with changes environment variables then +// any copies of the returned pointer must be invalidated across modification. +const char* thread_safe_getenv(const char* env_var); + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_INTERNAL_ENVIRONMENT_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/environment_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/environment_test.cc new file mode 100644 index 0000000000..6878301ec9 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/environment_test.cc @@ -0,0 +1,45 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/internal/environment.h" + +#include <stdlib.h> +#include <string.h> + +#include "gtest/gtest.h" + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +TEST(EnvironmentTest, thread_safe_getenv) { + // Should never be defined at test start + const char *result, *undefined_env_var = "UTIL_TEST_UNDEFINED_ENV_VAR"; + + // Check that we handle an undefined variable and then set it + ASSERT_TRUE(getenv(undefined_env_var) == nullptr); + ASSERT_TRUE(thread_safe_getenv(undefined_env_var) == nullptr); + ASSERT_EQ(setenv(undefined_env_var, "1234567890", 0), 0); + ASSERT_TRUE(getenv(undefined_env_var) != nullptr); + + // Make sure we can find the new variable + result = thread_safe_getenv(undefined_env_var); + ASSERT_TRUE(result != nullptr); + // ... 
and that it matches what was set + EXPECT_EQ(strcmp(result, getenv(undefined_env_var)), 0); +} + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/lifetime_predictions.h b/contrib/libs/tcmalloc/tcmalloc/internal/lifetime_predictions.h new file mode 100644 index 0000000000..514dd4a73e --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/lifetime_predictions.h @@ -0,0 +1,252 @@ +// Copyright 2020 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_INTERNAL_LIFETIME_PREDICTIONS_H_ +#define TCMALLOC_INTERNAL_LIFETIME_PREDICTIONS_H_ + +#include <algorithm> +#include <cstdlib> +#include <functional> + +#include "absl/algorithm/container.h" +#include "absl/base/const_init.h" +#include "absl/base/internal/low_level_alloc.h" +#include "absl/base/internal/spinlock.h" +#include "absl/debugging/stacktrace.h" +#include "absl/hash/hash.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "tcmalloc/internal/linked_list.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// Counts how many times we observed objects with a particular stack trace +// that were short lived/long lived. Each LifetimeStats object is associated +// with a particular allocation site (i.e., allocation stack trace) and each +// allocation site has at most one LifetimeStats object. All accesses to +// LifetimeStats objects need to be synchronized via the page heap lock. +class LifetimeStats : public TList<LifetimeStats>::Elem { + public: + enum class Certainty { kLowCertainty, kHighCertainty }; + enum class Prediction { kShortLived, kLongLived }; + + void Update(Prediction prediction) { + if (prediction == Prediction::kShortLived) { + short_lived_++; + } else { + long_lived_++; + } + } + + Prediction Predict(Certainty certainty) { + if (certainty == Certainty::kLowCertainty) { + return (short_lived_ > long_lived_) ? Prediction::kShortLived + : Prediction::kLongLived; + } else { + // If little data was collected, predict as long-lived (current behavior). + return (short_lived_ > (long_lived_ + 10)) ? Prediction::kShortLived + : Prediction::kLongLived; + } + } + + // Reference counts are protected by LifetimeDatabase::table_lock_. + + // Increments the reference count of this entry. + void IncRef() { ++refcount_; } + + // Returns true if and only if the reference count reaches 0. + bool DecRef() { return --refcount_ == 0; } + + private: + uint64_t refcount_ = 1; + uint64_t short_lived_ = 0; + uint64_t long_lived_ = 0; +}; + +// Manages stack traces and statistics about their associated lifetimes. Since +// the database can fill up, old entries are evicted. Evicted entries need to +// survive as long as the last lifetime tracker referencing them and are thus +// reference-counted. 
+class LifetimeDatabase { + public: + struct Key { + int depth; // Number of PC values stored in array below + void* stack[kMaxStackDepth]; + + // Statically instantiate at the start of the allocation to acquire + // the allocation stack trace. + Key() { depth = absl::GetStackTrace(stack, kMaxStackDepth, 1); } + + template <typename H> + friend H AbslHashValue(H h, const Key& c) { + return H::combine(H::combine_contiguous(std::move(h), c.stack, c.depth), + c.depth); + } + + bool operator==(const Key& other) const { + if (depth != other.depth) { + return false; + } + return std::equal(stack, stack + depth, other.stack); + } + }; + + // Captures statistics associated with the low-level allocator backing the + // memory used by the database. + struct ArenaStats { + uint64_t bytes_allocated; + }; + + static constexpr int kMaxDatabaseSize = 1024; + + LifetimeDatabase() {} + ~LifetimeDatabase() {} + + // Not copyable or movable + LifetimeDatabase(const LifetimeDatabase&) = delete; + LifetimeDatabase& operator=(const LifetimeDatabase&) = delete; + + // Identifies the current stack trace and returns a handle to the lifetime + // statistics associated with this stack trace. May run outside the page heap + // lock -- we therefore need to do our own locking. This increments the + // reference count of the lifetime stats object and the caller is responsible + // for calling RemoveLifetimeStatsReference when finished with the object. + LifetimeStats* LookupOrAddLifetimeStats(Key* k) { + absl::base_internal::SpinLockHolder h(&table_lock_); + auto it = table_.find(*k); + LifetimeStats* s; + if (it == table_.end()) { + MaybeEvictLRU(); + // Allocate a new entry using the low-level allocator, which is safe + // to call from within TCMalloc. + s = stats_allocator_.allocate(1); + new (s) LifetimeStats(); + table_.insert(std::make_pair(*k, s)); + stats_fifo_.append(s); + } else { + s = it->second; + UpdateLRU(s); + } + s->IncRef(); + return s; + } + + void RemoveLifetimeStatsReference(LifetimeStats* s) { + absl::base_internal::SpinLockHolder h(&table_lock_); + if (s->DecRef()) { + stats_allocator_.deallocate(s, 1); + } + } + + size_t size() const { + absl::base_internal::SpinLockHolder h(&table_lock_); + return table_.size(); + } + + size_t evictions() const { + absl::base_internal::SpinLockHolder h(&table_lock_); + return n_evictions_; + } + + static ArenaStats* arena_stats() { + static ArenaStats stats = {0}; + return &stats; + } + + protected: + static const int kMaxStackDepth = 64; + + static absl::base_internal::LowLevelAlloc::Arena* GetArena() { + static absl::base_internal::LowLevelAlloc::Arena* arena = + absl::base_internal::LowLevelAlloc::NewArena(0); + return arena; + } + + static uint64_t bytes_allocated_ ABSL_GUARDED_BY(table_lock_); + + void UpdateLRU(LifetimeStats* stats) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(table_lock_) { + stats_fifo_.remove(stats); + stats_fifo_.append(stats); + } + + // If an entry is evicted, it is returned (nullptr otherwise). 
+ void MaybeEvictLRU() ABSL_EXCLUSIVE_LOCKS_REQUIRED(table_lock_) { + if (table_.size() < kMaxDatabaseSize) { + return; + } + n_evictions_++; + LifetimeStats* evict = stats_fifo_.first(); + stats_fifo_.remove(evict); + for (auto it = table_.begin(); it != table_.end(); ++it) { + if (it->second == evict) { + table_.erase(it); + if (evict->DecRef()) { + stats_allocator_.deallocate(evict, 1); + } + return; + } + } + CHECK_CONDITION(false); // Should not happen + } + + private: + template <typename T> + class MyAllocator : public std::allocator<T> { + public: + template <typename U> + struct rebind { + using other = MyAllocator<U>; + }; + + MyAllocator() noexcept {} + + template <typename U> + explicit MyAllocator(const MyAllocator<U>&) noexcept {} + + T* allocate(size_t num_objects, const void* = nullptr) { + size_t bytes = num_objects * sizeof(T); + arena_stats()->bytes_allocated += bytes; + return static_cast<T*>(absl::base_internal::LowLevelAlloc::AllocWithArena( + bytes, GetArena())); + } + + void deallocate(T* p, size_t num_objects) { + size_t bytes = num_objects * sizeof(T); + arena_stats()->bytes_allocated -= bytes; + absl::base_internal::LowLevelAlloc::Free(p); + } + }; + + MyAllocator<LifetimeStats> stats_allocator_ ABSL_GUARDED_BY(table_lock_); + mutable absl::base_internal::SpinLock table_lock_{ + absl::kConstInit, absl::base_internal::SCHEDULE_KERNEL_ONLY}; + + // Stores the current mapping from allocation site to LifetimeStats. + std::unordered_map<Key, LifetimeStats*, absl::Hash<Key>, std::equal_to<Key>, + MyAllocator<std::pair<const Key, LifetimeStats*>>> + table_ ABSL_GUARDED_BY(table_lock_); + + // Stores the entries ordered by how many times they have been accessed. + TList<LifetimeStats> stats_fifo_ ABSL_GUARDED_BY(table_lock_); + size_t n_evictions_ ABSL_GUARDED_BY(table_lock_) = 0; +}; + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_INTERNAL_LIFETIME_PREDICTIONS_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/lifetime_predictions_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/lifetime_predictions_test.cc new file mode 100644 index 0000000000..4280890afe --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/lifetime_predictions_test.cc @@ -0,0 +1,156 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
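Illustrative sketch (not part of the imported diff): the reference-counting protocol described in the LifetimeDatabase comments above, condensed. Page-heap-lock synchronization of LifetimeStats is elided here; the test that follows exercises the same calls in full.

    #include "tcmalloc/internal/lifetime_predictions.h"

    namespace tcmalloc {
    namespace tcmalloc_internal {

    void RecordObservation(LifetimeDatabase& db, bool was_short_lived) {
      LifetimeDatabase::Key key;  // captures this call site's stack trace
      LifetimeStats* stats = db.LookupOrAddLifetimeStats(&key);  // takes a ref
      stats->Update(was_short_lived ? LifetimeStats::Prediction::kShortLived
                                    : LifetimeStats::Prediction::kLongLived);
      // Low certainty is a simple majority vote; high certainty only answers
      // kShortLived once short-lived observations exceed long-lived by more
      // than 10.
      LifetimeStats::Prediction p =
          stats->Predict(LifetimeStats::Certainty::kHighCertainty);
      (void)p;
      db.RemoveLifetimeStatsReference(stats);  // drop the ref taken above
    }

    }  // namespace tcmalloc_internal
    }  // namespace tcmalloc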
+ +#include "tcmalloc/internal/lifetime_predictions.h" + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "tcmalloc/testing/testutil.h" + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +class LifetimeDatabaseTest : public testing::Test { + protected: + LifetimeDatabase lifetime_database_; + + ABSL_ATTRIBUTE_NOINLINE ABSL_ATTRIBUTE_NO_TAIL_CALL LifetimeStats* + AllocateA() { + LifetimeDatabase::Key key; + return lifetime_database_.LookupOrAddLifetimeStats(&key); + } + + ABSL_ATTRIBUTE_NOINLINE ABSL_ATTRIBUTE_NO_TAIL_CALL LifetimeStats* + AllocateB() { + LifetimeDatabase::Key key; + return lifetime_database_.LookupOrAddLifetimeStats(&key); + } + + ABSL_ATTRIBUTE_NOINLINE ABSL_ATTRIBUTE_NO_TAIL_CALL LifetimeStats* + AllocateWithStacktraceId(int id) { + if (id == 0) { + LifetimeDatabase::Key key; + return lifetime_database_.LookupOrAddLifetimeStats(&key); + } else if (id % 2 == 0) { + return AllocateWithStacktraceId(id / 2); + } else { + return AllocateWithStacktraceId_2(id / 2); + } + } + + // Record a sufficiently large number of short-lived allocations to make + // a prediction short-lived, absent any long-lived allocations. + void MakeShortLived(LifetimeStats* stats, bool high_certainty) { + for (int i = 0; i < (high_certainty ? 100 : 2); i++) { + stats->Update(LifetimeStats::Prediction::kShortLived); + } + } + + private: + ABSL_ATTRIBUTE_NOINLINE ABSL_ATTRIBUTE_NO_TAIL_CALL LifetimeStats* + AllocateWithStacktraceId_2(int id) { + if (id == 0) { + LifetimeDatabase::Key key; + return lifetime_database_.LookupOrAddLifetimeStats(&key); + } else if (id % 2 == 0) { + return AllocateWithStacktraceId(id / 2); + } else { + return AllocateWithStacktraceId_2(id / 2); + } + } +}; + +TEST_F(LifetimeDatabaseTest, Basic) { + PRAGMA_NO_UNROLL + for (int i = 0; i < 2; i++) { + LifetimeStats* r1 = AllocateA(); + LifetimeStats* r2 = AllocateB(); + LifetimeStats* r3 = AllocateB(); + ASSERT_NE(nullptr, r1); + ASSERT_NE(nullptr, r2); + ASSERT_NE(nullptr, r3); + + // First iteration: set short-lived count. + if (i == 0) { + MakeShortLived(r1, false); + MakeShortLived(r2, true); + } else { + EXPECT_EQ(LifetimeStats::Prediction::kShortLived, + r1->Predict(LifetimeStats::Certainty::kLowCertainty)); + EXPECT_EQ(LifetimeStats::Prediction::kLongLived, + r1->Predict(LifetimeStats::Certainty::kHighCertainty)); + EXPECT_EQ(LifetimeStats::Prediction::kShortLived, + r2->Predict(LifetimeStats::Certainty::kLowCertainty)); + EXPECT_EQ(LifetimeStats::Prediction::kShortLived, + r2->Predict(LifetimeStats::Certainty::kHighCertainty)); + EXPECT_EQ(LifetimeStats::Prediction::kLongLived, + r3->Predict(LifetimeStats::Certainty::kLowCertainty)); + EXPECT_EQ(LifetimeStats::Prediction::kLongLived, + r3->Predict(LifetimeStats::Certainty::kHighCertainty)); + } + + lifetime_database_.RemoveLifetimeStatsReference(r1); + lifetime_database_.RemoveLifetimeStatsReference(r2); + lifetime_database_.RemoveLifetimeStatsReference(r3); + } +} + +TEST_F(LifetimeDatabaseTest, Eviction) { + const int kEntries = 5 * LifetimeDatabase::kMaxDatabaseSize; + + std::vector<LifetimeStats*> refs; + + PRAGMA_NO_UNROLL + for (int i = 0; i < kEntries; i++) { + LifetimeStats* r = AllocateWithStacktraceId(i); + refs.push_back(r); + + ASSERT_NE(nullptr, r); + if (i < LifetimeDatabase::kMaxDatabaseSize) { + MakeShortLived(r, true); + } + } + + // Check that even evicted entries are still accessible due to refcounts. 
+ for (int i = 0; i < kEntries; i++) { + if (i < LifetimeDatabase::kMaxDatabaseSize) { + EXPECT_EQ(LifetimeStats::Prediction::kShortLived, + refs[i]->Predict(LifetimeStats::Certainty::kLowCertainty)); + } else { + EXPECT_EQ(LifetimeStats::Prediction::kLongLived, + refs[i]->Predict(LifetimeStats::Certainty::kLowCertainty)); + } + } + + EXPECT_EQ(LifetimeDatabase::kMaxDatabaseSize, lifetime_database_.size()); + EXPECT_EQ(kEntries - LifetimeDatabase::kMaxDatabaseSize, + lifetime_database_.evictions()); + + uint64_t before_bytes = lifetime_database_.arena_stats()->bytes_allocated; + + // Return all of the references, which should drop the remaining refcounts. + for (int i = 0; i < kEntries; i++) { + lifetime_database_.RemoveLifetimeStatsReference(refs[i]); + } + + uint64_t after_bytes = lifetime_database_.arena_stats()->bytes_allocated; + + // Check that this freed up memory + EXPECT_LT(after_bytes, before_bytes); +} + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/lifetime_tracker.h b/contrib/libs/tcmalloc/tcmalloc/internal/lifetime_tracker.h new file mode 100644 index 0000000000..d348dbe609 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/lifetime_tracker.h @@ -0,0 +1,172 @@ +// Copyright 2020 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_INTERNAL_LIFETIME_TRACKER_H_ +#define TCMALLOC_INTERNAL_LIFETIME_TRACKER_H_ + +#include "absl/base/internal/cycleclock.h" +#include "absl/time/time.h" +#include "tcmalloc/internal/clock.h" +#include "tcmalloc/internal/lifetime_predictions.h" +#include "tcmalloc/internal/linked_list.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +template <typename LifetimeDatabaseT, typename LifetimeStatsT> +class LifetimeTrackerImpl { + public: + // A tracker is attached to an individual allocation and tracks its lifetime. + // This allocation can either be in a region or in the filler. It contains + // a pointer back to the LifetimeStats of the allocation site that generated + // this allocation, so that statistics can be updated. + struct Tracker : public TList<Tracker>::Elem { + // The deadline after which the object is considered long-lived. + uint64_t deadline = 0; + + // If the allocation is associated with a counterfactual, this contains + // the hypothetical location in the short-lived region (null otherwise). + void* counterfactual_ptr = nullptr; + + // Lifetime statistics associated with this allocation (will be updated when + // the lifetime is known). + LifetimeStatsT* lifetime; + + // The allocation this stat belongs to was predicted short-lived. + bool predicted_short_lived = false; + + // Is this element currently tracked by the lifetime tracker? + bool is_tracked() { return deadline != 0; } + + // Reset the element (implies not tracked). 
+ void reset() { + deadline = 0; + counterfactual_ptr = nullptr; + } + }; + + struct Stats { + uint64_t expired_lifetimes = 0; + uint64_t overestimated_lifetimes = 0; + uint64_t short_lived_predictions = 0; + uint64_t long_lived_predictions = 0; + }; + + explicit LifetimeTrackerImpl( + LifetimeDatabaseT* lifetime_database, absl::Duration timeout, + Clock clock = Clock{.now = absl::base_internal::CycleClock::Now, + .freq = absl::base_internal::CycleClock::Frequency}) + : timeout_(absl::ToDoubleSeconds(timeout) * clock.freq()), + lifetime_database_(*lifetime_database), + clock_(clock) {} + + // Registers a donated allocation with the tracker. + void AddAllocation(Tracker* tracker, LifetimeStatsT* lifetime, + bool predicted_short_lived) { + CheckForLifetimeExpirations(); + + if (predicted_short_lived) { + stats_.short_lived_predictions++; + } else { + stats_.long_lived_predictions++; + } + + ASSERT(tracker != nullptr); + ASSERT(lifetime != nullptr); + tracker->deadline = clock_.now() + timeout_; + tracker->lifetime = lifetime; + tracker->predicted_short_lived = predicted_short_lived; + list_.append(tracker); + } + + // Remove an allocation from the tracker. This will stop tracking the + // allocation and record whether it was correctly predicted. + void RemoveAllocation(Tracker* tracker) { + CheckForLifetimeExpirations(); + + // This is not tracked anymore. + if (!tracker->is_tracked()) { + return; + } + + if (!tracker->predicted_short_lived) { + stats_.overestimated_lifetimes++; + } + + if (tracker->lifetime != nullptr) { + tracker->lifetime->Update(LifetimeStatsT::Prediction::kShortLived); + lifetime_database_.RemoveLifetimeStatsReference(tracker->lifetime); + } + + tracker->reset(); + + list_.remove(tracker); + } + + // Check whether any lifetimes in the tracker have passed the threshold after + // which they are not short-lived anymore. + void CheckForLifetimeExpirations() { + // TODO(mmaas): Expirations are fairly cheap, but there is a theoretical + // possibility of having an arbitrary number of expirations at once, which + // could affect tail latency. We may want to limit the number of pages we + // let expire per unit time. + uint64_t now = clock_.now(); + Tracker* tracker = TryGetExpired(now); + while (tracker != nullptr) { + ASSERT(tracker->is_tracked()); + + // A page that was predicted short-lived was actually long-lived. + if (tracker->predicted_short_lived) { + stats_.expired_lifetimes++; + } + + if (tracker->lifetime != nullptr) { + tracker->lifetime->Update(LifetimeStatsT::Prediction::kLongLived); + lifetime_database_.RemoveLifetimeStatsReference(tracker->lifetime); + } + + tracker->reset(); + tracker = TryGetExpired(now); + } + } + + Stats stats() const { return stats_; } + + private: + // Returns the earliest expiring entry, or nullptr if none expired. 
+ Tracker* TryGetExpired(uint64_t now) { + if (!list_.empty() && list_.first()->deadline < now) { + Tracker* s = list_.first(); + list_.remove(s); + return s; + } + return nullptr; + } + + const uint64_t timeout_; + + TList<Tracker> list_; + Stats stats_; + LifetimeDatabaseT& lifetime_database_; + Clock clock_; +}; + +using LifetimeTracker = LifetimeTrackerImpl<LifetimeDatabase, LifetimeStats>; + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_INTERNAL_LIFETIME_TRACKER_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/lifetime_tracker_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/lifetime_tracker_test.cc new file mode 100644 index 0000000000..78ed38ecae --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/lifetime_tracker_test.cc @@ -0,0 +1,129 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/internal/lifetime_tracker.h" + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/time/time.h" +#include "tcmalloc/internal/lifetime_predictions.h" + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +class MockLifetimeStats { + public: + enum class Prediction { kShortLived, kLongLived }; + MOCK_METHOD(void, Update, (Prediction prediction), ()); +}; + +class MockLifetimeDatabase { + public: + MOCK_METHOD(void, RemoveLifetimeStatsReference, (MockLifetimeStats*), ()); +}; + +using LifetimeTrackerUnderTest = + LifetimeTrackerImpl<MockLifetimeDatabase, MockLifetimeStats>; + +class LifetimeTrackerTest : public testing::Test { + protected: + const Clock kFakeClock = + Clock{.now = FakeClock, .freq = GetFakeClockFrequency}; + + void Advance(absl::Duration d) { + clock_ += absl::ToDoubleSeconds(d) * GetFakeClockFrequency(); + } + + private: + static int64_t FakeClock() { return clock_; } + + static double GetFakeClockFrequency() { + return absl::ToDoubleNanoseconds(absl::Seconds(2)); + } + + static int64_t clock_; +}; + +int64_t LifetimeTrackerTest::clock_{0}; + +TEST_F(LifetimeTrackerTest, Basic) { + MockLifetimeDatabase database; + LifetimeTrackerUnderTest tracker(&database, absl::Seconds(0.5), kFakeClock); + MockLifetimeStats stats; + + LifetimeTrackerUnderTest::Tracker tracker1; + tracker.AddAllocation(&tracker1, &stats, false); + Advance(absl::Seconds(1)); + + EXPECT_CALL(stats, Update(MockLifetimeStats::Prediction::kLongLived)); + EXPECT_CALL(database, RemoveLifetimeStatsReference(&stats)); + + LifetimeTrackerUnderTest::Tracker tracker2; + tracker.AddAllocation(&tracker2, &stats, false); + + EXPECT_CALL(stats, Update(MockLifetimeStats::Prediction::kShortLived)); + EXPECT_CALL(database, RemoveLifetimeStatsReference(&stats)); + + Advance(absl::Seconds(0.1)); + tracker.RemoveAllocation(&tracker2); + + EXPECT_EQ(tracker.stats().expired_lifetimes, 0); + EXPECT_EQ(tracker.stats().overestimated_lifetimes, 1); + EXPECT_EQ(tracker.stats().short_lived_predictions, 0); + EXPECT_EQ(tracker.stats().long_lived_predictions, 2); +} + 
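A note on the timing arithmetic behind the ExpirationLogic test that follows: the tracker converts its absl::Duration timeout to clock ticks once, at construction (timeout_ = seconds * freq). With the fake clock's frequency of 2e9 ticks per second, the 0.5 s timeout becomes 1e9 ticks and each Advance(absl::Milliseconds(1)) adds 2e6 ticks. The 100 trackers are registered 1 ms apart (taking 100 ms total), the 50 even-indexed ones are then removed as short-lived, and after a further 450 ms only the odd-indexed trackers registered in the first 50 ms, 25 of them, have passed their 500 ms deadlines; hence the kNumTrackers / 4 expectations.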
+TEST_F(LifetimeTrackerTest, ExpirationLogic) { + MockLifetimeDatabase database; + LifetimeTrackerUnderTest tracker(&database, absl::Seconds(0.5), kFakeClock); + + // Create 100 trackers, all predicted short-lived. Every second tracker will + // be long-lived and therefore expire. + const int kNumTrackers = 100; + std::vector<LifetimeTrackerUnderTest::Tracker> trackers(kNumTrackers); + MockLifetimeStats stats[] = {MockLifetimeStats(), MockLifetimeStats()}; + + for (int i = 0; i < kNumTrackers; ++i) { + tracker.AddAllocation(&trackers[i], &stats[i % 2], true); + Advance(absl::Milliseconds(1)); + } + + EXPECT_CALL(stats[0], Update(MockLifetimeStats::Prediction::kShortLived)) + .Times(kNumTrackers / 2); + EXPECT_CALL(database, RemoveLifetimeStatsReference(&stats[0])) + .Times(kNumTrackers / 2); + + for (int i = 0; i < kNumTrackers; i += 2) { + tracker.RemoveAllocation(&trackers[i]); + } + + // After an additional 450ms, 1/4 of the allocations should have expired. + EXPECT_CALL(stats[1], Update(MockLifetimeStats::Prediction::kLongLived)) + .Times(kNumTrackers / 4); + EXPECT_CALL(database, RemoveLifetimeStatsReference(&stats[1])) + .Times(kNumTrackers / 4); + + Advance(absl::Milliseconds(450)); + tracker.CheckForLifetimeExpirations(); + + EXPECT_EQ(tracker.stats().expired_lifetimes, kNumTrackers / 4); + EXPECT_EQ(tracker.stats().overestimated_lifetimes, 0); + EXPECT_EQ(tracker.stats().short_lived_predictions, kNumTrackers); + EXPECT_EQ(tracker.stats().long_lived_predictions, 0); +} + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/linked_list.h b/contrib/libs/tcmalloc/tcmalloc/internal/linked_list.h new file mode 100644 index 0000000000..181a480275 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/linked_list.h @@ -0,0 +1,254 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Some very basic linked list functions for dealing with using void * as +// storage. + +#ifndef TCMALLOC_INTERNAL_LINKED_LIST_H_ +#define TCMALLOC_INTERNAL_LINKED_LIST_H_ + +#include <stddef.h> +#include <stdint.h> + +#include "absl/base/attributes.h" +#include "absl/base/optimization.h" +#include "tcmalloc/internal/logging.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +inline ABSL_ATTRIBUTE_ALWAYS_INLINE void* SLL_Next(void* t) { + return *(reinterpret_cast<void**>(t)); +} + +inline void ABSL_ATTRIBUTE_ALWAYS_INLINE SLL_SetNext(void* t, void* n) { + *(reinterpret_cast<void**>(t)) = n; +} + +inline void ABSL_ATTRIBUTE_ALWAYS_INLINE SLL_Push(void** list, void* element) { + SLL_SetNext(element, *list); + *list = element; +} + +inline void* SLL_Pop(void** list) { + void* result = *list; + void* next = SLL_Next(*list); + *list = next; + // Prefetching NULL leads to a DTLB miss, thus only prefetch when 'next' + // is not NULL. 
+#if defined(__GNUC__) + if (next) { + __builtin_prefetch(next, 0, 3); + } +#endif + return result; +} + +// LinkedList forms an in-place linked list with its void* elements. +class LinkedList { + private: + void* list_; // Linked list. + uint32_t length_; // Current length. + + public: + void Init() { + list_ = nullptr; + length_ = 0; + } + + // Return current length of list + size_t length() const { return length_; } + + // Is list empty? + bool empty() const { return list_ == nullptr; } + + void ABSL_ATTRIBUTE_ALWAYS_INLINE Push(void* ptr) { + SLL_Push(&list_, ptr); + length_++; + } + + bool ABSL_ATTRIBUTE_ALWAYS_INLINE TryPop(void** ret) { + void* obj = list_; + if (ABSL_PREDICT_FALSE(obj == nullptr)) { + return false; + } + + void* next = SLL_Next(obj); + list_ = next; + length_--; + +#if defined(__GNUC__) + if (ABSL_PREDICT_TRUE(next)) { + __builtin_prefetch(next, 0, 0); + } +#endif + + *ret = obj; + return true; + } + + // PushBatch and PopBatch do not guarantee an ordering. + void PushBatch(int N, void** batch) { + ASSERT(N > 0); + for (int i = 0; i < N - 1; ++i) { + SLL_SetNext(batch[i], batch[i + 1]); + } + SLL_SetNext(batch[N - 1], list_); + list_ = batch[0]; + length_ += N; + } + + void PopBatch(int N, void** batch) { + void* p = list_; + for (int i = 0; i < N; ++i) { + batch[i] = p; + p = SLL_Next(p); + } + list_ = p; + ASSERT(length_ >= N); + length_ -= N; + } +}; + +// A well-typed intrusive doubly linked list. +template <typename T> +class TList { + private: + class Iter; + + public: + // The intrusive element supertype. Use the CRTP to declare your class: + // class MyListItems : public TList<MyListItems>::Elem { ... + class Elem { + friend class Iter; + friend class TList<T>; + Elem* next_; + Elem* prev_; + + protected: + constexpr Elem() : next_(nullptr), prev_(nullptr) {} + + // Returns true iff the list is empty after removing this + bool remove() { + // Copy out next/prev before doing stores, otherwise compiler assumes + // potential aliasing and does unnecessary reloads after stores. + Elem* next = next_; + Elem* prev = prev_; + ASSERT(prev->next_ == this); + prev->next_ = next; + ASSERT(next->prev_ == this); + next->prev_ = prev; +#ifndef NDEBUG + prev_ = nullptr; + next_ = nullptr; +#endif + return next == prev; + } + + void prepend(Elem* item) { + Elem* prev = prev_; + item->prev_ = prev; + item->next_ = this; + prev->next_ = item; + prev_ = item; + } + + void append(Elem* item) { + Elem* next = next_; + item->next_ = next; + item->prev_ = this; + next->prev_ = item; + next_ = item; + } + }; + + // Initialize to empty list. + constexpr TList() { head_.next_ = head_.prev_ = &head_; } + + bool empty() const { return head_.next_ == &head_; } + + // Return the length of the linked list. O(n). + size_t length() const { + size_t result = 0; + for (Elem* e = head_.next_; e != &head_; e = e->next_) { + result++; + } + return result; + } + + // Returns first element in the list. The list must not be empty. + ABSL_ATTRIBUTE_RETURNS_NONNULL T* first() const { + ASSERT(!empty()); + ASSERT(head_.next_ != nullptr); + return static_cast<T*>(head_.next_); + } + + // Returns last element in the list. The list must not be empty. + ABSL_ATTRIBUTE_RETURNS_NONNULL T* last() const { + ASSERT(!empty()); + ASSERT(head_.prev_ != nullptr); + return static_cast<T*>(head_.prev_); + } + + // Add item to the front of list. + void prepend(T* item) { head_.append(item); } + + void append(T* item) { head_.prepend(item); } + + bool remove(T* item) { + // must be on the list; we don't check. 
+ return item->remove(); + } + + // Support for range-based iteration over a list. + Iter begin() const { return Iter(head_.next_); } + Iter end() const { return Iter(const_cast<Elem*>(&head_)); } + + // Iterator pointing to a given list item. + // REQUIRES: item is a member of the list. + Iter at(T* item) const { return Iter(item); } + + private: + // Support for range-based iteration over a list. + class Iter { + friend class TList; + Elem* elem_; + explicit Iter(Elem* elem) : elem_(elem) {} + + public: + Iter& operator++() { + elem_ = elem_->next_; + return *this; + } + Iter& operator--() { + elem_ = elem_->prev_; + return *this; + } + + bool operator!=(Iter other) const { return elem_ != other.elem_; } + bool operator==(Iter other) const { return elem_ == other.elem_; } + T* operator*() const { return static_cast<T*>(elem_); } + T* operator->() const { return static_cast<T*>(elem_); } + }; + friend class Iter; + + Elem head_; +}; + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_INTERNAL_LINKED_LIST_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/linked_list_benchmark.cc b/contrib/libs/tcmalloc/tcmalloc/internal/linked_list_benchmark.cc new file mode 100644 index 0000000000..505b1b62c2 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/linked_list_benchmark.cc @@ -0,0 +1,146 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <stdlib.h> + +#include <algorithm> +#include <vector> + +#include "absl/random/random.h" +#include "benchmark/benchmark.h" +#include "tcmalloc/internal/linked_list.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/mock_span.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +void BM_PushPop(benchmark::State& state) { + const int pointers = state.range(0); + const int sequential_calls = state.range(1); + + LinkedList list; + list.Init(); + const size_t size = pointers * sizeof(void*); + + std::vector<void*> v(sequential_calls); + for (int i = 0; i < sequential_calls; i++) { + v[i] = malloc(size); + } + std::shuffle(v.begin(), v.end(), absl::BitGen()); + + for (auto s : state) { + // Push sequential_calls times. + for (int j = 0; j < sequential_calls; j++) { + list.Push(v[j]); + } + + // Pop sequential_calls times. 
+ for (int j = 0; j < sequential_calls; j++) { + void* ret; + list.TryPop(&ret); + } + } + + state.SetItemsProcessed(sequential_calls * state.iterations()); + + for (int i = 0; i < sequential_calls; i++) { + free(v[i]); + } +} +BENCHMARK(BM_PushPop)->RangePair(1, 64, 1, 32); + +void BM_PushPopBatch(benchmark::State& state) { + const int pointers = state.range(0); + const int batch_size = state.range(1); + + LinkedList list; + list.Init(); + const size_t size = pointers * sizeof(void*); + + const int kNumberOfObjects = 64 << 10; + std::vector<void*> v(kNumberOfObjects); + for (int i = 0; i < kNumberOfObjects; i++) { + v[i] = malloc(size); + } + std::shuffle(v.begin(), v.end(), absl::BitGen()); + + const int kMaxObjectsToMove = 32; + void* batch[kMaxObjectsToMove]; + + for (auto s : state) { + // PushBatch + for (int j = 0; j < kNumberOfObjects / batch_size; j++) { + list.PushBatch(batch_size, v.data() + j * batch_size); + } + + // PopBatch. + for (int j = 0; j < kNumberOfObjects / batch_size; j++) { + list.PopBatch(batch_size, batch); + } + } + + state.SetItemsProcessed((kNumberOfObjects / batch_size) * batch_size * + state.iterations()); + + for (int i = 0; i < kNumberOfObjects; i++) { + free(v[i]); + } +} +BENCHMARK(BM_PushPopBatch)->RangePair(1, 64, 1, 32); + +static void BM_AppendRemove(benchmark::State& state) { + MockSpanList list; + + int sequential_calls = state.range(0); + + std::vector<MockSpan*> vappend(sequential_calls); + + // Create MockSpans in append order + for (int i = 0; i < sequential_calls; i++) { + MockSpan* s = MockSpan::New(i); + CHECK_CONDITION(s != nullptr); + vappend[i] = s; + } + + // Remove all sequential_calls elements from the list in a random order + std::vector<MockSpan*> vremove(sequential_calls); + vremove = vappend; + std::shuffle(vremove.begin(), vremove.end(), absl::BitGen()); + + for (auto _ : state) { + // Append sequential_calls elements to the list. + for (MockSpan* s : vappend) { + list.append(s); + } + + // Remove in a random order + for (MockSpan* s : vremove) { + list.remove(s); + } + } + + for (MockSpan* s : vappend) { + delete s; + } +} + +BENCHMARK(BM_AppendRemove)->Range(32, 32 * 1024); + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/linked_list_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/linked_list_test.cc new file mode 100644 index 0000000000..3299bca8d8 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/linked_list_test.cc @@ -0,0 +1,239 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
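Illustrative sketch (not part of the imported diff): the CRTP element type that TList's comment earlier in this hunk calls for. MockSpan, used by the benchmark above and the tests below, is defined in mock_span.h (not included in this change) and presumably looks roughly like this.

    #include "tcmalloc/internal/linked_list.h"

    namespace tcmalloc {
    namespace tcmalloc_internal {

    // An intrusive list element: the links live inside the object itself.
    class ExampleItem : public TList<ExampleItem>::Elem {
     public:
      explicit ExampleItem(int index) : index_(index) {}
      int index_;
    };

    inline int SumIndices() {
      TList<ExampleItem> list;
      ExampleItem a(1), b(2);
      list.append(&a);   // list: a
      list.prepend(&b);  // list: b, a
      int sum = 0;
      for (ExampleItem* item : list) sum += item->index_;  // visits b, then a
      list.remove(&a);               // returns false: b is still on the list
      bool empty = list.remove(&b);  // returns true: list is now empty
      return empty ? sum : 0;        // 3
    }

    }  // namespace tcmalloc_internal
    }  // namespace tcmalloc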
+ +#include "tcmalloc/internal/linked_list.h" + +#include <stdlib.h> + +#include <algorithm> +#include <vector> + +#include "gtest/gtest.h" +#include "absl/container/flat_hash_set.h" +#include "absl/container/node_hash_set.h" +#include "absl/random/random.h" +#include "benchmark/benchmark.h" +#include "tcmalloc/internal/mock_span.h" + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +class LinkedListTest : public ::testing::Test { + protected: + void SetUp() override { list_.Init(); } + + LinkedList list_; +}; + +TEST_F(LinkedListTest, PushPop) { + const int N = 20; + std::vector<void*> ptrs{nullptr}; + + EXPECT_EQ(0, list_.length()); + EXPECT_TRUE(list_.empty()); + + for (int i = 0; i < N; i++) { + void* ptr = malloc(sizeof(void*)); + ASSERT_FALSE(ptr == nullptr); + ptrs.push_back(ptr); + + list_.Push(ptr); + + EXPECT_EQ(i + 1, list_.length()); + EXPECT_FALSE(list_.empty()); + } + + for (int i = N; i > 0; i--) { + EXPECT_EQ(i, list_.length()); + EXPECT_FALSE(list_.empty()); + + void* ptr; + bool ret = list_.TryPop(&ptr); + EXPECT_TRUE(ret); + EXPECT_EQ(ptrs[i], ptr); + + free(ptrs[i]); + } + + EXPECT_EQ(0, list_.length()); + EXPECT_TRUE(list_.empty()); +} + +// PushPopBatch validates that the batch operations push and pop the required +// number of elements from the list, but it does not assert that order within +// the batch is maintained. +TEST_F(LinkedListTest, PushPopBatch) { + const std::vector<int> batch_sizes{1, 3, 5, 7, 10, 16}; + absl::flat_hash_set<void*> pushed; + + size_t length = 0; + for (int batch_size : batch_sizes) { + std::vector<void*> batch; + + for (int i = 0; i < batch_size; i++) { + void* ptr = malloc(sizeof(void*)); + ASSERT_FALSE(ptr == nullptr); + batch.push_back(ptr); + pushed.insert(ptr); + } + + list_.PushBatch(batch_size, batch.data()); + length += batch_size; + + EXPECT_EQ(length, list_.length()); + EXPECT_EQ(length == 0, list_.empty()); + } + + absl::flat_hash_set<void*> popped; + for (int batch_size : batch_sizes) { + std::vector<void*> batch(batch_size, nullptr); + list_.PopBatch(batch_size, batch.data()); + length -= batch_size; + + popped.insert(batch.begin(), batch.end()); + EXPECT_EQ(length, list_.length()); + EXPECT_EQ(length == 0, list_.empty()); + } + + EXPECT_EQ(pushed, popped); + + for (void* ptr : pushed) { + free(ptr); + } +} + +class TListTest : public ::testing::Test { + protected: + MockSpanList list_; +}; + +TEST_F(TListTest, AppendPushPop) { + const int N = 20; + + EXPECT_EQ(list_.length(), 0); + EXPECT_TRUE(list_.empty()); + + // Append N elements to the list. + for (int i = 0; i < N; i++) { + MockSpan* s = MockSpan::New(i); + ASSERT_FALSE(s == nullptr); + list_.append(s); + EXPECT_EQ(list_.first()->index_, 0); + EXPECT_EQ(list_.last()->index_, i); + + EXPECT_EQ(list_.length(), i + 1); + EXPECT_FALSE(list_.empty()); + } + + // Remove all N elements from the end of the list. + for (int i = N; i > 0; i--) { + EXPECT_EQ(list_.length(), i); + EXPECT_FALSE(list_.empty()); + + MockSpan* last = list_.last(); + EXPECT_EQ(list_.first()->index_, 0); + EXPECT_EQ(list_.last()->index_, i - 1); + + EXPECT_FALSE(last == nullptr); + bool ret = list_.remove(last); + // Returns true iff the list is empty after the remove. + EXPECT_EQ(ret, i == 1); + + delete last; + } + EXPECT_EQ(list_.length(), 0); + EXPECT_TRUE(list_.empty()); +} + +TEST_F(TListTest, PrependPushPop) { + const int N = 20; + + EXPECT_EQ(list_.length(), 0); + EXPECT_TRUE(list_.empty()); + + // Prepend N elements to the list. 
+ for (int i = 0; i < N; i++) { + MockSpan* s = MockSpan::New(i); + ASSERT_FALSE(s == nullptr); + list_.prepend(s); + EXPECT_EQ(list_.first()->index_, i); + EXPECT_EQ(list_.last()->index_, 0); + + EXPECT_EQ(list_.length(), i + 1); + EXPECT_FALSE(list_.empty()); + } + + // Check range iterator + { + int x = N - 1; + for (const MockSpan* s : list_) { + EXPECT_EQ(s->index_, x); + x--; + } + } + + // Remove all N elements from the front of the list. + for (int i = N; i > 0; i--) { + EXPECT_EQ(list_.length(), i); + EXPECT_FALSE(list_.empty()); + + MockSpan* first = list_.first(); + EXPECT_EQ(list_.first()->index_, i - 1); + EXPECT_EQ(list_.last()->index_, 0); + + EXPECT_FALSE(first == nullptr); + bool ret = list_.remove(first); + // Returns true iff the list is empty after the remove. + EXPECT_EQ(ret, i == 1); + + delete first; + } + EXPECT_EQ(list_.length(), 0); + EXPECT_TRUE(list_.empty()); +} + +TEST_F(TListTest, AppendRandomRemove) { + const int N = 100; + std::vector<MockSpan*> v(N); + + // Append N elements to the list. + for (int i = 0; i < N; i++) { + MockSpan* s = MockSpan::New(i); + ASSERT_FALSE(s == nullptr); + v[i] = s; + list_.append(s); + } + + // Remove all N elements from the list in a random order + std::shuffle(v.begin(), v.end(), absl::BitGen()); + int i = N; + for (MockSpan* s : v) { + EXPECT_EQ(list_.length(), i); + EXPECT_FALSE(list_.empty()); + + bool ret = list_.remove(s); + // Returns true iff the list is empty after the remove. + EXPECT_EQ(ret, i == 1); + + delete s; + i--; + } + EXPECT_EQ(list_.length(), 0); + EXPECT_TRUE(list_.empty()); +} + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/linux_syscall_support.h b/contrib/libs/tcmalloc/tcmalloc/internal/linux_syscall_support.h new file mode 100644 index 0000000000..0abf54ff1c --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/linux_syscall_support.h @@ -0,0 +1,65 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_INTERNAL_LINUX_SYSCALL_SUPPORT_H_ +#define TCMALLOC_INTERNAL_LINUX_SYSCALL_SUPPORT_H_ + +/* include/uapi/linux/rseq.h */ + +struct kernel_rseq { + unsigned cpu_id_start; + unsigned cpu_id; + unsigned long long rseq_cs; + unsigned flags; + unsigned padding[2]; + // This is a prototype extension to the rseq() syscall. Since a process may + // run on only a few cores at a time, we can use a dense set of "v(irtual) + // cpus." This can reduce cache requirements, as we only need N caches for + // the cores we actually run on simultaneously, rather than a cache for every + // physical core. 
+ union { + struct { + short numa_node_id; + short vcpu_id; + }; + int vcpu_flat; + }; +} __attribute__((aligned(4 * sizeof(unsigned long long)))); + +static_assert(sizeof(kernel_rseq) == (4 * sizeof(unsigned long long)), + "Unexpected size for rseq structure"); + +struct kernel_rseq_cs { + unsigned version; + unsigned flags; + unsigned long long start_ip; + unsigned long long post_commit_offset; + unsigned long long abort_ip; + // This is aligned, per upstream RSEQ specification. +} __attribute__((aligned(4 * sizeof(unsigned long long)))); + +static_assert(sizeof(kernel_rseq_cs) == (4 * sizeof(unsigned long long)), + "Unexpected size for rseq_cs structure"); + +#if !defined(__NR_rseq) +#if defined(__x86_64__) +#define __NR_rseq 334 +#elif defined(__aarch64__) +#define __NR_rseq 293 +#elif defined(__PPC__) +#define __NR_rseq 387 +#endif +#endif + +#endif // TCMALLOC_INTERNAL_LINUX_SYSCALL_SUPPORT_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/logging.cc b/contrib/libs/tcmalloc/tcmalloc/internal/logging.cc new file mode 100644 index 0000000000..2b70bc1502 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/logging.cc @@ -0,0 +1,276 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/internal/logging.h" + +#include <inttypes.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include <algorithm> + +#include "absl/base/attributes.h" +#include "absl/base/const_init.h" +#include "absl/base/internal/spinlock.h" +#include "absl/base/macros.h" +#include "absl/debugging/stacktrace.h" +#include "tcmalloc/internal/parameter_accessors.h" +#include "tcmalloc/malloc_extension.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// Variables for storing crash output. Allocated statically since we +// may not be able to heap-allocate while crashing. +ABSL_CONST_INIT static absl::base_internal::SpinLock crash_lock( + absl::kConstInit, absl::base_internal::SCHEDULE_KERNEL_ONLY); +static bool crashed = false; + +static const size_t kStatsBufferSize = 16 << 10; +static char stats_buffer[kStatsBufferSize] = {0}; + +static void WriteMessage(const char* msg, int length) { + (void)::write(STDERR_FILENO, msg, length); +} + +void (*log_message_writer)(const char* msg, int length) = WriteMessage; + +class Logger { + public: + bool Add(const LogItem& item); + bool AddStr(const char* str, int n); + bool AddNum(uint64_t num, int base); // base must be 10 or 16. 
+ + static constexpr int kBufSize = 512; + char* p_; + char* end_; + char buf_[kBufSize]; + + StackTrace trace; +}; + +static Logger FormatLog(bool with_stack, const char* filename, int line, + LogItem a, LogItem b, LogItem c, LogItem d) { + Logger state; + state.p_ = state.buf_; + state.end_ = state.buf_ + sizeof(state.buf_); + // clang-format off + state.AddStr(filename, strlen(filename)) && + state.AddStr(":", 1) && + state.AddNum(line, 10) && + state.AddStr("]", 1) && + state.Add(a) && + state.Add(b) && + state.Add(c) && + state.Add(d); + // clang-format on + + if (with_stack) { + state.trace.depth = + absl::GetStackTrace(state.trace.stack, kMaxStackDepth, 1); + state.Add(LogItem("@")); + for (int i = 0; i < state.trace.depth; i++) { + state.Add(LogItem(state.trace.stack[i])); + } + } + + // Teminate with newline + if (state.p_ >= state.end_) { + state.p_ = state.end_ - 1; + } + *state.p_ = '\n'; + state.p_++; + + return state; +} + +ABSL_ATTRIBUTE_NOINLINE +void Log(LogMode mode, const char* filename, int line, LogItem a, LogItem b, + LogItem c, LogItem d) { + Logger state = FormatLog(mode == kLogWithStack, filename, line, a, b, c, d); + int msglen = state.p_ - state.buf_; + (*log_message_writer)(state.buf_, msglen); +} + +ABSL_ATTRIBUTE_NOINLINE +void Crash(CrashMode mode, const char* filename, int line, LogItem a, LogItem b, + LogItem c, LogItem d) { + Logger state = FormatLog(true, filename, line, a, b, c, d); + + int msglen = state.p_ - state.buf_; + + // FailureSignalHandler mallocs for various logging attempts. + // We might be crashing holding tcmalloc locks. + // We're substantially less likely to try to take those locks + // (and thus deadlock until the alarm timer fires) if we disable sampling. +#ifndef __APPLE__ + if (&TCMalloc_Internal_SetProfileSamplingRate != nullptr) { + TCMalloc_Internal_SetProfileSamplingRate(0); + } +#endif // __APPLE__ + + bool first_crash = false; + { + absl::base_internal::SpinLockHolder l(&crash_lock); + if (!crashed) { + crashed = true; + first_crash = true; + } + } + + (*log_message_writer)(state.buf_, msglen); + if (first_crash && mode == kCrashWithStats) { +#ifndef __APPLE__ + if (&TCMalloc_Internal_GetStats != nullptr) { + size_t n = TCMalloc_Internal_GetStats(stats_buffer, kStatsBufferSize); + (*log_message_writer)(stats_buffer, std::min(n, kStatsBufferSize)); + } +#endif // __APPLE__ + } + + abort(); +} + +bool Logger::Add(const LogItem& item) { + // Separate real items with spaces + if (item.tag_ != LogItem::kEnd && p_ < end_) { + *p_ = ' '; + p_++; + } + + switch (item.tag_) { + case LogItem::kStr: + return AddStr(item.u_.str, strlen(item.u_.str)); + case LogItem::kUnsigned: + return AddNum(item.u_.unum, 10); + case LogItem::kSigned: + if (item.u_.snum < 0) { + // The cast to uint64_t is intentionally before the negation + // so that we do not attempt to negate -2^63. + return AddStr("-", 1) && + AddNum(-static_cast<uint64_t>(item.u_.snum), 10); + } else { + return AddNum(static_cast<uint64_t>(item.u_.snum), 10); + } + case LogItem::kPtr: + return AddStr("0x", 2) && + AddNum(reinterpret_cast<uintptr_t>(item.u_.ptr), 16); + default: + return false; + } +} + +bool Logger::AddStr(const char* str, int n) { + ptrdiff_t remaining = end_ - p_; + if (remaining < n) { + // Try to log a truncated message if there is some space. 
+ static constexpr absl::string_view kDots = "..."; + if (remaining > kDots.size() + 1) { + int truncated = remaining - kDots.size(); + memcpy(p_, str, truncated); + p_ += truncated; + memcpy(p_, kDots.data(), kDots.size()); + p_ += kDots.size(); + + return true; + } + return false; + } else { + memcpy(p_, str, n); + p_ += n; + return true; + } +} + +bool Logger::AddNum(uint64_t num, int base) { + static const char kDigits[] = "0123456789abcdef"; + char space[22]; // more than enough for 2^64 in smallest supported base (10) + char* end = space + sizeof(space); + char* pos = end; + do { + pos--; + *pos = kDigits[num % base]; + num /= base; + } while (num > 0 && pos > space); + return AddStr(pos, end - pos); +} + +PbtxtRegion::PbtxtRegion(Printer* out, PbtxtRegionType type, int indent) + : out_(out), type_(type), indent_(indent) { + switch (type_) { + case kTop: + break; + case kNested: + out_->printf("{"); + break; + } + ++indent_; +} + +PbtxtRegion::~PbtxtRegion() { + --indent_; + out_->printf("\n"); + for (int i = 0; i < indent_; i++) { + out_->printf(" "); + } + switch (type_) { + case kTop: + break; + case kNested: + out_->printf("}"); + break; + } +} + +void PbtxtRegion::NewLineAndIndent() { + out_->printf("\n"); + for (int i = 0; i < indent_; i++) { + out_->printf(" "); + } +} + +void PbtxtRegion::PrintI64(absl::string_view key, int64_t value) { + NewLineAndIndent(); + out_->printf("%s: %" PRIi64, key, value); +} + +void PbtxtRegion::PrintDouble(absl::string_view key, double value) { + NewLineAndIndent(); + out_->printf("%s: %f", key, value); +} + +void PbtxtRegion::PrintBool(absl::string_view key, bool value) { + NewLineAndIndent(); + out_->printf("%s: %s", key, value ? "true" : "false"); +} + +void PbtxtRegion::PrintRaw(absl::string_view key, absl::string_view value) { + NewLineAndIndent(); + out_->printf("%s: %s", key, value); +} + +PbtxtRegion PbtxtRegion::CreateSubRegion(absl::string_view key) { + NewLineAndIndent(); + out_->printf("%s ", key); + PbtxtRegion sub(out_, kNested, indent_); + return sub; +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/logging.h b/contrib/libs/tcmalloc/tcmalloc/internal/logging.h new file mode 100644 index 0000000000..4d42aa40a9 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/logging.h @@ -0,0 +1,222 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Internal logging and related utility routines. 
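The PbtxtRegion implementation above drives all output through Printer, emitting a newline plus indentation before each field and closing nested regions from the destructor. As a rough, illustrative sketch of how those pieces compose (field names, values, and buffer size are invented for the example, not taken from the sources):

void EmitExamplePbtxt(char* buf, int len) {
  using tcmalloc::tcmalloc_internal::PbtxtRegion;
  using tcmalloc::tcmalloc_internal::Printer;
  using tcmalloc::tcmalloc_internal::kTop;

  Printer printer(buf, len);                      // len must be > 0
  PbtxtRegion top(&printer, kTop, /*indent=*/0);  // top region: no braces
  top.PrintI64("allocated_bytes", 4096);
  top.PrintBool("sampling", true);
  PbtxtRegion cache = top.CreateSubRegion("cpu_cache");  // prints "cpu_cache {"
  cache.PrintI64("used_bytes", 1024);
  // Destructors run in reverse order here, closing "cpu_cache" with "}" and
  // leaving roughly the following in buf:
  //  allocated_bytes: 4096
  //  sampling: true
  //  cpu_cache {
  //   used_bytes: 1024
  //  }
}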
+ +#ifndef TCMALLOC_INTERNAL_LOGGING_H_ +#define TCMALLOC_INTERNAL_LOGGING_H_ + +#include <stdint.h> +#include <stdlib.h> + +#include "absl/base/internal/per_thread_tls.h" +#include "absl/base/optimization.h" +#include "absl/strings/str_format.h" +#include "absl/strings/string_view.h" +#include "tcmalloc/internal/config.h" + +//------------------------------------------------------------------- +// Utility routines +//------------------------------------------------------------------- + +// Safe logging helper: we write directly to the stderr file +// descriptor and avoid FILE buffering because that may invoke +// malloc(). +// +// Example: +// Log(kLog, __FILE__, __LINE__, "error", bytes); + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +static constexpr int kMaxStackDepth = 64; + +// size/depth are made the same size as a pointer so that some generic +// code below can conveniently cast them back and forth to void*. +struct StackTrace { + + // For small sampled objects, we allocate a full span to hold the + // sampled object. However to avoid disturbing fragmentation + // profiles, in such cases we also allocate a small proxy object + // using the normal mechanism. + // + // proxy field is defined only for heap sample stack traces. + // For heap samples, proxy==NULL iff size > kMaxSize. + void* proxy; + + uintptr_t requested_size; + uintptr_t requested_alignment; + uintptr_t allocated_size; // size after sizeclass/page rounding + + uintptr_t depth; // Number of PC values stored in array below + void* stack[kMaxStackDepth]; + + // weight is the expected number of *bytes* that were requested + // between the previous sample and this one + size_t weight; + + void* user_data; + + template <typename H> + friend H AbslHashValue(H h, const StackTrace& t) { + // As we use StackTrace as a key-value node in StackTraceTable, we only + // produce a hasher for the fields used as keys. + return H::combine(H::combine_contiguous(std::move(h), t.stack, t.depth), + t.depth, t.requested_size, t.requested_alignment, + t.allocated_size + ); + } +}; + +enum LogMode { + kLog, // Just print the message + kLogWithStack, // Print the message and a stack trace +}; + +class Logger; + +// A LogItem holds any of the argument types that can be passed to Log() +class LogItem { + public: + LogItem() : tag_(kEnd) {} + LogItem(const char* v) : tag_(kStr) { u_.str = v; } + LogItem(int v) : tag_(kSigned) { u_.snum = v; } + LogItem(long v) : tag_(kSigned) { u_.snum = v; } + LogItem(long long v) : tag_(kSigned) { u_.snum = v; } + LogItem(unsigned int v) : tag_(kUnsigned) { u_.unum = v; } + LogItem(unsigned long v) : tag_(kUnsigned) { u_.unum = v; } + LogItem(unsigned long long v) : tag_(kUnsigned) { u_.unum = v; } + LogItem(const void* v) : tag_(kPtr) { u_.ptr = v; } + + private: + friend class Logger; + enum Tag { kStr, kSigned, kUnsigned, kPtr, kEnd }; + Tag tag_; + union { + const char* str; + const void* ptr; + int64_t snum; + uint64_t unum; + } u_; +}; + +extern void Log(LogMode mode, const char* filename, int line, LogItem a, + LogItem b = LogItem(), LogItem c = LogItem(), + LogItem d = LogItem()); + +enum CrashMode { + kCrash, // Print the message and crash + kCrashWithStats // Print the message, some stats, and crash +}; + +ABSL_ATTRIBUTE_NORETURN +void Crash(CrashMode mode, const char* filename, int line, LogItem a, + LogItem b = LogItem(), LogItem c = LogItem(), LogItem d = LogItem()); + +// Tests can override this function to collect logging messages. 
+extern void (*log_message_writer)(const char* msg, int length); + +// Like assert(), but executed even in NDEBUG mode +#undef CHECK_CONDITION +#define CHECK_CONDITION(cond) \ + (ABSL_PREDICT_TRUE(cond) ? (void)0 \ + : (::tcmalloc::tcmalloc_internal::Crash( \ + ::tcmalloc::tcmalloc_internal::kCrash, \ + __FILE__, __LINE__, #cond))) + +// Our own version of assert() so we can avoid hanging by trying to do +// all kinds of goofy printing while holding the malloc lock. +#ifndef NDEBUG +#define ASSERT(cond) CHECK_CONDITION(cond) +#else +#define ASSERT(cond) ((void)0) +#endif + +// Print into buffer +class Printer { + private: + char* buf_; // Where should we write next + int left_; // Space left in buffer (including space for \0) + int required_; // Space we needed to complete all printf calls up to this + // point + + public: + // REQUIRES: "length > 0" + Printer(char* buf, int length) : buf_(buf), left_(length), required_(0) { + ASSERT(length > 0); + buf[0] = '\0'; + } + + template <typename... Args> + void printf(const absl::FormatSpec<Args...>& format, const Args&... args) { + ASSERT(left_ >= 0); + if (left_ <= 0) { + return; + } + + const int r = absl::SNPrintF(buf_, left_, format, args...); + if (r < 0) { + left_ = 0; + return; + } + required_ += r; + + if (r > left_) { + left_ = 0; + } else { + left_ -= r; + buf_ += r; + } + } + + int SpaceRequired() const { return required_; } +}; + +enum PbtxtRegionType { kTop, kNested }; + +// A helper class that prints pbtxt via RAII. A pbtxt region can be either a +// top region (with no brackets) or a nested region (enclosed by curly +// brackets). +class PbtxtRegion { + public: + PbtxtRegion(Printer* out, PbtxtRegionType type, int indent); + ~PbtxtRegion(); + + PbtxtRegion(const PbtxtRegion&) = delete; + PbtxtRegion(PbtxtRegion&&) = default; + + // Prints 'key: value'. + void PrintI64(absl::string_view key, int64_t value); + void PrintDouble(absl::string_view key, double value); + void PrintBool(absl::string_view key, bool value); + // Useful for enums. + void PrintRaw(absl::string_view key, absl::string_view value); + + // Prints 'key subregion'. Return the created subregion. + PbtxtRegion CreateSubRegion(absl::string_view key); + + private: + void NewLineAndIndent(); + + Printer* out_; + PbtxtRegionType type_; + int indent_; +}; + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_INTERNAL_LOGGING_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/logging_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/logging_test.cc new file mode 100644 index 0000000000..c7b58de40f --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/logging_test.cc @@ -0,0 +1,117 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
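One property of the Printer class declared above worth noting: required_ keeps accumulating even after the buffer is exhausted, so SpaceRequired() supports a measure-then-allocate pattern. A hedged sketch (the helper name and format string are illustrative; the RequiredSpace test below exercises the same behavior):

// Assumes #include <memory> and "tcmalloc/internal/logging.h".
std::unique_ptr<char[]> FormatWithExactSize(int bytes, int spans) {
  using tcmalloc::tcmalloc_internal::Printer;

  char probe[1];  // smallest legal buffer; output is truncated, the tally is not
  Printer measure(probe, sizeof(probe));
  measure.printf("%d bytes in %d spans", bytes, spans);

  const int needed = measure.SpaceRequired() + 1;  // +1 for the trailing '\0'
  std::unique_ptr<char[]> out(new char[needed]);
  Printer full(out.get(), needed);
  full.printf("%d bytes in %d spans", bytes, spans);
  return out;
}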
+ +#include "tcmalloc/internal/logging.h" + +#include <string.h> + +#include <string> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/flags/flag.h" + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +static std::string* log_buffer; + +static void RecordLogMessage(const char* msg, int length) { + // Make tests less brittle by trimming trailing whitespace + while (length > 0 && (msg[length - 1] == ' ' || msg[length - 1] == '\n')) { + length--; + } + log_buffer->assign(msg, length); +} + +TEST(InternalLogging, MessageFormatting) { + std::string long_string; + for (int i = 0; i < 100; i++) { + long_string += "the quick brown fox jumped over the lazy dog"; + } + + // Arrange to intercept Log() output + log_buffer = new std::string(); + void (*old_writer)(const char*, int) = log_message_writer; + log_message_writer = RecordLogMessage; + + Log(kLog, "foo.cc", 100, "Hello"); + EXPECT_EQ("foo.cc:100] Hello", *log_buffer); + + Log(kLog, "foo.cc", 100, 123u, -456, 0); + EXPECT_EQ("foo.cc:100] 123 -456 0", *log_buffer); + + Log(kLog, "foo.cc", 100, 123u, std::numeric_limits<int64_t>::min()); + EXPECT_EQ("foo.cc:100] 123 -9223372036854775808", *log_buffer); + + Log(kLog, "foo.cc", 2, + reinterpret_cast<const void*>(static_cast<uintptr_t>(1025))); + EXPECT_EQ("foo.cc:2] 0x401", *log_buffer); + + Log(kLog, "foo.cc", 10, "hello", long_string.c_str()); + EXPECT_THAT(*log_buffer, + testing::StartsWith( + "foo.cc:10] hello the quick brown fox jumped over the lazy " + "dogthe quick brown fox jumped over the lazy dog")); + + Log(kLogWithStack, "foo.cc", 10, "stk"); + EXPECT_TRUE(strstr(log_buffer->c_str(), "stk @ 0x") != nullptr) + << *log_buffer; + + log_message_writer = old_writer; + delete log_buffer; +} + +TEST(InternalLogging, Assert) { + CHECK_CONDITION((2 + 2) == 4); + + if (false) + CHECK_CONDITION(false); + else + CHECK_CONDITION(true); + + ASSERT_DEATH(CHECK_CONDITION((2 + 2) == 5), + ".*tcmalloc\\/internal/logging_test\\.cc:[0-9]+\\] " + "\\(2 \\+ 2\\) == 5 @( 0x[0-9a-f]+)+"); +} + +TEST(Printer, RequiredSpace) { + const char kChunk[] = "0123456789"; + std::string expected; + + for (int i = 0; i < 10; i++) { + int length = strlen(kChunk) * i + 1; + std::unique_ptr<char[]> buf(new char[length]); + Printer printer(buf.get(), length); + + for (int j = 0; j < i; j++) { + printer.printf("%s", kChunk); + } + EXPECT_EQ(buf.get(), expected); + EXPECT_EQ(length - 1, printer.SpaceRequired()); + + // Go past the end of the buffer. This should not overrun or affect the + // existing contents of buf, but we should see SpaceRequired tick up. + printer.printf("%s", kChunk); + EXPECT_EQ(buf.get(), expected); + EXPECT_EQ(length - 1 + strlen(kChunk), printer.SpaceRequired()); + + expected.append(kChunk); + } +} + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/logging_test_helper.cc b/contrib/libs/tcmalloc/tcmalloc/internal/logging_test_helper.cc new file mode 100644 index 0000000000..36c2b38771 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/logging_test_helper.cc @@ -0,0 +1,18 @@ +// Copyright 2021 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This is a trivial program. When run with a virtual address size rlimit, +// TCMalloc should crash cleanly, rather than hang. + +int main() { return 0; } diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/memory_stats.cc b/contrib/libs/tcmalloc/tcmalloc/internal/memory_stats.cc new file mode 100644 index 0000000000..71591834d4 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/memory_stats.cc @@ -0,0 +1,132 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/internal/memory_stats.h" + +#include <fcntl.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include "absl/strings/numbers.h" +#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/util.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +namespace { + +struct FDCloser { + FDCloser() : fd(-1) {} + ~FDCloser() { + if (fd != -1) { + signal_safe_close(fd); + } + } + int fd; +}; + +} // namespace + +bool GetMemoryStats(MemoryStats* stats) { +#if !defined(__linux__) + return false; +#endif + + FDCloser fd; + fd.fd = signal_safe_open("/proc/self/statm", O_RDONLY | O_CLOEXEC); + ASSERT(fd.fd >= 0); + if (fd.fd < 0) { + return false; + } + + char buf[1024]; + ssize_t rc = signal_safe_read(fd.fd, buf, sizeof(buf), nullptr); + ASSERT(rc >= 0); + ASSERT(rc < sizeof(buf)); + if (rc < 0 || rc >= sizeof(buf)) { + return false; + } + buf[rc] = '\0'; + + const size_t pagesize = getpagesize(); + absl::string_view contents(buf, rc); + absl::string_view::size_type start = 0; + int index = 0; + do { + auto end = contents.find(' ', start); + + absl::string_view value; + if (end == absl::string_view::npos) { + value = contents.substr(start); + } else { + value = contents.substr(start, end - start); + } + + int64_t parsed; + if (!absl::SimpleAtoi(value, &parsed)) { + return false; + } + + // Fields in /proc/self/statm: + // [0] = vss + // [1] = rss + // [2] = shared + // [3] = code + // [4] = unused + // [5] = data + stack + // [6] = unused + switch (index) { + case 0: + stats->vss = parsed * pagesize; + break; + case 1: + stats->rss = parsed * pagesize; + break; + case 2: + stats->shared = parsed * pagesize; + break; + case 3: + stats->code = parsed * pagesize; + break; + case 5: + stats->data = parsed * pagesize; + break; + case 4: + case 6: + default: + // Unused + break; + } + + if (end == absl::string_view::npos) { + break; + } + + start = end + 1; + } while (start < contents.size() && index++ < 6); + + if (index < 6) { + return false; + 
} + + return true; +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/memory_stats.h b/contrib/libs/tcmalloc/tcmalloc/internal/memory_stats.h new file mode 100644 index 0000000000..a65f5b03d3 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/memory_stats.h @@ -0,0 +1,41 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_INTERNAL_MEMORY_STATS_H_ +#define TCMALLOC_INTERNAL_MEMORY_STATS_H_ + +#include <stdint.h> + +#include "tcmalloc/internal/config.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +struct MemoryStats { + int64_t vss; + int64_t rss; + int64_t shared; + int64_t code; + int64_t data; +}; + +// Memory stats of a process +bool GetMemoryStats(MemoryStats* stats); + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_INTERNAL_MEMORY_STATS_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/memory_stats_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/memory_stats_test.cc new file mode 100644 index 0000000000..176c712734 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/memory_stats_test.cc @@ -0,0 +1,43 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/internal/memory_stats.h" + +#include <memory> + +#include "gtest/gtest.h" + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +TEST(Stats, ValidRanges) { + MemoryStats stats; +#if defined(__linux__) + ASSERT_TRUE(GetMemoryStats(&stats)); +#else + ASSERT_FALSE(GetMemoryStats(&stats)); + return; +#endif + + EXPECT_GT(stats.vss, 0); + EXPECT_GT(stats.rss, 0); + EXPECT_GT(stats.shared, 0); + EXPECT_GT(stats.code, 0); + EXPECT_GT(stats.data, 0); +} + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/mincore.cc b/contrib/libs/tcmalloc/tcmalloc/internal/mincore.cc new file mode 100644 index 0000000000..e4120bcf5a --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/mincore.cc @@ -0,0 +1,129 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/internal/mincore.h" + +#include <sys/mman.h> +#include <unistd.h> + +#include <algorithm> +#include <cstdint> + +#include "tcmalloc/internal/config.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// Class that implements the call into the OS provided mincore() function. +class OsMInCore final : public MInCoreInterface { + public: + int mincore(void* addr, size_t length, unsigned char* result) final { + return ::mincore(addr, length, result); + } + + ~OsMInCore() override = default; +}; + +// Returns the number of resident bytes for an range of memory of arbitrary +// alignment and size. +size_t MInCore::residence_impl(void* addr, size_t size, + MInCoreInterface* mincore) { + if (size == 0) { + return 0; + } + unsigned char res[kArrayLength]; + const size_t kPageSize = getpagesize(); + + uintptr_t uaddr = reinterpret_cast<uintptr_t>(addr); + // Round address down to get the start of the page containing the data. + uintptr_t basePage = uaddr & ~(kPageSize - 1); + // Round end address up to get the end of the page containing the data. + uintptr_t endPage = (uaddr + size + kPageSize - 1) & ~(kPageSize - 1); + + uintptr_t remainingPages = endPage - basePage; + + // We need to handle the first and last pages differently. Most pages + // will contribute pagesize bytes to residence, but the first and last + // pages will contribute fewer than that. Easiest way to do this is to + // handle the special case where the entire object fits into a page, + // then handle the case where the object spans more than one page. + if (remainingPages == kPageSize) { + // Find out whether the first page is resident. + mincore->mincore(reinterpret_cast<void*>(basePage), remainingPages, res); + // Residence info is returned in LSB, other bits are undefined. + if ((res[0] & 1) == 1) { + return size; + } + return 0; + } + + // We're calling this outside the loop so that we can get info for the + // first page, deal with subsequent pages in the loop, and then handle + // the last page after the loop. + size_t scanLength = std::min(remainingPages, kPageSize * kArrayLength); + if (mincore->mincore(reinterpret_cast<void*>(basePage), scanLength, res) != + 0) { + return 0; + } + + size_t totalResident = 0; + + // Handle the first page. + size_t firstPageSize = kPageSize - (uaddr - basePage); + if ((res[0] & 1) == 1) { + totalResident += firstPageSize; + } + basePage += kPageSize; + remainingPages -= kPageSize; + + int resIndex = 1; + + // Handle all pages but the last page. + while (remainingPages > kPageSize) { + if ((res[resIndex] & 1) == 1) { + totalResident += kPageSize; + } + resIndex++; + basePage += kPageSize; + remainingPages -= kPageSize; + // Refresh the array if necessary. 
+ if (resIndex == kArrayLength) { + resIndex = 0; + scanLength = std::min(remainingPages, kPageSize * kArrayLength); + if (mincore->mincore(reinterpret_cast<void*>(basePage), scanLength, + res) != 0) { + return 0; + } + } + } + + // Check final page + size_t lastPageSize = kPageSize - (endPage - uaddr - size); + if ((res[resIndex] & 1) == 1) { + totalResident += lastPageSize; + } + + return totalResident; +} + +// Return residence info using call to OS provided mincore(). +size_t MInCore::residence(void* addr, size_t size) { + OsMInCore mc; + return residence_impl(addr, size, &mc); +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/mincore.h b/contrib/libs/tcmalloc/tcmalloc/internal/mincore.h new file mode 100644 index 0000000000..c353bdac87 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/mincore.h @@ -0,0 +1,65 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_INTERNAL_MINCORE_H_ +#define TCMALLOC_INTERNAL_MINCORE_H_ + +#include <stddef.h> + +#include "tcmalloc/internal/config.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// Class to wrap mincore so that we can replace it for testing. +class MInCoreInterface { + public: + MInCoreInterface() {} + virtual ~MInCoreInterface() {} + virtual int mincore(void* addr, size_t length, unsigned char* result) = 0; + + private: + MInCoreInterface(const MInCoreInterface&) = delete; + MInCoreInterface& operator=(const MInCoreInterface&) = delete; +}; + +// The MInCore class through the function residence(addr, size) provides +// a convenient way to report the residence of an arbitrary memory region. +// This is a wrapper for the ::mincore() function. The ::mincore() function has +// the constraint of requiring the base address to be page aligned. +class MInCore { + public: + MInCore() {} + // For a region of memory return the number of bytes that are + // actually resident in memory. Note that the address and size + // do not need to be a multiple of the system page size. + static size_t residence(void* addr, size_t size); + + private: + // Separate out the implementation to make the code easier to test. + static size_t residence_impl(void* addr, size_t size, + MInCoreInterface* mincore); + + // Size of the array used to gather results from mincore(). 
+ static constexpr int kArrayLength = 4096; + // Friends required for testing + friend class MInCoreTest; +}; + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_INTERNAL_MINCORE_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/mincore_benchmark.cc b/contrib/libs/tcmalloc/tcmalloc/internal/mincore_benchmark.cc new file mode 100644 index 0000000000..02c8ead48d --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/mincore_benchmark.cc @@ -0,0 +1,61 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <sys/mman.h> +#include <unistd.h> + +#include <algorithm> +#include <cstdint> +#include <memory> +#include <set> + +#include "absl/memory/memory.h" +#include "benchmark/benchmark.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/mincore.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace { + +// Benchmark performance of mincore. We use an array on the stack to gather +// mincore data. The larger the array the more we amortise the cost of calling +// mincore, but the more stack space the array takes up. +void BM_mincore(benchmark::State& state) { + const int size = state.range(0); + + // If we want to place the array on the stack then the maximum frame size is + // 16KiB. So there is no point in benchmarking sizes larger than this. + const int kMaxArraySize = 16 * 1024; + CHECK_CONDITION(size <= kMaxArraySize); + auto resident = absl::make_unique<unsigned char[]>(kMaxArraySize); + + const size_t kPageSize = getpagesize(); + // We want to scan the same amount of memory in all cases + const size_t regionSize = 1 * 1024 * 1024 * 1024; + for (auto s : state) { + uintptr_t memory = 0; + while (memory < regionSize) { + // Call mincore for the next section + int length = std::min(size * kPageSize, (regionSize - memory)); + ::mincore(reinterpret_cast<void*>(memory), length, resident.get()); + memory += length * kPageSize; + } + } +} +BENCHMARK(BM_mincore)->Range(1, 16 * 1024); + +} // namespace +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/mincore_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/mincore_test.cc new file mode 100644 index 0000000000..daa1178b25 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/mincore_test.cc @@ -0,0 +1,193 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
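MInCore::residence(), declared above, takes an arbitrary unaligned address range and returns the number of resident bytes, counting the first and last pages only partially. A sketch of a caller, assuming a Linux host (the buffer size and the use of malloc/memset are illustrative):

// Assumes <cstdio>, <cstdlib>, <cstring> and "tcmalloc/internal/mincore.h".
void ReportResidence() {
  constexpr size_t kBytes = 1 << 20;
  void* p = malloc(kBytes);
  if (p == nullptr) return;
  memset(p, 0, kBytes);  // fault the pages in
  const size_t resident =
      tcmalloc::tcmalloc_internal::MInCore::residence(p, kBytes);
  // Usually close to kBytes right after the memset, but the kernel may
  // reclaim pages at any time, so treat the value as advisory.
  printf("%zu of %zu bytes resident\n", resident, kBytes);
  free(p);
}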
+ +#include "tcmalloc/internal/mincore.h" + +#include <sys/mman.h> +#include <unistd.h> + +#include <algorithm> +#include <cstdint> +#include <memory> +#include <set> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "benchmark/benchmark.h" +#include "tcmalloc/internal/logging.h" + +namespace tcmalloc { +namespace tcmalloc_internal { + +using ::testing::Eq; + +// Mock interface to mincore() which has reports residence based on +// an array provided at construction. +class MInCoreMock : public MInCoreInterface { + public: + MInCoreMock() : mapped_() {} + ~MInCoreMock() override {} + + // Implementation of minCore that reports presence based on provided array. + int mincore(void* addr, size_t length, unsigned char* result) override { + const size_t kPageSize = getpagesize(); + uintptr_t uAddress = reinterpret_cast<uintptr_t>(addr); + // Check that we only pass page aligned addresses into mincore(). + EXPECT_THAT(uAddress & (kPageSize - 1), Eq(0)); + + uintptr_t uEndAddress = uAddress + length; + int index = 0; + // Check for presence of the target pages in the map. + while (uAddress < uEndAddress) { + result[index] = (mapped_.find(uAddress) != mapped_.end() ? 1 : 0); + uAddress += kPageSize; + index++; + } + return 0; + } + + void addPage(uintptr_t uAddress) { mapped_.insert(uAddress); } + + private: + std::set<uintptr_t> mapped_; +}; + +// Friend class of MInCore which calls the mincore mock. +class MInCoreTest { + public: + MInCoreTest() : mcm_() {} + ~MInCoreTest() {} + + size_t residence(uintptr_t addr, size_t size) { + return MInCore::residence_impl(reinterpret_cast<void*>(addr), size, &mcm_); + } + + void addPage(uintptr_t page) { mcm_.addPage(page); } + + // Expose the internal size of array that we use to call mincore() so + // that we can be sure to need multiple calls to cover large memory regions. + const size_t chunkSize() { return MInCore::kArrayLength; } + + private: + MInCoreMock mcm_; +}; + +namespace { + +using ::testing::Eq; + +TEST(StaticVarsTest, TestResidence) { + MInCoreTest mct; + const size_t kPageSize = getpagesize(); + + // Set up a pattern with a few resident pages. + // page 0 not mapped + mct.addPage(kPageSize); + // page 2 not mapped + mct.addPage(3 * kPageSize); + mct.addPage(4 * kPageSize); + + // An object of size zero should have a residence of zero. + EXPECT_THAT(mct.residence(320, 0), Eq(0)); + + // Check that an object entirely on the first page is + // reported as entirely unmapped. + EXPECT_THAT(mct.residence(320, 55), Eq(0)); + + // Check that an object entirely on the second page is + // reported as entirely mapped. + EXPECT_THAT(mct.residence(kPageSize + 320, 55), Eq(55)); + + // An object of size zero should have a residence of zero. + EXPECT_THAT(mct.residence(kPageSize + 320, 0), Eq(0)); + + // Check that an object over a mapped and unmapped page is half mapped. + EXPECT_THAT(mct.residence(kPageSize / 2, kPageSize), Eq(kPageSize / 2)); + + // Check that an object which spans two pages is reported as being mapped + // only on the page that's resident. + EXPECT_THAT(mct.residence(kPageSize / 2 * 3, kPageSize), Eq(kPageSize / 2)); + + // Check that an object that is on two mapped pages is reported as entirely + // resident. + EXPECT_THAT(mct.residence(kPageSize / 2 * 7, kPageSize), Eq(kPageSize)); + + // Check that an object that is on one mapped page is reported as only + // resident on the mapped page. 
+ EXPECT_THAT(mct.residence(kPageSize * 2, kPageSize + 1), Eq(1)); + + // Check that an object that is on one mapped page is reported as only + // resident on the mapped page. + EXPECT_THAT(mct.residence(kPageSize + 1, kPageSize + 1), Eq(kPageSize - 1)); + + // Check that an object which spans beyond the mapped pages is reported + // as unmapped + EXPECT_THAT(mct.residence(kPageSize * 6, kPageSize), Eq(0)); + + // Check an object that spans three pages, two of them mapped. + EXPECT_THAT(mct.residence(kPageSize / 2 * 7 + 1, kPageSize * 2), + Eq(kPageSize * 3 / 2 - 1)); +} + +// Test whether we are correctly handling multiple calls to mincore. +TEST(StaticVarsTest, TestLargeResidence) { + MInCoreTest mct; + uintptr_t uAddress = 0; + const size_t kPageSize = getpagesize(); + // Set up a pattern covering 6 * page size * MInCore::kArrayLength to + // allow us to test for situations where the region we're checking + // requires multiple calls to mincore(). + // Use a mapped/unmapped/unmapped pattern, this will mean that + // the regions examined by mincore() do not have regular alignment + // with the pattern. + for (int i = 0; i < 2 * mct.chunkSize(); i++) { + mct.addPage(uAddress); + uAddress += 3 * kPageSize; + } + + uintptr_t baseAddress = 0; + for (int size = kPageSize; size < 32 * 1024 * 1024; size += 2 * kPageSize) { + uintptr_t unit = kPageSize * 3; + EXPECT_THAT(mct.residence(baseAddress, size), + Eq(kPageSize * ((size + unit - 1) / unit))); + } +} + +TEST(StaticVarsTest, UnmappedMemory) { + const size_t kPageSize = getpagesize(); + const int kNumPages = 16; + + // Overallocate kNumPages of memory, so we can munmap the page before and + // after it. + void* p = mmap(nullptr, (kNumPages + 2) * kPageSize, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + ASSERT_NE(p, MAP_FAILED) << errno; + ASSERT_EQ(munmap(p, kPageSize), 0); + void* q = reinterpret_cast<char*>(p) + kPageSize; + void* last = reinterpret_cast<char*>(p) + (kNumPages + 1) * kPageSize; + ASSERT_EQ(munmap(last, kPageSize), 0); + + memset(q, 0, kNumPages * kPageSize); + ::benchmark::DoNotOptimize(q); + + for (int i = 0; i <= kNumPages; i++) { + EXPECT_EQ(i * kPageSize, MInCore::residence(q, i * kPageSize)); + } + + ASSERT_EQ(munmap(q, kNumPages * kPageSize), 0); +} + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/mock_span.h b/contrib/libs/tcmalloc/tcmalloc/internal/mock_span.h new file mode 100644 index 0000000000..10922c48bd --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/mock_span.h @@ -0,0 +1,42 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef TCMALLOC_INTERNAL_MOCK_SPAN_H_ +#define TCMALLOC_INTERNAL_MOCK_SPAN_H_ + +#include "tcmalloc/internal/linked_list.h" + +namespace tcmalloc { +namespace tcmalloc_internal { + +class MockSpan; +typedef TList<MockSpan> MockSpanList; + +class MockSpan : public MockSpanList::Elem { + public: + MockSpan() {} + + static MockSpan* New(int idx = 0) { + MockSpan* ret = new MockSpan(); + ret->index_ = idx; + return ret; + } + + int index_; +}; + +} // namespace tcmalloc_internal +} // namespace tcmalloc + +#endif // TCMALLOC_INTERNAL_MOCK_SPAN_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/numa.cc b/contrib/libs/tcmalloc/tcmalloc/internal/numa.cc new file mode 100644 index 0000000000..1639bd1b6d --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/numa.cc @@ -0,0 +1,220 @@ +// Copyright 2021 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/internal/numa.h" + +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <sched.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <unistd.h> + +#include <array> +#include <cstring> + +#include "absl/base/attributes.h" +#include "absl/base/internal/sysinfo.h" +#include "absl/functional/function_ref.h" +#include "absl/strings/numbers.h" +#include "absl/strings/string_view.h" +#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/environment.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/util.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// Returns true iff NUMA awareness should be enabled by default (i.e. in the +// absence of the TCMALLOC_NUMA_AWARE environment variable). This weak +// implementation may be overridden by the one in want_numa_aware.cc. +ABSL_ATTRIBUTE_WEAK bool default_want_numa_aware() { return false; } + +int OpenSysfsCpulist(size_t node) { + char path[PATH_MAX]; + snprintf(path, sizeof(path), "/sys/devices/system/node/node%zu/cpulist", + node); + return signal_safe_open(path, O_RDONLY | O_CLOEXEC); +} + +cpu_set_t ParseCpulist(absl::FunctionRef<ssize_t(char *, size_t)> read) { + cpu_set_t set; + CPU_ZERO(&set); + + std::array<char, 16> buf; + size_t carry_over = 0; + int cpu_from = -1; + + while (true) { + const ssize_t rc = read(buf.data() + carry_over, buf.size() - carry_over); + CHECK_CONDITION(rc >= 0); + + const absl::string_view current(buf.data(), carry_over + rc); + + // If we have no more data to parse & couldn't read any then we've reached + // the end of the input & are done. 
+ if (current.empty() && rc == 0) { + break; + } + + size_t consumed; + const size_t dash = current.find('-'); + const size_t comma = current.find(','); + if (dash != absl::string_view::npos && dash < comma) { + CHECK_CONDITION(absl::SimpleAtoi(current.substr(0, dash), &cpu_from)); + consumed = dash + 1; + } else if (comma != absl::string_view::npos || rc == 0) { + int cpu; + CHECK_CONDITION(absl::SimpleAtoi(current.substr(0, comma), &cpu)); + if (comma == absl::string_view::npos) { + consumed = current.size(); + } else { + consumed = comma + 1; + } + if (cpu_from != -1) { + for (int c = cpu_from; c <= cpu; c++) { + CPU_SET(c, &set); + } + cpu_from = -1; + } else { + CPU_SET(cpu, &set); + } + } else { + consumed = 0; + } + + carry_over = current.size() - consumed; + memmove(buf.data(), buf.data() + consumed, carry_over); + } + + return set; +} + +bool InitNumaTopology(size_t cpu_to_scaled_partition[CPU_SETSIZE], + uint64_t *const partition_to_nodes, + NumaBindMode *const bind_mode, + const size_t num_partitions, const size_t scale_by, + absl::FunctionRef<int(size_t)> open_node_cpulist) { + // Node 0 will always map to partition 0; record it here in case the system + // doesn't support NUMA or the user opts out of our awareness of it - in + // either case we'll record nothing in the loop below. + partition_to_nodes[NodeToPartition(0, num_partitions)] |= 1 << 0; + + // If we only compiled in support for one partition then we're trivially + // done; NUMA awareness is unavailable. + if (num_partitions == 1) return false; + + // We rely on rseq to quickly obtain a CPU ID & lookup the appropriate + // partition in NumaTopology::GetCurrentPartition(). If rseq is unavailable, + // disable NUMA awareness. + if (!subtle::percpu::IsFast()) return false; + + // Honor default_want_numa_aware() to allow compile time configuration of + // whether to enable NUMA awareness by default, and allow the user to + // override that either way by setting TCMALLOC_NUMA_AWARE in the + // environment. + // + // In cases where we don't enable NUMA awareness we simply return. Since the + // cpu_to_scaled_partition & partition_to_nodes arrays are zero initialized + // we're trivially done - CPUs all map to partition 0, which contains only + // CPU 0 added above. + const char *e = + tcmalloc::tcmalloc_internal::thread_safe_getenv("TCMALLOC_NUMA_AWARE"); + if (e == nullptr) { + // Enable NUMA awareness iff default_want_numa_aware(). + if (!default_want_numa_aware()) return false; + } else if (!strcmp(e, "no-binding")) { + // Enable NUMA awareness with no memory binding behavior. + *bind_mode = NumaBindMode::kNone; + } else if (!strcmp(e, "advisory-binding") || !strcmp(e, "1")) { + // Enable NUMA awareness with advisory memory binding behavior. + *bind_mode = NumaBindMode::kAdvisory; + } else if (!strcmp(e, "strict-binding")) { + // Enable NUMA awareness with strict memory binding behavior. + *bind_mode = NumaBindMode::kStrict; + } else if (!strcmp(e, "0")) { + // Disable NUMA awareness. + return false; + } else { + Crash(kCrash, __FILE__, __LINE__, "bad TCMALLOC_NUMA_AWARE env var", e); + } + + // The cpu_to_scaled_partition array has a fixed size so that we can + // statically allocate it & avoid the need to check whether it has been + // allocated prior to lookups. It has CPU_SETSIZE entries which ought to be + // sufficient, but sanity check that indexing it by CPU number shouldn't + // exceed its bounds. 
+ int num_cpus = absl::base_internal::NumCPUs(); + CHECK_CONDITION(num_cpus <= CPU_SETSIZE); + + // We could just always report that we're NUMA aware, but if a NUMA-aware + // binary runs on a system that doesn't include multiple NUMA nodes then our + // NUMA awareness will offer no benefit whilst incurring the cost of + // redundant work & stats. As such we only report that we're NUMA aware if + // there's actually NUMA to be aware of, which we track here. + bool numa_aware = false; + + for (size_t node = 0;; node++) { + // Detect NUMA nodes by opening their cpulist files from sysfs. + const int fd = open_node_cpulist(node); + if (fd == -1) { + // We expect to encounter ENOENT once node surpasses the actual number of + // nodes present in the system. Any other error is a problem. + CHECK_CONDITION(errno == ENOENT); + break; + } + + // Record this node in partition_to_nodes. + const size_t partition = NodeToPartition(node, num_partitions); + partition_to_nodes[partition] |= 1 << node; + + // cpu_to_scaled_partition_ entries are default initialized to zero, so + // skip redundantly parsing CPU lists for nodes that map to partition 0. + if (partition == 0) { + signal_safe_close(fd); + continue; + } + + // Parse the cpulist file to determine which CPUs are local to this node. + const cpu_set_t node_cpus = + ParseCpulist([&](char *const buf, const size_t count) { + return signal_safe_read(fd, buf, count, /*bytes_read=*/nullptr); + }); + + // Assign local CPUs to the appropriate partition. + for (size_t cpu = 0; cpu < CPU_SETSIZE; cpu++) { + if (CPU_ISSET(cpu, &node_cpus)) { + cpu_to_scaled_partition[cpu + kNumaCpuFudge] = partition * scale_by; + } + } + + // If we observed any CPUs for this node then we've now got CPUs assigned + // to a non-zero partition; report that we're NUMA aware. + if (CPU_COUNT(&node_cpus) != 0) { + numa_aware = true; + } + + signal_safe_close(fd); + } + + return numa_aware; +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/numa.h b/contrib/libs/tcmalloc/tcmalloc/internal/numa.h new file mode 100644 index 0000000000..bf04c65c21 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/numa.h @@ -0,0 +1,227 @@ +// Copyright 2021 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_INTERNAL_NUMA_H_ +#define TCMALLOC_INTERNAL_NUMA_H_ + +#include <sched.h> +#include <stddef.h> +#include <sys/types.h> + +#include "absl/functional/function_ref.h" +#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/percpu.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// Indicates how TCMalloc should handle binding memory regions to nodes within +// particular NUMA partitions. +enum class NumaBindMode { + // Don't bind memory at all. Note that this does not make NUMA awareness + // pointless so long as the NUMA memory policy of threads performing + // allocations favors the local node. 
It does mean that we won't be certain + // that memory is local to any particular partition, it will just be likely. + kNone, + // Attempt to bind memory but don't treat failure as fatal. If binding fails + // then a warning will be logged & we'll then be in much the same state as + // kNone. + kAdvisory, + // Strictly bind memory to nodes within the partition we expect - any error + // in doing so is fatal & the program will crash. This allows a program to + // ensure that memory is definitely bound to the set of nodes we expect. + kStrict, +}; + +// We use the result of RseqCpuId() in GetCurrentPartition() to avoid branching +// in the fast path, but this means that the CPU number we look up in +// cpu_to_scaled_partition_ might equal kCpuIdUninitialized or +// kCpuIdUnsupported. We add this fudge factor to the value to compensate, +// ensuring that our accesses to the cpu_to_scaled_partition_ array are always +// in bounds. +static constexpr size_t kNumaCpuFudge = -subtle::percpu::kCpuIdUnsupported; + +// Provides information about the topology of a NUMA system. +// +// In general we cannot know at compile time how many NUMA nodes the system +// that we run upon will include, but we also cannot size our data structures +// arbitrarily at runtime in the name of efficiency. In order to resolve the +// conflict between these two constraints we define the concept of a NUMA +// 'partition' as being an arbitrary set of NUMA nodes, disjoint from all other +// partitions. At compile time we select a fixed number of partitions to +// support, and at runtime we map each NUMA node in the system to a partition. +// If the number of supported partitions is greater than or equal to the number +// of NUMA nodes in the system then partition & node are effectively identical. +// If however the system has more nodes than we do partitions then nodes +// assigned to the same partition will share size classes & thus memory. This +// may incur a performance hit, but allows us to at least run on any system. +template <size_t NumPartitions, size_t ScaleBy = 1> +class NumaTopology { + public: + // Trivially zero initialize data members. + constexpr NumaTopology() = default; + + // Initialize topology information. This must be called only once, before any + // of the functions below. + void Init(); + + // Like Init(), but allows a test to specify a different `open_node_cpulist` + // function in order to provide NUMA topology information that doesn't + // reflect the system we're running upon. + void InitForTest(absl::FunctionRef<int(size_t)> open_node_cpulist); + + // Returns true if NUMA awareness is available & enabled, otherwise false. + bool numa_aware() const { + // Explicitly checking NumPartitions here provides a compile time constant + // false in cases where NumPartitions==1, allowing NUMA awareness to be + // optimized away. + return (NumPartitions > 1) && numa_aware_; + } + + // Returns the number of NUMA partitions deemed 'active' - i.e. the number of + // partitions that other parts of TCMalloc need to concern themselves with. + // Checking this rather than using kNumaPartitions allows users to avoid work + // on non-zero partitions when NUMA awareness is disabled. + size_t active_partitions() const { return numa_aware() ? NumPartitions : 1; } + + // Return a value indicating how we should behave with regards to binding + // memory regions to NUMA nodes. 
+ NumaBindMode bind_mode() const { return bind_mode_; } + + // Return the NUMA partition number to which the CPU we're currently + // executing upon belongs. Note that whilst the CPU->partition mapping is + // fixed, the return value of this function may change at arbitrary times as + // this thread migrates between CPUs. + size_t GetCurrentPartition() const; + + // Like GetCurrentPartition(), but returns a partition number multiplied by + // ScaleBy. + size_t GetCurrentScaledPartition() const; + + // Return the NUMA partition number to which `cpu` belongs. + // + // It is valid for cpu to equal subtle::percpu::kCpuIdUninitialized or + // subtle::percpu::kCpuIdUnsupported. In either case partition 0 will be + // returned. + size_t GetCpuPartition(int cpu) const; + + // Like GetCpuPartition(), but returns a partition number multiplied by + // ScaleBy. + size_t GetCpuScaledPartition(int cpu) const; + + // Return a bitmap in which set bits identify the nodes that belong to the + // specified NUMA `partition`. + uint64_t GetPartitionNodes(int partition) const; + + private: + // Maps from CPU number (plus kNumaCpuFudge) to NUMA partition. + size_t cpu_to_scaled_partition_[CPU_SETSIZE + kNumaCpuFudge] = {0}; + // Maps from NUMA partition to a bitmap of NUMA nodes within the partition. + uint64_t partition_to_nodes_[NumPartitions] = {0}; + // Indicates whether NUMA awareness is available & enabled. + bool numa_aware_ = false; + // Desired memory binding behavior. + NumaBindMode bind_mode_ = NumaBindMode::kAdvisory; +}; + +// Opens a /sys/devices/system/node/nodeX/cpulist file for read only access & +// returns the file descriptor. +int OpenSysfsCpulist(size_t node); + +// Parse a CPU list in the format used by +// /sys/devices/system/node/nodeX/cpulist files - that is, individual CPU +// numbers or ranges in the format <start>-<end> inclusive all joined by comma +// characters. +// +// The read function is expected to operate much like the read syscall. It +// should read up to `count` bytes into `buf` and return the number of bytes +// actually read. If an error occurs during reading it should return -1 with +// errno set to an appropriate error code. +cpu_set_t ParseCpulist( + absl::FunctionRef<ssize_t(char *buf, size_t count)> read); + +// Initialize the data members of a NumaTopology<> instance. +// +// This function must only be called once per NumaTopology<> instance, and +// relies upon the data members of that instance being default initialized. +// +// The `open_node_cpulist` function is typically OpenSysfsCpulist but tests may +// use a different implementation. +// +// Returns true if we're actually NUMA aware; i.e. if we have CPUs mapped to +// multiple partitions. +bool InitNumaTopology(size_t cpu_to_scaled_partition[CPU_SETSIZE], + uint64_t *partition_to_nodes, NumaBindMode *bind_mode, + size_t num_partitions, size_t scale_by, + absl::FunctionRef<int(size_t)> open_node_cpulist); + +// Returns the NUMA partition to which `node` belongs. 
+inline size_t NodeToPartition(const size_t node, const size_t num_partitions) { + return node % num_partitions; +} + +template <size_t NumPartitions, size_t ScaleBy> +inline void NumaTopology<NumPartitions, ScaleBy>::Init() { + numa_aware_ = + InitNumaTopology(cpu_to_scaled_partition_, partition_to_nodes_, + &bind_mode_, NumPartitions, ScaleBy, OpenSysfsCpulist); +} + +template <size_t NumPartitions, size_t ScaleBy> +inline void NumaTopology<NumPartitions, ScaleBy>::InitForTest( + absl::FunctionRef<int(size_t)> open_node_cpulist) { + numa_aware_ = + InitNumaTopology(cpu_to_scaled_partition_, partition_to_nodes_, + &bind_mode_, NumPartitions, ScaleBy, open_node_cpulist); +} + +template <size_t NumPartitions, size_t ScaleBy> +inline size_t NumaTopology<NumPartitions, ScaleBy>::GetCurrentPartition() + const { + if constexpr (NumPartitions == 1) return 0; + return GetCpuPartition(subtle::percpu::RseqCpuId()); +} + +template <size_t NumPartitions, size_t ScaleBy> +inline size_t NumaTopology<NumPartitions, ScaleBy>::GetCurrentScaledPartition() + const { + if constexpr (NumPartitions == 1) return 0; + return GetCpuScaledPartition(subtle::percpu::RseqCpuId()); +} + +template <size_t NumPartitions, size_t ScaleBy> +inline size_t NumaTopology<NumPartitions, ScaleBy>::GetCpuPartition( + const int cpu) const { + return GetCpuScaledPartition(cpu) / ScaleBy; +} + +template <size_t NumPartitions, size_t ScaleBy> +inline size_t NumaTopology<NumPartitions, ScaleBy>::GetCpuScaledPartition( + const int cpu) const { + if constexpr (NumPartitions == 1) return 0; + return cpu_to_scaled_partition_[cpu + kNumaCpuFudge]; +} + +template <size_t NumPartitions, size_t ScaleBy> +inline uint64_t NumaTopology<NumPartitions, ScaleBy>::GetPartitionNodes( + const int partition) const { + return partition_to_nodes_[partition]; +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_INTERNAL_NUMA_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/numa_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/numa_test.cc new file mode 100644 index 0000000000..bbd86a3f7d --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/numa_test.cc @@ -0,0 +1,284 @@ +// Copyright 2021 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
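ParseCpulist(), defined above, pulls input through the caller-supplied read callback and consumes one token per iteration, carrying partial tokens across reads, so the callback may return data in arbitrarily small pieces. A sketch that feeds it from an in-memory string rather than a sysfs file descriptor (the helper name and sample cpulist are invented; the randomized test below stresses the same interface more thoroughly):

// Assumes <algorithm>, <cstring> and "tcmalloc/internal/numa.h".
cpu_set_t ParseCpulistFromString(absl::string_view cpulist) {
  return tcmalloc::tcmalloc_internal::ParseCpulist(
      [&](char* buf, size_t count) -> ssize_t {
        // Behave like read(2): hand back up to `count` bytes, 0 at end.
        const size_t n = std::min(count, cpulist.size());
        if (n > 0) memcpy(buf, cpulist.data(), n);
        cpulist.remove_prefix(n);
        return n;
      });
}
// ParseCpulistFromString("0-3,8,10-11") sets CPUs 0,1,2,3,8,10,11 in the
// returned cpu_set_t.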
+ +#include "tcmalloc/internal/numa.h" + +#include <errno.h> +#include <linux/memfd.h> +#include <sched.h> +#include <stddef.h> +#include <stdio.h> +#include <string.h> +#include <syscall.h> +#include <unistd.h> + +#include <algorithm> +#include <new> +#include <string> +#include <utility> +#include <vector> + +#include "gtest/gtest.h" +#include "absl/random/random.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_join.h" +#include "absl/strings/string_view.h" +#include "absl/types/span.h" +#include "tcmalloc/internal/logging.h" + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +int memfd_create(const char *name, unsigned int flags) { +#ifdef __NR_memfd_create + return syscall(__NR_memfd_create, name, flags); +#else + errno = ENOSYS; + return -1; +#endif +} + +// A synthetic cpulist that can be read from a file descriptor. +class SyntheticCpuList { + public: + explicit SyntheticCpuList(const absl::string_view content) { + fd_ = memfd_create("cpulist", MFD_CLOEXEC); + CHECK_CONDITION(fd_ != -1); + + CHECK_CONDITION(write(fd_, content.data(), content.size()) == + content.size()); + CHECK_CONDITION(write(fd_, "\n", 1) == 1); + CHECK_CONDITION(lseek(fd_, 0, SEEK_SET) == 0); + } + + ~SyntheticCpuList() { close(fd_); } + + // Disallow copies, which would make require reference counting to know when + // we should close fd_. + SyntheticCpuList(const SyntheticCpuList &) = delete; + SyntheticCpuList &operator=(const SyntheticCpuList &) = delete; + + // Moves are fine - only one instance at a time holds the fd. + SyntheticCpuList(SyntheticCpuList &&other) + : fd_(std::exchange(other.fd_, -1)) {} + SyntheticCpuList &operator=(SyntheticCpuList &&other) { + new (this) SyntheticCpuList(std::move(other)); + return *this; + } + + int fd() const { return fd_; } + + private: + // The underlying memfd. + int fd_; +}; + +class NumaTopologyTest : public ::testing::Test { + protected: + void SetUp() override { + // We use memfd to create synthetic cpulist files, and can't run without + // it. Skip all affected tests if memfd is not supported (i.e. Linux < + // 3.17). + const int fd = memfd_create("test", MFD_CLOEXEC); + if (fd == -1 && errno == ENOSYS) { + GTEST_SKIP() << "Test requires memfd support"; + } + close(fd); + + // If rseq is unavailable the NumaTopology never enables NUMA awareness. + if (!subtle::percpu::IsFast()) { + GTEST_SKIP() << "Test requires rseq support"; + } + } +}; + +template <size_t NumPartitions> +NumaTopology<NumPartitions> CreateNumaTopology( + const absl::Span<const SyntheticCpuList> cpu_lists) { + NumaTopology<NumPartitions> nt; + nt.InitForTest([&](const size_t node) { + if (node >= cpu_lists.size()) { + errno = ENOENT; + return -1; + } + return cpu_lists[node].fd(); + }); + return nt; +} + +// Ensure that if we set NumPartitions=1 then NUMA awareness is disabled even +// in the presence of a system with multiple NUMA nodes. +TEST_F(NumaTopologyTest, NoCompileTimeNuma) { + std::vector<SyntheticCpuList> nodes; + nodes.emplace_back("0"); + nodes.emplace_back("1"); + + const auto nt = CreateNumaTopology<1>(nodes); + + EXPECT_EQ(nt.numa_aware(), false); + EXPECT_EQ(nt.active_partitions(), 1); +} + +// Ensure that if we run on a system with no NUMA support at all (i.e. no +// /sys/devices/system/node/nodeX/cpulist files) we correctly disable NUMA +// awareness. 
+TEST_F(NumaTopologyTest, NoRunTimeNuma) { + const auto nt = CreateNumaTopology<2>({}); + + EXPECT_EQ(nt.numa_aware(), false); + EXPECT_EQ(nt.active_partitions(), 1); +} + +// Ensure that if we run on a system with only 1 node then we disable NUMA +// awareness. +TEST_F(NumaTopologyTest, SingleNode) { + std::vector<SyntheticCpuList> nodes; + nodes.emplace_back("0-27"); + + const auto nt = CreateNumaTopology<4>(nodes); + + EXPECT_EQ(nt.numa_aware(), false); + EXPECT_EQ(nt.active_partitions(), 1); +} + +// Basic sanity test modelling a simple 2 node system. +TEST_F(NumaTopologyTest, TwoNode) { + std::vector<SyntheticCpuList> nodes; + nodes.emplace_back("0-5"); + nodes.emplace_back("6-11"); + + const auto nt = CreateNumaTopology<2>(nodes); + + EXPECT_EQ(nt.numa_aware(), true); + EXPECT_EQ(nt.active_partitions(), 2); + + for (int cpu = 0; cpu <= 5; cpu++) { + EXPECT_EQ(nt.GetCpuPartition(cpu), 0); + } + for (int cpu = 6; cpu <= 11; cpu++) { + EXPECT_EQ(nt.GetCpuPartition(cpu), 1); + } +} + +// Test that cpulists too long to fit into the 16 byte buffer used by +// InitNumaTopology() parse successfully. +TEST_F(NumaTopologyTest, LongCpuLists) { + std::vector<SyntheticCpuList> nodes; + + // Content from here onwards lies | + // beyond the 16 byte buffer. | + // v + nodes.emplace_back("0-1,2-3,4-5,6-7,8"); // Right after a comma + nodes.emplace_back("9,10,11,12,13,14,15-19"); // Right before a comma + nodes.emplace_back("20-21,22-23,24-25,26-29"); // Within range end + nodes.emplace_back("30-32,33,34,35,36-38,39"); // Within range start + nodes.emplace_back("40-43,44,45-49"); + + const auto nt = CreateNumaTopology<3>(nodes); + + EXPECT_EQ(nt.numa_aware(), true); + EXPECT_EQ(nt.active_partitions(), 3); + + for (int cpu = 0; cpu <= 8; cpu++) { + EXPECT_EQ(nt.GetCpuPartition(cpu), 0); + } + for (int cpu = 9; cpu <= 19; cpu++) { + EXPECT_EQ(nt.GetCpuPartition(cpu), 1); + } + for (int cpu = 20; cpu <= 29; cpu++) { + EXPECT_EQ(nt.GetCpuPartition(cpu), 2); + } + for (int cpu = 30; cpu <= 39; cpu++) { + EXPECT_EQ(nt.GetCpuPartition(cpu), 0); + } + for (int cpu = 40; cpu <= 49; cpu++) { + EXPECT_EQ(nt.GetCpuPartition(cpu), 1); + } +} + +// Ensure we can initialize using the host system's real NUMA topology +// information. +TEST_F(NumaTopologyTest, Host) { + NumaTopology<4> nt; + nt.Init(); + + // We don't actually know anything about the host, so there's not much more + // we can do beyond checking that we didn't crash. +} + +// Ensure that we can parse randomized cpulists correctly. +TEST(ParseCpulistTest, Random) { + absl::BitGen gen; + + static constexpr int kIterations = 100; + for (int i = 0; i < kIterations; i++) { + cpu_set_t reference; + CPU_ZERO(&reference); + + // Set a random number of CPUs within the reference set. + const double density = absl::Uniform(gen, 0.0, 1.0); + for (int cpu = 0; cpu < CPU_SETSIZE; cpu++) { + if (absl::Bernoulli(gen, density)) { + CPU_SET(cpu, &reference); + } + } + + // Serialize the reference set into a cpulist-style string. 
+ std::vector<std::string> components; + for (int cpu = 0; cpu < CPU_SETSIZE; cpu++) { + if (!CPU_ISSET(cpu, &reference)) continue; + + const int start = cpu; + int next = cpu + 1; + while (next < CPU_SETSIZE && CPU_ISSET(next, &reference)) { + cpu = next; + next = cpu + 1; + } + + if (cpu == start) { + components.push_back(absl::StrCat(cpu)); + } else { + components.push_back(absl::StrCat(start, "-", cpu)); + } + } + const std::string serialized = absl::StrJoin(components, ","); + + // Now parse that string using our ParseCpulist function, randomizing the + // amount of data we provide to it from each read. + absl::string_view remaining(serialized); + const cpu_set_t parsed = + ParseCpulist([&](char *const buf, const size_t count) -> ssize_t { + // Calculate how much data we have left to provide. + const size_t max = std::min(count, remaining.size()); + + // If none, we have no choice but to provide nothing. + if (max == 0) return 0; + + // If we do have data, return a randomly sized subset of it to stress + // the logic around reading partial values. + const size_t copy = absl::Uniform(gen, static_cast<size_t>(1), max); + memcpy(buf, remaining.data(), copy); + remaining.remove_prefix(copy); + return copy; + }); + + // We ought to have parsed the same set of CPUs that we serialized. + EXPECT_TRUE(CPU_EQUAL(&parsed, &reference)); + } +} + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/optimization.h b/contrib/libs/tcmalloc/tcmalloc/internal/optimization.h new file mode 100644 index 0000000000..6380183a50 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/optimization.h @@ -0,0 +1,45 @@ +// Copyright 2020 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_INTERNAL_OPTIMIZATION_H_ +#define TCMALLOC_INTERNAL_OPTIMIZATION_H_ + +#include "tcmalloc/internal/logging.h" + +// Our wrapper for __builtin_assume, allowing us to check the assumption on +// debug builds. +#ifndef NDEBUG +#ifdef __clang__ +#define ASSUME(cond) CHECK_CONDITION(cond), __builtin_assume(cond) +#else +#define ASSUME(cond) \ + CHECK_CONDITION(cond), (!(cond) ? __builtin_unreachable() : (void)0) +#endif +#else +#ifdef __clang__ +#define ASSUME(cond) __builtin_assume(cond) +#else +#define ASSUME(cond) (!(cond) ? __builtin_unreachable() : (void)0) +#endif +#endif + +// Annotations for functions that are not affected by nor affect observable +// state of the program. 
+#if ABSL_HAVE_ATTRIBUTE(const) +#define TCMALLOC_ATTRIBUTE_CONST __attribute__((const)) +#else +#define TCMALLOC_ATTRIBUTE_CONST +#endif + +#endif // TCMALLOC_INTERNAL_OPTIMIZATION_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/parameter_accessors.h b/contrib/libs/tcmalloc/tcmalloc/internal/parameter_accessors.h new file mode 100644 index 0000000000..f14798fe74 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/parameter_accessors.h @@ -0,0 +1,56 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_INTERNAL_PARAMETER_ACCESSORS_H_ +#define TCMALLOC_INTERNAL_PARAMETER_ACCESSORS_H_ + +#include "absl/base/attributes.h" +#include "absl/time/time.h" + +extern "C" { + +ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetBackgroundReleaseRate( + size_t value); +ABSL_ATTRIBUTE_WEAK uint64_t TCMalloc_Internal_GetHeapSizeHardLimit(); +ABSL_ATTRIBUTE_WEAK bool TCMalloc_Internal_GetHPAASubrelease(); +ABSL_ATTRIBUTE_WEAK void +TCMalloc_Internal_GetHugePageFillerSkipSubreleaseInterval(absl::Duration* v); +ABSL_ATTRIBUTE_WEAK bool TCMalloc_Internal_GetShufflePerCpuCachesEnabled(); +ABSL_ATTRIBUTE_WEAK bool TCMalloc_Internal_GetReclaimIdlePerCpuCachesEnabled(); +ABSL_ATTRIBUTE_WEAK bool TCMalloc_Internal_GetLazyPerCpuCachesEnabled(); +ABSL_ATTRIBUTE_WEAK double +TCMalloc_Internal_GetPeakSamplingHeapGrowthFraction(); +ABSL_ATTRIBUTE_WEAK bool TCMalloc_Internal_GetPerCpuCachesEnabled(); +ABSL_ATTRIBUTE_WEAK size_t TCMalloc_Internal_GetStats(char* buffer, + size_t buffer_length); +ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetGuardedSamplingRate(int64_t v); +ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetHeapSizeHardLimit(uint64_t v); +ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetHPAASubrelease(bool v); +ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetShufflePerCpuCachesEnabled( + bool v); +ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetReclaimIdlePerCpuCachesEnabled( + bool v); +ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetLazyPerCpuCachesEnabled(bool v); +ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetMaxPerCpuCacheSize(int32_t v); +ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetMaxTotalThreadCacheBytes( + int64_t v); +ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetPeakSamplingHeapGrowthFraction( + double v); +ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetPerCpuCachesEnabled(bool v); +ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_SetProfileSamplingRate(int64_t v); +ABSL_ATTRIBUTE_WEAK void +TCMalloc_Internal_SetHugePageFillerSkipSubreleaseInterval(absl::Duration v); +} + +#endif // TCMALLOC_INTERNAL_PARAMETER_ACCESSORS_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/percpu.cc b/contrib/libs/tcmalloc/tcmalloc/internal/percpu.cc new file mode 100644 index 0000000000..f8706f0f21 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/percpu.cc @@ -0,0 +1,352 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with 
the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "tcmalloc/internal/percpu.h" + +#include <fcntl.h> +#include <sched.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <syscall.h> +#include <unistd.h> + +#include <atomic> + +#include "absl/base/attributes.h" +#include "absl/base/call_once.h" // IWYU pragma: keep +#include "absl/base/internal/sysinfo.h" +#include "tcmalloc/internal/linux_syscall_support.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/optimization.h" +#include "tcmalloc/internal/util.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { +namespace subtle { +namespace percpu { + +// ---------------------------------------------------------------------------- +// Internal structures +// ---------------------------------------------------------------------------- + +// Restartable Sequence (RSEQ) + +extern "C" { +// We provide a per-thread value (defined in percpu_.c) which both tracks +// thread-local initialization state and (with RSEQ) provides an atomic +// in-memory reference for this thread's execution CPU. This value is only +// valid when the thread is currently executing +// Possible values: +// Unavailable/uninitialized: +// { kCpuIdUnsupported, kCpuIdUninitialized } +// Initialized, available: +// [0, NumCpus()) (Always updated at context-switch) +ABSL_PER_THREAD_TLS_KEYWORD ABSL_ATTRIBUTE_WEAK volatile kernel_rseq + __rseq_abi = { + 0, static_cast<unsigned>(kCpuIdUninitialized), 0, 0, + {0, 0}, {{kCpuIdUninitialized, kCpuIdUninitialized}}, +}; + +#ifdef __ppc__ +// On PPC, we have two cases for accessing the __rseq_abi TLS variable: +// * For initial-exec TLS, we write the raw assembly for accessing the memory +// with the appropriate relocations and offsets. On optimized builds, this is +// the use case that matters. +// * For non-initial-exec TLS, access is far more involved. We call this helper +// function from percpu_rseq_ppc.S to leave the initialization and access to +// the compiler. +ABSL_ATTRIBUTE_UNUSED ABSL_ATTRIBUTE_NOINLINE void* tcmalloc_tls_fetch_pic() { + return const_cast<kernel_rseq*>(&__rseq_abi); +} +#endif + +} // extern "C" + +enum PerCpuInitStatus { + kFastMode, + kSlowMode, +}; + +ABSL_CONST_INIT static PerCpuInitStatus init_status = kSlowMode; +ABSL_CONST_INIT static absl::once_flag init_per_cpu_once; +#if TCMALLOC_PERCPU_USE_RSEQ +ABSL_CONST_INIT static std::atomic<bool> using_upstream_fence{false}; +#endif // TCMALLOC_PERCPU_USE_RSEQ + +// Is this thread's __rseq_abi struct currently registered with the kernel? +static bool ThreadRegistered() { return RseqCpuId() >= kCpuIdInitialized; } + +static bool InitThreadPerCpu() { + // If we're already registered, there's nothing further for us to do. 
+ if (ThreadRegistered()) { + return true; + } + +#ifdef __NR_rseq + return 0 == syscall(__NR_rseq, &__rseq_abi, sizeof(__rseq_abi), 0, + TCMALLOC_PERCPU_RSEQ_SIGNATURE); +#endif // __NR_rseq + return false; +} + +bool UsingFlatVirtualCpus() { + return false; +} + +static void InitPerCpu() { + CHECK_CONDITION(absl::base_internal::NumCPUs() <= + std::numeric_limits<uint16_t>::max()); + + // Based on the results of successfully initializing the first thread, mark + // init_status to initialize all subsequent threads. + if (InitThreadPerCpu()) { + init_status = kFastMode; + +#if TCMALLOC_PERCPU_USE_RSEQ + constexpr int kMEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ = (1 << 8); + // It is safe to make the syscall below multiple times. + using_upstream_fence.store( + 0 == syscall(__NR_membarrier, + kMEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0), + std::memory_order_relaxed); +#endif // TCMALLOC_PERCPU_USE_RSEQ + } +} + +// Tries to initialize RSEQ both at the process-wide (init_status) and +// thread-level (cpu-id) level. If process-wide initialization has already been +// completed then only the thread-level will be completed. A return of false +// indicates that initialization failed and RSEQ is unavailable. +bool InitFastPerCpu() { + absl::base_internal::LowLevelCallOnce(&init_per_cpu_once, InitPerCpu); + + // Once we've decided fast-cpu support is available, initialization for all + // subsequent threads must succeed for consistency. + if (init_status == kFastMode && RseqCpuId() == kCpuIdUninitialized) { + CHECK_CONDITION(InitThreadPerCpu()); + } + + // If we've decided to use slow mode, set the thread-local CPU ID to + // __rseq_abi.cpu_id so that IsFast doesn't call this function again for + // this thread. + if (init_status == kSlowMode) { + __rseq_abi.cpu_id = kCpuIdUnsupported; + } + + return init_status == kFastMode; +} + +// ---------------------------------------------------------------------------- +// Implementation of unaccelerated (no RSEQ) per-cpu operations +// ---------------------------------------------------------------------------- + +static bool SetAffinityOneCpu(int cpu) { + cpu_set_t set; + CPU_ZERO(&set); + CPU_SET(cpu, &set); + if (0 == sched_setaffinity(0, sizeof(cpu_set_t), &set)) { + return true; + } + CHECK_CONDITION(errno == EINVAL); + return false; +} + +// We're being asked to fence against the mask <cpus>, but a NULL mask +// means every CPU. Do we need <cpu>? +static bool NeedCpu(int cpu, const cpu_set_t* cpus) { + if (cpus == nullptr) return true; + return CPU_ISSET(cpu, cpus); +} + +static void SlowFence(const cpu_set_t* cpus) { + // Necessary, so the point in time mentioned below has visibility + // of our writes. + std::atomic_thread_fence(std::memory_order_seq_cst); + + // First, save our cpumask (the user may want it back.) + cpu_set_t old; + CPU_ZERO(&old); + CHECK_CONDITION(0 == sched_getaffinity(0, sizeof(cpu_set_t), &old)); + + // Here's the basic idea: if we run on every CPU, then every thread + // that runs after us has certainly seen every store we've made up + // to this point, so we pin ourselves to each CPU in turn. + // + // But we can't run everywhere; our control plane may have set cpuset.cpus to + // some subset of CPUs (and may be changing it as we speak.) On the plus + // side, if we are unable to run on a particular CPU, the same is true for our + // siblings (up to some races, dealt with below), so we don't need to. 
+ + for (int cpu = 0; cpu < absl::base_internal::NumCPUs(); ++cpu) { + if (!NeedCpu(cpu, cpus)) { + // unnecessary -- user doesn't care about synchronization on this cpu + continue; + } + // If we can't pin ourselves there, then no one else can run there, so + // that's fine. + while (SetAffinityOneCpu(cpu)) { + // But even if the pin succeeds, we might not end up running there; + // between the pin trying to migrate and running on <cpu>, a change + // to cpuset.cpus may cause us to migrate somewhere else instead. + // So make sure we actually got where we wanted. + if (cpu == sched_getcpu()) { + break; + } + } + } + // Overly detailed explanation of kernel operations follows. + // + // OK, at this point, for each cpu i, there are two possibilities: + // * we've run on i (so we interrupted any sibling & writes are visible) + // * At some point in time T1, we read a value of cpuset.cpus disallowing i. + // + // Linux kernel details: all writes and reads to cpuset.cpus are + // serialized on a mutex (called callback_mutex). Because of the + // memory barrier above, our writes certainly happened-before T1. + // + // Moreover, whoever wrote cpuset.cpus to ban i looped over our + // threads in kernel, migrating all threads away from i and setting + // their masks to disallow i. So once that loop is known to be + // over, any thread that was running on i has been interrupted at + // least once, and migrated away. It is possible a second + // subsequent change to cpuset.cpus (at time T2) re-allowed i, but + // serialization of cpuset.cpus changes guarantee that our writes + // are visible at T2, and since migration is a barrier, any sibling + // migrated after T2 to cpu i will also see our writes. + // + // So we just have to make sure the update loop from whoever wrote + // cpuset.cpus at T1 is completed. That loop executes under a + // second mutex (cgroup_mutex.) So if we take that mutex ourselves, + // we can be sure that update loop at T1 is done. So read + // /proc/self/cpuset. We don't care what it says; as long as it takes the lock + // in question. This guarantees that every thread is either running on a cpu + // we visited, or received a cpuset.cpus rewrite that happened strictly after + // our writes. + + using tcmalloc::tcmalloc_internal::signal_safe_close; + using tcmalloc::tcmalloc_internal::signal_safe_open; + using tcmalloc::tcmalloc_internal::signal_safe_read; + int fd = signal_safe_open("/proc/self/cpuset", O_RDONLY); + CHECK_CONDITION(fd >= 0); + + char c; + CHECK_CONDITION(1 == signal_safe_read(fd, &c, 1, nullptr)); + + CHECK_CONDITION(0 == signal_safe_close(fd)); + + // Try to go back to what we originally had before Fence. + if (0 != sched_setaffinity(0, sizeof(cpu_set_t), &old)) { + CHECK_CONDITION(EINVAL == errno); + // The original set is no longer valid, which should only happen if + // cpuset.cpus was changed at some point in Fence. If that happened and we + // didn't fence, our control plane would have rewritten our affinity mask to + // everything in cpuset.cpus, so do that. 
+ cpu_set_t set; + CPU_ZERO(&set); + for (int i = 0; i < absl::base_internal::NumCPUs(); ++i) { + CPU_SET(i, &set); + } + CHECK_CONDITION(0 == sched_setaffinity(0, sizeof(cpu_set_t), &set)); + } +} + +#if TCMALLOC_PERCPU_USE_RSEQ +static void UpstreamRseqFenceCpu(int cpu) { + ABSL_RAW_CHECK(using_upstream_fence.load(std::memory_order_relaxed), + "upstream fence unavailable."); + + constexpr int kMEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ = (1 << 7); + constexpr int kMEMBARRIER_CMD_FLAG_CPU = (1 << 0); + + int64_t res = syscall(__NR_membarrier, kMEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ, + kMEMBARRIER_CMD_FLAG_CPU, cpu); + + ABSL_RAW_CHECK(res == 0 || res == -ENXIO /* missing CPU */, + "Upstream fence failed."); +} +#endif // TCMALLOC_PERCPU_USE_RSEQ + +// Interrupt every concurrently running sibling thread on any cpu in +// "cpus", and guarantee our writes up til now are visible to every +// other CPU. (cpus == NULL is equivalent to all CPUs.) +static void FenceInterruptCPUs(const cpu_set_t* cpus) { + CHECK_CONDITION(IsFast()); + + // TODO(b/149390298): Provide an upstream extension for sys_membarrier to + // interrupt ongoing restartable sequences. + SlowFence(cpus); +} + +void Fence() { + CompilerBarrier(); + + // Other operations (or all in RSEQ mode) might just be running on another + // CPU. Do something about that: use RSEQ::Fence() to just send interrupts + // and restart any such operation. +#if TCMALLOC_PERCPU_USE_RSEQ + if (using_upstream_fence.load(std::memory_order_relaxed)) { + UpstreamRseqFenceCpu(-1); + return; + } +#endif // TCMALLOC_PERCPU_USE_RSEQ + + FenceInterruptCPUs(nullptr); +} + +void FenceCpu(int cpu, const size_t virtual_cpu_id_offset) { + // Prevent compiler re-ordering of code below. In particular, the call to + // GetCurrentCpu must not appear in assembly program order until after any + // code that comes before FenceCpu in C++ program order. + CompilerBarrier(); + + // A useful fast path: nothing needs doing at all to order us with respect + // to our own CPU. + if (GetCurrentVirtualCpu(virtual_cpu_id_offset) == cpu) { + return; + } + + if (virtual_cpu_id_offset == offsetof(kernel_rseq, vcpu_id)) { + ASSUME(false); + + // With virtual CPUs, we cannot identify the true physical core we need to + // interrupt. +#if TCMALLOC_PERCPU_USE_RSEQ + if (using_upstream_fence.load(std::memory_order_relaxed)) { + UpstreamRseqFenceCpu(-1); + return; + } +#endif // TCMALLOC_PERCPU_USE_RSEQ + FenceInterruptCPUs(nullptr); + return; + } + +#if TCMALLOC_PERCPU_USE_RSEQ + if (using_upstream_fence.load(std::memory_order_relaxed)) { + UpstreamRseqFenceCpu(cpu); + return; + } +#endif // TCMALLOC_PERCPU_USE_RSEQ + + cpu_set_t set; + CPU_ZERO(&set); + CPU_SET(cpu, &set); + FenceInterruptCPUs(&set); +} + +} // namespace percpu +} // namespace subtle +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/percpu.h b/contrib/libs/tcmalloc/tcmalloc/internal/percpu.h new file mode 100644 index 0000000000..ad2124e0d1 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/percpu.h @@ -0,0 +1,342 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_INTERNAL_PERCPU_H_ +#define TCMALLOC_INTERNAL_PERCPU_H_ + +#define TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT 18 + +// TCMALLOC_PERCPU_RSEQ_SUPPORTED_PLATFORM defines whether or not we have an +// implementation for the target OS and architecture. +#if defined(__linux__) && \ + (defined(__x86_64__) || defined(__PPC64__) || defined(__aarch64__)) +#define TCMALLOC_PERCPU_RSEQ_SUPPORTED_PLATFORM 1 +#else +#define TCMALLOC_PERCPU_RSEQ_SUPPORTED_PLATFORM 0 +#endif + +#define TCMALLOC_PERCPU_RSEQ_VERSION 0x0 +#define TCMALLOC_PERCPU_RSEQ_FLAGS 0x0 +#if defined(__x86_64__) +#define TCMALLOC_PERCPU_RSEQ_SIGNATURE 0x53053053 +#elif defined(__ppc__) +#define TCMALLOC_PERCPU_RSEQ_SIGNATURE 0x0FE5000B +#elif defined(__aarch64__) +#define TCMALLOC_PERCPU_RSEQ_SIGNATURE 0xd428bc00 +#else +// Rather than error, allow us to build, but with an invalid signature. +#define TCMALLOC_PERCPU_RSEQ_SIGNATURE 0x0 +#endif + +// The constants above this line must be macros since they are shared with the +// RSEQ assembly sources. +#ifndef __ASSEMBLER__ + +#ifdef __linux__ +#include <sched.h> +#endif + +#include <atomic> +#include <cstddef> +#include <cstdint> + +#include "absl/base/dynamic_annotations.h" +#include "absl/base/internal/per_thread_tls.h" +#include "absl/base/macros.h" +#include "absl/base/optimization.h" +#include "tcmalloc/internal/atomic_danger.h" +#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/linux_syscall_support.h" +#include "tcmalloc/internal/logging.h" + +// TCMALLOC_PERCPU_USE_RSEQ defines whether TCMalloc support for RSEQ on the +// target architecture exists. We currently only provide RSEQ for 64-bit x86 and +// PPC binaries. 
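The per-architecture signature above is the value handed to the kernel when a thread registers its rseq area; the kernel then requires the four bytes preceding every abort address to match it. A minimal sketch of that registration, modelled on InitThreadPerCpu() in percpu.cc, assuming a kernel built with rseq support and the __rseq_abi declaration that appears later in this header:

#include <syscall.h>
#include <unistd.h>

// Sketch only: registers the calling thread's __rseq_abi area with the kernel.
// A false return means rseq is unavailable and the slow (non-RSEQ) path must
// be used instead.
static bool RegisterRseqSketch() {
#ifdef __NR_rseq
  // The final argument is the signature that must precede every abort IP.
  return 0 == syscall(__NR_rseq, &__rseq_abi, sizeof(__rseq_abi), 0,
                      TCMALLOC_PERCPU_RSEQ_SIGNATURE);
#else
  return false;  // Kernel headers too old to know about rseq.
#endif
}
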
+#if !defined(TCMALLOC_PERCPU_USE_RSEQ) +#if (ABSL_PER_THREAD_TLS == 1) && (TCMALLOC_PERCPU_RSEQ_SUPPORTED_PLATFORM == 1) +#define TCMALLOC_PERCPU_USE_RSEQ 1 +#else +#define TCMALLOC_PERCPU_USE_RSEQ 0 +#endif +#endif // !defined(TCMALLOC_PERCPU_USE_RSEQ) + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { +namespace subtle { +namespace percpu { + +inline constexpr int kRseqUnregister = 1; + +// Internal state used for tracking initialization of RseqCpuId() +inline constexpr int kCpuIdUnsupported = -2; +inline constexpr int kCpuIdUninitialized = -1; +inline constexpr int kCpuIdInitialized = 0; + +#if TCMALLOC_PERCPU_USE_RSEQ +extern "C" ABSL_PER_THREAD_TLS_KEYWORD volatile kernel_rseq __rseq_abi; + +static inline int RseqCpuId() { return __rseq_abi.cpu_id; } + +static inline int VirtualRseqCpuId(const size_t virtual_cpu_id_offset) { +#ifdef __x86_64__ + ASSERT(virtual_cpu_id_offset == offsetof(kernel_rseq, cpu_id) || + virtual_cpu_id_offset == offsetof(kernel_rseq, vcpu_id)); + return *reinterpret_cast<short *>(reinterpret_cast<uintptr_t>(&__rseq_abi) + + virtual_cpu_id_offset); +#else + ASSERT(virtual_cpu_id_offset == offsetof(kernel_rseq, cpu_id)); + return RseqCpuId(); +#endif +} +#else // !TCMALLOC_PERCPU_USE_RSEQ +static inline int RseqCpuId() { return kCpuIdUnsupported; } + +static inline int VirtualRseqCpuId(const size_t virtual_cpu_id_offset) { + return kCpuIdUnsupported; +} +#endif + +typedef int (*OverflowHandler)(int cpu, size_t cl, void *item); +typedef void *(*UnderflowHandler)(int cpu, size_t cl); + +// Functions below are implemented in the architecture-specific percpu_rseq_*.S +// files. +extern "C" { +int TcmallocSlab_Internal_PerCpuCmpxchg64(int target_cpu, intptr_t *p, + intptr_t old_val, intptr_t new_val); + +#ifndef __x86_64__ +int TcmallocSlab_Internal_Push(void *ptr, size_t cl, void *item, size_t shift, + OverflowHandler f); +int TcmallocSlab_Internal_Push_FixedShift(void *ptr, size_t cl, void *item, + OverflowHandler f); +void *TcmallocSlab_Internal_Pop(void *ptr, size_t cl, UnderflowHandler f, + size_t shift); +void *TcmallocSlab_Internal_Pop_FixedShift(void *ptr, size_t cl, + UnderflowHandler f); +#endif // __x86_64__ + +// Push a batch for a slab which the Shift equal to +// TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT +size_t TcmallocSlab_Internal_PushBatch_FixedShift(void *ptr, size_t cl, + void **batch, size_t len); + +// Pop a batch for a slab which the Shift equal to +// TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT +size_t TcmallocSlab_Internal_PopBatch_FixedShift(void *ptr, size_t cl, + void **batch, size_t len); + +#ifdef __x86_64__ +int TcmallocSlab_Internal_PerCpuCmpxchg64_VCPU(int target_cpu, intptr_t *p, + intptr_t old_val, + intptr_t new_val); +size_t TcmallocSlab_Internal_PushBatch_FixedShift_VCPU(void *ptr, size_t cl, + void **batch, + size_t len); +size_t TcmallocSlab_Internal_PopBatch_FixedShift_VCPU(void *ptr, size_t cl, + void **batch, size_t len); +#endif +} + +// NOTE: We skirt the usual naming convention slightly above using "_" to +// increase the visibility of functions embedded into the root-namespace (by +// virtue of C linkage) in the supported case. + +// Return whether we are using flat virtual CPUs. +bool UsingFlatVirtualCpus(); + +inline int GetCurrentCpuUnsafe() { +// On PowerPC, Linux maintains the current CPU in the bottom 12 bits of special +// purpose register SPRG3, which is readable from user mode. 
References: +// +// https://github.com/torvalds/linux/blob/164c09978cebebd8b5fc198e9243777dbaecdfa0/arch/powerpc/kernel/vdso.c#L727 +// https://github.com/torvalds/linux/blob/dfb945473ae8528fd885607b6fa843c676745e0c/arch/powerpc/include/asm/reg.h#L966 +// https://github.com/torvalds/linux/blob/dfb945473ae8528fd885607b6fa843c676745e0c/arch/powerpc/include/asm/reg.h#L593 +// https://lists.ozlabs.org/pipermail/linuxppc-dev/2012-July/099011.html +// +// This is intended for VDSO syscalls, but is much faster if we simply inline it +// here, presumably due to the function call and null-check overheads of the +// VDSO version. As of 2014-07 the CPU time costs are something like 1.2 ns for +// the inline version vs 12 ns for VDSO. +#if defined(__PPC64__) && defined(__linux__) + uint64_t spr; + + // Mark the asm as volatile, so that it is not hoisted out of loops. + asm volatile("mfspr %0, 0x103;" : "=r"(spr)); + + return spr & 0xfff; +#else + // Elsewhere, use the rseq mechanism. + return RseqCpuId(); +#endif +} + +inline int GetCurrentCpu() { + // We can't use the unsafe version unless we have the appropriate version of + // the rseq extension. This also allows us a convenient escape hatch if the + // kernel changes the way it uses special-purpose registers for CPU IDs. + int cpu = GetCurrentCpuUnsafe(); + + // We open-code the check for fast-cpu availability since we do not want to + // force initialization in the first-call case. This so done so that we can + // use this in places where it may not always be safe to initialize and so + // that it may serve in the future as a proxy for callers such as + // CPULogicalId() without introducing an implicit dependence on the fast-path + // extensions. Initialization is also simply unneeded on some platforms. + if (ABSL_PREDICT_TRUE(cpu >= kCpuIdInitialized)) { + return cpu; + } + +#ifdef TCMALLOC_HAVE_SCHED_GETCPU + cpu = sched_getcpu(); + ASSERT(cpu >= 0); +#endif // TCMALLOC_HAVE_SCHED_GETCPU + + return cpu; +} + +inline int GetCurrentVirtualCpuUnsafe(const size_t virtual_cpu_id_offset) { + return VirtualRseqCpuId(virtual_cpu_id_offset); +} + +inline int GetCurrentVirtualCpu(const size_t virtual_cpu_id_offset) { + // We can't use the unsafe version unless we have the appropriate version of + // the rseq extension. This also allows us a convenient escape hatch if the + // kernel changes the way it uses special-purpose registers for CPU IDs. + int cpu = VirtualRseqCpuId(virtual_cpu_id_offset); + + // We open-code the check for fast-cpu availability since we do not want to + // force initialization in the first-call case. This so done so that we can + // use this in places where it may not always be safe to initialize and so + // that it may serve in the future as a proxy for callers such as + // CPULogicalId() without introducing an implicit dependence on the fast-path + // extensions. Initialization is also simply unneeded on some platforms. + if (ABSL_PREDICT_TRUE(cpu >= kCpuIdInitialized)) { + return cpu; + } + +#ifdef TCMALLOC_HAVE_SCHED_GETCPU + cpu = sched_getcpu(); + ASSERT(cpu >= 0); +#endif // TCMALLOC_HAVE_SCHED_GETCPU + + return cpu; +} + +bool InitFastPerCpu(); + +inline bool IsFast() { + if (!TCMALLOC_PERCPU_USE_RSEQ) { + return false; + } + + int cpu = RseqCpuId(); + + if (ABSL_PREDICT_TRUE(cpu >= kCpuIdInitialized)) { + return true; + } else if (ABSL_PREDICT_FALSE(cpu == kCpuIdUnsupported)) { + return false; + } else { + // Sets 'cpu' for next time, and calls EnsureSlowModeInitialized if + // necessary. 
+ return InitFastPerCpu(); + } +} + +// As IsFast(), but if this thread isn't already initialized, will not +// attempt to do so. +inline bool IsFastNoInit() { + if (!TCMALLOC_PERCPU_USE_RSEQ) { + return false; + } + int cpu = RseqCpuId(); + return ABSL_PREDICT_TRUE(cpu >= kCpuIdInitialized); +} + +// A barrier that prevents compiler reordering. +inline void CompilerBarrier() { +#if defined(__GNUC__) + __asm__ __volatile__("" : : : "memory"); +#else + std::atomic_thread_fence(std::memory_order_seq_cst); +#endif +} + +// Internal tsan annotations, do not use externally. +// Required as tsan does not natively understand RSEQ. +#ifdef THREAD_SANITIZER +extern "C" { +void __tsan_acquire(void *addr); +void __tsan_release(void *addr); +} +#endif + +// TSAN relies on seeing (and rewriting) memory accesses. It can't +// get at the memory acccesses we make from RSEQ assembler sequences, +// which means it doesn't know about the semantics our sequences +// enforce. So if we're under TSAN, add barrier annotations. +inline void TSANAcquire(void *p) { +#ifdef THREAD_SANITIZER + __tsan_acquire(p); +#endif +} + +inline void TSANRelease(void *p) { +#ifdef THREAD_SANITIZER + __tsan_release(p); +#endif +} + +inline void TSANMemoryBarrierOn(void *p) { + TSANAcquire(p); + TSANRelease(p); +} + +// These methods may *only* be called if IsFast() has been called by the current +// thread (and it returned true). +inline int CompareAndSwapUnsafe(int target_cpu, std::atomic<intptr_t> *p, + intptr_t old_val, intptr_t new_val, + const size_t virtual_cpu_id_offset) { + TSANMemoryBarrierOn(p); +#if TCMALLOC_PERCPU_USE_RSEQ + switch (virtual_cpu_id_offset) { + case offsetof(kernel_rseq, cpu_id): + return TcmallocSlab_Internal_PerCpuCmpxchg64( + target_cpu, tcmalloc_internal::atomic_danger::CastToIntegral(p), + old_val, new_val); +#ifdef __x86_64__ + case offsetof(kernel_rseq, vcpu_id): + return TcmallocSlab_Internal_PerCpuCmpxchg64_VCPU( + target_cpu, tcmalloc_internal::atomic_danger::CastToIntegral(p), + old_val, new_val); +#endif // __x86_64__ + default: + __builtin_unreachable(); + } +#else // !TCMALLOC_PERCPU_USE_RSEQ + __builtin_unreachable(); +#endif // !TCMALLOC_PERCPU_USE_RSEQ +} + +void FenceCpu(int cpu, const size_t virtual_cpu_id_offset); + +} // namespace percpu +} // namespace subtle +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // !__ASSEMBLER__ +#endif // TCMALLOC_INTERNAL_PERCPU_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_aarch64.S b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_aarch64.S new file mode 100644 index 0000000000..3cdaf17835 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_aarch64.S @@ -0,0 +1,524 @@ +/* + * Copyright 2020 The TCMalloc Authors + * + * Licensed under the Apache License, Version 2.0 (the "License") + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __aarch64__ +#error "percpu_rseq_aarch64.S should only be included for AArch64 builds" +#endif // __aarch64__ + +#include "tcmalloc/internal/percpu.h" + +/* + * API Exposition: + * + * METHOD_abort: // Emitted as part of START_RSEQ() + * START_RSEQ() // Starts critical section between [start,commit) + * METHOD_start: // Emitted as part of START_RSEQ() + * FETCH_CPU() // Reads current CPU + * ... + * single store // Commits sequence + * METHOD_commit: + * ...return... + * + * This process is assisted by the DEFINE_UPSTREAM_CS macro, which encodes a + * (rodata) constant table, whose address is used to start the critical + * section, and the abort trampoline. + * + * The trampoline is used because: + * 1. Restarts are expected to be rare, so the extra jump when restarting is + * expected to be infrequent. + * 2. The upstream restartable sequence implementation expects the trailing 4 + * bytes of the abort PC to be "signed" (to prevent manipulation of the PC + * to an arbitrary choice). For us, this is TCMALLOC_PERCPU_RSEQ_SIGNATURE. + * This value is passed to the kernel during configuration of the rseq + * syscall. + * This would either need to be encoded as a nop (SIGN_ABORT) at the start + * of every restartable sequence, increasing instruction cache pressure, or + * placed directly before the entry point. + * + * The trampoline returns us to METHOD_abort, which is the normal entry point + * for the restartable sequence. Upon restart, the (upstream) kernel API + * clears the per-thread restartable sequence state. We return to METHOD_abort + * (rather than METHOD_start), as we need to reinitialize this value. + */ + +/* Place the code into the google_malloc section. This section is the heaviest + * user of Rseq code, so it makes sense to co-locate it. + */ + +.section google_malloc, "ax" + +/* ---------------- start helper macros ---------------- */ + +// This macro defines a relocation associated with the provided label to keep +// section GC from discarding it independently of label. +#if !defined(__clang_major__) || __clang_major__ >= 9 +#define PINSECTION(label) .reloc 0, R_AARCH64_NONE, label +#else +#define PINSECTION(label) +#endif + +// A function within a guarded memory region must start with a BTI C +// instruction. +// So per ABI that includes any externally visible code label. +// Using hint to make sure we can use this on targets that support BTI and +// targets that don't. It will behave as a no-op on targets that do not +// support BTI or outside a guarded memory region. +#ifdef __ARM_FEATURE_BTI_DEFAULT +#define BTI_C hint 34 +#define TAILCALL(x) mov x16, x; br x16 +#else +#define BTI_C +#define TAILCALL(x) br x +#endif + +// This macro defines: +// * the rseq_cs instance that we'll use for label's critical section. +// * a trampoline to return to when we abort. This label_trampoline is +// distinct from label_start, as the return IP must be "signed" (see +// SIGN_ABORT()). +// +// TODO(b/141629158): __rseq_cs only needs to be writeable to allow for +// relocations, but could be read-only for non-PIE builds. 
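For readers more comfortable with C than with assembler directives, the 32-byte table emitted per critical section by DEFINE_UPSTREAM_CS below corresponds to the kernel's struct rseq_cs. A hedged sketch of that layout (field names follow the Linux UAPI, not this file):

#include <cstdint>

// Sketch of the descriptor laid out by DEFINE_UPSTREAM_CS: one per critical
// section, 32-byte aligned, and pointed to by __rseq_abi.rseq_cs while the
// sequence is running.
struct RseqCsSketch {
  uint32_t version;             // .long TCMALLOC_PERCPU_RSEQ_VERSION
  uint32_t flags;               // .long TCMALLOC_PERCPU_RSEQ_FLAGS
  uint64_t start_ip;            // .quad .L<label>_start
  uint64_t post_commit_offset;  // .quad .L<label>_commit - .L<label>_start
  uint64_t abort_ip;            // .quad <label>_trampoline
} __attribute__((aligned(32)));
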
+#define DEFINE_UPSTREAM_CS(label) \ + .pushsection __rseq_cs, "aw"; \ + .balign 32; \ + .protected __rseq_cs_##label; \ + .type __rseq_cs_##label,@object; \ + .size __rseq_cs_##label,32; \ + __rseq_cs_##label: \ + .long TCMALLOC_PERCPU_RSEQ_VERSION, TCMALLOC_PERCPU_RSEQ_FLAGS; \ + .quad .L##label##_start; \ + .quad .L##label##_commit - .L##label##_start; \ + .quad label##_trampoline; \ + PINSECTION(.L##label##array); \ + .popsection; \ + .pushsection __rseq_cs_ptr_array, "aw"; \ + .L##label##array: \ + .quad __rseq_cs_##label; \ + .popsection; \ + .pushsection rseq_trampoline, "ax"; \ + SIGN_ABORT(); \ + .globl label##_trampoline; \ + .type label##_trampoline, @function; \ +label##_trampoline: \ + .cfi_startproc; \ + BTI_C; \ + b .L##label##_abort; \ + .cfi_endproc; \ + .size label##_trampoline, . - label##_trampoline; \ + .popsection; + +// This is part of the upstream rseq ABI. The 4 bytes prior to the abort IP +// must match TCMALLOC_PERCPU_RSEQ_SIGNATURE (as configured by our rseq +// syscall's signature parameter). This signature is used to annotate valid +// abort IPs (since rseq_cs could live in a user-writable segment). +// We use .inst here instead of a data directive so it works for both small and +// big endian. +#define SIGN_ABORT() \ + .inst TCMALLOC_PERCPU_RSEQ_SIGNATURE + +/* + * Provide a directive to specify the size of symbol "label", relative to the + * current location and its start. + */ +#define ENCODE_SIZE(label) .size label, . - label +/* We are assuming small memory model. */ +#if __clang_major__ >= 11 && !defined(__AARCH64_CMODEL_SMALL__) +#error "Memory model not supported!" +#endif + +/* FETCH_CPU assumes &__rseq_abi is in x5. */ +#define FETCH_CPU(dest) \ + ldr dest, [x5, #4] /* cpuid is 32-bits */ + +/* With PIE have initial-exec TLS, even in the presence of position + independent code. */ +#if !defined(__PIC__) || defined(__PIE__) + +#define START_RSEQ(src) \ + .L##src##_abort: \ + mrs x5, tpidr_el0; \ + adrp x6, :gottprel:__rseq_abi; \ + ldr x6, [x6,:gottprel_lo12:__rseq_abi]; \ + add x5, x5, x6; \ + adrp x6, __rseq_cs_##src; \ + add x6, x6, :lo12:__rseq_cs_##src; \ + str x6, [x5, #8]; \ + .L##src##_start: + +#else /* !defined(__PIC__) || defined(__PIE__) */ + +/* + * In the case where we can't guarantee we have initial-exec TLS we obtain + * __rseq_abi's TP offset using a TLS descriptor sequence, which we then add to + * the TP to get __rseq_abi's address. + * The call to the TLS descriptor can be optimized away by the linker, but since + * we can not guarantee it will we must save and restore the registers used to + * store the arguments of our functions. The function with most arguments has 5 + * arguments, so we save x0-x4 and lr. + * TODO: Add PAC support because we are spiling LR. 
+ */ +#define START_RSEQ(src) \ + .L##src##_abort: \ + mov x5, lr; \ + stp x0, x1, [sp, -48]!; \ + stp x2, x3, [sp, #16]; \ + stp x4, x5, [sp, #32]; \ + adrp x0, :tlsdesc:__rseq_abi; \ + ldr x1, [x0, :tlsdesc_lo12:__rseq_abi]; \ + add x0, x0, :tlsdesc_lo12:__rseq_abi; \ + .tlsdesccall __rseq_abi; \ + blr x1; \ + ldp x4, x5, [sp, #32]; \ + mov lr, x5; \ + mrs x5, tpidr_el0; \ + add x5, x5, x0; \ + ldp x2, x3, [sp, #16]; \ + ldp x0, x1, [sp], #48; \ + adrp x6, __rseq_cs_##src; \ + add x6, x6, :lo12:__rseq_cs_##src; \ + str x6, [x5, #8]; \ + .L##src##_start: + +#endif +/* ---------------- end helper macros ---------------- */ + +/* start of atomic restartable sequences */ + +/* + * int TcmallocSlab_Internal_PerCpuCmpxchg64(int target_cpu, long *p, + * long old_val, long new_val) + * w0: target_cpu + * x1: p + * x2: old_val + * x3: new_val + */ + .p2align 6 /* aligns to 2^6 with NOP filling */ + .globl TcmallocSlab_Internal_PerCpuCmpxchg64 + .type TcmallocSlab_Internal_PerCpuCmpxchg64, @function +TcmallocSlab_Internal_PerCpuCmpxchg64: + .cfi_startproc + BTI_C + START_RSEQ(TcmallocSlab_Internal_PerCpuCmpxchg64) + FETCH_CPU(w4) + cmp w0, w4 /* check cpu vs current_cpu */ + bne .LTcmallocSlab_Internal_PerCpuCmpxchg64_commit + ldr x6, [x1] + cmp x6, x2 /* verify *p == old */ + bne .LTcmallocSlab_Internal_PerCpuCmpxchg64_mismatch + str x3, [x1] +.LTcmallocSlab_Internal_PerCpuCmpxchg64_commit: + mov x0, x4 + ret /* return current cpu, indicating mismatch OR success */ +.LTcmallocSlab_Internal_PerCpuCmpxchg64_mismatch: + mov x0, #-1 /* mismatch versus "old" or "check", return -1 */ + ret + .cfi_endproc +ENCODE_SIZE(TcmallocSlab_Internal_PerCpuCmpxchg64) +DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PerCpuCmpxchg64) + +/* size_t TcmallocSlab_Internal_PushBatch_FixedShift( + * void *ptr (x0), + * size_t cl (w1), + * void** batch (x2), + * size_t len (w3) { + * uint64_t r8 = __rseq_abi.cpu_id + * uint64_t* r8 = CpuMemoryStart(x0, r8) + * Header* hdr = r8 + w1 * 8 + * uint64_t r9 = hdr->current (zero-extend 16bit) + * uint64_t r10 = hdr->end (zero-extend 16bit) + * if (r9 >= r10) return 0 + * r11 = r3 + * r10 = r9 + min(len, r10 - r9) + * r13 = r9 + r10 + * r9 = r8 + r9 * 8 + * r14 = r8 + r13 * 8 + * loop: + * r12 = *(r11-=8) (pre-index) Pop from Batch + * *(r9+=8) = r12 (post-index) Push to Slab + * if (r9 != r14) goto loop + * hdr->current = r13 (16bit store) + * return r10 + * } + */ + .p2align 6 /* aligns to 2^6 with NOP filling */ + .globl TcmallocSlab_Internal_PushBatch_FixedShift + .type TcmallocSlab_Internal_PushBatch_FixedShift, @function +TcmallocSlab_Internal_PushBatch_FixedShift: + .cfi_startproc + BTI_C + START_RSEQ(TcmallocSlab_Internal_PushBatch_FixedShift) + FETCH_CPU(w8) + lsl x8, x8, #TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT /* multiply cpu by 256k */ + add x8, x0, x8 + add x4, x8, x1, LSL #3 /* r4 = hdr */ + ldrh w9, [x4] /* r9 = current */ + ldrh w10, [x4, #6] /* r10 = end */ + cmp w9, w10 + bge .LTcmallocSlab_Internal_PushBatch_FixedShift_no_capacity + add x11, x2, x3, LSL #3 /* r11 = batch + len * 8 */ + sub w10, w10, w9 /* r10 = free capacity */ + cmp w3, w10 + csel w10, w3, w10, ls /* r10 = min(len, free capacity), amount we are + pushing */ + add x13, x9, x10 /* r13 = current + amount we are pushing. */ + add x9, x8, x9, LSL #3 /* r9 = current cpu slab stack */ + add x14, x8, x13, LSL #3 /* r14 = new current address */ +.LTcmallocSlab_Internal_PushBatch_FixedShift_loop: + ldr x12, [x11, #-8]! 
/* r12 = [--r11] */ + str x12, [x9], #8 /* [r9++] = r12 */ + cmp x9, x14 /* if current cpu slab address == new current + address */ + bne .LTcmallocSlab_Internal_PushBatch_FixedShift_loop + strh w13, [x4] /* store new current index */ +.LTcmallocSlab_Internal_PushBatch_FixedShift_commit: + mov x0, x10 + ret +.LTcmallocSlab_Internal_PushBatch_FixedShift_no_capacity: + mov x0, #0 + ret + .cfi_endproc +ENCODE_SIZE(TcmallocSlab_Internal_PushBatch_FixedShift) +DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PushBatch_FixedShift) + +/* size_t TcmallocSlab_Internal_PopBatch_FixedShift( + * void *ptr (x0), + * size_t cl (w1), + * void** batch (x2), + * size_t len (w3) { + * uint64_t r8 = __rseq_abi.cpu_id + * uint64_t* r8 = CpuMemoryStart(ptr, r8) + * Header* hdr = GetHeader(r8, cl) + * uint64_t r9 = hdr->current + * uint64_t r10 = hdr->begin + * if (r9 <= r10) return 0 + * r11 = min(len, r9 - r10) + * r13 = r8 + r9 * 8 + * r9 = r9 - r11 + * r12 = r2 + * r14 = r2 + r11 * 8 + * loop: + * r10 = *(r13 -= 8) (pre-index) Pop from slab + * *(r12+=8) = r10 (post-index) Push to Batch + * if (r12 != r14) goto loop + * hdr->current = r9 + * return r11 + * } + */ + .p2align 6 /* aligns to 2^6 with NOP filling */ + .globl TcmallocSlab_Internal_PopBatch_FixedShift + .type TcmallocSlab_Internal_PopBatch_FixedShift, @function +TcmallocSlab_Internal_PopBatch_FixedShift: + .cfi_startproc + BTI_C + START_RSEQ(TcmallocSlab_Internal_PopBatch_FixedShift) + FETCH_CPU(w8) + lsl x8, x8, #TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT /* multiply cpu by 256k */ + add x8, x0, x8 + add x4, x8, x1, LSL #3 + ldrh w9, [x4] /* current */ + ldrh w10, [x4, #4] /* begin */ + cmp w10, w9 + bhs .LTcmallocSlab_Internal_PopBatch_FixedShift_no_items + sub w11, w9, w10 /* r11 = available items */ + cmp w3, w11 + csel w11, w3, w11, ls /* r11 = min(len, available items), amount we are + popping */ + add x13, x8, x9, LSL #3 /* r13 = current cpu slab stack */ + sub x9, x9, x11 /* update new current */ + mov x12, x2 /* r12 = batch */ + add x14, x2, x11, LSL #3 /* r14 = batch + amount we are popping*8 */ +.LTcmallocSlab_Internal_PopBatch_FixedShift_loop: + ldr x10, [x13, #-8]! 
/* r10 = [--r13] */ + str x10, [x12], #8 /* [r12++] = r10 */ + cmp x12, x14 /* if current batch == batch + amount we are + popping */ + bne .LTcmallocSlab_Internal_PopBatch_FixedShift_loop + strh w9, [x4] /* store new current */ +.LTcmallocSlab_Internal_PopBatch_FixedShift_commit: + mov x0, x11 + ret +.LTcmallocSlab_Internal_PopBatch_FixedShift_no_items: + mov x0, #0 + ret + .cfi_endproc +ENCODE_SIZE(TcmallocSlab_Internal_PopBatch_FixedShift) +DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PopBatch_FixedShift) + + .globl TcmallocSlab_Internal_Push + .type TcmallocSlab_Internal_Push, @function +TcmallocSlab_Internal_Push: +.LTcmallocSlab_Internal_Push_entry: + .cfi_startproc + // Arguments use: + // * x0: (Argument: Slabs*) cpu_0_slab_ptr + // * x1: (Argument: uintptr_t) cl + // * x2: (Argument: uintptr_t) p + // * w3: (Argument: size_t) shift + // * x4: (Argument: uintptr_t) f + // Return value: current CPU + // Available x5-x15 + + BTI_C + START_RSEQ(TcmallocSlab_Internal_Push) + FETCH_CPU(w8) + lsl x9, x8, x3 + add x9, x0, x9 + add x10, x9, x1, LSL #3 + ldrh w12, [x10] /* current */ + ldrh w11, [x10, #6] /* end */ + cmp w11, w12 + ble .LTcmallocSlab_Internal_Push_no_capacity + str x2, [x9, x12, LSL #3] + add w12, w12, #1 + strh w12, [x10] +.LTcmallocSlab_Internal_Push_commit: + mov x0, x8 + ret +.LTcmallocSlab_Internal_Push_no_capacity: + mov x0, x8 + TAILCALL(x4) +.LTcmallocSlab_Internal_Push_region3: + .cfi_endproc +ENCODE_SIZE(TcmallocSlab_Internal_Push) +DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_Push) + + + .globl TcmallocSlab_Internal_Push_FixedShift + .type TcmallocSlab_Internal_Push_FixedShift, @function +TcmallocSlab_Internal_Push_FixedShift: + .cfi_startproc + // Arguments use: + // * x0: (Argument: Slabs*) cpu_0_slab_ptr + // * x1: (Argument: uintptr_t) cl + // * x2: (Argument: uintptr_t) p + // * x3: (Argument: uintptr_t) f + // Return value: current CPU + // Available x4-x15 + + BTI_C + START_RSEQ(TcmallocSlab_Internal_Push_FixedShift) + FETCH_CPU(w8) + lsl x9, x8, #TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT + add x9, x0, x9 + add x10, x9, x1, LSL #3 + ldrh w12, [x10] /* current */ + ldrh w11, [x10, #6] /* end */ + cmp w11, w12 + ble .LTcmallocSlab_Internal_Push_FixedShift_no_capacity + str x2, [x9, x12, LSL #3] + add w12, w12, #1 + strh w12, [x10] +.LTcmallocSlab_Internal_Push_FixedShift_commit: + mov x0, x8 + ret +.LTcmallocSlab_Internal_Push_FixedShift_no_capacity: + mov x0, x8 + TAILCALL(x3) + .cfi_endproc +ENCODE_SIZE(TcmallocSlab_Internal_Push_FixedShift) +DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_Push_FixedShift) + + .globl TcmallocSlab_Internal_Pop_FixedShift + .type TcmallocSlab_Internal_Pop_FixedShift, @function +TcmallocSlab_Internal_Pop_FixedShift: + .cfi_startproc + // Arguments use: + // * x0: (Argument: Slabs*) cpu_0_slab_ptr + // * x1: (Argument: uintptr_t) cl + // * x2: (Argument: uintptr_t) f + // Return value: current CPU + // Available x3-x15 + + BTI_C + START_RSEQ(TcmallocSlab_Internal_Pop_FixedShift) + FETCH_CPU(w8) /* r8 = CPU */ + lsl x9, x8, #TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT + /* r9 = CPU shifted */ + add x9, x0, x9 /* r9 = start of CPU region */ + add x10, x9, x1, LSL #3 /* r10 = start of slab header */ + ldrh w12, [x10] /* r12 = current index */ + ldrh w11, [x10, #4] /* r11 = begin index */ + cmp w11, w12 /* if begin >= current */ + bge .LTcmallocSlab_Internal_Pop_FixedShift_no_items + sub w12, w12, #1 /* r12 = current-- */ + ldr x3, [x9, x12, LSL #3] /* r3 = [start + current * 8] */ + strh w12, [x10] /* store new current index */ 
+.LTcmallocSlab_Internal_Pop_FixedShift_commit: + mov x0, x3 /* return popped item */ + ret +.LTcmallocSlab_Internal_Pop_FixedShift_no_items: + mov x0, x8 /* call overflow handler with CPU ID */ + TAILCALL(x2) + .cfi_endproc +ENCODE_SIZE(TcmallocSlab_Internal_Pop_FixedShift) +DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_Pop_FixedShift) + + .globl TcmallocSlab_Internal_Pop + .type TcmallocSlab_Internal_Pop, @function +TcmallocSlab_Internal_Pop: + .cfi_startproc + // Arguments use: + // * x0: (Argument: Slabs*) cpu_0_slab_ptr + // * x1: (Argument: uintptr_t) cl + // * x2: (Argument: uintptr_t) f + // * w3: (Argument: size_t) shift + // Return value: Value + // Available x4-x15 + + BTI_C + START_RSEQ(TcmallocSlab_Internal_Pop) + FETCH_CPU(w8) /* r8 = CPU ID */ + lsl x9, x8, x3 /* x9 = CPU shifted by (r3) */ + add x9, x0, x9 /* x9 = start of this CPU region */ + add x10, x9, x1, LSL #3 /* r10 = slab header addr */ + ldrh w12, [x10] /* r12 = current index */ + ldrh w11, [x10, #4] /* x11 = begin index */ + cmp w11, w12 /* if begin >= current */ + bge .LTcmallocSlab_Internal_Pop_no_items + sub w12, w12, #1 /* r12 = current-- */ + ldr x4, [x9, x12, LSL #3] /* r4 = [start + current * 8] */ + strh w12, [x10] /* update current index */ +.LTcmallocSlab_Internal_Pop_commit: + mov x0, x4 /* return popped item */ + ret +.LTcmallocSlab_Internal_Pop_no_items: + mov x0, x8 /* call overflow handler with CPU ID */ + TAILCALL(x2) + .cfi_endproc +ENCODE_SIZE(TcmallocSlab_Internal_Pop) +DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_Pop) + +.section .note.GNU-stack,"",@progbits + +/* Add a NT_GNU_PROPERTY_TYPE_0 note. */ +#define GNU_PROPERTY(type, value) \ + .section .note.gnu.property, "a"; \ + .p2align 3; \ + .word 4; \ + .word 16; \ + .word 5; \ + .asciz "GNU"; \ + .word type; \ + .word 4; \ + .word value; \ + .word 0; + +/* Add GNU property note if built with branch protection. */ + +#if defined(__ARM_FEATURE_BTI_DEFAULT) +GNU_PROPERTY (0xc0000000, 1) +#endif diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_asm.S b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_asm.S new file mode 100644 index 0000000000..0219a2760a --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_asm.S @@ -0,0 +1,41 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Single file to include target specific implementations for percpu. + +#include "tcmalloc/internal/percpu.h" + +#if TCMALLOC_PERCPU_RSEQ_SUPPORTED_PLATFORM +#if defined(__x86_64__) +#include "tcmalloc/internal/percpu_rseq_x86_64.S" +#elif defined(__ppc__) +#include "tcmalloc/internal/percpu_rseq_ppc.S" +#elif defined(__aarch64__) +#include "tcmalloc/internal/percpu_rseq_aarch64.S" +#else +#error "RSEQ support expected, but not found." +#endif +#endif // TCMALLOC_PERCPU_RSEQ_SUPPORTED_PLATFORM + +// We do not need an executable stack. Put this outside the +// architecture-specific region above in order to suppress "missing +// .note.GNU-stack section implies executable stack" errors. 
+// +// Cf. http://en.chys.info/2010/12/note-gnu-stack/ +#if defined(__arm__) || defined(__PPC64__) +.section .note.GNU-stack, "", %progbits +#else +.section .note.GNU-stack, "", @progbits +#endif // __arm__ || __PPC64__ + diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_ppc.S b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_ppc.S new file mode 100644 index 0000000000..234f28c2e7 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_ppc.S @@ -0,0 +1,606 @@ +/* + * Copyright 2019 The TCMalloc Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Rseq critical section functions and restart handlers. +// +// They must also avoid writing the nonvolatile and reserved general purpose +// registers defined by the Power Architecture 64-Bit ELF V2 ABI +// +// * r1-r2 +// * r13 +// * r14-r31 +// +// Finally, note that the restart handler reserves the right to clobber +// condition registers. This means that critical section functions must not +// explicitly or implicitly read condition registers outside of their +// [start, limit) critical regions. + +#ifndef __ppc__ +#error "percpu_rseq_ppc.S should only be included for PPC builds" +#endif + +#include "tcmalloc/internal/percpu.h" + +// Use the ELFv2 ABI. +.abiversion 2 +.section google_malloc, "ax" + +//////////////////////////////////////////////////////////////////////// +// Macros +//////////////////////////////////////////////////////////////////////// + +/* + * Provide a directive to specify the size of symbol "label", relative to the + * current location and its start. + */ +#define ENCODE_SIZE(label) .size label, . - label; + +// Place the CPU number into the bottom 12 bits of dst. The upper 52 bits are +// unspecified. +// +// See GetCurrentCpu() for notes on the implementation. +#define GET_CPU_UNMASKED(dst) \ + mfspr dst, 259 + +// Given an unmasked CPU number, put the interesting parts into dst. +#define MASK_CPU(dst, src) \ + clrldi dst, src, 52 + +// Like GET_CPU_UNMASKED, but guarantees that the upper bits are cleared. May +// be slower than the unmasked version. +#define GET_CPU(dst) \ + GET_CPU_UNMASKED(dst); \ + MASK_CPU(dst, dst) + +// This is part of the upstream rseq ABI. The 4 bytes prior to the abort IP +// must match TCMALLOC_PERCPU_RSEQ_SIGNATURE (as configured by our rseq +// syscall's signature parameter). This signature is used to annotate valid +// abort IPs (since rseq_cs could live in a user-writable segment). +#define SIGN_ABORT() \ + .long TCMALLOC_PERCPU_RSEQ_SIGNATURE; + +// DEFINE_UPSTREAM_CS triggers the generation of rseq_cs table (the triple of +// start, commit, abort IPs) and a trampoline function. 
+// +// Upstream API Exposition: +// +// START_RSEQ() // vvvvv emits a bunch of things +// global entry point: +// TOC setup +// METHOD_critical_abort: +// local entry point: +// store rseq_cs to __rseq_abi.rseq_cs, starting restartable sequence +// METHOD_start: // Emitted as part of START_RSEQ() +// // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +// +// GET_CPU...() // Reads current CPU +// ... +// single store // Commits sequence +// METHOD_critical_limit: +// ...return... +// +// START_RSEQ does several things: +// * We need to set up the TOC pointer for global entry points. +// * When restarting, we return to the local entry point, since the TOC pointer +// is left intact from the restart. METHOD_critical_abort and local entry +// point are therefore the same address. +// * It stores to the TLS to register that we're in a restartable sequence with +// the kernel. +// +// This process is assisted by the DEFINE_UPSTREAM_CS macro, which encodes a +// (rodata) constant table, whose address is used to start the critical +// section, and the abort trampoline. +// +// The trampoline is used because: +// 1. Restarts are expected to be rare, so the extra jump when restarting is +// expected to be infrequent. +// 2. The upstream restartable sequence implementation expects the trailing 4 +// bytes of the abort PC to be "signed" (to prevent manipulation of the PC +// to an arbitrary choice). For us, this is +// TCMALLOC_PERCPU_RSEQ_SIGNATURE. This value is passed to the kernel +// during configuration of the rseq syscall. This would either need to be +// encoded as a nop* at the start of every restartable sequence, increasing +// instruction cache pressure, or placed directly before the entry point. +// +// * The upstream rseq protocol appears to be converging on using a trap +// instruction (twui), so we cannot allow it to appear anywhere in our +// actual executed path. +// +// Upon restart, the (upstream) kernel API clears the per-thread restartable +// sequence state. We return to METHOD_abort (rather than METHOD_start), as we +// need to reinitialize this value. + +// This macro defines a relocation associated with the provided label to keep +// section GC from discarding it independently of label. +#if !defined(__clang_major__) || __clang_major__ >= 9 +#define PINSECTION(label) .reloc 0, R_PPC64_NONE, label +#else +#define PINSECTION(label) +#endif + +// TODO(b/141629158): __rseq_cs only needs to be writeable to allow for +// relocations, but could be read-only for non-PIE builds. +#define DEFINE_UPSTREAM_CS(label) \ + .pushsection __rseq_cs, "aw"; \ + .balign 32; \ + .protected __rseq_cs_##label; \ + .type __rseq_cs_##label,@object; \ + .size __rseq_cs_##label,32; \ + __rseq_cs_##label: \ + .long TCMALLOC_PERCPU_RSEQ_VERSION, TCMALLOC_PERCPU_RSEQ_FLAGS; \ + .quad .L##label##_critical_start; \ + .quad .L##label##_critical_limit - .L##label##_critical_start; \ + .quad label##_trampoline; \ + PINSECTION(.L##label##array); \ + .popsection; \ + .pushsection __rseq_cs_ptr_array, "aw"; \ + .L##label##array: \ + .quad __rseq_cs_##label; \ + .popsection; \ + .pushsection rseq_trampoline, "ax"; \ + SIGN_ABORT(); \ + .globl label##_trampoline; \ + .type label##_trampoline, @function; \ +label##_trampoline: \ + .cfi_startproc; \ + b .L##label##_critical_abort; \ + .cfi_endproc; \ + .size label##_trampoline, . - label##_trampoline; \ + .popsection + +// With PIE: We have initial-exec TLS, even in the presence of position +// independent code. 
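In C-like terms, the per-CPU compare-and-exchange that follows reduces to the sketch below. This is only an illustration of the control flow; the real guarantee comes from the kernel restarting the sequence if the thread migrates or is preempted before the committing store, not from any atomic instruction.

// Illustrative sketch of TcmallocSlab_Internal_PerCpuCmpxchg64 (not atomic on
// its own; correctness relies on the kernel's rseq restart semantics).
int PerCpuCmpxchg64Sketch(int target_cpu, intptr_t* p, intptr_t old_val,
                          intptr_t new_val) {
  int cpu = GetCurrentCpuUnsafe();    // GET_CPU(%r7)
  if (cpu != target_cpu) return cpu;  // running on the wrong CPU: report where we are
  if (*p != old_val) return -1;       // value changed underneath us
  *p = new_val;                       // the single committing store
  return cpu;                         // success; still reports the CPU
}
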
+#if !defined(__PIC__) || defined(__PIE__) + +#define START_RSEQ(label) \ + .L##label##_gep0: \ + addis %r2, %r12, .TOC.-.L##label##_gep0@ha; \ + addi %r2, %r2, .TOC.-.L##label##_gep0@l; \ + .L##label##_critical_abort: \ + .L##label##_lep0: \ + .localentry label,.-label; \ + addis %r9, %r2, __rseq_cs_##label@toc@ha; \ + addi %r9, %r9, __rseq_cs_##label@toc@l; \ + addis %r10, %r13, __rseq_abi@tprel@ha; \ + addi %r10, %r10, __rseq_abi@tprel@l; \ + std %r9, 8(%r10); \ + .L##label##_critical_start: + +#else /* !defined(__PIC__) || defined(__PIE__) */ + +// Handle non-initial exec TLS. When performance matters, we should be using +// initial-exec TLS. +// +// We need to caller-save r3-r8, as they are our arguments to the actual +// restartable sequence code. + +#define START_RSEQ(label) \ + .L##label##_gep0: \ + addis %r2, %r12, .TOC.-.L##label##_gep0@ha; \ + addi %r2, %r2, .TOC.-.L##label##_gep0@l; \ + .L##label##_critical_abort: \ + .L##label##_lep0: \ + .localentry label,.-label; \ + mflr 0; \ + std %r0, 0x10(1); \ + std %r3, -0x10(1); \ + std %r4, -0x18(1); \ + std %r5, -0x20(1); \ + std %r6, -0x28(1); \ + std %r7, -0x30(1); \ + std %r8, -0x38(1); \ + stdu %r1, -0x200(1); \ + bl tcmalloc_tls_fetch_pic; \ + nop; \ + mr %r10, %r3; \ + addi %r1, %r1, 0x200; \ + ld %r8, -0x38(1); \ + ld %r7, -0x30(1); \ + ld %r6, -0x28(1); \ + ld %r5, -0x20(1); \ + ld %r4, -0x18(1); \ + ld %r3, -0x10(1); \ + ld %r0, 0x10(1); \ + mtlr 0; \ + addis %r9, %r2, __rseq_cs_##label@toc@ha; \ + addi %r9, %r9, __rseq_cs_##label@toc@l; \ + std %r9, 8(%r10); \ + .L##label##_critical_start: + +#endif + +//////////////////////////////////////////////////////////////////////// +// TcmallocSlab_Internal_PerCpuCmpxchg64 +//////////////////////////////////////////////////////////////////////// + +.globl TcmallocSlab_Internal_PerCpuCmpxchg64 +.type TcmallocSlab_Internal_PerCpuCmpxchg64, @function +TcmallocSlab_Internal_PerCpuCmpxchg64: +.LTcmallocSlab_Internal_PerCpuCmpxchg64_entry: + .cfi_startproc + // Register use: + // + // * r3: (Argument: int64) target_cpu + // * r4: (Argument: intptr_t*) p + // * r5: (Argument: intptr_t) old_val + // * r6: (Argument: intptr_t) new_val + // * r7: The current CPU number. + // * r8: The current value of *p. + // + + START_RSEQ(TcmallocSlab_Internal_PerCpuCmpxchg64) + + // Are we running on the target CPU? + GET_CPU(%r7) + cmpd %r7, %r3 + bne .LCAS_wrong_cpu + + // Load the current value of *p. + ld %r8, 0(%r4) + + // Is the value up to date? + cmpd %r8, %r5 + bne .LCAS_wrong_value + + // Store the new value, committing the operation. + std %r6, 0(%r4) +.LTcmallocSlab_Internal_PerCpuCmpxchg64_critical_limit: + + // Return the target CPU, which is already in r3. + blr + +.LCAS_wrong_cpu: + // Return the current CPU. + mr %r3, %r7 + blr + +.LCAS_wrong_value: + // Return -1. 
+ li %r3, -1 + blr + +.LTcmallocSlab_Internal_PerCpuCmpxchg64_function_limit: + .cfi_endproc +ENCODE_SIZE(TcmallocSlab_Internal_PerCpuCmpxchg64); +DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PerCpuCmpxchg64); + + +//////////////////////////////////////////////////////////////////////// +// TcmallocSlab_Internal_Push +//////////////////////////////////////////////////////////////////////// + +.globl TcmallocSlab_Internal_Push +.type TcmallocSlab_Internal_Push, @function +TcmallocSlab_Internal_Push: +.LTcmallocSlab_Internal_Push_entry: + .cfi_startproc + // Arguments use: + // * r3: (Argument: Slabs*) cpu_0_slab_ptr + // * r4: (Argument: uintptr_t) cl + // * r5: (Argument: uintptr_t) p + // * r6: (Argument: size_t) shift + // * r7: (Argument: uintptr_t) f + // Return value: current CPU + // Available r8 r9 r10 r11 r12 + // Note that r12 may be overwritten in rseq_restart_address_internal so + // cannot be relied upon across restartable sequence boundaries. + + START_RSEQ(TcmallocSlab_Internal_Push) + + GET_CPU(%r8) // r8 = current CPU, includes MASK operation + sld %r9, %r8, %r6 // r9 = r8 << shift (r6) + add %r9, %r3, %r9 // r9 = start of this CPU region + rldicr %r10, %r4, 3, 60 // r10 = header offset for class size cl (r4) + add %r10, %r9, %r10 // r10 = slab header addr (class offset + CPU base) + lhz %r12, 0(%r10) // r12 = current index + lhz %r11, 6(%r10) // r11 = length + cmpld %cr7, %r11, %r12 // compare current index with length + ble %cr7, .LTcmallocSlab_Internal_Push_no_capacity + rldicr %r11, %r12, 3, 60 // r11 = offset of current index + addi %r12, %r12, 1 // current index += 1 + stdx %r5, %r9, %r11 // store pointer p (r5) into current offset + sth %r12, 0(%r10) // update current index + +.LTcmallocSlab_Internal_Push_critical_limit: + mr %r3, %r8 // Return current CPU in r3 + blr + +.LTcmallocSlab_Internal_Push_no_capacity: + mr %r3, %r8 // Place current CPU in r3 + // r7 already contains target function + b .LPushOverflowTrampoline + +.LTcmallocSlab_Internal_Push_function_limit: + .cfi_endproc +ENCODE_SIZE(TcmallocSlab_Internal_Push); +DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_Push); + +//////////////////////////////////////////////////////////////////////// +// TcmallocSlab_Internal_Push_FixedShift +//////////////////////////////////////////////////////////////////////// + +.globl TcmallocSlab_Internal_Push_FixedShift +.type TcmallocSlab_Internal_Push_FixedShift, @function +TcmallocSlab_Internal_Push_FixedShift: +.LTcmallocSlab_Internal_Push_FixedShift_entry: + .cfi_startproc + // Arguments use: + // * r3: (Argument: Slabs*) cpu_0_slab_ptr + // * r4: (Argument: uintptr_t) cl + // * r5: (Argument: uintptr_t) p + // * r6: (Argument: uintptr_t) f + + START_RSEQ(TcmallocSlab_Internal_Push_FixedShift) + + GET_CPU_UNMASKED(%r7) // r7 = unmasked CPU + // Mask upper 52 bits of %r7 and shift left in single + // operation. Removes the need to have a separate + // MASK operation on the critical path. 
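+  // In effect: r8 = (r7 & 0xFFF) << TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT.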
+ clrlsldi %r8, %r7, 52, TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT + add %r8, %r3, %r8 // r8 = start of this CPU region + rldicr %r9, %r4, 3, 60 // r9 = start of header + add %r9, %r8, %r9 // r9 = slab header addr + lhz %r10, 0(%r9) // r10 = current index + lhz %r11, 6(%r9) // r11 = end index + cmpld %cr7, %r11, %r10 // Check for space + ble %cr7, .LTcmallocSlab_Internal_Push_FixedShift_no_capacity + rldicr %r11, %r10, 3, 60 // r11 = offset of current index + addi %r10, %r10, 1 // current index ++ + stdx %r5, %r8, %r11 // store the item (from r5) + sth %r10, 0(%r9) // store current index + +.LTcmallocSlab_Internal_Push_FixedShift_critical_limit: + MASK_CPU(%r3, %r7) // Return and mask CPU into %r3 + blr + +.LTcmallocSlab_Internal_Push_FixedShift_no_capacity: + MASK_CPU(%r3, %r7) // Move and mask CPU into %r3 + mr %r7, %r6 // Move target function into r7 + b .LPushOverflowTrampoline + +.LTcmallocSlab_Internal_Push_FixedShift_function_limit: + .cfi_endproc +ENCODE_SIZE(TcmallocSlab_Internal_Push_FixedShift); +DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_Push_FixedShift); + + +//////////////////////////////////////////////////////////////////////// +// TcmallocSlab_Internal_Pop +//////////////////////////////////////////////////////////////////////// + +.globl TcmallocSlab_Internal_Pop +.type TcmallocSlab_Internal_Pop, @function +TcmallocSlab_Internal_Pop: +.LTcmallocSlab_Internal_Pop_entry: + .cfi_startproc + // Arguments use: + // * r3: (Argument: Slabs*) cpu_0_slab_ptr + // * r4: (Argument: uintptr_t) cl + // * r5: (Argument: uintptr_t) f + // * r6: (Argument: size_t) shift + // Available r7 r8 r9 r10 r11 + // r12 can be used as a temporary within rseq + + START_RSEQ(TcmallocSlab_Internal_Pop) + + GET_CPU(%r7) // r7 = CPU, includes mask operation + sld %r12, %r7, %r6 // r12 = CPU shifted by shift (r6) + add %r12, %r3, %r12 // r12 = start of this CPU region + rldicr %r8, %r4, 3, 60 // r8 = offset to class size + add %r8, %r12, %r8 // r8 = slab header addr for class size + lhz %r9, 0(%r8) // r9 = current index + lhz %r10, 4(%r8) // r10 = begin + cmpld %cr7, %r10, %r9 // Check that we have items to pop + bge %cr7, .LTcmallocSlab_Internal_Pop_no_item + subi %r9, %r9, 1 // r9 = current index -- + rldicr %r10, %r9, 3, 60 // r10 = offset to current item + ldx %r11, %r12, %r10 // load the item from base + index + sth %r9, 0(%r8) // store current index + +.LTcmallocSlab_Internal_Pop_critical_limit: + // Move the item into r3, now that it's safe to do so. 
+ mr %r3, %r11 + blr + +.LTcmallocSlab_Internal_Pop_no_item: + mr %r3, %r7 // Place CPU into r3 + b .LPopUnderflowTrampoline + +.LTcmallocSlab_Internal_Pop_function_limit: + .cfi_endproc +ENCODE_SIZE(TcmallocSlab_Internal_Pop); +DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_Pop); + +//////////////////////////////////////////////////////////////////////// +// TcmallocSlab_Internal_Pop_FixedShift +//////////////////////////////////////////////////////////////////////// + +.globl TcmallocSlab_Internal_Pop_FixedShift +.type TcmallocSlab_Internal_Pop_FixedShift, @function +TcmallocSlab_Internal_Pop_FixedShift: +.LTcmallocSlab_Internal_Pop_FixedShift_entry: + .cfi_startproc + // Arguments use: + // * r3: (Argument: Slabs*) cpu_0_slab_ptr + // * r4: (Argument: uintptr_t) cl + // * r5: (Argument: uintptr_t) f + + START_RSEQ(TcmallocSlab_Internal_Pop_FixedShift) + + GET_CPU_UNMASKED(%r6) // r6 = current CPU + // Following instruction combines mask and shift + clrlsldi %r7, %r6, 52, TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT + // r7 = header offset + add %r7, %r3, %r7 // r7 = start of this CPU region + rldicr %r8, %r4, 3, 60 // r8 = offset of size class + add %r8, %r7, %r8 // r8 = slab header addr + lhz %r9, 0(%r8) // r9 = current index + lhz %r10, 4(%r8) // r10 = begin index + cmpld %cr7, %r10, %r9 // Check that there are elements available + bge %cr7, .LTcmallocSlab_Internal_Pop_FixedShift_no_item + subi %r9, %r9, 1 // current index -- + rldicr %r10, %r9, 3, 60 // r10 = offset of current index + ldx %r11, %r7, %r10 // r11 = load the item + sth %r9, 0(%r8) // update current index + +.LTcmallocSlab_Internal_Pop_FixedShift_critical_limit: + // Move the item into r3, now that it's safe to do so. + mr %r3, %r11 + blr + +.LTcmallocSlab_Internal_Pop_FixedShift_no_item: + MASK_CPU(%r3, %r6) // Extract CPU from unmasked value in %r6 + b .LPopUnderflowTrampoline + +.LTcmallocSlab_Internal_Pop_FixedShift_function_limit: + .cfi_endproc +ENCODE_SIZE(TcmallocSlab_Internal_Pop_FixedShift); +DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_Pop_FixedShift); + +//////////////////////////////////////////////////////////////////////// +// TcmallocSlab_Internal_PushBatch_FixedShift +//////////////////////////////////////////////////////////////////////// + +.globl TcmallocSlab_Internal_PushBatch_FixedShift +.type TcmallocSlab_Internal_PushBatch_FixedShift, @function +TcmallocSlab_Internal_PushBatch_FixedShift: +.LTcmallocSlab_Internal_PushBatch_FixedShift_entry: + .cfi_startproc + // Arguments use: + // * r3: (Argument: Slabs*) cpu_0_slab_ptr + // * r4: (Argument: uintptr_t) cl + // * r5: (Argument: uintptr_t) batch + // * r6: (Argument: uintptr_t) len + + START_RSEQ(TcmallocSlab_Internal_PushBatch_FixedShift) + + GET_CPU_UNMASKED(%r7) + clrlsldi %r8, %r7, 52, TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT + add %r8, %r3, %r8 // r8 - start of this CPU region + sldi %r9, %r4, 3 + add %r9, %r8, %r9 // r9 - slab header addr + lhz %r10, 0(%r9) // r10 - current + lhz %r11, 6(%r9) // r11 - end + sldi %r7, %r6, 3 // r7 - len * 8 + cmpld %cr7, %r11, %r10 // current < end? 
+ ble %cr7, .LTcmallocSlab_Internal_PushBatch_FixedShift_critical_limit + sub %r11, %r11, %r10 // r11 - available capacity + // r11 = min(r11, r6) + cmpld %cr7, %r6, %r11 + bge %cr7, .LTcmallocSlab_Internal_PushBatch_FixedShift_min + mr %r11, %r6 +.LTcmallocSlab_Internal_PushBatch_FixedShift_min: + add %r11, %r10, %r11 + sldi %r11, %r11, 3 + sldi %r10, %r10, 3 + + // At this point: + // r5 - batch, r7 - offset in the batch + // r8 - cpu region, r10 - offset into the cpu region, r11 - limit of offset +.LTcmallocSlab_Internal_PushBatch_FixedShift_loop: + subi %r7, %r7, 8 + ldx %r12, %r5, %r7 // load the item + stdx %r12, %r8, %r10 // store the item + addi %r10, %r10, 8 + cmpld %cr7, %r10, %r11 + bne %cr7, .LTcmallocSlab_Internal_PushBatch_FixedShift_loop + rotrdi %r10, %r10, 3 + sth %r10, 0(%r9) // update current + +.LTcmallocSlab_Internal_PushBatch_FixedShift_critical_limit: + // return r6 - r7 / 8 + rotrdi %r7, %r7, 3 + sub %r3, %r6, %r7 + blr + +.LTcmallocSlab_Internal_PushBatch_FixedShift_function_limit: + .cfi_endproc +ENCODE_SIZE(TcmallocSlab_Internal_PushBatch_FixedShift); +DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PushBatch_FixedShift); + +//////////////////////////////////////////////////////////////////////// +// TcmallocSlab_Internal_PopBatch_FixedShift +//////////////////////////////////////////////////////////////////////// + +.globl TcmallocSlab_Internal_PopBatch_FixedShift +.type TcmallocSlab_Internal_PopBatch_FixedShift, @function +TcmallocSlab_Internal_PopBatch_FixedShift: +.LTcmallocSlab_Internal_PopBatch_FixedShift_entry: + .cfi_startproc + // Arguments use: + // * r3: (Argument: Slabs*) cpu_0_slab_ptr + // * r4: (Argument: uintptr_t) cl + // * r5: (Argument: uintptr_t) batch + // * r6: (Argument: uintptr_t) len + + START_RSEQ(TcmallocSlab_Internal_PopBatch_FixedShift) + + GET_CPU_UNMASKED(%r7) + clrlsldi %r7, %r7, 52, TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT + add %r7, %r3, %r7 // r7 - start of this CPU region + sldi %r8, %r4, 3 + add %r8, %r7, %r8 // r8 - slab header addr + lhz %r9, 0(%r8) // r9 - current + lhz %r10, 4(%r8) // r10 - begin + li %r11, 0 // current position in batch + cmpld %cr7, %r10, %r9 + bge %cr7, .LTcmallocSlab_Internal_PopBatch_FixedShift_critical_limit + sub %r10, %r9, %r10 // r10 - available items + // r10 = min(r10, r6) + cmpld %cr7, %r6, %r10 + bge %cr7, .LTcmallocSlab_Internal_PopBatch_FixedShift_min + mr %r10, %r6 +.LTcmallocSlab_Internal_PopBatch_FixedShift_min: + sub %r10, %r9, %r10 + sldi %r10, %r10, 3 + sldi %r9, %r9, 3 + + // At this point: + // r5 - batch, r11 - offset in the batch + // r7 - cpu region, r9 - offset into the cpu region, r10 - limit of offset +.LTcmallocSlab_Internal_PopBatch_FixedShift_loop: + subi %r9, %r9, 8 + ldx %r12, %r7, %r9 // load the item + stdx %r12, %r5, %r11 // store the item + addi %r11, %r11, 8 + cmpld %cr7, %r9, %r10 + bne %cr7, .LTcmallocSlab_Internal_PopBatch_FixedShift_loop + rotrdi %r9, %r9, 3 + sth %r9, 0(%r8) // update current + +.LTcmallocSlab_Internal_PopBatch_FixedShift_critical_limit: + rotrdi %r3, %r11, 3 + blr + +.LTcmallocSlab_Internal_PopBatch_FixedShift_function_limit: + .cfi_endproc +ENCODE_SIZE(TcmallocSlab_Internal_PopBatch_FixedShift); +DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PopBatch_FixedShift); + + // Input: r7 points to the function to tail call. r3...r6 are args for it. +.LPushOverflowTrampoline: + mtctr %r7 + mr %r12, %r7 // Callee expects r12 to point to its first instruction. + bctr + + // Input: r5 points to the function to tail call. r3...r4 are args for it. 
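+  // (The target is a C handler of the same shape as NoopUnderflow in
+  // percpu_tcmalloc.h, i.e. `void* f(int cpu, size_t cl)`, so r3/r4 already
+  // carry its arguments under the ELFv2 calling convention.)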
+.LPopUnderflowTrampoline: + mtctr %r5 + mr %r12, %r5 // Callee expects r12 to point to its first instruction. + bctr + +.section .note.GNU-stack,"",%progbits + diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_unsupported.cc b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_unsupported.cc new file mode 100644 index 0000000000..1438d8c3d8 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_unsupported.cc @@ -0,0 +1,87 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Provides skeleton RSEQ functions which raise a hard error in the case of +// being erroneously called on an unsupported platform. + +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/percpu.h" + +#if !TCMALLOC_PERCPU_RSEQ_SUPPORTED_PLATFORM + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { +namespace subtle { +namespace percpu { + +static void Unsupported() { + Crash(kCrash, __FILE__, __LINE__, + "RSEQ function called on unsupported platform."); +} + +int TcmallocSlab_Internal_PerCpuCmpxchg64(int target_cpu, intptr_t *p, + intptr_t old_val, intptr_t new_val) { + Unsupported(); + return -1; +} + +int TcmallocSlab_Internal_Push(void *ptr, size_t cl, void *item, size_t shift, + OverflowHandler f) { + Unsupported(); + return -1; +} + +int TcmallocSlab_Internal_Push_FixedShift(void *ptr, size_t cl, void *item, + OverflowHandler f) { + Unsupported(); + return -1; +} + +void *TcmallocSlab_Internal_Pop(void *ptr, size_t cl, UnderflowHandler f, + size_t shift) { + Unsupported(); + return nullptr; +} + +void *TcmallocSlab_Internal_Pop_FixedShift(void *ptr, size_t cl, + UnderflowHandler f) { + Unsupported(); + return nullptr; +} + +size_t TcmallocSlab_Internal_PushBatch_FixedShift(void *ptr, size_t cl, + void **batch, size_t len) { + Unsupported(); + return 0; +} + +size_t TcmallocSlab_Internal_PopBatch_FixedShift(void *ptr, size_t cl, + void **batch, size_t len) { + Unsupported(); + return 0; +} + +int PerCpuReadCycleCounter(int64_t *cycles) { + Unsupported(); + return -1; +} + +} // namespace percpu +} // namespace subtle +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // !TCMALLOC_PERCPU_RSEQ_SUPPORTED_PLATFORM diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_x86_64.S b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_x86_64.S new file mode 100644 index 0000000000..866f4f90ca --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_rseq_x86_64.S @@ -0,0 +1,463 @@ +/* + * Copyright 2019 The TCMalloc Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __x86_64__ +#error "percpu_rseq_x86_64.S should only be included for x86-64 builds" +#endif // __x86_64__ + +#include "tcmalloc/internal/percpu.h" + +/* + * API Exposition: + * + * METHOD_abort: // Emitted as part of START_RSEQ() + * START_RSEQ() // Starts critical section between [start,commit) + * METHOD_start: // Emitted as part of START_RSEQ() + * FETCH_CPU() // Reads current CPU + * ... + * single store // Commits sequence + * METHOD_commit: + * ...return... + * + * This process is assisted by the DEFINE_UPSTREAM_CS macro, which encodes a + * (rodata) constant table, whose address is used to start the critical + * section, and the abort trampoline. + * + * The trampoline is used because: + * 1. Restarts are expected to be rare, so the extra jump when restarting is + * expected to be infrequent. + * 2. The upstream restartable sequence implementation expects the trailing 4 + * bytes of the abort PC to be "signed" (to prevent manipulation of the PC + * to an arbitrary choice). For us, this is TCMALLOC_PERCPU_RSEQ_SIGNATURE. This + * value is passed to the kernel during configuration of the rseq syscall. + * This would either need to be encoded as a nop (SIGN_ABORT) at the start + * of every restartable sequence, increasing instruction cache pressure, or + * placed directly before the entry point. + * + * The trampoline returns us to METHOD_abort, which is the normal entry point + * for the restartable sequence. Upon restart, the (upstream) kernel API + * clears the per-thread restartable sequence state. We return to METHOD_abort + * (rather than METHOD_start), as we need to reinitialize this value. + */ + +/* Place the code into the google_malloc section. This section is the heaviest + * user of Rseq code, so it makes sense to co-locate it. + */ + +.section google_malloc, "ax" + +/* ---------------- start helper macros ---------------- */ + +// This macro defines a relocation associated with the provided label to keep +// section GC from discarding it independently of label. +#if !defined(__clang_major__) || __clang_major__ >= 9 +#define PINSECTION(label) .reloc 0, R_X86_64_NONE, label +#else +#define PINSECTION(label) +#endif + +// This macro defines: +// * the rseq_cs instance that we'll use for label's critical section. +// * a trampoline to return to when we abort. This label_trampoline is +// distinct from label_start, as the return IP must be "signed" (see +// SIGN_ABORT()). +// +// TODO(b/141629158): __rseq_cs only needs to be writeable to allow for +// relocations, but could be read-only for non-PIE builds. 
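+//
+// For example, DEFINE_UPSTREAM_CS(Foo) emits __rseq_cs_Foo (consumed by
+// START_RSEQ(Foo) below) together with Foo_trampoline, which simply jumps
+// back to .LFoo_abort.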
+#define DEFINE_UPSTREAM_CS(label) \ + .pushsection __rseq_cs, "aw"; \ + .balign 32; \ + .protected __rseq_cs_##label; \ + .type __rseq_cs_##label,@object; \ + .size __rseq_cs_##label,32; \ + __rseq_cs_##label: \ + .long TCMALLOC_PERCPU_RSEQ_VERSION, TCMALLOC_PERCPU_RSEQ_FLAGS; \ + .quad .L##label##_start; \ + .quad .L##label##_commit - .L##label##_start; \ + .quad label##_trampoline; \ + PINSECTION(.L##label##array); \ + .popsection; \ + .pushsection __rseq_cs_ptr_array, "aw"; \ + .L##label##array: \ + .quad __rseq_cs_##label; \ + .popsection; \ + SIGN_ABORT(); \ + .globl label##_trampoline; \ + .type label##_trampoline, @function; \ +label##_trampoline: \ + .cfi_startproc; \ + jmp .L##label##_abort; \ + .cfi_endproc; \ + .size label##_trampoline, . - label##_trampoline; + +// This is part of the upstream rseq ABI. The 4 bytes prior to the abort IP +// must match TCMALLOC_PERCPU_RSEQ_SIGNATURE (as configured by our rseq +// syscall's signature parameter). This signature is used to annotate valid +// abort IPs (since rseq_cs could live in a user-writable segment). +// +// To allow this to be safely executed as a valid instruction, we encode the +// value with a nop. This is decoded as: +// +// nopl 0xSIGNATURE(%rip) +// +#define SIGN_ABORT() \ + .byte 0x0f, 0x1f, 0x05; \ + .long TCMALLOC_PERCPU_RSEQ_SIGNATURE; + +/* + * Provide a directive to specify the size of symbol "label", relative to the + * current location and its start. + */ +#define ENCODE_SIZE(label) .size label, . - label; + +/* In all non-position independent cases we need to use RIP-relative label + addresses */ +#if !defined(__PIC__) +#define LABEL_ADDR(label) $label +#else +#define LABEL_ADDR(label) label@GOTPCREL(%rip) +#endif /* !defined(__PIC__) */ + +/* With PIE; have initial-exec TLS, even in the presence of position + independent code. */ +#if !defined(__PIC__) || defined(__PIE__) +#define FETCH_CPU(dest) movl %fs:__rseq_abi@TPOFF+4, dest; +#define FETCH_VCPU(dest) movzwl %fs:__rseq_abi@TPOFF+30, dest; +#define START_RSEQ(src) \ + .L##src##_abort: \ + leaq __rseq_cs_##src(%rip), %rax; \ + movq %rax, %fs:__rseq_abi@TPOFF+8; \ + .L##src##_start: + +#else /* !defined(__PIC__) || defined(__PIE__) */ + +/* + * FETCH_CPU assumes &__rseq_abi is in %rax. We cannot call + * tcmalloc_tls_fetch_pic at this point, as we have started our restartable + * sequence. If we are prempted there, the kernel will clear rseq_cs as + * tcmalloc_tls_fetch_pic does not appear in the restartable sequence's address + * range. + */ +#define FETCH_CPU(dest) \ + movl 4(%rax), dest; /* cpuid is 32-bits */ +#define FETCH_VCPU(dest) \ + movzwl 30(%rax), dest; /* vcpu_id is 16-bits */ +#define START_RSEQ(src) \ + .L##src##_abort: \ + call tcmalloc_internal_tls_fetch_pic@PLT; \ + leaq __rseq_cs_##src(%rip), %r11; \ + movq %r11, 8(%rax); \ + .L##src##_start: + +/* + * We can safely call this function from within an RSEQ section as it only + * generates a thread-local address which will not change across a missed + * restart. This must precede the construction of any preparatory state. 
+ */ + .local tcmalloc_internal_tls_fetch_pic + .type tcmalloc_internal_tls_fetch_pic, @function +tcmalloc_internal_tls_fetch_pic: + .cfi_startproc + push %rbp + .cfi_def_cfa_offset 16 + .cfi_offset 6, -16 + mov %rsp, %rbp + .cfi_def_cfa_register 6 + sub $0x30, %rsp + mov %rsi, -0x08(%rbp) /* atypical abi: tcmalloc_tls_fetch_pic preserves regs */ + mov %rdi, -0x10(%rbp) + mov %rdx, -0x18(%rbp) + mov %rcx, -0x20(%rbp) + mov %r8, -0x28(%rbp) + mov %r9, -0x30(%rbp) + /* + * Below is an optimized relocatable TLS lookup per ELF spec: + * http://www.akkadia.org/drepper/tls.pdf + * When possible, this is replaced at link-time with a call-free variant. + */ + .byte 0x66; + leaq __rseq_abi@TLSGD(%rip), %rdi; + .word 0x6666; + rex64; + call __tls_get_addr@PLT; + mov -0x08(%rbp), %rsi + mov -0x10(%rbp), %rdi + mov -0x18(%rbp), %rdx + mov -0x20(%rbp), %rcx + mov -0x28(%rbp), %r8 + mov -0x30(%rbp), %r9 + add $0x30, %rsp + leave + .cfi_def_cfa_register 7 + .cfi_def_cfa_offset 8 + ret; /* &__rseq_abi in %rax */ + .cfi_endproc +ENCODE_SIZE(tcmalloc_internal_tls_fetch_pic) +#endif /* !defined(__PIC__) || defined(__PIE__) */ + +/* ---------------- end helper macros ---------------- */ + +/* start of atomic restartable sequences */ + +/* + * NOTE: We don't use cmpxchgq in the following functions since this would + make checking the success of our commit operation dependent on flags (which + * are in turn clobbered by the restart region) -- furthermore we can't just + * retry to fill in the flags since the restarted cmpxchg may have actually + * succeeded; spuriously failing subsequent attempts. + */ + +/* + * int TcmallocSlab_Internal_PerCpuCmpxchg64(int target_cpu, long *p, + * long old_val, long new_val) + */ + .p2align 6; /* aligns to 2^6 with NOP filling */ + .globl TcmallocSlab_Internal_PerCpuCmpxchg64 + .type TcmallocSlab_Internal_PerCpuCmpxchg64, @function +TcmallocSlab_Internal_PerCpuCmpxchg64: + .cfi_startproc + START_RSEQ(TcmallocSlab_Internal_PerCpuCmpxchg64); + FETCH_CPU(%eax); + cmp %eax, %edi; /* check cpu vs current_cpu */ + jne .LTcmallocSlab_Internal_PerCpuCmpxchg64_commit; + cmp %rdx, (%rsi); /* verify *p == old */ + jne .LTcmallocSlab_Internal_PerCpuCmpxchg64_value_mismatch; + mov %rcx, (%rsi); +.LTcmallocSlab_Internal_PerCpuCmpxchg64_commit: + ret; /* return current cpu, indicating mismatch OR success */ +.LTcmallocSlab_Internal_PerCpuCmpxchg64_value_mismatch: + mov $-1, %eax; /* mismatch versus "old" or "check", return -1 */ + ret; + .cfi_endproc +ENCODE_SIZE(TcmallocSlab_Internal_PerCpuCmpxchg64) +DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PerCpuCmpxchg64) + + .p2align 6; /* aligns to 2^6 with NOP filling */ + .globl TcmallocSlab_Internal_PerCpuCmpxchg64_VCPU + .type TcmallocSlab_Internal_PerCpuCmpxchg64_VCPU, @function +TcmallocSlab_Internal_PerCpuCmpxchg64_VCPU: + .cfi_startproc + START_RSEQ(TcmallocSlab_Internal_PerCpuCmpxchg64_VCPU); + FETCH_VCPU(%eax); + cmp %eax, %edi; /* check cpu vs current_cpu */ + jne .LTcmallocSlab_Internal_PerCpuCmpxchg64_VCPU_commit; + cmp %rdx, (%rsi); /* verify *p == old */ + jne .LTcmallocSlab_Internal_PerCpuCmpxchg64_VCPU_value_mismatch; + mov %rcx, (%rsi); +.LTcmallocSlab_Internal_PerCpuCmpxchg64_VCPU_commit: + ret; /* return current cpu, indicating mismatch OR success */ +.LTcmallocSlab_Internal_PerCpuCmpxchg64_VCPU_value_mismatch: + mov $-1, %eax; /* mismatch versus "old" or "check", return -1 */ + ret; + .cfi_endproc +ENCODE_SIZE(TcmallocSlab_Internal_PerCpuCmpxchg64_VCPU) +DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PerCpuCmpxchg64_VCPU) + +/* size_t 
TcmallocSlab_Internal_PushBatch_FixedShift( + * void *ptr (%rdi), + * size_t cl (%rsi), + * void** batch (%rdx), + * size_t len (%rcx) { + * uint64_t r8 = __rseq_abi.cpu_id; + * uint64_t* r8 = CpuMemoryStart(rdi, r8); + * Header* hdr = r8 + rsi * 8; + * uint64_t r9 = hdr->current; + * uint64_t r10 = hdr->end; + * if (r9 >= r10) return 0; + * r11 = rcx; + * r10 = r9 + min(rcx, r10 - r9); + * loop: + * r11--; + * rax = batch[r11]; + * *(r8 + r9 * 8) = rax; + * r9++; + * if (r9 != r10) goto loop; + * hdr->current = r9; + * return rcx - r11; + * } + */ + .p2align 6; /* aligns to 2^6 with NOP filling */ + .globl TcmallocSlab_Internal_PushBatch_FixedShift + .type TcmallocSlab_Internal_PushBatch_FixedShift, @function +TcmallocSlab_Internal_PushBatch_FixedShift: + .cfi_startproc + START_RSEQ(TcmallocSlab_Internal_PushBatch_FixedShift); + FETCH_CPU(%r8d); + shl $TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT, %r8; + /* multiply cpu by 256k */ + lea (%rdi, %r8), %r8; + movzwq (%r8, %rsi, 8), %r9; /* current */ + movzwq 6(%r8, %rsi, 8), %r10; /* end */ + cmpq %r10, %r9; + jae .LTcmallocSlab_Internal_PushBatch_FixedShift_full; + movq %rcx, %r11; /* r11 = copy of len */ + subq %r9, %r10; /* r10 = free capacity */ + cmpq %rcx, %r10; + cmovaq %rcx, %r10; /* r10 = min(len, free capacity) */ + addq %r9, %r10; +.LTcmallocSlab_Internal_PushBatch_FixedShift_loop: + decq %r11; + movq (%rdx, %r11, 8), %rax; + movq %rax, (%r8, %r9, 8); + incq %r9; + cmpq %r9, %r10; + jne .LTcmallocSlab_Internal_PushBatch_FixedShift_loop + movw %r9w, (%r8, %rsi, 8); +.LTcmallocSlab_Internal_PushBatch_FixedShift_commit: + movq %rcx, %rax; + subq %r11, %rax; + ret; +.LTcmallocSlab_Internal_PushBatch_FixedShift_full: + xor %rax, %rax; + ret; + .cfi_endproc +ENCODE_SIZE(TcmallocSlab_Internal_PushBatch_FixedShift) +DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PushBatch_FixedShift) + + .p2align 6; /* aligns to 2^6 with NOP filling */ + .globl TcmallocSlab_Internal_PushBatch_FixedShift_VCPU + .type TcmallocSlab_Internal_PushBatch_FixedShift_VCPU, @function +TcmallocSlab_Internal_PushBatch_FixedShift_VCPU: + .cfi_startproc + START_RSEQ(TcmallocSlab_Internal_PushBatch_FixedShift_VCPU); + FETCH_VCPU(%r8d); + shl $TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT, %r8; + /* multiply cpu by 256k */ + lea (%rdi, %r8), %r8; + movzwq (%r8, %rsi, 8), %r9; /* current */ + movzwq 6(%r8, %rsi, 8), %r10; /* end */ + cmpq %r10, %r9; + jae .LTcmallocSlab_Internal_PushBatch_FixedShift_VCPU_full; + movq %rcx, %r11; /* r11 = copy of len */ + subq %r9, %r10; /* r10 = free capacity */ + cmpq %rcx, %r10; + cmovaq %rcx, %r10; /* r10 = min(len, free capacity) */ + addq %r9, %r10; +.LTcmallocSlab_Internal_PushBatch_FixedShift_VCPU_loop: + decq %r11; + movq (%rdx, %r11, 8), %rax; + movq %rax, (%r8, %r9, 8); + incq %r9; + cmpq %r9, %r10; + jne .LTcmallocSlab_Internal_PushBatch_FixedShift_VCPU_loop + movw %r9w, (%r8, %rsi, 8); +.LTcmallocSlab_Internal_PushBatch_FixedShift_VCPU_commit: + movq %rcx, %rax; + subq %r11, %rax; + ret; +.LTcmallocSlab_Internal_PushBatch_FixedShift_VCPU_full: + xor %rax, %rax; + ret; + .cfi_endproc +ENCODE_SIZE(TcmallocSlab_Internal_PushBatch_FixedShift_VCPU) +DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PushBatch_FixedShift_VCPU) + +/* size_t TcmallocSlab_Internal_PopBatch_FixedShift( + * void *ptr (%rdi), + * size_t cl (%rsi), + * void** batch (%rdx), + * size_t len (%rcx) { + * uint64_t r8 = __rseq_abi.cpu_id; + * uint64_t* r8 = CpuMemoryStart(rdi, r8); + * Header* hdr = GetHeader(rdi, rax, cl); + * uint64_t r9 = hdr->current; + * uint64_t r10 = 
hdr->begin; + * if (r9 <= r10) return 0; + * r11 = min(rcx, r9 - r10); + * rax = 0; + * loop: + * r9--; + * r10 = *(r8 + r9 * 8); + * batch[rax] = r10; + * rax++; + * if (rax != r11) goto loop; + * hdr->current = r9; + * return rax; + * } + */ + .p2align 6; /* aligns to 2^6 with NOP filling */ + .globl TcmallocSlab_Internal_PopBatch_FixedShift + .type TcmallocSlab_Internal_PopBatch_FixedShift, @function +TcmallocSlab_Internal_PopBatch_FixedShift: + .cfi_startproc + START_RSEQ(TcmallocSlab_Internal_PopBatch_FixedShift); + FETCH_CPU(%r8d); + shl $TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT, %r8; + /* multiply cpu by 256k */ + lea (%rdi, %r8), %r8; + movzwq (%r8, %rsi, 8), %r9; /* current */ + movzwq 4(%r8, %rsi, 8), %r10; /* begin */ + cmp %r10, %r9; + jbe .LTcmallocSlab_Internal_PopBatch_FixedShift_empty; + movq %r9, %r11; + subq %r10, %r11; /* r11 = available items */ + cmpq %rcx, %r11; + cmovaq %rcx, %r11; /* r11 = min(len, available items) */ + xorq %rax, %rax; +.LTcmallocSlab_Internal_PopBatch_FixedShift_loop: + decq %r9; + movq (%r8, %r9, 8), %r10; + movq %r10, (%rdx, %rax, 8); + incq %rax; + cmpq %rax, %r11; + jne .LTcmallocSlab_Internal_PopBatch_FixedShift_loop + movw %r9w, (%r8, %rsi, 8); +.LTcmallocSlab_Internal_PopBatch_FixedShift_commit: + ret; +.LTcmallocSlab_Internal_PopBatch_FixedShift_empty: + xor %rax, %rax; + ret; + .cfi_endproc +ENCODE_SIZE(TcmallocSlab_Internal_PopBatch_FixedShift) +DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PopBatch_FixedShift) + + .p2align 6; /* aligns to 2^6 with NOP filling */ + .globl TcmallocSlab_Internal_PopBatch_FixedShift_VCPU + .type TcmallocSlab_Internal_PopBatch_FixedShift_VCPU, @function +TcmallocSlab_Internal_PopBatch_FixedShift_VCPU: + .cfi_startproc + START_RSEQ(TcmallocSlab_Internal_PopBatch_FixedShift_VCPU); + FETCH_VCPU(%r8d); + shl $TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT, %r8; + /* multiply cpu by 256k */ + lea (%rdi, %r8), %r8; + movzwq (%r8, %rsi, 8), %r9; /* current */ + movzwq 4(%r8, %rsi, 8), %r10; /* begin */ + cmp %r10, %r9; + jbe .LTcmallocSlab_Internal_PopBatch_FixedShift_VCPU_empty; + movq %r9, %r11; + subq %r10, %r11; /* r11 = available items */ + cmpq %rcx, %r11; + cmovaq %rcx, %r11; /* r11 = min(len, available items) */ + xorq %rax, %rax; +.LTcmallocSlab_Internal_PopBatch_FixedShift_VCPU_loop: + decq %r9; + movq (%r8, %r9, 8), %r10; + movq %r10, (%rdx, %rax, 8); + incq %rax; + cmpq %rax, %r11; + jne .LTcmallocSlab_Internal_PopBatch_FixedShift_VCPU_loop + movw %r9w, (%r8, %rsi, 8); +.LTcmallocSlab_Internal_PopBatch_FixedShift_VCPU_commit: + ret; +.LTcmallocSlab_Internal_PopBatch_FixedShift_VCPU_empty: + xor %rax, %rax; + ret; + .cfi_endproc +ENCODE_SIZE(TcmallocSlab_Internal_PopBatch_FixedShift_VCPU) +DEFINE_UPSTREAM_CS(TcmallocSlab_Internal_PopBatch_FixedShift_VCPU) + +.section .note.GNU-stack,"",@progbits diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/percpu_tcmalloc.h b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_tcmalloc.h new file mode 100644 index 0000000000..91d15ba908 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_tcmalloc.h @@ -0,0 +1,1279 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_INTERNAL_PERCPU_TCMALLOC_H_ +#define TCMALLOC_INTERNAL_PERCPU_TCMALLOC_H_ + +#include <atomic> +#include <cstring> + +#include "absl/base/casts.h" +#include "absl/base/dynamic_annotations.h" +#include "absl/base/internal/sysinfo.h" +#include "tcmalloc/internal/mincore.h" +#include "tcmalloc/internal/percpu.h" + +#if defined(TCMALLOC_PERCPU_USE_RSEQ) +#if !defined(__clang__) +#define TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO 1 +#elif __clang_major__ >= 9 && !__has_feature(speculative_load_hardening) +// asm goto requires the use of Clang 9 or newer: +// https://releases.llvm.org/9.0.0/tools/clang/docs/ReleaseNotes.html#c-language-changes-in-clang +// +// SLH (Speculative Load Hardening) builds do not support asm goto. We can +// detect these compilation modes since +// https://github.com/llvm/llvm-project/commit/379e68a763097bed55556c6dc7453e4b732e3d68. +#define TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO 1 +#if __clang_major__ >= 11 +#define TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT 1 +#endif + +#else +#define TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO 0 +#endif +#else +#define TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO 0 +#endif + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +struct PerCPUMetadataState { + size_t virtual_size; + size_t resident_size; +}; + +namespace subtle { +namespace percpu { + +// Tcmalloc slab for per-cpu caching mode. +// Conceptually it is equivalent to an array of NumClasses PerCpuSlab's, +// and in fallback implementation it is implemented that way. But optimized +// implementation uses more compact layout and provides faster operations. +// +// Methods of this type must only be used in threads where it is known that the +// percpu primitives are available and percpu::IsFast() has previously returned +// 'true'. +template <size_t NumClasses> +class TcmallocSlab { + public: + constexpr TcmallocSlab() = default; + + // Init must be called before any other methods. + // <alloc> is memory allocation callback (e.g. malloc). + // <capacity> callback returns max capacity for size class <cl>. + // <lazy> indicates that per-CPU slabs should be populated on demand + // <shift> indicates the number of bits to shift the CPU ID in order to + // obtain the location of the per-CPU slab. If this parameter matches + // TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT as set in + // percpu_intenal.h then the assembly language versions of push/pop + // batch can be used; otherwise batch operations are emulated. + // + // Initial capacity is 0 for all slabs. + void Init(void*(alloc)(size_t size), size_t (*capacity)(size_t cl), bool lazy, + size_t shift); + + // Only may be called if Init(..., lazy = true) was used. + void InitCPU(int cpu, size_t (*capacity)(size_t cl)); + + // For tests. + void Destroy(void(free)(void*)); + + // Number of elements in cpu/cl slab. + size_t Length(int cpu, size_t cl) const; + + // Number of elements (currently) allowed in cpu/cl slab. 
+ size_t Capacity(int cpu, size_t cl) const; + + // If running on cpu, increment the cpu/cl slab's capacity to no greater than + // min(capacity+len, max_cap) and return the increment applied. Otherwise + // return 0. Note: max_cap must be the same as returned by capacity callback + // passed to Init. + size_t Grow(int cpu, size_t cl, size_t len, size_t max_cap); + + // If running on cpu, decrement the cpu/cl slab's capacity to no less than + // max(capacity-len, 0) and return the actual decrement applied. Otherwise + // return 0. + size_t Shrink(int cpu, size_t cl, size_t len); + + // Add an item (which must be non-zero) to the current CPU's slab. Returns + // true if add succeeds. Otherwise invokes <f> and returns false (assuming + // that <f> returns negative value). + bool Push(size_t cl, void* item, OverflowHandler f); + + // Remove an item (LIFO) from the current CPU's slab. If the slab is empty, + // invokes <f> and returns its result. + void* Pop(size_t cl, UnderflowHandler f); + + // Add up to <len> items to the current cpu slab from the array located at + // <batch>. Returns the number of items that were added (possibly 0). All + // items not added will be returned at the start of <batch>. Items are only + // not added if there is no space on the current cpu. + // REQUIRES: len > 0. + size_t PushBatch(size_t cl, void** batch, size_t len); + + // Pop up to <len> items from the current cpu slab and return them in <batch>. + // Returns the number of items actually removed. + // REQUIRES: len > 0. + size_t PopBatch(size_t cl, void** batch, size_t len); + + // Decrements the cpu/cl slab's capacity to no less than max(capacity-len, 0) + // and returns the actual decrement applied. It attempts to shrink any + // unused capacity (i.e end-current) in cpu/cl's slab; if it does not have + // enough unused items, it pops up to <len> items from cpu/cl slab and then + // shrinks the freed capacity. + // + // May be called from another processor, not just the <cpu>. + // REQUIRES: len > 0. + typedef void (*ShrinkHandler)(void* arg, size_t cl, void** batch, size_t n); + size_t ShrinkOtherCache(int cpu, size_t cl, size_t len, void* shrink_ctx, + ShrinkHandler f); + + // Remove all items (of all classes) from <cpu>'s slab; reset capacity for all + // classes to zero. Then, for each sizeclass, invoke + // DrainHandler(drain_ctx, cl, <items from slab>, <previous slab capacity>); + // + // It is invalid to concurrently execute Drain() for the same CPU; calling + // Push/Pop/Grow/Shrink concurrently (even on the same CPU) is safe. + typedef void (*DrainHandler)(void* drain_ctx, size_t cl, void** batch, + size_t n, size_t cap); + void Drain(int cpu, void* drain_ctx, DrainHandler f); + + PerCPUMetadataState MetadataMemoryUsage() const; + + // We use a single continuous region of memory for all slabs on all CPUs. + // This region is split into NumCPUs regions of size kPerCpuMem (256k). + // First NumClasses words of each CPU region are occupied by slab + // headers (Header struct). The remaining memory contain slab arrays. + struct Slabs { + std::atomic<int64_t> header[NumClasses]; + void* mem[]; + }; + + inline int GetCurrentVirtualCpuUnsafe() { + return VirtualRseqCpuId(virtual_cpu_id_offset_); + } + + private: + // Slab header (packed, atomically updated 64-bit). + struct Header { + // All values are word offsets from per-CPU region start. + // The array is [begin, end). + uint16_t current; + // Copy of end. Updated by Shrink/Grow, but is not overwritten by Drain. 
+ uint16_t end_copy; + // Lock updates only begin and end with a 32-bit write. + union { + struct { + uint16_t begin; + uint16_t end; + }; + uint32_t lock_update; + }; + + // Lock is used by Drain to stop concurrent mutations of the Header. + // Lock sets begin to 0xffff and end to 0, which makes Push and Pop fail + // regardless of current value. + bool IsLocked() const; + void Lock(); + }; + + // We cast Header to std::atomic<int64_t>. + static_assert(sizeof(Header) == sizeof(std::atomic<int64_t>), + "bad Header size"); + + Slabs* slabs_ = nullptr; + size_t shift_ = 0; + // This is in units of bytes. + size_t virtual_cpu_id_offset_ = offsetof(kernel_rseq, cpu_id); + + Slabs* CpuMemoryStart(int cpu) const; + std::atomic<int64_t>* GetHeader(int cpu, size_t cl) const; + static Header LoadHeader(std::atomic<int64_t>* hdrp); + static void StoreHeader(std::atomic<int64_t>* hdrp, Header hdr); + static int CompareAndSwapHeader(int cpu, std::atomic<int64_t>* hdrp, + Header old, Header hdr, + size_t virtual_cpu_id_offset); +}; + +template <size_t NumClasses> +inline size_t TcmallocSlab<NumClasses>::Length(int cpu, size_t cl) const { + Header hdr = LoadHeader(GetHeader(cpu, cl)); + return hdr.IsLocked() ? 0 : hdr.current - hdr.begin; +} + +template <size_t NumClasses> +inline size_t TcmallocSlab<NumClasses>::Capacity(int cpu, size_t cl) const { + Header hdr = LoadHeader(GetHeader(cpu, cl)); + return hdr.IsLocked() ? 0 : hdr.end - hdr.begin; +} + +template <size_t NumClasses> +inline size_t TcmallocSlab<NumClasses>::Grow(int cpu, size_t cl, size_t len, + size_t max_cap) { + const size_t virtual_cpu_id_offset = virtual_cpu_id_offset_; + std::atomic<int64_t>* hdrp = GetHeader(cpu, cl); + for (;;) { + Header old = LoadHeader(hdrp); + if (old.IsLocked() || old.end - old.begin == max_cap) { + return 0; + } + uint16_t n = std::min<uint16_t>(len, max_cap - (old.end - old.begin)); + Header hdr = old; + hdr.end += n; + hdr.end_copy += n; + const int ret = + CompareAndSwapHeader(cpu, hdrp, old, hdr, virtual_cpu_id_offset); + if (ret == cpu) { + return n; + } else if (ret >= 0) { + return 0; + } + } +} + +template <size_t NumClasses> +inline size_t TcmallocSlab<NumClasses>::Shrink(int cpu, size_t cl, size_t len) { + const size_t virtual_cpu_id_offset = virtual_cpu_id_offset_; + std::atomic<int64_t>* hdrp = GetHeader(cpu, cl); + for (;;) { + Header old = LoadHeader(hdrp); + if (old.IsLocked() || old.current == old.end) { + return 0; + } + uint16_t n = std::min<uint16_t>(len, old.end - old.current); + Header hdr = old; + hdr.end -= n; + hdr.end_copy -= n; + const int ret = + CompareAndSwapHeader(cpu, hdrp, old, hdr, virtual_cpu_id_offset); + if (ret == cpu) { + return n; + } else if (ret >= 0) { + return 0; + } + } +} + +#if defined(__x86_64__) +template <size_t NumClasses> +static inline ABSL_ATTRIBUTE_ALWAYS_INLINE int TcmallocSlab_Internal_Push( + typename TcmallocSlab<NumClasses>::Slabs* slabs, size_t cl, void* item, + const size_t shift, OverflowHandler f, const size_t virtual_cpu_id_offset) { +#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO + asm goto( +#else + bool overflow; + asm volatile( +#endif + // TODO(b/141629158): __rseq_cs only needs to be writeable to allow for + // relocations, but could be read-only for non-PIE builds. 
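+      //
+      // In effect, the committed fast path below is (a sketch of the asm,
+      // not compilable code):
+      //
+      //   cpu  = __rseq_abi.cpu_id;  // or the vcpu id, per rseq_cpu_offset
+      //   base = slabs + (cpu << shift);
+      //   idx  = base->header[cl].current;
+      //   if (idx >= base->header[cl].end) goto overflow;
+      //   ((void**)base)[idx] = item;
+      //   base->header[cl].current = idx + 1;  // the single committing store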
+ ".pushsection __rseq_cs, \"aw?\"\n" + ".balign 32\n" + ".local __rseq_cs_TcmallocSlab_Internal_Push_%=\n" + ".type __rseq_cs_TcmallocSlab_Internal_Push_%=,@object\n" + ".size __rseq_cs_TcmallocSlab_Internal_Push_%=,32\n" + "__rseq_cs_TcmallocSlab_Internal_Push_%=:\n" + ".long 0x0\n" + ".long 0x0\n" + ".quad 4f\n" + ".quad 5f - 4f\n" + ".quad 2f\n" + ".popsection\n" +#if !defined(__clang_major__) || __clang_major__ >= 9 + ".reloc 0, R_X86_64_NONE, 1f\n" +#endif + ".pushsection __rseq_cs_ptr_array, \"aw?\"\n" + "1:\n" + ".balign 8;" + ".quad __rseq_cs_TcmallocSlab_Internal_Push_%=\n" + // Force this section to be retained. It is for debugging, but is + // otherwise not referenced. + ".popsection\n" + ".pushsection .text.unlikely, \"ax?\"\n" + ".byte 0x0f, 0x1f, 0x05\n" + ".long %c[rseq_sig]\n" + ".local TcmallocSlab_Internal_Push_trampoline_%=\n" + ".type TcmallocSlab_Internal_Push_trampoline_%=,@function\n" + "TcmallocSlab_Internal_Push_trampoline_%=:\n" + "2:\n" + "jmp 3f\n" + ".size TcmallocSlab_Internal_Push_trampoline_%=, . - " + "TcmallocSlab_Internal_Push_trampoline_%=;\n" + ".popsection\n" + // Prepare + // + // TODO(b/151503411): Pending widespread availability of LLVM's asm + // goto with output contraints + // (https://github.com/llvm/llvm-project/commit/23c2a5ce33f0), we can + // return the register allocations to the compiler rather than using + // explicit clobbers. Prior to this, blocks which use asm goto cannot + // also specify outputs. + // + // r10: Scratch + // r11: Current + "3:\n" + "lea __rseq_cs_TcmallocSlab_Internal_Push_%=(%%rip), %%r10\n" + "mov %%r10, %c[rseq_cs_offset](%[rseq_abi])\n" + // Start + "4:\n" + // scratch = __rseq_abi.cpu_id; + "movzwl (%[rseq_abi], %[rseq_cpu_offset]), %%r10d\n" + // scratch = slabs + scratch + "shlq %b[shift], %%r10\n" + "add %[slabs], %%r10\n" + // r11 = slabs->current; + "movzwq (%%r10, %[cl], 8), %%r11\n" + // if (ABSL_PREDICT_FALSE(r11 >= slabs->end)) { goto overflow; } + "cmp 6(%%r10, %[cl], 8), %%r11w\n" +#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO + "jae %l[overflow_label]\n" +#else + "jae 5f\n" + // Important! code below this must not affect any flags (i.e.: ccae) + // If so, the above code needs to explicitly set a ccae return value. +#endif + "mov %[item], (%%r10, %%r11, 8)\n" + "lea 1(%%r11), %%r11\n" + "mov %%r11w, (%%r10, %[cl], 8)\n" + // Commit + "5:\n" + : +#if !TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO + [overflow] "=@ccae"(overflow) +#endif + : [rseq_abi] "r"(&__rseq_abi), + [rseq_cs_offset] "n"(offsetof(kernel_rseq, rseq_cs)), + [rseq_cpu_offset] "r"(virtual_cpu_id_offset), + [rseq_sig] "in"(TCMALLOC_PERCPU_RSEQ_SIGNATURE), [shift] "c"(shift), + [slabs] "r"(slabs), [cl] "r"(cl), [item] "r"(item) + : "cc", "memory", "r10", "r11" +#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO + : overflow_label +#endif + ); +#if !TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO + if (ABSL_PREDICT_FALSE(overflow)) { + goto overflow_label; + } +#endif + return 0; +overflow_label: + // As of 3/2020, LLVM's asm goto (even with output constraints) only provides + // values for the fallthrough path. The values on the taken branches are + // undefined. 
+ int cpu = VirtualRseqCpuId(virtual_cpu_id_offset); + return f(cpu, cl, item); +} +#endif // defined(__x86_64__) + +#if defined(__aarch64__) + +template <size_t NumClasses> +static inline ABSL_ATTRIBUTE_ALWAYS_INLINE int TcmallocSlab_Internal_Push( + typename TcmallocSlab<NumClasses>::Slabs* slabs, size_t cl, void* item, + const size_t shift, OverflowHandler f, const size_t virtual_cpu_id_offset) { + void* region_start; + uint64_t cpu_id; + void* end_ptr; + uintptr_t current; + uintptr_t end; + // Multiply cl by the bytesize of each header + size_t cl_lsl3 = cl * 8; +#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO + asm goto( +#else + bool overflow; + asm volatile( +#endif + // TODO(b/141629158): __rseq_cs only needs to be writeable to allow for + // relocations, but could be read-only for non-PIE builds. + ".pushsection __rseq_cs, \"aw?\"\n" + ".balign 32\n" + ".local __rseq_cs_TcmallocSlab_Internal_Push_%=\n" + ".type __rseq_cs_TcmallocSlab_Internal_Push_%=,@object\n" + ".size __rseq_cs_TcmallocSlab_Internal_Push_%=,32\n" + "__rseq_cs_TcmallocSlab_Internal_Push_%=:\n" + ".long 0x0\n" + ".long 0x0\n" + ".quad 4f\n" + ".quad 5f - 4f\n" + ".quad 2f\n" + ".popsection\n" +#if !defined(__clang_major__) || __clang_major__ >= 9 + ".reloc 0, R_AARCH64_NONE, 1f\n" +#endif + ".pushsection __rseq_cs_ptr_array, \"aw?\"\n" + "1:\n" + ".balign 8;" + ".quad __rseq_cs_TcmallocSlab_Internal_Push_%=\n" + // Force this section to be retained. It is for debugging, but is + // otherwise not referenced. + ".popsection\n" + ".pushsection .text.unlikely, \"ax?\"\n" + ".long %c[rseq_sig]\n" + ".local TcmallocSlab_Internal_Push_trampoline_%=\n" + ".type TcmallocSlab_Internal_Push_trampoline_%=,@function\n" + "TcmallocSlab_Internal_Push_trampoline_%=:\n" + "2:\n" + "b 3f\n" + ".popsection\n" + // Prepare + // + // TODO(b/151503411): Pending widespread availability of LLVM's asm + // goto with output contraints + // (https://github.com/llvm/llvm-project/commit/23c2a5ce33f0), we can + // return the register allocations to the compiler rather than using + // explicit clobbers. Prior to this, blocks which use asm goto cannot + // also specify outputs. + "3:\n" + // Use current as scratch here to hold address of this function's + // critical section + "adrp %[current], __rseq_cs_TcmallocSlab_Internal_Push_%=\n" + "add %[current], %[current], " + ":lo12:__rseq_cs_TcmallocSlab_Internal_Push_%=\n" + "str %[current], [%[rseq_abi], %c[rseq_cs_offset]]\n" + // Start + "4:\n" + // cpu_id = __rseq_abi.cpu_id; + "ldr %w[cpu_id], [%[rseq_abi], %[rseq_cpu_offset]]\n" + // region_start = Start of cpu region + "lsl %[region_start], %[cpu_id], %[shift]\n" + "add %[region_start], %[region_start], %[slabs]\n" + // end_ptr = &(slab_headers[0]->end) + "add %[end_ptr], %[region_start], #6\n" + // current = slab_headers[cl]->current (current index) + "ldrh %w[current], [%[region_start], %[cl_lsl3]]\n" + // end = slab_headers[cl]->end (end index) + "ldrh %w[end], [%[end_ptr], %[cl_lsl3]]\n" + // if (ABSL_PREDICT_FALSE(current >= end)) { goto overflow; } + "cmp %[end], %[current]\n" +#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO + "b.le %l[overflow_label]\n" +#else + "b.le 5f\n" + // Important! code below this must not affect any flags (i.e.: ccae) + // If so, the above code needs to explicitly set a ccae return value. 
+#endif + "str %[item], [%[region_start], %[current], LSL #3]\n" + "add %w[current], %w[current], #1\n" + "strh %w[current], [%[region_start], %[cl_lsl3]]\n" + // Commit + "5:\n" + : [end_ptr] "=&r"(end_ptr), [cpu_id] "=&r"(cpu_id), + [current] "=&r"(current), [end] "=&r"(end), + [region_start] "=&r"(region_start) + +#if !TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO + , + [overflow] "=@ccae"(overflow) +#endif + : [rseq_cpu_offset] "r"(virtual_cpu_id_offset), [slabs] "r"(slabs), + [cl_lsl3] "r"(cl_lsl3), [item] "r"(item), [rseq_abi] "r"(&__rseq_abi), + [shift] "r"(shift), + // Constants + [rseq_cs_offset] "n"(offsetof(kernel_rseq, rseq_cs)), + [rseq_sig] "in"(TCMALLOC_PERCPU_RSEQ_SIGNATURE) + : "cc", "memory" +#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO + : overflow_label +#endif + ); +#if !TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO + if (ABSL_PREDICT_FALSE(overflow)) { + goto overflow_label; + } +#endif + return 0; +overflow_label: +#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT + // As of 3/2020, LLVM's asm goto (even with output constraints) only provides + // values for the fallthrough path. The values on the taken branches are + // undefined. + int cpu = VirtualRseqCpuId(virtual_cpu_id_offset); +#else + // With asm goto--without output constraints--the value of scratch is + // well-defined by the compiler and our implementation. As an optimization on + // this case, we can avoid looking up cpu_id again, by undoing the + // transformation of cpu_id to the value of scratch. + int cpu = cpu_id; +#endif + return f(cpu, cl, item); +} +#endif // defined (__aarch64__) + +template <size_t NumClasses> +inline ABSL_ATTRIBUTE_ALWAYS_INLINE bool TcmallocSlab<NumClasses>::Push( + size_t cl, void* item, OverflowHandler f) { + ASSERT(item != nullptr); +#if defined(__x86_64__) || defined(__aarch64__) + return TcmallocSlab_Internal_Push<NumClasses>(slabs_, cl, item, shift_, f, + virtual_cpu_id_offset_) >= 0; +#else + if (shift_ == TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT) { + return TcmallocSlab_Internal_Push_FixedShift(slabs_, cl, item, f) >= 0; + } else { + return TcmallocSlab_Internal_Push(slabs_, cl, item, shift_, f) >= 0; + } +#endif +} + +#if defined(__x86_64__) +template <size_t NumClasses> +static inline ABSL_ATTRIBUTE_ALWAYS_INLINE void* TcmallocSlab_Internal_Pop( + typename TcmallocSlab<NumClasses>::Slabs* slabs, size_t cl, + UnderflowHandler f, const size_t shift, + const size_t virtual_cpu_id_offset) { + void* result; + void* scratch; + uintptr_t current; +#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT + asm goto +#else + bool underflow; + asm +#endif + ( + // TODO(b/141629158): __rseq_cs only needs to be writeable to allow + // for relocations, but could be read-only for non-PIE builds. + ".pushsection __rseq_cs, \"aw?\"\n" + ".balign 32\n" + ".local __rseq_cs_TcmallocSlab_Internal_Pop_%=\n" + ".type __rseq_cs_TcmallocSlab_Internal_Pop_%=,@object\n" + ".size __rseq_cs_TcmallocSlab_Internal_Pop_%=,32\n" + "__rseq_cs_TcmallocSlab_Internal_Pop_%=:\n" + ".long 0x0\n" + ".long 0x0\n" + ".quad 4f\n" + ".quad 5f - 4f\n" + ".quad 2f\n" + ".popsection\n" +#if !defined(__clang_major__) || __clang_major__ >= 9 + ".reloc 0, R_X86_64_NONE, 1f\n" +#endif + ".pushsection __rseq_cs_ptr_array, \"aw?\"\n" + "1:\n" + ".balign 8;" + ".quad __rseq_cs_TcmallocSlab_Internal_Pop_%=\n" + // Force this section to be retained. It is for debugging, but is + // otherwise not referenced. 
+ ".popsection\n" + ".pushsection .text.unlikely, \"ax?\"\n" + ".byte 0x0f, 0x1f, 0x05\n" + ".long %c[rseq_sig]\n" + ".local TcmallocSlab_Internal_Pop_trampoline_%=\n" + ".type TcmallocSlab_Internal_Pop_trampoline_%=,@function\n" + "TcmallocSlab_Internal_Pop_trampoline_%=:\n" + "2:\n" + "jmp 3f\n" + ".size TcmallocSlab_Internal_Pop_trampoline_%=, . - " + "TcmallocSlab_Internal_Pop_trampoline_%=;\n" + ".popsection\n" + // Prepare + "3:\n" + "lea __rseq_cs_TcmallocSlab_Internal_Pop_%=(%%rip), %[scratch];\n" + "mov %[scratch], %c[rseq_cs_offset](%[rseq_abi])\n" + // Start + "4:\n" + // scratch = __rseq_abi.cpu_id; + "movzwl (%[rseq_abi], %[rseq_cpu_offset]), %k[scratch]\n" + // scratch = slabs + scratch + "shlq %b[shift], %[scratch]\n" + "add %[slabs], %[scratch]\n" + // current = scratch->header[cl].current; + "movzwq (%[scratch], %[cl], 8), %[current]\n" + // if (ABSL_PREDICT_FALSE(scratch->header[cl].begin > current)) + "cmp 4(%[scratch], %[cl], 8), %w[current]\n" +#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT + "jbe %l[underflow_path]\n" +#else + "jbe 5f\n" + // Important! code below this must not affect any flags (i.e.: ccbe) + // If so, the above code needs to explicitly set a ccbe return value. +#endif + "mov -16(%[scratch], %[current], 8), %[result]\n" + // A note about prefetcht0 in Pop: While this prefetch may appear + // costly, trace analysis shows the target is frequently used + // (b/70294962). Stalling on a TLB miss at the prefetch site (which + // has no deps) and prefetching the line async is better than stalling + // at the use (which may have deps) to fill the TLB and the cache + // miss. + "prefetcht0 (%[result])\n" + "movq -8(%[scratch], %[current], 8), %[result]\n" + "lea -1(%[current]), %[current]\n" + "mov %w[current], (%[scratch], %[cl], 8)\n" + // Commit + "5:\n" + : [result] "=&r"(result), +#if !TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT + [underflow] "=@ccbe"(underflow), +#endif + [scratch] "=&r"(scratch), [current] "=&r"(current) + : [rseq_abi] "r"(&__rseq_abi), + [rseq_cs_offset] "n"(offsetof(kernel_rseq, rseq_cs)), + [rseq_cpu_offset] "r"(virtual_cpu_id_offset), + [rseq_sig] "n"(TCMALLOC_PERCPU_RSEQ_SIGNATURE), [shift] "c"(shift), + [slabs] "r"(slabs), [cl] "r"(cl) + : "cc", "memory" +#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT + : underflow_path +#endif + ); +#if !TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT + if (ABSL_PREDICT_FALSE(underflow)) { + goto underflow_path; + } +#endif + + return result; +underflow_path: +#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT + // As of 3/2020, LLVM's asm goto (even with output constraints) only provides + // values for the fallthrough path. The values on the taken branches are + // undefined. + int cpu = VirtualRseqCpuId(virtual_cpu_id_offset); +#else + // With asm goto--without output constraints--the value of scratch is + // well-defined by the compiler and our implementation. As an optimization on + // this case, we can avoid looking up cpu_id again, by undoing the + // transformation of cpu_id to the value of scratch. 
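+  // (scratch still holds slabs + (cpu << shift) from the sequence above, so
+  // subtracting slabs and shifting right recovers the cpu id.)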
+ int cpu = + (reinterpret_cast<char*>(scratch) - reinterpret_cast<char*>(slabs)) >> + shift; +#endif + return f(cpu, cl); +} +#endif // defined(__x86_64__) + +#if defined(__aarch64__) +template <size_t NumClasses> +static inline ABSL_ATTRIBUTE_ALWAYS_INLINE void* TcmallocSlab_Internal_Pop( + typename TcmallocSlab<NumClasses>::Slabs* slabs, size_t cl, + UnderflowHandler f, const size_t shift, + const size_t virtual_cpu_id_offset) { + void* result; + void* region_start; + uint64_t cpu_id; + void* begin_ptr; + uintptr_t current; + uintptr_t new_current; + uintptr_t begin; + // Multiply cl by the bytesize of each header + size_t cl_lsl3 = cl * 8; +#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT + asm goto +#else + bool underflow; + asm +#endif + ( + // TODO(b/141629158): __rseq_cs only needs to be writeable to allow + // for relocations, but could be read-only for non-PIE builds. + ".pushsection __rseq_cs, \"aw?\"\n" + ".balign 32\n" + ".local __rseq_cs_TcmallocSlab_Internal_Pop_%=\n" + ".type __rseq_cs_TcmallocSlab_Internal_Pop_%=,@object\n" + ".size __rseq_cs_TcmallocSlab_Internal_Pop_%=,32\n" + "__rseq_cs_TcmallocSlab_Internal_Pop_%=:\n" + ".long 0x0\n" + ".long 0x0\n" + ".quad 4f\n" + ".quad 5f - 4f\n" + ".quad 2f\n" + ".popsection\n" +#if !defined(__clang_major__) || __clang_major__ >= 9 + ".reloc 0, R_AARCH64_NONE, 1f\n" +#endif + ".pushsection __rseq_cs_ptr_array, \"aw?\"\n" + "1:\n" + ".balign 8;" + ".quad __rseq_cs_TcmallocSlab_Internal_Pop_%=\n" + // Force this section to be retained. It is for debugging, but is + // otherwise not referenced. + ".popsection\n" + ".pushsection .text.unlikely, \"ax?\"\n" + ".long %c[rseq_sig]\n" + ".local TcmallocSlab_Internal_Pop_trampoline_%=\n" + ".type TcmallocSlab_Internal_Pop_trampoline_%=,@function\n" + "TcmallocSlab_Internal_Pop_trampoline_%=:\n" + "2:\n" + "b 3f\n" + ".popsection\n" + // Prepare + "3:\n" + // Use current as scratch here to hold address of this function's + // critical section + "adrp %[current], __rseq_cs_TcmallocSlab_Internal_Pop_%=\n" + "add %[current], %[current], " + ":lo12:__rseq_cs_TcmallocSlab_Internal_Pop_%=\n" + "str %[current], [%[rseq_abi], %c[rseq_cs_offset]]\n" + // Start + "4:\n" + // cpu_id = __rseq_abi.cpu_id; + "ldr %w[cpu_id], [%[rseq_abi], %[rseq_cpu_offset]]\n" + // region_start = Start of cpu region + "lsl %[region_start], %[cpu_id], %[shift]\n" + "add %[region_start], %[region_start], %[slabs]\n" + // begin_ptr = &(slab_headers[0]->begin) + "add %[begin_ptr], %[region_start], #4\n" + // current = slab_headers[cl]->current (current index) + "ldrh %w[current], [%[region_start], %[cl_lsl3]]\n" + // begin = slab_headers[cl]->begin (begin index) + "ldrh %w[begin], [%[begin_ptr], %[cl_lsl3]]\n" + // if (ABSL_PREDICT_FALSE(begin >= current)) { goto overflow; } + "cmp %w[begin], %w[current]\n" + "sub %w[new_current], %w[current], #1\n" +#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT + "b.ge %l[underflow_path]\n" +#else + "b.ge 5f\n" + // Important! code below this must not affect any flags (i.e.: ccbe) + // If so, the above code needs to explicitly set a ccbe return value. 
+#endif + // current-- + "ldr %[result], [%[region_start], %[new_current], LSL #3]\n" + "strh %w[new_current], [%[region_start], %[cl_lsl3]]\n" + // Commit + "5:\n" + : +#if !TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT + [underflow] "=@ccbe"(underflow), +#endif + [result] "=&r"(result), + // Temps + [cpu_id] "=&r"(cpu_id), [region_start] "=&r"(region_start), + [begin] "=&r"(begin), [current] "=&r"(current), + [new_current] "=&r"(new_current), [begin_ptr] "=&r"(begin_ptr) + // Real inputs + : [rseq_cpu_offset] "r"(virtual_cpu_id_offset), [slabs] "r"(slabs), + [cl_lsl3] "r"(cl_lsl3), [rseq_abi] "r"(&__rseq_abi), + [shift] "r"(shift), + // constants + [rseq_cs_offset] "in"(offsetof(kernel_rseq, rseq_cs)), + [rseq_sig] "in"(TCMALLOC_PERCPU_RSEQ_SIGNATURE) + : "cc", "memory" +#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT + : underflow_path +#endif + ); +#if !TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT + if (ABSL_PREDICT_FALSE(underflow)) { + goto underflow_path; + } +#endif + + return result; +underflow_path: +#if TCMALLOC_PERCPU_USE_RSEQ_ASM_GOTO_OUTPUT + // As of 3/2020, LLVM's asm goto (even with output constraints) only provides + // values for the fallthrough path. The values on the taken branches are + // undefined. + int cpu = VirtualRseqCpuId(virtual_cpu_id_offset); +#else + // With asm goto--without output constraints--the value of scratch is + // well-defined by the compiler and our implementation. As an optimization on + // this case, we can avoid looking up cpu_id again, by undoing the + // transformation of cpu_id to the value of scratch. + int cpu = cpu_id; +#endif + return f(cpu, cl); +} +#endif // defined(__aarch64__) + +template <size_t NumClasses> +inline ABSL_ATTRIBUTE_ALWAYS_INLINE void* TcmallocSlab<NumClasses>::Pop( + size_t cl, UnderflowHandler f) { +#if defined(__x86_64__) || defined(__aarch64__) + return TcmallocSlab_Internal_Pop<NumClasses>(slabs_, cl, f, shift_, + virtual_cpu_id_offset_); +#else + if (shift_ == TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT) { + return TcmallocSlab_Internal_Pop_FixedShift(slabs_, cl, f); + } else { + return TcmallocSlab_Internal_Pop(slabs_, cl, f, shift_); + } +#endif +} + +static inline void* NoopUnderflow(int cpu, size_t cl) { return nullptr; } + +static inline int NoopOverflow(int cpu, size_t cl, void* item) { return -1; } + +template <size_t NumClasses> +inline size_t TcmallocSlab<NumClasses>::PushBatch(size_t cl, void** batch, + size_t len) { + ASSERT(len != 0); + if (shift_ == TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT) { +#if TCMALLOC_PERCPU_USE_RSEQ + // TODO(b/159923407): TcmallocSlab_Internal_PushBatch_FixedShift needs to be + // refactored to take a 5th parameter (virtual_cpu_id_offset) to avoid + // needing to dispatch on two separate versions of the same function with + // only minor differences between them. 
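+  // The only difference between the two variants dispatched below is which
+  // rseq field identifies the current cpu: the kernel-provided cpu_id, or
+  // the flat virtual vcpu_id that Init() selects on x86-64 when
+  // UsingFlatVirtualCpus() is true.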
+ switch (virtual_cpu_id_offset_) { + case offsetof(kernel_rseq, cpu_id): + return TcmallocSlab_Internal_PushBatch_FixedShift(slabs_, cl, batch, + len); +#ifdef __x86_64__ + case offsetof(kernel_rseq, vcpu_id): + return TcmallocSlab_Internal_PushBatch_FixedShift_VCPU(slabs_, cl, + batch, len); +#endif // __x86_64__ + default: + __builtin_unreachable(); + } +#else // !TCMALLOC_PERCPU_USE_RSEQ + __builtin_unreachable(); +#endif // !TCMALLOC_PERCPU_USE_RSEQ + } else { + size_t n = 0; + // Push items until either all done or a push fails + while (n < len && Push(cl, batch[len - 1 - n], NoopOverflow)) { + n++; + } + return n; + } +} + +template <size_t NumClasses> +inline size_t TcmallocSlab<NumClasses>::PopBatch(size_t cl, void** batch, + size_t len) { + ASSERT(len != 0); + size_t n = 0; + if (shift_ == TCMALLOC_PERCPU_TCMALLOC_FIXED_SLAB_SHIFT) { +#if TCMALLOC_PERCPU_USE_RSEQ + // TODO(b/159923407): TcmallocSlab_Internal_PopBatch_FixedShift needs to be + // refactored to take a 5th parameter (virtual_cpu_id_offset) to avoid + // needing to dispatch on two separate versions of the same function with + // only minor differences between them. + switch (virtual_cpu_id_offset_) { + case offsetof(kernel_rseq, cpu_id): + n = TcmallocSlab_Internal_PopBatch_FixedShift(slabs_, cl, batch, len); + break; +#ifdef __x86_64__ + case offsetof(kernel_rseq, vcpu_id): + n = TcmallocSlab_Internal_PopBatch_FixedShift_VCPU(slabs_, cl, batch, + len); + break; +#endif // __x86_64__ + default: + __builtin_unreachable(); + } + + // PopBatch is implemented in assembly, msan does not know that the returned + // batch is initialized. + ANNOTATE_MEMORY_IS_INITIALIZED(batch, n * sizeof(batch[0])); +#else // !TCMALLOC_PERCPU_USE_RSEQ + __builtin_unreachable(); +#endif // !TCMALLOC_PERCPU_USE_RSEQ + } else { + // Pop items until either all done or a pop fails + while (n < len && (batch[n] = Pop(cl, NoopUnderflow))) { + n++; + } + } + return n; +} + +template <size_t NumClasses> +inline typename TcmallocSlab<NumClasses>::Slabs* +TcmallocSlab<NumClasses>::CpuMemoryStart(int cpu) const { + char* const bytes = reinterpret_cast<char*>(slabs_); + return reinterpret_cast<Slabs*>(&bytes[cpu << shift_]); +} + +template <size_t NumClasses> +inline std::atomic<int64_t>* TcmallocSlab<NumClasses>::GetHeader( + int cpu, size_t cl) const { + return &CpuMemoryStart(cpu)->header[cl]; +} + +template <size_t NumClasses> +inline typename TcmallocSlab<NumClasses>::Header +TcmallocSlab<NumClasses>::LoadHeader(std::atomic<int64_t>* hdrp) { + return absl::bit_cast<Header>(hdrp->load(std::memory_order_relaxed)); +} + +template <size_t NumClasses> +inline void TcmallocSlab<NumClasses>::StoreHeader(std::atomic<int64_t>* hdrp, + Header hdr) { + hdrp->store(absl::bit_cast<int64_t>(hdr), std::memory_order_relaxed); +} + +template <size_t NumClasses> +inline int TcmallocSlab<NumClasses>::CompareAndSwapHeader( + int cpu, std::atomic<int64_t>* hdrp, Header old, Header hdr, + const size_t virtual_cpu_id_offset) { +#if __SIZEOF_POINTER__ == 8 + const int64_t old_raw = absl::bit_cast<int64_t>(old); + const int64_t new_raw = absl::bit_cast<int64_t>(hdr); + return CompareAndSwapUnsafe(cpu, hdrp, static_cast<intptr_t>(old_raw), + static_cast<intptr_t>(new_raw), + virtual_cpu_id_offset); +#else + Crash(kCrash, __FILE__, __LINE__, "This architecture is not supported."); +#endif +} + +template <size_t NumClasses> +inline bool TcmallocSlab<NumClasses>::Header::IsLocked() const { + return begin == 0xffffu; +} + +template <size_t NumClasses> +inline void 
TcmallocSlab<NumClasses>::Header::Lock() { + // Write 0xffff to begin and 0 to end. This blocks new Push'es and Pop's. + // Note: we write only 4 bytes. The first 4 bytes are left intact. + // See Drain method for details. tl;dr: C++ does not allow us to legally + // express this without undefined behavior. + std::atomic<int32_t>* p = + reinterpret_cast<std::atomic<int32_t>*>(&lock_update); + Header hdr; + hdr.begin = 0xffffu; + hdr.end = 0; + p->store(absl::bit_cast<int32_t>(hdr.lock_update), std::memory_order_relaxed); +} + +template <size_t NumClasses> +void TcmallocSlab<NumClasses>::Init(void*(alloc)(size_t size), + size_t (*capacity)(size_t cl), bool lazy, + size_t shift) { +#ifdef __x86_64__ + if (UsingFlatVirtualCpus()) { + virtual_cpu_id_offset_ = offsetof(kernel_rseq, vcpu_id); + } +#endif // __x86_64__ + + shift_ = shift; + size_t mem_size = absl::base_internal::NumCPUs() * (1ul << shift); + void* backing = alloc(mem_size); + // MSan does not see writes in assembly. + ANNOTATE_MEMORY_IS_INITIALIZED(backing, mem_size); + if (!lazy) { + memset(backing, 0, mem_size); + } + slabs_ = static_cast<Slabs*>(backing); + size_t bytes_used = 0; + for (int cpu = 0; cpu < absl::base_internal::NumCPUs(); ++cpu) { + bytes_used += sizeof(std::atomic<int64_t>) * NumClasses; + void** elems = CpuMemoryStart(cpu)->mem; + + for (size_t cl = 0; cl < NumClasses; ++cl) { + size_t cap = capacity(cl); + CHECK_CONDITION(static_cast<uint16_t>(cap) == cap); + + if (cap == 0) { + continue; + } + + if (cap) { + if (!lazy) { + // In Pop() we prefetch the item a subsequent Pop() would return; this + // is slow if it's not a valid pointer. To avoid this problem when + // popping the last item, keep one fake item before the actual ones + // (that points, safely, to itself.) + *elems = elems; + elems++; + } + + // One extra element for prefetch + bytes_used += (cap + 1) * sizeof(void*); + } + + if (!lazy) { + // TODO(ckennelly): Consolidate this initialization logic with that in + // InitCPU. + size_t offset = elems - reinterpret_cast<void**>(CpuMemoryStart(cpu)); + CHECK_CONDITION(static_cast<uint16_t>(offset) == offset); + + Header hdr; + hdr.current = offset; + hdr.begin = offset; + hdr.end = offset; + hdr.end_copy = offset; + + StoreHeader(GetHeader(cpu, cl), hdr); + } + + elems += cap; + CHECK_CONDITION(reinterpret_cast<char*>(elems) - + reinterpret_cast<char*>(CpuMemoryStart(cpu)) <= + (1 << shift_)); + } + } + // Check for less than 90% usage of the reserved memory + if (bytes_used * 10 < 9 * mem_size) { + Log(kLog, __FILE__, __LINE__, "Bytes used per cpu of available", bytes_used, + mem_size); + } +} + +template <size_t NumClasses> +void TcmallocSlab<NumClasses>::InitCPU(int cpu, size_t (*capacity)(size_t cl)) { + const size_t virtual_cpu_id_offset = virtual_cpu_id_offset_; + + // TODO(ckennelly): Consolidate this logic with Drain. + // Phase 1: verify no header is locked + for (size_t cl = 0; cl < NumClasses; ++cl) { + Header hdr = LoadHeader(GetHeader(cpu, cl)); + CHECK_CONDITION(!hdr.IsLocked()); + } + + // Phase 2: Stop concurrent mutations. Locking ensures that there exists no + // value of current such that begin < current. + for (bool done = false; !done;) { + for (size_t cl = 0; cl < NumClasses; ++cl) { + // Note: this reinterpret_cast and write in Lock lead to undefined + // behavior, because the actual object type is std::atomic<int64_t>. But + // C++ does not allow to legally express what we need here: atomic writes + // of different sizes. 
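+      // Once a header is locked (begin == 0xffff, end == 0), Pop observes
+      // begin >= current and takes the underflow path, while Push observes
+      // current >= end and takes the overflow path, so neither can commit.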
+ reinterpret_cast<Header*>(GetHeader(cpu, cl))->Lock(); + } + FenceCpu(cpu, virtual_cpu_id_offset); + done = true; + for (size_t cl = 0; cl < NumClasses; ++cl) { + Header hdr = LoadHeader(GetHeader(cpu, cl)); + if (!hdr.IsLocked()) { + // Header was overwritten by Grow/Shrink. Retry. + done = false; + break; + } + } + } + + // Phase 3: Initialize prefetch target and compute the offsets for the + // boundaries of each size class' cache. + void** elems = CpuMemoryStart(cpu)->mem; + uint16_t begin[NumClasses]; + for (size_t cl = 0; cl < NumClasses; ++cl) { + size_t cap = capacity(cl); + CHECK_CONDITION(static_cast<uint16_t>(cap) == cap); + + if (cap) { + // In Pop() we prefetch the item a subsequent Pop() would return; this is + // slow if it's not a valid pointer. To avoid this problem when popping + // the last item, keep one fake item before the actual ones (that points, + // safely, to itself.) + *elems = elems; + elems++; + } + + size_t offset = elems - reinterpret_cast<void**>(CpuMemoryStart(cpu)); + CHECK_CONDITION(static_cast<uint16_t>(offset) == offset); + begin[cl] = offset; + + elems += cap; + CHECK_CONDITION(reinterpret_cast<char*>(elems) - + reinterpret_cast<char*>(CpuMemoryStart(cpu)) <= + (1 << shift_)); + } + + // Phase 4: Store current. No restartable sequence will proceed + // (successfully) as !(begin < current) for all size classes. + for (size_t cl = 0; cl < NumClasses; ++cl) { + std::atomic<int64_t>* hdrp = GetHeader(cpu, cl); + Header hdr = LoadHeader(hdrp); + hdr.current = begin[cl]; + StoreHeader(hdrp, hdr); + } + FenceCpu(cpu, virtual_cpu_id_offset); + + // Phase 5: Allow access to this cache. + for (size_t cl = 0; cl < NumClasses; ++cl) { + Header hdr; + hdr.current = begin[cl]; + hdr.begin = begin[cl]; + hdr.end = begin[cl]; + hdr.end_copy = begin[cl]; + StoreHeader(GetHeader(cpu, cl), hdr); + } +} + +template <size_t NumClasses> +void TcmallocSlab<NumClasses>::Destroy(void(free)(void*)) { + free(slabs_); + slabs_ = nullptr; +} + +template <size_t NumClasses> +size_t TcmallocSlab<NumClasses>::ShrinkOtherCache(int cpu, size_t cl, + size_t len, void* ctx, + ShrinkHandler f) { + ASSERT(cpu >= 0); + ASSERT(cpu < absl::base_internal::NumCPUs()); + const size_t virtual_cpu_id_offset = virtual_cpu_id_offset_; + + // Phase 1: Collect begin as it will be overwritten by the lock. + std::atomic<int64_t>* hdrp = GetHeader(cpu, cl); + Header hdr = LoadHeader(hdrp); + CHECK_CONDITION(!hdr.IsLocked()); + const uint16_t begin = hdr.begin; + + // Phase 2: stop concurrent mutations. + for (bool done = false; !done;) { + reinterpret_cast<Header*>(GetHeader(cpu, cl))->Lock(); + FenceCpu(cpu, virtual_cpu_id_offset); + done = true; + + hdr = LoadHeader(GetHeader(cpu, cl)); + if (!hdr.IsLocked()) { + // Header was overwritten by Grow/Shrink. Retry. + done = false; + } + } + + // Phase 3: If we do not have len number of items to shrink, we try + // to pop items from the list first to create enough capacity that can be + // shrunk. If we pop items, we also execute callbacks. + // + // We can't write all 4 fields at once with a single write, because Pop does + // several non-atomic loads of the fields. Consider that a concurrent Pop + // loads old current (still pointing somewhere in the middle of the region); + // then we update all fields with a single write; then Pop loads the updated + // begin which allows it to proceed; then it decrements current below begin. + // + // So we instead first just update current--our locked begin/end guarantee + // no Push/Pop will make progress. 
Once we Fence below, we know no Push/Pop + // is using the old current, and can safely update begin/end to be an empty + // slab. + + const uint16_t unused = hdr.end_copy - hdr.current; + if (unused < len) { + const uint16_t expected_pop = len - unused; + const uint16_t actual_pop = + std::min<uint16_t>(expected_pop, hdr.current - begin); + void** batch = + reinterpret_cast<void**>(GetHeader(cpu, 0) + hdr.current - actual_pop); + f(ctx, cl, batch, actual_pop); + hdr.current -= actual_pop; + StoreHeader(hdrp, hdr); + FenceCpu(cpu, virtual_cpu_id_offset); + } + + // Phase 4: Shrink the capacity. Use a copy of begin and end_copy to + // restore the header, shrink it, and return the length by which the + // region was shrunk. + hdr.begin = begin; + const uint16_t to_shrink = + std::min<uint16_t>(len, hdr.end_copy - hdr.current); + hdr.end_copy -= to_shrink; + hdr.end = hdr.end_copy; + StoreHeader(hdrp, hdr); + return to_shrink; +} + +template <size_t NumClasses> +void TcmallocSlab<NumClasses>::Drain(int cpu, void* ctx, DrainHandler f) { + CHECK_CONDITION(cpu >= 0); + CHECK_CONDITION(cpu < absl::base_internal::NumCPUs()); + const size_t virtual_cpu_id_offset = virtual_cpu_id_offset_; + + // Push/Pop/Grow/Shrink can be executed concurrently with Drain. + // That's not an expected case, but it must be handled for correctness. + // Push/Pop/Grow/Shrink can only be executed on <cpu> and use rseq primitives. + // Push only updates current. Pop only updates current and end_copy + // (it mutates only current but uses 4 byte write for performance). + // Grow/Shrink mutate end and end_copy using 64-bit stores. + + // We attempt to stop all concurrent operations by writing 0xffff to begin + // and 0 to end. However, Grow/Shrink can overwrite our write, so we do this + // in a loop until we know that the header is in quiescent state. + + // Phase 1: collect all begin's (these are not mutated by anybody else). + uint16_t begin[NumClasses]; + for (size_t cl = 0; cl < NumClasses; ++cl) { + Header hdr = LoadHeader(GetHeader(cpu, cl)); + CHECK_CONDITION(!hdr.IsLocked()); + begin[cl] = hdr.begin; + } + + // Phase 2: stop concurrent mutations. + for (bool done = false; !done;) { + for (size_t cl = 0; cl < NumClasses; ++cl) { + // Note: this reinterpret_cast and write in Lock lead to undefined + // behavior, because the actual object type is std::atomic<int64_t>. But + // C++ does not allow to legally express what we need here: atomic writes + // of different sizes. + reinterpret_cast<Header*>(GetHeader(cpu, cl))->Lock(); + } + FenceCpu(cpu, virtual_cpu_id_offset); + done = true; + for (size_t cl = 0; cl < NumClasses; ++cl) { + Header hdr = LoadHeader(GetHeader(cpu, cl)); + if (!hdr.IsLocked()) { + // Header was overwritten by Grow/Shrink. Retry. + done = false; + break; + } + } + } + + // Phase 3: execute callbacks. + for (size_t cl = 0; cl < NumClasses; ++cl) { + Header hdr = LoadHeader(GetHeader(cpu, cl)); + // We overwrote begin and end, instead we use our local copy of begin + // and end_copy. + size_t n = hdr.current - begin[cl]; + size_t cap = hdr.end_copy - begin[cl]; + void** batch = reinterpret_cast<void**>(GetHeader(cpu, 0) + begin[cl]); + f(ctx, cl, batch, n, cap); + } + + // Phase 4: reset current to beginning of the region. + // We can't write all 4 fields at once with a single write, because Pop does + // several non-atomic loads of the fields. 
Consider that a concurrent Pop + // loads old current (still pointing somewhere in the middle of the region); + // then we update all fields with a single write; then Pop loads the updated + // begin which allows it to proceed; then it decrements current below begin. + // + // So we instead first just update current--our locked begin/end guarantee + // no Push/Pop will make progress. Once we Fence below, we know no Push/Pop + // is using the old current, and can safely update begin/end to be an empty + // slab. + for (size_t cl = 0; cl < NumClasses; ++cl) { + std::atomic<int64_t>* hdrp = GetHeader(cpu, cl); + Header hdr = LoadHeader(hdrp); + hdr.current = begin[cl]; + StoreHeader(hdrp, hdr); + } + + // Phase 5: fence and reset the remaining fields to beginning of the region. + // This allows concurrent mutations again. + FenceCpu(cpu, virtual_cpu_id_offset); + for (size_t cl = 0; cl < NumClasses; ++cl) { + std::atomic<int64_t>* hdrp = GetHeader(cpu, cl); + Header hdr; + hdr.current = begin[cl]; + hdr.begin = begin[cl]; + hdr.end = begin[cl]; + hdr.end_copy = begin[cl]; + StoreHeader(hdrp, hdr); + } +} + +template <size_t NumClasses> +PerCPUMetadataState TcmallocSlab<NumClasses>::MetadataMemoryUsage() const { + PerCPUMetadataState result; + result.virtual_size = absl::base_internal::NumCPUs() * (1ul << shift_); + result.resident_size = MInCore::residence(slabs_, result.virtual_size); + return result; +} + +} // namespace percpu +} // namespace subtle +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_INTERNAL_PERCPU_TCMALLOC_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/percpu_tcmalloc_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_tcmalloc_test.cc new file mode 100644 index 0000000000..39f07fbe67 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/percpu_tcmalloc_test.cc @@ -0,0 +1,855 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
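+
+// The tests below exercise the TcmallocSlab API roughly as follows (an
+// illustrative sketch, not a prescription; the handler names, alloc_fn and
+// the capacity lambda are arbitrary test-local choices):
+//
+//   TcmallocSlab<kNumClasses> slab;
+//   slab.Init(alloc_fn, [](size_t cl) { return kCap; }, /*lazy=*/false,
+//             kShift);
+//   if (slab.Push(cl, item, OverflowHandler)) { /* item cached on this cpu */ }
+//   if (void* p = slab.Pop(cl, UnderflowHandler)) { /* reuse p */ }
+//   slab.Drain(cpu, ctx, DrainHandler);  // flush one cpu's per-class caches
+//   slab.Destroy(free);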
+ +#include "tcmalloc/internal/percpu_tcmalloc.h" + +#include <fcntl.h> +#include <stddef.h> +#include <stdlib.h> +#include <sys/mman.h> + +#include <atomic> +#include <thread> // NOLINT(build/c++11) +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/internal/sysinfo.h" +#include "absl/container/fixed_array.h" +#include "absl/container/flat_hash_set.h" +#include "absl/debugging/symbolize.h" +#include "absl/random/random.h" +#include "absl/random/seed_sequences.h" +#include "absl/strings/string_view.h" +#include "absl/synchronization/mutex.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "absl/types/span.h" +#include "benchmark/benchmark.h" +#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/util.h" +#include "tcmalloc/malloc_extension.h" +#include "tcmalloc/testing/testutil.h" + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace subtle { +namespace percpu { +namespace { + +using testing::Each; +using testing::UnorderedElementsAreArray; + +// Choose an available CPU and executes the passed functor on it. The +// cpu that is chosen, as long as a valid disjoint remote CPU will be passed +// as arguments to it. +// +// If the functor believes that it has failed in a manner attributable to +// external modification, then it should return false and we will attempt to +// retry the operation (up to a constant limit). +void RunOnSingleCpuWithRemoteCpu(std::function<bool(int, int)> test) { + constexpr int kMaxTries = 1000; + + for (int i = 0; i < kMaxTries; i++) { + auto allowed = AllowedCpus(); + + int target_cpu = allowed[0], remote_cpu; + + // We try to pass something actually within the mask, but, for most tests it + // only needs to exist. + if (allowed.size() > 1) + remote_cpu = allowed[1]; + else + remote_cpu = target_cpu ? 0 : 1; + + ScopedAffinityMask mask(target_cpu); + + // If the test function failed, assert that the mask was tampered with. + if (!test(target_cpu, remote_cpu)) + ASSERT_TRUE(mask.Tampered()); + else + return; + } + + ASSERT_TRUE(false); +} + +// Equivalent to RunOnSingleCpuWithRemoteCpu, except that only the CPU the +// functor is executing on is passed. +void RunOnSingleCpu(std::function<bool(int)> test) { + auto wrapper = [&test](int this_cpu, int unused) { return test(this_cpu); }; + RunOnSingleCpuWithRemoteCpu(wrapper); +} + +constexpr size_t kStressSlabs = 4; +constexpr size_t kStressCapacity = 4; + +constexpr size_t kShift = 18; +typedef class TcmallocSlab<kStressSlabs> TcmallocSlab; + +enum class SlabInit { + kEager, + kLazy, +}; + +class TcmallocSlabTest : public testing::TestWithParam<SlabInit> { + protected: + TcmallocSlabTest() { + slab_test_ = &slab_; + metadata_bytes_ = 0; + +// Ignore false-positive warning in GCC. 
For more information, see: +// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96003 +#pragma GCC diagnostic ignored "-Wnonnull" + slab_.Init( + &ByteCountingMalloc, [](size_t cl) { return kCapacity; }, + GetParam() == SlabInit::kLazy, kShift); + + for (int i = 0; i < kCapacity; ++i) { + object_ptrs_[i] = &objects_[i]; + } + } + + ~TcmallocSlabTest() override { slab_.Destroy(free); } + + template <int result> + static int ExpectOverflow(int cpu, size_t cl, void* item) { + EXPECT_EQ(cpu, current_cpu_); + EXPECT_EQ(cl, current_cl_); + EXPECT_FALSE(overflow_called_); + overflow_called_ = true; + return result; + } + + template <size_t result_object> + static void* ExpectUnderflow(int cpu, size_t cl) { + EXPECT_EQ(cpu, current_cpu_); + EXPECT_EQ(cl, current_cl_); + EXPECT_LT(result_object, kCapacity); + EXPECT_FALSE(underflow_called_); + underflow_called_ = true; + return &objects_[result_object]; + } + + template <int result> + bool PushExpectOverflow(TcmallocSlab* slab, size_t cl, void* item) { + bool res = slab->Push(cl, item, ExpectOverflow<result>); + EXPECT_TRUE(overflow_called_); + overflow_called_ = false; + return res; + } + + template <size_t result_object> + void* PopExpectUnderflow(TcmallocSlab* slab, size_t cl) { + void* res = slab->Pop(cl, ExpectUnderflow<result_object>); + EXPECT_TRUE(underflow_called_); + underflow_called_ = false; + return res; + } + + static void* ByteCountingMalloc(size_t size) { + const size_t kPageSize = getpagesize(); + void* ptr; + CHECK_CONDITION(posix_memalign(&ptr, kPageSize, size) == 0); + if (ptr) { + // Emulate obtaining memory as if we got it from mmap (zero'd). + memset(ptr, 0, size); + madvise(ptr, size, MADV_DONTNEED); + metadata_bytes_ += size; + } + return ptr; + } + + TcmallocSlab slab_; + + static constexpr size_t kCapacity = 10; + static char objects_[kCapacity]; + static void* object_ptrs_[kCapacity]; + static int current_cpu_; + static size_t current_cl_; + static bool overflow_called_; + static bool underflow_called_; + static TcmallocSlab* slab_test_; + static size_t metadata_bytes_; +}; + +static int ExpectNoOverflow(int cpu, size_t cl, void* item) { + CHECK_CONDITION(false && "overflow is not expected"); + return 0; +} + +static void* ExpectNoUnderflow(int cpu, size_t cl) { + CHECK_CONDITION(false && "underflow is not expected"); + return nullptr; +} + +char TcmallocSlabTest::objects_[TcmallocSlabTest::kCapacity]; +void* TcmallocSlabTest::object_ptrs_[TcmallocSlabTest::kCapacity]; +int TcmallocSlabTest::current_cpu_; +size_t TcmallocSlabTest::current_cl_; +bool TcmallocSlabTest::overflow_called_; +bool TcmallocSlabTest::underflow_called_; +TcmallocSlab* TcmallocSlabTest::slab_test_; +size_t TcmallocSlabTest::metadata_bytes_; + +TEST_P(TcmallocSlabTest, Metadata) { + PerCPUMetadataState r = slab_.MetadataMemoryUsage(); + + ASSERT_GT(metadata_bytes_, 0); + EXPECT_EQ(r.virtual_size, metadata_bytes_); + if (GetParam() == SlabInit::kLazy) { + EXPECT_EQ(r.resident_size, 0); + + if (!IsFast()) { + GTEST_SKIP() << "Need fast percpu. Skipping."; + return; + } + + // Initialize a core. Verify that the increased RSS is proportional to a + // core. + slab_.InitCPU(0, [](size_t cl) { return kCapacity; }); + + r = slab_.MetadataMemoryUsage(); + // We may fault a whole hugepage, so round up the expected per-core share to + // a full hugepage. 
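+    // For example, with kShift == 18 each cpu's share of the slab is 256 KiB,
+    // which the rounding below bumps to a single hugepage (2 MiB on typical
+    // x86-64 configurations).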
+ size_t expected = r.virtual_size / absl::base_internal::NumCPUs(); + expected = (expected + kHugePageSize - 1) & ~(kHugePageSize - 1); + + // A single core may be less than the full slab for that core, since we do + // not touch every page within the slab. + EXPECT_GE(expected, r.resident_size); + + // Read stats from the slab. This will fault additional memory. + for (int cpu = 0, n = absl::base_internal::NumCPUs(); cpu < n; ++cpu) { + // To inhibit optimization, verify the values are sensible. + for (int cl = 0; cl < kStressSlabs; ++cl) { + EXPECT_EQ(0, slab_.Length(cpu, cl)); + EXPECT_EQ(0, slab_.Capacity(cpu, cl)); + } + } + + PerCPUMetadataState post_stats = slab_.MetadataMemoryUsage(); + EXPECT_LE(post_stats.resident_size, metadata_bytes_); + EXPECT_GT(post_stats.resident_size, r.resident_size); + } else { + EXPECT_EQ(r.resident_size, metadata_bytes_); + } +} + +TEST_P(TcmallocSlabTest, Unit) { + if (MallocExtension::PerCpuCachesActive()) { + // This test unregisters rseq temporarily, as to decrease flakiness. + GTEST_SKIP() << "per-CPU TCMalloc is incompatible with unregistering rseq"; + } + + if (!IsFast()) { + GTEST_SKIP() << "Need fast percpu. Skipping."; + return; + } + + // Decide if we should expect a push or pop to be the first action on the CPU + // slab to trigger initialization. + absl::FixedArray<bool, 0> initialized(absl::base_internal::NumCPUs(), + GetParam() != SlabInit::kLazy); + + for (auto cpu : AllowedCpus()) { + SCOPED_TRACE(cpu); + + // Temporarily fake being on the given CPU. + ScopedFakeCpuId fake_cpu_id(cpu); + +#if !defined(__ppc__) + if (UsingFlatVirtualCpus()) { +#if TCMALLOC_PERCPU_USE_RSEQ + __rseq_abi.vcpu_id = cpu ^ 1; +#endif + cpu = cpu ^ 1; + } +#endif + current_cpu_ = cpu; + + for (size_t cl = 0; cl < kStressSlabs; ++cl) { + SCOPED_TRACE(cl); + current_cl_ = cl; + +#ifdef __ppc__ + // This is imperfect but the window between operations below is small. We + // can make this more precise around individual operations if we see + // measurable flakiness as a result. + if (fake_cpu_id.Tampered()) break; +#endif + + // Check new slab state. + ASSERT_EQ(slab_.Length(cpu, cl), 0); + ASSERT_EQ(slab_.Capacity(cpu, cl), 0); + + if (!initialized[cpu]) { +#pragma GCC diagnostic ignored "-Wnonnull" + void* ptr = slab_.Pop(cl, [](int cpu, size_t cl) { + slab_test_->InitCPU(cpu, [](size_t cl) { return kCapacity; }); + + return static_cast<void*>(slab_test_); + }); + + ASSERT_TRUE(ptr == slab_test_); + initialized[cpu] = true; + } + + // Test overflow/underflow handlers. + ASSERT_EQ(PopExpectUnderflow<5>(&slab_, cl), &objects_[5]); + ASSERT_FALSE(PushExpectOverflow<-1>(&slab_, cl, &objects_[0])); + ASSERT_FALSE(PushExpectOverflow<-2>(&slab_, cl, &objects_[0])); + ASSERT_TRUE(PushExpectOverflow<0>(&slab_, cl, &objects_[0])); + + // Grow capacity to kCapacity / 2. 
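+      // Grow(cpu, cl, n, max_cap) returns the capacity actually added, capped
+      // so that the size class never exceeds max_cap.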
+      ASSERT_EQ(slab_.Grow(cpu, cl, kCapacity / 2, kCapacity), kCapacity / 2);
+      ASSERT_EQ(slab_.Length(cpu, cl), 0);
+      ASSERT_EQ(slab_.Capacity(cpu, cl), kCapacity / 2);
+      ASSERT_EQ(PopExpectUnderflow<5>(&slab_, cl), &objects_[5]);
+      ASSERT_TRUE(slab_.Push(cl, &objects_[0], ExpectNoOverflow));
+      ASSERT_EQ(slab_.Length(cpu, cl), 1);
+      ASSERT_EQ(slab_.Capacity(cpu, cl), kCapacity / 2);
+      ASSERT_EQ(slab_.Pop(cl, ExpectNoUnderflow), &objects_[0]);
+      ASSERT_EQ(slab_.Length(cpu, cl), 0);
+      for (size_t i = 0; i < kCapacity / 2; ++i) {
+        ASSERT_TRUE(slab_.Push(cl, &objects_[i], ExpectNoOverflow));
+        ASSERT_EQ(slab_.Length(cpu, cl), i + 1);
+      }
+      ASSERT_FALSE(PushExpectOverflow<-1>(&slab_, cl, &objects_[0]));
+      for (size_t i = kCapacity / 2; i > 0; --i) {
+        ASSERT_EQ(slab_.Pop(cl, ExpectNoUnderflow), &objects_[i - 1]);
+        ASSERT_EQ(slab_.Length(cpu, cl), i - 1);
+      }
+      // Ensure that Shrink doesn't underflow the capacity.
+      ASSERT_EQ(slab_.Shrink(cpu, cl, kCapacity), kCapacity / 2);
+      ASSERT_EQ(slab_.Capacity(cpu, cl), 0);
+
+      // Grow capacity to kCapacity.
+      ASSERT_EQ(slab_.Grow(cpu, cl, kCapacity / 2, kCapacity), kCapacity / 2);
+      // Ensure that Grow doesn't overflow the max capacity.
+      ASSERT_EQ(slab_.Grow(cpu, cl, kCapacity, kCapacity), kCapacity / 2);
+      ASSERT_EQ(slab_.Capacity(cpu, cl), kCapacity);
+      for (size_t i = 0; i < kCapacity; ++i) {
+        ASSERT_TRUE(slab_.Push(cl, &objects_[i], ExpectNoOverflow));
+        ASSERT_EQ(slab_.Length(cpu, cl), i + 1);
+      }
+      ASSERT_FALSE(PushExpectOverflow<-1>(&slab_, cl, &objects_[0]));
+      for (size_t i = kCapacity; i > 0; --i) {
+        ASSERT_EQ(slab_.Pop(cl, ExpectNoUnderflow), &objects_[i - 1]);
+        ASSERT_EQ(slab_.Length(cpu, cl), i - 1);
+      }
+
+      // Ensure that we can't shrink below length.
+      ASSERT_TRUE(slab_.Push(cl, &objects_[0], ExpectNoOverflow));
+      ASSERT_TRUE(slab_.Push(cl, &objects_[1], ExpectNoOverflow));
+      ASSERT_EQ(slab_.Shrink(cpu, cl, kCapacity), kCapacity - 2);
+      ASSERT_EQ(slab_.Capacity(cpu, cl), 2);
+
+      // Test Drain.
+      ASSERT_EQ(slab_.Grow(cpu, cl, 2, kCapacity), 2);
+      slab_.Drain(cpu, &cl,
+                  [](void* ctx, size_t cl, void** batch, size_t n, size_t cap) {
+                    size_t mycl = *static_cast<size_t*>(ctx);
+                    if (cl == mycl) {
+                      ASSERT_EQ(n, 2);
+                      ASSERT_EQ(cap, 4);
+                      ASSERT_EQ(batch[0], &objects_[0]);
+                      ASSERT_EQ(batch[1], &objects_[1]);
+                    } else {
+                      ASSERT_EQ(n, 0);
+                      ASSERT_EQ(cap, 0);
+                    }
+                  });
+      ASSERT_EQ(slab_.Length(cpu, cl), 0);
+      ASSERT_EQ(slab_.Capacity(cpu, cl), 0);
+
+      // Test PushBatch/PopBatch.
+      void* batch[kCapacity + 1];
+      for (size_t i = 0; i < kCapacity; ++i) {
+        batch[i] = &objects_[i];
+      }
+      ASSERT_EQ(slab_.PopBatch(cl, batch, kCapacity), 0);
+      ASSERT_EQ(slab_.PushBatch(cl, batch, kCapacity), 0);
+      ASSERT_EQ(slab_.Grow(cpu, cl, kCapacity / 2, kCapacity), kCapacity / 2);
+      ASSERT_EQ(slab_.PopBatch(cl, batch, kCapacity), 0);
+      // Push a batch of size i into an empty slab.
+      for (size_t i = 1; i < kCapacity; ++i) {
+        const size_t expect = std::min(i, kCapacity / 2);
+        ASSERT_EQ(slab_.PushBatch(cl, batch, i), expect);
+        ASSERT_EQ(slab_.Length(cpu, cl), expect);
+        for (size_t j = 0; j < expect; ++j) {
+          ASSERT_EQ(slab_.Pop(cl, ExpectNoUnderflow),
+                    &objects_[j + (i - expect)]);
+        }
+        ASSERT_EQ(PopExpectUnderflow<5>(&slab_, cl), &objects_[5]);
+      }
+      // Push a batch of size i into a non-empty slab.
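+      // With capacity kCapacity / 2 and i items already resident, only
+      // kCapacity / 2 - i more fit, hence expect = min(i, kCapacity / 2 - i)
+      // below.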
+ for (size_t i = 1; i < kCapacity / 2; ++i) { + const size_t expect = std::min(i, kCapacity / 2 - i); + ASSERT_EQ(slab_.PushBatch(cl, batch, i), i); + ASSERT_EQ(slab_.PushBatch(cl, batch, i), expect); + ASSERT_EQ(slab_.Length(cpu, cl), i + expect); + for (size_t j = 0; j < expect; ++j) { + ASSERT_EQ(slab_.Pop(cl, ExpectNoUnderflow), + static_cast<void*>(&objects_[j + (i - expect)])); + } + for (size_t j = 0; j < i; ++j) { + ASSERT_EQ(slab_.Pop(cl, ExpectNoUnderflow), + static_cast<void*>(&objects_[j])); + } + ASSERT_EQ(PopExpectUnderflow<5>(&slab_, cl), &objects_[5]); + } + for (size_t i = 0; i < kCapacity + 1; ++i) { + batch[i] = nullptr; + } + // Pop all elements in a single batch. + for (size_t i = 1; i < kCapacity / 2; ++i) { + for (size_t j = 0; j < i; ++j) { + ASSERT_TRUE(slab_.Push(cl, &objects_[j], ExpectNoOverflow)); + } + ASSERT_EQ(slab_.PopBatch(cl, batch, i), i); + ASSERT_EQ(slab_.Length(cpu, cl), 0); + ASSERT_EQ(PopExpectUnderflow<5>(&slab_, cl), &objects_[5]); + + ASSERT_THAT(absl::MakeSpan(&batch[0], i), + UnorderedElementsAreArray(&object_ptrs_[0], i)); + ASSERT_THAT(absl::MakeSpan(&batch[i], kCapacity - i), Each(nullptr)); + for (size_t j = 0; j < kCapacity + 1; ++j) { + batch[j] = nullptr; + } + } + // Pop half of elements in a single batch. + for (size_t i = 1; i < kCapacity / 2; ++i) { + for (size_t j = 0; j < i; ++j) { + ASSERT_TRUE(slab_.Push(cl, &objects_[j], ExpectNoOverflow)); + } + size_t want = std::max<size_t>(1, i / 2); + ASSERT_EQ(slab_.PopBatch(cl, batch, want), want); + ASSERT_EQ(slab_.Length(cpu, cl), i - want); + + for (size_t j = 0; j < i - want; ++j) { + ASSERT_EQ(slab_.Pop(cl, ExpectNoUnderflow), + static_cast<void*>(&objects_[i - want - j - 1])); + } + + ASSERT_EQ(PopExpectUnderflow<5>(&slab_, cl), &objects_[5]); + + ASSERT_GE(i, want); + ASSERT_THAT(absl::MakeSpan(&batch[0], want), + UnorderedElementsAreArray(&object_ptrs_[i - want], want)); + ASSERT_THAT(absl::MakeSpan(&batch[want], kCapacity - want), + Each(nullptr)); + for (size_t j = 0; j < kCapacity + 1; ++j) { + batch[j] = nullptr; + } + } + // Pop 2x elements in a single batch. 
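+      // Requesting 2 * i items from a slab holding only i returns just those
+      // i; the remaining batch slots, pre-cleared to nullptr above, must stay
+      // untouched.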
+ for (size_t i = 1; i < kCapacity / 2; ++i) { + for (size_t j = 0; j < i; ++j) { + ASSERT_TRUE(slab_.Push(cl, &objects_[j], ExpectNoOverflow)); + } + ASSERT_EQ(slab_.PopBatch(cl, batch, i * 2), i); + ASSERT_EQ(slab_.Length(cpu, cl), 0); + ASSERT_EQ(PopExpectUnderflow<5>(&slab_, cl), &objects_[5]); + + ASSERT_THAT(absl::MakeSpan(&batch[0], i), + UnorderedElementsAreArray(&object_ptrs_[0], i)); + ASSERT_THAT(absl::MakeSpan(&batch[i], kCapacity - i), Each(nullptr)); + for (size_t j = 0; j < kCapacity + 1; ++j) { + batch[j] = nullptr; + } + } + ASSERT_EQ(slab_.Shrink(cpu, cl, kCapacity / 2), kCapacity / 2); + } + } +} + +INSTANTIATE_TEST_SUITE_P(Instant, TcmallocSlabTest, + testing::Values(SlabInit::kEager, SlabInit::kLazy)); + +static void StressThread(size_t thread_id, TcmallocSlab* slab, + std::vector<void*>* block, + std::vector<absl::Mutex>* mutexes, + std::atomic<size_t>* capacity, + std::atomic<bool>* stop) { + EXPECT_TRUE(IsFast()); + + struct Handler { + static int Overflow(int cpu, size_t cl, void* item) { + EXPECT_GE(cpu, 0); + EXPECT_LT(cpu, absl::base_internal::NumCPUs()); + EXPECT_LT(cl, kStressSlabs); + EXPECT_NE(item, nullptr); + return -1; + } + + static void* Underflow(int cpu, size_t cl) { + EXPECT_GE(cpu, 0); + EXPECT_LT(cpu, absl::base_internal::NumCPUs()); + EXPECT_LT(cl, kStressSlabs); + return nullptr; + } + }; + + absl::BitGen rnd(absl::SeedSeq({thread_id})); + while (!*stop) { + size_t cl = absl::Uniform<int32_t>(rnd, 0, kStressSlabs); + const int what = absl::Uniform<int32_t>(rnd, 0, 91); + if (what < 10) { + if (!block->empty()) { + if (slab->Push(cl, block->back(), &Handler::Overflow)) { + block->pop_back(); + } + } + } else if (what < 20) { + if (void* item = slab->Pop(cl, &Handler::Underflow)) { + block->push_back(item); + } + } else if (what < 30) { + if (!block->empty()) { + void* batch[kStressCapacity]; + size_t n = absl::Uniform<int32_t>( + rnd, 0, std::min(block->size(), kStressCapacity)) + + 1; + for (size_t i = 0; i < n; ++i) { + batch[i] = block->back(); + block->pop_back(); + } + size_t pushed = slab->PushBatch(cl, batch, n); + EXPECT_LE(pushed, n); + for (size_t i = 0; i < n - pushed; ++i) { + block->push_back(batch[i]); + } + } + } else if (what < 40) { + void* batch[kStressCapacity]; + size_t n = absl::Uniform<int32_t>(rnd, 0, kStressCapacity) + 1; + size_t popped = slab->PopBatch(cl, batch, n); + EXPECT_LE(popped, n); + for (size_t i = 0; i < popped; ++i) { + block->push_back(batch[i]); + } + } else if (what < 50) { + size_t n = absl::Uniform<int32_t>(rnd, 0, kStressCapacity) + 1; + for (;;) { + size_t c = capacity->load(); + n = std::min(n, c); + if (n == 0) { + break; + } + if (capacity->compare_exchange_weak(c, c - n)) { + break; + } + } + if (n != 0) { + size_t res = slab->Grow(slab->GetCurrentVirtualCpuUnsafe(), cl, n, + kStressCapacity); + EXPECT_LE(res, n); + capacity->fetch_add(n - res); + } + } else if (what < 60) { + size_t n = + slab->Shrink(slab->GetCurrentVirtualCpuUnsafe(), cl, + absl::Uniform<int32_t>(rnd, 0, kStressCapacity) + 1); + capacity->fetch_add(n); + } else if (what < 70) { + size_t len = slab->Length( + absl::Uniform<int32_t>(rnd, 0, absl::base_internal::NumCPUs()), cl); + EXPECT_LE(len, kStressCapacity); + } else if (what < 80) { + size_t cap = slab->Capacity( + absl::Uniform<int32_t>(rnd, 0, absl::base_internal::NumCPUs()), cl); + EXPECT_LE(cap, kStressCapacity); + } else if (what < 90) { + struct Context { + std::vector<void*>* block; + std::atomic<size_t>* capacity; + }; + Context ctx = {block, capacity}; + int cpu = 
absl::Uniform<int32_t>(rnd, 0, absl::base_internal::NumCPUs()); + if (mutexes->at(cpu).TryLock()) { + size_t to_shrink = absl::Uniform<int32_t>(rnd, 0, kStressCapacity) + 1; + size_t total_shrunk = slab->ShrinkOtherCache( + cpu, cl, to_shrink, &ctx, + [](void* arg, size_t cl, void** batch, size_t n) { + Context* ctx = static_cast<Context*>(arg); + EXPECT_LT(cl, kStressSlabs); + EXPECT_LE(n, kStressCapacity); + for (size_t i = 0; i < n; ++i) { + EXPECT_NE(batch[i], nullptr); + ctx->block->push_back(batch[i]); + } + }); + EXPECT_LE(total_shrunk, to_shrink); + EXPECT_LE(0, total_shrunk); + capacity->fetch_add(total_shrunk); + mutexes->at(cpu).Unlock(); + } + } else { + struct Context { + std::vector<void*>* block; + std::atomic<size_t>* capacity; + }; + Context ctx = {block, capacity}; + int cpu = absl::Uniform<int32_t>(rnd, 0, absl::base_internal::NumCPUs()); + if (mutexes->at(cpu).TryLock()) { + slab->Drain( + cpu, &ctx, + [](void* arg, size_t cl, void** batch, size_t n, size_t cap) { + Context* ctx = static_cast<Context*>(arg); + EXPECT_LT(cl, kStressSlabs); + EXPECT_LE(n, kStressCapacity); + EXPECT_LE(cap, kStressCapacity); + for (size_t i = 0; i < n; ++i) { + EXPECT_NE(batch[i], nullptr); + ctx->block->push_back(batch[i]); + } + ctx->capacity->fetch_add(cap); + }); + mutexes->at(cpu).Unlock(); + } + } + } +} + +static void* allocator(size_t bytes) { + void* ptr = malloc(bytes); + if (ptr) { + memset(ptr, 0, bytes); + } + return ptr; +} + +TEST(TcmallocSlab, Stress) { + // The test creates 2 * NumCPUs() threads each executing all possible + // operations on TcmallocSlab. After that we verify that no objects + // lost/duplicated and that total capacity is preserved. + + if (!IsFast()) { + GTEST_SKIP() << "Need fast percpu. Skipping."; + return; + } + + EXPECT_LE(kStressSlabs, kStressSlabs); + TcmallocSlab slab; + slab.Init( + allocator, + [](size_t cl) { return cl < kStressSlabs ? kStressCapacity : 0; }, false, + kShift); + std::vector<std::thread> threads; + const int n_threads = 2 * absl::base_internal::NumCPUs(); + + // Mutexes protect Drain operation on a CPU. + std::vector<absl::Mutex> mutexes(absl::base_internal::NumCPUs()); + // Give each thread an initial set of local objects. + std::vector<std::vector<void*>> blocks(n_threads); + for (size_t i = 0; i < blocks.size(); ++i) { + for (size_t j = 0; j < kStressCapacity; ++j) { + blocks[i].push_back(reinterpret_cast<void*>(i * kStressCapacity + j + 1)); + } + } + std::atomic<bool> stop(false); + // Total capacity shared between all size classes and all CPUs. + const int kTotalCapacity = blocks.size() * kStressCapacity * 3 / 4; + std::atomic<size_t> capacity(kTotalCapacity); + // Create threads and let them work for 5 seconds. + threads.reserve(n_threads); + for (size_t t = 0; t < n_threads; ++t) { + threads.push_back(std::thread(StressThread, t, &slab, &blocks[t], &mutexes, + &capacity, &stop)); + } + absl::SleepFor(absl::Seconds(5)); + stop = true; + for (auto& t : threads) { + t.join(); + } + // Collect objects and capacity from all slabs. 
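+  // Every synthetic pointer handed out above must show up exactly once,
+  // either still in some thread's local block or drained from a cpu slab,
+  // and the capacity tokens must add back up to kTotalCapacity.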
+ std::set<void*> objects; + struct Context { + std::set<void*>* objects; + std::atomic<size_t>* capacity; + }; + Context ctx = {&objects, &capacity}; + for (int cpu = 0; cpu < absl::base_internal::NumCPUs(); ++cpu) { + slab.Drain(cpu, &ctx, + [](void* arg, size_t cl, void** batch, size_t n, size_t cap) { + Context* ctx = static_cast<Context*>(arg); + for (size_t i = 0; i < n; ++i) { + ctx->objects->insert(batch[i]); + } + ctx->capacity->fetch_add(cap); + }); + for (size_t cl = 0; cl < kStressSlabs; ++cl) { + EXPECT_EQ(slab.Length(cpu, cl), 0); + EXPECT_EQ(slab.Capacity(cpu, cl), 0); + } + } + for (const auto& b : blocks) { + for (auto o : b) { + objects.insert(o); + } + } + EXPECT_EQ(objects.size(), blocks.size() * kStressCapacity); + EXPECT_EQ(capacity.load(), kTotalCapacity); + slab.Destroy(free); +} + +TEST(TcmallocSlab, SMP) { + // For the other tests here to be meaningful, we need multiple cores. + ASSERT_GT(absl::base_internal::NumCPUs(), 1); +} + +#if ABSL_INTERNAL_HAVE_ELF_SYMBOLIZE +static int FilterElfHeader(struct dl_phdr_info* info, size_t size, void* data) { + *reinterpret_cast<uintptr_t*>(data) = + reinterpret_cast<uintptr_t>(info->dlpi_addr); + // No further iteration wanted. + return 1; +} +#endif + +TEST(TcmallocSlab, CriticalSectionMetadata) { +// We cannot inhibit --gc-sections, except on GCC or Clang 9-or-newer. +#if defined(__clang_major__) && __clang_major__ < 9 + GTEST_SKIP() << "--gc-sections cannot be inhibited on this compiler."; +#endif + + // We expect that restartable sequence critical sections (rseq_cs) are in the + // __rseq_cs section (by convention, not hard requirement). Additionally, for + // each entry in that section, there should be a pointer to it in + // __rseq_cs_ptr_array. +#if ABSL_INTERNAL_HAVE_ELF_SYMBOLIZE + uintptr_t relocation = 0; + dl_iterate_phdr(FilterElfHeader, &relocation); + + int fd = tcmalloc_internal::signal_safe_open("/proc/self/exe", O_RDONLY); + ASSERT_NE(fd, -1); + + const kernel_rseq_cs* cs_start = nullptr; + const kernel_rseq_cs* cs_end = nullptr; + + const kernel_rseq_cs** cs_array_start = nullptr; + const kernel_rseq_cs** cs_array_end = nullptr; + + absl::debugging_internal::ForEachSection( + fd, [&](const absl::string_view name, const ElfW(Shdr) & hdr) { + uintptr_t start = relocation + reinterpret_cast<uintptr_t>(hdr.sh_addr); + uintptr_t end = + relocation + reinterpret_cast<uintptr_t>(hdr.sh_addr + hdr.sh_size); + + if (name == "__rseq_cs") { + EXPECT_EQ(cs_start, nullptr); + EXPECT_EQ(start % alignof(kernel_rseq_cs), 0); + EXPECT_EQ(end % alignof(kernel_rseq_cs), 0); + EXPECT_LT(start, end) << "__rseq_cs must not be empty"; + + cs_start = reinterpret_cast<const kernel_rseq_cs*>(start); + cs_end = reinterpret_cast<const kernel_rseq_cs*>(end); + } else if (name == "__rseq_cs_ptr_array") { + EXPECT_EQ(cs_array_start, nullptr); + EXPECT_EQ(start % alignof(kernel_rseq_cs*), 0); + EXPECT_EQ(end % alignof(kernel_rseq_cs*), 0); + EXPECT_LT(start, end) << "__rseq_cs_ptr_array must not be empty"; + + cs_array_start = reinterpret_cast<const kernel_rseq_cs**>(start); + cs_array_end = reinterpret_cast<const kernel_rseq_cs**>(end); + } + + return true; + }); + + close(fd); + + // The length of the array in multiples of rseq_cs should be the same as the + // length of the array of pointers. + ASSERT_EQ(cs_end - cs_start, cs_array_end - cs_array_start); + + // The array should not be empty. 
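+  // Beyond the matching lengths checked above, the sets built below verify a
+  // one-to-one correspondence: each __rseq_cs entry is referenced by exactly
+  // one __rseq_cs_ptr_array slot.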
+ ASSERT_NE(cs_start, nullptr); + + absl::flat_hash_set<const kernel_rseq_cs*> cs_pointers; + for (auto* ptr = cs_start; ptr != cs_end; ++ptr) { + cs_pointers.insert(ptr); + } + + absl::flat_hash_set<const kernel_rseq_cs*> cs_array_pointers; + for (auto** ptr = cs_array_start; ptr != cs_array_end; ++ptr) { + // __rseq_cs_ptr_array should have no duplicates. + EXPECT_TRUE(cs_array_pointers.insert(*ptr).second); + } + + EXPECT_THAT(cs_pointers, ::testing::ContainerEq(cs_array_pointers)); +#endif +} + +static void BM_PushPop(benchmark::State& state) { + CHECK_CONDITION(IsFast()); + RunOnSingleCpu([&](int this_cpu) { + const int kBatchSize = 32; + TcmallocSlab slab; + +#pragma GCC diagnostic ignored "-Wnonnull" + slab.Init( + allocator, [](size_t cl) -> size_t { return kBatchSize; }, false, + kShift); + + CHECK_CONDITION(slab.Grow(this_cpu, 0, kBatchSize, kBatchSize) == + kBatchSize); + void* batch[kBatchSize]; + for (int i = 0; i < kBatchSize; i++) { + batch[i] = &batch[i]; + } + for (auto _ : state) { + for (size_t x = 0; x < kBatchSize; x++) { + CHECK_CONDITION(slab.Push(0, batch[x], ExpectNoOverflow)); + } + for (size_t x = 0; x < kBatchSize; x++) { + CHECK_CONDITION(slab.Pop(0, ExpectNoUnderflow) == + batch[kBatchSize - x - 1]); + } + } + return true; + }); +} +BENCHMARK(BM_PushPop); + +static void BM_PushPopBatch(benchmark::State& state) { + CHECK_CONDITION(IsFast()); + RunOnSingleCpu([&](int this_cpu) { + const int kBatchSize = 32; + TcmallocSlab slab; + slab.Init( + allocator, [](size_t cl) -> size_t { return kBatchSize; }, false, + kShift); + CHECK_CONDITION(slab.Grow(this_cpu, 0, kBatchSize, kBatchSize) == + kBatchSize); + void* batch[kBatchSize]; + for (int i = 0; i < kBatchSize; i++) { + batch[i] = &batch[i]; + } + for (auto _ : state) { + CHECK_CONDITION(slab.PushBatch(0, batch, kBatchSize) == kBatchSize); + CHECK_CONDITION(slab.PopBatch(0, batch, kBatchSize) == kBatchSize); + } + return true; + }); +} +BENCHMARK(BM_PushPopBatch); + +} // namespace +} // namespace percpu +} // namespace subtle +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/proc_maps.cc b/contrib/libs/tcmalloc/tcmalloc/internal/proc_maps.cc new file mode 100644 index 0000000000..5a5586cfff --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/proc_maps.cc @@ -0,0 +1,171 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
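+
+// A typical use of the iterator implemented here (an illustrative sketch;
+// the variable names are arbitrary):
+//
+//   ProcMapsIterator::Buffer buf;
+//   ProcMapsIterator it(0, &buf);  // pid 0 means "this process"
+//   uint64_t start, end, offset;
+//   int64_t inode;
+//   char *flags, *filename;
+//   if (it.Valid()) {
+//     while (it.NextExt(&start, &end, &flags, &offset, &inode, &filename,
+//                       nullptr)) {
+//       // One maps entry per iteration.
+//     }
+//   }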
+ +#include "tcmalloc/internal/proc_maps.h" + +#include <fcntl.h> +#include <sys/sysmacros.h> +#include <unistd.h> + +#include <cstdio> +#include <cstring> + +#include "absl/strings/str_format.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/util.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +ProcMapsIterator::ProcMapsIterator(pid_t pid) { Init(pid, nullptr); } + +ProcMapsIterator::ProcMapsIterator(pid_t pid, Buffer* buffer) { + Init(pid, buffer); +} + +void ProcMapsIterator::Init(pid_t pid, Buffer* buffer) { + if (pid == 0) { + pid = getpid(); + } + + pid_ = pid; + if (!buffer) { + // If the user didn't pass in any buffer storage, allocate it + // now. This is the normal case; the signal handler passes in a + // static buffer. + buffer = dynamic_buffer_ = new Buffer; + } else { + dynamic_buffer_ = nullptr; + } + + ibuf_ = buffer->buf; + + stext_ = etext_ = nextline_ = ibuf_; + ebuf_ = ibuf_ + Buffer::kBufSize - 1; + nextline_ = ibuf_; + +#if defined(__linux__) + // /maps exists in two places: /proc/pid/ and /proc/pid/task/tid (for each + // thread in the process.) The only difference between these is the "global" + // view (/proc/pid/maps) attempts to label each VMA which is the stack of a + // thread. This is nice to have, but not critical, and scales quadratically. + // Use the main thread's "local" view to ensure adequate performance. + int path_length = absl::SNPrintF(ibuf_, Buffer::kBufSize, + "/proc/%d/task/%d/maps", pid, pid); + CHECK_CONDITION(path_length < Buffer::kBufSize); + + // No error logging since this can be called from the crash dump + // handler at awkward moments. Users should call Valid() before + // using. + TCMALLOC_RETRY_ON_TEMP_FAILURE(fd_ = open(ibuf_, O_RDONLY)); +#else + fd_ = -1; // so Valid() is always false +#endif +} + +ProcMapsIterator::~ProcMapsIterator() { + // As it turns out, Linux guarantees that close() does in fact close a file + // descriptor even when the return value is EINTR. According to the notes in + // the manpage for close(2), this is widespread yet not fully portable, which + // is unfortunate. POSIX explicitly leaves this behavior as unspecified. + if (fd_ >= 0) close(fd_); + delete dynamic_buffer_; +} + +bool ProcMapsIterator::Valid() const { return fd_ != -1; } + +bool ProcMapsIterator::NextExt(uint64_t* start, uint64_t* end, char** flags, + uint64_t* offset, int64_t* inode, + char** filename, dev_t* dev) { +#if defined __linux__ + do { + // Advance to the start of the next line + stext_ = nextline_; + + // See if we have a complete line in the buffer already + nextline_ = static_cast<char*>(memchr(stext_, '\n', etext_ - stext_)); + if (!nextline_) { + // Shift/fill the buffer so we do have a line + int count = etext_ - stext_; + + // Move the current text to the start of the buffer + memmove(ibuf_, stext_, count); + stext_ = ibuf_; + etext_ = ibuf_ + count; + + int nread = 0; // fill up buffer with text + while (etext_ < ebuf_) { + TCMALLOC_RETRY_ON_TEMP_FAILURE(nread = + read(fd_, etext_, ebuf_ - etext_)); + if (nread > 0) + etext_ += nread; + else + break; + } + + // Zero out remaining characters in buffer at EOF to avoid returning + // garbage from subsequent calls. 
+ if (etext_ != ebuf_ && nread == 0) { + memset(etext_, 0, ebuf_ - etext_); + } + *etext_ = '\n'; // sentinel; safe because ibuf extends 1 char beyond ebuf + nextline_ = static_cast<char*>(memchr(stext_, '\n', etext_ + 1 - stext_)); + } + *nextline_ = 0; // turn newline into nul + nextline_ += ((nextline_ < etext_) ? 1 : 0); // skip nul if not end of text + // stext_ now points at a nul-terminated line + unsigned long long tmpstart, tmpend, tmpoffset; // NOLINT + long long tmpinode, local_inode; // NOLINT + unsigned long long local_start, local_end, local_offset; // NOLINT + int major, minor; + unsigned filename_offset = 0; + // for now, assume all linuxes have the same format + int para_num = + sscanf(stext_, "%llx-%llx %4s %llx %x:%x %lld %n", + start ? &local_start : &tmpstart, end ? &local_end : &tmpend, + flags_, offset ? &local_offset : &tmpoffset, &major, &minor, + inode ? &local_inode : &tmpinode, &filename_offset); + + if (para_num != 7) continue; + + if (start) *start = local_start; + if (end) *end = local_end; + if (offset) *offset = local_offset; + if (inode) *inode = local_inode; + // Depending on the Linux kernel being used, there may or may not be a space + // after the inode if there is no filename. sscanf will in such situations + // nondeterministically either fill in filename_offset or not (the results + // differ on multiple calls in the same run even with identical arguments). + // We don't want to wander off somewhere beyond the end of the string. + size_t stext_length = strlen(stext_); + if (filename_offset == 0 || filename_offset > stext_length) + filename_offset = stext_length; + + // We found an entry + if (flags) *flags = flags_; + if (filename) *filename = stext_ + filename_offset; + if (dev) *dev = makedev(major, minor); + + return true; + } while (etext_ > ibuf_); +#endif + + // We didn't find anything + return false; +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/proc_maps.h b/contrib/libs/tcmalloc/tcmalloc/internal/proc_maps.h new file mode 100644 index 0000000000..c5c763a1e8 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/proc_maps.h @@ -0,0 +1,70 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_INTERNAL_PROC_MAPS_H_ +#define TCMALLOC_INTERNAL_PROC_MAPS_H_ + +#include <limits.h> +#include <stdint.h> +#include <sys/types.h> + +#include "tcmalloc/internal/config.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// A ProcMapsIterator abstracts access to /proc/maps for a given process. +class ProcMapsIterator { + public: + struct Buffer { + static constexpr size_t kBufSize = PATH_MAX + 1024; + char buf[kBufSize]; + }; + + // Create a new iterator for the specified pid. pid can be 0 for "self". + explicit ProcMapsIterator(pid_t pid); + + // Create an iterator with specified storage (for use in signal handler). 
+ // "buffer" should point to a ProcMapsIterator::Buffer buffer can be null in + // which case a buffer will be allocated. + ProcMapsIterator(pid_t pid, Buffer* buffer); + + // Returns true if the iterator successfully initialized; + bool Valid() const; + + bool NextExt(uint64_t* start, uint64_t* end, char** flags, uint64_t* offset, + int64_t* inode, char** filename, dev_t* dev); + + ~ProcMapsIterator(); + + private: + void Init(pid_t pid, Buffer* buffer); + + char* ibuf_; // input buffer + char* stext_; // start of text + char* etext_; // end of text + char* nextline_; // start of next line + char* ebuf_; // end of buffer (1 char for a nul) + int fd_; // filehandle on /proc/*/maps + pid_t pid_; + char flags_[10]; + Buffer* dynamic_buffer_; // dynamically-allocated Buffer +}; + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_INTERNAL_PROC_MAPS_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/range_tracker.h b/contrib/libs/tcmalloc/tcmalloc/internal/range_tracker.h new file mode 100644 index 0000000000..25b863934f --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/range_tracker.h @@ -0,0 +1,503 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_INTERNAL_RANGE_TRACKER_H_ +#define TCMALLOC_INTERNAL_RANGE_TRACKER_H_ + +#include <stddef.h> +#include <stdint.h> +#include <sys/types.h> + +#include <climits> +#include <limits> +#include <type_traits> + +#include "absl/numeric/bits.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/optimization.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// Keeps a bitmap of some fixed size (N bits). +template <size_t N> +class Bitmap { + public: + constexpr Bitmap() : bits_{} {} + + size_t size() const { return N; } + bool GetBit(size_t i) const; + + void SetBit(size_t i); + void ClearBit(size_t i); + + // Returns the number of set bits [index, ..., index + n - 1]. + size_t CountBits(size_t index, size_t n) const; + + // Returns whether the bitmap is entirely zero or not. + bool IsZero() const; + + // Equivalent to SetBit on bits [index, index + 1, ... index + n - 1]. + void SetRange(size_t index, size_t n); + void ClearRange(size_t index, size_t n); + + // Clears the lowest set bit. Special case is faster than more flexible code. + void ClearLowestBit(); + + // If there is at least one free range at or after <start>, + // put it in *index, *length and return true; else return false. + bool NextFreeRange(size_t start, size_t *index, size_t *length) const; + + // Returns index of the first {true, false} bit >= index, or N if none. + size_t FindSet(size_t index) const; + size_t FindClear(size_t index) const; + + // Returns index of the first {set, clear} bit in [index, 0] or -1 if none. 
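+  // For example, if only bits 2..4 are set (and N > 7), FindSetBackwards(7)
+  // returns 4 and FindClearBackwards(4) returns 1.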
+ ssize_t FindSetBackwards(size_t index) const; + ssize_t FindClearBackwards(size_t index) const; + + void Clear(); + + private: + static constexpr size_t kWordSize = sizeof(size_t) * 8; + static constexpr size_t kWords = (N + kWordSize - 1) / kWordSize; + static constexpr size_t kDeadBits = kWordSize * kWords - N; + + size_t bits_[kWords]; + + size_t CountWordBits(size_t i, size_t from, size_t to) const; + + template <bool Value> + void SetWordBits(size_t i, size_t from, size_t to); + template <bool Value> + void SetRangeValue(size_t index, size_t n); + + template <bool Goal> + size_t FindValue(size_t index) const; + template <bool Goal> + ssize_t FindValueBackwards(size_t index) const; +}; + +// Tracks allocations in a range of items of fixed size. Supports +// finding an unset range of a given length, while keeping track of +// the largest remaining unmarked length. +template <size_t N> +class RangeTracker { + public: + constexpr RangeTracker() + : bits_{}, longest_free_(N), nused_(0), nallocs_(0) {} + + size_t size() const; + // Number of bits marked + size_t used() const; + // Number of bits clear + size_t total_free() const; + // Longest contiguous range of clear bits. + size_t longest_free() const; + // Count of live allocations. + size_t allocs() const; + + // REQUIRES: there is a free range of at least n bits + // (i.e. n <= longest_free()) + // finds and marks n free bits, returning index of the first bit. + // Chooses by best fit. + size_t FindAndMark(size_t n); + + // REQUIRES: the range [index, index + n) is fully marked, and + // was the returned value from a call to FindAndMark. + // Unmarks it. + void Unmark(size_t index, size_t n); + // If there is at least one free range at or after <start>, + // put it in *index, *length and return true; else return false. + bool NextFreeRange(size_t start, size_t *index, size_t *length) const; + + void Clear(); + + private: + Bitmap<N> bits_; + + // Computes the smallest unsigned type that can hold the constant N. + class UnsignedTypeFittingSize { + private: + static_assert(N <= std::numeric_limits<uint64_t>::max(), + "size_t more than 64 bits??"); + template <typename T> + static constexpr bool Fit() { + return N <= std::numeric_limits<T>::max(); + } + struct U32 { + using type = + typename std::conditional<Fit<uint32_t>(), uint32_t, uint64_t>::type; + }; + + struct U16 { + using type = typename std::conditional<Fit<uint16_t>(), uint16_t, + typename U32::type>::type; + }; + + struct U8 { + using type = typename std::conditional<Fit<uint8_t>(), uint8_t, + typename U16::type>::type; + }; + + public: + using type = typename U8::type; + }; + + // we keep various stats in the range [0, N]; make them as small as possible. + using Count = typename UnsignedTypeFittingSize::type; + + Count longest_free_; + Count nused_; + Count nallocs_; +}; + +template <size_t N> +inline size_t RangeTracker<N>::size() const { + return bits_.size(); +} + +template <size_t N> +inline size_t RangeTracker<N>::used() const { + return nused_; +} + +template <size_t N> +inline size_t RangeTracker<N>::total_free() const { + return N - used(); +} + +template <size_t N> +inline size_t RangeTracker<N>::longest_free() const { + return longest_free_; +} + +template <size_t N> +inline size_t RangeTracker<N>::allocs() const { + return nallocs_; +} + +template <size_t N> +inline size_t RangeTracker<N>::FindAndMark(size_t n) { + ASSERT(n > 0); + + // We keep the two longest ranges in the bitmap since we might allocate + // from one. 
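+ // Tracking only the two longest free runs is sufficient here: the allocation
+ // below carves `n` bits from the front of a single best-fit run, so only that
+ // run shrinks. If it was (one of) the longest, the new longest free length is
+ // max(longest - n, second longest); otherwise longest_free_ is unchanged.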
+ size_t longest_len = 0; + size_t second_len = 0; + + // the best (shortest) range we could use + // TODO(b/134691947): shortest? lowest-addressed? + size_t best_index = N; + size_t best_len = 2 * N; + // Iterate over free ranges: + size_t index = 0, len; + + while (bits_.NextFreeRange(index, &index, &len)) { + if (len > longest_len) { + second_len = longest_len; + longest_len = len; + } else if (len > second_len) { + second_len = len; + } + + if (len >= n && len < best_len) { + best_index = index; + best_len = len; + } + + index += len; + } + + CHECK_CONDITION(best_index < N); + bits_.SetRange(best_index, n); + + if (best_len == longest_len) { + longest_len -= n; + if (longest_len < second_len) longest_len = second_len; + } + + longest_free_ = longest_len; + nused_ += n; + nallocs_++; + return best_index; +} + +// REQUIRES: the range [index, index + n) is fully marked. +// Unmarks it. +template <size_t N> +inline void RangeTracker<N>::Unmark(size_t index, size_t n) { + ASSERT(bits_.FindClear(index) >= index + n); + bits_.ClearRange(index, n); + nused_ -= n; + nallocs_--; + + // We just opened up a new free range--it might be the longest. + size_t lim = bits_.FindSet(index + n - 1); + index = bits_.FindSetBackwards(index) + 1; + n = lim - index; + if (n > longest_free()) { + longest_free_ = n; + } +} + +// If there is at least one free range at or after <start>, +// put it in *index, *length and return true; else return false. +template <size_t N> +inline bool RangeTracker<N>::NextFreeRange(size_t start, size_t *index, + size_t *length) const { + return bits_.NextFreeRange(start, index, length); +} + +template <size_t N> +inline void RangeTracker<N>::Clear() { + bits_.Clear(); + nallocs_ = 0; + nused_ = 0; + longest_free_ = N; +} + +// Count the set bits [from, to) in the i-th word to Value. +template <size_t N> +inline size_t Bitmap<N>::CountWordBits(size_t i, size_t from, size_t to) const { + ASSERT(from < kWordSize); + ASSERT(to <= kWordSize); + const size_t all_ones = ~static_cast<size_t>(0); + // how many bits are we setting? + const size_t n = to - from; + ASSERT(0 < n && n <= kWordSize); + const size_t mask = (all_ones >> (kWordSize - n)) << from; + + ASSUME(i < kWords); + return absl::popcount(bits_[i] & mask); +} + +// Set the bits [from, to) in the i-th word to Value. +template <size_t N> +template <bool Value> +inline void Bitmap<N>::SetWordBits(size_t i, size_t from, size_t to) { + ASSERT(from < kWordSize); + ASSERT(to <= kWordSize); + const size_t all_ones = ~static_cast<size_t>(0); + // how many bits are we setting? 
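+ // The mask built below selects exactly bits [from, to): with n = to - from,
+ // (all_ones >> (kWordSize - n)) produces n low one-bits, and shifting left by
+ // `from` moves them into position. For 64-bit words, from = 3 and n = 5
+ // yields the mask 0xF8.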
+ const size_t n = to - from; + ASSERT(n > 0 && n <= kWordSize); + const size_t mask = (all_ones >> (kWordSize - n)) << from; + ASSUME(i < kWords); + if (Value) { + bits_[i] |= mask; + } else { + bits_[i] &= ~mask; + } +} + +template <size_t N> +inline bool Bitmap<N>::GetBit(size_t i) const { + ASSERT(i < N); + size_t word = i / kWordSize; + size_t offset = i % kWordSize; + ASSUME(word < kWords); + return bits_[word] & (size_t{1} << offset); +} + +template <size_t N> +inline void Bitmap<N>::SetBit(size_t i) { + ASSERT(i < N); + size_t word = i / kWordSize; + size_t offset = i % kWordSize; + ASSUME(word < kWords); + bits_[word] |= (size_t{1} << offset); +} + +template <size_t N> +inline void Bitmap<N>::ClearBit(size_t i) { + ASSERT(i < N); + size_t word = i / kWordSize; + size_t offset = i % kWordSize; + ASSUME(word < kWords); + bits_[word] &= ~(size_t{1} << offset); +} + +template <size_t N> +inline size_t Bitmap<N>::CountBits(size_t index, size_t n) const { + ASSUME(index + n <= N); + size_t count = 0; + if (n == 0) { + return count; + } + + size_t word = index / kWordSize; + size_t offset = index % kWordSize; + size_t k = std::min(offset + n, kWordSize); + count += CountWordBits(word, offset, k); + n -= k - offset; + while (n > 0) { + word++; + k = std::min(n, kWordSize); + count += CountWordBits(word, 0, k); + n -= k; + } + + return count; +} + +template <size_t N> +inline bool Bitmap<N>::IsZero() const { + for (int i = 0; i < kWords; ++i) { + if (bits_[i] != 0) { + return false; + } + } + return true; +} + +template <size_t N> +inline void Bitmap<N>::SetRange(size_t index, size_t n) { + SetRangeValue<true>(index, n); +} + +template <size_t N> +inline void Bitmap<N>::ClearRange(size_t index, size_t n) { + SetRangeValue<false>(index, n); +} + +template <size_t N> +inline void Bitmap<N>::ClearLowestBit() { + for (int i = 0; i < kWords; ++i) { + if (bits_[i] != 0) { + bits_[i] &= bits_[i] - 1; + break; + } + } +} + +template <size_t N> +template <bool Value> +inline void Bitmap<N>::SetRangeValue(size_t index, size_t n) { + ASSERT(index + n <= N); + size_t word = index / kWordSize; + size_t offset = index % kWordSize; + size_t k = offset + n; + if (k > kWordSize) k = kWordSize; + SetWordBits<Value>(word, offset, k); + n -= k - offset; + while (n > 0) { + word++; + k = n; + if (k > kWordSize) k = kWordSize; + SetWordBits<Value>(word, 0, k); + n -= k; + } +} + +template <size_t N> +inline bool Bitmap<N>::NextFreeRange(size_t start, size_t *index, + size_t *length) const { + if (start >= N) return false; + size_t i = FindClear(start); + if (i == N) return false; + size_t j = FindSet(i); + *index = i; + *length = j - i; + return true; +} + +template <size_t N> +inline size_t Bitmap<N>::FindSet(size_t index) const { + return FindValue<true>(index); +} + +template <size_t N> +inline size_t Bitmap<N>::FindClear(size_t index) const { + return FindValue<false>(index); +} + +template <size_t N> +inline ssize_t Bitmap<N>::FindSetBackwards(size_t index) const { + return FindValueBackwards<true>(index); +} + +template <size_t N> +inline ssize_t Bitmap<N>::FindClearBackwards(size_t index) const { + return FindValueBackwards<false>(index); +} + +template <size_t N> +inline void Bitmap<N>::Clear() { + for (int i = 0; i < kWords; ++i) { + bits_[i] = 0; + } +} + +template <size_t N> +template <bool Goal> +inline size_t Bitmap<N>::FindValue(size_t index) const { + ASSERT(index < N); + size_t offset = index % kWordSize; + size_t word = index / kWordSize; + ASSUME(word < kWords); + size_t here = bits_[word]; + 
if (!Goal) here = ~here; + size_t mask = ~static_cast<size_t>(0) << offset; + here &= mask; + while (here == 0) { + ++word; + if (word >= kWords) { + return N; + } + here = bits_[word]; + if (!Goal) here = ~here; + } + + word *= kWordSize; + ASSUME(here != 0); + size_t ret = absl::countr_zero(here) + word; + if (kDeadBits > 0) { + if (ret > N) ret = N; + } + return ret; +} + +template <size_t N> +template <bool Goal> +inline ssize_t Bitmap<N>::FindValueBackwards(size_t index) const { + ASSERT(index < N); + size_t offset = index % kWordSize; + ssize_t word = index / kWordSize; + ASSUME(word < kWords); + size_t here = bits_[word]; + if (!Goal) here = ~here; + size_t mask = (static_cast<size_t>(2) << offset) - 1; + here &= mask; + while (here == 0) { + --word; + if (word < 0) { + return -1; + } + here = bits_[word]; + if (!Goal) here = ~here; + } + + word *= kWordSize; + ASSUME(here != 0); + size_t ret = absl::bit_width(here) - 1 + word; + return ret; +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_INTERNAL_RANGE_TRACKER_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/range_tracker_benchmark.cc b/contrib/libs/tcmalloc/tcmalloc/internal/range_tracker_benchmark.cc new file mode 100644 index 0000000000..278fc9ef1e --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/range_tracker_benchmark.cc @@ -0,0 +1,387 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
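The range_tracker.h interface above is easiest to see in a short, stand-alone usage sketch. Everything below is illustrative only: the tracker sizes and the printf reporting are arbitrary choices, and compiling it assumes the tcmalloc-internal build environment that provides this header.

#include <cstdio>

#include "tcmalloc/internal/range_tracker.h"

using tcmalloc::tcmalloc_internal::Bitmap;
using tcmalloc::tcmalloc_internal::RangeTracker;

int main() {
  // Track 256 fixed-size slots; FindAndMark picks a best-fit free run.
  RangeTracker<256> tracker;
  size_t a = tracker.FindAndMark(16);  // marks slots [a, a + 16)
  size_t b = tracker.FindAndMark(32);  // marks slots [b, b + 32)
  tracker.Unmark(a, 16);               // releases the first run again
  (void)b;

  // Enumerate the remaining free runs, as the tests later in this diff do.
  size_t index = 0, len;
  while (tracker.NextFreeRange(index, &index, &len)) {
    std::printf("free run at %zu, length %zu\n", index, len);
    index += len;
  }

  // The underlying Bitmap can also be used directly.
  Bitmap<64> bits;
  bits.SetRange(8, 4);  // sets bits [8, 12)
  std::printf("first set bit: %zu, set bits: %zu\n", bits.FindSet(0),
              bits.CountBits(0, 64));
  return 0;
}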
+ +#include <algorithm> +#include <utility> +#include <vector> + +#include "absl/base/attributes.h" +#include "absl/random/distributions.h" +#include "absl/random/random.h" +#include "benchmark/benchmark.h" +#include "tcmalloc/internal/range_tracker.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +template <size_t N> +static void BM_MarkUnmark(benchmark::State& state) { + RangeTracker<N> range; + absl::BitGen rng; + std::vector<std::pair<size_t, size_t>> things; + while (range.used() < N / 2) { + size_t len = + absl::LogUniform<int32_t>(rng, 0, range.longest_free() - 1) + 1; + size_t i = range.FindAndMark(len); + things.push_back({i, len}); + } + + // only count successes :/ + for (auto s : state) { + size_t index = absl::Uniform<int32_t>(rng, 0, things.size()); + auto p = things[index]; + range.Unmark(p.first, p.second); + size_t len = + absl::LogUniform<int32_t>(rng, 0, range.longest_free() - 1) + 1; + things[index] = {range.FindAndMark(len), len}; + } + + state.SetItemsProcessed(state.iterations()); +} + +BENCHMARK_TEMPLATE(BM_MarkUnmark, 256); +BENCHMARK_TEMPLATE(BM_MarkUnmark, 256 * 32); + +template <size_t N, size_t K> +static void BM_MarkUnmarkEmpty(benchmark::State& state) { + RangeTracker<N> range; + for (auto s : state) { + size_t index = range.FindAndMark(K); + benchmark::DoNotOptimize(index); + range.Unmark(index, K); + } + + state.SetItemsProcessed(state.iterations()); +} + +BENCHMARK_TEMPLATE(BM_MarkUnmarkEmpty, 256, 1); +BENCHMARK_TEMPLATE(BM_MarkUnmarkEmpty, 256 * 32, 1); +BENCHMARK_TEMPLATE(BM_MarkUnmarkEmpty, 256, 128); +BENCHMARK_TEMPLATE(BM_MarkUnmarkEmpty, 256 * 32, 256 * 16); +BENCHMARK_TEMPLATE(BM_MarkUnmarkEmpty, 256, 256); +BENCHMARK_TEMPLATE(BM_MarkUnmarkEmpty, 256 * 32, 256 * 32); + +template <size_t N> +static void BM_MarkUnmarkChunks(benchmark::State& state) { + RangeTracker<N> range; + range.FindAndMark(N); + size_t index = 0; + absl::BitGen rng; + while (index < N) { + size_t len = absl::Uniform<int32_t>(rng, 0, 32) + 1; + len = std::min(len, N - index); + size_t drop = absl::Uniform<int32_t>(rng, 0, len); + if (drop > 0) { + range.Unmark(index, drop); + } + index += len; + } + size_t m = range.longest_free(); + for (auto s : state) { + size_t index = range.FindAndMark(m); + benchmark::DoNotOptimize(index); + range.Unmark(index, m); + } + + state.SetItemsProcessed(state.iterations()); +} + +BENCHMARK_TEMPLATE(BM_MarkUnmarkChunks, 64); +BENCHMARK_TEMPLATE(BM_MarkUnmarkChunks, 256); +BENCHMARK_TEMPLATE(BM_MarkUnmarkChunks, 256 * 32); + +template <size_t N> +static void BM_FillOnes(benchmark::State& state) { + RangeTracker<N> range; + while (state.KeepRunningBatch(N)) { + state.PauseTiming(); + range.Clear(); + state.ResumeTiming(); + for (size_t j = 0; j < N; ++j) { + benchmark::DoNotOptimize(range.FindAndMark(1)); + } + } + + state.SetItemsProcessed(N * state.iterations()); +} + +BENCHMARK_TEMPLATE(BM_FillOnes, 256); +BENCHMARK_TEMPLATE(BM_FillOnes, 256 * 32); + +template <size_t N> +static void BM_EmptyOnes(benchmark::State& state) { + RangeTracker<N> range; + while (state.KeepRunningBatch(N)) { + state.PauseTiming(); + range.Clear(); + range.FindAndMark(N); + state.ResumeTiming(); + for (size_t j = 0; j < N; ++j) { + range.Unmark(j, 1); + } + } + + state.SetItemsProcessed(N * state.iterations()); +} + +BENCHMARK_TEMPLATE(BM_EmptyOnes, 256); +BENCHMARK_TEMPLATE(BM_EmptyOnes, 256 * 32); + +enum SearchDirection { + Forward, + Backward, +}; + +template <size_t N, bool Goal, SearchDirection Dir> 
+ABSL_ATTRIBUTE_NOINLINE size_t ExamineDoFind(Bitmap<N>* map, size_t index) { + if (Dir == Forward) { + if (Goal) { + return map->FindSet(index); + } else { + return map->FindClear(index); + } + } else { + if (Goal) { + return map->FindSetBackwards(index); + } else { + return map->FindClearBackwards(index); + } + } +} + +template <size_t N, bool Goal, SearchDirection Dir> +ABSL_ATTRIBUTE_NOINLINE void DoSearchBenchmark(Bitmap<N>* map, + benchmark::State& state) { + if (Dir == Forward) { + size_t index = 0; + for (auto s : state) { + index = ExamineDoFind<N, Goal, Dir>(map, index); + benchmark::DoNotOptimize(index); + index++; + if (index >= N) index = 0; + } + } else { + ssize_t index = N - 1; + for (auto s : state) { + index = ExamineDoFind<N, Goal, Dir>(map, index); + benchmark::DoNotOptimize(index); + index--; + if (index < 0) index = N - 1; + } + } +} + +template <size_t N, bool Goal, SearchDirection Dir> +static void BM_FindEmpty(benchmark::State& state) { + Bitmap<N> set; + // Volatile set/clears prevent the compiler from const-propagating the whole + // search. + volatile size_t to_set = 0; + volatile size_t to_clear = 0; + set.SetBit(to_set); + set.ClearBit(to_clear); + DoSearchBenchmark<N, Goal, Dir>(&set, state); +} + +BENCHMARK_TEMPLATE(BM_FindEmpty, 64, false, Forward); +BENCHMARK_TEMPLATE(BM_FindEmpty, 64, false, Backward); +BENCHMARK_TEMPLATE(BM_FindEmpty, 64, true, Forward); +BENCHMARK_TEMPLATE(BM_FindEmpty, 64, true, Backward); +BENCHMARK_TEMPLATE(BM_FindEmpty, 256, false, Forward); +BENCHMARK_TEMPLATE(BM_FindEmpty, 256, false, Backward); +BENCHMARK_TEMPLATE(BM_FindEmpty, 256, true, Forward); +BENCHMARK_TEMPLATE(BM_FindEmpty, 256, true, Backward); +BENCHMARK_TEMPLATE(BM_FindEmpty, 256 * 32, false, Forward); +BENCHMARK_TEMPLATE(BM_FindEmpty, 256 * 32, false, Backward); +BENCHMARK_TEMPLATE(BM_FindEmpty, 256 * 32, true, Forward); +BENCHMARK_TEMPLATE(BM_FindEmpty, 256 * 32, true, Backward); + +template <size_t N, bool Goal, SearchDirection Dir> +static void BM_FindLast(benchmark::State& state) { + Bitmap<N> set; + volatile size_t to_set = 0; + volatile size_t to_clear = 0; + set.SetBit(to_set); + set.ClearBit(to_clear); + set.SetBit(N - 1); + DoSearchBenchmark<N, Goal, Dir>(&set, state); +} + +BENCHMARK_TEMPLATE(BM_FindLast, 64, false, Forward); +BENCHMARK_TEMPLATE(BM_FindLast, 64, false, Backward); +BENCHMARK_TEMPLATE(BM_FindLast, 64, true, Forward); +BENCHMARK_TEMPLATE(BM_FindLast, 64, true, Backward); +BENCHMARK_TEMPLATE(BM_FindLast, 256, false, Forward); +BENCHMARK_TEMPLATE(BM_FindLast, 256, false, Backward); +BENCHMARK_TEMPLATE(BM_FindLast, 256, true, Forward); +BENCHMARK_TEMPLATE(BM_FindLast, 256, true, Backward); +BENCHMARK_TEMPLATE(BM_FindLast, 256 * 32, false, Forward); +BENCHMARK_TEMPLATE(BM_FindLast, 256 * 32, false, Backward); +BENCHMARK_TEMPLATE(BM_FindLast, 256 * 32, true, Forward); +BENCHMARK_TEMPLATE(BM_FindLast, 256 * 32, true, Backward); + +template <size_t N, bool Goal, SearchDirection Dir> +static void BM_FindFull(benchmark::State& state) { + Bitmap<N> set; + set.SetRange(0, N); + volatile size_t to_set = 0; + volatile size_t to_clear = 0; + set.SetBit(to_set); + set.ClearBit(to_clear); + DoSearchBenchmark<N, Goal, Dir>(&set, state); +} + +BENCHMARK_TEMPLATE(BM_FindFull, 64, false, Forward); +BENCHMARK_TEMPLATE(BM_FindFull, 64, false, Backward); +BENCHMARK_TEMPLATE(BM_FindFull, 64, true, Forward); +BENCHMARK_TEMPLATE(BM_FindFull, 64, true, Backward); +BENCHMARK_TEMPLATE(BM_FindFull, 256, false, Forward); +BENCHMARK_TEMPLATE(BM_FindFull, 256, false, 
Backward); +BENCHMARK_TEMPLATE(BM_FindFull, 256, true, Forward); +BENCHMARK_TEMPLATE(BM_FindFull, 256, true, Backward); +BENCHMARK_TEMPLATE(BM_FindFull, 256 * 32, false, Forward); +BENCHMARK_TEMPLATE(BM_FindFull, 256 * 32, false, Backward); +BENCHMARK_TEMPLATE(BM_FindFull, 256 * 32, true, Forward); +BENCHMARK_TEMPLATE(BM_FindFull, 256 * 32, true, Backward); + +template <size_t N, bool Goal, SearchDirection Dir> +static void BM_FindRandom(benchmark::State& state) { + Bitmap<N> set; + volatile size_t to_set = 0; + volatile size_t to_clear = 0; + set.SetBit(to_set); + set.ClearBit(to_clear); + absl::BitGen rng; + for (int i = 0; i < N; ++i) { + if (absl::Bernoulli(rng, 1.0 / 2)) set.SetBit(i); + } + DoSearchBenchmark<N, Goal, Dir>(&set, state); +} + +BENCHMARK_TEMPLATE(BM_FindRandom, 64, false, Forward); +BENCHMARK_TEMPLATE(BM_FindRandom, 64, false, Backward); +BENCHMARK_TEMPLATE(BM_FindRandom, 64, true, Forward); +BENCHMARK_TEMPLATE(BM_FindRandom, 64, true, Backward); +BENCHMARK_TEMPLATE(BM_FindRandom, 256, false, Forward); +BENCHMARK_TEMPLATE(BM_FindRandom, 256, false, Backward); +BENCHMARK_TEMPLATE(BM_FindRandom, 256, true, Forward); +BENCHMARK_TEMPLATE(BM_FindRandom, 256, true, Backward); +BENCHMARK_TEMPLATE(BM_FindRandom, 256 * 32, false, Forward); +BENCHMARK_TEMPLATE(BM_FindRandom, 256 * 32, false, Backward); +BENCHMARK_TEMPLATE(BM_FindRandom, 256 * 32, true, Forward); +BENCHMARK_TEMPLATE(BM_FindRandom, 256 * 32, true, Backward); + +template <size_t N> +ABSL_ATTRIBUTE_NOINLINE size_t DoScanBenchmark(Bitmap<N>* set, + benchmark::State& state) { + size_t total = 0; + for (auto s : state) { + size_t index = 0, len; + while (set->NextFreeRange(index, &index, &len)) { + benchmark::DoNotOptimize(index); + benchmark::DoNotOptimize(len); + index += len; + total++; + } + } + + return total; +} + +template <size_t N> +static void BM_ScanEmpty(benchmark::State& state) { + Bitmap<N> set; + volatile size_t to_set = 0; + volatile size_t to_clear = 0; + set.SetBit(to_set); + set.ClearBit(to_clear); + size_t total = DoScanBenchmark<N>(&set, state); + state.SetItemsProcessed(total); +} + +BENCHMARK_TEMPLATE(BM_ScanEmpty, 64); +BENCHMARK_TEMPLATE(BM_ScanEmpty, 256); +BENCHMARK_TEMPLATE(BM_ScanEmpty, 256 * 32); + +template <size_t N> +static void BM_ScanFull(benchmark::State& state) { + Bitmap<N> set; + volatile size_t to_set = 0; + volatile size_t to_clear = 0; + set.SetBit(to_set); + set.ClearBit(to_clear); + set.SetRange(0, N); + + size_t total = DoScanBenchmark<N>(&set, state); + state.SetItemsProcessed(total); +} + +BENCHMARK_TEMPLATE(BM_ScanFull, 64); +BENCHMARK_TEMPLATE(BM_ScanFull, 256); +BENCHMARK_TEMPLATE(BM_ScanFull, 256 * 32); + +template <size_t N> +static void BM_ScanRandom(benchmark::State& state) { + Bitmap<N> set; + volatile size_t to_set = 0; + volatile size_t to_clear = 0; + set.SetBit(to_set); + set.ClearBit(to_clear); + absl::BitGen rng; + for (int i = 0; i < N; ++i) { + if (absl::Bernoulli(rng, 1.0 / 2)) set.SetBit(i); + } + size_t total = DoScanBenchmark<N>(&set, state); + state.SetItemsProcessed(total); +} + +BENCHMARK_TEMPLATE(BM_ScanRandom, 64); +BENCHMARK_TEMPLATE(BM_ScanRandom, 256); +BENCHMARK_TEMPLATE(BM_ScanRandom, 256 * 32); + +template <size_t N> +static void BM_ScanChunks(benchmark::State& state) { + Bitmap<N> set; + volatile size_t to_set = 0; + volatile size_t to_clear = 0; + set.SetBit(to_set); + set.ClearBit(to_clear); + absl::BitGen rng; + size_t index = 0; + while (index < N) { + // Paint ~half of a chunk of random size. 
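+ // Concretely: each chunk [index, index + len) gets a random split point mid;
+ // bits [index, mid] stay clear and bits [mid + 1, index + len) are set, so
+ // about half of every chunk ends up painted on average.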
+ size_t len = absl::Uniform<int32_t>(rng, 0, 32) + 1; + len = std::min(len, N - index); + size_t mid = absl::Uniform<int32_t>(rng, 0, len) + index; + size_t ones = mid + 1; + size_t limit = index + len; + if (ones < limit) { + set.SetRange(ones, limit - ones); + } + index = limit; + } + size_t total = DoScanBenchmark<N>(&set, state); + state.SetItemsProcessed(total); +} + +BENCHMARK_TEMPLATE(BM_ScanChunks, 64); +BENCHMARK_TEMPLATE(BM_ScanChunks, 256); +BENCHMARK_TEMPLATE(BM_ScanChunks, 256 * 32); + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/range_tracker_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/range_tracker_test.cc new file mode 100644 index 0000000000..4f9202e221 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/range_tracker_test.cc @@ -0,0 +1,294 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/internal/range_tracker.h" + +#include <algorithm> +#include <utility> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/attributes.h" +#include "absl/container/fixed_array.h" +#include "absl/random/distributions.h" +#include "absl/random/random.h" + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +using testing::ElementsAre; +using testing::Pair; + +class BitmapTest : public testing::Test { + protected: + template <size_t N> + std::vector<size_t> FindSetResults(const Bitmap<N> &map) { + return FindResults<N, true>(map); + } + + template <size_t N> + std::vector<size_t> FindClearResults(const Bitmap<N> &map) { + return FindResults<N, false>(map); + } + + template <size_t N, bool Value> + std::vector<size_t> FindResults(const Bitmap<N> &map) { + std::vector<size_t> results; + ssize_t last = -1; + for (size_t i = 0; i < N; ++i) { + ssize_t j = Value ? map.FindSet(i) : map.FindClear(i); + EXPECT_LE(last, j) << i; + EXPECT_LE(i, j) << i; + EXPECT_GE(N, j) << i; + if (last != j) { + results.push_back(j); + last = j; + } + } + + return results; + } + + template <size_t N> + std::vector<size_t> FindSetResultsBackwards(const Bitmap<N> &map) { + return FindResultsBackwards<N, true>(map); + } + + template <size_t N> + std::vector<size_t> FindClearResultsBackwards(const Bitmap<N> &map) { + return FindResultsBackwards<N, false>(map); + } + + template <size_t N, bool Value> + std::vector<size_t> FindResultsBackwards(const Bitmap<N> &map) { + std::vector<size_t> results; + ssize_t last = N; + for (ssize_t i = N - 1; i >= 0; --i) { + ssize_t j = Value ? 
map.FindSetBackwards(i) : map.FindClearBackwards(i); + EXPECT_GE(last, j) << i; + EXPECT_GE(i, j) << i; + EXPECT_LE(-1, j) << i; + if (last != j) { + results.push_back(j); + last = j; + } + } + + return results; + } +}; + +TEST_F(BitmapTest, GetBitEmpty) { + Bitmap<253> map; + for (size_t i = 0; i < map.size(); ++i) { + EXPECT_EQ(map.GetBit(i), 0); + } +} + +TEST_F(BitmapTest, CheckIsZero) { + Bitmap<253> map; + EXPECT_EQ(map.IsZero(), true); + for (size_t i = 0; i < map.size(); ++i) { + map.Clear(); + EXPECT_EQ(map.IsZero(), true); + map.SetBit(i); + EXPECT_EQ(map.IsZero(), false); + } +} + +TEST_F(BitmapTest, CheckClearLowestBit) { + Bitmap<253> map; + for (size_t i = 0; i < map.size(); ++i) { + map.SetBit(i); + } + for (size_t i = 0; i < map.size(); ++i) { + size_t index = map.FindSet(0); + EXPECT_EQ(index, i); + map.ClearLowestBit(); + } +} + +TEST_F(BitmapTest, GetBitOneSet) { + const size_t N = 251; + for (size_t s = 0; s < N; s++) { + Bitmap<N> map; + map.SetBit(s); + for (size_t i = 0; i < map.size(); ++i) { + EXPECT_EQ(map.GetBit(i), i == s ? 1 : 0); + } + } +} + +TEST_F(BitmapTest, FindSet) { + Bitmap<253> map; + EXPECT_THAT(FindSetResults(map), ElementsAre(253)); + EXPECT_THAT(FindSetResultsBackwards(map), ElementsAre(-1)); + map.SetBit(7); + map.SetBit(14); + map.SetBit(15); + map.SetBit(63); + map.SetBit(128); + EXPECT_THAT(FindSetResults(map), ElementsAre(7, 14, 15, 63, 128, 253)); + EXPECT_THAT(FindSetResultsBackwards(map), + ElementsAre(128, 63, 15, 14, 7, -1)); + map.SetBit(195); + map.SetBit(196); + map.SetBit(251); + map.SetBit(252); + EXPECT_THAT(FindSetResults(map), + ElementsAre(7, 14, 15, 63, 128, 195, 196, 251, 252)); + EXPECT_THAT(FindSetResultsBackwards(map), + ElementsAre(252, 251, 196, 195, 128, 63, 15, 14, 7, -1)); + map.SetBit(0); + EXPECT_THAT(FindSetResultsBackwards(map), + ElementsAre(252, 251, 196, 195, 128, 63, 15, 14, 7, 0)); +} + +TEST_F(BitmapTest, FindClear) { + Bitmap<253> map; + map.SetRange(0, 253); + EXPECT_THAT(FindClearResults(map), ElementsAre(253)); + EXPECT_THAT(FindClearResultsBackwards(map), ElementsAre(-1)); + + map.ClearBit(7); + map.ClearBit(14); + map.ClearBit(15); + map.ClearBit(63); + map.ClearBit(128); + EXPECT_THAT(FindClearResults(map), ElementsAre(7, 14, 15, 63, 128, 253)); + EXPECT_THAT(FindClearResultsBackwards(map), + ElementsAre(128, 63, 15, 14, 7, -1)); + map.ClearBit(195); + map.ClearBit(196); + map.ClearBit(251); + map.ClearBit(252); + EXPECT_THAT(FindClearResults(map), + ElementsAre(7, 14, 15, 63, 128, 195, 196, 251, 252)); + EXPECT_THAT(FindClearResultsBackwards(map), + ElementsAre(252, 251, 196, 195, 128, 63, 15, 14, 7, -1)); + map.ClearBit(0); + EXPECT_THAT(FindClearResultsBackwards(map), + ElementsAre(252, 251, 196, 195, 128, 63, 15, 14, 7, 0)); +} + +TEST_F(BitmapTest, CountBits) { + Bitmap<253> map; + map.SetRange(0, 253); + EXPECT_EQ(map.CountBits(0, 253), 253); + EXPECT_EQ(map.CountBits(8, 245), 245); + EXPECT_EQ(map.CountBits(0, 250), 250); + + map.ClearBit(7); + map.ClearBit(14); + map.ClearBit(15); + map.ClearBit(63); + map.ClearBit(128); + + EXPECT_EQ(map.CountBits(0, 253), 248); + EXPECT_EQ(map.CountBits(8, 245), 241); + EXPECT_EQ(map.CountBits(0, 250), 245); + + map.ClearBit(195); + map.ClearBit(196); + map.ClearBit(251); + map.ClearBit(252); + + EXPECT_EQ(map.CountBits(0, 253), 244); + EXPECT_EQ(map.CountBits(8, 245), 237); + EXPECT_EQ(map.CountBits(0, 250), 243); + + map.ClearBit(0); + + EXPECT_EQ(map.CountBits(0, 253), 243); + EXPECT_EQ(map.CountBits(8, 245), 237); + EXPECT_EQ(map.CountBits(0, 250), 242); 
+} + +TEST_F(BitmapTest, CountBitsFuzz) { + static constexpr size_t kBits = 253; + absl::FixedArray<bool> truth(kBits); + Bitmap<kBits> map; + + absl::BitGen rng; + for (int i = 0; i < kBits; i++) { + bool v = absl::Bernoulli(rng, 0.3); + truth[i] = v; + if (v) { + map.SetBit(i); + } + } + + for (int i = 0; i < 100; i++) { + SCOPED_TRACE(i); + + // Pick a random starting point and a length, use a naive loop against truth + // to calculate the expected bit count. + size_t start = absl::Uniform(rng, 0u, kBits); + size_t length = absl::Uniform(rng, 0u, kBits - start); + + size_t expected = 0; + for (int j = 0; j < length; j++) { + if (truth[start + j]) { + expected++; + } + } + + EXPECT_EQ(expected, map.CountBits(start, length)); + } +} + +class RangeTrackerTest : public ::testing::Test { + protected: + std::vector<std::pair<size_t, size_t>> FreeRanges() { + std::vector<std::pair<size_t, size_t>> ret; + size_t index = 0, len; + while (range_.NextFreeRange(index, &index, &len)) { + ret.push_back({index, len}); + index += len; + } + return ret; + } + static constexpr size_t kBits = 1017; + RangeTracker<kBits> range_; +}; + +TEST_F(RangeTrackerTest, Trivial) { + EXPECT_EQ(kBits, range_.size()); + EXPECT_EQ(0, range_.used()); + EXPECT_EQ(kBits, range_.longest_free()); + EXPECT_THAT(FreeRanges(), ElementsAre(Pair(0, kBits))); + ASSERT_EQ(0, range_.FindAndMark(kBits)); + EXPECT_EQ(0, range_.longest_free()); + EXPECT_EQ(kBits, range_.used()); + EXPECT_THAT(FreeRanges(), ElementsAre()); + range_.Unmark(0, 100); + EXPECT_EQ(100, range_.longest_free()); + EXPECT_EQ(kBits - 100, range_.used()); + EXPECT_THAT(FreeRanges(), ElementsAre(Pair(0, 100))); + // non-contiguous - shouldn't increase longest + range_.Unmark(200, 100); + EXPECT_EQ(100, range_.longest_free()); + EXPECT_EQ(kBits - 200, range_.used()); + EXPECT_THAT(FreeRanges(), ElementsAre(Pair(0, 100), Pair(200, 100))); + range_.Unmark(100, 100); + EXPECT_EQ(300, range_.longest_free()); + EXPECT_EQ(kBits - 300, range_.used()); + EXPECT_THAT(FreeRanges(), ElementsAre(Pair(0, 300))); +} + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/timeseries_tracker.h b/contrib/libs/tcmalloc/tcmalloc/internal/timeseries_tracker.h new file mode 100644 index 0000000000..f1b6d3375f --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/timeseries_tracker.h @@ -0,0 +1,195 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
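The header added below (timeseries_tracker.h) defines TimeSeriesTracker<T, S, kEpochs>, a fixed-size ring of kEpochs entries covering a sliding time window. As a preview, here is a hedged usage sketch modeled on the unit test that appears later in this diff; the CountEntry type, the two-second window, and the use of the Abseil cycle clock are assumptions made for the example, not part of the library contract.

#include <cstdint>
#include <cstdio>
#include <vector>

#include "absl/base/internal/cycleclock.h"
#include "absl/time/time.h"
#include "tcmalloc/internal/clock.h"
#include "tcmalloc/internal/timeseries_tracker.h"

// Entry type: the tracker only requires Nil(), Report(S), and empty().
struct CountEntry {
  static CountEntry Nil() { return CountEntry(); }
  void Report(int n) { values.push_back(n); }
  bool empty() const { return values.empty(); }
  std::vector<int> values;
};

int main() {
  using tcmalloc::tcmalloc_internal::Clock;
  using tcmalloc::tcmalloc_internal::TimeSeriesTracker;

  // Eight epochs spread across a two-second window, driven by the cycle clock.
  TimeSeriesTracker<CountEntry, int, 8> tracker(
      Clock{absl::base_internal::CycleClock::Now,
            absl::base_internal::CycleClock::Frequency},
      absl::Seconds(2));

  tracker.Report(1);
  tracker.Report(2);

  // Walk the window oldest-to-newest, skipping epochs with no reports.
  tracker.Iter(
      [](size_t offset, int64_t ts_ns, const CountEntry& e) {
        std::printf("offset %zu: %zu samples\n", offset, e.values.size());
      },
      tracker.kSkipEmptyEntries);
  return 0;
}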
+#ifndef TCMALLOC_INTERNAL_TIMESERIES_TRACKER_H_ +#define TCMALLOC_INTERNAL_TIMESERIES_TRACKER_H_ + +#include <stddef.h> +#include <stdint.h> + +#include <algorithm> +#include <atomic> +#include <limits> + +#include "absl/base/internal/cycleclock.h" +#include "absl/functional/function_ref.h" +#include "absl/numeric/bits.h" +#include "absl/numeric/int128.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "tcmalloc/internal/clock.h" +#include "tcmalloc/internal/logging.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// Aggregates a series of reported values of type S in a set of entries of type +// T, one entry per epoch. This class factors out common functionality of +// different time series trackers. S can be any type, T needs to implement: +// Nil(), Report(S val), empty() +template <typename T, typename S, size_t kEpochs = 16> +class TimeSeriesTracker { + public: + enum SkipEntriesSetting { kSkipEmptyEntries, kDoNotSkipEmptyEntries }; + + explicit constexpr TimeSeriesTracker(Clock clock, absl::Duration w) + : window_(w), epoch_length_(window_ / kEpochs), clock_(clock) { + // See comment in GetCurrentEpoch(). + auto d = static_cast<uint64_t>(absl::ToDoubleSeconds(epoch_length_) * + clock.freq()); + div_precision_ = 63 + absl::bit_width(d); + epoch_ticks_m_ = + static_cast<uint64_t>( + (static_cast<absl::uint128>(1) << div_precision_) / d) + + 1; + } + + bool Report(S val); + + // Iterates over the time series, starting from the oldest entry. The callback + // receives the offset of the entry, its timestamp according to the clock and + // the entry itself. Offsets are relative to the beginning of the buffer. + void Iter(absl::FunctionRef<void(size_t, int64_t, const T&)> f, + SkipEntriesSetting skip_entries) const; + + // Iterates over the last num_epochs data points (if -1, iterate to the + // oldest entry). Offsets are relative to the end of the buffer. + void IterBackwards(absl::FunctionRef<void(size_t, int64_t, const T&)> f, + int64_t num_epochs = -1) const; + + // This retrieves a particular data point (if offset is outside the valid + // range, the default data point will be returned). + const T GetEpochAtOffset(size_t offset); + + // Updates the time base to the current time. This is useful to report the + // most recent time window rather than the last time window that had any + // reported values. + void UpdateTimeBase() { UpdateClock(); } + + private: + // Returns true if the tracker moved to a different epoch. + bool UpdateClock(); + + // Returns the current epoch based on the clock. + int64_t GetCurrentEpoch() { + // This is equivalent to + // `clock_.now() / (absl::ToDoubleSeconds(epoch_length_) * clock_.freq())`. + // We basically follow the technique from + // https://ridiculousfish.com/blog/posts/labor-of-division-episode-i.html, + // except that we use one fewer bit of precision than necessary to always + // get the correct answer if the numerator were a 64-bit unsigned number. In + // this case, because clock_.now() returns a signed 64-bit number (i.e. max + // is <2^63), it shouldn't cause a problem. This way, we don't need to + // handle overflow so it's simpler. See also: + // https://lemire.me/blog/2019/02/20/more-fun-with-fast-remainders-when-the-divisor-is-a-constant/. 
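+ // In short: with d = ticks per epoch and p = div_precision_, the constructor
+ // stores epoch_ticks_m_ = floor(2^p / d) + 1, and the expression below
+ // computes floor(now * m / 2^p), which (per the argument above) equals
+ // floor(now / d) for any clock value below 2^63. Tiny illustration with
+ // made-up numbers: d = 10 gives p = 63 + 4 = 67 and m = floor(2^67 / 10) + 1;
+ // then for now = 25, (25 * m) >> 67 == 2 == 25 / 10.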
+ return static_cast<int64_t>(static_cast<absl::uint128>(epoch_ticks_m_) * + clock_.now() >> + div_precision_); + } + + const absl::Duration window_; + const absl::Duration epoch_length_; + + T entries_[kEpochs]{}; + size_t last_epoch_{0}; + size_t current_epoch_{0}; + // This is the magic constant from + // https://ridiculousfish.com/blog/posts/labor-of-division-episode-i.html. + uint64_t epoch_ticks_m_; + uint8_t div_precision_; + + Clock clock_; +}; + +// Erases values from the window that are out of date; sets the current epoch +// to the current location in the ringbuffer. +template <class T, class S, size_t kEpochs> +bool TimeSeriesTracker<T, S, kEpochs>::UpdateClock() { + const size_t epoch = GetCurrentEpoch(); + // How many time steps did we take? (Since we only record kEpochs + // time steps, we can pretend it was at most that.) + size_t delta = epoch - last_epoch_; + delta = std::min(delta, kEpochs); + last_epoch_ = epoch; + + if (delta == 0) { + return false; + } + + // At each tick, we move our current location by one, to a new location + // that contains too-old data (which must be zeroed.) + for (size_t offset = 0; offset < delta; ++offset) { + current_epoch_++; + if (current_epoch_ == kEpochs) current_epoch_ = 0; + entries_[current_epoch_] = T::Nil(); + } + return true; +} + +template <class T, class S, size_t kEpochs> +void TimeSeriesTracker<T, S, kEpochs>::Iter( + absl::FunctionRef<void(size_t, int64_t, const T&)> f, + SkipEntriesSetting skip_entries) const { + size_t j = current_epoch_ + 1; + if (j == kEpochs) j = 0; + int64_t timestamp = + (last_epoch_ - kEpochs) * absl::ToInt64Nanoseconds(epoch_length_); + for (int offset = 0; offset < kEpochs; offset++) { + timestamp += absl::ToInt64Nanoseconds(epoch_length_); + if (skip_entries == kDoNotSkipEmptyEntries || !entries_[j].empty()) { + f(offset, timestamp, entries_[j]); + } + j++; + if (j == kEpochs) j = 0; + } +} + +template <class T, class S, size_t kEpochs> +void TimeSeriesTracker<T, S, kEpochs>::IterBackwards( + absl::FunctionRef<void(size_t, int64_t, const T&)> f, + int64_t num_epochs) const { + // -1 means that we are outputting all epochs. + num_epochs = (num_epochs == -1) ? kEpochs : num_epochs; + size_t j = current_epoch_; + ASSERT(num_epochs <= kEpochs); + int64_t timestamp = last_epoch_ * absl::ToInt64Nanoseconds(epoch_length_); + for (size_t offset = 0; offset < num_epochs; ++offset) { + // This is deliberately int64_t and not a time unit, since clock_ is not + // guaranteed to be a real time base. + f(offset, timestamp, entries_[j]); + timestamp -= absl::ToInt64Nanoseconds(epoch_length_); + if (j == 0) j = kEpochs; + --j; + } +} + +template <class T, class S, size_t kEpochs> +const T TimeSeriesTracker<T, S, kEpochs>::GetEpochAtOffset(size_t offset) { + return (offset >= kEpochs) + ? 
T::Nil() + : entries_[(current_epoch_ + kEpochs - offset) % kEpochs]; +} + +template <class T, class S, size_t kEpochs> +bool TimeSeriesTracker<T, S, kEpochs>::Report(S val) { + bool updated_clock = UpdateClock(); + entries_[current_epoch_].Report(val); + return updated_clock; +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_INTERNAL_TIMESERIES_TRACKER_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/timeseries_tracker_test.cc b/contrib/libs/tcmalloc/tcmalloc/internal/timeseries_tracker_test.cc new file mode 100644 index 0000000000..1f75306161 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/timeseries_tracker_test.cc @@ -0,0 +1,191 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/internal/timeseries_tracker.h" + +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +using ::testing::ElementsAre; + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +class TimeSeriesTrackerTest : public testing::Test { + public: + struct TestEntry { + static TestEntry Nil() { return TestEntry(); } + + void Report(int n) { values_.push_back(n); } + + bool empty() const { return values_.empty(); } + + std::vector<int> values_; + }; + + protected: + void Advance(absl::Duration d) { + clock_ += absl::ToDoubleSeconds(d) * GetFakeClockFrequency(); + } + + static constexpr absl::Duration kDuration = absl::Seconds(2); + + TimeSeriesTracker<TestEntry, int, 8> tracker_{ + Clock{.now = FakeClock, .freq = GetFakeClockFrequency}, kDuration}; + + private: + static int64_t FakeClock() { return clock_; } + + static double GetFakeClockFrequency() { + return absl::ToDoubleNanoseconds(absl::Seconds(2)); + } + + static int64_t clock_; +}; + +int64_t TimeSeriesTrackerTest::clock_{0}; + +// Test that frequency conversion in the cycle clock works correctly +TEST(TimeSeriesTest, CycleClock) { + TimeSeriesTracker<TimeSeriesTrackerTest::TestEntry, int, 100> tracker{ + Clock{absl::base_internal::CycleClock::Now, + absl::base_internal::CycleClock::Frequency}, + absl::Seconds(10)}; // 100ms epochs + + tracker.Report(1); + absl::SleepFor(absl::Milliseconds(100)); + tracker.Report(2); + + // Iterate through entries skipping empty entries. + int num_timestamps = 0; + int offset_1, offset_2; + tracker.Iter( + [&](size_t offset, int64_t ts, + const TimeSeriesTrackerTest::TestEntry& e) { + ASSERT_LT(num_timestamps, 2); + if (num_timestamps == 0) { + offset_1 = offset; + EXPECT_THAT(e.values_, ElementsAre(1)); + } else { + offset_2 = offset; + EXPECT_THAT(e.values_, ElementsAre(2)); + } + num_timestamps++; + }, + tracker.kSkipEmptyEntries); + + // If we are near an epoch boundary, we may skip two epochs. 
+ EXPECT_GE(offset_2 - offset_1, 1); + EXPECT_LE(offset_2 - offset_1, 2); +} + +TEST_F(TimeSeriesTrackerTest, Works) { + const int64_t kEpochLength = absl::ToInt64Nanoseconds(kDuration) / 8; + Advance(kDuration); + + tracker_.Report(1); + Advance(absl::Nanoseconds(1)); + tracker_.Report(2); + Advance(kDuration / 4); + tracker_.Report(4); + + // Iterate through entries skipping empty entries. + int num_timestamps = 0; + int offset_1, offset_2; + tracker_.Iter( + [&](size_t offset, int64_t ts, const TestEntry& e) { + ASSERT_LT(num_timestamps, 2); + if (num_timestamps == 0) { + offset_1 = offset; + EXPECT_EQ(absl::ToInt64Nanoseconds(kDuration), ts); + EXPECT_THAT(e.values_, ElementsAre(1, 2)); + } else { + offset_2 = offset; + EXPECT_EQ(absl::ToInt64Nanoseconds(kDuration) + + absl::ToInt64Nanoseconds(kDuration) / 4, + ts); + EXPECT_THAT(e.values_, ElementsAre(4)); + } + num_timestamps++; + }, + tracker_.kSkipEmptyEntries); + + EXPECT_EQ(2, num_timestamps); + EXPECT_EQ(offset_2 - offset_1, 2); + + Advance(kDuration / 4); + + // Iterate through entries not skipping empty entries. + int64_t expected_timestamp = absl::ToInt64Nanoseconds(kDuration) / 4; + num_timestamps = 0; + + tracker_.Iter( + [&](size_t offset, int64_t ts, const TestEntry& e) { + expected_timestamp += kEpochLength; + ASSERT_LT(num_timestamps, 8); + EXPECT_EQ(expected_timestamp, ts); + num_timestamps++; + }, + tracker_.kDoNotSkipEmptyEntries); + + EXPECT_EQ(8, num_timestamps); + + tracker_.Report(8); + Advance(kDuration / 4); + tracker_.Report(16); + + // Iterate backwards. + num_timestamps = 0; + expected_timestamp = + 7 * absl::ToInt64Nanoseconds(kDuration) / 4; // Current time + tracker_.IterBackwards( + [&](size_t offset, int64_t ts, const TestEntry& e) { + ASSERT_LT(num_timestamps, 3); + EXPECT_EQ(num_timestamps, offset); + EXPECT_EQ(expected_timestamp, ts); + if (num_timestamps == 0) { + EXPECT_THAT(e.values_, ElementsAre(16)); + } else if (num_timestamps == 1) { + EXPECT_TRUE(e.values_.empty()); + } else { + EXPECT_THAT(e.values_, ElementsAre(8)); + } + expected_timestamp -= kEpochLength; + num_timestamps++; + }, + 3); + + EXPECT_EQ(3, num_timestamps); + + EXPECT_THAT(tracker_.GetEpochAtOffset(0).values_, ElementsAre(16)); + EXPECT_THAT(tracker_.GetEpochAtOffset(2).values_, ElementsAre(8)); + EXPECT_TRUE(tracker_.GetEpochAtOffset(3).empty()); + EXPECT_TRUE(tracker_.GetEpochAtOffset(1000).empty()); + + // This should annilate everything. + Advance(kDuration * 2); + tracker_.UpdateTimeBase(); + tracker_.Iter( + [&](size_t offset, int64_t ts, const TestEntry& e) { + ASSERT_TRUE(false) << "Time series should be empty"; + }, + tracker_.kSkipEmptyEntries); + + EXPECT_TRUE(tracker_.GetEpochAtOffset(1).empty()); +} + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/util.cc b/contrib/libs/tcmalloc/tcmalloc/internal/util.cc new file mode 100644 index 0000000000..ef705b02e3 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/util.cc @@ -0,0 +1,195 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +#include "tcmalloc/internal/util.h" + +#include <errno.h> +#include <fcntl.h> +#include <poll.h> +#include <signal.h> +#include <stdarg.h> +#include <string.h> +#include <unistd.h> + +#include <utility> + +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "tcmalloc/internal/logging.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +int signal_safe_open(const char* path, int flags, ...) { + int fd; + va_list ap; + + va_start(ap, flags); + mode_t mode = va_arg(ap, mode_t); + va_end(ap); + + do { + fd = ((flags & O_CREAT) ? open(path, flags, mode) : open(path, flags)); + } while (fd == -1 && errno == EINTR); + + return fd; +} + +int signal_safe_close(int fd) { + int rc; + + do { + rc = close(fd); + } while (rc == -1 && errno == EINTR); + + return rc; +} + +ssize_t signal_safe_write(int fd, const char* buf, size_t count, + size_t* bytes_written) { + ssize_t rc; + size_t total_bytes = 0; + + do { + rc = write(fd, buf + total_bytes, count - total_bytes); + if (rc > 0) total_bytes += rc; + } while ((rc > 0 && count > total_bytes) || (rc == -1 && errno == EINTR)); + + if (bytes_written != nullptr) *bytes_written = total_bytes; + + return rc; +} + +int signal_safe_poll(struct pollfd* fds, int nfds, absl::Duration timeout) { + int rc = 0; + absl::Duration elapsed = absl::ZeroDuration(); + + // We can't use gettimeofday since it's not async signal safe. We could use + // clock_gettime but that would require linking //base against librt. + // Fortunately, timeout is of sufficiently coarse granularity that we can just + // approximate it. + while ((elapsed <= timeout || timeout < absl::ZeroDuration()) && (rc == 0)) { + if (elapsed > absl::ZeroDuration()) + ::absl::SleepFor(::absl::Milliseconds(1)); + elapsed += absl::Milliseconds(1); + while ((rc = poll(fds, nfds, 0)) == -1 && errno == EINTR) { + } + } + + return rc; +} + +ssize_t signal_safe_read(int fd, char* buf, size_t count, size_t* bytes_read) { + ssize_t rc; + size_t total_bytes = 0; + struct pollfd pfd; + + // poll is required for testing whether there is any data left on fd in the + // case of a signal interrupting a partial read. This is needed since this + // case is only defined to return the number of bytes read up to that point, + // with no indication whether more could have been read (up to count). + pfd.fd = fd; + pfd.events = POLL_IN; + pfd.revents = 0; + + do { + rc = read(fd, buf + total_bytes, count - total_bytes); + if (rc > 0) total_bytes += rc; + + if (rc == 0) break; // EOF + // try again if there's space to fill, no (non-interrupt) error, + // and data is available. + } while (total_bytes < count && (rc > 0 || errno == EINTR) && + (signal_safe_poll(&pfd, 1, absl::ZeroDuration()) == 1 || + total_bytes == 0)); + + if (bytes_read) *bytes_read = total_bytes; + + if (rc != -1 || errno == EINTR) + rc = total_bytes; // return the cumulative bytes read + return rc; +} + +std::vector<int> AllowedCpus() { + // We have no need for dynamically sized sets (currently >1024 CPUs for glibc) + // at the present time. We could change this in the future. 
+ cpu_set_t allowed_cpus; + CHECK_CONDITION(sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus) == + 0); + int n = CPU_COUNT(&allowed_cpus), c = 0; + + std::vector<int> result(n); + for (int i = 0; i < CPU_SETSIZE && n; i++) { + if (CPU_ISSET(i, &allowed_cpus)) { + result[c++] = i; + n--; + } + } + CHECK_CONDITION(0 == n); + + return result; +} + +static cpu_set_t SpanToCpuSetT(absl::Span<int> mask) { + cpu_set_t result; + CPU_ZERO(&result); + for (int cpu : mask) { + CPU_SET(cpu, &result); + } + return result; +} + +ScopedAffinityMask::ScopedAffinityMask(absl::Span<int> allowed_cpus) { + specified_cpus_ = SpanToCpuSetT(allowed_cpus); + // getaffinity should never fail. + CHECK_CONDITION( + sched_getaffinity(0, sizeof(original_cpus_), &original_cpus_) == 0); + // See destructor comments on setaffinity interactions. Tampered() will + // necessarily be true in this case. + sched_setaffinity(0, sizeof(specified_cpus_), &specified_cpus_); +} + +ScopedAffinityMask::ScopedAffinityMask(int allowed_cpu) { + CPU_ZERO(&specified_cpus_); + CPU_SET(allowed_cpu, &specified_cpus_); + + // getaffinity should never fail. + CHECK_CONDITION( + sched_getaffinity(0, sizeof(original_cpus_), &original_cpus_) == 0); + // See destructor comments on setaffinity interactions. Tampered() will + // necessarily be true in this case. + sched_setaffinity(0, sizeof(specified_cpus_), &specified_cpus_); +} + +ScopedAffinityMask::~ScopedAffinityMask() { + // If something else has already reset our affinity, do not attempt to + // restrict towards our original mask. This is best-effort as the tampering + // may obviously occur during the destruction of *this. + if (!Tampered()) { + // Note: We do not assert success here, conflicts may restrict us from all + // 'original_cpus_'. + sched_setaffinity(0, sizeof(original_cpus_), &original_cpus_); + } +} + +bool ScopedAffinityMask::Tampered() { + cpu_set_t current_cpus; + CHECK_CONDITION(sched_getaffinity(0, sizeof(current_cpus), ¤t_cpus) == + 0); + return !CPU_EQUAL(¤t_cpus, &specified_cpus_); // Mismatch => modified. +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/internal/util.h b/contrib/libs/tcmalloc/tcmalloc/internal/util.h new file mode 100644 index 0000000000..b43e322257 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal/util.h @@ -0,0 +1,138 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
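util.h, reproduced next, declares the interfaces implemented above. As a rough sketch of how they combine in practice, the following stand-alone example reads the start of a /proc file with the signal-safe wrappers and pins the thread with ScopedAffinityMask; the file path, buffer size, and minimal error handling are arbitrary choices made for illustration.

#include <fcntl.h>

#include <cstdio>
#include <vector>

#include "tcmalloc/internal/util.h"

using tcmalloc::tcmalloc_internal::AllowedCpus;
using tcmalloc::tcmalloc_internal::ScopedAffinityMask;
using tcmalloc::tcmalloc_internal::signal_safe_close;
using tcmalloc::tcmalloc_internal::signal_safe_open;
using tcmalloc::tcmalloc_internal::signal_safe_read;

int main() {
  // Read the beginning of /proc/self/status using only signal-safe calls.
  char buf[512];
  size_t bytes_read = 0;
  int fd = signal_safe_open("/proc/self/status", O_RDONLY);
  if (fd >= 0) {
    ssize_t rc = signal_safe_read(fd, buf, sizeof(buf) - 1, &bytes_read);
    signal_safe_close(fd);
    if (rc >= 0) {
      buf[bytes_read] = '\0';
      std::printf("read %zu bytes:\n%s\n", bytes_read, buf);
    }
  }

  // Pin the thread to its first allowed CPU for a scoped region (the class is
  // documented as test-only).
  std::vector<int> cpus = AllowedCpus();
  if (!cpus.empty()) {
    ScopedAffinityMask mask(cpus.front());
    // ... code that assumes it stays on cpus.front() ...
    if (mask.Tampered()) {
      std::printf("affinity was changed externally\n");
    }
  }
  return 0;
}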
+ +#ifndef TCMALLOC_INTERNAL_UTIL_H_ +#define TCMALLOC_INTERNAL_UTIL_H_ + +#include <poll.h> // IWYU pragma: keep +#include <sched.h> +#include <signal.h> +#include <stddef.h> +#include <stdint.h> +#include <string.h> +#include <sys/types.h> +#include <time.h> + +#include <vector> + +#include "absl/base/internal/sysinfo.h" +#include "absl/time/time.h" +#include "absl/types/span.h" +#include "tcmalloc/internal/config.h" + +#define TCMALLOC_RETRY_ON_TEMP_FAILURE(expression) \ + (__extension__({ \ + long int _temp_failure_retry_result; \ + do _temp_failure_retry_result = (long int)(expression); \ + while (_temp_failure_retry_result == -1L && errno == EINTR); \ + _temp_failure_retry_result; \ + })) + +// Useful internal utility functions. These calls are async-signal safe +// provided the signal handler saves errno at entry and restores it before +// return. +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// signal_safe_open() - a wrapper for open(2) which ignores signals +// Semantics equivalent to open(2): +// returns a file-descriptor (>=0) on success, -1 on failure, error in errno +int signal_safe_open(const char *path, int flags, ...); + +// signal_safe_close() - a wrapper for close(2) which ignores signals +// Semantics equivalent to close(2): +// returns 0 on success, -1 on failure, error in errno +int signal_safe_close(int fd); + +// signal_safe_write() - a wrapper for write(2) which ignores signals +// Semantics equivalent to write(2): +// returns number of bytes written, -1 on failure, error in errno +// additionally, (if not NULL) total bytes written in *bytes_written +// +// In the interrupted (EINTR) case, signal_safe_write will continue attempting +// to write out buf. This means that in the: +// write->interrupted by signal->write->error case +// That it is possible for signal_safe_write to return -1 when there were bytes +// flushed from the buffer in the first write. To handle this case the optional +// bytes_written parameter is provided, when not-NULL, it will always return the +// total bytes written before any error. +ssize_t signal_safe_write(int fd, const char *buf, size_t count, + size_t *bytes_written); + +// signal_safe_read() - a wrapper for read(2) which ignores signals +// Semantics equivalent to read(2): +// returns number of bytes written, -1 on failure, error in errno +// additionally, (if not NULL) total bytes written in *bytes_written +// +// In the interrupted (EINTR) case, signal_safe_read will continue attempting +// to read into buf. This means that in the: +// read->interrupted by signal->read->error case +// That it is possible for signal_safe_read to return -1 when there were bytes +// read by a previous read. To handle this case the optional bytes_written +// parameter is provided, when not-NULL, it will always return the total bytes +// read before any error. +ssize_t signal_safe_read(int fd, char *buf, size_t count, size_t *bytes_read); + +// signal_safe_poll() - a wrapper for poll(2) which ignores signals +// Semantics equivalent to poll(2): +// Returns number of structures with non-zero revent fields. +// +// In the interrupted (EINTR) case, signal_safe_poll will continue attempting to +// poll for data. Unlike ppoll/pselect, signal_safe_poll is *ignoring* signals +// not attempting to re-enable them. Protecting us from the traditional races +// involved with the latter. +int signal_safe_poll(struct ::pollfd *fds, int nfds, absl::Duration timeout); + +// Affinity helpers. 
+ +// Returns a vector of the which cpus the currently allowed thread is allowed to +// run on. There are no guarantees that this will not change before, after, or +// even during, the call to AllowedCpus(). +std::vector<int> AllowedCpus(); + +// Enacts a scoped affinity mask on the constructing thread. Attempts to +// restore the original affinity mask on destruction. +// +// REQUIRES: For test-use only. Do not use this in production code. +class ScopedAffinityMask { + public: + // When racing with an external restriction that has a zero-intersection with + // "allowed_cpus" we will construct, but immediately register as "Tampered()", + // without actual changes to affinity. + explicit ScopedAffinityMask(absl::Span<int> allowed_cpus); + explicit ScopedAffinityMask(int allowed_cpu); + + // Restores original affinity iff our scoped affinity has not been externally + // modified (i.e. Tampered()). Otherwise, the updated affinity is preserved. + ~ScopedAffinityMask(); + + // Returns true if the affinity mask no longer matches what was set at point + // of construction. + // + // Note: This is instantaneous and not fool-proof. It's possible for an + // external affinity modification to subsequently align with our originally + // specified "allowed_cpus". In this case Tampered() will return false when + // time may have been spent executing previously on non-specified cpus. + bool Tampered(); + + private: + cpu_set_t original_cpus_, specified_cpus_; +}; + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_INTERNAL_UTIL_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/internal_malloc_extension.h b/contrib/libs/tcmalloc/tcmalloc/internal_malloc_extension.h new file mode 100644 index 0000000000..66027418ed --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/internal_malloc_extension.h @@ -0,0 +1,133 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Extra extensions exported by some malloc implementations. These +// extensions are accessed through a virtual base class so an +// application can link against a malloc that does not implement these +// extensions, and it will get default versions that do nothing. + +#ifndef TCMALLOC_INTERNAL_MALLOC_EXTENSION_H_ +#define TCMALLOC_INTERNAL_MALLOC_EXTENSION_H_ + +#include <string> + +#include "absl/base/attributes.h" +#include "absl/functional/function_ref.h" +#include "tcmalloc/malloc_extension.h" + +namespace tcmalloc { +namespace tcmalloc_internal { + +// AllocationProfilingTokenAccessor and ProfileAccessor provide access to the +// private constructors of AllocationProfilingToken and Profile that take a +// pointer. 
+class AllocationProfilingTokenAccessor { + public: + static MallocExtension::AllocationProfilingToken MakeToken( + std::unique_ptr<AllocationProfilingTokenBase> p) { + return MallocExtension::AllocationProfilingToken(std::move(p)); + } +}; + +class ProfileAccessor { + public: + static Profile MakeProfile(std::unique_ptr<const ProfileBase> p) { + return Profile(std::move(p)); + } +}; + +} // namespace tcmalloc_internal +} // namespace tcmalloc + +#if ABSL_HAVE_ATTRIBUTE_WEAK && !defined(__APPLE__) && !defined(__EMSCRIPTEN__) + +extern "C" { + +ABSL_ATTRIBUTE_WEAK void TCMalloc_Internal_ForceCpuCacheActivation(); + +ABSL_ATTRIBUTE_WEAK tcmalloc::AddressRegionFactory* +MallocExtension_Internal_GetRegionFactory(); +ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_SetRegionFactory( + tcmalloc::AddressRegionFactory* factory); + +ABSL_ATTRIBUTE_WEAK const tcmalloc::tcmalloc_internal::ProfileBase* +MallocExtension_Internal_SnapshotCurrent(tcmalloc::ProfileType type); + +ABSL_ATTRIBUTE_WEAK tcmalloc::tcmalloc_internal::AllocationProfilingTokenBase* +MallocExtension_Internal_StartAllocationProfiling(); + +ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_ActivateGuardedSampling(); +ABSL_ATTRIBUTE_WEAK tcmalloc::MallocExtension::Ownership +MallocExtension_Internal_GetOwnership(const void* ptr); +ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_GetMemoryLimit( + tcmalloc::MallocExtension::MemoryLimit* limit); +ABSL_ATTRIBUTE_WEAK bool MallocExtension_Internal_GetNumericProperty( + const char* name_data, size_t name_size, size_t* value); +ABSL_ATTRIBUTE_WEAK bool MallocExtension_Internal_GetPerCpuCachesActive(); +ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_DeactivatePerCpuCaches(); +ABSL_ATTRIBUTE_WEAK int32_t MallocExtension_Internal_GetMaxPerCpuCacheSize(); +ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_GetSkipSubreleaseInterval( + absl::Duration* ret); +ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_GetProperties( + std::map<std::string, tcmalloc::MallocExtension::Property>* ret); +ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_GetStats(std::string* ret); +ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_SetMaxPerCpuCacheSize( + int32_t value); +ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_SetSkipSubreleaseInterval( + absl::Duration value); +ABSL_ATTRIBUTE_WEAK size_t MallocExtension_Internal_ReleaseCpuMemory(int cpu); +ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_ReleaseMemoryToSystem( + size_t bytes); +ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_SetMemoryLimit( + const tcmalloc::MallocExtension::MemoryLimit* limit); + +ABSL_ATTRIBUTE_WEAK size_t +MallocExtension_Internal_GetAllocatedSize(const void* ptr); +ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_MarkThreadBusy(); +ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_MarkThreadIdle(); + +ABSL_ATTRIBUTE_WEAK int64_t MallocExtension_Internal_GetProfileSamplingRate(); +ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_SetProfileSamplingRate( + int64_t); + +ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_ProcessBackgroundActions(); + +ABSL_ATTRIBUTE_WEAK tcmalloc::MallocExtension::BytesPerSecond +MallocExtension_Internal_GetBackgroundReleaseRate(); +ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_SetBackgroundReleaseRate( + tcmalloc::MallocExtension::BytesPerSecond); + +ABSL_ATTRIBUTE_WEAK int64_t MallocExtension_Internal_GetGuardedSamplingRate(); +ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_SetGuardedSamplingRate( + int64_t); + +ABSL_ATTRIBUTE_WEAK int64_t 
+MallocExtension_Internal_GetMaxTotalThreadCacheBytes(); +ABSL_ATTRIBUTE_WEAK void MallocExtension_Internal_SetMaxTotalThreadCacheBytes( + int64_t value); + +ABSL_ATTRIBUTE_WEAK void +MallocExtension_EnableForkSupport(); + +ABSL_ATTRIBUTE_WEAK void +MallocExtension_SetSampleUserDataCallbacks( + tcmalloc::MallocExtension::CreateSampleUserDataCallback create, + tcmalloc::MallocExtension::CopySampleUserDataCallback copy, + tcmalloc::MallocExtension::DestroySampleUserDataCallback destroy); + +} + +#endif + +#endif // TCMALLOC_INTERNAL_MALLOC_EXTENSION_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/legacy_size_classes.cc b/contrib/libs/tcmalloc/tcmalloc/legacy_size_classes.cc new file mode 100644 index 0000000000..5395252719 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/legacy_size_classes.cc @@ -0,0 +1,711 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/common.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { + +namespace tcmalloc_internal { + +// <fixed> is fixed per-size-class overhead due to end-of-span fragmentation +// and other factors. For instance, if we have a 96 byte size class, and use a +// single 8KiB page, then we will hold 85 objects per span, and have 32 bytes +// left over. There is also a fixed component of 48 bytes of TCMalloc metadata +// per span. Together, the fixed overhead would be wasted/allocated = +// (32 + 48) / (8192 - 32) ~= 0.98%. +// There is also a dynamic component to overhead based on mismatches between the +// number of bytes requested and the number of bytes provided by the size class. +// Together they sum to the total overhead; for instance if you asked for a +// 50-byte allocation that rounds up to a 64-byte size class, the dynamic +// overhead would be 28%, and if <fixed> were 22% it would mean (on average) +// 25 bytes of overhead for allocations of that size. 
+ +// clang-format off +#if defined(__cpp_aligned_new) && __STDCPP_DEFAULT_NEW_ALIGNMENT__ <= 8 +#if TCMALLOC_PAGE_SHIFT == 13 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +static const int kCount = 86; +static_assert(kCount <= kNumClasses); +const int SizeMap::kLegacySizeClassesCount = kCount; +const SizeClassInfo SizeMap::kLegacySizeClasses[SizeMap::kLegacySizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.59% + { 16, 1, 32}, // 0.59% + { 24, 1, 32}, // 0.68% + { 32, 1, 32}, // 0.59% + { 40, 1, 32}, // 0.98% + { 48, 1, 32}, // 0.98% + { 56, 1, 32}, // 0.78% + { 64, 1, 32}, // 0.59% + { 72, 1, 32}, // 1.28% + { 80, 1, 32}, // 0.98% + { 88, 1, 32}, // 0.68% + { 96, 1, 32}, // 0.98% + { 104, 1, 32}, // 1.58% + { 112, 1, 32}, // 0.78% + { 120, 1, 32}, // 0.98% + { 128, 1, 32}, // 0.59% + { 136, 1, 32}, // 0.98% + { 144, 1, 32}, // 2.18% + { 160, 1, 32}, // 0.98% + { 176, 1, 32}, // 1.78% + { 192, 1, 32}, // 2.18% + { 208, 1, 32}, // 1.58% + { 224, 1, 32}, // 2.18% + { 240, 1, 32}, // 0.98% + { 256, 1, 32}, // 0.59% + { 272, 1, 32}, // 0.98% + { 296, 1, 32}, // 3.10% + { 312, 1, 32}, // 1.58% + { 336, 1, 32}, // 2.18% + { 352, 1, 32}, // 1.78% + { 368, 1, 32}, // 1.78% + { 408, 1, 32}, // 0.98% + { 448, 1, 32}, // 2.18% + { 480, 1, 32}, // 0.98% + { 512, 1, 32}, // 0.59% + { 576, 1, 32}, // 2.18% + { 640, 1, 32}, // 7.29% + { 704, 1, 32}, // 6.40% + { 768, 1, 32}, // 7.29% + { 896, 1, 32}, // 2.18% + { 1024, 1, 32}, // 0.59% + { 1152, 2, 32}, // 1.88% + { 1280, 2, 32}, // 6.98% + { 1408, 2, 32}, // 6.10% + { 1536, 2, 32}, // 6.98% + { 1792, 2, 32}, // 1.88% + { 2048, 2, 32}, // 0.29% + { 2304, 2, 28}, // 1.88% + { 2688, 2, 24}, // 1.88% + { 2816, 3, 23}, // 9.30% + { 3200, 2, 20}, // 2.70% + { 3456, 3, 18}, // 1.79% + { 3584, 4, 18}, // 1.74% + { 4096, 2, 16}, // 0.29% + { 4736, 3, 13}, // 3.99% + { 5376, 2, 12}, // 1.88% + { 6144, 3, 10}, // 0.20% + { 6528, 4, 10}, // 0.54% + { 7168, 7, 9}, // 0.08% + { 8192, 2, 8}, // 0.29% + { 9472, 5, 6}, // 8.23% + { 10240, 4, 6}, // 6.82% + { 12288, 3, 5}, // 0.20% + { 13568, 5, 4}, // 0.75% + { 14336, 7, 4}, // 0.08% + { 16384, 2, 4}, // 0.29% + { 20480, 5, 3}, // 0.12% + { 24576, 3, 2}, // 0.20% + { 28672, 7, 2}, // 0.08% + { 32768, 4, 2}, // 0.15% + { 40960, 5, 2}, // 0.12% + { 49152, 6, 2}, // 0.10% + { 57344, 7, 2}, // 0.08% + { 65536, 8, 2}, // 0.07% + { 73728, 9, 2}, // 0.07% + { 81920, 10, 2}, // 0.06% + { 98304, 12, 2}, // 0.05% + { 114688, 14, 2}, // 0.04% + { 131072, 16, 2}, // 0.04% + { 147456, 18, 2}, // 0.03% + { 163840, 20, 2}, // 0.03% + { 180224, 22, 2}, // 0.03% + { 204800, 25, 2}, // 0.02% + { 237568, 29, 2}, // 0.02% + { 262144, 32, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 15 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +static const int kCount = 78; +static_assert(kCount <= kNumClasses); +const int SizeMap::kLegacySizeClassesCount = kCount; +const SizeClassInfo SizeMap::kLegacySizeClasses[SizeMap::kLegacySizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.15% + { 16, 1, 32}, // 0.15% + { 24, 1, 32}, // 0.17% + { 32, 1, 32}, // 0.15% + { 40, 1, 32}, // 0.17% + { 48, 1, 32}, // 0.24% + { 56, 1, 32}, // 0.17% + { 64, 1, 32}, // 0.15% + { 72, 1, 32}, // 0.17% + { 80, 1, 32}, // 0.29% + { 88, 1, 32}, // 0.24% + { 96, 1, 32}, // 0.24% + { 104, 1, 32}, // 0.17% + { 112, 1, 32}, // 0.34% + { 128, 1, 32}, // 0.15% + { 144, 1, 32}, // 0.39% + { 160, 1, 32}, // 0.54% + { 176, 1, 32}, // 0.24% + { 192, 
1, 32}, // 0.54% + { 208, 1, 32}, // 0.49% + { 224, 1, 32}, // 0.34% + { 240, 1, 32}, // 0.54% + { 256, 1, 32}, // 0.15% + { 280, 1, 32}, // 0.17% + { 304, 1, 32}, // 0.89% + { 328, 1, 32}, // 1.06% + { 352, 1, 32}, // 0.24% + { 384, 1, 32}, // 0.54% + { 416, 1, 32}, // 1.13% + { 448, 1, 32}, // 0.34% + { 488, 1, 32}, // 0.37% + { 512, 1, 32}, // 0.15% + { 576, 1, 32}, // 1.74% + { 640, 1, 32}, // 0.54% + { 704, 1, 32}, // 1.33% + { 832, 1, 32}, // 1.13% + { 896, 1, 32}, // 1.74% + { 1024, 1, 32}, // 0.15% + { 1152, 1, 32}, // 1.74% + { 1280, 1, 32}, // 2.55% + { 1536, 1, 32}, // 1.74% + { 1792, 1, 32}, // 1.74% + { 2048, 1, 32}, // 0.15% + { 2176, 1, 30}, // 0.54% + { 2304, 1, 28}, // 1.74% + { 2688, 1, 24}, // 1.74% + { 2944, 1, 22}, // 1.33% + { 3200, 1, 20}, // 2.55% + { 3584, 1, 18}, // 1.74% + { 4096, 1, 16}, // 0.15% + { 4608, 1, 14}, // 1.74% + { 5376, 1, 12}, // 1.74% + { 6528, 1, 10}, // 0.54% + { 7168, 2, 9}, // 1.66% + { 8192, 1, 8}, // 0.15% + { 9344, 2, 7}, // 0.27% + { 10880, 1, 6}, // 0.54% + { 13952, 3, 4}, // 0.70% + { 16384, 1, 4}, // 0.15% + { 19072, 3, 3}, // 3.14% + { 21760, 2, 3}, // 0.47% + { 24576, 3, 2}, // 0.05% + { 28032, 6, 2}, // 0.22% + { 32768, 1, 2}, // 0.15% + { 38144, 5, 2}, // 7.41% + { 40960, 4, 2}, // 6.71% + { 49152, 3, 2}, // 0.05% + { 57344, 7, 2}, // 0.02% + { 65536, 2, 2}, // 0.07% + { 81920, 5, 2}, // 0.03% + { 98304, 3, 2}, // 0.05% + { 114688, 7, 2}, // 0.02% + { 131072, 4, 2}, // 0.04% + { 163840, 5, 2}, // 0.03% + { 196608, 6, 2}, // 0.02% + { 229376, 7, 2}, // 0.02% + { 262144, 8, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 18 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +static const int kCount = 89; +static_assert(kCount <= kNumClasses); +const int SizeMap::kLegacySizeClassesCount = kCount; +const SizeClassInfo SizeMap::kLegacySizeClasses[SizeMap::kLegacySizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.02% + { 16, 1, 32}, // 0.02% + { 24, 1, 32}, // 0.02% + { 32, 1, 32}, // 0.02% + { 40, 1, 32}, // 0.03% + { 48, 1, 32}, // 0.02% + { 56, 1, 32}, // 0.02% + { 64, 1, 32}, // 0.02% + { 72, 1, 32}, // 0.04% + { 80, 1, 32}, // 0.04% + { 88, 1, 32}, // 0.05% + { 96, 1, 32}, // 0.04% + { 104, 1, 32}, // 0.04% + { 112, 1, 32}, // 0.04% + { 128, 1, 32}, // 0.02% + { 144, 1, 32}, // 0.04% + { 160, 1, 32}, // 0.04% + { 176, 1, 32}, // 0.05% + { 192, 1, 32}, // 0.04% + { 208, 1, 32}, // 0.04% + { 240, 1, 32}, // 0.04% + { 256, 1, 32}, // 0.02% + { 304, 1, 32}, // 0.05% + { 336, 1, 32}, // 0.04% + { 360, 1, 32}, // 0.04% + { 408, 1, 32}, // 0.10% + { 456, 1, 32}, // 0.17% + { 512, 1, 32}, // 0.02% + { 576, 1, 32}, // 0.04% + { 640, 1, 32}, // 0.17% + { 704, 1, 32}, // 0.12% + { 768, 1, 32}, // 0.12% + { 832, 1, 32}, // 0.04% + { 896, 1, 32}, // 0.21% + { 1024, 1, 32}, // 0.02% + { 1152, 1, 32}, // 0.26% + { 1280, 1, 32}, // 0.41% + { 1536, 1, 32}, // 0.41% + { 1664, 1, 32}, // 0.36% + { 1792, 1, 32}, // 0.21% + { 1920, 1, 32}, // 0.41% + { 2048, 1, 32}, // 0.02% + { 2176, 1, 30}, // 0.41% + { 2304, 1, 28}, // 0.71% + { 2432, 1, 26}, // 0.76% + { 2560, 1, 25}, // 0.41% + { 2688, 1, 24}, // 0.56% + { 2816, 1, 23}, // 0.12% + { 2944, 1, 22}, // 0.07% + { 3072, 1, 21}, // 0.41% + { 3328, 1, 19}, // 1.00% + { 3584, 1, 18}, // 0.21% + { 3840, 1, 17}, // 0.41% + { 4096, 1, 16}, // 0.02% + { 4736, 1, 13}, // 0.66% + { 5504, 1, 11}, // 1.35% + { 6144, 1, 10}, // 1.61% + { 6528, 1, 10}, // 0.41% + { 6784, 1, 9}, // 1.71% + { 7168, 1, 9}, // 1.61% + { 7680, 1, 8}, // 0.41% + { 8192, 1, 8}, 
// 0.02% + { 8704, 1, 7}, // 0.41% + { 9344, 1, 7}, // 0.21% + { 10880, 1, 6}, // 0.41% + { 11904, 1, 5}, // 0.12% + { 13056, 1, 5}, // 0.41% + { 14464, 1, 4}, // 0.71% + { 16384, 1, 4}, // 0.02% + { 18688, 1, 3}, // 0.21% + { 21760, 1, 3}, // 0.41% + { 26112, 1, 2}, // 0.41% + { 29056, 1, 2}, // 0.26% + { 32768, 1, 2}, // 0.02% + { 37376, 1, 2}, // 0.21% + { 43648, 1, 2}, // 0.12% + { 52352, 1, 2}, // 0.17% + { 56064, 2, 2}, // 3.92% + { 65536, 1, 2}, // 0.02% + { 74880, 2, 2}, // 0.03% + { 87296, 1, 2}, // 0.12% + { 104832, 2, 2}, // 0.03% + { 112256, 3, 2}, // 0.09% + { 131072, 1, 2}, // 0.02% + { 149760, 3, 2}, // 5.03% + { 174720, 2, 2}, // 0.03% + { 209664, 4, 2}, // 0.03% + { 262144, 1, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 12 +static_assert(kMaxSize == 8192, "kMaxSize mismatch"); +static const int kCount = 46; +static_assert(kCount <= kNumClasses); +const int SizeMap::kLegacySizeClassesCount = kCount; +const SizeClassInfo SizeMap::kLegacySizeClasses[SizeMap::kLegacySizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 1.17% + { 16, 1, 32}, // 1.17% + { 24, 1, 32}, // 1.57% + { 32, 1, 32}, // 1.17% + { 40, 1, 32}, // 1.57% + { 48, 1, 32}, // 1.57% + { 56, 1, 32}, // 1.37% + { 64, 1, 32}, // 1.17% + { 72, 1, 32}, // 2.78% + { 80, 1, 32}, // 1.57% + { 88, 1, 32}, // 2.37% + { 96, 1, 32}, // 2.78% + { 104, 1, 32}, // 2.17% + { 120, 1, 32}, // 1.57% + { 128, 1, 32}, // 1.17% + { 144, 1, 32}, // 2.78% + { 160, 1, 32}, // 3.60% + { 184, 1, 32}, // 2.37% + { 208, 1, 32}, // 4.86% + { 240, 1, 32}, // 1.57% + { 256, 1, 32}, // 1.17% + { 272, 1, 32}, // 1.57% + { 312, 1, 32}, // 2.17% + { 336, 1, 32}, // 2.78% + { 368, 1, 32}, // 2.37% + { 408, 1, 32}, // 1.57% + { 512, 1, 32}, // 1.17% + { 576, 2, 32}, // 2.18% + { 704, 2, 32}, // 6.40% + { 768, 2, 32}, // 7.29% + { 896, 2, 32}, // 2.18% + { 1024, 2, 32}, // 0.59% + { 1152, 3, 32}, // 7.08% + { 1280, 3, 32}, // 7.08% + { 1536, 3, 32}, // 0.39% + { 1792, 4, 32}, // 1.88% + { 2048, 4, 32}, // 0.29% + { 2304, 4, 28}, // 1.88% + { 2688, 4, 24}, // 1.88% + { 3456, 6, 18}, // 1.79% + { 4096, 4, 16}, // 0.29% + { 5376, 4, 12}, // 1.88% + { 6144, 3, 10}, // 0.39% + { 7168, 7, 9}, // 0.17% + { 8192, 4, 8}, // 0.29% +}; +#else +#error "Unsupported TCMALLOC_PAGE_SHIFT value!" 
+#endif +#else +#if TCMALLOC_PAGE_SHIFT == 13 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +static const int kCount = 86; +static_assert(kCount <= kNumClasses); +const int SizeMap::kLegacySizeClassesCount = kCount; +const SizeClassInfo SizeMap::kLegacySizeClasses[SizeMap::kLegacySizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.59% + { 16, 1, 32}, // 0.59% + { 32, 1, 32}, // 0.59% + { 48, 1, 32}, // 0.98% + { 64, 1, 32}, // 0.59% + { 80, 1, 32}, // 0.98% + { 96, 1, 32}, // 0.98% + { 112, 1, 32}, // 0.78% + { 128, 1, 32}, // 0.59% + { 144, 1, 32}, // 2.18% + { 160, 1, 32}, // 0.98% + { 176, 1, 32}, // 1.78% + { 192, 1, 32}, // 2.18% + { 208, 1, 32}, // 1.58% + { 224, 1, 32}, // 2.18% + { 240, 1, 32}, // 0.98% + { 256, 1, 32}, // 0.59% + { 272, 1, 32}, // 0.98% + { 288, 1, 32}, // 2.18% + { 304, 1, 32}, // 4.25% + { 320, 1, 32}, // 3.00% + { 336, 1, 32}, // 2.18% + { 352, 1, 32}, // 1.78% + { 368, 1, 32}, // 1.78% + { 384, 1, 32}, // 2.18% + { 400, 1, 32}, // 3.00% + { 416, 1, 32}, // 4.25% + { 448, 1, 32}, // 2.18% + { 480, 1, 32}, // 0.98% + { 512, 1, 32}, // 0.59% + { 576, 1, 32}, // 2.18% + { 640, 1, 32}, // 7.29% + { 704, 1, 32}, // 6.40% + { 768, 1, 32}, // 7.29% + { 896, 1, 32}, // 2.18% + { 1024, 1, 32}, // 0.59% + { 1152, 2, 32}, // 1.88% + { 1280, 2, 32}, // 6.98% + { 1408, 2, 32}, // 6.10% + { 1536, 2, 32}, // 6.98% + { 1792, 2, 32}, // 1.88% + { 2048, 2, 32}, // 0.29% + { 2304, 2, 28}, // 1.88% + { 2688, 2, 24}, // 1.88% + { 2816, 3, 23}, // 9.30% + { 3200, 2, 20}, // 2.70% + { 3456, 3, 18}, // 1.79% + { 3584, 4, 18}, // 1.74% + { 4096, 2, 16}, // 0.29% + { 4736, 3, 13}, // 3.99% + { 5376, 2, 12}, // 1.88% + { 6144, 3, 10}, // 0.20% + { 6528, 4, 10}, // 0.54% + { 6784, 5, 9}, // 0.75% + { 7168, 7, 9}, // 0.08% + { 8192, 2, 8}, // 0.29% + { 9472, 5, 6}, // 8.23% + { 10240, 4, 6}, // 6.82% + { 12288, 3, 5}, // 0.20% + { 13568, 5, 4}, // 0.75% + { 14336, 7, 4}, // 0.08% + { 16384, 2, 4}, // 0.29% + { 20480, 5, 3}, // 0.12% + { 24576, 3, 2}, // 0.20% + { 28672, 7, 2}, // 0.08% + { 32768, 4, 2}, // 0.15% + { 40960, 5, 2}, // 0.12% + { 49152, 6, 2}, // 0.10% + { 57344, 7, 2}, // 0.08% + { 65536, 8, 2}, // 0.07% + { 73728, 9, 2}, // 0.07% + { 81920, 10, 2}, // 0.06% + { 90112, 11, 2}, // 0.05% + { 98304, 12, 2}, // 0.05% + { 106496, 13, 2}, // 0.05% + { 114688, 14, 2}, // 0.04% + { 131072, 16, 2}, // 0.04% + { 139264, 17, 2}, // 0.03% + { 155648, 19, 2}, // 0.03% + { 172032, 21, 2}, // 0.03% + { 188416, 23, 2}, // 0.03% + { 204800, 25, 2}, // 0.02% + { 221184, 27, 2}, // 0.02% + { 237568, 29, 2}, // 0.02% + { 262144, 32, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 15 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +static const int kCount = 78; +static_assert(kCount <= kNumClasses); +const int SizeMap::kLegacySizeClassesCount = kCount; +const SizeClassInfo SizeMap::kLegacySizeClasses[SizeMap::kLegacySizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.15% + { 16, 1, 32}, // 0.15% + { 32, 1, 32}, // 0.15% + { 48, 1, 32}, // 0.24% + { 64, 1, 32}, // 0.15% + { 80, 1, 32}, // 0.29% + { 96, 1, 32}, // 0.24% + { 112, 1, 32}, // 0.34% + { 128, 1, 32}, // 0.15% + { 144, 1, 32}, // 0.39% + { 160, 1, 32}, // 0.54% + { 176, 1, 32}, // 0.24% + { 192, 1, 32}, // 0.54% + { 208, 1, 32}, // 0.49% + { 224, 1, 32}, // 0.34% + { 240, 1, 32}, // 0.54% + { 256, 1, 32}, // 0.15% + { 272, 1, 32}, // 0.54% + { 288, 1, 32}, // 0.84% + { 304, 1, 32}, // 0.89% + { 320, 1, 32}, 
// 0.54% + { 336, 1, 32}, // 0.69% + { 352, 1, 32}, // 0.24% + { 384, 1, 32}, // 0.54% + { 416, 1, 32}, // 1.13% + { 448, 1, 32}, // 0.34% + { 480, 1, 32}, // 0.54% + { 512, 1, 32}, // 0.15% + { 576, 1, 32}, // 1.74% + { 640, 1, 32}, // 0.54% + { 704, 1, 32}, // 1.33% + { 768, 1, 32}, // 1.74% + { 832, 1, 32}, // 1.13% + { 896, 1, 32}, // 1.74% + { 1024, 1, 32}, // 0.15% + { 1152, 1, 32}, // 1.74% + { 1280, 1, 32}, // 2.55% + { 1408, 1, 32}, // 1.33% + { 1536, 1, 32}, // 1.74% + { 1792, 1, 32}, // 1.74% + { 2048, 1, 32}, // 0.15% + { 2176, 1, 30}, // 0.54% + { 2304, 1, 28}, // 1.74% + { 2432, 1, 26}, // 3.80% + { 2688, 1, 24}, // 1.74% + { 2944, 1, 22}, // 1.33% + { 3200, 1, 20}, // 2.55% + { 3584, 1, 18}, // 1.74% + { 4096, 1, 16}, // 0.15% + { 4608, 1, 14}, // 1.74% + { 5376, 1, 12}, // 1.74% + { 6528, 1, 10}, // 0.54% + { 7168, 2, 9}, // 1.66% + { 8192, 1, 8}, // 0.15% + { 9344, 2, 7}, // 0.27% + { 10880, 1, 6}, // 0.54% + { 13056, 2, 5}, // 0.47% + { 13952, 3, 4}, // 0.70% + { 16384, 1, 4}, // 0.15% + { 19072, 3, 3}, // 3.14% + { 21760, 2, 3}, // 0.47% + { 24576, 3, 2}, // 0.05% + { 28032, 6, 2}, // 0.22% + { 32768, 1, 2}, // 0.15% + { 38144, 5, 2}, // 7.41% + { 40960, 4, 2}, // 6.71% + { 49152, 3, 2}, // 0.05% + { 57344, 7, 2}, // 0.02% + { 65536, 2, 2}, // 0.07% + { 81920, 5, 2}, // 0.03% + { 98304, 3, 2}, // 0.05% + { 114688, 7, 2}, // 0.02% + { 131072, 4, 2}, // 0.04% + { 163840, 5, 2}, // 0.03% + { 196608, 6, 2}, // 0.02% + { 229376, 7, 2}, // 0.02% + { 262144, 8, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 18 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +static const int kCount = 89; +static_assert(kCount <= kNumClasses); +const int SizeMap::kLegacySizeClassesCount = kCount; +const SizeClassInfo SizeMap::kLegacySizeClasses[SizeMap::kLegacySizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.02% + { 16, 1, 32}, // 0.02% + { 32, 1, 32}, // 0.02% + { 48, 1, 32}, // 0.02% + { 64, 1, 32}, // 0.02% + { 80, 1, 32}, // 0.04% + { 96, 1, 32}, // 0.04% + { 112, 1, 32}, // 0.04% + { 128, 1, 32}, // 0.02% + { 144, 1, 32}, // 0.04% + { 160, 1, 32}, // 0.04% + { 176, 1, 32}, // 0.05% + { 192, 1, 32}, // 0.04% + { 208, 1, 32}, // 0.04% + { 240, 1, 32}, // 0.04% + { 256, 1, 32}, // 0.02% + { 304, 1, 32}, // 0.05% + { 336, 1, 32}, // 0.04% + { 368, 1, 32}, // 0.07% + { 416, 1, 32}, // 0.04% + { 464, 1, 32}, // 0.19% + { 512, 1, 32}, // 0.02% + { 576, 1, 32}, // 0.04% + { 640, 1, 32}, // 0.17% + { 704, 1, 32}, // 0.12% + { 768, 1, 32}, // 0.12% + { 832, 1, 32}, // 0.04% + { 896, 1, 32}, // 0.21% + { 1024, 1, 32}, // 0.02% + { 1152, 1, 32}, // 0.26% + { 1280, 1, 32}, // 0.41% + { 1408, 1, 32}, // 0.12% + { 1536, 1, 32}, // 0.41% + { 1664, 1, 32}, // 0.36% + { 1792, 1, 32}, // 0.21% + { 1920, 1, 32}, // 0.41% + { 2048, 1, 32}, // 0.02% + { 2176, 1, 30}, // 0.41% + { 2304, 1, 28}, // 0.71% + { 2432, 1, 26}, // 0.76% + { 2560, 1, 25}, // 0.41% + { 2688, 1, 24}, // 0.56% + { 2816, 1, 23}, // 0.12% + { 2944, 1, 22}, // 0.07% + { 3072, 1, 21}, // 0.41% + { 3200, 1, 20}, // 1.15% + { 3328, 1, 19}, // 1.00% + { 3584, 1, 18}, // 0.21% + { 3840, 1, 17}, // 0.41% + { 4096, 1, 16}, // 0.02% + { 4736, 1, 13}, // 0.66% + { 5504, 1, 11}, // 1.35% + { 6144, 1, 10}, // 1.61% + { 6528, 1, 10}, // 0.41% + { 6784, 1, 9}, // 1.71% + { 7168, 1, 9}, // 1.61% + { 7680, 1, 8}, // 0.41% + { 8192, 1, 8}, // 0.02% + { 8704, 1, 7}, // 0.41% + { 9344, 1, 7}, // 0.21% + { 10368, 1, 6}, // 1.15% + { 11392, 1, 5}, // 0.07% + { 12416, 1, 5}, // 0.56% + { 13696, 
1, 4}, // 0.76% + { 14464, 1, 4}, // 0.71% + { 16384, 1, 4}, // 0.02% + { 17408, 1, 3}, // 0.41% + { 20096, 1, 3}, // 0.36% + { 21760, 1, 3}, // 0.41% + { 23808, 1, 2}, // 0.12% + { 26112, 1, 2}, // 0.41% + { 29056, 1, 2}, // 0.26% + { 32768, 1, 2}, // 0.02% + { 37376, 1, 2}, // 0.21% + { 43648, 1, 2}, // 0.12% + { 52352, 1, 2}, // 0.17% + { 56064, 2, 2}, // 3.92% + { 65536, 1, 2}, // 0.02% + { 74880, 2, 2}, // 0.03% + { 87296, 1, 2}, // 0.12% + { 104832, 2, 2}, // 0.03% + { 112256, 3, 2}, // 0.09% + { 131072, 1, 2}, // 0.02% + { 149760, 3, 2}, // 5.03% + { 174720, 2, 2}, // 0.03% + { 196608, 3, 2}, // 0.01% + { 209664, 4, 2}, // 0.03% + { 262144, 1, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 12 +static_assert(kMaxSize == 8192, "kMaxSize mismatch"); +static const int kCount = 46; +static_assert(kCount <= kNumClasses); +const int SizeMap::kLegacySizeClassesCount = kCount; +const SizeClassInfo SizeMap::kLegacySizeClasses[SizeMap::kLegacySizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 1.17% + { 16, 1, 32}, // 1.17% + { 32, 1, 32}, // 1.17% + { 48, 1, 32}, // 1.57% + { 64, 1, 32}, // 1.17% + { 80, 1, 32}, // 1.57% + { 96, 1, 32}, // 2.78% + { 112, 1, 32}, // 2.78% + { 128, 1, 32}, // 1.17% + { 144, 1, 32}, // 2.78% + { 160, 1, 32}, // 3.60% + { 176, 1, 32}, // 2.37% + { 192, 1, 32}, // 2.78% + { 208, 1, 32}, // 4.86% + { 224, 1, 32}, // 2.78% + { 240, 1, 32}, // 1.57% + { 256, 1, 32}, // 1.17% + { 272, 1, 32}, // 1.57% + { 288, 1, 32}, // 2.78% + { 304, 1, 32}, // 4.86% + { 336, 1, 32}, // 2.78% + { 368, 1, 32}, // 2.37% + { 400, 1, 32}, // 3.60% + { 448, 1, 32}, // 2.78% + { 512, 1, 32}, // 1.17% + { 576, 2, 32}, // 2.18% + { 640, 2, 32}, // 7.29% + { 704, 2, 32}, // 6.40% + { 768, 2, 32}, // 7.29% + { 896, 2, 32}, // 2.18% + { 1024, 2, 32}, // 0.59% + { 1152, 3, 32}, // 7.08% + { 1280, 3, 32}, // 7.08% + { 1536, 3, 32}, // 0.39% + { 1792, 4, 32}, // 1.88% + { 2048, 4, 32}, // 0.29% + { 2304, 4, 28}, // 1.88% + { 2688, 4, 24}, // 1.88% + { 3200, 4, 20}, // 2.70% + { 3584, 7, 18}, // 0.17% + { 4096, 4, 16}, // 0.29% + { 5376, 4, 12}, // 1.88% + { 6144, 3, 10}, // 0.39% + { 7168, 7, 9}, // 0.17% + { 8192, 4, 8}, // 0.29% +}; +#else +#error "Unsupported TCMALLOC_PAGE_SHIFT value!" +#endif +#endif +// clang-format on + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/libc_override.h b/contrib/libs/tcmalloc/tcmalloc/libc_override.h new file mode 100644 index 0000000000..89f8e4e5c8 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/libc_override.h @@ -0,0 +1,39 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// This .h file imports the code that causes tcmalloc to override libc +// versions of malloc/free/new/delete/etc. That is, it provides the +// logic that makes it so calls to malloc(10) go through tcmalloc, +// rather than the default (libc) malloc. 
+// +// Every libc has its own way of doing this, and sometimes the compiler +// matters too, so we have a different file for each libc, and often +// for different compilers and OS's. + +#ifndef TCMALLOC_LIBC_OVERRIDE_H_ +#define TCMALLOC_LIBC_OVERRIDE_H_ + +#include <features.h> + +#include "tcmalloc/tcmalloc.h" + +#if defined(__GLIBC__) +#include "tcmalloc/libc_override_glibc.h" + +#else +#include "tcmalloc/libc_override_redefine.h" + +#endif + +#endif // TCMALLOC_LIBC_OVERRIDE_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/libc_override_gcc_and_weak.h b/contrib/libs/tcmalloc/tcmalloc/libc_override_gcc_and_weak.h new file mode 100644 index 0000000000..709bcb727f --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/libc_override_gcc_and_weak.h @@ -0,0 +1,114 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Used to override malloc routines on systems that define the +// memory allocation routines to be weak symbols in their libc +// (almost all unix-based systems are like this), on gcc, which +// suppports the 'alias' attribute. + +#ifndef TCMALLOC_LIBC_OVERRIDE_GCC_AND_WEAK_INL_H_ +#define TCMALLOC_LIBC_OVERRIDE_GCC_AND_WEAK_INL_H_ + +#include <stddef.h> + +#include <new> + +#include "tcmalloc/tcmalloc.h" + +#ifndef __GNUC__ +#error libc_override_gcc_and_weak.h is for gcc distributions only. +#endif + +// visibility("default") ensures that these symbols are always exported, even +// with -fvisibility=hidden. 
+#define TCMALLOC_ALIAS(tc_fn) \ + __attribute__((alias(#tc_fn), visibility("default"))) + +void* operator new(size_t size) noexcept(false) + TCMALLOC_ALIAS(TCMallocInternalNew); +void operator delete(void* p) noexcept TCMALLOC_ALIAS(TCMallocInternalDelete); +void operator delete(void* p, size_t size) noexcept + TCMALLOC_ALIAS(TCMallocInternalDeleteSized); +void* operator new[](size_t size) noexcept(false) + TCMALLOC_ALIAS(TCMallocInternalNewArray); +void operator delete[](void* p) noexcept + TCMALLOC_ALIAS(TCMallocInternalDeleteArray); +void operator delete[](void* p, size_t size) noexcept + TCMALLOC_ALIAS(TCMallocInternalDeleteArraySized); +void* operator new(size_t size, const std::nothrow_t& nt) noexcept + TCMALLOC_ALIAS(TCMallocInternalNewNothrow); +void* operator new[](size_t size, const std::nothrow_t& nt) noexcept + TCMALLOC_ALIAS(TCMallocInternalNewArrayNothrow); +void operator delete(void* p, const std::nothrow_t& nt) noexcept + TCMALLOC_ALIAS(TCMallocInternalDeleteNothrow); +void operator delete[](void* p, const std::nothrow_t& nt) noexcept + TCMALLOC_ALIAS(TCMallocInternalDeleteArrayNothrow); + +void* operator new(size_t size, std::align_val_t alignment) noexcept(false) + TCMALLOC_ALIAS(TCMallocInternalNewAligned); +void* operator new(size_t size, std::align_val_t alignment, + const std::nothrow_t&) noexcept + TCMALLOC_ALIAS(TCMallocInternalNewAligned_nothrow); +void operator delete(void* p, std::align_val_t alignment) noexcept + TCMALLOC_ALIAS(TCMallocInternalDeleteAligned); +void operator delete(void* p, std::align_val_t alignment, + const std::nothrow_t&) noexcept + TCMALLOC_ALIAS(TCMallocInternalDeleteAligned_nothrow); +void operator delete(void* p, size_t size, std::align_val_t alignment) noexcept + TCMALLOC_ALIAS(TCMallocInternalDeleteSizedAligned); +void* operator new[](size_t size, std::align_val_t alignment) noexcept(false) + TCMALLOC_ALIAS(TCMallocInternalNewArrayAligned); +void* operator new[](size_t size, std::align_val_t alignment, + const std::nothrow_t&) noexcept + TCMALLOC_ALIAS(TCMallocInternalNewArrayAligned_nothrow); +void operator delete[](void* p, std::align_val_t alignment) noexcept + TCMALLOC_ALIAS(TCMallocInternalDeleteArrayAligned); +void operator delete[](void* p, std::align_val_t alignment, + const std::nothrow_t&) noexcept + TCMALLOC_ALIAS(TCMallocInternalDeleteArrayAligned_nothrow); +void operator delete[](void* p, size_t size, + std::align_val_t alignemnt) noexcept + TCMALLOC_ALIAS(TCMallocInternalDeleteArraySizedAligned); + +extern "C" { +void* malloc(size_t size) noexcept TCMALLOC_ALIAS(TCMallocInternalMalloc); +void free(void* ptr) noexcept TCMALLOC_ALIAS(TCMallocInternalFree); +void sdallocx(void* ptr, size_t size, int flags) noexcept + TCMALLOC_ALIAS(TCMallocInternalSdallocx); +void* realloc(void* ptr, size_t size) noexcept + TCMALLOC_ALIAS(TCMallocInternalRealloc); +void* calloc(size_t n, size_t size) noexcept + TCMALLOC_ALIAS(TCMallocInternalCalloc); +void cfree(void* ptr) noexcept TCMALLOC_ALIAS(TCMallocInternalCfree); +void* memalign(size_t align, size_t s) noexcept + TCMALLOC_ALIAS(TCMallocInternalMemalign); +void* aligned_alloc(size_t align, size_t s) noexcept + TCMALLOC_ALIAS(TCMallocInternalAlignedAlloc); +void* valloc(size_t size) noexcept TCMALLOC_ALIAS(TCMallocInternalValloc); +void* pvalloc(size_t size) noexcept TCMALLOC_ALIAS(TCMallocInternalPvalloc); +int posix_memalign(void** r, size_t a, size_t s) noexcept + TCMALLOC_ALIAS(TCMallocInternalPosixMemalign); +void malloc_stats(void) noexcept 
TCMALLOC_ALIAS(TCMallocInternalMallocStats); +int mallopt(int cmd, int value) noexcept + TCMALLOC_ALIAS(TCMallocInternalMallOpt); +#ifdef TCMALLOC_HAVE_STRUCT_MALLINFO +struct mallinfo mallinfo(void) noexcept + TCMALLOC_ALIAS(TCMallocInternalMallocInfo); +#endif +size_t malloc_size(void* p) noexcept TCMALLOC_ALIAS(TCMallocInternalMallocSize); +size_t malloc_usable_size(void* p) noexcept + TCMALLOC_ALIAS(TCMallocInternalMallocSize); +} // extern "C" + +#endif // TCMALLOC_LIBC_OVERRIDE_GCC_AND_WEAK_INL_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/libc_override_glibc.h b/contrib/libs/tcmalloc/tcmalloc/libc_override_glibc.h new file mode 100644 index 0000000000..8e23b6eb78 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/libc_override_glibc.h @@ -0,0 +1,120 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Used to override malloc routines on systems that are using glibc. + +#ifndef TCMALLOC_LIBC_OVERRIDE_GLIBC_INL_H_ +#define TCMALLOC_LIBC_OVERRIDE_GLIBC_INL_H_ + +#include <features.h> +#include <stddef.h> + +#include "tcmalloc/tcmalloc.h" + +#ifndef __GLIBC__ +#error libc_override_glibc.h is for glibc distributions only. +#endif + +// In glibc, the memory-allocation methods are weak symbols, so we can +// just override them with our own. If we're using gcc, we can use +// __attribute__((alias)) to do the overriding easily (exception: +// Mach-O, which doesn't support aliases). Otherwise we have to use a +// function call. +#if !defined(__GNUC__) || defined(__MACH__) + +#include "libc_override_redefine.h" + +#else // #if !defined(__GNUC__) || defined(__MACH__) + +// If we get here, we're a gcc system, so do all the overriding we do +// with gcc. This does the overriding of all the 'normal' memory +// allocation. +#include "libc_override_gcc_and_weak.h" + +// We also have to do some glibc-specific overriding. Some library +// routines on RedHat 9 allocate memory using malloc() and free it +// using __libc_free() (or vice-versa). Since we provide our own +// implementations of malloc/free, we need to make sure that the +// __libc_XXX variants (defined as part of glibc) also point to the +// same implementations. Since it only matters for redhat, we +// do it inside the gcc #ifdef, since redhat uses gcc. +// TODO(b/134690953): only do this if we detect we're an old enough glibc? 
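Before the __libc_* aliases below, a self-contained sketch of the GCC alias-attribute mechanism they rely on (the names RealGreet/greet are hypothetical, not tcmalloc symbols):

#include <cstdio>

// The definition both names should resolve to.
extern "C" void RealGreet() { std::puts("hello from RealGreet"); }

// `greet` is only declared; the alias attribute makes it a second name for
// RealGreet, and visibility("default") keeps the symbol exported even when
// the file is built with -fvisibility=hidden (mirroring TCMALLOC_ALIAS).
extern "C" void greet()
    __attribute__((alias("RealGreet"), visibility("default")));

int main() {
  greet();  // prints "hello from RealGreet"
  return 0;
}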
+ +extern "C" { +void* __libc_malloc(size_t size) noexcept + TCMALLOC_ALIAS(TCMallocInternalMalloc); +void __libc_free(void* ptr) noexcept TCMALLOC_ALIAS(TCMallocInternalFree); +void* __libc_realloc(void* ptr, size_t size) noexcept + TCMALLOC_ALIAS(TCMallocInternalRealloc); +void* __libc_calloc(size_t n, size_t size) noexcept + TCMALLOC_ALIAS(TCMallocInternalCalloc); +void __libc_cfree(void* ptr) noexcept TCMALLOC_ALIAS(TCMallocInternalCfree); +void* __libc_memalign(size_t align, size_t s) noexcept + TCMALLOC_ALIAS(TCMallocInternalMemalign); +void* __libc_valloc(size_t size) noexcept + TCMALLOC_ALIAS(TCMallocInternalValloc); +void* __libc_pvalloc(size_t size) noexcept + TCMALLOC_ALIAS(TCMallocInternalPvalloc); +int __posix_memalign(void** r, size_t a, size_t s) noexcept + TCMALLOC_ALIAS(TCMallocInternalPosixMemalign); +} // extern "C" + +#endif // #if defined(__GNUC__) && !defined(__MACH__) + +// We also have to hook libc malloc. While our work with weak symbols +// should make sure libc malloc is never called in most situations, it +// can be worked around by shared libraries with the DEEPBIND +// environment variable set. The below hooks libc to call our malloc +// routines even in that situation. In other situations, this hook +// should never be called. +extern "C" { +static void* glibc_override_malloc(size_t size, const void* caller) { + return TCMallocInternalMalloc(size); +} +static void* glibc_override_realloc(void* ptr, size_t size, + const void* caller) { + return TCMallocInternalRealloc(ptr, size); +} +static void glibc_override_free(void* ptr, const void* caller) { + TCMallocInternalFree(ptr); +} +static void* glibc_override_memalign(size_t align, size_t size, + const void* caller) { + return TCMallocInternalMemalign(align, size); +} + +// We should be using __malloc_initialize_hook here. (See +// http://swoolley.org/man.cgi/3/malloc_hook.) However, this causes weird +// linker errors with programs that link with -static, so instead we just assign +// the vars directly at static-constructor time. That should serve the same +// effect of making sure the hooks are set before the first malloc call the +// program makes. + +// Glibc-2.14 and above make __malloc_hook and friends volatile +#ifndef __MALLOC_HOOK_VOLATILE +#define __MALLOC_HOOK_VOLATILE /**/ +#endif + +void* (*__MALLOC_HOOK_VOLATILE __malloc_hook)(size_t, const void*) = + &glibc_override_malloc; +void* (*__MALLOC_HOOK_VOLATILE __realloc_hook)(void*, size_t, const void*) = + &glibc_override_realloc; +void (*__MALLOC_HOOK_VOLATILE __free_hook)(void*, + const void*) = &glibc_override_free; +void* (*__MALLOC_HOOK_VOLATILE __memalign_hook)(size_t, size_t, const void*) = + &glibc_override_memalign; + +} // extern "C" + +#endif // TCMALLOC_LIBC_OVERRIDE_GLIBC_INL_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/libc_override_redefine.h b/contrib/libs/tcmalloc/tcmalloc/libc_override_redefine.h new file mode 100644 index 0000000000..b1655461c3 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/libc_override_redefine.h @@ -0,0 +1,100 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +// Used on systems that don't have their own definition of +// malloc/new/etc. (Typically this will be a windows msvcrt.dll that +// has been edited to remove the definitions.) We can just define our +// own as normal functions. +// +// This should also work on systems were all the malloc routines are +// defined as weak symbols, and there's no support for aliasing. + +#ifndef TCMALLOC_LIBC_OVERRIDE_REDEFINE_H_ +#define TCMALLOC_LIBC_OVERRIDE_REDEFINE_H_ + +#include <cstddef> +#include <new> + +#include "tcmalloc/tcmalloc.h" + +void* operator new(size_t size) { return TCMallocInternalNew(size); } +void operator delete(void* p) noexcept { TCMallocInternalDelete(p); } +void* operator new[](size_t size) { return TCMallocInternalNewArray(size); } +void operator delete[](void* p) noexcept { TCMallocInternalDeleteArray(p); } +void* operator new(size_t size, const std::nothrow_t& nt) noexcept { + return TCMallocInternalNewNothrow(size, nt); +} +void* operator new[](size_t size, const std::nothrow_t& nt) noexcept { + return TCMallocInternalNewArrayNothrow(size, nt); +} +void operator delete(void* ptr, const std::nothrow_t& nt) noexcept { + return TCMallocInternalDeleteNothrow(ptr, nt); +} +void operator delete[](void* ptr, const std::nothrow_t& nt) noexcept { + return TCMallocInternalDeleteArrayNothrow(ptr, nt); +} + +extern "C" { +void* malloc(size_t s) { return TCMallocInternalMalloc(s); } +void* calloc(size_t n, size_t s) { return TCMallocInternalCalloc(n, s); } +void* realloc(void* p, size_t s) { return TCMallocInternalRealloc(p, s); } +void free(void* p) { TCMallocInternalFree(p); } +void* memalign(size_t a, size_t s) { return TCMallocInternalMemalign(a, s); } +int posix_memalign(void** r, size_t a, size_t s) { + return TCMallocInternalPosixMemalign(r, a, s); +} +size_t malloc_usable_size(void* p) { return TCMallocInternalMallocSize(p); } + +// tcmalloc extension +void sdallocx(void* p, size_t s, int flags) noexcept { + TCMallocInternalSdallocx(p, s, flags); +} + +#if defined(__GLIBC__) || defined(__NEWLIB__) +// SunOS extension +void cfree(void* p) { TCMallocInternalCfree(p); } +#endif + +#if defined(OS_MACOSX) || defined(__BIONIC__) || defined(__GLIBC__) || \ + defined(__NEWLIB__) || defined(__UCLIBC__) +// Obsolete memalign +void* valloc(size_t s) { return TCMallocInternalValloc(s); } +#endif + +#if defined(__BIONIC__) || defined(__GLIBC__) || defined(__NEWLIB__) +// Obsolete memalign +void* pvalloc(size_t s) { return TCMallocInternalPvalloc(s); } +#endif + +#if defined(__GLIBC__) || defined(__NEWLIB__) || defined(__UCLIBC__) +void malloc_stats(void) { TCMallocInternalMallocStats(); } +#endif + +#if defined(__BIONIC__) || defined(__GLIBC__) || defined(__NEWLIB__) || \ + defined(__UCLIBC__) +int mallopt(int cmd, int v) { return TCMallocInternalMallOpt(cmd, v); } +#endif + +#ifdef TCMALLOC_HAVE_STRUCT_MALLINFO +struct mallinfo mallinfo(void) { + return TCMallocInternalMallocInfo(); +} +#endif + +#if defined(__GLIBC__) +size_t malloc_size(void* p) { return TCMallocInternalMallocSize(p); } +#endif +} // extern "C" + +#endif // TCMALLOC_LIBC_OVERRIDE_REDEFINE_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/malloc_extension.cc b/contrib/libs/tcmalloc/tcmalloc/malloc_extension.cc new file mode 100644 index 0000000000..ad3205fcdc --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/malloc_extension.cc @@ -0,0 +1,530 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the 
Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/malloc_extension.h" + +#include <assert.h> +#include <string.h> + +#include <atomic> +#include <cstdlib> +#include <memory> +#include <new> +#include <string> + +#include "absl/base/attributes.h" +#include "absl/base/internal/low_level_alloc.h" +#include "absl/memory/memory.h" +#include "absl/time/time.h" +#include "tcmalloc/internal/parameter_accessors.h" +#include "tcmalloc/internal_malloc_extension.h" + +namespace tcmalloc { + +MallocExtension::AllocationProfilingToken::AllocationProfilingToken( + std::unique_ptr<tcmalloc_internal::AllocationProfilingTokenBase> impl) + : impl_(std::move(impl)) {} + +MallocExtension::AllocationProfilingToken::~AllocationProfilingToken() {} + +Profile MallocExtension::AllocationProfilingToken::Stop() && { + std::unique_ptr<tcmalloc_internal::AllocationProfilingTokenBase> p( + std::move(impl_)); + if (!p) { + return Profile(); + } + return std::move(*p).Stop(); +} + +Profile::Profile(std::unique_ptr<const tcmalloc_internal::ProfileBase> impl) + : impl_(std::move(impl)) {} + +Profile::~Profile() {} + +void Profile::Iterate(absl::FunctionRef<void(const Sample&)> f) const { + if (!impl_) { + return; + } + + impl_->Iterate(f); +} + +int64_t Profile::Period() const { + if (!impl_) { + return -1; + } + + return impl_->Period(); +} + +ProfileType Profile::Type() const { + if (!impl_) { + return ProfileType::kDoNotUse; + } + + return impl_->Type(); +} + +AddressRegion::~AddressRegion() {} + +AddressRegionFactory::~AddressRegionFactory() {} + +size_t AddressRegionFactory::GetStats(absl::Span<char> buffer) { + static_cast<void>(buffer); + return 0; +} + +size_t AddressRegionFactory::GetStatsInPbtxt(absl::Span<char> buffer) { + static_cast<void>(buffer); + return 0; +} + +static std::atomic<size_t> address_region_factory_internal_bytes_allocated(0); + +size_t AddressRegionFactory::InternalBytesAllocated() { + return address_region_factory_internal_bytes_allocated.load( + std::memory_order_relaxed); +} + +void* AddressRegionFactory::MallocInternal(size_t size) { + // Use arena without malloc hooks to avoid HeapChecker reporting a leak. 
+ static auto* arena = + absl::base_internal::LowLevelAlloc::NewArena(/*flags=*/0); + void* result = + absl::base_internal::LowLevelAlloc::AllocWithArena(size, arena); + if (result) { + address_region_factory_internal_bytes_allocated.fetch_add( + size, std::memory_order_relaxed); + } + return result; +} + +#if !ABSL_HAVE_ATTRIBUTE_WEAK || defined(__APPLE__) || defined(__EMSCRIPTEN__) +#define ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS 0 +#else +#define ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS 1 +#endif + +std::string MallocExtension::GetStats() { + std::string ret; +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (&MallocExtension_Internal_GetStats != nullptr) { + MallocExtension_Internal_GetStats(&ret); + } +#endif + return ret; +} + +void MallocExtension::ReleaseMemoryToSystem(size_t num_bytes) { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (&MallocExtension_Internal_ReleaseMemoryToSystem != nullptr) { + MallocExtension_Internal_ReleaseMemoryToSystem(num_bytes); + } +#endif +} + +AddressRegionFactory* MallocExtension::GetRegionFactory() { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (&MallocExtension_Internal_GetRegionFactory == nullptr) { + return nullptr; + } + + return MallocExtension_Internal_GetRegionFactory(); +#else + return nullptr; +#endif +} + +void MallocExtension::SetRegionFactory(AddressRegionFactory* factory) { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (&MallocExtension_Internal_SetRegionFactory == nullptr) { + return; + } + + MallocExtension_Internal_SetRegionFactory(factory); +#endif + // Default implementation does nothing +} + +Profile MallocExtension::SnapshotCurrent(tcmalloc::ProfileType type) { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (&MallocExtension_Internal_SnapshotCurrent == nullptr) { + return Profile(); + } + + return tcmalloc_internal::ProfileAccessor::MakeProfile( + std::unique_ptr<const tcmalloc_internal::ProfileBase>( + MallocExtension_Internal_SnapshotCurrent(type))); +#else + return Profile(); +#endif +} + +MallocExtension::AllocationProfilingToken +MallocExtension::StartAllocationProfiling() { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (&MallocExtension_Internal_StartAllocationProfiling == nullptr) { + return {}; + } + + return tcmalloc_internal::AllocationProfilingTokenAccessor::MakeToken( + std::unique_ptr<tcmalloc_internal::AllocationProfilingTokenBase>( + MallocExtension_Internal_StartAllocationProfiling())); +#else + return {}; +#endif +} + +void MallocExtension::MarkThreadIdle() { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (&MallocExtension_Internal_MarkThreadIdle == nullptr) { + return; + } + + MallocExtension_Internal_MarkThreadIdle(); +#endif +} + +void MallocExtension::MarkThreadBusy() { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (&MallocExtension_Internal_MarkThreadBusy == nullptr) { + return; + } + + MallocExtension_Internal_MarkThreadBusy(); +#endif +} + +MallocExtension::MemoryLimit MallocExtension::GetMemoryLimit() { + MemoryLimit ret; +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (&MallocExtension_Internal_GetMemoryLimit != nullptr) { + MallocExtension_Internal_GetMemoryLimit(&ret); + } +#endif + return ret; +} + +void MallocExtension::SetMemoryLimit( + const MallocExtension::MemoryLimit& limit) { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (&MallocExtension_Internal_SetMemoryLimit != nullptr) { + MallocExtension_Internal_SetMemoryLimit(&limit); + } +#endif +} + +int64_t MallocExtension::GetProfileSamplingRate() 
{ +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (&MallocExtension_Internal_GetProfileSamplingRate != nullptr) { + return MallocExtension_Internal_GetProfileSamplingRate(); + } +#endif + return -1; +} + +void MallocExtension::SetProfileSamplingRate(int64_t rate) { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (&MallocExtension_Internal_SetProfileSamplingRate != nullptr) { + MallocExtension_Internal_SetProfileSamplingRate(rate); + } +#endif + (void)rate; +} + +int64_t MallocExtension::GetGuardedSamplingRate() { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (MallocExtension_Internal_GetGuardedSamplingRate == nullptr) { + return -1; + } + + return MallocExtension_Internal_GetGuardedSamplingRate(); +#else + return -1; +#endif +} + +void MallocExtension::SetGuardedSamplingRate(int64_t rate) { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (MallocExtension_Internal_SetGuardedSamplingRate == nullptr) { + return; + } + + MallocExtension_Internal_SetGuardedSamplingRate(rate); +#else + (void)rate; +#endif +} + +void MallocExtension::ActivateGuardedSampling() { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (&MallocExtension_Internal_ActivateGuardedSampling != nullptr) { + MallocExtension_Internal_ActivateGuardedSampling(); + } +#endif +} + +bool MallocExtension::PerCpuCachesActive() { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (MallocExtension_Internal_GetPerCpuCachesActive == nullptr) { + return false; + } + + return MallocExtension_Internal_GetPerCpuCachesActive(); +#else + return false; +#endif +} + +void MallocExtension::DeactivatePerCpuCaches() { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (MallocExtension_Internal_DeactivatePerCpuCaches == nullptr) { + return; + } + + MallocExtension_Internal_DeactivatePerCpuCaches(); +#endif +} + +int32_t MallocExtension::GetMaxPerCpuCacheSize() { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (MallocExtension_Internal_GetMaxPerCpuCacheSize == nullptr) { + return -1; + } + + return MallocExtension_Internal_GetMaxPerCpuCacheSize(); +#else + return -1; +#endif +} + +void MallocExtension::SetMaxPerCpuCacheSize(int32_t value) { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (MallocExtension_Internal_SetMaxPerCpuCacheSize == nullptr) { + return; + } + + MallocExtension_Internal_SetMaxPerCpuCacheSize(value); +#else + (void)value; +#endif +} + +int64_t MallocExtension::GetMaxTotalThreadCacheBytes() { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (MallocExtension_Internal_GetMaxTotalThreadCacheBytes == nullptr) { + return -1; + } + + return MallocExtension_Internal_GetMaxTotalThreadCacheBytes(); +#else + return -1; +#endif +} + +void MallocExtension::SetMaxTotalThreadCacheBytes(int64_t value) { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (MallocExtension_Internal_SetMaxTotalThreadCacheBytes == nullptr) { + return; + } + + MallocExtension_Internal_SetMaxTotalThreadCacheBytes(value); +#else + (void)value; +#endif +} + +absl::Duration MallocExtension::GetSkipSubreleaseInterval() { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (MallocExtension_Internal_GetSkipSubreleaseInterval == nullptr) { + return absl::ZeroDuration(); + } + + absl::Duration value; + MallocExtension_Internal_GetSkipSubreleaseInterval(&value); + return value; +#else + return absl::ZeroDuration(); +#endif +} + +void MallocExtension::SetSkipSubreleaseInterval(absl::Duration value) { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if 
(MallocExtension_Internal_SetSkipSubreleaseInterval == nullptr) { + return; + } + + MallocExtension_Internal_SetSkipSubreleaseInterval(value); +#else + (void)value; +#endif +} + +absl::optional<size_t> MallocExtension::GetNumericProperty( + absl::string_view property) { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (&MallocExtension_Internal_GetNumericProperty != nullptr) { + size_t value; + if (MallocExtension_Internal_GetNumericProperty(property.data(), + property.size(), &value)) { + return value; + } + } +#endif + return absl::nullopt; +} + +size_t MallocExtension::GetEstimatedAllocatedSize(size_t size) { + return nallocx(size, 0); +} + +absl::optional<size_t> MallocExtension::GetAllocatedSize(const void* p) { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (MallocExtension_Internal_GetAllocatedSize != nullptr) { + return MallocExtension_Internal_GetAllocatedSize(p); + } +#endif + return absl::nullopt; +} + +MallocExtension::Ownership MallocExtension::GetOwnership(const void* p) { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (MallocExtension_Internal_GetOwnership != nullptr) { + return MallocExtension_Internal_GetOwnership(p); + } +#endif + return MallocExtension::Ownership::kUnknown; +} + +std::map<std::string, MallocExtension::Property> +MallocExtension::GetProperties() { + std::map<std::string, MallocExtension::Property> ret; +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (&MallocExtension_Internal_GetProperties != nullptr) { + MallocExtension_Internal_GetProperties(&ret); + } +#endif + return ret; +} + +size_t MallocExtension::ReleaseCpuMemory(int cpu) { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (MallocExtension_Internal_ReleaseCpuMemory != nullptr) { + return MallocExtension_Internal_ReleaseCpuMemory(cpu); + } +#endif + return 0; +} + +void MallocExtension::ProcessBackgroundActions() { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (NeedsProcessBackgroundActions()) { + MallocExtension_Internal_ProcessBackgroundActions(); + } +#endif +} + +bool MallocExtension::NeedsProcessBackgroundActions() { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + return &MallocExtension_Internal_ProcessBackgroundActions != nullptr; +#else + return false; +#endif +} + +MallocExtension::BytesPerSecond MallocExtension::GetBackgroundReleaseRate() { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (&MallocExtension_Internal_GetBackgroundReleaseRate != nullptr) { + return MallocExtension_Internal_GetBackgroundReleaseRate(); + } +#endif + return static_cast<MallocExtension::BytesPerSecond>(0); +} + +void MallocExtension::SetBackgroundReleaseRate(BytesPerSecond rate) { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (&MallocExtension_Internal_SetBackgroundReleaseRate != nullptr) { + MallocExtension_Internal_SetBackgroundReleaseRate(rate); + } +#endif +} + +void MallocExtension::EnableForkSupport() { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (&MallocExtension_EnableForkSupport != nullptr) { + MallocExtension_EnableForkSupport(); + } +#endif +} + +void MallocExtension::SetSampleUserDataCallbacks( + CreateSampleUserDataCallback create, + CopySampleUserDataCallback copy, + DestroySampleUserDataCallback destroy) { +#if ABSL_INTERNAL_HAVE_WEAK_MALLOCEXTENSION_STUBS + if (&MallocExtension_SetSampleUserDataCallbacks != nullptr) { + MallocExtension_SetSampleUserDataCallbacks(create, copy, destroy); + } +#else + (void)create; + (void)copy; + (void)destroy; +#endif +} + +} // namespace tcmalloc + +// Default implementation just 
returns size. The expectation is that +// the linked-in malloc implementation might provide an override of +// this weak function with a better implementation. +ABSL_ATTRIBUTE_WEAK ABSL_ATTRIBUTE_NOINLINE size_t nallocx(size_t size, + int) noexcept { + return size; +} + +// Default implementation just frees memory. The expectation is that the +// linked-in malloc implementation may provide an override with an +// implementation that uses this optimization. +ABSL_ATTRIBUTE_WEAK ABSL_ATTRIBUTE_NOINLINE void sdallocx(void* ptr, size_t, + int) noexcept { + free(ptr); +} + +ABSL_ATTRIBUTE_WEAK ABSL_ATTRIBUTE_NOINLINE tcmalloc::sized_ptr_t +tcmalloc_size_returning_operator_new(size_t size) { + return {::operator new(size), size}; +} + +ABSL_ATTRIBUTE_WEAK ABSL_ATTRIBUTE_NOINLINE tcmalloc::sized_ptr_t +tcmalloc_size_returning_operator_new_nothrow(size_t size) noexcept { + void* p = ::operator new(size, std::nothrow); + return {p, p ? size : 0}; +} + +#if defined(_LIBCPP_VERSION) && defined(__cpp_aligned_new) + +ABSL_ATTRIBUTE_WEAK ABSL_ATTRIBUTE_NOINLINE tcmalloc::sized_ptr_t +tcmalloc_size_returning_operator_new_aligned(size_t size, + std::align_val_t alignment) { + return {::operator new(size, alignment), size}; +} + +ABSL_ATTRIBUTE_WEAK ABSL_ATTRIBUTE_NOINLINE tcmalloc::sized_ptr_t +tcmalloc_size_returning_operator_new_aligned_nothrow( + size_t size, std::align_val_t alignment) noexcept { + void* p = ::operator new(size, alignment, std::nothrow); + return {p, p ? size : 0}; +} + +#endif // _LIBCPP_VERSION && __cpp_aligned_new diff --git a/contrib/libs/tcmalloc/tcmalloc/malloc_extension.h b/contrib/libs/tcmalloc/tcmalloc/malloc_extension.h new file mode 100644 index 0000000000..fcbd347ca1 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/malloc_extension.h @@ -0,0 +1,617 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This file documents extensions supported by TCMalloc. These extensions +// provide hooks for both surfacing telemetric data about TCMalloc's usage and +// tuning the internal implementation of TCMalloc. The internal implementation +// functions use weak linkage, allowing an application to link against the +// extensions without always linking against TCMalloc. 
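A small caller-side sketch (an illustration, not taken from the source; it assumes the header is reachable as "tcmalloc/malloc_extension.h") showing the graceful fallback this weak linkage provides:

#include <cstdio>
#include <string>

#include "tcmalloc/malloc_extension.h"

int main() {
  // Safe to call whether or not TCMalloc is the linked allocator: the
  // implementation probes the weak MallocExtension_Internal_* symbols and
  // returns an empty string when they are absent.
  const std::string stats = tcmalloc::MallocExtension::GetStats();
  if (stats.empty()) {
    std::puts("tcmalloc extensions not available");
  } else {
    std::fputs(stats.c_str(), stdout);
  }
  return 0;
}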
+ +#ifndef TCMALLOC_MALLOC_EXTENSION_H_ +#define TCMALLOC_MALLOC_EXTENSION_H_ + +#include <atomic> +#include <cstddef> +#include <cstdint> +#include <functional> +#include <limits> +#include <map> +#include <memory> +#include <new> +#include <string> +#include <utility> +#include <vector> + +#include "absl/base/attributes.h" +#include "absl/base/macros.h" +#include "absl/base/policy_checks.h" +#include "absl/base/port.h" +#include "absl/functional/function_ref.h" +#include "absl/strings/string_view.h" +#include "absl/time/time.h" +#include "absl/types/optional.h" +#include "absl/types/span.h" + +namespace tcmalloc { +namespace tcmalloc_internal { +class AllocationProfilingTokenAccessor; +class AllocationProfilingTokenBase; +class ProfileAccessor; +class ProfileBase; +} // namespace tcmalloc_internal + +enum class ProfileType { + // Approximation of current heap usage + kHeap, + + // Fragmentation report + kFragmentation, + + // Sample of objects that were live at a recent peak of total heap usage. The + // specifics of when exactly this profile is collected are subject to change. + kPeakHeap, + + // Sample of objects allocated from the start of allocation profiling until + // the profile was terminated with Stop(). + kAllocations, + + // Only present to prevent switch statements without a default clause so that + // we can extend this enumeration without breaking code. + kDoNotUse, +}; + +class Profile final { + public: + Profile() = default; + Profile(Profile&&) = default; + Profile(const Profile&) = delete; + + ~Profile(); + + Profile& operator=(Profile&&) = default; + Profile& operator=(const Profile&) = delete; + + struct Sample { + static constexpr int kMaxStackDepth = 64; + + int64_t sum; + int64_t count; // Total added with this <stack,requested_size,...> + + size_t requested_size; + size_t requested_alignment; + size_t allocated_size; + + int depth; + void* stack[kMaxStackDepth]; + + void* user_data; + }; + + void Iterate(absl::FunctionRef<void(const Sample&)> f) const; + + int64_t Period() const; + ProfileType Type() const; + + private: + explicit Profile(std::unique_ptr<const tcmalloc_internal::ProfileBase>); + + std::unique_ptr<const tcmalloc_internal::ProfileBase> impl_; + friend class tcmalloc_internal::ProfileAccessor; +}; + +class AddressRegion { + public: + AddressRegion() {} + virtual ~AddressRegion(); + + // Allocates at least size bytes of memory from this region, aligned with + // alignment. Returns a pair containing a pointer to the start the allocated + // memory and the actual size allocated. Returns {nullptr, 0} on failure. + // + // Alloc must return memory located within the address range given in the call + // to AddressRegionFactory::Create that created this AddressRegion. + virtual std::pair<void*, size_t> Alloc(size_t size, size_t alignment) = 0; +}; + +// Interface to a pluggable address region allocator. +class AddressRegionFactory { + public: + enum class UsageHint { + kNormal, // Normal usage. + kInfrequentAllocation, // TCMalloc allocates from these regions less + // frequently than normal regions. + kInfrequent ABSL_DEPRECATED("Use kInfrequentAllocation") = + kInfrequentAllocation, + }; + + AddressRegionFactory() {} + virtual ~AddressRegionFactory(); + + // Returns an AddressRegion with the specified start address and size. hint + // indicates how the caller intends to use the returned region (helpful for + // deciding which regions to remap with hugepages, which regions should have + // pages prefaulted, etc.). 
The returned AddressRegion must never be deleted. + // + // The caller must have reserved size bytes of address space starting at + // start_addr with mmap(PROT_NONE) prior to calling this function (so it is + // safe for Create() to mmap(MAP_FIXED) over the specified address range). + // start_addr and size are always page-aligned. + virtual AddressRegion* Create(void* start_addr, size_t size, + UsageHint hint) = 0; + + // Gets a human-readable description of the current state of the allocator. + // + // The state is stored in the provided buffer. The number of bytes used (or + // would have been required, had the buffer been of sufficient size) is + // returned. + virtual size_t GetStats(absl::Span<char> buffer); + + // Gets a description of the current state of the allocator in pbtxt format. + // + // The state is stored in the provided buffer. The number of bytes used (or + // would have been required, had the buffer been of sufficient size) is + // returned. + virtual size_t GetStatsInPbtxt(absl::Span<char> buffer); + + // Returns the total number of bytes allocated by MallocInternal(). + static size_t InternalBytesAllocated(); + + protected: + // Dynamically allocates memory for use by AddressRegionFactory. Particularly + // useful for creating AddressRegions inside Create(). + // + // This memory is never freed, so allocate sparingly. + static void* MallocInternal(size_t size); +}; + +class MallocExtension final { + public: + // Gets a human readable description of the current state of the malloc data + // structures. + // + // See https://github.com/google/tcmalloc/tree/master/docs/stats.md for how to interpret these + // statistics. + static std::string GetStats(); + + // ------------------------------------------------------------------- + // Control operations for getting malloc implementation specific parameters. + // Some currently useful properties: + // + // generic + // ------- + // "generic.current_allocated_bytes" + // Number of bytes currently allocated by application + // + // "generic.heap_size" + // Number of bytes in the heap == + // current_allocated_bytes + + // fragmentation + + // freed (but not released to OS) memory regions + // + // tcmalloc + // -------- + // "tcmalloc.max_total_thread_cache_bytes" + // Upper limit on total number of bytes stored across all + // per-thread caches. Default: 16MB. + // + // "tcmalloc.current_total_thread_cache_bytes" + // Number of bytes used across all thread caches. + // + // "tcmalloc.pageheap_free_bytes" + // Number of bytes in free, mapped pages in page heap. These + // bytes can be used to fulfill allocation requests. They + // always count towards virtual memory usage, and unless the + // underlying memory is swapped out by the OS, they also count + // towards physical memory usage. + // + // "tcmalloc.pageheap_unmapped_bytes" + // Number of bytes in free, unmapped pages in page heap. + // These are bytes that have been released back to the OS, + // possibly by one of the MallocExtension "Release" calls. + // They can be used to fulfill allocation requests, but + // typically incur a page fault. They always count towards + // virtual memory usage, and depending on the OS, typically + // do not count towards physical memory usage. + // + // "tcmalloc.per_cpu_caches_active" + // Whether tcmalloc is using per-CPU caches (1 or 0 respectively). + // ------------------------------------------------------------------- + + // Gets the named property's value or a nullopt if the property is not valid. 
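+ //
+ // Example (illustrative):
+ //   absl::optional<size_t> heap_size =
+ //       MallocExtension::GetNumericProperty("generic.heap_size");
+ //   if (heap_size.has_value()) {
+ //     // *heap_size is the heap size in bytes, as described above.
+ //   }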
+ static absl::optional<size_t> GetNumericProperty(absl::string_view property); + + // Marks the current thread as "idle". This function may optionally be called + // by threads as a hint to the malloc implementation that any thread-specific + // resources should be released. Note: this may be an expensive function, so + // it should not be called too often. + // + // Also, if the code that calls this function will go to sleep for a while, it + // should take care to not allocate anything between the call to this function + // and the beginning of the sleep. + static void MarkThreadIdle(); + + // Marks the current thread as "busy". This function should be called after + // MarkThreadIdle() if the thread will now do more work. If this method is + // not called, performance may suffer. + static void MarkThreadBusy(); + + // Attempts to free any resources associated with cpu <cpu> (in the sense of + // only being usable from that CPU.) Returns the number of bytes previously + // assigned to "cpu" that were freed. Safe to call from any processor, not + // just <cpu>. + static size_t ReleaseCpuMemory(int cpu); + + // Gets the region factory used by the malloc extension instance. Returns null + // for malloc implementations that do not support pluggable region factories. + static AddressRegionFactory* GetRegionFactory(); + + // Sets the region factory to the specified. + // + // Users could register their own region factories by doing: + // factory = new MyOwnRegionFactory(); + // MallocExtension::SetRegionFactory(factory); + // + // It's up to users whether to fall back (recommended) to the default region + // factory (use GetRegionFactory() above) or not. The caller is responsible to + // any necessary locking. + static void SetRegionFactory(AddressRegionFactory* a); + + // Tries to release at least num_bytes of free memory back to the OS for + // reuse. + // + // Depending on the state of the malloc implementation, more than num_bytes of + // memory may be released to the OS. + // + // This request may not be completely honored if: + // * The underlying malloc implementation does not support releasing memory to + // the OS. + // * There are not at least num_bytes of free memory cached, or free memory is + // fragmented in ways that keep it from being returned to the OS. + // + // Returning memory to the OS can hurt performance in two ways: + // * Parts of huge pages may be free and returning them to the OS requires + // breaking up the huge page they are located on. This can slow accesses to + // still-allocated memory due to increased TLB pressure for the working set. + // * If the memory is ultimately needed again, pages will need to be faulted + // back in. + static void ReleaseMemoryToSystem(size_t num_bytes); + + struct MemoryLimit { + // Make a best effort attempt to prevent more than limit bytes of memory + // from being allocated by the system. In particular, if satisfying a given + // malloc call would require passing this limit, release as much memory to + // the OS as needed to stay under it if possible. + // + // If hard is set, crash if returning memory is unable to get below the + // limit. + // + // Note: limit=SIZE_T_MAX implies no limit. + size_t limit = std::numeric_limits<size_t>::max(); + bool hard = false; + + // Explicitly declare the ctor to put it in the google_malloc section. + MemoryLimit() = default; + }; + + static MemoryLimit GetMemoryLimit(); + static void SetMemoryLimit(const MemoryLimit& limit); + + // Gets the sampling rate. Returns a value < 0 if unknown. 
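+ // For example (illustrative), a value of 2097152 means roughly one sample per
+ // 2 MiB of allocated bytes; that rate could be requested with
+ //   MallocExtension::SetProfileSamplingRate(2 << 20);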
+ static int64_t GetProfileSamplingRate(); + // Sets the sampling rate for heap profiles. TCMalloc samples approximately + // every rate bytes allocated. + static void SetProfileSamplingRate(int64_t rate); + + // Gets the guarded sampling rate. Returns a value < 0 if unknown. + static int64_t GetGuardedSamplingRate(); + // Sets the guarded sampling rate for sampled allocations. TCMalloc samples + // approximately every rate bytes allocated, subject to implementation + // limitations in GWP-ASan. + // + // Guarded samples provide probablistic protections against buffer underflow, + // overflow, and use-after-free when GWP-ASan is active (via calling + // ActivateGuardedSampling). + static void SetGuardedSamplingRate(int64_t rate); + + // Switches TCMalloc to guard sampled allocations for underflow, overflow, and + // use-after-free according to the guarded sample parameter value. + static void ActivateGuardedSampling(); + + // Gets whether TCMalloc is using per-CPU caches. + static bool PerCpuCachesActive(); + + // Extension for unified agent. + // + // Should be removed in the future https://st.yandex-team.ru/UNIFIEDAGENT-321 + static void DeactivatePerCpuCaches(); + + // Gets the current maximum cache size per CPU cache. + static int32_t GetMaxPerCpuCacheSize(); + // Sets the maximum cache size per CPU cache. This is a per-core limit. + static void SetMaxPerCpuCacheSize(int32_t value); + + // Gets the current maximum thread cache. + static int64_t GetMaxTotalThreadCacheBytes(); + // Sets the maximum thread cache size. This is a whole-process limit. + static void SetMaxTotalThreadCacheBytes(int64_t value); + + // Gets the delayed subrelease interval (0 if delayed subrelease is disabled) + static absl::Duration GetSkipSubreleaseInterval(); + // Sets the delayed subrelease interval (0 to disable delayed subrelease) + static void SetSkipSubreleaseInterval(absl::Duration value); + + // Returns the estimated number of bytes that will be allocated for a request + // of "size" bytes. This is an estimate: an allocation of "size" bytes may + // reserve more bytes, but will never reserve fewer. + static size_t GetEstimatedAllocatedSize(size_t size); + + // Returns the actual number N of bytes reserved by tcmalloc for the pointer + // p. This number may be equal to or greater than the number of bytes + // requested when p was allocated. + // + // This function is just useful for statistics collection. The client must + // *not* read or write from the extra bytes that are indicated by this call. + // + // Example, suppose the client gets memory by calling + // p = malloc(10) + // and GetAllocatedSize(p) returns 16. The client must only use the first 10 + // bytes p[0..9], and not attempt to read or write p[10..15]. + // + // p must have been allocated by TCMalloc and must not be an interior pointer + // -- that is, must be exactly the pointer returned to by malloc() et al., not + // some offset from that -- and should not have been freed yet. p may be + // null. + static absl::optional<size_t> GetAllocatedSize(const void* p); + + // Returns + // * kOwned if TCMalloc allocated the memory pointed to by p, or + // * kNotOwned if allocated elsewhere or p is null. + // + // REQUIRES: p must be a value returned from a previous call to malloc(), + // calloc(), realloc(), memalign(), posix_memalign(), valloc(), pvalloc(), + // new, or new[], and must refer to memory that is currently allocated (so, + // for instance, you should not pass in a pointer after having called free() + // on it). 
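+ //
+ // Example (illustrative):
+ //   void* p = malloc(16);
+ //   const bool owned = MallocExtension::GetOwnership(p) ==
+ //                      MallocExtension::Ownership::kOwned;
+ //   // owned is true when TCMalloc provided the allocation; without TCMalloc
+ //   // linked in, GetOwnership() returns kUnknown instead.
+ //   free(p);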
+ enum class Ownership { kUnknown = 0, kOwned, kNotOwned }; + static Ownership GetOwnership(const void* p); + + // Type used by GetProperties. See comment on GetProperties. + struct Property { + size_t value; + }; + + // Returns detailed statistics about the state of TCMalloc. The map is keyed + // by the name of the statistic. + // + // Common across malloc implementations: + // generic.bytes_in_use_by_app -- Bytes currently in use by application + // generic.physical_memory_used -- Overall (including malloc internals) + // generic.virtual_memory_used -- Overall (including malloc internals) + // + // Tcmalloc specific properties + // tcmalloc.cpu_free -- Bytes in per-cpu free-lists + // tcmalloc.thread_cache_free -- Bytes in per-thread free-lists + // tcmalloc.transfer_cache -- Bytes in cross-thread transfer caches + // tcmalloc.central_cache_free -- Bytes in central cache + // tcmalloc.page_heap_free -- Bytes in page heap + // tcmalloc.page_heap_unmapped -- Bytes in page heap (no backing phys. mem) + // tcmalloc.metadata_bytes -- Used by internal data structures + // tcmalloc.thread_cache_count -- Number of thread caches in use + // tcmalloc.experiment.NAME -- Experiment NAME is running if 1 + static std::map<std::string, Property> GetProperties(); + + static Profile SnapshotCurrent(tcmalloc::ProfileType type); + + // AllocationProfilingToken tracks an active profiling session started with + // StartAllocationProfiling. Profiling continues until Stop() is called. + class AllocationProfilingToken { + public: + AllocationProfilingToken() = default; + AllocationProfilingToken(AllocationProfilingToken&&) = default; + AllocationProfilingToken(const AllocationProfilingToken&) = delete; + ~AllocationProfilingToken(); + + AllocationProfilingToken& operator=(AllocationProfilingToken&&) = default; + AllocationProfilingToken& operator=(const AllocationProfilingToken&) = + delete; + + // Finish the recording started by the corresponding call to + // StartAllocationProfile, and return samples of calls to each function. If + // it is called more than once, subsequent calls will return an empty + // profile. + Profile Stop() &&; + + private: + explicit AllocationProfilingToken( + std::unique_ptr<tcmalloc_internal::AllocationProfilingTokenBase>); + + std::unique_ptr<tcmalloc_internal::AllocationProfilingTokenBase> impl_; + friend class tcmalloc_internal::AllocationProfilingTokenAccessor; + }; + + // Start recording a sample of allocation and deallocation calls. Returns + // null if the implementation does not support profiling. + static AllocationProfilingToken StartAllocationProfiling(); + + // Runs housekeeping actions for the allocator off of the main allocation path + // of new/delete. As of 2020, this includes: + // * Inspecting the current CPU mask and releasing memory from inaccessible + // CPUs. + // * Releasing GetBackgroundReleaseRate() bytes per second from the page + // heap, if that many bytes are free, via ReleaseMemoryToSystem(). + // + // When linked against TCMalloc, this method does not return. + static void ProcessBackgroundActions(); + + // Return true if ProcessBackgroundActions should be called on this platform. + // Not all platforms need/support background actions. As of 2021 this + // includes Apple and Emscripten. + static bool NeedsProcessBackgroundActions(); + + // Specifies a rate in bytes per second. + // + // The enum is used to provide strong-typing for the value. 
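+ // For example (illustrative), to cap background release at 32 MiB per second
+ // (only effective while ProcessBackgroundActions() is running):
+ //   MallocExtension::SetBackgroundReleaseRate(
+ //       MallocExtension::BytesPerSecond{32 << 20});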
+ enum class BytesPerSecond : size_t {}; + + // Gets the current release rate (in bytes per second) from the page heap. + // Zero inhibits the release path. + static BytesPerSecond GetBackgroundReleaseRate(); + // Specifies the release rate from the page heap. ProcessBackgroundActions + // must be called for this to be operative. + static void SetBackgroundReleaseRate(BytesPerSecond rate); + + // Enables fork support. + // Allocator will continue to function correctly in the child, after calling fork(). + static void EnableForkSupport(); + + using CreateSampleUserDataCallback = void*(); + using CopySampleUserDataCallback = void*(void*); + using DestroySampleUserDataCallback = void(void*); + + // Sets callbacks for lifetime control of custom user data attached to allocation samples + static void SetSampleUserDataCallbacks( + CreateSampleUserDataCallback create, + CopySampleUserDataCallback copy, + DestroySampleUserDataCallback destroy); +}; + +} // namespace tcmalloc + +// The nallocx function allocates no memory, but it performs the same size +// computation as the malloc function, and returns the real size of the +// allocation that would result from the equivalent malloc function call. +// Default weak implementation returns size unchanged, but tcmalloc overrides it +// and returns rounded up size. See the following link for details: +// http://www.unix.com/man-page/freebsd/3/nallocx/ +extern "C" size_t nallocx(size_t size, int flags) noexcept; + +// The sdallocx function deallocates memory allocated by malloc or memalign. It +// takes a size parameter to pass the original allocation size. +// +// The default weak implementation calls free(), but TCMalloc overrides it and +// uses the size to improve deallocation performance. +extern "C" void sdallocx(void* ptr, size_t size, int flags) noexcept; + +namespace tcmalloc { + +// Pointer / capacity information as returned by +// tcmalloc_size_returning_operator_new(). See +// tcmalloc_size_returning_operator_new() for more information. +struct sized_ptr_t { + void* p; + size_t n; +}; + +} // namespace tcmalloc + +// Allocates memory of at least the requested size. +// +// Returns a `sized_ptr_t` struct holding the allocated pointer, and the +// capacity of the allocated memory, which may be larger than the requested +// size. +// +// The returned pointer follows the alignment requirements of the standard new +// operator. This function will terminate on failure, except for the APIs +// accepting the std::nothrow parameter which will return {nullptr, 0} on +// failure. +// +// The returned pointer must be freed calling the matching ::operator delete. +// +// If a sized operator delete operator is invoked, then the 'size' parameter +// passed to delete must be greater or equal to the original requested size, and +// less than or equal to the capacity of the allocated memory as returned by the +// `tcmalloc_size_returning_operator_new` method. +// +// If neither the original size or capacity is known, then the non-sized +// operator delete can be invoked, however, this should be avoided, as this is +// substantially less efficient. +// +// The default weak implementation allocates the memory using the corresponding +// (matching) ::operator new(size_t, ...). 
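+//
+// Example (illustrative):
+//   tcmalloc::sized_ptr_t r = tcmalloc_size_returning_operator_new(24);
+//   // r.n >= 24 and all r.n bytes may be used by the caller.
+//   ::operator delete(r.p, r.n);  // any size in [24, r.n] is valid here
+//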
+// +// This is a prototype API for the extension to C++ "size feedback in operator +// new" proposal: +// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2019/p0901r5.html +extern "C" { +tcmalloc::sized_ptr_t tcmalloc_size_returning_operator_new(size_t size); +tcmalloc::sized_ptr_t tcmalloc_size_returning_operator_new_nothrow( + size_t size) noexcept; + +// Aligned size returning new is only supported for libc++ because of issues +// with libstdcxx.so linkage. See http://b/110969867 for background. +#if defined(__cpp_aligned_new) + +// Identical to `tcmalloc_size_returning_operator_new` except that the returned +// memory is aligned according to the `alignment` argument. +tcmalloc::sized_ptr_t tcmalloc_size_returning_operator_new_aligned( + size_t size, std::align_val_t alignment); +tcmalloc::sized_ptr_t tcmalloc_size_returning_operator_new_aligned_nothrow( + size_t size, std::align_val_t alignment) noexcept; + +#endif // __cpp_aligned_new + +} // extern "C" + +#ifndef MALLOCX_LG_ALIGN +#define MALLOCX_LG_ALIGN(la) (la) +#endif + +namespace tcmalloc { +namespace tcmalloc_internal { + +// AllocationProfilingTokenBase tracks an on-going profiling session of sampled +// allocations. The session ends when Stop() is called. +// +// This decouples the implementation details (of TCMalloc) from the interface, +// allowing non-TCMalloc allocators (such as libc and sanitizers) to be provided +// while allowing the library to compile and link. +class AllocationProfilingTokenBase { + public: + // Explicitly declare the ctor to put it in the google_malloc section. + AllocationProfilingTokenBase() = default; + + virtual ~AllocationProfilingTokenBase() = default; + + // Finish recording started during construction of this object. + // + // After the first call, Stop() will return an empty profile. + virtual Profile Stop() && = 0; +}; + +// ProfileBase contains a profile of allocations. +// +// This decouples the implementation details (of TCMalloc) from the interface, +// allowing non-TCMalloc allocators (such as libc and sanitizers) to be provided +// while allowing the library to compile and link. +class ProfileBase { + public: + virtual ~ProfileBase() = default; + + // For each sample in the profile, Iterate invokes the callback f on the + // sample. + virtual void Iterate( + absl::FunctionRef<void(const Profile::Sample&)> f) const = 0; + + // The approximate interval between recorded samples of the event of interest. + // A period of 1 means every sample was recorded. + virtual int64_t Period() const = 0; + + // The type of profile (live objects, allocated, etc.). + virtual ProfileType Type() const = 0; +}; + +} // namespace tcmalloc_internal +} // namespace tcmalloc + +#endif // TCMALLOC_MALLOC_EXTENSION_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/malloc_extension_fuzz.cc b/contrib/libs/tcmalloc/tcmalloc/malloc_extension_fuzz.cc new file mode 100644 index 0000000000..26335bdef8 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/malloc_extension_fuzz.cc @@ -0,0 +1,42 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include <stdint.h> + +#include <map> +#include <string> + +#include "absl/types/optional.h" +#include "tcmalloc/malloc_extension.h" + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t* d, size_t size) { + using tcmalloc::MallocExtension; + + const std::string property(reinterpret_cast<const char*>(d), size); + absl::optional<size_t> val = MallocExtension::GetNumericProperty(property); + if (!val.has_value()) { + // Rather than inspect the result of MallocExtension::GetProperties, we + // defer to the test in //tcmalloc/malloc_extension_test.cc to + // ensure that every key in GetProperties has a value returned by + // GetNumericProperty. + return 0; + } + + std::map<std::string, MallocExtension::Property> properties = + MallocExtension::GetProperties(); + if (properties.find(property) == properties.end()) { + __builtin_trap(); + } + return 0; +} diff --git a/contrib/libs/tcmalloc/tcmalloc/malloc_extension_system_malloc_test.cc b/contrib/libs/tcmalloc/tcmalloc/malloc_extension_system_malloc_test.cc new file mode 100644 index 0000000000..81e7afa010 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/malloc_extension_system_malloc_test.cc @@ -0,0 +1,87 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// These tests assume TCMalloc is not linked in, and therefore the features +// exposed by MallocExtension should be no-ops, but otherwise safe to call. + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/random/random.h" +#include "tcmalloc/malloc_extension.h" + +namespace tcmalloc { +namespace { + +TEST(MallocExtension, SnapshotCurrentIsEmpty) { + // Allocate memory to use the allocator. + absl::BitGen gen; + int bytes_remaining = 1 << 24; + std::vector<void*> ptrs; + + while (bytes_remaining > 0) { + int size = absl::LogUniform<int>(gen, 0, 1 << 20); + ptrs.push_back(::operator new(size)); + bytes_remaining -= size; + } + + // All of the profiles should be empty. + ProfileType types[] = { + ProfileType::kHeap, + ProfileType::kFragmentation, ProfileType::kPeakHeap, + ProfileType::kAllocations, + }; + + for (auto t : types) { + SCOPED_TRACE(static_cast<int>(t)); + + Profile p = MallocExtension::SnapshotCurrent(t); + int samples = 0; + p.Iterate([&](const Profile::Sample&) { samples++; }); + + EXPECT_EQ(samples, 0); + } + + for (void* ptr : ptrs) { + ::operator delete(ptr); + } +} + +TEST(MallocExtension, AllocationProfile) { + auto token = MallocExtension::StartAllocationProfiling(); + + // Allocate memory to use the allocator. + absl::BitGen gen; + int bytes_remaining = 1 << 24; + std::vector<void*> ptrs; + + while (bytes_remaining > 0) { + int size = absl::LogUniform<int>(gen, 0, 1 << 20); + ptrs.push_back(::operator new(size)); + bytes_remaining -= size; + } + + // Finish profiling and verify the profile is empty. 
+ Profile p = std::move(token).Stop(); + int samples = 0; + p.Iterate([&](const Profile::Sample&) { samples++; }); + + EXPECT_EQ(samples, 0); + + for (void* ptr : ptrs) { + ::operator delete(ptr); + } +} + +} // namespace +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/malloc_extension_test.cc b/contrib/libs/tcmalloc/tcmalloc/malloc_extension_test.cc new file mode 100644 index 0000000000..5088806ff8 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/malloc_extension_test.cc @@ -0,0 +1,67 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Test for TCMalloc implementation of MallocExtension + +#include "tcmalloc/malloc_extension.h" + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/time/time.h" + +namespace tcmalloc { +namespace { + +TEST(MallocExtension, BackgroundReleaseRate) { + + // Mutate via MallocExtension. + MallocExtension::SetBackgroundReleaseRate( + MallocExtension::BytesPerSecond{100 << 20}); + + EXPECT_EQ(static_cast<size_t>(MallocExtension::GetBackgroundReleaseRate()), + 100 << 20); + + // Disable release + MallocExtension::SetBackgroundReleaseRate(MallocExtension::BytesPerSecond{0}); + + EXPECT_EQ(static_cast<size_t>(MallocExtension::GetBackgroundReleaseRate()), + 0); +} + +TEST(MallocExtension, SkipSubreleaseInterval) { + + // Mutate via MallocExtension. + MallocExtension::SetSkipSubreleaseInterval(absl::Seconds(10)); + EXPECT_EQ(MallocExtension::GetSkipSubreleaseInterval(), absl::Seconds(10)); + + // Disable skip subrelease + MallocExtension::SetSkipSubreleaseInterval(absl::ZeroDuration()); + EXPECT_EQ(MallocExtension::GetSkipSubreleaseInterval(), absl::ZeroDuration()); +} + +TEST(MallocExtension, Properties) { + // Verify that every property under GetProperties also works with + // GetNumericProperty. + const auto properties = MallocExtension::GetProperties(); + for (const auto& property : properties) { + absl::optional<size_t> scalar = + MallocExtension::GetNumericProperty(property.first); + // The value of the property itself may have changed, so just check that it + // is present. + EXPECT_THAT(scalar, testing::Ne(absl::nullopt)) << property.first; + } +} + +} // namespace +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/mock_central_freelist.cc b/contrib/libs/tcmalloc/tcmalloc/mock_central_freelist.cc new file mode 100644 index 0000000000..13308b947a --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/mock_central_freelist.cc @@ -0,0 +1,64 @@ +// Copyright 2020 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/mock_central_freelist.h" + +#include "absl/base/internal/spinlock.h" +#include "tcmalloc/internal/logging.h" + +namespace tcmalloc { +namespace tcmalloc_internal { + +void MinimalFakeCentralFreeList::AllocateBatch(void** batch, int n) { + for (int i = 0; i < n; ++i) batch[i] = &batch[i]; +} + +void MinimalFakeCentralFreeList::FreeBatch(absl::Span<void*> batch) { + for (void* x : batch) CHECK_CONDITION(x != nullptr); +} + +void MinimalFakeCentralFreeList::InsertRange(absl::Span<void*> batch) { + absl::base_internal::SpinLockHolder h(&lock_); + FreeBatch(batch); +} + +int MinimalFakeCentralFreeList::RemoveRange(void** batch, int n) { + absl::base_internal::SpinLockHolder h(&lock_); + AllocateBatch(batch, n); + return n; +} + +void FakeCentralFreeList::AllocateBatch(void** batch, int n) { + for (int i = 0; i < n; ++i) { + batch[i] = ::operator new(4); + } +} + +void FakeCentralFreeList::FreeBatch(absl::Span<void*> batch) { + for (void* x : batch) { + ::operator delete(x); + } +} + +void FakeCentralFreeList::InsertRange(absl::Span<void*> batch) { + FreeBatch(batch); +} + +int FakeCentralFreeList::RemoveRange(void** batch, int n) { + AllocateBatch(batch, n); + return n; +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/mock_central_freelist.h b/contrib/libs/tcmalloc/tcmalloc/mock_central_freelist.h new file mode 100644 index 0000000000..c2a56c0c60 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/mock_central_freelist.h @@ -0,0 +1,89 @@ +// Copyright 2020 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_MOCK_CENTRAL_FREELIST_H_ +#define TCMALLOC_MOCK_CENTRAL_FREELIST_H_ + +#include <stddef.h> + +#include "gmock/gmock.h" +#include "absl/base/internal/spinlock.h" +#include "absl/types/span.h" + +namespace tcmalloc { +namespace tcmalloc_internal { + +class FakeCentralFreeListBase { + public: + FakeCentralFreeListBase() {} + FakeCentralFreeListBase(const FakeCentralFreeListBase&) = delete; + FakeCentralFreeListBase& operator=(const FakeCentralFreeListBase&) = delete; + + static constexpr void Init(size_t) {} +}; + +// CentralFreeList implementation that backs onto the system's malloc. +// +// Useful for unit tests and fuzz tests where identifying leaks and correctness +// is important. +class FakeCentralFreeList : public FakeCentralFreeListBase { + public: + void InsertRange(absl::Span<void*> batch); + int RemoveRange(void** batch, int N); + + void AllocateBatch(void** batch, int n); + void FreeBatch(absl::Span<void*> batch); +}; + +// CentralFreeList implementation that does minimal work but no correctness +// checking. +// +// Useful for benchmarks where you want to avoid unrelated expensive operations. 
+class MinimalFakeCentralFreeList : public FakeCentralFreeListBase { + public: + void InsertRange(absl::Span<void*> batch); + int RemoveRange(void** batch, int N); + + void AllocateBatch(void** batch, int n); + void FreeBatch(absl::Span<void*> batch); + + private: + absl::base_internal::SpinLock lock_; +}; + +// CentralFreeList implementation that allows intercepting specific calls. By +// default backs onto the system's malloc. +// +// Useful for intrusive unit tests that want to verify internal behavior. +class RawMockCentralFreeList : public FakeCentralFreeList { + public: + RawMockCentralFreeList() : FakeCentralFreeList() { + ON_CALL(*this, InsertRange).WillByDefault([this](absl::Span<void*> batch) { + return static_cast<FakeCentralFreeList*>(this)->InsertRange(batch); + }); + ON_CALL(*this, RemoveRange).WillByDefault([this](void** batch, int n) { + return static_cast<FakeCentralFreeList*>(this)->RemoveRange(batch, n); + }); + } + + MOCK_METHOD(void, InsertRange, (absl::Span<void*> batch)); + MOCK_METHOD(int, RemoveRange, (void** batch, int N)); +}; + +using MockCentralFreeList = testing::NiceMock<RawMockCentralFreeList>; + +} // namespace tcmalloc_internal +} // namespace tcmalloc + +#endif // TCMALLOC_MOCK_CENTRAL_FREELIST_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/mock_transfer_cache.cc b/contrib/libs/tcmalloc/tcmalloc/mock_transfer_cache.cc new file mode 100644 index 0000000000..b8b2bcf131 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/mock_transfer_cache.cc @@ -0,0 +1,24 @@ +// Copyright 2020 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/mock_transfer_cache.h" + +namespace tcmalloc { +namespace tcmalloc_internal { + +int FakeTransferCacheManager::DetermineSizeClassToEvict() { return 3; } +bool FakeTransferCacheManager::ShrinkCache(int) { return true; } + +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/mock_transfer_cache.h b/contrib/libs/tcmalloc/tcmalloc/mock_transfer_cache.h new file mode 100644 index 0000000000..5b5192f6dc --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/mock_transfer_cache.h @@ -0,0 +1,310 @@ +// Copyright 2020 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef TCMALLOC_MOCK_TRANSFER_CACHE_H_ +#define TCMALLOC_MOCK_TRANSFER_CACHE_H_ + +#include <stddef.h> + +#include <algorithm> +#include <memory> +#include <random> + +#include "gmock/gmock.h" +#include "absl/random/distributions.h" +#include "absl/random/random.h" +#include "tcmalloc/common.h" +#include "tcmalloc/mock_central_freelist.h" +#include "tcmalloc/transfer_cache_internals.h" + +namespace tcmalloc { +namespace tcmalloc_internal { + +inline constexpr size_t kClassSize = 8; +inline constexpr size_t kNumToMove = 32; +inline constexpr int kSizeClass = 0; + +class FakeTransferCacheManagerBase { + public: + constexpr static size_t class_to_size(int size_class) { return kClassSize; } + constexpr static size_t num_objects_to_move(int size_class) { + // TODO(b/170732338): test with multiple different num_objects_to_move + return kNumToMove; + } + void* Alloc(size_t size) { + memory_.emplace_back(::operator new(size)); + return memory_.back().get(); + } + struct Free { + void operator()(void* b) { ::operator delete(b); } + }; + + private: + std::vector<std::unique_ptr<void, Free>> memory_; +}; + +// TransferCacheManager with basic stubs for everything. +// +// Useful for benchmarks where you want to unrelated expensive operations. +class FakeTransferCacheManager : public FakeTransferCacheManagerBase { + public: + int DetermineSizeClassToEvict(); + bool ShrinkCache(int); +}; + +// TransferCacheManager which allows intercepting intersting methods. +// +// Useful for intrusive unit tests that want to verify internal behavior. +class RawMockTransferCacheManager : public FakeTransferCacheManagerBase { + public: + RawMockTransferCacheManager() : FakeTransferCacheManagerBase() { + // We want single threaded tests to be deterministic, so we use a + // deterministic generator. Because we don't know about the threading for + // our tests we cannot keep the generator in a local variable. + ON_CALL(*this, ShrinkCache).WillByDefault([]() { + thread_local std::mt19937 gen{0}; + return absl::Bernoulli(gen, 0.8); + }); + ON_CALL(*this, GrowCache).WillByDefault([]() { + thread_local std::mt19937 gen{0}; + return absl::Bernoulli(gen, 0.8); + }); + ON_CALL(*this, DetermineSizeClassToEvict).WillByDefault([]() { + thread_local std::mt19937 gen{0}; + return absl::Uniform<size_t>(gen, 1, kNumClasses); + }); + } + + MOCK_METHOD(int, DetermineSizeClassToEvict, ()); + MOCK_METHOD(bool, ShrinkCache, (int size_class)); + MOCK_METHOD(bool, GrowCache, (int size_class)); +}; + +using MockTransferCacheManager = testing::NiceMock<RawMockTransferCacheManager>; + +// Wires up a largely functional TransferCache + TransferCacheManager + +// MockCentralFreeList. +// +// By default, it fills allocations and responds sensibly. Because it backs +// onto malloc/free, it will detect leaks and memory misuse when run in asan or +// tsan. +// +// Exposes the underlying mocks to allow for more whitebox tests. +// +// Drains the cache and verifies that no data was lost in the destructor. 
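+//
+// Example (illustrative; the exact TransferCache instantiation named below is
+// an assumption for the sketch and may not match the real test targets):
+//   using Env = FakeTransferCacheEnvironment<
+//       internal_transfer_cache::TransferCache<MockCentralFreeList,
+//                                              MockTransferCacheManager>>;
+//   Env env;
+//   env.Insert(3 * Env::kBatchSize);  // fill objects via the mock free list
+//   env.Remove(Env::kBatchSize);      // pull a batch back out
+//   // ~Env() drains anything left and verifies no objects were lost.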
+template <typename TransferCacheT> +class FakeTransferCacheEnvironment { + public: + using TransferCache = TransferCacheT; + using Manager = typename TransferCache::Manager; + using FreeList = typename TransferCache::FreeList; + + static constexpr int kMaxObjectsToMove = + ::tcmalloc::tcmalloc_internal::kMaxObjectsToMove; + static constexpr int kBatchSize = Manager::num_objects_to_move(1); + + FakeTransferCacheEnvironment() : manager_(), cache_(&manager_, 1) {} + + ~FakeTransferCacheEnvironment() { Drain(); } + + void Shrink() { cache_.ShrinkCache(kSizeClass); } + void Grow() { cache_.GrowCache(kSizeClass); } + + void Insert(int n) { + std::vector<void*> bufs; + while (n > 0) { + int b = std::min(n, kBatchSize); + bufs.resize(b); + central_freelist().AllocateBatch(&bufs[0], b); + cache_.InsertRange(kSizeClass, absl::MakeSpan(bufs)); + n -= b; + } + } + + void Remove(int n) { + std::vector<void*> bufs; + while (n > 0) { + int b = std::min(n, kBatchSize); + bufs.resize(b); + int removed = cache_.RemoveRange(kSizeClass, &bufs[0], b); + // Ensure we make progress. + ASSERT_GT(removed, 0); + ASSERT_LE(removed, b); + central_freelist().FreeBatch({&bufs[0], static_cast<size_t>(removed)}); + n -= removed; + } + } + + void Drain() { Remove(cache_.tc_length()); } + + void RandomlyPoke() { + absl::BitGen gen; + // We want a probabilistic steady state size: + // - grow/shrink balance on average + // - insert/remove balance on average + double choice = absl::Uniform(gen, 0.0, 1.0); + if (choice < 0.1) { + Shrink(); + } else if (choice < 0.2) { + Grow(); + } else if (choice < 0.3) { + cache_.HasSpareCapacity(kSizeClass); + } else if (choice < 0.65) { + Insert(absl::Uniform(gen, 1, kBatchSize)); + } else { + Remove(absl::Uniform(gen, 1, kBatchSize)); + } + } + + TransferCache& transfer_cache() { return cache_; } + + Manager& transfer_cache_manager() { return manager_; } + + FreeList& central_freelist() { return cache_.freelist(); } + + private: + Manager manager_; + TransferCache cache_; +}; + +// A fake transfer cache manager class which supports two size classes instead +// of just the one. To make this work, we have to store the transfer caches +// inside the cache manager, like in production code. +template <typename FreeListT, + template <typename FreeList, typename Manager> class TransferCacheT> +class TwoSizeClassManager : public FakeTransferCacheManagerBase { + public: + using FreeList = FreeListT; + using TransferCache = TransferCacheT<FreeList, TwoSizeClassManager>; + + // This is 3 instead of 2 because we hard code cl == 0 to be invalid in many + // places. We only use cl 1 and 2 here. 
+ static constexpr int kSizeClasses = 3; + static constexpr size_t kClassSize1 = 8; + static constexpr size_t kClassSize2 = 16; + static constexpr size_t kNumToMove1 = 32; + static constexpr size_t kNumToMove2 = 16; + + TwoSizeClassManager() { + caches_.push_back(absl::make_unique<TransferCache>(this, 0)); + caches_.push_back(absl::make_unique<TransferCache>(this, 1)); + caches_.push_back(absl::make_unique<TransferCache>(this, 2)); + } + + constexpr static size_t class_to_size(int size_class) { + switch (size_class) { + case 1: + return kClassSize1; + case 2: + return kClassSize2; + default: + return 0; + } + } + constexpr static size_t num_objects_to_move(int size_class) { + switch (size_class) { + case 1: + return kNumToMove1; + case 2: + return kNumToMove2; + default: + return 0; + } + } + + int DetermineSizeClassToEvict() { return evicting_from_; } + + bool ShrinkCache(int size_class) { + return caches_[size_class]->ShrinkCache(size_class); + } + + FreeList& central_freelist(int cl) { return caches_[cl]->freelist(); } + + void InsertRange(int cl, absl::Span<void*> batch) { + caches_[cl]->InsertRange(cl, batch); + } + + int RemoveRange(int cl, void** batch, int N) { + return caches_[cl]->RemoveRange(cl, batch, N); + } + + bool HasSpareCapacity(int cl) { return caches_[cl]->HasSpareCapacity(cl); } + + size_t tc_length(int cl) { return caches_[cl]->tc_length(); } + + std::vector<std::unique_ptr<TransferCache>> caches_; + + // From which size class to evict. + int evicting_from_ = 1; +}; + +template <template <typename FreeList, typename Manager> class TransferCacheT> +class TwoSizeClassEnv { + public: + using FreeList = MockCentralFreeList; + using Manager = TwoSizeClassManager<FreeList, TransferCacheT>; + using TransferCache = typename Manager::TransferCache; + + static constexpr int kMaxObjectsToMove = + ::tcmalloc::tcmalloc_internal::kMaxObjectsToMove; + + explicit TwoSizeClassEnv() = default; + + ~TwoSizeClassEnv() { Drain(); } + + void Insert(int cl, int n) { + const size_t batch_size = Manager::num_objects_to_move(cl); + std::vector<void*> bufs; + while (n > 0) { + int b = std::min<int>(n, batch_size); + bufs.resize(b); + central_freelist(cl).AllocateBatch(&bufs[0], b); + manager_.InsertRange(cl, absl::MakeSpan(bufs)); + n -= b; + } + } + + void Remove(int cl, int n) { + const size_t batch_size = Manager::num_objects_to_move(cl); + std::vector<void*> bufs; + while (n > 0) { + const int b = std::min<int>(n, batch_size); + bufs.resize(b); + const int removed = manager_.RemoveRange(cl, &bufs[0], b); + // Ensure we make progress. 
+ ASSERT_GT(removed, 0); + ASSERT_LE(removed, b); + central_freelist(cl).FreeBatch({&bufs[0], static_cast<size_t>(removed)}); + n -= removed; + } + } + + void Drain() { + for (int i = 0; i < Manager::kSizeClasses; ++i) { + Remove(i, manager_.tc_length(i)); + } + } + + Manager& transfer_cache_manager() { return manager_; } + + FreeList& central_freelist(int cl) { return manager_.central_freelist(cl); } + + private: + Manager manager_; +}; + +} // namespace tcmalloc_internal +} // namespace tcmalloc + +#endif // TCMALLOC_MOCK_TRANSFER_CACHE_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/noruntime_size_classes.cc b/contrib/libs/tcmalloc/tcmalloc/noruntime_size_classes.cc new file mode 100644 index 0000000000..c6dc90adcc --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/noruntime_size_classes.cc @@ -0,0 +1,33 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/base/attributes.h" +#include "tcmalloc/runtime_size_classes.h" +#include "tcmalloc/size_class_info.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// Default implementation doesn't load runtime size classes. +// To enable runtime size classes, link with :runtime_size_classes. +// This is in a separate library so that it doesn't get inlined inside common.cc +ABSL_ATTRIBUTE_WEAK ABSL_ATTRIBUTE_NOINLINE int MaybeSizeClassesFromEnv( + int max_size, int max_classes, SizeClassInfo* parsed) { + return -1; +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/page_allocator.cc b/contrib/libs/tcmalloc/tcmalloc/page_allocator.cc new file mode 100644 index 0000000000..e9599ef46a --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/page_allocator.cc @@ -0,0 +1,196 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "tcmalloc/page_allocator.h" + +#include <new> + +#include "tcmalloc/common.h" +#include "tcmalloc/experiment.h" +#include "tcmalloc/experiment_config.h" +#include "tcmalloc/huge_page_aware_allocator.h" +#include "tcmalloc/internal/environment.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/parameters.h" +#include "tcmalloc/static_vars.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +int ABSL_ATTRIBUTE_WEAK default_want_hpaa(); + +bool decide_want_hpaa() { +#if defined(__PPC64__) && defined(TCMALLOC_SMALL_BUT_SLOW) + // In small-but-slow, we choose a kMinSystemAlloc size that smaller than the + // hugepage size on PPC. If this situation changes, this static_assert will + // begin failing. + static_assert(kHugePageSize > kMinSystemAlloc, + "HPAA may now support PPC, update tests"); + return false; +#endif + + const char *e = + tcmalloc::tcmalloc_internal::thread_safe_getenv("TCMALLOC_HPAA_CONTROL"); + if (e) { + switch (e[0]) { + case '0': + if (kPageShift <= 12) { + return false; + } + + if (default_want_hpaa != nullptr) { + int default_hpaa = default_want_hpaa(); + if (default_hpaa < 0) { + return false; + } + } + + Log(kLog, __FILE__, __LINE__, + "Runtime opt-out from HPAA requires building with " + "//tcmalloc:want_no_hpaa." + ); + break; + case '1': + return true; + case '2': + return true; + default: + Crash(kCrash, __FILE__, __LINE__, "bad env var", e); + return false; + } + } + + if (default_want_hpaa != nullptr) { + int default_hpaa = default_want_hpaa(); + if (default_hpaa != 0) { + return default_hpaa > 0; + } + } + +#if defined(TCMALLOC_SMALL_BUT_SLOW) + // HPAA is neither small nor slow :) + return false; +#else + return true; +#endif +} + +bool want_hpaa() { + static bool use = decide_want_hpaa(); + + return use; +} + +PageAllocator::PageAllocator() { + const bool kUseHPAA = want_hpaa(); + if (kUseHPAA) { + normal_impl_[0] = + new (&choices_[0].hpaa) HugePageAwareAllocator(MemoryTag::kNormal); + if (Static::numa_topology().numa_aware()) { + normal_impl_[1] = + new (&choices_[1].hpaa) HugePageAwareAllocator(MemoryTag::kNormalP1); + } + sampled_impl_ = new (&choices_[kNumaPartitions + 0].hpaa) + HugePageAwareAllocator(MemoryTag::kSampled); + alg_ = HPAA; + } else { + normal_impl_[0] = new (&choices_[0].ph) PageHeap(MemoryTag::kNormal); + if (Static::numa_topology().numa_aware()) { + normal_impl_[1] = new (&choices_[1].ph) PageHeap(MemoryTag::kNormalP1); + } + sampled_impl_ = + new (&choices_[kNumaPartitions + 0].ph) PageHeap(MemoryTag::kSampled); + alg_ = PAGE_HEAP; + } +} + +void PageAllocator::ShrinkToUsageLimit() { + if (limit_ == std::numeric_limits<size_t>::max()) { + return; + } + BackingStats s = stats(); + size_t backed = s.system_bytes - s.unmapped_bytes + Static::metadata_bytes(); + if (backed <= limit_) { + // We're already fine. + return; + } + + limit_hits_++; + const size_t overage = backed - limit_; + const Length pages = LengthFromBytes(overage + kPageSize - 1); + if (ShrinkHardBy(pages)) { + return; + } + + // We're still not below limit. + if (limit_is_hard_) { + limit_ = std::numeric_limits<decltype(limit_)>::max(); + Crash( + kCrash, __FILE__, __LINE__, + "Hit hard tcmalloc heap limit (e.g. --tcmalloc_heap_size_hard_limit). " + "Aborting.\nIt was most likely set to catch " + "allocations that would crash the process anyway. " + ); + } + + // Print logs once. 
+ static bool warned = false; + if (warned) return; + warned = true; + Log(kLogWithStack, __FILE__, __LINE__, "Couldn't respect usage limit of ", + limit_, "and OOM is likely to follow."); +} + +bool PageAllocator::ShrinkHardBy(Length pages) { + Length ret = ReleaseAtLeastNPages(pages); + if (alg_ == HPAA) { + if (pages <= ret) { + // We released target amount. + return true; + } + + // At this point, we have no choice but to break up hugepages. + // However, if the client has turned off subrelease, and is using hard + // limits, then respect desire to do no subrelease ever. + if (limit_is_hard_ && !Parameters::hpaa_subrelease()) return false; + + static bool warned_hugepages = false; + if (!warned_hugepages) { + Log(kLogWithStack, __FILE__, __LINE__, "Couldn't respect usage limit of ", + limit_, "without breaking hugepages - performance will drop"); + warned_hugepages = true; + } + for (int partition = 0; partition < active_numa_partitions(); partition++) { + ret += static_cast<HugePageAwareAllocator *>(normal_impl_[partition]) + ->ReleaseAtLeastNPagesBreakingHugepages(pages - ret); + if (ret >= pages) { + return true; + } + } + + ret += static_cast<HugePageAwareAllocator *>(sampled_impl_) + ->ReleaseAtLeastNPagesBreakingHugepages(pages - ret); + } + // Return "true", if we got back under the limit. + return (pages <= ret); +} + +size_t PageAllocator::active_numa_partitions() const { + return Static::numa_topology().active_partitions(); +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/page_allocator.h b/contrib/libs/tcmalloc/tcmalloc/page_allocator.h new file mode 100644 index 0000000000..611482f999 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/page_allocator.h @@ -0,0 +1,241 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_PAGE_ALLOCATOR_H_ +#define TCMALLOC_PAGE_ALLOCATOR_H_ + +#include <inttypes.h> +#include <stddef.h> + +#include <utility> + +#include "absl/base/thread_annotations.h" +#include "tcmalloc/common.h" +#include "tcmalloc/huge_page_aware_allocator.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/optimization.h" +#include "tcmalloc/page_allocator_interface.h" +#include "tcmalloc/page_heap.h" +#include "tcmalloc/pages.h" +#include "tcmalloc/span.h" +#include "tcmalloc/stats.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +class PageAllocator { + public: + PageAllocator(); + ~PageAllocator() = delete; + // Allocate a run of "n" pages. Returns zero if out of memory. + // Caller should not pass "n == 0" -- instead, n should have + // been rounded up already. + // + // Any address in the returned Span is guaranteed to satisfy + // GetMemoryTag(addr) == "tag". + Span* New(Length n, MemoryTag tag) ABSL_LOCKS_EXCLUDED(pageheap_lock); + + // As New, but the returned span is aligned to a <align>-page boundary. + // <align> must be a power of two. 
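+ // For example (illustrative), NewAligned(Length(16), Length(4),
+ // MemoryTag::kNormal) returns a 16-page span whose first page index is a
+ // multiple of 4.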
+ Span* NewAligned(Length n, Length align, MemoryTag tag) + ABSL_LOCKS_EXCLUDED(pageheap_lock); + + // Delete the span "[p, p+n-1]". + // REQUIRES: span was returned by earlier call to New() with the same value of + // "tag" and has not yet been deleted. + void Delete(Span* span, MemoryTag tag) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + BackingStats stats() const ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + void GetSmallSpanStats(SmallSpanStats* result) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + void GetLargeSpanStats(LargeSpanStats* result) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Try to release at least num_pages for reuse by the OS. Returns + // the actual number of pages released, which may be less than + // num_pages if there weren't enough pages to release. The result + // may also be larger than num_pages since page_heap might decide to + // release one large range instead of fragmenting it into two + // smaller released and unreleased ranges. + Length ReleaseAtLeastNPages(Length num_pages) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Prints stats about the page heap to *out. + void Print(Printer* out, MemoryTag tag) ABSL_LOCKS_EXCLUDED(pageheap_lock); + void PrintInPbtxt(PbtxtRegion* region, MemoryTag tag) + ABSL_LOCKS_EXCLUDED(pageheap_lock); + + void set_limit(size_t limit, bool is_hard) ABSL_LOCKS_EXCLUDED(pageheap_lock); + std::pair<size_t, bool> limit() const ABSL_LOCKS_EXCLUDED(pageheap_lock); + int64_t limit_hits() const ABSL_LOCKS_EXCLUDED(pageheap_lock); + + // If we have a usage limit set, ensure we're not violating it from our latest + // allocation. + void ShrinkToUsageLimit() ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + const PageAllocInfo& info(MemoryTag tag) const + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + enum Algorithm { + PAGE_HEAP = 0, + HPAA = 1, + }; + + Algorithm algorithm() const { return alg_; } + + private: + bool ShrinkHardBy(Length pages) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + ABSL_ATTRIBUTE_RETURNS_NONNULL PageAllocatorInterface* impl( + MemoryTag tag) const; + + size_t active_numa_partitions() const; + + static constexpr size_t kNumHeaps = kNumaPartitions + 1; + + union Choices { + Choices() : dummy(0) {} + ~Choices() {} + int dummy; + PageHeap ph; + HugePageAwareAllocator hpaa; + } choices_[kNumHeaps]; + std::array<PageAllocatorInterface*, kNumaPartitions> normal_impl_; + PageAllocatorInterface* sampled_impl_; + Algorithm alg_; + + bool limit_is_hard_{false}; + // Max size of backed spans we will attempt to maintain. + size_t limit_{std::numeric_limits<size_t>::max()}; + // The number of times the limit has been hit. 
+ int64_t limit_hits_{0}; +}; + +inline PageAllocatorInterface* PageAllocator::impl(MemoryTag tag) const { + switch (tag) { + case MemoryTag::kNormalP0: + return normal_impl_[0]; + case MemoryTag::kNormalP1: + return normal_impl_[1]; + case MemoryTag::kSampled: + return sampled_impl_; + default: + ASSUME(false); + __builtin_unreachable(); + } +} + +inline Span* PageAllocator::New(Length n, MemoryTag tag) { + return impl(tag)->New(n); +} + +inline Span* PageAllocator::NewAligned(Length n, Length align, MemoryTag tag) { + return impl(tag)->NewAligned(n, align); +} + +inline void PageAllocator::Delete(Span* span, MemoryTag tag) { + impl(tag)->Delete(span); +} + +inline BackingStats PageAllocator::stats() const { + BackingStats ret = normal_impl_[0]->stats(); + for (int partition = 1; partition < active_numa_partitions(); partition++) { + ret += normal_impl_[partition]->stats(); + } + ret += sampled_impl_->stats(); + return ret; +} + +inline void PageAllocator::GetSmallSpanStats(SmallSpanStats* result) { + SmallSpanStats normal, sampled; + for (int partition = 0; partition < active_numa_partitions(); partition++) { + SmallSpanStats part_stats; + normal_impl_[partition]->GetSmallSpanStats(&part_stats); + normal += part_stats; + } + sampled_impl_->GetSmallSpanStats(&sampled); + *result = normal + sampled; +} + +inline void PageAllocator::GetLargeSpanStats(LargeSpanStats* result) { + LargeSpanStats normal, sampled; + for (int partition = 0; partition < active_numa_partitions(); partition++) { + LargeSpanStats part_stats; + normal_impl_[partition]->GetLargeSpanStats(&part_stats); + normal += part_stats; + } + sampled_impl_->GetLargeSpanStats(&sampled); + *result = normal + sampled; +} + +inline Length PageAllocator::ReleaseAtLeastNPages(Length num_pages) { + Length released; + for (int partition = 0; partition < active_numa_partitions(); partition++) { + released += + normal_impl_[partition]->ReleaseAtLeastNPages(num_pages - released); + if (released >= num_pages) { + return released; + } + } + + released += sampled_impl_->ReleaseAtLeastNPages(num_pages - released); + return released; +} + +inline void PageAllocator::Print(Printer* out, MemoryTag tag) { + const absl::string_view label = MemoryTagToLabel(tag); + if (tag != MemoryTag::kNormal) { + out->printf("\n>>>>>>> Begin %s page allocator <<<<<<<\n", label); + } + impl(tag)->Print(out); + if (tag != MemoryTag::kNormal) { + out->printf(">>>>>>> End %s page allocator <<<<<<<\n", label); + } +} + +inline void PageAllocator::PrintInPbtxt(PbtxtRegion* region, MemoryTag tag) { + PbtxtRegion pa = region->CreateSubRegion("page_allocator"); + pa.PrintRaw("tag", MemoryTagToLabel(tag)); + impl(tag)->PrintInPbtxt(&pa); +} + +inline void PageAllocator::set_limit(size_t limit, bool is_hard) { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + limit_ = limit; + limit_is_hard_ = is_hard; +} + +inline std::pair<size_t, bool> PageAllocator::limit() const { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + return {limit_, limit_is_hard_}; +} + +inline int64_t PageAllocator::limit_hits() const { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + return limit_hits_; +} + +inline const PageAllocInfo& PageAllocator::info(MemoryTag tag) const { + return impl(tag)->info(); +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_PAGE_ALLOCATOR_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/page_allocator_interface.cc b/contrib/libs/tcmalloc/tcmalloc/page_allocator_interface.cc new file 
mode 100644 index 0000000000..3173247acb --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/page_allocator_interface.cc @@ -0,0 +1,89 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/page_allocator_interface.h" + +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <stdio.h> +#include <sys/stat.h> +#include <unistd.h> + +#include "tcmalloc/internal/environment.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/optimization.h" +#include "tcmalloc/internal/util.h" +#include "tcmalloc/static_vars.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +static int OpenLog(MemoryTag tag) { + const char *fname = [&]() { + switch (tag) { + case MemoryTag::kNormal: + return thread_safe_getenv("TCMALLOC_PAGE_LOG_FILE"); + case MemoryTag::kNormalP1: + return thread_safe_getenv("TCMALLOC_PAGE_LOG_FILE_P1"); + case MemoryTag::kSampled: + return thread_safe_getenv("TCMALLOC_SAMPLED_PAGE_LOG_FILE"); + default: + ASSUME(false); + __builtin_unreachable(); + } + }(); + + if (ABSL_PREDICT_TRUE(!fname)) return -1; + + if (getuid() != geteuid() || getgid() != getegid()) { + Log(kLog, __FILE__, __LINE__, "Cannot take a pagetrace from setuid binary"); + return -1; + } + char buf[PATH_MAX]; + // Tag file with PID - handles forking children much better. + int pid = getpid(); + // Blaze tests can output here for recovery of the output file + const char *test_dir = thread_safe_getenv("TEST_UNDECLARED_OUTPUTS_DIR"); + if (test_dir) { + snprintf(buf, sizeof(buf), "%s/%s.%d", test_dir, fname, pid); + } else { + snprintf(buf, sizeof(buf), "%s.%d", fname, pid); + } + int fd = + signal_safe_open(buf, O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR); + + if (fd < 0) { + Crash(kCrash, __FILE__, __LINE__, fd, errno, fname); + } + + return fd; +} + +PageAllocatorInterface::PageAllocatorInterface(const char *label, MemoryTag tag) + : PageAllocatorInterface(label, &Static::pagemap(), tag) {} + +PageAllocatorInterface::PageAllocatorInterface(const char *label, PageMap *map, + MemoryTag tag) + : info_(label, OpenLog(tag)), pagemap_(map), tag_(tag) {} + +PageAllocatorInterface::~PageAllocatorInterface() { + // This is part of tcmalloc statics - they must be immortal. + Crash(kCrash, __FILE__, __LINE__, "should never destroy this"); +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/page_allocator_interface.h b/contrib/libs/tcmalloc/tcmalloc/page_allocator_interface.h new file mode 100644 index 0000000000..cf1dc67897 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/page_allocator_interface.h @@ -0,0 +1,97 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_PAGE_ALLOCATOR_INTERFACE_H_ +#define TCMALLOC_PAGE_ALLOCATOR_INTERFACE_H_ + +#include <stddef.h> + +#include <limits> +#include <utility> + +#include "absl/base/internal/spinlock.h" +#include "absl/base/thread_annotations.h" +#include "tcmalloc/common.h" +#include "tcmalloc/span.h" +#include "tcmalloc/stats.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +class PageMap; + +class PageAllocatorInterface { + public: + PageAllocatorInterface(const char* label, MemoryTag tag); + // For testing: use a non-default pagemap. + PageAllocatorInterface(const char* label, PageMap* map, MemoryTag tag); + virtual ~PageAllocatorInterface(); + // Allocate a run of "n" pages. Returns zero if out of memory. + // Caller should not pass "n == 0" -- instead, n should have + // been rounded up already. + virtual Span* New(Length n) ABSL_LOCKS_EXCLUDED(pageheap_lock) = 0; + + // As New, but the returned span is aligned to a <align>-page boundary. + // <align> must be a power of two. + virtual Span* NewAligned(Length n, Length align) + ABSL_LOCKS_EXCLUDED(pageheap_lock) = 0; + + // Delete the span "[p, p+n-1]". + // REQUIRES: span was returned by earlier call to New() and + // has not yet been deleted. + virtual void Delete(Span* span) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) = 0; + + virtual BackingStats stats() const + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) = 0; + + virtual void GetSmallSpanStats(SmallSpanStats* result) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) = 0; + + virtual void GetLargeSpanStats(LargeSpanStats* result) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) = 0; + + // Try to release at least num_pages for reuse by the OS. Returns + // the actual number of pages released, which may be less than + // num_pages if there weren't enough pages to release. The result + // may also be larger than num_pages since page_heap might decide to + // release one large range instead of fragmenting it into two + // smaller released and unreleased ranges. + virtual Length ReleaseAtLeastNPages(Length num_pages) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) = 0; + + // Prints stats about the page heap to *out. + virtual void Print(Printer* out) ABSL_LOCKS_EXCLUDED(pageheap_lock) = 0; + + // Prints stats about the page heap in pbtxt format. + // + // TODO(b/130249686): Remove this one and make `Print` print in pbtxt. 
+ virtual void PrintInPbtxt(PbtxtRegion* region) + ABSL_LOCKS_EXCLUDED(pageheap_lock) = 0; + + const PageAllocInfo& info() const { return info_; } + + protected: + PageAllocInfo info_ ABSL_GUARDED_BY(pageheap_lock); + PageMap* pagemap_; + + MemoryTag tag_; // The type of tagged memory this heap manages +}; + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_PAGE_ALLOCATOR_INTERFACE_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/page_allocator_test.cc b/contrib/libs/tcmalloc/tcmalloc/page_allocator_test.cc new file mode 100644 index 0000000000..d302c085a9 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/page_allocator_test.cc @@ -0,0 +1,145 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Tests for infrastructure common to page allocator implementations +// (stats and logging.) +#include "tcmalloc/page_allocator.h" + +#include <stdint.h> +#include <stdlib.h> +#include <unistd.h> + +#include <memory> +#include <new> +#include <string> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/internal/spinlock.h" +#include "absl/memory/memory.h" +#include "absl/strings/str_cat.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/malloc_extension.h" +#include "tcmalloc/page_allocator_test_util.h" +#include "tcmalloc/static_vars.h" +#include "tcmalloc/stats.h" + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +class PageAllocatorTest : public testing::Test { + protected: + // Not in constructor so subclasses can mess about with environment + // variables. + void SetUp() override { + // If this test is not linked against TCMalloc, the global arena used for + // metadata will not be initialized. 
+ Static::InitIfNecessary(); + + before_ = MallocExtension::GetRegionFactory(); + extra_ = new ExtraRegionFactory(before_); + MallocExtension::SetRegionFactory(extra_); + void *p = malloc(sizeof(PageAllocator)); + allocator_ = new (p) PageAllocator; + } + void TearDown() override { + MallocExtension::SetRegionFactory(before_); + delete extra_; + free(allocator_); + } + + Span *New(Length n) { return allocator_->New(n, MemoryTag::kNormal); } + Span *NewAligned(Length n, Length align) { + return allocator_->NewAligned(n, align, MemoryTag::kNormal); + } + void Delete(Span *s) { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + allocator_->Delete(s, MemoryTag::kNormal); + } + + Length Release(Length n) { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + return allocator_->ReleaseAtLeastNPages(n); + } + + std::string Print() { + std::vector<char> buf(1024 * 1024); + Printer out(&buf[0], buf.size()); + allocator_->Print(&out, MemoryTag::kNormal); + + return std::string(&buf[0]); + } + + PageAllocator *allocator_; + ExtraRegionFactory *extra_; + AddressRegionFactory *before_; +}; + +// We've already tested in stats_test that PageAllocInfo keeps good stats; +// here we're just testing that we make the proper Record calls. +TEST_F(PageAllocatorTest, Record) { + for (int i = 0; i < 15; ++i) { + Delete(New(Length(1))); + } + + std::vector<Span *> spans; + for (int i = 0; i < 20; ++i) { + spans.push_back(New(Length(2))); + } + + for (int i = 0; i < 25; ++i) { + Delete(NewAligned(Length(3), Length(2))); + } + { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + auto info = allocator_->info(MemoryTag::kNormal); + + CHECK_CONDITION(15 == info.counts_for(Length(1)).nalloc); + CHECK_CONDITION(15 == info.counts_for(Length(1)).nfree); + + CHECK_CONDITION(20 == info.counts_for(Length(2)).nalloc); + CHECK_CONDITION(0 == info.counts_for(Length(2)).nfree); + + CHECK_CONDITION(25 == info.counts_for(Length(3)).nalloc); + CHECK_CONDITION(25 == info.counts_for(Length(3)).nfree); + + for (auto i = Length(4); i <= kMaxPages; ++i) { + CHECK_CONDITION(0 == info.counts_for(i).nalloc); + CHECK_CONDITION(0 == info.counts_for(i).nfree); + } + + const Length absurd = + Length(uintptr_t{1} << (kAddressBits - 1 - kPageShift)); + for (Length i = kMaxPages + Length(1); i < absurd; i *= 2) { + CHECK_CONDITION(0 == info.counts_for(i).nalloc); + CHECK_CONDITION(0 == info.counts_for(i).nfree); + } + } + for (auto s : spans) Delete(s); +} + +// And that we call the print method properly. +TEST_F(PageAllocatorTest, PrintIt) { + Delete(New(Length(1))); + std::string output = Print(); + EXPECT_THAT(output, testing::ContainsRegex("stats on allocation sizes")); +} + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/page_allocator_test_util.h b/contrib/libs/tcmalloc/tcmalloc/page_allocator_test_util.h new file mode 100644 index 0000000000..55f134bfdd --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/page_allocator_test_util.h @@ -0,0 +1,79 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_HUGE_PAGE_AWARE_ALLOCATOR_TEST_UTIL_H_ +#define TCMALLOC_HUGE_PAGE_AWARE_ALLOCATOR_TEST_UTIL_H_ + +#include <tuple> +#include <utility> + +#include "tcmalloc/malloc_extension.h" + +// TODO(b/116000878): Remove dependency on common.h if it causes ODR issues. +#include "tcmalloc/common.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// AddressRegion that adds some padding on either side of each +// allocation. This prevents multiple PageAllocators in the system +// from noticing one another's presence in the pagemap. +class ExtraRegion : public AddressRegion { + public: + explicit ExtraRegion(AddressRegion *under) : under_(under) {} + + std::pair<void *, size_t> Alloc(size_t size, size_t alignment) override { + size_t big = size + alignment + alignment; + // Can't pad if allocation is within 2 * alignment of region size. + if (big > kMinMmapAlloc) { + return under_->Alloc(size, alignment); + } + void *ptr; + size_t actual_size; + std::tie(ptr, actual_size) = under_->Alloc(big, alignment); + if (!ptr) return {nullptr, 0}; + actual_size = actual_size - alignment * 2; + return {static_cast<char *>(ptr) + alignment, actual_size}; + } + + private: + AddressRegion *under_; +}; + +class ExtraRegionFactory : public AddressRegionFactory { + public: + explicit ExtraRegionFactory(AddressRegionFactory *under) : under_(under) {} + + AddressRegion *Create(void *start, size_t size, UsageHint hint) override { + AddressRegion *underlying_region = under_->Create(start, size, hint); + CHECK_CONDITION(underlying_region); + void *region_space = MallocInternal(sizeof(ExtraRegion)); + CHECK_CONDITION(region_space); + return new (region_space) ExtraRegion(underlying_region); + } + + size_t GetStats(absl::Span<char> buffer) override { + return under_->GetStats(buffer); + } + + private: + AddressRegionFactory *under_; +}; + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_HUGE_PAGE_AWARE_ALLOCATOR_TEST_UTIL_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/page_heap.cc b/contrib/libs/tcmalloc/tcmalloc/page_heap.cc new file mode 100644 index 0000000000..c6b4c6dbd1 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/page_heap.cc @@ -0,0 +1,528 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "tcmalloc/page_heap.h" + +#include <stddef.h> + +#include <limits> + +#include "absl/base/internal/cycleclock.h" +#include "absl/base/internal/spinlock.h" +#include "absl/numeric/bits.h" +#include "tcmalloc/common.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/page_heap_allocator.h" +#include "tcmalloc/pagemap.h" +#include "tcmalloc/pages.h" +#include "tcmalloc/parameters.h" +#include "tcmalloc/static_vars.h" +#include "tcmalloc/system-alloc.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// Helper function to record span address into pageheap +void PageHeap::RecordSpan(Span* span) { + pagemap_->Set(span->first_page(), span); + if (span->num_pages() > Length(1)) { + pagemap_->Set(span->last_page(), span); + } +} + +PageHeap::PageHeap(MemoryTag tag) : PageHeap(&Static::pagemap(), tag) {} + +PageHeap::PageHeap(PageMap* map, MemoryTag tag) + : PageAllocatorInterface("PageHeap", map, tag), + // Start scavenging at kMaxPages list + release_index_(kMaxPages.raw_num()) {} + +Span* PageHeap::SearchFreeAndLargeLists(Length n, bool* from_returned) { + ASSERT(Check()); + ASSERT(n > Length(0)); + + // Find first size >= n that has a non-empty list + for (Length s = n; s < kMaxPages; ++s) { + SpanList* ll = &free_[s.raw_num()].normal; + // If we're lucky, ll is non-empty, meaning it has a suitable span. + if (!ll->empty()) { + ASSERT(ll->first()->location() == Span::ON_NORMAL_FREELIST); + *from_returned = false; + return Carve(ll->first(), n); + } + // Alternatively, maybe there's a usable returned span. + ll = &free_[s.raw_num()].returned; + if (!ll->empty()) { + ASSERT(ll->first()->location() == Span::ON_RETURNED_FREELIST); + *from_returned = true; + return Carve(ll->first(), n); + } + } + // No luck in free lists, our last chance is in a larger class. + return AllocLarge(n, from_returned); // May be NULL +} + +Span* PageHeap::AllocateSpan(Length n, bool* from_returned) { + ASSERT(Check()); + Span* result = SearchFreeAndLargeLists(n, from_returned); + if (result != nullptr) return result; + + // Grow the heap and try again. + if (!GrowHeap(n)) { + ASSERT(Check()); + return nullptr; + } + + result = SearchFreeAndLargeLists(n, from_returned); + // our new memory should be unbacked + ASSERT(*from_returned); + return result; +} + +Span* PageHeap::New(Length n) { + ASSERT(n > Length(0)); + bool from_returned; + Span* result; + { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + result = AllocateSpan(n, &from_returned); + if (result) Static::page_allocator().ShrinkToUsageLimit(); + if (result) info_.RecordAlloc(result->first_page(), result->num_pages()); + } + + if (result != nullptr && from_returned) { + SystemBack(result->start_address(), result->bytes_in_span()); + } + + ASSERT(!result || GetMemoryTag(result->start_address()) == tag_); + return result; +} + +static bool IsSpanBetter(Span* span, Span* best, Length n) { + if (span->num_pages() < n) { + return false; + } + if (best == nullptr) { + return true; + } + if (span->num_pages() < best->num_pages()) { + return true; + } + if (span->num_pages() > best->num_pages()) { + return false; + } + return span->first_page() < best->first_page(); +} + +// We could do slightly more efficient things here (we do some +// unnecessary Carves in New) but it's not anywhere +// close to a fast path, and is going to be replaced soon anyway, so +// don't bother. 
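The aligned-allocation path that follows over-allocates by align - 1 pages and then rounds the first page index up to the requested alignment with a mask. A standalone sketch of just that index arithmetic (hypothetical helper name; plain size_t page indices instead of PageId):

#include <cassert>
#include <cstddef>

// Round page index p up to a multiple of align; align must be a power of two.
size_t RoundUpPageIndex(size_t p, size_t align) {
  const size_t mask = align - 1;
  return (p + mask) & ~mask;
}

int main() {
  assert(RoundUpPageIndex(1021, 4) == 1024);  // 3 "before" pages get trimmed off.
  assert(RoundUpPageIndex(1024, 4) == 1024);  // Already aligned: nothing trimmed.
  assert(RoundUpPageIndex(1022, 8) == 1024);  // 2 pages before, (8-1)-2 = 5 after.
  return 0;
}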
+Span* PageHeap::NewAligned(Length n, Length align) { + ASSERT(n > Length(0)); + ASSERT(absl::has_single_bit(align.raw_num())); + + if (align <= Length(1)) { + return New(n); + } + + bool from_returned; + Span* span; + { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + Length extra = align - Length(1); + span = AllocateSpan(n + extra, &from_returned); + if (span == nullptr) return nullptr; + // <span> certainly contains an appropriately aligned region; find it + // and chop off the rest. + PageId p = span->first_page(); + const Length mask = align - Length(1); + PageId aligned = PageId{(p.index() + mask.raw_num()) & ~mask.raw_num()}; + ASSERT(aligned.index() % align.raw_num() == 0); + ASSERT(p <= aligned); + ASSERT(aligned + n <= p + span->num_pages()); + // we have <extra> too many pages now, possible all before, possibly all + // after, maybe both + Length before = aligned - p; + Length after = extra - before; + span->set_first_page(aligned); + span->set_num_pages(n); + RecordSpan(span); + + const Span::Location loc = + from_returned ? Span::ON_RETURNED_FREELIST : Span::ON_NORMAL_FREELIST; + if (before > Length(0)) { + Span* extra = Span::New(p, before); + extra->set_location(loc); + RecordSpan(extra); + MergeIntoFreeList(extra); + } + + if (after > Length(0)) { + Span* extra = Span::New(aligned + n, after); + extra->set_location(loc); + RecordSpan(extra); + MergeIntoFreeList(extra); + } + + info_.RecordAlloc(aligned, n); + } + + if (span != nullptr && from_returned) { + SystemBack(span->start_address(), span->bytes_in_span()); + } + + ASSERT(!span || GetMemoryTag(span->start_address()) == tag_); + return span; +} + +Span* PageHeap::AllocLarge(Length n, bool* from_returned) { + // find the best span (closest to n in size). + // The following loops implements address-ordered best-fit. + Span* best = nullptr; + + // Search through normal list + for (Span* span : large_.normal) { + ASSERT(span->location() == Span::ON_NORMAL_FREELIST); + if (IsSpanBetter(span, best, n)) { + best = span; + *from_returned = false; + } + } + + // Search through released list in case it has a better fit + for (Span* span : large_.returned) { + ASSERT(span->location() == Span::ON_RETURNED_FREELIST); + if (IsSpanBetter(span, best, n)) { + best = span; + *from_returned = true; + } + } + + return best == nullptr ? nullptr : Carve(best, n); +} + +Span* PageHeap::Carve(Span* span, Length n) { + ASSERT(n > Length(0)); + ASSERT(span->location() != Span::IN_USE); + const Span::Location old_location = span->location(); + RemoveFromFreeList(span); + span->set_location(Span::IN_USE); + + const Length extra = span->num_pages() - n; + if (extra > Length(0)) { + Span* leftover = nullptr; + // Check if this span has another span on the right but not on the left. + // There is one special case we want to handle: if heap grows down (as it is + // usually happens with mmap allocator) and user allocates lots of large + // persistent memory blocks (namely, kMinSystemAlloc + epsilon), then we + // want to return the last part of the span to user and push the beginning + // to the freelist. + // Otherwise system allocator would allocate 2 * kMinSystemAlloc, we return + // the first kMinSystemAlloc + epsilon to user and add the remaining + // kMinSystemAlloc - epsilon to the freelist. The remainder is not large + // enough to satisfy the next allocation request, so we allocate + // another 2 * kMinSystemAlloc from system and the process repeats wasting + // half of memory. 
+ // If we return the last part to user, then the remainder will be merged + // with the next system allocation which will result in dense packing. + // There are no other known cases where span splitting strategy matters, + // so in other cases we return beginning to user. + if (pagemap_->GetDescriptor(span->first_page() - Length(1)) == nullptr && + pagemap_->GetDescriptor(span->last_page() + Length(1)) != nullptr) { + leftover = Span::New(span->first_page(), extra); + span->set_first_page(span->first_page() + extra); + pagemap_->Set(span->first_page(), span); + } else { + leftover = Span::New(span->first_page() + n, extra); + } + leftover->set_location(old_location); + RecordSpan(leftover); + PrependToFreeList(leftover); // Skip coalescing - no candidates possible + leftover->set_freelist_added_time(span->freelist_added_time()); + span->set_num_pages(n); + pagemap_->Set(span->last_page(), span); + } + ASSERT(Check()); + return span; +} + +void PageHeap::Delete(Span* span) { + ASSERT(GetMemoryTag(span->start_address()) == tag_); + info_.RecordFree(span->first_page(), span->num_pages()); + ASSERT(Check()); + ASSERT(span->location() == Span::IN_USE); + ASSERT(!span->sampled()); + ASSERT(span->num_pages() > Length(0)); + ASSERT(pagemap_->GetDescriptor(span->first_page()) == span); + ASSERT(pagemap_->GetDescriptor(span->last_page()) == span); + span->set_location(Span::ON_NORMAL_FREELIST); + MergeIntoFreeList(span); // Coalesces if possible + ASSERT(Check()); +} + +void PageHeap::MergeIntoFreeList(Span* span) { + ASSERT(span->location() != Span::IN_USE); + span->set_freelist_added_time(absl::base_internal::CycleClock::Now()); + + // Coalesce -- we guarantee that "p" != 0, so no bounds checking + // necessary. We do not bother resetting the stale pagemap + // entries for the pieces we are merging together because we only + // care about the pagemap entries for the boundaries. + // + // Note that only similar spans are merged together. For example, + // we do not coalesce "returned" spans with "normal" spans. + const PageId p = span->first_page(); + const Length n = span->num_pages(); + Span* prev = pagemap_->GetDescriptor(p - Length(1)); + if (prev != nullptr && prev->location() == span->location()) { + // Merge preceding span into this span + ASSERT(prev->last_page() + Length(1) == p); + const Length len = prev->num_pages(); + span->AverageFreelistAddedTime(prev); + RemoveFromFreeList(prev); + Span::Delete(prev); + span->set_first_page(span->first_page() - len); + span->set_num_pages(span->num_pages() + len); + pagemap_->Set(span->first_page(), span); + } + Span* next = pagemap_->GetDescriptor(p + n); + if (next != nullptr && next->location() == span->location()) { + // Merge next span into this span + ASSERT(next->first_page() == p + n); + const Length len = next->num_pages(); + span->AverageFreelistAddedTime(next); + RemoveFromFreeList(next); + Span::Delete(next); + span->set_num_pages(span->num_pages() + len); + pagemap_->Set(span->last_page(), span); + } + + PrependToFreeList(span); +} + +void PageHeap::PrependToFreeList(Span* span) { + ASSERT(span->location() != Span::IN_USE); + SpanListPair* list = (span->num_pages() < kMaxPages) + ? 
&free_[span->num_pages().raw_num()] + : &large_; + if (span->location() == Span::ON_NORMAL_FREELIST) { + stats_.free_bytes += span->bytes_in_span(); + list->normal.prepend(span); + } else { + stats_.unmapped_bytes += span->bytes_in_span(); + list->returned.prepend(span); + } +} + +void PageHeap::RemoveFromFreeList(Span* span) { + ASSERT(span->location() != Span::IN_USE); + if (span->location() == Span::ON_NORMAL_FREELIST) { + stats_.free_bytes -= span->bytes_in_span(); + } else { + stats_.unmapped_bytes -= span->bytes_in_span(); + } + span->RemoveFromList(); +} + +Length PageHeap::ReleaseLastNormalSpan(SpanListPair* slist) { + Span* s = slist->normal.last(); + ASSERT(s->location() == Span::ON_NORMAL_FREELIST); + RemoveFromFreeList(s); + + // We're dropping very important and otherwise contended pageheap_lock around + // call to potentially very slow syscall to release pages. Those syscalls can + // be slow even with "advanced" things such as MADV_FREE{,ABLE} because they + // have to walk actual page tables, and we sometimes deal with large spans, + // which sometimes takes lots of time. Plus Linux grabs per-address space + // mm_sem lock which could be extremely contended at times. So it is best if + // we avoid holding one contended lock while waiting for another. + // + // Note, we set span location to in-use, because our span could be found via + // pagemap in e.g. MergeIntoFreeList while we're not holding the lock. By + // marking it in-use we prevent this possibility. So span is removed from free + // list and marked "unmergable" and that guarantees safety during unlock-ful + // release. + // + // Taking the span off the free list will make our stats reporting wrong if + // another thread happens to try to measure memory usage during the release, + // so we fix up the stats during the unlocked period. + stats_.free_bytes += s->bytes_in_span(); + s->set_location(Span::IN_USE); + pageheap_lock.Unlock(); + + const Length n = s->num_pages(); + SystemRelease(s->start_address(), s->bytes_in_span()); + + pageheap_lock.Lock(); + stats_.free_bytes -= s->bytes_in_span(); + s->set_location(Span::ON_RETURNED_FREELIST); + MergeIntoFreeList(s); // Coalesces if possible. + return n; +} + +Length PageHeap::ReleaseAtLeastNPages(Length num_pages) { + Length released_pages; + Length prev_released_pages = Length::max() + Length(1); + + // Round robin through the lists of free spans, releasing the last + // span in each list. Stop after releasing at least num_pages. + while (released_pages < num_pages) { + if (released_pages == prev_released_pages) { + // Last iteration of while loop made no progress. + break; + } + prev_released_pages = released_pages; + + for (int i = 0; i < kMaxPages.raw_num() + 1 && released_pages < num_pages; + i++, release_index_++) { + if (release_index_ > kMaxPages.raw_num()) release_index_ = 0; + SpanListPair* slist = (release_index_ == kMaxPages.raw_num()) + ? 
&large_ + : &free_[release_index_]; + if (!slist->normal.empty()) { + Length released_len = ReleaseLastNormalSpan(slist); + released_pages += released_len; + } + } + } + info_.RecordRelease(num_pages, released_pages); + return released_pages; +} + +void PageHeap::GetSmallSpanStats(SmallSpanStats* result) { + for (int s = 0; s < kMaxPages.raw_num(); s++) { + result->normal_length[s] = free_[s].normal.length(); + result->returned_length[s] = free_[s].returned.length(); + } +} + +void PageHeap::GetLargeSpanStats(LargeSpanStats* result) { + result->spans = 0; + result->normal_pages = Length(0); + result->returned_pages = Length(0); + for (Span* s : large_.normal) { + result->normal_pages += s->num_pages(); + result->spans++; + } + for (Span* s : large_.returned) { + result->returned_pages += s->num_pages(); + result->spans++; + } +} + +bool PageHeap::GrowHeap(Length n) { + if (n > Length::max()) return false; + size_t actual_size; + void* ptr = SystemAlloc(n.in_bytes(), &actual_size, kPageSize, tag_); + if (ptr == nullptr) return false; + n = BytesToLengthFloor(actual_size); + + stats_.system_bytes += actual_size; + const PageId p = PageIdContaining(ptr); + ASSERT(p > PageId{0}); + + // If we have already a lot of pages allocated, just pre allocate a bunch of + // memory for the page map. This prevents fragmentation by pagemap metadata + // when a program keeps allocating and freeing large blocks. + + // Make sure pagemap has entries for all of the new pages. + // Plus ensure one before and one after so coalescing code + // does not need bounds-checking. + if (pagemap_->Ensure(p - Length(1), n + Length(2))) { + // Pretend the new area is allocated and then return it to cause + // any necessary coalescing to occur. + Span* span = Span::New(p, n); + RecordSpan(span); + span->set_location(Span::ON_RETURNED_FREELIST); + MergeIntoFreeList(span); + ASSERT(Check()); + return true; + } else { + // We could not allocate memory within the pagemap. + // Note the following leaks virtual memory, but at least it gets rid of + // the underlying physical memory. + SystemRelease(ptr, actual_size); + return false; + } +} + +bool PageHeap::Check() { + ASSERT(free_[0].normal.empty()); + ASSERT(free_[0].returned.empty()); + return true; +} + +void PageHeap::PrintInPbtxt(PbtxtRegion* region) { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + SmallSpanStats small; + GetSmallSpanStats(&small); + LargeSpanStats large; + GetLargeSpanStats(&large); + + struct Helper { + static void RecordAges(PageAgeHistograms* ages, const SpanListPair& pair) { + for (const Span* s : pair.normal) { + ages->RecordRange(s->num_pages(), false, s->freelist_added_time()); + } + + for (const Span* s : pair.returned) { + ages->RecordRange(s->num_pages(), true, s->freelist_added_time()); + } + } + }; + + PageAgeHistograms ages(absl::base_internal::CycleClock::Now()); + for (int s = 0; s < kMaxPages.raw_num(); ++s) { + Helper::RecordAges(&ages, free_[s]); + } + Helper::RecordAges(&ages, large_); + PrintStatsInPbtxt(region, small, large, ages); + // We do not collect info_.PrintInPbtxt for now. 
+} + +void PageHeap::Print(Printer* out) { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + SmallSpanStats small; + GetSmallSpanStats(&small); + LargeSpanStats large; + GetLargeSpanStats(&large); + PrintStats("PageHeap", out, stats_, small, large, true); + + struct Helper { + static void RecordAges(PageAgeHistograms* ages, const SpanListPair& pair) { + for (const Span* s : pair.normal) { + ages->RecordRange(s->num_pages(), false, s->freelist_added_time()); + } + + for (const Span* s : pair.returned) { + ages->RecordRange(s->num_pages(), true, s->freelist_added_time()); + } + } + }; + + PageAgeHistograms ages(absl::base_internal::CycleClock::Now()); + for (int s = 0; s < kMaxPages.raw_num(); ++s) { + Helper::RecordAges(&ages, free_[s]); + } + Helper::RecordAges(&ages, large_); + ages.Print("PageHeap", out); + + info_.Print(out); +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/page_heap.h b/contrib/libs/tcmalloc/tcmalloc/page_heap.h new file mode 100644 index 0000000000..86cf5d01df --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/page_heap.h @@ -0,0 +1,161 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_PAGE_HEAP_H_ +#define TCMALLOC_PAGE_HEAP_H_ + +#include <stdint.h> + +#include "absl/base/thread_annotations.h" +#include "tcmalloc/common.h" +#include "tcmalloc/page_allocator_interface.h" +#include "tcmalloc/span.h" +#include "tcmalloc/stats.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// ------------------------------------------------------------------------- +// Page-level allocator +// * Eager coalescing +// +// Heap for page-level allocation. We allow allocating and freeing a +// contiguous runs of pages (called a "span"). +// ------------------------------------------------------------------------- + +class PageHeap final : public PageAllocatorInterface { + public: + explicit PageHeap(MemoryTag tag); + // for testing + PageHeap(PageMap* map, MemoryTag tag); + ~PageHeap() override = default; + + // Allocate a run of "n" pages. Returns zero if out of memory. + // Caller should not pass "n == 0" -- instead, n should have + // been rounded up already. + // The returned memory is backed. + Span* New(Length n) ABSL_LOCKS_EXCLUDED(pageheap_lock) override; + + // As New, but the returned span is aligned to a <align>-page boundary. + // <align> must be a power of two. + Span* NewAligned(Length n, Length align) + ABSL_LOCKS_EXCLUDED(pageheap_lock) override; + + // Delete the span "[p, p+n-1]". + // REQUIRES: span was returned by earlier call to New() and + // has not yet been deleted. 
+ void Delete(Span* span) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) override; + + inline BackingStats stats() const + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) override { + return stats_; + } + + void GetSmallSpanStats(SmallSpanStats* result) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) override; + + void GetLargeSpanStats(LargeSpanStats* result) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) override; + + // Try to release at least num_pages for reuse by the OS. Returns + // the actual number of pages released, which may be less than + // num_pages if there weren't enough pages to release. The result + // may also be larger than num_pages since page_heap might decide to + // release one large range instead of fragmenting it into two + // smaller released and unreleased ranges. + Length ReleaseAtLeastNPages(Length num_pages) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) override; + + // Prints stats about the page heap to *out. + void Print(Printer* out) ABSL_LOCKS_EXCLUDED(pageheap_lock) override; + + void PrintInPbtxt(PbtxtRegion* region) + ABSL_LOCKS_EXCLUDED(pageheap_lock) override; + + private: + // We segregate spans of a given size into two circular linked + // lists: one for normal spans, and one for spans whose memory + // has been returned to the system. + struct SpanListPair { + SpanList normal; + SpanList returned; + }; + + // List of free spans of length >= kMaxPages + SpanListPair large_ ABSL_GUARDED_BY(pageheap_lock); + + // Array mapping from span length to a doubly linked list of free spans + SpanListPair free_[kMaxPages.raw_num()] ABSL_GUARDED_BY(pageheap_lock); + + // Statistics on system, free, and unmapped bytes + BackingStats stats_ ABSL_GUARDED_BY(pageheap_lock); + + Span* SearchFreeAndLargeLists(Length n, bool* from_returned) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + bool GrowHeap(Length n) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // REQUIRES: span->length >= n + // REQUIRES: span->location != IN_USE + // Remove span from its free list, and move any leftover part of + // span into appropriate free lists. Also update "span" to have + // length exactly "n" and mark it as non-free so it can be returned + // to the client. After all that, decrease free_pages_ by n and + // return span. + Span* Carve(Span* span, Length n) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Allocate a large span of length == n. If successful, returns a + // span of exactly the specified length. Else, returns NULL. + Span* AllocLarge(Length n, bool* from_returned) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Coalesce span with neighboring spans if possible, prepend to + // appropriate free list, and adjust stats. + void MergeIntoFreeList(Span* span) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Prepends span to appropriate free list, and adjusts stats. You'll probably + // want to adjust span->freelist_added_time before/after calling this + // function. + void PrependToFreeList(Span* span) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Removes span from its free list, and adjust stats. + void RemoveFromFreeList(Span* span) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Release the last span on the normal portion of this list. + // Return the length of that span. + Length ReleaseLastNormalSpan(SpanListPair* slist) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Do invariant testing. + bool Check() ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Index of last free list where we released memory to the OS. 
+ int release_index_ ABSL_GUARDED_BY(pageheap_lock); + + Span* AllocateSpan(Length n, bool* from_returned) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + void RecordSpan(Span* span) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); +}; + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_PAGE_HEAP_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/page_heap_allocator.h b/contrib/libs/tcmalloc/tcmalloc/page_heap_allocator.h new file mode 100644 index 0000000000..5d2bbfe92c --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/page_heap_allocator.h @@ -0,0 +1,93 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_PAGE_HEAP_ALLOCATOR_H_ +#define TCMALLOC_PAGE_HEAP_ALLOCATOR_H_ + +#include <stddef.h> + +#include "absl/base/attributes.h" +#include "absl/base/optimization.h" +#include "absl/base/thread_annotations.h" +#include "tcmalloc/arena.h" +#include "tcmalloc/common.h" +#include "tcmalloc/internal/logging.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +struct AllocatorStats { + // Number of allocated but unfreed objects + size_t in_use; + // Number of objects created (both free and allocated) + size_t total; +}; + +// Simple allocator for objects of a specified type. External locking +// is required before accessing one of these objects. +template <class T> +class PageHeapAllocator { + public: + constexpr PageHeapAllocator() + : arena_(nullptr), free_list_(nullptr), stats_{0, 0} {} + + // We use an explicit Init function because these variables are statically + // allocated and their constructors might not have run by the time some + // other static variable tries to allocate memory. + void Init(Arena* arena) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { + arena_ = arena; + // Reserve some space at the beginning to avoid fragmentation. 
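The New/Delete pair further down implements an intrusive free list: a freed object's first word stores the pointer to the next free object, so no side storage is needed. A minimal standalone sketch of that pattern (hedged: IntrusiveFreeList is illustrative and omits the Arena fallback, stats, and lock annotations of the real class):

template <class T>
class IntrusiveFreeList {
 public:
  // Pop a previously freed object, or nullptr if none is cached.
  T* TryPop() {
    if (head_ == nullptr) return nullptr;  // The real code falls back to the arena.
    T* result = head_;
    head_ = *reinterpret_cast<T**>(head_);  // First word of a free T = next pointer.
    return result;
  }

  // Push p by storing the old head into p's first word.
  void Push(T* p) {
    *reinterpret_cast<T**>(p) = head_;
    head_ = p;
  }

 private:
  static_assert(sizeof(T) >= sizeof(T*), "objects must be able to hold a pointer");
  T* head_ = nullptr;
};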
+ Delete(New()); + } + + ABSL_ATTRIBUTE_RETURNS_NONNULL T* New() + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { + // Consult free list + T* result = free_list_; + stats_.in_use++; + if (ABSL_PREDICT_FALSE(result == nullptr)) { + stats_.total++; + return reinterpret_cast<T*>(arena_->Alloc(sizeof(T))); + } + free_list_ = *(reinterpret_cast<T**>(free_list_)); + return result; + } + + void Delete(T* p) ABSL_ATTRIBUTE_NONNULL() + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { + *(reinterpret_cast<void**>(p)) = free_list_; + free_list_ = p; + stats_.in_use--; + } + + AllocatorStats stats() const ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { + return stats_; + } + + private: + // Arena from which to allocate memory + Arena* arena_; + + // Free list of already carved objects + T* free_list_ ABSL_GUARDED_BY(pageheap_lock); + + AllocatorStats stats_ ABSL_GUARDED_BY(pageheap_lock); +}; + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_PAGE_HEAP_ALLOCATOR_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/page_heap_test.cc b/contrib/libs/tcmalloc/tcmalloc/page_heap_test.cc new file mode 100644 index 0000000000..dc13a60cb7 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/page_heap_test.cc @@ -0,0 +1,109 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/page_heap.h" + +#include <stddef.h> +#include <stdlib.h> + +#include <memory> +#include <new> + +#include "gtest/gtest.h" +#include "absl/base/internal/spinlock.h" +#include "absl/memory/memory.h" +#include "tcmalloc/common.h" +#include "tcmalloc/pagemap.h" +#include "tcmalloc/static_vars.h" + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +// PageHeap expands by kMinSystemAlloc by default, so use this as the minimum +// Span length to not get more memory than expected. +constexpr Length kMinSpanLength = BytesToLengthFloor(kMinSystemAlloc); + +void CheckStats(const PageHeap* ph, Length system_pages, Length free_pages, + Length unmapped_pages) ABSL_LOCKS_EXCLUDED(pageheap_lock) { + BackingStats stats; + { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + stats = ph->stats(); + } + + ASSERT_EQ(system_pages.in_bytes(), stats.system_bytes); + ASSERT_EQ(free_pages.in_bytes(), stats.free_bytes); + ASSERT_EQ(unmapped_pages.in_bytes(), stats.unmapped_bytes); +} + +static void Delete(PageHeap* ph, Span* s) ABSL_LOCKS_EXCLUDED(pageheap_lock) { + { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + ph->Delete(s); + } +} + +static Length Release(PageHeap* ph, Length n) { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + return ph->ReleaseAtLeastNPages(n); +} + +class PageHeapTest : public ::testing::Test { + public: + PageHeapTest() { + // If this test is not linked against TCMalloc, the global arena used for + // metadata will not be initialized. + Static::InitIfNecessary(); + } +}; + +// TODO(b/36484267): replace this test wholesale. 
+TEST_F(PageHeapTest, Stats) { + auto pagemap = absl::make_unique<PageMap>(); + void* memory = calloc(1, sizeof(PageHeap)); + PageHeap* ph = new (memory) PageHeap(pagemap.get(), MemoryTag::kNormal); + + // Empty page heap + CheckStats(ph, Length(0), Length(0), Length(0)); + + // Allocate a span 's1' + Span* s1 = ph->New(kMinSpanLength); + CheckStats(ph, kMinSpanLength, Length(0), Length(0)); + + // Allocate an aligned span 's2' + static const Length kHalf = kMinSpanLength / 2; + Span* s2 = ph->NewAligned(kHalf, kHalf); + ASSERT_EQ(s2->first_page().index() % kHalf.raw_num(), 0); + CheckStats(ph, kMinSpanLength * 2, Length(0), kHalf); + + // Delete the old one + Delete(ph, s1); + CheckStats(ph, kMinSpanLength * 2, kMinSpanLength, kHalf); + + // Release the space from there: + Length released = Release(ph, Length(1)); + ASSERT_EQ(released, kMinSpanLength); + CheckStats(ph, kMinSpanLength * 2, Length(0), kHalf + kMinSpanLength); + + // and delete the new one + Delete(ph, s2); + CheckStats(ph, kMinSpanLength * 2, kHalf, kHalf + kMinSpanLength); + + free(memory); +} + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/pagemap.cc b/contrib/libs/tcmalloc/tcmalloc/pagemap.cc new file mode 100644 index 0000000000..25962302c3 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/pagemap.cc @@ -0,0 +1,73 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/pagemap.h" + +#include <sys/mman.h> + +#include "tcmalloc/common.h" +#include "tcmalloc/span.h" +#include "tcmalloc/static_vars.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +void PageMap::RegisterSizeClass(Span* span, size_t sc) { + ASSERT(span->location() == Span::IN_USE); + const PageId first = span->first_page(); + const PageId last = span->last_page(); + ASSERT(GetDescriptor(first) == span); + for (PageId p = first; p <= last; ++p) { + map_.set_with_sizeclass(p.index(), span, sc); + } +} + +void PageMap::UnregisterSizeClass(Span* span) { + ASSERT(span->location() == Span::IN_USE); + const PageId first = span->first_page(); + const PageId last = span->last_page(); + ASSERT(GetDescriptor(first) == span); + for (PageId p = first; p <= last; ++p) { + map_.clear_sizeclass(p.index()); + } +} + +void PageMap::MapRootWithSmallPages() { + constexpr size_t kHugePageMask = ~(kHugePageSize - 1); + uintptr_t begin = reinterpret_cast<uintptr_t>(map_.RootAddress()); + // Round begin up to the nearest hugepage, this avoids causing memory before + // the start of the pagemap to become mapped onto small pages. + uintptr_t rbegin = (begin + kHugePageSize) & kHugePageMask; + size_t length = map_.RootSize(); + // Round end down to the nearest hugepage, this avoids causing memory after + // the end of the pagemap becoming mapped onto small pages. 
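A worked numeric check of the two roundings used here (the addresses and the 2 MiB hugepage size are purely illustrative assumptions):

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t kHugePageSize = uint64_t{2} << 20;  // assumed 2 MiB hugepages
  const uint64_t kHugePageMask = ~(kHugePageSize - 1);
  const uint64_t begin = 0x40100000;                 // root starts 1 MiB past a boundary
  const uint64_t length = uint64_t{8} << 20;         // 8 MiB root table

  const uint64_t rbegin = (begin + kHugePageSize) & kHugePageMask;  // round up
  const uint64_t rend = (begin + length) & kHugePageMask;           // round down
  assert(rbegin == 0x40200000);  // first whole hugepage inside the range
  assert(rend == 0x40800000);    // 0x40900000 rounded down to a boundary
  assert(rend > rbegin);         // non-empty range to madvise(MADV_NOHUGEPAGE)
  return 0;
}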
+ size_t rend = (begin + length) & kHugePageMask; + // Since we have rounded the start up, and the end down, we also want to + // confirm that there is something left between them for us to modify. + // For small but slow, the root pagemap is less than a hugepage in size, + // so we will not end up forcing it to be small pages. + if (rend > rbegin) { + size_t rlength = rend - rbegin; + madvise(reinterpret_cast<void*>(rbegin), rlength, MADV_NOHUGEPAGE); + } +} + +void* MetaDataAlloc(size_t bytes) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { + return Static::arena().Alloc(bytes); +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/pagemap.h b/contrib/libs/tcmalloc/tcmalloc/pagemap.h new file mode 100644 index 0000000000..0cafa8a38d --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/pagemap.h @@ -0,0 +1,431 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// A data structure used by the caching malloc. It maps from page# to +// a pointer that contains info about that page using a two-level array. +// +// The BITS parameter should be the number of bits required to hold +// a page number. E.g., with 32 bit pointers and 8K pages (i.e., +// page offset fits in lower 13 bits), BITS == 19. +// +// A PageMap requires external synchronization, except for the get/sizeclass +// methods (see explanation at top of tcmalloc.cc). + +#ifndef TCMALLOC_PAGEMAP_H_ +#define TCMALLOC_PAGEMAP_H_ + +#include <stddef.h> +#include <stdint.h> +#include <string.h> + +#include "absl/base/attributes.h" +#include "absl/base/thread_annotations.h" +#include "tcmalloc/common.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/pages.h" +#include "tcmalloc/span.h" +#include "tcmalloc/static_vars.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// Two-level radix tree +typedef void* (*PagemapAllocator)(size_t); +void* MetaDataAlloc(size_t bytes); + +template <int BITS, PagemapAllocator Allocator> +class PageMap2 { + private: + // The leaf node (regardless of pointer size) always maps 2^15 entries; + // with 8K pages, this gives us 256MB mapped per leaf node. + static constexpr int kLeafBits = 15; + static constexpr int kLeafLength = 1 << kLeafBits; + static constexpr int kRootBits = (BITS >= kLeafBits) ? 
(BITS - kLeafBits) : 0; + // (1<<kRootBits) must not overflow an "int" + static_assert(kRootBits < sizeof(int) * 8 - 1, "kRootBits is too large"); + static constexpr int kRootLength = 1 << kRootBits; + + static constexpr size_t kLeafCoveredBytes = 1ul << (kLeafBits + kPageShift); + static_assert(kLeafCoveredBytes >= kHugePageSize, "leaf too small"); + static constexpr size_t kLeafHugeBits = + (kLeafBits + kPageShift - kHugePageShift); + static constexpr size_t kLeafHugepages = kLeafCoveredBytes / kHugePageSize; + static_assert(kLeafHugepages == 1 << kLeafHugeBits, "sanity"); + struct Leaf { + // We keep parallel arrays indexed by page number. One keeps the + // size class; another span pointers; the last hugepage-related + // information. The size class information is kept segregated + // since small object deallocations are so frequent and do not + // need the other information kept in a Span. + CompactSizeClass sizeclass[kLeafLength]; + Span* span[kLeafLength]; + void* hugepage[kLeafHugepages]; + }; + + Leaf* root_[kRootLength]; // Top-level node + size_t bytes_used_; + + public: + typedef uintptr_t Number; + + constexpr PageMap2() : root_{}, bytes_used_(0) {} + + // No locks required. See SYNCHRONIZATION explanation at top of tcmalloc.cc. + void* get(Number k) const ABSL_NO_THREAD_SAFETY_ANALYSIS { + const Number i1 = k >> kLeafBits; + const Number i2 = k & (kLeafLength - 1); + if ((k >> BITS) > 0 || root_[i1] == nullptr) { + return nullptr; + } + return root_[i1]->span[i2]; + } + + // No locks required. See SYNCHRONIZATION explanation at top of tcmalloc.cc. + // Requires that the span is known to already exist. + Span* get_existing(Number k) const ABSL_NO_THREAD_SAFETY_ANALYSIS { + const Number i1 = k >> kLeafBits; + const Number i2 = k & (kLeafLength - 1); + ASSERT((k >> BITS) == 0); + ASSERT(root_[i1] != nullptr); + return root_[i1]->span[i2]; + } + + // No locks required. See SYNCHRONIZATION explanation at top of tcmalloc.cc. + // REQUIRES: Must be a valid page number previously Ensure()d. 
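That requirement — the page number must already have been Ensure()d — is the pattern the page heap follows when it grows: populate the covering leaves first, then record spans. A hedged sketch against PageMap2 (the BITS value and helper are illustrative; MetaDataAlloc is the allocator declared above):

// Illustrative only: a map over 36-bit page numbers backed by MetaDataAlloc.
PageMap2<36, MetaDataAlloc> page_map;

bool RecordSpanSketch(uintptr_t first_page, size_t n, Span* span) {
  // Create leaves covering [first_page, first_page + n) before any set()/get().
  if (!page_map.Ensure(first_page, n)) return false;  // metadata allocation failed
  page_map.set(first_page, span);
  if (n > 1) page_map.set(first_page + n - 1, span);  // mark the last page too
  return true;
}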
+ CompactSizeClass ABSL_ATTRIBUTE_ALWAYS_INLINE + sizeclass(Number k) const ABSL_NO_THREAD_SAFETY_ANALYSIS { + const Number i1 = k >> kLeafBits; + const Number i2 = k & (kLeafLength - 1); + ASSERT((k >> BITS) == 0); + ASSERT(root_[i1] != nullptr); + return root_[i1]->sizeclass[i2]; + } + + void set(Number k, Span* s) { + ASSERT(k >> BITS == 0); + const Number i1 = k >> kLeafBits; + const Number i2 = k & (kLeafLength - 1); + root_[i1]->span[i2] = s; + } + + void set_with_sizeclass(Number k, Span* s, CompactSizeClass sc) { + ASSERT(k >> BITS == 0); + const Number i1 = k >> kLeafBits; + const Number i2 = k & (kLeafLength - 1); + Leaf* leaf = root_[i1]; + leaf->span[i2] = s; + leaf->sizeclass[i2] = sc; + } + + void clear_sizeclass(Number k) { + ASSERT(k >> BITS == 0); + const Number i1 = k >> kLeafBits; + const Number i2 = k & (kLeafLength - 1); + root_[i1]->sizeclass[i2] = 0; + } + + void* get_hugepage(Number k) { + ASSERT(k >> BITS == 0); + const Number i1 = k >> kLeafBits; + const Number i2 = k & (kLeafLength - 1); + const Leaf* leaf = root_[i1]; + ASSERT(leaf != nullptr); + return leaf->hugepage[i2 >> (kLeafBits - kLeafHugeBits)]; + } + + void set_hugepage(Number k, void* v) { + ASSERT(k >> BITS == 0); + const Number i1 = k >> kLeafBits; + const Number i2 = k & (kLeafLength - 1); + root_[i1]->hugepage[i2 >> (kLeafBits - kLeafHugeBits)] = v; + } + + bool Ensure(Number start, size_t n) { + ASSERT(n > 0); + for (Number key = start; key <= start + n - 1;) { + const Number i1 = key >> kLeafBits; + + // Check for overflow + if (i1 >= kRootLength) return false; + + // Make 2nd level node if necessary + if (root_[i1] == nullptr) { + Leaf* leaf = reinterpret_cast<Leaf*>(Allocator(sizeof(Leaf))); + if (leaf == nullptr) return false; + bytes_used_ += sizeof(Leaf); + memset(leaf, 0, sizeof(*leaf)); + root_[i1] = leaf; + } + + // Advance key past whatever is covered by this leaf node + key = ((key >> kLeafBits) + 1) << kLeafBits; + } + return true; + } + + size_t bytes_used() const { + // Account for size of root node, etc. + return bytes_used_ + sizeof(*this); + } + + constexpr size_t RootSize() const { return sizeof(root_); } + const void* RootAddress() { return root_; } +}; + +// Three-level radix tree +// Currently only used for TCMALLOC_SMALL_BUT_SLOW +template <int BITS, PagemapAllocator Allocator> +class PageMap3 { + private: + // For x86 we currently have 48 usable bits, for POWER we have 46. With + // 4KiB page sizes (12 bits) we end up with 36 bits for x86 and 34 bits + // for POWER. So leaf covers 4KiB * 1 << 12 = 16MiB - which is huge page + // size for POWER. 
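A compile-time check of the level split described in that comment for the x86 case (BITS = 36, 4 KiB pages); the constant names here are local to the sketch, not the real kPageShift/kHugePageSize:

#include <cstddef>

constexpr int kBitsX86 = 36;                          // 48 address bits - 12 page bits
constexpr int kPageShiftSketch = 12;                  // 4 KiB pages
constexpr int kLeafBitsSketch = (kBitsX86 + 2) / 3;   // 12
constexpr int kMidBitsSketch = (kBitsX86 + 2) / 3;    // 12
constexpr int kRootBitsSketch = kBitsX86 - kLeafBitsSketch - kMidBitsSketch;  // 12
constexpr size_t kLeafCoveredSketch =
    size_t{1} << (kLeafBitsSketch + kPageShiftSketch);

static_assert(kRootBitsSketch == 12, "three even 12-bit levels on x86-64");
static_assert(kLeafCoveredSketch == 16u * 1024 * 1024,
              "one leaf covers 16 MiB, the POWER hugepage size");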
+ static constexpr int kLeafBits = (BITS + 2) / 3; // Round up + static constexpr int kLeafLength = 1 << kLeafBits; + static constexpr int kMidBits = (BITS + 2) / 3; // Round up + static constexpr int kMidLength = 1 << kMidBits; + static constexpr int kRootBits = BITS - kLeafBits - kMidBits; + static_assert(kRootBits > 0, "Too many bits assigned to leaf and mid"); + // (1<<kRootBits) must not overflow an "int" + static_assert(kRootBits < sizeof(int) * 8 - 1, "Root bits too large"); + static constexpr int kRootLength = 1 << kRootBits; + + static constexpr size_t kLeafCoveredBytes = size_t{1} + << (kLeafBits + kPageShift); + static_assert(kLeafCoveredBytes >= kHugePageSize, "leaf too small"); + static constexpr size_t kLeafHugeBits = + (kLeafBits + kPageShift - kHugePageShift); + static constexpr size_t kLeafHugepages = kLeafCoveredBytes / kHugePageSize; + static_assert(kLeafHugepages == 1 << kLeafHugeBits, "sanity"); + struct Leaf { + // We keep parallel arrays indexed by page number. One keeps the + // size class; another span pointers; the last hugepage-related + // information. The size class information is kept segregated + // since small object deallocations are so frequent and do not + // need the other information kept in a Span. + CompactSizeClass sizeclass[kLeafLength]; + Span* span[kLeafLength]; + void* hugepage[kLeafHugepages]; + }; + + struct Node { + // Mid-level structure that holds pointers to leafs + Leaf* leafs[kMidLength]; + }; + + Node* root_[kRootLength]; // Top-level node + size_t bytes_used_; + + public: + typedef uintptr_t Number; + + constexpr PageMap3() : root_{}, bytes_used_(0) {} + + // No locks required. See SYNCHRONIZATION explanation at top of tcmalloc.cc. + void* get(Number k) const ABSL_NO_THREAD_SAFETY_ANALYSIS { + const Number i1 = k >> (kLeafBits + kMidBits); + const Number i2 = (k >> kLeafBits) & (kMidLength - 1); + const Number i3 = k & (kLeafLength - 1); + if ((k >> BITS) > 0 || root_[i1] == nullptr || + root_[i1]->leafs[i2] == nullptr) { + return nullptr; + } + return root_[i1]->leafs[i2]->span[i3]; + } + + // No locks required. See SYNCHRONIZATION explanation at top of tcmalloc.cc. + // Requires that the span is known to already exist. + Span* get_existing(Number k) const ABSL_NO_THREAD_SAFETY_ANALYSIS { + const Number i1 = k >> (kLeafBits + kMidBits); + const Number i2 = (k >> kLeafBits) & (kMidLength - 1); + const Number i3 = k & (kLeafLength - 1); + ASSERT((k >> BITS) == 0); + ASSERT(root_[i1] != nullptr); + ASSERT(root_[i1]->leafs[i2] != nullptr); + return root_[i1]->leafs[i2]->span[i3]; + } + + // No locks required. See SYNCHRONIZATION explanation at top of tcmalloc.cc. + // REQUIRES: Must be a valid page number previously Ensure()d. 
+ CompactSizeClass ABSL_ATTRIBUTE_ALWAYS_INLINE + sizeclass(Number k) const ABSL_NO_THREAD_SAFETY_ANALYSIS { + const Number i1 = k >> (kLeafBits + kMidBits); + const Number i2 = (k >> kLeafBits) & (kMidLength - 1); + const Number i3 = k & (kLeafLength - 1); + ASSERT((k >> BITS) == 0); + ASSERT(root_[i1] != nullptr); + ASSERT(root_[i1]->leafs[i2] != nullptr); + return root_[i1]->leafs[i2]->sizeclass[i3]; + } + + void set(Number k, Span* s) { + ASSERT(k >> BITS == 0); + const Number i1 = k >> (kLeafBits + kMidBits); + const Number i2 = (k >> kLeafBits) & (kMidLength - 1); + const Number i3 = k & (kLeafLength - 1); + root_[i1]->leafs[i2]->span[i3] = s; + } + + void set_with_sizeclass(Number k, Span* s, CompactSizeClass sc) { + ASSERT(k >> BITS == 0); + const Number i1 = k >> (kLeafBits + kMidBits); + const Number i2 = (k >> kLeafBits) & (kMidLength - 1); + const Number i3 = k & (kLeafLength - 1); + Leaf* leaf = root_[i1]->leafs[i2]; + leaf->span[i3] = s; + leaf->sizeclass[i3] = sc; + } + + void clear_sizeclass(Number k) { + ASSERT(k >> BITS == 0); + const Number i1 = k >> (kLeafBits + kMidBits); + const Number i2 = (k >> kLeafBits) & (kMidLength - 1); + const Number i3 = k & (kLeafLength - 1); + root_[i1]->leafs[i2]->sizeclass[i3] = 0; + } + + void* get_hugepage(Number k) { + ASSERT(k >> BITS == 0); + const Number i1 = k >> (kLeafBits + kMidBits); + const Number i2 = (k >> kLeafBits) & (kMidLength - 1); + const Number i3 = k & (kLeafLength - 1); + const Node* node = root_[i1]; + ASSERT(node != nullptr); + const Leaf* leaf = node->leafs[i2]; + ASSERT(leaf != nullptr); + return leaf->hugepage[i3 >> (kLeafBits - kLeafHugeBits)]; + } + + void set_hugepage(Number k, void* v) { + ASSERT(k >> BITS == 0); + const Number i1 = k >> (kLeafBits + kMidBits); + const Number i2 = (k >> kLeafBits) & (kMidLength - 1); + const Number i3 = k & (kLeafLength - 1); + root_[i1]->leafs[i2]->hugepage[i3 >> (kLeafBits - kLeafHugeBits)] = v; + } + + bool Ensure(Number start, size_t n) { + for (Number key = start; key <= start + n - 1;) { + const Number i1 = key >> (kLeafBits + kMidBits); + const Number i2 = (key >> kLeafBits) & (kMidLength - 1); + + // Check within root + if (i1 >= kRootLength) return false; + + // Allocate Node if necessary + if (root_[i1] == nullptr) { + Node* node = reinterpret_cast<Node*>(Allocator(sizeof(Node))); + if (node == nullptr) return false; + bytes_used_ += sizeof(Node); + memset(node, 0, sizeof(*node)); + root_[i1] = node; + } + + // Allocate Leaf if necessary + if (root_[i1]->leafs[i2] == nullptr) { + Leaf* leaf = reinterpret_cast<Leaf*>(Allocator(sizeof(Leaf))); + if (leaf == nullptr) return false; + bytes_used_ += sizeof(Leaf); + memset(leaf, 0, sizeof(*leaf)); + root_[i1]->leafs[i2] = leaf; + } + + // Advance key past whatever is covered by this leaf node + key = ((key >> kLeafBits) + 1) << kLeafBits; + } + return true; + } + + size_t bytes_used() const { return bytes_used_ + sizeof(*this); } + + constexpr size_t RootSize() const { return sizeof(root_); } + const void* RootAddress() { return root_; } +}; + +class PageMap { + public: + constexpr PageMap() : map_{} {} + + // Return the size class for p, or 0 if it is not known to tcmalloc + // or is a page containing large objects. + // No locks required. See SYNCHRONIZATION explanation at top of tcmalloc.cc. 
+ CompactSizeClass sizeclass(PageId p) ABSL_NO_THREAD_SAFETY_ANALYSIS { + return map_.sizeclass(p.index()); + } + + void Set(PageId p, Span* span) { map_.set(p.index(), span); } + + bool Ensure(PageId p, Length n) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { + return map_.Ensure(p.index(), n.raw_num()); + } + + // Mark an allocated span as being used for small objects of the + // specified size-class. + // REQUIRES: span was returned by an earlier call to PageAllocator::New() + // and has not yet been deleted. + // Concurrent calls to this method are safe unless they mark the same span. + void RegisterSizeClass(Span* span, size_t sc); + + // Mark an allocated span as being not used for any size-class. + // REQUIRES: span was returned by an earlier call to PageAllocator::New() + // and has not yet been deleted. + // Concurrent calls to this method are safe unless they mark the same span. + void UnregisterSizeClass(Span* span); + + // Return the descriptor for the specified page. Returns NULL if + // this PageId was not allocated previously. + // No locks required. See SYNCHRONIZATION explanation at top of tcmalloc.cc. + inline Span* GetDescriptor(PageId p) const ABSL_NO_THREAD_SAFETY_ANALYSIS { + return reinterpret_cast<Span*>(map_.get(p.index())); + } + + // Return the descriptor for the specified page. + // PageId must have been previously allocated. + // No locks required. See SYNCHRONIZATION explanation at top of tcmalloc.cc. + ABSL_ATTRIBUTE_RETURNS_NONNULL inline Span* GetExistingDescriptor( + PageId p) const ABSL_NO_THREAD_SAFETY_ANALYSIS { + Span* span = map_.get_existing(p.index()); + ASSERT(span != nullptr); + return span; + } + + size_t bytes() const ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { + return map_.bytes_used(); + } + + void* GetHugepage(PageId p) { return map_.get_hugepage(p.index()); } + + void SetHugepage(PageId p, void* v) { map_.set_hugepage(p.index(), v); } + + // The PageMap root node can be quite large and sparsely used. If this + // gets mapped with hugepages we potentially end up holding a large + // amount of unused memory. So it is better to map the root node with + // small pages to minimise the amount of unused memory. + void MapRootWithSmallPages(); + + private: +#ifdef TCMALLOC_USE_PAGEMAP3 + PageMap3<kAddressBits - kPageShift, MetaDataAlloc> map_; +#else + PageMap2<kAddressBits - kPageShift, MetaDataAlloc> map_; +#endif +}; + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_PAGEMAP_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/pagemap_test.cc b/contrib/libs/tcmalloc/tcmalloc/pagemap_test.cc new file mode 100644 index 0000000000..49ef5477d8 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/pagemap_test.cc @@ -0,0 +1,166 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
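+
+// An illustrative sketch of the index arithmetic PageMap2 relies on: a page
+// number is split into a root index (upper bits) and a leaf index (lower
+// kLeafBits bits). This block is an editorial example, not upstream code;
+// the 15-bit leaf width and the sample key below are assumptions chosen to
+// mirror the 20-bit PageMap2 instantiated by this test.
+#include <stdint.h>
+namespace pagemap_index_sketch {
+constexpr int kSketchLeafBits = 15;        // assumed leaf width
+constexpr uintptr_t kSketchKey = 0x12345;  // arbitrary page number
+constexpr uintptr_t kSketchRootIndex = kSketchKey >> kSketchLeafBits;
+constexpr uintptr_t kSketchLeafIndex =
+    kSketchKey & ((uintptr_t{1} << kSketchLeafBits) - 1);
+// The two indices recompose the original key, so each page maps to exactly
+// one (root slot, leaf slot) pair.
+static_assert(((kSketchRootIndex << kSketchLeafBits) | kSketchLeafIndex) ==
+                  kSketchKey,
+              "root/leaf indices must recompose the page number");
+}  // namespace pagemap_index_sketch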
+ +#include "tcmalloc/pagemap.h" + +#include <stdint.h> +#include <stdlib.h> +#include <sys/mman.h> +#include <unistd.h> + +#include <algorithm> +#include <cstdint> +#include <new> +#include <vector> + +#include "gtest/gtest.h" +#include "absl/random/random.h" +#include "tcmalloc/common.h" + +// Note: we leak memory every time a map is constructed, so do not +// create too many maps. + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +// Pick span pointer to use for page numbered i +Span* span(intptr_t i) { return reinterpret_cast<Span*>(i + 1); } + +// Pick sizeclass to use for page numbered i +uint8_t sc(intptr_t i) { return i % 16; } + +class PageMapTest : public ::testing::TestWithParam<int> { + public: + PageMapTest() { + // Arrange to pass zero-filled memory as the backing store for map. + memset(storage, 0, sizeof(Map)); + map = new (storage) Map(); + } + + ~PageMapTest() override { + for (void* ptr : *ptrs()) { + ::operator delete(ptr); + } + ptrs()->clear(); + } + + private: + static std::vector<void*>* ptrs() { + static std::vector<void*>* ret = new std::vector<void*>(); + return ret; + } + + static void* alloc(size_t n) { + void* ptr = ::operator new(n); + ptrs()->push_back(ptr); + return ptr; + } + + public: + using Map = PageMap2<20, alloc>; + Map* map; + + private: + alignas(Map) char storage[sizeof(Map)]; +}; + +TEST_P(PageMapTest, Sequential) { + const intptr_t limit = GetParam(); + + for (intptr_t i = 0; i < limit; i++) { + map->Ensure(i, 1); + map->set(i, span(i)); + ASSERT_EQ(map->get(i), span(i)); + + // Test size class handling + ASSERT_EQ(0, map->sizeclass(i)); + map->set_with_sizeclass(i, span(i), sc(i)); + ASSERT_EQ(sc(i), map->sizeclass(i)); + } + for (intptr_t i = 0; i < limit; i++) { + ASSERT_EQ(map->get(i), span(i)); + } +} + +TEST_P(PageMapTest, Bulk) { + const intptr_t limit = GetParam(); + + map->Ensure(0, limit); + for (intptr_t i = 0; i < limit; i++) { + map->set(i, span(i)); + ASSERT_EQ(map->get(i), span(i)); + } + for (intptr_t i = 0; i < limit; i++) { + ASSERT_EQ(map->get(i), span(i)); + } +} + +TEST_P(PageMapTest, Overflow) { + const intptr_t kLimit = 1 << 20; + ASSERT_FALSE(map->Ensure(kLimit, kLimit + 1)); +} + +TEST_P(PageMapTest, RandomAccess) { + const intptr_t limit = GetParam(); + + std::vector<intptr_t> elements; + for (intptr_t i = 0; i < limit; i++) { + elements.push_back(i); + } + std::shuffle(elements.begin(), elements.end(), absl::BitGen()); + + for (intptr_t i = 0; i < limit; i++) { + map->Ensure(elements[i], 1); + map->set(elements[i], span(elements[i])); + ASSERT_EQ(map->get(elements[i]), span(elements[i])); + } + for (intptr_t i = 0; i < limit; i++) { + ASSERT_EQ(map->get(i), span(i)); + } +} + +INSTANTIATE_TEST_SUITE_P(Limits, PageMapTest, ::testing::Values(100, 1 << 20)); + +// Surround pagemap with unused memory. This isolates it so that it does not +// share pages with any other structures. This avoids the risk that adjacent +// objects might cause it to be mapped in. The padding is of sufficient size +// that this is true even if this structure is mapped with huge pages. 
+static struct PaddedPageMap { + constexpr PaddedPageMap() : padding_before{}, pagemap{}, padding_after{} {} + uint64_t padding_before[kHugePageSize / sizeof(uint64_t)]; + PageMap pagemap; + uint64_t padding_after[kHugePageSize / sizeof(uint64_t)]; +} padded_pagemap_; + +TEST(TestMemoryFootprint, Test) { + uint64_t pagesize = sysconf(_SC_PAGESIZE); + ASSERT_NE(pagesize, 0); + size_t pages = sizeof(PageMap) / pagesize + 1; + std::vector<unsigned char> present(pages); + + // mincore needs the address rounded to the start page + uint64_t basepage = + reinterpret_cast<uintptr_t>(&padded_pagemap_.pagemap) & ~(pagesize - 1); + ASSERT_EQ(mincore(reinterpret_cast<void*>(basepage), sizeof(PageMap), + present.data()), + 0); + for (int i = 0; i < pages; i++) { + EXPECT_EQ(present[i], 0); + } +} + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/pages.h b/contrib/libs/tcmalloc/tcmalloc/pages.h new file mode 100644 index 0000000000..e674c9c9c8 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/pages.h @@ -0,0 +1,298 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_PAGES_H_ +#define TCMALLOC_PAGES_H_ + +#include <cmath> +#include <string> + +#include "absl/strings/numbers.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" +#include "tcmalloc/common.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/optimization.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// Type that can hold the length of a run of pages +class Length { + public: + constexpr Length() : n_(0) {} + explicit constexpr Length(uintptr_t n) : n_(n) {} + + constexpr Length(const Length&) = default; + constexpr Length& operator=(const Length&) = default; + + constexpr size_t raw_num() const { return n_; } + constexpr size_t in_bytes() const { return n_ * kPageSize; } + double in_mib() const { + return std::ldexp(static_cast<double>(n_), + static_cast<int>(kPageShift) - 20); + } + constexpr Length in_pages() const { return *this; } + + static constexpr Length min() { return Length(0); } + static constexpr Length max() { + return Length(std::numeric_limits<uintptr_t>::max() >> kPageShift); + } + + constexpr Length& operator+=(Length rhs) { + n_ += rhs.n_; + return *this; + } + + constexpr Length& operator-=(Length rhs) { + ASSERT(n_ >= rhs.n_); + n_ -= rhs.n_; + return *this; + } + + constexpr Length& operator*=(size_t rhs) { + n_ *= rhs; + return *this; + } + + constexpr Length& operator/=(size_t rhs) { + ASSERT(rhs != 0); + n_ /= rhs; + return *this; + } + + constexpr Length& operator%=(Length rhs) { + ASSERT(rhs.n_ != 0); + n_ %= rhs.n_; + return *this; + } + + friend constexpr bool operator<(Length lhs, Length rhs); + friend constexpr bool operator>(Length lhs, Length rhs); + friend constexpr bool operator<=(Length lhs, Length rhs); + friend constexpr bool operator>=(Length lhs, Length rhs); + friend constexpr 
bool operator==(Length lhs, Length rhs); + friend constexpr bool operator!=(Length lhs, Length rhs); + + private: + uintptr_t n_; +}; + +inline bool AbslParseFlag(absl::string_view text, Length* l, + std::string* /* error */) { + uintptr_t n; + if (!absl::SimpleAtoi(text, &n)) { + return false; + } + *l = Length(n); + return true; +} + +inline std::string AbslUnparseFlag(Length l) { + return absl::StrCat(l.raw_num()); +} + +// A single aligned page. +class PageId { + public: + constexpr PageId() : pn_(0) {} + constexpr PageId(const PageId& p) = default; + constexpr PageId& operator=(const PageId& p) = default; + + constexpr explicit PageId(uintptr_t pn) : pn_(pn) {} + + void* start_addr() const { + return reinterpret_cast<void*>(pn_ << kPageShift); + } + + uintptr_t start_uintptr() const { return pn_ << kPageShift; } + + size_t index() const { return pn_; } + + constexpr PageId& operator+=(Length rhs) { + pn_ += rhs.raw_num(); + return *this; + } + + constexpr PageId& operator-=(Length rhs) { + ASSERT(pn_ >= rhs.raw_num()); + pn_ -= rhs.raw_num(); + return *this; + } + + private: + friend constexpr bool operator<(PageId lhs, PageId rhs); + friend constexpr bool operator>(PageId lhs, PageId rhs); + friend constexpr bool operator<=(PageId lhs, PageId rhs); + friend constexpr bool operator>=(PageId lhs, PageId rhs); + friend constexpr bool operator==(PageId lhs, PageId rhs); + friend constexpr bool operator!=(PageId lhs, PageId rhs); + friend constexpr Length operator-(PageId lhs, PageId rhs); + + uintptr_t pn_; +}; + +TCMALLOC_ATTRIBUTE_CONST +inline constexpr Length LengthFromBytes(size_t bytes) { + return Length(bytes >> kPageShift); +} + +// Convert byte size into pages. This won't overflow, but may return +// an unreasonably large value if bytes is huge enough. +TCMALLOC_ATTRIBUTE_CONST +inline constexpr Length BytesToLengthCeil(size_t bytes) { + return Length((bytes >> kPageShift) + + ((bytes & (kPageSize - 1)) > 0 ? 1 : 0)); +} + +TCMALLOC_ATTRIBUTE_CONST +inline constexpr Length BytesToLengthFloor(size_t bytes) { + return Length(bytes >> kPageShift); +} + +inline constexpr Length kMaxValidPages = Length::max(); +// For all span-lengths < kMaxPages we keep an exact-size list. 
+inline constexpr Length kMaxPages = Length(1 << (20 - kPageShift)); + +inline PageId& operator++(PageId& p) { // NOLINT(runtime/references) + return p += Length(1); +} + +TCMALLOC_ATTRIBUTE_CONST +inline constexpr bool operator<(PageId lhs, PageId rhs) { + return lhs.pn_ < rhs.pn_; +} + +TCMALLOC_ATTRIBUTE_CONST +inline constexpr bool operator>(PageId lhs, PageId rhs) { + return lhs.pn_ > rhs.pn_; +} + +TCMALLOC_ATTRIBUTE_CONST +inline constexpr bool operator<=(PageId lhs, PageId rhs) { + return lhs.pn_ <= rhs.pn_; +} + +TCMALLOC_ATTRIBUTE_CONST +inline constexpr bool operator>=(PageId lhs, PageId rhs) { + return lhs.pn_ >= rhs.pn_; +} + +TCMALLOC_ATTRIBUTE_CONST +inline constexpr bool operator==(PageId lhs, PageId rhs) { + return lhs.pn_ == rhs.pn_; +} + +TCMALLOC_ATTRIBUTE_CONST +inline constexpr bool operator!=(PageId lhs, PageId rhs) { + return lhs.pn_ != rhs.pn_; +} + +TCMALLOC_ATTRIBUTE_CONST +inline constexpr PageId operator+(PageId lhs, Length rhs) { return lhs += rhs; } + +TCMALLOC_ATTRIBUTE_CONST +inline constexpr PageId operator+(Length lhs, PageId rhs) { return rhs += lhs; } + +TCMALLOC_ATTRIBUTE_CONST +inline constexpr PageId operator-(PageId lhs, Length rhs) { return lhs -= rhs; } + +TCMALLOC_ATTRIBUTE_CONST +inline constexpr Length operator-(PageId lhs, PageId rhs) { + ASSERT(lhs.pn_ >= rhs.pn_); + return Length(lhs.pn_ - rhs.pn_); +} + +TCMALLOC_ATTRIBUTE_CONST +inline PageId PageIdContaining(const void* p) { + return PageId(reinterpret_cast<uintptr_t>(p) >> kPageShift); +} + +TCMALLOC_ATTRIBUTE_CONST +inline constexpr bool operator<(Length lhs, Length rhs) { + return lhs.n_ < rhs.n_; +} + +TCMALLOC_ATTRIBUTE_CONST +inline constexpr bool operator>(Length lhs, Length rhs) { + return lhs.n_ > rhs.n_; +} + +TCMALLOC_ATTRIBUTE_CONST +inline constexpr bool operator<=(Length lhs, Length rhs) { + return lhs.n_ <= rhs.n_; +} + +TCMALLOC_ATTRIBUTE_CONST +inline constexpr bool operator>=(Length lhs, Length rhs) { + return lhs.n_ >= rhs.n_; +} + +TCMALLOC_ATTRIBUTE_CONST +inline constexpr bool operator==(Length lhs, Length rhs) { + return lhs.n_ == rhs.n_; +} + +TCMALLOC_ATTRIBUTE_CONST +inline constexpr bool operator!=(Length lhs, Length rhs) { + return lhs.n_ != rhs.n_; +} + +inline Length& operator++(Length& l) { return l += Length(1); } + +inline Length& operator--(Length& l) { return l -= Length(1); } + +TCMALLOC_ATTRIBUTE_CONST +inline constexpr Length operator+(Length lhs, Length rhs) { + return Length(lhs.raw_num() + rhs.raw_num()); +} + +TCMALLOC_ATTRIBUTE_CONST +inline constexpr Length operator-(Length lhs, Length rhs) { + return Length(lhs.raw_num() - rhs.raw_num()); +} + +TCMALLOC_ATTRIBUTE_CONST +inline constexpr Length operator*(Length lhs, size_t rhs) { + return Length(lhs.raw_num() * rhs); +} + +TCMALLOC_ATTRIBUTE_CONST +inline constexpr Length operator*(size_t lhs, Length rhs) { + return Length(lhs * rhs.raw_num()); +} + +TCMALLOC_ATTRIBUTE_CONST +inline constexpr size_t operator/(Length lhs, Length rhs) { + return lhs.raw_num() / rhs.raw_num(); +} + +TCMALLOC_ATTRIBUTE_CONST +inline constexpr Length operator/(Length lhs, size_t rhs) { + ASSERT(rhs != 0); + return Length(lhs.raw_num() / rhs); +} + +TCMALLOC_ATTRIBUTE_CONST +inline constexpr Length operator%(Length lhs, Length rhs) { + ASSERT(rhs.raw_num() != 0); + return Length(lhs.raw_num() % rhs.raw_num()); +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_PAGES_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/parameters.cc 
b/contrib/libs/tcmalloc/tcmalloc/parameters.cc new file mode 100644 index 0000000000..3f8e6e1ef8 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/parameters.cc @@ -0,0 +1,271 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include "tcmalloc/parameters.h" + +#include "absl/time/time.h" +#include "tcmalloc/common.h" +#include "tcmalloc/experiment.h" +#include "tcmalloc/experiment_config.h" +#include "tcmalloc/huge_page_aware_allocator.h" +#include "tcmalloc/malloc_extension.h" +#include "tcmalloc/static_vars.h" +#include "tcmalloc/thread_cache.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// As decide_subrelease() is determined at runtime, we cannot require constant +// initialization for the atomic. This avoids an initialization order fiasco. +static std::atomic<bool>* hpaa_subrelease_ptr() { + static std::atomic<bool> v(decide_subrelease()); + return &v; +} + +// As skip_subrelease_interval_ns() is determined at runtime, we cannot require +// constant initialization for the atomic. This avoids an initialization order +// fiasco. +static std::atomic<int64_t>& skip_subrelease_interval_ns() { + static std::atomic<int64_t> v(absl::ToInt64Nanoseconds(absl::Seconds(60))); + return v; +} + +uint64_t Parameters::heap_size_hard_limit() { + size_t amount; + bool is_hard; + std::tie(amount, is_hard) = Static::page_allocator().limit(); + if (!is_hard) { + amount = 0; + } + return amount; +} + +void Parameters::set_heap_size_hard_limit(uint64_t value) { + TCMalloc_Internal_SetHeapSizeHardLimit(value); +} + +bool Parameters::hpaa_subrelease() { + return hpaa_subrelease_ptr()->load(std::memory_order_relaxed); +} + +void Parameters::set_hpaa_subrelease(bool value) { + TCMalloc_Internal_SetHPAASubrelease(value); +} + +ABSL_CONST_INIT std::atomic<MallocExtension::BytesPerSecond> + Parameters::background_release_rate_(MallocExtension::BytesPerSecond{ + 0 + }); +ABSL_CONST_INIT std::atomic<int64_t> Parameters::guarded_sampling_rate_( + 50 * kDefaultProfileSamplingRate); +ABSL_CONST_INIT std::atomic<bool> Parameters::shuffle_per_cpu_caches_enabled_( + false); +ABSL_CONST_INIT std::atomic<bool> + Parameters::reclaim_idle_per_cpu_caches_enabled_(true); +ABSL_CONST_INIT std::atomic<bool> Parameters::lazy_per_cpu_caches_enabled_( + true); +ABSL_CONST_INIT std::atomic<int32_t> Parameters::max_per_cpu_cache_size_( + kMaxCpuCacheSize); +ABSL_CONST_INIT std::atomic<int64_t> Parameters::max_total_thread_cache_bytes_( + kDefaultOverallThreadCacheSize); +ABSL_CONST_INIT std::atomic<double> + Parameters::peak_sampling_heap_growth_fraction_(1.1); +ABSL_CONST_INIT std::atomic<bool> Parameters::per_cpu_caches_enabled_( +#if defined(TCMALLOC_DEPRECATED_PERTHREAD) + false +#else + true +#endif +); + +ABSL_CONST_INIT std::atomic<int64_t> Parameters::profile_sampling_rate_( + kDefaultProfileSamplingRate); + +absl::Duration Parameters::filler_skip_subrelease_interval() { + return absl::Nanoseconds( + 
skip_subrelease_interval_ns().load(std::memory_order_relaxed)); +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +using tcmalloc::tcmalloc_internal::kLog; +using tcmalloc::tcmalloc_internal::Log; +using tcmalloc::tcmalloc_internal::Parameters; +using tcmalloc::tcmalloc_internal::Static; + +extern "C" { + +int64_t MallocExtension_Internal_GetProfileSamplingRate() { + return Parameters::profile_sampling_rate(); +} + +void MallocExtension_Internal_SetProfileSamplingRate(int64_t value) { + Parameters::set_profile_sampling_rate(value); +} + +int64_t MallocExtension_Internal_GetGuardedSamplingRate() { + return Parameters::guarded_sampling_rate(); +} + +void MallocExtension_Internal_SetGuardedSamplingRate(int64_t value) { + Parameters::set_guarded_sampling_rate(value); +} + +int64_t MallocExtension_Internal_GetMaxTotalThreadCacheBytes() { + return Parameters::max_total_thread_cache_bytes(); +} + +void MallocExtension_Internal_SetMaxTotalThreadCacheBytes(int64_t value) { + Parameters::set_max_total_thread_cache_bytes(value); +} + +void MallocExtension_Internal_GetSkipSubreleaseInterval(absl::Duration* ret) { + *ret = Parameters::filler_skip_subrelease_interval(); +} + +void MallocExtension_Internal_SetSkipSubreleaseInterval(absl::Duration value) { + Parameters::set_filler_skip_subrelease_interval(value); +} + +tcmalloc::MallocExtension::BytesPerSecond +MallocExtension_Internal_GetBackgroundReleaseRate() { + return Parameters::background_release_rate(); +} + +void MallocExtension_Internal_SetBackgroundReleaseRate( + tcmalloc::MallocExtension::BytesPerSecond rate) { + Parameters::set_background_release_rate(rate); +} + +void TCMalloc_Internal_SetBackgroundReleaseRate(size_t value) { + Parameters::background_release_rate_.store( + static_cast<tcmalloc::MallocExtension::BytesPerSecond>(value)); +} + +uint64_t TCMalloc_Internal_GetHeapSizeHardLimit() { + return Parameters::heap_size_hard_limit(); +} + +bool TCMalloc_Internal_GetHPAASubrelease() { + return Parameters::hpaa_subrelease(); +} + +bool TCMalloc_Internal_GetShufflePerCpuCachesEnabled() { + return Parameters::shuffle_per_cpu_caches(); +} + +bool TCMalloc_Internal_GetReclaimIdlePerCpuCachesEnabled() { + return Parameters::reclaim_idle_per_cpu_caches(); +} + +bool TCMalloc_Internal_GetLazyPerCpuCachesEnabled() { + return Parameters::lazy_per_cpu_caches(); +} + +double TCMalloc_Internal_GetPeakSamplingHeapGrowthFraction() { + return Parameters::peak_sampling_heap_growth_fraction(); +} + +bool TCMalloc_Internal_GetPerCpuCachesEnabled() { + return Parameters::per_cpu_caches(); +} + +void TCMalloc_Internal_SetGuardedSamplingRate(int64_t v) { + Parameters::guarded_sampling_rate_.store(v, std::memory_order_relaxed); +} + +// update_lock guards changes via SetHeapSizeHardLimit. +ABSL_CONST_INIT static absl::base_internal::SpinLock update_lock( + absl::kConstInit, absl::base_internal::SCHEDULE_KERNEL_ONLY); + +void TCMalloc_Internal_SetHeapSizeHardLimit(uint64_t value) { + // Ensure that page allocator is set up. + Static::InitIfNecessary(); + + absl::base_internal::SpinLockHolder l(&update_lock); + + size_t limit = std::numeric_limits<size_t>::max(); + bool active = false; + if (value > 0) { + limit = value; + active = true; + } + + bool currently_hard = Static::page_allocator().limit().second; + if (active || currently_hard) { + // Avoid resetting limit when current limit is soft. 
+ Static::page_allocator().set_limit(limit, active /* is_hard */); + Log(kLog, __FILE__, __LINE__, "[tcmalloc] set page heap hard limit to", + limit, "bytes"); + } +} + +void TCMalloc_Internal_SetHPAASubrelease(bool v) { + tcmalloc::tcmalloc_internal::hpaa_subrelease_ptr()->store( + v, std::memory_order_relaxed); +} + +void TCMalloc_Internal_SetShufflePerCpuCachesEnabled(bool v) { + Parameters::shuffle_per_cpu_caches_enabled_.store(v, + std::memory_order_relaxed); +} + +void TCMalloc_Internal_SetReclaimIdlePerCpuCachesEnabled(bool v) { + Parameters::reclaim_idle_per_cpu_caches_enabled_.store( + v, std::memory_order_relaxed); +} + +void TCMalloc_Internal_SetLazyPerCpuCachesEnabled(bool v) { + Parameters::lazy_per_cpu_caches_enabled_.store(v, std::memory_order_relaxed); +} + +void TCMalloc_Internal_SetMaxPerCpuCacheSize(int32_t v) { + Parameters::max_per_cpu_cache_size_.store(v, std::memory_order_relaxed); +} + +void TCMalloc_Internal_SetMaxTotalThreadCacheBytes(int64_t v) { + Parameters::max_total_thread_cache_bytes_.store(v, std::memory_order_relaxed); + + absl::base_internal::SpinLockHolder l( + &tcmalloc::tcmalloc_internal::pageheap_lock); + tcmalloc::tcmalloc_internal::ThreadCache::set_overall_thread_cache_size(v); +} + +void TCMalloc_Internal_SetPeakSamplingHeapGrowthFraction(double v) { + Parameters::peak_sampling_heap_growth_fraction_.store( + v, std::memory_order_relaxed); +} + +void TCMalloc_Internal_SetPerCpuCachesEnabled(bool v) { + Parameters::per_cpu_caches_enabled_.store(v, std::memory_order_relaxed); +} + +void TCMalloc_Internal_SetProfileSamplingRate(int64_t v) { + Parameters::profile_sampling_rate_.store(v, std::memory_order_relaxed); +} + +void TCMalloc_Internal_GetHugePageFillerSkipSubreleaseInterval( + absl::Duration* v) { + *v = Parameters::filler_skip_subrelease_interval(); +} + +void TCMalloc_Internal_SetHugePageFillerSkipSubreleaseInterval( + absl::Duration v) { + tcmalloc::tcmalloc_internal::skip_subrelease_interval_ns().store( + absl::ToInt64Nanoseconds(v), std::memory_order_relaxed); +} + +} // extern "C" diff --git a/contrib/libs/tcmalloc/tcmalloc/parameters.h b/contrib/libs/tcmalloc/tcmalloc/parameters.h new file mode 100644 index 0000000000..64893f0402 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/parameters.h @@ -0,0 +1,152 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
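+
+// Usage sketch (editorial illustration, not upstream code): the parameters
+// declared below are backed by plain atomics read with relaxed ordering, so
+// callers go through the static accessors, e.g.
+//
+//   using tcmalloc::tcmalloc_internal::Parameters;
+//   Parameters::set_max_per_cpu_cache_size(3 << 20);       // example value
+//   const int32_t cap = Parameters::max_per_cpu_cache_size();
+//
+// The 3 MiB figure is only a placeholder for illustration, not a recommended
+// setting.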
+ +#ifndef TCMALLOC_PARAMETERS_H_ +#define TCMALLOC_PARAMETERS_H_ + +#include <atomic> +#include <cmath> +#include <string> + +#include "absl/base/internal/spinlock.h" +#include "absl/time/time.h" +#include "absl/types/optional.h" +#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/parameter_accessors.h" +#include "tcmalloc/malloc_extension.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +class Parameters { + public: + static MallocExtension::BytesPerSecond background_release_rate() { + return background_release_rate_.load(std::memory_order_relaxed); + } + + static void set_background_release_rate( + MallocExtension::BytesPerSecond value) { + TCMalloc_Internal_SetBackgroundReleaseRate(static_cast<size_t>(value)); + } + + static uint64_t heap_size_hard_limit(); + static void set_heap_size_hard_limit(uint64_t value); + + static bool hpaa_subrelease(); + static void set_hpaa_subrelease(bool value); + + static int64_t guarded_sampling_rate() { + return guarded_sampling_rate_.load(std::memory_order_relaxed); + } + + static void set_guarded_sampling_rate(int64_t value) { + TCMalloc_Internal_SetGuardedSamplingRate(value); + } + + static int32_t max_per_cpu_cache_size() { + return max_per_cpu_cache_size_.load(std::memory_order_relaxed); + } + + static void set_max_per_cpu_cache_size(int32_t value) { + TCMalloc_Internal_SetMaxPerCpuCacheSize(value); + } + + static int64_t max_total_thread_cache_bytes() { + return max_total_thread_cache_bytes_.load(std::memory_order_relaxed); + } + + static void set_max_total_thread_cache_bytes(int64_t value) { + TCMalloc_Internal_SetMaxTotalThreadCacheBytes(value); + } + + static double peak_sampling_heap_growth_fraction() { + return peak_sampling_heap_growth_fraction_.load(std::memory_order_relaxed); + } + + static void set_peak_sampling_heap_growth_fraction(double value) { + TCMalloc_Internal_SetPeakSamplingHeapGrowthFraction(value); + } + + static bool shuffle_per_cpu_caches() { + return shuffle_per_cpu_caches_enabled_.load(std::memory_order_relaxed); + } + + static bool reclaim_idle_per_cpu_caches() { + return reclaim_idle_per_cpu_caches_enabled_.load(std::memory_order_relaxed); + } + + static bool lazy_per_cpu_caches() { + return lazy_per_cpu_caches_enabled_.load(std::memory_order_relaxed); + } + + static void set_lazy_per_cpu_caches(bool value) { + TCMalloc_Internal_SetLazyPerCpuCachesEnabled(value); + } + + static bool per_cpu_caches() { + return per_cpu_caches_enabled_.load(std::memory_order_relaxed); + } + + static void set_per_cpu_caches(bool value) { + TCMalloc_Internal_SetPerCpuCachesEnabled(value); + } + + static int64_t profile_sampling_rate() { + return profile_sampling_rate_.load(std::memory_order_relaxed); + } + + static void set_profile_sampling_rate(int64_t value) { + TCMalloc_Internal_SetProfileSamplingRate(value); + } + + static void set_filler_skip_subrelease_interval(absl::Duration value) { + TCMalloc_Internal_SetHugePageFillerSkipSubreleaseInterval(value); + } + + static absl::Duration filler_skip_subrelease_interval(); + + private: + friend void ::TCMalloc_Internal_SetBackgroundReleaseRate(size_t v); + friend void ::TCMalloc_Internal_SetGuardedSamplingRate(int64_t v); + friend void ::TCMalloc_Internal_SetHPAASubrelease(bool v); + friend void ::TCMalloc_Internal_SetShufflePerCpuCachesEnabled(bool v); + friend void ::TCMalloc_Internal_SetReclaimIdlePerCpuCachesEnabled(bool v); + friend void ::TCMalloc_Internal_SetLazyPerCpuCachesEnabled(bool v); + friend void 
::TCMalloc_Internal_SetMaxPerCpuCacheSize(int32_t v); + friend void ::TCMalloc_Internal_SetMaxTotalThreadCacheBytes(int64_t v); + friend void ::TCMalloc_Internal_SetPeakSamplingHeapGrowthFraction(double v); + friend void ::TCMalloc_Internal_SetPerCpuCachesEnabled(bool v); + friend void ::TCMalloc_Internal_SetProfileSamplingRate(int64_t v); + + friend void ::TCMalloc_Internal_SetHugePageFillerSkipSubreleaseInterval( + absl::Duration v); + + static std::atomic<MallocExtension::BytesPerSecond> background_release_rate_; + static std::atomic<int64_t> guarded_sampling_rate_; + static std::atomic<bool> shuffle_per_cpu_caches_enabled_; + static std::atomic<bool> reclaim_idle_per_cpu_caches_enabled_; + static std::atomic<bool> lazy_per_cpu_caches_enabled_; + static std::atomic<int32_t> max_per_cpu_cache_size_; + static std::atomic<int64_t> max_total_thread_cache_bytes_; + static std::atomic<double> peak_sampling_heap_growth_fraction_; + static std::atomic<bool> per_cpu_caches_enabled_; + static std::atomic<int64_t> profile_sampling_rate_; +}; + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_PARAMETERS_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/peak_heap_tracker.cc b/contrib/libs/tcmalloc/tcmalloc/peak_heap_tracker.cc new file mode 100644 index 0000000000..0dcc0df536 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/peak_heap_tracker.cc @@ -0,0 +1,93 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
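+
+// Editorial sketch (not upstream code) of the bookkeeping used below: saved
+// samples form a singly linked list that reuses the last stack slot of each
+// StackTrace as the "next" pointer, roughly
+//
+//   StackTrace* t = ...copy of a sampled trace...;
+//   if (t->depth == kMaxStackDepth) t->depth = kMaxStackDepth - 1;  // free a slot
+//   t->stack[kMaxStackDepth - 1] = reinterpret_cast<void*>(head);   // link
+//   head = t;
+//
+// DumpSample() walks the same pointer chain when emitting the peak-heap
+// profile.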
+ +#include "tcmalloc/peak_heap_tracker.h" + +#include <stdio.h> + +#include "absl/base/internal/spinlock.h" +#include "absl/memory/memory.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/page_heap_allocator.h" +#include "tcmalloc/parameters.h" +#include "tcmalloc/sampler.h" +#include "tcmalloc/span.h" +#include "tcmalloc/stack_trace_table.h" +#include "tcmalloc/static_vars.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +bool PeakHeapTracker::IsNewPeak() { + return peak_sampled_heap_size_.value() == 0 || + (static_cast<double>(Static::sampled_objects_size_.value()) / + peak_sampled_heap_size_.value() > + Parameters::peak_sampling_heap_growth_fraction()); +} + +void PeakHeapTracker::MaybeSaveSample() { + if (Parameters::peak_sampling_heap_growth_fraction() <= 0 || !IsNewPeak()) { + return; + } + + absl::base_internal::SpinLockHolder h(&pageheap_lock); + + // double-check in case another allocation was sampled (or a sampled + // allocation freed) while we were waiting for the lock + if (!IsNewPeak()) { + return; + } + peak_sampled_heap_size_.LossyAdd(Static::sampled_objects_size_.value() - + peak_sampled_heap_size_.value()); + + StackTrace *t = peak_sampled_span_stacks_, *next = nullptr; + while (t != nullptr) { + next = reinterpret_cast<StackTrace*>(t->stack[kMaxStackDepth - 1]); + Static::DestroySampleUserData(t->user_data); + Static::stacktrace_allocator().Delete(t); + t = next; + } + + next = nullptr; + for (Span* s : Static::sampled_objects_) { + t = Static::stacktrace_allocator().New(); + + StackTrace* sampled_stack = s->sampled_stack(); + *t = *sampled_stack; + t->user_data = Static::CopySampleUserData(sampled_stack->user_data); + if (t->depth == kMaxStackDepth) { + t->depth = kMaxStackDepth - 1; + } + t->stack[kMaxStackDepth - 1] = reinterpret_cast<void*>(next); + next = t; + } + peak_sampled_span_stacks_ = t; +} + +std::unique_ptr<ProfileBase> PeakHeapTracker::DumpSample() const { + auto profile = absl::make_unique<StackTraceTable>( + ProfileType::kPeakHeap, Sampler::GetSamplePeriod(), true, true); + + absl::base_internal::SpinLockHolder h(&pageheap_lock); + for (StackTrace* t = peak_sampled_span_stacks_; t != nullptr; + t = reinterpret_cast<StackTrace*>(t->stack[kMaxStackDepth - 1])) { + profile->AddTrace(1.0, *t); + } + return profile; +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/peak_heap_tracker.h b/contrib/libs/tcmalloc/tcmalloc/peak_heap_tracker.h new file mode 100644 index 0000000000..a9f071d1b5 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/peak_heap_tracker.h @@ -0,0 +1,61 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef TCMALLOC_PEAK_HEAP_TRACKER_H_ +#define TCMALLOC_PEAK_HEAP_TRACKER_H_ + +#include "absl/base/thread_annotations.h" +#include "tcmalloc/common.h" +#include "tcmalloc/internal/atomic_stats_counter.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/malloc_extension.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +class PeakHeapTracker { + public: + constexpr PeakHeapTracker() : peak_sampled_span_stacks_(nullptr) {} + + // Possibly save high-water-mark allocation stack traces for peak-heap + // profile. Should be called immediately after sampling an allocation. If + // the heap has grown by a sufficient amount since the last high-water-mark, + // it will save a copy of the sample profile. + void MaybeSaveSample() ABSL_LOCKS_EXCLUDED(pageheap_lock); + + // Return the saved high-water-mark heap profile, if any. + std::unique_ptr<ProfileBase> DumpSample() const + ABSL_LOCKS_EXCLUDED(pageheap_lock); + + size_t CurrentPeakSize() const { return peak_sampled_heap_size_.value(); } + + private: + // Linked list of stack traces from sampled allocations saved (from + // sampled_objects_ above) when we allocate memory from the system. The + // linked list pointer is stored in StackTrace::stack[kMaxStackDepth-1]. + StackTrace* peak_sampled_span_stacks_; + + // Sampled heap size last time peak_sampled_span_stacks_ was saved. Only + // written under pageheap_lock; may be read without it. + StatsCounter peak_sampled_heap_size_; + + bool IsNewPeak(); +}; + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_PEAK_HEAP_TRACKER_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/profile_test.cc b/contrib/libs/tcmalloc/tcmalloc/profile_test.cc new file mode 100644 index 0000000000..0bd62cd428 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/profile_test.cc @@ -0,0 +1,281 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <stddef.h> + +#include <algorithm> +#include <cstdint> +#include <limits> +#include <memory> +#include <new> +#include <set> +#include <thread> // NOLINT(build/c++11) +#include <utility> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/container/flat_hash_map.h" +#include "absl/synchronization/blocking_counter.h" +#include "benchmark/benchmark.h" +#include "tcmalloc/internal/declarations.h" +#include "tcmalloc/internal/linked_list.h" +#include "tcmalloc/malloc_extension.h" +#include "tcmalloc/testing/testutil.h" + +namespace tcmalloc { +namespace { + +TEST(AllocationSampleTest, TokenAbuse) { + auto token = MallocExtension::StartAllocationProfiling(); + void *ptr = ::operator new(512 * 1024 * 1024); + // TODO(b/183453911): Remove workaround for GCC 10.x deleting operator new, + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94295. + benchmark::DoNotOptimize(ptr); + ::operator delete(ptr); + // Repeated Claims should happily return null. 
+ auto profile = std::move(token).Stop(); + int count = 0; + profile.Iterate([&](const Profile::Sample &) { count++; }); + +#if !defined(UNDEFINED_BEHAVIOR_SANITIZER) + // UBSan does not implement our profiling API, but running the test can + // validate the correctness of the new/delete pairs. + EXPECT_EQ(count, 1); +#endif + + auto profile2 = std::move(token).Stop(); // NOLINT: use-after-move intended + int count2 = 0; + profile2.Iterate([&](const Profile::Sample &) { count2++; }); + EXPECT_EQ(count2, 0); + + // Delete (on the scope ending) without Claim should also be OK. + { MallocExtension::StartAllocationProfiling(); } +} + +// Verify that profiling sessions concurrent with allocations do not crash due +// to mutating pointers accessed by the sampling code (b/143623146). +TEST(AllocationSampleTest, RaceToClaim) { + MallocExtension::SetProfileSamplingRate(1 << 14); + + absl::BlockingCounter counter(2); + std::atomic<bool> stop{false}; + + std::thread t1([&]() { + counter.DecrementCount(); + + while (!stop) { + auto token = MallocExtension::StartAllocationProfiling(); + absl::SleepFor(absl::Microseconds(1)); + auto profile = std::move(token).Stop(); + } + }); + + std::thread t2([&]() { + counter.DecrementCount(); + + const int kNum = 1000000; + std::vector<void *> ptrs; + while (!stop) { + for (int i = 0; i < kNum; i++) { + ptrs.push_back(::operator new(1)); + } + for (void *p : ptrs) { + sized_delete(p, 1); + } + ptrs.clear(); + } + }); + + // Verify the threads are up and running before we start the clock. + counter.Wait(); + + absl::SleepFor(absl::Seconds(1)); + + stop.store(true); + + t1.join(); + t2.join(); +} + +TEST(AllocationSampleTest, SampleAccuracy) { + // Disable GWP-ASan, since it allocates different sizes than normal samples. + MallocExtension::SetGuardedSamplingRate(-1); + + // Allocate about 512 MiB each of various sizes. For _some_ but not all + // sizes, delete it as we go--it shouldn't matter for the sample count. + static const size_t kTotalPerSize = 512 * 1024 * 1024; + + // (object size, object alignment, keep objects) + struct Requests { + size_t size; + size_t alignment; + bool keep; + // objects we don't delete as we go + void *list = nullptr; + }; + std::vector<Requests> sizes = { + {8, 0, false}, {16, 16, true}, {1024, 0, false}, + {64 * 1024, 64, false}, {512 * 1024, 0, true}, {1024 * 1024, 128, true}}; + std::set<size_t> sizes_expected; + for (auto s : sizes) { + sizes_expected.insert(s.size); + } + auto token = MallocExtension::StartAllocationProfiling(); + + // We use new/delete to allocate memory, as malloc returns objects aligned to + // std::max_align_t. + for (auto &s : sizes) { + for (size_t bytes = 0; bytes < kTotalPerSize; bytes += s.size) { + void *obj; + if (s.alignment > 0) { + obj = operator new(s.size, static_cast<std::align_val_t>(s.alignment)); + } else { + obj = operator new(s.size); + } + if (s.keep) { + tcmalloc_internal::SLL_Push(&s.list, obj); + } else if (s.alignment > 0) { + operator delete(obj, static_cast<std::align_val_t>(s.alignment)); + } else { + operator delete(obj); + } + } + } + auto profile = std::move(token).Stop(); + + // size -> bytes seen + absl::flat_hash_map<size_t, size_t> m; + + // size -> alignment request + absl::flat_hash_map<size_t, size_t> alignment; + + for (auto s : sizes) { + alignment[s.size] = s.alignment; + } + + profile.Iterate([&](const tcmalloc::Profile::Sample &e) { + // Skip unexpected sizes. They may have been triggered by a background + // thread. 
+ if (sizes_expected.find(e.allocated_size) == sizes_expected.end()) { + return; + } + + // Don't check stack traces until we have evidence that's broken, it's + // tedious and done fairly well elsewhere. + m[e.allocated_size] += e.sum; + EXPECT_EQ(alignment[e.requested_size], e.requested_alignment); + }); + +#if !defined(UNDEFINED_BEHAVIOR_SANITIZER) + // UBSan does not implement our profiling API, but running the test can + // validate the correctness of the new/delete pairs. + size_t max_bytes = 0, min_bytes = std::numeric_limits<size_t>::max(); + EXPECT_EQ(m.size(), sizes_expected.size()); + for (auto seen : m) { + size_t bytes = seen.second; + min_bytes = std::min(min_bytes, bytes); + max_bytes = std::max(max_bytes, bytes); + } + // Hopefully we're in a fairly small range, that contains our actual + // allocation. + // TODO(b/134690164): better statistical tests here. + EXPECT_GE((min_bytes * 3) / 2, max_bytes); + EXPECT_LE((min_bytes * 3) / 4, kTotalPerSize); + EXPECT_LE(kTotalPerSize, (max_bytes * 4) / 3); +#endif + + // Remove the objects we left alive + for (auto &s : sizes) { + while (s.list != nullptr) { + void *obj = tcmalloc_internal::SLL_Pop(&s.list); + if (s.alignment > 0) { + operator delete(obj, static_cast<std::align_val_t>(s.alignment)); + } else { + operator delete(obj); + } + } + } +} + +TEST(FragmentationzTest, Accuracy) { + // Disable GWP-ASan, since it allocates different sizes than normal samples. + MallocExtension::SetGuardedSamplingRate(-1); + + // a fairly odd allocation size - will be rounded to 128. This lets + // us find our record in the table. + static const size_t kItemSize = 115; + // allocate about 3.5 GiB: + static const size_t kNumItems = 32 * 1024 * 1024; + + std::vector<std::unique_ptr<char[]>> keep; + std::vector<std::unique_ptr<char[]>> drop; + // hint expected sizes: + drop.reserve(kNumItems * 8 / 10); + keep.reserve(kNumItems * 2 / 10); + + // We allocate many items, then free 80% of them "randomly". (To + // decrease noise and speed up, we just keep every 5th one exactly.) + for (int i = 0; i < kNumItems; ++i) { + // Ideally we should use a malloc() here, for consistency; but unique_ptr + // doesn't come with a have a "free()" deleter; use ::operator new insted. + (i % 5 == 0 ? keep : drop) + .push_back(std::unique_ptr<char[]>( + static_cast<char *>(::operator new[](kItemSize)))); + } + drop.resize(0); + + // there are at least 64 items per span here. (8/10)^64 = 6.2e-7 ~= 0 + // probability we actually managed to free a page; every page is fragmented. + // We still have 20% or so of it allocated, so we should see 80% of it + // charged to these allocations as fragmentations. + auto profile = MallocExtension::SnapshotCurrent(ProfileType::kFragmentation); + + // Pull out the fragmentationz entry corresponding to this + size_t requested_size = 0; + size_t allocated_size = 0; + size_t sum = 0; + size_t count = 0; + profile.Iterate([&](const Profile::Sample &e) { + if (e.requested_size != kItemSize) return; + + if (requested_size == 0) { + allocated_size = e.allocated_size; + requested_size = e.requested_size; + } else { + // we will usually have single entry in + // profile, but in builds without optimization + // our fast-path code causes same call-site to + // have two different stack traces. Thus we + // expect and deal with second entry for same + // allocation. 
+ EXPECT_EQ(requested_size, e.requested_size); + EXPECT_EQ(allocated_size, e.allocated_size); + } + sum += e.sum; + count += e.count; + }); + + double frag_bytes = sum; + double real_frag_bytes = + static_cast<double>(allocated_size * kNumItems) * 0.8; + // We should be pretty close with this much data: + // TODO(b/134690164): this is still slightly flaky (<1%) - why? + EXPECT_NEAR(real_frag_bytes, frag_bytes, real_frag_bytes * 0.15) + << " sum = " << sum << " allocated = " << allocated_size + << " requested = " << requested_size << " count = " << count; +} + +} // namespace +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/realloc_test.cc b/contrib/libs/tcmalloc/tcmalloc/realloc_test.cc new file mode 100644 index 0000000000..e0e6aba606 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/realloc_test.cc @@ -0,0 +1,104 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Test realloc() functionality + +#include <assert.h> +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#include <algorithm> +#include <utility> + +#include "gtest/gtest.h" +#include "absl/random/random.h" +#include "benchmark/benchmark.h" + +namespace tcmalloc { +namespace { + +// Fill a buffer of the specified size with a predetermined pattern +void Fill(unsigned char* buffer, int n) { + for (int i = 0; i < n; i++) { + buffer[i] = (i & 0xff); + } +} + +// Check that the specified buffer has the predetermined pattern +// generated by Fill() +void ExpectValid(unsigned char* buffer, int n) { + for (int i = 0; i < n; i++) { + ASSERT_EQ((i & 0xff), buffer[i]); + } +} + +// Return the next interesting size/delta to check. Returns -1 if no more. 
+int NextSize(int size) { + if (size < 100) { + return size + 1; + } else if (size < 100000) { + // Find next power of two + int power = 1; + while (power < size) { + power <<= 1; + } + + // Yield (power-1, power, power+1) + if (size < power - 1) { + return power - 1; + } else if (size == power - 1) { + return power; + } else { + assert(size == power); + return power + 1; + } + } else { + return -1; + } +} + +TEST(ReallocTest, TestWithinCache) { + for (int src_size = 0; src_size >= 0; src_size = NextSize(src_size)) { + for (int dst_size = 0; dst_size >= 0; dst_size = NextSize(dst_size)) { + unsigned char* src = static_cast<unsigned char*>(malloc(src_size)); + Fill(src, src_size); + unsigned char* dst = static_cast<unsigned char*>(realloc(src, dst_size)); + ExpectValid(dst, std::min(src_size, dst_size)); + Fill(dst, dst_size); + ExpectValid(dst, dst_size); + if (dst != nullptr) free(dst); + } + } +} + +TEST(ReallocTest, AlignedAllocRealloc) { + std::pair<size_t, size_t> sizes[] = {{1024, 2048}, {512, 128}}; + + for (const auto& p : sizes) { + size_t src_size = p.first, dst_size = p.second; + + auto src = static_cast<unsigned char*>(aligned_alloc(32, src_size)); + Fill(src, src_size); + auto dst = static_cast<unsigned char*>(realloc(src, dst_size)); + ExpectValid(dst, std::min(src_size, dst_size)); + Fill(dst, dst_size); + ExpectValid(dst, dst_size); + if (dst != nullptr) free(dst); + } +} + +} // namespace +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/runtime_size_classes.cc b/contrib/libs/tcmalloc/tcmalloc/runtime_size_classes.cc new file mode 100644 index 0000000000..4bca6485ca --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/runtime_size_classes.cc @@ -0,0 +1,81 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/runtime_size_classes.h" + +#include <string.h> + +#include "absl/base/attributes.h" +#include "tcmalloc/internal/environment.h" +#include "tcmalloc/internal/logging.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { +namespace runtime_size_classes_internal { + +int ParseSizeClasses(absl::string_view env, int max_size, int max_classes, + SizeClassInfo* parsed) { + int c = 1; + int t = 0; + memset(parsed, 0, sizeof(parsed[0]) * max_classes); + for (char e : env) { + // TODO(b/120885588): replace with absl::from_chars, once it is fully + // implemented. 
+ if ('0' <= e && e <= '9') { + int n = e - '0'; + int v = 10 * parsed[c].Value(t) + n; + if (v > max_size) { + Log(kLog, __FILE__, __LINE__, "size class integer overflow", v, n); + return -3; + } + parsed[c].SetValue(t, v); + } else if (e == ';') { + // next size class + t = 0; + c++; + if (c >= max_classes) { + return c; + } + } else if (e == ',') { + t++; + if (t >= kSizeClassInfoMembers) { + Log(kLog, __FILE__, __LINE__, "size class too many commas", c); + return -1; + } + } else { + Log(kLog, __FILE__, __LINE__, "Delimiter not , or ;", c, e); + return -2; + } + } + // The size class [0, 0, 0] counts as a size class, but is not parsed. + return c + 1; +} + +} // namespace runtime_size_classes_internal + +int ABSL_ATTRIBUTE_NOINLINE MaybeSizeClassesFromEnv(int max_size, + int max_classes, + SizeClassInfo* parsed) { + const char* e = thread_safe_getenv("TCMALLOC_SIZE_CLASSES"); + if (!e) { + return 0; + } + return runtime_size_classes_internal::ParseSizeClasses(e, max_size, + max_classes, parsed); +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/runtime_size_classes.h b/contrib/libs/tcmalloc/tcmalloc/runtime_size_classes.h new file mode 100644 index 0000000000..42c5aa8859 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/runtime_size_classes.h @@ -0,0 +1,49 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Run-time specification of Size classes +#ifndef TCMALLOC_RUNTIME_SIZE_CLASSES_H_ +#define TCMALLOC_RUNTIME_SIZE_CLASSES_H_ + +#include "absl/strings/string_view.h" +#include "tcmalloc/size_class_info.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { +namespace runtime_size_classes_internal { + +// Set size classes from a string. +// Format: "size,pages,num_to_move;" +// Example: "8,1,32;16;32;40,1,16;128,2;256;512" +// This function doesn't do validity checking. If a field is missing, its +// value is set to zero. +// The number of size classes parsed is returned. +int ParseSizeClasses(absl::string_view env, int max_size, int max_classes, + SizeClassInfo* parsed); + +} // namespace runtime_size_classes_internal + +// If the environment variable TCMALLOC_SIZE_CLASSES is defined, its value is +// parsed using ParseSizeClasses and ApplySizeClassDefaults into parsed. The +// number of size classes parsed is returned. On error, a negative value is +// returned. 
+int MaybeSizeClassesFromEnv(int max_size, int max_classes, + SizeClassInfo* parsed); + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_RUNTIME_SIZE_CLASSES_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/runtime_size_classes_fuzz.cc b/contrib/libs/tcmalloc/tcmalloc/runtime_size_classes_fuzz.cc new file mode 100644 index 0000000000..89a111e3b8 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/runtime_size_classes_fuzz.cc @@ -0,0 +1,30 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/strings/string_view.h" +#include "tcmalloc/common.h" +#include "tcmalloc/runtime_size_classes.h" +#include "tcmalloc/size_class_info.h" + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t* d, size_t size) { + absl::string_view env = + absl::string_view(reinterpret_cast<const char*>(d), size); + + tcmalloc::tcmalloc_internal::SizeClassInfo + parsed[tcmalloc::tcmalloc_internal::kNumClasses]; + tcmalloc::tcmalloc_internal::runtime_size_classes_internal::ParseSizeClasses( + env, tcmalloc::tcmalloc_internal::kMaxSize, + tcmalloc::tcmalloc_internal::kNumClasses, parsed); + return 0; +} diff --git a/contrib/libs/tcmalloc/tcmalloc/runtime_size_classes_test.cc b/contrib/libs/tcmalloc/tcmalloc/runtime_size_classes_test.cc new file mode 100644 index 0000000000..6a8771f9e2 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/runtime_size_classes_test.cc @@ -0,0 +1,114 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/runtime_size_classes.h" + +#include <stdlib.h> + +#include "gtest/gtest.h" + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +using runtime_size_classes_internal::ParseSizeClasses; + +constexpr int kNumClasses = 4; +constexpr int kMaxSize = 1024 * 1024; + +TEST(RuntimeSizeClassesTest, EnvSingleFullClass) { + // Validate simple parsing. + SizeClassInfo parsed[kNumClasses]; + EXPECT_EQ(ParseSizeClasses("8,1,32", kMaxSize, kNumClasses, parsed), 2); + EXPECT_EQ(parsed[1].size, 8); + EXPECT_EQ(parsed[1].pages, 1); + EXPECT_EQ(parsed[1].num_to_move, 32); + + EXPECT_EQ(parsed[0].size, 0); + EXPECT_EQ(parsed[0].pages, 0); + EXPECT_EQ(parsed[0].num_to_move, 0); +} + +TEST(RuntimeSizeClassesTest, EnvSingleSizeOnlyClass) { + // Validate simple parsing. 
+ SizeClassInfo parsed[kNumClasses]; + EXPECT_EQ(ParseSizeClasses("8,1,2", kMaxSize, kNumClasses, parsed), 2); + EXPECT_EQ(parsed[1].size, 8); + EXPECT_EQ(parsed[1].pages, 1); + EXPECT_EQ(parsed[1].num_to_move, 2); +} + +TEST(RuntimeSizeClassesTest, EnvTwoFullClasses) { + // Validate two classes + SizeClassInfo parsed[kNumClasses]; + EXPECT_EQ(ParseSizeClasses("8,1,32;1024,2,16", kMaxSize, kNumClasses, parsed), + 3); + EXPECT_EQ(parsed[1].size, 8); + EXPECT_EQ(parsed[1].pages, 1); + EXPECT_EQ(parsed[1].num_to_move, 32); + + EXPECT_EQ(parsed[2].size, 1024); + EXPECT_EQ(parsed[2].pages, 2); + EXPECT_EQ(parsed[2].num_to_move, 16); +} + +TEST(RuntimeSizeClassesTest, ParseArrayLimit) { + // Validate that the limit on the number of size classes is enforced. + SizeClassInfo parsed[kNumClasses] = { + {0, 0, 0}, + {9, 9, 9}, + {7, 7, 7}, + }; + EXPECT_EQ(ParseSizeClasses("8,1,32;1024,2,16", kMaxSize, 2, parsed), 2); + + EXPECT_EQ(parsed[1].size, 8); + EXPECT_EQ(parsed[1].pages, 1); + EXPECT_EQ(parsed[1].num_to_move, 32); + + EXPECT_EQ(parsed[2].size, 7); + EXPECT_EQ(parsed[2].pages, 7); + EXPECT_EQ(parsed[2].num_to_move, 7); +} + +TEST(RuntimeSizeClassesTest, EnvBadDelimiter) { + // Invalid class sizes should be caught + SizeClassInfo parsed[kNumClasses]; + EXPECT_EQ(ParseSizeClasses("8/4,16,3,1", kMaxSize, kNumClasses, parsed), -2); +} + +TEST(RuntimeSizeClassesTest, EnvTooManyCommas) { + // Invalid class sizes should be caught + SizeClassInfo parsed[kNumClasses]; + EXPECT_EQ(ParseSizeClasses("8,4,16,3", kMaxSize, kNumClasses, parsed), -1); +} + +TEST(RuntimeSizeClassesTest, EnvIntOverflow) { + // Invalid class sizes should be caught + SizeClassInfo parsed[kNumClasses]; + EXPECT_EQ(ParseSizeClasses("8,4,2147483648", kMaxSize, kNumClasses, parsed), + -3); +} + +TEST(RuntimeSizeClassesTest, EnvVariableExamined) { + SizeClassInfo parsed[kNumClasses]; + setenv("TCMALLOC_SIZE_CLASSES", "256,13,31", 1); + EXPECT_EQ(MaybeSizeClassesFromEnv(kMaxSize, kNumClasses, parsed), 2); + EXPECT_EQ(parsed[1].size, 256); + EXPECT_EQ(parsed[1].pages, 13); + EXPECT_EQ(parsed[1].num_to_move, 31); +} + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/sampler.cc b/contrib/libs/tcmalloc/tcmalloc/sampler.cc new file mode 100644 index 0000000000..5e89c9e830 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/sampler.cc @@ -0,0 +1,206 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
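Editorial aside (a minimal sketch, not part of the imported sources): this is how the runtime size-class override exercised by the tests above would typically be consumed, mirroring RuntimeSizeClassesTest.EnvVariableExamined. The function name RuntimeSizeClassSketch and the hard-coded limits are illustrative assumptions, not tcmalloc API.

#include <stdlib.h>

#include "tcmalloc/runtime_size_classes.h"
#include "tcmalloc/size_class_info.h"

void RuntimeSizeClassSketch() {
  using tcmalloc::tcmalloc_internal::MaybeSizeClassesFromEnv;
  using tcmalloc::tcmalloc_internal::SizeClassInfo;

  setenv("TCMALLOC_SIZE_CLASSES", "256,13,31", /*overwrite=*/1);

  SizeClassInfo parsed[4] = {};
  // Two classes are reported: the implicit {0, 0, 0} entry plus the parsed one.
  const int n = MaybeSizeClassesFromEnv(/*max_size=*/1024 * 1024,
                                        /*max_classes=*/4, parsed);
  // Expect n == 2 with parsed[1].size == 256, parsed[1].pages == 13,
  // parsed[1].num_to_move == 31, matching the test above.
  (void)n;
}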
+ +#include "tcmalloc/sampler.h" + +#include <algorithm> +#include <atomic> +#include <cmath> +#include <limits> + +#include "tcmalloc/common.h" +#include "tcmalloc/parameters.h" +#include "tcmalloc/static_vars.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +ssize_t Sampler::GetSamplePeriod() { + return Parameters::profile_sampling_rate(); +} + +// Run this before using your sampler +ABSL_ATTRIBUTE_NOINLINE void Sampler::Init(uint64_t seed) { + ASSERT(seed != 0); + + // do_malloc comes here without having initialized statics, and + // PickNextSamplingPoint uses data initialized in static vars. + Static::InitIfNecessary(); + + // Initialize PRNG + rnd_ = seed; + // Step it forward 20 times for good measure + for (int i = 0; i < 20; i++) { + rnd_ = NextRandom(rnd_); + } + // Initialize counters + true_bytes_until_sample_ = PickNextSamplingPoint(); + if (Static::IsOnFastPath()) { + bytes_until_sample_ = true_bytes_until_sample_; + was_on_fast_path_ = true; + } else { + // Force the next allocation to hit the slow path. + ASSERT(bytes_until_sample_ == 0); + was_on_fast_path_ = false; + } + allocs_until_guarded_sample_ = PickNextGuardedSamplingPoint(); +} + +ssize_t Sampler::PickNextSamplingPoint() { + sample_period_ = GetSamplePeriod(); + if (sample_period_ <= 0) { + // In this case, we don't want to sample ever, and the larger a + // value we put here, the longer until we hit the slow path + // again. However, we have to support the flag changing at + // runtime, so pick something reasonably large (to keep overhead + // low) but small enough that we'll eventually start to sample + // again. + return 128 << 20; + } + if (ABSL_PREDICT_FALSE(sample_period_ == 1)) { + // A sample period of 1, generally used only in tests due to its exorbitant + // cost, is a request for *every* allocation to be sampled. + return 1; + } + return GetGeometricVariable(sample_period_); +} + +ssize_t Sampler::PickNextGuardedSamplingPoint() { + double guarded_sample_rate = Parameters::guarded_sampling_rate(); + double profile_sample_rate = Parameters::profile_sampling_rate(); + if (guarded_sample_rate < 0 || profile_sample_rate <= 0) { + // Guarded sampling is disabled but could be turned on at run time. So we + // return a sampling point (default mean=100) in case guarded sampling is + // later enabled. Since the flag is also checked in + // ShouldSampleGuardedAllocation(), guarded sampling is still guaranteed + // not to run until it is enabled. + return GetGeometricVariable(/*mean=*/100); + } + return GetGeometricVariable( + std::ceil(guarded_sample_rate / profile_sample_rate)); +} + +// Generates a geometric variable with the specified mean. +// This is done by generating a random number between 0 and 1 and applying +// the inverse cumulative distribution function for an exponential. +// Specifically: Let m be the inverse of the sample period, then +// the probability distribution function is m*exp(-mx) so the CDF is +// p = 1 - exp(-mx), so +// q = 1 - p = exp(-mx) +// log_e(q) = -mx +// -log_e(q)/m = x +// log_2(q) * (-log_e(2) * 1/m) = x +// In the code, q is actually in the range 1 to 2**26, hence the -26 below +ssize_t Sampler::GetGeometricVariable(ssize_t mean) { + rnd_ = NextRandom(rnd_); + // Take the top 26 bits as the random number + // (This plus the 1<<58 sampling bound give a max possible step of + // 5194297183973780480 bytes.) 
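// (Editorial note, not part of the upstream file: algebraically, the
// expression below reduces to interval = -mean * ln(q / 2^26).  Since
// q / 2^26 is approximately Uniform(0, 1], this is the standard inverse-CDF
// draw from an exponential distribution with the requested mean, exactly as
// derived in the comment above.)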
+ const uint64_t prng_mod_power = 48; // Number of bits in prng + // The uint32_t cast is to prevent a (hard-to-reproduce) NAN + // under piii debug for some binaries. + double q = static_cast<uint32_t>(rnd_ >> (prng_mod_power - 26)) + 1.0; + // Put the computed p-value through the CDF of a geometric. + double interval = (std::log2(q) - 26) * (-std::log(2.0) * mean); + + // Very large values of interval overflow ssize_t. If we happen to hit this + // improbable condition, we simply cheat and clamp interval to the largest + // supported value. This is slightly tricky, since casting the maximum + // ssize_t value to a double rounds it up, and casting that rounded value + // back to an ssize_t will still overflow. Thus, we specifically need to + // use a ">=" condition here, rather than simply ">" as would be appropriate + // if the arithmetic were exact. + if (interval >= static_cast<double>(std::numeric_limits<ssize_t>::max())) + return std::numeric_limits<ssize_t>::max(); + else + return static_cast<ssize_t>(interval); +} + +size_t Sampler::RecordAllocationSlow(size_t k) { + static std::atomic<uint64_t> global_randomness; + + if (ABSL_PREDICT_FALSE(!initialized_)) { + initialized_ = true; + uint64_t global_seed = + global_randomness.fetch_add(1, std::memory_order_relaxed); + Init(reinterpret_cast<uintptr_t>(this) ^ global_seed); + if (static_cast<size_t>(true_bytes_until_sample_) > k) { + true_bytes_until_sample_ -= k; + if (Static::IsOnFastPath()) { + bytes_until_sample_ -= k; + was_on_fast_path_ = true; + } + return 0; + } + } + + if (ABSL_PREDICT_FALSE(true_bytes_until_sample_ > k)) { + // The last time we picked a sampling point, we were on the slow path. We + // don't want to sample yet since true_bytes_until_sample_ >= k. + true_bytes_until_sample_ -= k; + + if (ABSL_PREDICT_TRUE(Static::IsOnFastPath())) { + // We've moved from the slow path to the fast path since the last sampling + // point was picked. + bytes_until_sample_ = true_bytes_until_sample_; + true_bytes_until_sample_ = 0; + was_on_fast_path_ = true; + } else { + bytes_until_sample_ = 0; + was_on_fast_path_ = false; + } + + return 0; + } + + // Compute sampling weight (i.e. the number of bytes represented by this + // sample in expectation). + // + // Let k be the size of the allocation, p be the sample period + // (sample_period_), and f the number of bytes after which we decided to + // sample (either bytes_until_sample_ or true_bytes_until_sample_). On + // average, if we were to continue taking samples every p bytes, we would take + // (k - f) / p additional samples in this allocation, plus the one we are + // taking now, for 1 + (k - f) / p total samples. Multiplying by p, the mean + // number of bytes between samples, gives us a weight of p + k - f. + // + size_t weight = + sample_period_ + k - + (was_on_fast_path_ ? bytes_until_sample_ : true_bytes_until_sample_); + const auto point = PickNextSamplingPoint(); + if (ABSL_PREDICT_TRUE(Static::IsOnFastPath())) { + bytes_until_sample_ = point; + true_bytes_until_sample_ = 0; + was_on_fast_path_ = true; + } else { + bytes_until_sample_ = 0; + true_bytes_until_sample_ = point; + was_on_fast_path_ = false; + } + return GetSamplePeriod() <= 0 ? 
0 : weight; +} + +double AllocatedBytes(const StackTrace& stack, bool unsample) { + if (unsample) { + return static_cast<double>(stack.weight) * stack.allocated_size / + (stack.requested_size + 1); + } else { + return stack.allocated_size; + } +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/sampler.h b/contrib/libs/tcmalloc/tcmalloc/sampler.h new file mode 100644 index 0000000000..d18dd44234 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/sampler.h @@ -0,0 +1,298 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_SAMPLER_H_ +#define TCMALLOC_SAMPLER_H_ + +#include <stddef.h> +#include <stdint.h> +#include <sys/types.h> + +#include "absl/base/attributes.h" +#include "absl/base/optimization.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/parameters.h" +#include "tcmalloc/static_vars.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +//------------------------------------------------------------------- +// Sampler to decide when to create a sample trace for an allocation +// Not thread safe: Each thread should have it's own sampler object. +// Caller must use external synchronization if used +// from multiple threads. +// +// With 512K average sample step (the default): +// the probability of sampling a 4K allocation is about 0.00778 +// the probability of sampling a 1MB allocation is about 0.865 +// the probability of sampling a 1GB allocation is about 1.00000 +// In general, the probablity of sampling is an allocation of size X +// given a flag value of Y (default 1M) is: +// 1 - e^(-X/Y) +// +// With 128K average sample step: +// the probability of sampling a 1MB allocation is about 0.99966 +// the probability of sampling a 1GB allocation is about 1.0 +// (about 1 - 2**(-26)) +// With 1M average sample step: +// the probability of sampling a 4K allocation is about 0.00390 +// the probability of sampling a 1MB allocation is about 0.632 +// the probability of sampling a 1GB allocation is about 1.0 +// +// The sampler works by representing memory as a long stream from +// which allocations are taken. Some of the bytes in this stream are +// marked and if an allocation includes a marked byte then it is +// sampled. Bytes are marked according to a Poisson point process +// with each byte being marked independently with probability +// p = 1/profile_sampling_rate. This makes the probability +// of sampling an allocation of X bytes equal to the CDF of +// a geometric with mean profile_sampling_rate. (ie. the +// probability that at least one byte in the range is marked). This +// is accurately given by the CDF of the corresponding exponential +// distribution : 1 - e^(-X/profile_sampling_rate) +// Independence of the byte marking ensures independence of +// the sampling of each allocation. 
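// (Editorial check, not in the upstream header: plugging the default 512 KiB
// period into 1 - e^(-X/Y) reproduces the figures quoted above:
// 1 - e^(-4096/524288) ~= 0.00778 for a 4 KiB allocation, and
// 1 - e^(-1048576/524288) = 1 - e^-2 ~= 0.865 for a 1 MiB allocation.)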
+// +// This scheme is implemented by noting that, starting from any +// fixed place, the number of bytes until the next marked byte +// is geometrically distributed. This number is recorded as +// bytes_until_sample_. Every allocation subtracts from this +// number until it is less than 0. When this happens the current +// allocation is sampled. +// +// When an allocation occurs, bytes_until_sample_ is reset to +// a new independtly sampled geometric number of bytes. The +// memoryless property of the point process means that this may +// be taken as the number of bytes after the end of the current +// allocation until the next marked byte. This ensures that +// very large allocations which would intersect many marked bytes +// only result in a single call to PickNextSamplingPoint. +//------------------------------------------------------------------- + +class SamplerTest; + +class Sampler { + public: + // Record allocation of "k" bytes. If the allocation needs to be sampled, + // return its sampling weight (i.e., the expected number of allocations of + // this size represented by this sample); otherwise return 0. + size_t RecordAllocation(size_t k); + + // Same as above (but faster), except: + // a) REQUIRES(k < std::numeric_limits<ssize_t>::max()) + // b) if this returns false, you must call RecordAllocation + // to confirm if sampling truly needed. + // + // The point of this function is to only deal with common case of no + // sampling and let caller (which is in malloc fast-path) to + // "escalate" to fuller and slower logic only if necessary. + bool TryRecordAllocationFast(size_t k); + + // If the guarded sampling point has been reached, selects a new sampling + // point and returns true. Otherwise returns false. + bool ShouldSampleGuardedAllocation(); + + // Returns the Sampler's cached Static::IsOnFastPath state. This may differ + // from a fresh computation due to activating per-CPU mode or the + // addition/removal of hooks. + bool IsOnFastPath() const; + void UpdateFastPathState(); + + // Generate a geometric with mean profile_sampling_rate. + // + // Remembers the value of sample_rate for use in reweighing the sample + // later (so that if the flag value changes before the next sample is taken, + // the next sample is still weighed properly). + ssize_t PickNextSamplingPoint(); + + // Generates a geometric with mean guarded_sample_rate. + ssize_t PickNextGuardedSamplingPoint(); + + // Returns the current sample period + static ssize_t GetSamplePeriod(); + + // The following are public for the purposes of testing + static uint64_t NextRandom(uint64_t rnd_); // Returns the next prng value + + constexpr Sampler() + : bytes_until_sample_(0), + sample_period_(0), + true_bytes_until_sample_(0), + allocs_until_guarded_sample_(0), + rnd_(0), + initialized_(false), + was_on_fast_path_(false) {} + + private: + // Bytes until we sample next. + // + // More specifically when bytes_until_sample_ is X, we can allocate + // X bytes without triggering sampling; on the (X+1)th allocated + // byte, the containing allocation will be sampled. + // + // Always non-negative with only very brief exceptions (see + // DecrementFast{,Finish}, so casting to size_t is ok. + ssize_t bytes_until_sample_; + + // Saved copy of the sampling period from when we actually set + // (true_)bytes_until_sample_. This allows us to properly calculate the sample + // weight of the first sample after the sampling period is changed. 
+ ssize_t sample_period_; + + // true_bytes_until_sample_ tracks the sampling point when we are on the slow + // path when picking sampling points (!Static::IsOnFastPath()) up until we + // notice (due to another allocation) that this state has changed. + ssize_t true_bytes_until_sample_; + + // Number of sampled allocations until we do a guarded allocation. + ssize_t allocs_until_guarded_sample_; + + uint64_t rnd_; // Cheap random number generator + bool initialized_; + bool was_on_fast_path_; + + private: + friend class SamplerTest; + // Initialize this sampler. + void Init(uint64_t seed); + size_t RecordAllocationSlow(size_t k); + ssize_t GetGeometricVariable(ssize_t mean); +}; + +inline size_t Sampler::RecordAllocation(size_t k) { + // The first time we enter this function we expect bytes_until_sample_ + // to be zero, and we must call SampleAllocationSlow() to ensure + // proper initialization of static vars. + ASSERT(Static::IsInited() || bytes_until_sample_ == 0); + + // Avoid missampling 0. + k++; + + // Note that we have to deal with arbitrarily large values of k + // here. Thus we're upcasting bytes_until_sample_ to unsigned rather + // than the other way around. And this is why this code cannot be + // merged with DecrementFast code below. + if (static_cast<size_t>(bytes_until_sample_) <= k) { + size_t result = RecordAllocationSlow(k); + ASSERT(Static::IsInited()); + return result; + } else { + bytes_until_sample_ -= k; + ASSERT(Static::IsInited()); + return 0; + } +} + +inline bool ABSL_ATTRIBUTE_ALWAYS_INLINE +Sampler::TryRecordAllocationFast(size_t k) { + // Avoid missampling 0. Callers pass in requested size (which based on the + // assertion below k>=0 at this point). Since subtracting 0 from + // bytes_until_sample_ is a no-op, we increment k by one and resolve the + // effect on the distribution in Sampler::Unsample. + k++; + + // For efficiency reason, we're testing bytes_until_sample_ after + // decrementing it by k. This allows compiler to do sub <reg>, <mem> + // followed by conditional jump on sign. But it is correct only if k + // is actually smaller than largest ssize_t value. Otherwise + // converting k to signed value overflows. + // + // It would be great for generated code to be sub <reg>, <mem> + // followed by conditional jump on 'carry', which would work for + // arbitrary values of k, but there seem to be no way to express + // that in C++. + // + // Our API contract explicitly states that only small values of k + // are permitted. And thus it makes sense to assert on that. + ASSERT(static_cast<ssize_t>(k) > 0); + + bytes_until_sample_ -= static_cast<ssize_t>(k); + if (ABSL_PREDICT_FALSE(bytes_until_sample_ <= 0)) { + // Note, we undo sampling counter update, since we're not actually + // handling slow path in the "needs sampling" case (calling + // RecordAllocationSlow to reset counter). And we do that in order + // to avoid non-tail calls in malloc fast-path. See also comments + // on declaration inside Sampler class. + // + // volatile is used here to improve compiler's choice of + // instuctions. We know that this path is very rare and that there + // is no need to keep previous value of bytes_until_sample_ in + // register. This helps compiler generate slightly more efficient + // sub <reg>, <mem> instruction for subtraction above. 
+ volatile ssize_t *ptr = + const_cast<volatile ssize_t *>(&bytes_until_sample_); + *ptr += k; + return false; + } + return true; +} + +inline bool ABSL_ATTRIBUTE_ALWAYS_INLINE +Sampler::ShouldSampleGuardedAllocation() { + if (Parameters::guarded_sampling_rate() < 0) return false; + allocs_until_guarded_sample_--; + if (ABSL_PREDICT_FALSE(allocs_until_guarded_sample_ < 0)) { + allocs_until_guarded_sample_ = PickNextGuardedSamplingPoint(); + return true; + } + return false; +} + +// Inline functions which are public for testing purposes + +// Returns the next prng value. +// pRNG is: aX+b mod c with a = 0x5DEECE66D, b = 0xB, c = 1<<48 +// This is the lrand64 generator. +inline uint64_t Sampler::NextRandom(uint64_t rnd) { + const uint64_t prng_mult = UINT64_C(0x5DEECE66D); + const uint64_t prng_add = 0xB; + const uint64_t prng_mod_power = 48; + const uint64_t prng_mod_mask = + ~((~static_cast<uint64_t>(0)) << prng_mod_power); + return (prng_mult * rnd + prng_add) & prng_mod_mask; +} + +inline bool Sampler::IsOnFastPath() const { return was_on_fast_path_; } + +inline void Sampler::UpdateFastPathState() { + const bool is_on_fast_path = Static::IsOnFastPath(); + if (ABSL_PREDICT_TRUE(was_on_fast_path_ == is_on_fast_path)) { + return; + } + + was_on_fast_path_ = is_on_fast_path; + + if (is_on_fast_path) { + bytes_until_sample_ = true_bytes_until_sample_; + true_bytes_until_sample_ = 0; + } else { + true_bytes_until_sample_ = bytes_until_sample_; + bytes_until_sample_ = 0; + } +} + +// If unsample is true, return the approximate number of bytes that would have +// been allocated to obtain this sample. This is only accurate if the sample +// period hasn't changed since the allocation(s) were made. +// +// If unsample is false, the caller will handle unsampling. +double AllocatedBytes(const StackTrace &stack, bool unsample); + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_SAMPLER_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/size_class_info.h b/contrib/libs/tcmalloc/tcmalloc/size_class_info.h new file mode 100644 index 0000000000..a424432b75 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/size_class_info.h @@ -0,0 +1,79 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Specification of Size classes +#ifndef TCMALLOC_size_class_info_H_ +#define TCMALLOC_size_class_info_H_ + +#include <stddef.h> + +#include "tcmalloc/internal/logging.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// The number of members in SizeClassInfo +static constexpr int kSizeClassInfoMembers = 3; + +// Precomputed size class parameters. 
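// (Editorial example, not part of the upstream header: a row such as
// {96, 1, 32} means objects of up to 96 bytes are carved out of 1-page spans
// and moved between the per-thread free lists and the central free list in
// batches of 32.  The field meanings are documented on the members below.)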
+struct SizeClassInfo { + int Value(int index) const { + switch (index) { + case 0: + return size; + case 1: + return pages; + case 2: + return num_to_move; + } + CHECK_CONDITION(index < kSizeClassInfoMembers); + return 0; + } + + void SetValue(int index, size_t v) { + switch (index) { + case 0: + size = v; + break; + case 1: + pages = v; + break; + case 2: + num_to_move = v; + break; + default: + CHECK_CONDITION(index < kSizeClassInfoMembers); + } + } + + // Max size storable in that class + size_t size; + + // Number of pages to allocate at a time + size_t pages; + + // Number of objects to move between a per-thread list and a central list in + // one shot. We want this to be not too small so we can amortize the lock + // overhead for accessing the central list. Making it too big may temporarily + // cause unnecessary memory wastage in the per-thread free list until the + // scavenger cleans up the list. + size_t num_to_move; +}; + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_size_class_info_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/size_classes.cc b/contrib/libs/tcmalloc/tcmalloc/size_classes.cc new file mode 100644 index 0000000000..f4b444994d --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/size_classes.cc @@ -0,0 +1,711 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/common.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { + +namespace tcmalloc_internal { + +// <fixed> is fixed per-size-class overhead due to end-of-span fragmentation +// and other factors. For instance, if we have a 96 byte size class, and use a +// single 8KiB page, then we will hold 85 objects per span, and have 32 bytes +// left over. There is also a fixed component of 48 bytes of TCMalloc metadata +// per span. Together, the fixed overhead would be wasted/allocated = +// (32 + 48) / (8192 - 32) ~= 0.98%. +// There is also a dynamic component to overhead based on mismatches between the +// number of bytes requested and the number of bytes provided by the size class. +// Together they sum to the total overhead; for instance if you asked for a +// 50-byte allocation that rounds up to a 64-byte size class, the dynamic +// overhead would be 28%, and if <fixed> were 22% it would mean (on average) +// 25 bytes of overhead for allocations of that size. 
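Editorial aside (a minimal sketch, not part of the imported sources): the constants below reproduce the 0.98% fixed-overhead figure quoted in the comment above for a 96-byte class on 8 KiB pages; the 48-byte per-span metadata value is taken from that comment rather than from a real header. The dynamic part works the same way: a 50-byte request served from a 64-byte class wastes (64 - 50) / 50 = 28%, and together with a hypothetical 22% fixed share that is 50% of 50 bytes, i.e. the 25 bytes mentioned above.

constexpr int kPageBytes = 8192;        // 8 KiB page (TCMALLOC_PAGE_SHIFT == 13)
constexpr int kSpanMetadataBytes = 48;  // per-span metadata, per the comment above
constexpr int kObjectSize = 96;
constexpr int kObjectsPerSpan = kPageBytes / kObjectSize;                   // 85
constexpr int kLeftoverBytes = kPageBytes - kObjectsPerSpan * kObjectSize;  // 32
constexpr double kFixedOverhead =
    static_cast<double>(kLeftoverBytes + kSpanMetadataBytes) /
    (kPageBytes - kLeftoverBytes);  // (32 + 48) / 8160 ~= 0.98%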
+ +// clang-format off +#if defined(__cpp_aligned_new) && __STDCPP_DEFAULT_NEW_ALIGNMENT__ <= 8 +#if TCMALLOC_PAGE_SHIFT == 13 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +static const int kCount = 86; +static_assert(kCount <= kNumClasses); +const int SizeMap::kSizeClassesCount = kCount; +const SizeClassInfo SizeMap::kSizeClasses[SizeMap::kSizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.59% + { 16, 1, 32}, // 0.59% + { 24, 1, 32}, // 0.68% + { 32, 1, 32}, // 0.59% + { 40, 1, 32}, // 0.98% + { 48, 1, 32}, // 0.98% + { 56, 1, 32}, // 0.78% + { 64, 1, 32}, // 0.59% + { 72, 1, 32}, // 1.28% + { 80, 1, 32}, // 0.98% + { 88, 1, 32}, // 0.68% + { 96, 1, 32}, // 0.98% + { 104, 1, 32}, // 1.58% + { 112, 1, 32}, // 0.78% + { 120, 1, 32}, // 0.98% + { 128, 1, 32}, // 0.59% + { 136, 1, 32}, // 0.98% + { 144, 1, 32}, // 2.18% + { 160, 1, 32}, // 0.98% + { 176, 1, 32}, // 1.78% + { 192, 1, 32}, // 2.18% + { 208, 1, 32}, // 1.58% + { 224, 1, 32}, // 2.18% + { 240, 1, 32}, // 0.98% + { 256, 1, 32}, // 0.59% + { 272, 1, 32}, // 0.98% + { 296, 1, 32}, // 3.10% + { 312, 1, 32}, // 1.58% + { 336, 1, 32}, // 2.18% + { 352, 1, 32}, // 1.78% + { 368, 1, 32}, // 1.78% + { 408, 1, 32}, // 0.98% + { 448, 1, 32}, // 2.18% + { 480, 1, 32}, // 0.98% + { 512, 1, 32}, // 0.59% + { 576, 1, 32}, // 2.18% + { 640, 1, 32}, // 7.29% + { 704, 1, 32}, // 6.40% + { 768, 1, 32}, // 7.29% + { 896, 1, 32}, // 2.18% + { 1024, 1, 32}, // 0.59% + { 1152, 2, 32}, // 1.88% + { 1280, 2, 32}, // 6.98% + { 1408, 2, 32}, // 6.10% + { 1536, 2, 32}, // 6.98% + { 1792, 2, 32}, // 1.88% + { 2048, 2, 32}, // 0.29% + { 2304, 2, 28}, // 1.88% + { 2688, 2, 24}, // 1.88% + { 2816, 3, 23}, // 9.30% + { 3200, 2, 20}, // 2.70% + { 3456, 3, 18}, // 1.79% + { 3584, 4, 18}, // 1.74% + { 4096, 1, 16}, // 0.29% + { 4736, 3, 13}, // 3.99% + { 5376, 2, 12}, // 1.88% + { 6144, 3, 10}, // 0.20% + { 6528, 4, 10}, // 0.54% + { 7168, 7, 9}, // 0.08% + { 8192, 1, 8}, // 0.29% + { 9472, 5, 6}, // 8.23% + { 10240, 4, 6}, // 6.82% + { 12288, 3, 5}, // 0.20% + { 13568, 5, 4}, // 0.75% + { 14336, 7, 4}, // 0.08% + { 16384, 2, 4}, // 0.29% + { 20480, 5, 3}, // 0.12% + { 24576, 3, 2}, // 0.20% + { 28672, 7, 2}, // 0.08% + { 32768, 4, 2}, // 0.15% + { 40960, 5, 2}, // 0.12% + { 49152, 6, 2}, // 0.10% + { 57344, 7, 2}, // 0.08% + { 65536, 8, 2}, // 0.07% + { 73728, 9, 2}, // 0.07% + { 81920, 10, 2}, // 0.06% + { 98304, 12, 2}, // 0.05% + { 114688, 14, 2}, // 0.04% + { 131072, 16, 2}, // 0.04% + { 147456, 18, 2}, // 0.03% + { 163840, 20, 2}, // 0.03% + { 180224, 22, 2}, // 0.03% + { 204800, 25, 2}, // 0.02% + { 237568, 29, 2}, // 0.02% + { 262144, 32, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 15 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +static const int kCount = 78; +static_assert(kCount <= kNumClasses); +const int SizeMap::kSizeClassesCount = kCount; +const SizeClassInfo SizeMap::kSizeClasses[SizeMap::kSizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.15% + { 16, 1, 32}, // 0.15% + { 24, 1, 32}, // 0.17% + { 32, 1, 32}, // 0.15% + { 40, 1, 32}, // 0.17% + { 48, 1, 32}, // 0.24% + { 56, 1, 32}, // 0.17% + { 64, 1, 32}, // 0.15% + { 72, 1, 32}, // 0.17% + { 80, 1, 32}, // 0.29% + { 88, 1, 32}, // 0.24% + { 96, 1, 32}, // 0.24% + { 104, 1, 32}, // 0.17% + { 112, 1, 32}, // 0.34% + { 128, 1, 32}, // 0.15% + { 144, 1, 32}, // 0.39% + { 160, 1, 32}, // 0.54% + { 176, 1, 32}, // 0.24% + { 192, 1, 32}, // 0.54% + { 208, 1, 32}, 
// 0.49% + { 224, 1, 32}, // 0.34% + { 240, 1, 32}, // 0.54% + { 256, 1, 32}, // 0.15% + { 280, 1, 32}, // 0.17% + { 304, 1, 32}, // 0.89% + { 328, 1, 32}, // 1.06% + { 352, 1, 32}, // 0.24% + { 384, 1, 32}, // 0.54% + { 416, 1, 32}, // 1.13% + { 448, 1, 32}, // 0.34% + { 488, 1, 32}, // 0.37% + { 512, 1, 32}, // 0.15% + { 576, 1, 32}, // 1.74% + { 640, 1, 32}, // 0.54% + { 704, 1, 32}, // 1.33% + { 832, 1, 32}, // 1.13% + { 896, 1, 32}, // 1.74% + { 1024, 1, 32}, // 0.15% + { 1152, 1, 32}, // 1.74% + { 1280, 1, 32}, // 2.55% + { 1536, 1, 32}, // 1.74% + { 1792, 1, 32}, // 1.74% + { 2048, 1, 32}, // 0.15% + { 2176, 1, 30}, // 0.54% + { 2304, 1, 28}, // 1.74% + { 2688, 1, 24}, // 1.74% + { 2944, 1, 22}, // 1.33% + { 3200, 1, 20}, // 2.55% + { 3584, 1, 18}, // 1.74% + { 4096, 1, 16}, // 0.15% + { 4608, 1, 14}, // 1.74% + { 5376, 1, 12}, // 1.74% + { 6528, 1, 10}, // 0.54% + { 7168, 2, 9}, // 1.66% + { 8192, 1, 8}, // 0.15% + { 9344, 2, 7}, // 0.27% + { 10880, 1, 6}, // 0.54% + { 13952, 3, 4}, // 0.70% + { 16384, 1, 4}, // 0.15% + { 19072, 3, 3}, // 3.14% + { 21760, 2, 3}, // 0.47% + { 24576, 3, 2}, // 0.05% + { 28032, 6, 2}, // 0.22% + { 32768, 1, 2}, // 0.15% + { 38144, 5, 2}, // 7.41% + { 40960, 4, 2}, // 6.71% + { 49152, 3, 2}, // 0.05% + { 57344, 7, 2}, // 0.02% + { 65536, 2, 2}, // 0.07% + { 81920, 5, 2}, // 0.03% + { 98304, 3, 2}, // 0.05% + { 114688, 7, 2}, // 0.02% + { 131072, 4, 2}, // 0.04% + { 163840, 5, 2}, // 0.03% + { 196608, 6, 2}, // 0.02% + { 229376, 7, 2}, // 0.02% + { 262144, 8, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 18 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +static const int kCount = 89; +static_assert(kCount <= kNumClasses); +const int SizeMap::kSizeClassesCount = kCount; +const SizeClassInfo SizeMap::kSizeClasses[SizeMap::kSizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.02% + { 16, 1, 32}, // 0.02% + { 24, 1, 32}, // 0.02% + { 32, 1, 32}, // 0.02% + { 40, 1, 32}, // 0.03% + { 48, 1, 32}, // 0.02% + { 56, 1, 32}, // 0.02% + { 64, 1, 32}, // 0.02% + { 72, 1, 32}, // 0.04% + { 80, 1, 32}, // 0.04% + { 88, 1, 32}, // 0.05% + { 96, 1, 32}, // 0.04% + { 104, 1, 32}, // 0.04% + { 112, 1, 32}, // 0.04% + { 128, 1, 32}, // 0.02% + { 144, 1, 32}, // 0.04% + { 160, 1, 32}, // 0.04% + { 176, 1, 32}, // 0.05% + { 192, 1, 32}, // 0.04% + { 208, 1, 32}, // 0.04% + { 240, 1, 32}, // 0.04% + { 256, 1, 32}, // 0.02% + { 304, 1, 32}, // 0.05% + { 336, 1, 32}, // 0.04% + { 360, 1, 32}, // 0.04% + { 408, 1, 32}, // 0.10% + { 456, 1, 32}, // 0.17% + { 512, 1, 32}, // 0.02% + { 576, 1, 32}, // 0.04% + { 640, 1, 32}, // 0.17% + { 704, 1, 32}, // 0.12% + { 768, 1, 32}, // 0.12% + { 832, 1, 32}, // 0.04% + { 896, 1, 32}, // 0.21% + { 1024, 1, 32}, // 0.02% + { 1152, 1, 32}, // 0.26% + { 1280, 1, 32}, // 0.41% + { 1536, 1, 32}, // 0.41% + { 1664, 1, 32}, // 0.36% + { 1792, 1, 32}, // 0.21% + { 1920, 1, 32}, // 0.41% + { 2048, 1, 32}, // 0.02% + { 2176, 1, 30}, // 0.41% + { 2304, 1, 28}, // 0.71% + { 2432, 1, 26}, // 0.76% + { 2560, 1, 25}, // 0.41% + { 2688, 1, 24}, // 0.56% + { 2816, 1, 23}, // 0.12% + { 2944, 1, 22}, // 0.07% + { 3072, 1, 21}, // 0.41% + { 3328, 1, 19}, // 1.00% + { 3584, 1, 18}, // 0.21% + { 3840, 1, 17}, // 0.41% + { 4096, 1, 16}, // 0.02% + { 4736, 1, 13}, // 0.66% + { 5504, 1, 11}, // 1.35% + { 6144, 1, 10}, // 1.61% + { 6528, 1, 10}, // 0.41% + { 6784, 1, 9}, // 1.71% + { 7168, 1, 9}, // 1.61% + { 7680, 1, 8}, // 0.41% + { 8192, 1, 8}, // 0.02% + { 8704, 1, 7}, // 0.41% + { 9344, 1, 7}, 
// 0.21% + { 10880, 1, 6}, // 0.41% + { 11904, 1, 5}, // 0.12% + { 13056, 1, 5}, // 0.41% + { 14464, 1, 4}, // 0.71% + { 16384, 1, 4}, // 0.02% + { 18688, 1, 3}, // 0.21% + { 21760, 1, 3}, // 0.41% + { 26112, 1, 2}, // 0.41% + { 29056, 1, 2}, // 0.26% + { 32768, 1, 2}, // 0.02% + { 37376, 1, 2}, // 0.21% + { 43648, 1, 2}, // 0.12% + { 52352, 1, 2}, // 0.17% + { 56064, 2, 2}, // 3.92% + { 65536, 1, 2}, // 0.02% + { 74880, 2, 2}, // 0.03% + { 87296, 1, 2}, // 0.12% + { 104832, 2, 2}, // 0.03% + { 112256, 3, 2}, // 0.09% + { 131072, 1, 2}, // 0.02% + { 149760, 3, 2}, // 5.03% + { 174720, 2, 2}, // 0.03% + { 209664, 4, 2}, // 0.03% + { 262144, 1, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 12 +static_assert(kMaxSize == 8192, "kMaxSize mismatch"); +static const int kCount = 46; +static_assert(kCount <= kNumClasses); +const int SizeMap::kSizeClassesCount = kCount; +const SizeClassInfo SizeMap::kSizeClasses[SizeMap::kSizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 1.17% + { 16, 1, 32}, // 1.17% + { 24, 1, 32}, // 1.57% + { 32, 1, 32}, // 1.17% + { 40, 1, 32}, // 1.57% + { 48, 1, 32}, // 1.57% + { 56, 1, 32}, // 1.37% + { 64, 1, 32}, // 1.17% + { 72, 1, 32}, // 2.78% + { 80, 1, 32}, // 1.57% + { 88, 1, 32}, // 2.37% + { 96, 1, 32}, // 2.78% + { 104, 1, 32}, // 2.17% + { 120, 1, 32}, // 1.57% + { 128, 1, 32}, // 1.17% + { 144, 1, 32}, // 2.78% + { 160, 1, 32}, // 3.60% + { 184, 1, 32}, // 2.37% + { 208, 1, 32}, // 4.86% + { 240, 1, 32}, // 1.57% + { 256, 1, 32}, // 1.17% + { 272, 1, 32}, // 1.57% + { 312, 1, 32}, // 2.17% + { 336, 1, 32}, // 2.78% + { 368, 1, 32}, // 2.37% + { 408, 1, 32}, // 1.57% + { 512, 1, 32}, // 1.17% + { 576, 2, 32}, // 2.18% + { 704, 2, 32}, // 6.40% + { 768, 2, 32}, // 7.29% + { 896, 2, 32}, // 2.18% + { 1024, 2, 32}, // 0.59% + { 1152, 3, 32}, // 7.08% + { 1280, 3, 32}, // 7.08% + { 1536, 3, 32}, // 0.39% + { 1792, 4, 32}, // 1.88% + { 2048, 4, 32}, // 0.29% + { 2304, 4, 28}, // 1.88% + { 2688, 4, 24}, // 1.88% + { 3456, 6, 18}, // 1.79% + { 4096, 4, 16}, // 0.29% + { 5376, 4, 12}, // 1.88% + { 6144, 3, 10}, // 0.39% + { 7168, 7, 9}, // 0.17% + { 8192, 4, 8}, // 0.29% +}; +#else +#error "Unsupported TCMALLOC_PAGE_SHIFT value!" 
+#endif +#else +#if TCMALLOC_PAGE_SHIFT == 13 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +static const int kCount = 86; +static_assert(kCount <= kNumClasses); +const int SizeMap::kSizeClassesCount = kCount; +const SizeClassInfo SizeMap::kSizeClasses[SizeMap::kSizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.59% + { 16, 1, 32}, // 0.59% + { 32, 1, 32}, // 0.59% + { 48, 1, 32}, // 0.98% + { 64, 1, 32}, // 0.59% + { 80, 1, 32}, // 0.98% + { 96, 1, 32}, // 0.98% + { 112, 1, 32}, // 0.78% + { 128, 1, 32}, // 0.59% + { 144, 1, 32}, // 2.18% + { 160, 1, 32}, // 0.98% + { 176, 1, 32}, // 1.78% + { 192, 1, 32}, // 2.18% + { 208, 1, 32}, // 1.58% + { 224, 1, 32}, // 2.18% + { 240, 1, 32}, // 0.98% + { 256, 1, 32}, // 0.59% + { 272, 1, 32}, // 0.98% + { 288, 1, 32}, // 2.18% + { 304, 1, 32}, // 4.25% + { 320, 1, 32}, // 3.00% + { 336, 1, 32}, // 2.18% + { 352, 1, 32}, // 1.78% + { 368, 1, 32}, // 1.78% + { 384, 1, 32}, // 2.18% + { 400, 1, 32}, // 3.00% + { 416, 1, 32}, // 4.25% + { 448, 1, 32}, // 2.18% + { 480, 1, 32}, // 0.98% + { 512, 1, 32}, // 0.59% + { 576, 1, 32}, // 2.18% + { 640, 1, 32}, // 7.29% + { 704, 1, 32}, // 6.40% + { 768, 1, 32}, // 7.29% + { 896, 1, 32}, // 2.18% + { 1024, 1, 32}, // 0.59% + { 1152, 2, 32}, // 1.88% + { 1280, 2, 32}, // 6.98% + { 1408, 2, 32}, // 6.10% + { 1536, 2, 32}, // 6.98% + { 1792, 2, 32}, // 1.88% + { 2048, 2, 32}, // 0.29% + { 2304, 2, 28}, // 1.88% + { 2688, 2, 24}, // 1.88% + { 2816, 3, 23}, // 9.30% + { 3200, 2, 20}, // 2.70% + { 3456, 3, 18}, // 1.79% + { 3584, 4, 18}, // 1.74% + { 4096, 1, 16}, // 0.29% + { 4736, 3, 13}, // 3.99% + { 5376, 2, 12}, // 1.88% + { 6144, 3, 10}, // 0.20% + { 6528, 4, 10}, // 0.54% + { 6784, 5, 9}, // 0.75% + { 7168, 7, 9}, // 0.08% + { 8192, 1, 8}, // 0.29% + { 9472, 5, 6}, // 8.23% + { 10240, 4, 6}, // 6.82% + { 12288, 3, 5}, // 0.20% + { 13568, 5, 4}, // 0.75% + { 14336, 7, 4}, // 0.08% + { 16384, 2, 4}, // 0.29% + { 20480, 5, 3}, // 0.12% + { 24576, 3, 2}, // 0.20% + { 28672, 7, 2}, // 0.08% + { 32768, 4, 2}, // 0.15% + { 40960, 5, 2}, // 0.12% + { 49152, 6, 2}, // 0.10% + { 57344, 7, 2}, // 0.08% + { 65536, 8, 2}, // 0.07% + { 73728, 9, 2}, // 0.07% + { 81920, 10, 2}, // 0.06% + { 90112, 11, 2}, // 0.05% + { 98304, 12, 2}, // 0.05% + { 106496, 13, 2}, // 0.05% + { 114688, 14, 2}, // 0.04% + { 131072, 16, 2}, // 0.04% + { 139264, 17, 2}, // 0.03% + { 155648, 19, 2}, // 0.03% + { 172032, 21, 2}, // 0.03% + { 188416, 23, 2}, // 0.03% + { 204800, 25, 2}, // 0.02% + { 221184, 27, 2}, // 0.02% + { 237568, 29, 2}, // 0.02% + { 262144, 32, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 15 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +static const int kCount = 78; +static_assert(kCount <= kNumClasses); +const int SizeMap::kSizeClassesCount = kCount; +const SizeClassInfo SizeMap::kSizeClasses[SizeMap::kSizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.15% + { 16, 1, 32}, // 0.15% + { 32, 1, 32}, // 0.15% + { 48, 1, 32}, // 0.24% + { 64, 1, 32}, // 0.15% + { 80, 1, 32}, // 0.29% + { 96, 1, 32}, // 0.24% + { 112, 1, 32}, // 0.34% + { 128, 1, 32}, // 0.15% + { 144, 1, 32}, // 0.39% + { 160, 1, 32}, // 0.54% + { 176, 1, 32}, // 0.24% + { 192, 1, 32}, // 0.54% + { 208, 1, 32}, // 0.49% + { 224, 1, 32}, // 0.34% + { 240, 1, 32}, // 0.54% + { 256, 1, 32}, // 0.15% + { 272, 1, 32}, // 0.54% + { 288, 1, 32}, // 0.84% + { 304, 1, 32}, // 0.89% + { 320, 1, 32}, // 0.54% + { 336, 1, 32}, // 0.69% 
+ { 352, 1, 32}, // 0.24% + { 384, 1, 32}, // 0.54% + { 416, 1, 32}, // 1.13% + { 448, 1, 32}, // 0.34% + { 480, 1, 32}, // 0.54% + { 512, 1, 32}, // 0.15% + { 576, 1, 32}, // 1.74% + { 640, 1, 32}, // 0.54% + { 704, 1, 32}, // 1.33% + { 768, 1, 32}, // 1.74% + { 832, 1, 32}, // 1.13% + { 896, 1, 32}, // 1.74% + { 1024, 1, 32}, // 0.15% + { 1152, 1, 32}, // 1.74% + { 1280, 1, 32}, // 2.55% + { 1408, 1, 32}, // 1.33% + { 1536, 1, 32}, // 1.74% + { 1792, 1, 32}, // 1.74% + { 2048, 1, 32}, // 0.15% + { 2176, 1, 30}, // 0.54% + { 2304, 1, 28}, // 1.74% + { 2432, 1, 26}, // 3.80% + { 2688, 1, 24}, // 1.74% + { 2944, 1, 22}, // 1.33% + { 3200, 1, 20}, // 2.55% + { 3584, 1, 18}, // 1.74% + { 4096, 1, 16}, // 0.15% + { 4608, 1, 14}, // 1.74% + { 5376, 1, 12}, // 1.74% + { 6528, 1, 10}, // 0.54% + { 7168, 2, 9}, // 1.66% + { 8192, 1, 8}, // 0.15% + { 9344, 2, 7}, // 0.27% + { 10880, 1, 6}, // 0.54% + { 13056, 2, 5}, // 0.47% + { 13952, 3, 4}, // 0.70% + { 16384, 1, 4}, // 0.15% + { 19072, 3, 3}, // 3.14% + { 21760, 2, 3}, // 0.47% + { 24576, 3, 2}, // 0.05% + { 28032, 6, 2}, // 0.22% + { 32768, 1, 2}, // 0.15% + { 38144, 5, 2}, // 7.41% + { 40960, 4, 2}, // 6.71% + { 49152, 3, 2}, // 0.05% + { 57344, 7, 2}, // 0.02% + { 65536, 2, 2}, // 0.07% + { 81920, 5, 2}, // 0.03% + { 98304, 3, 2}, // 0.05% + { 114688, 7, 2}, // 0.02% + { 131072, 4, 2}, // 0.04% + { 163840, 5, 2}, // 0.03% + { 196608, 6, 2}, // 0.02% + { 229376, 7, 2}, // 0.02% + { 262144, 8, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 18 +static_assert(kMaxSize == 262144, "kMaxSize mismatch"); +static const int kCount = 89; +static_assert(kCount <= kNumClasses); +const int SizeMap::kSizeClassesCount = kCount; +const SizeClassInfo SizeMap::kSizeClasses[SizeMap::kSizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 0.02% + { 16, 1, 32}, // 0.02% + { 32, 1, 32}, // 0.02% + { 48, 1, 32}, // 0.02% + { 64, 1, 32}, // 0.02% + { 80, 1, 32}, // 0.04% + { 96, 1, 32}, // 0.04% + { 112, 1, 32}, // 0.04% + { 128, 1, 32}, // 0.02% + { 144, 1, 32}, // 0.04% + { 160, 1, 32}, // 0.04% + { 176, 1, 32}, // 0.05% + { 192, 1, 32}, // 0.04% + { 208, 1, 32}, // 0.04% + { 240, 1, 32}, // 0.04% + { 256, 1, 32}, // 0.02% + { 304, 1, 32}, // 0.05% + { 336, 1, 32}, // 0.04% + { 368, 1, 32}, // 0.07% + { 416, 1, 32}, // 0.04% + { 464, 1, 32}, // 0.19% + { 512, 1, 32}, // 0.02% + { 576, 1, 32}, // 0.04% + { 640, 1, 32}, // 0.17% + { 704, 1, 32}, // 0.12% + { 768, 1, 32}, // 0.12% + { 832, 1, 32}, // 0.04% + { 896, 1, 32}, // 0.21% + { 1024, 1, 32}, // 0.02% + { 1152, 1, 32}, // 0.26% + { 1280, 1, 32}, // 0.41% + { 1408, 1, 32}, // 0.12% + { 1536, 1, 32}, // 0.41% + { 1664, 1, 32}, // 0.36% + { 1792, 1, 32}, // 0.21% + { 1920, 1, 32}, // 0.41% + { 2048, 1, 32}, // 0.02% + { 2176, 1, 30}, // 0.41% + { 2304, 1, 28}, // 0.71% + { 2432, 1, 26}, // 0.76% + { 2560, 1, 25}, // 0.41% + { 2688, 1, 24}, // 0.56% + { 2816, 1, 23}, // 0.12% + { 2944, 1, 22}, // 0.07% + { 3072, 1, 21}, // 0.41% + { 3200, 1, 20}, // 1.15% + { 3328, 1, 19}, // 1.00% + { 3584, 1, 18}, // 0.21% + { 3840, 1, 17}, // 0.41% + { 4096, 1, 16}, // 0.02% + { 4736, 1, 13}, // 0.66% + { 5504, 1, 11}, // 1.35% + { 6144, 1, 10}, // 1.61% + { 6528, 1, 10}, // 0.41% + { 6784, 1, 9}, // 1.71% + { 7168, 1, 9}, // 1.61% + { 7680, 1, 8}, // 0.41% + { 8192, 1, 8}, // 0.02% + { 8704, 1, 7}, // 0.41% + { 9344, 1, 7}, // 0.21% + { 10368, 1, 6}, // 1.15% + { 11392, 1, 5}, // 0.07% + { 12416, 1, 5}, // 0.56% + { 13696, 1, 4}, // 0.76% + { 14464, 1, 4}, // 0.71% + { 16384, 
1, 4}, // 0.02% + { 17408, 1, 3}, // 0.41% + { 20096, 1, 3}, // 0.36% + { 21760, 1, 3}, // 0.41% + { 23808, 1, 2}, // 0.12% + { 26112, 1, 2}, // 0.41% + { 29056, 1, 2}, // 0.26% + { 32768, 1, 2}, // 0.02% + { 37376, 1, 2}, // 0.21% + { 43648, 1, 2}, // 0.12% + { 52352, 1, 2}, // 0.17% + { 56064, 2, 2}, // 3.92% + { 65536, 1, 2}, // 0.02% + { 74880, 2, 2}, // 0.03% + { 87296, 1, 2}, // 0.12% + { 104832, 2, 2}, // 0.03% + { 112256, 3, 2}, // 0.09% + { 131072, 1, 2}, // 0.02% + { 149760, 3, 2}, // 5.03% + { 174720, 2, 2}, // 0.03% + { 196608, 3, 2}, // 0.01% + { 209664, 4, 2}, // 0.03% + { 262144, 1, 2}, // 0.02% +}; +#elif TCMALLOC_PAGE_SHIFT == 12 +static_assert(kMaxSize == 8192, "kMaxSize mismatch"); +static const int kCount = 46; +static_assert(kCount <= kNumClasses); +const int SizeMap::kSizeClassesCount = kCount; +const SizeClassInfo SizeMap::kSizeClasses[SizeMap::kSizeClassesCount] = { + // <bytes>, <pages>, <batch size> <fixed> + { 0, 0, 0}, // +Inf% + { 8, 1, 32}, // 1.17% + { 16, 1, 32}, // 1.17% + { 32, 1, 32}, // 1.17% + { 48, 1, 32}, // 1.57% + { 64, 1, 32}, // 1.17% + { 80, 1, 32}, // 1.57% + { 96, 1, 32}, // 2.78% + { 112, 1, 32}, // 2.78% + { 128, 1, 32}, // 1.17% + { 144, 1, 32}, // 2.78% + { 160, 1, 32}, // 3.60% + { 176, 1, 32}, // 2.37% + { 192, 1, 32}, // 2.78% + { 208, 1, 32}, // 4.86% + { 224, 1, 32}, // 2.78% + { 240, 1, 32}, // 1.57% + { 256, 1, 32}, // 1.17% + { 272, 1, 32}, // 1.57% + { 288, 1, 32}, // 2.78% + { 304, 1, 32}, // 4.86% + { 336, 1, 32}, // 2.78% + { 368, 1, 32}, // 2.37% + { 400, 1, 32}, // 3.60% + { 448, 1, 32}, // 2.78% + { 512, 1, 32}, // 1.17% + { 576, 2, 32}, // 2.18% + { 640, 2, 32}, // 7.29% + { 704, 2, 32}, // 6.40% + { 768, 2, 32}, // 7.29% + { 896, 2, 32}, // 2.18% + { 1024, 2, 32}, // 0.59% + { 1152, 3, 32}, // 7.08% + { 1280, 3, 32}, // 7.08% + { 1536, 3, 32}, // 0.39% + { 1792, 4, 32}, // 1.88% + { 2048, 4, 32}, // 0.29% + { 2304, 4, 28}, // 1.88% + { 2688, 4, 24}, // 1.88% + { 3200, 4, 20}, // 2.70% + { 3584, 7, 18}, // 0.17% + { 4096, 4, 16}, // 0.29% + { 5376, 4, 12}, // 1.88% + { 6144, 3, 10}, // 0.39% + { 7168, 7, 9}, // 0.17% + { 8192, 4, 8}, // 0.29% +}; +#else +#error "Unsupported TCMALLOC_PAGE_SHIFT value!" +#endif +#endif +// clang-format on + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/size_classes_test.cc b/contrib/libs/tcmalloc/tcmalloc/size_classes_test.cc new file mode 100644 index 0000000000..d66ce5b186 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/size_classes_test.cc @@ -0,0 +1,469 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include <stddef.h> +#include <stdlib.h> + +#include "gtest/gtest.h" +#include "absl/random/random.h" +#include "tcmalloc/common.h" +#include "tcmalloc/size_class_info.h" +#include "tcmalloc/span.h" +#include "tcmalloc/tcmalloc_policy.h" + +namespace tcmalloc { +namespace tcmalloc_internal { + +// Moved out of anonymous namespace so that it can be found by friend class in +// span.h. This allows tests to access span internals so that we can +// validate that scaling by a reciprocal correctly converts a pointer into +// an offset within a span. +class SpanTestPeer { + public: + static uint16_t CalcReciprocal(size_t size) { + return Span::CalcReciprocal(size); + } + static Span::ObjIdx TestOffsetToIdx(uintptr_t offset, size_t size, + uint16_t reciprocal) { + return Span::TestOffsetToIdx(offset, size, reciprocal); + } +}; + +namespace { + +size_t Alignment(size_t size) { + size_t ret = kAlignment; + if (size >= 1024) { + // SizeMap::ClassIndexMaybe requires 128-byte alignment for sizes >=1024. + ret = 128; + } else if (size >= 512) { + // Per //tcmalloc/span.h, we have 64 byte alignment for sizes + // >=512. + ret = 64; + } else if (size >= 8) { + ret = 8; + } + + return ret; +} + +class SizeClassesTest : public ::testing::Test { + protected: + SizeClassesTest() { m_.Init(); } + + SizeMap m_; +}; + +TEST_F(SizeClassesTest, SmallClassesSinglePage) { + // Per //tcmalloc/span.h, the compressed index implementation + // added by cl/126729493 requires small size classes to be placed on a single + // page span so they can be addressed. + for (int c = 1; c < kNumClasses; c++) { + const size_t max_size_in_class = m_.class_to_size(c); + if (max_size_in_class >= SizeMap::kMultiPageSize) { + continue; + } + if (max_size_in_class == 0) { + continue; + } + EXPECT_EQ(m_.class_to_pages(c), 1) << max_size_in_class; + } +} + +TEST_F(SizeClassesTest, SpanPages) { + for (int c = 1; c < kNumClasses; c++) { + const size_t max_size_in_class = m_.class_to_size(c); + if (max_size_in_class == 0) { + continue; + } + // A span of class_to_pages(c) must be able to hold at least one object. + EXPECT_GE(Length(m_.class_to_pages(c)).in_bytes(), max_size_in_class); + } +} + +TEST_F(SizeClassesTest, ValidateSufficientBitmapCapacity) { + // Validate that all the objects in a span can fit into a bitmap. + // The cut-off for using a bitmap is kBitmapMinObjectSize, so it is + // theoretically possible that a span could exceed this threshold + // for object size and contain more than 64 objects. + for (int c = 1; c < kNumClasses; ++c) { + const size_t max_size_in_class = m_.class_to_size(c); + if (max_size_in_class >= kBitmapMinObjectSize) { + const size_t objects_per_span = + Length(m_.class_to_pages(c)).in_bytes() / m_.class_to_size(c); + // Span can hold at most 64 objects of this size. + EXPECT_LE(objects_per_span, 64); + } + } +} + +TEST_F(SizeClassesTest, ValidateCorrectScalingByReciprocal) { + // Validate that multiplying by the reciprocal works for all size classes. + // When converting an offset within a span into an index we avoid a + // division operation by scaling by the reciprocal. The test ensures + // that this approach works for all objects in a span, for all object + // sizes. + for (int c = 1; c < kNumClasses; ++c) { + const size_t max_size_in_class = m_.class_to_size(c); + // Only test for sizes where object availability is recorded in a bitmap. 
+ if (max_size_in_class < kBitmapMinObjectSize) { + continue; + } + size_t reciprocal = SpanTestPeer::CalcReciprocal(max_size_in_class); + const size_t objects_per_span = + Length(m_.class_to_pages(c)).in_bytes() / m_.class_to_size(c); + for (int index = 0; index < objects_per_span; index++) { + // Calculate the address of the object. + uintptr_t address = index * max_size_in_class; + // Calculate the index into the page using the reciprocal method. + int idx = + SpanTestPeer::TestOffsetToIdx(address, max_size_in_class, reciprocal); + // Check that the starting address back is correct. + ASSERT_EQ(address, idx * max_size_in_class); + } + } +} + +TEST_F(SizeClassesTest, Aligned) { + // Validate that each size class is properly aligned. + for (int c = 1; c < kNumClasses; c++) { + const size_t max_size_in_class = m_.class_to_size(c); + size_t alignment = Alignment(max_size_in_class); + + EXPECT_EQ(0, max_size_in_class % alignment) << max_size_in_class; + } +} + +TEST_F(SizeClassesTest, Distinguishable) { + // Validate that the size to class lookup table is able to distinguish each + // size class from one another. + // + // ClassIndexMaybe provides 8 byte granularity below 1024 bytes and 128 byte + // granularity for larger sizes, so our chosen size classes cannot be any + // finer (otherwise they would map to the same entry in the lookup table). + // + // We don't check expanded size classes which are intentionally duplicated. + for (int partition = 0; partition < kNumaPartitions; partition++) { + for (int c = (partition * kNumBaseClasses) + 1; + c < (partition + 1) * kNumBaseClasses; c++) { + const size_t max_size_in_class = m_.class_to_size(c); + if (max_size_in_class == 0) { + continue; + } + const int class_index = m_.SizeClass( + CppPolicy().InNumaPartition(partition), max_size_in_class); + + EXPECT_EQ(c, class_index) << max_size_in_class; + } + } +} + +// This test is disabled until we use a different span size allocation +// algorithm (such as the one in effect from cl/130150125 until cl/139955211). +TEST_F(SizeClassesTest, DISABLED_WastedSpan) { + // Validate that each size class does not waste (number of objects) * + // (alignment) at the end of the span. + for (int c = 1; c < kNumClasses; c++) { + const size_t span_size = kPageSize * m_.class_to_pages(c); + const size_t max_size_in_class = m_.class_to_size(c); + const size_t alignment = Alignment(max_size_in_class); + const size_t n_objects = span_size / max_size_in_class; + const size_t waste = span_size - n_objects * max_size_in_class; + + EXPECT_LT(waste, n_objects * alignment) << max_size_in_class; + } +} + +TEST_F(SizeClassesTest, DoubleCheckedConsistency) { + // Validate that every size on [0, kMaxSize] maps to a size class that is + // neither too big nor too small. + for (size_t size = 0; size <= kMaxSize; size++) { + const int sc = m_.SizeClass(CppPolicy(), size); + EXPECT_GT(sc, 0) << size; + EXPECT_LT(sc, kNumClasses) << size; + + if ((sc % kNumBaseClasses) > 1) { + EXPECT_GT(size, m_.class_to_size(sc - 1)) + << "Allocating unnecessarily large class"; + } + + const size_t s = m_.class_to_size(sc); + EXPECT_LE(size, s); + EXPECT_NE(s, 0) << size; + } +} + +TEST_F(SizeClassesTest, NumToMove) { + for (int c = 1; c < kNumClasses; c++) { + // For non-empty size classes, we should move at least 1 object to/from each + // layer of the caches. 
+ const size_t max_size_in_class = m_.class_to_size(c); + if (max_size_in_class == 0) { + continue; + } + EXPECT_GT(m_.num_objects_to_move(c), 0) << max_size_in_class; + } +} + +class TestingSizeMap : public SizeMap { + public: + TestingSizeMap() {} + + bool ValidSizeClasses(int num_classes, const SizeClassInfo* parsed) { + return SizeMap::ValidSizeClasses(num_classes, parsed); + } + + const SizeClassInfo* DefaultSizeClasses() const { return kSizeClasses; } + const int DefaultSizeClassesCount() const { return kSizeClassesCount; } +}; + +class RunTimeSizeClassesTest : public ::testing::Test { + protected: + RunTimeSizeClassesTest() {} + + TestingSizeMap m_; +}; + +TEST_F(RunTimeSizeClassesTest, ExpandedSizeClasses) { + // Verify that none of the default size classes are considered expanded size + // classes. + for (int i = 0; i < kNumClasses; i++) { + EXPECT_EQ(i < (m_.DefaultSizeClassesCount() * kNumaPartitions), + !IsExpandedSizeClass(i)) + << i; + } +} + +TEST_F(RunTimeSizeClassesTest, ValidateClassSizeIncreases) { + SizeClassInfo parsed[] = { + {0, 0, 0}, + {16, 1, 14}, + {32, 1, 15}, + {kMaxSize, 1, 15}, + }; + EXPECT_TRUE(m_.ValidSizeClasses(4, parsed)); + + parsed[2].size = 8; // Change 32 to 8 + EXPECT_FALSE(m_.ValidSizeClasses(4, parsed)); +} + +TEST_F(RunTimeSizeClassesTest, ValidateClassSizeMax) { + SizeClassInfo parsed[] = { + {0, 0, 0}, + {kMaxSize - 128, 1, 15}, + }; + // Last class must cover kMaxSize + EXPECT_FALSE(m_.ValidSizeClasses(2, parsed)); + + // Check Max Size is allowed 256 KiB = 262144 + parsed[1].size = kMaxSize; + EXPECT_TRUE(m_.ValidSizeClasses(2, parsed)); + // But kMaxSize + 128 is not allowed + parsed[1].size = kMaxSize + 128; + EXPECT_FALSE(m_.ValidSizeClasses(2, parsed)); +} + +TEST_F(RunTimeSizeClassesTest, ValidateClassSizesAlignment) { + SizeClassInfo parsed[] = { + {0, 0, 0}, + {8, 1, 14}, + {kMaxSize, 1, 15}, + }; + EXPECT_TRUE(m_.ValidSizeClasses(3, parsed)); + // Doesn't meet alignment requirements + parsed[1].size = 7; + EXPECT_FALSE(m_.ValidSizeClasses(3, parsed)); + + // Over 512, expect alignment of 64 bytes. + // 512 + 64 = 576 + parsed[1].size = 576; + EXPECT_TRUE(m_.ValidSizeClasses(3, parsed)); + // 512 + 8 + parsed[1].size = 520; + EXPECT_FALSE(m_.ValidSizeClasses(3, parsed)); + + // Over 1024, expect alignment of 128 bytes. + // 1024 + 128 = 1152 + parsed[1].size = 1024 + 128; + EXPECT_TRUE(m_.ValidSizeClasses(3, parsed)); + // 1024 + 64 = 1088 + parsed[1].size = 1024 + 64; + EXPECT_FALSE(m_.ValidSizeClasses(3, parsed)); +} + +TEST_F(RunTimeSizeClassesTest, ValidateBatchSize) { + SizeClassInfo parsed[] = { + {0, 0, 0}, + {8, 1, kMaxObjectsToMove}, + {kMaxSize, 1, 15}, + }; + EXPECT_TRUE(m_.ValidSizeClasses(3, parsed)); + + ++parsed[1].num_to_move; + EXPECT_FALSE(m_.ValidSizeClasses(3, parsed)); +} + +TEST_F(RunTimeSizeClassesTest, ValidatePageSize) { + SizeClassInfo parsed[] = { + {0, 0, 0}, + {1024, 255, kMaxObjectsToMove}, + {kMaxSize, 1, 15}, + }; + EXPECT_TRUE(m_.ValidSizeClasses(3, parsed)); + + parsed[1].pages = 256; + EXPECT_FALSE(m_.ValidSizeClasses(3, parsed)); +} + +TEST_F(RunTimeSizeClassesTest, ValidateDefaultSizeClasses) { + // The default size classes also need to be valid. 
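// (Editorial summary, not part of the upstream test, of the constraints the
// ValidSizeClasses checks above appear to enforce: sizes must be at least
// 8-byte aligned and strictly increasing, with the last class equal to
// kMaxSize; sizes of 512 bytes or more must be 64-byte aligned and sizes of
// 1024 or more 128-byte aligned; num_to_move may not exceed
// kMaxObjectsToMove; and pages must stay below 256.)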
+ EXPECT_TRUE(m_.ValidSizeClasses(m_.DefaultSizeClassesCount(), + m_.DefaultSizeClasses())); +} + +TEST_F(RunTimeSizeClassesTest, EnvVariableNotExamined) { + // Set a valid runtime size class environment variable + setenv("TCMALLOC_SIZE_CLASSES", "256,1,1", 1); + m_.Init(); + // Without runtime_size_classes library linked, the environment variable + // should have no affect. + EXPECT_NE(m_.class_to_size(1), 256); +} + +TEST(SizeMapTest, GetSizeClass) { + absl::BitGen rng; + constexpr int kTrials = 1000; + + SizeMap m; + // Before m.Init(), SizeClass should always return 0 or the equivalent in a + // non-zero NUMA partition. + for (int i = 0; i < kTrials; ++i) { + const size_t size = absl::LogUniform(rng, 0, 4 << 20); + uint32_t cl; + if (m.GetSizeClass(CppPolicy(), size, &cl)) { + EXPECT_EQ(cl % kNumBaseClasses, 0) << size; + EXPECT_LT(cl, kExpandedClassesStart) << size; + } else { + // We should only fail to lookup the size class when size is outside of + // the size classes. + ASSERT_GT(size, kMaxSize); + } + } + + // After m.Init(), GetSizeClass should return a size class. + m.Init(); + + for (int i = 0; i < kTrials; ++i) { + const size_t size = absl::LogUniform(rng, 0, 4 << 20); + uint32_t cl; + if (m.GetSizeClass(CppPolicy(), size, &cl)) { + const size_t mapped_size = m.class_to_size(cl); + // The size class needs to hold size. + ASSERT_GE(mapped_size, size); + } else { + // We should only fail to lookup the size class when size is outside of + // the size classes. + ASSERT_GT(size, kMaxSize); + } + } +} + +TEST(SizeMapTest, GetSizeClassWithAlignment) { + absl::BitGen rng; + constexpr int kTrials = 1000; + + SizeMap m; + // Before m.Init(), SizeClass should always return 0 or the equivalent in a + // non-zero NUMA partition. + for (int i = 0; i < kTrials; ++i) { + const size_t size = absl::LogUniform(rng, 0, 4 << 20); + const size_t alignment = 1 << absl::Uniform(rng, 0u, kHugePageShift); + uint32_t cl; + if (m.GetSizeClass(CppPolicy().AlignAs(alignment), size, &cl)) { + EXPECT_EQ(cl % kNumBaseClasses, 0) << size << " " << alignment; + EXPECT_LT(cl, kExpandedClassesStart) << size << " " << alignment; + } else if (alignment < kPageSize) { + // When alignment > kPageSize, we do not produce a size class. + // TODO(b/172060547): alignment == kPageSize could fit into the size + // classes too. + // + // We should only fail to lookup the size class when size is large. + ASSERT_GT(size, kMaxSize) << alignment; + } + } + + // After m.Init(), GetSizeClass should return a size class. + m.Init(); + + for (int i = 0; i < kTrials; ++i) { + const size_t size = absl::LogUniform(rng, 0, 4 << 20); + const size_t alignment = 1 << absl::Uniform(rng, 0u, kHugePageShift); + uint32_t cl; + if (m.GetSizeClass(CppPolicy().AlignAs(alignment), size, &cl)) { + const size_t mapped_size = m.class_to_size(cl); + // The size class needs to hold size. + ASSERT_GE(mapped_size, size); + // The size needs to be a multiple of alignment. + ASSERT_EQ(mapped_size % alignment, 0); + } else if (alignment < kPageSize) { + // When alignment > kPageSize, we do not produce a size class. + // TODO(b/172060547): alignment == kPageSize could fit into the size + // classes too. + // + // We should only fail to lookup the size class when size is large. + ASSERT_GT(size, kMaxSize) << alignment; + } + } +} + +TEST(SizeMapTest, SizeClass) { + absl::BitGen rng; + constexpr int kTrials = 1000; + + SizeMap m; + // Before m.Init(), SizeClass should always return 0 or the equivalent in a + // non-zero NUMA partition. 
+ for (int i = 0; i < kTrials; ++i) { + const size_t size = absl::LogUniform<size_t>(rng, 0u, kMaxSize); + const uint32_t cl = m.SizeClass(CppPolicy(), size); + EXPECT_EQ(cl % kNumBaseClasses, 0) << size; + EXPECT_LT(cl, kExpandedClassesStart) << size; + } + + // After m.Init(), SizeClass should return a size class. + m.Init(); + + for (int i = 0; i < kTrials; ++i) { + const size_t size = absl::LogUniform<size_t>(rng, 0u, kMaxSize); + uint32_t cl = m.SizeClass(CppPolicy(), size); + + const size_t mapped_size = m.class_to_size(cl); + // The size class needs to hold size. + ASSERT_GE(mapped_size, size); + } +} + +TEST(SizeMapTest, Preinit) { + ABSL_CONST_INIT static SizeMap m; + + for (int cl = 0; cl < kNumClasses; ++cl) { + EXPECT_EQ(m.class_to_size(cl), 0) << cl; + EXPECT_EQ(m.class_to_pages(cl), 0) << cl; + EXPECT_EQ(m.num_objects_to_move(cl), 0) << cl; + } +} + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/size_classes_with_runtime_size_classes_test.cc b/contrib/libs/tcmalloc/tcmalloc/size_classes_with_runtime_size_classes_test.cc new file mode 100644 index 0000000000..17badddac9 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/size_classes_with_runtime_size_classes_test.cc @@ -0,0 +1,127 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <stdlib.h> + +#include <string> + +#include "gtest/gtest.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_format.h" +#include "tcmalloc/common.h" +#include "tcmalloc/size_class_info.h" +#include "tcmalloc/span.h" + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +class TestingSizeMap : public SizeMap { + public: + TestingSizeMap() {} + + const SizeClassInfo* DefaultSizeClasses() const { return kSizeClasses; } + int DefaultSizeClassesCount() const { return kSizeClassesCount; } +}; + +class RunTimeSizeClassesTest : public ::testing::Test { + protected: + RunTimeSizeClassesTest() {} + + TestingSizeMap m_; +}; + +// Convert size classes into a string that can be passed to ParseSizeClasses(). +std::string SizeClassesToString(int num_classes, const SizeClassInfo* parsed) { + std::string result; + for (int c = 1; c < num_classes; c++) { + std::string one_size = absl::StrFormat( + "%d,%d,%d", parsed[c].size, parsed[c].pages, parsed[c].num_to_move); + if (c == 1) { + result = one_size; + } else { + absl::StrAppend(&result, ";", one_size); + } + } + return result; +} + +std::string ModifiedSizeClassesString(int num_classes, + const SizeClassInfo* source) { + // Set a valid runtime size class environment variable, which + // is a modified version of the default class sizes. + SizeClassInfo parsed[kNumClasses]; + for (int c = 0; c < num_classes; c++) { + parsed[c] = source[c]; + } + // Change num_to_move to a different valid value so that + // loading from the ENV can be detected. 
+ EXPECT_NE(parsed[1].num_to_move, 3); + parsed[1].num_to_move = 3; + return SizeClassesToString(num_classes, parsed); +} + +TEST_F(RunTimeSizeClassesTest, EnvVariableExamined) { + std::string e = ModifiedSizeClassesString(m_.DefaultSizeClassesCount(), + m_.DefaultSizeClasses()); + setenv("TCMALLOC_SIZE_CLASSES", e.c_str(), 1); + m_.Init(); + + // Confirm that the expected change is seen. + EXPECT_EQ(m_.num_objects_to_move(1), 3); +} + +// TODO(b/122839049) - Remove this test after bug is fixed. +TEST_F(RunTimeSizeClassesTest, ReducingSizeClassCountNotAllowed) { + // Try reducing the mumber of size classes by 1, which is expected to fail. + std::string e = ModifiedSizeClassesString(m_.DefaultSizeClassesCount() - 1, + m_.DefaultSizeClasses()); + setenv("TCMALLOC_SIZE_CLASSES", e.c_str(), 1); + m_.Init(); + + // Confirm that the expected change is not seen. + EXPECT_EQ(m_.num_objects_to_move(1), m_.DefaultSizeClasses()[1].num_to_move); +} + +// Convert the static classes to a string, parse that string via +// the environement variable and check that we get exactly the same +// results. Note, if the environement variable was not read, this test +// would still pass. +TEST_F(RunTimeSizeClassesTest, EnvRealClasses) { + const int count = m_.DefaultSizeClassesCount(); + std::string e = SizeClassesToString(count, m_.DefaultSizeClasses()); + setenv("TCMALLOC_SIZE_CLASSES", e.c_str(), 1); + m_.Init(); + // With the runtime_size_classes library linked, the environment variable + // will be parsed. + + for (int c = 0; c < kNumClasses;) { + for (int end = c + count; c < end; c++) { + const SizeClassInfo& default_info = + m_.DefaultSizeClasses()[c % kNumBaseClasses]; + EXPECT_EQ(m_.class_to_size(c), default_info.size) << c; + EXPECT_EQ(m_.class_to_pages(c), default_info.pages); + EXPECT_EQ(m_.num_objects_to_move(c), default_info.num_to_move); + } + for (; (c % kNumBaseClasses) != 0; c++) { + EXPECT_EQ(m_.class_to_size(c), 0); + EXPECT_EQ(m_.class_to_pages(c), 0); + EXPECT_EQ(m_.num_objects_to_move(c), 0); + } + } +} + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/span.cc b/contrib/libs/tcmalloc/tcmalloc/span.cc new file mode 100644 index 0000000000..87e6f29244 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/span.cc @@ -0,0 +1,332 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
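The tests above drive SizeMap::Init() through the TCMALLOC_SIZE_CLASSES environment variable, whose value is a semicolon-separated list of "size,pages,num_to_move" triples (class 0 is implicit and skipped, as in SizeClassesToString). A small standalone sketch of that encoding, using only the standard library and made-up names:

#include <cstdio>
#include <string>
#include <vector>

struct ClassTriple {
  int size;
  int pages;
  int num_to_move;
};

// Joins triples as "size,pages,num_to_move;size,pages,num_to_move;...".
std::string EncodeSizeClasses(const std::vector<ClassTriple>& classes) {
  std::string out;
  for (const ClassTriple& c : classes) {
    char buf[64];
    std::snprintf(buf, sizeof(buf), "%d,%d,%d", c.size, c.pages,
                  c.num_to_move);
    if (!out.empty()) out += ';';
    out += buf;
  }
  return out;
}

int main() {
  // Prints "8,1,32;16,1,32;32,1,32", the shape of string the tests above hand
  // to setenv("TCMALLOC_SIZE_CLASSES", ...).
  std::puts(EncodeSizeClasses({{8, 1, 32}, {16, 1, 32}, {32, 1, 32}}).c_str());
}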
+ +#include "tcmalloc/span.h" + +#include <stdint.h> + +#include <algorithm> + +#include "absl/base/optimization.h" // ABSL_INTERNAL_ASSUME +#include "absl/numeric/bits.h" +#include "tcmalloc/common.h" +#include "tcmalloc/internal/atomic_stats_counter.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/page_heap_allocator.h" +#include "tcmalloc/pagemap.h" +#include "tcmalloc/pages.h" +#include "tcmalloc/sampler.h" +#include "tcmalloc/static_vars.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +void Span::Sample(StackTrace* stack) { + ASSERT(!sampled_ && stack); + sampled_ = 1; + sampled_stack_ = stack; + Static::sampled_objects_.prepend(this); + + // The cast to value matches Unsample. + tcmalloc_internal::StatsCounter::Value allocated_bytes = + static_cast<tcmalloc_internal::StatsCounter::Value>( + AllocatedBytes(*stack, true)); + // LossyAdd is ok: writes to sampled_objects_size_ guarded by pageheap_lock. + Static::sampled_objects_size_.LossyAdd(allocated_bytes); +} + +StackTrace* Span::Unsample() { + if (!sampled_) { + return nullptr; + } + sampled_ = 0; + StackTrace* stack = sampled_stack_; + sampled_stack_ = nullptr; + RemoveFromList(); // from Static::sampled_objects_ + // The cast to Value ensures no funny business happens during the negation if + // sizeof(size_t) != sizeof(Value). + tcmalloc_internal::StatsCounter::Value neg_allocated_bytes = + -static_cast<tcmalloc_internal::StatsCounter::Value>( + AllocatedBytes(*stack, true)); + // LossyAdd is ok: writes to sampled_objects_size_ guarded by pageheap_lock. + Static::sampled_objects_size_.LossyAdd(neg_allocated_bytes); + return stack; +} + +double Span::Fragmentation() const { + const size_t cl = Static::pagemap().sizeclass(first_page_); + if (cl == 0) { + // Avoid crashes in production mode code, but report in tests. + ASSERT(cl != 0); + return 0; + } + const size_t obj_size = Static::sizemap().class_to_size(cl); + const size_t span_objects = bytes_in_span() / obj_size; + const size_t live = allocated_; + if (live == 0) { + // Avoid crashes in production mode code, but report in tests. + ASSERT(live != 0); + return 0; + } + // Assume that all in-use objects in this span are spread evenly + // through this span. So charge the free space in span evenly + // to each of the live objects. + // A note on units here: StackTraceTable::AddTrace(1, *t) + // represents usage (of whatever kind: heap space, allocation, + // fragmentation) of 1 object of size t->allocated_size. + // So we want to report here the number of objects we are "responsible" + // for pinning - NOT bytes. + return static_cast<double>(span_objects - live) / live; +} + +void Span::AverageFreelistAddedTime(const Span* other) { + // Do this computation as floating-point to avoid overflowing our uint64_t. + freelist_added_time_ = static_cast<uint64_t>( + (static_cast<double>(freelist_added_time_) * num_pages_ + + static_cast<double>(other->freelist_added_time_) * other->num_pages_) / + (num_pages_ + other->num_pages_)); +} + +// Freelist organization. +// +// Partially full spans in CentralFreeList contain a list of free objects +// (freelist). We could use the free objects as linked list nodes and form +// a stack, but since the free objects are not likely to be cache-hot the +// chain of dependent misses is very cache-unfriendly. The current +// organization reduces number of cache misses during push/pop. +// +// Objects in the freelist are represented by 2-byte indices. 
The index is +// object offset from the span start divided by a constant. For small objects +// (<512) divider is 8, for larger -- 64. This allows to fit all indices into +// 2 bytes. +// +// The freelist has two components. First, we have a small array-based cache +// (4 objects) embedded directly into the Span (cache_ and cache_size_). We can +// access this without touching any objects themselves. +// +// The rest of the freelist is stored as arrays inside free objects themselves. +// We can store object_size / 2 indexes in any object, but this is not always +// sufficient to store the entire contents of a Span in a single object. So we +// reserve the first index slot in an object to form a linked list. We use the +// first object in that list (freelist_) as an array to push/pop from; any +// subsequent objects in the list's arrays are guaranteed to be full. +// +// Graphically this can be depicted as follows: +// +// freelist_ embed_count_ cache_ cache_size_ +// Span: [ |idx| 4 |idx|idx|---|---| 2 ] +// | +// \/ +// [idx|idx|idx|idx|idx|---|---|---] 16-byte object +// | +// \/ +// [---|idx|idx|idx|idx|idx|idx|idx] 16-byte object +// + +Span::ObjIdx Span::PtrToIdx(void* ptr, size_t size) const { + // Object index is an offset from span start divided by a power-of-two. + // The divisors are choosen so that + // (1) objects are aligned on the divisor, + // (2) index fits into 16 bits and + // (3) the index of the beginning of all objects is strictly less than + // kListEnd (note that we have 256K pages and multi-page spans). + // For example with 1M spans we need kMultiPageAlignment >= 16. + // An ASSERT in BuildFreelist() verifies a condition which implies (3). + uintptr_t p = reinterpret_cast<uintptr_t>(ptr); + uintptr_t off; + if (size <= SizeMap::kMultiPageSize) { + // Generally we need to load first_page_ to compute the offset. + // But first_page_ can be in a different cache line then the fields that + // we use in FreelistPush otherwise (cache_, cache_size_, freelist_). + // So we avoid loading first_page_ for smaller sizes that have one page per + // span, instead we compute the offset by taking low kPageShift bits of the + // pointer. + ASSERT(PageIdContaining(ptr) == first_page_); + off = (p & (kPageSize - 1)) / kAlignment; + } else { + off = (p - first_page_.start_uintptr()) / SizeMap::kMultiPageAlignment; + } + ObjIdx idx = static_cast<ObjIdx>(off); + ASSERT(idx != kListEnd); + ASSERT(idx == off); + return idx; +} + +Span::ObjIdx* Span::IdxToPtr(ObjIdx idx, size_t size) const { + ASSERT(idx != kListEnd); + uintptr_t off = first_page_.start_uintptr() + + (static_cast<uintptr_t>(idx) + << (size <= SizeMap::kMultiPageSize + ? kAlignmentShift + : SizeMap::kMultiPageAlignmentShift)); + ObjIdx* ptr = reinterpret_cast<ObjIdx*>(off); + ASSERT(PtrToIdx(ptr, size) == idx); + return ptr; +} + +Span::ObjIdx* Span::BitmapIdxToPtr(ObjIdx idx, size_t size) const { + uintptr_t off = + first_page_.start_uintptr() + (static_cast<uintptr_t>(idx) * size); + ObjIdx* ptr = reinterpret_cast<ObjIdx*>(off); + return ptr; +} + +size_t Span::BitmapFreelistPopBatch(void** __restrict batch, size_t N, + size_t size) { +#ifndef NDEBUG + size_t before = bitmap_.CountBits(0, 64); +#endif // NDEBUG + + size_t count = 0; + // Want to fill the batch either with N objects, or the number of objects + // remaining in the span. 
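To make the encoding described in the comment above concrete, here is a sketch of the plain-division index scheme (object offset divided by the small or large alignment unit), taking the 512-byte boundary and the 8/64 dividers from that comment. The real constants are SizeMap::kMultiPageSize, kAlignment and kMultiPageAlignment; the names below are illustrative only.

#include <cassert>
#include <cstddef>
#include <cstdint>

constexpr uintptr_t kSmallDivider = 8;       // assumed kAlignment
constexpr uintptr_t kLargeDivider = 64;      // assumed kMultiPageAlignment
constexpr size_t kSmallLargeBoundary = 512;  // assumed small/large threshold

uint16_t EncodeIdx(uintptr_t offset_in_span, size_t object_size) {
  const uintptr_t divider =
      object_size <= kSmallLargeBoundary ? kSmallDivider : kLargeDivider;
  return static_cast<uint16_t>(offset_in_span / divider);
}

uintptr_t DecodeIdx(uint16_t idx, size_t object_size) {
  const uintptr_t divider =
      object_size <= kSmallLargeBoundary ? kSmallDivider : kLargeDivider;
  return static_cast<uintptr_t>(idx) * divider;
}

int main() {
  // Offsets of valid objects are multiples of the divider, so the round trip
  // is exact and the resulting index stays well below the reserved 0xFFFF
  // list-end value.
  for (uintptr_t offset = 0; offset < 64 * 1024; offset += 48) {
    assert(DecodeIdx(EncodeIdx(offset, 48), 48) == offset);
  }
}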
+ while (!bitmap_.IsZero() && count < N) { + size_t offset = bitmap_.FindSet(0); + ASSERT(offset < 64); + batch[count] = BitmapIdxToPtr(offset, size); + bitmap_.ClearLowestBit(); + count++; + } + +#ifndef NDEBUG + size_t after = bitmap_.CountBits(0, 64); + ASSERT(after + count == before); + ASSERT(allocated_ + count == embed_count_ - after); +#endif // NDEBUG + allocated_ += count; + return count; +} + +size_t Span::FreelistPopBatch(void** __restrict batch, size_t N, size_t size) { + // Handle spans with 64 or fewer objects using a bitmap. We expect spans + // to frequently hold smaller objects. + if (ABSL_PREDICT_FALSE(size >= kBitmapMinObjectSize)) { + return BitmapFreelistPopBatch(batch, N, size); + } + if (ABSL_PREDICT_TRUE(size <= SizeMap::kMultiPageSize)) { + return FreelistPopBatchSized<Align::SMALL>(batch, N, size); + } else { + return FreelistPopBatchSized<Align::LARGE>(batch, N, size); + } +} + +uint16_t Span::CalcReciprocal(size_t size) { + // Calculate scaling factor. We want to avoid dividing by the size of the + // object. Instead we'll multiply by a scaled version of the reciprocal. + // We divide kBitmapScalingDenominator by the object size, so later we can + // multiply by this reciprocal, and then divide this scaling factor out. + // TODO(djgove) These divides can be computed once at start up. + size_t reciprocal = 0; + // The spans hold objects up to kMaxSize, so it's safe to assume. + ABSL_INTERNAL_ASSUME(size <= kMaxSize); + if (size <= SizeMap::kMultiPageSize) { + reciprocal = kBitmapScalingDenominator / (size >> kAlignmentShift); + } else { + reciprocal = + kBitmapScalingDenominator / (size >> SizeMap::kMultiPageAlignmentShift); + } + ASSERT(reciprocal < 65536); + return static_cast<uint16_t>(reciprocal); +} + +void Span::BitmapBuildFreelist(size_t size, size_t count) { + // We are using a bitmap to indicate whether objects are used or not. The + // maximum capacity for the bitmap is 64 objects. + ASSERT(count <= 64); +#ifndef NDEBUG + // For bitmap_ use embed_count_ to record objects per span. + embed_count_ = count; +#endif // NDEBUG + reciprocal_ = CalcReciprocal(size); + allocated_ = 0; + bitmap_.Clear(); // bitmap_ can be non-zero from a previous use. + bitmap_.SetRange(0, count); + ASSERT(bitmap_.CountBits(0, 64) == count); +} + +int Span::BuildFreelist(size_t size, size_t count, void** batch, int N) { + freelist_ = kListEnd; + + if (size >= kBitmapMinObjectSize) { + BitmapBuildFreelist(size, count); + return BitmapFreelistPopBatch(batch, N, size); + } + + // First, push as much as we can into the batch. + char* ptr = static_cast<char*>(start_address()); + int result = N <= count ? N : count; + for (int i = 0; i < result; ++i) { + batch[i] = ptr; + ptr += size; + } + allocated_ = result; + + ObjIdx idxStep = size / kAlignment; + // Valid objects are {0, idxStep, idxStep * 2, ..., idxStep * (count - 1)}. + if (size > SizeMap::kMultiPageSize) { + idxStep = size / SizeMap::kMultiPageAlignment; + } + ObjIdx idx = idxStep * result; + + // Verify that the end of the useful portion of the span (and the beginning of + // the span waste) has an index that doesn't overflow or risk confusion with + // kListEnd. This is slightly stronger than we actually need (see comment in + // PtrToIdx for that) but rules out some bugs and weakening it wouldn't + // actually help. One example of the potential bugs that are ruled out is the + // possibility of idxEnd (below) overflowing. + ASSERT(count * idxStep < kListEnd); + + // The index of the end of the useful portion of the span. 
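BitmapBuildFreelist and BitmapFreelistPopBatch above track availability for spans of at most 64 objects with a single 64-bit bitmap: a set bit means the object is free, popping clears the lowest set bit, and pushing sets the object's bit again. A toy standalone model of just that bookkeeping follows; tcmalloc's Bitmap<64> has its own interface, and ToyBitmapFreelist is a made-up name.

#include <cassert>
#include <cstdint>

class ToyBitmapFreelist {
 public:
  // Mark the first `count` objects (count <= 64) as free.
  void Build(unsigned count) {
    assert(count <= 64);
    bits_ = count == 64 ? ~uint64_t{0} : (uint64_t{1} << count) - 1;
  }
  bool Empty() const { return bits_ == 0; }
  // Returns the index of a free object and marks it used, or -1 if none left.
  int Pop() {
    if (bits_ == 0) return -1;
    const int idx = __builtin_ctzll(bits_);  // lowest set bit
    bits_ &= bits_ - 1;                      // clear it
    return idx;
  }
  // Marks object `idx` as free again.
  void Push(int idx) { bits_ |= uint64_t{1} << idx; }

 private:
  uint64_t bits_ = 0;
};

int main() {
  ToyBitmapFreelist f;
  f.Build(3);
  assert(f.Pop() == 0 && f.Pop() == 1);
  f.Push(0);
  assert(f.Pop() == 0 && f.Pop() == 2 && f.Empty());
}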
+ ObjIdx idxEnd = count * idxStep; + + // Then, push as much as we can into the cache_. + int cache_size = 0; + for (; idx < idxEnd && cache_size < kCacheSize; idx += idxStep) { + cache_[cache_size] = idx; + cache_size++; + } + cache_size_ = cache_size; + + // Now, build freelist and stack other objects onto freelist objects. + // Note: we take freelist objects from the beginning and stacked objects + // from the end. This has a nice property of not paging in whole span at once + // and not draining whole cache. + ObjIdx* host = nullptr; // cached first object on freelist + const size_t max_embed = size / sizeof(ObjIdx) - 1; + int embed_count = 0; + while (idx < idxEnd) { + // Check the no idx can be confused with kListEnd. + ASSERT(idx != kListEnd); + if (host && embed_count != max_embed) { + // Push onto first object on the freelist. + embed_count++; + idxEnd -= idxStep; + host[embed_count] = idxEnd; + } else { + // The first object is full, push new object onto freelist. + host = IdxToPtr(idx, size); + host[0] = freelist_; + freelist_ = idx; + embed_count = 0; + idx += idxStep; + } + } + embed_count_ = embed_count; + return result; +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/span.h b/contrib/libs/tcmalloc/tcmalloc/span.h new file mode 100644 index 0000000000..c589709094 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/span.h @@ -0,0 +1,589 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// A Span is a contiguous run of pages. + +#ifndef TCMALLOC_SPAN_H_ +#define TCMALLOC_SPAN_H_ + +#include <stddef.h> +#include <stdint.h> +#include <string.h> + +#include "absl/base/thread_annotations.h" +#include "absl/numeric/bits.h" +#include "tcmalloc/common.h" +#include "tcmalloc/internal/linked_list.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/optimization.h" +#include "tcmalloc/internal/range_tracker.h" +#include "tcmalloc/pages.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// Can fit 64 objects into a bitmap, so determine what the minimum object +// size needs to be in order for that to work. This makes the assumption that +// we don't increase the number of pages at a point where the object count +// ends up exceeding 64. +inline constexpr size_t kBitmapMinObjectSize = kPageSize / 64; + +// Denominator for bitmap scaling factor. The idea is that instead of dividing +// by N we multiply by M = kBitmapScalingDenominator / N and round the resulting +// value. +inline constexpr size_t kBitmapScalingDenominator = 65536; + +// Information kept for a span (a contiguous run of pages). +// +// Spans can be in different states. The current state determines set of methods +// that can be called on the span (and the active member in the union below). +// States are: +// - SMALL_OBJECT: the span holds multiple small objects. 
+// The span is owned by CentralFreeList and is generally on +// CentralFreeList::nonempty_ list (unless has no free objects). +// location_ == IN_USE. +// - LARGE_OBJECT: the span holds a single large object. +// The span can be considered to be owner by user until the object is freed. +// location_ == IN_USE. +// - SAMPLED: the span holds a single sampled object. +// The span can be considered to be owner by user until the object is freed. +// location_ == IN_USE && sampled_ == 1. +// - ON_NORMAL_FREELIST: the span has no allocated objects, owned by PageHeap +// and is on normal PageHeap list. +// location_ == ON_NORMAL_FREELIST. +// - ON_RETURNED_FREELIST: the span has no allocated objects, owned by PageHeap +// and is on returned PageHeap list. +// location_ == ON_RETURNED_FREELIST. +class Span; +typedef TList<Span> SpanList; + +class Span : public SpanList::Elem { + public: + // Allocator/deallocator for spans. Note that these functions are defined + // in static_vars.h, which is weird: see there for why. + static Span* New(PageId p, Length len) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + static void Delete(Span* span) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Remove this from the linked list in which it resides. + // REQUIRES: this span is on some list. + void RemoveFromList(); + + // locations used to track what list a span resides on. + enum Location { + IN_USE, // not on PageHeap lists + ON_NORMAL_FREELIST, // on normal PageHeap list + ON_RETURNED_FREELIST, // on returned PageHeap list + }; + Location location() const; + void set_location(Location loc); + + // --------------------------------------------------------------------------- + // Support for sampled allocations. + // There is one-to-one correspondence between a sampled allocation and a span. + // --------------------------------------------------------------------------- + + // Mark this span as sampling allocation at the stack. Sets state to SAMPLED. + void Sample(StackTrace* stack) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Unmark this span as sampling an allocation. + // Returns stack trace previously passed to Sample, + // or nullptr if this is a non-sampling span. + // REQUIRES: this is a SAMPLED span. + StackTrace* Unsample() ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Returns stack for the sampled allocation. + // pageheap_lock is not required, but caller either needs to hold the lock or + // ensure by some other means that the sampling state can't be changed + // concurrently. + // REQUIRES: this is a SAMPLED span. + StackTrace* sampled_stack() const; + + // Is it a sampling span? + // For debug checks. pageheap_lock is not required, but caller needs to ensure + // that sampling state can't be changed concurrently. + bool sampled() const; + + // --------------------------------------------------------------------------- + // Span memory range. + // --------------------------------------------------------------------------- + + // Returns first page of the span. + PageId first_page() const; + + // Returns the last page in the span. + PageId last_page() const; + + // Sets span first page. + void set_first_page(PageId p); + + // Returns start address of the span. + void* start_address() const; + + // Returns number of pages in the span. + Length num_pages() const; + + // Sets number of pages in the span. + void set_num_pages(Length len); + + // Total memory bytes in the span. 
+ size_t bytes_in_span() const; + + // --------------------------------------------------------------------------- + // Age tracking (for free spans in PageHeap). + // --------------------------------------------------------------------------- + + uint64_t freelist_added_time() const; + void set_freelist_added_time(uint64_t t); + + // Sets this span freelist added time to average of this and other times + // weighted by their sizes. + // REQUIRES: this is a ON_NORMAL_FREELIST or ON_RETURNED_FREELIST span. + void AverageFreelistAddedTime(const Span* other); + + // Returns internal fragmentation of the span. + // REQUIRES: this is a SMALL_OBJECT span. + double Fragmentation() const; + + // --------------------------------------------------------------------------- + // Freelist management. + // Used for spans in CentralFreelist to manage free objects. + // These methods REQUIRE a SMALL_OBJECT span. + // --------------------------------------------------------------------------- + + // Indicates whether the object is considered large or small based on + // size > SizeMap::kMultiPageSize. + enum class Align { SMALL, LARGE }; + + // Indicate whether the Span is empty. Size is used to determine whether + // the span is using a compressed linked list of objects, or a bitmap + // to hold available objects. + bool FreelistEmpty(size_t size) const; + + // Pushes ptr onto freelist unless the freelist becomes full, + // in which case just return false. + bool FreelistPush(void* ptr, size_t size) { + ASSERT(allocated_ > 0); + if (ABSL_PREDICT_FALSE(allocated_ == 1)) { + return false; + } + allocated_--; + // Bitmaps are used to record object availability when there are fewer than + // 64 objects in a span. + if (ABSL_PREDICT_FALSE(size >= kBitmapMinObjectSize)) { + if (ABSL_PREDICT_TRUE(size <= SizeMap::kMultiPageSize)) { + return BitmapFreelistPush<Align::SMALL>(ptr, size); + } else { + return BitmapFreelistPush<Align::LARGE>(ptr, size); + } + } + if (ABSL_PREDICT_TRUE(size <= SizeMap::kMultiPageSize)) { + return FreelistPushSized<Align::SMALL>(ptr, size); + } else { + return FreelistPushSized<Align::LARGE>(ptr, size); + } + } + + // Pops up to N objects from the freelist and returns them in the batch array. + // Returns number of objects actually popped. + size_t FreelistPopBatch(void** batch, size_t N, size_t size); + + // Reset a Span object to track the range [p, p + n). + void Init(PageId p, Length n); + + // Initialize freelist to contain all objects in the span. + // Pops up to N objects from the freelist and returns them in the batch array. + // Returns number of objects actually popped. + int BuildFreelist(size_t size, size_t count, void** batch, int N); + + // Prefetch cacheline containing most important span information. + void Prefetch(); + + static constexpr size_t kCacheSize = 4; + + private: + // See the comment on freelist organization in cc file. + typedef uint16_t ObjIdx; + static constexpr ObjIdx kListEnd = -1; + + // Use uint16_t or uint8_t for 16 bit and 8 bit fields instead of bitfields. + // LLVM will generate widen load/store and bit masking operations to access + // bitfields and this hurts performance. Although compiler flag + // -ffine-grained-bitfield-accesses can help the performance if bitfields + // are used here, but the flag could potentially hurt performance in other + // cases so it is not enabled by default. For more information, please + // look at b/35680381 and cl/199502226. 
+ uint16_t allocated_; // Number of non-free objects + uint16_t embed_count_; + // For available objects stored as a compressed linked list, the index of + // the first object in recorded in freelist_. When a bitmap is used to + // represent available objects, the reciprocal of the object size is + // stored to enable conversion from the offset of an object within a + // span to the index of the object. + union { + uint16_t freelist_; + uint16_t reciprocal_; + }; + uint8_t cache_size_; + uint8_t location_ : 2; // Is the span on a freelist, and if so, which? + uint8_t sampled_ : 1; // Sampled object? + + union { + // Used only for spans in CentralFreeList (SMALL_OBJECT state). + // Embed cache of free objects. + ObjIdx cache_[kCacheSize]; + + // Used for spans with in CentralFreeList with fewer than 64 objects. + // Each bit is set to one when the object is available, and zero + // when the object is used. + Bitmap<64> bitmap_{}; + + // Used only for sampled spans (SAMPLED state). + StackTrace* sampled_stack_; + + // Used only for spans in PageHeap + // (ON_NORMAL_FREELIST or ON_RETURNED_FREELIST state). + // Time when this span was added to a freelist. Units: cycles. When a span + // is merged into this one, we set this to the average of now and the + // current freelist_added_time, weighted by the two spans' sizes. + uint64_t freelist_added_time_; + }; + + PageId first_page_; // Starting page number. + Length num_pages_; // Number of pages in span. + + // Convert object pointer <-> freelist index. + ObjIdx PtrToIdx(void* ptr, size_t size) const; + ObjIdx* IdxToPtr(ObjIdx idx, size_t size) const; + + // For bitmap'd spans conversion from an offset to an index is performed + // by multiplying by the scaled reciprocal of the object size. + static uint16_t CalcReciprocal(size_t size); + + // Convert object pointer <-> freelist index for bitmap managed objects. + template <Align align> + ObjIdx BitmapPtrToIdx(void* ptr, size_t size) const; + ObjIdx* BitmapIdxToPtr(ObjIdx idx, size_t size) const; + + // Helper function for converting a pointer to an index. + template <Align align> + static ObjIdx OffsetToIdx(uintptr_t offset, size_t size, uint16_t reciprocal); + // Helper function for testing round trips between pointers and indexes. + static ObjIdx TestOffsetToIdx(uintptr_t ptr, size_t size, + uint16_t reciprocal) { + if (size <= SizeMap::kMultiPageSize) { + return OffsetToIdx<Align::SMALL>(ptr, size, reciprocal); + } else { + return OffsetToIdx<Align::LARGE>(ptr, size, reciprocal); + } + } + + template <Align align> + ObjIdx* IdxToPtrSized(ObjIdx idx, size_t size) const; + + template <Align align> + ObjIdx PtrToIdxSized(void* ptr, size_t size) const; + + template <Align align> + size_t FreelistPopBatchSized(void** __restrict batch, size_t N, size_t size); + + template <Align align> + bool FreelistPushSized(void* ptr, size_t size); + + // For spans containing 64 or fewer objects, indicate that the object at the + // index has been returned. Always returns true. + template <Align align> + bool BitmapFreelistPush(void* ptr, size_t size); + + // A bitmap is used to indicate object availability for spans containing + // 64 or fewer objects. + void BitmapBuildFreelist(size_t size, size_t count); + + // For spans with 64 or fewer objects populate batch with up to N objects. + // Returns number of objects actually popped. + size_t BitmapFreelistPopBatch(void** batch, size_t N, size_t size); + + // Friend class to enable more indepth testing of bitmap code. 
+ friend class SpanTestPeer; +}; + +template <Span::Align align> +Span::ObjIdx* Span::IdxToPtrSized(ObjIdx idx, size_t size) const { + ASSERT(idx != kListEnd); + static_assert(align == Align::LARGE || align == Align::SMALL); + uintptr_t off = + first_page_.start_uintptr() + + (static_cast<uintptr_t>(idx) + << (align == Align::SMALL ? kAlignmentShift + : SizeMap::kMultiPageAlignmentShift)); + ObjIdx* ptr = reinterpret_cast<ObjIdx*>(off); + ASSERT(PtrToIdx(ptr, size) == idx); + return ptr; +} + +template <Span::Align align> +Span::ObjIdx Span::PtrToIdxSized(void* ptr, size_t size) const { + // Object index is an offset from span start divided by a power-of-two. + // The divisors are choosen so that + // (1) objects are aligned on the divisor, + // (2) index fits into 16 bits and + // (3) the index of the beginning of all objects is strictly less than + // kListEnd (note that we have 256K pages and multi-page spans). + // For example with 1M spans we need kMultiPageAlignment >= 16. + // An ASSERT in BuildFreelist() verifies a condition which implies (3). + uintptr_t p = reinterpret_cast<uintptr_t>(ptr); + uintptr_t off; + if (align == Align::SMALL) { + // Generally we need to load first_page_ to compute the offset. + // But first_page_ can be in a different cache line then the fields that + // we use in FreelistPush otherwise (cache_, cache_size_, freelist_). + // So we avoid loading first_page_ for smaller sizes that have one page per + // span, instead we compute the offset by taking low kPageShift bits of the + // pointer. + ASSERT(PageIdContaining(ptr) == first_page_); + ASSERT(num_pages_ == Length(1)); + off = (p & (kPageSize - 1)) / kAlignment; + } else { + off = (p - first_page_.start_uintptr()) / SizeMap::kMultiPageAlignment; + } + ObjIdx idx = static_cast<ObjIdx>(off); + ASSERT(idx != kListEnd); + ASSERT(idx == off); + ASSERT(IdxToPtr(idx, size) == ptr); + return idx; +} + +template <Span::Align align> +size_t Span::FreelistPopBatchSized(void** __restrict batch, size_t N, + size_t size) { + size_t result = 0; + + // Pop from cache. + auto csize = cache_size_; + ASSUME(csize <= kCacheSize); + auto cache_reads = csize < N ? csize : N; + for (; result < cache_reads; result++) { + batch[result] = IdxToPtrSized<align>(cache_[csize - result - 1], size); + } + + // Store this->cache_size_ one time. + cache_size_ = csize - result; + + while (result < N) { + if (freelist_ == kListEnd) { + break; + } + + ObjIdx* const host = IdxToPtrSized<align>(freelist_, size); + uint16_t embed_count = embed_count_; + ObjIdx current = host[embed_count]; + + size_t iter = embed_count; + if (result + embed_count > N) { + iter = N - result; + } + for (size_t i = 0; i < iter; i++) { + // Pop from the first object on freelist. + batch[result + i] = IdxToPtrSized<align>(host[embed_count - i], size); + } + embed_count -= iter; + result += iter; + + // Update current for next cycle. + current = host[embed_count]; + + if (result == N) { + embed_count_ = embed_count; + break; + } + + // The first object on the freelist is empty, pop it. + ASSERT(embed_count == 0); + + batch[result] = host; + result++; + + freelist_ = current; + embed_count_ = size / sizeof(ObjIdx) - 1; + } + allocated_ += result; + return result; +} + +template <Span::Align align> +bool Span::FreelistPushSized(void* ptr, size_t size) { + ObjIdx idx = PtrToIdxSized<align>(ptr, size); + if (cache_size_ != kCacheSize) { + // Have empty space in the cache, push there. 
+ cache_[cache_size_] = idx; + cache_size_++; + } else if (ABSL_PREDICT_TRUE(freelist_ != kListEnd) && + // -1 because the first slot is used by freelist link. + ABSL_PREDICT_TRUE(embed_count_ != size / sizeof(ObjIdx) - 1)) { + // Push onto the first object on freelist. + ObjIdx* host; + if (align == Align::SMALL) { + // Avoid loading first_page_ in this case (see the comment in PtrToIdx). + ASSERT(num_pages_ == Length(1)); + host = reinterpret_cast<ObjIdx*>( + (reinterpret_cast<uintptr_t>(ptr) & ~(kPageSize - 1)) + + static_cast<uintptr_t>(freelist_) * kAlignment); + ASSERT(PtrToIdx(host, size) == freelist_); + } else { + host = IdxToPtrSized<align>(freelist_, size); + } + embed_count_++; + host[embed_count_] = idx; + } else { + // Push onto freelist. + *reinterpret_cast<ObjIdx*>(ptr) = freelist_; + freelist_ = idx; + embed_count_ = 0; + } + return true; +} + +template <Span::Align align> +Span::ObjIdx Span::OffsetToIdx(uintptr_t offset, size_t size, + uint16_t reciprocal) { + if (align == Align::SMALL) { + return static_cast<ObjIdx>( + // Add kBitmapScalingDenominator / 2 to round to nearest integer. + ((offset >> kAlignmentShift) * reciprocal + + kBitmapScalingDenominator / 2) / + kBitmapScalingDenominator); + } else { + return static_cast<ObjIdx>( + ((offset >> SizeMap::kMultiPageAlignmentShift) * reciprocal + + kBitmapScalingDenominator / 2) / + kBitmapScalingDenominator); + } +} + +template <Span::Align align> +Span::ObjIdx Span::BitmapPtrToIdx(void* ptr, size_t size) const { + uintptr_t p = reinterpret_cast<uintptr_t>(ptr); + uintptr_t off = static_cast<uint32_t>(p - first_page_.start_uintptr()); + ObjIdx idx = OffsetToIdx<align>(off, size, reciprocal_); + ASSERT(BitmapIdxToPtr(idx, size) == ptr); + return idx; +} + +template <Span::Align align> +bool Span::BitmapFreelistPush(void* ptr, size_t size) { +#ifndef NDEBUG + size_t before = bitmap_.CountBits(0, 64); +#endif + // TODO(djgove) Conversions to offsets can be computed outside of lock. + ObjIdx idx = BitmapPtrToIdx<align>(ptr, size); + // Check that the object is not already returned. + ASSERT(bitmap_.GetBit(idx) == 0); + // Set the bit indicating where the object was returned. 
+ bitmap_.SetBit(idx); +#ifndef NDEBUG + size_t after = bitmap_.CountBits(0, 64); + ASSERT(before + 1 == after); + ASSERT(allocated_ == embed_count_ - after); +#endif + return true; +} + +inline Span::Location Span::location() const { + return static_cast<Location>(location_); +} + +inline void Span::set_location(Location loc) { + location_ = static_cast<uint64_t>(loc); +} + +inline StackTrace* Span::sampled_stack() const { + ASSERT(sampled_); + return sampled_stack_; +} + +inline bool Span::sampled() const { return sampled_; } + +inline PageId Span::first_page() const { return first_page_; } + +inline PageId Span::last_page() const { + return first_page_ + num_pages_ - Length(1); +} + +inline void Span::set_first_page(PageId p) { first_page_ = p; } + +inline void* Span::start_address() const { return first_page_.start_addr(); } + +inline Length Span::num_pages() const { return num_pages_; } + +inline void Span::set_num_pages(Length len) { num_pages_ = len; } + +inline size_t Span::bytes_in_span() const { return num_pages_.in_bytes(); } + +inline void Span::set_freelist_added_time(uint64_t t) { + freelist_added_time_ = t; +} + +inline uint64_t Span::freelist_added_time() const { + return freelist_added_time_; +} + +inline bool Span::FreelistEmpty(size_t size) const { + if (size < kBitmapMinObjectSize) { + return (cache_size_ == 0 && freelist_ == kListEnd); + } else { + return (bitmap_.IsZero()); + } +} + +inline void Span::RemoveFromList() { SpanList::Elem::remove(); } + +inline void Span::Prefetch() { + // The first 16 bytes of a Span are the next and previous pointers + // for when it is stored in a linked list. Since the sizeof(Span) is + // 48 bytes, spans fit into 2 cache lines 50% of the time, with either + // the first 16-bytes or the last 16-bytes in a different cache line. + // Prefetch the cacheline that contains the most frequestly accessed + // data by offseting into the middle of the Span. +#if defined(__GNUC__) +#if __WORDSIZE == 32 + // The Span fits in one cache line, so simply prefetch the base pointer. + static_assert(sizeof(Span) == 32, "Update span prefetch offset"); + __builtin_prefetch(this, 0, 3); +#else + // The Span can occupy two cache lines, so prefetch the cacheline with the + // most frequently accessed parts of the Span. + static_assert(sizeof(Span) == 48, "Update span prefetch offset"); + __builtin_prefetch(&this->allocated_, 0, 3); +#endif +#endif +} + +inline void Span::Init(PageId p, Length n) { +#ifndef NDEBUG + // In debug mode we have additional checking of our list ops; these must be + // initialized. + new (this) Span(); +#endif + first_page_ = p; + num_pages_ = n; + location_ = IN_USE; + sampled_ = 0; +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_SPAN_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/span_benchmark.cc b/contrib/libs/tcmalloc/tcmalloc/span_benchmark.cc new file mode 100644 index 0000000000..6e4569dd83 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/span_benchmark.cc @@ -0,0 +1,212 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <stdlib.h> + +#include <utility> +#include <vector> + +#include "absl/base/internal/spinlock.h" +#include "absl/random/random.h" +#include "benchmark/benchmark.h" +#include "tcmalloc/common.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/span.h" +#include "tcmalloc/static_vars.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +class RawSpan { + public: + void Init(size_t cl) { + size_t size = Static::sizemap().class_to_size(cl); + auto npages = Length(Static::sizemap().class_to_pages(cl)); + size_t objects_per_span = npages.in_bytes() / size; + + void *mem; + int res = posix_memalign(&mem, kPageSize, npages.in_bytes()); + CHECK_CONDITION(res == 0); + span_.set_first_page(PageIdContaining(mem)); + span_.set_num_pages(npages); + span_.BuildFreelist(size, objects_per_span, nullptr, 0); + } + + ~RawSpan() { free(span_.start_address()); } + + Span &span() { return span_; } + + private: + Span span_; +}; + +// BM_single_span repeatedly pushes and pops the same num_objects_to_move(cl) +// objects from the span. +void BM_single_span(benchmark::State &state) { + const int cl = state.range(0); + + size_t size = Static::sizemap().class_to_size(cl); + size_t batch_size = Static::sizemap().num_objects_to_move(cl); + RawSpan raw_span; + raw_span.Init(cl); + Span &span = raw_span.span(); + + void *batch[kMaxObjectsToMove]; + + int64_t processed = 0; + while (state.KeepRunningBatch(batch_size)) { + int n = span.FreelistPopBatch(batch, batch_size, size); + processed += n; + + for (int j = 0; j < n; j++) { + span.FreelistPush(batch[j], size); + } + } + + state.SetItemsProcessed(processed); +} + +// BM_single_span_fulldrain alternates between fully draining and filling the +// span. 
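The span benchmarks in this file rely on benchmark::State::KeepRunningBatch() so that each pass through the loop is accounted as batch_size iterations and SetItemsProcessed() reports per-object rather than per-batch throughput. A minimal standalone example of that pattern against google/benchmark, with placeholder work standing in for the pop/push calls:

#include <cstdint>

#include "benchmark/benchmark.h"

static void BM_batched(benchmark::State& state) {
  const int64_t batch_size = 32;  // stands in for num_objects_to_move(cl)
  int64_t processed = 0;
  while (state.KeepRunningBatch(batch_size)) {
    for (int64_t i = 0; i < batch_size; ++i) {
      benchmark::DoNotOptimize(i);  // placeholder for one pop or push
    }
    processed += batch_size;
  }
  state.SetItemsProcessed(processed);
}
BENCHMARK(BM_batched);
BENCHMARK_MAIN();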
+void BM_single_span_fulldrain(benchmark::State &state) { + const int cl = state.range(0); + + size_t size = Static::sizemap().class_to_size(cl); + size_t npages = Static::sizemap().class_to_pages(cl); + size_t batch_size = Static::sizemap().num_objects_to_move(cl); + size_t objects_per_span = npages * kPageSize / size; + RawSpan raw_span; + raw_span.Init(cl); + Span &span = raw_span.span(); + + std::vector<void *> objects(objects_per_span, nullptr); + size_t oindex = 0; + + size_t processed = 0; + while (state.KeepRunningBatch(objects_per_span)) { + // Drain span + while (oindex < objects_per_span) { + size_t popped = span.FreelistPopBatch(&objects[oindex], batch_size, size); + oindex += popped; + processed += popped; + } + + // Fill span + while (oindex > 0) { + void *p = objects[oindex - 1]; + if (!span.FreelistPush(p, size)) { + break; + } + + oindex--; + } + } + + state.SetItemsProcessed(processed); +} + +BENCHMARK(BM_single_span) + ->Arg(1) + ->Arg(2) + ->Arg(3) + ->Arg(4) + ->Arg(5) + ->Arg(7) + ->Arg(10) + ->Arg(12) + ->Arg(16) + ->Arg(20) + ->Arg(30) + ->Arg(40) + ->Arg(kNumClasses - 1); + +BENCHMARK(BM_single_span_fulldrain) + ->Arg(1) + ->Arg(2) + ->Arg(3) + ->Arg(4) + ->Arg(5) + ->Arg(7) + ->Arg(10) + ->Arg(12) + ->Arg(16) + ->Arg(20) + ->Arg(30) + ->Arg(40) + ->Arg(kNumClasses - 1); + +void BM_NewDelete(benchmark::State &state) { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + for (auto s : state) { + Span *sp = Span::New(PageId{0}, Length(1)); + benchmark::DoNotOptimize(sp); + Span::Delete(sp); + } + state.SetItemsProcessed(state.iterations()); +} + +BENCHMARK(BM_NewDelete); + +void BM_multiple_spans(benchmark::State &state) { + const int cl = state.range(0); + + // Should be large enough to cause cache misses + const int num_spans = 10000000; + std::vector<Span> spans(num_spans); + size_t size = Static::sizemap().class_to_size(cl); + size_t batch_size = Static::sizemap().num_objects_to_move(cl); + for (int i = 0; i < num_spans; i++) { + RawSpan raw_span; + raw_span.Init(cl); + spans[i] = raw_span.span(); + } + absl::BitGen rng; + + void *batch[kMaxObjectsToMove]; + + int64_t processed = 0; + while (state.KeepRunningBatch(batch_size)) { + int current_span = absl::Uniform(rng, 0, num_spans); + int n = spans[current_span].FreelistPopBatch(batch, batch_size, size); + processed += n; + + for (int j = 0; j < n; j++) { + spans[current_span].FreelistPush(batch[j], size); + } + } + + state.SetItemsProcessed(processed); +} + +BENCHMARK(BM_multiple_spans) + ->Arg(1) + ->Arg(2) + ->Arg(3) + ->Arg(4) + ->Arg(5) + ->Arg(7) + ->Arg(10) + ->Arg(12) + ->Arg(16) + ->Arg(20) + ->Arg(30) + ->Arg(40) + ->Arg(kNumClasses - 1); + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/span_stats.h b/contrib/libs/tcmalloc/tcmalloc/span_stats.h new file mode 100644 index 0000000000..8c0b40b0fd --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/span_stats.h @@ -0,0 +1,50 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_SPAN_STATS_H_ +#define TCMALLOC_SPAN_STATS_H_ + +#include <stddef.h> + +#include "absl/base/macros.h" +#include "tcmalloc/internal/config.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +struct SpanStats { + size_t num_spans_requested = 0; + size_t num_spans_returned = 0; + size_t obj_capacity = 0; // cap of number of objs that could be live anywhere + + size_t num_live_spans() { + if (num_spans_requested < num_spans_returned) { + return 0; + } + return num_spans_requested - num_spans_returned; + } + + // Probability that a span will be returned + double prob_returned() { + if (ABSL_PREDICT_FALSE(num_spans_requested == 0)) return 0.0; + return static_cast<double>(num_spans_returned) / num_spans_requested; + } +}; + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_SPAN_STATS_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/span_test.cc b/contrib/libs/tcmalloc/tcmalloc/span_test.cc new file mode 100644 index 0000000000..750f3cca26 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/span_test.cc @@ -0,0 +1,191 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
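A quick worked example, with made-up numbers, of the SpanStats accounting defined above: the live-span count is the difference between requested and returned spans (clamped at zero), and prob_returned() guards the ratio against division by zero.

#include <cstddef>
#include <cstdio>

int main() {
  const std::size_t requested = 120, returned = 90;
  const std::size_t live = requested >= returned ? requested - returned : 0;
  const double prob_returned =
      requested == 0 ? 0.0 : static_cast<double>(returned) / requested;
  // Prints: live=30 prob_returned=0.75
  std::printf("live=%zu prob_returned=%.2f\n", live, prob_returned);
}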
+ +#include "tcmalloc/span.h" + +#include <stdlib.h> + +#include <utility> +#include <vector> + +#include "gtest/gtest.h" +#include "absl/base/internal/spinlock.h" +#include "absl/container/flat_hash_set.h" +#include "absl/random/random.h" +#include "tcmalloc/common.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/static_vars.h" + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +class RawSpan { + public: + void Init(size_t cl) { + size_t size = Static::sizemap().class_to_size(cl); + auto npages = Length(Static::sizemap().class_to_pages(cl)); + size_t objects_per_span = npages.in_bytes() / size; + + void *mem; + int res = posix_memalign(&mem, kPageSize, npages.in_bytes()); + CHECK_CONDITION(res == 0); + span_.set_first_page(PageIdContaining(mem)); + span_.set_num_pages(npages); + span_.BuildFreelist(size, objects_per_span, nullptr, 0); + } + + ~RawSpan() { free(span_.start_address()); } + + Span &span() { return span_; } + + private: + Span span_; +}; + +class SpanTest : public testing::TestWithParam<size_t> { + protected: + size_t cl_; + size_t size_; + size_t npages_; + size_t batch_size_; + size_t objects_per_span_; + RawSpan raw_span_; + + private: + void SetUp() override { + cl_ = GetParam(); + size_ = Static::sizemap().class_to_size(cl_); + if (size_ == 0) { + GTEST_SKIP() << "Skipping empty size class."; + } + + npages_ = Static::sizemap().class_to_pages(cl_); + batch_size_ = Static::sizemap().num_objects_to_move(cl_); + objects_per_span_ = npages_ * kPageSize / size_; + + raw_span_.Init(cl_); + } + + void TearDown() override {} +}; + +TEST_P(SpanTest, FreelistBasic) { + Span &span_ = raw_span_.span(); + + EXPECT_FALSE(span_.FreelistEmpty(size_)); + void *batch[kMaxObjectsToMove]; + size_t popped = 0; + size_t want = 1; + char *start = static_cast<char *>(span_.start_address()); + std::vector<bool> objects(objects_per_span_); + for (size_t x = 0; x < 2; ++x) { + // Pop all objects in batches of varying size and ensure that we've got + // all objects. + for (;;) { + size_t n = span_.FreelistPopBatch(batch, want, size_); + popped += n; + EXPECT_EQ(span_.FreelistEmpty(size_), popped == objects_per_span_); + for (size_t i = 0; i < n; ++i) { + void *p = batch[i]; + uintptr_t off = reinterpret_cast<char *>(p) - start; + EXPECT_LT(off, span_.bytes_in_span()); + EXPECT_EQ(off % size_, 0); + size_t idx = off / size_; + EXPECT_FALSE(objects[idx]); + objects[idx] = true; + } + if (n < want) { + break; + } + ++want; + if (want > batch_size_) { + want = 1; + } + } + EXPECT_TRUE(span_.FreelistEmpty(size_)); + EXPECT_EQ(span_.FreelistPopBatch(batch, 1, size_), 0); + EXPECT_EQ(popped, objects_per_span_); + + // Push all objects back except the last one (which would not be pushed). + for (size_t idx = 0; idx < objects_per_span_ - 1; ++idx) { + EXPECT_TRUE(objects[idx]); + bool ok = span_.FreelistPush(start + idx * size_, size_); + EXPECT_TRUE(ok); + EXPECT_FALSE(span_.FreelistEmpty(size_)); + objects[idx] = false; + --popped; + } + // On the last iteration we can actually push the last object. + if (x == 1) { + bool ok = + span_.FreelistPush(start + (objects_per_span_ - 1) * size_, size_); + EXPECT_FALSE(ok); + } + } +} + +TEST_P(SpanTest, FreelistRandomized) { + Span &span_ = raw_span_.span(); + + char *start = static_cast<char *>(span_.start_address()); + + // Do a bunch of random pushes/pops with random batch size. 
+ absl::BitGen rng; + absl::flat_hash_set<void *> objects; + void *batch[kMaxObjectsToMove]; + for (size_t x = 0; x < 10000; ++x) { + if (!objects.empty() && absl::Bernoulli(rng, 1.0 / 2)) { + void *p = *objects.begin(); + if (span_.FreelistPush(p, size_)) { + objects.erase(objects.begin()); + } else { + EXPECT_EQ(objects.size(), 1); + } + EXPECT_EQ(span_.FreelistEmpty(size_), objects_per_span_ == 1); + } else { + size_t want = absl::Uniform<int32_t>(rng, 0, batch_size_) + 1; + size_t n = span_.FreelistPopBatch(batch, want, size_); + if (n < want) { + EXPECT_TRUE(span_.FreelistEmpty(size_)); + } + for (size_t i = 0; i < n; ++i) { + EXPECT_TRUE(objects.insert(batch[i]).second); + } + } + } + // Now pop everything what's there. + for (;;) { + size_t n = span_.FreelistPopBatch(batch, batch_size_, size_); + for (size_t i = 0; i < n; ++i) { + EXPECT_TRUE(objects.insert(batch[i]).second); + } + if (n < batch_size_) { + break; + } + } + // Check that we have collected all objects. + EXPECT_EQ(objects.size(), objects_per_span_); + for (void *p : objects) { + uintptr_t off = reinterpret_cast<char *>(p) - start; + EXPECT_LT(off, span_.bytes_in_span()); + EXPECT_EQ(off % size_, 0); + } +} + +INSTANTIATE_TEST_SUITE_P(All, SpanTest, testing::Range(size_t(1), kNumClasses)); + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/stack_trace_table.cc b/contrib/libs/tcmalloc/tcmalloc/stack_trace_table.cc new file mode 100644 index 0000000000..5b5741b6a8 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/stack_trace_table.cc @@ -0,0 +1,155 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/stack_trace_table.h" + +#include <stddef.h> +#include <string.h> + +#include "absl/base/internal/spinlock.h" +#include "absl/hash/hash.h" +#include "tcmalloc/common.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/page_heap_allocator.h" +#include "tcmalloc/sampler.h" +#include "tcmalloc/static_vars.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +bool StackTraceTable::Bucket::KeyEqual(uintptr_t h, const StackTrace& t) const { + // Do not merge entries with different sizes so that profiling tools + // can allow size-based analysis of the resulting profiles. Note + // that sizes being supplied here are already quantized (to either + // the size-class size for small objects, or a multiple of pages for + // big objects). So the number of distinct buckets kept per stack + // trace should be fairly small. + if (this->hash != h || this->trace.depth != t.depth || + this->trace.requested_size != t.requested_size || + this->trace.requested_alignment != t.requested_alignment || + // These could theoretically differ due to e.g. memalign choices. + // Split the buckets just in case that happens (though it should be rare.) 
+ this->trace.allocated_size != t.allocated_size) { + return false; + } + for (int i = 0; i < t.depth; ++i) { + if (this->trace.stack[i] != t.stack[i]) { + return false; + } + } + return true; +} + +StackTraceTable::StackTraceTable(ProfileType type, int64_t period, bool merge, + bool unsample) + : type_(type), + period_(period), + bucket_mask_(merge ? (1 << 14) - 1 : 0), + depth_total_(0), + table_(new Bucket*[num_buckets()]()), + bucket_total_(0), + merge_(merge), + error_(false), + unsample_(unsample) { + memset(table_, 0, num_buckets() * sizeof(Bucket*)); +} + +StackTraceTable::~StackTraceTable() { + { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + for (int i = 0; i < num_buckets(); ++i) { + Bucket* b = table_[i]; + while (b != nullptr) { + Bucket* next = b->next; + Static::DestroySampleUserData(b->trace.user_data); + Static::bucket_allocator().Delete(b); + b = next; + } + } + } + delete[] table_; +} + +void StackTraceTable::AddTrace(double count, const StackTrace& t) { + if (error_) { + return; + } + + uintptr_t h = absl::Hash<StackTrace>()(t); + + const int idx = h & bucket_mask_; + + Bucket* b = merge_ ? table_[idx] : nullptr; + while (b != nullptr && !b->KeyEqual(h, t)) { + b = b->next; + } + if (b != nullptr) { + b->count += count; + b->total_weight += count * t.weight; + b->trace.weight = b->total_weight / b->count + 0.5; + } else { + depth_total_ += t.depth; + bucket_total_++; + b = Static::bucket_allocator().New(); + b->hash = h; + b->trace = t; + b->trace.user_data = Static::CopySampleUserData(t.user_data); + b->count = count; + b->total_weight = t.weight * count; + b->next = table_[idx]; + table_[idx] = b; + } +} + +void StackTraceTable::Iterate( + absl::FunctionRef<void(const Profile::Sample&)> func) const { + if (error_) { + return; + } + + for (int i = 0; i < num_buckets(); ++i) { + Bucket* b = table_[i]; + while (b != nullptr) { + // Report total bytes that are a multiple of the object size. + size_t allocated_size = b->trace.allocated_size; + size_t requested_size = b->trace.requested_size; + + uintptr_t bytes = b->count * AllocatedBytes(b->trace, unsample_) + 0.5; + + Profile::Sample e; + // We want sum to be a multiple of allocated_size; pick the nearest + // multiple rather than always rounding up or down. + e.count = (bytes + allocated_size / 2) / allocated_size; + e.sum = e.count * allocated_size; + e.requested_size = requested_size; + e.requested_alignment = b->trace.requested_alignment; + e.allocated_size = allocated_size; + + e.user_data = b->trace.user_data; + + e.depth = b->trace.depth; + static_assert(kMaxStackDepth <= Profile::Sample::kMaxStackDepth, + "Profile stack size smaller than internal stack sizes"); + memcpy(e.stack, b->trace.stack, sizeof(e.stack[0]) * e.depth); + func(e); + + b = b->next; + } + } +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/stack_trace_table.h b/contrib/libs/tcmalloc/tcmalloc/stack_trace_table.h new file mode 100644 index 0000000000..a5a4a03636 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/stack_trace_table.h @@ -0,0 +1,97 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Utility class for coalescing sampled stack traces. Not thread-safe. + +#ifndef TCMALLOC_STACK_TRACE_TABLE_H_ +#define TCMALLOC_STACK_TRACE_TABLE_H_ + +#include <stdint.h> + +#include <string> + +#include "absl/base/thread_annotations.h" +#include "tcmalloc/common.h" +#include "tcmalloc/internal_malloc_extension.h" +#include "tcmalloc/malloc_extension.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +class StackTraceTable final : public ProfileBase { + public: + // If merge is true, traces with identical size and stack are merged + // together. Else they are kept distinct. + // If unsample is true, Iterate() will scale counts to report estimates + // of the true total assuming traces were added by the sampler. + // REQUIRES: L < pageheap_lock + StackTraceTable(ProfileType type, int64_t period, bool merge, bool unsample); + + // REQUIRES: L < pageheap_lock + ~StackTraceTable() override; + + // base::Profile methods. + void Iterate( + absl::FunctionRef<void(const Profile::Sample&)> func) const override; + + int64_t Period() const override { return period_; } + + ProfileType Type() const override { return type_; } + + // Adds stack trace "t" to table with the specified count. + // The count is a floating point value to reduce rounding + // errors when accounting for sampling probabilities. + void AddTrace(double count, const StackTrace& t) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Exposed for PageHeapAllocator + struct Bucket { + // Key + uintptr_t hash; + StackTrace trace; + + // Payload + double count; + size_t total_weight; + Bucket* next; + + bool KeyEqual(uintptr_t h, const StackTrace& t) const; + }; + + // For testing + int depth_total() const { return depth_total_; } + int bucket_total() const { return bucket_total_; } + + private: + static constexpr int kHashTableSize = 1 << 14; // => table_ is 128k + + ProfileType type_; + int64_t period_; + int bucket_mask_; + int depth_total_; + Bucket** table_; + int bucket_total_; + bool merge_; + bool error_; + bool unsample_; + + int num_buckets() const { return bucket_mask_ + 1; } +}; + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_STACK_TRACE_TABLE_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/stack_trace_table_test.cc b/contrib/libs/tcmalloc/tcmalloc/stack_trace_table_test.cc new file mode 100644 index 0000000000..4579798906 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/stack_trace_table_test.cc @@ -0,0 +1,389 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/stack_trace_table.h" + +#include <stddef.h> + +#include <algorithm> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/attributes.h" +#include "absl/base/internal/spinlock.h" +#include "absl/base/macros.h" +#include "absl/debugging/stacktrace.h" +#include "absl/strings/str_format.h" +#include "absl/strings/str_join.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/static_vars.h" + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +// Rather than deal with heap allocating stack/tags, AllocationEntry contains +// them inline. +struct AllocationEntry { + int64_t sum; + int count; + size_t requested_size; + size_t requested_alignment; + size_t allocated_size; + int depth; + void* stack[64]; + + friend bool operator==(const AllocationEntry& x, const AllocationEntry& y); + friend bool operator!=(const AllocationEntry& x, const AllocationEntry& y) { + return !(x == y); + } + + friend std::ostream& operator<<(std::ostream& os, const AllocationEntry& e) { + os << "sum = " << e.sum << "; "; + os << "count = " << e.count << "; "; + + std::vector<std::string> ptrs; + for (int i = 0; i < e.depth; i++) { + ptrs.push_back(absl::StrFormat("%p", e.stack[i])); + } + os << "stack = [" << absl::StrJoin(ptrs, ", ") << "]; "; + + os << "requested_size = " << e.requested_size << "; "; + os << "requested_alignment = " << e.requested_alignment << "; "; + os << "allocated_size = " << e.allocated_size << "; "; + return os; + } +}; + +inline bool operator==(const AllocationEntry& x, const AllocationEntry& y) { + if (x.sum != y.sum) { + return false; + } + + if (x.count != y.count) { + return false; + } + + if (x.depth != y.depth) { + return false; + } + + if (x.depth > 0 && !std::equal(x.stack, x.stack + x.depth, y.stack)) { + return false; + } + + if (x.requested_size != y.requested_size) { + return false; + } + + if (x.requested_alignment != y.requested_alignment) { + return false; + } + + if (x.allocated_size != y.allocated_size) { + return false; + } + + return true; +} + +void CheckTraces(const StackTraceTable& table, + std::initializer_list<AllocationEntry> expected) { + std::vector<AllocationEntry> actual; + + table.Iterate([&](const Profile::Sample& e) { + AllocationEntry tmp; + tmp.sum = e.sum; + tmp.count = e.count; + tmp.depth = e.depth; + ASSERT_LE(tmp.depth, ABSL_ARRAYSIZE(tmp.stack)); + std::copy(e.stack, e.stack + e.depth, tmp.stack); + + tmp.requested_size = e.requested_size; + tmp.requested_alignment = e.requested_alignment; + tmp.allocated_size = e.allocated_size; + + actual.push_back(tmp); + }); + + EXPECT_THAT(actual, testing::UnorderedElementsAreArray(expected)); +} + +void AddTrace(StackTraceTable* table, double count, const StackTrace& t) { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + table->AddTrace(count, t); +} + +TEST(StackTraceTableTest, StackTraceTable) { + // If this test is not linked against TCMalloc, the global arena used for + // StackTraceTable's buckets will not be initialized. 
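+ // InitIfNecessary() is safe to call repeatedly, so this is effectively a
+ // no-op when the allocator has already been initialized.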
+ Static::InitIfNecessary(); + + // Empty table + { + SCOPED_TRACE("empty"); + + StackTraceTable table(ProfileType::kHeap, 1, true, false); + EXPECT_EQ(0, table.depth_total()); + EXPECT_EQ(0, table.bucket_total()); + + CheckTraces(table, {}); + } + + StackTrace t1 = {}; + t1.requested_size = static_cast<uintptr_t>(512); + t1.requested_alignment = static_cast<uintptr_t>(16); + t1.allocated_size = static_cast<uintptr_t>(1024); + t1.depth = static_cast<uintptr_t>(2); + t1.stack[0] = reinterpret_cast<void*>(1); + t1.stack[1] = reinterpret_cast<void*>(2); + t1.weight = 2 << 20; + + const AllocationEntry k1 = { + 1024, + 1, + 512, + 16, + 1024, + 2, + {reinterpret_cast<void*>(1), reinterpret_cast<void*>(2)}, + }; + + StackTrace t2 = {}; + t2.requested_size = static_cast<uintptr_t>(375); + t2.requested_alignment = static_cast<uintptr_t>(0); + t2.allocated_size = static_cast<uintptr_t>(512); + t2.depth = static_cast<uintptr_t>(2); + t2.stack[0] = reinterpret_cast<void*>(2); + t2.stack[1] = reinterpret_cast<void*>(1); + t2.weight = 1; + + const AllocationEntry k2 = { + 512, + 1, + 375, + 0, + 512, + 2, + {reinterpret_cast<void*>(2), reinterpret_cast<void*>(1)}, + }; + + // Table w/ just t1 + { + SCOPED_TRACE("t1"); + + StackTraceTable table(ProfileType::kHeap, 1, true, false); + AddTrace(&table, 1.0, t1); + EXPECT_EQ(2, table.depth_total()); + EXPECT_EQ(1, table.bucket_total()); + + CheckTraces(table, {k1}); + } + + // We made our last sample at t1.weight (2<<20 bytes). We sample according to + // t1.requested_size + 1 (513 bytes). Therefore we overweight the sample to + // construct the distribution. + // + // We rely on the profiling tests to verify that this correctly reconstructs + // the distribution (+/- an error tolerance) + const int t1_sampled_weight = + static_cast<double>(t1.weight) / (t1.requested_size + 1); + ASSERT_EQ(t1_sampled_weight, 4088); + const AllocationEntry k1_unsampled = { + t1_sampled_weight * 1024, + t1_sampled_weight, + 512, + 16, + 1024, + 2, + {reinterpret_cast<void*>(1), reinterpret_cast<void*>(2)}, + }; + + // Table w/ just t1 (unsampled) + { + SCOPED_TRACE("t1 unsampled"); + + StackTraceTable table(ProfileType::kHeap, 1, true, true); + AddTrace(&table, 1.0, t1); + EXPECT_EQ(2, table.depth_total()); + EXPECT_EQ(1, table.bucket_total()); + + CheckTraces(table, {k1_unsampled}); + } + + const AllocationEntry k1_merged = { + 2048, + 2, + 512, + 16, + 1024, + 2, + {reinterpret_cast<void*>(1), reinterpret_cast<void*>(2)}, + }; + + // Table w/ 2x t1 (merge) + { + SCOPED_TRACE("2x t1 merge"); + + StackTraceTable table(ProfileType::kHeap, 1, true, false); + AddTrace(&table, 1.0, t1); + AddTrace(&table, 1.0, t1); + EXPECT_EQ(2, table.depth_total()); + EXPECT_EQ(1, table.bucket_total()); + + CheckTraces(table, {k1_merged}); + } + + // Table w/ 2x t1 (no merge) + { + SCOPED_TRACE("2x t1 no merge"); + + StackTraceTable table(ProfileType::kHeap, 1, false, false); + AddTrace(&table, 1.0, t1); + AddTrace(&table, 1.0, t1); + EXPECT_EQ(4, table.depth_total()); + EXPECT_EQ(2, table.bucket_total()); + + CheckTraces(table, {k1, k1}); + } + + const AllocationEntry k1_unsampled_merged = { + 2 * t1_sampled_weight * 1024, + 2 * t1_sampled_weight, + 512, + 16, + 1024, + 2, + {reinterpret_cast<void*>(1), reinterpret_cast<void*>(2)}, + }; + + { + SCOPED_TRACE("2x t1 unsampled"); + + StackTraceTable table(ProfileType::kHeap, 1, true, true); + AddTrace(&table, 1.0, t1); + AddTrace(&table, 1.0, t1); + EXPECT_EQ(2, table.depth_total()); + EXPECT_EQ(1, table.bucket_total()); + + CheckTraces(table, 
{k1_unsampled_merged}); + } + + // Table w/ t1, t2 + { + SCOPED_TRACE("t1, t2"); + + StackTraceTable table(ProfileType::kHeap, 1, true, false); + AddTrace(&table, 1.0, t1); + AddTrace(&table, 1.0, t2); + EXPECT_EQ(4, table.depth_total()); + EXPECT_EQ(2, table.bucket_total()); + CheckTraces(table, {k1, k2}); + } + + // Table w/ 1.6 x t1, 1 x t2. + // Note that t1's 1.6 count will be rounded-up to 2.0. + { + SCOPED_TRACE("1.6 t1, t2"); + + StackTraceTable table(ProfileType::kHeap, 1, true, false); + AddTrace(&table, 0.4, t1); + AddTrace(&table, 1.0, t2); + AddTrace(&table, 1.2, t1); + EXPECT_EQ(4, table.depth_total()); + EXPECT_EQ(2, table.bucket_total()); + + const AllocationEntry scaled_k1 = { + 2048, + 2, + 512, + 16, + 1024, + 2, + {reinterpret_cast<void*>(1), reinterpret_cast<void*>(2)}, + }; + + CheckTraces(table, {scaled_k1, k2}); + } + + // Same stack as t1, but w/ different size + StackTrace t3 = {}; + t3.requested_size = static_cast<uintptr_t>(13); + t3.requested_alignment = static_cast<uintptr_t>(0); + t3.allocated_size = static_cast<uintptr_t>(17); + t3.depth = static_cast<uintptr_t>(2); + t3.stack[0] = reinterpret_cast<void*>(1); + t3.stack[1] = reinterpret_cast<void*>(2); + t3.weight = 1; + + const AllocationEntry k3 = { + 17, + 1, + 13, + 0, + 17, + 2, + {reinterpret_cast<void*>(1), reinterpret_cast<void*>(2)}, + }; + + // Table w/ t1, t3 + { + SCOPED_TRACE("t1, t3"); + + StackTraceTable table(ProfileType::kHeap, 1, true, false); + AddTrace(&table, 1.0, t1); + AddTrace(&table, 1.0, t3); + EXPECT_EQ(4, table.depth_total()); + EXPECT_EQ(2, table.bucket_total()); + + CheckTraces(table, {k1, k3}); + } + + // Same stack as t1, but w/ different alignment + StackTrace t4; + t4.requested_size = static_cast<uintptr_t>(512); + t4.requested_alignment = static_cast<uintptr_t>(32); + t4.allocated_size = static_cast<uintptr_t>(1024); + t4.depth = static_cast<uintptr_t>(2); + t4.stack[0] = reinterpret_cast<void*>(1); + t4.stack[1] = reinterpret_cast<void*>(2); + t4.weight = 1; + + const AllocationEntry k4 = { + 1024, + 1, + 512, + 32, + 1024, + 2, + {reinterpret_cast<void*>(1), reinterpret_cast<void*>(2)}, + }; + + // Table w/ t1, t4 + { + SCOPED_TRACE("t1, t4"); + + StackTraceTable table(ProfileType::kHeap, 1, true, false); + AddTrace(&table, 1.0, t1); + AddTrace(&table, 1.0, t4); + EXPECT_EQ(4, table.depth_total()); + EXPECT_EQ(2, table.bucket_total()); + + CheckTraces(table, {k1, k4}); + } +} + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/static_vars.cc b/contrib/libs/tcmalloc/tcmalloc/static_vars.cc new file mode 100644 index 0000000000..08a70de493 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/static_vars.cc @@ -0,0 +1,138 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
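+//
+// Definitions and explicit initialization of the global allocator state
+// declared in static_vars.h.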
+ +#include "tcmalloc/static_vars.h" + +#include <stddef.h> + +#include <atomic> +#include <new> + +#include "absl/base/attributes.h" +#include "absl/base/const_init.h" +#include "absl/base/internal/spinlock.h" +#include "absl/base/macros.h" +#include "tcmalloc/cpu_cache.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/mincore.h" +#include "tcmalloc/internal/numa.h" +#include "tcmalloc/malloc_extension.h" +#include "tcmalloc/pagemap.h" +#include "tcmalloc/sampler.h" +#include "tcmalloc/thread_cache.h" +#include "tcmalloc/tracking.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// Cacheline-align our SizeMap and CPUCache. They both have very hot arrays as +// their first member variables, and aligning them reduces the number of cache +// lines these arrays use. +// +// IF YOU ADD TO THIS LIST, ADD TO STATIC_VAR_SIZE TOO! +ABSL_CONST_INIT absl::base_internal::SpinLock pageheap_lock( + absl::kConstInit, absl::base_internal::SCHEDULE_KERNEL_ONLY); +ABSL_CONST_INIT Arena Static::arena_; +ABSL_CONST_INIT SizeMap ABSL_CACHELINE_ALIGNED Static::sizemap_; +ABSL_CONST_INIT TransferCacheManager Static::transfer_cache_; +ABSL_CONST_INIT ShardedTransferCacheManager Static::sharded_transfer_cache_; +ABSL_CONST_INIT CPUCache ABSL_CACHELINE_ALIGNED Static::cpu_cache_; +ABSL_CONST_INIT PageHeapAllocator<Span> Static::span_allocator_; +ABSL_CONST_INIT PageHeapAllocator<StackTrace> Static::stacktrace_allocator_; +ABSL_CONST_INIT PageHeapAllocator<ThreadCache> Static::threadcache_allocator_; +ABSL_CONST_INIT SpanList Static::sampled_objects_; +ABSL_CONST_INIT tcmalloc_internal::StatsCounter Static::sampled_objects_size_; +ABSL_CONST_INIT PeakHeapTracker Static::peak_heap_tracker_; +ABSL_CONST_INIT PageHeapAllocator<StackTraceTable::Bucket> + Static::bucket_allocator_; +ABSL_CONST_INIT std::atomic<bool> Static::inited_{false}; +ABSL_CONST_INIT bool Static::cpu_cache_active_ = false; +ABSL_CONST_INIT bool Static::fork_support_enabled_ = false; +ABSL_CONST_INIT Static::CreateSampleUserDataCallback* + Static::create_sample_user_data_callback_ = nullptr; +ABSL_CONST_INIT Static::CopySampleUserDataCallback* + Static::copy_sample_user_data_callback_ = nullptr; +ABSL_CONST_INIT Static::DestroySampleUserDataCallback* + Static::destroy_sample_user_data_callback_ = nullptr; +ABSL_CONST_INIT Static::PageAllocatorStorage Static::page_allocator_; +ABSL_CONST_INIT PageMap Static::pagemap_; +ABSL_CONST_INIT absl::base_internal::SpinLock guarded_page_lock( + absl::kConstInit, absl::base_internal::SCHEDULE_KERNEL_ONLY); +ABSL_CONST_INIT GuardedPageAllocator Static::guardedpage_allocator_; +ABSL_CONST_INIT NumaTopology<kNumaPartitions, kNumBaseClasses> + Static::numa_topology_; + +size_t Static::metadata_bytes() { + // This is ugly and doesn't nicely account for e.g. alignment losses + // -- I'd like to put all the above in a struct and take that + // struct's size. But we can't due to linking issues. 
+ const size_t static_var_size = + sizeof(pageheap_lock) + sizeof(arena_) + sizeof(sizemap_) + + sizeof(sharded_transfer_cache_) + sizeof(transfer_cache_) + + sizeof(cpu_cache_) + sizeof(span_allocator_) + + sizeof(stacktrace_allocator_) + sizeof(threadcache_allocator_) + + sizeof(sampled_objects_) + sizeof(bucket_allocator_) + + sizeof(inited_) + sizeof(cpu_cache_active_) + sizeof(page_allocator_) + + sizeof(pagemap_) + sizeof(sampled_objects_size_) + + sizeof(peak_heap_tracker_) + sizeof(guarded_page_lock) + + sizeof(guardedpage_allocator_) + sizeof(numa_topology_); + + const size_t allocated = arena().bytes_allocated() + + AddressRegionFactory::InternalBytesAllocated(); + return allocated + static_var_size; +} + +size_t Static::pagemap_residence() { + // Determine residence of the root node of the pagemap. + size_t total = MInCore::residence(&pagemap_, sizeof(pagemap_)); + return total; +} + +ABSL_ATTRIBUTE_COLD ABSL_ATTRIBUTE_NOINLINE void Static::SlowInitIfNecessary() { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + + // double-checked locking + if (!inited_.load(std::memory_order_acquire)) { + tracking::Init(); + sizemap_.Init(); + numa_topology_.Init(); + span_allocator_.Init(&arena_); + span_allocator_.New(); // Reduce cache conflicts + span_allocator_.New(); // Reduce cache conflicts + stacktrace_allocator_.Init(&arena_); + bucket_allocator_.Init(&arena_); + // Do a bit of sanitizing: make sure central_cache is aligned properly + CHECK_CONDITION((sizeof(transfer_cache_) % ABSL_CACHELINE_SIZE) == 0); + transfer_cache_.Init(); + sharded_transfer_cache_.Init(); + new (page_allocator_.memory) PageAllocator; + threadcache_allocator_.Init(&arena_); + cpu_cache_active_ = false; + pagemap_.MapRootWithSmallPages(); + guardedpage_allocator_.Init(/*max_alloced_pages=*/64, /*total_pages=*/128); + inited_.store(true, std::memory_order_release); + + pageheap_lock.Unlock(); + pthread_atfork( + TCMallocPreFork, + TCMallocPostFork, + TCMallocPostFork); + pageheap_lock.Lock(); + } +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/static_vars.h b/contrib/libs/tcmalloc/tcmalloc/static_vars.h new file mode 100644 index 0000000000..be68edc189 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/static_vars.h @@ -0,0 +1,262 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Static variables shared by multiple classes. 
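+//
+// Every member of Static is either constant-initialized or initialized
+// explicitly under pageheap_lock by InitIfNecessary(), so the class is
+// usable before any global constructors have run.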
+ +#ifndef TCMALLOC_STATIC_VARS_H_ +#define TCMALLOC_STATIC_VARS_H_ + +#include <stddef.h> +#include <stdint.h> +#include <string.h> + +#include <atomic> + +#include "absl/base/attributes.h" +#include "absl/base/optimization.h" +#include "absl/base/thread_annotations.h" +#include "tcmalloc/arena.h" +#include "tcmalloc/central_freelist.h" +#include "tcmalloc/common.h" +#include "tcmalloc/guarded_page_allocator.h" +#include "tcmalloc/internal/atomic_stats_counter.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/numa.h" +#include "tcmalloc/internal/percpu.h" +#include "tcmalloc/page_allocator.h" +#include "tcmalloc/page_heap.h" +#include "tcmalloc/page_heap_allocator.h" +#include "tcmalloc/peak_heap_tracker.h" +#include "tcmalloc/span.h" +#include "tcmalloc/stack_trace_table.h" +#include "tcmalloc/transfer_cache.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +class CPUCache; +class PageMap; +class ThreadCache; + +void TCMallocPreFork(); +void TCMallocPostFork(); + +class Static { + public: + // True if InitIfNecessary() has run to completion. + static bool IsInited(); + // Must be called before calling any of the accessors below. + // Safe to call multiple times. + static void InitIfNecessary(); + + // Central cache. + static const CentralFreeList& central_freelist(int size_class) { + return transfer_cache().central_freelist(size_class); + } + // Central cache -- an array of free-lists, one per size-class. + // We have a separate lock per free-list to reduce contention. + static TransferCacheManager& transfer_cache() { return transfer_cache_; } + + // A per-cache domain TransferCache. + static ShardedTransferCacheManager& sharded_transfer_cache() { + return sharded_transfer_cache_; + } + + static SizeMap& sizemap() { return sizemap_; } + + static CPUCache& cpu_cache() { return cpu_cache_; } + + static PeakHeapTracker& peak_heap_tracker() { return peak_heap_tracker_; } + + static NumaTopology<kNumaPartitions, kNumBaseClasses>& numa_topology() { + return numa_topology_; + } + + ////////////////////////////////////////////////////////////////////// + // In addition to the explicit initialization comment, the variables below + // must be protected by pageheap_lock. + + static Arena& arena() { return arena_; } + + // Page-level allocator. + static PageAllocator& page_allocator() { + return *reinterpret_cast<PageAllocator*>(page_allocator_.memory); + } + + static PageMap& pagemap() { return pagemap_; } + + static GuardedPageAllocator& guardedpage_allocator() { + return guardedpage_allocator_; + } + + static PageHeapAllocator<Span>& span_allocator() { return span_allocator_; } + + static PageHeapAllocator<StackTrace>& stacktrace_allocator() { + return stacktrace_allocator_; + } + + static PageHeapAllocator<ThreadCache>& threadcache_allocator() { + return threadcache_allocator_; + } + + // State kept for sampled allocations (/heapz support). The StatsCounter is + // only written while holding pageheap_lock, so writes can safely use + // LossyAdd and reads do not require locking. 
+ static SpanList sampled_objects_ ABSL_GUARDED_BY(pageheap_lock); + ABSL_CONST_INIT static tcmalloc_internal::StatsCounter sampled_objects_size_; + + static PageHeapAllocator<StackTraceTable::Bucket>& bucket_allocator() { + return bucket_allocator_; + } + + static bool ABSL_ATTRIBUTE_ALWAYS_INLINE CPUCacheActive() { + return cpu_cache_active_; + } + static void ActivateCPUCache() { cpu_cache_active_ = true; } + static void DeactivateCPUCache() { cpu_cache_active_ = false; } + + static bool ForkSupportEnabled() { return fork_support_enabled_; } + static void EnableForkSupport() { fork_support_enabled_ = true; } + + using CreateSampleUserDataCallback = void*(); + using CopySampleUserDataCallback = void*(void*); + using DestroySampleUserDataCallback = void(void*); + + static void SetSampleUserDataCallbacks( + CreateSampleUserDataCallback create, + CopySampleUserDataCallback copy, + DestroySampleUserDataCallback destroy) { + create_sample_user_data_callback_ = create; + copy_sample_user_data_callback_ = copy; + destroy_sample_user_data_callback_ = destroy; + } + + static void* CreateSampleUserData() { + if (create_sample_user_data_callback_) + return create_sample_user_data_callback_(); + return nullptr; + } + static void* CopySampleUserData(void* user_data) { + if (copy_sample_user_data_callback_) + return copy_sample_user_data_callback_(user_data); + return nullptr; + } + static void DestroySampleUserData(void* user_data) { + if (destroy_sample_user_data_callback_) + destroy_sample_user_data_callback_(user_data); + } + + static bool ABSL_ATTRIBUTE_ALWAYS_INLINE IsOnFastPath() { + return +#ifndef TCMALLOC_DEPRECATED_PERTHREAD + // When the per-cpu cache is enabled, and the thread's current cpu + // variable is initialized we will try to allocate from the per-cpu + // cache. If something fails, we bail out to the full malloc. + // Checking the current cpu variable here allows us to remove it from + // the fast-path, since we will fall back to the slow path until this + // variable is initialized. + CPUCacheActive() & subtle::percpu::IsFastNoInit(); +#else + !CPUCacheActive(); +#endif + } + + static size_t metadata_bytes() ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // The root of the pagemap is potentially a large poorly utilized + // structure, so figure out how much of it is actually resident. + static size_t pagemap_residence(); + + private: +#if defined(__clang__) + __attribute__((preserve_most)) +#endif + static void + SlowInitIfNecessary(); + + // These static variables require explicit initialization. We cannot + // count on their constructors to do any initialization because other + // static variables may try to allocate memory before these variables + // can run their constructors. 
+ + ABSL_CONST_INIT static Arena arena_; + static SizeMap sizemap_; + ABSL_CONST_INIT static TransferCacheManager transfer_cache_; + ABSL_CONST_INIT static ShardedTransferCacheManager sharded_transfer_cache_; + static CPUCache cpu_cache_; + ABSL_CONST_INIT static GuardedPageAllocator guardedpage_allocator_; + static PageHeapAllocator<Span> span_allocator_; + static PageHeapAllocator<StackTrace> stacktrace_allocator_; + static PageHeapAllocator<ThreadCache> threadcache_allocator_; + static PageHeapAllocator<StackTraceTable::Bucket> bucket_allocator_; + ABSL_CONST_INIT static std::atomic<bool> inited_; + static bool cpu_cache_active_; + static bool fork_support_enabled_; + static CreateSampleUserDataCallback* create_sample_user_data_callback_; + static CopySampleUserDataCallback* copy_sample_user_data_callback_; + static DestroySampleUserDataCallback* destroy_sample_user_data_callback_; + ABSL_CONST_INIT static PeakHeapTracker peak_heap_tracker_; + ABSL_CONST_INIT static NumaTopology<kNumaPartitions, kNumBaseClasses> + numa_topology_; + + // PageHeap uses a constructor for initialization. Like the members above, + // we can't depend on initialization order, so pageheap is new'd + // into this buffer. + union PageAllocatorStorage { + constexpr PageAllocatorStorage() : extra(0) {} + + char memory[sizeof(PageAllocator)]; + uintptr_t extra; // To force alignment + }; + + static PageAllocatorStorage page_allocator_; + static PageMap pagemap_; +}; + +inline bool Static::IsInited() { + return inited_.load(std::memory_order_acquire); +} + +inline void Static::InitIfNecessary() { + if (ABSL_PREDICT_FALSE(!IsInited())) { + SlowInitIfNecessary(); + } +} + +// Why are these functions here? Because we want to inline them, but they +// need access to Static::span_allocator. Putting them in span.h would lead +// to nasty dependency loops. Since anything that needs them certainly +// includes static_vars.h, this is a perfectly good compromise. +// TODO(b/134687001): move span_allocator to Span, getting rid of the need for +// this. +inline Span* Span::New(PageId p, Length len) { + Span* result = Static::span_allocator().New(); + result->Init(p, len); + return result; +} + +inline void Span::Delete(Span* span) { +#ifndef NDEBUG + // In debug mode, trash the contents of deleted Spans + memset(static_cast<void*>(span), 0x3f, sizeof(*span)); +#endif + Static::span_allocator().Delete(span); +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_STATIC_VARS_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/stats.cc b/contrib/libs/tcmalloc/tcmalloc/stats.cc new file mode 100644 index 0000000000..bb553ee5cd --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/stats.cc @@ -0,0 +1,553 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
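+//
+// Implementation of the span statistics, page age histograms and per-size
+// allocation counters declared in stats.h, plus their text and pbtxt
+// rendering.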
+ +#include "tcmalloc/stats.h" + +#include <inttypes.h> +#include <string.h> +#include <sys/time.h> +#include <time.h> + +#include <algorithm> +#include <cstdint> +#include <limits> + +#include "absl/base/dynamic_annotations.h" +#include "absl/base/internal/cycleclock.h" +#include "absl/base/macros.h" +#include "absl/numeric/bits.h" +#include "absl/strings/string_view.h" +#include "absl/time/time.h" +#include "tcmalloc/common.h" +#include "tcmalloc/huge_pages.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/util.h" +#include "tcmalloc/pages.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +static double BytesToMiB(size_t bytes) { + const double MiB = 1048576.0; + return bytes / MiB; +} + +static double PagesToMiB(uint64_t pages) { + return BytesToMiB(pages * kPageSize); +} + +// For example, PrintRightAdjustedWithPrefix(out, ">=", 42, 6) prints " >=42". +static void PrintRightAdjustedWithPrefix(Printer *out, const char *prefix, + Length num, int width) { + width -= strlen(prefix); + int num_tmp = num.raw_num(); + for (int i = 0; i < width - 1; i++) { + num_tmp /= 10; + if (num_tmp == 0) { + out->printf(" "); + } + } + out->printf("%s%zu", prefix, num.raw_num()); +} + +void PrintStats(const char *label, Printer *out, const BackingStats &backing, + const SmallSpanStats &small, const LargeSpanStats &large, + bool everything) { + size_t nonempty_sizes = 0; + for (int i = 0; i < kMaxPages.raw_num(); ++i) { + const size_t norm = small.normal_length[i]; + const size_t ret = small.returned_length[i]; + if (norm + ret > 0) nonempty_sizes++; + } + + out->printf("------------------------------------------------\n"); + out->printf("%s: %zu sizes; %6.1f MiB free; %6.1f MiB unmapped\n", label, + nonempty_sizes, BytesToMiB(backing.free_bytes), + BytesToMiB(backing.unmapped_bytes)); + out->printf("------------------------------------------------\n"); + + Length cum_normal_pages, cum_returned_pages, cum_total_pages; + if (!everything) return; + + for (size_t i = 0; i < kMaxPages.raw_num(); ++i) { + const size_t norm = small.normal_length[i]; + const size_t ret = small.returned_length[i]; + const size_t total = norm + ret; + if (total == 0) continue; + const Length norm_pages = Length(norm * i); + const Length ret_pages = Length(ret * i); + const Length total_pages = norm_pages + ret_pages; + cum_normal_pages += norm_pages; + cum_returned_pages += ret_pages; + cum_total_pages += total_pages; + out->printf( + "%6zu pages * %6zu spans ~ %6.1f MiB; %6.1f MiB cum" + "; unmapped: %6.1f MiB; %6.1f MiB cum\n", + i, total, total_pages.in_mib(), cum_total_pages.in_mib(), + ret_pages.in_mib(), cum_returned_pages.in_mib()); + } + + cum_normal_pages += large.normal_pages; + cum_returned_pages += large.returned_pages; + const Length large_total_pages = large.normal_pages + large.returned_pages; + cum_total_pages += large_total_pages; + PrintRightAdjustedWithPrefix(out, ">=", kMaxPages, 6); + out->printf( + " large * %6zu spans ~ %6.1f MiB; %6.1f MiB cum" + "; unmapped: %6.1f MiB; %6.1f MiB cum\n", + static_cast<size_t>(large.spans), large_total_pages.in_mib(), + cum_total_pages.in_mib(), large.returned_pages.in_mib(), + cum_returned_pages.in_mib()); +} + +struct HistBucket { + uint64_t min_sec; + const char *label; +}; + +static const HistBucket kSpanAgeHistBuckets[] = { + // clang-format off + {0, "<1s"}, + {1, "1s"}, + {30, "30s"}, + {1 * 60, "1m"}, + {30 * 60, "30m"}, + {1 * 60 * 60, "1h"}, + {8 * 60 * 60, "8+h"}, + // clang-format on +}; + +struct 
PageHeapEntry { + int64_t span_size; // bytes + int64_t present; // bytes + int64_t released; // bytes + int64_t num_spans; + double avg_live_age_secs; + double avg_released_age_secs; + int64_t live_age_hist_bytes[PageAgeHistograms::kNumBuckets] = {0, 0, 0, 0, + 0, 0, 0}; + int64_t released_age_hist_bytes[PageAgeHistograms::kNumBuckets] = {0, 0, 0, 0, + 0, 0, 0}; + + void PrintInPbtxt(PbtxtRegion *parent, + absl::string_view sub_region_name) const; +}; + +void PageHeapEntry::PrintInPbtxt(PbtxtRegion *parent, + absl::string_view sub_region_name) const { + auto page_heap = parent->CreateSubRegion(sub_region_name); + page_heap.PrintI64("span_size", span_size); + page_heap.PrintI64("present", present); + page_heap.PrintI64("released", released); + page_heap.PrintI64("num_spans", num_spans); + page_heap.PrintDouble("avg_live_age_secs", avg_live_age_secs); + page_heap.PrintDouble("avg_released_age_secs", avg_released_age_secs); + + for (int j = 0; j < PageAgeHistograms::kNumBuckets; j++) { + uint64_t min_age_secs = kSpanAgeHistBuckets[j].min_sec; + uint64_t max_age_secs = j != PageAgeHistograms::kNumBuckets - 1 + ? kSpanAgeHistBuckets[j + 1].min_sec + : INT_MAX; + if (live_age_hist_bytes[j] != 0) { + auto live_age_hist = page_heap.CreateSubRegion("live_age_hist"); + live_age_hist.PrintI64("bytes", live_age_hist_bytes[j]); + live_age_hist.PrintI64("min_age_secs", min_age_secs); + live_age_hist.PrintI64("max_age_secs", max_age_secs); + } + if (released_age_hist_bytes[j] != 0) { + auto released_age_hist = page_heap.CreateSubRegion("released_age_hist"); + released_age_hist.PrintI64("bytes", released_age_hist_bytes[j]); + released_age_hist.PrintI64("min_age_secs", min_age_secs); + released_age_hist.PrintI64("max_age_secs", max_age_secs); + } + } +} + +void PrintStatsInPbtxt(PbtxtRegion *region, const SmallSpanStats &small, + const LargeSpanStats &large, + const PageAgeHistograms &ages) { + // Print for small pages. + for (auto i = Length(0); i < kMaxPages; ++i) { + const size_t norm = small.normal_length[i.raw_num()]; + const size_t ret = small.returned_length[i.raw_num()]; + const size_t total = norm + ret; + if (total == 0) continue; + const Length norm_pages = norm * i; + const Length ret_pages = ret * i; + PageHeapEntry entry; + entry.span_size = i.in_bytes(); + entry.present = norm_pages.in_bytes(); + entry.released = ret_pages.in_bytes(); + entry.num_spans = total; + + // Histogram is only collected for pages < ages.kNumSize. + if (i < Length(PageAgeHistograms::kNumSizes)) { + entry.avg_live_age_secs = + ages.GetSmallHistogram(/*released=*/false, i)->avg_age(); + entry.avg_released_age_secs = + ages.GetSmallHistogram(/*released=*/true, i)->avg_age(); + for (int j = 0; j < ages.kNumBuckets; j++) { + entry.live_age_hist_bytes[j] = + ages.GetSmallHistogram(/*released=*/false, i)->pages_in_bucket(j) * + kPageSize; + entry.released_age_hist_bytes[j] = + ages.GetSmallHistogram(/*released=*/true, i)->pages_in_bucket(j) * + kPageSize; + } + } + entry.PrintInPbtxt(region, "page_heap"); + } + + // Print for large page. 
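+ // The aggregated large-span bucket (the ">=" row of the text output) is
+ // reported with span_size = -1; the threshold itself is emitted separately
+ // as min_large_span_size at the end of this function.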
+ { + PageHeapEntry entry; + entry.span_size = -1; + entry.num_spans = large.spans; + entry.present = large.normal_pages.in_bytes(); + entry.released = large.returned_pages.in_bytes(); + entry.avg_live_age_secs = + ages.GetLargeHistogram(/*released=*/false)->avg_age(); + entry.avg_released_age_secs = + ages.GetLargeHistogram(/*released=*/true)->avg_age(); + for (int j = 0; j < ages.kNumBuckets; j++) { + entry.live_age_hist_bytes[j] = + ages.GetLargeHistogram(/*released=*/false)->pages_in_bucket(j) * + kPageSize; + entry.released_age_hist_bytes[j] = + ages.GetLargeHistogram(/*released=*/true)->pages_in_bucket(j) * + kPageSize; + } + entry.PrintInPbtxt(region, "page_heap"); + } + + region->PrintI64("min_large_span_size", kMaxPages.raw_num()); +} + +static int HistBucketIndex(double age_exact) { + uint64_t age_secs = age_exact; // truncate to seconds + for (int i = 0; i < ABSL_ARRAYSIZE(kSpanAgeHistBuckets) - 1; i++) { + if (age_secs < kSpanAgeHistBuckets[i + 1].min_sec) { + return i; + } + } + return ABSL_ARRAYSIZE(kSpanAgeHistBuckets) - 1; +} + +PageAgeHistograms::PageAgeHistograms(int64_t now) + : now_(now), freq_(absl::base_internal::CycleClock::Frequency()) { + static_assert( + PageAgeHistograms::kNumBuckets == ABSL_ARRAYSIZE(kSpanAgeHistBuckets), + "buckets don't match constant in header"); +} + +void PageAgeHistograms::RecordRange(Length pages, bool released, int64_t when) { + double age = std::max(0.0, (now_ - when) / freq_); + (released ? returned_ : live_).Record(pages, age); +} + +void PageAgeHistograms::PerSizeHistograms::Record(Length pages, double age) { + (pages < kLargeSize ? GetSmall(pages) : GetLarge())->Record(pages, age); + total.Record(pages, age); +} + +static uint32_t SaturatingAdd(uint32_t x, uint32_t y) { + uint32_t z = x + y; + if (z < x) z = std::numeric_limits<uint32_t>::max(); + return z; +} + +void PageAgeHistograms::Histogram::Record(Length pages, double age) { + size_t bucket = HistBucketIndex(age); + buckets_[bucket] = SaturatingAdd(buckets_[bucket], pages.raw_num()); + total_pages_ += pages; + total_age_ += pages.raw_num() * age; +} + +void PageAgeHistograms::Print(const char *label, Printer *out) const { + out->printf("------------------------------------------------\n"); + out->printf( + "%s cache entry age (count of pages in spans of " + "a given size that have been idle for up to the given period of time)\n", + label); + out->printf("------------------------------------------------\n"); + out->printf(" "); + // Print out the table header. All columns have width 8 chars. + out->printf(" mean"); + for (int b = 0; b < kNumBuckets; b++) { + out->printf("%8s", kSpanAgeHistBuckets[b].label); + } + out->printf("\n"); + + live_.Print("Live span", out); + out->printf("\n"); + returned_.Print("Unmapped span", out); +} + +static void PrintLineHeader(Printer *out, const char *kind, const char *prefix, + Length num) { + // Print the beginning of the line, e.g. "Live span, >=128 pages: ". The + // span size ("128" in the example) is padded such that it plus the span + // prefix ("Live") plus the span size prefix (">=") is kHeaderExtraChars wide. 
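+ // For example, kind = "Live span" (9 characters) leaves 19 - 9 = 10 columns
+ // for the prefix and the right-adjusted span size.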
+ const int kHeaderExtraChars = 19; + const int span_size_width = + std::max<int>(0, kHeaderExtraChars - strlen(kind)); + out->printf("%s, ", kind); + PrintRightAdjustedWithPrefix(out, prefix, num, span_size_width); + out->printf(" pages: "); +} + +void PageAgeHistograms::PerSizeHistograms::Print(const char *kind, + Printer *out) const { + out->printf("%-15s TOTAL PAGES: ", kind); + total.Print(out); + + for (auto l = Length(1); l < Length(kNumSizes); ++l) { + const Histogram *here = &small[l.raw_num() - 1]; + if (here->empty()) continue; + PrintLineHeader(out, kind, "", l); + here->Print(out); + } + + if (!large.empty()) { + PrintLineHeader(out, kind, ">=", Length(kNumSizes)); + large.Print(out); + } +} + +void PageAgeHistograms::Histogram::Print(Printer *out) const { + const double mean = avg_age(); + out->printf(" %7.1f", mean); + for (int b = 0; b < kNumBuckets; ++b) { + out->printf(" %7" PRIu32, buckets_[b]); + } + + out->printf("\n"); +} + +void PageAllocInfo::Print(Printer *out) const { + int64_t ticks = TimeTicks(); + double hz = freq_ / ticks; + out->printf("%s: stats on allocation sizes\n", label_); + out->printf("%s: %zu pages live small allocation\n", label_, + total_small_.raw_num()); + out->printf("%s: %zu pages of slack on large allocations\n", label_, + total_slack_.raw_num()); + out->printf("%s: largest seen allocation %zu pages\n", label_, + largest_seen_.raw_num()); + out->printf("%s: per-size information:\n", label_); + + auto print_counts = [this, hz, out](const Counts &c, Length nmin, + Length nmax) { + const size_t a = c.nalloc; + const size_t f = c.nfree; + const Length a_pages = c.alloc_size; + const Length f_pages = c.free_size; + if (a == 0) return; + const size_t live = a - f; + const double live_mib = (a_pages - f_pages).in_mib(); + const double rate_hz = a * hz; + const double mib_hz = a_pages.in_mib() * hz; + if (nmin == nmax) { + out->printf("%s: %21zu page info: ", label_, nmin.raw_num()); + } else { + out->printf("%s: [ %7zu , %7zu ] page info: ", label_, nmin.raw_num(), + nmax.raw_num()); + } + out->printf( + "%10zu / %10zu a/f, %8zu (%6.1f MiB) live, " + "%8.3g allocs/s (%6.1f MiB/s)\n", + a, f, live, live_mib, rate_hz, mib_hz); + }; + + for (auto i = Length(0); i < kMaxPages; ++i) { + const Length n = i + Length(1); + print_counts(small_[i.raw_num()], n, n); + } + + for (int i = 0; i < kAddressBits - kPageShift; ++i) { + const Length nmax = Length(uintptr_t{1} << i); + const Length nmin = nmax / 2 + Length(1); + print_counts(large_[i], nmin, nmax); + } +} + +void PageAllocInfo::PrintInPbtxt(PbtxtRegion *region, + absl::string_view stat_name) const { + int64_t ticks = TimeTicks(); + double hz = freq_ / ticks; + region->PrintI64("num_small_allocation_pages", total_small_.raw_num()); + region->PrintI64("num_slack_pages", total_slack_.raw_num()); + region->PrintI64("largest_allocation_pages", largest_seen_.raw_num()); + + auto print_counts = [hz, region, &stat_name](const Counts &c, Length nmin, + Length nmax) { + const size_t a = c.nalloc; + const size_t f = c.nfree; + const Length a_pages = c.alloc_size; + const Length f_pages = c.free_size; + if (a == 0) return; + const int64_t live_bytes = (a_pages - f_pages).in_bytes(); + const double rate_hz = a * hz; + const double bytes_hz = static_cast<double>(a_pages.in_bytes()) * hz; + auto stat = region->CreateSubRegion(stat_name); + stat.PrintI64("min_span_pages", nmin.raw_num()); + stat.PrintI64("max_span_pages", nmax.raw_num()); + stat.PrintI64("num_spans_allocated", a); + stat.PrintI64("num_spans_freed", f); 
+ stat.PrintI64("live_bytes", live_bytes); + stat.PrintDouble("spans_allocated_per_second", rate_hz); + stat.PrintI64("bytes_allocated_per_second", static_cast<int64_t>(bytes_hz)); + }; + + for (auto i = Length(0); i < kMaxPages; ++i) { + const Length n = i + Length(1); + print_counts(small_[i.raw_num()], n, n); + } + + for (int i = 0; i < kAddressBits - kPageShift; ++i) { + const Length nmax = Length(uintptr_t(1) << i); + const Length nmin = nmax / 2 + Length(1); + print_counts(large_[i], nmin, nmax); + } +} + +static Length RoundUp(Length value, Length alignment) { + return Length((value.raw_num() + alignment.raw_num() - 1) & + ~(alignment.raw_num() - 1)); +} + +void PageAllocInfo::RecordAlloc(PageId p, Length n) { + if (ABSL_PREDICT_FALSE(log_on())) { + int64_t t = TimeTicks(); + LogAlloc(t, p, n); + } + + static_assert(kMaxPages.in_bytes() == 1024 * 1024, "threshold changed?"); + static_assert(kMaxPages < kPagesPerHugePage, "there should be slack"); + largest_seen_ = std::max(largest_seen_, n); + if (n <= kMaxPages) { + total_small_ += n; + small_[(n - Length(1)).raw_num()].Alloc(n); + } else { + Length slack = RoundUp(n, kPagesPerHugePage) - n; + total_slack_ += slack; + size_t i = absl::bit_width(n.raw_num() - 1); + large_[i].Alloc(n); + } +} + +void PageAllocInfo::RecordFree(PageId p, Length n) { + if (ABSL_PREDICT_FALSE(log_on())) { + int64_t t = TimeTicks(); + LogFree(t, p, n); + } + + if (n <= kMaxPages) { + total_small_ -= n; + small_[n.raw_num() - 1].Free(n); + } else { + Length slack = RoundUp(n, kPagesPerHugePage) - n; + total_slack_ -= slack; + size_t i = absl::bit_width(n.raw_num() - 1); + large_[i].Free(n); + } +} + +void PageAllocInfo::RecordRelease(Length n, Length got) { + if (ABSL_PREDICT_FALSE(log_on())) { + int64_t t = TimeTicks(); + LogRelease(t, n); + } +} + +const PageAllocInfo::Counts &PageAllocInfo::counts_for(Length n) const { + if (n <= kMaxPages) { + return small_[n.raw_num() - 1]; + } + size_t i = absl::bit_width(n.raw_num() - 1); + return large_[i]; +} + +// Our current format is really simple. We have an eight-byte version +// number as a header (currently = 1). We then follow up with a sequence +// of fixed-size events, each 16 bytes: +// - 8 byte "id" (really returned page) +// - 4 byte size (in kib, for compatibility) +// (this gets us to 4 TiB; anything larger is reported truncated) +// - 4 bytes for when (ms since last event) + what +// We shift up the when by 8 bits, and store what the event is in +// low 8 bits. (Currently just 0=alloc, 1=free, 2=Release.) +// This truncates time deltas to 2^24 ms ~= 4 hours. +// This could be compressed further. (As is, it compresses well +// with gzip.) +// All values are host-order. + +struct Entry { + uint64_t id; + uint32_t kib; + uint32_t whenwhat; +}; + +using tcmalloc::tcmalloc_internal::signal_safe_write; + +void PageAllocInfo::Write(uint64_t when, uint8_t what, PageId p, Length n) { + static_assert(sizeof(Entry) == 16, "bad sizing"); + Entry e; + // Round the time to ms *before* computing deltas, because this produces more + // accurate results in the long run. + + // Consider events that occur at absolute time 0.7ms and 50ms. If + // we take deltas first, we say the first event occurred at +0.7 = + // 0ms and the second event occurred at +49.3ms = 49ms. + // Rounding first produces 0 and 50. 
+ const uint64_t ms = when * 1000 / freq_; + uint64_t delta_ms = ms - last_ms_; + last_ms_ = ms; + // clamping + if (delta_ms >= 1 << 24) { + delta_ms = (1 << 24) - 1; + } + e.whenwhat = delta_ms << 8 | what; + e.id = p.index(); + size_t bytes = n.in_bytes(); + static const size_t KiB = 1024; + static const size_t kMaxRep = std::numeric_limits<uint32_t>::max() * KiB; + if (bytes > kMaxRep) { + bytes = kMaxRep; + } + e.kib = bytes / KiB; + const char *ptr = reinterpret_cast<const char *>(&e); + const size_t len = sizeof(Entry); + CHECK_CONDITION(len == signal_safe_write(fd_, ptr, len, nullptr)); +} + +PageAllocInfo::PageAllocInfo(const char *label, int log_fd) + : label_(label), fd_(log_fd) { + if (ABSL_PREDICT_FALSE(log_on())) { + // version 1 of the format, in case we change things up + uint64_t header = 1; + const char *ptr = reinterpret_cast<const char *>(&header); + const size_t len = sizeof(header); + CHECK_CONDITION(len == signal_safe_write(fd_, ptr, len, nullptr)); + } +} + +int64_t PageAllocInfo::TimeTicks() const { + return absl::base_internal::CycleClock::Now() - baseline_ticks_; +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/stats.h b/contrib/libs/tcmalloc/tcmalloc/stats.h new file mode 100644 index 0000000000..19070d867d --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/stats.h @@ -0,0 +1,271 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_STATS_H_ +#define TCMALLOC_STATS_H_ + +#include <stddef.h> +#include <stdint.h> + +#include "absl/base/internal/cycleclock.h" +#include "absl/strings/string_view.h" +#include "tcmalloc/common.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/pages.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +struct BackingStats { + BackingStats() : system_bytes(0), free_bytes(0), unmapped_bytes(0) {} + uint64_t system_bytes; // Total bytes allocated from system + uint64_t free_bytes; // Total bytes on normal freelists + uint64_t unmapped_bytes; // Total bytes on returned freelists + + BackingStats& operator+=(BackingStats rhs) { + system_bytes += rhs.system_bytes; + free_bytes += rhs.free_bytes; + unmapped_bytes += rhs.unmapped_bytes; + return *this; + } +}; + +inline BackingStats operator+(BackingStats lhs, BackingStats rhs) { + return lhs += rhs; +} + +struct SmallSpanStats { + constexpr SmallSpanStats() = default; + + // For each free list of small spans, the length (in spans) of the + // normal and returned free lists for that size. 
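+ // "Normal" spans are still backed by memory; "returned" spans have been
+ // released back to the OS and are reported as "unmapped" in the stats
+ // output.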
+ int64_t normal_length[kMaxPages.raw_num()] = {0}; + int64_t returned_length[kMaxPages.raw_num()] = {0}; + + SmallSpanStats& operator+=(SmallSpanStats rhs) { + for (size_t i = 0; i < kMaxPages.raw_num(); ++i) { + normal_length[i] += rhs.normal_length[i]; + returned_length[i] += rhs.returned_length[i]; + } + return *this; + } +}; + +inline SmallSpanStats operator+(SmallSpanStats lhs, SmallSpanStats rhs) { + return lhs += rhs; +} + +// Stats for free large spans (i.e., spans with more than kMaxPages pages). +struct LargeSpanStats { + size_t spans = 0; // Number of such spans + Length normal_pages; // Combined page length of normal large spans + Length returned_pages; // Combined page length of unmapped spans + + LargeSpanStats& operator+=(LargeSpanStats rhs) { + spans += rhs.spans; + normal_pages += rhs.normal_pages; + returned_pages += rhs.returned_pages; + return *this; + } +}; + +inline LargeSpanStats operator+(LargeSpanStats lhs, LargeSpanStats rhs) { + return lhs += rhs; +} + +void PrintStats(const char* label, Printer* out, const BackingStats& backing, + const SmallSpanStats& small, const LargeSpanStats& large, + bool everything); + +class PageAgeHistograms { + public: + // <now> assumed to be taken from absl::base_internal::CycleClock::Now (done + // like this for tests) + explicit PageAgeHistograms(int64_t now); + + // <when> = absl::base_internal::CycleClock::Now() when the span was last + // changed. + void RecordRange(Length pages, bool released, int64_t when); + + void Print(const char* label, Printer* out) const; + + static constexpr size_t kNumBuckets = 7; + static constexpr size_t kNumSizes = 64; + + static constexpr Length kLargeSize = Length(kNumSizes); + class Histogram { + public: + void Record(Length pages, double age); + void Print(Printer* out) const; + + uint32_t pages_in_bucket(size_t i) const { return buckets_[i]; } + + Length total() const { return total_pages_; } + + double avg_age() const { + return empty() ? 0.0 : total_age_ / total_pages_.raw_num(); + } + + bool empty() const { return total_pages_ == Length(0); } + + private: + // total number of pages fitting in this bucket We are actually + // somewhat space constrained so it's important to _not_ use a + // 64-bit counter here. This comfortably supports terabytes of + // RAM, and just in case we will update this with saturating arithmetic. 
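+ // (For example, with 8 KiB pages a single uint32_t bucket saturates only
+ // beyond 2^32 pages, i.e. 32 TiB.)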
+ uint32_t buckets_[kNumBuckets] = {0}; + + Length total_pages_; + double total_age_ = 0; + }; + + const Histogram* GetSmallHistogram(bool released, Length n) const { + if (released) { + return returned_.GetSmall(n); + } else { + return live_.GetSmall(n); + } + } + + const Histogram* GetLargeHistogram(bool released) const { + if (released) { + return returned_.GetLarge(); + } else { + return live_.GetLarge(); + } + } + + const Histogram* GetTotalHistogram(bool released) { + if (released) { + return returned_.GetTotal(); + } else { + return live_.GetTotal(); + } + } + + private: + struct PerSizeHistograms { + void Record(Length pages, double age); + void Print(const char* kind, Printer* out) const; + + Histogram* GetSmall(Length n) { + CHECK_CONDITION(n.raw_num() < kNumSizes); + return &small[n.raw_num() - 1]; + } + const Histogram* GetSmall(Length n) const { + CHECK_CONDITION(n.raw_num() < kNumSizes); + return &small[n.raw_num() - 1]; + } + + Histogram* GetLarge() { return &large; } + const Histogram* GetLarge() const { return &large; } + + Histogram* GetTotal() { return &total; } + + Histogram small[kNumSizes - 1]; + Histogram large; + Histogram total; + }; + + const int64_t now_; + const double freq_; + + PerSizeHistograms live_; + PerSizeHistograms returned_; +}; + +void PrintStatsInPbtxt(PbtxtRegion* region, const SmallSpanStats& small, + const LargeSpanStats& large, + const PageAgeHistograms& ages); + +class PageAllocInfo { + private: + struct Counts; + + public: + // If log_fd >= 0, dump a page trace to it as record events come in. + PageAllocInfo(const char* label, int log_fd); + + // Subclasses are responsible for calling these methods when + // the relevant actions occur + void RecordAlloc(PageId p, Length n); + void RecordFree(PageId p, Length n); + void RecordRelease(Length n, Length got); + // And invoking this in their Print() implementation. + void Print(Printer* out) const; + void PrintInPbtxt(PbtxtRegion* region, absl::string_view stat_name) const; + + // Total size of allocations < 1 MiB + Length small() const { return total_small_; } + // We define the "slack" of an allocation as the difference + // between its size and the nearest hugepage multiple (i.e. how + // much would go unused if we allocated it as an aligned hugepage + // and didn't use the rest.) + // Return the total slack of all non-small allocations. + Length slack() const { return total_slack_; } + + const Counts& counts_for(Length n) const; + + // Returns (approximate) CycleClock ticks since class instantiation. + int64_t TimeTicks() const; + + private: + Length total_small_; + Length total_slack_; + + Length largest_seen_; + + // How many alloc/frees have we seen (of some size range?) + struct Counts { + // raw counts + size_t nalloc{0}, nfree{0}; + // and total sizes (needed if this struct tracks a nontrivial range + Length alloc_size; + Length free_size; + + void Alloc(Length n) { + nalloc++; + alloc_size += n; + } + void Free(Length n) { + nfree++; + free_size += n; + } + }; + + // Indexed by exact length + Counts small_[kMaxPages.raw_num()]; + // Indexed by power-of-two-buckets + Counts large_[kAddressBits - kPageShift]; + const char* label_; + + const int64_t baseline_ticks_{absl::base_internal::CycleClock::Now()}; + const double freq_{absl::base_internal::CycleClock::Frequency()}; + + // State for page trace logging. 
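+ // A negative fd_ disables logging entirely; see log_on() below.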
+ const int fd_; + uint64_t last_ms_{0}; + void Write(uint64_t when, uint8_t what, PageId p, Length n); + bool log_on() const { return fd_ >= 0; } + void LogAlloc(int64_t when, PageId p, Length n) { Write(when, 0, p, n); } + void LogFree(int64_t when, PageId p, Length n) { Write(when, 1, p, n); } + void LogRelease(int64_t when, Length n) { Write(when, 2, PageId{0}, n); } +}; + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_STATS_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/stats_test.cc b/contrib/libs/tcmalloc/tcmalloc/stats_test.cc new file mode 100644 index 0000000000..733fcc9534 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/stats_test.cc @@ -0,0 +1,268 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/stats.h" + +#include <limits> +#include <string> + +#include "gtest/gtest.h" +#include "absl/base/internal/cycleclock.h" +#include "absl/time/clock.h" +#include "absl/time/time.h" +#include "tcmalloc/huge_pages.h" + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +class PrintTest : public ::testing::Test { + protected: + static constexpr size_t kBufferSize = 256 * 1024; + char buf_[kBufferSize]; + + void ExpectStats(const BackingStats &back, const SmallSpanStats &small, + const LargeSpanStats &large, const std::string &expected) { + Printer out(&buf_[0], kBufferSize); + PrintStats("PrintTest", &out, back, small, large, true); + EXPECT_EQ(expected, buf_); + } + + BackingStats Backing(size_t system, size_t free, size_t unmapped) { + BackingStats stat; + stat.system_bytes = system; + stat.free_bytes = free; + stat.unmapped_bytes = unmapped; + + return stat; + } +}; + +TEST_F(PrintTest, Empty) { + ExpectStats(Backing(0, 0, 0), {{}, {}}, // small + {0, Length(0), Length(0)}, // large + // clang-format off +R"LIT(------------------------------------------------ +PrintTest: 0 sizes; 0.0 MiB free; 0.0 MiB unmapped +------------------------------------------------ + >=128 large * 0 spans ~ 0.0 MiB; 0.0 MiB cum; unmapped: 0.0 MiB; 0.0 MiB cum +)LIT"); + // clang-format on +} + +TEST_F(PrintTest, ManySizes) { + ExpectStats(Backing(987654321, 1900 * 1000, 67 * 1000 * 1000), + {{0, 100, 0, 250, 0, 0, 0, 0, 0, 51}, + {0, 0, 300, 400, 0, 0, 0, 0, 0, 27}}, // small + {2, Length(100000), Length(2000)}, // large + // clang-format off +R"LIT(------------------------------------------------ +PrintTest: 4 sizes; 1.8 MiB free; 63.9 MiB unmapped +------------------------------------------------ + 1 pages * 100 spans ~ 0.8 MiB; 0.8 MiB cum; unmapped: 0.0 MiB; 0.0 MiB cum + 2 pages * 300 spans ~ 4.7 MiB; 5.5 MiB cum; unmapped: 4.7 MiB; 4.7 MiB cum + 3 pages * 650 spans ~ 15.2 MiB; 20.7 MiB cum; unmapped: 9.4 MiB; 14.1 MiB cum + 9 pages * 78 spans ~ 5.5 MiB; 26.2 MiB cum; unmapped: 1.9 MiB; 16.0 MiB cum + >=128 large * 2 spans ~ 796.9 MiB; 823.1 MiB cum; unmapped: 15.6 MiB; 31.6 MiB cum +)LIT"); + // clang-format on +} + +class AgeTest : public testing::Test 
{ + protected: + static constexpr size_t kBufferSize = 256 * 1024; + char buf_[kBufferSize]; + + static constexpr int64_t kNow = 1000ll * 1000 * 1000 * 1000; + + // correct "when" value to compute age as <age> + int64_t WhenForAge(double age) { + static double freq = absl::base_internal::CycleClock::Frequency(); + // age = (now - when) / freq + return kNow - freq * age; + } + + void ExpectAges(const PageAgeHistograms &ages, const std::string &expected) { + Printer out(&buf_[0], kBufferSize); + ages.Print("AgeTest", &out); + std::string got = buf_; + EXPECT_EQ(expected, got); + } +}; + +TEST_F(AgeTest, Basic) { + PageAgeHistograms ages(kNow); + ages.RecordRange(Length(1), false, WhenForAge(0.5)); + ages.RecordRange(Length(1), false, WhenForAge(1.2)); + ages.RecordRange(Length(1), false, WhenForAge(3.7)); + + ages.RecordRange(Length(3), false, WhenForAge(60 * 60 * 10)); + + for (int i = 0; i < 10; ++i) { + ages.RecordRange(Length(2), true, WhenForAge(0.1)); + } + ages.RecordRange(Length(2), true, WhenForAge(10 * 60 + 5)); + + ages.RecordRange(Length(200), true, WhenForAge(10 * 60)); + // clang-format off + const char kExpected[] = +R"LIT(------------------------------------------------ +AgeTest cache entry age (count of pages in spans of a given size that have been idle for up to the given period of time) +------------------------------------------------ + mean <1s 1s 30s 1m 30m 1h 8+h +Live span TOTAL PAGES: 18000.9 1 2 0 0 0 0 3 +Live span, 1 pages: 1.8 1 2 0 0 0 0 0 +Live span, 3 pages: 36000.0 0 0 0 0 0 0 3 + +Unmapped span TOTAL PAGES: 546.0 20 0 0 202 0 0 0 +Unmapped span, 2 pages: 55.1 20 0 0 2 0 0 0 +Unmapped span, >=64 pages: 600.0 0 0 0 200 0 0 0 +)LIT"; + // clang-format on + ExpectAges(ages, kExpected); +} + +TEST_F(AgeTest, Overflow) { + PageAgeHistograms ages(kNow); + const Length too_big = Length(4 * (std::numeric_limits<uint32_t>::max() / 5)); + ages.RecordRange(too_big, false, WhenForAge(0.5)); + ages.RecordRange(too_big, false, WhenForAge(0.5)); + + // clang-format off + const char kExpected[] = +R"LIT(------------------------------------------------ +AgeTest cache entry age (count of pages in spans of a given size that have been idle for up to the given period of time) +------------------------------------------------ + mean <1s 1s 30s 1m 30m 1h 8+h +Live span TOTAL PAGES: 0.5 4294967295 0 0 0 0 0 0 +Live span, >=64 pages: 0.5 4294967295 0 0 0 0 0 0 + +Unmapped span TOTAL PAGES: 0.0 0 0 0 0 0 0 0 +)LIT"; + // clang-format on + ExpectAges(ages, kExpected); +} + +TEST_F(AgeTest, ManySizes) { + PageAgeHistograms ages(kNow); + const Length N = PageAgeHistograms::kLargeSize; + for (auto i = Length(1); i <= N; ++i) { + ages.RecordRange(i, false, WhenForAge(i.raw_num() * 3)); + } + + for (auto i = Length(1); i < N; ++i) { + auto hist = ages.GetSmallHistogram(false, i); + EXPECT_EQ(i, hist->total()); + EXPECT_FLOAT_EQ(i.raw_num() * 3, hist->avg_age()); + } + + auto large = ages.GetLargeHistogram(false); + EXPECT_EQ(N, large->total()); + EXPECT_FLOAT_EQ(N.raw_num() * 3, large->avg_age()); + + auto total = ages.GetTotalHistogram(false); + // sum_{i = 1}^N i = n(n+1)/2 + EXPECT_EQ(N.raw_num() * (N.raw_num() + 1) / 2, total->total().raw_num()); + // sum_{i = 1}^N 3 * i * i = n(n + 1)(2n + 1) / 2; + // divide by the above page total gives (2n+1) + EXPECT_FLOAT_EQ(2 * N.raw_num() + 1, total->avg_age()); +} + +TEST(PageAllocInfo, Small) { + PageAllocInfo info("", -1); + static_assert(kMaxPages >= Length(4), "odd config"); + + info.RecordAlloc(PageId{0}, Length(2)); + 
info.RecordAlloc(PageId{0}, Length(2)); + info.RecordAlloc(PageId{0}, Length(2)); + + info.RecordAlloc(PageId{0}, Length(3)); + info.RecordAlloc(PageId{0}, Length(3)); + + info.RecordFree(PageId{0}, Length(3)); + + auto c2 = info.counts_for(Length(2)); + EXPECT_EQ(3, c2.nalloc); + EXPECT_EQ(0, c2.nfree); + EXPECT_EQ(Length(6), c2.alloc_size); + EXPECT_EQ(Length(0), c2.free_size); + + auto c3 = info.counts_for(Length(3)); + EXPECT_EQ(2, c3.nalloc); + EXPECT_EQ(1, c3.nfree); + EXPECT_EQ(Length(6), c3.alloc_size); + EXPECT_EQ(Length(3), c3.free_size); + + EXPECT_EQ(Length(3 * 2 + (2 - 1) * 3), info.small()); + EXPECT_EQ(Length(0), info.slack()); +} + +TEST(PageAllocInfo, Large) { + PageAllocInfo info("", -1); + static_assert(kPagesPerHugePage > kMaxPages, "odd config"); + + // These three should be aggregated + Length slack; + info.RecordAlloc(PageId{0}, kMaxPages + Length(1)); + slack += kPagesPerHugePage - kMaxPages - Length(1); + info.RecordAlloc(PageId{0}, kMaxPages * 3 / 2); + slack += kPagesPerHugePage - kMaxPages * 3 / 2; + info.RecordAlloc(PageId{0}, kMaxPages * 2); + slack += kPagesPerHugePage - kMaxPages * 2; + + // This shouldn't + const Length larger = kMaxPages * 2 + Length(1); + info.RecordAlloc(PageId{0}, larger); + slack += + (kPagesPerHugePage - (larger % kPagesPerHugePage)) % kPagesPerHugePage; + + auto c1 = info.counts_for(kMaxPages + Length(1)); + EXPECT_EQ(3, c1.nalloc); + EXPECT_EQ(0, c1.nfree); + EXPECT_EQ(kMaxPages * 9 / 2 + Length(1), c1.alloc_size); + EXPECT_EQ(Length(0), c1.free_size); + + auto c2 = info.counts_for(kMaxPages * 2 + Length(1)); + EXPECT_EQ(1, c2.nalloc); + EXPECT_EQ(0, c2.nfree); + EXPECT_EQ(kMaxPages * 2 + Length(1), c2.alloc_size); + EXPECT_EQ(Length(0), c2.free_size); + + EXPECT_EQ(Length(0), info.small()); + EXPECT_EQ(slack, info.slack()); +} + +TEST(ClockTest, ClockTicks) { + // It's a bit ironic to test this clock against other clocks since + // this exists because we don't trust other clocks. But hopefully + // no one is using libfaketime on this binary, and of course we + // don't care about signal safety, just ticking. + const absl::Time before = absl::Now(); + const double b = absl::base_internal::CycleClock::Now() / + absl::base_internal::CycleClock::Frequency(); + static const absl::Duration kDur = absl::Milliseconds(500); + absl::SleepFor(kDur); + const double a = absl::base_internal::CycleClock::Now() / + absl::base_internal::CycleClock::Frequency(); + const absl::Time after = absl::Now(); + + const absl::Duration actual = (after - before); + const absl::Duration measured = absl::Seconds(a - b); + EXPECT_LE(actual * 0.99, measured) << actual; + EXPECT_GE(actual * 1.01, measured) << actual; +} + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/system-alloc.cc b/contrib/libs/tcmalloc/tcmalloc/system-alloc.cc new file mode 100644 index 0000000000..b079c9c966 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/system-alloc.cc @@ -0,0 +1,623 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/system-alloc.h" + +#include <asm/unistd.h> +#include <errno.h> +#include <stddef.h> +#include <stdint.h> +#include <sys/mman.h> +#include <sys/syscall.h> +#include <unistd.h> + +#include <algorithm> +#include <atomic> +#include <new> +#include <tuple> +#include <type_traits> +#include <utility> + +#include "absl/base/attributes.h" +#include "absl/base/const_init.h" +#include "absl/base/internal/spinlock.h" +#include "absl/base/macros.h" +#include "absl/base/optimization.h" +#include "absl/types/optional.h" +#include "tcmalloc/common.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/optimization.h" +#include "tcmalloc/malloc_extension.h" +#include "tcmalloc/sampler.h" + +// On systems (like freebsd) that don't define MAP_ANONYMOUS, use the old +// form of the name instead. +#ifndef MAP_ANONYMOUS +#define MAP_ANONYMOUS MAP_ANON +#endif + +// Solaris has a bug where it doesn't declare madvise() for C++. +// http://www.opensolaris.org/jive/thread.jspa?threadID=21035&tstart=0 +#if defined(__sun) && defined(__SVR4) +#include <sys/types.h> +extern "C" int madvise(caddr_t, size_t, int); +#endif + +#ifdef __linux__ +#include <linux/mempolicy.h> +#endif + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +namespace { + +// Check that no bit is set at position ADDRESS_BITS or higher. +template <int ADDRESS_BITS> +void CheckAddressBits(uintptr_t ptr) { + ASSERT((ptr >> ADDRESS_BITS) == 0); +} + +// Specialize for the bit width of a pointer to avoid undefined shift. +template <> +ABSL_ATTRIBUTE_UNUSED void CheckAddressBits<8 * sizeof(void*)>(uintptr_t ptr) {} + +static_assert(kAddressBits <= 8 * sizeof(void*), + "kAddressBits must be smaller than the pointer size"); + +// Structure for discovering alignment +union MemoryAligner { + void* p; + double d; + size_t s; +} ABSL_CACHELINE_ALIGNED; + +static_assert(sizeof(MemoryAligner) < kMinSystemAlloc, + "hugepage alignment too small"); + +ABSL_CONST_INIT absl::base_internal::SpinLock spinlock( + absl::kConstInit, absl::base_internal::SCHEDULE_KERNEL_ONLY); + +// Page size is initialized on demand +size_t pagesize = 0; +size_t preferred_alignment = 0; + +// The current region factory. +AddressRegionFactory* region_factory = nullptr; + +// Rounds size down to a multiple of alignment. +size_t RoundDown(const size_t size, const size_t alignment) { + // Checks that the alignment has only one bit set. + ASSERT(absl::has_single_bit(alignment)); + return (size) & ~(alignment - 1); +} + +// Rounds size up to a multiple of alignment. 
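+// For example, RoundUp(10, 8) == 16 while RoundDown(10, 8) == 8; both require
+// alignment to be a power of two.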
+size_t RoundUp(const size_t size, const size_t alignment) { + return RoundDown(size + alignment - 1, alignment); +} + +class MmapRegion final : public AddressRegion { + public: + MmapRegion(uintptr_t start, size_t size, AddressRegionFactory::UsageHint hint) + : start_(start), free_size_(size), hint_(hint) {} + std::pair<void*, size_t> Alloc(size_t size, size_t alignment) override; + ~MmapRegion() override = default; + + private: + const uintptr_t start_; + size_t free_size_; + const AddressRegionFactory::UsageHint hint_; +}; + +class MmapRegionFactory final : public AddressRegionFactory { + public: + AddressRegion* Create(void* start, size_t size, UsageHint hint) override; + size_t GetStats(absl::Span<char> buffer) override; + size_t GetStatsInPbtxt(absl::Span<char> buffer) override; + ~MmapRegionFactory() override = default; + + private: + std::atomic<size_t> bytes_reserved_{0}; +}; +std::aligned_storage<sizeof(MmapRegionFactory), + alignof(MmapRegionFactory)>::type mmap_space; + +class RegionManager { + public: + std::pair<void*, size_t> Alloc(size_t size, size_t alignment, MemoryTag tag); + + void DiscardMappedRegions() { + std::fill(normal_region_.begin(), normal_region_.end(), nullptr); + sampled_region_ = nullptr; + } + + private: + // Checks that there is sufficient space available in the reserved region + // for the next allocation, if not allocate a new region. + // Then returns a pointer to the new memory. + std::pair<void*, size_t> Allocate(size_t size, size_t alignment, + MemoryTag tag); + + std::array<AddressRegion*, kNumaPartitions> normal_region_{{nullptr}}; + AddressRegion* sampled_region_{nullptr}; +}; +std::aligned_storage<sizeof(RegionManager), alignof(RegionManager)>::type + region_manager_space; +RegionManager* region_manager = nullptr; + +std::pair<void*, size_t> MmapRegion::Alloc(size_t request_size, + size_t alignment) { + // Align on kMinSystemAlloc boundaries to reduce external fragmentation for + // future allocations. + size_t size = RoundUp(request_size, kMinSystemAlloc); + if (size < request_size) return {nullptr, 0}; + alignment = std::max(alignment, preferred_alignment); + + // Tries to allocate size bytes from the end of [start_, start_ + free_size_), + // aligned to alignment. + uintptr_t end = start_ + free_size_; + uintptr_t result = end - size; + if (result > end) return {nullptr, 0}; // Underflow. + result &= ~(alignment - 1); + if (result < start_) return {nullptr, 0}; // Out of memory in region. 
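+  // Everything from the aligned result up to the previous end of the region is
+  // handed back, so the caller may receive more than request_size bytes.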
+ size_t actual_size = end - result; + + ASSERT(result % pagesize == 0); + void* result_ptr = reinterpret_cast<void*>(result); + if (mprotect(result_ptr, actual_size, PROT_READ | PROT_WRITE) != 0) { + Log(kLogWithStack, __FILE__, __LINE__, + "mprotect() region failed (ptr, size, error)", result_ptr, actual_size, + strerror(errno)); + return {nullptr, 0}; + } + (void)hint_; + free_size_ -= actual_size; + return {result_ptr, actual_size}; +} + +AddressRegion* MmapRegionFactory::Create(void* start, size_t size, + UsageHint hint) { + void* region_space = MallocInternal(sizeof(MmapRegion)); + if (!region_space) return nullptr; + bytes_reserved_.fetch_add(size, std::memory_order_relaxed); + return new (region_space) + MmapRegion(reinterpret_cast<uintptr_t>(start), size, hint); +} + +size_t MmapRegionFactory::GetStats(absl::Span<char> buffer) { + Printer printer(buffer.data(), buffer.size()); + size_t allocated = bytes_reserved_.load(std::memory_order_relaxed); + constexpr double MiB = 1048576.0; + printer.printf("MmapSysAllocator: %zu bytes (%.1f MiB) reserved\n", allocated, + allocated / MiB); + + return printer.SpaceRequired(); +} + +size_t MmapRegionFactory::GetStatsInPbtxt(absl::Span<char> buffer) { + Printer printer(buffer.data(), buffer.size()); + size_t allocated = bytes_reserved_.load(std::memory_order_relaxed); + printer.printf("mmap_sys_allocator: %lld\n", allocated); + + return printer.SpaceRequired(); +} + +static AddressRegionFactory::UsageHint TagToHint(MemoryTag tag) { + using UsageHint = AddressRegionFactory::UsageHint; + switch (tag) { + case MemoryTag::kNormal: + case MemoryTag::kNormalP1: + return UsageHint::kNormal; + break; + case MemoryTag::kSampled: + return UsageHint::kInfrequentAllocation; + break; + default: + ASSUME(false); + __builtin_unreachable(); + } +} + +std::pair<void*, size_t> RegionManager::Alloc(size_t request_size, + size_t alignment, + const MemoryTag tag) { + constexpr uintptr_t kTagFree = uintptr_t{1} << kTagShift; + + // We do not support size or alignment larger than kTagFree. + // TODO(b/141325493): Handle these large allocations. + if (request_size > kTagFree || alignment > kTagFree) return {nullptr, 0}; + + // If we are dealing with large sizes, or large alignments we do not + // want to throw away the existing reserved region, so instead we + // return a new region specifically targeted for the request. + if (request_size > kMinMmapAlloc || alignment > kMinMmapAlloc) { + // Align on kMinSystemAlloc boundaries to reduce external fragmentation for + // future allocations. 
+ size_t size = RoundUp(request_size, kMinSystemAlloc); + if (size < request_size) return {nullptr, 0}; + alignment = std::max(alignment, preferred_alignment); + void* ptr = MmapAligned(size, alignment, tag); + if (!ptr) return {nullptr, 0}; + + const auto region_type = TagToHint(tag); + AddressRegion* region = region_factory->Create(ptr, size, region_type); + if (!region) { + munmap(ptr, size); + return {nullptr, 0}; + } + std::pair<void*, size_t> result = region->Alloc(size, alignment); + if (result.first != nullptr) { + ASSERT(result.first == ptr); + ASSERT(result.second == size); + } else { + ASSERT(result.second == 0); + } + return result; + } + return Allocate(request_size, alignment, tag); +} + +std::pair<void*, size_t> RegionManager::Allocate(size_t size, size_t alignment, + const MemoryTag tag) { + AddressRegion*& region = *[&]() { + switch (tag) { + case MemoryTag::kNormal: + return &normal_region_[0]; + case MemoryTag::kNormalP1: + return &normal_region_[1]; + case MemoryTag::kSampled: + return &sampled_region_; + default: + ASSUME(false); + __builtin_unreachable(); + } + }(); + // For sizes that fit in our reserved range first of all check if we can + // satisfy the request from what we have available. + if (region) { + std::pair<void*, size_t> result = region->Alloc(size, alignment); + if (result.first) return result; + } + + // Allocation failed so we need to reserve more memory. + // Reserve new region and try allocation again. + void* ptr = MmapAligned(kMinMmapAlloc, kMinMmapAlloc, tag); + if (!ptr) return {nullptr, 0}; + + const auto region_type = TagToHint(tag); + region = region_factory->Create(ptr, kMinMmapAlloc, region_type); + if (!region) { + munmap(ptr, kMinMmapAlloc); + return {nullptr, 0}; + } + return region->Alloc(size, alignment); +} + +void InitSystemAllocatorIfNecessary() { + if (region_factory) return; + pagesize = getpagesize(); + // Sets the preferred alignment to be the largest of either the alignment + // returned by mmap() or our minimum allocation size. The minimum allocation + // size is usually a multiple of page size, but this need not be true for + // SMALL_BUT_SLOW where we do not allocate in units of huge pages. + preferred_alignment = std::max(pagesize, kMinSystemAlloc); + region_manager = new (®ion_manager_space) RegionManager(); + region_factory = new (&mmap_space) MmapRegionFactory(); +} + +// Bind the memory region spanning `size` bytes starting from `base` to NUMA +// nodes assigned to `partition`. Returns zero upon success, or a standard +// error code upon failure. +void BindMemory(void* const base, const size_t size, const size_t partition) { + auto& topology = Static::numa_topology(); + + // If NUMA awareness is unavailable or disabled, or the user requested that + // we don't bind memory then do nothing. 
+ const NumaBindMode bind_mode = topology.bind_mode(); + if (!topology.numa_aware() || bind_mode == NumaBindMode::kNone) { + return; + } + + const uint64_t nodemask = topology.GetPartitionNodes(partition); + int err = + syscall(__NR_mbind, base, size, MPOL_BIND | MPOL_F_STATIC_NODES, + &nodemask, sizeof(nodemask) * 8, MPOL_MF_STRICT | MPOL_MF_MOVE); + if (err == 0) { + return; + } + + if (bind_mode == NumaBindMode::kAdvisory) { + Log(kLogWithStack, __FILE__, __LINE__, "Warning: Unable to mbind memory", + err, base, nodemask); + return; + } + + ASSERT(bind_mode == NumaBindMode::kStrict); + Crash(kCrash, __FILE__, __LINE__, "Unable to mbind memory", err, base, + nodemask); +} + +ABSL_CONST_INIT std::atomic<int> system_release_errors = ATOMIC_VAR_INIT(0); + +} // namespace + +void AcquireSystemAllocLock() { + spinlock.Lock(); +} + +void ReleaseSystemAllocLock() { + spinlock.Unlock(); +} + +void* SystemAlloc(size_t bytes, size_t* actual_bytes, size_t alignment, + const MemoryTag tag) { + // If default alignment is set request the minimum alignment provided by + // the system. + alignment = std::max(alignment, pagesize); + + // Discard requests that overflow + if (bytes + alignment < bytes) return nullptr; + + // This may return significantly more memory than "bytes" by default, so + // require callers to know the true amount allocated. + ASSERT(actual_bytes != nullptr); + + absl::base_internal::SpinLockHolder lock_holder(&spinlock); + + InitSystemAllocatorIfNecessary(); + + void* result = nullptr; + std::tie(result, *actual_bytes) = + region_manager->Alloc(bytes, alignment, tag); + + if (result != nullptr) { + CheckAddressBits<kAddressBits>(reinterpret_cast<uintptr_t>(result) + + *actual_bytes - 1); + ASSERT(GetMemoryTag(result) == tag); + } + return result; +} + +static bool ReleasePages(void* start, size_t length) { + int ret; + // Note -- ignoring most return codes, because if this fails it + // doesn't matter... + // Moreover, MADV_REMOVE *will* fail (with EINVAL) on anonymous memory, + // but that's harmless. +#ifdef MADV_REMOVE + // MADV_REMOVE deletes any backing storage for non-anonymous memory + // (tmpfs). + do { + ret = madvise(start, length, MADV_REMOVE); + } while (ret == -1 && errno == EAGAIN); + + if (ret == 0) { + return true; + } +#endif +#ifdef MADV_DONTNEED + // MADV_DONTNEED drops page table info and any anonymous pages. + do { + ret = madvise(start, length, MADV_DONTNEED); + } while (ret == -1 && errno == EAGAIN); + + if (ret == 0) { + return true; + } +#endif + + return false; +} + +int SystemReleaseErrors() { + return system_release_errors.load(std::memory_order_relaxed); +} + +void SystemRelease(void* start, size_t length) { + int saved_errno = errno; +#if defined(MADV_DONTNEED) || defined(MADV_REMOVE) + const size_t pagemask = pagesize - 1; + + size_t new_start = reinterpret_cast<size_t>(start); + size_t end = new_start + length; + size_t new_end = end; + + // Round up the starting address and round down the ending address + // to be page aligned: + new_start = (new_start + pagesize - 1) & ~pagemask; + new_end = new_end & ~pagemask; + + ASSERT((new_start & pagemask) == 0); + ASSERT((new_end & pagemask) == 0); + ASSERT(new_start >= reinterpret_cast<size_t>(start)); + ASSERT(new_end <= end); + + if (new_end > new_start) { + void* new_ptr = reinterpret_cast<void*>(new_start); + size_t new_length = new_end - new_start; + + if (!ReleasePages(new_ptr, new_length)) { + // Try unlocking. 
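+      // (On Linux, madvise() typically fails with EINVAL on mlock()ed pages,
+      // so drop the lock and retry the release.)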
+ int ret; + do { + ret = munlock(reinterpret_cast<char*>(new_start), new_end - new_start); + } while (ret == -1 && errno == EAGAIN); + + if (ret != 0 || !ReleasePages(new_ptr, new_length)) { + // If we fail to munlock *or* fail our second attempt at madvise, + // increment our failure count. + system_release_errors.fetch_add(1, std::memory_order_relaxed); + } + } + } +#endif + errno = saved_errno; +} + +void SystemBack(void* start, size_t length) { + // TODO(b/134694141): use madvise when we have better support for that; + // taking faults is not free. + + // TODO(b/134694141): enable this, if we can avoid causing trouble for apps + // that routinely make large mallocs they never touch (sigh). + return; + + // Strictly speaking, not everything uses 4K pages. However, we're + // not asking the OS for anything actually page-related, just taking + // a fault on every "page". If the real page size is bigger, we do + // a few extra reads; this is not worth worrying about. + static const size_t kHardwarePageSize = 4 * 1024; + CHECK_CONDITION(reinterpret_cast<intptr_t>(start) % kHardwarePageSize == 0); + CHECK_CONDITION(length % kHardwarePageSize == 0); + const size_t num_pages = length / kHardwarePageSize; + + struct PageStruct { + volatile size_t data[kHardwarePageSize / sizeof(size_t)]; + }; + CHECK_CONDITION(sizeof(PageStruct) == kHardwarePageSize); + + PageStruct* ps = reinterpret_cast<PageStruct*>(start); + PageStruct* limit = ps + num_pages; + for (; ps < limit; ++ps) { + ps->data[0] = 0; + } +} + +AddressRegionFactory* GetRegionFactory() { + absl::base_internal::SpinLockHolder lock_holder(&spinlock); + InitSystemAllocatorIfNecessary(); + return region_factory; +} + +void SetRegionFactory(AddressRegionFactory* factory) { + absl::base_internal::SpinLockHolder lock_holder(&spinlock); + InitSystemAllocatorIfNecessary(); + region_manager->DiscardMappedRegions(); + region_factory = factory; +} + +static uintptr_t RandomMmapHint(size_t size, size_t alignment, + const MemoryTag tag) { + // Rely on kernel's mmap randomization to seed our RNG. + static uintptr_t rnd = []() { + void* seed = + mmap(nullptr, kPageSize, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (seed == MAP_FAILED) { + Crash(kCrash, __FILE__, __LINE__, + "Initial mmap() reservation failed (size)", kPageSize); + } + munmap(seed, kPageSize); + return reinterpret_cast<uintptr_t>(seed); + }(); + + // Mask out bits that cannot be used by the hardware, mask out the top + // "usable" bit since it is reserved for kernel use, and also mask out the + // next top bit to significantly reduce collisions with mappings that tend to + // be placed in the upper half of the address space (e.g., stack, executable, + // kernel-placed mmaps). See b/139357826. + // + // TODO(b/124707070): Remove this #ifdef +#if defined(MEMORY_SANITIZER) || defined(THREAD_SANITIZER) + // MSan and TSan use up all of the lower address space, so we allow use of + // mid-upper address space when they're active. This only matters for + // TCMalloc-internal tests, since sanitizers install their own malloc/free. + constexpr uintptr_t kAddrMask = (uintptr_t{3} << (kAddressBits - 3)) - 1; +#else + constexpr uintptr_t kAddrMask = (uintptr_t{3} << (kAddressBits - 3)) - 1; +#endif + + // Ensure alignment >= size so we're guaranteed the full mapping has the same + // tag. 
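+  // (Illustrative: a 48 KiB request with 4 KiB alignment is bumped to 64 KiB
+  // alignment, so no tag boundary can fall inside the mapping.)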
+ alignment = absl::bit_ceil(std::max(alignment, size)); + + rnd = Sampler::NextRandom(rnd); + uintptr_t addr = rnd & kAddrMask & ~(alignment - 1) & ~kTagMask; + addr |= static_cast<uintptr_t>(tag) << kTagShift; + ASSERT(GetMemoryTag(reinterpret_cast<const void*>(addr)) == tag); + return addr; +} + +void* MmapAligned(size_t size, size_t alignment, const MemoryTag tag) { + ASSERT(size <= kTagMask); + ASSERT(alignment <= kTagMask); + + static uintptr_t next_sampled_addr = 0; + static std::array<uintptr_t, kNumaPartitions> next_normal_addr = {0}; + + absl::optional<int> numa_partition; + uintptr_t& next_addr = *[&]() { + switch (tag) { + case MemoryTag::kSampled: + return &next_sampled_addr; + case MemoryTag::kNormalP0: + numa_partition = 0; + return &next_normal_addr[0]; + case MemoryTag::kNormalP1: + numa_partition = 1; + return &next_normal_addr[1]; + default: + ASSUME(false); + __builtin_unreachable(); + } + }(); + + if (!next_addr || next_addr & (alignment - 1) || + GetMemoryTag(reinterpret_cast<void*>(next_addr)) != tag || + GetMemoryTag(reinterpret_cast<void*>(next_addr + size - 1)) != tag) { + next_addr = RandomMmapHint(size, alignment, tag); + } + void* hint; + for (int i = 0; i < 1000; ++i) { + hint = reinterpret_cast<void*>(next_addr); + ASSERT(GetMemoryTag(hint) == tag); + // TODO(b/140190055): Use MAP_FIXED_NOREPLACE once available. + void* result = + mmap(hint, size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (result == hint) { + if (numa_partition.has_value()) { + BindMemory(result, size, *numa_partition); + } + // Attempt to keep the next mmap contiguous in the common case. + next_addr += size; + CHECK_CONDITION(kAddressBits == std::numeric_limits<uintptr_t>::digits || + next_addr <= uintptr_t{1} << kAddressBits); + + ASSERT((reinterpret_cast<uintptr_t>(result) & (alignment - 1)) == 0); + return result; + } + if (result == MAP_FAILED) { + Log(kLogWithStack, __FILE__, __LINE__, + "mmap() reservation failed (hint, size, error)", hint, size, + strerror(errno)); + return nullptr; + } + if (int err = munmap(result, size)) { + Log(kLogWithStack, __FILE__, __LINE__, "munmap() failed"); + ASSERT(err == 0); + } + next_addr = RandomMmapHint(size, alignment, tag); + } + + Log(kLogWithStack, __FILE__, __LINE__, + "MmapAligned() failed - unable to allocate with tag (hint, size, " + "alignment) - is something limiting address placement?", + hint, size, alignment); + return nullptr; +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/system-alloc.h b/contrib/libs/tcmalloc/tcmalloc/system-alloc.h new file mode 100644 index 0000000000..3d1e7fd60b --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/system-alloc.h @@ -0,0 +1,91 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Routine that uses sbrk/mmap to allocate memory from the system. +// Useful for implementing malloc. 
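+//
+// Illustrative usage sketch (not code from this header; assumes the tcmalloc
+// internal MemoryTag enum declared in common.h):
+//
+//   size_t actual = 0;
+//   void* p = SystemAlloc(1 << 20, &actual, /*alignment=*/0,
+//                         MemoryTag::kNormal);
+//   if (p != nullptr) {
+//     // ... use up to `actual` bytes ...
+//     SystemRelease(p, actual);  // hint: the OS may reclaim these pages
+//   }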
+ +#ifndef TCMALLOC_SYSTEM_ALLOC_H_ +#define TCMALLOC_SYSTEM_ALLOC_H_ + +#include <stddef.h> + +#include "tcmalloc/common.h" +#include "tcmalloc/malloc_extension.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// REQUIRES: "alignment" is a power of two or "0" to indicate default alignment +// REQUIRES: "alignment" and "size" <= kTagMask +// +// Allocate and return "bytes" of zeroed memory. The allocator may optionally +// return more bytes than asked for (i.e. return an entire "huge" page). The +// length of the returned memory area is stored in *actual_bytes. +// +// The returned pointer is a multiple of "alignment" if non-zero. The +// returned pointer will always be aligned suitably for holding a +// void*, double, or size_t. In addition, if this platform defines +// ABSL_CACHELINE_ALIGNED, the return pointer will always be cacheline +// aligned. +// +// The returned pointer is guaranteed to satisfy GetMemoryTag(ptr) == "tag". +// +// Returns nullptr when out of memory. +void *SystemAlloc(size_t bytes, size_t *actual_bytes, size_t alignment, + MemoryTag tag); + +// Returns the number of times we failed to give pages back to the OS after a +// call to SystemRelease. +int SystemReleaseErrors(); + +void AcquireSystemAllocLock(); +void ReleaseSystemAllocLock(); + +// This call is a hint to the operating system that the pages +// contained in the specified range of memory will not be used for a +// while, and can be released for use by other processes or the OS. +// Pages which are released in this way may be destroyed (zeroed) by +// the OS. The benefit of this function is that it frees memory for +// use by the system, the cost is that the pages are faulted back into +// the address space next time they are touched, which can impact +// performance. (Only pages fully covered by the memory region will +// be released, partial pages will not.) +void SystemRelease(void *start, size_t length); + +// This call is the inverse of SystemRelease: the pages in this range +// are in use and should be faulted in. (In principle this is a +// best-effort hint, but in practice we will unconditionally fault the +// range.) +// REQUIRES: [start, start + length) is a range aligned to 4KiB boundaries. +void SystemBack(void *start, size_t length); + +// Returns the current address region factory. +AddressRegionFactory *GetRegionFactory(); + +// Sets the current address region factory to factory. +void SetRegionFactory(AddressRegionFactory *factory); + +// Reserves using mmap() a region of memory of the requested size and alignment, +// with the bits specified by kTagMask set according to tag. +// +// REQUIRES: pagesize <= alignment <= kTagMask +// REQUIRES: size <= kTagMask +void *MmapAligned(size_t size, size_t alignment, MemoryTag tag); + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_SYSTEM_ALLOC_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/system-alloc_test.cc b/contrib/libs/tcmalloc/tcmalloc/system-alloc_test.cc new file mode 100644 index 0000000000..496bd048ee --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/system-alloc_test.cc @@ -0,0 +1,147 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/system-alloc.h" + +#include <stdint.h> +#include <stdlib.h> +#include <sys/mman.h> + +#include <algorithm> +#include <limits> +#include <new> +#include <utility> + +#include "gtest/gtest.h" +#include "absl/strings/str_format.h" +#include "benchmark/benchmark.h" +#include "tcmalloc/common.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/malloc_extension.h" + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +class MmapAlignedTest : public testing::TestWithParam<size_t> { + protected: + void MmapAndCheck(size_t size, size_t alignment) { + SCOPED_TRACE(absl::StrFormat("size = %u, alignment = %u", size, alignment)); + + for (MemoryTag tag : {MemoryTag::kNormal, MemoryTag::kSampled}) { + SCOPED_TRACE(static_cast<unsigned int>(tag)); + + void* p = MmapAligned(size, alignment, tag); + EXPECT_NE(p, nullptr); + EXPECT_EQ(reinterpret_cast<uintptr_t>(p) % alignment, 0); + EXPECT_EQ(IsTaggedMemory(p), tag == MemoryTag::kSampled); + EXPECT_EQ(GetMemoryTag(p), tag); + EXPECT_EQ(GetMemoryTag(static_cast<char*>(p) + size - 1), tag); + EXPECT_EQ(munmap(p, size), 0); + } + } +}; +INSTANTIATE_TEST_SUITE_P(VariedAlignment, MmapAlignedTest, + testing::Values(kPageSize, kMinSystemAlloc, + kMinMmapAlloc, + uintptr_t{1} << kTagShift)); + +TEST_P(MmapAlignedTest, CorrectAlignmentAndTag) { + MmapAndCheck(kMinSystemAlloc, GetParam()); +} + +// Ensure mmap sizes near kTagMask still have the correct tag at the beginning +// and end of the mapping. +TEST_F(MmapAlignedTest, LargeSizeSmallAlignment) { + MmapAndCheck(uintptr_t{1} << kTagShift, kPageSize); +} + +// Was SimpleRegion::Alloc invoked at least once? +static bool simple_region_alloc_invoked = false; + +class SimpleRegion : public AddressRegion { + public: + SimpleRegion(uintptr_t start, size_t size) + : start_(start), free_size_(size) {} + + std::pair<void*, size_t> Alloc(size_t size, size_t alignment) override { + simple_region_alloc_invoked = true; + uintptr_t result = (start_ + free_size_ - size) & ~(alignment - 1); + if (result < start_ || result >= start_ + free_size_) return {nullptr, 0}; + size_t actual_size = start_ + free_size_ - result; + free_size_ -= actual_size; + void* ptr = reinterpret_cast<void*>(result); + int err = mprotect(ptr, actual_size, PROT_READ | PROT_WRITE); + CHECK_CONDITION(err == 0); + return {ptr, actual_size}; + } + + private: + uintptr_t start_; + size_t free_size_; +}; + +class SimpleRegionFactory : public AddressRegionFactory { + public: + AddressRegion* Create(void* start, size_t size, UsageHint hint) override { + void* region_space = MallocInternal(sizeof(SimpleRegion)); + CHECK_CONDITION(region_space != nullptr); + return new (region_space) + SimpleRegion(reinterpret_cast<uintptr_t>(start), size); + } +}; +SimpleRegionFactory f; + +TEST(Basic, InvokedTest) { + MallocExtension::SetRegionFactory(&f); + + // An allocation size that is likely to trigger the system allocator. + void* ptr = ::operator new(kMinSystemAlloc); + // TODO(b/183453911): Remove workaround for GCC 10.x deleting operator new, + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94295. 
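+  // (DoNotOptimize keeps the new/delete pair from being optimized away, so the
+  // custom region factory is actually exercised by this allocation.)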
+ benchmark::DoNotOptimize(ptr); + ::operator delete(ptr); + + // Make sure that our allocator was invoked. + ASSERT_TRUE(simple_region_alloc_invoked); +} + +TEST(Basic, RetryFailTest) { + // Check with the allocator still works after a failed allocation. + // + // There is no way to call malloc and guarantee it will fail. malloc takes a + // size_t parameter and the C++ standard does not constrain the size of + // size_t. For example, consider an implementation where size_t is 32 bits + // and pointers are 64 bits. + // + // It is likely, though, that sizeof(size_t) == sizeof(void*). In that case, + // the first allocation here might succeed but the second allocation must + // fail. + // + // If the second allocation succeeds, you will have to rewrite or + // disable this test. + const size_t kHugeSize = std::numeric_limits<size_t>::max() / 2; + void* p1 = malloc(kHugeSize); + void* p2 = malloc(kHugeSize); + ASSERT_EQ(p2, nullptr); + if (p1 != nullptr) free(p1); + + void* q = malloc(1024); + ASSERT_NE(q, nullptr); + free(q); +} + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/tcmalloc.cc b/contrib/libs/tcmalloc/tcmalloc/tcmalloc.cc new file mode 100644 index 0000000000..8e62ba91b9 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/tcmalloc.cc @@ -0,0 +1,2441 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// tcmalloc is a fast malloc implementation. See +// https://github.com/google/tcmalloc/tree/master/docs/design.md for a high-level description of +// how this malloc works. +// +// SYNCHRONIZATION +// 1. The thread-/cpu-specific lists are accessed without acquiring any locks. +// This is safe because each such list is only accessed by one thread/cpu at +// a time. +// 2. We have a lock per central free-list, and hold it while manipulating +// the central free list for a particular size. +// 3. The central page allocator is protected by "pageheap_lock". +// 4. The pagemap (which maps from page-number to descriptor), +// can be read without holding any locks, and written while holding +// the "pageheap_lock". +// +// This multi-threaded access to the pagemap is safe for fairly +// subtle reasons. We basically assume that when an object X is +// allocated by thread A and deallocated by thread B, there must +// have been appropriate synchronization in the handoff of object +// X from thread A to thread B. +// +// PAGEMAP +// ------- +// Page map contains a mapping from page id to Span. +// +// If Span s occupies pages [p..q], +// pagemap[p] == s +// pagemap[q] == s +// pagemap[p+1..q-1] are undefined +// pagemap[p-1] and pagemap[q+1] are defined: +// NULL if the corresponding page is not yet in the address space. +// Otherwise it points to a Span. This span may be free +// or allocated. If free, it is in one of pageheap's freelist. 
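+//
+// Illustrative sketch (not code from this file): the span for an object is
+// found by mapping its address to a page id and consulting the pagemap,
+// roughly
+//   Span* s = Static::pagemap().GetDescriptor(PageIdContaining(ptr));
+// and the defined pagemap[p-1] / pagemap[q+1] entries let the page heap check
+// whether neighboring spans are free and can be coalesced on deallocation.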
+ +#include "tcmalloc/tcmalloc.h" + +#include <errno.h> +#include <inttypes.h> +#include <sched.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include <algorithm> +#include <atomic> +#include <cstddef> +#include <limits> +#include <map> +#include <memory> +#include <new> +#include <string> +#include <tuple> +#include <utility> +#include <vector> + +#include "absl/base/attributes.h" +#include "absl/base/config.h" +#include "absl/base/const_init.h" +#include "absl/base/dynamic_annotations.h" +#include "absl/base/internal/spinlock.h" +#include "absl/base/internal/sysinfo.h" +#include "absl/base/macros.h" +#include "absl/base/optimization.h" +#include "absl/base/thread_annotations.h" +#include "absl/debugging/stacktrace.h" +#include "absl/memory/memory.h" +#include "absl/numeric/bits.h" +#include "absl/strings/match.h" +#include "absl/strings/numbers.h" +#include "absl/strings/strip.h" +#include "tcmalloc/central_freelist.h" +#include "tcmalloc/common.h" +#include "tcmalloc/cpu_cache.h" +#include "tcmalloc/experiment.h" +#include "tcmalloc/guarded_page_allocator.h" +#include "tcmalloc/internal/linked_list.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/memory_stats.h" +#include "tcmalloc/internal/optimization.h" +#include "tcmalloc/internal/percpu.h" +#include "tcmalloc/internal_malloc_extension.h" +#include "tcmalloc/malloc_extension.h" +#include "tcmalloc/page_allocator.h" +#include "tcmalloc/page_heap.h" +#include "tcmalloc/page_heap_allocator.h" +#include "tcmalloc/pagemap.h" +#include "tcmalloc/pages.h" +#include "tcmalloc/parameters.h" +#include "tcmalloc/sampler.h" +#include "tcmalloc/span.h" +#include "tcmalloc/stack_trace_table.h" +#include "tcmalloc/static_vars.h" +#include "tcmalloc/stats.h" +#include "tcmalloc/system-alloc.h" +#include "tcmalloc/tcmalloc_policy.h" +#include "tcmalloc/thread_cache.h" +#include "tcmalloc/tracking.h" +#include "tcmalloc/transfer_cache.h" +#include "tcmalloc/transfer_cache_stats.h" + +#if defined(TCMALLOC_HAVE_STRUCT_MALLINFO) +#include <malloc.h> +#endif + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// ----------------------- IMPLEMENTATION ------------------------------- + +// Extract interesting stats +struct TCMallocStats { + uint64_t thread_bytes; // Bytes in thread caches + uint64_t central_bytes; // Bytes in central cache + uint64_t transfer_bytes; // Bytes in central transfer cache + uint64_t metadata_bytes; // Bytes alloced for metadata + uint64_t sharded_transfer_bytes; // Bytes in per-CCX cache + uint64_t per_cpu_bytes; // Bytes in per-CPU cache + uint64_t pagemap_root_bytes_res; // Resident bytes of pagemap root node + uint64_t percpu_metadata_bytes_res; // Resident bytes of the per-CPU metadata + AllocatorStats tc_stats; // ThreadCache objects + AllocatorStats span_stats; // Span objects + AllocatorStats stack_stats; // StackTrace objects + AllocatorStats bucket_stats; // StackTraceTable::Bucket objects + size_t pagemap_bytes; // included in metadata bytes + size_t percpu_metadata_bytes; // included in metadata bytes + BackingStats pageheap; // Stats from page heap + + // Explicitly declare the ctor to put it in the google_malloc section. + TCMallocStats() = default; +}; + +// Get stats into "r". Also, if class_count != NULL, class_count[k] +// will be set to the total number of objects of size class k in the +// central cache, transfer cache, and per-thread and per-CPU caches. 
+// If small_spans is non-NULL, it is filled. Same for large_spans. +// The boolean report_residence determines whether residence information +// should be captured or not. Residence info requires a potentially +// costly OS call, and is not necessary in all situations. +static void ExtractStats(TCMallocStats* r, uint64_t* class_count, + SpanStats* span_stats, SmallSpanStats* small_spans, + LargeSpanStats* large_spans, + TransferCacheStats* tc_stats, bool report_residence) { + r->central_bytes = 0; + r->transfer_bytes = 0; + for (int cl = 0; cl < kNumClasses; ++cl) { + const size_t length = Static::central_freelist(cl).length(); + const size_t tc_length = Static::transfer_cache().tc_length(cl); + const size_t cache_overhead = Static::central_freelist(cl).OverheadBytes(); + const size_t size = Static::sizemap().class_to_size(cl); + r->central_bytes += (size * length) + cache_overhead; + r->transfer_bytes += (size * tc_length); + if (class_count) { + // Sum the lengths of all per-class freelists, except the per-thread + // freelists, which get counted when we call GetThreadStats(), below. + class_count[cl] = length + tc_length; + if (UsePerCpuCache()) { + class_count[cl] += Static::cpu_cache().TotalObjectsOfClass(cl); + } + } + if (span_stats) { + span_stats[cl] = Static::central_freelist(cl).GetSpanStats(); + } + if (tc_stats) { + tc_stats[cl] = Static::transfer_cache().GetHitRateStats(cl); + } + } + + // Add stats from per-thread heaps + r->thread_bytes = 0; + { // scope + absl::base_internal::SpinLockHolder h(&pageheap_lock); + ThreadCache::GetThreadStats(&r->thread_bytes, class_count); + r->tc_stats = ThreadCache::HeapStats(); + r->span_stats = Static::span_allocator().stats(); + r->stack_stats = Static::stacktrace_allocator().stats(); + r->bucket_stats = Static::bucket_allocator().stats(); + r->metadata_bytes = Static::metadata_bytes(); + r->pagemap_bytes = Static::pagemap().bytes(); + r->pageheap = Static::page_allocator().stats(); + if (small_spans != nullptr) { + Static::page_allocator().GetSmallSpanStats(small_spans); + } + if (large_spans != nullptr) { + Static::page_allocator().GetLargeSpanStats(large_spans); + } + } + // We can access the pagemap without holding the pageheap_lock since it + // is static data, and we are only taking address and size which are + // constants. 
+ if (report_residence) { + auto resident_bytes = Static::pagemap_residence(); + r->pagemap_root_bytes_res = resident_bytes; + ASSERT(r->metadata_bytes >= r->pagemap_bytes); + r->metadata_bytes = r->metadata_bytes - r->pagemap_bytes + resident_bytes; + } else { + r->pagemap_root_bytes_res = 0; + } + + r->per_cpu_bytes = 0; + r->sharded_transfer_bytes = 0; + r->percpu_metadata_bytes_res = 0; + r->percpu_metadata_bytes = 0; + if (UsePerCpuCache()) { + r->per_cpu_bytes = Static::cpu_cache().TotalUsedBytes(); + r->sharded_transfer_bytes = Static::sharded_transfer_cache().TotalBytes(); + + if (report_residence) { + auto percpu_metadata = Static::cpu_cache().MetadataMemoryUsage(); + r->percpu_metadata_bytes_res = percpu_metadata.resident_size; + r->percpu_metadata_bytes = percpu_metadata.virtual_size; + + ASSERT(r->metadata_bytes >= r->percpu_metadata_bytes); + r->metadata_bytes = r->metadata_bytes - r->percpu_metadata_bytes + + r->percpu_metadata_bytes_res; + } + } +} + +static void ExtractTCMallocStats(TCMallocStats* r, bool report_residence) { + ExtractStats(r, nullptr, nullptr, nullptr, nullptr, nullptr, + report_residence); +} + +// Because different fields of stats are computed from state protected +// by different locks, they may be inconsistent. Prevent underflow +// when subtracting to avoid gigantic results. +static uint64_t StatSub(uint64_t a, uint64_t b) { + return (a >= b) ? (a - b) : 0; +} + +// Return approximate number of bytes in use by app. +static uint64_t InUseByApp(const TCMallocStats& stats) { + return StatSub(stats.pageheap.system_bytes, + stats.thread_bytes + stats.central_bytes + + stats.transfer_bytes + stats.per_cpu_bytes + + stats.sharded_transfer_bytes + stats.pageheap.free_bytes + + stats.pageheap.unmapped_bytes); +} + +static uint64_t VirtualMemoryUsed(const TCMallocStats& stats) { + return stats.pageheap.system_bytes + stats.metadata_bytes; +} + +static uint64_t PhysicalMemoryUsed(const TCMallocStats& stats) { + return StatSub(VirtualMemoryUsed(stats), stats.pageheap.unmapped_bytes); +} + +// The number of bytes either in use by the app or fragmented so that +// it cannot be (arbitrarily) reused. +static uint64_t RequiredBytes(const TCMallocStats& stats) { + return StatSub(PhysicalMemoryUsed(stats), stats.pageheap.free_bytes); +} + +static int CountAllowedCpus() { + cpu_set_t allowed_cpus; + if (sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus) != 0) { + return 0; + } + + return CPU_COUNT(&allowed_cpus); +} + +// WRITE stats to "out" +static void DumpStats(Printer* out, int level) { + TCMallocStats stats; + uint64_t class_count[kNumClasses]; + SpanStats span_stats[kNumClasses]; + TransferCacheStats tc_stats[kNumClasses]; + if (level >= 2) { + ExtractStats(&stats, class_count, span_stats, nullptr, nullptr, tc_stats, + true); + } else { + ExtractTCMallocStats(&stats, true); + } + + static const double MiB = 1048576.0; + + out->printf( + "See https://github.com/google/tcmalloc/tree/master/docs/stats.md for an explanation of " + "this page\n"); + + const uint64_t virtual_memory_used = VirtualMemoryUsed(stats); + const uint64_t physical_memory_used = PhysicalMemoryUsed(stats); + const uint64_t bytes_in_use_by_app = InUseByApp(stats); + +#ifdef TCMALLOC_SMALL_BUT_SLOW + out->printf("NOTE: SMALL MEMORY MODEL IS IN USE, PERFORMANCE MAY SUFFER.\n"); +#endif + // clang-format off + // Avoid clang-format complaining about the way that this text is laid out. 
+ out->printf( + "------------------------------------------------\n" + "MALLOC: %12" PRIu64 " (%7.1f MiB) Bytes in use by application\n" + "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in page heap freelist\n" + "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in central cache freelist\n" + "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in per-CPU cache freelist\n" + "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in Sharded cache freelist\n" + "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in transfer cache freelist\n" + "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in thread cache freelists\n" + "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in malloc metadata\n" + "MALLOC: ------------\n" + "MALLOC: = %12" PRIu64 " (%7.1f MiB) Actual memory used (physical + swap)\n" + "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes released to OS (aka unmapped)\n" + "MALLOC: ------------\n" + "MALLOC: = %12" PRIu64 " (%7.1f MiB) Virtual address space used\n" + "MALLOC:\n" + "MALLOC: %12" PRIu64 " Spans in use\n" + "MALLOC: %12" PRIu64 " (%7.1f MiB) Spans created\n" + "MALLOC: %12" PRIu64 " Thread heaps in use\n" + "MALLOC: %12" PRIu64 " (%7.1f MiB) Thread heaps created\n" + "MALLOC: %12" PRIu64 " Stack traces in use\n" + "MALLOC: %12" PRIu64 " (%7.1f MiB) Stack traces created\n" + "MALLOC: %12" PRIu64 " Table buckets in use\n" + "MALLOC: %12" PRIu64 " (%7.1f MiB) Table buckets created\n" + "MALLOC: %12" PRIu64 " (%7.1f MiB) Pagemap bytes used\n" + "MALLOC: %12" PRIu64 " (%7.1f MiB) Pagemap root resident bytes\n" + "MALLOC: %12" PRIu64 " (%7.1f MiB) per-CPU slab bytes used\n" + "MALLOC: %12" PRIu64 " (%7.1f MiB) per-CPU slab resident bytes\n" + "MALLOC: %12" PRIu64 " Tcmalloc page size\n" + "MALLOC: %12" PRIu64 " Tcmalloc hugepage size\n" + "MALLOC: %12" PRIu64 " CPUs Allowed in Mask\n", + bytes_in_use_by_app, bytes_in_use_by_app / MiB, + stats.pageheap.free_bytes, stats.pageheap.free_bytes / MiB, + stats.central_bytes, stats.central_bytes / MiB, + stats.per_cpu_bytes, stats.per_cpu_bytes / MiB, + stats.sharded_transfer_bytes, stats.sharded_transfer_bytes / MiB, + stats.transfer_bytes, stats.transfer_bytes / MiB, + stats.thread_bytes, stats.thread_bytes / MiB, + stats.metadata_bytes, stats.metadata_bytes / MiB, + physical_memory_used, physical_memory_used / MiB, + stats.pageheap.unmapped_bytes, stats.pageheap.unmapped_bytes / MiB, + virtual_memory_used, virtual_memory_used / MiB, + uint64_t(stats.span_stats.in_use), + uint64_t(stats.span_stats.total), + (stats.span_stats.total * sizeof(Span)) / MiB, + uint64_t(stats.tc_stats.in_use), + uint64_t(stats.tc_stats.total), + (stats.tc_stats.total * sizeof(ThreadCache)) / MiB, + uint64_t(stats.stack_stats.in_use), + uint64_t(stats.stack_stats.total), + (stats.stack_stats.total * sizeof(StackTrace)) / MiB, + uint64_t(stats.bucket_stats.in_use), + uint64_t(stats.bucket_stats.total), + (stats.bucket_stats.total * sizeof(StackTraceTable::Bucket)) / MiB, + uint64_t(stats.pagemap_bytes), + stats.pagemap_bytes / MiB, + stats.pagemap_root_bytes_res, stats.pagemap_root_bytes_res / MiB, + uint64_t(stats.percpu_metadata_bytes), + stats.percpu_metadata_bytes / MiB, + stats.percpu_metadata_bytes_res, stats.percpu_metadata_bytes_res / MiB, + uint64_t(kPageSize), + uint64_t(kHugePageSize), + CountAllowedCpus()); + // clang-format on + + PrintExperiments(out); + out->printf( + "MALLOC SAMPLED PROFILES: %zu bytes (current), %zu bytes (peak)\n", + static_cast<size_t>(Static::sampled_objects_size_.value()), + Static::peak_heap_tracker().CurrentPeakSize()); + + MemoryStats memstats; + if 
(GetMemoryStats(&memstats)) { + uint64_t rss = memstats.rss; + uint64_t vss = memstats.vss; + // clang-format off + out->printf( + "\n" + "Total process stats (inclusive of non-malloc sources):\n" + "TOTAL: %12" PRIu64 " (%7.1f MiB) Bytes resident (physical memory used)\n" + "TOTAL: %12" PRIu64 " (%7.1f MiB) Bytes mapped (virtual memory used)\n", + rss, rss / MiB, vss, vss / MiB); + // clang-format on + } + + out->printf( + "------------------------------------------------\n" + "Call ReleaseMemoryToSystem() to release freelist memory to the OS" + " (via madvise()).\n" + "Bytes released to the OS take up virtual address space" + " but no physical memory.\n"); + if (level >= 2) { + out->printf("------------------------------------------------\n"); + out->printf("Total size of freelists for per-thread and per-CPU caches,\n"); + out->printf("transfer cache, and central cache, as well as number of\n"); + out->printf("live pages, returned/requested spans by size class\n"); + out->printf("------------------------------------------------\n"); + + uint64_t cumulative = 0; + for (int cl = 1; cl < kNumClasses; ++cl) { + uint64_t class_bytes = + class_count[cl] * Static::sizemap().class_to_size(cl); + + cumulative += class_bytes; + // clang-format off + out->printf( + "class %3d [ %8zu bytes ] : %8" PRIu64 " objs; %5.1f MiB; %5.1f cum MiB; " + "%8" PRIu64 " live pages; spans: %6zu ret / %6zu req = %5.4f;\n", + cl, Static::sizemap().class_to_size(cl), class_count[cl], + class_bytes / MiB, cumulative / MiB, + span_stats[cl].num_live_spans()*Static::sizemap().class_to_pages(cl), + span_stats[cl].num_spans_returned, span_stats[cl].num_spans_requested, + span_stats[cl].prob_returned()); + // clang-format on + } + + out->printf("------------------------------------------------\n"); + out->printf("Transfer cache implementation: %s\n", + TransferCacheImplementationToLabel( + Static::transfer_cache().implementation())); + + out->printf("------------------------------------------------\n"); + out->printf("Transfer cache insert/remove hits/misses by size class\n"); + for (int cl = 1; cl < kNumClasses; ++cl) { + out->printf( + "class %3d [ %8zu bytes ] : %8" PRIu64 " insert hits; %8" PRIu64 + " insert misses (%8lu partial); %8" PRIu64 " remove hits; %8" PRIu64 + " remove misses (%8lu partial);\n", + cl, Static::sizemap().class_to_size(cl), tc_stats[cl].insert_hits, + tc_stats[cl].insert_misses, tc_stats[cl].insert_non_batch_misses, + tc_stats[cl].remove_hits, tc_stats[cl].remove_misses, + tc_stats[cl].remove_non_batch_misses); + } + + if (UsePerCpuCache()) { + Static::cpu_cache().Print(out); + } + + Static::page_allocator().Print(out, MemoryTag::kNormal); + if (Static::numa_topology().active_partitions() > 1) { + Static::page_allocator().Print(out, MemoryTag::kNormalP1); + } + Static::page_allocator().Print(out, MemoryTag::kSampled); + tracking::Print(out); + Static::guardedpage_allocator().Print(out); + + uint64_t limit_bytes; + bool is_hard; + std::tie(limit_bytes, is_hard) = Static::page_allocator().limit(); + out->printf("PARAMETER desired_usage_limit_bytes %" PRIu64 " %s\n", + limit_bytes, is_hard ? "(hard)" : ""); + out->printf("Number of times limit was hit: %lld\n", + Static::page_allocator().limit_hits()); + + out->printf("PARAMETER tcmalloc_per_cpu_caches %d\n", + Parameters::per_cpu_caches() ? 
1 : 0); + out->printf("PARAMETER tcmalloc_max_per_cpu_cache_size %d\n", + Parameters::max_per_cpu_cache_size()); + out->printf("PARAMETER tcmalloc_max_total_thread_cache_bytes %lld\n", + Parameters::max_total_thread_cache_bytes()); + out->printf("PARAMETER malloc_release_bytes_per_sec %llu\n", + Parameters::background_release_rate()); + out->printf( + "PARAMETER tcmalloc_skip_subrelease_interval %s\n", + absl::FormatDuration(Parameters::filler_skip_subrelease_interval())); + out->printf("PARAMETER flat vcpus %d\n", + subtle::percpu::UsingFlatVirtualCpus() ? 1 : 0); + } +} + +namespace { + +/*static*/ void DumpStatsInPbtxt(Printer* out, int level) { + TCMallocStats stats; + uint64_t class_count[kNumClasses]; + SpanStats span_stats[kNumClasses]; + TransferCacheStats tc_stats[kNumClasses]; + if (level >= 2) { + ExtractStats(&stats, class_count, span_stats, nullptr, nullptr, tc_stats, + true); + } else { + ExtractTCMallocStats(&stats, true); + } + + const uint64_t bytes_in_use_by_app = InUseByApp(stats); + const uint64_t virtual_memory_used = VirtualMemoryUsed(stats); + const uint64_t physical_memory_used = PhysicalMemoryUsed(stats); + + PbtxtRegion region(out, kTop, /*indent=*/0); + region.PrintI64("in_use_by_app", bytes_in_use_by_app); + region.PrintI64("page_heap_freelist", stats.pageheap.free_bytes); + region.PrintI64("central_cache_freelist", stats.central_bytes); + region.PrintI64("per_cpu_cache_freelist", stats.per_cpu_bytes); + region.PrintI64("sharded_transfer_cache_freelist", + stats.sharded_transfer_bytes); + region.PrintI64("transfer_cache_freelist", stats.transfer_bytes); + region.PrintI64("thread_cache_freelists", stats.thread_bytes); + region.PrintI64("malloc_metadata", stats.metadata_bytes); + region.PrintI64("actual_mem_used", physical_memory_used); + region.PrintI64("unmapped", stats.pageheap.unmapped_bytes); + region.PrintI64("virtual_address_space_used", virtual_memory_used); + region.PrintI64("num_spans", uint64_t(stats.span_stats.in_use)); + region.PrintI64("num_spans_created", uint64_t(stats.span_stats.total)); + region.PrintI64("num_thread_heaps", uint64_t(stats.tc_stats.in_use)); + region.PrintI64("num_thread_heaps_created", uint64_t(stats.tc_stats.total)); + region.PrintI64("num_stack_traces", uint64_t(stats.stack_stats.in_use)); + region.PrintI64("num_stack_traces_created", + uint64_t(stats.stack_stats.total)); + region.PrintI64("num_table_buckets", uint64_t(stats.bucket_stats.in_use)); + region.PrintI64("num_table_buckets_created", + uint64_t(stats.bucket_stats.total)); + region.PrintI64("pagemap_size", uint64_t(stats.pagemap_bytes)); + region.PrintI64("pagemap_root_residence", stats.pagemap_root_bytes_res); + region.PrintI64("percpu_slab_size", stats.percpu_metadata_bytes); + region.PrintI64("percpu_slab_residence", stats.percpu_metadata_bytes_res); + region.PrintI64("tcmalloc_page_size", uint64_t(kPageSize)); + region.PrintI64("tcmalloc_huge_page_size", uint64_t(kHugePageSize)); + region.PrintI64("cpus_allowed", CountAllowedCpus()); + + { + auto sampled_profiles = region.CreateSubRegion("sampled_profiles"); + sampled_profiles.PrintI64("current_bytes", + Static::sampled_objects_size_.value()); + sampled_profiles.PrintI64("peak_bytes", + Static::peak_heap_tracker().CurrentPeakSize()); + } + + // Print total process stats (inclusive of non-malloc sources). 
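+  // (Illustrative sketch, values invented: the two fields emitted below show
+  // up in the pbtxt output roughly as
+  //   total_resident: 104857600
+  //   total_mapped: 209715200
+  // i.e. the process-wide RSS and VSS reported by GetMemoryStats().)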
+ MemoryStats memstats; + if (GetMemoryStats(&memstats)) { + region.PrintI64("total_resident", uint64_t(memstats.rss)); + region.PrintI64("total_mapped", uint64_t(memstats.vss)); + } + + if (level >= 2) { + { + for (int cl = 1; cl < kNumClasses; ++cl) { + uint64_t class_bytes = + class_count[cl] * Static::sizemap().class_to_size(cl); + PbtxtRegion entry = region.CreateSubRegion("freelist"); + entry.PrintI64("sizeclass", Static::sizemap().class_to_size(cl)); + entry.PrintI64("bytes", class_bytes); + entry.PrintI64("num_spans_requested", + span_stats[cl].num_spans_requested); + entry.PrintI64("num_spans_returned", span_stats[cl].num_spans_returned); + entry.PrintI64("obj_capacity", span_stats[cl].obj_capacity); + } + } + + { + for (int cl = 1; cl < kNumClasses; ++cl) { + PbtxtRegion entry = region.CreateSubRegion("transfer_cache"); + entry.PrintI64("sizeclass", Static::sizemap().class_to_size(cl)); + entry.PrintI64("insert_hits", tc_stats[cl].insert_hits); + entry.PrintI64("insert_misses", tc_stats[cl].insert_misses); + entry.PrintI64("insert_non_batch_misses", + tc_stats[cl].insert_non_batch_misses); + entry.PrintI64("remove_hits", tc_stats[cl].remove_hits); + entry.PrintI64("remove_misses", tc_stats[cl].remove_misses); + entry.PrintI64("remove_non_batch_misses", + tc_stats[cl].remove_non_batch_misses); + } + } + + region.PrintRaw("transfer_cache_implementation", + TransferCacheImplementationToLabel( + Static::transfer_cache().implementation())); + + if (UsePerCpuCache()) { + Static::cpu_cache().PrintInPbtxt(®ion); + } + } + Static::page_allocator().PrintInPbtxt(®ion, MemoryTag::kNormal); + if (Static::numa_topology().active_partitions() > 1) { + Static::page_allocator().PrintInPbtxt(®ion, MemoryTag::kNormalP1); + } + Static::page_allocator().PrintInPbtxt(®ion, MemoryTag::kSampled); + // We do not collect tracking information in pbtxt. + + size_t limit_bytes; + bool is_hard; + std::tie(limit_bytes, is_hard) = Static::page_allocator().limit(); + region.PrintI64("desired_usage_limit_bytes", limit_bytes); + region.PrintBool("hard_limit", is_hard); + region.PrintI64("limit_hits", Static::page_allocator().limit_hits()); + + { + auto gwp_asan = region.CreateSubRegion("gwp_asan"); + Static::guardedpage_allocator().PrintInPbtxt(&gwp_asan); + } + + region.PrintI64("memory_release_failures", SystemReleaseErrors()); + + region.PrintBool("tcmalloc_per_cpu_caches", Parameters::per_cpu_caches()); + region.PrintI64("tcmalloc_max_per_cpu_cache_size", + Parameters::max_per_cpu_cache_size()); + region.PrintI64("tcmalloc_max_total_thread_cache_bytes", + Parameters::max_total_thread_cache_bytes()); + region.PrintI64("malloc_release_bytes_per_sec", + static_cast<int64_t>(Parameters::background_release_rate())); + region.PrintI64( + "tcmalloc_skip_subrelease_interval_ns", + absl::ToInt64Nanoseconds(Parameters::filler_skip_subrelease_interval())); + region.PrintRaw("percpu_vcpu_type", + subtle::percpu::UsingFlatVirtualCpus() ? "FLAT" : "NONE"); +} + +} // namespace + +// Gets a human readable description of the current state of the malloc data +// structures. A part of the state is stored in pbtxt format in `buffer`, the +// rest of the state is stored in the old format (the same as in +// MallocExtension::GetStats) in `other_buffer`. Both buffers are +// null-terminated strings in a prefix of "buffer[0,buffer_length-1]" or +// "other_buffer[0,other_buffer_length-1]". Returns the actual written sizes for +// buffer and other_buffer. +// +// REQUIRES: buffer_length > 0 and other_buffer_length > 0. 
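+// A hypothetical caller (illustrative only, not part of this file) might do:
+//
+//   std::string buf(64 << 10, '\0');
+//   int written = MallocExtension_Internal_GetStatsInPbtxt(&buf[0], buf.size());
+//   buf.resize(written);
+//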
+// +// TODO(b/130249686): This is NOT YET ready to use. +extern "C" ABSL_ATTRIBUTE_UNUSED int MallocExtension_Internal_GetStatsInPbtxt( + char* buffer, int buffer_length) { + ASSERT(buffer_length > 0); + Printer printer(buffer, buffer_length); + + // Print level one stats unless lots of space is available + if (buffer_length < 10000) { + DumpStatsInPbtxt(&printer, 1); + } else { + DumpStatsInPbtxt(&printer, 2); + } + + size_t required = printer.SpaceRequired(); + + if (buffer_length > required) { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + required += GetRegionFactory()->GetStatsInPbtxt( + absl::Span<char>(buffer + required, buffer_length - required)); + } + + return required; +} + +static void PrintStats(int level) { + const int kBufferSize = (TCMALLOC_HAVE_TRACKING ? 2 << 20 : 64 << 10); + char* buffer = new char[kBufferSize]; + Printer printer(buffer, kBufferSize); + DumpStats(&printer, level); + (void)write(STDERR_FILENO, buffer, strlen(buffer)); + delete[] buffer; +} + +// This function computes a profile that maps a live stack trace to +// the number of bytes of central-cache memory pinned by an allocation +// at that stack trace. +static std::unique_ptr<const ProfileBase> DumpFragmentationProfile() { + auto profile = absl::make_unique<StackTraceTable>(ProfileType::kFragmentation, + 1, true, true); + + { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + for (Span* s : Static::sampled_objects_) { + // Compute fragmentation to charge to this sample: + StackTrace* const t = s->sampled_stack(); + if (t->proxy == nullptr) { + // There is just one object per-span, and neighboring spans + // can be released back to the system, so we charge no + // fragmentation to this sampled object. + continue; + } + + // Fetch the span on which the proxy lives so we can examine its + // co-residents. + const PageId p = PageIdContaining(t->proxy); + Span* span = Static::pagemap().GetDescriptor(p); + if (span == nullptr) { + // Avoid crashes in production mode code, but report in tests. + ASSERT(span != nullptr); + continue; + } + + const double frag = span->Fragmentation(); + if (frag > 0) { + profile->AddTrace(frag, *t); + } + } + } + return profile; +} + +// If <unsample> is true, the caller expects a profile where sampling has been +// compensated for (that is, it reports 8000 16-byte objects iff we believe the +// program has that many live objects.) Otherwise, do not adjust for sampling +// (the caller will do so somehow.) 
+static std::unique_ptr<const ProfileBase> DumpHeapProfile(bool unsample) { + auto profile = absl::make_unique<StackTraceTable>( + ProfileType::kHeap, Sampler::GetSamplePeriod(), true, unsample); + absl::base_internal::SpinLockHolder h(&pageheap_lock); + for (Span* s : Static::sampled_objects_) { + profile->AddTrace(1.0, *s->sampled_stack()); + } + return profile; +} + +class AllocationSampleList; + +class AllocationSample final : public AllocationProfilingTokenBase { + public: + AllocationSample(); + ~AllocationSample() override; + + Profile Stop() && override; + + private: + std::unique_ptr<StackTraceTable> mallocs_; + AllocationSample* next ABSL_GUARDED_BY(pageheap_lock); + friend class AllocationSampleList; +}; + +class AllocationSampleList { + public: + void Add(AllocationSample* as) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { + as->next = first_; + first_ = as; + } + + // This list is very short and we're nowhere near a hot path, just walk + void Remove(AllocationSample* as) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { + AllocationSample** link = &first_; + AllocationSample* cur = first_; + while (cur != as) { + CHECK_CONDITION(cur != nullptr); + link = &cur->next; + cur = cur->next; + } + *link = as->next; + } + + void ReportMalloc(const struct StackTrace& sample) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { + AllocationSample* cur = first_; + while (cur != nullptr) { + cur->mallocs_->AddTrace(1.0, sample); + cur = cur->next; + } + } + + private: + AllocationSample* first_; +} allocation_samples_ ABSL_GUARDED_BY(pageheap_lock); + +AllocationSample::AllocationSample() { + mallocs_ = absl::make_unique<StackTraceTable>( + ProfileType::kAllocations, Sampler::GetSamplePeriod(), true, true); + absl::base_internal::SpinLockHolder h(&pageheap_lock); + allocation_samples_.Add(this); +} + +AllocationSample::~AllocationSample() { + if (mallocs_ == nullptr) { + return; + } + + // deleted before ending profile, do it for them + { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + allocation_samples_.Remove(this); + } +} + +Profile AllocationSample::Stop() && ABSL_LOCKS_EXCLUDED(pageheap_lock) { + // We need to remove ourselves from the allocation_samples_ list before we + // mutate mallocs_; + if (mallocs_) { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + allocation_samples_.Remove(this); + } + return ProfileAccessor::MakeProfile(std::move(mallocs_)); +} + +extern "C" void MallocExtension_Internal_GetStats(std::string* ret) { + for (size_t shift = 17; shift < 22; shift++) { + const size_t size = 1 << shift; + // Double ret's size until we succeed in writing the buffer without + // truncation. + // + // TODO(b/142931922): printer only writes data and does not read it. + // Leverage https://wg21.link/P1072 when it is standardized. + ret->resize(size - 1); + + size_t written_size = TCMalloc_Internal_GetStats(&*ret->begin(), size - 1); + if (written_size < size - 1) { + // We did not truncate. + ret->resize(written_size); + break; + } + } +} + +extern "C" size_t TCMalloc_Internal_GetStats(char* buffer, + size_t buffer_length) { + Printer printer(buffer, buffer_length); + if (buffer_length < 10000) { + DumpStats(&printer, 1); + } else { + DumpStats(&printer, 2); + } + + printer.printf("\nLow-level allocator stats:\n"); + printer.printf("Memory Release Failures: %d\n", SystemReleaseErrors()); + + size_t n = printer.SpaceRequired(); + + size_t bytes_remaining = buffer_length > n ? 
buffer_length - n : 0; + if (bytes_remaining > 0) { + n += GetRegionFactory()->GetStats( + absl::Span<char>(buffer + n, bytes_remaining)); + } + + return n; +} + +extern "C" const ProfileBase* MallocExtension_Internal_SnapshotCurrent( + ProfileType type) { + switch (type) { + case ProfileType::kHeap: + return DumpHeapProfile(true).release(); + case ProfileType::kFragmentation: + return DumpFragmentationProfile().release(); + case ProfileType::kPeakHeap: + return Static::peak_heap_tracker().DumpSample().release(); + default: + return nullptr; + } +} + +extern "C" AllocationProfilingTokenBase* +MallocExtension_Internal_StartAllocationProfiling() { + return new AllocationSample(); +} + +bool GetNumericProperty(const char* name_data, size_t name_size, + size_t* value) { + ASSERT(name_data != nullptr); + ASSERT(value != nullptr); + const absl::string_view name(name_data, name_size); + + // This is near the top since ReleasePerCpuMemoryToOS() calls it frequently. + if (name == "tcmalloc.per_cpu_caches_active") { + *value = Static::CPUCacheActive(); + return true; + } + + if (name == "generic.virtual_memory_used") { + TCMallocStats stats; + ExtractTCMallocStats(&stats, false); + *value = VirtualMemoryUsed(stats); + return true; + } + + if (name == "generic.physical_memory_used") { + TCMallocStats stats; + ExtractTCMallocStats(&stats, false); + *value = PhysicalMemoryUsed(stats); + return true; + } + + if (name == "generic.current_allocated_bytes" || + name == "generic.bytes_in_use_by_app") { + TCMallocStats stats; + ExtractTCMallocStats(&stats, false); + *value = InUseByApp(stats); + return true; + } + + if (name == "generic.heap_size") { + absl::base_internal::SpinLockHolder l(&pageheap_lock); + BackingStats stats = Static::page_allocator().stats(); + *value = stats.system_bytes - stats.unmapped_bytes; + return true; + } + + if (name == "tcmalloc.central_cache_free") { + TCMallocStats stats; + ExtractTCMallocStats(&stats, false); + *value = stats.central_bytes; + return true; + } + + if (name == "tcmalloc.cpu_free") { + TCMallocStats stats; + ExtractTCMallocStats(&stats, false); + *value = stats.per_cpu_bytes; + return true; + } + + if (name == "tcmalloc.sharded_transfer_cache_free") { + TCMallocStats stats; + ExtractTCMallocStats(&stats, false); + *value = stats.sharded_transfer_bytes; + return true; + } + + if (name == "tcmalloc.slack_bytes") { + // Kept for backwards compatibility. Now defined externally as: + // pageheap_free_bytes + pageheap_unmapped_bytes. 
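+    // (Illustrative: with 8 MiB of free pages and 32 MiB unmapped, this
+    // property reports 40 MiB.)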
+ absl::base_internal::SpinLockHolder l(&pageheap_lock); + BackingStats stats = Static::page_allocator().stats(); + *value = stats.free_bytes + stats.unmapped_bytes; + return true; + } + + if (name == "tcmalloc.pageheap_free_bytes" || + name == "tcmalloc.page_heap_free") { + absl::base_internal::SpinLockHolder l(&pageheap_lock); + *value = Static::page_allocator().stats().free_bytes; + return true; + } + + if (name == "tcmalloc.pageheap_unmapped_bytes" || + name == "tcmalloc.page_heap_unmapped") { + absl::base_internal::SpinLockHolder l(&pageheap_lock); + *value = Static::page_allocator().stats().unmapped_bytes; + return true; + } + + if (name == "tcmalloc.page_algorithm") { + absl::base_internal::SpinLockHolder l(&pageheap_lock); + *value = Static::page_allocator().algorithm(); + return true; + } + + if (name == "tcmalloc.max_total_thread_cache_bytes") { + absl::base_internal::SpinLockHolder l(&pageheap_lock); + *value = ThreadCache::overall_thread_cache_size(); + return true; + } + + if (name == "tcmalloc.current_total_thread_cache_bytes" || + name == "tcmalloc.thread_cache_free") { + TCMallocStats stats; + ExtractTCMallocStats(&stats, false); + *value = stats.thread_bytes; + return true; + } + + if (name == "tcmalloc.thread_cache_count") { + TCMallocStats stats; + ExtractTCMallocStats(&stats, false); + *value = stats.tc_stats.in_use; + return true; + } + + if (name == "tcmalloc.local_bytes") { + TCMallocStats stats; + ExtractTCMallocStats(&stats, false); + *value = + stats.thread_bytes + stats.per_cpu_bytes + stats.sharded_transfer_bytes; + ; + return true; + } + + if (name == "tcmalloc.external_fragmentation_bytes") { + TCMallocStats stats; + ExtractTCMallocStats(&stats, false); + *value = (stats.pageheap.free_bytes + stats.central_bytes + + stats.per_cpu_bytes + stats.sharded_transfer_bytes + + stats.transfer_bytes + stats.thread_bytes + stats.metadata_bytes); + return true; + } + + if (name == "tcmalloc.metadata_bytes") { + TCMallocStats stats; + ExtractTCMallocStats(&stats, true); + *value = stats.metadata_bytes; + return true; + } + + if (name == "tcmalloc.transfer_cache_free") { + TCMallocStats stats; + ExtractTCMallocStats(&stats, false); + *value = stats.transfer_bytes; + return true; + } + + bool want_hard_limit = (name == "tcmalloc.hard_usage_limit_bytes"); + if (want_hard_limit || name == "tcmalloc.desired_usage_limit_bytes") { + size_t amount; + bool is_hard; + std::tie(amount, is_hard) = Static::page_allocator().limit(); + if (want_hard_limit != is_hard) { + amount = std::numeric_limits<size_t>::max(); + } + *value = amount; + return true; + } + + if (name == "tcmalloc.required_bytes") { + TCMallocStats stats; + ExtractTCMallocStats(&stats, false); + *value = RequiredBytes(stats); + return true; + } + + const absl::string_view kExperimentPrefix = "tcmalloc.experiment."; + if (absl::StartsWith(name, kExperimentPrefix)) { + absl::optional<Experiment> exp = + FindExperimentByName(absl::StripPrefix(name, kExperimentPrefix)); + if (exp.has_value()) { + *value = IsExperimentActive(*exp) ? 1 : 0; + return true; + } + } + + return false; +} + +MallocExtension::Ownership GetOwnership(const void* ptr) { + const PageId p = PageIdContaining(ptr); + return Static::pagemap().GetDescriptor(p) + ? 
MallocExtension::Ownership::kOwned + : MallocExtension::Ownership::kNotOwned; +} + +extern "C" bool MallocExtension_Internal_GetNumericProperty( + const char* name_data, size_t name_size, size_t* value) { + return GetNumericProperty(name_data, name_size, value); +} + +extern "C" void MallocExtension_Internal_GetMemoryLimit( + MallocExtension::MemoryLimit* limit) { + ASSERT(limit != nullptr); + + std::tie(limit->limit, limit->hard) = Static::page_allocator().limit(); +} + +extern "C" void MallocExtension_Internal_SetMemoryLimit( + const MallocExtension::MemoryLimit* limit) { + ASSERT(limit != nullptr); + + if (!limit->hard) { + Parameters::set_heap_size_hard_limit(0); + Static::page_allocator().set_limit(limit->limit, false /* !hard */); + } else { + Parameters::set_heap_size_hard_limit(limit->limit); + } +} + +extern "C" void MallocExtension_Internal_MarkThreadIdle() { + ThreadCache::BecomeIdle(); +} + +extern "C" AddressRegionFactory* MallocExtension_Internal_GetRegionFactory() { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + return GetRegionFactory(); +} + +extern "C" void MallocExtension_Internal_SetRegionFactory( + AddressRegionFactory* factory) { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + SetRegionFactory(factory); +} + +// ReleaseMemoryToSystem drops the page heap lock while actually calling to +// kernel to release pages. To avoid confusing ourselves with +// extra_bytes_released handling, lets do separate lock just for release. +ABSL_CONST_INIT static absl::base_internal::SpinLock release_lock( + absl::kConstInit, absl::base_internal::SCHEDULE_KERNEL_ONLY); + +extern "C" void MallocExtension_Internal_ReleaseMemoryToSystem( + size_t num_bytes) { + // ReleaseMemoryToSystem() might release more than the requested bytes because + // the page heap releases at the span granularity, and spans are of wildly + // different sizes. This keeps track of the extra bytes bytes released so + // that the app can periodically call ReleaseMemoryToSystem() to release + // memory at a constant rate. + ABSL_CONST_INIT static size_t extra_bytes_released; + + absl::base_internal::SpinLockHolder rh(&release_lock); + + absl::base_internal::SpinLockHolder h(&pageheap_lock); + if (num_bytes <= extra_bytes_released) { + // We released too much on a prior call, so don't release any + // more this time. + extra_bytes_released = extra_bytes_released - num_bytes; + num_bytes = 0; + } else { + num_bytes = num_bytes - extra_bytes_released; + } + + Length num_pages; + if (num_bytes > 0) { + // A sub-page size request may round down to zero. Assume the caller wants + // some memory released. + num_pages = BytesToLengthCeil(num_bytes); + ASSERT(num_pages > Length(0)); + } else { + num_pages = Length(0); + } + size_t bytes_released = + Static::page_allocator().ReleaseAtLeastNPages(num_pages).in_bytes(); + if (bytes_released > num_bytes) { + extra_bytes_released = bytes_released - num_bytes; + } else { + // The PageHeap wasn't able to release num_bytes. Don't try to compensate + // with a big release next time. 
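+    // (Illustrative: if a caller asks for 1 MiB but a whole 4 MiB span gets
+    // released, the surplus 3 MiB is remembered above and subtracted from the
+    // next request; here the release came up short, so any remembered surplus
+    // is simply cleared.)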
+ extra_bytes_released = 0; + } +} + +extern "C" void MallocExtension_EnableForkSupport() { + Static::EnableForkSupport(); +} + +void TCMallocPreFork() { + if (!Static::ForkSupportEnabled()) { + return; + } + + if (Static::CPUCacheActive()) { + Static::cpu_cache().AcquireInternalLocks(); + } + Static::transfer_cache().AcquireInternalLocks(); + guarded_page_lock.Lock(); + release_lock.Lock(); + pageheap_lock.Lock(); + AcquireSystemAllocLock(); +} + +void TCMallocPostFork() { + if (!Static::ForkSupportEnabled()) { + return; + } + + ReleaseSystemAllocLock(); + pageheap_lock.Unlock(); + guarded_page_lock.Unlock(); + release_lock.Unlock(); + Static::transfer_cache().ReleaseInternalLocks(); + if (Static::CPUCacheActive()) { + Static::cpu_cache().ReleaseInternalLocks(); + } +} + +extern "C" void MallocExtension_SetSampleUserDataCallbacks( + MallocExtension::CreateSampleUserDataCallback create, + MallocExtension::CopySampleUserDataCallback copy, + MallocExtension::DestroySampleUserDataCallback destroy) { + Static::SetSampleUserDataCallbacks(create, copy, destroy); +} + +// nallocx slow path. +// Moved to a separate function because size_class_with_alignment is not inlined +// which would cause nallocx to become non-leaf function with stack frame and +// stack spills. ABSL_ATTRIBUTE_ALWAYS_INLINE does not work on +// size_class_with_alignment, compiler barks that it can't inline the function +// somewhere. +static ABSL_ATTRIBUTE_NOINLINE size_t nallocx_slow(size_t size, int flags) { + Static::InitIfNecessary(); + size_t align = static_cast<size_t>(1ull << (flags & 0x3f)); + uint32_t cl; + if (ABSL_PREDICT_TRUE(Static::sizemap().GetSizeClass( + CppPolicy().AlignAs(align), size, &cl))) { + ASSERT(cl != 0); + return Static::sizemap().class_to_size(cl); + } else { + return BytesToLengthCeil(size).in_bytes(); + } +} + +// The nallocx function allocates no memory, but it performs the same size +// computation as the malloc function, and returns the real size of the +// allocation that would result from the equivalent malloc function call. 
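+// For example (illustrative): nallocx(24, 0) returns the byte size of the
+// size class that malloc(24) would actually use, which may be larger than 24;
+// the low six bits of `flags` encode log2 of the requested alignment, as in
+// jemalloc's MALLOCX_LG_ALIGN.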
+// nallocx is a malloc extension originally implemented by jemalloc: +// http://www.unix.com/man-page/freebsd/3/nallocx/ +extern "C" size_t nallocx(size_t size, int flags) noexcept { + if (ABSL_PREDICT_FALSE(!Static::IsInited() || flags != 0)) { + return nallocx_slow(size, flags); + } + uint32_t cl; + if (ABSL_PREDICT_TRUE( + Static::sizemap().GetSizeClass(CppPolicy(), size, &cl))) { + ASSERT(cl != 0); + return Static::sizemap().class_to_size(cl); + } else { + return BytesToLengthCeil(size).in_bytes(); + } +} + +extern "C" MallocExtension::Ownership MallocExtension_Internal_GetOwnership( + const void* ptr) { + return GetOwnership(ptr); +} + +extern "C" void MallocExtension_Internal_GetProperties( + std::map<std::string, MallocExtension::Property>* result) { + TCMallocStats stats; + ExtractTCMallocStats(&stats, true); + + const uint64_t virtual_memory_used = VirtualMemoryUsed(stats); + const uint64_t physical_memory_used = PhysicalMemoryUsed(stats); + const uint64_t bytes_in_use_by_app = InUseByApp(stats); + + result->clear(); + // Virtual Memory Used + (*result)["generic.virtual_memory_used"].value = virtual_memory_used; + // Physical Memory used + (*result)["generic.physical_memory_used"].value = physical_memory_used; + // Bytes in use By App + (*result)["generic.bytes_in_use_by_app"].value = bytes_in_use_by_app; + // Page Heap Free + (*result)["tcmalloc.page_heap_free"].value = stats.pageheap.free_bytes; + // Metadata Bytes + (*result)["tcmalloc.metadata_bytes"].value = stats.metadata_bytes; + // Heaps in Use + (*result)["tcmalloc.thread_cache_count"].value = stats.tc_stats.in_use; + // Central Cache Free List + (*result)["tcmalloc.central_cache_free"].value = stats.central_bytes; + // Transfer Cache Free List + (*result)["tcmalloc.transfer_cache_free"].value = stats.transfer_bytes; + // Per CPU Cache Free List + (*result)["tcmalloc.cpu_free"].value = stats.per_cpu_bytes; + (*result)["tcmalloc.sharded_transfer_cache_free"].value = + stats.sharded_transfer_bytes; + (*result)["tcmalloc.per_cpu_caches_active"].value = Static::CPUCacheActive(); + // Thread Cache Free List + (*result)["tcmalloc.thread_cache_free"].value = stats.thread_bytes; + // Page Unmapped + (*result)["tcmalloc.pageheap_unmapped_bytes"].value = + stats.pageheap.unmapped_bytes; + (*result)["tcmalloc.page_heap_unmapped"].value = + stats.pageheap.unmapped_bytes; + + (*result)["tcmalloc.page_algorithm"].value = + Static::page_allocator().algorithm(); + + FillExperimentProperties(result); + tracking::GetProperties(result); +} + +extern "C" size_t MallocExtension_Internal_ReleaseCpuMemory(int cpu) { + size_t bytes = 0; + if (Static::CPUCacheActive()) { + bytes = Static::cpu_cache().Reclaim(cpu); + } + return bytes; +} + +//------------------------------------------------------------------- +// Helpers for the exported routines below +//------------------------------------------------------------------- + +#ifdef ABSL_HAVE_TLS +// See the comment on ThreadCache::thread_local_data_ regarding +// ABSL_ATTRIBUTE_INITIAL_EXEC. 
+__thread Sampler thread_sampler_ ABSL_ATTRIBUTE_INITIAL_EXEC; + +inline Sampler* GetThreadSampler() { return &thread_sampler_; } + +#else + +inline Sampler* GetThreadSampler() { + ThreadCache* heap = ThreadCache::GetCache(); + return heap->GetSampler(); +} + +#endif + +enum class Hooks { RUN, NO }; + +static void FreeSmallSlow(void* ptr, size_t cl); + +namespace { + +// Sets `*psize` to `size`, +inline void SetCapacity(size_t size, std::nullptr_t) {} +inline void SetCapacity(size_t size, size_t* psize) { *psize = size; } + +// Sets `*psize` to the size for the size class in `cl`, +inline void SetClassCapacity(size_t size, std::nullptr_t) {} +inline void SetClassCapacity(uint32_t cl, size_t* psize) { + *psize = Static::sizemap().class_to_size(cl); +} + +// Sets `*psize` to the size for the size class in `cl` if `ptr` is not null, +// else `*psize` is set to 0. This method is overloaded for `nullptr_t` below, +// allowing the compiler to optimize code between regular and size returning +// allocation operations. +inline void SetClassCapacity(const void*, uint32_t, std::nullptr_t) {} +inline void SetClassCapacity(const void* ptr, uint32_t cl, size_t* psize) { + if (ABSL_PREDICT_TRUE(ptr != nullptr)) { + *psize = Static::sizemap().class_to_size(cl); + } else { + *psize = 0; + } +} + +// Sets `*psize` to the size in pages corresponding to the requested size in +// `size` if `ptr` is not null, else `*psize` is set to 0. This method is +// overloaded for `nullptr_t` below, allowing the compiler to optimize code +// between regular and size returning allocation operations. +inline void SetPagesCapacity(const void*, size_t, std::nullptr_t) {} +inline void SetPagesCapacity(const void* ptr, size_t size, size_t* psize) { + if (ABSL_PREDICT_TRUE(ptr != nullptr)) { + *psize = BytesToLengthCeil(size).in_bytes(); + } else { + *psize = 0; + } +} + +} // namespace + +// In free fast-path we handle delete hooks by delegating work to slower +// function that both performs delete hooks calls and does free. This is done so +// that free fast-path only does tail calls, which allow compiler to avoid +// generating costly prologue/epilogue for fast-path. +template <void F(void*, size_t), Hooks hooks_state> +static ABSL_ATTRIBUTE_SECTION(google_malloc) void invoke_delete_hooks_and_free( + void* ptr, size_t t) { + // Refresh the fast path state. + GetThreadSampler()->UpdateFastPathState(); + return F(ptr, t); +} + +template <void F(void*, PageId), Hooks hooks_state> +static ABSL_ATTRIBUTE_SECTION(google_malloc) void invoke_delete_hooks_and_free( + void* ptr, PageId p) { + // Refresh the fast path state. + GetThreadSampler()->UpdateFastPathState(); + return F(ptr, p); +} + +// Helper for do_free_with_cl +template <Hooks hooks_state> +static inline ABSL_ATTRIBUTE_ALWAYS_INLINE void FreeSmall(void* ptr, + size_t cl) { + if (ABSL_PREDICT_FALSE(!GetThreadSampler()->IsOnFastPath())) { + // Take the slow path. + invoke_delete_hooks_and_free<FreeSmallSlow, hooks_state>(ptr, cl); + return; + } + +#ifndef TCMALLOC_DEPRECATED_PERTHREAD + // The CPU Cache is enabled, so we're able to take the fastpath. + ASSERT(Static::CPUCacheActive()); + ASSERT(subtle::percpu::IsFastNoInit()); + + Static::cpu_cache().Deallocate(ptr, cl); +#else // TCMALLOC_DEPRECATED_PERTHREAD + ThreadCache* cache = ThreadCache::GetCacheIfPresent(); + + // IsOnFastPath does not track whether or not we have an active ThreadCache on + // this thread, so we need to check cache for nullptr. 
+ if (ABSL_PREDICT_FALSE(cache == nullptr)) { + FreeSmallSlow(ptr, cl); + return; + } + + cache->Deallocate(ptr, cl); +#endif // TCMALLOC_DEPRECATED_PERTHREAD +} + +// this helper function is used when FreeSmall (defined above) hits +// the case of thread state not being in per-cpu mode or hitting case +// of no thread cache. This happens when thread state is not yet +// properly initialized with real thread cache or with per-cpu mode, +// or when thread state is already destroyed as part of thread +// termination. +// +// We explicitly prevent inlining it to keep it out of fast-path, so +// that fast-path only has tail-call, so that fast-path doesn't need +// function prologue/epilogue. +ABSL_ATTRIBUTE_NOINLINE +static void FreeSmallSlow(void* ptr, size_t cl) { + if (ABSL_PREDICT_TRUE(UsePerCpuCache())) { + Static::cpu_cache().Deallocate(ptr, cl); + } else if (ThreadCache* cache = ThreadCache::GetCacheIfPresent()) { + // TODO(b/134691947): If we reach this path from the ThreadCache fastpath, + // we've already checked that UsePerCpuCache is false and cache == nullptr. + // Consider optimizing this. + cache->Deallocate(ptr, cl); + } else { + // This thread doesn't have thread-cache yet or already. Delete directly + // into central cache. + Static::transfer_cache().InsertRange(cl, absl::Span<void*>(&ptr, 1)); + } +} + +namespace { + +// If this allocation can be guarded, and if it's time to do a guarded sample, +// returns a guarded allocation Span. Otherwise returns nullptr. +static void* TrySampleGuardedAllocation(size_t size, size_t alignment, + Length num_pages) { + if (num_pages == Length(1) && + GetThreadSampler()->ShouldSampleGuardedAllocation()) { + // The num_pages == 1 constraint ensures that size <= kPageSize. And since + // alignments above kPageSize cause cl == 0, we're also guaranteed + // alignment <= kPageSize + // + // In all cases kPageSize <= GPA::page_size_, so Allocate's preconditions + // are met. + return Static::guardedpage_allocator().Allocate(size, alignment); + } + return nullptr; +} + +// Performs sampling for already occurred allocation of object. +// +// For very small object sizes, object is used as 'proxy' and full +// page with sampled marked is allocated instead. +// +// For medium-sized objects that have single instance per span, +// they're simply freed and fresh page span is allocated to represent +// sampling. +// +// For large objects (i.e. allocated with do_malloc_pages) they are +// also fully reused and their span is marked as sampled. +// +// Note that do_free_with_size assumes sampled objects have +// page-aligned addresses. Please change both functions if need to +// invalidate the assumption. +// +// Note that cl might not match requested_size in case of +// memalign. I.e. when larger than requested allocation is done to +// satisfy alignment constraint. +// +// In case of out-of-memory condition when allocating span or +// stacktrace struct, this function simply cheats and returns original +// object. As if no sampling was requested. 
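+// (Illustrative walk-through, based on the code below: for a small sampled
+// allocation whose size class packs many objects per span, the original
+// object is kept as the `proxy`, a fresh sampled page span is allocated, and
+// the caller is handed the span's start address instead; when that sampled
+// span is later freed, do_free_pages() returns the proxy to its size class.)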
+static void* SampleifyAllocation(size_t requested_size, size_t weight, + size_t requested_alignment, size_t cl, + void* obj, Span* span, size_t* capacity) { + CHECK_CONDITION((cl != 0 && obj != nullptr && span == nullptr) || + (cl == 0 && obj == nullptr && span != nullptr)); + + void* proxy = nullptr; + void* guarded_alloc = nullptr; + size_t allocated_size; + + // requested_alignment = 1 means 'small size table alignment was used' + // Historically this is reported as requested_alignment = 0 + if (requested_alignment == 1) { + requested_alignment = 0; + } + + if (cl != 0) { + ASSERT(cl == Static::pagemap().sizeclass(PageIdContaining(obj))); + + allocated_size = Static::sizemap().class_to_size(cl); + + // If the caller didn't provide a span, allocate one: + Length num_pages = BytesToLengthCeil(allocated_size); + if ((guarded_alloc = TrySampleGuardedAllocation( + requested_size, requested_alignment, num_pages))) { + ASSERT(IsSampledMemory(guarded_alloc)); + const PageId p = PageIdContaining(guarded_alloc); + absl::base_internal::SpinLockHolder h(&pageheap_lock); + span = Span::New(p, num_pages); + Static::pagemap().Set(p, span); + // If we report capacity back from a size returning allocation, we can not + // report the allocated_size, as we guard the size to 'requested_size', + // and we maintain the invariant that GetAllocatedSize() must match the + // returned size from size returning allocations. So in that case, we + // report the requested size for both capacity and GetAllocatedSize(). + if (capacity) allocated_size = requested_size; + } else if ((span = Static::page_allocator().New( + num_pages, MemoryTag::kSampled)) == nullptr) { + if (capacity) *capacity = allocated_size; + return obj; + } + + size_t span_size = Length(Static::sizemap().class_to_pages(cl)).in_bytes(); + size_t objects_per_span = span_size / allocated_size; + + if (objects_per_span != 1) { + ASSERT(objects_per_span > 1); + proxy = obj; + obj = nullptr; + } + } else { + // Set allocated_size to the exact size for a page allocation. + // NOTE: if we introduce gwp-asan sampling / guarded allocations + // for page allocations, then we need to revisit do_malloc_pages as + // the current assumption is that only class sized allocs are sampled + // for gwp-asan. + allocated_size = span->bytes_in_span(); + } + if (capacity) *capacity = allocated_size; + + ASSERT(span != nullptr); + + // Grab the stack trace outside the heap lock + StackTrace tmp; + tmp.proxy = proxy; + tmp.depth = absl::GetStackTrace(tmp.stack, kMaxStackDepth, 1); + tmp.requested_size = requested_size; + tmp.requested_alignment = requested_alignment; + tmp.allocated_size = allocated_size; + tmp.weight = weight; + tmp.user_data = Static::CreateSampleUserData(); + + { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + // Allocate stack trace + StackTrace* stack = Static::stacktrace_allocator().New(); + allocation_samples_.ReportMalloc(tmp); + *stack = tmp; + span->Sample(stack); + } + + Static::peak_heap_tracker().MaybeSaveSample(); + + if (obj != nullptr) { +#if TCMALLOC_HAVE_TRACKING + // We delete directly into central cache to avoid tracking this as + // purely internal deletion. We've already (correctly) tracked + // this allocation as either malloc hit or malloc miss, and we + // must not count anything else for this allocation. + // + // TODO(b/158678747): As of cl/315283185, we may occasionally see a hit in + // the TransferCache here. Prior to that CL, we always forced a miss. Both + // of these may artificially skew our tracking data. 
+ Static::transfer_cache().InsertRange(cl, absl::Span<void*>(&obj, 1)); +#else + // We are not maintaining precise statistics on malloc hit/miss rates at our + // cache tiers. We can deallocate into our ordinary cache. + ASSERT(cl != 0); + FreeSmallSlow(obj, cl); +#endif + } + return guarded_alloc ? guarded_alloc : span->start_address(); +} + +// ShouldSampleAllocation() is called when an allocation of the given requested +// size is in progress. It returns the sampling weight of the allocation if it +// should be "sampled," and 0 otherwise. See SampleifyAllocation(). +// +// Sampling is done based on requested sizes and later unskewed during profile +// generation. +inline size_t ShouldSampleAllocation(size_t size) { + return GetThreadSampler()->RecordAllocation(size); +} + +template <typename Policy> +inline void* do_malloc_pages(Policy policy, size_t size) { + // Page allocator does not deal well with num_pages = 0. + Length num_pages = std::max<Length>(BytesToLengthCeil(size), Length(1)); + + MemoryTag tag = MemoryTag::kNormal; + if (Static::numa_topology().numa_aware()) { + tag = NumaNormalTag(policy.numa_partition()); + } + const size_t alignment = policy.align(); + Span* span = Static::page_allocator().NewAligned( + num_pages, BytesToLengthCeil(alignment), tag); + + if (span == nullptr) { + return nullptr; + } + + void* result = span->start_address(); + ASSERT( + tag == GetMemoryTag(span->start_address())); + + if (size_t weight = ShouldSampleAllocation(size)) { + CHECK_CONDITION(result == SampleifyAllocation(size, weight, alignment, 0, + nullptr, span, nullptr)); + } + + return result; +} + +template <typename Policy, typename CapacityPtr> +inline void* ABSL_ATTRIBUTE_ALWAYS_INLINE AllocSmall(Policy policy, size_t cl, + size_t size, + CapacityPtr capacity) { + ASSERT(cl != 0); + void* result; + + if (UsePerCpuCache()) { + result = Static::cpu_cache().Allocate<Policy::handle_oom>(cl); + } else { + result = ThreadCache::GetCache()->Allocate<Policy::handle_oom>(cl); + } + + if (!Policy::can_return_nullptr()) { + ASSUME(result != nullptr); + } + + if (ABSL_PREDICT_FALSE(result == nullptr)) { + SetCapacity(0, capacity); + return nullptr; + } + size_t weight; + if (ABSL_PREDICT_FALSE(weight = ShouldSampleAllocation(size))) { + return SampleifyAllocation(size, weight, policy.align(), cl, result, + nullptr, capacity); + } + SetClassCapacity(cl, capacity); + return result; +} + +// Handles freeing object that doesn't have size class, i.e. which +// is either large or sampled. We explicitly prevent inlining it to +// keep it out of fast-path. This helps avoid expensive +// prologue/epiloge for fast-path freeing functions. +ABSL_ATTRIBUTE_NOINLINE +static void do_free_pages(void* ptr, const PageId p) { + void* proxy = nullptr; + size_t size; + bool notify_sampled_alloc = false; + + Span* span = Static::pagemap().GetExistingDescriptor(p); + ASSERT(span != nullptr); + // Prefetch now to avoid a stall accessing *span while under the lock. 
+ span->Prefetch(); + { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + ASSERT(span->first_page() == p); + if (StackTrace* st = span->Unsample()) { + proxy = st->proxy; + size = st->allocated_size; + if (proxy == nullptr && size <= kMaxSize) { + tracking::Report(kFreeMiss, + Static::sizemap().SizeClass( + CppPolicy().InSameNumaPartitionAs(ptr), size), + 1); + } + notify_sampled_alloc = true; + Static::DestroySampleUserData(st->user_data); + Static::stacktrace_allocator().Delete(st); + } + if (IsSampledMemory(ptr)) { + if (Static::guardedpage_allocator().PointerIsMine(ptr)) { + // Release lock while calling Deallocate() since it does a system call. + pageheap_lock.Unlock(); + Static::guardedpage_allocator().Deallocate(ptr); + pageheap_lock.Lock(); + Span::Delete(span); + } else { + ASSERT(reinterpret_cast<uintptr_t>(ptr) % kPageSize == 0); + Static::page_allocator().Delete(span, MemoryTag::kSampled); + } + } else if (kNumaPartitions != 1) { + ASSERT(reinterpret_cast<uintptr_t>(ptr) % kPageSize == 0); + Static::page_allocator().Delete(span, GetMemoryTag(ptr)); + } else { + ASSERT(reinterpret_cast<uintptr_t>(ptr) % kPageSize == 0); + Static::page_allocator().Delete(span, MemoryTag::kNormal); + } + } + + if (notify_sampled_alloc) { + } + + if (proxy) { + const auto policy = CppPolicy().InSameNumaPartitionAs(proxy); + const size_t cl = Static::sizemap().SizeClass(policy, size); + FreeSmall<Hooks::NO>(proxy, cl); + } +} + +#ifndef NDEBUG +static size_t GetSizeClass(void* ptr) { + const PageId p = PageIdContaining(ptr); + return Static::pagemap().sizeclass(p); +} +#endif + +// Helper for the object deletion (free, delete, etc.). Inputs: +// ptr is object to be freed +// cl is the size class of that object, or 0 if it's unknown +// have_cl is true iff cl is known and is non-0. +// +// Note that since have_cl is compile-time constant, genius compiler +// would not need it. Since it would be able to somehow infer that +// GetSizeClass never produces 0 cl, and so it +// would know that places that call this function with explicit 0 is +// "have_cl-case" and others are "!have_cl-case". But we certainly +// don't have such compiler. See also do_free_with_size below. +template <bool have_cl, Hooks hooks_state> +inline ABSL_ATTRIBUTE_ALWAYS_INLINE void do_free_with_cl(void* ptr, size_t cl) { + // !have_cl -> cl == 0 + ASSERT(have_cl || cl == 0); + + const PageId p = PageIdContaining(ptr); + + // if we have_cl, then we've excluded ptr == nullptr case. See + // comment in do_free_with_size. Thus we only bother testing nullptr + // in non-sized case. + // + // Thus: ptr == nullptr -> !have_cl + ASSERT(ptr != nullptr || !have_cl); + if (!have_cl && ABSL_PREDICT_FALSE(ptr == nullptr)) { + return; + } + + // ptr must be a result of a previous malloc/memalign/... call, and + // therefore static initialization must have already occurred. 
+ ASSERT(Static::IsInited()); + + if (!have_cl) { + cl = Static::pagemap().sizeclass(p); + } + if (have_cl || ABSL_PREDICT_TRUE(cl != 0)) { + ASSERT(cl == GetSizeClass(ptr)); + ASSERT(ptr != nullptr); + ASSERT(!Static::pagemap().GetExistingDescriptor(p)->sampled()); + FreeSmall<hooks_state>(ptr, cl); + } else { + invoke_delete_hooks_and_free<do_free_pages, hooks_state>(ptr, p); + } +} + +inline ABSL_ATTRIBUTE_ALWAYS_INLINE void do_free(void* ptr) { + return do_free_with_cl<false, Hooks::RUN>(ptr, 0); +} + +void do_free_no_hooks(void* ptr) { + return do_free_with_cl<false, Hooks::NO>(ptr, 0); +} + +template <typename AlignPolicy> +bool CorrectSize(void* ptr, size_t size, AlignPolicy align); + +bool CorrectAlignment(void* ptr, std::align_val_t alignment); + +inline ABSL_ATTRIBUTE_ALWAYS_INLINE void FreePages(void* ptr) { + const PageId p = PageIdContaining(ptr); + invoke_delete_hooks_and_free<do_free_pages, Hooks::RUN>(ptr, p); +} + +template <typename AlignPolicy> +inline ABSL_ATTRIBUTE_ALWAYS_INLINE void do_free_with_size(void* ptr, + size_t size, + AlignPolicy align) { + ASSERT(CorrectSize(ptr, size, align)); + ASSERT(CorrectAlignment(ptr, static_cast<std::align_val_t>(align.align()))); + + // This is an optimized path that may be taken if the binary is compiled + // with -fsized-delete. We attempt to discover the size class cheaply + // without any cache misses by doing a plain computation that + // maps from size to size-class. + // + // The optimized path doesn't work with sampled objects, whose deletions + // trigger more operations and require to visit metadata. + if (ABSL_PREDICT_FALSE(IsSampledMemory(ptr))) { + // we don't know true class size of the ptr + if (ptr == nullptr) return; + return FreePages(ptr); + } + + // At this point, since ptr's tag bit is 1, it means that it + // cannot be nullptr either. Thus all code below may rely on ptr != + // nullptr. And particularly, since we're only caller of + // do_free_with_cl with have_cl == true, it means have_cl implies + // ptr != nullptr. + ASSERT(ptr != nullptr); + + uint32_t cl; + if (ABSL_PREDICT_FALSE(!Static::sizemap().GetSizeClass( + CppPolicy().AlignAs(align.align()).InSameNumaPartitionAs(ptr), size, + &cl))) { + // We couldn't calculate the size class, which means size > kMaxSize. + ASSERT(size > kMaxSize || align.align() > alignof(std::max_align_t)); + static_assert(kMaxSize >= kPageSize, "kMaxSize must be at least kPageSize"); + return FreePages(ptr); + } + + return do_free_with_cl<true, Hooks::RUN>(ptr, cl); +} + +inline size_t GetSize(const void* ptr) { + if (ptr == nullptr) return 0; + const PageId p = PageIdContaining(ptr); + size_t cl = Static::pagemap().sizeclass(p); + if (cl != 0) { + return Static::sizemap().class_to_size(cl); + } else { + const Span* span = Static::pagemap().GetExistingDescriptor(p); + if (span->sampled()) { + if (Static::guardedpage_allocator().PointerIsMine(ptr)) { + return Static::guardedpage_allocator().GetRequestedSize(ptr); + } + return span->sampled_stack()->allocated_size; + } else { + return span->bytes_in_span(); + } + } +} + +// Checks that an asserted object size for <ptr> is valid. +template <typename AlignPolicy> +bool CorrectSize(void* ptr, size_t size, AlignPolicy align) { + // size == 0 means we got no hint from sized delete, so we certainly don't + // have an incorrect one. + if (size == 0) return true; + if (ptr == nullptr) return true; + uint32_t cl = 0; + // Round-up passed in size to how much tcmalloc allocates for that size. 
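+  // (Illustrative: if 24-byte requests map to a 32-byte size class, a sized
+  // delete that passes 24 for an object whose allocated size is 32 still
+  // passes this check, because both sizes round up to 32.)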
+ if (Static::guardedpage_allocator().PointerIsMine(ptr)) { + size = Static::guardedpage_allocator().GetRequestedSize(ptr); + } else if (Static::sizemap().GetSizeClass(CppPolicy().AlignAs(align.align()), + size, &cl)) { + size = Static::sizemap().class_to_size(cl); + } else { + size = BytesToLengthCeil(size).in_bytes(); + } + size_t actual = GetSize(ptr); + if (ABSL_PREDICT_TRUE(actual == size)) return true; + Log(kLog, __FILE__, __LINE__, "size check failed", actual, size, cl); + return false; +} + +// Checks that an asserted object <ptr> has <align> alignment. +bool CorrectAlignment(void* ptr, std::align_val_t alignment) { + size_t align = static_cast<size_t>(alignment); + ASSERT(absl::has_single_bit(align)); + return ((reinterpret_cast<uintptr_t>(ptr) & (align - 1)) == 0); +} + +// Helpers for use by exported routines below or inside debugallocation.cc: + +inline void do_malloc_stats() { PrintStats(1); } + +inline int do_mallopt(int cmd, int value) { + return 1; // Indicates error +} + +#ifdef TCMALLOC_HAVE_STRUCT_MALLINFO +inline struct mallinfo do_mallinfo() { + TCMallocStats stats; + ExtractTCMallocStats(&stats, false); + + // Just some of the fields are filled in. + struct mallinfo info; + memset(&info, 0, sizeof(info)); + + // Unfortunately, the struct contains "int" field, so some of the + // size values will be truncated. + info.arena = static_cast<int>(stats.pageheap.system_bytes); + info.fsmblks = static_cast<int>(stats.thread_bytes + stats.central_bytes + + stats.transfer_bytes); + info.fordblks = static_cast<int>(stats.pageheap.free_bytes + + stats.pageheap.unmapped_bytes); + info.uordblks = static_cast<int>(InUseByApp(stats)); + + return info; +} +#endif // TCMALLOC_HAVE_STRUCT_MALLINFO + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +using tcmalloc::tcmalloc_internal::AllocSmall; +using tcmalloc::tcmalloc_internal::CppPolicy; +using tcmalloc::tcmalloc_internal::do_free_no_hooks; +#ifdef TCMALLOC_HAVE_STRUCT_MALLINFO +using tcmalloc::tcmalloc_internal::do_mallinfo; +#endif +using tcmalloc::tcmalloc_internal::do_malloc_pages; +using tcmalloc::tcmalloc_internal::do_malloc_stats; +using tcmalloc::tcmalloc_internal::do_mallopt; +using tcmalloc::tcmalloc_internal::GetThreadSampler; +using tcmalloc::tcmalloc_internal::MallocPolicy; +using tcmalloc::tcmalloc_internal::SetClassCapacity; +using tcmalloc::tcmalloc_internal::SetPagesCapacity; +using tcmalloc::tcmalloc_internal::Static; +using tcmalloc::tcmalloc_internal::UsePerCpuCache; + +#ifdef TCMALLOC_DEPRECATED_PERTHREAD +using tcmalloc::tcmalloc_internal::ThreadCache; +#endif // TCMALLOC_DEPRECATED_PERTHREAD + +// Slow path implementation. +// This function is used by `fast_alloc` if the allocation requires page sized +// allocations or some complex logic is required such as initialization, +// invoking new/delete hooks, sampling, etc. +// +// TODO(b/130771275): This function is marked as static, rather than appearing +// in the anonymous namespace, to workaround incomplete heapz filtering. 
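+// (Descriptive note: the Policy parameter bundles the allocation flavor used
+// throughout this file: Policy::handle_oom, Policy::can_return_nullptr(),
+// Policy::invoke_hooks(), policy.align() and policy.numa_partition().
+// CppPolicy and MallocPolicy are the two concrete families used by the
+// operator new and malloc entry points below.)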
+template <typename Policy, typename CapacityPtr = std::nullptr_t> +static void* ABSL_ATTRIBUTE_SECTION(google_malloc) + slow_alloc(Policy policy, size_t size, CapacityPtr capacity = nullptr) { + Static::InitIfNecessary(); + GetThreadSampler()->UpdateFastPathState(); + void* p; + uint32_t cl; + bool is_small = Static::sizemap().GetSizeClass(policy, size, &cl); + if (ABSL_PREDICT_TRUE(is_small)) { + p = AllocSmall(policy, cl, size, capacity); + } else { + p = do_malloc_pages(policy, size); + // Set capacity to the exact size for a page allocation. + // This needs to be revisited if we introduce gwp-asan + // sampling / guarded allocations to do_malloc_pages(). + SetPagesCapacity(p, size, capacity); + if (ABSL_PREDICT_FALSE(p == nullptr)) { + return Policy::handle_oom(size); + } + } + if (Policy::invoke_hooks()) { + } + return p; +} + +template <typename Policy, typename CapacityPtr = std::nullptr_t> +static inline void* ABSL_ATTRIBUTE_ALWAYS_INLINE +fast_alloc(Policy policy, size_t size, CapacityPtr capacity = nullptr) { + // If size is larger than kMaxSize, it's not fast-path anymore. In + // such case, GetSizeClass will return false, and we'll delegate to the slow + // path. If malloc is not yet initialized, we may end up with cl == 0 + // (regardless of size), but in this case should also delegate to the slow + // path by the fast path check further down. + uint32_t cl; + bool is_small = Static::sizemap().GetSizeClass(policy, size, &cl); + if (ABSL_PREDICT_FALSE(!is_small)) { + return slow_alloc(policy, size, capacity); + } + + // When using per-thread caches, we have to check for the presence of the + // cache for this thread before we try to sample, as slow_alloc will + // also try to sample the allocation. +#ifdef TCMALLOC_DEPRECATED_PERTHREAD + ThreadCache* const cache = ThreadCache::GetCacheIfPresent(); + if (ABSL_PREDICT_FALSE(cache == nullptr)) { + return slow_alloc(policy, size, capacity); + } +#endif + // TryRecordAllocationFast() returns true if no extra logic is required, e.g.: + // - this allocation does not need to be sampled + // - no new/delete hooks need to be invoked + // - no need to initialize thread globals, data or caches. + // The method updates 'bytes until next sample' thread sampler counters. + if (ABSL_PREDICT_FALSE(!GetThreadSampler()->TryRecordAllocationFast(size))) { + return slow_alloc(policy, size, capacity); + } + + // Fast path implementation for allocating small size memory. + // This code should only be reached if all of the below conditions are met: + // - the size does not exceed the maximum size (size class > 0) + // - cpu / thread cache data has been initialized. + // - the allocation is not subject to sampling / gwp-asan. + // - no new/delete hook is installed and required to be called. + ASSERT(cl != 0); + void* ret; +#ifndef TCMALLOC_DEPRECATED_PERTHREAD + // The CPU cache should be ready. + ret = Static::cpu_cache().Allocate<Policy::handle_oom>(cl); +#else // !defined(TCMALLOC_DEPRECATED_PERTHREAD) + // The ThreadCache should be ready. 
+ ASSERT(cache != nullptr); + ret = cache->Allocate<Policy::handle_oom>(cl); +#endif // TCMALLOC_DEPRECATED_PERTHREAD + if (!Policy::can_return_nullptr()) { + ASSUME(ret != nullptr); + } + SetClassCapacity(ret, cl, capacity); + return ret; +} + +using tcmalloc::tcmalloc_internal::GetOwnership; +using tcmalloc::tcmalloc_internal::GetSize; + +extern "C" size_t MallocExtension_Internal_GetAllocatedSize(const void* ptr) { + ASSERT(!ptr || + GetOwnership(ptr) != tcmalloc::MallocExtension::Ownership::kNotOwned); + return GetSize(ptr); +} + +extern "C" void MallocExtension_Internal_MarkThreadBusy() { + // Allocate to force the creation of a thread cache, but avoid + // invoking any hooks. + Static::InitIfNecessary(); + + if (UsePerCpuCache()) { + return; + } + + do_free_no_hooks(slow_alloc(CppPolicy().Nothrow().WithoutHooks(), 0)); +} + +//------------------------------------------------------------------- +// Exported routines +//------------------------------------------------------------------- + +using tcmalloc::tcmalloc_internal::AlignAsPolicy; +using tcmalloc::tcmalloc_internal::CorrectAlignment; +using tcmalloc::tcmalloc_internal::CorrectSize; +using tcmalloc::tcmalloc_internal::DefaultAlignPolicy; +using tcmalloc::tcmalloc_internal::do_free; +using tcmalloc::tcmalloc_internal::do_free_with_size; + +// depends on TCMALLOC_HAVE_STRUCT_MALLINFO, so needs to come after that. +#include "tcmalloc/libc_override.h" + +extern "C" ABSL_CACHELINE_ALIGNED void* TCMallocInternalMalloc( + size_t size) noexcept { + // Use TCMallocInternalMemalign to avoid requiring size % + // alignof(std::max_align_t) == 0. TCMallocInternalAlignedAlloc enforces this + // property. + return TCMallocInternalMemalign(alignof(std::max_align_t), size); +} + +extern "C" ABSL_CACHELINE_ALIGNED void* TCMallocInternalNew(size_t size) { + return fast_alloc(CppPolicy(), size); +} + +extern "C" ABSL_ATTRIBUTE_SECTION(google_malloc) tcmalloc::sized_ptr_t + tcmalloc_size_returning_operator_new(size_t size) { + size_t capacity; + void* p = fast_alloc(CppPolicy(), size, &capacity); + return {p, capacity}; +} + +extern "C" ABSL_CACHELINE_ALIGNED void* TCMallocInternalMalloc_aligned( + size_t size, std::align_val_t alignment) noexcept { + return fast_alloc(MallocPolicy().AlignAs(alignment), size); +} + +extern "C" ABSL_CACHELINE_ALIGNED void* TCMallocInternalNewAligned( + size_t size, std::align_val_t alignment) { + return fast_alloc(CppPolicy().AlignAs(alignment), size); +} + +#ifdef TCMALLOC_ALIAS +extern "C" void* TCMallocInternalNewAligned_nothrow( + size_t size, std::align_val_t alignment, const std::nothrow_t& nt) noexcept + // Note: we use malloc rather than new, as we are allowed to return nullptr. + // The latter crashes in that case. 
+ TCMALLOC_ALIAS(TCMallocInternalMalloc_aligned); +#else +extern "C" ABSL_ATTRIBUTE_SECTION( + google_malloc) void* TCMallocInternalNewAligned_nothrow(size_t size, + std::align_val_t + alignment, + const std::nothrow_t& + nt) noexcept { + return fast_alloc(CppPolicy().Nothrow().AlignAs(alignment), size); +} +#endif // TCMALLOC_ALIAS + +extern "C" ABSL_CACHELINE_ALIGNED void TCMallocInternalFree( + void* ptr) noexcept { + do_free(ptr); +} + +extern "C" void TCMallocInternalSdallocx(void* ptr, size_t size, + int flags) noexcept { + size_t alignment = alignof(std::max_align_t); + + if (ABSL_PREDICT_FALSE(flags != 0)) { + ASSERT((flags & ~0x3f) == 0); + alignment = static_cast<size_t>(1ull << (flags & 0x3f)); + } + + return do_free_with_size(ptr, size, AlignAsPolicy(alignment)); +} + +extern "C" void* TCMallocInternalCalloc(size_t n, size_t elem_size) noexcept { + // Overflow check + const size_t size = n * elem_size; + if (elem_size != 0 && size / elem_size != n) { + return MallocPolicy::handle_oom(std::numeric_limits<size_t>::max()); + } + void* result = fast_alloc(MallocPolicy(), size); + if (result != nullptr) { + memset(result, 0, size); + } + return result; +} + +// Here and below we use TCMALLOC_ALIAS (if supported) to make +// identical functions aliases. This saves space in L1 instruction +// cache. As of now it saves ~9K. +extern "C" void TCMallocInternalCfree(void* ptr) noexcept +#ifdef TCMALLOC_ALIAS + TCMALLOC_ALIAS(TCMallocInternalFree); +#else +{ + do_free(ptr); +} +#endif // TCMALLOC_ALIAS + +static inline ABSL_ATTRIBUTE_ALWAYS_INLINE void* do_realloc(void* old_ptr, + size_t new_size) { + Static::InitIfNecessary(); + // Get the size of the old entry + const size_t old_size = GetSize(old_ptr); + + // Reallocate if the new size is larger than the old size, + // or if the new size is significantly smaller than the old size. + // We do hysteresis to avoid resizing ping-pongs: + // . If we need to grow, grow to max(new_size, old_size * 1.X) + // . Don't shrink unless new_size < old_size * 0.Y + // X and Y trade-off time for wasted space. For now we do 1.25 and 0.5. + const size_t min_growth = std::min( + old_size / 4, + std::numeric_limits<size_t>::max() - old_size); // Avoid overflow. + const size_t lower_bound_to_grow = old_size + min_growth; + const size_t upper_bound_to_shrink = old_size / 2; + if ((new_size > old_size) || (new_size < upper_bound_to_shrink)) { + // Need to reallocate. + void* new_ptr = nullptr; + + if (new_size > old_size && new_size < lower_bound_to_grow) { + // Avoid fast_alloc() reporting a hook with the lower bound size + // as we the expectation for pointer returning allocation functions + // is that malloc hooks are invoked with the requested_size. + new_ptr = fast_alloc(MallocPolicy().Nothrow().WithoutHooks(), + lower_bound_to_grow); + if (new_ptr != nullptr) { + } + } + if (new_ptr == nullptr) { + // Either new_size is not a tiny increment, or last do_malloc failed. + new_ptr = fast_alloc(MallocPolicy(), new_size); + } + if (new_ptr == nullptr) { + return nullptr; + } + memcpy(new_ptr, old_ptr, ((old_size < new_size) ? old_size : new_size)); + // We could use a variant of do_free() that leverages the fact + // that we already know the sizeclass of old_ptr. The benefit + // would be small, so don't bother. 
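+    // (Illustrative numbers for the thresholds above: with old_size = 1000,
+    // lower_bound_to_grow is 1250 and upper_bound_to_shrink is 500, so a
+    // shrink to anything in [500, 1000] takes the else branch below and keeps
+    // the original block, while a grow into (1000, 1250) first tries an
+    // allocation of 1250 bytes.)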
+ do_free(old_ptr); + return new_ptr; + } else { + return old_ptr; + } +} + +extern "C" void* TCMallocInternalRealloc(void* old_ptr, + size_t new_size) noexcept { + if (old_ptr == NULL) { + return fast_alloc(MallocPolicy(), new_size); + } + if (new_size == 0) { + do_free(old_ptr); + return NULL; + } + return do_realloc(old_ptr, new_size); +} + +extern "C" void* TCMallocInternalNewNothrow(size_t size, + const std::nothrow_t&) noexcept { + return fast_alloc(CppPolicy().Nothrow(), size); +} + +extern "C" tcmalloc::sized_ptr_t tcmalloc_size_returning_operator_new_nothrow( + size_t size) noexcept { + size_t capacity; + void* p = fast_alloc(CppPolicy().Nothrow(), size, &capacity); + return {p, capacity}; +} + +extern "C" ABSL_CACHELINE_ALIGNED void TCMallocInternalDelete(void* p) noexcept +#ifdef TCMALLOC_ALIAS + TCMALLOC_ALIAS(TCMallocInternalFree); +#else +{ + do_free(p); +} +#endif // TCMALLOC_ALIAS + +extern "C" void TCMallocInternalDeleteAligned( + void* p, std::align_val_t alignment) noexcept +#if defined(TCMALLOC_ALIAS) && defined(NDEBUG) + TCMALLOC_ALIAS(TCMallocInternalDelete); +#else +{ + // Note: The aligned delete/delete[] implementations differ slightly from + // their respective aliased implementations to take advantage of checking the + // passed-in alignment. + ASSERT(CorrectAlignment(p, alignment)); + return TCMallocInternalDelete(p); +} +#endif + +extern "C" ABSL_CACHELINE_ALIGNED void TCMallocInternalDeleteSized( + void* p, size_t size) noexcept { + ASSERT(CorrectSize(p, size, DefaultAlignPolicy())); + do_free_with_size(p, size, DefaultAlignPolicy()); +} + +extern "C" void TCMallocInternalDeleteSizedAligned( + void* p, size_t t, std::align_val_t alignment) noexcept { + return do_free_with_size(p, t, AlignAsPolicy(alignment)); +} + +extern "C" void TCMallocInternalDeleteArraySized(void* p, size_t size) noexcept +#ifdef TCMALLOC_ALIAS + TCMALLOC_ALIAS(TCMallocInternalDeleteSized); +#else +{ + do_free_with_size(p, size, DefaultAlignPolicy()); +} +#endif + +extern "C" void TCMallocInternalDeleteArraySizedAligned( + void* p, size_t t, std::align_val_t alignment) noexcept +#ifdef TCMALLOC_ALIAS + TCMALLOC_ALIAS(TCMallocInternalDeleteSizedAligned); +#else +{ + return TCMallocInternalDeleteSizedAligned(p, t, alignment); +} +#endif + +// Standard C++ library implementations define and use this +// (via ::operator delete(ptr, nothrow)). +// But it's really the same as normal delete, so we just do the same thing. 
+extern "C" void TCMallocInternalDeleteNothrow(void* p, + const std::nothrow_t&) noexcept +#ifdef TCMALLOC_ALIAS + TCMALLOC_ALIAS(TCMallocInternalFree); +#else +{ + do_free(p); +} +#endif // TCMALLOC_ALIAS + +#if defined(TCMALLOC_ALIAS) && defined(NDEBUG) +extern "C" void TCMallocInternalDeleteAligned_nothrow( + void* p, std::align_val_t alignment, const std::nothrow_t& nt) noexcept + TCMALLOC_ALIAS(TCMallocInternalDelete); +#else +extern "C" ABSL_ATTRIBUTE_SECTION( + google_malloc) void TCMallocInternalDeleteAligned_nothrow(void* p, + std::align_val_t + alignment, + const std::nothrow_t& + nt) noexcept { + ASSERT(CorrectAlignment(p, alignment)); + return TCMallocInternalDelete(p); +} +#endif + +extern "C" void* TCMallocInternalNewArray(size_t size) +#ifdef TCMALLOC_ALIAS + TCMALLOC_ALIAS(TCMallocInternalNew); +#else +{ + return fast_alloc(CppPolicy().WithoutHooks(), size); +} +#endif // TCMALLOC_ALIAS + +extern "C" void* TCMallocInternalNewArrayAligned(size_t size, + std::align_val_t alignment) +#if defined(TCMALLOC_ALIAS) && defined(NDEBUG) + TCMALLOC_ALIAS(TCMallocInternalNewAligned); +#else +{ + return TCMallocInternalNewAligned(size, alignment); +} +#endif + +extern "C" void* TCMallocInternalNewArrayNothrow(size_t size, + const std::nothrow_t&) noexcept +#ifdef TCMALLOC_ALIAS + TCMALLOC_ALIAS(TCMallocInternalNewNothrow); +#else +{ + return fast_alloc(CppPolicy().Nothrow(), size); +} +#endif // TCMALLOC_ALIAS + +// Note: we use malloc rather than new, as we are allowed to return nullptr. +// The latter crashes in that case. +#if defined(TCMALLOC_ALIAS) && defined(NDEBUG) +extern "C" void* TCMallocInternalNewArrayAligned_nothrow( + size_t size, std::align_val_t alignment, const std::nothrow_t&) noexcept + TCMALLOC_ALIAS(TCMallocInternalMalloc_aligned); +#else +extern "C" ABSL_ATTRIBUTE_SECTION( + google_malloc) void* TCMallocInternalNewArrayAligned_nothrow(size_t size, + std::align_val_t + alignment, + const std:: + nothrow_t&) noexcept { + return TCMallocInternalMalloc_aligned(size, alignment); +} +#endif + +extern "C" void TCMallocInternalDeleteArray(void* p) noexcept +#ifdef TCMALLOC_ALIAS + TCMALLOC_ALIAS(TCMallocInternalFree); +#else +{ + do_free(p); +} +#endif // TCMALLOC_ALIAS + +extern "C" void TCMallocInternalDeleteArrayAligned( + void* p, std::align_val_t alignment) noexcept +#if defined(TCMALLOC_ALIAS) && defined(NDEBUG) + TCMALLOC_ALIAS(TCMallocInternalDelete); +#else +{ + ASSERT(CorrectAlignment(p, alignment)); + return TCMallocInternalDelete(p); +} +#endif + +extern "C" void TCMallocInternalDeleteArrayNothrow( + void* p, const std::nothrow_t&) noexcept +#ifdef TCMALLOC_ALIAS + TCMALLOC_ALIAS(TCMallocInternalFree); +#else +{ + do_free(p); +} +#endif // TCMALLOC_ALIAS + +#if defined(TCMALLOC_ALIAS) && defined(NDEBUG) +extern "C" void TCMallocInternalDeleteArrayAligned_nothrow( + void* p, std::align_val_t alignment, const std::nothrow_t&) noexcept + TCMALLOC_ALIAS(TCMallocInternalDelete); +#else +extern "C" ABSL_ATTRIBUTE_SECTION( + google_malloc) void TCMallocInternalDeleteArrayAligned_nothrow(void* p, + std::align_val_t + alignment, + const std:: + nothrow_t&) noexcept { + ASSERT(CorrectAlignment(p, alignment)); + return TCMallocInternalDelete(p); +} +#endif + +extern "C" void* TCMallocInternalMemalign(size_t align, size_t size) noexcept { + ASSERT(absl::has_single_bit(align)); + return fast_alloc(MallocPolicy().AlignAs(align), size); +} + +extern "C" void* TCMallocInternalAlignedAlloc(size_t align, + size_t size) noexcept +#if defined(TCMALLOC_ALIAS) && defined(NDEBUG) + 
TCMALLOC_ALIAS(TCMallocInternalMemalign); +#else +{ + // aligned_alloc is memalign, but with the requirement that: + // align be a power of two (like memalign) + // size be a multiple of align (for the time being). + ASSERT(align != 0); + ASSERT(size % align == 0); + + return TCMallocInternalMemalign(align, size); +} +#endif + +extern "C" int TCMallocInternalPosixMemalign(void** result_ptr, size_t align, + size_t size) noexcept { + if (((align % sizeof(void*)) != 0) || !absl::has_single_bit(align)) { + return EINVAL; + } + void* result = fast_alloc(MallocPolicy().Nothrow().AlignAs(align), size); + if (result == NULL) { + return ENOMEM; + } else { + *result_ptr = result; + return 0; + } +} + +static size_t pagesize = 0; + +extern "C" void* TCMallocInternalValloc(size_t size) noexcept { + // Allocate page-aligned object of length >= size bytes + if (pagesize == 0) pagesize = getpagesize(); + return fast_alloc(MallocPolicy().Nothrow().AlignAs(pagesize), size); +} + +extern "C" void* TCMallocInternalPvalloc(size_t size) noexcept { + // Round up size to a multiple of pagesize + if (pagesize == 0) pagesize = getpagesize(); + if (size == 0) { // pvalloc(0) should allocate one page, according to + size = pagesize; // http://man.free4web.biz/man3/libmpatrol.3.html + } + size = (size + pagesize - 1) & ~(pagesize - 1); + return fast_alloc(MallocPolicy().Nothrow().AlignAs(pagesize), size); +} + +extern "C" void TCMallocInternalMallocStats(void) noexcept { + do_malloc_stats(); +} + +extern "C" int TCMallocInternalMallOpt(int cmd, int value) noexcept { + return do_mallopt(cmd, value); +} + +#ifdef TCMALLOC_HAVE_STRUCT_MALLINFO +extern "C" struct mallinfo TCMallocInternalMallocInfo(void) noexcept { + return do_mallinfo(); +} +#endif + +extern "C" size_t TCMallocInternalMallocSize(void* ptr) noexcept { + ASSERT(GetOwnership(ptr) != tcmalloc::MallocExtension::Ownership::kNotOwned); + return GetSize(ptr); +} + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +// The constructor allocates an object to ensure that initialization +// runs before main(), and therefore we do not have a chance to become +// multi-threaded before initialization. We also create the TSD key +// here. Presumably by the time this constructor runs, glibc is in +// good enough shape to handle pthread_key_create(). +// +// The destructor prints stats when the program exits. +class TCMallocGuard { + public: + TCMallocGuard() { + TCMallocInternalFree(TCMallocInternalMalloc(1)); + ThreadCache::InitTSD(); + TCMallocInternalFree(TCMallocInternalMalloc(1)); + } +}; + +static TCMallocGuard module_enter_exit_hook; + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/tcmalloc.h b/contrib/libs/tcmalloc/tcmalloc/tcmalloc.h new file mode 100644 index 0000000000..1a8eeb4157 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/tcmalloc.h @@ -0,0 +1,126 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +// This is the exported interface from tcmalloc. For most users, +// tcmalloc just overrides existing libc functionality, and thus this +// .h file isn't needed. But we also provide the tcmalloc allocation +// routines through their own, dedicated name -- so people can wrap +// their own malloc functions around tcmalloc routines, perhaps. +// These are exported here. + +#ifndef TCMALLOC_TCMALLOC_H_ +#define TCMALLOC_TCMALLOC_H_ + +#include <malloc.h> +#include <stddef.h> + +#include "absl/base/attributes.h" +#include "tcmalloc/internal/config.h" +#include "tcmalloc/internal/declarations.h" + +// __THROW is defined in glibc systems. It means, counter-intuitively, +// "This function will never throw an exception." It's an optional +// optimization tool, but we may need to use it to match glibc prototypes. +#ifndef __THROW // I guess we're not on a glibc system +#define __THROW __attribute__((__nothrow__)) +#endif + +#ifdef __cplusplus + +extern "C" { +#endif +void* TCMallocInternalMalloc(size_t size) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); +void TCMallocInternalFree(void* ptr) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); +void TCMallocInternalSdallocx(void* ptr, size_t size, int flags) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); +void* TCMallocInternalRealloc(void* ptr, size_t size) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); +void* TCMallocInternalCalloc(size_t n, size_t size) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); +void TCMallocInternalCfree(void* ptr) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); + +void* TCMallocInternalAlignedAlloc(size_t align, size_t __size) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); +void* TCMallocInternalMemalign(size_t align, size_t __size) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); +int TCMallocInternalPosixMemalign(void** ptr, size_t align, size_t size) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); +void* TCMallocInternalValloc(size_t __size) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); +void* TCMallocInternalPvalloc(size_t __size) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); + +void TCMallocInternalMallocStats(void) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); +int TCMallocInternalMallOpt(int cmd, int value) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); +#if defined(TCMALLOC_HAVE_STRUCT_MALLINFO) +struct mallinfo TCMallocInternalMallocInfo(void) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); +#endif + +// This is an alias for MallocExtension::GetAllocatedSize(). 
+// It is equivalent to +// OS X: malloc_size() +// glibc: malloc_usable_size() +// Windows: _msize() +size_t TCMallocInternalMallocSize(void* ptr) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); + +#ifdef __cplusplus +void* TCMallocInternalNew(size_t size) ABSL_ATTRIBUTE_SECTION(google_malloc); +void* TCMallocInternalNewAligned(size_t size, std::align_val_t alignment) + ABSL_ATTRIBUTE_SECTION(google_malloc); +void* TCMallocInternalNewNothrow(size_t size, const std::nothrow_t&) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); +void TCMallocInternalDelete(void* p) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); +void TCMallocInternalDeleteAligned(void* p, std::align_val_t alignment) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); +void TCMallocInternalDeleteSized(void* p, size_t size) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); +void TCMallocInternalDeleteSizedAligned(void* p, size_t t, + std::align_val_t alignment) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); +void TCMallocInternalDeleteNothrow(void* p, const std::nothrow_t&) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); +void* TCMallocInternalNewArray(size_t size) + ABSL_ATTRIBUTE_SECTION(google_malloc); +void* TCMallocInternalNewArrayAligned(size_t size, std::align_val_t alignment) + ABSL_ATTRIBUTE_SECTION(google_malloc); +void* TCMallocInternalNewArrayNothrow(size_t size, + const std::nothrow_t&) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); +void TCMallocInternalDeleteArray(void* p) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); +void TCMallocInternalDeleteArrayAligned(void* p, + std::align_val_t alignment) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); +void TCMallocInternalDeleteArraySized(void* p, size_t size) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); +void TCMallocInternalDeleteArraySizedAligned(void* p, size_t t, + std::align_val_t alignment) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); +void TCMallocInternalDeleteArrayNothrow(void* p, const std::nothrow_t&) __THROW + ABSL_ATTRIBUTE_SECTION(google_malloc); +} +#endif + +void TCMallocInternalAcquireLocks(); +void TCMallocInternalReleaseLocks(); + +#endif // TCMALLOC_TCMALLOC_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/tcmalloc_large_test.cc b/contrib/libs/tcmalloc/tcmalloc/tcmalloc_large_test.cc new file mode 100644 index 0000000000..f940120f46 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/tcmalloc_large_test.cc @@ -0,0 +1,204 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// This is a unit test for large allocations in malloc and friends. +// "Large" means "so large that they overflow the address space". +// For 32 bits, this means allocations near 2^32 bytes and 2^31 bytes. +// For 64 bits, this means allocations near 2^64 bytes and 2^63 bytes. 
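Editorial aside, not part of the patch: the "impossibly large" request sizes used by the test cases below are built from unsigned wraparound (zero - i) and from the midpoint of the size_t range. The following is a minimal standalone sketch of just that arithmetic; the names near_max and half are illustrative only.

#include <cstddef>
#include <cstdio>
#include <limits>

int main() {
  // size_t arithmetic is modular, so 0 - i wraps around to a value just
  // below SIZE_MAX; this is how the tests construct requests that cannot
  // possibly be satisfied.
  constexpr size_t zero = 0;
  const size_t near_max = zero - 16;  // == SIZE_MAX - 15
  // Roughly half the address-space range: 2^63 on a 64-bit target.
  const size_t half = std::numeric_limits<size_t>::max() / 2 + 1;
  std::printf("near_max = %zu\nhalf = %zu\n", near_max, half);
  return 0;
}

malloc(near_max) must fail on any platform, while malloc(half) may or may not succeed, which is exactly the split between TryAllocExpectFail and TryAllocMightFail below.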
+ +#include <errno.h> +#include <stddef.h> +#include <stdlib.h> + +#include <utility> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/container/flat_hash_set.h" +#include "absl/container/node_hash_set.h" +#include "tcmalloc/common.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/malloc_extension.h" + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +// Alloc a size that should always fail. +void TryAllocExpectFail(size_t size) { + void* p1 = malloc(size); + ASSERT_EQ(p1, nullptr); + + void* p2 = malloc(1); + ASSERT_NE(p2, nullptr); + + void* p3 = realloc(p2, size); + ASSERT_EQ(p3, nullptr); + + free(p2); +} + +// Alloc a size that might work and might fail. +// If it does work, touch some pages. + +void TryAllocMightFail(size_t size) { + unsigned char* p = static_cast<unsigned char*>(malloc(size)); + if (p != nullptr) { + unsigned char volatile* vp = p; // prevent optimizations + static const size_t kPoints = 1024; + + for (size_t i = 0; i < kPoints; ++i) { + vp[i * (size / kPoints)] = static_cast<unsigned char>(i); + } + + for (size_t i = 0; i < kPoints; ++i) { + ASSERT_EQ(vp[i * (size / kPoints)], static_cast<unsigned char>(i)); + } + + vp[size - 1] = 'M'; + ASSERT_EQ(vp[size - 1], 'M'); + } else { + ASSERT_EQ(errno, ENOMEM); + } + + free(p); +} + +class NoErrnoRegion final : public AddressRegion { + public: + explicit NoErrnoRegion(AddressRegion* underlying) : underlying_(underlying) {} + + std::pair<void*, size_t> Alloc(size_t size, size_t alignment) override { + std::pair<void*, size_t> result = underlying_->Alloc(size, alignment); + errno = 0; + return result; + } + + private: + AddressRegion* underlying_; +}; + +class NoErrnoRegionFactory final : public AddressRegionFactory { + public: + explicit NoErrnoRegionFactory(AddressRegionFactory* underlying) + : underlying_(underlying) {} + ~NoErrnoRegionFactory() override {} + + AddressRegion* Create(void* start, size_t size, UsageHint hint) override { + AddressRegion* underlying_region = underlying_->Create(start, size, hint); + CHECK_CONDITION(underlying_region != nullptr); + void* region_space = MallocInternal(sizeof(NoErrnoRegion)); + CHECK_CONDITION(region_space != nullptr); + return new (region_space) NoErrnoRegion(underlying_region); + } + + // Get a human-readable description of the current state of the + // allocator. + size_t GetStats(absl::Span<char> buffer) override { + return underlying_->GetStats(buffer); + } + + private: + AddressRegionFactory* const underlying_; +}; + +class LargeAllocationTest : public ::testing::Test { + public: + LargeAllocationTest() { + old_ = MallocExtension::GetRegionFactory(); + MallocExtension::SetRegionFactory(new NoErrnoRegionFactory(old_)); + + // Grab some memory so that some later allocations are guaranteed to fail. + small_ = ::operator new(4 << 20); + } + + ~LargeAllocationTest() override { + ::operator delete(small_); + + auto* current = MallocExtension::GetRegionFactory(); + + MallocExtension::SetRegionFactory(old_); + delete current; + } + + private: + AddressRegionFactory* old_; + void* small_; +}; + +// Allocate some 0-byte objects. They better be unique. 0 bytes is not large +// but it exercises some paths related to large-allocation code. 
+TEST_F(LargeAllocationTest, UniqueAddresses) { + constexpr int kZeroTimes = 1024; + + absl::flat_hash_set<void*> ptrs; + for (int i = 0; i < kZeroTimes; ++i) { + void* p = malloc(1); + ASSERT_NE(p, nullptr); + EXPECT_THAT(ptrs, ::testing::Not(::testing::Contains(p))); + ptrs.insert(p); + } + + for (auto* p : ptrs) { + free(p); + } +} + +TEST_F(LargeAllocationTest, MaxSize) { + // Test sizes up near the maximum size_t. These allocations test the + // wrap-around code. + constexpr size_t zero = 0; + constexpr size_t kMinusNTimes = 16384; + for (size_t i = 1; i < kMinusNTimes; ++i) { + TryAllocExpectFail(zero - i); + } +} + +TEST_F(LargeAllocationTest, NearMaxSize) { + // Test sizes a bit smaller. The small malloc above guarantees that all these + // return nullptr. + constexpr size_t zero = 0; + constexpr size_t kMinusMBMinusNTimes = 16384; + for (size_t i = 0; i < kMinusMBMinusNTimes; ++i) { + TryAllocExpectFail(zero - 1048576 - i); + } +} + +TEST_F(LargeAllocationTest, Half) { + // Test sizes at half of size_t. + // These might or might not fail to allocate. + constexpr size_t kHalfPlusMinusTimes = 64; + constexpr size_t half = std::numeric_limits<size_t>::max() / 2 + 1; + for (size_t i = 0; i < kHalfPlusMinusTimes; ++i) { + TryAllocMightFail(half - i); + TryAllocMightFail(half + i); + } +} + +TEST_F(LargeAllocationTest, NearMaxAddressBits) { + // Tests sizes near the maximum address space size. + // For -1 <= i < 5, we expect all allocations to fail. For -6 <= i < -1, the + // allocation might succeed but create so much pagemap metadata that we exceed + // test memory limits and OOM. So we skip that range. + for (int i = -10; i < -6; ++i) { + TryAllocMightFail(size_t{1} << (kAddressBits + i)); + } + for (int i = -1; i < 5; ++i) { + TryAllocExpectFail(size_t{1} << (kAddressBits + i)); + } +} + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/tcmalloc_policy.h b/contrib/libs/tcmalloc/tcmalloc/tcmalloc_policy.h new file mode 100644 index 0000000000..d81f8f3be0 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/tcmalloc_policy.h @@ -0,0 +1,260 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// This file defines policies used when allocation memory. +// +// An allocation policy encapsulates four policies: +// +// - Out of memory policy. +// Dictates how to handle OOM conditions. +// +// struct OomPolicyTemplate { +// // Invoked when we failed to allocate memory +// // Must either terminate, throw, or return nullptr +// static void* handle_oom(size_t size); +// }; +// +// - Alignment policy +// Dictates alignment to use for an allocation. +// Must be trivially copyable. +// +// struct AlignPolicyTemplate { +// // Returns the alignment to use for the memory allocation, +// // or 1 to use small allocation table alignments (8 bytes) +// // Returned value Must be a non-zero power of 2. 
+// size_t align() const; +// }; +// +// - Hook invocation policy +// dictates invocation of allocation hooks +// +// struct HooksPolicyTemplate { +// // Returns true if allocation hooks must be invoked. +// static bool invoke_hooks(); +// }; +// +// - NUMA partition policy +// When NUMA awareness is enabled this dictates which NUMA partition we will +// allocate memory from. Must be trivially copyable. +// +// struct NumaPartitionPolicyTemplate { +// // Returns the NUMA partition to allocate from. +// size_t partition() const; +// +// // Returns the NUMA partition to allocate from multiplied by +// // kNumBaseClasses - i.e. the first size class that corresponds to the +// // NUMA partition to allocate from. +// size_t scaled_partition() const; +// }; + +#ifndef TCMALLOC_TCMALLOC_POLICY_H_ +#define TCMALLOC_TCMALLOC_POLICY_H_ + +#include <errno.h> +#include <stddef.h> +#include <stdint.h> + +#include <cstddef> + +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/numa.h" +#include "tcmalloc/internal/percpu.h" +#include "tcmalloc/static_vars.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// NullOomPolicy: returns nullptr +struct NullOomPolicy { + static inline constexpr void* handle_oom(size_t size) { return nullptr; } + + static constexpr bool can_return_nullptr() { return true; } +}; + +// MallocOomPolicy: sets errno to ENOMEM and returns nullptr +struct MallocOomPolicy { + static inline void* handle_oom(size_t size) { + errno = ENOMEM; + return nullptr; + } + + static constexpr bool can_return_nullptr() { return true; } +}; + +// CppOomPolicy: terminates the program +struct CppOomPolicy { + static ABSL_ATTRIBUTE_NOINLINE ABSL_ATTRIBUTE_NORETURN void* handle_oom( + size_t size) { + Crash(kCrashWithStats, __FILE__, __LINE__, + "Unable to allocate (new failed)", size); + __builtin_unreachable(); + } + + static constexpr bool can_return_nullptr() { return false; } +}; + +// DefaultAlignPolicy: use default small size table based allocation +struct DefaultAlignPolicy { + // Important: the value here is explicitly '1' to indicate that the used + // alignment is the default alignment of the size tables in tcmalloc. + // The constexpr value of 1 will optimize out the alignment checks and + // iterations in the GetSizeClass() calls for default aligned allocations. + static constexpr size_t align() { return 1; } +}; + +// MallocAlignPolicy: use std::max_align_t allocation +struct MallocAlignPolicy { + static constexpr size_t align() { return alignof(std::max_align_t); } +}; + +// AlignAsPolicy: use user provided alignment +class AlignAsPolicy { + public: + AlignAsPolicy() = delete; + explicit constexpr AlignAsPolicy(size_t value) : value_(value) {} + explicit constexpr AlignAsPolicy(std::align_val_t value) + : AlignAsPolicy(static_cast<size_t>(value)) {} + + size_t constexpr align() const { return value_; } + + private: + size_t value_; +}; + +// InvokeHooksPolicy: invoke memory allocation hooks +struct InvokeHooksPolicy { + static constexpr bool invoke_hooks() { return true; } +}; + +// NoHooksPolicy: do not invoke memory allocation hooks +struct NoHooksPolicy { + static constexpr bool invoke_hooks() { return false; } +}; + +// Use a fixed NUMA partition. 
+class FixedNumaPartitionPolicy { + public: + explicit constexpr FixedNumaPartitionPolicy(size_t partition) + : partition_(partition) {} + + size_t constexpr partition() const { return partition_; } + + size_t constexpr scaled_partition() const { + return partition_ * kNumBaseClasses; + } + + private: + size_t partition_; +}; + +// Use the NUMA partition which the executing CPU is local to. +struct LocalNumaPartitionPolicy { + // Note that the partition returned may change between calls if the executing + // thread migrates between NUMA nodes & partitions. Users of this function + // should not rely upon multiple invocations returning the same partition. + size_t partition() const { + return Static::numa_topology().GetCurrentPartition(); + } + size_t scaled_partition() const { + return Static::numa_topology().GetCurrentScaledPartition(); + } +}; + +// TCMallocPolicy defines the compound policy object containing +// the OOM, alignment and hooks policies. +// Is trivially constructible, copyable and destructible. +template <typename OomPolicy = CppOomPolicy, + typename AlignPolicy = DefaultAlignPolicy, + typename HooksPolicy = InvokeHooksPolicy, + typename NumaPolicy = LocalNumaPartitionPolicy> +class TCMallocPolicy { + public: + constexpr TCMallocPolicy() = default; + explicit constexpr TCMallocPolicy(AlignPolicy align, NumaPolicy numa) + : align_(align), numa_(numa) {} + + // OOM policy + static void* handle_oom(size_t size) { return OomPolicy::handle_oom(size); } + + // Alignment policy + constexpr size_t align() const { return align_.align(); } + + // NUMA partition + constexpr size_t numa_partition() const { return numa_.partition(); } + + // NUMA partition multiplied by kNumBaseClasses + constexpr size_t scaled_numa_partition() const { + return numa_.scaled_partition(); + } + + // Hooks policy + static constexpr bool invoke_hooks() { return HooksPolicy::invoke_hooks(); } + + // Returns this policy aligned as 'align' + template <typename align_t> + constexpr TCMallocPolicy<OomPolicy, AlignAsPolicy, HooksPolicy, NumaPolicy> + AlignAs( + align_t align) const { + return TCMallocPolicy<OomPolicy, AlignAsPolicy, HooksPolicy, NumaPolicy>( + AlignAsPolicy{align}, numa_); + } + + // Returns this policy with a nullptr OOM policy. + constexpr TCMallocPolicy<NullOomPolicy, AlignPolicy, HooksPolicy, + NumaPolicy> Nothrow() + const { + return TCMallocPolicy<NullOomPolicy, AlignPolicy, HooksPolicy, + NumaPolicy>(align_, numa_); + } + + // Returns this policy with NewAllocHook invocations disabled. + constexpr TCMallocPolicy<OomPolicy, AlignPolicy, NoHooksPolicy, NumaPolicy> + WithoutHooks() + const { + return TCMallocPolicy<OomPolicy, AlignPolicy, NoHooksPolicy, + NumaPolicy>(align_, numa_); + } + + // Returns this policy with a fixed NUMA partition. + constexpr TCMallocPolicy<OomPolicy, AlignPolicy, NoHooksPolicy, + FixedNumaPartitionPolicy> InNumaPartition(size_t partition) const { + return TCMallocPolicy<OomPolicy, AlignPolicy, NoHooksPolicy, + FixedNumaPartitionPolicy>( + align_, FixedNumaPartitionPolicy{partition}); + } + + // Returns this policy with a fixed NUMA partition matching that of the + // previously allocated `ptr`. 
+ constexpr auto InSameNumaPartitionAs(void* ptr) const { + return InNumaPartition(NumaPartitionFromPointer(ptr)); + } + + static constexpr bool can_return_nullptr() { + return OomPolicy::can_return_nullptr(); + } + + private: + AlignPolicy align_; + NumaPolicy numa_; +}; + +using CppPolicy = TCMallocPolicy<CppOomPolicy, DefaultAlignPolicy>; +using MallocPolicy = TCMallocPolicy<MallocOomPolicy, MallocAlignPolicy>; + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_TCMALLOC_POLICY_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/thread_cache.cc b/contrib/libs/tcmalloc/tcmalloc/thread_cache.cc new file mode 100644 index 0000000000..89cc779af1 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/thread_cache.cc @@ -0,0 +1,417 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/thread_cache.h" + +#include <algorithm> + +#include "absl/base/internal/spinlock.h" +#include "absl/base/macros.h" +#include "tcmalloc/transfer_cache.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +size_t ThreadCache::per_thread_cache_size_ = kMaxThreadCacheSize; +size_t ThreadCache::overall_thread_cache_size_ = kDefaultOverallThreadCacheSize; +int64_t ThreadCache::unclaimed_cache_space_ = kDefaultOverallThreadCacheSize; +ThreadCache* ThreadCache::thread_heaps_ = nullptr; +int ThreadCache::thread_heap_count_ = 0; +ThreadCache* ThreadCache::next_memory_steal_ = nullptr; +#ifdef ABSL_HAVE_TLS +__thread ThreadCache* ThreadCache::thread_local_data_ + ABSL_ATTRIBUTE_INITIAL_EXEC = nullptr; +#endif +ABSL_CONST_INIT bool ThreadCache::tsd_inited_ = false; +pthread_key_t ThreadCache::heap_key_; + +void ThreadCache::Init(pthread_t tid) { + size_ = 0; + + max_size_ = 0; + IncreaseCacheLimitLocked(); + if (max_size_ == 0) { + // There isn't enough memory to go around. Just give the minimum to + // this thread. + max_size_ = kMinThreadCacheSize; + + // Take unclaimed_cache_space_ negative. + unclaimed_cache_space_ -= kMinThreadCacheSize; + ASSERT(unclaimed_cache_space_ < 0); + } + + next_ = nullptr; + prev_ = nullptr; + tid_ = tid; + in_setspecific_ = false; + for (size_t cl = 0; cl < kNumClasses; ++cl) { + list_[cl].Init(); + } +} + +void ThreadCache::Cleanup() { + // Put unused memory back into central cache + for (int cl = 0; cl < kNumClasses; ++cl) { + if (list_[cl].length() > 0) { + ReleaseToCentralCache(&list_[cl], cl, list_[cl].length()); + } + } +} + +// Remove some objects of class "cl" from central cache and add to thread heap. +// On success, return the first object for immediate use; otherwise return NULL. 
+void* ThreadCache::FetchFromCentralCache(size_t cl, size_t byte_size) { + FreeList* list = &list_[cl]; + ASSERT(list->empty()); + const int batch_size = Static::sizemap().num_objects_to_move(cl); + + const int num_to_move = std::min<int>(list->max_length(), batch_size); + void* batch[kMaxObjectsToMove]; + int fetch_count = + Static::transfer_cache().RemoveRange(cl, batch, num_to_move); + if (fetch_count == 0) { + return nullptr; + } + + if (--fetch_count > 0) { + size_ += byte_size * fetch_count; + list->PushBatch(fetch_count, batch + 1); + } + + // Increase max length slowly up to batch_size. After that, + // increase by batch_size in one shot so that the length is a + // multiple of batch_size. + if (list->max_length() < batch_size) { + list->set_max_length(list->max_length() + 1); + } else { + // Don't let the list get too long. In 32 bit builds, the length + // is represented by a 16 bit int, so we need to watch out for + // integer overflow. + int new_length = std::min<int>(list->max_length() + batch_size, + kMaxDynamicFreeListLength); + // The list's max_length must always be a multiple of batch_size, + // and kMaxDynamicFreeListLength is not necessarily a multiple + // of batch_size. + new_length -= new_length % batch_size; + ASSERT(new_length % batch_size == 0); + list->set_max_length(new_length); + } + return batch[0]; +} + +void ThreadCache::ListTooLong(FreeList* list, size_t cl) { + const int batch_size = Static::sizemap().num_objects_to_move(cl); + ReleaseToCentralCache(list, cl, batch_size); + + // If the list is too long, we need to transfer some number of + // objects to the central cache. Ideally, we would transfer + // num_objects_to_move, so the code below tries to make max_length + // converge on num_objects_to_move. + + if (list->max_length() < batch_size) { + // Slow start the max_length so we don't overreserve. + list->set_max_length(list->max_length() + 1); + } else if (list->max_length() > batch_size) { + // If we consistently go over max_length, shrink max_length. If we don't + // shrink it, some amount of memory will always stay in this freelist. + list->set_length_overages(list->length_overages() + 1); + if (list->length_overages() > kMaxOverages) { + ASSERT(list->max_length() > batch_size); + list->set_max_length(list->max_length() - batch_size); + list->set_length_overages(0); + } + } +} + +// Remove some objects of class "cl" from thread heap and add to central cache +void ThreadCache::ReleaseToCentralCache(FreeList* src, size_t cl, int N) { + ASSERT(src == &list_[cl]); + if (N > src->length()) N = src->length(); + size_t delta_bytes = N * Static::sizemap().class_to_size(cl); + + // We return prepackaged chains of the correct size to the central cache. 
+ void* batch[kMaxObjectsToMove]; + int batch_size = Static::sizemap().num_objects_to_move(cl); + while (N > batch_size) { + src->PopBatch(batch_size, batch); + static_assert(ABSL_ARRAYSIZE(batch) >= kMaxObjectsToMove, + "not enough space in batch"); + Static::transfer_cache().InsertRange(cl, + absl::Span<void*>(batch, batch_size)); + N -= batch_size; + } + src->PopBatch(N, batch); + static_assert(ABSL_ARRAYSIZE(batch) >= kMaxObjectsToMove, + "not enough space in batch"); + Static::transfer_cache().InsertRange(cl, absl::Span<void*>(batch, N)); + size_ -= delta_bytes; +} + +// Release idle memory to the central cache +void ThreadCache::Scavenge() { + // If the low-water mark for the free list is L, it means we would + // not have had to allocate anything from the central cache even if + // we had reduced the free list size by L. We aim to get closer to + // that situation by dropping L/2 nodes from the free list. This + // may not release much memory, but if so we will call scavenge again + // pretty soon and the low-water marks will be high on that call. + for (int cl = 0; cl < kNumClasses; cl++) { + FreeList* list = &list_[cl]; + const int lowmark = list->lowwatermark(); + if (lowmark > 0) { + const int drop = (lowmark > 1) ? lowmark / 2 : 1; + ReleaseToCentralCache(list, cl, drop); + + // Shrink the max length if it isn't used. Only shrink down to + // batch_size -- if the thread was active enough to get the max_length + // above batch_size, it will likely be that active again. If + // max_length shinks below batch_size, the thread will have to + // go through the slow-start behavior again. The slow-start is useful + // mainly for threads that stay relatively idle for their entire + // lifetime. + const int batch_size = Static::sizemap().num_objects_to_move(cl); + if (list->max_length() > batch_size) { + list->set_max_length( + std::max<int>(list->max_length() - batch_size, batch_size)); + } + } + list->clear_lowwatermark(); + } + + IncreaseCacheLimit(); +} + +void ThreadCache::DeallocateSlow(void* ptr, FreeList* list, size_t cl) { + tracking::Report(kFreeMiss, cl, 1); + if (ABSL_PREDICT_FALSE(list->length() > list->max_length())) { + tracking::Report(kFreeTruncations, cl, 1); + ListTooLong(list, cl); + } + if (size_ >= max_size_) { + tracking::Report(kFreeScavenges, cl, 1); + Scavenge(); + } +} + +void ThreadCache::IncreaseCacheLimit() { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + IncreaseCacheLimitLocked(); +} + +void ThreadCache::IncreaseCacheLimitLocked() { + if (unclaimed_cache_space_ > 0) { + // Possibly make unclaimed_cache_space_ negative. + unclaimed_cache_space_ -= kStealAmount; + max_size_ += kStealAmount; + return; + } + // Don't hold pageheap_lock too long. Try to steal from 10 other + // threads before giving up. The i < 10 condition also prevents an + // infinite loop in case none of the existing thread heaps are + // suitable places to steal from. + for (int i = 0; i < 10; ++i, next_memory_steal_ = next_memory_steal_->next_) { + // Reached the end of the linked list. Start at the beginning. 
+ if (next_memory_steal_ == nullptr) { + ASSERT(thread_heaps_ != nullptr); + next_memory_steal_ = thread_heaps_; + } + if (next_memory_steal_ == this || + next_memory_steal_->max_size_ <= kMinThreadCacheSize) { + continue; + } + next_memory_steal_->max_size_ -= kStealAmount; + max_size_ += kStealAmount; + + next_memory_steal_ = next_memory_steal_->next_; + return; + } +} + +void ThreadCache::InitTSD() { + ASSERT(!tsd_inited_); + pthread_key_create(&heap_key_, DestroyThreadCache); + tsd_inited_ = true; +} + +ThreadCache* ThreadCache::CreateCacheIfNecessary() { + // Initialize per-thread data if necessary + Static::InitIfNecessary(); + ThreadCache* heap = nullptr; + +#ifdef ABSL_HAVE_TLS + const bool maybe_reentrant = !tsd_inited_; + // If we have set up our TLS, we can avoid a scan of the thread_heaps_ list. + if (tsd_inited_) { + if (thread_local_data_) { + return thread_local_data_; + } + } +#else + const bool maybe_reentrant = true; +#endif + + { + absl::base_internal::SpinLockHolder h(&pageheap_lock); + const pthread_t me = pthread_self(); + + // This may be a recursive malloc call from pthread_setspecific() + // In that case, the heap for this thread has already been created + // and added to the linked list. So we search for that first. + if (maybe_reentrant) { + for (ThreadCache* h = thread_heaps_; h != nullptr; h = h->next_) { + if (h->tid_ == me) { + heap = h; + break; + } + } + } + + if (heap == nullptr) { + heap = NewHeap(me); + } + } + + // We call pthread_setspecific() outside the lock because it may + // call malloc() recursively. We check for the recursive call using + // the "in_setspecific_" flag so that we can avoid calling + // pthread_setspecific() if we are already inside pthread_setspecific(). + if (!heap->in_setspecific_ && tsd_inited_) { + heap->in_setspecific_ = true; +#ifdef ABSL_HAVE_TLS + // Also keep a copy in __thread for faster retrieval + thread_local_data_ = heap; +#endif + pthread_setspecific(heap_key_, heap); + heap->in_setspecific_ = false; + } + return heap; +} + +ThreadCache* ThreadCache::NewHeap(pthread_t tid) { + // Create the heap and add it to the linked list + ThreadCache* heap = Static::threadcache_allocator().New(); + heap->Init(tid); + heap->next_ = thread_heaps_; + heap->prev_ = nullptr; + if (thread_heaps_ != nullptr) { + thread_heaps_->prev_ = heap; + } else { + // This is the only thread heap at the momment. + ASSERT(next_memory_steal_ == nullptr); + next_memory_steal_ = heap; + } + thread_heaps_ = heap; + thread_heap_count_++; + return heap; +} + +void ThreadCache::BecomeIdle() { + if (!tsd_inited_) return; // No caches yet + ThreadCache* heap = GetCacheIfPresent(); + if (heap == nullptr) return; // No thread cache to remove + if (heap->in_setspecific_) return; // Do not disturb the active caller + + heap->in_setspecific_ = true; + pthread_setspecific(heap_key_, nullptr); +#ifdef ABSL_HAVE_TLS + // Also update the copy in __thread + thread_local_data_ = nullptr; +#endif + heap->in_setspecific_ = false; + if (GetCacheIfPresent() == heap) { + // Somehow heap got reinstated by a recursive call to malloc + // from pthread_setspecific. We give up in this case. + return; + } + + // We can now get rid of the heap + DeleteCache(heap); +} + +void ThreadCache::DestroyThreadCache(void* ptr) { + // Note that "ptr" cannot be NULL since pthread promises not + // to invoke the destructor on NULL values, but for safety, + // we check anyway. 
+ if (ptr != nullptr) { +#ifdef ABSL_HAVE_TLS + thread_local_data_ = nullptr; +#endif + DeleteCache(reinterpret_cast<ThreadCache*>(ptr)); + } +} + +void ThreadCache::DeleteCache(ThreadCache* heap) { + // Remove all memory from heap + heap->Cleanup(); + + // Remove from linked list + absl::base_internal::SpinLockHolder h(&pageheap_lock); + if (heap->next_ != nullptr) heap->next_->prev_ = heap->prev_; + if (heap->prev_ != nullptr) heap->prev_->next_ = heap->next_; + if (thread_heaps_ == heap) thread_heaps_ = heap->next_; + thread_heap_count_--; + + if (next_memory_steal_ == heap) next_memory_steal_ = heap->next_; + if (next_memory_steal_ == nullptr) next_memory_steal_ = thread_heaps_; + unclaimed_cache_space_ += heap->max_size_; + + Static::threadcache_allocator().Delete(heap); +} + +void ThreadCache::RecomputePerThreadCacheSize() { + // Divide available space across threads + int n = thread_heap_count_ > 0 ? thread_heap_count_ : 1; + size_t space = overall_thread_cache_size_ / n; + + // Limit to allowed range + if (space < kMinThreadCacheSize) space = kMinThreadCacheSize; + if (space > kMaxThreadCacheSize) space = kMaxThreadCacheSize; + + double ratio = space / std::max<double>(1, per_thread_cache_size_); + size_t claimed = 0; + for (ThreadCache* h = thread_heaps_; h != nullptr; h = h->next_) { + // Increasing the total cache size should not circumvent the + // slow-start growth of max_size_. + if (ratio < 1.0) { + h->max_size_ *= ratio; + } + claimed += h->max_size_; + } + unclaimed_cache_space_ = overall_thread_cache_size_ - claimed; + per_thread_cache_size_ = space; +} + +void ThreadCache::GetThreadStats(uint64_t* total_bytes, uint64_t* class_count) { + for (ThreadCache* h = thread_heaps_; h != nullptr; h = h->next_) { + *total_bytes += h->Size(); + if (class_count) { + for (int cl = 0; cl < kNumClasses; ++cl) { + class_count[cl] += h->freelist_length(cl); + } + } + } +} + +void ThreadCache::set_overall_thread_cache_size(size_t new_size) { + // Clip the value to a reasonable minimum + if (new_size < kMinThreadCacheSize) new_size = kMinThreadCacheSize; + overall_thread_cache_size_ = new_size; + + RecomputePerThreadCacheSize(); +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/thread_cache.h b/contrib/libs/tcmalloc/tcmalloc/thread_cache.h new file mode 100644 index 0000000000..ae6cef869f --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/thread_cache.h @@ -0,0 +1,345 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
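Editorial aside, not part of the patch: the max_length adjustment in ThreadCache::FetchFromCentralCache above follows a simple slow-start scheme (grow by one per miss until batch_size, then grow in batch_size steps, capped and kept a multiple of batch_size); ListTooLong handles the corresponding shrink side. A hedged, standalone restatement of only the growth rule, where the cap value and the GrowMaxLength name are assumptions chosen for illustration:

#include <algorithm>
#include <cstdio>

// Illustrative cap only; the real kMaxDynamicFreeListLength may differ.
constexpr int kMaxListLengthCap = 8192;

int GrowMaxLength(int max_length, int batch_size) {
  if (max_length < batch_size) {
    return max_length + 1;  // slow start: +1 per central-cache miss
  }
  int new_length = std::min(max_length + batch_size, kMaxListLengthCap);
  new_length -= new_length % batch_size;  // keep a multiple of batch_size
  return new_length;
}

int main() {
  int max_length = 1;
  for (int miss = 0; miss < 40; ++miss) {
    max_length = GrowMaxLength(max_length, /*batch_size=*/32);
  }
  // 31 misses reach batch_size, then 9 more grow in steps of 32 -> 288.
  std::printf("max_length after 40 misses: %d\n", max_length);
  return 0;
}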
+ +#ifndef TCMALLOC_THREAD_CACHE_H_ +#define TCMALLOC_THREAD_CACHE_H_ + +#include <pthread.h> +#include <stddef.h> +#include <stdint.h> +#include <sys/types.h> + +#include "absl/base/attributes.h" +#include "absl/base/config.h" +#include "absl/base/optimization.h" +#include "absl/base/thread_annotations.h" +#include "tcmalloc/common.h" +#include "tcmalloc/internal/linked_list.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/page_heap_allocator.h" +#include "tcmalloc/sampler.h" +#include "tcmalloc/static_vars.h" +#include "tcmalloc/tracking.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +//------------------------------------------------------------------- +// Data kept per thread +//------------------------------------------------------------------- + +class ThreadCache { + public: + void Init(pthread_t tid) ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + void Cleanup(); + + // Accessors (mostly just for printing stats) + int freelist_length(size_t cl) const { return list_[cl].length(); } + + // Total byte size in cache + size_t Size() const { return size_; } + + // Allocate an object of the given size class. When allocation fails + // (from this cache and after running FetchFromCentralCache), + // OOMHandler(size) is called and its return value is + // returned from Allocate. OOMHandler is used to parameterize + // out-of-memory handling (raising exception, returning nullptr, + // calling new_handler or anything else). "Passing" OOMHandler in + // this way allows Allocate to be used in tail-call position in + // fast-path, making allocate tail-call slow path code. + template <void* OOMHandler(size_t)> + void* Allocate(size_t cl); + + void Deallocate(void* ptr, size_t cl); + + void Scavenge(); + + Sampler* GetSampler(); + + static void InitTSD(); + static ThreadCache* GetCache(); + static ThreadCache* GetCacheIfPresent(); + static ThreadCache* CreateCacheIfNecessary(); + static void BecomeIdle(); + + // returns stats on total thread caches created/used + static inline AllocatorStats HeapStats() + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Adds to *total_bytes the total number of bytes used by all thread heaps. + // Also, if class_count is not NULL, it must be an array of size kNumClasses, + // and this function will increment each element of class_count by the number + // of items in all thread-local freelists of the corresponding size class. + static void GetThreadStats(uint64_t* total_bytes, uint64_t* class_count) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Sets the total thread cache size to new_size, recomputing the + // individual thread cache sizes as necessary. + static void set_overall_thread_cache_size(size_t new_size) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + static size_t overall_thread_cache_size() + ABSL_SHARED_LOCKS_REQUIRED(pageheap_lock) { + return overall_thread_cache_size_; + } + + template <void* OOMHandler(size_t)> + void* ABSL_ATTRIBUTE_NOINLINE AllocateSlow(size_t cl, size_t allocated_size) { + tracking::Report(kMallocMiss, cl, 1); + void* ret = FetchFromCentralCache(cl, allocated_size); + if (ABSL_PREDICT_TRUE(ret != nullptr)) { + return ret; + } + return OOMHandler(allocated_size); + } + + private: + // We inherit rather than include the list as a data structure to reduce + // compiler padding. Without inheritance, the compiler pads the list + // structure and then adds it as a member, even though we could fit everything + // without padding. 
+ class FreeList : public LinkedList { + private: + uint32_t lowater_; // Low water mark for list length. + uint32_t max_length_; // Dynamic max list length based on usage. + // Tracks the number of times a deallocation has caused + // length_ > max_length_. After the kMaxOverages'th time, max_length_ + // shrinks and length_overages_ is reset to zero. + uint32_t length_overages_; + + // This extra unused field pads FreeList size to 32 bytes on 64 + // bit machines, helping compiler generate faster code for + // indexing array of lists. + void* ABSL_ATTRIBUTE_UNUSED extra_; + + public: + void Init() { + LinkedList::Init(); + lowater_ = 0; + max_length_ = 1; + length_overages_ = 0; + } + + // Return the maximum length of the list. + size_t max_length() const { return max_length_; } + + // Set the maximum length of the list. If 'new_max' > length(), the + // client is responsible for removing objects from the list. + void set_max_length(size_t new_max) { max_length_ = new_max; } + + // Return the number of times that length() has gone over max_length(). + size_t length_overages() const { return length_overages_; } + + void set_length_overages(size_t new_count) { length_overages_ = new_count; } + + // Low-water mark management + int lowwatermark() const { return lowater_; } + void clear_lowwatermark() { lowater_ = length(); } + + ABSL_ATTRIBUTE_ALWAYS_INLINE bool TryPop(void** ret) { + bool out = LinkedList::TryPop(ret); + if (ABSL_PREDICT_TRUE(out) && ABSL_PREDICT_FALSE(length() < lowater_)) { + lowater_ = length(); + } + return out; + } + + void PopBatch(int N, void** batch) { + LinkedList::PopBatch(N, batch); + if (length() < lowater_) lowater_ = length(); + } + }; + +// we've deliberately introduced unused extra_ field into FreeList +// to pad the size. Lets ensure that it is still working as +// intended. +#ifdef _LP64 + static_assert(sizeof(FreeList) == 32, "Freelist size has changed"); +#endif + + // Gets and returns an object from the central cache, and, if possible, + // also adds some objects of that size class to this thread cache. + void* FetchFromCentralCache(size_t cl, size_t byte_size); + + // Releases some number of items from src. Adjusts the list's max_length + // to eventually converge on num_objects_to_move(cl). + void ListTooLong(FreeList* list, size_t cl); + + void DeallocateSlow(void* ptr, FreeList* list, size_t cl); + + // Releases N items from this thread cache. + void ReleaseToCentralCache(FreeList* src, size_t cl, int N); + + // Increase max_size_ by reducing unclaimed_cache_space_ or by + // reducing the max_size_ of some other thread. In both cases, + // the delta is kStealAmount. + void IncreaseCacheLimit(); + + // Same as above but called with pageheap_lock held. + void IncreaseCacheLimitLocked() ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // If TLS is available, we also store a copy of the per-thread object + // in a __thread variable since __thread variables are faster to read + // than pthread_getspecific(). We still need pthread_setspecific() + // because __thread variables provide no way to run cleanup code when + // a thread is destroyed. + // + // We also give a hint to the compiler to use the "initial exec" TLS + // model. This is faster than the default TLS model, at the cost that + // you cannot dlopen this library. (To see the difference, look at + // the CPU use of __tls_get_addr with and without this attribute.) + // + // Since using dlopen on a malloc replacement is asking for trouble in any + // case, that's a good tradeoff for us. 
+#ifdef ABSL_HAVE_TLS + static __thread ThreadCache* thread_local_data_ ABSL_ATTRIBUTE_INITIAL_EXEC; +#endif + + // Thread-specific key. Initialization here is somewhat tricky + // because some Linux startup code invokes malloc() before it + // is in a good enough state to handle pthread_keycreate(). + // Therefore, we use TSD keys only after tsd_inited is set to true. + // Until then, we use a slow path to get the heap object. + static bool tsd_inited_; + static pthread_key_t heap_key_; + + // Linked list of heap objects. + static ThreadCache* thread_heaps_ ABSL_GUARDED_BY(pageheap_lock); + static int thread_heap_count_ ABSL_GUARDED_BY(pageheap_lock); + + // A pointer to one of the objects in thread_heaps_. Represents + // the next ThreadCache from which a thread over its max_size_ should + // steal memory limit. Round-robin through all of the objects in + // thread_heaps_. + static ThreadCache* next_memory_steal_ ABSL_GUARDED_BY(pageheap_lock); + + // Overall thread cache size. + static size_t overall_thread_cache_size_ ABSL_GUARDED_BY(pageheap_lock); + + // Global per-thread cache size. + static size_t per_thread_cache_size_ ABSL_GUARDED_BY(pageheap_lock); + + // Represents overall_thread_cache_size_ minus the sum of max_size_ + // across all ThreadCaches. We use int64_t even in 32-bit builds because + // with enough ThreadCaches, this number can get smaller than -2^31. + static int64_t unclaimed_cache_space_ ABSL_GUARDED_BY(pageheap_lock); + + // This class is laid out with the most frequently used fields + // first so that hot elements are placed on the same cache line. + + FreeList list_[kNumClasses]; // Array indexed by size-class + + size_t size_; // Combined size of data + size_t max_size_; // size_ > max_size_ --> Scavenge() + +#ifndef ABSL_HAVE_TLS + // We sample allocations, biased by the size of the allocation. + // If we have TLS, then we use sampler defined in tcmalloc.cc. + Sampler sampler_; +#endif + + pthread_t tid_; + bool in_setspecific_; + + // Allocate a new heap. + static ThreadCache* NewHeap(pthread_t tid) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + // Use only as pthread thread-specific destructor function. + static void DestroyThreadCache(void* ptr); + + static void DeleteCache(ThreadCache* heap); + static void RecomputePerThreadCacheSize() + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + public: + // All ThreadCache objects are kept in a linked list (for stats collection) + ThreadCache* next_; + ThreadCache* prev_; + + private: +#ifdef ABSL_CACHELINE_SIZE + // Ensure that two instances of this class are never on the same cache line. + // This is critical for performance, as false sharing would negate many of + // the benefits of a per-thread cache. 
+ char padding_[ABSL_CACHELINE_SIZE]; +#endif +}; + +inline AllocatorStats ThreadCache::HeapStats() { + return Static::threadcache_allocator().stats(); +} + +#ifndef ABSL_HAVE_TLS +inline Sampler* ThreadCache::GetSampler() { return &sampler_; } +#endif + +template <void* OOMHandler(size_t)> +inline void* ABSL_ATTRIBUTE_ALWAYS_INLINE ThreadCache::Allocate(size_t cl) { + const size_t allocated_size = Static::sizemap().class_to_size(cl); + + FreeList* list = &list_[cl]; + void* ret; + if (ABSL_PREDICT_TRUE(list->TryPop(&ret))) { + tracking::Report(kMallocHit, cl, 1); + size_ -= allocated_size; + return ret; + } + + return AllocateSlow<OOMHandler>(cl, allocated_size); +} + +inline void ABSL_ATTRIBUTE_ALWAYS_INLINE ThreadCache::Deallocate(void* ptr, + size_t cl) { + FreeList* list = &list_[cl]; + size_ += Static::sizemap().class_to_size(cl); + ssize_t size_headroom = max_size_ - size_ - 1; + + list->Push(ptr); + ssize_t list_headroom = + static_cast<ssize_t>(list->max_length()) - list->length(); + + // There are two relatively uncommon things that require further work. + // In the common case we're done, and in that case we need a single branch + // because of the bitwise-or trick that follows. + if ((list_headroom | size_headroom) < 0) { + DeallocateSlow(ptr, list, cl); + } else { + tracking::Report(kFreeHit, cl, 1); + } +} + +inline ThreadCache* ABSL_ATTRIBUTE_ALWAYS_INLINE +ThreadCache::GetCacheIfPresent() { +#ifdef ABSL_HAVE_TLS + // __thread is faster + return thread_local_data_; +#else + return tsd_inited_ + ? reinterpret_cast<ThreadCache*>(pthread_getspecific(heap_key_)) + : nullptr; +#endif +} + +inline ThreadCache* ThreadCache::GetCache() { + ThreadCache* tc = GetCacheIfPresent(); + return (ABSL_PREDICT_TRUE(tc != nullptr)) ? tc : CreateCacheIfNecessary(); +} + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_THREAD_CACHE_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/thread_cache_test.cc b/contrib/libs/tcmalloc/tcmalloc/thread_cache_test.cc new file mode 100644 index 0000000000..5b2d10b2ac --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/thread_cache_test.cc @@ -0,0 +1,132 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
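Editorial aside, not part of the patch: the "bitwise-or trick" mentioned in ThreadCache::Deallocate above works because OR-ing two signed values sets the sign bit of the result iff either operand's sign bit is set (assuming two's complement), so "is either headroom negative?" costs a single comparison and branch. A tiny standalone check, using int64_t and an illustrative helper name rather than the ssize_t locals in the real code:

#include <cassert>
#include <cstdint>

// True iff a < 0 or b < 0, with one comparison, mirroring the
// (list_headroom | size_headroom) < 0 test in ThreadCache::Deallocate.
bool EitherNegative(int64_t a, int64_t b) { return (a | b) < 0; }

int main() {
  assert(EitherNegative(-1, 5));
  assert(EitherNegative(7, -3));
  assert(!EitherNegative(0, 0));
  assert(!EitherNegative(4, 9));
  return 0;
}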
+ +#include <errno.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/mman.h> +#include <unistd.h> + +#include <string> +#include <thread> // NOLINT(build/c++11) + +#include "gtest/gtest.h" +#include "absl/strings/str_cat.h" +#include "benchmark/benchmark.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/memory_stats.h" +#include "tcmalloc/internal/parameter_accessors.h" +#include "tcmalloc/malloc_extension.h" + +namespace tcmalloc { +namespace { + +int64_t MemoryUsageSlow(pid_t pid) { + int64_t ret = 0; + + FILE *f = + fopen(absl::StrCat("/proc/", pid, "/task/", pid, "/smaps").c_str(), "r"); + CHECK_CONDITION(f != nullptr); + + char buf[BUFSIZ]; + while (fgets(buf, sizeof(buf), f) != nullptr) { + size_t rss; + if (sscanf(buf, "Rss: %zu kB", &rss) == 1) ret += rss; + } + CHECK_CONDITION(feof(f)); + fclose(f); + + // Rss is reported in KiB + ret *= 1024; + + // A sanity check: our return value should be in the same ballpark as + // GetMemoryStats. + tcmalloc::tcmalloc_internal::MemoryStats stats; + CHECK_CONDITION(tcmalloc::tcmalloc_internal::GetMemoryStats(&stats)); + EXPECT_GE(ret, 0.9 * stats.rss); + EXPECT_LE(ret, 1.1 * stats.rss); + + return ret; +} + +class ThreadCacheTest : public ::testing::Test { + public: + ThreadCacheTest() { + // Explicitly disable guarded allocations for this test. For aggressive + // sample rates on PPC (with 64KB pages), RSS grows quickly due to + // page-sized allocations that we don't release. + MallocExtension::SetGuardedSamplingRate(-1); + } +}; + +// Make sure that creating and destroying many mallocing threads +// does not leak memory. +TEST_F(ThreadCacheTest, NoLeakOnThreadDestruction) { + // Test only valid in per-thread mode + ASSERT_FALSE(MallocExtension::PerCpuCachesActive()); + + // Force a small sample to initialize tagged page allocator. + constexpr int64_t kAlloc = 8192; + const int64_t num_allocs = + 32 * MallocExtension::GetProfileSamplingRate() / kAlloc; + for (int64_t i = 0; i < num_allocs; ++i) { + ::operator delete(::operator new(kAlloc)); + } + + // Prefault and mlock the currently mapped address space. This avoids minor + // faults during the test from appearing as an apparent memory leak due to RSS + // growth. + // + // Previously, we tried to only mlock file-backed mappings, but page faults + // for .bss are also problematic (either from small pages [PPC] or hugepages + // [all platforms]) for test flakiness. + // + // We do *not* apply MCL_FUTURE, as to allow allocations during the test run + // to be released. + if (mlockall(MCL_CURRENT) != 0) { + GTEST_SKIP(); + } + const int64_t start_size = MemoryUsageSlow(getpid()); + ASSERT_GT(start_size, 0); + + static const size_t kThreads = 16 * 1024; + + for (int i = 0; i < kThreads; ++i) { + std::thread t([]() { + void *p = calloc(1024, 1); + benchmark::DoNotOptimize(p); + free(p); + }); + + t.join(); + } + const int64_t end_size = MemoryUsageSlow(getpid()); + + // Flush the page heap. Our allocations may have been retained. + if (TCMalloc_Internal_SetHugePageFillerSkipSubreleaseInterval != nullptr) { + TCMalloc_Internal_SetHugePageFillerSkipSubreleaseInterval( + absl::ZeroDuration()); + } + MallocExtension::ReleaseMemoryToSystem(std::numeric_limits<size_t>::max()); + + // This will detect a leak rate of 12 bytes per thread, which is well under 1% + // of the allocation done. 
+ EXPECT_GE(192 * 1024, end_size - start_size) + << "Before: " << start_size << " After: " << end_size; +} + +} // namespace +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/tracking.h b/contrib/libs/tcmalloc/tcmalloc/tracking.h new file mode 100644 index 0000000000..68d4c59b9c --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/tracking.h @@ -0,0 +1,109 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_TRACKING_H_ +#define TCMALLOC_TRACKING_H_ +// Optional support for tracking various stats in tcmalloc. For each +// sizeclass, we track: +// * # of mallocs +// * ...that hit the fast path +// * # of frees +// * ...that hit the fast path +// +// both on each CPU and on each thread. +// +// If disabled (TCMALLOC_TRACK_ALLOCS not defined), it has no runtime cost in +// time or space. +// +// If enabled and an implementation provided, we issue calls to record various +// statistics about cache hit rates. + +#include <stddef.h> +#include <sys/types.h> + +#include <map> +#include <string> + +#include "absl/base/internal/per_thread_tls.h" +#include "absl/base/internal/spinlock.h" +#include "tcmalloc/common.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/percpu.h" +#include "tcmalloc/malloc_extension.h" + +// Uncomment here or pass --copt=-DTCMALLOC_TRACK_ALLOCS at build time if you +// want tracking. +#ifndef TCMALLOC_TRACK_ALLOCS +// #define TCMALLOC_TRACK_ALLOCS +#endif +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +#if 1 +#define TCMALLOC_HAVE_TRACKING 0 +#endif + +// We track various kinds of events on each thread and each cpu. Each +// event is broken down by sizeclass where it happened. +// To track a new event, add a enum value here, insert calls to +// Tracking::Report() where the event occurs, and add a printable name +// to the event in kTrackingStatNames (in tracking.cc). Optionally +// print the stat somehow in State::Print. +enum TrackingStat { + kMallocHit = 0, // malloc that took the fast path + kMallocMiss = 1, // malloc that didn't + kFreeHit = 2, // ibid. for free + kFreeMiss = 3, + kFreeScavenges = 4, // # of frees that leads to scavenge + kFreeTruncations = 5, // # of frees that leads to list truncation + kTCInsertHit = 6, // # of times the returned object list hits transfer cache. + kTCInsertMiss = 7, // # of times the object list misses the transfer cache. + kTCRemoveHit = 8, // # of times object list fetching hits transfer cache. + kTCRemoveMiss = 9, // # of times object list fetching misses transfer cache. + kTCElementsPlunder = 10, // # of elements plundered from the transfer cache. + kNumTrackingStats = 11, +}; + +namespace tracking { + +// Report <count> occurences of <stat> associated with sizeclass <cl>. +void Report(TrackingStat stat, size_t cl, ssize_t count); + +// Dump all tracking data to <out>. We could support various other +// mechanisms for data delivery without too much trouble... 
+void Print(Printer* out); + +// Call on startup during tcmalloc initialization. +void Init(); + +// Fill <result> with information for each stat type (broken down by +// sizeclass if level == kDetailed.) +void GetProperties(std::map<std::string, MallocExtension::Property>* result); + +#if !TCMALLOC_HAVE_TRACKING +// no tracking, these are all no-ops +inline void Report(TrackingStat stat, size_t cl, ssize_t count) {} +inline void Print(Printer* out) {} +inline void Init() {} +inline void GetProperties( + std::map<std::string, MallocExtension::Property>* result) {} +#endif + +} // namespace tracking +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_TRACKING_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/transfer_cache.cc b/contrib/libs/tcmalloc/tcmalloc/transfer_cache.cc new file mode 100644 index 0000000000..efde485288 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/transfer_cache.cc @@ -0,0 +1,162 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "tcmalloc/transfer_cache.h" + +#include <fcntl.h> +#include <string.h> + +#include <algorithm> +#include <atomic> + +#include "absl/base/attributes.h" +#include "tcmalloc/common.h" +#include "tcmalloc/experiment.h" +#include "tcmalloc/guarded_page_allocator.h" +#include "tcmalloc/internal/cache_topology.h" +#include "tcmalloc/internal/environment.h" +#include "tcmalloc/internal/linked_list.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/internal/optimization.h" +#include "tcmalloc/internal/util.h" +#include "tcmalloc/static_vars.h" +#include "tcmalloc/tracking.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +absl::string_view TransferCacheImplementationToLabel( + TransferCacheImplementation type) { + switch (type) { + case TransferCacheImplementation::Legacy: + return "LEGACY"; + case TransferCacheImplementation::None: + return "NO_TRANSFERCACHE"; + case TransferCacheImplementation::Ring: + return "RING"; + default: + ASSUME(false); + } +} + +#ifndef TCMALLOC_SMALL_BUT_SLOW + +size_t StaticForwarder::class_to_size(int size_class) { + return Static::sizemap().class_to_size(size_class); +} +size_t StaticForwarder::num_objects_to_move(int size_class) { + return Static::sizemap().num_objects_to_move(size_class); +} +void *StaticForwarder::Alloc(size_t size, int alignment) { + return Static::arena().Alloc(size, alignment); +} + +void ShardedTransferCacheManager::Init() { + if (!IsExperimentActive( + Experiment::TEST_ONLY_TCMALLOC_SHARDED_TRANSFER_CACHE)) { + return; + } + num_shards_ = BuildCpuToL3CacheMap(l3_cache_index_); + cache_ = reinterpret_cast<Cache *>(Static::arena().Alloc( + sizeof(Cache) * kNumClasses * num_shards_, ABSL_CACHELINE_SIZE)); + ASSERT(cache_ != nullptr); + for (int shard = 0; shard < num_shards_; ++shard) { + for (int cl = 0; cl < kNumClasses; ++cl) { + const int index = shard * kNumClasses + cl; + const int size_per_object = 
Static::sizemap().class_to_size(cl); + static constexpr int k12MB = 12 << 20; + static constexpr int min_size = 4096; + const int use_this_size_class = size_per_object >= min_size; + const int capacity = use_this_size_class ? k12MB / size_per_object : 0; + active_for_class_[cl] = use_this_size_class; + new (&cache_[index].tc) + TransferCache(nullptr, capacity > 0 ? cl : 0, {capacity, capacity}); + cache_[index].tc.freelist().Init(cl); + } + } +} + +size_t ShardedTransferCacheManager::TotalBytes() { + if (cache_ == nullptr) return 0; + size_t out = 0; + for (int shard = 0; shard < num_shards_; ++shard) { + for (int cl = 0; cl < kNumClasses; ++cl) { + const int bytes_per_entry = Static::sizemap().class_to_size(cl); + if (bytes_per_entry <= 0) continue; + const int index = shard * kNumClasses + cl; + out += cache_[index].tc.tc_length() * bytes_per_entry; + } + } + return out; +} + +void ShardedTransferCacheManager::BackingTransferCache::InsertRange( + absl::Span<void *> batch) const { + Static::transfer_cache().InsertRange(size_class_, batch); +} + +ABSL_MUST_USE_RESULT int +ShardedTransferCacheManager::BackingTransferCache::RemoveRange(void **batch, + int n) const { + return Static::transfer_cache().RemoveRange(size_class_, batch, n); +} + +TransferCacheImplementation TransferCacheManager::ChooseImplementation() { + // Prefer ring, if we're forcing it on. + if (IsExperimentActive( + Experiment::TEST_ONLY_TCMALLOC_RING_BUFFER_TRANSFER_CACHE)) { + return TransferCacheImplementation::Ring; + } + + // Consider opt-outs + const char *e = thread_safe_getenv("TCMALLOC_INTERNAL_TRANSFERCACHE_CONTROL"); + if (e) { + if (e[0] == '0') { + return TransferCacheImplementation::Legacy; + } + if (e[0] == '1') { + return TransferCacheImplementation::Ring; + } + Crash(kCrash, __FILE__, __LINE__, "bad env var", e); + } + + // Otherwise, default to ring. + return TransferCacheImplementation::Ring; +} + +int TransferCacheManager::DetermineSizeClassToEvict() { + int t = next_to_evict_.load(std::memory_order_relaxed); + if (t >= kNumClasses) t = 1; + next_to_evict_.store(t + 1, std::memory_order_relaxed); + + // Ask nicely first. + if (implementation_ == TransferCacheImplementation::Ring) { + if (cache_[t].rbtc.HasSpareCapacity(t)) return t; + } else { + if (cache_[t].tc.HasSpareCapacity(t)) return t; + } + + // But insist on the second try. + t = next_to_evict_.load(std::memory_order_relaxed); + if (t >= kNumClasses) t = 1; + next_to_evict_.store(t + 1, std::memory_order_relaxed); + return t; +} + +#endif + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/transfer_cache.h b/contrib/libs/tcmalloc/tcmalloc/transfer_cache.h new file mode 100644 index 0000000000..8b47eefafb --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/transfer_cache.h @@ -0,0 +1,341 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
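DetermineSizeClassToEvict() in transfer_cache.cc above chooses an eviction candidate with a relaxed round-robin cursor: the first candidate is accepted only if it has spare capacity ("ask nicely first"), and the next class is taken unconditionally on the second try. A standalone sketch of that policy, with the real caches replaced by a hypothetical spare-capacity predicate and an arbitrary kNumClasses value chosen for illustration:

// Round-robin eviction candidate selection, modelled after the code above.
// kNumClasses and has_spare are stand-ins, not the real tcmalloc types.
#include <atomic>
#include <cassert>
#include <functional>

constexpr int kNumClasses = 4;  // illustrative value only

int DetermineSizeClassToEvict(std::atomic<int>& next_to_evict,
                              const std::function<bool(int)>& has_spare) {
  // First candidate: advance the cursor, wrapping back to class 1.
  int t = next_to_evict.load(std::memory_order_relaxed);
  if (t >= kNumClasses) t = 1;
  next_to_evict.store(t + 1, std::memory_order_relaxed);
  if (has_spare(t)) return t;  // accept only if it can give something up

  // Second candidate: take the next class regardless of spare capacity.
  t = next_to_evict.load(std::memory_order_relaxed);
  if (t >= kNumClasses) t = 1;
  next_to_evict.store(t + 1, std::memory_order_relaxed);
  return t;
}

int main() {
  std::atomic<int> cursor{1};
  // Class 1 has no spare capacity, class 2 does: the first probe (1) is
  // rejected and the policy settles on class 2.
  int victim =
      DetermineSizeClassToEvict(cursor, [](int cl) { return cl == 2; });
  assert(victim == 2);
  return 0;
}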
+ +#ifndef TCMALLOC_TRANSFER_CACHE_H_ +#define TCMALLOC_TRANSFER_CACHE_H_ + +#include <stddef.h> +#include <stdint.h> + +#include <atomic> +#include <limits> +#include <utility> + +#include "absl/base/attributes.h" +#include "absl/base/const_init.h" +#include "absl/base/internal/spinlock.h" +#include "absl/base/macros.h" +#include "absl/base/thread_annotations.h" +#include "absl/types/span.h" +#include "tcmalloc/central_freelist.h" +#include "tcmalloc/common.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/transfer_cache_stats.h" + +#ifndef TCMALLOC_SMALL_BUT_SLOW +#include "tcmalloc/transfer_cache_internals.h" +#endif + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +enum class TransferCacheImplementation { + Legacy, + None, + Ring, +}; + +absl::string_view TransferCacheImplementationToLabel( + TransferCacheImplementation type); + +#ifndef TCMALLOC_SMALL_BUT_SLOW + +class StaticForwarder { + public: + static size_t class_to_size(int size_class); + static size_t num_objects_to_move(int size_class); + static void *Alloc(size_t size, int alignment = kAlignment) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); +}; + +// This transfer-cache is set up to be sharded per L3 cache. It is backed by +// the non-sharded "normal" TransferCacheManager. +class ShardedTransferCacheManager { + public: + constexpr ShardedTransferCacheManager() {} + + void Init() ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock); + + bool should_use(int cl) const { return active_for_class_[cl]; } + + size_t TotalBytes(); + + void *Pop(int cl) { + void *batch[1]; + const int got = cache_[get_index(cl)].tc.RemoveRange(cl, batch, 1); + return got == 1 ? batch[0] : nullptr; + } + + void Push(int cl, void *ptr) { + cache_[get_index(cl)].tc.InsertRange(cl, {&ptr, 1}); + } + + // All caches not touched since last attempt will return all objects + // to the non-sharded TransferCache. + void Plunder() { + if (cache_ == nullptr || num_shards_ == 0) return; + for (int i = 0; i < num_shards_ * kNumClasses; ++i) { + cache_[i].tc.TryPlunder(cache_[i].tc.freelist().size_class()); + } + } + + private: + // The Manager is set up so that stealing is disabled for this TransferCache. + class Manager : public StaticForwarder { + public: + static constexpr int DetermineSizeClassToEvict() { return -1; } + static constexpr bool MakeCacheSpace(int) { return false; } + static constexpr bool ShrinkCache(int) { return false; } + }; + + // Forwards calls to the unsharded TransferCache. + class BackingTransferCache { + public: + void Init(int cl) { size_class_ = cl; } + void InsertRange(absl::Span<void *> batch) const; + ABSL_MUST_USE_RESULT int RemoveRange(void **batch, int n) const; + int size_class() const { return size_class_; } + + private: + int size_class_ = -1; + }; + + using TransferCache = + internal_transfer_cache::RingBufferTransferCache<BackingTransferCache, + Manager>; + + union Cache { + constexpr Cache() : dummy(false) {} + ~Cache() {} + TransferCache tc; + bool dummy; + }; + + int get_index(int cl) { + const int cpu = tcmalloc::tcmalloc_internal::subtle::percpu::RseqCpuId(); + ASSERT(cpu < 256); + ASSERT(cpu >= 0); + return get_index(cpu, cl); + } + + int get_index(int cpu, int cl) { + const int shard = l3_cache_index_[cpu]; + ASSERT(shard < num_shards_); + const int index = shard * kNumClasses + cl; + ASSERT(index < num_shards_ * kNumClasses); + return index; + } + + // Mapping from cpu to the L3 cache used. 
+ uint8_t l3_cache_index_[CPU_SETSIZE] = {0}; + + Cache *cache_ = nullptr; + int num_shards_ = 0; + bool active_for_class_[kNumClasses] = {false}; +}; + +class TransferCacheManager : public StaticForwarder { + template <typename CentralFreeList, typename Manager> + friend class internal_transfer_cache::TransferCache; + using TransferCache = + internal_transfer_cache::TransferCache<tcmalloc_internal::CentralFreeList, + TransferCacheManager>; + + template <typename CentralFreeList, typename Manager> + friend class internal_transfer_cache::RingBufferTransferCache; + using RingBufferTransferCache = + internal_transfer_cache::RingBufferTransferCache< + tcmalloc_internal::CentralFreeList, TransferCacheManager>; + + public: + constexpr TransferCacheManager() : next_to_evict_(1) {} + + TransferCacheManager(const TransferCacheManager &) = delete; + TransferCacheManager &operator=(const TransferCacheManager &) = delete; + + void Init() ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { + implementation_ = ChooseImplementation(); + for (int i = 0; i < kNumClasses; ++i) { + if (implementation_ == TransferCacheImplementation::Ring) { + new (&cache_[i].rbtc) RingBufferTransferCache(this, i); + } else { + new (&cache_[i].tc) TransferCache(this, i); + } + } + } + + void AcquireInternalLocks() { + for (int i = 0; i < kNumClasses; ++i) { + if (implementation_ == TransferCacheImplementation::Ring) { + cache_[i].rbtc.AcquireInternalLocks(); + } else { + cache_[i].tc.AcquireInternalLocks(); + } + } + } + + void ReleaseInternalLocks() { + for (int i = 0; i < kNumClasses; ++i) { + if (implementation_ == TransferCacheImplementation::Ring) { + cache_[i].rbtc.ReleaseInternalLocks(); + } else { + cache_[i].tc.ReleaseInternalLocks(); + } + } + } + + void InsertRange(int size_class, absl::Span<void *> batch) { + if (implementation_ == TransferCacheImplementation::Ring) { + cache_[size_class].rbtc.InsertRange(size_class, batch); + } else { + cache_[size_class].tc.InsertRange(size_class, batch); + } + } + + ABSL_MUST_USE_RESULT int RemoveRange(int size_class, void **batch, int n) { + if (implementation_ == TransferCacheImplementation::Ring) { + return cache_[size_class].rbtc.RemoveRange(size_class, batch, n); + } else { + return cache_[size_class].tc.RemoveRange(size_class, batch, n); + } + } + + // All caches which have not been modified since the last time this method has + // been called will return all objects to the freelist. + void Plunder() { + for (int i = 0; i < kNumClasses; ++i) { + if (implementation_ == TransferCacheImplementation::Ring) { + cache_[i].rbtc.TryPlunder(i); + } else { + cache_[i].tc.TryPlunder(i); + } + } + } + + // This is not const because the underlying ring-buffer transfer cache + // function requires acquiring a lock. 
+ size_t tc_length(int size_class) { + if (implementation_ == TransferCacheImplementation::Ring) { + return cache_[size_class].rbtc.tc_length(); + } else { + return cache_[size_class].tc.tc_length(); + } + } + + TransferCacheStats GetHitRateStats(int size_class) const { + if (implementation_ == TransferCacheImplementation::Ring) { + return cache_[size_class].rbtc.GetHitRateStats(); + } else { + return cache_[size_class].tc.GetHitRateStats(); + } + } + + const CentralFreeList ¢ral_freelist(int size_class) const { + if (implementation_ == TransferCacheImplementation::Ring) { + return cache_[size_class].rbtc.freelist(); + } else { + return cache_[size_class].tc.freelist(); + } + } + + TransferCacheImplementation implementation() const { return implementation_; } + + private: + static TransferCacheImplementation ChooseImplementation(); + + int DetermineSizeClassToEvict(); + bool ShrinkCache(int size_class) { + if (implementation_ == TransferCacheImplementation::Ring) { + return cache_[size_class].rbtc.ShrinkCache(size_class); + } else { + return cache_[size_class].tc.ShrinkCache(size_class); + } + } + + TransferCacheImplementation implementation_ = + TransferCacheImplementation::Legacy; + std::atomic<int32_t> next_to_evict_; + union Cache { + constexpr Cache() : dummy(false) {} + ~Cache() {} + + TransferCache tc; + RingBufferTransferCache rbtc; + bool dummy; + }; + Cache cache_[kNumClasses]; +} ABSL_CACHELINE_ALIGNED; + +#else + +// For the small memory model, the transfer cache is not used. +class TransferCacheManager { + public: + constexpr TransferCacheManager() : freelist_() {} + TransferCacheManager(const TransferCacheManager &) = delete; + TransferCacheManager &operator=(const TransferCacheManager &) = delete; + + void Init() ABSL_EXCLUSIVE_LOCKS_REQUIRED(pageheap_lock) { + for (int i = 0; i < kNumClasses; ++i) { + freelist_[i].Init(i); + } + } + + void InsertRange(int size_class, absl::Span<void *> batch) { + freelist_[size_class].InsertRange(batch); + } + + ABSL_MUST_USE_RESULT int RemoveRange(int size_class, void **batch, int n) { + return freelist_[size_class].RemoveRange(batch, n); + } + + static constexpr size_t tc_length(int size_class) { return 0; } + + static constexpr TransferCacheStats GetHitRateStats(int size_class) { + return {0, 0, 0, 0}; + } + + const CentralFreeList ¢ral_freelist(int size_class) const { + return freelist_[size_class]; + } + + TransferCacheImplementation implementation() const { + return TransferCacheImplementation::None; + } + + void AcquireInternalLocks() {} + void ReleaseInternalLocks() {} + + private: + CentralFreeList freelist_[kNumClasses]; +} ABSL_CACHELINE_ALIGNED; + +// A trivial no-op implementation. 
+struct ShardedTransferCacheManager { + static constexpr void Init() {} + static constexpr bool should_use(int cl) { return false; } + static constexpr void *Pop(int cl) { return nullptr; } + static constexpr void Push(int cl, void *ptr) {} + static constexpr size_t TotalBytes() { return 0; } + static constexpr void Plunder() {} +}; + +#endif + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_TRANSFER_CACHE_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/transfer_cache_benchmark.cc b/contrib/libs/tcmalloc/tcmalloc/transfer_cache_benchmark.cc new file mode 100644 index 0000000000..70b1dcffc1 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/transfer_cache_benchmark.cc @@ -0,0 +1,149 @@ +// Copyright 2020 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include <atomic> + +#include "absl/types/optional.h" +#include "benchmark/benchmark.h" +#include "tcmalloc/central_freelist.h" +#include "tcmalloc/common.h" +#include "tcmalloc/mock_central_freelist.h" +#include "tcmalloc/mock_transfer_cache.h" +#include "tcmalloc/transfer_cache_internals.h" +#include "tcmalloc/transfer_cache_stats.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +using TransferCacheEnv = + FakeTransferCacheEnvironment<internal_transfer_cache::TransferCache< + MinimalFakeCentralFreeList, FakeTransferCacheManager>>; +using RingBufferTransferCacheEnv = FakeTransferCacheEnvironment< + internal_transfer_cache::RingBufferTransferCache<MinimalFakeCentralFreeList, + FakeTransferCacheManager>>; +static constexpr int kSizeClass = 0; + +template <typename Env> +void BM_CrossThread(benchmark::State& state) { + using Cache = typename Env::TransferCache; + const int kBatchSize = Env::kBatchSize; + const int kMaxObjectsToMove = Env::kMaxObjectsToMove; + void* batch[kMaxObjectsToMove]; + + struct CrossThreadState { + CrossThreadState() : m{}, c{Cache(&m, 1), Cache(&m, 1)} {} + FakeTransferCacheManager m; + Cache c[2]; + }; + + static CrossThreadState* s = nullptr; + if (state.thread_index == 0) { + s = new CrossThreadState(); + for (int i = 0; i < ::tcmalloc::tcmalloc_internal::internal_transfer_cache:: + kInitialCapacityInBatches / + 2; + ++i) { + for (Cache& c : s->c) { + c.freelist().AllocateBatch(batch, kBatchSize); + c.InsertRange(kSizeClass, {batch, kBatchSize}); + } + } + } + + int src = state.thread_index % 2; + int dst = (src + 1) % 2; + for (auto iter : state) { + benchmark::DoNotOptimize(batch); + (void)s->c[src].RemoveRange(kSizeClass, batch, kBatchSize); + benchmark::DoNotOptimize(batch); + s->c[dst].InsertRange(kSizeClass, {batch, kBatchSize}); + benchmark::DoNotOptimize(batch); + } + if (state.thread_index == 0) { + TransferCacheStats stats{}; + for (Cache& c : s->c) { + TransferCacheStats other = c.GetHitRateStats(); + stats.insert_hits += other.insert_hits; + stats.insert_misses += other.insert_misses; + stats.remove_hits += other.remove_hits; + stats.remove_misses += 
other.remove_misses; + } + + state.counters["insert_hit_ratio"] = + static_cast<double>(stats.insert_hits) / + (stats.insert_hits + stats.insert_misses); + state.counters["remove_hit_ratio"] = + static_cast<double>(stats.remove_hits) / + (stats.remove_hits + stats.remove_misses); + delete s; + s = nullptr; + } +} + +template <typename Env> +void BM_InsertRange(benchmark::State& state) { + const int kBatchSize = Env::kBatchSize; + const int kMaxObjectsToMove = Env::kMaxObjectsToMove; + + // optional to have more precise control of when the destruction occurs, as + // we want to avoid polluting the timing with the dtor. + absl::optional<Env> e; + void* batch[kMaxObjectsToMove]; + for (auto iter : state) { + state.PauseTiming(); + e.emplace(); + e->central_freelist().AllocateBatch(batch, kBatchSize); + benchmark::DoNotOptimize(e); + benchmark::DoNotOptimize(batch); + state.ResumeTiming(); + + e->transfer_cache().InsertRange(kSizeClass, {batch, kBatchSize}); + } +} + +template <typename Env> +void BM_RemoveRange(benchmark::State& state) { + const int kBatchSize = Env::kBatchSize; + const int kMaxObjectsToMove = Env::kMaxObjectsToMove; + + // optional to have more precise control of when the destruction occurs, as + // we want to avoid polluting the timing with the dtor. + absl::optional<Env> e; + void* batch[kMaxObjectsToMove]; + for (auto iter : state) { + state.PauseTiming(); + e.emplace(); + e->Insert(kBatchSize); + benchmark::DoNotOptimize(e); + state.ResumeTiming(); + + (void)e->transfer_cache().RemoveRange(kSizeClass, batch, kBatchSize); + benchmark::DoNotOptimize(batch); + } +} + +BENCHMARK_TEMPLATE(BM_CrossThread, TransferCacheEnv)->ThreadRange(2, 64); +BENCHMARK_TEMPLATE(BM_CrossThread, RingBufferTransferCacheEnv) + ->ThreadRange(2, 64); +BENCHMARK_TEMPLATE(BM_InsertRange, TransferCacheEnv); +BENCHMARK_TEMPLATE(BM_InsertRange, RingBufferTransferCacheEnv); +BENCHMARK_TEMPLATE(BM_RemoveRange, TransferCacheEnv); +BENCHMARK_TEMPLATE(BM_RemoveRange, RingBufferTransferCacheEnv); + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/transfer_cache_fuzz.cc b/contrib/libs/tcmalloc/tcmalloc/transfer_cache_fuzz.cc new file mode 100644 index 0000000000..a31b06135e --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/transfer_cache_fuzz.cc @@ -0,0 +1,73 @@ +// Copyright 2020 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
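The BM_InsertRange and BM_RemoveRange benchmarks above rebuild their environment on every iteration but keep that setup out of the measurement by bracketing it with State::PauseTiming()/ResumeTiming(), holding the environment in an optional so its construction and destruction stay untimed as well. A minimal google/benchmark skeleton of the same pattern, with a plain std::vector standing in for the transfer cache environment:

// Untimed per-iteration setup via PauseTiming()/ResumeTiming(), mirroring the
// absl::optional<Env> pattern used by the benchmarks above.
#include <optional>
#include <vector>

#include "benchmark/benchmark.h"

static void BM_PushWithUntimedSetup(benchmark::State& state) {
  // std::optional gives precise control over when the working set is
  // (re)constructed, so neither construction nor destruction is timed.
  std::optional<std::vector<int>> work;
  for (auto _ : state) {
    state.PauseTiming();  // rebuild the working set outside the measurement
    work.emplace();
    work->reserve(1024);
    state.ResumeTiming();

    for (int i = 0; i < 1024; ++i) work->push_back(i);  // timed portion
    benchmark::DoNotOptimize(work->data());
  }
}
BENCHMARK(BM_PushWithUntimedSetup);

BENCHMARK_MAIN();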
+ +#include <cstddef> +#include <cstdint> + +#include "tcmalloc/mock_central_freelist.h" +#include "tcmalloc/mock_transfer_cache.h" +#include "tcmalloc/transfer_cache_internals.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace { + +using TransferCache = tcmalloc_internal::internal_transfer_cache::TransferCache< + tcmalloc_internal::MockCentralFreeList, + tcmalloc_internal::MockTransferCacheManager>; +using TransferCacheEnv = + tcmalloc_internal::FakeTransferCacheEnvironment<TransferCache>; + +using RingBufferTransferCache = + tcmalloc_internal::internal_transfer_cache::RingBufferTransferCache< + tcmalloc_internal::MockCentralFreeList, + tcmalloc_internal::MockTransferCacheManager>; +using RingBufferTransferCacheEnv = + tcmalloc_internal::FakeTransferCacheEnvironment<RingBufferTransferCache>; + +template <typename Env> +int RunFuzzer(const uint8_t *data, size_t size) { + Env env; + for (int i = 0; i < size; ++i) { + switch (data[i] % 10) { + case 0: + env.Grow(); + break; + case 1: + env.Shrink(); + break; + default: + if (++i < size) { + int batch = data[i] % 32; + if (data[i - 1] % 2) { + env.Insert(batch); + } else { + env.Remove(batch); + } + } + break; + } + } + return 0; +} + +} // namespace +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + tcmalloc::RunFuzzer<tcmalloc::TransferCacheEnv>(data, size); + tcmalloc::RunFuzzer<tcmalloc::RingBufferTransferCacheEnv>(data, size); + return 0; +} diff --git a/contrib/libs/tcmalloc/tcmalloc/transfer_cache_internals.h b/contrib/libs/tcmalloc/tcmalloc/transfer_cache_internals.h new file mode 100644 index 0000000000..26d18fd99d --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/transfer_cache_internals.h @@ -0,0 +1,896 @@ +// Copyright 2020 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
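The fuzz harness above maps each input byte to a cache operation: values 0 and 1 grow or shrink the cache, and any other value consumes a second byte whose low bits select an insert or a remove of a small batch. The sketch below replays the same decoding against a trivial stand-in so it can run as an ordinary program; FakeCache and its bookkeeping are invented for illustration and do not model the real transfer cache:

// Byte-to-operation decoding as in the fuzzer above, driven by a fixed input.
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdio>

struct FakeCache {
  int capacity = 0;
  int used = 0;
  void Grow() { ++capacity; }
  void Shrink() { capacity = std::max(capacity - 1, 0); }
  void Insert(int n) { used = std::min(used + n, capacity * 32); }
  void Remove(int n) { used = std::max(used - n, 0); }
};

void RunFuzzInput(const uint8_t* data, size_t size) {
  FakeCache cache;
  for (size_t i = 0; i < size; ++i) {
    switch (data[i] % 10) {
      case 0:
        cache.Grow();
        break;
      case 1:
        cache.Shrink();
        break;
      default:
        if (++i < size) {
          int batch = data[i] % 32;
          if (data[i - 1] % 2) {
            cache.Insert(batch);
          } else {
            cache.Remove(batch);
          }
        }
        break;
    }
  }
  std::printf("capacity=%d used=%d\n", cache.capacity, cache.used);
}

int main() {
  const uint8_t input[] = {0, 0, 2, 17, 3, 8, 1};
  RunFuzzInput(input, sizeof(input));
  return 0;
}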
+ +#ifndef TCMALLOC_TRANSFER_CACHE_INTERNAL_H_ +#define TCMALLOC_TRANSFER_CACHE_INTERNAL_H_ + +#include <sched.h> +#include <stddef.h> +#include <stdint.h> + +#include <cmath> +#include <limits> + +#include "absl/numeric/bits.h" +#include "tcmalloc/internal/config.h" + +#ifdef __x86_64__ +#include <emmintrin.h> +#include <xmmintrin.h> +#endif + +#include <algorithm> +#include <atomic> +#include <cstddef> +#include <utility> + +#include "absl/base/attributes.h" +#include "absl/base/casts.h" +#include "absl/base/const_init.h" +#include "absl/base/internal/spinlock.h" +#include "absl/base/macros.h" +#include "absl/base/optimization.h" +#include "absl/base/thread_annotations.h" +#include "absl/synchronization/internal/futex.h" +#include "absl/types/optional.h" +#include "absl/types/span.h" +#include "tcmalloc/central_freelist.h" +#include "tcmalloc/common.h" +#include "tcmalloc/experiment.h" +#include "tcmalloc/internal/atomic_stats_counter.h" +#include "tcmalloc/internal/logging.h" +#include "tcmalloc/tracking.h" +#include "tcmalloc/transfer_cache_stats.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc::tcmalloc_internal::internal_transfer_cache { + +struct alignas(8) SizeInfo { + int32_t used; + int32_t capacity; +}; +static constexpr int kMaxCapacityInBatches = 64; +static constexpr int kInitialCapacityInBatches = 16; + +// TransferCache is used to cache transfers of +// sizemap.num_objects_to_move(size_class) back and forth between +// thread caches and the central cache for a given size class. +template <typename CentralFreeList, typename TransferCacheManager> +class TransferCache { + public: + using Manager = TransferCacheManager; + using FreeList = CentralFreeList; + + TransferCache(Manager *owner, int cl) + : TransferCache(owner, cl, CapacityNeeded(cl)) {} + + struct Capacity { + int capacity; + int max_capacity; + }; + + TransferCache(Manager *owner, int cl, Capacity capacity) + : owner_(owner), + lock_(absl::kConstInit, absl::base_internal::SCHEDULE_KERNEL_ONLY), + max_capacity_(capacity.max_capacity), + slot_info_(SizeInfo({0, capacity.capacity})), + low_water_mark_(std::numeric_limits<int>::max()), + slots_(nullptr), + freelist_do_not_access_directly_() { + freelist().Init(cl); + slots_ = max_capacity_ != 0 ? reinterpret_cast<void **>(owner_->Alloc( + max_capacity_ * sizeof(void *))) + : nullptr; + } + + TransferCache(const TransferCache &) = delete; + TransferCache &operator=(const TransferCache &) = delete; + + // Compute initial and max capacity that we should configure this cache for. + static Capacity CapacityNeeded(size_t cl) { + // We need at least 2 slots to store list head and tail. + static_assert(kMinObjectsToMove >= 2); + + const size_t bytes = Manager::class_to_size(cl); + if (cl <= 0 || bytes <= 0) return {0, 0}; + + // Limit the maximum size of the cache based on the size class. If this + // is not done, large size class objects will consume a lot of memory if + // they just sit in the transfer cache. + const size_t objs_to_move = Manager::num_objects_to_move(cl); + ASSERT(objs_to_move > 0); + + // Starting point for the maximum number of entries in the transfer cache. + // This actual maximum for a given size class may be lower than this + // maximum value. + int max_capacity = kMaxCapacityInBatches * objs_to_move; + // A transfer cache freelist can have anywhere from 0 to + // max_capacity_ slots to put link list chains into. 
+ int capacity = kInitialCapacityInBatches * objs_to_move; + + // Limit each size class cache to at most 1MB of objects or one entry, + // whichever is greater. Total transfer cache memory used across all + // size classes then can't be greater than approximately + // 1MB * kMaxNumTransferEntries. + max_capacity = std::min<int>( + max_capacity, + std::max<int>(objs_to_move, + (1024 * 1024) / (bytes * objs_to_move) * objs_to_move)); + capacity = std::min(capacity, max_capacity); + + return {capacity, max_capacity}; + } + + // This transfercache implementation does not deal well with non-batch sized + // inserts and removes. + static constexpr bool IsFlexible() { return false; } + + // These methods all do internal locking. + + // Insert the specified batch into the transfer cache. N is the number of + // elements in the range. RemoveRange() is the opposite operation. + void InsertRange(int size_class, absl::Span<void *> batch) + ABSL_LOCKS_EXCLUDED(lock_) { + const int N = batch.size(); + const int B = Manager::num_objects_to_move(size_class); + ASSERT(0 < N && N <= B); + auto info = slot_info_.load(std::memory_order_relaxed); + if (N == B) { + if (info.used + N <= max_capacity_) { + absl::base_internal::SpinLockHolder h(&lock_); + if (MakeCacheSpace(size_class, N)) { + // MakeCacheSpace can drop the lock, so refetch + info = slot_info_.load(std::memory_order_relaxed); + info.used += N; + SetSlotInfo(info); + + void **entry = GetSlot(info.used - N); + memcpy(entry, batch.data(), sizeof(void *) * N); + tracking::Report(kTCInsertHit, size_class, 1); + insert_hits_.LossyAdd(1); + return; + } + } + + insert_misses_.Add(1); + } else { + insert_non_batch_misses_.Add(1); + } + + tracking::Report(kTCInsertMiss, size_class, 1); + freelist().InsertRange(batch); + } + + // Returns the actual number of fetched elements and stores elements in the + // batch. + ABSL_MUST_USE_RESULT int RemoveRange(int size_class, void **batch, int N) + ABSL_LOCKS_EXCLUDED(lock_) { + ASSERT(N > 0); + const int B = Manager::num_objects_to_move(size_class); + auto info = slot_info_.load(std::memory_order_relaxed); + if (N == B) { + if (info.used >= N) { + absl::base_internal::SpinLockHolder h(&lock_); + // Refetch with the lock + info = slot_info_.load(std::memory_order_relaxed); + if (info.used >= N) { + info.used -= N; + SetSlotInfo(info); + void **entry = GetSlot(info.used); + memcpy(batch, entry, sizeof(void *) * N); + tracking::Report(kTCRemoveHit, size_class, 1); + remove_hits_.LossyAdd(1); + low_water_mark_.store( + std::min(low_water_mark_.load(std::memory_order_acquire), + info.used), + std::memory_order_release); + return N; + } + } + + remove_misses_.Add(1); + } else { + remove_non_batch_misses_.Add(1); + } + low_water_mark_.store(0, std::memory_order_release); + + tracking::Report(kTCRemoveMiss, size_class, 1); + return freelist().RemoveRange(batch, N); + } + + // If this object has not been touched since the last attempt, then + // return all objects to 'freelist()'. 
+ void TryPlunder(int size_class) ABSL_LOCKS_EXCLUDED(lock_) { + if (max_capacity_ == 0) return; + int low_water_mark = low_water_mark_.load(std::memory_order_acquire); + low_water_mark_.store(std::numeric_limits<int>::max(), + std::memory_order_release); + while (low_water_mark > 0) { + if (!lock_.TryLock()) return; + if (low_water_mark_.load(std::memory_order_acquire) != + std::numeric_limits<int>::max()) { + lock_.Unlock(); + return; + } + const int B = Manager::num_objects_to_move(size_class); + SizeInfo info = GetSlotInfo(); + if (info.used == 0) { + lock_.Unlock(); + return; + } + const size_t num_to_move = std::min(B, info.used); + void *buf[kMaxObjectsToMove]; + void **const entry = GetSlot(info.used - B); + memcpy(buf, entry, sizeof(void *) * B); + info.used -= num_to_move; + low_water_mark -= num_to_move; + SetSlotInfo(info); + lock_.Unlock(); + tracking::Report(kTCElementsPlunder, size_class, num_to_move); + freelist().InsertRange({buf, num_to_move}); + } + } + // Returns the number of free objects in the transfer cache. + size_t tc_length() const { + return static_cast<size_t>(slot_info_.load(std::memory_order_relaxed).used); + } + + // Returns the number of transfer cache insert/remove hits/misses. + TransferCacheStats GetHitRateStats() const ABSL_LOCKS_EXCLUDED(lock_) { + TransferCacheStats stats; + + stats.insert_hits = insert_hits_.value(); + stats.remove_hits = remove_hits_.value(); + stats.insert_misses = insert_misses_.value(); + stats.insert_non_batch_misses = insert_non_batch_misses_.value(); + stats.remove_misses = remove_misses_.value(); + stats.remove_non_batch_misses = remove_non_batch_misses_.value(); + + // For performance reasons, we only update a single atomic as part of the + // actual allocation operation. For reporting, we keep reporting all + // misses together and separately break-out how many of those misses were + // non-batch sized. + stats.insert_misses += stats.insert_non_batch_misses; + stats.remove_misses += stats.remove_non_batch_misses; + + return stats; + } + + SizeInfo GetSlotInfo() const { + return slot_info_.load(std::memory_order_relaxed); + } + + // REQUIRES: lock is held. + // Tries to make room for N elements. If the cache is full it will try to + // expand it at the cost of some other cache size. Return false if there is + // no space. + bool MakeCacheSpace(int size_class, int N) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) { + auto info = slot_info_.load(std::memory_order_relaxed); + // Is there room in the cache? + if (info.used + N <= info.capacity) return true; + // Check if we can expand this cache? + if (info.capacity + N > max_capacity_) return false; + + int to_evict = owner_->DetermineSizeClassToEvict(); + if (to_evict == size_class) return false; + + // Release the held lock before the other instance tries to grab its lock. + lock_.Unlock(); + bool made_space = owner_->ShrinkCache(to_evict); + lock_.Lock(); + + if (!made_space) return false; + + // Succeeded in evicting, we're going to make our cache larger. However, we + // may have dropped and re-acquired the lock, so the cache_size may have + // changed. Therefore, check and verify that it is still OK to increase the + // cache_size. 
+ info = slot_info_.load(std::memory_order_relaxed); + if (info.capacity + N > max_capacity_) return false; + info.capacity += N; + SetSlotInfo(info); + return true; + } + + bool HasSpareCapacity(int size_class) const { + int n = Manager::num_objects_to_move(size_class); + auto info = GetSlotInfo(); + return info.capacity - info.used >= n; + } + + // Takes lock_ and invokes MakeCacheSpace() on this cache. Returns true if it + // succeeded at growing the cache by a batch size. + bool GrowCache(int size_class) ABSL_LOCKS_EXCLUDED(lock_) { + absl::base_internal::SpinLockHolder h(&lock_); + return MakeCacheSpace(size_class, Manager::num_objects_to_move(size_class)); + } + + // REQUIRES: lock_ is *not* held. + // Tries to shrink the Cache. Return false if it failed to shrink the cache. + // Decreases cache_slots_ on success. + bool ShrinkCache(int size_class) ABSL_LOCKS_EXCLUDED(lock_) { + int N = Manager::num_objects_to_move(size_class); + + void *to_free[kMaxObjectsToMove]; + int num_to_free; + { + absl::base_internal::SpinLockHolder h(&lock_); + auto info = slot_info_.load(std::memory_order_relaxed); + if (info.capacity == 0) return false; + if (info.capacity < N) return false; + + N = std::min(N, info.capacity); + int unused = info.capacity - info.used; + if (N <= unused) { + info.capacity -= N; + SetSlotInfo(info); + return true; + } + + num_to_free = N - unused; + info.capacity -= N; + info.used -= num_to_free; + SetSlotInfo(info); + + // Our internal slot array may get overwritten as soon as we drop the + // lock, so copy the items to free to an on stack buffer. + memcpy(to_free, GetSlot(info.used), sizeof(void *) * num_to_free); + } + + // Access the freelist without holding the lock. + freelist().InsertRange({to_free, static_cast<uint64_t>(num_to_free)}); + return true; + } + + // This is a thin wrapper for the CentralFreeList. It is intended to ensure + // that we are not holding lock_ when we access it. + ABSL_ATTRIBUTE_ALWAYS_INLINE FreeList &freelist() ABSL_LOCKS_EXCLUDED(lock_) { + return freelist_do_not_access_directly_; + } + + // The const version of the wrapper, needed to call stats on + ABSL_ATTRIBUTE_ALWAYS_INLINE const FreeList &freelist() const + ABSL_LOCKS_EXCLUDED(lock_) { + return freelist_do_not_access_directly_; + } + + void AcquireInternalLocks() + { + freelist().AcquireInternalLocks(); + lock_.Lock(); + } + + void ReleaseInternalLocks() + { + lock_.Unlock(); + freelist().ReleaseInternalLocks(); + } + + private: + // Returns first object of the i-th slot. + void **GetSlot(size_t i) ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) { + return slots_ + i; + } + + void SetSlotInfo(SizeInfo info) { + ASSERT(0 <= info.used); + ASSERT(info.used <= info.capacity); + ASSERT(info.capacity <= max_capacity_); + slot_info_.store(info, std::memory_order_relaxed); + } + + Manager *const owner_; + + // This lock protects all the data members. used_slots_ and cache_slots_ + // may be looked at without holding the lock. + absl::base_internal::SpinLock lock_; + + // Maximum size of the cache. + const int32_t max_capacity_; + + // insert_hits_ and remove_hits_ are logically guarded by lock_ for mutations + // and use LossyAdd, but the thread annotations cannot indicate that we do not + // need a lock for reads. + StatsCounter insert_hits_; + StatsCounter remove_hits_; + // Miss counters do not hold lock_, so they use Add. 
+ StatsCounter insert_misses_; + StatsCounter insert_non_batch_misses_; + StatsCounter remove_misses_; + StatsCounter remove_non_batch_misses_; + + // Number of currently used and available cached entries in slots_. This + // variable is updated under a lock but can be read without one. + // INVARIANT: [0 <= slot_info_.used <= slot_info.capacity <= max_cache_slots_] + std::atomic<SizeInfo> slot_info_; + + // Lowest value of "slot_info_.used" since last call to TryPlunder. All + // elements not used for a full cycle (2 seconds) are unlikely to get used + // again. + std::atomic<int> low_water_mark_; + + // Pointer to array of free objects. Use GetSlot() to get pointers to + // entries. + void **slots_ ABSL_GUARDED_BY(lock_); + + FreeList freelist_do_not_access_directly_; +} ABSL_CACHELINE_ALIGNED; + +struct RingBufferSizeInfo { + // The starting index of data stored in the ring buffer. + int32_t start; + // How many elements are stored. + int32_t used; + // How many elements are allowed to be stored at most. + int32_t capacity; +}; + +// RingBufferTransferCache is a transfer cache which stores cache entries in a +// ring buffer instead of a stack. +template <typename CentralFreeList, typename TransferCacheManager> +class RingBufferTransferCache { + public: + using Manager = TransferCacheManager; + using FreeList = CentralFreeList; + + RingBufferTransferCache(Manager *owner, int cl) + : RingBufferTransferCache(owner, cl, CapacityNeeded(cl)) {} + + RingBufferTransferCache( + Manager *owner, int cl, + typename TransferCache<CentralFreeList, TransferCacheManager>::Capacity + capacity) + : lock_(absl::kConstInit, absl::base_internal::SCHEDULE_KERNEL_ONLY), + slot_info_(RingBufferSizeInfo({0, 0, capacity.capacity})), + max_capacity_(capacity.max_capacity), + freelist_do_not_access_directly_(), + owner_(owner) { + freelist().Init(cl); + if (max_capacity_ == 0) { + // We don't allocate a buffer. Set slots_bitmask_ to 0 to prevent UB. + slots_bitmask_ = 0; + } else { + const size_t slots_size = absl::bit_ceil<size_t>(max_capacity_); + ASSERT(slots_size >= max_capacity_); + ASSERT(slots_size < max_capacity_ * 2); + slots_ = + reinterpret_cast<void **>(owner_->Alloc(slots_size * sizeof(void *))); + slots_bitmask_ = slots_size - 1; + } + } + + RingBufferTransferCache(const RingBufferTransferCache &) = delete; + RingBufferTransferCache &operator=(const RingBufferTransferCache &) = delete; + + // This transfercache implementation handles non-batch sized + // inserts and removes efficiently. + static constexpr bool IsFlexible() { return true; } + + // These methods all do internal locking. + + void AcquireInternalLocks() + { + freelist().AcquireInternalLocks(); + lock_.Lock(); + } + + void ReleaseInternalLocks() + { + lock_.Unlock(); + freelist().ReleaseInternalLocks(); + } + + // Insert the specified batch into the transfer cache. N is the number of + // elements in the range. RemoveRange() is the opposite operation. 
+ void InsertRange(int size_class, absl::Span<void *> batch) + ABSL_LOCKS_EXCLUDED(lock_) { + const int N = batch.size(); + const int B = Manager::num_objects_to_move(size_class); + ASSERT(0 < N && N <= B); + void *to_free_buf[kMaxObjectsToMove]; + int to_free_num = 0; + + { + absl::base_internal::SpinLockHolder h(&lock_); + RingBufferSizeInfo info = GetSlotInfo(); + if (info.used + N <= max_capacity_) { + const bool cache_grown = MakeCacheSpace(size_class, N); + // MakeCacheSpace can drop the lock, so refetch + info = GetSlotInfo(); + if (cache_grown) { + CopyIntoEnd(batch.data(), N, info); + SetSlotInfo(info); + tracking::Report(kTCInsertHit, size_class, 1); + insert_hits_.LossyAdd(1); + return; + } + } + + // If we arrive here, this means that there is not enough capacity in the + // current cache to include the new items, and we cannot grow it. + + // We want to return up to `B` items from the transfer cache and currently + // inserted items. + const int returned_from_cache = std::min<int>(B, info.used); + if (returned_from_cache > 0) { + CopyOutOfStart(to_free_buf, returned_from_cache, info); + } + to_free_num = returned_from_cache; + if (info.used > 0) { + // We didn't have to return the whole cache. This means we can copy + // in all of the inserted items. + ASSERT(info.used + N <= info.capacity); + CopyIntoEnd(batch.data(), N, info); + } else { + // The transfercache is empty. We might still not have enough capacity + // to store all of the inserted items though. + const int to_insert_start = std::max(0, N - info.capacity); + ASSERT(returned_from_cache + to_insert_start <= B); + if (to_insert_start > 0) { + // We also want to return some of the inserted items in this case. + memcpy(to_free_buf + to_free_num, batch.data(), + to_insert_start * sizeof(void *)); + to_free_num += to_insert_start; + } + // This is only false if info.capacity is 0. + if (ABSL_PREDICT_TRUE(N > to_insert_start)) { + CopyIntoEnd(batch.data() + to_insert_start, N - to_insert_start, + info); + } + } + SetSlotInfo(info); + } + // It can work out that we manage to insert all items into the cache after + // all. + if (to_free_num > 0) { + ASSERT(to_free_num <= kMaxObjectsToMove); + ASSERT(to_free_num <= B); + insert_misses_.Add(1); + tracking::Report(kTCInsertMiss, size_class, 1); + freelist().InsertRange(absl::Span<void *>(to_free_buf, to_free_num)); + } + } + + // Returns the actual number of fetched elements and stores elements in the + // batch. This might return less than N if the transfercache is non-empty but + // contains fewer elements than N. It is guaranteed to return at least 1 as + // long as either the transfercache or the free list are not empty. + ABSL_MUST_USE_RESULT int RemoveRange(int size_class, void **batch, int N) + ABSL_LOCKS_EXCLUDED(lock_) { + ASSERT(N > 0); + + { + absl::base_internal::SpinLockHolder h(&lock_); + RingBufferSizeInfo info = GetSlotInfo(); + if (info.used > 0) { + // Return up to however much we have in our local cache. + const int copied = std::min<int>(N, info.used); + CopyOutOfEnd(batch, copied, info); + SetSlotInfo(info); + tracking::Report(kTCRemoveHit, size_class, 1); + remove_hits_.LossyAdd(1); + low_water_mark_ = std::min(low_water_mark_, info.used); + return copied; + } + low_water_mark_ = 0; + } + + remove_misses_.Add(1); + tracking::Report(kTCRemoveMiss, size_class, 1); + return freelist().RemoveRange(batch, N); + } + + // Return all objects not touched since last call to this function. 
+ void TryPlunder(int size_class) ABSL_LOCKS_EXCLUDED(lock_) { + if (max_capacity_ == 0) return; + // If the lock is being held, someone is modifying the cache. + if (!lock_.TryLock()) return; + int low_water_mark = low_water_mark_; + low_water_mark_ = std::numeric_limits<int>::max(); + const int B = Manager::num_objects_to_move(size_class); + while (slot_info_.used > 0 && low_water_mark >= B && + (low_water_mark_ == std::numeric_limits<int>::max())) { + const size_t num_to_move(std::min(B, slot_info_.used)); + void *buf[kMaxObjectsToMove]; + CopyOutOfEnd(buf, num_to_move, slot_info_); + low_water_mark -= num_to_move; + lock_.Unlock(); + freelist().InsertRange({buf, num_to_move}); + tracking::Report(kTCElementsPlunder, size_class, num_to_move); + // If someone is starting to use the cache, stop doing this. + if (!lock_.TryLock()) { + return; + } + } + lock_.Unlock(); + } + + // Returns the number of free objects in the transfer cache. + size_t tc_length() ABSL_LOCKS_EXCLUDED(lock_) { + absl::base_internal::SpinLockHolder h(&lock_); + return static_cast<size_t>(GetSlotInfo().used); + } + + // Returns the number of transfer cache insert/remove hits/misses. + TransferCacheStats GetHitRateStats() const ABSL_LOCKS_EXCLUDED(lock_) { + TransferCacheStats stats; + + stats.insert_hits = insert_hits_.value(); + stats.remove_hits = remove_hits_.value(); + stats.insert_misses = insert_misses_.value(); + stats.insert_non_batch_misses = 0; + stats.remove_misses = remove_misses_.value(); + stats.remove_non_batch_misses = 0; + + return stats; + } + + RingBufferSizeInfo GetSlotInfo() const ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) { + return slot_info_; + } + + // REQUIRES: lock is held. + // Tries to make room for N elements. If the cache is full it will try to + // expand it at the cost of some other cache size. Return false if there is + // no space. + bool MakeCacheSpace(int size_class, int N) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) { + // Increase capacity in number of batches, as we do when reducing capacity. + const int B = Manager::num_objects_to_move(size_class); + ASSERT(B >= N); + + auto info = GetSlotInfo(); + // Is there room in the cache? + if (info.used + N <= info.capacity) return true; + // Check if we can expand this cache? + if (info.capacity + B > max_capacity_) return false; + + // Release the held lock before the other instance tries to grab its lock. + lock_.Unlock(); + int to_evict = owner_->DetermineSizeClassToEvict(); + if (to_evict == size_class) { + lock_.Lock(); + return false; + } + bool made_space = owner_->ShrinkCache(to_evict); + lock_.Lock(); + + if (!made_space) return false; + + // Succeeded in evicting, we're going to make our cache larger. However, we + // have dropped and re-acquired the lock, so slot_info_ may have + // changed. Therefore, check and verify that it is still OK to increase the + // cache size. + info = GetSlotInfo(); + if (info.capacity + B > max_capacity_) return false; + info.capacity += B; + SetSlotInfo(info); + return true; + } + + bool HasSpareCapacity(int size_class) ABSL_LOCKS_EXCLUDED(lock_) { + const int n = Manager::num_objects_to_move(size_class); + absl::base_internal::SpinLockHolder h(&lock_); + const auto info = GetSlotInfo(); + return info.capacity - info.used >= n; + } + + // Takes lock_ and invokes MakeCacheSpace() on this cache. Returns true if it + // succeeded at growing the cache by a batch size. 
+ bool GrowCache(int size_class) ABSL_LOCKS_EXCLUDED(lock_) { + absl::base_internal::SpinLockHolder h(&lock_); + return MakeCacheSpace(size_class, Manager::num_objects_to_move(size_class)); + } + + // REQUIRES: lock_ is *not* held. + // Tries to shrink the Cache. Return false if it failed to shrink the cache. + // Decreases cache_slots_ on success. + bool ShrinkCache(int size_class) ABSL_LOCKS_EXCLUDED(lock_) { + const int N = Manager::num_objects_to_move(size_class); + + void *to_free[kMaxObjectsToMove]; + int num_to_free; + { + absl::base_internal::SpinLockHolder h(&lock_); + auto info = GetSlotInfo(); + if (info.capacity == 0) return false; + if (info.capacity < N) return false; + + const int unused = info.capacity - info.used; + if (N <= unused) { + info.capacity -= N; + SetSlotInfo(info); + return true; + } + + num_to_free = N - unused; + + // Remove from the beginning of the buffer which holds the oldest entries. + // Our internal slot array may get overwritten as soon as we drop the + // lock, so copy the items to free to an on stack buffer. + CopyOutOfStart(to_free, num_to_free, info); + low_water_mark_ = info.used; + info.capacity -= N; + SetSlotInfo(info); + } + + // Access the freelist without holding the lock. + freelist().InsertRange({to_free, static_cast<uint64_t>(num_to_free)}); + return true; + } + + // This is a thin wrapper for the CentralFreeList. It is intended to ensure + // that we are not holding lock_ when we access it. + ABSL_ATTRIBUTE_ALWAYS_INLINE FreeList &freelist() ABSL_LOCKS_EXCLUDED(lock_) { + return freelist_do_not_access_directly_; + } + + // The const version of the wrapper, needed to call stats on + ABSL_ATTRIBUTE_ALWAYS_INLINE const FreeList &freelist() const + ABSL_LOCKS_EXCLUDED(lock_) { + return freelist_do_not_access_directly_; + } + + private: + // Due to decreased downward pressure, the ring buffer based transfer cache + // contains on average more bytes than the legacy implementation. + // To counteract this, decrease the capacity (but not max capacity). + // TODO(b/161927252): Revisit TransferCache rebalancing strategy + static typename TransferCache<CentralFreeList, TransferCacheManager>::Capacity + CapacityNeeded(int cl) { + auto capacity = + TransferCache<CentralFreeList, TransferCacheManager>::CapacityNeeded( + cl); + const int N = Manager::num_objects_to_move(cl); + if (N == 0) return {0, 0}; + ASSERT(capacity.capacity % N == 0); + // We still want capacity to be in multiples of batches. + const int capacity_in_batches = capacity.capacity / N; + // This factor was found by trial and error. + const int new_batches = + static_cast<int>(std::ceil(capacity_in_batches / 1.5)); + capacity.capacity = new_batches * N; + return capacity; + } + + // Converts a logical index (i.e. i-th element stored in the ring buffer) into + // a physical index into slots_. + size_t GetSlotIndex(size_t start, size_t i) const { + return (start + i) & slots_bitmask_; + } + + // Copies N elements from source to the end of the ring buffer. It updates + // `info`, be sure to call SetSlotInfo() to save the modifications. + // N has to be > 0. + void CopyIntoEnd(void *const *source, size_t N, RingBufferSizeInfo &info) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) { + ASSERT(N > 0); + ASSERT(info.used + N <= info.capacity); + const size_t begin = GetSlotIndex(info.start, info.used); + const size_t end = GetSlotIndex(info.start, info.used + N); + if (ABSL_PREDICT_FALSE(end < begin && end != 0)) { + // We wrap around the buffer. 
+ memcpy(slots_ + begin, source, sizeof(void *) * (N - end)); + memcpy(slots_, source + (N - end), sizeof(void *) * end); + } else { + memcpy(slots_ + begin, source, sizeof(void *) * N); + } + info.used += N; + } + + // Copies N elements stored in slots_ starting at the given logic index into + // target. Does not do any updates to slot_info_. + // N has to be > 0. + // You should use CopyOutOfEnd or CopyOutOfStart instead in most cases. + void CopyOutOfSlots(void **target, size_t N, size_t start, size_t index) const + ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) { + ASSERT(N > 0); + const size_t begin = GetSlotIndex(start, index); + const size_t end = GetSlotIndex(start, index + N); + if (ABSL_PREDICT_FALSE(end < begin && end != 0)) { + // We wrap around the buffer. + memcpy(target, slots_ + begin, sizeof(void *) * (N - end)); + memcpy(target + (N - end), slots_, sizeof(void *) * end); + } else { + memcpy(target, slots_ + begin, sizeof(void *) * N); + } + } + + // Copies N elements from the start of the ring buffer into target. Updates + // `info`, be sure to call SetSlotInfo() to save the modifications. + // N has to be > 0. + void CopyOutOfStart(void **target, size_t N, RingBufferSizeInfo &info) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) { + ASSERT(N > 0); + ASSERT(N <= info.used); + CopyOutOfSlots(target, N, info.start, 0); + info.used -= N; + if (info.used == 0) { + // This makes it less likely that we will have to do copies that wrap + // around in the immediate future. + info.start = 0; + } else { + info.start = (info.start + N) & slots_bitmask_; + } + } + + // Copies N elements from the end of the ring buffer into target. Updates + // `info`, be sure to call SetSlotInfo() to save the modifications. + // N has to be > 0. + void CopyOutOfEnd(void **target, size_t N, RingBufferSizeInfo &info) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) { + ASSERT(N > 0); + ASSERT(N <= info.used); + info.used -= N; + CopyOutOfSlots(target, N, info.start, info.used); + if (info.used == 0) { + // This makes it less likely that we will have to do copies that wrap + // around in the immediate future. + info.start = 0; + } + } + + void SetSlotInfo(RingBufferSizeInfo info) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock_) { + ASSERT(0 <= info.start); + ASSERT((info.start & slots_bitmask_) == info.start); + ASSERT(0 <= info.used); + ASSERT(info.used <= info.capacity); + ASSERT(info.capacity <= max_capacity_); + slot_info_ = info; + } + + // Pointer to array of free objects. + void **slots_ ABSL_GUARDED_BY(lock_); + + // This lock protects all the data members. used_slots_ and cache_slots_ + // may be looked at without holding the lock. + absl::base_internal::SpinLock lock_; + + // Number of currently used and available cached entries in slots_. Use + // GetSlotInfo() to read this. + // INVARIANT: [0 <= slot_info_.used <= slot_info.capacity <= max_cache_slots_] + RingBufferSizeInfo slot_info_ ABSL_GUARDED_BY(lock_); + + // Lowest value of "slot_info_.used" since last call to TryPlunder. All + // elements not used for a full cycle (2 seconds) are unlikely to get used + // again. + int low_water_mark_ ABSL_GUARDED_BY(lock_) = std::numeric_limits<int>::max(); + + // Maximum size of the cache. + const int32_t max_capacity_; + // This is a bitmask used instead of a modulus in the ringbuffer index + // calculations. This is 1 smaller than the size of slots_ which itself has + // the size of `absl::bit_ceil(max_capacity_)`, i.e. the smallest power of two + // >= max_capacity_. 
+ size_t slots_bitmask_; + + // insert_hits_ and remove_hits_ are logically guarded by lock_ for mutations + // and use LossyAdd, but the thread annotations cannot indicate that we do not + // need a lock for reads. + StatsCounter insert_hits_; + StatsCounter remove_hits_; + // Miss counters do not hold lock_, so they use Add. + StatsCounter insert_misses_; + StatsCounter remove_misses_; + + FreeList freelist_do_not_access_directly_; + Manager *const owner_; +} ABSL_CACHELINE_ALIGNED; + +} // namespace tcmalloc::tcmalloc_internal::internal_transfer_cache +GOOGLE_MALLOC_SECTION_END + +#endif // TCMALLOC_TRANSFER_CACHE_INTERNAL_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/transfer_cache_stats.h b/contrib/libs/tcmalloc/tcmalloc/transfer_cache_stats.h new file mode 100644 index 0000000000..fdc8fba53c --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/transfer_cache_stats.h @@ -0,0 +1,35 @@ +// Copyright 2020 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TCMALLOC_TRANSFER_CACHE_STATS_H_ +#define TCMALLOC_TRANSFER_CACHE_STATS_H_ + +#include <stddef.h> + +namespace tcmalloc { +namespace tcmalloc_internal { + +struct TransferCacheStats { + size_t insert_hits; + size_t insert_misses; + size_t insert_non_batch_misses; + size_t remove_hits; + size_t remove_misses; + size_t remove_non_batch_misses; +}; + +} // namespace tcmalloc_internal +} // namespace tcmalloc + +#endif // TCMALLOC_TRANSFER_CACHE_STATS_H_ diff --git a/contrib/libs/tcmalloc/tcmalloc/transfer_cache_test.cc b/contrib/libs/tcmalloc/tcmalloc/transfer_cache_test.cc new file mode 100644 index 0000000000..4531f7a921 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/transfer_cache_test.cc @@ -0,0 +1,625 @@ +// Copyright 2020 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
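+
+// Note: the typed tests below are instantiated near the end of this file for
+// both the legacy TransferCache and the RingBufferTransferCache (via
+// FakeTransferCacheEnvironment), so each TYPED_TEST_P runs against both
+// implementations.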
+ +#include "tcmalloc/transfer_cache.h" + +#include <algorithm> +#include <atomic> +#include <cmath> +#include <cstring> +#include <random> +#include <thread> +#include <vector> + +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "absl/base/internal/spinlock.h" +#include "absl/random/distributions.h" +#include "absl/random/random.h" +#include "absl/time/clock.h" +#include "absl/types/span.h" +#include "tcmalloc/central_freelist.h" +#include "tcmalloc/common.h" +#include "tcmalloc/mock_central_freelist.h" +#include "tcmalloc/mock_transfer_cache.h" +#include "tcmalloc/static_vars.h" +#include "tcmalloc/testing/thread_manager.h" +#include "tcmalloc/transfer_cache_internals.h" + +namespace tcmalloc { +namespace tcmalloc_internal { +namespace { + +static constexpr int kSizeClass = 0; + +template <typename Env> +using TransferCacheTest = ::testing::Test; +TYPED_TEST_SUITE_P(TransferCacheTest); + +TYPED_TEST_P(TransferCacheTest, IsolatedSmoke) { + const int batch_size = TypeParam::kBatchSize; + TypeParam e; + EXPECT_CALL(e.central_freelist(), InsertRange) + .Times(e.transfer_cache().IsFlexible() ? 0 : 1); + EXPECT_CALL(e.central_freelist(), RemoveRange) + .Times(e.transfer_cache().IsFlexible() ? 0 : 1); + + EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_hits, 0); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_misses, 0); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_non_batch_misses, 0); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_hits, 0); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_misses, 0); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_non_batch_misses, 0); + + e.Insert(batch_size); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_hits, 1); + e.Insert(batch_size); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_hits, 2); + e.Insert(batch_size - 1); + if (e.transfer_cache().IsFlexible()) { + EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_hits, 3); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_misses, 0); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_non_batch_misses, 0); + } else { + EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_hits, 2); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_misses, 1); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_non_batch_misses, 1); + } + e.Remove(batch_size); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_hits, 1); + e.Remove(batch_size); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_hits, 2); + e.Remove(batch_size - 1); + if (e.transfer_cache().IsFlexible()) { + EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_hits, 3); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_misses, 0); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_non_batch_misses, 0); + } else { + EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_hits, 2); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_misses, 1); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_non_batch_misses, 1); + } +} + +TYPED_TEST_P(TransferCacheTest, ReadStats) { + const int batch_size = TypeParam::kBatchSize; + TypeParam e; + EXPECT_CALL(e.central_freelist(), InsertRange).Times(0); + EXPECT_CALL(e.central_freelist(), RemoveRange).Times(0); + + // Ensure there is at least one insert hit/remove hit, so we can assert a + // non-tautology in t2. 
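+  // t1 keeps hitting the cache while t2 polls GetHitRateStats() concurrently;
+  // the CHECK_CONDITIONs in t2 are lower bounds that already hold at this
+  // point, so any failure would indicate an unsafe concurrent read of the
+  // counters rather than a timing problem.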
+ e.Insert(batch_size); + e.Remove(batch_size); + + EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_hits, 1); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_misses, 0); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_non_batch_misses, 0); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_hits, 1); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_misses, 0); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_non_batch_misses, 0); + + std::atomic<bool> stop{false}; + + std::thread t1([&]() { + while (!stop.load(std::memory_order_acquire)) { + e.Insert(batch_size); + e.Remove(batch_size); + } + }); + + std::thread t2([&]() { + while (!stop.load(std::memory_order_acquire)) { + auto stats = e.transfer_cache().GetHitRateStats(); + CHECK_CONDITION(stats.insert_hits >= 1); + CHECK_CONDITION(stats.insert_misses == 0); + CHECK_CONDITION(stats.insert_non_batch_misses == 0); + CHECK_CONDITION(stats.remove_hits >= 1); + CHECK_CONDITION(stats.remove_misses == 0); + CHECK_CONDITION(stats.remove_non_batch_misses == 0); + } + }); + + absl::SleepFor(absl::Seconds(1)); + stop.store(true, std::memory_order_release); + + t1.join(); + t2.join(); +} + +TYPED_TEST_P(TransferCacheTest, SingleItemSmoke) { + const int batch_size = TypeParam::kBatchSize; + if (batch_size == 1) { + GTEST_SKIP() << "skipping trivial batch size"; + } + TypeParam e; + const int actions = e.transfer_cache().IsFlexible() ? 2 : 0; + EXPECT_CALL(e.central_freelist(), InsertRange).Times(2 - actions); + EXPECT_CALL(e.central_freelist(), RemoveRange).Times(2 - actions); + + e.Insert(1); + e.Insert(1); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_hits, actions); + e.Remove(1); + e.Remove(1); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_hits, actions); +} + +TYPED_TEST_P(TransferCacheTest, FetchesFromFreelist) { + const int batch_size = TypeParam::kBatchSize; + TypeParam e; + EXPECT_CALL(e.central_freelist(), InsertRange).Times(0); + EXPECT_CALL(e.central_freelist(), RemoveRange).Times(1); + e.Remove(batch_size); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_misses, 1); +} + +TYPED_TEST_P(TransferCacheTest, PartialFetchFromFreelist) { + const int batch_size = TypeParam::kBatchSize; + TypeParam e; + EXPECT_CALL(e.central_freelist(), InsertRange).Times(0); + EXPECT_CALL(e.central_freelist(), RemoveRange) + .Times(2) + .WillOnce([&](void** batch, int n) { + int returned = static_cast<FakeCentralFreeList&>(e.central_freelist()) + .RemoveRange(batch, std::min(batch_size / 2, n)); + // Overwrite the elements of batch that were not populated by + // RemoveRange. 
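+          // Filling with 0x3f makes any accidental use of the unpopulated
+          // slots show up as an obviously bogus pointer value rather than
+          // silently reading stale data.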
+ memset(batch + returned, 0x3f, sizeof(*batch) * (n - returned)); + return returned; + }); + e.Remove(batch_size); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().remove_misses, 2); +} + +TYPED_TEST_P(TransferCacheTest, EvictsOtherCaches) { + const int batch_size = TypeParam::kBatchSize; + TypeParam e; + + EXPECT_CALL(e.transfer_cache_manager(), ShrinkCache).WillOnce([]() { + return true; + }); + EXPECT_CALL(e.central_freelist(), InsertRange).Times(0); + + while (e.transfer_cache().HasSpareCapacity(kSizeClass)) { + e.Insert(batch_size); + } + size_t old_hits = e.transfer_cache().GetHitRateStats().insert_hits; + e.Insert(batch_size); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_hits, old_hits + 1); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_misses, 0); +} + +TYPED_TEST_P(TransferCacheTest, EvictsOtherCachesFlex) { + const int batch_size = TypeParam::kBatchSize; + TypeParam e; + + EXPECT_CALL(e.transfer_cache_manager(), ShrinkCache).WillRepeatedly([]() { + return true; + }); + if (e.transfer_cache().IsFlexible()) { + EXPECT_CALL(e.central_freelist(), InsertRange).Times(0); + } else { + EXPECT_CALL(e.central_freelist(), InsertRange).Times(batch_size - 1); + } + EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_hits, 0); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_misses, 0); + + int total = 0; + for (int i = 1; i <= batch_size; i++) { + e.Insert(i); + total += i; + } + + if (e.transfer_cache().IsFlexible()) { + EXPECT_EQ(e.transfer_cache().tc_length(), total); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_hits, batch_size); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_misses, 0); + } else { + EXPECT_EQ(e.transfer_cache().tc_length(), 1 * batch_size); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_hits, 1); + EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_misses, + batch_size - 1); + } +} + +// Similar to EvictsOtherCachesFlex, but with full cache. 
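+// The cache is filled to capacity first, so the odd-sized inserts at the end
+// must either fit by flexing within the existing capacity or spill to the
+// central freelist, which is what the InsertRange expectations below encode.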
+TYPED_TEST_P(TransferCacheTest, FullCacheFlex) {
+  const int batch_size = TypeParam::kBatchSize;
+  TypeParam e;
+
+  EXPECT_CALL(e.transfer_cache_manager(), ShrinkCache).WillRepeatedly([]() {
+    return true;
+  });
+  if (e.transfer_cache().IsFlexible()) {
+    EXPECT_CALL(e.central_freelist(), InsertRange).Times(0);
+  } else {
+    EXPECT_CALL(e.central_freelist(), InsertRange)
+        .Times(testing::AtLeast(batch_size));
+  }
+
+  while (e.transfer_cache().HasSpareCapacity(kSizeClass)) {
+    e.Insert(batch_size);
+  }
+  for (int i = 1; i < batch_size + 2; i++) {
+    e.Insert(i);
+  }
+}
+
+TYPED_TEST_P(TransferCacheTest, PushesToFreelist) {
+  const int batch_size = TypeParam::kBatchSize;
+  TypeParam e;
+
+  EXPECT_CALL(e.transfer_cache_manager(), ShrinkCache).WillOnce([]() {
+    return false;
+  });
+  EXPECT_CALL(e.central_freelist(), InsertRange).Times(1);
+
+  while (e.transfer_cache().HasSpareCapacity(kSizeClass)) {
+    e.Insert(batch_size);
+  }
+  size_t old_hits = e.transfer_cache().GetHitRateStats().insert_hits;
+  e.Insert(batch_size);
+  EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_hits, old_hits);
+  EXPECT_EQ(e.transfer_cache().GetHitRateStats().insert_misses, 1);
+}
+
+TYPED_TEST_P(TransferCacheTest, WrappingWorks) {
+  const int batch_size = TypeParam::kBatchSize;
+
+  TypeParam env;
+  EXPECT_CALL(env.transfer_cache_manager(), ShrinkCache).Times(0);
+
+  while (env.transfer_cache().HasSpareCapacity(kSizeClass)) {
+    env.Insert(batch_size);
+  }
+  for (int i = 0; i < 100; ++i) {
+    env.Remove(batch_size);
+    env.Insert(batch_size);
+  }
+}
+
+TYPED_TEST_P(TransferCacheTest, WrappingFlex) {
+  const int batch_size = TypeParam::kBatchSize;
+
+  TypeParam env;
+  EXPECT_CALL(env.transfer_cache_manager(), ShrinkCache).Times(0);
+  if (env.transfer_cache().IsFlexible()) {
+    EXPECT_CALL(env.central_freelist(), InsertRange).Times(0);
+    EXPECT_CALL(env.central_freelist(), RemoveRange).Times(0);
+  }
+
+  while (env.transfer_cache().HasSpareCapacity(kSizeClass)) {
+    env.Insert(batch_size);
+  }
+  for (int i = 0; i < 100; ++i) {
+    for (size_t size = 1; size < batch_size + 2; size++) {
+      env.Remove(size);
+      env.Insert(size);
+    }
+  }
+}
+
+TYPED_TEST_P(TransferCacheTest, Plunder) {
+  TypeParam env;
+  // EXPECT_CALL(env.central_freelist(), RemoveRange).Times(0);
+  // EXPECT_CALL(env.central_freelist(), InsertRange).Times(1);
+  // Fill in some elements.
+  env.Insert(TypeParam::kBatchSize);
+  env.Insert(TypeParam::kBatchSize);
+  ASSERT_EQ(env.transfer_cache().tc_length(), 2 * TypeParam::kBatchSize);
+  // All these elements will be plundered.
+  env.transfer_cache().TryPlunder(kSizeClass);
+  ASSERT_EQ(env.transfer_cache().tc_length(), 0);
+
+  env.Insert(TypeParam::kBatchSize);
+  env.Insert(TypeParam::kBatchSize);
+  ASSERT_EQ(env.transfer_cache().tc_length(), 2 * TypeParam::kBatchSize);
+
+  void* buf[TypeParam::kBatchSize];
+  // Remove one batch and insert it back; this sets the low water mark (the
+  // lowest end-state after a call to RemoveRange) to one batch.
+  (void)env.transfer_cache().RemoveRange(kSizeClass, buf,
+                                         TypeParam::kBatchSize);
+  env.transfer_cache().InsertRange(kSizeClass, {buf, TypeParam::kBatchSize});
+  ASSERT_EQ(env.transfer_cache().tc_length(), 2 * TypeParam::kBatchSize);
+  // The low water mark is one batch, so only one of the two cached batches
+  // gets plundered and one batch remains.
+  env.transfer_cache().TryPlunder(kSizeClass);
+  ASSERT_EQ(env.transfer_cache().tc_length(), TypeParam::kBatchSize);
+  // If we plunder again immediately, the low water mark is back at its maximum
+  // value, so everything gets plundered.
+ env.transfer_cache().TryPlunder(kSizeClass); + ASSERT_EQ(env.transfer_cache().tc_length(), 0); +} + +// PickCoprimeBatchSize picks a batch size in [2, max_batch_size) that is +// coprime with 2^32. We choose the largest possible batch size within that +// constraint to minimize the number of iterations of insert/remove required. +static size_t PickCoprimeBatchSize(size_t max_batch_size) { + while (max_batch_size > 1) { + if ((size_t{1} << 32) % max_batch_size != 0) { + return max_batch_size; + } + max_batch_size--; + } + + return max_batch_size; +} + +TEST(RingBufferTest, b172283201) { + // This test is designed to exercise the wraparound behavior for the + // RingBufferTransferCache, which manages its indices in uint32_t's. Because + // it uses a non-standard batch size (kBatchSize) as part of + // PickCoprimeBatchSize, it triggers a TransferCache miss to the + // CentralFreeList, which is uninteresting for exercising b/172283201. + + // For performance reasons, limit to optimized builds. +#if !defined(NDEBUG) + GTEST_SKIP() << "skipping long running test on debug build"; +#elif defined(THREAD_SANITIZER) + // This test is single threaded, so thread sanitizer will not be useful. + GTEST_SKIP() << "skipping under thread sanitizer, which slows test execution"; +#endif + + using EnvType = FakeTransferCacheEnvironment< + internal_transfer_cache::RingBufferTransferCache< + MockCentralFreeList, MockTransferCacheManager>>; + EnvType env; + + // We pick the largest value <= EnvType::kBatchSize to use as a batch size, + // such that it is prime relative to 2^32. This ensures that when we + // encounter a wraparound, the last operation actually spans both ends of the + // buffer. + const size_t batch_size = PickCoprimeBatchSize(EnvType::kBatchSize); + ASSERT_GT(batch_size, 0); + ASSERT_NE((size_t{1} << 32) % batch_size, 0) << batch_size; + // For ease of comparison, allocate a buffer of char's. We will use these to + // generate unique addresses. Since we assert that we will never miss in the + // TransferCache and go to the CentralFreeList, these do not need to be valid + // objects for deallocation. + std::vector<char> buffer(batch_size); + std::vector<void*> pointers; + pointers.reserve(batch_size); + for (size_t i = 0; i < batch_size; i++) { + pointers.push_back(&buffer[i]); + } + + // To produce wraparound in the RingBufferTransferCache, we fill up the cache + // completely and then keep inserting new elements. This makes the cache + // return old elements to the freelist and eventually wrap around. + EXPECT_CALL(env.central_freelist(), RemoveRange).Times(0); + // We do return items to the freelist, don't try to actually free them. + ON_CALL(env.central_freelist(), InsertRange).WillByDefault(testing::Return()); + ON_CALL(env.transfer_cache_manager(), DetermineSizeClassToEvict) + .WillByDefault(testing::Return(kSizeClass)); + + // First fill up the cache to its capacity. + + while (env.transfer_cache().HasSpareCapacity(kSizeClass) || + env.transfer_cache().GrowCache(kSizeClass)) { + env.transfer_cache().InsertRange(kSizeClass, absl::MakeSpan(pointers)); + } + + // The current size of the transfer cache is close to its capacity. Insert + // enough batches to make sure we wrap around twice (1 batch size should wrap + // around as we are full currently, then insert the same amount of items + // again, then one more wrap around). 
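+  // (Two extra batches beyond the current length guarantee that insertion
+  // keeps going once the cache is full, so the ring buffer has to evict to the
+  // freelist and wrap its indices rather than simply grow.)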
+ const size_t kObjects = env.transfer_cache().tc_length() + 2 * batch_size; + + // From now on, calls to InsertRange() should result in a corresponding call + // to the freelist whenever the cache is full. This doesn't happen on every + // call, as we return up to num_to_move (i.e. kBatchSize) items to the free + // list in one batch. + EXPECT_CALL(env.central_freelist(), + InsertRange(testing::SizeIs(EnvType::kBatchSize))) + .Times(testing::AnyNumber()); + for (size_t i = 0; i < kObjects; i += batch_size) { + env.transfer_cache().InsertRange(kSizeClass, absl::MakeSpan(pointers)); + } + // Manually drain the items in the transfercache, otherwise the destructor + // will try to free them. + std::vector<void*> to_free(batch_size); + size_t N = env.transfer_cache().tc_length(); + while (N > 0) { + const size_t to_remove = std::min(N, batch_size); + const size_t removed = + env.transfer_cache().RemoveRange(kSizeClass, to_free.data(), to_remove); + ASSERT_THAT(removed, testing::Le(to_remove)); + ASSERT_THAT(removed, testing::Gt(0)); + N -= removed; + } + ASSERT_EQ(env.transfer_cache().tc_length(), 0); +} + +REGISTER_TYPED_TEST_SUITE_P(TransferCacheTest, IsolatedSmoke, ReadStats, + FetchesFromFreelist, PartialFetchFromFreelist, + EvictsOtherCaches, PushesToFreelist, WrappingWorks, + SingleItemSmoke, EvictsOtherCachesFlex, + FullCacheFlex, WrappingFlex, Plunder); +template <typename Env> +using FuzzTest = ::testing::Test; +TYPED_TEST_SUITE_P(FuzzTest); + +TYPED_TEST_P(FuzzTest, MultiThreadedUnbiased) { + TypeParam env; + ThreadManager threads; + threads.Start(10, [&](int) { env.RandomlyPoke(); }); + + auto start = absl::Now(); + while (start + absl::Seconds(0.3) > absl::Now()) env.RandomlyPoke(); + threads.Stop(); +} + +TYPED_TEST_P(FuzzTest, MultiThreadedBiasedInsert) { + const int batch_size = TypeParam::kBatchSize; + + TypeParam env; + ThreadManager threads; + threads.Start(10, [&](int) { env.RandomlyPoke(); }); + + auto start = absl::Now(); + while (start + absl::Seconds(5) > absl::Now()) env.Insert(batch_size); + threads.Stop(); +} + +TYPED_TEST_P(FuzzTest, MultiThreadedBiasedRemove) { + const int batch_size = TypeParam::kBatchSize; + + TypeParam env; + ThreadManager threads; + threads.Start(10, [&](int) { env.RandomlyPoke(); }); + + auto start = absl::Now(); + while (start + absl::Seconds(5) > absl::Now()) env.Remove(batch_size); + threads.Stop(); +} + +TYPED_TEST_P(FuzzTest, MultiThreadedBiasedShrink) { + TypeParam env; + ThreadManager threads; + threads.Start(10, [&](int) { env.RandomlyPoke(); }); + + auto start = absl::Now(); + while (start + absl::Seconds(5) > absl::Now()) env.Shrink(); + threads.Stop(); +} + +TYPED_TEST_P(FuzzTest, MultiThreadedBiasedGrow) { + TypeParam env; + ThreadManager threads; + threads.Start(10, [&](int) { env.RandomlyPoke(); }); + + auto start = absl::Now(); + while (start + absl::Seconds(5) > absl::Now()) env.Grow(); + threads.Stop(); +} + +REGISTER_TYPED_TEST_SUITE_P(FuzzTest, MultiThreadedUnbiased, + MultiThreadedBiasedInsert, + MultiThreadedBiasedRemove, MultiThreadedBiasedGrow, + MultiThreadedBiasedShrink); + +namespace unit_tests { +using Env = FakeTransferCacheEnvironment<internal_transfer_cache::TransferCache< + MockCentralFreeList, MockTransferCacheManager>>; +INSTANTIATE_TYPED_TEST_SUITE_P(TransferCache, TransferCacheTest, + ::testing::Types<Env>); + +using RingBufferEnv = FakeTransferCacheEnvironment< + internal_transfer_cache::RingBufferTransferCache<MockCentralFreeList, + MockTransferCacheManager>>; +INSTANTIATE_TYPED_TEST_SUITE_P(RingBuffer, 
TransferCacheTest, + ::testing::Types<RingBufferEnv>); +} // namespace unit_tests + +namespace fuzz_tests { +// Use the FakeCentralFreeList instead of the MockCentralFreeList for fuzz tests +// as it avoids the overheads of mocks and allows more iterations of the fuzzing +// itself. +using Env = FakeTransferCacheEnvironment<internal_transfer_cache::TransferCache< + MockCentralFreeList, MockTransferCacheManager>>; +INSTANTIATE_TYPED_TEST_SUITE_P(TransferCache, FuzzTest, ::testing::Types<Env>); + +using RingBufferEnv = FakeTransferCacheEnvironment< + internal_transfer_cache::RingBufferTransferCache<MockCentralFreeList, + MockTransferCacheManager>>; +INSTANTIATE_TYPED_TEST_SUITE_P(RingBuffer, FuzzTest, + ::testing::Types<RingBufferEnv>); +} // namespace fuzz_tests + +namespace leak_tests { + +template <typename Env> +using TwoSizeClassTest = ::testing::Test; +TYPED_TEST_SUITE_P(TwoSizeClassTest); + +TYPED_TEST_P(TwoSizeClassTest, NoLeaks) { + TypeParam env; + + // The point of this test is to see that adding "random" amounts of + // allocations to the transfer caches behaves correctly, even in the case that + // there are multiple size classes interacting by stealing from each other. + + // Fill all caches to their maximum without starting to steal from each other. + for (int cl = 1; cl < TypeParam::Manager::kSizeClasses; ++cl) { + const size_t batch_size = TypeParam::Manager::num_objects_to_move(cl); + while (env.transfer_cache_manager().HasSpareCapacity(cl)) { + env.Insert(cl, batch_size); + } + } + + // Count the number of batches currently in the cache. + auto count_batches = [&env]() { + int batch_count = 0; + for (int cl = 1; cl < TypeParam::Manager::kSizeClasses; ++cl) { + const size_t batch_size = TypeParam::Manager::num_objects_to_move(cl); + batch_count += env.transfer_cache_manager().tc_length(cl) / batch_size; + } + return batch_count; + }; + + absl::BitGen bitgen; + const int max_batches = count_batches(); + int expected_batches = max_batches; + for (int i = 0; i < 100; ++i) { + { + // First remove. + const int cl = + absl::Uniform<int>(bitgen, 1, TypeParam::Manager::kSizeClasses); + const size_t batch_size = TypeParam::Manager::num_objects_to_move(cl); + if (env.transfer_cache_manager().tc_length(cl) >= batch_size) { + env.Remove(cl, batch_size); + --expected_batches; + } + const int current_batches = count_batches(); + EXPECT_EQ(current_batches, expected_batches) << "iteration " << i; + } + { + // Then add in another size class. + const int cl = + absl::Uniform<int>(bitgen, 1, TypeParam::Manager::kSizeClasses); + // Evict from the "next" size class, skipping 0. + // This makes sure we are always evicting from somewhere if at all + // possible. 
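+      // For example, with kSizeClasses == 4 the assignment below maps
+      // cl 1 -> 2, 2 -> 3, 3 -> 1, so the evicting class is always a valid
+      // size class and (for more than two size classes) never cl itself.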
+ env.transfer_cache_manager().evicting_from_ = + 1 + cl % (TypeParam::Manager::kSizeClasses - 1); + if (expected_batches < max_batches) { + const size_t batch_size = TypeParam::Manager::num_objects_to_move(cl); + env.Insert(cl, batch_size); + ++expected_batches; + } + const int current_batches = count_batches(); + EXPECT_EQ(current_batches, expected_batches) << "iteration " << i; + } + } +} + +REGISTER_TYPED_TEST_SUITE_P(TwoSizeClassTest, NoLeaks); + +using TwoTransferCacheEnv = + TwoSizeClassEnv<internal_transfer_cache::TransferCache>; +INSTANTIATE_TYPED_TEST_SUITE_P(TransferCache, TwoSizeClassTest, + ::testing::Types<TwoTransferCacheEnv>); + +using TwoRingBufferEnv = + TwoSizeClassEnv<internal_transfer_cache::RingBufferTransferCache>; +INSTANTIATE_TYPED_TEST_SUITE_P(RingBuffer, TwoSizeClassTest, + ::testing::Types<TwoRingBufferEnv>); + +} // namespace leak_tests + +} // namespace +} // namespace tcmalloc_internal +} // namespace tcmalloc diff --git a/contrib/libs/tcmalloc/tcmalloc/want_hpaa.cc b/contrib/libs/tcmalloc/tcmalloc/want_hpaa.cc new file mode 100644 index 0000000000..b488ceb54f --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/want_hpaa.cc @@ -0,0 +1,30 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/base/attributes.h" +#include "tcmalloc/internal/config.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// This -if linked into a binary - overrides page_allocator.cc and forces HPAA +// on/subrelease off. +ABSL_ATTRIBUTE_UNUSED int default_want_hpaa() { return 1; } + +ABSL_ATTRIBUTE_UNUSED int default_subrelease() { return -1; } + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/want_hpaa_subrelease.cc b/contrib/libs/tcmalloc/tcmalloc/want_hpaa_subrelease.cc new file mode 100644 index 0000000000..323cce40ed --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/want_hpaa_subrelease.cc @@ -0,0 +1,30 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/base/attributes.h" +#include "tcmalloc/internal/config.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// This -if linked into a binary - overrides page_allocator.cc and forces HPAA +// on/subrelease on. 
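+// (In this import the option is selected at build time: the top-level
+// contrib/libs/tcmalloc/ya.make shown at the end of this diff lists
+// tcmalloc/want_hpaa.cc under SRCS, so that variant's defaults are the ones
+// linked in.)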
+ABSL_ATTRIBUTE_UNUSED int default_want_hpaa() { return 1; } + +ABSL_ATTRIBUTE_UNUSED int default_subrelease() { return 1; } + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/want_legacy_spans.cc b/contrib/libs/tcmalloc/tcmalloc/want_legacy_spans.cc new file mode 100644 index 0000000000..28580e13ed --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/want_legacy_spans.cc @@ -0,0 +1,28 @@ +// Copyright 2020 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/base/attributes.h" +#include "tcmalloc/internal/config.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// This -if linked into a binary - overrides common.cc and +// forces old span sizes. +ABSL_ATTRIBUTE_UNUSED int default_want_legacy_spans() { return 1; } + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/want_no_hpaa.cc b/contrib/libs/tcmalloc/tcmalloc/want_no_hpaa.cc new file mode 100644 index 0000000000..e23d93d9ce --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/want_no_hpaa.cc @@ -0,0 +1,30 @@ +// Copyright 2019 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/base/attributes.h" +#include "tcmalloc/internal/config.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// This -if linked into a binary - overrides page_allocator.cc and +// forces HPAA off/subrelease off. +ABSL_ATTRIBUTE_UNUSED int default_want_hpaa() { return -1; } + +ABSL_ATTRIBUTE_UNUSED int default_subrelease() { return -1; } + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/tcmalloc/want_numa_aware.cc b/contrib/libs/tcmalloc/tcmalloc/want_numa_aware.cc new file mode 100644 index 0000000000..3f0519dd50 --- /dev/null +++ b/contrib/libs/tcmalloc/tcmalloc/want_numa_aware.cc @@ -0,0 +1,28 @@ +// Copyright 2021 The TCMalloc Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/base/attributes.h" +#include "tcmalloc/internal/config.h" + +GOOGLE_MALLOC_SECTION_BEGIN +namespace tcmalloc { +namespace tcmalloc_internal { + +// When linked into a binary this overrides the weak implementation in numa.cc +// and causes TCMalloc to enable NUMA awareness by default. +ABSL_ATTRIBUTE_UNUSED bool default_want_numa_aware() { return true; } + +} // namespace tcmalloc_internal +} // namespace tcmalloc +GOOGLE_MALLOC_SECTION_END diff --git a/contrib/libs/tcmalloc/ya.make b/contrib/libs/tcmalloc/ya.make new file mode 100644 index 0000000000..54701b1b77 --- /dev/null +++ b/contrib/libs/tcmalloc/ya.make @@ -0,0 +1,38 @@ +LIBRARY() + +LICENSE(Apache-2.0) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + +OWNER( + ayles + prime + g:cpp-contrib +) + +# https://github.com/google/tcmalloc +VERSION(2021-10-04-45c59ccbc062ac96d83710205033c656e490d376) + +SRCS( + # Options + tcmalloc/want_hpaa.cc +) + +INCLUDE(common.inc) + +CFLAGS( + -DTCMALLOC_256K_PAGES +) + +END() + +IF (NOT DLL_FOR) + RECURSE( + default + dynamic + malloc_extension + numa_256k + numa_large_pages + slow_but_small + ) +ENDIF() |
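+
+# Note: the RECURSE() targets above build the prepackaged variants of this
+# library (default, dynamic, malloc_extension, numa_256k, numa_large_pages,
+# slow_but_small). A consumer would typically depend on one of those
+# subdirectories, for example with PEERDIR(contrib/libs/tcmalloc/default);
+# the exact wiring is outside this diff, so treat that path as an assumption.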