diff options
author | morozov1one <morozov1one@yandex-team.com> | 2024-11-29 20:28:24 +0300 |
---|---|---|
committer | morozov1one <morozov1one@yandex-team.com> | 2024-11-29 20:37:50 +0300 |
commit | 0924e1c53b7aec2c5efefe89499154b0a7e902f7 (patch) | |
tree | d41153892fac73aeed483b850203444f747bf54e /contrib/libs/mimalloc | |
parent | c67888be3cb9ffde249bcc0ec11b1a2cde58f60b (diff) | |
download | ydb-0924e1c53b7aec2c5efefe89499154b0a7e902f7.tar.gz |
Upgrade mimalloc to 1.8.7
Ниже описал существенные изменения в поведении, которые я заметил (в сравнении с версией 1.7.2, которая лежит в контрибах сейчас)
Полный changelog можно посмотреть в [readme.md](http://readme.md)
* Поменялся дефолт у [опции](https://github.com/microsoft/mimalloc/blob/9cae0d31cd28476664dbaa6e4e6940b9d900842a/src/options.c#L109), определяющей то, как неиспользуемая память возвращается в систему. В старых версиях по умолчанию использовался madvise с флагом MADV_FREE, в свежих версиях же используется MADV_DONTNEED. Это может вызвать неожиданные изменения (в худшую сторону) на графиках потребляемой анонимной памяти (), хотя по факту потребление должно быть \+- одинаковым
* Алгоритм работы аллокатора претерпел некоторые изменения. Например, мы споткнулись об то, что в новой версии mimalloc выделяет себе 1Gb (размер задается [опцией](https://github.com/microsoft/mimalloc/blob/2765ec93026f445cad8f38e6b196dd226a1f6e61/src/options.c#L87)) памяти при первой же аллокации. Само по себе это мало на что влияет, но неприятности могут случиться, если звать в начале программы mlockall
commit_hash:dc6d945c1776c874e554f94b705c4e446b0a11d8
Diffstat (limited to 'contrib/libs/mimalloc')
40 files changed, 7527 insertions, 4801 deletions
diff --git a/contrib/libs/mimalloc/.yandex_meta/devtools.copyrights.report b/contrib/libs/mimalloc/.yandex_meta/devtools.copyrights.report index a37ac7da01..4b22008681 100644 --- a/contrib/libs/mimalloc/.yandex_meta/devtools.copyrights.report +++ b/contrib/libs/mimalloc/.yandex_meta/devtools.copyrights.report @@ -29,16 +29,6 @@ # FILE_INCLUDE - include all file data into licenses text file # ======================= -KEEP COPYRIGHT_SERVICE_LABEL 21dec668d9ab2431f46cc70979134ba7 -BELONGS ya.make - Note: matched license text is too long. Read it in the source files. - Scancode info: - Original SPDX id: COPYRIGHT_SERVICE_LABEL - Score : 100.00 - Match type : COPYRIGHT - Files with this license: - src/bitmap.c [2:4] - KEEP COPYRIGHT_SERVICE_LABEL 25dcefb85a8e188fc5c56da58857f739 BELONGS ya.make License text: @@ -58,20 +48,14 @@ BELONGS ya.make Score : 100.00 Match type : COPYRIGHT Files with this license: - include/mimalloc-internal.h [2:4] - include/mimalloc-types.h [2:4] - include/mimalloc.h [2:4] src/alloc-aligned.c [2:4] src/alloc-override.c [2:4] src/alloc-posix.c [2:4] - src/alloc.c [2:4] src/heap.c [2:4] - src/init.c [2:4] src/options.c [2:4] - src/os.c [2:4] src/stats.c [2:4] -KEEP COPYRIGHT_SERVICE_LABEL 28da6750f9f70938a34a2683265c5f37 +KEEP COPYRIGHT_SERVICE_LABEL 5b7d847fe742e0704b8071bd0042a721 BELONGS ya.make Note: matched license text is too long. Read it in the source files. Scancode info: @@ -79,9 +63,10 @@ BELONGS ya.make Score : 100.00 Match type : COPYRIGHT Files with this license: - include/mimalloc-atomic.h [2:4] + include/mimalloc-new-delete.h [2:4] + include/mimalloc-override.h [2:4] -KEEP COPYRIGHT_SERVICE_LABEL 4d891fec2fadb396208278a6d4280c2c +KEEP COPYRIGHT_SERVICE_LABEL 5f21aa30041415548b09b1e8e25da7fb BELONGS ya.make Note: matched license text is too long. Read it in the source files. Scancode info: @@ -89,9 +74,10 @@ BELONGS ya.make Score : 100.00 Match type : COPYRIGHT Files with this license: - src/bitmap.h [2:4] + src/arena.c [2:4] + src/segment-map.c [2:4] -KEEP COPYRIGHT_SERVICE_LABEL 5b7d847fe742e0704b8071bd0042a721 +KEEP COPYRIGHT_SERVICE_LABEL 5fc5246a7da6971940f2a93100292b4d BELONGS ya.make Note: matched license text is too long. Read it in the source files. Scancode info: @@ -99,10 +85,9 @@ BELONGS ya.make Score : 100.00 Match type : COPYRIGHT Files with this license: - include/mimalloc-new-delete.h [2:4] - include/mimalloc-override.h [2:4] + src/random.c [2:4] -KEEP COPYRIGHT_SERVICE_LABEL 5fc5246a7da6971940f2a93100292b4d +KEEP COPYRIGHT_SERVICE_LABEL 8417b808fabacd093257b7972e1b7c8f BELONGS ya.make Note: matched license text is too long. Read it in the source files. Scancode info: @@ -110,10 +95,20 @@ BELONGS ya.make Score : 100.00 Match type : COPYRIGHT Files with this license: - src/arena.c [2:4] - src/random.c [2:4] + include/mimalloc/atomic.h [2:4] + +KEEP COPYRIGHT_SERVICE_LABEL bb2ecc7d3573627ff5673fd4981120f7 +BELONGS ya.make + Note: matched license text is too long. Read it in the source files. + Scancode info: + Original SPDX id: COPYRIGHT_SERVICE_LABEL + Score : 100.00 + Match type : COPYRIGHT + Files with this license: + src/bitmap.c [2:4] + src/bitmap.h [2:4] -KEEP COPYRIGHT_SERVICE_LABEL fe43a4aab9cf694378c07b2f43474c64 +KEEP COPYRIGHT_SERVICE_LABEL e7f053f38ca4d796d9d56538f0ce1ac7 BELONGS ya.make Note: matched license text is too long. Read it in the source files. Scancode info: @@ -121,9 +116,17 @@ BELONGS ya.make Score : 100.00 Match type : COPYRIGHT Files with this license: - src/region.c [2:4] + include/mimalloc.h [2:4] + include/mimalloc/internal.h [2:4] + include/mimalloc/prim.h [2:4] + include/mimalloc/track.h [2:4] + src/libc.c [2:4] + src/os.c [2:4] + src/prim/osx/prim.c [2:4] + src/prim/prim.c [2:4] + src/prim/unix/prim.c [2:4] -KEEP COPYRIGHT_SERVICE_LABEL feb05913c2d79921f57fa41bec01920f +KEEP COPYRIGHT_SERVICE_LABEL fc7c1095a8a64f3166ead9be4cfabb96 BELONGS ya.make Note: matched license text is too long. Read it in the source files. Scancode info: @@ -131,8 +134,20 @@ BELONGS ya.make Score : 100.00 Match type : COPYRIGHT Files with this license: - src/alloc-override-osx.c [2:4] + include/mimalloc/types.h [2:4] + src/alloc.c [2:4] + src/free.c [2:4] src/page-queue.c [2:4] src/page.c [2:4] src/segment.c [2:4] - src/static.c [2:4] + +KEEP COPYRIGHT_SERVICE_LABEL fe63ec86a6a35162f9131f69ba4bc3e6 +BELONGS ya.make + Note: matched license text is too long. Read it in the source files. + Scancode info: + Original SPDX id: COPYRIGHT_SERVICE_LABEL + Score : 100.00 + Match type : COPYRIGHT + Files with this license: + src/init.c [2:4] + src/prim/osx/alloc-override-zone.c [2:4] diff --git a/contrib/libs/mimalloc/.yandex_meta/devtools.licenses.report b/contrib/libs/mimalloc/.yandex_meta/devtools.licenses.report index 7bfbc229ab..6791efdfc3 100644 --- a/contrib/libs/mimalloc/.yandex_meta/devtools.licenses.report +++ b/contrib/libs/mimalloc/.yandex_meta/devtools.licenses.report @@ -31,7 +31,6 @@ SKIP LicenseRef-scancode-generic-cla 0539c29f2b403f650800fcba3b1c53a6 BELONGS ya.make - # Contributor License Agreement License text: Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us Scancode info: @@ -40,11 +39,11 @@ BELONGS ya.make Match type : NOTICE Links : https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/licenses/generic-cla.LICENSE Files with this license: - readme.md [680:680] + readme.md [816:816] KEEP MIT AND MIT 2d229fcf116e5a7facba2d4dcccf15ba BELONGS ya.make -FILE_INCLUDE LICENSE found in files: include/mimalloc-internal.h at line 5, include/mimalloc-types.h at line 5, include/mimalloc.h at line 5, src/alloc-aligned.c at line 5, src/alloc.c at line 5, src/options.c at line 5 +FILE_INCLUDE LICENSE found in files: include/mimalloc.h at line 5, include/mimalloc/atomic.h at line 5, include/mimalloc/internal.h at line 5, include/mimalloc/track.h at line 5, src/init.c at line 5, src/os.c at line 5 License text: This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file @@ -55,24 +54,30 @@ FILE_INCLUDE LICENSE found in files: include/mimalloc-internal.h at line 5, incl Match type : NOTICE Links : http://opensource.org/licenses/mit-license.php, https://spdx.org/licenses/MIT Files with this license: - include/mimalloc-atomic.h [3:5] include/mimalloc-new-delete.h [3:5] include/mimalloc-override.h [3:5] - src/alloc-override-osx.c [3:5] + include/mimalloc/prim.h [3:5] + include/mimalloc/types.h [3:5] + src/alloc-aligned.c [3:5] src/alloc-override.c [3:5] src/alloc-posix.c [3:5] + src/alloc.c [3:5] src/arena.c [3:5] src/bitmap.c [3:5] src/bitmap.h [3:5] + src/free.c [3:5] src/heap.c [3:5] - src/init.c [3:5] - src/os.c [3:5] + src/libc.c [3:5] + src/options.c [3:5] src/page-queue.c [3:5] src/page.c [3:5] + src/prim/osx/alloc-override-zone.c [3:5] + src/prim/osx/prim.c [3:5] + src/prim/prim.c [3:5] + src/prim/unix/prim.c [3:5] src/random.c [3:5] - src/region.c [3:5] + src/segment-map.c [3:5] src/segment.c [3:5] - src/static.c [3:5] src/stats.c [3:5] Scancode info: Original SPDX id: MIT @@ -80,12 +85,12 @@ FILE_INCLUDE LICENSE found in files: include/mimalloc-internal.h at line 5, incl Match type : NOTICE Links : http://opensource.org/licenses/mit-license.php, https://spdx.org/licenses/MIT Files with this license: - include/mimalloc-internal.h [3:5] - include/mimalloc-types.h [3:5] include/mimalloc.h [3:5] - src/alloc-aligned.c [3:5] - src/alloc.c [3:5] - src/options.c [3:5] + include/mimalloc/atomic.h [3:5] + include/mimalloc/internal.h [3:5] + include/mimalloc/track.h [3:5] + src/init.c [3:5] + src/os.c [3:5] KEEP MIT 399584035c417b91040964779555dfac BELONGS ya.make @@ -99,6 +104,20 @@ BELONGS ya.make Files with this license: LICENSE [1:1] +KEEP MIT 46d3a844e933821ebdb401dff1a34bc0 +BELONGS ya.make + License text: + This is free software; you can redistribute it and/or modify it under the + terms of the MIT license. A copy of the license can be found in the file + Scancode info: + Original SPDX id: BSD-3-Clause + Score : 52.38 + Match type : NOTICE + Links : http://www.opensource.org/licenses/BSD-3-Clause, https://spdx.org/licenses/BSD-3-Clause + Files with this license: + include/mimalloc/atomic.h [3:4] + include/mimalloc/track.h [3:4] + KEEP MIT 54575e81a786e9aa7d98337ec2e1ebb0 BELONGS ya.make Note: matched license text is too long. Read it in the source files. diff --git a/contrib/libs/mimalloc/.yandex_meta/licenses.list.txt b/contrib/libs/mimalloc/.yandex_meta/licenses.list.txt index 3408919d8b..69648edbd5 100644 --- a/contrib/libs/mimalloc/.yandex_meta/licenses.list.txt +++ b/contrib/libs/mimalloc/.yandex_meta/licenses.list.txt @@ -5,47 +5,53 @@ terms of the MIT license. A copy of the license can be found in the file ====================COPYRIGHT==================== -Copyright (c) 2018-2020, Microsoft Research, Daan Leijen +Copyright (c) 2018-2021 Microsoft Corporation, Daan Leijen + + +====================COPYRIGHT==================== +Copyright (c) 2018-2021, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file ====================COPYRIGHT==================== -Copyright (c) 2018-2021 Microsoft Corporation, Daan Leijen +Copyright (c) 2018-2022, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file ====================COPYRIGHT==================== -Copyright (c) 2018-2021 Microsoft Research, Daan Leijen +Copyright (c) 2018-2023 Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file ====================COPYRIGHT==================== -Copyright (c) 2018-2021, Microsoft Research, Daan Leijen +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file ====================COPYRIGHT==================== -Copyright (c) 2019-2020 Microsoft Research, Daan Leijen +Copyright (c) 2018-2024, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file ====================COPYRIGHT==================== -Copyright (c) 2019-2020, Microsoft Research, Daan Leijen +Copyright (c) 2019-2021, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file ====================COPYRIGHT==================== -Copyright (c) 2019-2021 Microsoft Research, Daan Leijen +Copyright (c) 2019-2023 Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file ====================COPYRIGHT==================== -Copyright (c) 2019-2021, Microsoft Research, Daan Leijen +Copyright (c) 2019-2023, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file @@ -97,6 +103,11 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +====================MIT==================== +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file + + ====================MIT AND MIT==================== This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file diff --git a/contrib/libs/mimalloc/SECURITY.md b/contrib/libs/mimalloc/SECURITY.md new file mode 100644 index 0000000000..b3c89efc85 --- /dev/null +++ b/contrib/libs/mimalloc/SECURITY.md @@ -0,0 +1,41 @@ +<!-- BEGIN MICROSOFT SECURITY.MD V0.0.9 BLOCK --> + +## Security + +Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet) and [Xamarin](https://github.com/xamarin). + +If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/security.md/definition), please report it to us as described below. + +## Reporting Security Issues + +**Please do not report security vulnerabilities through public GitHub issues.** + +Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/security.md/msrc/create-report). + +If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/security.md/msrc/pgp). + +You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). + +Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: + + * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) + * Full paths of source file(s) related to the manifestation of the issue + * The location of the affected source code (tag/branch/commit or direct URL) + * Any special configuration required to reproduce the issue + * Step-by-step instructions to reproduce the issue + * Proof-of-concept or exploit code (if possible) + * Impact of the issue, including how an attacker might exploit the issue + +This information will help us triage your report more quickly. + +If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/security.md/msrc/bounty) page for more details about our active programs. + +## Preferred Languages + +We prefer all communications to be in English. + +## Policy + +Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/security.md/cvd). + +<!-- END MICROSOFT SECURITY.MD BLOCK --> diff --git a/contrib/libs/mimalloc/include/mimalloc-new-delete.h b/contrib/libs/mimalloc/include/mimalloc-new-delete.h index ba208f0556..c16f4a6653 100644 --- a/contrib/libs/mimalloc/include/mimalloc-new-delete.h +++ b/contrib/libs/mimalloc/include/mimalloc-new-delete.h @@ -22,14 +22,26 @@ terms of the MIT license. A copy of the license can be found in the file #include <new> #include <mimalloc.h> + #if defined(_MSC_VER) && defined(_Ret_notnull_) && defined(_Post_writable_byte_size_) + // stay consistent with VCRT definitions + #define mi_decl_new(n) mi_decl_nodiscard mi_decl_restrict _Ret_notnull_ _Post_writable_byte_size_(n) + #define mi_decl_new_nothrow(n) mi_decl_nodiscard mi_decl_restrict _Ret_maybenull_ _Success_(return != NULL) _Post_writable_byte_size_(n) + #else + #define mi_decl_new(n) mi_decl_nodiscard mi_decl_restrict + #define mi_decl_new_nothrow(n) mi_decl_nodiscard mi_decl_restrict + #endif + void operator delete(void* p) noexcept { mi_free(p); }; void operator delete[](void* p) noexcept { mi_free(p); }; - void* operator new(std::size_t n) noexcept(false) { return mi_new(n); } - void* operator new[](std::size_t n) noexcept(false) { return mi_new(n); } + void operator delete (void* p, const std::nothrow_t&) noexcept { mi_free(p); } + void operator delete[](void* p, const std::nothrow_t&) noexcept { mi_free(p); } + + mi_decl_new(n) void* operator new(std::size_t n) noexcept(false) { return mi_new(n); } + mi_decl_new(n) void* operator new[](std::size_t n) noexcept(false) { return mi_new(n); } - void* operator new (std::size_t n, const std::nothrow_t& tag) noexcept { (void)(tag); return mi_new_nothrow(n); } - void* operator new[](std::size_t n, const std::nothrow_t& tag) noexcept { (void)(tag); return mi_new_nothrow(n); } + mi_decl_new_nothrow(n) void* operator new (std::size_t n, const std::nothrow_t& tag) noexcept { (void)(tag); return mi_new_nothrow(n); } + mi_decl_new_nothrow(n) void* operator new[](std::size_t n, const std::nothrow_t& tag) noexcept { (void)(tag); return mi_new_nothrow(n); } #if (__cplusplus >= 201402L || _MSC_VER >= 1916) void operator delete (void* p, std::size_t n) noexcept { mi_free_size(p,n); }; @@ -41,9 +53,11 @@ terms of the MIT license. A copy of the license can be found in the file void operator delete[](void* p, std::align_val_t al) noexcept { mi_free_aligned(p, static_cast<size_t>(al)); } void operator delete (void* p, std::size_t n, std::align_val_t al) noexcept { mi_free_size_aligned(p, n, static_cast<size_t>(al)); }; void operator delete[](void* p, std::size_t n, std::align_val_t al) noexcept { mi_free_size_aligned(p, n, static_cast<size_t>(al)); }; + void operator delete (void* p, std::align_val_t al, const std::nothrow_t&) noexcept { mi_free_aligned(p, static_cast<size_t>(al)); } + void operator delete[](void* p, std::align_val_t al, const std::nothrow_t&) noexcept { mi_free_aligned(p, static_cast<size_t>(al)); } - void* operator new( std::size_t n, std::align_val_t al) noexcept(false) { return mi_new_aligned(n, static_cast<size_t>(al)); } - void* operator new[]( std::size_t n, std::align_val_t al) noexcept(false) { return mi_new_aligned(n, static_cast<size_t>(al)); } + void* operator new (std::size_t n, std::align_val_t al) noexcept(false) { return mi_new_aligned(n, static_cast<size_t>(al)); } + void* operator new[](std::size_t n, std::align_val_t al) noexcept(false) { return mi_new_aligned(n, static_cast<size_t>(al)); } void* operator new (std::size_t n, std::align_val_t al, const std::nothrow_t&) noexcept { return mi_new_aligned_nothrow(n, static_cast<size_t>(al)); } void* operator new[](std::size_t n, std::align_val_t al, const std::nothrow_t&) noexcept { return mi_new_aligned_nothrow(n, static_cast<size_t>(al)); } #endif diff --git a/contrib/libs/mimalloc/include/mimalloc-override.h b/contrib/libs/mimalloc/include/mimalloc-override.h index 7d9f3e7d0c..48a8a6226a 100644 --- a/contrib/libs/mimalloc/include/mimalloc-override.h +++ b/contrib/libs/mimalloc/include/mimalloc-override.h @@ -24,7 +24,7 @@ not accidentally mix pointers from different allocators). #define free(p) mi_free(p) #define strdup(s) mi_strdup(s) -#define strndup(s,n) mi_strndup(s,n) +#define strndup(s,n) mi_strndup(s,n) #define realpath(f,n) mi_realpath(f,n) // Microsoft extensions @@ -43,11 +43,13 @@ not accidentally mix pointers from different allocators). #define reallocf(p,n) mi_reallocf(p,n) #define malloc_size(p) mi_usable_size(p) #define malloc_usable_size(p) mi_usable_size(p) +#define malloc_good_size(sz) mi_malloc_good_size(sz) #define cfree(p) mi_free(p) #define valloc(n) mi_valloc(n) #define pvalloc(n) mi_pvalloc(n) #define reallocarray(p,s,n) mi_reallocarray(p,s,n) +#define reallocarr(p,s,n) mi_reallocarr(p,s,n) #define memalign(a,n) mi_memalign(a,n) #define aligned_alloc(a,n) mi_aligned_alloc(a,n) #define posix_memalign(p,a,n) mi_posix_memalign(p,a,n) diff --git a/contrib/libs/mimalloc/include/mimalloc.h b/contrib/libs/mimalloc/include/mimalloc.h index fe5aa8f343..ae6f99b4b4 100644 --- a/contrib/libs/mimalloc/include/mimalloc.h +++ b/contrib/libs/mimalloc/include/mimalloc.h @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2018-2021, Microsoft Research, Daan Leijen +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. @@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_H #define MIMALLOC_H -#define MI_MALLOC_VERSION 171 // major + 2 digits minor +#define MI_MALLOC_VERSION 187 // major + 2 digits minor // ------------------------------------------------------ // Compiler specific attributes @@ -26,8 +26,10 @@ terms of the MIT license. A copy of the license can be found in the file #if defined(__cplusplus) && (__cplusplus >= 201703) #define mi_decl_nodiscard [[nodiscard]] -#elif (__GNUC__ >= 4) || defined(__clang__) // includes clang, icc, and clang-cl +#elif (defined(__GNUC__) && (__GNUC__ >= 4)) || defined(__clang__) // includes clang, icc, and clang-cl #define mi_decl_nodiscard __attribute__((warn_unused_result)) +#elif defined(_HAS_NODISCARD) + #define mi_decl_nodiscard _NODISCARD #elif (_MSC_VER >= 1700) #define mi_decl_nodiscard _Check_return_ #else @@ -58,8 +60,12 @@ terms of the MIT license. A copy of the license can be found in the file #define mi_attr_alloc_size2(s1,s2) #define mi_attr_alloc_align(p) #elif defined(__GNUC__) // includes clang and icc + #if defined(MI_SHARED_LIB) && defined(MI_SHARED_LIB_EXPORT) + #define mi_decl_export __attribute__((visibility("default"))) + #else + #define mi_decl_export + #endif #define mi_cdecl // leads to warnings... __attribute__((cdecl)) - #define mi_decl_export __attribute__((visibility("default"))) #define mi_decl_restrict #define mi_attr_malloc __attribute__((malloc)) #if (defined(__clang_major__) && (__clang_major__ < 4)) || (__GNUC__ < 5) @@ -153,8 +159,8 @@ mi_decl_export void mi_thread_init(void) mi_attr_noexcept; mi_decl_export void mi_thread_done(void) mi_attr_noexcept; mi_decl_export void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept; -mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs, - size_t* current_rss, size_t* peak_rss, +mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs, + size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) mi_attr_noexcept; // ------------------------------------------------------------------------------------- @@ -249,8 +255,9 @@ typedef struct mi_heap_area_s { void* blocks; // start of the area containing heap blocks size_t reserved; // bytes reserved for this area (virtual) size_t committed; // current available bytes for this area - size_t used; // bytes in use by allocated blocks + size_t used; // number of allocated blocks size_t block_size; // size in bytes of each block + size_t full_block_size; // size in bytes of a full block including padding and metadata. } mi_heap_area_t; typedef bool (mi_cdecl mi_block_visit_fun)(const mi_heap_t* heap, const mi_heap_area_t* area, void* block, size_t block_size, void* arg); @@ -267,6 +274,19 @@ mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size mi_decl_export int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noexcept; mi_decl_export bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept; +mi_decl_export void mi_debug_show_arenas(bool show_inuse, bool show_abandoned, bool show_purge) mi_attr_noexcept; + +// Experimental: heaps associated with specific memory arena's +typedef int mi_arena_id_t; +mi_decl_export void* mi_arena_area(mi_arena_id_t arena_id, size_t* size); +mi_decl_export int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_msecs, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept; +mi_decl_export int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept; +mi_decl_export bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept; + +#if MI_MALLOC_VERSION >= 182 +// Create a heap that only allocates in the specified arena +mi_decl_nodiscard mi_decl_export mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t arena_id); +#endif // deprecated mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept; @@ -292,33 +312,49 @@ mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size // ------------------------------------------------------ -// Options, all `false` by default +// Options // ------------------------------------------------------ typedef enum mi_option_e { // stable options - mi_option_show_errors, - mi_option_show_stats, - mi_option_verbose, - // the following options are experimental - mi_option_eager_commit, - mi_option_eager_region_commit, - mi_option_reset_decommits, - mi_option_large_os_pages, // implies eager commit - mi_option_reserve_huge_os_pages, - mi_option_reserve_os_memory, - mi_option_segment_cache, - mi_option_page_reset, - mi_option_abandoned_page_reset, - mi_option_segment_reset, - mi_option_eager_commit_delay, - mi_option_reset_delay, - mi_option_use_numa_nodes, - mi_option_limit_os_alloc, - mi_option_os_tag, - mi_option_max_errors, - mi_option_max_warnings, - _mi_option_last + mi_option_show_errors, // print error messages + mi_option_show_stats, // print statistics on termination + mi_option_verbose, // print verbose messages + // advanced options + mi_option_eager_commit, // eager commit segments? (after `eager_commit_delay` segments) (=1) + mi_option_arena_eager_commit, // eager commit arenas? Use 2 to enable just on overcommit systems (=2) + mi_option_purge_decommits, // should a memory purge decommit? (=1). Set to 0 to use memory reset on a purge (instead of decommit) + mi_option_allow_large_os_pages, // allow large (2 or 4 MiB) OS pages, implies eager commit. If false, also disables THP for the process. + mi_option_reserve_huge_os_pages, // reserve N huge OS pages (1GiB pages) at startup + mi_option_reserve_huge_os_pages_at, // reserve huge OS pages at a specific NUMA node + mi_option_reserve_os_memory, // reserve specified amount of OS memory in an arena at startup (internally, this value is in KiB; use `mi_option_get_size`) + mi_option_deprecated_segment_cache, + mi_option_deprecated_page_reset, + mi_option_abandoned_page_purge, // immediately purge delayed purges on thread termination + mi_option_deprecated_segment_reset, + mi_option_eager_commit_delay, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) + mi_option_purge_delay, // memory purging is delayed by N milli seconds; use 0 for immediate purging or -1 for no purging at all. (=10) + mi_option_use_numa_nodes, // 0 = use all available numa nodes, otherwise use at most N nodes. + mi_option_disallow_os_alloc, // 1 = do not use OS memory for allocation (but only programmatically reserved arenas) + mi_option_os_tag, // tag used for OS logging (macOS only for now) (=100) + mi_option_max_errors, // issue at most N error messages + mi_option_max_warnings, // issue at most N warning messages + mi_option_max_segment_reclaim, // max. percentage of the abandoned segments can be reclaimed per try (=10%) + mi_option_destroy_on_exit, // if set, release all memory on exit; sometimes used for dynamic unloading but can be unsafe + mi_option_arena_reserve, // initial memory size for arena reservation (= 1 GiB on 64-bit) (internally, this value is in KiB; use `mi_option_get_size`) + mi_option_arena_purge_mult, // multiplier for `purge_delay` for the purging delay for arenas (=10) + mi_option_purge_extend_delay, + mi_option_abandoned_reclaim_on_free, // allow to reclaim an abandoned segment on a free (=1) + mi_option_disallow_arena_alloc, // 1 = do not use arena's for allocation (except if using specific arena id's) + mi_option_retry_on_oom, // retry on out-of-memory for N milli seconds (=400), set to 0 to disable retries. (only on windows) + _mi_option_last, + // legacy option names + mi_option_large_os_pages = mi_option_allow_large_os_pages, + mi_option_eager_region_commit = mi_option_arena_eager_commit, + mi_option_reset_decommits = mi_option_purge_decommits, + mi_option_reset_delay = mi_option_purge_delay, + mi_option_abandoned_page_reset = mi_option_abandoned_page_purge, + mi_option_limit_os_alloc = mi_option_disallow_os_alloc } mi_option_t; @@ -328,7 +364,9 @@ mi_decl_export void mi_option_disable(mi_option_t option); mi_decl_export void mi_option_set_enabled(mi_option_t option, bool enable); mi_decl_export void mi_option_set_enabled_default(mi_option_t option, bool enable); -mi_decl_nodiscard mi_decl_export long mi_option_get(mi_option_t option); +mi_decl_nodiscard mi_decl_export long mi_option_get(mi_option_t option); +mi_decl_nodiscard mi_decl_export long mi_option_get_clamp(mi_option_t option, long min, long max); +mi_decl_nodiscard mi_decl_export size_t mi_option_get_size(mi_option_t option); mi_decl_export void mi_option_set(mi_option_t option, long value); mi_decl_export void mi_option_set_default(mi_option_t option, long value); @@ -342,6 +380,7 @@ mi_decl_export void mi_option_set_default(mi_option_t option, long value); mi_decl_export void mi_cfree(void* p) mi_attr_noexcept; mi_decl_export void* mi__expand(void* p, size_t newsize) mi_attr_noexcept; mi_decl_nodiscard mi_decl_export size_t mi_malloc_size(const void* p) mi_attr_noexcept; +mi_decl_nodiscard mi_decl_export size_t mi_malloc_good_size(size_t size) mi_attr_noexcept; mi_decl_nodiscard mi_decl_export size_t mi_malloc_usable_size(const void *p) mi_attr_noexcept; mi_decl_export int mi_posix_memalign(void** p, size_t alignment, size_t size) mi_attr_noexcept; @@ -351,6 +390,7 @@ mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_pvalloc(size_t size) mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_aligned_alloc(size_t alignment, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(1); mi_decl_nodiscard mi_decl_export void* mi_reallocarray(void* p, size_t count, size_t size) mi_attr_noexcept mi_attr_alloc_size2(2,3); +mi_decl_nodiscard mi_decl_export int mi_reallocarr(void* p, size_t count, size_t size) mi_attr_noexcept; mi_decl_nodiscard mi_decl_export void* mi_aligned_recalloc(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept; mi_decl_nodiscard mi_decl_export void* mi_aligned_offset_recalloc(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept; @@ -373,6 +413,9 @@ mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_new_n(size_t count, s mi_decl_nodiscard mi_decl_export void* mi_new_realloc(void* p, size_t newsize) mi_attr_alloc_size(2); mi_decl_nodiscard mi_decl_export void* mi_new_reallocn(void* p, size_t newcount, size_t size) mi_attr_alloc_size2(2, 3); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_alloc_new(mi_heap_t* heap, size_t size) mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_alloc_new_n(mi_heap_t* heap, size_t count, size_t size) mi_attr_malloc mi_attr_alloc_size2(2, 3); + #ifdef __cplusplus } #endif @@ -383,13 +426,14 @@ mi_decl_nodiscard mi_decl_export void* mi_new_reallocn(void* p, size_t newcount, // --------------------------------------------------------------------------------------------- #ifdef __cplusplus +#include <cstddef> // std::size_t #include <cstdint> // PTRDIFF_MAX #if (__cplusplus >= 201103L) || (_MSC_VER > 1900) // C++11 #include <type_traits> // std::true_type #include <utility> // std::forward #endif -template<class T> struct mi_stl_allocator { +template<class T> struct _mi_stl_allocator_common { typedef T value_type; typedef std::size_t size_type; typedef std::ptrdiff_t difference_type; @@ -397,6 +441,27 @@ template<class T> struct mi_stl_allocator { typedef value_type const& const_reference; typedef value_type* pointer; typedef value_type const* const_pointer; + + #if ((__cplusplus >= 201103L) || (_MSC_VER > 1900)) // C++11 + using propagate_on_container_copy_assignment = std::true_type; + using propagate_on_container_move_assignment = std::true_type; + using propagate_on_container_swap = std::true_type; + template <class U, class ...Args> void construct(U* p, Args&& ...args) { ::new(p) U(std::forward<Args>(args)...); } + template <class U> void destroy(U* p) mi_attr_noexcept { p->~U(); } + #else + void construct(pointer p, value_type const& val) { ::new(p) value_type(val); } + void destroy(pointer p) { p->~value_type(); } + #endif + + size_type max_size() const mi_attr_noexcept { return (PTRDIFF_MAX/sizeof(value_type)); } + pointer address(reference x) const { return &x; } + const_pointer address(const_reference x) const { return &x; } +}; + +template<class T> struct mi_stl_allocator : public _mi_stl_allocator_common<T> { + using typename _mi_stl_allocator_common<T>::size_type; + using typename _mi_stl_allocator_common<T>::value_type; + using typename _mi_stl_allocator_common<T>::pointer; template <class U> struct rebind { typedef mi_stl_allocator<U> other; }; mi_stl_allocator() mi_attr_noexcept = default; @@ -413,24 +478,91 @@ template<class T> struct mi_stl_allocator { #endif #if ((__cplusplus >= 201103L) || (_MSC_VER > 1900)) // C++11 - using propagate_on_container_copy_assignment = std::true_type; - using propagate_on_container_move_assignment = std::true_type; - using propagate_on_container_swap = std::true_type; - using is_always_equal = std::true_type; - template <class U, class ...Args> void construct(U* p, Args&& ...args) { ::new(p) U(std::forward<Args>(args)...); } - template <class U> void destroy(U* p) mi_attr_noexcept { p->~U(); } - #else - void construct(pointer p, value_type const& val) { ::new(p) value_type(val); } - void destroy(pointer p) { p->~value_type(); } + using is_always_equal = std::true_type; #endif - - size_type max_size() const mi_attr_noexcept { return (PTRDIFF_MAX/sizeof(value_type)); } - pointer address(reference x) const { return &x; } - const_pointer address(const_reference x) const { return &x; } }; template<class T1,class T2> bool operator==(const mi_stl_allocator<T1>& , const mi_stl_allocator<T2>& ) mi_attr_noexcept { return true; } template<class T1,class T2> bool operator!=(const mi_stl_allocator<T1>& , const mi_stl_allocator<T2>& ) mi_attr_noexcept { return false; } + + +#if (__cplusplus >= 201103L) || (_MSC_VER >= 1900) // C++11 +#define MI_HAS_HEAP_STL_ALLOCATOR 1 + +#include <memory> // std::shared_ptr + +// Common base class for STL allocators in a specific heap +template<class T, bool _mi_destroy> struct _mi_heap_stl_allocator_common : public _mi_stl_allocator_common<T> { + using typename _mi_stl_allocator_common<T>::size_type; + using typename _mi_stl_allocator_common<T>::value_type; + using typename _mi_stl_allocator_common<T>::pointer; + + _mi_heap_stl_allocator_common(mi_heap_t* hp) : heap(hp, [](mi_heap_t*) {}) {} /* will not delete nor destroy the passed in heap */ + + #if (__cplusplus >= 201703L) // C++17 + mi_decl_nodiscard T* allocate(size_type count) { return static_cast<T*>(mi_heap_alloc_new_n(this->heap.get(), count, sizeof(T))); } + mi_decl_nodiscard T* allocate(size_type count, const void*) { return allocate(count); } + #else + mi_decl_nodiscard pointer allocate(size_type count, const void* = 0) { return static_cast<pointer>(mi_heap_alloc_new_n(this->heap.get(), count, sizeof(value_type))); } + #endif + + #if ((__cplusplus >= 201103L) || (_MSC_VER > 1900)) // C++11 + using is_always_equal = std::false_type; + #endif + + void collect(bool force) { mi_heap_collect(this->heap.get(), force); } + template<class U> bool is_equal(const _mi_heap_stl_allocator_common<U, _mi_destroy>& x) const { return (this->heap == x.heap); } + +protected: + std::shared_ptr<mi_heap_t> heap; + template<class U, bool D> friend struct _mi_heap_stl_allocator_common; + + _mi_heap_stl_allocator_common() { + mi_heap_t* hp = mi_heap_new(); + this->heap.reset(hp, (_mi_destroy ? &heap_destroy : &heap_delete)); /* calls heap_delete/destroy when the refcount drops to zero */ + } + _mi_heap_stl_allocator_common(const _mi_heap_stl_allocator_common& x) mi_attr_noexcept : heap(x.heap) { } + template<class U> _mi_heap_stl_allocator_common(const _mi_heap_stl_allocator_common<U, _mi_destroy>& x) mi_attr_noexcept : heap(x.heap) { } + +private: + static void heap_delete(mi_heap_t* hp) { if (hp != NULL) { mi_heap_delete(hp); } } + static void heap_destroy(mi_heap_t* hp) { if (hp != NULL) { mi_heap_destroy(hp); } } +}; + +// STL allocator allocation in a specific heap +template<class T> struct mi_heap_stl_allocator : public _mi_heap_stl_allocator_common<T, false> { + using typename _mi_heap_stl_allocator_common<T, false>::size_type; + mi_heap_stl_allocator() : _mi_heap_stl_allocator_common<T, false>() { } // creates fresh heap that is deleted when the destructor is called + mi_heap_stl_allocator(mi_heap_t* hp) : _mi_heap_stl_allocator_common<T, false>(hp) { } // no delete nor destroy on the passed in heap + template<class U> mi_heap_stl_allocator(const mi_heap_stl_allocator<U>& x) mi_attr_noexcept : _mi_heap_stl_allocator_common<T, false>(x) { } + + mi_heap_stl_allocator select_on_container_copy_construction() const { return *this; } + void deallocate(T* p, size_type) { mi_free(p); } + template<class U> struct rebind { typedef mi_heap_stl_allocator<U> other; }; +}; + +template<class T1, class T2> bool operator==(const mi_heap_stl_allocator<T1>& x, const mi_heap_stl_allocator<T2>& y) mi_attr_noexcept { return (x.is_equal(y)); } +template<class T1, class T2> bool operator!=(const mi_heap_stl_allocator<T1>& x, const mi_heap_stl_allocator<T2>& y) mi_attr_noexcept { return (!x.is_equal(y)); } + + +// STL allocator allocation in a specific heap, where `free` does nothing and +// the heap is destroyed in one go on destruction -- use with care! +template<class T> struct mi_heap_destroy_stl_allocator : public _mi_heap_stl_allocator_common<T, true> { + using typename _mi_heap_stl_allocator_common<T, true>::size_type; + mi_heap_destroy_stl_allocator() : _mi_heap_stl_allocator_common<T, true>() { } // creates fresh heap that is destroyed when the destructor is called + mi_heap_destroy_stl_allocator(mi_heap_t* hp) : _mi_heap_stl_allocator_common<T, true>(hp) { } // no delete nor destroy on the passed in heap + template<class U> mi_heap_destroy_stl_allocator(const mi_heap_destroy_stl_allocator<U>& x) mi_attr_noexcept : _mi_heap_stl_allocator_common<T, true>(x) { } + + mi_heap_destroy_stl_allocator select_on_container_copy_construction() const { return *this; } + void deallocate(T*, size_type) { /* do nothing as we destroy the heap on destruct. */ } + template<class U> struct rebind { typedef mi_heap_destroy_stl_allocator<U> other; }; +}; + +template<class T1, class T2> bool operator==(const mi_heap_destroy_stl_allocator<T1>& x, const mi_heap_destroy_stl_allocator<T2>& y) mi_attr_noexcept { return (x.is_equal(y)); } +template<class T1, class T2> bool operator!=(const mi_heap_destroy_stl_allocator<T1>& x, const mi_heap_destroy_stl_allocator<T2>& y) mi_attr_noexcept { return (!x.is_equal(y)); } + +#endif // C++11 + #endif // __cplusplus #endif diff --git a/contrib/libs/mimalloc/include/mimalloc-atomic.h b/contrib/libs/mimalloc/include/mimalloc/atomic.h index f7cac357e2..38f174e454 100644 --- a/contrib/libs/mimalloc/include/mimalloc-atomic.h +++ b/contrib/libs/mimalloc/include/mimalloc/atomic.h @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2018-2021 Microsoft Research, Daan Leijen +Copyright (c) 2018-2023 Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. @@ -11,9 +11,9 @@ terms of the MIT license. A copy of the license can be found in the file // -------------------------------------------------------------------------------------------- // Atomics // We need to be portable between C, C++, and MSVC. -// We base the primitives on the C/C++ atomics and create a mimimal wrapper for MSVC in C compilation mode. -// This is why we try to use only `uintptr_t` and `<type>*` as atomic types. -// To gain better insight in the range of used atomics, we use explicitly named memory order operations +// We base the primitives on the C/C++ atomics and create a mimimal wrapper for MSVC in C compilation mode. +// This is why we try to use only `uintptr_t` and `<type>*` as atomic types. +// To gain better insight in the range of used atomics, we use explicitly named memory order operations // instead of passing the memory order as a parameter. // ----------------------------------------------------------------------------------------------- @@ -23,10 +23,17 @@ terms of the MIT license. A copy of the license can be found in the file #define _Atomic(tp) std::atomic<tp> #define mi_atomic(name) std::atomic_##name #define mi_memory_order(name) std::memory_order_##name +#if (__cplusplus >= 202002L) // c++20, see issue #571 +#define MI_ATOMIC_VAR_INIT(x) x +#elif !defined(ATOMIC_VAR_INIT) +#define MI_ATOMIC_VAR_INIT(x) x +#else + #define MI_ATOMIC_VAR_INIT(x) ATOMIC_VAR_INIT(x) +#endif #elif defined(_MSC_VER) // Use MSVC C wrapper for C11 atomics #define _Atomic(tp) tp -#define ATOMIC_VAR_INIT(x) x +#define MI_ATOMIC_VAR_INIT(x) x #define mi_atomic(name) mi_atomic_##name #define mi_memory_order(name) mi_memory_order_##name #else @@ -34,6 +41,13 @@ terms of the MIT license. A copy of the license can be found in the file #include <stdatomic.h> #define mi_atomic(name) atomic_##name #define mi_memory_order(name) memory_order_##name +#if (__STDC_VERSION__ >= 201710L) // c17, see issue #735 + #define MI_ATOMIC_VAR_INIT(x) x +#elif !defined(ATOMIC_VAR_INIT) + #define MI_ATOMIC_VAR_INIT(x) x +#else + #define MI_ATOMIC_VAR_INIT(x) ATOMIC_VAR_INIT(x) +#endif #endif // Various defines for all used memory orders in mimalloc @@ -107,17 +121,21 @@ static inline void mi_atomic_maxi64_relaxed(volatile int64_t* p, int64_t x) { } // Used by timers -#define mi_atomic_loadi64_acquire(p) mi_atomic(load_explicit)(p,mi_memory_order(acquire)) -#define mi_atomic_loadi64_relaxed(p) mi_atomic(load_explicit)(p,mi_memory_order(relaxed)) -#define mi_atomic_storei64_release(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(release)) -#define mi_atomic_storei64_relaxed(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(relaxed)) +#define mi_atomic_loadi64_acquire(p) mi_atomic(load_explicit)(p,mi_memory_order(acquire)) +#define mi_atomic_loadi64_relaxed(p) mi_atomic(load_explicit)(p,mi_memory_order(relaxed)) +#define mi_atomic_storei64_release(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(release)) +#define mi_atomic_storei64_relaxed(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(relaxed)) +#define mi_atomic_casi64_strong_acq_rel(p,e,d) mi_atomic_cas_strong_acq_rel(p,e,d) +#define mi_atomic_addi64_acq_rel(p,i) mi_atomic_add_acq_rel(p,i) #elif defined(_MSC_VER) -// MSVC C compilation wrapper that uses Interlocked operations to model C11 atomics. +// Legacy MSVC plain C compilation wrapper that uses Interlocked operations to model C11 atomics. +#ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN +#endif #include <windows.h> #include <intrin.h> #ifdef _WIN64 @@ -173,7 +191,7 @@ static inline uintptr_t mi_atomic_exchange_explicit(_Atomic(uintptr_t)*p, uintpt } static inline void mi_atomic_thread_fence(mi_memory_order mo) { (void)(mo); - _Atomic(uintptr_t)x = 0; + _Atomic(uintptr_t) x = 0; mi_atomic_exchange_explicit(&x, 1, mo); } static inline uintptr_t mi_atomic_load_explicit(_Atomic(uintptr_t) const* p, mi_memory_order mo) { @@ -183,7 +201,7 @@ static inline uintptr_t mi_atomic_load_explicit(_Atomic(uintptr_t) const* p, mi_ #else uintptr_t x = *p; if (mo > mi_memory_order_relaxed) { - while (!mi_atomic_compare_exchange_weak_explicit(p, &x, x, mo, mi_memory_order_relaxed)) { /* nothing */ }; + while (!mi_atomic_compare_exchange_weak_explicit((_Atomic(uintptr_t)*)p, &x, x, mo, mi_memory_order_relaxed)) { /* nothing */ }; } return x; #endif @@ -239,6 +257,21 @@ static inline void mi_atomic_maxi64_relaxed(volatile _Atomic(int64_t)*p, int64_t } while (current < x && _InterlockedCompareExchange64(p, x, current) != current); } +static inline void mi_atomic_addi64_acq_rel(volatile _Atomic(int64_t*)p, int64_t i) { + mi_atomic_addi64_relaxed(p, i); +} + +static inline bool mi_atomic_casi64_strong_acq_rel(volatile _Atomic(int64_t*)p, int64_t* exp, int64_t des) { + int64_t read = _InterlockedCompareExchange64(p, des, *exp); + if (read == *exp) { + return true; + } + else { + *exp = read; + return false; + } +} + // The pointer macros cast to `uintptr_t`. #define mi_atomic_load_ptr_acquire(tp,p) (tp*)mi_atomic_load_acquire((_Atomic(uintptr_t)*)(p)) #define mi_atomic_load_ptr_relaxed(tp,p) (tp*)mi_atomic_load_relaxed((_Atomic(uintptr_t)*)(p)) @@ -269,14 +302,36 @@ static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub) { return (intptr_t)mi_atomic_addi(p, -sub); } -// Yield +typedef _Atomic(uintptr_t) mi_atomic_once_t; + +// Returns true only on the first invocation +static inline bool mi_atomic_once( mi_atomic_once_t* once ) { + if (mi_atomic_load_relaxed(once) != 0) return false; // quick test + uintptr_t expected = 0; + return mi_atomic_cas_strong_acq_rel(once, &expected, (uintptr_t)1); // try to set to 1 +} + +typedef _Atomic(uintptr_t) mi_atomic_guard_t; + +// Allows only one thread to execute at a time +#define mi_atomic_guard(guard) \ + uintptr_t _mi_guard_expected = 0; \ + for(bool _mi_guard_once = true; \ + _mi_guard_once && mi_atomic_cas_strong_acq_rel(guard,&_mi_guard_expected,(uintptr_t)1); \ + (mi_atomic_store_release(guard,(uintptr_t)0), _mi_guard_once = false) ) + + + +// Yield #if defined(__cplusplus) #include <thread> static inline void mi_atomic_yield(void) { std::this_thread::yield(); } #elif defined(_WIN32) +#ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN +#endif #include <windows.h> static inline void mi_atomic_yield(void) { YieldProcessor(); @@ -288,7 +343,7 @@ static inline void mi_atomic_yield(void) { } #elif (defined(__GNUC__) || defined(__clang__)) && \ (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__armel__) || defined(__ARMEL__) || \ - defined(__aarch64__) || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__)) + defined(__aarch64__) || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__)) || defined(__POWERPC__) #if defined(__x86_64__) || defined(__i386__) static inline void mi_atomic_yield(void) { __asm__ volatile ("pause" ::: "memory"); @@ -301,10 +356,16 @@ static inline void mi_atomic_yield(void) { static inline void mi_atomic_yield(void) { __asm__ volatile("yield" ::: "memory"); } -#elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) +#elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) || defined(__POWERPC__) +#ifdef __APPLE__ +static inline void mi_atomic_yield(void) { + __asm__ volatile ("or r27,r27,r27" ::: "memory"); +} +#else static inline void mi_atomic_yield(void) { __asm__ __volatile__ ("or 27,27,27" ::: "memory"); } +#endif #elif defined(__armel__) || defined(__ARMEL__) static inline void mi_atomic_yield(void) { __asm__ volatile ("nop" ::: "memory"); diff --git a/contrib/libs/mimalloc/include/mimalloc-internal.h b/contrib/libs/mimalloc/include/mimalloc/internal.h index 1e1a79665c..2954eabd86 100644 --- a/contrib/libs/mimalloc/include/mimalloc-internal.h +++ b/contrib/libs/mimalloc/include/mimalloc/internal.h @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2018-2021, Microsoft Research, Daan Leijen +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. @@ -8,7 +8,14 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_INTERNAL_H #define MIMALLOC_INTERNAL_H -#include "mimalloc-types.h" + +// -------------------------------------------------------------------------- +// This file contains the interal API's of mimalloc and various utility +// functions and macros. +// -------------------------------------------------------------------------- + +#include "types.h" +#include "track.h" #if (MI_DEBUG>0) #define mi_trace_message(...) _mi_trace_message(__VA_ARGS__) @@ -19,17 +26,37 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_CACHE_LINE 64 #if defined(_MSC_VER) #pragma warning(disable:4127) // suppress constant conditional warning (due to MI_SECURE paths) +#pragma warning(disable:26812) // unscoped enum warning #define mi_decl_noinline __declspec(noinline) #define mi_decl_thread __declspec(thread) #define mi_decl_cache_align __declspec(align(MI_CACHE_LINE)) -#elif (defined(__GNUC__) && (__GNUC__>=3)) // includes clang and icc +#define mi_decl_weak +#elif (defined(__GNUC__) && (__GNUC__ >= 3)) || defined(__clang__) // includes clang and icc #define mi_decl_noinline __attribute__((noinline)) #define mi_decl_thread __thread #define mi_decl_cache_align __attribute__((aligned(MI_CACHE_LINE))) +#define mi_decl_weak __attribute__((weak)) #else #define mi_decl_noinline #define mi_decl_thread __thread // hope for the best :-) #define mi_decl_cache_align +#define mi_decl_weak +#endif + +#if defined(__EMSCRIPTEN__) && !defined(__wasi__) +#define __wasi__ +#endif + +#if defined(__cplusplus) +#define mi_decl_externc extern "C" +#else +#define mi_decl_externc +#endif + +// pthreads +#if !defined(_WIN32) && !defined(__wasi__) +#define MI_USE_PTHREADS +#include <pthread.h> #endif // "options.c" @@ -43,61 +70,110 @@ void _mi_error_message(int err, const char* fmt, ...); // random.c void _mi_random_init(mi_random_ctx_t* ctx); +void _mi_random_init_weak(mi_random_ctx_t* ctx); +void _mi_random_reinit_if_weak(mi_random_ctx_t * ctx); void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* new_ctx); uintptr_t _mi_random_next(mi_random_ctx_t* ctx); uintptr_t _mi_heap_random_next(mi_heap_t* heap); -uintptr_t _os_random_weak(uintptr_t extra_seed); +uintptr_t _mi_os_random_weak(uintptr_t extra_seed); static inline uintptr_t _mi_random_shuffle(uintptr_t x); // init.c extern mi_decl_cache_align mi_stats_t _mi_stats_main; extern mi_decl_cache_align const mi_page_t _mi_page_empty; bool _mi_is_main_thread(void); -bool _mi_preloading(); // true while the C runtime is not ready +size_t _mi_current_thread_count(void); +bool _mi_preloading(void); // true while the C runtime is not initialized yet +mi_threadid_t _mi_thread_id(void) mi_attr_noexcept; +mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing heap +void _mi_thread_done(mi_heap_t* heap); +void _mi_thread_data_collect(void); +void _mi_tld_init(mi_tld_t* tld, mi_heap_t* bheap); // os.c +void _mi_os_init(void); // called from process init +void* _mi_os_alloc(size_t size, mi_memid_t* memid, mi_stats_t* stats); +void _mi_os_free(void* p, size_t size, mi_memid_t memid, mi_stats_t* stats); +void _mi_os_free_ex(void* p, size_t size, bool still_committed, mi_memid_t memid, mi_stats_t* stats); + size_t _mi_os_page_size(void); -void _mi_os_init(void); // called from process init -void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocate thread local data -void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data size_t _mi_os_good_alloc_size(size_t size); - -// memory.c -void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* id, mi_os_tld_t* tld); -void _mi_mem_free(void* p, size_t size, size_t id, bool fully_committed, bool any_reset, mi_os_tld_t* tld); - -bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld); -bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld); -bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld); -bool _mi_mem_protect(void* addr, size_t size); -bool _mi_mem_unprotect(void* addr, size_t size); - -void _mi_mem_collect(mi_os_tld_t* tld); +bool _mi_os_has_overcommit(void); +bool _mi_os_has_virtual_reserve(void); + +bool _mi_os_reset(void* addr, size_t size, mi_stats_t* tld_stats); +bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); +bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); +bool _mi_os_protect(void* addr, size_t size); +bool _mi_os_unprotect(void* addr, size_t size); +bool _mi_os_purge(void* p, size_t size, mi_stats_t* stats); +bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, mi_stats_t* stats); + +void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* stats); +void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* tld_stats); + +void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size); +bool _mi_os_use_large_page(size_t size, size_t alignment); +size_t _mi_os_large_page_size(void); + +void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize, mi_memid_t* memid); + +// arena.c +mi_arena_id_t _mi_arena_id_none(void); +void _mi_arena_free(void* p, size_t size, size_t still_committed_size, mi_memid_t memid, mi_stats_t* stats); +void* _mi_arena_alloc(size_t size, bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld); +void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld); +bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_id); +bool _mi_arena_contains(const void* p); +void _mi_arenas_collect(bool force_purge, mi_stats_t* stats); +void _mi_arena_unsafe_destroy_all(mi_stats_t* stats); + +bool _mi_arena_segment_clear_abandoned(mi_segment_t* segment); +void _mi_arena_segment_mark_abandoned(mi_segment_t* segment); +size_t _mi_arena_segment_abandoned_count(void); + +typedef struct mi_arena_field_cursor_s { // abstract + mi_arena_id_t start; + int count; + size_t bitmap_idx; +} mi_arena_field_cursor_t; +void _mi_arena_field_cursor_init(mi_heap_t* heap, mi_arena_field_cursor_t* current); +mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_field_cursor_t* previous); + +// "segment-map.c" +void _mi_segment_map_allocated_at(const mi_segment_t* segment); +void _mi_segment_map_freed_at(const mi_segment_t* segment); // "segment.c" -mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_wsize, mi_segments_tld_t* tld, mi_os_tld_t* os_tld); +mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld); void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld); void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld); -uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t block_size, size_t* page_size, size_t* pre_size); // page start for any page +uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size); + +#if MI_HUGE_PAGE_ABANDON void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block_t* block); +#else +void _mi_segment_huge_page_reset(mi_segment_t* segment, mi_page_t* page, mi_block_t* block); +#endif -void _mi_segment_thread_collect(mi_segments_tld_t* tld); +void _mi_segments_collect(bool force, mi_segments_tld_t* tld); void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld); void _mi_abandoned_await_readers(void); - - +bool _mi_segment_attempt_reclaim(mi_heap_t* heap, mi_segment_t* segment); // "page.c" -void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc; +void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept mi_attr_malloc; -void _mi_page_retire(mi_page_t* page); // free the page if there are no other pages with many free blocks +void _mi_page_retire(mi_page_t* page) mi_attr_noexcept; // free the page if there are no other pages with many free blocks void _mi_page_unfull(mi_page_t* page); void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force); // free the page void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq); // abandon the page, to be picked up by another thread... -void _mi_heap_delayed_free(mi_heap_t* heap); +void _mi_heap_delayed_free_all(mi_heap_t* heap); +bool _mi_heap_delayed_free_partial(mi_heap_t* heap); void _mi_heap_collect_retired(mi_heap_t* heap, bool force); void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never); +bool _mi_page_try_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never); size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue_t* append); void _mi_deferred_free(mi_heap_t* heap, bool force); @@ -108,24 +184,43 @@ size_t _mi_bin_size(uint8_t bin); // for stats uint8_t _mi_bin(size_t size); // for stats // "heap.c" +void _mi_heap_init(mi_heap_t* heap, mi_tld_t* tld, mi_arena_id_t arena_id, bool noreclaim, uint8_t tag); void _mi_heap_destroy_pages(mi_heap_t* heap); void _mi_heap_collect_abandon(mi_heap_t* heap); void _mi_heap_set_default_direct(mi_heap_t* heap); +bool _mi_heap_memid_is_suitable(mi_heap_t* heap, mi_memid_t memid); +void _mi_heap_unsafe_destroy_all(void); +mi_heap_t* _mi_heap_by_tag(mi_heap_t* heap, uint8_t tag); // "stats.c" void _mi_stats_done(mi_stats_t* stats); - mi_msecs_t _mi_clock_now(void); mi_msecs_t _mi_clock_end(mi_msecs_t start); mi_msecs_t _mi_clock_start(void); // "alloc.c" -void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept; // called from `_mi_malloc_generic` -void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero); -void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero); -mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p); +void* _mi_page_malloc_zero(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept; // called from `_mi_malloc_generic` +void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept; // called from `_mi_heap_malloc_aligned` +void* _mi_page_malloc_zeroed(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept; // called from `_mi_heap_malloc_aligned` +void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept; +void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept; // called from `_mi_heap_malloc_aligned` +void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_attr_noexcept; +mi_block_t* _mi_page_ptr_unalign(const mi_page_t* page, const void* p); bool _mi_free_delayed_block(mi_block_t* block); -void _mi_block_zero_init(const mi_page_t* page, void* p, size_t size); +void _mi_free_generic(mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept; // for runtime integration +void _mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size); + +// "libc.c" +#include <stdarg.h> +void _mi_vsnprintf(char* buf, size_t bufsize, const char* fmt, va_list args); +void _mi_snprintf(char* buf, size_t buflen, const char* fmt, ...); +char _mi_toupper(char c); +int _mi_strnicmp(const char* s, const char* t, size_t n); +void _mi_strlcpy(char* dest, const char* src, size_t dest_size); +void _mi_strlcat(char* dest, const char* src, size_t dest_size); +size_t _mi_strlen(const char* s); +size_t _mi_strnlen(const char* s, size_t max_len); +bool _mi_getenv(const char* name, char* result, size_t result_size); #if MI_DEBUG>1 bool _mi_page_is_valid(mi_page_t* page); @@ -137,8 +232,11 @@ bool _mi_page_is_valid(mi_page_t* page); // ------------------------------------------------------ #if defined(__GNUC__) || defined(__clang__) -#define mi_unlikely(x) __builtin_expect((x),0) -#define mi_likely(x) __builtin_expect((x),1) +#define mi_unlikely(x) (__builtin_expect(!!(x),false)) +#define mi_likely(x) (__builtin_expect(!!(x),true)) +#elif (defined(__cplusplus) && (__cplusplus >= 202002L)) || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L) +#define mi_unlikely(x) (x) [[unlikely]] +#define mi_likely(x) (x) [[likely]] #else #define mi_unlikely(x) (x) #define mi_likely(x) (x) @@ -176,11 +274,11 @@ bool _mi_page_is_valid(mi_page_t* page); /* ----------------------------------------------------------- Inlined definitions ----------------------------------------------------------- */ -#define UNUSED(x) (void)(x) +#define MI_UNUSED(x) (void)(x) #if (MI_DEBUG>0) -#define UNUSED_RELEASE(x) +#define MI_UNUSED_RELEASE(x) #else -#define UNUSED_RELEASE(x) UNUSED(x) +#define MI_UNUSED_RELEASE(x) MI_UNUSED(x) #endif #define MI_INIT4(x) x(),x(),x(),x() @@ -192,11 +290,21 @@ bool _mi_page_is_valid(mi_page_t* page); #define MI_INIT256(x) MI_INIT128(x),MI_INIT128(x) +#include <string.h> +// initialize a local variable to zero; use memset as compilers optimize constant sized memset's +#define _mi_memzero_var(x) memset(&x,0,sizeof(x)) + // Is `x` a power of two? (0 is considered a power of two) static inline bool _mi_is_power_of_two(uintptr_t x) { return ((x & (x - 1)) == 0); } +// Is a pointer aligned? +static inline bool _mi_is_aligned(void* p, size_t alignment) { + mi_assert_internal(alignment != 0); + return (((uintptr_t)p % alignment) == 0); +} + // Align upwards static inline uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) { mi_assert_internal(alignment != 0); @@ -209,6 +317,12 @@ static inline uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) { } } +// Align a pointer upwards +static inline void* mi_align_up_ptr(void* p, size_t alignment) { + return (void*)_mi_align_up((uintptr_t)p, alignment); +} + + // Divide upwards: `s <= _mi_divide_up(s,d)*d < s+d`. static inline uintptr_t _mi_divide_up(uintptr_t size, size_t divider) { mi_assert_internal(divider != 0); @@ -216,7 +330,7 @@ static inline uintptr_t _mi_divide_up(uintptr_t size, size_t divider) { } // Is memory zero initialized? -static inline bool mi_mem_is_zero(void* p, size_t size) { +static inline bool mi_mem_is_zero(const void* p, size_t size) { for (size_t i = 0; i < size; i++) { if (((uint8_t*)p)[i] != 0) return false; } @@ -230,32 +344,27 @@ static inline size_t _mi_wsize_from_size(size_t size) { return (size + sizeof(uintptr_t) - 1) / sizeof(uintptr_t); } -// Does malloc satisfy the alignment constraints already? -static inline bool mi_malloc_satisfies_alignment(size_t alignment, size_t size) { - return (alignment == sizeof(void*) || (alignment == MI_MAX_ALIGN_SIZE && size > (MI_MAX_ALIGN_SIZE/2))); -} - // Overflow detecting multiply -#if __has_builtin(__builtin_umul_overflow) || __GNUC__ >= 5 +#if __has_builtin(__builtin_umul_overflow) || (defined(__GNUC__) && (__GNUC__ >= 5)) #include <limits.h> // UINT_MAX, ULONG_MAX #if defined(_CLOCK_T) // for Illumos #undef _CLOCK_T #endif static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) { - #if (SIZE_MAX == UINT_MAX) - return __builtin_umul_overflow(count, size, total); - #elif (SIZE_MAX == ULONG_MAX) - return __builtin_umull_overflow(count, size, total); + #if (SIZE_MAX == ULONG_MAX) + return __builtin_umull_overflow(count, size, (unsigned long *)total); + #elif (SIZE_MAX == UINT_MAX) + return __builtin_umul_overflow(count, size, (unsigned int *)total); #else - return __builtin_umulll_overflow(count, size, total); + return __builtin_umulll_overflow(count, size, (unsigned long long *)total); #endif } #else /* __builtin_umul_overflow is unavailable */ static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) { - #define MI_MUL_NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t))) // sqrt(SIZE_MAX) + #define MI_MUL_COULD_OVERFLOW ((size_t)1 << (4*sizeof(size_t))) // sqrt(SIZE_MAX) *total = count * size; - return ((size >= MI_MUL_NO_OVERFLOW || count >= MI_MUL_NO_OVERFLOW) - && size > 0 && (SIZE_MAX / size) < count); + // note: gcc/clang optimize this to directly check the overflow flag + return ((size >= MI_MUL_COULD_OVERFLOW || count >= MI_MUL_COULD_OVERFLOW) && size > 0 && (SIZE_MAX / size) < count); } #endif @@ -265,8 +374,10 @@ static inline bool mi_count_size_overflow(size_t count, size_t size, size_t* tot *total = size; return false; } - else if (mi_unlikely(mi_mul_overflow(count, size, total))) { + else if mi_unlikely(mi_mul_overflow(count, size, total)) { + #if MI_DEBUG > 0 _mi_error_message(EOVERFLOW, "allocation request is too large (%zu * %zu bytes)\n", count, size); + #endif *total = SIZE_MAX; return true; } @@ -274,85 +385,11 @@ static inline bool mi_count_size_overflow(size_t count, size_t size, size_t* tot } -/* ---------------------------------------------------------------------------------------- -The thread local default heap: `_mi_get_default_heap` returns the thread local heap. -On most platforms (Windows, Linux, FreeBSD, NetBSD, etc), this just returns a -__thread local variable (`_mi_heap_default`). With the initial-exec TLS model this ensures -that the storage will always be available (allocated on the thread stacks). -On some platforms though we cannot use that when overriding `malloc` since the underlying -TLS implementation (or the loader) will call itself `malloc` on a first access and recurse. -We try to circumvent this in an efficient way: -- macOSX : we use an unused TLS slot from the OS allocated slots (MI_TLS_SLOT). On OSX, the - loader itself calls `malloc` even before the modules are initialized. -- OpenBSD: we use an unused slot from the pthread block (MI_TLS_PTHREAD_SLOT_OFS). -- DragonFly: the uniqueid use is buggy but kept for reference. +/*---------------------------------------------------------------------------------------- + Heap functions ------------------------------------------------------------------------------------------- */ extern const mi_heap_t _mi_heap_empty; // read-only empty heap, initial value of the thread local default heap -extern bool _mi_process_is_initialized; -mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing heap - -#if defined(MI_MALLOC_OVERRIDE) -#if defined(__APPLE__) // macOS -#define MI_TLS_SLOT 89 // seems unused? -// other possible unused ones are 9, 29, __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4 (94), __PTK_FRAMEWORK_GC_KEY9 (112) and __PTK_FRAMEWORK_OLDGC_KEY9 (89) -// see <https://github.com/rweichler/substrate/blob/master/include/pthread_machdep.h> -#elif defined(__OpenBSD__) -// use end bytes of a name; goes wrong if anyone uses names > 23 characters (ptrhread specifies 16) -// see <https://github.com/openbsd/src/blob/master/lib/libc/include/thread_private.h#L371> -#define MI_TLS_PTHREAD_SLOT_OFS (6*sizeof(int) + 4*sizeof(void*) + 24) -#elif defined(__DragonFly__) -#warning "mimalloc is not working correctly on DragonFly yet." -//#define MI_TLS_PTHREAD_SLOT_OFS (4 + 1*sizeof(void*)) // offset `uniqueid` (also used by gdb?) <https://github.com/DragonFlyBSD/DragonFlyBSD/blob/master/lib/libthread_xu/thread/thr_private.h#L458> -#endif -#endif - -#if defined(MI_TLS_SLOT) -static inline void* mi_tls_slot(size_t slot) mi_attr_noexcept; // forward declaration -#elif defined(MI_TLS_PTHREAD_SLOT_OFS) -#include <pthread.h> -static inline mi_heap_t** mi_tls_pthread_heap_slot(void) { - pthread_t self = pthread_self(); - #if defined(__DragonFly__) - if (self==NULL) { - mi_heap_t* pheap_main = _mi_heap_main_get(); - return &pheap_main; - } - #endif - return (mi_heap_t**)((uint8_t*)self + MI_TLS_PTHREAD_SLOT_OFS); -} -#elif defined(MI_TLS_PTHREAD) -#include <pthread.h> -extern pthread_key_t _mi_heap_default_key; -#endif - -// Default heap to allocate from (if not using TLS- or pthread slots). -// Do not use this directly but use through `mi_heap_get_default()` (or the unchecked `mi_get_default_heap`). -// This thread local variable is only used when neither MI_TLS_SLOT, MI_TLS_PTHREAD, or MI_TLS_PTHREAD_SLOT_OFS are defined. -// However, on the Apple M1 we do use the address of this variable as the unique thread-id (issue #356). -extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from - -static inline mi_heap_t* mi_get_default_heap(void) { -#if defined(MI_TLS_SLOT) - mi_heap_t* heap = (mi_heap_t*)mi_tls_slot(MI_TLS_SLOT); - return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap); -#elif defined(MI_TLS_PTHREAD_SLOT_OFS) - mi_heap_t* heap = *mi_tls_pthread_heap_slot(); - return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap); -#elif defined(MI_TLS_PTHREAD) - mi_heap_t* heap = (mi_unlikely(_mi_heap_default_key == (pthread_key_t)(-1)) ? _mi_heap_main_get() : (mi_heap_t*)pthread_getspecific(_mi_heap_default_key)); - return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap); -#else - #if defined(MI_TLS_RECURSE_GUARD) - if (mi_unlikely(!_mi_process_is_initialized)) return _mi_heap_main_get(); - #endif - return _mi_heap_default; -#endif -} - -static inline bool mi_heap_is_default(const mi_heap_t* heap) { - return (heap == mi_get_default_heap()); -} static inline bool mi_heap_is_backing(const mi_heap_t* heap) { return (heap->tld->heap_backing == heap); @@ -380,30 +417,34 @@ static inline mi_page_t* _mi_heap_get_free_small_page(mi_heap_t* heap, size_t si return heap->pages_free_direct[idx]; } -// Get the page belonging to a certain size class -static inline mi_page_t* _mi_get_free_small_page(size_t size) { - return _mi_heap_get_free_small_page(mi_get_default_heap(), size); -} - // Segment that contains the pointer +// Large aligned blocks may be aligned at N*MI_SEGMENT_SIZE (inside a huge segment > MI_SEGMENT_SIZE), +// and we need align "down" to the segment info which is `MI_SEGMENT_SIZE` bytes before it; +// therefore we align one byte before `p`. +// We check for NULL afterwards on 64-bit systems to improve codegen for `mi_free`. static inline mi_segment_t* _mi_ptr_segment(const void* p) { - // mi_assert_internal(p != NULL); - return (mi_segment_t*)((uintptr_t)p & ~MI_SEGMENT_MASK); + mi_segment_t* const segment = (mi_segment_t*)(((uintptr_t)p - 1) & ~MI_SEGMENT_MASK); + #if MI_INTPTR_SIZE <= 4 + return (p==NULL ? NULL : segment); + #else + return ((intptr_t)segment <= 0 ? NULL : segment); + #endif } // Segment belonging to a page static inline mi_segment_t* _mi_page_segment(const mi_page_t* page) { + mi_assert_internal(page!=NULL); mi_segment_t* segment = _mi_ptr_segment(page); mi_assert_internal(segment == NULL || page == &segment->pages[page->segment_idx]); return segment; } // used internally -static inline uintptr_t _mi_segment_page_idx_of(const mi_segment_t* segment, const void* p) { +static inline size_t _mi_segment_page_idx_of(const mi_segment_t* segment, const void* p) { // if (segment->page_size > MI_SEGMENT_SIZE) return &segment->pages[0]; // huge pages ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment; - mi_assert_internal(diff >= 0 && (size_t)diff < MI_SEGMENT_SIZE); - uintptr_t idx = (uintptr_t)diff >> segment->page_shift; + mi_assert_internal(diff >= 0 && (size_t)diff <= MI_SEGMENT_SIZE /* for huge alignment it can be equal */); + size_t idx = (size_t)diff >> segment->page_shift; mi_assert_internal(idx < segment->capacity); mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM || idx == 0); return idx; @@ -411,34 +452,33 @@ static inline uintptr_t _mi_segment_page_idx_of(const mi_segment_t* segment, con // Get the page containing the pointer static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) { - uintptr_t idx = _mi_segment_page_idx_of(segment, p); + size_t idx = _mi_segment_page_idx_of(segment, p); return &((mi_segment_t*)segment)->pages[idx]; } // Quick page start for initialized pages -static inline uint8_t* _mi_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) { - const size_t bsize = page->xblock_size; - mi_assert_internal(bsize > 0 && (bsize%sizeof(void*)) == 0); - return _mi_segment_page_start(segment, page, bsize, page_size, NULL); +static inline uint8_t* mi_page_start(const mi_page_t* page) { + mi_assert_internal(page->page_start != NULL); + mi_assert_expensive(_mi_segment_page_start(_mi_page_segment(page),page,NULL) == page->page_start); + return page->page_start; } // Get the page containing the pointer static inline mi_page_t* _mi_ptr_page(void* p) { + mi_assert_internal(p!=NULL); return _mi_segment_page_of(_mi_ptr_segment(p), p); } -// Get the block size of a page (special cased for huge objects) +// Get the block size of a page (special case for huge objects) static inline size_t mi_page_block_size(const mi_page_t* page) { - const size_t bsize = page->xblock_size; - mi_assert_internal(bsize > 0); - if (mi_likely(bsize < MI_HUGE_BLOCK_SIZE)) { - return bsize; - } - else { - size_t psize; - _mi_segment_page_start(_mi_page_segment(page), page, bsize, &psize, NULL); - return psize; - } + mi_assert_internal(page->block_size > 0); + return page->block_size; +} + +static inline bool mi_page_is_huge(const mi_page_t* page) { + mi_assert_internal((page->is_huge && _mi_page_segment(page)->page_kind == MI_PAGE_HUGE) || + (!page->is_huge && _mi_page_segment(page)->page_kind != MI_PAGE_HUGE)); + return page->is_huge; } // Get the usable block size of a page without fixed padding. @@ -447,6 +487,10 @@ static inline size_t mi_page_usable_block_size(const mi_page_t* page) { return mi_page_block_size(page) - MI_PADDING_SIZE; } +// size of a segment +static inline size_t mi_segment_size(mi_segment_t* segment) { + return segment->segment_size; +} // Thread free access static inline mi_block_t* mi_page_thread_free(const mi_page_t* page) { @@ -465,6 +509,7 @@ static inline mi_heap_t* mi_page_heap(const mi_page_t* page) { static inline void mi_page_set_heap(mi_page_t* page, mi_heap_t* heap) { mi_assert_internal(mi_page_thread_free_flag(page) != MI_DELAYED_FREEING); mi_atomic_store_release(&page->xheap,(uintptr_t)heap); + if (heap != NULL) { page->heap_tag = heap->tag; } } // Thread free flag helpers @@ -569,8 +614,8 @@ static inline bool mi_is_in_same_page(const void* p, const void* q) { mi_segment_t* segmentp = _mi_ptr_segment(p); mi_segment_t* segmentq = _mi_ptr_segment(q); if (segmentp != segmentq) return false; - uintptr_t idxp = _mi_segment_page_idx_of(segmentp, p); - uintptr_t idxq = _mi_segment_page_idx_of(segmentq, q); + size_t idxp = _mi_segment_page_idx_of(segmentp, p); + size_t idxq = _mi_segment_page_idx_of(segmentq, q); return (idxp == idxq); } @@ -585,30 +630,36 @@ static inline uintptr_t mi_rotr(uintptr_t x, uintptr_t shift) { static inline void* mi_ptr_decode(const void* null, const mi_encoded_t x, const uintptr_t* keys) { void* p = (void*)(mi_rotr(x - keys[0], keys[0]) ^ keys[1]); - return (mi_unlikely(p==null) ? NULL : p); + return (p==null ? NULL : p); } static inline mi_encoded_t mi_ptr_encode(const void* null, const void* p, const uintptr_t* keys) { - uintptr_t x = (uintptr_t)(mi_unlikely(p==NULL) ? null : p); + uintptr_t x = (uintptr_t)(p==NULL ? null : p); return mi_rotl(x ^ keys[1], keys[0]) + keys[0]; } static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* block, const uintptr_t* keys ) { + mi_track_mem_defined(block,sizeof(mi_block_t)); + mi_block_t* next; #ifdef MI_ENCODE_FREELIST - return (mi_block_t*)mi_ptr_decode(null, block->next, keys); + next = (mi_block_t*)mi_ptr_decode(null, block->next, keys); #else - UNUSED(keys); UNUSED(null); - return (mi_block_t*)block->next; + MI_UNUSED(keys); MI_UNUSED(null); + next = (mi_block_t*)block->next; #endif + mi_track_mem_noaccess(block,sizeof(mi_block_t)); + return next; } static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, const uintptr_t* keys) { + mi_track_mem_undefined(block,sizeof(mi_block_t)); #ifdef MI_ENCODE_FREELIST block->next = mi_ptr_encode(null, next, keys); #else - UNUSED(keys); UNUSED(null); + MI_UNUSED(keys); MI_UNUSED(null); block->next = (mi_encoded_t)next; #endif + mi_track_mem_noaccess(block,sizeof(mi_block_t)); } static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t* block) { @@ -616,13 +667,13 @@ static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t* mi_block_t* next = mi_block_nextx(page,block,page->keys); // check for free list corruption: is `next` at least in the same page? // TODO: check if `next` is `page->block_size` aligned? - if (mi_unlikely(next!=NULL && !mi_is_in_same_page(block, next))) { + if mi_unlikely(next!=NULL && !mi_is_in_same_page(block, next)) { _mi_error_message(EFAULT, "corrupted free list entry of size %zub at %p: value 0x%zx\n", mi_page_block_size(page), block, (uintptr_t)next); next = NULL; } return next; #else - UNUSED(page); + MI_UNUSED(page); return mi_block_nextx(page,block,NULL); #endif } @@ -631,11 +682,36 @@ static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, c #ifdef MI_ENCODE_FREELIST mi_block_set_nextx(page,block,next, page->keys); #else - UNUSED(page); + MI_UNUSED(page); mi_block_set_nextx(page,block,next,NULL); #endif } + +/* ----------------------------------------------------------- + memory id's +----------------------------------------------------------- */ + +static inline mi_memid_t _mi_memid_create(mi_memkind_t memkind) { + mi_memid_t memid; + _mi_memzero_var(memid); + memid.memkind = memkind; + return memid; +} + +static inline mi_memid_t _mi_memid_none(void) { + return _mi_memid_create(MI_MEM_NONE); +} + +static inline mi_memid_t _mi_memid_create_os(bool committed, bool is_zero, bool is_large) { + mi_memid_t memid = _mi_memid_create(MI_MEM_OS); + memid.initially_committed = committed; + memid.initially_zero = is_zero; + memid.is_pinned = is_large; + return memid; +} + + // ------------------------------------------------------------------- // Fast "random" shuffle // ------------------------------------------------------------------- @@ -669,102 +745,16 @@ size_t _mi_os_numa_node_count_get(void); extern _Atomic(size_t) _mi_numa_node_count; static inline int _mi_os_numa_node(mi_os_tld_t* tld) { - if (mi_likely(mi_atomic_load_relaxed(&_mi_numa_node_count) == 1)) return 0; + if mi_likely(mi_atomic_load_relaxed(&_mi_numa_node_count) == 1) { return 0; } else return _mi_os_numa_node_get(tld); } static inline size_t _mi_os_numa_node_count(void) { const size_t count = mi_atomic_load_relaxed(&_mi_numa_node_count); - if (mi_likely(count>0)) return count; + if mi_likely(count > 0) { return count; } else return _mi_os_numa_node_count_get(); } -// ------------------------------------------------------------------- -// Getting the thread id should be performant as it is called in the -// fast path of `_mi_free` and we specialize for various platforms. -// ------------------------------------------------------------------- -#if defined(_WIN32) -#define WIN32_LEAN_AND_MEAN -#include <windows.h> -static inline uintptr_t _mi_thread_id(void) mi_attr_noexcept { - // Windows: works on Intel and ARM in both 32- and 64-bit - return (uintptr_t)NtCurrentTeb(); -} - -#elif defined(__GNUC__) && \ - (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__)) - -// TLS register on x86 is in the FS or GS register, see: https://akkadia.org/drepper/tls.pdf -static inline void* mi_tls_slot(size_t slot) mi_attr_noexcept { - void* res; - const size_t ofs = (slot*sizeof(void*)); -#if defined(__i386__) - __asm__("movl %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // 32-bit always uses GS -#elif defined(__APPLE__) && defined(__x86_64__) - __asm__("movq %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86_64 macOSX uses GS -#elif defined(__x86_64__) && (MI_INTPTR_SIZE==4) - __asm__("movl %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x32 ABI -#elif defined(__x86_64__) - __asm__("movq %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86_64 Linux, BSD uses FS -#elif defined(__arm__) - void** tcb; UNUSED(ofs); - __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb)); - res = tcb[slot]; -#elif defined(__aarch64__) - void** tcb; UNUSED(ofs); -#if defined(__APPLE__) // M1, issue #343 - __asm__ volatile ("mrs %0, tpidrro_el0" : "=r" (tcb)); - tcb = (void**)((uintptr_t)tcb & ~0x07UL); // clear lower 3 bits -#else - __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb)); -#endif - res = tcb[slot]; -#endif - return res; -} - -// setting is only used on macOSX for now -static inline void mi_tls_slot_set(size_t slot, void* value) mi_attr_noexcept { - const size_t ofs = (slot*sizeof(void*)); -#if defined(__i386__) - __asm__("movl %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // 32-bit always uses GS -#elif defined(__APPLE__) && defined(__x86_64__) - __asm__("movq %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x86_64 macOSX uses GS -#elif defined(__x86_64__) && (MI_INTPTR_SIZE==4) - __asm__("movl %1,%%fs:%1" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x32 ABI -#elif defined(__x86_64__) - __asm__("movq %1,%%fs:%1" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x86_64 Linux, BSD uses FS -#elif defined(__arm__) - void** tcb; UNUSED(ofs); - __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb)); - tcb[slot] = value; -#elif defined(__aarch64__) - void** tcb; UNUSED(ofs); -#if defined(__APPLE__) // M1, issue #343 - __asm__ volatile ("mrs %0, tpidrro_el0" : "=r" (tcb)); - tcb = (void**)((uintptr_t)tcb & ~0x07UL); // clear lower 3 bits -#else - __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb)); -#endif - tcb[slot] = value; -#endif -} - -static inline uintptr_t _mi_thread_id(void) mi_attr_noexcept { -#if defined(__BIONIC__) && (defined(__arm__) || defined(__aarch64__)) - // on Android, slot 1 is the thread ID (pointer to pthread internal struct) - return (uintptr_t)mi_tls_slot(1); -#else - // in all our other targets, slot 0 is the pointer to the thread control block - return (uintptr_t)mi_tls_slot(0); -#endif -} -#else -// otherwise use standard C -static inline uintptr_t _mi_thread_id(void) mi_attr_noexcept { - return (uintptr_t)&_mi_heap_default; -} -#endif // ----------------------------------------------------------------------- // Count bits: trailing or leading zeros (with MI_INTPTR_BITS on all zero) @@ -791,9 +781,10 @@ static inline size_t mi_ctz(uintptr_t x) { #endif } -#elif defined(_MSC_VER) +#elif defined(_MSC_VER) #include <limits.h> // LONG_MAX +#include <intrin.h> // BitScanReverse64 #define MI_HAVE_FAST_BITSCAN static inline size_t mi_clz(uintptr_t x) { if (x==0) return MI_INTPTR_BITS; @@ -802,7 +793,7 @@ static inline size_t mi_clz(uintptr_t x) { _BitScanReverse(&idx, x); #else _BitScanReverse64(&idx, x); -#endif +#endif return ((MI_INTPTR_BITS - 1) - idx); } static inline size_t mi_ctz(uintptr_t x) { @@ -812,7 +803,7 @@ static inline size_t mi_ctz(uintptr_t x) { _BitScanForward(&idx, x); #else _BitScanForward64(&idx, x); -#endif +#endif return idx; } @@ -842,7 +833,7 @@ static inline size_t mi_clz32(uint32_t x) { } static inline size_t mi_clz(uintptr_t x) { - if (x==0) return MI_INTPTR_BITS; + if (x==0) return MI_INTPTR_BITS; #if (MI_INTPTR_BITS <= 32) return mi_clz32((uint32_t)x); #else @@ -873,44 +864,57 @@ static inline size_t mi_bsr(uintptr_t x) { // --------------------------------------------------------------------------------- // Provide our own `_mi_memcpy` for potential performance optimizations. // -// For now, only on Windows with msvc/clang-cl we optimize to `rep movsb` if -// we happen to run on x86/x64 cpu's that have "fast short rep movsb" (FSRM) support -// (AMD Zen3+ (~2020) or Intel Ice Lake+ (~2017). See also issue #201 and pr #253. +// For now, only on Windows with msvc/clang-cl we optimize to `rep movsb` if +// we happen to run on x86/x64 cpu's that have "fast short rep movsb" (FSRM) support +// (AMD Zen3+ (~2020) or Intel Ice Lake+ (~2017). See also issue #201 and pr #253. // --------------------------------------------------------------------------------- -#if defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64)) +#if !MI_TRACK_ENABLED && defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64)) #include <intrin.h> -#include <string.h> extern bool _mi_cpu_has_fsrm; static inline void _mi_memcpy(void* dst, const void* src, size_t n) { if (_mi_cpu_has_fsrm) { __movsb((unsigned char*)dst, (const unsigned char*)src, n); } else { - memcpy(dst, src, n); // todo: use noinline? + memcpy(dst, src, n); + } +} +static inline void _mi_memzero(void* dst, size_t n) { + if (_mi_cpu_has_fsrm) { + __stosb((unsigned char*)dst, 0, n); + } + else { + memset(dst, 0, n); } } #else -#include <string.h> static inline void _mi_memcpy(void* dst, const void* src, size_t n) { memcpy(dst, src, n); } +static inline void _mi_memzero(void* dst, size_t n) { + memset(dst, 0, n); +} #endif - // ------------------------------------------------------------------------------- -// The `_mi_memcpy_aligned` can be used if the pointers are machine-word aligned +// The `_mi_memcpy_aligned` can be used if the pointers are machine-word aligned // This is used for example in `mi_realloc`. // ------------------------------------------------------------------------------- -#if (__GNUC__ >= 4) || defined(__clang__) +#if (defined(__GNUC__) && (__GNUC__ >= 4)) || defined(__clang__) // On GCC/CLang we provide a hint that the pointers are word aligned. -#include <string.h> static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) { mi_assert_internal(((uintptr_t)dst % MI_INTPTR_SIZE == 0) && ((uintptr_t)src % MI_INTPTR_SIZE == 0)); void* adst = __builtin_assume_aligned(dst, MI_INTPTR_SIZE); const void* asrc = __builtin_assume_aligned(src, MI_INTPTR_SIZE); - memcpy(adst, asrc, n); + _mi_memcpy(adst, asrc, n); +} + +static inline void _mi_memzero_aligned(void* dst, size_t n) { + mi_assert_internal((uintptr_t)dst % MI_INTPTR_SIZE == 0); + void* adst = __builtin_assume_aligned(dst, MI_INTPTR_SIZE); + _mi_memzero(adst, n); } #else // Default fallback on `_mi_memcpy` @@ -918,6 +922,11 @@ static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) { mi_assert_internal(((uintptr_t)dst % MI_INTPTR_SIZE == 0) && ((uintptr_t)src % MI_INTPTR_SIZE == 0)); _mi_memcpy(dst, src, n); } + +static inline void _mi_memzero_aligned(void* dst, size_t n) { + mi_assert_internal((uintptr_t)dst % MI_INTPTR_SIZE == 0); + _mi_memzero(dst, n); +} #endif diff --git a/contrib/libs/mimalloc/include/mimalloc/prim.h b/contrib/libs/mimalloc/include/mimalloc/prim.h new file mode 100644 index 0000000000..3f4574ddd9 --- /dev/null +++ b/contrib/libs/mimalloc/include/mimalloc/prim.h @@ -0,0 +1,373 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ +#pragma once +#ifndef MIMALLOC_PRIM_H +#define MIMALLOC_PRIM_H + + +// -------------------------------------------------------------------------- +// This file specifies the primitive portability API. +// Each OS/host needs to implement these primitives, see `src/prim` +// for implementations on Window, macOS, WASI, and Linux/Unix. +// +// note: on all primitive functions, we always have result parameters != NULL, and: +// addr != NULL and page aligned +// size > 0 and page aligned +// the return value is an error code as an `int` where 0 is success +// -------------------------------------------------------------------------- + +// OS memory configuration +typedef struct mi_os_mem_config_s { + size_t page_size; // default to 4KiB + size_t large_page_size; // 0 if not supported, usually 2MiB (4MiB on Windows) + size_t alloc_granularity; // smallest allocation size (usually 4KiB, on Windows 64KiB) + bool has_overcommit; // can we reserve more memory than can be actually committed? + bool has_partial_free; // can allocated blocks be freed partially? (true for mmap, false for VirtualAlloc) + bool has_virtual_reserve; // supports virtual address space reservation? (if true we can reserve virtual address space without using commit or physical memory) +} mi_os_mem_config_t; + +// Initialize +void _mi_prim_mem_init( mi_os_mem_config_t* config ); + +// Free OS memory +int _mi_prim_free(void* addr, size_t size ); + +// Allocate OS memory. Return NULL on error. +// The `try_alignment` is just a hint and the returned pointer does not have to be aligned. +// If `commit` is false, the virtual memory range only needs to be reserved (with no access) +// which will later be committed explicitly using `_mi_prim_commit`. +// `is_zero` is set to true if the memory was zero initialized (as on most OS's) +// pre: !commit => !allow_large +// try_alignment >= _mi_os_page_size() and a power of 2 +int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr); + +// Commit memory. Returns error code or 0 on success. +// For example, on Linux this would make the memory PROT_READ|PROT_WRITE. +// `is_zero` is set to true if the memory was zero initialized (e.g. on Windows) +int _mi_prim_commit(void* addr, size_t size, bool* is_zero); + +// Decommit memory. Returns error code or 0 on success. The `needs_recommit` result is true +// if the memory would need to be re-committed. For example, on Windows this is always true, +// but on Linux we could use MADV_DONTNEED to decommit which does not need a recommit. +// pre: needs_recommit != NULL +int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit); + +// Reset memory. The range keeps being accessible but the content might be reset. +// Returns error code or 0 on success. +int _mi_prim_reset(void* addr, size_t size); + +// Protect memory. Returns error code or 0 on success. +int _mi_prim_protect(void* addr, size_t size, bool protect); + +// Allocate huge (1GiB) pages possibly associated with a NUMA node. +// `is_zero` is set to true if the memory was zero initialized (as on most OS's) +// pre: size > 0 and a multiple of 1GiB. +// numa_node is either negative (don't care), or a numa node number. +int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr); + +// Return the current NUMA node +size_t _mi_prim_numa_node(void); + +// Return the number of logical NUMA nodes +size_t _mi_prim_numa_node_count(void); + +// Clock ticks +mi_msecs_t _mi_prim_clock_now(void); + +// Return process information (only for statistics) +typedef struct mi_process_info_s { + mi_msecs_t elapsed; + mi_msecs_t utime; + mi_msecs_t stime; + size_t current_rss; + size_t peak_rss; + size_t current_commit; + size_t peak_commit; + size_t page_faults; +} mi_process_info_t; + +void _mi_prim_process_info(mi_process_info_t* pinfo); + +// Default stderr output. (only for warnings etc. with verbose enabled) +// msg != NULL && _mi_strlen(msg) > 0 +void _mi_prim_out_stderr( const char* msg ); + +// Get an environment variable. (only for options) +// name != NULL, result != NULL, result_size >= 64 +bool _mi_prim_getenv(const char* name, char* result, size_t result_size); + + +// Fill a buffer with strong randomness; return `false` on error or if +// there is no strong randomization available. +bool _mi_prim_random_buf(void* buf, size_t buf_len); + +// Called on the first thread start, and should ensure `_mi_thread_done` is called on thread termination. +void _mi_prim_thread_init_auto_done(void); + +// Called on process exit and may take action to clean up resources associated with the thread auto done. +void _mi_prim_thread_done_auto_done(void); + +// Called when the default heap for a thread changes +void _mi_prim_thread_associate_default_heap(mi_heap_t* heap); + + +//------------------------------------------------------------------- +// Thread id: `_mi_prim_thread_id()` +// +// Getting the thread id should be performant as it is called in the +// fast path of `_mi_free` and we specialize for various platforms as +// inlined definitions. Regular code should call `init.c:_mi_thread_id()`. +// We only require _mi_prim_thread_id() to return a unique id +// for each thread (unequal to zero). +//------------------------------------------------------------------- + +// On some libc + platform combinations we can directly access a thread-local storage (TLS) slot. +// The TLS layout depends on both the OS and libc implementation so we use specific tests for each main platform. +// If you test on another platform and it works please send a PR :-) +// see also https://akkadia.org/drepper/tls.pdf for more info on the TLS register. +// +// Note: we would like to prefer `__builtin_thread_pointer()` nowadays instead of using assembly, +// but unfortunately we can not detect support reliably (see issue #883) +// We also use it on Apple OS as we use a TLS slot for the default heap there. +#if defined(__GNUC__) && ( \ + (defined(__GLIBC__) && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \ + || (defined(__APPLE__) && (defined(__x86_64__) || defined(__aarch64__) || defined(__POWERPC__))) \ + || (defined(__BIONIC__) && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \ + || (defined(__FreeBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \ + || (defined(__OpenBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \ + ) + +#define MI_HAS_TLS_SLOT + +static inline void* mi_prim_tls_slot(size_t slot) mi_attr_noexcept { + void* res; + const size_t ofs = (slot*sizeof(void*)); + #if defined(__i386__) + __asm__("movl %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86 32-bit always uses GS + #elif defined(__APPLE__) && defined(__x86_64__) + __asm__("movq %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86_64 macOSX uses GS + #elif defined(__x86_64__) && (MI_INTPTR_SIZE==4) + __asm__("movl %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x32 ABI + #elif defined(__x86_64__) + __asm__("movq %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86_64 Linux, BSD uses FS + #elif defined(__arm__) + void** tcb; MI_UNUSED(ofs); + __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb)); + res = tcb[slot]; + #elif defined(__aarch64__) + void** tcb; MI_UNUSED(ofs); + #if defined(__APPLE__) // M1, issue #343 + __asm__ volatile ("mrs %0, tpidrro_el0\nbic %0, %0, #7" : "=r" (tcb)); + #else + __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb)); + #endif + res = tcb[slot]; + #elif defined(__APPLE__) && defined(__POWERPC__) // ppc, issue #781 + MI_UNUSED(ofs); + res = pthread_getspecific(slot); + #endif + return res; +} + +// setting a tls slot is only used on macOS for now +static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexcept { + const size_t ofs = (slot*sizeof(void*)); + #if defined(__i386__) + __asm__("movl %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // 32-bit always uses GS + #elif defined(__APPLE__) && defined(__x86_64__) + __asm__("movq %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x86_64 macOS uses GS + #elif defined(__x86_64__) && (MI_INTPTR_SIZE==4) + __asm__("movl %1,%%fs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x32 ABI + #elif defined(__x86_64__) + __asm__("movq %1,%%fs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x86_64 Linux, BSD uses FS + #elif defined(__arm__) + void** tcb; MI_UNUSED(ofs); + __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb)); + tcb[slot] = value; + #elif defined(__aarch64__) + void** tcb; MI_UNUSED(ofs); + #if defined(__APPLE__) // M1, issue #343 + __asm__ volatile ("mrs %0, tpidrro_el0\nbic %0, %0, #7" : "=r" (tcb)); + #else + __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb)); + #endif + tcb[slot] = value; + #elif defined(__APPLE__) && defined(__POWERPC__) // ppc, issue #781 + MI_UNUSED(ofs); + pthread_setspecific(slot, value); + #endif +} + +#endif + +// Do we have __builtin_thread_pointer? This would be the preferred way to get a unique thread id +// but unfortunately, it seems we cannot test for this reliably at this time (see issue #883) +// Nevertheless, it seems needed on older graviton platforms (see issue #851). +// For now, we only enable this for specific platforms. +#if !defined(__APPLE__) /* on apple (M1) the wrong register is read (tpidr_el0 instead of tpidrro_el0) so fall back to TLS slot assembly (<https://github.com/microsoft/mimalloc/issues/343#issuecomment-763272369>)*/ \ + && !defined(MI_LIBC_MUSL) \ + && (!defined(__clang_major__) || __clang_major__ >= 14) /* older clang versions emit bad code; fall back to using the TLS slot (<https://lore.kernel.org/linux-arm-kernel/202110280952.352F66D8@keescook/T/>) */ + #if (defined(__GNUC__) && (__GNUC__ >= 7) && defined(__aarch64__)) /* aarch64 for older gcc versions (issue #851) */ \ + || (defined(__GNUC__) && (__GNUC__ >= 11) && defined(__x86_64__)) \ + || (defined(__clang_major__) && (__clang_major__ >= 14) && (defined(__aarch64__) || defined(__x86_64__))) + #define MI_USE_BUILTIN_THREAD_POINTER 1 + #endif +#endif + + + +// defined in `init.c`; do not use these directly +extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from +extern bool _mi_process_is_initialized; // has mi_process_init been called? + +static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept; + +// Get a unique id for the current thread. +#if defined(MI_PRIM_THREAD_ID) + +static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { + return MI_PRIM_THREAD_ID(); // used for example by CPython for a free threaded build (see python/cpython#115488) +} + +#elif defined(_WIN32) + +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif +#include <windows.h> +static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { + // Windows: works on Intel and ARM in both 32- and 64-bit + return (uintptr_t)NtCurrentTeb(); +} + +#elif MI_USE_BUILTIN_THREAD_POINTER + +static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { + // Works on most Unix based platforms with recent compilers + return (uintptr_t)__builtin_thread_pointer(); +} + +#elif defined(MI_HAS_TLS_SLOT) + +static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { + #if defined(__BIONIC__) + // issue #384, #495: on the Bionic libc (Android), slot 1 is the thread id + // see: https://github.com/aosp-mirror/platform_bionic/blob/c44b1d0676ded732df4b3b21c5f798eacae93228/libc/platform/bionic/tls_defines.h#L86 + return (uintptr_t)mi_prim_tls_slot(1); + #else + // in all our other targets, slot 0 is the thread id + // glibc: https://sourceware.org/git/?p=glibc.git;a=blob_plain;f=sysdeps/x86_64/nptl/tls.h + // apple: https://github.com/apple/darwin-xnu/blob/main/libsyscall/os/tsd.h#L36 + return (uintptr_t)mi_prim_tls_slot(0); + #endif +} + +#else + +// otherwise use portable C, taking the address of a thread local variable (this is still very fast on most platforms). +static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { + return (uintptr_t)&_mi_heap_default; +} + +#endif + + + +/* ---------------------------------------------------------------------------------------- +The thread local default heap: `_mi_prim_get_default_heap()` +This is inlined here as it is on the fast path for allocation functions. + +On most platforms (Windows, Linux, FreeBSD, NetBSD, etc), this just returns a +__thread local variable (`_mi_heap_default`). With the initial-exec TLS model this ensures +that the storage will always be available (allocated on the thread stacks). + +On some platforms though we cannot use that when overriding `malloc` since the underlying +TLS implementation (or the loader) will call itself `malloc` on a first access and recurse. +We try to circumvent this in an efficient way: +- macOSX : we use an unused TLS slot from the OS allocated slots (MI_TLS_SLOT). On OSX, the + loader itself calls `malloc` even before the modules are initialized. +- OpenBSD: we use an unused slot from the pthread block (MI_TLS_PTHREAD_SLOT_OFS). +- DragonFly: defaults are working but seem slow compared to freeBSD (see PR #323) +------------------------------------------------------------------------------------------- */ + +static inline mi_heap_t* mi_prim_get_default_heap(void); + +#if defined(MI_MALLOC_OVERRIDE) +#if defined(__APPLE__) // macOS + #define MI_TLS_SLOT 89 // seems unused? + // other possible unused ones are 9, 29, __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4 (94), __PTK_FRAMEWORK_GC_KEY9 (112) and __PTK_FRAMEWORK_OLDGC_KEY9 (89) + // see <https://github.com/rweichler/substrate/blob/master/include/pthread_machdep.h> +#elif defined(__OpenBSD__) + // use end bytes of a name; goes wrong if anyone uses names > 23 characters (ptrhread specifies 16) + // see <https://github.com/openbsd/src/blob/master/lib/libc/include/thread_private.h#L371> + #define MI_TLS_PTHREAD_SLOT_OFS (6*sizeof(int) + 4*sizeof(void*) + 24) + // #elif defined(__DragonFly__) + // #warning "mimalloc is not working correctly on DragonFly yet." + // #define MI_TLS_PTHREAD_SLOT_OFS (4 + 1*sizeof(void*)) // offset `uniqueid` (also used by gdb?) <https://github.com/DragonFlyBSD/DragonFlyBSD/blob/master/lib/libthread_xu/thread/thr_private.h#L458> +#elif defined(__ANDROID__) + // See issue #381 + #define MI_TLS_PTHREAD +#endif +#endif + + +#if defined(MI_TLS_SLOT) +# if !defined(MI_HAS_TLS_SLOT) +# error "trying to use a TLS slot for the default heap, but the mi_prim_tls_slot primitives are not defined" +# endif + +static inline mi_heap_t* mi_prim_get_default_heap(void) { + mi_heap_t* heap = (mi_heap_t*)mi_prim_tls_slot(MI_TLS_SLOT); + if mi_unlikely(heap == NULL) { + #ifdef __GNUC__ + __asm(""); // prevent conditional load of the address of _mi_heap_empty + #endif + heap = (mi_heap_t*)&_mi_heap_empty; + } + return heap; +} + +#elif defined(MI_TLS_PTHREAD_SLOT_OFS) + +static inline mi_heap_t** mi_prim_tls_pthread_heap_slot(void) { + pthread_t self = pthread_self(); + #if defined(__DragonFly__) + if (self==NULL) return NULL; + #endif + return (mi_heap_t**)((uint8_t*)self + MI_TLS_PTHREAD_SLOT_OFS); +} + +static inline mi_heap_t* mi_prim_get_default_heap(void) { + mi_heap_t** pheap = mi_prim_tls_pthread_heap_slot(); + if mi_unlikely(pheap == NULL) return _mi_heap_main_get(); + mi_heap_t* heap = *pheap; + if mi_unlikely(heap == NULL) return (mi_heap_t*)&_mi_heap_empty; + return heap; +} + +#elif defined(MI_TLS_PTHREAD) + +extern pthread_key_t _mi_heap_default_key; +static inline mi_heap_t* mi_prim_get_default_heap(void) { + mi_heap_t* heap = (mi_unlikely(_mi_heap_default_key == (pthread_key_t)(-1)) ? _mi_heap_main_get() : (mi_heap_t*)pthread_getspecific(_mi_heap_default_key)); + return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap); +} + +#else // default using a thread local variable; used on most platforms. + +static inline mi_heap_t* mi_prim_get_default_heap(void) { + #if defined(MI_TLS_RECURSE_GUARD) + if (mi_unlikely(!_mi_process_is_initialized)) return _mi_heap_main_get(); + #endif + return _mi_heap_default; +} + +#endif // mi_prim_get_default_heap() + + + +#endif // MIMALLOC_PRIM_H diff --git a/contrib/libs/mimalloc/include/mimalloc/track.h b/contrib/libs/mimalloc/include/mimalloc/track.h new file mode 100644 index 0000000000..42ca9071cc --- /dev/null +++ b/contrib/libs/mimalloc/include/mimalloc/track.h @@ -0,0 +1,149 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ +#pragma once +#ifndef MIMALLOC_TRACK_H +#define MIMALLOC_TRACK_H + +/* ------------------------------------------------------------------------------------------------------ +Track memory ranges with macros for tools like Valgrind address sanitizer, or other memory checkers. +These can be defined for tracking allocation: + + #define mi_track_malloc_size(p,reqsize,size,zero) + #define mi_track_free_size(p,_size) + +The macros are set up such that the size passed to `mi_track_free_size` +always matches the size of `mi_track_malloc_size`. (currently, `size == mi_usable_size(p)`). +The `reqsize` is what the user requested, and `size >= reqsize`. +The `size` is either byte precise (and `size==reqsize`) if `MI_PADDING` is enabled, +or otherwise it is the usable block size which may be larger than the original request. +Use `_mi_block_size_of(void* p)` to get the full block size that was allocated (including padding etc). +The `zero` parameter is `true` if the allocated block is zero initialized. + +Optional: + + #define mi_track_align(p,alignedp,offset,size) + #define mi_track_resize(p,oldsize,newsize) + #define mi_track_init() + +The `mi_track_align` is called right after a `mi_track_malloc` for aligned pointers in a block. +The corresponding `mi_track_free` still uses the block start pointer and original size (corresponding to the `mi_track_malloc`). +The `mi_track_resize` is currently unused but could be called on reallocations within a block. +`mi_track_init` is called at program start. + +The following macros are for tools like asan and valgrind to track whether memory is +defined, undefined, or not accessible at all: + + #define mi_track_mem_defined(p,size) + #define mi_track_mem_undefined(p,size) + #define mi_track_mem_noaccess(p,size) + +-------------------------------------------------------------------------------------------------------*/ + +#if MI_TRACK_VALGRIND +// valgrind tool + +#define MI_TRACK_ENABLED 1 +#define MI_TRACK_HEAP_DESTROY 1 // track free of individual blocks on heap_destroy +#define MI_TRACK_TOOL "valgrind" + +#include <valgrind/valgrind.h> +#include <valgrind/memcheck.h> + +#define mi_track_malloc_size(p,reqsize,size,zero) VALGRIND_MALLOCLIKE_BLOCK(p,size,MI_PADDING_SIZE /*red zone*/,zero) +#define mi_track_free_size(p,_size) VALGRIND_FREELIKE_BLOCK(p,MI_PADDING_SIZE /*red zone*/) +#define mi_track_resize(p,oldsize,newsize) VALGRIND_RESIZEINPLACE_BLOCK(p,oldsize,newsize,MI_PADDING_SIZE /*red zone*/) +#define mi_track_mem_defined(p,size) VALGRIND_MAKE_MEM_DEFINED(p,size) +#define mi_track_mem_undefined(p,size) VALGRIND_MAKE_MEM_UNDEFINED(p,size) +#define mi_track_mem_noaccess(p,size) VALGRIND_MAKE_MEM_NOACCESS(p,size) + +#elif MI_TRACK_ASAN +// address sanitizer + +#define MI_TRACK_ENABLED 1 +#define MI_TRACK_HEAP_DESTROY 0 +#define MI_TRACK_TOOL "asan" + +#include <sanitizer/asan_interface.h> + +#define mi_track_malloc_size(p,reqsize,size,zero) ASAN_UNPOISON_MEMORY_REGION(p,size) +#define mi_track_free_size(p,size) ASAN_POISON_MEMORY_REGION(p,size) +#define mi_track_mem_defined(p,size) ASAN_UNPOISON_MEMORY_REGION(p,size) +#define mi_track_mem_undefined(p,size) ASAN_UNPOISON_MEMORY_REGION(p,size) +#define mi_track_mem_noaccess(p,size) ASAN_POISON_MEMORY_REGION(p,size) + +#elif MI_TRACK_ETW +// windows event tracing + +#define MI_TRACK_ENABLED 1 +#define MI_TRACK_HEAP_DESTROY 1 +#define MI_TRACK_TOOL "ETW" + +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif +#include <windows.h> +#error #include "../src/prim/windows/etw.h" + +#define mi_track_init() EventRegistermicrosoft_windows_mimalloc(); +#define mi_track_malloc_size(p,reqsize,size,zero) EventWriteETW_MI_ALLOC((UINT64)(p), size) +#define mi_track_free_size(p,size) EventWriteETW_MI_FREE((UINT64)(p), size) + +#else +// no tracking + +#define MI_TRACK_ENABLED 0 +#define MI_TRACK_HEAP_DESTROY 0 +#define MI_TRACK_TOOL "none" + +#define mi_track_malloc_size(p,reqsize,size,zero) +#define mi_track_free_size(p,_size) + +#endif + +// ------------------- +// Utility definitions + +#ifndef mi_track_resize +#define mi_track_resize(p,oldsize,newsize) mi_track_free_size(p,oldsize); mi_track_malloc(p,newsize,false) +#endif + +#ifndef mi_track_align +#define mi_track_align(p,alignedp,offset,size) mi_track_mem_noaccess(p,offset) +#endif + +#ifndef mi_track_init +#define mi_track_init() +#endif + +#ifndef mi_track_mem_defined +#define mi_track_mem_defined(p,size) +#endif + +#ifndef mi_track_mem_undefined +#define mi_track_mem_undefined(p,size) +#endif + +#ifndef mi_track_mem_noaccess +#define mi_track_mem_noaccess(p,size) +#endif + + +#if MI_PADDING +#define mi_track_malloc(p,reqsize,zero) \ + if ((p)!=NULL) { \ + mi_assert_internal(mi_usable_size(p)==(reqsize)); \ + mi_track_malloc_size(p,reqsize,reqsize,zero); \ + } +#else +#define mi_track_malloc(p,reqsize,zero) \ + if ((p)!=NULL) { \ + mi_assert_internal(mi_usable_size(p)>=(reqsize)); \ + mi_track_malloc_size(p,reqsize,mi_usable_size(p),zero); \ + } +#endif + +#endif diff --git a/contrib/libs/mimalloc/include/mimalloc-types.h b/contrib/libs/mimalloc/include/mimalloc/types.h index caf161d63f..ed326c694e 100644 --- a/contrib/libs/mimalloc/include/mimalloc-types.h +++ b/contrib/libs/mimalloc/include/mimalloc/types.h @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2018-2021, Microsoft Research, Daan Leijen +Copyright (c) 2018-2024, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. @@ -8,16 +8,29 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_TYPES_H #define MIMALLOC_TYPES_H +// -------------------------------------------------------------------------- +// This file contains the main type definitions for mimalloc: +// mi_heap_t : all data for a thread-local heap, contains +// lists of all managed heap pages. +// mi_segment_t : a larger chunk of memory (32GiB) from where pages +// are allocated. +// mi_page_t : a mimalloc page (usually 64KiB or 512KiB) from +// where objects are allocated. +// Note: we write "OS page" for OS memory pages while +// using plain "page" for mimalloc pages (`mi_page_t`). +// -------------------------------------------------------------------------- + + #include <stddef.h> // ptrdiff_t #include <stdint.h> // uintptr_t, uint16_t, etc -#include "mimalloc-atomic.h" // _Atomic +#include "atomic.h" // _Atomic #ifdef _MSC_VER #pragma warning(disable:4214) // bitfield is not int -#endif +#endif // Minimal alignment necessary. On most platforms 16 bytes are needed -// due to SSE registers for example. This must be at least `MI_INTPTR_SIZE` +// due to SSE registers for example. This must be at least `sizeof(void*)` #ifndef MI_MAX_ALIGN_SIZE #define MI_MAX_ALIGN_SIZE 16 // sizeof(max_align_t) #endif @@ -29,6 +42,11 @@ terms of the MIT license. A copy of the license can be found in the file // Define NDEBUG in the release version to disable assertions. // #define NDEBUG +// Define MI_TRACK_<tool> to enable tracking support +// #define MI_TRACK_VALGRIND 1 +// #define MI_TRACK_ASAN 1 +// #define MI_TRACK_ETW 1 + // Define MI_STAT as 1 to maintain statistics; set it to 2 to have detailed statistics (but costs some performance). // #define MI_STAT 1 @@ -55,18 +73,32 @@ terms of the MIT license. A copy of the license can be found in the file #endif // Reserve extra padding at the end of each block to be more resilient against heap block overflows. -// The padding can detect byte-precise buffer overflow on free. -#if !defined(MI_PADDING) && (MI_DEBUG>=1) +// The padding can detect buffer overflow on free. +#if !defined(MI_PADDING) && (MI_SECURE>=3 || MI_DEBUG>=1 || (MI_TRACK_VALGRIND || MI_TRACK_ASAN || MI_TRACK_ETW)) #define MI_PADDING 1 #endif +// Check padding bytes; allows byte-precise buffer overflow detection +#if !defined(MI_PADDING_CHECK) && MI_PADDING && (MI_SECURE>=3 || MI_DEBUG>=1) +#define MI_PADDING_CHECK 1 +#endif + // Encoded free lists allow detection of corrupted free lists // and can detect buffer overflows, modify after free, and double `free`s. -#if (MI_SECURE>=3 || MI_DEBUG>=1 || MI_PADDING > 0) +#if (MI_SECURE>=3 || MI_DEBUG>=1) #define MI_ENCODE_FREELIST 1 #endif + +// We used to abandon huge pages in order to eagerly deallocate it if freed from another thread. +// Unfortunately, that makes it not possible to visit them during a heap walk or include them in a +// `mi_heap_destroy`. We therefore instead reset/decommit the huge blocks nowadays if freed from +// another thread so the memory becomes "virtually" available (and eventually gets properly freed by +// the owning thread). +// #define MI_HUGE_PAGE_ABANDON 1 + + // ------------------------------------------------------ // Platform specific values // ------------------------------------------------------ @@ -83,20 +115,43 @@ terms of the MIT license. A copy of the license can be found in the file // or otherwise one might define an intptr_t type that is larger than a pointer... // ------------------------------------------------------ -#if INTPTR_MAX == 9223372036854775807LL +#if INTPTR_MAX > INT64_MAX +# define MI_INTPTR_SHIFT (4) // assume 128-bit (as on arm CHERI for example) +#elif INTPTR_MAX == INT64_MAX # define MI_INTPTR_SHIFT (3) -#elif INTPTR_MAX == 2147483647LL +#elif INTPTR_MAX == INT32_MAX # define MI_INTPTR_SHIFT (2) #else -#error platform must be 32 or 64 bits +#error platform pointers must be 32, 64, or 128 bits +#endif + +#if SIZE_MAX == UINT64_MAX +# define MI_SIZE_SHIFT (3) +typedef int64_t mi_ssize_t; +#elif SIZE_MAX == UINT32_MAX +# define MI_SIZE_SHIFT (2) +typedef int32_t mi_ssize_t; +#else +#error platform objects must be 32 or 64 bits +#endif + +#if (SIZE_MAX/2) > LONG_MAX +# define MI_ZU(x) x##ULL +# define MI_ZI(x) x##LL +#else +# define MI_ZU(x) x##UL +# define MI_ZI(x) x##L #endif #define MI_INTPTR_SIZE (1<<MI_INTPTR_SHIFT) #define MI_INTPTR_BITS (MI_INTPTR_SIZE*8) -#define KiB ((size_t)1024) -#define MiB (KiB*KiB) -#define GiB (MiB*KiB) +#define MI_SIZE_SIZE (1<<MI_SIZE_SHIFT) +#define MI_SIZE_BITS (MI_SIZE_SIZE*8) + +#define MI_KiB (MI_ZU(1024)) +#define MI_MiB (MI_KiB*MI_KiB) +#define MI_GiB (MI_MiB*MI_KiB) // ------------------------------------------------------ @@ -105,18 +160,27 @@ terms of the MIT license. A copy of the license can be found in the file // Main tuning parameters for segment and page sizes // Sizes for 64-bit, divide by two for 32-bit -#define MI_SMALL_PAGE_SHIFT (13 + MI_INTPTR_SHIFT) // 64kb -#define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SMALL_PAGE_SHIFT) // 512kb -#define MI_LARGE_PAGE_SHIFT ( 3 + MI_MEDIUM_PAGE_SHIFT) // 4mb -#define MI_SEGMENT_SHIFT ( MI_LARGE_PAGE_SHIFT) // 4mb +#ifndef MI_SMALL_PAGE_SHIFT +#define MI_SMALL_PAGE_SHIFT (13 + MI_INTPTR_SHIFT) // 64KiB +#endif +#ifndef MI_MEDIUM_PAGE_SHIFT +#define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SMALL_PAGE_SHIFT) // 512KiB +#endif +#ifndef MI_LARGE_PAGE_SHIFT +#define MI_LARGE_PAGE_SHIFT ( 3 + MI_MEDIUM_PAGE_SHIFT) // 4MiB +#endif +#ifndef MI_SEGMENT_SHIFT +#define MI_SEGMENT_SHIFT ( MI_LARGE_PAGE_SHIFT) // 4MiB -- must be equal to `MI_LARGE_PAGE_SHIFT` +#endif // Derived constants -#define MI_SEGMENT_SIZE (1UL<<MI_SEGMENT_SHIFT) -#define MI_SEGMENT_MASK ((uintptr_t)MI_SEGMENT_SIZE - 1) +#define MI_SEGMENT_SIZE (MI_ZU(1)<<MI_SEGMENT_SHIFT) +#define MI_SEGMENT_ALIGN (MI_SEGMENT_SIZE) +#define MI_SEGMENT_MASK ((uintptr_t)(MI_SEGMENT_ALIGN - 1)) -#define MI_SMALL_PAGE_SIZE (1UL<<MI_SMALL_PAGE_SHIFT) -#define MI_MEDIUM_PAGE_SIZE (1UL<<MI_MEDIUM_PAGE_SHIFT) -#define MI_LARGE_PAGE_SIZE (1UL<<MI_LARGE_PAGE_SHIFT) +#define MI_SMALL_PAGE_SIZE (MI_ZU(1)<<MI_SMALL_PAGE_SHIFT) +#define MI_MEDIUM_PAGE_SIZE (MI_ZU(1)<<MI_MEDIUM_PAGE_SHIFT) +#define MI_LARGE_PAGE_SIZE (MI_ZU(1)<<MI_LARGE_PAGE_SHIFT) #define MI_SMALL_PAGES_PER_SEGMENT (MI_SEGMENT_SIZE/MI_SMALL_PAGE_SIZE) #define MI_MEDIUM_PAGES_PER_SEGMENT (MI_SEGMENT_SIZE/MI_MEDIUM_PAGE_SIZE) @@ -124,25 +188,37 @@ terms of the MIT license. A copy of the license can be found in the file // The max object size are checked to not waste more than 12.5% internally over the page sizes. // (Except for large pages since huge objects are allocated in 4MiB chunks) -#define MI_SMALL_OBJ_SIZE_MAX (MI_SMALL_PAGE_SIZE/4) // 16kb -#define MI_MEDIUM_OBJ_SIZE_MAX (MI_MEDIUM_PAGE_SIZE/4) // 128kb -#define MI_LARGE_OBJ_SIZE_MAX (MI_LARGE_PAGE_SIZE/2) // 2mb +#define MI_SMALL_OBJ_SIZE_MAX (MI_SMALL_PAGE_SIZE/4) // 16KiB +#define MI_MEDIUM_OBJ_SIZE_MAX (MI_MEDIUM_PAGE_SIZE/4) // 128KiB +#define MI_LARGE_OBJ_SIZE_MAX (MI_LARGE_PAGE_SIZE/2) // 2MiB #define MI_LARGE_OBJ_WSIZE_MAX (MI_LARGE_OBJ_SIZE_MAX/MI_INTPTR_SIZE) -#define MI_HUGE_OBJ_SIZE_MAX (2*MI_INTPTR_SIZE*MI_SEGMENT_SIZE) // (must match MI_REGION_MAX_ALLOC_SIZE in memory.c) // Maximum number of size classes. (spaced exponentially in 12.5% increments) #define MI_BIN_HUGE (73U) #if (MI_LARGE_OBJ_WSIZE_MAX >= 655360) -#error "define more bins" +#error "mimalloc internal: define more bins" #endif -// Used as a special value to encode block sizes in 32 bits. -#define MI_HUGE_BLOCK_SIZE ((uint32_t)MI_HUGE_OBJ_SIZE_MAX) +// Maximum block size for which blocks are guaranteed to be block size aligned. (see `segment.c:_mi_segment_page_start`) +#define MI_MAX_ALIGN_GUARANTEE (MI_MEDIUM_OBJ_SIZE_MAX) + +// Alignments over MI_BLOCK_ALIGNMENT_MAX are allocated in dedicated huge page segments +#define MI_BLOCK_ALIGNMENT_MAX (MI_SEGMENT_SIZE >> 1) + +// We never allocate more than PTRDIFF_MAX (see also <https://sourceware.org/ml/libc-announce/2019/msg00001.html>) +#define MI_MAX_ALLOC_SIZE PTRDIFF_MAX + +// ------------------------------------------------------ +// Mimalloc pages contain allocated blocks +// ------------------------------------------------------ // The free lists use encoded next fields // (Only actually encodes when MI_ENCODED_FREELIST is defined.) -typedef uintptr_t mi_encoded_t; +typedef uintptr_t mi_encoded_t; + +// thread id's +typedef size_t mi_threadid_t; // free lists contain blocks typedef struct mi_block_s { @@ -155,7 +231,7 @@ typedef enum mi_delayed_e { MI_USE_DELAYED_FREE = 0, // push on the owning heap thread delayed list MI_DELAYED_FREEING = 1, // temporary: another thread is accessing the owning heap MI_NO_DELAYED_FREE = 2, // optimize: push on page local thread free queue if another block is already in the heap thread delayed free list - MI_NEVER_DELAYED_FREE = 3 // sticky, only resets on page reclaim + MI_NEVER_DELAYED_FREE = 3 // sticky: used for abondoned pages without a owning heap; this only resets on page reclaim } mi_delayed_t; @@ -194,94 +270,150 @@ typedef uintptr_t mi_thread_free_t; // implement a monotonic heartbeat. The `thread_free` list is needed for // avoiding atomic operations in the common case. // -// // `used - |thread_free|` == actual blocks that are in use (alive) // `used - |thread_free| + |free| + |local_free| == capacity` // // We don't count `freed` (as |free|) but use `used` to reduce // the number of memory accesses in the `mi_page_all_free` function(s). // -// Notes: -// - Access is optimized for `mi_free` and `mi_page_alloc` (in `alloc.c`) +// Notes: +// - Access is optimized for `free.c:mi_free` and `alloc.c:mi_page_alloc` // - Using `uint16_t` does not seem to slow things down -// - The size is 8 words on 64-bit which helps the page index calculations -// (and 10 words on 32-bit, and encoded free lists add 2 words. Sizes 10 -// and 12 are still good for address calculation) -// - To limit the structure size, the `xblock_size` is 32-bits only; for -// blocks > MI_HUGE_BLOCK_SIZE the size is determined from the segment page size -// - `thread_free` uses the bottom bits as a delayed-free flags to optimize +// - The size is 10 words on 64-bit which helps the page index calculations +// (and 12 words on 32-bit, and encoded free lists add 2 words) +// - `xthread_free` uses the bottom bits as a delayed-free flags to optimize // concurrent frees where only the first concurrent free adds to the owning -// heap `thread_delayed_free` list (see `alloc.c:mi_free_block_mt`). +// heap `thread_delayed_free` list (see `free.c:mi_free_block_mt`). // The invariant is that no-delayed-free is only set if there is -// at least one block that will be added, or as already been added, to +// at least one block that will be added, or as already been added, to // the owning heap `thread_delayed_free` list. This guarantees that pages // will be freed correctly even if only other threads free blocks. typedef struct mi_page_s { // "owned" by the segment uint8_t segment_idx; // index in the segment `pages` array, `page == &segment->pages[page->segment_idx]` uint8_t segment_in_use:1; // `true` if the segment allocated this page - uint8_t is_reset:1; // `true` if the page memory was reset uint8_t is_committed:1; // `true` if the page virtual memory is committed - uint8_t is_zero_init:1; // `true` if the page was zero initialized + uint8_t is_zero_init:1; // `true` if the page was initially zero initialized + uint8_t is_huge:1; // `true` if the page is in a huge segment // layout like this to optimize access in `mi_malloc` and `mi_free` uint16_t capacity; // number of blocks committed, must be the first field, see `segment.c:page_clear` uint16_t reserved; // number of blocks reserved in memory mi_page_flags_t flags; // `in_full` and `has_aligned` flags (8 bits) - uint8_t is_zero:1; // `true` if the blocks in the free list are zero initialized + uint8_t free_is_zero:1; // `true` if the blocks in the free list are zero initialized uint8_t retire_expire:7; // expiration count for retired blocks mi_block_t* free; // list of available free blocks (`malloc` allocates from this list) - #ifdef MI_ENCODE_FREELIST - uintptr_t keys[2]; // two random keys to encode the free lists (see `_mi_block_next`) + mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`) + uint16_t used; // number of blocks in use (including blocks in `thread_free`) + uint8_t block_size_shift; // if not zero, then `(1 << block_size_shift) == block_size` (only used for fast path in `free.c:_mi_page_ptr_unalign`) + uint8_t heap_tag; // tag of the owning heap, used for separated heaps by object type + // padding + size_t block_size; // size available in each block (always `>0`) + uint8_t* page_start; // start of the page area containing the blocks + + #if (MI_ENCODE_FREELIST || MI_PADDING) + uintptr_t keys[2]; // two random keys to encode the free lists (see `_mi_block_next`) or padding canary #endif - uint32_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`) - uint32_t xblock_size; // size available in each block (always `>0`) - mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`) _Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads _Atomic(uintptr_t) xheap; - - struct mi_page_s* next; // next page owned by this thread with the same `block_size` - struct mi_page_s* prev; // previous page owned by this thread with the same `block_size` + + struct mi_page_s* next; // next page owned by the heap with the same `block_size` + struct mi_page_s* prev; // previous page owned by the heap with the same `block_size` + + #if MI_INTPTR_SIZE==4 // pad to 12 words on 32-bit + void* padding[1]; + #endif } mi_page_t; +// ------------------------------------------------------ +// Mimalloc segments contain mimalloc pages +// ------------------------------------------------------ + typedef enum mi_page_kind_e { - MI_PAGE_SMALL, // small blocks go into 64kb pages inside a segment - MI_PAGE_MEDIUM, // medium blocks go into 512kb pages inside a segment + MI_PAGE_SMALL, // small blocks go into 64KiB pages inside a segment + MI_PAGE_MEDIUM, // medium blocks go into 512KiB pages inside a segment MI_PAGE_LARGE, // larger blocks go into a single page spanning a whole segment - MI_PAGE_HUGE // huge blocks (>512kb) are put into a single page in a segment of the exact size (but still 2mb aligned) + MI_PAGE_HUGE // a huge page is a single page in a segment of variable size (but still 2MiB aligned) + // used for blocks `> MI_LARGE_OBJ_SIZE_MAX` or an aligment `> MI_BLOCK_ALIGNMENT_MAX`. } mi_page_kind_t; -// Segments are large allocated memory blocks (2mb on 64 bit) from -// the OS. Inside segments we allocated fixed size _pages_ that -// contain blocks. + +// --------------------------------------------------------------- +// a memory id tracks the provenance of arena/OS allocated memory +// --------------------------------------------------------------- + +// Memory can reside in arena's, direct OS allocated, or statically allocated. The memid keeps track of this. +typedef enum mi_memkind_e { + MI_MEM_NONE, // not allocated + MI_MEM_EXTERNAL, // not owned by mimalloc but provided externally (via `mi_manage_os_memory` for example) + MI_MEM_STATIC, // allocated in a static area and should not be freed (for arena meta data for example) + MI_MEM_OS, // allocated from the OS + MI_MEM_OS_HUGE, // allocated as huge OS pages (usually 1GiB, pinned to physical memory) + MI_MEM_OS_REMAP, // allocated in a remapable area (i.e. using `mremap`) + MI_MEM_ARENA // allocated from an arena (the usual case) +} mi_memkind_t; + +static inline bool mi_memkind_is_os(mi_memkind_t memkind) { + return (memkind >= MI_MEM_OS && memkind <= MI_MEM_OS_REMAP); +} + +typedef struct mi_memid_os_info { + void* base; // actual base address of the block (used for offset aligned allocations) + size_t alignment; // alignment at allocation +} mi_memid_os_info_t; + +typedef struct mi_memid_arena_info { + size_t block_index; // index in the arena + mi_arena_id_t id; // arena id (>= 1) + bool is_exclusive; // this arena can only be used for specific arena allocations +} mi_memid_arena_info_t; + +typedef struct mi_memid_s { + union { + mi_memid_os_info_t os; // only used for MI_MEM_OS + mi_memid_arena_info_t arena; // only used for MI_MEM_ARENA + } mem; + bool is_pinned; // `true` if we cannot decommit/reset/protect in this memory (e.g. when allocated using large (2Mib) or huge (1GiB) OS pages) + bool initially_committed;// `true` if the memory was originally allocated as committed + bool initially_zero; // `true` if the memory was originally zero initialized + mi_memkind_t memkind; +} mi_memid_t; + + +// --------------------------------------------------------------- +// Segments contain mimalloc pages +// --------------------------------------------------------------- + +// Segments are large allocated memory blocks (2MiB on 64 bit) from the OS. +// Inside segments we allocated fixed size _pages_ that contain blocks. typedef struct mi_segment_s { - // memory fields - size_t memid; // id for the os-level memory manager - bool mem_is_pinned; // `true` if we cannot decommit/reset/protect in this memory (i.e. when allocated using large OS pages) - bool mem_is_committed; // `true` if the whole segment is eagerly committed + // constant fields + mi_memid_t memid; // memory id to track provenance + bool allow_decommit; + bool allow_purge; + size_t segment_size; // for huge pages this may be different from `MI_SEGMENT_SIZE` // segment fields - _Atomic(struct mi_segment_s*) abandoned_next; struct mi_segment_s* next; // must be the first segment field after abandoned_next -- see `segment.c:segment_init` struct mi_segment_s* prev; + bool was_reclaimed; // true if it was reclaimed (used to limit on-free reclamation) size_t abandoned; // abandoned pages (i.e. the original owning thread stopped) (`abandoned <= used`) size_t abandoned_visits; // count how often this segment is visited in the abandoned list (to force reclaim if it is too long) size_t used; // count of pages in use (`used <= capacity`) size_t capacity; // count of available pages (`#free + used`) - size_t segment_size; // for huge pages this may be different from `MI_SEGMENT_SIZE` size_t segment_info_size;// space we are using from the first page for segment meta-data and possible guard pages. uintptr_t cookie; // verify addresses in secure mode: `_mi_ptr_cookie(segment) == segment->cookie` // layout like this to optimize access in `mi_free` + _Atomic(mi_threadid_t) thread_id; // unique id of the thread owning this segment size_t page_shift; // `1 << page_shift` == the page sizes == `page->block_size * page->reserved` (unless the first page, then `-segment_info_size`). - _Atomic(uintptr_t) thread_id; // unique id of the thread owning this segment - mi_page_kind_t page_kind; // kind of pages: small, large, or huge + mi_page_kind_t page_kind; // kind of pages: small, medium, large, or huge mi_page_t pages[1]; // up to `MI_SMALL_PAGES_PER_SEGMENT` pages } mi_segment_t; @@ -316,10 +448,11 @@ typedef struct mi_random_cxt_s { uint32_t input[16]; uint32_t output[16]; int output_available; + bool weak; } mi_random_ctx_t; -// In debug mode there is a padding stucture at the end of the blocks to check for buffer overflows +// In debug mode there is a padding structure at the end of the blocks to check for buffer overflows #if (MI_PADDING) typedef struct mi_padding_s { uint32_t canary; // encoded block value to check validity of the padding (in case of overflow) @@ -338,10 +471,9 @@ typedef struct mi_padding_s { // A heap owns a set of pages. struct mi_heap_s { mi_tld_t* tld; - mi_page_t* pages_free_direct[MI_PAGES_DIRECT]; // optimize: array where every entry points a page with possibly free blocks in the corresponding queue for that size. - mi_page_queue_t pages[MI_BIN_FULL + 1]; // queue of pages for each size class (or "bin") _Atomic(mi_block_t*) thread_delayed_free; - uintptr_t thread_id; // thread this heap belongs too + mi_threadid_t thread_id; // thread this heap belongs too + mi_arena_id_t arena_id; // arena id if the heap belongs to a specific arena (or 0) uintptr_t cookie; // random cookie to verify pointers (see `_mi_ptr_cookie`) uintptr_t keys[2]; // two random keys used to encode the `thread_delayed_free` list mi_random_ctx_t random; // random number context used for secure allocation @@ -350,6 +482,9 @@ struct mi_heap_s { size_t page_retired_max; // largest retired index into the `pages` array. mi_heap_t* next; // list of heaps per thread bool no_reclaim; // `true` if this heap should not reclaim abandoned pages + uint8_t tag; // custom tag, can be used for separating heaps based on the object types + mi_page_t* pages_free_direct[MI_PAGES_DIRECT]; // optimize: array where every entry points a page with possibly free blocks in the corresponding queue for that size. + mi_page_queue_t pages[MI_BIN_FULL + 1]; // queue of pages for each size class (or "bin") }; @@ -358,9 +493,15 @@ struct mi_heap_s { // Debug // ------------------------------------------------------ +#if !defined(MI_DEBUG_UNINIT) #define MI_DEBUG_UNINIT (0xD0) +#endif +#if !defined(MI_DEBUG_FREED) #define MI_DEBUG_FREED (0xDF) +#endif +#if !defined(MI_DEBUG_PADDING) #define MI_DEBUG_PADDING (0xDE) +#endif #if (MI_DEBUG) // use our own assertion to print without memory allocation @@ -412,6 +553,7 @@ typedef struct mi_stats_s { mi_stat_count_t reserved; mi_stat_count_t committed; mi_stat_count_t reset; + mi_stat_count_t purged; mi_stat_count_t page_committed; mi_stat_count_t segments_abandoned; mi_stat_count_t pages_abandoned; @@ -424,11 +566,15 @@ typedef struct mi_stats_s { mi_stat_counter_t pages_extended; mi_stat_counter_t mmap_calls; mi_stat_counter_t commit_calls; + mi_stat_counter_t reset_calls; + mi_stat_counter_t purge_calls; mi_stat_counter_t page_no_retire; mi_stat_counter_t searches; mi_stat_counter_t normal_count; mi_stat_counter_t huge_count; - mi_stat_counter_t giant_count; + mi_stat_counter_t arena_count; + mi_stat_counter_t arena_crossover_count; + mi_stat_counter_t arena_rollback_count; #if MI_STAT>1 mi_stat_count_t normal_bins[MI_BIN_HUGE+1]; #endif @@ -453,6 +599,7 @@ void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount); #define mi_heap_stat_increase(heap,stat,amount) mi_stat_increase( (heap)->tld->stats.stat, amount) #define mi_heap_stat_decrease(heap,stat,amount) mi_stat_decrease( (heap)->tld->stats.stat, amount) + // ------------------------------------------------------ // Thread Local data // ------------------------------------------------------ @@ -475,14 +622,12 @@ typedef struct mi_os_tld_s { typedef struct mi_segments_tld_s { mi_segment_queue_t small_free; // queue of segments with free small pages mi_segment_queue_t medium_free; // queue of segments with free medium pages - mi_page_queue_t pages_reset; // queue of freed pages that can be reset + mi_page_queue_t pages_purge; // queue of freed pages that are delay purged size_t count; // current number of segments; size_t peak_count; // peak number of segments size_t current_size; // current size of all segments size_t peak_size; // peak size of all segments - size_t cache_count; // number of segments in the cache - size_t cache_size; // total size of all segments in the cache - mi_segment_t* cache; // (small) cache of segments + size_t reclaim_count;// number of reclaimed (abandoned) segments mi_stats_t* stats; // points to tld stats mi_os_tld_t* os; // points to os stats } mi_segments_tld_t; diff --git a/contrib/libs/mimalloc/readme.md b/contrib/libs/mimalloc/readme.md index cdb1b82aad..a0296b43c3 100644 --- a/contrib/libs/mimalloc/readme.md +++ b/contrib/libs/mimalloc/readme.md @@ -9,24 +9,28 @@ mimalloc (pronounced "me-malloc") is a general purpose allocator with excellent [performance](#performance) characteristics. -Initially developed by Daan Leijen for the run-time systems of the +Initially developed by Daan Leijen for the runtime systems of the [Koka](https://koka-lang.github.io) and [Lean](https://github.com/leanprover/lean) languages. -Latest release tag: `v2.0.2` (beta, 2021-06-17). -Latest stable tag: `v1.7.2` (2021-06-17). +Latest release tag: `v2.1.7` (2024-05-21). +Latest v1 tag: `v1.8.7` (2024-05-21). mimalloc is a drop-in replacement for `malloc` and can be used in other programs without code changes, for example, on dynamically linked ELF-based systems (Linux, BSD, etc.) you can use it as: ``` -> LD_PRELOAD=/usr/bin/libmimalloc.so myprogram +> LD_PRELOAD=/usr/lib/libmimalloc.so myprogram ``` -It also has an easy way to override the default allocator in [Windows](#override_on_windows). Notable aspects of the design include: +It also includes a robust way to override the default allocator in [Windows](#override_on_windows). Notable aspects of the design include: - __small and consistent__: the library is about 8k LOC using simple and consistent data structures. This makes it very suitable to integrate and adapt in other projects. For runtime systems it provides hooks for a monotonic _heartbeat_ and deferred freeing (for bounded worst-case times with reference counting). + Partly due to its simplicity, mimalloc has been ported to many systems (Windows, macOS, + Linux, WASM, various BSD's, Haiku, MUSL, etc) and has excellent support for dynamic overriding. + At the same time, it is an industrial strength allocator that runs (very) large scale + distributed services on thousands of machines with excellent worst case latencies. - __free list sharding__: instead of one big free list (per size class) we have many smaller lists per "mimalloc page" which reduces fragmentation and increases locality -- @@ -36,13 +40,13 @@ It also has an easy way to override the default allocator in [Windows](#override per mimalloc page, but for each page we have multiple free lists. In particular, there is one list for thread-local `free` operations, and another one for concurrent `free` operations. Free-ing from another thread can now be a single CAS without needing - sophisticated coordination between threads. Since there will be + sophisticated coordination between threads. Since there will be thousands of separate free lists, contention is naturally distributed over the heap, and the chance of contending on a single location will be low -- this is quite similar to randomized algorithms like skip lists where adding a random oracle removes the need for a more complex algorithm. -- __eager page reset__: when a "page" becomes empty (with increased chance - due to free list sharding) the memory is marked to the OS as unused ("reset" or "purged") +- __eager page purging__: when a "page" becomes empty (with increased chance + due to free list sharding) the memory is marked to the OS as unused (reset or decommitted) reducing (real) memory pressure and fragmentation, especially in long running programs. - __secure__: _mimalloc_ can be built in secure mode, adding guard pages, @@ -50,71 +54,79 @@ It also has an easy way to override the default allocator in [Windows](#override heap vulnerabilities. The performance penalty is usually around 10% on average over our benchmarks. - __first-class heaps__: efficiently create and use multiple heaps to allocate across different regions. - A heap can be destroyed at once instead of deallocating each object separately. + A heap can be destroyed at once instead of deallocating each object separately. - __bounded__: it does not suffer from _blowup_ \[1\], has bounded worst-case allocation - times (_wcat_), bounded space overhead (~0.2% meta-data, with at most 12.5% waste in allocation sizes), - and has no internal points of contention using only atomic operations. + times (_wcat_) (upto OS primitives), bounded space overhead (~0.2% meta-data, with low + internal fragmentation), and has no internal points of contention using only atomic operations. - __fast__: In our benchmarks (see [below](#performance)), _mimalloc_ outperforms other leading allocators (_jemalloc_, _tcmalloc_, _Hoard_, etc), - and often uses less memory. A nice property - is that it does consistently well over a wide range of benchmarks. There is also good huge OS page - support for larger server programs. + and often uses less memory. A nice property is that it does consistently well over a wide range + of benchmarks. There is also good huge OS page support for larger server programs. The [documentation](https://microsoft.github.io/mimalloc) gives a full overview of the API. -You can read more on the design of _mimalloc_ in the [technical report](https://www.microsoft.com/en-us/research/publication/mimalloc-free-list-sharding-in-action) which also has detailed benchmark results. +You can read more on the design of _mimalloc_ in the [technical report](https://www.microsoft.com/en-us/research/publication/mimalloc-free-list-sharding-in-action) which also has detailed benchmark results. -Enjoy! +Enjoy! ### Branches -* `master`: latest stable release. -* `dev`: development branch for mimalloc v1. -* `dev-slice`: development branch for mimalloc v2 with a new algorithm for managing internal mimalloc pages. +* `master`: latest stable release (based on `dev-slice`). +* `dev`: development branch for mimalloc v1. Use this branch for submitting PR's. +* `dev-slice`: development branch for mimalloc v2. This branch is downstream of `dev` (and is essentially equal to `dev` except for +`src/segment.c`) ### Releases -Note: the `v2.x` beta has a new algorithm for managing internal mimalloc pages that tends to use reduce memory usage +Note: the `v2.x` version has a different algorithm for managing internal mimalloc pages (as slices) that tends to use reduce +memory usage and fragmentation compared to mimalloc `v1.x` (especially for large workloads). Should otherwise have similar performance (see [below](#performance)); please report if you observe any significant performance regression. -* 2021-06-17, `v1.7.2`, `v2.0.2` (beta): support M1, better installation layout on Linux, fix - thread_id on Android, prefer 2-6TiB area for aligned allocation to work better on pre-windows 8, various small fixes. - -* 2021-04-06, `v1.7.1`, `v2.0.1` (beta): fix bug in arena allocation for huge pages, improved aslr on large allocations, initial M1 support (still experimental). +* 2024-05-21, `v1.8.7`, `v2.1.7`: Fix build issues on less common platforms. Started upstreaming patches + from the CPython [integration](https://github.com/python/cpython/issues/113141#issuecomment-2119255217). Upstream `vcpkg` patches. +* 2024-05-13, `v1.8.6`, `v2.1.6`: Fix build errors on various (older) platforms. Refactored aligned allocation. +* 2024-04-22, `v1.8.4`, `v2.1.4`: Fixes various bugs and build issues. Add `MI_LIBC_MUSL` cmake flag for musl builds. + Free-ing code is refactored into a separate module (`free.c`). Mimalloc page info is simplified with the block size + directly available (and new `block_size_shift` to improve aligned block free-ing). + New approach to collection of abandoned segments: When + a thread terminates the segments it owns are abandoned (containing still live objects) and these can be + reclaimed by other threads. We no longer use a list of abandoned segments but this is now done using bitmaps in arena's + which is more concurrent (and more aggressive). Abandoned memory can now also be reclaimed if a thread frees an object in + an abandoned page (which can be disabled using `mi_option_abandoned_reclaim_on_free`). The option `mi_option_max_segment_reclaim` + gives a maximum percentage of abandoned segments that can be reclaimed per try (=10%). + +* 2023-04-24, `v1.8.2`, `v2.1.2`: Fixes build issues on freeBSD, musl, and C17 (UE 5.1.1). Reduce code size/complexity + by removing regions and segment-cache's and only use arenas with improved memory purging -- this may improve memory + usage as well for larger services. Renamed options for consistency. Improved Valgrind and ASAN checking. -* 2021-01-31, `v2.0.0`: beta release 2.0: new slice algorithm for managing internal mimalloc pages. - -* 2021-01-31, `v1.7.0`: stable release 1.7: support explicit user provided memory regions, more precise statistics, - improve macOS overriding, initial support for Apple M1, improved DragonFly support, faster memcpy on Windows, various small fixes. +* 2023-04-03, `v1.8.1`, `v2.1.1`: Fixes build issues on some platforms. -### Older Releases +* 2023-03-29, `v1.8.0`, `v2.1.0`: Improved support dynamic overriding on Windows 11. Improved tracing precision + with [asan](#asan) and [Valgrind](#valgrind), and added Windows event tracing [ETW](#ETW) (contributed by Xinglong He). Created an OS + abstraction layer to make it easier to port and separate platform dependent code (in `src/prim`). Fixed C++ STL compilation on older Microsoft C++ compilers, and various small bug fixes. -* 2020-09-24, `v1.6.7`: stable release 1.6: using standard C atomics, passing tsan testing, improved - handling of failing to commit on Windows, add [`mi_process_info`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc.h#L156) api call. -* 2020-08-06, `v1.6.4`: stable release 1.6: improved error recovery in low-memory situations, - support for IllumOS and Haiku, NUMA support for Vista/XP, improved NUMA detection for AMD Ryzen, ubsan support. -* 2020-05-05, `v1.6.3`: stable release 1.6: improved behavior in out-of-memory situations, improved malloc zones on macOS, - build PIC static libraries by default, add option to abort on out-of-memory, line buffered statistics. -* 2020-04-20, `v1.6.2`: stable release 1.6: fix compilation on Android, MingW, Raspberry, and Conda, - stability fix for Windows 7, fix multiple mimalloc instances in one executable, fix `strnlen` overload, - fix aligned debug padding. -* 2020-02-17, `v1.6.1`: stable release 1.6: minor updates (build with clang-cl, fix alignment issue for small objects). -* 2020-02-09, `v1.6.0`: stable release 1.6: fixed potential memory leak, improved overriding - and thread local support on FreeBSD, NetBSD, DragonFly, and macOSX. New byte-precise - heap block overflow detection in debug mode (besides the double-free detection and free-list - corruption detection). Add `nodiscard` attribute to most allocation functions. - Enable `MIMALLOC_PAGE_RESET` by default. New reclamation strategy for abandoned heap pages - for better memory footprint. -* 2020-02-09, `v1.5.0`: stable release 1.5: improved free performance, small bug fixes. -* 2020-01-22, `v1.4.0`: stable release 1.4: improved performance for delayed OS page reset, -more eager concurrent free, addition of STL allocator, fixed potential memory leak. -* 2020-01-15, `v1.3.0`: stable release 1.3: bug fixes, improved randomness and [stronger -free list encoding](https://github.com/microsoft/mimalloc/blob/783e3377f79ee82af43a0793910a9f2d01ac7863/include/mimalloc-internal.h#L396) in secure mode. -* 2019-12-22, `v1.2.2`: stable release 1.2: minor updates. -* 2019-11-22, `v1.2.0`: stable release 1.2: bug fixes, improved secure mode (free list corruption checks, double free mitigation). Improved dynamic overriding on Windows. -* 2019-10-07, `v1.1.0`: stable release 1.1. -* 2019-09-01, `v1.0.8`: pre-release 8: more robust windows dynamic overriding, initial huge page support. -* 2019-08-10, `v1.0.6`: pre-release 6: various performance improvements. +* 2022-12-23, `v1.7.9`, `v2.0.9`: Supports building with [asan](#asan) and improved [Valgrind](#valgrind) support. + Support arbitrary large alignments (in particular for `std::pmr` pools). + Added C++ STL allocators attached to a specific heap (thanks @vmarkovtsev). + Heap walks now visit all object (including huge objects). Support Windows nano server containers (by Johannes Schindelin,@dscho). + Various small bug fixes. + +* 2022-11-03, `v1.7.7`, `v2.0.7`: Initial support for [Valgrind](#valgrind) for leak testing and heap block overflow + detection. Initial + support for attaching heaps to a speficic memory area (only in v2). Fix `realloc` behavior for zero size blocks, remove restriction to integral multiple of the alignment in `alloc_align`, improved aligned allocation performance, reduced contention with many threads on few processors (thank you @dposluns!), vs2022 support, support `pkg-config`, . + +* 2022-04-14, `v1.7.6`, `v2.0.6`: fix fallback path for aligned OS allocation on Windows, improve Windows aligned allocation + even when compiling with older SDK's, fix dynamic overriding on macOS Monterey, fix MSVC C++ dynamic overriding, fix + warnings under Clang 14, improve performance if many OS threads are created and destroyed, fix statistics for large object + allocations, using MIMALLOC_VERBOSE=1 has no maximum on the number of error messages, various small fixes. + +* 2022-02-14, `v1.7.5`, `v2.0.5` (alpha): fix malloc override on + Windows 11, fix compilation with musl, potentially reduced + committed memory, add `bin/minject` for Windows, + improved wasm support, faster aligned allocation, + various small fixes. + +* [Older release notes](#older-release-notes) Special thanks to: @@ -124,9 +136,11 @@ Special thanks to: memory model bugs using the [genMC] model checker. * Weipeng Liu (@pongba), Zhuowei Li, Junhua Wang, and Jakub Szymanski, for their early support of mimalloc and deployment at large scale services, leading to many improvements in the mimalloc algorithms for large workloads. -* Jason Gibson (@jasongibson) for exhaustive testing on large scale workloads and server environments, and finding complex bugs +* Jason Gibson (@jasongibson) for exhaustive testing on large scale workloads and server environments, and finding complex bugs in (early versions of) `mimalloc`. -* Manuel Pöter (@mpoeter) and Sam Gross (@colesbury) for finding an ABA concurrency issue in abandoned segment reclamation. +* Manuel Pöter (@mpoeter) and Sam Gross(@colesbury) for finding an ABA concurrency issue in abandoned segment reclamation. Sam also created the [no GIL](https://github.com/colesbury/nogil) Python fork which + uses mimalloc internally. + [genMC]: https://plv.mpi-sws.org/genmc/ @@ -134,15 +148,18 @@ Special thanks to: mimalloc is used in various large scale low-latency services and programs, for example: -<a href="https://www.bing.com"><img align="left" height="50" src="https://upload.wikimedia.org/wikipedia/commons/e/e9/Bing_logo.svg"></a> -<a href="https://azure.microsoft.com/"><img align="left" height="50" src="https://upload.wikimedia.org/wikipedia/commons/a/a8/Microsoft_Azure_Logo.svg"></a> -<a href="https://deathstrandingpc.505games.com"><img height="100" src="doc/ds-logo.jpg" style="border-radius=1ex;vertical-align:center"></a> +<a href="https://www.bing.com"><img height="50" align="left" src="https://upload.wikimedia.org/wikipedia/commons/e/e9/Bing_logo.svg"></a> +<a href="https://azure.microsoft.com/"><img height="50" align="left" src="https://upload.wikimedia.org/wikipedia/commons/a/a8/Microsoft_Azure_Logo.svg"></a> +<a href="https://deathstrandingpc.505games.com"><img height="100" src="doc/ds-logo.png"></a> +<a href="https://docs.unrealengine.com/4.26/en-US/WhatsNew/Builds/ReleaseNotes/4_25/"><img height="100" src="doc/unreal-logo.svg"></a> +<a href="https://cab.spbu.ru/software/spades/"><img height="100" src="doc/spades-logo.png"></a> + # Building ## Windows -Open `ide/vs2019/mimalloc.sln` in Visual Studio 2019 and build (or `ide/vs2017/mimalloc.sln`). +Open `ide/vs2022/mimalloc.sln` in Visual Studio 2022 and build. The `mimalloc` project builds a static library (in `out/msvc-x64`), while the `mimalloc-override` project builds a DLL for overriding malloc in the entire program. @@ -191,6 +208,11 @@ Notes: 2. Install CCMake: `sudo apt-get install cmake-curses-gui` +## Single source + +You can also directly build the single `src/static.c` file as part of your project without +needing `cmake` at all. Make sure to also add the mimalloc `include` directory to the include path. + # Using the library @@ -217,7 +239,7 @@ target_link_libraries(myapp PUBLIC mimalloc-static) to link with the static library. See `test\CMakeLists.txt` for an example. For best performance in C++ programs, it is also recommended to override the -global `new` and `delete` operators. For convience, mimalloc provides +global `new` and `delete` operators. For convenience, mimalloc provides [`mimalloc-new-delete.h`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-new-delete.h) which does this for you -- just include it in a single(!) source file in your project. In C++, mimalloc also provides the `mi_stl_allocator` struct which implements the `std::allocator` interface. @@ -265,47 +287,63 @@ completely and redirect all calls to the _mimalloc_ library instead . ## Environment Options -You can set further options either programmatically (using [`mi_option_set`](https://microsoft.github.io/mimalloc/group__options.html)), -or via environment variables: +You can set further options either programmatically (using [`mi_option_set`](https://microsoft.github.io/mimalloc/group__options.html)), or via environment variables: - `MIMALLOC_SHOW_STATS=1`: show statistics when the program terminates. - `MIMALLOC_VERBOSE=1`: show verbose messages. - `MIMALLOC_SHOW_ERRORS=1`: show error and warning messages. -- `MIMALLOC_PAGE_RESET=0`: by default, mimalloc will reset (or purge) OS pages that are not in use, to signal to the OS - that the underlying physical memory can be reused. This can reduce memory fragmentation in long running (server) - programs. By setting it to `0` this will no longer be done which can improve performance for batch-like programs. - As an alternative, the `MIMALLOC_RESET_DELAY=`<msecs> can be set higher (100ms by default) to make the page - reset occur less frequently instead of turning it off completely. + +Advanced options: + +- `MIMALLOC_ARENA_EAGER_COMMIT=2`: turns on eager commit for the large arenas (usually 1GiB) from which mimalloc + allocates segments and pages. Set this to 2 (default) to + only enable this on overcommit systems (e.g. Linux). Set this to 1 to enable explicitly on other systems + as well (like Windows or macOS) which may improve performance (as the whole arena is committed at once). + Note that eager commit only increases the commit but not the actual the peak resident set + (rss) so it is generally ok to enable this. +- `MIMALLOC_PURGE_DELAY=N`: the delay in `N` milli-seconds (by default `10`) after which mimalloc will purge + OS pages that are not in use. This signals to the OS that the underlying physical memory can be reused which + can reduce memory fragmentation especially in long running (server) programs. Setting `N` to `0` purges immediately when + a page becomes unused which can improve memory usage but also decreases performance. Setting `N` to a higher + value like `100` can improve performance (sometimes by a lot) at the cost of potentially using more memory at times. + Setting it to `-1` disables purging completely. +- `MIMALLOC_PURGE_DECOMMITS=1`: By default "purging" memory means unused memory is decommitted (`MEM_DECOMMIT` on Windows, + `MADV_DONTNEED` (which decresease rss immediately) on `mmap` systems). Set this to 0 to instead "reset" unused + memory on a purge (`MEM_RESET` on Windows, generally `MADV_FREE` (which does not decrease rss immediately) on `mmap` systems). + Mimalloc generally does not "free" OS memory but only "purges" OS memory, in other words, it tries to keep virtual + address ranges and decommits within those ranges (to make the underlying physical memory available to other processes). + +Further options for large workloads and services: + - `MIMALLOC_USE_NUMA_NODES=N`: pretend there are at most `N` NUMA nodes. If not set, the actual NUMA nodes are detected at runtime. Setting `N` to 1 may avoid problems in some virtual environments. Also, setting it to a lower number than the actual NUMA nodes is fine and will only cause threads to potentially allocate more memory across actual NUMA nodes (but this can happen in any case as NUMA local allocation is always a best effort but not guaranteed). -- `MIMALLOC_LARGE_OS_PAGES=1`: use large OS pages (2MiB) when available; for some workloads this can significantly - improve performance. Use `MIMALLOC_VERBOSE` to check if the large OS pages are enabled -- usually one needs - to explicitly allow large OS pages (as on [Windows][windows-huge] and [Linux][linux-huge]). However, sometimes +- `MIMALLOC_ALLOW_LARGE_OS_PAGES=1`: use large OS pages (2 or 4MiB) when available; for some workloads this can significantly + improve performance. When this option is disabled, it also disables transparent huge pages (THP) for the process + (on Linux and Android). Use `MIMALLOC_VERBOSE` to check if the large OS pages are enabled -- usually one needs + to explicitly give permissions for large OS pages (as on [Windows][windows-huge] and [Linux][linux-huge]). However, sometimes the OS is very slow to reserve contiguous physical memory for large OS pages so use with care on systems that - can have fragmented memory (for that reason, we generally recommend to use `MIMALLOC_RESERVE_HUGE_OS_PAGES` instead whenever possible). - <!-- - - `MIMALLOC_EAGER_REGION_COMMIT=1`: on Windows, commit large (256MiB) regions eagerly. On Windows, these regions - show in the working set even though usually just a small part is committed to physical memory. This is why it - turned off by default on Windows as it looks not good in the task manager. However, turning it on has no - real drawbacks and may improve performance by a little. - --> -- `MIMALLOC_RESERVE_HUGE_OS_PAGES=N`: where N is the number of 1GiB _huge_ OS pages. This reserves the huge pages at + can have fragmented memory (for that reason, we generally recommend to use `MIMALLOC_RESERVE_HUGE_OS_PAGES` instead whenever possible). +- `MIMALLOC_RESERVE_HUGE_OS_PAGES=N`: where `N` is the number of 1GiB _huge_ OS pages. This reserves the huge pages at startup and sometimes this can give a large (latency) performance improvement on big workloads. - Usually it is better to not use - `MIMALLOC_LARGE_OS_PAGES` in combination with this setting. Just like large OS pages, use with care as reserving + Usually it is better to not use `MIMALLOC_ALLOW_LARGE_OS_PAGES=1` in combination with this setting. Just like large + OS pages, use with care as reserving contiguous physical memory can take a long time when memory is fragmented (but reserving the huge pages is done at startup only once). - Note that we usually need to explicitly enable huge OS pages (as on [Windows][windows-huge] and [Linux][linux-huge])). + Note that we usually need to explicitly give permission for huge OS pages (as on [Windows][windows-huge] and [Linux][linux-huge])). With huge OS pages, it may be beneficial to set the setting `MIMALLOC_EAGER_COMMIT_DELAY=N` (`N` is 1 by default) to delay the initial `N` segments (of 4MiB) of a thread to not allocate in the huge OS pages; this prevents threads that are short lived - and allocate just a little to take up space in the huge OS page area (which cannot be reset). + and allocate just a little to take up space in the huge OS page area (which cannot be purged as huge OS pages are pinned + to physical memory). + The huge pages are usually allocated evenly among NUMA nodes. + We can use `MIMALLOC_RESERVE_HUGE_OS_PAGES_AT=N` where `N` is the numa node (starting at 0) to allocate all + the huge pages at a specific numa node instead. Use caution when using `fork` in combination with either large or huge OS pages: on a fork, the OS uses copy-on-write for all pages in the original process including the huge OS pages. When any memory is now written in that area, the -OS will copy the entire 1GiB huge page (or 2MiB large page) which can cause the memory usage to grow in big increments. +OS will copy the entire 1GiB huge page (or 2MiB large page) which can cause the memory usage to grow in large increments. [linux-huge]: https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/5/html/tuning_and_optimizing_red_hat_enterprise_linux_for_oracle_9i_and_10g_databases/sect-oracle_9i_and_10g_tuning_guide-large_memory_optimization_big_pages_and_huge_pages-configuring_huge_pages_in_red_hat_enterprise_linux_4_or_5 [windows-huge]: https://docs.microsoft.com/en-us/sql/database-engine/configure-windows/enable-the-lock-pages-in-memory-option-windows?view=sql-server-2017 @@ -337,15 +375,15 @@ When _mimalloc_ is built using debug mode, various checks are done at runtime to - Corrupted free-lists and some forms of use-after-free are detected. -# Overriding Malloc +# Overriding Standard Malloc -Overriding the standard `malloc` can be done either _dynamically_ or _statically_. +Overriding the standard `malloc` (and `new`) can be done either _dynamically_ or _statically_. ## Dynamic override This is the recommended way to override the standard malloc interface. -### Override on Linux, BSD +### Dynamic Override on Linux, BSD On these ELF-based systems we preload the mimalloc shared library so all calls to the standard `malloc` interface are @@ -364,60 +402,68 @@ or run with the debug version to get detailed statistics: > env MIMALLOC_SHOW_STATS=1 LD_PRELOAD=/usr/lib/libmimalloc-debug.so myprogram ``` -### Override on MacOS +### Dynamic Override on MacOS On macOS we can also preload the mimalloc shared library so all calls to the standard `malloc` interface are resolved to the _mimalloc_ library. ``` -> env DYLD_FORCE_FLAT_NAMESPACE=1 DYLD_INSERT_LIBRARIES=/usr/lib/libmimalloc.dylib myprogram +> env DYLD_INSERT_LIBRARIES=/usr/lib/libmimalloc.dylib myprogram ``` Note that certain security restrictions may apply when doing this from the [shell](https://stackoverflow.com/questions/43941322/dyld-insert-libraries-ignored-when-calling-application-through-bash). -(Note: macOS support for dynamic overriding is recent, please report any issues.) -### Override on Windows +### Dynamic Override on Windows -<span id="override_on_windows">Overriding on Windows</span> is robust and has the -particular advantage to be able to redirect all malloc/free calls that go through +<span id="override_on_windows">Dynamically overriding on mimalloc on Windows</span> +is robust and has the particular advantage to be able to redirect all malloc/free calls that go through the (dynamic) C runtime allocator, including those from other DLL's or libraries. - -The overriding on Windows requires that you link your program explicitly with -the mimalloc DLL and use the C-runtime library as a DLL (using the `/MD` or `/MDd` switch). -Also, the `mimalloc-redirect.dll` (or `mimalloc-redirect32.dll`) must be available -in the same folder as the main `mimalloc-override.dll` at runtime (as it is a dependency). -The redirection DLL ensures that all calls to the C runtime malloc API get redirected to -mimalloc (in `mimalloc-override.dll`). - -To ensure the mimalloc DLL is loaded at run-time it is easiest to insert some -call to the mimalloc API in the `main` function, like `mi_version()` -(or use the `/INCLUDE:mi_version` switch on the linker). See the `mimalloc-override-test` project -for an example on how to use this. For best performance on Windows with C++, it +As it intercepts all allocation calls on a low level, it can be used reliably +on large programs that include other 3rd party components. +There are four requirements to make the overriding work robustly: + +1. Use the C-runtime library as a DLL (using the `/MD` or `/MDd` switch). +2. Link your program explicitly with `mimalloc-override.dll` library. + To ensure the `mimalloc-override.dll` is loaded at run-time it is easiest to insert some + call to the mimalloc API in the `main` function, like `mi_version()` + (or use the `/INCLUDE:mi_version` switch on the linker). See the `mimalloc-override-test` project + for an example on how to use this. +3. The [`mimalloc-redirect.dll`](bin) (or `mimalloc-redirect32.dll`) must be put + in the same folder as the main `mimalloc-override.dll` at runtime (as it is a dependency of that DLL). + The redirection DLL ensures that all calls to the C runtime malloc API get redirected to + mimalloc functions (which reside in `mimalloc-override.dll`). +4. Ensure the `mimalloc-override.dll` comes as early as possible in the import + list of the final executable (so it can intercept all potential allocations). + +For best performance on Windows with C++, it is also recommended to also override the `new`/`delete` operations (by including -[`mimalloc-new-delete.h`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc-new-delete.h) a single(!) source file in your project). +[`mimalloc-new-delete.h`](include/mimalloc-new-delete.h) +a single(!) source file in your project). The environment variable `MIMALLOC_DISABLE_REDIRECT=1` can be used to disable dynamic overriding at run-time. Use `MIMALLOC_VERBOSE=1` to check if mimalloc was successfully redirected. -(Note: in principle, it is possible to even patch existing executables without any recompilation +We cannot always re-link an executable with `mimalloc-override.dll`, and similarly, we cannot always +ensure the the DLL comes first in the import table of the final executable. +In many cases though we can patch existing executables without any recompilation if they are linked with the dynamic C runtime (`ucrtbase.dll`) -- just put the `mimalloc-override.dll` into the import table (and put `mimalloc-redirect.dll` in the same folder) -Such patching can be done for example with [CFF Explorer](https://ntcore.com/?page_id=388)). - +Such patching can be done for example with [CFF Explorer](https://ntcore.com/?page_id=388) or +the [`minject`](bin) program. ## Static override On Unix-like systems, you can also statically link with _mimalloc_ to override the standard malloc interface. The recommended way is to link the final program with the -_mimalloc_ single object file (`mimalloc-override.o`). We use +_mimalloc_ single object file (`mimalloc.o`). We use an object file instead of a library file as linkers give preference to that over archives to resolve symbols. To ensure that the standard malloc interface resolves to the _mimalloc_ library, link it as the first object file. For example: ``` -> gcc -o myprogram mimalloc-override.o myfile1.c ... +> gcc -o myprogram mimalloc.o myfile1.c ... ``` Another way to override statically that works on all platforms, is to @@ -427,6 +473,96 @@ This is provided by [`mimalloc-override.h`](https://github.com/microsoft/mimallo under your control or otherwise mixing of pointers from different heaps may occur! +# Tools + +Generally, we recommend using the standard allocator with memory tracking tools, but mimalloc +can also be build to support the [address sanitizer][asan] or the excellent [Valgrind] tool. +Moreover, it can be build to support Windows event tracing ([ETW]). +This has a small performance overhead but does allow detecting memory leaks and byte-precise +buffer overflows directly on final executables. See also the `test/test-wrong.c` file to test with various tools. + +## Valgrind + +To build with [valgrind] support, use the `MI_TRACK_VALGRIND=ON` cmake option: + +``` +> cmake ../.. -DMI_TRACK_VALGRIND=ON +``` + +This can also be combined with secure mode or debug mode. +You can then run your programs directly under valgrind: + +``` +> valgrind <myprogram> +``` + +If you rely on overriding `malloc`/`free` by mimalloc (instead of using the `mi_malloc`/`mi_free` API directly), +you also need to tell `valgrind` to not intercept those calls itself, and use: + +``` +> MIMALLOC_SHOW_STATS=1 valgrind --soname-synonyms=somalloc=*mimalloc* -- <myprogram> +``` + +By setting the `MIMALLOC_SHOW_STATS` environment variable you can check that mimalloc is indeed +used and not the standard allocator. Even though the [Valgrind option][valgrind-soname] +is called `--soname-synonyms`, this also +works when overriding with a static library or object file. Unfortunately, it is not possible to +dynamically override mimalloc using `LD_PRELOAD` together with `valgrind`. +See also the `test/test-wrong.c` file to test with `valgrind`. + +Valgrind support is in its initial development -- please report any issues. + +[Valgrind]: https://valgrind.org/ +[valgrind-soname]: https://valgrind.org/docs/manual/manual-core.html#opt.soname-synonyms + +## ASAN + +To build with the address sanitizer, use the `-DMI_TRACK_ASAN=ON` cmake option: + +``` +> cmake ../.. -DMI_TRACK_ASAN=ON +``` + +This can also be combined with secure mode or debug mode. +You can then run your programs as:' + +``` +> ASAN_OPTIONS=verbosity=1 <myprogram> +``` + +When you link a program with an address sanitizer build of mimalloc, you should +generally compile that program too with the address sanitizer enabled. +For example, assuming you build mimalloc in `out/debug`: + +``` +clang -g -o test-wrong -Iinclude test/test-wrong.c out/debug/libmimalloc-asan-debug.a -lpthread -fsanitize=address -fsanitize-recover=address +``` + +Since the address sanitizer redirects the standard allocation functions, on some platforms (macOSX for example) +it is required to compile mimalloc with `-DMI_OVERRIDE=OFF`. +Adress sanitizer support is in its initial development -- please report any issues. + +[asan]: https://github.com/google/sanitizers/wiki/AddressSanitizer + +## ETW + +Event tracing for Windows ([ETW]) provides a high performance way to capture all allocations though +mimalloc and analyze them later. To build with ETW support, use the `-DMI_TRACK_ETW=ON` cmake option. + +You can then capture an allocation trace using the Windows performance recorder (WPR), using the +`src/prim/windows/etw-mimalloc.wprp` profile. In an admin prompt, you can use: +``` +> wpr -start src\prim\windows\etw-mimalloc.wprp -filemode +> <my_mimalloc_program> +> wpr -stop <my_mimalloc_program>.etl +``` +and then open `<my_mimalloc_program>.etl` in the Windows Performance Analyzer (WPA), or +use a tool like [TraceControl] that is specialized for analyzing mimalloc traces. + +[ETW]: https://learn.microsoft.com/en-us/windows-hardware/test/wpt/event-tracing-for-windows +[TraceControl]: https://github.com/xinglonghe/TraceControl + + # Performance Last update: 2021-01-30 @@ -532,7 +668,7 @@ The _alloc-test_, by [OLogN Technologies AG](http://ithare.com/testing-memory-allocators-ptmalloc2-tcmalloc-hoard-jemalloc-while-trying-to-simulate-real-world-loads/), is a very allocation intensive benchmark doing millions of allocations in various size classes. The test is scaled such that when an allocator performs almost identically on _alloc-test1_ as _alloc-testN_ it -means that it scales linearly. +means that it scales linearly. The _sh6bench_ and _sh8bench_ benchmarks are developed by [MicroQuill](http://www.microquill.com/) as part of SmartHeap. @@ -636,6 +772,7 @@ see the differences in the _larsonN_, _mstressN_, and _xmalloc-testN_ benchmarks --> + # References - \[1] Emery D. Berger, Kathryn S. McKinley, Robert D. Blumofe, and Paul R. Wilson. @@ -673,7 +810,6 @@ see the differences in the _larsonN_, _mstressN_, and _xmalloc-testN_ benchmarks In Proceedings of the 2019 ACM SIGPLAN International Symposium on Memory Management, 122–135. ACM. 2019. --> - # Contributing This project welcomes contributions and suggestions. Most contributions require you to agree to a @@ -683,3 +819,44 @@ the rights to use your contribution. For details, visit https://cla.microsoft.co When you submit a pull request, a CLA-bot will automatically determine whether you need to provide a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the instructions provided by the bot. You will only need to do this once across all repos using our CLA. + + +# Older Release Notes + +* 2021-11-14, `v1.7.3`, `v2.0.3` (beta): improved WASM support, improved macOS support and performance (including + M1), improved performance for v2 for large objects, Python integration improvements, more standard + installation directories, various small fixes. +* 2021-06-17, `v1.7.2`, `v2.0.2` (beta): support M1, better installation layout on Linux, fix + thread_id on Android, prefer 2-6TiB area for aligned allocation to work better on pre-windows 8, various small fixes. +* 2021-04-06, `v1.7.1`, `v2.0.1` (beta): fix bug in arena allocation for huge pages, improved aslr on large allocations, initial M1 support (still experimental). +* 2021-01-31, `v2.0.0`: beta release 2.0: new slice algorithm for managing internal mimalloc pages. +* 2021-01-31, `v1.7.0`: stable release 1.7: support explicit user provided memory regions, more precise statistics, + improve macOS overriding, initial support for Apple M1, improved DragonFly support, faster memcpy on Windows, various small fixes. + +* 2020-09-24, `v1.6.7`: stable release 1.6: using standard C atomics, passing tsan testing, improved + handling of failing to commit on Windows, add [`mi_process_info`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc.h#L156) api call. +* 2020-08-06, `v1.6.4`: stable release 1.6: improved error recovery in low-memory situations, + support for IllumOS and Haiku, NUMA support for Vista/XP, improved NUMA detection for AMD Ryzen, ubsan support. +* 2020-05-05, `v1.6.3`: stable release 1.6: improved behavior in out-of-memory situations, improved malloc zones on macOS, + build PIC static libraries by default, add option to abort on out-of-memory, line buffered statistics. +* 2020-04-20, `v1.6.2`: stable release 1.6: fix compilation on Android, MingW, Raspberry, and Conda, + stability fix for Windows 7, fix multiple mimalloc instances in one executable, fix `strnlen` overload, + fix aligned debug padding. +* 2020-02-17, `v1.6.1`: stable release 1.6: minor updates (build with clang-cl, fix alignment issue for small objects). +* 2020-02-09, `v1.6.0`: stable release 1.6: fixed potential memory leak, improved overriding + and thread local support on FreeBSD, NetBSD, DragonFly, and macOSX. New byte-precise + heap block overflow detection in debug mode (besides the double-free detection and free-list + corruption detection). Add `nodiscard` attribute to most allocation functions. + Enable `MIMALLOC_PAGE_RESET` by default. New reclamation strategy for abandoned heap pages + for better memory footprint. +* 2020-02-09, `v1.5.0`: stable release 1.5: improved free performance, small bug fixes. +* 2020-01-22, `v1.4.0`: stable release 1.4: improved performance for delayed OS page reset, +more eager concurrent free, addition of STL allocator, fixed potential memory leak. +* 2020-01-15, `v1.3.0`: stable release 1.3: bug fixes, improved randomness and [stronger +free list encoding](https://github.com/microsoft/mimalloc/blob/783e3377f79ee82af43a0793910a9f2d01ac7863/include/mimalloc-internal.h#L396) in secure mode. + +* 2019-12-22, `v1.2.2`: stable release 1.2: minor updates. +* 2019-11-22, `v1.2.0`: stable release 1.2: bug fixes, improved secure mode (free list corruption checks, double free mitigation). Improved dynamic overriding on Windows. +* 2019-10-07, `v1.1.0`: stable release 1.1. +* 2019-09-01, `v1.0.8`: pre-release 8: more robust windows dynamic overriding, initial huge page support. +* 2019-08-10, `v1.0.6`: pre-release 6: various performance improvements. diff --git a/contrib/libs/mimalloc/src/alloc-aligned.c b/contrib/libs/mimalloc/src/alloc-aligned.c index 724c0a1bfe..20c3604449 100644 --- a/contrib/libs/mimalloc/src/alloc-aligned.c +++ b/contrib/libs/mimalloc/src/alloc-aligned.c @@ -6,113 +6,222 @@ terms of the MIT license. A copy of the license can be found in the file -----------------------------------------------------------------------------*/ #include "mimalloc.h" -#include "mimalloc-internal.h" +#include "mimalloc/internal.h" +#include "mimalloc/prim.h" // mi_prim_get_default_heap -#include <string.h> // memset +#include <string.h> // memset // ------------------------------------------------------ // Aligned Allocation // ------------------------------------------------------ -static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept { - // note: we don't require `size > offset`, we just guarantee that - // the address at offset is aligned regardless of the allocated size. - mi_assert(alignment > 0); - if (mi_unlikely(size > PTRDIFF_MAX)) return NULL; // we don't allocate more than PTRDIFF_MAX (see <https://sourceware.org/ml/libc-announce/2019/msg00001.html>) - if (mi_unlikely(alignment==0 || !_mi_is_power_of_two(alignment))) return NULL; // require power-of-two (see <https://en.cppreference.com/w/c/memory/aligned_alloc>) - const uintptr_t align_mask = alignment-1; // for any x, `(x & align_mask) == (x % alignment)` - - // try if there is a small block available with just the right alignment - const size_t padsize = size + MI_PADDING_SIZE; - if (mi_likely(padsize <= MI_SMALL_SIZE_MAX)) { - mi_page_t* page = _mi_heap_get_free_small_page(heap,padsize); - const bool is_aligned = (((uintptr_t)page->free+offset) & align_mask)==0; - if (mi_likely(page->free != NULL && is_aligned)) - { - #if MI_STAT>1 - mi_heap_stat_increase( heap, malloc, size); +static bool mi_malloc_is_naturally_aligned( size_t size, size_t alignment ) { + // objects up to `MI_MAX_ALIGN_GUARANTEE` are allocated aligned to their size (see `segment.c:_mi_segment_page_start`). + mi_assert_internal(_mi_is_power_of_two(alignment) && (alignment > 0)); + if (alignment > size) return false; + if (alignment <= MI_MAX_ALIGN_SIZE) return true; + const size_t bsize = mi_good_size(size); + return (bsize <= MI_MAX_ALIGN_GUARANTEE && (bsize & (alignment-1)) == 0); +} + +// Fallback aligned allocation that over-allocates -- split out for better codegen +static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_overalloc(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept +{ + mi_assert_internal(size <= (MI_MAX_ALLOC_SIZE - MI_PADDING_SIZE)); + mi_assert_internal(alignment != 0 && _mi_is_power_of_two(alignment)); + + void* p; + size_t oversize; + if mi_unlikely(alignment > MI_BLOCK_ALIGNMENT_MAX) { + // use OS allocation for very large alignment and allocate inside a huge page (dedicated segment with 1 page) + // This can support alignments >= MI_SEGMENT_SIZE by ensuring the object can be aligned at a point in the + // first (and single) page such that the segment info is `MI_SEGMENT_SIZE` bytes before it (so it can be found by aligning the pointer down) + if mi_unlikely(offset != 0) { + // todo: cannot support offset alignment for very large alignments yet + #if MI_DEBUG > 0 + _mi_error_message(EOVERFLOW, "aligned allocation with a very large alignment cannot be used with an alignment offset (size %zu, alignment %zu, offset %zu)\n", size, alignment, offset); #endif - void* p = _mi_page_malloc(heap,page,padsize); // TODO: inline _mi_page_malloc - mi_assert_internal(p != NULL); - mi_assert_internal(((uintptr_t)p + offset) % alignment == 0); - if (zero) _mi_block_zero_init(page,p,size); - return p; + return NULL; + } + oversize = (size <= MI_SMALL_SIZE_MAX ? MI_SMALL_SIZE_MAX + 1 /* ensure we use generic malloc path */ : size); + p = _mi_heap_malloc_zero_ex(heap, oversize, false, alignment); // the page block size should be large enough to align in the single huge page block + // zero afterwards as only the area from the aligned_p may be committed! + if (p == NULL) return NULL; + } + else { + // otherwise over-allocate + oversize = size + alignment - 1; + p = _mi_heap_malloc_zero(heap, oversize, zero); + if (p == NULL) return NULL; + } + + // .. and align within the allocation + const uintptr_t align_mask = alignment - 1; // for any x, `(x & align_mask) == (x % alignment)` + const uintptr_t poffset = ((uintptr_t)p + offset) & align_mask; + const uintptr_t adjust = (poffset == 0 ? 0 : alignment - poffset); + mi_assert_internal(adjust < alignment); + void* aligned_p = (void*)((uintptr_t)p + adjust); + if (aligned_p != p) { + mi_page_t* page = _mi_ptr_page(p); + mi_page_set_has_aligned(page, true); + _mi_padding_shrink(page, (mi_block_t*)p, adjust + size); + } + // todo: expand padding if overallocated ? + + mi_assert_internal(mi_page_usable_block_size(_mi_ptr_page(p)) >= adjust + size); + mi_assert_internal(p == _mi_page_ptr_unalign(_mi_ptr_page(aligned_p), aligned_p)); + mi_assert_internal(((uintptr_t)aligned_p + offset) % alignment == 0); + mi_assert_internal(mi_usable_size(aligned_p)>=size); + mi_assert_internal(mi_usable_size(p) == mi_usable_size(aligned_p)+adjust); + + // now zero the block if needed + if (alignment > MI_BLOCK_ALIGNMENT_MAX) { + // for the tracker, on huge aligned allocations only from the start of the large block is defined + mi_track_mem_undefined(aligned_p, size); + if (zero) { + _mi_memzero_aligned(aligned_p, mi_usable_size(aligned_p)); } } - // use regular allocation if it is guaranteed to fit the alignment constraints - if (offset==0 && alignment<=padsize && padsize<=MI_MEDIUM_OBJ_SIZE_MAX && (padsize&align_mask)==0) { + if (p != aligned_p) { + mi_track_align(p,aligned_p,adjust,mi_usable_size(aligned_p)); + } + return aligned_p; +} + +// Generic primitive aligned allocation -- split out for better codegen +static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_generic(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept +{ + mi_assert_internal(alignment != 0 && _mi_is_power_of_two(alignment)); + // we don't allocate more than MI_MAX_ALLOC_SIZE (see <https://sourceware.org/ml/libc-announce/2019/msg00001.html>) + if mi_unlikely(size > (MI_MAX_ALLOC_SIZE - MI_PADDING_SIZE)) { + #if MI_DEBUG > 0 + _mi_error_message(EOVERFLOW, "aligned allocation request is too large (size %zu, alignment %zu)\n", size, alignment); + #endif + return NULL; + } + + // use regular allocation if it is guaranteed to fit the alignment constraints. + // this is important to try as the fast path in `mi_heap_malloc_zero_aligned` only works when there exist + // a page with the right block size, and if we always use the over-alloc fallback that would never happen. + if (offset == 0 && mi_malloc_is_naturally_aligned(size,alignment)) { void* p = _mi_heap_malloc_zero(heap, size, zero); mi_assert_internal(p == NULL || ((uintptr_t)p % alignment) == 0); - return p; + const bool is_aligned_or_null = (((uintptr_t)p) & (alignment-1))==0; + if mi_likely(is_aligned_or_null) { + return p; + } + else { + // this should never happen if the `mi_malloc_is_naturally_aligned` check is correct.. + mi_assert(false); + mi_free(p); + } + } + + // fall back to over-allocation + return mi_heap_malloc_zero_aligned_at_overalloc(heap,size,alignment,offset,zero); +} + +// Primitive aligned allocation +static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept +{ + // note: we don't require `size > offset`, we just guarantee that the address at offset is aligned regardless of the allocated size. + if mi_unlikely(alignment == 0 || !_mi_is_power_of_two(alignment)) { // require power-of-two (see <https://en.cppreference.com/w/c/memory/aligned_alloc>) + #if MI_DEBUG > 0 + _mi_error_message(EOVERFLOW, "aligned allocation requires the alignment to be a power-of-two (size %zu, alignment %zu)\n", size, alignment); + #endif + return NULL; } - // otherwise over-allocate - void* p = _mi_heap_malloc_zero(heap, size + alignment - 1, zero); - if (p == NULL) return NULL; + // try first if there happens to be a small block available with just the right alignment + if mi_likely(size <= MI_SMALL_SIZE_MAX && alignment <= size) { + const uintptr_t align_mask = alignment-1; // for any x, `(x & align_mask) == (x % alignment)` + const size_t padsize = size + MI_PADDING_SIZE; + mi_page_t* page = _mi_heap_get_free_small_page(heap, padsize); + if mi_likely(page->free != NULL) { + const bool is_aligned = (((uintptr_t)page->free + offset) & align_mask)==0; + if mi_likely(is_aligned) + { + #if MI_STAT>1 + mi_heap_stat_increase(heap, malloc, size); + #endif + void* p = (zero ? _mi_page_malloc_zeroed(heap,page,padsize) : _mi_page_malloc(heap,page,padsize)); // call specific page malloc for better codegen + mi_assert_internal(p != NULL); + mi_assert_internal(((uintptr_t)p + offset) % alignment == 0); + mi_track_malloc(p,size,zero); + return p; + } + } + } - // .. and align within the allocation - uintptr_t adjust = alignment - (((uintptr_t)p + offset) & align_mask); - mi_assert_internal(adjust <= alignment); - void* aligned_p = (adjust == alignment ? p : (void*)((uintptr_t)p + adjust)); - if (aligned_p != p) mi_page_set_has_aligned(_mi_ptr_page(p), true); - mi_assert_internal(((uintptr_t)aligned_p + offset) % alignment == 0); - mi_assert_internal( p == _mi_page_ptr_unalign(_mi_ptr_segment(aligned_p),_mi_ptr_page(aligned_p),aligned_p) ); - return aligned_p; + // fallback to generic aligned allocation + return mi_heap_malloc_zero_aligned_at_generic(heap, size, alignment, offset, zero); } -mi_decl_restrict void* mi_heap_malloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { +// ------------------------------------------------------ +// Optimized mi_heap_malloc_aligned / mi_malloc_aligned +// ------------------------------------------------------ + +mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { return mi_heap_malloc_zero_aligned_at(heap, size, alignment, offset, false); } -mi_decl_restrict void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept { return mi_heap_malloc_aligned_at(heap, size, alignment, 0); } -mi_decl_restrict void* mi_heap_zalloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { +// ------------------------------------------------------ +// Aligned Allocation +// ------------------------------------------------------ + +mi_decl_nodiscard mi_decl_restrict void* mi_heap_zalloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { return mi_heap_malloc_zero_aligned_at(heap, size, alignment, offset, true); } -mi_decl_restrict void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept { return mi_heap_zalloc_aligned_at(heap, size, alignment, 0); } -mi_decl_restrict void* mi_heap_calloc_aligned_at(mi_heap_t* heap, size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict void* mi_heap_calloc_aligned_at(mi_heap_t* heap, size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { size_t total; if (mi_count_size_overflow(count, size, &total)) return NULL; return mi_heap_zalloc_aligned_at(heap, total, alignment, offset); } -mi_decl_restrict void* mi_heap_calloc_aligned(mi_heap_t* heap, size_t count, size_t size, size_t alignment) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict void* mi_heap_calloc_aligned(mi_heap_t* heap, size_t count, size_t size, size_t alignment) mi_attr_noexcept { return mi_heap_calloc_aligned_at(heap,count,size,alignment,0); } -mi_decl_restrict void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept { - return mi_heap_malloc_aligned_at(mi_get_default_heap(), size, alignment, offset); +mi_decl_nodiscard mi_decl_restrict void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept { + return mi_heap_malloc_aligned_at(mi_prim_get_default_heap(), size, alignment, offset); } -mi_decl_restrict void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept { - return mi_heap_malloc_aligned(mi_get_default_heap(), size, alignment); +mi_decl_nodiscard mi_decl_restrict void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept { + return mi_heap_malloc_aligned(mi_prim_get_default_heap(), size, alignment); } -mi_decl_restrict void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept { - return mi_heap_zalloc_aligned_at(mi_get_default_heap(), size, alignment, offset); +mi_decl_nodiscard mi_decl_restrict void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept { + return mi_heap_zalloc_aligned_at(mi_prim_get_default_heap(), size, alignment, offset); } -mi_decl_restrict void* mi_zalloc_aligned(size_t size, size_t alignment) mi_attr_noexcept { - return mi_heap_zalloc_aligned(mi_get_default_heap(), size, alignment); +mi_decl_nodiscard mi_decl_restrict void* mi_zalloc_aligned(size_t size, size_t alignment) mi_attr_noexcept { + return mi_heap_zalloc_aligned(mi_prim_get_default_heap(), size, alignment); } -mi_decl_restrict void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { - return mi_heap_calloc_aligned_at(mi_get_default_heap(), count, size, alignment, offset); +mi_decl_nodiscard mi_decl_restrict void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { + return mi_heap_calloc_aligned_at(mi_prim_get_default_heap(), count, size, alignment, offset); } -mi_decl_restrict void* mi_calloc_aligned(size_t count, size_t size, size_t alignment) mi_attr_noexcept { - return mi_heap_calloc_aligned(mi_get_default_heap(), count, size, alignment); +mi_decl_nodiscard mi_decl_restrict void* mi_calloc_aligned(size_t count, size_t size, size_t alignment) mi_attr_noexcept { + return mi_heap_calloc_aligned(mi_prim_get_default_heap(), count, size, alignment); } +// ------------------------------------------------------ +// Aligned re-allocation +// ------------------------------------------------------ + static void* mi_heap_realloc_zero_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset, bool zero) mi_attr_noexcept { mi_assert(alignment > 0); if (alignment <= sizeof(uintptr_t)) return _mi_heap_realloc_zero(heap,p,newsize,zero); @@ -123,19 +232,13 @@ static void* mi_heap_realloc_zero_aligned_at(mi_heap_t* heap, void* p, size_t ne return p; // reallocation still fits, is aligned and not more than 50% waste } else { + // note: we don't zero allocate upfront so we only zero initialize the expanded part void* newp = mi_heap_malloc_aligned_at(heap,newsize,alignment,offset); if (newp != NULL) { if (zero && newsize > size) { - const mi_page_t* page = _mi_ptr_page(newp); - if (page->is_zero) { - // already zero initialized - mi_assert_expensive(mi_mem_is_zero(newp,newsize)); - } - else { - // also set last word in the previous allocation to zero to ensure any padding is zero-initialized - size_t start = (size >= sizeof(intptr_t) ? size - sizeof(intptr_t) : 0); - memset((uint8_t*)newp + start, 0, newsize - start); - } + // also set last word in the previous allocation to zero to ensure any padding is zero-initialized + size_t start = (size >= sizeof(intptr_t) ? size - sizeof(intptr_t) : 0); + _mi_memzero((uint8_t*)newp + start, newsize - start); } _mi_memcpy_aligned(newp, p, (newsize > size ? size : newsize)); mi_free(p); // only free if successful @@ -151,55 +254,54 @@ static void* mi_heap_realloc_zero_aligned(mi_heap_t* heap, void* p, size_t newsi return mi_heap_realloc_zero_aligned_at(heap,p,newsize,alignment,offset,zero); } -void* mi_heap_realloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { +mi_decl_nodiscard void* mi_heap_realloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { return mi_heap_realloc_zero_aligned_at(heap,p,newsize,alignment,offset,false); } -void* mi_heap_realloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept { +mi_decl_nodiscard void* mi_heap_realloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept { return mi_heap_realloc_zero_aligned(heap,p,newsize,alignment,false); } -void* mi_heap_rezalloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { +mi_decl_nodiscard void* mi_heap_rezalloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { return mi_heap_realloc_zero_aligned_at(heap, p, newsize, alignment, offset, true); } -void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept { +mi_decl_nodiscard void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept { return mi_heap_realloc_zero_aligned(heap, p, newsize, alignment, true); } -void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { +mi_decl_nodiscard void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { size_t total; if (mi_count_size_overflow(newcount, size, &total)) return NULL; return mi_heap_rezalloc_aligned_at(heap, p, total, alignment, offset); } -void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept { +mi_decl_nodiscard void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept { size_t total; if (mi_count_size_overflow(newcount, size, &total)) return NULL; return mi_heap_rezalloc_aligned(heap, p, total, alignment); } -void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { - return mi_heap_realloc_aligned_at(mi_get_default_heap(), p, newsize, alignment, offset); +mi_decl_nodiscard void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { + return mi_heap_realloc_aligned_at(mi_prim_get_default_heap(), p, newsize, alignment, offset); } -void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept { - return mi_heap_realloc_aligned(mi_get_default_heap(), p, newsize, alignment); +mi_decl_nodiscard void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept { + return mi_heap_realloc_aligned(mi_prim_get_default_heap(), p, newsize, alignment); } -void* mi_rezalloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { - return mi_heap_rezalloc_aligned_at(mi_get_default_heap(), p, newsize, alignment, offset); +mi_decl_nodiscard void* mi_rezalloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { + return mi_heap_rezalloc_aligned_at(mi_prim_get_default_heap(), p, newsize, alignment, offset); } -void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept { - return mi_heap_rezalloc_aligned(mi_get_default_heap(), p, newsize, alignment); +mi_decl_nodiscard void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept { + return mi_heap_rezalloc_aligned(mi_prim_get_default_heap(), p, newsize, alignment); } -void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { - return mi_heap_recalloc_aligned_at(mi_get_default_heap(), p, newcount, size, alignment, offset); +mi_decl_nodiscard void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { + return mi_heap_recalloc_aligned_at(mi_prim_get_default_heap(), p, newcount, size, alignment, offset); } -void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept { - return mi_heap_recalloc_aligned(mi_get_default_heap(), p, newcount, size, alignment); +mi_decl_nodiscard void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept { + return mi_heap_recalloc_aligned(mi_prim_get_default_heap(), p, newcount, size, alignment); } - diff --git a/contrib/libs/mimalloc/src/alloc-override-osx.c b/contrib/libs/mimalloc/src/alloc-override-osx.c deleted file mode 100644 index f506d30a95..0000000000 --- a/contrib/libs/mimalloc/src/alloc-override-osx.c +++ /dev/null @@ -1,281 +0,0 @@ -/* ---------------------------------------------------------------------------- -Copyright (c) 2018-2020, Microsoft Research, Daan Leijen -This is free software; you can redistribute it and/or modify it under the -terms of the MIT license. A copy of the license can be found in the file -"LICENSE" at the root of this distribution. ------------------------------------------------------------------------------*/ - -#include "mimalloc.h" -#include "mimalloc-internal.h" - -#if defined(MI_MALLOC_OVERRIDE) - -#if !defined(__APPLE__) -#error "this file should only be included on macOS" -#endif - -/* ------------------------------------------------------ - Override system malloc on macOS - This is done through the malloc zone interface. - It seems we also need to interpose (see `alloc-override.c`) - or otherwise we get zone errors as there are usually - already allocations done by the time we take over the - zone. Unfortunately, that means we need to replace - the `free` with a checked free (`cfree`) impacting - performance. ------------------------------------------------------- */ - -#include <AvailabilityMacros.h> -#include <malloc/malloc.h> -#include <string.h> // memset - -#if defined(MAC_OS_X_VERSION_10_6) && \ - MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6 -// only available from OSX 10.6 -extern malloc_zone_t* malloc_default_purgeable_zone(void) __attribute__((weak_import)); -#endif - -/* ------------------------------------------------------ - malloc zone members ------------------------------------------------------- */ - -static size_t zone_size(malloc_zone_t* zone, const void* p) { - UNUSED(zone); - if (!mi_is_in_heap_region(p)) - return 0; // not our pointer, bail out - - return mi_usable_size(p); -} - -static void* zone_malloc(malloc_zone_t* zone, size_t size) { - UNUSED(zone); - return mi_malloc(size); -} - -static void* zone_calloc(malloc_zone_t* zone, size_t count, size_t size) { - UNUSED(zone); - return mi_calloc(count, size); -} - -static void* zone_valloc(malloc_zone_t* zone, size_t size) { - UNUSED(zone); - return mi_malloc_aligned(size, _mi_os_page_size()); -} - -static void zone_free(malloc_zone_t* zone, void* p) { - UNUSED(zone); - mi_free(p); -} - -static void* zone_realloc(malloc_zone_t* zone, void* p, size_t newsize) { - UNUSED(zone); - return mi_realloc(p, newsize); -} - -static void* zone_memalign(malloc_zone_t* zone, size_t alignment, size_t size) { - UNUSED(zone); - return mi_malloc_aligned(size,alignment); -} - -static void zone_destroy(malloc_zone_t* zone) { - UNUSED(zone); - // todo: ignore for now? -} - -static unsigned zone_batch_malloc(malloc_zone_t* zone, size_t size, void** ps, unsigned count) { - size_t i; - for (i = 0; i < count; i++) { - ps[i] = zone_malloc(zone, size); - if (ps[i] == NULL) break; - } - return i; -} - -static void zone_batch_free(malloc_zone_t* zone, void** ps, unsigned count) { - for(size_t i = 0; i < count; i++) { - zone_free(zone, ps[i]); - ps[i] = NULL; - } -} - -static size_t zone_pressure_relief(malloc_zone_t* zone, size_t size) { - UNUSED(zone); UNUSED(size); - mi_collect(false); - return 0; -} - -static void zone_free_definite_size(malloc_zone_t* zone, void* p, size_t size) { - UNUSED(size); - zone_free(zone,p); -} - - -/* ------------------------------------------------------ - Introspection members ------------------------------------------------------- */ - -static kern_return_t intro_enumerator(task_t task, void* p, - unsigned type_mask, vm_address_t zone_address, - memory_reader_t reader, - vm_range_recorder_t recorder) -{ - // todo: enumerate all memory - UNUSED(task); UNUSED(p); UNUSED(type_mask); UNUSED(zone_address); - UNUSED(reader); UNUSED(recorder); - return KERN_SUCCESS; -} - -static size_t intro_good_size(malloc_zone_t* zone, size_t size) { - UNUSED(zone); - return mi_good_size(size); -} - -static boolean_t intro_check(malloc_zone_t* zone) { - UNUSED(zone); - return true; -} - -static void intro_print(malloc_zone_t* zone, boolean_t verbose) { - UNUSED(zone); UNUSED(verbose); - mi_stats_print(NULL); -} - -static void intro_log(malloc_zone_t* zone, void* p) { - UNUSED(zone); UNUSED(p); - // todo? -} - -static void intro_force_lock(malloc_zone_t* zone) { - UNUSED(zone); - // todo? -} - -static void intro_force_unlock(malloc_zone_t* zone) { - UNUSED(zone); - // todo? -} - -static void intro_statistics(malloc_zone_t* zone, malloc_statistics_t* stats) { - UNUSED(zone); - // todo... - stats->blocks_in_use = 0; - stats->size_in_use = 0; - stats->max_size_in_use = 0; - stats->size_allocated = 0; -} - -static boolean_t intro_zone_locked(malloc_zone_t* zone) { - UNUSED(zone); - return false; -} - - -/* ------------------------------------------------------ - At process start, override the default allocator ------------------------------------------------------- */ - -static malloc_zone_t* mi_get_default_zone() -{ - // The first returned zone is the real default - malloc_zone_t** zones = NULL; - unsigned count = 0; - kern_return_t ret = malloc_get_all_zones(0, NULL, (vm_address_t**)&zones, &count); - if (ret == KERN_SUCCESS && count > 0) { - return zones[0]; - } - else { - // fallback - return malloc_default_zone(); - } -} - -static malloc_introspection_t mi_introspect = { - .enumerator = &intro_enumerator, - .good_size = &intro_good_size, - .check = &intro_check, - .print = &intro_print, - .log = &intro_log, - .force_lock = &intro_force_lock, - .force_unlock = &intro_force_unlock, -#if defined(MAC_OS_X_VERSION_10_6) && \ - MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6 - .zone_locked = &intro_zone_locked, - .statistics = &intro_statistics, -#endif -}; - -static malloc_zone_t mi_malloc_zone = { - .size = &zone_size, - .zone_name = "mimalloc", - .introspect = &mi_introspect, - .malloc = &zone_malloc, - .calloc = &zone_calloc, - .valloc = &zone_valloc, - .free = &zone_free, - .realloc = &zone_realloc, - .destroy = &zone_destroy, - .batch_malloc = &zone_batch_malloc, - .batch_free = &zone_batch_free, -#if defined(MAC_OS_X_VERSION_10_6) && \ - MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6 - // switch to version 9 on OSX 10.6 to support memalign. - .version = 9, - .memalign = &zone_memalign, - .free_definite_size = &zone_free_definite_size, - .pressure_relief = &zone_pressure_relief, -#else - .version = 4, -#endif -}; - - -#if defined(MI_SHARED_LIB_EXPORT) && defined(MI_INTERPOSE) - -static malloc_zone_t *mi_malloc_default_zone(void) { - return &mi_malloc_zone; -} -// TODO: should use the macros in alloc-override but they aren't available here. -__attribute__((used)) static struct { - const void *replacement; - const void *target; -} replace_malloc_default_zone[] __attribute__((section("__DATA, __interpose"))) = { - { (const void*)mi_malloc_default_zone, (const void*)malloc_default_zone }, -}; -#endif - -static void __attribute__((constructor(0))) _mi_macos_override_malloc() { - malloc_zone_t* purgeable_zone = NULL; - -#if defined(MAC_OS_X_VERSION_10_6) && \ - MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6 - // force the purgeable zone to exist to avoid strange bugs - if (malloc_default_purgeable_zone) { - purgeable_zone = malloc_default_purgeable_zone(); - } -#endif - - // Register our zone. - // thomcc: I think this is still needed to put us in the zone list. - malloc_zone_register(&mi_malloc_zone); - // Unregister the default zone, this makes our zone the new default - // as that was the last registered. - malloc_zone_t *default_zone = mi_get_default_zone(); - // thomcc: Unsure if the next test is *always* false or just false in the - // cases I've tried. I'm also unsure if the code inside is needed. at all - if (default_zone != &mi_malloc_zone) { - malloc_zone_unregister(default_zone); - - // Reregister the default zone so free and realloc in that zone keep working. - malloc_zone_register(default_zone); - } - - // Unregister, and re-register the purgeable_zone to avoid bugs if it occurs - // earlier than the default zone. - if (purgeable_zone != NULL) { - malloc_zone_unregister(purgeable_zone); - malloc_zone_register(purgeable_zone); - } - -} - -#endif // MI_MALLOC_OVERRIDE diff --git a/contrib/libs/mimalloc/src/alloc-override.c b/contrib/libs/mimalloc/src/alloc-override.c index 6a87e7bd2d..12837cdd94 100644 --- a/contrib/libs/mimalloc/src/alloc-override.c +++ b/contrib/libs/mimalloc/src/alloc-override.c @@ -13,15 +13,26 @@ terms of the MIT license. A copy of the license can be found in the file #error "It is only possible to override "malloc" on Windows when building as a DLL (and linking the C runtime as a DLL)" #endif -#if defined(MI_MALLOC_OVERRIDE) && !(defined(_WIN32)) // || (defined(__APPLE__) && !defined(MI_INTERPOSE))) +#if defined(MI_MALLOC_OVERRIDE) && !(defined(_WIN32)) + +#if defined(__APPLE__) +#include <AvailabilityMacros.h> +mi_decl_externc void vfree(void* p); +mi_decl_externc size_t malloc_size(const void* p); +mi_decl_externc size_t malloc_good_size(size_t size); +#endif + +// helper definition for C override of C++ new +typedef void* mi_nothrow_t; // ------------------------------------------------------ // Override system malloc // ------------------------------------------------------ -#if (defined(__GNUC__) || defined(__clang__)) && !defined(__APPLE__) - // use aliasing to alias the exported function to one of our `mi_` functions +#if (defined(__GNUC__) || defined(__clang__)) && !defined(__APPLE__) && !MI_TRACK_ENABLED + // gcc, clang: use aliasing to alias the exported function to one of our `mi_` functions #if (defined(__GNUC__) && __GNUC__ >= 9) + #pragma GCC diagnostic ignored "-Wattributes" // or we get warnings that nodiscard is ignored on a forward #define MI_FORWARD(fun) __attribute__((alias(#fun), used, visibility("default"), copy(fun))); #else #define MI_FORWARD(fun) __attribute__((alias(#fun), used, visibility("default"))); @@ -32,7 +43,7 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_FORWARD0(fun,x) MI_FORWARD(fun) #define MI_FORWARD02(fun,x,y) MI_FORWARD(fun) #else - // use forwarding by calling our `mi_` function + // otherwise use forwarding by calling our `mi_` function #define MI_FORWARD1(fun,x) { return fun(x); } #define MI_FORWARD2(fun,x,y) { return fun(x,y); } #define MI_FORWARD3(fun,x,y,z) { return fun(x,y,z); } @@ -40,7 +51,17 @@ terms of the MIT license. A copy of the license can be found in the file #define MI_FORWARD02(fun,x,y) { fun(x,y); } #endif -#if defined(__APPLE__) && defined(MI_SHARED_LIB_EXPORT) && defined(MI_INTERPOSE) + +#if defined(__APPLE__) && defined(MI_SHARED_LIB_EXPORT) && defined(MI_OSX_INTERPOSE) + // define MI_OSX_IS_INTERPOSED as we should not provide forwarding definitions for + // functions that are interposed (or the interposing does not work) + #define MI_OSX_IS_INTERPOSED + + mi_decl_externc size_t mi_malloc_size_checked(void *p) { + if (!mi_is_in_heap_region(p)) return 0; + return mi_usable_size(p); + } + // use interposing so `DYLD_INSERT_LIBRARIES` works without `DYLD_FORCE_FLAT_NAMESPACE=1` // See: <https://books.google.com/books?id=K8vUkpOXhN4C&pg=PA73> struct mi_interpose_s { @@ -49,36 +70,79 @@ terms of the MIT license. A copy of the license can be found in the file }; #define MI_INTERPOSE_FUN(oldfun,newfun) { (const void*)&newfun, (const void*)&oldfun } #define MI_INTERPOSE_MI(fun) MI_INTERPOSE_FUN(fun,mi_##fun) + __attribute__((used)) static struct mi_interpose_s _mi_interposes[] __attribute__((section("__DATA, __interpose"))) = { MI_INTERPOSE_MI(malloc), MI_INTERPOSE_MI(calloc), MI_INTERPOSE_MI(realloc), MI_INTERPOSE_MI(strdup), + #if defined(MAC_OS_X_VERSION_10_7) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_7 MI_INTERPOSE_MI(strndup), + #endif MI_INTERPOSE_MI(realpath), MI_INTERPOSE_MI(posix_memalign), MI_INTERPOSE_MI(reallocf), MI_INTERPOSE_MI(valloc), - #ifndef MI_OSX_ZONE - // some code allocates from default zone but deallocates using plain free :-( (like NxHashResizeToCapacity <https://github.com/nneonneo/osx-10.9-opensource/blob/master/objc4-551.1/runtime/hashtable2.mm>) - MI_INTERPOSE_FUN(free,mi_cfree), // use safe free that checks if pointers are from us - #else - // We interpose malloc_default_zone in alloc-override-osx.c - MI_INTERPOSE_MI(free), + MI_INTERPOSE_FUN(malloc_size,mi_malloc_size_checked), + MI_INTERPOSE_MI(malloc_good_size), + #if defined(MAC_OS_X_VERSION_10_15) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_15 + MI_INTERPOSE_MI(aligned_alloc), #endif - // some code allocates from a zone but deallocates using plain free :-( (like NxHashResizeToCapacity <https://github.com/nneonneo/osx-10.9-opensource/blob/master/objc4-551.1/runtime/hashtable2.mm>) + #ifdef MI_OSX_ZONE + // we interpose malloc_default_zone in alloc-override-osx.c so we can use mi_free safely + MI_INTERPOSE_MI(free), + MI_INTERPOSE_FUN(vfree,mi_free), + #else + // sometimes code allocates from default zone but deallocates using plain free :-( (like NxHashResizeToCapacity <https://github.com/nneonneo/osx-10.9-opensource/blob/master/objc4-551.1/runtime/hashtable2.mm>) MI_INTERPOSE_FUN(free,mi_cfree), // use safe free that checks if pointers are from us + MI_INTERPOSE_FUN(vfree,mi_cfree), + #endif }; + + #ifdef __cplusplus + extern "C" { + #endif + void _ZdlPv(void* p); // delete + void _ZdaPv(void* p); // delete[] + void _ZdlPvm(void* p, size_t n); // delete + void _ZdaPvm(void* p, size_t n); // delete[] + void* _Znwm(size_t n); // new + void* _Znam(size_t n); // new[] + void* _ZnwmRKSt9nothrow_t(size_t n, mi_nothrow_t tag); // new nothrow + void* _ZnamRKSt9nothrow_t(size_t n, mi_nothrow_t tag); // new[] nothrow + #ifdef __cplusplus + } + #endif + __attribute__((used)) static struct mi_interpose_s _mi_cxx_interposes[] __attribute__((section("__DATA, __interpose"))) = + { + MI_INTERPOSE_FUN(_ZdlPv,mi_free), + MI_INTERPOSE_FUN(_ZdaPv,mi_free), + MI_INTERPOSE_FUN(_ZdlPvm,mi_free_size), + MI_INTERPOSE_FUN(_ZdaPvm,mi_free_size), + MI_INTERPOSE_FUN(_Znwm,mi_new), + MI_INTERPOSE_FUN(_Znam,mi_new), + MI_INTERPOSE_FUN(_ZnwmRKSt9nothrow_t,mi_new_nothrow), + MI_INTERPOSE_FUN(_ZnamRKSt9nothrow_t,mi_new_nothrow), + }; + #elif defined(_MSC_VER) // cannot override malloc unless using a dll. // we just override new/delete which does work in a static library. #else - // On all other systems forward to our API - void* malloc(size_t size) MI_FORWARD1(mi_malloc, size) - void* calloc(size_t size, size_t n) MI_FORWARD2(mi_calloc, size, n) - void* realloc(void* p, size_t newsize) MI_FORWARD2(mi_realloc, p, newsize) - void free(void* p) MI_FORWARD0(mi_free, p) + // On all other systems forward allocation primitives to our API + mi_decl_export void* malloc(size_t size) MI_FORWARD1(mi_malloc, size) + mi_decl_export void* calloc(size_t size, size_t n) MI_FORWARD2(mi_calloc, size, n) + mi_decl_export void* realloc(void* p, size_t newsize) MI_FORWARD2(mi_realloc, p, newsize) + mi_decl_export void free(void* p) MI_FORWARD0(mi_free, p) + // In principle we do not need to forward `strdup`/`strndup` but on some systems these do not use `malloc` internally (but a more primitive call) + // We only override if `strdup` is not a macro (as on some older libc's, see issue #885) + #if !defined(strdup) + mi_decl_export char* strdup(const char* str) MI_FORWARD1(mi_strdup, str) + #endif + #if !defined(strndup) && (!defined(__APPLE__) || (defined(MAC_OS_X_VERSION_10_7) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_7)) + mi_decl_export char* strndup(const char* str, size_t n) MI_FORWARD2(mi_strndup, str, n) + #endif #endif #if (defined(__GNUC__) || defined(__clang__)) && !defined(__APPLE__) @@ -96,18 +160,21 @@ terms of the MIT license. A copy of the license can be found in the file // see <https://en.cppreference.com/w/cpp/memory/new/operator_new> // ------------------------------------------------------ #include <new> - void operator delete(void* p) noexcept MI_FORWARD0(mi_free,p) - void operator delete[](void* p) noexcept MI_FORWARD0(mi_free,p) - void* operator new(std::size_t n) noexcept(false) MI_FORWARD1(mi_new,n) - void* operator new[](std::size_t n) noexcept(false) MI_FORWARD1(mi_new,n) + #ifndef MI_OSX_IS_INTERPOSED + void operator delete(void* p) noexcept MI_FORWARD0(mi_free,p) + void operator delete[](void* p) noexcept MI_FORWARD0(mi_free,p) + + void* operator new(std::size_t n) noexcept(false) MI_FORWARD1(mi_new,n) + void* operator new[](std::size_t n) noexcept(false) MI_FORWARD1(mi_new,n) - void* operator new (std::size_t n, const std::nothrow_t& tag) noexcept { UNUSED(tag); return mi_new_nothrow(n); } - void* operator new[](std::size_t n, const std::nothrow_t& tag) noexcept { UNUSED(tag); return mi_new_nothrow(n); } + void* operator new (std::size_t n, const std::nothrow_t& tag) noexcept { MI_UNUSED(tag); return mi_new_nothrow(n); } + void* operator new[](std::size_t n, const std::nothrow_t& tag) noexcept { MI_UNUSED(tag); return mi_new_nothrow(n); } - #if (__cplusplus >= 201402L || _MSC_VER >= 1916) - void operator delete (void* p, std::size_t n) noexcept MI_FORWARD02(mi_free_size,p,n) - void operator delete[](void* p, std::size_t n) noexcept MI_FORWARD02(mi_free_size,p,n) + #if (__cplusplus >= 201402L || _MSC_VER >= 1916) + void operator delete (void* p, std::size_t n) noexcept MI_FORWARD02(mi_free_size,p,n) + void operator delete[](void* p, std::size_t n) noexcept MI_FORWARD02(mi_free_size,p,n) + #endif #endif #if (__cplusplus > 201402L && defined(__cpp_aligned_new)) && (!defined(__GNUC__) || (__GNUC__ > 5)) @@ -115,6 +182,8 @@ terms of the MIT license. A copy of the license can be found in the file void operator delete[](void* p, std::align_val_t al) noexcept { mi_free_aligned(p, static_cast<size_t>(al)); } void operator delete (void* p, std::size_t n, std::align_val_t al) noexcept { mi_free_size_aligned(p, n, static_cast<size_t>(al)); }; void operator delete[](void* p, std::size_t n, std::align_val_t al) noexcept { mi_free_size_aligned(p, n, static_cast<size_t>(al)); }; + void operator delete (void* p, std::align_val_t al, const std::nothrow_t&) noexcept { mi_free_aligned(p, static_cast<size_t>(al)); } + void operator delete[](void* p, std::align_val_t al, const std::nothrow_t&) noexcept { mi_free_aligned(p, static_cast<size_t>(al)); } void* operator new( std::size_t n, std::align_val_t al) noexcept(false) { return mi_new_aligned(n, static_cast<size_t>(al)); } void* operator new[]( std::size_t n, std::align_val_t al) noexcept(false) { return mi_new_aligned(n, static_cast<size_t>(al)); } @@ -128,86 +197,109 @@ terms of the MIT license. A copy of the license can be found in the file // used by GCC and CLang). // See <https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling> // ------------------------------------------------------ + void _ZdlPv(void* p) MI_FORWARD0(mi_free,p) // delete void _ZdaPv(void* p) MI_FORWARD0(mi_free,p) // delete[] void _ZdlPvm(void* p, size_t n) MI_FORWARD02(mi_free_size,p,n) void _ZdaPvm(void* p, size_t n) MI_FORWARD02(mi_free_size,p,n) + void _ZdlPvSt11align_val_t(void* p, size_t al) { mi_free_aligned(p,al); } void _ZdaPvSt11align_val_t(void* p, size_t al) { mi_free_aligned(p,al); } void _ZdlPvmSt11align_val_t(void* p, size_t n, size_t al) { mi_free_size_aligned(p,n,al); } void _ZdaPvmSt11align_val_t(void* p, size_t n, size_t al) { mi_free_size_aligned(p,n,al); } - typedef struct mi_nothrow_s { int _tag; } mi_nothrow_t; + void _ZdlPvRKSt9nothrow_t(void* p, mi_nothrow_t tag) { MI_UNUSED(tag); mi_free(p); } // operator delete(void*, std::nothrow_t const&) + void _ZdaPvRKSt9nothrow_t(void* p, mi_nothrow_t tag) { MI_UNUSED(tag); mi_free(p); } // operator delete[](void*, std::nothrow_t const&) + void _ZdlPvSt11align_val_tRKSt9nothrow_t(void* p, size_t al, mi_nothrow_t tag) { MI_UNUSED(tag); mi_free_aligned(p,al); } // operator delete(void*, std::align_val_t, std::nothrow_t const&) + void _ZdaPvSt11align_val_tRKSt9nothrow_t(void* p, size_t al, mi_nothrow_t tag) { MI_UNUSED(tag); mi_free_aligned(p,al); } // operator delete[](void*, std::align_val_t, std::nothrow_t const&) + #if (MI_INTPTR_SIZE==8) void* _Znwm(size_t n) MI_FORWARD1(mi_new,n) // new 64-bit void* _Znam(size_t n) MI_FORWARD1(mi_new,n) // new[] 64-bit + void* _ZnwmRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_nothrow(n); } + void* _ZnamRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_nothrow(n); } void* _ZnwmSt11align_val_t(size_t n, size_t al) MI_FORWARD2(mi_new_aligned, n, al) void* _ZnamSt11align_val_t(size_t n, size_t al) MI_FORWARD2(mi_new_aligned, n, al) - void* _ZnwmRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { UNUSED(tag); return mi_new_nothrow(n); } - void* _ZnamRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { UNUSED(tag); return mi_new_nothrow(n); } - void* _ZnwmSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { UNUSED(tag); return mi_new_aligned_nothrow(n,al); } - void* _ZnamSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { UNUSED(tag); return mi_new_aligned_nothrow(n,al); } + void* _ZnwmSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_aligned_nothrow(n,al); } + void* _ZnamSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_aligned_nothrow(n,al); } #elif (MI_INTPTR_SIZE==4) void* _Znwj(size_t n) MI_FORWARD1(mi_new,n) // new 64-bit void* _Znaj(size_t n) MI_FORWARD1(mi_new,n) // new[] 64-bit + void* _ZnwjRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_nothrow(n); } + void* _ZnajRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_nothrow(n); } void* _ZnwjSt11align_val_t(size_t n, size_t al) MI_FORWARD2(mi_new_aligned, n, al) void* _ZnajSt11align_val_t(size_t n, size_t al) MI_FORWARD2(mi_new_aligned, n, al) - void* _ZnwjRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { UNUSED(tag); return mi_new_nothrow(n); } - void* _ZnajRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { UNUSED(tag); return mi_new_nothrow(n); } - void* _ZnwjSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { UNUSED(tag); return mi_new_aligned_nothrow(n,al); } - void* _ZnajSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { UNUSED(tag); return mi_new_aligned_nothrow(n,al); } + void* _ZnwjSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_aligned_nothrow(n,al); } + void* _ZnajSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_aligned_nothrow(n,al); } #else - #error "define overloads for new/delete for this platform (just for performance, can be skipped)" + #error "define overloads for new/delete for this platform (just for performance, can be skipped)" #endif #endif // __cplusplus +// ------------------------------------------------------ +// Further Posix & Unix functions definitions +// ------------------------------------------------------ #ifdef __cplusplus extern "C" { #endif -// ------------------------------------------------------ -// Posix & Unix functions definitions -// ------------------------------------------------------ +#ifndef MI_OSX_IS_INTERPOSED + // Forward Posix/Unix calls as well + void* reallocf(void* p, size_t newsize) MI_FORWARD2(mi_reallocf,p,newsize) + size_t malloc_size(const void* p) MI_FORWARD1(mi_usable_size,p) + #if !defined(__ANDROID__) && !defined(__FreeBSD__) + size_t malloc_usable_size(void *p) MI_FORWARD1(mi_usable_size,p) + #else + size_t malloc_usable_size(const void *p) MI_FORWARD1(mi_usable_size,p) + #endif -void cfree(void* p) MI_FORWARD0(mi_free, p) -void* reallocf(void* p, size_t newsize) MI_FORWARD2(mi_reallocf,p,newsize) -size_t malloc_size(const void* p) MI_FORWARD1(mi_usable_size,p) -#if !defined(__ANDROID__) -size_t malloc_usable_size(void *p) MI_FORWARD1(mi_usable_size,p) -#else -size_t malloc_usable_size(const void *p) MI_FORWARD1(mi_usable_size,p) + // No forwarding here due to aliasing/name mangling issues + void* valloc(size_t size) { return mi_valloc(size); } + void vfree(void* p) { mi_free(p); } + size_t malloc_good_size(size_t size) { return mi_malloc_good_size(size); } + int posix_memalign(void** p, size_t alignment, size_t size) { return mi_posix_memalign(p, alignment, size); } + + // `aligned_alloc` is only available when __USE_ISOC11 is defined. + // Note: it seems __USE_ISOC11 is not defined in musl (and perhaps other libc's) so we only check + // for it if using glibc. + // Note: Conda has a custom glibc where `aligned_alloc` is declared `static inline` and we cannot + // override it, but both _ISOC11_SOURCE and __USE_ISOC11 are undefined in Conda GCC7 or GCC9. + // Fortunately, in the case where `aligned_alloc` is declared as `static inline` it + // uses internally `memalign`, `posix_memalign`, or `_aligned_malloc` so we can avoid overriding it ourselves. + #if !defined(__GLIBC__) || __USE_ISOC11 + void* aligned_alloc(size_t alignment, size_t size) { return mi_aligned_alloc(alignment, size); } + #endif #endif // no forwarding here due to aliasing/name mangling issues -void* valloc(size_t size) { return mi_valloc(size); } -void* pvalloc(size_t size) { return mi_pvalloc(size); } -void* reallocarray(void* p, size_t count, size_t size) { return mi_reallocarray(p, count, size); } -void* memalign(size_t alignment, size_t size) { return mi_memalign(alignment, size); } -int posix_memalign(void** p, size_t alignment, size_t size) { return mi_posix_memalign(p, alignment, size); } -void* _aligned_malloc(size_t alignment, size_t size) { return mi_aligned_alloc(alignment, size); } - -// `aligned_alloc` is only available when __USE_ISOC11 is defined. -// Note: Conda has a custom glibc where `aligned_alloc` is declared `static inline` and we cannot -// override it, but both _ISOC11_SOURCE and __USE_ISOC11 are undefined in Conda GCC7 or GCC9. -// Fortunately, in the case where `aligned_alloc` is declared as `static inline` it -// uses internally `memalign`, `posix_memalign`, or `_aligned_malloc` so we can avoid overriding it ourselves. -#if __USE_ISOC11 -void* aligned_alloc(size_t alignment, size_t size) { return mi_aligned_alloc(alignment, size); } -#endif +void cfree(void* p) { mi_free(p); } +void* pvalloc(size_t size) { return mi_pvalloc(size); } +void* memalign(size_t alignment, size_t size) { return mi_memalign(alignment, size); } +void* _aligned_malloc(size_t alignment, size_t size) { return mi_aligned_alloc(alignment, size); } +void* reallocarray(void* p, size_t count, size_t size) { return mi_reallocarray(p, count, size); } +// some systems define reallocarr so mark it as a weak symbol (#751) +mi_decl_weak int reallocarr(void* p, size_t count, size_t size) { return mi_reallocarr(p, count, size); } +#if defined(__wasi__) + // forward __libc interface (see PR #667) + void* __libc_malloc(size_t size) MI_FORWARD1(mi_malloc, size) + void* __libc_calloc(size_t count, size_t size) MI_FORWARD2(mi_calloc, count, size) + void* __libc_realloc(void* p, size_t size) MI_FORWARD2(mi_realloc, p, size) + void __libc_free(void* p) MI_FORWARD0(mi_free, p) + void* __libc_memalign(size_t alignment, size_t size) { return mi_memalign(alignment, size); } -#if defined(__GLIBC__) && defined(__linux__) +#elif defined(__GLIBC__) && defined(__linux__) // forward __libc interface (needed for glibc-based Linux distributions) - void* __libc_malloc(size_t size) MI_FORWARD1(mi_malloc,size) - void* __libc_calloc(size_t count, size_t size) MI_FORWARD2(mi_calloc,count,size) - void* __libc_realloc(void* p, size_t size) MI_FORWARD2(mi_realloc,p,size) - void __libc_free(void* p) MI_FORWARD0(mi_free,p) - void __libc_cfree(void* p) MI_FORWARD0(mi_free,p) - - void* __libc_valloc(size_t size) { return mi_valloc(size); } - void* __libc_pvalloc(size_t size) { return mi_pvalloc(size); } - void* __libc_memalign(size_t alignment, size_t size) { return mi_memalign(alignment,size); } + void* __libc_malloc(size_t size) MI_FORWARD1(mi_malloc,size) + void* __libc_calloc(size_t count, size_t size) MI_FORWARD2(mi_calloc,count,size) + void* __libc_realloc(void* p, size_t size) MI_FORWARD2(mi_realloc,p,size) + void __libc_free(void* p) MI_FORWARD0(mi_free,p) + void __libc_cfree(void* p) MI_FORWARD0(mi_free,p) + + void* __libc_valloc(size_t size) { return mi_valloc(size); } + void* __libc_pvalloc(size_t size) { return mi_pvalloc(size); } + void* __libc_memalign(size_t alignment, size_t size) { return mi_memalign(alignment,size); } int __posix_memalign(void** p, size_t alignment, size_t size) { return mi_posix_memalign(p,alignment,size); } #endif diff --git a/contrib/libs/mimalloc/src/alloc-posix.c b/contrib/libs/mimalloc/src/alloc-posix.c index 43931e56da..225752fd87 100644 --- a/contrib/libs/mimalloc/src/alloc-posix.c +++ b/contrib/libs/mimalloc/src/alloc-posix.c @@ -10,7 +10,7 @@ terms of the MIT license. A copy of the license can be found in the file // for convenience and used when overriding these functions. // ------------------------------------------------------------------------ #include "mimalloc.h" -#include "mimalloc-internal.h" +#include "mimalloc/internal.h" // ------------------------------------------------------ // Posix & Unix functions definitions @@ -32,14 +32,20 @@ terms of the MIT license. A copy of the license can be found in the file #endif -size_t mi_malloc_size(const void* p) mi_attr_noexcept { +mi_decl_nodiscard size_t mi_malloc_size(const void* p) mi_attr_noexcept { + // if (!mi_is_in_heap_region(p)) return 0; return mi_usable_size(p); } -size_t mi_malloc_usable_size(const void *p) mi_attr_noexcept { +mi_decl_nodiscard size_t mi_malloc_usable_size(const void *p) mi_attr_noexcept { + // if (!mi_is_in_heap_region(p)) return 0; return mi_usable_size(p); } +mi_decl_nodiscard size_t mi_malloc_good_size(size_t size) mi_attr_noexcept { + return mi_good_size(size); +} + void mi_cfree(void* p) mi_attr_noexcept { if (mi_is_in_heap_region(p)) { mi_free(p); @@ -50,53 +56,75 @@ int mi_posix_memalign(void** p, size_t alignment, size_t size) mi_attr_noexcept // Note: The spec dictates we should not modify `*p` on an error. (issue#27) // <http://man7.org/linux/man-pages/man3/posix_memalign.3.html> if (p == NULL) return EINVAL; - if (alignment % sizeof(void*) != 0) return EINVAL; // natural alignment - if (!_mi_is_power_of_two(alignment)) return EINVAL; // not a power of 2 - void* q = (mi_malloc_satisfies_alignment(alignment, size) ? mi_malloc(size) : mi_malloc_aligned(size, alignment)); + if ((alignment % sizeof(void*)) != 0) return EINVAL; // natural alignment + // it is also required that alignment is a power of 2 and > 0; this is checked in `mi_malloc_aligned` + if (alignment==0 || !_mi_is_power_of_two(alignment)) return EINVAL; // not a power of 2 + void* q = mi_malloc_aligned(size, alignment); if (q==NULL && size != 0) return ENOMEM; mi_assert_internal(((uintptr_t)q % alignment) == 0); *p = q; return 0; } -mi_decl_restrict void* mi_memalign(size_t alignment, size_t size) mi_attr_noexcept { - void* p = (mi_malloc_satisfies_alignment(alignment,size) ? mi_malloc(size) : mi_malloc_aligned(size, alignment)); +mi_decl_nodiscard mi_decl_restrict void* mi_memalign(size_t alignment, size_t size) mi_attr_noexcept { + void* p = mi_malloc_aligned(size, alignment); mi_assert_internal(((uintptr_t)p % alignment) == 0); return p; } -mi_decl_restrict void* mi_valloc(size_t size) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict void* mi_valloc(size_t size) mi_attr_noexcept { return mi_memalign( _mi_os_page_size(), size ); } -mi_decl_restrict void* mi_pvalloc(size_t size) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict void* mi_pvalloc(size_t size) mi_attr_noexcept { size_t psize = _mi_os_page_size(); if (size >= SIZE_MAX - psize) return NULL; // overflow size_t asize = _mi_align_up(size, psize); return mi_malloc_aligned(asize, psize); } -mi_decl_restrict void* mi_aligned_alloc(size_t alignment, size_t size) mi_attr_noexcept { - if (alignment==0 || !_mi_is_power_of_two(alignment)) return NULL; - if ((size&(alignment-1)) != 0) return NULL; // C11 requires integral multiple, see <https://en.cppreference.com/w/c/memory/aligned_alloc> - void* p = (mi_malloc_satisfies_alignment(alignment, size) ? mi_malloc(size) : mi_malloc_aligned(size, alignment)); +mi_decl_nodiscard mi_decl_restrict void* mi_aligned_alloc(size_t alignment, size_t size) mi_attr_noexcept { + // C11 requires the size to be an integral multiple of the alignment, see <https://en.cppreference.com/w/c/memory/aligned_alloc>. + // unfortunately, it turns out quite some programs pass a size that is not an integral multiple so skip this check.. + /* if mi_unlikely((size & (alignment - 1)) != 0) { // C11 requires alignment>0 && integral multiple, see <https://en.cppreference.com/w/c/memory/aligned_alloc> + #if MI_DEBUG > 0 + _mi_error_message(EOVERFLOW, "(mi_)aligned_alloc requires the size to be an integral multiple of the alignment (size %zu, alignment %zu)\n", size, alignment); + #endif + return NULL; + } + */ + // C11 also requires alignment to be a power-of-two (and > 0) which is checked in mi_malloc_aligned + void* p = mi_malloc_aligned(size, alignment); mi_assert_internal(((uintptr_t)p % alignment) == 0); return p; } -void* mi_reallocarray( void* p, size_t count, size_t size ) mi_attr_noexcept { // BSD +mi_decl_nodiscard void* mi_reallocarray( void* p, size_t count, size_t size ) mi_attr_noexcept { // BSD void* newp = mi_reallocn(p,count,size); - if (newp==NULL) errno = ENOMEM; + if (newp==NULL) { errno = ENOMEM; } return newp; } +mi_decl_nodiscard int mi_reallocarr( void* p, size_t count, size_t size ) mi_attr_noexcept { // NetBSD + mi_assert(p != NULL); + if (p == NULL) { + errno = EINVAL; + return EINVAL; + } + void** op = (void**)p; + void* newp = mi_reallocarray(*op, count, size); + if mi_unlikely(newp == NULL) { return errno; } + *op = newp; + return 0; +} + void* mi__expand(void* p, size_t newsize) mi_attr_noexcept { // Microsoft void* res = mi_expand(p, newsize); - if (res == NULL) errno = ENOMEM; + if (res == NULL) { errno = ENOMEM; } return res; } -mi_decl_restrict unsigned short* mi_wcsdup(const unsigned short* s) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict unsigned short* mi_wcsdup(const unsigned short* s) mi_attr_noexcept { if (s==NULL) return NULL; size_t len; for(len = 0; s[len] != 0; len++) { } @@ -108,7 +136,7 @@ mi_decl_restrict unsigned short* mi_wcsdup(const unsigned short* s) mi_attr_noex return p; } -mi_decl_restrict unsigned char* mi_mbsdup(const unsigned char* s) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict unsigned char* mi_mbsdup(const unsigned char* s) mi_attr_noexcept { return (unsigned char*)mi_strdup((const char*)s); } @@ -122,7 +150,7 @@ int mi_dupenv_s(char** buf, size_t* size, const char* name) mi_attr_noexcept { else { *buf = mi_strdup(p); if (*buf==NULL) return ENOMEM; - if (size != NULL) *size = strlen(p); + if (size != NULL) *size = _mi_strlen(p); } return 0; } @@ -148,10 +176,10 @@ int mi_wdupenv_s(unsigned short** buf, size_t* size, const unsigned short* name) #endif } -void* mi_aligned_offset_recalloc(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { // Microsoft +mi_decl_nodiscard void* mi_aligned_offset_recalloc(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { // Microsoft return mi_recalloc_aligned_at(p, newcount, size, alignment, offset); } -void* mi_aligned_recalloc(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept { // Microsoft +mi_decl_nodiscard void* mi_aligned_recalloc(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept { // Microsoft return mi_recalloc_aligned(p, newcount, size, alignment); } diff --git a/contrib/libs/mimalloc/src/alloc.c b/contrib/libs/mimalloc/src/alloc.c index 8acff78327..6c9c5baf36 100644 --- a/contrib/libs/mimalloc/src/alloc.c +++ b/contrib/libs/mimalloc/src/alloc.c @@ -1,18 +1,24 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2018-2021, Microsoft Research, Daan Leijen +Copyright (c) 2018-2024, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ +#ifndef _DEFAULT_SOURCE +#define _DEFAULT_SOURCE // for realpath() on Linux +#endif + #include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" +#include "mimalloc/prim.h" // _mi_prim_thread_id() -#include <string.h> // memset, strlen -#include <stdlib.h> // malloc, exit +#include <string.h> // memset, strlen (for mi_strdup) +#include <stdlib.h> // malloc, abort #define MI_IN_ALLOC_C #include "alloc-override.c" +#include "free.c" #undef MI_IN_ALLOC_C // ------------------------------------------------------ @@ -21,625 +27,254 @@ terms of the MIT license. A copy of the license can be found in the file // Fast allocation in a page: just pop from the free list. // Fall back to generic allocation only if the list is empty. -extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept { - mi_assert_internal(page->xblock_size==0||mi_page_block_size(page) >= size); +// Note: in release mode the (inlined) routine is about 7 instructions with a single test. +extern inline void* _mi_page_malloc_zero(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept +{ + mi_assert_internal(page->block_size == 0 /* empty heap */ || mi_page_block_size(page) >= size); mi_block_t* const block = page->free; - if (mi_unlikely(block == NULL)) { - return _mi_malloc_generic(heap, size); + if mi_unlikely(block == NULL) { + return _mi_malloc_generic(heap, size, zero, 0); } mi_assert_internal(block != NULL && _mi_ptr_page(block) == page); // pop from the free list - page->used++; page->free = mi_block_next(page, block); + page->used++; mi_assert_internal(page->free == NULL || _mi_ptr_page(page->free) == page); + #if MI_DEBUG>3 + if (page->free_is_zero) { + mi_assert_expensive(mi_mem_is_zero(block+1,size - sizeof(*block))); + } + #endif -#if (MI_DEBUG>0) - if (!page->is_zero) { memset(block, MI_DEBUG_UNINIT, size); } -#elif (MI_SECURE!=0) - block->next = 0; // don't leak internal data -#endif + // allow use of the block internally + // note: when tracking we need to avoid ever touching the MI_PADDING since + // that is tracked by valgrind etc. as non-accessible (through the red-zone, see `mimalloc/track.h`) + mi_track_mem_undefined(block, mi_page_usable_block_size(page)); + + // zero the block? note: we need to zero the full block size (issue #63) + if mi_unlikely(zero) { + mi_assert_internal(page->block_size != 0); // do not call with zero'ing for huge blocks (see _mi_malloc_generic) + mi_assert_internal(page->block_size >= MI_PADDING_SIZE); + if (page->free_is_zero) { + block->next = 0; + mi_track_mem_defined(block, page->block_size - MI_PADDING_SIZE); + } + else { + _mi_memzero_aligned(block, page->block_size - MI_PADDING_SIZE); + } + } + + #if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN + if (!zero && !mi_page_is_huge(page)) { + memset(block, MI_DEBUG_UNINIT, mi_page_usable_block_size(page)); + } + #elif (MI_SECURE!=0) + if (!zero) { block->next = 0; } // don't leak internal data + #endif -#if (MI_STAT>0) + #if (MI_STAT>0) const size_t bsize = mi_page_usable_block_size(page); if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { mi_heap_stat_increase(heap, normal, bsize); mi_heap_stat_counter_increase(heap, normal_count, 1); -#if (MI_STAT>1) + #if (MI_STAT>1) const size_t bin = _mi_bin(bsize); mi_heap_stat_increase(heap, normal_bins[bin], 1); -#endif + #endif } -#endif + #endif -#if (MI_PADDING > 0) && defined(MI_ENCODE_FREELIST) - mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + mi_page_usable_block_size(page)); - ptrdiff_t delta = ((uint8_t*)padding - (uint8_t*)block - (size - MI_PADDING_SIZE)); - mi_assert_internal(delta >= 0 && mi_page_usable_block_size(page) >= (size - MI_PADDING_SIZE + delta)); - padding->canary = (uint32_t)(mi_ptr_encode(page,block,page->keys)); - padding->delta = (uint32_t)(delta); - uint8_t* fill = (uint8_t*)padding - delta; - const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ? MI_MAX_ALIGN_SIZE : delta); // set at most N initial padding bytes - for (size_t i = 0; i < maxpad; i++) { fill[i] = MI_DEBUG_PADDING; } -#endif + #if MI_PADDING // && !MI_TRACK_ENABLED + mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + mi_page_usable_block_size(page)); + ptrdiff_t delta = ((uint8_t*)padding - (uint8_t*)block - (size - MI_PADDING_SIZE)); + #if (MI_DEBUG>=2) + mi_assert_internal(delta >= 0 && mi_page_usable_block_size(page) >= (size - MI_PADDING_SIZE + delta)); + #endif + mi_track_mem_defined(padding,sizeof(mi_padding_t)); // note: re-enable since mi_page_usable_block_size may set noaccess + padding->canary = (uint32_t)(mi_ptr_encode(page,block,page->keys)); + padding->delta = (uint32_t)(delta); + #if MI_PADDING_CHECK + if (!mi_page_is_huge(page)) { + uint8_t* fill = (uint8_t*)padding - delta; + const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ? MI_MAX_ALIGN_SIZE : delta); // set at most N initial padding bytes + for (size_t i = 0; i < maxpad; i++) { fill[i] = MI_DEBUG_PADDING; } + } + #endif + #endif return block; } -// allocate a small block -extern inline mi_decl_restrict void* mi_heap_malloc_small(mi_heap_t* heap, size_t size) mi_attr_noexcept { - mi_assert(heap!=NULL); - mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local +// extra entries for improved efficiency in `alloc-aligned.c`. +extern void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept { + return _mi_page_malloc_zero(heap,page,size,false); +} +extern void* _mi_page_malloc_zeroed(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept { + return _mi_page_malloc_zero(heap,page,size,true); +} + +static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept { + mi_assert(heap != NULL); + #if MI_DEBUG + const uintptr_t tid = _mi_thread_id(); + mi_assert(heap->thread_id == 0 || heap->thread_id == tid); // heaps are thread local + #endif mi_assert(size <= MI_SMALL_SIZE_MAX); #if (MI_PADDING) - if (size == 0) { - size = sizeof(void*); - } + if (size == 0) { size = sizeof(void*); } #endif - mi_page_t* page = _mi_heap_get_free_small_page(heap,size + MI_PADDING_SIZE); - void* p = _mi_page_malloc(heap, page, size + MI_PADDING_SIZE); - mi_assert_internal(p==NULL || mi_usable_size(p) >= size); + + mi_page_t* page = _mi_heap_get_free_small_page(heap, size + MI_PADDING_SIZE); + void* const p = _mi_page_malloc_zero(heap, page, size + MI_PADDING_SIZE, zero); + mi_track_malloc(p,size,zero); + #if MI_STAT>1 if (p != NULL) { - if (!mi_heap_is_initialized(heap)) { heap = mi_get_default_heap(); } + if (!mi_heap_is_initialized(heap)) { heap = mi_prim_get_default_heap(); } mi_heap_stat_increase(heap, malloc, mi_usable_size(p)); } #endif + #if MI_DEBUG>3 + if (p != NULL && zero) { + mi_assert_expensive(mi_mem_is_zero(p, size)); + } + #endif return p; } -extern inline mi_decl_restrict void* mi_malloc_small(size_t size) mi_attr_noexcept { - return mi_heap_malloc_small(mi_get_default_heap(), size); +// allocate a small block +mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_malloc_small(mi_heap_t* heap, size_t size) mi_attr_noexcept { + return mi_heap_malloc_small_zero(heap, size, false); +} + +mi_decl_nodiscard extern inline mi_decl_restrict void* mi_malloc_small(size_t size) mi_attr_noexcept { + return mi_heap_malloc_small(mi_prim_get_default_heap(), size); } // The main allocation function -extern inline mi_decl_restrict void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept { - if (mi_likely(size <= MI_SMALL_SIZE_MAX)) { - return mi_heap_malloc_small(heap, size); +extern inline void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept { + if mi_likely(size <= MI_SMALL_SIZE_MAX) { + mi_assert_internal(huge_alignment == 0); + return mi_heap_malloc_small_zero(heap, size, zero); } else { mi_assert(heap!=NULL); - mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local - void* const p = _mi_malloc_generic(heap, size + MI_PADDING_SIZE); // note: size can overflow but it is detected in malloc_generic - mi_assert_internal(p == NULL || mi_usable_size(p) >= size); + mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local + void* const p = _mi_malloc_generic(heap, size + MI_PADDING_SIZE, zero, huge_alignment); // note: size can overflow but it is detected in malloc_generic + mi_track_malloc(p,size,zero); #if MI_STAT>1 if (p != NULL) { - if (!mi_heap_is_initialized(heap)) { heap = mi_get_default_heap(); } + if (!mi_heap_is_initialized(heap)) { heap = mi_prim_get_default_heap(); } mi_heap_stat_increase(heap, malloc, mi_usable_size(p)); } #endif - return p; - } -} - -extern inline mi_decl_restrict void* mi_malloc(size_t size) mi_attr_noexcept { - return mi_heap_malloc(mi_get_default_heap(), size); -} - - -void _mi_block_zero_init(const mi_page_t* page, void* p, size_t size) { - // note: we need to initialize the whole usable block size to zero, not just the requested size, - // or the recalloc/rezalloc functions cannot safely expand in place (see issue #63) - UNUSED(size); - mi_assert_internal(p != NULL); - mi_assert_internal(mi_usable_size(p) >= size); // size can be zero - mi_assert_internal(_mi_ptr_page(p)==page); - if (page->is_zero && size > sizeof(mi_block_t)) { - // already zero initialized memory - ((mi_block_t*)p)->next = 0; // clear the free list pointer - mi_assert_expensive(mi_mem_is_zero(p, mi_usable_size(p))); - } - else { - // otherwise memset - memset(p, 0, mi_usable_size(p)); - } -} - -// zero initialized small block -mi_decl_restrict void* mi_zalloc_small(size_t size) mi_attr_noexcept { - void* p = mi_malloc_small(size); - if (p != NULL) { - _mi_block_zero_init(_mi_ptr_page(p), p, size); // todo: can we avoid getting the page again? - } - return p; -} - -void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) { - void* p = mi_heap_malloc(heap,size); - if (zero && p != NULL) { - _mi_block_zero_init(_mi_ptr_page(p),p,size); // todo: can we avoid getting the page again? - } - return p; -} - -extern inline mi_decl_restrict void* mi_heap_zalloc(mi_heap_t* heap, size_t size) mi_attr_noexcept { - return _mi_heap_malloc_zero(heap, size, true); -} - -mi_decl_restrict void* mi_zalloc(size_t size) mi_attr_noexcept { - return mi_heap_zalloc(mi_get_default_heap(),size); -} - - -// ------------------------------------------------------ -// Check for double free in secure and debug mode -// This is somewhat expensive so only enabled for secure mode 4 -// ------------------------------------------------------ - -#if (MI_ENCODE_FREELIST && (MI_SECURE>=4 || MI_DEBUG!=0)) -// linear check if the free list contains a specific element -static bool mi_list_contains(const mi_page_t* page, const mi_block_t* list, const mi_block_t* elem) { - while (list != NULL) { - if (elem==list) return true; - list = mi_block_next(page, list); - } - return false; -} - -static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, const mi_block_t* block) { - // The decoded value is in the same page (or NULL). - // Walk the free lists to verify positively if it is already freed - if (mi_list_contains(page, page->free, block) || - mi_list_contains(page, page->local_free, block) || - mi_list_contains(page, mi_page_thread_free(page), block)) - { - _mi_error_message(EAGAIN, "double free detected of block %p with size %zu\n", block, mi_page_block_size(page)); - return true; - } - return false; -} - -static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) { - mi_block_t* n = mi_block_nextx(page, block, page->keys); // pretend it is freed, and get the decoded first field - if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check: aligned pointer? - (n==NULL || mi_is_in_same_page(block, n))) // quick check: in same page or NULL? - { - // Suspicous: decoded value a in block is in the same page (or NULL) -- maybe a double free? - // (continue in separate function to improve code generation) - return mi_check_is_double_freex(page, block); - } - return false; -} -#else -static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) { - UNUSED(page); - UNUSED(block); - return false; -} -#endif - -// --------------------------------------------------------------------------- -// Check for heap block overflow by setting up padding at the end of the block -// --------------------------------------------------------------------------- - -#if (MI_PADDING>0) && defined(MI_ENCODE_FREELIST) -static bool mi_page_decode_padding(const mi_page_t* page, const mi_block_t* block, size_t* delta, size_t* bsize) { - *bsize = mi_page_usable_block_size(page); - const mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + *bsize); - *delta = padding->delta; - return ((uint32_t)mi_ptr_encode(page,block,page->keys) == padding->canary && *delta <= *bsize); -} - -// Return the exact usable size of a block. -static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* block) { - size_t bsize; - size_t delta; - bool ok = mi_page_decode_padding(page, block, &delta, &bsize); - mi_assert_internal(ok); mi_assert_internal(delta <= bsize); - return (ok ? bsize - delta : 0); -} - -static bool mi_verify_padding(const mi_page_t* page, const mi_block_t* block, size_t* size, size_t* wrong) { - size_t bsize; - size_t delta; - bool ok = mi_page_decode_padding(page, block, &delta, &bsize); - *size = *wrong = bsize; - if (!ok) return false; - mi_assert_internal(bsize >= delta); - *size = bsize - delta; - uint8_t* fill = (uint8_t*)block + bsize - delta; - const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ? MI_MAX_ALIGN_SIZE : delta); // check at most the first N padding bytes - for (size_t i = 0; i < maxpad; i++) { - if (fill[i] != MI_DEBUG_PADDING) { - *wrong = bsize - delta + i; - return false; - } - } - return true; -} - -static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { - size_t size; - size_t wrong; - if (!mi_verify_padding(page,block,&size,&wrong)) { - _mi_error_message(EFAULT, "buffer overflow in heap block %p of size %zu: write after %zu bytes\n", block, size, wrong ); - } -} - -// When a non-thread-local block is freed, it becomes part of the thread delayed free -// list that is freed later by the owning heap. If the exact usable size is too small to -// contain the pointer for the delayed list, then shrink the padding (by decreasing delta) -// so it will later not trigger an overflow error in `mi_free_block`. -static void mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size) { - size_t bsize; - size_t delta; - bool ok = mi_page_decode_padding(page, block, &delta, &bsize); - mi_assert_internal(ok); - if (!ok || (bsize - delta) >= min_size) return; // usually already enough space - mi_assert_internal(bsize >= min_size); - if (bsize < min_size) return; // should never happen - size_t new_delta = (bsize - min_size); - mi_assert_internal(new_delta < bsize); - mi_padding_t* padding = (mi_padding_t*)((uint8_t*)block + bsize); - padding->delta = (uint32_t)new_delta; -} -#else -static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { - UNUSED(page); - UNUSED(block); -} - -static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* block) { - UNUSED(block); - return mi_page_usable_block_size(page); -} - -static void mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size) { - UNUSED(page); - UNUSED(block); - UNUSED(min_size); -} -#endif - -// only maintain stats for smaller objects if requested -#if (MI_STAT>0) -static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) { -#if (MI_STAT < 2) - UNUSED(block); -#endif - mi_heap_t* const heap = mi_heap_get_default(); - const size_t bsize = mi_page_usable_block_size(page); -#if (MI_STAT>1) - const size_t usize = mi_page_usable_size_of(page, block); - mi_heap_stat_decrease(heap, malloc, usize); -#endif - if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { - mi_heap_stat_decrease(heap, normal, bsize); -#if (MI_STAT > 1) - mi_heap_stat_decrease(heap, normal_bins[_mi_bin(bsize)], 1); -#endif - } -} -#else -static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) { - UNUSED(page); UNUSED(block); -} -#endif - -#if (MI_STAT>0) -// maintain stats for huge objects -static void mi_stat_huge_free(const mi_page_t* page) { - mi_heap_t* const heap = mi_heap_get_default(); - const size_t bsize = mi_page_block_size(page); // to match stats in `page.c:mi_page_huge_alloc` - if (bsize <= MI_HUGE_OBJ_SIZE_MAX) { - mi_heap_stat_decrease(heap, huge, bsize); - } - else { - mi_heap_stat_decrease(heap, giant, bsize); - } -} -#else -static void mi_stat_huge_free(const mi_page_t* page) { - UNUSED(page); -} -#endif - -// ------------------------------------------------------ -// Free -// ------------------------------------------------------ - -// multi-threaded free -static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* block) -{ - // The padding check may access the non-thread-owned page for the key values. - // that is safe as these are constant and the page won't be freed (as the block is not freed yet). - mi_check_padding(page, block); - mi_padding_shrink(page, block, sizeof(mi_block_t)); // for small size, ensure we can fit the delayed thread pointers without triggering overflow detection - #if (MI_DEBUG!=0) - memset(block, MI_DEBUG_FREED, mi_usable_size(block)); - #endif - - // huge page segments are always abandoned and can be freed immediately - mi_segment_t* const segment = _mi_page_segment(page); - if (segment->page_kind==MI_PAGE_HUGE) { - mi_stat_huge_free(page); - _mi_segment_huge_page_free(segment, page, block); - return; - } - - // Try to put the block on either the page-local thread free list, or the heap delayed free list. - mi_thread_free_t tfreex; - bool use_delayed; - mi_thread_free_t tfree = mi_atomic_load_relaxed(&page->xthread_free); - do { - use_delayed = (mi_tf_delayed(tfree) == MI_USE_DELAYED_FREE); - if (mi_unlikely(use_delayed)) { - // unlikely: this only happens on the first concurrent free in a page that is in the full list - tfreex = mi_tf_set_delayed(tfree,MI_DELAYED_FREEING); + #if MI_DEBUG>3 + if (p != NULL && zero) { + mi_assert_expensive(mi_mem_is_zero(p, size)); } - else { - // usual: directly add to page thread_free list - mi_block_set_next(page, block, mi_tf_block(tfree)); - tfreex = mi_tf_set_block(tfree,block); - } - } while (!mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex)); - - if (mi_unlikely(use_delayed)) { - // racy read on `heap`, but ok because MI_DELAYED_FREEING is set (see `mi_heap_delete` and `mi_heap_collect_abandon`) - mi_heap_t* const heap = (mi_heap_t*)(mi_atomic_load_acquire(&page->xheap)); //mi_page_heap(page); - mi_assert_internal(heap != NULL); - if (heap != NULL) { - // add to the delayed free list of this heap. (do this atomically as the lock only protects heap memory validity) - mi_block_t* dfree = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free); - do { - mi_block_set_nextx(heap,block,dfree, heap->keys); - } while (!mi_atomic_cas_ptr_weak_release(mi_block_t,&heap->thread_delayed_free, &dfree, block)); - } - - // and reset the MI_DELAYED_FREEING flag - tfree = mi_atomic_load_relaxed(&page->xthread_free); - do { - tfreex = tfree; - mi_assert_internal(mi_tf_delayed(tfree) == MI_DELAYED_FREEING); - tfreex = mi_tf_set_delayed(tfree,MI_NO_DELAYED_FREE); - } while (!mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex)); - } -} - -// regular free -static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block) -{ - // and push it on the free list - if (mi_likely(local)) { - // owning thread can free a block directly - if (mi_unlikely(mi_check_is_double_free(page, block))) return; - mi_check_padding(page, block); - #if (MI_DEBUG!=0) - memset(block, MI_DEBUG_FREED, mi_page_block_size(page)); #endif - mi_block_set_next(page, block, page->local_free); - page->local_free = block; - page->used--; - if (mi_unlikely(mi_page_all_free(page))) { - _mi_page_retire(page); - } - else if (mi_unlikely(mi_page_is_in_full(page))) { - _mi_page_unfull(page); - } - } - else { - _mi_free_block_mt(page,block); + return p; } } - -// Adjust a block that was allocated aligned, to the actual start of the block in the page. -mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p) { - mi_assert_internal(page!=NULL && p!=NULL); - const size_t diff = (uint8_t*)p - _mi_page_start(segment, page, NULL); - const size_t adjust = (diff % mi_page_block_size(page)); - return (mi_block_t*)((uintptr_t)p - adjust); -} - - -static void mi_decl_noinline mi_free_generic(const mi_segment_t* segment, bool local, void* p) { - mi_page_t* const page = _mi_segment_page_of(segment, p); - mi_block_t* const block = (mi_page_has_aligned(page) ? _mi_page_ptr_unalign(segment, page, p) : (mi_block_t*)p); - mi_stat_free(page, block); - _mi_free_block(page, local, block); +extern inline void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept { + return _mi_heap_malloc_zero_ex(heap, size, zero, 0); } -// Get the segment data belonging to a pointer -// This is just a single `and` in assembly but does further checks in debug mode -// (and secure mode) if this was a valid pointer. -static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* msg) -{ - UNUSED(msg); -#if (MI_DEBUG>0) - if (mi_unlikely(((uintptr_t)p & (MI_INTPTR_SIZE - 1)) != 0)) { - _mi_error_message(EINVAL, "%s: invalid (unaligned) pointer: %p\n", msg, p); - return NULL; - } -#endif - - mi_segment_t* const segment = _mi_ptr_segment(p); - if (mi_unlikely(segment == NULL)) return NULL; // checks also for (p==NULL) - -#if (MI_DEBUG>0) - if (mi_unlikely(!mi_is_in_heap_region(p))) { - _mi_warning_message("%s: pointer might not point to a valid heap region: %p\n" - "(this may still be a valid very large allocation (over 64MiB))\n", msg, p); - if (mi_likely(_mi_ptr_cookie(segment) == segment->cookie)) { - _mi_warning_message("(yes, the previous pointer %p was valid after all)\n", p); - } - } -#endif -#if (MI_DEBUG>0 || MI_SECURE>=4) - if (mi_unlikely(_mi_ptr_cookie(segment) != segment->cookie)) { - _mi_error_message(EINVAL, "%s: pointer does not point to a valid heap space: %p\n", p); - } -#endif - return segment; -} - - -// Free a block -void mi_free(void* p) mi_attr_noexcept -{ - const mi_segment_t* const segment = mi_checked_ptr_segment(p,"mi_free"); - if (mi_unlikely(segment == NULL)) return; - - const uintptr_t tid = _mi_thread_id(); - mi_page_t* const page = _mi_segment_page_of(segment, p); - mi_block_t* const block = (mi_block_t*)p; - - if (mi_likely(tid == segment->thread_id && page->flags.full_aligned == 0)) { // the thread id matches and it is not a full page, nor has aligned blocks - // local, and not full or aligned - if (mi_unlikely(mi_check_is_double_free(page,block))) return; - mi_check_padding(page, block); - mi_stat_free(page, block); - #if (MI_DEBUG!=0) - memset(block, MI_DEBUG_FREED, mi_page_block_size(page)); - #endif - mi_block_set_next(page, block, page->local_free); - page->local_free = block; - if (mi_unlikely(--page->used == 0)) { // using this expression generates better code than: page->used--; if (mi_page_all_free(page)) - _mi_page_retire(page); - } - } - else { - // non-local, aligned blocks, or a full page; use the more generic path - // note: recalc page in generic to improve code generation - mi_free_generic(segment, tid == segment->thread_id, p); - } +mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept { + return _mi_heap_malloc_zero(heap, size, false); } -bool _mi_free_delayed_block(mi_block_t* block) { - // get segment and page - const mi_segment_t* const segment = _mi_ptr_segment(block); - mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie); - mi_assert_internal(_mi_thread_id() == segment->thread_id); - mi_page_t* const page = _mi_segment_page_of(segment, block); - - // Clear the no-delayed flag so delayed freeing is used again for this page. - // This must be done before collecting the free lists on this page -- otherwise - // some blocks may end up in the page `thread_free` list with no blocks in the - // heap `thread_delayed_free` list which may cause the page to be never freed! - // (it would only be freed if we happen to scan it in `mi_page_queue_find_free_ex`) - _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, false /* dont overwrite never delayed */); - - // collect all other non-local frees to ensure up-to-date `used` count - _mi_page_free_collect(page, false); - - // and free the block (possibly freeing the page as well since used is updated) - _mi_free_block(page, true, block); - return true; -} - -// Bytes available in a block -static size_t _mi_usable_size(const void* p, const char* msg) mi_attr_noexcept { - const mi_segment_t* const segment = mi_checked_ptr_segment(p,msg); - if (segment==NULL) return 0; - const mi_page_t* const page = _mi_segment_page_of(segment, p); - const mi_block_t* block = (const mi_block_t*)p; - if (mi_unlikely(mi_page_has_aligned(page))) { - block = _mi_page_ptr_unalign(segment, page, p); - size_t size = mi_page_usable_size_of(page, block); - ptrdiff_t const adjust = (uint8_t*)p - (uint8_t*)block; - mi_assert_internal(adjust >= 0 && (size_t)adjust <= size); - return (size - adjust); - } - else { - return mi_page_usable_size_of(page, block); - } +mi_decl_nodiscard extern inline mi_decl_restrict void* mi_malloc(size_t size) mi_attr_noexcept { + return mi_heap_malloc(mi_prim_get_default_heap(), size); } -size_t mi_usable_size(const void* p) mi_attr_noexcept { - return _mi_usable_size(p, "mi_usable_size"); +// zero initialized small block +mi_decl_nodiscard mi_decl_restrict void* mi_zalloc_small(size_t size) mi_attr_noexcept { + return mi_heap_malloc_small_zero(mi_prim_get_default_heap(), size, true); } - -// ------------------------------------------------------ -// ensure explicit external inline definitions are emitted! -// ------------------------------------------------------ - -#ifdef __cplusplus -void* _mi_externs[] = { - (void*)&_mi_page_malloc, - (void*)&mi_malloc, - (void*)&mi_malloc_small, - (void*)&mi_zalloc_small, - (void*)&mi_heap_malloc, - (void*)&mi_heap_zalloc, - (void*)&mi_heap_malloc_small -}; -#endif - - -// ------------------------------------------------------ -// Allocation extensions -// ------------------------------------------------------ - -void mi_free_size(void* p, size_t size) mi_attr_noexcept { - UNUSED_RELEASE(size); - mi_assert(p == NULL || size <= _mi_usable_size(p,"mi_free_size")); - mi_free(p); +mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_zalloc(mi_heap_t* heap, size_t size) mi_attr_noexcept { + return _mi_heap_malloc_zero(heap, size, true); } -void mi_free_size_aligned(void* p, size_t size, size_t alignment) mi_attr_noexcept { - UNUSED_RELEASE(alignment); - mi_assert(((uintptr_t)p % alignment) == 0); - mi_free_size(p,size); +mi_decl_nodiscard mi_decl_restrict void* mi_zalloc(size_t size) mi_attr_noexcept { + return mi_heap_zalloc(mi_prim_get_default_heap(),size); } -void mi_free_aligned(void* p, size_t alignment) mi_attr_noexcept { - UNUSED_RELEASE(alignment); - mi_assert(((uintptr_t)p % alignment) == 0); - mi_free(p); -} -extern inline mi_decl_restrict void* mi_heap_calloc(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept { +mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_calloc(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept { size_t total; if (mi_count_size_overflow(count,size,&total)) return NULL; return mi_heap_zalloc(heap,total); } -mi_decl_restrict void* mi_calloc(size_t count, size_t size) mi_attr_noexcept { - return mi_heap_calloc(mi_get_default_heap(),count,size); +mi_decl_nodiscard mi_decl_restrict void* mi_calloc(size_t count, size_t size) mi_attr_noexcept { + return mi_heap_calloc(mi_prim_get_default_heap(),count,size); } // Uninitialized `calloc` -extern mi_decl_restrict void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept { +mi_decl_nodiscard extern mi_decl_restrict void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept { size_t total; if (mi_count_size_overflow(count, size, &total)) return NULL; return mi_heap_malloc(heap, total); } -mi_decl_restrict void* mi_mallocn(size_t count, size_t size) mi_attr_noexcept { - return mi_heap_mallocn(mi_get_default_heap(),count,size); +mi_decl_nodiscard mi_decl_restrict void* mi_mallocn(size_t count, size_t size) mi_attr_noexcept { + return mi_heap_mallocn(mi_prim_get_default_heap(),count,size); } -// Expand in place or fail +// Expand (or shrink) in place (or fail) void* mi_expand(void* p, size_t newsize) mi_attr_noexcept { + #if MI_PADDING + // we do not shrink/expand with padding enabled + MI_UNUSED(p); MI_UNUSED(newsize); + return NULL; + #else if (p == NULL) return NULL; - size_t size = _mi_usable_size(p,"mi_expand"); + const size_t size = _mi_usable_size(p,"mi_expand"); if (newsize > size) return NULL; return p; // it fits + #endif } -void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) { - if (p == NULL) return _mi_heap_malloc_zero(heap,newsize,zero); - size_t size = _mi_usable_size(p,"mi_realloc"); - if (newsize <= size && newsize >= (size / 2)) { +void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_attr_noexcept { + // if p == NULL then behave as malloc. + // else if size == 0 then reallocate to a zero-sized block (and don't return NULL, just as mi_malloc(0)). + // (this means that returning NULL always indicates an error, and `p` will not have been freed in that case.) + const size_t size = _mi_usable_size(p,"mi_realloc"); // also works if p == NULL (with size 0) + if mi_unlikely(newsize <= size && newsize >= (size / 2) && newsize > 0) { // note: newsize must be > 0 or otherwise we return NULL for realloc(NULL,0) + mi_assert_internal(p!=NULL); + // todo: do not track as the usable size is still the same in the free; adjust potential padding? + // mi_track_resize(p,size,newsize) + // if (newsize < size) { mi_track_mem_noaccess((uint8_t*)p + newsize, size - newsize); } return p; // reallocation still fits and not more than 50% waste } void* newp = mi_heap_malloc(heap,newsize); - if (mi_likely(newp != NULL)) { + if mi_likely(newp != NULL) { if (zero && newsize > size) { // also set last word in the previous allocation to zero to ensure any padding is zero-initialized - size_t start = (size >= sizeof(intptr_t) ? size - sizeof(intptr_t) : 0); - memset((uint8_t*)newp + start, 0, newsize - start); + const size_t start = (size >= sizeof(intptr_t) ? size - sizeof(intptr_t) : 0); + _mi_memzero((uint8_t*)newp + start, newsize - start); + } + else if (newsize == 0) { + ((uint8_t*)newp)[0] = 0; // work around for applications that expect zero-reallocation to be zero initialized (issue #725) + } + if mi_likely(p != NULL) { + const size_t copysize = (newsize > size ? size : newsize); + mi_track_mem_defined(p,copysize); // _mi_useable_size may be too large for byte precise memory tracking.. + _mi_memcpy(newp, p, copysize); + mi_free(p); // only free the original pointer if successful } - _mi_memcpy_aligned(newp, p, (newsize > size ? size : newsize)); - mi_free(p); // only free if successful } return newp; } -void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept { +mi_decl_nodiscard void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept { return _mi_heap_realloc_zero(heap, p, newsize, false); } -void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept { +mi_decl_nodiscard void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept { size_t total; if (mi_count_size_overflow(count, size, &total)) return NULL; return mi_heap_realloc(heap, p, total); @@ -647,42 +282,42 @@ void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size) mi_a // Reallocate but free `p` on errors -void* mi_heap_reallocf(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept { +mi_decl_nodiscard void* mi_heap_reallocf(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept { void* newp = mi_heap_realloc(heap, p, newsize); if (newp==NULL && p!=NULL) mi_free(p); return newp; } -void* mi_heap_rezalloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept { +mi_decl_nodiscard void* mi_heap_rezalloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept { return _mi_heap_realloc_zero(heap, p, newsize, true); } -void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept { +mi_decl_nodiscard void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept { size_t total; if (mi_count_size_overflow(count, size, &total)) return NULL; return mi_heap_rezalloc(heap, p, total); } -void* mi_realloc(void* p, size_t newsize) mi_attr_noexcept { - return mi_heap_realloc(mi_get_default_heap(),p,newsize); +mi_decl_nodiscard void* mi_realloc(void* p, size_t newsize) mi_attr_noexcept { + return mi_heap_realloc(mi_prim_get_default_heap(),p,newsize); } -void* mi_reallocn(void* p, size_t count, size_t size) mi_attr_noexcept { - return mi_heap_reallocn(mi_get_default_heap(),p,count,size); +mi_decl_nodiscard void* mi_reallocn(void* p, size_t count, size_t size) mi_attr_noexcept { + return mi_heap_reallocn(mi_prim_get_default_heap(),p,count,size); } // Reallocate but free `p` on errors -void* mi_reallocf(void* p, size_t newsize) mi_attr_noexcept { - return mi_heap_reallocf(mi_get_default_heap(),p,newsize); +mi_decl_nodiscard void* mi_reallocf(void* p, size_t newsize) mi_attr_noexcept { + return mi_heap_reallocf(mi_prim_get_default_heap(),p,newsize); } -void* mi_rezalloc(void* p, size_t newsize) mi_attr_noexcept { - return mi_heap_rezalloc(mi_get_default_heap(), p, newsize); +mi_decl_nodiscard void* mi_rezalloc(void* p, size_t newsize) mi_attr_noexcept { + return mi_heap_rezalloc(mi_prim_get_default_heap(), p, newsize); } -void* mi_recalloc(void* p, size_t count, size_t size) mi_attr_noexcept { - return mi_heap_recalloc(mi_get_default_heap(), p, count, size); +mi_decl_nodiscard void* mi_recalloc(void* p, size_t count, size_t size) mi_attr_noexcept { + return mi_heap_recalloc(mi_prim_get_default_heap(), p, count, size); } @@ -692,33 +327,33 @@ void* mi_recalloc(void* p, size_t count, size_t size) mi_attr_noexcept { // ------------------------------------------------------ // `strdup` using mi_malloc -mi_decl_restrict char* mi_heap_strdup(mi_heap_t* heap, const char* s) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict char* mi_heap_strdup(mi_heap_t* heap, const char* s) mi_attr_noexcept { if (s == NULL) return NULL; - size_t n = strlen(s); - char* t = (char*)mi_heap_malloc(heap,n+1); - if (t != NULL) _mi_memcpy(t, s, n + 1); + size_t len = _mi_strlen(s); + char* t = (char*)mi_heap_malloc(heap,len+1); + if (t == NULL) return NULL; + _mi_memcpy(t, s, len); + t[len] = 0; return t; } -mi_decl_restrict char* mi_strdup(const char* s) mi_attr_noexcept { - return mi_heap_strdup(mi_get_default_heap(), s); +mi_decl_nodiscard mi_decl_restrict char* mi_strdup(const char* s) mi_attr_noexcept { + return mi_heap_strdup(mi_prim_get_default_heap(), s); } // `strndup` using mi_malloc -mi_decl_restrict char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n) mi_attr_noexcept { if (s == NULL) return NULL; - const char* end = (const char*)memchr(s, 0, n); // find end of string in the first `n` characters (returns NULL if not found) - const size_t m = (end != NULL ? (size_t)(end - s) : n); // `m` is the minimum of `n` or the end-of-string - mi_assert_internal(m <= n); - char* t = (char*)mi_heap_malloc(heap, m+1); + const size_t len = _mi_strnlen(s,n); // len <= n + char* t = (char*)mi_heap_malloc(heap, len+1); if (t == NULL) return NULL; - _mi_memcpy(t, s, m); - t[m] = 0; + _mi_memcpy(t, s, len); + t[len] = 0; return t; } -mi_decl_restrict char* mi_strndup(const char* s, size_t n) mi_attr_noexcept { - return mi_heap_strndup(mi_get_default_heap(),s,n); +mi_decl_nodiscard mi_decl_restrict char* mi_strndup(const char* s, size_t n) mi_attr_noexcept { + return mi_heap_strndup(mi_prim_get_default_heap(),s,n); } #ifndef __wasi__ @@ -728,7 +363,7 @@ mi_decl_restrict char* mi_strndup(const char* s, size_t n) mi_attr_noexcept { #define PATH_MAX MAX_PATH #endif #include <windows.h> -mi_decl_restrict char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) mi_attr_noexcept { // todo: use GetFullPathNameW to allow longer file names char buf[PATH_MAX]; DWORD res = GetFullPathNameA(fname, PATH_MAX, (resolved_name == NULL ? buf : resolved_name), NULL); @@ -746,8 +381,9 @@ mi_decl_restrict char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char } } #else +/* #include <unistd.h> // pathconf -static size_t mi_path_max() { +static size_t mi_path_max(void) { static size_t path_max = 0; if (path_max <= 0) { long m = pathconf("/",_PC_PATH_MAX); @@ -757,25 +393,37 @@ static size_t mi_path_max() { } return path_max; } - +*/ char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) mi_attr_noexcept { if (resolved_name != NULL) { return realpath(fname,resolved_name); } else { - size_t n = mi_path_max(); + char* rname = realpath(fname, NULL); + if (rname == NULL) return NULL; + char* result = mi_heap_strdup(heap, rname); + mi_cfree(rname); // use checked free (which may be redirected to our free but that's ok) + // note: with ASAN realpath is intercepted and mi_cfree may leak the returned pointer :-( + return result; + } + /* + const size_t n = mi_path_max(); char* buf = (char*)mi_malloc(n+1); - if (buf==NULL) return NULL; + if (buf == NULL) { + errno = ENOMEM; + return NULL; + } char* rname = realpath(fname,buf); char* result = mi_heap_strndup(heap,rname,n); // ok if `rname==NULL` mi_free(buf); return result; } + */ } #endif -mi_decl_restrict char* mi_realpath(const char* fname, char* resolved_name) mi_attr_noexcept { - return mi_heap_realpath(mi_get_default_heap(),fname,resolved_name); +mi_decl_nodiscard mi_decl_restrict char* mi_realpath(const char* fname, char* resolved_name) mi_attr_noexcept { + return mi_heap_realpath(mi_prim_get_default_heap(),fname,resolved_name); } #endif @@ -796,9 +444,16 @@ static bool mi_try_new_handler(bool nothrow) { #else std::new_handler h = std::set_new_handler(); std::set_new_handler(h); - #endif + #endif if (h==NULL) { - if (!nothrow) throw std::bad_alloc(); + _mi_error_message(ENOMEM, "out of memory in 'new'"); + #if defined(_CPPUNWIND) || defined(__cpp_exceptions) // exceptions are not always enabled + if (!nothrow) { + throw std::bad_alloc(); + } + #else + MI_UNUSED(nothrow); + #endif return false; } else { @@ -807,13 +462,13 @@ static bool mi_try_new_handler(bool nothrow) { } } #else -typedef void (*std_new_handler_t)(); +typedef void (*std_new_handler_t)(void); -#if (defined(__GNUC__) || defined(__clang__)) -std_new_handler_t __attribute((weak)) _ZSt15get_new_handlerv() { +#if (defined(__GNUC__) || (defined(__clang__) && !defined(_MSC_VER))) // exclude clang-cl, see issue #631 +std_new_handler_t __attribute__((weak)) _ZSt15get_new_handlerv(void) { return NULL; } -static std_new_handler_t mi_get_new_handler() { +static std_new_handler_t mi_get_new_handler(void) { return _ZSt15get_new_handlerv(); } #else @@ -826,7 +481,10 @@ static std_new_handler_t mi_get_new_handler() { static bool mi_try_new_handler(bool nothrow) { std_new_handler_t h = mi_get_new_handler(); if (h==NULL) { - if (!nothrow) exit(ENOMEM); // cannot throw in plain C, use exit as we are out of memory anyway. + _mi_error_message(ENOMEM, "out of memory in 'new'"); + if (!nothrow) { + abort(); // cannot throw in plain C, use abort + } return false; } else { @@ -836,27 +494,53 @@ static bool mi_try_new_handler(bool nothrow) { } #endif -static mi_decl_noinline void* mi_try_new(size_t size, bool nothrow ) { +mi_decl_export mi_decl_noinline void* mi_heap_try_new(mi_heap_t* heap, size_t size, bool nothrow ) { void* p = NULL; while(p == NULL && mi_try_new_handler(nothrow)) { - p = mi_malloc(size); + p = mi_heap_malloc(heap,size); } return p; } -mi_decl_restrict void* mi_new(size_t size) { - void* p = mi_malloc(size); - if (mi_unlikely(p == NULL)) return mi_try_new(size,false); +static mi_decl_noinline void* mi_try_new(size_t size, bool nothrow) { + return mi_heap_try_new(mi_prim_get_default_heap(), size, nothrow); +} + + +mi_decl_nodiscard mi_decl_restrict void* mi_heap_alloc_new(mi_heap_t* heap, size_t size) { + void* p = mi_heap_malloc(heap,size); + if mi_unlikely(p == NULL) return mi_heap_try_new(heap, size, false); return p; } -mi_decl_restrict void* mi_new_nothrow(size_t size) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict void* mi_new(size_t size) { + return mi_heap_alloc_new(mi_prim_get_default_heap(), size); +} + + +mi_decl_nodiscard mi_decl_restrict void* mi_heap_alloc_new_n(mi_heap_t* heap, size_t count, size_t size) { + size_t total; + if mi_unlikely(mi_count_size_overflow(count, size, &total)) { + mi_try_new_handler(false); // on overflow we invoke the try_new_handler once to potentially throw std::bad_alloc + return NULL; + } + else { + return mi_heap_alloc_new(heap,total); + } +} + +mi_decl_nodiscard mi_decl_restrict void* mi_new_n(size_t count, size_t size) { + return mi_heap_alloc_new_n(mi_prim_get_default_heap(), size, count); +} + + +mi_decl_nodiscard mi_decl_restrict void* mi_new_nothrow(size_t size) mi_attr_noexcept { void* p = mi_malloc(size); - if (mi_unlikely(p == NULL)) return mi_try_new(size, true); + if mi_unlikely(p == NULL) return mi_try_new(size, true); return p; } -mi_decl_restrict void* mi_new_aligned(size_t size, size_t alignment) { +mi_decl_nodiscard mi_decl_restrict void* mi_new_aligned(size_t size, size_t alignment) { void* p; do { p = mi_malloc_aligned(size, alignment); @@ -865,7 +549,7 @@ mi_decl_restrict void* mi_new_aligned(size_t size, size_t alignment) { return p; } -mi_decl_restrict void* mi_new_aligned_nothrow(size_t size, size_t alignment) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict void* mi_new_aligned_nothrow(size_t size, size_t alignment) mi_attr_noexcept { void* p; do { p = mi_malloc_aligned(size, alignment); @@ -874,18 +558,7 @@ mi_decl_restrict void* mi_new_aligned_nothrow(size_t size, size_t alignment) mi_ return p; } -mi_decl_restrict void* mi_new_n(size_t count, size_t size) { - size_t total; - if (mi_unlikely(mi_count_size_overflow(count, size, &total))) { - mi_try_new_handler(false); // on overflow we invoke the try_new_handler once to potentially throw std::bad_alloc - return NULL; - } - else { - return mi_new(total); - } -} - -void* mi_new_realloc(void* p, size_t newsize) { +mi_decl_nodiscard void* mi_new_realloc(void* p, size_t newsize) { void* q; do { q = mi_realloc(p, newsize); @@ -893,9 +566,9 @@ void* mi_new_realloc(void* p, size_t newsize) { return q; } -void* mi_new_reallocn(void* p, size_t newcount, size_t size) { +mi_decl_nodiscard void* mi_new_reallocn(void* p, size_t newcount, size_t size) { size_t total; - if (mi_unlikely(mi_count_size_overflow(newcount, size, &total))) { + if mi_unlikely(mi_count_size_overflow(newcount, size, &total)) { mi_try_new_handler(false); // on overflow we invoke the try_new_handler once to potentially throw std::bad_alloc return NULL; } @@ -903,3 +576,23 @@ void* mi_new_reallocn(void* p, size_t newcount, size_t size) { return mi_new_realloc(p, total); } } + +// ------------------------------------------------------ +// ensure explicit external inline definitions are emitted! +// ------------------------------------------------------ + +#ifdef __cplusplus +void* _mi_externs[] = { + (void*)&_mi_page_malloc, + (void*)&_mi_heap_malloc_zero, + (void*)&_mi_heap_malloc_zero_ex, + (void*)&mi_malloc, + (void*)&mi_malloc_small, + (void*)&mi_zalloc_small, + (void*)&mi_heap_malloc, + (void*)&mi_heap_zalloc, + (void*)&mi_heap_malloc_small + // (void*)&mi_heap_alloc_new, + // (void*)&mi_heap_alloc_new_n +}; +#endif diff --git a/contrib/libs/mimalloc/src/arena.c b/contrib/libs/mimalloc/src/arena.c index 0e6615a420..25ce56ec8f 100644 --- a/contrib/libs/mimalloc/src/arena.c +++ b/contrib/libs/mimalloc/src/arena.c @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2019-2021, Microsoft Research, Daan Leijen +Copyright (c) 2019-2023, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. @@ -7,107 +7,214 @@ terms of the MIT license. A copy of the license can be found in the file /* ---------------------------------------------------------------------------- "Arenas" are fixed area's of OS memory from which we can allocate -large blocks (>= MI_ARENA_BLOCK_SIZE, 32MiB). +large blocks (>= MI_ARENA_MIN_BLOCK_SIZE, 4MiB). In contrast to the rest of mimalloc, the arenas are shared between threads and need to be accessed using atomic operations. -Currently arenas are only used to for huge OS page (1GiB) reservations, -otherwise it delegates to direct allocation from the OS. -In the future, we can expose an API to manually add more kinds of arenas -which is sometimes needed for embedded devices or shared memory for example. -(We can also employ this with WASI or `sbrk` systems to reserve large arenas - on demand and be able to reuse them efficiently). - -The arena allocation needs to be thread safe and we use an atomic -bitmap to allocate. The current implementation of the bitmap can -only do this within a field (`uintptr_t`) so we can allocate at most -blocks of 2GiB (64*32MiB) and no object can cross the boundary. This -can lead to fragmentation but fortunately most objects will be regions -of 256MiB in practice. +Arenas are used to for huge OS page (1GiB) reservations or for reserving +OS memory upfront which can be improve performance or is sometimes needed +on embedded devices. We can also employ this with WASI or `sbrk` systems +to reserve large arenas upfront and be able to reuse the memory more effectively. + +The arena allocation needs to be thread safe and we use an atomic bitmap to allocate. -----------------------------------------------------------------------------*/ #include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" #include <string.h> // memset -#include <errno.h> // ENOMEM +#include <errno.h> // ENOMEM #include "bitmap.h" // atomic bitmap - -// os.c -void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_stats_t* stats); -void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* stats); -void _mi_os_free(void* p, size_t size, mi_stats_t* stats); - -void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize); -void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats); - -bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); -bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); - /* ----------------------------------------------------------- Arena allocation ----------------------------------------------------------- */ -#define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE -#define MI_ARENA_BLOCK_SIZE (4*MI_SEGMENT_ALIGN) // 32MiB -#define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2) // 16MiB -#define MI_MAX_ARENAS (64) // not more than 256 (since we use 8 bits in the memid) +// Block info: bit 0 contains the `in_use` bit, the upper bits the +// size in count of arena blocks. +typedef uintptr_t mi_block_info_t; +#define MI_ARENA_BLOCK_SIZE (MI_SEGMENT_SIZE) // 64MiB (must be at least MI_SEGMENT_ALIGN) +#define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2) // 32MiB +#define MI_MAX_ARENAS (112) // not more than 126 (since we use 7 bits in the memid and an arena index + 1) // A memory arena descriptor typedef struct mi_arena_s { + mi_arena_id_t id; // arena id; 0 for non-specific + mi_memid_t memid; // memid of the memory area _Atomic(uint8_t*) start; // the start of the memory area size_t block_count; // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`) size_t field_count; // number of bitmap fields (where `field_count * MI_BITMAP_FIELD_BITS >= block_count`) + size_t meta_size; // size of the arena structure itself (including its bitmaps) + mi_memid_t meta_memid; // memid of the arena structure itself (OS or static allocation) int numa_node; // associated NUMA node - bool is_zero_init; // is the arena zero initialized? - bool is_committed; // is the memory fully committed? (if so, block_committed == NULL) - bool is_large; // large- or huge OS pages (always committed) - _Atomic(uintptr_t) search_idx; // optimization to start the search for free blocks + bool exclusive; // only allow allocations if specifically for this arena + bool is_large; // memory area consists of large- or huge OS pages (always committed) + _Atomic(size_t) search_idx; // optimization to start the search for free blocks + _Atomic(mi_msecs_t) purge_expire; // expiration time when blocks should be decommitted from `blocks_decommit`. mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero? - mi_bitmap_field_t* blocks_committed; // if `!is_committed`, are the blocks committed? + mi_bitmap_field_t* blocks_committed; // are the blocks committed? (can be NULL for memory that cannot be decommitted) + mi_bitmap_field_t* blocks_purge; // blocks that can be (reset) decommitted. (can be NULL for memory that cannot be (reset) decommitted) + mi_bitmap_field_t* blocks_abandoned; // blocks that start with an abandoned segment. (This crosses API's but it is convenient to have here) mi_bitmap_field_t blocks_inuse[1]; // in-place bitmap of in-use blocks (of size `field_count`) + // do not add further fields here as the dirty, committed, purged, and abandoned bitmaps follow the inuse bitmap fields. } mi_arena_t; // The available arenas static mi_decl_cache_align _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS]; -static mi_decl_cache_align _Atomic(uintptr_t) mi_arena_count; // = 0 +static mi_decl_cache_align _Atomic(size_t) mi_arena_count; // = 0 + +//static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int numa_node, bool exclusive, mi_memid_t memid, mi_arena_id_t* arena_id) mi_attr_noexcept; /* ----------------------------------------------------------- - Arena allocations get a memory id where the lower 8 bits are - the arena index +1, and the upper bits the block index. + Arena id's + id = arena_index + 1 ----------------------------------------------------------- */ -// Use `0` as a special id for direct OS allocated memory. -#define MI_MEMID_OS 0 +static size_t mi_arena_id_index(mi_arena_id_t id) { + return (size_t)(id <= 0 ? MI_MAX_ARENAS : id - 1); +} + +static mi_arena_id_t mi_arena_id_create(size_t arena_index) { + mi_assert_internal(arena_index < MI_MAX_ARENAS); + return (int)arena_index + 1; +} + +mi_arena_id_t _mi_arena_id_none(void) { + return 0; +} -static size_t mi_arena_id_create(size_t arena_index, mi_bitmap_index_t bitmap_index) { - mi_assert_internal(arena_index < 0xFE); - mi_assert_internal(((bitmap_index << 8) >> 8) == bitmap_index); // no overflow? - return ((bitmap_index << 8) | ((arena_index+1) & 0xFF)); +static bool mi_arena_id_is_suitable(mi_arena_id_t arena_id, bool arena_is_exclusive, mi_arena_id_t req_arena_id) { + return ((!arena_is_exclusive && req_arena_id == _mi_arena_id_none()) || + (arena_id == req_arena_id)); } -static void mi_arena_id_indices(size_t memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index) { - mi_assert_internal(memid != MI_MEMID_OS); - *arena_index = (memid & 0xFF) - 1; - *bitmap_index = (memid >> 8); +bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_id) { + if (memid.memkind == MI_MEM_ARENA) { + return mi_arena_id_is_suitable(memid.mem.arena.id, memid.mem.arena.is_exclusive, request_arena_id); + } + else { + return mi_arena_id_is_suitable(_mi_arena_id_none(), false, request_arena_id); + } } + +/* ----------------------------------------------------------- + Arena allocations get a (currently) 16-bit memory id where the + lower 8 bits are the arena id, and the upper bits the block index. +----------------------------------------------------------- */ + static size_t mi_block_count_of_size(size_t size) { return _mi_divide_up(size, MI_ARENA_BLOCK_SIZE); } +static size_t mi_arena_block_size(size_t bcount) { + return (bcount * MI_ARENA_BLOCK_SIZE); +} + +static size_t mi_arena_size(mi_arena_t* arena) { + return mi_arena_block_size(arena->block_count); +} + +static mi_memid_t mi_memid_create_arena(mi_arena_id_t id, bool is_exclusive, mi_bitmap_index_t bitmap_index) { + mi_memid_t memid = _mi_memid_create(MI_MEM_ARENA); + memid.mem.arena.id = id; + memid.mem.arena.block_index = bitmap_index; + memid.mem.arena.is_exclusive = is_exclusive; + return memid; +} + +static bool mi_arena_memid_indices(mi_memid_t memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index) { + mi_assert_internal(memid.memkind == MI_MEM_ARENA); + *arena_index = mi_arena_id_index(memid.mem.arena.id); + *bitmap_index = memid.mem.arena.block_index; + return memid.mem.arena.is_exclusive; +} + + + +/* ----------------------------------------------------------- + Special static area for mimalloc internal structures + to avoid OS calls (for example, for the arena metadata) +----------------------------------------------------------- */ + +#define MI_ARENA_STATIC_MAX (MI_INTPTR_SIZE*MI_KiB) // 8 KiB on 64-bit + +static mi_decl_cache_align uint8_t mi_arena_static[MI_ARENA_STATIC_MAX]; // must be cache aligned, see issue #895 +static mi_decl_cache_align _Atomic(size_t) mi_arena_static_top; + +static void* mi_arena_static_zalloc(size_t size, size_t alignment, mi_memid_t* memid) { + *memid = _mi_memid_none(); + if (size == 0 || size > MI_ARENA_STATIC_MAX) return NULL; + const size_t toplow = mi_atomic_load_relaxed(&mi_arena_static_top); + if ((toplow + size) > MI_ARENA_STATIC_MAX) return NULL; + + // try to claim space + if (alignment < MI_MAX_ALIGN_SIZE) { alignment = MI_MAX_ALIGN_SIZE; } + const size_t oversize = size + alignment - 1; + if (toplow + oversize > MI_ARENA_STATIC_MAX) return NULL; + const size_t oldtop = mi_atomic_add_acq_rel(&mi_arena_static_top, oversize); + size_t top = oldtop + oversize; + if (top > MI_ARENA_STATIC_MAX) { + // try to roll back, ok if this fails + mi_atomic_cas_strong_acq_rel(&mi_arena_static_top, &top, oldtop); + return NULL; + } + + // success + *memid = _mi_memid_create(MI_MEM_STATIC); + memid->initially_zero = true; + const size_t start = _mi_align_up(oldtop, alignment); + uint8_t* const p = &mi_arena_static[start]; + _mi_memzero_aligned(p, size); + return p; +} + +static void* mi_arena_meta_zalloc(size_t size, mi_memid_t* memid, mi_stats_t* stats) { + *memid = _mi_memid_none(); + + // try static + void* p = mi_arena_static_zalloc(size, MI_MAX_ALIGN_SIZE, memid); + if (p != NULL) return p; + + // or fall back to the OS + p = _mi_os_alloc(size, memid, stats); + if (p == NULL) return NULL; + + // zero the OS memory if needed + if (!memid->initially_zero) { + _mi_memzero_aligned(p, size); + memid->initially_zero = true; + } + return p; +} + +static void mi_arena_meta_free(void* p, mi_memid_t memid, size_t size, mi_stats_t* stats) { + if (mi_memkind_is_os(memid.memkind)) { + _mi_os_free(p, size, memid, stats); + } + else { + mi_assert(memid.memkind == MI_MEM_STATIC); + } +} + +static void* mi_arena_block_start(mi_arena_t* arena, mi_bitmap_index_t bindex) { + return (arena->start + mi_arena_block_size(mi_bitmap_index_bit(bindex))); +} + + /* ----------------------------------------------------------- Thread safe allocation in an arena ----------------------------------------------------------- */ -static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx) + +// claim the `blocks_inuse` bits +static bool mi_arena_try_claim(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx, mi_stats_t* stats) { - size_t idx = mi_atomic_load_acquire(&arena->search_idx); // start from last search - if (_mi_bitmap_try_find_from_claim_across(arena->blocks_inuse, arena->field_count, idx, blocks, bitmap_idx)) { - mi_atomic_store_release(&arena->search_idx, idx); // start search from here next time + size_t idx = 0; // mi_atomic_load_relaxed(&arena->search_idx); // start from last search; ok to be relaxed as the exact start does not matter + if (_mi_bitmap_try_find_from_claim_across(arena->blocks_inuse, arena->field_count, idx, blocks, bitmap_idx, stats)) { + mi_atomic_store_relaxed(&arena->search_idx, mi_bitmap_index_field(*bitmap_idx)); // start search from found location next time around return true; }; return false; @@ -118,194 +225,713 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* Arena Allocation ----------------------------------------------------------- */ -static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t needed_bcount, - bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld) +static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t arena_index, size_t needed_bcount, + bool commit, mi_memid_t* memid, mi_os_tld_t* tld) { + MI_UNUSED(arena_index); + mi_assert_internal(mi_arena_id_index(arena->id) == arena_index); + mi_bitmap_index_t bitmap_index; - if (!mi_arena_alloc(arena, needed_bcount, &bitmap_index)) return NULL; - - // claimed it! set the dirty bits (todo: no need for an atomic op here?) - void* p = arena->start + (mi_bitmap_index_bit(bitmap_index)*MI_ARENA_BLOCK_SIZE); - *memid = mi_arena_id_create(arena_index, bitmap_index); - *is_zero = _mi_bitmap_claim_across(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL); - *large = arena->is_large; - *is_pinned = (arena->is_large || arena->is_committed); - if (arena->is_committed) { + if (!mi_arena_try_claim(arena, needed_bcount, &bitmap_index, tld->stats)) return NULL; + + // claimed it! + void* p = mi_arena_block_start(arena, bitmap_index); + *memid = mi_memid_create_arena(arena->id, arena->exclusive, bitmap_index); + memid->is_pinned = arena->memid.is_pinned; + + // none of the claimed blocks should be scheduled for a decommit + if (arena->blocks_purge != NULL) { + // this is thread safe as a potential purge only decommits parts that are not yet claimed as used (in `blocks_inuse`). + _mi_bitmap_unclaim_across(arena->blocks_purge, arena->field_count, needed_bcount, bitmap_index); + } + + // set the dirty bits (todo: no need for an atomic op here?) + if (arena->memid.initially_zero && arena->blocks_dirty != NULL) { + memid->initially_zero = _mi_bitmap_claim_across(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL); + } + + // set commit state + if (arena->blocks_committed == NULL) { // always committed - *commit = true; + memid->initially_committed = true; } - else if (*commit) { - // arena not committed as a whole, but commit requested: ensure commit now + else if (commit) { + // commit requested, but the range may not be committed as a whole: ensure it is committed now + memid->initially_committed = true; bool any_uncommitted; _mi_bitmap_claim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted); if (any_uncommitted) { - bool commit_zero; - _mi_os_commit(p, needed_bcount * MI_ARENA_BLOCK_SIZE, &commit_zero, tld->stats); - if (commit_zero) *is_zero = true; + bool commit_zero = false; + if (!_mi_os_commit(p, mi_arena_block_size(needed_bcount), &commit_zero, tld->stats)) { + memid->initially_committed = false; + } + else { + if (commit_zero) { memid->initially_zero = true; } + } } } else { // no need to commit, but check if already fully committed - *commit = _mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index); + memid->initially_committed = _mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index); } + return p; } -void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, - size_t* memid, mi_os_tld_t* tld) +// allocate in a speficic arena +static void* mi_arena_try_alloc_at_id(mi_arena_id_t arena_id, bool match_numa_node, int numa_node, size_t size, size_t alignment, + bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld ) { - mi_assert_internal(commit != NULL && is_pinned != NULL && is_zero != NULL && memid != NULL && tld != NULL); - mi_assert_internal(size > 0); - *memid = MI_MEMID_OS; - *is_zero = false; - *is_pinned = false; - - // try to allocate in an arena if the alignment is small enough - // and the object is not too large or too small. - if (alignment <= MI_SEGMENT_ALIGN && - size >= MI_ARENA_MIN_OBJ_SIZE && - mi_atomic_load_relaxed(&mi_arena_count) > 0) - { - const size_t bcount = mi_block_count_of_size(size); - const int numa_node = _mi_os_numa_node(tld); // current numa node + MI_UNUSED_RELEASE(alignment); + mi_assert_internal(alignment <= MI_SEGMENT_ALIGN); + const size_t bcount = mi_block_count_of_size(size); + const size_t arena_index = mi_arena_id_index(arena_id); + mi_assert_internal(arena_index < mi_atomic_load_relaxed(&mi_arena_count)); + mi_assert_internal(size <= mi_arena_block_size(bcount)); + + // Check arena suitability + mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_index]); + if (arena == NULL) return NULL; + if (!allow_large && arena->is_large) return NULL; + if (!mi_arena_id_is_suitable(arena->id, arena->exclusive, req_arena_id)) return NULL; + if (req_arena_id == _mi_arena_id_none()) { // in not specific, check numa affinity + const bool numa_suitable = (numa_node < 0 || arena->numa_node < 0 || arena->numa_node == numa_node); + if (match_numa_node) { if (!numa_suitable) return NULL; } + else { if (numa_suitable) return NULL; } + } - mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE); + // try to allocate + void* p = mi_arena_try_alloc_at(arena, arena_index, bcount, commit, memid, tld); + mi_assert_internal(p == NULL || _mi_is_aligned(p, alignment)); + return p; +} + + +// allocate from an arena with fallback to the OS +static mi_decl_noinline void* mi_arena_try_alloc(int numa_node, size_t size, size_t alignment, + bool commit, bool allow_large, + mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld ) +{ + MI_UNUSED(alignment); + mi_assert_internal(alignment <= MI_SEGMENT_ALIGN); + const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); + if mi_likely(max_arena == 0) return NULL; + + if (req_arena_id != _mi_arena_id_none()) { + // try a specific arena if requested + if (mi_arena_id_index(req_arena_id) < max_arena) { + void* p = mi_arena_try_alloc_at_id(req_arena_id, true, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld); + if (p != NULL) return p; + } + } + else { // try numa affine allocation - for (size_t i = 0; i < MI_MAX_ARENAS; i++) { - mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); - if (arena==NULL) break; // end reached - if ((arena->numa_node<0 || arena->numa_node==numa_node) && // numa local? - (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages - { - void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_pinned, is_zero, memid, tld); - mi_assert_internal((uintptr_t)p % alignment == 0); - if (p != NULL) return p; - } + for (size_t i = 0; i < max_arena; i++) { + void* p = mi_arena_try_alloc_at_id(mi_arena_id_create(i), true, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld); + if (p != NULL) return p; } + // try from another numa node instead.. - for (size_t i = 0; i < MI_MAX_ARENAS; i++) { - mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); - if (arena==NULL) break; // end reached - if ((arena->numa_node>=0 && arena->numa_node!=numa_node) && // not numa local! - (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages - { - void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_pinned, is_zero, memid, tld); - mi_assert_internal((uintptr_t)p % alignment == 0); + if (numa_node >= 0) { // if numa_node was < 0 (no specific affinity requested), all arena's have been tried already + for (size_t i = 0; i < max_arena; i++) { + void* p = mi_arena_try_alloc_at_id(mi_arena_id_create(i), false /* only proceed if not numa local */, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld); if (p != NULL) return p; } } } + return NULL; +} - // finally, fall back to the OS - if (mi_option_is_enabled(mi_option_limit_os_alloc)) { +// try to reserve a fresh arena space +static bool mi_arena_reserve(size_t req_size, bool allow_large, mi_arena_id_t req_arena_id, mi_arena_id_t *arena_id) +{ + if (_mi_preloading()) return false; // use OS only while pre loading + if (req_arena_id != _mi_arena_id_none()) return false; + + const size_t arena_count = mi_atomic_load_acquire(&mi_arena_count); + if (arena_count > (MI_MAX_ARENAS - 4)) return false; + + size_t arena_reserve = mi_option_get_size(mi_option_arena_reserve); + if (arena_reserve == 0) return false; + + if (!_mi_os_has_virtual_reserve()) { + arena_reserve = arena_reserve/4; // be conservative if virtual reserve is not supported (for WASM for example) + } + arena_reserve = _mi_align_up(arena_reserve, MI_ARENA_BLOCK_SIZE); + if (arena_count >= 8 && arena_count <= 128) { + arena_reserve = ((size_t)1<<(arena_count/8)) * arena_reserve; // scale up the arena sizes exponentially + } + if (arena_reserve < req_size) return false; // should be able to at least handle the current allocation size + + // commit eagerly? + bool arena_commit = false; + if (mi_option_get(mi_option_arena_eager_commit) == 2) { arena_commit = _mi_os_has_overcommit(); } + else if (mi_option_get(mi_option_arena_eager_commit) == 1) { arena_commit = true; } + + return (mi_reserve_os_memory_ex(arena_reserve, arena_commit, allow_large, false /* exclusive? */, arena_id) == 0); +} + + +void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, + mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld) +{ + mi_assert_internal(memid != NULL && tld != NULL); + mi_assert_internal(size > 0); + *memid = _mi_memid_none(); + + const int numa_node = _mi_os_numa_node(tld); // current numa node + + // try to allocate in an arena if the alignment is small enough and the object is not too small (as for heap meta data) + if (!mi_option_is_enabled(mi_option_disallow_arena_alloc) || req_arena_id != _mi_arena_id_none()) { // is arena allocation allowed? + if (size >= MI_ARENA_MIN_OBJ_SIZE && alignment <= MI_SEGMENT_ALIGN && align_offset == 0) { + void* p = mi_arena_try_alloc(numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld); + if (p != NULL) return p; + + // otherwise, try to first eagerly reserve a new arena + if (req_arena_id == _mi_arena_id_none()) { + mi_arena_id_t arena_id = 0; + if (mi_arena_reserve(size, allow_large, req_arena_id, &arena_id)) { + // and try allocate in there + mi_assert_internal(req_arena_id == _mi_arena_id_none()); + p = mi_arena_try_alloc_at_id(arena_id, true, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld); + if (p != NULL) return p; + } + } + } + } + + // if we cannot use OS allocation, return NULL + if (mi_option_is_enabled(mi_option_disallow_os_alloc) || req_arena_id != _mi_arena_id_none()) { errno = ENOMEM; return NULL; } - *is_zero = true; - *memid = MI_MEMID_OS; - void* p = _mi_os_alloc_aligned(size, alignment, *commit, large, tld->stats); - if (p != NULL) *is_pinned = *large; - return p; + + // finally, fall back to the OS + if (align_offset > 0) { + return _mi_os_alloc_aligned_at_offset(size, alignment, align_offset, commit, allow_large, memid, tld->stats); + } + else { + return _mi_os_alloc_aligned(size, alignment, commit, allow_large, memid, tld->stats); + } } -void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld) +void* _mi_arena_alloc(size_t size, bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld) { - return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, commit, large, is_pinned, is_zero, memid, tld); + return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, 0, commit, allow_large, req_arena_id, memid, tld); } + +void* mi_arena_area(mi_arena_id_t arena_id, size_t* size) { + if (size != NULL) *size = 0; + size_t arena_index = mi_arena_id_index(arena_id); + if (arena_index >= MI_MAX_ARENAS) return NULL; + mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_index]); + if (arena == NULL) return NULL; + if (size != NULL) { *size = mi_arena_block_size(arena->block_count); } + return arena->start; +} + + +/* ----------------------------------------------------------- + Arena purge +----------------------------------------------------------- */ + +static long mi_arena_purge_delay(void) { + // <0 = no purging allowed, 0=immediate purging, >0=milli-second delay + return (mi_option_get(mi_option_purge_delay) * mi_option_get(mi_option_arena_purge_mult)); +} + +// reset or decommit in an arena and update the committed/decommit bitmaps +// assumes we own the area (i.e. blocks_in_use is claimed by us) +static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks, mi_stats_t* stats) { + mi_assert_internal(arena->blocks_committed != NULL); + mi_assert_internal(arena->blocks_purge != NULL); + mi_assert_internal(!arena->memid.is_pinned); + const size_t size = mi_arena_block_size(blocks); + void* const p = mi_arena_block_start(arena, bitmap_idx); + bool needs_recommit; + if (_mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx)) { + // all blocks are committed, we can purge freely + needs_recommit = _mi_os_purge(p, size, stats); + } + else { + // some blocks are not committed -- this can happen when a partially committed block is freed + // in `_mi_arena_free` and it is conservatively marked as uncommitted but still scheduled for a purge + // we need to ensure we do not try to reset (as that may be invalid for uncommitted memory), + // and also undo the decommit stats (as it was already adjusted) + mi_assert_internal(mi_option_is_enabled(mi_option_purge_decommits)); + needs_recommit = _mi_os_purge_ex(p, size, false /* allow reset? */, stats); + if (needs_recommit) { _mi_stat_increase(&_mi_stats_main.committed, size); } + } + + // clear the purged blocks + _mi_bitmap_unclaim_across(arena->blocks_purge, arena->field_count, blocks, bitmap_idx); + // update committed bitmap + if (needs_recommit) { + _mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx); + } +} + +// Schedule a purge. This is usually delayed to avoid repeated decommit/commit calls. +// Note: assumes we (still) own the area as we may purge immediately +static void mi_arena_schedule_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks, mi_stats_t* stats) { + mi_assert_internal(arena->blocks_purge != NULL); + const long delay = mi_arena_purge_delay(); + if (delay < 0) return; // is purging allowed at all? + + if (_mi_preloading() || delay == 0) { + // decommit directly + mi_arena_purge(arena, bitmap_idx, blocks, stats); + } + else { + // schedule decommit + mi_msecs_t expire = mi_atomic_loadi64_relaxed(&arena->purge_expire); + if (expire != 0) { + mi_atomic_addi64_acq_rel(&arena->purge_expire, (mi_msecs_t)(delay/10)); // add smallish extra delay + } + else { + mi_atomic_storei64_release(&arena->purge_expire, _mi_clock_now() + delay); + } + _mi_bitmap_claim_across(arena->blocks_purge, arena->field_count, blocks, bitmap_idx, NULL); + } +} + +// purge a range of blocks +// return true if the full range was purged. +// assumes we own the area (i.e. blocks_in_use is claimed by us) +static bool mi_arena_purge_range(mi_arena_t* arena, size_t idx, size_t startidx, size_t bitlen, size_t purge, mi_stats_t* stats) { + const size_t endidx = startidx + bitlen; + size_t bitidx = startidx; + bool all_purged = false; + while (bitidx < endidx) { + // count consequetive ones in the purge mask + size_t count = 0; + while (bitidx + count < endidx && (purge & ((size_t)1 << (bitidx + count))) != 0) { + count++; + } + if (count > 0) { + // found range to be purged + const mi_bitmap_index_t range_idx = mi_bitmap_index_create(idx, bitidx); + mi_arena_purge(arena, range_idx, count, stats); + if (count == bitlen) { + all_purged = true; + } + } + bitidx += (count+1); // +1 to skip the zero bit (or end) + } + return all_purged; +} + +// returns true if anything was purged +static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force, mi_stats_t* stats) +{ + if (arena->memid.is_pinned || arena->blocks_purge == NULL) return false; + mi_msecs_t expire = mi_atomic_loadi64_relaxed(&arena->purge_expire); + if (expire == 0) return false; + if (!force && expire > now) return false; + + // reset expire (if not already set concurrently) + mi_atomic_casi64_strong_acq_rel(&arena->purge_expire, &expire, (mi_msecs_t)0); + + // potential purges scheduled, walk through the bitmap + bool any_purged = false; + bool full_purge = true; + for (size_t i = 0; i < arena->field_count; i++) { + size_t purge = mi_atomic_load_relaxed(&arena->blocks_purge[i]); + if (purge != 0) { + size_t bitidx = 0; + while (bitidx < MI_BITMAP_FIELD_BITS) { + // find consequetive range of ones in the purge mask + size_t bitlen = 0; + while (bitidx + bitlen < MI_BITMAP_FIELD_BITS && (purge & ((size_t)1 << (bitidx + bitlen))) != 0) { + bitlen++; + } + // try to claim the longest range of corresponding in_use bits + const mi_bitmap_index_t bitmap_index = mi_bitmap_index_create(i, bitidx); + while( bitlen > 0 ) { + if (_mi_bitmap_try_claim(arena->blocks_inuse, arena->field_count, bitlen, bitmap_index)) { + break; + } + bitlen--; + } + // actual claimed bits at `in_use` + if (bitlen > 0) { + // read purge again now that we have the in_use bits + purge = mi_atomic_load_acquire(&arena->blocks_purge[i]); + if (!mi_arena_purge_range(arena, i, bitidx, bitlen, purge, stats)) { + full_purge = false; + } + any_purged = true; + // release the claimed `in_use` bits again + _mi_bitmap_unclaim(arena->blocks_inuse, arena->field_count, bitlen, bitmap_index); + } + bitidx += (bitlen+1); // +1 to skip the zero (or end) + } // while bitidx + } // purge != 0 + } + // if not fully purged, make sure to purge again in the future + if (!full_purge) { + const long delay = mi_arena_purge_delay(); + mi_msecs_t expected = 0; + mi_atomic_casi64_strong_acq_rel(&arena->purge_expire,&expected,_mi_clock_now() + delay); + } + return any_purged; +} + +static void mi_arenas_try_purge( bool force, bool visit_all, mi_stats_t* stats ) { + if (_mi_preloading() || mi_arena_purge_delay() <= 0) return; // nothing will be scheduled + + const size_t max_arena = mi_atomic_load_acquire(&mi_arena_count); + if (max_arena == 0) return; + + // allow only one thread to purge at a time + static mi_atomic_guard_t purge_guard; + mi_atomic_guard(&purge_guard) + { + mi_msecs_t now = _mi_clock_now(); + size_t max_purge_count = (visit_all ? max_arena : 1); + for (size_t i = 0; i < max_arena; i++) { + mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]); + if (arena != NULL) { + if (mi_arena_try_purge(arena, now, force, stats)) { + if (max_purge_count <= 1) break; + max_purge_count--; + } + } + } + } +} + + /* ----------------------------------------------------------- Arena free ----------------------------------------------------------- */ -void _mi_arena_free(void* p, size_t size, size_t memid, bool all_committed, mi_stats_t* stats) { +void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memid, mi_stats_t* stats) { mi_assert_internal(size > 0 && stats != NULL); + mi_assert_internal(committed_size <= size); if (p==NULL) return; if (size==0) return; - if (memid == MI_MEMID_OS) { + const bool all_committed = (committed_size == size); + + if (mi_memkind_is_os(memid.memkind)) { // was a direct OS allocation, pass through - _mi_os_free_ex(p, size, all_committed, stats); + if (!all_committed && committed_size > 0) { + // if partially committed, adjust the committed stats (as `_mi_os_free` will increase decommit by the full size) + _mi_stat_decrease(&_mi_stats_main.committed, committed_size); + } + _mi_os_free(p, size, memid, stats); } - else { + else if (memid.memkind == MI_MEM_ARENA) { // allocated in an arena size_t arena_idx; size_t bitmap_idx; - mi_arena_id_indices(memid, &arena_idx, &bitmap_idx); + mi_arena_memid_indices(memid, &arena_idx, &bitmap_idx); mi_assert_internal(arena_idx < MI_MAX_ARENAS); - mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t,&mi_arenas[arena_idx]); + mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t,&mi_arenas[arena_idx]); mi_assert_internal(arena != NULL); const size_t blocks = mi_block_count_of_size(size); + // checks if (arena == NULL) { - _mi_error_message(EINVAL, "trying to free from non-existent arena: %p, size %zu, memid: 0x%zx\n", p, size, memid); + _mi_error_message(EINVAL, "trying to free from an invalid arena: %p, size %zu, memid: 0x%zx\n", p, size, memid); return; } mi_assert_internal(arena->field_count > mi_bitmap_index_field(bitmap_idx)); if (arena->field_count <= mi_bitmap_index_field(bitmap_idx)) { - _mi_error_message(EINVAL, "trying to free from non-existent arena block: %p, size %zu, memid: 0x%zx\n", p, size, memid); + _mi_error_message(EINVAL, "trying to free from an invalid arena block: %p, size %zu, memid: 0x%zx\n", p, size, memid); return; } + + // need to set all memory to undefined as some parts may still be marked as no_access (like padding etc.) + mi_track_mem_undefined(p,size); + // potentially decommit - if (arena->is_committed) { - mi_assert_internal(all_committed); + if (arena->memid.is_pinned || arena->blocks_committed == NULL) { + mi_assert_internal(all_committed); } else { mi_assert_internal(arena->blocks_committed != NULL); - _mi_os_decommit(p, blocks * MI_ARENA_BLOCK_SIZE, stats); // ok if this fails - _mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx); + mi_assert_internal(arena->blocks_purge != NULL); + + if (!all_committed) { + // mark the entire range as no longer committed (so we recommit the full range when re-using) + _mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx); + mi_track_mem_noaccess(p,size); + if (committed_size > 0) { + // if partially committed, adjust the committed stats (is it will be recommitted when re-using) + // in the delayed purge, we now need to not count a decommit if the range is not marked as committed. + _mi_stat_decrease(&_mi_stats_main.committed, committed_size); + } + // note: if not all committed, it may be that the purge will reset/decommit the entire range + // that contains already decommitted parts. Since purge consistently uses reset or decommit that + // works (as we should never reset decommitted parts). + } + // (delay) purge the entire range + mi_arena_schedule_purge(arena, bitmap_idx, blocks, stats); } - // and make it available to others again + + // and make it available to others again bool all_inuse = _mi_bitmap_unclaim_across(arena->blocks_inuse, arena->field_count, blocks, bitmap_idx); if (!all_inuse) { - _mi_error_message(EAGAIN, "trying to free an already freed block: %p, size %zu\n", p, size); + _mi_error_message(EAGAIN, "trying to free an already freed arena block: %p, size %zu\n", p, size); return; }; } + else { + // arena was none, external, or static; nothing to do + mi_assert_internal(memid.memkind < MI_MEM_OS); + } + + // purge expired decommits + mi_arenas_try_purge(false, false, stats); +} + +// destroy owned arenas; this is unsafe and should only be done using `mi_option_destroy_on_exit` +// for dynamic libraries that are unloaded and need to release all their allocated memory. +static void mi_arenas_unsafe_destroy(void) { + const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); + size_t new_max_arena = 0; + for (size_t i = 0; i < max_arena; i++) { + mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]); + if (arena != NULL) { + if (arena->start != NULL && mi_memkind_is_os(arena->memid.memkind)) { + mi_atomic_store_ptr_release(mi_arena_t, &mi_arenas[i], NULL); + _mi_os_free(arena->start, mi_arena_size(arena), arena->memid, &_mi_stats_main); + } + else { + new_max_arena = i; + } + mi_arena_meta_free(arena, arena->meta_memid, arena->meta_size, &_mi_stats_main); + } + } + + // try to lower the max arena. + size_t expected = max_arena; + mi_atomic_cas_strong_acq_rel(&mi_arena_count, &expected, new_max_arena); +} + +// Purge the arenas; if `force_purge` is true, amenable parts are purged even if not yet expired +void _mi_arenas_collect(bool force_purge, mi_stats_t* stats) { + mi_arenas_try_purge(force_purge, force_purge /* visit all? */, stats); +} + +// destroy owned arenas; this is unsafe and should only be done using `mi_option_destroy_on_exit` +// for dynamic libraries that are unloaded and need to release all their allocated memory. +void _mi_arena_unsafe_destroy_all(mi_stats_t* stats) { + mi_arenas_unsafe_destroy(); + _mi_arenas_collect(true /* force purge */, stats); // purge non-owned arenas +} + +// Is a pointer inside any of our arenas? +bool _mi_arena_contains(const void* p) { + const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); + for (size_t i = 0; i < max_arena; i++) { + mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]); + if (arena != NULL && arena->start <= (const uint8_t*)p && arena->start + mi_arena_block_size(arena->block_count) > (const uint8_t*)p) { + return true; + } + } + return false; +} + +/* ----------------------------------------------------------- + Abandoned blocks/segments. + This is used to atomically abandon/reclaim segments + (and crosses the arena API but it is convenient to have here). + Abandoned segments still have live blocks; they get reclaimed + when a thread frees a block in it, or when a thread needs a fresh + segment; these threads scan the abandoned segments through + the arena bitmaps. +----------------------------------------------------------- */ + +// Maintain a count of all abandoned segments +static mi_decl_cache_align _Atomic(size_t)abandoned_count; + +size_t _mi_arena_segment_abandoned_count(void) { + return mi_atomic_load_relaxed(&abandoned_count); +} + +// reclaim a specific abandoned segment; `true` on success. +// sets the thread_id. +bool _mi_arena_segment_clear_abandoned(mi_segment_t* segment ) +{ + if (segment->memid.memkind != MI_MEM_ARENA) { + // not in an arena, consider it un-abandoned now. + // but we need to still claim it atomically -- we use the thread_id for that. + size_t expected = 0; + if (mi_atomic_cas_strong_acq_rel(&segment->thread_id, &expected, _mi_thread_id())) { + mi_atomic_decrement_relaxed(&abandoned_count); + return true; + } + else { + return false; + } + } + // arena segment: use the blocks_abandoned bitmap. + size_t arena_idx; + size_t bitmap_idx; + mi_arena_memid_indices(segment->memid, &arena_idx, &bitmap_idx); + mi_assert_internal(arena_idx < MI_MAX_ARENAS); + mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_idx]); + mi_assert_internal(arena != NULL); + bool was_marked = _mi_bitmap_unclaim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx); + if (was_marked) { + mi_assert_internal(mi_atomic_load_relaxed(&segment->thread_id) == 0); + mi_atomic_decrement_relaxed(&abandoned_count); + mi_atomic_store_release(&segment->thread_id, _mi_thread_id()); + } + // mi_assert_internal(was_marked); + mi_assert_internal(!was_marked || _mi_bitmap_is_claimed(arena->blocks_inuse, arena->field_count, 1, bitmap_idx)); + //mi_assert_internal(arena->blocks_committed == NULL || _mi_bitmap_is_claimed(arena->blocks_committed, arena->field_count, 1, bitmap_idx)); + return was_marked; +} + +// mark a specific segment as abandoned +// clears the thread_id. +void _mi_arena_segment_mark_abandoned(mi_segment_t* segment) +{ + mi_atomic_store_release(&segment->thread_id, 0); + mi_assert_internal(segment->used == segment->abandoned); + if (segment->memid.memkind != MI_MEM_ARENA) { + // not in an arena; count it as abandoned and return + mi_atomic_increment_relaxed(&abandoned_count); + return; + } + size_t arena_idx; + size_t bitmap_idx; + mi_arena_memid_indices(segment->memid, &arena_idx, &bitmap_idx); + mi_assert_internal(arena_idx < MI_MAX_ARENAS); + mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_idx]); + mi_assert_internal(arena != NULL); + const bool was_unmarked = _mi_bitmap_claim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx, NULL); + if (was_unmarked) { mi_atomic_increment_relaxed(&abandoned_count); } + mi_assert_internal(was_unmarked); + mi_assert_internal(_mi_bitmap_is_claimed(arena->blocks_inuse, arena->field_count, 1, bitmap_idx)); +} + +// start a cursor at a randomized arena +void _mi_arena_field_cursor_init(mi_heap_t* heap, mi_arena_field_cursor_t* current) { + const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); + current->start = (max_arena == 0 ? 0 : (mi_arena_id_t)( _mi_heap_random_next(heap) % max_arena)); + current->count = 0; + current->bitmap_idx = 0; +} + +// reclaim abandoned segments +// this does not set the thread id (so it appears as still abandoned) +mi_segment_t* _mi_arena_segment_clear_abandoned_next(mi_arena_field_cursor_t* previous ) +{ + const int max_arena = (int)mi_atomic_load_relaxed(&mi_arena_count); + if (max_arena <= 0 || mi_atomic_load_relaxed(&abandoned_count) == 0) return NULL; + + int count = previous->count; + size_t field_idx = mi_bitmap_index_field(previous->bitmap_idx); + size_t bit_idx = mi_bitmap_index_bit_in_field(previous->bitmap_idx) + 1; + // visit arena's (from previous) + for (; count < max_arena; count++, field_idx = 0, bit_idx = 0) { + mi_arena_id_t arena_idx = previous->start + count; + if (arena_idx >= max_arena) { arena_idx = arena_idx % max_arena; } // wrap around + mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_idx]); + if (arena != NULL) { + // visit the abandoned fields (starting at previous_idx) + for ( ; field_idx < arena->field_count; field_idx++, bit_idx = 0) { + size_t field = mi_atomic_load_relaxed(&arena->blocks_abandoned[field_idx]); + if mi_unlikely(field != 0) { // skip zero fields quickly + // visit each set bit in the field (todo: maybe use `ctz` here?) + for ( ; bit_idx < MI_BITMAP_FIELD_BITS; bit_idx++) { + // pre-check if the bit is set + size_t mask = ((size_t)1 << bit_idx); + if mi_unlikely((field & mask) == mask) { + mi_bitmap_index_t bitmap_idx = mi_bitmap_index_create(field_idx, bit_idx); + // try to reclaim it atomically + if (_mi_bitmap_unclaim(arena->blocks_abandoned, arena->field_count, 1, bitmap_idx)) { + mi_atomic_decrement_relaxed(&abandoned_count); + previous->bitmap_idx = bitmap_idx; + previous->count = count; + mi_assert_internal(_mi_bitmap_is_claimed(arena->blocks_inuse, arena->field_count, 1, bitmap_idx)); + mi_segment_t* segment = (mi_segment_t*)mi_arena_block_start(arena, bitmap_idx); + mi_assert_internal(mi_atomic_load_relaxed(&segment->thread_id) == 0); + //mi_assert_internal(arena->blocks_committed == NULL || _mi_bitmap_is_claimed(arena->blocks_committed, arena->field_count, 1, bitmap_idx)); + return segment; + } + } + } + } + } + } + } + // no more found + previous->bitmap_idx = 0; + previous->count = 0; + return NULL; } + /* ----------------------------------------------------------- Add an arena. ----------------------------------------------------------- */ -static bool mi_arena_add(mi_arena_t* arena) { +static bool mi_arena_add(mi_arena_t* arena, mi_arena_id_t* arena_id, mi_stats_t* stats) { mi_assert_internal(arena != NULL); mi_assert_internal((uintptr_t)mi_atomic_load_ptr_relaxed(uint8_t,&arena->start) % MI_SEGMENT_ALIGN == 0); mi_assert_internal(arena->block_count > 0); + if (arena_id != NULL) { *arena_id = -1; } - uintptr_t i = mi_atomic_increment_acq_rel(&mi_arena_count); + size_t i = mi_atomic_increment_acq_rel(&mi_arena_count); if (i >= MI_MAX_ARENAS) { mi_atomic_decrement_acq_rel(&mi_arena_count); return false; } + _mi_stat_counter_increase(&stats->arena_count,1); + arena->id = mi_arena_id_create(i); mi_atomic_store_ptr_release(mi_arena_t,&mi_arenas[i], arena); + if (arena_id != NULL) { *arena_id = arena->id; } return true; } -bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept +static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int numa_node, bool exclusive, mi_memid_t memid, mi_arena_id_t* arena_id) mi_attr_noexcept { + if (arena_id != NULL) *arena_id = _mi_arena_id_none(); + if (size < MI_ARENA_BLOCK_SIZE) return false; + if (is_large) { - mi_assert_internal(is_committed); - is_committed = true; + mi_assert_internal(memid.initially_committed && memid.is_pinned); } - - const size_t bcount = mi_block_count_of_size(size); + + const size_t bcount = size / MI_ARENA_BLOCK_SIZE; const size_t fields = _mi_divide_up(bcount, MI_BITMAP_FIELD_BITS); - const size_t bitmaps = (is_committed ? 2 : 3); + const size_t bitmaps = (memid.is_pinned ? 3 : 5); const size_t asize = sizeof(mi_arena_t) + (bitmaps*fields*sizeof(mi_bitmap_field_t)); - mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); // TODO: can we avoid allocating from the OS? + mi_memid_t meta_memid; + mi_arena_t* arena = (mi_arena_t*)mi_arena_meta_zalloc(asize, &meta_memid, &_mi_stats_main); // TODO: can we avoid allocating from the OS? if (arena == NULL) return false; + // already zero'd due to zalloc + // _mi_memzero(arena, asize); + arena->id = _mi_arena_id_none(); + arena->memid = memid; + arena->exclusive = exclusive; + arena->meta_size = asize; + arena->meta_memid = meta_memid; arena->block_count = bcount; arena->field_count = fields; arena->start = (uint8_t*)start; arena->numa_node = numa_node; // TODO: or get the current numa node if -1? (now it allows anyone to allocate on -1) arena->is_large = is_large; - arena->is_zero_init = is_zero; - arena->is_committed = is_committed; + arena->purge_expire = 0; arena->search_idx = 0; - arena->blocks_dirty = &arena->blocks_inuse[fields]; // just after inuse bitmap - arena->blocks_committed = (is_committed ? NULL : &arena->blocks_inuse[2*fields]); // just after dirty bitmap - // the bitmaps are already zero initialized due to os_alloc - // just claim leftover blocks if needed + // consequetive bitmaps + arena->blocks_dirty = &arena->blocks_inuse[fields]; // just after inuse bitmap + arena->blocks_abandoned = &arena->blocks_inuse[2 * fields]; // just after dirty bitmap + arena->blocks_committed = (arena->memid.is_pinned ? NULL : &arena->blocks_inuse[3*fields]); // just after abandoned bitmap + arena->blocks_purge = (arena->memid.is_pinned ? NULL : &arena->blocks_inuse[4*fields]); // just after committed bitmap + // initialize committed bitmap? + if (arena->blocks_committed != NULL && arena->memid.initially_committed) { + memset((void*)arena->blocks_committed, 0xFF, fields*sizeof(mi_bitmap_field_t)); // cast to void* to avoid atomic warning + } + + // and claim leftover blocks if needed (so we never allocate there) ptrdiff_t post = (fields * MI_BITMAP_FIELD_BITS) - bcount; mi_assert_internal(post >= 0); if (post > 0) { @@ -313,52 +939,132 @@ bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_la mi_bitmap_index_t postidx = mi_bitmap_index_create(fields - 1, MI_BITMAP_FIELD_BITS - post); _mi_bitmap_claim(arena->blocks_inuse, fields, post, postidx, NULL); } + return mi_arena_add(arena, arena_id, &_mi_stats_main); - mi_arena_add(arena); - return true; +} + +bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept { + mi_memid_t memid = _mi_memid_create(MI_MEM_EXTERNAL); + memid.initially_committed = is_committed; + memid.initially_zero = is_zero; + memid.is_pinned = is_large; + return mi_manage_os_memory_ex2(start,size,is_large,numa_node,exclusive,memid, arena_id); } // Reserve a range of regular OS memory -int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noexcept -{ - size = _mi_os_good_alloc_size(size); - bool large = allow_large; - void* start = _mi_os_alloc_aligned(size, MI_SEGMENT_ALIGN, commit, &large, &_mi_stats_main); - if (start==NULL) return ENOMEM; - if (!mi_manage_os_memory(start, size, (large || commit), large, true, -1)) { - _mi_os_free_ex(start, size, commit, &_mi_stats_main); - _mi_verbose_message("failed to reserve %zu k memory\n", _mi_divide_up(size,1024)); +int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept { + if (arena_id != NULL) *arena_id = _mi_arena_id_none(); + size = _mi_align_up(size, MI_ARENA_BLOCK_SIZE); // at least one block + mi_memid_t memid; + void* start = _mi_os_alloc_aligned(size, MI_SEGMENT_ALIGN, commit, allow_large, &memid, &_mi_stats_main); + if (start == NULL) return ENOMEM; + const bool is_large = memid.is_pinned; // todo: use separate is_large field? + if (!mi_manage_os_memory_ex2(start, size, is_large, -1 /* numa node */, exclusive, memid, arena_id)) { + _mi_os_free_ex(start, size, commit, memid, &_mi_stats_main); + _mi_verbose_message("failed to reserve %zu KiB memory\n", _mi_divide_up(size, 1024)); return ENOMEM; } - _mi_verbose_message("reserved %zu kb memory%s\n", _mi_divide_up(size,1024), large ? " (in large os pages)" : ""); + _mi_verbose_message("reserved %zu KiB memory%s\n", _mi_divide_up(size, 1024), is_large ? " (in large os pages)" : ""); return 0; } +// Manage a range of regular OS memory +bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept { + return mi_manage_os_memory_ex(start, size, is_committed, is_large, is_zero, numa_node, false /* exclusive? */, NULL); +} + +// Reserve a range of regular OS memory +int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noexcept { + return mi_reserve_os_memory_ex(size, commit, allow_large, false, NULL); +} + + +/* ----------------------------------------------------------- + Debugging +----------------------------------------------------------- */ + +static size_t mi_debug_show_bitmap(const char* prefix, const char* header, size_t block_count, mi_bitmap_field_t* fields, size_t field_count ) { + _mi_verbose_message("%s%s:\n", prefix, header); + size_t bcount = 0; + size_t inuse_count = 0; + for (size_t i = 0; i < field_count; i++) { + char buf[MI_BITMAP_FIELD_BITS + 1]; + uintptr_t field = mi_atomic_load_relaxed(&fields[i]); + for (size_t bit = 0; bit < MI_BITMAP_FIELD_BITS; bit++, bcount++) { + if (bcount < block_count) { + bool inuse = ((((uintptr_t)1 << bit) & field) != 0); + if (inuse) inuse_count++; + buf[bit] = (inuse ? 'x' : '.'); + } + else { + buf[bit] = ' '; + } + } + buf[MI_BITMAP_FIELD_BITS] = 0; + _mi_verbose_message("%s %s\n", prefix, buf); + } + _mi_verbose_message("%s total ('x'): %zu\n", prefix, inuse_count); + return inuse_count; +} + +void mi_debug_show_arenas(bool show_inuse, bool show_abandoned, bool show_purge) mi_attr_noexcept { + size_t max_arenas = mi_atomic_load_relaxed(&mi_arena_count); + size_t inuse_total = 0; + size_t abandoned_total = 0; + size_t purge_total = 0; + for (size_t i = 0; i < max_arenas; i++) { + mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); + if (arena == NULL) break; + _mi_verbose_message("arena %zu: %zu blocks of size %zuMiB (in %zu fields) %s\n", i, arena->block_count, MI_ARENA_BLOCK_SIZE / MI_MiB, arena->field_count, (arena->memid.is_pinned ? ", pinned" : "")); + if (show_inuse) { + inuse_total += mi_debug_show_bitmap(" ", "inuse blocks", arena->block_count, arena->blocks_inuse, arena->field_count); + } + if (arena->blocks_committed != NULL) { + mi_debug_show_bitmap(" ", "committed blocks", arena->block_count, arena->blocks_committed, arena->field_count); + } + if (show_abandoned) { + abandoned_total += mi_debug_show_bitmap(" ", "abandoned blocks", arena->block_count, arena->blocks_abandoned, arena->field_count); + } + if (show_purge && arena->blocks_purge != NULL) { + purge_total += mi_debug_show_bitmap(" ", "purgeable blocks", arena->block_count, arena->blocks_purge, arena->field_count); + } + } + if (show_inuse) _mi_verbose_message("total inuse blocks : %zu\n", inuse_total); + if (show_abandoned) _mi_verbose_message("total abandoned blocks: %zu\n", abandoned_total); + if (show_purge) _mi_verbose_message("total purgeable blocks: %zu\n", purge_total); +} + + /* ----------------------------------------------------------- Reserve a huge page arena. ----------------------------------------------------------- */ // reserve at a specific numa node -int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept { +int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_msecs, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept { + if (arena_id != NULL) *arena_id = -1; if (pages==0) return 0; if (numa_node < -1) numa_node = -1; if (numa_node >= 0) numa_node = numa_node % _mi_os_numa_node_count(); size_t hsize = 0; size_t pages_reserved = 0; - void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, timeout_msecs, &pages_reserved, &hsize); + mi_memid_t memid; + void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, timeout_msecs, &pages_reserved, &hsize, &memid); if (p==NULL || pages_reserved==0) { - _mi_warning_message("failed to reserve %zu gb huge pages\n", pages); + _mi_warning_message("failed to reserve %zu GiB huge pages\n", pages); return ENOMEM; } - _mi_verbose_message("numa node %i: reserved %zu gb huge pages (of the %zu gb requested)\n", numa_node, pages_reserved, pages); + _mi_verbose_message("numa node %i: reserved %zu GiB huge pages (of the %zu GiB requested)\n", numa_node, pages_reserved, pages); - if (!mi_manage_os_memory(p, hsize, true, true, true, numa_node)) { - _mi_os_free_huge_pages(p, hsize, &_mi_stats_main); + if (!mi_manage_os_memory_ex2(p, hsize, true, numa_node, exclusive, memid, arena_id)) { + _mi_os_free(p, hsize, memid, &_mi_stats_main); return ENOMEM; } return 0; } +int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept { + return mi_reserve_huge_os_pages_at_ex(pages, numa_node, timeout_msecs, false, NULL); +} // reserve huge pages evenly among the given number of numa nodes (or use the available ones as detected) int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs) mi_attr_noexcept { @@ -389,10 +1095,11 @@ int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t } int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept { - UNUSED(max_secs); + MI_UNUSED(max_secs); _mi_warning_message("mi_reserve_huge_os_pages is deprecated: use mi_reserve_huge_os_pages_interleave/at instead\n"); if (pages_reserved != NULL) *pages_reserved = 0; int err = mi_reserve_huge_os_pages_interleave(pages, 0, (size_t)(max_secs * 1000.0)); if (err==0 && pages_reserved!=NULL) *pages_reserved = pages; return err; } + diff --git a/contrib/libs/mimalloc/src/bitmap.c b/contrib/libs/mimalloc/src/bitmap.c index 3b5c8199ca..976ba72c63 100644 --- a/contrib/libs/mimalloc/src/bitmap.c +++ b/contrib/libs/mimalloc/src/bitmap.c @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2019-2021 Microsoft Research, Daan Leijen +Copyright (c) 2019-2023 Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. @@ -7,18 +7,17 @@ terms of the MIT license. A copy of the license can be found in the file /* ---------------------------------------------------------------------------- Concurrent bitmap that can set/reset sequences of bits atomically, -represeted as an array of fields where each field is a machine word (`uintptr_t`) +represented as an array of fields where each field is a machine word (`size_t`) There are two api's; the standard one cannot have sequences that cross between the bitmap fields (and a sequence must be <= MI_BITMAP_FIELD_BITS). -(this is used in region allocation) The `_across` postfixed functions do allow sequences that can cross over between the fields. (This is used in arena allocation) ---------------------------------------------------------------------------- */ #include "mimalloc.h" -#include "mimalloc-internal.h" +#include "mimalloc/internal.h" #include "bitmap.h" /* ----------------------------------------------------------- @@ -26,12 +25,12 @@ between the fields. (This is used in arena allocation) ----------------------------------------------------------- */ // The bit mask for a given number of blocks at a specified bit index. -static inline uintptr_t mi_bitmap_mask_(size_t count, size_t bitidx) { +static inline size_t mi_bitmap_mask_(size_t count, size_t bitidx) { mi_assert_internal(count + bitidx <= MI_BITMAP_FIELD_BITS); mi_assert_internal(count > 0); if (count >= MI_BITMAP_FIELD_BITS) return MI_BITMAP_FIELD_FULL; if (count == 0) return 0; - return ((((uintptr_t)1 << count) - 1) << bitidx); + return ((((size_t)1 << count) - 1) << bitidx); } @@ -46,29 +45,29 @@ bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_ { mi_assert_internal(bitmap_idx != NULL); mi_assert_internal(count <= MI_BITMAP_FIELD_BITS); - _Atomic(uintptr_t)* field = &bitmap[idx]; - uintptr_t map = mi_atomic_load_relaxed(field); + mi_bitmap_field_t* field = &bitmap[idx]; + size_t map = mi_atomic_load_relaxed(field); if (map==MI_BITMAP_FIELD_FULL) return false; // short cut // search for 0-bit sequence of length count - const uintptr_t mask = mi_bitmap_mask_(count, 0); - const size_t bitidx_max = MI_BITMAP_FIELD_BITS - count; + const size_t mask = mi_bitmap_mask_(count, 0); + const size_t bitidx_max = MI_BITMAP_FIELD_BITS - count; #ifdef MI_HAVE_FAST_BITSCAN size_t bitidx = mi_ctz(~map); // quickly find the first zero bit if possible #else size_t bitidx = 0; // otherwise start at 0 #endif - uintptr_t m = (mask << bitidx); // invariant: m == mask shifted by bitidx + size_t m = (mask << bitidx); // invariant: m == mask shifted by bitidx // scan linearly for a free range of zero bits while (bitidx <= bitidx_max) { - const uintptr_t mapm = map & m; + const size_t mapm = (map & m); if (mapm == 0) { // are the mask bits free at bitidx? mi_assert_internal((m >> bitidx) == mask); // no overflow? - const uintptr_t newmap = map | m; + const size_t newmap = (map | m); mi_assert_internal((newmap^map) >> bitidx == mask); - if (!mi_atomic_cas_weak_acq_rel(field, &map, newmap)) { // TODO: use strong cas here? + if (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)) { // TODO: use weak cas here? // no success, another thread claimed concurrently.. keep going (with updated `map`) continue; } @@ -81,7 +80,8 @@ bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_ else { // on to the next bit range #ifdef MI_HAVE_FAST_BITSCAN - const size_t shift = (count == 1 ? 1 : mi_bsr(mapm) - bitidx + 1); + mi_assert_internal(mapm != 0); + const size_t shift = (count == 1 ? 1 : (MI_INTPTR_BITS - mi_clz(mapm) - bitidx)); mi_assert_internal(shift > 0 && shift <= count); #else const size_t shift = 1; @@ -100,7 +100,7 @@ bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_ bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx) { size_t idx = start_field_idx; for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) { - if (idx >= bitmap_fields) idx = 0; // wrap + if (idx >= bitmap_fields) { idx = 0; } // wrap if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) { return true; } @@ -108,23 +108,16 @@ bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fiel return false; } -/* -// Find `count` bits of 0 and set them to 1 atomically; returns `true` on success. -// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never span fields. -bool _mi_bitmap_try_find_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t count, mi_bitmap_index_t* bitmap_idx) { - return _mi_bitmap_try_find_from_claim(bitmap, bitmap_fields, 0, count, bitmap_idx); -} -*/ // Set `count` bits at `bitmap_idx` to 0 atomically // Returns `true` if all `count` bits were 1 previously. -bool mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { +bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { const size_t idx = mi_bitmap_index_field(bitmap_idx); const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); - const uintptr_t mask = mi_bitmap_mask_(count, bitidx); - mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); + const size_t mask = mi_bitmap_mask_(count, bitidx); + mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields); // mi_assert_internal((bitmap[idx] & mask) == mask); - uintptr_t prev = mi_atomic_and_acq_rel(&bitmap[idx], ~mask); + const size_t prev = mi_atomic_and_acq_rel(&bitmap[idx], ~mask); return ((prev & mask) == mask); } @@ -134,11 +127,11 @@ bool mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, m bool _mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_zero) { const size_t idx = mi_bitmap_index_field(bitmap_idx); const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); - const uintptr_t mask = mi_bitmap_mask_(count, bitidx); - mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); + const size_t mask = mi_bitmap_mask_(count, bitidx); + mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields); //mi_assert_internal(any_zero != NULL || (bitmap[idx] & mask) == 0); - uintptr_t prev = mi_atomic_or_acq_rel(&bitmap[idx], mask); - if (any_zero != NULL) *any_zero = ((prev & mask) != mask); + size_t prev = mi_atomic_or_acq_rel(&bitmap[idx], mask); + if (any_zero != NULL) { *any_zero = ((prev & mask) != mask); } return ((prev & mask) == 0); } @@ -146,13 +139,30 @@ bool _mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi static bool mi_bitmap_is_claimedx(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_ones) { const size_t idx = mi_bitmap_index_field(bitmap_idx); const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); - const uintptr_t mask = mi_bitmap_mask_(count, bitidx); - mi_assert_internal(bitmap_fields > idx); UNUSED(bitmap_fields); - uintptr_t field = mi_atomic_load_relaxed(&bitmap[idx]); - if (any_ones != NULL) *any_ones = ((field & mask) != 0); + const size_t mask = mi_bitmap_mask_(count, bitidx); + mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields); + const size_t field = mi_atomic_load_relaxed(&bitmap[idx]); + if (any_ones != NULL) { *any_ones = ((field & mask) != 0); } return ((field & mask) == mask); } +// Try to set `count` bits at `bitmap_idx` from 0 to 1 atomically. +// Returns `true` if successful when all previous `count` bits were 0. +bool _mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { + const size_t idx = mi_bitmap_index_field(bitmap_idx); + const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); + const size_t mask = mi_bitmap_mask_(count, bitidx); + mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields); + size_t expected = mi_atomic_load_relaxed(&bitmap[idx]); + do { + if ((expected & mask) != 0) return false; + } + while (!mi_atomic_cas_strong_acq_rel(&bitmap[idx], &expected, expected | mask)); + mi_assert_internal((expected & mask) == 0); + return true; +} + + bool _mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { return mi_bitmap_is_claimedx(bitmap, bitmap_fields, count, bitmap_idx, NULL); } @@ -169,87 +179,93 @@ bool _mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t // between the fields. This is used in arena allocation //-------------------------------------------------------------------------- -// Try to atomically claim a sequence of `count` bits starting from the field +// Try to atomically claim a sequence of `count` bits starting from the field // at `idx` in `bitmap` and crossing into subsequent fields. Returns `true` on success. -static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t idx, const size_t count, const size_t retries, mi_bitmap_index_t* bitmap_idx) +// Only needs to consider crossing into the next fields (see `mi_bitmap_try_find_from_claim_across`) +static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t idx, const size_t count, const size_t retries, mi_bitmap_index_t* bitmap_idx, mi_stats_t* stats) { mi_assert_internal(bitmap_idx != NULL); - + // check initial trailing zeros - _Atomic(uintptr_t)* field = &bitmap[idx]; - uintptr_t map = mi_atomic_load_relaxed(field); + mi_bitmap_field_t* field = &bitmap[idx]; + size_t map = mi_atomic_load_relaxed(field); const size_t initial = mi_clz(map); // count of initial zeros starting at idx mi_assert_internal(initial <= MI_BITMAP_FIELD_BITS); if (initial == 0) return false; - if (initial >= count) return _mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx); // no need to cross fields + if (initial >= count) return _mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx); // no need to cross fields (this case won't happen for us) if (_mi_divide_up(count - initial, MI_BITMAP_FIELD_BITS) >= (bitmap_fields - idx)) return false; // not enough entries - + // scan ahead size_t found = initial; - uintptr_t mask = 0; // mask bits for the final field + size_t mask = 0; // mask bits for the final field while(found < count) { field++; map = mi_atomic_load_relaxed(field); - const uintptr_t mask_bits = (found + MI_BITMAP_FIELD_BITS <= count ? MI_BITMAP_FIELD_BITS : (count - found)); + const size_t mask_bits = (found + MI_BITMAP_FIELD_BITS <= count ? MI_BITMAP_FIELD_BITS : (count - found)); + mi_assert_internal(mask_bits > 0 && mask_bits <= MI_BITMAP_FIELD_BITS); mask = mi_bitmap_mask_(mask_bits, 0); - if ((map & mask) != 0) return false; + if ((map & mask) != 0) return false; // some part is already claimed found += mask_bits; } mi_assert_internal(field < &bitmap[bitmap_fields]); - // found range of zeros up to the final field; mask contains mask in the final field - // now claim it atomically - _Atomic(uintptr_t)* const final_field = field; - const uintptr_t final_mask = mask; - _Atomic(uintptr_t)* const initial_field = &bitmap[idx]; - const uintptr_t initial_mask = mi_bitmap_mask_(initial, MI_BITMAP_FIELD_BITS - initial); + // we found a range of contiguous zeros up to the final field; mask contains mask in the final field + // now try to claim the range atomically + mi_bitmap_field_t* const final_field = field; + const size_t final_mask = mask; + mi_bitmap_field_t* const initial_field = &bitmap[idx]; + const size_t initial_idx = MI_BITMAP_FIELD_BITS - initial; + const size_t initial_mask = mi_bitmap_mask_(initial, initial_idx); // initial field - uintptr_t newmap; + size_t newmap; field = initial_field; map = mi_atomic_load_relaxed(field); do { - newmap = map | initial_mask; + newmap = (map | initial_mask); if ((map & initial_mask) != 0) { goto rollback; }; } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)); - + // intermediate fields while (++field < final_field) { newmap = mi_bitmap_mask_(MI_BITMAP_FIELD_BITS, 0); map = 0; if (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)) { goto rollback; } } - + // final field mi_assert_internal(field == final_field); map = mi_atomic_load_relaxed(field); do { - newmap = map | final_mask; + newmap = (map | final_mask); if ((map & final_mask) != 0) { goto rollback; } } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)); // claimed! - *bitmap_idx = mi_bitmap_index_create(idx, MI_BITMAP_FIELD_BITS - initial); + mi_stat_counter_increase(stats->arena_crossover_count,1); + *bitmap_idx = mi_bitmap_index_create(idx, initial_idx); return true; -rollback: +rollback: // roll back intermediate fields + // (we just failed to claim `field` so decrement first) while (--field > initial_field) { newmap = 0; map = mi_bitmap_mask_(MI_BITMAP_FIELD_BITS, 0); mi_assert_internal(mi_atomic_load_relaxed(field) == map); mi_atomic_store_release(field, newmap); } - if (field == initial_field) { + if (field == initial_field) { // (if we failed on the initial field, `field + 1 == initial_field`) map = mi_atomic_load_relaxed(field); do { mi_assert_internal((map & initial_mask) == initial_mask); - newmap = map & ~initial_mask; + newmap = (map & ~initial_mask); } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)); - } + } + mi_stat_counter_increase(stats->arena_rollback_count,1); // retry? (we make a recursive call instead of goto to be able to use const declarations) - if (retries < 4) { - return mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, retries+1, bitmap_idx); + if (retries <= 2) { + return mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, retries+1, bitmap_idx, stats); } else { return false; @@ -259,20 +275,27 @@ rollback: // Find `count` bits of zeros and set them to 1 atomically; returns `true` on success. // Starts at idx, and wraps around to search in all `bitmap_fields` fields. -bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx) { +bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx, mi_stats_t* stats) { mi_assert_internal(count > 0); - if (count==1) return _mi_bitmap_try_find_from_claim(bitmap, bitmap_fields, start_field_idx, count, bitmap_idx); + if (count <= 2) { + // we don't bother with crossover fields for small counts + return _mi_bitmap_try_find_from_claim(bitmap, bitmap_fields, start_field_idx, count, bitmap_idx); + } + + // visit the fields size_t idx = start_field_idx; for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) { - if (idx >= bitmap_fields) idx = 0; // wrap - // try to claim inside the field + if (idx >= bitmap_fields) { idx = 0; } // wrap + // first try to claim inside a field + /* if (count <= MI_BITMAP_FIELD_BITS) { if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) { return true; } } - // try to claim across fields - if (mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, 0, bitmap_idx)) { + */ + // if that fails, then try to claim across fields + if (mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, 0, bitmap_idx, stats)) { return true; } } @@ -280,10 +303,10 @@ bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitm } // Helper for masks across fields; returns the mid count, post_mask may be 0 -static size_t mi_bitmap_mask_across(mi_bitmap_index_t bitmap_idx, size_t bitmap_fields, size_t count, uintptr_t* pre_mask, uintptr_t* mid_mask, uintptr_t* post_mask) { - UNUSED_RELEASE(bitmap_fields); +static size_t mi_bitmap_mask_across(mi_bitmap_index_t bitmap_idx, size_t bitmap_fields, size_t count, size_t* pre_mask, size_t* mid_mask, size_t* post_mask) { + MI_UNUSED(bitmap_fields); const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); - if (mi_likely(bitidx + count <= MI_BITMAP_FIELD_BITS)) { + if mi_likely(bitidx + count <= MI_BITMAP_FIELD_BITS) { *pre_mask = mi_bitmap_mask_(count, bitidx); *mid_mask = 0; *post_mask = 0; @@ -308,37 +331,37 @@ static size_t mi_bitmap_mask_across(mi_bitmap_index_t bitmap_idx, size_t bitmap_ // Returns `true` if all `count` bits were 1 previously. bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { size_t idx = mi_bitmap_index_field(bitmap_idx); - uintptr_t pre_mask; - uintptr_t mid_mask; - uintptr_t post_mask; - size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask); + size_t pre_mask; + size_t mid_mask; + size_t post_mask; + size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask); bool all_one = true; - _Atomic(uintptr_t)*field = &bitmap[idx]; - uintptr_t prev = mi_atomic_and_acq_rel(field++, ~pre_mask); + mi_bitmap_field_t* field = &bitmap[idx]; + size_t prev = mi_atomic_and_acq_rel(field++, ~pre_mask); // clear first part if ((prev & pre_mask) != pre_mask) all_one = false; while(mid_count-- > 0) { - prev = mi_atomic_and_acq_rel(field++, ~mid_mask); + prev = mi_atomic_and_acq_rel(field++, ~mid_mask); // clear mid part if ((prev & mid_mask) != mid_mask) all_one = false; } if (post_mask!=0) { - prev = mi_atomic_and_acq_rel(field, ~post_mask); + prev = mi_atomic_and_acq_rel(field, ~post_mask); // clear end part if ((prev & post_mask) != post_mask) all_one = false; } - return all_one; + return all_one; } // Set `count` bits at `bitmap_idx` to 1 atomically // Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit. bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_zero) { size_t idx = mi_bitmap_index_field(bitmap_idx); - uintptr_t pre_mask; - uintptr_t mid_mask; - uintptr_t post_mask; + size_t pre_mask; + size_t mid_mask; + size_t post_mask; size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask); bool all_zero = true; bool any_zero = false; - _Atomic(uintptr_t)*field = &bitmap[idx]; - uintptr_t prev = mi_atomic_or_acq_rel(field++, pre_mask); + _Atomic(size_t)*field = &bitmap[idx]; + size_t prev = mi_atomic_or_acq_rel(field++, pre_mask); if ((prev & pre_mask) != 0) all_zero = false; if ((prev & pre_mask) != pre_mask) any_zero = true; while (mid_count-- > 0) { @@ -351,23 +374,23 @@ bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t co if ((prev & post_mask) != 0) all_zero = false; if ((prev & post_mask) != post_mask) any_zero = true; } - if (pany_zero != NULL) *pany_zero = any_zero; + if (pany_zero != NULL) { *pany_zero = any_zero; } return all_zero; } -// Returns `true` if all `count` bits were 1. +// Returns `true` if all `count` bits were 1. // `any_ones` is `true` if there was at least one bit set to one. static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_ones) { size_t idx = mi_bitmap_index_field(bitmap_idx); - uintptr_t pre_mask; - uintptr_t mid_mask; - uintptr_t post_mask; + size_t pre_mask; + size_t mid_mask; + size_t post_mask; size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask); bool all_ones = true; bool any_ones = false; - _Atomic(uintptr_t)* field = &bitmap[idx]; - uintptr_t prev = mi_atomic_load_relaxed(field++); + mi_bitmap_field_t* field = &bitmap[idx]; + size_t prev = mi_atomic_load_relaxed(field++); if ((prev & pre_mask) != pre_mask) all_ones = false; if ((prev & pre_mask) != 0) any_ones = true; while (mid_count-- > 0) { @@ -379,8 +402,8 @@ static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_field prev = mi_atomic_load_relaxed(field); if ((prev & post_mask) != post_mask) all_ones = false; if ((prev & post_mask) != 0) any_ones = true; - } - if (pany_ones != NULL) *pany_ones = any_ones; + } + if (pany_ones != NULL) { *pany_ones = any_ones; } return all_ones; } diff --git a/contrib/libs/mimalloc/src/bitmap.h b/contrib/libs/mimalloc/src/bitmap.h index 21fd4e13d0..a1e7686abc 100644 --- a/contrib/libs/mimalloc/src/bitmap.h +++ b/contrib/libs/mimalloc/src/bitmap.h @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2019-2020 Microsoft Research, Daan Leijen +Copyright (c) 2019-2023 Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. @@ -7,7 +7,7 @@ terms of the MIT license. A copy of the license can be found in the file /* ---------------------------------------------------------------------------- Concurrent bitmap that can set/reset sequences of bits atomically, -represeted as an array of fields where each field is a machine word (`uintptr_t`) +represented as an array of fields where each field is a machine word (`size_t`) There are two api's; the standard one cannot have sequences that cross between the bitmap fields (and a sequence must be <= MI_BITMAP_FIELD_BITS). @@ -24,11 +24,11 @@ between the fields. (This is used in arena allocation) Bitmap definition ----------------------------------------------------------- */ -#define MI_BITMAP_FIELD_BITS (8*MI_INTPTR_SIZE) -#define MI_BITMAP_FIELD_FULL (~((uintptr_t)0)) // all bits set +#define MI_BITMAP_FIELD_BITS (8*MI_SIZE_SIZE) +#define MI_BITMAP_FIELD_FULL (~((size_t)0)) // all bits set -// An atomic bitmap of `uintptr_t` fields -typedef _Atomic(uintptr_t) mi_bitmap_field_t; +// An atomic bitmap of `size_t` fields +typedef _Atomic(size_t) mi_bitmap_field_t; typedef mi_bitmap_field_t* mi_bitmap_t; // A bitmap index is the index of the bit in a bitmap. @@ -69,7 +69,11 @@ bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fiel // Set `count` bits at `bitmap_idx` to 0 atomically // Returns `true` if all `count` bits were 1 previously. -bool mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); +bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); + +// Try to set `count` bits at `bitmap_idx` from 0 to 1 atomically. +// Returns `true` if successful when all previous `count` bits were 0. +bool _mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); // Set `count` bits at `bitmap_idx` to 1 atomically // Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit. @@ -86,7 +90,7 @@ bool _mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t // Find `count` bits of zeros and set them to 1 atomically; returns `true` on success. // Starts at idx, and wraps around to search in all `bitmap_fields` fields. -bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx); +bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx, mi_stats_t* stats); // Set `count` bits at `bitmap_idx` to 0 atomically // Returns `true` if all `count` bits were 1 previously. diff --git a/contrib/libs/mimalloc/src/free.c b/contrib/libs/mimalloc/src/free.c new file mode 100644 index 0000000000..c065d2f3f6 --- /dev/null +++ b/contrib/libs/mimalloc/src/free.c @@ -0,0 +1,520 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2024, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ +#if !defined(MI_IN_ALLOC_C) +#error "this file should be included from 'alloc.c' (so aliases can work from alloc-override)" +// add includes help an IDE +#include "mimalloc.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" +#include "mimalloc/prim.h" // _mi_prim_thread_id() +#endif + +// forward declarations +static void mi_check_padding(const mi_page_t* page, const mi_block_t* block); +static bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block); +static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* block); +static void mi_stat_free(const mi_page_t* page, const mi_block_t* block); + + +// ------------------------------------------------------ +// Free +// ------------------------------------------------------ + +// forward declaration of multi-threaded free (`_mt`) (or free in huge block if compiled with MI_HUGE_PAGE_ABANDON) +static mi_decl_noinline void mi_free_block_mt(mi_page_t* page, mi_segment_t* segment, mi_block_t* block); + +// regular free of a (thread local) block pointer +// fast path written carefully to prevent spilling on the stack +static inline void mi_free_block_local(mi_page_t* page, mi_block_t* block, bool track_stats, bool check_full) +{ + // checks + if mi_unlikely(mi_check_is_double_free(page, block)) return; + mi_check_padding(page, block); + if (track_stats) { mi_stat_free(page, block); } + #if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN + memset(block, MI_DEBUG_FREED, mi_page_block_size(page)); + #endif + if (track_stats) { mi_track_free_size(block, mi_page_usable_size_of(page, block)); } // faster then mi_usable_size as we already know the page and that p is unaligned + + // actual free: push on the local free list + mi_block_set_next(page, block, page->local_free); + page->local_free = block; + if mi_unlikely(--page->used == 0) { + _mi_page_retire(page); + } + else if mi_unlikely(check_full && mi_page_is_in_full(page)) { + _mi_page_unfull(page); + } +} + +// Adjust a block that was allocated aligned, to the actual start of the block in the page. +// note: this can be called from `mi_free_generic_mt` where a non-owning thread accesses the +// `page_start` and `block_size` fields; however these are constant and the page won't be +// deallocated (as the block we are freeing keeps it alive) and thus safe to read concurrently. +mi_block_t* _mi_page_ptr_unalign(const mi_page_t* page, const void* p) { + mi_assert_internal(page!=NULL && p!=NULL); + + size_t diff = (uint8_t*)p - page->page_start; + size_t adjust; + if mi_likely(page->block_size_shift != 0) { + adjust = diff & (((size_t)1 << page->block_size_shift) - 1); + } + else { + adjust = diff % mi_page_block_size(page); + } + + return (mi_block_t*)((uintptr_t)p - adjust); +} + +// free a local pointer (page parameter comes first for better codegen) +static void mi_decl_noinline mi_free_generic_local(mi_page_t* page, mi_segment_t* segment, void* p) mi_attr_noexcept { + MI_UNUSED(segment); + mi_block_t* const block = (mi_page_has_aligned(page) ? _mi_page_ptr_unalign(page, p) : (mi_block_t*)p); + mi_free_block_local(page, block, true /* track stats */, true /* check for a full page */); +} + +// free a pointer owned by another thread (page parameter comes first for better codegen) +static void mi_decl_noinline mi_free_generic_mt(mi_page_t* page, mi_segment_t* segment, void* p) mi_attr_noexcept { + mi_block_t* const block = _mi_page_ptr_unalign(page, p); // don't check `has_aligned` flag to avoid a race (issue #865) + mi_free_block_mt(page, segment, block); +} + +// generic free (for runtime integration) +void mi_decl_noinline _mi_free_generic(mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept { + if (is_local) mi_free_generic_local(page,segment,p); + else mi_free_generic_mt(page,segment,p); +} + +// Get the segment data belonging to a pointer +// This is just a single `and` in release mode but does further checks in debug mode +// (and secure mode) to see if this was a valid pointer. +static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* msg) +{ + MI_UNUSED(msg); + +#if (MI_DEBUG>0) + if mi_unlikely(((uintptr_t)p & (MI_INTPTR_SIZE - 1)) != 0) { + _mi_error_message(EINVAL, "%s: invalid (unaligned) pointer: %p\n", msg, p); + return NULL; + } +#endif + + mi_segment_t* const segment = _mi_ptr_segment(p); + if mi_unlikely(segment==NULL) return segment; + +#if (MI_DEBUG>0) + if mi_unlikely(!mi_is_in_heap_region(p)) { + _mi_warning_message("%s: pointer might not point to a valid heap region: %p\n" + "(this may still be a valid very large allocation (over 64MiB))\n", msg, p); + if mi_likely(_mi_ptr_cookie(segment) == segment->cookie) { + _mi_warning_message("(yes, the previous pointer %p was valid after all)\n", p); + } + } +#endif +#if (MI_DEBUG>0 || MI_SECURE>=4) + if mi_unlikely(_mi_ptr_cookie(segment) != segment->cookie) { + _mi_error_message(EINVAL, "%s: pointer does not point to a valid heap space: %p\n", msg, p); + return NULL; + } +#endif + + return segment; +} + +// Free a block +// Fast path written carefully to prevent register spilling on the stack +void mi_free(void* p) mi_attr_noexcept +{ + mi_segment_t* const segment = mi_checked_ptr_segment(p,"mi_free"); + if mi_unlikely(segment==NULL) return; + + const bool is_local = (_mi_prim_thread_id() == mi_atomic_load_relaxed(&segment->thread_id)); + mi_page_t* const page = _mi_segment_page_of(segment, p); + + if mi_likely(is_local) { // thread-local free? + if mi_likely(page->flags.full_aligned == 0) { // and it is not a full page (full pages need to move from the full bin), nor has aligned blocks (aligned blocks need to be unaligned) + // thread-local, aligned, and not a full page + mi_block_t* const block = (mi_block_t*)p; + mi_free_block_local(page, block, true /* track stats */, false /* no need to check if the page is full */); + } + else { + // page is full or contains (inner) aligned blocks; use generic path + mi_free_generic_local(page, segment, p); + } + } + else { + // not thread-local; use generic path + mi_free_generic_mt(page, segment, p); + } +} + +// return true if successful +bool _mi_free_delayed_block(mi_block_t* block) { + // get segment and page + mi_assert_internal(block!=NULL); + const mi_segment_t* const segment = _mi_ptr_segment(block); + mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie); + mi_assert_internal(_mi_thread_id() == segment->thread_id); + mi_page_t* const page = _mi_segment_page_of(segment, block); + + // Clear the no-delayed flag so delayed freeing is used again for this page. + // This must be done before collecting the free lists on this page -- otherwise + // some blocks may end up in the page `thread_free` list with no blocks in the + // heap `thread_delayed_free` list which may cause the page to be never freed! + // (it would only be freed if we happen to scan it in `mi_page_queue_find_free_ex`) + if (!_mi_page_try_use_delayed_free(page, MI_USE_DELAYED_FREE, false /* dont overwrite never delayed */)) { + return false; + } + + // collect all other non-local frees (move from `thread_free` to `free`) to ensure up-to-date `used` count + _mi_page_free_collect(page, false); + + // and free the block (possibly freeing the page as well since `used` is updated) + mi_free_block_local(page, block, false /* stats have already been adjusted */, true /* check for a full page */); + return true; +} + +// ------------------------------------------------------ +// Multi-threaded Free (`_mt`) +// ------------------------------------------------------ + +// Push a block that is owned by another thread on its page-local thread free +// list or it's heap delayed free list. Such blocks are later collected by +// the owning thread in `_mi_free_delayed_block`. +static void mi_decl_noinline mi_free_block_delayed_mt( mi_page_t* page, mi_block_t* block ) +{ + // Try to put the block on either the page-local thread free list, + // or the heap delayed free list (if this is the first non-local free in that page) + mi_thread_free_t tfreex; + bool use_delayed; + mi_thread_free_t tfree = mi_atomic_load_relaxed(&page->xthread_free); + do { + use_delayed = (mi_tf_delayed(tfree) == MI_USE_DELAYED_FREE); + if mi_unlikely(use_delayed) { + // unlikely: this only happens on the first concurrent free in a page that is in the full list + tfreex = mi_tf_set_delayed(tfree,MI_DELAYED_FREEING); + } + else { + // usual: directly add to page thread_free list + mi_block_set_next(page, block, mi_tf_block(tfree)); + tfreex = mi_tf_set_block(tfree,block); + } + } while (!mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex)); + + // If this was the first non-local free, we need to push it on the heap delayed free list instead + if mi_unlikely(use_delayed) { + // racy read on `heap`, but ok because MI_DELAYED_FREEING is set (see `mi_heap_delete` and `mi_heap_collect_abandon`) + mi_heap_t* const heap = (mi_heap_t*)(mi_atomic_load_acquire(&page->xheap)); //mi_page_heap(page); + mi_assert_internal(heap != NULL); + if (heap != NULL) { + // add to the delayed free list of this heap. (do this atomically as the lock only protects heap memory validity) + mi_block_t* dfree = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free); + do { + mi_block_set_nextx(heap,block,dfree, heap->keys); + } while (!mi_atomic_cas_ptr_weak_release(mi_block_t,&heap->thread_delayed_free, &dfree, block)); + } + + // and reset the MI_DELAYED_FREEING flag + tfree = mi_atomic_load_relaxed(&page->xthread_free); + do { + tfreex = tfree; + mi_assert_internal(mi_tf_delayed(tfree) == MI_DELAYED_FREEING); + tfreex = mi_tf_set_delayed(tfree,MI_NO_DELAYED_FREE); + } while (!mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex)); + } +} + +// Multi-threaded free (`_mt`) (or free in huge block if compiled with MI_HUGE_PAGE_ABANDON) +static void mi_decl_noinline mi_free_block_mt(mi_page_t* page, mi_segment_t* segment, mi_block_t* block) +{ + // first see if the segment was abandoned and if we can reclaim it into our thread + if (mi_option_is_enabled(mi_option_abandoned_reclaim_on_free) && + #if MI_HUGE_PAGE_ABANDON + segment->page_kind != MI_PAGE_HUGE && + #endif + mi_atomic_load_relaxed(&segment->thread_id) == 0) + { + // the segment is abandoned, try to reclaim it into our heap + if (_mi_segment_attempt_reclaim(mi_heap_get_default(), segment)) { + mi_assert_internal(_mi_prim_thread_id() == mi_atomic_load_relaxed(&segment->thread_id)); + mi_free(block); // recursively free as now it will be a local free in our heap + return; + } + } + + // The padding check may access the non-thread-owned page for the key values. + // that is safe as these are constant and the page won't be freed (as the block is not freed yet). + mi_check_padding(page, block); + + // adjust stats (after padding check and potentially recursive `mi_free` above) + mi_stat_free(page, block); // stat_free may access the padding + mi_track_free_size(block, mi_page_usable_size_of(page,block)); + + // for small size, ensure we can fit the delayed thread pointers without triggering overflow detection + _mi_padding_shrink(page, block, sizeof(mi_block_t)); + + if (segment->page_kind == MI_PAGE_HUGE) { + #if MI_HUGE_PAGE_ABANDON + // huge page segments are always abandoned and can be freed immediately + _mi_segment_huge_page_free(segment, page, block); + return; + #else + // huge pages are special as they occupy the entire segment + // as these are large we reset the memory occupied by the page so it is available to other threads + // (as the owning thread needs to actually free the memory later). + _mi_segment_huge_page_reset(segment, page, block); + #endif + } + else { + #if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN // note: when tracking, cannot use mi_usable_size with multi-threading + memset(block, MI_DEBUG_FREED, mi_usable_size(block)); + #endif + } + + // and finally free the actual block by pushing it on the owning heap + // thread_delayed free list (or heap delayed free list) + mi_free_block_delayed_mt(page,block); +} + + +// ------------------------------------------------------ +// Usable size +// ------------------------------------------------------ + +// Bytes available in a block +static size_t mi_decl_noinline mi_page_usable_aligned_size_of(const mi_page_t* page, const void* p) mi_attr_noexcept { + const mi_block_t* block = _mi_page_ptr_unalign(page, p); + const size_t size = mi_page_usable_size_of(page, block); + const ptrdiff_t adjust = (uint8_t*)p - (uint8_t*)block; + mi_assert_internal(adjust >= 0 && (size_t)adjust <= size); + return (size - adjust); +} + +static inline size_t _mi_usable_size(const void* p, const char* msg) mi_attr_noexcept { + const mi_segment_t* const segment = mi_checked_ptr_segment(p, msg); + if mi_unlikely(segment==NULL) return 0; + const mi_page_t* const page = _mi_segment_page_of(segment, p); + if mi_likely(!mi_page_has_aligned(page)) { + const mi_block_t* block = (const mi_block_t*)p; + return mi_page_usable_size_of(page, block); + } + else { + // split out to separate routine for improved code generation + return mi_page_usable_aligned_size_of(page, p); + } +} + +mi_decl_nodiscard size_t mi_usable_size(const void* p) mi_attr_noexcept { + return _mi_usable_size(p, "mi_usable_size"); +} + + +// ------------------------------------------------------ +// Free variants +// ------------------------------------------------------ + +void mi_free_size(void* p, size_t size) mi_attr_noexcept { + MI_UNUSED_RELEASE(size); + mi_assert(p == NULL || size <= _mi_usable_size(p,"mi_free_size")); + mi_free(p); +} + +void mi_free_size_aligned(void* p, size_t size, size_t alignment) mi_attr_noexcept { + MI_UNUSED_RELEASE(alignment); + mi_assert(((uintptr_t)p % alignment) == 0); + mi_free_size(p,size); +} + +void mi_free_aligned(void* p, size_t alignment) mi_attr_noexcept { + MI_UNUSED_RELEASE(alignment); + mi_assert(((uintptr_t)p % alignment) == 0); + mi_free(p); +} + + +// ------------------------------------------------------ +// Check for double free in secure and debug mode +// This is somewhat expensive so only enabled for secure mode 4 +// ------------------------------------------------------ + +#if (MI_ENCODE_FREELIST && (MI_SECURE>=4 || MI_DEBUG!=0)) +// linear check if the free list contains a specific element +static bool mi_list_contains(const mi_page_t* page, const mi_block_t* list, const mi_block_t* elem) { + while (list != NULL) { + if (elem==list) return true; + list = mi_block_next(page, list); + } + return false; +} + +static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, const mi_block_t* block) { + // The decoded value is in the same page (or NULL). + // Walk the free lists to verify positively if it is already freed + if (mi_list_contains(page, page->free, block) || + mi_list_contains(page, page->local_free, block) || + mi_list_contains(page, mi_page_thread_free(page), block)) + { + _mi_error_message(EAGAIN, "double free detected of block %p with size %zu\n", block, mi_page_block_size(page)); + return true; + } + return false; +} + +#define mi_track_page(page,access) { size_t psize; void* pstart = _mi_page_start(_mi_page_segment(page),page,&psize); mi_track_mem_##access( pstart, psize); } + +static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) { + bool is_double_free = false; + mi_block_t* n = mi_block_nextx(page, block, page->keys); // pretend it is freed, and get the decoded first field + if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check: aligned pointer? + (n==NULL || mi_is_in_same_page(block, n))) // quick check: in same page or NULL? + { + // Suspicious: decoded value a in block is in the same page (or NULL) -- maybe a double free? + // (continue in separate function to improve code generation) + is_double_free = mi_check_is_double_freex(page, block); + } + return is_double_free; +} +#else +static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) { + MI_UNUSED(page); + MI_UNUSED(block); + return false; +} +#endif + + +// --------------------------------------------------------------------------- +// Check for heap block overflow by setting up padding at the end of the block +// --------------------------------------------------------------------------- + +#if MI_PADDING // && !MI_TRACK_ENABLED +static bool mi_page_decode_padding(const mi_page_t* page, const mi_block_t* block, size_t* delta, size_t* bsize) { + *bsize = mi_page_usable_block_size(page); + const mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + *bsize); + mi_track_mem_defined(padding,sizeof(mi_padding_t)); + *delta = padding->delta; + uint32_t canary = padding->canary; + uintptr_t keys[2]; + keys[0] = page->keys[0]; + keys[1] = page->keys[1]; + bool ok = ((uint32_t)mi_ptr_encode(page,block,keys) == canary && *delta <= *bsize); + mi_track_mem_noaccess(padding,sizeof(mi_padding_t)); + return ok; +} + +// Return the exact usable size of a block. +static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* block) { + size_t bsize; + size_t delta; + bool ok = mi_page_decode_padding(page, block, &delta, &bsize); + mi_assert_internal(ok); mi_assert_internal(delta <= bsize); + return (ok ? bsize - delta : 0); +} + +// When a non-thread-local block is freed, it becomes part of the thread delayed free +// list that is freed later by the owning heap. If the exact usable size is too small to +// contain the pointer for the delayed list, then shrink the padding (by decreasing delta) +// so it will later not trigger an overflow error in `mi_free_block`. +void _mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size) { + size_t bsize; + size_t delta; + bool ok = mi_page_decode_padding(page, block, &delta, &bsize); + mi_assert_internal(ok); + if (!ok || (bsize - delta) >= min_size) return; // usually already enough space + mi_assert_internal(bsize >= min_size); + if (bsize < min_size) return; // should never happen + size_t new_delta = (bsize - min_size); + mi_assert_internal(new_delta < bsize); + mi_padding_t* padding = (mi_padding_t*)((uint8_t*)block + bsize); + mi_track_mem_defined(padding,sizeof(mi_padding_t)); + padding->delta = (uint32_t)new_delta; + mi_track_mem_noaccess(padding,sizeof(mi_padding_t)); +} +#else +static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* block) { + MI_UNUSED(block); + return mi_page_usable_block_size(page); +} + +void _mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size) { + MI_UNUSED(page); + MI_UNUSED(block); + MI_UNUSED(min_size); +} +#endif + +#if MI_PADDING && MI_PADDING_CHECK + +static bool mi_verify_padding(const mi_page_t* page, const mi_block_t* block, size_t* size, size_t* wrong) { + size_t bsize; + size_t delta; + bool ok = mi_page_decode_padding(page, block, &delta, &bsize); + *size = *wrong = bsize; + if (!ok) return false; + mi_assert_internal(bsize >= delta); + *size = bsize - delta; + if (!mi_page_is_huge(page)) { + uint8_t* fill = (uint8_t*)block + bsize - delta; + const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ? MI_MAX_ALIGN_SIZE : delta); // check at most the first N padding bytes + mi_track_mem_defined(fill, maxpad); + for (size_t i = 0; i < maxpad; i++) { + if (fill[i] != MI_DEBUG_PADDING) { + *wrong = bsize - delta + i; + ok = false; + break; + } + } + mi_track_mem_noaccess(fill, maxpad); + } + return ok; +} + +static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { + size_t size; + size_t wrong; + if (!mi_verify_padding(page,block,&size,&wrong)) { + _mi_error_message(EFAULT, "buffer overflow in heap block %p of size %zu: write after %zu bytes\n", block, size, wrong ); + } +} + +#else + +static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { + MI_UNUSED(page); + MI_UNUSED(block); +} + +#endif + +// only maintain stats for smaller objects if requested +#if (MI_STAT>0) +static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) { +#if (MI_STAT < 2) + MI_UNUSED(block); +#endif + mi_heap_t* const heap = mi_heap_get_default(); + const size_t bsize = mi_page_usable_block_size(page); +#if (MI_STAT>1) + const size_t usize = mi_page_usable_size_of(page, block); + mi_heap_stat_decrease(heap, malloc, usize); +#endif + if (bsize <= MI_LARGE_OBJ_SIZE_MAX) { + mi_heap_stat_decrease(heap, normal, bsize); +#if (MI_STAT > 1) + mi_heap_stat_decrease(heap, normal_bins[_mi_bin(bsize)], 1); +#endif + } + else { + const size_t bpsize = mi_page_block_size(page); // match stat in page.c:mi_huge_page_alloc + mi_heap_stat_decrease(heap, huge, bpsize); + } +} +#else +static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) { + MI_UNUSED(page); MI_UNUSED(block); +} +#endif diff --git a/contrib/libs/mimalloc/src/heap.c b/contrib/libs/mimalloc/src/heap.c index bda10699d0..f6f2354913 100644 --- a/contrib/libs/mimalloc/src/heap.c +++ b/contrib/libs/mimalloc/src/heap.c @@ -6,8 +6,9 @@ terms of the MIT license. A copy of the license can be found in the file -----------------------------------------------------------------------------*/ #include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" +#include "mimalloc/prim.h" // mi_prim_get_default_heap #include <string.h> // memset, memcpy @@ -30,15 +31,18 @@ static bool mi_heap_visit_pages(mi_heap_t* heap, heap_page_visitor_fun* fn, void // visit all pages #if MI_DEBUG>1 size_t total = heap->page_count; - #endif size_t count = 0; + #endif + for (size_t i = 0; i <= MI_BIN_FULL; i++) { mi_page_queue_t* pq = &heap->pages[i]; mi_page_t* page = pq->first; while(page != NULL) { mi_page_t* next = page->next; // save next in case the page gets removed from the queue mi_assert_internal(mi_page_heap(page) == heap); + #if MI_DEBUG>1 count++; + #endif if (!fn(heap, pq, page, arg1, arg2)) return false; page = next; // and continue } @@ -50,9 +54,9 @@ static bool mi_heap_visit_pages(mi_heap_t* heap, heap_page_visitor_fun* fn, void #if MI_DEBUG>=2 static bool mi_heap_page_is_valid(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) { - UNUSED(arg1); - UNUSED(arg2); - UNUSED(pq); + MI_UNUSED(arg1); + MI_UNUSED(arg2); + MI_UNUSED(pq); mi_assert_internal(mi_page_heap(page) == heap); mi_segment_t* segment = _mi_page_segment(page); mi_assert_internal(segment->thread_id == heap->thread_id); @@ -86,13 +90,13 @@ typedef enum mi_collect_e { static bool mi_heap_page_collect(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg_collect, void* arg2 ) { - UNUSED(arg2); - UNUSED(heap); + MI_UNUSED(arg2); + MI_UNUSED(heap); mi_assert_internal(mi_heap_page_is_valid(heap, pq, page, NULL, NULL)); mi_collect_t collect = *((mi_collect_t*)arg_collect); _mi_page_free_collect(page, collect >= MI_FORCE); if (mi_page_all_free(page)) { - // no more used blocks, free the page. + // no more used blocks, free the page. // note: this will free retired pages as well. _mi_page_free(page, pq, collect >= MI_FORCE); } @@ -104,10 +108,10 @@ static bool mi_heap_page_collect(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t } static bool mi_heap_page_never_delayed_free(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) { - UNUSED(arg1); - UNUSED(arg2); - UNUSED(heap); - UNUSED(pq); + MI_UNUSED(arg1); + MI_UNUSED(arg2); + MI_UNUSED(heap); + MI_UNUSED(pq); _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false); return true; // don't break } @@ -115,47 +119,53 @@ static bool mi_heap_page_never_delayed_free(mi_heap_t* heap, mi_page_queue_t* pq static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) { if (heap==NULL || !mi_heap_is_initialized(heap)) return; - _mi_deferred_free(heap, collect >= MI_FORCE); - // note: never reclaim on collect but leave it to threads that need storage to reclaim + const bool force = (collect >= MI_FORCE); + _mi_deferred_free(heap, force); + + // python/cpython#112532: we may be called from a thread that is not the owner of the heap + const bool is_main_thread = (_mi_is_main_thread() && heap->thread_id == _mi_thread_id()); + + // note: never reclaim on collect but leave it to threads that need storage to reclaim if ( #ifdef NDEBUG collect == MI_FORCE #else collect >= MI_FORCE #endif - && _mi_is_main_thread() && mi_heap_is_backing(heap) && !heap->no_reclaim) + && is_main_thread && mi_heap_is_backing(heap) && !heap->no_reclaim) { // the main thread is abandoned (end-of-program), try to reclaim all abandoned segments. // if all memory is freed by now, all segments should be freed. _mi_abandoned_reclaim_all(heap, &heap->tld->segments); } - + // if abandoning, mark all pages to no longer add to delayed_free if (collect == MI_ABANDON) { mi_heap_visit_pages(heap, &mi_heap_page_never_delayed_free, NULL, NULL); } - // free thread delayed blocks. + // free all current thread delayed blocks. // (if abandoning, after this there are no more thread-delayed references into the pages.) - _mi_heap_delayed_free(heap); + _mi_heap_delayed_free_all(heap); // collect retired pages - _mi_heap_collect_retired(heap, collect >= MI_FORCE); + _mi_heap_collect_retired(heap, force); // collect all pages owned by this thread mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL); mi_assert_internal( collect != MI_ABANDON || mi_atomic_load_ptr_acquire(mi_block_t,&heap->thread_delayed_free) == NULL ); - // collect segment caches - if (collect >= MI_FORCE) { - _mi_segment_thread_collect(&heap->tld->segments); - } + // collect segments (purge pages, this can be expensive so don't force on abandonment) + _mi_segments_collect(collect == MI_FORCE, &heap->tld->segments); - // collect regions on program-exit (or shared library unload) - if (collect >= MI_FORCE && _mi_is_main_thread() && mi_heap_is_backing(heap)) { - _mi_mem_collect(&heap->tld->os); + // if forced, collect thread data cache on program-exit (or shared library unload) + if (force && is_main_thread && mi_heap_is_backing(heap)) { + _mi_thread_data_collect(); // collect thread data cache } + + // collect arenas (this is program wide so don't force purges on abandonment of threads) + _mi_arenas_collect(collect == MI_FORCE /* force purge? */, &heap->tld->stats); } void _mi_heap_collect_abandon(mi_heap_t* heap) { @@ -167,7 +177,7 @@ void mi_heap_collect(mi_heap_t* heap, bool force) mi_attr_noexcept { } void mi_collect(bool force) mi_attr_noexcept { - mi_heap_collect(mi_get_default_heap(), force); + mi_heap_collect(mi_prim_get_default_heap(), force); } @@ -177,9 +187,14 @@ void mi_collect(bool force) mi_attr_noexcept { mi_heap_t* mi_heap_get_default(void) { mi_thread_init(); - return mi_get_default_heap(); + return mi_prim_get_default_heap(); +} + +static bool mi_heap_is_default(const mi_heap_t* heap) { + return (heap == mi_prim_get_default_heap()); } + mi_heap_t* mi_heap_get_backing(void) { mi_heap_t* heap = mi_heap_get_default(); mi_assert_internal(heap!=NULL); @@ -189,24 +204,44 @@ mi_heap_t* mi_heap_get_backing(void) { return bheap; } -mi_heap_t* mi_heap_new(void) { - mi_heap_t* bheap = mi_heap_get_backing(); - mi_heap_t* heap = mi_heap_malloc_tp(bheap, mi_heap_t); // todo: OS allocate in secure mode? - if (heap==NULL) return NULL; +void _mi_heap_init(mi_heap_t* heap, mi_tld_t* tld, mi_arena_id_t arena_id, bool noreclaim, uint8_t tag) { _mi_memcpy_aligned(heap, &_mi_heap_empty, sizeof(mi_heap_t)); - heap->tld = bheap->tld; - heap->thread_id = _mi_thread_id(); - _mi_random_split(&bheap->random, &heap->random); + heap->tld = tld; + heap->thread_id = _mi_thread_id(); + heap->arena_id = arena_id; + heap->no_reclaim = noreclaim; + heap->tag = tag; + if (heap == tld->heap_backing) { + _mi_random_init(&heap->random); + } + else { + _mi_random_split(&tld->heap_backing->random, &heap->random); + } heap->cookie = _mi_heap_random_next(heap) | 1; heap->keys[0] = _mi_heap_random_next(heap); heap->keys[1] = _mi_heap_random_next(heap); - heap->no_reclaim = true; // don't reclaim abandoned pages or otherwise destroy is unsafe // push on the thread local heaps list heap->next = heap->tld->heaps; heap->tld->heaps = heap; +} + +mi_decl_nodiscard mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t arena_id) { + mi_heap_t* bheap = mi_heap_get_backing(); + mi_heap_t* heap = mi_heap_malloc_tp(bheap, mi_heap_t); // todo: OS allocate in secure mode? + if (heap == NULL) return NULL; + // don't reclaim abandoned pages or otherwise destroy is unsafe + _mi_heap_init(heap, bheap->tld, arena_id, true /* no reclaim */, 0 /* default tag */); return heap; } +mi_decl_nodiscard mi_heap_t* mi_heap_new(void) { + return mi_heap_new_in_arena(_mi_arena_id_none()); +} + +bool _mi_heap_memid_is_suitable(mi_heap_t* heap, mi_memid_t memid) { + return _mi_arena_memid_is_suitable(memid, heap->arena_id); +} + uintptr_t _mi_heap_random_next(mi_heap_t* heap) { return _mi_random_next(&heap->random); } @@ -217,9 +252,6 @@ static void mi_heap_reset_pages(mi_heap_t* heap) { mi_assert_internal(mi_heap_is_initialized(heap)); // TODO: copy full empty heap instead? memset(&heap->pages_free_direct, 0, sizeof(heap->pages_free_direct)); -#ifdef MI_MEDIUM_DIRECT - memset(&heap->pages_free_medium, 0, sizeof(heap->pages_free_medium)); -#endif _mi_memcpy_aligned(&heap->pages, &_mi_heap_empty.pages, sizeof(heap->pages)); heap->thread_delayed_free = NULL; heap->page_count = 0; @@ -240,7 +272,7 @@ static void mi_heap_free(mi_heap_t* heap) { // remove ourselves from the thread local heaps list // linear search but we expect the number of heaps to be relatively small mi_heap_t* prev = NULL; - mi_heap_t* curr = heap->tld->heaps; + mi_heap_t* curr = heap->tld->heaps; while (curr != heap && curr != NULL) { prev = curr; curr = curr->next; @@ -256,16 +288,28 @@ static void mi_heap_free(mi_heap_t* heap) { mi_free(heap); } +// return a heap on the same thread as `heap` specialized for the specified tag (if it exists) +mi_heap_t* _mi_heap_by_tag(mi_heap_t* heap, uint8_t tag) { + if (heap->tag == tag) { + return heap; + } + for (mi_heap_t *curr = heap->tld->heaps; curr != NULL; curr = curr->next) { + if (curr->tag == tag) { + return curr; + } + } + return NULL; +} /* ----------------------------------------------------------- Heap destroy ----------------------------------------------------------- */ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* arg1, void* arg2) { - UNUSED(arg1); - UNUSED(arg2); - UNUSED(heap); - UNUSED(pq); + MI_UNUSED(arg1); + MI_UNUSED(arg2); + MI_UNUSED(heap); + MI_UNUSED(pq); // ensure no more thread_delayed_free will be added _mi_page_use_delayed_free(page, MI_NEVER_DELAYED_FREE, false); @@ -273,12 +317,7 @@ static bool _mi_heap_page_destroy(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_ // stats const size_t bsize = mi_page_block_size(page); if (bsize > MI_LARGE_OBJ_SIZE_MAX) { - if (bsize > MI_HUGE_OBJ_SIZE_MAX) { - mi_heap_stat_decrease(heap, giant, bsize); - } - else { - mi_heap_stat_decrease(heap, huge, bsize); - } + mi_heap_stat_decrease(heap, huge, bsize); } #if (MI_STAT) _mi_page_free_collect(page, false); // update used count @@ -310,6 +349,14 @@ void _mi_heap_destroy_pages(mi_heap_t* heap) { mi_heap_reset_pages(heap); } +#if MI_TRACK_HEAP_DESTROY +static bool mi_cdecl mi_heap_track_block_free(const mi_heap_t* heap, const mi_heap_area_t* area, void* block, size_t block_size, void* arg) { + MI_UNUSED(heap); MI_UNUSED(area); MI_UNUSED(arg); MI_UNUSED(block_size); + mi_track_free_size(block,mi_usable_size(block)); + return true; +} +#endif + void mi_heap_destroy(mi_heap_t* heap) { mi_assert(heap != NULL); mi_assert(mi_heap_is_initialized(heap)); @@ -321,27 +368,45 @@ void mi_heap_destroy(mi_heap_t* heap) { mi_heap_delete(heap); } else { + // track all blocks as freed + #if MI_TRACK_HEAP_DESTROY + mi_heap_visit_blocks(heap, true, mi_heap_track_block_free, NULL); + #endif // free all pages _mi_heap_destroy_pages(heap); mi_heap_free(heap); } } - +// forcefully destroy all heaps in the current thread +void _mi_heap_unsafe_destroy_all(void) { + mi_heap_t* bheap = mi_heap_get_backing(); + mi_heap_t* curr = bheap->tld->heaps; + while (curr != NULL) { + mi_heap_t* next = curr->next; + if (curr->no_reclaim) { + mi_heap_destroy(curr); + } + else { + _mi_heap_destroy_pages(curr); + } + curr = next; + } +} /* ----------------------------------------------------------- Safe Heap delete ----------------------------------------------------------- */ -// Tranfer the pages from one heap to the other +// Transfer the pages from one heap to the other static void mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from) { mi_assert_internal(heap!=NULL); if (from==NULL || from->page_count == 0) return; // reduce the size of the delayed frees - _mi_heap_delayed_free(from); - - // transfer all pages by appending the queues; this will set a new heap field + _mi_heap_delayed_free_partial(from); + + // transfer all pages by appending the queues; this will set a new heap field // so threads may do delayed frees in either heap for a while. // note: appending waits for each page to not be in the `MI_DELAYED_FREEING` state // so after this only the new heap will get delayed frees @@ -354,17 +419,17 @@ static void mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from) { } mi_assert_internal(from->page_count == 0); - // and do outstanding delayed frees in the `from` heap + // and do outstanding delayed frees in the `from` heap // note: be careful here as the `heap` field in all those pages no longer point to `from`, - // turns out to be ok as `_mi_heap_delayed_free` only visits the list and calls a + // turns out to be ok as `_mi_heap_delayed_free` only visits the list and calls a // the regular `_mi_free_delayed_block` which is safe. - _mi_heap_delayed_free(from); + _mi_heap_delayed_free_all(from); #if !defined(_MSC_VER) || (_MSC_VER > 1900) // somehow the following line gives an error in VS2015, issue #353 mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_block_t,&from->thread_delayed_free) == NULL); #endif // and reset the `from` heap - mi_heap_reset_pages(from); + mi_heap_reset_pages(from); } // Safe delete a heap without freeing any still allocated blocks in that heap. @@ -376,7 +441,7 @@ void mi_heap_delete(mi_heap_t* heap) if (heap==NULL || !mi_heap_is_initialized(heap)) return; if (!mi_heap_is_backing(heap)) { - // tranfer still used pages to the backing heap + // transfer still used pages to the backing heap mi_heap_absorb(heap->tld->heap_backing, heap); } else { @@ -392,7 +457,7 @@ mi_heap_t* mi_heap_set_default(mi_heap_t* heap) { mi_assert(mi_heap_is_initialized(heap)); if (heap==NULL || !mi_heap_is_initialized(heap)) return NULL; mi_assert_expensive(mi_heap_is_valid(heap)); - mi_heap_t* old = mi_get_default_heap(); + mi_heap_t* old = mi_prim_get_default_heap(); _mi_heap_set_default_direct(heap); return old; } @@ -410,7 +475,7 @@ static mi_heap_t* mi_heap_of_block(const void* p) { mi_segment_t* segment = _mi_ptr_segment(p); bool valid = (_mi_ptr_cookie(segment) == segment->cookie); mi_assert_internal(valid); - if (mi_unlikely(!valid)) return NULL; + if mi_unlikely(!valid) return NULL; return mi_page_heap(_mi_segment_page_of(segment,p)); } @@ -422,11 +487,10 @@ bool mi_heap_contains_block(mi_heap_t* heap, const void* p) { static bool mi_heap_page_check_owned(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* p, void* vfound) { - UNUSED(heap); - UNUSED(pq); + MI_UNUSED(heap); + MI_UNUSED(pq); bool* found = (bool*)vfound; - mi_segment_t* segment = _mi_page_segment(page); - void* start = _mi_page_start(segment, page, NULL); + void* start = mi_page_start(page); void* end = (uint8_t*)start + (page->capacity * mi_page_block_size(page)); *found = (p >= start && p < end); return (!*found); // continue if not found @@ -442,7 +506,7 @@ bool mi_heap_check_owned(mi_heap_t* heap, const void* p) { } bool mi_check_owned(const void* p) { - return mi_heap_check_owned(mi_get_default_heap(), p); + return mi_heap_check_owned(mi_prim_get_default_heap(), p); } /* ----------------------------------------------------------- @@ -470,13 +534,14 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v if (page->used == 0) return true; const size_t bsize = mi_page_block_size(page); + const size_t ubsize = mi_page_usable_block_size(page); // without padding size_t psize; - uint8_t* pstart = _mi_page_start(_mi_page_segment(page), page, &psize); + uint8_t* pstart = _mi_segment_page_start(_mi_page_segment(page), page, &psize); if (page->capacity == 1) { // optimize page with one block mi_assert_internal(page->used == 1 && page->free == NULL); - return visitor(mi_page_heap(page), area, pstart, bsize, arg); + return visitor(mi_page_heap(page), area, pstart, ubsize, arg); } // create a bitmap of free blocks. @@ -484,9 +549,13 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v uintptr_t free_map[MI_MAX_BLOCKS / sizeof(uintptr_t)]; memset(free_map, 0, sizeof(free_map)); + #if MI_DEBUG>1 size_t free_count = 0; + #endif for (mi_block_t* block = page->free; block != NULL; block = mi_block_next(page,block)) { + #if MI_DEBUG>1 free_count++; + #endif mi_assert_internal((uint8_t*)block >= pstart && (uint8_t*)block < (pstart + psize)); size_t offset = (uint8_t*)block - pstart; mi_assert_internal(offset % bsize == 0); @@ -499,7 +568,9 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v mi_assert_internal(page->capacity == (free_count + page->used)); // walk through all blocks skipping the free ones + #if MI_DEBUG>1 size_t used_count = 0; + #endif for (size_t i = 0; i < page->capacity; i++) { size_t bitidx = (i / sizeof(uintptr_t)); size_t bit = i - (bitidx * sizeof(uintptr_t)); @@ -508,9 +579,11 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v i += (sizeof(uintptr_t) - 1); // skip a run of free blocks } else if ((m & ((uintptr_t)1 << bit)) == 0) { + #if MI_DEBUG>1 used_count++; + #endif uint8_t* block = pstart + (i * bsize); - if (!visitor(mi_page_heap(page), area, block, bsize, arg)) return false; + if (!visitor(mi_page_heap(page), area, block, ubsize, arg)) return false; } } mi_assert_internal(page->used == used_count); @@ -521,17 +594,19 @@ typedef bool (mi_heap_area_visit_fun)(const mi_heap_t* heap, const mi_heap_area_ static bool mi_heap_visit_areas_page(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* vfun, void* arg) { - UNUSED(heap); - UNUSED(pq); + MI_UNUSED(heap); + MI_UNUSED(pq); mi_heap_area_visit_fun* fun = (mi_heap_area_visit_fun*)vfun; mi_heap_area_ex_t xarea; const size_t bsize = mi_page_block_size(page); + const size_t ubsize = mi_page_usable_block_size(page); xarea.page = page; xarea.area.reserved = page->reserved * bsize; xarea.area.committed = page->capacity * bsize; - xarea.area.blocks = _mi_page_start(_mi_page_segment(page), page, NULL); - xarea.area.used = page->used; - xarea.area.block_size = bsize; + xarea.area.blocks = mi_page_start(page); + xarea.area.used = page->used; // number of blocks in use (#553) + xarea.area.block_size = ubsize; + xarea.area.full_block_size = bsize; return fun(heap, &xarea, arg); } diff --git a/contrib/libs/mimalloc/src/init.c b/contrib/libs/mimalloc/src/init.c index c0f09b5ed8..62bb69ddcb 100644 --- a/contrib/libs/mimalloc/src/init.c +++ b/contrib/libs/mimalloc/src/init.c @@ -1,33 +1,42 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2018-2021, Microsoft Research, Daan Leijen +Copyright (c) 2018-2022, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ #include "mimalloc.h" -#include "mimalloc-internal.h" +#include "mimalloc/internal.h" +#include "mimalloc/prim.h" #include <string.h> // memcpy, memset #include <stdlib.h> // atexit + // Empty page used to initialize the small free pages array const mi_page_t _mi_page_empty = { - 0, false, false, false, false, + 0, + false, false, false, false, 0, // capacity 0, // reserved capacity { 0 }, // flags false, // is_zero 0, // retire_expire NULL, // free - #if MI_ENCODE_FREELIST + NULL, // local_free + 0, // used + 0, // block size shift + 0, // heap tag + 0, // block_size + NULL, // page_start + #if (MI_PADDING || MI_ENCODE_FREELIST) { 0, 0 }, #endif - 0, // used - 0, // xblock_size - NULL, // local_free - ATOMIC_VAR_INIT(0), // xthread_free - ATOMIC_VAR_INIT(0), // xheap + MI_ATOMIC_VAR_INIT(0), // xthread_free + MI_ATOMIC_VAR_INIT(0), // xheap NULL, NULL + #if MI_INTPTR_SIZE==4 + , { NULL } + #endif }; #define MI_PAGE_EMPTY() ((mi_page_t*)&_mi_page_empty) @@ -74,7 +83,9 @@ const mi_page_t _mi_page_empty = { MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ - { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, \ + MI_STAT_COUNT_NULL(), \ + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, \ + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, \ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 } \ MI_STAT_COUNT_END_NULL() @@ -89,19 +100,26 @@ const mi_page_t _mi_page_empty = { mi_decl_cache_align const mi_heap_t _mi_heap_empty = { NULL, - MI_SMALL_PAGES_EMPTY, - MI_PAGE_QUEUES_EMPTY, - ATOMIC_VAR_INIT(NULL), + MI_ATOMIC_VAR_INIT(NULL), 0, // tid 0, // cookie + 0, // arena id { 0, 0 }, // keys - { {0}, {0}, 0 }, + { {0}, {0}, 0, true }, // random 0, // page count MI_BIN_FULL, 0, // page retired min/max NULL, // next - false + false, // can reclaim + 0, // tag + MI_SMALL_PAGES_EMPTY, + MI_PAGE_QUEUES_EMPTY }; + +mi_threadid_t _mi_thread_id(void) mi_attr_noexcept { + return _mi_prim_thread_id(); +} + // the thread-local default heap for allocation mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty; @@ -111,7 +129,7 @@ static mi_tld_t tld_main = { 0, false, &_mi_heap_main, &_mi_heap_main, { { NULL, NULL }, {NULL ,NULL}, {NULL ,NULL, 0}, - 0, 0, 0, 0, 0, 0, NULL, + 0, 0, 0, 0, 0, &tld_main.stats, &tld_main.os }, // segments { 0, &tld_main.stats }, // os @@ -120,17 +138,19 @@ static mi_tld_t tld_main = { mi_heap_t _mi_heap_main = { &tld_main, - MI_SMALL_PAGES_EMPTY, - MI_PAGE_QUEUES_EMPTY, - ATOMIC_VAR_INIT(NULL), + MI_ATOMIC_VAR_INIT(NULL), 0, // thread id 0, // initial cookie + 0, // arena id { 0, 0 }, // the key of the main heap can be fixed (unlike page keys that need to be secure!) - { {0x846ca68b}, {0}, 0 }, // random + { {0x846ca68b}, {0}, 0, true }, // random 0, // page count MI_BIN_FULL, 0, // page retired min/max NULL, // next heap - false // can reclaim + false, // can reclaim + 0, // tag + MI_SMALL_PAGES_EMPTY, + MI_PAGE_QUEUES_EMPTY }; bool _mi_process_is_initialized = false; // set to `true` in `mi_process_init`. @@ -141,8 +161,13 @@ mi_stats_t _mi_stats_main = { MI_STATS_NULL }; static void mi_heap_main_init(void) { if (_mi_heap_main.cookie == 0) { _mi_heap_main.thread_id = _mi_thread_id(); - _mi_heap_main.cookie = _os_random_weak((uintptr_t)&mi_heap_main_init); - _mi_random_init(&_mi_heap_main.random); + _mi_heap_main.cookie = 1; + #if defined(_WIN32) && !defined(MI_SHARED_LIB) + _mi_random_init_weak(&_mi_heap_main.random); // prevent allocation failure during bcrypt dll initialization with static linking + #else + _mi_random_init(&_mi_heap_main.random); + #endif + _mi_heap_main.cookie = _mi_heap_random_next(&_mi_heap_main); _mi_heap_main.keys[0] = _mi_heap_random_next(&_mi_heap_main); _mi_heap_main.keys[1] = _mi_heap_random_next(&_mi_heap_main); } @@ -160,54 +185,123 @@ mi_heap_t* _mi_heap_main_get(void) { // note: in x64 in release build `sizeof(mi_thread_data_t)` is under 4KiB (= OS page size). typedef struct mi_thread_data_s { - mi_heap_t heap; // must come first due to cast in `_mi_heap_done` + mi_heap_t heap; // must come first due to cast in `_mi_heap_done` mi_tld_t tld; + mi_memid_t memid; // must come last due to zero'ing } mi_thread_data_t; + +// Thread meta-data is allocated directly from the OS. For +// some programs that do not use thread pools and allocate and +// destroy many OS threads, this may causes too much overhead +// per thread so we maintain a small cache of recently freed metadata. + +#define TD_CACHE_SIZE (16) +static _Atomic(mi_thread_data_t*) td_cache[TD_CACHE_SIZE]; + +static mi_thread_data_t* mi_thread_data_zalloc(void) { + // try to find thread metadata in the cache + bool is_zero = false; + mi_thread_data_t* td = NULL; + for (int i = 0; i < TD_CACHE_SIZE; i++) { + td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]); + if (td != NULL) { + // found cached allocation, try use it + td = mi_atomic_exchange_ptr_acq_rel(mi_thread_data_t, &td_cache[i], NULL); + if (td != NULL) { + break; + } + } + } + + // if that fails, allocate as meta data + if (td == NULL) { + mi_memid_t memid; + td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &memid, &_mi_stats_main); + if (td == NULL) { + // if this fails, try once more. (issue #257) + td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &memid, &_mi_stats_main); + if (td == NULL) { + // really out of memory + _mi_error_message(ENOMEM, "unable to allocate thread local heap metadata (%zu bytes)\n", sizeof(mi_thread_data_t)); + } + } + if (td != NULL) { + td->memid = memid; + is_zero = memid.initially_zero; + } + } + + if (td != NULL && !is_zero) { + _mi_memzero_aligned(td, offsetof(mi_thread_data_t,memid)); + } + return td; +} + +static void mi_thread_data_free( mi_thread_data_t* tdfree ) { + // try to add the thread metadata to the cache + for (int i = 0; i < TD_CACHE_SIZE; i++) { + mi_thread_data_t* td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]); + if (td == NULL) { + mi_thread_data_t* expected = NULL; + if (mi_atomic_cas_ptr_weak_acq_rel(mi_thread_data_t, &td_cache[i], &expected, tdfree)) { + return; + } + } + } + // if that fails, just free it directly + _mi_os_free(tdfree, sizeof(mi_thread_data_t), tdfree->memid, &_mi_stats_main); +} + +void _mi_thread_data_collect(void) { + // free all thread metadata from the cache + for (int i = 0; i < TD_CACHE_SIZE; i++) { + mi_thread_data_t* td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]); + if (td != NULL) { + td = mi_atomic_exchange_ptr_acq_rel(mi_thread_data_t, &td_cache[i], NULL); + if (td != NULL) { + _mi_os_free(td, sizeof(mi_thread_data_t), td->memid, &_mi_stats_main); + } + } + } +} + // Initialize the thread local default heap, called from `mi_thread_init` -static bool _mi_heap_init(void) { - if (mi_heap_is_initialized(mi_get_default_heap())) return true; +static bool _mi_thread_heap_init(void) { + if (mi_heap_is_initialized(mi_prim_get_default_heap())) return true; if (_mi_is_main_thread()) { // mi_assert_internal(_mi_heap_main.thread_id != 0); // can happen on freeBSD where alloc is called before any initialization // the main heap is statically allocated mi_heap_main_init(); _mi_heap_set_default_direct(&_mi_heap_main); - //mi_assert_internal(_mi_heap_default->tld->heap_backing == mi_get_default_heap()); + //mi_assert_internal(_mi_heap_default->tld->heap_backing == mi_prim_get_default_heap()); } else { // use `_mi_os_alloc` to allocate directly from the OS - mi_thread_data_t* td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &_mi_stats_main); // Todo: more efficient allocation? - if (td == NULL) { - // if this fails, try once more. (issue #257) - td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &_mi_stats_main); - if (td == NULL) { - // really out of memory - _mi_error_message(ENOMEM, "unable to allocate thread local heap metadata (%zu bytes)\n", sizeof(mi_thread_data_t)); - return false; - } - } - // OS allocated so already zero initialized + mi_thread_data_t* td = mi_thread_data_zalloc(); + if (td == NULL) return false; + mi_tld_t* tld = &td->tld; mi_heap_t* heap = &td->heap; - _mi_memcpy_aligned(heap, &_mi_heap_empty, sizeof(*heap)); - heap->thread_id = _mi_thread_id(); - _mi_random_init(&heap->random); - heap->cookie = _mi_heap_random_next(heap) | 1; - heap->keys[0] = _mi_heap_random_next(heap); - heap->keys[1] = _mi_heap_random_next(heap); - heap->tld = tld; - tld->heap_backing = heap; - tld->heaps = heap; - tld->segments.stats = &tld->stats; - tld->segments.os = &tld->os; - tld->os.stats = &tld->stats; - _mi_heap_set_default_direct(heap); + _mi_tld_init(tld, heap); // must be before `_mi_heap_init` + _mi_heap_init(heap, tld, _mi_arena_id_none(), false /* can reclaim */, 0 /* default tag */); + _mi_heap_set_default_direct(heap); } return false; } +// initialize thread local data +void _mi_tld_init(mi_tld_t* tld, mi_heap_t* bheap) { + _mi_memzero_aligned(tld,sizeof(mi_tld_t)); + tld->heap_backing = bheap; + tld->heaps = NULL; + tld->segments.stats = &tld->stats; + tld->segments.os = &tld->os; + tld->os.stats = &tld->stats; +} + // Free the thread local default heap (called from `mi_thread_done`) -static bool _mi_heap_done(mi_heap_t* heap) { +static bool _mi_thread_heap_done(mi_heap_t* heap) { if (!mi_heap_is_initialized(heap)) return true; // reset default heap @@ -234,23 +328,23 @@ static bool _mi_heap_done(mi_heap_t* heap) { if (heap != &_mi_heap_main) { _mi_heap_collect_abandon(heap); } - + // merge stats - _mi_stats_done(&heap->tld->stats); + _mi_stats_done(&heap->tld->stats); // free if not the main thread if (heap != &_mi_heap_main) { mi_assert_internal(heap->tld->segments.count == 0 || heap->thread_id != _mi_thread_id()); - _mi_os_free(heap, sizeof(mi_thread_data_t), &_mi_stats_main); + mi_thread_data_free((mi_thread_data_t*)heap); } -#if 0 - // never free the main thread even in debug mode; if a dll is linked statically with mimalloc, - // there may still be delete/free calls after the mi_fls_done is called. Issue #207 else { + #if 0 + // never free the main thread even in debug mode; if a dll is linked statically with mimalloc, + // there may still be delete/free calls after the mi_fls_done is called. Issue #207 _mi_heap_destroy_pages(heap); mi_assert_internal(heap->tld->heap_backing == &_mi_heap_main); + #endif } -#endif return false; } @@ -272,57 +366,12 @@ static bool _mi_heap_done(mi_heap_t* heap) { // to set up the thread local keys. // -------------------------------------------------------- -static void _mi_thread_done(mi_heap_t* default_heap); - -#ifdef __wasi__ -// no pthreads in the WebAssembly Standard Interface -#elif !defined(_WIN32) -#define MI_USE_PTHREADS -#endif - -#if defined(_WIN32) && defined(MI_SHARED_LIB) - // nothing to do as it is done in DllMain -#elif defined(_WIN32) && !defined(MI_SHARED_LIB) - // use thread local storage keys to detect thread ending - #include <windows.h> - #include <fibersapi.h> - #if (_WIN32_WINNT < 0x600) // before Windows Vista - WINBASEAPI DWORD WINAPI FlsAlloc( _In_opt_ PFLS_CALLBACK_FUNCTION lpCallback ); - WINBASEAPI PVOID WINAPI FlsGetValue( _In_ DWORD dwFlsIndex ); - WINBASEAPI BOOL WINAPI FlsSetValue( _In_ DWORD dwFlsIndex, _In_opt_ PVOID lpFlsData ); - WINBASEAPI BOOL WINAPI FlsFree(_In_ DWORD dwFlsIndex); - #endif - static DWORD mi_fls_key = (DWORD)(-1); - static void NTAPI mi_fls_done(PVOID value) { - if (value!=NULL) _mi_thread_done((mi_heap_t*)value); - } -#elif defined(MI_USE_PTHREADS) - // use pthread local storage keys to detect thread ending - // (and used with MI_TLS_PTHREADS for the default heap) - #include <pthread.h> - pthread_key_t _mi_heap_default_key = (pthread_key_t)(-1); - static void mi_pthread_done(void* value) { - if (value!=NULL) _mi_thread_done((mi_heap_t*)value); - } -#elif defined(__wasi__) -// no pthreads in the WebAssembly Standard Interface -#else - #pragma message("define a way to call mi_thread_done when a thread is done") -#endif - // Set up handlers so `mi_thread_done` is called automatically static void mi_process_setup_auto_thread_done(void) { static bool tls_initialized = false; // fine if it races if (tls_initialized) return; tls_initialized = true; - #if defined(_WIN32) && defined(MI_SHARED_LIB) - // nothing to do as it is done in DllMain - #elif defined(_WIN32) && !defined(MI_SHARED_LIB) - mi_fls_key = FlsAlloc(&mi_fls_done); - #elif defined(MI_USE_PTHREADS) - mi_assert_internal(_mi_heap_default_key == (pthread_key_t)(-1)); - pthread_key_create(&_mi_heap_default_key, &mi_pthread_done); - #endif + _mi_prim_thread_init_auto_done(); _mi_heap_set_default_direct(&_mi_heap_main); } @@ -331,41 +380,62 @@ bool _mi_is_main_thread(void) { return (_mi_heap_main.thread_id==0 || _mi_heap_main.thread_id == _mi_thread_id()); } +static _Atomic(size_t) thread_count = MI_ATOMIC_VAR_INIT(1); + +size_t _mi_current_thread_count(void) { + return mi_atomic_load_relaxed(&thread_count); +} + // This is called from the `mi_malloc_generic` void mi_thread_init(void) mi_attr_noexcept { // ensure our process has started already mi_process_init(); - + // initialize the thread local default heap // (this will call `_mi_heap_set_default_direct` and thus set the // fiber/pthread key to a non-zero value, ensuring `_mi_thread_done` is called) - if (_mi_heap_init()) return; // returns true if already initialized + if (_mi_thread_heap_init()) return; // returns true if already initialized _mi_stat_increase(&_mi_stats_main.threads, 1); + mi_atomic_increment_relaxed(&thread_count); //_mi_verbose_message("thread init: 0x%zx\n", _mi_thread_id()); } void mi_thread_done(void) mi_attr_noexcept { - _mi_thread_done(mi_get_default_heap()); + _mi_thread_done(NULL); } -static void _mi_thread_done(mi_heap_t* heap) { +void _mi_thread_done(mi_heap_t* heap) +{ + // calling with NULL implies using the default heap + if (heap == NULL) { + heap = mi_prim_get_default_heap(); + if (heap == NULL) return; + } + + // prevent re-entrancy through heap_done/heap_set_default_direct (issue #699) + if (!mi_heap_is_initialized(heap)) { + return; + } + + // adjust stats + mi_atomic_decrement_relaxed(&thread_count); _mi_stat_decrease(&_mi_stats_main.threads, 1); // check thread-id as on Windows shutdown with FLS the main (exit) thread may call this on thread-local heaps... if (heap->thread_id != _mi_thread_id()) return; - + // abandon the thread local heap - if (_mi_heap_done(heap)) return; // returns true if already ran + if (_mi_thread_heap_done(heap)) return; // returns true if already ran } void _mi_heap_set_default_direct(mi_heap_t* heap) { mi_assert_internal(heap != NULL); #if defined(MI_TLS_SLOT) - mi_tls_slot_set(MI_TLS_SLOT,heap); + mi_prim_tls_slot_set(MI_TLS_SLOT,heap); #elif defined(MI_TLS_PTHREAD_SLOT_OFS) - *mi_tls_pthread_heap_slot() = heap; + *mi_prim_tls_pthread_heap_slot() = heap; #elif defined(MI_TLS_PTHREAD) // we use _mi_heap_default_key #else @@ -374,38 +444,29 @@ void _mi_heap_set_default_direct(mi_heap_t* heap) { // ensure the default heap is passed to `_mi_thread_done` // setting to a non-NULL value also ensures `mi_thread_done` is called. - #if defined(_WIN32) && defined(MI_SHARED_LIB) - // nothing to do as it is done in DllMain - #elif defined(_WIN32) && !defined(MI_SHARED_LIB) - mi_assert_internal(mi_fls_key != 0); - FlsSetValue(mi_fls_key, heap); - #elif defined(MI_USE_PTHREADS) - if (_mi_heap_default_key != (pthread_key_t)(-1)) { // can happen during recursive invocation on freeBSD - pthread_setspecific(_mi_heap_default_key, heap); - } - #endif + _mi_prim_thread_associate_default_heap(heap); } // -------------------------------------------------------- // Run functions on process init/done, and thread init/done // -------------------------------------------------------- -static void mi_process_done(void); +static void mi_cdecl mi_process_done(void); static bool os_preloading = true; // true until this module is initialized static bool mi_redirected = false; // true if malloc redirects to mi_malloc // Returns true if this module has not been initialized; Don't use C runtime routines until it returns false. -bool _mi_preloading(void) { +bool mi_decl_noinline _mi_preloading(void) { return os_preloading; } -bool mi_is_redirected(void) mi_attr_noexcept { +mi_decl_nodiscard bool mi_is_redirected(void) mi_attr_noexcept { return mi_redirected; } // Communicate with the redirection module on Windows -#if defined(_WIN32) && defined(MI_SHARED_LIB) +#if defined(_WIN32) && defined(MI_SHARED_LIB) && !defined(MI_WIN_NOREDIRECT) #ifdef __cplusplus extern "C" { #endif @@ -421,8 +482,8 @@ mi_decl_export void _mi_redirect_entry(DWORD reason) { mi_thread_done(); } } -__declspec(dllimport) bool mi_allocator_init(const char** message); -__declspec(dllimport) void mi_allocator_done(void); +__declspec(dllimport) bool mi_cdecl mi_allocator_init(const char** message); +__declspec(dllimport) void mi_cdecl mi_allocator_done(void); #ifdef __cplusplus } #endif @@ -439,15 +500,18 @@ static void mi_allocator_done(void) { // Called once by the process loader static void mi_process_load(void) { mi_heap_main_init(); - #if defined(MI_TLS_RECURSE_GUARD) + #if defined(__APPLE__) || defined(MI_TLS_RECURSE_GUARD) volatile mi_heap_t* dummy = _mi_heap_default; // access TLS to allocate it before setting tls_initialized to true; - UNUSED(dummy); + if (dummy == NULL) return; // use dummy or otherwise the access may get optimized away (issue #697) #endif os_preloading = false; + mi_assert_internal(_mi_is_main_thread()); + #if !(defined(_WIN32) && defined(MI_SHARED_LIB)) // use Dll process detach (see below) instead of atexit (issue #521) atexit(&mi_process_done); + #endif _mi_options_init(); + mi_process_setup_auto_thread_done(); mi_process_init(); - //mi_stats_reset();- if (mi_redirected) _mi_verbose_message("malloc is redirected.\n"); // show message from the redirector (if present) @@ -456,6 +520,9 @@ static void mi_process_load(void) { if (msg != NULL && (mi_option_is_enabled(mi_option_verbose) || mi_option_is_enabled(mi_option_show_errors))) { _mi_fputs(NULL,NULL,NULL,msg); } + + // reseed random + _mi_random_reinit_if_weak(&_mi_heap_main.random); } #if defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64)) @@ -466,7 +533,7 @@ static void mi_detect_cpu_features(void) { // FSRM for fast rep movsb support (AMD Zen3+ (~2020) or Intel Ice Lake+ (~2017)) int32_t cpu_info[4]; __cpuid(cpu_info, 7); - _mi_cpu_has_fsrm = ((cpu_info[3] & (1 << 4)) != 0); // bit 4 of EDX : see <https ://en.wikipedia.org/wiki/CPUID#EAX=7,_ECX=0:_Extended_Features> + _mi_cpu_has_fsrm = ((cpu_info[3] & (1 << 4)) != 0); // bit 4 of EDX : see <https://en.wikipedia.org/wiki/CPUID#EAX=7,_ECX=0:_Extended_Features> } #else static void mi_detect_cpu_features(void) { @@ -477,33 +544,57 @@ static void mi_detect_cpu_features(void) { // Initialize the process; called by thread_init or the process loader void mi_process_init(void) mi_attr_noexcept { // ensure we are called once - if (_mi_process_is_initialized) return; + static mi_atomic_once_t process_init; + #if _MSC_VER < 1920 + mi_heap_main_init(); // vs2017 can dynamically re-initialize _mi_heap_main + #endif + if (!mi_atomic_once(&process_init)) return; _mi_process_is_initialized = true; + _mi_verbose_message("process init: 0x%zx\n", _mi_thread_id()); mi_process_setup_auto_thread_done(); - _mi_verbose_message("process init: 0x%zx\n", _mi_thread_id()); mi_detect_cpu_features(); _mi_os_init(); mi_heap_main_init(); - #if (MI_DEBUG) + #if MI_DEBUG _mi_verbose_message("debug level : %d\n", MI_DEBUG); #endif _mi_verbose_message("secure level: %d\n", MI_SECURE); + _mi_verbose_message("mem tracking: %s\n", MI_TRACK_TOOL); + #if MI_TSAN + _mi_verbose_message("thread santizer enabled\n"); + #endif mi_thread_init(); + + #if defined(_WIN32) + // On windows, when building as a static lib the FLS cleanup happens to early for the main thread. + // To avoid this, set the FLS value for the main thread to NULL so the fls cleanup + // will not call _mi_thread_done on the (still executing) main thread. See issue #508. + _mi_prim_thread_associate_default_heap(NULL); + #endif + mi_stats_reset(); // only call stat reset *after* thread init (or the heap tld == NULL) + mi_track_init(); if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { - size_t pages = mi_option_get(mi_option_reserve_huge_os_pages); - mi_reserve_huge_os_pages_interleave(pages, 0, pages*500); - } + size_t pages = mi_option_get_clamp(mi_option_reserve_huge_os_pages, 0, 128*1024); + long reserve_at = mi_option_get(mi_option_reserve_huge_os_pages_at); + if (reserve_at != -1) { + mi_reserve_huge_os_pages_at(pages, reserve_at, pages*500); + } else { + mi_reserve_huge_os_pages_interleave(pages, 0, pages*500); + } + } if (mi_option_is_enabled(mi_option_reserve_os_memory)) { long ksize = mi_option_get(mi_option_reserve_os_memory); - if (ksize > 0) mi_reserve_os_memory((size_t)ksize*KiB, true, true); + if (ksize > 0) { + mi_reserve_os_memory((size_t)ksize*MI_KiB, true, true); + } } } // Called when the process is done (through `at_exit`) -static void mi_process_done(void) { +static void mi_cdecl mi_process_done(void) { // only shutdown if we were initialized if (!_mi_process_is_initialized) return; // ensure we are called once @@ -511,22 +602,31 @@ static void mi_process_done(void) { if (process_done) return; process_done = true; - #if defined(_WIN32) && !defined(MI_SHARED_LIB) - FlsSetValue(mi_fls_key, NULL); // don't call main-thread callback - FlsFree(mi_fls_key); // call thread-done on all threads to prevent dangling callback pointer if statically linked with a DLL; Issue #208 - #endif - - #if (MI_DEBUG != 0) || !defined(MI_SHARED_LIB) - // free all memory if possible on process exit. This is not needed for a stand-alone process - // but should be done if mimalloc is statically linked into another shared library which - // is repeatedly loaded/unloaded, see issue #281. - mi_collect(true /* force */ ); + // release any thread specific resources and ensure _mi_thread_done is called on all but the main thread + _mi_prim_thread_done_auto_done(); + + #ifndef MI_SKIP_COLLECT_ON_EXIT + #if (MI_DEBUG || !defined(MI_SHARED_LIB)) + // free all memory if possible on process exit. This is not needed for a stand-alone process + // but should be done if mimalloc is statically linked into another shared library which + // is repeatedly loaded/unloaded, see issue #281. + mi_collect(true /* force */ ); + #endif #endif + // Forcefully release all retained memory; this can be dangerous in general if overriding regular malloc/free + // since after process_done there might still be other code running that calls `free` (like at_exit routines, + // or C-runtime termination code. + if (mi_option_is_enabled(mi_option_destroy_on_exit)) { + mi_collect(true /* force */); + _mi_heap_unsafe_destroy_all(); // forcefully release all memory held by all heaps (of this thread only!) + _mi_arena_unsafe_destroy_all(& _mi_heap_main_get()->tld->stats); + } + if (mi_option_is_enabled(mi_option_show_stats) || mi_option_is_enabled(mi_option_verbose)) { mi_stats_print(NULL); } - mi_allocator_done(); + mi_allocator_done(); _mi_verbose_message("process done: 0x%zx\n", _mi_heap_main.thread_id); os_preloading = true; // don't call the C runtime anymore } @@ -536,31 +636,22 @@ static void mi_process_done(void) { #if defined(_WIN32) && defined(MI_SHARED_LIB) // Windows DLL: easy to hook into process_init and thread_done __declspec(dllexport) BOOL WINAPI DllMain(HINSTANCE inst, DWORD reason, LPVOID reserved) { - UNUSED(reserved); - UNUSED(inst); + MI_UNUSED(reserved); + MI_UNUSED(inst); if (reason==DLL_PROCESS_ATTACH) { mi_process_load(); } + else if (reason==DLL_PROCESS_DETACH) { + mi_process_done(); + } else if (reason==DLL_THREAD_DETACH) { - if (!mi_is_redirected()) mi_thread_done(); + if (!mi_is_redirected()) { + mi_thread_done(); + } } return TRUE; } -#elif defined(__cplusplus) - // C++: use static initialization to detect process start - static bool _mi_process_init(void) { - mi_process_load(); - return (_mi_heap_main.thread_id != 0); - } - static bool mi_initialized = _mi_process_init(); - -#elif defined(__GNUC__) || defined(__clang__) - // GCC,Clang: use the constructor attribute - static void __attribute__((constructor)) _mi_process_init(void) { - mi_process_load(); - } - #elif defined(_MSC_VER) // MSVC: use data section magic for static libraries // See <https://www.codeguru.com/cpp/misc/misc/applicationcontrol/article.php/c6945/Running-Code-Before-and-After-Main.htm> @@ -568,17 +659,31 @@ static void mi_process_done(void) { mi_process_load(); return 0; } - typedef int(*_crt_cb)(void); - #ifdef _M_X64 + typedef int(*_mi_crt_callback_t)(void); + #if defined(_M_X64) || defined(_M_ARM64) __pragma(comment(linker, "/include:" "_mi_msvc_initu")) #pragma section(".CRT$XIU", long, read) #else __pragma(comment(linker, "/include:" "__mi_msvc_initu")) #endif #pragma data_seg(".CRT$XIU") - _crt_cb _mi_msvc_initu[] = { &_mi_process_init }; + mi_decl_externc _mi_crt_callback_t _mi_msvc_initu[] = { &_mi_process_init }; #pragma data_seg() +#elif defined(__cplusplus) + // C++: use static initialization to detect process start + static bool _mi_process_init(void) { + mi_process_load(); + return (_mi_heap_main.thread_id != 0); + } + static bool mi_initialized = _mi_process_init(); + +#elif defined(__GNUC__) || defined(__clang__) + // GCC,Clang: use the constructor attribute + static void __attribute__((constructor)) _mi_process_init(void) { + mi_process_load(); + } + #else #pragma message("define a way to call mi_process_load on your platform") #endif diff --git a/contrib/libs/mimalloc/src/libc.c b/contrib/libs/mimalloc/src/libc.c new file mode 100644 index 0000000000..dd6b400737 --- /dev/null +++ b/contrib/libs/mimalloc/src/libc.c @@ -0,0 +1,273 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +// -------------------------------------------------------- +// This module defines various std libc functions to reduce +// the dependency on libc, and also prevent errors caused +// by some libc implementations when called before `main` +// executes (due to malloc redirection) +// -------------------------------------------------------- + +#include "mimalloc.h" +#include "mimalloc/internal.h" +#include "mimalloc/prim.h" // mi_prim_getenv + +char _mi_toupper(char c) { + if (c >= 'a' && c <= 'z') return (c - 'a' + 'A'); + else return c; +} + +int _mi_strnicmp(const char* s, const char* t, size_t n) { + if (n == 0) return 0; + for (; *s != 0 && *t != 0 && n > 0; s++, t++, n--) { + if (_mi_toupper(*s) != _mi_toupper(*t)) break; + } + return (n == 0 ? 0 : *s - *t); +} + +void _mi_strlcpy(char* dest, const char* src, size_t dest_size) { + if (dest==NULL || src==NULL || dest_size == 0) return; + // copy until end of src, or when dest is (almost) full + while (*src != 0 && dest_size > 1) { + *dest++ = *src++; + dest_size--; + } + // always zero terminate + *dest = 0; +} + +void _mi_strlcat(char* dest, const char* src, size_t dest_size) { + if (dest==NULL || src==NULL || dest_size == 0) return; + // find end of string in the dest buffer + while (*dest != 0 && dest_size > 1) { + dest++; + dest_size--; + } + // and catenate + _mi_strlcpy(dest, src, dest_size); +} + +size_t _mi_strlen(const char* s) { + if (s==NULL) return 0; + size_t len = 0; + while(s[len] != 0) { len++; } + return len; +} + +size_t _mi_strnlen(const char* s, size_t max_len) { + if (s==NULL) return 0; + size_t len = 0; + while(s[len] != 0 && len < max_len) { len++; } + return len; +} + +#ifdef MI_NO_GETENV +bool _mi_getenv(const char* name, char* result, size_t result_size) { + MI_UNUSED(name); + MI_UNUSED(result); + MI_UNUSED(result_size); + return false; +} +#else +bool _mi_getenv(const char* name, char* result, size_t result_size) { + if (name==NULL || result == NULL || result_size < 64) return false; + return _mi_prim_getenv(name,result,result_size); +} +#endif + +// -------------------------------------------------------- +// Define our own limited `_mi_vsnprintf` and `_mi_snprintf` +// This is mostly to avoid calling these when libc is not yet +// initialized (and to reduce dependencies) +// +// format: d i, p x u, s +// prec: z l ll L +// width: 10 +// align-left: - +// fill: 0 +// plus: + +// -------------------------------------------------------- + +static void mi_outc(char c, char** out, char* end) { + char* p = *out; + if (p >= end) return; + *p = c; + *out = p + 1; +} + +static void mi_outs(const char* s, char** out, char* end) { + if (s == NULL) return; + char* p = *out; + while (*s != 0 && p < end) { + *p++ = *s++; + } + *out = p; +} + +static void mi_out_fill(char fill, size_t len, char** out, char* end) { + char* p = *out; + for (size_t i = 0; i < len && p < end; i++) { + *p++ = fill; + } + *out = p; +} + +static void mi_out_alignright(char fill, char* start, size_t len, size_t extra, char* end) { + if (len == 0 || extra == 0) return; + if (start + len + extra >= end) return; + // move `len` characters to the right (in reverse since it can overlap) + for (size_t i = 1; i <= len; i++) { + start[len + extra - i] = start[len - i]; + } + // and fill the start + for (size_t i = 0; i < extra; i++) { + start[i] = fill; + } +} + + +static void mi_out_num(uintptr_t x, size_t base, char prefix, char** out, char* end) +{ + if (x == 0 || base == 0 || base > 16) { + if (prefix != 0) { mi_outc(prefix, out, end); } + mi_outc('0',out,end); + } + else { + // output digits in reverse + char* start = *out; + while (x > 0) { + char digit = (char)(x % base); + mi_outc((digit <= 9 ? '0' + digit : 'A' + digit - 10),out,end); + x = x / base; + } + if (prefix != 0) { + mi_outc(prefix, out, end); + } + size_t len = *out - start; + // and reverse in-place + for (size_t i = 0; i < (len / 2); i++) { + char c = start[len - i - 1]; + start[len - i - 1] = start[i]; + start[i] = c; + } + } +} + + +#define MI_NEXTC() c = *in; if (c==0) break; in++; + +void _mi_vsnprintf(char* buf, size_t bufsize, const char* fmt, va_list args) { + if (buf == NULL || bufsize == 0 || fmt == NULL) return; + buf[bufsize - 1] = 0; + char* const end = buf + (bufsize - 1); + const char* in = fmt; + char* out = buf; + while (true) { + if (out >= end) break; + char c; + MI_NEXTC(); + if (c != '%') { + if ((c >= ' ' && c <= '~') || c=='\n' || c=='\r' || c=='\t') { // output visible ascii or standard control only + mi_outc(c, &out, end); + } + } + else { + MI_NEXTC(); + char fill = ' '; + size_t width = 0; + char numtype = 'd'; + char numplus = 0; + bool alignright = true; + if (c == '+' || c == ' ') { numplus = c; MI_NEXTC(); } + if (c == '-') { alignright = false; MI_NEXTC(); } + if (c == '0') { fill = '0'; MI_NEXTC(); } + if (c >= '1' && c <= '9') { + width = (c - '0'); MI_NEXTC(); + while (c >= '0' && c <= '9') { + width = (10 * width) + (c - '0'); MI_NEXTC(); + } + if (c == 0) break; // extra check due to while + } + if (c == 'z' || c == 't' || c == 'L') { numtype = c; MI_NEXTC(); } + else if (c == 'l') { + numtype = c; MI_NEXTC(); + if (c == 'l') { numtype = 'L'; MI_NEXTC(); } + } + + char* start = out; + if (c == 's') { + // string + const char* s = va_arg(args, const char*); + mi_outs(s, &out, end); + } + else if (c == 'p' || c == 'x' || c == 'u') { + // unsigned + uintptr_t x = 0; + if (c == 'x' || c == 'u') { + if (numtype == 'z') x = va_arg(args, size_t); + else if (numtype == 't') x = va_arg(args, uintptr_t); // unsigned ptrdiff_t + else if (numtype == 'L') x = (uintptr_t)va_arg(args, unsigned long long); + else x = va_arg(args, unsigned long); + } + else if (c == 'p') { + x = va_arg(args, uintptr_t); + mi_outs("0x", &out, end); + start = out; + width = (width >= 2 ? width - 2 : 0); + } + if (width == 0 && (c == 'x' || c == 'p')) { + if (c == 'p') { width = 2 * (x <= UINT32_MAX ? 4 : ((x >> 16) <= UINT32_MAX ? 6 : sizeof(void*))); } + if (width == 0) { width = 2; } + fill = '0'; + } + mi_out_num(x, (c == 'x' || c == 'p' ? 16 : 10), numplus, &out, end); + } + else if (c == 'i' || c == 'd') { + // signed + intptr_t x = 0; + if (numtype == 'z') x = va_arg(args, intptr_t ); + else if (numtype == 't') x = va_arg(args, ptrdiff_t); + else if (numtype == 'L') x = (intptr_t)va_arg(args, long long); + else x = va_arg(args, long); + char pre = 0; + if (x < 0) { + pre = '-'; + if (x > INTPTR_MIN) { x = -x; } + } + else if (numplus != 0) { + pre = numplus; + } + mi_out_num((uintptr_t)x, 10, pre, &out, end); + } + else if (c >= ' ' && c <= '~') { + // unknown format + mi_outc('%', &out, end); + mi_outc(c, &out, end); + } + + // fill & align + mi_assert_internal(out <= end); + mi_assert_internal(out >= start); + const size_t len = out - start; + if (len < width) { + mi_out_fill(fill, width - len, &out, end); + if (alignright && out <= end) { + mi_out_alignright(fill, start, len, width - len, end); + } + } + } + } + mi_assert_internal(out <= end); + *out = 0; +} + +void _mi_snprintf(char* buf, size_t buflen, const char* fmt, ...) { + va_list args; + va_start(args, fmt); + _mi_vsnprintf(buf, buflen, fmt, args); + va_end(args); +} diff --git a/contrib/libs/mimalloc/src/options.c b/contrib/libs/mimalloc/src/options.c index 30025db226..db6e040fe8 100644 --- a/contrib/libs/mimalloc/src/options.c +++ b/contrib/libs/mimalloc/src/options.c @@ -5,32 +5,24 @@ terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ #include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" +#include "mimalloc/prim.h" // mi_prim_out_stderr -#include <stdio.h> -#include <stdlib.h> // strtol -#include <string.h> // strncpy, strncat, strlen, strstr -#include <ctype.h> // toupper -#include <stdarg.h> +#include <stdio.h> // stdin/stdout +#include <stdlib.h> // abort -#ifdef _MSC_VER -#pragma warning(disable:4996) // strncpy, strncat -#endif -static uintptr_t mi_max_error_count = 16; // stop outputting errors after this -static uintptr_t mi_max_warning_count = 16; // stop outputting warnings after this +static long mi_max_error_count = 16; // stop outputting errors after this (use < 0 for no limit) +static long mi_max_warning_count = 16; // stop outputting warnings after this (use < 0 for no limit) -static void mi_add_stderr_output(); +static void mi_add_stderr_output(void); int mi_version(void) mi_attr_noexcept { return MI_MALLOC_VERSION; } -#ifdef _WIN32 -#include <conio.h> -#endif // -------------------------------------------------------- // Options @@ -49,10 +41,11 @@ typedef struct mi_option_desc_s { mi_init_t init; // is it initialized yet? (from the environment) mi_option_t option; // for debugging: the option index should match the option const char* name; // option name without `mimalloc_` prefix + const char* legacy_name; // potential legacy option name } mi_option_desc_t; -#define MI_OPTION(opt) mi_option_##opt, #opt -#define MI_OPTION_DESC(opt) {0, UNINIT, MI_OPTION(opt) } +#define MI_OPTION(opt) mi_option_##opt, #opt, NULL +#define MI_OPTION_LEGACY(opt,legacy) mi_option_##opt, #opt, #legacy static mi_option_desc_t options[_mi_option_last] = { @@ -66,65 +59,95 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(verbose) }, // the following options are experimental and not all combinations make sense. - { 1, UNINIT, MI_OPTION(eager_commit) }, // commit per segment directly (4MiB) (but see also `eager_commit_delay`) - #if defined(_WIN32) || (MI_INTPTR_SIZE <= 4) // and other OS's without overcommit? - { 0, UNINIT, MI_OPTION(eager_region_commit) }, - { 1, UNINIT, MI_OPTION(reset_decommits) }, // reset decommits memory - #else - { 1, UNINIT, MI_OPTION(eager_region_commit) }, - { 0, UNINIT, MI_OPTION(reset_decommits) }, // reset uses MADV_FREE/MADV_DONTNEED - #endif - { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's - { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, // per 1GiB huge pages - { 0, UNINIT, MI_OPTION(reserve_os_memory) }, - { 0, UNINIT, MI_OPTION(segment_cache) }, // cache N segments per thread - { 1, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free - { 0, UNINIT, MI_OPTION(abandoned_page_reset) },// reset free page memory when a thread terminates - { 0, UNINIT, MI_OPTION(segment_reset) }, // reset segment memory on free (needs eager commit) + { 1, UNINIT, MI_OPTION(eager_commit) }, // commit per segment directly (4MiB) (but see also `eager_commit_delay`) + { 2, UNINIT, MI_OPTION_LEGACY(arena_eager_commit,eager_region_commit) }, // eager commit arena's? 2 is used to enable this only on an OS that has overcommit (i.e. linux) + { 1, UNINIT, MI_OPTION_LEGACY(purge_decommits,reset_decommits) }, // purge decommits memory (instead of reset) (note: on linux this uses MADV_DONTNEED for decommit) + { 0, UNINIT, MI_OPTION_LEGACY(allow_large_os_pages,large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's + { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, // per 1GiB huge pages + {-1, UNINIT, MI_OPTION(reserve_huge_os_pages_at) }, // reserve huge pages at node N + { 0, UNINIT, MI_OPTION(reserve_os_memory) }, // reserve N KiB OS memory in advance (use `option_get_size`) + { 0, UNINIT, MI_OPTION(deprecated_segment_cache) }, // cache N segments per thread + { 0, UNINIT, MI_OPTION(deprecated_page_reset) }, // reset page memory on free + { 0, UNINIT, MI_OPTION(abandoned_page_purge) }, // purge free page memory when a thread terminates + { 0, UNINIT, MI_OPTION(deprecated_segment_reset) }, // reset segment memory on free (needs eager commit) #if defined(__NetBSD__) - { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed + { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed #else - { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) + { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) #endif - { 100, UNINIT, MI_OPTION(reset_delay) }, // reset delay in milli-seconds - { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. - { 0, UNINIT, MI_OPTION(limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas) - { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose - { 16, UNINIT, MI_OPTION(max_errors) }, // maximum errors that are output - { 16, UNINIT, MI_OPTION(max_warnings) } // maximum warnings that are output + { 10, UNINIT, MI_OPTION_LEGACY(purge_delay,reset_delay) }, // purge delay in milli-seconds + { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. + { 0, UNINIT, MI_OPTION_LEGACY(disallow_os_alloc,limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas) + { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose + { 32, UNINIT, MI_OPTION(max_errors) }, // maximum errors that are output + { 32, UNINIT, MI_OPTION(max_warnings) }, // maximum warnings that are output + { 10, UNINIT, MI_OPTION(max_segment_reclaim)}, // max. percentage of the abandoned segments to be reclaimed per try. + { 0, UNINIT, MI_OPTION(destroy_on_exit)}, // release all OS memory on process exit; careful with dangling pointer or after-exit frees! + #if (MI_INTPTR_SIZE>4) + { 1024L*1024L, UNINIT, MI_OPTION(arena_reserve) }, // reserve memory N KiB at a time (=1GiB) (use `option_get_size`) + #else + { 128L*1024L, UNINIT, MI_OPTION(arena_reserve) }, // =128MiB on 32-bit + #endif + { 10, UNINIT, MI_OPTION(arena_purge_mult) }, // purge delay multiplier for arena's + { 1, UNINIT, MI_OPTION_LEGACY(purge_extend_delay, decommit_extend_delay) }, + { 1, UNINIT, MI_OPTION(abandoned_reclaim_on_free) },// reclaim an abandoned segment on a free + { 0, UNINIT, MI_OPTION(disallow_arena_alloc) }, // 1 = do not use arena's for allocation (except if using specific arena id's) + { 400, UNINIT, MI_OPTION(retry_on_oom) }, // windows only: retry on out-of-memory for N milli seconds (=400), set to 0 to disable retries. }; static void mi_option_init(mi_option_desc_t* desc); +static bool mi_option_has_size_in_kib(mi_option_t option) { + return (option == mi_option_reserve_os_memory || option == mi_option_arena_reserve); +} + void _mi_options_init(void) { // called on process load; should not be called before the CRT is initialized! // (e.g. do not call this from process_init as that may run before CRT initialization) mi_add_stderr_output(); // now it safe to use stderr for output for(int i = 0; i < _mi_option_last; i++ ) { mi_option_t option = (mi_option_t)i; - long l = mi_option_get(option); UNUSED(l); // initialize - if (option != mi_option_verbose) { + long l = mi_option_get(option); MI_UNUSED(l); // initialize + // if (option != mi_option_verbose) + { mi_option_desc_t* desc = &options[option]; - _mi_verbose_message("option '%s': %ld\n", desc->name, desc->value); + _mi_verbose_message("option '%s': %ld %s\n", desc->name, desc->value, (mi_option_has_size_in_kib(option) ? "KiB" : "")); } } mi_max_error_count = mi_option_get(mi_option_max_errors); mi_max_warning_count = mi_option_get(mi_option_max_warnings); } -long mi_option_get(mi_option_t option) { +mi_decl_nodiscard long mi_option_get(mi_option_t option) { mi_assert(option >= 0 && option < _mi_option_last); + if (option < 0 || option >= _mi_option_last) return 0; mi_option_desc_t* desc = &options[option]; mi_assert(desc->option == option); // index should match the option - if (mi_unlikely(desc->init == UNINIT)) { + if mi_unlikely(desc->init == UNINIT) { mi_option_init(desc); } return desc->value; } +mi_decl_nodiscard long mi_option_get_clamp(mi_option_t option, long min, long max) { + long x = mi_option_get(option); + return (x < min ? min : (x > max ? max : x)); +} + +mi_decl_nodiscard size_t mi_option_get_size(mi_option_t option) { + mi_assert_internal(mi_option_has_size_in_kib(option)); + const long x = mi_option_get(option); + size_t size = (x < 0 ? 0 : (size_t)x); + if (mi_option_has_size_in_kib(option)) { + size *= MI_KiB; + } + return size; +} + void mi_option_set(mi_option_t option, long value) { mi_assert(option >= 0 && option < _mi_option_last); + if (option < 0 || option >= _mi_option_last) return; mi_option_desc_t* desc = &options[option]; mi_assert(desc->option == option); // index should match the option desc->value = value; @@ -133,13 +156,14 @@ void mi_option_set(mi_option_t option, long value) { void mi_option_set_default(mi_option_t option, long value) { mi_assert(option >= 0 && option < _mi_option_last); + if (option < 0 || option >= _mi_option_last) return; mi_option_desc_t* desc = &options[option]; if (desc->init != INITIALIZED) { desc->value = value; } } -bool mi_option_is_enabled(mi_option_t option) { +mi_decl_nodiscard bool mi_option_is_enabled(mi_option_t option) { return (mi_option_get(option) != 0); } @@ -159,16 +183,11 @@ void mi_option_disable(mi_option_t option) { mi_option_set_enabled(option,false); } - -static void mi_out_stderr(const char* msg, void* arg) { - UNUSED(arg); - #ifdef _WIN32 - // on windows with redirection, the C runtime cannot handle locale dependent output - // after the main thread closes so we use direct console output. - if (!_mi_preloading()) { _cputs(msg); } - #else - fputs(msg, stderr); - #endif +static void mi_cdecl mi_out_stderr(const char* msg, void* arg) { + MI_UNUSED(arg); + if (msg != NULL && msg[0] != 0) { + _mi_prim_out_stderr(msg); + } } // Since an output function can be registered earliest in the `main` @@ -176,19 +195,19 @@ static void mi_out_stderr(const char* msg, void* arg) { // an output function is registered it is called immediately with // the output up to that point. #ifndef MI_MAX_DELAY_OUTPUT -#define MI_MAX_DELAY_OUTPUT ((uintptr_t)(32*1024)) +#define MI_MAX_DELAY_OUTPUT ((size_t)(32*1024)) #endif static char out_buf[MI_MAX_DELAY_OUTPUT+1]; -static _Atomic(uintptr_t) out_len; +static _Atomic(size_t) out_len; -static void mi_out_buf(const char* msg, void* arg) { - UNUSED(arg); +static void mi_cdecl mi_out_buf(const char* msg, void* arg) { + MI_UNUSED(arg); if (msg==NULL) return; if (mi_atomic_load_relaxed(&out_len)>=MI_MAX_DELAY_OUTPUT) return; - size_t n = strlen(msg); + size_t n = _mi_strlen(msg); if (n==0) return; // claim space - uintptr_t start = mi_atomic_add_acq_rel(&out_len, n); + size_t start = mi_atomic_add_acq_rel(&out_len, n); if (start >= MI_MAX_DELAY_OUTPUT) return; // check bound if (start+n >= MI_MAX_DELAY_OUTPUT) { @@ -213,7 +232,7 @@ static void mi_out_buf_flush(mi_output_fun* out, bool no_more_buf, void* arg) { // Once this module is loaded, switch to this routine // which outputs to stderr and the delayed output buffer. -static void mi_out_buf_stderr(const char* msg, void* arg) { +static void mi_cdecl mi_out_buf_stderr(const char* msg, void* arg) { mi_out_stderr(msg,arg); mi_out_buf(msg,arg); } @@ -242,7 +261,7 @@ void mi_register_output(mi_output_fun* out, void* arg) mi_attr_noexcept { } // add stderr to the delayed output after the module is loaded -static void mi_add_stderr_output() { +static void mi_add_stderr_output(void) { mi_assert_internal(mi_out_default == NULL); mi_out_buf_flush(&mi_out_stderr, false, NULL); // flush current contents to stderr mi_out_default = &mi_out_buf_stderr; // and add stderr to the delayed output @@ -251,31 +270,46 @@ static void mi_add_stderr_output() { // -------------------------------------------------------- // Messages, all end up calling `_mi_fputs`. // -------------------------------------------------------- -static _Atomic(uintptr_t) error_count; // = 0; // when >= max_error_count stop emitting errors -static _Atomic(uintptr_t) warning_count; // = 0; // when >= max_warning_count stop emitting warnings +static _Atomic(size_t) error_count; // = 0; // when >= max_error_count stop emitting errors +static _Atomic(size_t) warning_count; // = 0; // when >= max_warning_count stop emitting warnings // When overriding malloc, we may recurse into mi_vfprintf if an allocation // inside the C runtime causes another message. +// In some cases (like on macOS) the loader already allocates which +// calls into mimalloc; if we then access thread locals (like `recurse`) +// this may crash as the access may call _tlv_bootstrap that tries to +// (recursively) invoke malloc again to allocate space for the thread local +// variables on demand. This is why we use a _mi_preloading test on such +// platforms. However, C code generator may move the initial thread local address +// load before the `if` and we therefore split it out in a separate funcion. static mi_decl_thread bool recurse = false; -static bool mi_recurse_enter(void) { - #if defined(__APPLE__) || defined(MI_TLS_RECURSE_GUARD) - if (_mi_preloading()) return true; - #endif +static mi_decl_noinline bool mi_recurse_enter_prim(void) { if (recurse) return false; recurse = true; return true; } +static mi_decl_noinline void mi_recurse_exit_prim(void) { + recurse = false; +} + +static bool mi_recurse_enter(void) { + #if defined(__APPLE__) || defined(MI_TLS_RECURSE_GUARD) + if (_mi_preloading()) return false; + #endif + return mi_recurse_enter_prim(); +} + static void mi_recurse_exit(void) { #if defined(__APPLE__) || defined(MI_TLS_RECURSE_GUARD) if (_mi_preloading()) return; #endif - recurse = false; + mi_recurse_exit_prim(); } void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* message) { - if (out==NULL || (FILE*)out==stdout || (FILE*)out==stderr) { // TODO: use mi_out_stderr for stderr? + if (out==NULL || (void*)out==(void*)stdout || (void*)out==(void*)stderr) { // TODO: use mi_out_stderr for stderr? if (!mi_recurse_enter()) return; out = mi_out_get_default(&arg); if (prefix != NULL) out(prefix, arg); @@ -289,12 +323,12 @@ void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* me } // Define our own limited `fprintf` that avoids memory allocation. -// We do this using `snprintf` with a limited buffer. +// We do this using `_mi_vsnprintf` with a limited buffer. static void mi_vfprintf( mi_output_fun* out, void* arg, const char* prefix, const char* fmt, va_list args ) { char buf[512]; if (fmt==NULL) return; if (!mi_recurse_enter()) return; - vsnprintf(buf,sizeof(buf)-1,fmt,args); + _mi_vsnprintf(buf, sizeof(buf)-1, fmt, args); mi_recurse_exit(); _mi_fputs(out,arg,prefix,buf); } @@ -306,11 +340,22 @@ void _mi_fprintf( mi_output_fun* out, void* arg, const char* fmt, ... ) { va_end(args); } +static void mi_vfprintf_thread(mi_output_fun* out, void* arg, const char* prefix, const char* fmt, va_list args) { + if (prefix != NULL && _mi_strnlen(prefix,33) <= 32 && !_mi_is_main_thread()) { + char tprefix[64]; + _mi_snprintf(tprefix, sizeof(tprefix), "%sthread 0x%tx: ", prefix, (uintptr_t)_mi_thread_id()); + mi_vfprintf(out, arg, tprefix, fmt, args); + } + else { + mi_vfprintf(out, arg, prefix, fmt, args); + } +} + void _mi_trace_message(const char* fmt, ...) { if (mi_option_get(mi_option_verbose) <= 1) return; // only with verbose level 2 or higher va_list args; va_start(args, fmt); - mi_vfprintf(NULL, NULL, "mimalloc: ", fmt, args); + mi_vfprintf_thread(NULL, NULL, "mimalloc: ", fmt, args); va_end(args); } @@ -323,17 +368,21 @@ void _mi_verbose_message(const char* fmt, ...) { } static void mi_show_error_message(const char* fmt, va_list args) { - if (!mi_option_is_enabled(mi_option_show_errors) && !mi_option_is_enabled(mi_option_verbose)) return; - if (mi_atomic_increment_acq_rel(&error_count) > mi_max_error_count) return; - mi_vfprintf(NULL, NULL, "mimalloc: error: ", fmt, args); + if (!mi_option_is_enabled(mi_option_verbose)) { + if (!mi_option_is_enabled(mi_option_show_errors)) return; + if (mi_max_error_count >= 0 && (long)mi_atomic_increment_acq_rel(&error_count) > mi_max_error_count) return; + } + mi_vfprintf_thread(NULL, NULL, "mimalloc: error: ", fmt, args); } void _mi_warning_message(const char* fmt, ...) { - if (!mi_option_is_enabled(mi_option_show_errors) && !mi_option_is_enabled(mi_option_verbose)) return; - if (mi_atomic_increment_acq_rel(&warning_count) > mi_max_warning_count) return; + if (!mi_option_is_enabled(mi_option_verbose)) { + if (!mi_option_is_enabled(mi_option_show_errors)) return; + if (mi_max_warning_count >= 0 && (long)mi_atomic_increment_acq_rel(&warning_count) > mi_max_warning_count) return; + } va_list args; va_start(args,fmt); - mi_vfprintf(NULL, NULL, "mimalloc: warning: ", fmt, args); + mi_vfprintf_thread(NULL, NULL, "mimalloc: warning: ", fmt, args); va_end(args); } @@ -353,8 +402,8 @@ static mi_error_fun* volatile mi_error_handler; // = NULL static _Atomic(void*) mi_error_arg; // = NULL static void mi_error_default(int err) { - UNUSED(err); -#if (MI_DEBUG>0) + MI_UNUSED(err); +#if (MI_DEBUG>0) if (err==EFAULT) { #ifdef _MSC_VER __debugbreak(); @@ -398,108 +447,34 @@ void _mi_error_message(int err, const char* fmt, ...) { // Initialize options by checking the environment // -------------------------------------------------------- -static void mi_strlcpy(char* dest, const char* src, size_t dest_size) { - dest[0] = 0; - strncpy(dest, src, dest_size - 1); - dest[dest_size - 1] = 0; -} +// TODO: implement ourselves to reduce dependencies on the C runtime +#include <stdlib.h> // strtol +#include <string.h> // strstr -static void mi_strlcat(char* dest, const char* src, size_t dest_size) { - strncat(dest, src, dest_size - 1); - dest[dest_size - 1] = 0; -} -static inline int mi_strnicmp(const char* s, const char* t, size_t n) { - if (n==0) return 0; - for (; *s != 0 && *t != 0 && n > 0; s++, t++, n--) { - if (toupper(*s) != toupper(*t)) break; - } - return (n==0 ? 0 : *s - *t); -} - -#if defined _WIN32 -// On Windows use GetEnvironmentVariable instead of getenv to work -// reliably even when this is invoked before the C runtime is initialized. -// i.e. when `_mi_preloading() == true`. -// Note: on windows, environment names are not case sensitive. -#include <windows.h> -static bool mi_getenv(const char* name, char* result, size_t result_size) { - result[0] = 0; - size_t len = GetEnvironmentVariableA(name, result, (DWORD)result_size); - return (len > 0 && len < result_size); -} -#elif !defined(MI_USE_ENVIRON) || (MI_USE_ENVIRON!=0) -// On Posix systemsr use `environ` to acces environment variables -// even before the C runtime is initialized. -#if defined(__APPLE__) && defined(__has_include) && __has_include(<crt_externs.h>) -#include <crt_externs.h> -static char** mi_get_environ(void) { - return (*_NSGetEnviron()); -} -#else -extern char** environ; -static char** mi_get_environ(void) { - return environ; -} -#endif -static bool mi_getenv(const char* name, char* result, size_t result_size) { - if (name==NULL) return false; - const size_t len = strlen(name); - if (len == 0) return false; - char** env = mi_get_environ(); - if (env == NULL) return false; - // compare up to 256 entries - for (int i = 0; i < 256 && env[i] != NULL; i++) { - const char* s = env[i]; - if (mi_strnicmp(name, s, len) == 0 && s[len] == '=') { // case insensitive - // found it - mi_strlcpy(result, s + len + 1, result_size); - return true; - } - } - return false; -} -#else -// fallback: use standard C `getenv` but this cannot be used while initializing the C runtime -static bool mi_getenv(const char* name, char* result, size_t result_size) { - // cannot call getenv() when still initializing the C runtime. - if (_mi_preloading()) return false; - const char* s = getenv(name); - if (s == NULL) { - // we check the upper case name too. - char buf[64+1]; - size_t len = strlen(name); - if (len >= sizeof(buf)) len = sizeof(buf) - 1; - for (size_t i = 0; i < len; i++) { - buf[i] = toupper(name[i]); +static void mi_option_init(mi_option_desc_t* desc) { + // Read option value from the environment + char s[64 + 1]; + char buf[64+1]; + _mi_strlcpy(buf, "mimalloc_", sizeof(buf)); + _mi_strlcat(buf, desc->name, sizeof(buf)); + bool found = _mi_getenv(buf, s, sizeof(s)); + if (!found && desc->legacy_name != NULL) { + _mi_strlcpy(buf, "mimalloc_", sizeof(buf)); + _mi_strlcat(buf, desc->legacy_name, sizeof(buf)); + found = _mi_getenv(buf, s, sizeof(s)); + if (found) { + _mi_warning_message("environment option \"mimalloc_%s\" is deprecated -- use \"mimalloc_%s\" instead.\n", desc->legacy_name, desc->name); } - buf[len] = 0; - s = getenv(buf); } - if (s != NULL && strlen(s) < result_size) { - mi_strlcpy(result, s, result_size); - return true; - } - else { - return false; - } -} -#endif -static void mi_option_init(mi_option_desc_t* desc) { - // Read option value from the environment - char buf[64+1]; - mi_strlcpy(buf, "mimalloc_", sizeof(buf)); - mi_strlcat(buf, desc->name, sizeof(buf)); - char s[64+1]; - if (mi_getenv(buf, s, sizeof(s))) { - size_t len = strlen(s); - if (len >= sizeof(buf)) len = sizeof(buf) - 1; + if (found) { + size_t len = _mi_strnlen(s, sizeof(buf) - 1); for (size_t i = 0; i < len; i++) { - buf[i] = (char)toupper(s[i]); + buf[i] = _mi_toupper(s[i]); } buf[len] = 0; - if (buf[0]==0 || strstr("1;TRUE;YES;ON", buf) != NULL) { + if (buf[0] == 0 || strstr("1;TRUE;YES;ON", buf) != NULL) { desc->value = 1; desc->init = INITIALIZED; } @@ -510,21 +485,38 @@ static void mi_option_init(mi_option_desc_t* desc) { else { char* end = buf; long value = strtol(buf, &end, 10); - if (desc->option == mi_option_reserve_os_memory) { - // this option is interpreted in KiB to prevent overflow of `long` + if (mi_option_has_size_in_kib(desc->option)) { + // this option is interpreted in KiB to prevent overflow of `long` for large allocations + // (long is 32-bit on 64-bit windows, which allows for 4TiB max.) + size_t size = (value < 0 ? 0 : (size_t)value); + bool overflow = false; if (*end == 'K') { end++; } - else if (*end == 'M') { value *= KiB; end++; } - else if (*end == 'G') { value *= MiB; end++; } - else { value = (value + KiB - 1) / KiB; } - if (*end == 'B') { end++; } + else if (*end == 'M') { overflow = mi_mul_overflow(size,MI_KiB,&size); end++; } + else if (*end == 'G') { overflow = mi_mul_overflow(size,MI_MiB,&size); end++; } + else if (*end == 'T') { overflow = mi_mul_overflow(size,MI_GiB,&size); end++; } + else { size = (size + MI_KiB - 1) / MI_KiB; } + if (end[0] == 'I' && end[1] == 'B') { end += 2; } // KiB, MiB, GiB, TiB + else if (*end == 'B') { end++; } // Kb, Mb, Gb, Tb + if (overflow || size > MI_MAX_ALLOC_SIZE) { size = (MI_MAX_ALLOC_SIZE / MI_KiB); } + value = (size > LONG_MAX ? LONG_MAX : (long)size); } if (*end == 0) { desc->value = value; desc->init = INITIALIZED; } else { - _mi_warning_message("environment option mimalloc_%s has an invalid value: %s\n", desc->name, buf); + // set `init` first to avoid recursion through _mi_warning_message on mimalloc_verbose. desc->init = DEFAULTED; + if (desc->option == mi_option_verbose && desc->value == 0) { + // if the 'mimalloc_verbose' env var has a bogus value we'd never know + // (since the value defaults to 'off') so in that case briefly enable verbose + desc->value = 1; + _mi_warning_message("environment option mimalloc_%s has an invalid value.\n", desc->name); + desc->value = 0; + } + else { + _mi_warning_message("environment option mimalloc_%s has an invalid value.\n", desc->name); + } } } mi_assert_internal(desc->init != UNINIT); diff --git a/contrib/libs/mimalloc/src/os.c b/contrib/libs/mimalloc/src/os.c index 85415232d7..88e7fcb32e 100644 --- a/contrib/libs/mimalloc/src/os.c +++ b/contrib/libs/mimalloc/src/os.c @@ -1,552 +1,225 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2018-2021, Microsoft Research, Daan Leijen +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ -#ifndef _DEFAULT_SOURCE -#define _DEFAULT_SOURCE // ensure mmap flags are defined -#endif - -#if defined(__sun) -// illumos provides new mman.h api when any of these are defined -// otherwise the old api based on caddr_t which predates the void pointers one. -// stock solaris provides only the former, chose to atomically to discard those -// flags only here rather than project wide tough. -#undef _XOPEN_SOURCE -#undef _POSIX_C_SOURCE -#endif #include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" - -#include <string.h> // strerror - -#ifdef _MSC_VER -#pragma warning(disable:4996) // strerror -#endif - +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" +#include "mimalloc/prim.h" -#if defined(_WIN32) -#include <windows.h> -#elif defined(__wasi__) -// stdlib.h is all we need, and has already been included in mimalloc.h -#else -#include <sys/mman.h> // mmap -#include <unistd.h> // sysconf -#if defined(__linux__) -#include <features.h> -#if defined(__GLIBC__) -#include <linux/mman.h> // linux mmap flags -#else -#include <sys/mman.h> -#endif -#endif -#if defined(__APPLE__) -#include <TargetConditionals.h> -#if !TARGET_IOS_IPHONE && !TARGET_IOS_SIMULATOR -#include <mach/vm_statistics.h> -#endif -#endif -#if defined(__HAIKU__) -#define madvise posix_madvise -#define MADV_DONTNEED POSIX_MADV_DONTNEED -#endif -#endif /* ----------------------------------------------------------- - Initialization. - On windows initializes support for aligned allocation and - large OS pages (if MIMALLOC_LARGE_OS_PAGES is true). + Initialization. ----------------------------------------------------------- */ -bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); -static void* mi_align_up_ptr(void* p, size_t alignment) { - return (void*)_mi_align_up((uintptr_t)p, alignment); -} +static mi_os_mem_config_t mi_os_mem_config = { + 4096, // page size + 0, // large page size (usually 2MiB) + 4096, // allocation granularity + true, // has overcommit? (if true we use MAP_NORESERVE on mmap systems) + false, // can we partially free allocated blocks? (on mmap systems we can free anywhere in a mapped range, but on Windows we must free the entire span) + true // has virtual reserve? (if true we can reserve virtual address space without using commit or physical memory) +}; -static inline uintptr_t _mi_align_down(uintptr_t sz, size_t alignment) { - mi_assert_internal(alignment != 0); - uintptr_t mask = alignment - 1; - if ((alignment & mask) == 0) { // power of two? - return (sz & ~mask); - } - else { - return ((sz / alignment) * alignment); - } +bool _mi_os_has_overcommit(void) { + return mi_os_mem_config.has_overcommit; } -static void* mi_align_down_ptr(void* p, size_t alignment) { - return (void*)_mi_align_down((uintptr_t)p, alignment); +bool _mi_os_has_virtual_reserve(void) { + return mi_os_mem_config.has_virtual_reserve; } -// page size (initialized properly in `os_init`) -static size_t os_page_size = 4096; - -// minimal allocation granularity -static size_t os_alloc_granularity = 4096; - -// if non-zero, use large page allocation -static size_t large_os_page_size = 0; // OS (small) page size -size_t _mi_os_page_size() { - return os_page_size; +size_t _mi_os_page_size(void) { + return mi_os_mem_config.page_size; } // if large OS pages are supported (2 or 4MiB), then return the size, otherwise return the small page size (4KiB) -size_t _mi_os_large_page_size() { - return (large_os_page_size != 0 ? large_os_page_size : _mi_os_page_size()); +size_t _mi_os_large_page_size(void) { + return (mi_os_mem_config.large_page_size != 0 ? mi_os_mem_config.large_page_size : _mi_os_page_size()); } -static bool use_large_os_page(size_t size, size_t alignment) { +bool _mi_os_use_large_page(size_t size, size_t alignment) { // if we have access, check the size and alignment requirements - if (large_os_page_size == 0 || !mi_option_is_enabled(mi_option_large_os_pages)) return false; - return ((size % large_os_page_size) == 0 && (alignment % large_os_page_size) == 0); + if (mi_os_mem_config.large_page_size == 0 || !mi_option_is_enabled(mi_option_allow_large_os_pages)) return false; + return ((size % mi_os_mem_config.large_page_size) == 0 && (alignment % mi_os_mem_config.large_page_size) == 0); } // round to a good OS allocation size (bounded by max 12.5% waste) size_t _mi_os_good_alloc_size(size_t size) { size_t align_size; - if (size < 512*KiB) align_size = _mi_os_page_size(); - else if (size < 2*MiB) align_size = 64*KiB; - else if (size < 8*MiB) align_size = 256*KiB; - else if (size < 32*MiB) align_size = 1*MiB; - else align_size = 4*MiB; - if (mi_unlikely(size >= (SIZE_MAX - align_size))) return size; // possible overflow? + if (size < 512*MI_KiB) align_size = _mi_os_page_size(); + else if (size < 2*MI_MiB) align_size = 64*MI_KiB; + else if (size < 8*MI_MiB) align_size = 256*MI_KiB; + else if (size < 32*MI_MiB) align_size = 1*MI_MiB; + else align_size = 4*MI_MiB; + if mi_unlikely(size >= (SIZE_MAX - align_size)) return size; // possible overflow? return _mi_align_up(size, align_size); } -#if defined(_WIN32) -// We use VirtualAlloc2 for aligned allocation, but it is only supported on Windows 10 and Windows Server 2016. -// So, we need to look it up dynamically to run on older systems. (use __stdcall for 32-bit compatibility) -// NtAllocateVirtualAllocEx is used for huge OS page allocation (1GiB) -// -// We hide MEM_EXTENDED_PARAMETER to compile with older SDK's. -#include <winternl.h> -typedef PVOID (__stdcall *PVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ void*, ULONG); -typedef NTSTATUS (__stdcall *PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG, /* MEM_EXTENDED_PARAMETER* */ PVOID, ULONG); -static PVirtualAlloc2 pVirtualAlloc2 = NULL; -static PNtAllocateVirtualMemoryEx pNtAllocateVirtualMemoryEx = NULL; - -// Similarly, GetNumaProcesorNodeEx is only supported since Windows 7 -#if (_WIN32_WINNT < 0x601) // before Win7 -typedef struct _PROCESSOR_NUMBER { WORD Group; BYTE Number; BYTE Reserved; } PROCESSOR_NUMBER, *PPROCESSOR_NUMBER; -#endif -typedef VOID (__stdcall *PGetCurrentProcessorNumberEx)(PPROCESSOR_NUMBER ProcNumber); -typedef BOOL (__stdcall *PGetNumaProcessorNodeEx)(PPROCESSOR_NUMBER Processor, PUSHORT NodeNumber); -typedef BOOL (__stdcall* PGetNumaNodeProcessorMaskEx)(USHORT Node, PGROUP_AFFINITY ProcessorMask); -static PGetCurrentProcessorNumberEx pGetCurrentProcessorNumberEx = NULL; -static PGetNumaProcessorNodeEx pGetNumaProcessorNodeEx = NULL; -static PGetNumaNodeProcessorMaskEx pGetNumaNodeProcessorMaskEx = NULL; - -static bool mi_win_enable_large_os_pages() -{ - if (large_os_page_size > 0) return true; - - // Try to see if large OS pages are supported - // To use large pages on Windows, we first need access permission - // Set "Lock pages in memory" permission in the group policy editor - // <https://devblogs.microsoft.com/oldnewthing/20110128-00/?p=11643> - unsigned long err = 0; - HANDLE token = NULL; - BOOL ok = OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token); - if (ok) { - TOKEN_PRIVILEGES tp; - ok = LookupPrivilegeValue(NULL, TEXT("SeLockMemoryPrivilege"), &tp.Privileges[0].Luid); - if (ok) { - tp.PrivilegeCount = 1; - tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; - ok = AdjustTokenPrivileges(token, FALSE, &tp, 0, (PTOKEN_PRIVILEGES)NULL, 0); - if (ok) { - err = GetLastError(); - ok = (err == ERROR_SUCCESS); - if (ok) { - large_os_page_size = GetLargePageMinimum(); - } - } - } - CloseHandle(token); - } - if (!ok) { - if (err == 0) err = GetLastError(); - _mi_warning_message("cannot enable large OS page support, error %lu\n", err); - } - return (ok!=0); -} - void _mi_os_init(void) { - // get the page size - SYSTEM_INFO si; - GetSystemInfo(&si); - if (si.dwPageSize > 0) os_page_size = si.dwPageSize; - if (si.dwAllocationGranularity > 0) os_alloc_granularity = si.dwAllocationGranularity; - // get the VirtualAlloc2 function - HINSTANCE hDll; - hDll = LoadLibrary(TEXT("kernelbase.dll")); - if (hDll != NULL) { - // use VirtualAlloc2FromApp if possible as it is available to Windows store apps - pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2FromApp"); - if (pVirtualAlloc2==NULL) pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2"); - FreeLibrary(hDll); - } - // NtAllocateVirtualMemoryEx is used for huge page allocation - hDll = LoadLibrary(TEXT("ntdll.dll")); - if (hDll != NULL) { - pNtAllocateVirtualMemoryEx = (PNtAllocateVirtualMemoryEx)(void (*)(void))GetProcAddress(hDll, "NtAllocateVirtualMemoryEx"); - FreeLibrary(hDll); - } - // Try to use Win7+ numa API - hDll = LoadLibrary(TEXT("kernel32.dll")); - if (hDll != NULL) { - pGetCurrentProcessorNumberEx = (PGetCurrentProcessorNumberEx)(void (*)(void))GetProcAddress(hDll, "GetCurrentProcessorNumberEx"); - pGetNumaProcessorNodeEx = (PGetNumaProcessorNodeEx)(void (*)(void))GetProcAddress(hDll, "GetNumaProcessorNodeEx"); - pGetNumaNodeProcessorMaskEx = (PGetNumaNodeProcessorMaskEx)(void (*)(void))GetProcAddress(hDll, "GetNumaNodeProcessorMaskEx"); - FreeLibrary(hDll); - } - if (mi_option_is_enabled(mi_option_large_os_pages) || mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { - mi_win_enable_large_os_pages(); - } -} -#elif defined(__wasi__) -void _mi_os_init() { - os_page_size = 0x10000; // WebAssembly has a fixed page size: 64KB - os_alloc_granularity = 16; -} -#else -void _mi_os_init() { - // get the page size - long result = sysconf(_SC_PAGESIZE); - if (result > 0) { - os_page_size = (size_t)result; - os_alloc_granularity = os_page_size; - } - large_os_page_size = 2*MiB; // TODO: can we query the OS for this? + _mi_prim_mem_init(&mi_os_mem_config); } -#endif /* ----------------------------------------------------------- - Raw allocation on Windows (VirtualAlloc) and Unix's (mmap). ------------------------------------------------------------ */ + Util +-------------------------------------------------------------- */ +bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); +bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats); -static bool mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats_t* stats) -{ - if (addr == NULL || size == 0) return true; // || _mi_os_is_huge_reserved(addr) - bool err = false; -#if defined(_WIN32) - err = (VirtualFree(addr, 0, MEM_RELEASE) == 0); -#elif defined(__wasi__) - err = 0; // WebAssembly's heap cannot be shrunk -#else - err = (munmap(addr, size) == -1); -#endif - if (was_committed) _mi_stat_decrease(&stats->committed, size); - _mi_stat_decrease(&stats->reserved, size); - if (err) { - _mi_warning_message("munmap failed: %s, addr 0x%8li, size %lu\n", strerror(errno), (size_t)addr, size); - return false; +static inline uintptr_t _mi_align_down(uintptr_t sz, size_t alignment) { + mi_assert_internal(alignment != 0); + uintptr_t mask = alignment - 1; + if ((alignment & mask) == 0) { // power of two? + return (sz & ~mask); } else { - return true; - } -} - -static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size); - -#ifdef _WIN32 -static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment, DWORD flags) { -#if (MI_INTPTR_SIZE >= 8) - // on 64-bit systems, try to use the virtual address area after 4TiB for 4MiB aligned allocations - void* hint; - if (addr == NULL && (hint = mi_os_get_aligned_hint(try_alignment,size)) != NULL) { - void* p = VirtualAlloc(hint, size, flags, PAGE_READWRITE); - if (p != NULL) return p; - DWORD err = GetLastError(); - if (err != ERROR_INVALID_ADDRESS && // If linked with multiple instances, we may have tried to allocate at an already allocated area (#210) - err != ERROR_INVALID_PARAMETER) { // Windows7 instability (#230) - return NULL; - } - // fall through - } -#endif -#if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) - // on modern Windows try use VirtualAlloc2 for aligned allocation - if (try_alignment > 0 && (try_alignment % _mi_os_page_size()) == 0 && pVirtualAlloc2 != NULL) { - MEM_ADDRESS_REQUIREMENTS reqs = { 0, 0, 0 }; - reqs.Alignment = try_alignment; - MEM_EXTENDED_PARAMETER param = { {0, 0}, {0} }; - param.Type = MemExtendedParameterAddressRequirements; - param.Pointer = &reqs; - return (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, ¶m, 1); + return ((sz / alignment) * alignment); } -#endif - // last resort - return VirtualAlloc(addr, size, flags, PAGE_READWRITE); } -static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, DWORD flags, bool large_only, bool allow_large, bool* is_large) { - mi_assert_internal(!(large_only && !allow_large)); - static _Atomic(uintptr_t) large_page_try_ok; // = 0; - void* p = NULL; - if ((large_only || use_large_os_page(size, try_alignment)) - && allow_large && (flags&MEM_COMMIT)!=0 && (flags&MEM_RESERVE)!=0) { - uintptr_t try_ok = mi_atomic_load_acquire(&large_page_try_ok); - if (!large_only && try_ok > 0) { - // if a large page allocation fails, it seems the calls to VirtualAlloc get very expensive. - // therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times. - mi_atomic_cas_strong_acq_rel(&large_page_try_ok, &try_ok, try_ok - 1); - } - else { - // large OS pages must always reserve and commit. - *is_large = true; - p = mi_win_virtual_allocx(addr, size, try_alignment, flags | MEM_LARGE_PAGES); - if (large_only) return p; - // fall back to non-large page allocation on error (`p == NULL`). - if (p == NULL) { - mi_atomic_store_release(&large_page_try_ok,10UL); // on error, don't try again for the next N allocations - } - } - } - if (p == NULL) { - *is_large = ((flags&MEM_LARGE_PAGES) != 0); - p = mi_win_virtual_allocx(addr, size, try_alignment, flags); - } - if (p == NULL) { - _mi_warning_message("unable to allocate OS memory (%zu bytes, error code: %i, address: %p, large only: %d, allow large: %d)\n", size, GetLastError(), addr, large_only, allow_large); - } - return p; +static void* mi_align_down_ptr(void* p, size_t alignment) { + return (void*)_mi_align_down((uintptr_t)p, alignment); } -#elif defined(__wasi__) -static void* mi_wasm_heap_grow(size_t size, size_t try_alignment) { - uintptr_t base = __builtin_wasm_memory_size(0) * _mi_os_page_size(); - uintptr_t aligned_base = _mi_align_up(base, (uintptr_t) try_alignment); - size_t alloc_size = _mi_align_up( aligned_base - base + size, _mi_os_page_size()); - mi_assert(alloc_size >= size && (alloc_size % _mi_os_page_size()) == 0); - if (alloc_size < size) return NULL; - if (__builtin_wasm_memory_grow(0, alloc_size / _mi_os_page_size()) == SIZE_MAX) { - errno = ENOMEM; - return NULL; - } - return (void*)aligned_base; -} -#else -#define MI_OS_USE_MMAP -static void* mi_unix_mmapx(void* addr, size_t size, size_t try_alignment, int protect_flags, int flags, int fd) { - void* p = NULL; - #if (MI_INTPTR_SIZE >= 8) && !defined(MAP_ALIGNED) - // on 64-bit systems, use the virtual address area after 4TiB for 4MiB aligned allocations - void* hint; - if (addr == NULL && (hint = mi_os_get_aligned_hint(try_alignment, size)) != NULL) { - p = mmap(hint,size,protect_flags,flags,fd,0); - if (p==MAP_FAILED) p = NULL; // fall back to regular mmap - } - #else - UNUSED(try_alignment); - UNUSED(mi_os_get_aligned_hint); - #endif - if (p==NULL) { - p = mmap(addr,size,protect_flags,flags,fd,0); - if (p==MAP_FAILED) p = NULL; - } - return p; -} -static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int protect_flags, bool large_only, bool allow_large, bool* is_large) { - void* p = NULL; - #if !defined(MAP_ANONYMOUS) - #define MAP_ANONYMOUS MAP_ANON - #endif - #if !defined(MAP_NORESERVE) - #define MAP_NORESERVE 0 - #endif - int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE; - int fd = -1; - #if defined(MAP_ALIGNED) // BSD - if (try_alignment > 0) { - size_t n = mi_bsr(try_alignment); - if (((size_t)1 << n) == try_alignment && n >= 12 && n <= 30) { // alignment is a power of 2 and 4096 <= alignment <= 1GiB - flags |= MAP_ALIGNED(n); - } - } - #endif - #if defined(PROT_MAX) - protect_flags |= PROT_MAX(PROT_READ | PROT_WRITE); // BSD - #endif - #if defined(VM_MAKE_TAG) - // macOS: tracking anonymous page with a specific ID. (All up to 98 are taken officially but LLVM sanitizers had taken 99) - int os_tag = (int)mi_option_get(mi_option_os_tag); - if (os_tag < 100 || os_tag > 255) os_tag = 100; - fd = VM_MAKE_TAG(os_tag); - #endif - if ((large_only || use_large_os_page(size, try_alignment)) && allow_large) { - static _Atomic(uintptr_t) large_page_try_ok; // = 0; - uintptr_t try_ok = mi_atomic_load_acquire(&large_page_try_ok); - if (!large_only && try_ok > 0) { - // If the OS is not configured for large OS pages, or the user does not have - // enough permission, the `mmap` will always fail (but it might also fail for other reasons). - // Therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times - // to avoid too many failing calls to mmap. - mi_atomic_cas_strong_acq_rel(&large_page_try_ok, &try_ok, try_ok - 1); - } - else { - int lflags = flags & ~MAP_NORESERVE; // using NORESERVE on huge pages seems to fail on Linux - int lfd = fd; - #ifdef MAP_ALIGNED_SUPER - lflags |= MAP_ALIGNED_SUPER; - #endif - #ifdef MAP_HUGETLB - lflags |= MAP_HUGETLB; - #endif - #ifdef MAP_HUGE_1GB - static bool mi_huge_pages_available = true; - if ((size % GiB) == 0 && mi_huge_pages_available) { - lflags |= MAP_HUGE_1GB; - } - else - #endif - { - #ifdef MAP_HUGE_2MB - lflags |= MAP_HUGE_2MB; - #endif - } - #ifdef VM_FLAGS_SUPERPAGE_SIZE_2MB - lfd |= VM_FLAGS_SUPERPAGE_SIZE_2MB; - #endif - if (large_only || lflags != flags) { - // try large OS page allocation - *is_large = true; - p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, lflags, lfd); - #ifdef MAP_HUGE_1GB - if (p == NULL && (lflags & MAP_HUGE_1GB) != 0) { - mi_huge_pages_available = false; // don't try huge 1GiB pages again - _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (error %i)\n", errno); - lflags = ((lflags & ~MAP_HUGE_1GB) | MAP_HUGE_2MB); - p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, lflags, lfd); - } - #endif - if (large_only) return p; - if (p == NULL) { - mi_atomic_store_release(&large_page_try_ok, (uintptr_t)10); // on error, don't try again for the next N allocations - } - } - } - } - if (p == NULL) { - *is_large = false; - p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, flags, fd); - #if defined(MADV_HUGEPAGE) - // Many Linux systems don't allow MAP_HUGETLB but they support instead - // transparent huge pages (THP). It is not required to call `madvise` with MADV_HUGE - // though since properly aligned allocations will already use large pages if available - // in that case -- in particular for our large regions (in `memory.c`). - // However, some systems only allow THP if called with explicit `madvise`, so - // when large OS pages are enabled for mimalloc, we call `madvise` anyways. - if (allow_large && use_large_os_page(size, try_alignment)) { - if (madvise(p, size, MADV_HUGEPAGE) == 0) { - *is_large = true; // possibly - }; - } - #endif - #if defined(__sun) - if (allow_large && use_large_os_page(size, try_alignment)) { - struct memcntl_mha cmd = {0}; - cmd.mha_pagesize = large_os_page_size; - cmd.mha_cmd = MHA_MAPSIZE_VA; - if (memcntl(p, size, MC_HAT_ADVISE, (caddr_t)&cmd, 0, 0) == 0) { - *is_large = true; - } - } - #endif - } - if (p == NULL) { - _mi_warning_message("unable to allocate OS memory (%zu bytes, error code: %i, address: %p, large only: %d, allow large: %d)\n", size, errno, addr, large_only, allow_large); - } - return p; -} -#endif +/* ----------------------------------------------------------- + aligned hinting +-------------------------------------------------------------- */ // On 64-bit systems, we can do efficient aligned allocation by using -// the 4TiB to 30TiB area to allocate them. -#if (MI_INTPTR_SIZE >= 8) && (defined(_WIN32) || (defined(MI_OS_USE_MMAP) && !defined(MAP_ALIGNED))) -static mi_decl_cache_align _Atomic(uintptr_t) aligned_base; +// the 2TiB to 30TiB area to allocate those. +#if (MI_INTPTR_SIZE >= 8) +static mi_decl_cache_align _Atomic(uintptr_t)aligned_base; -// Return a 4MiB aligned address that is probably available. -// If this returns NULL, the OS will determine the address but on some OS's that may not be +// Return a MI_SEGMENT_SIZE aligned address that is probably available. +// If this returns NULL, the OS will determine the address but on some OS's that may not be // properly aligned which can be more costly as it needs to be adjusted afterwards. -// For a size > 1GiB this always returns NULL in order to guarantee good ASLR randomization; -// (otherwise an initial large allocation of say 2TiB has a 50% chance to include (known) addresses +// For a size > 1GiB this always returns NULL in order to guarantee good ASLR randomization; +// (otherwise an initial large allocation of say 2TiB has a 50% chance to include (known) addresses // in the middle of the 2TiB - 6TiB address range (see issue #372)) -#define KK_HINT_BASE ((uintptr_t)2 << 40) // 2TiB start -#define KK_HINT_AREA ((uintptr_t)4 << 40) // upto 6TiB (since before win8 there is "only" 8TiB available to processes) -#define KK_HINT_MAX ((uintptr_t)30 << 40) // wrap after 30TiB (area after 32TiB is used for huge OS pages) +#define MI_HINT_BASE ((uintptr_t)2 << 40) // 2TiB start +#define MI_HINT_AREA ((uintptr_t)4 << 40) // upto 6TiB (since before win8 there is "only" 8TiB available to processes) +#define MI_HINT_MAX ((uintptr_t)30 << 40) // wrap after 30TiB (area after 32TiB is used for huge OS pages) -static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) +void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size) { - if (try_alignment == 0 || try_alignment > MI_SEGMENT_SIZE) return NULL; - if ((size%MI_SEGMENT_SIZE) != 0) return NULL; - if (size > 1*GiB) return NULL; // guarantee the chance of fixed valid address is at most 1/(KK_HINT_AREA / 1<<30) = 1/4096. + if (try_alignment <= 1 || try_alignment > MI_SEGMENT_SIZE) return NULL; + size = _mi_align_up(size, MI_SEGMENT_SIZE); + if (size > 1*MI_GiB) return NULL; // guarantee the chance of fixed valid address is at most 1/(MI_HINT_AREA / 1<<30) = 1/4096. #if (MI_SECURE>0) size += MI_SEGMENT_SIZE; // put in `MI_SEGMENT_SIZE` virtual gaps between hinted blocks; this splits VLA's but increases guarded areas. #endif uintptr_t hint = mi_atomic_add_acq_rel(&aligned_base, size); - if (hint == 0 || hint > KK_HINT_MAX) { // wrap or initialize - uintptr_t init = KK_HINT_BASE; + if (hint == 0 || hint > MI_HINT_MAX) { // wrap or initialize + uintptr_t init = MI_HINT_BASE; #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode - uintptr_t r = _mi_heap_random_next(mi_get_default_heap()); - init = init + ((MI_SEGMENT_SIZE * ((r>>17) & 0xFFFFF)) % KK_HINT_AREA); // (randomly 20 bits)*4MiB == 0 to 4TiB + uintptr_t r = _mi_heap_random_next(mi_prim_get_default_heap()); + init = init + ((MI_SEGMENT_SIZE * ((r>>17) & 0xFFFFF)) % MI_HINT_AREA); // (randomly 20 bits)*4MiB == 0 to 4TiB #endif uintptr_t expected = hint + size; mi_atomic_cas_strong_acq_rel(&aligned_base, &expected, init); - hint = mi_atomic_add_acq_rel(&aligned_base, size); // this may still give 0 or > KK_HINT_MAX but that is ok, it is a hint after all + hint = mi_atomic_add_acq_rel(&aligned_base, size); // this may still give 0 or > MI_HINT_MAX but that is ok, it is a hint after all } if (hint%try_alignment != 0) return NULL; return (void*)hint; } #else -static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) { - UNUSED(try_alignment); UNUSED(size); +void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size) { + MI_UNUSED(try_alignment); MI_UNUSED(size); return NULL; } #endif -// Primitive allocation from the OS. +/* ----------------------------------------------------------- + Free memory +-------------------------------------------------------------- */ + +static void mi_os_free_huge_os_pages(void* p, size_t size, mi_stats_t* stats); + +static void mi_os_prim_free(void* addr, size_t size, bool still_committed, mi_stats_t* tld_stats) { + MI_UNUSED(tld_stats); + mi_stats_t* stats = &_mi_stats_main; + mi_assert_internal((size % _mi_os_page_size()) == 0); + if (addr == NULL || size == 0) return; // || _mi_os_is_huge_reserved(addr) + int err = _mi_prim_free(addr, size); + if (err != 0) { + _mi_warning_message("unable to free OS memory (error: %d (0x%x), size: 0x%zx bytes, address: %p)\n", err, err, size, addr); + } + if (still_committed) { _mi_stat_decrease(&stats->committed, size); } + _mi_stat_decrease(&stats->reserved, size); +} + +void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t memid, mi_stats_t* tld_stats) { + if (mi_memkind_is_os(memid.memkind)) { + size_t csize = _mi_os_good_alloc_size(size); + void* base = addr; + // different base? (due to alignment) + if (memid.mem.os.base != NULL) { + mi_assert(memid.mem.os.base <= addr); + mi_assert((uint8_t*)memid.mem.os.base + memid.mem.os.alignment >= (uint8_t*)addr); + base = memid.mem.os.base; + csize += ((uint8_t*)addr - (uint8_t*)memid.mem.os.base); + } + // free it + if (memid.memkind == MI_MEM_OS_HUGE) { + mi_assert(memid.is_pinned); + mi_os_free_huge_os_pages(base, csize, tld_stats); + } + else { + mi_os_prim_free(base, csize, still_committed, tld_stats); + } + } + else { + // nothing to do + mi_assert(memid.memkind < MI_MEM_OS); + } +} + +void _mi_os_free(void* p, size_t size, mi_memid_t memid, mi_stats_t* tld_stats) { + _mi_os_free_ex(p, size, true, memid, tld_stats); +} + + +/* ----------------------------------------------------------- + Primitive allocation from the OS. +-------------------------------------------------------------- */ + // Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned. -static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, mi_stats_t* stats) { +static void* mi_os_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* tld_stats) { mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); + mi_assert_internal(is_zero != NULL); + mi_assert_internal(is_large != NULL); if (size == 0) return NULL; - if (!commit) allow_large = false; - + if (!commit) { allow_large = false; } + if (try_alignment == 0) { try_alignment = 1; } // avoid 0 to ensure there will be no divide by zero when aligning + *is_zero = false; void* p = NULL; - /* - if (commit && allow_large) { - p = _mi_os_try_alloc_from_huge_reserved(size, try_alignment); - if (p != NULL) { - *is_large = true; - return p; - } + int err = _mi_prim_alloc(size, try_alignment, commit, allow_large, is_large, is_zero, &p); + if (err != 0) { + _mi_warning_message("unable to allocate OS memory (error: %d (0x%x), size: 0x%zx bytes, align: 0x%zx, commit: %d, allow large: %d)\n", err, err, size, try_alignment, commit, allow_large); } - */ - #if defined(_WIN32) - int flags = MEM_RESERVE; - if (commit) flags |= MEM_COMMIT; - p = mi_win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large); - #elif defined(__wasi__) - *is_large = false; - p = mi_wasm_heap_grow(size, try_alignment); - #else - int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE); - p = mi_unix_mmap(NULL, size, try_alignment, protect_flags, false, allow_large, is_large); - #endif + MI_UNUSED(tld_stats); + mi_stats_t* stats = &_mi_stats_main; mi_stat_counter_increase(stats->mmap_calls, 1); if (p != NULL) { _mi_stat_increase(&stats->reserved, size); - if (commit) { _mi_stat_increase(&stats->committed, size); } + if (commit) { + _mi_stat_increase(&stats->committed, size); + // seems needed for asan (or `mimalloc-test-api` fails) + #ifdef MI_TRACK_ASAN + if (*is_zero) { mi_track_mem_defined(p,size); } + else { mi_track_mem_undefined(p,size); } + #endif + } } return p; } @@ -554,119 +227,147 @@ static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, boo // Primitive aligned allocation from the OS. // This function guarantees the allocated memory is aligned. -static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_large, mi_stats_t* stats) { +static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** base, mi_stats_t* stats) { mi_assert_internal(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0)); mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); + mi_assert_internal(is_large != NULL); + mi_assert_internal(is_zero != NULL); + mi_assert_internal(base != NULL); if (!commit) allow_large = false; if (!(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0))) return NULL; size = _mi_align_up(size, _mi_os_page_size()); // try first with a hint (this will be aligned directly on Win 10+ or BSD) - void* p = mi_os_mem_alloc(size, alignment, commit, allow_large, is_large, stats); + void* p = mi_os_prim_alloc(size, alignment, commit, allow_large, is_large, is_zero, stats); if (p == NULL) return NULL; - // if not aligned, free it, overallocate, and unmap around it - if (((uintptr_t)p % alignment != 0)) { - mi_os_mem_free(p, size, commit, stats); + // aligned already? + if (((uintptr_t)p % alignment) == 0) { + *base = p; + } + else { + // if not aligned, free it, overallocate, and unmap around it + _mi_warning_message("unable to allocate aligned OS memory directly, fall back to over-allocation (size: 0x%zx bytes, address: %p, alignment: 0x%zx, commit: %d)\n", size, p, alignment, commit); + mi_os_prim_free(p, size, commit, stats); if (size >= (SIZE_MAX - alignment)) return NULL; // overflow - size_t over_size = size + alignment; - -#if _WIN32 - // over-allocate and than re-allocate exactly at an aligned address in there. - // this may fail due to threads allocating at the same time so we - // retry this at most 3 times before giving up. - // (we can not decommit around the overallocation on Windows, because we can only - // free the original pointer, not one pointing inside the area) - int flags = MEM_RESERVE; - if (commit) flags |= MEM_COMMIT; - for (int tries = 0; tries < 3; tries++) { - // over-allocate to determine a virtual memory range - p = mi_os_mem_alloc(over_size, alignment, commit, false, is_large, stats); - if (p == NULL) return NULL; // error - if (((uintptr_t)p % alignment) == 0) { - // if p happens to be aligned, just decommit the left-over area - _mi_os_decommit((uint8_t*)p + size, over_size - size, stats); - break; - } - else { - // otherwise free and allocate at an aligned address in there - mi_os_mem_free(p, over_size, commit, stats); - void* aligned_p = mi_align_up_ptr(p, alignment); - p = mi_win_virtual_alloc(aligned_p, size, alignment, flags, false, allow_large, is_large); - if (p == aligned_p) break; // success! - if (p != NULL) { // should not happen? - mi_os_mem_free(p, size, commit, stats); - p = NULL; - } + const size_t over_size = size + alignment; + + if (!mi_os_mem_config.has_partial_free) { // win32 virtualAlloc cannot free parts of an allocated block + // over-allocate uncommitted (virtual) memory + p = mi_os_prim_alloc(over_size, 1 /*alignment*/, false /* commit? */, false /* allow_large */, is_large, is_zero, stats); + if (p == NULL) return NULL; + + // set p to the aligned part in the full region + // note: this is dangerous on Windows as VirtualFree needs the actual base pointer + // this is handled though by having the `base` field in the memid's + *base = p; // remember the base + p = mi_align_up_ptr(p, alignment); + + // explicitly commit only the aligned part + if (commit) { + _mi_os_commit(p, size, NULL, stats); } } -#else - // overallocate... - p = mi_os_mem_alloc(over_size, alignment, commit, false, is_large, stats); - if (p == NULL) return NULL; - // and selectively unmap parts around the over-allocated area. - void* aligned_p = mi_align_up_ptr(p, alignment); - size_t pre_size = (uint8_t*)aligned_p - (uint8_t*)p; - size_t mid_size = _mi_align_up(size, _mi_os_page_size()); - size_t post_size = over_size - pre_size - mid_size; - mi_assert_internal(pre_size < over_size && post_size < over_size && mid_size >= size); - if (pre_size > 0) mi_os_mem_free(p, pre_size, commit, stats); - if (post_size > 0) mi_os_mem_free((uint8_t*)aligned_p + mid_size, post_size, commit, stats); - // we can return the aligned pointer on `mmap` systems - p = aligned_p; -#endif + else { // mmap can free inside an allocation + // overallocate... + p = mi_os_prim_alloc(over_size, 1, commit, false, is_large, is_zero, stats); + if (p == NULL) return NULL; + + // and selectively unmap parts around the over-allocated area. + void* aligned_p = mi_align_up_ptr(p, alignment); + size_t pre_size = (uint8_t*)aligned_p - (uint8_t*)p; + size_t mid_size = _mi_align_up(size, _mi_os_page_size()); + size_t post_size = over_size - pre_size - mid_size; + mi_assert_internal(pre_size < over_size&& post_size < over_size&& mid_size >= size); + if (pre_size > 0) { mi_os_prim_free(p, pre_size, commit, stats); } + if (post_size > 0) { mi_os_prim_free((uint8_t*)aligned_p + mid_size, post_size, commit, stats); } + // we can return the aligned pointer on `mmap` systems + p = aligned_p; + *base = aligned_p; // since we freed the pre part, `*base == p`. + } } - mi_assert_internal(p == NULL || (p != NULL && ((uintptr_t)p % alignment) == 0)); + mi_assert_internal(p == NULL || (p != NULL && *base != NULL && ((uintptr_t)p % alignment) == 0)); return p; } + /* ----------------------------------------------------------- - OS API: alloc, free, alloc_aligned + OS API: alloc and alloc_aligned ----------------------------------------------------------- */ -void* _mi_os_alloc(size_t size, mi_stats_t* tld_stats) { - UNUSED(tld_stats); - mi_stats_t* stats = &_mi_stats_main; +void* _mi_os_alloc(size_t size, mi_memid_t* memid, mi_stats_t* stats) { + *memid = _mi_memid_none(); if (size == 0) return NULL; size = _mi_os_good_alloc_size(size); - bool is_large = false; - return mi_os_mem_alloc(size, 0, true, false, &is_large, stats); -} - -void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* tld_stats) { - UNUSED(tld_stats); - mi_stats_t* stats = &_mi_stats_main; - if (size == 0 || p == NULL) return; - size = _mi_os_good_alloc_size(size); - mi_os_mem_free(p, size, was_committed, stats); -} - -void _mi_os_free(void* p, size_t size, mi_stats_t* stats) { - _mi_os_free_ex(p, size, true, stats); + bool os_is_large = false; + bool os_is_zero = false; + void* p = mi_os_prim_alloc(size, 0, true, false, &os_is_large, &os_is_zero, stats); + if (p != NULL) { + *memid = _mi_memid_create_os(true, os_is_zero, os_is_large); + } + return p; } -void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_stats_t* tld_stats) +void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* stats) { - UNUSED(tld_stats); + MI_UNUSED(&_mi_os_get_aligned_hint); // suppress unused warnings + *memid = _mi_memid_none(); if (size == 0) return NULL; size = _mi_os_good_alloc_size(size); alignment = _mi_align_up(alignment, _mi_os_page_size()); - bool allow_large = false; - if (large != NULL) { - allow_large = *large; - *large = false; + + bool os_is_large = false; + bool os_is_zero = false; + void* os_base = NULL; + void* p = mi_os_prim_alloc_aligned(size, alignment, commit, allow_large, &os_is_large, &os_is_zero, &os_base, stats ); + if (p != NULL) { + *memid = _mi_memid_create_os(commit, os_is_zero, os_is_large); + memid->mem.os.base = os_base; + memid->mem.os.alignment = alignment; } - return mi_os_mem_alloc_aligned(size, alignment, commit, allow_large, (large!=NULL?large:&allow_large), &_mi_stats_main /*tld->stats*/ ); + return p; } +/* ----------------------------------------------------------- + OS aligned allocation with an offset. This is used + for large alignments > MI_BLOCK_ALIGNMENT_MAX. We use a large mimalloc + page where the object can be aligned at an offset from the start of the segment. + As we may need to overallocate, we need to free such pointers using `mi_free_aligned` + to use the actual start of the memory region. +----------------------------------------------------------- */ +void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t offset, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* stats) { + mi_assert(offset <= MI_SEGMENT_SIZE); + mi_assert(offset <= size); + mi_assert((alignment % _mi_os_page_size()) == 0); + *memid = _mi_memid_none(); + if (offset > MI_SEGMENT_SIZE) return NULL; + if (offset == 0) { + // regular aligned allocation + return _mi_os_alloc_aligned(size, alignment, commit, allow_large, memid, stats); + } + else { + // overallocate to align at an offset + const size_t extra = _mi_align_up(offset, alignment) - offset; + const size_t oversize = size + extra; + void* const start = _mi_os_alloc_aligned(oversize, alignment, commit, allow_large, memid, stats); + if (start == NULL) return NULL; + + void* const p = (uint8_t*)start + extra; + mi_assert(_mi_is_aligned((uint8_t*)p + offset, alignment)); + // decommit the overallocation at the start + if (commit && extra > _mi_os_page_size()) { + _mi_os_decommit(start, extra, stats); + } + return p; + } +} /* ----------------------------------------------------------- OS memory API: reset, commit, decommit, protect, unprotect. ----------------------------------------------------------- */ - // OS page align within a given area, either conservative (pages inside the area only), // or not (straddling pages outside the area is possible) static void* mi_os_page_align_areax(bool conservative, void* addr, size_t size, size_t* newsize) { @@ -691,176 +392,115 @@ static void* mi_os_page_align_area_conservative(void* addr, size_t size, size_t* return mi_os_page_align_areax(true, addr, size, newsize); } -static void mi_mprotect_hint(int err) { -#if defined(MI_OS_USE_MMAP) && (MI_SECURE>=2) // guard page around every mimalloc page - if (err == ENOMEM) { - _mi_warning_message("the previous warning may have been caused by a low memory map limit.\n" - " On Linux this is controlled by the vm.max_map_count. For example:\n" - " > sudo sysctl -w vm.max_map_count=262144\n"); - } -#else - UNUSED(err); -#endif -} - -// Commit/Decommit memory. -// Usually commit is aligned liberal, while decommit is aligned conservative. -// (but not for the reset version where we want commit to be conservative as well) -static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservative, bool* is_zero, mi_stats_t* stats) { - // page align in the range, commit liberally, decommit conservative +bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats) { + MI_UNUSED(tld_stats); + mi_stats_t* stats = &_mi_stats_main; if (is_zero != NULL) { *is_zero = false; } + _mi_stat_increase(&stats->committed, size); // use size for precise commit vs. decommit + _mi_stat_counter_increase(&stats->commit_calls, 1); + + // page align range size_t csize; - void* start = mi_os_page_align_areax(conservative, addr, size, &csize); - if (csize == 0) return true; // || _mi_os_is_huge_reserved(addr)) - int err = 0; - if (commit) { - _mi_stat_increase(&stats->committed, size); // use size for precise commit vs. decommit - _mi_stat_counter_increase(&stats->commit_calls, 1); - } - else { - _mi_stat_decrease(&stats->committed, size); - } + void* start = mi_os_page_align_areax(false /* conservative? */, addr, size, &csize); + if (csize == 0) return true; - #if defined(_WIN32) - if (commit) { - // if the memory was already committed, the call succeeds but it is not zero'd - // *is_zero = true; - void* p = VirtualAlloc(start, csize, MEM_COMMIT, PAGE_READWRITE); - err = (p == start ? 0 : GetLastError()); - } - else { - BOOL ok = VirtualFree(start, csize, MEM_DECOMMIT); - err = (ok ? 0 : GetLastError()); - } - #elif defined(__wasi__) - // WebAssembly guests can't control memory protection - #elif defined(MAP_FIXED) - if (!commit) { - // use mmap with MAP_FIXED to discard the existing memory (and reduce commit charge) - void* p = mmap(start, csize, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), -1, 0); - if (p != start) { err = errno; } + // commit + bool os_is_zero = false; + int err = _mi_prim_commit(start, csize, &os_is_zero); + if (err != 0) { + _mi_warning_message("cannot commit OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize); + return false; } - else { - // for commit, just change the protection - err = mprotect(start, csize, (PROT_READ | PROT_WRITE)); - if (err != 0) { err = errno; } - #if defined(MADV_FREE_REUSE) - while ((err = madvise(start, csize, MADV_FREE_REUSE)) != 0 && errno == EAGAIN) { errno = 0; } - #endif + if (os_is_zero && is_zero != NULL) { + *is_zero = true; + mi_assert_expensive(mi_mem_is_zero(start, csize)); } - #else - err = mprotect(start, csize, (commit ? (PROT_READ | PROT_WRITE) : PROT_NONE)); - if (err != 0) { err = errno; } + // note: the following seems required for asan (otherwise `mimalloc-test-stress` fails) + #ifdef MI_TRACK_ASAN + if (os_is_zero) { mi_track_mem_defined(start,csize); } + else { mi_track_mem_undefined(start,csize); } #endif + return true; +} + +static bool mi_os_decommit_ex(void* addr, size_t size, bool* needs_recommit, mi_stats_t* tld_stats) { + MI_UNUSED(tld_stats); + mi_stats_t* stats = &_mi_stats_main; + mi_assert_internal(needs_recommit!=NULL); + _mi_stat_decrease(&stats->committed, size); + + // page align + size_t csize; + void* start = mi_os_page_align_area_conservative(addr, size, &csize); + if (csize == 0) return true; + + // decommit + *needs_recommit = true; + int err = _mi_prim_decommit(start,csize,needs_recommit); if (err != 0) { - _mi_warning_message("%s error: start: %p, csize: 0x%x, err: %i\n", commit ? "commit" : "decommit", start, csize, err); - mi_mprotect_hint(err); + _mi_warning_message("cannot decommit OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize); } mi_assert_internal(err == 0); return (err == 0); } -bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats) { - UNUSED(tld_stats); - mi_stats_t* stats = &_mi_stats_main; - return mi_os_commitx(addr, size, true, false /* liberal */, is_zero, stats); -} - bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* tld_stats) { - UNUSED(tld_stats); - mi_stats_t* stats = &_mi_stats_main; - bool is_zero; - return mi_os_commitx(addr, size, false, true /* conservative */, &is_zero, stats); + bool needs_recommit; + return mi_os_decommit_ex(addr, size, &needs_recommit, tld_stats); } -static bool mi_os_commit_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* stats) { - return mi_os_commitx(addr, size, true, true /* conservative */, is_zero, stats); -} // Signal to the OS that the address range is no longer in use // but may be used later again. This will release physical memory // pages and reduce swapping while keeping the memory committed. // We page align to a conservative area inside the range to reset. -static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) { +bool _mi_os_reset(void* addr, size_t size, mi_stats_t* stats) { // page align conservatively within the range size_t csize; void* start = mi_os_page_align_area_conservative(addr, size, &csize); if (csize == 0) return true; // || _mi_os_is_huge_reserved(addr) - if (reset) _mi_stat_increase(&stats->reset, csize); - else _mi_stat_decrease(&stats->reset, csize); - if (!reset) return true; // nothing to do on unreset! + _mi_stat_increase(&stats->reset, csize); + _mi_stat_counter_increase(&stats->reset_calls, 1); - #if (MI_DEBUG>1) - if (MI_SECURE==0) { - memset(start, 0, csize); // pretend it is eagerly reset - } + #if (MI_DEBUG>1) && !MI_SECURE && !MI_TRACK_ENABLED // && !MI_TSAN + memset(start, 0, csize); // pretend it is eagerly reset #endif -#if defined(_WIN32) - // Testing shows that for us (on `malloc-large`) MEM_RESET is 2x faster than DiscardVirtualMemory - void* p = VirtualAlloc(start, csize, MEM_RESET, PAGE_READWRITE); - mi_assert_internal(p == start); - #if 1 - if (p == start && start != NULL) { - VirtualUnlock(start,csize); // VirtualUnlock after MEM_RESET removes the memory from the working set - } - #endif - if (p != start) return false; -#else -#if defined(MADV_FREE) - #if defined(MADV_FREE_REUSABLE) - #define KK_MADV_FREE_INITIAL MADV_FREE_REUSABLE - #else - #define KK_MADV_FREE_INITIAL MADV_FREE - #endif - static _Atomic(uintptr_t) advice = ATOMIC_VAR_INIT(KK_MADV_FREE_INITIAL); - int oadvice = (int)mi_atomic_load_relaxed(&advice); - int err; - while ((err = madvise(start, csize, oadvice)) != 0 && errno == EAGAIN) { errno = 0; }; - if (err != 0 && errno == EINVAL && oadvice == KK_MADV_FREE_INITIAL) { - // if MADV_FREE/MADV_FREE_REUSABLE is not supported, fall back to MADV_DONTNEED from now on - mi_atomic_store_release(&advice, (uintptr_t)MADV_DONTNEED); - err = madvise(start, csize, MADV_DONTNEED); - } -#elif defined(__wasi__) - int err = 0; -#else - int err = madvise(start, csize, MADV_DONTNEED); -#endif + int err = _mi_prim_reset(start, csize); if (err != 0) { - _mi_warning_message("madvise reset error: start: %p, csize: 0x%x, errno: %i\n", start, csize, errno); + _mi_warning_message("cannot reset OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize); } - //mi_assert(err == 0); - if (err != 0) return false; -#endif - return true; + return (err == 0); } -// Signal to the OS that the address range is no longer in use -// but may be used later again. This will release physical memory -// pages and reduce swapping while keeping the memory committed. -// We page align to a conservative area inside the range to reset. -bool _mi_os_reset(void* addr, size_t size, mi_stats_t* tld_stats) { - UNUSED(tld_stats); - mi_stats_t* stats = &_mi_stats_main; - if (mi_option_is_enabled(mi_option_reset_decommits)) { - return _mi_os_decommit(addr, size, stats); + +// either resets or decommits memory, returns true if the memory needs +// to be recommitted if it is to be re-used later on. +bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, mi_stats_t* stats) +{ + if (mi_option_get(mi_option_purge_delay) < 0) return false; // is purging allowed? + _mi_stat_counter_increase(&stats->purge_calls, 1); + _mi_stat_increase(&stats->purged, size); + + if (mi_option_is_enabled(mi_option_purge_decommits) && // should decommit? + !_mi_preloading()) // don't decommit during preloading (unsafe) + { + bool needs_recommit = true; + mi_os_decommit_ex(p, size, &needs_recommit, stats); + return needs_recommit; } else { - return mi_os_resetx(addr, size, true, stats); + if (allow_reset) { // this can sometimes be not allowed if the range is not fully committed + _mi_os_reset(p, size, stats); + } + return false; // needs no recommit } } -bool _mi_os_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats) { - UNUSED(tld_stats); - mi_stats_t* stats = &_mi_stats_main; - if (mi_option_is_enabled(mi_option_reset_decommits)) { - return mi_os_commit_unreset(addr, size, is_zero, stats); // re-commit it (conservatively!) - } - else { - *is_zero = false; - return mi_os_resetx(addr, size, false, stats); - } +// either resets or decommits memory, returns true if the memory needs +// to be recommitted if it is to be re-used later on. +bool _mi_os_purge(void* p, size_t size, mi_stats_t * stats) { + return _mi_os_purge_ex(p, size, true, stats); } @@ -875,20 +515,9 @@ static bool mi_os_protectx(void* addr, size_t size, bool protect) { _mi_warning_message("cannot mprotect memory allocated in huge OS pages\n"); } */ - int err = 0; -#ifdef _WIN32 - DWORD oldprotect = 0; - BOOL ok = VirtualProtect(start, csize, protect ? PAGE_NOACCESS : PAGE_READWRITE, &oldprotect); - err = (ok ? 0 : GetLastError()); -#elif defined(__wasi__) - err = 0; -#else - err = mprotect(start, csize, protect ? PROT_NONE : (PROT_READ | PROT_WRITE)); - if (err != 0) { err = errno; } -#endif + int err = _mi_prim_protect(start,csize,protect); if (err != 0) { - _mi_warning_message("mprotect error: start: %p, csize: 0x%x, err: %i\n", start, csize, err); - mi_mprotect_hint(err); + _mi_warning_message("cannot %s OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", (protect ? "protect" : "unprotect"), err, err, start, csize); } return (err == 0); } @@ -903,121 +532,12 @@ bool _mi_os_unprotect(void* addr, size_t size) { -bool _mi_os_shrink(void* p, size_t oldsize, size_t newsize, mi_stats_t* stats) { - // page align conservatively within the range - mi_assert_internal(oldsize > newsize && p != NULL); - if (oldsize < newsize || p == NULL) return false; - if (oldsize == newsize) return true; - - // oldsize and newsize should be page aligned or we cannot shrink precisely - void* addr = (uint8_t*)p + newsize; - size_t size = 0; - void* start = mi_os_page_align_area_conservative(addr, oldsize - newsize, &size); - if (size == 0 || start != addr) return false; - -#ifdef _WIN32 - // we cannot shrink on windows, but we can decommit - return _mi_os_decommit(start, size, stats); -#else - return mi_os_mem_free(start, size, true, stats); -#endif -} - - /* ---------------------------------------------------------------------------- Support for allocating huge OS pages (1Gib) that are reserved up-front and possibly associated with a specific NUMA node. (use `numa_node>=0`) -----------------------------------------------------------------------------*/ -#define MI_HUGE_OS_PAGE_SIZE (GiB) +#define MI_HUGE_OS_PAGE_SIZE (MI_GiB) -#if defined(_WIN32) && (MI_INTPTR_SIZE >= 8) -static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) -{ - mi_assert_internal(size%GiB == 0); - mi_assert_internal(addr != NULL); - const DWORD flags = MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE; - - mi_win_enable_large_os_pages(); - - #if defined(MEM_EXTENDED_PARAMETER_TYPE_BITS) - MEM_EXTENDED_PARAMETER params[3] = { {{0,0},{0}},{{0,0},{0}},{{0,0},{0}} }; - // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages - static bool mi_huge_pages_available = true; - if (pNtAllocateVirtualMemoryEx != NULL && mi_huge_pages_available) { - #ifndef MEM_EXTENDED_PARAMETER_NONPAGED_HUGE - #define MEM_EXTENDED_PARAMETER_NONPAGED_HUGE (0x10) - #endif - params[0].Type = 5; // == MemExtendedParameterAttributeFlags; - params[0].ULong64 = MEM_EXTENDED_PARAMETER_NONPAGED_HUGE; - ULONG param_count = 1; - if (numa_node >= 0) { - param_count++; - params[1].Type = MemExtendedParameterNumaNode; - params[1].ULong = (unsigned)numa_node; - } - SIZE_T psize = size; - void* base = addr; - NTSTATUS err = (*pNtAllocateVirtualMemoryEx)(GetCurrentProcess(), &base, &psize, flags, PAGE_READWRITE, params, param_count); - if (err == 0 && base != NULL) { - return base; - } - else { - // fall back to regular large pages - mi_huge_pages_available = false; // don't try further huge pages - _mi_warning_message("unable to allocate using huge (1gb) pages, trying large (2mb) pages instead (status 0x%lx)\n", err); - } - } - // on modern Windows try use VirtualAlloc2 for numa aware large OS page allocation - if (pVirtualAlloc2 != NULL && numa_node >= 0) { - params[0].Type = MemExtendedParameterNumaNode; - params[0].ULong = (unsigned)numa_node; - return (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, params, 1); - } - #else - UNUSED(numa_node); - #endif - // otherwise use regular virtual alloc on older windows - return VirtualAlloc(addr, size, flags, PAGE_READWRITE); -} - -#elif defined(MI_OS_USE_MMAP) && (MI_INTPTR_SIZE >= 8) && !defined(__HAIKU__) -#include <sys/syscall.h> -#ifndef MPOL_PREFERRED -#define MPOL_PREFERRED 1 -#endif -#if defined(SYS_mbind) -static long mi_os_mbind(void* start, unsigned long len, unsigned long mode, const unsigned long* nmask, unsigned long maxnode, unsigned flags) { - return syscall(SYS_mbind, start, len, mode, nmask, maxnode, flags); -} -#else -static long mi_os_mbind(void* start, unsigned long len, unsigned long mode, const unsigned long* nmask, unsigned long maxnode, unsigned flags) { - UNUSED(start); UNUSED(len); UNUSED(mode); UNUSED(nmask); UNUSED(maxnode); UNUSED(flags); - return 0; -} -#endif -static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) { - mi_assert_internal(size%GiB == 0); - bool is_large = true; - void* p = mi_unix_mmap(addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); - if (p == NULL) return NULL; - if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes - uintptr_t numa_mask = (1UL << numa_node); - // TODO: does `mbind` work correctly for huge OS pages? should we - // use `set_mempolicy` before calling mmap instead? - // see: <https://lkml.org/lkml/2017/2/9/875> - long err = mi_os_mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); - if (err != 0) { - _mi_warning_message("failed to bind huge (1gb) pages to numa node %d: %s\n", numa_node, strerror(errno)); - } - } - return p; -} -#else -static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) { - UNUSED(addr); UNUSED(size); UNUSED(numa_node); - return NULL; -} -#endif #if (MI_INTPTR_SIZE >= 8) // To ensure proper alignment, use our own area for huge OS pages @@ -1036,10 +556,10 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { if (start == 0) { // Initialize the start address after the 32TiB area start = ((uintptr_t)32 << 40); // 32TiB virtual start address -#if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode - uintptr_t r = _mi_heap_random_next(mi_get_default_heap()); + #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode + uintptr_t r = _mi_heap_random_next(mi_prim_get_default_heap()); start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x0FFF)); // (randomly 12bits)*1GiB == between 0 to 4TiB -#endif + #endif } end = start + size; mi_assert_internal(end % MI_SEGMENT_SIZE == 0); @@ -1050,14 +570,15 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { } #else static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { - UNUSED(pages); + MI_UNUSED(pages); if (total_size != NULL) *total_size = 0; return NULL; } #endif // Allocate MI_SEGMENT_SIZE aligned huge pages -void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_msecs, size_t* pages_reserved, size_t* psize) { +void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_msecs, size_t* pages_reserved, size_t* psize, mi_memid_t* memid) { + *memid = _mi_memid_none(); if (psize != NULL) *psize = 0; if (pages_reserved != NULL) *pages_reserved = 0; size_t size = 0; @@ -1068,23 +589,32 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse // We allocate one page at the time to be able to abort if it takes too long // or to at least allocate as many as available on the system. mi_msecs_t start_t = _mi_clock_start(); - size_t page; - for (page = 0; page < pages; page++) { + size_t page = 0; + bool all_zero = true; + while (page < pages) { // allocate a page + bool is_zero = false; void* addr = start + (page * MI_HUGE_OS_PAGE_SIZE); - void* p = mi_os_alloc_huge_os_pagesx(addr, MI_HUGE_OS_PAGE_SIZE, numa_node); + void* p = NULL; + int err = _mi_prim_alloc_huge_os_pages(addr, MI_HUGE_OS_PAGE_SIZE, numa_node, &is_zero, &p); + if (!is_zero) { all_zero = false; } + if (err != 0) { + _mi_warning_message("unable to allocate huge OS page (error: %d (0x%x), address: %p, size: %zx bytes)\n", err, err, addr, MI_HUGE_OS_PAGE_SIZE); + break; + } // Did we succeed at a contiguous address? if (p != addr) { // no success, issue a warning and break if (p != NULL) { - _mi_warning_message("could not allocate contiguous huge page %zu at %p\n", page, addr); - _mi_os_free(p, MI_HUGE_OS_PAGE_SIZE, &_mi_stats_main); + _mi_warning_message("could not allocate contiguous huge OS page %zu at %p\n", page, addr); + mi_os_prim_free(p, MI_HUGE_OS_PAGE_SIZE, true, &_mi_stats_main); } break; } // success, record it + page++; // increase before timeout check (see issue #711) _mi_stat_increase(&_mi_stats_main.committed, MI_HUGE_OS_PAGE_SIZE); _mi_stat_increase(&_mi_stats_main.reserved, MI_HUGE_OS_PAGE_SIZE); @@ -1098,109 +628,41 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse } } if (elapsed > max_msecs) { - _mi_warning_message("huge page allocation timed out\n"); + _mi_warning_message("huge OS page allocation timed out (after allocating %zu page(s))\n", page); break; } } } mi_assert_internal(page*MI_HUGE_OS_PAGE_SIZE <= size); - if (pages_reserved != NULL) *pages_reserved = page; - if (psize != NULL) *psize = page * MI_HUGE_OS_PAGE_SIZE; + if (pages_reserved != NULL) { *pages_reserved = page; } + if (psize != NULL) { *psize = page * MI_HUGE_OS_PAGE_SIZE; } + if (page != 0) { + mi_assert(start != NULL); + *memid = _mi_memid_create_os(true /* is committed */, all_zero, true /* is_large */); + memid->memkind = MI_MEM_OS_HUGE; + mi_assert(memid->is_pinned); + #ifdef MI_TRACK_ASAN + if (all_zero) { mi_track_mem_defined(start,size); } + #endif + } return (page == 0 ? NULL : start); } // free every huge page in a range individually (as we allocated per page) // note: needed with VirtualAlloc but could potentially be done in one go on mmap'd systems. -void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats) { +static void mi_os_free_huge_os_pages(void* p, size_t size, mi_stats_t* stats) { if (p==NULL || size==0) return; uint8_t* base = (uint8_t*)p; while (size >= MI_HUGE_OS_PAGE_SIZE) { - _mi_os_free(base, MI_HUGE_OS_PAGE_SIZE, stats); + mi_os_prim_free(base, MI_HUGE_OS_PAGE_SIZE, true, stats); size -= MI_HUGE_OS_PAGE_SIZE; + base += MI_HUGE_OS_PAGE_SIZE; } } /* ---------------------------------------------------------------------------- Support NUMA aware allocation -----------------------------------------------------------------------------*/ -#ifdef _WIN32 -static size_t mi_os_numa_nodex() { - USHORT numa_node = 0; - if (pGetCurrentProcessorNumberEx != NULL && pGetNumaProcessorNodeEx != NULL) { - // Extended API is supported - PROCESSOR_NUMBER pnum; - (*pGetCurrentProcessorNumberEx)(&pnum); - USHORT nnode = 0; - BOOL ok = (*pGetNumaProcessorNodeEx)(&pnum, &nnode); - if (ok) numa_node = nnode; - } - else { - // Vista or earlier, use older API that is limited to 64 processors. Issue #277 - DWORD pnum = GetCurrentProcessorNumber(); - UCHAR nnode = 0; - BOOL ok = GetNumaProcessorNode((UCHAR)pnum, &nnode); - if (ok) numa_node = nnode; - } - return numa_node; -} - -static size_t mi_os_numa_node_countx(void) { - ULONG numa_max = 0; - GetNumaHighestNodeNumber(&numa_max); - // find the highest node number that has actual processors assigned to it. Issue #282 - while(numa_max > 0) { - if (pGetNumaNodeProcessorMaskEx != NULL) { - // Extended API is supported - GROUP_AFFINITY affinity; - if ((*pGetNumaNodeProcessorMaskEx)((USHORT)numa_max, &affinity)) { - if (affinity.Mask != 0) break; // found the maximum non-empty node - } - } - else { - // Vista or earlier, use older API that is limited to 64 processors. - ULONGLONG mask; - if (GetNumaNodeProcessorMask((UCHAR)numa_max, &mask)) { - if (mask != 0) break; // found the maximum non-empty node - }; - } - // max node was invalid or had no processor assigned, try again - numa_max--; - } - return ((size_t)numa_max + 1); -} -#elif defined(__linux__) -#include <sys/syscall.h> // getcpu -#include <stdio.h> // access - -static size_t mi_os_numa_nodex(void) { -#ifdef SYS_getcpu - unsigned long node = 0; - unsigned long ncpu = 0; - long err = syscall(SYS_getcpu, &ncpu, &node, NULL); - if (err != 0) return 0; - return node; -#else - return 0; -#endif -} -static size_t mi_os_numa_node_countx(void) { - char buf[128]; - unsigned node = 0; - for(node = 0; node < 256; node++) { - // enumerate node entries -- todo: it there a more efficient way to do this? (but ensure there is no allocation) - snprintf(buf, 127, "/sys/devices/system/node/node%u", node + 1); - if (access(buf,R_OK) != 0) break; - } - return (node+1); -} -#else -static size_t mi_os_numa_nodex(void) { - return 0; -} -static size_t mi_os_numa_node_countx(void) { - return 1; -} -#endif _Atomic(size_t) _mi_numa_node_count; // = 0 // cache the node count @@ -1212,9 +674,9 @@ size_t _mi_os_numa_node_count_get(void) { count = (size_t)ncount; } else { - count = mi_os_numa_node_countx(); // or detect dynamically + count = _mi_prim_numa_node_count(); // or detect dynamically if (count == 0) count = 1; - } + } mi_atomic_store_release(&_mi_numa_node_count, count); // save it _mi_verbose_message("using %zd numa regions\n", count); } @@ -1222,11 +684,11 @@ size_t _mi_os_numa_node_count_get(void) { } int _mi_os_numa_node_get(mi_os_tld_t* tld) { - UNUSED(tld); + MI_UNUSED(tld); size_t numa_count = _mi_os_numa_node_count(); if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0 // never more than the node count and >= 0 - size_t numa_node = mi_os_numa_nodex(); + size_t numa_node = _mi_prim_numa_node(); if (numa_node >= numa_count) { numa_node = numa_node % numa_count; } return (int)numa_node; } diff --git a/contrib/libs/mimalloc/src/page-queue.c b/contrib/libs/mimalloc/src/page-queue.c index 365257e766..02a8008d4a 100644 --- a/contrib/libs/mimalloc/src/page-queue.c +++ b/contrib/libs/mimalloc/src/page-queue.c @@ -1,5 +1,5 @@ /*---------------------------------------------------------------------------- -Copyright (c) 2018-2020, Microsoft Research, Daan Leijen +Copyright (c) 2018-2024, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. @@ -11,6 +11,10 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MI_IN_PAGE_C #error "this file should be included from 'page.c'" +// include to help an IDE +#include "mimalloc.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" #endif /* ----------------------------------------------------------- @@ -53,7 +57,7 @@ static inline bool mi_page_queue_is_special(const mi_page_queue_t* pq) { // Returns MI_BIN_HUGE if the size is too large. // We use `wsize` for the size in "machine word sizes", // i.e. byte size == `wsize*sizeof(void*)`. -extern inline uint8_t _mi_bin(size_t size) { +static inline uint8_t mi_bin(size_t size) { size_t wsize = _mi_wsize_from_size(size); uint8_t bin; if (wsize <= 1) { @@ -76,7 +80,7 @@ extern inline uint8_t _mi_bin(size_t size) { bin = MI_BIN_HUGE; } else { - #if defined(MI_ALIGN4W) + #if defined(MI_ALIGN4W) if (wsize <= 16) { wsize = (wsize+3)&~3; } // round to 4x word sizes #endif wsize--; @@ -98,6 +102,10 @@ extern inline uint8_t _mi_bin(size_t size) { Queue of pages with free blocks ----------------------------------------------------------- */ +uint8_t _mi_bin(size_t size) { + return mi_bin(size); +} + size_t _mi_bin_size(uint8_t bin) { return _mi_heap_empty.pages[bin].block_size; } @@ -105,10 +113,10 @@ size_t _mi_bin_size(uint8_t bin) { // Good size for allocation size_t mi_good_size(size_t size) mi_attr_noexcept { if (size <= MI_LARGE_OBJ_SIZE_MAX) { - return _mi_bin_size(_mi_bin(size)); + return _mi_bin_size(mi_bin(size + MI_PADDING_SIZE)); } else { - return _mi_align_up(size,_mi_os_page_size()); + return _mi_align_up(size + MI_PADDING_SIZE,_mi_os_page_size()); } } @@ -133,21 +141,21 @@ static bool mi_heap_contains_queue(const mi_heap_t* heap, const mi_page_queue_t* } #endif -static mi_page_queue_t* mi_page_queue_of(const mi_page_t* page) { - uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : _mi_bin(page->xblock_size)); - mi_heap_t* heap = mi_page_heap(page); - mi_assert_internal(heap != NULL && bin <= MI_BIN_FULL); +static mi_page_queue_t* mi_heap_page_queue_of(mi_heap_t* heap, const mi_page_t* page) { + mi_assert_internal(heap!=NULL); + uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : (mi_page_is_huge(page) ? MI_BIN_HUGE : mi_bin(mi_page_block_size(page)))); + mi_assert_internal(bin <= MI_BIN_FULL); mi_page_queue_t* pq = &heap->pages[bin]; - mi_assert_internal(bin >= MI_BIN_HUGE || page->xblock_size == pq->block_size); - mi_assert_expensive(mi_page_queue_contains(pq, page)); + mi_assert_internal((mi_page_block_size(page) == pq->block_size) || + (mi_page_is_huge(page) && mi_page_queue_is_huge(pq)) || + (mi_page_is_in_full(page) && mi_page_queue_is_full(pq))); return pq; } -static mi_page_queue_t* mi_heap_page_queue_of(mi_heap_t* heap, const mi_page_t* page) { - uint8_t bin = (mi_page_is_in_full(page) ? MI_BIN_FULL : _mi_bin(page->xblock_size)); - mi_assert_internal(bin <= MI_BIN_FULL); - mi_page_queue_t* pq = &heap->pages[bin]; - mi_assert_internal(mi_page_is_in_full(page) || page->xblock_size == pq->block_size); +static mi_page_queue_t* mi_page_queue_of(const mi_page_t* page) { + mi_heap_t* heap = mi_page_heap(page); + mi_page_queue_t* pq = mi_heap_page_queue_of(heap, page); + mi_assert_expensive(mi_page_queue_contains(pq, page)); return pq; } @@ -177,9 +185,9 @@ static inline void mi_heap_queue_first_update(mi_heap_t* heap, const mi_page_que } else { // find previous size; due to minimal alignment upto 3 previous bins may need to be skipped - uint8_t bin = _mi_bin(size); + uint8_t bin = mi_bin(size); const mi_page_queue_t* prev = pq - 1; - while( bin == _mi_bin(prev->block_size) && prev > &heap->pages[0]) { + while( bin == mi_bin(prev->block_size) && prev > &heap->pages[0]) { prev--; } start = 1 + _mi_wsize_from_size(prev->block_size); @@ -202,7 +210,9 @@ static bool mi_page_queue_is_empty(mi_page_queue_t* queue) { static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) { mi_assert_internal(page != NULL); mi_assert_expensive(mi_page_queue_contains(queue, page)); - mi_assert_internal(page->xblock_size == queue->block_size || (page->xblock_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(queue)) || (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); + mi_assert_internal(mi_page_block_size(page) == queue->block_size || + (mi_page_is_huge(page) && mi_page_queue_is_huge(queue)) || + (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); mi_heap_t* heap = mi_page_heap(page); if (page->prev != NULL) page->prev->next = page->next; if (page->next != NULL) page->next->prev = page->prev; @@ -224,9 +234,11 @@ static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) { static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_t* page) { mi_assert_internal(mi_page_heap(page) == heap); mi_assert_internal(!mi_page_queue_contains(queue, page)); + #if MI_HUGE_PAGE_ABANDON mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE); - mi_assert_internal(page->xblock_size == queue->block_size || - (page->xblock_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(queue)) || + #endif + mi_assert_internal(mi_page_block_size(page) == queue->block_size || + (mi_page_is_huge(page) && mi_page_queue_is_huge(queue)) || (mi_page_is_in_full(page) && mi_page_queue_is_full(queue))); mi_page_set_in_full(page, mi_page_queue_is_full(queue)); @@ -252,11 +264,13 @@ static void mi_page_queue_enqueue_from(mi_page_queue_t* to, mi_page_queue_t* fro mi_assert_internal(page != NULL); mi_assert_expensive(mi_page_queue_contains(from, page)); mi_assert_expensive(!mi_page_queue_contains(to, page)); - mi_assert_internal((page->xblock_size == to->block_size && page->xblock_size == from->block_size) || - (page->xblock_size == to->block_size && mi_page_queue_is_full(from)) || - (page->xblock_size == from->block_size && mi_page_queue_is_full(to)) || - (page->xblock_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_huge(to)) || - (page->xblock_size > MI_LARGE_OBJ_SIZE_MAX && mi_page_queue_is_full(to))); + const size_t bsize = mi_page_block_size(page); + MI_UNUSED(bsize); + mi_assert_internal((bsize == to->block_size && bsize == from->block_size) || + (bsize == to->block_size && mi_page_queue_is_full(from)) || + (bsize == from->block_size && mi_page_queue_is_full(to)) || + (mi_page_is_huge(page) && mi_page_queue_is_huge(to)) || + (mi_page_is_huge(page) && mi_page_queue_is_full(to))); mi_heap_t* heap = mi_page_heap(page); if (page->prev != NULL) page->prev->next = page->next; @@ -297,7 +311,7 @@ size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue for (mi_page_t* page = append->first; page != NULL; page = page->next) { // inline `mi_page_set_heap` to avoid wrong assertion during absorption; // in this case it is ok to be delayed freeing since both "to" and "from" heap are still alive. - mi_atomic_store_release(&page->xheap, (uintptr_t)heap); + mi_atomic_store_release(&page->xheap, (uintptr_t)heap); // set the flag to delayed free (not overriding NEVER_DELAYED_FREE) which has as a // side effect that it spins until any DELAYED_FREEING is finished. This ensures // that after appending only the new heap will be used for delayed free operations. diff --git a/contrib/libs/mimalloc/src/page.c b/contrib/libs/mimalloc/src/page.c index c08be9c00b..5a18b78027 100644 --- a/contrib/libs/mimalloc/src/page.c +++ b/contrib/libs/mimalloc/src/page.c @@ -1,5 +1,5 @@ /*---------------------------------------------------------------------------- -Copyright (c) 2018-2020, Microsoft Research, Daan Leijen +Copyright (c) 2018-2024, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. @@ -7,13 +7,13 @@ terms of the MIT license. A copy of the license can be found in the file /* ----------------------------------------------------------- The core of the allocator. Every segment contains - pages of a {certain block size. The main function + pages of a certain block size. The main function exported is `mi_malloc_generic`. ----------------------------------------------------------- */ #include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" /* ----------------------------------------------------------- Definition of page queues for each block size @@ -30,7 +30,7 @@ terms of the MIT license. A copy of the license can be found in the file // Index a block in a page static inline mi_block_t* mi_page_block_at(const mi_page_t* page, void* page_start, size_t block_size, size_t i) { - UNUSED(page); + MI_UNUSED(page); mi_assert_internal(page != NULL); mi_assert_internal(i <= page->reserved); return (mi_block_t*)((uint8_t*)page_start + (i * block_size)); @@ -59,42 +59,54 @@ static inline uint8_t* mi_page_area(const mi_page_t* page) { static bool mi_page_list_is_valid(mi_page_t* page, mi_block_t* p) { size_t psize; - uint8_t* page_area = _mi_page_start(_mi_page_segment(page), page, &psize); + uint8_t* page_area = _mi_segment_page_start(_mi_page_segment(page), page, &psize); mi_block_t* start = (mi_block_t*)page_area; mi_block_t* end = (mi_block_t*)(page_area + psize); while(p != NULL) { if (p < start || p >= end) return false; p = mi_block_next(page, p); } +#if MI_DEBUG>3 // generally too expensive to check this + if (page->free_is_zero) { + const size_t ubsize = mi_page_usable_block_size(page); + for (mi_block_t* block = page->free; block != NULL; block = mi_block_next(page, block)) { + mi_assert_expensive(mi_mem_is_zero(block + 1, ubsize - sizeof(mi_block_t))); + } + } +#endif return true; } static bool mi_page_is_valid_init(mi_page_t* page) { - mi_assert_internal(page->xblock_size > 0); + mi_assert_internal(mi_page_block_size(page) > 0); mi_assert_internal(page->used <= page->capacity); mi_assert_internal(page->capacity <= page->reserved); - const size_t bsize = mi_page_block_size(page); + // const size_t bsize = mi_page_block_size(page); mi_segment_t* segment = _mi_page_segment(page); - uint8_t* start = _mi_page_start(segment,page,NULL); - mi_assert_internal(start == _mi_segment_page_start(segment,page,bsize,NULL,NULL)); + uint8_t* start = mi_page_start(page); + mi_assert_internal(start == _mi_segment_page_start(segment,page,NULL)); + mi_assert_internal(page->is_huge == (segment->page_kind == MI_PAGE_HUGE)); //mi_assert_internal(start + page->capacity*page->block_size == page->top); mi_assert_internal(mi_page_list_is_valid(page,page->free)); mi_assert_internal(mi_page_list_is_valid(page,page->local_free)); #if MI_DEBUG>3 // generally too expensive to check this - if (page->flags.is_zero) { - for(mi_block_t* block = page->free; block != NULL; mi_block_next(page,block)) { - mi_assert_expensive(mi_mem_is_zero(block + 1, page->block_size - sizeof(mi_block_t))); + if (page->free_is_zero) { + const size_t ubsize = mi_page_usable_block_size(page); + for(mi_block_t* block = page->free; block != NULL; block = mi_block_next(page,block)) { + mi_assert_expensive(mi_mem_is_zero(block + 1, ubsize - sizeof(mi_block_t))); } } #endif + #if !MI_TRACK_ENABLED && !MI_TSAN mi_block_t* tfree = mi_page_thread_free(page); mi_assert_internal(mi_page_list_is_valid(page, tfree)); //size_t tfree_count = mi_page_list_count(page, tfree); //mi_assert_internal(tfree_count <= page->thread_freed + 1); + #endif size_t free_count = mi_page_list_count(page, page->free) + mi_page_list_count(page, page->local_free); mi_assert_internal(page->used + free_count == page->capacity); @@ -102,6 +114,8 @@ static bool mi_page_is_valid_init(mi_page_t* page) { return true; } +extern bool _mi_process_is_initialized; // has mi_process_init been called? + bool _mi_page_is_valid(mi_page_t* page) { mi_assert_internal(mi_page_is_valid_init(page)); #if MI_SECURE @@ -110,7 +124,10 @@ bool _mi_page_is_valid(mi_page_t* page) { if (mi_page_heap(page)!=NULL) { mi_segment_t* segment = _mi_page_segment(page); mi_assert_internal(!_mi_process_is_initialized || segment->thread_id == mi_page_heap(page)->thread_id || segment->thread_id==0); - if (segment->page_kind != MI_PAGE_HUGE) { + #if MI_HUGE_PAGE_ABANDON + if (segment->page_kind != MI_PAGE_HUGE) + #endif + { mi_page_queue_t* pq = mi_page_queue_of(page); mi_assert_internal(mi_page_queue_contains(pq, page)); mi_assert_internal(pq->block_size==mi_page_block_size(page) || mi_page_block_size(page) > MI_LARGE_OBJ_SIZE_MAX || mi_page_is_in_full(page)); @@ -122,14 +139,23 @@ bool _mi_page_is_valid(mi_page_t* page) { #endif void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never) { + while (!_mi_page_try_use_delayed_free(page, delay, override_never)) { + mi_atomic_yield(); + } +} + +bool _mi_page_try_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never) { mi_thread_free_t tfreex; mi_delayed_t old_delay; - mi_thread_free_t tfree; + mi_thread_free_t tfree; + size_t yield_count = 0; do { tfree = mi_atomic_load_acquire(&page->xthread_free); // note: must acquire as we can break/repeat this loop and not do a CAS; tfreex = mi_tf_set_delayed(tfree, delay); old_delay = mi_tf_delayed(tfree); - if (mi_unlikely(old_delay == MI_DELAYED_FREEING)) { + if mi_unlikely(old_delay == MI_DELAYED_FREEING) { + if (yield_count >= 4) return false; // give up after 4 tries + yield_count++; mi_atomic_yield(); // delay until outstanding MI_DELAYED_FREEING are done. // tfree = mi_tf_set_delayed(tfree, MI_NO_DELAYED_FREE); // will cause CAS to busy fail } @@ -141,6 +167,8 @@ void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool overrid } } while ((old_delay == MI_DELAYED_FREEING) || !mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex)); + + return true; // success } /* ----------------------------------------------------------- @@ -165,8 +193,8 @@ static void _mi_page_thread_free_collect(mi_page_t* page) if (head == NULL) return; // find the tail -- also to get a proper count (without data races) - uint32_t max_count = page->capacity; // cannot collect more than capacity - uint32_t count = 1; + size_t max_count = page->capacity; // cannot collect more than capacity + size_t count = 1; mi_block_t* tail = head; mi_block_t* next; while ((next = mi_block_next(page,tail)) != NULL && count <= max_count) { @@ -184,7 +212,7 @@ static void _mi_page_thread_free_collect(mi_page_t* page) page->local_free = head; // update counts now - page->used -= count; + page->used -= (uint16_t)count; } void _mi_page_free_collect(mi_page_t* page, bool force) { @@ -197,11 +225,11 @@ void _mi_page_free_collect(mi_page_t* page, bool force) { // and the local free list if (page->local_free != NULL) { - if (mi_likely(page->free == NULL)) { + if mi_likely(page->free == NULL) { // usual case page->free = page->local_free; page->local_free = NULL; - page->is_zero = false; + page->free_is_zero = false; } else if (force) { // append -- only on shutdown (force) as this is a linear operation @@ -213,7 +241,7 @@ void _mi_page_free_collect(mi_page_t* page, bool force) { mi_block_set_next(page, tail, page->free); page->free = page->local_free; page->local_free = NULL; - page->is_zero = false; + page->free_is_zero = false; } } @@ -231,8 +259,10 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) { mi_assert_expensive(mi_page_is_valid_init(page)); mi_assert_internal(mi_page_heap(page) == heap); mi_assert_internal(mi_page_thread_free_flag(page) != MI_NEVER_DELAYED_FREE); + #if MI_HUGE_PAGE_ABANDON mi_assert_internal(_mi_page_segment(page)->page_kind != MI_PAGE_HUGE); - mi_assert_internal(!page->is_reset); + #endif + // TODO: push on full queue immediately if it is full? mi_page_queue_t* pq = mi_page_queue(heap, mi_page_block_size(page)); mi_page_queue_push(heap, pq, page); @@ -240,19 +270,27 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) { } // allocate a fresh page from a segment -static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size_t block_size) { - mi_assert_internal(pq==NULL||mi_heap_contains_queue(heap, pq)); - mi_assert_internal(pq==NULL||block_size == pq->block_size); - mi_page_t* page = _mi_segment_page_alloc(heap, block_size, &heap->tld->segments, &heap->tld->os); +static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size_t block_size, size_t page_alignment) { + #if !MI_HUGE_PAGE_ABANDON + mi_assert_internal(pq != NULL); + mi_assert_internal(mi_heap_contains_queue(heap, pq)); + mi_assert_internal(page_alignment > 0 || block_size > MI_LARGE_OBJ_SIZE_MAX || block_size == pq->block_size); + #endif + mi_page_t* page = _mi_segment_page_alloc(heap, block_size, page_alignment, &heap->tld->segments, &heap->tld->os); if (page == NULL) { // this may be out-of-memory, or an abandoned page was reclaimed (and in our queue) return NULL; } - // a fresh page was found, initialize it + #if MI_HUGE_PAGE_ABANDON mi_assert_internal(pq==NULL || _mi_page_segment(page)->page_kind != MI_PAGE_HUGE); - mi_page_init(heap, page, block_size, heap->tld); - _mi_stat_increase(&heap->tld->stats.pages, 1); - if (pq!=NULL) mi_page_queue_push(heap, pq, page); // huge pages use pq==NULL + #endif + mi_assert_internal(pq!=NULL || mi_page_block_size(page) >= block_size); + // a fresh page was found, initialize it + const size_t full_block_size = (pq == NULL || mi_page_is_huge(page) ? mi_page_block_size(page) : block_size); // see also: mi_segment_huge_page_alloc + mi_assert_internal(full_block_size >= block_size); + mi_page_init(heap, page, full_block_size, heap->tld); + mi_heap_stat_increase(heap, pages, 1); + if (pq != NULL) { mi_page_queue_push(heap, pq, page); } mi_assert_expensive(_mi_page_is_valid(page)); return page; } @@ -260,7 +298,7 @@ static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size // Get a fresh page to use static mi_page_t* mi_page_fresh(mi_heap_t* heap, mi_page_queue_t* pq) { mi_assert_internal(mi_heap_contains_queue(heap, pq)); - mi_page_t* page = mi_page_fresh_alloc(heap, pq, pq->block_size); + mi_page_t* page = mi_page_fresh_alloc(heap, pq, pq->block_size, 0); if (page==NULL) return NULL; mi_assert_internal(pq->block_size==mi_page_block_size(page)); mi_assert_internal(pq==mi_page_queue(heap, mi_page_block_size(page))); @@ -271,10 +309,18 @@ static mi_page_t* mi_page_fresh(mi_heap_t* heap, mi_page_queue_t* pq) { Do any delayed frees (put there by other threads if they deallocated in a full page) ----------------------------------------------------------- */ -void _mi_heap_delayed_free(mi_heap_t* heap) { +void _mi_heap_delayed_free_all(mi_heap_t* heap) { + while (!_mi_heap_delayed_free_partial(heap)) { + mi_atomic_yield(); + } +} + +// returns true if all delayed frees were processed +bool _mi_heap_delayed_free_partial(mi_heap_t* heap) { // take over the list (note: no atomic exchange since it is often NULL) mi_block_t* block = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free); while (block != NULL && !mi_atomic_cas_ptr_weak_acq_rel(mi_block_t, &heap->thread_delayed_free, &block, NULL)) { /* nothing */ }; + bool all_freed = true; // and free them all while(block != NULL) { @@ -282,7 +328,9 @@ void _mi_heap_delayed_free(mi_heap_t* heap) { // use internal free instead of regular one to keep stats etc correct if (!_mi_free_delayed_block(block)) { // we might already start delayed freeing while another thread has not yet - // reset the delayed_freeing flag; in that case delay it further by reinserting. + // reset the delayed_freeing flag; in that case delay it further by reinserting the current block + // into the delayed free list + all_freed = false; mi_block_t* dfree = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free); do { mi_block_set_nextx(heap, block, dfree, heap->keys); @@ -290,6 +338,7 @@ void _mi_heap_delayed_free(mi_heap_t* heap) { } block = next; } + return all_freed; } /* ----------------------------------------------------------- @@ -342,7 +391,7 @@ void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq) { mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE); mi_page_set_heap(page, NULL); -#if MI_DEBUG>1 +#if (MI_DEBUG>1) && !MI_TRACK_ENABLED // check there are no references left.. for (mi_block_t* block = (mi_block_t*)pheap->thread_delayed_free; block != NULL; block = mi_block_nextx(pheap, block, pheap->keys)) { mi_assert_internal(_mi_ptr_page(block) != page); @@ -376,8 +425,8 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { _mi_segment_page_free(page, force, segments_tld); } -#define MI_MAX_RETIRE_SIZE MI_LARGE_OBJ_SIZE_MAX -#define MI_RETIRE_CYCLES (8) +#define MI_MAX_RETIRE_SIZE MI_LARGE_OBJ_SIZE_MAX // should be less than size for MI_BIN_HUGE +#define MI_RETIRE_CYCLES (16) // Retire a page with no more used blocks // Important to not retire too quickly though as new @@ -385,7 +434,7 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { // Note: called from `mi_free` and benchmarks often // trigger this due to freeing everything and then // allocating again so careful when changing this. -void _mi_page_retire(mi_page_t* page) { +void _mi_page_retire(mi_page_t* page) mi_attr_noexcept { mi_assert_internal(page != NULL); mi_assert_expensive(_mi_page_is_valid(page)); mi_assert_internal(mi_page_all_free(page)); @@ -399,10 +448,11 @@ void _mi_page_retire(mi_page_t* page) { // how to check this efficiently though... // for now, we don't retire if it is the only page left of this size class. mi_page_queue_t* pq = mi_page_queue_of(page); - if (mi_likely(page->xblock_size <= MI_MAX_RETIRE_SIZE && !mi_page_is_in_full(page))) { + const size_t bsize = mi_page_block_size(page); + if mi_likely( /* bsize < MI_MAX_RETIRE_SIZE && */ !mi_page_queue_is_special(pq)) { // not full or huge queue? if (pq->last==page && pq->first==page) { // the only page in the queue? mi_stat_counter_increase(_mi_stats_main.page_no_retire,1); - page->retire_expire = (page->xblock_size <= MI_SMALL_OBJ_SIZE_MAX ? MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4); + page->retire_expire = (bsize <= MI_SMALL_OBJ_SIZE_MAX ? MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4); mi_heap_t* heap = mi_page_heap(page); mi_assert_internal(pq >= heap->pages); const size_t index = pq - heap->pages; @@ -410,7 +460,7 @@ void _mi_page_retire(mi_page_t* page) { if (index < heap->page_retired_min) heap->page_retired_min = index; if (index > heap->page_retired_max) heap->page_retired_max = index; mi_assert_internal(mi_page_all_free(page)); - return; // dont't free after all + return; // don't free after all } } @@ -458,14 +508,14 @@ void _mi_heap_collect_retired(mi_heap_t* heap, bool force) { #define MI_MIN_SLICES (2) static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* const page, const size_t bsize, const size_t extend, mi_stats_t* const stats) { - UNUSED(stats); + MI_UNUSED(stats); #if (MI_SECURE<=2) mi_assert_internal(page->free == NULL); mi_assert_internal(page->local_free == NULL); #endif mi_assert_internal(page->capacity + extend <= page->reserved); mi_assert_internal(bsize == mi_page_block_size(page)); - void* const page_area = _mi_page_start(_mi_page_segment(page), page, NULL); + void* const page_area = mi_page_start(page); // initialize a randomized free list // set up `slice_count` slices to alternate between @@ -516,14 +566,14 @@ static void mi_page_free_list_extend_secure(mi_heap_t* const heap, mi_page_t* co static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, const size_t bsize, const size_t extend, mi_stats_t* const stats) { - UNUSED(stats); + MI_UNUSED(stats); #if (MI_SECURE <= 2) mi_assert_internal(page->free == NULL); mi_assert_internal(page->local_free == NULL); #endif mi_assert_internal(page->capacity + extend <= page->reserved); mi_assert_internal(bsize == mi_page_block_size(page)); - void* const page_area = _mi_page_start(_mi_page_segment(page), page, NULL ); + void* const page_area = mi_page_start(page); mi_block_t* const start = mi_page_block_at(page, page_area, bsize, page->capacity); @@ -566,20 +616,23 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) if (page->capacity >= page->reserved) return; size_t page_size; - //uint8_t* page_start = - _mi_page_start(_mi_page_segment(page), page, &page_size); + //uint8_t* page_start = + _mi_segment_page_start(_mi_page_segment(page), page, &page_size); mi_stat_counter_increase(tld->stats.pages_extended, 1); // calculate the extend count - const size_t bsize = (page->xblock_size < MI_HUGE_BLOCK_SIZE ? page->xblock_size : page_size); + const size_t bsize = mi_page_block_size(page); size_t extend = page->reserved - page->capacity; - size_t max_extend = (bsize >= MI_MAX_EXTEND_SIZE ? MI_MIN_EXTEND : MI_MAX_EXTEND_SIZE/(uint32_t)bsize); - if (max_extend < MI_MIN_EXTEND) max_extend = MI_MIN_EXTEND; + mi_assert_internal(extend > 0); + + size_t max_extend = (bsize >= MI_MAX_EXTEND_SIZE ? MI_MIN_EXTEND : MI_MAX_EXTEND_SIZE/bsize); + if (max_extend < MI_MIN_EXTEND) { max_extend = MI_MIN_EXTEND; } + mi_assert_internal(max_extend > 0); if (extend > max_extend) { // ensure we don't touch memory beyond the page to reduce page commit. // the `lean` benchmark tests this. Going from 1 to 8 increases rss by 50%. - extend = (max_extend==0 ? 1 : max_extend); + extend = max_extend; } mi_assert_internal(extend > 0 && extend + page->capacity <= page->reserved); @@ -595,11 +648,6 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) // enable the new free list page->capacity += (uint16_t)extend; mi_stat_increase(tld->stats.page_committed, extend * bsize); - - // extension into zero initialized memory preserves the zero'd free list - if (!page->is_zero_init) { - page->is_zero = false; - } mi_assert_expensive(mi_page_is_valid_init(page)); } @@ -611,16 +659,30 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(block_size > 0); // set fields mi_page_set_heap(page, heap); + page->block_size = block_size; size_t page_size; - _mi_segment_page_start(segment, page, block_size, &page_size, NULL); - page->xblock_size = (block_size < MI_HUGE_BLOCK_SIZE ? (uint32_t)block_size : MI_HUGE_BLOCK_SIZE); + page->page_start = _mi_segment_page_start(segment, page, &page_size); + mi_track_mem_noaccess(page->page_start,page_size); mi_assert_internal(page_size / block_size < (1L<<16)); page->reserved = (uint16_t)(page_size / block_size); - #ifdef MI_ENCODE_FREELIST + mi_assert_internal(page->reserved > 0); + #if (MI_PADDING || MI_ENCODE_FREELIST) page->keys[0] = _mi_heap_random_next(heap); page->keys[1] = _mi_heap_random_next(heap); #endif - page->is_zero = page->is_zero_init; + page->free_is_zero = page->is_zero_init; + #if MI_DEBUG>2 + if (page->is_zero_init) { + mi_track_mem_defined(page->page_start, page_size); + mi_assert_expensive(mi_mem_is_zero(page->page_start, page_size)); + } + #endif + if (block_size > 0 && _mi_is_power_of_two(block_size)) { + page->block_size_shift = (uint8_t)(mi_ctz((uintptr_t)block_size)); + } + else { + page->block_size_shift = 0; + } mi_assert_internal(page->capacity == 0); mi_assert_internal(page->free == NULL); @@ -630,10 +692,11 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(page->prev == NULL); mi_assert_internal(page->retire_expire == 0); mi_assert_internal(!mi_page_has_aligned(page)); - #if (MI_ENCODE_FREELIST) + #if (MI_PADDING || MI_ENCODE_FREELIST) mi_assert_internal(page->keys[0] != 0); mi_assert_internal(page->keys[1] != 0); #endif + mi_assert_internal(page->block_size_shift == 0 || (block_size == ((size_t)1 << page->block_size_shift))); mi_assert_expensive(mi_page_is_valid_init(page)); // initialize an initial free list @@ -650,12 +713,16 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* pq, bool first_try) { // search through the pages in "next fit" order + #if MI_STAT size_t count = 0; + #endif mi_page_t* page = pq->first; while (page != NULL) { mi_page_t* next = page->next; // remember next + #if MI_STAT count++; + #endif // 0. collect freed blocks by us and other threads _mi_page_free_collect(page, false); @@ -680,14 +747,14 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p page = next; } // for each page - mi_stat_counter_increase(heap->tld->stats.searches, count); + mi_heap_stat_counter_increase(heap, searches, count); if (page == NULL) { _mi_heap_collect_retired(heap, false); // perhaps make a page available page = mi_page_fresh(heap, pq); if (page == NULL && first_try) { // out-of-memory _or_ an abandoned page with free blocks was reclaimed, try once again - page = mi_page_queue_find_free_ex(heap, pq, false); + page = mi_page_queue_find_free_ex(heap, pq, false); } } else { @@ -705,17 +772,17 @@ static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) { mi_page_queue_t* pq = mi_page_queue(heap,size); mi_page_t* page = pq->first; if (page != NULL) { - #if (MI_SECURE>=3) // in secure mode, we extend half the time to increase randomness + #if (MI_SECURE>=3) // in secure mode, we extend half the time to increase randomness if (page->capacity < page->reserved && ((_mi_heap_random_next(heap) & 1) == 1)) { mi_page_extend_free(heap, page, heap->tld); mi_assert_internal(mi_page_immediate_available(page)); } - else + else #endif { _mi_page_free_collect(page,false); } - + if (mi_page_immediate_available(page)) { page->retire_expire = 0; return page; // fast path @@ -754,31 +821,31 @@ void mi_register_deferred_free(mi_deferred_free_fun* fn, void* arg) mi_attr_noex General allocation ----------------------------------------------------------- */ -// A huge page is allocated directly without being in a queue. -// Because huge pages contain just one block, and the segment contains -// just that page, we always treat them as abandoned and any thread -// that frees the block can free the whole page and segment directly. -static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) { +// Huge pages contain just one block, and the segment contains just that page. +// Huge pages are also use if the requested alignment is very large (> MI_BLOCK_ALIGNMENT_MAX) +// so their size is not always `> MI_LARGE_OBJ_SIZE_MAX`. +static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size, size_t page_alignment) { size_t block_size = _mi_os_good_alloc_size(size); - mi_assert_internal(_mi_bin(block_size) == MI_BIN_HUGE); - mi_page_t* page = mi_page_fresh_alloc(heap,NULL,block_size); + mi_assert_internal(mi_bin(block_size) == MI_BIN_HUGE || page_alignment > 0); + #if MI_HUGE_PAGE_ABANDON + mi_page_queue_t* pq = NULL; + #else + mi_page_queue_t* pq = mi_page_queue(heap, MI_LARGE_OBJ_SIZE_MAX+1); // always in the huge queue regardless of the block size + mi_assert_internal(mi_page_queue_is_huge(pq)); + #endif + mi_page_t* page = mi_page_fresh_alloc(heap, pq, block_size, page_alignment); if (page != NULL) { - const size_t bsize = mi_page_block_size(page); // note: not `mi_page_usable_block_size` as `size` includes padding already - mi_assert_internal(bsize >= size); + mi_assert_internal(mi_page_block_size(page) >= size); mi_assert_internal(mi_page_immediate_available(page)); - mi_assert_internal(_mi_page_segment(page)->page_kind==MI_PAGE_HUGE); + mi_assert_internal(mi_page_is_huge(page)); + mi_assert_internal(_mi_page_segment(page)->page_kind == MI_PAGE_HUGE); mi_assert_internal(_mi_page_segment(page)->used==1); + #if MI_HUGE_PAGE_ABANDON mi_assert_internal(_mi_page_segment(page)->thread_id==0); // abandoned, not in the huge queue mi_page_set_heap(page, NULL); - - if (bsize > MI_HUGE_OBJ_SIZE_MAX) { - _mi_stat_increase(&heap->tld->stats.giant, bsize); - _mi_stat_counter_increase(&heap->tld->stats.giant_count, 1); - } - else { - _mi_stat_increase(&heap->tld->stats.huge, bsize); - _mi_stat_counter_increase(&heap->tld->stats.huge_count, 1); - } + #endif + mi_heap_stat_increase(heap, huge, mi_page_block_size(page)); + mi_heap_stat_counter_increase(heap, huge_count, 1); } return page; } @@ -786,54 +853,57 @@ static mi_page_t* mi_huge_page_alloc(mi_heap_t* heap, size_t size) { // Allocate a page // Note: in debug mode the size includes MI_PADDING_SIZE and might have overflowed. -static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size) mi_attr_noexcept { +static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size, size_t huge_alignment) mi_attr_noexcept { // huge allocation? - const size_t req_size = size - MI_PADDING_SIZE; // correct for padding_size in case of an overflow on `size` - if (mi_unlikely(req_size > (MI_LARGE_OBJ_SIZE_MAX - MI_PADDING_SIZE) )) { - if (mi_unlikely(req_size > PTRDIFF_MAX)) { // we don't allocate more than PTRDIFF_MAX (see <https://sourceware.org/ml/libc-announce/2019/msg00001.html>) + const size_t req_size = size - MI_PADDING_SIZE; // correct for padding_size in case of an overflow on `size` + if mi_unlikely(req_size > (MI_LARGE_OBJ_SIZE_MAX - MI_PADDING_SIZE) || huge_alignment > 0) { + if mi_unlikely(req_size > MI_MAX_ALLOC_SIZE) { _mi_error_message(EOVERFLOW, "allocation request is too large (%zu bytes)\n", req_size); return NULL; } else { - return mi_huge_page_alloc(heap,size); + return mi_huge_page_alloc(heap,size,huge_alignment); } } else { // otherwise find a page with free blocks in our size segregated queues + #if MI_PADDING mi_assert_internal(size >= MI_PADDING_SIZE); + #endif return mi_find_free_page(heap, size); } } // Generic allocation routine if the fast path (`alloc.c:mi_page_malloc`) does not succeed. // Note: in debug mode the size includes MI_PADDING_SIZE and might have overflowed. -void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept +// The `huge_alignment` is normally 0 but is set to a multiple of MI_SEGMENT_SIZE for +// very large requested alignments in which case we use a huge segment. +void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept { mi_assert_internal(heap != NULL); // initialize if necessary - if (mi_unlikely(!mi_heap_is_initialized(heap))) { - mi_thread_init(); // calls `_mi_heap_init` in turn - heap = mi_get_default_heap(); - if (mi_unlikely(!mi_heap_is_initialized(heap))) { return NULL; } + if mi_unlikely(!mi_heap_is_initialized(heap)) { + heap = mi_heap_get_default(); // calls mi_thread_init + if mi_unlikely(!mi_heap_is_initialized(heap)) { return NULL; } } mi_assert_internal(mi_heap_is_initialized(heap)); // call potential deferred free routines _mi_deferred_free(heap, false); - // free delayed frees from other threads - _mi_heap_delayed_free(heap); + // free delayed frees from other threads (but skip contended ones) + _mi_heap_delayed_free_partial(heap); // find (or allocate) a page of the right size - mi_page_t* page = mi_find_page(heap, size); - if (mi_unlikely(page == NULL)) { // first time out of memory, try to collect and retry the allocation once more + mi_page_t* page = mi_find_page(heap, size, huge_alignment); + if mi_unlikely(page == NULL) { // first time out of memory, try to collect and retry the allocation once more mi_heap_collect(heap, true /* force */); - page = mi_find_page(heap, size); + page = mi_find_page(heap, size, huge_alignment); } - if (mi_unlikely(page == NULL)) { // out of memory - const size_t req_size = size - MI_PADDING_SIZE; // correct for padding_size in case of an overflow on `size` + if mi_unlikely(page == NULL) { // out of memory + const size_t req_size = size - MI_PADDING_SIZE; // correct for padding_size in case of an overflow on `size` _mi_error_message(ENOMEM, "unable to allocate memory (%zu bytes)\n", req_size); return NULL; } @@ -841,6 +911,15 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_assert_internal(mi_page_immediate_available(page)); mi_assert_internal(mi_page_block_size(page) >= size); - // and try again, this time succeeding! (i.e. this should never recurse) - return _mi_page_malloc(heap, page, size); + // and try again, this time succeeding! (i.e. this should never recurse through _mi_page_malloc) + if mi_unlikely(zero && page->block_size == 0) { + // note: we cannot call _mi_page_malloc with zeroing for huge blocks; we zero it afterwards in that case. + void* p = _mi_page_malloc(heap, page, size); + mi_assert_internal(p != NULL); + _mi_memzero_aligned(p, mi_page_usable_block_size(page)); + return p; + } + else { + return _mi_page_malloc_zero(heap, page, size, zero); + } } diff --git a/contrib/libs/mimalloc/src/prim/osx/alloc-override-zone.c b/contrib/libs/mimalloc/src/prim/osx/alloc-override-zone.c new file mode 100644 index 0000000000..1515b886b2 --- /dev/null +++ b/contrib/libs/mimalloc/src/prim/osx/alloc-override-zone.c @@ -0,0 +1,461 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2022, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +#include "mimalloc.h" +#include "mimalloc/internal.h" + +#if defined(MI_MALLOC_OVERRIDE) + +#if !defined(__APPLE__) +#error "this file should only be included on macOS" +#endif + +/* ------------------------------------------------------ + Override system malloc on macOS + This is done through the malloc zone interface. + It seems to be most robust in combination with interposing + though or otherwise we may get zone errors as there are could + be allocations done by the time we take over the + zone. +------------------------------------------------------ */ + +#include <AvailabilityMacros.h> +#include <malloc/malloc.h> +#include <string.h> // memset +#include <stdlib.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(MAC_OS_X_VERSION_10_6) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6) +// only available from OSX 10.6 +extern malloc_zone_t* malloc_default_purgeable_zone(void) __attribute__((weak_import)); +#endif + +/* ------------------------------------------------------ + malloc zone members +------------------------------------------------------ */ + +static size_t zone_size(malloc_zone_t* zone, const void* p) { + MI_UNUSED(zone); + if (!mi_is_in_heap_region(p)){ return 0; } // not our pointer, bail out + return mi_usable_size(p); +} + +static void* zone_malloc(malloc_zone_t* zone, size_t size) { + MI_UNUSED(zone); + return mi_malloc(size); +} + +static void* zone_calloc(malloc_zone_t* zone, size_t count, size_t size) { + MI_UNUSED(zone); + return mi_calloc(count, size); +} + +static void* zone_valloc(malloc_zone_t* zone, size_t size) { + MI_UNUSED(zone); + return mi_malloc_aligned(size, _mi_os_page_size()); +} + +static void zone_free(malloc_zone_t* zone, void* p) { + MI_UNUSED(zone); + mi_cfree(p); +} + +static void* zone_realloc(malloc_zone_t* zone, void* p, size_t newsize) { + MI_UNUSED(zone); + return mi_realloc(p, newsize); +} + +static void* zone_memalign(malloc_zone_t* zone, size_t alignment, size_t size) { + MI_UNUSED(zone); + return mi_malloc_aligned(size,alignment); +} + +static void zone_destroy(malloc_zone_t* zone) { + MI_UNUSED(zone); + // todo: ignore for now? +} + +static unsigned zone_batch_malloc(malloc_zone_t* zone, size_t size, void** ps, unsigned count) { + size_t i; + for (i = 0; i < count; i++) { + ps[i] = zone_malloc(zone, size); + if (ps[i] == NULL) break; + } + return i; +} + +static void zone_batch_free(malloc_zone_t* zone, void** ps, unsigned count) { + for(size_t i = 0; i < count; i++) { + zone_free(zone, ps[i]); + ps[i] = NULL; + } +} + +static size_t zone_pressure_relief(malloc_zone_t* zone, size_t size) { + MI_UNUSED(zone); MI_UNUSED(size); + mi_collect(false); + return 0; +} + +static void zone_free_definite_size(malloc_zone_t* zone, void* p, size_t size) { + MI_UNUSED(size); + zone_free(zone,p); +} + +static boolean_t zone_claimed_address(malloc_zone_t* zone, void* p) { + MI_UNUSED(zone); + return mi_is_in_heap_region(p); +} + + +/* ------------------------------------------------------ + Introspection members +------------------------------------------------------ */ + +static kern_return_t intro_enumerator(task_t task, void* p, + unsigned type_mask, vm_address_t zone_address, + memory_reader_t reader, + vm_range_recorder_t recorder) +{ + // todo: enumerate all memory + MI_UNUSED(task); MI_UNUSED(p); MI_UNUSED(type_mask); MI_UNUSED(zone_address); + MI_UNUSED(reader); MI_UNUSED(recorder); + return KERN_SUCCESS; +} + +static size_t intro_good_size(malloc_zone_t* zone, size_t size) { + MI_UNUSED(zone); + return mi_good_size(size); +} + +static boolean_t intro_check(malloc_zone_t* zone) { + MI_UNUSED(zone); + return true; +} + +static void intro_print(malloc_zone_t* zone, boolean_t verbose) { + MI_UNUSED(zone); MI_UNUSED(verbose); + mi_stats_print(NULL); +} + +static void intro_log(malloc_zone_t* zone, void* p) { + MI_UNUSED(zone); MI_UNUSED(p); + // todo? +} + +static void intro_force_lock(malloc_zone_t* zone) { + MI_UNUSED(zone); + // todo? +} + +static void intro_force_unlock(malloc_zone_t* zone) { + MI_UNUSED(zone); + // todo? +} + +static void intro_statistics(malloc_zone_t* zone, malloc_statistics_t* stats) { + MI_UNUSED(zone); + // todo... + stats->blocks_in_use = 0; + stats->size_in_use = 0; + stats->max_size_in_use = 0; + stats->size_allocated = 0; +} + +static boolean_t intro_zone_locked(malloc_zone_t* zone) { + MI_UNUSED(zone); + return false; +} + + +/* ------------------------------------------------------ + At process start, override the default allocator +------------------------------------------------------ */ + +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic ignored "-Wmissing-field-initializers" +#endif + +#if defined(__clang__) +#pragma clang diagnostic ignored "-Wc99-extensions" +#endif + +static malloc_introspection_t mi_introspect = { + .enumerator = &intro_enumerator, + .good_size = &intro_good_size, + .check = &intro_check, + .print = &intro_print, + .log = &intro_log, + .force_lock = &intro_force_lock, + .force_unlock = &intro_force_unlock, +#if defined(MAC_OS_X_VERSION_10_6) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6) && !defined(__ppc__) + .statistics = &intro_statistics, + .zone_locked = &intro_zone_locked, +#endif +}; + +static malloc_zone_t mi_malloc_zone = { + // note: even with designators, the order is important for C++ compilation + //.reserved1 = NULL, + //.reserved2 = NULL, + .size = &zone_size, + .malloc = &zone_malloc, + .calloc = &zone_calloc, + .valloc = &zone_valloc, + .free = &zone_free, + .realloc = &zone_realloc, + .destroy = &zone_destroy, + .zone_name = "mimalloc", + .batch_malloc = &zone_batch_malloc, + .batch_free = &zone_batch_free, + .introspect = &mi_introspect, +#if defined(MAC_OS_X_VERSION_10_6) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6) && !defined(__ppc__) + #if defined(MAC_OS_X_VERSION_10_14) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_14) + .version = 10, + #else + .version = 9, + #endif + // switch to version 9+ on OSX 10.6 to support memalign. + .memalign = &zone_memalign, + .free_definite_size = &zone_free_definite_size, + #if defined(MAC_OS_X_VERSION_10_7) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_7) + .pressure_relief = &zone_pressure_relief, + #endif + #if defined(MAC_OS_X_VERSION_10_14) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_14) + .claimed_address = &zone_claimed_address, + #endif +#else + .version = 4, +#endif +}; + +#ifdef __cplusplus +} +#endif + + +#if defined(MI_OSX_INTERPOSE) && defined(MI_SHARED_LIB_EXPORT) + +// ------------------------------------------------------ +// Override malloc_xxx and malloc_zone_xxx api's to use only +// our mimalloc zone. Since even the loader uses malloc +// on macOS, this ensures that all allocations go through +// mimalloc (as all calls are interposed). +// The main `malloc`, `free`, etc calls are interposed in `alloc-override.c`, +// Here, we also override macOS specific API's like +// `malloc_zone_calloc` etc. see <https://github.com/aosm/libmalloc/blob/master/man/malloc_zone_malloc.3> +// ------------------------------------------------------ + +static inline malloc_zone_t* mi_get_default_zone(void) +{ + static bool init; + if mi_unlikely(!init) { + init = true; + malloc_zone_register(&mi_malloc_zone); // by calling register we avoid a zone error on free (see <http://eatmyrandom.blogspot.com/2010/03/mallocfree-interception-on-mac-os-x.html>) + } + return &mi_malloc_zone; +} + +mi_decl_externc int malloc_jumpstart(uintptr_t cookie); +mi_decl_externc void _malloc_fork_prepare(void); +mi_decl_externc void _malloc_fork_parent(void); +mi_decl_externc void _malloc_fork_child(void); + + +static malloc_zone_t* mi_malloc_create_zone(vm_size_t size, unsigned flags) { + MI_UNUSED(size); MI_UNUSED(flags); + return mi_get_default_zone(); +} + +static malloc_zone_t* mi_malloc_default_zone (void) { + return mi_get_default_zone(); +} + +static malloc_zone_t* mi_malloc_default_purgeable_zone(void) { + return mi_get_default_zone(); +} + +static void mi_malloc_destroy_zone(malloc_zone_t* zone) { + MI_UNUSED(zone); + // nothing. +} + +static kern_return_t mi_malloc_get_all_zones (task_t task, memory_reader_t mr, vm_address_t** addresses, unsigned* count) { + MI_UNUSED(task); MI_UNUSED(mr); + if (addresses != NULL) *addresses = NULL; + if (count != NULL) *count = 0; + return KERN_SUCCESS; +} + +static const char* mi_malloc_get_zone_name(malloc_zone_t* zone) { + return (zone == NULL ? mi_malloc_zone.zone_name : zone->zone_name); +} + +static void mi_malloc_set_zone_name(malloc_zone_t* zone, const char* name) { + MI_UNUSED(zone); MI_UNUSED(name); +} + +static int mi_malloc_jumpstart(uintptr_t cookie) { + MI_UNUSED(cookie); + return 1; // or 0 for no error? +} + +static void mi__malloc_fork_prepare(void) { + // nothing +} +static void mi__malloc_fork_parent(void) { + // nothing +} +static void mi__malloc_fork_child(void) { + // nothing +} + +static void mi_malloc_printf(const char* fmt, ...) { + MI_UNUSED(fmt); +} + +static bool zone_check(malloc_zone_t* zone) { + MI_UNUSED(zone); + return true; +} + +static malloc_zone_t* zone_from_ptr(const void* p) { + MI_UNUSED(p); + return mi_get_default_zone(); +} + +static void zone_log(malloc_zone_t* zone, void* p) { + MI_UNUSED(zone); MI_UNUSED(p); +} + +static void zone_print(malloc_zone_t* zone, bool b) { + MI_UNUSED(zone); MI_UNUSED(b); +} + +static void zone_print_ptr_info(void* p) { + MI_UNUSED(p); +} + +static void zone_register(malloc_zone_t* zone) { + MI_UNUSED(zone); +} + +static void zone_unregister(malloc_zone_t* zone) { + MI_UNUSED(zone); +} + +// use interposing so `DYLD_INSERT_LIBRARIES` works without `DYLD_FORCE_FLAT_NAMESPACE=1` +// See: <https://books.google.com/books?id=K8vUkpOXhN4C&pg=PA73> +struct mi_interpose_s { + const void* replacement; + const void* target; +}; +#define MI_INTERPOSE_FUN(oldfun,newfun) { (const void*)&newfun, (const void*)&oldfun } +#define MI_INTERPOSE_MI(fun) MI_INTERPOSE_FUN(fun,mi_##fun) +#define MI_INTERPOSE_ZONE(fun) MI_INTERPOSE_FUN(malloc_##fun,fun) +__attribute__((used)) static const struct mi_interpose_s _mi_zone_interposes[] __attribute__((section("__DATA, __interpose"))) = +{ + + MI_INTERPOSE_MI(malloc_create_zone), + MI_INTERPOSE_MI(malloc_default_purgeable_zone), + MI_INTERPOSE_MI(malloc_default_zone), + MI_INTERPOSE_MI(malloc_destroy_zone), + MI_INTERPOSE_MI(malloc_get_all_zones), + MI_INTERPOSE_MI(malloc_get_zone_name), + MI_INTERPOSE_MI(malloc_jumpstart), + MI_INTERPOSE_MI(malloc_printf), + MI_INTERPOSE_MI(malloc_set_zone_name), + MI_INTERPOSE_MI(_malloc_fork_child), + MI_INTERPOSE_MI(_malloc_fork_parent), + MI_INTERPOSE_MI(_malloc_fork_prepare), + + MI_INTERPOSE_ZONE(zone_batch_free), + MI_INTERPOSE_ZONE(zone_batch_malloc), + MI_INTERPOSE_ZONE(zone_calloc), + MI_INTERPOSE_ZONE(zone_check), + MI_INTERPOSE_ZONE(zone_free), + MI_INTERPOSE_ZONE(zone_from_ptr), + MI_INTERPOSE_ZONE(zone_log), + MI_INTERPOSE_ZONE(zone_malloc), + MI_INTERPOSE_ZONE(zone_memalign), + MI_INTERPOSE_ZONE(zone_print), + MI_INTERPOSE_ZONE(zone_print_ptr_info), + MI_INTERPOSE_ZONE(zone_realloc), + MI_INTERPOSE_ZONE(zone_register), + MI_INTERPOSE_ZONE(zone_unregister), + MI_INTERPOSE_ZONE(zone_valloc) +}; + + +#else + +// ------------------------------------------------------ +// hook into the zone api's without interposing +// This is the official way of adding an allocator but +// it seems less robust than using interpose. +// ------------------------------------------------------ + +static inline malloc_zone_t* mi_get_default_zone(void) +{ + // The first returned zone is the real default + malloc_zone_t** zones = NULL; + unsigned count = 0; + kern_return_t ret = malloc_get_all_zones(0, NULL, (vm_address_t**)&zones, &count); + if (ret == KERN_SUCCESS && count > 0) { + return zones[0]; + } + else { + // fallback + return malloc_default_zone(); + } +} + +#if defined(__clang__) +__attribute__((constructor(0))) +#else +__attribute__((constructor)) // seems not supported by g++-11 on the M1 +#endif +__attribute__((used)) +static void _mi_macos_override_malloc(void) { + malloc_zone_t* purgeable_zone = NULL; + + #if defined(MAC_OS_X_VERSION_10_6) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6) + // force the purgeable zone to exist to avoid strange bugs + if (malloc_default_purgeable_zone) { + purgeable_zone = malloc_default_purgeable_zone(); + } + #endif + + // Register our zone. + // thomcc: I think this is still needed to put us in the zone list. + malloc_zone_register(&mi_malloc_zone); + // Unregister the default zone, this makes our zone the new default + // as that was the last registered. + malloc_zone_t *default_zone = mi_get_default_zone(); + // thomcc: Unsure if the next test is *always* false or just false in the + // cases I've tried. I'm also unsure if the code inside is needed. at all + if (default_zone != &mi_malloc_zone) { + malloc_zone_unregister(default_zone); + + // Reregister the default zone so free and realloc in that zone keep working. + malloc_zone_register(default_zone); + } + + // Unregister, and re-register the purgeable_zone to avoid bugs if it occurs + // earlier than the default zone. + if (purgeable_zone != NULL) { + malloc_zone_unregister(purgeable_zone); + malloc_zone_register(purgeable_zone); + } + +} +#endif // MI_OSX_INTERPOSE + +#endif // MI_MALLOC_OVERRIDE diff --git a/contrib/libs/mimalloc/src/prim/osx/prim.c b/contrib/libs/mimalloc/src/prim/osx/prim.c new file mode 100644 index 0000000000..8a2f4e8aa4 --- /dev/null +++ b/contrib/libs/mimalloc/src/prim/osx/prim.c @@ -0,0 +1,9 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +// We use the unix/prim.c with the mmap API on macOSX +#include "../unix/prim.c" diff --git a/contrib/libs/mimalloc/src/prim/prim.c b/contrib/libs/mimalloc/src/prim/prim.c new file mode 100644 index 0000000000..992a1a4be5 --- /dev/null +++ b/contrib/libs/mimalloc/src/prim/prim.c @@ -0,0 +1,27 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +// Select the implementation of the primitives +// depending on the OS. + +#if defined(_WIN32) +#error #include "windows/prim.c" // VirtualAlloc (Windows) + +#elif defined(__APPLE__) +#include "osx/prim.c" // macOSX (actually defers to mmap in unix/prim.c) + +#elif defined(__wasi__) +#define MI_USE_SBRK +#error #include "wasi/prim.c" // memory-grow or sbrk (Wasm) + +#elif defined(__EMSCRIPTEN__) +#error #include "emscripten/prim.c" // emmalloc_*, + pthread support + +#else +#include "unix/prim.c" // mmap() (Linux, macOSX, BSD, Illumnos, Haiku, DragonFly, etc.) + +#endif diff --git a/contrib/libs/mimalloc/src/prim/unix/prim.c b/contrib/libs/mimalloc/src/prim/unix/prim.c new file mode 100644 index 0000000000..7890f936b9 --- /dev/null +++ b/contrib/libs/mimalloc/src/prim/unix/prim.c @@ -0,0 +1,882 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +// This file is included in `src/prim/prim.c` + +#ifndef _DEFAULT_SOURCE +#define _DEFAULT_SOURCE // ensure mmap flags and syscall are defined +#endif + +#if defined(__sun) +// illumos provides new mman.h api when any of these are defined +// otherwise the old api based on caddr_t which predates the void pointers one. +// stock solaris provides only the former, chose to atomically to discard those +// flags only here rather than project wide tough. +#undef _XOPEN_SOURCE +#undef _POSIX_C_SOURCE +#endif + +#include "mimalloc.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" +#include "mimalloc/prim.h" + +#include <sys/mman.h> // mmap +#include <unistd.h> // sysconf +#include <fcntl.h> // open, close, read, access + +#if defined(__linux__) + #include <features.h> + #if defined(MI_NO_THP) + #include <sys/prctl.h> + #endif + #if defined(__GLIBC__) + #include <linux/mman.h> // linux mmap flags + #else + #include <sys/mman.h> + #endif +#elif defined(__APPLE__) + #include <AvailabilityMacros.h> + #include <TargetConditionals.h> + #if !defined(TARGET_OS_OSX) || TARGET_OS_OSX // see issue #879, used to be (!TARGET_IOS_IPHONE && !TARGET_IOS_SIMULATOR) + #include <mach/vm_statistics.h> // VM_MAKE_TAG, VM_FLAGS_SUPERPAGE_SIZE_2MB, etc. + #endif + #if !defined(MAC_OS_X_VERSION_10_7) + #define MAC_OS_X_VERSION_10_7 1070 + #endif +#elif defined(__FreeBSD__) || defined(__DragonFly__) + #include <sys/param.h> + #if __FreeBSD_version >= 1200000 + #include <sys/cpuset.h> + #error #include <sys/domainset.h> + #endif + #include <sys/sysctl.h> +#endif + +#if defined(__linux__) || defined(__FreeBSD__) + #define MI_HAS_SYSCALL_H + #include <sys/syscall.h> +#endif + + +//------------------------------------------------------------------------------------ +// Use syscalls for some primitives to allow for libraries that override open/read/close etc. +// and do allocation themselves; using syscalls prevents recursion when mimalloc is +// still initializing (issue #713) +// Declare inline to avoid unused function warnings. +//------------------------------------------------------------------------------------ + +#if defined(MI_HAS_SYSCALL_H) && defined(SYS_open) && defined(SYS_close) && defined(SYS_read) && defined(SYS_access) + +static inline int mi_prim_open(const char* fpath, int open_flags) { + return syscall(SYS_open,fpath,open_flags,0); +} +static inline ssize_t mi_prim_read(int fd, void* buf, size_t bufsize) { + return syscall(SYS_read,fd,buf,bufsize); +} +static inline int mi_prim_close(int fd) { + return syscall(SYS_close,fd); +} +static inline int mi_prim_access(const char *fpath, int mode) { + return syscall(SYS_access,fpath,mode); +} + +#else + +static inline int mi_prim_open(const char* fpath, int open_flags) { + return open(fpath,open_flags); +} +static inline ssize_t mi_prim_read(int fd, void* buf, size_t bufsize) { + return read(fd,buf,bufsize); +} +static inline int mi_prim_close(int fd) { + return close(fd); +} +static inline int mi_prim_access(const char *fpath, int mode) { + return access(fpath,mode); +} + +#endif + + + +//--------------------------------------------- +// init +//--------------------------------------------- + +static bool unix_detect_overcommit(void) { + bool os_overcommit = true; +#if defined(__linux__) + int fd = mi_prim_open("/proc/sys/vm/overcommit_memory", O_RDONLY); + if (fd >= 0) { + char buf[32]; + ssize_t nread = mi_prim_read(fd, &buf, sizeof(buf)); + mi_prim_close(fd); + // <https://www.kernel.org/doc/Documentation/vm/overcommit-accounting> + // 0: heuristic overcommit, 1: always overcommit, 2: never overcommit (ignore NORESERVE) + if (nread >= 1) { + os_overcommit = (buf[0] == '0' || buf[0] == '1'); + } + } +#elif defined(__FreeBSD__) + int val = 0; + size_t olen = sizeof(val); + if (sysctlbyname("vm.overcommit", &val, &olen, NULL, 0) == 0) { + os_overcommit = (val != 0); + } +#else + // default: overcommit is true +#endif + return os_overcommit; +} + +void _mi_prim_mem_init( mi_os_mem_config_t* config ) +{ + long psize = sysconf(_SC_PAGESIZE); + if (psize > 0) { + config->page_size = (size_t)psize; + config->alloc_granularity = (size_t)psize; + } + config->large_page_size = 2*MI_MiB; // TODO: can we query the OS for this? + config->has_overcommit = unix_detect_overcommit(); + config->has_partial_free = true; // mmap can free in parts + config->has_virtual_reserve = true; // todo: check if this true for NetBSD? (for anonymous mmap with PROT_NONE) + + // disable transparent huge pages for this process? + #if (defined(__linux__) || defined(__ANDROID__)) && defined(PR_GET_THP_DISABLE) + #if defined(MI_NO_THP) + if (true) + #else + if (!mi_option_is_enabled(mi_option_allow_large_os_pages)) // disable THP also if large OS pages are not allowed in the options + #endif + { + int val = 0; + if (prctl(PR_GET_THP_DISABLE, &val, 0, 0, 0) != 0) { + // Most likely since distros often come with always/madvise settings. + val = 1; + // Disabling only for mimalloc process rather than touching system wide settings + (void)prctl(PR_SET_THP_DISABLE, &val, 0, 0, 0); + } + } + #endif +} + + +//--------------------------------------------- +// free +//--------------------------------------------- + +int _mi_prim_free(void* addr, size_t size ) { + bool err = (munmap(addr, size) == -1); + return (err ? errno : 0); +} + + +//--------------------------------------------- +// mmap +//--------------------------------------------- + +static int unix_madvise(void* addr, size_t size, int advice) { + #if defined(__sun) + return madvise((caddr_t)addr, size, advice); // Solaris needs cast (issue #520) + #else + return madvise(addr, size, advice); + #endif +} + +static void* unix_mmap_prim(void* addr, size_t size, size_t try_alignment, int protect_flags, int flags, int fd) { + MI_UNUSED(try_alignment); + void* p = NULL; + #if defined(MAP_ALIGNED) // BSD + if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0) { + size_t n = mi_bsr(try_alignment); + if (((size_t)1 << n) == try_alignment && n >= 12 && n <= 30) { // alignment is a power of 2 and 4096 <= alignment <= 1GiB + p = mmap(addr, size, protect_flags, flags | MAP_ALIGNED(n), fd, 0); + if (p==MAP_FAILED || !_mi_is_aligned(p,try_alignment)) { + int err = errno; + _mi_trace_message("unable to directly request aligned OS memory (error: %d (0x%x), size: 0x%zx bytes, alignment: 0x%zx, hint address: %p)\n", err, err, size, try_alignment, addr); + } + if (p!=MAP_FAILED) return p; + // fall back to regular mmap + } + } + #elif defined(MAP_ALIGN) // Solaris + if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0) { + p = mmap((void*)try_alignment, size, protect_flags, flags | MAP_ALIGN, fd, 0); // addr parameter is the required alignment + if (p!=MAP_FAILED) return p; + // fall back to regular mmap + } + #endif + #if (MI_INTPTR_SIZE >= 8) && !defined(MAP_ALIGNED) + // on 64-bit systems, use the virtual address area after 2TiB for 4MiB aligned allocations + if (addr == NULL) { + void* hint = _mi_os_get_aligned_hint(try_alignment, size); + if (hint != NULL) { + p = mmap(hint, size, protect_flags, flags, fd, 0); + if (p==MAP_FAILED || !_mi_is_aligned(p,try_alignment)) { + #if MI_TRACK_ENABLED // asan sometimes does not instrument errno correctly? + int err = 0; + #else + int err = errno; + #endif + _mi_trace_message("unable to directly request hinted aligned OS memory (error: %d (0x%x), size: 0x%zx bytes, alignment: 0x%zx, hint address: %p)\n", err, err, size, try_alignment, hint); + } + if (p!=MAP_FAILED) return p; + // fall back to regular mmap + } + } + #endif + // regular mmap + p = mmap(addr, size, protect_flags, flags, fd, 0); + if (p!=MAP_FAILED) return p; + // failed to allocate + return NULL; +} + +static int unix_mmap_fd(void) { + #if defined(VM_MAKE_TAG) + // macOS: tracking anonymous page with a specific ID. (All up to 98 are taken officially but LLVM sanitizers had taken 99) + int os_tag = (int)mi_option_get(mi_option_os_tag); + if (os_tag < 100 || os_tag > 255) { os_tag = 100; } + return VM_MAKE_TAG(os_tag); + #else + return -1; + #endif +} + +static void* unix_mmap(void* addr, size_t size, size_t try_alignment, int protect_flags, bool large_only, bool allow_large, bool* is_large) { + #if !defined(MAP_ANONYMOUS) + #define MAP_ANONYMOUS MAP_ANON + #endif + #if !defined(MAP_NORESERVE) + #define MAP_NORESERVE 0 + #endif + void* p = NULL; + const int fd = unix_mmap_fd(); + int flags = MAP_PRIVATE | MAP_ANONYMOUS; + if (_mi_os_has_overcommit()) { + flags |= MAP_NORESERVE; + } + #if defined(PROT_MAX) + protect_flags |= PROT_MAX(PROT_READ | PROT_WRITE); // BSD + #endif + // huge page allocation + if ((large_only || _mi_os_use_large_page(size, try_alignment)) && allow_large) { + static _Atomic(size_t) large_page_try_ok; // = 0; + size_t try_ok = mi_atomic_load_acquire(&large_page_try_ok); + if (!large_only && try_ok > 0) { + // If the OS is not configured for large OS pages, or the user does not have + // enough permission, the `mmap` will always fail (but it might also fail for other reasons). + // Therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times + // to avoid too many failing calls to mmap. + mi_atomic_cas_strong_acq_rel(&large_page_try_ok, &try_ok, try_ok - 1); + } + else { + int lflags = flags & ~MAP_NORESERVE; // using NORESERVE on huge pages seems to fail on Linux + int lfd = fd; + #ifdef MAP_ALIGNED_SUPER + lflags |= MAP_ALIGNED_SUPER; + #endif + #ifdef MAP_HUGETLB + lflags |= MAP_HUGETLB; + #endif + #ifdef MAP_HUGE_1GB + static bool mi_huge_pages_available = true; + if ((size % MI_GiB) == 0 && mi_huge_pages_available) { + lflags |= MAP_HUGE_1GB; + } + else + #endif + { + #ifdef MAP_HUGE_2MB + lflags |= MAP_HUGE_2MB; + #endif + } + #ifdef VM_FLAGS_SUPERPAGE_SIZE_2MB + lfd |= VM_FLAGS_SUPERPAGE_SIZE_2MB; + #endif + if (large_only || lflags != flags) { + // try large OS page allocation + *is_large = true; + p = unix_mmap_prim(addr, size, try_alignment, protect_flags, lflags, lfd); + #ifdef MAP_HUGE_1GB + if (p == NULL && (lflags & MAP_HUGE_1GB) == MAP_HUGE_1GB) { + mi_huge_pages_available = false; // don't try huge 1GiB pages again + _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (errno: %i)\n", errno); + lflags = ((lflags & ~MAP_HUGE_1GB) | MAP_HUGE_2MB); + p = unix_mmap_prim(addr, size, try_alignment, protect_flags, lflags, lfd); + } + #endif + if (large_only) return p; + if (p == NULL) { + mi_atomic_store_release(&large_page_try_ok, (size_t)8); // on error, don't try again for the next N allocations + } + } + } + } + // regular allocation + if (p == NULL) { + *is_large = false; + p = unix_mmap_prim(addr, size, try_alignment, protect_flags, flags, fd); + if (p != NULL) { + #if defined(MADV_HUGEPAGE) + // Many Linux systems don't allow MAP_HUGETLB but they support instead + // transparent huge pages (THP). Generally, it is not required to call `madvise` with MADV_HUGE + // though since properly aligned allocations will already use large pages if available + // in that case -- in particular for our large regions (in `memory.c`). + // However, some systems only allow THP if called with explicit `madvise`, so + // when large OS pages are enabled for mimalloc, we call `madvise` anyways. + if (allow_large && _mi_os_use_large_page(size, try_alignment)) { + if (unix_madvise(p, size, MADV_HUGEPAGE) == 0) { + *is_large = true; // possibly + }; + } + #elif defined(__sun) + if (allow_large && _mi_os_use_large_page(size, try_alignment)) { + struct memcntl_mha cmd = {0}; + cmd.mha_pagesize = _mi_os_large_page_size(); + cmd.mha_cmd = MHA_MAPSIZE_VA; + if (memcntl((caddr_t)p, size, MC_HAT_ADVISE, (caddr_t)&cmd, 0, 0) == 0) { + *is_large = true; + } + } + #endif + } + } + return p; +} + +// Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned. +int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) { + mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); + mi_assert_internal(commit || !allow_large); + mi_assert_internal(try_alignment > 0); + + *is_zero = true; + int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE); + *addr = unix_mmap(NULL, size, try_alignment, protect_flags, false, allow_large, is_large); + return (*addr != NULL ? 0 : errno); +} + + +//--------------------------------------------- +// Commit/Reset +//--------------------------------------------- + +static void unix_mprotect_hint(int err) { + #if defined(__linux__) && (MI_SECURE>=2) // guard page around every mimalloc page + if (err == ENOMEM) { + _mi_warning_message("The next warning may be caused by a low memory map limit.\n" + " On Linux this is controlled by the vm.max_map_count -- maybe increase it?\n" + " For example: sudo sysctl -w vm.max_map_count=262144\n"); + } + #else + MI_UNUSED(err); + #endif +} + + + + + +int _mi_prim_commit(void* start, size_t size, bool* is_zero) { + // commit: ensure we can access the area + // note: we may think that *is_zero can be true since the memory + // was either from mmap PROT_NONE, or from decommit MADV_DONTNEED, but + // we sometimes call commit on a range with still partially committed + // memory and `mprotect` does not zero the range. + *is_zero = false; + int err = mprotect(start, size, (PROT_READ | PROT_WRITE)); + if (err != 0) { + err = errno; + unix_mprotect_hint(err); + } + return err; +} + +int _mi_prim_decommit(void* start, size_t size, bool* needs_recommit) { + int err = 0; + // decommit: use MADV_DONTNEED as it decreases rss immediately (unlike MADV_FREE) + err = unix_madvise(start, size, MADV_DONTNEED); + #if !MI_DEBUG && !MI_SECURE + *needs_recommit = false; + #else + *needs_recommit = true; + mprotect(start, size, PROT_NONE); + #endif + /* + // decommit: use mmap with MAP_FIXED and PROT_NONE to discard the existing memory (and reduce rss) + *needs_recommit = true; + const int fd = unix_mmap_fd(); + void* p = mmap(start, size, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), fd, 0); + if (p != start) { err = errno; } + */ + return err; +} + +int _mi_prim_reset(void* start, size_t size) { + // We try to use `MADV_FREE` as that is the fastest. A drawback though is that it + // will not reduce the `rss` stats in tools like `top` even though the memory is available + // to other processes. With the default `MIMALLOC_PURGE_DECOMMITS=1` we ensure that by + // default `MADV_DONTNEED` is used though. + #if defined(MADV_FREE) + static _Atomic(size_t) advice = MI_ATOMIC_VAR_INIT(MADV_FREE); + int oadvice = (int)mi_atomic_load_relaxed(&advice); + int err; + while ((err = unix_madvise(start, size, oadvice)) != 0 && errno == EAGAIN) { errno = 0; }; + if (err != 0 && errno == EINVAL && oadvice == MADV_FREE) { + // if MADV_FREE is not supported, fall back to MADV_DONTNEED from now on + mi_atomic_store_release(&advice, (size_t)MADV_DONTNEED); + err = unix_madvise(start, size, MADV_DONTNEED); + } + #else + int err = unix_madvise(start, size, MADV_DONTNEED); + #endif + return err; +} + +int _mi_prim_protect(void* start, size_t size, bool protect) { + int err = mprotect(start, size, protect ? PROT_NONE : (PROT_READ | PROT_WRITE)); + if (err != 0) { err = errno; } + unix_mprotect_hint(err); + return err; +} + + + +//--------------------------------------------- +// Huge page allocation +//--------------------------------------------- + +#if (MI_INTPTR_SIZE >= 8) && !defined(__HAIKU__) && !defined(__CYGWIN__) + +#ifndef MPOL_PREFERRED +#define MPOL_PREFERRED 1 +#endif + +#if defined(MI_HAS_SYSCALL_H) && defined(SYS_mbind) +static long mi_prim_mbind(void* start, unsigned long len, unsigned long mode, const unsigned long* nmask, unsigned long maxnode, unsigned flags) { + return syscall(SYS_mbind, start, len, mode, nmask, maxnode, flags); +} +#else +static long mi_prim_mbind(void* start, unsigned long len, unsigned long mode, const unsigned long* nmask, unsigned long maxnode, unsigned flags) { + MI_UNUSED(start); MI_UNUSED(len); MI_UNUSED(mode); MI_UNUSED(nmask); MI_UNUSED(maxnode); MI_UNUSED(flags); + return 0; +} +#endif + +int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) { + bool is_large = true; + *is_zero = true; + *addr = unix_mmap(hint_addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); + if (*addr != NULL && numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes + unsigned long numa_mask = (1UL << numa_node); + // TODO: does `mbind` work correctly for huge OS pages? should we + // use `set_mempolicy` before calling mmap instead? + // see: <https://lkml.org/lkml/2017/2/9/875> + long err = mi_prim_mbind(*addr, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); + if (err != 0) { + err = errno; + _mi_warning_message("failed to bind huge (1GiB) pages to numa node %d (error: %d (0x%x))\n", numa_node, err, err); + } + } + return (*addr != NULL ? 0 : errno); +} + +#else + +int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) { + MI_UNUSED(hint_addr); MI_UNUSED(size); MI_UNUSED(numa_node); + *is_zero = false; + *addr = NULL; + return ENOMEM; +} + +#endif + +//--------------------------------------------- +// NUMA nodes +//--------------------------------------------- + +#if defined(__linux__) + +size_t _mi_prim_numa_node(void) { + #if defined(MI_HAS_SYSCALL_H) && defined(SYS_getcpu) + unsigned long node = 0; + unsigned long ncpu = 0; + long err = syscall(SYS_getcpu, &ncpu, &node, NULL); + if (err != 0) return 0; + return node; + #else + return 0; + #endif +} + +size_t _mi_prim_numa_node_count(void) { + char buf[128]; + unsigned node = 0; + for(node = 0; node < 256; node++) { + // enumerate node entries -- todo: it there a more efficient way to do this? (but ensure there is no allocation) + _mi_snprintf(buf, 127, "/sys/devices/system/node/node%u", node + 1); + if (mi_prim_access(buf,R_OK) != 0) break; + } + return (node+1); +} + +#elif defined(__FreeBSD__) && __FreeBSD_version >= 1200000 + +size_t _mi_prim_numa_node(void) { + domainset_t dom; + size_t node; + int policy; + if (cpuset_getdomain(CPU_LEVEL_CPUSET, CPU_WHICH_PID, -1, sizeof(dom), &dom, &policy) == -1) return 0ul; + for (node = 0; node < MAXMEMDOM; node++) { + if (DOMAINSET_ISSET(node, &dom)) return node; + } + return 0ul; +} + +size_t _mi_prim_numa_node_count(void) { + size_t ndomains = 0; + size_t len = sizeof(ndomains); + if (sysctlbyname("vm.ndomains", &ndomains, &len, NULL, 0) == -1) return 0ul; + return ndomains; +} + +#elif defined(__DragonFly__) + +size_t _mi_prim_numa_node(void) { + // TODO: DragonFly does not seem to provide any userland means to get this information. + return 0ul; +} + +size_t _mi_prim_numa_node_count(void) { + size_t ncpus = 0, nvirtcoresperphys = 0; + size_t len = sizeof(size_t); + if (sysctlbyname("hw.ncpu", &ncpus, &len, NULL, 0) == -1) return 0ul; + if (sysctlbyname("hw.cpu_topology_ht_ids", &nvirtcoresperphys, &len, NULL, 0) == -1) return 0ul; + return nvirtcoresperphys * ncpus; +} + +#else + +size_t _mi_prim_numa_node(void) { + return 0; +} + +size_t _mi_prim_numa_node_count(void) { + return 1; +} + +#endif + +// ---------------------------------------------------------------- +// Clock +// ---------------------------------------------------------------- + +#include <time.h> + +#if defined(CLOCK_REALTIME) || defined(CLOCK_MONOTONIC) + +mi_msecs_t _mi_prim_clock_now(void) { + struct timespec t; + #ifdef CLOCK_MONOTONIC + clock_gettime(CLOCK_MONOTONIC, &t); + #else + clock_gettime(CLOCK_REALTIME, &t); + #endif + return ((mi_msecs_t)t.tv_sec * 1000) + ((mi_msecs_t)t.tv_nsec / 1000000); +} + +#else + +// low resolution timer +mi_msecs_t _mi_prim_clock_now(void) { + #if !defined(CLOCKS_PER_SEC) || (CLOCKS_PER_SEC == 1000) || (CLOCKS_PER_SEC == 0) + return (mi_msecs_t)clock(); + #elif (CLOCKS_PER_SEC < 1000) + return (mi_msecs_t)clock() * (1000 / (mi_msecs_t)CLOCKS_PER_SEC); + #else + return (mi_msecs_t)clock() / ((mi_msecs_t)CLOCKS_PER_SEC / 1000); + #endif +} + +#endif + + + + +//---------------------------------------------------------------- +// Process info +//---------------------------------------------------------------- + +#if defined(__unix__) || defined(__unix) || defined(unix) || defined(__APPLE__) || defined(__HAIKU__) +#include <stdio.h> +#include <unistd.h> +#include <sys/resource.h> + +#if defined(__APPLE__) +#include <mach/mach.h> +#endif + +#if defined(__HAIKU__) +#error #include <kernel/OS.h> +#endif + +static mi_msecs_t timeval_secs(const struct timeval* tv) { + return ((mi_msecs_t)tv->tv_sec * 1000L) + ((mi_msecs_t)tv->tv_usec / 1000L); +} + +void _mi_prim_process_info(mi_process_info_t* pinfo) +{ + struct rusage rusage; + getrusage(RUSAGE_SELF, &rusage); + pinfo->utime = timeval_secs(&rusage.ru_utime); + pinfo->stime = timeval_secs(&rusage.ru_stime); +#if !defined(__HAIKU__) + pinfo->page_faults = rusage.ru_majflt; +#endif +#if defined(__HAIKU__) + // Haiku does not have (yet?) a way to + // get these stats per process + thread_info tid; + area_info mem; + ssize_t c; + get_thread_info(find_thread(0), &tid); + while (get_next_area_info(tid.team, &c, &mem) == B_OK) { + pinfo->peak_rss += mem.ram_size; + } + pinfo->page_faults = 0; +#elif defined(__APPLE__) + pinfo->peak_rss = rusage.ru_maxrss; // macos reports in bytes + #ifdef MACH_TASK_BASIC_INFO + struct mach_task_basic_info info; + mach_msg_type_number_t infoCount = MACH_TASK_BASIC_INFO_COUNT; + if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO, (task_info_t)&info, &infoCount) == KERN_SUCCESS) { + pinfo->current_rss = (size_t)info.resident_size; + } + #else + struct task_basic_info info; + mach_msg_type_number_t infoCount = TASK_BASIC_INFO_COUNT; + if (task_info(mach_task_self(), TASK_BASIC_INFO, (task_info_t)&info, &infoCount) == KERN_SUCCESS) { + pinfo->current_rss = (size_t)info.resident_size; + } + #endif +#else + pinfo->peak_rss = rusage.ru_maxrss * 1024; // Linux/BSD report in KiB +#endif + // use defaults for commit +} + +#else + +#ifndef __wasi__ +// WebAssembly instances are not processes +#pragma message("define a way to get process info") +#endif + +void _mi_prim_process_info(mi_process_info_t* pinfo) +{ + // use defaults + MI_UNUSED(pinfo); +} + +#endif + + +//---------------------------------------------------------------- +// Output +//---------------------------------------------------------------- + +void _mi_prim_out_stderr( const char* msg ) { + fputs(msg,stderr); +} + + +//---------------------------------------------------------------- +// Environment +//---------------------------------------------------------------- + +#if !defined(MI_USE_ENVIRON) || (MI_USE_ENVIRON!=0) +// On Posix systemsr use `environ` to access environment variables +// even before the C runtime is initialized. +#if defined(__APPLE__) && defined(__has_include) && __has_include(<crt_externs.h>) +#include <crt_externs.h> +static char** mi_get_environ(void) { + return (*_NSGetEnviron()); +} +#else +extern char** environ; +static char** mi_get_environ(void) { + return environ; +} +#endif +bool _mi_prim_getenv(const char* name, char* result, size_t result_size) { + if (name==NULL) return false; + const size_t len = _mi_strlen(name); + if (len == 0) return false; + char** env = mi_get_environ(); + if (env == NULL) return false; + // compare up to 10000 entries + for (int i = 0; i < 10000 && env[i] != NULL; i++) { + const char* s = env[i]; + if (_mi_strnicmp(name, s, len) == 0 && s[len] == '=') { // case insensitive + // found it + _mi_strlcpy(result, s + len + 1, result_size); + return true; + } + } + return false; +} +#else +// fallback: use standard C `getenv` but this cannot be used while initializing the C runtime +bool _mi_prim_getenv(const char* name, char* result, size_t result_size) { + // cannot call getenv() when still initializing the C runtime. + if (_mi_preloading()) return false; + const char* s = getenv(name); + if (s == NULL) { + // we check the upper case name too. + char buf[64+1]; + size_t len = _mi_strnlen(name,sizeof(buf)-1); + for (size_t i = 0; i < len; i++) { + buf[i] = _mi_toupper(name[i]); + } + buf[len] = 0; + s = getenv(buf); + } + if (s == NULL || _mi_strnlen(s,result_size) >= result_size) return false; + _mi_strlcpy(result, s, result_size); + return true; +} +#endif // !MI_USE_ENVIRON + + +//---------------------------------------------------------------- +// Random +//---------------------------------------------------------------- + +#if defined(__APPLE__) && defined(MAC_OS_X_VERSION_10_15) && (MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_15) +#include <CommonCrypto/CommonCryptoError.h> +#include <CommonCrypto/CommonRandom.h> + +bool _mi_prim_random_buf(void* buf, size_t buf_len) { + // We prefere CCRandomGenerateBytes as it returns an error code while arc4random_buf + // may fail silently on macOS. See PR #390, and <https://opensource.apple.com/source/Libc/Libc-1439.40.11/gen/FreeBSD/arc4random.c.auto.html> + return (CCRandomGenerateBytes(buf, buf_len) == kCCSuccess); +} + +#elif defined(__ANDROID__) || defined(__DragonFly__) || \ + defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \ + defined(__sun) || \ + (defined(__APPLE__) && (MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_X_VERSION_10_7)) + +#include <stdlib.h> +bool _mi_prim_random_buf(void* buf, size_t buf_len) { + arc4random_buf(buf, buf_len); + return true; +} + +#elif defined(__APPLE__) || defined(__linux__) || defined(__HAIKU__) // also for old apple versions < 10.7 (issue #829) + +#include <sys/types.h> +#include <sys/stat.h> +#include <errno.h> + +bool _mi_prim_random_buf(void* buf, size_t buf_len) { + // Modern Linux provides `getrandom` but different distributions either use `sys/random.h` or `linux/random.h` + // and for the latter the actual `getrandom` call is not always defined. + // (see <https://stackoverflow.com/questions/45237324/why-doesnt-getrandom-compile>) + // We therefore use a syscall directly and fall back dynamically to /dev/urandom when needed. + #if defined(MI_HAS_SYSCALL_H) && defined(SYS_getrandom) + #ifndef GRND_NONBLOCK + #define GRND_NONBLOCK (1) + #endif + static _Atomic(uintptr_t) no_getrandom; // = 0 + if (mi_atomic_load_acquire(&no_getrandom)==0) { + ssize_t ret = syscall(SYS_getrandom, buf, buf_len, GRND_NONBLOCK); + if (ret >= 0) return (buf_len == (size_t)ret); + if (errno != ENOSYS) return false; + mi_atomic_store_release(&no_getrandom, (uintptr_t)1); // don't call again, and fall back to /dev/urandom + } + #endif + int flags = O_RDONLY; + #if defined(O_CLOEXEC) + flags |= O_CLOEXEC; + #endif + int fd = mi_prim_open("/dev/urandom", flags); + if (fd < 0) return false; + size_t count = 0; + while(count < buf_len) { + ssize_t ret = mi_prim_read(fd, (char*)buf + count, buf_len - count); + if (ret<=0) { + if (errno!=EAGAIN && errno!=EINTR) break; + } + else { + count += ret; + } + } + mi_prim_close(fd); + return (count==buf_len); +} + +#else + +bool _mi_prim_random_buf(void* buf, size_t buf_len) { + return false; +} + +#endif + + +//---------------------------------------------------------------- +// Thread init/done +//---------------------------------------------------------------- + +#if defined(MI_USE_PTHREADS) + +// use pthread local storage keys to detect thread ending +// (and used with MI_TLS_PTHREADS for the default heap) +pthread_key_t _mi_heap_default_key = (pthread_key_t)(-1); + +static void mi_pthread_done(void* value) { + if (value!=NULL) { + _mi_thread_done((mi_heap_t*)value); + } +} + +void _mi_prim_thread_init_auto_done(void) { + mi_assert_internal(_mi_heap_default_key == (pthread_key_t)(-1)); + pthread_key_create(&_mi_heap_default_key, &mi_pthread_done); +} + +void _mi_prim_thread_done_auto_done(void) { + if (_mi_heap_default_key != (pthread_key_t)(-1)) { // do not leak the key, see issue #809 + pthread_key_delete(_mi_heap_default_key); + } +} + +void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) { + if (_mi_heap_default_key != (pthread_key_t)(-1)) { // can happen during recursive invocation on freeBSD + pthread_setspecific(_mi_heap_default_key, heap); + } +} + +#else + +void _mi_prim_thread_init_auto_done(void) { + // nothing +} + +void _mi_prim_thread_done_auto_done(void) { + // nothing +} + +void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) { + MI_UNUSED(heap); +} + +#endif diff --git a/contrib/libs/mimalloc/src/random.c b/contrib/libs/mimalloc/src/random.c index 255bede4db..4fc8b2f8fb 100644 --- a/contrib/libs/mimalloc/src/random.c +++ b/contrib/libs/mimalloc/src/random.c @@ -5,9 +5,9 @@ terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ #include "mimalloc.h" -#include "mimalloc-internal.h" - -#include <string.h> // memset +#include "mimalloc/internal.h" +#include "mimalloc/prim.h" // _mi_prim_random_buf +#include <string.h> // memset /* ---------------------------------------------------------------------------- We use our own PRNG to keep predictable performance of random number generation @@ -154,118 +154,13 @@ uintptr_t _mi_random_next(mi_random_ctx_t* ctx) { /* ---------------------------------------------------------------------------- -To initialize a fresh random context we rely on the OS: -- Windows : BCryptGenRandom (or RtlGenRandom) -- osX,bsd,wasi: arc4random_buf -- Linux : getrandom,/dev/urandom +To initialize a fresh random context. If we cannot get good randomness, we fall back to weak randomness based on a timer and ASLR. -----------------------------------------------------------------------------*/ -#if defined(_WIN32) - -#if !defined(MI_USE_RTLGENRANDOM) -// We prefer BCryptGenRandom over RtlGenRandom -#pragma comment (lib,"bcrypt.lib") -#include <bcrypt.h> -static bool os_random_buf(void* buf, size_t buf_len) { - return (BCryptGenRandom(NULL, (PUCHAR)buf, (ULONG)buf_len, BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0); -} -#else -// Use (unofficial) RtlGenRandom -#pragma comment (lib,"advapi32.lib") -#define RtlGenRandom SystemFunction036 -#ifdef __cplusplus -extern "C" { -#endif -BOOLEAN NTAPI RtlGenRandom(PVOID RandomBuffer, ULONG RandomBufferLength); -#ifdef __cplusplus -} -#endif -static bool os_random_buf(void* buf, size_t buf_len) { - return (RtlGenRandom(buf, (ULONG)buf_len) != 0); -} -#endif - -#elif defined(ANDROID) || defined(XP_DARWIN) || defined(__APPLE__) || defined(__DragonFly__) || \ - defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \ - defined(__sun) || defined(__wasi__) -#include <stdlib.h> -static bool os_random_buf(void* buf, size_t buf_len) { - arc4random_buf(buf, buf_len); - return true; -} -#elif defined(__linux__) -#include <sys/syscall.h> -#include <unistd.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <errno.h> -static bool os_random_buf(void* buf, size_t buf_len) { - // Modern Linux provides `getrandom` but different distributions either use `sys/random.h` or `linux/random.h` - // and for the latter the actual `getrandom` call is not always defined. - // (see <https://stackoverflow.com/questions/45237324/why-doesnt-getrandom-compile>) - // We therefore use a syscall directly and fall back dynamically to /dev/urandom when needed. -#ifdef SYS_getrandom - #ifndef GRND_NONBLOCK - #define GRND_NONBLOCK (1) - #endif - static _Atomic(uintptr_t) no_getrandom; // = 0 - if (mi_atomic_load_acquire(&no_getrandom)==0) { - ssize_t ret = syscall(SYS_getrandom, buf, buf_len, GRND_NONBLOCK); - if (ret >= 0) return (buf_len == (size_t)ret); - if (ret != ENOSYS) return false; - mi_atomic_store_release(&no_getrandom, 1UL); // don't call again, and fall back to /dev/urandom - } -#endif - int flags = O_RDONLY; - #if defined(O_CLOEXEC) - flags |= O_CLOEXEC; - #endif - int fd = open("/dev/urandom", flags, 0); - if (fd < 0) return false; - size_t count = 0; - while(count < buf_len) { - ssize_t ret = read(fd, (char*)buf + count, buf_len - count); - if (ret<=0) { - if (errno!=EAGAIN && errno!=EINTR) break; - } - else { - count += ret; - } - } - close(fd); - return (count==buf_len); -} -#else -static bool os_random_buf(void* buf, size_t buf_len) { - return false; -} -#endif - -#if defined(_WIN32) -#include <windows.h> -#elif defined(__APPLE__) -#include <mach/mach_time.h> -#else -#include <time.h> -#endif - -uintptr_t _os_random_weak(uintptr_t extra_seed) { - uintptr_t x = (uintptr_t)&_os_random_weak ^ extra_seed; // ASLR makes the address random - - #if defined(_WIN32) - LARGE_INTEGER pcount; - QueryPerformanceCounter(&pcount); - x ^= (uintptr_t)(pcount.QuadPart); - #elif defined(__APPLE__) - x ^= (uintptr_t)mach_absolute_time(); - #else - struct timespec time; - clock_gettime(CLOCK_MONOTONIC, &time); - x ^= (uintptr_t)time.tv_sec; - x ^= (uintptr_t)time.tv_nsec; - #endif +uintptr_t _mi_os_random_weak(uintptr_t extra_seed) { + uintptr_t x = (uintptr_t)&_mi_os_random_weak ^ extra_seed; // ASLR makes the address random + x ^= _mi_prim_clock_now(); // and do a few randomization steps uintptr_t max = ((x ^ (x >> 17)) & 0x0F) + 1; for (uintptr_t i = 0; i < max; i++) { @@ -275,21 +170,41 @@ uintptr_t _os_random_weak(uintptr_t extra_seed) { return x; } -void _mi_random_init(mi_random_ctx_t* ctx) { +static void mi_random_init_ex(mi_random_ctx_t* ctx, bool use_weak) { uint8_t key[32]; - if (!os_random_buf(key, sizeof(key))) { + if (use_weak || !_mi_prim_random_buf(key, sizeof(key))) { // if we fail to get random data from the OS, we fall back to a // weak random source based on the current time - _mi_warning_message("unable to use secure randomness\n"); - uintptr_t x = _os_random_weak(0); + #if !defined(__wasi__) + if (!use_weak) { _mi_warning_message("unable to use secure randomness\n"); } + #endif + uintptr_t x = _mi_os_random_weak(0); for (size_t i = 0; i < 8; i++) { // key is eight 32-bit words. x = _mi_random_shuffle(x); ((uint32_t*)key)[i] = (uint32_t)x; } + ctx->weak = true; + } + else { + ctx->weak = false; } chacha_init(ctx, key, (uintptr_t)ctx /*nonce*/ ); } +void _mi_random_init(mi_random_ctx_t* ctx) { + mi_random_init_ex(ctx, false); +} + +void _mi_random_init_weak(mi_random_ctx_t * ctx) { + mi_random_init_ex(ctx, true); +} + +void _mi_random_reinit_if_weak(mi_random_ctx_t * ctx) { + if (ctx->weak) { + _mi_random_init(ctx); + } +} + /* -------------------------------------------------------- test vectors from <https://tools.ietf.org/html/rfc8439> ----------------------------------------------------------- */ diff --git a/contrib/libs/mimalloc/src/region.c b/contrib/libs/mimalloc/src/region.c deleted file mode 100644 index 7954073099..0000000000 --- a/contrib/libs/mimalloc/src/region.c +++ /dev/null @@ -1,505 +0,0 @@ -/* ---------------------------------------------------------------------------- -Copyright (c) 2019-2020, Microsoft Research, Daan Leijen -This is free software; you can redistribute it and/or modify it under the -terms of the MIT license. A copy of the license can be found in the file -"LICENSE" at the root of this distribution. ------------------------------------------------------------------------------*/ - -/* ---------------------------------------------------------------------------- -This implements a layer between the raw OS memory (VirtualAlloc/mmap/sbrk/..) -and the segment and huge object allocation by mimalloc. There may be multiple -implementations of this (one could be the identity going directly to the OS, -another could be a simple cache etc), but the current one uses large "regions". -In contrast to the rest of mimalloc, the "regions" are shared between threads and -need to be accessed using atomic operations. -We need this memory layer between the raw OS calls because of: -1. on `sbrk` like systems (like WebAssembly) we need our own memory maps in order - to reuse memory effectively. -2. It turns out that for large objects, between 1MiB and 32MiB (?), the cost of - an OS allocation/free is still (much) too expensive relative to the accesses - in that object :-( (`malloc-large` tests this). This means we need a cheaper - way to reuse memory. -3. This layer allows for NUMA aware allocation. - -Possible issues: -- (2) can potentially be addressed too with a small cache per thread which is much - simpler. Generally though that requires shrinking of huge pages, and may overuse - memory per thread. (and is not compatible with `sbrk`). -- Since the current regions are per-process, we need atomic operations to - claim blocks which may be contended -- In the worst case, we need to search the whole region map (16KiB for 256GiB) - linearly. At what point will direct OS calls be faster? Is there a way to - do this better without adding too much complexity? ------------------------------------------------------------------------------*/ -#include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" - -#include <string.h> // memset - -#include "bitmap.h" - -// Internal raw OS interface -size_t _mi_os_large_page_size(); -bool _mi_os_protect(void* addr, size_t size); -bool _mi_os_unprotect(void* addr, size_t size); -bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); -bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats); -bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats); -bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats); - -// arena.c -void _mi_arena_free(void* p, size_t size, size_t memid, bool all_committed, mi_stats_t* stats); -void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld); -void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld); - - - -// Constants -#if (MI_INTPTR_SIZE==8) -#define MI_HEAP_REGION_MAX_SIZE (256 * GiB) // 64KiB for the region map -#elif (MI_INTPTR_SIZE==4) -#define MI_HEAP_REGION_MAX_SIZE (3 * GiB) // ~ KiB for the region map -#else -#error "define the maximum heap space allowed for regions on this platform" -#endif - -#define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE - -#define MI_REGION_MAX_BLOCKS MI_BITMAP_FIELD_BITS -#define MI_REGION_SIZE (MI_SEGMENT_SIZE * MI_BITMAP_FIELD_BITS) // 256MiB (64MiB on 32 bits) -#define MI_REGION_MAX (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE) // 1024 (48 on 32 bits) -#define MI_REGION_MAX_OBJ_BLOCKS (MI_REGION_MAX_BLOCKS/4) // 64MiB -#define MI_REGION_MAX_OBJ_SIZE (MI_REGION_MAX_OBJ_BLOCKS*MI_SEGMENT_SIZE) - -// Region info -typedef union mi_region_info_u { - uintptr_t value; - struct { - bool valid; // initialized? - bool is_large:1; // allocated in fixed large/huge OS pages - bool is_pinned:1; // pinned memory cannot be decommitted - short numa_node; // the associated NUMA node (where -1 means no associated node) - } x; -} mi_region_info_t; - - -// A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with -// a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block. -typedef struct mem_region_s { - _Atomic(uintptr_t) info; // mi_region_info_t.value - _Atomic(void*) start; // start of the memory area - mi_bitmap_field_t in_use; // bit per in-use block - mi_bitmap_field_t dirty; // track if non-zero per block - mi_bitmap_field_t commit; // track if committed per block - mi_bitmap_field_t reset; // track if reset per block - _Atomic(uintptr_t) arena_memid; // if allocated from a (huge page) arena - uintptr_t padding; // round to 8 fields -} mem_region_t; - -// The region map -static mem_region_t regions[MI_REGION_MAX]; - -// Allocated regions -static _Atomic(uintptr_t) regions_count; // = 0; - - -/* ---------------------------------------------------------------------------- -Utility functions ------------------------------------------------------------------------------*/ - -// Blocks (of 4MiB) needed for the given size. -static size_t mi_region_block_count(size_t size) { - return _mi_divide_up(size, MI_SEGMENT_SIZE); -} - -/* -// Return a rounded commit/reset size such that we don't fragment large OS pages into small ones. -static size_t mi_good_commit_size(size_t size) { - if (size > (SIZE_MAX - _mi_os_large_page_size())) return size; - return _mi_align_up(size, _mi_os_large_page_size()); -} -*/ - -// Return if a pointer points into a region reserved by us. -bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { - if (p==NULL) return false; - size_t count = mi_atomic_load_relaxed(®ions_count); - for (size_t i = 0; i < count; i++) { - uint8_t* start = (uint8_t*)mi_atomic_load_ptr_relaxed(uint8_t, ®ions[i].start); - if (start != NULL && (uint8_t*)p >= start && (uint8_t*)p < start + MI_REGION_SIZE) return true; - } - return false; -} - - -static void* mi_region_blocks_start(const mem_region_t* region, mi_bitmap_index_t bit_idx) { - uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t, &((mem_region_t*)region)->start); - mi_assert_internal(start != NULL); - return (start + (bit_idx * MI_SEGMENT_SIZE)); -} - -static size_t mi_memid_create(mem_region_t* region, mi_bitmap_index_t bit_idx) { - mi_assert_internal(bit_idx < MI_BITMAP_FIELD_BITS); - size_t idx = region - regions; - mi_assert_internal(®ions[idx] == region); - return (idx*MI_BITMAP_FIELD_BITS + bit_idx)<<1; -} - -static size_t mi_memid_create_from_arena(size_t arena_memid) { - return (arena_memid << 1) | 1; -} - - -static bool mi_memid_is_arena(size_t id, mem_region_t** region, mi_bitmap_index_t* bit_idx, size_t* arena_memid) { - if ((id&1)==1) { - if (arena_memid != NULL) *arena_memid = (id>>1); - return true; - } - else { - size_t idx = (id >> 1) / MI_BITMAP_FIELD_BITS; - *bit_idx = (mi_bitmap_index_t)(id>>1) % MI_BITMAP_FIELD_BITS; - *region = ®ions[idx]; - return false; - } -} - - -/* ---------------------------------------------------------------------------- - Allocate a region is allocated from the OS (or an arena) ------------------------------------------------------------------------------*/ - -static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) -{ - // not out of regions yet? - if (mi_atomic_load_relaxed(®ions_count) >= MI_REGION_MAX - 1) return false; - - // try to allocate a fresh region from the OS - bool region_commit = (commit && mi_option_is_enabled(mi_option_eager_region_commit)); - bool region_large = (commit && allow_large); - bool is_zero = false; - bool is_pinned = false; - size_t arena_memid = 0; - void* const start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, ®ion_commit, ®ion_large, &is_pinned, &is_zero, &arena_memid, tld); - if (start == NULL) return false; - mi_assert_internal(!(region_large && !allow_large)); - mi_assert_internal(!region_large || region_commit); - - // claim a fresh slot - const uintptr_t idx = mi_atomic_increment_acq_rel(®ions_count); - if (idx >= MI_REGION_MAX) { - mi_atomic_decrement_acq_rel(®ions_count); - _mi_arena_free(start, MI_REGION_SIZE, arena_memid, region_commit, tld->stats); - _mi_warning_message("maximum regions used: %zu GiB (perhaps recompile with a larger setting for MI_HEAP_REGION_MAX_SIZE)", _mi_divide_up(MI_HEAP_REGION_MAX_SIZE, GiB)); - return false; - } - - // allocated, initialize and claim the initial blocks - mem_region_t* r = ®ions[idx]; - r->arena_memid = arena_memid; - mi_atomic_store_release(&r->in_use, (uintptr_t)0); - mi_atomic_store_release(&r->dirty, (is_zero ? 0 : MI_BITMAP_FIELD_FULL)); - mi_atomic_store_release(&r->commit, (region_commit ? MI_BITMAP_FIELD_FULL : 0)); - mi_atomic_store_release(&r->reset, (uintptr_t)0); - *bit_idx = 0; - _mi_bitmap_claim(&r->in_use, 1, blocks, *bit_idx, NULL); - mi_atomic_store_ptr_release(void,&r->start, start); - - // and share it - mi_region_info_t info; - info.value = 0; // initialize the full union to zero - info.x.valid = true; - info.x.is_large = region_large; - info.x.is_pinned = is_pinned; - info.x.numa_node = (short)_mi_os_numa_node(tld); - mi_atomic_store_release(&r->info, info.value); // now make it available to others - *region = r; - return true; -} - -/* ---------------------------------------------------------------------------- - Try to claim blocks in suitable regions ------------------------------------------------------------------------------*/ - -static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, bool allow_large ) { - // initialized at all? - mi_region_info_t info; - info.value = mi_atomic_load_relaxed(&((mem_region_t*)region)->info); - if (info.value==0) return false; - - // numa correct - if (numa_node >= 0) { // use negative numa node to always succeed - int rnode = info.x.numa_node; - if (rnode >= 0 && rnode != numa_node) return false; - } - - // check allow-large - if (!allow_large && info.x.is_large) return false; - - return true; -} - - -static bool mi_region_try_claim(int numa_node, size_t blocks, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) -{ - // try all regions for a free slot - const size_t count = mi_atomic_load_relaxed(®ions_count); // monotonic, so ok to be relaxed - size_t idx = tld->region_idx; // Or start at 0 to reuse low addresses? Starting at 0 seems to increase latency though - for (size_t visited = 0; visited < count; visited++, idx++) { - if (idx >= count) idx = 0; // wrap around - mem_region_t* r = ®ions[idx]; - // if this region suits our demand (numa node matches, large OS page matches) - if (mi_region_is_suitable(r, numa_node, allow_large)) { - // then try to atomically claim a segment(s) in this region - if (_mi_bitmap_try_find_claim_field(&r->in_use, 0, blocks, bit_idx)) { - tld->region_idx = idx; // remember the last found position - *region = r; - return true; - } - } - } - return false; -} - - -static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld) -{ - mi_assert_internal(blocks <= MI_BITMAP_FIELD_BITS); - mem_region_t* region; - mi_bitmap_index_t bit_idx; - const int numa_node = (_mi_os_numa_node_count() <= 1 ? -1 : _mi_os_numa_node(tld)); - // try to claim in existing regions - if (!mi_region_try_claim(numa_node, blocks, *large, ®ion, &bit_idx, tld)) { - // otherwise try to allocate a fresh region and claim in there - if (!mi_region_try_alloc_os(blocks, *commit, *large, ®ion, &bit_idx, tld)) { - // out of regions or memory - return NULL; - } - } - - // ------------------------------------------------ - // found a region and claimed `blocks` at `bit_idx`, initialize them now - mi_assert_internal(region != NULL); - mi_assert_internal(_mi_bitmap_is_claimed(®ion->in_use, 1, blocks, bit_idx)); - - mi_region_info_t info; - info.value = mi_atomic_load_acquire(®ion->info); - uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t,®ion->start); - mi_assert_internal(!(info.x.is_large && !*large)); - mi_assert_internal(start != NULL); - - *is_zero = _mi_bitmap_claim(®ion->dirty, 1, blocks, bit_idx, NULL); - *large = info.x.is_large; - *is_pinned = info.x.is_pinned; - *memid = mi_memid_create(region, bit_idx); - void* p = start + (mi_bitmap_index_bit_in_field(bit_idx) * MI_SEGMENT_SIZE); - - // commit - if (*commit) { - // ensure commit - bool any_uncommitted; - _mi_bitmap_claim(®ion->commit, 1, blocks, bit_idx, &any_uncommitted); - if (any_uncommitted) { - mi_assert_internal(!info.x.is_large && !info.x.is_pinned); - bool commit_zero = false; - if (!_mi_mem_commit(p, blocks * MI_SEGMENT_SIZE, &commit_zero, tld)) { - // failed to commit! unclaim and return - mi_bitmap_unclaim(®ion->in_use, 1, blocks, bit_idx); - return NULL; - } - if (commit_zero) *is_zero = true; - } - } - else { - // no need to commit, but check if already fully committed - *commit = _mi_bitmap_is_claimed(®ion->commit, 1, blocks, bit_idx); - } - mi_assert_internal(!*commit || _mi_bitmap_is_claimed(®ion->commit, 1, blocks, bit_idx)); - - // unreset reset blocks - if (_mi_bitmap_is_any_claimed(®ion->reset, 1, blocks, bit_idx)) { - // some blocks are still reset - mi_assert_internal(!info.x.is_large && !info.x.is_pinned); - mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit || mi_option_get(mi_option_eager_commit_delay) > 0); - mi_bitmap_unclaim(®ion->reset, 1, blocks, bit_idx); - if (*commit || !mi_option_is_enabled(mi_option_reset_decommits)) { // only if needed - bool reset_zero = false; - _mi_mem_unreset(p, blocks * MI_SEGMENT_SIZE, &reset_zero, tld); - if (reset_zero) *is_zero = true; - } - } - mi_assert_internal(!_mi_bitmap_is_any_claimed(®ion->reset, 1, blocks, bit_idx)); - - #if (MI_DEBUG>=2) - if (*commit) { ((uint8_t*)p)[0] = 0; } - #endif - - // and return the allocation - mi_assert_internal(p != NULL); - return p; -} - - -/* ---------------------------------------------------------------------------- - Allocation ------------------------------------------------------------------------------*/ - -// Allocate `size` memory aligned at `alignment`. Return non NULL on success, with a given memory `id`. -// (`id` is abstract, but `id = idx*MI_REGION_MAP_BITS + bitidx`) -void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld) -{ - mi_assert_internal(memid != NULL && tld != NULL); - mi_assert_internal(size > 0); - *memid = 0; - *is_zero = false; - *is_pinned = false; - bool default_large = false; - if (large==NULL) large = &default_large; // ensure `large != NULL` - if (size == 0) return NULL; - size = _mi_align_up(size, _mi_os_page_size()); - - // allocate from regions if possible - void* p = NULL; - size_t arena_memid; - const size_t blocks = mi_region_block_count(size); - if (blocks <= MI_REGION_MAX_OBJ_BLOCKS && alignment <= MI_SEGMENT_ALIGN) { - p = mi_region_try_alloc(blocks, commit, large, is_pinned, is_zero, memid, tld); - if (p == NULL) { - _mi_warning_message("unable to allocate from region: size %zu\n", size); - } - } - if (p == NULL) { - // and otherwise fall back to the OS - p = _mi_arena_alloc_aligned(size, alignment, commit, large, is_pinned, is_zero, &arena_memid, tld); - *memid = mi_memid_create_from_arena(arena_memid); - } - - if (p != NULL) { - mi_assert_internal((uintptr_t)p % alignment == 0); -#if (MI_DEBUG>=2) - if (*commit) { ((uint8_t*)p)[0] = 0; } // ensure the memory is committed -#endif - } - return p; -} - - - -/* ---------------------------------------------------------------------------- -Free ------------------------------------------------------------------------------*/ - -// Free previously allocated memory with a given id. -void _mi_mem_free(void* p, size_t size, size_t id, bool full_commit, bool any_reset, mi_os_tld_t* tld) { - mi_assert_internal(size > 0 && tld != NULL); - if (p==NULL) return; - if (size==0) return; - size = _mi_align_up(size, _mi_os_page_size()); - - size_t arena_memid = 0; - mi_bitmap_index_t bit_idx; - mem_region_t* region; - if (mi_memid_is_arena(id,®ion,&bit_idx,&arena_memid)) { - // was a direct arena allocation, pass through - _mi_arena_free(p, size, arena_memid, full_commit, tld->stats); - } - else { - // allocated in a region - mi_assert_internal(size <= MI_REGION_MAX_OBJ_SIZE); if (size > MI_REGION_MAX_OBJ_SIZE) return; - const size_t blocks = mi_region_block_count(size); - mi_assert_internal(blocks + bit_idx <= MI_BITMAP_FIELD_BITS); - mi_region_info_t info; - info.value = mi_atomic_load_acquire(®ion->info); - mi_assert_internal(info.value != 0); - void* blocks_start = mi_region_blocks_start(region, bit_idx); - mi_assert_internal(blocks_start == p); // not a pointer in our area? - mi_assert_internal(bit_idx + blocks <= MI_BITMAP_FIELD_BITS); - if (blocks_start != p || bit_idx + blocks > MI_BITMAP_FIELD_BITS) return; // or `abort`? - - // committed? - if (full_commit && (size % MI_SEGMENT_SIZE) == 0) { - _mi_bitmap_claim(®ion->commit, 1, blocks, bit_idx, NULL); - } - - if (any_reset) { - // set the is_reset bits if any pages were reset - _mi_bitmap_claim(®ion->reset, 1, blocks, bit_idx, NULL); - } - - // reset the blocks to reduce the working set. - if (!info.x.is_large && !info.x.is_pinned && mi_option_is_enabled(mi_option_segment_reset) - && (mi_option_is_enabled(mi_option_eager_commit) || - mi_option_is_enabled(mi_option_reset_decommits))) // cannot reset halfway committed segments, use only `option_page_reset` instead - { - bool any_unreset; - _mi_bitmap_claim(®ion->reset, 1, blocks, bit_idx, &any_unreset); - if (any_unreset) { - _mi_abandoned_await_readers(); // ensure no more pending write (in case reset = decommit) - _mi_mem_reset(p, blocks * MI_SEGMENT_SIZE, tld); - } - } - - // and unclaim - bool all_unclaimed = mi_bitmap_unclaim(®ion->in_use, 1, blocks, bit_idx); - mi_assert_internal(all_unclaimed); UNUSED(all_unclaimed); - } -} - - -/* ---------------------------------------------------------------------------- - collection ------------------------------------------------------------------------------*/ -void _mi_mem_collect(mi_os_tld_t* tld) { - // free every region that has no segments in use. - uintptr_t rcount = mi_atomic_load_relaxed(®ions_count); - for (size_t i = 0; i < rcount; i++) { - mem_region_t* region = ®ions[i]; - if (mi_atomic_load_relaxed(®ion->info) != 0) { - // if no segments used, try to claim the whole region - uintptr_t m = mi_atomic_load_relaxed(®ion->in_use); - while (m == 0 && !mi_atomic_cas_weak_release(®ion->in_use, &m, MI_BITMAP_FIELD_FULL)) { /* nothing */ }; - if (m == 0) { - // on success, free the whole region - uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t,®ions[i].start); - size_t arena_memid = mi_atomic_load_relaxed(®ions[i].arena_memid); - uintptr_t commit = mi_atomic_load_relaxed(®ions[i].commit); - memset(®ions[i], 0, sizeof(mem_region_t)); - // and release the whole region - mi_atomic_store_release(®ion->info, (uintptr_t)0); - if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) { - _mi_abandoned_await_readers(); // ensure no pending reads - _mi_arena_free(start, MI_REGION_SIZE, arena_memid, (~commit == 0), tld->stats); - } - } - } - } -} - - -/* ---------------------------------------------------------------------------- - Other ------------------------------------------------------------------------------*/ - -bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) { - return _mi_os_reset(p, size, tld->stats); -} - -bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { - return _mi_os_unreset(p, size, is_zero, tld->stats); -} - -bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { - return _mi_os_commit(p, size, is_zero, tld->stats); -} - -bool _mi_mem_decommit(void* p, size_t size, mi_os_tld_t* tld) { - return _mi_os_decommit(p, size, tld->stats); -} - -bool _mi_mem_protect(void* p, size_t size) { - return _mi_os_protect(p, size); -} - -bool _mi_mem_unprotect(void* p, size_t size) { - return _mi_os_unprotect(p, size); -} diff --git a/contrib/libs/mimalloc/src/segment-map.c b/contrib/libs/mimalloc/src/segment-map.c new file mode 100644 index 0000000000..1efb1e2360 --- /dev/null +++ b/contrib/libs/mimalloc/src/segment-map.c @@ -0,0 +1,155 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2019-2023, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +/* ----------------------------------------------------------- + The following functions are to reliably find the segment or + block that encompasses any pointer p (or NULL if it is not + in any of our segments). + We maintain a bitmap of all memory with 1 bit per MI_SEGMENT_SIZE (64MiB) + set to 1 if it contains the segment meta data. +----------------------------------------------------------- */ +#include "mimalloc.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" + +#if (MI_INTPTR_SIZE>=8) && MI_TRACK_ASAN +#define MI_MAX_ADDRESS ((size_t)140 << 40) // 140TB (see issue #881) +#elif (MI_INTPTR_SIZE >= 8) +#define MI_MAX_ADDRESS ((size_t)40 << 40) // 40TB (to include huge page areas) +#else +#define MI_MAX_ADDRESS ((size_t)2 << 30) // 2Gb +#endif + +#define MI_SEGMENT_MAP_BITS (MI_MAX_ADDRESS / MI_SEGMENT_SIZE) +#define MI_SEGMENT_MAP_SIZE (MI_SEGMENT_MAP_BITS / 8) +#define MI_SEGMENT_MAP_WSIZE (MI_SEGMENT_MAP_SIZE / MI_INTPTR_SIZE) + +static _Atomic(uintptr_t) mi_segment_map[MI_SEGMENT_MAP_WSIZE + 1]; // 2KiB per TB with 64MiB segments + +static size_t mi_segment_map_index_of(const mi_segment_t* segment, size_t* bitidx) { + // note: segment can be invalid or NULL. + mi_assert_internal(_mi_ptr_segment(segment + 1) == segment); // is it aligned on MI_SEGMENT_SIZE? + if ((uintptr_t)segment >= MI_MAX_ADDRESS) { + *bitidx = 0; + return MI_SEGMENT_MAP_WSIZE; + } + else { + const uintptr_t segindex = ((uintptr_t)segment) / MI_SEGMENT_SIZE; + *bitidx = segindex % MI_INTPTR_BITS; + const size_t mapindex = segindex / MI_INTPTR_BITS; + mi_assert_internal(mapindex < MI_SEGMENT_MAP_WSIZE); + return mapindex; + } +} + +void _mi_segment_map_allocated_at(const mi_segment_t* segment) { + size_t bitidx; + size_t index = mi_segment_map_index_of(segment, &bitidx); + mi_assert_internal(index <= MI_SEGMENT_MAP_WSIZE); + if (index==MI_SEGMENT_MAP_WSIZE) return; + uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); + uintptr_t newmask; + do { + newmask = (mask | ((uintptr_t)1 << bitidx)); + } while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask)); +} + +void _mi_segment_map_freed_at(const mi_segment_t* segment) { + size_t bitidx; + size_t index = mi_segment_map_index_of(segment, &bitidx); + mi_assert_internal(index <= MI_SEGMENT_MAP_WSIZE); + if (index == MI_SEGMENT_MAP_WSIZE) return; + uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); + uintptr_t newmask; + do { + newmask = (mask & ~((uintptr_t)1 << bitidx)); + } while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask)); +} + +// Determine the segment belonging to a pointer or NULL if it is not in a valid segment. +static mi_segment_t* _mi_segment_of(const void* p) { + if (p == NULL) return NULL; + mi_segment_t* segment = _mi_ptr_segment(p); // segment can be NULL + size_t bitidx; + size_t index = mi_segment_map_index_of(segment, &bitidx); + // fast path: for any pointer to valid small/medium/large object or first MI_SEGMENT_SIZE in huge + const uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); + if mi_likely((mask & ((uintptr_t)1 << bitidx)) != 0) { + return segment; // yes, allocated by us + } + if (index==MI_SEGMENT_MAP_WSIZE) return NULL; + + // TODO: maintain max/min allocated range for efficiency for more efficient rejection of invalid pointers? + + // search downwards for the first segment in case it is an interior pointer + // could be slow but searches in MI_INTPTR_SIZE * MI_SEGMENT_SIZE (512MiB) steps trough + // valid huge objects + // note: we could maintain a lowest index to speed up the path for invalid pointers? + size_t lobitidx; + size_t loindex; + uintptr_t lobits = mask & (((uintptr_t)1 << bitidx) - 1); + if (lobits != 0) { + loindex = index; + lobitidx = mi_bsr(lobits); // lobits != 0 + } + else if (index == 0) { + return NULL; + } + else { + mi_assert_internal(index > 0); + uintptr_t lomask = mask; + loindex = index; + do { + loindex--; + lomask = mi_atomic_load_relaxed(&mi_segment_map[loindex]); + } while (lomask != 0 && loindex > 0); + if (lomask == 0) return NULL; + lobitidx = mi_bsr(lomask); // lomask != 0 + } + mi_assert_internal(loindex < MI_SEGMENT_MAP_WSIZE); + // take difference as the addresses could be larger than the MAX_ADDRESS space. + size_t diff = (((index - loindex) * (8*MI_INTPTR_SIZE)) + bitidx - lobitidx) * MI_SEGMENT_SIZE; + segment = (mi_segment_t*)((uint8_t*)segment - diff); + + if (segment == NULL) return NULL; + mi_assert_internal((void*)segment < p); + bool cookie_ok = (_mi_ptr_cookie(segment) == segment->cookie); + mi_assert_internal(cookie_ok); + if mi_unlikely(!cookie_ok) return NULL; + if (((uint8_t*)segment + mi_segment_size(segment)) <= (uint8_t*)p) return NULL; // outside the range + mi_assert_internal(p >= (void*)segment && (uint8_t*)p < (uint8_t*)segment + mi_segment_size(segment)); + return segment; +} + +// Is this a valid pointer in our heap? +static bool mi_is_valid_pointer(const void* p) { + return ((_mi_segment_of(p) != NULL) || (_mi_arena_contains(p))); +} + +mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { + return mi_is_valid_pointer(p); +} + +/* +// Return the full segment range belonging to a pointer +static void* mi_segment_range_of(const void* p, size_t* size) { + mi_segment_t* segment = _mi_segment_of(p); + if (segment == NULL) { + if (size != NULL) *size = 0; + return NULL; + } + else { + if (size != NULL) *size = segment->segment_size; + return segment; + } + mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld)); + mi_assert_internal(page == NULL || (mi_segment_page_size(_mi_page_segment(page)) - (MI_SECURE == 0 ? 0 : _mi_os_page_size())) >= block_size); + mi_reset_delayed(tld); + mi_assert_internal(page == NULL || mi_page_not_in_queue(page, tld)); + return page; +} +*/ diff --git a/contrib/libs/mimalloc/src/segment.c b/contrib/libs/mimalloc/src/segment.c index 1d59be9d06..fc13d2e778 100644 --- a/contrib/libs/mimalloc/src/segment.c +++ b/contrib/libs/mimalloc/src/segment.c @@ -1,12 +1,12 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2018-2020, Microsoft Research, Daan Leijen +Copyright (c) 2018-2024, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ #include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" #include <string.h> // memset #include <stdio.h> @@ -17,22 +17,23 @@ static uint8_t* mi_segment_raw_page_start(const mi_segment_t* segment, const mi_ /* -------------------------------------------------------------------------------- Segment allocation - We allocate pages inside bigger "segments" (4mb on 64-bit). This is to avoid + We allocate pages inside bigger "segments" (4MiB on 64-bit). This is to avoid splitting VMA's on Linux and reduce fragmentation on other OS's. Each thread owns its own segments. Currently we have: - - small pages (64kb), 64 in one segment - - medium pages (512kb), 8 in one segment - - large pages (4mb), 1 in one segment - - huge blocks > MI_LARGE_OBJ_SIZE_MAX become large segment with 1 page + - small pages (64KiB), 64 in one segment + - medium pages (512KiB), 8 in one segment + - large pages (4MiB), 1 in one segment + - huge segments have 1 page in one segment that can be larger than `MI_SEGMENT_SIZE`. + it is used for blocks `> MI_LARGE_OBJ_SIZE_MAX` or with alignment `> MI_BLOCK_ALIGNMENT_MAX`. - In any case the memory for a segment is virtual and usually committed on demand. + The memory for a segment is usually committed on demand. (i.e. we are careful to not touch the memory until we actually allocate a block there) - If a thread ends, it "abandons" pages with used blocks - and there is an abandoned segment list whose segments can - be reclaimed by still running threads, much like work-stealing. + If a thread ends, it "abandons" pages that still contain live blocks. + Such segments are abondoned and these can be reclaimed by still running threads, + (much like work-stealing). -------------------------------------------------------------------------------- */ @@ -54,9 +55,11 @@ static bool mi_segment_queue_contains(const mi_segment_queue_t* queue, const mi_ } #endif +/* static bool mi_segment_queue_is_empty(const mi_segment_queue_t* queue) { return (queue->first == NULL); } +*/ static void mi_segment_queue_remove(mi_segment_queue_t* queue, mi_segment_t* segment) { mi_assert_expensive(mi_segment_queue_contains(queue, segment)); @@ -110,17 +113,7 @@ static void mi_segment_insert_in_free_queue(mi_segment_t* segment, mi_segments_t Invariant checking ----------------------------------------------------------- */ -#if (MI_DEBUG>=2) -static bool mi_segment_is_in_free_queue(const mi_segment_t* segment, mi_segments_tld_t* tld) { - mi_segment_queue_t* queue = mi_segment_free_queue(segment, tld); - bool in_queue = (queue!=NULL && (segment->next != NULL || segment->prev != NULL || queue->first == segment)); - if (in_queue) { - mi_assert_expensive(mi_segment_queue_contains(queue, segment)); - } - return in_queue; -} -#endif - +#if (MI_DEBUG >= 2) || (MI_SECURE >= 2) static size_t mi_segment_page_size(const mi_segment_t* segment) { if (segment->capacity > 1) { mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM); @@ -131,11 +124,11 @@ static size_t mi_segment_page_size(const mi_segment_t* segment) { return segment->segment_size; } } - +#endif #if (MI_DEBUG>=2) -static bool mi_pages_reset_contains(const mi_page_t* page, mi_segments_tld_t* tld) { - mi_page_t* p = tld->pages_reset.first; +static bool mi_pages_purge_contains(const mi_page_t* page, mi_segments_tld_t* tld) { + mi_page_t* p = tld->pages_purge.first; while (p != NULL) { if (p == page) return true; p = p->next; @@ -150,15 +143,17 @@ static bool mi_segment_is_valid(const mi_segment_t* segment, mi_segments_tld_t* mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie); mi_assert_internal(segment->used <= segment->capacity); mi_assert_internal(segment->abandoned <= segment->used); + mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM || segment->capacity == 1); // one large or huge page per segment size_t nfree = 0; for (size_t i = 0; i < segment->capacity; i++) { const mi_page_t* const page = &segment->pages[i]; if (!page->segment_in_use) { nfree++; } - if (page->segment_in_use || page->is_reset) { - mi_assert_expensive(!mi_pages_reset_contains(page, tld)); + if (page->segment_in_use) { + mi_assert_expensive(!mi_pages_purge_contains(page, tld)); } + mi_assert_internal(page->is_huge == (segment->page_kind == MI_PAGE_HUGE)); } mi_assert_internal(nfree + segment->used == segment->capacity); // mi_assert_internal(segment->thread_id == _mi_thread_id() || (segment->thread_id==0)); // or 0 @@ -171,12 +166,12 @@ static bool mi_segment_is_valid(const mi_segment_t* segment, mi_segments_tld_t* static bool mi_page_not_in_queue(const mi_page_t* page, mi_segments_tld_t* tld) { mi_assert_internal(page != NULL); if (page->next != NULL || page->prev != NULL) { - mi_assert_internal(mi_pages_reset_contains(page, tld)); + mi_assert_internal(mi_pages_purge_contains(page, tld)); return false; } else { // both next and prev are NULL, check for singleton list - return (tld->pages_reset.first != page && tld->pages_reset.last != page); + return (tld->pages_purge.first != page && tld->pages_purge.last != page); } } @@ -187,10 +182,10 @@ static bool mi_page_not_in_queue(const mi_page_t* page, mi_segments_tld_t* tld) static void mi_segment_protect_range(void* p, size_t size, bool protect) { if (protect) { - _mi_mem_protect(p, size); + _mi_os_protect(p, size); } else { - _mi_mem_unprotect(p, size); + _mi_os_unprotect(p, size); } } @@ -202,14 +197,17 @@ static void mi_segment_protect(mi_segment_t* segment, bool protect, mi_os_tld_t* mi_assert_internal((segment->segment_info_size - os_psize) >= (sizeof(mi_segment_t) + ((segment->capacity - 1) * sizeof(mi_page_t)))); mi_assert_internal(((uintptr_t)segment + segment->segment_info_size) % os_psize == 0); mi_segment_protect_range((uint8_t*)segment + segment->segment_info_size - os_psize, os_psize, protect); - if (MI_SECURE <= 1 || segment->capacity == 1) { + #if (MI_SECURE >= 2) + if (segment->capacity == 1) + #endif + { // and protect the last (or only) page too mi_assert_internal(MI_SECURE <= 1 || segment->page_kind >= MI_PAGE_LARGE); uint8_t* start = (uint8_t*)segment + segment->segment_size - os_psize; - if (protect && !segment->mem_is_committed) { + if (protect && !segment->memid.initially_committed) { if (protect) { // ensure secure page is committed - if (_mi_mem_commit(start, os_psize, NULL, tld)) { // if this fails that is ok (as it is an unaccessible page) + if (_mi_os_commit(start, os_psize, NULL, tld->stats)) { // if this fails that is ok (as it is an unaccessible page) mi_segment_protect_range(start, os_psize, protect); } } @@ -218,6 +216,7 @@ static void mi_segment_protect(mi_segment_t* segment, bool protect, mi_os_tld_t* mi_segment_protect_range(start, os_psize, protect); } } + #if (MI_SECURE >= 2) else { // or protect every page const size_t page_size = mi_segment_page_size(segment); @@ -227,6 +226,7 @@ static void mi_segment_protect(mi_segment_t* segment, bool protect, mi_os_tld_t* } } } + #endif } } @@ -234,35 +234,39 @@ static void mi_segment_protect(mi_segment_t* segment, bool protect, mi_os_tld_t* Page reset ----------------------------------------------------------- */ -static void mi_page_reset(mi_segment_t* segment, mi_page_t* page, size_t size, mi_segments_tld_t* tld) { +static void mi_page_purge(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) { + // todo: should we purge the guard page as well when MI_SECURE>=2 ? mi_assert_internal(page->is_committed); - if (!mi_option_is_enabled(mi_option_page_reset)) return; - if (segment->mem_is_pinned || page->segment_in_use || !page->is_committed || page->is_reset) return; + mi_assert_internal(!page->segment_in_use); + if (!segment->allow_purge) return; + mi_assert_internal(page->used == 0); + mi_assert_internal(page->free == NULL); + mi_assert_expensive(!mi_pages_purge_contains(page, tld)); size_t psize; void* start = mi_segment_raw_page_start(segment, page, &psize); - page->is_reset = true; - mi_assert_internal(size <= psize); - size_t reset_size = ((size == 0 || size > psize) ? psize : size); - if (reset_size > 0) _mi_mem_reset(start, reset_size, tld->os); + const bool needs_recommit = _mi_os_purge(start, psize, tld->stats); + if (needs_recommit) { page->is_committed = false; } } -static bool mi_page_unreset(mi_segment_t* segment, mi_page_t* page, size_t size, mi_segments_tld_t* tld) -{ - mi_assert_internal(page->is_reset); - mi_assert_internal(page->is_committed); - mi_assert_internal(!segment->mem_is_pinned); - if (segment->mem_is_pinned || !page->is_committed || !page->is_reset) return true; - page->is_reset = false; +static bool mi_page_ensure_committed(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) { + if (page->is_committed) return true; + mi_assert_internal(segment->allow_decommit); + mi_assert_expensive(!mi_pages_purge_contains(page, tld)); + size_t psize; uint8_t* start = mi_segment_raw_page_start(segment, page, &psize); - size_t unreset_size = (size == 0 || size > psize ? psize : size); bool is_zero = false; - bool ok = true; - if (unreset_size > 0) { - ok = _mi_mem_unreset(start, unreset_size, &is_zero, tld->os); + const size_t gsize = (MI_SECURE >= 2 ? _mi_os_page_size() : 0); + bool ok = _mi_os_commit(start, psize + gsize, &is_zero, tld->stats); + if (!ok) return false; // failed to commit! + page->is_committed = true; + page->used = 0; + page->free = NULL; + page->is_zero_init = is_zero; + if (gsize > 0) { + mi_segment_protect_range(start + psize, gsize, true); } - if (is_zero) page->is_zero_init = true; - return ok; + return true; } @@ -270,37 +274,49 @@ static bool mi_page_unreset(mi_segment_t* segment, mi_page_t* page, size_t size, The free page queue ----------------------------------------------------------- */ -// we re-use the `used` field for the expiration counter. Since this is a -// a 32-bit field while the clock is always 64-bit we need to guard -// against overflow, we use substraction to check for expiry which work +// we re-use the `free` field for the expiration counter. Since this is a +// a pointer size field while the clock is always 64-bit we need to guard +// against overflow, we use substraction to check for expiry which works // as long as the reset delay is under (2^30 - 1) milliseconds (~12 days) -static void mi_page_reset_set_expire(mi_page_t* page) { - uint32_t expire = (uint32_t)_mi_clock_now() + mi_option_get(mi_option_reset_delay); - page->used = expire; +static uint32_t mi_page_get_expire( mi_page_t* page ) { + return (uint32_t)((uintptr_t)page->free); } -static bool mi_page_reset_is_expired(mi_page_t* page, mi_msecs_t now) { - int32_t expire = (int32_t)(page->used); +static void mi_page_set_expire( mi_page_t* page, uint32_t expire ) { + page->free = (mi_block_t*)((uintptr_t)expire); +} + +static void mi_page_purge_set_expire(mi_page_t* page) { + mi_assert_internal(mi_page_get_expire(page)==0); + uint32_t expire = (uint32_t)_mi_clock_now() + mi_option_get(mi_option_purge_delay); + mi_page_set_expire(page, expire); +} + +// we re-use the `free` field for the expiration counter. Since this is a +// a pointer size field while the clock is always 64-bit we need to guard +// against overflow, we use substraction to check for expiry which work +// as long as the reset delay is under (2^30 - 1) milliseconds (~12 days) +static bool mi_page_purge_is_expired(mi_page_t* page, mi_msecs_t now) { + int32_t expire = (int32_t)mi_page_get_expire(page); return (((int32_t)now - expire) >= 0); } -static void mi_pages_reset_add(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) { - mi_assert_internal(!page->segment_in_use || !page->is_committed); +static void mi_segment_schedule_purge(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) { + mi_assert_internal(!page->segment_in_use); mi_assert_internal(mi_page_not_in_queue(page,tld)); - mi_assert_expensive(!mi_pages_reset_contains(page, tld)); + mi_assert_expensive(!mi_pages_purge_contains(page, tld)); mi_assert_internal(_mi_page_segment(page)==segment); - if (!mi_option_is_enabled(mi_option_page_reset)) return; - if (segment->mem_is_pinned || page->segment_in_use || !page->is_committed || page->is_reset) return; + if (!segment->allow_purge) return; - if (mi_option_get(mi_option_reset_delay) == 0) { - // reset immediately? - mi_page_reset(segment, page, 0, tld); + if (mi_option_get(mi_option_purge_delay) == 0) { + // purge immediately? + mi_page_purge(segment, page, tld); } - else { + else if (mi_option_get(mi_option_purge_delay) > 0) { // no purging if the delay is negative // otherwise push on the delayed page reset queue - mi_page_queue_t* pq = &tld->pages_reset; + mi_page_queue_t* pq = &tld->pages_purge; // push on top - mi_page_reset_set_expire(page); + mi_page_purge_set_expire(page); page->next = pq->first; page->prev = NULL; if (pq->first == NULL) { @@ -314,29 +330,30 @@ static void mi_pages_reset_add(mi_segment_t* segment, mi_page_t* page, mi_segmen } } -static void mi_pages_reset_remove(mi_page_t* page, mi_segments_tld_t* tld) { +static void mi_page_purge_remove(mi_page_t* page, mi_segments_tld_t* tld) { if (mi_page_not_in_queue(page,tld)) return; - mi_page_queue_t* pq = &tld->pages_reset; + mi_page_queue_t* pq = &tld->pages_purge; mi_assert_internal(pq!=NULL); mi_assert_internal(!page->segment_in_use); - mi_assert_internal(mi_pages_reset_contains(page, tld)); + mi_assert_internal(mi_page_get_expire(page) != 0); + mi_assert_internal(mi_pages_purge_contains(page, tld)); if (page->prev != NULL) page->prev->next = page->next; if (page->next != NULL) page->next->prev = page->prev; if (page == pq->last) pq->last = page->prev; if (page == pq->first) pq->first = page->next; page->next = page->prev = NULL; - page->used = 0; + mi_page_set_expire(page,0); } -static void mi_pages_reset_remove_all_in_segment(mi_segment_t* segment, bool force_reset, mi_segments_tld_t* tld) { - if (segment->mem_is_pinned) return; // never reset in huge OS pages +static void mi_segment_remove_all_purges(mi_segment_t* segment, bool force_purge, mi_segments_tld_t* tld) { + if (segment->memid.is_pinned) return; // never reset in huge OS pages for (size_t i = 0; i < segment->capacity; i++) { mi_page_t* page = &segment->pages[i]; - if (!page->segment_in_use && page->is_committed && !page->is_reset) { - mi_pages_reset_remove(page, tld); - if (force_reset) { - mi_page_reset(segment, page, 0, tld); + if (!page->segment_in_use) { + mi_page_purge_remove(page, tld); + if (force_purge && page->is_committed) { + mi_page_purge(segment, page, tld); } } else { @@ -345,17 +362,17 @@ static void mi_pages_reset_remove_all_in_segment(mi_segment_t* segment, bool for } } -static void mi_reset_delayed(mi_segments_tld_t* tld) { - if (!mi_option_is_enabled(mi_option_page_reset)) return; +static void mi_pages_try_purge(bool force, mi_segments_tld_t* tld) { + if (mi_option_get(mi_option_purge_delay) < 0) return; // purging is not allowed + mi_msecs_t now = _mi_clock_now(); - mi_page_queue_t* pq = &tld->pages_reset; + mi_page_queue_t* pq = &tld->pages_purge; // from oldest up to the first that has not expired yet mi_page_t* page = pq->last; - while (page != NULL && mi_page_reset_is_expired(page,now)) { + while (page != NULL && (force || mi_page_purge_is_expired(page,now))) { mi_page_t* const prev = page->prev; // save previous field - mi_page_reset(_mi_page_segment(page), page, 0, tld); - page->used = 0; - page->prev = page->next = NULL; + mi_page_purge_remove(page, tld); // remove from the list to maintain invariant for mi_page_purge + mi_page_purge(_mi_page_segment(page), page, tld); page = prev; } // discard the reset pages from the queue @@ -373,10 +390,14 @@ static void mi_reset_delayed(mi_segments_tld_t* tld) { Segment size calculations ----------------------------------------------------------- */ +static size_t mi_segment_raw_page_size(const mi_segment_t* segment) { + return (segment->page_kind == MI_PAGE_HUGE ? segment->segment_size : (size_t)1 << segment->page_shift); +} + // Raw start of the page available memory; can be used on uninitialized pages (only `segment_idx` must be set) // The raw start is not taking aligned block allocation into consideration. static uint8_t* mi_segment_raw_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) { - size_t psize = (segment->page_kind == MI_PAGE_HUGE ? segment->segment_size : (size_t)1 << segment->page_shift); + size_t psize = mi_segment_raw_page_size(segment); uint8_t* p = (uint8_t*)segment + page->segment_idx * psize; if (page->segment_idx == 0) { @@ -394,35 +415,36 @@ static uint8_t* mi_segment_raw_page_start(const mi_segment_t* segment, const mi_ #endif if (page_size != NULL) *page_size = psize; - mi_assert_internal(page->xblock_size == 0 || _mi_ptr_page(p) == page); + mi_assert_internal(page->block_size == 0 || _mi_ptr_page(p) == page); mi_assert_internal(_mi_ptr_segment(p) == segment); return p; } // Start of the page available memory; can be used on uninitialized pages (only `segment_idx` must be set) -uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t block_size, size_t* page_size, size_t* pre_size) +uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) { size_t psize; uint8_t* p = mi_segment_raw_page_start(segment, page, &psize); - if (pre_size != NULL) *pre_size = 0; - if (page->segment_idx == 0 && block_size > 0 && segment->page_kind <= MI_PAGE_MEDIUM) { + const size_t block_size = mi_page_block_size(page); + if (/*page->segment_idx == 0 &&*/ block_size > 0 && block_size <= MI_MAX_ALIGN_GUARANTEE) { // for small and medium objects, ensure the page start is aligned with the block size (PR#66 by kickunderscore) + mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM); size_t adjust = block_size - ((uintptr_t)p % block_size); - if (adjust < block_size) { + if (adjust < block_size && psize >= block_size + adjust) { p += adjust; psize -= adjust; - if (pre_size != NULL) *pre_size = adjust; + mi_assert_internal((uintptr_t)p % block_size == 0); } - mi_assert_internal((uintptr_t)p % block_size == 0); } if (page_size != NULL) *page_size = psize; - mi_assert_internal(page->xblock_size==0 || _mi_ptr_page(p) == page); + mi_assert_internal(_mi_ptr_page(p) == page); mi_assert_internal(_mi_ptr_segment(p) == segment); return p; } -static size_t mi_segment_size(size_t capacity, size_t required, size_t* pre_size, size_t* info_size) + +static size_t mi_segment_calculate_sizes(size_t capacity, size_t required, size_t* pre_size, size_t* info_size) { const size_t minsize = sizeof(mi_segment_t) + ((capacity - 1) * sizeof(mi_page_t)) + 16 /* padding */; size_t guardsize = 0; @@ -464,110 +486,104 @@ static void mi_segments_track_size(long segment_size, mi_segments_tld_t* tld) { static void mi_segment_os_free(mi_segment_t* segment, size_t segment_size, mi_segments_tld_t* tld) { segment->thread_id = 0; + _mi_segment_map_freed_at(segment); mi_segments_track_size(-((long)segment_size),tld); + if (segment->was_reclaimed) { + tld->reclaim_count--; + segment->was_reclaimed = false; + } + if (MI_SECURE != 0) { - mi_assert_internal(!segment->mem_is_pinned); + mi_assert_internal(!segment->memid.is_pinned); mi_segment_protect(segment, false, tld->os); // ensure no more guard pages are set } - bool any_reset = false; bool fully_committed = true; + size_t committed_size = 0; + const size_t page_size = mi_segment_raw_page_size(segment); for (size_t i = 0; i < segment->capacity; i++) { mi_page_t* page = &segment->pages[i]; + if (page->is_committed) { committed_size += page_size; } if (!page->is_committed) { fully_committed = false; } - if (page->is_reset) { any_reset = true; } - } - if (any_reset && mi_option_is_enabled(mi_option_reset_decommits)) { - fully_committed = false; } - _mi_mem_free(segment, segment_size, segment->memid, fully_committed, any_reset, tld->os); + MI_UNUSED(fully_committed); + mi_assert_internal((fully_committed && committed_size == segment_size) || (!fully_committed && committed_size < segment_size)); + + _mi_abandoned_await_readers(); // prevent ABA issue if concurrent readers try to access our memory (that might be purged) + _mi_arena_free(segment, segment_size, committed_size, segment->memid, tld->stats); } +// called from `heap_collect`. +void _mi_segments_collect(bool force, mi_segments_tld_t* tld) { + mi_pages_try_purge(force,tld); + #if MI_DEBUG>=2 + if (!_mi_is_main_thread()) { + mi_assert_internal(tld->pages_purge.first == NULL); + mi_assert_internal(tld->pages_purge.last == NULL); + } + #endif +} -// The thread local segment cache is limited to be at most 1/8 of the peak size of segments in use, -#define MI_SEGMENT_CACHE_FRACTION (8) -// note: returned segment may be partially reset -static mi_segment_t* mi_segment_cache_pop(size_t segment_size, mi_segments_tld_t* tld) { - if (segment_size != 0 && segment_size != MI_SEGMENT_SIZE) return NULL; - mi_segment_t* segment = tld->cache; - if (segment == NULL) return NULL; - tld->cache_count--; - tld->cache = segment->next; - segment->next = NULL; - mi_assert_internal(segment->segment_size == MI_SEGMENT_SIZE); - _mi_stat_decrease(&tld->stats->segments_cache, 1); - return segment; -} +/* ----------------------------------------------------------- + Segment allocation +----------------------------------------------------------- */ -static bool mi_segment_cache_full(mi_segments_tld_t* tld) +static mi_segment_t* mi_segment_os_alloc(bool eager_delayed, size_t page_alignment, mi_arena_id_t req_arena_id, + size_t pre_size, size_t info_size, bool commit, size_t segment_size, + mi_segments_tld_t* tld, mi_os_tld_t* tld_os) { - // if (tld->count == 1 && tld->cache_count==0) return false; // always cache at least the final segment of a thread - size_t max_cache = mi_option_get(mi_option_segment_cache); - if (tld->cache_count < max_cache - && tld->cache_count < (1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION)) // at least allow a 1 element cache - ) { - return false; + mi_memid_t memid; + bool allow_large = (!eager_delayed && (MI_SECURE == 0)); // only allow large OS pages once we are no longer lazy + size_t align_offset = 0; + size_t alignment = MI_SEGMENT_SIZE; + if (page_alignment > 0) { + alignment = page_alignment; + align_offset = _mi_align_up(pre_size, MI_SEGMENT_SIZE); + segment_size = segment_size + (align_offset - pre_size); // adjust the segment size } - // take the opportunity to reduce the segment cache if it is too large (now) - // TODO: this never happens as we check against peak usage, should we use current usage instead? - while (tld->cache_count > max_cache) { //(1 + (tld->peak_count / MI_SEGMENT_CACHE_FRACTION))) { - mi_segment_t* segment = mi_segment_cache_pop(0,tld); - mi_assert_internal(segment != NULL); - if (segment != NULL) mi_segment_os_free(segment, segment->segment_size, tld); - } - return true; -} -static bool mi_segment_cache_push(mi_segment_t* segment, mi_segments_tld_t* tld) { - mi_assert_internal(!mi_segment_is_in_free_queue(segment, tld)); - mi_assert_internal(segment->next == NULL); - if (segment->segment_size != MI_SEGMENT_SIZE || mi_segment_cache_full(tld)) { - return false; + mi_segment_t* segment = (mi_segment_t*)_mi_arena_alloc_aligned(segment_size, alignment, align_offset, commit, allow_large, req_arena_id, &memid, tld_os); + if (segment == NULL) { + return NULL; // failed to allocate } - mi_assert_internal(segment->segment_size == MI_SEGMENT_SIZE); - segment->next = tld->cache; - tld->cache = segment; - tld->cache_count++; - _mi_stat_increase(&tld->stats->segments_cache,1); - return true; -} -// called by threads that are terminating to free cached segments -void _mi_segment_thread_collect(mi_segments_tld_t* tld) { - mi_segment_t* segment; - while ((segment = mi_segment_cache_pop(0,tld)) != NULL) { - mi_segment_os_free(segment, segment->segment_size, tld); - } - mi_assert_internal(tld->cache_count == 0); - mi_assert_internal(tld->cache == NULL); -#if MI_DEBUG>=2 - if (!_mi_is_main_thread()) { - mi_assert_internal(tld->pages_reset.first == NULL); - mi_assert_internal(tld->pages_reset.last == NULL); + if (!memid.initially_committed) { + // ensure the initial info is committed + mi_assert_internal(!memid.is_pinned); + bool ok = _mi_os_commit(segment, pre_size, NULL, tld_os->stats); + if (!ok) { + // commit failed; we cannot touch the memory: free the segment directly and return `NULL` + _mi_arena_free(segment, segment_size, 0, memid, tld_os->stats); + return NULL; + } } -#endif -} - -/* ----------------------------------------------------------- - Segment allocation ------------------------------------------------------------ */ + MI_UNUSED(info_size); + segment->memid = memid; + segment->allow_decommit = !memid.is_pinned; + segment->allow_purge = segment->allow_decommit && (mi_option_get(mi_option_purge_delay) >= 0); + segment->segment_size = segment_size; + mi_segments_track_size((long)(segment_size), tld); + _mi_segment_map_allocated_at(segment); + return segment; +} // Allocate a segment from the OS aligned to `MI_SEGMENT_SIZE` . -static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_page_kind_t page_kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) +static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, size_t page_shift, size_t page_alignment, + mi_arena_id_t req_arena_id, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { - // the segment parameter is non-null if it came from our cache - mi_assert_internal(segment==NULL || (required==0 && page_kind <= MI_PAGE_LARGE)); + // required is only > 0 for huge page allocations + mi_assert_internal((required > 0 && page_kind > MI_PAGE_LARGE)|| (required==0 && page_kind <= MI_PAGE_LARGE)); // calculate needed sizes first size_t capacity; if (page_kind == MI_PAGE_HUGE) { - mi_assert_internal(page_shift == MI_SEGMENT_SHIFT && required > 0); + mi_assert_internal(page_shift == MI_SEGMENT_SHIFT + 1 && required > 0); capacity = 1; } else { - mi_assert_internal(required == 0); + mi_assert_internal(required == 0 && page_alignment == 0); size_t page_size = (size_t)1 << page_shift; capacity = MI_SEGMENT_SIZE / page_size; mi_assert_internal(MI_SEGMENT_SIZE % page_size == 0); @@ -575,108 +591,44 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ } size_t info_size; size_t pre_size; - size_t segment_size = mi_segment_size(capacity, required, &pre_size, &info_size); - mi_assert_internal(segment_size >= required); + const size_t init_segment_size = mi_segment_calculate_sizes(capacity, required, &pre_size, &info_size); + mi_assert_internal(init_segment_size >= required); // Initialize parameters - const bool eager_delayed = (page_kind <= MI_PAGE_MEDIUM && tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); + const bool eager_delayed = (page_kind <= MI_PAGE_MEDIUM && // don't delay for large objects + // !_mi_os_has_overcommit() && // never delay on overcommit systems + _mi_current_thread_count() > 1 && // do not delay for the first N threads + tld->peak_count < (size_t)mi_option_get(mi_option_eager_commit_delay)); const bool eager = !eager_delayed && mi_option_is_enabled(mi_option_eager_commit); - bool commit = eager; // || (page_kind >= MI_PAGE_LARGE); - bool pages_still_good = false; - bool is_zero = false; + const bool init_commit = eager; // || (page_kind >= MI_PAGE_LARGE); - // Try to get it from our thread local cache first - if (segment != NULL) { - // came from cache - mi_assert_internal(segment->segment_size == segment_size); - if (page_kind <= MI_PAGE_MEDIUM && segment->page_kind == page_kind && segment->segment_size == segment_size) { - pages_still_good = true; - } - else - { - if (MI_SECURE!=0) { - mi_assert_internal(!segment->mem_is_pinned); - mi_segment_protect(segment, false, tld->os); // reset protection if the page kind differs - } - // different page kinds; unreset any reset pages, and unprotect - // TODO: optimize cache pop to return fitting pages if possible? - for (size_t i = 0; i < segment->capacity; i++) { - mi_page_t* page = &segment->pages[i]; - if (page->is_reset) { - if (!commit && mi_option_is_enabled(mi_option_reset_decommits)) { - page->is_reset = false; - } - else { - mi_page_unreset(segment, page, 0, tld); // todo: only unreset the part that was reset? (instead of the full page) - } - } - } - // ensure the initial info is committed - if (segment->capacity < capacity) { - bool commit_zero = false; - bool ok = _mi_mem_commit(segment, pre_size, &commit_zero, tld->os); - if (commit_zero) is_zero = true; - if (!ok) { - return NULL; - } - } - } - } - else { - // Allocate the segment from the OS - size_t memid; - bool mem_large = (!eager_delayed && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy - bool is_pinned = false; - segment = (mi_segment_t*)_mi_mem_alloc_aligned(segment_size, MI_SEGMENT_SIZE, &commit, &mem_large, &is_pinned, &is_zero, &memid, os_tld); - if (segment == NULL) return NULL; // failed to allocate - if (!commit) { - // ensure the initial info is committed - mi_assert_internal(!mem_large && !is_pinned); - bool commit_zero = false; - bool ok = _mi_mem_commit(segment, pre_size, &commit_zero, tld->os); - if (commit_zero) is_zero = true; - if (!ok) { - // commit failed; we cannot touch the memory: free the segment directly and return `NULL` - _mi_mem_free(segment, MI_SEGMENT_SIZE, memid, false, false, os_tld); - return NULL; - } - } - segment->memid = memid; - segment->mem_is_pinned = (mem_large || is_pinned); - segment->mem_is_committed = commit; - mi_segments_track_size((long)segment_size, tld); - } + // Allocate the segment from the OS (segment_size can change due to alignment) + mi_segment_t* segment = mi_segment_os_alloc(eager_delayed, page_alignment, req_arena_id, pre_size, info_size, init_commit, init_segment_size, tld, os_tld); + if (segment == NULL) return NULL; mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); - mi_assert_internal(segment->mem_is_pinned ? segment->mem_is_committed : true); - mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL); // tsan - if (!pages_still_good) { - // zero the segment info (but not the `mem` fields) - ptrdiff_t ofs = offsetof(mi_segment_t, next); - memset((uint8_t*)segment + ofs, 0, info_size - ofs); - - // initialize pages info - for (uint8_t i = 0; i < capacity; i++) { - segment->pages[i].segment_idx = i; - segment->pages[i].is_reset = false; - segment->pages[i].is_committed = commit; - segment->pages[i].is_zero_init = is_zero; - } - } - else { - // zero the segment info but not the pages info (and mem fields) - ptrdiff_t ofs = offsetof(mi_segment_t, next); - memset((uint8_t*)segment + ofs, 0, offsetof(mi_segment_t,pages) - ofs); + mi_assert_internal(segment->memid.is_pinned ? segment->memid.initially_committed : true); + + // zero the segment info (but not the `mem` fields) + ptrdiff_t ofs = offsetof(mi_segment_t, next); + _mi_memzero((uint8_t*)segment + ofs, info_size - ofs); + + // initialize pages info + const bool is_huge = (page_kind == MI_PAGE_HUGE); + for (size_t i = 0; i < capacity; i++) { + mi_assert_internal(i <= 255); + segment->pages[i].segment_idx = (uint8_t)i; + segment->pages[i].is_committed = segment->memid.initially_committed; + segment->pages[i].is_zero_init = segment->memid.initially_zero; + segment->pages[i].is_huge = is_huge; } // initialize segment->page_kind = page_kind; segment->capacity = capacity; segment->page_shift = page_shift; - segment->segment_size = segment_size; segment->segment_info_size = pre_size; segment->thread_id = _mi_thread_id(); segment->cookie = _mi_ptr_cookie(segment); - // _mi_stat_increase(&tld->stats->page_committed, segment->segment_info_size); // set protection mi_segment_protect(segment, true, tld->os); @@ -686,21 +638,16 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ mi_segment_insert_in_free_queue(segment, tld); } - //fprintf(stderr,"mimalloc: alloc segment at %p\n", (void*)segment); return segment; } -static mi_segment_t* mi_segment_alloc(size_t required, mi_page_kind_t page_kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { - return mi_segment_init(NULL, required, page_kind, page_shift, tld, os_tld); -} static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t* tld) { - UNUSED(force); + MI_UNUSED(force); mi_assert(segment != NULL); - // note: don't reset pages even on abandon as the whole segment is freed? (and ready for reuse) - bool force_reset = (force && mi_option_is_enabled(mi_option_abandoned_page_reset)); - mi_pages_reset_remove_all_in_segment(segment, force_reset, tld); - mi_segment_remove_from_free_queue(segment,tld); + // don't purge as we are freeing now + mi_segment_remove_all_purges(segment, false /* don't force as we are about to free */, tld); + mi_segment_remove_from_free_queue(segment, tld); mi_assert_expensive(!mi_segment_queue_contains(&tld->small_free, segment)); mi_assert_expensive(!mi_segment_queue_contains(&tld->medium_free, segment)); @@ -708,13 +655,8 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t mi_assert(segment->prev == NULL); _mi_stat_decrease(&tld->stats->page_committed, segment->segment_info_size); - if (!force && mi_segment_cache_push(segment, tld)) { - // it is put in our cache - } - else { - // otherwise return it to the OS - mi_segment_os_free(segment, segment->segment_size, tld); - } + // return it to the OS + mi_segment_os_free(segment, segment->segment_size, tld); } /* ----------------------------------------------------------- @@ -729,35 +671,15 @@ static bool mi_segment_has_free(const mi_segment_t* segment) { static bool mi_segment_page_claim(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) { mi_assert_internal(_mi_page_segment(page) == segment); mi_assert_internal(!page->segment_in_use); - mi_pages_reset_remove(page, tld); + mi_page_purge_remove(page, tld); + // check commit - if (!page->is_committed) { - mi_assert_internal(!segment->mem_is_pinned); - mi_assert_internal(!page->is_reset); - size_t psize; - uint8_t* start = mi_segment_raw_page_start(segment, page, &psize); - bool is_zero = false; - const size_t gsize = (MI_SECURE >= 2 ? _mi_os_page_size() : 0); - bool ok = _mi_mem_commit(start, psize + gsize, &is_zero, tld->os); - if (!ok) return false; // failed to commit! - if (gsize > 0) { mi_segment_protect_range(start + psize, gsize, true); } - if (is_zero) { page->is_zero_init = true; } - page->is_committed = true; - } + if (!mi_page_ensure_committed(segment, page, tld)) return false; + // set in-use before doing unreset to prevent delayed reset page->segment_in_use = true; segment->used++; - // check reset - if (page->is_reset) { - mi_assert_internal(!segment->mem_is_pinned); - bool ok = mi_page_unreset(segment, page, 0, tld); - if (!ok) { - page->segment_in_use = false; - segment->used--; - return false; - } - } - mi_assert_internal(page->segment_in_use); + mi_assert_internal(page->segment_in_use && page->is_committed && page->used==0 && !mi_pages_purge_contains(page,tld)); mi_assert_internal(segment->used <= segment->capacity); if (segment->used == segment->capacity && segment->page_kind <= MI_PAGE_MEDIUM) { // if no more free pages, remove from the queue @@ -775,7 +697,7 @@ static bool mi_segment_page_claim(mi_segment_t* segment, mi_page_t* page, mi_seg static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld); // clear page data; can be called on abandoned segments -static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, bool allow_reset, mi_segments_tld_t* tld) +static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, mi_segments_tld_t* tld) { mi_assert_internal(page->segment_in_use); mi_assert_internal(mi_page_all_free(page)); @@ -786,35 +708,30 @@ static void mi_segment_page_clear(mi_segment_t* segment, mi_page_t* page, bool a _mi_stat_decrease(&tld->stats->page_committed, inuse); _mi_stat_decrease(&tld->stats->pages, 1); - // calculate the used size from the raw (non-aligned) start of the page - //size_t pre_size; - //_mi_segment_page_start(segment, page, page->block_size, NULL, &pre_size); - //size_t used_size = pre_size + (page->capacity * page->block_size); - page->is_zero_init = false; page->segment_in_use = false; - // reset the page memory to reduce memory pressure? - // note: must come after setting `segment_in_use` to false but before block_size becomes 0 - //mi_page_reset(segment, page, 0 /*used_size*/, tld); - - // zero the page data, but not the segment fields and capacity, and block_size (for page size calculations) - uint32_t block_size = page->xblock_size; + // zero the page data, but not the segment fields and capacity, page start, and block_size (for page size calculations) + size_t block_size = page->block_size; + uint8_t block_size_shift = page->block_size_shift; + uint8_t heap_tag = page->heap_tag; + uint8_t* page_start = page->page_start; uint16_t capacity = page->capacity; uint16_t reserved = page->reserved; ptrdiff_t ofs = offsetof(mi_page_t,capacity); - memset((uint8_t*)page + ofs, 0, sizeof(*page) - ofs); + _mi_memzero((uint8_t*)page + ofs, sizeof(*page) - ofs); page->capacity = capacity; page->reserved = reserved; - page->xblock_size = block_size; + page->block_size = block_size; + page->block_size_shift = block_size_shift; + page->heap_tag = heap_tag; + page->page_start = page_start; segment->used--; - // add to the free page list for reuse/reset - if (allow_reset) { - mi_pages_reset_add(segment, page, tld); - } + // schedule purge + mi_segment_schedule_purge(segment, page, tld); - page->capacity = 0; // after reset these can be zero'd now + page->capacity = 0; // after purge these can be zero'd now page->reserved = 0; } @@ -823,10 +740,10 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) mi_assert(page != NULL); mi_segment_t* segment = _mi_page_segment(page); mi_assert_expensive(mi_segment_is_valid(segment,tld)); - mi_reset_delayed(tld); + mi_pages_try_purge(false /*force?*/, tld); // mark it as free now - mi_segment_page_clear(segment, page, true, tld); + mi_segment_page_clear(segment, page, tld); if (segment->used == 0) { // no more used pages; remove from the free list and free the segment @@ -838,9 +755,11 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld) mi_segment_abandon(segment,tld); } else if (segment->used + 1 == segment->capacity) { - mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM); // for now we only support small and medium pages - // move back to segments free list - mi_segment_insert_in_free_queue(segment,tld); + mi_assert_internal(segment->page_kind <= MI_PAGE_MEDIUM); // large and huge pages are always the single page in a segment + if (segment->page_kind <= MI_PAGE_MEDIUM) { + // move back to segments free list + mi_segment_insert_in_free_queue(segment,tld); + } } } } @@ -852,171 +771,21 @@ Abandonment When threads terminate, they can leave segments with live blocks (reached through other threads). Such segments are "abandoned" and will be reclaimed by other threads to -reuse their pages and/or free them eventually - -We maintain a global list of abandoned segments that are -reclaimed on demand. Since this is shared among threads -the implementation needs to avoid the A-B-A problem on -popping abandoned segments: <https://en.wikipedia.org/wiki/ABA_problem> -We use tagged pointers to avoid accidentially identifying -reused segments, much like stamped references in Java. -Secondly, we maintain a reader counter to avoid resetting -or decommitting segments that have a pending read operation. - -Note: the current implementation is one possible design; -another way might be to keep track of abandoned segments -in the regions. This would have the advantage of keeping -all concurrent code in one place and not needing to deal -with ABA issues. The drawback is that it is unclear how to -scan abandoned segments efficiently in that case as they -would be spread among all other segments in the regions. ------------------------------------------------------------ */ +reuse their pages and/or free them eventually. The +`thread_id` of such segments is 0. -// Use the bottom 20-bits (on 64-bit) of the aligned segment pointers -// to put in a tag that increments on update to avoid the A-B-A problem. -#define MI_TAGGED_MASK MI_SEGMENT_MASK -typedef uintptr_t mi_tagged_segment_t; +When a block is freed in an abandoned segment, the segment +is reclaimed into that thread. -static mi_segment_t* mi_tagged_segment_ptr(mi_tagged_segment_t ts) { - return (mi_segment_t*)(ts & ~MI_TAGGED_MASK); -} - -static mi_tagged_segment_t mi_tagged_segment(mi_segment_t* segment, mi_tagged_segment_t ts) { - mi_assert_internal(((uintptr_t)segment & MI_TAGGED_MASK) == 0); - uintptr_t tag = ((ts & MI_TAGGED_MASK) + 1) & MI_TAGGED_MASK; - return ((uintptr_t)segment | tag); -} - -// This is a list of visited abandoned pages that were full at the time. -// this list migrates to `abandoned` when that becomes NULL. The use of -// this list reduces contention and the rate at which segments are visited. -static mi_decl_cache_align _Atomic(mi_segment_t*) abandoned_visited; // = NULL - -// The abandoned page list (tagged as it supports pop) -static mi_decl_cache_align _Atomic(mi_tagged_segment_t) abandoned; // = NULL - -// Maintain these for debug purposes (these counts may be a bit off) -static mi_decl_cache_align _Atomic(uintptr_t) abandoned_count; -static mi_decl_cache_align _Atomic(uintptr_t) abandoned_visited_count; - -// We also maintain a count of current readers of the abandoned list -// in order to prevent resetting/decommitting segment memory if it might -// still be read. -static mi_decl_cache_align _Atomic(uintptr_t) abandoned_readers; // = 0 - -// Push on the visited list -static void mi_abandoned_visited_push(mi_segment_t* segment) { - mi_assert_internal(segment->thread_id == 0); - mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_segment_t,&segment->abandoned_next) == NULL); - mi_assert_internal(segment->next == NULL && segment->prev == NULL); - mi_assert_internal(segment->used > 0); - mi_segment_t* anext = mi_atomic_load_ptr_relaxed(mi_segment_t, &abandoned_visited); - do { - mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, anext); - } while (!mi_atomic_cas_ptr_weak_release(mi_segment_t, &abandoned_visited, &anext, segment)); - mi_atomic_increment_relaxed(&abandoned_visited_count); -} - -// Move the visited list to the abandoned list. -static bool mi_abandoned_visited_revisit(void) -{ - // quick check if the visited list is empty - if (mi_atomic_load_ptr_relaxed(mi_segment_t, &abandoned_visited) == NULL) return false; - - // grab the whole visited list - mi_segment_t* first = mi_atomic_exchange_ptr_acq_rel(mi_segment_t, &abandoned_visited, NULL); - if (first == NULL) return false; - - // first try to swap directly if the abandoned list happens to be NULL - mi_tagged_segment_t afirst; - mi_tagged_segment_t ts = mi_atomic_load_relaxed(&abandoned); - if (mi_tagged_segment_ptr(ts)==NULL) { - uintptr_t count = mi_atomic_load_relaxed(&abandoned_visited_count); - afirst = mi_tagged_segment(first, ts); - if (mi_atomic_cas_strong_acq_rel(&abandoned, &ts, afirst)) { - mi_atomic_add_relaxed(&abandoned_count, count); - mi_atomic_sub_relaxed(&abandoned_visited_count, count); - return true; - } - } - - // find the last element of the visited list: O(n) - mi_segment_t* last = first; - mi_segment_t* next; - while ((next = mi_atomic_load_ptr_relaxed(mi_segment_t, &last->abandoned_next)) != NULL) { - last = next; - } - - // and atomically prepend to the abandoned list - // (no need to increase the readers as we don't access the abandoned segments) - mi_tagged_segment_t anext = mi_atomic_load_relaxed(&abandoned); - uintptr_t count; - do { - count = mi_atomic_load_relaxed(&abandoned_visited_count); - mi_atomic_store_ptr_release(mi_segment_t, &last->abandoned_next, mi_tagged_segment_ptr(anext)); - afirst = mi_tagged_segment(first, anext); - } while (!mi_atomic_cas_weak_release(&abandoned, &anext, afirst)); - mi_atomic_add_relaxed(&abandoned_count, count); - mi_atomic_sub_relaxed(&abandoned_visited_count, count); - return true; -} - -// Push on the abandoned list. -static void mi_abandoned_push(mi_segment_t* segment) { - mi_assert_internal(segment->thread_id == 0); - mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_segment_t, &segment->abandoned_next) == NULL); - mi_assert_internal(segment->next == NULL && segment->prev == NULL); - mi_assert_internal(segment->used > 0); - mi_tagged_segment_t next; - mi_tagged_segment_t ts = mi_atomic_load_relaxed(&abandoned); - do { - mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, mi_tagged_segment_ptr(ts)); - next = mi_tagged_segment(segment, ts); - } while (!mi_atomic_cas_weak_release(&abandoned, &ts, next)); - mi_atomic_increment_relaxed(&abandoned_count); -} +Moreover, if threads are looking for a fresh segment, they +will first consider abondoned segments -- these can be found +by scanning the arena memory +(segments outside arena memoryare only reclaimed by a free). +----------------------------------------------------------- */ -// Wait until there are no more pending reads on segments that used to be in the abandoned list +// legacy: Wait until there are no more pending reads on segments that used to be in the abandoned list void _mi_abandoned_await_readers(void) { - uintptr_t n; - do { - n = mi_atomic_load_acquire(&abandoned_readers); - if (n != 0) mi_atomic_yield(); - } while (n != 0); -} - -// Pop from the abandoned list -static mi_segment_t* mi_abandoned_pop(void) { - mi_segment_t* segment; - // Check efficiently if it is empty (or if the visited list needs to be moved) - mi_tagged_segment_t ts = mi_atomic_load_relaxed(&abandoned); - segment = mi_tagged_segment_ptr(ts); - if (mi_likely(segment == NULL)) { - if (mi_likely(!mi_abandoned_visited_revisit())) { // try to swap in the visited list on NULL - return NULL; - } - } - - // Do a pop. We use a reader count to prevent - // a segment to be decommitted while a read is still pending, - // and a tagged pointer to prevent A-B-A link corruption. - // (this is called from `region.c:_mi_mem_free` for example) - mi_atomic_increment_relaxed(&abandoned_readers); // ensure no segment gets decommitted - mi_tagged_segment_t next = 0; - ts = mi_atomic_load_acquire(&abandoned); - do { - segment = mi_tagged_segment_ptr(ts); - if (segment != NULL) { - mi_segment_t* anext = mi_atomic_load_ptr_relaxed(mi_segment_t, &segment->abandoned_next); - next = mi_tagged_segment(anext, ts); // note: reads the segment's `abandoned_next` field so should not be decommitted - } - } while (segment != NULL && !mi_atomic_cas_weak_acq_rel(&abandoned, &ts, next)); - mi_atomic_decrement_relaxed(&abandoned_readers); // release reader lock - if (segment != NULL) { - mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL); - mi_atomic_decrement_relaxed(&abandoned_count); - } - return segment; + // nothing needed } /* ----------------------------------------------------------- @@ -1026,22 +795,27 @@ static mi_segment_t* mi_abandoned_pop(void) { static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(segment->used == segment->abandoned); mi_assert_internal(segment->used > 0); - mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_segment_t, &segment->abandoned_next) == NULL); mi_assert_expensive(mi_segment_is_valid(segment, tld)); + // Potentially force purge. Only abandoned segments in arena memory can be + // reclaimed without a free so if a segment is not from an arena we force purge here to be conservative. + mi_pages_try_purge(false /*force?*/,tld); + const bool force_purge = (segment->memid.memkind != MI_MEM_ARENA) || mi_option_is_enabled(mi_option_abandoned_page_purge); + mi_segment_remove_all_purges(segment, force_purge, tld); + // remove the segment from the free page queue if needed - mi_reset_delayed(tld); - mi_pages_reset_remove_all_in_segment(segment, mi_option_is_enabled(mi_option_abandoned_page_reset), tld); mi_segment_remove_from_free_queue(segment, tld); mi_assert_internal(segment->next == NULL && segment->prev == NULL); // all pages in the segment are abandoned; add it to the abandoned list _mi_stat_increase(&tld->stats->segments_abandoned, 1); mi_segments_track_size(-((long)segment->segment_size), tld); - segment->thread_id = 0; segment->abandoned_visits = 0; - mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL); - mi_abandoned_push(segment); + if (segment->was_reclaimed) { + tld->reclaim_count--; + segment->was_reclaimed = false; + } + _mi_arena_segment_mark_abandoned(segment); } void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { @@ -1049,7 +823,7 @@ void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE); mi_assert_internal(mi_page_heap(page) == NULL); mi_segment_t* segment = _mi_page_segment(page); - mi_assert_expensive(!mi_pages_reset_contains(page, tld)); + mi_assert_expensive(!mi_pages_purge_contains(page, tld)); mi_assert_expensive(mi_segment_is_valid(segment, tld)); segment->abandoned++; _mi_stat_increase(&tld->stats->pages_abandoned, 1); @@ -1067,7 +841,6 @@ void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld) { // Possibly clear pages and check if free space is available static bool mi_segment_check_free(mi_segment_t* segment, size_t block_size, bool* all_pages_free) { - mi_assert_internal(block_size < MI_HUGE_BLOCK_SIZE); bool has_page = false; size_t pages_used = 0; size_t pages_used_empty = 0; @@ -1083,7 +856,7 @@ static bool mi_segment_check_free(mi_segment_t* segment, size_t block_size, bool pages_used_empty++; has_page = true; } - else if (page->xblock_size == block_size && mi_page_has_any_available(page)) { + else if (mi_page_block_size(page) == block_size && mi_page_has_any_available(page)) { // a page has available free blocks of the right size has_page = true; } @@ -1104,11 +877,13 @@ static bool mi_segment_check_free(mi_segment_t* segment, size_t block_size, bool // Reclaim a segment; returns NULL if the segment was freed // set `right_page_reclaimed` to `true` if it reclaimed a page of the right `block_size` that was not full. static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, size_t requested_block_size, bool* right_page_reclaimed, mi_segments_tld_t* tld) { - mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_segment_t, &segment->abandoned_next) == NULL); if (right_page_reclaimed != NULL) { *right_page_reclaimed = false; } - - segment->thread_id = _mi_thread_id(); + // can be 0 still with abandoned_next, or already a thread id for segments outside an arena that are reclaimed on a free. + mi_assert_internal(mi_atomic_load_relaxed(&segment->thread_id) == 0 || mi_atomic_load_relaxed(&segment->thread_id) == _mi_thread_id()); + mi_atomic_store_release(&segment->thread_id, _mi_thread_id()); segment->abandoned_visits = 0; + segment->was_reclaimed = true; + tld->reclaim_count++; mi_segments_track_size((long)segment->segment_size, tld); mi_assert_internal(segment->next == NULL && segment->prev == NULL); mi_assert_expensive(mi_segment_is_valid(segment, tld)); @@ -1117,7 +892,6 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, for (size_t i = 0; i < segment->capacity; i++) { mi_page_t* page = &segment->pages[i]; if (page->segment_in_use) { - mi_assert_internal(!page->is_reset); mi_assert_internal(page->is_committed); mi_assert_internal(mi_page_not_in_queue(page, tld)); mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE); @@ -1126,26 +900,32 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, mi_assert(page->next == NULL); _mi_stat_decrease(&tld->stats->pages_abandoned, 1); // set the heap again and allow heap thread delayed free again. - mi_page_set_heap(page, heap); + mi_heap_t* target_heap = _mi_heap_by_tag(heap, page->heap_tag); // allow custom heaps to separate objects + if (target_heap == NULL) { + target_heap = heap; + _mi_error_message(EINVAL, "page with tag %u cannot be reclaimed by a heap with the same tag (using %u instead)\n", page->heap_tag, heap->tag ); + } + mi_page_set_heap(page, target_heap); _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, true); // override never (after heap is set) - // TODO: should we not collect again given that we just collected in `check_free`? _mi_page_free_collect(page, false); // ensure used count is up to date if (mi_page_all_free(page)) { // if everything free already, clear the page directly - mi_segment_page_clear(segment, page, true, tld); // reset is ok now + mi_segment_page_clear(segment, page, tld); // reset is ok now } else { // otherwise reclaim it into the heap - _mi_page_reclaim(heap, page); - if (requested_block_size == page->xblock_size && mi_page_has_any_available(page)) { + _mi_page_reclaim(target_heap, page); + if (requested_block_size == mi_page_block_size(page) && mi_page_has_any_available(page) && heap == target_heap) { if (right_page_reclaimed != NULL) { *right_page_reclaimed = true; } } } } - else if (page->is_committed && !page->is_reset) { // not in-use, and not reset yet + /* expired + else if (page->is_committed) { // not in-use, and not reset yet // note: do not reset as this includes pages that were not touched before - // mi_pages_reset_add(segment, page, tld); + // mi_pages_purge_add(segment, page, tld); } + */ } mi_assert_internal(segment->abandoned == 0); if (segment->used == 0) { @@ -1161,21 +941,55 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, } } +// attempt to reclaim a particular segment (called from multi threaded free `alloc.c:mi_free_block_mt`) +bool _mi_segment_attempt_reclaim(mi_heap_t* heap, mi_segment_t* segment) { + if (mi_atomic_load_relaxed(&segment->thread_id) != 0) return false; // it is not abandoned + // don't reclaim more from a free than half the current segments + // this is to prevent a pure free-ing thread to start owning too many segments + if (heap->tld->segments.reclaim_count * 2 > heap->tld->segments.count) return false; + if (_mi_arena_segment_clear_abandoned(segment)) { // atomically unabandon + mi_segment_t* res = mi_segment_reclaim(segment, heap, 0, NULL, &heap->tld->segments); + mi_assert_internal(res == segment); + return (res != NULL); + } + return false; +} void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld) { mi_segment_t* segment; - while ((segment = mi_abandoned_pop()) != NULL) { + mi_arena_field_cursor_t current; _mi_arena_field_cursor_init(heap, ¤t); + while ((segment = _mi_arena_segment_clear_abandoned_next(¤t)) != NULL) { mi_segment_reclaim(segment, heap, 0, NULL, tld); } } +static long mi_segment_get_reclaim_tries(void) { + // limit the tries to 10% (default) of the abandoned segments with at least 8 and at most 1024 tries. + const size_t perc = (size_t)mi_option_get_clamp(mi_option_max_segment_reclaim, 0, 100); + if (perc <= 0) return 0; + const size_t total_count = _mi_arena_segment_abandoned_count(); + if (total_count == 0) return 0; + const size_t relative_count = (total_count > 10000 ? (total_count / 100) * perc : (total_count * perc) / 100); // avoid overflow + long max_tries = (long)(relative_count <= 1 ? 1 : (relative_count > 1024 ? 1024 : relative_count)); + if (max_tries < 8 && total_count > 8) { max_tries = 8; } + return max_tries; +} + static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t block_size, mi_page_kind_t page_kind, bool* reclaimed, mi_segments_tld_t* tld) { *reclaimed = false; + long max_tries = mi_segment_get_reclaim_tries(); + if (max_tries <= 0) return NULL; + mi_segment_t* segment; - int max_tries = 8; // limit the work to bound allocation times - while ((max_tries-- > 0) && ((segment = mi_abandoned_pop()) != NULL)) { + mi_arena_field_cursor_t current; _mi_arena_field_cursor_init(heap, ¤t); + while ((max_tries-- > 0) && ((segment = _mi_arena_segment_clear_abandoned_next(¤t)) != NULL)) + { segment->abandoned_visits++; + // todo: should we respect numa affinity for abondoned reclaim? perhaps only for the first visit? + // todo: an arena exclusive heap will potentially visit many abandoned unsuitable segments and use many tries + // Perhaps we can skip non-suitable ones in a better way? + bool is_suitable = _mi_heap_memid_is_suitable(heap, segment->memid); bool all_pages_free; bool has_page = mi_segment_check_free(segment,block_size,&all_pages_free); // try to free up pages (due to concurrent frees) if (all_pages_free) { @@ -1186,19 +1000,20 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t block_size, // freeing but that would violate some invariants temporarily) mi_segment_reclaim(segment, heap, 0, NULL, tld); } - else if (has_page && segment->page_kind == page_kind) { + else if (has_page && segment->page_kind == page_kind && is_suitable) { // found a free page of the right kind, or page of the right block_size with free space // we return the result of reclaim (which is usually `segment`) as it might free // the segment due to concurrent frees (in which case `NULL` is returned). return mi_segment_reclaim(segment, heap, block_size, reclaimed, tld); } - else if (segment->abandoned_visits >= 3) { + else if (segment->abandoned_visits >= 3 && is_suitable) { // always reclaim on 3rd visit to limit the list length. mi_segment_reclaim(segment, heap, 0, NULL, tld); } else { - // otherwise, push on the visited list so it gets not looked at too quickly again - mi_abandoned_visited_push(segment); + // otherwise, mark it back as abandoned + // todo: reset delayed pages in the segment? + _mi_arena_segment_mark_abandoned(segment); } } return NULL; @@ -1212,16 +1027,12 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t block_size, static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t block_size, mi_page_kind_t page_kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { mi_assert_internal(page_kind <= MI_PAGE_LARGE); - mi_assert_internal(block_size < MI_HUGE_BLOCK_SIZE); - // 1. try to get a segment from our cache - mi_segment_t* segment = mi_segment_cache_pop(MI_SEGMENT_SIZE, tld); - if (segment != NULL) { - mi_segment_init(segment, 0, page_kind, page_shift, tld, os_tld); - return segment; - } - // 2. try to reclaim an abandoned segment + mi_assert_internal(block_size <= MI_LARGE_OBJ_SIZE_MAX); + + // 1. try to reclaim an abandoned segment bool reclaimed; - segment = mi_segment_try_reclaim(heap, block_size, page_kind, &reclaimed, tld); + mi_segment_t* segment = mi_segment_try_reclaim(heap, block_size, page_kind, &reclaimed, tld); + mi_assert_internal(segment == NULL || _mi_arena_memid_is_suitable(segment->memid, heap->arena_id)); if (reclaimed) { // reclaimed the right page right into the heap mi_assert_internal(segment != NULL && segment->page_kind == page_kind && page_kind <= MI_PAGE_LARGE); @@ -1231,8 +1042,8 @@ static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t block_s // reclaimed a segment with empty pages (of `page_kind`) in it return segment; } - // 3. otherwise allocate a fresh segment - return mi_segment_alloc(0, page_kind, page_shift, tld, os_tld); + // 2. otherwise allocate a fresh segment + return mi_segment_alloc(0, page_kind, page_shift, 0, heap->arena_id, tld, os_tld); } @@ -1260,24 +1071,33 @@ static mi_page_t* mi_segment_page_alloc_in(mi_segment_t* segment, mi_segments_tl return mi_segment_find_free(segment, tld); } -static mi_page_t* mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, mi_page_kind_t kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { +static mi_page_t* mi_segment_page_try_alloc_in_queue(mi_heap_t* heap, mi_page_kind_t kind, mi_segments_tld_t* tld) { // find an available segment the segment free queue mi_segment_queue_t* const free_queue = mi_segment_free_queue_of_kind(kind, tld); - if (mi_segment_queue_is_empty(free_queue)) { + for (mi_segment_t* segment = free_queue->first; segment != NULL; segment = segment->next) { + if (_mi_arena_memid_is_suitable(segment->memid, heap->arena_id) && mi_segment_has_free(segment)) { + return mi_segment_page_alloc_in(segment, tld); + } + } + return NULL; +} + +static mi_page_t* mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, mi_page_kind_t kind, size_t page_shift, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { + mi_page_t* page = mi_segment_page_try_alloc_in_queue(heap, kind, tld); + if (page == NULL) { // possibly allocate or reclaim a fresh segment mi_segment_t* const segment = mi_segment_reclaim_or_alloc(heap, block_size, kind, page_shift, tld, os_tld); if (segment == NULL) return NULL; // return NULL if out-of-memory (or reclaimed) - mi_assert_internal(free_queue->first == segment); mi_assert_internal(segment->page_kind==kind); mi_assert_internal(segment->used < segment->capacity); + mi_assert_internal(_mi_arena_memid_is_suitable(segment->memid, heap->arena_id)); + page = mi_segment_page_try_alloc_in_queue(heap, kind, tld); // this should now succeed } - mi_assert_internal(free_queue->first != NULL); - mi_page_t* const page = mi_segment_page_alloc_in(free_queue->first, tld); mi_assert_internal(page != NULL); -#if MI_DEBUG>=2 + #if MI_DEBUG>=2 && !MI_TRACK_ENABLED // && !MI_TSAN // verify it is committed - _mi_segment_page_start(_mi_page_segment(page), page, sizeof(void*), NULL, NULL)[0] = 0; -#endif + mi_segment_raw_page_start(_mi_page_segment(page), page, NULL)[0] = 0; + #endif return page; } @@ -1298,24 +1118,45 @@ static mi_page_t* mi_segment_large_page_alloc(mi_heap_t* heap, size_t block_size if (segment == NULL) return NULL; mi_page_t* page = mi_segment_find_free(segment, tld); mi_assert_internal(page != NULL); -#if MI_DEBUG>=2 - _mi_segment_page_start(segment, page, sizeof(void*), NULL, NULL)[0] = 0; +#if MI_DEBUG>=2 && !MI_TRACK_ENABLED // && !MI_TSAN + mi_segment_raw_page_start(segment, page, NULL)[0] = 0; #endif return page; } -static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) +static mi_page_t* mi_segment_huge_page_alloc(size_t size, size_t page_alignment, mi_arena_id_t req_arena_id, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { - mi_segment_t* segment = mi_segment_alloc(size, MI_PAGE_HUGE, MI_SEGMENT_SHIFT,tld,os_tld); + mi_segment_t* segment = mi_segment_alloc(size, MI_PAGE_HUGE, MI_SEGMENT_SHIFT + 1, page_alignment, req_arena_id, tld, os_tld); if (segment == NULL) return NULL; mi_assert_internal(mi_segment_page_size(segment) - segment->segment_info_size - (2*(MI_SECURE == 0 ? 0 : _mi_os_page_size())) >= size); + #if MI_HUGE_PAGE_ABANDON segment->thread_id = 0; // huge pages are immediately abandoned mi_segments_track_size(-(long)segment->segment_size, tld); + #endif mi_page_t* page = mi_segment_find_free(segment, tld); mi_assert_internal(page != NULL); + mi_assert_internal(page->is_huge); + + // for huge pages we initialize the block_size as we may + // overallocate to accommodate large alignments. + size_t psize; + uint8_t* start = mi_segment_raw_page_start(segment, page, &psize); + page->block_size = psize; + + // reset the part of the page that will not be used; this can be quite large (close to MI_SEGMENT_SIZE) + if (page_alignment > 0 && segment->allow_decommit && page->is_committed) { + uint8_t* aligned_p = (uint8_t*)_mi_align_up((uintptr_t)start, page_alignment); + mi_assert_internal(_mi_is_aligned(aligned_p, page_alignment)); + mi_assert_internal(psize - (aligned_p - start) >= size); + uint8_t* decommit_start = start + sizeof(mi_block_t); // for the free list + ptrdiff_t decommit_size = aligned_p - decommit_start; + _mi_os_reset(decommit_start, decommit_size, os_tld->stats); // do not decommit as it may be in a region + } + return page; } +#if MI_HUGE_PAGE_ABANDON // free huge block from another thread void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block_t* block) { // huge page segments are always abandoned and can be freed immediately by any thread @@ -1326,12 +1167,12 @@ void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block // claim it and free mi_heap_t* heap = mi_heap_get_default(); // issue #221; don't use the internal get_default_heap as we need to ensure the thread is initialized. // paranoia: if this it the last reference, the cas should always succeed - uintptr_t expected_tid = 0; + size_t expected_tid = 0; if (mi_atomic_cas_strong_acq_rel(&segment->thread_id, &expected_tid, heap->thread_id)) { mi_block_set_next(page, block, page->free); page->free = block; page->used--; - page->is_zero = false; + page->is_zero_init = false; mi_assert(page->used == 0); mi_tld_t* tld = heap->tld; mi_segments_track_size((long)segment->segment_size, &tld->segments); @@ -1344,27 +1185,52 @@ void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block #endif } +#else +// reset memory of a huge block from another thread +void _mi_segment_huge_page_reset(mi_segment_t* segment, mi_page_t* page, mi_block_t* block) { + mi_assert_internal(segment->page_kind == MI_PAGE_HUGE); + mi_assert_internal(segment == _mi_page_segment(page)); + mi_assert_internal(page->used == 1); // this is called just before the free + mi_assert_internal(page->free == NULL); + if (segment->allow_decommit && page->is_committed) { + size_t usize = mi_usable_size(block); + if (usize > sizeof(mi_block_t)) { + usize = usize - sizeof(mi_block_t); + uint8_t* p = (uint8_t*)block + sizeof(mi_block_t); + _mi_os_reset(p, usize, &_mi_stats_main); + } + } +} +#endif + /* ----------------------------------------------------------- Page allocation ----------------------------------------------------------- */ -mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { +mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { mi_page_t* page; - if (block_size <= MI_SMALL_OBJ_SIZE_MAX) { + if mi_unlikely(page_alignment > MI_BLOCK_ALIGNMENT_MAX) { + mi_assert_internal(_mi_is_power_of_two(page_alignment)); + mi_assert_internal(page_alignment >= MI_SEGMENT_SIZE); + //mi_assert_internal((MI_SEGMENT_SIZE % page_alignment) == 0); + if (page_alignment < MI_SEGMENT_SIZE) { page_alignment = MI_SEGMENT_SIZE; } + page = mi_segment_huge_page_alloc(block_size, page_alignment, heap->arena_id, tld, os_tld); + } + else if (block_size <= MI_SMALL_OBJ_SIZE_MAX) { page = mi_segment_small_page_alloc(heap, block_size, tld, os_tld); } else if (block_size <= MI_MEDIUM_OBJ_SIZE_MAX) { page = mi_segment_medium_page_alloc(heap, block_size, tld, os_tld); } - else if (block_size <= MI_LARGE_OBJ_SIZE_MAX) { + else if (block_size <= MI_LARGE_OBJ_SIZE_MAX /* || mi_is_good_fit(block_size, MI_LARGE_PAGE_SIZE - sizeof(mi_segment_t)) */ ) { page = mi_segment_large_page_alloc(heap, block_size, tld, os_tld); } else { - page = mi_segment_huge_page_alloc(block_size,tld,os_tld); + page = mi_segment_huge_page_alloc(block_size, page_alignment, heap->arena_id, tld, os_tld); } mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld)); mi_assert_internal(page == NULL || (mi_segment_page_size(_mi_page_segment(page)) - (MI_SECURE == 0 ? 0 : _mi_os_page_size())) >= block_size); - mi_reset_delayed(tld); + // mi_segment_try_purge(tld); mi_assert_internal(page == NULL || mi_page_not_in_queue(page, tld)); return page; } diff --git a/contrib/libs/mimalloc/src/static.c b/contrib/libs/mimalloc/src/static.c deleted file mode 100644 index 4b3abc285a..0000000000 --- a/contrib/libs/mimalloc/src/static.c +++ /dev/null @@ -1,39 +0,0 @@ -/* ---------------------------------------------------------------------------- -Copyright (c) 2018-2020, Microsoft Research, Daan Leijen -This is free software; you can redistribute it and/or modify it under the -terms of the MIT license. A copy of the license can be found in the file -"LICENSE" at the root of this distribution. ------------------------------------------------------------------------------*/ -#ifndef _DEFAULT_SOURCE -#define _DEFAULT_SOURCE -#endif -#if defined(__sun) -// same remarks as os.c for the static's context. -#undef _XOPEN_SOURCE -#undef _POSIX_C_SOURCE -#endif - -#include "mimalloc.h" -#include "mimalloc-internal.h" - -// For a static override we create a single object file -// containing the whole library. If it is linked first -// it will override all the standard library allocation -// functions (on Unix's). -#include "stats.c" -#include "random.c" -#include "os.c" -#include "bitmap.c" -#include "arena.c" -#include "region.c" -#include "segment.c" -#include "page.c" -#include "heap.c" -#include "alloc.c" -#include "alloc-aligned.c" -#include "alloc-posix.c" -#if MI_OSX_ZONE -#include "alloc-override-osx.c" -#endif -#include "init.c" -#include "options.c" diff --git a/contrib/libs/mimalloc/src/stats.c b/contrib/libs/mimalloc/src/stats.c index 7358539aa5..99cf89c5b7 100644 --- a/contrib/libs/mimalloc/src/stats.c +++ b/contrib/libs/mimalloc/src/stats.c @@ -5,10 +5,10 @@ terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ #include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" +#include "mimalloc/prim.h" -#include <stdio.h> // fputs, stderr #include <string.h> // memset #if defined(_MSC_VER) && (_MSC_VER < 1920) @@ -21,7 +21,7 @@ terms of the MIT license. A copy of the license can be found in the file static bool mi_is_in_main(void* stat) { return ((uint8_t*)stat >= (uint8_t*)&_mi_stats_main - && (uint8_t*)stat < ((uint8_t*)&_mi_stats_main + sizeof(mi_stats_t))); + && (uint8_t*)stat < ((uint8_t*)&_mi_stats_main + sizeof(mi_stats_t))); } static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) { @@ -51,7 +51,7 @@ static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) { } } -void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount) { +void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount) { if (mi_is_in_main(stat)) { mi_atomic_addi64_relaxed( &stat->count, 1 ); mi_atomic_addi64_relaxed( &stat->total, (int64_t)amount ); @@ -77,7 +77,7 @@ static void mi_stat_add(mi_stat_count_t* stat, const mi_stat_count_t* src, int64 mi_atomic_addi64_relaxed( &stat->allocated, src->allocated * unit); mi_atomic_addi64_relaxed( &stat->current, src->current * unit); mi_atomic_addi64_relaxed( &stat->freed, src->freed * unit); - // peak scores do not work across threads.. + // peak scores do not work across threads.. mi_atomic_addi64_relaxed( &stat->peak, src->peak * unit); } @@ -95,6 +95,7 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) { mi_stat_add(&stats->reserved, &src->reserved, 1); mi_stat_add(&stats->committed, &src->committed, 1); mi_stat_add(&stats->reset, &src->reset, 1); + mi_stat_add(&stats->purged, &src->purged, 1); mi_stat_add(&stats->page_committed, &src->page_committed, 1); mi_stat_add(&stats->pages_abandoned, &src->pages_abandoned, 1); @@ -110,12 +111,13 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) { mi_stat_counter_add(&stats->pages_extended, &src->pages_extended, 1); mi_stat_counter_add(&stats->mmap_calls, &src->mmap_calls, 1); mi_stat_counter_add(&stats->commit_calls, &src->commit_calls, 1); + mi_stat_counter_add(&stats->reset_calls, &src->reset_calls, 1); + mi_stat_counter_add(&stats->purge_calls, &src->purge_calls, 1); mi_stat_counter_add(&stats->page_no_retire, &src->page_no_retire, 1); mi_stat_counter_add(&stats->searches, &src->searches, 1); mi_stat_counter_add(&stats->normal_count, &src->normal_count, 1); - mi_stat_counter_add(&stats->huge_count, &src->huge_count, 1); - mi_stat_counter_add(&stats->giant_count, &src->giant_count, 1); + mi_stat_counter_add(&stats->huge_count, &src->huge_count, 1); #if MI_STAT>1 for (size_t i = 0; i <= MI_BIN_HUGE; i++) { if (src->normal_bins[i].allocated > 0 || src->normal_bins[i].freed > 0) { @@ -129,31 +131,35 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) { Display statistics ----------------------------------------------------------- */ -// unit > 0 : size in binary bytes +// unit > 0 : size in binary bytes // unit == 0: count as decimal // unit < 0 : count in binary static void mi_printf_amount(int64_t n, int64_t unit, mi_output_fun* out, void* arg, const char* fmt) { - char buf[32]; + char buf[32]; buf[0] = 0; int len = 32; - const char* suffix = (unit <= 0 ? " " : "b"); + const char* suffix = (unit <= 0 ? " " : "B"); const int64_t base = (unit == 0 ? 1000 : 1024); if (unit>0) n *= unit; const int64_t pos = (n < 0 ? -n : n); if (pos < base) { - snprintf(buf, len, "%d %s ", (int)n, suffix); + if (n!=1 || suffix[0] != 'B') { // skip printing 1 B for the unit column + _mi_snprintf(buf, len, "%lld %-3s", (long long)n, (n==0 ? "" : suffix)); + } } else { int64_t divider = base; - const char* magnitude = "k"; - if (pos >= divider*base) { divider *= base; magnitude = "m"; } - if (pos >= divider*base) { divider *= base; magnitude = "g"; } + const char* magnitude = "K"; + if (pos >= divider*base) { divider *= base; magnitude = "M"; } + if (pos >= divider*base) { divider *= base; magnitude = "G"; } const int64_t tens = (n / (divider/10)); const long whole = (long)(tens/10); const long frac1 = (long)(tens%10); - snprintf(buf, len, "%ld.%ld %s%s", whole, (frac1 < 0 ? -frac1 : frac1), magnitude, suffix); + char unitdesc[8]; + _mi_snprintf(unitdesc, 8, "%s%s%s", magnitude, (base==1024 ? "i" : ""), suffix); + _mi_snprintf(buf, len, "%ld.%ld %-3s", whole, (frac1 < 0 ? -frac1 : frac1), unitdesc); } - _mi_fprintf(out, arg, (fmt==NULL ? "%11s" : fmt), buf); + _mi_fprintf(out, arg, (fmt==NULL ? "%12s" : fmt), buf); } @@ -162,58 +168,71 @@ static void mi_print_amount(int64_t n, int64_t unit, mi_output_fun* out, void* a } static void mi_print_count(int64_t n, int64_t unit, mi_output_fun* out, void* arg) { - if (unit==1) _mi_fprintf(out, arg, "%11s"," "); + if (unit==1) _mi_fprintf(out, arg, "%12s"," "); else mi_print_amount(n,0,out,arg); } -static void mi_stat_print(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out, void* arg ) { +static void mi_stat_print_ex(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out, void* arg, const char* notok ) { _mi_fprintf(out, arg,"%10s:", msg); - if (unit>0) { - mi_print_amount(stat->peak, unit, out, arg); - mi_print_amount(stat->allocated, unit, out, arg); - mi_print_amount(stat->freed, unit, out, arg); - mi_print_amount(stat->current, unit, out, arg); - mi_print_amount(unit, 1, out, arg); - mi_print_count(stat->allocated, unit, out, arg); - if (stat->allocated > stat->freed) - _mi_fprintf(out, arg, " not all freed!\n"); - else - _mi_fprintf(out, arg, " ok\n"); - } - else if (unit<0) { - mi_print_amount(stat->peak, -1, out, arg); - mi_print_amount(stat->allocated, -1, out, arg); - mi_print_amount(stat->freed, -1, out, arg); - mi_print_amount(stat->current, -1, out, arg); - if (unit==-1) { - _mi_fprintf(out, arg, "%22s", ""); + if (unit != 0) { + if (unit > 0) { + mi_print_amount(stat->peak, unit, out, arg); + mi_print_amount(stat->allocated, unit, out, arg); + mi_print_amount(stat->freed, unit, out, arg); + mi_print_amount(stat->current, unit, out, arg); + mi_print_amount(unit, 1, out, arg); + mi_print_count(stat->allocated, unit, out, arg); } else { - mi_print_amount(-unit, 1, out, arg); - mi_print_count((stat->allocated / -unit), 0, out, arg); + mi_print_amount(stat->peak, -1, out, arg); + mi_print_amount(stat->allocated, -1, out, arg); + mi_print_amount(stat->freed, -1, out, arg); + mi_print_amount(stat->current, -1, out, arg); + if (unit == -1) { + _mi_fprintf(out, arg, "%24s", ""); + } + else { + mi_print_amount(-unit, 1, out, arg); + mi_print_count((stat->allocated / -unit), 0, out, arg); + } + } + if (stat->allocated > stat->freed) { + _mi_fprintf(out, arg, " "); + _mi_fprintf(out, arg, (notok == NULL ? "not all freed" : notok)); + _mi_fprintf(out, arg, "\n"); } - if (stat->allocated > stat->freed) - _mi_fprintf(out, arg, " not all freed!\n"); - else + else { _mi_fprintf(out, arg, " ok\n"); + } } else { mi_print_amount(stat->peak, 1, out, arg); mi_print_amount(stat->allocated, 1, out, arg); - _mi_fprintf(out, arg, "%11s", " "); // no freed + _mi_fprintf(out, arg, "%11s", " "); // no freed mi_print_amount(stat->current, 1, out, arg); _mi_fprintf(out, arg, "\n"); } } +static void mi_stat_print(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out, void* arg) { + mi_stat_print_ex(stat, msg, unit, out, arg, NULL); +} + +static void mi_stat_peak_print(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out, void* arg) { + _mi_fprintf(out, arg, "%10s:", msg); + mi_print_amount(stat->peak, unit, out, arg); + _mi_fprintf(out, arg, "\n"); +} + static void mi_stat_counter_print(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out, void* arg ) { _mi_fprintf(out, arg, "%10s:", msg); mi_print_amount(stat->total, -1, out, arg); _mi_fprintf(out, arg, "\n"); } + static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out, void* arg) { - const int64_t avg_tens = (stat->count == 0 ? 0 : (stat->total*10 / stat->count)); + const int64_t avg_tens = (stat->count == 0 ? 0 : (stat->total*10 / stat->count)); const long avg_whole = (long)(avg_tens/10); const long avg_frac1 = (long)(avg_tens%10); _mi_fprintf(out, arg, "%10s: %5ld.%ld avg\n", msg, avg_whole, avg_frac1); @@ -221,7 +240,7 @@ static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char* static void mi_print_header(mi_output_fun* out, void* arg ) { - _mi_fprintf(out, arg, "%10s: %10s %10s %10s %10s %10s %10s\n", "heap stats", "peak ", "total ", "freed ", "current ", "unit ", "count "); + _mi_fprintf(out, arg, "%10s: %11s %11s %11s %11s %11s %11s\n", "heap stats", "peak ", "total ", "freed ", "current ", "unit ", "count "); } #if MI_STAT>1 @@ -232,7 +251,7 @@ static void mi_stats_print_bins(const mi_stat_count_t* bins, size_t max, const c if (bins[i].allocated > 0) { found = true; int64_t unit = _mi_bin_size((uint8_t)i); - snprintf(buf, 64, "%s %3lu", fmt, (long)i); + _mi_snprintf(buf, 64, "%s %3lu", fmt, (long)i); mi_stat_print(&bins[i], buf, unit, out, arg); } } @@ -253,7 +272,7 @@ typedef struct buffered_s { mi_output_fun* out; // original output function void* arg; // and state char* buf; // local buffer of at least size `count+1` - size_t used; // currently used chars `used <= count` + size_t used; // currently used chars `used <= count` size_t count; // total chars available for output } buffered_t; @@ -263,7 +282,7 @@ static void mi_buffered_flush(buffered_t* buf) { buf->used = 0; } -static void mi_buffered_out(const char* msg, void* arg) { +static void mi_cdecl mi_buffered_out(const char* msg, void* arg) { buffered_t* buf = (buffered_t*)arg; if (msg==NULL || buf==NULL) return; for (const char* src = msg; *src != 0; src++) { @@ -279,8 +298,6 @@ static void mi_buffered_out(const char* msg, void* arg) { // Print statistics //------------------------------------------------------------ -static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults); - static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) mi_attr_noexcept { // wrap the output function to be line buffered char buf[256]; @@ -296,21 +313,20 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) #endif #if MI_STAT mi_stat_print(&stats->normal, "normal", (stats->normal_count.count == 0 ? 1 : -(stats->normal.allocated / stats->normal_count.count)), out, arg); - mi_stat_print(&stats->huge, "huge", (stats->huge_count.count == 0 ? 1 : -(stats->huge.allocated / stats->huge_count.count)), out, arg); - mi_stat_print(&stats->giant, "giant", (stats->giant_count.count == 0 ? 1 : -(stats->giant.allocated / stats->giant_count.count)), out, arg); + mi_stat_print(&stats->huge, "huge", (stats->huge_count.count == 0 ? 1 : -(stats->huge.allocated / stats->huge_count.count)), out, arg); mi_stat_count_t total = { 0,0,0,0 }; mi_stat_add(&total, &stats->normal, 1); mi_stat_add(&total, &stats->huge, 1); - mi_stat_add(&total, &stats->giant, 1); mi_stat_print(&total, "total", 1, out, arg); #endif #if MI_STAT>1 mi_stat_print(&stats->malloc, "malloc req", 1, out, arg); _mi_fprintf(out, arg, "\n"); #endif - mi_stat_print(&stats->reserved, "reserved", 1, out, arg); - mi_stat_print(&stats->committed, "committed", 1, out, arg); - mi_stat_print(&stats->reset, "reset", 1, out, arg); + mi_stat_print_ex(&stats->reserved, "reserved", 1, out, arg, ""); + mi_stat_print_ex(&stats->committed, "committed", 1, out, arg, ""); + mi_stat_peak_print(&stats->reset, "reset", 1, out, arg ); + mi_stat_peak_print(&stats->purged, "purged", 1, out, arg ); mi_stat_print(&stats->page_committed, "touched", 1, out, arg); mi_stat_print(&stats->segments, "segments", -1, out, arg); mi_stat_print(&stats->segments_abandoned, "-abandoned", -1, out, arg); @@ -319,22 +335,27 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) mi_stat_print(&stats->pages_abandoned, "-abandoned", -1, out, arg); mi_stat_counter_print(&stats->pages_extended, "-extended", out, arg); mi_stat_counter_print(&stats->page_no_retire, "-noretire", out, arg); + mi_stat_counter_print(&stats->arena_count, "arenas", out, arg); + mi_stat_counter_print(&stats->arena_crossover_count, "-crossover", out, arg); + mi_stat_counter_print(&stats->arena_rollback_count, "-rollback", out, arg); mi_stat_counter_print(&stats->mmap_calls, "mmaps", out, arg); mi_stat_counter_print(&stats->commit_calls, "commits", out, arg); + mi_stat_counter_print(&stats->reset_calls, "resets", out, arg); + mi_stat_counter_print(&stats->purge_calls, "purges", out, arg); mi_stat_print(&stats->threads, "threads", -1, out, arg); mi_stat_counter_print_avg(&stats->searches, "searches", out, arg); - _mi_fprintf(out, arg, "%10s: %7i\n", "numa nodes", _mi_os_numa_node_count()); - - mi_msecs_t elapsed; - mi_msecs_t user_time; - mi_msecs_t sys_time; + _mi_fprintf(out, arg, "%10s: %5zu\n", "numa nodes", _mi_os_numa_node_count()); + + size_t elapsed; + size_t user_time; + size_t sys_time; size_t current_rss; size_t peak_rss; size_t current_commit; size_t peak_commit; size_t page_faults; - mi_stat_process_info(&elapsed, &user_time, &sys_time, ¤t_rss, &peak_rss, ¤t_commit, &peak_commit, &page_faults); - _mi_fprintf(out, arg, "%10s: %7ld.%03ld s\n", "elapsed", elapsed/1000, elapsed%1000); + mi_process_info(&elapsed, &user_time, &sys_time, ¤t_rss, &peak_rss, ¤t_commit, &peak_commit, &page_faults); + _mi_fprintf(out, arg, "%10s: %5ld.%03ld s\n", "elapsed", elapsed/1000, elapsed%1000); _mi_fprintf(out, arg, "%10s: user: %ld.%03ld s, system: %ld.%03ld s, faults: %lu, rss: ", "process", user_time/1000, user_time%1000, sys_time/1000, sys_time%1000, (unsigned long)page_faults ); mi_printf_amount((int64_t)peak_rss, 1, out, arg, "%s"); @@ -342,7 +363,7 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) _mi_fprintf(out, arg, ", commit: "); mi_printf_amount((int64_t)peak_commit, 1, out, arg, "%s"); } - _mi_fprintf(out, arg, "\n"); + _mi_fprintf(out, arg, "\n"); } static mi_msecs_t mi_process_start; // = 0 @@ -392,42 +413,12 @@ void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept { // ---------------------------------------------------------------- // Basic timer for convenience; use milli-seconds to avoid doubles // ---------------------------------------------------------------- -#ifdef _WIN32 -#include <windows.h> -static mi_msecs_t mi_to_msecs(LARGE_INTEGER t) { - static LARGE_INTEGER mfreq; // = 0 - if (mfreq.QuadPart == 0LL) { - LARGE_INTEGER f; - QueryPerformanceFrequency(&f); - mfreq.QuadPart = f.QuadPart/1000LL; - if (mfreq.QuadPart == 0) mfreq.QuadPart = 1; - } - return (mi_msecs_t)(t.QuadPart / mfreq.QuadPart); -} + +static mi_msecs_t mi_clock_diff; mi_msecs_t _mi_clock_now(void) { - LARGE_INTEGER t; - QueryPerformanceCounter(&t); - return mi_to_msecs(t); -} -#else -#include <time.h> -#ifdef CLOCK_REALTIME -mi_msecs_t _mi_clock_now(void) { - struct timespec t; - clock_gettime(CLOCK_REALTIME, &t); - return ((mi_msecs_t)t.tv_sec * 1000) + ((mi_msecs_t)t.tv_nsec / 1000000); -} -#else -// low resolution timer -mi_msecs_t _mi_clock_now(void) { - return ((mi_msecs_t)clock() / ((mi_msecs_t)CLOCKS_PER_SEC / 1000)); + return _mi_prim_clock_now(); } -#endif -#endif - - -static mi_msecs_t mi_clock_diff; mi_msecs_t _mi_clock_start(void) { if (mi_clock_diff == 0.0) { @@ -447,129 +438,27 @@ mi_msecs_t _mi_clock_end(mi_msecs_t start) { // Basic process statistics // -------------------------------------------------------- -#if defined(_WIN32) -#include <windows.h> -#include <psapi.h> -#pragma comment(lib,"psapi.lib") - -static mi_msecs_t filetime_msecs(const FILETIME* ftime) { - ULARGE_INTEGER i; - i.LowPart = ftime->dwLowDateTime; - i.HighPart = ftime->dwHighDateTime; - mi_msecs_t msecs = (i.QuadPart / 10000); // FILETIME is in 100 nano seconds - return msecs; -} - -static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) -{ - *elapsed = _mi_clock_end(mi_process_start); - FILETIME ct; - FILETIME ut; - FILETIME st; - FILETIME et; - GetProcessTimes(GetCurrentProcess(), &ct, &et, &st, &ut); - *utime = filetime_msecs(&ut); - *stime = filetime_msecs(&st); - PROCESS_MEMORY_COUNTERS info; - GetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info)); - *current_rss = (size_t)info.WorkingSetSize; - *peak_rss = (size_t)info.PeakWorkingSetSize; - *current_commit = (size_t)info.PagefileUsage; - *peak_commit = (size_t)info.PeakPagefileUsage; - *page_faults = (size_t)info.PageFaultCount; -} - -#elif defined(__unix__) || defined(__unix) || defined(unix) || defined(__APPLE__) || defined(__HAIKU__) -#include <stdio.h> -#include <unistd.h> -#include <sys/resource.h> - -#if defined(__APPLE__) -#include <mach/mach.h> -#endif - -#if defined(__HAIKU__) -#error #include <kernel/OS.h> -#endif - -static mi_msecs_t timeval_secs(const struct timeval* tv) { - return ((mi_msecs_t)tv->tv_sec * 1000L) + ((mi_msecs_t)tv->tv_usec / 1000L); -} - -static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) -{ - *elapsed = _mi_clock_end(mi_process_start); - struct rusage rusage; - getrusage(RUSAGE_SELF, &rusage); - *utime = timeval_secs(&rusage.ru_utime); - *stime = timeval_secs(&rusage.ru_stime); -#if !defined(__HAIKU__) - *page_faults = rusage.ru_majflt; -#endif - // estimate commit using our stats - *peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak)); - *current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current)); - *current_rss = *current_commit; // estimate -#if defined(__HAIKU__) - // Haiku does not have (yet?) a way to - // get these stats per process - thread_info tid; - area_info mem; - ssize_t c; - get_thread_info(find_thread(0), &tid); - while (get_next_area_info(tid.team, &c, &mem) == B_OK) { - *peak_rss += mem.ram_size; - } -#elif defined(__APPLE__) - *peak_rss = rusage.ru_maxrss; // BSD reports in bytes - struct mach_task_basic_info info; - mach_msg_type_number_t infoCount = MACH_TASK_BASIC_INFO_COUNT; - if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO, (task_info_t)&info, &infoCount) == KERN_SUCCESS) { - *current_rss = (size_t)info.resident_size; - } -#else - *peak_rss = rusage.ru_maxrss * 1024; // Linux reports in KiB -#endif -} - -#else -#ifndef __wasi__ -// WebAssembly instances are not processes -#pragma message("define a way to get process info") -#endif - -static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) -{ - *elapsed = _mi_clock_end(mi_process_start); - *peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak)); - *current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current)); - *peak_rss = *peak_commit; - *current_rss = *current_commit; - *page_faults = 0; - *utime = 0; - *stime = 0; -} -#endif - - mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) mi_attr_noexcept { - mi_msecs_t elapsed = 0; - mi_msecs_t utime = 0; - mi_msecs_t stime = 0; - size_t current_rss0 = 0; - size_t peak_rss0 = 0; - size_t current_commit0 = 0; - size_t peak_commit0 = 0; - size_t page_faults0 = 0; - mi_stat_process_info(&elapsed,&utime, &stime, ¤t_rss0, &peak_rss0, ¤t_commit0, &peak_commit0, &page_faults0); - if (elapsed_msecs!=NULL) *elapsed_msecs = (elapsed < 0 ? 0 : (elapsed < (mi_msecs_t)PTRDIFF_MAX ? (size_t)elapsed : PTRDIFF_MAX)); - if (user_msecs!=NULL) *user_msecs = (utime < 0 ? 0 : (utime < (mi_msecs_t)PTRDIFF_MAX ? (size_t)utime : PTRDIFF_MAX)); - if (system_msecs!=NULL) *system_msecs = (stime < 0 ? 0 : (stime < (mi_msecs_t)PTRDIFF_MAX ? (size_t)stime : PTRDIFF_MAX)); - if (current_rss!=NULL) *current_rss = current_rss0; - if (peak_rss!=NULL) *peak_rss = peak_rss0; - if (current_commit!=NULL) *current_commit = current_commit0; - if (peak_commit!=NULL) *peak_commit = peak_commit0; - if (page_faults!=NULL) *page_faults = page_faults0; + mi_process_info_t pinfo; + _mi_memzero_var(pinfo); + pinfo.elapsed = _mi_clock_end(mi_process_start); + pinfo.current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current)); + pinfo.peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak)); + pinfo.current_rss = pinfo.current_commit; + pinfo.peak_rss = pinfo.peak_commit; + pinfo.utime = 0; + pinfo.stime = 0; + pinfo.page_faults = 0; + + _mi_prim_process_info(&pinfo); + + if (elapsed_msecs!=NULL) *elapsed_msecs = (pinfo.elapsed < 0 ? 0 : (pinfo.elapsed < (mi_msecs_t)PTRDIFF_MAX ? (size_t)pinfo.elapsed : PTRDIFF_MAX)); + if (user_msecs!=NULL) *user_msecs = (pinfo.utime < 0 ? 0 : (pinfo.utime < (mi_msecs_t)PTRDIFF_MAX ? (size_t)pinfo.utime : PTRDIFF_MAX)); + if (system_msecs!=NULL) *system_msecs = (pinfo.stime < 0 ? 0 : (pinfo.stime < (mi_msecs_t)PTRDIFF_MAX ? (size_t)pinfo.stime : PTRDIFF_MAX)); + if (current_rss!=NULL) *current_rss = pinfo.current_rss; + if (peak_rss!=NULL) *peak_rss = pinfo.peak_rss; + if (current_commit!=NULL) *current_commit = pinfo.current_commit; + if (peak_commit!=NULL) *peak_commit = pinfo.peak_commit; + if (page_faults!=NULL) *page_faults = pinfo.page_faults; } - diff --git a/contrib/libs/mimalloc/ya.make b/contrib/libs/mimalloc/ya.make index b23b8c2394..cad456e3ea 100644 --- a/contrib/libs/mimalloc/ya.make +++ b/contrib/libs/mimalloc/ya.make @@ -1,21 +1,51 @@ -LIBRARY() +# Generated by devtools/yamaker from nixpkgs 22.11. -CFLAGS( - -w - -DMI_MALLOC_OVERRIDE=1 - -DMI_PADDING=0 -) +LIBRARY() LICENSE(MIT) LICENSE_TEXTS(.yandex_meta/licenses.list.txt) -VERSION(1.7.2) +VERSION(1.8.7) + +ORIGINAL_SOURCE(https://github.com/microsoft/mimalloc/archive/refs/tags/v1.8.7.tar.gz) + +PEERDIR( + library/cpp/sanitizer/include +) + +ADDINCL( + GLOBAL contrib/libs/mimalloc/include +) + +NO_COMPILER_WARNINGS() + +NO_RUNTIME() + +CFLAGS( + -DMI_MALLOC_OVERRIDE + -DMI_SHARED_LIB + -DMI_SHARED_LIB_EXPORT + -DMI_STATIC_LIB +) -ADDINCL(contrib/libs/mimalloc/include) -NO_UTIL() SRCS( - src/static.c + src/alloc-aligned.c + src/alloc-posix.c + src/alloc.c + src/arena.c + src/bitmap.c + src/heap.c + src/init.c + src/libc.c + src/options.c + src/os.c + src/page.c + src/prim/prim.c + src/random.c + src/segment-map.c + src/segment.c + src/stats.c ) END() |