| author | Stanislav Kirillov <staskirillov@gmail.com> | 2022-02-10 16:46:08 +0300 |
|---|---|---|
| committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:46:08 +0300 |
| commit | cb68f224c46a8ee52ac3fdd2a32534b8bb8dc134 (patch) | |
| tree | 1a2c5ffcf89eb53ecd79dbc9bc0a195c27404d0c /contrib/libs/tbb/src | |
| parent | 92fe2b1e7bc79f7b95adef61714fc003f6ea4a1c (diff) | |
| download | ydb-cb68f224c46a8ee52ac3fdd2a32534b8bb8dc134.tar.gz | |
Restoring authorship annotation for Stanislav Kirillov <staskirillov@gmail.com>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/tbb/src')
61 files changed, 11812 insertions, 11812 deletions
diff --git a/contrib/libs/tbb/src/tbb/allocator.cpp b/contrib/libs/tbb/src/tbb/allocator.cpp index a7ef1b3aa0..6bf5a0be01 100644 --- a/contrib/libs/tbb/src/tbb/allocator.cpp +++ b/contrib/libs/tbb/src/tbb/allocator.cpp @@ -1,234 +1,234 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#include "oneapi/tbb/version.h" - -#include "oneapi/tbb/detail/_exception.h" -#include "oneapi/tbb/detail/_assert.h" -#include "oneapi/tbb/detail/_utils.h" - -#include "dynamic_link.h" -#include "misc.h" - -#include <cstdlib> - -#if _WIN32 || _WIN64 -#include <Windows.h> -#else -#include <dlfcn.h> -#endif /* _WIN32||_WIN64 */ - -#if __TBB_WEAK_SYMBOLS_PRESENT - -#pragma weak scalable_malloc -#pragma weak scalable_free -#pragma weak scalable_aligned_malloc -#pragma weak scalable_aligned_free - -extern "C" { - void* scalable_malloc(std::size_t); - void scalable_free(void*); - void* scalable_aligned_malloc(std::size_t, std::size_t); - void scalable_aligned_free(void*); -} - -#endif /* __TBB_WEAK_SYMBOLS_PRESENT */ - -namespace tbb { -namespace detail { -namespace r1 { - -//! Initialization routine used for first indirect call via allocate_handler. -static void* initialize_allocate_handler(std::size_t size); - -//! Handler for memory allocation -static void* (*allocate_handler)(std::size_t size) = &initialize_allocate_handler; - -//! Handler for memory deallocation -static void (*deallocate_handler)(void* pointer) = nullptr; - -//! Initialization routine used for first indirect call via cache_aligned_allocate_handler. -static void* initialize_cache_aligned_allocate_handler(std::size_t n, std::size_t alignment); - -//! Allocates memory using standard malloc. It is used when scalable_allocator is not available -static void* std_cache_aligned_allocate(std::size_t n, std::size_t alignment); - -//! Allocates memory using standard free. It is used when scalable_allocator is not available -static void std_cache_aligned_deallocate(void* p); - -//! Handler for padded memory allocation -static void* (*cache_aligned_allocate_handler)(std::size_t n, std::size_t alignment) = &initialize_cache_aligned_allocate_handler; - -//! Handler for padded memory deallocation -static void (*cache_aligned_deallocate_handler)(void* p) = nullptr; - -//! Table describing how to link the handlers. -static const dynamic_link_descriptor MallocLinkTable[] = { - DLD(scalable_malloc, allocate_handler), - DLD(scalable_free, deallocate_handler), - DLD(scalable_aligned_malloc, cache_aligned_allocate_handler), - DLD(scalable_aligned_free, cache_aligned_deallocate_handler), -}; - - -#if TBB_USE_DEBUG -#define DEBUG_SUFFIX "_debug" -#else -#define DEBUG_SUFFIX -#endif /* TBB_USE_DEBUG */ - -// MALLOCLIB_NAME is the name of the oneTBB memory allocator library. 
-#if _WIN32||_WIN64 -#define MALLOCLIB_NAME "tbbmalloc" DEBUG_SUFFIX ".dll" -#elif __APPLE__ -#define MALLOCLIB_NAME "libtbbmalloc" DEBUG_SUFFIX ".dylib" -#elif __FreeBSD__ || __NetBSD__ || __OpenBSD__ || __sun || _AIX || __ANDROID__ -#define MALLOCLIB_NAME "libtbbmalloc" DEBUG_SUFFIX ".so" -#elif __linux__ // Note that order of these #elif's is important! -#define MALLOCLIB_NAME "libtbbmalloc" DEBUG_SUFFIX ".so.2" -#else -#error Unknown OS -#endif - -//! Initialize the allocation/free handler pointers. -/** Caller is responsible for ensuring this routine is called exactly once. - The routine attempts to dynamically link with the TBB memory allocator. - If that allocator is not found, it links to malloc and free. */ -void initialize_handler_pointers() { - __TBB_ASSERT(allocate_handler == &initialize_allocate_handler, NULL); - bool success = dynamic_link(MALLOCLIB_NAME, MallocLinkTable, 4); - if(!success) { - // If unsuccessful, set the handlers to the default routines. - // This must be done now, and not before FillDynamicLinks runs, because if other - // threads call the handlers, we want them to go through the DoOneTimeInitializations logic, - // which forces them to wait. - allocate_handler = &std::malloc; - deallocate_handler = &std::free; - cache_aligned_allocate_handler = &std_cache_aligned_allocate; - cache_aligned_deallocate_handler = &std_cache_aligned_deallocate; - } - - PrintExtraVersionInfo( "ALLOCATOR", success?"scalable_malloc":"malloc" ); -} - -static std::once_flag initialization_state; -void initialize_cache_aligned_allocator() { - std::call_once(initialization_state, &initialize_handler_pointers); -} - -//! Executed on very first call through allocate_handler -static void* initialize_allocate_handler(std::size_t size) { - initialize_cache_aligned_allocator(); - __TBB_ASSERT(allocate_handler != &initialize_allocate_handler, NULL); - return (*allocate_handler)(size); -} - -//! 
Executed on very first call through cache_aligned_allocate_handler -static void* initialize_cache_aligned_allocate_handler(std::size_t bytes, std::size_t alignment) { - initialize_cache_aligned_allocator(); - __TBB_ASSERT(cache_aligned_allocate_handler != &initialize_cache_aligned_allocate_handler, NULL); - return (*cache_aligned_allocate_handler)(bytes, alignment); -} - -// TODO: use CPUID to find actual line size, though consider backward compatibility -// nfs - no false sharing -static constexpr std::size_t nfs_size = 128; - -std::size_t __TBB_EXPORTED_FUNC cache_line_size() { - return nfs_size; -} - -void* __TBB_EXPORTED_FUNC cache_aligned_allocate(std::size_t size) { - const std::size_t cache_line_size = nfs_size; - __TBB_ASSERT(is_power_of_two(cache_line_size), "must be power of two"); - - // Check for overflow - if (size + cache_line_size < size) { - throw_exception(exception_id::bad_alloc); - } - // scalable_aligned_malloc considers zero size request an error, and returns NULL - if (size == 0) size = 1; - - void* result = cache_aligned_allocate_handler(size, cache_line_size); - if (!result) { - throw_exception(exception_id::bad_alloc); - } - __TBB_ASSERT(is_aligned(result, cache_line_size), "The returned address isn't aligned"); - return result; -} - -void __TBB_EXPORTED_FUNC cache_aligned_deallocate(void* p) { - __TBB_ASSERT(cache_aligned_deallocate_handler, "Initialization has not been yet."); - (*cache_aligned_deallocate_handler)(p); -} - -static void* std_cache_aligned_allocate(std::size_t bytes, std::size_t alignment) { - // TODO: make it common with cache_aligned_resource - std::size_t space = alignment + bytes; - std::uintptr_t base = reinterpret_cast<std::uintptr_t>(std::malloc(space)); - if (!base) { - return nullptr; - } - std::uintptr_t result = (base + nfs_size) & ~(nfs_size - 1); - // Round up to the next cache line (align the base address) - __TBB_ASSERT((result - base) >= sizeof(std::uintptr_t), "Cannot store a base pointer to the header"); - __TBB_ASSERT(space - (result - base) >= bytes, "Not enough space for the storage"); - - // Record where block actually starts. - (reinterpret_cast<std::uintptr_t*>(result))[-1] = base; - return reinterpret_cast<void*>(result); -} - -static void std_cache_aligned_deallocate(void* p) { - if (p) { - __TBB_ASSERT(reinterpret_cast<std::uintptr_t>(p) >= 0x4096, "attempt to free block not obtained from cache_aligned_allocator"); - // Recover where block actually starts - std::uintptr_t base = (reinterpret_cast<std::uintptr_t*>(p))[-1]; - __TBB_ASSERT(((base + nfs_size) & ~(nfs_size - 1)) == reinterpret_cast<std::uintptr_t>(p), "Incorrect alignment or not allocated by std_cache_aligned_deallocate?"); - std::free(reinterpret_cast<void*>(base)); - } -} - -void* __TBB_EXPORTED_FUNC allocate_memory(std::size_t size) { - void* result = (*allocate_handler)(size); - if (!result) { - throw_exception(exception_id::bad_alloc); - } - return result; -} - -void __TBB_EXPORTED_FUNC deallocate_memory(void* p) { - if (p) { - __TBB_ASSERT(deallocate_handler, "Initialization has not been yet."); - (*deallocate_handler)(p); - } -} - -bool __TBB_EXPORTED_FUNC is_tbbmalloc_used() { - if (allocate_handler == &initialize_allocate_handler) { - void* void_ptr = allocate_handler(1); - deallocate_handler(void_ptr); - } - __TBB_ASSERT(allocate_handler != &initialize_allocate_handler && deallocate_handler != nullptr, NULL); - // Cast to void avoids type mismatch errors on some compilers (e.g. 
__IBMCPP__) - __TBB_ASSERT((reinterpret_cast<void*>(allocate_handler) == reinterpret_cast<void*>(&std::malloc)) == (reinterpret_cast<void*>(deallocate_handler) == reinterpret_cast<void*>(&std::free)), - "Both shim pointers must refer to routines from the same package (either TBB or CRT)"); - return reinterpret_cast<void*>(allocate_handler) == reinterpret_cast<void*>(&std::malloc); -} - -} // namespace r1 -} // namespace detail -} // namespace tbb +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "oneapi/tbb/version.h" + +#include "oneapi/tbb/detail/_exception.h" +#include "oneapi/tbb/detail/_assert.h" +#include "oneapi/tbb/detail/_utils.h" + +#include "dynamic_link.h" +#include "misc.h" + +#include <cstdlib> + +#if _WIN32 || _WIN64 +#include <Windows.h> +#else +#include <dlfcn.h> +#endif /* _WIN32||_WIN64 */ + +#if __TBB_WEAK_SYMBOLS_PRESENT + +#pragma weak scalable_malloc +#pragma weak scalable_free +#pragma weak scalable_aligned_malloc +#pragma weak scalable_aligned_free + +extern "C" { + void* scalable_malloc(std::size_t); + void scalable_free(void*); + void* scalable_aligned_malloc(std::size_t, std::size_t); + void scalable_aligned_free(void*); +} + +#endif /* __TBB_WEAK_SYMBOLS_PRESENT */ + +namespace tbb { +namespace detail { +namespace r1 { + +//! Initialization routine used for first indirect call via allocate_handler. +static void* initialize_allocate_handler(std::size_t size); + +//! Handler for memory allocation +static void* (*allocate_handler)(std::size_t size) = &initialize_allocate_handler; + +//! Handler for memory deallocation +static void (*deallocate_handler)(void* pointer) = nullptr; + +//! Initialization routine used for first indirect call via cache_aligned_allocate_handler. +static void* initialize_cache_aligned_allocate_handler(std::size_t n, std::size_t alignment); + +//! Allocates memory using standard malloc. It is used when scalable_allocator is not available +static void* std_cache_aligned_allocate(std::size_t n, std::size_t alignment); + +//! Allocates memory using standard free. It is used when scalable_allocator is not available +static void std_cache_aligned_deallocate(void* p); + +//! Handler for padded memory allocation +static void* (*cache_aligned_allocate_handler)(std::size_t n, std::size_t alignment) = &initialize_cache_aligned_allocate_handler; + +//! Handler for padded memory deallocation +static void (*cache_aligned_deallocate_handler)(void* p) = nullptr; + +//! Table describing how to link the handlers. +static const dynamic_link_descriptor MallocLinkTable[] = { + DLD(scalable_malloc, allocate_handler), + DLD(scalable_free, deallocate_handler), + DLD(scalable_aligned_malloc, cache_aligned_allocate_handler), + DLD(scalable_aligned_free, cache_aligned_deallocate_handler), +}; + + +#if TBB_USE_DEBUG +#define DEBUG_SUFFIX "_debug" +#else +#define DEBUG_SUFFIX +#endif /* TBB_USE_DEBUG */ + +// MALLOCLIB_NAME is the name of the oneTBB memory allocator library. 
+#if _WIN32||_WIN64 +#define MALLOCLIB_NAME "tbbmalloc" DEBUG_SUFFIX ".dll" +#elif __APPLE__ +#define MALLOCLIB_NAME "libtbbmalloc" DEBUG_SUFFIX ".dylib" +#elif __FreeBSD__ || __NetBSD__ || __OpenBSD__ || __sun || _AIX || __ANDROID__ +#define MALLOCLIB_NAME "libtbbmalloc" DEBUG_SUFFIX ".so" +#elif __linux__ // Note that order of these #elif's is important! +#define MALLOCLIB_NAME "libtbbmalloc" DEBUG_SUFFIX ".so.2" +#else +#error Unknown OS +#endif + +//! Initialize the allocation/free handler pointers. +/** Caller is responsible for ensuring this routine is called exactly once. + The routine attempts to dynamically link with the TBB memory allocator. + If that allocator is not found, it links to malloc and free. */ +void initialize_handler_pointers() { + __TBB_ASSERT(allocate_handler == &initialize_allocate_handler, NULL); + bool success = dynamic_link(MALLOCLIB_NAME, MallocLinkTable, 4); + if(!success) { + // If unsuccessful, set the handlers to the default routines. + // This must be done now, and not before FillDynamicLinks runs, because if other + // threads call the handlers, we want them to go through the DoOneTimeInitializations logic, + // which forces them to wait. + allocate_handler = &std::malloc; + deallocate_handler = &std::free; + cache_aligned_allocate_handler = &std_cache_aligned_allocate; + cache_aligned_deallocate_handler = &std_cache_aligned_deallocate; + } + + PrintExtraVersionInfo( "ALLOCATOR", success?"scalable_malloc":"malloc" ); +} + +static std::once_flag initialization_state; +void initialize_cache_aligned_allocator() { + std::call_once(initialization_state, &initialize_handler_pointers); +} + +//! Executed on very first call through allocate_handler +static void* initialize_allocate_handler(std::size_t size) { + initialize_cache_aligned_allocator(); + __TBB_ASSERT(allocate_handler != &initialize_allocate_handler, NULL); + return (*allocate_handler)(size); +} + +//! 
Executed on very first call through cache_aligned_allocate_handler +static void* initialize_cache_aligned_allocate_handler(std::size_t bytes, std::size_t alignment) { + initialize_cache_aligned_allocator(); + __TBB_ASSERT(cache_aligned_allocate_handler != &initialize_cache_aligned_allocate_handler, NULL); + return (*cache_aligned_allocate_handler)(bytes, alignment); +} + +// TODO: use CPUID to find actual line size, though consider backward compatibility +// nfs - no false sharing +static constexpr std::size_t nfs_size = 128; + +std::size_t __TBB_EXPORTED_FUNC cache_line_size() { + return nfs_size; +} + +void* __TBB_EXPORTED_FUNC cache_aligned_allocate(std::size_t size) { + const std::size_t cache_line_size = nfs_size; + __TBB_ASSERT(is_power_of_two(cache_line_size), "must be power of two"); + + // Check for overflow + if (size + cache_line_size < size) { + throw_exception(exception_id::bad_alloc); + } + // scalable_aligned_malloc considers zero size request an error, and returns NULL + if (size == 0) size = 1; + + void* result = cache_aligned_allocate_handler(size, cache_line_size); + if (!result) { + throw_exception(exception_id::bad_alloc); + } + __TBB_ASSERT(is_aligned(result, cache_line_size), "The returned address isn't aligned"); + return result; +} + +void __TBB_EXPORTED_FUNC cache_aligned_deallocate(void* p) { + __TBB_ASSERT(cache_aligned_deallocate_handler, "Initialization has not been yet."); + (*cache_aligned_deallocate_handler)(p); +} + +static void* std_cache_aligned_allocate(std::size_t bytes, std::size_t alignment) { + // TODO: make it common with cache_aligned_resource + std::size_t space = alignment + bytes; + std::uintptr_t base = reinterpret_cast<std::uintptr_t>(std::malloc(space)); + if (!base) { + return nullptr; + } + std::uintptr_t result = (base + nfs_size) & ~(nfs_size - 1); + // Round up to the next cache line (align the base address) + __TBB_ASSERT((result - base) >= sizeof(std::uintptr_t), "Cannot store a base pointer to the header"); + __TBB_ASSERT(space - (result - base) >= bytes, "Not enough space for the storage"); + + // Record where block actually starts. + (reinterpret_cast<std::uintptr_t*>(result))[-1] = base; + return reinterpret_cast<void*>(result); +} + +static void std_cache_aligned_deallocate(void* p) { + if (p) { + __TBB_ASSERT(reinterpret_cast<std::uintptr_t>(p) >= 0x4096, "attempt to free block not obtained from cache_aligned_allocator"); + // Recover where block actually starts + std::uintptr_t base = (reinterpret_cast<std::uintptr_t*>(p))[-1]; + __TBB_ASSERT(((base + nfs_size) & ~(nfs_size - 1)) == reinterpret_cast<std::uintptr_t>(p), "Incorrect alignment or not allocated by std_cache_aligned_deallocate?"); + std::free(reinterpret_cast<void*>(base)); + } +} + +void* __TBB_EXPORTED_FUNC allocate_memory(std::size_t size) { + void* result = (*allocate_handler)(size); + if (!result) { + throw_exception(exception_id::bad_alloc); + } + return result; +} + +void __TBB_EXPORTED_FUNC deallocate_memory(void* p) { + if (p) { + __TBB_ASSERT(deallocate_handler, "Initialization has not been yet."); + (*deallocate_handler)(p); + } +} + +bool __TBB_EXPORTED_FUNC is_tbbmalloc_used() { + if (allocate_handler == &initialize_allocate_handler) { + void* void_ptr = allocate_handler(1); + deallocate_handler(void_ptr); + } + __TBB_ASSERT(allocate_handler != &initialize_allocate_handler && deallocate_handler != nullptr, NULL); + // Cast to void avoids type mismatch errors on some compilers (e.g. 
__IBMCPP__) + __TBB_ASSERT((reinterpret_cast<void*>(allocate_handler) == reinterpret_cast<void*>(&std::malloc)) == (reinterpret_cast<void*>(deallocate_handler) == reinterpret_cast<void*>(&std::free)), + "Both shim pointers must refer to routines from the same package (either TBB or CRT)"); + return reinterpret_cast<void*>(allocate_handler) == reinterpret_cast<void*>(&std::malloc); +} + +} // namespace r1 +} // namespace detail +} // namespace tbb diff --git a/contrib/libs/tbb/src/tbb/arena.cpp b/contrib/libs/tbb/src/tbb/arena.cpp index e91d1bb984..1ddab36ff5 100644 --- a/contrib/libs/tbb/src/tbb/arena.cpp +++ b/contrib/libs/tbb/src/tbb/arena.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,85 +14,85 @@ limitations under the License. */ -#include "task_dispatcher.h" +#include "task_dispatcher.h" #include "governor.h" #include "arena.h" #include "itt_notify.h" #include "semaphore.h" -#include "waiters.h" -#include "oneapi/tbb/detail/_task.h" -#include "oneapi/tbb/info.h" -#include "oneapi/tbb/tbb_allocator.h" +#include "waiters.h" +#include "oneapi/tbb/detail/_task.h" +#include "oneapi/tbb/info.h" +#include "oneapi/tbb/tbb_allocator.h" -#include <atomic> -#include <cstring> +#include <atomic> +#include <cstring> #include <functional> namespace tbb { -namespace detail { -namespace r1 { - -#if __TBB_ARENA_BINDING -class numa_binding_observer : public tbb::task_scheduler_observer { - binding_handler* my_binding_handler; -public: - numa_binding_observer( d1::task_arena* ta, int num_slots, int numa_id, core_type_id core_type, int max_threads_per_core ) - : task_scheduler_observer(*ta) - , my_binding_handler(construct_binding_handler(num_slots, numa_id, core_type, max_threads_per_core)) - {} - - void on_scheduler_entry( bool ) override { - apply_affinity_mask(my_binding_handler, this_task_arena::current_thread_index()); - } - - void on_scheduler_exit( bool ) override { - restore_affinity_mask(my_binding_handler, this_task_arena::current_thread_index()); - } - - ~numa_binding_observer(){ - destroy_binding_handler(my_binding_handler); - } -}; - -numa_binding_observer* construct_binding_observer( d1::task_arena* ta, int num_slots, int numa_id, core_type_id core_type, int max_threads_per_core ) { - numa_binding_observer* binding_observer = nullptr; - if ((core_type >= 0 && core_type_count() > 1) || (numa_id >= 0 && numa_node_count() > 1) || max_threads_per_core > 0) { - binding_observer = new(allocate_memory(sizeof(numa_binding_observer))) numa_binding_observer(ta, num_slots, numa_id, core_type, max_threads_per_core); - __TBB_ASSERT(binding_observer, "Failure during NUMA binding observer allocation and construction"); - binding_observer->observe(true); - } - return binding_observer; -} - -void destroy_binding_observer( numa_binding_observer* binding_observer ) { - __TBB_ASSERT(binding_observer, "Trying to deallocate NULL pointer"); - binding_observer->observe(false); - binding_observer->~numa_binding_observer(); - deallocate_memory(binding_observer); -} -#endif /*!__TBB_ARENA_BINDING*/ - -std::size_t arena::occupy_free_slot_in_range( thread_data& tls, std::size_t lower, std::size_t upper ) { +namespace detail { +namespace r1 { + +#if __TBB_ARENA_BINDING +class numa_binding_observer : public tbb::task_scheduler_observer { + binding_handler* my_binding_handler; +public: + numa_binding_observer( 
d1::task_arena* ta, int num_slots, int numa_id, core_type_id core_type, int max_threads_per_core ) + : task_scheduler_observer(*ta) + , my_binding_handler(construct_binding_handler(num_slots, numa_id, core_type, max_threads_per_core)) + {} + + void on_scheduler_entry( bool ) override { + apply_affinity_mask(my_binding_handler, this_task_arena::current_thread_index()); + } + + void on_scheduler_exit( bool ) override { + restore_affinity_mask(my_binding_handler, this_task_arena::current_thread_index()); + } + + ~numa_binding_observer(){ + destroy_binding_handler(my_binding_handler); + } +}; + +numa_binding_observer* construct_binding_observer( d1::task_arena* ta, int num_slots, int numa_id, core_type_id core_type, int max_threads_per_core ) { + numa_binding_observer* binding_observer = nullptr; + if ((core_type >= 0 && core_type_count() > 1) || (numa_id >= 0 && numa_node_count() > 1) || max_threads_per_core > 0) { + binding_observer = new(allocate_memory(sizeof(numa_binding_observer))) numa_binding_observer(ta, num_slots, numa_id, core_type, max_threads_per_core); + __TBB_ASSERT(binding_observer, "Failure during NUMA binding observer allocation and construction"); + binding_observer->observe(true); + } + return binding_observer; +} + +void destroy_binding_observer( numa_binding_observer* binding_observer ) { + __TBB_ASSERT(binding_observer, "Trying to deallocate NULL pointer"); + binding_observer->observe(false); + binding_observer->~numa_binding_observer(); + deallocate_memory(binding_observer); +} +#endif /*!__TBB_ARENA_BINDING*/ + +std::size_t arena::occupy_free_slot_in_range( thread_data& tls, std::size_t lower, std::size_t upper ) { if ( lower >= upper ) return out_of_arena; // Start search for an empty slot from the one we occupied the last time - std::size_t index = tls.my_arena_index; - if ( index < lower || index >= upper ) index = tls.my_random.get() % (upper - lower) + lower; + std::size_t index = tls.my_arena_index; + if ( index < lower || index >= upper ) index = tls.my_random.get() % (upper - lower) + lower; __TBB_ASSERT( index >= lower && index < upper, NULL ); // Find a free slot - for ( std::size_t i = index; i < upper; ++i ) - if (my_slots[i].try_occupy()) return i; - for ( std::size_t i = lower; i < index; ++i ) - if (my_slots[i].try_occupy()) return i; + for ( std::size_t i = index; i < upper; ++i ) + if (my_slots[i].try_occupy()) return i; + for ( std::size_t i = lower; i < index; ++i ) + if (my_slots[i].try_occupy()) return i; return out_of_arena; } template <bool as_worker> -std::size_t arena::occupy_free_slot(thread_data& tls) { - // Firstly, external threads try to occupy reserved slots - std::size_t index = as_worker ? out_of_arena : occupy_free_slot_in_range( tls, 0, my_num_reserved_slots ); +std::size_t arena::occupy_free_slot(thread_data& tls) { + // Firstly, external threads try to occupy reserved slots + std::size_t index = as_worker ? 
out_of_arena : occupy_free_slot_in_range( tls, 0, my_num_reserved_slots ); if ( index == out_of_arena ) { // Secondly, all threads try to occupy all non-reserved slots - index = occupy_free_slot_in_range(tls, my_num_reserved_slots, my_num_slots ); + index = occupy_free_slot_in_range(tls, my_num_reserved_slots, my_num_slots ); // Likely this arena is already saturated if ( index == out_of_arena ) return out_of_arena; @@ -102,656 +102,656 @@ std::size_t arena::occupy_free_slot(thread_data& tls) { return index; } -std::uintptr_t arena::calculate_stealing_threshold() { - stack_anchor_type anchor; - return r1::calculate_stealing_threshold(reinterpret_cast<std::uintptr_t>(&anchor), my_market->worker_stack_size()); -} - -void arena::process(thread_data& tls) { - governor::set_thread_data(tls); // TODO: consider moving to create_one_job. - __TBB_ASSERT( is_alive(my_guard), nullptr); - __TBB_ASSERT( my_num_slots > 1, nullptr); - - std::size_t index = occupy_free_slot</*as_worker*/true>(tls); - if (index == out_of_arena) { - on_thread_leaving<ref_worker>(); - return; - } +std::uintptr_t arena::calculate_stealing_threshold() { + stack_anchor_type anchor; + return r1::calculate_stealing_threshold(reinterpret_cast<std::uintptr_t>(&anchor), my_market->worker_stack_size()); +} + +void arena::process(thread_data& tls) { + governor::set_thread_data(tls); // TODO: consider moving to create_one_job. + __TBB_ASSERT( is_alive(my_guard), nullptr); + __TBB_ASSERT( my_num_slots > 1, nullptr); + + std::size_t index = occupy_free_slot</*as_worker*/true>(tls); + if (index == out_of_arena) { + on_thread_leaving<ref_worker>(); + return; + } __TBB_ASSERT( index >= my_num_reserved_slots, "Workers cannot occupy reserved slots" ); - tls.attach_arena(*this, index); - - task_dispatcher& task_disp = tls.my_arena_slot->default_task_dispatcher(); - task_disp.set_stealing_threshold(calculate_stealing_threshold()); - __TBB_ASSERT(task_disp.can_steal(), nullptr); - tls.attach_task_dispatcher(task_disp); - - __TBB_ASSERT( !tls.my_last_observer, "There cannot be notified local observers when entering arena" ); - my_observers.notify_entry_observers(tls.my_last_observer, tls.my_is_worker); - - // Waiting on special object tied to this arena - outermost_worker_waiter waiter(*this); - d1::task* t = tls.my_task_dispatcher->local_wait_for_all(nullptr, waiter); - __TBB_ASSERT_EX(t == nullptr, "Outermost worker must not leave dispatch loop with a task"); - __TBB_ASSERT(governor::is_thread_data_set(&tls), nullptr); - __TBB_ASSERT(tls.my_task_dispatcher == &task_disp, nullptr); - - my_observers.notify_exit_observers(tls.my_last_observer, tls.my_is_worker); - tls.my_last_observer = nullptr; - - task_disp.set_stealing_threshold(0); - tls.detach_task_dispatcher(); - - // Arena slot detach (arena may be used in market::process) - // TODO: Consider moving several calls below into a new method(e.g.detach_arena). 
- tls.my_arena_slot->release(); - tls.my_arena_slot = nullptr; - tls.my_inbox.detach(); - __TBB_ASSERT(tls.my_inbox.is_idle_state(true), nullptr); - __TBB_ASSERT(is_alive(my_guard), nullptr); - + tls.attach_arena(*this, index); + + task_dispatcher& task_disp = tls.my_arena_slot->default_task_dispatcher(); + task_disp.set_stealing_threshold(calculate_stealing_threshold()); + __TBB_ASSERT(task_disp.can_steal(), nullptr); + tls.attach_task_dispatcher(task_disp); + + __TBB_ASSERT( !tls.my_last_observer, "There cannot be notified local observers when entering arena" ); + my_observers.notify_entry_observers(tls.my_last_observer, tls.my_is_worker); + + // Waiting on special object tied to this arena + outermost_worker_waiter waiter(*this); + d1::task* t = tls.my_task_dispatcher->local_wait_for_all(nullptr, waiter); + __TBB_ASSERT_EX(t == nullptr, "Outermost worker must not leave dispatch loop with a task"); + __TBB_ASSERT(governor::is_thread_data_set(&tls), nullptr); + __TBB_ASSERT(tls.my_task_dispatcher == &task_disp, nullptr); + + my_observers.notify_exit_observers(tls.my_last_observer, tls.my_is_worker); + tls.my_last_observer = nullptr; + + task_disp.set_stealing_threshold(0); + tls.detach_task_dispatcher(); + + // Arena slot detach (arena may be used in market::process) + // TODO: Consider moving several calls below into a new method(e.g.detach_arena). + tls.my_arena_slot->release(); + tls.my_arena_slot = nullptr; + tls.my_inbox.detach(); + __TBB_ASSERT(tls.my_inbox.is_idle_state(true), nullptr); + __TBB_ASSERT(is_alive(my_guard), nullptr); + // In contrast to earlier versions of TBB (before 3.0 U5) now it is possible // that arena may be temporarily left unpopulated by threads. See comments in // arena::on_thread_leaving() for more details. on_thread_leaving<ref_worker>(); - __TBB_ASSERT(tls.my_arena == this, "my_arena is used as a hint when searching the arena to join"); + __TBB_ASSERT(tls.my_arena == this, "my_arena is used as a hint when searching the arena to join"); } -arena::arena ( market& m, unsigned num_slots, unsigned num_reserved_slots, unsigned priority_level ) -{ +arena::arena ( market& m, unsigned num_slots, unsigned num_reserved_slots, unsigned priority_level ) +{ __TBB_ASSERT( !my_guard, "improperly allocated arena?" ); - __TBB_ASSERT( sizeof(my_slots[0]) % cache_line_size()==0, "arena::slot size not multiple of cache line size" ); - __TBB_ASSERT( is_aligned(this, cache_line_size()), "arena misaligned" ); + __TBB_ASSERT( sizeof(my_slots[0]) % cache_line_size()==0, "arena::slot size not multiple of cache line size" ); + __TBB_ASSERT( is_aligned(this, cache_line_size()), "arena misaligned" ); my_market = &m; my_limit = 1; - // Two slots are mandatory: for the external thread, and for 1 worker (required to support starvation resistant tasks). + // Two slots are mandatory: for the external thread, and for 1 worker (required to support starvation resistant tasks). 
my_num_slots = num_arena_slots(num_slots); my_num_reserved_slots = num_reserved_slots; my_max_num_workers = num_slots-num_reserved_slots; - my_priority_level = priority_level; - my_references = ref_external; // accounts for the external thread - my_aba_epoch = m.my_arenas_aba_epoch.load(std::memory_order_relaxed); + my_priority_level = priority_level; + my_references = ref_external; // accounts for the external thread + my_aba_epoch = m.my_arenas_aba_epoch.load(std::memory_order_relaxed); my_observers.my_arena = this; - my_co_cache.init(4 * num_slots); + my_co_cache.init(4 * num_slots); __TBB_ASSERT ( my_max_num_workers <= my_num_slots, NULL ); - // Initialize the default context. It should be allocated before task_dispatch construction. - my_default_ctx = new (cache_aligned_allocate(sizeof(d1::task_group_context))) - d1::task_group_context{ d1::task_group_context::isolated, d1::task_group_context::fp_settings }; + // Initialize the default context. It should be allocated before task_dispatch construction. + my_default_ctx = new (cache_aligned_allocate(sizeof(d1::task_group_context))) + d1::task_group_context{ d1::task_group_context::isolated, d1::task_group_context::fp_settings }; // Construct slots. Mark internal synchronization elements for the tools. - task_dispatcher* base_td_pointer = reinterpret_cast<task_dispatcher*>(my_slots + my_num_slots); + task_dispatcher* base_td_pointer = reinterpret_cast<task_dispatcher*>(my_slots + my_num_slots); for( unsigned i = 0; i < my_num_slots; ++i ) { - // __TBB_ASSERT( !my_slots[i].my_scheduler && !my_slots[i].task_pool, NULL ); + // __TBB_ASSERT( !my_slots[i].my_scheduler && !my_slots[i].task_pool, NULL ); __TBB_ASSERT( !my_slots[i].task_pool_ptr, NULL ); __TBB_ASSERT( !my_slots[i].my_task_pool_size, NULL ); - mailbox(i).construct(); - my_slots[i].init_task_streams(i); - my_slots[i].my_default_task_dispatcher = new(base_td_pointer + i) task_dispatcher(this); - my_slots[i].my_is_occupied.store(false, std::memory_order_relaxed); + mailbox(i).construct(); + my_slots[i].init_task_streams(i); + my_slots[i].my_default_task_dispatcher = new(base_td_pointer + i) task_dispatcher(this); + my_slots[i].my_is_occupied.store(false, std::memory_order_relaxed); } - my_fifo_task_stream.initialize(my_num_slots); - my_resume_task_stream.initialize(my_num_slots); -#if __TBB_PREVIEW_CRITICAL_TASKS - my_critical_task_stream.initialize(my_num_slots); -#endif + my_fifo_task_stream.initialize(my_num_slots); + my_resume_task_stream.initialize(my_num_slots); +#if __TBB_PREVIEW_CRITICAL_TASKS + my_critical_task_stream.initialize(my_num_slots); +#endif #if __TBB_ENQUEUE_ENFORCED_CONCURRENCY - my_local_concurrency_requests = 0; - my_local_concurrency_flag.clear(); - my_global_concurrency_mode.store(false, std::memory_order_relaxed); + my_local_concurrency_requests = 0; + my_local_concurrency_flag.clear(); + my_global_concurrency_mode.store(false, std::memory_order_relaxed); #endif } -arena& arena::allocate_arena( market& m, unsigned num_slots, unsigned num_reserved_slots, - unsigned priority_level ) -{ +arena& arena::allocate_arena( market& m, unsigned num_slots, unsigned num_reserved_slots, + unsigned priority_level ) +{ __TBB_ASSERT( sizeof(base_type) + sizeof(arena_slot) == sizeof(arena), "All arena data fields must go to arena_base" ); - __TBB_ASSERT( sizeof(base_type) % cache_line_size() == 0, "arena slots area misaligned: wrong padding" ); - __TBB_ASSERT( sizeof(mail_outbox) == max_nfs_size, "Mailbox padding is wrong" ); - std::size_t n = 
allocation_size(num_arena_slots(num_slots)); - unsigned char* storage = (unsigned char*)cache_aligned_allocate(n); + __TBB_ASSERT( sizeof(base_type) % cache_line_size() == 0, "arena slots area misaligned: wrong padding" ); + __TBB_ASSERT( sizeof(mail_outbox) == max_nfs_size, "Mailbox padding is wrong" ); + std::size_t n = allocation_size(num_arena_slots(num_slots)); + unsigned char* storage = (unsigned char*)cache_aligned_allocate(n); // Zero all slots to indicate that they are empty - std::memset( storage, 0, n ); - return *new( storage + num_arena_slots(num_slots) * sizeof(mail_outbox) ) - arena(m, num_slots, num_reserved_slots, priority_level); + std::memset( storage, 0, n ); + return *new( storage + num_arena_slots(num_slots) * sizeof(mail_outbox) ) + arena(m, num_slots, num_reserved_slots, priority_level); } void arena::free_arena () { __TBB_ASSERT( is_alive(my_guard), NULL ); - __TBB_ASSERT( !my_references.load(std::memory_order_relaxed), "There are threads in the dying arena" ); + __TBB_ASSERT( !my_references.load(std::memory_order_relaxed), "There are threads in the dying arena" ); __TBB_ASSERT( !my_num_workers_requested && !my_num_workers_allotted, "Dying arena requests workers" ); - __TBB_ASSERT( my_pool_state.load(std::memory_order_relaxed) == SNAPSHOT_EMPTY || !my_max_num_workers, - "Inconsistent state of a dying arena" ); + __TBB_ASSERT( my_pool_state.load(std::memory_order_relaxed) == SNAPSHOT_EMPTY || !my_max_num_workers, + "Inconsistent state of a dying arena" ); #if __TBB_ENQUEUE_ENFORCED_CONCURRENCY - __TBB_ASSERT( !my_global_concurrency_mode, NULL ); + __TBB_ASSERT( !my_global_concurrency_mode, NULL ); #endif poison_value( my_guard ); - std::intptr_t drained = 0; + std::intptr_t drained = 0; for ( unsigned i = 0; i < my_num_slots; ++i ) { - // __TBB_ASSERT( !my_slots[i].my_scheduler, "arena slot is not empty" ); + // __TBB_ASSERT( !my_slots[i].my_scheduler, "arena slot is not empty" ); // TODO: understand the assertion and modify // __TBB_ASSERT( my_slots[i].task_pool == EmptyTaskPool, NULL ); __TBB_ASSERT( my_slots[i].head == my_slots[i].tail, NULL ); // TODO: replace by is_quiescent_local_task_pool_empty my_slots[i].free_task_pool(); - drained += mailbox(i).drain(); - my_slots[i].my_default_task_dispatcher->~task_dispatcher(); + drained += mailbox(i).drain(); + my_slots[i].my_default_task_dispatcher->~task_dispatcher(); } - __TBB_ASSERT(my_fifo_task_stream.empty(), "Not all enqueued tasks were executed"); - __TBB_ASSERT(my_resume_task_stream.empty(), "Not all enqueued tasks were executed"); - // Cleanup coroutines/schedulers cache - my_co_cache.cleanup(); - my_default_ctx->~task_group_context(); - cache_aligned_deallocate(my_default_ctx); -#if __TBB_PREVIEW_CRITICAL_TASKS - __TBB_ASSERT( my_critical_task_stream.empty(), "Not all critical tasks were executed"); -#endif + __TBB_ASSERT(my_fifo_task_stream.empty(), "Not all enqueued tasks were executed"); + __TBB_ASSERT(my_resume_task_stream.empty(), "Not all enqueued tasks were executed"); + // Cleanup coroutines/schedulers cache + my_co_cache.cleanup(); + my_default_ctx->~task_group_context(); + cache_aligned_deallocate(my_default_ctx); +#if __TBB_PREVIEW_CRITICAL_TASKS + __TBB_ASSERT( my_critical_task_stream.empty(), "Not all critical tasks were executed"); +#endif // remove an internal reference my_market->release( /*is_public=*/false, /*blocking_terminate=*/false ); - if ( !my_observers.empty() ) { + if ( !my_observers.empty() ) { my_observers.clear(); - } - void* storage = &mailbox(my_num_slots-1); - __TBB_ASSERT( 
my_references.load(std::memory_order_relaxed) == 0, NULL ); - __TBB_ASSERT( my_pool_state.load(std::memory_order_relaxed) == SNAPSHOT_EMPTY || !my_max_num_workers, NULL ); + } + void* storage = &mailbox(my_num_slots-1); + __TBB_ASSERT( my_references.load(std::memory_order_relaxed) == 0, NULL ); + __TBB_ASSERT( my_pool_state.load(std::memory_order_relaxed) == SNAPSHOT_EMPTY || !my_max_num_workers, NULL ); this->~arena(); #if TBB_USE_ASSERT > 1 - std::memset( storage, 0, allocation_size(my_num_slots) ); + std::memset( storage, 0, allocation_size(my_num_slots) ); #endif /* TBB_USE_ASSERT */ - cache_aligned_deallocate( storage ); + cache_aligned_deallocate( storage ); +} + +bool arena::has_enqueued_tasks() { + return !my_fifo_task_stream.empty(); } -bool arena::has_enqueued_tasks() { - return !my_fifo_task_stream.empty(); -} - -bool arena::is_out_of_work() { -#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY - if (my_local_concurrency_flag.try_clear_if([this] { - return !has_enqueued_tasks(); - })) { - my_market->adjust_demand(*this, /* delta = */ -1, /* mandatory = */ true); +bool arena::is_out_of_work() { +#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY + if (my_local_concurrency_flag.try_clear_if([this] { + return !has_enqueued_tasks(); + })) { + my_market->adjust_demand(*this, /* delta = */ -1, /* mandatory = */ true); } #endif - // TODO: rework it to return at least a hint about where a task was found; better if the task itself. - switch (my_pool_state.load(std::memory_order_acquire)) { - case SNAPSHOT_EMPTY: + // TODO: rework it to return at least a hint about where a task was found; better if the task itself. + switch (my_pool_state.load(std::memory_order_acquire)) { + case SNAPSHOT_EMPTY: return true; - case SNAPSHOT_FULL: { - // Use unique id for "busy" in order to avoid ABA problems. - const pool_state_t busy = pool_state_t(&busy); - // Helper for CAS execution - pool_state_t expected_state; - - // Request permission to take snapshot - expected_state = SNAPSHOT_FULL; - if (my_pool_state.compare_exchange_strong(expected_state, busy)) { - // Got permission. Take the snapshot. - // NOTE: This is not a lock, as the state can be set to FULL at - // any moment by a thread that spawns/enqueues new task. - std::size_t n = my_limit.load(std::memory_order_acquire); - // Make local copies of volatile parameters. Their change during - // snapshot taking procedure invalidates the attempt, and returns - // this thread into the dispatch loop. - std::size_t k; - for (k = 0; k < n; ++k) { - if (my_slots[k].task_pool.load(std::memory_order_relaxed) != EmptyTaskPool && - my_slots[k].head.load(std::memory_order_relaxed) < my_slots[k].tail.load(std::memory_order_relaxed)) - { - // k-th primary task pool is nonempty and does contain tasks. - break; - } - if (my_pool_state.load(std::memory_order_acquire) != busy) - return false; // the work was published + case SNAPSHOT_FULL: { + // Use unique id for "busy" in order to avoid ABA problems. + const pool_state_t busy = pool_state_t(&busy); + // Helper for CAS execution + pool_state_t expected_state; + + // Request permission to take snapshot + expected_state = SNAPSHOT_FULL; + if (my_pool_state.compare_exchange_strong(expected_state, busy)) { + // Got permission. Take the snapshot. + // NOTE: This is not a lock, as the state can be set to FULL at + // any moment by a thread that spawns/enqueues new task. + std::size_t n = my_limit.load(std::memory_order_acquire); + // Make local copies of volatile parameters. 
Their change during + // snapshot taking procedure invalidates the attempt, and returns + // this thread into the dispatch loop. + std::size_t k; + for (k = 0; k < n; ++k) { + if (my_slots[k].task_pool.load(std::memory_order_relaxed) != EmptyTaskPool && + my_slots[k].head.load(std::memory_order_relaxed) < my_slots[k].tail.load(std::memory_order_relaxed)) + { + // k-th primary task pool is nonempty and does contain tasks. + break; + } + if (my_pool_state.load(std::memory_order_acquire) != busy) + return false; // the work was published } - bool work_absent = k == n; - // Test and test-and-set. - if (my_pool_state.load(std::memory_order_acquire) == busy) { - bool no_stream_tasks = !has_enqueued_tasks() && my_resume_task_stream.empty(); -#if __TBB_PREVIEW_CRITICAL_TASKS - no_stream_tasks = no_stream_tasks && my_critical_task_stream.empty(); -#endif - work_absent = work_absent && no_stream_tasks; - if (work_absent) { - // save current demand value before setting SNAPSHOT_EMPTY, - // to avoid race with advertise_new_work. - int current_demand = (int)my_max_num_workers; - expected_state = busy; - if (my_pool_state.compare_exchange_strong(expected_state, SNAPSHOT_EMPTY)) { - // This thread transitioned pool to empty state, and thus is - // responsible for telling the market that there is no work to do. - my_market->adjust_demand(*this, -current_demand, /* mandatory = */ false); - return true; + bool work_absent = k == n; + // Test and test-and-set. + if (my_pool_state.load(std::memory_order_acquire) == busy) { + bool no_stream_tasks = !has_enqueued_tasks() && my_resume_task_stream.empty(); +#if __TBB_PREVIEW_CRITICAL_TASKS + no_stream_tasks = no_stream_tasks && my_critical_task_stream.empty(); +#endif + work_absent = work_absent && no_stream_tasks; + if (work_absent) { + // save current demand value before setting SNAPSHOT_EMPTY, + // to avoid race with advertise_new_work. + int current_demand = (int)my_max_num_workers; + expected_state = busy; + if (my_pool_state.compare_exchange_strong(expected_state, SNAPSHOT_EMPTY)) { + // This thread transitioned pool to empty state, and thus is + // responsible for telling the market that there is no work to do. + my_market->adjust_demand(*this, -current_demand, /* mandatory = */ false); + return true; } - return false; + return false; } - // Undo previous transition SNAPSHOT_FULL-->busy, unless another thread undid it. - expected_state = busy; - my_pool_state.compare_exchange_strong(expected_state, SNAPSHOT_FULL); + // Undo previous transition SNAPSHOT_FULL-->busy, unless another thread undid it. + expected_state = busy; + my_pool_state.compare_exchange_strong(expected_state, SNAPSHOT_FULL); } } - return false; + return false; } - default: - // Another thread is taking a snapshot. - return false; + default: + // Another thread is taking a snapshot. 
+ return false; } } -void arena::enqueue_task(d1::task& t, d1::task_group_context& ctx, thread_data& td) { - task_group_context_impl::bind_to(ctx, &td); - task_accessor::context(t) = &ctx; - task_accessor::isolation(t) = no_isolation; - my_fifo_task_stream.push( &t, random_lane_selector(td.my_random) ); +void arena::enqueue_task(d1::task& t, d1::task_group_context& ctx, thread_data& td) { + task_group_context_impl::bind_to(ctx, &td); + task_accessor::context(t) = &ctx; + task_accessor::isolation(t) = no_isolation; + my_fifo_task_stream.push( &t, random_lane_selector(td.my_random) ); advertise_new_work<work_enqueued>(); } -} // namespace r1 -} // namespace detail -} // namespace tbb - -// Enable task_arena.h -#include "oneapi/tbb/task_arena.h" // task_arena_base - -namespace tbb { -namespace detail { -namespace r1 { - -#if TBB_USE_ASSERT -void assert_arena_priority_valid( tbb::task_arena::priority a_priority ) { - bool is_arena_priority_correct = - a_priority == tbb::task_arena::priority::high || - a_priority == tbb::task_arena::priority::normal || - a_priority == tbb::task_arena::priority::low; - __TBB_ASSERT( is_arena_priority_correct, - "Task arena priority should be equal to one of the predefined values." ); -} -#else -void assert_arena_priority_valid( tbb::task_arena::priority ) {} +} // namespace r1 +} // namespace detail +} // namespace tbb + +// Enable task_arena.h +#include "oneapi/tbb/task_arena.h" // task_arena_base + +namespace tbb { +namespace detail { +namespace r1 { + +#if TBB_USE_ASSERT +void assert_arena_priority_valid( tbb::task_arena::priority a_priority ) { + bool is_arena_priority_correct = + a_priority == tbb::task_arena::priority::high || + a_priority == tbb::task_arena::priority::normal || + a_priority == tbb::task_arena::priority::low; + __TBB_ASSERT( is_arena_priority_correct, + "Task arena priority should be equal to one of the predefined values." 
); +} +#else +void assert_arena_priority_valid( tbb::task_arena::priority ) {} #endif -unsigned arena_priority_level( tbb::task_arena::priority a_priority ) { - assert_arena_priority_valid( a_priority ); - return market::num_priority_levels - unsigned(int(a_priority) / d1::priority_stride); +unsigned arena_priority_level( tbb::task_arena::priority a_priority ) { + assert_arena_priority_valid( a_priority ); + return market::num_priority_levels - unsigned(int(a_priority) / d1::priority_stride); } -tbb::task_arena::priority arena_priority( unsigned priority_level ) { - auto priority = tbb::task_arena::priority( - (market::num_priority_levels - priority_level) * d1::priority_stride - ); - assert_arena_priority_valid( priority ); - return priority; +tbb::task_arena::priority arena_priority( unsigned priority_level ) { + auto priority = tbb::task_arena::priority( + (market::num_priority_levels - priority_level) * d1::priority_stride + ); + assert_arena_priority_valid( priority ); + return priority; } -struct task_arena_impl { - static void initialize(d1::task_arena_base&); - static void terminate(d1::task_arena_base&); - static bool attach(d1::task_arena_base&); - static void execute(d1::task_arena_base&, d1::delegate_base&); - static void wait(d1::task_arena_base&); - static int max_concurrency(const d1::task_arena_base*); - static void enqueue(d1::task&, d1::task_arena_base*); -}; - -void __TBB_EXPORTED_FUNC initialize(d1::task_arena_base& ta) { - task_arena_impl::initialize(ta); -} -void __TBB_EXPORTED_FUNC terminate(d1::task_arena_base& ta) { - task_arena_impl::terminate(ta); -} -bool __TBB_EXPORTED_FUNC attach(d1::task_arena_base& ta) { - return task_arena_impl::attach(ta); -} -void __TBB_EXPORTED_FUNC execute(d1::task_arena_base& ta, d1::delegate_base& d) { - task_arena_impl::execute(ta, d); -} -void __TBB_EXPORTED_FUNC wait(d1::task_arena_base& ta) { - task_arena_impl::wait(ta); -} - -int __TBB_EXPORTED_FUNC max_concurrency(const d1::task_arena_base* ta) { - return task_arena_impl::max_concurrency(ta); -} - -void __TBB_EXPORTED_FUNC enqueue(d1::task& t, d1::task_arena_base* ta) { - task_arena_impl::enqueue(t, ta); -} - -void task_arena_impl::initialize(d1::task_arena_base& ta) { - governor::one_time_init(); - if (ta.my_max_concurrency < 1) { -#if __TBB_ARENA_BINDING - -#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT - d1::constraints arena_constraints = d1::constraints{} - .set_core_type(ta.core_type()) - .set_max_threads_per_core(ta.max_threads_per_core()) - .set_numa_id(ta.my_numa_id); - ta.my_max_concurrency = (int)default_concurrency(arena_constraints); -#else /*!__TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT*/ - ta.my_max_concurrency = (int)default_concurrency(ta.my_numa_id); -#endif /*!__TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT*/ - -#else /*!__TBB_ARENA_BINDING*/ - ta.my_max_concurrency = (int)governor::default_num_threads(); -#endif /*!__TBB_ARENA_BINDING*/ - } - - __TBB_ASSERT(ta.my_arena.load(std::memory_order_relaxed) == nullptr, "Arena already initialized"); - unsigned priority_level = arena_priority_level(ta.my_priority); - arena* a = market::create_arena(ta.my_max_concurrency, ta.my_num_reserved_slots, priority_level, /* stack_size = */ 0); - ta.my_arena.store(a, std::memory_order_release); +struct task_arena_impl { + static void initialize(d1::task_arena_base&); + static void terminate(d1::task_arena_base&); + static bool attach(d1::task_arena_base&); + static void execute(d1::task_arena_base&, d1::delegate_base&); + static void 
wait(d1::task_arena_base&); + static int max_concurrency(const d1::task_arena_base*); + static void enqueue(d1::task&, d1::task_arena_base*); +}; + +void __TBB_EXPORTED_FUNC initialize(d1::task_arena_base& ta) { + task_arena_impl::initialize(ta); +} +void __TBB_EXPORTED_FUNC terminate(d1::task_arena_base& ta) { + task_arena_impl::terminate(ta); +} +bool __TBB_EXPORTED_FUNC attach(d1::task_arena_base& ta) { + return task_arena_impl::attach(ta); +} +void __TBB_EXPORTED_FUNC execute(d1::task_arena_base& ta, d1::delegate_base& d) { + task_arena_impl::execute(ta, d); +} +void __TBB_EXPORTED_FUNC wait(d1::task_arena_base& ta) { + task_arena_impl::wait(ta); +} + +int __TBB_EXPORTED_FUNC max_concurrency(const d1::task_arena_base* ta) { + return task_arena_impl::max_concurrency(ta); +} + +void __TBB_EXPORTED_FUNC enqueue(d1::task& t, d1::task_arena_base* ta) { + task_arena_impl::enqueue(t, ta); +} + +void task_arena_impl::initialize(d1::task_arena_base& ta) { + governor::one_time_init(); + if (ta.my_max_concurrency < 1) { +#if __TBB_ARENA_BINDING + +#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT + d1::constraints arena_constraints = d1::constraints{} + .set_core_type(ta.core_type()) + .set_max_threads_per_core(ta.max_threads_per_core()) + .set_numa_id(ta.my_numa_id); + ta.my_max_concurrency = (int)default_concurrency(arena_constraints); +#else /*!__TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT*/ + ta.my_max_concurrency = (int)default_concurrency(ta.my_numa_id); +#endif /*!__TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT*/ + +#else /*!__TBB_ARENA_BINDING*/ + ta.my_max_concurrency = (int)governor::default_num_threads(); +#endif /*!__TBB_ARENA_BINDING*/ + } + + __TBB_ASSERT(ta.my_arena.load(std::memory_order_relaxed) == nullptr, "Arena already initialized"); + unsigned priority_level = arena_priority_level(ta.my_priority); + arena* a = market::create_arena(ta.my_max_concurrency, ta.my_num_reserved_slots, priority_level, /* stack_size = */ 0); + ta.my_arena.store(a, std::memory_order_release); // add an internal market reference; a public reference was added in create_arena - market::global_market( /*is_public=*/false); -#if __TBB_ARENA_BINDING - a->my_numa_binding_observer = construct_binding_observer( - static_cast<d1::task_arena*>(&ta), a->my_num_slots, ta.my_numa_id, ta.core_type(), ta.max_threads_per_core()); -#endif /*__TBB_ARENA_BINDING*/ + market::global_market( /*is_public=*/false); +#if __TBB_ARENA_BINDING + a->my_numa_binding_observer = construct_binding_observer( + static_cast<d1::task_arena*>(&ta), a->my_num_slots, ta.my_numa_id, ta.core_type(), ta.max_threads_per_core()); +#endif /*__TBB_ARENA_BINDING*/ } -void task_arena_impl::terminate(d1::task_arena_base& ta) { - arena* a = ta.my_arena.load(std::memory_order_relaxed); - assert_pointer_valid(a); -#if __TBB_ARENA_BINDING - if(a->my_numa_binding_observer != nullptr ) { - destroy_binding_observer(a->my_numa_binding_observer); - a->my_numa_binding_observer = nullptr; +void task_arena_impl::terminate(d1::task_arena_base& ta) { + arena* a = ta.my_arena.load(std::memory_order_relaxed); + assert_pointer_valid(a); +#if __TBB_ARENA_BINDING + if(a->my_numa_binding_observer != nullptr ) { + destroy_binding_observer(a->my_numa_binding_observer); + a->my_numa_binding_observer = nullptr; } -#endif /*__TBB_ARENA_BINDING*/ - a->my_market->release( /*is_public=*/true, /*blocking_terminate=*/false ); - a->on_thread_leaving<arena::ref_external>(); - ta.my_arena.store(nullptr, std::memory_order_relaxed); +#endif 
/*__TBB_ARENA_BINDING*/ + a->my_market->release( /*is_public=*/true, /*blocking_terminate=*/false ); + a->on_thread_leaving<arena::ref_external>(); + ta.my_arena.store(nullptr, std::memory_order_relaxed); } -bool task_arena_impl::attach(d1::task_arena_base& ta) { - __TBB_ASSERT(!ta.my_arena.load(std::memory_order_relaxed), nullptr); - thread_data* td = governor::get_thread_data_if_initialized(); - if( td && td->my_arena ) { - arena* a = td->my_arena; +bool task_arena_impl::attach(d1::task_arena_base& ta) { + __TBB_ASSERT(!ta.my_arena.load(std::memory_order_relaxed), nullptr); + thread_data* td = governor::get_thread_data_if_initialized(); + if( td && td->my_arena ) { + arena* a = td->my_arena; // There is an active arena to attach to. // It's still used by s, so won't be destroyed right away. - __TBB_ASSERT(a->my_references > 0, NULL ); - a->my_references += arena::ref_external; - ta.my_num_reserved_slots = a->my_num_reserved_slots; - ta.my_priority = arena_priority(a->my_priority_level); - ta.my_max_concurrency = ta.my_num_reserved_slots + a->my_max_num_workers; - __TBB_ASSERT(arena::num_arena_slots(ta.my_max_concurrency) == a->my_num_slots, NULL); - ta.my_arena.store(a, std::memory_order_release); + __TBB_ASSERT(a->my_references > 0, NULL ); + a->my_references += arena::ref_external; + ta.my_num_reserved_slots = a->my_num_reserved_slots; + ta.my_priority = arena_priority(a->my_priority_level); + ta.my_max_concurrency = ta.my_num_reserved_slots + a->my_max_num_workers; + __TBB_ASSERT(arena::num_arena_slots(ta.my_max_concurrency) == a->my_num_slots, NULL); + ta.my_arena.store(a, std::memory_order_release); // increases market's ref count for task_arena market::global_market( /*is_public=*/true ); - return true; + return true; } - return false; + return false; } -void task_arena_impl::enqueue(d1::task& t, d1::task_arena_base* ta) { - thread_data* td = governor::get_thread_data(); // thread data is only needed for FastRandom instance - arena* a = ta->my_arena.load(std::memory_order_relaxed); - assert_pointers_valid(ta, a, a->my_default_ctx, td); - // Is there a better place for checking the state of my_default_ctx? - __TBB_ASSERT(!a->my_default_ctx->is_group_execution_cancelled(), - "The task will not be executed because default task_group_context of task_arena is cancelled. Has previously enqueued task thrown an exception?"); - a->enqueue_task(t, *a->my_default_ctx, *td); +void task_arena_impl::enqueue(d1::task& t, d1::task_arena_base* ta) { + thread_data* td = governor::get_thread_data(); // thread data is only needed for FastRandom instance + arena* a = ta->my_arena.load(std::memory_order_relaxed); + assert_pointers_valid(ta, a, a->my_default_ctx, td); + // Is there a better place for checking the state of my_default_ctx? + __TBB_ASSERT(!a->my_default_ctx->is_group_execution_cancelled(), + "The task will not be executed because default task_group_context of task_arena is cancelled. 
Has previously enqueued task thrown an exception?"); + a->enqueue_task(t, *a->my_default_ctx, *td); } -class nested_arena_context : no_copy { -public: - nested_arena_context(thread_data& td, arena& nested_arena, std::size_t slot_index) - : m_orig_execute_data_ext(td.my_task_dispatcher->m_execute_data_ext) - { - if (td.my_arena != &nested_arena) { - m_orig_arena = td.my_arena; - m_orig_slot_index = td.my_arena_index; - m_orig_last_observer = td.my_last_observer; - - td.detach_task_dispatcher(); - td.attach_arena(nested_arena, slot_index); - task_dispatcher& task_disp = td.my_arena_slot->default_task_dispatcher(); - task_disp.set_stealing_threshold(m_orig_execute_data_ext.task_disp->m_stealing_threshold); - td.attach_task_dispatcher(task_disp); - - // If the calling thread occupies the slots out of external thread reserve we need to notify the - // market that this arena requires one worker less. - if (td.my_arena_index >= td.my_arena->my_num_reserved_slots) { - td.my_arena->my_market->adjust_demand(*td.my_arena, /* delta = */ -1, /* mandatory = */ false); +class nested_arena_context : no_copy { +public: + nested_arena_context(thread_data& td, arena& nested_arena, std::size_t slot_index) + : m_orig_execute_data_ext(td.my_task_dispatcher->m_execute_data_ext) + { + if (td.my_arena != &nested_arena) { + m_orig_arena = td.my_arena; + m_orig_slot_index = td.my_arena_index; + m_orig_last_observer = td.my_last_observer; + + td.detach_task_dispatcher(); + td.attach_arena(nested_arena, slot_index); + task_dispatcher& task_disp = td.my_arena_slot->default_task_dispatcher(); + task_disp.set_stealing_threshold(m_orig_execute_data_ext.task_disp->m_stealing_threshold); + td.attach_task_dispatcher(task_disp); + + // If the calling thread occupies the slots out of external thread reserve we need to notify the + // market that this arena requires one worker less. + if (td.my_arena_index >= td.my_arena->my_num_reserved_slots) { + td.my_arena->my_market->adjust_demand(*td.my_arena, /* delta = */ -1, /* mandatory = */ false); } - - td.my_last_observer = nullptr; - // The task_arena::execute method considers each calling thread as an external thread. - td.my_arena->my_observers.notify_entry_observers(td.my_last_observer, /* worker*/false); - } - - m_task_dispatcher = td.my_task_dispatcher; - m_orig_fifo_tasks_allowed = m_task_dispatcher->allow_fifo_task(true); - m_orig_critical_task_allowed = m_task_dispatcher->m_properties.critical_task_allowed; - m_task_dispatcher->m_properties.critical_task_allowed = true; - - execution_data_ext& ed_ext = td.my_task_dispatcher->m_execute_data_ext; - ed_ext.context = td.my_arena->my_default_ctx; - ed_ext.original_slot = td.my_arena_index; - ed_ext.affinity_slot = d1::no_slot; - ed_ext.task_disp = td.my_task_dispatcher; - ed_ext.isolation = no_isolation; - - __TBB_ASSERT(td.my_arena_slot, nullptr); - __TBB_ASSERT(td.my_arena_slot->is_occupied(), nullptr); - __TBB_ASSERT(td.my_task_dispatcher, nullptr); - } - ~nested_arena_context() { - thread_data& td = *m_task_dispatcher->m_thread_data; - __TBB_ASSERT(governor::is_thread_data_set(&td), nullptr); - m_task_dispatcher->allow_fifo_task(m_orig_fifo_tasks_allowed); - m_task_dispatcher->m_properties.critical_task_allowed = m_orig_critical_task_allowed; - if (m_orig_arena) { - td.my_arena->my_observers.notify_exit_observers(td.my_last_observer, /*worker*/ false); - td.my_last_observer = m_orig_last_observer; - - // Notify the market that this thread releasing a one slot - // that can be used by a worker thread. 
- if (td.my_arena_index >= td.my_arena->my_num_reserved_slots) { - td.my_arena->my_market->adjust_demand(*td.my_arena, /* delta = */ 1, /* mandatory = */ false); + + td.my_last_observer = nullptr; + // The task_arena::execute method considers each calling thread as an external thread. + td.my_arena->my_observers.notify_entry_observers(td.my_last_observer, /* worker*/false); + } + + m_task_dispatcher = td.my_task_dispatcher; + m_orig_fifo_tasks_allowed = m_task_dispatcher->allow_fifo_task(true); + m_orig_critical_task_allowed = m_task_dispatcher->m_properties.critical_task_allowed; + m_task_dispatcher->m_properties.critical_task_allowed = true; + + execution_data_ext& ed_ext = td.my_task_dispatcher->m_execute_data_ext; + ed_ext.context = td.my_arena->my_default_ctx; + ed_ext.original_slot = td.my_arena_index; + ed_ext.affinity_slot = d1::no_slot; + ed_ext.task_disp = td.my_task_dispatcher; + ed_ext.isolation = no_isolation; + + __TBB_ASSERT(td.my_arena_slot, nullptr); + __TBB_ASSERT(td.my_arena_slot->is_occupied(), nullptr); + __TBB_ASSERT(td.my_task_dispatcher, nullptr); + } + ~nested_arena_context() { + thread_data& td = *m_task_dispatcher->m_thread_data; + __TBB_ASSERT(governor::is_thread_data_set(&td), nullptr); + m_task_dispatcher->allow_fifo_task(m_orig_fifo_tasks_allowed); + m_task_dispatcher->m_properties.critical_task_allowed = m_orig_critical_task_allowed; + if (m_orig_arena) { + td.my_arena->my_observers.notify_exit_observers(td.my_last_observer, /*worker*/ false); + td.my_last_observer = m_orig_last_observer; + + // Notify the market that this thread releasing a one slot + // that can be used by a worker thread. + if (td.my_arena_index >= td.my_arena->my_num_reserved_slots) { + td.my_arena->my_market->adjust_demand(*td.my_arena, /* delta = */ 1, /* mandatory = */ false); } - - td.my_task_dispatcher->set_stealing_threshold(0); - td.detach_task_dispatcher(); - td.my_arena_slot->release(); - td.my_arena->my_exit_monitors.notify_one(); // do not relax! - - td.attach_arena(*m_orig_arena, m_orig_slot_index); - td.attach_task_dispatcher(*m_orig_execute_data_ext.task_disp); - } - td.my_task_dispatcher->m_execute_data_ext = m_orig_execute_data_ext; + + td.my_task_dispatcher->set_stealing_threshold(0); + td.detach_task_dispatcher(); + td.my_arena_slot->release(); + td.my_arena->my_exit_monitors.notify_one(); // do not relax! 
+ + td.attach_arena(*m_orig_arena, m_orig_slot_index); + td.attach_task_dispatcher(*m_orig_execute_data_ext.task_disp); + } + td.my_task_dispatcher->m_execute_data_ext = m_orig_execute_data_ext; + } + +private: + execution_data_ext m_orig_execute_data_ext{}; + arena* m_orig_arena{ nullptr }; + observer_proxy* m_orig_last_observer{ nullptr }; + task_dispatcher* m_task_dispatcher{ nullptr }; + unsigned m_orig_slot_index{}; + bool m_orig_fifo_tasks_allowed{}; + bool m_orig_critical_task_allowed{}; +}; + +class delegated_task : public d1::task { + d1::delegate_base& m_delegate; + concurrent_monitor& m_monitor; + d1::wait_context& m_wait_ctx; + std::atomic<bool> m_completed; + d1::task* execute(d1::execution_data& ed) override { + const execution_data_ext& ed_ext = static_cast<const execution_data_ext&>(ed); + execution_data_ext orig_execute_data_ext = ed_ext.task_disp->m_execute_data_ext; + __TBB_ASSERT(&ed_ext.task_disp->m_execute_data_ext == &ed, + "The execute data shall point to the current task dispatcher execute data"); + __TBB_ASSERT(ed_ext.task_disp->m_execute_data_ext.isolation == no_isolation, nullptr); + + ed_ext.task_disp->m_execute_data_ext.context = ed_ext.task_disp->get_thread_data().my_arena->my_default_ctx; + bool fifo_task_allowed = ed_ext.task_disp->allow_fifo_task(true); + try_call([&] { + m_delegate(); + }).on_completion([&] { + ed_ext.task_disp->m_execute_data_ext = orig_execute_data_ext; + ed_ext.task_disp->allow_fifo_task(fifo_task_allowed); + }); + + finalize(); + return nullptr; + } + d1::task* cancel(d1::execution_data&) override { + finalize(); + return nullptr; + } + void finalize() { + m_wait_ctx.release(); // must precede the wakeup + m_monitor.notify([this](std::uintptr_t ctx) { + return ctx == std::uintptr_t(&m_delegate); + }); // do not relax, it needs a fence! + m_completed.store(true, std::memory_order_release); } - -private: - execution_data_ext m_orig_execute_data_ext{}; - arena* m_orig_arena{ nullptr }; - observer_proxy* m_orig_last_observer{ nullptr }; - task_dispatcher* m_task_dispatcher{ nullptr }; - unsigned m_orig_slot_index{}; - bool m_orig_fifo_tasks_allowed{}; - bool m_orig_critical_task_allowed{}; -}; - -class delegated_task : public d1::task { - d1::delegate_base& m_delegate; - concurrent_monitor& m_monitor; - d1::wait_context& m_wait_ctx; - std::atomic<bool> m_completed; - d1::task* execute(d1::execution_data& ed) override { - const execution_data_ext& ed_ext = static_cast<const execution_data_ext&>(ed); - execution_data_ext orig_execute_data_ext = ed_ext.task_disp->m_execute_data_ext; - __TBB_ASSERT(&ed_ext.task_disp->m_execute_data_ext == &ed, - "The execute data shall point to the current task dispatcher execute data"); - __TBB_ASSERT(ed_ext.task_disp->m_execute_data_ext.isolation == no_isolation, nullptr); - - ed_ext.task_disp->m_execute_data_ext.context = ed_ext.task_disp->get_thread_data().my_arena->my_default_ctx; - bool fifo_task_allowed = ed_ext.task_disp->allow_fifo_task(true); - try_call([&] { - m_delegate(); - }).on_completion([&] { - ed_ext.task_disp->m_execute_data_ext = orig_execute_data_ext; - ed_ext.task_disp->allow_fifo_task(fifo_task_allowed); - }); - - finalize(); - return nullptr; - } - d1::task* cancel(d1::execution_data&) override { - finalize(); - return nullptr; - } - void finalize() { - m_wait_ctx.release(); // must precede the wakeup - m_monitor.notify([this](std::uintptr_t ctx) { - return ctx == std::uintptr_t(&m_delegate); - }); // do not relax, it needs a fence! 
- m_completed.store(true, std::memory_order_release); - } -public: - delegated_task(d1::delegate_base& d, concurrent_monitor& s, d1::wait_context& wo) - : m_delegate(d), m_monitor(s), m_wait_ctx(wo), m_completed{ false }{} +public: + delegated_task(d1::delegate_base& d, concurrent_monitor& s, d1::wait_context& wo) + : m_delegate(d), m_monitor(s), m_wait_ctx(wo), m_completed{ false }{} ~delegated_task() { - // The destructor can be called earlier than the m_monitor is notified - // because the waiting thread can be released after m_wait_ctx.release_wait. - // To close that race we wait for the m_completed signal. - spin_wait_until_eq(m_completed, true); + // The destructor can be called earlier than the m_monitor is notified + // because the waiting thread can be released after m_wait_ctx.release_wait. + // To close that race we wait for the m_completed signal. + spin_wait_until_eq(m_completed, true); } }; -void task_arena_impl::execute(d1::task_arena_base& ta, d1::delegate_base& d) { - arena* a = ta.my_arena.load(std::memory_order_relaxed); - __TBB_ASSERT(a != nullptr, nullptr); - thread_data* td = governor::get_thread_data(); - - bool same_arena = td->my_arena == a; - std::size_t index1 = td->my_arena_index; - if (!same_arena) { - index1 = a->occupy_free_slot</*as_worker */false>(*td); - if (index1 == arena::out_of_arena) { - concurrent_monitor::thread_context waiter((std::uintptr_t)&d); - d1::wait_context wo(1); - d1::task_group_context exec_context(d1::task_group_context::isolated); - task_group_context_impl::copy_fp_settings(exec_context, *a->my_default_ctx); - - delegated_task dt(d, a->my_exit_monitors, wo); - a->enqueue_task( dt, exec_context, *td); - size_t index2 = arena::out_of_arena; - do { - a->my_exit_monitors.prepare_wait(waiter); - if (!wo.continue_execution()) { - a->my_exit_monitors.cancel_wait(waiter); - break; - } - index2 = a->occupy_free_slot</*as_worker*/false>(*td); - if (index2 != arena::out_of_arena) { - a->my_exit_monitors.cancel_wait(waiter); - nested_arena_context scope(*td, *a, index2 ); - r1::wait(wo, exec_context); - __TBB_ASSERT(!exec_context.my_exception, NULL); // exception can be thrown above, not deferred - break; - } - a->my_exit_monitors.commit_wait(waiter); - } while (wo.continue_execution()); - if (index2 == arena::out_of_arena) { - // notify a waiting thread even if this thread did not enter arena, - // in case it was woken by a leaving thread but did not need to enter - a->my_exit_monitors.notify_one(); // do not relax! 
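The delegated_task destructor above closes a destruction race with spin_wait_until_eq; a standalone sketch of that completion-flag idiom (illustrative names, not TBB internals): the finalizing thread publishes a flag as its last access to the object, and the destructor spins until that flag is visible.

#include <atomic>
#include <thread>

struct delegated_like {
    std::atomic<bool> completed{false};

    void finalize() {
        // ... release the wait context and notify the waiter first ...
        completed.store(true, std::memory_order_release);  // last access to *this
    }

    ~delegated_like() {
        // The waiter may destroy this object as soon as it is released, so block
        // destruction until finalize() has finished touching the object.
        while (!completed.load(std::memory_order_acquire))
            std::this_thread::yield();
    }
};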
- } - // process possible exception - if (exec_context.my_exception) { - __TBB_ASSERT(exec_context.is_group_execution_cancelled(), "The task group context with an exception should be canceled."); - exec_context.my_exception->throw_self(); - } - __TBB_ASSERT(governor::is_thread_data_set(td), nullptr); - return; - } // if (index1 == arena::out_of_arena) - } // if (!same_arena) - - context_guard_helper</*report_tasks=*/false> context_guard; - context_guard.set_ctx(a->my_default_ctx); - nested_arena_context scope(*td, *a, index1); -#if _WIN64 - try { +void task_arena_impl::execute(d1::task_arena_base& ta, d1::delegate_base& d) { + arena* a = ta.my_arena.load(std::memory_order_relaxed); + __TBB_ASSERT(a != nullptr, nullptr); + thread_data* td = governor::get_thread_data(); + + bool same_arena = td->my_arena == a; + std::size_t index1 = td->my_arena_index; + if (!same_arena) { + index1 = a->occupy_free_slot</*as_worker */false>(*td); + if (index1 == arena::out_of_arena) { + concurrent_monitor::thread_context waiter((std::uintptr_t)&d); + d1::wait_context wo(1); + d1::task_group_context exec_context(d1::task_group_context::isolated); + task_group_context_impl::copy_fp_settings(exec_context, *a->my_default_ctx); + + delegated_task dt(d, a->my_exit_monitors, wo); + a->enqueue_task( dt, exec_context, *td); + size_t index2 = arena::out_of_arena; + do { + a->my_exit_monitors.prepare_wait(waiter); + if (!wo.continue_execution()) { + a->my_exit_monitors.cancel_wait(waiter); + break; + } + index2 = a->occupy_free_slot</*as_worker*/false>(*td); + if (index2 != arena::out_of_arena) { + a->my_exit_monitors.cancel_wait(waiter); + nested_arena_context scope(*td, *a, index2 ); + r1::wait(wo, exec_context); + __TBB_ASSERT(!exec_context.my_exception, NULL); // exception can be thrown above, not deferred + break; + } + a->my_exit_monitors.commit_wait(waiter); + } while (wo.continue_execution()); + if (index2 == arena::out_of_arena) { + // notify a waiting thread even if this thread did not enter arena, + // in case it was woken by a leaving thread but did not need to enter + a->my_exit_monitors.notify_one(); // do not relax! + } + // process possible exception + if (exec_context.my_exception) { + __TBB_ASSERT(exec_context.is_group_execution_cancelled(), "The task group context with an exception should be canceled."); + exec_context.my_exception->throw_self(); + } + __TBB_ASSERT(governor::is_thread_data_set(td), nullptr); + return; + } // if (index1 == arena::out_of_arena) + } // if (!same_arena) + + context_guard_helper</*report_tasks=*/false> context_guard; + context_guard.set_ctx(a->my_default_ctx); + nested_arena_context scope(*td, *a, index1); +#if _WIN64 + try { #endif - d(); - __TBB_ASSERT(same_arena || governor::is_thread_data_set(td), nullptr); -#if _WIN64 - } catch (...) { - context_guard.restore_default(); - throw; - } -#endif -} - -void task_arena_impl::wait(d1::task_arena_base& ta) { - arena* a = ta.my_arena.load(std::memory_order_relaxed); - __TBB_ASSERT(a != nullptr, nullptr); - thread_data* td = governor::get_thread_data(); - __TBB_ASSERT_EX(td, "Scheduler is not initialized"); - __TBB_ASSERT(td->my_arena != a || td->my_arena_index == 0, "internal_wait is not supported within a worker context" ); - if (a->my_max_num_workers != 0) { - while (a->num_workers_active() || a->my_pool_state.load(std::memory_order_acquire) != arena::SNAPSHOT_EMPTY) { - yield(); + d(); + __TBB_ASSERT(same_arena || governor::is_thread_data_set(td), nullptr); +#if _WIN64 + } catch (...) 
{ + context_guard.restore_default(); + throw; + } +#endif +} + +void task_arena_impl::wait(d1::task_arena_base& ta) { + arena* a = ta.my_arena.load(std::memory_order_relaxed); + __TBB_ASSERT(a != nullptr, nullptr); + thread_data* td = governor::get_thread_data(); + __TBB_ASSERT_EX(td, "Scheduler is not initialized"); + __TBB_ASSERT(td->my_arena != a || td->my_arena_index == 0, "internal_wait is not supported within a worker context" ); + if (a->my_max_num_workers != 0) { + while (a->num_workers_active() || a->my_pool_state.load(std::memory_order_acquire) != arena::SNAPSHOT_EMPTY) { + yield(); } - } + } } -int task_arena_impl::max_concurrency(const d1::task_arena_base *ta) { - arena* a = nullptr; - if( ta ) // for special cases of ta->max_concurrency() - a = ta->my_arena.load(std::memory_order_relaxed); - else if( thread_data* td = governor::get_thread_data_if_initialized() ) - a = td->my_arena; // the current arena if any - - if( a ) { // Get parameters from the arena - __TBB_ASSERT( !ta || ta->my_max_concurrency==1, NULL ); - return a->my_num_reserved_slots + a->my_max_num_workers -#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY - + (a->my_local_concurrency_flag.test() ? 1 : 0) -#endif - ; +int task_arena_impl::max_concurrency(const d1::task_arena_base *ta) { + arena* a = nullptr; + if( ta ) // for special cases of ta->max_concurrency() + a = ta->my_arena.load(std::memory_order_relaxed); + else if( thread_data* td = governor::get_thread_data_if_initialized() ) + a = td->my_arena; // the current arena if any + + if( a ) { // Get parameters from the arena + __TBB_ASSERT( !ta || ta->my_max_concurrency==1, NULL ); + return a->my_num_reserved_slots + a->my_max_num_workers +#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY + + (a->my_local_concurrency_flag.test() ? 1 : 0) +#endif + ; + } + + if (ta && ta->my_max_concurrency == 1) { + return 1; } - if (ta && ta->my_max_concurrency == 1) { - return 1; +#if __TBB_ARENA_BINDING + if (ta) { +#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT + d1::constraints arena_constraints = d1::constraints{} + .set_numa_id(ta->my_numa_id) + .set_core_type(ta->core_type()) + .set_max_threads_per_core(ta->max_threads_per_core()); + return (int)default_concurrency(arena_constraints); +#else /*!__TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT*/ + return (int)default_concurrency(ta->my_numa_id); +#endif /*!__TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT*/ } +#endif /*!__TBB_ARENA_BINDING*/ -#if __TBB_ARENA_BINDING - if (ta) { -#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT - d1::constraints arena_constraints = d1::constraints{} - .set_numa_id(ta->my_numa_id) - .set_core_type(ta->core_type()) - .set_max_threads_per_core(ta->max_threads_per_core()); - return (int)default_concurrency(arena_constraints); -#else /*!__TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT*/ - return (int)default_concurrency(ta->my_numa_id); -#endif /*!__TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT*/ - } -#endif /*!__TBB_ARENA_BINDING*/ - - __TBB_ASSERT(!ta || ta->my_max_concurrency==d1::task_arena_base::automatic, NULL ); - return int(governor::default_num_threads()); + __TBB_ASSERT(!ta || ta->my_max_concurrency==d1::task_arena_base::automatic, NULL ); + return int(governor::default_num_threads()); } -void isolate_within_arena(d1::delegate_base& d, std::intptr_t isolation) { +void isolate_within_arena(d1::delegate_base& d, std::intptr_t isolation) { // TODO: Decide what to do if the scheduler is not initialized. Is there a use case for it? 
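isolate_within_arena here presumably backs the public this_task_arena::isolate call; a short usage sketch under that assumption (the loop body is illustrative):

#include <oneapi/tbb/task_arena.h>
#include <oneapi/tbb/parallel_for.h>

void isolation_sketch() {
    oneapi::tbb::this_task_arena::isolate([] {
        // While this thread waits inside the isolated region it does not pick up
        // unrelated outer tasks, mirroring the isolation tag set in the callable.
        oneapi::tbb::parallel_for(0, 10, [](int) { /* isolated work */ });
    });
}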
- thread_data* tls = governor::get_thread_data(); - assert_pointers_valid(tls, tls->my_task_dispatcher); - task_dispatcher* dispatcher = tls->my_task_dispatcher; - isolation_type previous_isolation = dispatcher->m_execute_data_ext.isolation; - try_call([&] { - // We temporarily change the isolation tag of the currently running task. It will be restored in the destructor of the guard. - isolation_type current_isolation = isolation ? isolation : reinterpret_cast<isolation_type>(&d); - // Save the current isolation value and set new one - previous_isolation = dispatcher->set_isolation(current_isolation); - // Isolation within this callable - d(); - }).on_completion([&] { - __TBB_ASSERT(governor::get_thread_data()->my_task_dispatcher == dispatcher, NULL); - dispatcher->set_isolation(previous_isolation); - }); + thread_data* tls = governor::get_thread_data(); + assert_pointers_valid(tls, tls->my_task_dispatcher); + task_dispatcher* dispatcher = tls->my_task_dispatcher; + isolation_type previous_isolation = dispatcher->m_execute_data_ext.isolation; + try_call([&] { + // We temporarily change the isolation tag of the currently running task. It will be restored in the destructor of the guard. + isolation_type current_isolation = isolation ? isolation : reinterpret_cast<isolation_type>(&d); + // Save the current isolation value and set new one + previous_isolation = dispatcher->set_isolation(current_isolation); + // Isolation within this callable + d(); + }).on_completion([&] { + __TBB_ASSERT(governor::get_thread_data()->my_task_dispatcher == dispatcher, NULL); + dispatcher->set_isolation(previous_isolation); + }); } -} // namespace r1 -} // namespace detail -} // namespace tbb +} // namespace r1 +} // namespace detail +} // namespace tbb diff --git a/contrib/libs/tbb/src/tbb/arena.h b/contrib/libs/tbb/src/tbb/arena.h index c085808277..b1b9c3dc93 100644 --- a/contrib/libs/tbb/src/tbb/arena.h +++ b/contrib/libs/tbb/src/tbb/arena.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -17,279 +17,279 @@ #ifndef _TBB_arena_H #define _TBB_arena_H -#include <atomic> -#include <cstring> +#include <atomic> +#include <cstring> -#include "oneapi/tbb/detail/_task.h" +#include "oneapi/tbb/detail/_task.h" #include "scheduler_common.h" #include "intrusive_list.h" #include "task_stream.h" -#include "arena_slot.h" -#include "rml_tbb.h" +#include "arena_slot.h" +#include "rml_tbb.h" #include "mailbox.h" #include "market.h" #include "governor.h" #include "concurrent_monitor.h" -#include "observer_proxy.h" -#include "oneapi/tbb/spin_mutex.h" +#include "observer_proxy.h" +#include "oneapi/tbb/spin_mutex.h" namespace tbb { -namespace detail { -namespace r1 { +namespace detail { +namespace r1 { -class task_dispatcher; +class task_dispatcher; class task_group_context; class allocate_root_with_context_proxy; -#if __TBB_ARENA_BINDING -class numa_binding_observer; -#endif /*__TBB_ARENA_BINDING*/ - -//! Bounded coroutines cache LIFO ring buffer -class arena_co_cache { - //! Ring buffer storage - task_dispatcher** my_co_scheduler_cache; - //! Current cache index - unsigned my_head; - //! Cache capacity for arena - unsigned my_max_index; - //! Accessor lock for modification operations - tbb::spin_mutex my_co_cache_mutex; - - unsigned next_index() { - return ( my_head == my_max_index ) ? 
0 : my_head + 1; - } - - unsigned prev_index() { - return ( my_head == 0 ) ? my_max_index : my_head - 1; - } - - bool internal_empty() { - return my_co_scheduler_cache[prev_index()] == nullptr; - } - - void internal_task_dispatcher_cleanup(task_dispatcher* to_cleanup) { - to_cleanup->~task_dispatcher(); - cache_aligned_deallocate(to_cleanup); - } - -public: - void init(unsigned cache_capacity) { - std::size_t alloc_size = cache_capacity * sizeof(task_dispatcher*); - my_co_scheduler_cache = (task_dispatcher**)cache_aligned_allocate(alloc_size); - std::memset( my_co_scheduler_cache, 0, alloc_size ); - my_head = 0; - my_max_index = cache_capacity - 1; - } - - void cleanup() { - while (task_dispatcher* to_cleanup = pop()) { - internal_task_dispatcher_cleanup(to_cleanup); - } - cache_aligned_deallocate(my_co_scheduler_cache); - } - - //! Insert scheduler to the current available place. - //! Replace an old value, if necessary. - void push(task_dispatcher* s) { - task_dispatcher* to_cleanup = nullptr; - { - tbb::spin_mutex::scoped_lock lock(my_co_cache_mutex); - // Check if we are replacing some existing buffer entrance - if (my_co_scheduler_cache[my_head] != nullptr) { - to_cleanup = my_co_scheduler_cache[my_head]; - } - // Store the cached value - my_co_scheduler_cache[my_head] = s; - // Move head index to the next slot - my_head = next_index(); - } - // Cleanup replaced buffer if any - if (to_cleanup) { - internal_task_dispatcher_cleanup(to_cleanup); - } - } - - //! Get a cached scheduler if any - task_dispatcher* pop() { - tbb::spin_mutex::scoped_lock lock(my_co_cache_mutex); - // No cached coroutine - if (internal_empty()) { - return nullptr; - } - // Move head index to the currently available value - my_head = prev_index(); - // Retrieve the value from the buffer - task_dispatcher* to_return = my_co_scheduler_cache[my_head]; - // Clear the previous entrance value - my_co_scheduler_cache[my_head] = nullptr; - return to_return; - } -}; - -struct stack_anchor_type { - stack_anchor_type() = default; - stack_anchor_type(const stack_anchor_type&) = delete; -}; - -#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY -class atomic_flag { - static const std::uintptr_t SET = 1; - static const std::uintptr_t EMPTY = 0; - std::atomic<std::uintptr_t> my_state; -public: - bool test_and_set() { - std::uintptr_t state = my_state.load(std::memory_order_acquire); - switch (state) { - case SET: - return false; - default: /* busy */ - if (my_state.compare_exchange_strong(state, SET)) { - // We interrupted clear transaction - return false; - } - if (state != EMPTY) { - // We lost our epoch - return false; - } - // We are too late but still in the same epoch - __TBB_fallthrough; - case EMPTY: - return my_state.compare_exchange_strong(state, SET); - } - } - template <typename Pred> - bool try_clear_if(Pred&& pred) { - std::uintptr_t busy = std::uintptr_t(&busy); - std::uintptr_t state = my_state.load(std::memory_order_acquire); - if (state == SET && my_state.compare_exchange_strong(state, busy)) { - if (pred()) { - return my_state.compare_exchange_strong(busy, EMPTY); - } - // The result of the next operation is discarded, always false should be returned. - my_state.compare_exchange_strong(busy, SET); - } - return false; - } - void clear() { - my_state.store(EMPTY, std::memory_order_release); - } - bool test() { - return my_state.load(std::memory_order_acquire) != EMPTY; - } -}; -#endif - +#if __TBB_ARENA_BINDING +class numa_binding_observer; +#endif /*__TBB_ARENA_BINDING*/ + +//! 
Bounded coroutines cache LIFO ring buffer +class arena_co_cache { + //! Ring buffer storage + task_dispatcher** my_co_scheduler_cache; + //! Current cache index + unsigned my_head; + //! Cache capacity for arena + unsigned my_max_index; + //! Accessor lock for modification operations + tbb::spin_mutex my_co_cache_mutex; + + unsigned next_index() { + return ( my_head == my_max_index ) ? 0 : my_head + 1; + } + + unsigned prev_index() { + return ( my_head == 0 ) ? my_max_index : my_head - 1; + } + + bool internal_empty() { + return my_co_scheduler_cache[prev_index()] == nullptr; + } + + void internal_task_dispatcher_cleanup(task_dispatcher* to_cleanup) { + to_cleanup->~task_dispatcher(); + cache_aligned_deallocate(to_cleanup); + } + +public: + void init(unsigned cache_capacity) { + std::size_t alloc_size = cache_capacity * sizeof(task_dispatcher*); + my_co_scheduler_cache = (task_dispatcher**)cache_aligned_allocate(alloc_size); + std::memset( my_co_scheduler_cache, 0, alloc_size ); + my_head = 0; + my_max_index = cache_capacity - 1; + } + + void cleanup() { + while (task_dispatcher* to_cleanup = pop()) { + internal_task_dispatcher_cleanup(to_cleanup); + } + cache_aligned_deallocate(my_co_scheduler_cache); + } + + //! Insert scheduler to the current available place. + //! Replace an old value, if necessary. + void push(task_dispatcher* s) { + task_dispatcher* to_cleanup = nullptr; + { + tbb::spin_mutex::scoped_lock lock(my_co_cache_mutex); + // Check if we are replacing some existing buffer entrance + if (my_co_scheduler_cache[my_head] != nullptr) { + to_cleanup = my_co_scheduler_cache[my_head]; + } + // Store the cached value + my_co_scheduler_cache[my_head] = s; + // Move head index to the next slot + my_head = next_index(); + } + // Cleanup replaced buffer if any + if (to_cleanup) { + internal_task_dispatcher_cleanup(to_cleanup); + } + } + + //! 
Get a cached scheduler if any + task_dispatcher* pop() { + tbb::spin_mutex::scoped_lock lock(my_co_cache_mutex); + // No cached coroutine + if (internal_empty()) { + return nullptr; + } + // Move head index to the currently available value + my_head = prev_index(); + // Retrieve the value from the buffer + task_dispatcher* to_return = my_co_scheduler_cache[my_head]; + // Clear the previous entrance value + my_co_scheduler_cache[my_head] = nullptr; + return to_return; + } +}; + +struct stack_anchor_type { + stack_anchor_type() = default; + stack_anchor_type(const stack_anchor_type&) = delete; +}; + +#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY +class atomic_flag { + static const std::uintptr_t SET = 1; + static const std::uintptr_t EMPTY = 0; + std::atomic<std::uintptr_t> my_state; +public: + bool test_and_set() { + std::uintptr_t state = my_state.load(std::memory_order_acquire); + switch (state) { + case SET: + return false; + default: /* busy */ + if (my_state.compare_exchange_strong(state, SET)) { + // We interrupted clear transaction + return false; + } + if (state != EMPTY) { + // We lost our epoch + return false; + } + // We are too late but still in the same epoch + __TBB_fallthrough; + case EMPTY: + return my_state.compare_exchange_strong(state, SET); + } + } + template <typename Pred> + bool try_clear_if(Pred&& pred) { + std::uintptr_t busy = std::uintptr_t(&busy); + std::uintptr_t state = my_state.load(std::memory_order_acquire); + if (state == SET && my_state.compare_exchange_strong(state, busy)) { + if (pred()) { + return my_state.compare_exchange_strong(busy, EMPTY); + } + // The result of the next operation is discarded, always false should be returned. + my_state.compare_exchange_strong(busy, SET); + } + return false; + } + void clear() { + my_state.store(EMPTY, std::memory_order_release); + } + bool test() { + return my_state.load(std::memory_order_acquire) != EMPTY; + } +}; +#endif + //! The structure of an arena, except the array of slots. /** Separated in order to simplify padding. Intrusive list node base class is used by market to form a list of arenas. **/ struct arena_base : padded<intrusive_list_node> { //! The number of workers that have been marked out by the resource manager to service the arena. - std::atomic<unsigned> my_num_workers_allotted; // heavy use in stealing loop + std::atomic<unsigned> my_num_workers_allotted; // heavy use in stealing loop //! Reference counter for the arena. - /** Worker and external thread references are counted separately: first several bits are for references - from external thread threads or explicit task_arenas (see arena::ref_external_bits below); + /** Worker and external thread references are counted separately: first several bits are for references + from external thread threads or explicit task_arenas (see arena::ref_external_bits below); the rest counts the number of workers servicing the arena. */ - std::atomic<unsigned> my_references; // heavy use in stealing loop + std::atomic<unsigned> my_references; // heavy use in stealing loop //! The maximal number of currently busy slots. - std::atomic<unsigned> my_limit; // heavy use in stealing loop + std::atomic<unsigned> my_limit; // heavy use in stealing loop //! Task pool for the tasks scheduled via task::enqueue() method. /** Such scheduling guarantees eventual execution even if - new tasks are constantly coming (by extracting scheduled tasks in relaxed FIFO order); - - the enqueuing thread does not call any of wait_for_all methods. 
**/ - task_stream<front_accessor> my_fifo_task_stream; // heavy use in stealing loop - - //! Task pool for the tasks scheduled via tbb::resume() function. - task_stream<front_accessor> my_resume_task_stream; // heavy use in stealing loop - -#if __TBB_PREVIEW_CRITICAL_TASKS - //! Task pool for the tasks with critical property set. - /** Critical tasks are scheduled for execution ahead of other sources (including local task pool - and even bypassed tasks) unless the thread already executes a critical task in an outer - dispatch loop **/ - // used on the hot path of the task dispatch loop - task_stream<back_nonnull_accessor> my_critical_task_stream; -#endif - - //! The number of workers requested by the external thread owning the arena. + - the enqueuing thread does not call any of wait_for_all methods. **/ + task_stream<front_accessor> my_fifo_task_stream; // heavy use in stealing loop + + //! Task pool for the tasks scheduled via tbb::resume() function. + task_stream<front_accessor> my_resume_task_stream; // heavy use in stealing loop + +#if __TBB_PREVIEW_CRITICAL_TASKS + //! Task pool for the tasks with critical property set. + /** Critical tasks are scheduled for execution ahead of other sources (including local task pool + and even bypassed tasks) unless the thread already executes a critical task in an outer + dispatch loop **/ + // used on the hot path of the task dispatch loop + task_stream<back_nonnull_accessor> my_critical_task_stream; +#endif + + //! The number of workers requested by the external thread owning the arena. unsigned my_max_num_workers; - //! The total number of workers that are requested from the resource manager. - int my_total_num_workers_requested; - - //! The number of workers that are really requested from the resource manager. - //! Possible values are in [0, my_max_num_workers] + //! The total number of workers that are requested from the resource manager. + int my_total_num_workers_requested; + + //! The number of workers that are really requested from the resource manager. + //! Possible values are in [0, my_max_num_workers] int my_num_workers_requested; - //! The index in the array of per priority lists of arenas this object is in. - /*const*/ unsigned my_priority_level; - - //! The max priority level of arena in market. - std::atomic<bool> my_is_top_priority{false}; - + //! The index in the array of per priority lists of arenas this object is in. + /*const*/ unsigned my_priority_level; + + //! The max priority level of arena in market. + std::atomic<bool> my_is_top_priority{false}; + //! Current task pool state and estimate of available tasks amount. /** The estimate is either 0 (SNAPSHOT_EMPTY) or infinity (SNAPSHOT_FULL). Special state is "busy" (any other unsigned value). Note that the implementation of arena::is_busy_or_empty() requires my_pool_state to be unsigned. */ - using pool_state_t = std::uintptr_t ; - std::atomic<pool_state_t> my_pool_state; + using pool_state_t = std::uintptr_t ; + std::atomic<pool_state_t> my_pool_state; //! The list of local observers attached to this arena. observer_list my_observers; -#if __TBB_ARENA_BINDING - //! Pointer to internal observer that allows to bind threads in arena to certain NUMA node. - numa_binding_observer* my_numa_binding_observer; -#endif /*__TBB_ARENA_BINDING*/ - +#if __TBB_ARENA_BINDING + //! Pointer to internal observer that allows to bind threads in arena to certain NUMA node. 
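The NUMA binding observer declared next is driven through the public constraints interface; a sketch of that usage, assuming a oneTBB build where the TBBBind/HWLOC layer is available so info::numa_nodes() reports real nodes:

#include <oneapi/tbb/task_arena.h>
#include <oneapi/tbb/info.h>
#include <cstddef>
#include <vector>

void numa_arena_sketch() {
    std::vector<oneapi::tbb::numa_node_id> nodes = oneapi::tbb::info::numa_nodes();
    std::vector<oneapi::tbb::task_arena> arenas(nodes.size());
    for (std::size_t i = 0; i < nodes.size(); ++i) {
        oneapi::tbb::task_arena::constraints c;
        c.numa_id = nodes[i];        // threads of this arena get pinned to one NUMA node
        arenas[i].initialize(c);     // the pinning itself is done by a numa_binding_observer
        arenas[i].execute([] { /* node-local work */ });
    }
}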
+ numa_binding_observer* my_numa_binding_observer; +#endif /*__TBB_ARENA_BINDING*/ + // Below are rarely modified members //! The market that owns this arena. market* my_market; //! ABA prevention marker. - std::uintptr_t my_aba_epoch; + std::uintptr_t my_aba_epoch; //! Default task group context. - d1::task_group_context* my_default_ctx; + d1::task_group_context* my_default_ctx; //! The number of slots in the arena. unsigned my_num_slots; - //! The number of reserved slots (can be occupied only by external threads). + //! The number of reserved slots (can be occupied only by external threads). unsigned my_num_reserved_slots; #if __TBB_ENQUEUE_ENFORCED_CONCURRENCY - // arena needs an extra worker despite the arena limit - atomic_flag my_local_concurrency_flag; - // the number of local mandatory concurrency requests - int my_local_concurrency_requests; - // arena needs an extra worker despite a global limit - std::atomic<bool> my_global_concurrency_mode; + // arena needs an extra worker despite the arena limit + atomic_flag my_local_concurrency_flag; + // the number of local mandatory concurrency requests + int my_local_concurrency_requests; + // arena needs an extra worker despite a global limit + std::atomic<bool> my_global_concurrency_mode; #endif /* __TBB_ENQUEUE_ENFORCED_CONCURRENCY */ - //! Waiting object for external threads that cannot join the arena. + //! Waiting object for external threads that cannot join the arena. concurrent_monitor my_exit_monitors; - //! Coroutines (task_dispathers) cache buffer - arena_co_cache my_co_cache; - + //! Coroutines (task_dispathers) cache buffer + arena_co_cache my_co_cache; + #if TBB_USE_ASSERT //! Used to trap accesses to the object after its destruction. - std::uintptr_t my_guard; + std::uintptr_t my_guard; #endif /* TBB_USE_ASSERT */ }; // struct arena_base class arena: public padded<arena_base> { public: - using base_type = padded<arena_base>; + using base_type = padded<arena_base>; //! Types of work advertised by advertise_new_work() enum new_work_type { @@ -299,25 +299,25 @@ public: }; //! Constructor - arena ( market& m, unsigned max_num_workers, unsigned num_reserved_slots, unsigned priority_level); + arena ( market& m, unsigned max_num_workers, unsigned num_reserved_slots, unsigned priority_level); //! Allocate an instance of arena. - static arena& allocate_arena( market& m, unsigned num_slots, unsigned num_reserved_slots, - unsigned priority_level ); + static arena& allocate_arena( market& m, unsigned num_slots, unsigned num_reserved_slots, + unsigned priority_level ); static int unsigned num_arena_slots ( unsigned num_slots ) { return max(2u, num_slots); } static int allocation_size ( unsigned num_slots ) { - return sizeof(base_type) + num_slots * (sizeof(mail_outbox) + sizeof(arena_slot) + sizeof(task_dispatcher)); + return sizeof(base_type) + num_slots * (sizeof(mail_outbox) + sizeof(arena_slot) + sizeof(task_dispatcher)); } - //! Get reference to mailbox corresponding to given slot_id - mail_outbox& mailbox( d1::slot_id slot ) { - __TBB_ASSERT( slot != d1::no_slot, "affinity should be specified" ); + //! Get reference to mailbox corresponding to given slot_id + mail_outbox& mailbox( d1::slot_id slot ) { + __TBB_ASSERT( slot != d1::no_slot, "affinity should be specified" ); - return reinterpret_cast<mail_outbox*>(this)[-(int)(slot+1)]; // cast to 'int' is redundant but left for readability + return reinterpret_cast<mail_outbox*>(this)[-(int)(slot+1)]; // cast to 'int' is redundant but left for readability } //! 
Completes arena shutdown, destructs and deallocates it. @@ -334,62 +334,62 @@ public: //! Reference increment values for externals and workers static const unsigned ref_external = 1; - static const unsigned ref_worker = 1 << ref_external_bits; + static const unsigned ref_worker = 1 << ref_external_bits; //! No tasks to steal or snapshot is being taken. static bool is_busy_or_empty( pool_state_t s ) { return s < SNAPSHOT_FULL; } //! The number of workers active in the arena. - unsigned num_workers_active() const { - return my_references.load(std::memory_order_acquire) >> ref_external_bits; + unsigned num_workers_active() const { + return my_references.load(std::memory_order_acquire) >> ref_external_bits; + } + + //! Check if the recall is requested by the market. + bool is_recall_requested() const { + return num_workers_active() > my_num_workers_allotted.load(std::memory_order_relaxed); } - //! Check if the recall is requested by the market. - bool is_recall_requested() const { - return num_workers_active() > my_num_workers_allotted.load(std::memory_order_relaxed); - } - //! If necessary, raise a flag that there is new job in arena. template<arena::new_work_type work_type> void advertise_new_work(); - //! Attempts to steal a task from a randomly chosen arena slot - d1::task* steal_task(unsigned arena_index, FastRandom& frnd, execution_data_ext& ed, isolation_type isolation); - - //! Get a task from a global starvation resistant queue - template<task_stream_accessor_type accessor> - d1::task* get_stream_task(task_stream<accessor>& stream, unsigned& hint); - -#if __TBB_PREVIEW_CRITICAL_TASKS - //! Tries to find a critical task in global critical task stream - d1::task* get_critical_task(unsigned& hint, isolation_type isolation); -#endif - + //! Attempts to steal a task from a randomly chosen arena slot + d1::task* steal_task(unsigned arena_index, FastRandom& frnd, execution_data_ext& ed, isolation_type isolation); + + //! Get a task from a global starvation resistant queue + template<task_stream_accessor_type accessor> + d1::task* get_stream_task(task_stream<accessor>& stream, unsigned& hint); + +#if __TBB_PREVIEW_CRITICAL_TASKS + //! Tries to find a critical task in global critical task stream + d1::task* get_critical_task(unsigned& hint, isolation_type isolation); +#endif + //! Check if there is job anywhere in arena. /** Return true if no job or if arena is being cleaned up. */ bool is_out_of_work(); //! enqueue a task into starvation-resistance queue - void enqueue_task(d1::task&, d1::task_group_context&, thread_data&); + void enqueue_task(d1::task&, d1::task_group_context&, thread_data&); //! Registers the worker with the arena and enters TBB scheduler dispatch loop - void process(thread_data&); + void process(thread_data&); - //! Notification that the thread leaves its arena + //! Notification that the thread leaves its arena template<unsigned ref_param> inline void on_thread_leaving ( ); //! Check for the presence of enqueued tasks at all priority levels bool has_enqueued_tasks(); - static const std::size_t out_of_arena = ~size_t(0); + static const std::size_t out_of_arena = ~size_t(0); //! Tries to occupy a slot in the arena. On success, returns the slot index; if no slot is available, returns out_of_arena. template <bool as_worker> - std::size_t occupy_free_slot(thread_data&); + std::size_t occupy_free_slot(thread_data&); //! Tries to occupy a slot in the specified range. 
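ref_external and ref_worker above pack two reference counts into the single my_references word; a standalone sketch of that packing (the 12-bit split is an assumption for illustration; the real width is ref_external_bits):

#include <atomic>

class split_refcount {
    static constexpr unsigned external_bits = 12;   // assumed width of the external field
    static constexpr unsigned ref_external  = 1u;
    static constexpr unsigned ref_worker    = 1u << external_bits;
    std::atomic<unsigned> refs{0};
public:
    void add_external() { refs.fetch_add(ref_external, std::memory_order_relaxed); }
    void add_worker()   { refs.fetch_add(ref_worker,   std::memory_order_relaxed); }

    // Matches num_workers_active(): workers live in the high part of the word.
    unsigned workers_active() const {
        return refs.load(std::memory_order_acquire) >> external_bits;
    }

    // True when the caller removed the last reference and must start teardown,
    // analogous to the (my_references -= ref_param) == 0 check in on_thread_leaving.
    bool release(unsigned delta) {
        return refs.fetch_sub(delta, std::memory_order_acq_rel) == delta;
    }
};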
- std::size_t occupy_free_slot_in_range(thread_data& tls, std::size_t lower, std::size_t upper); + std::size_t occupy_free_slot_in_range(thread_data& tls, std::size_t lower, std::size_t upper); + + std::uintptr_t calculate_stealing_threshold(); - std::uintptr_t calculate_stealing_threshold(); - /** Must be the last data field */ arena_slot my_slots[1]; }; // class arena @@ -403,14 +403,14 @@ inline void arena::on_thread_leaving ( ) { // current design. // // In case of using fire-and-forget tasks (scheduled via task::enqueue()) - // external thread is allowed to leave its arena before all its work is executed, + // external thread is allowed to leave its arena before all its work is executed, // and market may temporarily revoke all workers from this arena. Since revoked // workers never attempt to reset arena state to EMPTY and cancel its request // to RML for threads, the arena object is destroyed only when both the last - // thread is leaving it and arena's state is EMPTY (that is its external thread + // thread is leaving it and arena's state is EMPTY (that is its external thread // left and it does not contain any work). // Thus resetting arena to EMPTY state (as earlier TBB versions did) should not - // be done here (or anywhere else in the external thread to that matter); doing so + // be done here (or anywhere else in the external thread to that matter); doing so // can result either in arena's premature destruction (at least without // additional costly checks in workers) or in unnecessary arena state changes // (and ensuing workers migration). @@ -441,7 +441,7 @@ inline void arena::on_thread_leaving ( ) { // In both cases we cannot dereference arena pointer after the refcount is // decremented, as our arena may already be destroyed. // - // If this is the external thread, the market is protected by refcount to it. + // If this is the external thread, the market is protected by refcount to it. // In case of workers market's liveness is ensured by the RML connection // rundown protocol, according to which the client (i.e. the market) lives // until RML server notifies it about connection termination, and this @@ -450,10 +450,10 @@ inline void arena::on_thread_leaving ( ) { // Thus if we decremented refcount to zero we ask the market to check arena // state (including the fact if it is alive) under the lock. // - std::uintptr_t aba_epoch = my_aba_epoch; - unsigned priority_level = my_priority_level; + std::uintptr_t aba_epoch = my_aba_epoch; + unsigned priority_level = my_priority_level; market* m = my_market; - __TBB_ASSERT(my_references.load(std::memory_order_relaxed) >= ref_param, "broken arena reference counter"); + __TBB_ASSERT(my_references.load(std::memory_order_relaxed) >= ref_param, "broken arena reference counter"); #if __TBB_ENQUEUE_ENFORCED_CONCURRENCY // When there is no workers someone must free arena, as // without workers, no one calls is_out_of_work(). @@ -461,64 +461,64 @@ inline void arena::on_thread_leaving ( ) { // TODO: consider more strict conditions for the cleanup, // because it can create the demand of workers, // but the arena can be already empty (and so ready for destroying) - // TODO: Fix the race: while we check soft limit and it might be changed. + // TODO: Fix the race: while we check soft limit and it might be changed. 
if( ref_param==ref_external && my_num_slots != my_num_reserved_slots - && 0 == m->my_num_workers_soft_limit.load(std::memory_order_relaxed) && - !my_global_concurrency_mode.load(std::memory_order_relaxed) ) { - is_out_of_work(); + && 0 == m->my_num_workers_soft_limit.load(std::memory_order_relaxed) && + !my_global_concurrency_mode.load(std::memory_order_relaxed) ) { + is_out_of_work(); // We expect, that in worst case it's enough to have num_priority_levels-1 - // calls to restore priorities and yet another is_out_of_work() to conform + // calls to restore priorities and yet another is_out_of_work() to conform // that no work was found. But as market::set_active_num_workers() can be called // concurrently, can't guarantee last is_out_of_work() return true. } #endif if ( (my_references -= ref_param ) == 0 ) - m->try_destroy_arena( this, aba_epoch, priority_level ); + m->try_destroy_arena( this, aba_epoch, priority_level ); } -template<arena::new_work_type work_type> -void arena::advertise_new_work() { - auto is_related_arena = [&] (extended_context context) { - return this == context.my_arena_addr; - }; - +template<arena::new_work_type work_type> +void arena::advertise_new_work() { + auto is_related_arena = [&] (extended_context context) { + return this == context.my_arena_addr; + }; + if( work_type == work_enqueued ) { - atomic_fence(std::memory_order_seq_cst); + atomic_fence(std::memory_order_seq_cst); #if __TBB_ENQUEUE_ENFORCED_CONCURRENCY - if ( my_market->my_num_workers_soft_limit.load(std::memory_order_acquire) == 0 && - my_global_concurrency_mode.load(std::memory_order_acquire) == false ) - my_market->enable_mandatory_concurrency(this); - - if (my_max_num_workers == 0 && my_num_reserved_slots == 1 && my_local_concurrency_flag.test_and_set()) { - my_market->adjust_demand(*this, /* delta = */ 1, /* mandatory = */ true); + if ( my_market->my_num_workers_soft_limit.load(std::memory_order_acquire) == 0 && + my_global_concurrency_mode.load(std::memory_order_acquire) == false ) + my_market->enable_mandatory_concurrency(this); + + if (my_max_num_workers == 0 && my_num_reserved_slots == 1 && my_local_concurrency_flag.test_and_set()) { + my_market->adjust_demand(*this, /* delta = */ 1, /* mandatory = */ true); } #endif /* __TBB_ENQUEUE_ENFORCED_CONCURRENCY */ // Local memory fence here and below is required to avoid missed wakeups; see the comment below. // Starvation resistant tasks require concurrency, so missed wakeups are unacceptable. } else if( work_type == wakeup ) { - atomic_fence(std::memory_order_seq_cst); + atomic_fence(std::memory_order_seq_cst); } - + // Double-check idiom that, in case of spawning, is deliberately sloppy about memory fences. // Technically, to avoid missed wakeups, there should be a full memory fence between the point we // released the task pool (i.e. spawned task) and read the arena's state. However, adding such a // fence might hurt overall performance more than it helps, because the fence would be executed // on every task pool release, even when stealing does not occur. Since TBB allows parallelism, // but never promises parallelism, the missed wakeup is not a correctness problem. - pool_state_t snapshot = my_pool_state.load(std::memory_order_acquire); + pool_state_t snapshot = my_pool_state.load(std::memory_order_acquire); if( is_busy_or_empty(snapshot) ) { // Attempt to mark as full. The compare_and_swap below is a little unusual because the // result is compared to a value that can be different than the comparand argument. 
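The double-check described in the comment above can be condensed into a small standalone helper over a similar three-valued state (0 for empty, the maximal value standing in for "full", anything else busy, per the pool_state_t doc comment earlier); this is a sketch of the idiom, not the TBB code itself:

#include <atomic>
#include <cstdint>

using pool_state_t = std::uintptr_t;
constexpr pool_state_t SNAPSHOT_EMPTY = 0;
constexpr pool_state_t SNAPSHOT_FULL  = ~pool_state_t(0);

// Returns true if this caller flipped the state to "full" and therefore owns
// the responsibility of waking workers up.
bool mark_full(std::atomic<pool_state_t>& state) {
    pool_state_t snapshot = state.load(std::memory_order_acquire);
    if (snapshot == SNAPSHOT_FULL) return false;             // already advertised
    pool_state_t expected = snapshot;
    state.compare_exchange_strong(expected, SNAPSHOT_FULL);  // try busy/empty -> full
    if (expected != SNAPSHOT_EMPTY) return false;            // pool was busy, or another thread owns the wakeup
    if (snapshot != SNAPSHOT_EMPTY) {
        // We read "busy" but the pool became "empty" meanwhile, so the first
        // exchange failed against "empty"; retry from that state.
        expected = SNAPSHOT_EMPTY;
        if (!state.compare_exchange_strong(expected, SNAPSHOT_FULL))
            return false;                                    // someone else became responsible
    }
    return true;
}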
- pool_state_t expected_state = snapshot; - my_pool_state.compare_exchange_strong( expected_state, SNAPSHOT_FULL ); - if( expected_state == SNAPSHOT_EMPTY ) { - if( snapshot != SNAPSHOT_EMPTY ) { + pool_state_t expected_state = snapshot; + my_pool_state.compare_exchange_strong( expected_state, SNAPSHOT_FULL ); + if( expected_state == SNAPSHOT_EMPTY ) { + if( snapshot != SNAPSHOT_EMPTY ) { // This thread read "busy" into snapshot, and then another thread transitioned // my_pool_state to "empty" in the meantime, which caused the compare_and_swap above // to fail. Attempt to transition my_pool_state from "empty" to "full". - expected_state = SNAPSHOT_EMPTY; - if( !my_pool_state.compare_exchange_strong( expected_state, SNAPSHOT_FULL ) ) { + expected_state = SNAPSHOT_EMPTY; + if( !my_pool_state.compare_exchange_strong( expected_state, SNAPSHOT_FULL ) ) { // Some other thread transitioned my_pool_state from "empty", and hence became // responsible for waking up workers. return; @@ -528,89 +528,89 @@ void arena::advertise_new_work() { // telling the market that there is work to do. #if __TBB_ENQUEUE_ENFORCED_CONCURRENCY if( work_type == work_spawned ) { - if ( my_global_concurrency_mode.load(std::memory_order_acquire) == true ) - my_market->mandatory_concurrency_disable( this ); + if ( my_global_concurrency_mode.load(std::memory_order_acquire) == true ) + my_market->mandatory_concurrency_disable( this ); } #endif /* __TBB_ENQUEUE_ENFORCED_CONCURRENCY */ - // TODO: investigate adjusting of arena's demand by a single worker. - my_market->adjust_demand(*this, my_max_num_workers, /* mandatory = */ false); - - // Notify all sleeping threads that work has appeared in the arena. - my_market->get_wait_list().notify(is_related_arena); + // TODO: investigate adjusting of arena's demand by a single worker. + my_market->adjust_demand(*this, my_max_num_workers, /* mandatory = */ false); + + // Notify all sleeping threads that work has appeared in the arena. + my_market->get_wait_list().notify(is_related_arena); + } + } +} + +inline d1::task* arena::steal_task(unsigned arena_index, FastRandom& frnd, execution_data_ext& ed, isolation_type isolation) { + auto slot_num_limit = my_limit.load(std::memory_order_relaxed); + if (slot_num_limit == 1) { + // No slots to steal from + return nullptr; + } + // Try to steal a task from a random victim. + std::size_t k = frnd.get() % (slot_num_limit - 1); + // The following condition excludes the external thread that might have + // already taken our previous place in the arena from the list . + // of potential victims. But since such a situation can take + // place only in case of significant oversubscription, keeping + // the checks simple seems to be preferable to complicating the code. 
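The adjustment explained in the comment above removes the caller's own slot index from the candidate set while keeping the choice uniform over the remaining slots; a standalone sketch of just that step (assumes at least two slots and a caller-provided RNG; steal_task uses its own FastRandom instead):

#include <cstddef>
#include <random>

std::size_t choose_victim(std::size_t self, std::size_t slot_count, std::mt19937& rng) {
    // Draw over slot_count - 1 candidates, then shift the index past our own
    // slot so every other slot keeps equal probability.
    std::size_t k = std::uniform_int_distribution<std::size_t>(0, slot_count - 2)(rng);
    if (k >= self) ++k;
    return k;
}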
+ if (k >= arena_index) { + ++k; // Adjusts random distribution to exclude self + } + arena_slot* victim = &my_slots[k]; + d1::task **pool = victim->task_pool.load(std::memory_order_relaxed); + d1::task *t = nullptr; + if (pool == EmptyTaskPool || !(t = victim->steal_task(*this, isolation))) { + return nullptr; + } + if (task_accessor::is_proxy_task(*t)) { + task_proxy &tp = *(task_proxy*)t; + d1::slot_id slot = tp.slot; + t = tp.extract_task<task_proxy::pool_bit>(); + if (!t) { + // Proxy was empty, so it's our responsibility to free it + tp.allocator.delete_object(&tp, ed); + return nullptr; } + // Note affinity is called for any stealed task (proxy or general) + ed.affinity_slot = slot; + } else { + // Note affinity is called for any stealed task (proxy or general) + ed.affinity_slot = d1::any_slot; + } + // Update task owner thread id to identify stealing + ed.original_slot = k; + return t; +} + +template<task_stream_accessor_type accessor> +inline d1::task* arena::get_stream_task(task_stream<accessor>& stream, unsigned& hint) { + if (stream.empty()) + return nullptr; + return stream.pop(subsequent_lane_selector(hint)); +} + +#if __TBB_PREVIEW_CRITICAL_TASKS +// Retrieves critical task respecting isolation level, if provided. The rule is: +// 1) If no outer critical task and no isolation => take any critical task +// 2) If working on an outer critical task and no isolation => cannot take any critical task +// 3) If no outer critical task but isolated => respect isolation +// 4) If working on an outer critical task and isolated => respect isolation +// Hint is used to keep some LIFO-ness, start search with the lane that was used during push operation. +inline d1::task* arena::get_critical_task(unsigned& hint, isolation_type isolation) { + if (my_critical_task_stream.empty()) + return nullptr; + + if ( isolation != no_isolation ) { + return my_critical_task_stream.pop_specific( hint, isolation ); + } else { + return my_critical_task_stream.pop(preceding_lane_selector(hint)); } } +#endif // __TBB_PREVIEW_CRITICAL_TASKS -inline d1::task* arena::steal_task(unsigned arena_index, FastRandom& frnd, execution_data_ext& ed, isolation_type isolation) { - auto slot_num_limit = my_limit.load(std::memory_order_relaxed); - if (slot_num_limit == 1) { - // No slots to steal from - return nullptr; - } - // Try to steal a task from a random victim. - std::size_t k = frnd.get() % (slot_num_limit - 1); - // The following condition excludes the external thread that might have - // already taken our previous place in the arena from the list . - // of potential victims. But since such a situation can take - // place only in case of significant oversubscription, keeping - // the checks simple seems to be preferable to complicating the code. 
- if (k >= arena_index) { - ++k; // Adjusts random distribution to exclude self - } - arena_slot* victim = &my_slots[k]; - d1::task **pool = victim->task_pool.load(std::memory_order_relaxed); - d1::task *t = nullptr; - if (pool == EmptyTaskPool || !(t = victim->steal_task(*this, isolation))) { - return nullptr; - } - if (task_accessor::is_proxy_task(*t)) { - task_proxy &tp = *(task_proxy*)t; - d1::slot_id slot = tp.slot; - t = tp.extract_task<task_proxy::pool_bit>(); - if (!t) { - // Proxy was empty, so it's our responsibility to free it - tp.allocator.delete_object(&tp, ed); - return nullptr; - } - // Note affinity is called for any stealed task (proxy or general) - ed.affinity_slot = slot; - } else { - // Note affinity is called for any stealed task (proxy or general) - ed.affinity_slot = d1::any_slot; - } - // Update task owner thread id to identify stealing - ed.original_slot = k; - return t; -} - -template<task_stream_accessor_type accessor> -inline d1::task* arena::get_stream_task(task_stream<accessor>& stream, unsigned& hint) { - if (stream.empty()) - return nullptr; - return stream.pop(subsequent_lane_selector(hint)); -} - -#if __TBB_PREVIEW_CRITICAL_TASKS -// Retrieves critical task respecting isolation level, if provided. The rule is: -// 1) If no outer critical task and no isolation => take any critical task -// 2) If working on an outer critical task and no isolation => cannot take any critical task -// 3) If no outer critical task but isolated => respect isolation -// 4) If working on an outer critical task and isolated => respect isolation -// Hint is used to keep some LIFO-ness, start search with the lane that was used during push operation. -inline d1::task* arena::get_critical_task(unsigned& hint, isolation_type isolation) { - if (my_critical_task_stream.empty()) - return nullptr; - - if ( isolation != no_isolation ) { - return my_critical_task_stream.pop_specific( hint, isolation ); - } else { - return my_critical_task_stream.pop(preceding_lane_selector(hint)); - } -} -#endif // __TBB_PREVIEW_CRITICAL_TASKS - -} // namespace r1 -} // namespace detail +} // namespace r1 +} // namespace detail } // namespace tbb #endif /* _TBB_arena_H */ diff --git a/contrib/libs/tbb/src/tbb/arena_slot.cpp b/contrib/libs/tbb/src/tbb/arena_slot.cpp index 94709a6780..72706b3de5 100644 --- a/contrib/libs/tbb/src/tbb/arena_slot.cpp +++ b/contrib/libs/tbb/src/tbb/arena_slot.cpp @@ -1,219 +1,219 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -#include "arena_slot.h" -#include "arena.h" -#include "thread_data.h" - -namespace tbb { -namespace detail { -namespace r1 { - -//------------------------------------------------------------------------ -// Arena Slot -//------------------------------------------------------------------------ -d1::task* arena_slot::get_task_impl(size_t T, execution_data_ext& ed, bool& tasks_omitted, isolation_type isolation) { - __TBB_ASSERT(tail.load(std::memory_order_relaxed) <= T || is_local_task_pool_quiescent(), - "Is it safe to get a task at position T?"); - - d1::task* result = task_pool_ptr[T]; - __TBB_ASSERT(!is_poisoned( result ), "The poisoned task is going to be processed"); - - if (!result) { - return nullptr; - } - bool omit = isolation != no_isolation && isolation != task_accessor::isolation(*result); - if (!omit && !task_accessor::is_proxy_task(*result)) { - return result; - } else if (omit) { - tasks_omitted = true; - return nullptr; - } - - task_proxy& tp = static_cast<task_proxy&>(*result); - d1::slot_id aff_id = tp.slot; - if ( d1::task *t = tp.extract_task<task_proxy::pool_bit>() ) { - ed.affinity_slot = aff_id; - return t; - } - // Proxy was empty, so it's our responsibility to free it - tp.allocator.delete_object(&tp, ed); - - if ( tasks_omitted ) { - task_pool_ptr[T] = nullptr; - } - return nullptr; -} - -d1::task* arena_slot::get_task(execution_data_ext& ed, isolation_type isolation) { - __TBB_ASSERT(is_task_pool_published(), nullptr); - // The current task position in the task pool. - std::size_t T0 = tail.load(std::memory_order_relaxed); - // The bounds of available tasks in the task pool. H0 is only used when the head bound is reached. - std::size_t H0 = (std::size_t)-1, T = T0; - d1::task* result = nullptr; - bool task_pool_empty = false; - bool tasks_omitted = false; - do { - __TBB_ASSERT( !result, nullptr ); - // The full fence is required to sync the store of `tail` with the load of `head` (write-read barrier) - T = --tail; - // The acquire load of head is required to guarantee consistency of our task pool - // when a thief rolls back the head. - if ( (std::intptr_t)( head.load(std::memory_order_acquire) ) > (std::intptr_t)T ) { - acquire_task_pool(); - H0 = head.load(std::memory_order_relaxed); - if ( (std::intptr_t)H0 > (std::intptr_t)T ) { - // The thief has not backed off - nothing to grab. - __TBB_ASSERT( H0 == head.load(std::memory_order_relaxed) - && T == tail.load(std::memory_order_relaxed) - && H0 == T + 1, "victim/thief arbitration algorithm failure" ); - reset_task_pool_and_leave(); - // No tasks in the task pool. - task_pool_empty = true; - break; - } else if ( H0 == T ) { - // There is only one task in the task pool. - reset_task_pool_and_leave(); - task_pool_empty = true; - } else { - // Release task pool if there are still some tasks. - // After the release, the tail will be less than T, thus a thief - // will not attempt to get a task at position T. - release_task_pool(); - } - } - result = get_task_impl( T, ed, tasks_omitted, isolation ); - if ( result ) { - poison_pointer( task_pool_ptr[T] ); - break; - } else if ( !tasks_omitted ) { - poison_pointer( task_pool_ptr[T] ); - __TBB_ASSERT( T0 == T+1, nullptr ); - T0 = T; - } - } while ( !result && !task_pool_empty ); - - if ( tasks_omitted ) { - if ( task_pool_empty ) { - // All tasks have been checked. The task pool should be in reset state. - // We just restore the bounds for the available tasks. - // TODO: Does it have sense to move them to the beginning of the task pool? 
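get_task and steal_task below implement the owner-takes-from-tail / thief-takes-from-head discipline lock-free over the head and tail indices; as a deliberately simplified mental model (a sketch under a mutex, not the TBB algorithm), the same discipline looks like this:

#include <deque>
#include <mutex>

template <typename Task>
class simple_task_pool {
    std::deque<Task*> pool;
    std::mutex m;
public:
    void push(Task* t) {                  // owner publishes at the tail
        std::lock_guard<std::mutex> lock(m);
        pool.push_back(t);
    }
    Task* pop_local() {                   // owner takes the newest, cache-hot task
        std::lock_guard<std::mutex> lock(m);
        if (pool.empty()) return nullptr;
        Task* t = pool.back();
        pool.pop_back();
        return t;
    }
    Task* steal() {                       // thief takes the oldest task from the head
        std::lock_guard<std::mutex> lock(m);
        if (pool.empty()) return nullptr;
        Task* t = pool.front();
        pool.pop_front();
        return t;
    }
};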
- __TBB_ASSERT( is_quiescent_local_task_pool_reset(), nullptr ); - if ( result ) { - // If we have a task, it should be at H0 position. - __TBB_ASSERT( H0 == T, nullptr ); - ++H0; - } - __TBB_ASSERT( H0 <= T0, nullptr ); - if ( H0 < T0 ) { - // Restore the task pool if there are some tasks. - head.store(H0, std::memory_order_relaxed); - tail.store(T0, std::memory_order_relaxed); - // The release fence is used in publish_task_pool. - publish_task_pool(); - // Synchronize with snapshot as we published some tasks. - ed.task_disp->m_thread_data->my_arena->advertise_new_work<arena::wakeup>(); - } - } else { - // A task has been obtained. We need to make a hole in position T. - __TBB_ASSERT( is_task_pool_published(), nullptr ); - __TBB_ASSERT( result, nullptr ); - task_pool_ptr[T] = nullptr; - tail.store(T0, std::memory_order_release); - // Synchronize with snapshot as we published some tasks. - // TODO: consider some approach not to call wakeup for each time. E.g. check if the tail reached the head. - ed.task_disp->m_thread_data->my_arena->advertise_new_work<arena::wakeup>(); - } - } - - __TBB_ASSERT( (std::intptr_t)tail.load(std::memory_order_relaxed) >= 0, nullptr ); - __TBB_ASSERT( result || tasks_omitted || is_quiescent_local_task_pool_reset(), nullptr ); - return result; -} - -d1::task* arena_slot::steal_task(arena& a, isolation_type isolation) { - d1::task** victim_pool = lock_task_pool(); - if (!victim_pool) { - return nullptr; - } - d1::task* result = nullptr; - std::size_t H = head.load(std::memory_order_relaxed); // mirror - std::size_t H0 = H; - bool tasks_omitted = false; - do { - // The full fence is required to sync the store of `head` with the load of `tail` (write-read barrier) - H = ++head; - // The acquire load of tail is required to guarantee consistency of victim_pool - // because the owner synchronizes task spawning via tail. - if ((std::intptr_t)H > (std::intptr_t)(tail.load(std::memory_order_acquire))) { - // Stealing attempt failed, deque contents has not been changed by us - head.store( /*dead: H = */ H0, std::memory_order_relaxed ); - __TBB_ASSERT( !result, nullptr ); - goto unlock; - } - result = victim_pool[H-1]; - __TBB_ASSERT( !is_poisoned( result ), nullptr ); - - if (result) { - if (isolation == no_isolation || isolation == task_accessor::isolation(*result)) { - if (!task_accessor::is_proxy_task(*result)) { - break; - } - task_proxy& tp = *static_cast<task_proxy*>(result); - // If mailed task is likely to be grabbed by its destination thread, skip it. - if ( !(task_proxy::is_shared( tp.task_and_tag ) && tp.outbox->recipient_is_idle()) ) { - break; - } - } - // The task cannot be executed either due to isolation or proxy constraints. - result = nullptr; - tasks_omitted = true; - } else if (!tasks_omitted) { - // Cleanup the task pool from holes until a task is skipped. - __TBB_ASSERT( H0 == H-1, nullptr ); - poison_pointer( victim_pool[H0] ); - H0 = H; - } - } while (!result); - __TBB_ASSERT( result, nullptr ); - - // emit "task was consumed" signal - poison_pointer( victim_pool[H-1] ); - if (tasks_omitted) { - // Some proxies in the task pool have been omitted. Set the stolen task to nullptr. - victim_pool[H-1] = nullptr; - // The release store synchronizes the victim_pool update(the store of nullptr). 
- head.store( /*dead: H = */ H0, std::memory_order_release ); - } -unlock: - unlock_task_pool(victim_pool); - -#if __TBB_PREFETCHING - __TBB_cl_evict(&victim_slot.head); - __TBB_cl_evict(&victim_slot.tail); -#endif - if (tasks_omitted) { - // Synchronize with snapshot as the head and tail can be bumped which can falsely trigger EMPTY state - a.advertise_new_work<arena::wakeup>(); - } - return result; -} - -} // namespace r1 -} // namespace detail -} // namespace tbb - +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "arena_slot.h" +#include "arena.h" +#include "thread_data.h" + +namespace tbb { +namespace detail { +namespace r1 { + +//------------------------------------------------------------------------ +// Arena Slot +//------------------------------------------------------------------------ +d1::task* arena_slot::get_task_impl(size_t T, execution_data_ext& ed, bool& tasks_omitted, isolation_type isolation) { + __TBB_ASSERT(tail.load(std::memory_order_relaxed) <= T || is_local_task_pool_quiescent(), + "Is it safe to get a task at position T?"); + + d1::task* result = task_pool_ptr[T]; + __TBB_ASSERT(!is_poisoned( result ), "The poisoned task is going to be processed"); + + if (!result) { + return nullptr; + } + bool omit = isolation != no_isolation && isolation != task_accessor::isolation(*result); + if (!omit && !task_accessor::is_proxy_task(*result)) { + return result; + } else if (omit) { + tasks_omitted = true; + return nullptr; + } + + task_proxy& tp = static_cast<task_proxy&>(*result); + d1::slot_id aff_id = tp.slot; + if ( d1::task *t = tp.extract_task<task_proxy::pool_bit>() ) { + ed.affinity_slot = aff_id; + return t; + } + // Proxy was empty, so it's our responsibility to free it + tp.allocator.delete_object(&tp, ed); + + if ( tasks_omitted ) { + task_pool_ptr[T] = nullptr; + } + return nullptr; +} + +d1::task* arena_slot::get_task(execution_data_ext& ed, isolation_type isolation) { + __TBB_ASSERT(is_task_pool_published(), nullptr); + // The current task position in the task pool. + std::size_t T0 = tail.load(std::memory_order_relaxed); + // The bounds of available tasks in the task pool. H0 is only used when the head bound is reached. + std::size_t H0 = (std::size_t)-1, T = T0; + d1::task* result = nullptr; + bool task_pool_empty = false; + bool tasks_omitted = false; + do { + __TBB_ASSERT( !result, nullptr ); + // The full fence is required to sync the store of `tail` with the load of `head` (write-read barrier) + T = --tail; + // The acquire load of head is required to guarantee consistency of our task pool + // when a thief rolls back the head. + if ( (std::intptr_t)( head.load(std::memory_order_acquire) ) > (std::intptr_t)T ) { + acquire_task_pool(); + H0 = head.load(std::memory_order_relaxed); + if ( (std::intptr_t)H0 > (std::intptr_t)T ) { + // The thief has not backed off - nothing to grab. 
+ __TBB_ASSERT( H0 == head.load(std::memory_order_relaxed) + && T == tail.load(std::memory_order_relaxed) + && H0 == T + 1, "victim/thief arbitration algorithm failure" ); + reset_task_pool_and_leave(); + // No tasks in the task pool. + task_pool_empty = true; + break; + } else if ( H0 == T ) { + // There is only one task in the task pool. + reset_task_pool_and_leave(); + task_pool_empty = true; + } else { + // Release task pool if there are still some tasks. + // After the release, the tail will be less than T, thus a thief + // will not attempt to get a task at position T. + release_task_pool(); + } + } + result = get_task_impl( T, ed, tasks_omitted, isolation ); + if ( result ) { + poison_pointer( task_pool_ptr[T] ); + break; + } else if ( !tasks_omitted ) { + poison_pointer( task_pool_ptr[T] ); + __TBB_ASSERT( T0 == T+1, nullptr ); + T0 = T; + } + } while ( !result && !task_pool_empty ); + + if ( tasks_omitted ) { + if ( task_pool_empty ) { + // All tasks have been checked. The task pool should be in reset state. + // We just restore the bounds for the available tasks. + // TODO: Does it have sense to move them to the beginning of the task pool? + __TBB_ASSERT( is_quiescent_local_task_pool_reset(), nullptr ); + if ( result ) { + // If we have a task, it should be at H0 position. + __TBB_ASSERT( H0 == T, nullptr ); + ++H0; + } + __TBB_ASSERT( H0 <= T0, nullptr ); + if ( H0 < T0 ) { + // Restore the task pool if there are some tasks. + head.store(H0, std::memory_order_relaxed); + tail.store(T0, std::memory_order_relaxed); + // The release fence is used in publish_task_pool. + publish_task_pool(); + // Synchronize with snapshot as we published some tasks. + ed.task_disp->m_thread_data->my_arena->advertise_new_work<arena::wakeup>(); + } + } else { + // A task has been obtained. We need to make a hole in position T. + __TBB_ASSERT( is_task_pool_published(), nullptr ); + __TBB_ASSERT( result, nullptr ); + task_pool_ptr[T] = nullptr; + tail.store(T0, std::memory_order_release); + // Synchronize with snapshot as we published some tasks. + // TODO: consider some approach not to call wakeup for each time. E.g. check if the tail reached the head. + ed.task_disp->m_thread_data->my_arena->advertise_new_work<arena::wakeup>(); + } + } + + __TBB_ASSERT( (std::intptr_t)tail.load(std::memory_order_relaxed) >= 0, nullptr ); + __TBB_ASSERT( result || tasks_omitted || is_quiescent_local_task_pool_reset(), nullptr ); + return result; +} + +d1::task* arena_slot::steal_task(arena& a, isolation_type isolation) { + d1::task** victim_pool = lock_task_pool(); + if (!victim_pool) { + return nullptr; + } + d1::task* result = nullptr; + std::size_t H = head.load(std::memory_order_relaxed); // mirror + std::size_t H0 = H; + bool tasks_omitted = false; + do { + // The full fence is required to sync the store of `head` with the load of `tail` (write-read barrier) + H = ++head; + // The acquire load of tail is required to guarantee consistency of victim_pool + // because the owner synchronizes task spawning via tail. 
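// --- Illustrative aside ------------------------------------------------------
// A minimal, self-contained sketch of the head/tail arbitration that get_task
// (owner side) and steal_task (thief side) implement above. It assumes a small
// fixed-size slot array and a plain std::mutex standing in for the task_pool
// pointer locking (acquire_task_pool/lock_task_pool); proxy tasks, isolation,
// and pool relocation are omitted. Names such as sketch_deque are hypothetical.
#include <atomic>
#include <cstddef>
#include <mutex>

struct sketch_task;

struct sketch_deque {
    std::atomic<std::size_t> head{0};     // advanced by thieves
    std::atomic<std::size_t> tail{0};     // advanced/retreated by the owner
    sketch_task* slots[64] = {};
    std::mutex arbitration;               // stands in for locking the task pool

    // Owner side: speculatively retreat tail, then check for a conflict with thieves.
    sketch_task* pop() {
        std::size_t t = tail.fetch_sub(1) - 1;  // seq_cst RMW: the "full fence" before reading head
        if ((std::ptrdiff_t)head.load(std::memory_order_acquire) > (std::ptrdiff_t)t) {
            std::lock_guard<std::mutex> guard(arbitration);
            std::size_t h = head.load(std::memory_order_relaxed);
            if ((std::ptrdiff_t)h > (std::ptrdiff_t)t) {
                // A thief won the race for the last task: reset to the empty state.
                head.store(0, std::memory_order_relaxed);
                tail.store(0, std::memory_order_relaxed);
                return nullptr;
            }
            if (h == t) {
                // Exactly one task left: it is ours, and the deque becomes empty.
                sketch_task* last = slots[t];
                slots[t] = nullptr;
                head.store(0, std::memory_order_relaxed);
                tail.store(0, std::memory_order_relaxed);
                return last;
            }
            // h < t: plenty of tasks remain ahead of position t; fall through and take it.
        }
        sketch_task* result = slots[t];
        slots[t] = nullptr;
        return result;
    }

    // Thief side: speculatively advance head, then confirm against tail and roll back on failure.
    sketch_task* steal() {
        std::lock_guard<std::mutex> guard(arbitration);
        std::size_t h = head.fetch_add(1) + 1;  // seq_cst RMW: the "full fence" before reading tail
        if ((std::ptrdiff_t)h > (std::ptrdiff_t)tail.load(std::memory_order_acquire)) {
            head.store(h - 1, std::memory_order_relaxed);  // nothing to grab: undo the increment
            return nullptr;
        }
        sketch_task* result = slots[h - 1];
        slots[h - 1] = nullptr;
        return result;
    }
};
// -----------------------------------------------------------------------------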
+ if ((std::intptr_t)H > (std::intptr_t)(tail.load(std::memory_order_acquire))) { + // Stealing attempt failed, deque contents has not been changed by us + head.store( /*dead: H = */ H0, std::memory_order_relaxed ); + __TBB_ASSERT( !result, nullptr ); + goto unlock; + } + result = victim_pool[H-1]; + __TBB_ASSERT( !is_poisoned( result ), nullptr ); + + if (result) { + if (isolation == no_isolation || isolation == task_accessor::isolation(*result)) { + if (!task_accessor::is_proxy_task(*result)) { + break; + } + task_proxy& tp = *static_cast<task_proxy*>(result); + // If mailed task is likely to be grabbed by its destination thread, skip it. + if ( !(task_proxy::is_shared( tp.task_and_tag ) && tp.outbox->recipient_is_idle()) ) { + break; + } + } + // The task cannot be executed either due to isolation or proxy constraints. + result = nullptr; + tasks_omitted = true; + } else if (!tasks_omitted) { + // Cleanup the task pool from holes until a task is skipped. + __TBB_ASSERT( H0 == H-1, nullptr ); + poison_pointer( victim_pool[H0] ); + H0 = H; + } + } while (!result); + __TBB_ASSERT( result, nullptr ); + + // emit "task was consumed" signal + poison_pointer( victim_pool[H-1] ); + if (tasks_omitted) { + // Some proxies in the task pool have been omitted. Set the stolen task to nullptr. + victim_pool[H-1] = nullptr; + // The release store synchronizes the victim_pool update(the store of nullptr). + head.store( /*dead: H = */ H0, std::memory_order_release ); + } +unlock: + unlock_task_pool(victim_pool); + +#if __TBB_PREFETCHING + __TBB_cl_evict(&victim_slot.head); + __TBB_cl_evict(&victim_slot.tail); +#endif + if (tasks_omitted) { + // Synchronize with snapshot as the head and tail can be bumped which can falsely trigger EMPTY state + a.advertise_new_work<arena::wakeup>(); + } + return result; +} + +} // namespace r1 +} // namespace detail +} // namespace tbb + diff --git a/contrib/libs/tbb/src/tbb/arena_slot.h b/contrib/libs/tbb/src/tbb/arena_slot.h index 54a7ae0b4b..83d61d2197 100644 --- a/contrib/libs/tbb/src/tbb/arena_slot.h +++ b/contrib/libs/tbb/src/tbb/arena_slot.h @@ -1,409 +1,409 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -#ifndef _TBB_arena_slot_H -#define _TBB_arena_slot_H - -#include "oneapi/tbb/detail/_config.h" -#include "oneapi/tbb/detail/_utils.h" -#include "oneapi/tbb/detail/_template_helpers.h" -#include "oneapi/tbb/detail/_task.h" - -#include "oneapi/tbb/cache_aligned_allocator.h" - -#include "misc.h" -#include "mailbox.h" -#include "scheduler_common.h" - -#include <atomic> - -namespace tbb { -namespace detail { -namespace r1 { - -class arena; -class task_group_context; - -//-------------------------------------------------------------------------------------------------------- -// Arena Slot -//-------------------------------------------------------------------------------------------------------- - -static d1::task** const EmptyTaskPool = nullptr; -static d1::task** const LockedTaskPool = reinterpret_cast<d1::task**>(~std::intptr_t(0)); - -struct alignas(max_nfs_size) arena_slot_shared_state { - //! Scheduler of the thread attached to the slot - /** Marks the slot as busy, and is used to iterate through the schedulers belonging to this arena **/ - std::atomic<bool> my_is_occupied; - - // Synchronization of access to Task pool - /** Also is used to specify if the slot is empty or locked: - 0 - empty - -1 - locked **/ - std::atomic<d1::task**> task_pool; - - //! Index of the first ready task in the deque. - /** Modified by thieves, and by the owner during compaction/reallocation **/ - std::atomic<std::size_t> head; -}; - -struct alignas(max_nfs_size) arena_slot_private_state { - //! Hint provided for operations with the container of starvation-resistant tasks. - /** Modified by the owner thread (during these operations). **/ - unsigned hint_for_fifo_stream; - -#if __TBB_PREVIEW_CRITICAL_TASKS - //! Similar to 'hint_for_fifo_stream' but for critical tasks. - unsigned hint_for_critical_stream; -#endif - - //! Similar to 'hint_for_fifo_stream' but for the resume tasks. - unsigned hint_for_resume_stream; - - //! Index of the element following the last ready task in the deque. - /** Modified by the owner thread. **/ - std::atomic<std::size_t> tail; - - //! Capacity of the primary task pool (number of elements - pointers to task). - std::size_t my_task_pool_size; - - //! Task pool of the scheduler that owns this slot - // TODO: previously was task**__TBB_atomic, but seems like not accessed on other thread - d1::task** task_pool_ptr; -}; - -class arena_slot : private arena_slot_shared_state, private arena_slot_private_state { - friend class arena; - friend class outermost_worker_waiter; - friend class task_dispatcher; - friend class thread_data; - friend class nested_arena_context; - - //! The original task dispather associated with this slot - task_dispatcher* my_default_task_dispatcher; - -#if TBB_USE_ASSERT - void fill_with_canary_pattern ( std::size_t first, std::size_t last ) { - for ( std::size_t i = first; i < last; ++i ) - poison_pointer(task_pool_ptr[i]); - } -#else - void fill_with_canary_pattern ( size_t, std::size_t ) {} -#endif /* TBB_USE_ASSERT */ - - static constexpr std::size_t min_task_pool_size = 64; - - void allocate_task_pool( std::size_t n ) { - std::size_t byte_size = ((n * sizeof(d1::task*) + max_nfs_size - 1) / max_nfs_size) * max_nfs_size; - my_task_pool_size = byte_size / sizeof(d1::task*); - task_pool_ptr = (d1::task**)cache_aligned_allocate(byte_size); - // No need to clear the fresh deque since valid items are designated by the head and tail members. - // But fill it with a canary pattern in the high vigilance debug mode. 
- fill_with_canary_pattern( 0, my_task_pool_size ); - } - -public: - //! Deallocate task pool that was allocated by means of allocate_task_pool. - void free_task_pool( ) { - // TODO: understand the assertion and modify - // __TBB_ASSERT( !task_pool /* TODO: == EmptyTaskPool */, NULL); - if( task_pool_ptr ) { - __TBB_ASSERT( my_task_pool_size, NULL); - cache_aligned_deallocate( task_pool_ptr ); - task_pool_ptr = NULL; - my_task_pool_size = 0; - } - } - - //! Get a task from the local pool. - /** Called only by the pool owner. - Returns the pointer to the task or NULL if a suitable task is not found. - Resets the pool if it is empty. **/ - d1::task* get_task(execution_data_ext&, isolation_type); - - //! Steal task from slot's ready pool - d1::task* steal_task(arena&, isolation_type); - - //! Some thread is now the owner of this slot - void occupy() { - __TBB_ASSERT(!my_is_occupied.load(std::memory_order_relaxed), nullptr); - my_is_occupied.store(true, std::memory_order_release); - } - - //! Try to occupy the slot - bool try_occupy() { - return !is_occupied() && my_is_occupied.exchange(true) == false; - } - - //! Some thread is now the owner of this slot - void release() { - __TBB_ASSERT(my_is_occupied.load(std::memory_order_relaxed), nullptr); - my_is_occupied.store(false, std::memory_order_release); - } - - //! Spawn newly created tasks - void spawn(d1::task& t) { - std::size_t T = prepare_task_pool(1); - __TBB_ASSERT(is_poisoned(task_pool_ptr[T]), NULL); - task_pool_ptr[T] = &t; - commit_spawned_tasks(T + 1); - if (!is_task_pool_published()) { - publish_task_pool(); - } - } - - bool is_task_pool_published() const { - return task_pool.load(std::memory_order_relaxed) != EmptyTaskPool; - } - - bool is_occupied() const { - return my_is_occupied.load(std::memory_order_relaxed); - } - - task_dispatcher& default_task_dispatcher() { - __TBB_ASSERT(my_default_task_dispatcher != nullptr, nullptr); - return *my_default_task_dispatcher; - } - - void init_task_streams(unsigned h) { - hint_for_fifo_stream = h; -#if __TBB_RESUMABLE_TASKS - hint_for_resume_stream = h; -#endif -#if __TBB_PREVIEW_CRITICAL_TASKS - hint_for_critical_stream = h; -#endif - } - -#if __TBB_PREVIEW_CRITICAL_TASKS - unsigned& critical_hint() { - return hint_for_critical_stream; - } -#endif -private: - //! Get a task from the local pool at specified location T. - /** Returns the pointer to the task or NULL if the task cannot be executed, - e.g. proxy has been deallocated or isolation constraint is not met. - tasks_omitted tells if some tasks have been omitted. - Called only by the pool owner. The caller should guarantee that the - position T is not available for a thief. **/ - d1::task* get_task_impl(size_t T, execution_data_ext& ed, bool& tasks_omitted, isolation_type isolation); - - //! Makes sure that the task pool can accommodate at least n more elements - /** If necessary relocates existing task pointers or grows the ready task deque. - * Returns (possible updated) tail index (not accounting for n). 
**/ - std::size_t prepare_task_pool(std::size_t num_tasks) { - std::size_t T = tail.load(std::memory_order_relaxed); // mirror - if ( T + num_tasks <= my_task_pool_size ) { - return T; - } - - std::size_t new_size = num_tasks; - if ( !my_task_pool_size ) { - __TBB_ASSERT( !is_task_pool_published() && is_quiescent_local_task_pool_reset(), NULL ); - __TBB_ASSERT( !task_pool_ptr, NULL ); - if ( num_tasks < min_task_pool_size ) new_size = min_task_pool_size; - allocate_task_pool( new_size ); - return 0; - } - acquire_task_pool(); - std::size_t H = head.load(std::memory_order_relaxed); // mirror - d1::task** new_task_pool = task_pool_ptr;; - __TBB_ASSERT( my_task_pool_size >= min_task_pool_size, NULL ); - // Count not skipped tasks. Consider using std::count_if. - for ( std::size_t i = H; i < T; ++i ) - if ( new_task_pool[i] ) ++new_size; - // If the free space at the beginning of the task pool is too short, we - // are likely facing a pathological single-producer-multiple-consumers - // scenario, and thus it's better to expand the task pool - bool allocate = new_size > my_task_pool_size - min_task_pool_size/4; - if ( allocate ) { - // Grow task pool. As this operation is rare, and its cost is asymptotically - // amortizable, we can tolerate new task pool allocation done under the lock. - if ( new_size < 2 * my_task_pool_size ) - new_size = 2 * my_task_pool_size; - allocate_task_pool( new_size ); // updates my_task_pool_size - } - // Filter out skipped tasks. Consider using std::copy_if. - std::size_t T1 = 0; - for ( std::size_t i = H; i < T; ++i ) { - if ( new_task_pool[i] ) { - task_pool_ptr[T1++] = new_task_pool[i]; +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef _TBB_arena_slot_H +#define _TBB_arena_slot_H + +#include "oneapi/tbb/detail/_config.h" +#include "oneapi/tbb/detail/_utils.h" +#include "oneapi/tbb/detail/_template_helpers.h" +#include "oneapi/tbb/detail/_task.h" + +#include "oneapi/tbb/cache_aligned_allocator.h" + +#include "misc.h" +#include "mailbox.h" +#include "scheduler_common.h" + +#include <atomic> + +namespace tbb { +namespace detail { +namespace r1 { + +class arena; +class task_group_context; + +//-------------------------------------------------------------------------------------------------------- +// Arena Slot +//-------------------------------------------------------------------------------------------------------- + +static d1::task** const EmptyTaskPool = nullptr; +static d1::task** const LockedTaskPool = reinterpret_cast<d1::task**>(~std::intptr_t(0)); + +struct alignas(max_nfs_size) arena_slot_shared_state { + //! Scheduler of the thread attached to the slot + /** Marks the slot as busy, and is used to iterate through the schedulers belonging to this arena **/ + std::atomic<bool> my_is_occupied; + + // Synchronization of access to Task pool + /** Also is used to specify if the slot is empty or locked: + 0 - empty + -1 - locked **/ + std::atomic<d1::task**> task_pool; + + //! 
Index of the first ready task in the deque. + /** Modified by thieves, and by the owner during compaction/reallocation **/ + std::atomic<std::size_t> head; +}; + +struct alignas(max_nfs_size) arena_slot_private_state { + //! Hint provided for operations with the container of starvation-resistant tasks. + /** Modified by the owner thread (during these operations). **/ + unsigned hint_for_fifo_stream; + +#if __TBB_PREVIEW_CRITICAL_TASKS + //! Similar to 'hint_for_fifo_stream' but for critical tasks. + unsigned hint_for_critical_stream; +#endif + + //! Similar to 'hint_for_fifo_stream' but for the resume tasks. + unsigned hint_for_resume_stream; + + //! Index of the element following the last ready task in the deque. + /** Modified by the owner thread. **/ + std::atomic<std::size_t> tail; + + //! Capacity of the primary task pool (number of elements - pointers to task). + std::size_t my_task_pool_size; + + //! Task pool of the scheduler that owns this slot + // TODO: previously was task**__TBB_atomic, but seems like not accessed on other thread + d1::task** task_pool_ptr; +}; + +class arena_slot : private arena_slot_shared_state, private arena_slot_private_state { + friend class arena; + friend class outermost_worker_waiter; + friend class task_dispatcher; + friend class thread_data; + friend class nested_arena_context; + + //! The original task dispather associated with this slot + task_dispatcher* my_default_task_dispatcher; + +#if TBB_USE_ASSERT + void fill_with_canary_pattern ( std::size_t first, std::size_t last ) { + for ( std::size_t i = first; i < last; ++i ) + poison_pointer(task_pool_ptr[i]); + } +#else + void fill_with_canary_pattern ( size_t, std::size_t ) {} +#endif /* TBB_USE_ASSERT */ + + static constexpr std::size_t min_task_pool_size = 64; + + void allocate_task_pool( std::size_t n ) { + std::size_t byte_size = ((n * sizeof(d1::task*) + max_nfs_size - 1) / max_nfs_size) * max_nfs_size; + my_task_pool_size = byte_size / sizeof(d1::task*); + task_pool_ptr = (d1::task**)cache_aligned_allocate(byte_size); + // No need to clear the fresh deque since valid items are designated by the head and tail members. + // But fill it with a canary pattern in the high vigilance debug mode. + fill_with_canary_pattern( 0, my_task_pool_size ); + } + +public: + //! Deallocate task pool that was allocated by means of allocate_task_pool. + void free_task_pool( ) { + // TODO: understand the assertion and modify + // __TBB_ASSERT( !task_pool /* TODO: == EmptyTaskPool */, NULL); + if( task_pool_ptr ) { + __TBB_ASSERT( my_task_pool_size, NULL); + cache_aligned_deallocate( task_pool_ptr ); + task_pool_ptr = NULL; + my_task_pool_size = 0; + } + } + + //! Get a task from the local pool. + /** Called only by the pool owner. + Returns the pointer to the task or NULL if a suitable task is not found. + Resets the pool if it is empty. **/ + d1::task* get_task(execution_data_ext&, isolation_type); + + //! Steal task from slot's ready pool + d1::task* steal_task(arena&, isolation_type); + + //! Some thread is now the owner of this slot + void occupy() { + __TBB_ASSERT(!my_is_occupied.load(std::memory_order_relaxed), nullptr); + my_is_occupied.store(true, std::memory_order_release); + } + + //! Try to occupy the slot + bool try_occupy() { + return !is_occupied() && my_is_occupied.exchange(true) == false; + } + + //! 
Some thread is now the owner of this slot + void release() { + __TBB_ASSERT(my_is_occupied.load(std::memory_order_relaxed), nullptr); + my_is_occupied.store(false, std::memory_order_release); + } + + //! Spawn newly created tasks + void spawn(d1::task& t) { + std::size_t T = prepare_task_pool(1); + __TBB_ASSERT(is_poisoned(task_pool_ptr[T]), NULL); + task_pool_ptr[T] = &t; + commit_spawned_tasks(T + 1); + if (!is_task_pool_published()) { + publish_task_pool(); + } + } + + bool is_task_pool_published() const { + return task_pool.load(std::memory_order_relaxed) != EmptyTaskPool; + } + + bool is_occupied() const { + return my_is_occupied.load(std::memory_order_relaxed); + } + + task_dispatcher& default_task_dispatcher() { + __TBB_ASSERT(my_default_task_dispatcher != nullptr, nullptr); + return *my_default_task_dispatcher; + } + + void init_task_streams(unsigned h) { + hint_for_fifo_stream = h; +#if __TBB_RESUMABLE_TASKS + hint_for_resume_stream = h; +#endif +#if __TBB_PREVIEW_CRITICAL_TASKS + hint_for_critical_stream = h; +#endif + } + +#if __TBB_PREVIEW_CRITICAL_TASKS + unsigned& critical_hint() { + return hint_for_critical_stream; + } +#endif +private: + //! Get a task from the local pool at specified location T. + /** Returns the pointer to the task or NULL if the task cannot be executed, + e.g. proxy has been deallocated or isolation constraint is not met. + tasks_omitted tells if some tasks have been omitted. + Called only by the pool owner. The caller should guarantee that the + position T is not available for a thief. **/ + d1::task* get_task_impl(size_t T, execution_data_ext& ed, bool& tasks_omitted, isolation_type isolation); + + //! Makes sure that the task pool can accommodate at least n more elements + /** If necessary relocates existing task pointers or grows the ready task deque. + * Returns (possible updated) tail index (not accounting for n). **/ + std::size_t prepare_task_pool(std::size_t num_tasks) { + std::size_t T = tail.load(std::memory_order_relaxed); // mirror + if ( T + num_tasks <= my_task_pool_size ) { + return T; + } + + std::size_t new_size = num_tasks; + if ( !my_task_pool_size ) { + __TBB_ASSERT( !is_task_pool_published() && is_quiescent_local_task_pool_reset(), NULL ); + __TBB_ASSERT( !task_pool_ptr, NULL ); + if ( num_tasks < min_task_pool_size ) new_size = min_task_pool_size; + allocate_task_pool( new_size ); + return 0; + } + acquire_task_pool(); + std::size_t H = head.load(std::memory_order_relaxed); // mirror + d1::task** new_task_pool = task_pool_ptr;; + __TBB_ASSERT( my_task_pool_size >= min_task_pool_size, NULL ); + // Count not skipped tasks. Consider using std::count_if. + for ( std::size_t i = H; i < T; ++i ) + if ( new_task_pool[i] ) ++new_size; + // If the free space at the beginning of the task pool is too short, we + // are likely facing a pathological single-producer-multiple-consumers + // scenario, and thus it's better to expand the task pool + bool allocate = new_size > my_task_pool_size - min_task_pool_size/4; + if ( allocate ) { + // Grow task pool. As this operation is rare, and its cost is asymptotically + // amortizable, we can tolerate new task pool allocation done under the lock. + if ( new_size < 2 * my_task_pool_size ) + new_size = 2 * my_task_pool_size; + allocate_task_pool( new_size ); // updates my_task_pool_size + } + // Filter out skipped tasks. Consider using std::copy_if. 
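// For reference, the std::copy_if form suggested in the comment above could look
// roughly like this (illustrative only; it requires <algorithm> and is valid only
// on the branch where a fresh pool was just allocated, because std::copy_if
// requires non-overlapping source and destination ranges):
//
//     d1::task** new_end = std::copy_if(new_task_pool + H, new_task_pool + T,
//                                       task_pool_ptr,
//                                       [](d1::task* t) { return t != nullptr; });
//     std::size_t T1 = std::size_t(new_end - task_pool_ptr);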
+ std::size_t T1 = 0; + for ( std::size_t i = H; i < T; ++i ) { + if ( new_task_pool[i] ) { + task_pool_ptr[T1++] = new_task_pool[i]; + } + } + // Deallocate the previous task pool if a new one has been allocated. + if ( allocate ) + cache_aligned_deallocate( new_task_pool ); + else + fill_with_canary_pattern( T1, tail ); + // Publish the new state. + commit_relocated_tasks( T1 ); + // assert_task_pool_valid(); + return T1; + } + + //! Makes newly spawned tasks visible to thieves + void commit_spawned_tasks(std::size_t new_tail) { + __TBB_ASSERT (new_tail <= my_task_pool_size, "task deque end was overwritten"); + // emit "task was released" signal + // Release fence is necessary to make sure that previously stored task pointers + // are visible to thieves. + tail.store(new_tail, std::memory_order_release); + } + + //! Used by workers to enter the task pool + /** Does not lock the task pool in case if arena slot has been successfully grabbed. **/ + void publish_task_pool() { + __TBB_ASSERT ( task_pool == EmptyTaskPool, "someone else grabbed my arena slot?" ); + __TBB_ASSERT ( head.load(std::memory_order_relaxed) < tail.load(std::memory_order_relaxed), + "entering arena without tasks to share" ); + // Release signal on behalf of previously spawned tasks (when this thread was not in arena yet) + task_pool.store(task_pool_ptr, std::memory_order_release ); + } + + //! Locks the local task pool + /** Garbles task_pool for the duration of the lock. Requires correctly set task_pool_ptr. + ATTENTION: This method is mostly the same as generic_scheduler::lock_task_pool(), with + a little different logic of slot state checks (slot is either locked or points + to our task pool). Thus if either of them is changed, consider changing the counterpart as well. **/ + void acquire_task_pool() { + if (!is_task_pool_published()) { + return; // we are not in arena - nothing to lock + } + bool sync_prepare_done = false; + for( atomic_backoff b;;b.pause() ) { +#if TBB_USE_ASSERT + // Local copy of the arena slot task pool pointer is necessary for the next + // assertion to work correctly to exclude asynchronous state transition effect. + d1::task** tp = task_pool.load(std::memory_order_relaxed); + __TBB_ASSERT( tp == LockedTaskPool || tp == task_pool_ptr, "slot ownership corrupt?" ); +#endif + d1::task** expected = task_pool_ptr; + if( task_pool.load(std::memory_order_relaxed) != LockedTaskPool && + task_pool.compare_exchange_strong(expected, LockedTaskPool ) ) { + // We acquired our own slot + break; + } else if( !sync_prepare_done ) { + // Start waiting + sync_prepare_done = true; + } + // Someone else acquired a lock, so pause and do exponential backoff. + } + __TBB_ASSERT( task_pool.load(std::memory_order_relaxed) == LockedTaskPool, "not really acquired task pool" ); + } + + //! Unlocks the local task pool + /** Restores task_pool munged by acquire_task_pool. Requires + correctly set task_pool_ptr. **/ + void release_task_pool() { + if ( !(task_pool.load(std::memory_order_relaxed) != EmptyTaskPool) ) + return; // we are not in arena - nothing to unlock + __TBB_ASSERT( task_pool.load(std::memory_order_relaxed) == LockedTaskPool, "arena slot is not locked" ); + task_pool.store( task_pool_ptr, std::memory_order_release ); + } + + //! Locks victim's task pool, and returns pointer to it. The pointer can be NULL. + /** Garbles victim_arena_slot->task_pool for the duration of the lock. 
**/ + d1::task** lock_task_pool() { + d1::task** victim_task_pool; + for ( atomic_backoff backoff;; /*backoff pause embedded in the loop*/) { + victim_task_pool = task_pool.load(std::memory_order_relaxed); + // Microbenchmarks demonstrated that aborting stealing attempt when the + // victim's task pool is locked degrade performance. + // NOTE: Do not use comparison of head and tail indices to check for + // the presence of work in the victim's task pool, as they may give + // incorrect indication because of task pool relocations and resizes. + if (victim_task_pool == EmptyTaskPool) { + break; + } + d1::task** expected = victim_task_pool; + if (victim_task_pool != LockedTaskPool && task_pool.compare_exchange_strong(expected, LockedTaskPool) ) { + // We've locked victim's task pool + break; } - } - // Deallocate the previous task pool if a new one has been allocated. - if ( allocate ) - cache_aligned_deallocate( new_task_pool ); - else - fill_with_canary_pattern( T1, tail ); - // Publish the new state. - commit_relocated_tasks( T1 ); - // assert_task_pool_valid(); - return T1; - } - - //! Makes newly spawned tasks visible to thieves - void commit_spawned_tasks(std::size_t new_tail) { - __TBB_ASSERT (new_tail <= my_task_pool_size, "task deque end was overwritten"); - // emit "task was released" signal - // Release fence is necessary to make sure that previously stored task pointers - // are visible to thieves. - tail.store(new_tail, std::memory_order_release); - } - - //! Used by workers to enter the task pool - /** Does not lock the task pool in case if arena slot has been successfully grabbed. **/ - void publish_task_pool() { - __TBB_ASSERT ( task_pool == EmptyTaskPool, "someone else grabbed my arena slot?" ); - __TBB_ASSERT ( head.load(std::memory_order_relaxed) < tail.load(std::memory_order_relaxed), - "entering arena without tasks to share" ); - // Release signal on behalf of previously spawned tasks (when this thread was not in arena yet) - task_pool.store(task_pool_ptr, std::memory_order_release ); - } - - //! Locks the local task pool - /** Garbles task_pool for the duration of the lock. Requires correctly set task_pool_ptr. - ATTENTION: This method is mostly the same as generic_scheduler::lock_task_pool(), with - a little different logic of slot state checks (slot is either locked or points - to our task pool). Thus if either of them is changed, consider changing the counterpart as well. **/ - void acquire_task_pool() { - if (!is_task_pool_published()) { - return; // we are not in arena - nothing to lock - } - bool sync_prepare_done = false; - for( atomic_backoff b;;b.pause() ) { -#if TBB_USE_ASSERT - // Local copy of the arena slot task pool pointer is necessary for the next - // assertion to work correctly to exclude asynchronous state transition effect. - d1::task** tp = task_pool.load(std::memory_order_relaxed); - __TBB_ASSERT( tp == LockedTaskPool || tp == task_pool_ptr, "slot ownership corrupt?" ); -#endif - d1::task** expected = task_pool_ptr; - if( task_pool.load(std::memory_order_relaxed) != LockedTaskPool && - task_pool.compare_exchange_strong(expected, LockedTaskPool ) ) { - // We acquired our own slot - break; - } else if( !sync_prepare_done ) { - // Start waiting - sync_prepare_done = true; - } - // Someone else acquired a lock, so pause and do exponential backoff. - } - __TBB_ASSERT( task_pool.load(std::memory_order_relaxed) == LockedTaskPool, "not really acquired task pool" ); - } - - //! Unlocks the local task pool - /** Restores task_pool munged by acquire_task_pool. 
Requires - correctly set task_pool_ptr. **/ - void release_task_pool() { - if ( !(task_pool.load(std::memory_order_relaxed) != EmptyTaskPool) ) - return; // we are not in arena - nothing to unlock - __TBB_ASSERT( task_pool.load(std::memory_order_relaxed) == LockedTaskPool, "arena slot is not locked" ); - task_pool.store( task_pool_ptr, std::memory_order_release ); - } - - //! Locks victim's task pool, and returns pointer to it. The pointer can be NULL. - /** Garbles victim_arena_slot->task_pool for the duration of the lock. **/ - d1::task** lock_task_pool() { - d1::task** victim_task_pool; - for ( atomic_backoff backoff;; /*backoff pause embedded in the loop*/) { - victim_task_pool = task_pool.load(std::memory_order_relaxed); - // Microbenchmarks demonstrated that aborting stealing attempt when the - // victim's task pool is locked degrade performance. - // NOTE: Do not use comparison of head and tail indices to check for - // the presence of work in the victim's task pool, as they may give - // incorrect indication because of task pool relocations and resizes. - if (victim_task_pool == EmptyTaskPool) { - break; - } - d1::task** expected = victim_task_pool; - if (victim_task_pool != LockedTaskPool && task_pool.compare_exchange_strong(expected, LockedTaskPool) ) { - // We've locked victim's task pool - break; - } - // Someone else acquired a lock, so pause and do exponential backoff. - backoff.pause(); - } - __TBB_ASSERT(victim_task_pool == EmptyTaskPool || - (task_pool.load(std::memory_order_relaxed) == LockedTaskPool && - victim_task_pool != LockedTaskPool), "not really locked victim's task pool?"); - return victim_task_pool; - } - - //! Unlocks victim's task pool - /** Restores victim_arena_slot->task_pool munged by lock_task_pool. **/ - void unlock_task_pool(d1::task** victim_task_pool) { - __TBB_ASSERT(task_pool.load(std::memory_order_relaxed) == LockedTaskPool, "victim arena slot is not locked"); - __TBB_ASSERT(victim_task_pool != LockedTaskPool, NULL); - task_pool.store(victim_task_pool, std::memory_order_release); - } - -#if TBB_USE_ASSERT - bool is_local_task_pool_quiescent() const { - d1::task** tp = task_pool.load(std::memory_order_relaxed); - return tp == EmptyTaskPool || tp == LockedTaskPool; - } - - bool is_quiescent_local_task_pool_empty() const { - __TBB_ASSERT(is_local_task_pool_quiescent(), "Task pool is not quiescent"); - return head.load(std::memory_order_relaxed) == tail.load(std::memory_order_relaxed); - } - - bool is_quiescent_local_task_pool_reset() const { - __TBB_ASSERT(is_local_task_pool_quiescent(), "Task pool is not quiescent"); - return head.load(std::memory_order_relaxed) == 0 && tail.load(std::memory_order_relaxed) == 0; - } -#endif // TBB_USE_ASSERT - - //! Leave the task pool - /** Leaving task pool automatically releases the task pool if it is locked. **/ - void leave_task_pool() { - __TBB_ASSERT(is_task_pool_published(), "Not in arena"); - // Do not reset my_arena_index. It will be used to (attempt to) re-acquire the slot next time - __TBB_ASSERT(task_pool.load(std::memory_order_relaxed) == LockedTaskPool, "Task pool must be locked when leaving arena"); - __TBB_ASSERT(is_quiescent_local_task_pool_empty(), "Cannot leave arena when the task pool is not empty"); - // No release fence is necessary here as this assignment precludes external - // accesses to the local task pool when becomes visible. Thus it is harmless - // if it gets hoisted above preceding local bookkeeping manipulations. 
- task_pool.store(EmptyTaskPool, std::memory_order_relaxed); - } - - //! Resets head and tail indices to 0, and leaves task pool - /** The task pool must be locked by the owner (via acquire_task_pool).**/ - void reset_task_pool_and_leave() { - __TBB_ASSERT(task_pool.load(std::memory_order_relaxed) == LockedTaskPool, "Task pool must be locked when resetting task pool"); - tail.store(0, std::memory_order_relaxed); - head.store(0, std::memory_order_relaxed); - leave_task_pool(); - } - - //! Makes relocated tasks visible to thieves and releases the local task pool. - /** Obviously, the task pool must be locked when calling this method. **/ - void commit_relocated_tasks(std::size_t new_tail) { - __TBB_ASSERT(is_local_task_pool_quiescent(), "Task pool must be locked when calling commit_relocated_tasks()"); - head.store(0, std::memory_order_relaxed); - // Tail is updated last to minimize probability of a thread making arena - // snapshot being misguided into thinking that this task pool is empty. - tail.store(new_tail, std::memory_order_release); - release_task_pool(); - } -}; - -} // namespace r1 -} // namespace detail -} // namespace tbb - -#endif // __TBB_arena_slot_H + // Someone else acquired a lock, so pause and do exponential backoff. + backoff.pause(); + } + __TBB_ASSERT(victim_task_pool == EmptyTaskPool || + (task_pool.load(std::memory_order_relaxed) == LockedTaskPool && + victim_task_pool != LockedTaskPool), "not really locked victim's task pool?"); + return victim_task_pool; + } + + //! Unlocks victim's task pool + /** Restores victim_arena_slot->task_pool munged by lock_task_pool. **/ + void unlock_task_pool(d1::task** victim_task_pool) { + __TBB_ASSERT(task_pool.load(std::memory_order_relaxed) == LockedTaskPool, "victim arena slot is not locked"); + __TBB_ASSERT(victim_task_pool != LockedTaskPool, NULL); + task_pool.store(victim_task_pool, std::memory_order_release); + } + +#if TBB_USE_ASSERT + bool is_local_task_pool_quiescent() const { + d1::task** tp = task_pool.load(std::memory_order_relaxed); + return tp == EmptyTaskPool || tp == LockedTaskPool; + } + + bool is_quiescent_local_task_pool_empty() const { + __TBB_ASSERT(is_local_task_pool_quiescent(), "Task pool is not quiescent"); + return head.load(std::memory_order_relaxed) == tail.load(std::memory_order_relaxed); + } + + bool is_quiescent_local_task_pool_reset() const { + __TBB_ASSERT(is_local_task_pool_quiescent(), "Task pool is not quiescent"); + return head.load(std::memory_order_relaxed) == 0 && tail.load(std::memory_order_relaxed) == 0; + } +#endif // TBB_USE_ASSERT + + //! Leave the task pool + /** Leaving task pool automatically releases the task pool if it is locked. **/ + void leave_task_pool() { + __TBB_ASSERT(is_task_pool_published(), "Not in arena"); + // Do not reset my_arena_index. It will be used to (attempt to) re-acquire the slot next time + __TBB_ASSERT(task_pool.load(std::memory_order_relaxed) == LockedTaskPool, "Task pool must be locked when leaving arena"); + __TBB_ASSERT(is_quiescent_local_task_pool_empty(), "Cannot leave arena when the task pool is not empty"); + // No release fence is necessary here as this assignment precludes external + // accesses to the local task pool when becomes visible. Thus it is harmless + // if it gets hoisted above preceding local bookkeeping manipulations. + task_pool.store(EmptyTaskPool, std::memory_order_relaxed); + } + + //! 
Resets head and tail indices to 0, and leaves task pool + /** The task pool must be locked by the owner (via acquire_task_pool).**/ + void reset_task_pool_and_leave() { + __TBB_ASSERT(task_pool.load(std::memory_order_relaxed) == LockedTaskPool, "Task pool must be locked when resetting task pool"); + tail.store(0, std::memory_order_relaxed); + head.store(0, std::memory_order_relaxed); + leave_task_pool(); + } + + //! Makes relocated tasks visible to thieves and releases the local task pool. + /** Obviously, the task pool must be locked when calling this method. **/ + void commit_relocated_tasks(std::size_t new_tail) { + __TBB_ASSERT(is_local_task_pool_quiescent(), "Task pool must be locked when calling commit_relocated_tasks()"); + head.store(0, std::memory_order_relaxed); + // Tail is updated last to minimize probability of a thread making arena + // snapshot being misguided into thinking that this task pool is empty. + tail.store(new_tail, std::memory_order_release); + release_task_pool(); + } +}; + +} // namespace r1 +} // namespace detail +} // namespace tbb + +#endif // __TBB_arena_slot_H diff --git a/contrib/libs/tbb/src/tbb/assert_impl.h b/contrib/libs/tbb/src/tbb/assert_impl.h index 0a188a7812..7f411e06f7 100644 --- a/contrib/libs/tbb/src/tbb/assert_impl.h +++ b/contrib/libs/tbb/src/tbb/assert_impl.h @@ -1,71 +1,71 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_assert_impl_H -#define __TBB_assert_impl_H - -#include "oneapi/tbb/detail/_config.h" - -#include <cstdio> -#include <cstdlib> -#include <cstring> -#include <cstdarg> -#if _MSC_VER && _DEBUG -#include <crtdbg.h> -#endif - -#include <mutex> - -namespace tbb { -namespace detail { -namespace r1 { - -// TODO: consider extension for formatted error description string -static void assertion_failure_impl(const char* filename, int line, const char* expression, const char* comment) { - std::fprintf(stderr, "Assertion %s failed on line %d of file %s\n", expression, line, filename); - if (comment) { - std::fprintf(stderr, "Detailed description: %s\n", comment); - } -#if _MSC_VER && _DEBUG - if (1 == _CrtDbgReport(_CRT_ASSERT, filename, line, "tbb_debug.dll", "%s\r\n%s", expression, comment?comment:"")) { - _CrtDbgBreak(); - } -#else - std::fflush(stderr); - std::abort(); -#endif -} - -void __TBB_EXPORTED_FUNC assertion_failure(const char* filename, int line, const char* expression, const char* comment) { - static std::once_flag flag; - std::call_once(flag, [&](){ assertion_failure_impl(filename, line, expression, comment); }); -} - -//! Report a runtime warning. -void runtime_warning( const char* format, ... 
) { - char str[1024]; std::memset(str, 0, 1024); - va_list args; va_start(args, format); - vsnprintf( str, 1024-1, format, args); - va_end(args); - fprintf(stderr, "TBB Warning: %s\n", str); -} - -} // namespace r1 -} // namespace detail -} // namespace tbb - -#endif // __TBB_assert_impl_H - +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_assert_impl_H +#define __TBB_assert_impl_H + +#include "oneapi/tbb/detail/_config.h" + +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <cstdarg> +#if _MSC_VER && _DEBUG +#include <crtdbg.h> +#endif + +#include <mutex> + +namespace tbb { +namespace detail { +namespace r1 { + +// TODO: consider extension for formatted error description string +static void assertion_failure_impl(const char* filename, int line, const char* expression, const char* comment) { + std::fprintf(stderr, "Assertion %s failed on line %d of file %s\n", expression, line, filename); + if (comment) { + std::fprintf(stderr, "Detailed description: %s\n", comment); + } +#if _MSC_VER && _DEBUG + if (1 == _CrtDbgReport(_CRT_ASSERT, filename, line, "tbb_debug.dll", "%s\r\n%s", expression, comment?comment:"")) { + _CrtDbgBreak(); + } +#else + std::fflush(stderr); + std::abort(); +#endif +} + +void __TBB_EXPORTED_FUNC assertion_failure(const char* filename, int line, const char* expression, const char* comment) { + static std::once_flag flag; + std::call_once(flag, [&](){ assertion_failure_impl(filename, line, expression, comment); }); +} + +//! Report a runtime warning. +void runtime_warning( const char* format, ... ) { + char str[1024]; std::memset(str, 0, 1024); + va_list args; va_start(args, format); + vsnprintf( str, 1024-1, format, args); + va_end(args); + fprintf(stderr, "TBB Warning: %s\n", str); +} + +} // namespace r1 +} // namespace detail +} // namespace tbb + +#endif // __TBB_assert_impl_H + diff --git a/contrib/libs/tbb/src/tbb/co_context.h b/contrib/libs/tbb/src/tbb/co_context.h index f19d7c6b01..552dec356b 100644 --- a/contrib/libs/tbb/src/tbb/co_context.h +++ b/contrib/libs/tbb/src/tbb/co_context.h @@ -1,222 +1,222 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -#ifndef _TBB_co_context_H -#define _TBB_co_context_H - -#include "oneapi/tbb/detail/_config.h" - -#if __TBB_RESUMABLE_TASKS - -#include <cstddef> -#include <cstdint> - -#if _WIN32 || _WIN64 -#include <windows.h> -#else -// ucontext.h API is deprecated since macOS 10.6 -#if __APPLE__ - #if __INTEL_COMPILER - #pragma warning(push) - #pragma warning(disable:1478) - #elif __clang__ - #pragma clang diagnostic push - #pragma clang diagnostic ignored "-Wdeprecated-declarations" - #endif -#endif // __APPLE__ - -#include <ucontext.h> -#include <sys/mman.h> // mprotect - -#include "governor.h" // default_page_size() - -#ifndef MAP_STACK -// macOS* does not define MAP_STACK -#define MAP_STACK 0 -#endif -#ifndef MAP_ANONYMOUS -// macOS* defines MAP_ANON, which is deprecated in Linux*. -#define MAP_ANONYMOUS MAP_ANON -#endif -#endif // _WIN32 || _WIN64 - -namespace tbb { -namespace detail { -namespace r1 { - -#if _WIN32 || _WIN64 - typedef LPVOID coroutine_type; -#else - struct coroutine_type { - coroutine_type() : my_context(), my_stack(), my_stack_size() {} - ucontext_t my_context; - void* my_stack; - std::size_t my_stack_size; - }; -#endif - - // Forward declaration of the coroutine API. - void create_coroutine(coroutine_type& c, std::size_t stack_size, void* arg); - void current_coroutine(coroutine_type& c); - void swap_coroutine(coroutine_type& prev_coroutine, coroutine_type& new_coroutine); - void destroy_coroutine(coroutine_type& c); - -class co_context { - enum co_state { - co_invalid, - co_suspended, - co_executing, - co_destroyed - }; - coroutine_type my_coroutine; - co_state my_state; - -public: - co_context(std::size_t stack_size, void* arg) - : my_state(stack_size ? co_suspended : co_executing) - { - if (stack_size) { - __TBB_ASSERT(arg != 0, nullptr); - create_coroutine(my_coroutine, stack_size, arg); - } else { - current_coroutine(my_coroutine); - } - } - - ~co_context() { - __TBB_ASSERT(1 << my_state & (1 << co_suspended | 1 << co_executing), NULL); - if (my_state == co_suspended) - destroy_coroutine(my_coroutine); - my_state = co_destroyed; - } - - void resume(co_context& target) { - // Do not create non-trivial objects on the stack of this function. They might never be destroyed. - __TBB_ASSERT(my_state == co_executing, NULL); - __TBB_ASSERT(target.my_state == co_suspended, NULL); - - my_state = co_suspended; - target.my_state = co_executing; - - // 'target' can reference an invalid object after swap_coroutine. Do not access it. - swap_coroutine(my_coroutine, target.my_coroutine); - - __TBB_ASSERT(my_state == co_executing, NULL); - } -}; - -#if _WIN32 || _WIN64 -/* [[noreturn]] */ void __stdcall co_local_wait_for_all(void* arg) noexcept; -#else -/* [[noreturn]] */ void co_local_wait_for_all(void* arg) noexcept; -#endif - -#if _WIN32 || _WIN64 -inline void create_coroutine(coroutine_type& c, std::size_t stack_size, void* arg) { - __TBB_ASSERT(arg, NULL); - c = CreateFiber(stack_size, co_local_wait_for_all, arg); - __TBB_ASSERT(c, NULL); -} - -inline void current_coroutine(coroutine_type& c) { - c = IsThreadAFiber() ? 
GetCurrentFiber() : - ConvertThreadToFiberEx(nullptr, FIBER_FLAG_FLOAT_SWITCH); - __TBB_ASSERT(c, NULL); -} - -inline void swap_coroutine(coroutine_type& prev_coroutine, coroutine_type& new_coroutine) { - if (!IsThreadAFiber()) { - ConvertThreadToFiberEx(nullptr, FIBER_FLAG_FLOAT_SWITCH); - } - __TBB_ASSERT(new_coroutine, NULL); - prev_coroutine = GetCurrentFiber(); - __TBB_ASSERT(prev_coroutine, NULL); - SwitchToFiber(new_coroutine); -} - -inline void destroy_coroutine(coroutine_type& c) { - __TBB_ASSERT(c, NULL); - DeleteFiber(c); -} -#else // !(_WIN32 || _WIN64) - -inline void create_coroutine(coroutine_type& c, std::size_t stack_size, void* arg) { - const std::size_t REG_PAGE_SIZE = governor::default_page_size(); - const std::size_t page_aligned_stack_size = (stack_size + (REG_PAGE_SIZE - 1)) & ~(REG_PAGE_SIZE - 1); - const std::size_t protected_stack_size = page_aligned_stack_size + 2 * REG_PAGE_SIZE; - - // Allocate the stack with protection property - std::uintptr_t stack_ptr = (std::uintptr_t)mmap(NULL, protected_stack_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0); - __TBB_ASSERT((void*)stack_ptr != MAP_FAILED, NULL); - - // Allow read write on our stack (guarded pages are still protected) - int err = mprotect((void*)(stack_ptr + REG_PAGE_SIZE), page_aligned_stack_size, PROT_READ | PROT_WRITE); - __TBB_ASSERT_EX(!err, NULL); - - // Remember the stack state - c.my_stack = (void*)(stack_ptr + REG_PAGE_SIZE); - c.my_stack_size = page_aligned_stack_size; - - err = getcontext(&c.my_context); - __TBB_ASSERT_EX(!err, NULL); - - c.my_context.uc_link = 0; - // cast to char* to disable FreeBSD clang-3.4.1 'incompatible type' error - c.my_context.uc_stack.ss_sp = (char*)c.my_stack; - c.my_context.uc_stack.ss_size = c.my_stack_size; - c.my_context.uc_stack.ss_flags = 0; - - typedef void(*coroutine_func_t)(); - makecontext(&c.my_context, (coroutine_func_t)co_local_wait_for_all, sizeof(arg) / sizeof(int), arg); -} - -inline void current_coroutine(coroutine_type& c) { - int err = getcontext(&c.my_context); - __TBB_ASSERT_EX(!err, NULL); -} - -inline void swap_coroutine(coroutine_type& prev_coroutine, coroutine_type& new_coroutine) { - int err = swapcontext(&prev_coroutine.my_context, &new_coroutine.my_context); - __TBB_ASSERT_EX(!err, NULL); -} - -inline void destroy_coroutine(coroutine_type& c) { - const std::size_t REG_PAGE_SIZE = governor::default_page_size(); - // Free stack memory with guarded pages - munmap((void*)((std::uintptr_t)c.my_stack - REG_PAGE_SIZE), c.my_stack_size + 2 * REG_PAGE_SIZE); - // Clear the stack state afterwards - c.my_stack = NULL; - c.my_stack_size = 0; -} - -#if __APPLE__ - #if __INTEL_COMPILER - #pragma warning(pop) // 1478 warning - #elif __clang__ - #pragma clang diagnostic pop // "-Wdeprecated-declarations" - #endif -#endif - -#endif // _WIN32 || _WIN64 - -} // namespace r1 -} // namespace detail -} // namespace tbb - -#endif /* __TBB_RESUMABLE_TASKS */ - -#endif /* _TBB_co_context_H */ - +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef _TBB_co_context_H +#define _TBB_co_context_H + +#include "oneapi/tbb/detail/_config.h" + +#if __TBB_RESUMABLE_TASKS + +#include <cstddef> +#include <cstdint> + +#if _WIN32 || _WIN64 +#include <windows.h> +#else +// ucontext.h API is deprecated since macOS 10.6 +#if __APPLE__ + #if __INTEL_COMPILER + #pragma warning(push) + #pragma warning(disable:1478) + #elif __clang__ + #pragma clang diagnostic push + #pragma clang diagnostic ignored "-Wdeprecated-declarations" + #endif +#endif // __APPLE__ + +#include <ucontext.h> +#include <sys/mman.h> // mprotect + +#include "governor.h" // default_page_size() + +#ifndef MAP_STACK +// macOS* does not define MAP_STACK +#define MAP_STACK 0 +#endif +#ifndef MAP_ANONYMOUS +// macOS* defines MAP_ANON, which is deprecated in Linux*. +#define MAP_ANONYMOUS MAP_ANON +#endif +#endif // _WIN32 || _WIN64 + +namespace tbb { +namespace detail { +namespace r1 { + +#if _WIN32 || _WIN64 + typedef LPVOID coroutine_type; +#else + struct coroutine_type { + coroutine_type() : my_context(), my_stack(), my_stack_size() {} + ucontext_t my_context; + void* my_stack; + std::size_t my_stack_size; + }; +#endif + + // Forward declaration of the coroutine API. + void create_coroutine(coroutine_type& c, std::size_t stack_size, void* arg); + void current_coroutine(coroutine_type& c); + void swap_coroutine(coroutine_type& prev_coroutine, coroutine_type& new_coroutine); + void destroy_coroutine(coroutine_type& c); + +class co_context { + enum co_state { + co_invalid, + co_suspended, + co_executing, + co_destroyed + }; + coroutine_type my_coroutine; + co_state my_state; + +public: + co_context(std::size_t stack_size, void* arg) + : my_state(stack_size ? co_suspended : co_executing) + { + if (stack_size) { + __TBB_ASSERT(arg != 0, nullptr); + create_coroutine(my_coroutine, stack_size, arg); + } else { + current_coroutine(my_coroutine); + } + } + + ~co_context() { + __TBB_ASSERT(1 << my_state & (1 << co_suspended | 1 << co_executing), NULL); + if (my_state == co_suspended) + destroy_coroutine(my_coroutine); + my_state = co_destroyed; + } + + void resume(co_context& target) { + // Do not create non-trivial objects on the stack of this function. They might never be destroyed. + __TBB_ASSERT(my_state == co_executing, NULL); + __TBB_ASSERT(target.my_state == co_suspended, NULL); + + my_state = co_suspended; + target.my_state = co_executing; + + // 'target' can reference an invalid object after swap_coroutine. Do not access it. + swap_coroutine(my_coroutine, target.my_coroutine); + + __TBB_ASSERT(my_state == co_executing, NULL); + } +}; + +#if _WIN32 || _WIN64 +/* [[noreturn]] */ void __stdcall co_local_wait_for_all(void* arg) noexcept; +#else +/* [[noreturn]] */ void co_local_wait_for_all(void* arg) noexcept; +#endif + +#if _WIN32 || _WIN64 +inline void create_coroutine(coroutine_type& c, std::size_t stack_size, void* arg) { + __TBB_ASSERT(arg, NULL); + c = CreateFiber(stack_size, co_local_wait_for_all, arg); + __TBB_ASSERT(c, NULL); +} + +inline void current_coroutine(coroutine_type& c) { + c = IsThreadAFiber() ? 
GetCurrentFiber() : + ConvertThreadToFiberEx(nullptr, FIBER_FLAG_FLOAT_SWITCH); + __TBB_ASSERT(c, NULL); +} + +inline void swap_coroutine(coroutine_type& prev_coroutine, coroutine_type& new_coroutine) { + if (!IsThreadAFiber()) { + ConvertThreadToFiberEx(nullptr, FIBER_FLAG_FLOAT_SWITCH); + } + __TBB_ASSERT(new_coroutine, NULL); + prev_coroutine = GetCurrentFiber(); + __TBB_ASSERT(prev_coroutine, NULL); + SwitchToFiber(new_coroutine); +} + +inline void destroy_coroutine(coroutine_type& c) { + __TBB_ASSERT(c, NULL); + DeleteFiber(c); +} +#else // !(_WIN32 || _WIN64) + +inline void create_coroutine(coroutine_type& c, std::size_t stack_size, void* arg) { + const std::size_t REG_PAGE_SIZE = governor::default_page_size(); + const std::size_t page_aligned_stack_size = (stack_size + (REG_PAGE_SIZE - 1)) & ~(REG_PAGE_SIZE - 1); + const std::size_t protected_stack_size = page_aligned_stack_size + 2 * REG_PAGE_SIZE; + + // Allocate the stack with protection property + std::uintptr_t stack_ptr = (std::uintptr_t)mmap(NULL, protected_stack_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0); + __TBB_ASSERT((void*)stack_ptr != MAP_FAILED, NULL); + + // Allow read write on our stack (guarded pages are still protected) + int err = mprotect((void*)(stack_ptr + REG_PAGE_SIZE), page_aligned_stack_size, PROT_READ | PROT_WRITE); + __TBB_ASSERT_EX(!err, NULL); + + // Remember the stack state + c.my_stack = (void*)(stack_ptr + REG_PAGE_SIZE); + c.my_stack_size = page_aligned_stack_size; + + err = getcontext(&c.my_context); + __TBB_ASSERT_EX(!err, NULL); + + c.my_context.uc_link = 0; + // cast to char* to disable FreeBSD clang-3.4.1 'incompatible type' error + c.my_context.uc_stack.ss_sp = (char*)c.my_stack; + c.my_context.uc_stack.ss_size = c.my_stack_size; + c.my_context.uc_stack.ss_flags = 0; + + typedef void(*coroutine_func_t)(); + makecontext(&c.my_context, (coroutine_func_t)co_local_wait_for_all, sizeof(arg) / sizeof(int), arg); +} + +inline void current_coroutine(coroutine_type& c) { + int err = getcontext(&c.my_context); + __TBB_ASSERT_EX(!err, NULL); +} + +inline void swap_coroutine(coroutine_type& prev_coroutine, coroutine_type& new_coroutine) { + int err = swapcontext(&prev_coroutine.my_context, &new_coroutine.my_context); + __TBB_ASSERT_EX(!err, NULL); +} + +inline void destroy_coroutine(coroutine_type& c) { + const std::size_t REG_PAGE_SIZE = governor::default_page_size(); + // Free stack memory with guarded pages + munmap((void*)((std::uintptr_t)c.my_stack - REG_PAGE_SIZE), c.my_stack_size + 2 * REG_PAGE_SIZE); + // Clear the stack state afterwards + c.my_stack = NULL; + c.my_stack_size = 0; +} + +#if __APPLE__ + #if __INTEL_COMPILER + #pragma warning(pop) // 1478 warning + #elif __clang__ + #pragma clang diagnostic pop // "-Wdeprecated-declarations" + #endif +#endif + +#endif // _WIN32 || _WIN64 + +} // namespace r1 +} // namespace detail +} // namespace tbb + +#endif /* __TBB_RESUMABLE_TASKS */ + +#endif /* _TBB_co_context_H */ + diff --git a/contrib/libs/tbb/src/tbb/concurrent_bounded_queue.cpp b/contrib/libs/tbb/src/tbb/concurrent_bounded_queue.cpp index 92c7a4ce9a..90077936f6 100644 --- a/contrib/libs/tbb/src/tbb/concurrent_bounded_queue.cpp +++ b/contrib/libs/tbb/src/tbb/concurrent_bounded_queue.cpp @@ -1,84 +1,84 @@ -/* - Copyright (c) 2020-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
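Note on the co_context.h portion of this diff (above): it re-adds TBB's coroutine shim, which uses Win32 fibers on Windows and ucontext elsewhere, with the POSIX stack taken from mmap and fenced by two inaccessible guard pages. The stand-alone sketch below shows only that guard-page technique with plain POSIX calls under Linux-style assumptions (MAP_ANONYMOUS, the deprecated-but-available ucontext API); it is not TBB code, and names such as co_body and the 16-page stack size are invented for the example.

#include <ucontext.h>
#include <sys/mman.h>
#include <unistd.h>
#include <cstddef>
#include <cstdio>

static ucontext_t main_ctx, co_ctx;

static void co_body() {
    std::puts("running on the guarded coroutine stack");
    swapcontext(&co_ctx, &main_ctx);                      // yield back to main
}

int main() {
    const std::size_t page  = (std::size_t)sysconf(_SC_PAGESIZE);
    const std::size_t stack = 16 * page;                  // page-aligned usable stack
    // Reserve the usable stack plus two guard pages, all initially inaccessible.
    void* mem = mmap(nullptr, stack + 2 * page, PROT_NONE,
                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (mem == MAP_FAILED) return 1;
    char* base = static_cast<char*>(mem);
    // Open up the interior; the first and last page stay PROT_NONE and trap stack overflow/underflow.
    if (mprotect(base + page, stack, PROT_READ | PROT_WRITE) != 0) return 1;

    getcontext(&co_ctx);
    co_ctx.uc_link          = nullptr;
    co_ctx.uc_stack.ss_sp   = base + page;
    co_ctx.uc_stack.ss_size = stack;
    makecontext(&co_ctx, co_body, 0);

    swapcontext(&main_ctx, &co_ctx);                      // run co_body on the new stack
    munmap(base, stack + 2 * page);
    return 0;
}

Keeping the guard pages outside the advertised stack size means an overflow faults immediately instead of silently corrupting adjacent allocations, which is the design choice create_coroutine() above encodes.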
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#include "oneapi/tbb/detail/_utils.h" -#include "oneapi/tbb/concurrent_queue.h" -#include "oneapi/tbb/cache_aligned_allocator.h" -#include "concurrent_monitor.h" - -namespace tbb { -namespace detail { -namespace r1 { - -static constexpr std::size_t monitors_number = 2; - -std::uint8_t* __TBB_EXPORTED_FUNC allocate_bounded_queue_rep( std::size_t queue_rep_size ) -{ - std::size_t monitors_mem_size = sizeof(concurrent_monitor) * monitors_number; - std::uint8_t* mem = static_cast<std::uint8_t*>(cache_aligned_allocate(queue_rep_size + monitors_mem_size)); - - concurrent_monitor* monitors = reinterpret_cast<concurrent_monitor*>(mem + queue_rep_size); - for (std::size_t i = 0; i < monitors_number; ++i) { - new (monitors + i) concurrent_monitor(); - } - - return mem; -} - -void __TBB_EXPORTED_FUNC deallocate_bounded_queue_rep( std::uint8_t* mem, std::size_t queue_rep_size ) -{ - concurrent_monitor* monitors = reinterpret_cast<concurrent_monitor*>(mem + queue_rep_size); - for (std::size_t i = 0; i < monitors_number; ++i) { - monitors[i].~concurrent_monitor(); - } - - cache_aligned_deallocate(mem); -} - -void __TBB_EXPORTED_FUNC wait_bounded_queue_monitor( concurrent_monitor* monitors, std::size_t monitor_tag, - std::ptrdiff_t target, d1::delegate_base& predicate ) -{ - __TBB_ASSERT(monitor_tag < monitors_number, nullptr); - concurrent_monitor& monitor = monitors[monitor_tag]; - - monitor.wait<concurrent_monitor::thread_context>([&] { return !predicate(); }, std::uintptr_t(target)); -} - -void __TBB_EXPORTED_FUNC abort_bounded_queue_monitors( concurrent_monitor* monitors ) { - concurrent_monitor& items_avail = monitors[d1::cbq_items_avail_tag]; - concurrent_monitor& slots_avail = monitors[d1::cbq_slots_avail_tag]; - - items_avail.abort_all(); - slots_avail.abort_all(); -} - -struct predicate_leq { - std::size_t my_ticket; - predicate_leq( std::size_t ticket ) : my_ticket(ticket) {} - bool operator() ( std::uintptr_t ticket ) const { return static_cast<std::size_t>(ticket) <= my_ticket; } -}; - -void __TBB_EXPORTED_FUNC notify_bounded_queue_monitor( concurrent_monitor* monitors, - std::size_t monitor_tag, std::size_t ticket) -{ - __TBB_ASSERT(monitor_tag < monitors_number, nullptr); - concurrent_monitor& monitor = monitors[monitor_tag]; - monitor.notify(predicate_leq(ticket)); -} - -} // namespace r1 -} // namespace detail -} // namespace tbb +/* + Copyright (c) 2020-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
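Note on concurrent_bounded_queue.cpp (the functions shown above): the queue representation and its two concurrent_monitor objects (one per waiting direction, woken through the ticket predicate predicate_leq) live in a single cache-aligned block, with the monitors placement-constructed behind the queue data and destroyed explicitly before the block is freed. A minimal stand-alone sketch of that layout follows; monitor_stub, allocate_rep and deallocate_rep are invented for the example, and plain operator new stands in for cache_aligned_allocate.

#include <cstddef>
#include <cstdint>
#include <new>

struct monitor_stub {                       // stands in for r1::concurrent_monitor
    monitor_stub() {}
    ~monitor_stub() {}
};

constexpr std::size_t k_monitors = 2;       // items-available and slots-available

std::uint8_t* allocate_rep(std::size_t rep_size) {
    // One allocation: [ queue representation | monitor 0 | monitor 1 ]
    // (the real code uses cache_aligned_allocate, which also guarantees suitable alignment).
    std::uint8_t* mem = static_cast<std::uint8_t*>(
        ::operator new(rep_size + sizeof(monitor_stub) * k_monitors));
    monitor_stub* monitors = reinterpret_cast<monitor_stub*>(mem + rep_size);
    for (std::size_t i = 0; i < k_monitors; ++i)
        new (monitors + i) monitor_stub();  // placement-construct in the tail of the block
    return mem;
}

void deallocate_rep(std::uint8_t* mem, std::size_t rep_size) {
    monitor_stub* monitors = reinterpret_cast<monitor_stub*>(mem + rep_size);
    for (std::size_t i = 0; i < k_monitors; ++i)
        monitors[i].~monitor_stub();        // destroy explicitly before freeing the block
    ::operator delete(mem);
}

int main() {
    std::uint8_t* rep = allocate_rep(256);
    deallocate_rep(rep, 256);
    return 0;
}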
+*/ + +#include "oneapi/tbb/detail/_utils.h" +#include "oneapi/tbb/concurrent_queue.h" +#include "oneapi/tbb/cache_aligned_allocator.h" +#include "concurrent_monitor.h" + +namespace tbb { +namespace detail { +namespace r1 { + +static constexpr std::size_t monitors_number = 2; + +std::uint8_t* __TBB_EXPORTED_FUNC allocate_bounded_queue_rep( std::size_t queue_rep_size ) +{ + std::size_t monitors_mem_size = sizeof(concurrent_monitor) * monitors_number; + std::uint8_t* mem = static_cast<std::uint8_t*>(cache_aligned_allocate(queue_rep_size + monitors_mem_size)); + + concurrent_monitor* monitors = reinterpret_cast<concurrent_monitor*>(mem + queue_rep_size); + for (std::size_t i = 0; i < monitors_number; ++i) { + new (monitors + i) concurrent_monitor(); + } + + return mem; +} + +void __TBB_EXPORTED_FUNC deallocate_bounded_queue_rep( std::uint8_t* mem, std::size_t queue_rep_size ) +{ + concurrent_monitor* monitors = reinterpret_cast<concurrent_monitor*>(mem + queue_rep_size); + for (std::size_t i = 0; i < monitors_number; ++i) { + monitors[i].~concurrent_monitor(); + } + + cache_aligned_deallocate(mem); +} + +void __TBB_EXPORTED_FUNC wait_bounded_queue_monitor( concurrent_monitor* monitors, std::size_t monitor_tag, + std::ptrdiff_t target, d1::delegate_base& predicate ) +{ + __TBB_ASSERT(monitor_tag < monitors_number, nullptr); + concurrent_monitor& monitor = monitors[monitor_tag]; + + monitor.wait<concurrent_monitor::thread_context>([&] { return !predicate(); }, std::uintptr_t(target)); +} + +void __TBB_EXPORTED_FUNC abort_bounded_queue_monitors( concurrent_monitor* monitors ) { + concurrent_monitor& items_avail = monitors[d1::cbq_items_avail_tag]; + concurrent_monitor& slots_avail = monitors[d1::cbq_slots_avail_tag]; + + items_avail.abort_all(); + slots_avail.abort_all(); +} + +struct predicate_leq { + std::size_t my_ticket; + predicate_leq( std::size_t ticket ) : my_ticket(ticket) {} + bool operator() ( std::uintptr_t ticket ) const { return static_cast<std::size_t>(ticket) <= my_ticket; } +}; + +void __TBB_EXPORTED_FUNC notify_bounded_queue_monitor( concurrent_monitor* monitors, + std::size_t monitor_tag, std::size_t ticket) +{ + __TBB_ASSERT(monitor_tag < monitors_number, nullptr); + concurrent_monitor& monitor = monitors[monitor_tag]; + monitor.notify(predicate_leq(ticket)); +} + +} // namespace r1 +} // namespace detail +} // namespace tbb diff --git a/contrib/libs/tbb/src/tbb/concurrent_monitor.h b/contrib/libs/tbb/src/tbb/concurrent_monitor.h index eec21f858a..cb1885a5d0 100644 --- a/contrib/libs/tbb/src/tbb/concurrent_monitor.h +++ b/contrib/libs/tbb/src/tbb/concurrent_monitor.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -17,46 +17,46 @@ #ifndef __TBB_concurrent_monitor_H #define __TBB_concurrent_monitor_H -#include "oneapi/tbb/spin_mutex.h" -#include "oneapi/tbb/detail/_exception.h" -#include "oneapi/tbb/detail/_aligned_space.h" -#include "oneapi/tbb/detail/_template_helpers.h" -#include "scheduler_common.h" +#include "oneapi/tbb/spin_mutex.h" +#include "oneapi/tbb/detail/_exception.h" +#include "oneapi/tbb/detail/_aligned_space.h" +#include "oneapi/tbb/detail/_template_helpers.h" +#include "scheduler_common.h" #include "semaphore.h" -#include <atomic> - +#include <atomic> + namespace tbb { -namespace detail { -namespace r1 { +namespace detail { +namespace r1 { //! 
Circular doubly-linked list with sentinel /** head.next points to the front and head.prev points to the back */ class circular_doubly_linked_list_with_sentinel : no_copy { public: - struct base_node { - base_node* next; - base_node* prev; - explicit base_node() : next((base_node*)(uintptr_t)0xcdcdcdcd), prev((base_node*)(uintptr_t)0xcdcdcdcd) {} + struct base_node { + base_node* next; + base_node* prev; + explicit base_node() : next((base_node*)(uintptr_t)0xcdcdcdcd), prev((base_node*)(uintptr_t)0xcdcdcdcd) {} }; // ctor - circular_doubly_linked_list_with_sentinel() { clear(); } + circular_doubly_linked_list_with_sentinel() { clear(); } // dtor - ~circular_doubly_linked_list_with_sentinel() { - __TBB_ASSERT(head.next == &head && head.prev == &head, "the list is not empty"); - } + ~circular_doubly_linked_list_with_sentinel() { + __TBB_ASSERT(head.next == &head && head.prev == &head, "the list is not empty"); + } - inline std::size_t size() const { return count.load(std::memory_order_relaxed); } - inline bool empty() const { return size() == 0; } - inline base_node* front() const { return head.next; } - inline base_node* last() const { return head.prev; } - inline const base_node* end() const { return &head; } + inline std::size_t size() const { return count.load(std::memory_order_relaxed); } + inline bool empty() const { return size() == 0; } + inline base_node* front() const { return head.next; } + inline base_node* last() const { return head.prev; } + inline const base_node* end() const { return &head; } //! add to the back of the list - inline void add( base_node* n ) { - count.store(count.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed); + inline void add( base_node* n ) { + count.store(count.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed); n->prev = head.prev; n->next = &head; head.prev->next = n; @@ -64,18 +64,18 @@ public: } //! remove node 'n' - inline void remove( base_node& n ) { - __TBB_ASSERT(count.load(std::memory_order_relaxed) > 0, "attempt to remove an item from an empty list"); - count.store(count.load( std::memory_order_relaxed ) - 1, std::memory_order_relaxed); + inline void remove( base_node& n ) { + __TBB_ASSERT(count.load(std::memory_order_relaxed) > 0, "attempt to remove an item from an empty list"); + count.store(count.load( std::memory_order_relaxed ) - 1, std::memory_order_relaxed); n.prev->next = n.next; n.next->prev = n.prev; } //! 
move all elements to 'lst' and initialize the 'this' list inline void flush_to( circular_doubly_linked_list_with_sentinel& lst ) { - const std::size_t l_count = size(); - if (l_count > 0) { - lst.count.store(l_count, std::memory_order_relaxed); + const std::size_t l_count = size(); + if (l_count > 0) { + lst.count.store(l_count, std::memory_order_relaxed); lst.head.next = head.next; lst.head.prev = head.prev; head.next->prev = &lst.head; @@ -84,446 +84,446 @@ public: } } - void clear() { - head.next = &head; - head.prev = &head; - count.store(0, std::memory_order_relaxed); - } + void clear() { + head.next = &head; + head.prev = &head; + count.store(0, std::memory_order_relaxed); + } private: - std::atomic<std::size_t> count; - base_node head; + std::atomic<std::size_t> count; + base_node head; }; -using base_list = circular_doubly_linked_list_with_sentinel; -using base_node = circular_doubly_linked_list_with_sentinel::base_node; +using base_list = circular_doubly_linked_list_with_sentinel; +using base_node = circular_doubly_linked_list_with_sentinel::base_node; -template <typename Context> -class concurrent_monitor_base; - -template <typename Context> -class wait_node : public base_node { +template <typename Context> +class concurrent_monitor_base; + +template <typename Context> +class wait_node : public base_node { public: - -#if __TBB_GLIBCXX_VERSION >= 40800 && __TBB_GLIBCXX_VERSION < 40900 - wait_node(Context ctx) : my_context(ctx), my_is_in_list(false) {} -#else - wait_node(Context ctx) : my_context(ctx) {} -#endif - - virtual ~wait_node() = default; - - virtual void init() { - __TBB_ASSERT(!my_initialized, nullptr); - my_initialized = true; - } - - virtual void wait() = 0; - - virtual void reset() { - __TBB_ASSERT(my_skipped_wakeup, nullptr); - my_skipped_wakeup = false; - } - - virtual void notify() = 0; - -protected: - friend class concurrent_monitor_base<Context>; - friend class thread_data; - - Context my_context{}; -#if __TBB_GLIBCXX_VERSION >= 40800 && __TBB_GLIBCXX_VERSION < 40900 - std::atomic<bool> my_is_in_list; -#else - std::atomic<bool> my_is_in_list{false}; -#endif - - bool my_initialized{false}; - bool my_skipped_wakeup{false}; - bool my_aborted{false}; - unsigned my_epoch{0}; -}; - -template <typename Context> -class sleep_node : public wait_node<Context> { - using base_type = wait_node<Context>; -public: - using base_type::base_type; - - // Make it virtual due to Intel Compiler warning - virtual ~sleep_node() { - if (this->my_initialized) { - if (this->my_skipped_wakeup) semaphore().P(); - semaphore().~binary_semaphore(); + +#if __TBB_GLIBCXX_VERSION >= 40800 && __TBB_GLIBCXX_VERSION < 40900 + wait_node(Context ctx) : my_context(ctx), my_is_in_list(false) {} +#else + wait_node(Context ctx) : my_context(ctx) {} +#endif + + virtual ~wait_node() = default; + + virtual void init() { + __TBB_ASSERT(!my_initialized, nullptr); + my_initialized = true; + } + + virtual void wait() = 0; + + virtual void reset() { + __TBB_ASSERT(my_skipped_wakeup, nullptr); + my_skipped_wakeup = false; + } + + virtual void notify() = 0; + +protected: + friend class concurrent_monitor_base<Context>; + friend class thread_data; + + Context my_context{}; +#if __TBB_GLIBCXX_VERSION >= 40800 && __TBB_GLIBCXX_VERSION < 40900 + std::atomic<bool> my_is_in_list; +#else + std::atomic<bool> my_is_in_list{false}; +#endif + + bool my_initialized{false}; + bool my_skipped_wakeup{false}; + bool my_aborted{false}; + unsigned my_epoch{0}; +}; + +template <typename Context> +class sleep_node : public 
wait_node<Context> { + using base_type = wait_node<Context>; +public: + using base_type::base_type; + + // Make it virtual due to Intel Compiler warning + virtual ~sleep_node() { + if (this->my_initialized) { + if (this->my_skipped_wakeup) semaphore().P(); + semaphore().~binary_semaphore(); } - } - - binary_semaphore& semaphore() { return *sema.begin(); } - - void init() override { - if (!this->my_initialized) { - new (sema.begin()) binary_semaphore; - base_type::init(); + } + + binary_semaphore& semaphore() { return *sema.begin(); } + + void init() override { + if (!this->my_initialized) { + new (sema.begin()) binary_semaphore; + base_type::init(); } - } - - void wait() override { - __TBB_ASSERT(this->my_initialized, - "Use of commit_wait() without prior prepare_wait()"); - semaphore().P(); - __TBB_ASSERT(!this->my_is_in_list.load(std::memory_order_relaxed), "Still in the queue?"); - if (this->my_aborted) - throw_exception(exception_id::user_abort); - } - - void reset() override { - base_type::reset(); - semaphore().P(); - } - - void notify() override { - semaphore().V(); - } - -private: - tbb::detail::aligned_space<binary_semaphore> sema; -}; - -//! concurrent_monitor -/** fine-grained concurrent_monitor implementation */ -template <typename Context> -class concurrent_monitor_base : no_copy { -public: + } + + void wait() override { + __TBB_ASSERT(this->my_initialized, + "Use of commit_wait() without prior prepare_wait()"); + semaphore().P(); + __TBB_ASSERT(!this->my_is_in_list.load(std::memory_order_relaxed), "Still in the queue?"); + if (this->my_aborted) + throw_exception(exception_id::user_abort); + } + + void reset() override { + base_type::reset(); + semaphore().P(); + } + + void notify() override { + semaphore().V(); + } + +private: + tbb::detail::aligned_space<binary_semaphore> sema; +}; + +//! concurrent_monitor +/** fine-grained concurrent_monitor implementation */ +template <typename Context> +class concurrent_monitor_base : no_copy { +public: //! ctor - concurrent_monitor_base() : my_epoch{} - {} + concurrent_monitor_base() : my_epoch{} + {} //! dtor - ~concurrent_monitor_base() { - abort_all(); - __TBB_ASSERT(my_waitset.empty(), "waitset not empty?"); - } + ~concurrent_monitor_base() { + abort_all(); + __TBB_ASSERT(my_waitset.empty(), "waitset not empty?"); + } //! prepare wait by inserting 'thr' into the wait queue - void prepare_wait( wait_node<Context>& node) { - // TODO: consider making even more lazy instantiation of the semaphore, that is only when it is actually needed, e.g. move it in node::wait() - if (!node.my_initialized) { - node.init(); - } - // this is good place to pump previous skipped wakeup - else if (node.my_skipped_wakeup) { - node.reset(); - } - - node.my_is_in_list.store(true, std::memory_order_relaxed); - - { - tbb::spin_mutex::scoped_lock l(my_mutex); - node.my_epoch = my_epoch.load(std::memory_order_relaxed); - my_waitset.add(&node); - } - - // Prepare wait guarantees Write Read memory barrier. - // In C++ only full fence covers this type of barrier. - atomic_fence(std::memory_order_seq_cst); - } - + void prepare_wait( wait_node<Context>& node) { + // TODO: consider making even more lazy instantiation of the semaphore, that is only when it is actually needed, e.g. 
move it in node::wait() + if (!node.my_initialized) { + node.init(); + } + // this is good place to pump previous skipped wakeup + else if (node.my_skipped_wakeup) { + node.reset(); + } + + node.my_is_in_list.store(true, std::memory_order_relaxed); + + { + tbb::spin_mutex::scoped_lock l(my_mutex); + node.my_epoch = my_epoch.load(std::memory_order_relaxed); + my_waitset.add(&node); + } + + // Prepare wait guarantees Write Read memory barrier. + // In C++ only full fence covers this type of barrier. + atomic_fence(std::memory_order_seq_cst); + } + //! Commit wait if event count has not changed; otherwise, cancel wait. /** Returns true if committed, false if canceled. */ - inline bool commit_wait( wait_node<Context>& node ) { - const bool do_it = node.my_epoch == my_epoch.load(std::memory_order_relaxed); + inline bool commit_wait( wait_node<Context>& node ) { + const bool do_it = node.my_epoch == my_epoch.load(std::memory_order_relaxed); // this check is just an optimization - if (do_it) { - node.wait(); + if (do_it) { + node.wait(); } else { - cancel_wait( node ); + cancel_wait( node ); } return do_it; } - + //! Cancel the wait. Removes the thread from the wait queue if not removed yet. - void cancel_wait( wait_node<Context>& node ) { - // possible skipped wakeup will be pumped in the following prepare_wait() - node.my_skipped_wakeup = true; - // try to remove node from waitset - // Cancel wait guarantees acquire memory barrier. - bool in_list = node.my_is_in_list.load(std::memory_order_acquire); - if (in_list) { - tbb::spin_mutex::scoped_lock l(my_mutex); - if (node.my_is_in_list.load(std::memory_order_relaxed)) { - my_waitset.remove(node); - // node is removed from waitset, so there will be no wakeup - node.my_is_in_list.store(false, std::memory_order_relaxed); - node.my_skipped_wakeup = false; - } - } - } - - //! Wait for a condition to be satisfied with waiting-on my_context - template <typename NodeType, typename Pred> - bool wait(Pred&& pred, NodeType&& node) { - prepare_wait(node); - while (!guarded_call(std::forward<Pred>(pred), node)) { - if (commit_wait(node)) { - return true; - } - - prepare_wait(node); - } - - cancel_wait(node); - return false; - } - + void cancel_wait( wait_node<Context>& node ) { + // possible skipped wakeup will be pumped in the following prepare_wait() + node.my_skipped_wakeup = true; + // try to remove node from waitset + // Cancel wait guarantees acquire memory barrier. + bool in_list = node.my_is_in_list.load(std::memory_order_acquire); + if (in_list) { + tbb::spin_mutex::scoped_lock l(my_mutex); + if (node.my_is_in_list.load(std::memory_order_relaxed)) { + my_waitset.remove(node); + // node is removed from waitset, so there will be no wakeup + node.my_is_in_list.store(false, std::memory_order_relaxed); + node.my_skipped_wakeup = false; + } + } + } + + //! Wait for a condition to be satisfied with waiting-on my_context + template <typename NodeType, typename Pred> + bool wait(Pred&& pred, NodeType&& node) { + prepare_wait(node); + while (!guarded_call(std::forward<Pred>(pred), node)) { + if (commit_wait(node)) { + return true; + } + + prepare_wait(node); + } + + cancel_wait(node); + return false; + } + //! Notify one thread about the event - void notify_one() { - atomic_fence(std::memory_order_seq_cst); - notify_one_relaxed(); - } + void notify_one() { + atomic_fence(std::memory_order_seq_cst); + notify_one_relaxed(); + } //! Notify one thread about the event. Relaxed version. 
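Note on the wait machinery just above: concurrent_monitor_base implements a classic two-phase wait (an eventcount). prepare_wait() publishes the waiter and snapshots the epoch, the caller re-checks its predicate, and commit_wait() blocks only if no notification bumped the epoch in between, while cancel_wait() backs out a waiter that no longer needs to sleep. Below is a reduced, stand-alone sketch of that shape for a single waiter and a single flag; std::binary_semaphore (C++20, compile with -std=c++20 -pthread) stands in for TBB's binary_semaphore, and the real monitor additionally keeps a wait list, per-node state and skipped-wakeup handling. The notify_one_relaxed() / notify_all_relaxed() paths of the monitor continue directly below this note.

#include <atomic>
#include <semaphore>
#include <thread>
#include <cstdio>

std::atomic<unsigned> epoch{0};          // bumped by every notification
std::atomic<bool> ready{false};          // the condition the waiter is waiting for
std::binary_semaphore sema{0};           // the waiter's sleep primitive

void wait_until_ready() {
    for (;;) {
        unsigned observed = epoch.load(std::memory_order_relaxed);   // "prepare_wait": snapshot the epoch
        std::atomic_thread_fence(std::memory_order_seq_cst);         // publish intent, then re-check
        if (ready.load(std::memory_order_relaxed)) return;           // "cancel_wait": condition already true
        if (observed == epoch.load(std::memory_order_relaxed))       // "commit_wait": no notification raced in,
            sema.acquire();                                          // so it is safe to block until notified
        // otherwise a notification arrived between snapshot and check; loop and re-evaluate
    }
}

void notify_ready() {
    ready.store(true, std::memory_order_relaxed);
    std::atomic_thread_fence(std::memory_order_seq_cst);             // pairs with the waiter's fence
    epoch.fetch_add(1, std::memory_order_relaxed);                   // invalidate in-flight snapshots
    sema.release();                                                  // wake the (single) waiter
}

int main() {
    std::thread t(wait_until_ready);
    notify_ready();
    t.join();
    std::puts("woken");
    return 0;
}

The epoch check plus the full fences is what closes the lost-wakeup window: a waiter can only block after proving that no notification happened since it announced itself, which is exactly the guarantee the comments in prepare_wait() above describe.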
- void notify_one_relaxed() { - if (my_waitset.empty()) { - return; - } - - base_node* n; - const base_node* end = my_waitset.end(); - { - tbb::spin_mutex::scoped_lock l(my_mutex); - my_epoch.store(my_epoch.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed); - n = my_waitset.front(); - if (n != end) { - my_waitset.remove(*n); - to_wait_node(n)->my_is_in_list.store(false, std::memory_order_relaxed); - } - } - - if (n != end) { - to_wait_node(n)->notify(); - } - } - - //! Notify all waiting threads of the event - void notify_all() { - atomic_fence(std::memory_order_seq_cst); - notify_all_relaxed(); - } - - // ! Notify all waiting threads of the event; Relaxed version - void notify_all_relaxed() { - if (my_waitset.empty()) { - return; - } - - base_list temp; - const base_node* end; - { - tbb::spin_mutex::scoped_lock l(my_mutex); - my_epoch.store(my_epoch.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed); - // TODO: Possible optimization, don't change node state under lock, just do flush - my_waitset.flush_to(temp); - end = temp.end(); - for (base_node* n = temp.front(); n != end; n = n->next) { - to_wait_node(n)->my_is_in_list.store(false, std::memory_order_relaxed); - } - } - - base_node* nxt; - for (base_node* n = temp.front(); n != end; n=nxt) { - nxt = n->next; - to_wait_node(n)->notify(); - } -#if TBB_USE_ASSERT - temp.clear(); -#endif - } - - //! Notify waiting threads of the event that satisfies the given predicate - template <typename P> - void notify( const P& predicate ) { - atomic_fence(std::memory_order_seq_cst); - notify_relaxed( predicate ); + void notify_one_relaxed() { + if (my_waitset.empty()) { + return; + } + + base_node* n; + const base_node* end = my_waitset.end(); + { + tbb::spin_mutex::scoped_lock l(my_mutex); + my_epoch.store(my_epoch.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed); + n = my_waitset.front(); + if (n != end) { + my_waitset.remove(*n); + to_wait_node(n)->my_is_in_list.store(false, std::memory_order_relaxed); + } + } + + if (n != end) { + to_wait_node(n)->notify(); + } } - //! Notify waiting threads of the event that satisfies the given predicate; - //! the predicate is called under the lock. Relaxed version. - template<typename P> - void notify_relaxed( const P& predicate ) { - if (my_waitset.empty()) { + //! Notify all waiting threads of the event + void notify_all() { + atomic_fence(std::memory_order_seq_cst); + notify_all_relaxed(); + } + + // ! Notify all waiting threads of the event; Relaxed version + void notify_all_relaxed() { + if (my_waitset.empty()) { return; - } - - base_list temp; - base_node* nxt; - const base_node* end = my_waitset.end(); + } + + base_list temp; + const base_node* end; { - tbb::spin_mutex::scoped_lock l(my_mutex); - my_epoch.store(my_epoch.load( std::memory_order_relaxed ) + 1, std::memory_order_relaxed); - for (base_node* n = my_waitset.last(); n != end; n = nxt) { + tbb::spin_mutex::scoped_lock l(my_mutex); + my_epoch.store(my_epoch.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed); + // TODO: Possible optimization, don't change node state under lock, just do flush + my_waitset.flush_to(temp); + end = temp.end(); + for (base_node* n = temp.front(); n != end; n = n->next) { + to_wait_node(n)->my_is_in_list.store(false, std::memory_order_relaxed); + } + } + + base_node* nxt; + for (base_node* n = temp.front(); n != end; n=nxt) { + nxt = n->next; + to_wait_node(n)->notify(); + } +#if TBB_USE_ASSERT + temp.clear(); +#endif + } + + //! 
Notify waiting threads of the event that satisfies the given predicate + template <typename P> + void notify( const P& predicate ) { + atomic_fence(std::memory_order_seq_cst); + notify_relaxed( predicate ); + } + + //! Notify waiting threads of the event that satisfies the given predicate; + //! the predicate is called under the lock. Relaxed version. + template<typename P> + void notify_relaxed( const P& predicate ) { + if (my_waitset.empty()) { + return; + } + + base_list temp; + base_node* nxt; + const base_node* end = my_waitset.end(); + { + tbb::spin_mutex::scoped_lock l(my_mutex); + my_epoch.store(my_epoch.load( std::memory_order_relaxed ) + 1, std::memory_order_relaxed); + for (base_node* n = my_waitset.last(); n != end; n = nxt) { nxt = n->prev; - auto* node = static_cast<wait_node<Context>*>(n); - if (predicate(node->my_context)) { - my_waitset.remove(*n); - node->my_is_in_list.store(false, std::memory_order_relaxed); - temp.add(n); + auto* node = static_cast<wait_node<Context>*>(n); + if (predicate(node->my_context)) { + my_waitset.remove(*n); + node->my_is_in_list.store(false, std::memory_order_relaxed); + temp.add(n); } } } end = temp.end(); - for (base_node* n=temp.front(); n != end; n = nxt) { + for (base_node* n=temp.front(); n != end; n = nxt) { nxt = n->next; - to_wait_node(n)->notify(); + to_wait_node(n)->notify(); } #if TBB_USE_ASSERT temp.clear(); #endif - } - - //! Abort any sleeping threads at the time of the call - void abort_all() { - atomic_fence( std::memory_order_seq_cst ); - abort_all_relaxed(); - } - - //! Abort any sleeping threads at the time of the call; Relaxed version - void abort_all_relaxed() { - if (my_waitset.empty()) { - return; - } - - base_list temp; - const base_node* end; - { - tbb::spin_mutex::scoped_lock l(my_mutex); - my_epoch.store(my_epoch.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed); - my_waitset.flush_to(temp); - end = temp.end(); - for (base_node* n = temp.front(); n != end; n = n->next) { - to_wait_node(n)->my_is_in_list.store(false, std::memory_order_relaxed); - } - } - - base_node* nxt; - for (base_node* n = temp.front(); n != end; n = nxt) { - nxt = n->next; - to_wait_node(n)->my_aborted = true; - to_wait_node(n)->notify(); - } -#if TBB_USE_ASSERT - temp.clear(); -#endif - } - -private: - template <typename NodeType, typename Pred> - bool guarded_call(Pred&& predicate, NodeType& node) { - bool res = false; - tbb::detail::d0::try_call( [&] { - res = std::forward<Pred>(predicate)(); - }).on_exception( [&] { - cancel_wait(node); - }); - - return res; - } - - tbb::spin_mutex my_mutex; - base_list my_waitset; - std::atomic<unsigned> my_epoch; - - wait_node<Context>* to_wait_node( base_node* node ) { return static_cast<wait_node<Context>*>(node); } -}; - -class concurrent_monitor : public concurrent_monitor_base<std::uintptr_t> { - using base_type = concurrent_monitor_base<std::uintptr_t>; -public: - using base_type::base_type; - /** per-thread descriptor for concurrent_monitor */ - using thread_context = sleep_node<std::uintptr_t>; -}; - -struct extended_context { - extended_context() = default; - - extended_context(std::uintptr_t first_addr, arena* a) : - my_uniq_addr(first_addr), my_arena_addr(a) - {} - - std::uintptr_t my_uniq_addr{0}; - arena* my_arena_addr{nullptr}; -}; - - -#if __TBB_RESUMABLE_TASKS -class resume_node : public wait_node<extended_context> { - using base_type = wait_node<extended_context>; -public: - resume_node(extended_context ctx, execution_data_ext& ed_ext, task_dispatcher& target) - : 
base_type(ctx), my_curr_dispatcher(ed_ext.task_disp), my_target_dispatcher(&target) - , my_suspend_point(my_curr_dispatcher->get_suspend_point()) - {} - - virtual ~resume_node() { - if (this->my_skipped_wakeup) { - spin_wait_until_eq(this->my_notify_calls, 1); - } - - poison_pointer(my_curr_dispatcher); - poison_pointer(my_target_dispatcher); - poison_pointer(my_suspend_point); - } - - void init() override { - base_type::init(); - } - - void wait() override { - my_curr_dispatcher->resume(*my_target_dispatcher); - __TBB_ASSERT(!this->my_is_in_list.load(std::memory_order_relaxed), "Still in the queue?"); - } - - void reset() override { - base_type::reset(); - spin_wait_until_eq(this->my_notify_calls, 1); - my_notify_calls.store(0, std::memory_order_relaxed); - } - - // notify is called (perhaps, concurrently) twice from: - // - concurrent_monitor::notify - // - post_resume_action::register_waiter - // The second notify is called after thread switches the stack - // (Because we can not call resume while the stack is occupied) - // We need calling resume only when both notifications are performed. - void notify() override { - if (++my_notify_calls == 2) { - r1::resume(my_suspend_point); - } - } - -private: - friend class thread_data; - friend struct suspend_point_type::resume_task; - task_dispatcher* my_curr_dispatcher; - task_dispatcher* my_target_dispatcher; - suspend_point_type* my_suspend_point; - std::atomic<int> my_notify_calls{0}; -}; -#endif // __TBB_RESUMABLE_TASKS - -class extended_concurrent_monitor : public concurrent_monitor_base<extended_context> { - using base_type = concurrent_monitor_base<extended_context>; -public: - using base_type::base_type; - /** per-thread descriptor for concurrent_monitor */ - using thread_context = sleep_node<extended_context>; -#if __TBB_RESUMABLE_TASKS - using resume_context = resume_node; -#endif -}; - -} // namespace r1 -} // namespace detail + } + + //! Abort any sleeping threads at the time of the call + void abort_all() { + atomic_fence( std::memory_order_seq_cst ); + abort_all_relaxed(); + } + + //! 
Abort any sleeping threads at the time of the call; Relaxed version + void abort_all_relaxed() { + if (my_waitset.empty()) { + return; + } + + base_list temp; + const base_node* end; + { + tbb::spin_mutex::scoped_lock l(my_mutex); + my_epoch.store(my_epoch.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed); + my_waitset.flush_to(temp); + end = temp.end(); + for (base_node* n = temp.front(); n != end; n = n->next) { + to_wait_node(n)->my_is_in_list.store(false, std::memory_order_relaxed); + } + } + + base_node* nxt; + for (base_node* n = temp.front(); n != end; n = nxt) { + nxt = n->next; + to_wait_node(n)->my_aborted = true; + to_wait_node(n)->notify(); + } +#if TBB_USE_ASSERT + temp.clear(); +#endif + } + +private: + template <typename NodeType, typename Pred> + bool guarded_call(Pred&& predicate, NodeType& node) { + bool res = false; + tbb::detail::d0::try_call( [&] { + res = std::forward<Pred>(predicate)(); + }).on_exception( [&] { + cancel_wait(node); + }); + + return res; + } + + tbb::spin_mutex my_mutex; + base_list my_waitset; + std::atomic<unsigned> my_epoch; + + wait_node<Context>* to_wait_node( base_node* node ) { return static_cast<wait_node<Context>*>(node); } +}; + +class concurrent_monitor : public concurrent_monitor_base<std::uintptr_t> { + using base_type = concurrent_monitor_base<std::uintptr_t>; +public: + using base_type::base_type; + /** per-thread descriptor for concurrent_monitor */ + using thread_context = sleep_node<std::uintptr_t>; +}; + +struct extended_context { + extended_context() = default; + + extended_context(std::uintptr_t first_addr, arena* a) : + my_uniq_addr(first_addr), my_arena_addr(a) + {} + + std::uintptr_t my_uniq_addr{0}; + arena* my_arena_addr{nullptr}; +}; + + +#if __TBB_RESUMABLE_TASKS +class resume_node : public wait_node<extended_context> { + using base_type = wait_node<extended_context>; +public: + resume_node(extended_context ctx, execution_data_ext& ed_ext, task_dispatcher& target) + : base_type(ctx), my_curr_dispatcher(ed_ext.task_disp), my_target_dispatcher(&target) + , my_suspend_point(my_curr_dispatcher->get_suspend_point()) + {} + + virtual ~resume_node() { + if (this->my_skipped_wakeup) { + spin_wait_until_eq(this->my_notify_calls, 1); + } + + poison_pointer(my_curr_dispatcher); + poison_pointer(my_target_dispatcher); + poison_pointer(my_suspend_point); + } + + void init() override { + base_type::init(); + } + + void wait() override { + my_curr_dispatcher->resume(*my_target_dispatcher); + __TBB_ASSERT(!this->my_is_in_list.load(std::memory_order_relaxed), "Still in the queue?"); + } + + void reset() override { + base_type::reset(); + spin_wait_until_eq(this->my_notify_calls, 1); + my_notify_calls.store(0, std::memory_order_relaxed); + } + + // notify is called (perhaps, concurrently) twice from: + // - concurrent_monitor::notify + // - post_resume_action::register_waiter + // The second notify is called after thread switches the stack + // (Because we can not call resume while the stack is occupied) + // We need calling resume only when both notifications are performed. 
+ void notify() override { + if (++my_notify_calls == 2) { + r1::resume(my_suspend_point); + } + } + +private: + friend class thread_data; + friend struct suspend_point_type::resume_task; + task_dispatcher* my_curr_dispatcher; + task_dispatcher* my_target_dispatcher; + suspend_point_type* my_suspend_point; + std::atomic<int> my_notify_calls{0}; +}; +#endif // __TBB_RESUMABLE_TASKS + +class extended_concurrent_monitor : public concurrent_monitor_base<extended_context> { + using base_type = concurrent_monitor_base<extended_context>; +public: + using base_type::base_type; + /** per-thread descriptor for concurrent_monitor */ + using thread_context = sleep_node<extended_context>; +#if __TBB_RESUMABLE_TASKS + using resume_context = resume_node; +#endif +}; + +} // namespace r1 +} // namespace detail } // namespace tbb #endif /* __TBB_concurrent_monitor_H */ diff --git a/contrib/libs/tbb/src/tbb/def/lin64-tbb.def b/contrib/libs/tbb/src/tbb/def/lin64-tbb.def index 608d57b51e..09e7753ad4 100644 --- a/contrib/libs/tbb/src/tbb/def/lin64-tbb.def +++ b/contrib/libs/tbb/src/tbb/def/lin64-tbb.def @@ -1,153 +1,153 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -{ -global: - -/* Assertions (assert.cpp) */ -_ZN3tbb6detail2r117assertion_failureEPKciS3_S3_; - -/* ITT (profiling.cpp) */ -_ZN3tbb6detail2r112itt_task_endENS0_2d115itt_domain_enumE; -_ZN3tbb6detail2r114itt_region_endENS0_2d115itt_domain_enumEPvy; -_ZN3tbb6detail2r114itt_task_beginENS0_2d115itt_domain_enumEPvyS4_yNS0_2d021string_resource_indexE; -_ZN3tbb6detail2r115call_itt_notifyEiPv; -_ZN3tbb6detail2r115create_itt_syncEPvPKcS4_; -_ZN3tbb6detail2r116itt_region_beginENS0_2d115itt_domain_enumEPvyS4_yNS0_2d021string_resource_indexE; -_ZN3tbb6detail2r116itt_relation_addENS0_2d115itt_domain_enumEPvyNS0_2d012itt_relationES4_y; -_ZN3tbb6detail2r117itt_set_sync_nameEPvPKc; -_ZN3tbb6detail2r119itt_make_task_groupENS0_2d115itt_domain_enumEPvyS4_yNS0_2d021string_resource_indexE; -_ZN3tbb6detail2r120itt_metadata_str_addENS0_2d115itt_domain_enumEPvyNS0_2d021string_resource_indexEPKc; -_ZN3tbb6detail2r120itt_metadata_ptr_addENS0_2d115itt_domain_enumEPvyNS0_2d021string_resource_indexES4_; - -/* Allocators (allocator.cpp) */ -_ZN3tbb6detail2r115allocate_memoryEm; -_ZN3tbb6detail2r117deallocate_memoryEPv; -_ZN3tbb6detail2r122cache_aligned_allocateEm; -_ZN3tbb6detail2r124cache_aligned_deallocateEPv; -_ZN3tbb6detail2r115cache_line_sizeEv; -_ZN3tbb6detail2r117is_tbbmalloc_usedEv; - -/* Small object pool (small_object_pool.cpp) */ -_ZN3tbb6detail2r18allocateERPNS0_2d117small_object_poolEm; -_ZN3tbb6detail2r18allocateERPNS0_2d117small_object_poolEmRKNS2_14execution_dataE; -_ZN3tbb6detail2r110deallocateERNS0_2d117small_object_poolEPvm; -_ZN3tbb6detail2r110deallocateERNS0_2d117small_object_poolEPvmRKNS2_14execution_dataE; - -/* Error handling (exception.cpp) */ -_ZN3tbb6detail2r115throw_exceptionENS0_2d012exception_idE; -_ZTIN3tbb6detail2r114bad_last_allocE; -_ZTVN3tbb6detail2r114bad_last_allocE; 
-_ZTIN3tbb6detail2r112missing_waitE; -_ZTVN3tbb6detail2r112missing_waitE; -_ZTIN3tbb6detail2r110user_abortE; -_ZTVN3tbb6detail2r110user_abortE; -_ZTIN3tbb6detail2r111unsafe_waitE; -_ZTVN3tbb6detail2r111unsafe_waitE; - -/* RTM Mutex (rtm_mutex.cpp) */ -_ZN3tbb6detail2r17acquireERNS0_2d19rtm_mutexERNS3_11scoped_lockEb; -_ZN3tbb6detail2r17releaseERNS0_2d19rtm_mutex11scoped_lockE; -_ZN3tbb6detail2r111try_acquireERNS0_2d19rtm_mutexERNS3_11scoped_lockE; - -/* RTM RW Mutex (rtm_rw_mutex.cpp) */ -_ZN3tbb6detail2r114acquire_readerERNS0_2d112rtm_rw_mutexERNS3_11scoped_lockEb; -_ZN3tbb6detail2r114acquire_writerERNS0_2d112rtm_rw_mutexERNS3_11scoped_lockEb; -_ZN3tbb6detail2r118try_acquire_readerERNS0_2d112rtm_rw_mutexERNS3_11scoped_lockE; -_ZN3tbb6detail2r118try_acquire_writerERNS0_2d112rtm_rw_mutexERNS3_11scoped_lockE; -_ZN3tbb6detail2r17releaseERNS0_2d112rtm_rw_mutex11scoped_lockE; -_ZN3tbb6detail2r17upgradeERNS0_2d112rtm_rw_mutex11scoped_lockE; -_ZN3tbb6detail2r19downgradeERNS0_2d112rtm_rw_mutex11scoped_lockE; - -/* Tasks and partitioners (task.cpp) */ -_ZN3tbb6detail2r17suspendEPFvPvPNS1_18suspend_point_typeEES2_; -_ZN3tbb6detail2r16resumeEPNS1_18suspend_point_typeE; -_ZN3tbb6detail2r121current_suspend_pointEv; -_ZN3tbb6detail2r114notify_waitersEm; - -/* Task dispatcher (task_dispatcher.cpp) */ -_ZN3tbb6detail2r114execution_slotEPKNS0_2d114execution_dataE; -_ZN3tbb6detail2r14waitERNS0_2d112wait_contextERNS2_18task_group_contextE; -_ZN3tbb6detail2r15spawnERNS0_2d14taskERNS2_18task_group_contextE; -_ZN3tbb6detail2r15spawnERNS0_2d14taskERNS2_18task_group_contextEt; -_ZN3tbb6detail2r116execute_and_waitERNS0_2d14taskERNS2_18task_group_contextERNS2_12wait_contextES6_; -_ZN3tbb6detail2r16submitERNS0_2d14taskERNS2_18task_group_contextEPNS1_5arenaEm; -_ZN3tbb6detail2r115current_contextEv; - -/* Task group context (task_group_context.cpp) */ -_ZN3tbb6detail2r110initializeERNS0_2d118task_group_contextE; -_ZN3tbb6detail2r122cancel_group_executionERNS0_2d118task_group_contextE; -_ZN3tbb6detail2r128is_group_execution_cancelledERNS0_2d118task_group_contextE; -_ZN3tbb6detail2r15resetERNS0_2d118task_group_contextE; -_ZN3tbb6detail2r17destroyERNS0_2d118task_group_contextE; -_ZN3tbb6detail2r119capture_fp_settingsERNS0_2d118task_group_contextE; - -/* Task arena (arena.cpp) */ -_ZN3tbb6detail2r115max_concurrencyEPKNS0_2d115task_arena_baseE; -_ZN3tbb6detail2r110initializeERNS0_2d115task_arena_baseE; -_ZN3tbb6detail2r16attachERNS0_2d115task_arena_baseE; -_ZN3tbb6detail2r17executeERNS0_2d115task_arena_baseERNS2_13delegate_baseE; -_ZN3tbb6detail2r19terminateERNS0_2d115task_arena_baseE; -_ZN3tbb6detail2r120isolate_within_arenaERNS0_2d113delegate_baseEl; -_ZN3tbb6detail2r17enqueueERNS0_2d14taskEPNS2_15task_arena_baseE; -_ZN3tbb6detail2r14waitERNS0_2d115task_arena_baseE; - -/* System topology parsing and threads pinning (governor.cpp) */ -_ZN3tbb6detail2r115numa_node_countEv; -_ZN3tbb6detail2r117fill_numa_indicesEPi; -_ZN3tbb6detail2r115core_type_countEl; -_ZN3tbb6detail2r122fill_core_type_indicesEPil; -_ZN3tbb6detail2r131constraints_default_concurrencyERKNS0_2d111constraintsEl; -_ZN3tbb6detail2r128constraints_threads_per_coreERKNS0_2d111constraintsEl; -_ZN3tbb6detail2r124numa_default_concurrencyEi; - -/* Observer (observer_proxy.cpp) */ -_ZN3tbb6detail2r17observeERNS0_2d123task_scheduler_observerEb; - -/* Queuing RW Mutex (queuing_rw_mutex.cpp) */ -_ZN3tbb6detail2r111try_acquireERNS0_2d116queuing_rw_mutexERNS3_11scoped_lockEb; -_ZN3tbb6detail2r117upgrade_to_writerERNS0_2d116queuing_rw_mutex11scoped_lockE; 
-_ZN3tbb6detail2r119downgrade_to_readerERNS0_2d116queuing_rw_mutex11scoped_lockE; -_ZN3tbb6detail2r17acquireERNS0_2d116queuing_rw_mutexERNS3_11scoped_lockEb; -_ZN3tbb6detail2r17releaseERNS0_2d116queuing_rw_mutex11scoped_lockE; -_ZN3tbb6detail2r19constructERNS0_2d116queuing_rw_mutexE; - -/* Global control (global_control.cpp) */ -_ZN3tbb6detail2r16createERNS0_2d114global_controlE; -_ZN3tbb6detail2r17destroyERNS0_2d114global_controlE; -_ZN3tbb6detail2r127global_control_active_valueEi; -_ZN3tbb6detail2r18finalizeERNS0_2d121task_scheduler_handleEl; -_ZN3tbb6detail2r13getERNS0_2d121task_scheduler_handleE; - -/* Parallel pipeline (parallel_pipeline.cpp) */ -_ZN3tbb6detail2r117parallel_pipelineERNS0_2d118task_group_contextEmRKNS2_11filter_nodeE; -_ZN3tbb6detail2r116set_end_of_inputERNS0_2d111base_filterE; - -/* Concurrent bounded queue (concurrent_bounded_queue.cpp) */ -_ZN3tbb6detail2r126allocate_bounded_queue_repEm; -_ZN3tbb6detail2r126wait_bounded_queue_monitorEPNS1_18concurrent_monitorEmlRNS0_2d113delegate_baseE; -_ZN3tbb6detail2r128abort_bounded_queue_monitorsEPNS1_18concurrent_monitorE; -_ZN3tbb6detail2r128deallocate_bounded_queue_repEPhm; -_ZN3tbb6detail2r128notify_bounded_queue_monitorEPNS1_18concurrent_monitorEmm; - -/* Versioning (version.cpp) */ -TBB_runtime_interface_version; -TBB_runtime_version; - -local: -/* TODO: fill more precisely */ -*; -}; +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
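Note on this export map: the entries listed above are Itanium-ABI mangled C++ symbols, grouped by the source file that defines them. One convenient way to read them is abi::__cxa_demangle from <cxxabi.h> (available with GCC and Clang); the sketch below demangles one symbol taken from the list above and is purely illustrative.

#include <cxxabi.h>
#include <cstdio>
#include <cstdlib>

int main() {
    const char* mangled = "_ZN3tbb6detail2r122cache_aligned_allocateEm";
    int status = 0;
    // __cxa_demangle returns a malloc'ed, human-readable signature, or nullptr on failure.
    char* readable = abi::__cxa_demangle(mangled, nullptr, nullptr, &status);
    if (status == 0 && readable) {
        std::printf("%s\n", readable);   // prints: tbb::detail::r1::cache_aligned_allocate(unsigned long)
        std::free(readable);
    }
    return 0;
}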
+*/ + +{ +global: + +/* Assertions (assert.cpp) */ +_ZN3tbb6detail2r117assertion_failureEPKciS3_S3_; + +/* ITT (profiling.cpp) */ +_ZN3tbb6detail2r112itt_task_endENS0_2d115itt_domain_enumE; +_ZN3tbb6detail2r114itt_region_endENS0_2d115itt_domain_enumEPvy; +_ZN3tbb6detail2r114itt_task_beginENS0_2d115itt_domain_enumEPvyS4_yNS0_2d021string_resource_indexE; +_ZN3tbb6detail2r115call_itt_notifyEiPv; +_ZN3tbb6detail2r115create_itt_syncEPvPKcS4_; +_ZN3tbb6detail2r116itt_region_beginENS0_2d115itt_domain_enumEPvyS4_yNS0_2d021string_resource_indexE; +_ZN3tbb6detail2r116itt_relation_addENS0_2d115itt_domain_enumEPvyNS0_2d012itt_relationES4_y; +_ZN3tbb6detail2r117itt_set_sync_nameEPvPKc; +_ZN3tbb6detail2r119itt_make_task_groupENS0_2d115itt_domain_enumEPvyS4_yNS0_2d021string_resource_indexE; +_ZN3tbb6detail2r120itt_metadata_str_addENS0_2d115itt_domain_enumEPvyNS0_2d021string_resource_indexEPKc; +_ZN3tbb6detail2r120itt_metadata_ptr_addENS0_2d115itt_domain_enumEPvyNS0_2d021string_resource_indexES4_; + +/* Allocators (allocator.cpp) */ +_ZN3tbb6detail2r115allocate_memoryEm; +_ZN3tbb6detail2r117deallocate_memoryEPv; +_ZN3tbb6detail2r122cache_aligned_allocateEm; +_ZN3tbb6detail2r124cache_aligned_deallocateEPv; +_ZN3tbb6detail2r115cache_line_sizeEv; +_ZN3tbb6detail2r117is_tbbmalloc_usedEv; + +/* Small object pool (small_object_pool.cpp) */ +_ZN3tbb6detail2r18allocateERPNS0_2d117small_object_poolEm; +_ZN3tbb6detail2r18allocateERPNS0_2d117small_object_poolEmRKNS2_14execution_dataE; +_ZN3tbb6detail2r110deallocateERNS0_2d117small_object_poolEPvm; +_ZN3tbb6detail2r110deallocateERNS0_2d117small_object_poolEPvmRKNS2_14execution_dataE; + +/* Error handling (exception.cpp) */ +_ZN3tbb6detail2r115throw_exceptionENS0_2d012exception_idE; +_ZTIN3tbb6detail2r114bad_last_allocE; +_ZTVN3tbb6detail2r114bad_last_allocE; +_ZTIN3tbb6detail2r112missing_waitE; +_ZTVN3tbb6detail2r112missing_waitE; +_ZTIN3tbb6detail2r110user_abortE; +_ZTVN3tbb6detail2r110user_abortE; +_ZTIN3tbb6detail2r111unsafe_waitE; +_ZTVN3tbb6detail2r111unsafe_waitE; + +/* RTM Mutex (rtm_mutex.cpp) */ +_ZN3tbb6detail2r17acquireERNS0_2d19rtm_mutexERNS3_11scoped_lockEb; +_ZN3tbb6detail2r17releaseERNS0_2d19rtm_mutex11scoped_lockE; +_ZN3tbb6detail2r111try_acquireERNS0_2d19rtm_mutexERNS3_11scoped_lockE; + +/* RTM RW Mutex (rtm_rw_mutex.cpp) */ +_ZN3tbb6detail2r114acquire_readerERNS0_2d112rtm_rw_mutexERNS3_11scoped_lockEb; +_ZN3tbb6detail2r114acquire_writerERNS0_2d112rtm_rw_mutexERNS3_11scoped_lockEb; +_ZN3tbb6detail2r118try_acquire_readerERNS0_2d112rtm_rw_mutexERNS3_11scoped_lockE; +_ZN3tbb6detail2r118try_acquire_writerERNS0_2d112rtm_rw_mutexERNS3_11scoped_lockE; +_ZN3tbb6detail2r17releaseERNS0_2d112rtm_rw_mutex11scoped_lockE; +_ZN3tbb6detail2r17upgradeERNS0_2d112rtm_rw_mutex11scoped_lockE; +_ZN3tbb6detail2r19downgradeERNS0_2d112rtm_rw_mutex11scoped_lockE; + +/* Tasks and partitioners (task.cpp) */ +_ZN3tbb6detail2r17suspendEPFvPvPNS1_18suspend_point_typeEES2_; +_ZN3tbb6detail2r16resumeEPNS1_18suspend_point_typeE; +_ZN3tbb6detail2r121current_suspend_pointEv; +_ZN3tbb6detail2r114notify_waitersEm; + +/* Task dispatcher (task_dispatcher.cpp) */ +_ZN3tbb6detail2r114execution_slotEPKNS0_2d114execution_dataE; +_ZN3tbb6detail2r14waitERNS0_2d112wait_contextERNS2_18task_group_contextE; +_ZN3tbb6detail2r15spawnERNS0_2d14taskERNS2_18task_group_contextE; +_ZN3tbb6detail2r15spawnERNS0_2d14taskERNS2_18task_group_contextEt; +_ZN3tbb6detail2r116execute_and_waitERNS0_2d14taskERNS2_18task_group_contextERNS2_12wait_contextES6_; 
+_ZN3tbb6detail2r16submitERNS0_2d14taskERNS2_18task_group_contextEPNS1_5arenaEm; +_ZN3tbb6detail2r115current_contextEv; + +/* Task group context (task_group_context.cpp) */ +_ZN3tbb6detail2r110initializeERNS0_2d118task_group_contextE; +_ZN3tbb6detail2r122cancel_group_executionERNS0_2d118task_group_contextE; +_ZN3tbb6detail2r128is_group_execution_cancelledERNS0_2d118task_group_contextE; +_ZN3tbb6detail2r15resetERNS0_2d118task_group_contextE; +_ZN3tbb6detail2r17destroyERNS0_2d118task_group_contextE; +_ZN3tbb6detail2r119capture_fp_settingsERNS0_2d118task_group_contextE; + +/* Task arena (arena.cpp) */ +_ZN3tbb6detail2r115max_concurrencyEPKNS0_2d115task_arena_baseE; +_ZN3tbb6detail2r110initializeERNS0_2d115task_arena_baseE; +_ZN3tbb6detail2r16attachERNS0_2d115task_arena_baseE; +_ZN3tbb6detail2r17executeERNS0_2d115task_arena_baseERNS2_13delegate_baseE; +_ZN3tbb6detail2r19terminateERNS0_2d115task_arena_baseE; +_ZN3tbb6detail2r120isolate_within_arenaERNS0_2d113delegate_baseEl; +_ZN3tbb6detail2r17enqueueERNS0_2d14taskEPNS2_15task_arena_baseE; +_ZN3tbb6detail2r14waitERNS0_2d115task_arena_baseE; + +/* System topology parsing and threads pinning (governor.cpp) */ +_ZN3tbb6detail2r115numa_node_countEv; +_ZN3tbb6detail2r117fill_numa_indicesEPi; +_ZN3tbb6detail2r115core_type_countEl; +_ZN3tbb6detail2r122fill_core_type_indicesEPil; +_ZN3tbb6detail2r131constraints_default_concurrencyERKNS0_2d111constraintsEl; +_ZN3tbb6detail2r128constraints_threads_per_coreERKNS0_2d111constraintsEl; +_ZN3tbb6detail2r124numa_default_concurrencyEi; + +/* Observer (observer_proxy.cpp) */ +_ZN3tbb6detail2r17observeERNS0_2d123task_scheduler_observerEb; + +/* Queuing RW Mutex (queuing_rw_mutex.cpp) */ +_ZN3tbb6detail2r111try_acquireERNS0_2d116queuing_rw_mutexERNS3_11scoped_lockEb; +_ZN3tbb6detail2r117upgrade_to_writerERNS0_2d116queuing_rw_mutex11scoped_lockE; +_ZN3tbb6detail2r119downgrade_to_readerERNS0_2d116queuing_rw_mutex11scoped_lockE; +_ZN3tbb6detail2r17acquireERNS0_2d116queuing_rw_mutexERNS3_11scoped_lockEb; +_ZN3tbb6detail2r17releaseERNS0_2d116queuing_rw_mutex11scoped_lockE; +_ZN3tbb6detail2r19constructERNS0_2d116queuing_rw_mutexE; + +/* Global control (global_control.cpp) */ +_ZN3tbb6detail2r16createERNS0_2d114global_controlE; +_ZN3tbb6detail2r17destroyERNS0_2d114global_controlE; +_ZN3tbb6detail2r127global_control_active_valueEi; +_ZN3tbb6detail2r18finalizeERNS0_2d121task_scheduler_handleEl; +_ZN3tbb6detail2r13getERNS0_2d121task_scheduler_handleE; + +/* Parallel pipeline (parallel_pipeline.cpp) */ +_ZN3tbb6detail2r117parallel_pipelineERNS0_2d118task_group_contextEmRKNS2_11filter_nodeE; +_ZN3tbb6detail2r116set_end_of_inputERNS0_2d111base_filterE; + +/* Concurrent bounded queue (concurrent_bounded_queue.cpp) */ +_ZN3tbb6detail2r126allocate_bounded_queue_repEm; +_ZN3tbb6detail2r126wait_bounded_queue_monitorEPNS1_18concurrent_monitorEmlRNS0_2d113delegate_baseE; +_ZN3tbb6detail2r128abort_bounded_queue_monitorsEPNS1_18concurrent_monitorE; +_ZN3tbb6detail2r128deallocate_bounded_queue_repEPhm; +_ZN3tbb6detail2r128notify_bounded_queue_monitorEPNS1_18concurrent_monitorEmm; + +/* Versioning (version.cpp) */ +TBB_runtime_interface_version; +TBB_runtime_version; + +local: +/* TODO: fill more precisely */ +*; +}; diff --git a/contrib/libs/tbb/src/tbb/dynamic_link.cpp b/contrib/libs/tbb/src/tbb/dynamic_link.cpp index 12f7e149f2..d5c5c7be7d 100644 --- a/contrib/libs/tbb/src/tbb/dynamic_link.cpp +++ b/contrib/libs/tbb/src/tbb/dynamic_link.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel 
Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -16,13 +16,13 @@ #include "dynamic_link.h" -#include "oneapi/tbb/detail/_template_helpers.h" -#include "oneapi/tbb/detail/_utils.h" - +#include "oneapi/tbb/detail/_template_helpers.h" +#include "oneapi/tbb/detail/_utils.h" + /* This file is used by both TBB and OpenMP RTL. Do not use __TBB_ASSERT() macro and runtime_warning() function because they are not available in OpenMP. Use - __TBB_ASSERT_EX and DYNAMIC_LINK_WARNING instead. + __TBB_ASSERT_EX and DYNAMIC_LINK_WARNING instead. */ #include <cstdarg> // va_list etc. @@ -40,10 +40,10 @@ #else /* _WIN32 */ #include <dlfcn.h> #include <unistd.h> - - #include <cstring> - #include <climits> - #include <cstdlib> + + #include <cstring> + #include <climits> + #include <cstdlib> #endif /* _WIN32 */ #if __TBB_WEAK_SYMBOLS_PRESENT && !__TBB_DYNAMIC_LOAD_ENABLED @@ -99,9 +99,9 @@ soon as all of the symbols have been resolved. 3. Weak symbols: if weak symbols are available they are returned. */ -namespace tbb { -namespace detail { -namespace r1 { +namespace tbb { +namespace detail { +namespace r1 { #if __TBB_WEAK_SYMBOLS_PRESENT || __TBB_DYNAMIC_LOAD_ENABLED @@ -109,11 +109,11 @@ namespace r1 { // Report runtime errors and continue. #define DYNAMIC_LINK_WARNING dynamic_link_warning static void dynamic_link_warning( dynamic_link_error_t code, ... ) { - suppress_unused_warning(code); + suppress_unused_warning(code); } // library_warning #endif /* !defined(DYNAMIC_LINK_WARNING) && !__TBB_WIN8UI_SUPPORT && __TBB_DYNAMIC_LOAD_ENABLED */ - static bool resolve_symbols( dynamic_link_handle module, const dynamic_link_descriptor descriptors[], std::size_t required ) + static bool resolve_symbols( dynamic_link_handle module, const dynamic_link_descriptor descriptors[], std::size_t required ) { if ( !module ) return false; @@ -122,12 +122,12 @@ namespace r1 { if ( !dlsym ) return false; #endif /* !__TBB_DYNAMIC_LOAD_ENABLED */ - const std::size_t n_desc=20; // Usually we don't have more than 20 descriptors per library - __TBB_ASSERT_EX( required <= n_desc, "Too many descriptors is required" ); + const std::size_t n_desc=20; // Usually we don't have more than 20 descriptors per library + __TBB_ASSERT_EX( required <= n_desc, "Too many descriptors is required" ); if ( required > n_desc ) return false; pointer_to_handler h[n_desc]; - for ( std::size_t k = 0; k < required; ++k ) { + for ( std::size_t k = 0; k < required; ++k ) { dynamic_link_descriptor const & desc = descriptors[k]; pointer_to_handler addr = (pointer_to_handler)dlsym( module, desc.name ); if ( !addr ) { @@ -138,13 +138,13 @@ namespace r1 { // Commit the entry points. // Cannot use memset here, because the writes must be atomic. - for( std::size_t k = 0; k < required; ++k ) + for( std::size_t k = 0; k < required; ++k ) *descriptors[k].handler = h[k]; return true; } #if __TBB_WIN8UI_SUPPORT - bool dynamic_link( const char* library, const dynamic_link_descriptor descriptors[], std::size_t required, dynamic_link_handle*, int flags ) { + bool dynamic_link( const char* library, const dynamic_link_descriptor descriptors[], std::size_t required, dynamic_link_handle*, int flags ) { dynamic_link_handle tmp_handle = NULL; TCHAR wlibrary[256]; if ( MultiByteToWideChar(CP_UTF8, 0, library, -1, wlibrary, 255) == 0 ) return false; @@ -171,8 +171,8 @@ namespace r1 { current one, it is the directory tbb.dll loaded from. 
Example: - Let us assume "tbb.dll" is located in "c:\program files\common\intel\" directory, e.g. - absolute path of the library is "c:\program files\common\intel\tbb.dll". Absolute path for + Let us assume "tbb.dll" is located in "c:\program files\common\intel\" directory, e.g. + absolute path of the library is "c:\program files\common\intel\tbb.dll". Absolute path for "tbbmalloc.dll" would be "c:\program files\common\intel\tbbmalloc.dll". Absolute path for "malloc\tbbmalloc.dll" would be "c:\program files\common\intel\malloc\tbbmalloc.dll". */ @@ -184,30 +184,30 @@ namespace r1 { // the constructor is called. #define MAX_LOADED_MODULES 8 // The number of maximum possible modules which can be loaded - using atomic_incrementer = std::atomic<std::size_t>; + using atomic_incrementer = std::atomic<std::size_t>; - static struct handles_t { + static struct handles_t { atomic_incrementer my_size; dynamic_link_handle my_handles[MAX_LOADED_MODULES]; void add(const dynamic_link_handle &handle) { - const std::size_t ind = my_size++; - __TBB_ASSERT_EX( ind < MAX_LOADED_MODULES, "Too many modules are loaded" ); + const std::size_t ind = my_size++; + __TBB_ASSERT_EX( ind < MAX_LOADED_MODULES, "Too many modules are loaded" ); my_handles[ind] = handle; } void free() { - const std::size_t size = my_size; - for (std::size_t i=0; i<size; ++i) + const std::size_t size = my_size; + for (std::size_t i=0; i<size; ++i) dynamic_unlink( my_handles[i] ); } } handles; - static std::once_flag init_dl_data_state; + static std::once_flag init_dl_data_state; static struct ap_data_t { char _path[PATH_MAX+1]; - std::size_t _len; + std::size_t _len; } ap_data; static void init_ap_data() { @@ -236,14 +236,14 @@ namespace r1 { return; } // Find the position of the last backslash. - char *backslash = std::strrchr( ap_data._path, '\\' ); + char *backslash = std::strrchr( ap_data._path, '\\' ); if ( !backslash ) { // Backslash not found. - __TBB_ASSERT_EX( backslash!=NULL, "Unbelievable."); + __TBB_ASSERT_EX( backslash!=NULL, "Unbelievable."); return; } - __TBB_ASSERT_EX( backslash >= ap_data._path, "Unbelievable."); - ap_data._len = (std::size_t)(backslash - ap_data._path) + 1; + __TBB_ASSERT_EX( backslash >= ap_data._path, "Unbelievable."); + ap_data._len = (std::size_t)(backslash - ap_data._path) + 1; *(backslash+1) = 0; #else // Get the library path @@ -254,17 +254,17 @@ namespace r1 { DYNAMIC_LINK_WARNING( dl_sys_fail, "dladdr", err ); return; } else { - __TBB_ASSERT_EX( dlinfo.dli_fname!=NULL, "Unbelievable." ); + __TBB_ASSERT_EX( dlinfo.dli_fname!=NULL, "Unbelievable." 
); } - char const *slash = std::strrchr( dlinfo.dli_fname, '/' ); - std::size_t fname_len=0; + char const *slash = std::strrchr( dlinfo.dli_fname, '/' ); + std::size_t fname_len=0; if ( slash ) { - __TBB_ASSERT_EX( slash >= dlinfo.dli_fname, "Unbelievable."); - fname_len = (std::size_t)(slash - dlinfo.dli_fname) + 1; + __TBB_ASSERT_EX( slash >= dlinfo.dli_fname, "Unbelievable."); + fname_len = (std::size_t)(slash - dlinfo.dli_fname) + 1; } - std::size_t rc; + std::size_t rc; if ( dlinfo.dli_fname[0]=='/' ) { // The library path is absolute rc = 0; @@ -275,7 +275,7 @@ namespace r1 { DYNAMIC_LINK_WARNING( dl_buff_too_small ); return; } - ap_data._len = std::strlen( ap_data._path ); + ap_data._len = std::strlen( ap_data._path ); ap_data._path[ap_data._len++]='/'; rc = ap_data._len; } @@ -286,7 +286,7 @@ namespace r1 { ap_data._len=0; return; } - std::strncpy( ap_data._path+rc, dlinfo.dli_fname, fname_len ); + std::strncpy( ap_data._path+rc, dlinfo.dli_fname, fname_len ); ap_data._len += fname_len; ap_data._path[ap_data._len]=0; } @@ -307,28 +307,28 @@ namespace r1 { in len -- Size of buffer. ret -- 0 -- Error occurred. > len -- Buffer too short, required size returned. - otherwise -- Ok, number of characters (incl. terminating null) written to buffer. + otherwise -- Ok, number of characters (incl. terminating null) written to buffer. */ - static std::size_t abs_path( char const * name, char * path, std::size_t len ) { - if ( ap_data._len == 0 ) + static std::size_t abs_path( char const * name, char * path, std::size_t len ) { + if ( ap_data._len == 0 ) return 0; - std::size_t name_len = std::strlen( name ); - std::size_t full_len = name_len+ap_data._len; + std::size_t name_len = std::strlen( name ); + std::size_t full_len = name_len+ap_data._len; if ( full_len < len ) { - __TBB_ASSERT( ap_data._path[ap_data._len] == 0, NULL); - __TBB_ASSERT( std::strlen(ap_data._path) == ap_data._len, NULL); - std::strncpy( path, ap_data._path, ap_data._len + 1 ); - __TBB_ASSERT( path[ap_data._len] == 0, NULL ); - std::strncat( path, name, len - ap_data._len ); - __TBB_ASSERT( std::strlen(path) == full_len, NULL ); + __TBB_ASSERT( ap_data._path[ap_data._len] == 0, NULL); + __TBB_ASSERT( std::strlen(ap_data._path) == ap_data._len, NULL); + std::strncpy( path, ap_data._path, ap_data._len + 1 ); + __TBB_ASSERT( path[ap_data._len] == 0, NULL ); + std::strncat( path, name, len - ap_data._len ); + __TBB_ASSERT( std::strlen(path) == full_len, NULL ); } - return full_len+1; // +1 for null character + return full_len+1; // +1 for null character } #endif // __TBB_DYNAMIC_LOAD_ENABLED void init_dynamic_link_data() { #if __TBB_DYNAMIC_LOAD_ENABLED - std::call_once( init_dl_data_state, init_dl_data ); + std::call_once( init_dl_data_state, init_dl_data ); #endif } @@ -344,19 +344,19 @@ namespace r1 { #endif #if __TBB_WEAK_SYMBOLS_PRESENT - static bool weak_symbol_link( const dynamic_link_descriptor descriptors[], std::size_t required ) + static bool weak_symbol_link( const dynamic_link_descriptor descriptors[], std::size_t required ) { // Check if the required entries are present in what was loaded into our process. - for ( std::size_t k = 0; k < required; ++k ) + for ( std::size_t k = 0; k < required; ++k ) if ( !descriptors[k].ptr ) return false; // Commit the entry points. 
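As an illustration of the resolution pattern used by resolve_symbols() and weak_symbol_link() above (look up every required symbol into a temporary array, then commit the descriptor table only if all of them were found), here is a minimal POSIX-only sketch. The library name "libtbbmalloc.so.2", the helper names, and the two symbols are assumptions chosen for the example, not part of the diff.

// Sketch only: mirrors the resolve-into-a-temporary-then-commit pattern of
// resolve_symbols(); library name and symbols below are assumptions.
#include <dlfcn.h>
#include <cstddef>
#include <cstdio>

typedef void (*pointer_to_handler)();

struct descriptor {
    const char* name;             // symbol to look up
    pointer_to_handler* handler;  // where the resolved address is stored
};

static void* (*my_malloc)(std::size_t) = nullptr;
static void  (*my_free)(void*) = nullptr;

static bool resolve_all(void* module, const descriptor descs[], std::size_t required) {
    const std::size_t max_desc = 8;
    if (!module || required > max_desc) return false;
    pointer_to_handler tmp[max_desc];
    for (std::size_t k = 0; k < required; ++k) {
        tmp[k] = (pointer_to_handler)dlsym(module, descs[k].name);
        if (!tmp[k]) return false;           // leave the table untouched on failure
    }
    for (std::size_t k = 0; k < required; ++k)
        *descs[k].handler = tmp[k];          // commit only after full success
    return true;
}

int main() {
    const descriptor table[] = {
        { "scalable_malloc", (pointer_to_handler*)(void*)&my_malloc },
        { "scalable_free",   (pointer_to_handler*)(void*)&my_free   },
    };
    // Adding RTLD_NOLOAD would restrict the lookup to an already-loaded module,
    // which is what global_symbols_link() does.
    void* h = dlopen("libtbbmalloc.so.2", RTLD_LAZY | RTLD_GLOBAL);
    bool ok = h && resolve_all(h, table, 2);
    std::printf("resolved: %s\n", ok ? "yes" : "no");
    if (h) dlclose(h);
    return 0;
}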
- for ( std::size_t k = 0; k < required; ++k ) + for ( std::size_t k = 0; k < required; ++k ) *descriptors[k].handler = (pointer_to_handler) descriptors[k].ptr; return true; } #else - static bool weak_symbol_link( const dynamic_link_descriptor[], std::size_t ) { + static bool weak_symbol_link( const dynamic_link_descriptor[], std::size_t ) { return false; } #endif /* __TBB_WEAK_SYMBOLS_PRESENT */ @@ -376,30 +376,30 @@ namespace r1 { #endif } - static dynamic_link_handle global_symbols_link( const char* library, const dynamic_link_descriptor descriptors[], std::size_t required ) { - dynamic_link_handle library_handle{}; + static dynamic_link_handle global_symbols_link( const char* library, const dynamic_link_descriptor descriptors[], std::size_t required ) { + dynamic_link_handle library_handle{}; #if _WIN32 - bool res = GetModuleHandleEx(0, library, &library_handle); - __TBB_ASSERT_EX(res && library_handle || !res && !library_handle, nullptr); + bool res = GetModuleHandleEx(0, library, &library_handle); + __TBB_ASSERT_EX(res && library_handle || !res && !library_handle, nullptr); #else /* _WIN32 */ #if !__TBB_DYNAMIC_LOAD_ENABLED /* only __TBB_WEAK_SYMBOLS_PRESENT is defined */ if ( !dlopen ) return 0; #endif /* !__TBB_DYNAMIC_LOAD_ENABLED */ - // RTLD_GLOBAL - to guarantee that old TBB will find the loaded library - // RTLD_NOLOAD - not to load the library without the full path - library_handle = dlopen(library, RTLD_LAZY | RTLD_GLOBAL | RTLD_NOLOAD); -#endif /* _WIN32 */ - if (library_handle) { - if (!resolve_symbols(library_handle, descriptors, required)) { - dynamic_unlink(library_handle); - library_handle = nullptr; - } + // RTLD_GLOBAL - to guarantee that old TBB will find the loaded library + // RTLD_NOLOAD - not to load the library without the full path + library_handle = dlopen(library, RTLD_LAZY | RTLD_GLOBAL | RTLD_NOLOAD); +#endif /* _WIN32 */ + if (library_handle) { + if (!resolve_symbols(library_handle, descriptors, required)) { + dynamic_unlink(library_handle); + library_handle = nullptr; + } } - return library_handle; + return library_handle; } static void save_library_handle( dynamic_link_handle src, dynamic_link_handle *dst ) { - __TBB_ASSERT_EX( src, "The library handle to store must be non-zero" ); + __TBB_ASSERT_EX( src, "The library handle to store must be non-zero" ); if ( dst ) *dst = src; #if __TBB_DYNAMIC_LOAD_ENABLED @@ -408,41 +408,41 @@ namespace r1 { #endif /* __TBB_DYNAMIC_LOAD_ENABLED */ } - dynamic_link_handle dynamic_load( const char* library, const dynamic_link_descriptor descriptors[], std::size_t required ) { - ::tbb::detail::suppress_unused_warning( library, descriptors, required ); -#if __TBB_DYNAMIC_LOAD_ENABLED + dynamic_link_handle dynamic_load( const char* library, const dynamic_link_descriptor descriptors[], std::size_t required ) { + ::tbb::detail::suppress_unused_warning( library, descriptors, required ); +#if __TBB_DYNAMIC_LOAD_ENABLED - std::size_t const len = PATH_MAX + 1; - char path[ len ]; - std::size_t rc = abs_path( library, path, len ); - if ( 0 < rc && rc <= len ) { + std::size_t const len = PATH_MAX + 1; + char path[ len ]; + std::size_t rc = abs_path( library, path, len ); + if ( 0 < rc && rc <= len ) { #if _WIN32 - // Prevent Windows from displaying silly message boxes if it fails to load library - // (e.g. because of MS runtime problems - one of those crazy manifest related ones) - UINT prev_mode = SetErrorMode (SEM_FAILCRITICALERRORS); + // Prevent Windows from displaying silly message boxes if it fails to load library + // (e.g. 
because of MS runtime problems - one of those crazy manifest related ones) + UINT prev_mode = SetErrorMode (SEM_FAILCRITICALERRORS); #endif /* _WIN32 */ - dynamic_link_handle library_handle = dlopen( path, RTLD_NOW | RTLD_GLOBAL ); + dynamic_link_handle library_handle = dlopen( path, RTLD_NOW | RTLD_GLOBAL ); #if _WIN32 - SetErrorMode (prev_mode); + SetErrorMode (prev_mode); #endif /* _WIN32 */ - if( library_handle ) { - if( !resolve_symbols( library_handle, descriptors, required ) ) { - // The loaded library does not contain all the expected entry points - dynamic_unlink( library_handle ); - library_handle = NULL; - } - } else - DYNAMIC_LINK_WARNING( dl_lib_not_found, path, dlerror() ); - return library_handle; - } else if ( rc>len ) - DYNAMIC_LINK_WARNING( dl_buff_too_small ); - // rc == 0 means failing of init_ap_data so the warning has already been issued. - -#endif /* __TBB_DYNAMIC_LOAD_ENABLED */ - return 0; + if( library_handle ) { + if( !resolve_symbols( library_handle, descriptors, required ) ) { + // The loaded library does not contain all the expected entry points + dynamic_unlink( library_handle ); + library_handle = NULL; + } + } else + DYNAMIC_LINK_WARNING( dl_lib_not_found, path, dlerror() ); + return library_handle; + } else if ( rc>len ) + DYNAMIC_LINK_WARNING( dl_buff_too_small ); + // rc == 0 means failing of init_ap_data so the warning has already been issued. + +#endif /* __TBB_DYNAMIC_LOAD_ENABLED */ + return 0; } - bool dynamic_link( const char* library, const dynamic_link_descriptor descriptors[], std::size_t required, dynamic_link_handle *handle, int flags ) { + bool dynamic_link( const char* library, const dynamic_link_descriptor descriptors[], std::size_t required, dynamic_link_handle *handle, int flags ) { init_dynamic_link_data(); // TODO: May global_symbols_link find weak symbols? @@ -463,7 +463,7 @@ namespace r1 { #endif /*__TBB_WIN8UI_SUPPORT*/ #else /* __TBB_WEAK_SYMBOLS_PRESENT || __TBB_DYNAMIC_LOAD_ENABLED */ - bool dynamic_link( const char*, const dynamic_link_descriptor*, std::size_t, dynamic_link_handle *handle, int ) { + bool dynamic_link( const char*, const dynamic_link_descriptor*, std::size_t, dynamic_link_handle *handle, int ) { if ( handle ) *handle=0; return false; @@ -472,6 +472,6 @@ namespace r1 { void dynamic_unlink_all() {} #endif /* __TBB_WEAK_SYMBOLS_PRESENT || __TBB_DYNAMIC_LOAD_ENABLED */ -} // namespace r1 -} // namespace detail -} // namespace tbb +} // namespace r1 +} // namespace detail +} // namespace tbb diff --git a/contrib/libs/tbb/src/tbb/dynamic_link.h b/contrib/libs/tbb/src/tbb/dynamic_link.h index 465d17ad58..91adcc507c 100644 --- a/contrib/libs/tbb/src/tbb/dynamic_link.h +++ b/contrib/libs/tbb/src/tbb/dynamic_link.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -19,23 +19,23 @@ // Support for dynamic loading entry points from other shared libraries. -#include "oneapi/tbb/detail/_config.h" +#include "oneapi/tbb/detail/_config.h" -#include <atomic> -#include <mutex> +#include <atomic> +#include <mutex> /** By default, symbols declared and defined here go into namespace tbb::internal. To put them in other namespace, define macros OPEN_INTERNAL_NAMESPACE and CLOSE_INTERNAL_NAMESPACE to override the following default definitions. 
**/ -#include <cstddef> +#include <cstddef> #if _WIN32 -#include <Windows.h> +#include <Windows.h> #endif /* _WIN32 */ -namespace tbb { -namespace detail { -namespace r1 { +namespace tbb { +namespace detail { +namespace r1 { //! Type definition for a pointer to a void somefunc(void) typedef void (*pointer_to_handler)(); @@ -45,10 +45,10 @@ typedef void (*pointer_to_handler)(); // prevent warnings from some compilers (g++ 4.1) #if __TBB_WEAK_SYMBOLS_PRESENT #define DLD(s,h) {#s, (pointer_to_handler*)(void*)(&h), (pointer_to_handler)&s} -#define DLD_NOWEAK(s,h) {#s, (pointer_to_handler*)(void*)(&h), NULL} +#define DLD_NOWEAK(s,h) {#s, (pointer_to_handler*)(void*)(&h), NULL} #else #define DLD(s,h) {#s, (pointer_to_handler*)(void*)(&h)} -#define DLD_NOWEAK(s,h) DLD(s,h) +#define DLD_NOWEAK(s,h) DLD(s,h) #endif /* __TBB_WEAK_SYMBOLS_PRESENT */ //! Association between a handler name and location of pointer to it. struct dynamic_link_descriptor { @@ -63,9 +63,9 @@ struct dynamic_link_descriptor { }; #if _WIN32 -using dynamic_link_handle = HMODULE; +using dynamic_link_handle = HMODULE; #else -using dynamic_link_handle = void*; +using dynamic_link_handle = void*; #endif /* _WIN32 */ const int DYNAMIC_LINK_GLOBAL = 0x01; @@ -91,7 +91,7 @@ const int DYNAMIC_LINK_ALL = DYNAMIC_LINK_GLOBAL | DYNAMIC_LINK_LOAD | DYNAMI **/ bool dynamic_link( const char* library, const dynamic_link_descriptor descriptors[], - std::size_t required, + std::size_t required, dynamic_link_handle* handle = 0, int flags = DYNAMIC_LINK_ALL ); @@ -108,8 +108,8 @@ enum dynamic_link_error_t { dl_buff_too_small // none }; // dynamic_link_error_t -} // namespace r1 -} // namespace detail -} // namespace tbb +} // namespace r1 +} // namespace detail +} // namespace tbb #endif /* __TBB_dynamic_link */ diff --git a/contrib/libs/tbb/src/tbb/environment.h b/contrib/libs/tbb/src/tbb/environment.h index b4ac296682..8886ef09e1 100644 --- a/contrib/libs/tbb/src/tbb/environment.h +++ b/contrib/libs/tbb/src/tbb/environment.h @@ -1,81 +1,81 @@ -/* - Copyright (c) 2018-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_tbb_environment_H -#define __TBB_tbb_environment_H - -#include <cstdlib> -#include <cstring> -#include <cerrno> -#include <cctype> - -namespace tbb { -namespace detail { -namespace r1 { - -#if __TBB_WIN8UI_SUPPORT -static inline bool GetBoolEnvironmentVariable( const char * ) { - return false; -} - -static inline long GetIntegralEnvironmentVariable( const char * ) { - return -1; -} -#else /* __TBB_WIN8UI_SUPPORT */ -static inline bool GetBoolEnvironmentVariable( const char * name ) { - if ( const char* s = std::getenv(name) ) { - // The result is defined as true only if the environment variable contains - // no characters except one '1' character and an arbitrary number of spaces - // (including the absence of spaces). 
- size_t index = std::strspn(s, " "); - if (s[index] != '1') return false; - index++; - // Memory access after incrementing is safe, since the getenv() returns a - // NULL terminated string, and even if the character getting by index is '1', - // and this character is the end of string, after incrementing we will get - // an index of character, that contains '\0' - index += std::strspn(&s[index], " "); - return !s[index]; - } - return false; -} - -static inline long GetIntegralEnvironmentVariable( const char * name ) { - if ( const char* s = std::getenv(name) ) { - char* end = NULL; - errno = 0; - long value = std::strtol(s, &end, 10); - - // We have exceeded the range, value is negative or string is incovertable - if ( errno == ERANGE || value < 0 || end==s ) { - return -1; - } - for ( ; *end != '\0'; end++ ) { - if ( !std::isspace(*end) ) { - return -1; - } - } - return value; - } - return -1; -} -#endif /* __TBB_WIN8UI_SUPPORT */ - -} // namespace r1 -} // namespace detail -} // namespace tbb - -#endif // __TBB_tbb_environment_H +/* + Copyright (c) 2018-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_tbb_environment_H +#define __TBB_tbb_environment_H + +#include <cstdlib> +#include <cstring> +#include <cerrno> +#include <cctype> + +namespace tbb { +namespace detail { +namespace r1 { + +#if __TBB_WIN8UI_SUPPORT +static inline bool GetBoolEnvironmentVariable( const char * ) { + return false; +} + +static inline long GetIntegralEnvironmentVariable( const char * ) { + return -1; +} +#else /* __TBB_WIN8UI_SUPPORT */ +static inline bool GetBoolEnvironmentVariable( const char * name ) { + if ( const char* s = std::getenv(name) ) { + // The result is defined as true only if the environment variable contains + // no characters except one '1' character and an arbitrary number of spaces + // (including the absence of spaces). 
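The rule in the comment above treats a value as true only when it consists of optional leading spaces, a single '1', and optional trailing spaces. A self-contained sketch of that check follows; the helper name is_truthy is illustrative and not part of TBB.

// Sketch of the "spaces, one '1', spaces" rule used by GetBoolEnvironmentVariable.
#include <cstring>
#include <cstddef>
#include <cstdio>

static bool is_truthy(const char* s) {
    std::size_t i = std::strspn(s, " ");   // skip leading spaces
    if (s[i] != '1') return false;
    ++i;                                   // safe: s is NUL-terminated
    i += std::strspn(&s[i], " ");          // skip trailing spaces
    return s[i] == '\0';                   // nothing else may follow
}

int main() {
    std::printf("%d %d %d %d\n",
        is_truthy("1"),        // 1
        is_truthy("  1  "),    // 1
        is_truthy("10"),       // 0
        is_truthy(" true "));  // 0
    return 0;
}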
+ size_t index = std::strspn(s, " "); + if (s[index] != '1') return false; + index++; + // Memory access after incrementing is safe, since the getenv() returns a + // NULL terminated string, and even if the character getting by index is '1', + // and this character is the end of string, after incrementing we will get + // an index of character, that contains '\0' + index += std::strspn(&s[index], " "); + return !s[index]; + } + return false; +} + +static inline long GetIntegralEnvironmentVariable( const char * name ) { + if ( const char* s = std::getenv(name) ) { + char* end = NULL; + errno = 0; + long value = std::strtol(s, &end, 10); + + // We have exceeded the range, value is negative or string is incovertable + if ( errno == ERANGE || value < 0 || end==s ) { + return -1; + } + for ( ; *end != '\0'; end++ ) { + if ( !std::isspace(*end) ) { + return -1; + } + } + return value; + } + return -1; +} +#endif /* __TBB_WIN8UI_SUPPORT */ + +} // namespace r1 +} // namespace detail +} // namespace tbb + +#endif // __TBB_tbb_environment_H diff --git a/contrib/libs/tbb/src/tbb/exception.cpp b/contrib/libs/tbb/src/tbb/exception.cpp index 0ee5083c22..c3e95d6d97 100644 --- a/contrib/libs/tbb/src/tbb/exception.cpp +++ b/contrib/libs/tbb/src/tbb/exception.cpp @@ -1,162 +1,162 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -#include "oneapi/tbb/detail/_exception.h" -#include "oneapi/tbb/detail/_assert.h" -#include "oneapi/tbb/detail/_template_helpers.h" - -#include <cstring> -#include <cstdio> -#include <stdexcept> // std::runtime_error -#include <new> -#include <stdexcept> - -#define __TBB_STD_RETHROW_EXCEPTION_POSSIBLY_BROKEN \ - (__GLIBCXX__ && __TBB_GLIBCXX_VERSION>=40700 && __TBB_GLIBCXX_VERSION<60000 && TBB_USE_EXCEPTIONS) - -#if __TBB_STD_RETHROW_EXCEPTION_POSSIBLY_BROKEN -// GCC ABI declarations necessary for a workaround -#include <cxxabi.h> -#endif - -namespace tbb { -namespace detail { -namespace r1 { - -const char* bad_last_alloc::what() const noexcept(true) { return "bad allocation in previous or concurrent attempt"; } -const char* user_abort::what() const noexcept(true) { return "User-initiated abort has terminated this operation"; } -const char* missing_wait::what() const noexcept(true) { return "wait() was not called on the structured_task_group"; } - -#if TBB_USE_EXCEPTIONS - template <typename F> - /*[[noreturn]]*/ void do_throw_noexcept(F throw_func) noexcept { - throw_func(); - } - - /*[[noreturn]]*/ void do_throw_noexcept(void (*throw_func)()) noexcept { - throw_func(); - } - - bool terminate_on_exception(); // defined in global_control.cpp and ipc_server.cpp - - template <typename F> - /*[[noreturn]]*/ void do_throw(F throw_func) { - if (terminate_on_exception()) { - do_throw_noexcept(throw_func); - } - throw_func(); - } - - #define DO_THROW(exc, init_args) do_throw( []{ throw exc init_args; } ); -#else /* !TBB_USE_EXCEPTIONS */ - #define PRINT_ERROR_AND_ABORT(exc_name, msg) \ - std::fprintf (stderr, "Exception %s with message %s would have been thrown, " \ - "if exception handling had not been disabled. Aborting.\n", exc_name, msg); \ - std::fflush(stderr); \ - std::abort(); - #define DO_THROW(exc, init_args) PRINT_ERROR_AND_ABORT(#exc, #init_args) -#endif /* !TBB_USE_EXCEPTIONS */ - -void throw_exception ( exception_id eid ) { - switch ( eid ) { - case exception_id::bad_alloc: DO_THROW(std::bad_alloc, ()); break; - case exception_id::bad_last_alloc: DO_THROW(bad_last_alloc, ()); break; - case exception_id::user_abort: DO_THROW( user_abort, () ); break; - case exception_id::nonpositive_step: DO_THROW(std::invalid_argument, ("Step must be positive") ); break; - case exception_id::out_of_range: DO_THROW(std::out_of_range, ("Index out of requested size range")); break; - case exception_id::reservation_length_error: DO_THROW(std::length_error, ("Attempt to exceed implementation defined length limits")); break; - case exception_id::missing_wait: DO_THROW(missing_wait, ()); break; - case exception_id::invalid_load_factor: DO_THROW(std::out_of_range, ("Invalid hash load factor")); break; - case exception_id::invalid_key: DO_THROW(std::out_of_range, ("invalid key")); break; - case exception_id::bad_tagged_msg_cast: DO_THROW(std::runtime_error, ("Illegal tagged_msg cast")); break; -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE - case exception_id::unsafe_wait: DO_THROW(unsafe_wait, ("Unsafe to wait further")); break; -#endif - default: __TBB_ASSERT ( false, "Unknown exception ID" ); - } - __TBB_ASSERT(false, "Unreachable code"); -} - -/* The "what" should be fairly short, not more than about 128 characters. - Because we control all the call sites to handle_perror, it is pointless - to bullet-proof it for very long strings. 
- - Design note: ADR put this routine off to the side in tbb_misc.cpp instead of - Task.cpp because the throw generates a pathetic lot of code, and ADR wanted - this large chunk of code to be placed on a cold page. */ -void handle_perror( int error_code, const char* what ) { - const int BUF_SIZE = 255; - char buf[BUF_SIZE + 1] = { 0 }; - std::strncat(buf, what, BUF_SIZE); - std::size_t buf_len = std::strlen(buf); - if (error_code) { - std::strncat(buf, ": ", BUF_SIZE - buf_len); - buf_len = std::strlen(buf); - std::strncat(buf, std::strerror(error_code), BUF_SIZE - buf_len); - buf_len = std::strlen(buf); - } - __TBB_ASSERT(buf_len <= BUF_SIZE && buf[buf_len] == 0, nullptr); -#if TBB_USE_EXCEPTIONS - do_throw([&buf] { throw std::runtime_error(buf); }); -#else - PRINT_ERROR_AND_ABORT( "runtime_error", buf); -#endif /* !TBB_USE_EXCEPTIONS */ -} - -#if __TBB_STD_RETHROW_EXCEPTION_POSSIBLY_BROKEN -// Runtime detection and workaround for the GCC bug 62258. -// The problem is that std::rethrow_exception() does not increment a counter -// of active exceptions, causing std::uncaught_exception() to return a wrong value. -// The code is created after, and roughly reflects, the workaround -// at https://gcc.gnu.org/bugzilla/attachment.cgi?id=34683 - -void fix_broken_rethrow() { - struct gcc_eh_data { - void * caughtExceptions; - unsigned int uncaughtExceptions; - }; - gcc_eh_data* eh_data = punned_cast<gcc_eh_data*>( abi::__cxa_get_globals() ); - ++eh_data->uncaughtExceptions; -} - -bool gcc_rethrow_exception_broken() { - bool is_broken; - __TBB_ASSERT( !std::uncaught_exception(), - "gcc_rethrow_exception_broken() must not be called when an exception is active" ); - try { - // Throw, catch, and rethrow an exception - try { - throw __TBB_GLIBCXX_VERSION; - } catch(...) { - std::rethrow_exception( std::current_exception() ); - } - } catch(...) { - // Check the bug presence - is_broken = std::uncaught_exception(); - } - if( is_broken ) fix_broken_rethrow(); - __TBB_ASSERT( !std::uncaught_exception(), NULL ); - return is_broken; -} -#else -void fix_broken_rethrow() {} -bool gcc_rethrow_exception_broken() { return false; } -#endif /* __TBB_STD_RETHROW_EXCEPTION_POSSIBLY_BROKEN */ - -} // namespace r1 -} // namespace detail -} // namespace tbb - +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#include "oneapi/tbb/detail/_exception.h" +#include "oneapi/tbb/detail/_assert.h" +#include "oneapi/tbb/detail/_template_helpers.h" + +#include <cstring> +#include <cstdio> +#include <stdexcept> // std::runtime_error +#include <new> +#include <stdexcept> + +#define __TBB_STD_RETHROW_EXCEPTION_POSSIBLY_BROKEN \ + (__GLIBCXX__ && __TBB_GLIBCXX_VERSION>=40700 && __TBB_GLIBCXX_VERSION<60000 && TBB_USE_EXCEPTIONS) + +#if __TBB_STD_RETHROW_EXCEPTION_POSSIBLY_BROKEN +// GCC ABI declarations necessary for a workaround +#include <cxxabi.h> +#endif + +namespace tbb { +namespace detail { +namespace r1 { + +const char* bad_last_alloc::what() const noexcept(true) { return "bad allocation in previous or concurrent attempt"; } +const char* user_abort::what() const noexcept(true) { return "User-initiated abort has terminated this operation"; } +const char* missing_wait::what() const noexcept(true) { return "wait() was not called on the structured_task_group"; } + +#if TBB_USE_EXCEPTIONS + template <typename F> + /*[[noreturn]]*/ void do_throw_noexcept(F throw_func) noexcept { + throw_func(); + } + + /*[[noreturn]]*/ void do_throw_noexcept(void (*throw_func)()) noexcept { + throw_func(); + } + + bool terminate_on_exception(); // defined in global_control.cpp and ipc_server.cpp + + template <typename F> + /*[[noreturn]]*/ void do_throw(F throw_func) { + if (terminate_on_exception()) { + do_throw_noexcept(throw_func); + } + throw_func(); + } + + #define DO_THROW(exc, init_args) do_throw( []{ throw exc init_args; } ); +#else /* !TBB_USE_EXCEPTIONS */ + #define PRINT_ERROR_AND_ABORT(exc_name, msg) \ + std::fprintf (stderr, "Exception %s with message %s would have been thrown, " \ + "if exception handling had not been disabled. Aborting.\n", exc_name, msg); \ + std::fflush(stderr); \ + std::abort(); + #define DO_THROW(exc, init_args) PRINT_ERROR_AND_ABORT(#exc, #init_args) +#endif /* !TBB_USE_EXCEPTIONS */ + +void throw_exception ( exception_id eid ) { + switch ( eid ) { + case exception_id::bad_alloc: DO_THROW(std::bad_alloc, ()); break; + case exception_id::bad_last_alloc: DO_THROW(bad_last_alloc, ()); break; + case exception_id::user_abort: DO_THROW( user_abort, () ); break; + case exception_id::nonpositive_step: DO_THROW(std::invalid_argument, ("Step must be positive") ); break; + case exception_id::out_of_range: DO_THROW(std::out_of_range, ("Index out of requested size range")); break; + case exception_id::reservation_length_error: DO_THROW(std::length_error, ("Attempt to exceed implementation defined length limits")); break; + case exception_id::missing_wait: DO_THROW(missing_wait, ()); break; + case exception_id::invalid_load_factor: DO_THROW(std::out_of_range, ("Invalid hash load factor")); break; + case exception_id::invalid_key: DO_THROW(std::out_of_range, ("invalid key")); break; + case exception_id::bad_tagged_msg_cast: DO_THROW(std::runtime_error, ("Illegal tagged_msg cast")); break; +#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE + case exception_id::unsafe_wait: DO_THROW(unsafe_wait, ("Unsafe to wait further")); break; +#endif + default: __TBB_ASSERT ( false, "Unknown exception ID" ); + } + __TBB_ASSERT(false, "Unreachable code"); +} + +/* The "what" should be fairly short, not more than about 128 characters. + Because we control all the call sites to handle_perror, it is pointless + to bullet-proof it for very long strings. 
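One detail of the DO_THROW machinery above: when the terminate-on-exception policy is active, the throw is routed through a noexcept wrapper, so the exception cannot escape and std::terminate() is invoked instead of normal propagation. A compressed sketch of that idea, with the policy query replaced by a plain boolean for illustration:

// Sketch: throwing from a noexcept function forces std::terminate(),
// which is one way to implement a "terminate on exception" policy.
#include <stdexcept>
#include <cstdio>

static bool terminate_on_exception_flag = false;   // stand-in for the real policy query

template <typename F>
void do_throw_noexcept(F throw_func) noexcept {
    throw_func();                       // escaping a noexcept function -> std::terminate()
}

template <typename F>
void do_throw(F throw_func) {
    if (terminate_on_exception_flag)
        do_throw_noexcept(throw_func);  // never returns when the policy is active
    throw_func();                       // normal path: let the exception propagate
}

int main() {
    try {
        do_throw([] { throw std::runtime_error("Step must be positive"); });
    } catch (const std::exception& e) {
        std::printf("caught: %s\n", e.what());
    }
    return 0;
}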
+ + Design note: ADR put this routine off to the side in tbb_misc.cpp instead of + Task.cpp because the throw generates a pathetic lot of code, and ADR wanted + this large chunk of code to be placed on a cold page. */ +void handle_perror( int error_code, const char* what ) { + const int BUF_SIZE = 255; + char buf[BUF_SIZE + 1] = { 0 }; + std::strncat(buf, what, BUF_SIZE); + std::size_t buf_len = std::strlen(buf); + if (error_code) { + std::strncat(buf, ": ", BUF_SIZE - buf_len); + buf_len = std::strlen(buf); + std::strncat(buf, std::strerror(error_code), BUF_SIZE - buf_len); + buf_len = std::strlen(buf); + } + __TBB_ASSERT(buf_len <= BUF_SIZE && buf[buf_len] == 0, nullptr); +#if TBB_USE_EXCEPTIONS + do_throw([&buf] { throw std::runtime_error(buf); }); +#else + PRINT_ERROR_AND_ABORT( "runtime_error", buf); +#endif /* !TBB_USE_EXCEPTIONS */ +} + +#if __TBB_STD_RETHROW_EXCEPTION_POSSIBLY_BROKEN +// Runtime detection and workaround for the GCC bug 62258. +// The problem is that std::rethrow_exception() does not increment a counter +// of active exceptions, causing std::uncaught_exception() to return a wrong value. +// The code is created after, and roughly reflects, the workaround +// at https://gcc.gnu.org/bugzilla/attachment.cgi?id=34683 + +void fix_broken_rethrow() { + struct gcc_eh_data { + void * caughtExceptions; + unsigned int uncaughtExceptions; + }; + gcc_eh_data* eh_data = punned_cast<gcc_eh_data*>( abi::__cxa_get_globals() ); + ++eh_data->uncaughtExceptions; +} + +bool gcc_rethrow_exception_broken() { + bool is_broken; + __TBB_ASSERT( !std::uncaught_exception(), + "gcc_rethrow_exception_broken() must not be called when an exception is active" ); + try { + // Throw, catch, and rethrow an exception + try { + throw __TBB_GLIBCXX_VERSION; + } catch(...) { + std::rethrow_exception( std::current_exception() ); + } + } catch(...) { + // Check the bug presence + is_broken = std::uncaught_exception(); + } + if( is_broken ) fix_broken_rethrow(); + __TBB_ASSERT( !std::uncaught_exception(), NULL ); + return is_broken; +} +#else +void fix_broken_rethrow() {} +bool gcc_rethrow_exception_broken() { return false; } +#endif /* __TBB_STD_RETHROW_EXCEPTION_POSSIBLY_BROKEN */ + +} // namespace r1 +} // namespace detail +} // namespace tbb + diff --git a/contrib/libs/tbb/src/tbb/global_control.cpp b/contrib/libs/tbb/src/tbb/global_control.cpp index a00591cded..a9eac2cbc3 100644 --- a/contrib/libs/tbb/src/tbb/global_control.cpp +++ b/contrib/libs/tbb/src/tbb/global_control.cpp @@ -1,275 +1,275 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#include "oneapi/tbb/detail/_config.h" -#include "oneapi/tbb/detail/_template_helpers.h" - -#include "oneapi/tbb/global_control.h" -#include "oneapi/tbb/tbb_allocator.h" -#include "oneapi/tbb/spin_mutex.h" - -#include "governor.h" -#include "market.h" -#include "misc.h" - -#include <atomic> -#include <set> - -namespace tbb { -namespace detail { -namespace r1 { - -//! 
Comparator for a set of global_control objects -struct control_storage_comparator { - bool operator()(const global_control* lhs, const global_control* rhs) const; -}; - -class control_storage { - friend struct global_control_impl; - friend std::size_t global_control_active_value(int); -protected: - std::size_t my_active_value{0}; - std::set<global_control*, control_storage_comparator, tbb_allocator<global_control*>> my_list{}; - spin_mutex my_list_mutex{}; -public: - virtual std::size_t default_value() const = 0; - virtual void apply_active(std::size_t new_active) { - my_active_value = new_active; - } - virtual bool is_first_arg_preferred(std::size_t a, std::size_t b) const { - return a>b; // prefer max by default - } - virtual std::size_t active_value() { - spin_mutex::scoped_lock lock(my_list_mutex); // protect my_list.empty() call - return !my_list.empty() ? my_active_value : default_value(); - } -}; - -class alignas(max_nfs_size) allowed_parallelism_control : public control_storage { - virtual std::size_t default_value() const override { - return max(1U, governor::default_num_threads()); - } - virtual bool is_first_arg_preferred(std::size_t a, std::size_t b) const override { - return a<b; // prefer min allowed parallelism - } - virtual void apply_active(std::size_t new_active) override { - control_storage::apply_active(new_active); - __TBB_ASSERT( my_active_value>=1, NULL ); - // -1 to take external thread into account - market::set_active_num_workers( my_active_value-1 ); - } - virtual std::size_t active_value() override { - spin_mutex::scoped_lock lock(my_list_mutex); // protect my_list.empty() call - if (my_list.empty()) - return default_value(); - // non-zero, if market is active - const std::size_t workers = market::max_num_workers(); - // We can't exceed market's maximal number of workers. - // +1 to take external thread into account - return workers? min(workers+1, my_active_value): my_active_value; - } -public: - std::size_t active_value_if_present() const { - return !my_list.empty() ? 
my_active_value : 0; - } -}; - -class alignas(max_nfs_size) stack_size_control : public control_storage { - virtual std::size_t default_value() const override { - return ThreadStackSize; - } - virtual void apply_active(std::size_t new_active) override { - control_storage::apply_active(new_active); -#if __TBB_WIN8UI_SUPPORT && (_WIN32_WINNT < 0x0A00) - __TBB_ASSERT( false, "For Windows 8 Store* apps we must not set stack size" ); -#endif - } -}; - -class alignas(max_nfs_size) terminate_on_exception_control : public control_storage { - virtual std::size_t default_value() const override { - return 0; - } -}; - -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE -class alignas(max_nfs_size) lifetime_control : public control_storage { - virtual bool is_first_arg_preferred(std::size_t, std::size_t) const override { - return false; // not interested - } - virtual std::size_t default_value() const override { - return 0; - } - virtual void apply_active(std::size_t new_active) override { - if (new_active == 1) { - // reserve the market reference - market::global_market_mutex_type::scoped_lock lock( market::theMarketMutex ); - if (market::theMarket) { - market::add_ref_unsafe(lock, /*is_public*/ true); - } - } else if (new_active == 0) { // new_active == 0 - // release the market reference - market::global_market_mutex_type::scoped_lock lock( market::theMarketMutex ); - if (market::theMarket != nullptr) { - lock.release(); - market::theMarket->release(/*is_public*/ true, /*blocking_terminate*/ false); - } - } - control_storage::apply_active(new_active); - } - -public: - bool is_empty() { - spin_mutex::scoped_lock lock(my_list_mutex); - return my_list.empty(); - } -}; -#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE - -static allowed_parallelism_control allowed_parallelism_ctl; -static stack_size_control stack_size_ctl; -static terminate_on_exception_control terminate_on_exception_ctl; -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE -static lifetime_control lifetime_ctl; -static control_storage *controls[] = {&allowed_parallelism_ctl, &stack_size_ctl, &terminate_on_exception_ctl, &lifetime_ctl}; -#else -static control_storage *controls[] = {&allowed_parallelism_ctl, &stack_size_ctl, &terminate_on_exception_ctl}; -#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE - -//! 
Comparator for a set of global_control objects -inline bool control_storage_comparator::operator()(const global_control* lhs, const global_control* rhs) const { - __TBB_ASSERT_RELEASE(lhs->my_param < global_control::parameter_max , NULL); - return lhs->my_value < rhs->my_value || (lhs->my_value == rhs->my_value && lhs < rhs); -} - -unsigned market::app_parallelism_limit() { - return allowed_parallelism_ctl.active_value_if_present(); -} - -bool terminate_on_exception() { - return global_control::active_value(global_control::terminate_on_exception) == 1; -} - -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE -unsigned market::is_lifetime_control_present() { - return !lifetime_ctl.is_empty(); -} -#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE - -struct global_control_impl { -private: - static bool erase_if_present(control_storage* const c, d1::global_control& gc) { - auto it = c->my_list.find(&gc); - if (it != c->my_list.end()) { - c->my_list.erase(it); - return true; - } - return false; - } - -public: - - static void create(d1::global_control& gc) { - __TBB_ASSERT_RELEASE(gc.my_param < global_control::parameter_max, NULL); - control_storage* const c = controls[gc.my_param]; - - spin_mutex::scoped_lock lock(c->my_list_mutex); - if (c->my_list.empty() || c->is_first_arg_preferred(gc.my_value, c->my_active_value)) { - // to guarantee that apply_active() is called with current active value, - // calls it here and in internal_destroy() under my_list_mutex - c->apply_active(gc.my_value); - } - c->my_list.insert(&gc); - } - - static void destroy(d1::global_control& gc) { - __TBB_ASSERT_RELEASE(gc.my_param < global_control::parameter_max, NULL); - control_storage* const c = controls[gc.my_param]; -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE - __TBB_ASSERT(gc.my_param == global_control::scheduler_handle || !c->my_list.empty(), NULL); -#else - __TBB_ASSERT(!c->my_list.empty(), NULL); -#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE - // Concurrent reading and changing global parameter is possible. 
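Conceptually, each control_storage above keeps the set of live global_control requests and exposes either the preferred request (for allowed parallelism, the smallest one) or a default when the set is empty. Below is a simplified sketch of that bookkeeping with no locking and plain values standing in for global_control objects; the names and the default of 8 are invented for the example.

// Sketch of the "active value = preferred request, else default" rule.
#include <set>
#include <cstddef>
#include <cstdio>

struct parallelism_storage {
    std::multiset<std::size_t> requests;               // live parallelism requests
    std::size_t default_value() const { return 8; }    // assumed default for the example

    std::size_t active_value() const {
        // Prefer the minimum request: the most restrictive limit wins.
        return requests.empty() ? default_value() : *requests.begin();
    }
    void add(std::size_t v) { requests.insert(v); }
    void remove(std::size_t v) {
        auto it = requests.find(v);
        if (it != requests.end()) requests.erase(it);
    }
};

int main() {
    parallelism_storage s;
    std::printf("%zu\n", s.active_value());  // 8 (default, no requests)
    s.add(4);
    s.add(2);
    std::printf("%zu\n", s.active_value());  // 2 (most restrictive)
    s.remove(2);
    std::printf("%zu\n", s.active_value());  // 4
    return 0;
}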
- spin_mutex::scoped_lock lock(c->my_list_mutex); - std::size_t new_active = (std::size_t)(-1), old_active = c->my_active_value; - - if (!erase_if_present(c, gc)) { -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE - __TBB_ASSERT(gc.my_param == global_control::scheduler_handle , NULL); - return; -#else - __TBB_ASSERT(false, "Unreachable code"); -#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE - } - if (c->my_list.empty()) { - __TBB_ASSERT(new_active == (std::size_t) - 1, NULL); - new_active = c->default_value(); - } else { - new_active = (*c->my_list.begin())->my_value; - } - if (new_active != old_active) { - c->apply_active(new_active); - } - } - - static bool remove_and_check_if_empty(d1::global_control& gc) { - __TBB_ASSERT_RELEASE(gc.my_param < global_control::parameter_max, NULL); - control_storage* const c = controls[gc.my_param]; - __TBB_ASSERT(!c->my_list.empty(), NULL); - - spin_mutex::scoped_lock lock(c->my_list_mutex); - erase_if_present(c, gc); - return c->my_list.empty(); - } -#if TBB_USE_ASSERT - static bool is_present(d1::global_control& gc) { - __TBB_ASSERT_RELEASE(gc.my_param < global_control::parameter_max, NULL); - control_storage* const c = controls[gc.my_param]; - - spin_mutex::scoped_lock lock(c->my_list_mutex); - auto it = c->my_list.find(&gc); - if (it != c->my_list.end()) { - return true; - } - return false; - } -#endif // TBB_USE_ASSERT -}; - -void __TBB_EXPORTED_FUNC create(d1::global_control& gc) { - global_control_impl::create(gc); -} -void __TBB_EXPORTED_FUNC destroy(d1::global_control& gc) { - global_control_impl::destroy(gc); -} - -bool remove_and_check_if_empty(d1::global_control& gc) { - return global_control_impl::remove_and_check_if_empty(gc); -} -#if TBB_USE_ASSERT -bool is_present(d1::global_control& gc) { - return global_control_impl::is_present(gc); -} -#endif // TBB_USE_ASSERT -std::size_t __TBB_EXPORTED_FUNC global_control_active_value(int param) { - __TBB_ASSERT_RELEASE(param < global_control::parameter_max, NULL); - return controls[param]->active_value(); -} - -} // namespace r1 -} // namespace detail -} // namespace tbb +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "oneapi/tbb/detail/_config.h" +#include "oneapi/tbb/detail/_template_helpers.h" + +#include "oneapi/tbb/global_control.h" +#include "oneapi/tbb/tbb_allocator.h" +#include "oneapi/tbb/spin_mutex.h" + +#include "governor.h" +#include "market.h" +#include "misc.h" + +#include <atomic> +#include <set> + +namespace tbb { +namespace detail { +namespace r1 { + +//! 
Comparator for a set of global_control objects +struct control_storage_comparator { + bool operator()(const global_control* lhs, const global_control* rhs) const; +}; + +class control_storage { + friend struct global_control_impl; + friend std::size_t global_control_active_value(int); +protected: + std::size_t my_active_value{0}; + std::set<global_control*, control_storage_comparator, tbb_allocator<global_control*>> my_list{}; + spin_mutex my_list_mutex{}; +public: + virtual std::size_t default_value() const = 0; + virtual void apply_active(std::size_t new_active) { + my_active_value = new_active; + } + virtual bool is_first_arg_preferred(std::size_t a, std::size_t b) const { + return a>b; // prefer max by default + } + virtual std::size_t active_value() { + spin_mutex::scoped_lock lock(my_list_mutex); // protect my_list.empty() call + return !my_list.empty() ? my_active_value : default_value(); + } +}; + +class alignas(max_nfs_size) allowed_parallelism_control : public control_storage { + virtual std::size_t default_value() const override { + return max(1U, governor::default_num_threads()); + } + virtual bool is_first_arg_preferred(std::size_t a, std::size_t b) const override { + return a<b; // prefer min allowed parallelism + } + virtual void apply_active(std::size_t new_active) override { + control_storage::apply_active(new_active); + __TBB_ASSERT( my_active_value>=1, NULL ); + // -1 to take external thread into account + market::set_active_num_workers( my_active_value-1 ); + } + virtual std::size_t active_value() override { + spin_mutex::scoped_lock lock(my_list_mutex); // protect my_list.empty() call + if (my_list.empty()) + return default_value(); + // non-zero, if market is active + const std::size_t workers = market::max_num_workers(); + // We can't exceed market's maximal number of workers. + // +1 to take external thread into account + return workers? min(workers+1, my_active_value): my_active_value; + } +public: + std::size_t active_value_if_present() const { + return !my_list.empty() ? 
my_active_value : 0; + } +}; + +class alignas(max_nfs_size) stack_size_control : public control_storage { + virtual std::size_t default_value() const override { + return ThreadStackSize; + } + virtual void apply_active(std::size_t new_active) override { + control_storage::apply_active(new_active); +#if __TBB_WIN8UI_SUPPORT && (_WIN32_WINNT < 0x0A00) + __TBB_ASSERT( false, "For Windows 8 Store* apps we must not set stack size" ); +#endif + } +}; + +class alignas(max_nfs_size) terminate_on_exception_control : public control_storage { + virtual std::size_t default_value() const override { + return 0; + } +}; + +#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE +class alignas(max_nfs_size) lifetime_control : public control_storage { + virtual bool is_first_arg_preferred(std::size_t, std::size_t) const override { + return false; // not interested + } + virtual std::size_t default_value() const override { + return 0; + } + virtual void apply_active(std::size_t new_active) override { + if (new_active == 1) { + // reserve the market reference + market::global_market_mutex_type::scoped_lock lock( market::theMarketMutex ); + if (market::theMarket) { + market::add_ref_unsafe(lock, /*is_public*/ true); + } + } else if (new_active == 0) { // new_active == 0 + // release the market reference + market::global_market_mutex_type::scoped_lock lock( market::theMarketMutex ); + if (market::theMarket != nullptr) { + lock.release(); + market::theMarket->release(/*is_public*/ true, /*blocking_terminate*/ false); + } + } + control_storage::apply_active(new_active); + } + +public: + bool is_empty() { + spin_mutex::scoped_lock lock(my_list_mutex); + return my_list.empty(); + } +}; +#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE + +static allowed_parallelism_control allowed_parallelism_ctl; +static stack_size_control stack_size_ctl; +static terminate_on_exception_control terminate_on_exception_ctl; +#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE +static lifetime_control lifetime_ctl; +static control_storage *controls[] = {&allowed_parallelism_ctl, &stack_size_ctl, &terminate_on_exception_ctl, &lifetime_ctl}; +#else +static control_storage *controls[] = {&allowed_parallelism_ctl, &stack_size_ctl, &terminate_on_exception_ctl}; +#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE + +//! 
Comparator for a set of global_control objects +inline bool control_storage_comparator::operator()(const global_control* lhs, const global_control* rhs) const { + __TBB_ASSERT_RELEASE(lhs->my_param < global_control::parameter_max , NULL); + return lhs->my_value < rhs->my_value || (lhs->my_value == rhs->my_value && lhs < rhs); +} + +unsigned market::app_parallelism_limit() { + return allowed_parallelism_ctl.active_value_if_present(); +} + +bool terminate_on_exception() { + return global_control::active_value(global_control::terminate_on_exception) == 1; +} + +#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE +unsigned market::is_lifetime_control_present() { + return !lifetime_ctl.is_empty(); +} +#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE + +struct global_control_impl { +private: + static bool erase_if_present(control_storage* const c, d1::global_control& gc) { + auto it = c->my_list.find(&gc); + if (it != c->my_list.end()) { + c->my_list.erase(it); + return true; + } + return false; + } + +public: + + static void create(d1::global_control& gc) { + __TBB_ASSERT_RELEASE(gc.my_param < global_control::parameter_max, NULL); + control_storage* const c = controls[gc.my_param]; + + spin_mutex::scoped_lock lock(c->my_list_mutex); + if (c->my_list.empty() || c->is_first_arg_preferred(gc.my_value, c->my_active_value)) { + // to guarantee that apply_active() is called with current active value, + // calls it here and in internal_destroy() under my_list_mutex + c->apply_active(gc.my_value); + } + c->my_list.insert(&gc); + } + + static void destroy(d1::global_control& gc) { + __TBB_ASSERT_RELEASE(gc.my_param < global_control::parameter_max, NULL); + control_storage* const c = controls[gc.my_param]; +#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE + __TBB_ASSERT(gc.my_param == global_control::scheduler_handle || !c->my_list.empty(), NULL); +#else + __TBB_ASSERT(!c->my_list.empty(), NULL); +#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE + // Concurrent reading and changing global parameter is possible. 
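From user code the same machinery is reached through tbb::global_control; nested controls interact exactly as create()/destroy() above describe, with the most restrictive live request winning. A short usage sketch (the comments assume the default limit on the machine exceeds 4):

// Usage sketch: RAII scope of a global_control object bounds the limit;
// nesting picks the most restrictive value.
#include <oneapi/tbb/global_control.h>
#include <cstdio>

int main() {
    using tbb::global_control;
    std::printf("default limit: %zu\n",
                global_control::active_value(global_control::max_allowed_parallelism));
    {
        global_control limit(global_control::max_allowed_parallelism, 4);
        {
            global_control tighter(global_control::max_allowed_parallelism, 2);
            // The smaller of the two live requests wins here: 2.
            std::printf("nested limit: %zu\n",
                        global_control::active_value(global_control::max_allowed_parallelism));
        }
        // Back to 4 once the inner control is destroyed.
        std::printf("outer limit: %zu\n",
                    global_control::active_value(global_control::max_allowed_parallelism));
    }
    return 0;
}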
+ spin_mutex::scoped_lock lock(c->my_list_mutex); + std::size_t new_active = (std::size_t)(-1), old_active = c->my_active_value; + + if (!erase_if_present(c, gc)) { +#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE + __TBB_ASSERT(gc.my_param == global_control::scheduler_handle , NULL); + return; +#else + __TBB_ASSERT(false, "Unreachable code"); +#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE + } + if (c->my_list.empty()) { + __TBB_ASSERT(new_active == (std::size_t) - 1, NULL); + new_active = c->default_value(); + } else { + new_active = (*c->my_list.begin())->my_value; + } + if (new_active != old_active) { + c->apply_active(new_active); + } + } + + static bool remove_and_check_if_empty(d1::global_control& gc) { + __TBB_ASSERT_RELEASE(gc.my_param < global_control::parameter_max, NULL); + control_storage* const c = controls[gc.my_param]; + __TBB_ASSERT(!c->my_list.empty(), NULL); + + spin_mutex::scoped_lock lock(c->my_list_mutex); + erase_if_present(c, gc); + return c->my_list.empty(); + } +#if TBB_USE_ASSERT + static bool is_present(d1::global_control& gc) { + __TBB_ASSERT_RELEASE(gc.my_param < global_control::parameter_max, NULL); + control_storage* const c = controls[gc.my_param]; + + spin_mutex::scoped_lock lock(c->my_list_mutex); + auto it = c->my_list.find(&gc); + if (it != c->my_list.end()) { + return true; + } + return false; + } +#endif // TBB_USE_ASSERT +}; + +void __TBB_EXPORTED_FUNC create(d1::global_control& gc) { + global_control_impl::create(gc); +} +void __TBB_EXPORTED_FUNC destroy(d1::global_control& gc) { + global_control_impl::destroy(gc); +} + +bool remove_and_check_if_empty(d1::global_control& gc) { + return global_control_impl::remove_and_check_if_empty(gc); +} +#if TBB_USE_ASSERT +bool is_present(d1::global_control& gc) { + return global_control_impl::is_present(gc); +} +#endif // TBB_USE_ASSERT +std::size_t __TBB_EXPORTED_FUNC global_control_active_value(int param) { + __TBB_ASSERT_RELEASE(param < global_control::parameter_max, NULL); + return controls[param]->active_value(); +} + +} // namespace r1 +} // namespace detail +} // namespace tbb diff --git a/contrib/libs/tbb/src/tbb/governor.cpp b/contrib/libs/tbb/src/tbb/governor.cpp index 465f95b3d5..b75b91a75c 100644 --- a/contrib/libs/tbb/src/tbb/governor.cpp +++ b/contrib/libs/tbb/src/tbb/governor.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,64 +15,64 @@ */ #include "governor.h" -#include "main.h" -#include "thread_data.h" +#include "main.h" +#include "thread_data.h" #include "market.h" #include "arena.h" -#include "dynamic_link.h" +#include "dynamic_link.h" -#include "oneapi/tbb/task_group.h" -#include "oneapi/tbb/global_control.h" -#include "oneapi/tbb/tbb_allocator.h" -#include "oneapi/tbb/info.h" +#include "oneapi/tbb/task_group.h" +#include "oneapi/tbb/global_control.h" +#include "oneapi/tbb/tbb_allocator.h" +#include "oneapi/tbb/info.h" -#include "task_dispatcher.h" +#include "task_dispatcher.h" + +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <atomic> +#include <algorithm> -#include <cstdio> -#include <cstdlib> -#include <cstring> -#include <atomic> -#include <algorithm> - namespace tbb { -namespace detail { -namespace r1 { - -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE -//! 
global_control.cpp contains definition -bool remove_and_check_if_empty(d1::global_control& gc); -bool is_present(d1::global_control& gc); -#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE - -namespace rml { -tbb_server* make_private_server( tbb_client& client ); -} // namespace rml - +namespace detail { +namespace r1 { + +#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE +//! global_control.cpp contains definition +bool remove_and_check_if_empty(d1::global_control& gc); +bool is_present(d1::global_control& gc); +#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE + +namespace rml { +tbb_server* make_private_server( tbb_client& client ); +} // namespace rml + //------------------------------------------------------------------------ // governor //------------------------------------------------------------------------ void governor::acquire_resources () { -#if __TBB_USE_POSIX +#if __TBB_USE_POSIX int status = theTLS.create(auto_terminate); #else int status = theTLS.create(); #endif if( status ) handle_perror(status, "TBB failed to initialize task scheduler TLS\n"); - detect_cpu_features(cpu_features); + detect_cpu_features(cpu_features); is_rethrow_broken = gcc_rethrow_exception_broken(); } void governor::release_resources () { theRMLServerFactory.close(); destroy_process_mask(); - - __TBB_ASSERT(!(__TBB_InitOnce::initialization_done() && theTLS.get()), "TBB is unloaded while thread data still alive?"); - + + __TBB_ASSERT(!(__TBB_InitOnce::initialization_done() && theTLS.get()), "TBB is unloaded while thread data still alive?"); + int status = theTLS.destroy(); if( status ) - runtime_warning("failed to destroy task scheduler TLS: %s", std::strerror(status)); + runtime_warning("failed to destroy task scheduler TLS: %s", std::strerror(status)); dynamic_unlink_all(); } @@ -93,125 +93,125 @@ rml::tbb_server* governor::create_rml_server ( rml::tbb_client& client ) { return server; } -void governor::one_time_init() { - if ( !__TBB_InitOnce::initialization_done() ) { - DoOneTimeInitialization(); - } +void governor::one_time_init() { + if ( !__TBB_InitOnce::initialization_done() ) { + DoOneTimeInitialization(); + } } -/* - There is no portable way to get stack base address in Posix, however the modern - Linux versions provide pthread_attr_np API that can be used to obtain thread's - stack size and base address. Unfortunately even this function does not provide - enough information for the main thread on IA-64 architecture (RSE spill area - and memory stack are allocated as two separate discontinuous chunks of memory), - and there is no portable way to discern the main and the secondary threads. - Thus for macOS* and IA-64 architecture for Linux* OS we use the TBB worker stack size for - all threads and use the current stack top as the stack base. This simplified - approach is based on the following assumptions: - 1) If the default stack size is insufficient for the user app needs, the - required amount will be explicitly specified by the user at the point of the - TBB scheduler initialization (as an argument to tbb::task_scheduler_init - constructor). - 2) When an external thread initializes the scheduler, it has enough space on its - stack. Here "enough" means "at least as much as worker threads have". - 3) If the user app strives to conserve the memory by cutting stack size, it - should do this for TBB workers too (as in the #1). -*/ -static std::uintptr_t get_stack_base(std::size_t stack_size) { - // Stacks are growing top-down. 
Highest address is called "stack base", - // and the lowest is "stack limit". -#if USE_WINTHREAD - suppress_unused_warning(stack_size); - NT_TIB* pteb = (NT_TIB*)NtCurrentTeb(); - __TBB_ASSERT(&pteb < pteb->StackBase && &pteb > pteb->StackLimit, "invalid stack info in TEB"); - return reinterpret_cast<std::uintptr_t>(pteb->StackBase); -#else /* USE_PTHREAD */ - // There is no portable way to get stack base address in Posix, so we use - // non-portable method (on all modern Linux) or the simplified approach - // based on the common sense assumptions. The most important assumption - // is that the main thread's stack size is not less than that of other threads. - - // Points to the lowest addressable byte of a stack. - void* stack_limit = nullptr; -#if __linux__ && !__bg__ - size_t np_stack_size = 0; - pthread_attr_t np_attr_stack; - if (0 == pthread_getattr_np(pthread_self(), &np_attr_stack)) { - if (0 == pthread_attr_getstack(&np_attr_stack, &stack_limit, &np_stack_size)) { - __TBB_ASSERT( &stack_limit > stack_limit, "stack size must be positive" ); +/* + There is no portable way to get stack base address in Posix, however the modern + Linux versions provide pthread_attr_np API that can be used to obtain thread's + stack size and base address. Unfortunately even this function does not provide + enough information for the main thread on IA-64 architecture (RSE spill area + and memory stack are allocated as two separate discontinuous chunks of memory), + and there is no portable way to discern the main and the secondary threads. + Thus for macOS* and IA-64 architecture for Linux* OS we use the TBB worker stack size for + all threads and use the current stack top as the stack base. This simplified + approach is based on the following assumptions: + 1) If the default stack size is insufficient for the user app needs, the + required amount will be explicitly specified by the user at the point of the + TBB scheduler initialization (as an argument to tbb::task_scheduler_init + constructor). + 2) When an external thread initializes the scheduler, it has enough space on its + stack. Here "enough" means "at least as much as worker threads have". + 3) If the user app strives to conserve the memory by cutting stack size, it + should do this for TBB workers too (as in the #1). +*/ +static std::uintptr_t get_stack_base(std::size_t stack_size) { + // Stacks are growing top-down. Highest address is called "stack base", + // and the lowest is "stack limit". +#if USE_WINTHREAD + suppress_unused_warning(stack_size); + NT_TIB* pteb = (NT_TIB*)NtCurrentTeb(); + __TBB_ASSERT(&pteb < pteb->StackBase && &pteb > pteb->StackLimit, "invalid stack info in TEB"); + return reinterpret_cast<std::uintptr_t>(pteb->StackBase); +#else /* USE_PTHREAD */ + // There is no portable way to get stack base address in Posix, so we use + // non-portable method (on all modern Linux) or the simplified approach + // based on the common sense assumptions. The most important assumption + // is that the main thread's stack size is not less than that of other threads. + + // Points to the lowest addressable byte of a stack. 
+ void* stack_limit = nullptr; +#if __linux__ && !__bg__ + size_t np_stack_size = 0; + pthread_attr_t np_attr_stack; + if (0 == pthread_getattr_np(pthread_self(), &np_attr_stack)) { + if (0 == pthread_attr_getstack(&np_attr_stack, &stack_limit, &np_stack_size)) { + __TBB_ASSERT( &stack_limit > stack_limit, "stack size must be positive" ); } - pthread_attr_destroy(&np_attr_stack); + pthread_attr_destroy(&np_attr_stack); + } +#endif /* __linux__ */ + std::uintptr_t stack_base{}; + if (stack_limit) { + stack_base = reinterpret_cast<std::uintptr_t>(stack_limit) + stack_size; + } else { + // Use an anchor as a base stack address. + int anchor{}; + stack_base = reinterpret_cast<std::uintptr_t>(&anchor); } -#endif /* __linux__ */ - std::uintptr_t stack_base{}; - if (stack_limit) { - stack_base = reinterpret_cast<std::uintptr_t>(stack_limit) + stack_size; - } else { - // Use an anchor as a base stack address. - int anchor{}; - stack_base = reinterpret_cast<std::uintptr_t>(&anchor); - } - return stack_base; -#endif /* USE_PTHREAD */ + return stack_base; +#endif /* USE_PTHREAD */ } -void governor::init_external_thread() { - one_time_init(); - // Create new scheduler instance with arena - int num_slots = default_num_threads(); - // TODO_REVAMP: support an external thread without an implicit arena - int num_reserved_slots = 1; - unsigned arena_priority_level = 1; // corresponds to tbb::task_arena::priority::normal - std::size_t stack_size = 0; - arena& a = *market::create_arena(num_slots, num_reserved_slots, arena_priority_level, stack_size); - // We need an internal reference to the market. TODO: is it legacy? - market::global_market(false); - // External thread always occupies the first slot - thread_data& td = *new(cache_aligned_allocate(sizeof(thread_data))) thread_data(0, false); - td.attach_arena(a, /*slot index*/ 0); - - stack_size = a.my_market->worker_stack_size(); - std::uintptr_t stack_base = get_stack_base(stack_size); - task_dispatcher& task_disp = td.my_arena_slot->default_task_dispatcher(); - task_disp.set_stealing_threshold(calculate_stealing_threshold(stack_base, stack_size)); - td.attach_task_dispatcher(task_disp); - - td.my_arena_slot->occupy(); - a.my_market->add_external_thread(td); - set_thread_data(td); +void governor::init_external_thread() { + one_time_init(); + // Create new scheduler instance with arena + int num_slots = default_num_threads(); + // TODO_REVAMP: support an external thread without an implicit arena + int num_reserved_slots = 1; + unsigned arena_priority_level = 1; // corresponds to tbb::task_arena::priority::normal + std::size_t stack_size = 0; + arena& a = *market::create_arena(num_slots, num_reserved_slots, arena_priority_level, stack_size); + // We need an internal reference to the market. TODO: is it legacy? 
+ market::global_market(false); + // External thread always occupies the first slot + thread_data& td = *new(cache_aligned_allocate(sizeof(thread_data))) thread_data(0, false); + td.attach_arena(a, /*slot index*/ 0); + + stack_size = a.my_market->worker_stack_size(); + std::uintptr_t stack_base = get_stack_base(stack_size); + task_dispatcher& task_disp = td.my_arena_slot->default_task_dispatcher(); + task_disp.set_stealing_threshold(calculate_stealing_threshold(stack_base, stack_size)); + td.attach_task_dispatcher(task_disp); + + td.my_arena_slot->occupy(); + a.my_market->add_external_thread(td); + set_thread_data(td); } -void governor::auto_terminate(void* tls) { - __TBB_ASSERT(get_thread_data_if_initialized() == nullptr || - get_thread_data_if_initialized() == tls, NULL); - if (tls) { - thread_data* td = static_cast<thread_data*>(tls); - - // Only external thread can be inside an arena during termination. - if (td->my_arena_slot) { - arena* a = td->my_arena; - market* m = a->my_market; - - a->my_observers.notify_exit_observers(td->my_last_observer, td->my_is_worker); - - td->my_task_dispatcher->m_stealing_threshold = 0; - td->detach_task_dispatcher(); - td->my_arena_slot->release(); - // Release an arena - a->on_thread_leaving<arena::ref_external>(); - - m->remove_external_thread(*td); - // If there was an associated arena, it added a public market reference - m->release( /*is_public*/ true, /*blocking_terminate*/ false); +void governor::auto_terminate(void* tls) { + __TBB_ASSERT(get_thread_data_if_initialized() == nullptr || + get_thread_data_if_initialized() == tls, NULL); + if (tls) { + thread_data* td = static_cast<thread_data*>(tls); + + // Only external thread can be inside an arena during termination. + if (td->my_arena_slot) { + arena* a = td->my_arena; + market* m = a->my_market; + + a->my_observers.notify_exit_observers(td->my_last_observer, td->my_is_worker); + + td->my_task_dispatcher->m_stealing_threshold = 0; + td->detach_task_dispatcher(); + td->my_arena_slot->release(); + // Release an arena + a->on_thread_leaving<arena::ref_external>(); + + m->remove_external_thread(*td); + // If there was an associated arena, it added a public market reference + m->release( /*is_public*/ true, /*blocking_terminate*/ false); } - td->~thread_data(); - cache_aligned_deallocate(td); - - clear_thread_data(); + td->~thread_data(); + cache_aligned_deallocate(td); + + clear_thread_data(); } - __TBB_ASSERT(get_thread_data_if_initialized() == nullptr, NULL); + __TBB_ASSERT(get_thread_data_if_initialized() == nullptr, NULL); } void governor::initialize_rml_factory () { @@ -219,308 +219,308 @@ void governor::initialize_rml_factory () { UsePrivateRML = res != ::rml::factory::st_success; } -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE -void __TBB_EXPORTED_FUNC get(d1::task_scheduler_handle& handle) { - handle.m_ctl = new(allocate_memory(sizeof(global_control))) global_control(global_control::scheduler_handle, 1); -} - -void release_impl(d1::task_scheduler_handle& handle) { - if (handle.m_ctl != nullptr) { - handle.m_ctl->~global_control(); - deallocate_memory(handle.m_ctl); - handle.m_ctl = nullptr; - } -} - -bool finalize_impl(d1::task_scheduler_handle& handle) { - market::global_market_mutex_type::scoped_lock lock( market::theMarketMutex ); - bool ok = true; // ok if theMarket does not exist yet - market* m = market::theMarket; // read the state of theMarket - if (m != nullptr) { - lock.release(); - __TBB_ASSERT(is_present(*handle.m_ctl), "finalize or release was already called on this object"); 
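init_external_thread computes a stealing threshold from the stack base and stack size and hands it to the task dispatcher; conceptually, a dispatcher declines to take more work once its stack pointer gets too close to the limit. A hedged sketch of that check (names and the exact formula are illustrative, not the library's):

    // Not TBB code: approximate the current stack depth with a local's address and
    // compare it against a precomputed threshold between the stack base and limit.
    #include <cstdint>

    inline bool stack_has_room(std::uintptr_t stealing_threshold) {
        int probe = 0;  // lives on the current stack frame
        return reinterpret_cast<std::uintptr_t>(&probe) > stealing_threshold;
    }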
- thread_data* td = governor::get_thread_data_if_initialized(); - if (td) { - task_dispatcher* task_disp = td->my_task_dispatcher; - __TBB_ASSERT(task_disp, nullptr); - if (task_disp->m_properties.outermost && !td->my_is_worker) { // is not inside a parallel region - governor::auto_terminate(td); - } +#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE +void __TBB_EXPORTED_FUNC get(d1::task_scheduler_handle& handle) { + handle.m_ctl = new(allocate_memory(sizeof(global_control))) global_control(global_control::scheduler_handle, 1); +} + +void release_impl(d1::task_scheduler_handle& handle) { + if (handle.m_ctl != nullptr) { + handle.m_ctl->~global_control(); + deallocate_memory(handle.m_ctl); + handle.m_ctl = nullptr; + } +} + +bool finalize_impl(d1::task_scheduler_handle& handle) { + market::global_market_mutex_type::scoped_lock lock( market::theMarketMutex ); + bool ok = true; // ok if theMarket does not exist yet + market* m = market::theMarket; // read the state of theMarket + if (m != nullptr) { + lock.release(); + __TBB_ASSERT(is_present(*handle.m_ctl), "finalize or release was already called on this object"); + thread_data* td = governor::get_thread_data_if_initialized(); + if (td) { + task_dispatcher* task_disp = td->my_task_dispatcher; + __TBB_ASSERT(task_disp, nullptr); + if (task_disp->m_properties.outermost && !td->my_is_worker) { // is not inside a parallel region + governor::auto_terminate(td); + } + } + if (remove_and_check_if_empty(*handle.m_ctl)) { + ok = m->release(/*is_public*/ true, /*blocking_terminate*/ true); + } else { + ok = false; } - if (remove_and_check_if_empty(*handle.m_ctl)) { - ok = m->release(/*is_public*/ true, /*blocking_terminate*/ true); - } else { - ok = false; + } + return ok; +} + +bool __TBB_EXPORTED_FUNC finalize(d1::task_scheduler_handle& handle, std::intptr_t mode) { + if (mode == d1::release_nothrowing) { + release_impl(handle); + return true; + } else { + bool ok = finalize_impl(handle); + // TODO: it is unsafe when finalize is called concurrently and further library unload + release_impl(handle); + if (mode == d1::finalize_throwing && !ok) { + throw_exception(exception_id::unsafe_wait); } - } - return ok; -} - -bool __TBB_EXPORTED_FUNC finalize(d1::task_scheduler_handle& handle, std::intptr_t mode) { - if (mode == d1::release_nothrowing) { - release_impl(handle); - return true; - } else { - bool ok = finalize_impl(handle); - // TODO: it is unsafe when finalize is called concurrently and further library unload - release_impl(handle); - if (mode == d1::finalize_throwing && !ok) { - throw_exception(exception_id::unsafe_wait); + return ok; + } +} +#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE + +#if __TBB_ARENA_BINDING + +#if __TBB_WEAK_SYMBOLS_PRESENT +#pragma weak __TBB_internal_initialize_system_topology +#pragma weak __TBB_internal_allocate_binding_handler +#pragma weak __TBB_internal_deallocate_binding_handler +#pragma weak __TBB_internal_apply_affinity +#pragma weak __TBB_internal_restore_affinity +#pragma weak __TBB_internal_get_default_concurrency + +extern "C" { +void __TBB_internal_initialize_system_topology( + size_t groups_num, + int& numa_nodes_count, int*& numa_indexes_list, + int& core_types_count, int*& core_types_indexes_list +); + +//TODO: consider renaming to `create_binding_handler` and `destroy_binding_handler` +binding_handler* __TBB_internal_allocate_binding_handler( int slot_num, int numa_id, int core_type_id, int max_threads_per_core ); +void __TBB_internal_deallocate_binding_handler( binding_handler* handler_ptr ); + 
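The __TBB_WEAK_SYMBOLS_PRESENT block above relies on the ELF weak-symbol convention: an unresolved weak function evaluates to a null address, so its presence can be tested before calling it. A minimal sketch of that pattern (the symbol name here is hypothetical):

    // Not TBB code: guard a call through an optional, weakly linked entry point.
    #pragma weak optional_entry_point
    extern "C" int optional_entry_point(int);

    inline int call_if_available(int x) {
        // With ELF weak linking, &optional_entry_point is null when nothing provides it.
        return optional_entry_point ? optional_entry_point(x) : -1;
    }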
+void __TBB_internal_apply_affinity( binding_handler* handler_ptr, int slot_num ); +void __TBB_internal_restore_affinity( binding_handler* handler_ptr, int slot_num ); + +int __TBB_internal_get_default_concurrency( int numa_id, int core_type_id, int max_threads_per_core ); +} +#endif /* __TBB_WEAK_SYMBOLS_PRESENT */ + +// Stubs that will be used if TBBbind library is unavailable. +static binding_handler* dummy_allocate_binding_handler ( int, int, int, int ) { return nullptr; } +static void dummy_deallocate_binding_handler ( binding_handler* ) { } +static void dummy_apply_affinity ( binding_handler*, int ) { } +static void dummy_restore_affinity ( binding_handler*, int ) { } +static int dummy_get_default_concurrency( int, int, int ) { return governor::default_num_threads(); } + +// Handlers for communication with TBBbind +static void (*initialize_system_topology_ptr)( + size_t groups_num, + int& numa_nodes_count, int*& numa_indexes_list, + int& core_types_count, int*& core_types_indexes_list +) = nullptr; + +static binding_handler* (*allocate_binding_handler_ptr)( int slot_num, int numa_id, int core_type_id, int max_threads_per_core ) + = dummy_allocate_binding_handler; +static void (*deallocate_binding_handler_ptr)( binding_handler* handler_ptr ) + = dummy_deallocate_binding_handler; +static void (*apply_affinity_ptr)( binding_handler* handler_ptr, int slot_num ) + = dummy_apply_affinity; +static void (*restore_affinity_ptr)( binding_handler* handler_ptr, int slot_num ) + = dummy_restore_affinity; +int (*get_default_concurrency_ptr)( int numa_id, int core_type_id, int max_threads_per_core ) + = dummy_get_default_concurrency; + +#if _WIN32 || _WIN64 || __linux__ +// Table describing how to link the handlers. +static const dynamic_link_descriptor TbbBindLinkTable[] = { + DLD(__TBB_internal_initialize_system_topology, initialize_system_topology_ptr), + DLD(__TBB_internal_allocate_binding_handler, allocate_binding_handler_ptr), + DLD(__TBB_internal_deallocate_binding_handler, deallocate_binding_handler_ptr), + DLD(__TBB_internal_apply_affinity, apply_affinity_ptr), + DLD(__TBB_internal_restore_affinity, restore_affinity_ptr), + DLD(__TBB_internal_get_default_concurrency, get_default_concurrency_ptr) +}; + +static const unsigned LinkTableSize = sizeof(TbbBindLinkTable) / sizeof(dynamic_link_descriptor); + +#if TBB_USE_DEBUG +#define DEBUG_SUFFIX "_debug" +#else +#define DEBUG_SUFFIX +#endif /* TBB_USE_DEBUG */ + +#if _WIN32 || _WIN64 +#define LIBRARY_EXTENSION ".dll" +#define LIBRARY_PREFIX +#elif __linux__ +#define LIBRARY_EXTENSION __TBB_STRING(.so.3) +#define LIBRARY_PREFIX "lib" +#endif /* __linux__ */ + +#define TBBBIND_NAME LIBRARY_PREFIX "tbbbind" DEBUG_SUFFIX LIBRARY_EXTENSION +#define TBBBIND_2_0_NAME LIBRARY_PREFIX "tbbbind_2_0" DEBUG_SUFFIX LIBRARY_EXTENSION +#define TBBBIND_2_4_NAME LIBRARY_PREFIX "tbbbind_2_4" DEBUG_SUFFIX LIBRARY_EXTENSION +#endif /* _WIN32 || _WIN64 || __linux__ */ + +// Representation of system hardware topology information on the TBB side. +// System topology may be initialized by third-party component (e.g. hwloc) +// or just filled in with default stubs. 
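The handler pointers above start out bound to the dummy_* stubs and are rebound only if dynamic_link resolves the real tbbbind entry points, so call sites never test for availability. A simplified sketch of that idiom (standalone, names hypothetical):

    // Not TBB code: default every entry point to a harmless stub and swap in the
    // real function if an optional backend is discovered at run time.
    static int stub_concurrency(int) { return 1; }            // safe fallback answer
    static int (*concurrency_ptr)(int) = stub_concurrency;    // rebound on successful load

    inline int concurrency_for(int node) {
        return concurrency_ptr(node);                         // unconditional call site
    }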
+namespace system_topology { + +constexpr int automatic = -1; + +static std::atomic<do_once_state> initialization_state; + +namespace { +int numa_nodes_count = 0; +int* numa_nodes_indexes = nullptr; + +int core_types_count = 0; +int* core_types_indexes = nullptr; + +const char* load_tbbbind_shared_object() { +#if _WIN32 || _WIN64 || __linux__ +#if _WIN32 && !_WIN64 + // For 32-bit Windows applications, process affinity masks can only support up to 32 logical CPUs. + SYSTEM_INFO si; + GetNativeSystemInfo(&si); + if (si.dwNumberOfProcessors > 32) return nullptr; +#endif /* _WIN32 && !_WIN64 */ + for (const auto& tbbbind_version : {TBBBIND_2_4_NAME, TBBBIND_2_0_NAME, TBBBIND_NAME}) { + if (dynamic_link(tbbbind_version, TbbBindLinkTable, LinkTableSize)) { + return tbbbind_version; } - return ok; } +#endif /* _WIN32 || _WIN64 || __linux__ */ + return nullptr; +} + +int processor_groups_num() { +#if _WIN32 + return NumberOfProcessorGroups(); +#else + // Stub to improve code readability by reducing number of the compile-time conditions + return 1; +#endif +} +} // internal namespace + +// Tries to load TBBbind library API, if success, gets NUMA topology information from it, +// in another case, fills NUMA topology by stubs. +void initialization_impl() { + governor::one_time_init(); + + if (const char* tbbbind_name = load_tbbbind_shared_object()) { + initialize_system_topology_ptr( + processor_groups_num(), + numa_nodes_count, numa_nodes_indexes, + core_types_count, core_types_indexes + ); + + PrintExtraVersionInfo("TBBBIND", tbbbind_name); + return; + } + + static int dummy_index = automatic; + + numa_nodes_count = 1; + numa_nodes_indexes = &dummy_index; + + core_types_count = 1; + core_types_indexes = &dummy_index; + + PrintExtraVersionInfo("TBBBIND", "UNAVAILABLE"); +} + +void initialize() { + atomic_do_once(initialization_impl, initialization_state); +} +} // namespace system_topology + +binding_handler* construct_binding_handler(int slot_num, int numa_id, int core_type_id, int max_threads_per_core) { + system_topology::initialize(); + return allocate_binding_handler_ptr(slot_num, numa_id, core_type_id, max_threads_per_core); +} + +void destroy_binding_handler(binding_handler* handler_ptr) { + __TBB_ASSERT(deallocate_binding_handler_ptr, "tbbbind loading was not performed"); + deallocate_binding_handler_ptr(handler_ptr); +} + +void apply_affinity_mask(binding_handler* handler_ptr, int slot_index) { + __TBB_ASSERT(slot_index >= 0, "Negative thread index"); + __TBB_ASSERT(apply_affinity_ptr, "tbbbind loading was not performed"); + apply_affinity_ptr(handler_ptr, slot_index); +} + +void restore_affinity_mask(binding_handler* handler_ptr, int slot_index) { + __TBB_ASSERT(slot_index >= 0, "Negative thread index"); + __TBB_ASSERT(restore_affinity_ptr, "tbbbind loading was not performed"); + restore_affinity_ptr(handler_ptr, slot_index); +} + +unsigned __TBB_EXPORTED_FUNC numa_node_count() { + system_topology::initialize(); + return system_topology::numa_nodes_count; +} + +void __TBB_EXPORTED_FUNC fill_numa_indices(int* index_array) { + system_topology::initialize(); + std::memcpy(index_array, system_topology::numa_nodes_indexes, system_topology::numa_nodes_count * sizeof(int)); +} + +int __TBB_EXPORTED_FUNC numa_default_concurrency(int node_id) { + if (node_id >= 0) { + system_topology::initialize(); + int result = get_default_concurrency_ptr( + node_id, + /*core_type*/system_topology::automatic, + /*threads_per_core*/system_topology::automatic + ); + if (result > 0) return result; + } + return 
governor::default_num_threads(); +} + +unsigned __TBB_EXPORTED_FUNC core_type_count(intptr_t /*reserved*/) { + system_topology::initialize(); + return system_topology::core_types_count; } -#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE - -#if __TBB_ARENA_BINDING - -#if __TBB_WEAK_SYMBOLS_PRESENT -#pragma weak __TBB_internal_initialize_system_topology -#pragma weak __TBB_internal_allocate_binding_handler -#pragma weak __TBB_internal_deallocate_binding_handler -#pragma weak __TBB_internal_apply_affinity -#pragma weak __TBB_internal_restore_affinity -#pragma weak __TBB_internal_get_default_concurrency - -extern "C" { -void __TBB_internal_initialize_system_topology( - size_t groups_num, - int& numa_nodes_count, int*& numa_indexes_list, - int& core_types_count, int*& core_types_indexes_list -); - -//TODO: consider renaming to `create_binding_handler` and `destroy_binding_handler` -binding_handler* __TBB_internal_allocate_binding_handler( int slot_num, int numa_id, int core_type_id, int max_threads_per_core ); -void __TBB_internal_deallocate_binding_handler( binding_handler* handler_ptr ); - -void __TBB_internal_apply_affinity( binding_handler* handler_ptr, int slot_num ); -void __TBB_internal_restore_affinity( binding_handler* handler_ptr, int slot_num ); - -int __TBB_internal_get_default_concurrency( int numa_id, int core_type_id, int max_threads_per_core ); -} -#endif /* __TBB_WEAK_SYMBOLS_PRESENT */ - -// Stubs that will be used if TBBbind library is unavailable. -static binding_handler* dummy_allocate_binding_handler ( int, int, int, int ) { return nullptr; } -static void dummy_deallocate_binding_handler ( binding_handler* ) { } -static void dummy_apply_affinity ( binding_handler*, int ) { } -static void dummy_restore_affinity ( binding_handler*, int ) { } -static int dummy_get_default_concurrency( int, int, int ) { return governor::default_num_threads(); } - -// Handlers for communication with TBBbind -static void (*initialize_system_topology_ptr)( - size_t groups_num, - int& numa_nodes_count, int*& numa_indexes_list, - int& core_types_count, int*& core_types_indexes_list -) = nullptr; - -static binding_handler* (*allocate_binding_handler_ptr)( int slot_num, int numa_id, int core_type_id, int max_threads_per_core ) - = dummy_allocate_binding_handler; -static void (*deallocate_binding_handler_ptr)( binding_handler* handler_ptr ) - = dummy_deallocate_binding_handler; -static void (*apply_affinity_ptr)( binding_handler* handler_ptr, int slot_num ) - = dummy_apply_affinity; -static void (*restore_affinity_ptr)( binding_handler* handler_ptr, int slot_num ) - = dummy_restore_affinity; -int (*get_default_concurrency_ptr)( int numa_id, int core_type_id, int max_threads_per_core ) - = dummy_get_default_concurrency; - -#if _WIN32 || _WIN64 || __linux__ -// Table describing how to link the handlers. 
-static const dynamic_link_descriptor TbbBindLinkTable[] = { - DLD(__TBB_internal_initialize_system_topology, initialize_system_topology_ptr), - DLD(__TBB_internal_allocate_binding_handler, allocate_binding_handler_ptr), - DLD(__TBB_internal_deallocate_binding_handler, deallocate_binding_handler_ptr), - DLD(__TBB_internal_apply_affinity, apply_affinity_ptr), - DLD(__TBB_internal_restore_affinity, restore_affinity_ptr), - DLD(__TBB_internal_get_default_concurrency, get_default_concurrency_ptr) -}; - -static const unsigned LinkTableSize = sizeof(TbbBindLinkTable) / sizeof(dynamic_link_descriptor); - -#if TBB_USE_DEBUG -#define DEBUG_SUFFIX "_debug" -#else -#define DEBUG_SUFFIX -#endif /* TBB_USE_DEBUG */ - -#if _WIN32 || _WIN64 -#define LIBRARY_EXTENSION ".dll" -#define LIBRARY_PREFIX -#elif __linux__ -#define LIBRARY_EXTENSION __TBB_STRING(.so.3) -#define LIBRARY_PREFIX "lib" -#endif /* __linux__ */ - -#define TBBBIND_NAME LIBRARY_PREFIX "tbbbind" DEBUG_SUFFIX LIBRARY_EXTENSION -#define TBBBIND_2_0_NAME LIBRARY_PREFIX "tbbbind_2_0" DEBUG_SUFFIX LIBRARY_EXTENSION -#define TBBBIND_2_4_NAME LIBRARY_PREFIX "tbbbind_2_4" DEBUG_SUFFIX LIBRARY_EXTENSION -#endif /* _WIN32 || _WIN64 || __linux__ */ - -// Representation of system hardware topology information on the TBB side. -// System topology may be initialized by third-party component (e.g. hwloc) -// or just filled in with default stubs. -namespace system_topology { - -constexpr int automatic = -1; - -static std::atomic<do_once_state> initialization_state; - -namespace { -int numa_nodes_count = 0; -int* numa_nodes_indexes = nullptr; - -int core_types_count = 0; -int* core_types_indexes = nullptr; - -const char* load_tbbbind_shared_object() { -#if _WIN32 || _WIN64 || __linux__ -#if _WIN32 && !_WIN64 - // For 32-bit Windows applications, process affinity masks can only support up to 32 logical CPUs. - SYSTEM_INFO si; - GetNativeSystemInfo(&si); - if (si.dwNumberOfProcessors > 32) return nullptr; -#endif /* _WIN32 && !_WIN64 */ - for (const auto& tbbbind_version : {TBBBIND_2_4_NAME, TBBBIND_2_0_NAME, TBBBIND_NAME}) { - if (dynamic_link(tbbbind_version, TbbBindLinkTable, LinkTableSize)) { - return tbbbind_version; - } - } -#endif /* _WIN32 || _WIN64 || __linux__ */ - return nullptr; -} - -int processor_groups_num() { -#if _WIN32 - return NumberOfProcessorGroups(); -#else - // Stub to improve code readability by reducing number of the compile-time conditions - return 1; -#endif -} -} // internal namespace - -// Tries to load TBBbind library API, if success, gets NUMA topology information from it, -// in another case, fills NUMA topology by stubs. 
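system_topology::initialize() funnels every public query through atomic_do_once, so the topology is discovered at most once no matter which API entry is hit first. The same guarantee can be sketched with the standard library (shown for illustration only; it is not what TBB uses internally):

    // Not TBB code: std::call_once gives the equivalent run-exactly-once guarantee.
    #include <mutex>

    static std::once_flag topology_once;

    inline void ensure_topology_initialized() {
        std::call_once(topology_once, [] {
            // query hwloc/tbbbind here, or fall back to single-node defaults
        });
    }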
-void initialization_impl() { - governor::one_time_init(); - - if (const char* tbbbind_name = load_tbbbind_shared_object()) { - initialize_system_topology_ptr( - processor_groups_num(), - numa_nodes_count, numa_nodes_indexes, - core_types_count, core_types_indexes - ); - - PrintExtraVersionInfo("TBBBIND", tbbbind_name); - return; - } - - static int dummy_index = automatic; - - numa_nodes_count = 1; - numa_nodes_indexes = &dummy_index; - - core_types_count = 1; - core_types_indexes = &dummy_index; - - PrintExtraVersionInfo("TBBBIND", "UNAVAILABLE"); -} - -void initialize() { - atomic_do_once(initialization_impl, initialization_state); -} -} // namespace system_topology - -binding_handler* construct_binding_handler(int slot_num, int numa_id, int core_type_id, int max_threads_per_core) { - system_topology::initialize(); - return allocate_binding_handler_ptr(slot_num, numa_id, core_type_id, max_threads_per_core); -} - -void destroy_binding_handler(binding_handler* handler_ptr) { - __TBB_ASSERT(deallocate_binding_handler_ptr, "tbbbind loading was not performed"); - deallocate_binding_handler_ptr(handler_ptr); -} - -void apply_affinity_mask(binding_handler* handler_ptr, int slot_index) { - __TBB_ASSERT(slot_index >= 0, "Negative thread index"); - __TBB_ASSERT(apply_affinity_ptr, "tbbbind loading was not performed"); - apply_affinity_ptr(handler_ptr, slot_index); -} - -void restore_affinity_mask(binding_handler* handler_ptr, int slot_index) { - __TBB_ASSERT(slot_index >= 0, "Negative thread index"); - __TBB_ASSERT(restore_affinity_ptr, "tbbbind loading was not performed"); - restore_affinity_ptr(handler_ptr, slot_index); -} - -unsigned __TBB_EXPORTED_FUNC numa_node_count() { - system_topology::initialize(); - return system_topology::numa_nodes_count; -} - -void __TBB_EXPORTED_FUNC fill_numa_indices(int* index_array) { - system_topology::initialize(); - std::memcpy(index_array, system_topology::numa_nodes_indexes, system_topology::numa_nodes_count * sizeof(int)); -} - -int __TBB_EXPORTED_FUNC numa_default_concurrency(int node_id) { - if (node_id >= 0) { - system_topology::initialize(); - int result = get_default_concurrency_ptr( - node_id, - /*core_type*/system_topology::automatic, - /*threads_per_core*/system_topology::automatic - ); - if (result > 0) return result; - } - return governor::default_num_threads(); -} - -unsigned __TBB_EXPORTED_FUNC core_type_count(intptr_t /*reserved*/) { - system_topology::initialize(); - return system_topology::core_types_count; -} - -void __TBB_EXPORTED_FUNC fill_core_type_indices(int* index_array, intptr_t /*reserved*/) { - system_topology::initialize(); - std::memcpy(index_array, system_topology::core_types_indexes, system_topology::core_types_count * sizeof(int)); -} - -void constraints_assertion(d1::constraints c) { - bool is_topology_initialized = system_topology::initialization_state == do_once_state::initialized; - __TBB_ASSERT_RELEASE(c.max_threads_per_core == system_topology::automatic || c.max_threads_per_core > 0, - "Wrong max_threads_per_core constraints field value."); - - auto numa_nodes_begin = system_topology::numa_nodes_indexes; - auto numa_nodes_end = system_topology::numa_nodes_indexes + system_topology::numa_nodes_count; - __TBB_ASSERT_RELEASE( - c.numa_id == system_topology::automatic || - (is_topology_initialized && std::find(numa_nodes_begin, numa_nodes_end, c.numa_id) != numa_nodes_end), - "The constraints::numa_id value is not known to the library. 
Use tbb::info::numa_nodes() to get the list of possible values."); - - int* core_types_begin = system_topology::core_types_indexes; - int* core_types_end = system_topology::core_types_indexes + system_topology::core_types_count; - __TBB_ASSERT_RELEASE(c.core_type == system_topology::automatic || - (is_topology_initialized && std::find(core_types_begin, core_types_end, c.core_type) != core_types_end), - "The constraints::core_type value is not known to the library. Use tbb::info::core_types() to get the list of possible values."); + +void __TBB_EXPORTED_FUNC fill_core_type_indices(int* index_array, intptr_t /*reserved*/) { + system_topology::initialize(); + std::memcpy(index_array, system_topology::core_types_indexes, system_topology::core_types_count * sizeof(int)); +} + +void constraints_assertion(d1::constraints c) { + bool is_topology_initialized = system_topology::initialization_state == do_once_state::initialized; + __TBB_ASSERT_RELEASE(c.max_threads_per_core == system_topology::automatic || c.max_threads_per_core > 0, + "Wrong max_threads_per_core constraints field value."); + + auto numa_nodes_begin = system_topology::numa_nodes_indexes; + auto numa_nodes_end = system_topology::numa_nodes_indexes + system_topology::numa_nodes_count; + __TBB_ASSERT_RELEASE( + c.numa_id == system_topology::automatic || + (is_topology_initialized && std::find(numa_nodes_begin, numa_nodes_end, c.numa_id) != numa_nodes_end), + "The constraints::numa_id value is not known to the library. Use tbb::info::numa_nodes() to get the list of possible values."); + + int* core_types_begin = system_topology::core_types_indexes; + int* core_types_end = system_topology::core_types_indexes + system_topology::core_types_count; + __TBB_ASSERT_RELEASE(c.core_type == system_topology::automatic || + (is_topology_initialized && std::find(core_types_begin, core_types_end, c.core_type) != core_types_end), + "The constraints::core_type value is not known to the library. 
Use tbb::info::core_types() to get the list of possible values."); } -int __TBB_EXPORTED_FUNC constraints_default_concurrency(const d1::constraints& c, intptr_t /*reserved*/) { - constraints_assertion(c); +int __TBB_EXPORTED_FUNC constraints_default_concurrency(const d1::constraints& c, intptr_t /*reserved*/) { + constraints_assertion(c); - if (c.numa_id >= 0 || c.core_type >= 0 || c.max_threads_per_core > 0) { - system_topology::initialize(); - return get_default_concurrency_ptr(c.numa_id, c.core_type, c.max_threads_per_core); + if (c.numa_id >= 0 || c.core_type >= 0 || c.max_threads_per_core > 0) { + system_topology::initialize(); + return get_default_concurrency_ptr(c.numa_id, c.core_type, c.max_threads_per_core); } - return governor::default_num_threads(); + return governor::default_num_threads(); } -int __TBB_EXPORTED_FUNC constraints_threads_per_core(const d1::constraints&, intptr_t /*reserved*/) { - return system_topology::automatic; +int __TBB_EXPORTED_FUNC constraints_threads_per_core(const d1::constraints&, intptr_t /*reserved*/) { + return system_topology::automatic; } -#endif /* __TBB_ARENA_BINDING */ +#endif /* __TBB_ARENA_BINDING */ -} // namespace r1 -} // namespace detail +} // namespace r1 +} // namespace detail } // namespace tbb diff --git a/contrib/libs/tbb/src/tbb/governor.h b/contrib/libs/tbb/src/tbb/governor.h index c15f07e824..0ff4781414 100644 --- a/contrib/libs/tbb/src/tbb/governor.h +++ b/contrib/libs/tbb/src/tbb/governor.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -17,26 +17,26 @@ #ifndef _TBB_governor_H #define _TBB_governor_H -#include "rml_tbb.h" +#include "rml_tbb.h" -#include "misc.h" // for AvailableHwConcurrency +#include "misc.h" // for AvailableHwConcurrency #include "tls.h" namespace tbb { -namespace detail { -namespace r1 { +namespace detail { +namespace r1 { class market; -class thread_data; +class thread_data; class __TBB_InitOnce; -#if __TBB_USE_ITT_NOTIFY -//! Defined in profiling.cpp -extern bool ITT_Present; -#endif +#if __TBB_USE_ITT_NOTIFY +//! Defined in profiling.cpp +extern bool ITT_Present; +#endif + +typedef std::size_t stack_size_type; -typedef std::size_t stack_size_type; - //------------------------------------------------------------------------ // Class governor //------------------------------------------------------------------------ @@ -49,23 +49,23 @@ private: friend class __TBB_InitOnce; friend class market; - // TODO: consider using thread_local (measure performance and side effects) + // TODO: consider using thread_local (measure performance and side effects) //! TLS for scheduler instances associated with individual threads - static basic_tls<thread_data*> theTLS; + static basic_tls<thread_data*> theTLS; //! Caches the maximal level of parallelism supported by the hardware static unsigned DefaultNumberOfThreads; - //! Caches the size of OS regular memory page - static std::size_t DefaultPageSize; - - // TODO (TBB_REVAMP_TODO): reconsider constant names + //! Caches the size of OS regular memory page + static std::size_t DefaultPageSize; + + // TODO (TBB_REVAMP_TODO): reconsider constant names static rml::tbb_factory theRMLServerFactory; static bool UsePrivateRML; // Flags for runtime-specific conditions - static cpu_features_type cpu_features; + static cpu_features_type cpu_features; static bool is_rethrow_broken; //! 
Create key for thread-local storage and initialize RML. @@ -82,77 +82,77 @@ public: return DefaultNumberOfThreads ? DefaultNumberOfThreads : DefaultNumberOfThreads = AvailableHwConcurrency(); } - static std::size_t default_page_size () { - return DefaultPageSize ? DefaultPageSize : - DefaultPageSize = DefaultSystemPageSize(); - } + static std::size_t default_page_size () { + return DefaultPageSize ? DefaultPageSize : + DefaultPageSize = DefaultSystemPageSize(); + } static void one_time_init(); - //! Processes scheduler initialization request (possibly nested) in an external thread + //! Processes scheduler initialization request (possibly nested) in an external thread /** If necessary creates new instance of arena and/or local scheduler. The auto_init argument specifies if the call is due to automatic initialization. **/ - static void init_external_thread(); - - //! The routine to undo automatic initialization. - /** The signature is written with void* so that the routine - can be the destructor argument to pthread_key_create. */ - static void auto_terminate(void* tls); - - //! Obtain the thread-local instance of the thread data. - /** If the scheduler has not been initialized yet, initialization is done automatically. - Note that auto-initialized scheduler instance is destroyed only when its thread terminates. **/ - static thread_data* get_thread_data() { - thread_data* td = theTLS.get(); - if (td) { - return td; - } - init_external_thread(); - td = theTLS.get(); - __TBB_ASSERT(td, NULL); - return td; - } - - static void set_thread_data(thread_data& td) { - theTLS.set(&td); + static void init_external_thread(); + + //! The routine to undo automatic initialization. + /** The signature is written with void* so that the routine + can be the destructor argument to pthread_key_create. */ + static void auto_terminate(void* tls); + + //! Obtain the thread-local instance of the thread data. + /** If the scheduler has not been initialized yet, initialization is done automatically. + Note that auto-initialized scheduler instance is destroyed only when its thread terminates. **/ + static thread_data* get_thread_data() { + thread_data* td = theTLS.get(); + if (td) { + return td; + } + init_external_thread(); + td = theTLS.get(); + __TBB_ASSERT(td, NULL); + return td; } - static void clear_thread_data() { - theTLS.set(nullptr); + static void set_thread_data(thread_data& td) { + theTLS.set(&td); } - static thread_data* get_thread_data_if_initialized () { - return theTLS.get(); + static void clear_thread_data() { + theTLS.set(nullptr); } - static bool is_thread_data_set(thread_data* td) { - return theTLS.get() == td; + static thread_data* get_thread_data_if_initialized () { + return theTLS.get(); + } + + static bool is_thread_data_set(thread_data* td) { + return theTLS.get() == td; } //! Undo automatic initialization if necessary; call when a thread exits. 
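get_thread_data above captures the governor's lazy-initialization contract: a plain TLS read on the fast path, with init_external_thread invoked only when the slot is still empty. Reduced to its essentials (illustrative types; per-thread cleanup omitted):

    // Not TBB code: fast-path TLS read, slow-path one-time per-thread setup.
    thread_local int* per_thread_state = nullptr;

    inline int* get_per_thread_state() {
        if (per_thread_state) return per_thread_state;  // fast path: TLS already set
        per_thread_state = new int(0);                  // slow path: runs once per thread
        return per_thread_state;
    }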
- static void terminate_external_thread() { - auto_terminate(get_thread_data_if_initialized()); + static void terminate_external_thread() { + auto_terminate(get_thread_data_if_initialized()); } static void initialize_rml_factory (); - static bool does_client_join_workers (const rml::tbb_client &client); + static bool does_client_join_workers (const rml::tbb_client &client); + + static bool speculation_enabled() { return cpu_features.rtm_enabled; } - static bool speculation_enabled() { return cpu_features.rtm_enabled; } + static bool wait_package_enabled() { return cpu_features.waitpkg_enabled; } - static bool wait_package_enabled() { return cpu_features.waitpkg_enabled; } - static bool rethrow_exception_broken() { return is_rethrow_broken; } - static bool is_itt_present() { -#if __TBB_USE_ITT_NOTIFY - return ITT_Present; -#else - return false; -#endif - } + static bool is_itt_present() { +#if __TBB_USE_ITT_NOTIFY + return ITT_Present; +#else + return false; +#endif + } }; // class governor -} // namespace r1 -} // namespace detail +} // namespace r1 +} // namespace detail } // namespace tbb #endif /* _TBB_governor_H */ diff --git a/contrib/libs/tbb/src/tbb/intrusive_list.h b/contrib/libs/tbb/src/tbb/intrusive_list.h index 4bf75b81aa..699bc149aa 100644 --- a/contrib/libs/tbb/src/tbb/intrusive_list.h +++ b/contrib/libs/tbb/src/tbb/intrusive_list.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -18,8 +18,8 @@ #define _TBB_intrusive_list_H namespace tbb { -namespace detail { -namespace r1 { +namespace detail { +namespace r1 { //! Data structure to be inherited by the types that can form intrusive lists. /** Intrusive list is formed by means of the member_intrusive_list<T> template class. @@ -27,10 +27,10 @@ namespace r1 { declare instantiation member_intrusive_list<T> as a friend. This class implements a limited subset of std::list interface. **/ struct intrusive_list_node { - intrusive_list_node* my_prev_node{}; - intrusive_list_node* my_next_node{}; + intrusive_list_node* my_prev_node{}; + intrusive_list_node* my_next_node{}; #if TBB_USE_ASSERT - intrusive_list_node() { my_prev_node = my_next_node = this; } + intrusive_list_node() { my_prev_node = my_next_node = this; } #endif /* TBB_USE_ASSERT */ }; @@ -42,76 +42,76 @@ class intrusive_list_base { intrusive_list_node my_head; //! 
Number of list elements - std::size_t my_size; + std::size_t my_size; static intrusive_list_node& node ( T& item ) { return List::node(item); } static T& item ( intrusive_list_node* node ) { return List::item(node); } - static const T& item( const intrusive_list_node* node ) { return List::item(node); } - - template <typename DereferenceType> + static const T& item( const intrusive_list_node* node ) { return List::item(node); } + + template <typename DereferenceType> class iterator_impl { - static_assert(std::is_same<DereferenceType, T>::value || - std::is_same<DereferenceType, const T>::value, - "Incorrect DereferenceType in iterator_impl"); - - using pointer_type = typename std::conditional<std::is_same<DereferenceType, T>::value, - intrusive_list_node*, - const intrusive_list_node*>::type; - - public: - iterator_impl() : my_pos(nullptr) {} - - iterator_impl( pointer_type pos ) : my_pos(pos) {} - - iterator_impl& operator++() { - my_pos = my_pos->my_next_node; - return *this; + static_assert(std::is_same<DereferenceType, T>::value || + std::is_same<DereferenceType, const T>::value, + "Incorrect DereferenceType in iterator_impl"); + + using pointer_type = typename std::conditional<std::is_same<DereferenceType, T>::value, + intrusive_list_node*, + const intrusive_list_node*>::type; + + public: + iterator_impl() : my_pos(nullptr) {} + + iterator_impl( pointer_type pos ) : my_pos(pos) {} + + iterator_impl& operator++() { + my_pos = my_pos->my_next_node; + return *this; } - iterator_impl operator++( int ) { - iterator_impl it(*this); - ++*this; - return it; + iterator_impl operator++( int ) { + iterator_impl it(*this); + ++*this; + return it; } - iterator_impl& operator--() { - my_pos = my_pos->my_prev_node; - return *this; + iterator_impl& operator--() { + my_pos = my_pos->my_prev_node; + return *this; } - iterator_impl operator--( int ) { - iterator_impl it(*this); - --*this; - return it; + iterator_impl operator--( int ) { + iterator_impl it(*this); + --*this; + return it; } - bool operator==( const iterator_impl& rhs ) const { - return my_pos == rhs.my_pos; + bool operator==( const iterator_impl& rhs ) const { + return my_pos == rhs.my_pos; } - bool operator!=( const iterator_impl& rhs ) const { - return my_pos != rhs.my_pos; + bool operator!=( const iterator_impl& rhs ) const { + return my_pos != rhs.my_pos; } - DereferenceType& operator*() const { - return intrusive_list_base::item(my_pos); + DereferenceType& operator*() const { + return intrusive_list_base::item(my_pos); } - DereferenceType* operator->() const { - return &intrusive_list_base::item(my_pos); + DereferenceType* operator->() const { + return &intrusive_list_base::item(my_pos); } - private: - // Node the iterator points to at the moment - pointer_type my_pos; - }; // class iterator_impl - + private: + // Node the iterator points to at the moment + pointer_type my_pos; + }; // class iterator_impl + void assert_ok () const { __TBB_ASSERT( (my_head.my_prev_node == &my_head && !my_size) || (my_head.my_next_node != &my_head && my_size >0), "intrusive_list_base corrupted" ); #if TBB_USE_ASSERT >= 2 - std::size_t i = 0; + std::size_t i = 0; for ( intrusive_list_node *n = my_head.my_next_node; n != &my_head; n = n->my_next_node ) ++i; __TBB_ASSERT( my_size == i, "Wrong size" ); @@ -119,8 +119,8 @@ class intrusive_list_base { } public: - using iterator = iterator_impl<T>; - using const_iterator = iterator_impl<const T>; + using iterator = iterator_impl<T>; + using const_iterator = iterator_impl<const T>; intrusive_list_base () : 
my_size(0) { my_head.my_prev_node = &my_head; @@ -129,7 +129,7 @@ public: bool empty () const { return my_head.my_next_node == &my_head; } - std::size_t size () const { return my_size; } + std::size_t size () const { return my_size; } iterator begin () { return iterator(my_head.my_next_node); } @@ -172,22 +172,22 @@ public: }; // intrusive_list_base -#if __TBB_TODO -// With standard compliant compilers memptr_intrusive_list could be named simply intrusive_list, -// and inheritance based intrusive_list version would become its partial specialization. -// Here are the corresponding declarations: - -struct dummy_intrusive_list_item { intrusive_list_node my_node; }; - -template <class T, class U = dummy_intrusive_list_item, intrusive_list_node U::*NodePtr = &dummy_intrusive_list_item::my_node> -class intrusive_list : public intrusive_list_base<intrusive_list<T, U, NodePtr>, T>; - -template <class T> -class intrusive_list<T, dummy_intrusive_list_item, &dummy_intrusive_list_item::my_node> - : public intrusive_list_base<intrusive_list<T>, T>; - -#endif /* __TBB_TODO */ - +#if __TBB_TODO +// With standard compliant compilers memptr_intrusive_list could be named simply intrusive_list, +// and inheritance based intrusive_list version would become its partial specialization. +// Here are the corresponding declarations: + +struct dummy_intrusive_list_item { intrusive_list_node my_node; }; + +template <class T, class U = dummy_intrusive_list_item, intrusive_list_node U::*NodePtr = &dummy_intrusive_list_item::my_node> +class intrusive_list : public intrusive_list_base<intrusive_list<T, U, NodePtr>, T>; + +template <class T> +class intrusive_list<T, dummy_intrusive_list_item, &dummy_intrusive_list_item::my_node> + : public intrusive_list_base<intrusive_list<T>, T>; + +#endif /* __TBB_TODO */ + //! Double linked list of items of type T containing a member of type intrusive_list_node. /** NodePtr is a member pointer to the node data field. Class U is either T or a base class of T containing the node member. Default values exist for the sake @@ -211,11 +211,11 @@ class memptr_intrusive_list : public intrusive_list_base<memptr_intrusive_list<T // __TBB_offsetof implementation breaks operations with normal member names. return *reinterpret_cast<T*>((char*)node - ((ptrdiff_t)&(reinterpret_cast<T*>(0x1000)->*NodePtr) - 0x1000)); } - - static const T& item( const intrusive_list_node* node ) { - return item(const_cast<intrusive_list_node*>(node)); - } - + + static const T& item( const intrusive_list_node* node ) { + return item(const_cast<intrusive_list_node*>(node)); + } + }; // intrusive_list<T, U, NodePtr> //! Double linked list of items of type T that is derived from intrusive_list_node class. 
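A usage sketch for the derived-node flavor of the list (hypothetical element type; assumes the usual push_front/remove members of intrusive_list_base, which are not shown in this hunk): elements embed their own link node, so insertion allocates nothing and the caller only has to keep each element alive while it is linked.

    // Not TBB code: an element type that can live in intrusive_list<T>.
    struct waiter : public intrusive_list_node {
        int ticket;
        explicit waiter(int t) : ticket(t) {}
    };
    // intrusive_list<waiter> queue;
    // waiter w1(1), w2(2);
    // queue.push_front(w1);   // links the existing object in place, no allocation
    // queue.remove(w1);       // must happen before w1 is destroyed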
@@ -231,12 +231,12 @@ class intrusive_list : public intrusive_list_base<intrusive_list<T>, T> static intrusive_list_node& node ( T& val ) { return val; } static T& item ( intrusive_list_node* node ) { return *static_cast<T*>(node); } - - static const T& item( const intrusive_list_node* node ) { return *static_cast<const T*>(node); } + + static const T& item( const intrusive_list_node* node ) { return *static_cast<const T*>(node); } }; // intrusive_list<T> -} // namespace r1 -} // namespace detail +} // namespace r1 +} // namespace detail } // namespace tbb #endif /* _TBB_intrusive_list_H */ diff --git a/contrib/libs/tbb/src/tbb/itt_notify.cpp b/contrib/libs/tbb/src/tbb/itt_notify.cpp index 6c905fa377..0e60579a62 100644 --- a/contrib/libs/tbb/src/tbb/itt_notify.cpp +++ b/contrib/libs/tbb/src/tbb/itt_notify.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,7 +14,7 @@ limitations under the License. */ -#if __TBB_USE_ITT_NOTIFY +#if __TBB_USE_ITT_NOTIFY #if _WIN32||_WIN64 #ifndef UNICODE @@ -43,10 +43,10 @@ extern "C" void MallocInitializeITT(); #include "tools_api/ittnotify_static.c" namespace tbb { -namespace detail { -namespace r1 { - -/** This extra proxy method is necessary since __itt_init_lib is declared as static **/ +namespace detail { +namespace r1 { + +/** This extra proxy method is necessary since __itt_init_lib is declared as static **/ int __TBB_load_ittnotify() { #if !(_WIN32||_WIN64) // tool_api crashes without dlopen, check that it's present. Common case @@ -62,8 +62,8 @@ int __TBB_load_ittnotify() { )); } -} //namespace r1 -} //namespace detail +} //namespace r1 +} //namespace detail } // namespace tbb -#endif /* __TBB_USE_ITT_NOTIFY */ +#endif /* __TBB_USE_ITT_NOTIFY */ diff --git a/contrib/libs/tbb/src/tbb/itt_notify.h b/contrib/libs/tbb/src/tbb/itt_notify.h index 3a5c53f7b8..9978bcd7cb 100644 --- a/contrib/libs/tbb/src/tbb/itt_notify.h +++ b/contrib/libs/tbb/src/tbb/itt_notify.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -17,9 +17,9 @@ #ifndef _TBB_ITT_NOTIFY #define _TBB_ITT_NOTIFY -#include "oneapi/tbb/detail/_config.h" +#include "oneapi/tbb/detail/_config.h" -#if __TBB_USE_ITT_NOTIFY +#if __TBB_USE_ITT_NOTIFY #if _WIN32||_WIN64 #ifndef UNICODE @@ -39,76 +39,76 @@ extern "C" void __itt_fini_ittlib(void); #undef _T #endif /* WIN */ -#endif /* __TBB_USE_ITT_NOTIFY */ +#endif /* __TBB_USE_ITT_NOTIFY */ #if !ITT_CALLER_NULL #define ITT_CALLER_NULL ((__itt_caller)0) #endif namespace tbb { -namespace detail { -namespace r1 { - +namespace detail { +namespace r1 { + //! Unicode support #if (_WIN32||_WIN64) && !__MINGW32__ //! Unicode character type. Always wchar_t on Windows. /** We do not use typedefs from Windows TCHAR family to keep consistence of TBB coding style. **/ - using tchar = wchar_t; + using tchar = wchar_t; //! Standard Windows macro to markup the string literals. #define _T(string_literal) L ## string_literal #else /* !WIN */ - using tchar = char; + using tchar = char; //! Standard Windows style macro to markup the string literals. #define _T(string_literal) string_literal #endif /* !WIN */ -//! 
Display names of internal synchronization types -extern const tchar - *SyncType_Scheduler; -//! Display names of internal synchronization components/scenarios -extern const tchar - *SyncObj_ContextsList - ; +//! Display names of internal synchronization types +extern const tchar + *SyncType_Scheduler; +//! Display names of internal synchronization components/scenarios +extern const tchar + *SyncObj_ContextsList + ; -#if __TBB_USE_ITT_NOTIFY +#if __TBB_USE_ITT_NOTIFY // const_cast<void*>() is necessary to cast off volatility -#define ITT_NOTIFY(name,obj) __itt_##name(const_cast<void*>(static_cast<volatile void*>(obj))) +#define ITT_NOTIFY(name,obj) __itt_##name(const_cast<void*>(static_cast<volatile void*>(obj))) #define ITT_THREAD_SET_NAME(name) __itt_thread_set_name(name) #define ITT_FINI_ITTLIB() __itt_fini_ittlib() #define ITT_SYNC_CREATE(obj, type, name) __itt_sync_create((void*)(obj), type, name, 2) #define ITT_STACK_CREATE(obj) obj = __itt_stack_caller_create() -#define ITT_STACK_DESTROY(obj) (obj!=nullptr) ? __itt_stack_caller_destroy(static_cast<__itt_caller>(obj)) : ((void)0) -#define ITT_CALLEE_ENTER(cond, t, obj) if(cond) {\ - __itt_stack_callee_enter(static_cast<__itt_caller>(obj));\ - __itt_sync_acquired(t);\ - } -#define ITT_CALLEE_LEAVE(cond, obj) (cond) ? __itt_stack_callee_leave(static_cast<__itt_caller>(obj)) : ((void)0) - -#define ITT_TASK_GROUP(obj,name,parent) r1::itt_make_task_group(d1::ITT_DOMAIN_MAIN,(void*)(obj),ALGORITHM,(void*)(parent),(parent!=nullptr) ? ALGORITHM : FLOW_NULL,name) -#define ITT_TASK_BEGIN(obj,name,id) r1::itt_task_begin(d1::ITT_DOMAIN_MAIN,(void*)(id),ALGORITHM,(void*)(obj),ALGORITHM,name) -#define ITT_TASK_END r1::itt_task_end(d1::ITT_DOMAIN_MAIN) - - -#else /* !__TBB_USE_ITT_NOTIFY */ - +#define ITT_STACK_DESTROY(obj) (obj!=nullptr) ? __itt_stack_caller_destroy(static_cast<__itt_caller>(obj)) : ((void)0) +#define ITT_CALLEE_ENTER(cond, t, obj) if(cond) {\ + __itt_stack_callee_enter(static_cast<__itt_caller>(obj));\ + __itt_sync_acquired(t);\ + } +#define ITT_CALLEE_LEAVE(cond, obj) (cond) ? __itt_stack_callee_leave(static_cast<__itt_caller>(obj)) : ((void)0) + +#define ITT_TASK_GROUP(obj,name,parent) r1::itt_make_task_group(d1::ITT_DOMAIN_MAIN,(void*)(obj),ALGORITHM,(void*)(parent),(parent!=nullptr) ? 
ALGORITHM : FLOW_NULL,name) +#define ITT_TASK_BEGIN(obj,name,id) r1::itt_task_begin(d1::ITT_DOMAIN_MAIN,(void*)(id),ALGORITHM,(void*)(obj),ALGORITHM,name) +#define ITT_TASK_END r1::itt_task_end(d1::ITT_DOMAIN_MAIN) + + +#else /* !__TBB_USE_ITT_NOTIFY */ + #define ITT_NOTIFY(name,obj) ((void)0) #define ITT_THREAD_SET_NAME(name) ((void)0) #define ITT_FINI_ITTLIB() ((void)0) #define ITT_SYNC_CREATE(obj, type, name) ((void)0) #define ITT_STACK_CREATE(obj) ((void)0) -#define ITT_STACK_DESTROY(obj) ((void)0) -#define ITT_CALLEE_ENTER(cond, t, obj) ((void)0) -#define ITT_CALLEE_LEAVE(cond, obj) ((void)0) -#define ITT_TASK_GROUP(type,name,parent) ((void)0) -#define ITT_TASK_BEGIN(type,name,id) ((void)0) -#define ITT_TASK_END ((void)0) - -#endif /* !__TBB_USE_ITT_NOTIFY */ +#define ITT_STACK_DESTROY(obj) ((void)0) +#define ITT_CALLEE_ENTER(cond, t, obj) ((void)0) +#define ITT_CALLEE_LEAVE(cond, obj) ((void)0) +#define ITT_TASK_GROUP(type,name,parent) ((void)0) +#define ITT_TASK_BEGIN(type,name,id) ((void)0) +#define ITT_TASK_END ((void)0) + +#endif /* !__TBB_USE_ITT_NOTIFY */ int __TBB_load_ittnotify(); -} // namespace r1 -} // namespace detail -} // namespace tbb - +} // namespace r1 +} // namespace detail +} // namespace tbb + #endif /* _TBB_ITT_NOTIFY */ diff --git a/contrib/libs/tbb/src/tbb/mailbox.h b/contrib/libs/tbb/src/tbb/mailbox.h index 931a53d337..2f49e9b35e 100644 --- a/contrib/libs/tbb/src/tbb/mailbox.h +++ b/contrib/libs/tbb/src/tbb/mailbox.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -17,19 +17,19 @@ #ifndef _TBB_mailbox_H #define _TBB_mailbox_H -#include "oneapi/tbb/cache_aligned_allocator.h" -#include "oneapi/tbb/detail/_small_object_pool.h" +#include "oneapi/tbb/cache_aligned_allocator.h" +#include "oneapi/tbb/detail/_small_object_pool.h" -#include "arena_slot.h" +#include "arena_slot.h" #include "scheduler_common.h" -#include <atomic> - +#include <atomic> + namespace tbb { -namespace detail { -namespace r1 { +namespace detail { +namespace r1 { -struct task_proxy : public d1::task { +struct task_proxy : public d1::task { static const intptr_t pool_bit = 1<<0; static const intptr_t mailbox_bit = 1<<1; static const intptr_t location_mask = pool_bit | mailbox_bit; @@ -37,34 +37,34 @@ struct task_proxy : public d1::task { Two low-order bits mean: 1 = proxy is/was/will be in task pool 2 = proxy is/was/will be in mailbox */ - std::atomic<intptr_t> task_and_tag; + std::atomic<intptr_t> task_and_tag; //! Pointer to next task_proxy in a mailbox - std::atomic<task_proxy*> next_in_mailbox; + std::atomic<task_proxy*> next_in_mailbox; //! Mailbox to which this was mailed. mail_outbox* outbox; - //! Task affinity id which is referenced - d1::slot_id slot; - - d1::small_object_allocator allocator; - + //! Task affinity id which is referenced + d1::slot_id slot; + + d1::small_object_allocator allocator; + //! True if the proxy is stored both in its sender's pool and in the destination mailbox. static bool is_shared ( intptr_t tat ) { return (tat & location_mask) == location_mask; } - //! Returns a pointer to the encapsulated task or nullptr. + //! Returns a pointer to the encapsulated task or nullptr. static task* task_ptr ( intptr_t tat ) { return (task*)(tat & ~location_mask); } - //! Returns a pointer to the encapsulated task or nullptr, and frees proxy if necessary. + //! 
Returns a pointer to the encapsulated task or nullptr, and frees proxy if necessary. template<intptr_t from_bit> inline task* extract_task () { - // __TBB_ASSERT( prefix().extra_state == es_task_proxy, "Normal task misinterpreted as a proxy?" ); - intptr_t tat = task_and_tag.load(std::memory_order_acquire); + // __TBB_ASSERT( prefix().extra_state == es_task_proxy, "Normal task misinterpreted as a proxy?" ); + intptr_t tat = task_and_tag.load(std::memory_order_acquire); __TBB_ASSERT( tat == from_bit || (is_shared(tat) && task_ptr(tat)), "Proxy's tag cannot specify both locations if the proxy " "was retrieved from one of its original locations" ); @@ -73,104 +73,104 @@ struct task_proxy : public d1::task { // Attempt to transition the proxy to the "empty" state with // cleaner_bit specifying entity responsible for its eventual freeing. // Explicit cast to void* is to work around a seeming ICC 11.1 bug. - if ( task_and_tag.compare_exchange_strong(tat, cleaner_bit) ) { + if ( task_and_tag.compare_exchange_strong(tat, cleaner_bit) ) { // Successfully grabbed the task, and left new owner with the job of freeing the proxy return task_ptr(tat); } } // Proxied task has already been claimed from another proxy location. - __TBB_ASSERT( task_and_tag.load(std::memory_order_relaxed) == from_bit, "Empty proxy cannot contain non-zero task pointer" ); - return nullptr; - } - - virtual task* execute(d1::execution_data&) { - __TBB_ASSERT_RELEASE(false, nullptr); - return nullptr; - } - virtual task* cancel(d1::execution_data&) { - __TBB_ASSERT_RELEASE(false, nullptr); - return nullptr; - } + __TBB_ASSERT( task_and_tag.load(std::memory_order_relaxed) == from_bit, "Empty proxy cannot contain non-zero task pointer" ); + return nullptr; + } + + virtual task* execute(d1::execution_data&) { + __TBB_ASSERT_RELEASE(false, nullptr); + return nullptr; + } + virtual task* cancel(d1::execution_data&) { + __TBB_ASSERT_RELEASE(false, nullptr); + return nullptr; + } }; // struct task_proxy //! Internal representation of mail_outbox, without padding. class unpadded_mail_outbox { protected: - typedef std::atomic<task_proxy*> atomic_proxy_ptr; + typedef std::atomic<task_proxy*> atomic_proxy_ptr; - //! Pointer to first task_proxy in mailbox, or nullptr if box is empty. - atomic_proxy_ptr my_first; + //! Pointer to first task_proxy in mailbox, or nullptr if box is empty. + atomic_proxy_ptr my_first; - //! Pointer to pointer that will point to next item in the queue. Never nullptr. - std::atomic<atomic_proxy_ptr*> my_last; + //! Pointer to pointer that will point to next item in the queue. Never nullptr. + std::atomic<atomic_proxy_ptr*> my_last; //! Owner of mailbox is not executing a task, and has drained its own task pool. - std::atomic<bool> my_is_idle; + std::atomic<bool> my_is_idle; }; -// TODO: - consider moving to arena slot +// TODO: - consider moving to arena slot //! Class representing where mail is put. /** Padded to occupy a cache line. 
*/ class mail_outbox : padded<unpadded_mail_outbox> { - task_proxy* internal_pop( isolation_type isolation ) { - task_proxy* curr = my_first.load(std::memory_order_acquire); + task_proxy* internal_pop( isolation_type isolation ) { + task_proxy* curr = my_first.load(std::memory_order_acquire); if ( !curr ) - return nullptr; - atomic_proxy_ptr* prev_ptr = &my_first; + return nullptr; + atomic_proxy_ptr* prev_ptr = &my_first; if ( isolation != no_isolation ) { - while ( task_accessor::isolation(*curr) != isolation ) { + while ( task_accessor::isolation(*curr) != isolation ) { prev_ptr = &curr->next_in_mailbox; - // The next_in_mailbox should be read with acquire to guarantee (*curr) consistency. - curr = curr->next_in_mailbox.load(std::memory_order_acquire); + // The next_in_mailbox should be read with acquire to guarantee (*curr) consistency. + curr = curr->next_in_mailbox.load(std::memory_order_acquire); if ( !curr ) - return nullptr; + return nullptr; } } // There is a first item in the mailbox. See if there is a second. - // The next_in_mailbox should be read with acquire to guarantee (*second) consistency. - if ( task_proxy* second = curr->next_in_mailbox.load(std::memory_order_acquire) ) { + // The next_in_mailbox should be read with acquire to guarantee (*second) consistency. + if ( task_proxy* second = curr->next_in_mailbox.load(std::memory_order_acquire) ) { // There are at least two items, so first item can be popped easily. - prev_ptr->store(second, std::memory_order_relaxed); + prev_ptr->store(second, std::memory_order_relaxed); } else { - // There is only one item. Some care is required to pop it. - - prev_ptr->store(nullptr, std::memory_order_relaxed); - atomic_proxy_ptr* expected = &curr->next_in_mailbox; - if ( my_last.compare_exchange_strong( expected, prev_ptr ) ) { + // There is only one item. Some care is required to pop it. + + prev_ptr->store(nullptr, std::memory_order_relaxed); + atomic_proxy_ptr* expected = &curr->next_in_mailbox; + if ( my_last.compare_exchange_strong( expected, prev_ptr ) ) { // Successfully transitioned mailbox from having one item to having none. - __TBB_ASSERT( !curr->next_in_mailbox.load(std::memory_order_relaxed), nullptr); + __TBB_ASSERT( !curr->next_in_mailbox.load(std::memory_order_relaxed), nullptr); } else { // Some other thread updated my_last but has not filled in first->next_in_mailbox // Wait until first item points to second item. atomic_backoff backoff; - // The next_in_mailbox should be read with acquire to guarantee (*second) consistency. - while ( !(second = curr->next_in_mailbox.load(std::memory_order_acquire)) ) backoff.pause(); - prev_ptr->store( second, std::memory_order_relaxed); + // The next_in_mailbox should be read with acquire to guarantee (*second) consistency. + while ( !(second = curr->next_in_mailbox.load(std::memory_order_acquire)) ) backoff.pause(); + prev_ptr->store( second, std::memory_order_relaxed); } } - assert_pointer_valid(curr); + assert_pointer_valid(curr); return curr; } public: friend class mail_inbox; //! Push task_proxy onto the mailbox queue of another thread. - /** Implementation is wait-free. */ - void push( task_proxy* t ) { - assert_pointer_valid(t); - t->next_in_mailbox.store(nullptr, std::memory_order_relaxed); - atomic_proxy_ptr* const link = my_last.exchange(&t->next_in_mailbox); - // Logically, the release fence is not required because the exchange above provides the - // release-acquire semantic that guarantees that (*t) will be consistent when another thread - // loads the link atomic. 
However, C++11 memory model guarantees consistency of(*t) only - // when the same atomic is used for synchronization. - link->store(t, std::memory_order_release); + /** Implementation is wait-free. */ + void push( task_proxy* t ) { + assert_pointer_valid(t); + t->next_in_mailbox.store(nullptr, std::memory_order_relaxed); + atomic_proxy_ptr* const link = my_last.exchange(&t->next_in_mailbox); + // Logically, the release fence is not required because the exchange above provides the + // release-acquire semantic that guarantees that (*t) will be consistent when another thread + // loads the link atomic. However, C++11 memory model guarantees consistency of(*t) only + // when the same atomic is used for synchronization. + link->store(t, std::memory_order_release); } //! Return true if mailbox is empty bool empty() { - return my_first.load(std::memory_order_relaxed) == nullptr; + return my_first.load(std::memory_order_relaxed) == nullptr; } //! Construct *this as a mailbox from zeroed memory. @@ -178,11 +178,11 @@ public: This method is provided instead of a full constructor since we know the object will be constructed in zeroed memory. */ void construct() { - __TBB_ASSERT( sizeof(*this)==max_nfs_size, nullptr ); - __TBB_ASSERT( !my_first.load(std::memory_order_relaxed), nullptr ); - __TBB_ASSERT( !my_last.load(std::memory_order_relaxed), nullptr ); - __TBB_ASSERT( !my_is_idle.load(std::memory_order_relaxed), nullptr ); - my_last = &my_first; + __TBB_ASSERT( sizeof(*this)==max_nfs_size, nullptr ); + __TBB_ASSERT( !my_first.load(std::memory_order_relaxed), nullptr ); + __TBB_ASSERT( !my_last.load(std::memory_order_relaxed), nullptr ); + __TBB_ASSERT( !my_is_idle.load(std::memory_order_relaxed), nullptr ); + my_last = &my_first; suppress_unused_warning(pad); } @@ -191,15 +191,15 @@ public: intptr_t k = 0; // No fences here because other threads have already quit. for( ; task_proxy* t = my_first; ++k ) { - my_first.store(t->next_in_mailbox, std::memory_order_relaxed); - // cache_aligned_deallocate((char*)t - task_prefix_reservation_size); + my_first.store(t->next_in_mailbox, std::memory_order_relaxed); + // cache_aligned_deallocate((char*)t - task_prefix_reservation_size); } return k; } //! True if thread that owns this mailbox is looking for work. bool recipient_is_idle() { - return my_is_idle.load(std::memory_order_relaxed); + return my_is_idle.load(std::memory_order_relaxed); } }; // class mail_outbox @@ -209,7 +209,7 @@ class mail_inbox { mail_outbox* my_putter; public: //! Construct unattached inbox - mail_inbox() : my_putter(nullptr) {} + mail_inbox() : my_putter(nullptr) {} //! Attach inbox to a corresponding outbox. void attach( mail_outbox& putter ) { @@ -218,11 +218,11 @@ public: //! Detach inbox from its outbox void detach() { __TBB_ASSERT(my_putter,"not attached"); - my_putter = nullptr; + my_putter = nullptr; } - //! Get next piece of mail, or nullptr if mailbox is empty. - task_proxy* pop( isolation_type isolation ) { - return my_putter->internal_pop( isolation ); + //! Get next piece of mail, or nullptr if mailbox is empty. + task_proxy* pop( isolation_type isolation ) { + return my_putter->internal_pop( isolation ); } //! Return true if mailbox is empty bool empty() { @@ -232,18 +232,18 @@ public: /** Raises assertion failure if mailbox is redundantly marked as not idle. 
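The push() shown above relies on a classic wait-free multi-producer publication idiom: swap the queue's tail link first, then fill that link in with a release store, so a consumer that observes the link also observes the node's contents. A minimal standalone sketch of that idiom (the demo_* names and layout are illustrative only, not taken from the diff):

#include <atomic>

struct demo_node {
    int payload = 0;
    std::atomic<demo_node*> next{nullptr};
};

struct demo_mpsc_queue {
    std::atomic<demo_node*> first{nullptr};
    std::atomic<std::atomic<demo_node*>*> last{&first};

    // Wait-free for producers: one exchange, then one release store.
    void push(demo_node* n) {
        n->next.store(nullptr, std::memory_order_relaxed);
        std::atomic<demo_node*>* prev_link = last.exchange(&n->next);
        prev_link->store(n, std::memory_order_release);   // publish the node
    }
};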
*/ void set_is_idle( bool value ) { if( my_putter ) { - __TBB_ASSERT( my_putter->my_is_idle.load(std::memory_order_relaxed) || value, "attempt to redundantly mark mailbox as not idle" ); - my_putter->my_is_idle.store(value, std::memory_order_relaxed); + __TBB_ASSERT( my_putter->my_is_idle.load(std::memory_order_relaxed) || value, "attempt to redundantly mark mailbox as not idle" ); + my_putter->my_is_idle.store(value, std::memory_order_relaxed); } } //! Indicate whether thread that reads this mailbox is idle. bool is_idle_state ( bool value ) const { - return !my_putter || my_putter->my_is_idle.load(std::memory_order_relaxed) == value; + return !my_putter || my_putter->my_is_idle.load(std::memory_order_relaxed) == value; } }; // class mail_inbox -} // namespace r1 -} // namespace detail +} // namespace r1 +} // namespace detail } // namespace tbb #endif /* _TBB_mailbox_H */ diff --git a/contrib/libs/tbb/src/tbb/main.cpp b/contrib/libs/tbb/src/tbb/main.cpp index ac1f125551..ec6c98d682 100644 --- a/contrib/libs/tbb/src/tbb/main.cpp +++ b/contrib/libs/tbb/src/tbb/main.cpp @@ -1,171 +1,171 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#include "oneapi/tbb/detail/_config.h" - -#include "main.h" -#include "governor.h" -#include "environment.h" -#include "market.h" -#include "misc.h" -#include "itt_notify.h" - -namespace tbb { -namespace detail { -namespace r1 { - -//------------------------------------------------------------------------ -// Begin shared data layout. -// The following global data items are mostly read-only after initialization. -//------------------------------------------------------------------------ - -//------------------------------------------------------------------------ -// governor data -basic_tls<thread_data*> governor::theTLS; -unsigned governor::DefaultNumberOfThreads; -size_t governor::DefaultPageSize; -rml::tbb_factory governor::theRMLServerFactory; -bool governor::UsePrivateRML; -bool governor::is_rethrow_broken; - -//------------------------------------------------------------------------ -// market data -market* market::theMarket; -market::global_market_mutex_type market::theMarketMutex; - -//------------------------------------------------------------------------ -// context propagation data -context_state_propagation_mutex_type the_context_state_propagation_mutex; -std::atomic<uintptr_t> the_context_state_propagation_epoch{}; - -//------------------------------------------------------------------------ -// One time initialization data - -//! Counter of references to global shared resources such as TLS. -std::atomic<int> __TBB_InitOnce::count{}; - -std::atomic_flag __TBB_InitOnce::InitializationLock = ATOMIC_FLAG_INIT; - -//! Flag that is set to true after one-time initializations are done. -std::atomic<bool> __TBB_InitOnce::InitializationDone{}; - -#if __TBB_USE_ITT_NOTIFY -//! 
Defined in profiling.cpp -extern bool ITT_Present; -void ITT_DoUnsafeOneTimeInitialization(); -#endif - -#if !(_WIN32||_WIN64) || __TBB_SOURCE_DIRECTLY_INCLUDED -static __TBB_InitOnce __TBB_InitOnceHiddenInstance; -#endif - -#if TBB_USE_ASSERT -std::atomic<int> the_observer_proxy_count; - -struct check_observer_proxy_count { - ~check_observer_proxy_count() { - if (the_observer_proxy_count != 0) { - runtime_warning("Leaked %ld observer_proxy objects\n", long(the_observer_proxy_count)); - } - } -}; -// The proxy count checker shall be defined after __TBB_InitOnceHiddenInstance to check the count -// after auto termination. -static check_observer_proxy_count the_check_observer_proxy_count; -#endif /* TBB_USE_ASSERT */ - -//------------------------------------------------------------------------ -// __TBB_InitOnce -//------------------------------------------------------------------------ - -void __TBB_InitOnce::add_ref() { - if( ++count==1 ) - governor::acquire_resources(); -} - -void __TBB_InitOnce::remove_ref() { - int k = --count; - __TBB_ASSERT(k>=0,"removed __TBB_InitOnce ref that was not added?"); - if( k==0 ) { - governor::release_resources(); - ITT_FINI_ITTLIB(); - } -} - -//------------------------------------------------------------------------ -// One-time Initializations -//------------------------------------------------------------------------ - -//! Defined in cache_aligned_allocator.cpp -void initialize_cache_aligned_allocator(); - -//! Performs thread-safe lazy one-time general TBB initialization. -void DoOneTimeInitialization() { - __TBB_InitOnce::lock(); - // No fence required for load of InitializationDone, because we are inside a critical section. - if( !__TBB_InitOnce::InitializationDone ) { - __TBB_InitOnce::add_ref(); - if( GetBoolEnvironmentVariable("TBB_VERSION") ) - PrintVersion(); - bool itt_present = false; -#if __TBB_USE_ITT_NOTIFY - ITT_DoUnsafeOneTimeInitialization(); - itt_present = ITT_Present; -#endif /* __TBB_USE_ITT_NOTIFY */ - initialize_cache_aligned_allocator(); - governor::initialize_rml_factory(); - // Force processor groups support detection - governor::default_num_threads(); - // Force OS regular page size detection - governor::default_page_size(); - PrintExtraVersionInfo( "TOOLS SUPPORT", itt_present ? "enabled" : "disabled" ); - __TBB_InitOnce::InitializationDone = true; - } - __TBB_InitOnce::unlock(); -} - -#if (_WIN32||_WIN64) && !__TBB_SOURCE_DIRECTLY_INCLUDED -//! Windows "DllMain" that handles startup and shutdown of dynamic library. -extern "C" bool WINAPI DllMain( HANDLE /*hinstDLL*/, DWORD reason, LPVOID lpvReserved ) { - switch( reason ) { - case DLL_PROCESS_ATTACH: - __TBB_InitOnce::add_ref(); - break; - case DLL_PROCESS_DETACH: - // Since THREAD_DETACH is not called for the main thread, call auto-termination - // here as well - but not during process shutdown (due to risk of a deadlock). - if ( lpvReserved==NULL ) { // library unload - governor::terminate_external_thread(); - } - __TBB_InitOnce::remove_ref(); - // It is assumed that InitializationDone is not set after DLL_PROCESS_DETACH, - // and thus no race on InitializationDone is possible. - if ( __TBB_InitOnce::initialization_done() ) { - // Remove reference that we added in DoOneTimeInitialization. 
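DoOneTimeInitialization() above follows a "lock, re-check, initialize, publish" shape: a spin flag serializes initializers, and the done flag is re-read inside the critical section so late arrivals skip the work. A simplified standalone version of the same shape, with made-up demo_* names and an extra fast-path check added purely for illustration:

#include <atomic>
#include <thread>

static std::atomic_flag demo_lock = ATOMIC_FLAG_INIT;
static std::atomic<bool> demo_done{false};

void demo_init_once() {
    if (demo_done.load(std::memory_order_acquire))
        return;                                        // fast path: already initialized
    while (demo_lock.test_and_set(std::memory_order_acquire))
        std::this_thread::yield();                     // simplified backoff
    if (!demo_done.load(std::memory_order_relaxed)) {  // re-check under the lock
        // ... one-time initialization work goes here ...
        demo_done.store(true, std::memory_order_release);
    }
    demo_lock.clear(std::memory_order_release);
}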
- __TBB_InitOnce::remove_ref(); - } - break; - case DLL_THREAD_DETACH: - governor::terminate_external_thread(); - break; - } - return true; -} -#endif /* (_WIN32||_WIN64) && !__TBB_SOURCE_DIRECTLY_INCLUDED */ - -} // namespace r1 -} // namespace detail -} // namespace tbb +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "oneapi/tbb/detail/_config.h" + +#include "main.h" +#include "governor.h" +#include "environment.h" +#include "market.h" +#include "misc.h" +#include "itt_notify.h" + +namespace tbb { +namespace detail { +namespace r1 { + +//------------------------------------------------------------------------ +// Begin shared data layout. +// The following global data items are mostly read-only after initialization. +//------------------------------------------------------------------------ + +//------------------------------------------------------------------------ +// governor data +basic_tls<thread_data*> governor::theTLS; +unsigned governor::DefaultNumberOfThreads; +size_t governor::DefaultPageSize; +rml::tbb_factory governor::theRMLServerFactory; +bool governor::UsePrivateRML; +bool governor::is_rethrow_broken; + +//------------------------------------------------------------------------ +// market data +market* market::theMarket; +market::global_market_mutex_type market::theMarketMutex; + +//------------------------------------------------------------------------ +// context propagation data +context_state_propagation_mutex_type the_context_state_propagation_mutex; +std::atomic<uintptr_t> the_context_state_propagation_epoch{}; + +//------------------------------------------------------------------------ +// One time initialization data + +//! Counter of references to global shared resources such as TLS. +std::atomic<int> __TBB_InitOnce::count{}; + +std::atomic_flag __TBB_InitOnce::InitializationLock = ATOMIC_FLAG_INIT; + +//! Flag that is set to true after one-time initializations are done. +std::atomic<bool> __TBB_InitOnce::InitializationDone{}; + +#if __TBB_USE_ITT_NOTIFY +//! Defined in profiling.cpp +extern bool ITT_Present; +void ITT_DoUnsafeOneTimeInitialization(); +#endif + +#if !(_WIN32||_WIN64) || __TBB_SOURCE_DIRECTLY_INCLUDED +static __TBB_InitOnce __TBB_InitOnceHiddenInstance; +#endif + +#if TBB_USE_ASSERT +std::atomic<int> the_observer_proxy_count; + +struct check_observer_proxy_count { + ~check_observer_proxy_count() { + if (the_observer_proxy_count != 0) { + runtime_warning("Leaked %ld observer_proxy objects\n", long(the_observer_proxy_count)); + } + } +}; +// The proxy count checker shall be defined after __TBB_InitOnceHiddenInstance to check the count +// after auto termination. 
+static check_observer_proxy_count the_check_observer_proxy_count; +#endif /* TBB_USE_ASSERT */ + +//------------------------------------------------------------------------ +// __TBB_InitOnce +//------------------------------------------------------------------------ + +void __TBB_InitOnce::add_ref() { + if( ++count==1 ) + governor::acquire_resources(); +} + +void __TBB_InitOnce::remove_ref() { + int k = --count; + __TBB_ASSERT(k>=0,"removed __TBB_InitOnce ref that was not added?"); + if( k==0 ) { + governor::release_resources(); + ITT_FINI_ITTLIB(); + } +} + +//------------------------------------------------------------------------ +// One-time Initializations +//------------------------------------------------------------------------ + +//! Defined in cache_aligned_allocator.cpp +void initialize_cache_aligned_allocator(); + +//! Performs thread-safe lazy one-time general TBB initialization. +void DoOneTimeInitialization() { + __TBB_InitOnce::lock(); + // No fence required for load of InitializationDone, because we are inside a critical section. + if( !__TBB_InitOnce::InitializationDone ) { + __TBB_InitOnce::add_ref(); + if( GetBoolEnvironmentVariable("TBB_VERSION") ) + PrintVersion(); + bool itt_present = false; +#if __TBB_USE_ITT_NOTIFY + ITT_DoUnsafeOneTimeInitialization(); + itt_present = ITT_Present; +#endif /* __TBB_USE_ITT_NOTIFY */ + initialize_cache_aligned_allocator(); + governor::initialize_rml_factory(); + // Force processor groups support detection + governor::default_num_threads(); + // Force OS regular page size detection + governor::default_page_size(); + PrintExtraVersionInfo( "TOOLS SUPPORT", itt_present ? "enabled" : "disabled" ); + __TBB_InitOnce::InitializationDone = true; + } + __TBB_InitOnce::unlock(); +} + +#if (_WIN32||_WIN64) && !__TBB_SOURCE_DIRECTLY_INCLUDED +//! Windows "DllMain" that handles startup and shutdown of dynamic library. +extern "C" bool WINAPI DllMain( HANDLE /*hinstDLL*/, DWORD reason, LPVOID lpvReserved ) { + switch( reason ) { + case DLL_PROCESS_ATTACH: + __TBB_InitOnce::add_ref(); + break; + case DLL_PROCESS_DETACH: + // Since THREAD_DETACH is not called for the main thread, call auto-termination + // here as well - but not during process shutdown (due to risk of a deadlock). + if ( lpvReserved==NULL ) { // library unload + governor::terminate_external_thread(); + } + __TBB_InitOnce::remove_ref(); + // It is assumed that InitializationDone is not set after DLL_PROCESS_DETACH, + // and thus no race on InitializationDone is possible. + if ( __TBB_InitOnce::initialization_done() ) { + // Remove reference that we added in DoOneTimeInitialization. + __TBB_InitOnce::remove_ref(); + } + break; + case DLL_THREAD_DETACH: + governor::terminate_external_thread(); + break; + } + return true; +} +#endif /* (_WIN32||_WIN64) && !__TBB_SOURCE_DIRECTLY_INCLUDED */ + +} // namespace r1 +} // namespace detail +} // namespace tbb diff --git a/contrib/libs/tbb/src/tbb/main.h b/contrib/libs/tbb/src/tbb/main.h index 2a6ffdd84d..c6f54bb47b 100644 --- a/contrib/libs/tbb/src/tbb/main.h +++ b/contrib/libs/tbb/src/tbb/main.h @@ -1,99 +1,99 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef _TBB_main_H -#define _TBB_main_H - -#include "governor.h" - -#include <atomic> - -namespace tbb { -namespace detail { -namespace r1 { - -void DoOneTimeInitialization(); - -//------------------------------------------------------------------------ -// __TBB_InitOnce -//------------------------------------------------------------------------ - -// TODO (TBB_REVAMP_TODO): consider better names -//! Class that supports TBB initialization. -/** It handles acquisition and release of global resources (e.g. TLS) during startup and shutdown, - as well as synchronization for DoOneTimeInitialization. */ -class __TBB_InitOnce { - friend void DoOneTimeInitialization(); - friend void ITT_DoUnsafeOneTimeInitialization(); - - static std::atomic<int> count; - - //! Platform specific code to acquire resources. - static void acquire_resources(); - - //! Platform specific code to release resources. - static void release_resources(); - - //! Specifies if the one-time initializations has been done. - static std::atomic<bool> InitializationDone; - - //! Global initialization lock - /** Scenarios are possible when tools interop has to be initialized before the - TBB itself. This imposes a requirement that the global initialization lock - has to support valid static initialization, and does not issue any tool - notifications in any build mode. **/ - static std::atomic_flag InitializationLock; - -public: - static void lock() { - tbb::detail::atomic_backoff backoff; - while( InitializationLock.test_and_set() ) backoff.pause(); - } - - static void unlock() { InitializationLock.clear(std::memory_order_release); } - - static bool initialization_done() { return InitializationDone.load(std::memory_order_acquire); } - - //! Add initial reference to resources. - /** We assume that dynamic loading of the library prevents any other threads - from entering the library until this constructor has finished running. **/ - __TBB_InitOnce() { add_ref(); } - - //! Remove the initial reference to resources. - /** This is not necessarily the last reference if other threads are still running. **/ - ~__TBB_InitOnce() { - governor::terminate_external_thread(); // TLS dtor not called for the main thread - remove_ref(); - // We assume that InitializationDone is not set after file-scope destructors - // start running, and thus no race on InitializationDone is possible. - if ( initialization_done() ) { - // Remove an extra reference that was added in DoOneTimeInitialization. - remove_ref(); - } - } - //! Add reference to resources. If first reference added, acquire the resources. - static void add_ref(); - - //! Remove reference to resources. If last reference removed, release the resources. - static void remove_ref(); - -}; // class __TBB_InitOnce - -} // namespace r1 -} // namespace detail -} // namespace tbb - -#endif /* _TBB_main_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef _TBB_main_H +#define _TBB_main_H + +#include "governor.h" + +#include <atomic> + +namespace tbb { +namespace detail { +namespace r1 { + +void DoOneTimeInitialization(); + +//------------------------------------------------------------------------ +// __TBB_InitOnce +//------------------------------------------------------------------------ + +// TODO (TBB_REVAMP_TODO): consider better names +//! Class that supports TBB initialization. +/** It handles acquisition and release of global resources (e.g. TLS) during startup and shutdown, + as well as synchronization for DoOneTimeInitialization. */ +class __TBB_InitOnce { + friend void DoOneTimeInitialization(); + friend void ITT_DoUnsafeOneTimeInitialization(); + + static std::atomic<int> count; + + //! Platform specific code to acquire resources. + static void acquire_resources(); + + //! Platform specific code to release resources. + static void release_resources(); + + //! Specifies if the one-time initializations has been done. + static std::atomic<bool> InitializationDone; + + //! Global initialization lock + /** Scenarios are possible when tools interop has to be initialized before the + TBB itself. This imposes a requirement that the global initialization lock + has to support valid static initialization, and does not issue any tool + notifications in any build mode. **/ + static std::atomic_flag InitializationLock; + +public: + static void lock() { + tbb::detail::atomic_backoff backoff; + while( InitializationLock.test_and_set() ) backoff.pause(); + } + + static void unlock() { InitializationLock.clear(std::memory_order_release); } + + static bool initialization_done() { return InitializationDone.load(std::memory_order_acquire); } + + //! Add initial reference to resources. + /** We assume that dynamic loading of the library prevents any other threads + from entering the library until this constructor has finished running. **/ + __TBB_InitOnce() { add_ref(); } + + //! Remove the initial reference to resources. + /** This is not necessarily the last reference if other threads are still running. **/ + ~__TBB_InitOnce() { + governor::terminate_external_thread(); // TLS dtor not called for the main thread + remove_ref(); + // We assume that InitializationDone is not set after file-scope destructors + // start running, and thus no race on InitializationDone is possible. + if ( initialization_done() ) { + // Remove an extra reference that was added in DoOneTimeInitialization. + remove_ref(); + } + } + //! Add reference to resources. If first reference added, acquire the resources. + static void add_ref(); + + //! Remove reference to resources. If last reference removed, release the resources. 
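The add_ref()/remove_ref() pair documented here implements simple global reference counting: the first reference acquires shared resources (such as TLS) and the last one releases them. A minimal sketch of that contract, with hypothetical names:

#include <atomic>
#include <cassert>

static std::atomic<int> demo_ref_count{0};

void demo_add_ref() {
    if (++demo_ref_count == 1) { /* acquire global resources, e.g. TLS keys */ }
}

void demo_remove_ref() {
    int k = --demo_ref_count;
    assert(k >= 0 && "removed a reference that was not added");
    if (k == 0) { /* release global resources */ }
}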
+ static void remove_ref(); + +}; // class __TBB_InitOnce + +} // namespace r1 +} // namespace detail +} // namespace tbb + +#endif /* _TBB_main_H */ diff --git a/contrib/libs/tbb/src/tbb/market.cpp b/contrib/libs/tbb/src/tbb/market.cpp index d51e4f62a9..9259eaf588 100644 --- a/contrib/libs/tbb/src/tbb/market.cpp +++ b/contrib/libs/tbb/src/tbb/market.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,57 +14,57 @@ limitations under the License. */ -#include "oneapi/tbb/global_control.h" // global_control::active_value +#include "oneapi/tbb/global_control.h" // global_control::active_value #include "market.h" -#include "main.h" +#include "main.h" #include "governor.h" -#include "arena.h" -#include "thread_data.h" +#include "arena.h" +#include "thread_data.h" #include "itt_notify.h" -#include <cstring> // std::memset() - +#include <cstring> // std::memset() + namespace tbb { -namespace detail { -namespace r1 { - -/** This method must be invoked under my_arenas_list_mutex. **/ -arena* market::select_next_arena( arena* hint ) { - unsigned next_arena_priority_level = num_priority_levels; - if ( hint ) - next_arena_priority_level = hint->my_priority_level; - for ( unsigned idx = 0; idx < next_arena_priority_level; ++idx ) { - if ( !my_arenas[idx].empty() ) - return &*my_arenas[idx].begin(); - } - // don't change if arena with higher priority is not found. - return hint; -} - +namespace detail { +namespace r1 { + +/** This method must be invoked under my_arenas_list_mutex. **/ +arena* market::select_next_arena( arena* hint ) { + unsigned next_arena_priority_level = num_priority_levels; + if ( hint ) + next_arena_priority_level = hint->my_priority_level; + for ( unsigned idx = 0; idx < next_arena_priority_level; ++idx ) { + if ( !my_arenas[idx].empty() ) + return &*my_arenas[idx].begin(); + } + // don't change if arena with higher priority is not found. 
+ return hint; +} + void market::insert_arena_into_list ( arena& a ) { - __TBB_ASSERT( a.my_priority_level < num_priority_levels, nullptr ); - my_arenas[a.my_priority_level].push_front( a ); - __TBB_ASSERT( !my_next_arena || my_next_arena->my_priority_level < num_priority_levels, nullptr ); - my_next_arena = select_next_arena( my_next_arena ); + __TBB_ASSERT( a.my_priority_level < num_priority_levels, nullptr ); + my_arenas[a.my_priority_level].push_front( a ); + __TBB_ASSERT( !my_next_arena || my_next_arena->my_priority_level < num_priority_levels, nullptr ); + my_next_arena = select_next_arena( my_next_arena ); } void market::remove_arena_from_list ( arena& a ) { - __TBB_ASSERT( a.my_priority_level < num_priority_levels, nullptr ); - my_arenas[a.my_priority_level].remove( a ); - if ( my_next_arena == &a ) - my_next_arena = nullptr; - my_next_arena = select_next_arena( my_next_arena ); + __TBB_ASSERT( a.my_priority_level < num_priority_levels, nullptr ); + my_arenas[a.my_priority_level].remove( a ); + if ( my_next_arena == &a ) + my_next_arena = nullptr; + my_next_arena = select_next_arena( my_next_arena ); } //------------------------------------------------------------------------ // market //------------------------------------------------------------------------ -market::market ( unsigned workers_soft_limit, unsigned workers_hard_limit, std::size_t stack_size ) +market::market ( unsigned workers_soft_limit, unsigned workers_hard_limit, std::size_t stack_size ) : my_num_workers_hard_limit(workers_hard_limit) , my_num_workers_soft_limit(workers_soft_limit) - , my_next_arena(nullptr) + , my_next_arena(nullptr) , my_ref_count(1) , my_stack_size(stack_size) , my_workers_soft_limit_to_report(workers_soft_limit) @@ -85,11 +85,11 @@ static unsigned calc_workers_soft_limit(unsigned workers_soft_limit, unsigned wo return workers_soft_limit; } -bool market::add_ref_unsafe( global_market_mutex_type::scoped_lock& lock, bool is_public, unsigned workers_requested, std::size_t stack_size ) { +bool market::add_ref_unsafe( global_market_mutex_type::scoped_lock& lock, bool is_public, unsigned workers_requested, std::size_t stack_size ) { market *m = theMarket; if( m ) { ++m->my_ref_count; - const unsigned old_public_count = is_public ? m->my_public_ref_count++ : /*any non-zero value*/1; + const unsigned old_public_count = is_public ? m->my_public_ref_count++ : /*any non-zero value*/1; lock.release(); if( old_public_count==0 ) set_active_num_workers( calc_workers_soft_limit(workers_requested, m->my_num_workers_hard_limit) ); @@ -98,7 +98,7 @@ bool market::add_ref_unsafe( global_market_mutex_type::scoped_lock& lock, bool i if( workers_requested != governor::default_num_threads()-1 ) { __TBB_ASSERT( skip_soft_limit_warning > workers_requested, "skip_soft_limit_warning must be larger than any valid workers_requested" ); - unsigned soft_limit_to_report = m->my_workers_soft_limit_to_report.load(std::memory_order_relaxed); + unsigned soft_limit_to_report = m->my_workers_soft_limit_to_report.load(std::memory_order_relaxed); if( soft_limit_to_report < workers_requested ) { runtime_warning( "The number of workers is currently limited to %u. " "The request for %u workers is ignored. Further requests for more workers " @@ -106,22 +106,22 @@ bool market::add_ref_unsafe( global_market_mutex_type::scoped_lock& lock, bool i soft_limit_to_report, workers_requested ); // The race is possible when multiple threads report warnings. // We are OK with that, as there are just multiple warnings. 
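The lines that follow keep the soft-limit warning from repeating indefinitely by parking my_workers_soft_limit_to_report at a sentinel via compare_exchange_strong. A simplified standalone version of that report-once idiom (sentinel value and names are made up):

#include <atomic>
#include <cstdio>

static constexpr unsigned demo_skip_warning = ~0u;       // sentinel: warning already issued
static std::atomic<unsigned> demo_limit_to_report{4};

void demo_maybe_warn(unsigned requested) {
    unsigned current = demo_limit_to_report.load(std::memory_order_relaxed);
    if (current != demo_skip_warning && current < requested) {
        std::printf("workers limited to %u, request for %u ignored\n", current, requested);
        // Racy by design: a few threads may warn concurrently, then the CAS silences the rest.
        demo_limit_to_report.compare_exchange_strong(current, demo_skip_warning);
    }
}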
- unsigned expected_limit = soft_limit_to_report; - m->my_workers_soft_limit_to_report.compare_exchange_strong(expected_limit, skip_soft_limit_warning); + unsigned expected_limit = soft_limit_to_report; + m->my_workers_soft_limit_to_report.compare_exchange_strong(expected_limit, skip_soft_limit_warning); } } if( m->my_stack_size < stack_size ) runtime_warning( "Thread stack size has been already set to %u. " - "The request for larger stack (%u) cannot be satisfied.\n", m->my_stack_size, stack_size ); - return true; + "The request for larger stack (%u) cannot be satisfied.\n", m->my_stack_size, stack_size ); + return true; } - return false; -} - -market& market::global_market(bool is_public, unsigned workers_requested, std::size_t stack_size) { - global_market_mutex_type::scoped_lock lock( theMarketMutex ); - if( !market::add_ref_unsafe(lock, is_public, workers_requested, stack_size) ) { + return false; +} + +market& market::global_market(bool is_public, unsigned workers_requested, std::size_t stack_size) { + global_market_mutex_type::scoped_lock lock( theMarketMutex ); + if( !market::add_ref_unsafe(lock, is_public, workers_requested, stack_size) ) { // TODO: A lot is done under theMarketMutex locked. Can anything be moved out? if( stack_size == 0 ) stack_size = global_control::active_value(global_control::thread_stack_size); @@ -132,80 +132,80 @@ market& market::global_market(bool is_public, unsigned workers_requested, std::s // The requested number of threads is intentionally not considered in // computation of the hard limit, in order to separate responsibilities // and avoid complicated interactions between global_control and task_scheduler_init. - // The market guarantees that at least 256 threads might be created. - const unsigned workers_hard_limit = max(max(factor*governor::default_num_threads(), 256u), app_parallelism_limit()); + // The market guarantees that at least 256 threads might be created. 
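The hard limit computed here is max(max(factor * default_num_threads, 256), app_parallelism_limit), so at least 256 workers can always be created. A worked example with hypothetical inputs (the value of factor is defined outside this excerpt, so 2 is only an assumption):

#include <algorithm>
#include <cstdio>

int main() {
    unsigned factor = 2;                 // assumed for illustration only
    unsigned default_num_threads = 8;    // e.g. a machine with 8 logical CPUs
    unsigned app_parallelism_limit = 0;  // no explicit application limit
    unsigned workers_hard_limit =
        std::max(std::max(factor * default_num_threads, 256u), app_parallelism_limit);
    std::printf("workers_hard_limit = %u\n", workers_hard_limit);  // prints 256
    return 0;
}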
+ const unsigned workers_hard_limit = max(max(factor*governor::default_num_threads(), 256u), app_parallelism_limit()); const unsigned workers_soft_limit = calc_workers_soft_limit(workers_requested, workers_hard_limit); // Create the global market instance - std::size_t size = sizeof(market); - __TBB_ASSERT( __TBB_offsetof(market, my_workers) + sizeof(thread_data*) == sizeof(market), + std::size_t size = sizeof(market); + __TBB_ASSERT( __TBB_offsetof(market, my_workers) + sizeof(thread_data*) == sizeof(market), "my_workers must be the last data field of the market class"); - size += sizeof(thread_data*) * (workers_hard_limit - 1); + size += sizeof(thread_data*) * (workers_hard_limit - 1); __TBB_InitOnce::add_ref(); - void* storage = cache_aligned_allocate(size); - std::memset( storage, 0, size ); + void* storage = cache_aligned_allocate(size); + std::memset( storage, 0, size ); // Initialize and publish global market - market* m = new (storage) market( workers_soft_limit, workers_hard_limit, stack_size ); + market* m = new (storage) market( workers_soft_limit, workers_hard_limit, stack_size ); if( is_public ) - m->my_public_ref_count.store(1, std::memory_order_relaxed); -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE - if (market::is_lifetime_control_present()) { - ++m->my_public_ref_count; - ++m->my_ref_count; - } -#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE + m->my_public_ref_count.store(1, std::memory_order_relaxed); +#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE + if (market::is_lifetime_control_present()) { + ++m->my_public_ref_count; + ++m->my_ref_count; + } +#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE theMarket = m; // This check relies on the fact that for shared RML default_concurrency==max_concurrency if ( !governor::UsePrivateRML && m->my_server->default_concurrency() < workers_soft_limit ) runtime_warning( "RML might limit the number of workers to %u while %u is requested.\n" , m->my_server->default_concurrency(), workers_soft_limit ); } - return *theMarket; + return *theMarket; } void market::destroy () { this->market::~market(); // qualified to suppress warning - cache_aligned_deallocate( this ); + cache_aligned_deallocate( this ); __TBB_InitOnce::remove_ref(); } bool market::release ( bool is_public, bool blocking_terminate ) { - market::enforce([this] { return theMarket == this; }, "Global market instance was destroyed prematurely?"); + market::enforce([this] { return theMarket == this; }, "Global market instance was destroyed prematurely?"); bool do_release = false; { global_market_mutex_type::scoped_lock lock( theMarketMutex ); if ( blocking_terminate ) { __TBB_ASSERT( is_public, "Only an object with a public reference can request the blocking terminate" ); - while ( my_public_ref_count.load(std::memory_order_relaxed) == 1 && - my_ref_count.load(std::memory_order_relaxed) > 1 ) { + while ( my_public_ref_count.load(std::memory_order_relaxed) == 1 && + my_ref_count.load(std::memory_order_relaxed) > 1 ) { lock.release(); - // To guarantee that request_close_connection() is called by the last external thread, we need to wait till all - // references are released. Re-read my_public_ref_count to limit waiting if new external threads are created. + // To guarantee that request_close_connection() is called by the last external thread, we need to wait till all + // references are released. Re-read my_public_ref_count to limit waiting if new external threads are created. 
// Theoretically, new private references to the market can be added during waiting making it potentially // endless. // TODO: revise why the weak scheduler needs market's pointer and try to remove this wait. - // Note that the market should know about its schedulers for cancellation/exception/priority propagation, + // Note that the market should know about its schedulers for cancellation/exception/priority propagation, // see e.g. task_group_context::cancel_group_execution() - while ( my_public_ref_count.load(std::memory_order_acquire) == 1 && - my_ref_count.load(std::memory_order_acquire) > 1 ) { - yield(); - } + while ( my_public_ref_count.load(std::memory_order_acquire) == 1 && + my_ref_count.load(std::memory_order_acquire) > 1 ) { + yield(); + } lock.acquire( theMarketMutex ); } } if ( is_public ) { __TBB_ASSERT( theMarket == this, "Global market instance was destroyed prematurely?" ); - __TBB_ASSERT( my_public_ref_count.load(std::memory_order_relaxed), NULL ); + __TBB_ASSERT( my_public_ref_count.load(std::memory_order_relaxed), NULL ); --my_public_ref_count; } if ( --my_ref_count == 0 ) { - __TBB_ASSERT( !my_public_ref_count.load(std::memory_order_relaxed), NULL ); + __TBB_ASSERT( !my_public_ref_count.load(std::memory_order_relaxed), NULL ); do_release = true; theMarket = NULL; } } if( do_release ) { - __TBB_ASSERT( !my_public_ref_count.load(std::memory_order_relaxed), - "No public references remain if we remove the market." ); + __TBB_ASSERT( !my_public_ref_count.load(std::memory_order_relaxed), + "No public references remain if we remove the market." ); // inform RML that blocking termination is required my_join_workers = blocking_terminate; my_server->request_close_connection(); @@ -214,20 +214,20 @@ bool market::release ( bool is_public, bool blocking_terminate ) { return false; } -int market::update_workers_request() { - int old_request = my_num_workers_requested; - my_num_workers_requested = min(my_total_demand.load(std::memory_order_relaxed), - (int)my_num_workers_soft_limit.load(std::memory_order_relaxed)); -#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY - if (my_mandatory_num_requested > 0) { - __TBB_ASSERT(my_num_workers_soft_limit.load(std::memory_order_relaxed) == 0, NULL); - my_num_workers_requested = 1; - } -#endif - update_allotment(my_num_workers_requested); - return my_num_workers_requested - old_request; -} - +int market::update_workers_request() { + int old_request = my_num_workers_requested; + my_num_workers_requested = min(my_total_demand.load(std::memory_order_relaxed), + (int)my_num_workers_soft_limit.load(std::memory_order_relaxed)); +#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY + if (my_mandatory_num_requested > 0) { + __TBB_ASSERT(my_num_workers_soft_limit.load(std::memory_order_relaxed) == 0, NULL); + my_num_workers_requested = 1; + } +#endif + update_allotment(my_num_workers_requested); + return my_num_workers_requested - old_request; +} + void market::set_active_num_workers ( unsigned soft_limit ) { market *m; @@ -236,45 +236,45 @@ void market::set_active_num_workers ( unsigned soft_limit ) { if ( !theMarket ) return; // actual value will be used at market creation m = theMarket; - if (m->my_num_workers_soft_limit.load(std::memory_order_relaxed) == soft_limit) - return; + if (m->my_num_workers_soft_limit.load(std::memory_order_relaxed) == soft_limit) + return; ++m->my_ref_count; } // have my_ref_count for market, use it safely - - int delta = 0; + + int delta = 0; { arenas_list_mutex_type::scoped_lock lock( m->my_arenas_list_mutex ); __TBB_ASSERT(soft_limit <= 
m->my_num_workers_hard_limit, NULL); #if __TBB_ENQUEUE_ENFORCED_CONCURRENCY - arena_list_type* arenas = m->my_arenas; - - if (m->my_num_workers_soft_limit.load(std::memory_order_relaxed) == 0 && - m->my_mandatory_num_requested > 0) - { - for (unsigned level = 0; level < num_priority_levels; ++level ) - for (arena_list_type::iterator it = arenas[level].begin(); it != arenas[level].end(); ++it) - if (it->my_global_concurrency_mode.load(std::memory_order_relaxed)) - m->disable_mandatory_concurrency_impl(&*it); + arena_list_type* arenas = m->my_arenas; + + if (m->my_num_workers_soft_limit.load(std::memory_order_relaxed) == 0 && + m->my_mandatory_num_requested > 0) + { + for (unsigned level = 0; level < num_priority_levels; ++level ) + for (arena_list_type::iterator it = arenas[level].begin(); it != arenas[level].end(); ++it) + if (it->my_global_concurrency_mode.load(std::memory_order_relaxed)) + m->disable_mandatory_concurrency_impl(&*it); } - __TBB_ASSERT(m->my_mandatory_num_requested == 0, NULL); -#endif - - m->my_num_workers_soft_limit.store(soft_limit, std::memory_order_release); - // report only once after new soft limit value is set - m->my_workers_soft_limit_to_report.store(soft_limit, std::memory_order_relaxed); - + __TBB_ASSERT(m->my_mandatory_num_requested == 0, NULL); +#endif + + m->my_num_workers_soft_limit.store(soft_limit, std::memory_order_release); + // report only once after new soft limit value is set + m->my_workers_soft_limit_to_report.store(soft_limit, std::memory_order_relaxed); + #if __TBB_ENQUEUE_ENFORCED_CONCURRENCY - if (m->my_num_workers_soft_limit.load(std::memory_order_relaxed) == 0) { - for (unsigned level = 0; level < num_priority_levels; ++level ) - for (arena_list_type::iterator it = arenas[level].begin(); it != arenas[level].end(); ++it) - if (it->has_enqueued_tasks()) - m->enable_mandatory_concurrency_impl(&*it); + if (m->my_num_workers_soft_limit.load(std::memory_order_relaxed) == 0) { + for (unsigned level = 0; level < num_priority_levels; ++level ) + for (arena_list_type::iterator it = arenas[level].begin(); it != arenas[level].end(); ++it) + if (it->has_enqueued_tasks()) + m->enable_mandatory_concurrency_impl(&*it); } -#endif - - delta = m->update_workers_request(); +#endif + + delta = m->update_workers_request(); } // adjust_job_count_estimate must be called outside of any locks if( delta!=0 ) @@ -283,18 +283,18 @@ void market::set_active_num_workers ( unsigned soft_limit ) { m->release( /*is_public=*/false, /*blocking_terminate=*/false ); } -bool governor::does_client_join_workers (const rml::tbb_client &client) { +bool governor::does_client_join_workers (const rml::tbb_client &client) { return ((const market&)client).must_join_workers(); } -arena* market::create_arena ( int num_slots, int num_reserved_slots, unsigned arena_priority_level, - std::size_t stack_size ) -{ +arena* market::create_arena ( int num_slots, int num_reserved_slots, unsigned arena_priority_level, + std::size_t stack_size ) +{ __TBB_ASSERT( num_slots > 0, NULL ); __TBB_ASSERT( num_reserved_slots <= num_slots, NULL ); - // Add public market reference for an external thread/task_arena (that adds an internal reference in exchange). + // Add public market reference for an external thread/task_arena (that adds an internal reference in exchange). 
market &m = global_market( /*is_public=*/true, num_slots-num_reserved_slots, stack_size ); - arena& a = arena::allocate_arena( m, num_slots, num_reserved_slots, arena_priority_level ); + arena& a = arena::allocate_arena( m, num_slots, num_reserved_slots, arena_priority_level ); // Add newly created arena into the existing market's list. arenas_list_mutex_type::scoped_lock lock(m.my_arenas_list_mutex); m.insert_arena_into_list(a); @@ -303,35 +303,35 @@ arena* market::create_arena ( int num_slots, int num_reserved_slots, unsigned ar /** This method must be invoked under my_arenas_list_mutex. **/ void market::detach_arena ( arena& a ) { - market::enforce([this] { return theMarket == this; }, "Global market instance was destroyed prematurely?"); - __TBB_ASSERT( !a.my_slots[0].is_occupied(), NULL ); - if (a.my_global_concurrency_mode.load(std::memory_order_relaxed)) - disable_mandatory_concurrency_impl(&a); - + market::enforce([this] { return theMarket == this; }, "Global market instance was destroyed prematurely?"); + __TBB_ASSERT( !a.my_slots[0].is_occupied(), NULL ); + if (a.my_global_concurrency_mode.load(std::memory_order_relaxed)) + disable_mandatory_concurrency_impl(&a); + remove_arena_from_list(a); - if (a.my_aba_epoch == my_arenas_aba_epoch.load(std::memory_order_relaxed)) { - my_arenas_aba_epoch.store(my_arenas_aba_epoch.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed); - } + if (a.my_aba_epoch == my_arenas_aba_epoch.load(std::memory_order_relaxed)) { + my_arenas_aba_epoch.store(my_arenas_aba_epoch.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed); + } } -void market::try_destroy_arena ( arena* a, uintptr_t aba_epoch, unsigned priority_level ) { +void market::try_destroy_arena ( arena* a, uintptr_t aba_epoch, unsigned priority_level ) { bool locked = true; __TBB_ASSERT( a, NULL ); // we hold reference to the market, so it cannot be destroyed at any moment here - market::enforce([this] { return theMarket == this; }, NULL); + market::enforce([this] { return theMarket == this; }, NULL); __TBB_ASSERT( my_ref_count!=0, NULL ); my_arenas_list_mutex.lock(); - arena_list_type::iterator it = my_arenas[priority_level].begin(); - for ( ; it != my_arenas[priority_level].end(); ++it ) { + arena_list_type::iterator it = my_arenas[priority_level].begin(); + for ( ; it != my_arenas[priority_level].end(); ++it ) { if ( a == &*it ) { if ( it->my_aba_epoch == aba_epoch ) { // Arena is alive - if ( !a->my_num_workers_requested && !a->my_references.load(std::memory_order_relaxed) ) { - __TBB_ASSERT( - !a->my_num_workers_allotted.load(std::memory_order_relaxed) && - (a->my_pool_state == arena::SNAPSHOT_EMPTY || !a->my_max_num_workers), - "Inconsistent arena state" - ); + if ( !a->my_num_workers_requested && !a->my_references.load(std::memory_order_relaxed) ) { + __TBB_ASSERT( + !a->my_num_workers_allotted.load(std::memory_order_relaxed) && + (a->my_pool_state == arena::SNAPSHOT_EMPTY || !a->my_max_num_workers), + "Inconsistent arena state" + ); // Arena is abandoned. Destroy it. detach_arena( *a ); my_arenas_list_mutex.unlock(); @@ -348,265 +348,265 @@ void market::try_destroy_arena ( arena* a, uintptr_t aba_epoch, unsigned priorit } /** This method must be invoked under my_arenas_list_mutex. **/ -arena* market::arena_in_need ( arena_list_type* arenas, arena* hint ) { - // TODO: make sure arena with higher priority returned only if there are available slots in it. 
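arena_in_need() below walks the per-priority arena lists circularly, starting from a hint and wrapping across priority levels until it either finds an arena that can still use a worker or returns to its starting point. A simplified sketch of that scan, with std::vector standing in for the intrusive arena lists and a placeholder test for "needs a worker":

#include <cstddef>
#include <vector>

int* demo_pick_in_need(std::vector<std::vector<int>>& levels, std::size_t start_level) {
    const std::size_t n = levels.size();
    for (std::size_t step = 0; step < n; ++step) {
        std::size_t lvl = (start_level + step) % n;   // wrap around the priority levels
        for (int& item : levels[lvl])
            if (item > 0)                             // stand-in for "has unused allotment"
                return &item;
    }
    return nullptr;                                   // no arena needs a worker
}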
- hint = select_next_arena( hint ); - if ( !hint ) - return nullptr; - arena_list_type::iterator it = hint; - unsigned curr_priority_level = hint->my_priority_level; - __TBB_ASSERT( it != arenas[curr_priority_level].end(), nullptr ); +arena* market::arena_in_need ( arena_list_type* arenas, arena* hint ) { + // TODO: make sure arena with higher priority returned only if there are available slots in it. + hint = select_next_arena( hint ); + if ( !hint ) + return nullptr; + arena_list_type::iterator it = hint; + unsigned curr_priority_level = hint->my_priority_level; + __TBB_ASSERT( it != arenas[curr_priority_level].end(), nullptr ); do { arena& a = *it; - if ( ++it == arenas[curr_priority_level].end() ) { - do { - ++curr_priority_level %= num_priority_levels; - } while ( arenas[curr_priority_level].empty() ); - it = arenas[curr_priority_level].begin(); - } - if( a.num_workers_active() < a.my_num_workers_allotted.load(std::memory_order_relaxed) ) { + if ( ++it == arenas[curr_priority_level].end() ) { + do { + ++curr_priority_level %= num_priority_levels; + } while ( arenas[curr_priority_level].empty() ); + it = arenas[curr_priority_level].begin(); + } + if( a.num_workers_active() < a.my_num_workers_allotted.load(std::memory_order_relaxed) ) { a.my_references += arena::ref_worker; return &a; } - } while ( it != hint ); - return nullptr; + } while ( it != hint ); + return nullptr; } -arena* market::arena_in_need(arena* prev) { - if (my_total_demand.load(std::memory_order_acquire) <= 0) - return nullptr; - arenas_list_mutex_type::scoped_lock lock(my_arenas_list_mutex, /*is_writer=*/false); - // TODO: introduce three state response: alive, not_alive, no_market_arenas - if ( is_arena_alive(prev) ) - return arena_in_need(my_arenas, prev); - return arena_in_need(my_arenas, my_next_arena); -} - -int market::update_allotment ( arena_list_type* arenas, int workers_demand, int max_workers ) { - __TBB_ASSERT( workers_demand > 0, nullptr ); +arena* market::arena_in_need(arena* prev) { + if (my_total_demand.load(std::memory_order_acquire) <= 0) + return nullptr; + arenas_list_mutex_type::scoped_lock lock(my_arenas_list_mutex, /*is_writer=*/false); + // TODO: introduce three state response: alive, not_alive, no_market_arenas + if ( is_arena_alive(prev) ) + return arena_in_need(my_arenas, prev); + return arena_in_need(my_arenas, my_next_arena); +} + +int market::update_allotment ( arena_list_type* arenas, int workers_demand, int max_workers ) { + __TBB_ASSERT( workers_demand > 0, nullptr ); max_workers = min(workers_demand, max_workers); - int unassigned_workers = max_workers; - int assigned = 0; + int unassigned_workers = max_workers; + int assigned = 0; int carry = 0; - unsigned max_priority_level = num_priority_levels; - for (unsigned list_idx = 0; list_idx < num_priority_levels; ++list_idx ) { - int assigned_per_priority = min(my_priority_level_demand[list_idx], unassigned_workers); - unassigned_workers -= assigned_per_priority; - for (arena_list_type::iterator it = arenas[list_idx].begin(); it != arenas[list_idx].end(); ++it) { - arena& a = *it; - __TBB_ASSERT(a.my_num_workers_requested >= 0, nullptr); - __TBB_ASSERT(a.my_num_workers_requested <= int(a.my_max_num_workers) - || (a.my_max_num_workers == 0 && a.my_local_concurrency_requests > 0 && a.my_num_workers_requested == 1), nullptr); - if (a.my_num_workers_requested == 0) { - __TBB_ASSERT(!a.my_num_workers_allotted.load(std::memory_order_relaxed), nullptr); - continue; - } - - if (max_priority_level == num_priority_levels) { - max_priority_level = 
list_idx; - } - - int allotted = 0; + unsigned max_priority_level = num_priority_levels; + for (unsigned list_idx = 0; list_idx < num_priority_levels; ++list_idx ) { + int assigned_per_priority = min(my_priority_level_demand[list_idx], unassigned_workers); + unassigned_workers -= assigned_per_priority; + for (arena_list_type::iterator it = arenas[list_idx].begin(); it != arenas[list_idx].end(); ++it) { + arena& a = *it; + __TBB_ASSERT(a.my_num_workers_requested >= 0, nullptr); + __TBB_ASSERT(a.my_num_workers_requested <= int(a.my_max_num_workers) + || (a.my_max_num_workers == 0 && a.my_local_concurrency_requests > 0 && a.my_num_workers_requested == 1), nullptr); + if (a.my_num_workers_requested == 0) { + __TBB_ASSERT(!a.my_num_workers_allotted.load(std::memory_order_relaxed), nullptr); + continue; + } + + if (max_priority_level == num_priority_levels) { + max_priority_level = list_idx; + } + + int allotted = 0; #if __TBB_ENQUEUE_ENFORCED_CONCURRENCY - if (my_num_workers_soft_limit.load(std::memory_order_relaxed) == 0) { - __TBB_ASSERT(max_workers == 0 || max_workers == 1, nullptr); - allotted = a.my_global_concurrency_mode.load(std::memory_order_relaxed) && - assigned < max_workers ? 1 : 0; - } else + if (my_num_workers_soft_limit.load(std::memory_order_relaxed) == 0) { + __TBB_ASSERT(max_workers == 0 || max_workers == 1, nullptr); + allotted = a.my_global_concurrency_mode.load(std::memory_order_relaxed) && + assigned < max_workers ? 1 : 0; + } else #endif - { - int tmp = a.my_num_workers_requested * assigned_per_priority + carry; - allotted = tmp / my_priority_level_demand[list_idx]; - carry = tmp % my_priority_level_demand[list_idx]; - __TBB_ASSERT(allotted <= a.my_num_workers_requested, nullptr); - __TBB_ASSERT(allotted <= int(a.my_num_slots - a.my_num_reserved_slots), nullptr); - } - a.my_num_workers_allotted.store(allotted, std::memory_order_relaxed); - a.my_is_top_priority.store(list_idx == max_priority_level, std::memory_order_relaxed); - assigned += allotted; - } + { + int tmp = a.my_num_workers_requested * assigned_per_priority + carry; + allotted = tmp / my_priority_level_demand[list_idx]; + carry = tmp % my_priority_level_demand[list_idx]; + __TBB_ASSERT(allotted <= a.my_num_workers_requested, nullptr); + __TBB_ASSERT(allotted <= int(a.my_num_slots - a.my_num_reserved_slots), nullptr); + } + a.my_num_workers_allotted.store(allotted, std::memory_order_relaxed); + a.my_is_top_priority.store(list_idx == max_priority_level, std::memory_order_relaxed); + assigned += allotted; + } } - __TBB_ASSERT( 0 <= assigned && assigned <= max_workers, nullptr ); + __TBB_ASSERT( 0 <= assigned && assigned <= max_workers, nullptr ); return assigned; } -/** This method must be invoked under my_arenas_list_mutex. **/ -bool market::is_arena_in_list( arena_list_type &arenas, arena *a ) { - __TBB_ASSERT( a, "Expected non-null pointer to arena." ); - for ( arena_list_type::iterator it = arenas.begin(); it != arenas.end(); ++it ) - if ( a == &*it ) - return true; - return false; -} - -/** This method must be invoked under my_arenas_list_mutex. **/ -bool market::is_arena_alive(arena* a) { - if ( !a ) - return false; - - // Still cannot access internals of the arena since the object itself might be destroyed. - - for ( unsigned idx = 0; idx < num_priority_levels; ++idx ) { - if ( is_arena_in_list( my_arenas[idx], a ) ) - return true; - } - return false; +/** This method must be invoked under my_arenas_list_mutex. 
**/ +bool market::is_arena_in_list( arena_list_type &arenas, arena *a ) { + __TBB_ASSERT( a, "Expected non-null pointer to arena." ); + for ( arena_list_type::iterator it = arenas.begin(); it != arenas.end(); ++it ) + if ( a == &*it ) + return true; + return false; +} + +/** This method must be invoked under my_arenas_list_mutex. **/ +bool market::is_arena_alive(arena* a) { + if ( !a ) + return false; + + // Still cannot access internals of the arena since the object itself might be destroyed. + + for ( unsigned idx = 0; idx < num_priority_levels; ++idx ) { + if ( is_arena_in_list( my_arenas[idx], a ) ) + return true; + } + return false; } #if __TBB_ENQUEUE_ENFORCED_CONCURRENCY -void market::enable_mandatory_concurrency_impl ( arena *a ) { - __TBB_ASSERT(!a->my_global_concurrency_mode.load(std::memory_order_relaxed), NULL); - __TBB_ASSERT(my_num_workers_soft_limit.load(std::memory_order_relaxed) == 0, NULL); - - a->my_global_concurrency_mode.store(true, std::memory_order_relaxed); - my_mandatory_num_requested++; +void market::enable_mandatory_concurrency_impl ( arena *a ) { + __TBB_ASSERT(!a->my_global_concurrency_mode.load(std::memory_order_relaxed), NULL); + __TBB_ASSERT(my_num_workers_soft_limit.load(std::memory_order_relaxed) == 0, NULL); + + a->my_global_concurrency_mode.store(true, std::memory_order_relaxed); + my_mandatory_num_requested++; } -void market::enable_mandatory_concurrency ( arena *a ) { - int delta = 0; +void market::enable_mandatory_concurrency ( arena *a ) { + int delta = 0; { arenas_list_mutex_type::scoped_lock lock(my_arenas_list_mutex); - if (my_num_workers_soft_limit.load(std::memory_order_relaxed) != 0 || - a->my_global_concurrency_mode.load(std::memory_order_relaxed)) - return; - - enable_mandatory_concurrency_impl(a); - delta = update_workers_request(); + if (my_num_workers_soft_limit.load(std::memory_order_relaxed) != 0 || + a->my_global_concurrency_mode.load(std::memory_order_relaxed)) + return; + + enable_mandatory_concurrency_impl(a); + delta = update_workers_request(); } - - if (delta != 0) - my_server->adjust_job_count_estimate(delta); + + if (delta != 0) + my_server->adjust_job_count_estimate(delta); +} + +void market::disable_mandatory_concurrency_impl(arena* a) { + __TBB_ASSERT(a->my_global_concurrency_mode.load(std::memory_order_relaxed), NULL); + __TBB_ASSERT(my_mandatory_num_requested > 0, NULL); + + a->my_global_concurrency_mode.store(false, std::memory_order_relaxed); + my_mandatory_num_requested--; } -void market::disable_mandatory_concurrency_impl(arena* a) { - __TBB_ASSERT(a->my_global_concurrency_mode.load(std::memory_order_relaxed), NULL); - __TBB_ASSERT(my_mandatory_num_requested > 0, NULL); - - a->my_global_concurrency_mode.store(false, std::memory_order_relaxed); - my_mandatory_num_requested--; -} - void market::mandatory_concurrency_disable ( arena *a ) { - int delta = 0; + int delta = 0; { arenas_list_mutex_type::scoped_lock lock(my_arenas_list_mutex); - if (!a->my_global_concurrency_mode.load(std::memory_order_relaxed)) + if (!a->my_global_concurrency_mode.load(std::memory_order_relaxed)) + return; + // There is a racy window in advertise_new_work between mandtory concurrency enabling and + // setting SNAPSHOT_FULL. It gives a chance to spawn request to disable mandatory concurrency. + // Therefore, we double check that there is no enqueued tasks. + if (a->has_enqueued_tasks()) return; - // There is a racy window in advertise_new_work between mandtory concurrency enabling and - // setting SNAPSHOT_FULL. 
It gives a chance to spawn request to disable mandatory concurrency. - // Therefore, we double check that there is no enqueued tasks. - if (a->has_enqueued_tasks()) - return; - __TBB_ASSERT(my_num_workers_soft_limit.load(std::memory_order_relaxed) == 0, NULL); - disable_mandatory_concurrency_impl(a); + __TBB_ASSERT(my_num_workers_soft_limit.load(std::memory_order_relaxed) == 0, NULL); + disable_mandatory_concurrency_impl(a); - delta = update_workers_request(); + delta = update_workers_request(); } - if (delta != 0) - my_server->adjust_job_count_estimate(delta); + if (delta != 0) + my_server->adjust_job_count_estimate(delta); } #endif /* __TBB_ENQUEUE_ENFORCED_CONCURRENCY */ -void market::adjust_demand ( arena& a, int delta, bool mandatory ) { - if (!delta) { +void market::adjust_demand ( arena& a, int delta, bool mandatory ) { + if (!delta) { return; - } - int target_epoch{}; - { - arenas_list_mutex_type::scoped_lock lock(my_arenas_list_mutex); - __TBB_ASSERT(theMarket != nullptr, "market instance was destroyed prematurely?"); -#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY - if (mandatory) { - __TBB_ASSERT(delta == 1 || delta == -1, nullptr); - // Count the number of mandatory requests and proceed only for 0->1 and 1->0 transitions. - a.my_local_concurrency_requests += delta; - if ((delta > 0 && a.my_local_concurrency_requests != 1) || - (delta < 0 && a.my_local_concurrency_requests != 0)) - { - return; - } - } -#endif - a.my_total_num_workers_requested += delta; - int target_workers = 0; - // Cap target_workers into interval [0, a.my_max_num_workers] - if (a.my_total_num_workers_requested > 0) { -#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY - // At least one thread should be requested when mandatory concurrency - int max_num_workers = int(a.my_max_num_workers); - if (a.my_local_concurrency_requests > 0 && max_num_workers == 0) { - max_num_workers = 1; - } -#endif - target_workers = min(a.my_total_num_workers_requested, max_num_workers); - } - - delta = target_workers - a.my_num_workers_requested; - - if (delta == 0) { + } + int target_epoch{}; + { + arenas_list_mutex_type::scoped_lock lock(my_arenas_list_mutex); + __TBB_ASSERT(theMarket != nullptr, "market instance was destroyed prematurely?"); +#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY + if (mandatory) { + __TBB_ASSERT(delta == 1 || delta == -1, nullptr); + // Count the number of mandatory requests and proceed only for 0->1 and 1->0 transitions. 
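The transition check that follows is edge-triggered: the per-arena counter changes on every call, but only the 0->1 and 1->0 transitions are allowed to affect the global request. A tiny illustration with a made-up counter:

#include <atomic>

static std::atomic<int> demo_mandatory_requests{0};

// Returns true only when the count crosses an edge (0->1 on +1, 1->0 on -1).
bool demo_register(int delta /* +1 or -1 */) {
    int now = (demo_mandatory_requests += delta);
    return (delta > 0 && now == 1) || (delta < 0 && now == 0);
}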
+ a.my_local_concurrency_requests += delta; + if ((delta > 0 && a.my_local_concurrency_requests != 1) || + (delta < 0 && a.my_local_concurrency_requests != 0)) + { + return; + } + } +#endif + a.my_total_num_workers_requested += delta; + int target_workers = 0; + // Cap target_workers into interval [0, a.my_max_num_workers] + if (a.my_total_num_workers_requested > 0) { +#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY + // At least one thread should be requested when mandatory concurrency + int max_num_workers = int(a.my_max_num_workers); + if (a.my_local_concurrency_requests > 0 && max_num_workers == 0) { + max_num_workers = 1; + } +#endif + target_workers = min(a.my_total_num_workers_requested, max_num_workers); + } + + delta = target_workers - a.my_num_workers_requested; + + if (delta == 0) { return; } - - a.my_num_workers_requested += delta; - if (a.my_num_workers_requested == 0) { - a.my_num_workers_allotted.store(0, std::memory_order_relaxed); + + a.my_num_workers_requested += delta; + if (a.my_num_workers_requested == 0) { + a.my_num_workers_allotted.store(0, std::memory_order_relaxed); } - - int total_demand = my_total_demand.load(std::memory_order_relaxed) + delta; - my_total_demand.store(total_demand, std::memory_order_relaxed); - my_priority_level_demand[a.my_priority_level] += delta; - unsigned effective_soft_limit = my_num_workers_soft_limit.load(std::memory_order_relaxed); - if (my_mandatory_num_requested > 0) { - __TBB_ASSERT(effective_soft_limit == 0, NULL); - effective_soft_limit = 1; + + int total_demand = my_total_demand.load(std::memory_order_relaxed) + delta; + my_total_demand.store(total_demand, std::memory_order_relaxed); + my_priority_level_demand[a.my_priority_level] += delta; + unsigned effective_soft_limit = my_num_workers_soft_limit.load(std::memory_order_relaxed); + if (my_mandatory_num_requested > 0) { + __TBB_ASSERT(effective_soft_limit == 0, NULL); + effective_soft_limit = 1; } - - update_allotment(effective_soft_limit); - if (delta > 0) { - // can't overflow soft_limit, but remember values request by arenas in - // my_total_demand to not prematurely release workers to RML - if (my_num_workers_requested + delta > (int)effective_soft_limit) - delta = effective_soft_limit - my_num_workers_requested; + + update_allotment(effective_soft_limit); + if (delta > 0) { + // can't overflow soft_limit, but remember values request by arenas in + // my_total_demand to not prematurely release workers to RML + if (my_num_workers_requested + delta > (int)effective_soft_limit) + delta = effective_soft_limit - my_num_workers_requested; + } + else { + // the number of workers should not be decreased below my_total_demand + if (my_num_workers_requested + delta < total_demand) + delta = min(total_demand, (int)effective_soft_limit) - my_num_workers_requested; } - else { - // the number of workers should not be decreased below my_total_demand - if (my_num_workers_requested + delta < total_demand) - delta = min(total_demand, (int)effective_soft_limit) - my_num_workers_requested; - } - my_num_workers_requested += delta; - __TBB_ASSERT(my_num_workers_requested <= (int)effective_soft_limit, NULL); - - target_epoch = my_adjust_demand_target_epoch++; + my_num_workers_requested += delta; + __TBB_ASSERT(my_num_workers_requested <= (int)effective_soft_limit, NULL); + + target_epoch = my_adjust_demand_target_epoch++; } - spin_wait_until_eq(my_adjust_demand_current_epoch, target_epoch); + spin_wait_until_eq(my_adjust_demand_current_epoch, target_epoch); // Must be called outside of any locks 
my_server->adjust_job_count_estimate( delta ); - my_adjust_demand_current_epoch.store(target_epoch + 1, std::memory_order_release); + my_adjust_demand_current_epoch.store(target_epoch + 1, std::memory_order_release); } void market::process( job& j ) { - thread_data& td = static_cast<thread_data&>(j); - // td.my_arena can be dead. Don't access it until arena_in_need is called - arena *a = td.my_arena; - for (int i = 0; i < 2; ++i) { - while ( (a = arena_in_need(a)) ) { - a->process(td); + thread_data& td = static_cast<thread_data&>(j); + // td.my_arena can be dead. Don't access it until arena_in_need is called + arena *a = td.my_arena; + for (int i = 0; i < 2; ++i) { + while ( (a = arena_in_need(a)) ) { + a->process(td); } // Workers leave market because there is no arena in need. It can happen earlier than // adjust_job_count_estimate() decreases my_slack and RML can put this thread to sleep. // It might result in a busy-loop checking for my_slack<0 and calling this method instantly. - // the yield refines this spinning. - if ( !i ) { - yield(); - } + // the yield refines this spinning. + if ( !i ) { + yield(); + } } } -void market::cleanup( job& j) { - market::enforce([this] { return theMarket != this; }, NULL ); - governor::auto_terminate(&j); +void market::cleanup( job& j) { + market::enforce([this] { return theMarket != this; }, NULL ); + governor::auto_terminate(&j); } void market::acknowledge_close_connection() { @@ -614,27 +614,27 @@ void market::acknowledge_close_connection() { } ::rml::job* market::create_one_job() { - unsigned short index = ++my_first_unused_worker_idx; + unsigned short index = ++my_first_unused_worker_idx; __TBB_ASSERT( index > 0, NULL ); ITT_THREAD_SET_NAME(_T("TBB Worker Thread")); // index serves as a hint decreasing conflicts between workers when they migrate between arenas - thread_data* td = new(cache_aligned_allocate(sizeof(thread_data))) thread_data{ index, true }; + thread_data* td = new(cache_aligned_allocate(sizeof(thread_data))) thread_data{ index, true }; __TBB_ASSERT( index <= my_num_workers_hard_limit, NULL ); - __TBB_ASSERT( my_workers[index - 1] == nullptr, NULL ); - my_workers[index - 1] = td; - return td; + __TBB_ASSERT( my_workers[index - 1] == nullptr, NULL ); + my_workers[index - 1] = td; + return td; } -void market::add_external_thread(thread_data& td) { - context_state_propagation_mutex_type::scoped_lock lock(the_context_state_propagation_mutex); - my_masters.push_front(td); +void market::add_external_thread(thread_data& td) { + context_state_propagation_mutex_type::scoped_lock lock(the_context_state_propagation_mutex); + my_masters.push_front(td); } -void market::remove_external_thread(thread_data& td) { - context_state_propagation_mutex_type::scoped_lock lock(the_context_state_propagation_mutex); - my_masters.remove(td); +void market::remove_external_thread(thread_data& td) { + context_state_propagation_mutex_type::scoped_lock lock(the_context_state_propagation_mutex); + my_masters.remove(td); } -} // namespace r1 -} // namespace detail +} // namespace r1 +} // namespace detail } // namespace tbb diff --git a/contrib/libs/tbb/src/tbb/market.h b/contrib/libs/tbb/src/tbb/market.h index 02c71b2b04..8443467447 100644 --- a/contrib/libs/tbb/src/tbb/market.h +++ b/contrib/libs/tbb/src/tbb/market.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
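// Illustrative sketch (hypothetical, simplified; not part of this commit or the TBB sources)
// of the ticket/epoch scheme market::adjust_demand uses above: each caller takes a ticket
// while holding my_arenas_list_mutex, then waits for its turn so that calls to
// adjust_job_count_estimate happen in order and outside any lock.
#include <atomic>
#include <thread>

static std::atomic<int> target_epoch{0};   // next ticket to hand out (incremented under the lock in TBB)
static std::atomic<int> current_epoch{0};  // ticket currently allowed to talk to the server

void serialized_adjust(int delta, void (*adjust_job_count_estimate)(int)) {
    int ticket = target_epoch.fetch_add(1, std::memory_order_relaxed);
    while (current_epoch.load(std::memory_order_acquire) != ticket)
        std::this_thread::yield();                 // stands in for spin_wait_until_eq
    adjust_job_count_estimate(delta);              // must run outside any lock
    current_epoch.store(ticket + 1, std::memory_order_release);
}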
@@ -18,15 +18,15 @@ #define _TBB_market_H #include "scheduler_common.h" -#include "concurrent_monitor.h" +#include "concurrent_monitor.h" #include "intrusive_list.h" -#include "rml_tbb.h" +#include "rml_tbb.h" + +#include "oneapi/tbb/spin_rw_mutex.h" +#include "oneapi/tbb/task_group.h" + +#include <atomic> -#include "oneapi/tbb/spin_rw_mutex.h" -#include "oneapi/tbb/task_group.h" - -#include <atomic> - #if defined(_MSC_VER) && defined(_Wp64) // Workaround for overzealous compiler warnings in /Wp64 mode #pragma warning (push) @@ -34,17 +34,17 @@ #endif namespace tbb { -namespace detail { - -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE -namespace d1 { -class task_scheduler_handle; -} -#endif - -namespace r1 { - -class task_arena_base; +namespace detail { + +#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE +namespace d1 { +class task_scheduler_handle; +} +#endif + +namespace r1 { + +class task_arena_base; class task_group_context; //------------------------------------------------------------------------ @@ -53,26 +53,26 @@ class task_group_context; class market : no_copy, rml::tbb_client { friend class arena; - friend class task_arena_base; + friend class task_arena_base; template<typename SchedulerTraits> friend class custom_scheduler; - friend class task_group_context; - friend class governor; -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE - friend class lifetime_control; -#endif - -public: - //! Keys for the arena map array. The lower the value the higher priority of the arena list. - static constexpr unsigned num_priority_levels = 3; - + friend class task_group_context; + friend class governor; +#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE + friend class lifetime_control; +#endif + +public: + //! Keys for the arena map array. The lower the value the higher priority of the arena list. + static constexpr unsigned num_priority_levels = 3; + private: friend void ITT_DoUnsafeOneTimeInitialization (); -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE - friend bool finalize_impl(d1::task_scheduler_handle& handle); -#endif +#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE + friend bool finalize_impl(d1::task_scheduler_handle& handle); +#endif typedef intrusive_list<arena> arena_list_type; - typedef intrusive_list<thread_data> thread_data_list_type; + typedef intrusive_list<thread_data> thread_data_list_type; //! Currently active global market static market* theMarket; @@ -84,66 +84,66 @@ private: //! Lightweight mutex guarding accounting operations with arenas list typedef spin_rw_mutex arenas_list_mutex_type; - // TODO: introduce fine-grained (per priority list) locking of arenas. + // TODO: introduce fine-grained (per priority list) locking of arenas. arenas_list_mutex_type my_arenas_list_mutex; //! Pointer to the RML server object that services this TBB instance. rml::tbb_server* my_server; - //! Waiting object for external and coroutine waiters. - extended_concurrent_monitor my_sleep_monitor; - + //! Waiting object for external and coroutine waiters. + extended_concurrent_monitor my_sleep_monitor; + //! Maximal number of workers allowed for use by the underlying resource manager /** It can't be changed after market creation. **/ unsigned my_num_workers_hard_limit; //! Current application-imposed limit on the number of workers (see set_active_num_workers()) /** It can't be more than my_num_workers_hard_limit. **/ - std::atomic<unsigned> my_num_workers_soft_limit; + std::atomic<unsigned> my_num_workers_soft_limit; //! 
Number of workers currently requested from RML int my_num_workers_requested; - //! The target serialization epoch for callers of adjust_job_count_estimate - int my_adjust_demand_target_epoch; - - //! The current serialization epoch for callers of adjust_job_count_estimate - std::atomic<int> my_adjust_demand_current_epoch; - + //! The target serialization epoch for callers of adjust_job_count_estimate + int my_adjust_demand_target_epoch; + + //! The current serialization epoch for callers of adjust_job_count_estimate + std::atomic<int> my_adjust_demand_current_epoch; + //! First unused index of worker /** Used to assign indices to the new workers coming from RML, and busy part of my_workers array. **/ - std::atomic<unsigned> my_first_unused_worker_idx; + std::atomic<unsigned> my_first_unused_worker_idx; - //! Number of workers that were requested by all arenas on all priority levels - std::atomic<int> my_total_demand; + //! Number of workers that were requested by all arenas on all priority levels + std::atomic<int> my_total_demand; + + //! Number of workers that were requested by arenas per single priority list item + int my_priority_level_demand[num_priority_levels]; - //! Number of workers that were requested by arenas per single priority list item - int my_priority_level_demand[num_priority_levels]; - #if __TBB_ENQUEUE_ENFORCED_CONCURRENCY //! How many times mandatory concurrency was requested from the market int my_mandatory_num_requested; #endif - //! Per priority list of registered arenas - arena_list_type my_arenas[num_priority_levels]; + //! Per priority list of registered arenas + arena_list_type my_arenas[num_priority_levels]; //! The first arena to be checked when idle worker seeks for an arena to enter /** The check happens in round-robin fashion. **/ arena *my_next_arena; //! ABA prevention marker to assign to newly created arenas - std::atomic<uintptr_t> my_arenas_aba_epoch; + std::atomic<uintptr_t> my_arenas_aba_epoch; //! Reference count controlling market object lifetime - std::atomic<unsigned> my_ref_count; + std::atomic<unsigned> my_ref_count; - //! Count of external threads attached - std::atomic<unsigned> my_public_ref_count; + //! Count of external threads attached + std::atomic<unsigned> my_public_ref_count; //! Stack size of worker threads - std::size_t my_stack_size; + std::size_t my_stack_size; //! Shutdown mode bool my_join_workers; @@ -152,88 +152,88 @@ private: static const unsigned skip_soft_limit_warning = ~0U; //! Either workers soft limit to be reported via runtime_warning() or skip_soft_limit_warning - std::atomic<unsigned> my_workers_soft_limit_to_report; + std::atomic<unsigned> my_workers_soft_limit_to_report; //! Constructor - market ( unsigned workers_soft_limit, unsigned workers_hard_limit, std::size_t stack_size ); + market ( unsigned workers_soft_limit, unsigned workers_hard_limit, std::size_t stack_size ); //! Destroys and deallocates market object created by market::create() void destroy (); - //! Recalculates the number of workers requested from RML and updates the allotment. - int update_workers_request(); - + //! Recalculates the number of workers requested from RML and updates the allotment. + int update_workers_request(); + //! Recalculates the number of workers assigned to each arena in the list. /** The actual number of workers servicing a particular arena may temporarily deviate from the calculated value. 
**/ - void update_allotment (unsigned effective_soft_limit) { - int total_demand = my_total_demand.load(std::memory_order_relaxed); - if (total_demand) { - update_allotment(my_arenas, total_demand, (int)effective_soft_limit); - } + void update_allotment (unsigned effective_soft_limit) { + int total_demand = my_total_demand.load(std::memory_order_relaxed); + if (total_demand) { + update_allotment(my_arenas, total_demand, (int)effective_soft_limit); + } } //! Returns next arena that needs more workers, or NULL. - arena* arena_in_need(arena* prev); - - template <typename Pred> - static void enforce (Pred pred, const char* msg) { - suppress_unused_warning(pred, msg); -#if TBB_USE_ASSERT - global_market_mutex_type::scoped_lock lock(theMarketMutex); - __TBB_ASSERT(pred(), msg); -#endif + arena* arena_in_need(arena* prev); + + template <typename Pred> + static void enforce (Pred pred, const char* msg) { + suppress_unused_warning(pred, msg); +#if TBB_USE_ASSERT + global_market_mutex_type::scoped_lock lock(theMarketMutex); + __TBB_ASSERT(pred(), msg); +#endif } - + //////////////////////////////////////////////////////////////////////////////// // Helpers to unify code branches dependent on priority feature presence - arena* select_next_arena( arena* hint ); - + arena* select_next_arena( arena* hint ); + void insert_arena_into_list ( arena& a ); void remove_arena_from_list ( arena& a ); - arena* arena_in_need ( arena_list_type* arenas, arena* hint ); + arena* arena_in_need ( arena_list_type* arenas, arena* hint ); + + int update_allotment ( arena_list_type* arenas, int total_demand, int max_workers ); - int update_allotment ( arena_list_type* arenas, int total_demand, int max_workers ); + bool is_arena_in_list( arena_list_type& arenas, arena* a ); - bool is_arena_in_list( arena_list_type& arenas, arena* a ); + bool is_arena_alive( arena* a ); - bool is_arena_alive( arena* a ); - //////////////////////////////////////////////////////////////////////////////// // Implementation of rml::tbb_client interface methods - version_type version () const override { return 0; } + version_type version () const override { return 0; } - unsigned max_job_count () const override { return my_num_workers_hard_limit; } + unsigned max_job_count () const override { return my_num_workers_hard_limit; } - std::size_t min_stack_size () const override { return worker_stack_size(); } + std::size_t min_stack_size () const override { return worker_stack_size(); } - job* create_one_job () override; + job* create_one_job () override; - void cleanup( job& j ) override; + void cleanup( job& j ) override; - void acknowledge_close_connection () override; + void acknowledge_close_connection () override; - void process( job& j ) override; + void process( job& j ) override; -public: - //! Factory method creating new market object - static market& global_market( bool is_public, unsigned max_num_workers = 0, std::size_t stack_size = 0 ); +public: + //! Factory method creating new market object + static market& global_market( bool is_public, unsigned max_num_workers = 0, std::size_t stack_size = 0 ); + + //! Add reference to market if theMarket exists + static bool add_ref_unsafe( global_market_mutex_type::scoped_lock& lock, bool is_public, unsigned max_num_workers = 0, std::size_t stack_size = 0 ); - //! Add reference to market if theMarket exists - static bool add_ref_unsafe( global_market_mutex_type::scoped_lock& lock, bool is_public, unsigned max_num_workers = 0, std::size_t stack_size = 0 ); - //! 
Creates an arena object /** If necessary, also creates global market instance, and boosts its ref count. Each call to create_arena() must be matched by the call to arena::free_arena(). **/ - static arena* create_arena ( int num_slots, int num_reserved_slots, - unsigned arena_index, std::size_t stack_size ); + static arena* create_arena ( int num_slots, int num_reserved_slots, + unsigned arena_index, std::size_t stack_size ); //! Removes the arena from the market's list - void try_destroy_arena ( arena*, uintptr_t aba_epoch, unsigned pririty_level ); + void try_destroy_arena ( arena*, uintptr_t aba_epoch, unsigned pririty_level ); //! Removes the arena from the market's list void detach_arena ( arena& ); @@ -241,32 +241,32 @@ public: //! Decrements market's refcount and destroys it in the end bool release ( bool is_public, bool blocking_terminate ); - //! Return wait list - extended_concurrent_monitor& get_wait_list() { return my_sleep_monitor; } - + //! Return wait list + extended_concurrent_monitor& get_wait_list() { return my_sleep_monitor; } + #if __TBB_ENQUEUE_ENFORCED_CONCURRENCY //! Imlpementation of mandatory concurrency enabling - void enable_mandatory_concurrency_impl ( arena *a ); + void enable_mandatory_concurrency_impl ( arena *a ); + + //! Inform the external thread that there is an arena with mandatory concurrency + void enable_mandatory_concurrency ( arena *a ); - //! Inform the external thread that there is an arena with mandatory concurrency - void enable_mandatory_concurrency ( arena *a ); + //! Inform the external thread that the arena is no more interested in mandatory concurrency + void disable_mandatory_concurrency_impl(arena* a); - //! Inform the external thread that the arena is no more interested in mandatory concurrency - void disable_mandatory_concurrency_impl(arena* a); - - //! Inform the external thread that the arena is no more interested in mandatory concurrency + //! Inform the external thread that the arena is no more interested in mandatory concurrency void mandatory_concurrency_disable ( arena *a ); #endif /* __TBB_ENQUEUE_ENFORCED_CONCURRENCY */ //! Request that arena's need in workers should be adjusted. /** Concurrent invocations are possible only on behalf of different arenas. **/ - void adjust_demand ( arena&, int delta, bool mandatory ); + void adjust_demand ( arena&, int delta, bool mandatory ); //! Used when RML asks for join mode during workers termination. bool must_join_workers () const { return my_join_workers; } //! Returns the requested stack size of worker threads. - std::size_t worker_stack_size () const { return my_stack_size; } + std::size_t worker_stack_size () const { return my_stack_size; } //! Set number of active workers static void set_active_num_workers( unsigned w ); @@ -274,39 +274,39 @@ public: //! Reports active parallelism level according to user's settings static unsigned app_parallelism_limit(); -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE - //! Reports if any active global lifetime references are present - static unsigned is_lifetime_control_present(); -#endif +#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE + //! Reports if any active global lifetime references are present + static unsigned is_lifetime_control_present(); +#endif //! Finds all contexts affected by the state change and propagates the new state to them. /** The propagation is relayed to the market because tasks created by one - external thread can be passed to and executed by other external threads. 
This means + external thread can be passed to and executed by other external threads. This means that context trees can span several arenas at once and thus state change propagation cannot be generally localized to one arena only. **/ template <typename T> - bool propagate_task_group_state (std::atomic<T> d1::task_group_context::*mptr_state, d1::task_group_context& src, T new_state ); + bool propagate_task_group_state (std::atomic<T> d1::task_group_context::*mptr_state, d1::task_group_context& src, T new_state ); - //! List of registered external threads - thread_data_list_type my_masters; + //! List of registered external threads + thread_data_list_type my_masters; //! Array of pointers to the registered workers /** Used by cancellation propagation mechanism. Must be the last data member of the class market. **/ - thread_data* my_workers[1]; + thread_data* my_workers[1]; static unsigned max_num_workers() { global_market_mutex_type::scoped_lock lock( theMarketMutex ); return theMarket? theMarket->my_num_workers_hard_limit : 0; } - - void add_external_thread(thread_data& td); - - void remove_external_thread(thread_data& td); + + void add_external_thread(thread_data& td); + + void remove_external_thread(thread_data& td); }; // class market -} // namespace r1 -} // namespace detail +} // namespace r1 +} // namespace detail } // namespace tbb #if defined(_MSC_VER) && defined(_Wp64) diff --git a/contrib/libs/tbb/src/tbb/misc.cpp b/contrib/libs/tbb/src/tbb/misc.cpp index 267c30cc2a..0e1d33a596 100644 --- a/contrib/libs/tbb/src/tbb/misc.cpp +++ b/contrib/libs/tbb/src/tbb/misc.cpp @@ -1,137 +1,137 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -// Source file for miscellaneous entities that are infrequently referenced by -// an executing program. - -#include "oneapi/tbb/detail/_exception.h" -#include "oneapi/tbb/detail/_machine.h" - -#include "oneapi/tbb/version.h" - -#include "misc.h" -#include "governor.h" -#include "assert_impl.h" // Out-of-line TBB assertion handling routines are instantiated here. - -#include <cstdio> -#include <cstdlib> -#include <stdexcept> -#include <cstring> -#include <cstdarg> - -#if _WIN32||_WIN64 -#include <windows.h> -#endif - -#if !_WIN32 -#include <unistd.h> // sysconf(_SC_PAGESIZE) -#endif - -namespace tbb { -namespace detail { -namespace r1 { - -//------------------------------------------------------------------------ -// governor data -//------------------------------------------------------------------------ -cpu_features_type governor::cpu_features; - - -size_t DefaultSystemPageSize() { -#if _WIN32 - SYSTEM_INFO si; - GetSystemInfo(&si); - return si.dwPageSize; -#else - return sysconf(_SC_PAGESIZE); -#endif -} - -/** The leading "\0" is here so that applying "strings" to the binary delivers a clean result. 
*/ -static const char VersionString[] = "\0" TBB_VERSION_STRINGS; - -static bool PrintVersionFlag = false; - -void PrintVersion() { - PrintVersionFlag = true; - std::fputs(VersionString+1,stderr); -} - -void PrintExtraVersionInfo( const char* category, const char* format, ... ) { - if( PrintVersionFlag ) { - char str[1024]; std::memset(str, 0, 1024); - va_list args; va_start(args, format); - // Note: correct vsnprintf definition obtained from tbb_assert_impl.h - std::vsnprintf( str, 1024-1, format, args); - va_end(args); - std::fprintf(stderr, "oneTBB: %s\t%s\n", category, str ); - } -} - -//! check for transaction support. -#if _MSC_VER -#include <intrin.h> // for __cpuid -#endif - -#if __TBB_x86_32 || __TBB_x86_64 -void check_cpuid(int leaf, int sub_leaf, int registers[4]) { -#if _MSC_VER - __cpuidex(registers, leaf, sub_leaf); -#else - int reg_eax = 0; - int reg_ebx = 0; - int reg_ecx = 0; - int reg_edx = 0; -#if __TBB_x86_32 && __PIC__ - // On 32-bit systems with position-independent code GCC fails to work around the stuff in EBX - // register. We help it using backup and restore. - __asm__("mov %%ebx, %%esi\n\t" - "cpuid\n\t" - "xchg %%ebx, %%esi" - : "=a"(reg_eax), "=S"(reg_ebx), "=c"(reg_ecx), "=d"(reg_edx) - : "0"(leaf), "2"(sub_leaf) // read value from eax and ecx - ); -#else - __asm__("cpuid" - : "=a"(reg_eax), "=b"(reg_ebx), "=c"(reg_ecx), "=d"(reg_edx) - : "0"(leaf), "2"(sub_leaf) // read value from eax and ecx - ); -#endif - registers[0] = reg_eax; - registers[1] = reg_ebx; - registers[2] = reg_ecx; - registers[3] = reg_edx; -#endif -} -#endif - -void detect_cpu_features(cpu_features_type& cpu_features) { - suppress_unused_warning(cpu_features); -#if __TBB_x86_32 || __TBB_x86_64 - const int rtm_ebx_mask = 1 << 11; - const int waitpkg_ecx_mask = 1 << 5; - int registers[4] = {0}; - - // Check RTM and WAITPKG - check_cpuid(7, 0, registers); - cpu_features.rtm_enabled = (registers[1] & rtm_ebx_mask) != 0; - cpu_features.waitpkg_enabled = (registers[2] & waitpkg_ecx_mask) != 0; -#endif /* (__TBB_x86_32 || __TBB_x86_64) */ -} - -} // namespace r1 -} // namespace detail -} // namespace tbb +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +// Source file for miscellaneous entities that are infrequently referenced by +// an executing program. + +#include "oneapi/tbb/detail/_exception.h" +#include "oneapi/tbb/detail/_machine.h" + +#include "oneapi/tbb/version.h" + +#include "misc.h" +#include "governor.h" +#include "assert_impl.h" // Out-of-line TBB assertion handling routines are instantiated here. 
+ +#include <cstdio> +#include <cstdlib> +#include <stdexcept> +#include <cstring> +#include <cstdarg> + +#if _WIN32||_WIN64 +#include <windows.h> +#endif + +#if !_WIN32 +#include <unistd.h> // sysconf(_SC_PAGESIZE) +#endif + +namespace tbb { +namespace detail { +namespace r1 { + +//------------------------------------------------------------------------ +// governor data +//------------------------------------------------------------------------ +cpu_features_type governor::cpu_features; + + +size_t DefaultSystemPageSize() { +#if _WIN32 + SYSTEM_INFO si; + GetSystemInfo(&si); + return si.dwPageSize; +#else + return sysconf(_SC_PAGESIZE); +#endif +} + +/** The leading "\0" is here so that applying "strings" to the binary delivers a clean result. */ +static const char VersionString[] = "\0" TBB_VERSION_STRINGS; + +static bool PrintVersionFlag = false; + +void PrintVersion() { + PrintVersionFlag = true; + std::fputs(VersionString+1,stderr); +} + +void PrintExtraVersionInfo( const char* category, const char* format, ... ) { + if( PrintVersionFlag ) { + char str[1024]; std::memset(str, 0, 1024); + va_list args; va_start(args, format); + // Note: correct vsnprintf definition obtained from tbb_assert_impl.h + std::vsnprintf( str, 1024-1, format, args); + va_end(args); + std::fprintf(stderr, "oneTBB: %s\t%s\n", category, str ); + } +} + +//! check for transaction support. +#if _MSC_VER +#include <intrin.h> // for __cpuid +#endif + +#if __TBB_x86_32 || __TBB_x86_64 +void check_cpuid(int leaf, int sub_leaf, int registers[4]) { +#if _MSC_VER + __cpuidex(registers, leaf, sub_leaf); +#else + int reg_eax = 0; + int reg_ebx = 0; + int reg_ecx = 0; + int reg_edx = 0; +#if __TBB_x86_32 && __PIC__ + // On 32-bit systems with position-independent code GCC fails to work around the stuff in EBX + // register. We help it using backup and restore. + __asm__("mov %%ebx, %%esi\n\t" + "cpuid\n\t" + "xchg %%ebx, %%esi" + : "=a"(reg_eax), "=S"(reg_ebx), "=c"(reg_ecx), "=d"(reg_edx) + : "0"(leaf), "2"(sub_leaf) // read value from eax and ecx + ); +#else + __asm__("cpuid" + : "=a"(reg_eax), "=b"(reg_ebx), "=c"(reg_ecx), "=d"(reg_edx) + : "0"(leaf), "2"(sub_leaf) // read value from eax and ecx + ); +#endif + registers[0] = reg_eax; + registers[1] = reg_ebx; + registers[2] = reg_ecx; + registers[3] = reg_edx; +#endif +} +#endif + +void detect_cpu_features(cpu_features_type& cpu_features) { + suppress_unused_warning(cpu_features); +#if __TBB_x86_32 || __TBB_x86_64 + const int rtm_ebx_mask = 1 << 11; + const int waitpkg_ecx_mask = 1 << 5; + int registers[4] = {0}; + + // Check RTM and WAITPKG + check_cpuid(7, 0, registers); + cpu_features.rtm_enabled = (registers[1] & rtm_ebx_mask) != 0; + cpu_features.waitpkg_enabled = (registers[2] & waitpkg_ecx_mask) != 0; +#endif /* (__TBB_x86_32 || __TBB_x86_64) */ +} + +} // namespace r1 +} // namespace detail +} // namespace tbb diff --git a/contrib/libs/tbb/src/tbb/misc.h b/contrib/libs/tbb/src/tbb/misc.h index 2da5f7bd7c..6a3cf778a4 100644 --- a/contrib/libs/tbb/src/tbb/misc.h +++ b/contrib/libs/tbb/src/tbb/misc.h @@ -1,289 +1,289 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
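// Illustrative sketch (hypothetical, GCC/Clang on x86 only; not part of this commit or the
// TBB sources) of the same CPUID query performed by check_cpuid/detect_cpu_features above:
// leaf 7, sub-leaf 0 reports RTM in EBX bit 11 and WAITPKG in ECX bit 5.
#include <cpuid.h>
#include <cstdio>

int main() {
    unsigned eax = 0, ebx = 0, ecx = 0, edx = 0;
    __get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx);   // CPUID leaf 7, sub-leaf 0
    bool rtm     = (ebx >> 11) & 1;                    // RTM:     EBX bit 11
    bool waitpkg = (ecx >> 5)  & 1;                    // WAITPKG: ECX bit  5
    std::printf("rtm=%d waitpkg=%d\n", (int)rtm, (int)waitpkg);
}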
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef _TBB_tbb_misc_H -#define _TBB_tbb_misc_H - -#include "oneapi/tbb/detail/_config.h" -#include "oneapi/tbb/detail/_assert.h" -#include "oneapi/tbb/detail/_utils.h" - -#if __TBB_ARENA_BINDING -#include "oneapi/tbb/info.h" -#endif /*__TBB_ARENA_BINDING*/ - -#if __linux__ || __FreeBSD__ -#include <sys/param.h> // __FreeBSD_version -#if __FreeBSD_version >= 701000 -#include <sys/cpuset.h> -#endif -#endif - -#include <atomic> - -// Does the operating system have a system call to pin a thread to a set of OS processors? -#define __TBB_OS_AFFINITY_SYSCALL_PRESENT ((__linux__ && !__ANDROID__) || (__FreeBSD_version >= 701000)) -// On IBM* Blue Gene* CNK nodes, the affinity API has restrictions that prevent its usability for TBB, -// and also sysconf(_SC_NPROCESSORS_ONLN) already takes process affinity into account. -#define __TBB_USE_OS_AFFINITY_SYSCALL (__TBB_OS_AFFINITY_SYSCALL_PRESENT && !__bg__) - -namespace tbb { -namespace detail { -namespace r1 { - -void runtime_warning(const char* format, ... ); - -#if __TBB_ARENA_BINDING -class task_arena; -class task_scheduler_observer; -#endif /*__TBB_ARENA_BINDING*/ - -const std::size_t MByte = 1024*1024; - -#if __TBB_WIN8UI_SUPPORT && (_WIN32_WINNT < 0x0A00) -// In Win8UI mode (Windows 8 Store* applications), TBB uses a thread creation API -// that does not allow to specify the stack size. -// Still, the thread stack size value, either explicit or default, is used by the scheduler. -// So here we set the default value to match the platform's default of 1MB. -const std::size_t ThreadStackSize = 1*MByte; -#else -const std::size_t ThreadStackSize = (sizeof(uintptr_t) <= 4 ? 2 : 4 )*MByte; -#endif - -#ifndef __TBB_HardwareConcurrency - -//! Returns maximal parallelism level supported by the current OS configuration. -int AvailableHwConcurrency(); - -#else - -inline int AvailableHwConcurrency() { - int n = __TBB_HardwareConcurrency(); - return n > 0 ? n : 1; // Fail safety strap -} -#endif /* __TBB_HardwareConcurrency */ - -//! Returns OS regular memory page size -size_t DefaultSystemPageSize(); - -//! Returns number of processor groups in the current OS configuration. -/** AvailableHwConcurrency must be called at least once before calling this method. **/ -int NumberOfProcessorGroups(); - -#if _WIN32||_WIN64 - -//! Retrieves index of processor group containing processor with the given index -int FindProcessorGroupIndex ( int processorIndex ); - -//! Affinitizes the thread to the specified processor group -void MoveThreadIntoProcessorGroup( void* hThread, int groupIndex ); - -#endif /* _WIN32||_WIN64 */ - -//! Prints TBB version information on stderr -void PrintVersion(); - -//! Prints arbitrary extra TBB version information on stderr -void PrintExtraVersionInfo( const char* category, const char* format, ... ); - -//! A callback routine to print RML version information on stderr -void PrintRMLVersionInfo( void* arg, const char* server_info ); - -// For TBB compilation only; not to be used in public headers -#if defined(min) || defined(max) -#undef min -#undef max -#endif - -//! Utility template function returning lesser of the two values. 
-/** Provided here to avoid including not strict safe <algorithm>.\n - In case operands cause signed/unsigned or size mismatch warnings it is caller's - responsibility to do the appropriate cast before calling the function. **/ -template<typename T> -T min ( const T& val1, const T& val2 ) { - return val1 < val2 ? val1 : val2; -} - -//! Utility template function returning greater of the two values. -/** Provided here to avoid including not strict safe <algorithm>.\n - In case operands cause signed/unsigned or size mismatch warnings it is caller's - responsibility to do the appropriate cast before calling the function. **/ -template<typename T> -T max ( const T& val1, const T& val2 ) { - return val1 < val2 ? val2 : val1; -} - -//! Utility helper structure to ease overload resolution -template<int > struct int_to_type {}; - -//------------------------------------------------------------------------ -// FastRandom -//------------------------------------------------------------------------ - -//! A fast random number generator. -/** Uses linear congruential method. */ -class FastRandom { -private: - unsigned x, c; - static const unsigned a = 0x9e3779b1; // a big prime number -public: - //! Get a random number. - unsigned short get() { - return get(x); - } - //! Get a random number for the given seed; update the seed for next use. - unsigned short get( unsigned& seed ) { - unsigned short r = (unsigned short)(seed>>16); - __TBB_ASSERT(c&1, "c must be odd for big rng period"); - seed = seed*a+c; - return r; - } - //! Construct a random number generator. - FastRandom( void* unique_ptr ) { init(uintptr_t(unique_ptr)); } - - template <typename T> - void init( T seed ) { - init(seed,int_to_type<sizeof(seed)>()); - } - void init( uint64_t seed , int_to_type<8> ) { - init(uint32_t((seed>>32)+seed), int_to_type<4>()); - } - void init( uint32_t seed, int_to_type<4> ) { - // threads use different seeds for unique sequences - c = (seed|1)*0xba5703f5; // c must be odd, shuffle by a prime number - x = c^(seed>>1); // also shuffle x for the first get() invocation - } -}; - -//------------------------------------------------------------------------ -// Atomic extensions -//------------------------------------------------------------------------ - -//! Atomically replaces value of dst with newValue if they satisfy condition of compare predicate -/** Return value semantics is the same as for CAS. 
**/ -template<typename T1, class Pred> -T1 atomic_update(std::atomic<T1>& dst, T1 newValue, Pred compare) { - T1 oldValue = dst.load(std::memory_order_acquire); - while ( compare(oldValue, newValue) ) { - if ( dst.compare_exchange_strong(oldValue, newValue) ) - break; - } - return oldValue; -} - -#if __TBB_USE_OS_AFFINITY_SYSCALL - #if __linux__ - typedef cpu_set_t basic_mask_t; - #elif __FreeBSD_version >= 701000 - typedef cpuset_t basic_mask_t; - #else - #error affinity_helper is not implemented in this OS - #endif - class affinity_helper : no_copy { - basic_mask_t* threadMask; - int is_changed; - public: - affinity_helper() : threadMask(NULL), is_changed(0) {} - ~affinity_helper(); - void protect_affinity_mask( bool restore_process_mask ); - void dismiss(); - }; - void destroy_process_mask(); -#else - class affinity_helper : no_copy { - public: - void protect_affinity_mask( bool ) {} - void dismiss() {} - }; - inline void destroy_process_mask(){} -#endif /* __TBB_USE_OS_AFFINITY_SYSCALL */ - -struct cpu_features_type { - bool rtm_enabled{false}; - bool waitpkg_enabled{false}; -}; - -void detect_cpu_features(cpu_features_type& cpu_features); - -#if __TBB_ARENA_BINDING -class binding_handler; - -binding_handler* construct_binding_handler(int slot_num, int numa_id, int core_type_id, int max_threads_per_core); -void destroy_binding_handler(binding_handler* handler_ptr); -void apply_affinity_mask(binding_handler* handler_ptr, int slot_num); -void restore_affinity_mask(binding_handler* handler_ptr, int slot_num); - -#endif /*__TBB_ARENA_BINDING*/ - -// RTM specific section -// abort code for mutexes that detect a conflict with another thread. -enum { - speculation_not_supported = 0x00, - speculation_transaction_aborted = 0x01, - speculation_can_retry = 0x02, - speculation_memadd_conflict = 0x04, - speculation_buffer_overflow = 0x08, - speculation_breakpoint_hit = 0x10, - speculation_nested_abort = 0x20, - speculation_xabort_mask = 0xFF000000, - speculation_xabort_shift = 24, - speculation_xabort_not_free = 0xFF, // The value (0xFF) below comes from the Intel(R) 64 and IA-32 Architectures Optimization Reference Manual 12.4.5 lock not free - speculation_successful_begin = 0xFFFFFFFF, - speculation_retry = speculation_transaction_aborted - | speculation_can_retry - | speculation_memadd_conflict -}; - -// We suppose that successful transactions are sequentially ordered and -// do not require additional memory fences around them. -// Technically it can be achieved only if xbegin has implicit -// acquire memory semantics an xend/xabort has release memory semantics on compiler and hardware level. 
-// See the article: https://arxiv.org/pdf/1710.04839.pdf -static inline unsigned int begin_transaction() { -#if __TBB_TSX_INTRINSICS_PRESENT - return _xbegin(); -#else - return speculation_not_supported; // return unsuccessful code -#endif -} - -static inline void end_transaction() { -#if __TBB_TSX_INTRINSICS_PRESENT - _xend(); -#endif -} - -static inline void abort_transaction() { -#if __TBB_TSX_INTRINSICS_PRESENT - _xabort(speculation_xabort_not_free); -#endif -} - -#if TBB_USE_ASSERT -static inline unsigned char is_in_transaction() { -#if __TBB_TSX_INTRINSICS_PRESENT - return _xtest(); -#else - return 0; -#endif -} -#endif // TBB_USE_ASSERT - -} // namespace r1 -} // namespace detail -} // namespace tbb - -#endif /* _TBB_tbb_misc_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef _TBB_tbb_misc_H +#define _TBB_tbb_misc_H + +#include "oneapi/tbb/detail/_config.h" +#include "oneapi/tbb/detail/_assert.h" +#include "oneapi/tbb/detail/_utils.h" + +#if __TBB_ARENA_BINDING +#include "oneapi/tbb/info.h" +#endif /*__TBB_ARENA_BINDING*/ + +#if __linux__ || __FreeBSD__ +#include <sys/param.h> // __FreeBSD_version +#if __FreeBSD_version >= 701000 +#include <sys/cpuset.h> +#endif +#endif + +#include <atomic> + +// Does the operating system have a system call to pin a thread to a set of OS processors? +#define __TBB_OS_AFFINITY_SYSCALL_PRESENT ((__linux__ && !__ANDROID__) || (__FreeBSD_version >= 701000)) +// On IBM* Blue Gene* CNK nodes, the affinity API has restrictions that prevent its usability for TBB, +// and also sysconf(_SC_NPROCESSORS_ONLN) already takes process affinity into account. +#define __TBB_USE_OS_AFFINITY_SYSCALL (__TBB_OS_AFFINITY_SYSCALL_PRESENT && !__bg__) + +namespace tbb { +namespace detail { +namespace r1 { + +void runtime_warning(const char* format, ... ); + +#if __TBB_ARENA_BINDING +class task_arena; +class task_scheduler_observer; +#endif /*__TBB_ARENA_BINDING*/ + +const std::size_t MByte = 1024*1024; + +#if __TBB_WIN8UI_SUPPORT && (_WIN32_WINNT < 0x0A00) +// In Win8UI mode (Windows 8 Store* applications), TBB uses a thread creation API +// that does not allow to specify the stack size. +// Still, the thread stack size value, either explicit or default, is used by the scheduler. +// So here we set the default value to match the platform's default of 1MB. +const std::size_t ThreadStackSize = 1*MByte; +#else +const std::size_t ThreadStackSize = (sizeof(uintptr_t) <= 4 ? 2 : 4 )*MByte; +#endif + +#ifndef __TBB_HardwareConcurrency + +//! Returns maximal parallelism level supported by the current OS configuration. +int AvailableHwConcurrency(); + +#else + +inline int AvailableHwConcurrency() { + int n = __TBB_HardwareConcurrency(); + return n > 0 ? n : 1; // Fail safety strap +} +#endif /* __TBB_HardwareConcurrency */ + +//! Returns OS regular memory page size +size_t DefaultSystemPageSize(); + +//! Returns number of processor groups in the current OS configuration. 
+/** AvailableHwConcurrency must be called at least once before calling this method. **/ +int NumberOfProcessorGroups(); + +#if _WIN32||_WIN64 + +//! Retrieves index of processor group containing processor with the given index +int FindProcessorGroupIndex ( int processorIndex ); + +//! Affinitizes the thread to the specified processor group +void MoveThreadIntoProcessorGroup( void* hThread, int groupIndex ); + +#endif /* _WIN32||_WIN64 */ + +//! Prints TBB version information on stderr +void PrintVersion(); + +//! Prints arbitrary extra TBB version information on stderr +void PrintExtraVersionInfo( const char* category, const char* format, ... ); + +//! A callback routine to print RML version information on stderr +void PrintRMLVersionInfo( void* arg, const char* server_info ); + +// For TBB compilation only; not to be used in public headers +#if defined(min) || defined(max) +#undef min +#undef max +#endif + +//! Utility template function returning lesser of the two values. +/** Provided here to avoid including not strict safe <algorithm>.\n + In case operands cause signed/unsigned or size mismatch warnings it is caller's + responsibility to do the appropriate cast before calling the function. **/ +template<typename T> +T min ( const T& val1, const T& val2 ) { + return val1 < val2 ? val1 : val2; +} + +//! Utility template function returning greater of the two values. +/** Provided here to avoid including not strict safe <algorithm>.\n + In case operands cause signed/unsigned or size mismatch warnings it is caller's + responsibility to do the appropriate cast before calling the function. **/ +template<typename T> +T max ( const T& val1, const T& val2 ) { + return val1 < val2 ? val2 : val1; +} + +//! Utility helper structure to ease overload resolution +template<int > struct int_to_type {}; + +//------------------------------------------------------------------------ +// FastRandom +//------------------------------------------------------------------------ + +//! A fast random number generator. +/** Uses linear congruential method. */ +class FastRandom { +private: + unsigned x, c; + static const unsigned a = 0x9e3779b1; // a big prime number +public: + //! Get a random number. + unsigned short get() { + return get(x); + } + //! Get a random number for the given seed; update the seed for next use. + unsigned short get( unsigned& seed ) { + unsigned short r = (unsigned short)(seed>>16); + __TBB_ASSERT(c&1, "c must be odd for big rng period"); + seed = seed*a+c; + return r; + } + //! Construct a random number generator. + FastRandom( void* unique_ptr ) { init(uintptr_t(unique_ptr)); } + + template <typename T> + void init( T seed ) { + init(seed,int_to_type<sizeof(seed)>()); + } + void init( uint64_t seed , int_to_type<8> ) { + init(uint32_t((seed>>32)+seed), int_to_type<4>()); + } + void init( uint32_t seed, int_to_type<4> ) { + // threads use different seeds for unique sequences + c = (seed|1)*0xba5703f5; // c must be odd, shuffle by a prime number + x = c^(seed>>1); // also shuffle x for the first get() invocation + } +}; + +//------------------------------------------------------------------------ +// Atomic extensions +//------------------------------------------------------------------------ + +//! Atomically replaces value of dst with newValue if they satisfy condition of compare predicate +/** Return value semantics is the same as for CAS. 
**/ +template<typename T1, class Pred> +T1 atomic_update(std::atomic<T1>& dst, T1 newValue, Pred compare) { + T1 oldValue = dst.load(std::memory_order_acquire); + while ( compare(oldValue, newValue) ) { + if ( dst.compare_exchange_strong(oldValue, newValue) ) + break; + } + return oldValue; +} + +#if __TBB_USE_OS_AFFINITY_SYSCALL + #if __linux__ + typedef cpu_set_t basic_mask_t; + #elif __FreeBSD_version >= 701000 + typedef cpuset_t basic_mask_t; + #else + #error affinity_helper is not implemented in this OS + #endif + class affinity_helper : no_copy { + basic_mask_t* threadMask; + int is_changed; + public: + affinity_helper() : threadMask(NULL), is_changed(0) {} + ~affinity_helper(); + void protect_affinity_mask( bool restore_process_mask ); + void dismiss(); + }; + void destroy_process_mask(); +#else + class affinity_helper : no_copy { + public: + void protect_affinity_mask( bool ) {} + void dismiss() {} + }; + inline void destroy_process_mask(){} +#endif /* __TBB_USE_OS_AFFINITY_SYSCALL */ + +struct cpu_features_type { + bool rtm_enabled{false}; + bool waitpkg_enabled{false}; +}; + +void detect_cpu_features(cpu_features_type& cpu_features); + +#if __TBB_ARENA_BINDING +class binding_handler; + +binding_handler* construct_binding_handler(int slot_num, int numa_id, int core_type_id, int max_threads_per_core); +void destroy_binding_handler(binding_handler* handler_ptr); +void apply_affinity_mask(binding_handler* handler_ptr, int slot_num); +void restore_affinity_mask(binding_handler* handler_ptr, int slot_num); + +#endif /*__TBB_ARENA_BINDING*/ + +// RTM specific section +// abort code for mutexes that detect a conflict with another thread. +enum { + speculation_not_supported = 0x00, + speculation_transaction_aborted = 0x01, + speculation_can_retry = 0x02, + speculation_memadd_conflict = 0x04, + speculation_buffer_overflow = 0x08, + speculation_breakpoint_hit = 0x10, + speculation_nested_abort = 0x20, + speculation_xabort_mask = 0xFF000000, + speculation_xabort_shift = 24, + speculation_xabort_not_free = 0xFF, // The value (0xFF) below comes from the Intel(R) 64 and IA-32 Architectures Optimization Reference Manual 12.4.5 lock not free + speculation_successful_begin = 0xFFFFFFFF, + speculation_retry = speculation_transaction_aborted + | speculation_can_retry + | speculation_memadd_conflict +}; + +// We suppose that successful transactions are sequentially ordered and +// do not require additional memory fences around them. +// Technically it can be achieved only if xbegin has implicit +// acquire memory semantics an xend/xabort has release memory semantics on compiler and hardware level. 
+// See the article: https://arxiv.org/pdf/1710.04839.pdf +static inline unsigned int begin_transaction() { +#if __TBB_TSX_INTRINSICS_PRESENT + return _xbegin(); +#else + return speculation_not_supported; // return unsuccessful code +#endif +} + +static inline void end_transaction() { +#if __TBB_TSX_INTRINSICS_PRESENT + _xend(); +#endif +} + +static inline void abort_transaction() { +#if __TBB_TSX_INTRINSICS_PRESENT + _xabort(speculation_xabort_not_free); +#endif +} + +#if TBB_USE_ASSERT +static inline unsigned char is_in_transaction() { +#if __TBB_TSX_INTRINSICS_PRESENT + return _xtest(); +#else + return 0; +#endif +} +#endif // TBB_USE_ASSERT + +} // namespace r1 +} // namespace detail +} // namespace tbb + +#endif /* _TBB_tbb_misc_H */ diff --git a/contrib/libs/tbb/src/tbb/misc_ex.cpp b/contrib/libs/tbb/src/tbb/misc_ex.cpp index 96a2e200c6..177392bb65 100644 --- a/contrib/libs/tbb/src/tbb/misc_ex.cpp +++ b/contrib/libs/tbb/src/tbb/misc_ex.cpp @@ -1,398 +1,398 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -// Source file for miscellaneous entities that are infrequently referenced by -// an executing program, and implementation of which requires dynamic linking. - -#include "misc.h" - -#if !defined(__TBB_HardwareConcurrency) - -#include "dynamic_link.h" -#include <stdio.h> -#include <limits.h> - -#if _WIN32||_WIN64 -#include <windows.h> -#if __TBB_WIN8UI_SUPPORT -#include <thread> -#endif -#else -#include <unistd.h> -#if __linux__ -#include <sys/sysinfo.h> -#include <cstring> -#include <sched.h> -#include <cerrno> -#elif __sun -#include <sys/sysinfo.h> -#elif __FreeBSD__ -#include <cerrno> -#include <cstring> -#include <sys/param.h> // Required by <sys/cpuset.h> -#include <sys/cpuset.h> -#endif -#endif - -namespace tbb { -namespace detail { -namespace r1 { - -#if __TBB_USE_OS_AFFINITY_SYSCALL - -#if __linux__ -// Handlers for interoperation with libiomp -static int (*libiomp_try_restoring_original_mask)(); -// Table for mapping to libiomp entry points -static const dynamic_link_descriptor iompLinkTable[] = { - DLD_NOWEAK( kmp_set_thread_affinity_mask_initial, libiomp_try_restoring_original_mask ) -}; -#endif - -static void set_thread_affinity_mask( std::size_t maskSize, const basic_mask_t* threadMask ) { -#if __linux__ - if( sched_setaffinity( 0, maskSize, threadMask ) ) -#else /* FreeBSD */ - if( cpuset_setaffinity( CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, maskSize, threadMask ) ) -#endif - // Here and below the error severity is lowered from critical level - // because it may happen during TBB library unload because of not - // waiting for workers to complete (current RML policy, to be fixed). 
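// Illustrative sketch (hypothetical, not part of this commit or the TBB sources) of how the
// atomic_update helper declared in misc.h above can be used: atomically raise a
// high-water mark, retrying only while the predicate still holds.
#include <atomic>
#include <functional>

template<typename T1, class Pred>
T1 atomic_update(std::atomic<T1>& dst, T1 newValue, Pred compare) {  // same shape as in misc.h
    T1 oldValue = dst.load(std::memory_order_acquire);
    while (compare(oldValue, newValue)) {
        if (dst.compare_exchange_strong(oldValue, newValue))
            break;
    }
    return oldValue;
}

static std::atomic<int> high_water{0};

void record_sample(int v) {
    // keep the maximum: replace only while the stored value is smaller than v
    atomic_update(high_water, v, std::less<int>());
}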
- // handle_perror( errno, "setaffinity syscall" ); - runtime_warning( "setaffinity syscall failed" ); -} - -static void get_thread_affinity_mask( std::size_t maskSize, basic_mask_t* threadMask ) { -#if __linux__ - if( sched_getaffinity( 0, maskSize, threadMask ) ) -#else /* FreeBSD */ - if( cpuset_getaffinity( CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, maskSize, threadMask ) ) -#endif - runtime_warning( "getaffinity syscall failed" ); -} - -static basic_mask_t* process_mask; -static int num_masks; - -void destroy_process_mask() { - if( process_mask ) { - delete [] process_mask; - } -} - -#define curMaskSize sizeof(basic_mask_t) * num_masks -affinity_helper::~affinity_helper() { - if( threadMask ) { - if( is_changed ) { - set_thread_affinity_mask( curMaskSize, threadMask ); - } - delete [] threadMask; - } -} -void affinity_helper::protect_affinity_mask( bool restore_process_mask ) { - if( threadMask == NULL && num_masks ) { // TODO: assert num_masks validity? - threadMask = new basic_mask_t [num_masks]; - std::memset( threadMask, 0, curMaskSize ); - get_thread_affinity_mask( curMaskSize, threadMask ); - if( restore_process_mask ) { - __TBB_ASSERT( process_mask, "A process mask is requested but not yet stored" ); - is_changed = memcmp( process_mask, threadMask, curMaskSize ); - if( is_changed ) - set_thread_affinity_mask( curMaskSize, process_mask ); - } else { - // Assume that the mask will be changed by the caller. - is_changed = 1; - } - } -} -void affinity_helper::dismiss() { - if( threadMask ) { - delete [] threadMask; - threadMask = NULL; - } - is_changed = 0; -} -#undef curMaskSize - -static std::atomic<do_once_state> hardware_concurrency_info; - -static int theNumProcs; - -static void initialize_hardware_concurrency_info () { - int err; - int availableProcs = 0; - int numMasks = 1; -#if __linux__ - int maxProcs = sysconf(_SC_NPROCESSORS_ONLN); - int pid = getpid(); -#else /* FreeBSD >= 7.1 */ - int maxProcs = sysconf(_SC_NPROCESSORS_ONLN); -#endif - basic_mask_t* processMask; - const std::size_t BasicMaskSize = sizeof(basic_mask_t); - for (;;) { - const int curMaskSize = BasicMaskSize * numMasks; - processMask = new basic_mask_t[numMasks]; - std::memset( processMask, 0, curMaskSize ); -#if __linux__ - err = sched_getaffinity( pid, curMaskSize, processMask ); - if ( !err || errno != EINVAL || curMaskSize * CHAR_BIT >= 256 * 1024 ) - break; -#else /* FreeBSD >= 7.1 */ - // CPU_LEVEL_WHICH - anonymous (current) mask, CPU_LEVEL_CPUSET - assigned mask - err = cpuset_getaffinity( CPU_LEVEL_WHICH, CPU_WHICH_PID, -1, curMaskSize, processMask ); - if ( !err || errno != ERANGE || curMaskSize * CHAR_BIT >= 16 * 1024 ) - break; -#endif /* FreeBSD >= 7.1 */ - delete[] processMask; - numMasks <<= 1; - } - if ( !err ) { - // We have found the mask size and captured the process affinity mask into processMask. - num_masks = numMasks; // do here because it's needed for affinity_helper to work -#if __linux__ - // For better coexistence with libiomp which might have changed the mask already, - // check for its presence and ask it to restore the mask. - dynamic_link_handle libhandle; - if ( dynamic_link( "libiomp5.so", iompLinkTable, 1, &libhandle, DYNAMIC_LINK_GLOBAL ) ) { - // We have found the symbol provided by libiomp5 for restoring original thread affinity. - affinity_helper affhelp; - affhelp.protect_affinity_mask( /*restore_process_mask=*/false ); - if ( libiomp_try_restoring_original_mask()==0 ) { - // Now we have the right mask to capture, restored by libiomp. 
- const int curMaskSize = BasicMaskSize * numMasks; - std::memset( processMask, 0, curMaskSize ); - get_thread_affinity_mask( curMaskSize, processMask ); - } else - affhelp.dismiss(); // thread mask has not changed - dynamic_unlink( libhandle ); - // Destructor of affinity_helper restores the thread mask (unless dismissed). - } -#endif - for ( int m = 0; availableProcs < maxProcs && m < numMasks; ++m ) { - for ( std::size_t i = 0; (availableProcs < maxProcs) && (i < BasicMaskSize * CHAR_BIT); ++i ) { - if ( CPU_ISSET( i, processMask + m ) ) - ++availableProcs; - } - } - process_mask = processMask; - } - else { - // Failed to get the process affinity mask; assume the whole machine can be used. - availableProcs = (maxProcs == INT_MAX) ? sysconf(_SC_NPROCESSORS_ONLN) : maxProcs; - delete[] processMask; - } - theNumProcs = availableProcs > 0 ? availableProcs : 1; // Fail safety strap - __TBB_ASSERT( theNumProcs <= sysconf(_SC_NPROCESSORS_ONLN), NULL ); -} - -int AvailableHwConcurrency() { - atomic_do_once( &initialize_hardware_concurrency_info, hardware_concurrency_info ); - return theNumProcs; -} - -/* End of __TBB_USE_OS_AFFINITY_SYSCALL implementation */ -#elif __ANDROID__ - -// Work-around for Android that reads the correct number of available CPUs since system calls are unreliable. -// Format of "present" file is: ([<int>-<int>|<int>],)+ -int AvailableHwConcurrency() { - FILE *fp = fopen("/sys/devices/system/cpu/present", "r"); - if (fp == NULL) return 1; - int num_args, lower, upper, num_cpus=0; - while ((num_args = fscanf(fp, "%u-%u", &lower, &upper)) != EOF) { - switch(num_args) { - case 2: num_cpus += upper - lower + 1; break; - case 1: num_cpus += 1; break; - } - fscanf(fp, ","); - } - return (num_cpus > 0) ? num_cpus : 1; -} - -#elif defined(_SC_NPROCESSORS_ONLN) - -int AvailableHwConcurrency() { - int n = sysconf(_SC_NPROCESSORS_ONLN); - return (n > 0) ? n : 1; -} - -#elif _WIN32||_WIN64 - -static std::atomic<do_once_state> hardware_concurrency_info; - -static const WORD TBB_ALL_PROCESSOR_GROUPS = 0xffff; - -// Statically allocate an array for processor group information. -// Windows 7 supports maximum 4 groups, but let's look ahead a little. -static const WORD MaxProcessorGroups = 64; - -struct ProcessorGroupInfo { - DWORD_PTR mask; ///< Affinity mask covering the whole group - int numProcs; ///< Number of processors in the group - int numProcsRunningTotal; ///< Subtotal of processors in this and preceding groups - - //! Total number of processor groups in the system - static int NumGroups; - - //! Index of the group with a slot reserved for the first external thread - /** In the context of multiple processor groups support current implementation - defines "the first external thread" as the first thread to invoke - AvailableHwConcurrency(). - - TODO: Implement a dynamic scheme remapping workers depending on the pending - external threads affinity. 
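// Illustrative sketch (hypothetical, not part of this commit or the TBB sources) of the
// Android fallback shown above: /sys/devices/system/cpu/present lists CPU ids as
// comma-separated ranges such as "0-3,5,7-8"; count how many CPUs it names.
#include <cstdio>

int count_present_cpus() {
    std::FILE* fp = std::fopen("/sys/devices/system/cpu/present", "r");
    if (fp == nullptr) return 1;
    int lower = 0, upper = 0, num_cpus = 0, num_args = 0;
    while ((num_args = std::fscanf(fp, "%d-%d", &lower, &upper)) != EOF) {
        if (num_args == 2)      num_cpus += upper - lower + 1;  // a range, e.g. "0-3"
        else if (num_args == 1) num_cpus += 1;                  // a single id, e.g. "5"
        else break;                                             // unexpected input
        std::fscanf(fp, ",");                                   // consume the separator
    }
    std::fclose(fp);
    return num_cpus > 0 ? num_cpus : 1;                         // fail-safety strap
}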
**/ - static int HoleIndex; -}; - -int ProcessorGroupInfo::NumGroups = 1; -int ProcessorGroupInfo::HoleIndex = 0; - -ProcessorGroupInfo theProcessorGroups[MaxProcessorGroups]; - -struct TBB_GROUP_AFFINITY { - DWORD_PTR Mask; - WORD Group; - WORD Reserved[3]; -}; - -static DWORD (WINAPI *TBB_GetActiveProcessorCount)( WORD groupIndex ) = NULL; -static WORD (WINAPI *TBB_GetActiveProcessorGroupCount)() = NULL; -static BOOL (WINAPI *TBB_SetThreadGroupAffinity)( HANDLE hThread, - const TBB_GROUP_AFFINITY* newAff, TBB_GROUP_AFFINITY *prevAff ); -static BOOL (WINAPI *TBB_GetThreadGroupAffinity)( HANDLE hThread, TBB_GROUP_AFFINITY* ); - -static const dynamic_link_descriptor ProcessorGroupsApiLinkTable[] = { - DLD(GetActiveProcessorCount, TBB_GetActiveProcessorCount) - , DLD(GetActiveProcessorGroupCount, TBB_GetActiveProcessorGroupCount) - , DLD(SetThreadGroupAffinity, TBB_SetThreadGroupAffinity) - , DLD(GetThreadGroupAffinity, TBB_GetThreadGroupAffinity) -}; - -static void initialize_hardware_concurrency_info () { -#if __TBB_WIN8UI_SUPPORT - // For these applications processor groups info is unavailable - // Setting up a number of processors for one processor group - theProcessorGroups[0].numProcs = theProcessorGroups[0].numProcsRunningTotal = std::thread::hardware_concurrency(); -#else /* __TBB_WIN8UI_SUPPORT */ - dynamic_link( "Kernel32.dll", ProcessorGroupsApiLinkTable, - sizeof(ProcessorGroupsApiLinkTable)/sizeof(dynamic_link_descriptor) ); - SYSTEM_INFO si; - GetNativeSystemInfo(&si); - DWORD_PTR pam, sam, m = 1; - GetProcessAffinityMask( GetCurrentProcess(), &pam, &sam ); - int nproc = 0; - for ( std::size_t i = 0; i < sizeof(DWORD_PTR) * CHAR_BIT; ++i, m <<= 1 ) { - if ( pam & m ) - ++nproc; - } - __TBB_ASSERT( nproc <= (int)si.dwNumberOfProcessors, NULL ); - // By default setting up a number of processors for one processor group - theProcessorGroups[0].numProcs = theProcessorGroups[0].numProcsRunningTotal = nproc; - // Setting up processor groups in case the process does not restrict affinity mask and more than one processor group is present - if ( nproc == (int)si.dwNumberOfProcessors && TBB_GetActiveProcessorCount ) { - // The process does not have restricting affinity mask and multiple processor groups are possible - ProcessorGroupInfo::NumGroups = (int)TBB_GetActiveProcessorGroupCount(); - __TBB_ASSERT( ProcessorGroupInfo::NumGroups <= MaxProcessorGroups, NULL ); - // Fail safety bootstrap. Release versions will limit available concurrency - // level, while debug ones would assert. - if ( ProcessorGroupInfo::NumGroups > MaxProcessorGroups ) - ProcessorGroupInfo::NumGroups = MaxProcessorGroups; - if ( ProcessorGroupInfo::NumGroups > 1 ) { - TBB_GROUP_AFFINITY ga; - if ( TBB_GetThreadGroupAffinity( GetCurrentThread(), &ga ) ) - ProcessorGroupInfo::HoleIndex = ga.Group; - int nprocs = 0; - for ( WORD i = 0; i < ProcessorGroupInfo::NumGroups; ++i ) { - ProcessorGroupInfo &pgi = theProcessorGroups[i]; - pgi.numProcs = (int)TBB_GetActiveProcessorCount(i); - __TBB_ASSERT( pgi.numProcs <= (int)sizeof(DWORD_PTR) * CHAR_BIT, NULL ); - pgi.mask = pgi.numProcs == sizeof(DWORD_PTR) * CHAR_BIT ? 
~(DWORD_PTR)0 : (DWORD_PTR(1) << pgi.numProcs) - 1; - pgi.numProcsRunningTotal = nprocs += pgi.numProcs; - } - __TBB_ASSERT( nprocs == (int)TBB_GetActiveProcessorCount( TBB_ALL_PROCESSOR_GROUPS ), NULL ); - } - } -#endif /* __TBB_WIN8UI_SUPPORT */ - - PrintExtraVersionInfo("Processor groups", "%d", ProcessorGroupInfo::NumGroups); - if (ProcessorGroupInfo::NumGroups>1) - for (int i=0; i<ProcessorGroupInfo::NumGroups; ++i) - PrintExtraVersionInfo( "----- Group", "%d: size %d", i, theProcessorGroups[i].numProcs); -} - -int NumberOfProcessorGroups() { - __TBB_ASSERT( hardware_concurrency_info == do_once_state::initialized, "NumberOfProcessorGroups is used before AvailableHwConcurrency" ); - return ProcessorGroupInfo::NumGroups; -} - -// Offset for the slot reserved for the first external thread -#define HoleAdjusted(procIdx, grpIdx) (procIdx + (holeIdx <= grpIdx)) - -int FindProcessorGroupIndex ( int procIdx ) { - // In case of oversubscription spread extra workers in a round robin manner - int holeIdx; - const int numProcs = theProcessorGroups[ProcessorGroupInfo::NumGroups - 1].numProcsRunningTotal; - if ( procIdx >= numProcs - 1 ) { - holeIdx = INT_MAX; - procIdx = (procIdx - numProcs + 1) % numProcs; - } - else - holeIdx = ProcessorGroupInfo::HoleIndex; - __TBB_ASSERT( hardware_concurrency_info == do_once_state::initialized, "FindProcessorGroupIndex is used before AvailableHwConcurrency" ); - // Approximate the likely group index assuming all groups are of the same size - int i = procIdx / theProcessorGroups[0].numProcs; - // Make sure the approximation is a valid group index - if (i >= ProcessorGroupInfo::NumGroups) i = ProcessorGroupInfo::NumGroups-1; - // Now adjust the approximation up or down - if ( theProcessorGroups[i].numProcsRunningTotal > HoleAdjusted(procIdx, i) ) { - while ( theProcessorGroups[i].numProcsRunningTotal - theProcessorGroups[i].numProcs > HoleAdjusted(procIdx, i) ) { - __TBB_ASSERT( i > 0, NULL ); - --i; - } - } - else { - do { - ++i; - } while ( theProcessorGroups[i].numProcsRunningTotal <= HoleAdjusted(procIdx, i) ); - } - __TBB_ASSERT( i < ProcessorGroupInfo::NumGroups, NULL ); - return i; -} - -void MoveThreadIntoProcessorGroup( void* hThread, int groupIndex ) { - __TBB_ASSERT( hardware_concurrency_info == do_once_state::initialized, "MoveThreadIntoProcessorGroup is used before AvailableHwConcurrency" ); - if ( !TBB_SetThreadGroupAffinity ) - return; - TBB_GROUP_AFFINITY ga = { theProcessorGroups[groupIndex].mask, (WORD)groupIndex, {0,0,0} }; - TBB_SetThreadGroupAffinity( hThread, &ga, NULL ); -} - -int AvailableHwConcurrency() { - atomic_do_once( &initialize_hardware_concurrency_info, hardware_concurrency_info ); - return theProcessorGroups[ProcessorGroupInfo::NumGroups - 1].numProcsRunningTotal; -} - -/* End of _WIN32||_WIN64 implementation */ -#else - #error AvailableHwConcurrency is not implemented for this OS -#endif - -} // namespace r1 -} // namespace detail -} // namespace tbb - -#endif /* !__TBB_HardwareConcurrency */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. +*/ + +// Source file for miscellaneous entities that are infrequently referenced by +// an executing program, and implementation of which requires dynamic linking. + +#include "misc.h" + +#if !defined(__TBB_HardwareConcurrency) + +#include "dynamic_link.h" +#include <stdio.h> +#include <limits.h> + +#if _WIN32||_WIN64 +#include <windows.h> +#if __TBB_WIN8UI_SUPPORT +#include <thread> +#endif +#else +#include <unistd.h> +#if __linux__ +#include <sys/sysinfo.h> +#include <cstring> +#include <sched.h> +#include <cerrno> +#elif __sun +#include <sys/sysinfo.h> +#elif __FreeBSD__ +#include <cerrno> +#include <cstring> +#include <sys/param.h> // Required by <sys/cpuset.h> +#include <sys/cpuset.h> +#endif +#endif + +namespace tbb { +namespace detail { +namespace r1 { + +#if __TBB_USE_OS_AFFINITY_SYSCALL + +#if __linux__ +// Handlers for interoperation with libiomp +static int (*libiomp_try_restoring_original_mask)(); +// Table for mapping to libiomp entry points +static const dynamic_link_descriptor iompLinkTable[] = { + DLD_NOWEAK( kmp_set_thread_affinity_mask_initial, libiomp_try_restoring_original_mask ) +}; +#endif + +static void set_thread_affinity_mask( std::size_t maskSize, const basic_mask_t* threadMask ) { +#if __linux__ + if( sched_setaffinity( 0, maskSize, threadMask ) ) +#else /* FreeBSD */ + if( cpuset_setaffinity( CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, maskSize, threadMask ) ) +#endif + // Here and below the error severity is lowered from critical level + // because it may happen during TBB library unload because of not + // waiting for workers to complete (current RML policy, to be fixed). + // handle_perror( errno, "setaffinity syscall" ); + runtime_warning( "setaffinity syscall failed" ); +} + +static void get_thread_affinity_mask( std::size_t maskSize, basic_mask_t* threadMask ) { +#if __linux__ + if( sched_getaffinity( 0, maskSize, threadMask ) ) +#else /* FreeBSD */ + if( cpuset_getaffinity( CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, maskSize, threadMask ) ) +#endif + runtime_warning( "getaffinity syscall failed" ); +} + +static basic_mask_t* process_mask; +static int num_masks; + +void destroy_process_mask() { + if( process_mask ) { + delete [] process_mask; + } +} + +#define curMaskSize sizeof(basic_mask_t) * num_masks +affinity_helper::~affinity_helper() { + if( threadMask ) { + if( is_changed ) { + set_thread_affinity_mask( curMaskSize, threadMask ); + } + delete [] threadMask; + } +} +void affinity_helper::protect_affinity_mask( bool restore_process_mask ) { + if( threadMask == NULL && num_masks ) { // TODO: assert num_masks validity? + threadMask = new basic_mask_t [num_masks]; + std::memset( threadMask, 0, curMaskSize ); + get_thread_affinity_mask( curMaskSize, threadMask ); + if( restore_process_mask ) { + __TBB_ASSERT( process_mask, "A process mask is requested but not yet stored" ); + is_changed = memcmp( process_mask, threadMask, curMaskSize ); + if( is_changed ) + set_thread_affinity_mask( curMaskSize, process_mask ); + } else { + // Assume that the mask will be changed by the caller. 
+ is_changed = 1; + } + } +} +void affinity_helper::dismiss() { + if( threadMask ) { + delete [] threadMask; + threadMask = NULL; + } + is_changed = 0; +} +#undef curMaskSize + +static std::atomic<do_once_state> hardware_concurrency_info; + +static int theNumProcs; + +static void initialize_hardware_concurrency_info () { + int err; + int availableProcs = 0; + int numMasks = 1; +#if __linux__ + int maxProcs = sysconf(_SC_NPROCESSORS_ONLN); + int pid = getpid(); +#else /* FreeBSD >= 7.1 */ + int maxProcs = sysconf(_SC_NPROCESSORS_ONLN); +#endif + basic_mask_t* processMask; + const std::size_t BasicMaskSize = sizeof(basic_mask_t); + for (;;) { + const int curMaskSize = BasicMaskSize * numMasks; + processMask = new basic_mask_t[numMasks]; + std::memset( processMask, 0, curMaskSize ); +#if __linux__ + err = sched_getaffinity( pid, curMaskSize, processMask ); + if ( !err || errno != EINVAL || curMaskSize * CHAR_BIT >= 256 * 1024 ) + break; +#else /* FreeBSD >= 7.1 */ + // CPU_LEVEL_WHICH - anonymous (current) mask, CPU_LEVEL_CPUSET - assigned mask + err = cpuset_getaffinity( CPU_LEVEL_WHICH, CPU_WHICH_PID, -1, curMaskSize, processMask ); + if ( !err || errno != ERANGE || curMaskSize * CHAR_BIT >= 16 * 1024 ) + break; +#endif /* FreeBSD >= 7.1 */ + delete[] processMask; + numMasks <<= 1; + } + if ( !err ) { + // We have found the mask size and captured the process affinity mask into processMask. + num_masks = numMasks; // do here because it's needed for affinity_helper to work +#if __linux__ + // For better coexistence with libiomp which might have changed the mask already, + // check for its presence and ask it to restore the mask. + dynamic_link_handle libhandle; + if ( dynamic_link( "libiomp5.so", iompLinkTable, 1, &libhandle, DYNAMIC_LINK_GLOBAL ) ) { + // We have found the symbol provided by libiomp5 for restoring original thread affinity. + affinity_helper affhelp; + affhelp.protect_affinity_mask( /*restore_process_mask=*/false ); + if ( libiomp_try_restoring_original_mask()==0 ) { + // Now we have the right mask to capture, restored by libiomp. + const int curMaskSize = BasicMaskSize * numMasks; + std::memset( processMask, 0, curMaskSize ); + get_thread_affinity_mask( curMaskSize, processMask ); + } else + affhelp.dismiss(); // thread mask has not changed + dynamic_unlink( libhandle ); + // Destructor of affinity_helper restores the thread mask (unless dismissed). + } +#endif + for ( int m = 0; availableProcs < maxProcs && m < numMasks; ++m ) { + for ( std::size_t i = 0; (availableProcs < maxProcs) && (i < BasicMaskSize * CHAR_BIT); ++i ) { + if ( CPU_ISSET( i, processMask + m ) ) + ++availableProcs; + } + } + process_mask = processMask; + } + else { + // Failed to get the process affinity mask; assume the whole machine can be used. + availableProcs = (maxProcs == INT_MAX) ? sysconf(_SC_NPROCESSORS_ONLN) : maxProcs; + delete[] processMask; + } + theNumProcs = availableProcs > 0 ? availableProcs : 1; // Fail safety strap + __TBB_ASSERT( theNumProcs <= sysconf(_SC_NPROCESSORS_ONLN), NULL ); +} + +int AvailableHwConcurrency() { + atomic_do_once( &initialize_hardware_concurrency_info, hardware_concurrency_info ); + return theNumProcs; +} + +/* End of __TBB_USE_OS_AFFINITY_SYSCALL implementation */ +#elif __ANDROID__ + +// Work-around for Android that reads the correct number of available CPUs since system calls are unreliable. 
+// Format of "present" file is: ([<int>-<int>|<int>],)+ +int AvailableHwConcurrency() { + FILE *fp = fopen("/sys/devices/system/cpu/present", "r"); + if (fp == NULL) return 1; + int num_args, lower, upper, num_cpus=0; + while ((num_args = fscanf(fp, "%u-%u", &lower, &upper)) != EOF) { + switch(num_args) { + case 2: num_cpus += upper - lower + 1; break; + case 1: num_cpus += 1; break; + } + fscanf(fp, ","); + } + return (num_cpus > 0) ? num_cpus : 1; +} + +#elif defined(_SC_NPROCESSORS_ONLN) + +int AvailableHwConcurrency() { + int n = sysconf(_SC_NPROCESSORS_ONLN); + return (n > 0) ? n : 1; +} + +#elif _WIN32||_WIN64 + +static std::atomic<do_once_state> hardware_concurrency_info; + +static const WORD TBB_ALL_PROCESSOR_GROUPS = 0xffff; + +// Statically allocate an array for processor group information. +// Windows 7 supports maximum 4 groups, but let's look ahead a little. +static const WORD MaxProcessorGroups = 64; + +struct ProcessorGroupInfo { + DWORD_PTR mask; ///< Affinity mask covering the whole group + int numProcs; ///< Number of processors in the group + int numProcsRunningTotal; ///< Subtotal of processors in this and preceding groups + + //! Total number of processor groups in the system + static int NumGroups; + + //! Index of the group with a slot reserved for the first external thread + /** In the context of multiple processor groups support current implementation + defines "the first external thread" as the first thread to invoke + AvailableHwConcurrency(). + + TODO: Implement a dynamic scheme remapping workers depending on the pending + external threads affinity. **/ + static int HoleIndex; +}; + +int ProcessorGroupInfo::NumGroups = 1; +int ProcessorGroupInfo::HoleIndex = 0; + +ProcessorGroupInfo theProcessorGroups[MaxProcessorGroups]; + +struct TBB_GROUP_AFFINITY { + DWORD_PTR Mask; + WORD Group; + WORD Reserved[3]; +}; + +static DWORD (WINAPI *TBB_GetActiveProcessorCount)( WORD groupIndex ) = NULL; +static WORD (WINAPI *TBB_GetActiveProcessorGroupCount)() = NULL; +static BOOL (WINAPI *TBB_SetThreadGroupAffinity)( HANDLE hThread, + const TBB_GROUP_AFFINITY* newAff, TBB_GROUP_AFFINITY *prevAff ); +static BOOL (WINAPI *TBB_GetThreadGroupAffinity)( HANDLE hThread, TBB_GROUP_AFFINITY* ); + +static const dynamic_link_descriptor ProcessorGroupsApiLinkTable[] = { + DLD(GetActiveProcessorCount, TBB_GetActiveProcessorCount) + , DLD(GetActiveProcessorGroupCount, TBB_GetActiveProcessorGroupCount) + , DLD(SetThreadGroupAffinity, TBB_SetThreadGroupAffinity) + , DLD(GetThreadGroupAffinity, TBB_GetThreadGroupAffinity) +}; + +static void initialize_hardware_concurrency_info () { +#if __TBB_WIN8UI_SUPPORT + // For these applications processor groups info is unavailable + // Setting up a number of processors for one processor group + theProcessorGroups[0].numProcs = theProcessorGroups[0].numProcsRunningTotal = std::thread::hardware_concurrency(); +#else /* __TBB_WIN8UI_SUPPORT */ + dynamic_link( "Kernel32.dll", ProcessorGroupsApiLinkTable, + sizeof(ProcessorGroupsApiLinkTable)/sizeof(dynamic_link_descriptor) ); + SYSTEM_INFO si; + GetNativeSystemInfo(&si); + DWORD_PTR pam, sam, m = 1; + GetProcessAffinityMask( GetCurrentProcess(), &pam, &sam ); + int nproc = 0; + for ( std::size_t i = 0; i < sizeof(DWORD_PTR) * CHAR_BIT; ++i, m <<= 1 ) { + if ( pam & m ) + ++nproc; + } + __TBB_ASSERT( nproc <= (int)si.dwNumberOfProcessors, NULL ); + // By default setting up a number of processors for one processor group + theProcessorGroups[0].numProcs = theProcessorGroups[0].numProcsRunningTotal = nproc; + 
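Aside on the Android fallback earlier in this hunk (this note and the sketch below are editorial illustrations, not part of the patch): the fallback counts CPUs by parsing /sys/devices/system/cpu/present, whose format the code documents as ([<int>-<int>|<int>],)+. A minimal standalone version of the same fscanf-driven loop might look like the following; the helper name count_present_cpus is hypothetical, and the extra guard and fclose are small additions not present in the hunk.

    #include <cstdio>

    // Hypothetical helper: count CPUs listed in a Linux/Android "present"
    // file such as "0-3,6". Returns 1 when the file is unreadable, matching
    // the fallback behaviour shown in the diff above.
    static int count_present_cpus(const char* path = "/sys/devices/system/cpu/present") {
        std::FILE* fp = std::fopen(path, "r");
        if (fp == nullptr) return 1;
        int num_args, lower, upper, num_cpus = 0;
        while ((num_args = std::fscanf(fp, "%d-%d", &lower, &upper)) != EOF) {
            if (num_args == 0) break;            // defensive guard, not in the original loop
            if (num_args == 2)
                num_cpus += upper - lower + 1;   // a range "a-b"
            else
                num_cpus += 1;                   // a single CPU id
            (void)std::fscanf(fp, ",");          // consume the separator, if any
        }
        std::fclose(fp);
        return (num_cpus > 0) ? num_cpus : 1;
    }

For example, a "present" file containing "0-3,6" yields 5 with this parsing scheme.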
// Setting up processor groups in case the process does not restrict affinity mask and more than one processor group is present + if ( nproc == (int)si.dwNumberOfProcessors && TBB_GetActiveProcessorCount ) { + // The process does not have restricting affinity mask and multiple processor groups are possible + ProcessorGroupInfo::NumGroups = (int)TBB_GetActiveProcessorGroupCount(); + __TBB_ASSERT( ProcessorGroupInfo::NumGroups <= MaxProcessorGroups, NULL ); + // Fail safety bootstrap. Release versions will limit available concurrency + // level, while debug ones would assert. + if ( ProcessorGroupInfo::NumGroups > MaxProcessorGroups ) + ProcessorGroupInfo::NumGroups = MaxProcessorGroups; + if ( ProcessorGroupInfo::NumGroups > 1 ) { + TBB_GROUP_AFFINITY ga; + if ( TBB_GetThreadGroupAffinity( GetCurrentThread(), &ga ) ) + ProcessorGroupInfo::HoleIndex = ga.Group; + int nprocs = 0; + for ( WORD i = 0; i < ProcessorGroupInfo::NumGroups; ++i ) { + ProcessorGroupInfo &pgi = theProcessorGroups[i]; + pgi.numProcs = (int)TBB_GetActiveProcessorCount(i); + __TBB_ASSERT( pgi.numProcs <= (int)sizeof(DWORD_PTR) * CHAR_BIT, NULL ); + pgi.mask = pgi.numProcs == sizeof(DWORD_PTR) * CHAR_BIT ? ~(DWORD_PTR)0 : (DWORD_PTR(1) << pgi.numProcs) - 1; + pgi.numProcsRunningTotal = nprocs += pgi.numProcs; + } + __TBB_ASSERT( nprocs == (int)TBB_GetActiveProcessorCount( TBB_ALL_PROCESSOR_GROUPS ), NULL ); + } + } +#endif /* __TBB_WIN8UI_SUPPORT */ + + PrintExtraVersionInfo("Processor groups", "%d", ProcessorGroupInfo::NumGroups); + if (ProcessorGroupInfo::NumGroups>1) + for (int i=0; i<ProcessorGroupInfo::NumGroups; ++i) + PrintExtraVersionInfo( "----- Group", "%d: size %d", i, theProcessorGroups[i].numProcs); +} + +int NumberOfProcessorGroups() { + __TBB_ASSERT( hardware_concurrency_info == do_once_state::initialized, "NumberOfProcessorGroups is used before AvailableHwConcurrency" ); + return ProcessorGroupInfo::NumGroups; +} + +// Offset for the slot reserved for the first external thread +#define HoleAdjusted(procIdx, grpIdx) (procIdx + (holeIdx <= grpIdx)) + +int FindProcessorGroupIndex ( int procIdx ) { + // In case of oversubscription spread extra workers in a round robin manner + int holeIdx; + const int numProcs = theProcessorGroups[ProcessorGroupInfo::NumGroups - 1].numProcsRunningTotal; + if ( procIdx >= numProcs - 1 ) { + holeIdx = INT_MAX; + procIdx = (procIdx - numProcs + 1) % numProcs; + } + else + holeIdx = ProcessorGroupInfo::HoleIndex; + __TBB_ASSERT( hardware_concurrency_info == do_once_state::initialized, "FindProcessorGroupIndex is used before AvailableHwConcurrency" ); + // Approximate the likely group index assuming all groups are of the same size + int i = procIdx / theProcessorGroups[0].numProcs; + // Make sure the approximation is a valid group index + if (i >= ProcessorGroupInfo::NumGroups) i = ProcessorGroupInfo::NumGroups-1; + // Now adjust the approximation up or down + if ( theProcessorGroups[i].numProcsRunningTotal > HoleAdjusted(procIdx, i) ) { + while ( theProcessorGroups[i].numProcsRunningTotal - theProcessorGroups[i].numProcs > HoleAdjusted(procIdx, i) ) { + __TBB_ASSERT( i > 0, NULL ); + --i; + } + } + else { + do { + ++i; + } while ( theProcessorGroups[i].numProcsRunningTotal <= HoleAdjusted(procIdx, i) ); + } + __TBB_ASSERT( i < ProcessorGroupInfo::NumGroups, NULL ); + return i; +} + +void MoveThreadIntoProcessorGroup( void* hThread, int groupIndex ) { + __TBB_ASSERT( hardware_concurrency_info == do_once_state::initialized, "MoveThreadIntoProcessorGroup is used before 
AvailableHwConcurrency" ); + if ( !TBB_SetThreadGroupAffinity ) + return; + TBB_GROUP_AFFINITY ga = { theProcessorGroups[groupIndex].mask, (WORD)groupIndex, {0,0,0} }; + TBB_SetThreadGroupAffinity( hThread, &ga, NULL ); +} + +int AvailableHwConcurrency() { + atomic_do_once( &initialize_hardware_concurrency_info, hardware_concurrency_info ); + return theProcessorGroups[ProcessorGroupInfo::NumGroups - 1].numProcsRunningTotal; +} + +/* End of _WIN32||_WIN64 implementation */ +#else + #error AvailableHwConcurrency is not implemented for this OS +#endif + +} // namespace r1 +} // namespace detail +} // namespace tbb + +#endif /* !__TBB_HardwareConcurrency */ diff --git a/contrib/libs/tbb/src/tbb/observer_proxy.cpp b/contrib/libs/tbb/src/tbb/observer_proxy.cpp index c6495c064c..4f7c07c266 100644 --- a/contrib/libs/tbb/src/tbb/observer_proxy.cpp +++ b/contrib/libs/tbb/src/tbb/observer_proxy.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,164 +14,164 @@ limitations under the License. */ -#include "oneapi/tbb/detail/_config.h" -#include "oneapi/tbb/detail/_utils.h" +#include "oneapi/tbb/detail/_config.h" +#include "oneapi/tbb/detail/_utils.h" #include "observer_proxy.h" #include "arena.h" -#include "main.h" -#include "thread_data.h" +#include "main.h" +#include "thread_data.h" + +#include <atomic> -#include <atomic> - namespace tbb { -namespace detail { -namespace r1 { +namespace detail { +namespace r1 { #if TBB_USE_ASSERT -extern std::atomic<int> the_observer_proxy_count; +extern std::atomic<int> the_observer_proxy_count; #endif /* TBB_USE_ASSERT */ -observer_proxy::observer_proxy( d1::task_scheduler_observer& tso ) - : my_ref_count(1), my_list(NULL), my_next(NULL), my_prev(NULL), my_observer(&tso) +observer_proxy::observer_proxy( d1::task_scheduler_observer& tso ) + : my_ref_count(1), my_list(NULL), my_next(NULL), my_prev(NULL), my_observer(&tso) { #if TBB_USE_ASSERT - ++the_observer_proxy_count; + ++the_observer_proxy_count; #endif /* TBB_USE_ASSERT */ } -observer_proxy::~observer_proxy() { +observer_proxy::~observer_proxy() { __TBB_ASSERT( !my_ref_count, "Attempt to destroy proxy still in use" ); poison_value(my_ref_count); poison_pointer(my_prev); poison_pointer(my_next); -#if TBB_USE_ASSERT - --the_observer_proxy_count; +#if TBB_USE_ASSERT + --the_observer_proxy_count; #endif /* TBB_USE_ASSERT */ } -void observer_list::clear() { +void observer_list::clear() { // Though the method will work fine for the empty list, we require the caller // to check for the list emptiness before invoking it to avoid extra overhead. __TBB_ASSERT( !empty(), NULL ); { scoped_lock lock(mutex(), /*is_writer=*/true); - observer_proxy *next = my_head.load(std::memory_order_relaxed); + observer_proxy *next = my_head.load(std::memory_order_relaxed); while ( observer_proxy *p = next ) { next = p->my_next; // Both proxy p and observer p->my_observer (if non-null) are guaranteed // to be alive while the list is locked. - d1::task_scheduler_observer *obs = p->my_observer; + d1::task_scheduler_observer *obs = p->my_observer; // Make sure that possible concurrent observer destruction does not // conflict with the proxy list cleanup. 
- if (!obs || !(p = obs->my_proxy.exchange(nullptr))) { + if (!obs || !(p = obs->my_proxy.exchange(nullptr))) { continue; - } + } // accessing 'obs' after detaching of obs->my_proxy leads to the race with observer destruction - __TBB_ASSERT(!next || p == next->my_prev, nullptr); - __TBB_ASSERT(is_alive(p->my_ref_count), "Observer's proxy died prematurely"); - __TBB_ASSERT(p->my_ref_count.load(std::memory_order_relaxed) == 1, "Reference for observer is missing"); - poison_pointer(p->my_observer); + __TBB_ASSERT(!next || p == next->my_prev, nullptr); + __TBB_ASSERT(is_alive(p->my_ref_count), "Observer's proxy died prematurely"); + __TBB_ASSERT(p->my_ref_count.load(std::memory_order_relaxed) == 1, "Reference for observer is missing"); + poison_pointer(p->my_observer); remove(p); - --p->my_ref_count; + --p->my_ref_count; delete p; } } - - // If observe(false) is called concurrently with the destruction of the arena, - // need to wait until all proxies are removed. - for (atomic_backoff backoff; ; backoff.pause()) { - scoped_lock lock(mutex(), /*is_writer=*/false); - if (my_head.load(std::memory_order_relaxed) == nullptr) { - break; - } - } - - __TBB_ASSERT(my_head.load(std::memory_order_relaxed) == nullptr && my_tail.load(std::memory_order_relaxed) == nullptr, nullptr); + + // If observe(false) is called concurrently with the destruction of the arena, + // need to wait until all proxies are removed. + for (atomic_backoff backoff; ; backoff.pause()) { + scoped_lock lock(mutex(), /*is_writer=*/false); + if (my_head.load(std::memory_order_relaxed) == nullptr) { + break; + } + } + + __TBB_ASSERT(my_head.load(std::memory_order_relaxed) == nullptr && my_tail.load(std::memory_order_relaxed) == nullptr, nullptr); } -void observer_list::insert( observer_proxy* p ) { +void observer_list::insert( observer_proxy* p ) { scoped_lock lock(mutex(), /*is_writer=*/true); - if (my_head.load(std::memory_order_relaxed)) { - p->my_prev = my_tail.load(std::memory_order_relaxed); - my_tail.load(std::memory_order_relaxed)->my_next = p; - } else { - my_head.store(p, std::memory_order_relaxed); + if (my_head.load(std::memory_order_relaxed)) { + p->my_prev = my_tail.load(std::memory_order_relaxed); + my_tail.load(std::memory_order_relaxed)->my_next = p; + } else { + my_head.store(p, std::memory_order_relaxed); } - my_tail.store(p, std::memory_order_relaxed); + my_tail.store(p, std::memory_order_relaxed); } -void observer_list::remove(observer_proxy* p) { - __TBB_ASSERT(my_head.load(std::memory_order_relaxed), "Attempt to remove an item from an empty list"); - __TBB_ASSERT(!my_tail.load(std::memory_order_relaxed)->my_next, "Last item's my_next must be NULL"); - if (p == my_tail.load(std::memory_order_relaxed)) { - __TBB_ASSERT(!p->my_next, nullptr); - my_tail.store(p->my_prev, std::memory_order_relaxed); - } else { - __TBB_ASSERT(p->my_next, nullptr); +void observer_list::remove(observer_proxy* p) { + __TBB_ASSERT(my_head.load(std::memory_order_relaxed), "Attempt to remove an item from an empty list"); + __TBB_ASSERT(!my_tail.load(std::memory_order_relaxed)->my_next, "Last item's my_next must be NULL"); + if (p == my_tail.load(std::memory_order_relaxed)) { + __TBB_ASSERT(!p->my_next, nullptr); + my_tail.store(p->my_prev, std::memory_order_relaxed); + } else { + __TBB_ASSERT(p->my_next, nullptr); p->my_next->my_prev = p->my_prev; } - if (p == my_head.load(std::memory_order_relaxed)) { - __TBB_ASSERT(!p->my_prev, nullptr); - my_head.store(p->my_next, std::memory_order_relaxed); - } else { - __TBB_ASSERT(p->my_prev, 
nullptr); + if (p == my_head.load(std::memory_order_relaxed)) { + __TBB_ASSERT(!p->my_prev, nullptr); + my_head.store(p->my_next, std::memory_order_relaxed); + } else { + __TBB_ASSERT(p->my_prev, nullptr); p->my_prev->my_next = p->my_next; } - __TBB_ASSERT((my_head.load(std::memory_order_relaxed) && my_tail.load(std::memory_order_relaxed)) || - (!my_head.load(std::memory_order_relaxed) && !my_tail.load(std::memory_order_relaxed)), nullptr); + __TBB_ASSERT((my_head.load(std::memory_order_relaxed) && my_tail.load(std::memory_order_relaxed)) || + (!my_head.load(std::memory_order_relaxed) && !my_tail.load(std::memory_order_relaxed)), nullptr); } -void observer_list::remove_ref(observer_proxy* p) { - std::uintptr_t r = p->my_ref_count.load(std::memory_order_acquire); - __TBB_ASSERT(is_alive(r), nullptr); - while (r > 1) { - if (p->my_ref_count.compare_exchange_strong(r, r - 1)) { +void observer_list::remove_ref(observer_proxy* p) { + std::uintptr_t r = p->my_ref_count.load(std::memory_order_acquire); + __TBB_ASSERT(is_alive(r), nullptr); + while (r > 1) { + if (p->my_ref_count.compare_exchange_strong(r, r - 1)) { return; } } - __TBB_ASSERT(r == 1, nullptr); + __TBB_ASSERT(r == 1, nullptr); // Reference count might go to zero { // Use lock to avoid resurrection by a thread concurrently walking the list observer_list::scoped_lock lock(mutex(), /*is_writer=*/true); r = --p->my_ref_count; - if (!r) { + if (!r) { remove(p); - } + } } - __TBB_ASSERT(r || !p->my_ref_count, nullptr); - if (!r) { + __TBB_ASSERT(r || !p->my_ref_count, nullptr); + if (!r) { delete p; - } + } } -void observer_list::do_notify_entry_observers(observer_proxy*& last, bool worker) { +void observer_list::do_notify_entry_observers(observer_proxy*& last, bool worker) { // Pointer p marches though the list from last (exclusively) to the end. - observer_proxy* p = last, * prev = p; - for (;;) { - d1::task_scheduler_observer* tso = nullptr; + observer_proxy* p = last, * prev = p; + for (;;) { + d1::task_scheduler_observer* tso = nullptr; // Hold lock on list only long enough to advance to the next proxy in the list. { scoped_lock lock(mutex(), /*is_writer=*/false); do { - if (p) { + if (p) { // We were already processing the list. - if (observer_proxy* q = p->my_next) { - if (p == prev) { + if (observer_proxy* q = p->my_next) { + if (p == prev) { remove_ref_fast(prev); // sets prev to NULL if successful - } + } p = q; - } else { + } else { // Reached the end of the list. - if (p == prev) { + if (p == prev) { // Keep the reference as we store the 'last' pointer in scheduler - __TBB_ASSERT(int(p->my_ref_count.load(std::memory_order_relaxed)) >= 1 + (p->my_observer ? 1 : 0), nullptr); + __TBB_ASSERT(int(p->my_ref_count.load(std::memory_order_relaxed)) >= 1 + (p->my_observer ? 
1 : 0), nullptr); } else { // The last few proxies were empty - __TBB_ASSERT(int(p->my_ref_count.load(std::memory_order_relaxed)), nullptr); + __TBB_ASSERT(int(p->my_ref_count.load(std::memory_order_relaxed)), nullptr); ++p->my_ref_count; - if (prev) { + if (prev) { lock.release(); remove_ref(prev); } @@ -181,130 +181,130 @@ void observer_list::do_notify_entry_observers(observer_proxy*& last, bool worker } } else { // Starting pass through the list - p = my_head.load(std::memory_order_relaxed); - if (!p) { + p = my_head.load(std::memory_order_relaxed); + if (!p) { return; - } + } } tso = p->my_observer; - } while (!tso); + } while (!tso); ++p->my_ref_count; ++tso->my_busy_count; } - __TBB_ASSERT(!prev || p != prev, nullptr); + __TBB_ASSERT(!prev || p != prev, nullptr); // Release the proxy pinned before p - if (prev) { + if (prev) { remove_ref(prev); - } + } // Do not hold any locks on the list while calling user's code. // Do not intercept any exceptions that may escape the callback so that // they are either handled by the TBB scheduler or passed to the debugger. tso->on_scheduler_entry(worker); - __TBB_ASSERT(p->my_ref_count.load(std::memory_order_relaxed), nullptr); + __TBB_ASSERT(p->my_ref_count.load(std::memory_order_relaxed), nullptr); intptr_t bc = --tso->my_busy_count; - __TBB_ASSERT_EX(bc >= 0, "my_busy_count underflowed"); + __TBB_ASSERT_EX(bc >= 0, "my_busy_count underflowed"); prev = p; } } -void observer_list::do_notify_exit_observers(observer_proxy* last, bool worker) { +void observer_list::do_notify_exit_observers(observer_proxy* last, bool worker) { // Pointer p marches though the list from the beginning to last (inclusively). - observer_proxy* p = nullptr, * prev = nullptr; - for (;;) { - d1::task_scheduler_observer* tso = nullptr; + observer_proxy* p = nullptr, * prev = nullptr; + for (;;) { + d1::task_scheduler_observer* tso = nullptr; // Hold lock on list only long enough to advance to the next proxy in the list. { scoped_lock lock(mutex(), /*is_writer=*/false); do { - if (p) { + if (p) { // We were already processing the list. - if (p != last) { - __TBB_ASSERT(p->my_next, "List items before 'last' must have valid my_next pointer"); - if (p == prev) + if (p != last) { + __TBB_ASSERT(p->my_next, "List items before 'last' must have valid my_next pointer"); + if (p == prev) remove_ref_fast(prev); // sets prev to NULL if successful p = p->my_next; } else { // remove the reference from the last item remove_ref_fast(p); - if (p) { + if (p) { lock.release(); - if (p != prev && prev) { - remove_ref(prev); - } + if (p != prev && prev) { + remove_ref(prev); + } remove_ref(p); } return; } } else { // Starting pass through the list - p = my_head.load(std::memory_order_relaxed); - __TBB_ASSERT(p, "Nonzero 'last' must guarantee that the global list is non-empty"); + p = my_head.load(std::memory_order_relaxed); + __TBB_ASSERT(p, "Nonzero 'last' must guarantee that the global list is non-empty"); } tso = p->my_observer; - } while (!tso); + } while (!tso); // The item is already refcounted - if (p != last) // the last is already referenced since entry notification + if (p != last) // the last is already referenced since entry notification ++p->my_ref_count; ++tso->my_busy_count; } - __TBB_ASSERT(!prev || p != prev, nullptr); - if (prev) + __TBB_ASSERT(!prev || p != prev, nullptr); + if (prev) remove_ref(prev); // Do not hold any locks on the list while calling user's code. 
// Do not intercept any exceptions that may escape the callback so that // they are either handled by the TBB scheduler or passed to the debugger. tso->on_scheduler_exit(worker); - __TBB_ASSERT(p->my_ref_count || p == last, nullptr); + __TBB_ASSERT(p->my_ref_count || p == last, nullptr); intptr_t bc = --tso->my_busy_count; - __TBB_ASSERT_EX(bc >= 0, "my_busy_count underflowed"); + __TBB_ASSERT_EX(bc >= 0, "my_busy_count underflowed"); prev = p; } } -void __TBB_EXPORTED_FUNC observe(d1::task_scheduler_observer &tso, bool enable) { +void __TBB_EXPORTED_FUNC observe(d1::task_scheduler_observer &tso, bool enable) { if( enable ) { - if( !tso.my_proxy.load(std::memory_order_relaxed) ) { - observer_proxy* p = new observer_proxy(tso); - tso.my_proxy.store(p, std::memory_order_relaxed); - tso.my_busy_count.store(0, std::memory_order_relaxed); - - thread_data* td = governor::get_thread_data_if_initialized(); - if (p->my_observer->my_task_arena == nullptr) { - if (!(td && td->my_arena)) { - td = governor::get_thread_data(); + if( !tso.my_proxy.load(std::memory_order_relaxed) ) { + observer_proxy* p = new observer_proxy(tso); + tso.my_proxy.store(p, std::memory_order_relaxed); + tso.my_busy_count.store(0, std::memory_order_relaxed); + + thread_data* td = governor::get_thread_data_if_initialized(); + if (p->my_observer->my_task_arena == nullptr) { + if (!(td && td->my_arena)) { + td = governor::get_thread_data(); } - __TBB_ASSERT(__TBB_InitOnce::initialization_done(), nullptr); - __TBB_ASSERT(td && td->my_arena, nullptr); - p->my_list = &td->my_arena->my_observers; - } else { - d1::task_arena* ta = p->my_observer->my_task_arena; - arena* a = ta->my_arena.load(std::memory_order_acquire); - if (a == nullptr) { // Avoid recursion during arena initialization - ta->initialize(); - a = ta->my_arena.load(std::memory_order_relaxed); + __TBB_ASSERT(__TBB_InitOnce::initialization_done(), nullptr); + __TBB_ASSERT(td && td->my_arena, nullptr); + p->my_list = &td->my_arena->my_observers; + } else { + d1::task_arena* ta = p->my_observer->my_task_arena; + arena* a = ta->my_arena.load(std::memory_order_acquire); + if (a == nullptr) { // Avoid recursion during arena initialization + ta->initialize(); + a = ta->my_arena.load(std::memory_order_relaxed); } - __TBB_ASSERT(a != nullptr, nullptr); - p->my_list = &a->my_observers; + __TBB_ASSERT(a != nullptr, nullptr); + p->my_list = &a->my_observers; + } + p->my_list->insert(p); + // Notify newly activated observer and other pending ones if it belongs to current arena + if (td && td->my_arena && &td->my_arena->my_observers == p->my_list) { + p->my_list->notify_entry_observers(td->my_last_observer, td->my_is_worker); } - p->my_list->insert(p); - // Notify newly activated observer and other pending ones if it belongs to current arena - if (td && td->my_arena && &td->my_arena->my_observers == p->my_list) { - p->my_list->notify_entry_observers(td->my_last_observer, td->my_is_worker); - } } } else { // Make sure that possible concurrent proxy list cleanup does not conflict // with the observer destruction here. - if ( observer_proxy* proxy = tso.my_proxy.exchange(nullptr) ) { + if ( observer_proxy* proxy = tso.my_proxy.exchange(nullptr) ) { // List destruction should not touch this proxy after we've won the above interlocked exchange. 
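Aside on the exchange just above (an editorial sketch, not part of the patch): both observer_list::clear() earlier in this file and the observe(tso, false) path here claim the proxy with my_proxy.exchange(nullptr), so exactly one of the two racing teardown paths ends up responsible for unlinking and deleting it. A rough standalone illustration of that claim-by-exchange idea, with purely hypothetical names, is:

    #include <atomic>

    // Hypothetical types illustrating the pattern: whichever of two racing
    // teardown paths swaps the pointer out first becomes the unique party
    // allowed to release the object.
    struct resource { int payload = 0; };

    struct shared_slot {
        std::atomic<resource*> ptr{nullptr};

        // Safe to call from either racing path; at most one delete happens.
        void release_once() {
            if (resource* claimed = ptr.exchange(nullptr)) {
                delete claimed;   // this caller won the exchange and owns cleanup
            }
            // The losing caller sees nullptr and must not touch the object again.
        }
    };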
- __TBB_ASSERT( proxy->my_observer == &tso, nullptr); - __TBB_ASSERT( is_alive(proxy->my_ref_count.load(std::memory_order_relaxed)), "Observer's proxy died prematurely" ); - __TBB_ASSERT( proxy->my_ref_count.load(std::memory_order_relaxed) >= 1, "reference for observer missing" ); + __TBB_ASSERT( proxy->my_observer == &tso, nullptr); + __TBB_ASSERT( is_alive(proxy->my_ref_count.load(std::memory_order_relaxed)), "Observer's proxy died prematurely" ); + __TBB_ASSERT( proxy->my_ref_count.load(std::memory_order_relaxed) >= 1, "reference for observer missing" ); observer_list &list = *proxy->my_list; { // Ensure that none of the list walkers relies on observer pointer validity observer_list::scoped_lock lock(list.mutex(), /*is_writer=*/true); - proxy->my_observer = nullptr; + proxy->my_observer = nullptr; // Proxy may still be held by other threads (to track the last notified observer) if( !--proxy->my_ref_count ) {// nobody can increase it under exclusive lock list.remove(proxy); @@ -312,11 +312,11 @@ void __TBB_EXPORTED_FUNC observe(d1::task_scheduler_observer &tso, bool enable) delete proxy; } } - spin_wait_until_eq(tso.my_busy_count, 0); // other threads are still accessing the callback + spin_wait_until_eq(tso.my_busy_count, 0); // other threads are still accessing the callback } } } -} // namespace r1 -} // namespace detail +} // namespace r1 +} // namespace detail } // namespace tbb diff --git a/contrib/libs/tbb/src/tbb/observer_proxy.h b/contrib/libs/tbb/src/tbb/observer_proxy.h index 174c1b1f6c..2450247ecd 100644 --- a/contrib/libs/tbb/src/tbb/observer_proxy.h +++ b/contrib/libs/tbb/src/tbb/observer_proxy.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,18 +14,18 @@ limitations under the License. */ -#ifndef __TBB_observer_proxy_H -#define __TBB_observer_proxy_H +#ifndef __TBB_observer_proxy_H +#define __TBB_observer_proxy_H -#include "oneapi/tbb/detail/_config.h" -#include "oneapi/tbb/detail/_aligned_space.h" +#include "oneapi/tbb/detail/_config.h" +#include "oneapi/tbb/detail/_aligned_space.h" -#include "oneapi/tbb/task_scheduler_observer.h" -#include "oneapi/tbb/spin_rw_mutex.h" +#include "oneapi/tbb/task_scheduler_observer.h" +#include "oneapi/tbb/spin_rw_mutex.h" namespace tbb { -namespace detail { -namespace r1 { +namespace detail { +namespace r1 { class observer_list { friend class arena; @@ -35,10 +35,10 @@ class observer_list { typedef aligned_space<spin_rw_mutex> my_mutex_type; //! Pointer to the head of this list. - std::atomic<observer_proxy*> my_head{nullptr}; + std::atomic<observer_proxy*> my_head{nullptr}; //! Pointer to the tail of this list. - std::atomic<observer_proxy*> my_tail{nullptr}; + std::atomic<observer_proxy*> my_tail{nullptr}; //! Mutex protecting this list. my_mutex_type my_mutex; @@ -57,7 +57,7 @@ class observer_list { void do_notify_exit_observers( observer_proxy* last, bool worker ); public: - observer_list () = default; + observer_list () = default; //! Removes and destroys all observer proxies from the list. /** Cannot be used concurrently with other methods. **/ @@ -79,11 +79,11 @@ public: //! Accessor to the reader-writer mutex associated with the list. 
spin_rw_mutex& mutex () { return my_mutex.begin()[0]; } - bool empty () const { return my_head.load(std::memory_order_relaxed) == nullptr; } + bool empty () const { return my_head.load(std::memory_order_relaxed) == nullptr; } //! Call entry notifications on observers added after last was notified. /** Updates last to become the last notified observer proxy (in the global list) - or leaves it to be nullptr. The proxy has its refcount incremented. **/ + or leaves it to be nullptr. The proxy has its refcount incremented. **/ inline void notify_entry_observers( observer_proxy*& last, bool worker ); //! Call exit notifications on last and observers added before it. @@ -95,14 +95,14 @@ public: object into a proxy so that a list item remained valid even after the corresponding proxy object is destroyed by the user code. **/ class observer_proxy { - friend class task_scheduler_observer; + friend class task_scheduler_observer; friend class observer_list; - friend void observe(d1::task_scheduler_observer&, bool); + friend void observe(d1::task_scheduler_observer&, bool); //! Reference count used for garbage collection. /** 1 for reference from my task_scheduler_observer. 1 for each task dispatcher's last observer pointer. No accounting for neighbors in the shared list. */ - std::atomic<std::uintptr_t> my_ref_count; + std::atomic<std::uintptr_t> my_ref_count; //! Reference to the list this observer belongs to. observer_list* my_list; //! Pointer to next observer in the list specified by my_head. @@ -112,18 +112,18 @@ class observer_proxy { /** For the head of the list points to the last item. **/ observer_proxy* my_prev; //! Associated observer - d1::task_scheduler_observer* my_observer; + d1::task_scheduler_observer* my_observer; //! Constructs proxy for the given observer and adds it to the specified list. - observer_proxy( d1::task_scheduler_observer& ); + observer_proxy( d1::task_scheduler_observer& ); ~observer_proxy(); }; // class observer_proxy -void observer_list::remove_ref_fast( observer_proxy*& p ) { +void observer_list::remove_ref_fast( observer_proxy*& p ) { if( p->my_observer ) { // Can decrement refcount quickly, as it cannot drop to zero while under the lock. 
- std::uintptr_t r = --p->my_ref_count; + std::uintptr_t r = --p->my_ref_count; __TBB_ASSERT_EX( r, NULL ); p = NULL; } else { @@ -131,24 +131,24 @@ void observer_list::remove_ref_fast( observer_proxy*& p ) { } } -void observer_list::notify_entry_observers(observer_proxy*& last, bool worker) { - if (last == my_tail.load(std::memory_order_relaxed)) +void observer_list::notify_entry_observers(observer_proxy*& last, bool worker) { + if (last == my_tail.load(std::memory_order_relaxed)) return; - do_notify_entry_observers(last, worker); + do_notify_entry_observers(last, worker); } -void observer_list::notify_exit_observers( observer_proxy*& last, bool worker ) { - if (last == nullptr) { +void observer_list::notify_exit_observers( observer_proxy*& last, bool worker ) { + if (last == nullptr) { return; - } - __TBB_ASSERT(!is_poisoned(last), NULL); + } + __TBB_ASSERT(!is_poisoned(last), NULL); do_notify_exit_observers( last, worker ); - __TBB_ASSERT(last != nullptr, NULL); - poison_pointer(last); + __TBB_ASSERT(last != nullptr, NULL); + poison_pointer(last); } -} // namespace r1 -} // namespace detail +} // namespace r1 +} // namespace detail } // namespace tbb -#endif /* __TBB_observer_proxy_H */ +#endif /* __TBB_observer_proxy_H */ diff --git a/contrib/libs/tbb/src/tbb/parallel_pipeline.cpp b/contrib/libs/tbb/src/tbb/parallel_pipeline.cpp index cf14da2ce4..b7655c6b35 100644 --- a/contrib/libs/tbb/src/tbb/parallel_pipeline.cpp +++ b/contrib/libs/tbb/src/tbb/parallel_pipeline.cpp @@ -1,471 +1,471 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#include "oneapi/tbb/parallel_pipeline.h" -#include "oneapi/tbb/spin_mutex.h" -#include "oneapi/tbb/tbb_allocator.h" -#include "oneapi/tbb/cache_aligned_allocator.h" -#include "itt_notify.h" -#include "tls.h" -#include "oneapi/tbb/detail/_exception.h" -#include "oneapi/tbb/detail/_small_object_pool.h" - -namespace tbb { -namespace detail { -namespace r1 { - -void handle_perror(int error_code, const char* aux_info); - -using Token = unsigned long; - -//! A processing pipeline that applies filters to items. -/** @ingroup algorithms */ -class pipeline { - friend void parallel_pipeline(d1::task_group_context&, std::size_t, const d1::filter_node&); -public: - - //! Construct empty pipeline. - pipeline(d1::task_group_context& cxt, std::size_t max_token) : - my_context(cxt), - first_filter(nullptr), - last_filter(nullptr), - input_tokens(Token(max_token)), - end_of_input(false), - wait_ctx(0) { - __TBB_ASSERT( max_token>0, "pipeline::run must have at least one token" ); - } - - ~pipeline(); - - //! Add filter to end of pipeline. - void add_filter( d1::base_filter& ); - - //! 
Traverse tree of fitler-node in-order and add filter for each leaf - void fill_pipeline(const d1::filter_node& root) { - if( root.left && root.right ) { - fill_pipeline(*root.left); - fill_pipeline(*root.right); - } - else { - __TBB_ASSERT(!root.left && !root.right, "tree should be full"); - add_filter(*root.create_filter()); - } - } - -private: - friend class stage_task; - friend class base_filter; - friend void set_end_of_input(d1::base_filter& bf); - - task_group_context& my_context; - - //! Pointer to first filter in the pipeline. - d1::base_filter* first_filter; - - //! Pointer to last filter in the pipeline. - d1::base_filter* last_filter; - - //! Number of idle tokens waiting for input stage. - std::atomic<Token> input_tokens; - - //! False until flow_control::stop() is called. - std::atomic<bool> end_of_input; - - d1::wait_context wait_ctx; -}; - -//! This structure is used to store task information in a input buffer -struct task_info { - void* my_object = nullptr; - //! Invalid unless a task went through an ordered stage. - Token my_token = 0; - //! False until my_token is set. - bool my_token_ready = false; - //! True if my_object is valid. - bool is_valid = false; - //! Set to initial state (no object, no token) - void reset() { - my_object = nullptr; - my_token = 0; - my_token_ready = false; - is_valid = false; - } -}; - -//! A buffer of input items for a filter. -/** Each item is a task_info, inserted into a position in the buffer corresponding to a Token. */ -class input_buffer { - friend class base_filter; - friend class stage_task; - friend class pipeline; - friend void set_end_of_input(d1::base_filter& bf); - - using size_type = Token; - - //! Array of deferred tasks that cannot yet start executing. - task_info* array; - - //! Size of array - /** Always 0 or a power of 2 */ - size_type array_size; - - //! Lowest token that can start executing. - /** All prior Token have already been seen. */ - Token low_token; - - //! Serializes updates. - spin_mutex array_mutex; - - //! Resize "array". - /** Caller is responsible to acquiring a lock on "array_mutex". */ - void grow( size_type minimum_size ); - - //! Initial size for "array" - /** Must be a power of 2 */ - static const size_type initial_buffer_size = 4; - - //! Used for out of order buffer, and for assigning my_token if is_ordered and my_token not already assigned - Token high_token; - - //! True for ordered filter, false otherwise. - const bool is_ordered; - - //! for parallel filters that accepts NULLs, thread-local flag for reaching end_of_input - using end_of_input_tls_t = basic_tls<std::intptr_t>; - end_of_input_tls_t end_of_input_tls; - bool end_of_input_tls_allocated; // no way to test pthread creation of TLS - -public: - input_buffer(const input_buffer&) = delete; - input_buffer& operator=(const input_buffer&) = delete; - - //! Construct empty buffer. - input_buffer( bool ordered) : - array(nullptr), - array_size(0), - low_token(0), - high_token(0), - is_ordered(ordered), - end_of_input_tls(), - end_of_input_tls_allocated(false) { - grow(initial_buffer_size); - __TBB_ASSERT( array, nullptr ); - } - - //! Destroy the buffer. - ~input_buffer() { - __TBB_ASSERT( array, nullptr ); - cache_aligned_allocator<task_info>().deallocate(array,array_size); - poison_pointer( array ); - if( end_of_input_tls_allocated ) { - destroy_my_tls(); - } - } - - //! Define order when the first filter is serial_in_order. - Token get_ordered_token(){ - return high_token++; - } - - //! Put a token into the buffer. 
- /** If task information was placed into buffer, returns true; - otherwise returns false, informing the caller to create and spawn a task. - */ - bool try_put_token( task_info& info ) { - info.is_valid = true; - spin_mutex::scoped_lock lock( array_mutex ); - Token token; - if( is_ordered ) { - if( !info.my_token_ready ) { - info.my_token = high_token++; - info.my_token_ready = true; - } - token = info.my_token; - } else - token = high_token++; - __TBB_ASSERT( (long)(token-low_token)>=0, nullptr ); - if( token!=low_token ) { - // Trying to put token that is beyond low_token. - // Need to wait until low_token catches up before dispatching. - if( token-low_token>=array_size ) - grow( token-low_token+1 ); - ITT_NOTIFY( sync_releasing, this ); - array[token&(array_size-1)] = info; - return true; - } - return false; - } - - //! Note that processing of a token is finished. - /** Fires up processing of the next token, if processing was deferred. */ - // Uses template to avoid explicit dependency on stage_task. - template<typename StageTask> - void try_to_spawn_task_for_next_token(StageTask& spawner, d1::execution_data& ed) { - task_info wakee; - { - spin_mutex::scoped_lock lock( array_mutex ); - // Wake the next task - task_info& item = array[++low_token & (array_size-1)]; - ITT_NOTIFY( sync_acquired, this ); - wakee = item; - item.is_valid = false; - } - if( wakee.is_valid ) - spawner.spawn_stage_task(wakee, ed); - } - - // end_of_input signal for parallel_pipeline, parallel input filters with 0 tokens allowed. - void create_my_tls() { - int status = end_of_input_tls.create(); - if(status) - handle_perror(status, "TLS not allocated for filter"); - end_of_input_tls_allocated = true; - } - void destroy_my_tls() { - int status = end_of_input_tls.destroy(); - if(status) - handle_perror(status, "Failed to destroy filter TLS"); - } - bool my_tls_end_of_input() { - return end_of_input_tls.get() != 0; - } - void set_my_tls_end_of_input() { - end_of_input_tls.set(1); - } -}; - -void input_buffer::grow( size_type minimum_size ) { - size_type old_size = array_size; - size_type new_size = old_size ? 2*old_size : initial_buffer_size; - while( new_size<minimum_size ) - new_size*=2; - task_info* new_array = cache_aligned_allocator<task_info>().allocate(new_size); - task_info* old_array = array; - for( size_type i=0; i<new_size; ++i ) - new_array[i].is_valid = false; - Token t=low_token; - for( size_type i=0; i<old_size; ++i, ++t ) - new_array[t&(new_size-1)] = old_array[t&(old_size-1)]; - array = new_array; - array_size = new_size; - if( old_array ) - cache_aligned_allocator<task_info>().deallocate(old_array,old_size); -} - -class stage_task : public d1::task, public task_info { -private: - friend class pipeline; - pipeline& my_pipeline; - d1::base_filter* my_filter; - d1::small_object_allocator m_allocator; - //! True if this task has not yet read the input. - bool my_at_start; - - //! True if this can be executed again. - bool execute_filter(d1::execution_data& ed); - - //! Spawn task if token is available. - void try_spawn_stage_task(d1::execution_data& ed) { - ITT_NOTIFY( sync_releasing, &my_pipeline.input_tokens ); - if( (my_pipeline.input_tokens.fetch_sub(1, std::memory_order_relaxed)) > 1 ) { - d1::small_object_allocator alloc{}; - r1::spawn( *alloc.new_object<stage_task>(ed, my_pipeline, alloc ), my_pipeline.my_context ); - } - } - -public: - - //! Construct stage_task for first stage in a pipeline. - /** Such a stage has not read any input yet. 
*/ - stage_task(pipeline& pipeline, d1::small_object_allocator& alloc ) : - my_pipeline(pipeline), - my_filter(pipeline.first_filter), - m_allocator(alloc), - my_at_start(true) - { - task_info::reset(); - my_pipeline.wait_ctx.reserve(); - } - //! Construct stage_task for a subsequent stage in a pipeline. - stage_task(pipeline& pipeline, d1::base_filter* filter, const task_info& info, d1::small_object_allocator& alloc) : - task_info(info), - my_pipeline(pipeline), - my_filter(filter), - m_allocator(alloc), - my_at_start(false) - { - my_pipeline.wait_ctx.reserve(); - } - //! Roughly equivalent to the constructor of input stage task - void reset() { - task_info::reset(); - my_filter = my_pipeline.first_filter; - my_at_start = true; - } - void finalize(d1::execution_data& ed) { - m_allocator.delete_object(this, ed); - } - //! The virtual task execution method - task* execute(d1::execution_data& ed) override { - if(!execute_filter(ed)) { - finalize(ed); - return nullptr; - } - return this; - } - task* cancel(d1::execution_data& ed) override { - finalize(ed); - return nullptr; - } - - ~stage_task() { - if ( my_filter && my_object ) { - my_filter->finalize(my_object); - my_object = nullptr; - } - my_pipeline.wait_ctx.release(); - } - //! Creates and spawns stage_task from task_info - void spawn_stage_task(const task_info& info, d1::execution_data& ed) { - d1::small_object_allocator alloc{}; - stage_task* clone = alloc.new_object<stage_task>(ed, my_pipeline, my_filter, info, alloc); - r1::spawn(*clone, my_pipeline.my_context); - } -}; - -bool stage_task::execute_filter(d1::execution_data& ed) { - __TBB_ASSERT( !my_at_start || !my_object, "invalid state of task" ); - if( my_at_start ) { - if( my_filter->is_serial() ) { - my_object = (*my_filter)(my_object); - if( my_object || ( my_filter->object_may_be_null() && !my_pipeline.end_of_input.load(std::memory_order_relaxed)) ) { - if( my_filter->is_ordered() ) { - my_token = my_filter->my_input_buffer->get_ordered_token(); - my_token_ready = true; - } - if( !my_filter->next_filter_in_pipeline ) { // we're only filter in pipeline - reset(); - return true; - } else { - try_spawn_stage_task(ed); - } - } else { - my_pipeline.end_of_input.store(true, std::memory_order_relaxed); - return false; - } - } else /*not is_serial*/ { - if ( my_pipeline.end_of_input.load(std::memory_order_relaxed) ) { - return false; - } - - try_spawn_stage_task(ed); - - my_object = (*my_filter)(my_object); - if( !my_object && (!my_filter->object_may_be_null() || my_filter->my_input_buffer->my_tls_end_of_input()) ){ - my_pipeline.end_of_input.store(true, std::memory_order_relaxed); - return false; - } - } - my_at_start = false; - } else { - my_object = (*my_filter)(my_object); - if( my_filter->is_serial() ) - my_filter->my_input_buffer->try_to_spawn_task_for_next_token(*this, ed); - } - my_filter = my_filter->next_filter_in_pipeline; - if( my_filter ) { - // There is another filter to execute. - if( my_filter->is_serial() ) { - // The next filter must execute tokens when they are available (in order for serial_in_order) - if( my_filter->my_input_buffer->try_put_token(*this) ){ - my_filter = nullptr; // To prevent deleting my_object twice if exception occurs - return false; - } - } - } else { - // Reached end of the pipe. 
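Aside on the token accounting around this point (an editorial sketch, not part of the patch): try_spawn_stage_task above decrements input_tokens before injecting a new input item, and the end-of-pipe code just below returns the credit with fetch_add, which is what caps the number of items in flight at max_token. A rough standalone credit counter in the same spirit, with hypothetical names and a more conservative acquire path than the hunk's fetch_sub, could be:

    #include <atomic>
    #include <cstddef>

    // Hypothetical in-flight limiter: try_acquire() succeeds while fewer than
    // 'limit' items are in flight; release() returns a credit when an item
    // leaves the pipeline.
    class token_budget {
        std::atomic<long> credits;
    public:
        explicit token_budget(std::size_t limit) : credits(static_cast<long>(limit)) {}

        bool try_acquire() {
            long c = credits.load(std::memory_order_relaxed);
            while (c > 0) {
                if (credits.compare_exchange_weak(c, c - 1, std::memory_order_relaxed))
                    return true;   // got a credit; a new item may enter
            }
            return false;          // budget exhausted; do not inject more input
        }

        void release() {
            credits.fetch_add(1, std::memory_order_relaxed);  // an item left the pipe
        }
    };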
- std::size_t ntokens_avail = my_pipeline.input_tokens.fetch_add(1, std::memory_order_relaxed); - - if( ntokens_avail>0 // Only recycle if there is one available token - || my_pipeline.end_of_input.load(std::memory_order_relaxed) ) { - return false; // No need to recycle for new input - } - ITT_NOTIFY( sync_acquired, &my_pipeline.input_tokens ); - // Recycle as an input stage task. - reset(); - } - return true; -} - -pipeline:: ~pipeline() { - while( first_filter ) { - d1::base_filter* f = first_filter; - if( input_buffer* b = f->my_input_buffer ) { - b->~input_buffer(); - deallocate_memory(b); - } - first_filter = f->next_filter_in_pipeline; - f->~base_filter(); - deallocate_memory(f); - } -} - -void pipeline::add_filter( d1::base_filter& new_fitler ) { - __TBB_ASSERT( new_fitler.next_filter_in_pipeline==d1::base_filter::not_in_pipeline(), "filter already part of pipeline?" ); - new_fitler.my_pipeline = this; - if ( first_filter == nullptr ) - first_filter = &new_fitler; - else - last_filter->next_filter_in_pipeline = &new_fitler; - new_fitler.next_filter_in_pipeline = nullptr; - last_filter = &new_fitler; - if( new_fitler.is_serial() ) { - new_fitler.my_input_buffer = new (allocate_memory(sizeof(input_buffer))) input_buffer( new_fitler.is_ordered() ); - } else { - if( first_filter == &new_fitler && new_fitler.object_may_be_null() ) { - //TODO: buffer only needed to hold TLS; could improve - new_fitler.my_input_buffer = new (allocate_memory(sizeof(input_buffer))) input_buffer( /*is_ordered*/false ); - new_fitler.my_input_buffer->create_my_tls(); - } - } -} - -void __TBB_EXPORTED_FUNC parallel_pipeline(d1::task_group_context& cxt, std::size_t max_token, const d1::filter_node& fn) { - pipeline pipe(cxt, max_token); - - pipe.fill_pipeline(fn); - - d1::small_object_allocator alloc{}; - stage_task& st = *alloc.new_object<stage_task>(pipe, alloc); - - // Start execution of tasks - r1::execute_and_wait(st, cxt, pipe.wait_ctx, cxt); -} - -void __TBB_EXPORTED_FUNC set_end_of_input(d1::base_filter& bf) { - __TBB_ASSERT(bf.my_input_buffer, nullptr); - __TBB_ASSERT(bf.object_may_be_null(), nullptr); - if(bf.is_serial() ) { - bf.my_pipeline->end_of_input.store(true, std::memory_order_relaxed); - } else { - __TBB_ASSERT(bf.my_input_buffer->end_of_input_tls_allocated, nullptr); - bf.my_input_buffer->set_my_tls_end_of_input(); - } -} - -} // namespace r1 -} // namespace detail -} // namespace tbb +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "oneapi/tbb/parallel_pipeline.h" +#include "oneapi/tbb/spin_mutex.h" +#include "oneapi/tbb/tbb_allocator.h" +#include "oneapi/tbb/cache_aligned_allocator.h" +#include "itt_notify.h" +#include "tls.h" +#include "oneapi/tbb/detail/_exception.h" +#include "oneapi/tbb/detail/_small_object_pool.h" + +namespace tbb { +namespace detail { +namespace r1 { + +void handle_perror(int error_code, const char* aux_info); + +using Token = unsigned long; + +//! A processing pipeline that applies filters to items. 
+/** @ingroup algorithms */ +class pipeline { + friend void parallel_pipeline(d1::task_group_context&, std::size_t, const d1::filter_node&); +public: + + //! Construct empty pipeline. + pipeline(d1::task_group_context& cxt, std::size_t max_token) : + my_context(cxt), + first_filter(nullptr), + last_filter(nullptr), + input_tokens(Token(max_token)), + end_of_input(false), + wait_ctx(0) { + __TBB_ASSERT( max_token>0, "pipeline::run must have at least one token" ); + } + + ~pipeline(); + + //! Add filter to end of pipeline. + void add_filter( d1::base_filter& ); + + //! Traverse tree of fitler-node in-order and add filter for each leaf + void fill_pipeline(const d1::filter_node& root) { + if( root.left && root.right ) { + fill_pipeline(*root.left); + fill_pipeline(*root.right); + } + else { + __TBB_ASSERT(!root.left && !root.right, "tree should be full"); + add_filter(*root.create_filter()); + } + } + +private: + friend class stage_task; + friend class base_filter; + friend void set_end_of_input(d1::base_filter& bf); + + task_group_context& my_context; + + //! Pointer to first filter in the pipeline. + d1::base_filter* first_filter; + + //! Pointer to last filter in the pipeline. + d1::base_filter* last_filter; + + //! Number of idle tokens waiting for input stage. + std::atomic<Token> input_tokens; + + //! False until flow_control::stop() is called. + std::atomic<bool> end_of_input; + + d1::wait_context wait_ctx; +}; + +//! This structure is used to store task information in a input buffer +struct task_info { + void* my_object = nullptr; + //! Invalid unless a task went through an ordered stage. + Token my_token = 0; + //! False until my_token is set. + bool my_token_ready = false; + //! True if my_object is valid. + bool is_valid = false; + //! Set to initial state (no object, no token) + void reset() { + my_object = nullptr; + my_token = 0; + my_token_ready = false; + is_valid = false; + } +}; + +//! A buffer of input items for a filter. +/** Each item is a task_info, inserted into a position in the buffer corresponding to a Token. */ +class input_buffer { + friend class base_filter; + friend class stage_task; + friend class pipeline; + friend void set_end_of_input(d1::base_filter& bf); + + using size_type = Token; + + //! Array of deferred tasks that cannot yet start executing. + task_info* array; + + //! Size of array + /** Always 0 or a power of 2 */ + size_type array_size; + + //! Lowest token that can start executing. + /** All prior Token have already been seen. */ + Token low_token; + + //! Serializes updates. + spin_mutex array_mutex; + + //! Resize "array". + /** Caller is responsible to acquiring a lock on "array_mutex". */ + void grow( size_type minimum_size ); + + //! Initial size for "array" + /** Must be a power of 2 */ + static const size_type initial_buffer_size = 4; + + //! Used for out of order buffer, and for assigning my_token if is_ordered and my_token not already assigned + Token high_token; + + //! True for ordered filter, false otherwise. + const bool is_ordered; + + //! for parallel filters that accepts NULLs, thread-local flag for reaching end_of_input + using end_of_input_tls_t = basic_tls<std::intptr_t>; + end_of_input_tls_t end_of_input_tls; + bool end_of_input_tls_allocated; // no way to test pthread creation of TLS + +public: + input_buffer(const input_buffer&) = delete; + input_buffer& operator=(const input_buffer&) = delete; + + //! Construct empty buffer. 
+ input_buffer( bool ordered) : + array(nullptr), + array_size(0), + low_token(0), + high_token(0), + is_ordered(ordered), + end_of_input_tls(), + end_of_input_tls_allocated(false) { + grow(initial_buffer_size); + __TBB_ASSERT( array, nullptr ); + } + + //! Destroy the buffer. + ~input_buffer() { + __TBB_ASSERT( array, nullptr ); + cache_aligned_allocator<task_info>().deallocate(array,array_size); + poison_pointer( array ); + if( end_of_input_tls_allocated ) { + destroy_my_tls(); + } + } + + //! Define order when the first filter is serial_in_order. + Token get_ordered_token(){ + return high_token++; + } + + //! Put a token into the buffer. + /** If task information was placed into buffer, returns true; + otherwise returns false, informing the caller to create and spawn a task. + */ + bool try_put_token( task_info& info ) { + info.is_valid = true; + spin_mutex::scoped_lock lock( array_mutex ); + Token token; + if( is_ordered ) { + if( !info.my_token_ready ) { + info.my_token = high_token++; + info.my_token_ready = true; + } + token = info.my_token; + } else + token = high_token++; + __TBB_ASSERT( (long)(token-low_token)>=0, nullptr ); + if( token!=low_token ) { + // Trying to put token that is beyond low_token. + // Need to wait until low_token catches up before dispatching. + if( token-low_token>=array_size ) + grow( token-low_token+1 ); + ITT_NOTIFY( sync_releasing, this ); + array[token&(array_size-1)] = info; + return true; + } + return false; + } + + //! Note that processing of a token is finished. + /** Fires up processing of the next token, if processing was deferred. */ + // Uses template to avoid explicit dependency on stage_task. + template<typename StageTask> + void try_to_spawn_task_for_next_token(StageTask& spawner, d1::execution_data& ed) { + task_info wakee; + { + spin_mutex::scoped_lock lock( array_mutex ); + // Wake the next task + task_info& item = array[++low_token & (array_size-1)]; + ITT_NOTIFY( sync_acquired, this ); + wakee = item; + item.is_valid = false; + } + if( wakee.is_valid ) + spawner.spawn_stage_task(wakee, ed); + } + + // end_of_input signal for parallel_pipeline, parallel input filters with 0 tokens allowed. + void create_my_tls() { + int status = end_of_input_tls.create(); + if(status) + handle_perror(status, "TLS not allocated for filter"); + end_of_input_tls_allocated = true; + } + void destroy_my_tls() { + int status = end_of_input_tls.destroy(); + if(status) + handle_perror(status, "Failed to destroy filter TLS"); + } + bool my_tls_end_of_input() { + return end_of_input_tls.get() != 0; + } + void set_my_tls_end_of_input() { + end_of_input_tls.set(1); + } +}; + +void input_buffer::grow( size_type minimum_size ) { + size_type old_size = array_size; + size_type new_size = old_size ? 2*old_size : initial_buffer_size; + while( new_size<minimum_size ) + new_size*=2; + task_info* new_array = cache_aligned_allocator<task_info>().allocate(new_size); + task_info* old_array = array; + for( size_type i=0; i<new_size; ++i ) + new_array[i].is_valid = false; + Token t=low_token; + for( size_type i=0; i<old_size; ++i, ++t ) + new_array[t&(new_size-1)] = old_array[t&(old_size-1)]; + array = new_array; + array_size = new_size; + if( old_array ) + cache_aligned_allocator<task_info>().deallocate(old_array,old_size); +} + +class stage_task : public d1::task, public task_info { +private: + friend class pipeline; + pipeline& my_pipeline; + d1::base_filter* my_filter; + d1::small_object_allocator m_allocator; + //! True if this task has not yet read the input. 
+ bool my_at_start; + + //! True if this can be executed again. + bool execute_filter(d1::execution_data& ed); + + //! Spawn task if token is available. + void try_spawn_stage_task(d1::execution_data& ed) { + ITT_NOTIFY( sync_releasing, &my_pipeline.input_tokens ); + if( (my_pipeline.input_tokens.fetch_sub(1, std::memory_order_relaxed)) > 1 ) { + d1::small_object_allocator alloc{}; + r1::spawn( *alloc.new_object<stage_task>(ed, my_pipeline, alloc ), my_pipeline.my_context ); + } + } + +public: + + //! Construct stage_task for first stage in a pipeline. + /** Such a stage has not read any input yet. */ + stage_task(pipeline& pipeline, d1::small_object_allocator& alloc ) : + my_pipeline(pipeline), + my_filter(pipeline.first_filter), + m_allocator(alloc), + my_at_start(true) + { + task_info::reset(); + my_pipeline.wait_ctx.reserve(); + } + //! Construct stage_task for a subsequent stage in a pipeline. + stage_task(pipeline& pipeline, d1::base_filter* filter, const task_info& info, d1::small_object_allocator& alloc) : + task_info(info), + my_pipeline(pipeline), + my_filter(filter), + m_allocator(alloc), + my_at_start(false) + { + my_pipeline.wait_ctx.reserve(); + } + //! Roughly equivalent to the constructor of input stage task + void reset() { + task_info::reset(); + my_filter = my_pipeline.first_filter; + my_at_start = true; + } + void finalize(d1::execution_data& ed) { + m_allocator.delete_object(this, ed); + } + //! The virtual task execution method + task* execute(d1::execution_data& ed) override { + if(!execute_filter(ed)) { + finalize(ed); + return nullptr; + } + return this; + } + task* cancel(d1::execution_data& ed) override { + finalize(ed); + return nullptr; + } + + ~stage_task() { + if ( my_filter && my_object ) { + my_filter->finalize(my_object); + my_object = nullptr; + } + my_pipeline.wait_ctx.release(); + } + //! 
Creates and spawns stage_task from task_info + void spawn_stage_task(const task_info& info, d1::execution_data& ed) { + d1::small_object_allocator alloc{}; + stage_task* clone = alloc.new_object<stage_task>(ed, my_pipeline, my_filter, info, alloc); + r1::spawn(*clone, my_pipeline.my_context); + } +}; + +bool stage_task::execute_filter(d1::execution_data& ed) { + __TBB_ASSERT( !my_at_start || !my_object, "invalid state of task" ); + if( my_at_start ) { + if( my_filter->is_serial() ) { + my_object = (*my_filter)(my_object); + if( my_object || ( my_filter->object_may_be_null() && !my_pipeline.end_of_input.load(std::memory_order_relaxed)) ) { + if( my_filter->is_ordered() ) { + my_token = my_filter->my_input_buffer->get_ordered_token(); + my_token_ready = true; + } + if( !my_filter->next_filter_in_pipeline ) { // we're only filter in pipeline + reset(); + return true; + } else { + try_spawn_stage_task(ed); + } + } else { + my_pipeline.end_of_input.store(true, std::memory_order_relaxed); + return false; + } + } else /*not is_serial*/ { + if ( my_pipeline.end_of_input.load(std::memory_order_relaxed) ) { + return false; + } + + try_spawn_stage_task(ed); + + my_object = (*my_filter)(my_object); + if( !my_object && (!my_filter->object_may_be_null() || my_filter->my_input_buffer->my_tls_end_of_input()) ){ + my_pipeline.end_of_input.store(true, std::memory_order_relaxed); + return false; + } + } + my_at_start = false; + } else { + my_object = (*my_filter)(my_object); + if( my_filter->is_serial() ) + my_filter->my_input_buffer->try_to_spawn_task_for_next_token(*this, ed); + } + my_filter = my_filter->next_filter_in_pipeline; + if( my_filter ) { + // There is another filter to execute. + if( my_filter->is_serial() ) { + // The next filter must execute tokens when they are available (in order for serial_in_order) + if( my_filter->my_input_buffer->try_put_token(*this) ){ + my_filter = nullptr; // To prevent deleting my_object twice if exception occurs + return false; + } + } + } else { + // Reached end of the pipe. + std::size_t ntokens_avail = my_pipeline.input_tokens.fetch_add(1, std::memory_order_relaxed); + + if( ntokens_avail>0 // Only recycle if there is one available token + || my_pipeline.end_of_input.load(std::memory_order_relaxed) ) { + return false; // No need to recycle for new input + } + ITT_NOTIFY( sync_acquired, &my_pipeline.input_tokens ); + // Recycle as an input stage task. + reset(); + } + return true; +} + +pipeline:: ~pipeline() { + while( first_filter ) { + d1::base_filter* f = first_filter; + if( input_buffer* b = f->my_input_buffer ) { + b->~input_buffer(); + deallocate_memory(b); + } + first_filter = f->next_filter_in_pipeline; + f->~base_filter(); + deallocate_memory(f); + } +} + +void pipeline::add_filter( d1::base_filter& new_fitler ) { + __TBB_ASSERT( new_fitler.next_filter_in_pipeline==d1::base_filter::not_in_pipeline(), "filter already part of pipeline?" 
); + new_fitler.my_pipeline = this; + if ( first_filter == nullptr ) + first_filter = &new_fitler; + else + last_filter->next_filter_in_pipeline = &new_fitler; + new_fitler.next_filter_in_pipeline = nullptr; + last_filter = &new_fitler; + if( new_fitler.is_serial() ) { + new_fitler.my_input_buffer = new (allocate_memory(sizeof(input_buffer))) input_buffer( new_fitler.is_ordered() ); + } else { + if( first_filter == &new_fitler && new_fitler.object_may_be_null() ) { + //TODO: buffer only needed to hold TLS; could improve + new_fitler.my_input_buffer = new (allocate_memory(sizeof(input_buffer))) input_buffer( /*is_ordered*/false ); + new_fitler.my_input_buffer->create_my_tls(); + } + } +} + +void __TBB_EXPORTED_FUNC parallel_pipeline(d1::task_group_context& cxt, std::size_t max_token, const d1::filter_node& fn) { + pipeline pipe(cxt, max_token); + + pipe.fill_pipeline(fn); + + d1::small_object_allocator alloc{}; + stage_task& st = *alloc.new_object<stage_task>(pipe, alloc); + + // Start execution of tasks + r1::execute_and_wait(st, cxt, pipe.wait_ctx, cxt); +} + +void __TBB_EXPORTED_FUNC set_end_of_input(d1::base_filter& bf) { + __TBB_ASSERT(bf.my_input_buffer, nullptr); + __TBB_ASSERT(bf.object_may_be_null(), nullptr); + if(bf.is_serial() ) { + bf.my_pipeline->end_of_input.store(true, std::memory_order_relaxed); + } else { + __TBB_ASSERT(bf.my_input_buffer->end_of_input_tls_allocated, nullptr); + bf.my_input_buffer->set_my_tls_end_of_input(); + } +} + +} // namespace r1 +} // namespace detail +} // namespace tbb diff --git a/contrib/libs/tbb/src/tbb/private_server.cpp b/contrib/libs/tbb/src/tbb/private_server.cpp index f029993e44..bc0af84bb4 100644 --- a/contrib/libs/tbb/src/tbb/private_server.cpp +++ b/contrib/libs/tbb/src/tbb/private_server.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,24 +14,24 @@ limitations under the License. */ -#include "oneapi/tbb/cache_aligned_allocator.h" - -#include "rml_tbb.h" -#include "rml_thread_monitor.h" - +#include "oneapi/tbb/cache_aligned_allocator.h" + +#include "rml_tbb.h" +#include "rml_thread_monitor.h" + #include "scheduler_common.h" #include "governor.h" -#include "misc.h" +#include "misc.h" + +#include <atomic> -#include <atomic> - namespace tbb { -namespace detail { -namespace r1 { +namespace detail { +namespace r1 { namespace rml { -using rml::internal::thread_monitor; +using rml::internal::thread_monitor; typedef thread_monitor::handle_type thread_handle; class private_server; @@ -55,7 +55,7 @@ private: //! Associated thread has ended normal life sequence and promises to never touch *this again. st_quit }; - std::atomic<state_t> my_state; + std::atomic<state_t> my_state; //! Associated server private_server& my_server; @@ -64,7 +64,7 @@ private: tbb_client& my_client; //! index used for avoiding the 64K aliasing problem - const std::size_t my_index; + const std::size_t my_index; //! Monitor for sleeping when there is no work to do. /** The invariant that holds for sleeping workers is: @@ -80,7 +80,7 @@ private: friend class private_server; //! Actions executed by the associated thread - void run() noexcept; + void run() noexcept; //! 
Wake up associated thread (or launch a thread if there is none) void wake_or_launch(); @@ -93,13 +93,13 @@ private: static void release_handle(thread_handle my_handle, bool join); protected: - private_worker( private_server& server, tbb_client& client, const std::size_t i ) : - my_state(st_init), my_server(server), my_client(client), my_index(i), - my_thread_monitor(), my_handle(), my_next() - {} + private_worker( private_server& server, tbb_client& client, const std::size_t i ) : + my_state(st_init), my_server(server), my_client(client), my_index(i), + my_thread_monitor(), my_handle(), my_next() + {} }; -static const std::size_t cache_line_size = tbb::detail::max_nfs_size; +static const std::size_t cache_line_size = tbb::detail::max_nfs_size; #if _MSC_VER && !defined(__INTEL_COMPILER) // Suppress overzealous compiler warnings about uninstantiable class @@ -109,7 +109,7 @@ static const std::size_t cache_line_size = tbb::detail::max_nfs_size; class padded_private_worker: public private_worker { char pad[cache_line_size - sizeof(private_worker)%cache_line_size]; public: - padded_private_worker( private_server& server, tbb_client& client, const std::size_t i ) + padded_private_worker( private_server& server, tbb_client& client, const std::size_t i ) : private_worker(server,client,i) { suppress_unused_warning(pad); } }; #if _MSC_VER && !defined(__INTEL_COMPILER) @@ -124,29 +124,29 @@ private: const tbb_client::size_type my_n_thread; //! Stack size for each thread. */ - const std::size_t my_stack_size; + const std::size_t my_stack_size; //! Number of jobs that could use their associated thread minus number of active threads. /** If negative, indicates oversubscription. If positive, indicates that more threads should run. Can be lowered asynchronously, but must be raised only while holding my_asleep_list_mutex, because raising it impacts the invariant for sleeping threads. */ - std::atomic<int> my_slack; + std::atomic<int> my_slack; //! Counter used to determine when to delete this. - std::atomic<int> my_ref_count; + std::atomic<int> my_ref_count; padded_private_worker* my_thread_array; //! List of workers that are asleep or committed to sleeping until notified by another thread. - std::atomic<private_worker*> my_asleep_list_root; + std::atomic<private_worker*> my_asleep_list_root; //! Protects my_asleep_list_root typedef scheduler_mutex_type asleep_list_mutex_type; asleep_list_mutex_type my_asleep_list_mutex; #if TBB_USE_ASSERT - std::atomic<int> my_net_slack_requests; + std::atomic<int> my_net_slack_requests; #endif /* TBB_USE_ASSERT */ //! Wake up to two sleeping workers, if there are any sleeping. @@ -154,7 +154,7 @@ private: which in turn each wake up two threads, etc. */ void propagate_chain_reaction() { // First test of a double-check idiom. Second test is inside wake_some(0). 
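The check that follows is cheap and lock-free; wake_some(0) then retests the list under my_asleep_list_mutex before popping any worker. A minimal sketch of that double-check shape, with generic names (Node, sleep_list and maybe_wake_one are placeholders, and std::mutex stands in for the scheduler mutex used here):

    #include <atomic>
    #include <mutex>

    struct Node { Node* next; /* worker state ... */ };

    std::atomic<Node*> sleep_list{nullptr};
    std::mutex         sleep_list_mutex;

    // First test happens without the lock, so the common case (nobody asleep)
    // costs one atomic load; the second test re-validates under the lock.
    void maybe_wake_one() {
        if (!sleep_list.load(std::memory_order_acquire))
            return;                                           // fast path
        std::lock_guard<std::mutex> lock(sleep_list_mutex);
        if (Node* n = sleep_list.load(std::memory_order_relaxed)) {
            sleep_list.store(n->next, std::memory_order_relaxed);
            // notify the worker represented by n ...
        }
    }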
- if( my_asleep_list_root.load(std::memory_order_acquire) ) + if( my_asleep_list_root.load(std::memory_order_acquire) ) wake_some(0); } @@ -178,27 +178,27 @@ private: public: private_server( tbb_client& client ); - version_type version() const override { + version_type version() const override { return 0; } - void request_close_connection( bool /*exiting*/ ) override { - for( std::size_t i=0; i<my_n_thread; ++i ) + void request_close_connection( bool /*exiting*/ ) override { + for( std::size_t i=0; i<my_n_thread; ++i ) my_thread_array[i].start_shutdown(); remove_server_ref(); } - void yield() override { d0::yield(); } + void yield() override { d0::yield(); } - void independent_thread_number_changed( int ) override {__TBB_ASSERT(false,NULL);} + void independent_thread_number_changed( int ) override {__TBB_ASSERT(false,NULL);} - unsigned default_concurrency() const override { return governor::default_num_threads() - 1; } + unsigned default_concurrency() const override { return governor::default_num_threads() - 1; } - void adjust_job_count_estimate( int delta ) override; + void adjust_job_count_estimate( int delta ) override; #if _WIN32||_WIN64 - void register_external_thread ( ::rml::server::execution_resource_t& ) override {} - void unregister_external_thread ( ::rml::server::execution_resource_t ) override {} + void register_external_thread ( ::rml::server::execution_resource_t& ) override {} + void unregister_external_thread ( ::rml::server::execution_resource_t ) override {} #endif /* _WIN32||_WIN64 */ }; @@ -232,12 +232,12 @@ void private_worker::release_handle(thread_handle handle, bool join) { } void private_worker::start_shutdown() { - state_t expected_state = my_state.load(std::memory_order_acquire); - __TBB_ASSERT( expected_state!=st_quit, NULL ); + state_t expected_state = my_state.load(std::memory_order_acquire); + __TBB_ASSERT( expected_state!=st_quit, NULL ); - while( !my_state.compare_exchange_strong( expected_state, st_quit ) ); - - if( expected_state==st_normal || expected_state==st_starting ) { + while( !my_state.compare_exchange_strong( expected_state, st_quit ) ); + + if( expected_state==st_normal || expected_state==st_starting ) { // May have invalidated invariant for sleeping, so wake up the thread. // Note that the notify() here occurs without maintaining invariants for my_slack. // It does not matter, because my_state==st_quit overrides checking of my_slack. @@ -245,15 +245,15 @@ void private_worker::start_shutdown() { // Do not need release handle in st_init state, // because in this case the thread wasn't started yet. // For st_starting release is done at launch site. - if (expected_state==st_normal) + if (expected_state==st_normal) release_handle(my_handle, governor::does_client_join_workers(my_client)); - } else if( expected_state==st_init ) { + } else if( expected_state==st_init ) { // Perform action that otherwise would be performed by associated thread when it quits. my_server.remove_server_ref(); } } -void private_worker::run() noexcept { +void private_worker::run() noexcept { my_server.propagate_chain_reaction(); // Transiting to st_normal here would require setting my_handle, @@ -261,17 +261,17 @@ void private_worker::run() noexcept { // complications in handle management on Windows. 
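The worker loop that follows processes jobs while my_slack is non-negative and otherwise parks the thread with a two-phase wait: prepare_wait records the intent to sleep, the worker publishes itself on the asleep list, and only then does commit_wait block, so a notify that races with falling asleep is not lost. A minimal sketch of that idea built on std::condition_variable (illustrative only; the real thread_monitor passes a cookie object rather than returning an epoch):

    #include <condition_variable>
    #include <mutex>

    // Generic two-phase wait in the spirit of rml::internal::thread_monitor.
    class monitor {
        std::mutex m;
        std::condition_variable cv;
        unsigned epoch = 0;
    public:
        unsigned prepare_wait() {              // phase 1: record the current epoch
            std::lock_guard<std::mutex> lock(m);
            return epoch;
        }
        void commit_wait(unsigned my_epoch) {  // phase 2: block only if nothing changed
            std::unique_lock<std::mutex> lock(m);
            cv.wait(lock, [&] { return epoch != my_epoch; });
        }
        void notify() {
            { std::lock_guard<std::mutex> lock(m); ++epoch; }
            cv.notify_one();
        }
    };

Between the two phases the worker inserts itself into the asleep list; any notify() issued after that point advances the epoch, so commit_wait returns immediately instead of sleeping through the wake-up.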
::rml::job& j = *my_client.create_one_job(); - while( my_state.load(std::memory_order_acquire)!=st_quit ) { - if( my_server.my_slack.load(std::memory_order_acquire)>=0 ) { + while( my_state.load(std::memory_order_acquire)!=st_quit ) { + if( my_server.my_slack.load(std::memory_order_acquire)>=0 ) { my_client.process(j); } else { thread_monitor::cookie c; // Prepare to wait my_thread_monitor.prepare_wait(c); // Check/set the invariant for sleeping - if( my_state.load(std::memory_order_acquire)!=st_quit && my_server.try_insert_in_asleep_list(*this) ) { + if( my_state.load(std::memory_order_acquire)!=st_quit && my_server.try_insert_in_asleep_list(*this) ) { my_thread_monitor.commit_wait(c); - __TBB_ASSERT( my_state==st_quit || !my_next, "Thread monitor missed a spurious wakeup?" ); + __TBB_ASSERT( my_state==st_quit || !my_next, "Thread monitor missed a spurious wakeup?" ); my_server.propagate_chain_reaction(); } else { // Invariant broken @@ -286,32 +286,32 @@ void private_worker::run() noexcept { } inline void private_worker::wake_or_launch() { - state_t expected_state = st_init; - if( my_state.compare_exchange_strong( expected_state, st_starting ) ) { + state_t expected_state = st_init; + if( my_state.compare_exchange_strong( expected_state, st_starting ) ) { // after this point, remove_server_ref() must be done by created thread -#if __TBB_USE_WINAPI +#if __TBB_USE_WINAPI my_handle = thread_monitor::launch( thread_routine, this, my_server.my_stack_size, &this->my_index ); -#elif __TBB_USE_POSIX +#elif __TBB_USE_POSIX { affinity_helper fpa; fpa.protect_affinity_mask( /*restore_process_mask=*/true ); my_handle = thread_monitor::launch( thread_routine, this, my_server.my_stack_size ); // Implicit destruction of fpa resets original affinity mask. } -#endif /* __TBB_USE_POSIX */ - expected_state = st_starting; - if ( !my_state.compare_exchange_strong( expected_state, st_normal ) ) { +#endif /* __TBB_USE_POSIX */ + expected_state = st_starting; + if ( !my_state.compare_exchange_strong( expected_state, st_normal ) ) { // Do shutdown during startup. my_handle can't be released // by start_shutdown, because my_handle value might be not set yet // at time of transition from st_starting to st_quit. 
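The race described in the comment above is one corner of a small state machine on my_state that is spread across run(), wake_or_launch() and start_shutdown(). A simplified sketch of the transitions, reduced to the four states visible in this hunk (the enum values and underlying type here are illustrative):

    #include <atomic>

    enum state_t : unsigned char { st_init, st_starting, st_normal, st_quit };

    //  st_init --wake_or_launch--> st_starting --launch ok--> st_normal
    //     |                            |                          |
    //     +----------------------------+--------------------------+--> st_quit  (start_shutdown)
    //
    // start_shutdown always CASes the current state to st_quit; which side
    // cleans up depends on the state it displaced, as in the branches below.
    void sketch_start_shutdown(std::atomic<state_t>& my_state) {
        state_t expected = my_state.load(std::memory_order_acquire);
        while (!my_state.compare_exchange_strong(expected, st_quit)) { }
        // expected now holds the displaced state:
        //   st_normal / st_starting -> wake the thread so it observes st_quit
        //   st_init                 -> thread never started; drop the server ref here
    }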
- __TBB_ASSERT( expected_state==st_quit, NULL ); + __TBB_ASSERT( expected_state==st_quit, NULL ); release_handle(my_handle, governor::does_client_join_workers(my_client)); } } - else { - __TBB_ASSERT( !my_next, "Should not wake a thread while it's still in asleep list" ); + else { + __TBB_ASSERT( !my_next, "Should not wake a thread while it's still in asleep list" ); my_thread_monitor.notify(); - } + } } //------------------------------------------------------------------------ @@ -321,27 +321,27 @@ private_server::private_server( tbb_client& client ) : my_client(client), my_n_thread(client.max_job_count()), my_stack_size(client.min_stack_size()), - my_slack(0), - my_ref_count(my_n_thread+1), - my_thread_array(NULL), - my_asleep_list_root(NULL) + my_slack(0), + my_ref_count(my_n_thread+1), + my_thread_array(NULL), + my_asleep_list_root(NULL) #if TBB_USE_ASSERT - , my_net_slack_requests(0) + , my_net_slack_requests(0) #endif /* TBB_USE_ASSERT */ -{ +{ my_thread_array = tbb::cache_aligned_allocator<padded_private_worker>().allocate( my_n_thread ); - for( std::size_t i=0; i<my_n_thread; ++i ) { + for( std::size_t i=0; i<my_n_thread; ++i ) { private_worker* t = new( &my_thread_array[i] ) padded_private_worker( *this, client, i ); - t->my_next = my_asleep_list_root.exchange(t, std::memory_order_relaxed); + t->my_next = my_asleep_list_root.exchange(t, std::memory_order_relaxed); } } private_server::~private_server() { __TBB_ASSERT( my_net_slack_requests==0, NULL ); - for( std::size_t i=my_n_thread; i--; ) + for( std::size_t i=my_n_thread; i--; ) my_thread_array[i].~padded_private_worker(); tbb::cache_aligned_allocator<padded_private_worker>().deallocate( my_thread_array, my_n_thread ); - tbb::detail::poison_pointer( my_thread_array ); + tbb::detail::poison_pointer( my_thread_array ); } inline bool private_server::try_insert_in_asleep_list( private_worker& t ) { @@ -352,7 +352,7 @@ inline bool private_server::try_insert_in_asleep_list( private_worker& t ) { // it sees us sleeping on the list and wakes us up. 
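Both the insertion path just below and wake_some further down coordinate through my_slack alone: a worker about to sleep increments it and stays on the list only if the result is still non-positive, while a waker must claim one positive unit per thread it wakes so that two wakers never spend the same unit. A sketch of that claim loop (try_claim_slack is a placeholder name; the member function performs this inline):

    #include <atomic>

    // Claim one unit of positive slack, or report that none is available.
    // Mirrors the compare_exchange loop in private_server::wake_some below.
    bool try_claim_slack(std::atomic<int>& slack) {
        int old = slack.load(std::memory_order_acquire);
        do {
            if (old <= 0)
                return false;              // no surplus demand: nobody to wake
        } while (!slack.compare_exchange_strong(old, old - 1));
        return true;                       // exactly one waker wins this unit
    }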
int k = ++my_slack; if( k<=0 ) { - t.my_next = my_asleep_list_root.exchange(&t, std::memory_order_relaxed); + t.my_next = my_asleep_list_root.exchange(&t, std::memory_order_relaxed); return true; } else { --my_slack; @@ -366,23 +366,23 @@ void private_server::wake_some( int additional_slack ) { private_worker**w = wakee; { asleep_list_mutex_type::scoped_lock lock(my_asleep_list_mutex); - while( my_asleep_list_root.load(std::memory_order_relaxed) && w<wakee+2 ) { + while( my_asleep_list_root.load(std::memory_order_relaxed) && w<wakee+2 ) { if( additional_slack>0 ) { - // additional demand does not exceed surplus supply - if ( additional_slack+my_slack.load(std::memory_order_acquire)<=0 ) + // additional demand does not exceed surplus supply + if ( additional_slack+my_slack.load(std::memory_order_acquire)<=0 ) break; --additional_slack; } else { // Chain reaction; Try to claim unit of slack - int old = my_slack; + int old = my_slack; do { if( old<=0 ) goto done; - } while( !my_slack.compare_exchange_strong(old,old-1) ); + } while( !my_slack.compare_exchange_strong(old,old-1) ); } // Pop sleeping worker to combine with claimed unit of slack - auto old = my_asleep_list_root.load(std::memory_order_relaxed); - my_asleep_list_root.store(old->my_next, std::memory_order_relaxed); - *w++ = old; + auto old = my_asleep_list_root.load(std::memory_order_relaxed); + my_asleep_list_root.store(old->my_next, std::memory_order_relaxed); + *w++ = old; } if( additional_slack ) { // Contribute our unused slack to my_slack. @@ -390,11 +390,11 @@ void private_server::wake_some( int additional_slack ) { } } done: - while( w>wakee ) { - private_worker* ww = *--w; - ww->my_next = NULL; - ww->wake_or_launch(); - } + while( w>wakee ) { + private_worker* ww = *--w; + ww->my_next = NULL; + ww->wake_or_launch(); + } } void private_server::adjust_job_count_estimate( int delta ) { @@ -414,7 +414,7 @@ tbb_server* make_private_server( tbb_client& client ) { } } // namespace rml -} // namespace r1 -} // namespace detail -} // namespace tbb +} // namespace r1 +} // namespace detail +} // namespace tbb diff --git a/contrib/libs/tbb/src/tbb/profiling.cpp b/contrib/libs/tbb/src/tbb/profiling.cpp index 43e7cf7478..2603f35b88 100644 --- a/contrib/libs/tbb/src/tbb/profiling.cpp +++ b/contrib/libs/tbb/src/tbb/profiling.cpp @@ -1,265 +1,265 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -#include "oneapi/tbb/detail/_config.h" -#include "oneapi/tbb/detail/_template_helpers.h" - -#include "main.h" -#include "itt_notify.h" - -#include "oneapi/tbb/profiling.h" - -#include <string.h> - -namespace tbb { -namespace detail { -namespace r1 { - -#if __TBB_USE_ITT_NOTIFY -bool ITT_Present; -static std::atomic<bool> ITT_InitializationDone; - -static __itt_domain *tbb_domains[d1::ITT_NUM_DOMAINS] = {}; - -struct resource_string { - const char *str; - __itt_string_handle *itt_str_handle; -}; - -// -// populate resource strings -// -#define TBB_STRING_RESOURCE( index_name, str ) { str, nullptr }, -static resource_string strings_for_itt[] = { - #include "oneapi/tbb/detail/_string_resource.h" - { "num_resource_strings", nullptr } -}; -#undef TBB_STRING_RESOURCE - -static __itt_string_handle* ITT_get_string_handle(std::uintptr_t idx) { - __TBB_ASSERT(idx < NUM_STRINGS, "string handle out of valid range"); - return idx < NUM_STRINGS ? strings_for_itt[idx].itt_str_handle : NULL; -} - -static void ITT_init_domains() { - tbb_domains[d1::ITT_DOMAIN_MAIN] = __itt_domain_create( _T("tbb") ); - tbb_domains[d1::ITT_DOMAIN_MAIN]->flags = 1; - tbb_domains[d1::ITT_DOMAIN_FLOW] = __itt_domain_create( _T("tbb.flow") ); - tbb_domains[d1::ITT_DOMAIN_FLOW]->flags = 1; - tbb_domains[d1::ITT_DOMAIN_ALGO] = __itt_domain_create( _T("tbb.algorithm") ); - tbb_domains[d1::ITT_DOMAIN_ALGO]->flags = 1; -} - -static void ITT_init_strings() { - for ( std::uintptr_t i = 0; i < NUM_STRINGS; ++i ) { -#if _WIN32||_WIN64 - strings_for_itt[i].itt_str_handle = __itt_string_handle_createA( strings_for_itt[i].str ); -#else - strings_for_itt[i].itt_str_handle = __itt_string_handle_create( strings_for_itt[i].str ); -#endif - } -} - -static void ITT_init() { - ITT_init_domains(); - ITT_init_strings(); -} - -/** Thread-unsafe lazy one-time initialization of tools interop. - Used by both dummy handlers and general TBB one-time initialization routine. **/ -void ITT_DoUnsafeOneTimeInitialization () { - // Double check ITT_InitializationDone is necessary because the first check - // in ITT_DoOneTimeInitialization is not guarded with the __TBB_InitOnce lock. - if ( !ITT_InitializationDone ) { - ITT_Present = (__TBB_load_ittnotify()!=0); - if (ITT_Present) ITT_init(); - ITT_InitializationDone = true; - } -} - -/** Thread-safe lazy one-time initialization of tools interop. - Used by dummy handlers only. 
**/ -extern "C" -void ITT_DoOneTimeInitialization() { - if ( !ITT_InitializationDone ) { - __TBB_InitOnce::lock(); - ITT_DoUnsafeOneTimeInitialization(); - __TBB_InitOnce::unlock(); - } -} - -void create_itt_sync(void* ptr, const tchar* objtype, const tchar* objname) { - ITT_SYNC_CREATE(ptr, objtype, objname); -} - -void call_itt_notify(int t, void *ptr) { - switch (t) { - case 0: ITT_NOTIFY(sync_prepare, ptr); break; - case 1: ITT_NOTIFY(sync_cancel, ptr); break; - case 2: ITT_NOTIFY(sync_acquired, ptr); break; - case 3: ITT_NOTIFY(sync_releasing, ptr); break; - case 4: ITT_NOTIFY(sync_destroy, ptr); break; - } -} - -void itt_set_sync_name(void* obj, const tchar* name) { - __itt_sync_rename(obj, name); -} - -const __itt_id itt_null_id = { 0, 0, 0 }; - -static inline __itt_domain* get_itt_domain(d1::itt_domain_enum idx) { - if (tbb_domains[idx] == NULL) { - ITT_DoOneTimeInitialization(); - } - return tbb_domains[idx]; -} - -static inline void itt_id_make(__itt_id* id, void* addr, unsigned long long extra) { - *id = __itt_id_make(addr, extra); -} - -static inline void itt_id_create(const __itt_domain* domain, __itt_id id) { - __itt_id_create(domain, id); -} - -void itt_make_task_group(d1::itt_domain_enum domain, void* group, unsigned long long group_extra, - void* parent, unsigned long long parent_extra, string_resource_index name_index) { - if (__itt_domain* d = get_itt_domain(domain)) { - __itt_id group_id = itt_null_id; - __itt_id parent_id = itt_null_id; - itt_id_make(&group_id, group, group_extra); - itt_id_create(d, group_id); - if (parent) { - itt_id_make(&parent_id, parent, parent_extra); - } - __itt_string_handle* n = ITT_get_string_handle(name_index); - __itt_task_group(d, group_id, parent_id, n); - } -} - -void __TBB_EXPORTED_FUNC itt_metadata_str_add(d1::itt_domain_enum domain, void *addr, unsigned long long addr_extra, - string_resource_index key, const char *value ) { - if ( __itt_domain *d = get_itt_domain( domain ) ) { - __itt_id id = itt_null_id; - itt_id_make( &id, addr, addr_extra ); - __itt_string_handle *k = ITT_get_string_handle(key); - size_t value_length = strlen( value ); -#if _WIN32||_WIN64 - __itt_metadata_str_addA(d, id, k, value, value_length); -#else - __itt_metadata_str_add(d, id, k, value, value_length); -#endif - } -} - -void __TBB_EXPORTED_FUNC itt_metadata_ptr_add(d1::itt_domain_enum domain, void *addr, unsigned long long addr_extra, - string_resource_index key, void *value ) { - if ( __itt_domain *d = get_itt_domain( domain ) ) { - __itt_id id = itt_null_id; - itt_id_make( &id, addr, addr_extra ); - __itt_string_handle *k = ITT_get_string_handle(key); -#if __TBB_x86_32 - __itt_metadata_add(d, id, k, __itt_metadata_u32, 1, value); -#else - __itt_metadata_add(d, id, k, __itt_metadata_u64, 1, value); -#endif - } -} - -void __TBB_EXPORTED_FUNC itt_relation_add(d1::itt_domain_enum domain, void *addr0, unsigned long long addr0_extra, - itt_relation relation, void *addr1, unsigned long long addr1_extra ) { - if ( __itt_domain *d = get_itt_domain( domain ) ) { - __itt_id id0 = itt_null_id; - __itt_id id1 = itt_null_id; - itt_id_make( &id0, addr0, addr0_extra ); - itt_id_make( &id1, addr1, addr1_extra ); - __itt_relation_add( d, id0, (__itt_relation)relation, id1 ); - } -} - -void __TBB_EXPORTED_FUNC itt_task_begin(d1::itt_domain_enum domain, void* task, unsigned long long task_extra, - void* parent, unsigned long long parent_extra, string_resource_index name_index) { - if (__itt_domain* d = get_itt_domain(domain)) { - __itt_id task_id = itt_null_id; - __itt_id 
parent_id = itt_null_id; - if (task) { - itt_id_make(&task_id, task, task_extra); - } - if (parent) { - itt_id_make(&parent_id, parent, parent_extra); - } - __itt_string_handle* n = ITT_get_string_handle(name_index); - __itt_task_begin(d, task_id, parent_id, n); - } -} - -void __TBB_EXPORTED_FUNC itt_task_end(d1::itt_domain_enum domain) { - if (__itt_domain* d = get_itt_domain(domain)) { - __itt_task_end(d); - } -} - -void __TBB_EXPORTED_FUNC itt_region_begin(d1::itt_domain_enum domain, void *region, unsigned long long region_extra, - void *parent, unsigned long long parent_extra, string_resource_index /* name_index */ ) { - if ( __itt_domain *d = get_itt_domain( domain ) ) { - __itt_id region_id = itt_null_id; - __itt_id parent_id = itt_null_id; - itt_id_make( ®ion_id, region, region_extra ); - if ( parent ) { - itt_id_make( &parent_id, parent, parent_extra ); - } - __itt_region_begin( d, region_id, parent_id, NULL ); - } -} - -void __TBB_EXPORTED_FUNC itt_region_end(d1::itt_domain_enum domain, void *region, unsigned long long region_extra ) { - if ( __itt_domain *d = get_itt_domain( domain ) ) { - __itt_id region_id = itt_null_id; - itt_id_make( ®ion_id, region, region_extra ); - __itt_region_end( d, region_id ); - } -} - -#else -void create_itt_sync(void* /*ptr*/, const tchar* /*objtype*/, const tchar* /*objname*/) {} -void call_itt_notify(int /*t*/, void* /*ptr*/) {} -void itt_set_sync_name(void* /*obj*/, const tchar* /*name*/) {} -void itt_make_task_group(d1::itt_domain_enum /*domain*/, void* /*group*/, unsigned long long /*group_extra*/, - void* /*parent*/, unsigned long long /*parent_extra*/, string_resource_index /*name_index*/) {} -void itt_metadata_str_add(d1::itt_domain_enum /*domain*/, void* /*addr*/, unsigned long long /*addr_extra*/, - string_resource_index /*key*/, const char* /*value*/ ) { } -void itt_metadata_ptr_add(d1::itt_domain_enum /*domain*/, void * /*addr*/, unsigned long long /*addr_extra*/, - string_resource_index /*key*/, void * /*value*/ ) {} -void itt_relation_add(d1::itt_domain_enum /*domain*/, void* /*addr0*/, unsigned long long /*addr0_extra*/, - itt_relation /*relation*/, void* /*addr1*/, unsigned long long /*addr1_extra*/ ) { } -void itt_task_begin(d1::itt_domain_enum /*domain*/, void* /*task*/, unsigned long long /*task_extra*/, - void* /*parent*/, unsigned long long /*parent_extra*/, string_resource_index /*name_index*/ ) { } -void itt_task_end(d1::itt_domain_enum /*domain*/ ) { } -void itt_region_begin(d1::itt_domain_enum /*domain*/, void* /*region*/, unsigned long long /*region_extra*/, - void* /*parent*/, unsigned long long /*parent_extra*/, string_resource_index /*name_index*/ ) { } -void itt_region_end(d1::itt_domain_enum /*domain*/, void* /*region*/, unsigned long long /*region_extra*/ ) { } -#endif /* __TBB_USE_ITT_NOTIFY */ - -const tchar - *SyncType_Scheduler = _T("%Constant") - ; -const tchar - *SyncObj_ContextsList = _T("TBB Scheduler") - ; -} // namespace r1 -} // namespace detail -} // namespace tbb +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "oneapi/tbb/detail/_config.h" +#include "oneapi/tbb/detail/_template_helpers.h" + +#include "main.h" +#include "itt_notify.h" + +#include "oneapi/tbb/profiling.h" + +#include <string.h> + +namespace tbb { +namespace detail { +namespace r1 { + +#if __TBB_USE_ITT_NOTIFY +bool ITT_Present; +static std::atomic<bool> ITT_InitializationDone; + +static __itt_domain *tbb_domains[d1::ITT_NUM_DOMAINS] = {}; + +struct resource_string { + const char *str; + __itt_string_handle *itt_str_handle; +}; + +// +// populate resource strings +// +#define TBB_STRING_RESOURCE( index_name, str ) { str, nullptr }, +static resource_string strings_for_itt[] = { + #include "oneapi/tbb/detail/_string_resource.h" + { "num_resource_strings", nullptr } +}; +#undef TBB_STRING_RESOURCE + +static __itt_string_handle* ITT_get_string_handle(std::uintptr_t idx) { + __TBB_ASSERT(idx < NUM_STRINGS, "string handle out of valid range"); + return idx < NUM_STRINGS ? strings_for_itt[idx].itt_str_handle : NULL; +} + +static void ITT_init_domains() { + tbb_domains[d1::ITT_DOMAIN_MAIN] = __itt_domain_create( _T("tbb") ); + tbb_domains[d1::ITT_DOMAIN_MAIN]->flags = 1; + tbb_domains[d1::ITT_DOMAIN_FLOW] = __itt_domain_create( _T("tbb.flow") ); + tbb_domains[d1::ITT_DOMAIN_FLOW]->flags = 1; + tbb_domains[d1::ITT_DOMAIN_ALGO] = __itt_domain_create( _T("tbb.algorithm") ); + tbb_domains[d1::ITT_DOMAIN_ALGO]->flags = 1; +} + +static void ITT_init_strings() { + for ( std::uintptr_t i = 0; i < NUM_STRINGS; ++i ) { +#if _WIN32||_WIN64 + strings_for_itt[i].itt_str_handle = __itt_string_handle_createA( strings_for_itt[i].str ); +#else + strings_for_itt[i].itt_str_handle = __itt_string_handle_create( strings_for_itt[i].str ); +#endif + } +} + +static void ITT_init() { + ITT_init_domains(); + ITT_init_strings(); +} + +/** Thread-unsafe lazy one-time initialization of tools interop. + Used by both dummy handlers and general TBB one-time initialization routine. **/ +void ITT_DoUnsafeOneTimeInitialization () { + // Double check ITT_InitializationDone is necessary because the first check + // in ITT_DoOneTimeInitialization is not guarded with the __TBB_InitOnce lock. + if ( !ITT_InitializationDone ) { + ITT_Present = (__TBB_load_ittnotify()!=0); + if (ITT_Present) ITT_init(); + ITT_InitializationDone = true; + } +} + +/** Thread-safe lazy one-time initialization of tools interop. + Used by dummy handlers only. 
**/ +extern "C" +void ITT_DoOneTimeInitialization() { + if ( !ITT_InitializationDone ) { + __TBB_InitOnce::lock(); + ITT_DoUnsafeOneTimeInitialization(); + __TBB_InitOnce::unlock(); + } +} + +void create_itt_sync(void* ptr, const tchar* objtype, const tchar* objname) { + ITT_SYNC_CREATE(ptr, objtype, objname); +} + +void call_itt_notify(int t, void *ptr) { + switch (t) { + case 0: ITT_NOTIFY(sync_prepare, ptr); break; + case 1: ITT_NOTIFY(sync_cancel, ptr); break; + case 2: ITT_NOTIFY(sync_acquired, ptr); break; + case 3: ITT_NOTIFY(sync_releasing, ptr); break; + case 4: ITT_NOTIFY(sync_destroy, ptr); break; + } +} + +void itt_set_sync_name(void* obj, const tchar* name) { + __itt_sync_rename(obj, name); +} + +const __itt_id itt_null_id = { 0, 0, 0 }; + +static inline __itt_domain* get_itt_domain(d1::itt_domain_enum idx) { + if (tbb_domains[idx] == NULL) { + ITT_DoOneTimeInitialization(); + } + return tbb_domains[idx]; +} + +static inline void itt_id_make(__itt_id* id, void* addr, unsigned long long extra) { + *id = __itt_id_make(addr, extra); +} + +static inline void itt_id_create(const __itt_domain* domain, __itt_id id) { + __itt_id_create(domain, id); +} + +void itt_make_task_group(d1::itt_domain_enum domain, void* group, unsigned long long group_extra, + void* parent, unsigned long long parent_extra, string_resource_index name_index) { + if (__itt_domain* d = get_itt_domain(domain)) { + __itt_id group_id = itt_null_id; + __itt_id parent_id = itt_null_id; + itt_id_make(&group_id, group, group_extra); + itt_id_create(d, group_id); + if (parent) { + itt_id_make(&parent_id, parent, parent_extra); + } + __itt_string_handle* n = ITT_get_string_handle(name_index); + __itt_task_group(d, group_id, parent_id, n); + } +} + +void __TBB_EXPORTED_FUNC itt_metadata_str_add(d1::itt_domain_enum domain, void *addr, unsigned long long addr_extra, + string_resource_index key, const char *value ) { + if ( __itt_domain *d = get_itt_domain( domain ) ) { + __itt_id id = itt_null_id; + itt_id_make( &id, addr, addr_extra ); + __itt_string_handle *k = ITT_get_string_handle(key); + size_t value_length = strlen( value ); +#if _WIN32||_WIN64 + __itt_metadata_str_addA(d, id, k, value, value_length); +#else + __itt_metadata_str_add(d, id, k, value, value_length); +#endif + } +} + +void __TBB_EXPORTED_FUNC itt_metadata_ptr_add(d1::itt_domain_enum domain, void *addr, unsigned long long addr_extra, + string_resource_index key, void *value ) { + if ( __itt_domain *d = get_itt_domain( domain ) ) { + __itt_id id = itt_null_id; + itt_id_make( &id, addr, addr_extra ); + __itt_string_handle *k = ITT_get_string_handle(key); +#if __TBB_x86_32 + __itt_metadata_add(d, id, k, __itt_metadata_u32, 1, value); +#else + __itt_metadata_add(d, id, k, __itt_metadata_u64, 1, value); +#endif + } +} + +void __TBB_EXPORTED_FUNC itt_relation_add(d1::itt_domain_enum domain, void *addr0, unsigned long long addr0_extra, + itt_relation relation, void *addr1, unsigned long long addr1_extra ) { + if ( __itt_domain *d = get_itt_domain( domain ) ) { + __itt_id id0 = itt_null_id; + __itt_id id1 = itt_null_id; + itt_id_make( &id0, addr0, addr0_extra ); + itt_id_make( &id1, addr1, addr1_extra ); + __itt_relation_add( d, id0, (__itt_relation)relation, id1 ); + } +} + +void __TBB_EXPORTED_FUNC itt_task_begin(d1::itt_domain_enum domain, void* task, unsigned long long task_extra, + void* parent, unsigned long long parent_extra, string_resource_index name_index) { + if (__itt_domain* d = get_itt_domain(domain)) { + __itt_id task_id = itt_null_id; + __itt_id 
parent_id = itt_null_id; + if (task) { + itt_id_make(&task_id, task, task_extra); + } + if (parent) { + itt_id_make(&parent_id, parent, parent_extra); + } + __itt_string_handle* n = ITT_get_string_handle(name_index); + __itt_task_begin(d, task_id, parent_id, n); + } +} + +void __TBB_EXPORTED_FUNC itt_task_end(d1::itt_domain_enum domain) { + if (__itt_domain* d = get_itt_domain(domain)) { + __itt_task_end(d); + } +} + +void __TBB_EXPORTED_FUNC itt_region_begin(d1::itt_domain_enum domain, void *region, unsigned long long region_extra, + void *parent, unsigned long long parent_extra, string_resource_index /* name_index */ ) { + if ( __itt_domain *d = get_itt_domain( domain ) ) { + __itt_id region_id = itt_null_id; + __itt_id parent_id = itt_null_id; + itt_id_make( ®ion_id, region, region_extra ); + if ( parent ) { + itt_id_make( &parent_id, parent, parent_extra ); + } + __itt_region_begin( d, region_id, parent_id, NULL ); + } +} + +void __TBB_EXPORTED_FUNC itt_region_end(d1::itt_domain_enum domain, void *region, unsigned long long region_extra ) { + if ( __itt_domain *d = get_itt_domain( domain ) ) { + __itt_id region_id = itt_null_id; + itt_id_make( ®ion_id, region, region_extra ); + __itt_region_end( d, region_id ); + } +} + +#else +void create_itt_sync(void* /*ptr*/, const tchar* /*objtype*/, const tchar* /*objname*/) {} +void call_itt_notify(int /*t*/, void* /*ptr*/) {} +void itt_set_sync_name(void* /*obj*/, const tchar* /*name*/) {} +void itt_make_task_group(d1::itt_domain_enum /*domain*/, void* /*group*/, unsigned long long /*group_extra*/, + void* /*parent*/, unsigned long long /*parent_extra*/, string_resource_index /*name_index*/) {} +void itt_metadata_str_add(d1::itt_domain_enum /*domain*/, void* /*addr*/, unsigned long long /*addr_extra*/, + string_resource_index /*key*/, const char* /*value*/ ) { } +void itt_metadata_ptr_add(d1::itt_domain_enum /*domain*/, void * /*addr*/, unsigned long long /*addr_extra*/, + string_resource_index /*key*/, void * /*value*/ ) {} +void itt_relation_add(d1::itt_domain_enum /*domain*/, void* /*addr0*/, unsigned long long /*addr0_extra*/, + itt_relation /*relation*/, void* /*addr1*/, unsigned long long /*addr1_extra*/ ) { } +void itt_task_begin(d1::itt_domain_enum /*domain*/, void* /*task*/, unsigned long long /*task_extra*/, + void* /*parent*/, unsigned long long /*parent_extra*/, string_resource_index /*name_index*/ ) { } +void itt_task_end(d1::itt_domain_enum /*domain*/ ) { } +void itt_region_begin(d1::itt_domain_enum /*domain*/, void* /*region*/, unsigned long long /*region_extra*/, + void* /*parent*/, unsigned long long /*parent_extra*/, string_resource_index /*name_index*/ ) { } +void itt_region_end(d1::itt_domain_enum /*domain*/, void* /*region*/, unsigned long long /*region_extra*/ ) { } +#endif /* __TBB_USE_ITT_NOTIFY */ + +const tchar + *SyncType_Scheduler = _T("%Constant") + ; +const tchar + *SyncObj_ContextsList = _T("TBB Scheduler") + ; +} // namespace r1 +} // namespace detail +} // namespace tbb diff --git a/contrib/libs/tbb/src/tbb/queuing_rw_mutex.cpp b/contrib/libs/tbb/src/tbb/queuing_rw_mutex.cpp index 5051d8937d..cfdc4d3c2a 100644 --- a/contrib/libs/tbb/src/tbb/queuing_rw_mutex.cpp +++ b/contrib/libs/tbb/src/tbb/queuing_rw_mutex.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -18,14 +18,14 @@ with SPIN tool using <TBB directory>/tools/spin_models/ReaderWriterMutex.pml. There could be some code looking as "can be restructured" but its structure does matter! */ -#include "oneapi/tbb/queuing_rw_mutex.h" -#include "oneapi/tbb/detail/_assert.h" -#include "oneapi/tbb/detail/_utils.h" +#include "oneapi/tbb/queuing_rw_mutex.h" +#include "oneapi/tbb/detail/_assert.h" +#include "oneapi/tbb/detail/_utils.h" #include "itt_notify.h" namespace tbb { -namespace detail { -namespace r1 { +namespace detail { +namespace r1 { #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) // Workaround for overzealous compiler warnings @@ -35,524 +35,524 @@ namespace r1 { //! A view of a T* with additional functionality for twiddling low-order bits. template<typename T> -class tricky_atomic_pointer { +class tricky_atomic_pointer { public: - using word = uintptr_t; + using word = uintptr_t; - static T* fetch_add( std::atomic<word>& location, word addend, std::memory_order memory_order ) { - return reinterpret_cast<T*>(location.fetch_add(addend, memory_order)); + static T* fetch_add( std::atomic<word>& location, word addend, std::memory_order memory_order ) { + return reinterpret_cast<T*>(location.fetch_add(addend, memory_order)); } - - static T* exchange( std::atomic<word>& location, T* value, std::memory_order memory_order ) { - return reinterpret_cast<T*>(location.exchange(reinterpret_cast<word>(value), memory_order)); + + static T* exchange( std::atomic<word>& location, T* value, std::memory_order memory_order ) { + return reinterpret_cast<T*>(location.exchange(reinterpret_cast<word>(value), memory_order)); + } + + static T* compare_exchange_strong( std::atomic<word>& obj, const T* expected, const T* desired, std::memory_order memory_order ) { + word expd = reinterpret_cast<word>(expected); + obj.compare_exchange_strong(expd, reinterpret_cast<word>(desired), memory_order); + return reinterpret_cast<T*>(expd); + } + + static void store( std::atomic<word>& location, const T* value, std::memory_order memory_order ) { + location.store(reinterpret_cast<word>(value), memory_order); + } + + static T* load( std::atomic<word>& location, std::memory_order memory_order ) { + return reinterpret_cast<T*>(location.load(memory_order)); } - - static T* compare_exchange_strong( std::atomic<word>& obj, const T* expected, const T* desired, std::memory_order memory_order ) { - word expd = reinterpret_cast<word>(expected); - obj.compare_exchange_strong(expd, reinterpret_cast<word>(desired), memory_order); - return reinterpret_cast<T*>(expd); + + static void spin_wait_while_eq(const std::atomic<word>& location, const T* value) { + tbb::detail::d0::spin_wait_while_eq(location, reinterpret_cast<word>(value) ); } - static void store( std::atomic<word>& location, const T* value, std::memory_order memory_order ) { - location.store(reinterpret_cast<word>(value), memory_order); - } - - static T* load( std::atomic<word>& location, std::memory_order memory_order ) { - return reinterpret_cast<T*>(location.load(memory_order)); - } - - static void spin_wait_while_eq(const std::atomic<word>& location, const T* value) { - tbb::detail::d0::spin_wait_while_eq(location, reinterpret_cast<word>(value) ); - } - T* & ref; tricky_atomic_pointer( T*& original ) : ref(original) {}; - tricky_atomic_pointer(const tricky_atomic_pointer&) = delete; - tricky_atomic_pointer& operator=(const tricky_atomic_pointer&) = delete; - T* operator&( const word operand2 ) const { + tricky_atomic_pointer(const tricky_atomic_pointer&) = delete; + 
tricky_atomic_pointer& operator=(const tricky_atomic_pointer&) = delete; + T* operator&( const word operand2 ) const { return reinterpret_cast<T*>( reinterpret_cast<word>(ref) & operand2 ); } - T* operator|( const word operand2 ) const { + T* operator|( const word operand2 ) const { return reinterpret_cast<T*>( reinterpret_cast<word>(ref) | operand2 ); } }; -using tricky_pointer = tricky_atomic_pointer<queuing_rw_mutex::scoped_lock>; +using tricky_pointer = tricky_atomic_pointer<queuing_rw_mutex::scoped_lock>; #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) // Workaround for overzealous compiler warnings #pragma warning (pop) #endif -//! Flag bits in a state_t that specify information about a locking request. -enum state_t_flags : unsigned char { - STATE_NONE = 0, - STATE_WRITER = 1<<0, - STATE_READER = 1<<1, - STATE_READER_UNBLOCKNEXT = 1<<2, - STATE_ACTIVEREADER = 1<<3, - STATE_UPGRADE_REQUESTED = 1<<4, - STATE_UPGRADE_WAITING = 1<<5, - STATE_UPGRADE_LOSER = 1<<6, - STATE_COMBINED_WAITINGREADER = STATE_READER | STATE_READER_UNBLOCKNEXT, - STATE_COMBINED_READER = STATE_COMBINED_WAITINGREADER | STATE_ACTIVEREADER, - STATE_COMBINED_UPGRADING = STATE_UPGRADE_WAITING | STATE_UPGRADE_LOSER -}; - -static const unsigned char RELEASED = 0; -static const unsigned char ACQUIRED = 1; - -struct queuing_rw_mutex_impl { - //! Try to acquire the internal lock - /** Returns true if lock was successfully acquired. */ - static bool try_acquire_internal_lock(d1::queuing_rw_mutex::scoped_lock& s) - { - auto expected = RELEASED; - return s.my_internal_lock.compare_exchange_strong(expected, ACQUIRED); - } - - //! Acquire the internal lock - static void acquire_internal_lock(d1::queuing_rw_mutex::scoped_lock& s) - { - // Usually, we would use the test-test-and-set idiom here, with exponential backoff. - // But so far, experiments indicate there is no value in doing so here. - while( !try_acquire_internal_lock(s) ) { - machine_pause(1); - } - } - - //! Release the internal lock - static void release_internal_lock(d1::queuing_rw_mutex::scoped_lock& s) - { - s.my_internal_lock.store(RELEASED, std::memory_order_release); - } - - //! Wait for internal lock to be released - static void wait_for_release_of_internal_lock(d1::queuing_rw_mutex::scoped_lock& s) - { - spin_wait_until_eq(s.my_internal_lock, RELEASED); - } - - //! A helper function - static void unblock_or_wait_on_internal_lock(d1::queuing_rw_mutex::scoped_lock& s, uintptr_t flag ) { - if( flag ) { - wait_for_release_of_internal_lock(s); +//! Flag bits in a state_t that specify information about a locking request. +enum state_t_flags : unsigned char { + STATE_NONE = 0, + STATE_WRITER = 1<<0, + STATE_READER = 1<<1, + STATE_READER_UNBLOCKNEXT = 1<<2, + STATE_ACTIVEREADER = 1<<3, + STATE_UPGRADE_REQUESTED = 1<<4, + STATE_UPGRADE_WAITING = 1<<5, + STATE_UPGRADE_LOSER = 1<<6, + STATE_COMBINED_WAITINGREADER = STATE_READER | STATE_READER_UNBLOCKNEXT, + STATE_COMBINED_READER = STATE_COMBINED_WAITINGREADER | STATE_ACTIVEREADER, + STATE_COMBINED_UPGRADING = STATE_UPGRADE_WAITING | STATE_UPGRADE_LOSER +}; + +static const unsigned char RELEASED = 0; +static const unsigned char ACQUIRED = 1; + +struct queuing_rw_mutex_impl { + //! Try to acquire the internal lock + /** Returns true if lock was successfully acquired. */ + static bool try_acquire_internal_lock(d1::queuing_rw_mutex::scoped_lock& s) + { + auto expected = RELEASED; + return s.my_internal_lock.compare_exchange_strong(expected, ACQUIRED); + } + + //! 
Acquire the internal lock + static void acquire_internal_lock(d1::queuing_rw_mutex::scoped_lock& s) + { + // Usually, we would use the test-test-and-set idiom here, with exponential backoff. + // But so far, experiments indicate there is no value in doing so here. + while( !try_acquire_internal_lock(s) ) { + machine_pause(1); + } + } + + //! Release the internal lock + static void release_internal_lock(d1::queuing_rw_mutex::scoped_lock& s) + { + s.my_internal_lock.store(RELEASED, std::memory_order_release); + } + + //! Wait for internal lock to be released + static void wait_for_release_of_internal_lock(d1::queuing_rw_mutex::scoped_lock& s) + { + spin_wait_until_eq(s.my_internal_lock, RELEASED); + } + + //! A helper function + static void unblock_or_wait_on_internal_lock(d1::queuing_rw_mutex::scoped_lock& s, uintptr_t flag ) { + if( flag ) { + wait_for_release_of_internal_lock(s); } - else { - release_internal_lock(s); - } - } - - //! Mask for low order bit of a pointer. - static const tricky_pointer::word FLAG = 0x1; - - static uintptr_t get_flag( d1::queuing_rw_mutex::scoped_lock* ptr ) { - return reinterpret_cast<uintptr_t>(ptr) & FLAG; - } - - //------------------------------------------------------------------------ - // Methods of queuing_rw_mutex::scoped_lock - //------------------------------------------------------------------------ - - //! A method to acquire queuing_rw_mutex lock - static void acquire(d1::queuing_rw_mutex& m, d1::queuing_rw_mutex::scoped_lock& s, bool write) - { - __TBB_ASSERT( !s.my_mutex, "scoped_lock is already holding a mutex"); - - // Must set all fields before the exchange, because once the - // exchange executes, *this becomes accessible to other threads. - s.my_mutex = &m; - s.my_prev.store(0U, std::memory_order_relaxed); - s.my_next.store(0U, std::memory_order_relaxed); - s.my_going.store(0U, std::memory_order_relaxed); - s.my_state.store(d1::queuing_rw_mutex::scoped_lock::state_t(write ? STATE_WRITER : STATE_READER), std::memory_order_relaxed); - s.my_internal_lock.store(RELEASED, std::memory_order_relaxed); - - queuing_rw_mutex::scoped_lock* predecessor = m.q_tail.exchange(&s, std::memory_order_release); - - if( write ) { // Acquiring for write - - if( predecessor ) { - ITT_NOTIFY(sync_prepare, s.my_mutex); - predecessor = tricky_pointer(predecessor) & ~FLAG; - __TBB_ASSERT( !( tricky_pointer(predecessor) & FLAG ), "use of corrupted pointer!" ); - #if TBB_USE_ASSERT - atomic_fence(std::memory_order_seq_cst); // on "m.q_tail" - __TBB_ASSERT( !predecessor->my_next, "the predecessor has another successor!"); - #endif - tricky_pointer::store(predecessor->my_next, &s, std::memory_order_release); - spin_wait_until_eq(s.my_going, 1U); + else { + release_internal_lock(s); + } + } + + //! Mask for low order bit of a pointer. + static const tricky_pointer::word FLAG = 0x1; + + static uintptr_t get_flag( d1::queuing_rw_mutex::scoped_lock* ptr ) { + return reinterpret_cast<uintptr_t>(ptr) & FLAG; + } + + //------------------------------------------------------------------------ + // Methods of queuing_rw_mutex::scoped_lock + //------------------------------------------------------------------------ + + //! A method to acquire queuing_rw_mutex lock + static void acquire(d1::queuing_rw_mutex& m, d1::queuing_rw_mutex::scoped_lock& s, bool write) + { + __TBB_ASSERT( !s.my_mutex, "scoped_lock is already holding a mutex"); + + // Must set all fields before the exchange, because once the + // exchange executes, *this becomes accessible to other threads. 
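The acquire path that follows is an MCS-style queue lock: the scoped_lock object itself is the queue node. It is initialized completely, swapped into q_tail, linked behind its predecessor, and then spins on its own my_going flag until the predecessor hands the mutex over. A minimal writer-only sketch with generic names (qnode, go and enqueue_and_wait are placeholders; the real code additionally encodes reader/writer/upgrade information in my_state and in the low-order FLAG bit managed by tricky_atomic_pointer):

    #include <atomic>

    struct qnode {
        std::atomic<qnode*>   next{nullptr};
        std::atomic<unsigned> go{0};
    };

    std::atomic<qnode*> q_tail{nullptr};

    // Publish a fully initialized node, then wait until the predecessor
    // hands the lock over by storing 1 to our go flag.
    void enqueue_and_wait(qnode& me) {
        qnode* pred = q_tail.exchange(&me, std::memory_order_acq_rel);
        if (pred) {
            pred->next.store(&me, std::memory_order_release);
            while (me.go.load(std::memory_order_acquire) == 0) { /* spin */ }
        }
        // lock held here; release would store go = 1 into me.next (not shown)
    }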
+ s.my_mutex = &m; + s.my_prev.store(0U, std::memory_order_relaxed); + s.my_next.store(0U, std::memory_order_relaxed); + s.my_going.store(0U, std::memory_order_relaxed); + s.my_state.store(d1::queuing_rw_mutex::scoped_lock::state_t(write ? STATE_WRITER : STATE_READER), std::memory_order_relaxed); + s.my_internal_lock.store(RELEASED, std::memory_order_relaxed); + + queuing_rw_mutex::scoped_lock* predecessor = m.q_tail.exchange(&s, std::memory_order_release); + + if( write ) { // Acquiring for write + + if( predecessor ) { + ITT_NOTIFY(sync_prepare, s.my_mutex); + predecessor = tricky_pointer(predecessor) & ~FLAG; + __TBB_ASSERT( !( tricky_pointer(predecessor) & FLAG ), "use of corrupted pointer!" ); + #if TBB_USE_ASSERT + atomic_fence(std::memory_order_seq_cst); // on "m.q_tail" + __TBB_ASSERT( !predecessor->my_next, "the predecessor has another successor!"); + #endif + tricky_pointer::store(predecessor->my_next, &s, std::memory_order_release); + spin_wait_until_eq(s.my_going, 1U); } - - } else { // Acquiring for read - #if __TBB_USE_ITT_NOTIFY - bool sync_prepare_done = false; - #endif - if( predecessor ) { - unsigned char pred_state; - __TBB_ASSERT( !s.my_prev, "the predecessor is already set" ); - if( tricky_pointer(predecessor) & FLAG ) { - /* this is only possible if predecessor is an upgrading reader and it signals us to wait */ - pred_state = STATE_UPGRADE_WAITING; - predecessor = tricky_pointer(predecessor) & ~FLAG; - } else { - // Load predecessor->my_state now, because once predecessor->my_next becomes - // non-NULL, we must assume that *predecessor might be destroyed. - pred_state = STATE_READER; - predecessor->my_state.compare_exchange_strong(pred_state, STATE_READER_UNBLOCKNEXT, std::memory_order_acq_rel); - } - tricky_pointer::store(s.my_prev, predecessor, std::memory_order_relaxed); - __TBB_ASSERT( !( tricky_pointer(predecessor) & FLAG ), "use of corrupted pointer!" ); - #if TBB_USE_ASSERT - atomic_fence(std::memory_order_seq_cst); // on "m.q_tail" - __TBB_ASSERT( !predecessor->my_next, "the predecessor has another successor!"); - #endif - tricky_pointer::store(predecessor->my_next, &s, std::memory_order_release); - if( pred_state != STATE_ACTIVEREADER ) { - #if __TBB_USE_ITT_NOTIFY - sync_prepare_done = true; - ITT_NOTIFY(sync_prepare, s.my_mutex); - #endif - spin_wait_until_eq(s.my_going, 1U); - } + + } else { // Acquiring for read + #if __TBB_USE_ITT_NOTIFY + bool sync_prepare_done = false; + #endif + if( predecessor ) { + unsigned char pred_state; + __TBB_ASSERT( !s.my_prev, "the predecessor is already set" ); + if( tricky_pointer(predecessor) & FLAG ) { + /* this is only possible if predecessor is an upgrading reader and it signals us to wait */ + pred_state = STATE_UPGRADE_WAITING; + predecessor = tricky_pointer(predecessor) & ~FLAG; + } else { + // Load predecessor->my_state now, because once predecessor->my_next becomes + // non-NULL, we must assume that *predecessor might be destroyed. + pred_state = STATE_READER; + predecessor->my_state.compare_exchange_strong(pred_state, STATE_READER_UNBLOCKNEXT, std::memory_order_acq_rel); + } + tricky_pointer::store(s.my_prev, predecessor, std::memory_order_relaxed); + __TBB_ASSERT( !( tricky_pointer(predecessor) & FLAG ), "use of corrupted pointer!" 
); + #if TBB_USE_ASSERT + atomic_fence(std::memory_order_seq_cst); // on "m.q_tail" + __TBB_ASSERT( !predecessor->my_next, "the predecessor has another successor!"); + #endif + tricky_pointer::store(predecessor->my_next, &s, std::memory_order_release); + if( pred_state != STATE_ACTIVEREADER ) { + #if __TBB_USE_ITT_NOTIFY + sync_prepare_done = true; + ITT_NOTIFY(sync_prepare, s.my_mutex); + #endif + spin_wait_until_eq(s.my_going, 1U); + } } - // The protected state must have been acquired here before it can be further released to any other reader(s): - unsigned char old_state = STATE_READER; - s.my_state.compare_exchange_strong(old_state, STATE_ACTIVEREADER, std::memory_order_acq_rel); - if( old_state!=STATE_READER ) { -#if __TBB_USE_ITT_NOTIFY - if( !sync_prepare_done ) - ITT_NOTIFY(sync_prepare, s.my_mutex); + // The protected state must have been acquired here before it can be further released to any other reader(s): + unsigned char old_state = STATE_READER; + s.my_state.compare_exchange_strong(old_state, STATE_ACTIVEREADER, std::memory_order_acq_rel); + if( old_state!=STATE_READER ) { +#if __TBB_USE_ITT_NOTIFY + if( !sync_prepare_done ) + ITT_NOTIFY(sync_prepare, s.my_mutex); #endif - // Failed to become active reader -> need to unblock the next waiting reader first - __TBB_ASSERT( s.my_state==STATE_READER_UNBLOCKNEXT, "unexpected state" ); - spin_wait_while_eq(s.my_next, 0U); - /* my_state should be changed before unblocking the next otherwise it might finish - and another thread can get our old state and left blocked */ - s.my_state.store(STATE_ACTIVEREADER, std::memory_order_relaxed); - tricky_pointer::load(s.my_next, std::memory_order_relaxed)->my_going.store(1U, std::memory_order_release); - } - __TBB_ASSERT( s.my_state==STATE_ACTIVEREADER, "unlocked reader is active reader" ); + // Failed to become active reader -> need to unblock the next waiting reader first + __TBB_ASSERT( s.my_state==STATE_READER_UNBLOCKNEXT, "unexpected state" ); + spin_wait_while_eq(s.my_next, 0U); + /* my_state should be changed before unblocking the next otherwise it might finish + and another thread can get our old state and left blocked */ + s.my_state.store(STATE_ACTIVEREADER, std::memory_order_relaxed); + tricky_pointer::load(s.my_next, std::memory_order_relaxed)->my_going.store(1U, std::memory_order_release); + } + __TBB_ASSERT( s.my_state==STATE_ACTIVEREADER, "unlocked reader is active reader" ); } - ITT_NOTIFY(sync_acquired, s.my_mutex); - - // Force acquire so that user's critical section receives correct values - // from processor that was previously in the user's critical section. - atomic_fence(std::memory_order_acquire); - } - - //! A method to acquire queuing_rw_mutex if it is free - static bool try_acquire(d1::queuing_rw_mutex& m, d1::queuing_rw_mutex::scoped_lock& s, bool write) - { - __TBB_ASSERT( !s.my_mutex, "scoped_lock is already holding a mutex"); - - if( m.q_tail.load(std::memory_order_relaxed) ) - return false; // Someone already took the lock - - // Must set all fields before the exchange, because once the - // exchange executes, *this becomes accessible to other threads. - s.my_prev.store(0U, std::memory_order_relaxed); - s.my_next.store(0U, std::memory_order_relaxed); - s.my_going.store(0U, std::memory_order_relaxed); // TODO: remove dead assignment? - s.my_state.store(d1::queuing_rw_mutex::scoped_lock::state_t(write ? 
STATE_WRITER : STATE_ACTIVEREADER), std::memory_order_relaxed); - s.my_internal_lock.store(RELEASED, std::memory_order_relaxed); - - // The CAS must have release semantics, because we are - // "sending" the fields initialized above to other processors. - d1::queuing_rw_mutex::scoped_lock* expected = nullptr; - if( !m.q_tail.compare_exchange_strong(expected, &s, std::memory_order_release) ) - return false; // Someone already took the lock - // Force acquire so that user's critical section receives correct values - // from processor that was previously in the user's critical section. - atomic_fence(std::memory_order_acquire); - s.my_mutex = &m; - ITT_NOTIFY(sync_acquired, s.my_mutex); - return true; - } - - //! A method to release queuing_rw_mutex lock - static void release(d1::queuing_rw_mutex::scoped_lock& s) { - __TBB_ASSERT(s.my_mutex!=nullptr, "no lock acquired"); - - ITT_NOTIFY(sync_releasing, s.my_mutex); - - if( s.my_state.load(std::memory_order_relaxed) == STATE_WRITER ) { // Acquired for write - - // The logic below is the same as "writerUnlock", but elides - // "return" from the middle of the routine. - // In the statement below, acquire semantics of reading my_next is required - // so that following operations with fields of my_next are safe. - d1::queuing_rw_mutex::scoped_lock* next = tricky_pointer::load(s.my_next, std::memory_order_acquire); - if( !next ) { - d1::queuing_rw_mutex::scoped_lock* expected = &s; - if( s.my_mutex->q_tail.compare_exchange_strong(expected, nullptr, std::memory_order_release) ) { - // this was the only item in the queue, and the queue is now empty. - goto done; - } - spin_wait_while_eq( s.my_next, 0U ); - next = tricky_pointer::load(s.my_next, std::memory_order_acquire); + ITT_NOTIFY(sync_acquired, s.my_mutex); + + // Force acquire so that user's critical section receives correct values + // from processor that was previously in the user's critical section. + atomic_fence(std::memory_order_acquire); + } + + //! A method to acquire queuing_rw_mutex if it is free + static bool try_acquire(d1::queuing_rw_mutex& m, d1::queuing_rw_mutex::scoped_lock& s, bool write) + { + __TBB_ASSERT( !s.my_mutex, "scoped_lock is already holding a mutex"); + + if( m.q_tail.load(std::memory_order_relaxed) ) + return false; // Someone already took the lock + + // Must set all fields before the exchange, because once the + // exchange executes, *this becomes accessible to other threads. + s.my_prev.store(0U, std::memory_order_relaxed); + s.my_next.store(0U, std::memory_order_relaxed); + s.my_going.store(0U, std::memory_order_relaxed); // TODO: remove dead assignment? + s.my_state.store(d1::queuing_rw_mutex::scoped_lock::state_t(write ? STATE_WRITER : STATE_ACTIVEREADER), std::memory_order_relaxed); + s.my_internal_lock.store(RELEASED, std::memory_order_relaxed); + + // The CAS must have release semantics, because we are + // "sending" the fields initialized above to other processors. + d1::queuing_rw_mutex::scoped_lock* expected = nullptr; + if( !m.q_tail.compare_exchange_strong(expected, &s, std::memory_order_release) ) + return false; // Someone already took the lock + // Force acquire so that user's critical section receives correct values + // from processor that was previously in the user's critical section. + atomic_fence(std::memory_order_acquire); + s.my_mutex = &m; + ITT_NOTIFY(sync_acquired, s.my_mutex); + return true; + } + + //! 
A method to release queuing_rw_mutex lock + static void release(d1::queuing_rw_mutex::scoped_lock& s) { + __TBB_ASSERT(s.my_mutex!=nullptr, "no lock acquired"); + + ITT_NOTIFY(sync_releasing, s.my_mutex); + + if( s.my_state.load(std::memory_order_relaxed) == STATE_WRITER ) { // Acquired for write + + // The logic below is the same as "writerUnlock", but elides + // "return" from the middle of the routine. + // In the statement below, acquire semantics of reading my_next is required + // so that following operations with fields of my_next are safe. + d1::queuing_rw_mutex::scoped_lock* next = tricky_pointer::load(s.my_next, std::memory_order_acquire); + if( !next ) { + d1::queuing_rw_mutex::scoped_lock* expected = &s; + if( s.my_mutex->q_tail.compare_exchange_strong(expected, nullptr, std::memory_order_release) ) { + // this was the only item in the queue, and the queue is now empty. + goto done; + } + spin_wait_while_eq( s.my_next, 0U ); + next = tricky_pointer::load(s.my_next, std::memory_order_acquire); + } + next->my_going.store(2U, std::memory_order_relaxed); // protect next queue node from being destroyed too early + if( next->my_state==STATE_UPGRADE_WAITING ) { + // the next waiting for upgrade means this writer was upgraded before. + acquire_internal_lock(s); + // Responsibility transition, the one who reads uncorrupted my_prev will do release. + d1::queuing_rw_mutex::scoped_lock* tmp = tricky_pointer::exchange(next->my_prev, nullptr, std::memory_order_release); + next->my_state.store(STATE_UPGRADE_LOSER, std::memory_order_relaxed); + next->my_going.store(1U, std::memory_order_release); + unblock_or_wait_on_internal_lock(s, get_flag(tmp)); + } else { + // next->state cannot be STATE_UPGRADE_REQUESTED + __TBB_ASSERT( next->my_state & (STATE_COMBINED_WAITINGREADER | STATE_WRITER), "unexpected state" ); + __TBB_ASSERT( !( next->my_prev.load() & FLAG ), "use of corrupted pointer!" ); + tricky_pointer::store(next->my_prev, nullptr, std::memory_order_relaxed); + next->my_going.store(1U, std::memory_order_release); } - next->my_going.store(2U, std::memory_order_relaxed); // protect next queue node from being destroyed too early - if( next->my_state==STATE_UPGRADE_WAITING ) { - // the next waiting for upgrade means this writer was upgraded before. - acquire_internal_lock(s); - // Responsibility transition, the one who reads uncorrupted my_prev will do release. - d1::queuing_rw_mutex::scoped_lock* tmp = tricky_pointer::exchange(next->my_prev, nullptr, std::memory_order_release); - next->my_state.store(STATE_UPGRADE_LOSER, std::memory_order_relaxed); - next->my_going.store(1U, std::memory_order_release); - unblock_or_wait_on_internal_lock(s, get_flag(tmp)); - } else { - // next->state cannot be STATE_UPGRADE_REQUESTED - __TBB_ASSERT( next->my_state & (STATE_COMBINED_WAITINGREADER | STATE_WRITER), "unexpected state" ); - __TBB_ASSERT( !( next->my_prev.load() & FLAG ), "use of corrupted pointer!" ); - tricky_pointer::store(next->my_prev, nullptr, std::memory_order_relaxed); - next->my_going.store(1U, std::memory_order_release); - } - - } else { // Acquired for read - - queuing_rw_mutex::scoped_lock *tmp = nullptr; - retry: - // Addition to the original paper: Mark my_prev as in use - queuing_rw_mutex::scoped_lock *predecessor = tricky_pointer::fetch_add(s.my_prev, FLAG, std::memory_order_acquire); - - if( predecessor ) { - if( !(try_acquire_internal_lock(*predecessor)) ) - { - // Failed to acquire the lock on predecessor. The predecessor either unlinks or upgrades. 
- // In the second case, it could or could not know my "in use" flag - need to check - // Responsibility transition, the one who reads uncorrupted my_prev will do release. - tmp = tricky_pointer::compare_exchange_strong(s.my_prev, tricky_pointer(predecessor) | FLAG, predecessor, std::memory_order_release); - if( !(tricky_pointer(tmp) & FLAG) ) { - // Wait for the predecessor to change my_prev (e.g. during unlink) - // TODO: spin_wait condition seems never reachable - tricky_pointer::spin_wait_while_eq( s.my_prev, tricky_pointer(predecessor)|FLAG ); - // Now owner of predecessor is waiting for _us_ to release its lock - release_internal_lock(*predecessor); - } - // else the "in use" flag is back -> the predecessor didn't get it and will release itself; nothing to do - - tmp = nullptr; - goto retry; + + } else { // Acquired for read + + queuing_rw_mutex::scoped_lock *tmp = nullptr; + retry: + // Addition to the original paper: Mark my_prev as in use + queuing_rw_mutex::scoped_lock *predecessor = tricky_pointer::fetch_add(s.my_prev, FLAG, std::memory_order_acquire); + + if( predecessor ) { + if( !(try_acquire_internal_lock(*predecessor)) ) + { + // Failed to acquire the lock on predecessor. The predecessor either unlinks or upgrades. + // In the second case, it could or could not know my "in use" flag - need to check + // Responsibility transition, the one who reads uncorrupted my_prev will do release. + tmp = tricky_pointer::compare_exchange_strong(s.my_prev, tricky_pointer(predecessor) | FLAG, predecessor, std::memory_order_release); + if( !(tricky_pointer(tmp) & FLAG) ) { + // Wait for the predecessor to change my_prev (e.g. during unlink) + // TODO: spin_wait condition seems never reachable + tricky_pointer::spin_wait_while_eq( s.my_prev, tricky_pointer(predecessor)|FLAG ); + // Now owner of predecessor is waiting for _us_ to release its lock + release_internal_lock(*predecessor); + } + // else the "in use" flag is back -> the predecessor didn't get it and will release itself; nothing to do + + tmp = nullptr; + goto retry; + } + __TBB_ASSERT(predecessor && predecessor->my_internal_lock.load(std::memory_order_relaxed)==ACQUIRED, "predecessor's lock is not acquired"); + tricky_pointer::store(s.my_prev, predecessor, std::memory_order_relaxed); + acquire_internal_lock(s); + + tricky_pointer::store(predecessor->my_next, nullptr, std::memory_order_release); + + d1::queuing_rw_mutex::scoped_lock* expected = &s; + if( !tricky_pointer::load(s.my_next, std::memory_order_relaxed) && !s.my_mutex->q_tail.compare_exchange_strong(expected, predecessor, std::memory_order_release) ) { + spin_wait_while_eq( s.my_next, 0U ); + } + __TBB_ASSERT( !(s.my_next.load() & FLAG), "use of corrupted pointer" ); + + // ensure acquire semantics of reading 'my_next' + if(d1::queuing_rw_mutex::scoped_lock *const l_next = tricky_pointer::load(s.my_next, std::memory_order_acquire) ) { // I->next != nil, TODO: rename to next after clearing up and adapting the n in the comment two lines below + // Equivalent to I->next->prev = I->prev but protected against (prev[n]&FLAG)!=0 + tmp = tricky_pointer::exchange(l_next->my_prev, predecessor, std::memory_order_release); + // I->prev->next = I->next; + __TBB_ASSERT(tricky_pointer::load(s.my_prev, std::memory_order_relaxed)==predecessor, nullptr); + predecessor->my_next.store(s.my_next.load(std::memory_order_relaxed), std::memory_order_release); } - __TBB_ASSERT(predecessor && predecessor->my_internal_lock.load(std::memory_order_relaxed)==ACQUIRED, "predecessor's lock is not 
acquired"); - tricky_pointer::store(s.my_prev, predecessor, std::memory_order_relaxed); - acquire_internal_lock(s); - - tricky_pointer::store(predecessor->my_next, nullptr, std::memory_order_release); - - d1::queuing_rw_mutex::scoped_lock* expected = &s; - if( !tricky_pointer::load(s.my_next, std::memory_order_relaxed) && !s.my_mutex->q_tail.compare_exchange_strong(expected, predecessor, std::memory_order_release) ) { - spin_wait_while_eq( s.my_next, 0U ); - } - __TBB_ASSERT( !(s.my_next.load() & FLAG), "use of corrupted pointer" ); - - // ensure acquire semantics of reading 'my_next' - if(d1::queuing_rw_mutex::scoped_lock *const l_next = tricky_pointer::load(s.my_next, std::memory_order_acquire) ) { // I->next != nil, TODO: rename to next after clearing up and adapting the n in the comment two lines below - // Equivalent to I->next->prev = I->prev but protected against (prev[n]&FLAG)!=0 - tmp = tricky_pointer::exchange(l_next->my_prev, predecessor, std::memory_order_release); - // I->prev->next = I->next; - __TBB_ASSERT(tricky_pointer::load(s.my_prev, std::memory_order_relaxed)==predecessor, nullptr); - predecessor->my_next.store(s.my_next.load(std::memory_order_relaxed), std::memory_order_release); - } - // Safe to release in the order opposite to acquiring which makes the code simpler - release_internal_lock(*predecessor); - - } else { // No predecessor when we looked - acquire_internal_lock(s); // "exclusiveLock(&I->EL)" - d1::queuing_rw_mutex::scoped_lock* next = tricky_pointer::load(s.my_next, std::memory_order_acquire); - if( !next ) { - d1::queuing_rw_mutex::scoped_lock* expected = &s; - if( !s.my_mutex->q_tail.compare_exchange_strong(expected, nullptr, std::memory_order_release) ) { - spin_wait_while_eq( s.my_next, 0U ); - next = tricky_pointer::load(s.my_next, std::memory_order_relaxed); - } else { - goto unlock_self; - } + // Safe to release in the order opposite to acquiring which makes the code simpler + release_internal_lock(*predecessor); + + } else { // No predecessor when we looked + acquire_internal_lock(s); // "exclusiveLock(&I->EL)" + d1::queuing_rw_mutex::scoped_lock* next = tricky_pointer::load(s.my_next, std::memory_order_acquire); + if( !next ) { + d1::queuing_rw_mutex::scoped_lock* expected = &s; + if( !s.my_mutex->q_tail.compare_exchange_strong(expected, nullptr, std::memory_order_release) ) { + spin_wait_while_eq( s.my_next, 0U ); + next = tricky_pointer::load(s.my_next, std::memory_order_relaxed); + } else { + goto unlock_self; + } } - next->my_going.store(2U, std::memory_order_relaxed); - // Responsibility transition, the one who reads uncorrupted my_prev will do release. - tmp = tricky_pointer::exchange(next->my_prev, nullptr, std::memory_order_release); - next->my_going.store(1U, std::memory_order_release); + next->my_going.store(2U, std::memory_order_relaxed); + // Responsibility transition, the one who reads uncorrupted my_prev will do release. 
+ tmp = tricky_pointer::exchange(next->my_prev, nullptr, std::memory_order_release); + next->my_going.store(1U, std::memory_order_release); } - unlock_self: - unblock_or_wait_on_internal_lock(s, get_flag(tmp)); + unlock_self: + unblock_or_wait_on_internal_lock(s, get_flag(tmp)); } - done: - spin_wait_while_eq( s.my_going, 2U ); - - s.initialize(); + done: + spin_wait_while_eq( s.my_going, 2U ); + + s.initialize(); } - static bool downgrade_to_reader(d1::queuing_rw_mutex::scoped_lock& s) { - if ( s.my_state.load(std::memory_order_relaxed) == STATE_ACTIVEREADER ) return true; // Already a reader - - ITT_NOTIFY(sync_releasing, s.my_mutex); - s.my_state.store(STATE_READER, std::memory_order_relaxed); - if( ! tricky_pointer::load(s.my_next, std::memory_order_relaxed)) { - // the following load of q_tail must not be reordered with setting STATE_READER above - if( &s==s.my_mutex->q_tail.load() ) { - unsigned char old_state = STATE_READER; - s.my_state.compare_exchange_strong(old_state, STATE_ACTIVEREADER, std::memory_order_release); - if( old_state==STATE_READER ) - return true; // Downgrade completed - } - /* wait for the next to register */ - spin_wait_while_eq( s.my_next, 0U ); + static bool downgrade_to_reader(d1::queuing_rw_mutex::scoped_lock& s) { + if ( s.my_state.load(std::memory_order_relaxed) == STATE_ACTIVEREADER ) return true; // Already a reader + + ITT_NOTIFY(sync_releasing, s.my_mutex); + s.my_state.store(STATE_READER, std::memory_order_relaxed); + if( ! tricky_pointer::load(s.my_next, std::memory_order_relaxed)) { + // the following load of q_tail must not be reordered with setting STATE_READER above + if( &s==s.my_mutex->q_tail.load() ) { + unsigned char old_state = STATE_READER; + s.my_state.compare_exchange_strong(old_state, STATE_ACTIVEREADER, std::memory_order_release); + if( old_state==STATE_READER ) + return true; // Downgrade completed + } + /* wait for the next to register */ + spin_wait_while_eq( s.my_next, 0U ); } - d1::queuing_rw_mutex::scoped_lock *const next = tricky_pointer::load(s.my_next, std::memory_order_acquire); - __TBB_ASSERT( next, "still no successor at this point!" ); - if( next->my_state & STATE_COMBINED_WAITINGREADER ) - next->my_going.store(1U, std::memory_order_release); - else if( next->my_state==STATE_UPGRADE_WAITING ) - // the next waiting for upgrade means this writer was upgraded before. - next->my_state.store(STATE_UPGRADE_LOSER, std::memory_order_relaxed); - s.my_state.store(STATE_ACTIVEREADER, std::memory_order_relaxed);; - return true; + d1::queuing_rw_mutex::scoped_lock *const next = tricky_pointer::load(s.my_next, std::memory_order_acquire); + __TBB_ASSERT( next, "still no successor at this point!" ); + if( next->my_state & STATE_COMBINED_WAITINGREADER ) + next->my_going.store(1U, std::memory_order_release); + else if( next->my_state==STATE_UPGRADE_WAITING ) + // the next waiting for upgrade means this writer was upgraded before. 
+ next->my_state.store(STATE_UPGRADE_LOSER, std::memory_order_relaxed); + s.my_state.store(STATE_ACTIVEREADER, std::memory_order_relaxed);; + return true; } - static bool upgrade_to_writer(d1::queuing_rw_mutex::scoped_lock& s) { - if ( s.my_state.load(std::memory_order_relaxed) == STATE_WRITER ) return true; // Already a writer - - __TBB_ASSERT( s.my_state==STATE_ACTIVEREADER, "only active reader can be updated" ); - - queuing_rw_mutex::scoped_lock * tmp; - queuing_rw_mutex::scoped_lock * me = &s; - - ITT_NOTIFY(sync_releasing, s.my_mutex); - s.my_state.store(STATE_UPGRADE_REQUESTED, std::memory_order_relaxed); - requested: - __TBB_ASSERT( !(s.my_next.load() & FLAG), "use of corrupted pointer!" ); - acquire_internal_lock(s); - d1::queuing_rw_mutex::scoped_lock* expected = &s; - if( !s.my_mutex->q_tail.compare_exchange_strong(expected, tricky_pointer(me)|FLAG, std::memory_order_release) ) { - spin_wait_while_eq( s.my_next, 0U ); - queuing_rw_mutex::scoped_lock * next; - next = tricky_pointer::fetch_add(s.my_next, FLAG, std::memory_order_acquire); - unsigned short n_state = next->my_state; - /* the next reader can be blocked by our state. the best thing to do is to unblock it */ - if( n_state & STATE_COMBINED_WAITINGREADER ) - next->my_going.store(1U, std::memory_order_release); - // Responsibility transition, the one who reads uncorrupted my_prev will do release. - tmp = tricky_pointer::exchange(next->my_prev, &s, std::memory_order_release); - unblock_or_wait_on_internal_lock(s, get_flag(tmp)); - if( n_state & (STATE_COMBINED_READER | STATE_UPGRADE_REQUESTED) ) { - // save next|FLAG for simplicity of following comparisons - tmp = tricky_pointer(next)|FLAG; - for( atomic_backoff b; tricky_pointer::load(s.my_next, std::memory_order_relaxed)==tmp; b.pause() ) { - if( s.my_state & STATE_COMBINED_UPGRADING ) { - if( tricky_pointer::load(s.my_next, std::memory_order_acquire)==tmp ) - tricky_pointer::store(s.my_next, next, std::memory_order_relaxed); - goto waiting; - } + static bool upgrade_to_writer(d1::queuing_rw_mutex::scoped_lock& s) { + if ( s.my_state.load(std::memory_order_relaxed) == STATE_WRITER ) return true; // Already a writer + + __TBB_ASSERT( s.my_state==STATE_ACTIVEREADER, "only active reader can be updated" ); + + queuing_rw_mutex::scoped_lock * tmp; + queuing_rw_mutex::scoped_lock * me = &s; + + ITT_NOTIFY(sync_releasing, s.my_mutex); + s.my_state.store(STATE_UPGRADE_REQUESTED, std::memory_order_relaxed); + requested: + __TBB_ASSERT( !(s.my_next.load() & FLAG), "use of corrupted pointer!" ); + acquire_internal_lock(s); + d1::queuing_rw_mutex::scoped_lock* expected = &s; + if( !s.my_mutex->q_tail.compare_exchange_strong(expected, tricky_pointer(me)|FLAG, std::memory_order_release) ) { + spin_wait_while_eq( s.my_next, 0U ); + queuing_rw_mutex::scoped_lock * next; + next = tricky_pointer::fetch_add(s.my_next, FLAG, std::memory_order_acquire); + unsigned short n_state = next->my_state; + /* the next reader can be blocked by our state. the best thing to do is to unblock it */ + if( n_state & STATE_COMBINED_WAITINGREADER ) + next->my_going.store(1U, std::memory_order_release); + // Responsibility transition, the one who reads uncorrupted my_prev will do release. 
+ tmp = tricky_pointer::exchange(next->my_prev, &s, std::memory_order_release); + unblock_or_wait_on_internal_lock(s, get_flag(tmp)); + if( n_state & (STATE_COMBINED_READER | STATE_UPGRADE_REQUESTED) ) { + // save next|FLAG for simplicity of following comparisons + tmp = tricky_pointer(next)|FLAG; + for( atomic_backoff b; tricky_pointer::load(s.my_next, std::memory_order_relaxed)==tmp; b.pause() ) { + if( s.my_state & STATE_COMBINED_UPGRADING ) { + if( tricky_pointer::load(s.my_next, std::memory_order_acquire)==tmp ) + tricky_pointer::store(s.my_next, next, std::memory_order_relaxed); + goto waiting; + } } - __TBB_ASSERT(tricky_pointer::load(s.my_next, std::memory_order_relaxed) != (tricky_pointer(next)|FLAG), nullptr); - goto requested; - } else { - __TBB_ASSERT( n_state & (STATE_WRITER | STATE_UPGRADE_WAITING), "unexpected state"); - __TBB_ASSERT( (tricky_pointer(next)|FLAG) == tricky_pointer::load(s.my_next, std::memory_order_relaxed), nullptr); - tricky_pointer::store(s.my_next, next, std::memory_order_relaxed); + __TBB_ASSERT(tricky_pointer::load(s.my_next, std::memory_order_relaxed) != (tricky_pointer(next)|FLAG), nullptr); + goto requested; + } else { + __TBB_ASSERT( n_state & (STATE_WRITER | STATE_UPGRADE_WAITING), "unexpected state"); + __TBB_ASSERT( (tricky_pointer(next)|FLAG) == tricky_pointer::load(s.my_next, std::memory_order_relaxed), nullptr); + tricky_pointer::store(s.my_next, next, std::memory_order_relaxed); } } else { - /* We are in the tail; whoever comes next is blocked by q_tail&FLAG */ - release_internal_lock(s); - } // if( this != my_mutex->q_tail... ) - { - unsigned char old_state = STATE_UPGRADE_REQUESTED; - s.my_state.compare_exchange_strong(old_state, STATE_UPGRADE_WAITING, std::memory_order_acquire); + /* We are in the tail; whoever comes next is blocked by q_tail&FLAG */ + release_internal_lock(s); + } // if( this != my_mutex->q_tail... ) + { + unsigned char old_state = STATE_UPGRADE_REQUESTED; + s.my_state.compare_exchange_strong(old_state, STATE_UPGRADE_WAITING, std::memory_order_acquire); } - waiting: - __TBB_ASSERT( !( s.my_next.load(std::memory_order_relaxed) & FLAG ), "use of corrupted pointer!" ); - __TBB_ASSERT( s.my_state & STATE_COMBINED_UPGRADING, "wrong state at upgrade waiting_retry" ); - __TBB_ASSERT( me==&s, nullptr ); - ITT_NOTIFY(sync_prepare, s.my_mutex); - /* if no one was blocked by the "corrupted" q_tail, turn it back */ - expected = tricky_pointer(me)|FLAG; - s.my_mutex->q_tail.compare_exchange_strong(expected, &s, std::memory_order_release); - queuing_rw_mutex::scoped_lock * predecessor; - // Mark my_prev as 'in use' to prevent predecessor from releasing - predecessor = tricky_pointer::fetch_add(s.my_prev, FLAG, std::memory_order_acquire); - if( predecessor ) { - bool success = try_acquire_internal_lock(*predecessor); - { - // While the predecessor pointer (my_prev) is in use (FLAG is set), we can safely update the node`s state. - // Corrupted pointer transitions responsibility to release the predecessor`s node on us. - unsigned char old_state = STATE_UPGRADE_REQUESTED; - predecessor->my_state.compare_exchange_strong(old_state, STATE_UPGRADE_WAITING, std::memory_order_release); - } - if( !success ) { - // Responsibility transition, the one who reads uncorrupted my_prev will do release. 
- tmp = tricky_pointer::compare_exchange_strong(s.my_prev, tricky_pointer(predecessor)|FLAG, predecessor, std::memory_order_release); - if( tricky_pointer(tmp) & FLAG ) { - tricky_pointer::spin_wait_while_eq(s.my_prev, predecessor); - predecessor = tricky_pointer::load(s.my_prev, std::memory_order_relaxed); - } else { - // TODO: spin_wait condition seems never reachable - tricky_pointer::spin_wait_while_eq(s.my_prev, tricky_pointer(predecessor)|FLAG); - release_internal_lock(*predecessor); - } + waiting: + __TBB_ASSERT( !( s.my_next.load(std::memory_order_relaxed) & FLAG ), "use of corrupted pointer!" ); + __TBB_ASSERT( s.my_state & STATE_COMBINED_UPGRADING, "wrong state at upgrade waiting_retry" ); + __TBB_ASSERT( me==&s, nullptr ); + ITT_NOTIFY(sync_prepare, s.my_mutex); + /* if no one was blocked by the "corrupted" q_tail, turn it back */ + expected = tricky_pointer(me)|FLAG; + s.my_mutex->q_tail.compare_exchange_strong(expected, &s, std::memory_order_release); + queuing_rw_mutex::scoped_lock * predecessor; + // Mark my_prev as 'in use' to prevent predecessor from releasing + predecessor = tricky_pointer::fetch_add(s.my_prev, FLAG, std::memory_order_acquire); + if( predecessor ) { + bool success = try_acquire_internal_lock(*predecessor); + { + // While the predecessor pointer (my_prev) is in use (FLAG is set), we can safely update the node`s state. + // Corrupted pointer transitions responsibility to release the predecessor`s node on us. + unsigned char old_state = STATE_UPGRADE_REQUESTED; + predecessor->my_state.compare_exchange_strong(old_state, STATE_UPGRADE_WAITING, std::memory_order_release); + } + if( !success ) { + // Responsibility transition, the one who reads uncorrupted my_prev will do release. + tmp = tricky_pointer::compare_exchange_strong(s.my_prev, tricky_pointer(predecessor)|FLAG, predecessor, std::memory_order_release); + if( tricky_pointer(tmp) & FLAG ) { + tricky_pointer::spin_wait_while_eq(s.my_prev, predecessor); + predecessor = tricky_pointer::load(s.my_prev, std::memory_order_relaxed); + } else { + // TODO: spin_wait condition seems never reachable + tricky_pointer::spin_wait_while_eq(s.my_prev, tricky_pointer(predecessor)|FLAG); + release_internal_lock(*predecessor); + } } else { - tricky_pointer::store(s.my_prev, predecessor, std::memory_order_relaxed); - release_internal_lock(*predecessor); - tricky_pointer::spin_wait_while_eq(s.my_prev, predecessor); - predecessor = tricky_pointer::load(s.my_prev, std::memory_order_relaxed); + tricky_pointer::store(s.my_prev, predecessor, std::memory_order_relaxed); + release_internal_lock(*predecessor); + tricky_pointer::spin_wait_while_eq(s.my_prev, predecessor); + predecessor = tricky_pointer::load(s.my_prev, std::memory_order_relaxed); } - if( predecessor ) - goto waiting; + if( predecessor ) + goto waiting; } else { - tricky_pointer::store(s.my_prev, nullptr, std::memory_order_relaxed); + tricky_pointer::store(s.my_prev, nullptr, std::memory_order_relaxed); } - __TBB_ASSERT( !predecessor && !s.my_prev, nullptr ); - - // additional lifetime issue prevention checks - // wait for the successor to finish working with my fields - wait_for_release_of_internal_lock(s); - // now wait for the predecessor to finish working with my fields - spin_wait_while_eq( s.my_going, 2U ); - - // Acquire critical section indirectly from previous owner or directly from predecessor (TODO: not clear). 
- atomic_fence(std::memory_order_acquire); // on either "my_mutex->q_tail" or "my_going" (TODO: not clear) - - bool result = ( s.my_state != STATE_UPGRADE_LOSER ); - s.my_state.store(STATE_WRITER, std::memory_order_relaxed); - s.my_going.store(1U, std::memory_order_relaxed); - - ITT_NOTIFY(sync_acquired, s.my_mutex); - return result; + __TBB_ASSERT( !predecessor && !s.my_prev, nullptr ); + + // additional lifetime issue prevention checks + // wait for the successor to finish working with my fields + wait_for_release_of_internal_lock(s); + // now wait for the predecessor to finish working with my fields + spin_wait_while_eq( s.my_going, 2U ); + + // Acquire critical section indirectly from previous owner or directly from predecessor (TODO: not clear). + atomic_fence(std::memory_order_acquire); // on either "my_mutex->q_tail" or "my_going" (TODO: not clear) + + bool result = ( s.my_state != STATE_UPGRADE_LOSER ); + s.my_state.store(STATE_WRITER, std::memory_order_relaxed); + s.my_going.store(1U, std::memory_order_relaxed); + + ITT_NOTIFY(sync_acquired, s.my_mutex); + return result; } - static void construct(d1::queuing_rw_mutex& m) { - suppress_unused_warning(m); - ITT_SYNC_CREATE(&m, _T("tbb::queuing_rw_mutex"), _T("")); - } -}; + static void construct(d1::queuing_rw_mutex& m) { + suppress_unused_warning(m); + ITT_SYNC_CREATE(&m, _T("tbb::queuing_rw_mutex"), _T("")); + } +}; + +void __TBB_EXPORTED_FUNC acquire(d1::queuing_rw_mutex& m, d1::queuing_rw_mutex::scoped_lock& s, bool write) { + queuing_rw_mutex_impl::acquire(m, s, write); +} -void __TBB_EXPORTED_FUNC acquire(d1::queuing_rw_mutex& m, d1::queuing_rw_mutex::scoped_lock& s, bool write) { - queuing_rw_mutex_impl::acquire(m, s, write); -} +bool __TBB_EXPORTED_FUNC try_acquire(d1::queuing_rw_mutex& m, d1::queuing_rw_mutex::scoped_lock& s, bool write) { + return queuing_rw_mutex_impl::try_acquire(m, s, write); +} -bool __TBB_EXPORTED_FUNC try_acquire(d1::queuing_rw_mutex& m, d1::queuing_rw_mutex::scoped_lock& s, bool write) { - return queuing_rw_mutex_impl::try_acquire(m, s, write); -} +void __TBB_EXPORTED_FUNC release(d1::queuing_rw_mutex::scoped_lock& s) { + queuing_rw_mutex_impl::release(s); +} + +bool __TBB_EXPORTED_FUNC upgrade_to_writer(d1::queuing_rw_mutex::scoped_lock& s) { + return queuing_rw_mutex_impl::upgrade_to_writer(s); +} -void __TBB_EXPORTED_FUNC release(d1::queuing_rw_mutex::scoped_lock& s) { - queuing_rw_mutex_impl::release(s); +bool __TBB_EXPORTED_FUNC downgrade_to_reader(d1::queuing_rw_mutex::scoped_lock& s) { + return queuing_rw_mutex_impl::downgrade_to_reader(s); } -bool __TBB_EXPORTED_FUNC upgrade_to_writer(d1::queuing_rw_mutex::scoped_lock& s) { - return queuing_rw_mutex_impl::upgrade_to_writer(s); +void __TBB_EXPORTED_FUNC construct(d1::queuing_rw_mutex& m) { + queuing_rw_mutex_impl::construct(m); } -bool __TBB_EXPORTED_FUNC downgrade_to_reader(d1::queuing_rw_mutex::scoped_lock& s) { - return queuing_rw_mutex_impl::downgrade_to_reader(s); -} - -void __TBB_EXPORTED_FUNC construct(d1::queuing_rw_mutex& m) { - queuing_rw_mutex_impl::construct(m); -} - -} // namespace r1 -} // namespace detail +} // namespace r1 +} // namespace detail } // namespace tbb diff --git a/contrib/libs/tbb/src/tbb/rml_base.h b/contrib/libs/tbb/src/tbb/rml_base.h index 8f937b6fc8..9e1705837c 100644 --- a/contrib/libs/tbb/src/tbb/rml_base.h +++ b/contrib/libs/tbb/src/tbb/rml_base.h @@ -1,163 +1,163 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file 
except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -// Header guard and namespace names follow rml conventions. - -#ifndef __RML_rml_base_H -#define __RML_rml_base_H - -#include <cstddef> - -#if _WIN32||_WIN64 -#include <windows.h> -#endif /* _WIN32||_WIN64 */ - -#ifdef RML_PURE_VIRTUAL_HANDLER -#define RML_PURE(T) {RML_PURE_VIRTUAL_HANDLER(); return (T)0;} -#else -#define RML_PURE(T) = 0; -#endif - -namespace rml { - -class server; - -class versioned_object { -public: - //! A version number - typedef unsigned version_type; - - virtual ~versioned_object() {} - - //! Get version of this object - /** The version number is incremented when a incompatible change is introduced. - The version number is invariant for the lifetime of the object. */ - virtual version_type version() const RML_PURE(version_type) - -}; - -//! Represents a client's job for an execution context. -/** A job object is constructed by the client. - Not derived from versioned_object because version is same as for client. */ -class job { - friend class server; -}; - -//! Information that client provides to server when asking for a server. -/** The instance must endure at least until acknowledge_close_connection is called. */ -class client: public versioned_object { -public: - //! Typedef for convenience of derived classes in other namespaces. - typedef ::rml::job job; - - //! Index of a job in a job pool - typedef unsigned size_type; - - //! Maximum number of threads that client can exploit profitably if nothing else is running on the machine. - /** The returned value should remain invariant for the lifetime of the connection. [idempotent] */ - virtual size_type max_job_count() const RML_PURE(size_type) - - //! Minimum stack size for each job. 0 means to use default stack size. [idempotent] - virtual std::size_t min_stack_size() const RML_PURE(std::size_t) - - //! Server calls this routine when it needs client to create a job object. - virtual job* create_one_job() RML_PURE(job*) - - //! Acknowledge that all jobs have been cleaned up. - /** Called by server in response to request_close_connection - after cleanup(job) has been called for each job. */ - virtual void acknowledge_close_connection() RML_PURE(void) - - //! Inform client that server is done with *this. - /** Client should destroy the job. - Not necessarily called by execution context represented by *this. - Never called while any other thread is working on the job. */ - virtual void cleanup( job& ) RML_PURE(void) - - // In general, we should not add new virtual methods, because that would - // break derived classes. Think about reserving some vtable slots. -}; - -// Information that server provides to client. -// Virtual functions are routines provided by the server for the client to call. -class server: public versioned_object { -public: - //! Typedef for convenience of derived classes. - typedef ::rml::job job; - -#if _WIN32||_WIN64 - typedef void* execution_resource_t; -#endif - - //! Request that connection to server be closed. 
- /** Causes each job associated with the client to have its cleanup method called, - possibly by a thread different than the thread that created the job. - This method can return before all cleanup methods return. - Actions that have to wait after all cleanup methods return should be part of - client::acknowledge_close_connection. - Pass true as exiting if request_close_connection() is called because exit() is - called. In that case, it is the client's responsibility to make sure all threads - are terminated. In all other cases, pass false. */ - virtual void request_close_connection( bool exiting = false ) = 0; - - //! Called by client thread when it reaches a point where it cannot make progress until other threads do. - virtual void yield() = 0; - - //! Called by client to indicate a change in the number of non-RML threads that are running. - /** This is a performance hint to the RML to adjust how many threads it should let run - concurrently. The delta is the change in the number of non-RML threads that are running. - For example, a value of 1 means the client has started running another thread, and a value - of -1 indicates that the client has blocked or terminated one of its threads. */ - virtual void independent_thread_number_changed( int delta ) = 0; - - //! Default level of concurrency for which RML strives when there are no non-RML threads running. - /** Normally, the value is the hardware concurrency minus one. - The "minus one" accounts for the thread created by main(). */ - virtual unsigned default_concurrency() const = 0; -}; - -class factory { -public: - //! status results - enum status_type { - st_success=0, - st_connection_exists, - st_not_found, - st_incompatible - }; - -protected: - //! Pointer to routine that waits for server to indicate when client can close itself. - status_type (*my_wait_to_close_routine)( factory& ); - -public: - //! Library handle for use by RML. -#if _WIN32||_WIN64 - HMODULE library_handle; -#else - void* library_handle; -#endif /* _WIN32||_WIN64 */ - - //! Special marker to keep dll from being unloaded prematurely - static const std::size_t c_dont_unload = 1; -}; - -//! Typedef for callback functions to print server info -typedef void (*server_info_callback_t)( void* arg, const char* server_info ); - -} // namespace rml - -#endif /* __RML_rml_base_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +// Header guard and namespace names follow rml conventions. + +#ifndef __RML_rml_base_H +#define __RML_rml_base_H + +#include <cstddef> + +#if _WIN32||_WIN64 +#include <windows.h> +#endif /* _WIN32||_WIN64 */ + +#ifdef RML_PURE_VIRTUAL_HANDLER +#define RML_PURE(T) {RML_PURE_VIRTUAL_HANDLER(); return (T)0;} +#else +#define RML_PURE(T) = 0; +#endif + +namespace rml { + +class server; + +class versioned_object { +public: + //! A version number + typedef unsigned version_type; + + virtual ~versioned_object() {} + + //! 
Get version of this object + /** The version number is incremented when a incompatible change is introduced. + The version number is invariant for the lifetime of the object. */ + virtual version_type version() const RML_PURE(version_type) + +}; + +//! Represents a client's job for an execution context. +/** A job object is constructed by the client. + Not derived from versioned_object because version is same as for client. */ +class job { + friend class server; +}; + +//! Information that client provides to server when asking for a server. +/** The instance must endure at least until acknowledge_close_connection is called. */ +class client: public versioned_object { +public: + //! Typedef for convenience of derived classes in other namespaces. + typedef ::rml::job job; + + //! Index of a job in a job pool + typedef unsigned size_type; + + //! Maximum number of threads that client can exploit profitably if nothing else is running on the machine. + /** The returned value should remain invariant for the lifetime of the connection. [idempotent] */ + virtual size_type max_job_count() const RML_PURE(size_type) + + //! Minimum stack size for each job. 0 means to use default stack size. [idempotent] + virtual std::size_t min_stack_size() const RML_PURE(std::size_t) + + //! Server calls this routine when it needs client to create a job object. + virtual job* create_one_job() RML_PURE(job*) + + //! Acknowledge that all jobs have been cleaned up. + /** Called by server in response to request_close_connection + after cleanup(job) has been called for each job. */ + virtual void acknowledge_close_connection() RML_PURE(void) + + //! Inform client that server is done with *this. + /** Client should destroy the job. + Not necessarily called by execution context represented by *this. + Never called while any other thread is working on the job. */ + virtual void cleanup( job& ) RML_PURE(void) + + // In general, we should not add new virtual methods, because that would + // break derived classes. Think about reserving some vtable slots. +}; + +// Information that server provides to client. +// Virtual functions are routines provided by the server for the client to call. +class server: public versioned_object { +public: + //! Typedef for convenience of derived classes. + typedef ::rml::job job; + +#if _WIN32||_WIN64 + typedef void* execution_resource_t; +#endif + + //! Request that connection to server be closed. + /** Causes each job associated with the client to have its cleanup method called, + possibly by a thread different than the thread that created the job. + This method can return before all cleanup methods return. + Actions that have to wait after all cleanup methods return should be part of + client::acknowledge_close_connection. + Pass true as exiting if request_close_connection() is called because exit() is + called. In that case, it is the client's responsibility to make sure all threads + are terminated. In all other cases, pass false. */ + virtual void request_close_connection( bool exiting = false ) = 0; + + //! Called by client thread when it reaches a point where it cannot make progress until other threads do. + virtual void yield() = 0; + + //! Called by client to indicate a change in the number of non-RML threads that are running. + /** This is a performance hint to the RML to adjust how many threads it should let run + concurrently. The delta is the change in the number of non-RML threads that are running. 
+ For example, a value of 1 means the client has started running another thread, and a value + of -1 indicates that the client has blocked or terminated one of its threads. */ + virtual void independent_thread_number_changed( int delta ) = 0; + + //! Default level of concurrency for which RML strives when there are no non-RML threads running. + /** Normally, the value is the hardware concurrency minus one. + The "minus one" accounts for the thread created by main(). */ + virtual unsigned default_concurrency() const = 0; +}; + +class factory { +public: + //! status results + enum status_type { + st_success=0, + st_connection_exists, + st_not_found, + st_incompatible + }; + +protected: + //! Pointer to routine that waits for server to indicate when client can close itself. + status_type (*my_wait_to_close_routine)( factory& ); + +public: + //! Library handle for use by RML. +#if _WIN32||_WIN64 + HMODULE library_handle; +#else + void* library_handle; +#endif /* _WIN32||_WIN64 */ + + //! Special marker to keep dll from being unloaded prematurely + static const std::size_t c_dont_unload = 1; +}; + +//! Typedef for callback functions to print server info +typedef void (*server_info_callback_t)( void* arg, const char* server_info ); + +} // namespace rml + +#endif /* __RML_rml_base_H */ diff --git a/contrib/libs/tbb/src/tbb/rml_tbb.cpp b/contrib/libs/tbb/src/tbb/rml_tbb.cpp index 1ebdb2bb35..122e2709f7 100644 --- a/contrib/libs/tbb/src/tbb/rml_tbb.cpp +++ b/contrib/libs/tbb/src/tbb/rml_tbb.cpp @@ -1,113 +1,113 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#include "oneapi/tbb/detail/_assert.h" - -#include "rml_tbb.h" -#include "dynamic_link.h" - -namespace tbb { -namespace detail { -namespace r1 { -namespace rml { - -#define MAKE_SERVER(x) DLD(__TBB_make_rml_server,x) -#define GET_INFO(x) DLD(__TBB_call_with_my_server_info,x) -#define SERVER tbb_server -#define CLIENT tbb_client -#define FACTORY tbb_factory - -#if __TBB_WEAK_SYMBOLS_PRESENT - #pragma weak __TBB_make_rml_server - #pragma weak __TBB_call_with_my_server_info - extern "C" { - ::rml::factory::status_type __TBB_make_rml_server( rml::tbb_factory& f, rml::tbb_server*& server, rml::tbb_client& client ); - void __TBB_call_with_my_server_info( ::rml::server_info_callback_t cb, void* arg ); - } -#endif /* __TBB_WEAK_SYMBOLS_PRESENT */ - -#if TBB_USE_DEBUG -#define DEBUG_SUFFIX "_debug" -#else -#define DEBUG_SUFFIX -#endif /* TBB_USE_DEBUG */ - -// RML_SERVER_NAME is the name of the RML server library. 
-#if _WIN32 || _WIN64 -#define RML_SERVER_NAME "irml" DEBUG_SUFFIX ".dll" -#elif __APPLE__ -#define RML_SERVER_NAME "libirml" DEBUG_SUFFIX ".dylib" -#elif __linux__ -#define RML_SERVER_NAME "libirml" DEBUG_SUFFIX ".so.1" -#elif __FreeBSD__ || __NetBSD__ || __OpenBSD__ || __sun || _AIX -#define RML_SERVER_NAME "libirml" DEBUG_SUFFIX ".so" -#else -#error Unknown OS -#endif - -const ::rml::versioned_object::version_type CLIENT_VERSION = 2; - -#if __TBB_WEAK_SYMBOLS_PRESENT - #pragma weak __RML_open_factory - #pragma weak __RML_close_factory - extern "C" { - ::rml::factory::status_type __RML_open_factory ( ::rml::factory&, ::rml::versioned_object::version_type&, ::rml::versioned_object::version_type ); - void __RML_close_factory( ::rml::factory& f ); - } -#endif /* __TBB_WEAK_SYMBOLS_PRESENT */ - -::rml::factory::status_type FACTORY::open() { - // Failure of following assertion indicates that factory is already open, or not zero-inited. - __TBB_ASSERT_EX( !library_handle, NULL ); - status_type (*open_factory_routine)( factory&, version_type&, version_type ); - dynamic_link_descriptor server_link_table[4] = { - DLD(__RML_open_factory,open_factory_routine), - MAKE_SERVER(my_make_server_routine), - DLD(__RML_close_factory,my_wait_to_close_routine), - GET_INFO(my_call_with_server_info_routine), - }; - status_type result; - if ( dynamic_link( RML_SERVER_NAME, server_link_table, 4, &library_handle ) ) { - version_type server_version; - result = (*open_factory_routine)( *this, server_version, CLIENT_VERSION ); - // server_version can be checked here for incompatibility if necessary. - } else { - library_handle = NULL; - result = st_not_found; - } - return result; -} - -void FACTORY::close() { - if ( library_handle ) - (*my_wait_to_close_routine)(*this); - if ( (size_t)library_handle>FACTORY::c_dont_unload ) { - dynamic_unlink(library_handle); - library_handle = NULL; - } -} - -::rml::factory::status_type FACTORY::make_server( SERVER*& s, CLIENT& c) { - // Failure of following assertion means that factory was not successfully opened. - __TBB_ASSERT_EX( my_make_server_routine, NULL ); - return (*my_make_server_routine)(*this,s,c); -} - -} // namespace rml -} // namespace r1 -} // namespace detail -} // namespace tbb - +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#include "oneapi/tbb/detail/_assert.h" + +#include "rml_tbb.h" +#include "dynamic_link.h" + +namespace tbb { +namespace detail { +namespace r1 { +namespace rml { + +#define MAKE_SERVER(x) DLD(__TBB_make_rml_server,x) +#define GET_INFO(x) DLD(__TBB_call_with_my_server_info,x) +#define SERVER tbb_server +#define CLIENT tbb_client +#define FACTORY tbb_factory + +#if __TBB_WEAK_SYMBOLS_PRESENT + #pragma weak __TBB_make_rml_server + #pragma weak __TBB_call_with_my_server_info + extern "C" { + ::rml::factory::status_type __TBB_make_rml_server( rml::tbb_factory& f, rml::tbb_server*& server, rml::tbb_client& client ); + void __TBB_call_with_my_server_info( ::rml::server_info_callback_t cb, void* arg ); + } +#endif /* __TBB_WEAK_SYMBOLS_PRESENT */ + +#if TBB_USE_DEBUG +#define DEBUG_SUFFIX "_debug" +#else +#define DEBUG_SUFFIX +#endif /* TBB_USE_DEBUG */ + +// RML_SERVER_NAME is the name of the RML server library. +#if _WIN32 || _WIN64 +#define RML_SERVER_NAME "irml" DEBUG_SUFFIX ".dll" +#elif __APPLE__ +#define RML_SERVER_NAME "libirml" DEBUG_SUFFIX ".dylib" +#elif __linux__ +#define RML_SERVER_NAME "libirml" DEBUG_SUFFIX ".so.1" +#elif __FreeBSD__ || __NetBSD__ || __OpenBSD__ || __sun || _AIX +#define RML_SERVER_NAME "libirml" DEBUG_SUFFIX ".so" +#else +#error Unknown OS +#endif + +const ::rml::versioned_object::version_type CLIENT_VERSION = 2; + +#if __TBB_WEAK_SYMBOLS_PRESENT + #pragma weak __RML_open_factory + #pragma weak __RML_close_factory + extern "C" { + ::rml::factory::status_type __RML_open_factory ( ::rml::factory&, ::rml::versioned_object::version_type&, ::rml::versioned_object::version_type ); + void __RML_close_factory( ::rml::factory& f ); + } +#endif /* __TBB_WEAK_SYMBOLS_PRESENT */ + +::rml::factory::status_type FACTORY::open() { + // Failure of following assertion indicates that factory is already open, or not zero-inited. + __TBB_ASSERT_EX( !library_handle, NULL ); + status_type (*open_factory_routine)( factory&, version_type&, version_type ); + dynamic_link_descriptor server_link_table[4] = { + DLD(__RML_open_factory,open_factory_routine), + MAKE_SERVER(my_make_server_routine), + DLD(__RML_close_factory,my_wait_to_close_routine), + GET_INFO(my_call_with_server_info_routine), + }; + status_type result; + if ( dynamic_link( RML_SERVER_NAME, server_link_table, 4, &library_handle ) ) { + version_type server_version; + result = (*open_factory_routine)( *this, server_version, CLIENT_VERSION ); + // server_version can be checked here for incompatibility if necessary. + } else { + library_handle = NULL; + result = st_not_found; + } + return result; +} + +void FACTORY::close() { + if ( library_handle ) + (*my_wait_to_close_routine)(*this); + if ( (size_t)library_handle>FACTORY::c_dont_unload ) { + dynamic_unlink(library_handle); + library_handle = NULL; + } +} + +::rml::factory::status_type FACTORY::make_server( SERVER*& s, CLIENT& c) { + // Failure of following assertion means that factory was not successfully opened. 
+ __TBB_ASSERT_EX( my_make_server_routine, NULL ); + return (*my_make_server_routine)(*this,s,c); +} + +} // namespace rml +} // namespace r1 +} // namespace detail +} // namespace tbb + diff --git a/contrib/libs/tbb/src/tbb/rml_tbb.h b/contrib/libs/tbb/src/tbb/rml_tbb.h index ef8e5bfbbc..de923be1b2 100644 --- a/contrib/libs/tbb/src/tbb/rml_tbb.h +++ b/contrib/libs/tbb/src/tbb/rml_tbb.h @@ -1,94 +1,94 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -// Header guard and namespace names follow TBB conventions. - -#ifndef __TBB_rml_tbb_H -#define __TBB_rml_tbb_H - -#include "oneapi/tbb/version.h" -#include "rml_base.h" - -namespace tbb { -namespace detail { -namespace r1 { -namespace rml { - -//------------------------------------------------------------------------ -// Classes instantiated by the server -//------------------------------------------------------------------------ - -//! Represents a set of oneTBB worker threads provided by the server. -class tbb_server: public ::rml::server { -public: - //! Inform server of adjustments in the number of workers that the client can profitably use. - virtual void adjust_job_count_estimate( int delta ) = 0; - -#if _WIN32||_WIN64 - //! Inform server of a oneTBB external thread. - virtual void register_external_thread( execution_resource_t& v ) = 0; - - //! Inform server that the oneTBB external thread is done with its work. - virtual void unregister_external_thread( execution_resource_t v ) = 0; -#endif /* _WIN32||_WIN64 */ -}; - -//------------------------------------------------------------------------ -// Classes instantiated by the client -//------------------------------------------------------------------------ - -class tbb_client: public ::rml::client { -public: - //! Defined by TBB to steal a task and execute it. - /** Called by server when it wants an execution context to do some TBB work. - The method should return when it is okay for the thread to yield indefinitely. */ - virtual void process( job& ) RML_PURE(void) -}; - -/** Client must ensure that instance is zero-inited, typically by being a file-scope object. */ -class tbb_factory: public ::rml::factory { - - //! Pointer to routine that creates an RML server. - status_type (*my_make_server_routine)( tbb_factory&, tbb_server*&, tbb_client& ); - - //! Pointer to routine that calls callback function with server version info. - void (*my_call_with_server_info_routine)( ::rml::server_info_callback_t cb, void* arg ); - -public: - typedef ::rml::versioned_object::version_type version_type; - typedef tbb_client client_type; - typedef tbb_server server_type; - - //! Open factory. - /** Dynamically links against RML library. - Returns st_success, st_incompatible, or st_not_found. */ - status_type open(); - - //! Factory method to be called by client to create a server object. - /** Factory must be open. - Returns st_success, or st_incompatible . */ - status_type make_server( server_type*&, client_type& ); - - //! 
Close factory - void close(); -}; - -} // namespace rml -} // namespace r1 -} // namespace detail -} // namespace tbb - -#endif /*__TBB_rml_tbb_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +// Header guard and namespace names follow TBB conventions. + +#ifndef __TBB_rml_tbb_H +#define __TBB_rml_tbb_H + +#include "oneapi/tbb/version.h" +#include "rml_base.h" + +namespace tbb { +namespace detail { +namespace r1 { +namespace rml { + +//------------------------------------------------------------------------ +// Classes instantiated by the server +//------------------------------------------------------------------------ + +//! Represents a set of oneTBB worker threads provided by the server. +class tbb_server: public ::rml::server { +public: + //! Inform server of adjustments in the number of workers that the client can profitably use. + virtual void adjust_job_count_estimate( int delta ) = 0; + +#if _WIN32||_WIN64 + //! Inform server of a oneTBB external thread. + virtual void register_external_thread( execution_resource_t& v ) = 0; + + //! Inform server that the oneTBB external thread is done with its work. + virtual void unregister_external_thread( execution_resource_t v ) = 0; +#endif /* _WIN32||_WIN64 */ +}; + +//------------------------------------------------------------------------ +// Classes instantiated by the client +//------------------------------------------------------------------------ + +class tbb_client: public ::rml::client { +public: + //! Defined by TBB to steal a task and execute it. + /** Called by server when it wants an execution context to do some TBB work. + The method should return when it is okay for the thread to yield indefinitely. */ + virtual void process( job& ) RML_PURE(void) +}; + +/** Client must ensure that instance is zero-inited, typically by being a file-scope object. */ +class tbb_factory: public ::rml::factory { + + //! Pointer to routine that creates an RML server. + status_type (*my_make_server_routine)( tbb_factory&, tbb_server*&, tbb_client& ); + + //! Pointer to routine that calls callback function with server version info. + void (*my_call_with_server_info_routine)( ::rml::server_info_callback_t cb, void* arg ); + +public: + typedef ::rml::versioned_object::version_type version_type; + typedef tbb_client client_type; + typedef tbb_server server_type; + + //! Open factory. + /** Dynamically links against RML library. + Returns st_success, st_incompatible, or st_not_found. */ + status_type open(); + + //! Factory method to be called by client to create a server object. + /** Factory must be open. + Returns st_success, or st_incompatible . */ + status_type make_server( server_type*&, client_type& ); + + //! 
Close factory + void close(); +}; + +} // namespace rml +} // namespace r1 +} // namespace detail +} // namespace tbb + +#endif /*__TBB_rml_tbb_H */ diff --git a/contrib/libs/tbb/src/tbb/rml_thread_monitor.h b/contrib/libs/tbb/src/tbb/rml_thread_monitor.h index 47c3f01698..613ec72e98 100644 --- a/contrib/libs/tbb/src/tbb/rml_thread_monitor.h +++ b/contrib/libs/tbb/src/tbb/rml_thread_monitor.h @@ -1,258 +1,258 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -// All platform-specific threading support is encapsulated here. */ - -#ifndef __RML_thread_monitor_H -#define __RML_thread_monitor_H - -#if __TBB_USE_WINAPI -#include <windows.h> -#include <process.h> -#include <malloc.h> //_alloca -#include "misc.h" // support for processor groups -#if __TBB_WIN8UI_SUPPORT && (_WIN32_WINNT < 0x0A00) -#include <thread> -#endif -#elif __TBB_USE_POSIX -#include <pthread.h> -#include <cstring> -#include <cstdlib> -#else -#error Unsupported platform -#endif -#include <cstdio> - -#include "oneapi/tbb/detail/_template_helpers.h" - -#include "itt_notify.h" -#include "semaphore.h" - -// All platform-specific threading support is in this header. - -#if (_WIN32||_WIN64)&&!__TBB_ipf -// Deal with 64K aliasing. The formula for "offset" is a Fibonacci hash function, -// which has the desirable feature of spreading out the offsets fairly evenly -// without knowing the total number of offsets, and furthermore unlikely to -// accidentally cancel out other 64K aliasing schemes that Microsoft might implement later. -// See Knuth Vol 3. "Theorem S" for details on Fibonacci hashing. -// The second statement is really does need "volatile", otherwise the compiler might remove the _alloca. -#define AVOID_64K_ALIASING(idx) \ - std::size_t offset = (idx+1) * 40503U % (1U<<16); \ - void* volatile sink_for_alloca = _alloca(offset); \ - __TBB_ASSERT_EX(sink_for_alloca, "_alloca failed"); -#else -// Linux thread allocators avoid 64K aliasing. -#define AVOID_64K_ALIASING(idx) tbb::detail::suppress_unused_warning(idx) -#endif /* _WIN32||_WIN64 */ - -namespace tbb { -namespace detail { -namespace r1 { - -// Forward declaration: throws std::runtime_error with what() returning error_code description prefixed with aux_info -void handle_perror(int error_code, const char* aux_info); - -namespace rml { -namespace internal { - -#if __TBB_USE_ITT_NOTIFY -static const ::tbb::detail::r1::tchar *SyncType_RML = _T("%Constant"); -static const ::tbb::detail::r1::tchar *SyncObj_ThreadMonitor = _T("RML Thr Monitor"); -#endif /* __TBB_USE_ITT_NOTIFY */ - -//! Monitor with limited two-phase commit form of wait. -/** At most one thread should wait on an instance at a time. */ -class thread_monitor { -public: - class cookie { - friend class thread_monitor; - std::atomic<std::size_t> my_epoch{0}; - }; - thread_monitor() : skipped_wakeup(false), my_sema() { - ITT_SYNC_CREATE(&my_sema, SyncType_RML, SyncObj_ThreadMonitor); - } - ~thread_monitor() {} - - //! 
If a thread is waiting or started a two-phase wait, notify it. - /** Can be called by any thread. */ - void notify(); - - //! Begin two-phase wait. - /** Should only be called by thread that owns the monitor. - The caller must either complete the wait or cancel it. */ - void prepare_wait( cookie& c ); - - //! Complete a two-phase wait and wait until notification occurs after the earlier prepare_wait. - void commit_wait( cookie& c ); - - //! Cancel a two-phase wait. - void cancel_wait(); - -#if __TBB_USE_WINAPI - typedef HANDLE handle_type; - - #define __RML_DECL_THREAD_ROUTINE unsigned WINAPI - typedef unsigned (WINAPI *thread_routine_type)(void*); - - //! Launch a thread - static handle_type launch( thread_routine_type thread_routine, void* arg, std::size_t stack_size, const size_t* worker_index = NULL ); - -#elif __TBB_USE_POSIX - typedef pthread_t handle_type; - - #define __RML_DECL_THREAD_ROUTINE void* - typedef void*(*thread_routine_type)(void*); - - //! Launch a thread - static handle_type launch( thread_routine_type thread_routine, void* arg, std::size_t stack_size ); -#endif /* __TBB_USE_POSIX */ - - //! Join thread - static void join(handle_type handle); - - //! Detach thread - static void detach_thread(handle_type handle); -private: - cookie my_cookie; // epoch counter - std::atomic<bool> in_wait{false}; - bool skipped_wakeup; - binary_semaphore my_sema; -#if __TBB_USE_POSIX - static void check( int error_code, const char* routine ); -#endif -}; - -#if __TBB_USE_WINAPI - -#ifndef STACK_SIZE_PARAM_IS_A_RESERVATION -#define STACK_SIZE_PARAM_IS_A_RESERVATION 0x00010000 -#endif - -// _beginthreadex API is not available in Windows 8 Store* applications, so use std::thread instead -#if __TBB_WIN8UI_SUPPORT && (_WIN32_WINNT < 0x0A00) -inline thread_monitor::handle_type thread_monitor::launch( thread_routine_type thread_function, void* arg, std::size_t, const std::size_t*) { -//TODO: check that exception thrown from std::thread is not swallowed silently - std::thread* thread_tmp=new std::thread(thread_function, arg); - return thread_tmp->native_handle(); -} -#else -inline thread_monitor::handle_type thread_monitor::launch( thread_routine_type thread_routine, void* arg, std::size_t stack_size, const std::size_t* worker_index ) { - unsigned thread_id; - int number_of_processor_groups = ( worker_index ) ? NumberOfProcessorGroups() : 0; - unsigned create_flags = ( number_of_processor_groups > 1 ) ? 
CREATE_SUSPENDED : 0; - HANDLE h = (HANDLE)_beginthreadex( NULL, unsigned(stack_size), thread_routine, arg, STACK_SIZE_PARAM_IS_A_RESERVATION | create_flags, &thread_id ); - if( !h ) { - handle_perror(0, "thread_monitor::launch: _beginthreadex failed\n"); - } - if ( number_of_processor_groups > 1 ) { - MoveThreadIntoProcessorGroup( h, FindProcessorGroupIndex( static_cast<int>(*worker_index) ) ); - ResumeThread( h ); - } - return h; -} -#endif //__TBB_WIN8UI_SUPPORT && (_WIN32_WINNT < 0x0A00) - -void thread_monitor::join(handle_type handle) { -#if TBB_USE_ASSERT - DWORD res = -#endif - WaitForSingleObjectEx(handle, INFINITE, FALSE); - __TBB_ASSERT( res==WAIT_OBJECT_0, NULL ); -#if TBB_USE_ASSERT - BOOL val = -#endif - CloseHandle(handle); - __TBB_ASSERT( val, NULL ); -} - -void thread_monitor::detach_thread(handle_type handle) { -#if TBB_USE_ASSERT - BOOL val = -#endif - CloseHandle(handle); - __TBB_ASSERT( val, NULL ); -} - -#endif /* __TBB_USE_WINAPI */ - -#if __TBB_USE_POSIX -inline void thread_monitor::check( int error_code, const char* routine ) { - if( error_code ) { - handle_perror(error_code, routine); - } -} - -inline thread_monitor::handle_type thread_monitor::launch( void* (*thread_routine)(void*), void* arg, std::size_t stack_size ) { - // FIXME - consider more graceful recovery than just exiting if a thread cannot be launched. - // Note that there are some tricky situations to deal with, such that the thread is already - // grabbed as part of an OpenMP team. - pthread_attr_t s; - check(pthread_attr_init( &s ), "pthread_attr_init has failed"); - if( stack_size>0 ) - check(pthread_attr_setstacksize( &s, stack_size ), "pthread_attr_setstack_size has failed" ); - pthread_t handle; - check( pthread_create( &handle, &s, thread_routine, arg ), "pthread_create has failed" ); - check( pthread_attr_destroy( &s ), "pthread_attr_destroy has failed" ); - return handle; -} - -void thread_monitor::join(handle_type handle) { - check(pthread_join(handle, NULL), "pthread_join has failed"); -} - -void thread_monitor::detach_thread(handle_type handle) { - check(pthread_detach(handle), "pthread_detach has failed"); -} -#endif /* __TBB_USE_POSIX */ - -inline void thread_monitor::notify() { - my_cookie.my_epoch.store(my_cookie.my_epoch.load(std::memory_order_acquire) + 1, std::memory_order_release); - bool do_signal = in_wait.exchange( false ); - if( do_signal ) - my_sema.V(); -} - -inline void thread_monitor::prepare_wait( cookie& c ) { - if( skipped_wakeup ) { - // Lazily consume a signal that was skipped due to cancel_wait - skipped_wakeup = false; - my_sema.P(); // does not really wait on the semaphore - } - // Former c = my_cookie - c.my_epoch.store(my_cookie.my_epoch.load(std::memory_order_acquire), std::memory_order_release); - in_wait.store( true, std::memory_order_seq_cst ); -} - -inline void thread_monitor::commit_wait( cookie& c ) { - bool do_it = ( c.my_epoch.load(std::memory_order_relaxed) == my_cookie.my_epoch.load(std::memory_order_relaxed) ); - if( do_it ) my_sema.P(); - else cancel_wait(); -} - -inline void thread_monitor::cancel_wait() { - // if not in_wait, then some thread has sent us a signal; - // it will be consumed by the next prepare_wait call - skipped_wakeup = ! 
in_wait.exchange( false ); -} - -} // namespace internal -} // namespace rml -} // namespace r1 -} // namespace detail -} // namespace tbb - -#endif /* __RML_thread_monitor_H */ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +// All platform-specific threading support is encapsulated here. */ + +#ifndef __RML_thread_monitor_H +#define __RML_thread_monitor_H + +#if __TBB_USE_WINAPI +#include <windows.h> +#include <process.h> +#include <malloc.h> //_alloca +#include "misc.h" // support for processor groups +#if __TBB_WIN8UI_SUPPORT && (_WIN32_WINNT < 0x0A00) +#include <thread> +#endif +#elif __TBB_USE_POSIX +#include <pthread.h> +#include <cstring> +#include <cstdlib> +#else +#error Unsupported platform +#endif +#include <cstdio> + +#include "oneapi/tbb/detail/_template_helpers.h" + +#include "itt_notify.h" +#include "semaphore.h" + +// All platform-specific threading support is in this header. + +#if (_WIN32||_WIN64)&&!__TBB_ipf +// Deal with 64K aliasing. The formula for "offset" is a Fibonacci hash function, +// which has the desirable feature of spreading out the offsets fairly evenly +// without knowing the total number of offsets, and furthermore unlikely to +// accidentally cancel out other 64K aliasing schemes that Microsoft might implement later. +// See Knuth Vol 3. "Theorem S" for details on Fibonacci hashing. +// The second statement is really does need "volatile", otherwise the compiler might remove the _alloca. +#define AVOID_64K_ALIASING(idx) \ + std::size_t offset = (idx+1) * 40503U % (1U<<16); \ + void* volatile sink_for_alloca = _alloca(offset); \ + __TBB_ASSERT_EX(sink_for_alloca, "_alloca failed"); +#else +// Linux thread allocators avoid 64K aliasing. +#define AVOID_64K_ALIASING(idx) tbb::detail::suppress_unused_warning(idx) +#endif /* _WIN32||_WIN64 */ + +namespace tbb { +namespace detail { +namespace r1 { + +// Forward declaration: throws std::runtime_error with what() returning error_code description prefixed with aux_info +void handle_perror(int error_code, const char* aux_info); + +namespace rml { +namespace internal { + +#if __TBB_USE_ITT_NOTIFY +static const ::tbb::detail::r1::tchar *SyncType_RML = _T("%Constant"); +static const ::tbb::detail::r1::tchar *SyncObj_ThreadMonitor = _T("RML Thr Monitor"); +#endif /* __TBB_USE_ITT_NOTIFY */ + +//! Monitor with limited two-phase commit form of wait. +/** At most one thread should wait on an instance at a time. */ +class thread_monitor { +public: + class cookie { + friend class thread_monitor; + std::atomic<std::size_t> my_epoch{0}; + }; + thread_monitor() : skipped_wakeup(false), my_sema() { + ITT_SYNC_CREATE(&my_sema, SyncType_RML, SyncObj_ThreadMonitor); + } + ~thread_monitor() {} + + //! If a thread is waiting or started a two-phase wait, notify it. + /** Can be called by any thread. */ + void notify(); + + //! Begin two-phase wait. + /** Should only be called by thread that owns the monitor. + The caller must either complete the wait or cancel it. 
*/ + void prepare_wait( cookie& c ); + + //! Complete a two-phase wait and wait until notification occurs after the earlier prepare_wait. + void commit_wait( cookie& c ); + + //! Cancel a two-phase wait. + void cancel_wait(); + +#if __TBB_USE_WINAPI + typedef HANDLE handle_type; + + #define __RML_DECL_THREAD_ROUTINE unsigned WINAPI + typedef unsigned (WINAPI *thread_routine_type)(void*); + + //! Launch a thread + static handle_type launch( thread_routine_type thread_routine, void* arg, std::size_t stack_size, const size_t* worker_index = NULL ); + +#elif __TBB_USE_POSIX + typedef pthread_t handle_type; + + #define __RML_DECL_THREAD_ROUTINE void* + typedef void*(*thread_routine_type)(void*); + + //! Launch a thread + static handle_type launch( thread_routine_type thread_routine, void* arg, std::size_t stack_size ); +#endif /* __TBB_USE_POSIX */ + + //! Join thread + static void join(handle_type handle); + + //! Detach thread + static void detach_thread(handle_type handle); +private: + cookie my_cookie; // epoch counter + std::atomic<bool> in_wait{false}; + bool skipped_wakeup; + binary_semaphore my_sema; +#if __TBB_USE_POSIX + static void check( int error_code, const char* routine ); +#endif +}; + +#if __TBB_USE_WINAPI + +#ifndef STACK_SIZE_PARAM_IS_A_RESERVATION +#define STACK_SIZE_PARAM_IS_A_RESERVATION 0x00010000 +#endif + +// _beginthreadex API is not available in Windows 8 Store* applications, so use std::thread instead +#if __TBB_WIN8UI_SUPPORT && (_WIN32_WINNT < 0x0A00) +inline thread_monitor::handle_type thread_monitor::launch( thread_routine_type thread_function, void* arg, std::size_t, const std::size_t*) { +//TODO: check that exception thrown from std::thread is not swallowed silently + std::thread* thread_tmp=new std::thread(thread_function, arg); + return thread_tmp->native_handle(); +} +#else +inline thread_monitor::handle_type thread_monitor::launch( thread_routine_type thread_routine, void* arg, std::size_t stack_size, const std::size_t* worker_index ) { + unsigned thread_id; + int number_of_processor_groups = ( worker_index ) ? NumberOfProcessorGroups() : 0; + unsigned create_flags = ( number_of_processor_groups > 1 ) ? CREATE_SUSPENDED : 0; + HANDLE h = (HANDLE)_beginthreadex( NULL, unsigned(stack_size), thread_routine, arg, STACK_SIZE_PARAM_IS_A_RESERVATION | create_flags, &thread_id ); + if( !h ) { + handle_perror(0, "thread_monitor::launch: _beginthreadex failed\n"); + } + if ( number_of_processor_groups > 1 ) { + MoveThreadIntoProcessorGroup( h, FindProcessorGroupIndex( static_cast<int>(*worker_index) ) ); + ResumeThread( h ); + } + return h; +} +#endif //__TBB_WIN8UI_SUPPORT && (_WIN32_WINNT < 0x0A00) + +void thread_monitor::join(handle_type handle) { +#if TBB_USE_ASSERT + DWORD res = +#endif + WaitForSingleObjectEx(handle, INFINITE, FALSE); + __TBB_ASSERT( res==WAIT_OBJECT_0, NULL ); +#if TBB_USE_ASSERT + BOOL val = +#endif + CloseHandle(handle); + __TBB_ASSERT( val, NULL ); +} + +void thread_monitor::detach_thread(handle_type handle) { +#if TBB_USE_ASSERT + BOOL val = +#endif + CloseHandle(handle); + __TBB_ASSERT( val, NULL ); +} + +#endif /* __TBB_USE_WINAPI */ + +#if __TBB_USE_POSIX +inline void thread_monitor::check( int error_code, const char* routine ) { + if( error_code ) { + handle_perror(error_code, routine); + } +} + +inline thread_monitor::handle_type thread_monitor::launch( void* (*thread_routine)(void*), void* arg, std::size_t stack_size ) { + // FIXME - consider more graceful recovery than just exiting if a thread cannot be launched. 
+ // Note that there are some tricky situations to deal with, such that the thread is already + // grabbed as part of an OpenMP team. + pthread_attr_t s; + check(pthread_attr_init( &s ), "pthread_attr_init has failed"); + if( stack_size>0 ) + check(pthread_attr_setstacksize( &s, stack_size ), "pthread_attr_setstack_size has failed" ); + pthread_t handle; + check( pthread_create( &handle, &s, thread_routine, arg ), "pthread_create has failed" ); + check( pthread_attr_destroy( &s ), "pthread_attr_destroy has failed" ); + return handle; +} + +void thread_monitor::join(handle_type handle) { + check(pthread_join(handle, NULL), "pthread_join has failed"); +} + +void thread_monitor::detach_thread(handle_type handle) { + check(pthread_detach(handle), "pthread_detach has failed"); +} +#endif /* __TBB_USE_POSIX */ + +inline void thread_monitor::notify() { + my_cookie.my_epoch.store(my_cookie.my_epoch.load(std::memory_order_acquire) + 1, std::memory_order_release); + bool do_signal = in_wait.exchange( false ); + if( do_signal ) + my_sema.V(); +} + +inline void thread_monitor::prepare_wait( cookie& c ) { + if( skipped_wakeup ) { + // Lazily consume a signal that was skipped due to cancel_wait + skipped_wakeup = false; + my_sema.P(); // does not really wait on the semaphore + } + // Former c = my_cookie + c.my_epoch.store(my_cookie.my_epoch.load(std::memory_order_acquire), std::memory_order_release); + in_wait.store( true, std::memory_order_seq_cst ); +} + +inline void thread_monitor::commit_wait( cookie& c ) { + bool do_it = ( c.my_epoch.load(std::memory_order_relaxed) == my_cookie.my_epoch.load(std::memory_order_relaxed) ); + if( do_it ) my_sema.P(); + else cancel_wait(); +} + +inline void thread_monitor::cancel_wait() { + // if not in_wait, then some thread has sent us a signal; + // it will be consumed by the next prepare_wait call + skipped_wakeup = ! in_wait.exchange( false ); +} + +} // namespace internal +} // namespace rml +} // namespace r1 +} // namespace detail +} // namespace tbb + +#endif /* __RML_thread_monitor_H */ diff --git a/contrib/libs/tbb/src/tbb/rtm_mutex.cpp b/contrib/libs/tbb/src/tbb/rtm_mutex.cpp index 1954a87fe0..fe7fb66dc8 100644 --- a/contrib/libs/tbb/src/tbb/rtm_mutex.cpp +++ b/contrib/libs/tbb/src/tbb/rtm_mutex.cpp @@ -1,120 +1,120 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#include "oneapi/tbb/detail/_assert.h" -#include "oneapi/tbb/detail/_rtm_mutex.h" -#include "itt_notify.h" -#include "governor.h" -#include "misc.h" - -#include <atomic> - -namespace tbb { -namespace detail { -namespace r1 { - -// maximum number of times to retry -// TODO: experiment on retry values. -static constexpr int retry_threshold = 10; - -struct rtm_mutex_impl { - //! 
Release speculative mutex - static void release(d1::rtm_mutex::scoped_lock& s) { - switch(s.m_transaction_state) { - case d1::rtm_mutex::rtm_state::rtm_transacting: - __TBB_ASSERT(is_in_transaction(), "m_transaction_state && not speculating"); - end_transaction(); - s.m_mutex = nullptr; - break; - case d1::rtm_mutex::rtm_state::rtm_real: - s.m_mutex->unlock(); - s.m_mutex = nullptr; - break; - case d1::rtm_mutex::rtm_state::rtm_none: - __TBB_ASSERT(false, "mutex is not locked, but in release"); - break; - default: - __TBB_ASSERT(false, "invalid m_transaction_state"); - } - s.m_transaction_state = d1::rtm_mutex::rtm_state::rtm_none; - } - - //! Acquire lock on the given mutex. - static void acquire(d1::rtm_mutex& m, d1::rtm_mutex::scoped_lock& s, bool only_speculate) { - __TBB_ASSERT(s.m_transaction_state == d1::rtm_mutex::rtm_state::rtm_none, "scoped_lock already in transaction"); - if(governor::speculation_enabled()) { - int num_retries = 0; - unsigned int abort_code = 0; - do { - if(m.m_flag.load(std::memory_order_acquire)) { - if(only_speculate) return; - spin_wait_while_eq(m.m_flag, true); - } - // _xbegin returns -1 on success or the abort code, so capture it - if((abort_code = begin_transaction()) == speculation_successful_begin) - { - // started speculation - if(m.m_flag.load(std::memory_order_relaxed)) { - abort_transaction(); - } - s.m_transaction_state = d1::rtm_mutex::rtm_state::rtm_transacting; - // Don not wrap the following assignment to a function, - // because it can abort the transaction in debug. Need mutex for release(). - s.m_mutex = &m; - return; // successfully started speculation - } - ++num_retries; - } while((abort_code & speculation_retry) != 0 && (num_retries < retry_threshold)); - } - - if(only_speculate) return; - s.m_mutex = &m; - s.m_mutex->lock(); - s.m_transaction_state = d1::rtm_mutex::rtm_state::rtm_real; - return; - } - - //! Try to acquire lock on the given mutex. - static bool try_acquire(d1::rtm_mutex& m, d1::rtm_mutex::scoped_lock& s) { - acquire(m, s, /*only_speculate=*/true); - if (s.m_transaction_state == d1::rtm_mutex::rtm_state::rtm_transacting) { - return true; - } - __TBB_ASSERT(s.m_transaction_state == d1::rtm_mutex::rtm_state::rtm_none, NULL); - // transacting acquire failed. try_lock the real mutex - if (m.try_lock()) { - s.m_mutex = &m; - s.m_transaction_state = d1::rtm_mutex::rtm_state::rtm_real; - return true; - } - return false; - } -}; - -void __TBB_EXPORTED_FUNC acquire(d1::rtm_mutex& m, d1::rtm_mutex::scoped_lock& s, bool only_speculate) { - rtm_mutex_impl::acquire(m, s, only_speculate); -} -bool __TBB_EXPORTED_FUNC try_acquire(d1::rtm_mutex& m, d1::rtm_mutex::scoped_lock& s) { - return rtm_mutex_impl::try_acquire(m, s); -} -void __TBB_EXPORTED_FUNC release(d1::rtm_mutex::scoped_lock& s) { - rtm_mutex_impl::release(s); -} - -} // namespace r1 -} // namespace detail -} // namespace tbb - +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#include "oneapi/tbb/detail/_assert.h" +#include "oneapi/tbb/detail/_rtm_mutex.h" +#include "itt_notify.h" +#include "governor.h" +#include "misc.h" + +#include <atomic> + +namespace tbb { +namespace detail { +namespace r1 { + +// maximum number of times to retry +// TODO: experiment on retry values. +static constexpr int retry_threshold = 10; + +struct rtm_mutex_impl { + //! Release speculative mutex + static void release(d1::rtm_mutex::scoped_lock& s) { + switch(s.m_transaction_state) { + case d1::rtm_mutex::rtm_state::rtm_transacting: + __TBB_ASSERT(is_in_transaction(), "m_transaction_state && not speculating"); + end_transaction(); + s.m_mutex = nullptr; + break; + case d1::rtm_mutex::rtm_state::rtm_real: + s.m_mutex->unlock(); + s.m_mutex = nullptr; + break; + case d1::rtm_mutex::rtm_state::rtm_none: + __TBB_ASSERT(false, "mutex is not locked, but in release"); + break; + default: + __TBB_ASSERT(false, "invalid m_transaction_state"); + } + s.m_transaction_state = d1::rtm_mutex::rtm_state::rtm_none; + } + + //! Acquire lock on the given mutex. + static void acquire(d1::rtm_mutex& m, d1::rtm_mutex::scoped_lock& s, bool only_speculate) { + __TBB_ASSERT(s.m_transaction_state == d1::rtm_mutex::rtm_state::rtm_none, "scoped_lock already in transaction"); + if(governor::speculation_enabled()) { + int num_retries = 0; + unsigned int abort_code = 0; + do { + if(m.m_flag.load(std::memory_order_acquire)) { + if(only_speculate) return; + spin_wait_while_eq(m.m_flag, true); + } + // _xbegin returns -1 on success or the abort code, so capture it + if((abort_code = begin_transaction()) == speculation_successful_begin) + { + // started speculation + if(m.m_flag.load(std::memory_order_relaxed)) { + abort_transaction(); + } + s.m_transaction_state = d1::rtm_mutex::rtm_state::rtm_transacting; + // Don not wrap the following assignment to a function, + // because it can abort the transaction in debug. Need mutex for release(). + s.m_mutex = &m; + return; // successfully started speculation + } + ++num_retries; + } while((abort_code & speculation_retry) != 0 && (num_retries < retry_threshold)); + } + + if(only_speculate) return; + s.m_mutex = &m; + s.m_mutex->lock(); + s.m_transaction_state = d1::rtm_mutex::rtm_state::rtm_real; + return; + } + + //! Try to acquire lock on the given mutex. + static bool try_acquire(d1::rtm_mutex& m, d1::rtm_mutex::scoped_lock& s) { + acquire(m, s, /*only_speculate=*/true); + if (s.m_transaction_state == d1::rtm_mutex::rtm_state::rtm_transacting) { + return true; + } + __TBB_ASSERT(s.m_transaction_state == d1::rtm_mutex::rtm_state::rtm_none, NULL); + // transacting acquire failed. 
try_lock the real mutex + if (m.try_lock()) { + s.m_mutex = &m; + s.m_transaction_state = d1::rtm_mutex::rtm_state::rtm_real; + return true; + } + return false; + } +}; + +void __TBB_EXPORTED_FUNC acquire(d1::rtm_mutex& m, d1::rtm_mutex::scoped_lock& s, bool only_speculate) { + rtm_mutex_impl::acquire(m, s, only_speculate); +} +bool __TBB_EXPORTED_FUNC try_acquire(d1::rtm_mutex& m, d1::rtm_mutex::scoped_lock& s) { + return rtm_mutex_impl::try_acquire(m, s); +} +void __TBB_EXPORTED_FUNC release(d1::rtm_mutex::scoped_lock& s) { + rtm_mutex_impl::release(s); +} + +} // namespace r1 +} // namespace detail +} // namespace tbb + diff --git a/contrib/libs/tbb/src/tbb/rtm_rw_mutex.cpp b/contrib/libs/tbb/src/tbb/rtm_rw_mutex.cpp index 15eefaad05..5e50de4c39 100644 --- a/contrib/libs/tbb/src/tbb/rtm_rw_mutex.cpp +++ b/contrib/libs/tbb/src/tbb/rtm_rw_mutex.cpp @@ -1,271 +1,271 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#include "oneapi/tbb/detail/_assert.h" -#include "oneapi/tbb/detail/_rtm_rw_mutex.h" -#include "itt_notify.h" -#include "governor.h" -#include "misc.h" - -#include <atomic> - -namespace tbb { -namespace detail { -namespace r1 { - -struct rtm_rw_mutex_impl { - // maximum number of times to retry - // TODO: experiment on retry values. - static constexpr int retry_threshold_read = 10; - static constexpr int retry_threshold_write = 10; - - //! Release speculative mutex - static void release(d1::rtm_rw_mutex::scoped_lock& s) { - switch(s.m_transaction_state) { - case d1::rtm_rw_mutex::rtm_type::rtm_transacting_writer: - case d1::rtm_rw_mutex::rtm_type::rtm_transacting_reader: - __TBB_ASSERT(is_in_transaction(), "m_transaction_state && not speculating"); - end_transaction(); - s.m_mutex = nullptr; - break; - case d1::rtm_rw_mutex::rtm_type::rtm_real_reader: - __TBB_ASSERT(!s.m_mutex->write_flag.load(std::memory_order_relaxed), "write_flag set but read lock acquired"); - s.m_mutex->unlock_shared(); - s.m_mutex = nullptr; - break; - case d1::rtm_rw_mutex::rtm_type::rtm_real_writer: - __TBB_ASSERT(s.m_mutex->write_flag.load(std::memory_order_relaxed), "write_flag unset but write lock acquired"); - s.m_mutex->write_flag.store(false, std::memory_order_relaxed); - s.m_mutex->unlock(); - s.m_mutex = nullptr; - break; - case d1::rtm_rw_mutex::rtm_type::rtm_not_in_mutex: - __TBB_ASSERT(false, "rtm_not_in_mutex, but in release"); - break; - default: - __TBB_ASSERT(false, "invalid m_transaction_state"); - } - s.m_transaction_state = d1::rtm_rw_mutex::rtm_type::rtm_not_in_mutex; - } - - //! Acquire write lock on the given mutex. 
- static void acquire_writer(d1::rtm_rw_mutex& m, d1::rtm_rw_mutex::scoped_lock& s, bool only_speculate) { - __TBB_ASSERT(s.m_transaction_state == d1::rtm_rw_mutex::rtm_type::rtm_not_in_mutex, "scoped_lock already in transaction"); - if(governor::speculation_enabled()) { - int num_retries = 0; - unsigned int abort_code = 0; - do { - if(m.m_state.load(std::memory_order_acquire)) { - if(only_speculate) return; - spin_wait_until_eq(m.m_state, d1::rtm_rw_mutex::state_type(0)); - } - // _xbegin returns -1 on success or the abort code, so capture it - if((abort_code = begin_transaction()) == speculation_successful_begin) - { - // started speculation - if(m.m_state.load(std::memory_order_relaxed)) { // add spin_rw_mutex to read-set. - // reader or writer grabbed the lock, so abort. - abort_transaction(); - } - s.m_transaction_state = d1::rtm_rw_mutex::rtm_type::rtm_transacting_writer; - // Don not wrap the following assignment to a function, - // because it can abort the transaction in debug. Need mutex for release(). - s.m_mutex = &m; - return; // successfully started speculation - } - ++num_retries; - } while((abort_code & speculation_retry) != 0 && (num_retries < retry_threshold_write)); - } - - if(only_speculate) return; - s.m_mutex = &m; // should apply a real try_lock... - s.m_mutex->lock(); // kill transactional writers - __TBB_ASSERT(!m.write_flag.load(std::memory_order_relaxed), "After acquire for write, write_flag already true"); - m.write_flag.store(true, std::memory_order_relaxed); // kill transactional readers - s.m_transaction_state = d1::rtm_rw_mutex::rtm_type::rtm_real_writer; - return; - } - - //! Acquire read lock on given mutex. - // only_speculate : true if we are doing a try_acquire. If true and we fail to speculate, don't - // really acquire the lock, return and do a try_acquire on the contained spin_rw_mutex. If - // the lock is already held by a writer, just return. - static void acquire_reader(d1::rtm_rw_mutex& m, d1::rtm_rw_mutex::scoped_lock& s, bool only_speculate) { - __TBB_ASSERT(s.m_transaction_state == d1::rtm_rw_mutex::rtm_type::rtm_not_in_mutex, "scoped_lock already in transaction"); - if(governor::speculation_enabled()) { - int num_retries = 0; - unsigned int abort_code = 0; - do { - // if in try_acquire, and lock is held as writer, don't attempt to speculate. - if(m.write_flag.load(std::memory_order_acquire)) { - if(only_speculate) return; - spin_wait_while_eq(m.write_flag, true); - } - // _xbegin returns -1 on success or the abort code, so capture it - if((abort_code = begin_transaction()) == speculation_successful_begin) - { - // started speculation - if(m.write_flag.load(std::memory_order_relaxed)) { // add write_flag to read-set. - abort_transaction(); // writer grabbed the lock, so abort. - } - s.m_transaction_state = d1::rtm_rw_mutex::rtm_type::rtm_transacting_reader; - // Don not wrap the following assignment to a function, - // because it can abort the transaction in debug. Need mutex for release(). - s.m_mutex = &m; - return; // successfully started speculation - } - // fallback path - // retry only if there is any hope of getting into a transaction soon - // Retry in the following cases (from Section 8.3.5 of - // Intel(R) Architecture Instruction Set Extensions Programming Reference): - // 1. abort caused by XABORT instruction (bit 0 of EAX register is set) - // 2. the transaction may succeed on a retry (bit 1 of EAX register is set) - // 3. 
if another logical processor conflicted with a memory address - // that was part of the transaction that aborted (bit 2 of EAX register is set) - // That is, retry if (abort_code & 0x7) is non-zero - ++num_retries; - } while((abort_code & speculation_retry) != 0 && (num_retries < retry_threshold_read)); - } - - if(only_speculate) return; - s.m_mutex = &m; - s.m_mutex->lock_shared(); - s.m_transaction_state = d1::rtm_rw_mutex::rtm_type::rtm_real_reader; - } - - //! Upgrade reader to become a writer. - /** Returns whether the upgrade happened without releasing and re-acquiring the lock */ - static bool upgrade(d1::rtm_rw_mutex::scoped_lock& s) { - switch(s.m_transaction_state) { - case d1::rtm_rw_mutex::rtm_type::rtm_real_reader: { - s.m_transaction_state = d1::rtm_rw_mutex::rtm_type::rtm_real_writer; - bool no_release = s.m_mutex->upgrade(); - __TBB_ASSERT(!s.m_mutex->write_flag.load(std::memory_order_relaxed), "After upgrade, write_flag already true"); - s.m_mutex->write_flag.store(true, std::memory_order_relaxed); - return no_release; - } - case d1::rtm_rw_mutex::rtm_type::rtm_transacting_reader: { - d1::rtm_rw_mutex& m = *s.m_mutex; - if(m.m_state.load(std::memory_order_acquire)) { // add spin_rw_mutex to read-set. - // Real reader or writer holds the lock; so commit the read and re-acquire for write. - release(s); - acquire_writer(m, s, false); - return false; - } else - { - s.m_transaction_state = d1::rtm_rw_mutex::rtm_type::rtm_transacting_writer; - return true; - } - } - default: - __TBB_ASSERT(false, "Invalid state for upgrade"); - return false; - } - } - - //! Downgrade writer to a reader. - static bool downgrade(d1::rtm_rw_mutex::scoped_lock& s) { - switch (s.m_transaction_state) { - case d1::rtm_rw_mutex::rtm_type::rtm_real_writer: - s.m_transaction_state = d1::rtm_rw_mutex::rtm_type::rtm_real_reader; - __TBB_ASSERT(s.m_mutex->write_flag.load(std::memory_order_relaxed), "Before downgrade write_flag not true"); - s.m_mutex->write_flag.store(false, std::memory_order_relaxed); - s.m_mutex->downgrade(); - return true; - case d1::rtm_rw_mutex::rtm_type::rtm_transacting_writer: - s.m_transaction_state = d1::rtm_rw_mutex::rtm_type::rtm_transacting_reader; - return true; - default: - __TBB_ASSERT(false, "Invalid state for downgrade"); - return false; - } - } - - //! Try to acquire write lock on the given mutex. - // There may be reader(s) which acquired the spin_rw_mutex, as well as possibly - // transactional reader(s). If this is the case, the acquire will fail, and assigning - // write_flag will kill the transactors. So we only assign write_flag if we have successfully - // acquired the lock. - static bool try_acquire_writer(d1::rtm_rw_mutex& m, d1::rtm_rw_mutex::scoped_lock& s) { - acquire_writer(m, s, /*only_speculate=*/true); - if (s.m_transaction_state == d1::rtm_rw_mutex::rtm_type::rtm_transacting_writer) { - return true; - } - __TBB_ASSERT(s.m_transaction_state == d1::rtm_rw_mutex::rtm_type::rtm_not_in_mutex, NULL); - // transacting write acquire failed. try_lock the real mutex - if (m.try_lock()) { - s.m_mutex = &m; - // only shoot down readers if we're not transacting ourselves - __TBB_ASSERT(!m.write_flag.load(std::memory_order_relaxed), "After try_acquire_writer, write_flag already true"); - m.write_flag.store(true, std::memory_order_relaxed); - s.m_transaction_state = d1::rtm_rw_mutex::rtm_type::rtm_real_writer; - return true; - } - return false; - } - - //! Try to acquire read lock on the given mutex. 
- static bool try_acquire_reader(d1::rtm_rw_mutex& m, d1::rtm_rw_mutex::scoped_lock& s) { - // speculatively acquire the lock. If this fails, do try_lock_shared on the spin_rw_mutex. - acquire_reader(m, s, /*only_speculate=*/true); - if (s.m_transaction_state == d1::rtm_rw_mutex::rtm_type::rtm_transacting_reader) { - return true; - } - __TBB_ASSERT(s.m_transaction_state == d1::rtm_rw_mutex::rtm_type::rtm_not_in_mutex, NULL); - // transacting read acquire failed. try_lock_shared the real mutex - if (m.try_lock_shared()) { - s.m_mutex = &m; - s.m_transaction_state = d1::rtm_rw_mutex::rtm_type::rtm_real_reader; - return true; - } - return false; - } -}; - -void __TBB_EXPORTED_FUNC acquire_writer(d1::rtm_rw_mutex& m, d1::rtm_rw_mutex::scoped_lock& s, bool only_speculate) { - rtm_rw_mutex_impl::acquire_writer(m, s, only_speculate); -} -//! Internal acquire read lock. -// only_speculate == true if we're doing a try_lock, else false. -void __TBB_EXPORTED_FUNC acquire_reader(d1::rtm_rw_mutex& m, d1::rtm_rw_mutex::scoped_lock& s, bool only_speculate) { - rtm_rw_mutex_impl::acquire_reader(m, s, only_speculate); -} -//! Internal upgrade reader to become a writer. -bool __TBB_EXPORTED_FUNC upgrade(d1::rtm_rw_mutex::scoped_lock& s) { - return rtm_rw_mutex_impl::upgrade(s); -} -//! Internal downgrade writer to become a reader. -bool __TBB_EXPORTED_FUNC downgrade(d1::rtm_rw_mutex::scoped_lock& s) { - return rtm_rw_mutex_impl::downgrade(s); -} -//! Internal try_acquire write lock. -bool __TBB_EXPORTED_FUNC try_acquire_writer(d1::rtm_rw_mutex& m, d1::rtm_rw_mutex::scoped_lock& s) { - return rtm_rw_mutex_impl::try_acquire_writer(m, s); -} -//! Internal try_acquire read lock. -bool __TBB_EXPORTED_FUNC try_acquire_reader(d1::rtm_rw_mutex& m, d1::rtm_rw_mutex::scoped_lock& s) { - return rtm_rw_mutex_impl::try_acquire_reader(m, s); -} -//! Internal release lock. -void __TBB_EXPORTED_FUNC release(d1::rtm_rw_mutex::scoped_lock& s) { - rtm_rw_mutex_impl::release(s); -} - -} // namespace r1 -} // namespace detail -} // namespace tbb - - +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "oneapi/tbb/detail/_assert.h" +#include "oneapi/tbb/detail/_rtm_rw_mutex.h" +#include "itt_notify.h" +#include "governor.h" +#include "misc.h" + +#include <atomic> + +namespace tbb { +namespace detail { +namespace r1 { + +struct rtm_rw_mutex_impl { + // maximum number of times to retry + // TODO: experiment on retry values. + static constexpr int retry_threshold_read = 10; + static constexpr int retry_threshold_write = 10; + + //! 
Release speculative mutex + static void release(d1::rtm_rw_mutex::scoped_lock& s) { + switch(s.m_transaction_state) { + case d1::rtm_rw_mutex::rtm_type::rtm_transacting_writer: + case d1::rtm_rw_mutex::rtm_type::rtm_transacting_reader: + __TBB_ASSERT(is_in_transaction(), "m_transaction_state && not speculating"); + end_transaction(); + s.m_mutex = nullptr; + break; + case d1::rtm_rw_mutex::rtm_type::rtm_real_reader: + __TBB_ASSERT(!s.m_mutex->write_flag.load(std::memory_order_relaxed), "write_flag set but read lock acquired"); + s.m_mutex->unlock_shared(); + s.m_mutex = nullptr; + break; + case d1::rtm_rw_mutex::rtm_type::rtm_real_writer: + __TBB_ASSERT(s.m_mutex->write_flag.load(std::memory_order_relaxed), "write_flag unset but write lock acquired"); + s.m_mutex->write_flag.store(false, std::memory_order_relaxed); + s.m_mutex->unlock(); + s.m_mutex = nullptr; + break; + case d1::rtm_rw_mutex::rtm_type::rtm_not_in_mutex: + __TBB_ASSERT(false, "rtm_not_in_mutex, but in release"); + break; + default: + __TBB_ASSERT(false, "invalid m_transaction_state"); + } + s.m_transaction_state = d1::rtm_rw_mutex::rtm_type::rtm_not_in_mutex; + } + + //! Acquire write lock on the given mutex. + static void acquire_writer(d1::rtm_rw_mutex& m, d1::rtm_rw_mutex::scoped_lock& s, bool only_speculate) { + __TBB_ASSERT(s.m_transaction_state == d1::rtm_rw_mutex::rtm_type::rtm_not_in_mutex, "scoped_lock already in transaction"); + if(governor::speculation_enabled()) { + int num_retries = 0; + unsigned int abort_code = 0; + do { + if(m.m_state.load(std::memory_order_acquire)) { + if(only_speculate) return; + spin_wait_until_eq(m.m_state, d1::rtm_rw_mutex::state_type(0)); + } + // _xbegin returns -1 on success or the abort code, so capture it + if((abort_code = begin_transaction()) == speculation_successful_begin) + { + // started speculation + if(m.m_state.load(std::memory_order_relaxed)) { // add spin_rw_mutex to read-set. + // reader or writer grabbed the lock, so abort. + abort_transaction(); + } + s.m_transaction_state = d1::rtm_rw_mutex::rtm_type::rtm_transacting_writer; + // Don not wrap the following assignment to a function, + // because it can abort the transaction in debug. Need mutex for release(). + s.m_mutex = &m; + return; // successfully started speculation + } + ++num_retries; + } while((abort_code & speculation_retry) != 0 && (num_retries < retry_threshold_write)); + } + + if(only_speculate) return; + s.m_mutex = &m; // should apply a real try_lock... + s.m_mutex->lock(); // kill transactional writers + __TBB_ASSERT(!m.write_flag.load(std::memory_order_relaxed), "After acquire for write, write_flag already true"); + m.write_flag.store(true, std::memory_order_relaxed); // kill transactional readers + s.m_transaction_state = d1::rtm_rw_mutex::rtm_type::rtm_real_writer; + return; + } + + //! Acquire read lock on given mutex. + // only_speculate : true if we are doing a try_acquire. If true and we fail to speculate, don't + // really acquire the lock, return and do a try_acquire on the contained spin_rw_mutex. If + // the lock is already held by a writer, just return. + static void acquire_reader(d1::rtm_rw_mutex& m, d1::rtm_rw_mutex::scoped_lock& s, bool only_speculate) { + __TBB_ASSERT(s.m_transaction_state == d1::rtm_rw_mutex::rtm_type::rtm_not_in_mutex, "scoped_lock already in transaction"); + if(governor::speculation_enabled()) { + int num_retries = 0; + unsigned int abort_code = 0; + do { + // if in try_acquire, and lock is held as writer, don't attempt to speculate. 
+ if(m.write_flag.load(std::memory_order_acquire)) { + if(only_speculate) return; + spin_wait_while_eq(m.write_flag, true); + } + // _xbegin returns -1 on success or the abort code, so capture it + if((abort_code = begin_transaction()) == speculation_successful_begin) + { + // started speculation + if(m.write_flag.load(std::memory_order_relaxed)) { // add write_flag to read-set. + abort_transaction(); // writer grabbed the lock, so abort. + } + s.m_transaction_state = d1::rtm_rw_mutex::rtm_type::rtm_transacting_reader; + // Don not wrap the following assignment to a function, + // because it can abort the transaction in debug. Need mutex for release(). + s.m_mutex = &m; + return; // successfully started speculation + } + // fallback path + // retry only if there is any hope of getting into a transaction soon + // Retry in the following cases (from Section 8.3.5 of + // Intel(R) Architecture Instruction Set Extensions Programming Reference): + // 1. abort caused by XABORT instruction (bit 0 of EAX register is set) + // 2. the transaction may succeed on a retry (bit 1 of EAX register is set) + // 3. if another logical processor conflicted with a memory address + // that was part of the transaction that aborted (bit 2 of EAX register is set) + // That is, retry if (abort_code & 0x7) is non-zero + ++num_retries; + } while((abort_code & speculation_retry) != 0 && (num_retries < retry_threshold_read)); + } + + if(only_speculate) return; + s.m_mutex = &m; + s.m_mutex->lock_shared(); + s.m_transaction_state = d1::rtm_rw_mutex::rtm_type::rtm_real_reader; + } + + //! Upgrade reader to become a writer. + /** Returns whether the upgrade happened without releasing and re-acquiring the lock */ + static bool upgrade(d1::rtm_rw_mutex::scoped_lock& s) { + switch(s.m_transaction_state) { + case d1::rtm_rw_mutex::rtm_type::rtm_real_reader: { + s.m_transaction_state = d1::rtm_rw_mutex::rtm_type::rtm_real_writer; + bool no_release = s.m_mutex->upgrade(); + __TBB_ASSERT(!s.m_mutex->write_flag.load(std::memory_order_relaxed), "After upgrade, write_flag already true"); + s.m_mutex->write_flag.store(true, std::memory_order_relaxed); + return no_release; + } + case d1::rtm_rw_mutex::rtm_type::rtm_transacting_reader: { + d1::rtm_rw_mutex& m = *s.m_mutex; + if(m.m_state.load(std::memory_order_acquire)) { // add spin_rw_mutex to read-set. + // Real reader or writer holds the lock; so commit the read and re-acquire for write. + release(s); + acquire_writer(m, s, false); + return false; + } else + { + s.m_transaction_state = d1::rtm_rw_mutex::rtm_type::rtm_transacting_writer; + return true; + } + } + default: + __TBB_ASSERT(false, "Invalid state for upgrade"); + return false; + } + } + + //! Downgrade writer to a reader. + static bool downgrade(d1::rtm_rw_mutex::scoped_lock& s) { + switch (s.m_transaction_state) { + case d1::rtm_rw_mutex::rtm_type::rtm_real_writer: + s.m_transaction_state = d1::rtm_rw_mutex::rtm_type::rtm_real_reader; + __TBB_ASSERT(s.m_mutex->write_flag.load(std::memory_order_relaxed), "Before downgrade write_flag not true"); + s.m_mutex->write_flag.store(false, std::memory_order_relaxed); + s.m_mutex->downgrade(); + return true; + case d1::rtm_rw_mutex::rtm_type::rtm_transacting_writer: + s.m_transaction_state = d1::rtm_rw_mutex::rtm_type::rtm_transacting_reader; + return true; + default: + __TBB_ASSERT(false, "Invalid state for downgrade"); + return false; + } + } + + //! Try to acquire write lock on the given mutex. 
+ // There may be reader(s) which acquired the spin_rw_mutex, as well as possibly + // transactional reader(s). If this is the case, the acquire will fail, and assigning + // write_flag will kill the transactors. So we only assign write_flag if we have successfully + // acquired the lock. + static bool try_acquire_writer(d1::rtm_rw_mutex& m, d1::rtm_rw_mutex::scoped_lock& s) { + acquire_writer(m, s, /*only_speculate=*/true); + if (s.m_transaction_state == d1::rtm_rw_mutex::rtm_type::rtm_transacting_writer) { + return true; + } + __TBB_ASSERT(s.m_transaction_state == d1::rtm_rw_mutex::rtm_type::rtm_not_in_mutex, NULL); + // transacting write acquire failed. try_lock the real mutex + if (m.try_lock()) { + s.m_mutex = &m; + // only shoot down readers if we're not transacting ourselves + __TBB_ASSERT(!m.write_flag.load(std::memory_order_relaxed), "After try_acquire_writer, write_flag already true"); + m.write_flag.store(true, std::memory_order_relaxed); + s.m_transaction_state = d1::rtm_rw_mutex::rtm_type::rtm_real_writer; + return true; + } + return false; + } + + //! Try to acquire read lock on the given mutex. + static bool try_acquire_reader(d1::rtm_rw_mutex& m, d1::rtm_rw_mutex::scoped_lock& s) { + // speculatively acquire the lock. If this fails, do try_lock_shared on the spin_rw_mutex. + acquire_reader(m, s, /*only_speculate=*/true); + if (s.m_transaction_state == d1::rtm_rw_mutex::rtm_type::rtm_transacting_reader) { + return true; + } + __TBB_ASSERT(s.m_transaction_state == d1::rtm_rw_mutex::rtm_type::rtm_not_in_mutex, NULL); + // transacting read acquire failed. try_lock_shared the real mutex + if (m.try_lock_shared()) { + s.m_mutex = &m; + s.m_transaction_state = d1::rtm_rw_mutex::rtm_type::rtm_real_reader; + return true; + } + return false; + } +}; + +void __TBB_EXPORTED_FUNC acquire_writer(d1::rtm_rw_mutex& m, d1::rtm_rw_mutex::scoped_lock& s, bool only_speculate) { + rtm_rw_mutex_impl::acquire_writer(m, s, only_speculate); +} +//! Internal acquire read lock. +// only_speculate == true if we're doing a try_lock, else false. +void __TBB_EXPORTED_FUNC acquire_reader(d1::rtm_rw_mutex& m, d1::rtm_rw_mutex::scoped_lock& s, bool only_speculate) { + rtm_rw_mutex_impl::acquire_reader(m, s, only_speculate); +} +//! Internal upgrade reader to become a writer. +bool __TBB_EXPORTED_FUNC upgrade(d1::rtm_rw_mutex::scoped_lock& s) { + return rtm_rw_mutex_impl::upgrade(s); +} +//! Internal downgrade writer to become a reader. +bool __TBB_EXPORTED_FUNC downgrade(d1::rtm_rw_mutex::scoped_lock& s) { + return rtm_rw_mutex_impl::downgrade(s); +} +//! Internal try_acquire write lock. +bool __TBB_EXPORTED_FUNC try_acquire_writer(d1::rtm_rw_mutex& m, d1::rtm_rw_mutex::scoped_lock& s) { + return rtm_rw_mutex_impl::try_acquire_writer(m, s); +} +//! Internal try_acquire read lock. +bool __TBB_EXPORTED_FUNC try_acquire_reader(d1::rtm_rw_mutex& m, d1::rtm_rw_mutex::scoped_lock& s) { + return rtm_rw_mutex_impl::try_acquire_reader(m, s); +} +//! Internal release lock. 
+void __TBB_EXPORTED_FUNC release(d1::rtm_rw_mutex::scoped_lock& s) { + rtm_rw_mutex_impl::release(s); +} + +} // namespace r1 +} // namespace detail +} // namespace tbb + + diff --git a/contrib/libs/tbb/src/tbb/scheduler_common.h b/contrib/libs/tbb/src/tbb/scheduler_common.h index 1524717e9c..ee13dbf981 100644 --- a/contrib/libs/tbb/src/tbb/scheduler_common.h +++ b/contrib/libs/tbb/src/tbb/scheduler_common.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -17,32 +17,32 @@ #ifndef _TBB_scheduler_common_H #define _TBB_scheduler_common_H -#include "oneapi/tbb/detail/_utils.h" -#include "oneapi/tbb/detail/_template_helpers.h" -#include "oneapi/tbb/detail/_task.h" -#include "oneapi/tbb/detail/_machine.h" -#include "oneapi/tbb/task_group.h" -#include "oneapi/tbb/cache_aligned_allocator.h" -#include "itt_notify.h" -#include "co_context.h" -#include "misc.h" -#include "governor.h" +#include "oneapi/tbb/detail/_utils.h" +#include "oneapi/tbb/detail/_template_helpers.h" +#include "oneapi/tbb/detail/_task.h" +#include "oneapi/tbb/detail/_machine.h" +#include "oneapi/tbb/task_group.h" +#include "oneapi/tbb/cache_aligned_allocator.h" +#include "itt_notify.h" +#include "co_context.h" +#include "misc.h" +#include "governor.h" #ifndef __TBB_SCHEDULER_MUTEX_TYPE #define __TBB_SCHEDULER_MUTEX_TYPE tbb::spin_mutex #endif // TODO: add conditional inclusion based on specified type -#include "oneapi/tbb/spin_mutex.h" +#include "oneapi/tbb/spin_mutex.h" -#if TBB_USE_ASSERT -#include <atomic> -#endif +#if TBB_USE_ASSERT +#include <atomic> +#endif -#include <cstdint> -#include <exception> +#include <cstdint> +#include <exception> -//! Mutex type for global locks in the scheduler -using scheduler_mutex_type = __TBB_SCHEDULER_MUTEX_TYPE; +//! Mutex type for global locks in the scheduler +using scheduler_mutex_type = __TBB_SCHEDULER_MUTEX_TYPE; #if _MSC_VER && !defined(__INTEL_COMPILER) // Workaround for overzealous compiler warnings @@ -52,228 +52,228 @@ using scheduler_mutex_type = __TBB_SCHEDULER_MUTEX_TYPE; #endif namespace tbb { -namespace detail { -namespace r1 { +namespace detail { +namespace r1 { class arena; -class mail_inbox; +class mail_inbox; class mail_outbox; class market; class observer_proxy; -enum task_stream_accessor_type { front_accessor = 0, back_nonnull_accessor }; -template<task_stream_accessor_type> class task_stream; - -using isolation_type = std::intptr_t; -constexpr isolation_type no_isolation = 0; - -//------------------------------------------------------------------------ -// Extended execute data -//------------------------------------------------------------------------ - -//! Execute data used on a task dispatcher side, reflects a current execution state -struct execution_data_ext : d1::execution_data { - task_dispatcher* task_disp{}; - isolation_type isolation{}; - d1::wait_context* wait_ctx{}; +enum task_stream_accessor_type { front_accessor = 0, back_nonnull_accessor }; +template<task_stream_accessor_type> class task_stream; + +using isolation_type = std::intptr_t; +constexpr isolation_type no_isolation = 0; + +//------------------------------------------------------------------------ +// Extended execute data +//------------------------------------------------------------------------ + +//! 
Execute data used on a task dispatcher side, reflects a current execution state +struct execution_data_ext : d1::execution_data { + task_dispatcher* task_disp{}; + isolation_type isolation{}; + d1::wait_context* wait_ctx{}; }; -//------------------------------------------------------------------------ -// Task accessor -//------------------------------------------------------------------------ - -//! Interpretation of reserved task fields inside a task dispatcher -struct task_accessor { - static constexpr std::uint64_t proxy_task_trait = 1; - static constexpr std::uint64_t resume_task_trait = 2; - static d1::task_group_context*& context(d1::task& t) { - task_group_context** tgc = reinterpret_cast<task_group_context**>(&t.m_reserved[0]); - return *tgc; - } - static isolation_type& isolation(d1::task& t) { - isolation_type* tag = reinterpret_cast<isolation_type*>(&t.m_reserved[2]); - return *tag; - } - static void set_proxy_trait(d1::task& t) { - // TODO: refactor proxy tasks not to work on uninitialized memory. - //__TBB_ASSERT((t.m_version_and_traits & proxy_task_trait) == 0, nullptr); - t.m_version_and_traits |= proxy_task_trait; - } - static bool is_proxy_task(d1::task& t) { - return (t.m_version_and_traits & proxy_task_trait) != 0; - } - static void set_resume_trait(d1::task& t) { - __TBB_ASSERT((t.m_version_and_traits & resume_task_trait) == 0, nullptr); - t.m_version_and_traits |= resume_task_trait; - } - static bool is_resume_task(d1::task& t) { - return (t.m_version_and_traits & resume_task_trait) != 0; - } -}; - -//------------------------------------------------------------------------ -//! Extended variant of the standard offsetof macro -/** The standard offsetof macro is not sufficient for TBB as it can be used for - POD-types only. The constant 0x1000 (not NULL) is necessary to appease GCC. **/ -#define __TBB_offsetof(class_name, member_name) \ - ((ptrdiff_t)&(reinterpret_cast<class_name*>(0x1000)->member_name) - 0x1000) - -//! Returns address of the object containing a member with the given name and address -#define __TBB_get_object_ref(class_name, member_name, member_addr) \ - (*reinterpret_cast<class_name*>((char*)member_addr - __TBB_offsetof(class_name, member_name))) - -//! Helper class for tracking floating point context and task group context switches -/** Assuming presence of an itt collector, in addition to keeping track of floating - point context, this class emits itt events to indicate begin and end of task group - context execution **/ -template <bool report_tasks> -class context_guard_helper { - const d1::task_group_context* curr_ctx; - d1::cpu_ctl_env guard_cpu_ctl_env; - d1::cpu_ctl_env curr_cpu_ctl_env; -public: - context_guard_helper() : curr_ctx(NULL) { - guard_cpu_ctl_env.get_env(); - curr_cpu_ctl_env = guard_cpu_ctl_env; - } - ~context_guard_helper() { - if (curr_cpu_ctl_env != guard_cpu_ctl_env) - guard_cpu_ctl_env.set_env(); - if (report_tasks && curr_ctx) - ITT_TASK_END; - } - // The function is called from bypass dispatch loop on the hot path. - // Consider performance issues when refactoring. - void set_ctx(const d1::task_group_context* ctx) { - if (!ctx) - return; - const d1::cpu_ctl_env* ctl = reinterpret_cast<const d1::cpu_ctl_env*>(&ctx->my_cpu_ctl_env); - // Compare the FPU settings directly because the context can be reused between parallel algorithms. 
- if (*ctl != curr_cpu_ctl_env) { - curr_cpu_ctl_env = *ctl; - curr_cpu_ctl_env.set_env(); - } - if (report_tasks && ctx != curr_ctx) { - // if task group context was active, report end of current execution frame. - if (curr_ctx) - ITT_TASK_END; - // reporting begin of new task group context execution frame. - // using address of task group context object to group tasks (parent). - // id of task execution frame is NULL and reserved for future use. - ITT_TASK_BEGIN(ctx, ctx->my_name, NULL); - curr_ctx = ctx; - } - } -#if _WIN64 - void restore_default() { - if (curr_cpu_ctl_env != guard_cpu_ctl_env) { - guard_cpu_ctl_env.set_env(); - curr_cpu_ctl_env = guard_cpu_ctl_env; - } - } -#endif // _WIN64 -}; - -#if (_WIN32 || _WIN64 || __linux__) && (__TBB_x86_32 || __TBB_x86_64) -#if _MSC_VER -#pragma intrinsic(__rdtsc) -#endif -inline std::uint64_t machine_time_stamp() { -#if __INTEL_COMPILER - return _rdtsc(); -#elif _MSC_VER - return __rdtsc(); -#else - std::uint32_t hi, lo; - __asm__ __volatile__("rdtsc" : "=d"(hi), "=a"(lo)); - return (std::uint64_t(hi) << 32) | lo; -#endif +//------------------------------------------------------------------------ +// Task accessor +//------------------------------------------------------------------------ + +//! Interpretation of reserved task fields inside a task dispatcher +struct task_accessor { + static constexpr std::uint64_t proxy_task_trait = 1; + static constexpr std::uint64_t resume_task_trait = 2; + static d1::task_group_context*& context(d1::task& t) { + task_group_context** tgc = reinterpret_cast<task_group_context**>(&t.m_reserved[0]); + return *tgc; + } + static isolation_type& isolation(d1::task& t) { + isolation_type* tag = reinterpret_cast<isolation_type*>(&t.m_reserved[2]); + return *tag; + } + static void set_proxy_trait(d1::task& t) { + // TODO: refactor proxy tasks not to work on uninitialized memory. + //__TBB_ASSERT((t.m_version_and_traits & proxy_task_trait) == 0, nullptr); + t.m_version_and_traits |= proxy_task_trait; + } + static bool is_proxy_task(d1::task& t) { + return (t.m_version_and_traits & proxy_task_trait) != 0; + } + static void set_resume_trait(d1::task& t) { + __TBB_ASSERT((t.m_version_and_traits & resume_task_trait) == 0, nullptr); + t.m_version_and_traits |= resume_task_trait; + } + static bool is_resume_task(d1::task& t) { + return (t.m_version_and_traits & resume_task_trait) != 0; + } +}; + +//------------------------------------------------------------------------ +//! Extended variant of the standard offsetof macro +/** The standard offsetof macro is not sufficient for TBB as it can be used for + POD-types only. The constant 0x1000 (not NULL) is necessary to appease GCC. **/ +#define __TBB_offsetof(class_name, member_name) \ + ((ptrdiff_t)&(reinterpret_cast<class_name*>(0x1000)->member_name) - 0x1000) + +//! Returns address of the object containing a member with the given name and address +#define __TBB_get_object_ref(class_name, member_name, member_addr) \ + (*reinterpret_cast<class_name*>((char*)member_addr - __TBB_offsetof(class_name, member_name))) + +//! 
Helper class for tracking floating point context and task group context switches +/** Assuming presence of an itt collector, in addition to keeping track of floating + point context, this class emits itt events to indicate begin and end of task group + context execution **/ +template <bool report_tasks> +class context_guard_helper { + const d1::task_group_context* curr_ctx; + d1::cpu_ctl_env guard_cpu_ctl_env; + d1::cpu_ctl_env curr_cpu_ctl_env; +public: + context_guard_helper() : curr_ctx(NULL) { + guard_cpu_ctl_env.get_env(); + curr_cpu_ctl_env = guard_cpu_ctl_env; + } + ~context_guard_helper() { + if (curr_cpu_ctl_env != guard_cpu_ctl_env) + guard_cpu_ctl_env.set_env(); + if (report_tasks && curr_ctx) + ITT_TASK_END; + } + // The function is called from bypass dispatch loop on the hot path. + // Consider performance issues when refactoring. + void set_ctx(const d1::task_group_context* ctx) { + if (!ctx) + return; + const d1::cpu_ctl_env* ctl = reinterpret_cast<const d1::cpu_ctl_env*>(&ctx->my_cpu_ctl_env); + // Compare the FPU settings directly because the context can be reused between parallel algorithms. + if (*ctl != curr_cpu_ctl_env) { + curr_cpu_ctl_env = *ctl; + curr_cpu_ctl_env.set_env(); + } + if (report_tasks && ctx != curr_ctx) { + // if task group context was active, report end of current execution frame. + if (curr_ctx) + ITT_TASK_END; + // reporting begin of new task group context execution frame. + // using address of task group context object to group tasks (parent). + // id of task execution frame is NULL and reserved for future use. + ITT_TASK_BEGIN(ctx, ctx->my_name, NULL); + curr_ctx = ctx; + } + } +#if _WIN64 + void restore_default() { + if (curr_cpu_ctl_env != guard_cpu_ctl_env) { + guard_cpu_ctl_env.set_env(); + curr_cpu_ctl_env = guard_cpu_ctl_env; + } + } +#endif // _WIN64 +}; + +#if (_WIN32 || _WIN64 || __linux__) && (__TBB_x86_32 || __TBB_x86_64) +#if _MSC_VER +#pragma intrinsic(__rdtsc) +#endif +inline std::uint64_t machine_time_stamp() { +#if __INTEL_COMPILER + return _rdtsc(); +#elif _MSC_VER + return __rdtsc(); +#else + std::uint32_t hi, lo; + __asm__ __volatile__("rdtsc" : "=d"(hi), "=a"(lo)); + return (std::uint64_t(hi) << 32) | lo; +#endif +} + +inline void prolonged_pause_impl() { + // Assumption based on practice: 1000-2000 ticks seems to be a suitable invariant for the + // majority of platforms. Currently, skip platforms that define __TBB_STEALING_PAUSE + // because these platforms require very careful tuning. + std::uint64_t prev = machine_time_stamp(); + const std::uint64_t finish = prev + 1000; + atomic_backoff backoff; + do { + backoff.bounded_pause(); + std::uint64_t curr = machine_time_stamp(); + if (curr <= prev) + // Possibly, the current logical thread is moved to another hardware thread or overflow is occurred. + break; + prev = curr; + } while (prev < finish); +} +#else +inline void prolonged_pause_impl() { +#ifdef __TBB_ipf + static const long PauseTime = 1500; +#else + static const long PauseTime = 80; +#endif + // TODO IDEA: Update PauseTime adaptively? + machine_pause(PauseTime); } +#endif -inline void prolonged_pause_impl() { - // Assumption based on practice: 1000-2000 ticks seems to be a suitable invariant for the - // majority of platforms. Currently, skip platforms that define __TBB_STEALING_PAUSE - // because these platforms require very careful tuning. 
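Note: prolonged_pause_impl spins until the time-stamp counter has advanced by roughly 1000 ticks, reading it through machine_time_stamp() and backing off between reads. A standalone x86 sketch of that bounded spin, assuming GCC/Clang intrinsic headers (illustrative names, not TBB's atomic_backoff):

// tsc_pause.cpp -- illustrative x86 sketch (GCC/Clang), not TBB's implementation.
#include <cstdint>
#include <x86intrin.h>   // __rdtsc
#include <immintrin.h>   // _mm_pause

// Spin until the TSC has advanced by roughly `ticks`, pausing between reads.
inline void bounded_pause(std::uint64_t ticks = 1000) {
    std::uint64_t prev = __rdtsc();
    const std::uint64_t finish = prev + ticks;
    do {
        _mm_pause();                       // hint to the core that this is a spin-wait
        std::uint64_t curr = __rdtsc();
        if (curr <= prev)                  // TSC went backwards: likely migrated cores; stop waiting
            break;
        prev = curr;
    } while (prev < finish);
}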
- std::uint64_t prev = machine_time_stamp(); - const std::uint64_t finish = prev + 1000; - atomic_backoff backoff; - do { - backoff.bounded_pause(); - std::uint64_t curr = machine_time_stamp(); - if (curr <= prev) - // Possibly, the current logical thread is moved to another hardware thread or overflow is occurred. - break; - prev = curr; - } while (prev < finish); +inline void prolonged_pause() { +#if __TBB_WAITPKG_INTRINSICS_PRESENT && (_WIN32 || _WIN64 || __linux__) && (__TBB_x86_32 || __TBB_x86_64) + if (governor::wait_package_enabled()) { + std::uint64_t time_stamp = machine_time_stamp(); + // _tpause function directs the processor to enter an implementation-dependent optimized state + // until the Time Stamp Counter reaches or exceeds the value specified in second parameter. + // Constant "700" is ticks to wait for. + // First parameter 0 selects between a lower power (cleared) or faster wakeup (set) optimized state. + _tpause(0, time_stamp + 700); + } + else +#endif + prolonged_pause_impl(); } -#else -inline void prolonged_pause_impl() { -#ifdef __TBB_ipf - static const long PauseTime = 1500; -#else - static const long PauseTime = 80; -#endif - // TODO IDEA: Update PauseTime adaptively? - machine_pause(PauseTime); -} -#endif - -inline void prolonged_pause() { -#if __TBB_WAITPKG_INTRINSICS_PRESENT && (_WIN32 || _WIN64 || __linux__) && (__TBB_x86_32 || __TBB_x86_64) - if (governor::wait_package_enabled()) { - std::uint64_t time_stamp = machine_time_stamp(); - // _tpause function directs the processor to enter an implementation-dependent optimized state - // until the Time Stamp Counter reaches or exceeds the value specified in second parameter. - // Constant "700" is ticks to wait for. - // First parameter 0 selects between a lower power (cleared) or faster wakeup (set) optimized state. - _tpause(0, time_stamp + 700); - } - else -#endif - prolonged_pause_impl(); -} - -class stealing_loop_backoff { - const int my_pause_threshold; - const int my_yield_threshold; - int my_pause_count; - int my_yield_count; -public: - // my_yield_threshold = 100 is an experimental value. Ideally, once we start calling __TBB_Yield(), - // the time spent spinning before calling is_out_of_work() should be approximately - // the time it takes for a thread to be woken up. Doing so would guarantee that we do - // no worse than 2x the optimal spin time. Or perhaps a time-slice quantum is the right amount. - stealing_loop_backoff(int num_workers) - : my_pause_threshold{ 2 * (num_workers + 1) } -#if __APPLE__ - // threshold value tuned separately for macOS due to high cost of sched_yield there - , my_yield_threshold{10} -#else - , my_yield_threshold{100} -#endif - , my_pause_count{} - , my_yield_count{} - {} - bool pause() { - prolonged_pause(); - if (my_pause_count++ >= my_pause_threshold) { - my_pause_count = my_pause_threshold; - d0::yield(); - if (my_yield_count++ >= my_yield_threshold) { - my_yield_count = my_yield_threshold; - return true; - } - } - return false; - } - void reset_wait() { - my_pause_count = my_yield_count = 0; - } -}; - -//------------------------------------------------------------------------ -// Exception support -//------------------------------------------------------------------------ + +class stealing_loop_backoff { + const int my_pause_threshold; + const int my_yield_threshold; + int my_pause_count; + int my_yield_count; +public: + // my_yield_threshold = 100 is an experimental value. 
Ideally, once we start calling __TBB_Yield(), + // the time spent spinning before calling is_out_of_work() should be approximately + // the time it takes for a thread to be woken up. Doing so would guarantee that we do + // no worse than 2x the optimal spin time. Or perhaps a time-slice quantum is the right amount. + stealing_loop_backoff(int num_workers) + : my_pause_threshold{ 2 * (num_workers + 1) } +#if __APPLE__ + // threshold value tuned separately for macOS due to high cost of sched_yield there + , my_yield_threshold{10} +#else + , my_yield_threshold{100} +#endif + , my_pause_count{} + , my_yield_count{} + {} + bool pause() { + prolonged_pause(); + if (my_pause_count++ >= my_pause_threshold) { + my_pause_count = my_pause_threshold; + d0::yield(); + if (my_yield_count++ >= my_yield_threshold) { + my_yield_count = my_yield_threshold; + return true; + } + } + return false; + } + void reset_wait() { + my_pause_count = my_yield_count = 0; + } +}; + +//------------------------------------------------------------------------ +// Exception support +//------------------------------------------------------------------------ //! Task group state change propagation global epoch /** Together with generic_scheduler::my_context_state_propagation_epoch forms cross-thread signaling mechanism that allows to avoid locking at the hot path @@ -284,47 +284,47 @@ public: and thus registration/deregistration routines take slower branch that may block (at most one thread of the pool can be blocked at any moment). Otherwise the control path is lock-free and fast. **/ -extern std::atomic<std::uintptr_t> the_context_state_propagation_epoch; +extern std::atomic<std::uintptr_t> the_context_state_propagation_epoch; //! Mutex guarding state change propagation across task groups forest. /** Also protects modification of related data structures. **/ typedef scheduler_mutex_type context_state_propagation_mutex_type; extern context_state_propagation_mutex_type the_context_state_propagation_mutex; -class tbb_exception_ptr { - std::exception_ptr my_ptr; -public: - static tbb_exception_ptr* allocate() noexcept; +class tbb_exception_ptr { + std::exception_ptr my_ptr; +public: + static tbb_exception_ptr* allocate() noexcept; - //! Destroys this objects - /** Note that objects of this type can be created only by the allocate() method. **/ - void destroy() noexcept; + //! Destroys this objects + /** Note that objects of this type can be created only by the allocate() method. **/ + void destroy() noexcept; - //! Throws the contained exception . - void throw_self(); + //! Throws the contained exception . 
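Note: tbb_exception_ptr is a pool-allocated wrapper around std::exception_ptr; the capture-and-rethrow machinery it relies on is plain C++11. A minimal sketch of that transfer, independent of the r1 classes:

// exception_transfer.cpp -- illustrative capture/rethrow with std::exception_ptr.
#include <exception>
#include <stdexcept>
#include <cstdio>

int main() {
    std::exception_ptr eptr;
    try {
        throw std::runtime_error("failure inside a task");
    } catch (...) {
        eptr = std::current_exception();            // capture the in-flight exception
    }
    // ...later, possibly on another thread...
    try {
        if (eptr)
            std::rethrow_exception(eptr);           // conceptually what throw_self() does
    } catch (const std::exception& e) {
        std::printf("transferred: %s\n", e.what());
    }
    return 0;
}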
+ void throw_self(); -private: - tbb_exception_ptr(const std::exception_ptr& src) : my_ptr(src) {} -}; // class tbb_exception_ptr +private: + tbb_exception_ptr(const std::exception_ptr& src) : my_ptr(src) {} +}; // class tbb_exception_ptr //------------------------------------------------------------------------ // Debugging support //------------------------------------------------------------------------ #if TBB_USE_ASSERT -static const std::uintptr_t venom = tbb::detail::select_size_t_constant<0xDEADBEEFU, 0xDDEEAADDDEADBEEFULL>::value; +static const std::uintptr_t venom = tbb::detail::select_size_t_constant<0xDEADBEEFU, 0xDDEEAADDDEADBEEFULL>::value; -inline void poison_value(std::uintptr_t& val) { val = venom; } +inline void poison_value(std::uintptr_t& val) { val = venom; } -inline void poison_value(std::atomic<std::uintptr_t>& val) { val.store(venom, std::memory_order_relaxed); } +inline void poison_value(std::atomic<std::uintptr_t>& val) { val.store(venom, std::memory_order_relaxed); } /** Expected to be used in assertions only, thus no empty form is defined. **/ -inline bool is_alive(std::uintptr_t v) { return v != venom; } +inline bool is_alive(std::uintptr_t v) { return v != venom; } /** Logically, this method should be a member of class task. But we do not want to publish it, so it is here instead. */ -inline void assert_task_valid(const d1::task* t) { - assert_pointer_valid(t); +inline void assert_task_valid(const d1::task* t) { + assert_pointer_valid(t); } #else /* !TBB_USE_ASSERT */ @@ -332,174 +332,174 @@ inline void assert_task_valid(const d1::task* t) { the variable used as its argument may be undefined in release builds. **/ #define poison_value(g) ((void)0) -inline void assert_task_valid(const d1::task*) {} +inline void assert_task_valid(const d1::task*) {} #endif /* !TBB_USE_ASSERT */ -struct suspend_point_type { -#if __TBB_RESUMABLE_TASKS - //! The arena related to this task_dispatcher - arena* m_arena{ nullptr }; - //! The random for the resume task - FastRandom m_random; - //! The flag is raised when the original owner should return to this task dispatcher. - std::atomic<bool> m_is_owner_recalled{ false }; - //! Inicates if the resume task should be placed to the critical task stream. - bool m_is_critical{ false }; - //! Associated coroutine - co_context m_co_context; - - struct resume_task final : public d1::task { - task_dispatcher& m_target; - explicit resume_task(task_dispatcher& target) : m_target(target) { - task_accessor::set_resume_trait(*this); +struct suspend_point_type { +#if __TBB_RESUMABLE_TASKS + //! The arena related to this task_dispatcher + arena* m_arena{ nullptr }; + //! The random for the resume task + FastRandom m_random; + //! The flag is raised when the original owner should return to this task dispatcher. + std::atomic<bool> m_is_owner_recalled{ false }; + //! Inicates if the resume task should be placed to the critical task stream. + bool m_is_critical{ false }; + //! 
Associated coroutine + co_context m_co_context; + + struct resume_task final : public d1::task { + task_dispatcher& m_target; + explicit resume_task(task_dispatcher& target) : m_target(target) { + task_accessor::set_resume_trait(*this); + } + d1::task* execute(d1::execution_data& ed) override; + d1::task* cancel(d1::execution_data&) override { + __TBB_ASSERT(false, "The resume task cannot be canceled"); + return nullptr; } - d1::task* execute(d1::execution_data& ed) override; - d1::task* cancel(d1::execution_data&) override { - __TBB_ASSERT(false, "The resume task cannot be canceled"); - return nullptr; - } - } m_resume_task; - - suspend_point_type(arena* a, std::size_t stack_size, task_dispatcher& target); -#endif /*__TBB_RESUMABLE_TASKS */ -}; - -class alignas (max_nfs_size) task_dispatcher { -public: - // TODO: reconsider low level design to better organize dependencies and files. - friend class thread_data; - friend class arena_slot; - friend class nested_arena_context; - friend class delegated_task; - friend struct base_waiter; - - //! The data of the current thread attached to this task_dispatcher - thread_data* m_thread_data{ nullptr }; - - //! The current execution data - execution_data_ext m_execute_data_ext; - - //! Properties - struct properties { - bool outermost{ true }; - bool fifo_tasks_allowed{ true }; - bool critical_task_allowed{ true }; - } m_properties; - - //! Position in the call stack when stealing is still allowed. - std::uintptr_t m_stealing_threshold{}; - - //! Suspend point (null if this task dispatcher has been never suspended) - suspend_point_type* m_suspend_point{ nullptr }; - - //! Attempt to get a task from the mailbox. - /** Gets a task only if it has not been executed by its sender or a thief - that has stolen it from the sender's task pool. Otherwise returns NULL. - This method is intended to be used only by the thread extracting the proxy - from its mailbox. (In contrast to local task pool, mailbox can be read only - by its owner). 
**/ - d1::task* get_mailbox_task(mail_inbox& my_inbox, execution_data_ext& ed, isolation_type isolation); - - d1::task* get_critical_task(d1::task*, execution_data_ext&, isolation_type, bool); - - template <bool ITTPossible, typename Waiter> - d1::task* receive_or_steal_task(thread_data& tls, execution_data_ext& ed, Waiter& waiter, - isolation_type isolation, bool outermost, bool criticality_absence); - - template <bool ITTPossible, typename Waiter> - d1::task* local_wait_for_all(d1::task * t, Waiter& waiter); - - task_dispatcher(const task_dispatcher&) = delete; - - bool can_steal(); -public: - task_dispatcher(arena* a); - - ~task_dispatcher() { - if (m_suspend_point) { - m_suspend_point->~suspend_point_type(); - cache_aligned_deallocate(m_suspend_point); - } - poison_pointer(m_thread_data); - poison_pointer(m_suspend_point); - } - - template <typename Waiter> - d1::task* local_wait_for_all(d1::task* t, Waiter& waiter); - - bool allow_fifo_task(bool new_state) { - bool old_state = m_properties.fifo_tasks_allowed; - m_properties.fifo_tasks_allowed = new_state; - return old_state; - } - - isolation_type set_isolation(isolation_type isolation) { - isolation_type prev = m_execute_data_ext.isolation; - m_execute_data_ext.isolation = isolation; - return prev; + } m_resume_task; + + suspend_point_type(arena* a, std::size_t stack_size, task_dispatcher& target); +#endif /*__TBB_RESUMABLE_TASKS */ +}; + +class alignas (max_nfs_size) task_dispatcher { +public: + // TODO: reconsider low level design to better organize dependencies and files. + friend class thread_data; + friend class arena_slot; + friend class nested_arena_context; + friend class delegated_task; + friend struct base_waiter; + + //! The data of the current thread attached to this task_dispatcher + thread_data* m_thread_data{ nullptr }; + + //! The current execution data + execution_data_ext m_execute_data_ext; + + //! Properties + struct properties { + bool outermost{ true }; + bool fifo_tasks_allowed{ true }; + bool critical_task_allowed{ true }; + } m_properties; + + //! Position in the call stack when stealing is still allowed. + std::uintptr_t m_stealing_threshold{}; + + //! Suspend point (null if this task dispatcher has been never suspended) + suspend_point_type* m_suspend_point{ nullptr }; + + //! Attempt to get a task from the mailbox. + /** Gets a task only if it has not been executed by its sender or a thief + that has stolen it from the sender's task pool. Otherwise returns NULL. + This method is intended to be used only by the thread extracting the proxy + from its mailbox. (In contrast to local task pool, mailbox can be read only + by its owner). 
**/ + d1::task* get_mailbox_task(mail_inbox& my_inbox, execution_data_ext& ed, isolation_type isolation); + + d1::task* get_critical_task(d1::task*, execution_data_ext&, isolation_type, bool); + + template <bool ITTPossible, typename Waiter> + d1::task* receive_or_steal_task(thread_data& tls, execution_data_ext& ed, Waiter& waiter, + isolation_type isolation, bool outermost, bool criticality_absence); + + template <bool ITTPossible, typename Waiter> + d1::task* local_wait_for_all(d1::task * t, Waiter& waiter); + + task_dispatcher(const task_dispatcher&) = delete; + + bool can_steal(); +public: + task_dispatcher(arena* a); + + ~task_dispatcher() { + if (m_suspend_point) { + m_suspend_point->~suspend_point_type(); + cache_aligned_deallocate(m_suspend_point); + } + poison_pointer(m_thread_data); + poison_pointer(m_suspend_point); } - thread_data& get_thread_data() { - __TBB_ASSERT(m_thread_data, nullptr); - return *m_thread_data; + template <typename Waiter> + d1::task* local_wait_for_all(d1::task* t, Waiter& waiter); + + bool allow_fifo_task(bool new_state) { + bool old_state = m_properties.fifo_tasks_allowed; + m_properties.fifo_tasks_allowed = new_state; + return old_state; + } + + isolation_type set_isolation(isolation_type isolation) { + isolation_type prev = m_execute_data_ext.isolation; + m_execute_data_ext.isolation = isolation; + return prev; } - static void execute_and_wait(d1::task* t, d1::wait_context& wait_ctx, d1::task_group_context& w_ctx); - - void set_stealing_threshold(std::uintptr_t stealing_threshold) { - bool assert_condition = (stealing_threshold == 0 && m_stealing_threshold != 0) || - (stealing_threshold != 0 && m_stealing_threshold == 0); - __TBB_ASSERT_EX( assert_condition, nullptr ); - m_stealing_threshold = stealing_threshold; + thread_data& get_thread_data() { + __TBB_ASSERT(m_thread_data, nullptr); + return *m_thread_data; } - - d1::task* get_inbox_or_critical_task(execution_data_ext&, mail_inbox&, isolation_type, bool); - d1::task* get_stream_or_critical_task(execution_data_ext&, arena&, task_stream<front_accessor>&, - unsigned& /*hint_for_stream*/, isolation_type, - bool /*critical_allowed*/); - d1::task* steal_or_get_critical(execution_data_ext&, arena&, unsigned /*arena_index*/, FastRandom&, - isolation_type, bool /*critical_allowed*/); - -#if __TBB_RESUMABLE_TASKS - /* [[noreturn]] */ void co_local_wait_for_all() noexcept; - void suspend(suspend_callback_type suspend_callback, void* user_callback); - void resume(task_dispatcher& target); - suspend_point_type* get_suspend_point(); - void init_suspend_point(arena* a, std::size_t stack_size); - friend void internal_resume(suspend_point_type*); - void recall_point(); -#endif /* __TBB_RESUMABLE_TASKS */ + + static void execute_and_wait(d1::task* t, d1::wait_context& wait_ctx, d1::task_group_context& w_ctx); + + void set_stealing_threshold(std::uintptr_t stealing_threshold) { + bool assert_condition = (stealing_threshold == 0 && m_stealing_threshold != 0) || + (stealing_threshold != 0 && m_stealing_threshold == 0); + __TBB_ASSERT_EX( assert_condition, nullptr ); + m_stealing_threshold = stealing_threshold; + } + + d1::task* get_inbox_or_critical_task(execution_data_ext&, mail_inbox&, isolation_type, bool); + d1::task* get_stream_or_critical_task(execution_data_ext&, arena&, task_stream<front_accessor>&, + unsigned& /*hint_for_stream*/, isolation_type, + bool /*critical_allowed*/); + d1::task* steal_or_get_critical(execution_data_ext&, arena&, unsigned /*arena_index*/, FastRandom&, + isolation_type, bool 
/*critical_allowed*/); + +#if __TBB_RESUMABLE_TASKS + /* [[noreturn]] */ void co_local_wait_for_all() noexcept; + void suspend(suspend_callback_type suspend_callback, void* user_callback); + void resume(task_dispatcher& target); + suspend_point_type* get_suspend_point(); + void init_suspend_point(arena* a, std::size_t stack_size); + friend void internal_resume(suspend_point_type*); + void recall_point(); +#endif /* __TBB_RESUMABLE_TASKS */ }; -inline std::uintptr_t calculate_stealing_threshold(std::uintptr_t base, std::size_t stack_size) { - return base - stack_size / 2; -} - -struct task_group_context_impl { - static void destroy(d1::task_group_context&); - static void initialize(d1::task_group_context&); - static void register_with(d1::task_group_context&, thread_data*); - static void bind_to_impl(d1::task_group_context&, thread_data*); - static void bind_to(d1::task_group_context&, thread_data*); - template <typename T> - static void propagate_task_group_state(d1::task_group_context&, std::atomic<T> d1::task_group_context::*, d1::task_group_context&, T); - static bool cancel_group_execution(d1::task_group_context&); - static bool is_group_execution_cancelled(const d1::task_group_context&); - static void reset(d1::task_group_context&); - static void capture_fp_settings(d1::task_group_context&); - static void copy_fp_settings(d1::task_group_context& ctx, const d1::task_group_context& src); +inline std::uintptr_t calculate_stealing_threshold(std::uintptr_t base, std::size_t stack_size) { + return base - stack_size / 2; +} + +struct task_group_context_impl { + static void destroy(d1::task_group_context&); + static void initialize(d1::task_group_context&); + static void register_with(d1::task_group_context&, thread_data*); + static void bind_to_impl(d1::task_group_context&, thread_data*); + static void bind_to(d1::task_group_context&, thread_data*); + template <typename T> + static void propagate_task_group_state(d1::task_group_context&, std::atomic<T> d1::task_group_context::*, d1::task_group_context&, T); + static bool cancel_group_execution(d1::task_group_context&); + static bool is_group_execution_cancelled(const d1::task_group_context&); + static void reset(d1::task_group_context&); + static void capture_fp_settings(d1::task_group_context&); + static void copy_fp_settings(d1::task_group_context& ctx, const d1::task_group_context& src); }; - -//! Forward declaration for scheduler entities -bool gcc_rethrow_exception_broken(); -void fix_broken_rethrow(); -//! Forward declaration: throws std::runtime_error with what() returning error_code description prefixed with aux_info -void handle_perror(int error_code, const char* aux_info); - -} // namespace r1 -} // namespace detail + +//! Forward declaration for scheduler entities +bool gcc_rethrow_exception_broken(); +void fix_broken_rethrow(); +//! 
Forward declaration: throws std::runtime_error with what() returning error_code description prefixed with aux_info +void handle_perror(int error_code, const char* aux_info); + +} // namespace r1 +} // namespace detail } // namespace tbb #endif /* _TBB_scheduler_common_H */ diff --git a/contrib/libs/tbb/src/tbb/semaphore.cpp b/contrib/libs/tbb/src/tbb/semaphore.cpp index 7764ac8f49..92c9e675ab 100644 --- a/contrib/libs/tbb/src/tbb/semaphore.cpp +++ b/contrib/libs/tbb/src/tbb/semaphore.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -17,17 +17,17 @@ #include "semaphore.h" #if __TBB_USE_SRWLOCK #include "dynamic_link.h" // Refers to src/tbb, not include/tbb -#error #include "tbb_misc.h" +#error #include "tbb_misc.h" #endif namespace tbb { -namespace detail { -namespace r1 { +namespace detail { +namespace r1 { // TODO: For new win UI port, we can use SRWLock API without dynamic_link etc. #if __TBB_USE_SRWLOCK -static std::atomic<do_once_state> concmon_module_inited; +static std::atomic<do_once_state> concmon_module_inited; void WINAPI init_binsem_using_event( SRWLOCK* h_ ) { @@ -87,6 +87,6 @@ void binary_semaphore::V() { __TBB_release_binsem( &my_sem.lock ); } #endif /* __TBB_USE_SRWLOCK */ -} // namespace r1 -} // namespace detail +} // namespace r1 +} // namespace detail } // namespace tbb diff --git a/contrib/libs/tbb/src/tbb/semaphore.h b/contrib/libs/tbb/src/tbb/semaphore.h index 207b53da07..0a88536e36 100644 --- a/contrib/libs/tbb/src/tbb/semaphore.h +++ b/contrib/libs/tbb/src/tbb/semaphore.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,13 +14,13 @@ limitations under the License. */ -#ifndef __TBB_semaphore_H -#define __TBB_semaphore_H +#ifndef __TBB_semaphore_H +#define __TBB_semaphore_H -#include "oneapi/tbb/detail/_utils.h" +#include "oneapi/tbb/detail/_utils.h" #if _WIN32||_WIN64 -#include <windows.h> +#include <windows.h> #elif __APPLE__ #include <mach/semaphore.h> #include <mach/task.h> @@ -29,101 +29,101 @@ #else #include <semaphore.h> #ifdef TBB_USE_DEBUG -#include <cerrno> +#include <cerrno> #endif #endif /*_WIN32||_WIN64*/ -#include <atomic> - -#if __linux__ || __FreeBSD__ || __NetBSD__ || __OpenBSD__ - -/* Futex definitions */ -#include <unistd.h> -#include <sys/syscall.h> - -#if defined(SYS_futex) - -/* This section is included for Linux and some other systems that may support futexes.*/ - -#define __TBB_USE_FUTEX 1 - -#if defined(__has_include) -#define __TBB_has_include __has_include -#else -#define __TBB_has_include(x) 0 -#endif - -/* -If available, use typical headers where futex API is defined. While Linux and OpenBSD -are known to provide such headers, other systems might have them as well. -*/ -#if defined(__linux__) || __TBB_has_include(<linux/futex.h>) -#include <linux/futex.h> -#elif defined(__OpenBSD__) || __TBB_has_include(<sys/futex.h>) -#error #include <sys/futex.h> -#endif - -#include <climits> -#include <cerrno> - -/* -Some systems might not define the macros or use different names. In such case we expect -the actual parameter values to match Linux: 0 for wait, 1 for wake. 
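Note: on Linux the __TBB_FUTEX_WAIT/__TBB_FUTEX_WAKE operations selected here go through the futex(2) system call, as futex_wait() and futex_wakeup_one() do further down. A bare-bones Linux-only sketch of that wait/wake pair (error handling omitted; the real helpers also check errno for EAGAIN/EINTR):

// futex_minimal.cpp -- Linux-only illustration of raw futex wait/wake.
#include <atomic>
#include <unistd.h>        // syscall
#include <sys/syscall.h>   // SYS_futex
#include <linux/futex.h>   // FUTEX_WAIT_PRIVATE, FUTEX_WAKE_PRIVATE

static long futex(std::atomic<int>* uaddr, int op, int val) {
    // futex(2) arguments: uaddr, futex_op, val, timeout, uaddr2, val3.
    return ::syscall(SYS_futex, uaddr, op, val, nullptr, nullptr, 0);
}

// Block while *flag still holds `expected`; may return spuriously.
static void wait_on(std::atomic<int>& flag, int expected) {
    futex(&flag, FUTEX_WAIT_PRIVATE, expected);
}

// Wake at most one thread blocked in wait_on() on the same flag.
static void wake_one(std::atomic<int>& flag) {
    futex(&flag, FUTEX_WAKE_PRIVATE, 1);
}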
-*/ -#if defined(FUTEX_WAIT_PRIVATE) -#define __TBB_FUTEX_WAIT FUTEX_WAIT_PRIVATE -#elif defined(FUTEX_WAIT) -#define __TBB_FUTEX_WAIT FUTEX_WAIT -#else -#define __TBB_FUTEX_WAIT 0 -#endif - -#if defined(FUTEX_WAKE_PRIVATE) -#define __TBB_FUTEX_WAKE FUTEX_WAKE_PRIVATE -#elif defined(FUTEX_WAKE) -#define __TBB_FUTEX_WAKE FUTEX_WAKE -#else -#define __TBB_FUTEX_WAKE 1 -#endif - -#endif // SYS_futex -#endif // __linux__ || __FreeBSD__ || __NetBSD__ || __OpenBSD__ - +#include <atomic> + +#if __linux__ || __FreeBSD__ || __NetBSD__ || __OpenBSD__ + +/* Futex definitions */ +#include <unistd.h> +#include <sys/syscall.h> + +#if defined(SYS_futex) + +/* This section is included for Linux and some other systems that may support futexes.*/ + +#define __TBB_USE_FUTEX 1 + +#if defined(__has_include) +#define __TBB_has_include __has_include +#else +#define __TBB_has_include(x) 0 +#endif + +/* +If available, use typical headers where futex API is defined. While Linux and OpenBSD +are known to provide such headers, other systems might have them as well. +*/ +#if defined(__linux__) || __TBB_has_include(<linux/futex.h>) +#include <linux/futex.h> +#elif defined(__OpenBSD__) || __TBB_has_include(<sys/futex.h>) +#error #include <sys/futex.h> +#endif + +#include <climits> +#include <cerrno> + +/* +Some systems might not define the macros or use different names. In such case we expect +the actual parameter values to match Linux: 0 for wait, 1 for wake. +*/ +#if defined(FUTEX_WAIT_PRIVATE) +#define __TBB_FUTEX_WAIT FUTEX_WAIT_PRIVATE +#elif defined(FUTEX_WAIT) +#define __TBB_FUTEX_WAIT FUTEX_WAIT +#else +#define __TBB_FUTEX_WAIT 0 +#endif + +#if defined(FUTEX_WAKE_PRIVATE) +#define __TBB_FUTEX_WAKE FUTEX_WAKE_PRIVATE +#elif defined(FUTEX_WAKE) +#define __TBB_FUTEX_WAKE FUTEX_WAKE +#else +#define __TBB_FUTEX_WAKE 1 +#endif + +#endif // SYS_futex +#endif // __linux__ || __FreeBSD__ || __NetBSD__ || __OpenBSD__ + namespace tbb { -namespace detail { -namespace r1 { - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Futex implementation -//////////////////////////////////////////////////////////////////////////////////////////////////// - -#if __TBB_USE_FUTEX - -static inline int futex_wait( void *futex, int comparand ) { - int r = ::syscall( SYS_futex,futex,__TBB_FUTEX_WAIT,comparand,NULL,NULL,0 ); -#if TBB_USE_ASSERT - int e = errno; - __TBB_ASSERT( r==0||r==EWOULDBLOCK||(r==-1&&(e==EAGAIN||e==EINTR)), "futex_wait failed." ); -#endif /* TBB_USE_ASSERT */ - return r; -} - -static inline int futex_wakeup_one( void *futex ) { - int r = ::syscall( SYS_futex,futex,__TBB_FUTEX_WAKE,1,NULL,NULL,0 ); - __TBB_ASSERT( r==0||r==1, "futex_wakeup_one: more than one thread woken up?" 
); - return r; -} - -// Additional possible methods that are not required right now -// static inline int futex_wakeup_all( void *futex ) { -// int r = ::syscall( SYS_futex,futex,__TBB_FUTEX_WAKE,INT_MAX,NULL,NULL,0 ); -// __TBB_ASSERT( r>=0, "futex_wakeup_all: error in waking up threads" ); -// return r; -// } - -#endif // __TBB_USE_FUTEX - -//////////////////////////////////////////////////////////////////////////////////////////////////// +namespace detail { +namespace r1 { + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Futex implementation +//////////////////////////////////////////////////////////////////////////////////////////////////// + +#if __TBB_USE_FUTEX + +static inline int futex_wait( void *futex, int comparand ) { + int r = ::syscall( SYS_futex,futex,__TBB_FUTEX_WAIT,comparand,NULL,NULL,0 ); +#if TBB_USE_ASSERT + int e = errno; + __TBB_ASSERT( r==0||r==EWOULDBLOCK||(r==-1&&(e==EAGAIN||e==EINTR)), "futex_wait failed." ); +#endif /* TBB_USE_ASSERT */ + return r; +} + +static inline int futex_wakeup_one( void *futex ) { + int r = ::syscall( SYS_futex,futex,__TBB_FUTEX_WAKE,1,NULL,NULL,0 ); + __TBB_ASSERT( r==0||r==1, "futex_wakeup_one: more than one thread woken up?" ); + return r; +} + +// Additional possible methods that are not required right now +// static inline int futex_wakeup_all( void *futex ) { +// int r = ::syscall( SYS_futex,futex,__TBB_FUTEX_WAKE,INT_MAX,NULL,NULL,0 ); +// __TBB_ASSERT( r>=0, "futex_wakeup_all: error in waking up threads" ); +// return r; +// } + +#endif // __TBB_USE_FUTEX + +//////////////////////////////////////////////////////////////////////////////////////////////////// #if _WIN32||_WIN64 typedef LONG sem_count_t; //! Edsger Dijkstra's counting semaphore @@ -272,8 +272,8 @@ private: #if __TBB_USE_FUTEX class binary_semaphore : no_copy { -// The implementation is equivalent to the "Mutex, Take 3" one -// in the paper "Futexes Are Tricky" by Ulrich Drepper +// The implementation is equivalent to the "Mutex, Take 3" one +// in the paper "Futexes Are Tricky" by Ulrich Drepper public: //! ctor binary_semaphore() { my_sem = 1; } @@ -281,24 +281,24 @@ public: ~binary_semaphore() {} //! wait/acquire void P() { - int s = 0; - if( !my_sem.compare_exchange_strong( s, 1 ) ) { + int s = 0; + if( !my_sem.compare_exchange_strong( s, 1 ) ) { if( s!=2 ) - s = my_sem.exchange( 2 ); - while( s!=0 ) { // This loop deals with spurious wakeup + s = my_sem.exchange( 2 ); + while( s!=0 ) { // This loop deals with spurious wakeup futex_wait( &my_sem, 2 ); - s = my_sem.exchange( 2 ); + s = my_sem.exchange( 2 ); } } } //! post/release void V() { - __TBB_ASSERT( my_sem.load(std::memory_order_relaxed)>=1, "multiple V()'s in a row?" ); - if( my_sem.exchange( 0 )==2 ) + __TBB_ASSERT( my_sem.load(std::memory_order_relaxed)>=1, "multiple V()'s in a row?" 
); + if( my_sem.exchange( 0 )==2 ) futex_wakeup_one( &my_sem ); } private: - std::atomic<int> my_sem; // 0 - open; 1 - closed, no waits; 2 - closed, possible waits + std::atomic<int> my_sem; // 0 - open; 1 - closed, no waits; 2 - closed, possible waits }; #else typedef uint32_t sem_count_t; @@ -328,8 +328,8 @@ private: #endif /* __TBB_USE_FUTEX */ #endif /* _WIN32||_WIN64 */ -} // namespace r1 -} // namespace detail +} // namespace r1 +} // namespace detail } // namespace tbb -#endif /* __TBB_semaphore_H */ +#endif /* __TBB_semaphore_H */ diff --git a/contrib/libs/tbb/src/tbb/small_object_pool.cpp b/contrib/libs/tbb/src/tbb/small_object_pool.cpp index 0cf4091260..28d11d011d 100644 --- a/contrib/libs/tbb/src/tbb/small_object_pool.cpp +++ b/contrib/libs/tbb/src/tbb/small_object_pool.cpp @@ -1,154 +1,154 @@ -/* - Copyright (c) 2020-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#include "oneapi/tbb/cache_aligned_allocator.h" -#include "oneapi/tbb/detail/_small_object_pool.h" -#include "oneapi/tbb/detail/_task.h" -#include "governor.h" -#include "thread_data.h" -#include "task_dispatcher.h" - -#include <cstddef> - -namespace tbb { -namespace detail { -namespace r1 { - -small_object_pool_impl::small_object* const small_object_pool_impl::dead_public_list = - reinterpret_cast<small_object_pool_impl::small_object*>(1); - -void* __TBB_EXPORTED_FUNC allocate(d1::small_object_pool*& allocator, std::size_t number_of_bytes, const d1::execution_data& ed) { - auto& tls = static_cast<const execution_data_ext&>(ed).task_disp->get_thread_data(); - auto pool = tls.my_small_object_pool; - return pool->allocate_impl(allocator, number_of_bytes); -} - -void* __TBB_EXPORTED_FUNC allocate(d1::small_object_pool*& allocator, std::size_t number_of_bytes) { - // TODO: optimize if the allocator contains a valid pool. - auto tls = governor::get_thread_data(); - auto pool = tls->my_small_object_pool; - return pool->allocate_impl(allocator, number_of_bytes); -} - -void* small_object_pool_impl::allocate_impl(d1::small_object_pool*& allocator, std::size_t number_of_bytes) -{ - small_object* obj{nullptr}; - - if (number_of_bytes <= small_object_size) { - if (m_private_list) { - obj = m_private_list; - m_private_list = m_private_list->next; - } else if (m_public_list.load(std::memory_order_relaxed)) { - // No fence required for read of my_public_list above, because std::atomic::exchange() has a fence. - obj = m_public_list.exchange(nullptr); - __TBB_ASSERT( obj, "another thread emptied the my_public_list" ); - m_private_list = obj->next; - } else { - obj = new (cache_aligned_allocate(small_object_size)) small_object{nullptr}; - ++m_private_counter; - } - } else { - obj = new (cache_aligned_allocate(number_of_bytes)) small_object{nullptr}; - } - allocator = this; - - // Return uninitialized memory for further construction on user side. 
- obj->~small_object(); - return obj; -} - -void __TBB_EXPORTED_FUNC deallocate(d1::small_object_pool& allocator, void* ptr, std::size_t number_of_bytes) { - auto pool = static_cast<small_object_pool_impl*>(&allocator); - auto tls = governor::get_thread_data(); - pool->deallocate_impl(ptr, number_of_bytes, *tls); -} - -void __TBB_EXPORTED_FUNC deallocate(d1::small_object_pool& allocator, void* ptr, std::size_t number_of_bytes, const d1::execution_data& ed) { - auto& tls = static_cast<const execution_data_ext&>(ed).task_disp->get_thread_data(); - auto pool = static_cast<small_object_pool_impl*>(&allocator); - pool->deallocate_impl(ptr, number_of_bytes, tls); -} - -void small_object_pool_impl::deallocate_impl(void* ptr, std::size_t number_of_bytes, thread_data& td) { - __TBB_ASSERT(ptr != nullptr, "pointer to deallocate should not be null"); - __TBB_ASSERT(number_of_bytes >= sizeof(small_object), "number of bytes should be at least sizeof(small_object)"); - - if (number_of_bytes <= small_object_size) { - auto obj = new (ptr) small_object{nullptr}; - if (td.my_small_object_pool == this) { - obj->next = m_private_list; - m_private_list = obj; - } else { - auto old_public_list = m_public_list.load(std::memory_order_relaxed); - - for (;;) { - if (old_public_list == dead_public_list) { - obj->~small_object(); - cache_aligned_deallocate(obj); - if (++m_public_counter == 0) - { - this->~small_object_pool_impl(); - cache_aligned_deallocate(this); - } - break; - } - obj->next = old_public_list; - if (m_public_list.compare_exchange_strong(old_public_list, obj)) { - break; - } - } - } - } else { - cache_aligned_deallocate(ptr); - } -} - -std::int64_t small_object_pool_impl::cleanup_list(small_object* list) -{ - std::int64_t removed_count{}; - - while (list) { - small_object* current = list; - list = list->next; - current->~small_object(); - cache_aligned_deallocate(current); - ++removed_count; - } - return removed_count; -} - -void small_object_pool_impl::destroy() -{ - // clean up private list and subtract the removed count from private counter - m_private_counter -= cleanup_list(m_private_list); - // Grab public list and place dead mark - small_object* public_list = m_public_list.exchange(dead_public_list); - // clean up public list and subtract from private (intentionally) counter - m_private_counter -= cleanup_list(public_list); - __TBB_ASSERT(m_private_counter >= 0, "Private counter may not be less than 0"); - // Equivalent to fetch_sub(m_private_counter) - m_private_counter. But we need to do it - // atomically with operator-= not to access m_private_counter after the subtraction. - auto new_value = m_public_counter -= m_private_counter; - // check if this method is responsible to clean up the resources - if (new_value == 0) { - this->~small_object_pool_impl(); - cache_aligned_deallocate(this); - } -} - -} // namespace r1 -} // namespace detail -} // namespace tbb +/* + Copyright (c) 2020-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#include "oneapi/tbb/cache_aligned_allocator.h" +#include "oneapi/tbb/detail/_small_object_pool.h" +#include "oneapi/tbb/detail/_task.h" +#include "governor.h" +#include "thread_data.h" +#include "task_dispatcher.h" + +#include <cstddef> + +namespace tbb { +namespace detail { +namespace r1 { + +small_object_pool_impl::small_object* const small_object_pool_impl::dead_public_list = + reinterpret_cast<small_object_pool_impl::small_object*>(1); + +void* __TBB_EXPORTED_FUNC allocate(d1::small_object_pool*& allocator, std::size_t number_of_bytes, const d1::execution_data& ed) { + auto& tls = static_cast<const execution_data_ext&>(ed).task_disp->get_thread_data(); + auto pool = tls.my_small_object_pool; + return pool->allocate_impl(allocator, number_of_bytes); +} + +void* __TBB_EXPORTED_FUNC allocate(d1::small_object_pool*& allocator, std::size_t number_of_bytes) { + // TODO: optimize if the allocator contains a valid pool. + auto tls = governor::get_thread_data(); + auto pool = tls->my_small_object_pool; + return pool->allocate_impl(allocator, number_of_bytes); +} + +void* small_object_pool_impl::allocate_impl(d1::small_object_pool*& allocator, std::size_t number_of_bytes) +{ + small_object* obj{nullptr}; + + if (number_of_bytes <= small_object_size) { + if (m_private_list) { + obj = m_private_list; + m_private_list = m_private_list->next; + } else if (m_public_list.load(std::memory_order_relaxed)) { + // No fence required for read of my_public_list above, because std::atomic::exchange() has a fence. + obj = m_public_list.exchange(nullptr); + __TBB_ASSERT( obj, "another thread emptied the my_public_list" ); + m_private_list = obj->next; + } else { + obj = new (cache_aligned_allocate(small_object_size)) small_object{nullptr}; + ++m_private_counter; + } + } else { + obj = new (cache_aligned_allocate(number_of_bytes)) small_object{nullptr}; + } + allocator = this; + + // Return uninitialized memory for further construction on user side. 
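Note: at this point allocate_impl deliberately ends the lifetime of the small_object header and returns the same bytes as raw storage for the caller to placement-new into. A condensed, single-threaded sketch of that block-reuse pattern (hypothetical names; the real pool also splits the cache into a private list plus an atomic public list):

// block_reuse.cpp -- illustrative, single-threaded sketch of free-list block reuse.
#include <cstddef>
#include <new>

constexpr std::size_t block_size = 256;     // plays the role of small_object_size

struct node { node* next; };                // header that exists only while a block is cached

static node* free_list = nullptr;

void* take_block(std::size_t bytes) {
    if (bytes <= block_size) {
        if (node* n = free_list) {
            free_list = n->next;
            n->~node();                     // end the header's lifetime...
            return n;                       // ...and hand the bytes back as raw storage
        }
        return ::operator new(block_size);  // carve a fresh block of the cacheable size
    }
    return ::operator new(bytes);           // oversized requests bypass the cache
}

void give_back(void* p, std::size_t bytes) {
    if (bytes <= block_size)
        free_list = new (p) node{free_list};   // rebuild a header in place and re-link the block
    else
        ::operator delete(p);
}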
+ obj->~small_object(); + return obj; +} + +void __TBB_EXPORTED_FUNC deallocate(d1::small_object_pool& allocator, void* ptr, std::size_t number_of_bytes) { + auto pool = static_cast<small_object_pool_impl*>(&allocator); + auto tls = governor::get_thread_data(); + pool->deallocate_impl(ptr, number_of_bytes, *tls); +} + +void __TBB_EXPORTED_FUNC deallocate(d1::small_object_pool& allocator, void* ptr, std::size_t number_of_bytes, const d1::execution_data& ed) { + auto& tls = static_cast<const execution_data_ext&>(ed).task_disp->get_thread_data(); + auto pool = static_cast<small_object_pool_impl*>(&allocator); + pool->deallocate_impl(ptr, number_of_bytes, tls); +} + +void small_object_pool_impl::deallocate_impl(void* ptr, std::size_t number_of_bytes, thread_data& td) { + __TBB_ASSERT(ptr != nullptr, "pointer to deallocate should not be null"); + __TBB_ASSERT(number_of_bytes >= sizeof(small_object), "number of bytes should be at least sizeof(small_object)"); + + if (number_of_bytes <= small_object_size) { + auto obj = new (ptr) small_object{nullptr}; + if (td.my_small_object_pool == this) { + obj->next = m_private_list; + m_private_list = obj; + } else { + auto old_public_list = m_public_list.load(std::memory_order_relaxed); + + for (;;) { + if (old_public_list == dead_public_list) { + obj->~small_object(); + cache_aligned_deallocate(obj); + if (++m_public_counter == 0) + { + this->~small_object_pool_impl(); + cache_aligned_deallocate(this); + } + break; + } + obj->next = old_public_list; + if (m_public_list.compare_exchange_strong(old_public_list, obj)) { + break; + } + } + } + } else { + cache_aligned_deallocate(ptr); + } +} + +std::int64_t small_object_pool_impl::cleanup_list(small_object* list) +{ + std::int64_t removed_count{}; + + while (list) { + small_object* current = list; + list = list->next; + current->~small_object(); + cache_aligned_deallocate(current); + ++removed_count; + } + return removed_count; +} + +void small_object_pool_impl::destroy() +{ + // clean up private list and subtract the removed count from private counter + m_private_counter -= cleanup_list(m_private_list); + // Grab public list and place dead mark + small_object* public_list = m_public_list.exchange(dead_public_list); + // clean up public list and subtract from private (intentionally) counter + m_private_counter -= cleanup_list(public_list); + __TBB_ASSERT(m_private_counter >= 0, "Private counter may not be less than 0"); + // Equivalent to fetch_sub(m_private_counter) - m_private_counter. But we need to do it + // atomically with operator-= not to access m_private_counter after the subtraction. + auto new_value = m_public_counter -= m_private_counter; + // check if this method is responsible to clean up the resources + if (new_value == 0) { + this->~small_object_pool_impl(); + cache_aligned_deallocate(this); + } +} + +} // namespace r1 +} // namespace detail +} // namespace tbb diff --git a/contrib/libs/tbb/src/tbb/small_object_pool_impl.h b/contrib/libs/tbb/src/tbb/small_object_pool_impl.h index 971e8d47da..a6b664beab 100644 --- a/contrib/libs/tbb/src/tbb/small_object_pool_impl.h +++ b/contrib/libs/tbb/src/tbb/small_object_pool_impl.h @@ -1,59 +1,59 @@ -/* - Copyright (c) 2020-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef __TBB_small_object_pool_impl_H -#define __TBB_small_object_pool_impl_H - -#include "oneapi/tbb/detail/_small_object_pool.h" -#include "oneapi/tbb/detail/_utils.h" - -#include <cstddef> -#include <cstdint> -#include <atomic> - - -namespace tbb { -namespace detail { -namespace r1 { - -class thread_data; - -class small_object_pool_impl : public d1::small_object_pool -{ - static constexpr std::size_t small_object_size = 256; - struct small_object { - small_object* next; - }; - static small_object* const dead_public_list; -public: - void* allocate_impl(small_object_pool*& allocator, std::size_t number_of_bytes); - void deallocate_impl(void* ptr, std::size_t number_of_bytes, thread_data& td); - void destroy(); -private: - static std::int64_t cleanup_list(small_object* list); - ~small_object_pool_impl() = default; -private: - alignas(max_nfs_size) small_object* m_private_list; - std::int64_t m_private_counter{}; - alignas(max_nfs_size) std::atomic<small_object*> m_public_list; - std::atomic<std::int64_t> m_public_counter{}; -}; - -} // namespace r1 -} // namespace detail -} // namespace tbb - -#endif /* __TBB_small_object_pool_impl_H */ +/* + Copyright (c) 2020-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_small_object_pool_impl_H +#define __TBB_small_object_pool_impl_H + +#include "oneapi/tbb/detail/_small_object_pool.h" +#include "oneapi/tbb/detail/_utils.h" + +#include <cstddef> +#include <cstdint> +#include <atomic> + + +namespace tbb { +namespace detail { +namespace r1 { + +class thread_data; + +class small_object_pool_impl : public d1::small_object_pool +{ + static constexpr std::size_t small_object_size = 256; + struct small_object { + small_object* next; + }; + static small_object* const dead_public_list; +public: + void* allocate_impl(small_object_pool*& allocator, std::size_t number_of_bytes); + void deallocate_impl(void* ptr, std::size_t number_of_bytes, thread_data& td); + void destroy(); +private: + static std::int64_t cleanup_list(small_object* list); + ~small_object_pool_impl() = default; +private: + alignas(max_nfs_size) small_object* m_private_list; + std::int64_t m_private_counter{}; + alignas(max_nfs_size) std::atomic<small_object*> m_public_list; + std::atomic<std::int64_t> m_public_counter{}; +}; + +} // namespace r1 +} // namespace detail +} // namespace tbb + +#endif /* __TBB_small_object_pool_impl_H */ diff --git a/contrib/libs/tbb/src/tbb/task.cpp b/contrib/libs/tbb/src/tbb/task.cpp index 75c5d01d32..129614447a 100644 --- a/contrib/libs/tbb/src/tbb/task.cpp +++ b/contrib/libs/tbb/src/tbb/task.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -17,209 +17,209 @@ // Do not include task.h directly. Use scheduler_common.h instead #include "scheduler_common.h" #include "governor.h" -#include "arena.h" -#include "thread_data.h" -#include "task_dispatcher.h" -#include "waiters.h" +#include "arena.h" +#include "thread_data.h" +#include "task_dispatcher.h" +#include "waiters.h" #include "itt_notify.h" -#include "oneapi/tbb/detail/_task.h" -#include "oneapi/tbb/partitioner.h" -#include "oneapi/tbb/task.h" +#include "oneapi/tbb/detail/_task.h" +#include "oneapi/tbb/partitioner.h" +#include "oneapi/tbb/task.h" -#include <cstring> +#include <cstring> namespace tbb { -namespace detail { -namespace r1 { +namespace detail { +namespace r1 { //------------------------------------------------------------------------ -// resumable tasks +// resumable tasks //------------------------------------------------------------------------ -#if __TBB_RESUMABLE_TASKS +#if __TBB_RESUMABLE_TASKS -void suspend(suspend_callback_type suspend_callback, void* user_callback) { - thread_data& td = *governor::get_thread_data(); - td.my_task_dispatcher->suspend(suspend_callback, user_callback); - // Do not access td after suspend. +void suspend(suspend_callback_type suspend_callback, void* user_callback) { + thread_data& td = *governor::get_thread_data(); + td.my_task_dispatcher->suspend(suspend_callback, user_callback); + // Do not access td after suspend. } -void resume(suspend_point_type* sp) { - assert_pointers_valid(sp, sp->m_arena); - task_dispatcher& task_disp = sp->m_resume_task.m_target; - __TBB_ASSERT(task_disp.m_thread_data == nullptr, nullptr); - - // TODO: remove this work-around - // Prolong the arena's lifetime while all coroutines are alive - // (otherwise the arena can be destroyed while some tasks are suspended). 
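Note: resume() below takes an extra external reference on the arena before publishing the resume task and releases it only after advertise_new_work(), so the arena cannot be reclaimed in between. The same keep-alive pattern in isolation, with hypothetical names and plain atomics:

// keep_alive.cpp -- illustrative keep-alive reference counting, not the arena API.
#include <atomic>

struct shared_target {
    std::atomic<long> refs{1};
    void retain()  { refs.fetch_add(1, std::memory_order_relaxed); }
    void release() {
        if (refs.fetch_sub(1, std::memory_order_acq_rel) == 1)
            delete this;                       // the last owner reclaims the object
    }
    void publish_work() { /* make work visible to other threads */ }
};

void post_to(shared_target* t) {
    t->retain();         // pin the target before other threads can race to destroy it
    t->publish_work();   // after this, another thread may finish the work and call release()
    t->release();        // our reference kept *t alive across publish_work()
}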
- arena& a = *sp->m_arena; - a.my_references += arena::ref_external; - - if (task_disp.m_properties.critical_task_allowed) { - // The target is not in the process of executing critical task, so the resume task is not critical. - a.my_resume_task_stream.push(&sp->m_resume_task, random_lane_selector(sp->m_random)); - } else { -#if __TBB_PREVIEW_CRITICAL_TASKS - // The target is in the process of executing critical task, so the resume task is critical. - a.my_critical_task_stream.push(&sp->m_resume_task, random_lane_selector(sp->m_random)); -#endif +void resume(suspend_point_type* sp) { + assert_pointers_valid(sp, sp->m_arena); + task_dispatcher& task_disp = sp->m_resume_task.m_target; + __TBB_ASSERT(task_disp.m_thread_data == nullptr, nullptr); + + // TODO: remove this work-around + // Prolong the arena's lifetime while all coroutines are alive + // (otherwise the arena can be destroyed while some tasks are suspended). + arena& a = *sp->m_arena; + a.my_references += arena::ref_external; + + if (task_disp.m_properties.critical_task_allowed) { + // The target is not in the process of executing critical task, so the resume task is not critical. + a.my_resume_task_stream.push(&sp->m_resume_task, random_lane_selector(sp->m_random)); + } else { +#if __TBB_PREVIEW_CRITICAL_TASKS + // The target is in the process of executing critical task, so the resume task is critical. + a.my_critical_task_stream.push(&sp->m_resume_task, random_lane_selector(sp->m_random)); +#endif } - // Do not access target after that point. - a.advertise_new_work<arena::wakeup>(); + // Do not access target after that point. + a.advertise_new_work<arena::wakeup>(); - // Release our reference to my_arena. - a.on_thread_leaving<arena::ref_external>(); + // Release our reference to my_arena. + a.on_thread_leaving<arena::ref_external>(); } -suspend_point_type* current_suspend_point() { - thread_data& td = *governor::get_thread_data(); - return td.my_task_dispatcher->get_suspend_point(); +suspend_point_type* current_suspend_point() { + thread_data& td = *governor::get_thread_data(); + return td.my_task_dispatcher->get_suspend_point(); } -static task_dispatcher& create_coroutine(thread_data& td) { - // We may have some task dispatchers cached - task_dispatcher* task_disp = td.my_arena->my_co_cache.pop(); - if (!task_disp) { - void* ptr = cache_aligned_allocate(sizeof(task_dispatcher)); - task_disp = new(ptr) task_dispatcher(td.my_arena); - task_disp->init_suspend_point(td.my_arena, td.my_arena->my_market->worker_stack_size()); - } - // Prolong the arena's lifetime until all coroutines is alive - // (otherwise the arena can be destroyed while some tasks are suspended). - // TODO: consider behavior if there are more than 4K external references. - td.my_arena->my_references += arena::ref_external; - return *task_disp; +static task_dispatcher& create_coroutine(thread_data& td) { + // We may have some task dispatchers cached + task_dispatcher* task_disp = td.my_arena->my_co_cache.pop(); + if (!task_disp) { + void* ptr = cache_aligned_allocate(sizeof(task_dispatcher)); + task_disp = new(ptr) task_dispatcher(td.my_arena); + task_disp->init_suspend_point(td.my_arena, td.my_arena->my_market->worker_stack_size()); + } + // Prolong the arena's lifetime until all coroutines is alive + // (otherwise the arena can be destroyed while some tasks are suspended). + // TODO: consider behavior if there are more than 4K external references. 
+ td.my_arena->my_references += arena::ref_external; + return *task_disp; } -void task_dispatcher::suspend(suspend_callback_type suspend_callback, void* user_callback) { - __TBB_ASSERT(suspend_callback != nullptr, nullptr); - __TBB_ASSERT(user_callback != nullptr, nullptr); - __TBB_ASSERT(m_thread_data != nullptr, nullptr); - - arena_slot* slot = m_thread_data->my_arena_slot; - __TBB_ASSERT(slot != nullptr, nullptr); - - task_dispatcher& default_task_disp = slot->default_task_dispatcher(); - // TODO: simplify the next line, e.g. is_task_dispatcher_recalled( task_dispatcher& ) - bool is_recalled = default_task_disp.get_suspend_point()->m_is_owner_recalled.load(std::memory_order_acquire); - task_dispatcher& target = is_recalled ? default_task_disp : create_coroutine(*m_thread_data); - - thread_data::suspend_callback_wrapper callback = { suspend_callback, user_callback, get_suspend_point() }; - m_thread_data->set_post_resume_action(thread_data::post_resume_action::callback, &callback); - resume(target); - - if (m_properties.outermost) { - recall_point(); - } +void task_dispatcher::suspend(suspend_callback_type suspend_callback, void* user_callback) { + __TBB_ASSERT(suspend_callback != nullptr, nullptr); + __TBB_ASSERT(user_callback != nullptr, nullptr); + __TBB_ASSERT(m_thread_data != nullptr, nullptr); + + arena_slot* slot = m_thread_data->my_arena_slot; + __TBB_ASSERT(slot != nullptr, nullptr); + + task_dispatcher& default_task_disp = slot->default_task_dispatcher(); + // TODO: simplify the next line, e.g. is_task_dispatcher_recalled( task_dispatcher& ) + bool is_recalled = default_task_disp.get_suspend_point()->m_is_owner_recalled.load(std::memory_order_acquire); + task_dispatcher& target = is_recalled ? default_task_disp : create_coroutine(*m_thread_data); + + thread_data::suspend_callback_wrapper callback = { suspend_callback, user_callback, get_suspend_point() }; + m_thread_data->set_post_resume_action(thread_data::post_resume_action::callback, &callback); + resume(target); + + if (m_properties.outermost) { + recall_point(); + } } -void task_dispatcher::resume(task_dispatcher& target) { - // Do not create non-trivial objects on the stack of this function. They might never be destroyed - { - thread_data* td = m_thread_data; - __TBB_ASSERT(&target != this, "We cannot resume to ourself"); - __TBB_ASSERT(td != nullptr, "This task dispatcher must be attach to a thread data"); - __TBB_ASSERT(td->my_task_dispatcher == this, "Thread data must be attached to this task dispatcher"); - __TBB_ASSERT(td->my_post_resume_action != thread_data::post_resume_action::none, "The post resume action must be set"); - __TBB_ASSERT(td->my_post_resume_arg, "The post resume action must have an argument"); - - // Change the task dispatcher - td->detach_task_dispatcher(); - td->attach_task_dispatcher(target); - } - __TBB_ASSERT(m_suspend_point != nullptr, "Suspend point must be created"); - __TBB_ASSERT(target.m_suspend_point != nullptr, "Suspend point must be created"); - // Swap to the target coroutine. 
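Note: co_context::resume() transfers control to another execution stack and returns only when something switches back, which is why the surrounding code avoids non-trivial objects on this stack. A minimal POSIX analogue of such a switch using <ucontext.h> (glibc; illustrative only, co_context's actual backend is platform-specific and not shown):

// context_switch.cpp -- illustrative ucontext analogue of co_context::resume().
#include <ucontext.h>
#include <cstdio>

static ucontext_t main_ctx, co_ctx;
static char co_stack[64 * 1024];

static void coroutine_body() {
    std::puts("running on the coroutine stack");
    swapcontext(&co_ctx, &main_ctx);      // "suspend": switch back to the caller
    std::puts("resumed on the coroutine stack");
}                                          // returning here continues at uc_link

int main() {
    getcontext(&co_ctx);
    co_ctx.uc_stack.ss_sp = co_stack;
    co_ctx.uc_stack.ss_size = sizeof(co_stack);
    co_ctx.uc_link = &main_ctx;            // where to go when coroutine_body() returns
    makecontext(&co_ctx, coroutine_body, 0);

    swapcontext(&main_ctx, &co_ctx);       // first "resume": jump onto the coroutine stack
    std::puts("back on the main stack");
    swapcontext(&main_ctx, &co_ctx);       // resume it again after its suspension point
    std::puts("coroutine finished");
}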
- m_suspend_point->m_co_context.resume(target.m_suspend_point->m_co_context);
- // Pay attention that m_thread_data can be changed after resume
- {
- thread_data* td = m_thread_data;
- __TBB_ASSERT(td != nullptr, "This task dispatcher must be attached to a thread data");
- __TBB_ASSERT(td->my_task_dispatcher == this, "Thread data must be attached to this task dispatcher");
- td->do_post_resume_action();
-
- // Remove the recall flag if the thread is in its original task dispatcher
- arena_slot* slot = td->my_arena_slot;
- __TBB_ASSERT(slot != nullptr, nullptr);
- if (this == slot->my_default_task_dispatcher) {
- __TBB_ASSERT(m_suspend_point != nullptr, nullptr);
- m_suspend_point->m_is_owner_recalled.store(false, std::memory_order_relaxed);
+void task_dispatcher::resume(task_dispatcher& target) {
+ // Do not create non-trivial objects on the stack of this function. They might never be destroyed
+ {
+ thread_data* td = m_thread_data;
+ __TBB_ASSERT(&target != this, "We cannot resume to ourselves");
+ __TBB_ASSERT(td != nullptr, "This task dispatcher must be attached to a thread data");
+ __TBB_ASSERT(td->my_task_dispatcher == this, "Thread data must be attached to this task dispatcher");
+ __TBB_ASSERT(td->my_post_resume_action != thread_data::post_resume_action::none, "The post resume action must be set");
+ __TBB_ASSERT(td->my_post_resume_arg, "The post resume action must have an argument");
+
+ // Change the task dispatcher
+ td->detach_task_dispatcher();
+ td->attach_task_dispatcher(target);
+ }
+ __TBB_ASSERT(m_suspend_point != nullptr, "Suspend point must be created");
+ __TBB_ASSERT(target.m_suspend_point != nullptr, "Suspend point must be created");
+ // Swap to the target coroutine.
+ m_suspend_point->m_co_context.resume(target.m_suspend_point->m_co_context);
+ // Pay attention that m_thread_data can be changed after resume
+ {
+ thread_data* td = m_thread_data;
+ __TBB_ASSERT(td != nullptr, "This task dispatcher must be attached to a thread data");
+ __TBB_ASSERT(td->my_task_dispatcher == this, "Thread data must be attached to this task dispatcher");
+ td->do_post_resume_action();
+
+ // Remove the recall flag if the thread is in its original task dispatcher
+ arena_slot* slot = td->my_arena_slot;
+ __TBB_ASSERT(slot != nullptr, nullptr);
+ if (this == slot->my_default_task_dispatcher) {
+ __TBB_ASSERT(m_suspend_point != nullptr, nullptr);
+ m_suspend_point->m_is_owner_recalled.store(false, std::memory_order_relaxed);
 }
 }
 }

-void thread_data::do_post_resume_action() {
- __TBB_ASSERT(my_post_resume_action != thread_data::post_resume_action::none, "The post resume action must be set");
- __TBB_ASSERT(my_post_resume_arg, "The post resume action must have an argument");
-
- switch (my_post_resume_action) {
- case post_resume_action::register_waiter:
- {
- static_cast<extended_concurrent_monitor::resume_context*>(my_post_resume_arg)->notify();
- break;
- }
- case post_resume_action::resume:
- {
- r1::resume(static_cast<suspend_point_type*>(my_post_resume_arg));
- break;
- }
- case post_resume_action::callback:
- {
- suspend_callback_wrapper callback = *static_cast<suspend_callback_wrapper*>(my_post_resume_arg);
- callback();
- break;
- }
- case post_resume_action::cleanup:
- {
- task_dispatcher* to_cleanup = static_cast<task_dispatcher*>(my_post_resume_arg);
- // Release coroutine's reference to my_arena.
- my_arena->on_thread_leaving<arena::ref_external>(); - // Cache the coroutine for possible later re-usage - my_arena->my_co_cache.push(to_cleanup); - break; - } - case post_resume_action::notify: - { - std::atomic<bool>& owner_recall_flag = *static_cast<std::atomic<bool>*>(my_post_resume_arg); - owner_recall_flag.store(true, std::memory_order_release); - // Do not access recall_flag because it can be destroyed after the notification. - break; - } - default: - __TBB_ASSERT(false, "Unknown post resume action"); - } - - my_post_resume_action = post_resume_action::none; - my_post_resume_arg = nullptr; +void thread_data::do_post_resume_action() { + __TBB_ASSERT(my_post_resume_action != thread_data::post_resume_action::none, "The post resume action must be set"); + __TBB_ASSERT(my_post_resume_arg, "The post resume action must have an argument"); + + switch (my_post_resume_action) { + case post_resume_action::register_waiter: + { + static_cast<extended_concurrent_monitor::resume_context*>(my_post_resume_arg)->notify(); + break; + } + case post_resume_action::resume: + { + r1::resume(static_cast<suspend_point_type*>(my_post_resume_arg)); + break; + } + case post_resume_action::callback: + { + suspend_callback_wrapper callback = *static_cast<suspend_callback_wrapper*>(my_post_resume_arg); + callback(); + break; + } + case post_resume_action::cleanup: + { + task_dispatcher* to_cleanup = static_cast<task_dispatcher*>(my_post_resume_arg); + // Release coroutine's reference to my_arena. + my_arena->on_thread_leaving<arena::ref_external>(); + // Cache the coroutine for possible later re-usage + my_arena->my_co_cache.push(to_cleanup); + break; + } + case post_resume_action::notify: + { + std::atomic<bool>& owner_recall_flag = *static_cast<std::atomic<bool>*>(my_post_resume_arg); + owner_recall_flag.store(true, std::memory_order_release); + // Do not access recall_flag because it can be destroyed after the notification. 
+ break; + } + default: + __TBB_ASSERT(false, "Unknown post resume action"); + } + + my_post_resume_action = post_resume_action::none; + my_post_resume_arg = nullptr; } -#else +#else -void suspend(suspend_callback_type, void*) { - __TBB_ASSERT_RELEASE(false, "Resumable tasks are unsupported on this platform"); +void suspend(suspend_callback_type, void*) { + __TBB_ASSERT_RELEASE(false, "Resumable tasks are unsupported on this platform"); } -void resume(suspend_point_type*) { - __TBB_ASSERT_RELEASE(false, "Resumable tasks are unsupported on this platform"); +void resume(suspend_point_type*) { + __TBB_ASSERT_RELEASE(false, "Resumable tasks are unsupported on this platform"); } -suspend_point_type* current_suspend_point() { - __TBB_ASSERT_RELEASE(false, "Resumable tasks are unsupported on this platform"); - return nullptr; +suspend_point_type* current_suspend_point() { + __TBB_ASSERT_RELEASE(false, "Resumable tasks are unsupported on this platform"); + return nullptr; } -#endif /* __TBB_RESUMABLE_TASKS */ +#endif /* __TBB_RESUMABLE_TASKS */ -void notify_waiters(std::uintptr_t wait_ctx_addr) { - auto is_related_wait_ctx = [&] (extended_context context) { - return wait_ctx_addr == context.my_uniq_addr; - }; +void notify_waiters(std::uintptr_t wait_ctx_addr) { + auto is_related_wait_ctx = [&] (extended_context context) { + return wait_ctx_addr == context.my_uniq_addr; + }; - r1::governor::get_thread_data()->my_arena->my_market->get_wait_list().notify(is_related_wait_ctx); + r1::governor::get_thread_data()->my_arena->my_market->get_wait_list().notify(is_related_wait_ctx); } -} // namespace r1 -} // namespace detail +} // namespace r1 +} // namespace detail } // namespace tbb diff --git a/contrib/libs/tbb/src/tbb/task_dispatcher.cpp b/contrib/libs/tbb/src/tbb/task_dispatcher.cpp index 9ecd85a913..86818af1d1 100644 --- a/contrib/libs/tbb/src/tbb/task_dispatcher.cpp +++ b/contrib/libs/tbb/src/tbb/task_dispatcher.cpp @@ -1,240 +1,240 @@ -/* - Copyright (c) 2020-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -#include "task_dispatcher.h" -#include "waiters.h" - -namespace tbb { -namespace detail { -namespace r1 { - -static inline void spawn_and_notify(d1::task& t, arena_slot* slot, arena* a) { - slot->spawn(t); - a->advertise_new_work<arena::work_spawned>(); - // TODO: TBB_REVAMP_TODO slot->assert_task_pool_valid(); -} - -void __TBB_EXPORTED_FUNC spawn(d1::task& t, d1::task_group_context& ctx) { - thread_data* tls = governor::get_thread_data(); - task_group_context_impl::bind_to(ctx, tls); - arena* a = tls->my_arena; - arena_slot* slot = tls->my_arena_slot; - // Capture current context - task_accessor::context(t) = &ctx; - // Mark isolation - task_accessor::isolation(t) = tls->my_task_dispatcher->m_execute_data_ext.isolation; - spawn_and_notify(t, slot, a); -} - -void __TBB_EXPORTED_FUNC spawn(d1::task& t, d1::task_group_context& ctx, d1::slot_id id) { - thread_data* tls = governor::get_thread_data(); - task_group_context_impl::bind_to(ctx, tls); - arena* a = tls->my_arena; - arena_slot* slot = tls->my_arena_slot; - execution_data_ext& ed = tls->my_task_dispatcher->m_execute_data_ext; - - // Capture context - task_accessor::context(t) = &ctx; - // Mark isolation - task_accessor::isolation(t) = ed.isolation; - - if ( id != d1::no_slot && id != tls->my_arena_index ) { - // Allocate proxy task - d1::small_object_allocator alloc{}; - auto proxy = alloc.new_object<task_proxy>(static_cast<d1::execution_data&>(ed)); - // Mark as a proxy - task_accessor::set_proxy_trait(*proxy); - // Mark isolation for the proxy task - task_accessor::isolation(*proxy) = ed.isolation; - // Deallocation hint (tls) from the task allocator - proxy->allocator = alloc; - proxy->slot = id; - proxy->outbox = &a->mailbox(id); - // Mark proxy as present in both locations (sender's task pool and destination mailbox) - proxy->task_and_tag = intptr_t(&t) | task_proxy::location_mask; - // Mail the proxy - after this point t may be destroyed by another thread at any moment. - proxy->outbox->push(proxy); - // Spawn proxy to the local task pool - spawn_and_notify(*proxy, slot, a); - } else { - spawn_and_notify(t, slot, a); - } -} - -void __TBB_EXPORTED_FUNC submit(d1::task& t, d1::task_group_context& ctx, arena* a, std::uintptr_t as_critical) { - suppress_unused_warning(as_critical); - assert_pointer_valid(a); - thread_data& tls = *governor::get_thread_data(); - - // TODO revamp: for each use case investigate neccesity to make this call - task_group_context_impl::bind_to(ctx, &tls); - task_accessor::context(t) = &ctx; - // TODO revamp: consider respecting task isolation if this call is being made by external thread - task_accessor::isolation(t) = tls.my_task_dispatcher->m_execute_data_ext.isolation; - - // TODO: consider code refactoring when lane selection mechanism is unified. - - if ( tls.is_attached_to(a) ) { - arena_slot* slot = tls.my_arena_slot; -#if __TBB_PREVIEW_CRITICAL_TASKS - if( as_critical ) { - a->my_critical_task_stream.push( &t, subsequent_lane_selector(slot->critical_hint()) ); - } else -#endif - { - slot->spawn(t); - } - } else { - random_lane_selector lane_selector{tls.my_random}; -#if !__TBB_PREVIEW_CRITICAL_TASKS - suppress_unused_warning(as_critical); -#else - if ( as_critical ) { - a->my_critical_task_stream.push( &t, lane_selector ); - } else -#endif - { - // Avoid joining the arena the thread is not currently in. - a->my_fifo_task_stream.push( &t, lane_selector ); - } - } - // It is assumed that some thread will explicitly wait in the arena the task is submitted - // into. 
Therefore, no need to utilize mandatory concurrency here. - a->advertise_new_work<arena::work_spawned>(); -} - -void __TBB_EXPORTED_FUNC execute_and_wait(d1::task& t, d1::task_group_context& t_ctx, d1::wait_context& wait_ctx, d1::task_group_context& w_ctx) { - task_accessor::context(t) = &t_ctx; - task_dispatcher::execute_and_wait(&t, wait_ctx, w_ctx); -} - -void __TBB_EXPORTED_FUNC wait(d1::wait_context& wait_ctx, d1::task_group_context& w_ctx) { - // Enter the task dispatch loop without a task - task_dispatcher::execute_and_wait(nullptr, wait_ctx, w_ctx); -} - -d1::slot_id __TBB_EXPORTED_FUNC execution_slot(const d1::execution_data* ed) { - if (ed) { - const execution_data_ext* ed_ext = static_cast<const execution_data_ext*>(ed); - assert_pointers_valid(ed_ext->task_disp, ed_ext->task_disp->m_thread_data); - return ed_ext->task_disp->m_thread_data->my_arena_index; - } else { - thread_data* td = governor::get_thread_data_if_initialized(); - return td ? int(td->my_arena_index) : -1; - } -} - -d1::task_group_context* __TBB_EXPORTED_FUNC current_context() { - thread_data* td = governor::get_thread_data(); - assert_pointers_valid(td, td->my_task_dispatcher); - - task_dispatcher* task_disp = td->my_task_dispatcher; - if (task_disp->m_properties.outermost) { - // No one task is executed, so no execute_data. - return nullptr; - } else { - return td->my_task_dispatcher->m_execute_data_ext.context; - } -} - -void task_dispatcher::execute_and_wait(d1::task* t, d1::wait_context& wait_ctx, d1::task_group_context& w_ctx) { - // Get an associated task dispatcher - thread_data* tls = governor::get_thread_data(); - __TBB_ASSERT(tls->my_task_dispatcher != nullptr, nullptr); - task_dispatcher& local_td = *tls->my_task_dispatcher; - - // TODO: factor out the binding to execute_and_wait_impl - if (t) { - task_group_context_impl::bind_to(*task_accessor::context(*t), tls); - // Propagate the isolation to the task executed without spawn. - task_accessor::isolation(*t) = tls->my_task_dispatcher->m_execute_data_ext.isolation; - } - - // Waiting on special object tied to a waiting thread. - external_waiter waiter{ *tls->my_arena, wait_ctx }; - t = local_td.local_wait_for_all(t, waiter); - __TBB_ASSERT_EX(t == nullptr, "External waiter must not leave dispatch loop with a task"); - - // The external thread couldn't exit the dispatch loop in an idle state - if (local_td.m_thread_data->my_inbox.is_idle_state(true)) { - local_td.m_thread_data->my_inbox.set_is_idle(false); - } - - if (w_ctx.my_exception) { - __TBB_ASSERT(w_ctx.is_group_execution_cancelled(), "The task group context with an exception should be canceled."); - w_ctx.my_exception->throw_self(); - } -} - -#if __TBB_RESUMABLE_TASKS - -#if _WIN32 -/* [[noreturn]] */ void __stdcall co_local_wait_for_all(void* arg) noexcept -#else -/* [[noreturn]] */ void co_local_wait_for_all(void* arg) noexcept -#endif -{ - // Do not create non-trivial objects on the stack of this function. They will never be destroyed. - __TBB_ASSERT(arg != nullptr, nullptr); - task_dispatcher& task_disp = *static_cast<task_dispatcher*>(arg); - - assert_pointers_valid(task_disp.m_thread_data, task_disp.m_thread_data->my_arena); - task_disp.set_stealing_threshold(task_disp.m_thread_data->my_arena->calculate_stealing_threshold()); - __TBB_ASSERT(task_disp.can_steal(), nullptr); - task_disp.co_local_wait_for_all(); - // This code is unreachable -} - -/* [[noreturn]] */ void task_dispatcher::co_local_wait_for_all() noexcept { - // Do not create non-trivial objects on the stack of this function. 
They will never be destroyed. - assert_pointer_valid(m_thread_data); - - // Basically calls the user callback passed to the tbb::task::suspend function - m_thread_data->do_post_resume_action(); - - // Endless loop here because coroutine could be reused - for (;;) { - arena* a = m_thread_data->my_arena; - coroutine_waiter waiter(*a); - d1::task* resume_task = local_wait_for_all(nullptr, waiter); - assert_task_valid(resume_task); - __TBB_ASSERT(this == m_thread_data->my_task_dispatcher, nullptr); - - m_thread_data->set_post_resume_action(thread_data::post_resume_action::cleanup, this); - resume(static_cast<suspend_point_type::resume_task*>(resume_task)->m_target); - } - // This code is unreachable -} - -d1::suspend_point task_dispatcher::get_suspend_point() { - if (m_suspend_point == nullptr) { - assert_pointer_valid(m_thread_data); - // 0 means that we attach this task dispatcher to the current stack - init_suspend_point(m_thread_data->my_arena, 0); - } - assert_pointer_valid(m_suspend_point); - return m_suspend_point; -} -void task_dispatcher::init_suspend_point(arena* a, std::size_t stack_size) { - __TBB_ASSERT(m_suspend_point == nullptr, nullptr); - m_suspend_point = new(cache_aligned_allocate(sizeof(suspend_point_type))) - suspend_point_type(a, stack_size, *this); -} -#endif /* __TBB_RESUMABLE_TASKS */ -} // namespace r1 -} // namespace detail -} // namespace tbb - +/* + Copyright (c) 2020-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#include "task_dispatcher.h" +#include "waiters.h" + +namespace tbb { +namespace detail { +namespace r1 { + +static inline void spawn_and_notify(d1::task& t, arena_slot* slot, arena* a) { + slot->spawn(t); + a->advertise_new_work<arena::work_spawned>(); + // TODO: TBB_REVAMP_TODO slot->assert_task_pool_valid(); +} + +void __TBB_EXPORTED_FUNC spawn(d1::task& t, d1::task_group_context& ctx) { + thread_data* tls = governor::get_thread_data(); + task_group_context_impl::bind_to(ctx, tls); + arena* a = tls->my_arena; + arena_slot* slot = tls->my_arena_slot; + // Capture current context + task_accessor::context(t) = &ctx; + // Mark isolation + task_accessor::isolation(t) = tls->my_task_dispatcher->m_execute_data_ext.isolation; + spawn_and_notify(t, slot, a); +} + +void __TBB_EXPORTED_FUNC spawn(d1::task& t, d1::task_group_context& ctx, d1::slot_id id) { + thread_data* tls = governor::get_thread_data(); + task_group_context_impl::bind_to(ctx, tls); + arena* a = tls->my_arena; + arena_slot* slot = tls->my_arena_slot; + execution_data_ext& ed = tls->my_task_dispatcher->m_execute_data_ext; + + // Capture context + task_accessor::context(t) = &ctx; + // Mark isolation + task_accessor::isolation(t) = ed.isolation; + + if ( id != d1::no_slot && id != tls->my_arena_index ) { + // Allocate proxy task + d1::small_object_allocator alloc{}; + auto proxy = alloc.new_object<task_proxy>(static_cast<d1::execution_data&>(ed)); + // Mark as a proxy + task_accessor::set_proxy_trait(*proxy); + // Mark isolation for the proxy task + task_accessor::isolation(*proxy) = ed.isolation; + // Deallocation hint (tls) from the task allocator + proxy->allocator = alloc; + proxy->slot = id; + proxy->outbox = &a->mailbox(id); + // Mark proxy as present in both locations (sender's task pool and destination mailbox) + proxy->task_and_tag = intptr_t(&t) | task_proxy::location_mask; + // Mail the proxy - after this point t may be destroyed by another thread at any moment. + proxy->outbox->push(proxy); + // Spawn proxy to the local task pool + spawn_and_notify(*proxy, slot, a); + } else { + spawn_and_notify(t, slot, a); + } +} + +void __TBB_EXPORTED_FUNC submit(d1::task& t, d1::task_group_context& ctx, arena* a, std::uintptr_t as_critical) { + suppress_unused_warning(as_critical); + assert_pointer_valid(a); + thread_data& tls = *governor::get_thread_data(); + + // TODO revamp: for each use case investigate neccesity to make this call + task_group_context_impl::bind_to(ctx, &tls); + task_accessor::context(t) = &ctx; + // TODO revamp: consider respecting task isolation if this call is being made by external thread + task_accessor::isolation(t) = tls.my_task_dispatcher->m_execute_data_ext.isolation; + + // TODO: consider code refactoring when lane selection mechanism is unified. + + if ( tls.is_attached_to(a) ) { + arena_slot* slot = tls.my_arena_slot; +#if __TBB_PREVIEW_CRITICAL_TASKS + if( as_critical ) { + a->my_critical_task_stream.push( &t, subsequent_lane_selector(slot->critical_hint()) ); + } else +#endif + { + slot->spawn(t); + } + } else { + random_lane_selector lane_selector{tls.my_random}; +#if !__TBB_PREVIEW_CRITICAL_TASKS + suppress_unused_warning(as_critical); +#else + if ( as_critical ) { + a->my_critical_task_stream.push( &t, lane_selector ); + } else +#endif + { + // Avoid joining the arena the thread is not currently in. + a->my_fifo_task_stream.push( &t, lane_selector ); + } + } + // It is assumed that some thread will explicitly wait in the arena the task is submitted + // into. 
Therefore, no need to utilize mandatory concurrency here. + a->advertise_new_work<arena::work_spawned>(); +} + +void __TBB_EXPORTED_FUNC execute_and_wait(d1::task& t, d1::task_group_context& t_ctx, d1::wait_context& wait_ctx, d1::task_group_context& w_ctx) { + task_accessor::context(t) = &t_ctx; + task_dispatcher::execute_and_wait(&t, wait_ctx, w_ctx); +} + +void __TBB_EXPORTED_FUNC wait(d1::wait_context& wait_ctx, d1::task_group_context& w_ctx) { + // Enter the task dispatch loop without a task + task_dispatcher::execute_and_wait(nullptr, wait_ctx, w_ctx); +} + +d1::slot_id __TBB_EXPORTED_FUNC execution_slot(const d1::execution_data* ed) { + if (ed) { + const execution_data_ext* ed_ext = static_cast<const execution_data_ext*>(ed); + assert_pointers_valid(ed_ext->task_disp, ed_ext->task_disp->m_thread_data); + return ed_ext->task_disp->m_thread_data->my_arena_index; + } else { + thread_data* td = governor::get_thread_data_if_initialized(); + return td ? int(td->my_arena_index) : -1; + } +} + +d1::task_group_context* __TBB_EXPORTED_FUNC current_context() { + thread_data* td = governor::get_thread_data(); + assert_pointers_valid(td, td->my_task_dispatcher); + + task_dispatcher* task_disp = td->my_task_dispatcher; + if (task_disp->m_properties.outermost) { + // No one task is executed, so no execute_data. + return nullptr; + } else { + return td->my_task_dispatcher->m_execute_data_ext.context; + } +} + +void task_dispatcher::execute_and_wait(d1::task* t, d1::wait_context& wait_ctx, d1::task_group_context& w_ctx) { + // Get an associated task dispatcher + thread_data* tls = governor::get_thread_data(); + __TBB_ASSERT(tls->my_task_dispatcher != nullptr, nullptr); + task_dispatcher& local_td = *tls->my_task_dispatcher; + + // TODO: factor out the binding to execute_and_wait_impl + if (t) { + task_group_context_impl::bind_to(*task_accessor::context(*t), tls); + // Propagate the isolation to the task executed without spawn. + task_accessor::isolation(*t) = tls->my_task_dispatcher->m_execute_data_ext.isolation; + } + + // Waiting on special object tied to a waiting thread. + external_waiter waiter{ *tls->my_arena, wait_ctx }; + t = local_td.local_wait_for_all(t, waiter); + __TBB_ASSERT_EX(t == nullptr, "External waiter must not leave dispatch loop with a task"); + + // The external thread couldn't exit the dispatch loop in an idle state + if (local_td.m_thread_data->my_inbox.is_idle_state(true)) { + local_td.m_thread_data->my_inbox.set_is_idle(false); + } + + if (w_ctx.my_exception) { + __TBB_ASSERT(w_ctx.is_group_execution_cancelled(), "The task group context with an exception should be canceled."); + w_ctx.my_exception->throw_self(); + } +} + +#if __TBB_RESUMABLE_TASKS + +#if _WIN32 +/* [[noreturn]] */ void __stdcall co_local_wait_for_all(void* arg) noexcept +#else +/* [[noreturn]] */ void co_local_wait_for_all(void* arg) noexcept +#endif +{ + // Do not create non-trivial objects on the stack of this function. They will never be destroyed. + __TBB_ASSERT(arg != nullptr, nullptr); + task_dispatcher& task_disp = *static_cast<task_dispatcher*>(arg); + + assert_pointers_valid(task_disp.m_thread_data, task_disp.m_thread_data->my_arena); + task_disp.set_stealing_threshold(task_disp.m_thread_data->my_arena->calculate_stealing_threshold()); + __TBB_ASSERT(task_disp.can_steal(), nullptr); + task_disp.co_local_wait_for_all(); + // This code is unreachable +} + +/* [[noreturn]] */ void task_dispatcher::co_local_wait_for_all() noexcept { + // Do not create non-trivial objects on the stack of this function. 
They will never be destroyed. + assert_pointer_valid(m_thread_data); + + // Basically calls the user callback passed to the tbb::task::suspend function + m_thread_data->do_post_resume_action(); + + // Endless loop here because coroutine could be reused + for (;;) { + arena* a = m_thread_data->my_arena; + coroutine_waiter waiter(*a); + d1::task* resume_task = local_wait_for_all(nullptr, waiter); + assert_task_valid(resume_task); + __TBB_ASSERT(this == m_thread_data->my_task_dispatcher, nullptr); + + m_thread_data->set_post_resume_action(thread_data::post_resume_action::cleanup, this); + resume(static_cast<suspend_point_type::resume_task*>(resume_task)->m_target); + } + // This code is unreachable +} + +d1::suspend_point task_dispatcher::get_suspend_point() { + if (m_suspend_point == nullptr) { + assert_pointer_valid(m_thread_data); + // 0 means that we attach this task dispatcher to the current stack + init_suspend_point(m_thread_data->my_arena, 0); + } + assert_pointer_valid(m_suspend_point); + return m_suspend_point; +} +void task_dispatcher::init_suspend_point(arena* a, std::size_t stack_size) { + __TBB_ASSERT(m_suspend_point == nullptr, nullptr); + m_suspend_point = new(cache_aligned_allocate(sizeof(suspend_point_type))) + suspend_point_type(a, stack_size, *this); +} +#endif /* __TBB_RESUMABLE_TASKS */ +} // namespace r1 +} // namespace detail +} // namespace tbb + diff --git a/contrib/libs/tbb/src/tbb/task_dispatcher.h b/contrib/libs/tbb/src/tbb/task_dispatcher.h index 01d96aaeba..54a6c0d934 100644 --- a/contrib/libs/tbb/src/tbb/task_dispatcher.h +++ b/contrib/libs/tbb/src/tbb/task_dispatcher.h @@ -1,465 +1,465 @@ -/* - Copyright (c) 2020-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
-*/ - -#ifndef _TBB_task_dispatcher_H -#define _TBB_task_dispatcher_H - -#include "oneapi/tbb/detail/_utils.h" -#include "oneapi/tbb/detail/_task.h" -#include "oneapi/tbb/global_control.h" - -#include "scheduler_common.h" -#include "waiters.h" -#include "arena_slot.h" -#include "arena.h" -#include "thread_data.h" -#include "mailbox.h" -#include "itt_notify.h" -#include "concurrent_monitor.h" - -#include <atomic> - -#if !__TBB_CPU_CTL_ENV_PRESENT -#include <fenv.h> // -#endif - -namespace tbb { -namespace detail { -namespace r1 { - -inline d1::task* get_self_recall_task(arena_slot& slot) { - suppress_unused_warning(slot); - d1::task* t = nullptr; -#if __TBB_RESUMABLE_TASKS - suspend_point_type* sp = slot.default_task_dispatcher().m_suspend_point; - if (sp && sp->m_is_owner_recalled.load(std::memory_order_acquire)) { - t = &sp->m_resume_task; - __TBB_ASSERT(sp->m_resume_task.m_target.m_thread_data == nullptr, nullptr); - } -#endif /* __TBB_RESUMABLE_TASKS */ - return t; -} - -// Defined in exception.cpp -/*[[noreturn]]*/void do_throw_noexcept(void (*throw_exception)()) noexcept; - -//------------------------------------------------------------------------ -// Suspend point -//------------------------------------------------------------------------ -#if __TBB_RESUMABLE_TASKS - -inline d1::task* suspend_point_type::resume_task::execute(d1::execution_data& ed) { - execution_data_ext& ed_ext = static_cast<execution_data_ext&>(ed); - - if (ed_ext.wait_ctx) { - extended_concurrent_monitor::resume_context monitor_node{{std::uintptr_t(ed_ext.wait_ctx), nullptr}, ed_ext, m_target}; - // The wait_ctx is present only in external_waiter. In that case we leave the current stack - // in the abandoned state to resume when waiting completes. - thread_data* td = ed_ext.task_disp->m_thread_data; - td->set_post_resume_action(thread_data::post_resume_action::register_waiter, &monitor_node); - - extended_concurrent_monitor& wait_list = td->my_arena->my_market->get_wait_list(); - - if (wait_list.wait([&] { return !ed_ext.wait_ctx->continue_execution(); }, monitor_node)) { - return nullptr; - } - - td->clear_post_resume_action(); - td->set_post_resume_action(thread_data::post_resume_action::resume, ed_ext.task_disp->get_suspend_point()); - } else { - // If wait_ctx is null, it can be only a worker thread on outermost level because - // coroutine_waiter interrupts bypass loop before the resume_task execution. - ed_ext.task_disp->m_thread_data->set_post_resume_action(thread_data::post_resume_action::notify, - &ed_ext.task_disp->get_suspend_point()->m_is_owner_recalled); - } - // Do not access this task because it might be destroyed - ed_ext.task_disp->resume(m_target); - return nullptr; -} - -inline suspend_point_type::suspend_point_type(arena* a, size_t stack_size, task_dispatcher& task_disp) - : m_arena(a) - , m_random(this) - , m_co_context(stack_size, &task_disp) - , m_resume_task(task_disp) -{ - assert_pointer_valid(m_arena); - assert_pointer_valid(m_arena->my_default_ctx); - task_accessor::context(m_resume_task) = m_arena->my_default_ctx; - task_accessor::isolation(m_resume_task) = no_isolation; - // Initialize the itt_caller for the context of the resume task. - // It will be bound to the stack of the first suspend call. 
- task_group_context_impl::bind_to(*task_accessor::context(m_resume_task), task_disp.m_thread_data); -} - -#endif /* __TBB_RESUMABLE_TASKS */ - -//------------------------------------------------------------------------ -// Task Dispatcher -//------------------------------------------------------------------------ -inline task_dispatcher::task_dispatcher(arena* a) { - m_execute_data_ext.context = a->my_default_ctx; - m_execute_data_ext.task_disp = this; -} - -inline bool task_dispatcher::can_steal() { - __TBB_ASSERT(m_stealing_threshold != 0, nullptr); - stack_anchor_type anchor{}; - return reinterpret_cast<std::uintptr_t>(&anchor) > m_stealing_threshold; -} - -inline d1::task* task_dispatcher::get_inbox_or_critical_task( - execution_data_ext& ed, mail_inbox& inbox, isolation_type isolation, bool critical_allowed) -{ - if (inbox.empty()) - return nullptr; - d1::task* result = get_critical_task(nullptr, ed, isolation, critical_allowed); - if (result) - return result; - // Check if there are tasks mailed to this thread via task-to-thread affinity mechanism. - result = get_mailbox_task(inbox, ed, isolation); - // There is a race with a thread adding a new task (possibly with suitable isolation) - // to our mailbox, so the below conditions might result in a false positive. - // Then set_is_idle(false) allows that task to be stolen; it's OK. - if (isolation != no_isolation && !result && !inbox.empty() && inbox.is_idle_state(true)) { - // We have proxy tasks in our mailbox but the isolation blocks their execution. - // So publish the proxy tasks in mailbox to be available for stealing from owner's task pool. - inbox.set_is_idle( false ); - } - return result; -} - -inline d1::task* task_dispatcher::get_stream_or_critical_task( - execution_data_ext& ed, arena& a, task_stream<front_accessor>& stream, unsigned& hint, - isolation_type isolation, bool critical_allowed) -{ - if (stream.empty()) - return nullptr; - d1::task* result = get_critical_task(nullptr, ed, isolation, critical_allowed); - if (result) - return result; - return a.get_stream_task(stream, hint); -} - -inline d1::task* task_dispatcher::steal_or_get_critical( - execution_data_ext& ed, arena& a, unsigned arena_index, FastRandom& random, - isolation_type isolation, bool critical_allowed) -{ - if (d1::task* t = a.steal_task(arena_index, random, ed, isolation)) { - ed.context = task_accessor::context(*t); - ed.isolation = task_accessor::isolation(*t); - return get_critical_task(t, ed, isolation, critical_allowed); - } - return nullptr; -} - -template <bool ITTPossible, typename Waiter> -d1::task* task_dispatcher::receive_or_steal_task( - thread_data& tls, execution_data_ext& ed, Waiter& waiter, isolation_type isolation, - bool fifo_allowed, bool critical_allowed) -{ - __TBB_ASSERT(governor::is_thread_data_set(&tls), NULL); - // Task to return - d1::task* t = nullptr; - // Get tls data (again) - arena& a = *tls.my_arena; - arena_slot& slot = *tls.my_arena_slot; - unsigned arena_index = tls.my_arena_index; - mail_inbox& inbox = tls.my_inbox; - task_stream<front_accessor>& resume_stream = a.my_resume_task_stream; - unsigned& resume_hint = slot.hint_for_resume_stream; - task_stream<front_accessor>& fifo_stream = a.my_fifo_task_stream; - unsigned& fifo_hint = slot.hint_for_fifo_stream; - - waiter.reset_wait(); - // Thread is in idle state now - inbox.set_is_idle(true); - - bool stealing_is_allowed = can_steal(); - - // Stealing loop mailbox/enqueue/other_slots - for (;;) { - __TBB_ASSERT(t == nullptr, nullptr); - // Check if the resource 
manager requires our arena to relinquish some threads - // For the external thread restore idle state to true after dispatch loop - if (!waiter.continue_execution(slot, t)) { - __TBB_ASSERT(t == nullptr, nullptr); - break; - } - // Start searching - if (t != nullptr) { - // continue_execution returned a task - } - else if ((t = get_inbox_or_critical_task(ed, inbox, isolation, critical_allowed))) { - // Successfully got the task from mailbox or critical task - } - else if ((t = get_stream_or_critical_task(ed, a, resume_stream, resume_hint, isolation, critical_allowed))) { - // Successfully got the resume or critical task - } - else if (fifo_allowed && isolation == no_isolation - && (t = get_stream_or_critical_task(ed, a, fifo_stream, fifo_hint, isolation, critical_allowed))) { - // Checked if there are tasks in starvation-resistant stream. Only allowed at the outermost dispatch level without isolation. - } - else if (stealing_is_allowed - && (t = steal_or_get_critical(ed, a, arena_index, tls.my_random, isolation, critical_allowed))) { - // Stole a task from a random arena slot - } - else { - t = get_critical_task(t, ed, isolation, critical_allowed); - } - - if (t != nullptr) { - ed.context = task_accessor::context(*t); - ed.isolation = task_accessor::isolation(*t); - a.my_observers.notify_entry_observers(tls.my_last_observer, tls.my_is_worker); - break; // Stealing success, end of stealing attempt - } - // Nothing to do, pause a little. - waiter.pause(slot); - } // end of nonlocal task retrieval loop - if (inbox.is_idle_state(true)) { - inbox.set_is_idle(false); - } - return t; -} - -template <bool ITTPossible, typename Waiter> -d1::task* task_dispatcher::local_wait_for_all(d1::task* t, Waiter& waiter ) { - assert_pointer_valid(m_thread_data); - __TBB_ASSERT(m_thread_data->my_task_dispatcher == this, nullptr); - - // Guard an outer/default execution state - struct dispatch_loop_guard { - task_dispatcher& task_disp; - execution_data_ext old_execute_data_ext; - properties old_properties; - - ~dispatch_loop_guard() { - task_disp.m_execute_data_ext = old_execute_data_ext; - task_disp.m_properties = old_properties; - - __TBB_ASSERT(task_disp.m_thread_data && governor::is_thread_data_set(task_disp.m_thread_data), nullptr); - __TBB_ASSERT(task_disp.m_thread_data->my_task_dispatcher == &task_disp, nullptr); - } - } dl_guard{ *this, m_execute_data_ext, m_properties }; - - // The context guard to track fp setting and itt tasks. - context_guard_helper</*report_tasks=*/ITTPossible> context_guard; - - // Current isolation context - const isolation_type isolation = dl_guard.old_execute_data_ext.isolation; - - // Critical work inflection point. Once turned false current execution context has taken - // critical task on the previous stack frame and cannot take more until that critical path is - // finished. - bool critical_allowed = dl_guard.old_properties.critical_task_allowed; - - // Extended execution data that is used for dispatching. - // Base version is passed to the task::execute method. - execution_data_ext& ed = m_execute_data_ext; - ed.context = t ? 
task_accessor::context(*t) : nullptr; - ed.original_slot = m_thread_data->my_arena_index; - ed.affinity_slot = d1::no_slot; - ed.task_disp = this; - ed.wait_ctx = waiter.wait_ctx(); - - m_properties.outermost = false; - m_properties.fifo_tasks_allowed = false; - - t = get_critical_task(t, ed, isolation, critical_allowed); - - // Infinite exception loop - for (;;) { - try { - // Main execution loop - do { - // We assume that bypass tasks are from the same task group. - context_guard.set_ctx(ed.context); - // Inner level evaluates tasks coming from nesting loops and those returned - // by just executed tasks (bypassing spawn or enqueue calls). - while (t != nullptr) { - assert_task_valid(t); - assert_pointer_valid</*alignment = */alignof(void*)>(ed.context); - __TBB_ASSERT(ed.context->my_lifetime_state > d1::task_group_context::lifetime_state::locked && - ed.context->my_lifetime_state < d1::task_group_context::lifetime_state::dying, nullptr); - __TBB_ASSERT(m_thread_data->my_inbox.is_idle_state(false), nullptr); - __TBB_ASSERT(task_accessor::is_resume_task(*t) || isolation == no_isolation || isolation == ed.isolation, nullptr); - // Check premature leave - if (Waiter::postpone_execution(*t)) { - __TBB_ASSERT(task_accessor::is_resume_task(*t) && dl_guard.old_properties.outermost, - "Currently, the bypass loop can be interrupted only for resume task on outermost level"); - return t; - } - // Copy itt_caller to a stack because the context might be destroyed after t->execute. - void* itt_caller = ed.context->my_itt_caller; - suppress_unused_warning(itt_caller); - - ITT_CALLEE_ENTER(ITTPossible, t, itt_caller); - - if (ed.context->is_group_execution_cancelled()) { - t = t->cancel(ed); - } else { - t = t->execute(ed); - } - - ITT_CALLEE_LEAVE(ITTPossible, itt_caller); - - // The task affinity in execution data is set for affinitized tasks. - // So drop it after the task execution. - ed.affinity_slot = d1::no_slot; - // Reset task owner id for bypassed task - ed.original_slot = m_thread_data->my_arena_index; - t = get_critical_task(t, ed, isolation, critical_allowed); - } - __TBB_ASSERT(m_thread_data && governor::is_thread_data_set(m_thread_data), nullptr); - __TBB_ASSERT(m_thread_data->my_task_dispatcher == this, nullptr); - // When refactoring, pay attention that m_thread_data can be changed after t->execute() - __TBB_ASSERT(m_thread_data->my_arena_slot != nullptr, nullptr); - arena_slot& slot = *m_thread_data->my_arena_slot; - if (!waiter.continue_execution(slot, t)) { - break; - } - // Retrieve the task from local task pool - if (t || (slot.is_task_pool_published() && (t = slot.get_task(ed, isolation)))) { - __TBB_ASSERT(ed.original_slot == m_thread_data->my_arena_index, NULL); - ed.context = task_accessor::context(*t); - ed.isolation = task_accessor::isolation(*t); - continue; - } - // Retrieve the task from global sources - t = receive_or_steal_task<ITTPossible>( - *m_thread_data, ed, waiter, isolation, dl_guard.old_properties.fifo_tasks_allowed, - critical_allowed - ); - } while (t != nullptr); // main dispatch loop - break; // Exit exception loop; - } catch (...) { - if (global_control::active_value(global_control::terminate_on_exception) == 1) { - do_throw_noexcept([] { throw; }); - } - if (ed.context->cancel_group_execution()) { - /* We are the first to signal cancellation, so store the exception that caused it. 
*/ - ed.context->my_exception = tbb_exception_ptr::allocate(); - } - } - } // Infinite exception loop - __TBB_ASSERT(t == nullptr, nullptr); - - -#if __TBB_RESUMABLE_TASKS - if (dl_guard.old_properties.outermost) { - recall_point(); - } -#endif /* __TBB_RESUMABLE_TASKS */ - - return nullptr; -} - -#if __TBB_RESUMABLE_TASKS -inline void task_dispatcher::recall_point() { - if (this != &m_thread_data->my_arena_slot->default_task_dispatcher()) { - __TBB_ASSERT(m_suspend_point != nullptr, nullptr); - __TBB_ASSERT(m_suspend_point->m_is_owner_recalled.load(std::memory_order_relaxed) == false, nullptr); - d1::suspend([](suspend_point_type* sp) { - sp->m_is_owner_recalled.store(true, std::memory_order_release); - auto is_related_suspend_point = [sp] (extended_context context) { - std::uintptr_t sp_addr = std::uintptr_t(sp); - return sp_addr == context.my_uniq_addr; - }; - sp->m_arena->my_market->get_wait_list().notify(is_related_suspend_point); - }); - - if (m_thread_data->my_inbox.is_idle_state(true)) { - m_thread_data->my_inbox.set_is_idle(false); - } - } -} -#endif /* __TBB_RESUMABLE_TASKS */ - -#if __TBB_PREVIEW_CRITICAL_TASKS -inline d1::task* task_dispatcher::get_critical_task(d1::task* t, execution_data_ext& ed, isolation_type isolation, bool critical_allowed) { - __TBB_ASSERT( critical_allowed || !m_properties.critical_task_allowed, nullptr ); - - if (!critical_allowed) { - // The stack is already in the process of critical path execution. Cannot take another - // critical work until finish with the current one. - __TBB_ASSERT(!m_properties.critical_task_allowed, nullptr); - return t; - } - - assert_pointers_valid(m_thread_data, m_thread_data->my_arena, m_thread_data->my_arena_slot); - thread_data& td = *m_thread_data; - arena& a = *td.my_arena; - arena_slot& slot = *td.my_arena_slot; - - d1::task* crit_t = a.get_critical_task(slot.hint_for_critical_stream, isolation); - if (crit_t != nullptr) { - assert_task_valid(crit_t); - if (t != nullptr) { - assert_pointer_valid</*alignment = */alignof(void*)>(ed.context); - r1::spawn(*t, *ed.context); - } - ed.context = task_accessor::context(*crit_t); - ed.isolation = task_accessor::isolation(*crit_t); - - // We cannot execute more than one critical task on the same stack. - // In other words, we prevent nested critical tasks. - m_properties.critical_task_allowed = false; - - // TODO: add a test that the observer is called when critical task is taken. - a.my_observers.notify_entry_observers(td.my_last_observer, td.my_is_worker); - t = crit_t; - } else { - // Was unable to find critical work in the queue. Allow inspecting the queue in nested - // invocations. Handles the case when critical task has been just completed. - m_properties.critical_task_allowed = true; - } - return t; -} -#else -inline d1::task* task_dispatcher::get_critical_task(d1::task* t, execution_data_ext&, isolation_type, bool /*critical_allowed*/) { - return t; -} -#endif - -inline d1::task* task_dispatcher::get_mailbox_task(mail_inbox& my_inbox, execution_data_ext& ed, isolation_type isolation) { - while (task_proxy* const tp = my_inbox.pop(isolation)) { - if (d1::task* result = tp->extract_task<task_proxy::mailbox_bit>()) { - ed.original_slot = (unsigned short)(-2); - ed.affinity_slot = ed.task_disp->m_thread_data->my_arena_index; - return result; - } - // We have exclusive access to the proxy, and can destroy it. 
- tp->allocator.delete_object(tp, ed); - } - return NULL; -} - -template <typename Waiter> -d1::task* task_dispatcher::local_wait_for_all(d1::task* t, Waiter& waiter) { - if (governor::is_itt_present()) { - return local_wait_for_all</*ITTPossible = */ true>(t, waiter); - } else { - return local_wait_for_all</*ITTPossible = */ false>(t, waiter); - } -} - -} // namespace r1 -} // namespace detail -} // namespace tbb - -#endif // _TBB_task_dispatcher_H - +/* + Copyright (c) 2020-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef _TBB_task_dispatcher_H +#define _TBB_task_dispatcher_H + +#include "oneapi/tbb/detail/_utils.h" +#include "oneapi/tbb/detail/_task.h" +#include "oneapi/tbb/global_control.h" + +#include "scheduler_common.h" +#include "waiters.h" +#include "arena_slot.h" +#include "arena.h" +#include "thread_data.h" +#include "mailbox.h" +#include "itt_notify.h" +#include "concurrent_monitor.h" + +#include <atomic> + +#if !__TBB_CPU_CTL_ENV_PRESENT +#include <fenv.h> // +#endif + +namespace tbb { +namespace detail { +namespace r1 { + +inline d1::task* get_self_recall_task(arena_slot& slot) { + suppress_unused_warning(slot); + d1::task* t = nullptr; +#if __TBB_RESUMABLE_TASKS + suspend_point_type* sp = slot.default_task_dispatcher().m_suspend_point; + if (sp && sp->m_is_owner_recalled.load(std::memory_order_acquire)) { + t = &sp->m_resume_task; + __TBB_ASSERT(sp->m_resume_task.m_target.m_thread_data == nullptr, nullptr); + } +#endif /* __TBB_RESUMABLE_TASKS */ + return t; +} + +// Defined in exception.cpp +/*[[noreturn]]*/void do_throw_noexcept(void (*throw_exception)()) noexcept; + +//------------------------------------------------------------------------ +// Suspend point +//------------------------------------------------------------------------ +#if __TBB_RESUMABLE_TASKS + +inline d1::task* suspend_point_type::resume_task::execute(d1::execution_data& ed) { + execution_data_ext& ed_ext = static_cast<execution_data_ext&>(ed); + + if (ed_ext.wait_ctx) { + extended_concurrent_monitor::resume_context monitor_node{{std::uintptr_t(ed_ext.wait_ctx), nullptr}, ed_ext, m_target}; + // The wait_ctx is present only in external_waiter. In that case we leave the current stack + // in the abandoned state to resume when waiting completes. + thread_data* td = ed_ext.task_disp->m_thread_data; + td->set_post_resume_action(thread_data::post_resume_action::register_waiter, &monitor_node); + + extended_concurrent_monitor& wait_list = td->my_arena->my_market->get_wait_list(); + + if (wait_list.wait([&] { return !ed_ext.wait_ctx->continue_execution(); }, monitor_node)) { + return nullptr; + } + + td->clear_post_resume_action(); + td->set_post_resume_action(thread_data::post_resume_action::resume, ed_ext.task_disp->get_suspend_point()); + } else { + // If wait_ctx is null, it can be only a worker thread on outermost level because + // coroutine_waiter interrupts bypass loop before the resume_task execution. 
+ ed_ext.task_disp->m_thread_data->set_post_resume_action(thread_data::post_resume_action::notify, + &ed_ext.task_disp->get_suspend_point()->m_is_owner_recalled); + } + // Do not access this task because it might be destroyed + ed_ext.task_disp->resume(m_target); + return nullptr; +} + +inline suspend_point_type::suspend_point_type(arena* a, size_t stack_size, task_dispatcher& task_disp) + : m_arena(a) + , m_random(this) + , m_co_context(stack_size, &task_disp) + , m_resume_task(task_disp) +{ + assert_pointer_valid(m_arena); + assert_pointer_valid(m_arena->my_default_ctx); + task_accessor::context(m_resume_task) = m_arena->my_default_ctx; + task_accessor::isolation(m_resume_task) = no_isolation; + // Initialize the itt_caller for the context of the resume task. + // It will be bound to the stack of the first suspend call. + task_group_context_impl::bind_to(*task_accessor::context(m_resume_task), task_disp.m_thread_data); +} + +#endif /* __TBB_RESUMABLE_TASKS */ + +//------------------------------------------------------------------------ +// Task Dispatcher +//------------------------------------------------------------------------ +inline task_dispatcher::task_dispatcher(arena* a) { + m_execute_data_ext.context = a->my_default_ctx; + m_execute_data_ext.task_disp = this; +} + +inline bool task_dispatcher::can_steal() { + __TBB_ASSERT(m_stealing_threshold != 0, nullptr); + stack_anchor_type anchor{}; + return reinterpret_cast<std::uintptr_t>(&anchor) > m_stealing_threshold; +} + +inline d1::task* task_dispatcher::get_inbox_or_critical_task( + execution_data_ext& ed, mail_inbox& inbox, isolation_type isolation, bool critical_allowed) +{ + if (inbox.empty()) + return nullptr; + d1::task* result = get_critical_task(nullptr, ed, isolation, critical_allowed); + if (result) + return result; + // Check if there are tasks mailed to this thread via task-to-thread affinity mechanism. + result = get_mailbox_task(inbox, ed, isolation); + // There is a race with a thread adding a new task (possibly with suitable isolation) + // to our mailbox, so the below conditions might result in a false positive. + // Then set_is_idle(false) allows that task to be stolen; it's OK. + if (isolation != no_isolation && !result && !inbox.empty() && inbox.is_idle_state(true)) { + // We have proxy tasks in our mailbox but the isolation blocks their execution. + // So publish the proxy tasks in mailbox to be available for stealing from owner's task pool. 
+ inbox.set_is_idle( false ); + } + return result; +} + +inline d1::task* task_dispatcher::get_stream_or_critical_task( + execution_data_ext& ed, arena& a, task_stream<front_accessor>& stream, unsigned& hint, + isolation_type isolation, bool critical_allowed) +{ + if (stream.empty()) + return nullptr; + d1::task* result = get_critical_task(nullptr, ed, isolation, critical_allowed); + if (result) + return result; + return a.get_stream_task(stream, hint); +} + +inline d1::task* task_dispatcher::steal_or_get_critical( + execution_data_ext& ed, arena& a, unsigned arena_index, FastRandom& random, + isolation_type isolation, bool critical_allowed) +{ + if (d1::task* t = a.steal_task(arena_index, random, ed, isolation)) { + ed.context = task_accessor::context(*t); + ed.isolation = task_accessor::isolation(*t); + return get_critical_task(t, ed, isolation, critical_allowed); + } + return nullptr; +} + +template <bool ITTPossible, typename Waiter> +d1::task* task_dispatcher::receive_or_steal_task( + thread_data& tls, execution_data_ext& ed, Waiter& waiter, isolation_type isolation, + bool fifo_allowed, bool critical_allowed) +{ + __TBB_ASSERT(governor::is_thread_data_set(&tls), NULL); + // Task to return + d1::task* t = nullptr; + // Get tls data (again) + arena& a = *tls.my_arena; + arena_slot& slot = *tls.my_arena_slot; + unsigned arena_index = tls.my_arena_index; + mail_inbox& inbox = tls.my_inbox; + task_stream<front_accessor>& resume_stream = a.my_resume_task_stream; + unsigned& resume_hint = slot.hint_for_resume_stream; + task_stream<front_accessor>& fifo_stream = a.my_fifo_task_stream; + unsigned& fifo_hint = slot.hint_for_fifo_stream; + + waiter.reset_wait(); + // Thread is in idle state now + inbox.set_is_idle(true); + + bool stealing_is_allowed = can_steal(); + + // Stealing loop mailbox/enqueue/other_slots + for (;;) { + __TBB_ASSERT(t == nullptr, nullptr); + // Check if the resource manager requires our arena to relinquish some threads + // For the external thread restore idle state to true after dispatch loop + if (!waiter.continue_execution(slot, t)) { + __TBB_ASSERT(t == nullptr, nullptr); + break; + } + // Start searching + if (t != nullptr) { + // continue_execution returned a task + } + else if ((t = get_inbox_or_critical_task(ed, inbox, isolation, critical_allowed))) { + // Successfully got the task from mailbox or critical task + } + else if ((t = get_stream_or_critical_task(ed, a, resume_stream, resume_hint, isolation, critical_allowed))) { + // Successfully got the resume or critical task + } + else if (fifo_allowed && isolation == no_isolation + && (t = get_stream_or_critical_task(ed, a, fifo_stream, fifo_hint, isolation, critical_allowed))) { + // Checked if there are tasks in starvation-resistant stream. Only allowed at the outermost dispatch level without isolation. + } + else if (stealing_is_allowed + && (t = steal_or_get_critical(ed, a, arena_index, tls.my_random, isolation, critical_allowed))) { + // Stole a task from a random arena slot + } + else { + t = get_critical_task(t, ed, isolation, critical_allowed); + } + + if (t != nullptr) { + ed.context = task_accessor::context(*t); + ed.isolation = task_accessor::isolation(*t); + a.my_observers.notify_entry_observers(tls.my_last_observer, tls.my_is_worker); + break; // Stealing success, end of stealing attempt + } + // Nothing to do, pause a little. 
+ waiter.pause(slot); + } // end of nonlocal task retrieval loop + if (inbox.is_idle_state(true)) { + inbox.set_is_idle(false); + } + return t; +} + +template <bool ITTPossible, typename Waiter> +d1::task* task_dispatcher::local_wait_for_all(d1::task* t, Waiter& waiter ) { + assert_pointer_valid(m_thread_data); + __TBB_ASSERT(m_thread_data->my_task_dispatcher == this, nullptr); + + // Guard an outer/default execution state + struct dispatch_loop_guard { + task_dispatcher& task_disp; + execution_data_ext old_execute_data_ext; + properties old_properties; + + ~dispatch_loop_guard() { + task_disp.m_execute_data_ext = old_execute_data_ext; + task_disp.m_properties = old_properties; + + __TBB_ASSERT(task_disp.m_thread_data && governor::is_thread_data_set(task_disp.m_thread_data), nullptr); + __TBB_ASSERT(task_disp.m_thread_data->my_task_dispatcher == &task_disp, nullptr); + } + } dl_guard{ *this, m_execute_data_ext, m_properties }; + + // The context guard to track fp setting and itt tasks. + context_guard_helper</*report_tasks=*/ITTPossible> context_guard; + + // Current isolation context + const isolation_type isolation = dl_guard.old_execute_data_ext.isolation; + + // Critical work inflection point. Once turned false current execution context has taken + // critical task on the previous stack frame and cannot take more until that critical path is + // finished. + bool critical_allowed = dl_guard.old_properties.critical_task_allowed; + + // Extended execution data that is used for dispatching. + // Base version is passed to the task::execute method. + execution_data_ext& ed = m_execute_data_ext; + ed.context = t ? task_accessor::context(*t) : nullptr; + ed.original_slot = m_thread_data->my_arena_index; + ed.affinity_slot = d1::no_slot; + ed.task_disp = this; + ed.wait_ctx = waiter.wait_ctx(); + + m_properties.outermost = false; + m_properties.fifo_tasks_allowed = false; + + t = get_critical_task(t, ed, isolation, critical_allowed); + + // Infinite exception loop + for (;;) { + try { + // Main execution loop + do { + // We assume that bypass tasks are from the same task group. + context_guard.set_ctx(ed.context); + // Inner level evaluates tasks coming from nesting loops and those returned + // by just executed tasks (bypassing spawn or enqueue calls). + while (t != nullptr) { + assert_task_valid(t); + assert_pointer_valid</*alignment = */alignof(void*)>(ed.context); + __TBB_ASSERT(ed.context->my_lifetime_state > d1::task_group_context::lifetime_state::locked && + ed.context->my_lifetime_state < d1::task_group_context::lifetime_state::dying, nullptr); + __TBB_ASSERT(m_thread_data->my_inbox.is_idle_state(false), nullptr); + __TBB_ASSERT(task_accessor::is_resume_task(*t) || isolation == no_isolation || isolation == ed.isolation, nullptr); + // Check premature leave + if (Waiter::postpone_execution(*t)) { + __TBB_ASSERT(task_accessor::is_resume_task(*t) && dl_guard.old_properties.outermost, + "Currently, the bypass loop can be interrupted only for resume task on outermost level"); + return t; + } + // Copy itt_caller to a stack because the context might be destroyed after t->execute. + void* itt_caller = ed.context->my_itt_caller; + suppress_unused_warning(itt_caller); + + ITT_CALLEE_ENTER(ITTPossible, t, itt_caller); + + if (ed.context->is_group_execution_cancelled()) { + t = t->cancel(ed); + } else { + t = t->execute(ed); + } + + ITT_CALLEE_LEAVE(ITTPossible, itt_caller); + + // The task affinity in execution data is set for affinitized tasks. + // So drop it after the task execution. 
+ ed.affinity_slot = d1::no_slot; + // Reset task owner id for bypassed task + ed.original_slot = m_thread_data->my_arena_index; + t = get_critical_task(t, ed, isolation, critical_allowed); + } + __TBB_ASSERT(m_thread_data && governor::is_thread_data_set(m_thread_data), nullptr); + __TBB_ASSERT(m_thread_data->my_task_dispatcher == this, nullptr); + // When refactoring, pay attention that m_thread_data can be changed after t->execute() + __TBB_ASSERT(m_thread_data->my_arena_slot != nullptr, nullptr); + arena_slot& slot = *m_thread_data->my_arena_slot; + if (!waiter.continue_execution(slot, t)) { + break; + } + // Retrieve the task from local task pool + if (t || (slot.is_task_pool_published() && (t = slot.get_task(ed, isolation)))) { + __TBB_ASSERT(ed.original_slot == m_thread_data->my_arena_index, NULL); + ed.context = task_accessor::context(*t); + ed.isolation = task_accessor::isolation(*t); + continue; + } + // Retrieve the task from global sources + t = receive_or_steal_task<ITTPossible>( + *m_thread_data, ed, waiter, isolation, dl_guard.old_properties.fifo_tasks_allowed, + critical_allowed + ); + } while (t != nullptr); // main dispatch loop + break; // Exit exception loop; + } catch (...) { + if (global_control::active_value(global_control::terminate_on_exception) == 1) { + do_throw_noexcept([] { throw; }); + } + if (ed.context->cancel_group_execution()) { + /* We are the first to signal cancellation, so store the exception that caused it. */ + ed.context->my_exception = tbb_exception_ptr::allocate(); + } + } + } // Infinite exception loop + __TBB_ASSERT(t == nullptr, nullptr); + + +#if __TBB_RESUMABLE_TASKS + if (dl_guard.old_properties.outermost) { + recall_point(); + } +#endif /* __TBB_RESUMABLE_TASKS */ + + return nullptr; +} + +#if __TBB_RESUMABLE_TASKS +inline void task_dispatcher::recall_point() { + if (this != &m_thread_data->my_arena_slot->default_task_dispatcher()) { + __TBB_ASSERT(m_suspend_point != nullptr, nullptr); + __TBB_ASSERT(m_suspend_point->m_is_owner_recalled.load(std::memory_order_relaxed) == false, nullptr); + d1::suspend([](suspend_point_type* sp) { + sp->m_is_owner_recalled.store(true, std::memory_order_release); + auto is_related_suspend_point = [sp] (extended_context context) { + std::uintptr_t sp_addr = std::uintptr_t(sp); + return sp_addr == context.my_uniq_addr; + }; + sp->m_arena->my_market->get_wait_list().notify(is_related_suspend_point); + }); + + if (m_thread_data->my_inbox.is_idle_state(true)) { + m_thread_data->my_inbox.set_is_idle(false); + } + } +} +#endif /* __TBB_RESUMABLE_TASKS */ + +#if __TBB_PREVIEW_CRITICAL_TASKS +inline d1::task* task_dispatcher::get_critical_task(d1::task* t, execution_data_ext& ed, isolation_type isolation, bool critical_allowed) { + __TBB_ASSERT( critical_allowed || !m_properties.critical_task_allowed, nullptr ); + + if (!critical_allowed) { + // The stack is already in the process of critical path execution. Cannot take another + // critical work until finish with the current one. 
+ __TBB_ASSERT(!m_properties.critical_task_allowed, nullptr); + return t; + } + + assert_pointers_valid(m_thread_data, m_thread_data->my_arena, m_thread_data->my_arena_slot); + thread_data& td = *m_thread_data; + arena& a = *td.my_arena; + arena_slot& slot = *td.my_arena_slot; + + d1::task* crit_t = a.get_critical_task(slot.hint_for_critical_stream, isolation); + if (crit_t != nullptr) { + assert_task_valid(crit_t); + if (t != nullptr) { + assert_pointer_valid</*alignment = */alignof(void*)>(ed.context); + r1::spawn(*t, *ed.context); + } + ed.context = task_accessor::context(*crit_t); + ed.isolation = task_accessor::isolation(*crit_t); + + // We cannot execute more than one critical task on the same stack. + // In other words, we prevent nested critical tasks. + m_properties.critical_task_allowed = false; + + // TODO: add a test that the observer is called when critical task is taken. + a.my_observers.notify_entry_observers(td.my_last_observer, td.my_is_worker); + t = crit_t; + } else { + // Was unable to find critical work in the queue. Allow inspecting the queue in nested + // invocations. Handles the case when critical task has been just completed. + m_properties.critical_task_allowed = true; + } + return t; +} +#else +inline d1::task* task_dispatcher::get_critical_task(d1::task* t, execution_data_ext&, isolation_type, bool /*critical_allowed*/) { + return t; +} +#endif + +inline d1::task* task_dispatcher::get_mailbox_task(mail_inbox& my_inbox, execution_data_ext& ed, isolation_type isolation) { + while (task_proxy* const tp = my_inbox.pop(isolation)) { + if (d1::task* result = tp->extract_task<task_proxy::mailbox_bit>()) { + ed.original_slot = (unsigned short)(-2); + ed.affinity_slot = ed.task_disp->m_thread_data->my_arena_index; + return result; + } + // We have exclusive access to the proxy, and can destroy it. + tp->allocator.delete_object(tp, ed); + } + return NULL; +} + +template <typename Waiter> +d1::task* task_dispatcher::local_wait_for_all(d1::task* t, Waiter& waiter) { + if (governor::is_itt_present()) { + return local_wait_for_all</*ITTPossible = */ true>(t, waiter); + } else { + return local_wait_for_all</*ITTPossible = */ false>(t, waiter); + } +} + +} // namespace r1 +} // namespace detail +} // namespace tbb + +#endif // _TBB_task_dispatcher_H + diff --git a/contrib/libs/tbb/src/tbb/task_group_context.cpp b/contrib/libs/tbb/src/tbb/task_group_context.cpp index 5836c955e9..3c296648ec 100644 --- a/contrib/libs/tbb/src/tbb/task_group_context.cpp +++ b/contrib/libs/tbb/src/tbb/task_group_context.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,212 +14,212 @@ limitations under the License. 
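The non-template local_wait_for_all wrapper above tests for ITT instrumentation once and then enters one of two pre-instantiated specializations, so the per-task dispatch path carries no repeated runtime check. The same pattern in isolation (process_impl and tracing_enabled are illustrative stand-ins, not TBB names):

#include <cstdio>

// Compile-time flag baked into each specialization; the branch inside is
// constant per instantiation and removable by the optimizer.
template <bool Instrumented>
int process_impl(int x) {
    if (Instrumented)
        std::printf("processing %d\n", x);
    return x * 2;
}

bool tracing_enabled() { return true; }   // stand-in for a runtime capability check

// Non-template front end: test the runtime flag once, then dispatch to the
// matching specialization, as the wrapper above does.
int process(int x) {
    return tracing_enabled() ? process_impl<true>(x) : process_impl<false>(x);
}

int main() { return process(21) == 42 ? 0 : 1; }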
*/ -#include "oneapi/tbb/detail/_config.h" -#include "oneapi/tbb/tbb_allocator.h" -#include "oneapi/tbb/task_group.h" -#include "governor.h" -#include "thread_data.h" -#include "scheduler_common.h" +#include "oneapi/tbb/detail/_config.h" +#include "oneapi/tbb/tbb_allocator.h" +#include "oneapi/tbb/task_group.h" +#include "governor.h" +#include "thread_data.h" +#include "scheduler_common.h" #include "itt_notify.h" -#include "task_dispatcher.h" +#include "task_dispatcher.h" + +#include <type_traits> -#include <type_traits> - namespace tbb { -namespace detail { -namespace r1 { +namespace detail { +namespace r1 { //------------------------------------------------------------------------ -// tbb_exception_ptr +// tbb_exception_ptr //------------------------------------------------------------------------ -tbb_exception_ptr* tbb_exception_ptr::allocate() noexcept { - tbb_exception_ptr* eptr = (tbb_exception_ptr*)allocate_memory(sizeof(tbb_exception_ptr)); - return eptr ? new (eptr) tbb_exception_ptr(std::current_exception()) : nullptr; +tbb_exception_ptr* tbb_exception_ptr::allocate() noexcept { + tbb_exception_ptr* eptr = (tbb_exception_ptr*)allocate_memory(sizeof(tbb_exception_ptr)); + return eptr ? new (eptr) tbb_exception_ptr(std::current_exception()) : nullptr; } -void tbb_exception_ptr::destroy() noexcept { - this->~tbb_exception_ptr(); - deallocate_memory(this); +void tbb_exception_ptr::destroy() noexcept { + this->~tbb_exception_ptr(); + deallocate_memory(this); } -void tbb_exception_ptr::throw_self() { - if (governor::rethrow_exception_broken()) fix_broken_rethrow(); - std::rethrow_exception(my_ptr); +void tbb_exception_ptr::throw_self() { + if (governor::rethrow_exception_broken()) fix_broken_rethrow(); + std::rethrow_exception(my_ptr); } //------------------------------------------------------------------------ -// task_group_context +// task_group_context //------------------------------------------------------------------------ -void task_group_context_impl::destroy(d1::task_group_context& ctx) { - __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL); +void task_group_context_impl::destroy(d1::task_group_context& ctx) { + __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL); - auto ctx_lifetime_state = ctx.my_lifetime_state.load(std::memory_order_relaxed); - __TBB_ASSERT(ctx_lifetime_state != d1::task_group_context::lifetime_state::locked, nullptr); + auto ctx_lifetime_state = ctx.my_lifetime_state.load(std::memory_order_relaxed); + __TBB_ASSERT(ctx_lifetime_state != d1::task_group_context::lifetime_state::locked, nullptr); - if (ctx_lifetime_state == d1::task_group_context::lifetime_state::bound) { - // The owner can be destroyed at any moment. Access the associate data with caution. - thread_data* owner = ctx.my_owner.load(std::memory_order_relaxed); - if (governor::is_thread_data_set(owner)) { - thread_data::context_list_state& cls = owner->my_context_list_state; - // We are the owner, so cls is valid. + if (ctx_lifetime_state == d1::task_group_context::lifetime_state::bound) { + // The owner can be destroyed at any moment. Access the associate data with caution. + thread_data* owner = ctx.my_owner.load(std::memory_order_relaxed); + if (governor::is_thread_data_set(owner)) { + thread_data::context_list_state& cls = owner->my_context_list_state; + // We are the owner, so cls is valid. 
// Local update of the context list - std::uintptr_t local_count_snapshot = cls.epoch.load(std::memory_order_relaxed); - // The sequentially-consistent store to prevent load of nonlocal update flag - // from being hoisted before the store to local update flag. - cls.local_update = 1; - if (cls.nonlocal_update.load(std::memory_order_relaxed)) { - spin_mutex::scoped_lock lock(cls.mutex); - ctx.my_node.remove_relaxed(); - cls.local_update.store(0, std::memory_order_relaxed); - } else { - ctx.my_node.remove_relaxed(); + std::uintptr_t local_count_snapshot = cls.epoch.load(std::memory_order_relaxed); + // The sequentially-consistent store to prevent load of nonlocal update flag + // from being hoisted before the store to local update flag. + cls.local_update = 1; + if (cls.nonlocal_update.load(std::memory_order_relaxed)) { + spin_mutex::scoped_lock lock(cls.mutex); + ctx.my_node.remove_relaxed(); + cls.local_update.store(0, std::memory_order_relaxed); + } else { + ctx.my_node.remove_relaxed(); // Release fence is necessary so that update of our neighbors in // the context list was committed when possible concurrent destroyer // proceeds after local update flag is reset by the following store. - cls.local_update.store(0, std::memory_order_release); - if (local_count_snapshot != the_context_state_propagation_epoch.load(std::memory_order_relaxed)) { + cls.local_update.store(0, std::memory_order_release); + if (local_count_snapshot != the_context_state_propagation_epoch.load(std::memory_order_relaxed)) { // Another thread was propagating cancellation request when we removed // ourselves from the list. We must ensure that it is not accessing us // when this destructor finishes. We'll be able to acquire the lock // below only after the other thread finishes with us. - spin_mutex::scoped_lock lock(cls.mutex); + spin_mutex::scoped_lock lock(cls.mutex); } } - } else { - d1::task_group_context::lifetime_state expected = d1::task_group_context::lifetime_state::bound; - if ( -#if defined(__INTEL_COMPILER) && __INTEL_COMPILER <= 1910 - !((std::atomic<typename std::underlying_type<d1::task_group_context::lifetime_state>::type>&)ctx.my_lifetime_state).compare_exchange_strong( - (typename std::underlying_type<d1::task_group_context::lifetime_state>::type&)expected, - (typename std::underlying_type<d1::task_group_context::lifetime_state>::type)d1::task_group_context::lifetime_state::locked) -#else - !ctx.my_lifetime_state.compare_exchange_strong(expected, d1::task_group_context::lifetime_state::locked) -#endif - ) { - __TBB_ASSERT(expected == d1::task_group_context::lifetime_state::detached, nullptr); - // The "owner" local variable can be a dangling pointer here. Do not access it. - owner = nullptr; - spin_wait_until_eq(ctx.my_owner, nullptr); - // It is unsafe to remove the node because its neighbors might be already destroyed. - // TODO: reconsider the logic. 
- // ctx.my_node.remove_relaxed(); + } else { + d1::task_group_context::lifetime_state expected = d1::task_group_context::lifetime_state::bound; + if ( +#if defined(__INTEL_COMPILER) && __INTEL_COMPILER <= 1910 + !((std::atomic<typename std::underlying_type<d1::task_group_context::lifetime_state>::type>&)ctx.my_lifetime_state).compare_exchange_strong( + (typename std::underlying_type<d1::task_group_context::lifetime_state>::type&)expected, + (typename std::underlying_type<d1::task_group_context::lifetime_state>::type)d1::task_group_context::lifetime_state::locked) +#else + !ctx.my_lifetime_state.compare_exchange_strong(expected, d1::task_group_context::lifetime_state::locked) +#endif + ) { + __TBB_ASSERT(expected == d1::task_group_context::lifetime_state::detached, nullptr); + // The "owner" local variable can be a dangling pointer here. Do not access it. + owner = nullptr; + spin_wait_until_eq(ctx.my_owner, nullptr); + // It is unsafe to remove the node because its neighbors might be already destroyed. + // TODO: reconsider the logic. + // ctx.my_node.remove_relaxed(); } else { - __TBB_ASSERT(expected == d1::task_group_context::lifetime_state::bound, nullptr); - __TBB_ASSERT(ctx.my_owner.load(std::memory_order_relaxed) != nullptr, nullptr); - thread_data::context_list_state& cls = owner->my_context_list_state; - __TBB_ASSERT(is_alive(cls.nonlocal_update.load(std::memory_order_relaxed)), "The owner should be alive."); - - ++cls.nonlocal_update; - ctx.my_lifetime_state.store(d1::task_group_context::lifetime_state::dying, std::memory_order_release); - spin_wait_until_eq(cls.local_update, 0u); - { - spin_mutex::scoped_lock lock(cls.mutex); - ctx.my_node.remove_relaxed(); - } - --cls.nonlocal_update; + __TBB_ASSERT(expected == d1::task_group_context::lifetime_state::bound, nullptr); + __TBB_ASSERT(ctx.my_owner.load(std::memory_order_relaxed) != nullptr, nullptr); + thread_data::context_list_state& cls = owner->my_context_list_state; + __TBB_ASSERT(is_alive(cls.nonlocal_update.load(std::memory_order_relaxed)), "The owner should be alive."); + + ++cls.nonlocal_update; + ctx.my_lifetime_state.store(d1::task_group_context::lifetime_state::dying, std::memory_order_release); + spin_wait_until_eq(cls.local_update, 0u); + { + spin_mutex::scoped_lock lock(cls.mutex); + ctx.my_node.remove_relaxed(); + } + --cls.nonlocal_update; } } } - - if (ctx_lifetime_state == d1::task_group_context::lifetime_state::detached) { - spin_wait_until_eq(ctx.my_owner, nullptr); - } - - d1::cpu_ctl_env* ctl = reinterpret_cast<d1::cpu_ctl_env*>(&ctx.my_cpu_ctl_env); -#if _MSC_VER && _MSC_VER <= 1900 && !__INTEL_COMPILER - suppress_unused_warning(ctl); + + if (ctx_lifetime_state == d1::task_group_context::lifetime_state::detached) { + spin_wait_until_eq(ctx.my_owner, nullptr); + } + + d1::cpu_ctl_env* ctl = reinterpret_cast<d1::cpu_ctl_env*>(&ctx.my_cpu_ctl_env); +#if _MSC_VER && _MSC_VER <= 1900 && !__INTEL_COMPILER + suppress_unused_warning(ctl); #endif - ctl->~cpu_ctl_env(); - - if (ctx.my_exception) - ctx.my_exception->destroy(); - ITT_STACK_DESTROY(ctx.my_itt_caller); - - poison_pointer(ctx.my_parent); - poison_pointer(ctx.my_parent); - poison_pointer(ctx.my_owner); - poison_pointer(ctx.my_node.next); - poison_pointer(ctx.my_node.prev); - poison_pointer(ctx.my_exception); - poison_pointer(ctx.my_itt_caller); + ctl->~cpu_ctl_env(); + + if (ctx.my_exception) + ctx.my_exception->destroy(); + ITT_STACK_DESTROY(ctx.my_itt_caller); + + poison_pointer(ctx.my_parent); + poison_pointer(ctx.my_parent); + 
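spin_wait_until_eq above parks the caller in a spin loop until an atomic reaches an expected value (the owner pointer becoming null, or local_update dropping back to zero). A simplified sketch of such a helper, assuming plain C++11 atomics; the real TBB helper adds pause/backoff machinery that is omitted here:

#include <atomic>
#include <thread>

// Spin until 'location' holds 'expected'. The acquire load makes writes that
// happened before the matching release store visible once the wait completes.
template <typename T, typename U>
void spin_wait_until_eq(const std::atomic<T>& location, U expected) {
    while (location.load(std::memory_order_acquire) != expected)
        std::this_thread::yield();     // be polite to the OS scheduler while waiting
}

int main() {
    std::atomic<int> flag{1};
    std::thread t([&] { flag.store(0, std::memory_order_release); });
    spin_wait_until_eq(flag, 0);       // returns once the other thread stores 0
    t.join();
}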
poison_pointer(ctx.my_owner); + poison_pointer(ctx.my_node.next); + poison_pointer(ctx.my_node.prev); + poison_pointer(ctx.my_exception); + poison_pointer(ctx.my_itt_caller); } -void task_group_context_impl::initialize(d1::task_group_context& ctx) { - ITT_TASK_GROUP(&ctx, ctx.my_name, nullptr); - - ctx.my_cpu_ctl_env = 0; - ctx.my_cancellation_requested = 0; - ctx.my_state.store(0, std::memory_order_relaxed); - // Set the created state to bound at the first usage. - ctx.my_lifetime_state.store(d1::task_group_context::lifetime_state::created, std::memory_order_relaxed); - ctx.my_parent = nullptr; - ctx.my_owner = nullptr; - ctx.my_node.next.store(nullptr, std::memory_order_relaxed); - ctx.my_node.next.store(nullptr, std::memory_order_relaxed); - ctx.my_exception = nullptr; - ctx.my_itt_caller = nullptr; - - static_assert(sizeof(d1::cpu_ctl_env) <= sizeof(ctx.my_cpu_ctl_env), "FPU settings storage does not fit to uint64_t"); - d1::cpu_ctl_env* ctl = new (&ctx.my_cpu_ctl_env) d1::cpu_ctl_env; - if (ctx.my_traits.fp_settings) - ctl->get_env(); +void task_group_context_impl::initialize(d1::task_group_context& ctx) { + ITT_TASK_GROUP(&ctx, ctx.my_name, nullptr); + + ctx.my_cpu_ctl_env = 0; + ctx.my_cancellation_requested = 0; + ctx.my_state.store(0, std::memory_order_relaxed); + // Set the created state to bound at the first usage. + ctx.my_lifetime_state.store(d1::task_group_context::lifetime_state::created, std::memory_order_relaxed); + ctx.my_parent = nullptr; + ctx.my_owner = nullptr; + ctx.my_node.next.store(nullptr, std::memory_order_relaxed); + ctx.my_node.next.store(nullptr, std::memory_order_relaxed); + ctx.my_exception = nullptr; + ctx.my_itt_caller = nullptr; + + static_assert(sizeof(d1::cpu_ctl_env) <= sizeof(ctx.my_cpu_ctl_env), "FPU settings storage does not fit to uint64_t"); + d1::cpu_ctl_env* ctl = new (&ctx.my_cpu_ctl_env) d1::cpu_ctl_env; + if (ctx.my_traits.fp_settings) + ctl->get_env(); } -void task_group_context_impl::register_with(d1::task_group_context& ctx, thread_data* td) { - __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL); - __TBB_ASSERT(td, NULL); - ctx.my_owner.store(td, std::memory_order_relaxed); - thread_data::context_list_state& cls = td->my_context_list_state; +void task_group_context_impl::register_with(d1::task_group_context& ctx, thread_data* td) { + __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL); + __TBB_ASSERT(td, NULL); + ctx.my_owner.store(td, std::memory_order_relaxed); + thread_data::context_list_state& cls = td->my_context_list_state; // state propagation logic assumes new contexts are bound to head of the list - ctx.my_node.prev.store(&cls.head, std::memory_order_relaxed); + ctx.my_node.prev.store(&cls.head, std::memory_order_relaxed); // Notify threads that may be concurrently destroying contexts registered // in this scheduler's list that local list update is underway. // Prevent load of global propagation epoch counter from being hoisted before // speculative stores above, as well as load of nonlocal update flag from // being hoisted before the store to local update flag. 
- cls.local_update = 1; + cls.local_update = 1; // Finalize local context list update - if (cls.nonlocal_update.load(std::memory_order_relaxed)) { - spin_mutex::scoped_lock lock(cls.mutex); - d1::context_list_node* head_next = cls.head.next.load(std::memory_order_relaxed); - head_next->prev.store(&ctx.my_node, std::memory_order_relaxed); - ctx.my_node.next.store(head_next, std::memory_order_relaxed); - cls.local_update.store(0, std::memory_order_relaxed); - cls.head.next.store(&ctx.my_node, std::memory_order_relaxed); - } else { - d1::context_list_node* head_next = cls.head.next.load(std::memory_order_relaxed); - head_next->prev.store(&ctx.my_node, std::memory_order_relaxed); - ctx.my_node.next.store(head_next, std::memory_order_relaxed); - cls.local_update.store(0, std::memory_order_release); + if (cls.nonlocal_update.load(std::memory_order_relaxed)) { + spin_mutex::scoped_lock lock(cls.mutex); + d1::context_list_node* head_next = cls.head.next.load(std::memory_order_relaxed); + head_next->prev.store(&ctx.my_node, std::memory_order_relaxed); + ctx.my_node.next.store(head_next, std::memory_order_relaxed); + cls.local_update.store(0, std::memory_order_relaxed); + cls.head.next.store(&ctx.my_node, std::memory_order_relaxed); + } else { + d1::context_list_node* head_next = cls.head.next.load(std::memory_order_relaxed); + head_next->prev.store(&ctx.my_node, std::memory_order_relaxed); + ctx.my_node.next.store(head_next, std::memory_order_relaxed); + cls.local_update.store(0, std::memory_order_release); // Thread-local list of contexts allows concurrent traversal by another thread - // while propagating state change. To ensure visibility of ctx.my_node's members + // while propagating state change. To ensure visibility of ctx.my_node's members // to the concurrently traversing thread, the list's head is updated by means // of store-with-release. - cls.head.next.store(&ctx.my_node, std::memory_order_release); + cls.head.next.store(&ctx.my_node, std::memory_order_release); } } -void task_group_context_impl::bind_to_impl(d1::task_group_context& ctx, thread_data* td) { - __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL); - __TBB_ASSERT(ctx.my_lifetime_state.load(std::memory_order_relaxed) == d1::task_group_context::lifetime_state::locked, "The context can be bound only under the lock."); - __TBB_ASSERT(!ctx.my_parent, "Parent is set before initial binding"); - - ctx.my_parent = td->my_task_dispatcher->m_execute_data_ext.context; - __TBB_ASSERT(ctx.my_parent, NULL); - +void task_group_context_impl::bind_to_impl(d1::task_group_context& ctx, thread_data* td) { + __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL); + __TBB_ASSERT(ctx.my_lifetime_state.load(std::memory_order_relaxed) == d1::task_group_context::lifetime_state::locked, "The context can be bound only under the lock."); + __TBB_ASSERT(!ctx.my_parent, "Parent is set before initial binding"); + + ctx.my_parent = td->my_task_dispatcher->m_execute_data_ext.context; + __TBB_ASSERT(ctx.my_parent, NULL); + // Inherit FPU settings only if the context has not captured FPU settings yet. 
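register_with above links a context at the head of the owning thread's intrusive, doubly-linked context list (the LIFO order the propagation code relies on) and publishes the new head with a release store. A reduced sketch of that insertion around a sentinel head node, leaving out the local/nonlocal update handshake:

#include <atomic>
#include <cassert>

struct list_node {
    std::atomic<list_node*> prev{nullptr};
    std::atomic<list_node*> next{nullptr};
};

struct context_list {
    list_node head;                    // sentinel: an empty list points to itself
    context_list() {
        head.prev.store(&head, std::memory_order_relaxed);
        head.next.store(&head, std::memory_order_relaxed);
    }
    // Push 'node' right after the sentinel (LIFO). The final store to head.next
    // uses release so a concurrent traverser that sees the new node also sees
    // its prev/next fields.
    void push_front(list_node& node) {
        list_node* old_first = head.next.load(std::memory_order_relaxed);
        node.prev.store(&head, std::memory_order_relaxed);
        node.next.store(old_first, std::memory_order_relaxed);
        old_first->prev.store(&node, std::memory_order_relaxed);
        head.next.store(&node, std::memory_order_release);
    }
};

int main() {
    context_list list;
    list_node a, b;
    list.push_front(a);
    list.push_front(b);                // b is newest, i.e. closest to the head
    assert(list.head.next.load() == &b && b.next.load() == &a);
}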
- if (!ctx.my_traits.fp_settings) - copy_fp_settings(ctx, *ctx.my_parent); + if (!ctx.my_traits.fp_settings) + copy_fp_settings(ctx, *ctx.my_parent); // Condition below prevents unnecessary thrashing parent context's cache line - if (ctx.my_parent->my_state.load(std::memory_order_relaxed) != d1::task_group_context::may_have_children) { - ctx.my_parent->my_state.store(d1::task_group_context::may_have_children, std::memory_order_relaxed); // full fence is below - } - if (ctx.my_parent->my_parent) { + if (ctx.my_parent->my_state.load(std::memory_order_relaxed) != d1::task_group_context::may_have_children) { + ctx.my_parent->my_state.store(d1::task_group_context::may_have_children, std::memory_order_relaxed); // full fence is below + } + if (ctx.my_parent->my_parent) { // Even if this context were made accessible for state change propagation - // (by placing store_with_release(td->my_context_list_state.head.my_next, &ctx.my_node) + // (by placing store_with_release(td->my_context_list_state.head.my_next, &ctx.my_node) // above), it still could be missed if state propagation from a grand-ancestor // was underway concurrently with binding. // Speculative propagation from the parent together with epoch counters @@ -229,265 +229,265 @@ void task_group_context_impl::bind_to_impl(d1::task_group_context& ctx, thread_d // Acquire fence is necessary to prevent reordering subsequent speculative // loads of parent state data out of the scope where epoch counters comparison // can reliably validate it. - uintptr_t local_count_snapshot = ctx.my_parent->my_owner.load(std::memory_order_relaxed)->my_context_list_state.epoch.load(std::memory_order_acquire); + uintptr_t local_count_snapshot = ctx.my_parent->my_owner.load(std::memory_order_relaxed)->my_context_list_state.epoch.load(std::memory_order_acquire); // Speculative propagation of parent's state. The speculation will be // validated by the epoch counters check further on. - ctx.my_cancellation_requested.store(ctx.my_parent->my_cancellation_requested.load(std::memory_order_relaxed), std::memory_order_relaxed); - register_with(ctx, td); // Issues full fence + ctx.my_cancellation_requested.store(ctx.my_parent->my_cancellation_requested.load(std::memory_order_relaxed), std::memory_order_relaxed); + register_with(ctx, td); // Issues full fence // If no state propagation was detected by the following condition, the above // full fence guarantees that the parent had correct state during speculative // propagation before the fence. Otherwise the propagation from parent is // repeated under the lock. - if (local_count_snapshot != the_context_state_propagation_epoch.load(std::memory_order_relaxed)) { + if (local_count_snapshot != the_context_state_propagation_epoch.load(std::memory_order_relaxed)) { // Another thread may be propagating state change right now. So resort to lock. context_state_propagation_mutex_type::scoped_lock lock(the_context_state_propagation_mutex); - ctx.my_cancellation_requested.store(ctx.my_parent->my_cancellation_requested.load(std::memory_order_relaxed), std::memory_order_relaxed); + ctx.my_cancellation_requested.store(ctx.my_parent->my_cancellation_requested.load(std::memory_order_relaxed), std::memory_order_relaxed); } - } else { - register_with(ctx, td); // Issues full fence + } else { + register_with(ctx, td); // Issues full fence // As we do not have grand-ancestors, concurrent state propagation (if any) // may originate only from the parent context, and thus it is safe to directly // copy the state from it. 
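bind_to_impl above copies the parent's cancellation state speculatively, then validates the copy against an epoch counter that every propagation advances, and only on a mismatch redoes the copy under the lock. The shape of that optimistic protocol as a toy (the names and the mutex fallback below are illustrative, not TBB's):

#include <atomic>
#include <cstdint>
#include <mutex>

std::atomic<std::uintptr_t> propagation_epoch{0};   // bumped by every state propagation
std::mutex propagation_mutex;                       // serializes propagations

struct ctx { std::atomic<std::uint32_t> cancelled{0}; ctx* parent{nullptr}; };

void bind_speculatively(ctx& child, ctx& parent) {
    child.parent = &parent;
    // 1. Snapshot the epoch before reading the parent's state.
    std::uintptr_t snapshot = propagation_epoch.load(std::memory_order_acquire);
    // 2. Speculative copy of the parent's state.
    child.cancelled.store(parent.cancelled.load(std::memory_order_relaxed),
                          std::memory_order_relaxed);
    // (The real code gets its full fence from registering the context.)
    std::atomic_thread_fence(std::memory_order_seq_cst);
    // 3. Validate: if a propagation ran meanwhile, redo the copy under the lock.
    if (snapshot != propagation_epoch.load(std::memory_order_relaxed)) {
        std::lock_guard<std::mutex> lock(propagation_mutex);
        child.cancelled.store(parent.cancelled.load(std::memory_order_relaxed),
                              std::memory_order_relaxed);
    }
}

int main() { ctx p, c; bind_speculatively(c, p); }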
- ctx.my_cancellation_requested.store(ctx.my_parent->my_cancellation_requested.load(std::memory_order_relaxed), std::memory_order_relaxed); + ctx.my_cancellation_requested.store(ctx.my_parent->my_cancellation_requested.load(std::memory_order_relaxed), std::memory_order_relaxed); } - - ctx.my_lifetime_state.store(d1::task_group_context::lifetime_state::bound, std::memory_order_release); + + ctx.my_lifetime_state.store(d1::task_group_context::lifetime_state::bound, std::memory_order_release); +} + +void task_group_context_impl::bind_to(d1::task_group_context& ctx, thread_data* td) { + __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL); + d1::task_group_context::lifetime_state state = ctx.my_lifetime_state.load(std::memory_order_acquire); + if (state <= d1::task_group_context::lifetime_state::locked) { + if (state == d1::task_group_context::lifetime_state::created && +#if defined(__INTEL_COMPILER) && __INTEL_COMPILER <= 1910 + ((std::atomic<typename std::underlying_type<d1::task_group_context::lifetime_state>::type>&)ctx.my_lifetime_state).compare_exchange_strong( + (typename std::underlying_type<d1::task_group_context::lifetime_state>::type&)state, + (typename std::underlying_type<d1::task_group_context::lifetime_state>::type)d1::task_group_context::lifetime_state::locked) +#else + ctx.my_lifetime_state.compare_exchange_strong(state, d1::task_group_context::lifetime_state::locked) +#endif + ) { + // If we are in the outermost task dispatch loop of an external thread, then + // there is nothing to bind this context to, and we skip the binding part + // treating the context as isolated. + __TBB_ASSERT(td->my_task_dispatcher->m_execute_data_ext.context != nullptr, nullptr); + if (td->my_task_dispatcher->m_execute_data_ext.context == td->my_arena->my_default_ctx || !ctx.my_traits.bound) { + if (!ctx.my_traits.fp_settings) { + copy_fp_settings(ctx, *td->my_arena->my_default_ctx); + } + ctx.my_lifetime_state.store(d1::task_group_context::lifetime_state::isolated, std::memory_order_release); + } else { + bind_to_impl(ctx, td); + } + ITT_STACK_CREATE(ctx.my_itt_caller); + } + spin_wait_while_eq(ctx.my_lifetime_state, d1::task_group_context::lifetime_state::locked); + } + __TBB_ASSERT(ctx.my_lifetime_state.load(std::memory_order_relaxed) != d1::task_group_context::lifetime_state::created, NULL); + __TBB_ASSERT(ctx.my_lifetime_state.load(std::memory_order_relaxed) != d1::task_group_context::lifetime_state::locked, NULL); } -void task_group_context_impl::bind_to(d1::task_group_context& ctx, thread_data* td) { - __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL); - d1::task_group_context::lifetime_state state = ctx.my_lifetime_state.load(std::memory_order_acquire); - if (state <= d1::task_group_context::lifetime_state::locked) { - if (state == d1::task_group_context::lifetime_state::created && -#if defined(__INTEL_COMPILER) && __INTEL_COMPILER <= 1910 - ((std::atomic<typename std::underlying_type<d1::task_group_context::lifetime_state>::type>&)ctx.my_lifetime_state).compare_exchange_strong( - (typename std::underlying_type<d1::task_group_context::lifetime_state>::type&)state, - (typename std::underlying_type<d1::task_group_context::lifetime_state>::type)d1::task_group_context::lifetime_state::locked) -#else - ctx.my_lifetime_state.compare_exchange_strong(state, d1::task_group_context::lifetime_state::locked) -#endif - ) { - // If we are in the outermost task dispatch loop of an external thread, then - // there is nothing to bind this context to, and we skip the binding part - // treating the context as 
isolated. - __TBB_ASSERT(td->my_task_dispatcher->m_execute_data_ext.context != nullptr, nullptr); - if (td->my_task_dispatcher->m_execute_data_ext.context == td->my_arena->my_default_ctx || !ctx.my_traits.bound) { - if (!ctx.my_traits.fp_settings) { - copy_fp_settings(ctx, *td->my_arena->my_default_ctx); - } - ctx.my_lifetime_state.store(d1::task_group_context::lifetime_state::isolated, std::memory_order_release); - } else { - bind_to_impl(ctx, td); - } - ITT_STACK_CREATE(ctx.my_itt_caller); - } - spin_wait_while_eq(ctx.my_lifetime_state, d1::task_group_context::lifetime_state::locked); - } - __TBB_ASSERT(ctx.my_lifetime_state.load(std::memory_order_relaxed) != d1::task_group_context::lifetime_state::created, NULL); - __TBB_ASSERT(ctx.my_lifetime_state.load(std::memory_order_relaxed) != d1::task_group_context::lifetime_state::locked, NULL); -} - template <typename T> -void task_group_context_impl::propagate_task_group_state(d1::task_group_context& ctx, std::atomic<T> d1::task_group_context::* mptr_state, d1::task_group_context& src, T new_state) { - __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL); - if ((ctx.*mptr_state).load(std::memory_order_relaxed) == new_state) { +void task_group_context_impl::propagate_task_group_state(d1::task_group_context& ctx, std::atomic<T> d1::task_group_context::* mptr_state, d1::task_group_context& src, T new_state) { + __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL); + if ((ctx.*mptr_state).load(std::memory_order_relaxed) == new_state) { // Nothing to do, whether descending from "src" or not, so no need to scan. // Hopefully this happens often thanks to earlier invocations. // This optimization is enabled by LIFO order in the context lists: // - new contexts are bound to the beginning of lists; // - descendants are newer than ancestors; // - earlier invocations are therefore likely to "paint" long chains. - } else if (&ctx == &src) { + } else if (&ctx == &src) { // This clause is disjunct from the traversal below, which skips src entirely. // Note that src.*mptr_state is not necessarily still equal to new_state (another thread may have changed it again). // Such interference is probably not frequent enough to aim for optimisation by writing new_state again (to make the other thread back down). // Letting the other thread prevail may also be fairer. - } else { - for (d1::task_group_context* ancestor = ctx.my_parent; ancestor != NULL; ancestor = ancestor->my_parent) { - if (ancestor == &src) { - for (d1::task_group_context* c = &ctx; c != ancestor; c = c->my_parent) - (c->*mptr_state).store(new_state, std::memory_order_relaxed); + } else { + for (d1::task_group_context* ancestor = ctx.my_parent; ancestor != NULL; ancestor = ancestor->my_parent) { + if (ancestor == &src) { + for (d1::task_group_context* c = &ctx; c != ancestor; c = c->my_parent) + (c->*mptr_state).store(new_state, std::memory_order_relaxed); break; } } } } -bool task_group_context_impl::cancel_group_execution(d1::task_group_context& ctx) { - __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL); - __TBB_ASSERT(ctx.my_cancellation_requested.load(std::memory_order_relaxed) <= 1, "The cancellation state can be either 0 or 1"); - if (ctx.my_cancellation_requested.load(std::memory_order_relaxed) || ctx.my_cancellation_requested.exchange(1)) { - // This task group and any descendants have already been canceled. - // (A newly added descendant would inherit its parent's ctx.my_cancellation_requested, - // not missing out on any cancellation still being propagated, and a context cannot be uncanceled.) 
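cancel_group_execution above makes cancellation idempotent with a single atomic exchange: only the caller that flips the flag from 0 to 1 goes on to propagate, and every later or losing caller backs off. The same gate in isolation:

#include <atomic>
#include <cstdint>
#include <cstdio>
#include <thread>

std::atomic<std::uint32_t> cancellation_requested{0};

// Returns true only for the caller that actually performed the cancellation;
// all other callers see false and skip the propagation work.
bool request_cancellation() {
    if (cancellation_requested.load(std::memory_order_relaxed))
        return false;                          // already cancelled: cheap fast path
    return cancellation_requested.exchange(1) == 0;
}

int main() {
    std::thread a([] { if (request_cancellation()) std::puts("A propagates"); });
    std::thread b([] { if (request_cancellation()) std::puts("B propagates"); });
    a.join(); b.join();                        // exactly one line is printed
}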
- return false; - } - governor::get_thread_data()->my_arena->my_market->propagate_task_group_state(&d1::task_group_context::my_cancellation_requested, ctx, uint32_t(1)); - return true; -} - -bool task_group_context_impl::is_group_execution_cancelled(const d1::task_group_context& ctx) { - return ctx.my_cancellation_requested.load(std::memory_order_relaxed) != 0; -} - -// IMPORTANT: It is assumed that this method is not used concurrently! -void task_group_context_impl::reset(d1::task_group_context& ctx) { - __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL); - //! TODO: Add assertion that this context does not have children - // No fences are necessary since this context can be accessed from another thread - // only after stealing happened (which means necessary fences were used). - if (ctx.my_exception) { - ctx.my_exception->destroy(); - ctx.my_exception = NULL; - } - ctx.my_cancellation_requested = 0; -} - -// IMPORTANT: It is assumed that this method is not used concurrently! -void task_group_context_impl::capture_fp_settings(d1::task_group_context& ctx) { - __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL); - //! TODO: Add assertion that this context does not have children - // No fences are necessary since this context can be accessed from another thread - // only after stealing happened (which means necessary fences were used). - d1::cpu_ctl_env* ctl = reinterpret_cast<d1::cpu_ctl_env*>(&ctx.my_cpu_ctl_env); - if (!ctx.my_traits.fp_settings) { - ctl = new (&ctx.my_cpu_ctl_env) d1::cpu_ctl_env; - ctx.my_traits.fp_settings = true; - } - ctl->get_env(); -} - -void task_group_context_impl::copy_fp_settings(d1::task_group_context& ctx, const d1::task_group_context& src) { - __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL); - __TBB_ASSERT(!ctx.my_traits.fp_settings, "The context already has FPU settings."); - __TBB_ASSERT(src.my_traits.fp_settings, "The source context does not have FPU settings."); - - const d1::cpu_ctl_env* src_ctl = reinterpret_cast<const d1::cpu_ctl_env*>(&src.my_cpu_ctl_env); - new (&ctx.my_cpu_ctl_env) d1::cpu_ctl_env(*src_ctl); - ctx.my_traits.fp_settings = true; -} - +bool task_group_context_impl::cancel_group_execution(d1::task_group_context& ctx) { + __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL); + __TBB_ASSERT(ctx.my_cancellation_requested.load(std::memory_order_relaxed) <= 1, "The cancellation state can be either 0 or 1"); + if (ctx.my_cancellation_requested.load(std::memory_order_relaxed) || ctx.my_cancellation_requested.exchange(1)) { + // This task group and any descendants have already been canceled. + // (A newly added descendant would inherit its parent's ctx.my_cancellation_requested, + // not missing out on any cancellation still being propagated, and a context cannot be uncanceled.) + return false; + } + governor::get_thread_data()->my_arena->my_market->propagate_task_group_state(&d1::task_group_context::my_cancellation_requested, ctx, uint32_t(1)); + return true; +} + +bool task_group_context_impl::is_group_execution_cancelled(const d1::task_group_context& ctx) { + return ctx.my_cancellation_requested.load(std::memory_order_relaxed) != 0; +} + +// IMPORTANT: It is assumed that this method is not used concurrently! +void task_group_context_impl::reset(d1::task_group_context& ctx) { + __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL); + //! TODO: Add assertion that this context does not have children + // No fences are necessary since this context can be accessed from another thread + // only after stealing happened (which means necessary fences were used). 
+ if (ctx.my_exception) { + ctx.my_exception->destroy(); + ctx.my_exception = NULL; + } + ctx.my_cancellation_requested = 0; +} + +// IMPORTANT: It is assumed that this method is not used concurrently! +void task_group_context_impl::capture_fp_settings(d1::task_group_context& ctx) { + __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL); + //! TODO: Add assertion that this context does not have children + // No fences are necessary since this context can be accessed from another thread + // only after stealing happened (which means necessary fences were used). + d1::cpu_ctl_env* ctl = reinterpret_cast<d1::cpu_ctl_env*>(&ctx.my_cpu_ctl_env); + if (!ctx.my_traits.fp_settings) { + ctl = new (&ctx.my_cpu_ctl_env) d1::cpu_ctl_env; + ctx.my_traits.fp_settings = true; + } + ctl->get_env(); +} + +void task_group_context_impl::copy_fp_settings(d1::task_group_context& ctx, const d1::task_group_context& src) { + __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL); + __TBB_ASSERT(!ctx.my_traits.fp_settings, "The context already has FPU settings."); + __TBB_ASSERT(src.my_traits.fp_settings, "The source context does not have FPU settings."); + + const d1::cpu_ctl_env* src_ctl = reinterpret_cast<const d1::cpu_ctl_env*>(&src.my_cpu_ctl_env); + new (&ctx.my_cpu_ctl_env) d1::cpu_ctl_env(*src_ctl); + ctx.my_traits.fp_settings = true; +} + template <typename T> -void thread_data::propagate_task_group_state(std::atomic<T> d1::task_group_context::* mptr_state, d1::task_group_context& src, T new_state) { - spin_mutex::scoped_lock lock(my_context_list_state.mutex); +void thread_data::propagate_task_group_state(std::atomic<T> d1::task_group_context::* mptr_state, d1::task_group_context& src, T new_state) { + spin_mutex::scoped_lock lock(my_context_list_state.mutex); // Acquire fence is necessary to ensure that the subsequent node->my_next load // returned the correct value in case it was just inserted in another thread. - // The fence also ensures visibility of the correct ctx.my_parent value. - d1::context_list_node* node = my_context_list_state.head.next.load(std::memory_order_acquire); - while (node != &my_context_list_state.head) { - d1::task_group_context& ctx = __TBB_get_object_ref(d1::task_group_context, my_node, node); - if ((ctx.*mptr_state).load(std::memory_order_relaxed) != new_state) - task_group_context_impl::propagate_task_group_state(ctx, mptr_state, src, new_state); - node = node->next.load(std::memory_order_relaxed); + // The fence also ensures visibility of the correct ctx.my_parent value. + d1::context_list_node* node = my_context_list_state.head.next.load(std::memory_order_acquire); + while (node != &my_context_list_state.head) { + d1::task_group_context& ctx = __TBB_get_object_ref(d1::task_group_context, my_node, node); + if ((ctx.*mptr_state).load(std::memory_order_relaxed) != new_state) + task_group_context_impl::propagate_task_group_state(ctx, mptr_state, src, new_state); + node = node->next.load(std::memory_order_relaxed); } // Sync up local propagation epoch with the global one. Release fence prevents // reordering of possible store to *mptr_state after the sync point. 
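The per-context propagate_task_group_state above walks the parent chain to confirm that src really is an ancestor of ctx and only then paints new_state over every context between them. A compact sketch of that check-then-paint walk, with plain ints standing in for the atomic state members:

#include <cassert>

struct context {
    context* parent{nullptr};
    int state{0};
};

// Paint 'new_state' onto 'ctx' and its ancestors strictly below 'src',
// but only if 'src' is actually an ancestor of 'ctx'.
void propagate_state(context& ctx, context& src, int new_state) {
    if (ctx.state == new_state || &ctx == &src)
        return;                              // already painted, or src itself
    for (context* ancestor = ctx.parent; ancestor != nullptr; ancestor = ancestor->parent) {
        if (ancestor == &src) {
            for (context* c = &ctx; c != ancestor; c = c->parent)
                c->state = new_state;        // paint the chain up to (not including) src
            break;
        }
    }
}

int main() {
    context root, mid, leaf, stranger;
    mid.parent = &root; leaf.parent = &mid;
    propagate_state(leaf, root, 1);
    assert(leaf.state == 1 && mid.state == 1 && root.state == 0);
    propagate_state(stranger, root, 1);      // root is not an ancestor: untouched
    assert(stranger.state == 0);
}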
- my_context_list_state.epoch.store(the_context_state_propagation_epoch.load(std::memory_order_relaxed), std::memory_order_release); + my_context_list_state.epoch.store(the_context_state_propagation_epoch.load(std::memory_order_relaxed), std::memory_order_release); } template <typename T> -bool market::propagate_task_group_state(std::atomic<T> d1::task_group_context::* mptr_state, d1::task_group_context& src, T new_state) { - if (src.my_state.load(std::memory_order_relaxed) != d1::task_group_context::may_have_children) +bool market::propagate_task_group_state(std::atomic<T> d1::task_group_context::* mptr_state, d1::task_group_context& src, T new_state) { + if (src.my_state.load(std::memory_order_relaxed) != d1::task_group_context::may_have_children) return true; // The whole propagation algorithm is under the lock in order to ensure correctness // in case of concurrent state changes at the different levels of the context tree. // See comment at the bottom of scheduler.cpp context_state_propagation_mutex_type::scoped_lock lock(the_context_state_propagation_mutex); - if ((src.*mptr_state).load(std::memory_order_relaxed) != new_state) + if ((src.*mptr_state).load(std::memory_order_relaxed) != new_state) // Another thread has concurrently changed the state. Back down. return false; // Advance global state propagation epoch - ++the_context_state_propagation_epoch; - // Propagate to all workers and external threads and sync up their local epochs with the global one + ++the_context_state_propagation_epoch; + // Propagate to all workers and external threads and sync up their local epochs with the global one unsigned num_workers = my_first_unused_worker_idx; - for (unsigned i = 0; i < num_workers; ++i) { - thread_data* td = my_workers[i]; + for (unsigned i = 0; i < num_workers; ++i) { + thread_data* td = my_workers[i]; // If the worker is only about to be registered, skip it. - if (td) - td->propagate_task_group_state(mptr_state, src, new_state); + if (td) + td->propagate_task_group_state(mptr_state, src, new_state); } - // Propagate to all external threads + // Propagate to all external threads // The whole propagation sequence is locked, thus no contention is expected - for (thread_data_list_type::iterator it = my_masters.begin(); it != my_masters.end(); it++) - it->propagate_task_group_state(mptr_state, src, new_state); + for (thread_data_list_type::iterator it = my_masters.begin(); it != my_masters.end(); it++) + it->propagate_task_group_state(mptr_state, src, new_state); return true; } -/* - Comments: - -1. The premise of the cancellation support implementation is that cancellations are - not part of the hot path of the program execution. Therefore all changes in its - implementation in order to reduce the overhead of the cancellation control flow - should be done only in ways that do not increase overhead of the normal execution. - - In general, contexts are used by all threads and their descendants are created in - different threads as well. In order to minimize impact of the cross-thread tree - maintenance (first of all because of the synchronization), the tree of contexts - is split into pieces, each of which is handled by a single thread. Such pieces - are represented as lists of contexts, members of which are contexts that were - bound to their parents in the given thread. 
- - The context tree maintenance and cancellation propagation algorithms are designed - in such a manner that cross-thread access to a context list will take place only - when cancellation signal is sent (by user or when an exception happens), and - synchronization is necessary only then. Thus the normal execution flow (without - exceptions and cancellation) remains free from any synchronization done on - behalf of exception handling and cancellation support. - -2. Consider parallel cancellations at the different levels of the context tree: - - Ctx1 <- Cancelled by Thread1 |- Thread2 started processing - | | - Ctx2 |- Thread1 started processing - | T1 |- Thread2 finishes and syncs up local counters - Ctx3 <- Cancelled by Thread2 | - | |- Ctx5 is bound to Ctx2 - Ctx4 | - T2 |- Thread1 reaches Ctx2 - - Thread-propagator of each cancellation increments global counter. However the thread - propagating the cancellation from the outermost context (Thread1) may be the last - to finish. Which means that the local counters may be synchronized earlier (by Thread2, - at Time1) than it propagated cancellation into Ctx2 (at time Time2). If a new context - (Ctx5) is created and bound to Ctx2 between Time1 and Time2, checking its parent only - (Ctx2) may result in cancellation request being lost. - - This issue is solved by doing the whole propagation under the lock. - - If we need more concurrency while processing parallel cancellations, we could try - the following modification of the propagation algorithm: - - advance global counter and remember it - for each thread: - scan thread's list of contexts - for each thread: - sync up its local counter only if the global counter has not been changed - - However this version of the algorithm requires more analysis and verification. -*/ - -void __TBB_EXPORTED_FUNC initialize(d1::task_group_context& ctx) { - task_group_context_impl::initialize(ctx); +/* + Comments: + +1. The premise of the cancellation support implementation is that cancellations are + not part of the hot path of the program execution. Therefore all changes in its + implementation in order to reduce the overhead of the cancellation control flow + should be done only in ways that do not increase overhead of the normal execution. + + In general, contexts are used by all threads and their descendants are created in + different threads as well. In order to minimize impact of the cross-thread tree + maintenance (first of all because of the synchronization), the tree of contexts + is split into pieces, each of which is handled by a single thread. Such pieces + are represented as lists of contexts, members of which are contexts that were + bound to their parents in the given thread. + + The context tree maintenance and cancellation propagation algorithms are designed + in such a manner that cross-thread access to a context list will take place only + when cancellation signal is sent (by user or when an exception happens), and + synchronization is necessary only then. Thus the normal execution flow (without + exceptions and cancellation) remains free from any synchronization done on + behalf of exception handling and cancellation support. + +2. 
Consider parallel cancellations at the different levels of the context tree: + + Ctx1 <- Cancelled by Thread1 |- Thread2 started processing + | | + Ctx2 |- Thread1 started processing + | T1 |- Thread2 finishes and syncs up local counters + Ctx3 <- Cancelled by Thread2 | + | |- Ctx5 is bound to Ctx2 + Ctx4 | + T2 |- Thread1 reaches Ctx2 + + Thread-propagator of each cancellation increments global counter. However the thread + propagating the cancellation from the outermost context (Thread1) may be the last + to finish. Which means that the local counters may be synchronized earlier (by Thread2, + at Time1) than it propagated cancellation into Ctx2 (at time Time2). If a new context + (Ctx5) is created and bound to Ctx2 between Time1 and Time2, checking its parent only + (Ctx2) may result in cancellation request being lost. + + This issue is solved by doing the whole propagation under the lock. + + If we need more concurrency while processing parallel cancellations, we could try + the following modification of the propagation algorithm: + + advance global counter and remember it + for each thread: + scan thread's list of contexts + for each thread: + sync up its local counter only if the global counter has not been changed + + However this version of the algorithm requires more analysis and verification. +*/ + +void __TBB_EXPORTED_FUNC initialize(d1::task_group_context& ctx) { + task_group_context_impl::initialize(ctx); +} +void __TBB_EXPORTED_FUNC destroy(d1::task_group_context& ctx) { + task_group_context_impl::destroy(ctx); } -void __TBB_EXPORTED_FUNC destroy(d1::task_group_context& ctx) { - task_group_context_impl::destroy(ctx); -} -void __TBB_EXPORTED_FUNC reset(d1::task_group_context& ctx) { - task_group_context_impl::reset(ctx); -} -bool __TBB_EXPORTED_FUNC cancel_group_execution(d1::task_group_context& ctx) { - return task_group_context_impl::cancel_group_execution(ctx); -} -bool __TBB_EXPORTED_FUNC is_group_execution_cancelled(d1::task_group_context& ctx) { - return task_group_context_impl::is_group_execution_cancelled(ctx); -} -void __TBB_EXPORTED_FUNC capture_fp_settings(d1::task_group_context& ctx) { - task_group_context_impl::capture_fp_settings(ctx); -} - -} // namespace r1 -} // namespace detail -} // namespace tbb +void __TBB_EXPORTED_FUNC reset(d1::task_group_context& ctx) { + task_group_context_impl::reset(ctx); +} +bool __TBB_EXPORTED_FUNC cancel_group_execution(d1::task_group_context& ctx) { + return task_group_context_impl::cancel_group_execution(ctx); +} +bool __TBB_EXPORTED_FUNC is_group_execution_cancelled(d1::task_group_context& ctx) { + return task_group_context_impl::is_group_execution_cancelled(ctx); +} +void __TBB_EXPORTED_FUNC capture_fp_settings(d1::task_group_context& ctx) { + task_group_context_impl::capture_fp_settings(ctx); +} + +} // namespace r1 +} // namespace detail +} // namespace tbb diff --git a/contrib/libs/tbb/src/tbb/task_stream.h b/contrib/libs/tbb/src/tbb/task_stream.h index a6e8ae852b..f32ef94e80 100644 --- a/contrib/libs/tbb/src/tbb/task_stream.h +++ b/contrib/libs/tbb/src/tbb/task_stream.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -17,53 +17,53 @@ #ifndef _TBB_task_stream_H #define _TBB_task_stream_H -//! This file is a possible future replacement for the task_stream class implemented in -//! task_stream.h. 
It refactors the code and extends task_stream capabilities by moving lane -//! management during operations on caller side. Despite the fact that new implementation should not -//! affect performance of the original task stream, analysis on this subject was not made at the -//! time it was developed. In addition, it is not clearly seen at the moment that this container -//! would be suitable for critical tasks due to linear time complexity on its operations. - -#include "oneapi/tbb/detail/_utils.h" - -#include "oneapi/tbb/spin_mutex.h" -#include "oneapi/tbb/cache_aligned_allocator.h" - -#include "scheduler_common.h" -#include "misc.h" // for FastRandom - +//! This file is a possible future replacement for the task_stream class implemented in +//! task_stream.h. It refactors the code and extends task_stream capabilities by moving lane +//! management during operations on caller side. Despite the fact that new implementation should not +//! affect performance of the original task stream, analysis on this subject was not made at the +//! time it was developed. In addition, it is not clearly seen at the moment that this container +//! would be suitable for critical tasks due to linear time complexity on its operations. + +#include "oneapi/tbb/detail/_utils.h" + +#include "oneapi/tbb/spin_mutex.h" +#include "oneapi/tbb/cache_aligned_allocator.h" + +#include "scheduler_common.h" +#include "misc.h" // for FastRandom + #include <deque> #include <climits> -#include <atomic> +#include <atomic> namespace tbb { -namespace detail { -namespace r1 { +namespace detail { +namespace r1 { //! Essentially, this is just a pair of a queue and a mutex to protect the queue. /** The reason std::pair is not used is that the code would look less clean if field names were replaced with 'first' and 'second'. **/ template< typename T, typename mutex_t > -struct alignas(max_nfs_size) queue_and_mutex { - typedef std::deque< T, cache_aligned_allocator<T> > queue_base_t; +struct alignas(max_nfs_size) queue_and_mutex { + typedef std::deque< T, cache_aligned_allocator<T> > queue_base_t; - queue_base_t my_queue{}; - mutex_t my_mutex{}; + queue_base_t my_queue{}; + mutex_t my_mutex{}; }; -using population_t = uintptr_t; +using population_t = uintptr_t; const population_t one = 1; -inline void set_one_bit( std::atomic<population_t>& dest, int pos ) { +inline void set_one_bit( std::atomic<population_t>& dest, int pos ) { __TBB_ASSERT( pos>=0, NULL ); __TBB_ASSERT( pos<int(sizeof(population_t)*CHAR_BIT), NULL ); - dest.fetch_or( one<<pos ); + dest.fetch_or( one<<pos ); } -inline void clear_one_bit( std::atomic<population_t>& dest, int pos ) { +inline void clear_one_bit( std::atomic<population_t>& dest, int pos ) { __TBB_ASSERT( pos>=0, NULL ); __TBB_ASSERT( pos<int(sizeof(population_t)*CHAR_BIT), NULL ); - dest.fetch_and( ~(one<<pos) ); + dest.fetch_and( ~(one<<pos) ); } inline bool is_bit_set( population_t val, int pos ) { @@ -72,217 +72,217 @@ inline bool is_bit_set( population_t val, int pos ) { return (val & (one<<pos)) != 0; } -struct random_lane_selector : -#if __INTEL_COMPILER == 1110 || __INTEL_COMPILER == 1500 - no_assign -#else - no_copy -#endif -{ - random_lane_selector( FastRandom& random ) : my_random( random ) {} - unsigned operator()( unsigned out_of ) const { - __TBB_ASSERT( ((out_of-1) & out_of) == 0, "number of lanes is not power of two." 
); - return my_random.get() & (out_of-1); - } -private: - FastRandom& my_random; -}; - -struct lane_selector_base : -#if __INTEL_COMPILER == 1110 || __INTEL_COMPILER == 1500 - no_assign -#else - no_copy -#endif -{ - unsigned& my_previous; - lane_selector_base( unsigned& previous ) : my_previous( previous ) {} -}; - -struct subsequent_lane_selector : lane_selector_base { - subsequent_lane_selector( unsigned& previous ) : lane_selector_base( previous ) {} - unsigned operator()( unsigned out_of ) const { - __TBB_ASSERT( ((out_of-1) & out_of) == 0, "number of lanes is not power of two." ); - return (++my_previous &= out_of-1); - } -}; - -struct preceding_lane_selector : lane_selector_base { - preceding_lane_selector( unsigned& previous ) : lane_selector_base( previous ) {} - unsigned operator()( unsigned out_of ) const { - __TBB_ASSERT( ((out_of-1) & out_of) == 0, "number of lanes is not power of two." ); - return (--my_previous &= (out_of-1)); - } -}; - -//! Specializes from which side of the underlying container elements are retrieved. Method must be -//! called under corresponding mutex locked. -template<task_stream_accessor_type accessor> -class task_stream_accessor : no_copy { -protected: - using lane_t = queue_and_mutex <d1::task*, spin_mutex>; - d1::task* get_item( lane_t::queue_base_t& queue ) { - d1::task* result = queue.front(); - queue.pop_front(); - return result; - } -}; - -template<> -class task_stream_accessor< back_nonnull_accessor > : no_copy { -protected: - using lane_t = queue_and_mutex <d1::task*, spin_mutex>; - d1::task* get_item( lane_t::queue_base_t& queue ) { - d1::task* result = nullptr; - __TBB_ASSERT(!queue.empty(), nullptr); - // Isolated task can put zeros in queue see look_specific - do { - result = queue.back(); - queue.pop_back(); - } while ( !result && !queue.empty() ); - - __TBB_ASSERT_RELEASE(result, nullptr); - return result; - } -}; - +struct random_lane_selector : +#if __INTEL_COMPILER == 1110 || __INTEL_COMPILER == 1500 + no_assign +#else + no_copy +#endif +{ + random_lane_selector( FastRandom& random ) : my_random( random ) {} + unsigned operator()( unsigned out_of ) const { + __TBB_ASSERT( ((out_of-1) & out_of) == 0, "number of lanes is not power of two." ); + return my_random.get() & (out_of-1); + } +private: + FastRandom& my_random; +}; + +struct lane_selector_base : +#if __INTEL_COMPILER == 1110 || __INTEL_COMPILER == 1500 + no_assign +#else + no_copy +#endif +{ + unsigned& my_previous; + lane_selector_base( unsigned& previous ) : my_previous( previous ) {} +}; + +struct subsequent_lane_selector : lane_selector_base { + subsequent_lane_selector( unsigned& previous ) : lane_selector_base( previous ) {} + unsigned operator()( unsigned out_of ) const { + __TBB_ASSERT( ((out_of-1) & out_of) == 0, "number of lanes is not power of two." ); + return (++my_previous &= out_of-1); + } +}; + +struct preceding_lane_selector : lane_selector_base { + preceding_lane_selector( unsigned& previous ) : lane_selector_base( previous ) {} + unsigned operator()( unsigned out_of ) const { + __TBB_ASSERT( ((out_of-1) & out_of) == 0, "number of lanes is not power of two." ); + return (--my_previous &= (out_of-1)); + } +}; + +//! Specializes from which side of the underlying container elements are retrieved. Method must be +//! called under corresponding mutex locked. 
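The lane selectors above assume a power-of-two lane count, so wrap-around is a bitwise AND rather than a modulo, and the previously used index is advanced in place through a reference. The same idea without the TBB plumbing:

#include <cassert>

// Cycle forward through 'out_of' lanes (out_of must be a power of two),
// updating the caller-owned cursor in place, like subsequent_lane_selector.
unsigned next_lane(unsigned& previous, unsigned out_of) {
    assert(((out_of - 1) & out_of) == 0 && "lane count must be a power of two");
    return ++previous &= (out_of - 1);
}

int main() {
    unsigned cursor = 6;
    assert(next_lane(cursor, 8) == 7);
    assert(next_lane(cursor, 8) == 0);   // wraps with a mask, no modulo needed
    assert(next_lane(cursor, 8) == 1);
}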
+template<task_stream_accessor_type accessor> +class task_stream_accessor : no_copy { +protected: + using lane_t = queue_and_mutex <d1::task*, spin_mutex>; + d1::task* get_item( lane_t::queue_base_t& queue ) { + d1::task* result = queue.front(); + queue.pop_front(); + return result; + } +}; + +template<> +class task_stream_accessor< back_nonnull_accessor > : no_copy { +protected: + using lane_t = queue_and_mutex <d1::task*, spin_mutex>; + d1::task* get_item( lane_t::queue_base_t& queue ) { + d1::task* result = nullptr; + __TBB_ASSERT(!queue.empty(), nullptr); + // Isolated task can put zeros in queue see look_specific + do { + result = queue.back(); + queue.pop_back(); + } while ( !result && !queue.empty() ); + + __TBB_ASSERT_RELEASE(result, nullptr); + return result; + } +}; + //! The container for "fairness-oriented" aka "enqueued" tasks. -template<task_stream_accessor_type accessor> -class task_stream : public task_stream_accessor< accessor > { - using lane_t = typename task_stream_accessor<accessor>::lane_t; - std::atomic<population_t> population{}; - lane_t* lanes{nullptr}; - unsigned N{}; +template<task_stream_accessor_type accessor> +class task_stream : public task_stream_accessor< accessor > { + using lane_t = typename task_stream_accessor<accessor>::lane_t; + std::atomic<population_t> population{}; + lane_t* lanes{nullptr}; + unsigned N{}; public: - task_stream() = default; + task_stream() = default; void initialize( unsigned n_lanes ) { const unsigned max_lanes = sizeof(population_t) * CHAR_BIT; - N = n_lanes >= max_lanes ? max_lanes : n_lanes > 2 ? 1 << (tbb::detail::log2(n_lanes - 1) + 1) : 2; - __TBB_ASSERT( N == max_lanes || (N >= n_lanes && ((N - 1) & N) == 0), "number of lanes miscalculated" ); + N = n_lanes >= max_lanes ? max_lanes : n_lanes > 2 ? 1 << (tbb::detail::log2(n_lanes - 1) + 1) : 2; + __TBB_ASSERT( N == max_lanes || (N >= n_lanes && ((N - 1) & N) == 0), "number of lanes miscalculated" ); __TBB_ASSERT( N <= sizeof(population_t) * CHAR_BIT, NULL ); - lanes = static_cast<lane_t*>(cache_aligned_allocate(sizeof(lane_t) * N)); - for (unsigned i = 0; i < N; ++i) { - new (lanes + i) lane_t; + lanes = static_cast<lane_t*>(cache_aligned_allocate(sizeof(lane_t) * N)); + for (unsigned i = 0; i < N; ++i) { + new (lanes + i) lane_t; } - __TBB_ASSERT( !population.load(std::memory_order_relaxed), NULL ); + __TBB_ASSERT( !population.load(std::memory_order_relaxed), NULL ); } ~task_stream() { - if (lanes) { - for (unsigned i = 0; i < N; ++i) { - lanes[i].~lane_t(); + if (lanes) { + for (unsigned i = 0; i < N; ++i) { + lanes[i].~lane_t(); } - cache_aligned_deallocate(lanes); + cache_aligned_deallocate(lanes); } } - //! Push a task into a lane. Lane selection is performed by passed functor. - template<typename lane_selector_t> - void push(d1::task* source, const lane_selector_t& next_lane ) { - bool succeed = false; - unsigned lane = 0; - do { - lane = next_lane( /*out_of=*/N ); - __TBB_ASSERT( lane < N, "Incorrect lane index." ); - } while( ! (succeed = try_push( source, lane )) ); - } - - //! Try finding and popping a task using passed functor for lane selection. Last used lane is - //! updated inside lane selector. - template<typename lane_selector_t> - d1::task* pop( const lane_selector_t& next_lane ) { - d1::task* popped = NULL; - unsigned lane = 0; - do { - lane = next_lane( /*out_of=*/N ); - __TBB_ASSERT( lane < N, "Incorrect lane index." ); - } while( !empty() && !(popped = try_pop( lane )) ); - return popped; - } - - //! Try finding and popping a related task. 
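initialize above sizes the lane array to the smallest power of two covering the requested lane count, capped at one lane per bit of population_t, so both the occupancy bitmask and the mask-based selectors stay valid. A worked version of that rounding rule (lane_count is an illustrative name):

#include <cassert>
#include <climits>
#include <cstdint>

using population_t = std::uintptr_t;

// Smallest power of two >= n_lanes (never less than 2), capped at the number
// of bits available in the population bitmask.
unsigned lane_count(unsigned n_lanes) {
    const unsigned max_lanes = sizeof(population_t) * CHAR_BIT;
    if (n_lanes >= max_lanes) return max_lanes;
    unsigned n = 2;
    while (n < n_lanes) n <<= 1;       // round up to the next power of two
    return n;
}

int main() {
    assert(lane_count(2) == 2);
    assert(lane_count(3) == 4);
    assert(lane_count(5) == 8);
    assert(lane_count(1000) == sizeof(population_t) * CHAR_BIT);
}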
- d1::task* pop_specific( unsigned& last_used_lane, isolation_type isolation ) { - d1::task* result = NULL; - // Lane selection is round-robin in backward direction. - unsigned idx = last_used_lane & (N-1); - do { - if( is_bit_set( population.load(std::memory_order_relaxed), idx ) ) { - lane_t& lane = lanes[idx]; + //! Push a task into a lane. Lane selection is performed by passed functor. + template<typename lane_selector_t> + void push(d1::task* source, const lane_selector_t& next_lane ) { + bool succeed = false; + unsigned lane = 0; + do { + lane = next_lane( /*out_of=*/N ); + __TBB_ASSERT( lane < N, "Incorrect lane index." ); + } while( ! (succeed = try_push( source, lane )) ); + } + + //! Try finding and popping a task using passed functor for lane selection. Last used lane is + //! updated inside lane selector. + template<typename lane_selector_t> + d1::task* pop( const lane_selector_t& next_lane ) { + d1::task* popped = NULL; + unsigned lane = 0; + do { + lane = next_lane( /*out_of=*/N ); + __TBB_ASSERT( lane < N, "Incorrect lane index." ); + } while( !empty() && !(popped = try_pop( lane )) ); + return popped; + } + + //! Try finding and popping a related task. + d1::task* pop_specific( unsigned& last_used_lane, isolation_type isolation ) { + d1::task* result = NULL; + // Lane selection is round-robin in backward direction. + unsigned idx = last_used_lane & (N-1); + do { + if( is_bit_set( population.load(std::memory_order_relaxed), idx ) ) { + lane_t& lane = lanes[idx]; spin_mutex::scoped_lock lock; if( lock.try_acquire(lane.my_mutex) && !lane.my_queue.empty() ) { - result = look_specific( lane.my_queue, isolation ); + result = look_specific( lane.my_queue, isolation ); if( lane.my_queue.empty() ) - clear_one_bit( population, idx ); - if( result ) - break; + clear_one_bit( population, idx ); + if( result ) + break; } } - idx=(idx-1)&(N-1); - } while( !empty() && idx != last_used_lane ); + idx=(idx-1)&(N-1); + } while( !empty() && idx != last_used_lane ); last_used_lane = idx; return result; } //! Checks existence of a task. - bool empty() { - return !population.load(std::memory_order_relaxed); + bool empty() { + return !population.load(std::memory_order_relaxed); } -private: - //! Returns true on successful push, otherwise - false. - bool try_push(d1::task* source, unsigned lane_idx ) { - spin_mutex::scoped_lock lock; - if( lock.try_acquire( lanes[lane_idx].my_mutex ) ) { - lanes[lane_idx].my_queue.push_back( source ); - set_one_bit( population, lane_idx ); // TODO: avoid atomic op if the bit is already set - return true; - } - return false; - } - - //! Returns pointer to task on successful pop, otherwise - NULL. - d1::task* try_pop( unsigned lane_idx ) { - if( !is_bit_set( population.load(std::memory_order_relaxed), lane_idx ) ) - return NULL; - d1::task* result = NULL; - lane_t& lane = lanes[lane_idx]; - spin_mutex::scoped_lock lock; - if( lock.try_acquire( lane.my_mutex ) && !lane.my_queue.empty() ) { - result = this->get_item( lane.my_queue ); - if( lane.my_queue.empty() ) - clear_one_bit( population, lane_idx ); - } +private: + //! Returns true on successful push, otherwise - false. + bool try_push(d1::task* source, unsigned lane_idx ) { + spin_mutex::scoped_lock lock; + if( lock.try_acquire( lanes[lane_idx].my_mutex ) ) { + lanes[lane_idx].my_queue.push_back( source ); + set_one_bit( population, lane_idx ); // TODO: avoid atomic op if the bit is already set + return true; + } + return false; + } + + //! Returns pointer to task on successful pop, otherwise - NULL. 
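try_push and try_pop above keep a bitmask of non-empty lanes up to date with fetch_or/fetch_and, so empty() and the lane scans can probe for available work without locking any queue. A short usage demonstration of those bit helpers:

#include <atomic>
#include <cassert>
#include <climits>
#include <cstdint>

using population_t = std::uintptr_t;
const population_t one = 1;

void set_one_bit(std::atomic<population_t>& dest, int pos)   { dest.fetch_or(one << pos); }
void clear_one_bit(std::atomic<population_t>& dest, int pos) { dest.fetch_and(~(one << pos)); }
bool is_bit_set(population_t val, int pos)                   { return (val & (one << pos)) != 0; }

int main() {
    std::atomic<population_t> population{0};
    set_one_bit(population, 3);                  // lane 3 just became non-empty
    assert(is_bit_set(population.load(), 3));
    assert(!is_bit_set(population.load(), 2));
    clear_one_bit(population, 3);                // last task taken from lane 3
    assert(population.load() == 0);              // the stream reads as empty again
}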
+ d1::task* try_pop( unsigned lane_idx ) { + if( !is_bit_set( population.load(std::memory_order_relaxed), lane_idx ) ) + return NULL; + d1::task* result = NULL; + lane_t& lane = lanes[lane_idx]; + spin_mutex::scoped_lock lock; + if( lock.try_acquire( lane.my_mutex ) && !lane.my_queue.empty() ) { + result = this->get_item( lane.my_queue ); + if( lane.my_queue.empty() ) + clear_one_bit( population, lane_idx ); + } return result; } - - // TODO: unify '*_specific' logic with 'pop' methods above - d1::task* look_specific( typename lane_t::queue_base_t& queue, isolation_type isolation ) { - __TBB_ASSERT( !queue.empty(), NULL ); - // TODO: add a worst-case performance test and consider an alternative container with better - // performance for isolation search. - typename lane_t::queue_base_t::iterator curr = queue.end(); - do { - // TODO: consider logic from get_task to simplify the code. - d1::task* result = *--curr; - if( result && task_accessor::isolation(*result) == isolation ) { - if( queue.end() - curr == 1 ) - queue.pop_back(); // a little of housekeeping along the way - else - *curr = 0; // grabbing task with the same isolation - // TODO: move one of the container's ends instead if the task has been found there - return result; - } - } while( curr != queue.begin() ); - return NULL; - } - + + // TODO: unify '*_specific' logic with 'pop' methods above + d1::task* look_specific( typename lane_t::queue_base_t& queue, isolation_type isolation ) { + __TBB_ASSERT( !queue.empty(), NULL ); + // TODO: add a worst-case performance test and consider an alternative container with better + // performance for isolation search. + typename lane_t::queue_base_t::iterator curr = queue.end(); + do { + // TODO: consider logic from get_task to simplify the code. + d1::task* result = *--curr; + if( result && task_accessor::isolation(*result) == isolation ) { + if( queue.end() - curr == 1 ) + queue.pop_back(); // a little of housekeeping along the way + else + *curr = 0; // grabbing task with the same isolation + // TODO: move one of the container's ends instead if the task has been found there + return result; + } + } while( curr != queue.begin() ); + return NULL; + } + }; // task_stream -} // namespace r1 -} // namespace detail +} // namespace r1 +} // namespace detail } // namespace tbb #endif /* _TBB_task_stream_H */ diff --git a/contrib/libs/tbb/src/tbb/thread_data.h b/contrib/libs/tbb/src/tbb/thread_data.h index 6446b25454..41d4a0cf60 100644 --- a/contrib/libs/tbb/src/tbb/thread_data.h +++ b/contrib/libs/tbb/src/tbb/thread_data.h @@ -1,273 +1,273 @@ -/* - Copyright (c) 2020-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
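look_specific() above scans a lane backward for the first task whose isolation tag matches, pops it if it sits at the back, and otherwise leaves a null placeholder that the back_nonnull_accessor later skips. The sketch below reproduces that search over a std::deque; the task struct and the int isolation tag are simplified stand-ins, not the real d1::task or isolation_type.

    // Simplified sketch of the backward isolation search in look_specific().
    #include <deque>
    #include <cassert>

    struct task { int isolation; };

    task* look_specific_sketch(std::deque<task*>& queue, int isolation) {
        assert(!queue.empty());
        auto curr = queue.end();
        do {
            task* result = *--curr;
            if (result && result->isolation == isolation) {
                if (queue.end() - curr == 1)
                    queue.pop_back();     // found at the back: shrink the queue
                else
                    *curr = nullptr;      // found in the middle: leave a hole to skip later
                return result;
            }
        } while (curr != queue.begin());
        return nullptr;                   // no task with this isolation in the lane
    }

    int main() {
        task a{1}, b{2}, c{1};
        std::deque<task*> lane{&a, &b, &c};
        assert(look_specific_sketch(lane, 2) == &b);   // taken from the middle
        assert(lane[1] == nullptr);                    // hole left behind
        assert(look_specific_sketch(lane, 1) == &c);   // taken from the back
        assert(lane.size() == 2);
        return 0;
    }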
-*/ - -#ifndef __TBB_thread_data_H -#define __TBB_thread_data_H - -#include "oneapi/tbb/detail/_task.h" -#include "oneapi/tbb/task.h" - -#include "rml_base.h" // rml::job - -#include "scheduler_common.h" -#include "arena.h" -#include "concurrent_monitor.h" -#include "mailbox.h" -#include "misc.h" // FastRandom -#include "small_object_pool_impl.h" - -#include <atomic> - -namespace tbb { -namespace detail { -namespace r1 { - -class task; -class arena_slot; -class task_group_context; -class task_dispatcher; - -//------------------------------------------------------------------------ -// Thread Data -//------------------------------------------------------------------------ -class thread_data : public ::rml::job - , public intrusive_list_node - , no_copy { -public: - thread_data(unsigned short index, bool is_worker) - : my_arena_index{ index } - , my_is_worker{ is_worker } - , my_task_dispatcher{ nullptr } - , my_arena{} - , my_arena_slot{} - , my_inbox{} - , my_random{ this } - , my_last_observer{ nullptr } - , my_small_object_pool{new (cache_aligned_allocate(sizeof(small_object_pool_impl))) small_object_pool_impl{}} - , my_context_list_state{} -#if __TBB_RESUMABLE_TASKS - , my_post_resume_action{ post_resume_action::none } - , my_post_resume_arg{nullptr} -#endif /* __TBB_RESUMABLE_TASKS */ - { - ITT_SYNC_CREATE(&my_context_list_state.mutex, SyncType_Scheduler, SyncObj_ContextsList); - my_context_list_state.head.next.store(&my_context_list_state.head, std::memory_order_relaxed); - my_context_list_state.head.prev.store(&my_context_list_state.head, std::memory_order_relaxed); - } - - ~thread_data() { - context_list_cleanup(); - my_small_object_pool->destroy(); - poison_pointer(my_task_dispatcher); - poison_pointer(my_arena); - poison_pointer(my_arena_slot); - poison_pointer(my_last_observer); - poison_pointer(my_small_object_pool); -#if __TBB_RESUMABLE_TASKS - poison_pointer(my_post_resume_arg); -#endif /* __TBB_RESUMABLE_TASKS */ - poison_value(my_context_list_state.epoch); - poison_value(my_context_list_state.local_update); - poison_value(my_context_list_state.nonlocal_update); - } - - void attach_arena(arena& a, std::size_t index); - bool is_attached_to(arena*); - void attach_task_dispatcher(task_dispatcher&); - void detach_task_dispatcher(); - void context_list_cleanup(); - template <typename T> - void propagate_task_group_state(std::atomic<T> d1::task_group_context::* mptr_state, d1::task_group_context& src, T new_state); - - //! Index of the arena slot the scheduler occupies now, or occupied last time - unsigned short my_arena_index; - - //! Indicates if the thread is created by RML - const bool my_is_worker; - - //! The current task dipsatcher - task_dispatcher* my_task_dispatcher; - - //! The arena that I own (if external thread) or am servicing at the moment (if worker) - arena* my_arena; - - //! Pointer to the slot in the arena we own at the moment - arena_slot* my_arena_slot; - - //! The mailbox (affinity mechanism) the current thread attached to - mail_inbox my_inbox; - - //! The random generator - FastRandom my_random; - - //! Last observer in the observers list processed on this slot - observer_proxy* my_last_observer; - - //! Pool of small object for fast task allocation - small_object_pool_impl* my_small_object_pool; - - struct context_list_state { - //! Head of the thread specific list of task group contexts. - d1::context_list_node head{}; - - //! Mutex protecting access to the list of task group contexts. 
- // TODO: check whether it can be deadly preempted and replace by spinning/sleeping mutex - spin_mutex mutex{}; - - //! Last state propagation epoch known to this thread - /** Together with the_context_state_propagation_epoch constitute synchronization protocol - that keeps hot path of task group context construction destruction mostly - lock-free. - When local epoch equals the global one, the state of task group contexts - registered with this thread is consistent with that of the task group trees - they belong to. **/ - std::atomic<std::uintptr_t> epoch{}; - - //! Flag indicating that a context is being destructed by its owner thread - /** Together with my_nonlocal_ctx_list_update constitute synchronization protocol - that keeps hot path of context destruction (by the owner thread) mostly - lock-free. **/ - std::atomic<std::uintptr_t> local_update{}; - - //! Flag indicating that a context is being destructed by non-owner thread. - /** See also my_local_update. **/ - std::atomic<std::uintptr_t> nonlocal_update{}; - } my_context_list_state; - -#if __TBB_RESUMABLE_TASKS - //! The list of possible post resume actions. - enum class post_resume_action { - invalid, - register_waiter, - resume, - callback, - cleanup, - notify, - none - }; - - //! The callback to call the user callback passed to tbb::suspend. - struct suspend_callback_wrapper { - suspend_callback_type suspend_callback; - void* user_callback; - suspend_point_type* tag; - - void operator()() { - __TBB_ASSERT(suspend_callback && user_callback && tag, nullptr); - suspend_callback(user_callback, tag); - } - }; - - //! Suspends the current coroutine (task_dispatcher). - void suspend(void* suspend_callback, void* user_callback); - - //! Resumes the target task_dispatcher. - void resume(task_dispatcher& target); - - //! Set post resume action to perform after resume. - void set_post_resume_action(post_resume_action pra, void* arg) { - __TBB_ASSERT(my_post_resume_action == post_resume_action::none, "The Post resume action must not be set"); - __TBB_ASSERT(!my_post_resume_arg, "The post resume action must not have an argument"); - my_post_resume_action = pra; - my_post_resume_arg = arg; - } - - void clear_post_resume_action() { - my_post_resume_action = thread_data::post_resume_action::none; - my_post_resume_arg = nullptr; - } - - //! Performs post resume action. - void do_post_resume_action(); - - //! The post resume action requested after the swap contexts. - post_resume_action my_post_resume_action; - - //! The post resume action argument. - void* my_post_resume_arg; -#endif /* __TBB_RESUMABLE_TASKS */ - - //! The default context - // TODO: consider using common default context because it is used only to simplify - // cancellation check. - d1::task_group_context my_default_context; -}; - -inline void thread_data::attach_arena(arena& a, std::size_t index) { - my_arena = &a; - my_arena_index = static_cast<unsigned short>(index); - my_arena_slot = a.my_slots + index; - // Read the current slot mail_outbox and attach it to the mail_inbox (remove inbox later maybe) - my_inbox.attach(my_arena->mailbox(index)); -} - -inline bool thread_data::is_attached_to(arena* a) { return my_arena == a; } - -inline void thread_data::context_list_cleanup() { - // Detach contexts remaining in the local list. 
- { - spin_mutex::scoped_lock lock(my_context_list_state.mutex); - d1::context_list_node* node = my_context_list_state.head.next.load(std::memory_order_relaxed); - while (node != &my_context_list_state.head) { - using state_t = d1::task_group_context::lifetime_state; - - d1::task_group_context& ctx = __TBB_get_object_ref(d1::task_group_context, my_node, node); - std::atomic<state_t>& state = ctx.my_lifetime_state; - - node = node->next.load(std::memory_order_relaxed); - - __TBB_ASSERT(ctx.my_owner == this, "The context should belong to the current thread."); - state_t expected = state_t::bound; - if ( -#if defined(__INTEL_COMPILER) && __INTEL_COMPILER <= 1910 - !((std::atomic<typename std::underlying_type<state_t>::type>&)state).compare_exchange_strong( - (typename std::underlying_type<state_t>::type&)expected, - (typename std::underlying_type<state_t>::type)state_t::detached) -#else - !state.compare_exchange_strong(expected, state_t::detached) -#endif - ) { - __TBB_ASSERT(expected == state_t::locked || expected == state_t::dying, nullptr); - spin_wait_until_eq(state, state_t::dying); - } else { - __TBB_ASSERT(expected == state_t::bound, nullptr); - ctx.my_owner.store(nullptr, std::memory_order_release); - } - } - } - spin_wait_until_eq(my_context_list_state.nonlocal_update, 0u); -} - -inline void thread_data::attach_task_dispatcher(task_dispatcher& task_disp) { - __TBB_ASSERT(my_task_dispatcher == nullptr, nullptr); - __TBB_ASSERT(task_disp.m_thread_data == nullptr, nullptr); - task_disp.m_thread_data = this; - my_task_dispatcher = &task_disp; -} - -inline void thread_data::detach_task_dispatcher() { - __TBB_ASSERT(my_task_dispatcher != nullptr, nullptr); - __TBB_ASSERT(my_task_dispatcher->m_thread_data == this, nullptr); - my_task_dispatcher->m_thread_data = nullptr; - my_task_dispatcher = nullptr; -} - -} // namespace r1 -} // namespace detail -} // namespace tbb - -#endif // __TBB_thread_data_H - +/* + Copyright (c) 2020-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_thread_data_H +#define __TBB_thread_data_H + +#include "oneapi/tbb/detail/_task.h" +#include "oneapi/tbb/task.h" + +#include "rml_base.h" // rml::job + +#include "scheduler_common.h" +#include "arena.h" +#include "concurrent_monitor.h" +#include "mailbox.h" +#include "misc.h" // FastRandom +#include "small_object_pool_impl.h" + +#include <atomic> + +namespace tbb { +namespace detail { +namespace r1 { + +class task; +class arena_slot; +class task_group_context; +class task_dispatcher; + +//------------------------------------------------------------------------ +// Thread Data +//------------------------------------------------------------------------ +class thread_data : public ::rml::job + , public intrusive_list_node + , no_copy { +public: + thread_data(unsigned short index, bool is_worker) + : my_arena_index{ index } + , my_is_worker{ is_worker } + , my_task_dispatcher{ nullptr } + , my_arena{} + , my_arena_slot{} + , my_inbox{} + , my_random{ this } + , my_last_observer{ nullptr } + , my_small_object_pool{new (cache_aligned_allocate(sizeof(small_object_pool_impl))) small_object_pool_impl{}} + , my_context_list_state{} +#if __TBB_RESUMABLE_TASKS + , my_post_resume_action{ post_resume_action::none } + , my_post_resume_arg{nullptr} +#endif /* __TBB_RESUMABLE_TASKS */ + { + ITT_SYNC_CREATE(&my_context_list_state.mutex, SyncType_Scheduler, SyncObj_ContextsList); + my_context_list_state.head.next.store(&my_context_list_state.head, std::memory_order_relaxed); + my_context_list_state.head.prev.store(&my_context_list_state.head, std::memory_order_relaxed); + } + + ~thread_data() { + context_list_cleanup(); + my_small_object_pool->destroy(); + poison_pointer(my_task_dispatcher); + poison_pointer(my_arena); + poison_pointer(my_arena_slot); + poison_pointer(my_last_observer); + poison_pointer(my_small_object_pool); +#if __TBB_RESUMABLE_TASKS + poison_pointer(my_post_resume_arg); +#endif /* __TBB_RESUMABLE_TASKS */ + poison_value(my_context_list_state.epoch); + poison_value(my_context_list_state.local_update); + poison_value(my_context_list_state.nonlocal_update); + } + + void attach_arena(arena& a, std::size_t index); + bool is_attached_to(arena*); + void attach_task_dispatcher(task_dispatcher&); + void detach_task_dispatcher(); + void context_list_cleanup(); + template <typename T> + void propagate_task_group_state(std::atomic<T> d1::task_group_context::* mptr_state, d1::task_group_context& src, T new_state); + + //! Index of the arena slot the scheduler occupies now, or occupied last time + unsigned short my_arena_index; + + //! Indicates if the thread is created by RML + const bool my_is_worker; + + //! The current task dipsatcher + task_dispatcher* my_task_dispatcher; + + //! The arena that I own (if external thread) or am servicing at the moment (if worker) + arena* my_arena; + + //! Pointer to the slot in the arena we own at the moment + arena_slot* my_arena_slot; + + //! The mailbox (affinity mechanism) the current thread attached to + mail_inbox my_inbox; + + //! The random generator + FastRandom my_random; + + //! Last observer in the observers list processed on this slot + observer_proxy* my_last_observer; + + //! Pool of small object for fast task allocation + small_object_pool_impl* my_small_object_pool; + + struct context_list_state { + //! Head of the thread specific list of task group contexts. + d1::context_list_node head{}; + + //! Mutex protecting access to the list of task group contexts. 
+ // TODO: check whether it can be deadly preempted and replace by spinning/sleeping mutex + spin_mutex mutex{}; + + //! Last state propagation epoch known to this thread + /** Together with the_context_state_propagation_epoch constitute synchronization protocol + that keeps hot path of task group context construction destruction mostly + lock-free. + When local epoch equals the global one, the state of task group contexts + registered with this thread is consistent with that of the task group trees + they belong to. **/ + std::atomic<std::uintptr_t> epoch{}; + + //! Flag indicating that a context is being destructed by its owner thread + /** Together with my_nonlocal_ctx_list_update constitute synchronization protocol + that keeps hot path of context destruction (by the owner thread) mostly + lock-free. **/ + std::atomic<std::uintptr_t> local_update{}; + + //! Flag indicating that a context is being destructed by non-owner thread. + /** See also my_local_update. **/ + std::atomic<std::uintptr_t> nonlocal_update{}; + } my_context_list_state; + +#if __TBB_RESUMABLE_TASKS + //! The list of possible post resume actions. + enum class post_resume_action { + invalid, + register_waiter, + resume, + callback, + cleanup, + notify, + none + }; + + //! The callback to call the user callback passed to tbb::suspend. + struct suspend_callback_wrapper { + suspend_callback_type suspend_callback; + void* user_callback; + suspend_point_type* tag; + + void operator()() { + __TBB_ASSERT(suspend_callback && user_callback && tag, nullptr); + suspend_callback(user_callback, tag); + } + }; + + //! Suspends the current coroutine (task_dispatcher). + void suspend(void* suspend_callback, void* user_callback); + + //! Resumes the target task_dispatcher. + void resume(task_dispatcher& target); + + //! Set post resume action to perform after resume. + void set_post_resume_action(post_resume_action pra, void* arg) { + __TBB_ASSERT(my_post_resume_action == post_resume_action::none, "The Post resume action must not be set"); + __TBB_ASSERT(!my_post_resume_arg, "The post resume action must not have an argument"); + my_post_resume_action = pra; + my_post_resume_arg = arg; + } + + void clear_post_resume_action() { + my_post_resume_action = thread_data::post_resume_action::none; + my_post_resume_arg = nullptr; + } + + //! Performs post resume action. + void do_post_resume_action(); + + //! The post resume action requested after the swap contexts. + post_resume_action my_post_resume_action; + + //! The post resume action argument. + void* my_post_resume_arg; +#endif /* __TBB_RESUMABLE_TASKS */ + + //! The default context + // TODO: consider using common default context because it is used only to simplify + // cancellation check. + d1::task_group_context my_default_context; +}; + +inline void thread_data::attach_arena(arena& a, std::size_t index) { + my_arena = &a; + my_arena_index = static_cast<unsigned short>(index); + my_arena_slot = a.my_slots + index; + // Read the current slot mail_outbox and attach it to the mail_inbox (remove inbox later maybe) + my_inbox.attach(my_arena->mailbox(index)); +} + +inline bool thread_data::is_attached_to(arena* a) { return my_arena == a; } + +inline void thread_data::context_list_cleanup() { + // Detach contexts remaining in the local list. 
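Note that the thread_data constructor above stores &head into both head.next and head.prev, i.e. it starts the per-thread context list as an empty circular list; that is the invariant the cleanup loop below relies on to terminate. A minimal sketch of the invariant, with a simplified stand-in for d1::context_list_node:

    // Minimal sketch of the empty-list invariant used by context_list_state.
    #include <atomic>
    #include <cassert>

    struct context_list_node {
        std::atomic<context_list_node*> prev{nullptr};
        std::atomic<context_list_node*> next{nullptr};
    };

    int main() {
        context_list_node head;
        // Same initialization as the thread_data constructor above:
        head.next.store(&head, std::memory_order_relaxed);
        head.prev.store(&head, std::memory_order_relaxed);
        // An empty circular list points back at its own head, so the cleanup
        // loop "while (node != &head)" terminates immediately.
        assert(head.next.load(std::memory_order_relaxed) == &head);
        return 0;
    }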
+ { + spin_mutex::scoped_lock lock(my_context_list_state.mutex); + d1::context_list_node* node = my_context_list_state.head.next.load(std::memory_order_relaxed); + while (node != &my_context_list_state.head) { + using state_t = d1::task_group_context::lifetime_state; + + d1::task_group_context& ctx = __TBB_get_object_ref(d1::task_group_context, my_node, node); + std::atomic<state_t>& state = ctx.my_lifetime_state; + + node = node->next.load(std::memory_order_relaxed); + + __TBB_ASSERT(ctx.my_owner == this, "The context should belong to the current thread."); + state_t expected = state_t::bound; + if ( +#if defined(__INTEL_COMPILER) && __INTEL_COMPILER <= 1910 + !((std::atomic<typename std::underlying_type<state_t>::type>&)state).compare_exchange_strong( + (typename std::underlying_type<state_t>::type&)expected, + (typename std::underlying_type<state_t>::type)state_t::detached) +#else + !state.compare_exchange_strong(expected, state_t::detached) +#endif + ) { + __TBB_ASSERT(expected == state_t::locked || expected == state_t::dying, nullptr); + spin_wait_until_eq(state, state_t::dying); + } else { + __TBB_ASSERT(expected == state_t::bound, nullptr); + ctx.my_owner.store(nullptr, std::memory_order_release); + } + } + } + spin_wait_until_eq(my_context_list_state.nonlocal_update, 0u); +} + +inline void thread_data::attach_task_dispatcher(task_dispatcher& task_disp) { + __TBB_ASSERT(my_task_dispatcher == nullptr, nullptr); + __TBB_ASSERT(task_disp.m_thread_data == nullptr, nullptr); + task_disp.m_thread_data = this; + my_task_dispatcher = &task_disp; +} + +inline void thread_data::detach_task_dispatcher() { + __TBB_ASSERT(my_task_dispatcher != nullptr, nullptr); + __TBB_ASSERT(my_task_dispatcher->m_thread_data == this, nullptr); + my_task_dispatcher->m_thread_data = nullptr; + my_task_dispatcher = nullptr; +} + +} // namespace r1 +} // namespace detail +} // namespace tbb + +#endif // __TBB_thread_data_H + diff --git a/contrib/libs/tbb/src/tbb/tls.h b/contrib/libs/tbb/src/tbb/tls.h index 28d58bcd89..5d28ca4dae 100644 --- a/contrib/libs/tbb/src/tbb/tls.h +++ b/contrib/libs/tbb/src/tbb/tls.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -17,24 +17,24 @@ #ifndef _TBB_tls_H #define _TBB_tls_H -#include "oneapi/tbb/detail/_config.h" - -#if __TBB_USE_POSIX +#include "oneapi/tbb/detail/_config.h" + +#if __TBB_USE_POSIX #include <pthread.h> -#else /* assume __TBB_USE_WINAPI */ -#include <windows.h> +#else /* assume __TBB_USE_WINAPI */ +#include <windows.h> #endif namespace tbb { -namespace detail { -namespace r1 { +namespace detail { +namespace r1 { typedef void (*tls_dtor_t)(void*); //! Basic cross-platform wrapper class for TLS operations. 
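The context_list_cleanup() path shown above in thread_data.h races with non-owner threads over each context's lifetime state: a single compare_exchange_strong from bound to detached decides which side completes the detach, and the loser waits for the dying state. The reduced sketch below shows just that handshake; the lifetime_state enum mirrors the states referenced above, and the spin-wait is elided.

    // Reduced sketch of the bound -> detached handshake in context_list_cleanup().
    #include <atomic>
    #include <cassert>

    enum class lifetime_state { bound, locked, detached, dying };

    // Returns true if this thread won the race and may clear the owner pointer.
    bool try_detach(std::atomic<lifetime_state>& state) {
        lifetime_state expected = lifetime_state::bound;
        if (state.compare_exchange_strong(expected, lifetime_state::detached))
            return true;                           // we detached the context
        // Someone else is working on the context; the real code asserts that
        // expected is locked or dying and then spin-waits for dying.
        assert(expected == lifetime_state::locked || expected == lifetime_state::dying);
        return false;
    }

    int main() {
        std::atomic<lifetime_state> s{lifetime_state::bound};
        assert(try_detach(s));                     // first caller wins
        assert(s.load() == lifetime_state::detached);
        std::atomic<lifetime_state> busy{lifetime_state::dying};
        assert(!try_detach(busy));                 // CAS fails, expected is updated
        return 0;
    }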
template <typename T> class basic_tls { -#if __TBB_USE_POSIX +#if __TBB_USE_POSIX typedef pthread_key_t tls_key_t; public: int create( tls_dtor_t dtor = NULL ) { @@ -43,7 +43,7 @@ public: int destroy() { return pthread_key_delete(my_key); } void set( T value ) { pthread_setspecific(my_key, (void*)value); } T get() { return (T)pthread_getspecific(my_key); } -#else /* __TBB_USE_WINAPI */ +#else /* __TBB_USE_WINAPI */ typedef DWORD tls_key_t; public: #if !__TBB_WIN8UI_SUPPORT @@ -69,7 +69,7 @@ public: void set( T value ) { FlsSetValue(my_key, (LPVOID)value); } T get() { return (T)FlsGetValue(my_key); } #endif /* !__TBB_WIN8UI_SUPPORT */ -#endif /* __TBB_USE_WINAPI */ +#endif /* __TBB_USE_WINAPI */ private: tls_key_t my_key; }; @@ -86,8 +86,8 @@ public: operator T() { return base::get(); } }; -} // namespace r1 -} // namespace detail +} // namespace r1 +} // namespace detail } // namespace tbb #endif /* _TBB_tls_H */ diff --git a/contrib/libs/tbb/src/tbb/tools_api/disable_warnings.h b/contrib/libs/tbb/src/tbb/tools_api/disable_warnings.h index 541888dc03..e1ba837404 100644 --- a/contrib/libs/tbb/src/tbb/tools_api/disable_warnings.h +++ b/contrib/libs/tbb/src/tbb/tools_api/disable_warnings.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/contrib/libs/tbb/src/tbb/tools_api/ittnotify.h b/contrib/libs/tbb/src/tbb/tools_api/ittnotify.h index fd6620843a..993b7b0bfd 100644 --- a/contrib/libs/tbb/src/tbb/tools_api/ittnotify.h +++ b/contrib/libs/tbb/src/tbb/tools_api/ittnotify.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -22,8 +22,8 @@ @brief Public User API functions and types @mainpage -The Instrumentation and Tracing Technology API (ITT API) is used to -annotate a user's program with additional information +The Instrumentation and Tracing Technology API (ITT API) is used to +annotate a user's program with additional information that can be used by correctness and performance tools. The user inserts calls in their program. Those calls generate information that is collected at runtime, and used by Intel(R) Threading Tools. 
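Returning briefly to the basic_tls wrapper in tls.h above: it is a thin veneer over pthread_key_t (or the Win32 TLS/FLS slots), with create()/destroy() managing the key and set()/get() casting the stored value through void*. The sketch below mirrors the POSIX branch as a standalone class, since the real one lives in the internal tbb::detail::r1 namespace.

    // Standalone POSIX-only mirror of the basic_tls pattern shown above.
    #include <pthread.h>
    #include <cassert>

    template <typename T>
    class basic_tls_sketch {
        pthread_key_t my_key;
    public:
        int create()  { return pthread_key_create(&my_key, nullptr); }
        int destroy() { return pthread_key_delete(my_key); }
        void set(T value) { pthread_setspecific(my_key, (void*)value); }
        T get() { return (T)pthread_getspecific(my_key); }
    };

    int main() {
        basic_tls_sketch<long> slot;
        assert(slot.create() == 0);
        slot.set(42);                 // the stored value is private to this thread
        assert(slot.get() == 42);
        assert(slot.destroy() == 0);
        return 0;
    }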
@@ -155,17 +155,17 @@ The same ID may not be reused for different instances, unless a previous #endif /* UNICODE || _UNICODE */ #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#ifndef ITTAPI_CDECL +#ifndef ITTAPI_CDECL # if ITT_PLATFORM==ITT_PLATFORM_WIN -# define ITTAPI_CDECL __cdecl +# define ITTAPI_CDECL __cdecl # else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ # if defined _M_IX86 || defined __i386__ -# define ITTAPI_CDECL __attribute__ ((cdecl)) +# define ITTAPI_CDECL __attribute__ ((cdecl)) # else /* _M_IX86 || __i386__ */ -# define ITTAPI_CDECL /* actual only on x86 platform */ +# define ITTAPI_CDECL /* actual only on x86 platform */ # endif /* _M_IX86 || __i386__ */ # endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* ITTAPI_CDECL */ +#endif /* ITTAPI_CDECL */ #ifndef STDCALL # if ITT_PLATFORM==ITT_PLATFORM_WIN @@ -179,12 +179,12 @@ The same ID may not be reused for different instances, unless a previous # endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ #endif /* STDCALL */ -#define ITTAPI ITTAPI_CDECL -#define LIBITTAPI ITTAPI_CDECL +#define ITTAPI ITTAPI_CDECL +#define LIBITTAPI ITTAPI_CDECL /* TODO: Temporary for compatibility! */ -#define ITTAPI_CALL ITTAPI_CDECL -#define LIBITTAPI_CALL ITTAPI_CDECL +#define ITTAPI_CALL ITTAPI_CDECL +#define LIBITTAPI_CALL ITTAPI_CDECL #if ITT_PLATFORM==ITT_PLATFORM_WIN /* use __forceinline (VC++ specific) */ @@ -346,87 +346,87 @@ ITT_STUBV(ITTAPI, void, detach, (void)) /** @endcond */ /** - * @defgroup Intel Processor Trace control - * API from this group provides control over collection and analysis of Intel Processor Trace (Intel PT) data - * Information about Intel Processor Trace technology can be found here (Volume 3 chapter 35): - * https://software.intel.com/sites/default/files/managed/39/c5/325462-sdm-vol-1-2abcd-3abcd.pdf - * Use this API to mark particular code regions for loading detailed performance statistics. - * This mode makes your analysis faster and more accurate. - * @{ -*/ -typedef unsigned char __itt_pt_region; - -/** - * @brief function saves a region name marked with Intel PT API and returns a region id. - * Only 7 names can be registered. Attempts to register more names will be ignored and a region id with auto names will be returned. 
- * For automatic naming of regions pass NULL as function parameter -*/ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -__itt_pt_region ITTAPI __itt_pt_region_createA(const char *name); -__itt_pt_region ITTAPI __itt_pt_region_createW(const wchar_t *name); -#if defined(UNICODE) || defined(_UNICODE) -# define __itt_pt_region_create __itt_pt_region_createW -#else /* UNICODE */ -# define __itt_pt_region_create __itt_pt_region_createA -#endif /* UNICODE */ -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -__itt_pt_region ITTAPI __itt_pt_region_create(const char *name); -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(ITTAPI, __itt_pt_region, pt_region_createA, (const char *name)) -ITT_STUB(ITTAPI, __itt_pt_region, pt_region_createW, (const wchar_t *name)) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUB(ITTAPI, __itt_pt_region, pt_region_create, (const char *name)) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_pt_region_createA ITTNOTIFY_DATA(pt_region_createA) -#define __itt_pt_region_createA_ptr ITTNOTIFY_NAME(pt_region_createA) -#define __itt_pt_region_createW ITTNOTIFY_DATA(pt_region_createW) -#define __itt_pt_region_createW_ptr ITTNOTIFY_NAME(pt_region_createW) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_pt_region_create ITTNOTIFY_DATA(pt_region_create) -#define __itt_pt_region_create_ptr ITTNOTIFY_NAME(pt_region_create) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#else /* INTEL_NO_ITTNOTIFY_API */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_pt_region_createA(name) (__itt_pt_region)0 -#define __itt_pt_region_createA_ptr 0 -#define __itt_pt_region_createW(name) (__itt_pt_region)0 -#define __itt_pt_region_createW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_pt_region_create(name) (__itt_pt_region)0 -#define __itt_pt_region_create_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_pt_region_createA_ptr 0 -#define __itt_pt_region_createW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_pt_region_create_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief function contains a special code pattern identified on the post-processing stage and - * marks the beginning of a code region targeted for Intel PT analysis - * @param[in] region - region id, 0 <= region < 8 -*/ -void __itt_mark_pt_region_begin(__itt_pt_region region); -/** - * @brief function contains a special code pattern identified on the post-processing stage and - * marks the end of a code region targeted for Intel PT analysis - * @param[in] region - region id, 0 <= region < 8 -*/ -void __itt_mark_pt_region_end(__itt_pt_region region); -/** @} Intel PT control group*/ - -/** + * @defgroup Intel Processor Trace control + * API from this group provides control over collection and analysis of Intel Processor Trace (Intel PT) data + * Information about Intel Processor Trace technology can be found here (Volume 3 chapter 35): + * https://software.intel.com/sites/default/files/managed/39/c5/325462-sdm-vol-1-2abcd-3abcd.pdf + * Use this API to mark particular code regions for loading detailed performance statistics. + * This mode makes your analysis faster and more accurate. 
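The Intel PT group above exposes only three entry points: register a named region (at most 7 names; region ids fall in [0, 8)) and bracket the code of interest with begin/end marks. A hedged usage sketch for the non-Windows, char-based branch follows; it assumes ittnotify.h is on the include path, and with no collector attached the calls are expected to dispatch to no-ops, so the markup can stay in place.

    // Hedged sketch: marking a hot loop for Intel PT analysis via the API above.
    #include "ittnotify.h"

    static double hot_loop(const double* data, int n) {
        // At most 7 distinct names; further registrations fall back to auto-named ids.
        __itt_pt_region region = __itt_pt_region_create("hot_loop");
        __itt_mark_pt_region_begin(region);
        double sum = 0.0;
        for (int i = 0; i < n; ++i)
            sum += data[i] * data[i];
        __itt_mark_pt_region_end(region);
        return sum;
    }

    int main(void) {
        double v[4] = {1.0, 2.0, 3.0, 4.0};
        return hot_loop(v, 4) > 0.0 ? 0 : 1;
    }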
+ * @{ +*/ +typedef unsigned char __itt_pt_region; + +/** + * @brief function saves a region name marked with Intel PT API and returns a region id. + * Only 7 names can be registered. Attempts to register more names will be ignored and a region id with auto names will be returned. + * For automatic naming of regions pass NULL as function parameter +*/ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +__itt_pt_region ITTAPI __itt_pt_region_createA(const char *name); +__itt_pt_region ITTAPI __itt_pt_region_createW(const wchar_t *name); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_pt_region_create __itt_pt_region_createW +#else /* UNICODE */ +# define __itt_pt_region_create __itt_pt_region_createA +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +__itt_pt_region ITTAPI __itt_pt_region_create(const char *name); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_pt_region, pt_region_createA, (const char *name)) +ITT_STUB(ITTAPI, __itt_pt_region, pt_region_createW, (const wchar_t *name)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_pt_region, pt_region_create, (const char *name)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_pt_region_createA ITTNOTIFY_DATA(pt_region_createA) +#define __itt_pt_region_createA_ptr ITTNOTIFY_NAME(pt_region_createA) +#define __itt_pt_region_createW ITTNOTIFY_DATA(pt_region_createW) +#define __itt_pt_region_createW_ptr ITTNOTIFY_NAME(pt_region_createW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_pt_region_create ITTNOTIFY_DATA(pt_region_create) +#define __itt_pt_region_create_ptr ITTNOTIFY_NAME(pt_region_create) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_pt_region_createA(name) (__itt_pt_region)0 +#define __itt_pt_region_createA_ptr 0 +#define __itt_pt_region_createW(name) (__itt_pt_region)0 +#define __itt_pt_region_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_pt_region_create(name) (__itt_pt_region)0 +#define __itt_pt_region_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_pt_region_createA_ptr 0 +#define __itt_pt_region_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_pt_region_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief function contains a special code pattern identified on the post-processing stage and + * marks the beginning of a code region targeted for Intel PT analysis + * @param[in] region - region id, 0 <= region < 8 +*/ +void __itt_mark_pt_region_begin(__itt_pt_region region); +/** + * @brief function contains a special code pattern identified on the post-processing stage and + * marks the end of a code region targeted for Intel PT analysis + * @param[in] region - region id, 0 <= region < 8 +*/ +void __itt_mark_pt_region_end(__itt_pt_region region); +/** @} Intel PT control group*/ + +/** * @defgroup threads Threads * @ingroup public * Give names to threads @@ -2734,7 +2734,7 @@ ITT_STUB(ITTAPI, __itt_clock_domain*, clock_domain_create, (__itt_get_clock_info /** * @ingroup clockdomains - * @brief Recalculate clock domains frequencies and clock base 
timestamps. + * @brief Recalculate clock domains frequencies and clock base timestamps. */ void ITTAPI __itt_clock_domain_reset(void); @@ -2861,371 +2861,371 @@ ITT_STUBV(ITTAPI, void, task_end_ex, (const __itt_domain *domain, __itt /** @endcond */ /** - * @defgroup counters Counters - * @ingroup public - * Counters are user-defined objects with a monotonically increasing - * value. Counter values are 64-bit unsigned integers. - * Counters have names that can be displayed in - * the tools. - * @{ - */ - -/** - * @brief opaque structure for counter identification - */ -/** @cond exclude_from_documentation */ - -typedef struct ___itt_counter* __itt_counter; - -/** - * @brief Create an unsigned 64 bits integer counter with given name/domain - * - * After __itt_counter_create() is called, __itt_counter_inc(id), __itt_counter_inc_delta(id, delta), - * __itt_counter_set_value(id, value_ptr) or __itt_counter_set_value_ex(id, clock_domain, timestamp, value_ptr) - * can be used to change the value of the counter, where value_ptr is a pointer to an unsigned 64 bits integer - * - * The call is equal to __itt_counter_create_typed(name, domain, __itt_metadata_u64) - */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -__itt_counter ITTAPI __itt_counter_createA(const char *name, const char *domain); -__itt_counter ITTAPI __itt_counter_createW(const wchar_t *name, const wchar_t *domain); -#if defined(UNICODE) || defined(_UNICODE) -# define __itt_counter_create __itt_counter_createW -# define __itt_counter_create_ptr __itt_counter_createW_ptr -#else /* UNICODE */ -# define __itt_counter_create __itt_counter_createA -# define __itt_counter_create_ptr __itt_counter_createA_ptr -#endif /* UNICODE */ -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -__itt_counter ITTAPI __itt_counter_create(const char *name, const char *domain); -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(ITTAPI, __itt_counter, counter_createA, (const char *name, const char *domain)) -ITT_STUB(ITTAPI, __itt_counter, counter_createW, (const wchar_t *name, const wchar_t *domain)) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUB(ITTAPI, __itt_counter, counter_create, (const char *name, const char *domain)) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_counter_createA ITTNOTIFY_DATA(counter_createA) -#define __itt_counter_createA_ptr ITTNOTIFY_NAME(counter_createA) -#define __itt_counter_createW ITTNOTIFY_DATA(counter_createW) -#define __itt_counter_createW_ptr ITTNOTIFY_NAME(counter_createW) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_counter_create ITTNOTIFY_DATA(counter_create) -#define __itt_counter_create_ptr ITTNOTIFY_NAME(counter_create) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#else /* INTEL_NO_ITTNOTIFY_API */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_counter_createA(name, domain) -#define __itt_counter_createA_ptr 0 -#define __itt_counter_createW(name, domain) -#define __itt_counter_createW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_counter_create(name, domain) -#define __itt_counter_create_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_counter_createA_ptr 0 -#define __itt_counter_createW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_counter_create_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ 
-#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Increment the unsigned 64 bits integer counter value - * - * Calling this function to non-unsigned 64 bits integer counters has no effect - */ -void ITTAPI __itt_counter_inc(__itt_counter id); - -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, counter_inc, (__itt_counter id)) -#define __itt_counter_inc ITTNOTIFY_VOID(counter_inc) -#define __itt_counter_inc_ptr ITTNOTIFY_NAME(counter_inc) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_counter_inc(id) -#define __itt_counter_inc_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_counter_inc_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -/** - * @brief Increment the unsigned 64 bits integer counter value with x - * - * Calling this function to non-unsigned 64 bits integer counters has no effect - */ -void ITTAPI __itt_counter_inc_delta(__itt_counter id, unsigned long long value); - -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, counter_inc_delta, (__itt_counter id, unsigned long long value)) -#define __itt_counter_inc_delta ITTNOTIFY_VOID(counter_inc_delta) -#define __itt_counter_inc_delta_ptr ITTNOTIFY_NAME(counter_inc_delta) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_counter_inc_delta(id, value) -#define __itt_counter_inc_delta_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_counter_inc_delta_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Decrement the unsigned 64 bits integer counter value - * - * Calling this function to non-unsigned 64 bits integer counters has no effect - */ -void ITTAPI __itt_counter_dec(__itt_counter id); - -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, counter_dec, (__itt_counter id)) -#define __itt_counter_dec ITTNOTIFY_VOID(counter_dec) -#define __itt_counter_dec_ptr ITTNOTIFY_NAME(counter_dec) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_counter_dec(id) -#define __itt_counter_dec_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_counter_dec_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -/** - * @brief Decrement the unsigned 64 bits integer counter value with x - * - * Calling this function to non-unsigned 64 bits integer counters has no effect - */ -void ITTAPI __itt_counter_dec_delta(__itt_counter id, unsigned long long value); - -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, counter_dec_delta, (__itt_counter id, unsigned long long value)) -#define __itt_counter_dec_delta ITTNOTIFY_VOID(counter_dec_delta) -#define __itt_counter_dec_delta_ptr ITTNOTIFY_NAME(counter_dec_delta) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_counter_dec_delta(id, value) -#define __itt_counter_dec_delta_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_counter_dec_delta_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @ingroup counters - * @brief Increment a counter by one. - * The first call with a given name creates a counter by that name and sets its - * value to zero. Successive calls increment the counter value. - * @param[in] domain The domain controlling the call. Counter names are not domain specific. - * The domain argument is used only to enable or disable the API calls. 
- * @param[in] name The name of the counter - */ -void ITTAPI __itt_counter_inc_v3(const __itt_domain *domain, __itt_string_handle *name); - -/** - * @ingroup counters - * @brief Increment a counter by the value specified in delta. - * @param[in] domain The domain controlling the call. Counter names are not domain specific. - * The domain argument is used only to enable or disable the API calls. - * @param[in] name The name of the counter - * @param[in] delta The amount by which to increment the counter - */ -void ITTAPI __itt_counter_inc_delta_v3(const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta); - -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, counter_inc_v3, (const __itt_domain *domain, __itt_string_handle *name)) -ITT_STUBV(ITTAPI, void, counter_inc_delta_v3, (const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta)) -#define __itt_counter_inc_v3(d,x) ITTNOTIFY_VOID_D1(counter_inc_v3,d,x) -#define __itt_counter_inc_v3_ptr ITTNOTIFY_NAME(counter_inc_v3) -#define __itt_counter_inc_delta_v3(d,x,y) ITTNOTIFY_VOID_D2(counter_inc_delta_v3,d,x,y) -#define __itt_counter_inc_delta_v3_ptr ITTNOTIFY_NAME(counter_inc_delta_v3) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_counter_inc_v3(domain,name) -#define __itt_counter_inc_v3_ptr 0 -#define __itt_counter_inc_delta_v3(domain,name,delta) -#define __itt_counter_inc_delta_v3_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_counter_inc_v3_ptr 0 -#define __itt_counter_inc_delta_v3_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - - -/** - * @ingroup counters - * @brief Decrement a counter by one. - * The first call with a given name creates a counter by that name and sets its - * value to zero. Successive calls decrement the counter value. - * @param[in] domain The domain controlling the call. Counter names are not domain specific. - * The domain argument is used only to enable or disable the API calls. - * @param[in] name The name of the counter - */ -void ITTAPI __itt_counter_dec_v3(const __itt_domain *domain, __itt_string_handle *name); - -/** - * @ingroup counters - * @brief Decrement a counter by the value specified in delta. - * @param[in] domain The domain controlling the call. Counter names are not domain specific. - * The domain argument is used only to enable or disable the API calls. 
- * @param[in] name The name of the counter - * @param[in] delta The amount by which to decrement the counter - */ -void ITTAPI __itt_counter_dec_delta_v3(const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta); - -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, counter_dec_v3, (const __itt_domain *domain, __itt_string_handle *name)) -ITT_STUBV(ITTAPI, void, counter_dec_delta_v3, (const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta)) -#define __itt_counter_dec_v3(d,x) ITTNOTIFY_VOID_D1(counter_dec_v3,d,x) -#define __itt_counter_dec_v3_ptr ITTNOTIFY_NAME(counter_dec_v3) -#define __itt_counter_dec_delta_v3(d,x,y) ITTNOTIFY_VOID_D2(counter_dec_delta_v3,d,x,y) -#define __itt_counter_dec_delta_v3_ptr ITTNOTIFY_NAME(counter_dec_delta_v3) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_counter_dec_v3(domain,name) -#define __itt_counter_dec_v3_ptr 0 -#define __itt_counter_dec_delta_v3(domain,name,delta) -#define __itt_counter_dec_delta_v3_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_counter_dec_v3_ptr 0 -#define __itt_counter_dec_delta_v3_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** @} counters group */ - - -/** - * @brief Set the counter value - */ -void ITTAPI __itt_counter_set_value(__itt_counter id, void *value_ptr); - -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, counter_set_value, (__itt_counter id, void *value_ptr)) -#define __itt_counter_set_value ITTNOTIFY_VOID(counter_set_value) -#define __itt_counter_set_value_ptr ITTNOTIFY_NAME(counter_set_value) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_counter_set_value(id, value_ptr) -#define __itt_counter_set_value_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_counter_set_value_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Set the counter value - */ -void ITTAPI __itt_counter_set_value_ex(__itt_counter id, __itt_clock_domain *clock_domain, unsigned long long timestamp, void *value_ptr); - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, counter_set_value_ex, (__itt_counter id, __itt_clock_domain *clock_domain, unsigned long long timestamp, void *value_ptr)) -#define __itt_counter_set_value_ex ITTNOTIFY_VOID(counter_set_value_ex) -#define __itt_counter_set_value_ex_ptr ITTNOTIFY_NAME(counter_set_value_ex) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_counter_set_value_ex(id, clock_domain, timestamp, value_ptr) -#define __itt_counter_set_value_ex_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_counter_set_value_ex_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Create a typed counter with given name/domain - * - * After __itt_counter_create_typed() is called, __itt_counter_inc(id), __itt_counter_inc_delta(id, delta), - * __itt_counter_set_value(id, value_ptr) or __itt_counter_set_value_ex(id, clock_domain, timestamp, value_ptr) - * can be used to change the value of the counter - */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -__itt_counter ITTAPI __itt_counter_create_typedA(const char *name, const char *domain, __itt_metadata_type type); -__itt_counter ITTAPI __itt_counter_create_typedW(const wchar_t *name, const wchar_t *domain, __itt_metadata_type type); -#if defined(UNICODE) || defined(_UNICODE) -# define 
__itt_counter_create_typed __itt_counter_create_typedW -# define __itt_counter_create_typed_ptr __itt_counter_create_typedW_ptr -#else /* UNICODE */ -# define __itt_counter_create_typed __itt_counter_create_typedA -# define __itt_counter_create_typed_ptr __itt_counter_create_typedA_ptr -#endif /* UNICODE */ -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -__itt_counter ITTAPI __itt_counter_create_typed(const char *name, const char *domain, __itt_metadata_type type); -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(ITTAPI, __itt_counter, counter_create_typedA, (const char *name, const char *domain, __itt_metadata_type type)) -ITT_STUB(ITTAPI, __itt_counter, counter_create_typedW, (const wchar_t *name, const wchar_t *domain, __itt_metadata_type type)) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUB(ITTAPI, __itt_counter, counter_create_typed, (const char *name, const char *domain, __itt_metadata_type type)) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_counter_create_typedA ITTNOTIFY_DATA(counter_create_typedA) -#define __itt_counter_create_typedA_ptr ITTNOTIFY_NAME(counter_create_typedA) -#define __itt_counter_create_typedW ITTNOTIFY_DATA(counter_create_typedW) -#define __itt_counter_create_typedW_ptr ITTNOTIFY_NAME(counter_create_typedW) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_counter_create_typed ITTNOTIFY_DATA(counter_create_typed) -#define __itt_counter_create_typed_ptr ITTNOTIFY_NAME(counter_create_typed) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#else /* INTEL_NO_ITTNOTIFY_API */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_counter_create_typedA(name, domain, type) -#define __itt_counter_create_typedA_ptr 0 -#define __itt_counter_create_typedW(name, domain, type) -#define __itt_counter_create_typedW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_counter_create_typed(name, domain, type) -#define __itt_counter_create_typed_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_counter_create_typedA_ptr 0 -#define __itt_counter_create_typedW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_counter_create_typed_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - -/** - * @brief Destroy the counter identified by the pointer previously returned by __itt_counter_create() or - * __itt_counter_create_typed() - */ -void ITTAPI __itt_counter_destroy(__itt_counter id); - -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, counter_destroy, (__itt_counter id)) -#define __itt_counter_destroy ITTNOTIFY_VOID(counter_destroy) -#define __itt_counter_destroy_ptr ITTNOTIFY_NAME(counter_destroy) -#else /* INTEL_NO_ITTNOTIFY_API */ -#define __itt_counter_destroy(id) -#define __itt_counter_destroy_ptr 0 -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#define __itt_counter_destroy_ptr 0 -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ -/** @} counters group */ - -/** + * @defgroup counters Counters + * @ingroup public + * Counters are user-defined objects with a monotonically increasing + * value. Counter values are 64-bit unsigned integers. + * Counters have names that can be displayed in + * the tools. 
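The counter group documented above reduces to a small lifecycle: create a named, domain-qualified counter (the plain create gives an unsigned 64-bit counter, equivalent to the typed create with __itt_metadata_u64), adjust it with the inc/dec/delta calls or overwrite it with set_value, and destroy it when done. Below is a hedged usage sketch for the char-based, non-Windows branch; the counter and domain names are made up for illustration, and ittnotify.h is assumed to be on the include path.

    // Hedged sketch of the __itt_counter API documented above (char-based variant).
    // Without an attached collector these calls are expected to be no-ops.
    #include "ittnotify.h"

    int main(void) {
        // Equivalent to __itt_counter_create_typed("queued_tasks", "scheduler", __itt_metadata_u64).
        __itt_counter queued = __itt_counter_create("queued_tasks", "scheduler");

        __itt_counter_inc(queued);                   // +1
        __itt_counter_inc_delta(queued, 5);          // +5
        __itt_counter_dec(queued);                   // -1

        unsigned long long absolute = 42;
        __itt_counter_set_value(queued, &absolute);  // overwrite with an absolute value

        __itt_counter_destroy(queued);
        return 0;
    }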
+ * @{ + */ + +/** + * @brief opaque structure for counter identification + */ +/** @cond exclude_from_documentation */ + +typedef struct ___itt_counter* __itt_counter; + +/** + * @brief Create an unsigned 64 bits integer counter with given name/domain + * + * After __itt_counter_create() is called, __itt_counter_inc(id), __itt_counter_inc_delta(id, delta), + * __itt_counter_set_value(id, value_ptr) or __itt_counter_set_value_ex(id, clock_domain, timestamp, value_ptr) + * can be used to change the value of the counter, where value_ptr is a pointer to an unsigned 64 bits integer + * + * The call is equal to __itt_counter_create_typed(name, domain, __itt_metadata_u64) + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +__itt_counter ITTAPI __itt_counter_createA(const char *name, const char *domain); +__itt_counter ITTAPI __itt_counter_createW(const wchar_t *name, const wchar_t *domain); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_counter_create __itt_counter_createW +# define __itt_counter_create_ptr __itt_counter_createW_ptr +#else /* UNICODE */ +# define __itt_counter_create __itt_counter_createA +# define __itt_counter_create_ptr __itt_counter_createA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +__itt_counter ITTAPI __itt_counter_create(const char *name, const char *domain); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_counter, counter_createA, (const char *name, const char *domain)) +ITT_STUB(ITTAPI, __itt_counter, counter_createW, (const wchar_t *name, const wchar_t *domain)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_counter, counter_create, (const char *name, const char *domain)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_counter_createA ITTNOTIFY_DATA(counter_createA) +#define __itt_counter_createA_ptr ITTNOTIFY_NAME(counter_createA) +#define __itt_counter_createW ITTNOTIFY_DATA(counter_createW) +#define __itt_counter_createW_ptr ITTNOTIFY_NAME(counter_createW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_counter_create ITTNOTIFY_DATA(counter_create) +#define __itt_counter_create_ptr ITTNOTIFY_NAME(counter_create) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_counter_createA(name, domain) +#define __itt_counter_createA_ptr 0 +#define __itt_counter_createW(name, domain) +#define __itt_counter_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_counter_create(name, domain) +#define __itt_counter_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_counter_createA_ptr 0 +#define __itt_counter_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_counter_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Increment the unsigned 64 bits integer counter value + * + * Calling this function to non-unsigned 64 bits integer counters has no effect + */ +void ITTAPI __itt_counter_inc(__itt_counter id); + +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, counter_inc, (__itt_counter id)) +#define __itt_counter_inc ITTNOTIFY_VOID(counter_inc) +#define __itt_counter_inc_ptr ITTNOTIFY_NAME(counter_inc) 
+#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_counter_inc(id) +#define __itt_counter_inc_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_counter_inc_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** + * @brief Increment the unsigned 64 bits integer counter value with x + * + * Calling this function to non-unsigned 64 bits integer counters has no effect + */ +void ITTAPI __itt_counter_inc_delta(__itt_counter id, unsigned long long value); + +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, counter_inc_delta, (__itt_counter id, unsigned long long value)) +#define __itt_counter_inc_delta ITTNOTIFY_VOID(counter_inc_delta) +#define __itt_counter_inc_delta_ptr ITTNOTIFY_NAME(counter_inc_delta) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_counter_inc_delta(id, value) +#define __itt_counter_inc_delta_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_counter_inc_delta_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Decrement the unsigned 64 bits integer counter value + * + * Calling this function to non-unsigned 64 bits integer counters has no effect + */ +void ITTAPI __itt_counter_dec(__itt_counter id); + +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, counter_dec, (__itt_counter id)) +#define __itt_counter_dec ITTNOTIFY_VOID(counter_dec) +#define __itt_counter_dec_ptr ITTNOTIFY_NAME(counter_dec) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_counter_dec(id) +#define __itt_counter_dec_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_counter_dec_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** + * @brief Decrement the unsigned 64 bits integer counter value with x + * + * Calling this function to non-unsigned 64 bits integer counters has no effect + */ +void ITTAPI __itt_counter_dec_delta(__itt_counter id, unsigned long long value); + +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, counter_dec_delta, (__itt_counter id, unsigned long long value)) +#define __itt_counter_dec_delta ITTNOTIFY_VOID(counter_dec_delta) +#define __itt_counter_dec_delta_ptr ITTNOTIFY_NAME(counter_dec_delta) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_counter_dec_delta(id, value) +#define __itt_counter_dec_delta_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_counter_dec_delta_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup counters + * @brief Increment a counter by one. + * The first call with a given name creates a counter by that name and sets its + * value to zero. Successive calls increment the counter value. + * @param[in] domain The domain controlling the call. Counter names are not domain specific. + * The domain argument is used only to enable or disable the API calls. + * @param[in] name The name of the counter + */ +void ITTAPI __itt_counter_inc_v3(const __itt_domain *domain, __itt_string_handle *name); + +/** + * @ingroup counters + * @brief Increment a counter by the value specified in delta. + * @param[in] domain The domain controlling the call. Counter names are not domain specific. + * The domain argument is used only to enable or disable the API calls. 
+ * @param[in] name The name of the counter + * @param[in] delta The amount by which to increment the counter + */ +void ITTAPI __itt_counter_inc_delta_v3(const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta); + +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, counter_inc_v3, (const __itt_domain *domain, __itt_string_handle *name)) +ITT_STUBV(ITTAPI, void, counter_inc_delta_v3, (const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta)) +#define __itt_counter_inc_v3(d,x) ITTNOTIFY_VOID_D1(counter_inc_v3,d,x) +#define __itt_counter_inc_v3_ptr ITTNOTIFY_NAME(counter_inc_v3) +#define __itt_counter_inc_delta_v3(d,x,y) ITTNOTIFY_VOID_D2(counter_inc_delta_v3,d,x,y) +#define __itt_counter_inc_delta_v3_ptr ITTNOTIFY_NAME(counter_inc_delta_v3) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_counter_inc_v3(domain,name) +#define __itt_counter_inc_v3_ptr 0 +#define __itt_counter_inc_delta_v3(domain,name,delta) +#define __itt_counter_inc_delta_v3_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_counter_inc_v3_ptr 0 +#define __itt_counter_inc_delta_v3_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + + +/** + * @ingroup counters + * @brief Decrement a counter by one. + * The first call with a given name creates a counter by that name and sets its + * value to zero. Successive calls decrement the counter value. + * @param[in] domain The domain controlling the call. Counter names are not domain specific. + * The domain argument is used only to enable or disable the API calls. + * @param[in] name The name of the counter + */ +void ITTAPI __itt_counter_dec_v3(const __itt_domain *domain, __itt_string_handle *name); + +/** + * @ingroup counters + * @brief Decrement a counter by the value specified in delta. + * @param[in] domain The domain controlling the call. Counter names are not domain specific. + * The domain argument is used only to enable or disable the API calls. 
+ * @param[in] name The name of the counter + * @param[in] delta The amount by which to decrement the counter + */ +void ITTAPI __itt_counter_dec_delta_v3(const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta); + +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, counter_dec_v3, (const __itt_domain *domain, __itt_string_handle *name)) +ITT_STUBV(ITTAPI, void, counter_dec_delta_v3, (const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta)) +#define __itt_counter_dec_v3(d,x) ITTNOTIFY_VOID_D1(counter_dec_v3,d,x) +#define __itt_counter_dec_v3_ptr ITTNOTIFY_NAME(counter_dec_v3) +#define __itt_counter_dec_delta_v3(d,x,y) ITTNOTIFY_VOID_D2(counter_dec_delta_v3,d,x,y) +#define __itt_counter_dec_delta_v3_ptr ITTNOTIFY_NAME(counter_dec_delta_v3) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_counter_dec_v3(domain,name) +#define __itt_counter_dec_v3_ptr 0 +#define __itt_counter_dec_delta_v3(domain,name,delta) +#define __itt_counter_dec_delta_v3_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_counter_dec_v3_ptr 0 +#define __itt_counter_dec_delta_v3_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @} counters group */ + + +/** + * @brief Set the counter value + */ +void ITTAPI __itt_counter_set_value(__itt_counter id, void *value_ptr); + +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, counter_set_value, (__itt_counter id, void *value_ptr)) +#define __itt_counter_set_value ITTNOTIFY_VOID(counter_set_value) +#define __itt_counter_set_value_ptr ITTNOTIFY_NAME(counter_set_value) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_counter_set_value(id, value_ptr) +#define __itt_counter_set_value_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_counter_set_value_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Set the counter value + */ +void ITTAPI __itt_counter_set_value_ex(__itt_counter id, __itt_clock_domain *clock_domain, unsigned long long timestamp, void *value_ptr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, counter_set_value_ex, (__itt_counter id, __itt_clock_domain *clock_domain, unsigned long long timestamp, void *value_ptr)) +#define __itt_counter_set_value_ex ITTNOTIFY_VOID(counter_set_value_ex) +#define __itt_counter_set_value_ex_ptr ITTNOTIFY_NAME(counter_set_value_ex) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_counter_set_value_ex(id, clock_domain, timestamp, value_ptr) +#define __itt_counter_set_value_ex_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_counter_set_value_ex_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Create a typed counter with given name/domain + * + * After __itt_counter_create_typed() is called, __itt_counter_inc(id), __itt_counter_inc_delta(id, delta), + * __itt_counter_set_value(id, value_ptr) or __itt_counter_set_value_ex(id, clock_domain, timestamp, value_ptr) + * can be used to change the value of the counter + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +__itt_counter ITTAPI __itt_counter_create_typedA(const char *name, const char *domain, __itt_metadata_type type); +__itt_counter ITTAPI __itt_counter_create_typedW(const wchar_t *name, const wchar_t *domain, __itt_metadata_type type); +#if defined(UNICODE) || defined(_UNICODE) +# define 
__itt_counter_create_typed __itt_counter_create_typedW +# define __itt_counter_create_typed_ptr __itt_counter_create_typedW_ptr +#else /* UNICODE */ +# define __itt_counter_create_typed __itt_counter_create_typedA +# define __itt_counter_create_typed_ptr __itt_counter_create_typedA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +__itt_counter ITTAPI __itt_counter_create_typed(const char *name, const char *domain, __itt_metadata_type type); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_counter, counter_create_typedA, (const char *name, const char *domain, __itt_metadata_type type)) +ITT_STUB(ITTAPI, __itt_counter, counter_create_typedW, (const wchar_t *name, const wchar_t *domain, __itt_metadata_type type)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_counter, counter_create_typed, (const char *name, const char *domain, __itt_metadata_type type)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_counter_create_typedA ITTNOTIFY_DATA(counter_create_typedA) +#define __itt_counter_create_typedA_ptr ITTNOTIFY_NAME(counter_create_typedA) +#define __itt_counter_create_typedW ITTNOTIFY_DATA(counter_create_typedW) +#define __itt_counter_create_typedW_ptr ITTNOTIFY_NAME(counter_create_typedW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_counter_create_typed ITTNOTIFY_DATA(counter_create_typed) +#define __itt_counter_create_typed_ptr ITTNOTIFY_NAME(counter_create_typed) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_counter_create_typedA(name, domain, type) +#define __itt_counter_create_typedA_ptr 0 +#define __itt_counter_create_typedW(name, domain, type) +#define __itt_counter_create_typedW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_counter_create_typed(name, domain, type) +#define __itt_counter_create_typed_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_counter_create_typedA_ptr 0 +#define __itt_counter_create_typedW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_counter_create_typed_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Destroy the counter identified by the pointer previously returned by __itt_counter_create() or + * __itt_counter_create_typed() + */ +void ITTAPI __itt_counter_destroy(__itt_counter id); + +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, counter_destroy, (__itt_counter id)) +#define __itt_counter_destroy ITTNOTIFY_VOID(counter_destroy) +#define __itt_counter_destroy_ptr ITTNOTIFY_NAME(counter_destroy) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_counter_destroy(id) +#define __itt_counter_destroy_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_counter_destroy_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} counters group */ + +/** * @ingroup markers * @brief Create a marker instance. 
* @param[in] domain The domain for this marker @@ -3638,68 +3638,68 @@ ITT_STUBV(ITTAPI, void, enable_attach, (void)) /** @endcond */ -/** - * @brief Module load info - * This API is used to report necessary information in case of module relocation - * @param[in] start_addr - relocated module start address - * @param[in] end_addr - relocated module end address - * @param[in] path - file system path to the module - */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -void ITTAPI __itt_module_loadA(void *start_addr, void *end_addr, const char *path); -void ITTAPI __itt_module_loadW(void *start_addr, void *end_addr, const wchar_t *path); -#if defined(UNICODE) || defined(_UNICODE) -# define __itt_module_load __itt_module_loadW -# define __itt_module_load_ptr __itt_module_loadW_ptr -#else /* UNICODE */ -# define __itt_module_load __itt_module_loadA -# define __itt_module_load_ptr __itt_module_loadA_ptr -#endif /* UNICODE */ -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -void ITTAPI __itt_module_load(void *start_addr, void *end_addr, const char *path); -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -/** @cond exclude_from_documentation */ -#ifndef INTEL_NO_MACRO_BODY -#ifndef INTEL_NO_ITTNOTIFY_API -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(ITTAPI, void, module_loadA, (void *start_addr, void *end_addr, const char *path)) -ITT_STUB(ITTAPI, void, module_loadW, (void *start_addr, void *end_addr, const wchar_t *path)) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUB(ITTAPI, void, module_load, (void *start_addr, void *end_addr, const char *path)) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_module_loadA ITTNOTIFY_VOID(module_loadA) -#define __itt_module_loadA_ptr ITTNOTIFY_NAME(module_loadA) -#define __itt_module_loadW ITTNOTIFY_VOID(module_loadW) -#define __itt_module_loadW_ptr ITTNOTIFY_NAME(module_loadW) -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_module_load ITTNOTIFY_VOID(module_load) -#define __itt_module_load_ptr ITTNOTIFY_NAME(module_load) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#else /* INTEL_NO_ITTNOTIFY_API */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_module_loadA(start_addr, end_addr, path) -#define __itt_module_loadA_ptr 0 -#define __itt_module_loadW(start_addr, end_addr, path) -#define __itt_module_loadW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_module_load(start_addr, end_addr, path) -#define __itt_module_load_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_ITTNOTIFY_API */ -#else /* INTEL_NO_MACRO_BODY */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -#define __itt_module_loadA_ptr 0 -#define __itt_module_loadW_ptr 0 -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#define __itt_module_load_ptr 0 -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* INTEL_NO_MACRO_BODY */ -/** @endcond */ - - - +/** + * @brief Module load info + * This API is used to report necessary information in case of module relocation + * @param[in] start_addr - relocated module start address + * @param[in] end_addr - relocated module end address + * @param[in] path - file system path to the module + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +void ITTAPI __itt_module_loadA(void *start_addr, void *end_addr, const char *path); +void ITTAPI __itt_module_loadW(void *start_addr, void *end_addr, const wchar_t *path); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_module_load __itt_module_loadW +# define __itt_module_load_ptr __itt_module_loadW_ptr +#else /* UNICODE */ +# define __itt_module_load 
__itt_module_loadA +# define __itt_module_load_ptr __itt_module_loadA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +void ITTAPI __itt_module_load(void *start_addr, void *end_addr, const char *path); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, void, module_loadA, (void *start_addr, void *end_addr, const char *path)) +ITT_STUB(ITTAPI, void, module_loadW, (void *start_addr, void *end_addr, const wchar_t *path)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, void, module_load, (void *start_addr, void *end_addr, const char *path)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_module_loadA ITTNOTIFY_VOID(module_loadA) +#define __itt_module_loadA_ptr ITTNOTIFY_NAME(module_loadA) +#define __itt_module_loadW ITTNOTIFY_VOID(module_loadW) +#define __itt_module_loadW_ptr ITTNOTIFY_NAME(module_loadW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_module_load ITTNOTIFY_VOID(module_load) +#define __itt_module_load_ptr ITTNOTIFY_NAME(module_load) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_module_loadA(start_addr, end_addr, path) +#define __itt_module_loadA_ptr 0 +#define __itt_module_loadW(start_addr, end_addr, path) +#define __itt_module_loadW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_module_load(start_addr, end_addr, path) +#define __itt_module_load_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_module_loadA_ptr 0 +#define __itt_module_loadW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_module_load_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + + + #ifdef __cplusplus } #endif /* __cplusplus */ @@ -4047,7 +4047,7 @@ ITT_STUB(ITTAPI, __itt_caller, stack_caller_create, (void)) /** @endcond */ /** - * @brief Destroy the information about stitch point identified by the pointer previously returned by __itt_stack_caller_create() + * @brief Destroy the information about stitch point identified by the pointer previously returned by __itt_stack_caller_create() */ void ITTAPI __itt_stack_caller_destroy(__itt_caller id); diff --git a/contrib/libs/tbb/src/tbb/tools_api/ittnotify_config.h b/contrib/libs/tbb/src/tbb/tools_api/ittnotify_config.h index 446798bd3c..c25730d522 100644 --- a/contrib/libs/tbb/src/tbb/tools_api/ittnotify_config.h +++ b/contrib/libs/tbb/src/tbb/tools_api/ittnotify_config.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
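The counter interface restored in ittnotify.h above is meant to be driven from application code roughly as follows. This is a minimal usage sketch in C, not part of the patch; the counter name "allocations" and the domain "Example" are made up for illustration, and every call shown uses the signature declared in the hunks above.

#include "ittnotify.h"

/* Create one unsigned 64-bit counter up front, then update it from the hot path.
   With no collector attached, the calls dispatch to no-op stubs. */
static __itt_counter g_alloc_counter;

void example_init(void)
{
    /* Equivalent to __itt_counter_create_typed("allocations", "Example", __itt_metadata_u64). */
    g_alloc_counter = __itt_counter_create("allocations", "Example");
}

void example_on_alloc(unsigned long long bytes)
{
    __itt_counter_inc(g_alloc_counter);               /* value += 1 */
    __itt_counter_inc_delta(g_alloc_counter, bytes);  /* value += bytes */
}

void example_shutdown(void)
{
    unsigned long long zero = 0;
    __itt_counter_set_value(g_alloc_counter, &zero);  /* value_ptr points at an unsigned 64-bit value */
    __itt_counter_destroy(g_alloc_counter);
}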
@@ -88,17 +88,17 @@ #endif /* UNICODE || _UNICODE */ #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#ifndef ITTAPI_CDECL +#ifndef ITTAPI_CDECL # if ITT_PLATFORM==ITT_PLATFORM_WIN -# define ITTAPI_CDECL __cdecl +# define ITTAPI_CDECL __cdecl # else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ # if defined _M_IX86 || defined __i386__ -# define ITTAPI_CDECL __attribute__ ((cdecl)) +# define ITTAPI_CDECL __attribute__ ((cdecl)) # else /* _M_IX86 || __i386__ */ -# define ITTAPI_CDECL /* actual only on x86 platform */ +# define ITTAPI_CDECL /* actual only on x86 platform */ # endif /* _M_IX86 || __i386__ */ # endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* ITTAPI_CDECL */ +#endif /* ITTAPI_CDECL */ #ifndef STDCALL # if ITT_PLATFORM==ITT_PLATFORM_WIN @@ -112,12 +112,12 @@ # endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ #endif /* STDCALL */ -#define ITTAPI ITTAPI_CDECL -#define LIBITTAPI ITTAPI_CDECL +#define ITTAPI ITTAPI_CDECL +#define LIBITTAPI ITTAPI_CDECL /* TODO: Temporary for compatibility! */ -#define ITTAPI_CALL ITTAPI_CDECL -#define LIBITTAPI_CALL ITTAPI_CDECL +#define ITTAPI_CALL ITTAPI_CDECL +#define LIBITTAPI_CALL ITTAPI_CDECL #if ITT_PLATFORM==ITT_PLATFORM_WIN /* use __forceinline (VC++ specific) */ @@ -162,7 +162,7 @@ # define ITT_ARCH ITT_ARCH_IA32E # elif defined _M_IA64 || defined __ia64__ # define ITT_ARCH ITT_ARCH_IA64 -# elif defined _M_ARM || defined __arm__ +# elif defined _M_ARM || defined __arm__ # define ITT_ARCH ITT_ARCH_ARM # elif defined __powerpc64__ # define ITT_ARCH ITT_ARCH_PPC64 @@ -192,7 +192,7 @@ #define ITT_MAGIC { 0xED, 0xAB, 0xAB, 0xEC, 0x0D, 0xEE, 0xDA, 0x30 } /* Replace with snapshot date YYYYMMDD for promotion build. */ -#define API_VERSION_BUILD 20180723 +#define API_VERSION_BUILD 20180723 #ifndef API_VERSION_NUM #define API_VERSION_NUM 0.0.0 @@ -252,10 +252,10 @@ ITT_INLINE long __itt_interlocked_increment(volatile long* ptr) return InterlockedIncrement(ptr); } #endif /* ITT_SIMPLE_INIT */ - -#define DL_SYMBOLS (1) -#define PTHREAD_SYMBOLS (1) - + +#define DL_SYMBOLS (1) +#define PTHREAD_SYMBOLS (1) + #else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ #define __itt_get_proc(lib, name) dlsym(lib, name) #define __itt_mutex_init(mutex) {\ @@ -294,16 +294,16 @@ ITT_INLINE long __itt_interlocked_increment(volatile long* ptr) #ifdef SDL_STRNCPY_S #define __itt_fstrcpyn(s1, b, s2, l) SDL_STRNCPY_S(s1, b, s2, l) #else -#define __itt_fstrcpyn(s1, b, s2, l) { \ - if (b > 0) { \ - /* 'volatile' is used to suppress the warning that a destination */ \ - /* bound depends on the length of the source. */ \ - volatile size_t num_to_copy = (size_t)(b - 1) < (size_t)(l) ? \ - (size_t)(b - 1) : (size_t)(l); \ - strncpy(s1, s2, num_to_copy); \ - s1[num_to_copy] = 0; \ - } \ -} +#define __itt_fstrcpyn(s1, b, s2, l) { \ + if (b > 0) { \ + /* 'volatile' is used to suppress the warning that a destination */ \ + /* bound depends on the length of the source. */ \ + volatile size_t num_to_copy = (size_t)(b - 1) < (size_t)(l) ? 
\ + (size_t)(b - 1) : (size_t)(l); \ + strncpy(s1, s2, num_to_copy); \ + s1[num_to_copy] = 0; \ + } \ +} #endif /* SDL_STRNCPY_S */ #define __itt_fstrdup(s) strdup(s) @@ -338,22 +338,22 @@ ITT_INLINE long __itt_interlocked_increment(volatile long* ptr) return __TBB_machine_fetchadd4(ptr, 1) + 1L; } #endif /* ITT_SIMPLE_INIT */ - -void* dlopen(const char*, int) __attribute__((weak)); -void* dlsym(void*, const char*) __attribute__((weak)); -int dlclose(void*) __attribute__((weak)); -#define DL_SYMBOLS (dlopen && dlsym && dlclose) - -int pthread_mutex_init(pthread_mutex_t*, const pthread_mutexattr_t*) __attribute__((weak)); -int pthread_mutex_lock(pthread_mutex_t*) __attribute__((weak)); -int pthread_mutex_unlock(pthread_mutex_t*) __attribute__((weak)); -int pthread_mutex_destroy(pthread_mutex_t*) __attribute__((weak)); -int pthread_mutexattr_init(pthread_mutexattr_t*) __attribute__((weak)); -int pthread_mutexattr_settype(pthread_mutexattr_t*, int) __attribute__((weak)); -int pthread_mutexattr_destroy(pthread_mutexattr_t*) __attribute__((weak)); -pthread_t pthread_self(void) __attribute__((weak)); -#define PTHREAD_SYMBOLS (pthread_mutex_init && pthread_mutex_lock && pthread_mutex_unlock && pthread_mutex_destroy && pthread_mutexattr_init && pthread_mutexattr_settype && pthread_mutexattr_destroy && pthread_self) - + +void* dlopen(const char*, int) __attribute__((weak)); +void* dlsym(void*, const char*) __attribute__((weak)); +int dlclose(void*) __attribute__((weak)); +#define DL_SYMBOLS (dlopen && dlsym && dlclose) + +int pthread_mutex_init(pthread_mutex_t*, const pthread_mutexattr_t*) __attribute__((weak)); +int pthread_mutex_lock(pthread_mutex_t*) __attribute__((weak)); +int pthread_mutex_unlock(pthread_mutex_t*) __attribute__((weak)); +int pthread_mutex_destroy(pthread_mutex_t*) __attribute__((weak)); +int pthread_mutexattr_init(pthread_mutexattr_t*) __attribute__((weak)); +int pthread_mutexattr_settype(pthread_mutexattr_t*, int) __attribute__((weak)); +int pthread_mutexattr_destroy(pthread_mutexattr_t*) __attribute__((weak)); +pthread_t pthread_self(void) __attribute__((weak)); +#define PTHREAD_SYMBOLS (pthread_mutex_init && pthread_mutex_lock && pthread_mutex_unlock && pthread_mutex_destroy && pthread_mutexattr_init && pthread_mutexattr_settype && pthread_mutexattr_destroy && pthread_self) + #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ typedef enum { @@ -402,27 +402,27 @@ typedef struct ___itt_api_info __itt_group_id group; } __itt_api_info; -typedef struct __itt_counter_info -{ - const char* nameA; /*!< Copy of original name in ASCII. */ -#if defined(UNICODE) || defined(_UNICODE) - const wchar_t* nameW; /*!< Copy of original name in UNICODE. */ -#else /* UNICODE || _UNICODE */ - void* nameW; -#endif /* UNICODE || _UNICODE */ - const char* domainA; /*!< Copy of original name in ASCII. */ -#if defined(UNICODE) || defined(_UNICODE) - const wchar_t* domainW; /*!< Copy of original name in UNICODE. */ -#else /* UNICODE || _UNICODE */ - void* domainW; -#endif /* UNICODE || _UNICODE */ - int type; - long index; - int extra1; /*!< Reserved to the runtime */ - void* extra2; /*!< Reserved to the runtime */ - struct __itt_counter_info* next; -} __itt_counter_info_t; - +typedef struct __itt_counter_info +{ + const char* nameA; /*!< Copy of original name in ASCII. */ +#if defined(UNICODE) || defined(_UNICODE) + const wchar_t* nameW; /*!< Copy of original name in UNICODE. 
*/ +#else /* UNICODE || _UNICODE */ + void* nameW; +#endif /* UNICODE || _UNICODE */ + const char* domainA; /*!< Copy of original name in ASCII. */ +#if defined(UNICODE) || defined(_UNICODE) + const wchar_t* domainW; /*!< Copy of original name in UNICODE. */ +#else /* UNICODE || _UNICODE */ + void* domainW; +#endif /* UNICODE || _UNICODE */ + int type; + long index; + int extra1; /*!< Reserved to the runtime */ + void* extra2; /*!< Reserved to the runtime */ + struct __itt_counter_info* next; +} __itt_counter_info_t; + struct ___itt_domain; struct ___itt_string_handle; @@ -446,8 +446,8 @@ typedef struct ___itt_global struct ___itt_domain* domain_list; struct ___itt_string_handle* string_list; __itt_collection_state state; - __itt_counter_info_t* counter_list; - unsigned int ipt_collect_events; + __itt_counter_info_t* counter_list; + unsigned int ipt_collect_events; } __itt_global; #pragma pack(pop) @@ -548,38 +548,38 @@ typedef struct ___itt_global } \ } -#define NEW_COUNTER_W(gptr,h,h_tail,name,domain,type) { \ - h = (__itt_counter_info_t*)malloc(sizeof(__itt_counter_info_t)); \ - if (h != NULL) { \ - h->nameA = NULL; \ - h->nameW = name ? _wcsdup(name) : NULL; \ - h->domainA = NULL; \ - h->domainW = name ? _wcsdup(domain) : NULL; \ - h->type = type; \ - h->index = 0; \ - h->next = NULL; \ - if (h_tail == NULL) \ - (gptr)->counter_list = h; \ - else \ - h_tail->next = h; \ - } \ -} - -#define NEW_COUNTER_A(gptr,h,h_tail,name,domain,type) { \ - h = (__itt_counter_info_t*)malloc(sizeof(__itt_counter_info_t)); \ - if (h != NULL) { \ - h->nameA = name ? __itt_fstrdup(name) : NULL; \ - h->nameW = NULL; \ - h->domainA = domain ? __itt_fstrdup(domain) : NULL; \ - h->domainW = NULL; \ - h->type = type; \ - h->index = 0; \ - h->next = NULL; \ - if (h_tail == NULL) \ - (gptr)->counter_list = h; \ - else \ - h_tail->next = h; \ - } \ -} - +#define NEW_COUNTER_W(gptr,h,h_tail,name,domain,type) { \ + h = (__itt_counter_info_t*)malloc(sizeof(__itt_counter_info_t)); \ + if (h != NULL) { \ + h->nameA = NULL; \ + h->nameW = name ? _wcsdup(name) : NULL; \ + h->domainA = NULL; \ + h->domainW = name ? _wcsdup(domain) : NULL; \ + h->type = type; \ + h->index = 0; \ + h->next = NULL; \ + if (h_tail == NULL) \ + (gptr)->counter_list = h; \ + else \ + h_tail->next = h; \ + } \ +} + +#define NEW_COUNTER_A(gptr,h,h_tail,name,domain,type) { \ + h = (__itt_counter_info_t*)malloc(sizeof(__itt_counter_info_t)); \ + if (h != NULL) { \ + h->nameA = name ? __itt_fstrdup(name) : NULL; \ + h->nameW = NULL; \ + h->domainA = domain ? __itt_fstrdup(domain) : NULL; \ + h->domainW = NULL; \ + h->type = type; \ + h->index = 0; \ + h->next = NULL; \ + if (h_tail == NULL) \ + (gptr)->counter_list = h; \ + else \ + h_tail->next = h; \ + } \ +} + #endif /* _ITTNOTIFY_CONFIG_H_ */ diff --git a/contrib/libs/tbb/src/tbb/tools_api/ittnotify_static.c b/contrib/libs/tbb/src/tbb/tools_api/ittnotify_static.c index 1e2609a1c5..dd8ca8e755 100644 --- a/contrib/libs/tbb/src/tbb/tools_api/ittnotify_static.c +++ b/contrib/libs/tbb/src/tbb/tools_api/ittnotify_static.c @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
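The DL_SYMBOLS and PTHREAD_SYMBOLS macros restored in the ittnotify_config.h hunk above rely on a weak-symbol guard: each libdl/libpthread entry point is redeclared with __attribute__((weak)), so its address resolves to null when the corresponding library is not linked in and the call can simply be skipped. Below is a stripped-down sketch of the same pattern, assuming a GCC/Clang toolchain; it is illustrative only and not taken from the patch.

#include <pthread.h>
#include <stdio.h>

/* Weak redeclarations: resolve to NULL instead of failing to link
   when libpthread is absent. */
int pthread_mutex_lock(pthread_mutex_t*) __attribute__((weak));
int pthread_mutex_unlock(pthread_mutex_t*) __attribute__((weak));

#define HAVE_PTHREAD (pthread_mutex_lock && pthread_mutex_unlock)

static pthread_mutex_t g_lock = PTHREAD_MUTEX_INITIALIZER;

void guarded_section(void)
{
    if (HAVE_PTHREAD) pthread_mutex_lock(&g_lock);    /* lock only if the symbol exists */
    puts("critical work");
    if (HAVE_PTHREAD) pthread_mutex_unlock(&g_lock);
}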
@@ -28,14 +28,14 @@ #include <stdarg.h> #include <string.h> -#define INTEL_NO_MACRO_BODY +#define INTEL_NO_MACRO_BODY #define INTEL_ITTNOTIFY_API_PRIVATE #include "ittnotify.h" #include "legacy/ittnotify.h" #include "disable_warnings.h" -static const char api_version[] = API_VERSION "\0\n@(#) $Revision$\n"; +static const char api_version[] = API_VERSION "\0\n@(#) $Revision$\n"; #define _N_(n) ITT_JOIN(INTEL_ITTNOTIFY_PREFIX,n) @@ -93,8 +93,8 @@ static const char* ittnotify_lib_name = "libittnotify.dylib"; #endif /* LIB_VAR_NAME */ #define ITT_MUTEX_INIT_AND_LOCK(p) { \ - if (PTHREAD_SYMBOLS) \ - { \ + if (PTHREAD_SYMBOLS) \ + { \ if (!p.mutex_initialized) \ { \ if (__itt_interlocked_increment(&p.atomic_counter) == 1) \ @@ -107,7 +107,7 @@ static const char* ittnotify_lib_name = "libittnotify.dylib"; __itt_thread_yield(); \ } \ __itt_mutex_lock(&p.mutex); \ - } \ + } \ } const int _N_(err) = 0; @@ -130,8 +130,8 @@ static __itt_fini_ittlib_t* __itt_fini_ittlib_ptr = _N_(fini_ittlib); #define __itt_fini_ittlib_name __itt_fini_ittlib_ptr #endif /* __itt_fini_ittlib_name */ -extern __itt_global _N_(_ittapi_global); - +extern __itt_global _N_(_ittapi_global); + /* building pointers to imported funcs */ #undef ITT_STUBV #undef ITT_STUB @@ -141,8 +141,8 @@ typedef type api ITT_JOIN(_N_(name),_t) args; \ ITT_EXTERN_C_BEGIN ITT_JOIN(_N_(name),_t)* ITTNOTIFY_NAME(name) = ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)); ITT_EXTERN_C_END \ static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args \ { \ - if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list == NULL) \ - __itt_init_ittlib_name(NULL, __itt_group_all); \ + if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list == NULL) \ + __itt_init_ittlib_name(NULL, __itt_group_all); \ if (ITTNOTIFY_NAME(name) && ITTNOTIFY_NAME(name) != ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init))) \ return ITTNOTIFY_NAME(name) params; \ else \ @@ -155,8 +155,8 @@ typedef type api ITT_JOIN(_N_(name),_t) args; \ ITT_EXTERN_C_BEGIN ITT_JOIN(_N_(name),_t)* ITTNOTIFY_NAME(name) = ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)); ITT_EXTERN_C_END \ static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args \ { \ - if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list == NULL) \ - __itt_init_ittlib_name(NULL, __itt_group_all); \ + if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list == NULL) \ + __itt_init_ittlib_name(NULL, __itt_group_all); \ if (ITTNOTIFY_NAME(name) && ITTNOTIFY_NAME(name) != ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init))) \ ITTNOTIFY_NAME(name) params; \ else \ @@ -246,9 +246,9 @@ __itt_global _N_(_ittapi_global) = { NULL, /* thread_list */ NULL, /* domain_list */ NULL, /* string_list */ - __itt_collection_normal, /* collection state */ - NULL, /* counter_list */ - 0 /* ipt_collect_events */ + __itt_collection_normal, /* collection state */ + NULL, /* counter_list */ + 0 /* ipt_collect_events */ }; typedef void (__itt_api_init_t)(__itt_global*, __itt_group_id); @@ -315,7 +315,7 @@ static __itt_domain* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(domain_createW),_init))( { NEW_DOMAIN_W(&_N_(_ittapi_global),h,h_tail,name); } - if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); return h; } @@ -343,7 +343,7 @@ static __itt_domain* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(domain_create),_init))(c #else if (ITTNOTIFY_NAME(domain_create) && ITTNOTIFY_NAME(domain_create) != 
ITT_VERSIONIZE(ITT_JOIN(_N_(domain_create),_init))) { - if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); return ITTNOTIFY_NAME(domain_create)(name); } #endif @@ -356,7 +356,7 @@ static __itt_domain* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(domain_create),_init))(c { NEW_DOMAIN_A(&_N_(_ittapi_global),h,h_tail,name); } - if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); return h; } @@ -383,7 +383,7 @@ static __itt_string_handle* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_cre { if (h->strW != NULL && !wcscmp(h->strW, name)) break; } - if (h == NULL) + if (h == NULL) { NEW_STRING_HANDLE_W(&_N_(_ittapi_global),h,h_tail,name); } @@ -415,7 +415,7 @@ static __itt_string_handle* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_cre #else if (ITTNOTIFY_NAME(string_handle_create) && ITTNOTIFY_NAME(string_handle_create) != ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_create),_init))) { - if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); return ITTNOTIFY_NAME(string_handle_create)(name); } #endif @@ -428,162 +428,162 @@ static __itt_string_handle* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_cre { NEW_STRING_HANDLE_A(&_N_(_ittapi_global),h,h_tail,name); } - if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); return h; } -#if ITT_PLATFORM==ITT_PLATFORM_WIN -static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_createW),_init))(const wchar_t *name, const wchar_t *domain) -{ - __itt_counter_info_t *h_tail = NULL, *h = NULL; - __itt_metadata_type type = __itt_metadata_u64; - - if (name == NULL) - { - return NULL; - } - - ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); - if (_N_(_ittapi_global).api_initialized) - { - if (ITTNOTIFY_NAME(counter_createW) && ITTNOTIFY_NAME(counter_createW) != ITT_VERSIONIZE(ITT_JOIN(_N_(counter_createW),_init))) - { - __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return ITTNOTIFY_NAME(counter_createW)(name, domain); - } - } - for (h_tail = NULL, h = _N_(_ittapi_global).counter_list; h != NULL; h_tail = h, h = h->next) - { - if (h->nameW != NULL && h->type == type && !wcscmp(h->nameW, name) && ((h->domainW == NULL && domain == NULL) || - (h->domainW != NULL && domain != NULL && !wcscmp(h->domainW, domain)))) break; - - } - if (h == NULL) - { - NEW_COUNTER_W(&_N_(_ittapi_global),h,h_tail,name,domain,type); - } - __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return (__itt_counter)h; -} - -static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_createA),_init))(const char *name, const char *domain) -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ -static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create),_init))(const char *name, const char *domain) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -{ - __itt_counter_info_t *h_tail = NULL, *h = NULL; - __itt_metadata_type type = __itt_metadata_u64; - - if (name == NULL) - { - return NULL; - } - - ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); - if (_N_(_ittapi_global).api_initialized) - { -#if ITT_PLATFORM==ITT_PLATFORM_WIN - if (ITTNOTIFY_NAME(counter_createA) && ITTNOTIFY_NAME(counter_createA) != ITT_VERSIONIZE(ITT_JOIN(_N_(counter_createA),_init))) - { - __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return ITTNOTIFY_NAME(counter_createA)(name, 
domain); - } -#else - if (ITTNOTIFY_NAME(counter_create) && ITTNOTIFY_NAME(counter_create) != ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create),_init))) - { - if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return ITTNOTIFY_NAME(counter_create)(name, domain); - } -#endif - } - for (h_tail = NULL, h = _N_(_ittapi_global).counter_list; h != NULL; h_tail = h, h = h->next) - { - if (h->nameA != NULL && h->type == type && !__itt_fstrcmp(h->nameA, name) && ((h->domainA == NULL && domain == NULL) || - (h->domainA != NULL && domain != NULL && !__itt_fstrcmp(h->domainA, domain)))) break; - } - if (h == NULL) - { - NEW_COUNTER_A(&_N_(_ittapi_global),h,h_tail,name,domain,type); - } - if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return (__itt_counter)h; -} - -#if ITT_PLATFORM==ITT_PLATFORM_WIN -static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create_typedW),_init))(const wchar_t *name, const wchar_t *domain, __itt_metadata_type type) -{ - __itt_counter_info_t *h_tail = NULL, *h = NULL; - - if (name == NULL) - { - return NULL; - } - - ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); - if (_N_(_ittapi_global).api_initialized) - { - if (ITTNOTIFY_NAME(counter_create_typedW) && ITTNOTIFY_NAME(counter_create_typedW) != ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create_typedW),_init))) - { - __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return ITTNOTIFY_NAME(counter_create_typedW)(name, domain, type); - } - } - for (h_tail = NULL, h = _N_(_ittapi_global).counter_list; h != NULL; h_tail = h, h = h->next) - { - if (h->nameW != NULL && h->type == type && !wcscmp(h->nameW, name) && ((h->domainW == NULL && domain == NULL) || - (h->domainW != NULL && domain != NULL && !wcscmp(h->domainW, domain)))) break; - - } - if (h == NULL) - { - NEW_COUNTER_W(&_N_(_ittapi_global),h,h_tail,name,domain,type); - } - __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return (__itt_counter)h; -} - -static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create_typedA),_init))(const char *name, const char *domain, __itt_metadata_type type) -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ -static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create_typed),_init))(const char *name, const char *domain, __itt_metadata_type type) -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -{ - __itt_counter_info_t *h_tail = NULL, *h = NULL; - - if (name == NULL) - { - return NULL; - } - - ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); - if (_N_(_ittapi_global).api_initialized) - { -#if ITT_PLATFORM==ITT_PLATFORM_WIN - if (ITTNOTIFY_NAME(counter_create_typedA) && ITTNOTIFY_NAME(counter_create_typedA) != ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create_typedA),_init))) - { - __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return ITTNOTIFY_NAME(counter_create_typedA)(name, domain, type); - } -#else - if (ITTNOTIFY_NAME(counter_create_typed) && ITTNOTIFY_NAME(counter_create_typed) != ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create_typed),_init))) - { - if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return ITTNOTIFY_NAME(counter_create_typed)(name, domain, type); - } -#endif - } - for (h_tail = NULL, h = _N_(_ittapi_global).counter_list; h != NULL; h_tail = h, h = h->next) - { - if (h->nameA != NULL && h->type == type && !__itt_fstrcmp(h->nameA, name) && ((h->domainA == NULL && domain == NULL) || - (h->domainA != NULL && domain != NULL && !__itt_fstrcmp(h->domainA, domain)))) break; - } - if (h == NULL) - { - NEW_COUNTER_A(&_N_(_ittapi_global),h,h_tail,name,domain,type); - } - 
if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); - return (__itt_counter)h; -} - +#if ITT_PLATFORM==ITT_PLATFORM_WIN +static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_createW),_init))(const wchar_t *name, const wchar_t *domain) +{ + __itt_counter_info_t *h_tail = NULL, *h = NULL; + __itt_metadata_type type = __itt_metadata_u64; + + if (name == NULL) + { + return NULL; + } + + ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); + if (_N_(_ittapi_global).api_initialized) + { + if (ITTNOTIFY_NAME(counter_createW) && ITTNOTIFY_NAME(counter_createW) != ITT_VERSIONIZE(ITT_JOIN(_N_(counter_createW),_init))) + { + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return ITTNOTIFY_NAME(counter_createW)(name, domain); + } + } + for (h_tail = NULL, h = _N_(_ittapi_global).counter_list; h != NULL; h_tail = h, h = h->next) + { + if (h->nameW != NULL && h->type == type && !wcscmp(h->nameW, name) && ((h->domainW == NULL && domain == NULL) || + (h->domainW != NULL && domain != NULL && !wcscmp(h->domainW, domain)))) break; + + } + if (h == NULL) + { + NEW_COUNTER_W(&_N_(_ittapi_global),h,h_tail,name,domain,type); + } + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return (__itt_counter)h; +} + +static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_createA),_init))(const char *name, const char *domain) +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create),_init))(const char *name, const char *domain) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +{ + __itt_counter_info_t *h_tail = NULL, *h = NULL; + __itt_metadata_type type = __itt_metadata_u64; + + if (name == NULL) + { + return NULL; + } + + ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); + if (_N_(_ittapi_global).api_initialized) + { +#if ITT_PLATFORM==ITT_PLATFORM_WIN + if (ITTNOTIFY_NAME(counter_createA) && ITTNOTIFY_NAME(counter_createA) != ITT_VERSIONIZE(ITT_JOIN(_N_(counter_createA),_init))) + { + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return ITTNOTIFY_NAME(counter_createA)(name, domain); + } +#else + if (ITTNOTIFY_NAME(counter_create) && ITTNOTIFY_NAME(counter_create) != ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create),_init))) + { + if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return ITTNOTIFY_NAME(counter_create)(name, domain); + } +#endif + } + for (h_tail = NULL, h = _N_(_ittapi_global).counter_list; h != NULL; h_tail = h, h = h->next) + { + if (h->nameA != NULL && h->type == type && !__itt_fstrcmp(h->nameA, name) && ((h->domainA == NULL && domain == NULL) || + (h->domainA != NULL && domain != NULL && !__itt_fstrcmp(h->domainA, domain)))) break; + } + if (h == NULL) + { + NEW_COUNTER_A(&_N_(_ittapi_global),h,h_tail,name,domain,type); + } + if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return (__itt_counter)h; +} + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create_typedW),_init))(const wchar_t *name, const wchar_t *domain, __itt_metadata_type type) +{ + __itt_counter_info_t *h_tail = NULL, *h = NULL; + + if (name == NULL) + { + return NULL; + } + + ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); + if (_N_(_ittapi_global).api_initialized) + { + if (ITTNOTIFY_NAME(counter_create_typedW) && ITTNOTIFY_NAME(counter_create_typedW) != ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create_typedW),_init))) + { + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return ITTNOTIFY_NAME(counter_create_typedW)(name, domain, type); + } + } + for (h_tail = 
NULL, h = _N_(_ittapi_global).counter_list; h != NULL; h_tail = h, h = h->next) + { + if (h->nameW != NULL && h->type == type && !wcscmp(h->nameW, name) && ((h->domainW == NULL && domain == NULL) || + (h->domainW != NULL && domain != NULL && !wcscmp(h->domainW, domain)))) break; + + } + if (h == NULL) + { + NEW_COUNTER_W(&_N_(_ittapi_global),h,h_tail,name,domain,type); + } + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return (__itt_counter)h; +} + +static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create_typedA),_init))(const char *name, const char *domain, __itt_metadata_type type) +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create_typed),_init))(const char *name, const char *domain, __itt_metadata_type type) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +{ + __itt_counter_info_t *h_tail = NULL, *h = NULL; + + if (name == NULL) + { + return NULL; + } + + ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); + if (_N_(_ittapi_global).api_initialized) + { +#if ITT_PLATFORM==ITT_PLATFORM_WIN + if (ITTNOTIFY_NAME(counter_create_typedA) && ITTNOTIFY_NAME(counter_create_typedA) != ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create_typedA),_init))) + { + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return ITTNOTIFY_NAME(counter_create_typedA)(name, domain, type); + } +#else + if (ITTNOTIFY_NAME(counter_create_typed) && ITTNOTIFY_NAME(counter_create_typed) != ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create_typed),_init))) + { + if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return ITTNOTIFY_NAME(counter_create_typed)(name, domain, type); + } +#endif + } + for (h_tail = NULL, h = _N_(_ittapi_global).counter_list; h != NULL; h_tail = h, h = h->next) + { + if (h->nameA != NULL && h->type == type && !__itt_fstrcmp(h->nameA, name) && ((h->domainA == NULL && domain == NULL) || + (h->domainA != NULL && domain != NULL && !__itt_fstrcmp(h->domainA, domain)))) break; + } + if (h == NULL) + { + NEW_COUNTER_A(&_N_(_ittapi_global),h,h_tail,name,domain,type); + } + if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return (__itt_counter)h; +} + /* -------------------------------------------------------------------------- */ static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(pause),_init))(void) @@ -764,7 +764,7 @@ static const char* __itt_fsplit(const char* s, const char* sep, const char** out /* This function return value of env variable that placed into static buffer. * !!! The same static buffer is used for subsequent calls. !!! - * This was done to avoid dynamic allocation for few calls. + * This was done to avoid dynamic allocation for few calls. * Actually we need this function only four times. */ static const char* __itt_get_env_var(const char* name) @@ -938,7 +938,7 @@ static const char* __itt_get_lib_name(void) return lib_name; } -/* Avoid clashes with std::min */ +/* Avoid clashes with std::min */ #define __itt_min(a,b) ((a) < (b) ? 
(a) : (b)) static __itt_group_id __itt_get_groups(void) @@ -955,9 +955,9 @@ static __itt_group_id __itt_get_groups(void) const char* chunk; while ((group_str = __itt_fsplit(group_str, ",; ", &chunk, &len)) != NULL) { - int min_len = __itt_min(len, (int)(sizeof(gr) - 1)); - __itt_fstrcpyn(gr, sizeof(gr) - 1, chunk, min_len); - gr[min_len] = 0; + int min_len = __itt_min(len, (int)(sizeof(gr) - 1)); + __itt_fstrcpyn(gr, sizeof(gr) - 1, chunk, min_len); + gr[min_len] = 0; for (i = 0; group_list[i].name != NULL; i++) { @@ -987,7 +987,7 @@ static __itt_group_id __itt_get_groups(void) return res; } - + #undef __itt_min static int __itt_lib_version(lib_t lib) @@ -1004,7 +1004,7 @@ static int __itt_lib_version(lib_t lib) /* It's not used right now! Comment it out to avoid warnings. static void __itt_reinit_all_pointers(void) { - register int i; + register int i; // Fill all pointers with initial stubs for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++) *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].init_func; @@ -1014,7 +1014,7 @@ static void __itt_reinit_all_pointers(void) static void __itt_nullify_all_pointers(void) { int i; - /* Nulify all pointers except domain_create, string_handle_create and counter_create */ + /* Nulify all pointers except domain_create, string_handle_create and counter_create */ for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++) *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func; } @@ -1032,12 +1032,12 @@ ITT_EXTERN_C void _N_(fini_ittlib)(void) if (_N_(_ittapi_global).api_initialized) { - ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); + ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); if (_N_(_ittapi_global).api_initialized) { if (current_thread == 0) { - if (PTHREAD_SYMBOLS) current_thread = __itt_thread_id(); + if (PTHREAD_SYMBOLS) current_thread = __itt_thread_id(); if (_N_(_ittapi_global).lib != NULL) { __itt_api_fini_ptr = (__itt_api_fini_t*)(size_t)__itt_get_proc(_N_(_ittapi_global).lib, "__itt_api_fini"); @@ -1058,7 +1058,7 @@ ITT_EXTERN_C void _N_(fini_ittlib)(void) current_thread = 0; } } - if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); } } @@ -1081,13 +1081,13 @@ ITT_EXTERN_C int _N_(init_ittlib)(const char* lib_name, __itt_group_id init_grou { if (current_thread == 0) { - if (PTHREAD_SYMBOLS) current_thread = __itt_thread_id(); + if (PTHREAD_SYMBOLS) current_thread = __itt_thread_id(); if (lib_name == NULL) { lib_name = __itt_get_lib_name(); } groups = __itt_get_groups(); - if (DL_SYMBOLS && (groups != __itt_group_none || lib_name != NULL)) + if (DL_SYMBOLS && (groups != __itt_group_none || lib_name != NULL)) { _N_(_ittapi_global).lib = __itt_load_lib((lib_name == NULL) ? 
ittnotify_lib_name : lib_name); @@ -1099,7 +1099,7 @@ ITT_EXTERN_C int _N_(init_ittlib)(const char* lib_name, __itt_group_id init_grou switch (lib_version) { case 0: groups = __itt_group_legacy; - /* Falls through */ + /* Falls through */ case 1: /* Fill all pointers from dynamic library */ for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++) @@ -1153,7 +1153,7 @@ ITT_EXTERN_C int _N_(init_ittlib)(const char* lib_name, __itt_group_id init_grou else { __itt_nullify_all_pointers(); - + #if ITT_PLATFORM==ITT_PLATFORM_WIN int error = __itt_system_error(); #else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ @@ -1174,7 +1174,7 @@ ITT_EXTERN_C int _N_(init_ittlib)(const char* lib_name, __itt_group_id init_grou } #ifndef ITT_SIMPLE_INIT - if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); #endif /* ITT_SIMPLE_INIT */ } @@ -1201,44 +1201,44 @@ ITT_EXTERN_C __itt_error_handler_t* _N_(set_error_handler)(__itt_error_handler_t #pragma warning(pop) #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -/** __itt_mark_pt_region functions marks region of interest - * region parameter defines different regions. - * 0 <= region < 8 */ - -#if defined(ITT_API_IPT_SUPPORT) && (ITT_PLATFORM==ITT_PLATFORM_WIN || ITT_PLATFORM==ITT_PLATFORM_POSIX) && !defined(__ANDROID__) -void __itt_pt_mark(__itt_pt_region region); -void __itt_pt_mark_event(__itt_pt_region region); -#endif - -ITT_EXTERN_C void _N_(mark_pt_region_begin)(__itt_pt_region region) -{ -#if defined(ITT_API_IPT_SUPPORT) && (ITT_PLATFORM==ITT_PLATFORM_WIN || ITT_PLATFORM==ITT_PLATFORM_POSIX) && !defined(__ANDROID__) - if (_N_(_ittapi_global).ipt_collect_events == 1) - { - __itt_pt_mark_event(2*region); - } - else - { - __itt_pt_mark(2*region); - } -#else - (void)region; -#endif -} - -ITT_EXTERN_C void _N_(mark_pt_region_end)(__itt_pt_region region) -{ -#if defined(ITT_API_IPT_SUPPORT) && (ITT_PLATFORM==ITT_PLATFORM_WIN || ITT_PLATFORM==ITT_PLATFORM_POSIX) && !defined(__ANDROID__) - if (_N_(_ittapi_global).ipt_collect_events == 1) - { - __itt_pt_mark_event(2*region + 1); - } - else - { - __itt_pt_mark(2*region + 1); - } -#else - (void)region; -#endif -} - +/** __itt_mark_pt_region functions marks region of interest + * region parameter defines different regions. 
+ * 0 <= region < 8 */ + +#if defined(ITT_API_IPT_SUPPORT) && (ITT_PLATFORM==ITT_PLATFORM_WIN || ITT_PLATFORM==ITT_PLATFORM_POSIX) && !defined(__ANDROID__) +void __itt_pt_mark(__itt_pt_region region); +void __itt_pt_mark_event(__itt_pt_region region); +#endif + +ITT_EXTERN_C void _N_(mark_pt_region_begin)(__itt_pt_region region) +{ +#if defined(ITT_API_IPT_SUPPORT) && (ITT_PLATFORM==ITT_PLATFORM_WIN || ITT_PLATFORM==ITT_PLATFORM_POSIX) && !defined(__ANDROID__) + if (_N_(_ittapi_global).ipt_collect_events == 1) + { + __itt_pt_mark_event(2*region); + } + else + { + __itt_pt_mark(2*region); + } +#else + (void)region; +#endif +} + +ITT_EXTERN_C void _N_(mark_pt_region_end)(__itt_pt_region region) +{ +#if defined(ITT_API_IPT_SUPPORT) && (ITT_PLATFORM==ITT_PLATFORM_WIN || ITT_PLATFORM==ITT_PLATFORM_POSIX) && !defined(__ANDROID__) + if (_N_(_ittapi_global).ipt_collect_events == 1) + { + __itt_pt_mark_event(2*region + 1); + } + else + { + __itt_pt_mark(2*region + 1); + } +#else + (void)region; +#endif +} + diff --git a/contrib/libs/tbb/src/tbb/tools_api/ittnotify_static.h b/contrib/libs/tbb/src/tbb/tools_api/ittnotify_static.h index 27daf3ffb7..67cf683880 100644 --- a/contrib/libs/tbb/src/tbb/tools_api/ittnotify_static.h +++ b/contrib/libs/tbb/src/tbb/tools_api/ittnotify_static.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -46,21 +46,21 @@ ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_createW, (const wchar_t *na ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_create, (const char *name), (ITT_FORMAT name), string_handle_create, __itt_group_structure, "\"%s\"") #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(ITTAPI, __itt_counter, counter_createA, (const char *name, const char *domain), (ITT_FORMAT name, domain), counter_createA, __itt_group_counter, "\"%s\", \"%s\"") -ITT_STUB(ITTAPI, __itt_counter, counter_createW, (const wchar_t *name, const wchar_t *domain), (ITT_FORMAT name, domain), counter_createW, __itt_group_counter, "\"%s\", \"%s\"") -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUB(ITTAPI, __itt_counter, counter_create, (const char *name, const char *domain), (ITT_FORMAT name, domain), counter_create, __itt_group_counter, "\"%s\", \"%s\"") -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(ITTAPI, __itt_counter, counter_create_typedA, (const char *name, const char *domain, __itt_metadata_type type), (ITT_FORMAT name, domain, type), counter_create_typedA, __itt_group_counter, "\"%s\", \"%s\", %d") -ITT_STUB(ITTAPI, __itt_counter, counter_create_typedW, (const wchar_t *name, const wchar_t *domain, __itt_metadata_type type), (ITT_FORMAT name, domain, type), counter_create_typedW, __itt_group_counter, "\"%s\", \"%s\", %d") -#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -ITT_STUB(ITTAPI, __itt_counter, counter_create_typed, (const char *name, const char *domain, __itt_metadata_type type), (ITT_FORMAT name, domain, type), counter_create_typed, __itt_group_counter, "\"%s\", \"%s\", %d") -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - - +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_counter, counter_createA, (const char *name, const char *domain), (ITT_FORMAT name, domain), counter_createA, __itt_group_counter, "\"%s\", \"%s\"") +ITT_STUB(ITTAPI, __itt_counter, counter_createW, (const wchar_t 
*name, const wchar_t *domain), (ITT_FORMAT name, domain), counter_createW, __itt_group_counter, "\"%s\", \"%s\"") +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_counter, counter_create, (const char *name, const char *domain), (ITT_FORMAT name, domain), counter_create, __itt_group_counter, "\"%s\", \"%s\"") +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_counter, counter_create_typedA, (const char *name, const char *domain, __itt_metadata_type type), (ITT_FORMAT name, domain, type), counter_create_typedA, __itt_group_counter, "\"%s\", \"%s\", %d") +ITT_STUB(ITTAPI, __itt_counter, counter_create_typedW, (const wchar_t *name, const wchar_t *domain, __itt_metadata_type type), (ITT_FORMAT name, domain, type), counter_create_typedW, __itt_group_counter, "\"%s\", \"%s\", %d") +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_counter, counter_create_typed, (const char *name, const char *domain, __itt_metadata_type type), (ITT_FORMAT name, domain, type), counter_create_typed, __itt_group_counter, "\"%s\", \"%s\", %d") +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + + ITT_STUBV(ITTAPI, void, pause, (void), (ITT_NO_PARAMS), pause, __itt_group_control | __itt_group_legacy, "no args") ITT_STUBV(ITTAPI, void, resume, (void), (ITT_NO_PARAMS), resume, __itt_group_control | __itt_group_legacy, "no args") @@ -190,8 +190,8 @@ ITT_STUBV(ITTAPI, void, task_end, (const __itt_domain *domain), ITT_STUBV(ITTAPI, void, counter_inc_v3, (const __itt_domain *domain, __itt_string_handle *name), (ITT_FORMAT domain, name), counter_inc_v3, __itt_group_structure, "%p, %p") ITT_STUBV(ITTAPI, void, counter_inc_delta_v3, (const __itt_domain *domain, __itt_string_handle *name, unsigned long long value), (ITT_FORMAT domain, name, value), counter_inc_delta_v3, __itt_group_structure, "%p, %p, %lu") -ITT_STUBV(ITTAPI, void, counter_dec_v3, (const __itt_domain *domain, __itt_string_handle *name), (ITT_FORMAT domain, name), counter_dec_v3, __itt_group_structure, "%p, %p") -ITT_STUBV(ITTAPI, void, counter_dec_delta_v3, (const __itt_domain *domain, __itt_string_handle *name, unsigned long long value), (ITT_FORMAT domain, name, value), counter_dec_delta_v3, __itt_group_structure, "%p, %p, %lu") +ITT_STUBV(ITTAPI, void, counter_dec_v3, (const __itt_domain *domain, __itt_string_handle *name), (ITT_FORMAT domain, name), counter_dec_v3, __itt_group_structure, "%p, %p") +ITT_STUBV(ITTAPI, void, counter_dec_delta_v3, (const __itt_domain *domain, __itt_string_handle *name, unsigned long long value), (ITT_FORMAT domain, name, value), counter_dec_delta_v3, __itt_group_structure, "%p, %p, %lu") ITT_STUBV(ITTAPI, void, marker, (const __itt_domain *domain, __itt_id id, __itt_string_handle *name, __itt_scope scope), (ITT_FORMAT domain, id, name, scope), marker, __itt_group_structure, "%p, %lu, %p, %d") @@ -256,23 +256,23 @@ ITT_STUB(ITTAPI, __itt_frame, frame_create, (const char *domain), (ITT_FORMA #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ #if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUB(ITTAPI, __itt_pt_region, pt_region_createA, (const char *name), (ITT_FORMAT name), pt_region_createA, __itt_group_structure, "\"%s\"") -ITT_STUB(ITTAPI, __itt_pt_region, pt_region_createW, (const wchar_t *name), (ITT_FORMAT name), pt_region_createW, __itt_group_structure, "\"%S\"") -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ -ITT_STUB(ITTAPI, __itt_pt_region, pt_region_create, (const char *name), (ITT_FORMAT name), pt_region_create, __itt_group_structure, "\"%s\"") 
+ITT_STUB(ITTAPI, __itt_pt_region, pt_region_createA, (const char *name), (ITT_FORMAT name), pt_region_createA, __itt_group_structure, "\"%s\"") +ITT_STUB(ITTAPI, __itt_pt_region, pt_region_createW, (const wchar_t *name), (ITT_FORMAT name), pt_region_createW, __itt_group_structure, "\"%S\"") +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_pt_region, pt_region_create, (const char *name), (ITT_FORMAT name), pt_region_create, __itt_group_structure, "\"%s\"") #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ #endif /* __ITT_INTERNAL_BODY */ -ITT_STUBV(ITTAPI, void, frame_begin, (__itt_frame frame), (ITT_FORMAT frame), frame_begin, __itt_group_frame, "%p") -ITT_STUBV(ITTAPI, void, frame_end, (__itt_frame frame), (ITT_FORMAT frame), frame_end, __itt_group_frame, "%p") - -ITT_STUBV(ITTAPI, void, counter_destroy, (__itt_counter id), (ITT_FORMAT id), counter_destroy, __itt_group_counter, "%p") -ITT_STUBV(ITTAPI, void, counter_inc, (__itt_counter id), (ITT_FORMAT id), counter_inc, __itt_group_counter, "%p") -ITT_STUBV(ITTAPI, void, counter_inc_delta, (__itt_counter id, unsigned long long value), (ITT_FORMAT id, value), counter_inc_delta, __itt_group_counter, "%p, %lu") -ITT_STUBV(ITTAPI, void, counter_dec, (__itt_counter id), (ITT_FORMAT id), counter_dec, __itt_group_counter, "%p") -ITT_STUBV(ITTAPI, void, counter_dec_delta, (__itt_counter id, unsigned long long value), (ITT_FORMAT id, value), counter_dec_delta, __itt_group_counter, "%p, %lu") -ITT_STUBV(ITTAPI, void, counter_set_value, (__itt_counter id, void *value_ptr), (ITT_FORMAT id, value_ptr), counter_set_value, __itt_group_counter, "%p, %p") -ITT_STUBV(ITTAPI, void, counter_set_value_ex, (__itt_counter id, __itt_clock_domain *clock_domain, unsigned long long timestamp, void *value_ptr), (ITT_FORMAT id, clock_domain, timestamp, value_ptr), counter_set_value_ex, __itt_group_counter, "%p, %p, %llu, %p") - +ITT_STUBV(ITTAPI, void, frame_begin, (__itt_frame frame), (ITT_FORMAT frame), frame_begin, __itt_group_frame, "%p") +ITT_STUBV(ITTAPI, void, frame_end, (__itt_frame frame), (ITT_FORMAT frame), frame_end, __itt_group_frame, "%p") + +ITT_STUBV(ITTAPI, void, counter_destroy, (__itt_counter id), (ITT_FORMAT id), counter_destroy, __itt_group_counter, "%p") +ITT_STUBV(ITTAPI, void, counter_inc, (__itt_counter id), (ITT_FORMAT id), counter_inc, __itt_group_counter, "%p") +ITT_STUBV(ITTAPI, void, counter_inc_delta, (__itt_counter id, unsigned long long value), (ITT_FORMAT id, value), counter_inc_delta, __itt_group_counter, "%p, %lu") +ITT_STUBV(ITTAPI, void, counter_dec, (__itt_counter id), (ITT_FORMAT id), counter_dec, __itt_group_counter, "%p") +ITT_STUBV(ITTAPI, void, counter_dec_delta, (__itt_counter id, unsigned long long value), (ITT_FORMAT id, value), counter_dec_delta, __itt_group_counter, "%p, %lu") +ITT_STUBV(ITTAPI, void, counter_set_value, (__itt_counter id, void *value_ptr), (ITT_FORMAT id, value_ptr), counter_set_value, __itt_group_counter, "%p, %p") +ITT_STUBV(ITTAPI, void, counter_set_value_ex, (__itt_counter id, __itt_clock_domain *clock_domain, unsigned long long timestamp, void *value_ptr), (ITT_FORMAT id, clock_domain, timestamp, value_ptr), counter_set_value_ex, __itt_group_counter, "%p, %p, %llu, %p") + #ifndef __ITT_INTERNAL_BODY #if ITT_PLATFORM==ITT_PLATFORM_WIN ITT_STUB(ITTAPI, __itt_mark_type, mark_createA, (const char *name), (ITT_FORMAT name), mark_createA, __itt_group_mark, "\"%s\"") @@ -341,14 +341,14 @@ ITT_STUB(ITTAPI, int, av_save, (void *data, int rank, const int *dimensions, in #endif /* 
ITT_PLATFORM==ITT_PLATFORM_WIN */ #endif /* __ITT_INTERNAL_BODY */ -#ifndef __ITT_INTERNAL_BODY -#if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUBV(ITTAPI, void, module_loadA, (void *start_addr, void* end_addr, const char *path), (ITT_FORMAT start_addr, end_addr, path), module_loadA, __itt_group_none, "%p, %p, %p") -ITT_STUBV(ITTAPI, void, module_loadW, (void *start_addr, void* end_addr, const wchar_t *path), (ITT_FORMAT start_addr, end_addr, path), module_loadW, __itt_group_none, "%p, %p, %p") -#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ -ITT_STUBV(ITTAPI, void, module_load, (void *start_addr, void *end_addr, const char *path), (ITT_FORMAT start_addr, end_addr, path), module_load, __itt_group_none, "%p, %p, %p") -#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* __ITT_INTERNAL_BODY */ - - +#ifndef __ITT_INTERNAL_BODY +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUBV(ITTAPI, void, module_loadA, (void *start_addr, void* end_addr, const char *path), (ITT_FORMAT start_addr, end_addr, path), module_loadA, __itt_group_none, "%p, %p, %p") +ITT_STUBV(ITTAPI, void, module_loadW, (void *start_addr, void* end_addr, const wchar_t *path), (ITT_FORMAT start_addr, end_addr, path), module_loadW, __itt_group_none, "%p, %p, %p") +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +ITT_STUBV(ITTAPI, void, module_load, (void *start_addr, void *end_addr, const char *path), (ITT_FORMAT start_addr, end_addr, path), module_load, __itt_group_none, "%p, %p, %p") +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* __ITT_INTERNAL_BODY */ + + #endif /* __ITT_INTERNAL_INIT */ diff --git a/contrib/libs/tbb/src/tbb/tools_api/ittnotify_types.h b/contrib/libs/tbb/src/tbb/tools_api/ittnotify_types.h index 3a73bb42cd..3849452c27 100644 --- a/contrib/libs/tbb/src/tbb/tools_api/ittnotify_types.h +++ b/contrib/libs/tbb/src/tbb/tools_api/ittnotify_types.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/contrib/libs/tbb/src/tbb/tools_api/legacy/ittnotify.h b/contrib/libs/tbb/src/tbb/tools_api/legacy/ittnotify.h index b5ec034762..b05a199d1f 100644 --- a/contrib/libs/tbb/src/tbb/tools_api/legacy/ittnotify.h +++ b/contrib/libs/tbb/src/tbb/tools_api/legacy/ittnotify.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2021 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -93,17 +93,17 @@ #endif /* UNICODE || _UNICODE */ #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#ifndef ITTAPI_CDECL +#ifndef ITTAPI_CDECL # if ITT_PLATFORM==ITT_PLATFORM_WIN -# define ITTAPI_CDECL __cdecl +# define ITTAPI_CDECL __cdecl # else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ # if defined _M_IX86 || defined __i386__ -# define ITTAPI_CDECL __attribute__ ((cdecl)) +# define ITTAPI_CDECL __attribute__ ((cdecl)) # else /* _M_IX86 || __i386__ */ -# define ITTAPI_CDECL /* actual only on x86 platform */ +# define ITTAPI_CDECL /* actual only on x86 platform */ # endif /* _M_IX86 || __i386__ */ # endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* ITTAPI_CDECL */ +#endif /* ITTAPI_CDECL */ #ifndef STDCALL # if ITT_PLATFORM==ITT_PLATFORM_WIN @@ -117,12 +117,12 @@ # endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ #endif /* STDCALL */ -#define ITTAPI ITTAPI_CDECL -#define LIBITTAPI ITTAPI_CDECL +#define ITTAPI ITTAPI_CDECL +#define LIBITTAPI ITTAPI_CDECL /* TODO: Temporary for compatibility! */ -#define ITTAPI_CALL ITTAPI_CDECL -#define LIBITTAPI_CALL ITTAPI_CDECL +#define ITTAPI_CALL ITTAPI_CDECL +#define LIBITTAPI_CALL ITTAPI_CDECL #if ITT_PLATFORM==ITT_PLATFORM_WIN /* use __forceinline (VC++ specific) */ diff --git a/contrib/libs/tbb/src/tbb/version.cpp b/contrib/libs/tbb/src/tbb/version.cpp index 3bf2945d4a..ca113372f1 100644 --- a/contrib/libs/tbb/src/tbb/version.cpp +++ b/contrib/libs/tbb/src/tbb/version.cpp @@ -1,26 +1,26 @@ -/* - Copyright (c) 2020-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#include "oneapi/tbb/version.h" - -extern "C" int TBB_runtime_interface_version() { - return TBB_INTERFACE_VERSION; -} - -extern "C" const char* TBB_runtime_version() { - static const char version_str[] = TBB_VERSION_STRING; - return version_str; -} +/* + Copyright (c) 2020-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "oneapi/tbb/version.h" + +extern "C" int TBB_runtime_interface_version() { + return TBB_INTERFACE_VERSION; +} + +extern "C" const char* TBB_runtime_version() { + static const char version_str[] = TBB_VERSION_STRING; + return version_str; +} diff --git a/contrib/libs/tbb/src/tbb/waiters.h b/contrib/libs/tbb/src/tbb/waiters.h index b3f1219de8..07ee5ab4f0 100644 --- a/contrib/libs/tbb/src/tbb/waiters.h +++ b/contrib/libs/tbb/src/tbb/waiters.h @@ -1,204 +1,204 @@ -/* - Copyright (c) 2005-2021 Intel Corporation - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -#ifndef _TBB_waiters_H -#define _TBB_waiters_H - -#include "oneapi/tbb/detail/_task.h" -#include "scheduler_common.h" -#include "arena.h" - -namespace tbb { -namespace detail { -namespace r1 { - -inline d1::task* get_self_recall_task(arena_slot& slot); - -class waiter_base { -public: - waiter_base(arena& a) : my_arena(a), my_backoff(int(a.my_num_slots)) {} - - bool pause() { - if (my_backoff.pause()) { - my_arena.is_out_of_work(); - return true; - } - - return false; - } - - void reset_wait() { - my_backoff.reset_wait(); - } - -protected: - arena& my_arena; - stealing_loop_backoff my_backoff; -}; - -class outermost_worker_waiter : public waiter_base { -public: - using waiter_base::waiter_base; - - bool continue_execution(arena_slot& slot, d1::task*& t) const { - __TBB_ASSERT(t == nullptr, nullptr); - - if (is_worker_should_leave(slot)) { - // Leave dispatch loop - return false; - } - - t = get_self_recall_task(slot); - return true; - } - - void pause(arena_slot&) { - waiter_base::pause(); - } - - - d1::wait_context* wait_ctx() { - return nullptr; - } - - static bool postpone_execution(d1::task&) { - return false; - } - -private: - using base_type = waiter_base; - - bool is_worker_should_leave(arena_slot& slot) const { - bool is_top_priority_arena = my_arena.my_is_top_priority.load(std::memory_order_relaxed); - bool is_task_pool_empty = slot.task_pool.load(std::memory_order_relaxed) == EmptyTaskPool; - - if (is_top_priority_arena) { - // Worker in most priority arena do not leave arena, until all work in task_pool is done - if (is_task_pool_empty && my_arena.is_recall_requested()) { - return true; - } - } else { - if (my_arena.is_recall_requested()) { - // If worker has work in task pool, we must notify other threads, - // because can appear missed wake up of other threads - if (!is_task_pool_empty) { - my_arena.advertise_new_work<arena::wakeup>(); - } - return true; - } - } - - return false; - } -}; - -class sleep_waiter : public waiter_base { -protected: - using waiter_base::waiter_base; - - bool is_arena_empty() { - return my_arena.my_pool_state.load(std::memory_order_relaxed) == arena::SNAPSHOT_EMPTY; - } - - template <typename Pred> - void sleep(std::uintptr_t uniq_tag, Pred wakeup_condition) { - my_arena.my_market->get_wait_list().wait<extended_concurrent_monitor::thread_context>(wakeup_condition, - extended_context{uniq_tag, &my_arena}); - } -}; - -class external_waiter : public sleep_waiter { -public: - external_waiter(arena& a, d1::wait_context& wo) - : sleep_waiter(a), my_wait_ctx(wo) - {} - - bool continue_execution(arena_slot& slot, d1::task*& t) const { - __TBB_ASSERT(t == nullptr, nullptr); - if (!my_wait_ctx.continue_execution()) - return false; - t = get_self_recall_task(slot); - return true; - } - - void pause(arena_slot&) { - if (!sleep_waiter::pause()) { - return; - } - - auto wakeup_condition = [&] { return !is_arena_empty() || !my_wait_ctx.continue_execution(); }; - - sleep(std::uintptr_t(&my_wait_ctx), wakeup_condition); - my_backoff.reset_wait(); - } - - d1::wait_context* wait_ctx() { - return &my_wait_ctx; - } - - static bool postpone_execution(d1::task&) { - 
return false; - } - -private: - d1::wait_context& my_wait_ctx; -}; - -#if __TBB_RESUMABLE_TASKS - -class coroutine_waiter : public sleep_waiter { -public: - using sleep_waiter::sleep_waiter; - - bool continue_execution(arena_slot& slot, d1::task*& t) const { - __TBB_ASSERT(t == nullptr, nullptr); - t = get_self_recall_task(slot); - return true; - } - - void pause(arena_slot& slot) { - if (!sleep_waiter::pause()) { - return; - } - - suspend_point_type* sp = slot.default_task_dispatcher().m_suspend_point; - - auto wakeup_condition = [&] { return !is_arena_empty() || sp->m_is_owner_recalled.load(std::memory_order_relaxed); }; - - sleep(std::uintptr_t(sp), wakeup_condition); - my_backoff.reset_wait(); - } - - void reset_wait() { - my_backoff.reset_wait(); - } - - d1::wait_context* wait_ctx() { - return nullptr; - } - - static bool postpone_execution(d1::task& t) { - return task_accessor::is_resume_task(t); - } -}; - -#endif // __TBB_RESUMABLE_TASKS - -} // namespace r1 -} // namespace detail -} // namespace tbb - -#endif // _TBB_waiters_H +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef _TBB_waiters_H +#define _TBB_waiters_H + +#include "oneapi/tbb/detail/_task.h" +#include "scheduler_common.h" +#include "arena.h" + +namespace tbb { +namespace detail { +namespace r1 { + +inline d1::task* get_self_recall_task(arena_slot& slot); + +class waiter_base { +public: + waiter_base(arena& a) : my_arena(a), my_backoff(int(a.my_num_slots)) {} + + bool pause() { + if (my_backoff.pause()) { + my_arena.is_out_of_work(); + return true; + } + + return false; + } + + void reset_wait() { + my_backoff.reset_wait(); + } + +protected: + arena& my_arena; + stealing_loop_backoff my_backoff; +}; + +class outermost_worker_waiter : public waiter_base { +public: + using waiter_base::waiter_base; + + bool continue_execution(arena_slot& slot, d1::task*& t) const { + __TBB_ASSERT(t == nullptr, nullptr); + + if (is_worker_should_leave(slot)) { + // Leave dispatch loop + return false; + } + + t = get_self_recall_task(slot); + return true; + } + + void pause(arena_slot&) { + waiter_base::pause(); + } + + + d1::wait_context* wait_ctx() { + return nullptr; + } + + static bool postpone_execution(d1::task&) { + return false; + } + +private: + using base_type = waiter_base; + + bool is_worker_should_leave(arena_slot& slot) const { + bool is_top_priority_arena = my_arena.my_is_top_priority.load(std::memory_order_relaxed); + bool is_task_pool_empty = slot.task_pool.load(std::memory_order_relaxed) == EmptyTaskPool; + + if (is_top_priority_arena) { + // Worker in most priority arena do not leave arena, until all work in task_pool is done + if (is_task_pool_empty && my_arena.is_recall_requested()) { + return true; + } + } else { + if (my_arena.is_recall_requested()) { + // If worker has work in task pool, we must notify other threads, + // because can appear missed wake up of other threads + if (!is_task_pool_empty) { + my_arena.advertise_new_work<arena::wakeup>(); + } + return true; + } + } 
+ + return false; + } +}; + +class sleep_waiter : public waiter_base { +protected: + using waiter_base::waiter_base; + + bool is_arena_empty() { + return my_arena.my_pool_state.load(std::memory_order_relaxed) == arena::SNAPSHOT_EMPTY; + } + + template <typename Pred> + void sleep(std::uintptr_t uniq_tag, Pred wakeup_condition) { + my_arena.my_market->get_wait_list().wait<extended_concurrent_monitor::thread_context>(wakeup_condition, + extended_context{uniq_tag, &my_arena}); + } +}; + +class external_waiter : public sleep_waiter { +public: + external_waiter(arena& a, d1::wait_context& wo) + : sleep_waiter(a), my_wait_ctx(wo) + {} + + bool continue_execution(arena_slot& slot, d1::task*& t) const { + __TBB_ASSERT(t == nullptr, nullptr); + if (!my_wait_ctx.continue_execution()) + return false; + t = get_self_recall_task(slot); + return true; + } + + void pause(arena_slot&) { + if (!sleep_waiter::pause()) { + return; + } + + auto wakeup_condition = [&] { return !is_arena_empty() || !my_wait_ctx.continue_execution(); }; + + sleep(std::uintptr_t(&my_wait_ctx), wakeup_condition); + my_backoff.reset_wait(); + } + + d1::wait_context* wait_ctx() { + return &my_wait_ctx; + } + + static bool postpone_execution(d1::task&) { + return false; + } + +private: + d1::wait_context& my_wait_ctx; +}; + +#if __TBB_RESUMABLE_TASKS + +class coroutine_waiter : public sleep_waiter { +public: + using sleep_waiter::sleep_waiter; + + bool continue_execution(arena_slot& slot, d1::task*& t) const { + __TBB_ASSERT(t == nullptr, nullptr); + t = get_self_recall_task(slot); + return true; + } + + void pause(arena_slot& slot) { + if (!sleep_waiter::pause()) { + return; + } + + suspend_point_type* sp = slot.default_task_dispatcher().m_suspend_point; + + auto wakeup_condition = [&] { return !is_arena_empty() || sp->m_is_owner_recalled.load(std::memory_order_relaxed); }; + + sleep(std::uintptr_t(sp), wakeup_condition); + my_backoff.reset_wait(); + } + + void reset_wait() { + my_backoff.reset_wait(); + } + + d1::wait_context* wait_ctx() { + return nullptr; + } + + static bool postpone_execution(d1::task& t) { + return task_accessor::is_resume_task(t); + } +}; + +#endif // __TBB_RESUMABLE_TASKS + +} // namespace r1 +} // namespace detail +} // namespace tbb + +#endif // _TBB_waiters_H |
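
For reference, the two functions restored in contrib/libs/tbb/src/tbb/version.cpp above are oneTBB's public runtime-version query API. The sketch below is not part of the commit; it only illustrates how a client might compare the headers it was built against with the library it is actually running. TBB_INTERFACE_VERSION and TBB_VERSION_STRING are macros from oneapi/tbb/version.h, as in the diffed sources; the rest of the program is illustrative.

    #include <cstdio>
    #include "oneapi/tbb/version.h"  // declares TBB_runtime_interface_version() and TBB_runtime_version()

    int main() {
        // Compile-time values: taken from the headers this program was built against.
        std::printf("built against oneTBB interface %d (%s)\n",
                    TBB_INTERFACE_VERSION, TBB_VERSION_STRING);
        // Runtime values: reported by the loaded libtbb, i.e. the definitions in version.cpp above.
        std::printf("running oneTBB interface %d (%s)\n",
                    TBB_runtime_interface_version(), TBB_runtime_version());
        return 0;
    }

If the two interface numbers differ, the program is running against a different libtbb build than the one it was compiled for, which is the usual reason to query these functions at startup.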