author    | Devtools Arcadia <arcadia-devtools@yandex-team.ru>           | 2022-02-07 18:08:42 +0300
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300
commit    | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
tree      | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /contrib/libs/tbb/src
download  | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'contrib/libs/tbb/src')
61 files changed, 21928 insertions, 0 deletions
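The first file in the diff, allocator.cpp, resolves its allocation entry points lazily: allocate_handler starts out pointing at a bootstrap routine, the first call attempts to dynamic_link against the tbbmalloc library, and on failure the handlers are rebound to std::malloc/std::free. A minimal standalone sketch of that first-call trampoline pattern follows; the try_link_scalable_allocator stub is hypothetical and merely stands in for dynamic_link(MALLOCLIB_NAME, MallocLinkTable, 4).

```cpp
#include <cstdlib>
#include <mutex>

static void* bootstrap_allocate(std::size_t size);

// Handlers start at the bootstrap routine; after the first call they are
// rebound either to the scalable allocator or to the CRT fallback.
static void* (*allocate_handler)(std::size_t) = &bootstrap_allocate;
static void  (*deallocate_handler)(void*)     = nullptr;

// Hypothetical stand-in for dynamic_link(MALLOCLIB_NAME, MallocLinkTable, 4):
// on success it would rebind the handlers to scalable_malloc/scalable_free.
static bool try_link_scalable_allocator() { return false; }

static std::once_flag init_flag;

static void initialize_handlers() {
    if (!try_link_scalable_allocator()) {
        allocate_handler   = &std::malloc;  // fall back to the C runtime
        deallocate_handler = &std::free;
    }
}

static void* bootstrap_allocate(std::size_t size) {
    std::call_once(init_flag, initialize_handlers);  // first caller links, others wait
    return (*allocate_handler)(size);                // retry via the rebound pointer
}

int main() {
    void* p = (*allocate_handler)(64);  // first call goes through bootstrap_allocate
    (*deallocate_handler)(p);           // set by the same one-time initialization
    return 0;
}
```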
diff --git a/contrib/libs/tbb/src/tbb/allocator.cpp b/contrib/libs/tbb/src/tbb/allocator.cpp new file mode 100644 index 0000000000..6bf5a0be01 --- /dev/null +++ b/contrib/libs/tbb/src/tbb/allocator.cpp @@ -0,0 +1,234 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "oneapi/tbb/version.h" + +#include "oneapi/tbb/detail/_exception.h" +#include "oneapi/tbb/detail/_assert.h" +#include "oneapi/tbb/detail/_utils.h" + +#include "dynamic_link.h" +#include "misc.h" + +#include <cstdlib> + +#if _WIN32 || _WIN64 +#include <Windows.h> +#else +#include <dlfcn.h> +#endif /* _WIN32||_WIN64 */ + +#if __TBB_WEAK_SYMBOLS_PRESENT + +#pragma weak scalable_malloc +#pragma weak scalable_free +#pragma weak scalable_aligned_malloc +#pragma weak scalable_aligned_free + +extern "C" { + void* scalable_malloc(std::size_t); + void scalable_free(void*); + void* scalable_aligned_malloc(std::size_t, std::size_t); + void scalable_aligned_free(void*); +} + +#endif /* __TBB_WEAK_SYMBOLS_PRESENT */ + +namespace tbb { +namespace detail { +namespace r1 { + +//! Initialization routine used for first indirect call via allocate_handler. +static void* initialize_allocate_handler(std::size_t size); + +//! Handler for memory allocation +static void* (*allocate_handler)(std::size_t size) = &initialize_allocate_handler; + +//! Handler for memory deallocation +static void (*deallocate_handler)(void* pointer) = nullptr; + +//! Initialization routine used for first indirect call via cache_aligned_allocate_handler. +static void* initialize_cache_aligned_allocate_handler(std::size_t n, std::size_t alignment); + +//! Allocates memory using standard malloc. It is used when scalable_allocator is not available +static void* std_cache_aligned_allocate(std::size_t n, std::size_t alignment); + +//! Allocates memory using standard free. It is used when scalable_allocator is not available +static void std_cache_aligned_deallocate(void* p); + +//! Handler for padded memory allocation +static void* (*cache_aligned_allocate_handler)(std::size_t n, std::size_t alignment) = &initialize_cache_aligned_allocate_handler; + +//! Handler for padded memory deallocation +static void (*cache_aligned_deallocate_handler)(void* p) = nullptr; + +//! Table describing how to link the handlers. +static const dynamic_link_descriptor MallocLinkTable[] = { + DLD(scalable_malloc, allocate_handler), + DLD(scalable_free, deallocate_handler), + DLD(scalable_aligned_malloc, cache_aligned_allocate_handler), + DLD(scalable_aligned_free, cache_aligned_deallocate_handler), +}; + + +#if TBB_USE_DEBUG +#define DEBUG_SUFFIX "_debug" +#else +#define DEBUG_SUFFIX +#endif /* TBB_USE_DEBUG */ + +// MALLOCLIB_NAME is the name of the oneTBB memory allocator library. 
+#if _WIN32||_WIN64 +#define MALLOCLIB_NAME "tbbmalloc" DEBUG_SUFFIX ".dll" +#elif __APPLE__ +#define MALLOCLIB_NAME "libtbbmalloc" DEBUG_SUFFIX ".dylib" +#elif __FreeBSD__ || __NetBSD__ || __OpenBSD__ || __sun || _AIX || __ANDROID__ +#define MALLOCLIB_NAME "libtbbmalloc" DEBUG_SUFFIX ".so" +#elif __linux__ // Note that order of these #elif's is important! +#define MALLOCLIB_NAME "libtbbmalloc" DEBUG_SUFFIX ".so.2" +#else +#error Unknown OS +#endif + +//! Initialize the allocation/free handler pointers. +/** Caller is responsible for ensuring this routine is called exactly once. + The routine attempts to dynamically link with the TBB memory allocator. + If that allocator is not found, it links to malloc and free. */ +void initialize_handler_pointers() { + __TBB_ASSERT(allocate_handler == &initialize_allocate_handler, NULL); + bool success = dynamic_link(MALLOCLIB_NAME, MallocLinkTable, 4); + if(!success) { + // If unsuccessful, set the handlers to the default routines. + // This must be done now, and not before FillDynamicLinks runs, because if other + // threads call the handlers, we want them to go through the DoOneTimeInitializations logic, + // which forces them to wait. + allocate_handler = &std::malloc; + deallocate_handler = &std::free; + cache_aligned_allocate_handler = &std_cache_aligned_allocate; + cache_aligned_deallocate_handler = &std_cache_aligned_deallocate; + } + + PrintExtraVersionInfo( "ALLOCATOR", success?"scalable_malloc":"malloc" ); +} + +static std::once_flag initialization_state; +void initialize_cache_aligned_allocator() { + std::call_once(initialization_state, &initialize_handler_pointers); +} + +//! Executed on very first call through allocate_handler +static void* initialize_allocate_handler(std::size_t size) { + initialize_cache_aligned_allocator(); + __TBB_ASSERT(allocate_handler != &initialize_allocate_handler, NULL); + return (*allocate_handler)(size); +} + +//! 
Executed on very first call through cache_aligned_allocate_handler +static void* initialize_cache_aligned_allocate_handler(std::size_t bytes, std::size_t alignment) { + initialize_cache_aligned_allocator(); + __TBB_ASSERT(cache_aligned_allocate_handler != &initialize_cache_aligned_allocate_handler, NULL); + return (*cache_aligned_allocate_handler)(bytes, alignment); +} + +// TODO: use CPUID to find actual line size, though consider backward compatibility +// nfs - no false sharing +static constexpr std::size_t nfs_size = 128; + +std::size_t __TBB_EXPORTED_FUNC cache_line_size() { + return nfs_size; +} + +void* __TBB_EXPORTED_FUNC cache_aligned_allocate(std::size_t size) { + const std::size_t cache_line_size = nfs_size; + __TBB_ASSERT(is_power_of_two(cache_line_size), "must be power of two"); + + // Check for overflow + if (size + cache_line_size < size) { + throw_exception(exception_id::bad_alloc); + } + // scalable_aligned_malloc considers zero size request an error, and returns NULL + if (size == 0) size = 1; + + void* result = cache_aligned_allocate_handler(size, cache_line_size); + if (!result) { + throw_exception(exception_id::bad_alloc); + } + __TBB_ASSERT(is_aligned(result, cache_line_size), "The returned address isn't aligned"); + return result; +} + +void __TBB_EXPORTED_FUNC cache_aligned_deallocate(void* p) { + __TBB_ASSERT(cache_aligned_deallocate_handler, "Initialization has not been yet."); + (*cache_aligned_deallocate_handler)(p); +} + +static void* std_cache_aligned_allocate(std::size_t bytes, std::size_t alignment) { + // TODO: make it common with cache_aligned_resource + std::size_t space = alignment + bytes; + std::uintptr_t base = reinterpret_cast<std::uintptr_t>(std::malloc(space)); + if (!base) { + return nullptr; + } + std::uintptr_t result = (base + nfs_size) & ~(nfs_size - 1); + // Round up to the next cache line (align the base address) + __TBB_ASSERT((result - base) >= sizeof(std::uintptr_t), "Cannot store a base pointer to the header"); + __TBB_ASSERT(space - (result - base) >= bytes, "Not enough space for the storage"); + + // Record where block actually starts. + (reinterpret_cast<std::uintptr_t*>(result))[-1] = base; + return reinterpret_cast<void*>(result); +} + +static void std_cache_aligned_deallocate(void* p) { + if (p) { + __TBB_ASSERT(reinterpret_cast<std::uintptr_t>(p) >= 0x4096, "attempt to free block not obtained from cache_aligned_allocator"); + // Recover where block actually starts + std::uintptr_t base = (reinterpret_cast<std::uintptr_t*>(p))[-1]; + __TBB_ASSERT(((base + nfs_size) & ~(nfs_size - 1)) == reinterpret_cast<std::uintptr_t>(p), "Incorrect alignment or not allocated by std_cache_aligned_deallocate?"); + std::free(reinterpret_cast<void*>(base)); + } +} + +void* __TBB_EXPORTED_FUNC allocate_memory(std::size_t size) { + void* result = (*allocate_handler)(size); + if (!result) { + throw_exception(exception_id::bad_alloc); + } + return result; +} + +void __TBB_EXPORTED_FUNC deallocate_memory(void* p) { + if (p) { + __TBB_ASSERT(deallocate_handler, "Initialization has not been yet."); + (*deallocate_handler)(p); + } +} + +bool __TBB_EXPORTED_FUNC is_tbbmalloc_used() { + if (allocate_handler == &initialize_allocate_handler) { + void* void_ptr = allocate_handler(1); + deallocate_handler(void_ptr); + } + __TBB_ASSERT(allocate_handler != &initialize_allocate_handler && deallocate_handler != nullptr, NULL); + // Cast to void avoids type mismatch errors on some compilers (e.g. 
__IBMCPP__) + __TBB_ASSERT((reinterpret_cast<void*>(allocate_handler) == reinterpret_cast<void*>(&std::malloc)) == (reinterpret_cast<void*>(deallocate_handler) == reinterpret_cast<void*>(&std::free)), + "Both shim pointers must refer to routines from the same package (either TBB or CRT)"); + return reinterpret_cast<void*>(allocate_handler) == reinterpret_cast<void*>(&std::malloc); +} + +} // namespace r1 +} // namespace detail +} // namespace tbb diff --git a/contrib/libs/tbb/src/tbb/arena.cpp b/contrib/libs/tbb/src/tbb/arena.cpp new file mode 100644 index 0000000000..1ddab36ff5 --- /dev/null +++ b/contrib/libs/tbb/src/tbb/arena.cpp @@ -0,0 +1,757 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "task_dispatcher.h" +#include "governor.h" +#include "arena.h" +#include "itt_notify.h" +#include "semaphore.h" +#include "waiters.h" +#include "oneapi/tbb/detail/_task.h" +#include "oneapi/tbb/info.h" +#include "oneapi/tbb/tbb_allocator.h" + +#include <atomic> +#include <cstring> +#include <functional> + +namespace tbb { +namespace detail { +namespace r1 { + +#if __TBB_ARENA_BINDING +class numa_binding_observer : public tbb::task_scheduler_observer { + binding_handler* my_binding_handler; +public: + numa_binding_observer( d1::task_arena* ta, int num_slots, int numa_id, core_type_id core_type, int max_threads_per_core ) + : task_scheduler_observer(*ta) + , my_binding_handler(construct_binding_handler(num_slots, numa_id, core_type, max_threads_per_core)) + {} + + void on_scheduler_entry( bool ) override { + apply_affinity_mask(my_binding_handler, this_task_arena::current_thread_index()); + } + + void on_scheduler_exit( bool ) override { + restore_affinity_mask(my_binding_handler, this_task_arena::current_thread_index()); + } + + ~numa_binding_observer(){ + destroy_binding_handler(my_binding_handler); + } +}; + +numa_binding_observer* construct_binding_observer( d1::task_arena* ta, int num_slots, int numa_id, core_type_id core_type, int max_threads_per_core ) { + numa_binding_observer* binding_observer = nullptr; + if ((core_type >= 0 && core_type_count() > 1) || (numa_id >= 0 && numa_node_count() > 1) || max_threads_per_core > 0) { + binding_observer = new(allocate_memory(sizeof(numa_binding_observer))) numa_binding_observer(ta, num_slots, numa_id, core_type, max_threads_per_core); + __TBB_ASSERT(binding_observer, "Failure during NUMA binding observer allocation and construction"); + binding_observer->observe(true); + } + return binding_observer; +} + +void destroy_binding_observer( numa_binding_observer* binding_observer ) { + __TBB_ASSERT(binding_observer, "Trying to deallocate NULL pointer"); + binding_observer->observe(false); + binding_observer->~numa_binding_observer(); + deallocate_memory(binding_observer); +} +#endif /*!__TBB_ARENA_BINDING*/ + +std::size_t arena::occupy_free_slot_in_range( thread_data& tls, std::size_t lower, std::size_t upper ) { + if ( lower >= upper ) return out_of_arena; + // Start search for an empty slot from the one we 
occupied the last time + std::size_t index = tls.my_arena_index; + if ( index < lower || index >= upper ) index = tls.my_random.get() % (upper - lower) + lower; + __TBB_ASSERT( index >= lower && index < upper, NULL ); + // Find a free slot + for ( std::size_t i = index; i < upper; ++i ) + if (my_slots[i].try_occupy()) return i; + for ( std::size_t i = lower; i < index; ++i ) + if (my_slots[i].try_occupy()) return i; + return out_of_arena; +} + +template <bool as_worker> +std::size_t arena::occupy_free_slot(thread_data& tls) { + // Firstly, external threads try to occupy reserved slots + std::size_t index = as_worker ? out_of_arena : occupy_free_slot_in_range( tls, 0, my_num_reserved_slots ); + if ( index == out_of_arena ) { + // Secondly, all threads try to occupy all non-reserved slots + index = occupy_free_slot_in_range(tls, my_num_reserved_slots, my_num_slots ); + // Likely this arena is already saturated + if ( index == out_of_arena ) + return out_of_arena; + } + + atomic_update( my_limit, (unsigned)(index + 1), std::less<unsigned>() ); + return index; +} + +std::uintptr_t arena::calculate_stealing_threshold() { + stack_anchor_type anchor; + return r1::calculate_stealing_threshold(reinterpret_cast<std::uintptr_t>(&anchor), my_market->worker_stack_size()); +} + +void arena::process(thread_data& tls) { + governor::set_thread_data(tls); // TODO: consider moving to create_one_job. + __TBB_ASSERT( is_alive(my_guard), nullptr); + __TBB_ASSERT( my_num_slots > 1, nullptr); + + std::size_t index = occupy_free_slot</*as_worker*/true>(tls); + if (index == out_of_arena) { + on_thread_leaving<ref_worker>(); + return; + } + __TBB_ASSERT( index >= my_num_reserved_slots, "Workers cannot occupy reserved slots" ); + tls.attach_arena(*this, index); + + task_dispatcher& task_disp = tls.my_arena_slot->default_task_dispatcher(); + task_disp.set_stealing_threshold(calculate_stealing_threshold()); + __TBB_ASSERT(task_disp.can_steal(), nullptr); + tls.attach_task_dispatcher(task_disp); + + __TBB_ASSERT( !tls.my_last_observer, "There cannot be notified local observers when entering arena" ); + my_observers.notify_entry_observers(tls.my_last_observer, tls.my_is_worker); + + // Waiting on special object tied to this arena + outermost_worker_waiter waiter(*this); + d1::task* t = tls.my_task_dispatcher->local_wait_for_all(nullptr, waiter); + __TBB_ASSERT_EX(t == nullptr, "Outermost worker must not leave dispatch loop with a task"); + __TBB_ASSERT(governor::is_thread_data_set(&tls), nullptr); + __TBB_ASSERT(tls.my_task_dispatcher == &task_disp, nullptr); + + my_observers.notify_exit_observers(tls.my_last_observer, tls.my_is_worker); + tls.my_last_observer = nullptr; + + task_disp.set_stealing_threshold(0); + tls.detach_task_dispatcher(); + + // Arena slot detach (arena may be used in market::process) + // TODO: Consider moving several calls below into a new method(e.g.detach_arena). + tls.my_arena_slot->release(); + tls.my_arena_slot = nullptr; + tls.my_inbox.detach(); + __TBB_ASSERT(tls.my_inbox.is_idle_state(true), nullptr); + __TBB_ASSERT(is_alive(my_guard), nullptr); + + // In contrast to earlier versions of TBB (before 3.0 U5) now it is possible + // that arena may be temporarily left unpopulated by threads. See comments in + // arena::on_thread_leaving() for more details. 
+ on_thread_leaving<ref_worker>(); + __TBB_ASSERT(tls.my_arena == this, "my_arena is used as a hint when searching the arena to join"); +} + +arena::arena ( market& m, unsigned num_slots, unsigned num_reserved_slots, unsigned priority_level ) +{ + __TBB_ASSERT( !my_guard, "improperly allocated arena?" ); + __TBB_ASSERT( sizeof(my_slots[0]) % cache_line_size()==0, "arena::slot size not multiple of cache line size" ); + __TBB_ASSERT( is_aligned(this, cache_line_size()), "arena misaligned" ); + my_market = &m; + my_limit = 1; + // Two slots are mandatory: for the external thread, and for 1 worker (required to support starvation resistant tasks). + my_num_slots = num_arena_slots(num_slots); + my_num_reserved_slots = num_reserved_slots; + my_max_num_workers = num_slots-num_reserved_slots; + my_priority_level = priority_level; + my_references = ref_external; // accounts for the external thread + my_aba_epoch = m.my_arenas_aba_epoch.load(std::memory_order_relaxed); + my_observers.my_arena = this; + my_co_cache.init(4 * num_slots); + __TBB_ASSERT ( my_max_num_workers <= my_num_slots, NULL ); + // Initialize the default context. It should be allocated before task_dispatch construction. + my_default_ctx = new (cache_aligned_allocate(sizeof(d1::task_group_context))) + d1::task_group_context{ d1::task_group_context::isolated, d1::task_group_context::fp_settings }; + // Construct slots. Mark internal synchronization elements for the tools. + task_dispatcher* base_td_pointer = reinterpret_cast<task_dispatcher*>(my_slots + my_num_slots); + for( unsigned i = 0; i < my_num_slots; ++i ) { + // __TBB_ASSERT( !my_slots[i].my_scheduler && !my_slots[i].task_pool, NULL ); + __TBB_ASSERT( !my_slots[i].task_pool_ptr, NULL ); + __TBB_ASSERT( !my_slots[i].my_task_pool_size, NULL ); + mailbox(i).construct(); + my_slots[i].init_task_streams(i); + my_slots[i].my_default_task_dispatcher = new(base_td_pointer + i) task_dispatcher(this); + my_slots[i].my_is_occupied.store(false, std::memory_order_relaxed); + } + my_fifo_task_stream.initialize(my_num_slots); + my_resume_task_stream.initialize(my_num_slots); +#if __TBB_PREVIEW_CRITICAL_TASKS + my_critical_task_stream.initialize(my_num_slots); +#endif +#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY + my_local_concurrency_requests = 0; + my_local_concurrency_flag.clear(); + my_global_concurrency_mode.store(false, std::memory_order_relaxed); +#endif +} + +arena& arena::allocate_arena( market& m, unsigned num_slots, unsigned num_reserved_slots, + unsigned priority_level ) +{ + __TBB_ASSERT( sizeof(base_type) + sizeof(arena_slot) == sizeof(arena), "All arena data fields must go to arena_base" ); + __TBB_ASSERT( sizeof(base_type) % cache_line_size() == 0, "arena slots area misaligned: wrong padding" ); + __TBB_ASSERT( sizeof(mail_outbox) == max_nfs_size, "Mailbox padding is wrong" ); + std::size_t n = allocation_size(num_arena_slots(num_slots)); + unsigned char* storage = (unsigned char*)cache_aligned_allocate(n); + // Zero all slots to indicate that they are empty + std::memset( storage, 0, n ); + return *new( storage + num_arena_slots(num_slots) * sizeof(mail_outbox) ) + arena(m, num_slots, num_reserved_slots, priority_level); +} + +void arena::free_arena () { + __TBB_ASSERT( is_alive(my_guard), NULL ); + __TBB_ASSERT( !my_references.load(std::memory_order_relaxed), "There are threads in the dying arena" ); + __TBB_ASSERT( !my_num_workers_requested && !my_num_workers_allotted, "Dying arena requests workers" ); + __TBB_ASSERT( my_pool_state.load(std::memory_order_relaxed) == 
SNAPSHOT_EMPTY || !my_max_num_workers, + "Inconsistent state of a dying arena" ); +#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY + __TBB_ASSERT( !my_global_concurrency_mode, NULL ); +#endif + poison_value( my_guard ); + std::intptr_t drained = 0; + for ( unsigned i = 0; i < my_num_slots; ++i ) { + // __TBB_ASSERT( !my_slots[i].my_scheduler, "arena slot is not empty" ); + // TODO: understand the assertion and modify + // __TBB_ASSERT( my_slots[i].task_pool == EmptyTaskPool, NULL ); + __TBB_ASSERT( my_slots[i].head == my_slots[i].tail, NULL ); // TODO: replace by is_quiescent_local_task_pool_empty + my_slots[i].free_task_pool(); + drained += mailbox(i).drain(); + my_slots[i].my_default_task_dispatcher->~task_dispatcher(); + } + __TBB_ASSERT(my_fifo_task_stream.empty(), "Not all enqueued tasks were executed"); + __TBB_ASSERT(my_resume_task_stream.empty(), "Not all enqueued tasks were executed"); + // Cleanup coroutines/schedulers cache + my_co_cache.cleanup(); + my_default_ctx->~task_group_context(); + cache_aligned_deallocate(my_default_ctx); +#if __TBB_PREVIEW_CRITICAL_TASKS + __TBB_ASSERT( my_critical_task_stream.empty(), "Not all critical tasks were executed"); +#endif + // remove an internal reference + my_market->release( /*is_public=*/false, /*blocking_terminate=*/false ); + if ( !my_observers.empty() ) { + my_observers.clear(); + } + void* storage = &mailbox(my_num_slots-1); + __TBB_ASSERT( my_references.load(std::memory_order_relaxed) == 0, NULL ); + __TBB_ASSERT( my_pool_state.load(std::memory_order_relaxed) == SNAPSHOT_EMPTY || !my_max_num_workers, NULL ); + this->~arena(); +#if TBB_USE_ASSERT > 1 + std::memset( storage, 0, allocation_size(my_num_slots) ); +#endif /* TBB_USE_ASSERT */ + cache_aligned_deallocate( storage ); +} + +bool arena::has_enqueued_tasks() { + return !my_fifo_task_stream.empty(); +} + +bool arena::is_out_of_work() { +#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY + if (my_local_concurrency_flag.try_clear_if([this] { + return !has_enqueued_tasks(); + })) { + my_market->adjust_demand(*this, /* delta = */ -1, /* mandatory = */ true); + } +#endif + + // TODO: rework it to return at least a hint about where a task was found; better if the task itself. + switch (my_pool_state.load(std::memory_order_acquire)) { + case SNAPSHOT_EMPTY: + return true; + case SNAPSHOT_FULL: { + // Use unique id for "busy" in order to avoid ABA problems. + const pool_state_t busy = pool_state_t(&busy); + // Helper for CAS execution + pool_state_t expected_state; + + // Request permission to take snapshot + expected_state = SNAPSHOT_FULL; + if (my_pool_state.compare_exchange_strong(expected_state, busy)) { + // Got permission. Take the snapshot. + // NOTE: This is not a lock, as the state can be set to FULL at + // any moment by a thread that spawns/enqueues new task. + std::size_t n = my_limit.load(std::memory_order_acquire); + // Make local copies of volatile parameters. Their change during + // snapshot taking procedure invalidates the attempt, and returns + // this thread into the dispatch loop. + std::size_t k; + for (k = 0; k < n; ++k) { + if (my_slots[k].task_pool.load(std::memory_order_relaxed) != EmptyTaskPool && + my_slots[k].head.load(std::memory_order_relaxed) < my_slots[k].tail.load(std::memory_order_relaxed)) + { + // k-th primary task pool is nonempty and does contain tasks. + break; + } + if (my_pool_state.load(std::memory_order_acquire) != busy) + return false; // the work was published + } + bool work_absent = k == n; + // Test and test-and-set. 
+ if (my_pool_state.load(std::memory_order_acquire) == busy) { + bool no_stream_tasks = !has_enqueued_tasks() && my_resume_task_stream.empty(); +#if __TBB_PREVIEW_CRITICAL_TASKS + no_stream_tasks = no_stream_tasks && my_critical_task_stream.empty(); +#endif + work_absent = work_absent && no_stream_tasks; + if (work_absent) { + // save current demand value before setting SNAPSHOT_EMPTY, + // to avoid race with advertise_new_work. + int current_demand = (int)my_max_num_workers; + expected_state = busy; + if (my_pool_state.compare_exchange_strong(expected_state, SNAPSHOT_EMPTY)) { + // This thread transitioned pool to empty state, and thus is + // responsible for telling the market that there is no work to do. + my_market->adjust_demand(*this, -current_demand, /* mandatory = */ false); + return true; + } + return false; + } + // Undo previous transition SNAPSHOT_FULL-->busy, unless another thread undid it. + expected_state = busy; + my_pool_state.compare_exchange_strong(expected_state, SNAPSHOT_FULL); + } + } + return false; + } + default: + // Another thread is taking a snapshot. + return false; + } +} + +void arena::enqueue_task(d1::task& t, d1::task_group_context& ctx, thread_data& td) { + task_group_context_impl::bind_to(ctx, &td); + task_accessor::context(t) = &ctx; + task_accessor::isolation(t) = no_isolation; + my_fifo_task_stream.push( &t, random_lane_selector(td.my_random) ); + advertise_new_work<work_enqueued>(); +} + +} // namespace r1 +} // namespace detail +} // namespace tbb + +// Enable task_arena.h +#include "oneapi/tbb/task_arena.h" // task_arena_base + +namespace tbb { +namespace detail { +namespace r1 { + +#if TBB_USE_ASSERT +void assert_arena_priority_valid( tbb::task_arena::priority a_priority ) { + bool is_arena_priority_correct = + a_priority == tbb::task_arena::priority::high || + a_priority == tbb::task_arena::priority::normal || + a_priority == tbb::task_arena::priority::low; + __TBB_ASSERT( is_arena_priority_correct, + "Task arena priority should be equal to one of the predefined values." 
); +} +#else +void assert_arena_priority_valid( tbb::task_arena::priority ) {} +#endif + +unsigned arena_priority_level( tbb::task_arena::priority a_priority ) { + assert_arena_priority_valid( a_priority ); + return market::num_priority_levels - unsigned(int(a_priority) / d1::priority_stride); +} + +tbb::task_arena::priority arena_priority( unsigned priority_level ) { + auto priority = tbb::task_arena::priority( + (market::num_priority_levels - priority_level) * d1::priority_stride + ); + assert_arena_priority_valid( priority ); + return priority; +} + +struct task_arena_impl { + static void initialize(d1::task_arena_base&); + static void terminate(d1::task_arena_base&); + static bool attach(d1::task_arena_base&); + static void execute(d1::task_arena_base&, d1::delegate_base&); + static void wait(d1::task_arena_base&); + static int max_concurrency(const d1::task_arena_base*); + static void enqueue(d1::task&, d1::task_arena_base*); +}; + +void __TBB_EXPORTED_FUNC initialize(d1::task_arena_base& ta) { + task_arena_impl::initialize(ta); +} +void __TBB_EXPORTED_FUNC terminate(d1::task_arena_base& ta) { + task_arena_impl::terminate(ta); +} +bool __TBB_EXPORTED_FUNC attach(d1::task_arena_base& ta) { + return task_arena_impl::attach(ta); +} +void __TBB_EXPORTED_FUNC execute(d1::task_arena_base& ta, d1::delegate_base& d) { + task_arena_impl::execute(ta, d); +} +void __TBB_EXPORTED_FUNC wait(d1::task_arena_base& ta) { + task_arena_impl::wait(ta); +} + +int __TBB_EXPORTED_FUNC max_concurrency(const d1::task_arena_base* ta) { + return task_arena_impl::max_concurrency(ta); +} + +void __TBB_EXPORTED_FUNC enqueue(d1::task& t, d1::task_arena_base* ta) { + task_arena_impl::enqueue(t, ta); +} + +void task_arena_impl::initialize(d1::task_arena_base& ta) { + governor::one_time_init(); + if (ta.my_max_concurrency < 1) { +#if __TBB_ARENA_BINDING + +#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT + d1::constraints arena_constraints = d1::constraints{} + .set_core_type(ta.core_type()) + .set_max_threads_per_core(ta.max_threads_per_core()) + .set_numa_id(ta.my_numa_id); + ta.my_max_concurrency = (int)default_concurrency(arena_constraints); +#else /*!__TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT*/ + ta.my_max_concurrency = (int)default_concurrency(ta.my_numa_id); +#endif /*!__TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT*/ + +#else /*!__TBB_ARENA_BINDING*/ + ta.my_max_concurrency = (int)governor::default_num_threads(); +#endif /*!__TBB_ARENA_BINDING*/ + } + + __TBB_ASSERT(ta.my_arena.load(std::memory_order_relaxed) == nullptr, "Arena already initialized"); + unsigned priority_level = arena_priority_level(ta.my_priority); + arena* a = market::create_arena(ta.my_max_concurrency, ta.my_num_reserved_slots, priority_level, /* stack_size = */ 0); + ta.my_arena.store(a, std::memory_order_release); + // add an internal market reference; a public reference was added in create_arena + market::global_market( /*is_public=*/false); +#if __TBB_ARENA_BINDING + a->my_numa_binding_observer = construct_binding_observer( + static_cast<d1::task_arena*>(&ta), a->my_num_slots, ta.my_numa_id, ta.core_type(), ta.max_threads_per_core()); +#endif /*__TBB_ARENA_BINDING*/ +} + +void task_arena_impl::terminate(d1::task_arena_base& ta) { + arena* a = ta.my_arena.load(std::memory_order_relaxed); + assert_pointer_valid(a); +#if __TBB_ARENA_BINDING + if(a->my_numa_binding_observer != nullptr ) { + destroy_binding_observer(a->my_numa_binding_observer); + a->my_numa_binding_observer = nullptr; + } +#endif 
/*__TBB_ARENA_BINDING*/ + a->my_market->release( /*is_public=*/true, /*blocking_terminate=*/false ); + a->on_thread_leaving<arena::ref_external>(); + ta.my_arena.store(nullptr, std::memory_order_relaxed); +} + +bool task_arena_impl::attach(d1::task_arena_base& ta) { + __TBB_ASSERT(!ta.my_arena.load(std::memory_order_relaxed), nullptr); + thread_data* td = governor::get_thread_data_if_initialized(); + if( td && td->my_arena ) { + arena* a = td->my_arena; + // There is an active arena to attach to. + // It's still used by s, so won't be destroyed right away. + __TBB_ASSERT(a->my_references > 0, NULL ); + a->my_references += arena::ref_external; + ta.my_num_reserved_slots = a->my_num_reserved_slots; + ta.my_priority = arena_priority(a->my_priority_level); + ta.my_max_concurrency = ta.my_num_reserved_slots + a->my_max_num_workers; + __TBB_ASSERT(arena::num_arena_slots(ta.my_max_concurrency) == a->my_num_slots, NULL); + ta.my_arena.store(a, std::memory_order_release); + // increases market's ref count for task_arena + market::global_market( /*is_public=*/true ); + return true; + } + return false; +} + +void task_arena_impl::enqueue(d1::task& t, d1::task_arena_base* ta) { + thread_data* td = governor::get_thread_data(); // thread data is only needed for FastRandom instance + arena* a = ta->my_arena.load(std::memory_order_relaxed); + assert_pointers_valid(ta, a, a->my_default_ctx, td); + // Is there a better place for checking the state of my_default_ctx? + __TBB_ASSERT(!a->my_default_ctx->is_group_execution_cancelled(), + "The task will not be executed because default task_group_context of task_arena is cancelled. Has previously enqueued task thrown an exception?"); + a->enqueue_task(t, *a->my_default_ctx, *td); +} + +class nested_arena_context : no_copy { +public: + nested_arena_context(thread_data& td, arena& nested_arena, std::size_t slot_index) + : m_orig_execute_data_ext(td.my_task_dispatcher->m_execute_data_ext) + { + if (td.my_arena != &nested_arena) { + m_orig_arena = td.my_arena; + m_orig_slot_index = td.my_arena_index; + m_orig_last_observer = td.my_last_observer; + + td.detach_task_dispatcher(); + td.attach_arena(nested_arena, slot_index); + task_dispatcher& task_disp = td.my_arena_slot->default_task_dispatcher(); + task_disp.set_stealing_threshold(m_orig_execute_data_ext.task_disp->m_stealing_threshold); + td.attach_task_dispatcher(task_disp); + + // If the calling thread occupies the slots out of external thread reserve we need to notify the + // market that this arena requires one worker less. + if (td.my_arena_index >= td.my_arena->my_num_reserved_slots) { + td.my_arena->my_market->adjust_demand(*td.my_arena, /* delta = */ -1, /* mandatory = */ false); + } + + td.my_last_observer = nullptr; + // The task_arena::execute method considers each calling thread as an external thread. 
+ td.my_arena->my_observers.notify_entry_observers(td.my_last_observer, /* worker*/false); + } + + m_task_dispatcher = td.my_task_dispatcher; + m_orig_fifo_tasks_allowed = m_task_dispatcher->allow_fifo_task(true); + m_orig_critical_task_allowed = m_task_dispatcher->m_properties.critical_task_allowed; + m_task_dispatcher->m_properties.critical_task_allowed = true; + + execution_data_ext& ed_ext = td.my_task_dispatcher->m_execute_data_ext; + ed_ext.context = td.my_arena->my_default_ctx; + ed_ext.original_slot = td.my_arena_index; + ed_ext.affinity_slot = d1::no_slot; + ed_ext.task_disp = td.my_task_dispatcher; + ed_ext.isolation = no_isolation; + + __TBB_ASSERT(td.my_arena_slot, nullptr); + __TBB_ASSERT(td.my_arena_slot->is_occupied(), nullptr); + __TBB_ASSERT(td.my_task_dispatcher, nullptr); + } + ~nested_arena_context() { + thread_data& td = *m_task_dispatcher->m_thread_data; + __TBB_ASSERT(governor::is_thread_data_set(&td), nullptr); + m_task_dispatcher->allow_fifo_task(m_orig_fifo_tasks_allowed); + m_task_dispatcher->m_properties.critical_task_allowed = m_orig_critical_task_allowed; + if (m_orig_arena) { + td.my_arena->my_observers.notify_exit_observers(td.my_last_observer, /*worker*/ false); + td.my_last_observer = m_orig_last_observer; + + // Notify the market that this thread releasing a one slot + // that can be used by a worker thread. + if (td.my_arena_index >= td.my_arena->my_num_reserved_slots) { + td.my_arena->my_market->adjust_demand(*td.my_arena, /* delta = */ 1, /* mandatory = */ false); + } + + td.my_task_dispatcher->set_stealing_threshold(0); + td.detach_task_dispatcher(); + td.my_arena_slot->release(); + td.my_arena->my_exit_monitors.notify_one(); // do not relax! + + td.attach_arena(*m_orig_arena, m_orig_slot_index); + td.attach_task_dispatcher(*m_orig_execute_data_ext.task_disp); + } + td.my_task_dispatcher->m_execute_data_ext = m_orig_execute_data_ext; + } + +private: + execution_data_ext m_orig_execute_data_ext{}; + arena* m_orig_arena{ nullptr }; + observer_proxy* m_orig_last_observer{ nullptr }; + task_dispatcher* m_task_dispatcher{ nullptr }; + unsigned m_orig_slot_index{}; + bool m_orig_fifo_tasks_allowed{}; + bool m_orig_critical_task_allowed{}; +}; + +class delegated_task : public d1::task { + d1::delegate_base& m_delegate; + concurrent_monitor& m_monitor; + d1::wait_context& m_wait_ctx; + std::atomic<bool> m_completed; + d1::task* execute(d1::execution_data& ed) override { + const execution_data_ext& ed_ext = static_cast<const execution_data_ext&>(ed); + execution_data_ext orig_execute_data_ext = ed_ext.task_disp->m_execute_data_ext; + __TBB_ASSERT(&ed_ext.task_disp->m_execute_data_ext == &ed, + "The execute data shall point to the current task dispatcher execute data"); + __TBB_ASSERT(ed_ext.task_disp->m_execute_data_ext.isolation == no_isolation, nullptr); + + ed_ext.task_disp->m_execute_data_ext.context = ed_ext.task_disp->get_thread_data().my_arena->my_default_ctx; + bool fifo_task_allowed = ed_ext.task_disp->allow_fifo_task(true); + try_call([&] { + m_delegate(); + }).on_completion([&] { + ed_ext.task_disp->m_execute_data_ext = orig_execute_data_ext; + ed_ext.task_disp->allow_fifo_task(fifo_task_allowed); + }); + + finalize(); + return nullptr; + } + d1::task* cancel(d1::execution_data&) override { + finalize(); + return nullptr; + } + void finalize() { + m_wait_ctx.release(); // must precede the wakeup + m_monitor.notify([this](std::uintptr_t ctx) { + return ctx == std::uintptr_t(&m_delegate); + }); // do not relax, it needs a fence! 
+ m_completed.store(true, std::memory_order_release); + } +public: + delegated_task(d1::delegate_base& d, concurrent_monitor& s, d1::wait_context& wo) + : m_delegate(d), m_monitor(s), m_wait_ctx(wo), m_completed{ false }{} + ~delegated_task() { + // The destructor can be called earlier than the m_monitor is notified + // because the waiting thread can be released after m_wait_ctx.release_wait. + // To close that race we wait for the m_completed signal. + spin_wait_until_eq(m_completed, true); + } +}; + +void task_arena_impl::execute(d1::task_arena_base& ta, d1::delegate_base& d) { + arena* a = ta.my_arena.load(std::memory_order_relaxed); + __TBB_ASSERT(a != nullptr, nullptr); + thread_data* td = governor::get_thread_data(); + + bool same_arena = td->my_arena == a; + std::size_t index1 = td->my_arena_index; + if (!same_arena) { + index1 = a->occupy_free_slot</*as_worker */false>(*td); + if (index1 == arena::out_of_arena) { + concurrent_monitor::thread_context waiter((std::uintptr_t)&d); + d1::wait_context wo(1); + d1::task_group_context exec_context(d1::task_group_context::isolated); + task_group_context_impl::copy_fp_settings(exec_context, *a->my_default_ctx); + + delegated_task dt(d, a->my_exit_monitors, wo); + a->enqueue_task( dt, exec_context, *td); + size_t index2 = arena::out_of_arena; + do { + a->my_exit_monitors.prepare_wait(waiter); + if (!wo.continue_execution()) { + a->my_exit_monitors.cancel_wait(waiter); + break; + } + index2 = a->occupy_free_slot</*as_worker*/false>(*td); + if (index2 != arena::out_of_arena) { + a->my_exit_monitors.cancel_wait(waiter); + nested_arena_context scope(*td, *a, index2 ); + r1::wait(wo, exec_context); + __TBB_ASSERT(!exec_context.my_exception, NULL); // exception can be thrown above, not deferred + break; + } + a->my_exit_monitors.commit_wait(waiter); + } while (wo.continue_execution()); + if (index2 == arena::out_of_arena) { + // notify a waiting thread even if this thread did not enter arena, + // in case it was woken by a leaving thread but did not need to enter + a->my_exit_monitors.notify_one(); // do not relax! + } + // process possible exception + if (exec_context.my_exception) { + __TBB_ASSERT(exec_context.is_group_execution_cancelled(), "The task group context with an exception should be canceled."); + exec_context.my_exception->throw_self(); + } + __TBB_ASSERT(governor::is_thread_data_set(td), nullptr); + return; + } // if (index1 == arena::out_of_arena) + } // if (!same_arena) + + context_guard_helper</*report_tasks=*/false> context_guard; + context_guard.set_ctx(a->my_default_ctx); + nested_arena_context scope(*td, *a, index1); +#if _WIN64 + try { +#endif + d(); + __TBB_ASSERT(same_arena || governor::is_thread_data_set(td), nullptr); +#if _WIN64 + } catch (...) 
{ + context_guard.restore_default(); + throw; + } +#endif +} + +void task_arena_impl::wait(d1::task_arena_base& ta) { + arena* a = ta.my_arena.load(std::memory_order_relaxed); + __TBB_ASSERT(a != nullptr, nullptr); + thread_data* td = governor::get_thread_data(); + __TBB_ASSERT_EX(td, "Scheduler is not initialized"); + __TBB_ASSERT(td->my_arena != a || td->my_arena_index == 0, "internal_wait is not supported within a worker context" ); + if (a->my_max_num_workers != 0) { + while (a->num_workers_active() || a->my_pool_state.load(std::memory_order_acquire) != arena::SNAPSHOT_EMPTY) { + yield(); + } + } +} + +int task_arena_impl::max_concurrency(const d1::task_arena_base *ta) { + arena* a = nullptr; + if( ta ) // for special cases of ta->max_concurrency() + a = ta->my_arena.load(std::memory_order_relaxed); + else if( thread_data* td = governor::get_thread_data_if_initialized() ) + a = td->my_arena; // the current arena if any + + if( a ) { // Get parameters from the arena + __TBB_ASSERT( !ta || ta->my_max_concurrency==1, NULL ); + return a->my_num_reserved_slots + a->my_max_num_workers +#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY + + (a->my_local_concurrency_flag.test() ? 1 : 0) +#endif + ; + } + + if (ta && ta->my_max_concurrency == 1) { + return 1; + } + +#if __TBB_ARENA_BINDING + if (ta) { +#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT + d1::constraints arena_constraints = d1::constraints{} + .set_numa_id(ta->my_numa_id) + .set_core_type(ta->core_type()) + .set_max_threads_per_core(ta->max_threads_per_core()); + return (int)default_concurrency(arena_constraints); +#else /*!__TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT*/ + return (int)default_concurrency(ta->my_numa_id); +#endif /*!__TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT*/ + } +#endif /*!__TBB_ARENA_BINDING*/ + + __TBB_ASSERT(!ta || ta->my_max_concurrency==d1::task_arena_base::automatic, NULL ); + return int(governor::default_num_threads()); +} + +void isolate_within_arena(d1::delegate_base& d, std::intptr_t isolation) { + // TODO: Decide what to do if the scheduler is not initialized. Is there a use case for it? + thread_data* tls = governor::get_thread_data(); + assert_pointers_valid(tls, tls->my_task_dispatcher); + task_dispatcher* dispatcher = tls->my_task_dispatcher; + isolation_type previous_isolation = dispatcher->m_execute_data_ext.isolation; + try_call([&] { + // We temporarily change the isolation tag of the currently running task. It will be restored in the destructor of the guard. + isolation_type current_isolation = isolation ? isolation : reinterpret_cast<isolation_type>(&d); + // Save the current isolation value and set new one + previous_isolation = dispatcher->set_isolation(current_isolation); + // Isolation within this callable + d(); + }).on_completion([&] { + __TBB_ASSERT(governor::get_thread_data()->my_task_dispatcher == dispatcher, NULL); + dispatcher->set_isolation(previous_isolation); + }); +} + +} // namespace r1 +} // namespace detail +} // namespace tbb + diff --git a/contrib/libs/tbb/src/tbb/arena.h b/contrib/libs/tbb/src/tbb/arena.h new file mode 100644 index 0000000000..b1b9c3dc93 --- /dev/null +++ b/contrib/libs/tbb/src/tbb/arena.h @@ -0,0 +1,616 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef _TBB_arena_H +#define _TBB_arena_H + +#include <atomic> +#include <cstring> + +#include "oneapi/tbb/detail/_task.h" + +#include "scheduler_common.h" +#include "intrusive_list.h" +#include "task_stream.h" +#include "arena_slot.h" +#include "rml_tbb.h" +#include "mailbox.h" +#include "market.h" +#include "governor.h" +#include "concurrent_monitor.h" +#include "observer_proxy.h" +#include "oneapi/tbb/spin_mutex.h" + +namespace tbb { +namespace detail { +namespace r1 { + +class task_dispatcher; +class task_group_context; +class allocate_root_with_context_proxy; + +#if __TBB_ARENA_BINDING +class numa_binding_observer; +#endif /*__TBB_ARENA_BINDING*/ + +//! Bounded coroutines cache LIFO ring buffer +class arena_co_cache { + //! Ring buffer storage + task_dispatcher** my_co_scheduler_cache; + //! Current cache index + unsigned my_head; + //! Cache capacity for arena + unsigned my_max_index; + //! Accessor lock for modification operations + tbb::spin_mutex my_co_cache_mutex; + + unsigned next_index() { + return ( my_head == my_max_index ) ? 0 : my_head + 1; + } + + unsigned prev_index() { + return ( my_head == 0 ) ? my_max_index : my_head - 1; + } + + bool internal_empty() { + return my_co_scheduler_cache[prev_index()] == nullptr; + } + + void internal_task_dispatcher_cleanup(task_dispatcher* to_cleanup) { + to_cleanup->~task_dispatcher(); + cache_aligned_deallocate(to_cleanup); + } + +public: + void init(unsigned cache_capacity) { + std::size_t alloc_size = cache_capacity * sizeof(task_dispatcher*); + my_co_scheduler_cache = (task_dispatcher**)cache_aligned_allocate(alloc_size); + std::memset( my_co_scheduler_cache, 0, alloc_size ); + my_head = 0; + my_max_index = cache_capacity - 1; + } + + void cleanup() { + while (task_dispatcher* to_cleanup = pop()) { + internal_task_dispatcher_cleanup(to_cleanup); + } + cache_aligned_deallocate(my_co_scheduler_cache); + } + + //! Insert scheduler to the current available place. + //! Replace an old value, if necessary. + void push(task_dispatcher* s) { + task_dispatcher* to_cleanup = nullptr; + { + tbb::spin_mutex::scoped_lock lock(my_co_cache_mutex); + // Check if we are replacing some existing buffer entrance + if (my_co_scheduler_cache[my_head] != nullptr) { + to_cleanup = my_co_scheduler_cache[my_head]; + } + // Store the cached value + my_co_scheduler_cache[my_head] = s; + // Move head index to the next slot + my_head = next_index(); + } + // Cleanup replaced buffer if any + if (to_cleanup) { + internal_task_dispatcher_cleanup(to_cleanup); + } + } + + //! 
Get a cached scheduler if any + task_dispatcher* pop() { + tbb::spin_mutex::scoped_lock lock(my_co_cache_mutex); + // No cached coroutine + if (internal_empty()) { + return nullptr; + } + // Move head index to the currently available value + my_head = prev_index(); + // Retrieve the value from the buffer + task_dispatcher* to_return = my_co_scheduler_cache[my_head]; + // Clear the previous entrance value + my_co_scheduler_cache[my_head] = nullptr; + return to_return; + } +}; + +struct stack_anchor_type { + stack_anchor_type() = default; + stack_anchor_type(const stack_anchor_type&) = delete; +}; + +#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY +class atomic_flag { + static const std::uintptr_t SET = 1; + static const std::uintptr_t EMPTY = 0; + std::atomic<std::uintptr_t> my_state; +public: + bool test_and_set() { + std::uintptr_t state = my_state.load(std::memory_order_acquire); + switch (state) { + case SET: + return false; + default: /* busy */ + if (my_state.compare_exchange_strong(state, SET)) { + // We interrupted clear transaction + return false; + } + if (state != EMPTY) { + // We lost our epoch + return false; + } + // We are too late but still in the same epoch + __TBB_fallthrough; + case EMPTY: + return my_state.compare_exchange_strong(state, SET); + } + } + template <typename Pred> + bool try_clear_if(Pred&& pred) { + std::uintptr_t busy = std::uintptr_t(&busy); + std::uintptr_t state = my_state.load(std::memory_order_acquire); + if (state == SET && my_state.compare_exchange_strong(state, busy)) { + if (pred()) { + return my_state.compare_exchange_strong(busy, EMPTY); + } + // The result of the next operation is discarded, always false should be returned. + my_state.compare_exchange_strong(busy, SET); + } + return false; + } + void clear() { + my_state.store(EMPTY, std::memory_order_release); + } + bool test() { + return my_state.load(std::memory_order_acquire) != EMPTY; + } +}; +#endif + +//! The structure of an arena, except the array of slots. +/** Separated in order to simplify padding. + Intrusive list node base class is used by market to form a list of arenas. **/ +struct arena_base : padded<intrusive_list_node> { + //! The number of workers that have been marked out by the resource manager to service the arena. + std::atomic<unsigned> my_num_workers_allotted; // heavy use in stealing loop + + //! Reference counter for the arena. + /** Worker and external thread references are counted separately: first several bits are for references + from external thread threads or explicit task_arenas (see arena::ref_external_bits below); + the rest counts the number of workers servicing the arena. */ + std::atomic<unsigned> my_references; // heavy use in stealing loop + + //! The maximal number of currently busy slots. + std::atomic<unsigned> my_limit; // heavy use in stealing loop + + //! Task pool for the tasks scheduled via task::enqueue() method. + /** Such scheduling guarantees eventual execution even if + - new tasks are constantly coming (by extracting scheduled tasks in + relaxed FIFO order); + - the enqueuing thread does not call any of wait_for_all methods. **/ + task_stream<front_accessor> my_fifo_task_stream; // heavy use in stealing loop + + //! Task pool for the tasks scheduled via tbb::resume() function. + task_stream<front_accessor> my_resume_task_stream; // heavy use in stealing loop + +#if __TBB_PREVIEW_CRITICAL_TASKS + //! Task pool for the tasks with critical property set. 
+ /** Critical tasks are scheduled for execution ahead of other sources (including local task pool + and even bypassed tasks) unless the thread already executes a critical task in an outer + dispatch loop **/ + // used on the hot path of the task dispatch loop + task_stream<back_nonnull_accessor> my_critical_task_stream; +#endif + + //! The number of workers requested by the external thread owning the arena. + unsigned my_max_num_workers; + + //! The total number of workers that are requested from the resource manager. + int my_total_num_workers_requested; + + //! The number of workers that are really requested from the resource manager. + //! Possible values are in [0, my_max_num_workers] + int my_num_workers_requested; + + //! The index in the array of per priority lists of arenas this object is in. + /*const*/ unsigned my_priority_level; + + //! The max priority level of arena in market. + std::atomic<bool> my_is_top_priority{false}; + + //! Current task pool state and estimate of available tasks amount. + /** The estimate is either 0 (SNAPSHOT_EMPTY) or infinity (SNAPSHOT_FULL). + Special state is "busy" (any other unsigned value). + Note that the implementation of arena::is_busy_or_empty() requires + my_pool_state to be unsigned. */ + using pool_state_t = std::uintptr_t ; + std::atomic<pool_state_t> my_pool_state; + + //! The list of local observers attached to this arena. + observer_list my_observers; + +#if __TBB_ARENA_BINDING + //! Pointer to internal observer that allows to bind threads in arena to certain NUMA node. + numa_binding_observer* my_numa_binding_observer; +#endif /*__TBB_ARENA_BINDING*/ + + // Below are rarely modified members + + //! The market that owns this arena. + market* my_market; + + //! ABA prevention marker. + std::uintptr_t my_aba_epoch; + + //! Default task group context. + d1::task_group_context* my_default_ctx; + + //! The number of slots in the arena. + unsigned my_num_slots; + + //! The number of reserved slots (can be occupied only by external threads). + unsigned my_num_reserved_slots; + +#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY + // arena needs an extra worker despite the arena limit + atomic_flag my_local_concurrency_flag; + // the number of local mandatory concurrency requests + int my_local_concurrency_requests; + // arena needs an extra worker despite a global limit + std::atomic<bool> my_global_concurrency_mode; +#endif /* __TBB_ENQUEUE_ENFORCED_CONCURRENCY */ + + //! Waiting object for external threads that cannot join the arena. + concurrent_monitor my_exit_monitors; + + //! Coroutines (task_dispathers) cache buffer + arena_co_cache my_co_cache; + +#if TBB_USE_ASSERT + //! Used to trap accesses to the object after its destruction. + std::uintptr_t my_guard; +#endif /* TBB_USE_ASSERT */ +}; // struct arena_base + +class arena: public padded<arena_base> +{ +public: + using base_type = padded<arena_base>; + + //! Types of work advertised by advertise_new_work() + enum new_work_type { + work_spawned, + wakeup, + work_enqueued + }; + + //! Constructor + arena ( market& m, unsigned max_num_workers, unsigned num_reserved_slots, unsigned priority_level); + + //! Allocate an instance of arena. 
+ static arena& allocate_arena( market& m, unsigned num_slots, unsigned num_reserved_slots, + unsigned priority_level ); + + static int unsigned num_arena_slots ( unsigned num_slots ) { + return max(2u, num_slots); + } + + static int allocation_size ( unsigned num_slots ) { + return sizeof(base_type) + num_slots * (sizeof(mail_outbox) + sizeof(arena_slot) + sizeof(task_dispatcher)); + } + + //! Get reference to mailbox corresponding to given slot_id + mail_outbox& mailbox( d1::slot_id slot ) { + __TBB_ASSERT( slot != d1::no_slot, "affinity should be specified" ); + + return reinterpret_cast<mail_outbox*>(this)[-(int)(slot+1)]; // cast to 'int' is redundant but left for readability + } + + //! Completes arena shutdown, destructs and deallocates it. + void free_arena (); + + //! No tasks to steal since last snapshot was taken + static const pool_state_t SNAPSHOT_EMPTY = 0; + + //! At least one task has been offered for stealing since the last snapshot started + static const pool_state_t SNAPSHOT_FULL = pool_state_t(-1); + + //! The number of least significant bits for external references + static const unsigned ref_external_bits = 12; // up to 4095 external and 1M workers + + //! Reference increment values for externals and workers + static const unsigned ref_external = 1; + static const unsigned ref_worker = 1 << ref_external_bits; + + //! No tasks to steal or snapshot is being taken. + static bool is_busy_or_empty( pool_state_t s ) { return s < SNAPSHOT_FULL; } + + //! The number of workers active in the arena. + unsigned num_workers_active() const { + return my_references.load(std::memory_order_acquire) >> ref_external_bits; + } + + //! Check if the recall is requested by the market. + bool is_recall_requested() const { + return num_workers_active() > my_num_workers_allotted.load(std::memory_order_relaxed); + } + + //! If necessary, raise a flag that there is new job in arena. + template<arena::new_work_type work_type> void advertise_new_work(); + + //! Attempts to steal a task from a randomly chosen arena slot + d1::task* steal_task(unsigned arena_index, FastRandom& frnd, execution_data_ext& ed, isolation_type isolation); + + //! Get a task from a global starvation resistant queue + template<task_stream_accessor_type accessor> + d1::task* get_stream_task(task_stream<accessor>& stream, unsigned& hint); + +#if __TBB_PREVIEW_CRITICAL_TASKS + //! Tries to find a critical task in global critical task stream + d1::task* get_critical_task(unsigned& hint, isolation_type isolation); +#endif + + //! Check if there is job anywhere in arena. + /** Return true if no job or if arena is being cleaned up. */ + bool is_out_of_work(); + + //! enqueue a task into starvation-resistance queue + void enqueue_task(d1::task&, d1::task_group_context&, thread_data&); + + //! Registers the worker with the arena and enters TBB scheduler dispatch loop + void process(thread_data&); + + //! Notification that the thread leaves its arena + template<unsigned ref_param> + inline void on_thread_leaving ( ); + + //! Check for the presence of enqueued tasks at all priority levels + bool has_enqueued_tasks(); + + static const std::size_t out_of_arena = ~size_t(0); + //! Tries to occupy a slot in the arena. On success, returns the slot index; if no slot is available, returns out_of_arena. + template <bool as_worker> + std::size_t occupy_free_slot(thread_data&); + //! Tries to occupy a slot in the specified range. 
+ std::size_t occupy_free_slot_in_range(thread_data& tls, std::size_t lower, std::size_t upper); + + std::uintptr_t calculate_stealing_threshold(); + + /** Must be the last data field */ + arena_slot my_slots[1]; +}; // class arena + +template<unsigned ref_param> +inline void arena::on_thread_leaving ( ) { + // + // Implementation of arena destruction synchronization logic contained various + // bugs/flaws at the different stages of its evolution, so below is a detailed + // description of the issues taken into consideration in the framework of the + // current design. + // + // In case of using fire-and-forget tasks (scheduled via task::enqueue()) + // external thread is allowed to leave its arena before all its work is executed, + // and market may temporarily revoke all workers from this arena. Since revoked + // workers never attempt to reset arena state to EMPTY and cancel its request + // to RML for threads, the arena object is destroyed only when both the last + // thread is leaving it and arena's state is EMPTY (that is its external thread + // left and it does not contain any work). + // Thus resetting arena to EMPTY state (as earlier TBB versions did) should not + // be done here (or anywhere else in the external thread to that matter); doing so + // can result either in arena's premature destruction (at least without + // additional costly checks in workers) or in unnecessary arena state changes + // (and ensuing workers migration). + // + // A worker that checks for work presence and transitions arena to the EMPTY + // state (in snapshot taking procedure arena::is_out_of_work()) updates + // arena::my_pool_state first and only then arena::my_num_workers_requested. + // So the check for work absence must be done against the latter field. + // + // In a time window between decrementing the active threads count and checking + // if there is an outstanding request for workers. New worker thread may arrive, + // finish remaining work, set arena state to empty, and leave decrementing its + // refcount and destroying. Then the current thread will destroy the arena + // the second time. To preclude it a local copy of the outstanding request + // value can be stored before decrementing active threads count. + // + // But this technique may cause two other problem. When the stored request is + // zero, it is possible that arena still has threads and they can generate new + // tasks and thus re-establish non-zero requests. Then all the threads can be + // revoked (as described above) leaving this thread the last one, and causing + // it to destroy non-empty arena. + // + // The other problem takes place when the stored request is non-zero. Another + // thread may complete the work, set arena state to empty, and leave without + // arena destruction before this thread decrements the refcount. This thread + // cannot destroy the arena either. Thus the arena may be "orphaned". + // + // In both cases we cannot dereference arena pointer after the refcount is + // decremented, as our arena may already be destroyed. + // + // If this is the external thread, the market is protected by refcount to it. + // In case of workers market's liveness is ensured by the RML connection + // rundown protocol, according to which the client (i.e. the market) lives + // until RML server notifies it about connection termination, and this + // notification is fired only after all workers return into RML. 
+    //
+    // Thus, if we decremented the refcount to zero, we ask the market to check the arena
+    // state (including whether it is still alive) under the lock.
+    //
+    std::uintptr_t aba_epoch = my_aba_epoch;
+    unsigned priority_level = my_priority_level;
+    market* m = my_market;
+    __TBB_ASSERT(my_references.load(std::memory_order_relaxed) >= ref_param, "broken arena reference counter");
+#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY
+    // When there are no workers, someone must free the arena, as
+    // without workers no one calls is_out_of_work().
+    // Skip workerless arenas because they have no demand for workers.
+    // TODO: consider stricter conditions for the cleanup,
+    // because it can create demand for workers,
+    // while the arena can already be empty (and so ready for destruction).
+    // TODO: Fix the race: the soft limit may change while we are checking it.
+    if( ref_param==ref_external && my_num_slots != my_num_reserved_slots
+        && 0 == m->my_num_workers_soft_limit.load(std::memory_order_relaxed) &&
+        !my_global_concurrency_mode.load(std::memory_order_relaxed) ) {
+        is_out_of_work();
+        // We expect that in the worst case it is enough to have num_priority_levels-1
+        // calls to restore priorities and yet another is_out_of_work() to confirm
+        // that no work was found. But as market::set_active_num_workers() can be called
+        // concurrently, we cannot guarantee that the last is_out_of_work() returns true.
+    }
+#endif
+    if ( (my_references -= ref_param ) == 0 )
+        m->try_destroy_arena( this, aba_epoch, priority_level );
+}
+
+template<arena::new_work_type work_type>
+void arena::advertise_new_work() {
+    auto is_related_arena = [&] (extended_context context) {
+        return this == context.my_arena_addr;
+    };
+
+    if( work_type == work_enqueued ) {
+        atomic_fence(std::memory_order_seq_cst);
+#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY
+        if ( my_market->my_num_workers_soft_limit.load(std::memory_order_acquire) == 0 &&
+            my_global_concurrency_mode.load(std::memory_order_acquire) == false )
+            my_market->enable_mandatory_concurrency(this);
+
+        if (my_max_num_workers == 0 && my_num_reserved_slots == 1 && my_local_concurrency_flag.test_and_set()) {
+            my_market->adjust_demand(*this, /* delta = */ 1, /* mandatory = */ true);
+        }
+#endif /* __TBB_ENQUEUE_ENFORCED_CONCURRENCY */
+        // The local memory fence here and below is required to avoid missed wakeups; see the comment below.
+        // Starvation-resistant tasks require concurrency, so missed wakeups are unacceptable.
+    }
+    else if( work_type == wakeup ) {
+        atomic_fence(std::memory_order_seq_cst);
+    }
+
+    // Double-check idiom that, in case of spawning, is deliberately sloppy about memory fences.
+    // Technically, to avoid missed wakeups, there should be a full memory fence between the point we
+    // released the task pool (i.e. spawned a task) and read the arena's state. However, adding such a
+    // fence might hurt overall performance more than it helps, because the fence would be executed
+    // on every task pool release, even when stealing does not occur. Since TBB allows parallelism,
+    // but never promises parallelism, the missed wakeup is not a correctness problem.
+    pool_state_t snapshot = my_pool_state.load(std::memory_order_acquire);
+    if( is_busy_or_empty(snapshot) ) {
+        // Attempt to mark the pool as full. The compare_and_swap below is a little unusual because the
+        // result is compared to a value that can be different from the comparand argument.
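+        // Possible outcomes of the exchange on my_pool_state:
+        //  - the state was EMPTY and this thread set it to FULL: it must request workers from the market below;
+        //  - the state was "busy" and was set to FULL: workers were already requested, nothing more to do;
+        //  - the exchange failed because the state became EMPTY meanwhile: retry the EMPTY -> FULL
+        //    transition below and request workers only if this thread wins that retry.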
+ pool_state_t expected_state = snapshot; + my_pool_state.compare_exchange_strong( expected_state, SNAPSHOT_FULL ); + if( expected_state == SNAPSHOT_EMPTY ) { + if( snapshot != SNAPSHOT_EMPTY ) { + // This thread read "busy" into snapshot, and then another thread transitioned + // my_pool_state to "empty" in the meantime, which caused the compare_and_swap above + // to fail. Attempt to transition my_pool_state from "empty" to "full". + expected_state = SNAPSHOT_EMPTY; + if( !my_pool_state.compare_exchange_strong( expected_state, SNAPSHOT_FULL ) ) { + // Some other thread transitioned my_pool_state from "empty", and hence became + // responsible for waking up workers. + return; + } + } + // This thread transitioned pool from empty to full state, and thus is responsible for + // telling the market that there is work to do. +#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY + if( work_type == work_spawned ) { + if ( my_global_concurrency_mode.load(std::memory_order_acquire) == true ) + my_market->mandatory_concurrency_disable( this ); + } +#endif /* __TBB_ENQUEUE_ENFORCED_CONCURRENCY */ + // TODO: investigate adjusting of arena's demand by a single worker. + my_market->adjust_demand(*this, my_max_num_workers, /* mandatory = */ false); + + // Notify all sleeping threads that work has appeared in the arena. + my_market->get_wait_list().notify(is_related_arena); + } + } +} + +inline d1::task* arena::steal_task(unsigned arena_index, FastRandom& frnd, execution_data_ext& ed, isolation_type isolation) { + auto slot_num_limit = my_limit.load(std::memory_order_relaxed); + if (slot_num_limit == 1) { + // No slots to steal from + return nullptr; + } + // Try to steal a task from a random victim. + std::size_t k = frnd.get() % (slot_num_limit - 1); + // The following condition excludes the external thread that might have + // already taken our previous place in the arena from the list . + // of potential victims. But since such a situation can take + // place only in case of significant oversubscription, keeping + // the checks simple seems to be preferable to complicating the code. + if (k >= arena_index) { + ++k; // Adjusts random distribution to exclude self + } + arena_slot* victim = &my_slots[k]; + d1::task **pool = victim->task_pool.load(std::memory_order_relaxed); + d1::task *t = nullptr; + if (pool == EmptyTaskPool || !(t = victim->steal_task(*this, isolation))) { + return nullptr; + } + if (task_accessor::is_proxy_task(*t)) { + task_proxy &tp = *(task_proxy*)t; + d1::slot_id slot = tp.slot; + t = tp.extract_task<task_proxy::pool_bit>(); + if (!t) { + // Proxy was empty, so it's our responsibility to free it + tp.allocator.delete_object(&tp, ed); + return nullptr; + } + // Note affinity is called for any stealed task (proxy or general) + ed.affinity_slot = slot; + } else { + // Note affinity is called for any stealed task (proxy or general) + ed.affinity_slot = d1::any_slot; + } + // Update task owner thread id to identify stealing + ed.original_slot = k; + return t; +} + +template<task_stream_accessor_type accessor> +inline d1::task* arena::get_stream_task(task_stream<accessor>& stream, unsigned& hint) { + if (stream.empty()) + return nullptr; + return stream.pop(subsequent_lane_selector(hint)); +} + +#if __TBB_PREVIEW_CRITICAL_TASKS +// Retrieves critical task respecting isolation level, if provided. 
The rule is: +// 1) If no outer critical task and no isolation => take any critical task +// 2) If working on an outer critical task and no isolation => cannot take any critical task +// 3) If no outer critical task but isolated => respect isolation +// 4) If working on an outer critical task and isolated => respect isolation +// Hint is used to keep some LIFO-ness, start search with the lane that was used during push operation. +inline d1::task* arena::get_critical_task(unsigned& hint, isolation_type isolation) { + if (my_critical_task_stream.empty()) + return nullptr; + + if ( isolation != no_isolation ) { + return my_critical_task_stream.pop_specific( hint, isolation ); + } else { + return my_critical_task_stream.pop(preceding_lane_selector(hint)); + } +} +#endif // __TBB_PREVIEW_CRITICAL_TASKS + +} // namespace r1 +} // namespace detail +} // namespace tbb + +#endif /* _TBB_arena_H */ diff --git a/contrib/libs/tbb/src/tbb/arena_slot.cpp b/contrib/libs/tbb/src/tbb/arena_slot.cpp new file mode 100644 index 0000000000..72706b3de5 --- /dev/null +++ b/contrib/libs/tbb/src/tbb/arena_slot.cpp @@ -0,0 +1,219 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "arena_slot.h" +#include "arena.h" +#include "thread_data.h" + +namespace tbb { +namespace detail { +namespace r1 { + +//------------------------------------------------------------------------ +// Arena Slot +//------------------------------------------------------------------------ +d1::task* arena_slot::get_task_impl(size_t T, execution_data_ext& ed, bool& tasks_omitted, isolation_type isolation) { + __TBB_ASSERT(tail.load(std::memory_order_relaxed) <= T || is_local_task_pool_quiescent(), + "Is it safe to get a task at position T?"); + + d1::task* result = task_pool_ptr[T]; + __TBB_ASSERT(!is_poisoned( result ), "The poisoned task is going to be processed"); + + if (!result) { + return nullptr; + } + bool omit = isolation != no_isolation && isolation != task_accessor::isolation(*result); + if (!omit && !task_accessor::is_proxy_task(*result)) { + return result; + } else if (omit) { + tasks_omitted = true; + return nullptr; + } + + task_proxy& tp = static_cast<task_proxy&>(*result); + d1::slot_id aff_id = tp.slot; + if ( d1::task *t = tp.extract_task<task_proxy::pool_bit>() ) { + ed.affinity_slot = aff_id; + return t; + } + // Proxy was empty, so it's our responsibility to free it + tp.allocator.delete_object(&tp, ed); + + if ( tasks_omitted ) { + task_pool_ptr[T] = nullptr; + } + return nullptr; +} + +d1::task* arena_slot::get_task(execution_data_ext& ed, isolation_type isolation) { + __TBB_ASSERT(is_task_pool_published(), nullptr); + // The current task position in the task pool. + std::size_t T0 = tail.load(std::memory_order_relaxed); + // The bounds of available tasks in the task pool. H0 is only used when the head bound is reached. 
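+    // The loop below walks the local pool backward from T0, decrementing tail until a task
+    // that satisfies the isolation constraint is found or the head bound is reached.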
+ std::size_t H0 = (std::size_t)-1, T = T0; + d1::task* result = nullptr; + bool task_pool_empty = false; + bool tasks_omitted = false; + do { + __TBB_ASSERT( !result, nullptr ); + // The full fence is required to sync the store of `tail` with the load of `head` (write-read barrier) + T = --tail; + // The acquire load of head is required to guarantee consistency of our task pool + // when a thief rolls back the head. + if ( (std::intptr_t)( head.load(std::memory_order_acquire) ) > (std::intptr_t)T ) { + acquire_task_pool(); + H0 = head.load(std::memory_order_relaxed); + if ( (std::intptr_t)H0 > (std::intptr_t)T ) { + // The thief has not backed off - nothing to grab. + __TBB_ASSERT( H0 == head.load(std::memory_order_relaxed) + && T == tail.load(std::memory_order_relaxed) + && H0 == T + 1, "victim/thief arbitration algorithm failure" ); + reset_task_pool_and_leave(); + // No tasks in the task pool. + task_pool_empty = true; + break; + } else if ( H0 == T ) { + // There is only one task in the task pool. + reset_task_pool_and_leave(); + task_pool_empty = true; + } else { + // Release task pool if there are still some tasks. + // After the release, the tail will be less than T, thus a thief + // will not attempt to get a task at position T. + release_task_pool(); + } + } + result = get_task_impl( T, ed, tasks_omitted, isolation ); + if ( result ) { + poison_pointer( task_pool_ptr[T] ); + break; + } else if ( !tasks_omitted ) { + poison_pointer( task_pool_ptr[T] ); + __TBB_ASSERT( T0 == T+1, nullptr ); + T0 = T; + } + } while ( !result && !task_pool_empty ); + + if ( tasks_omitted ) { + if ( task_pool_empty ) { + // All tasks have been checked. The task pool should be in reset state. + // We just restore the bounds for the available tasks. + // TODO: Does it have sense to move them to the beginning of the task pool? + __TBB_ASSERT( is_quiescent_local_task_pool_reset(), nullptr ); + if ( result ) { + // If we have a task, it should be at H0 position. + __TBB_ASSERT( H0 == T, nullptr ); + ++H0; + } + __TBB_ASSERT( H0 <= T0, nullptr ); + if ( H0 < T0 ) { + // Restore the task pool if there are some tasks. + head.store(H0, std::memory_order_relaxed); + tail.store(T0, std::memory_order_relaxed); + // The release fence is used in publish_task_pool. + publish_task_pool(); + // Synchronize with snapshot as we published some tasks. + ed.task_disp->m_thread_data->my_arena->advertise_new_work<arena::wakeup>(); + } + } else { + // A task has been obtained. We need to make a hole in position T. + __TBB_ASSERT( is_task_pool_published(), nullptr ); + __TBB_ASSERT( result, nullptr ); + task_pool_ptr[T] = nullptr; + tail.store(T0, std::memory_order_release); + // Synchronize with snapshot as we published some tasks. + // TODO: consider some approach not to call wakeup for each time. E.g. check if the tail reached the head. 
+ ed.task_disp->m_thread_data->my_arena->advertise_new_work<arena::wakeup>(); + } + } + + __TBB_ASSERT( (std::intptr_t)tail.load(std::memory_order_relaxed) >= 0, nullptr ); + __TBB_ASSERT( result || tasks_omitted || is_quiescent_local_task_pool_reset(), nullptr ); + return result; +} + +d1::task* arena_slot::steal_task(arena& a, isolation_type isolation) { + d1::task** victim_pool = lock_task_pool(); + if (!victim_pool) { + return nullptr; + } + d1::task* result = nullptr; + std::size_t H = head.load(std::memory_order_relaxed); // mirror + std::size_t H0 = H; + bool tasks_omitted = false; + do { + // The full fence is required to sync the store of `head` with the load of `tail` (write-read barrier) + H = ++head; + // The acquire load of tail is required to guarantee consistency of victim_pool + // because the owner synchronizes task spawning via tail. + if ((std::intptr_t)H > (std::intptr_t)(tail.load(std::memory_order_acquire))) { + // Stealing attempt failed, deque contents has not been changed by us + head.store( /*dead: H = */ H0, std::memory_order_relaxed ); + __TBB_ASSERT( !result, nullptr ); + goto unlock; + } + result = victim_pool[H-1]; + __TBB_ASSERT( !is_poisoned( result ), nullptr ); + + if (result) { + if (isolation == no_isolation || isolation == task_accessor::isolation(*result)) { + if (!task_accessor::is_proxy_task(*result)) { + break; + } + task_proxy& tp = *static_cast<task_proxy*>(result); + // If mailed task is likely to be grabbed by its destination thread, skip it. + if ( !(task_proxy::is_shared( tp.task_and_tag ) && tp.outbox->recipient_is_idle()) ) { + break; + } + } + // The task cannot be executed either due to isolation or proxy constraints. + result = nullptr; + tasks_omitted = true; + } else if (!tasks_omitted) { + // Cleanup the task pool from holes until a task is skipped. + __TBB_ASSERT( H0 == H-1, nullptr ); + poison_pointer( victim_pool[H0] ); + H0 = H; + } + } while (!result); + __TBB_ASSERT( result, nullptr ); + + // emit "task was consumed" signal + poison_pointer( victim_pool[H-1] ); + if (tasks_omitted) { + // Some proxies in the task pool have been omitted. Set the stolen task to nullptr. + victim_pool[H-1] = nullptr; + // The release store synchronizes the victim_pool update(the store of nullptr). + head.store( /*dead: H = */ H0, std::memory_order_release ); + } +unlock: + unlock_task_pool(victim_pool); + +#if __TBB_PREFETCHING + __TBB_cl_evict(&victim_slot.head); + __TBB_cl_evict(&victim_slot.tail); +#endif + if (tasks_omitted) { + // Synchronize with snapshot as the head and tail can be bumped which can falsely trigger EMPTY state + a.advertise_new_work<arena::wakeup>(); + } + return result; +} + +} // namespace r1 +} // namespace detail +} // namespace tbb + diff --git a/contrib/libs/tbb/src/tbb/arena_slot.h b/contrib/libs/tbb/src/tbb/arena_slot.h new file mode 100644 index 0000000000..83d61d2197 --- /dev/null +++ b/contrib/libs/tbb/src/tbb/arena_slot.h @@ -0,0 +1,409 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef _TBB_arena_slot_H +#define _TBB_arena_slot_H + +#include "oneapi/tbb/detail/_config.h" +#include "oneapi/tbb/detail/_utils.h" +#include "oneapi/tbb/detail/_template_helpers.h" +#include "oneapi/tbb/detail/_task.h" + +#include "oneapi/tbb/cache_aligned_allocator.h" + +#include "misc.h" +#include "mailbox.h" +#include "scheduler_common.h" + +#include <atomic> + +namespace tbb { +namespace detail { +namespace r1 { + +class arena; +class task_group_context; + +//-------------------------------------------------------------------------------------------------------- +// Arena Slot +//-------------------------------------------------------------------------------------------------------- + +static d1::task** const EmptyTaskPool = nullptr; +static d1::task** const LockedTaskPool = reinterpret_cast<d1::task**>(~std::intptr_t(0)); + +struct alignas(max_nfs_size) arena_slot_shared_state { + //! Scheduler of the thread attached to the slot + /** Marks the slot as busy, and is used to iterate through the schedulers belonging to this arena **/ + std::atomic<bool> my_is_occupied; + + // Synchronization of access to Task pool + /** Also is used to specify if the slot is empty or locked: + 0 - empty + -1 - locked **/ + std::atomic<d1::task**> task_pool; + + //! Index of the first ready task in the deque. + /** Modified by thieves, and by the owner during compaction/reallocation **/ + std::atomic<std::size_t> head; +}; + +struct alignas(max_nfs_size) arena_slot_private_state { + //! Hint provided for operations with the container of starvation-resistant tasks. + /** Modified by the owner thread (during these operations). **/ + unsigned hint_for_fifo_stream; + +#if __TBB_PREVIEW_CRITICAL_TASKS + //! Similar to 'hint_for_fifo_stream' but for critical tasks. + unsigned hint_for_critical_stream; +#endif + + //! Similar to 'hint_for_fifo_stream' but for the resume tasks. + unsigned hint_for_resume_stream; + + //! Index of the element following the last ready task in the deque. + /** Modified by the owner thread. **/ + std::atomic<std::size_t> tail; + + //! Capacity of the primary task pool (number of elements - pointers to task). + std::size_t my_task_pool_size; + + //! Task pool of the scheduler that owns this slot + // TODO: previously was task**__TBB_atomic, but seems like not accessed on other thread + d1::task** task_pool_ptr; +}; + +class arena_slot : private arena_slot_shared_state, private arena_slot_private_state { + friend class arena; + friend class outermost_worker_waiter; + friend class task_dispatcher; + friend class thread_data; + friend class nested_arena_context; + + //! The original task dispather associated with this slot + task_dispatcher* my_default_task_dispatcher; + +#if TBB_USE_ASSERT + void fill_with_canary_pattern ( std::size_t first, std::size_t last ) { + for ( std::size_t i = first; i < last; ++i ) + poison_pointer(task_pool_ptr[i]); + } +#else + void fill_with_canary_pattern ( size_t, std::size_t ) {} +#endif /* TBB_USE_ASSERT */ + + static constexpr std::size_t min_task_pool_size = 64; + + void allocate_task_pool( std::size_t n ) { + std::size_t byte_size = ((n * sizeof(d1::task*) + max_nfs_size - 1) / max_nfs_size) * max_nfs_size; + my_task_pool_size = byte_size / sizeof(d1::task*); + task_pool_ptr = (d1::task**)cache_aligned_allocate(byte_size); + // No need to clear the fresh deque since valid items are designated by the head and tail members. 
+ // But fill it with a canary pattern in the high vigilance debug mode. + fill_with_canary_pattern( 0, my_task_pool_size ); + } + +public: + //! Deallocate task pool that was allocated by means of allocate_task_pool. + void free_task_pool( ) { + // TODO: understand the assertion and modify + // __TBB_ASSERT( !task_pool /* TODO: == EmptyTaskPool */, NULL); + if( task_pool_ptr ) { + __TBB_ASSERT( my_task_pool_size, NULL); + cache_aligned_deallocate( task_pool_ptr ); + task_pool_ptr = NULL; + my_task_pool_size = 0; + } + } + + //! Get a task from the local pool. + /** Called only by the pool owner. + Returns the pointer to the task or NULL if a suitable task is not found. + Resets the pool if it is empty. **/ + d1::task* get_task(execution_data_ext&, isolation_type); + + //! Steal task from slot's ready pool + d1::task* steal_task(arena&, isolation_type); + + //! Some thread is now the owner of this slot + void occupy() { + __TBB_ASSERT(!my_is_occupied.load(std::memory_order_relaxed), nullptr); + my_is_occupied.store(true, std::memory_order_release); + } + + //! Try to occupy the slot + bool try_occupy() { + return !is_occupied() && my_is_occupied.exchange(true) == false; + } + + //! Some thread is now the owner of this slot + void release() { + __TBB_ASSERT(my_is_occupied.load(std::memory_order_relaxed), nullptr); + my_is_occupied.store(false, std::memory_order_release); + } + + //! Spawn newly created tasks + void spawn(d1::task& t) { + std::size_t T = prepare_task_pool(1); + __TBB_ASSERT(is_poisoned(task_pool_ptr[T]), NULL); + task_pool_ptr[T] = &t; + commit_spawned_tasks(T + 1); + if (!is_task_pool_published()) { + publish_task_pool(); + } + } + + bool is_task_pool_published() const { + return task_pool.load(std::memory_order_relaxed) != EmptyTaskPool; + } + + bool is_occupied() const { + return my_is_occupied.load(std::memory_order_relaxed); + } + + task_dispatcher& default_task_dispatcher() { + __TBB_ASSERT(my_default_task_dispatcher != nullptr, nullptr); + return *my_default_task_dispatcher; + } + + void init_task_streams(unsigned h) { + hint_for_fifo_stream = h; +#if __TBB_RESUMABLE_TASKS + hint_for_resume_stream = h; +#endif +#if __TBB_PREVIEW_CRITICAL_TASKS + hint_for_critical_stream = h; +#endif + } + +#if __TBB_PREVIEW_CRITICAL_TASKS + unsigned& critical_hint() { + return hint_for_critical_stream; + } +#endif +private: + //! Get a task from the local pool at specified location T. + /** Returns the pointer to the task or NULL if the task cannot be executed, + e.g. proxy has been deallocated or isolation constraint is not met. + tasks_omitted tells if some tasks have been omitted. + Called only by the pool owner. The caller should guarantee that the + position T is not available for a thief. **/ + d1::task* get_task_impl(size_t T, execution_data_ext& ed, bool& tasks_omitted, isolation_type isolation); + + //! Makes sure that the task pool can accommodate at least n more elements + /** If necessary relocates existing task pointers or grows the ready task deque. + * Returns (possible updated) tail index (not accounting for n). 
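+     * The task pool is locked (via acquire_task_pool) while existing task pointers are compacted or relocated.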
**/ + std::size_t prepare_task_pool(std::size_t num_tasks) { + std::size_t T = tail.load(std::memory_order_relaxed); // mirror + if ( T + num_tasks <= my_task_pool_size ) { + return T; + } + + std::size_t new_size = num_tasks; + if ( !my_task_pool_size ) { + __TBB_ASSERT( !is_task_pool_published() && is_quiescent_local_task_pool_reset(), NULL ); + __TBB_ASSERT( !task_pool_ptr, NULL ); + if ( num_tasks < min_task_pool_size ) new_size = min_task_pool_size; + allocate_task_pool( new_size ); + return 0; + } + acquire_task_pool(); + std::size_t H = head.load(std::memory_order_relaxed); // mirror + d1::task** new_task_pool = task_pool_ptr;; + __TBB_ASSERT( my_task_pool_size >= min_task_pool_size, NULL ); + // Count not skipped tasks. Consider using std::count_if. + for ( std::size_t i = H; i < T; ++i ) + if ( new_task_pool[i] ) ++new_size; + // If the free space at the beginning of the task pool is too short, we + // are likely facing a pathological single-producer-multiple-consumers + // scenario, and thus it's better to expand the task pool + bool allocate = new_size > my_task_pool_size - min_task_pool_size/4; + if ( allocate ) { + // Grow task pool. As this operation is rare, and its cost is asymptotically + // amortizable, we can tolerate new task pool allocation done under the lock. + if ( new_size < 2 * my_task_pool_size ) + new_size = 2 * my_task_pool_size; + allocate_task_pool( new_size ); // updates my_task_pool_size + } + // Filter out skipped tasks. Consider using std::copy_if. + std::size_t T1 = 0; + for ( std::size_t i = H; i < T; ++i ) { + if ( new_task_pool[i] ) { + task_pool_ptr[T1++] = new_task_pool[i]; + } + } + // Deallocate the previous task pool if a new one has been allocated. + if ( allocate ) + cache_aligned_deallocate( new_task_pool ); + else + fill_with_canary_pattern( T1, tail ); + // Publish the new state. + commit_relocated_tasks( T1 ); + // assert_task_pool_valid(); + return T1; + } + + //! Makes newly spawned tasks visible to thieves + void commit_spawned_tasks(std::size_t new_tail) { + __TBB_ASSERT (new_tail <= my_task_pool_size, "task deque end was overwritten"); + // emit "task was released" signal + // Release fence is necessary to make sure that previously stored task pointers + // are visible to thieves. + tail.store(new_tail, std::memory_order_release); + } + + //! Used by workers to enter the task pool + /** Does not lock the task pool in case if arena slot has been successfully grabbed. **/ + void publish_task_pool() { + __TBB_ASSERT ( task_pool == EmptyTaskPool, "someone else grabbed my arena slot?" ); + __TBB_ASSERT ( head.load(std::memory_order_relaxed) < tail.load(std::memory_order_relaxed), + "entering arena without tasks to share" ); + // Release signal on behalf of previously spawned tasks (when this thread was not in arena yet) + task_pool.store(task_pool_ptr, std::memory_order_release ); + } + + //! Locks the local task pool + /** Garbles task_pool for the duration of the lock. Requires correctly set task_pool_ptr. + ATTENTION: This method is mostly the same as generic_scheduler::lock_task_pool(), with + a little different logic of slot state checks (slot is either locked or points + to our task pool). Thus if either of them is changed, consider changing the counterpart as well. 
**/ + void acquire_task_pool() { + if (!is_task_pool_published()) { + return; // we are not in arena - nothing to lock + } + bool sync_prepare_done = false; + for( atomic_backoff b;;b.pause() ) { +#if TBB_USE_ASSERT + // Local copy of the arena slot task pool pointer is necessary for the next + // assertion to work correctly to exclude asynchronous state transition effect. + d1::task** tp = task_pool.load(std::memory_order_relaxed); + __TBB_ASSERT( tp == LockedTaskPool || tp == task_pool_ptr, "slot ownership corrupt?" ); +#endif + d1::task** expected = task_pool_ptr; + if( task_pool.load(std::memory_order_relaxed) != LockedTaskPool && + task_pool.compare_exchange_strong(expected, LockedTaskPool ) ) { + // We acquired our own slot + break; + } else if( !sync_prepare_done ) { + // Start waiting + sync_prepare_done = true; + } + // Someone else acquired a lock, so pause and do exponential backoff. + } + __TBB_ASSERT( task_pool.load(std::memory_order_relaxed) == LockedTaskPool, "not really acquired task pool" ); + } + + //! Unlocks the local task pool + /** Restores task_pool munged by acquire_task_pool. Requires + correctly set task_pool_ptr. **/ + void release_task_pool() { + if ( !(task_pool.load(std::memory_order_relaxed) != EmptyTaskPool) ) + return; // we are not in arena - nothing to unlock + __TBB_ASSERT( task_pool.load(std::memory_order_relaxed) == LockedTaskPool, "arena slot is not locked" ); + task_pool.store( task_pool_ptr, std::memory_order_release ); + } + + //! Locks victim's task pool, and returns pointer to it. The pointer can be NULL. + /** Garbles victim_arena_slot->task_pool for the duration of the lock. **/ + d1::task** lock_task_pool() { + d1::task** victim_task_pool; + for ( atomic_backoff backoff;; /*backoff pause embedded in the loop*/) { + victim_task_pool = task_pool.load(std::memory_order_relaxed); + // Microbenchmarks demonstrated that aborting stealing attempt when the + // victim's task pool is locked degrade performance. + // NOTE: Do not use comparison of head and tail indices to check for + // the presence of work in the victim's task pool, as they may give + // incorrect indication because of task pool relocations and resizes. + if (victim_task_pool == EmptyTaskPool) { + break; + } + d1::task** expected = victim_task_pool; + if (victim_task_pool != LockedTaskPool && task_pool.compare_exchange_strong(expected, LockedTaskPool) ) { + // We've locked victim's task pool + break; + } + // Someone else acquired a lock, so pause and do exponential backoff. + backoff.pause(); + } + __TBB_ASSERT(victim_task_pool == EmptyTaskPool || + (task_pool.load(std::memory_order_relaxed) == LockedTaskPool && + victim_task_pool != LockedTaskPool), "not really locked victim's task pool?"); + return victim_task_pool; + } + + //! Unlocks victim's task pool + /** Restores victim_arena_slot->task_pool munged by lock_task_pool. 
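+        Called by the thief at the end of a stealing attempt, whether or not a task was taken.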
**/ + void unlock_task_pool(d1::task** victim_task_pool) { + __TBB_ASSERT(task_pool.load(std::memory_order_relaxed) == LockedTaskPool, "victim arena slot is not locked"); + __TBB_ASSERT(victim_task_pool != LockedTaskPool, NULL); + task_pool.store(victim_task_pool, std::memory_order_release); + } + +#if TBB_USE_ASSERT + bool is_local_task_pool_quiescent() const { + d1::task** tp = task_pool.load(std::memory_order_relaxed); + return tp == EmptyTaskPool || tp == LockedTaskPool; + } + + bool is_quiescent_local_task_pool_empty() const { + __TBB_ASSERT(is_local_task_pool_quiescent(), "Task pool is not quiescent"); + return head.load(std::memory_order_relaxed) == tail.load(std::memory_order_relaxed); + } + + bool is_quiescent_local_task_pool_reset() const { + __TBB_ASSERT(is_local_task_pool_quiescent(), "Task pool is not quiescent"); + return head.load(std::memory_order_relaxed) == 0 && tail.load(std::memory_order_relaxed) == 0; + } +#endif // TBB_USE_ASSERT + + //! Leave the task pool + /** Leaving task pool automatically releases the task pool if it is locked. **/ + void leave_task_pool() { + __TBB_ASSERT(is_task_pool_published(), "Not in arena"); + // Do not reset my_arena_index. It will be used to (attempt to) re-acquire the slot next time + __TBB_ASSERT(task_pool.load(std::memory_order_relaxed) == LockedTaskPool, "Task pool must be locked when leaving arena"); + __TBB_ASSERT(is_quiescent_local_task_pool_empty(), "Cannot leave arena when the task pool is not empty"); + // No release fence is necessary here as this assignment precludes external + // accesses to the local task pool when becomes visible. Thus it is harmless + // if it gets hoisted above preceding local bookkeeping manipulations. + task_pool.store(EmptyTaskPool, std::memory_order_relaxed); + } + + //! Resets head and tail indices to 0, and leaves task pool + /** The task pool must be locked by the owner (via acquire_task_pool).**/ + void reset_task_pool_and_leave() { + __TBB_ASSERT(task_pool.load(std::memory_order_relaxed) == LockedTaskPool, "Task pool must be locked when resetting task pool"); + tail.store(0, std::memory_order_relaxed); + head.store(0, std::memory_order_relaxed); + leave_task_pool(); + } + + //! Makes relocated tasks visible to thieves and releases the local task pool. + /** Obviously, the task pool must be locked when calling this method. **/ + void commit_relocated_tasks(std::size_t new_tail) { + __TBB_ASSERT(is_local_task_pool_quiescent(), "Task pool must be locked when calling commit_relocated_tasks()"); + head.store(0, std::memory_order_relaxed); + // Tail is updated last to minimize probability of a thread making arena + // snapshot being misguided into thinking that this task pool is empty. + tail.store(new_tail, std::memory_order_release); + release_task_pool(); + } +}; + +} // namespace r1 +} // namespace detail +} // namespace tbb + +#endif // __TBB_arena_slot_H diff --git a/contrib/libs/tbb/src/tbb/assert_impl.h b/contrib/libs/tbb/src/tbb/assert_impl.h new file mode 100644 index 0000000000..7f411e06f7 --- /dev/null +++ b/contrib/libs/tbb/src/tbb/assert_impl.h @@ -0,0 +1,71 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_assert_impl_H +#define __TBB_assert_impl_H + +#include "oneapi/tbb/detail/_config.h" + +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <cstdarg> +#if _MSC_VER && _DEBUG +#include <crtdbg.h> +#endif + +#include <mutex> + +namespace tbb { +namespace detail { +namespace r1 { + +// TODO: consider extension for formatted error description string +static void assertion_failure_impl(const char* filename, int line, const char* expression, const char* comment) { + std::fprintf(stderr, "Assertion %s failed on line %d of file %s\n", expression, line, filename); + if (comment) { + std::fprintf(stderr, "Detailed description: %s\n", comment); + } +#if _MSC_VER && _DEBUG + if (1 == _CrtDbgReport(_CRT_ASSERT, filename, line, "tbb_debug.dll", "%s\r\n%s", expression, comment?comment:"")) { + _CrtDbgBreak(); + } +#else + std::fflush(stderr); + std::abort(); +#endif +} + +void __TBB_EXPORTED_FUNC assertion_failure(const char* filename, int line, const char* expression, const char* comment) { + static std::once_flag flag; + std::call_once(flag, [&](){ assertion_failure_impl(filename, line, expression, comment); }); +} + +//! Report a runtime warning. +void runtime_warning( const char* format, ... ) { + char str[1024]; std::memset(str, 0, 1024); + va_list args; va_start(args, format); + vsnprintf( str, 1024-1, format, args); + va_end(args); + fprintf(stderr, "TBB Warning: %s\n", str); +} + +} // namespace r1 +} // namespace detail +} // namespace tbb + +#endif // __TBB_assert_impl_H + diff --git a/contrib/libs/tbb/src/tbb/co_context.h b/contrib/libs/tbb/src/tbb/co_context.h new file mode 100644 index 0000000000..552dec356b --- /dev/null +++ b/contrib/libs/tbb/src/tbb/co_context.h @@ -0,0 +1,222 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef _TBB_co_context_H +#define _TBB_co_context_H + +#include "oneapi/tbb/detail/_config.h" + +#if __TBB_RESUMABLE_TASKS + +#include <cstddef> +#include <cstdint> + +#if _WIN32 || _WIN64 +#include <windows.h> +#else +// ucontext.h API is deprecated since macOS 10.6 +#if __APPLE__ + #if __INTEL_COMPILER + #pragma warning(push) + #pragma warning(disable:1478) + #elif __clang__ + #pragma clang diagnostic push + #pragma clang diagnostic ignored "-Wdeprecated-declarations" + #endif +#endif // __APPLE__ + +#include <ucontext.h> +#include <sys/mman.h> // mprotect + +#include "governor.h" // default_page_size() + +#ifndef MAP_STACK +// macOS* does not define MAP_STACK +#define MAP_STACK 0 +#endif +#ifndef MAP_ANONYMOUS +// macOS* defines MAP_ANON, which is deprecated in Linux*. 
+#define MAP_ANONYMOUS MAP_ANON +#endif +#endif // _WIN32 || _WIN64 + +namespace tbb { +namespace detail { +namespace r1 { + +#if _WIN32 || _WIN64 + typedef LPVOID coroutine_type; +#else + struct coroutine_type { + coroutine_type() : my_context(), my_stack(), my_stack_size() {} + ucontext_t my_context; + void* my_stack; + std::size_t my_stack_size; + }; +#endif + + // Forward declaration of the coroutine API. + void create_coroutine(coroutine_type& c, std::size_t stack_size, void* arg); + void current_coroutine(coroutine_type& c); + void swap_coroutine(coroutine_type& prev_coroutine, coroutine_type& new_coroutine); + void destroy_coroutine(coroutine_type& c); + +class co_context { + enum co_state { + co_invalid, + co_suspended, + co_executing, + co_destroyed + }; + coroutine_type my_coroutine; + co_state my_state; + +public: + co_context(std::size_t stack_size, void* arg) + : my_state(stack_size ? co_suspended : co_executing) + { + if (stack_size) { + __TBB_ASSERT(arg != 0, nullptr); + create_coroutine(my_coroutine, stack_size, arg); + } else { + current_coroutine(my_coroutine); + } + } + + ~co_context() { + __TBB_ASSERT(1 << my_state & (1 << co_suspended | 1 << co_executing), NULL); + if (my_state == co_suspended) + destroy_coroutine(my_coroutine); + my_state = co_destroyed; + } + + void resume(co_context& target) { + // Do not create non-trivial objects on the stack of this function. They might never be destroyed. + __TBB_ASSERT(my_state == co_executing, NULL); + __TBB_ASSERT(target.my_state == co_suspended, NULL); + + my_state = co_suspended; + target.my_state = co_executing; + + // 'target' can reference an invalid object after swap_coroutine. Do not access it. + swap_coroutine(my_coroutine, target.my_coroutine); + + __TBB_ASSERT(my_state == co_executing, NULL); + } +}; + +#if _WIN32 || _WIN64 +/* [[noreturn]] */ void __stdcall co_local_wait_for_all(void* arg) noexcept; +#else +/* [[noreturn]] */ void co_local_wait_for_all(void* arg) noexcept; +#endif + +#if _WIN32 || _WIN64 +inline void create_coroutine(coroutine_type& c, std::size_t stack_size, void* arg) { + __TBB_ASSERT(arg, NULL); + c = CreateFiber(stack_size, co_local_wait_for_all, arg); + __TBB_ASSERT(c, NULL); +} + +inline void current_coroutine(coroutine_type& c) { + c = IsThreadAFiber() ? 
GetCurrentFiber() : + ConvertThreadToFiberEx(nullptr, FIBER_FLAG_FLOAT_SWITCH); + __TBB_ASSERT(c, NULL); +} + +inline void swap_coroutine(coroutine_type& prev_coroutine, coroutine_type& new_coroutine) { + if (!IsThreadAFiber()) { + ConvertThreadToFiberEx(nullptr, FIBER_FLAG_FLOAT_SWITCH); + } + __TBB_ASSERT(new_coroutine, NULL); + prev_coroutine = GetCurrentFiber(); + __TBB_ASSERT(prev_coroutine, NULL); + SwitchToFiber(new_coroutine); +} + +inline void destroy_coroutine(coroutine_type& c) { + __TBB_ASSERT(c, NULL); + DeleteFiber(c); +} +#else // !(_WIN32 || _WIN64) + +inline void create_coroutine(coroutine_type& c, std::size_t stack_size, void* arg) { + const std::size_t REG_PAGE_SIZE = governor::default_page_size(); + const std::size_t page_aligned_stack_size = (stack_size + (REG_PAGE_SIZE - 1)) & ~(REG_PAGE_SIZE - 1); + const std::size_t protected_stack_size = page_aligned_stack_size + 2 * REG_PAGE_SIZE; + + // Allocate the stack with protection property + std::uintptr_t stack_ptr = (std::uintptr_t)mmap(NULL, protected_stack_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0); + __TBB_ASSERT((void*)stack_ptr != MAP_FAILED, NULL); + + // Allow read write on our stack (guarded pages are still protected) + int err = mprotect((void*)(stack_ptr + REG_PAGE_SIZE), page_aligned_stack_size, PROT_READ | PROT_WRITE); + __TBB_ASSERT_EX(!err, NULL); + + // Remember the stack state + c.my_stack = (void*)(stack_ptr + REG_PAGE_SIZE); + c.my_stack_size = page_aligned_stack_size; + + err = getcontext(&c.my_context); + __TBB_ASSERT_EX(!err, NULL); + + c.my_context.uc_link = 0; + // cast to char* to disable FreeBSD clang-3.4.1 'incompatible type' error + c.my_context.uc_stack.ss_sp = (char*)c.my_stack; + c.my_context.uc_stack.ss_size = c.my_stack_size; + c.my_context.uc_stack.ss_flags = 0; + + typedef void(*coroutine_func_t)(); + makecontext(&c.my_context, (coroutine_func_t)co_local_wait_for_all, sizeof(arg) / sizeof(int), arg); +} + +inline void current_coroutine(coroutine_type& c) { + int err = getcontext(&c.my_context); + __TBB_ASSERT_EX(!err, NULL); +} + +inline void swap_coroutine(coroutine_type& prev_coroutine, coroutine_type& new_coroutine) { + int err = swapcontext(&prev_coroutine.my_context, &new_coroutine.my_context); + __TBB_ASSERT_EX(!err, NULL); +} + +inline void destroy_coroutine(coroutine_type& c) { + const std::size_t REG_PAGE_SIZE = governor::default_page_size(); + // Free stack memory with guarded pages + munmap((void*)((std::uintptr_t)c.my_stack - REG_PAGE_SIZE), c.my_stack_size + 2 * REG_PAGE_SIZE); + // Clear the stack state afterwards + c.my_stack = NULL; + c.my_stack_size = 0; +} + +#if __APPLE__ + #if __INTEL_COMPILER + #pragma warning(pop) // 1478 warning + #elif __clang__ + #pragma clang diagnostic pop // "-Wdeprecated-declarations" + #endif +#endif + +#endif // _WIN32 || _WIN64 + +} // namespace r1 +} // namespace detail +} // namespace tbb + +#endif /* __TBB_RESUMABLE_TASKS */ + +#endif /* _TBB_co_context_H */ + diff --git a/contrib/libs/tbb/src/tbb/concurrent_bounded_queue.cpp b/contrib/libs/tbb/src/tbb/concurrent_bounded_queue.cpp new file mode 100644 index 0000000000..90077936f6 --- /dev/null +++ b/contrib/libs/tbb/src/tbb/concurrent_bounded_queue.cpp @@ -0,0 +1,84 @@ +/* + Copyright (c) 2020-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "oneapi/tbb/detail/_utils.h" +#include "oneapi/tbb/concurrent_queue.h" +#include "oneapi/tbb/cache_aligned_allocator.h" +#include "concurrent_monitor.h" + +namespace tbb { +namespace detail { +namespace r1 { + +static constexpr std::size_t monitors_number = 2; + +std::uint8_t* __TBB_EXPORTED_FUNC allocate_bounded_queue_rep( std::size_t queue_rep_size ) +{ + std::size_t monitors_mem_size = sizeof(concurrent_monitor) * monitors_number; + std::uint8_t* mem = static_cast<std::uint8_t*>(cache_aligned_allocate(queue_rep_size + monitors_mem_size)); + + concurrent_monitor* monitors = reinterpret_cast<concurrent_monitor*>(mem + queue_rep_size); + for (std::size_t i = 0; i < monitors_number; ++i) { + new (monitors + i) concurrent_monitor(); + } + + return mem; +} + +void __TBB_EXPORTED_FUNC deallocate_bounded_queue_rep( std::uint8_t* mem, std::size_t queue_rep_size ) +{ + concurrent_monitor* monitors = reinterpret_cast<concurrent_monitor*>(mem + queue_rep_size); + for (std::size_t i = 0; i < monitors_number; ++i) { + monitors[i].~concurrent_monitor(); + } + + cache_aligned_deallocate(mem); +} + +void __TBB_EXPORTED_FUNC wait_bounded_queue_monitor( concurrent_monitor* monitors, std::size_t monitor_tag, + std::ptrdiff_t target, d1::delegate_base& predicate ) +{ + __TBB_ASSERT(monitor_tag < monitors_number, nullptr); + concurrent_monitor& monitor = monitors[monitor_tag]; + + monitor.wait<concurrent_monitor::thread_context>([&] { return !predicate(); }, std::uintptr_t(target)); +} + +void __TBB_EXPORTED_FUNC abort_bounded_queue_monitors( concurrent_monitor* monitors ) { + concurrent_monitor& items_avail = monitors[d1::cbq_items_avail_tag]; + concurrent_monitor& slots_avail = monitors[d1::cbq_slots_avail_tag]; + + items_avail.abort_all(); + slots_avail.abort_all(); +} + +struct predicate_leq { + std::size_t my_ticket; + predicate_leq( std::size_t ticket ) : my_ticket(ticket) {} + bool operator() ( std::uintptr_t ticket ) const { return static_cast<std::size_t>(ticket) <= my_ticket; } +}; + +void __TBB_EXPORTED_FUNC notify_bounded_queue_monitor( concurrent_monitor* monitors, + std::size_t monitor_tag, std::size_t ticket) +{ + __TBB_ASSERT(monitor_tag < monitors_number, nullptr); + concurrent_monitor& monitor = monitors[monitor_tag]; + monitor.notify(predicate_leq(ticket)); +} + +} // namespace r1 +} // namespace detail +} // namespace tbb diff --git a/contrib/libs/tbb/src/tbb/concurrent_monitor.h b/contrib/libs/tbb/src/tbb/concurrent_monitor.h new file mode 100644 index 0000000000..cb1885a5d0 --- /dev/null +++ b/contrib/libs/tbb/src/tbb/concurrent_monitor.h @@ -0,0 +1,529 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_concurrent_monitor_H +#define __TBB_concurrent_monitor_H + +#include "oneapi/tbb/spin_mutex.h" +#include "oneapi/tbb/detail/_exception.h" +#include "oneapi/tbb/detail/_aligned_space.h" +#include "oneapi/tbb/detail/_template_helpers.h" +#include "scheduler_common.h" + +#include "semaphore.h" + +#include <atomic> + +namespace tbb { +namespace detail { +namespace r1 { + +//! Circular doubly-linked list with sentinel +/** head.next points to the front and head.prev points to the back */ +class circular_doubly_linked_list_with_sentinel : no_copy { +public: + struct base_node { + base_node* next; + base_node* prev; + explicit base_node() : next((base_node*)(uintptr_t)0xcdcdcdcd), prev((base_node*)(uintptr_t)0xcdcdcdcd) {} + }; + + // ctor + circular_doubly_linked_list_with_sentinel() { clear(); } + // dtor + ~circular_doubly_linked_list_with_sentinel() { + __TBB_ASSERT(head.next == &head && head.prev == &head, "the list is not empty"); + } + + inline std::size_t size() const { return count.load(std::memory_order_relaxed); } + inline bool empty() const { return size() == 0; } + inline base_node* front() const { return head.next; } + inline base_node* last() const { return head.prev; } + inline const base_node* end() const { return &head; } + + //! add to the back of the list + inline void add( base_node* n ) { + count.store(count.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed); + n->prev = head.prev; + n->next = &head; + head.prev->next = n; + head.prev = n; + } + + //! remove node 'n' + inline void remove( base_node& n ) { + __TBB_ASSERT(count.load(std::memory_order_relaxed) > 0, "attempt to remove an item from an empty list"); + count.store(count.load( std::memory_order_relaxed ) - 1, std::memory_order_relaxed); + n.prev->next = n.next; + n.next->prev = n.prev; + } + + //! 
move all elements to 'lst' and initialize the 'this' list + inline void flush_to( circular_doubly_linked_list_with_sentinel& lst ) { + const std::size_t l_count = size(); + if (l_count > 0) { + lst.count.store(l_count, std::memory_order_relaxed); + lst.head.next = head.next; + lst.head.prev = head.prev; + head.next->prev = &lst.head; + head.prev->next = &lst.head; + clear(); + } + } + + void clear() { + head.next = &head; + head.prev = &head; + count.store(0, std::memory_order_relaxed); + } +private: + std::atomic<std::size_t> count; + base_node head; +}; + +using base_list = circular_doubly_linked_list_with_sentinel; +using base_node = circular_doubly_linked_list_with_sentinel::base_node; + +template <typename Context> +class concurrent_monitor_base; + +template <typename Context> +class wait_node : public base_node { +public: + +#if __TBB_GLIBCXX_VERSION >= 40800 && __TBB_GLIBCXX_VERSION < 40900 + wait_node(Context ctx) : my_context(ctx), my_is_in_list(false) {} +#else + wait_node(Context ctx) : my_context(ctx) {} +#endif + + virtual ~wait_node() = default; + + virtual void init() { + __TBB_ASSERT(!my_initialized, nullptr); + my_initialized = true; + } + + virtual void wait() = 0; + + virtual void reset() { + __TBB_ASSERT(my_skipped_wakeup, nullptr); + my_skipped_wakeup = false; + } + + virtual void notify() = 0; + +protected: + friend class concurrent_monitor_base<Context>; + friend class thread_data; + + Context my_context{}; +#if __TBB_GLIBCXX_VERSION >= 40800 && __TBB_GLIBCXX_VERSION < 40900 + std::atomic<bool> my_is_in_list; +#else + std::atomic<bool> my_is_in_list{false}; +#endif + + bool my_initialized{false}; + bool my_skipped_wakeup{false}; + bool my_aborted{false}; + unsigned my_epoch{0}; +}; + +template <typename Context> +class sleep_node : public wait_node<Context> { + using base_type = wait_node<Context>; +public: + using base_type::base_type; + + // Make it virtual due to Intel Compiler warning + virtual ~sleep_node() { + if (this->my_initialized) { + if (this->my_skipped_wakeup) semaphore().P(); + semaphore().~binary_semaphore(); + } + } + + binary_semaphore& semaphore() { return *sema.begin(); } + + void init() override { + if (!this->my_initialized) { + new (sema.begin()) binary_semaphore; + base_type::init(); + } + } + + void wait() override { + __TBB_ASSERT(this->my_initialized, + "Use of commit_wait() without prior prepare_wait()"); + semaphore().P(); + __TBB_ASSERT(!this->my_is_in_list.load(std::memory_order_relaxed), "Still in the queue?"); + if (this->my_aborted) + throw_exception(exception_id::user_abort); + } + + void reset() override { + base_type::reset(); + semaphore().P(); + } + + void notify() override { + semaphore().V(); + } + +private: + tbb::detail::aligned_space<binary_semaphore> sema; +}; + +//! concurrent_monitor +/** fine-grained concurrent_monitor implementation */ +template <typename Context> +class concurrent_monitor_base : no_copy { +public: + //! ctor + concurrent_monitor_base() : my_epoch{} + {} + + //! dtor + ~concurrent_monitor_base() { + abort_all(); + __TBB_ASSERT(my_waitset.empty(), "waitset not empty?"); + } + + //! prepare wait by inserting 'thr' into the wait queue + void prepare_wait( wait_node<Context>& node) { + // TODO: consider making even more lazy instantiation of the semaphore, that is only when it is actually needed, e.g. 
move it in node::wait() + if (!node.my_initialized) { + node.init(); + } + // this is good place to pump previous skipped wakeup + else if (node.my_skipped_wakeup) { + node.reset(); + } + + node.my_is_in_list.store(true, std::memory_order_relaxed); + + { + tbb::spin_mutex::scoped_lock l(my_mutex); + node.my_epoch = my_epoch.load(std::memory_order_relaxed); + my_waitset.add(&node); + } + + // Prepare wait guarantees Write Read memory barrier. + // In C++ only full fence covers this type of barrier. + atomic_fence(std::memory_order_seq_cst); + } + + //! Commit wait if event count has not changed; otherwise, cancel wait. + /** Returns true if committed, false if canceled. */ + inline bool commit_wait( wait_node<Context>& node ) { + const bool do_it = node.my_epoch == my_epoch.load(std::memory_order_relaxed); + // this check is just an optimization + if (do_it) { + node.wait(); + } else { + cancel_wait( node ); + } + return do_it; + } + + //! Cancel the wait. Removes the thread from the wait queue if not removed yet. + void cancel_wait( wait_node<Context>& node ) { + // possible skipped wakeup will be pumped in the following prepare_wait() + node.my_skipped_wakeup = true; + // try to remove node from waitset + // Cancel wait guarantees acquire memory barrier. + bool in_list = node.my_is_in_list.load(std::memory_order_acquire); + if (in_list) { + tbb::spin_mutex::scoped_lock l(my_mutex); + if (node.my_is_in_list.load(std::memory_order_relaxed)) { + my_waitset.remove(node); + // node is removed from waitset, so there will be no wakeup + node.my_is_in_list.store(false, std::memory_order_relaxed); + node.my_skipped_wakeup = false; + } + } + } + + //! Wait for a condition to be satisfied with waiting-on my_context + template <typename NodeType, typename Pred> + bool wait(Pred&& pred, NodeType&& node) { + prepare_wait(node); + while (!guarded_call(std::forward<Pred>(pred), node)) { + if (commit_wait(node)) { + return true; + } + + prepare_wait(node); + } + + cancel_wait(node); + return false; + } + + //! Notify one thread about the event + void notify_one() { + atomic_fence(std::memory_order_seq_cst); + notify_one_relaxed(); + } + + //! Notify one thread about the event. Relaxed version. + void notify_one_relaxed() { + if (my_waitset.empty()) { + return; + } + + base_node* n; + const base_node* end = my_waitset.end(); + { + tbb::spin_mutex::scoped_lock l(my_mutex); + my_epoch.store(my_epoch.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed); + n = my_waitset.front(); + if (n != end) { + my_waitset.remove(*n); + to_wait_node(n)->my_is_in_list.store(false, std::memory_order_relaxed); + } + } + + if (n != end) { + to_wait_node(n)->notify(); + } + } + + //! Notify all waiting threads of the event + void notify_all() { + atomic_fence(std::memory_order_seq_cst); + notify_all_relaxed(); + } + + // ! 
Notify all waiting threads of the event; Relaxed version + void notify_all_relaxed() { + if (my_waitset.empty()) { + return; + } + + base_list temp; + const base_node* end; + { + tbb::spin_mutex::scoped_lock l(my_mutex); + my_epoch.store(my_epoch.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed); + // TODO: Possible optimization, don't change node state under lock, just do flush + my_waitset.flush_to(temp); + end = temp.end(); + for (base_node* n = temp.front(); n != end; n = n->next) { + to_wait_node(n)->my_is_in_list.store(false, std::memory_order_relaxed); + } + } + + base_node* nxt; + for (base_node* n = temp.front(); n != end; n=nxt) { + nxt = n->next; + to_wait_node(n)->notify(); + } +#if TBB_USE_ASSERT + temp.clear(); +#endif + } + + //! Notify waiting threads of the event that satisfies the given predicate + template <typename P> + void notify( const P& predicate ) { + atomic_fence(std::memory_order_seq_cst); + notify_relaxed( predicate ); + } + + //! Notify waiting threads of the event that satisfies the given predicate; + //! the predicate is called under the lock. Relaxed version. + template<typename P> + void notify_relaxed( const P& predicate ) { + if (my_waitset.empty()) { + return; + } + + base_list temp; + base_node* nxt; + const base_node* end = my_waitset.end(); + { + tbb::spin_mutex::scoped_lock l(my_mutex); + my_epoch.store(my_epoch.load( std::memory_order_relaxed ) + 1, std::memory_order_relaxed); + for (base_node* n = my_waitset.last(); n != end; n = nxt) { + nxt = n->prev; + auto* node = static_cast<wait_node<Context>*>(n); + if (predicate(node->my_context)) { + my_waitset.remove(*n); + node->my_is_in_list.store(false, std::memory_order_relaxed); + temp.add(n); + } + } + } + + end = temp.end(); + for (base_node* n=temp.front(); n != end; n = nxt) { + nxt = n->next; + to_wait_node(n)->notify(); + } +#if TBB_USE_ASSERT + temp.clear(); +#endif + } + + //! Abort any sleeping threads at the time of the call + void abort_all() { + atomic_fence( std::memory_order_seq_cst ); + abort_all_relaxed(); + } + + //! 
Abort any sleeping threads at the time of the call; Relaxed version + void abort_all_relaxed() { + if (my_waitset.empty()) { + return; + } + + base_list temp; + const base_node* end; + { + tbb::spin_mutex::scoped_lock l(my_mutex); + my_epoch.store(my_epoch.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed); + my_waitset.flush_to(temp); + end = temp.end(); + for (base_node* n = temp.front(); n != end; n = n->next) { + to_wait_node(n)->my_is_in_list.store(false, std::memory_order_relaxed); + } + } + + base_node* nxt; + for (base_node* n = temp.front(); n != end; n = nxt) { + nxt = n->next; + to_wait_node(n)->my_aborted = true; + to_wait_node(n)->notify(); + } +#if TBB_USE_ASSERT + temp.clear(); +#endif + } + +private: + template <typename NodeType, typename Pred> + bool guarded_call(Pred&& predicate, NodeType& node) { + bool res = false; + tbb::detail::d0::try_call( [&] { + res = std::forward<Pred>(predicate)(); + }).on_exception( [&] { + cancel_wait(node); + }); + + return res; + } + + tbb::spin_mutex my_mutex; + base_list my_waitset; + std::atomic<unsigned> my_epoch; + + wait_node<Context>* to_wait_node( base_node* node ) { return static_cast<wait_node<Context>*>(node); } +}; + +class concurrent_monitor : public concurrent_monitor_base<std::uintptr_t> { + using base_type = concurrent_monitor_base<std::uintptr_t>; +public: + using base_type::base_type; + /** per-thread descriptor for concurrent_monitor */ + using thread_context = sleep_node<std::uintptr_t>; +}; + +struct extended_context { + extended_context() = default; + + extended_context(std::uintptr_t first_addr, arena* a) : + my_uniq_addr(first_addr), my_arena_addr(a) + {} + + std::uintptr_t my_uniq_addr{0}; + arena* my_arena_addr{nullptr}; +}; + + +#if __TBB_RESUMABLE_TASKS +class resume_node : public wait_node<extended_context> { + using base_type = wait_node<extended_context>; +public: + resume_node(extended_context ctx, execution_data_ext& ed_ext, task_dispatcher& target) + : base_type(ctx), my_curr_dispatcher(ed_ext.task_disp), my_target_dispatcher(&target) + , my_suspend_point(my_curr_dispatcher->get_suspend_point()) + {} + + virtual ~resume_node() { + if (this->my_skipped_wakeup) { + spin_wait_until_eq(this->my_notify_calls, 1); + } + + poison_pointer(my_curr_dispatcher); + poison_pointer(my_target_dispatcher); + poison_pointer(my_suspend_point); + } + + void init() override { + base_type::init(); + } + + void wait() override { + my_curr_dispatcher->resume(*my_target_dispatcher); + __TBB_ASSERT(!this->my_is_in_list.load(std::memory_order_relaxed), "Still in the queue?"); + } + + void reset() override { + base_type::reset(); + spin_wait_until_eq(this->my_notify_calls, 1); + my_notify_calls.store(0, std::memory_order_relaxed); + } + + // notify is called (perhaps, concurrently) twice from: + // - concurrent_monitor::notify + // - post_resume_action::register_waiter + // The second notify is called after thread switches the stack + // (Because we can not call resume while the stack is occupied) + // We need calling resume only when both notifications are performed. 
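+    // my_notify_calls is incremented atomically by each notification; whichever call
+    // observes the value 2 performs the actual resume of the suspend point.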
+ void notify() override { + if (++my_notify_calls == 2) { + r1::resume(my_suspend_point); + } + } + +private: + friend class thread_data; + friend struct suspend_point_type::resume_task; + task_dispatcher* my_curr_dispatcher; + task_dispatcher* my_target_dispatcher; + suspend_point_type* my_suspend_point; + std::atomic<int> my_notify_calls{0}; +}; +#endif // __TBB_RESUMABLE_TASKS + +class extended_concurrent_monitor : public concurrent_monitor_base<extended_context> { + using base_type = concurrent_monitor_base<extended_context>; +public: + using base_type::base_type; + /** per-thread descriptor for concurrent_monitor */ + using thread_context = sleep_node<extended_context>; +#if __TBB_RESUMABLE_TASKS + using resume_context = resume_node; +#endif +}; + +} // namespace r1 +} // namespace detail +} // namespace tbb + +#endif /* __TBB_concurrent_monitor_H */ diff --git a/contrib/libs/tbb/src/tbb/def/lin64-tbb.def b/contrib/libs/tbb/src/tbb/def/lin64-tbb.def new file mode 100644 index 0000000000..09e7753ad4 --- /dev/null +++ b/contrib/libs/tbb/src/tbb/def/lin64-tbb.def @@ -0,0 +1,153 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +{ +global: + +/* Assertions (assert.cpp) */ +_ZN3tbb6detail2r117assertion_failureEPKciS3_S3_; + +/* ITT (profiling.cpp) */ +_ZN3tbb6detail2r112itt_task_endENS0_2d115itt_domain_enumE; +_ZN3tbb6detail2r114itt_region_endENS0_2d115itt_domain_enumEPvy; +_ZN3tbb6detail2r114itt_task_beginENS0_2d115itt_domain_enumEPvyS4_yNS0_2d021string_resource_indexE; +_ZN3tbb6detail2r115call_itt_notifyEiPv; +_ZN3tbb6detail2r115create_itt_syncEPvPKcS4_; +_ZN3tbb6detail2r116itt_region_beginENS0_2d115itt_domain_enumEPvyS4_yNS0_2d021string_resource_indexE; +_ZN3tbb6detail2r116itt_relation_addENS0_2d115itt_domain_enumEPvyNS0_2d012itt_relationES4_y; +_ZN3tbb6detail2r117itt_set_sync_nameEPvPKc; +_ZN3tbb6detail2r119itt_make_task_groupENS0_2d115itt_domain_enumEPvyS4_yNS0_2d021string_resource_indexE; +_ZN3tbb6detail2r120itt_metadata_str_addENS0_2d115itt_domain_enumEPvyNS0_2d021string_resource_indexEPKc; +_ZN3tbb6detail2r120itt_metadata_ptr_addENS0_2d115itt_domain_enumEPvyNS0_2d021string_resource_indexES4_; + +/* Allocators (allocator.cpp) */ +_ZN3tbb6detail2r115allocate_memoryEm; +_ZN3tbb6detail2r117deallocate_memoryEPv; +_ZN3tbb6detail2r122cache_aligned_allocateEm; +_ZN3tbb6detail2r124cache_aligned_deallocateEPv; +_ZN3tbb6detail2r115cache_line_sizeEv; +_ZN3tbb6detail2r117is_tbbmalloc_usedEv; + +/* Small object pool (small_object_pool.cpp) */ +_ZN3tbb6detail2r18allocateERPNS0_2d117small_object_poolEm; +_ZN3tbb6detail2r18allocateERPNS0_2d117small_object_poolEmRKNS2_14execution_dataE; +_ZN3tbb6detail2r110deallocateERNS0_2d117small_object_poolEPvm; +_ZN3tbb6detail2r110deallocateERNS0_2d117small_object_poolEPvmRKNS2_14execution_dataE; + +/* Error handling (exception.cpp) */ +_ZN3tbb6detail2r115throw_exceptionENS0_2d012exception_idE; +_ZTIN3tbb6detail2r114bad_last_allocE; +_ZTVN3tbb6detail2r114bad_last_allocE; +_ZTIN3tbb6detail2r112missing_waitE; 
+_ZTVN3tbb6detail2r112missing_waitE; +_ZTIN3tbb6detail2r110user_abortE; +_ZTVN3tbb6detail2r110user_abortE; +_ZTIN3tbb6detail2r111unsafe_waitE; +_ZTVN3tbb6detail2r111unsafe_waitE; + +/* RTM Mutex (rtm_mutex.cpp) */ +_ZN3tbb6detail2r17acquireERNS0_2d19rtm_mutexERNS3_11scoped_lockEb; +_ZN3tbb6detail2r17releaseERNS0_2d19rtm_mutex11scoped_lockE; +_ZN3tbb6detail2r111try_acquireERNS0_2d19rtm_mutexERNS3_11scoped_lockE; + +/* RTM RW Mutex (rtm_rw_mutex.cpp) */ +_ZN3tbb6detail2r114acquire_readerERNS0_2d112rtm_rw_mutexERNS3_11scoped_lockEb; +_ZN3tbb6detail2r114acquire_writerERNS0_2d112rtm_rw_mutexERNS3_11scoped_lockEb; +_ZN3tbb6detail2r118try_acquire_readerERNS0_2d112rtm_rw_mutexERNS3_11scoped_lockE; +_ZN3tbb6detail2r118try_acquire_writerERNS0_2d112rtm_rw_mutexERNS3_11scoped_lockE; +_ZN3tbb6detail2r17releaseERNS0_2d112rtm_rw_mutex11scoped_lockE; +_ZN3tbb6detail2r17upgradeERNS0_2d112rtm_rw_mutex11scoped_lockE; +_ZN3tbb6detail2r19downgradeERNS0_2d112rtm_rw_mutex11scoped_lockE; + +/* Tasks and partitioners (task.cpp) */ +_ZN3tbb6detail2r17suspendEPFvPvPNS1_18suspend_point_typeEES2_; +_ZN3tbb6detail2r16resumeEPNS1_18suspend_point_typeE; +_ZN3tbb6detail2r121current_suspend_pointEv; +_ZN3tbb6detail2r114notify_waitersEm; + +/* Task dispatcher (task_dispatcher.cpp) */ +_ZN3tbb6detail2r114execution_slotEPKNS0_2d114execution_dataE; +_ZN3tbb6detail2r14waitERNS0_2d112wait_contextERNS2_18task_group_contextE; +_ZN3tbb6detail2r15spawnERNS0_2d14taskERNS2_18task_group_contextE; +_ZN3tbb6detail2r15spawnERNS0_2d14taskERNS2_18task_group_contextEt; +_ZN3tbb6detail2r116execute_and_waitERNS0_2d14taskERNS2_18task_group_contextERNS2_12wait_contextES6_; +_ZN3tbb6detail2r16submitERNS0_2d14taskERNS2_18task_group_contextEPNS1_5arenaEm; +_ZN3tbb6detail2r115current_contextEv; + +/* Task group context (task_group_context.cpp) */ +_ZN3tbb6detail2r110initializeERNS0_2d118task_group_contextE; +_ZN3tbb6detail2r122cancel_group_executionERNS0_2d118task_group_contextE; +_ZN3tbb6detail2r128is_group_execution_cancelledERNS0_2d118task_group_contextE; +_ZN3tbb6detail2r15resetERNS0_2d118task_group_contextE; +_ZN3tbb6detail2r17destroyERNS0_2d118task_group_contextE; +_ZN3tbb6detail2r119capture_fp_settingsERNS0_2d118task_group_contextE; + +/* Task arena (arena.cpp) */ +_ZN3tbb6detail2r115max_concurrencyEPKNS0_2d115task_arena_baseE; +_ZN3tbb6detail2r110initializeERNS0_2d115task_arena_baseE; +_ZN3tbb6detail2r16attachERNS0_2d115task_arena_baseE; +_ZN3tbb6detail2r17executeERNS0_2d115task_arena_baseERNS2_13delegate_baseE; +_ZN3tbb6detail2r19terminateERNS0_2d115task_arena_baseE; +_ZN3tbb6detail2r120isolate_within_arenaERNS0_2d113delegate_baseEl; +_ZN3tbb6detail2r17enqueueERNS0_2d14taskEPNS2_15task_arena_baseE; +_ZN3tbb6detail2r14waitERNS0_2d115task_arena_baseE; + +/* System topology parsing and threads pinning (governor.cpp) */ +_ZN3tbb6detail2r115numa_node_countEv; +_ZN3tbb6detail2r117fill_numa_indicesEPi; +_ZN3tbb6detail2r115core_type_countEl; +_ZN3tbb6detail2r122fill_core_type_indicesEPil; +_ZN3tbb6detail2r131constraints_default_concurrencyERKNS0_2d111constraintsEl; +_ZN3tbb6detail2r128constraints_threads_per_coreERKNS0_2d111constraintsEl; +_ZN3tbb6detail2r124numa_default_concurrencyEi; + +/* Observer (observer_proxy.cpp) */ +_ZN3tbb6detail2r17observeERNS0_2d123task_scheduler_observerEb; + +/* Queuing RW Mutex (queuing_rw_mutex.cpp) */ +_ZN3tbb6detail2r111try_acquireERNS0_2d116queuing_rw_mutexERNS3_11scoped_lockEb; +_ZN3tbb6detail2r117upgrade_to_writerERNS0_2d116queuing_rw_mutex11scoped_lockE; 
+_ZN3tbb6detail2r119downgrade_to_readerERNS0_2d116queuing_rw_mutex11scoped_lockE; +_ZN3tbb6detail2r17acquireERNS0_2d116queuing_rw_mutexERNS3_11scoped_lockEb; +_ZN3tbb6detail2r17releaseERNS0_2d116queuing_rw_mutex11scoped_lockE; +_ZN3tbb6detail2r19constructERNS0_2d116queuing_rw_mutexE; + +/* Global control (global_control.cpp) */ +_ZN3tbb6detail2r16createERNS0_2d114global_controlE; +_ZN3tbb6detail2r17destroyERNS0_2d114global_controlE; +_ZN3tbb6detail2r127global_control_active_valueEi; +_ZN3tbb6detail2r18finalizeERNS0_2d121task_scheduler_handleEl; +_ZN3tbb6detail2r13getERNS0_2d121task_scheduler_handleE; + +/* Parallel pipeline (parallel_pipeline.cpp) */ +_ZN3tbb6detail2r117parallel_pipelineERNS0_2d118task_group_contextEmRKNS2_11filter_nodeE; +_ZN3tbb6detail2r116set_end_of_inputERNS0_2d111base_filterE; + +/* Concurrent bounded queue (concurrent_bounded_queue.cpp) */ +_ZN3tbb6detail2r126allocate_bounded_queue_repEm; +_ZN3tbb6detail2r126wait_bounded_queue_monitorEPNS1_18concurrent_monitorEmlRNS0_2d113delegate_baseE; +_ZN3tbb6detail2r128abort_bounded_queue_monitorsEPNS1_18concurrent_monitorE; +_ZN3tbb6detail2r128deallocate_bounded_queue_repEPhm; +_ZN3tbb6detail2r128notify_bounded_queue_monitorEPNS1_18concurrent_monitorEmm; + +/* Versioning (version.cpp) */ +TBB_runtime_interface_version; +TBB_runtime_version; + +local: +/* TODO: fill more precisely */ +*; +}; diff --git a/contrib/libs/tbb/src/tbb/dynamic_link.cpp b/contrib/libs/tbb/src/tbb/dynamic_link.cpp new file mode 100644 index 0000000000..d5c5c7be7d --- /dev/null +++ b/contrib/libs/tbb/src/tbb/dynamic_link.cpp @@ -0,0 +1,477 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "dynamic_link.h" + +#include "oneapi/tbb/detail/_template_helpers.h" +#include "oneapi/tbb/detail/_utils.h" + +/* + This file is used by both TBB and OpenMP RTL. Do not use __TBB_ASSERT() macro + and runtime_warning() function because they are not available in OpenMP. Use + __TBB_ASSERT_EX and DYNAMIC_LINK_WARNING instead. +*/ + +#include <cstdarg> // va_list etc. +#if _WIN32 + #include <malloc.h> + + // Unify system calls + #define dlopen( name, flags ) LoadLibrary( name ) + #define dlsym( handle, name ) GetProcAddress( handle, name ) + #define dlclose( handle ) ( ! FreeLibrary( handle ) ) + #define dlerror() GetLastError() +#ifndef PATH_MAX + #define PATH_MAX MAX_PATH +#endif +#else /* _WIN32 */ + #include <dlfcn.h> + #include <unistd.h> + + #include <cstring> + #include <climits> + #include <cstdlib> +#endif /* _WIN32 */ + +#if __TBB_WEAK_SYMBOLS_PRESENT && !__TBB_DYNAMIC_LOAD_ENABLED + //TODO: use function attribute for weak symbols instead of the pragma. + #pragma weak dlopen + #pragma weak dlsym + #pragma weak dlclose +#endif /* __TBB_WEAK_SYMBOLS_PRESENT && !__TBB_DYNAMIC_LOAD_ENABLED */ + + +#define __USE_STATIC_DL_INIT ( !__ANDROID__ ) + + +/* +dynamic_link is a common interface for searching for required symbols in an +executable and dynamic libraries. + +dynamic_link provides certain guarantees: + 1. 
Either all or none of the requested symbols are resolved. Moreover, if + symbols are not resolved, the dynamic_link_descriptor table is not modified; + 2. All returned symbols have secured lifetime: this means that none of them + can be invalidated until dynamic_unlink is called; + 3. Any loaded library is loaded only via the full path. The full path is that + from which the runtime itself was loaded. (This is done to avoid security + issues caused by loading libraries from insecure paths). + +dynamic_link searches for the requested symbols in three stages, stopping as +soon as all of the symbols have been resolved. + + 1. Search the global scope: + a. On Windows: dynamic_link tries to obtain the handle of the requested + library and if it succeeds it resolves the symbols via that handle. + b. On Linux: dynamic_link tries to search for the symbols in the global + scope via the main program handle. If the symbols are present in the global + scope their lifetime is not guaranteed (since dynamic_link does not know + anything about the library from which they are exported). Therefore it + tries to "pin" the symbols by obtaining the library name and reopening it. + dlopen may fail to reopen the library in two cases: + i. The symbols are exported from the executable. Currently dynamic _link + cannot handle this situation, so it will not find these symbols in this + step. + ii. The necessary library has been unloaded and cannot be reloaded. It + seems there is nothing that can be done in this case. No symbols are + returned. + + 2. Dynamic load: an attempt is made to load the requested library via the + full path. + The full path used is that from which the runtime itself was loaded. If the + library can be loaded, then an attempt is made to resolve the requested + symbols in the newly loaded library. + If the symbols are not found the library is unloaded. + + 3. Weak symbols: if weak symbols are available they are returned. +*/ + +namespace tbb { +namespace detail { +namespace r1 { + +#if __TBB_WEAK_SYMBOLS_PRESENT || __TBB_DYNAMIC_LOAD_ENABLED + +#if !defined(DYNAMIC_LINK_WARNING) && !__TBB_WIN8UI_SUPPORT && __TBB_DYNAMIC_LOAD_ENABLED + // Report runtime errors and continue. + #define DYNAMIC_LINK_WARNING dynamic_link_warning + static void dynamic_link_warning( dynamic_link_error_t code, ... ) { + suppress_unused_warning(code); + } // library_warning +#endif /* !defined(DYNAMIC_LINK_WARNING) && !__TBB_WIN8UI_SUPPORT && __TBB_DYNAMIC_LOAD_ENABLED */ + + static bool resolve_symbols( dynamic_link_handle module, const dynamic_link_descriptor descriptors[], std::size_t required ) + { + if ( !module ) + return false; + + #if !__TBB_DYNAMIC_LOAD_ENABLED /* only __TBB_WEAK_SYMBOLS_PRESENT is defined */ + if ( !dlsym ) return false; + #endif /* !__TBB_DYNAMIC_LOAD_ENABLED */ + + const std::size_t n_desc=20; // Usually we don't have more than 20 descriptors per library + __TBB_ASSERT_EX( required <= n_desc, "Too many descriptors is required" ); + if ( required > n_desc ) return false; + pointer_to_handler h[n_desc]; + + for ( std::size_t k = 0; k < required; ++k ) { + dynamic_link_descriptor const & desc = descriptors[k]; + pointer_to_handler addr = (pointer_to_handler)dlsym( module, desc.name ); + if ( !addr ) { + return false; + } + h[k] = addr; + } + + // Commit the entry points. + // Cannot use memset here, because the writes must be atomic. 
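+        // (Illustrative note) The loop below publishes each entry point with an
+        // individual pointer-sized store rather than a bulk copy: other threads may
+        // already be calling through these handler pointers, and a byte-wise copy
+        // could let them observe a torn, half-written address.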
+ for( std::size_t k = 0; k < required; ++k ) + *descriptors[k].handler = h[k]; + return true; + } + +#if __TBB_WIN8UI_SUPPORT + bool dynamic_link( const char* library, const dynamic_link_descriptor descriptors[], std::size_t required, dynamic_link_handle*, int flags ) { + dynamic_link_handle tmp_handle = NULL; + TCHAR wlibrary[256]; + if ( MultiByteToWideChar(CP_UTF8, 0, library, -1, wlibrary, 255) == 0 ) return false; + if ( flags & DYNAMIC_LINK_LOAD ) + tmp_handle = LoadPackagedLibrary( wlibrary, 0 ); + if (tmp_handle != NULL){ + return resolve_symbols(tmp_handle, descriptors, required); + }else{ + return false; + } + } + void dynamic_unlink( dynamic_link_handle ) {} + void dynamic_unlink_all() {} +#else +#if __TBB_DYNAMIC_LOAD_ENABLED +/* + There is a security issue on Windows: LoadLibrary() may load and execute malicious code. + See http://www.microsoft.com/technet/security/advisory/2269637.mspx for details. + To avoid the issue, we have to pass full path (not just library name) to LoadLibrary. This + function constructs full path to the specified library (it is assumed the library located + side-by-side with the tbb.dll. + + The function constructs absolute path for given relative path. Important: Base directory is not + current one, it is the directory tbb.dll loaded from. + + Example: + Let us assume "tbb.dll" is located in "c:\program files\common\intel\" directory, e.g. + absolute path of the library is "c:\program files\common\intel\tbb.dll". Absolute path for + "tbbmalloc.dll" would be "c:\program files\common\intel\tbbmalloc.dll". Absolute path for + "malloc\tbbmalloc.dll" would be "c:\program files\common\intel\malloc\tbbmalloc.dll". +*/ + + // Struct handle_storage is used by dynamic_link routine to store handles of + // all loaded or pinned dynamic libraries. When TBB is shut down, it calls + // dynamic_unlink_all() that unloads modules referenced by handle_storage. + // This struct should not have any constructors since it may be used before + // the constructor is called. + #define MAX_LOADED_MODULES 8 // The number of maximum possible modules which can be loaded + + using atomic_incrementer = std::atomic<std::size_t>; + + static struct handles_t { + atomic_incrementer my_size; + dynamic_link_handle my_handles[MAX_LOADED_MODULES]; + + void add(const dynamic_link_handle &handle) { + const std::size_t ind = my_size++; + __TBB_ASSERT_EX( ind < MAX_LOADED_MODULES, "Too many modules are loaded" ); + my_handles[ind] = handle; + } + + void free() { + const std::size_t size = my_size; + for (std::size_t i=0; i<size; ++i) + dynamic_unlink( my_handles[i] ); + } + } handles; + + static std::once_flag init_dl_data_state; + + static struct ap_data_t { + char _path[PATH_MAX+1]; + std::size_t _len; + } ap_data; + + static void init_ap_data() { + #if _WIN32 + // Get handle of our DLL first. + HMODULE handle; + BOOL brc = GetModuleHandleEx( + GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, + (LPCSTR)( & dynamic_link ), // any function inside the library can be used for the address + & handle + ); + if ( !brc ) { // Error occurred. + int err = GetLastError(); + DYNAMIC_LINK_WARNING( dl_sys_fail, "GetModuleHandleEx", err ); + return; + } + // Now get path to our DLL. + DWORD drc = GetModuleFileName( handle, ap_data._path, static_cast< DWORD >( PATH_MAX ) ); + if ( drc == 0 ) { // Error occurred. + int err = GetLastError(); + DYNAMIC_LINK_WARNING( dl_sys_fail, "GetModuleFileName", err ); + return; + } + if ( drc >= PATH_MAX ) { // Buffer too short. 
+ DYNAMIC_LINK_WARNING( dl_buff_too_small ); + return; + } + // Find the position of the last backslash. + char *backslash = std::strrchr( ap_data._path, '\\' ); + + if ( !backslash ) { // Backslash not found. + __TBB_ASSERT_EX( backslash!=NULL, "Unbelievable."); + return; + } + __TBB_ASSERT_EX( backslash >= ap_data._path, "Unbelievable."); + ap_data._len = (std::size_t)(backslash - ap_data._path) + 1; + *(backslash+1) = 0; + #else + // Get the library path + Dl_info dlinfo; + int res = dladdr( (void*)&dynamic_link, &dlinfo ); // any function inside the library can be used for the address + if ( !res ) { + char const * err = dlerror(); + DYNAMIC_LINK_WARNING( dl_sys_fail, "dladdr", err ); + return; + } else { + __TBB_ASSERT_EX( dlinfo.dli_fname!=NULL, "Unbelievable." ); + } + + char const *slash = std::strrchr( dlinfo.dli_fname, '/' ); + std::size_t fname_len=0; + if ( slash ) { + __TBB_ASSERT_EX( slash >= dlinfo.dli_fname, "Unbelievable."); + fname_len = (std::size_t)(slash - dlinfo.dli_fname) + 1; + } + + std::size_t rc; + if ( dlinfo.dli_fname[0]=='/' ) { + // The library path is absolute + rc = 0; + ap_data._len = 0; + } else { + // The library path is relative so get the current working directory + if ( !getcwd( ap_data._path, sizeof(ap_data._path)/sizeof(ap_data._path[0]) ) ) { + DYNAMIC_LINK_WARNING( dl_buff_too_small ); + return; + } + ap_data._len = std::strlen( ap_data._path ); + ap_data._path[ap_data._len++]='/'; + rc = ap_data._len; + } + + if ( fname_len>0 ) { + if ( ap_data._len>PATH_MAX ) { + DYNAMIC_LINK_WARNING( dl_buff_too_small ); + ap_data._len=0; + return; + } + std::strncpy( ap_data._path+rc, dlinfo.dli_fname, fname_len ); + ap_data._len += fname_len; + ap_data._path[ap_data._len]=0; + } + #endif /* _WIN32 */ + } + + static void init_dl_data() { + init_ap_data(); + } + + /* + The function constructs absolute path for given relative path. Important: Base directory is not + current one, it is the directory libtbb.so loaded from. + + Arguments: + in name -- Name of a file (may be with relative path; it must not be an absolute one). + out path -- Buffer to save result (absolute path) to. + in len -- Size of buffer. + ret -- 0 -- Error occurred. + > len -- Buffer too short, required size returned. + otherwise -- Ok, number of characters (incl. terminating null) written to buffer. + */ + static std::size_t abs_path( char const * name, char * path, std::size_t len ) { + if ( ap_data._len == 0 ) + return 0; + + std::size_t name_len = std::strlen( name ); + std::size_t full_len = name_len+ap_data._len; + if ( full_len < len ) { + __TBB_ASSERT( ap_data._path[ap_data._len] == 0, NULL); + __TBB_ASSERT( std::strlen(ap_data._path) == ap_data._len, NULL); + std::strncpy( path, ap_data._path, ap_data._len + 1 ); + __TBB_ASSERT( path[ap_data._len] == 0, NULL ); + std::strncat( path, name, len - ap_data._len ); + __TBB_ASSERT( std::strlen(path) == full_len, NULL ); + } + return full_len+1; // +1 for null character + } +#endif // __TBB_DYNAMIC_LOAD_ENABLED + void init_dynamic_link_data() { + #if __TBB_DYNAMIC_LOAD_ENABLED + std::call_once( init_dl_data_state, init_dl_data ); + #endif + } + + #if __USE_STATIC_DL_INIT + // ap_data structure is initialized with current directory on Linux. + // So it should be initialized as soon as possible since the current directory may be changed. + // static_init_ap_data object provides this initialization during library loading. 
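+    // Intended ordering (sketch, assuming static initializers run before main() and
+    // before any chdir() in the process):
+    //   1. static_init_dl_data_t() -> init_dynamic_link_data() -> init_ap_data()
+    //   2. ap_data._path then holds the directory the runtime was loaded from
+    //      (or a cwd-based fallback on Linux when only a relative name is known)
+    //   3. later dynamic_load() calls can build absolute library paths via abs_path()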
+ static struct static_init_dl_data_t { + static_init_dl_data_t() { + init_dynamic_link_data(); + } + } static_init_dl_data; + #endif + + #if __TBB_WEAK_SYMBOLS_PRESENT + static bool weak_symbol_link( const dynamic_link_descriptor descriptors[], std::size_t required ) + { + // Check if the required entries are present in what was loaded into our process. + for ( std::size_t k = 0; k < required; ++k ) + if ( !descriptors[k].ptr ) + return false; + // Commit the entry points. + for ( std::size_t k = 0; k < required; ++k ) + *descriptors[k].handler = (pointer_to_handler) descriptors[k].ptr; + return true; + } + #else + static bool weak_symbol_link( const dynamic_link_descriptor[], std::size_t ) { + return false; + } + #endif /* __TBB_WEAK_SYMBOLS_PRESENT */ + + void dynamic_unlink( dynamic_link_handle handle ) { + #if !__TBB_DYNAMIC_LOAD_ENABLED /* only __TBB_WEAK_SYMBOLS_PRESENT is defined */ + if ( !dlclose ) return; + #endif + if ( handle ) { + dlclose( handle ); + } + } + + void dynamic_unlink_all() { + #if __TBB_DYNAMIC_LOAD_ENABLED + handles.free(); + #endif + } + + static dynamic_link_handle global_symbols_link( const char* library, const dynamic_link_descriptor descriptors[], std::size_t required ) { + dynamic_link_handle library_handle{}; +#if _WIN32 + bool res = GetModuleHandleEx(0, library, &library_handle); + __TBB_ASSERT_EX(res && library_handle || !res && !library_handle, nullptr); +#else /* _WIN32 */ + #if !__TBB_DYNAMIC_LOAD_ENABLED /* only __TBB_WEAK_SYMBOLS_PRESENT is defined */ + if ( !dlopen ) return 0; + #endif /* !__TBB_DYNAMIC_LOAD_ENABLED */ + // RTLD_GLOBAL - to guarantee that old TBB will find the loaded library + // RTLD_NOLOAD - not to load the library without the full path + library_handle = dlopen(library, RTLD_LAZY | RTLD_GLOBAL | RTLD_NOLOAD); +#endif /* _WIN32 */ + if (library_handle) { + if (!resolve_symbols(library_handle, descriptors, required)) { + dynamic_unlink(library_handle); + library_handle = nullptr; + } + } + return library_handle; + } + + static void save_library_handle( dynamic_link_handle src, dynamic_link_handle *dst ) { + __TBB_ASSERT_EX( src, "The library handle to store must be non-zero" ); + if ( dst ) + *dst = src; + #if __TBB_DYNAMIC_LOAD_ENABLED + else + handles.add( src ); + #endif /* __TBB_DYNAMIC_LOAD_ENABLED */ + } + + dynamic_link_handle dynamic_load( const char* library, const dynamic_link_descriptor descriptors[], std::size_t required ) { + ::tbb::detail::suppress_unused_warning( library, descriptors, required ); +#if __TBB_DYNAMIC_LOAD_ENABLED + + std::size_t const len = PATH_MAX + 1; + char path[ len ]; + std::size_t rc = abs_path( library, path, len ); + if ( 0 < rc && rc <= len ) { +#if _WIN32 + // Prevent Windows from displaying silly message boxes if it fails to load library + // (e.g. 
because of MS runtime problems - one of those crazy manifest related ones) + UINT prev_mode = SetErrorMode (SEM_FAILCRITICALERRORS); +#endif /* _WIN32 */ + dynamic_link_handle library_handle = dlopen( path, RTLD_NOW | RTLD_GLOBAL ); +#if _WIN32 + SetErrorMode (prev_mode); +#endif /* _WIN32 */ + if( library_handle ) { + if( !resolve_symbols( library_handle, descriptors, required ) ) { + // The loaded library does not contain all the expected entry points + dynamic_unlink( library_handle ); + library_handle = NULL; + } + } else + DYNAMIC_LINK_WARNING( dl_lib_not_found, path, dlerror() ); + return library_handle; + } else if ( rc>len ) + DYNAMIC_LINK_WARNING( dl_buff_too_small ); + // rc == 0 means failing of init_ap_data so the warning has already been issued. + +#endif /* __TBB_DYNAMIC_LOAD_ENABLED */ + return 0; + } + + bool dynamic_link( const char* library, const dynamic_link_descriptor descriptors[], std::size_t required, dynamic_link_handle *handle, int flags ) { + init_dynamic_link_data(); + + // TODO: May global_symbols_link find weak symbols? + dynamic_link_handle library_handle = ( flags & DYNAMIC_LINK_GLOBAL ) ? global_symbols_link( library, descriptors, required ) : 0; + + if ( !library_handle && ( flags & DYNAMIC_LINK_LOAD ) ) + library_handle = dynamic_load( library, descriptors, required ); + + if ( !library_handle && ( flags & DYNAMIC_LINK_WEAK ) ) + return weak_symbol_link( descriptors, required ); + + if ( library_handle ) { + save_library_handle( library_handle, handle ); + return true; + } + return false; + } + +#endif /*__TBB_WIN8UI_SUPPORT*/ +#else /* __TBB_WEAK_SYMBOLS_PRESENT || __TBB_DYNAMIC_LOAD_ENABLED */ + bool dynamic_link( const char*, const dynamic_link_descriptor*, std::size_t, dynamic_link_handle *handle, int ) { + if ( handle ) + *handle=0; + return false; + } + void dynamic_unlink( dynamic_link_handle ) {} + void dynamic_unlink_all() {} +#endif /* __TBB_WEAK_SYMBOLS_PRESENT || __TBB_DYNAMIC_LOAD_ENABLED */ + +} // namespace r1 +} // namespace detail +} // namespace tbb diff --git a/contrib/libs/tbb/src/tbb/dynamic_link.h b/contrib/libs/tbb/src/tbb/dynamic_link.h new file mode 100644 index 0000000000..91adcc507c --- /dev/null +++ b/contrib/libs/tbb/src/tbb/dynamic_link.h @@ -0,0 +1,115 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_dynamic_link +#define __TBB_dynamic_link + +// Support for dynamic loading entry points from other shared libraries. + +#include "oneapi/tbb/detail/_config.h" + +#include <atomic> +#include <mutex> + +/** By default, symbols declared and defined here go into namespace tbb::internal. + To put them in other namespace, define macros OPEN_INTERNAL_NAMESPACE + and CLOSE_INTERNAL_NAMESPACE to override the following default definitions. **/ + +#include <cstddef> +#if _WIN32 +#include <Windows.h> +#endif /* _WIN32 */ + +namespace tbb { +namespace detail { +namespace r1 { + +//! Type definition for a pointer to a void somefunc(void) +typedef void (*pointer_to_handler)(); + +//! 
The helper to construct dynamic_link_descriptor structure +// Double cast through the void* in DLD macro is necessary to +// prevent warnings from some compilers (g++ 4.1) +#if __TBB_WEAK_SYMBOLS_PRESENT +#define DLD(s,h) {#s, (pointer_to_handler*)(void*)(&h), (pointer_to_handler)&s} +#define DLD_NOWEAK(s,h) {#s, (pointer_to_handler*)(void*)(&h), NULL} +#else +#define DLD(s,h) {#s, (pointer_to_handler*)(void*)(&h)} +#define DLD_NOWEAK(s,h) DLD(s,h) +#endif /* __TBB_WEAK_SYMBOLS_PRESENT */ +//! Association between a handler name and location of pointer to it. +struct dynamic_link_descriptor { + //! Name of the handler + const char* name; + //! Pointer to the handler + pointer_to_handler* handler; +#if __TBB_WEAK_SYMBOLS_PRESENT + //! Weak symbol + pointer_to_handler ptr; +#endif +}; + +#if _WIN32 +using dynamic_link_handle = HMODULE; +#else +using dynamic_link_handle = void*; +#endif /* _WIN32 */ + +const int DYNAMIC_LINK_GLOBAL = 0x01; +const int DYNAMIC_LINK_LOAD = 0x02; +const int DYNAMIC_LINK_WEAK = 0x04; +const int DYNAMIC_LINK_ALL = DYNAMIC_LINK_GLOBAL | DYNAMIC_LINK_LOAD | DYNAMIC_LINK_WEAK; + +//! Fill in dynamically linked handlers. +/** 'library' is the name of the requested library. It should not contain a full + path since dynamic_link adds the full path (from which the runtime itself + was loaded) to the library name. + 'required' is the number of the initial entries in the array descriptors[] + that have to be found in order for the call to succeed. If the library and + all the required handlers are found, then the corresponding handler + pointers are set, and the return value is true. Otherwise the original + array of descriptors is left untouched and the return value is false. + 'required' is limited by 20 (exceeding of this value will result in failure + to load the symbols and the return value will be false). + 'handle' is the handle of the library if it is loaded. Otherwise it is left + untouched. + 'flags' is the set of DYNAMIC_LINK_* flags. Each of the DYNAMIC_LINK_* flags + allows its corresponding linking stage. +**/ +bool dynamic_link( const char* library, + const dynamic_link_descriptor descriptors[], + std::size_t required, + dynamic_link_handle* handle = 0, + int flags = DYNAMIC_LINK_ALL ); + +void dynamic_unlink( dynamic_link_handle handle ); + +void dynamic_unlink_all(); + +enum dynamic_link_error_t { + dl_success = 0, + dl_lib_not_found, // char const * lib, dlerr_t err + dl_sym_not_found, // char const * sym, dlerr_t err + // Note: dlerr_t depends on OS: it is char const * on Linux* and macOS*, int on Windows*. + dl_sys_fail, // char const * func, int err + dl_buff_too_small // none +}; // dynamic_link_error_t + +} // namespace r1 +} // namespace detail +} // namespace tbb + +#endif /* __TBB_dynamic_link */ diff --git a/contrib/libs/tbb/src/tbb/environment.h b/contrib/libs/tbb/src/tbb/environment.h new file mode 100644 index 0000000000..8886ef09e1 --- /dev/null +++ b/contrib/libs/tbb/src/tbb/environment.h @@ -0,0 +1,81 @@ +/* + Copyright (c) 2018-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_tbb_environment_H +#define __TBB_tbb_environment_H + +#include <cstdlib> +#include <cstring> +#include <cerrno> +#include <cctype> + +namespace tbb { +namespace detail { +namespace r1 { + +#if __TBB_WIN8UI_SUPPORT +static inline bool GetBoolEnvironmentVariable( const char * ) { + return false; +} + +static inline long GetIntegralEnvironmentVariable( const char * ) { + return -1; +} +#else /* __TBB_WIN8UI_SUPPORT */ +static inline bool GetBoolEnvironmentVariable( const char * name ) { + if ( const char* s = std::getenv(name) ) { + // The result is defined as true only if the environment variable contains + // no characters except one '1' character and an arbitrary number of spaces + // (including the absence of spaces). + size_t index = std::strspn(s, " "); + if (s[index] != '1') return false; + index++; + // Memory access after incrementing is safe, since the getenv() returns a + // NULL terminated string, and even if the character getting by index is '1', + // and this character is the end of string, after incrementing we will get + // an index of character, that contains '\0' + index += std::strspn(&s[index], " "); + return !s[index]; + } + return false; +} + +static inline long GetIntegralEnvironmentVariable( const char * name ) { + if ( const char* s = std::getenv(name) ) { + char* end = NULL; + errno = 0; + long value = std::strtol(s, &end, 10); + + // We have exceeded the range, value is negative or string is incovertable + if ( errno == ERANGE || value < 0 || end==s ) { + return -1; + } + for ( ; *end != '\0'; end++ ) { + if ( !std::isspace(*end) ) { + return -1; + } + } + return value; + } + return -1; +} +#endif /* __TBB_WIN8UI_SUPPORT */ + +} // namespace r1 +} // namespace detail +} // namespace tbb + +#endif // __TBB_tbb_environment_H diff --git a/contrib/libs/tbb/src/tbb/exception.cpp b/contrib/libs/tbb/src/tbb/exception.cpp new file mode 100644 index 0000000000..c3e95d6d97 --- /dev/null +++ b/contrib/libs/tbb/src/tbb/exception.cpp @@ -0,0 +1,162 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#include "oneapi/tbb/detail/_exception.h" +#include "oneapi/tbb/detail/_assert.h" +#include "oneapi/tbb/detail/_template_helpers.h" + +#include <cstring> +#include <cstdio> +#include <stdexcept> // std::runtime_error +#include <new> +#include <stdexcept> + +#define __TBB_STD_RETHROW_EXCEPTION_POSSIBLY_BROKEN \ + (__GLIBCXX__ && __TBB_GLIBCXX_VERSION>=40700 && __TBB_GLIBCXX_VERSION<60000 && TBB_USE_EXCEPTIONS) + +#if __TBB_STD_RETHROW_EXCEPTION_POSSIBLY_BROKEN +// GCC ABI declarations necessary for a workaround +#include <cxxabi.h> +#endif + +namespace tbb { +namespace detail { +namespace r1 { + +const char* bad_last_alloc::what() const noexcept(true) { return "bad allocation in previous or concurrent attempt"; } +const char* user_abort::what() const noexcept(true) { return "User-initiated abort has terminated this operation"; } +const char* missing_wait::what() const noexcept(true) { return "wait() was not called on the structured_task_group"; } + +#if TBB_USE_EXCEPTIONS + template <typename F> + /*[[noreturn]]*/ void do_throw_noexcept(F throw_func) noexcept { + throw_func(); + } + + /*[[noreturn]]*/ void do_throw_noexcept(void (*throw_func)()) noexcept { + throw_func(); + } + + bool terminate_on_exception(); // defined in global_control.cpp and ipc_server.cpp + + template <typename F> + /*[[noreturn]]*/ void do_throw(F throw_func) { + if (terminate_on_exception()) { + do_throw_noexcept(throw_func); + } + throw_func(); + } + + #define DO_THROW(exc, init_args) do_throw( []{ throw exc init_args; } ); +#else /* !TBB_USE_EXCEPTIONS */ + #define PRINT_ERROR_AND_ABORT(exc_name, msg) \ + std::fprintf (stderr, "Exception %s with message %s would have been thrown, " \ + "if exception handling had not been disabled. Aborting.\n", exc_name, msg); \ + std::fflush(stderr); \ + std::abort(); + #define DO_THROW(exc, init_args) PRINT_ERROR_AND_ABORT(#exc, #init_args) +#endif /* !TBB_USE_EXCEPTIONS */ + +void throw_exception ( exception_id eid ) { + switch ( eid ) { + case exception_id::bad_alloc: DO_THROW(std::bad_alloc, ()); break; + case exception_id::bad_last_alloc: DO_THROW(bad_last_alloc, ()); break; + case exception_id::user_abort: DO_THROW( user_abort, () ); break; + case exception_id::nonpositive_step: DO_THROW(std::invalid_argument, ("Step must be positive") ); break; + case exception_id::out_of_range: DO_THROW(std::out_of_range, ("Index out of requested size range")); break; + case exception_id::reservation_length_error: DO_THROW(std::length_error, ("Attempt to exceed implementation defined length limits")); break; + case exception_id::missing_wait: DO_THROW(missing_wait, ()); break; + case exception_id::invalid_load_factor: DO_THROW(std::out_of_range, ("Invalid hash load factor")); break; + case exception_id::invalid_key: DO_THROW(std::out_of_range, ("invalid key")); break; + case exception_id::bad_tagged_msg_cast: DO_THROW(std::runtime_error, ("Illegal tagged_msg cast")); break; +#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE + case exception_id::unsafe_wait: DO_THROW(unsafe_wait, ("Unsafe to wait further")); break; +#endif + default: __TBB_ASSERT ( false, "Unknown exception ID" ); + } + __TBB_ASSERT(false, "Unreachable code"); +} + +/* The "what" should be fairly short, not more than about 128 characters. + Because we control all the call sites to handle_perror, it is pointless + to bullet-proof it for very long strings. 
+ + Design note: ADR put this routine off to the side in tbb_misc.cpp instead of + Task.cpp because the throw generates a pathetic lot of code, and ADR wanted + this large chunk of code to be placed on a cold page. */ +void handle_perror( int error_code, const char* what ) { + const int BUF_SIZE = 255; + char buf[BUF_SIZE + 1] = { 0 }; + std::strncat(buf, what, BUF_SIZE); + std::size_t buf_len = std::strlen(buf); + if (error_code) { + std::strncat(buf, ": ", BUF_SIZE - buf_len); + buf_len = std::strlen(buf); + std::strncat(buf, std::strerror(error_code), BUF_SIZE - buf_len); + buf_len = std::strlen(buf); + } + __TBB_ASSERT(buf_len <= BUF_SIZE && buf[buf_len] == 0, nullptr); +#if TBB_USE_EXCEPTIONS + do_throw([&buf] { throw std::runtime_error(buf); }); +#else + PRINT_ERROR_AND_ABORT( "runtime_error", buf); +#endif /* !TBB_USE_EXCEPTIONS */ +} + +#if __TBB_STD_RETHROW_EXCEPTION_POSSIBLY_BROKEN +// Runtime detection and workaround for the GCC bug 62258. +// The problem is that std::rethrow_exception() does not increment a counter +// of active exceptions, causing std::uncaught_exception() to return a wrong value. +// The code is created after, and roughly reflects, the workaround +// at https://gcc.gnu.org/bugzilla/attachment.cgi?id=34683 + +void fix_broken_rethrow() { + struct gcc_eh_data { + void * caughtExceptions; + unsigned int uncaughtExceptions; + }; + gcc_eh_data* eh_data = punned_cast<gcc_eh_data*>( abi::__cxa_get_globals() ); + ++eh_data->uncaughtExceptions; +} + +bool gcc_rethrow_exception_broken() { + bool is_broken; + __TBB_ASSERT( !std::uncaught_exception(), + "gcc_rethrow_exception_broken() must not be called when an exception is active" ); + try { + // Throw, catch, and rethrow an exception + try { + throw __TBB_GLIBCXX_VERSION; + } catch(...) { + std::rethrow_exception( std::current_exception() ); + } + } catch(...) { + // Check the bug presence + is_broken = std::uncaught_exception(); + } + if( is_broken ) fix_broken_rethrow(); + __TBB_ASSERT( !std::uncaught_exception(), NULL ); + return is_broken; +} +#else +void fix_broken_rethrow() {} +bool gcc_rethrow_exception_broken() { return false; } +#endif /* __TBB_STD_RETHROW_EXCEPTION_POSSIBLY_BROKEN */ + +} // namespace r1 +} // namespace detail +} // namespace tbb + diff --git a/contrib/libs/tbb/src/tbb/global_control.cpp b/contrib/libs/tbb/src/tbb/global_control.cpp new file mode 100644 index 0000000000..a9eac2cbc3 --- /dev/null +++ b/contrib/libs/tbb/src/tbb/global_control.cpp @@ -0,0 +1,275 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "oneapi/tbb/detail/_config.h" +#include "oneapi/tbb/detail/_template_helpers.h" + +#include "oneapi/tbb/global_control.h" +#include "oneapi/tbb/tbb_allocator.h" +#include "oneapi/tbb/spin_mutex.h" + +#include "governor.h" +#include "market.h" +#include "misc.h" + +#include <atomic> +#include <set> + +namespace tbb { +namespace detail { +namespace r1 { + +//! 
Comparator for a set of global_control objects +struct control_storage_comparator { + bool operator()(const global_control* lhs, const global_control* rhs) const; +}; + +class control_storage { + friend struct global_control_impl; + friend std::size_t global_control_active_value(int); +protected: + std::size_t my_active_value{0}; + std::set<global_control*, control_storage_comparator, tbb_allocator<global_control*>> my_list{}; + spin_mutex my_list_mutex{}; +public: + virtual std::size_t default_value() const = 0; + virtual void apply_active(std::size_t new_active) { + my_active_value = new_active; + } + virtual bool is_first_arg_preferred(std::size_t a, std::size_t b) const { + return a>b; // prefer max by default + } + virtual std::size_t active_value() { + spin_mutex::scoped_lock lock(my_list_mutex); // protect my_list.empty() call + return !my_list.empty() ? my_active_value : default_value(); + } +}; + +class alignas(max_nfs_size) allowed_parallelism_control : public control_storage { + virtual std::size_t default_value() const override { + return max(1U, governor::default_num_threads()); + } + virtual bool is_first_arg_preferred(std::size_t a, std::size_t b) const override { + return a<b; // prefer min allowed parallelism + } + virtual void apply_active(std::size_t new_active) override { + control_storage::apply_active(new_active); + __TBB_ASSERT( my_active_value>=1, NULL ); + // -1 to take external thread into account + market::set_active_num_workers( my_active_value-1 ); + } + virtual std::size_t active_value() override { + spin_mutex::scoped_lock lock(my_list_mutex); // protect my_list.empty() call + if (my_list.empty()) + return default_value(); + // non-zero, if market is active + const std::size_t workers = market::max_num_workers(); + // We can't exceed market's maximal number of workers. + // +1 to take external thread into account + return workers? min(workers+1, my_active_value): my_active_value; + } +public: + std::size_t active_value_if_present() const { + return !my_list.empty() ? 
my_active_value : 0; + } +}; + +class alignas(max_nfs_size) stack_size_control : public control_storage { + virtual std::size_t default_value() const override { + return ThreadStackSize; + } + virtual void apply_active(std::size_t new_active) override { + control_storage::apply_active(new_active); +#if __TBB_WIN8UI_SUPPORT && (_WIN32_WINNT < 0x0A00) + __TBB_ASSERT( false, "For Windows 8 Store* apps we must not set stack size" ); +#endif + } +}; + +class alignas(max_nfs_size) terminate_on_exception_control : public control_storage { + virtual std::size_t default_value() const override { + return 0; + } +}; + +#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE +class alignas(max_nfs_size) lifetime_control : public control_storage { + virtual bool is_first_arg_preferred(std::size_t, std::size_t) const override { + return false; // not interested + } + virtual std::size_t default_value() const override { + return 0; + } + virtual void apply_active(std::size_t new_active) override { + if (new_active == 1) { + // reserve the market reference + market::global_market_mutex_type::scoped_lock lock( market::theMarketMutex ); + if (market::theMarket) { + market::add_ref_unsafe(lock, /*is_public*/ true); + } + } else if (new_active == 0) { // new_active == 0 + // release the market reference + market::global_market_mutex_type::scoped_lock lock( market::theMarketMutex ); + if (market::theMarket != nullptr) { + lock.release(); + market::theMarket->release(/*is_public*/ true, /*blocking_terminate*/ false); + } + } + control_storage::apply_active(new_active); + } + +public: + bool is_empty() { + spin_mutex::scoped_lock lock(my_list_mutex); + return my_list.empty(); + } +}; +#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE + +static allowed_parallelism_control allowed_parallelism_ctl; +static stack_size_control stack_size_ctl; +static terminate_on_exception_control terminate_on_exception_ctl; +#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE +static lifetime_control lifetime_ctl; +static control_storage *controls[] = {&allowed_parallelism_ctl, &stack_size_ctl, &terminate_on_exception_ctl, &lifetime_ctl}; +#else +static control_storage *controls[] = {&allowed_parallelism_ctl, &stack_size_ctl, &terminate_on_exception_ctl}; +#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE + +//! 
Comparator for a set of global_control objects +inline bool control_storage_comparator::operator()(const global_control* lhs, const global_control* rhs) const { + __TBB_ASSERT_RELEASE(lhs->my_param < global_control::parameter_max , NULL); + return lhs->my_value < rhs->my_value || (lhs->my_value == rhs->my_value && lhs < rhs); +} + +unsigned market::app_parallelism_limit() { + return allowed_parallelism_ctl.active_value_if_present(); +} + +bool terminate_on_exception() { + return global_control::active_value(global_control::terminate_on_exception) == 1; +} + +#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE +unsigned market::is_lifetime_control_present() { + return !lifetime_ctl.is_empty(); +} +#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE + +struct global_control_impl { +private: + static bool erase_if_present(control_storage* const c, d1::global_control& gc) { + auto it = c->my_list.find(&gc); + if (it != c->my_list.end()) { + c->my_list.erase(it); + return true; + } + return false; + } + +public: + + static void create(d1::global_control& gc) { + __TBB_ASSERT_RELEASE(gc.my_param < global_control::parameter_max, NULL); + control_storage* const c = controls[gc.my_param]; + + spin_mutex::scoped_lock lock(c->my_list_mutex); + if (c->my_list.empty() || c->is_first_arg_preferred(gc.my_value, c->my_active_value)) { + // to guarantee that apply_active() is called with current active value, + // calls it here and in internal_destroy() under my_list_mutex + c->apply_active(gc.my_value); + } + c->my_list.insert(&gc); + } + + static void destroy(d1::global_control& gc) { + __TBB_ASSERT_RELEASE(gc.my_param < global_control::parameter_max, NULL); + control_storage* const c = controls[gc.my_param]; +#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE + __TBB_ASSERT(gc.my_param == global_control::scheduler_handle || !c->my_list.empty(), NULL); +#else + __TBB_ASSERT(!c->my_list.empty(), NULL); +#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE + // Concurrent reading and changing global parameter is possible. 
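+        // Locking sketch: create() and destroy() both call apply_active() while holding
+        // my_list_mutex, and active_value() takes the same mutex, so active_value()
+        // never observes my_list and my_active_value mid-update relative to each other.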
+ spin_mutex::scoped_lock lock(c->my_list_mutex); + std::size_t new_active = (std::size_t)(-1), old_active = c->my_active_value; + + if (!erase_if_present(c, gc)) { +#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE + __TBB_ASSERT(gc.my_param == global_control::scheduler_handle , NULL); + return; +#else + __TBB_ASSERT(false, "Unreachable code"); +#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE + } + if (c->my_list.empty()) { + __TBB_ASSERT(new_active == (std::size_t) - 1, NULL); + new_active = c->default_value(); + } else { + new_active = (*c->my_list.begin())->my_value; + } + if (new_active != old_active) { + c->apply_active(new_active); + } + } + + static bool remove_and_check_if_empty(d1::global_control& gc) { + __TBB_ASSERT_RELEASE(gc.my_param < global_control::parameter_max, NULL); + control_storage* const c = controls[gc.my_param]; + __TBB_ASSERT(!c->my_list.empty(), NULL); + + spin_mutex::scoped_lock lock(c->my_list_mutex); + erase_if_present(c, gc); + return c->my_list.empty(); + } +#if TBB_USE_ASSERT + static bool is_present(d1::global_control& gc) { + __TBB_ASSERT_RELEASE(gc.my_param < global_control::parameter_max, NULL); + control_storage* const c = controls[gc.my_param]; + + spin_mutex::scoped_lock lock(c->my_list_mutex); + auto it = c->my_list.find(&gc); + if (it != c->my_list.end()) { + return true; + } + return false; + } +#endif // TBB_USE_ASSERT +}; + +void __TBB_EXPORTED_FUNC create(d1::global_control& gc) { + global_control_impl::create(gc); +} +void __TBB_EXPORTED_FUNC destroy(d1::global_control& gc) { + global_control_impl::destroy(gc); +} + +bool remove_and_check_if_empty(d1::global_control& gc) { + return global_control_impl::remove_and_check_if_empty(gc); +} +#if TBB_USE_ASSERT +bool is_present(d1::global_control& gc) { + return global_control_impl::is_present(gc); +} +#endif // TBB_USE_ASSERT +std::size_t __TBB_EXPORTED_FUNC global_control_active_value(int param) { + __TBB_ASSERT_RELEASE(param < global_control::parameter_max, NULL); + return controls[param]->active_value(); +} + +} // namespace r1 +} // namespace detail +} // namespace tbb diff --git a/contrib/libs/tbb/src/tbb/governor.cpp b/contrib/libs/tbb/src/tbb/governor.cpp new file mode 100644 index 0000000000..b75b91a75c --- /dev/null +++ b/contrib/libs/tbb/src/tbb/governor.cpp @@ -0,0 +1,526 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "governor.h" +#include "main.h" +#include "thread_data.h" +#include "market.h" +#include "arena.h" +#include "dynamic_link.h" + +#include "oneapi/tbb/task_group.h" +#include "oneapi/tbb/global_control.h" +#include "oneapi/tbb/tbb_allocator.h" +#include "oneapi/tbb/info.h" + +#include "task_dispatcher.h" + +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <atomic> +#include <algorithm> + +namespace tbb { +namespace detail { +namespace r1 { + +#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE +//! 
global_control.cpp contains definition +bool remove_and_check_if_empty(d1::global_control& gc); +bool is_present(d1::global_control& gc); +#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE + +namespace rml { +tbb_server* make_private_server( tbb_client& client ); +} // namespace rml + +//------------------------------------------------------------------------ +// governor +//------------------------------------------------------------------------ + +void governor::acquire_resources () { +#if __TBB_USE_POSIX + int status = theTLS.create(auto_terminate); +#else + int status = theTLS.create(); +#endif + if( status ) + handle_perror(status, "TBB failed to initialize task scheduler TLS\n"); + detect_cpu_features(cpu_features); + is_rethrow_broken = gcc_rethrow_exception_broken(); +} + +void governor::release_resources () { + theRMLServerFactory.close(); + destroy_process_mask(); + + __TBB_ASSERT(!(__TBB_InitOnce::initialization_done() && theTLS.get()), "TBB is unloaded while thread data still alive?"); + + int status = theTLS.destroy(); + if( status ) + runtime_warning("failed to destroy task scheduler TLS: %s", std::strerror(status)); + dynamic_unlink_all(); +} + +rml::tbb_server* governor::create_rml_server ( rml::tbb_client& client ) { + rml::tbb_server* server = NULL; + if( !UsePrivateRML ) { + ::rml::factory::status_type status = theRMLServerFactory.make_server( server, client ); + if( status != ::rml::factory::st_success ) { + UsePrivateRML = true; + runtime_warning( "rml::tbb_factory::make_server failed with status %x, falling back on private rml", status ); + } + } + if ( !server ) { + __TBB_ASSERT( UsePrivateRML, NULL ); + server = rml::make_private_server( client ); + } + __TBB_ASSERT( server, "Failed to create RML server" ); + return server; +} + +void governor::one_time_init() { + if ( !__TBB_InitOnce::initialization_done() ) { + DoOneTimeInitialization(); + } +} + +/* + There is no portable way to get stack base address in Posix, however the modern + Linux versions provide pthread_attr_np API that can be used to obtain thread's + stack size and base address. Unfortunately even this function does not provide + enough information for the main thread on IA-64 architecture (RSE spill area + and memory stack are allocated as two separate discontinuous chunks of memory), + and there is no portable way to discern the main and the secondary threads. + Thus for macOS* and IA-64 architecture for Linux* OS we use the TBB worker stack size for + all threads and use the current stack top as the stack base. This simplified + approach is based on the following assumptions: + 1) If the default stack size is insufficient for the user app needs, the + required amount will be explicitly specified by the user at the point of the + TBB scheduler initialization (as an argument to tbb::task_scheduler_init + constructor). + 2) When an external thread initializes the scheduler, it has enough space on its + stack. Here "enough" means "at least as much as worker threads have". + 3) If the user app strives to conserve the memory by cutting stack size, it + should do this for TBB workers too (as in the #1). +*/ +static std::uintptr_t get_stack_base(std::size_t stack_size) { + // Stacks are growing top-down. Highest address is called "stack base", + // and the lowest is "stack limit". 
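+    // Rough picture (addresses grow to the right):
+    //
+    //     stack_limit                                          stack_base
+    //     |<-------------------- stack_size ------------------------->|
+    //     lowest addressable byte                        highest address
+    //
+    // On the pthread path below the base is recovered as stack_limit + stack_size,
+    // or approximated by the address of a local variable when the limit is unknown.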
+#if USE_WINTHREAD + suppress_unused_warning(stack_size); + NT_TIB* pteb = (NT_TIB*)NtCurrentTeb(); + __TBB_ASSERT(&pteb < pteb->StackBase && &pteb > pteb->StackLimit, "invalid stack info in TEB"); + return reinterpret_cast<std::uintptr_t>(pteb->StackBase); +#else /* USE_PTHREAD */ + // There is no portable way to get stack base address in Posix, so we use + // non-portable method (on all modern Linux) or the simplified approach + // based on the common sense assumptions. The most important assumption + // is that the main thread's stack size is not less than that of other threads. + + // Points to the lowest addressable byte of a stack. + void* stack_limit = nullptr; +#if __linux__ && !__bg__ + size_t np_stack_size = 0; + pthread_attr_t np_attr_stack; + if (0 == pthread_getattr_np(pthread_self(), &np_attr_stack)) { + if (0 == pthread_attr_getstack(&np_attr_stack, &stack_limit, &np_stack_size)) { + __TBB_ASSERT( &stack_limit > stack_limit, "stack size must be positive" ); + } + pthread_attr_destroy(&np_attr_stack); + } +#endif /* __linux__ */ + std::uintptr_t stack_base{}; + if (stack_limit) { + stack_base = reinterpret_cast<std::uintptr_t>(stack_limit) + stack_size; + } else { + // Use an anchor as a base stack address. + int anchor{}; + stack_base = reinterpret_cast<std::uintptr_t>(&anchor); + } + return stack_base; +#endif /* USE_PTHREAD */ +} + +void governor::init_external_thread() { + one_time_init(); + // Create new scheduler instance with arena + int num_slots = default_num_threads(); + // TODO_REVAMP: support an external thread without an implicit arena + int num_reserved_slots = 1; + unsigned arena_priority_level = 1; // corresponds to tbb::task_arena::priority::normal + std::size_t stack_size = 0; + arena& a = *market::create_arena(num_slots, num_reserved_slots, arena_priority_level, stack_size); + // We need an internal reference to the market. TODO: is it legacy? + market::global_market(false); + // External thread always occupies the first slot + thread_data& td = *new(cache_aligned_allocate(sizeof(thread_data))) thread_data(0, false); + td.attach_arena(a, /*slot index*/ 0); + + stack_size = a.my_market->worker_stack_size(); + std::uintptr_t stack_base = get_stack_base(stack_size); + task_dispatcher& task_disp = td.my_arena_slot->default_task_dispatcher(); + task_disp.set_stealing_threshold(calculate_stealing_threshold(stack_base, stack_size)); + td.attach_task_dispatcher(task_disp); + + td.my_arena_slot->occupy(); + a.my_market->add_external_thread(td); + set_thread_data(td); +} + +void governor::auto_terminate(void* tls) { + __TBB_ASSERT(get_thread_data_if_initialized() == nullptr || + get_thread_data_if_initialized() == tls, NULL); + if (tls) { + thread_data* td = static_cast<thread_data*>(tls); + + // Only external thread can be inside an arena during termination. 
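+        // Teardown order below (sketch): notify exit observers, detach the task
+        // dispatcher, release the arena slot, drop the arena reference, unregister the
+        // thread from the market, and finally release the public market reference that
+        // was added on behalf of this external thread.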
+ if (td->my_arena_slot) { + arena* a = td->my_arena; + market* m = a->my_market; + + a->my_observers.notify_exit_observers(td->my_last_observer, td->my_is_worker); + + td->my_task_dispatcher->m_stealing_threshold = 0; + td->detach_task_dispatcher(); + td->my_arena_slot->release(); + // Release an arena + a->on_thread_leaving<arena::ref_external>(); + + m->remove_external_thread(*td); + // If there was an associated arena, it added a public market reference + m->release( /*is_public*/ true, /*blocking_terminate*/ false); + } + + td->~thread_data(); + cache_aligned_deallocate(td); + + clear_thread_data(); + } + __TBB_ASSERT(get_thread_data_if_initialized() == nullptr, NULL); +} + +void governor::initialize_rml_factory () { + ::rml::factory::status_type res = theRMLServerFactory.open(); + UsePrivateRML = res != ::rml::factory::st_success; +} + +#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE +void __TBB_EXPORTED_FUNC get(d1::task_scheduler_handle& handle) { + handle.m_ctl = new(allocate_memory(sizeof(global_control))) global_control(global_control::scheduler_handle, 1); +} + +void release_impl(d1::task_scheduler_handle& handle) { + if (handle.m_ctl != nullptr) { + handle.m_ctl->~global_control(); + deallocate_memory(handle.m_ctl); + handle.m_ctl = nullptr; + } +} + +bool finalize_impl(d1::task_scheduler_handle& handle) { + market::global_market_mutex_type::scoped_lock lock( market::theMarketMutex ); + bool ok = true; // ok if theMarket does not exist yet + market* m = market::theMarket; // read the state of theMarket + if (m != nullptr) { + lock.release(); + __TBB_ASSERT(is_present(*handle.m_ctl), "finalize or release was already called on this object"); + thread_data* td = governor::get_thread_data_if_initialized(); + if (td) { + task_dispatcher* task_disp = td->my_task_dispatcher; + __TBB_ASSERT(task_disp, nullptr); + if (task_disp->m_properties.outermost && !td->my_is_worker) { // is not inside a parallel region + governor::auto_terminate(td); + } + } + if (remove_and_check_if_empty(*handle.m_ctl)) { + ok = m->release(/*is_public*/ true, /*blocking_terminate*/ true); + } else { + ok = false; + } + } + return ok; +} + +bool __TBB_EXPORTED_FUNC finalize(d1::task_scheduler_handle& handle, std::intptr_t mode) { + if (mode == d1::release_nothrowing) { + release_impl(handle); + return true; + } else { + bool ok = finalize_impl(handle); + // TODO: it is unsafe when finalize is called concurrently and further library unload + release_impl(handle); + if (mode == d1::finalize_throwing && !ok) { + throw_exception(exception_id::unsafe_wait); + } + return ok; + } +} +#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE + +#if __TBB_ARENA_BINDING + +#if __TBB_WEAK_SYMBOLS_PRESENT +#pragma weak __TBB_internal_initialize_system_topology +#pragma weak __TBB_internal_allocate_binding_handler +#pragma weak __TBB_internal_deallocate_binding_handler +#pragma weak __TBB_internal_apply_affinity +#pragma weak __TBB_internal_restore_affinity +#pragma weak __TBB_internal_get_default_concurrency + +extern "C" { +void __TBB_internal_initialize_system_topology( + size_t groups_num, + int& numa_nodes_count, int*& numa_indexes_list, + int& core_types_count, int*& core_types_indexes_list +); + +//TODO: consider renaming to `create_binding_handler` and `destroy_binding_handler` +binding_handler* __TBB_internal_allocate_binding_handler( int slot_num, int numa_id, int core_type_id, int max_threads_per_core ); +void __TBB_internal_deallocate_binding_handler( binding_handler* handler_ptr ); + +void 
__TBB_internal_apply_affinity( binding_handler* handler_ptr, int slot_num ); +void __TBB_internal_restore_affinity( binding_handler* handler_ptr, int slot_num ); + +int __TBB_internal_get_default_concurrency( int numa_id, int core_type_id, int max_threads_per_core ); +} +#endif /* __TBB_WEAK_SYMBOLS_PRESENT */ + +// Stubs that will be used if TBBbind library is unavailable. +static binding_handler* dummy_allocate_binding_handler ( int, int, int, int ) { return nullptr; } +static void dummy_deallocate_binding_handler ( binding_handler* ) { } +static void dummy_apply_affinity ( binding_handler*, int ) { } +static void dummy_restore_affinity ( binding_handler*, int ) { } +static int dummy_get_default_concurrency( int, int, int ) { return governor::default_num_threads(); } + +// Handlers for communication with TBBbind +static void (*initialize_system_topology_ptr)( + size_t groups_num, + int& numa_nodes_count, int*& numa_indexes_list, + int& core_types_count, int*& core_types_indexes_list +) = nullptr; + +static binding_handler* (*allocate_binding_handler_ptr)( int slot_num, int numa_id, int core_type_id, int max_threads_per_core ) + = dummy_allocate_binding_handler; +static void (*deallocate_binding_handler_ptr)( binding_handler* handler_ptr ) + = dummy_deallocate_binding_handler; +static void (*apply_affinity_ptr)( binding_handler* handler_ptr, int slot_num ) + = dummy_apply_affinity; +static void (*restore_affinity_ptr)( binding_handler* handler_ptr, int slot_num ) + = dummy_restore_affinity; +int (*get_default_concurrency_ptr)( int numa_id, int core_type_id, int max_threads_per_core ) + = dummy_get_default_concurrency; + +#if _WIN32 || _WIN64 || __linux__ +// Table describing how to link the handlers. +static const dynamic_link_descriptor TbbBindLinkTable[] = { + DLD(__TBB_internal_initialize_system_topology, initialize_system_topology_ptr), + DLD(__TBB_internal_allocate_binding_handler, allocate_binding_handler_ptr), + DLD(__TBB_internal_deallocate_binding_handler, deallocate_binding_handler_ptr), + DLD(__TBB_internal_apply_affinity, apply_affinity_ptr), + DLD(__TBB_internal_restore_affinity, restore_affinity_ptr), + DLD(__TBB_internal_get_default_concurrency, get_default_concurrency_ptr) +}; + +static const unsigned LinkTableSize = sizeof(TbbBindLinkTable) / sizeof(dynamic_link_descriptor); + +#if TBB_USE_DEBUG +#define DEBUG_SUFFIX "_debug" +#else +#define DEBUG_SUFFIX +#endif /* TBB_USE_DEBUG */ + +#if _WIN32 || _WIN64 +#define LIBRARY_EXTENSION ".dll" +#define LIBRARY_PREFIX +#elif __linux__ +#define LIBRARY_EXTENSION __TBB_STRING(.so.3) +#define LIBRARY_PREFIX "lib" +#endif /* __linux__ */ + +#define TBBBIND_NAME LIBRARY_PREFIX "tbbbind" DEBUG_SUFFIX LIBRARY_EXTENSION +#define TBBBIND_2_0_NAME LIBRARY_PREFIX "tbbbind_2_0" DEBUG_SUFFIX LIBRARY_EXTENSION +#define TBBBIND_2_4_NAME LIBRARY_PREFIX "tbbbind_2_4" DEBUG_SUFFIX LIBRARY_EXTENSION +#endif /* _WIN32 || _WIN64 || __linux__ */ + +// Representation of system hardware topology information on the TBB side. +// System topology may be initialized by third-party component (e.g. hwloc) +// or just filled in with default stubs. 
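+// For orientation, a hedged sketch of how the entry points below are expected to be
+// reached from user code through the public oneapi/tbb/info.h and task_arena interfaces
+// (illustrative only; the exact public wrappers live outside this file):
+//
+//     std::vector<tbb::numa_node_id> nodes = tbb::info::numa_nodes();
+//     std::vector<tbb::task_arena> arenas;
+//     for (tbb::numa_node_id id : nodes)
+//         arenas.emplace_back(tbb::task_arena::constraints(id));  // pin an arena to one NUMA node
+//
+// numa_nodes() maps onto numa_node_count()/fill_numa_indices(), and constraints-based
+// arena sizing maps onto constraints_default_concurrency() defined further below.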
+namespace system_topology { + +constexpr int automatic = -1; + +static std::atomic<do_once_state> initialization_state; + +namespace { +int numa_nodes_count = 0; +int* numa_nodes_indexes = nullptr; + +int core_types_count = 0; +int* core_types_indexes = nullptr; + +const char* load_tbbbind_shared_object() { +#if _WIN32 || _WIN64 || __linux__ +#if _WIN32 && !_WIN64 + // For 32-bit Windows applications, process affinity masks can only support up to 32 logical CPUs. + SYSTEM_INFO si; + GetNativeSystemInfo(&si); + if (si.dwNumberOfProcessors > 32) return nullptr; +#endif /* _WIN32 && !_WIN64 */ + for (const auto& tbbbind_version : {TBBBIND_2_4_NAME, TBBBIND_2_0_NAME, TBBBIND_NAME}) { + if (dynamic_link(tbbbind_version, TbbBindLinkTable, LinkTableSize)) { + return tbbbind_version; + } + } +#endif /* _WIN32 || _WIN64 || __linux__ */ + return nullptr; +} + +int processor_groups_num() { +#if _WIN32 + return NumberOfProcessorGroups(); +#else + // Stub to improve code readability by reducing number of the compile-time conditions + return 1; +#endif +} +} // internal namespace + +// Tries to load TBBbind library API, if success, gets NUMA topology information from it, +// in another case, fills NUMA topology by stubs. +void initialization_impl() { + governor::one_time_init(); + + if (const char* tbbbind_name = load_tbbbind_shared_object()) { + initialize_system_topology_ptr( + processor_groups_num(), + numa_nodes_count, numa_nodes_indexes, + core_types_count, core_types_indexes + ); + + PrintExtraVersionInfo("TBBBIND", tbbbind_name); + return; + } + + static int dummy_index = automatic; + + numa_nodes_count = 1; + numa_nodes_indexes = &dummy_index; + + core_types_count = 1; + core_types_indexes = &dummy_index; + + PrintExtraVersionInfo("TBBBIND", "UNAVAILABLE"); +} + +void initialize() { + atomic_do_once(initialization_impl, initialization_state); +} +} // namespace system_topology + +binding_handler* construct_binding_handler(int slot_num, int numa_id, int core_type_id, int max_threads_per_core) { + system_topology::initialize(); + return allocate_binding_handler_ptr(slot_num, numa_id, core_type_id, max_threads_per_core); +} + +void destroy_binding_handler(binding_handler* handler_ptr) { + __TBB_ASSERT(deallocate_binding_handler_ptr, "tbbbind loading was not performed"); + deallocate_binding_handler_ptr(handler_ptr); +} + +void apply_affinity_mask(binding_handler* handler_ptr, int slot_index) { + __TBB_ASSERT(slot_index >= 0, "Negative thread index"); + __TBB_ASSERT(apply_affinity_ptr, "tbbbind loading was not performed"); + apply_affinity_ptr(handler_ptr, slot_index); +} + +void restore_affinity_mask(binding_handler* handler_ptr, int slot_index) { + __TBB_ASSERT(slot_index >= 0, "Negative thread index"); + __TBB_ASSERT(restore_affinity_ptr, "tbbbind loading was not performed"); + restore_affinity_ptr(handler_ptr, slot_index); +} + +unsigned __TBB_EXPORTED_FUNC numa_node_count() { + system_topology::initialize(); + return system_topology::numa_nodes_count; +} + +void __TBB_EXPORTED_FUNC fill_numa_indices(int* index_array) { + system_topology::initialize(); + std::memcpy(index_array, system_topology::numa_nodes_indexes, system_topology::numa_nodes_count * sizeof(int)); +} + +int __TBB_EXPORTED_FUNC numa_default_concurrency(int node_id) { + if (node_id >= 0) { + system_topology::initialize(); + int result = get_default_concurrency_ptr( + node_id, + /*core_type*/system_topology::automatic, + /*threads_per_core*/system_topology::automatic + ); + if (result > 0) return result; + } + return 
governor::default_num_threads(); +} + +unsigned __TBB_EXPORTED_FUNC core_type_count(intptr_t /*reserved*/) { + system_topology::initialize(); + return system_topology::core_types_count; +} + +void __TBB_EXPORTED_FUNC fill_core_type_indices(int* index_array, intptr_t /*reserved*/) { + system_topology::initialize(); + std::memcpy(index_array, system_topology::core_types_indexes, system_topology::core_types_count * sizeof(int)); +} + +void constraints_assertion(d1::constraints c) { + bool is_topology_initialized = system_topology::initialization_state == do_once_state::initialized; + __TBB_ASSERT_RELEASE(c.max_threads_per_core == system_topology::automatic || c.max_threads_per_core > 0, + "Wrong max_threads_per_core constraints field value."); + + auto numa_nodes_begin = system_topology::numa_nodes_indexes; + auto numa_nodes_end = system_topology::numa_nodes_indexes + system_topology::numa_nodes_count; + __TBB_ASSERT_RELEASE( + c.numa_id == system_topology::automatic || + (is_topology_initialized && std::find(numa_nodes_begin, numa_nodes_end, c.numa_id) != numa_nodes_end), + "The constraints::numa_id value is not known to the library. Use tbb::info::numa_nodes() to get the list of possible values."); + + int* core_types_begin = system_topology::core_types_indexes; + int* core_types_end = system_topology::core_types_indexes + system_topology::core_types_count; + __TBB_ASSERT_RELEASE(c.core_type == system_topology::automatic || + (is_topology_initialized && std::find(core_types_begin, core_types_end, c.core_type) != core_types_end), + "The constraints::core_type value is not known to the library. Use tbb::info::core_types() to get the list of possible values."); +} + +int __TBB_EXPORTED_FUNC constraints_default_concurrency(const d1::constraints& c, intptr_t /*reserved*/) { + constraints_assertion(c); + + if (c.numa_id >= 0 || c.core_type >= 0 || c.max_threads_per_core > 0) { + system_topology::initialize(); + return get_default_concurrency_ptr(c.numa_id, c.core_type, c.max_threads_per_core); + } + return governor::default_num_threads(); +} + +int __TBB_EXPORTED_FUNC constraints_threads_per_core(const d1::constraints&, intptr_t /*reserved*/) { + return system_topology::automatic; +} +#endif /* __TBB_ARENA_BINDING */ + +} // namespace r1 +} // namespace detail +} // namespace tbb diff --git a/contrib/libs/tbb/src/tbb/governor.h b/contrib/libs/tbb/src/tbb/governor.h new file mode 100644 index 0000000000..0ff4781414 --- /dev/null +++ b/contrib/libs/tbb/src/tbb/governor.h @@ -0,0 +1,158 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef _TBB_governor_H +#define _TBB_governor_H + +#include "rml_tbb.h" + +#include "misc.h" // for AvailableHwConcurrency +#include "tls.h" + +namespace tbb { +namespace detail { +namespace r1 { + +class market; +class thread_data; +class __TBB_InitOnce; + +#if __TBB_USE_ITT_NOTIFY +//! 
Defined in profiling.cpp +extern bool ITT_Present; +#endif + +typedef std::size_t stack_size_type; + +//------------------------------------------------------------------------ +// Class governor +//------------------------------------------------------------------------ + +//! The class handles access to the single instance of market, and to TLS to keep scheduler instances. +/** It also supports automatic on-demand initialization of the TBB scheduler. + The class contains only static data members and methods.*/ +class governor { +private: + friend class __TBB_InitOnce; + friend class market; + + // TODO: consider using thread_local (measure performance and side effects) + //! TLS for scheduler instances associated with individual threads + static basic_tls<thread_data*> theTLS; + + //! Caches the maximal level of parallelism supported by the hardware + static unsigned DefaultNumberOfThreads; + + //! Caches the size of OS regular memory page + static std::size_t DefaultPageSize; + + // TODO (TBB_REVAMP_TODO): reconsider constant names + static rml::tbb_factory theRMLServerFactory; + + static bool UsePrivateRML; + + // Flags for runtime-specific conditions + static cpu_features_type cpu_features; + static bool is_rethrow_broken; + + //! Create key for thread-local storage and initialize RML. + static void acquire_resources (); + + //! Destroy the thread-local storage key and deinitialize RML. + static void release_resources (); + + static rml::tbb_server* create_rml_server ( rml::tbb_client& ); + +public: + static unsigned default_num_threads () { + // No memory fence required, because at worst each invoking thread calls AvailableHwConcurrency once. + return DefaultNumberOfThreads ? DefaultNumberOfThreads : + DefaultNumberOfThreads = AvailableHwConcurrency(); + } + static std::size_t default_page_size () { + return DefaultPageSize ? DefaultPageSize : + DefaultPageSize = DefaultSystemPageSize(); + } + static void one_time_init(); + //! Processes scheduler initialization request (possibly nested) in an external thread + /** If necessary creates new instance of arena and/or local scheduler. + The auto_init argument specifies if the call is due to automatic initialization. **/ + static void init_external_thread(); + + //! The routine to undo automatic initialization. + /** The signature is written with void* so that the routine + can be the destructor argument to pthread_key_create. */ + static void auto_terminate(void* tls); + + //! Obtain the thread-local instance of the thread data. + /** If the scheduler has not been initialized yet, initialization is done automatically. + Note that auto-initialized scheduler instance is destroyed only when its thread terminates. **/ + static thread_data* get_thread_data() { + thread_data* td = theTLS.get(); + if (td) { + return td; + } + init_external_thread(); + td = theTLS.get(); + __TBB_ASSERT(td, NULL); + return td; + } + + static void set_thread_data(thread_data& td) { + theTLS.set(&td); + } + + static void clear_thread_data() { + theTLS.set(nullptr); + } + + static thread_data* get_thread_data_if_initialized () { + return theTLS.get(); + } + + static bool is_thread_data_set(thread_data* td) { + return theTLS.get() == td; + } + + //! Undo automatic initialization if necessary; call when a thread exits. 
+ static void terminate_external_thread() { + auto_terminate(get_thread_data_if_initialized()); + } + + static void initialize_rml_factory (); + + static bool does_client_join_workers (const rml::tbb_client &client); + + static bool speculation_enabled() { return cpu_features.rtm_enabled; } + + static bool wait_package_enabled() { return cpu_features.waitpkg_enabled; } + + static bool rethrow_exception_broken() { return is_rethrow_broken; } + + static bool is_itt_present() { +#if __TBB_USE_ITT_NOTIFY + return ITT_Present; +#else + return false; +#endif + } +}; // class governor + +} // namespace r1 +} // namespace detail +} // namespace tbb + +#endif /* _TBB_governor_H */ diff --git a/contrib/libs/tbb/src/tbb/intrusive_list.h b/contrib/libs/tbb/src/tbb/intrusive_list.h new file mode 100644 index 0000000000..699bc149aa --- /dev/null +++ b/contrib/libs/tbb/src/tbb/intrusive_list.h @@ -0,0 +1,242 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef _TBB_intrusive_list_H +#define _TBB_intrusive_list_H + +namespace tbb { +namespace detail { +namespace r1 { + +//! Data structure to be inherited by the types that can form intrusive lists. +/** Intrusive list is formed by means of the member_intrusive_list<T> template class. + Note that type T must derive from intrusive_list_node either publicly or + declare instantiation member_intrusive_list<T> as a friend. + This class implements a limited subset of std::list interface. **/ +struct intrusive_list_node { + intrusive_list_node* my_prev_node{}; + intrusive_list_node* my_next_node{}; +#if TBB_USE_ASSERT + intrusive_list_node() { my_prev_node = my_next_node = this; } +#endif /* TBB_USE_ASSERT */ +}; + +//! List of element of type T, where T is derived from intrusive_list_node +/** The class is not thread safe. **/ +template <class List, class T> +class intrusive_list_base { + //! Pointer to the head node + intrusive_list_node my_head; + + //! 
Number of list elements + std::size_t my_size; + + static intrusive_list_node& node ( T& item ) { return List::node(item); } + + static T& item ( intrusive_list_node* node ) { return List::item(node); } + + static const T& item( const intrusive_list_node* node ) { return List::item(node); } + + template <typename DereferenceType> + class iterator_impl { + static_assert(std::is_same<DereferenceType, T>::value || + std::is_same<DereferenceType, const T>::value, + "Incorrect DereferenceType in iterator_impl"); + + using pointer_type = typename std::conditional<std::is_same<DereferenceType, T>::value, + intrusive_list_node*, + const intrusive_list_node*>::type; + + public: + iterator_impl() : my_pos(nullptr) {} + + iterator_impl( pointer_type pos ) : my_pos(pos) {} + + iterator_impl& operator++() { + my_pos = my_pos->my_next_node; + return *this; + } + + iterator_impl operator++( int ) { + iterator_impl it(*this); + ++*this; + return it; + } + + iterator_impl& operator--() { + my_pos = my_pos->my_prev_node; + return *this; + } + + iterator_impl operator--( int ) { + iterator_impl it(*this); + --*this; + return it; + } + + bool operator==( const iterator_impl& rhs ) const { + return my_pos == rhs.my_pos; + } + + bool operator!=( const iterator_impl& rhs ) const { + return my_pos != rhs.my_pos; + } + + DereferenceType& operator*() const { + return intrusive_list_base::item(my_pos); + } + + DereferenceType* operator->() const { + return &intrusive_list_base::item(my_pos); + } + private: + // Node the iterator points to at the moment + pointer_type my_pos; + }; // class iterator_impl + + void assert_ok () const { + __TBB_ASSERT( (my_head.my_prev_node == &my_head && !my_size) || + (my_head.my_next_node != &my_head && my_size >0), "intrusive_list_base corrupted" ); +#if TBB_USE_ASSERT >= 2 + std::size_t i = 0; + for ( intrusive_list_node *n = my_head.my_next_node; n != &my_head; n = n->my_next_node ) + ++i; + __TBB_ASSERT( my_size == i, "Wrong size" ); +#endif /* TBB_USE_ASSERT >= 2 */ + } + +public: + using iterator = iterator_impl<T>; + using const_iterator = iterator_impl<const T>; + + intrusive_list_base () : my_size(0) { + my_head.my_prev_node = &my_head; + my_head.my_next_node = &my_head; + } + + bool empty () const { return my_head.my_next_node == &my_head; } + + std::size_t size () const { return my_size; } + + iterator begin () { return iterator(my_head.my_next_node); } + + iterator end () { return iterator(&my_head); } + + const_iterator begin () const { return const_iterator(my_head.my_next_node); } + + const_iterator end () const { return const_iterator(&my_head); } + + void push_front ( T& val ) { + __TBB_ASSERT( node(val).my_prev_node == &node(val) && node(val).my_next_node == &node(val), + "Object with intrusive list node can be part of only one intrusive list simultaneously" ); + // An object can be part of only one intrusive list at the given moment via the given node member + node(val).my_prev_node = &my_head; + node(val).my_next_node = my_head.my_next_node; + my_head.my_next_node->my_prev_node = &node(val); + my_head.my_next_node = &node(val); + ++my_size; + assert_ok(); + } + + void remove( T& val ) { + __TBB_ASSERT( node(val).my_prev_node != &node(val) && node(val).my_next_node != &node(val), "Element to remove is not in the list" ); + __TBB_ASSERT( node(val).my_prev_node->my_next_node == &node(val) && node(val).my_next_node->my_prev_node == &node(val), "Element to remove is not in the list" ); + --my_size; + node(val).my_next_node->my_prev_node = node(val).my_prev_node; + 
node(val).my_prev_node->my_next_node = node(val).my_next_node; +#if TBB_USE_ASSERT + node(val).my_prev_node = node(val).my_next_node = &node(val); +#endif + assert_ok(); + } + + iterator erase ( iterator it ) { + T& val = *it; + ++it; + remove( val ); + return it; + } + +}; // intrusive_list_base + +#if __TBB_TODO +// With standard compliant compilers memptr_intrusive_list could be named simply intrusive_list, +// and inheritance based intrusive_list version would become its partial specialization. +// Here are the corresponding declarations: + +struct dummy_intrusive_list_item { intrusive_list_node my_node; }; + +template <class T, class U = dummy_intrusive_list_item, intrusive_list_node U::*NodePtr = &dummy_intrusive_list_item::my_node> +class intrusive_list : public intrusive_list_base<intrusive_list<T, U, NodePtr>, T>; + +template <class T> +class intrusive_list<T, dummy_intrusive_list_item, &dummy_intrusive_list_item::my_node> + : public intrusive_list_base<intrusive_list<T>, T>; + +#endif /* __TBB_TODO */ + +//! Double linked list of items of type T containing a member of type intrusive_list_node. +/** NodePtr is a member pointer to the node data field. Class U is either T or + a base class of T containing the node member. Default values exist for the sake + of a partial specialization working with inheritance case. + + The list does not have ownership of its items. Its purpose is to avoid dynamic + memory allocation when forming lists of existing objects. + + The class is not thread safe. **/ +template <class T, class U, intrusive_list_node U::*NodePtr> +class memptr_intrusive_list : public intrusive_list_base<memptr_intrusive_list<T, U, NodePtr>, T> +{ + friend class intrusive_list_base<memptr_intrusive_list<T, U, NodePtr>, T>; + + static intrusive_list_node& node ( T& val ) { return val.*NodePtr; } + + static T& item ( intrusive_list_node* node ) { + // Cannot use __TBB_offsetof (and consequently __TBB_get_object_ref) macro + // with *NodePtr argument because gcc refuses to interpret pasted "->" and "*" + // as member pointer dereferencing operator, and explicit usage of ## in + // __TBB_offsetof implementation breaks operations with normal member names. + return *reinterpret_cast<T*>((char*)node - ((ptrdiff_t)&(reinterpret_cast<T*>(0x1000)->*NodePtr) - 0x1000)); + } + + static const T& item( const intrusive_list_node* node ) { + return item(const_cast<intrusive_list_node*>(node)); + } + +}; // intrusive_list<T, U, NodePtr> + +//! Double linked list of items of type T that is derived from intrusive_list_node class. +/** The list does not have ownership of its items. Its purpose is to avoid dynamic + memory allocation when forming lists of existing objects. + + The class is not thread safe. 
**/ +template <class T> +class intrusive_list : public intrusive_list_base<intrusive_list<T>, T> +{ + friend class intrusive_list_base<intrusive_list<T>, T>; + + static intrusive_list_node& node ( T& val ) { return val; } + + static T& item ( intrusive_list_node* node ) { return *static_cast<T*>(node); } + + static const T& item( const intrusive_list_node* node ) { return *static_cast<const T*>(node); } +}; // intrusive_list<T> + +} // namespace r1 +} // namespace detail +} // namespace tbb + +#endif /* _TBB_intrusive_list_H */ diff --git a/contrib/libs/tbb/src/tbb/itt_notify.cpp b/contrib/libs/tbb/src/tbb/itt_notify.cpp new file mode 100644 index 0000000000..0e60579a62 --- /dev/null +++ b/contrib/libs/tbb/src/tbb/itt_notify.cpp @@ -0,0 +1,69 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#if __TBB_USE_ITT_NOTIFY + +#if _WIN32||_WIN64 + #ifndef UNICODE + #define UNICODE + #endif +#else + #pragma weak dlopen + #pragma weak dlsym + #pragma weak dlerror +#endif /* WIN */ + +#if __TBB_BUILD + +extern "C" void ITT_DoOneTimeInitialization(); +#define __itt_init_ittlib_name(x,y) (ITT_DoOneTimeInitialization(), true) + +#elif __TBBMALLOC_BUILD + +extern "C" void MallocInitializeITT(); +#define __itt_init_ittlib_name(x,y) (MallocInitializeITT(), true) + +#else +#error This file is expected to be used for either TBB or TBB allocator build. +#endif // __TBB_BUILD + +#include "tools_api/ittnotify_static.c" + +namespace tbb { +namespace detail { +namespace r1 { + +/** This extra proxy method is necessary since __itt_init_lib is declared as static **/ +int __TBB_load_ittnotify() { +#if !(_WIN32||_WIN64) + // tool_api crashes without dlopen, check that it's present. Common case + // for lack of dlopen is static binaries, i.e. ones build with -static. + if (dlopen == NULL) + return 0; +#endif + return __itt_init_ittlib(NULL, // groups for: + (__itt_group_id)(__itt_group_sync // prepare/cancel/acquired/releasing + | __itt_group_thread // name threads + | __itt_group_stitch // stack stitching + | __itt_group_structure + )); +} + +} //namespace r1 +} //namespace detail +} // namespace tbb + +#endif /* __TBB_USE_ITT_NOTIFY */ diff --git a/contrib/libs/tbb/src/tbb/itt_notify.h b/contrib/libs/tbb/src/tbb/itt_notify.h new file mode 100644 index 0000000000..9978bcd7cb --- /dev/null +++ b/contrib/libs/tbb/src/tbb/itt_notify.h @@ -0,0 +1,114 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef _TBB_ITT_NOTIFY +#define _TBB_ITT_NOTIFY + +#include "oneapi/tbb/detail/_config.h" + +#if __TBB_USE_ITT_NOTIFY + +#if _WIN32||_WIN64 + #ifndef UNICODE + #define UNICODE + #endif +#endif /* WIN */ + +#ifndef INTEL_ITTNOTIFY_API_PRIVATE +#define INTEL_ITTNOTIFY_API_PRIVATE +#endif + +#include "tools_api/ittnotify.h" +#include "tools_api/legacy/ittnotify.h" +extern "C" void __itt_fini_ittlib(void); + +#if _WIN32||_WIN64 + #undef _T +#endif /* WIN */ + +#endif /* __TBB_USE_ITT_NOTIFY */ + +#if !ITT_CALLER_NULL +#define ITT_CALLER_NULL ((__itt_caller)0) +#endif + +namespace tbb { +namespace detail { +namespace r1 { + +//! Unicode support +#if (_WIN32||_WIN64) && !__MINGW32__ + //! Unicode character type. Always wchar_t on Windows. + /** We do not use typedefs from Windows TCHAR family to keep consistence of TBB coding style. **/ + using tchar = wchar_t; + //! Standard Windows macro to markup the string literals. + #define _T(string_literal) L ## string_literal +#else /* !WIN */ + using tchar = char; + //! Standard Windows style macro to markup the string literals. + #define _T(string_literal) string_literal +#endif /* !WIN */ + +//! Display names of internal synchronization types +extern const tchar + *SyncType_Scheduler; +//! Display names of internal synchronization components/scenarios +extern const tchar + *SyncObj_ContextsList + ; + +#if __TBB_USE_ITT_NOTIFY +// const_cast<void*>() is necessary to cast off volatility +#define ITT_NOTIFY(name,obj) __itt_##name(const_cast<void*>(static_cast<volatile void*>(obj))) +#define ITT_THREAD_SET_NAME(name) __itt_thread_set_name(name) +#define ITT_FINI_ITTLIB() __itt_fini_ittlib() +#define ITT_SYNC_CREATE(obj, type, name) __itt_sync_create((void*)(obj), type, name, 2) +#define ITT_STACK_CREATE(obj) obj = __itt_stack_caller_create() +#define ITT_STACK_DESTROY(obj) (obj!=nullptr) ? __itt_stack_caller_destroy(static_cast<__itt_caller>(obj)) : ((void)0) +#define ITT_CALLEE_ENTER(cond, t, obj) if(cond) {\ + __itt_stack_callee_enter(static_cast<__itt_caller>(obj));\ + __itt_sync_acquired(t);\ + } +#define ITT_CALLEE_LEAVE(cond, obj) (cond) ? __itt_stack_callee_leave(static_cast<__itt_caller>(obj)) : ((void)0) + +#define ITT_TASK_GROUP(obj,name,parent) r1::itt_make_task_group(d1::ITT_DOMAIN_MAIN,(void*)(obj),ALGORITHM,(void*)(parent),(parent!=nullptr) ? 
ALGORITHM : FLOW_NULL,name) +#define ITT_TASK_BEGIN(obj,name,id) r1::itt_task_begin(d1::ITT_DOMAIN_MAIN,(void*)(id),ALGORITHM,(void*)(obj),ALGORITHM,name) +#define ITT_TASK_END r1::itt_task_end(d1::ITT_DOMAIN_MAIN) + + +#else /* !__TBB_USE_ITT_NOTIFY */ + +#define ITT_NOTIFY(name,obj) ((void)0) +#define ITT_THREAD_SET_NAME(name) ((void)0) +#define ITT_FINI_ITTLIB() ((void)0) +#define ITT_SYNC_CREATE(obj, type, name) ((void)0) +#define ITT_STACK_CREATE(obj) ((void)0) +#define ITT_STACK_DESTROY(obj) ((void)0) +#define ITT_CALLEE_ENTER(cond, t, obj) ((void)0) +#define ITT_CALLEE_LEAVE(cond, obj) ((void)0) +#define ITT_TASK_GROUP(type,name,parent) ((void)0) +#define ITT_TASK_BEGIN(type,name,id) ((void)0) +#define ITT_TASK_END ((void)0) + +#endif /* !__TBB_USE_ITT_NOTIFY */ + +int __TBB_load_ittnotify(); + +} // namespace r1 +} // namespace detail +} // namespace tbb + +#endif /* _TBB_ITT_NOTIFY */ diff --git a/contrib/libs/tbb/src/tbb/mailbox.h b/contrib/libs/tbb/src/tbb/mailbox.h new file mode 100644 index 0000000000..2f49e9b35e --- /dev/null +++ b/contrib/libs/tbb/src/tbb/mailbox.h @@ -0,0 +1,249 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef _TBB_mailbox_H +#define _TBB_mailbox_H + +#include "oneapi/tbb/cache_aligned_allocator.h" +#include "oneapi/tbb/detail/_small_object_pool.h" + +#include "arena_slot.h" +#include "scheduler_common.h" + +#include <atomic> + +namespace tbb { +namespace detail { +namespace r1 { + +struct task_proxy : public d1::task { + static const intptr_t pool_bit = 1<<0; + static const intptr_t mailbox_bit = 1<<1; + static const intptr_t location_mask = pool_bit | mailbox_bit; + /* All but two low-order bits represent a (task*). + Two low-order bits mean: + 1 = proxy is/was/will be in task pool + 2 = proxy is/was/will be in mailbox */ + std::atomic<intptr_t> task_and_tag; + + //! Pointer to next task_proxy in a mailbox + std::atomic<task_proxy*> next_in_mailbox; + + //! Mailbox to which this was mailed. + mail_outbox* outbox; + + //! Task affinity id which is referenced + d1::slot_id slot; + + d1::small_object_allocator allocator; + + //! True if the proxy is stored both in its sender's pool and in the destination mailbox. + static bool is_shared ( intptr_t tat ) { + return (tat & location_mask) == location_mask; + } + + //! Returns a pointer to the encapsulated task or nullptr. + static task* task_ptr ( intptr_t tat ) { + return (task*)(tat & ~location_mask); + } + + //! Returns a pointer to the encapsulated task or nullptr, and frees proxy if necessary. + template<intptr_t from_bit> + inline task* extract_task () { + // __TBB_ASSERT( prefix().extra_state == es_task_proxy, "Normal task misinterpreted as a proxy?" 
); + intptr_t tat = task_and_tag.load(std::memory_order_acquire); + __TBB_ASSERT( tat == from_bit || (is_shared(tat) && task_ptr(tat)), + "Proxy's tag cannot specify both locations if the proxy " + "was retrieved from one of its original locations" ); + if ( tat != from_bit ) { + const intptr_t cleaner_bit = location_mask & ~from_bit; + // Attempt to transition the proxy to the "empty" state with + // cleaner_bit specifying entity responsible for its eventual freeing. + // Explicit cast to void* is to work around a seeming ICC 11.1 bug. + if ( task_and_tag.compare_exchange_strong(tat, cleaner_bit) ) { + // Successfully grabbed the task, and left new owner with the job of freeing the proxy + return task_ptr(tat); + } + } + // Proxied task has already been claimed from another proxy location. + __TBB_ASSERT( task_and_tag.load(std::memory_order_relaxed) == from_bit, "Empty proxy cannot contain non-zero task pointer" ); + return nullptr; + } + + virtual task* execute(d1::execution_data&) { + __TBB_ASSERT_RELEASE(false, nullptr); + return nullptr; + } + virtual task* cancel(d1::execution_data&) { + __TBB_ASSERT_RELEASE(false, nullptr); + return nullptr; + } +}; // struct task_proxy + +//! Internal representation of mail_outbox, without padding. +class unpadded_mail_outbox { +protected: + typedef std::atomic<task_proxy*> atomic_proxy_ptr; + + //! Pointer to first task_proxy in mailbox, or nullptr if box is empty. + atomic_proxy_ptr my_first; + + //! Pointer to pointer that will point to next item in the queue. Never nullptr. + std::atomic<atomic_proxy_ptr*> my_last; + + //! Owner of mailbox is not executing a task, and has drained its own task pool. + std::atomic<bool> my_is_idle; +}; + +// TODO: - consider moving to arena slot +//! Class representing where mail is put. +/** Padded to occupy a cache line. */ +class mail_outbox : padded<unpadded_mail_outbox> { + + task_proxy* internal_pop( isolation_type isolation ) { + task_proxy* curr = my_first.load(std::memory_order_acquire); + if ( !curr ) + return nullptr; + atomic_proxy_ptr* prev_ptr = &my_first; + if ( isolation != no_isolation ) { + while ( task_accessor::isolation(*curr) != isolation ) { + prev_ptr = &curr->next_in_mailbox; + // The next_in_mailbox should be read with acquire to guarantee (*curr) consistency. + curr = curr->next_in_mailbox.load(std::memory_order_acquire); + if ( !curr ) + return nullptr; + } + } + // There is a first item in the mailbox. See if there is a second. + // The next_in_mailbox should be read with acquire to guarantee (*second) consistency. + if ( task_proxy* second = curr->next_in_mailbox.load(std::memory_order_acquire) ) { + // There are at least two items, so first item can be popped easily. + prev_ptr->store(second, std::memory_order_relaxed); + } else { + // There is only one item. Some care is required to pop it. + + prev_ptr->store(nullptr, std::memory_order_relaxed); + atomic_proxy_ptr* expected = &curr->next_in_mailbox; + if ( my_last.compare_exchange_strong( expected, prev_ptr ) ) { + // Successfully transitioned mailbox from having one item to having none. + __TBB_ASSERT( !curr->next_in_mailbox.load(std::memory_order_relaxed), nullptr); + } else { + // Some other thread updated my_last but has not filled in first->next_in_mailbox + // Wait until first item points to second item. + atomic_backoff backoff; + // The next_in_mailbox should be read with acquire to guarantee (*second) consistency. 
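+                // This wait pairs with the two-step publication in push(): the producer
+                // has already advanced my_last via exchange() but may not yet have done
+                // the release store of the new proxy into the previous link, so spin
+                // (with backoff) until the pointer to the second item becomes visible.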
+ while ( !(second = curr->next_in_mailbox.load(std::memory_order_acquire)) ) backoff.pause(); + prev_ptr->store( second, std::memory_order_relaxed); + } + } + assert_pointer_valid(curr); + return curr; + } +public: + friend class mail_inbox; + + //! Push task_proxy onto the mailbox queue of another thread. + /** Implementation is wait-free. */ + void push( task_proxy* t ) { + assert_pointer_valid(t); + t->next_in_mailbox.store(nullptr, std::memory_order_relaxed); + atomic_proxy_ptr* const link = my_last.exchange(&t->next_in_mailbox); + // Logically, the release fence is not required because the exchange above provides the + // release-acquire semantic that guarantees that (*t) will be consistent when another thread + // loads the link atomic. However, C++11 memory model guarantees consistency of(*t) only + // when the same atomic is used for synchronization. + link->store(t, std::memory_order_release); + } + + //! Return true if mailbox is empty + bool empty() { + return my_first.load(std::memory_order_relaxed) == nullptr; + } + + //! Construct *this as a mailbox from zeroed memory. + /** Raise assertion if *this is not previously zeroed, or sizeof(this) is wrong. + This method is provided instead of a full constructor since we know the object + will be constructed in zeroed memory. */ + void construct() { + __TBB_ASSERT( sizeof(*this)==max_nfs_size, nullptr ); + __TBB_ASSERT( !my_first.load(std::memory_order_relaxed), nullptr ); + __TBB_ASSERT( !my_last.load(std::memory_order_relaxed), nullptr ); + __TBB_ASSERT( !my_is_idle.load(std::memory_order_relaxed), nullptr ); + my_last = &my_first; + suppress_unused_warning(pad); + } + + //! Drain the mailbox + intptr_t drain() { + intptr_t k = 0; + // No fences here because other threads have already quit. + for( ; task_proxy* t = my_first; ++k ) { + my_first.store(t->next_in_mailbox, std::memory_order_relaxed); + // cache_aligned_deallocate((char*)t - task_prefix_reservation_size); + } + return k; + } + + //! True if thread that owns this mailbox is looking for work. + bool recipient_is_idle() { + return my_is_idle.load(std::memory_order_relaxed); + } +}; // class mail_outbox + +//! Class representing source of mail. +class mail_inbox { + //! Corresponding sink where mail that we receive will be put. + mail_outbox* my_putter; +public: + //! Construct unattached inbox + mail_inbox() : my_putter(nullptr) {} + + //! Attach inbox to a corresponding outbox. + void attach( mail_outbox& putter ) { + my_putter = &putter; + } + //! Detach inbox from its outbox + void detach() { + __TBB_ASSERT(my_putter,"not attached"); + my_putter = nullptr; + } + //! Get next piece of mail, or nullptr if mailbox is empty. + task_proxy* pop( isolation_type isolation ) { + return my_putter->internal_pop( isolation ); + } + //! Return true if mailbox is empty + bool empty() { + return my_putter->empty(); + } + //! Indicate whether thread that reads this mailbox is idle. + /** Raises assertion failure if mailbox is redundantly marked as not idle. */ + void set_is_idle( bool value ) { + if( my_putter ) { + __TBB_ASSERT( my_putter->my_is_idle.load(std::memory_order_relaxed) || value, "attempt to redundantly mark mailbox as not idle" ); + my_putter->my_is_idle.store(value, std::memory_order_relaxed); + } + } + //! Indicate whether thread that reads this mailbox is idle. 
+ bool is_idle_state ( bool value ) const { + return !my_putter || my_putter->my_is_idle.load(std::memory_order_relaxed) == value; + } +}; // class mail_inbox + +} // namespace r1 +} // namespace detail +} // namespace tbb + +#endif /* _TBB_mailbox_H */ diff --git a/contrib/libs/tbb/src/tbb/main.cpp b/contrib/libs/tbb/src/tbb/main.cpp new file mode 100644 index 0000000000..ec6c98d682 --- /dev/null +++ b/contrib/libs/tbb/src/tbb/main.cpp @@ -0,0 +1,171 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "oneapi/tbb/detail/_config.h" + +#include "main.h" +#include "governor.h" +#include "environment.h" +#include "market.h" +#include "misc.h" +#include "itt_notify.h" + +namespace tbb { +namespace detail { +namespace r1 { + +//------------------------------------------------------------------------ +// Begin shared data layout. +// The following global data items are mostly read-only after initialization. +//------------------------------------------------------------------------ + +//------------------------------------------------------------------------ +// governor data +basic_tls<thread_data*> governor::theTLS; +unsigned governor::DefaultNumberOfThreads; +size_t governor::DefaultPageSize; +rml::tbb_factory governor::theRMLServerFactory; +bool governor::UsePrivateRML; +bool governor::is_rethrow_broken; + +//------------------------------------------------------------------------ +// market data +market* market::theMarket; +market::global_market_mutex_type market::theMarketMutex; + +//------------------------------------------------------------------------ +// context propagation data +context_state_propagation_mutex_type the_context_state_propagation_mutex; +std::atomic<uintptr_t> the_context_state_propagation_epoch{}; + +//------------------------------------------------------------------------ +// One time initialization data + +//! Counter of references to global shared resources such as TLS. +std::atomic<int> __TBB_InitOnce::count{}; + +std::atomic_flag __TBB_InitOnce::InitializationLock = ATOMIC_FLAG_INIT; + +//! Flag that is set to true after one-time initializations are done. +std::atomic<bool> __TBB_InitOnce::InitializationDone{}; + +#if __TBB_USE_ITT_NOTIFY +//! Defined in profiling.cpp +extern bool ITT_Present; +void ITT_DoUnsafeOneTimeInitialization(); +#endif + +#if !(_WIN32||_WIN64) || __TBB_SOURCE_DIRECTLY_INCLUDED +static __TBB_InitOnce __TBB_InitOnceHiddenInstance; +#endif + +#if TBB_USE_ASSERT +std::atomic<int> the_observer_proxy_count; + +struct check_observer_proxy_count { + ~check_observer_proxy_count() { + if (the_observer_proxy_count != 0) { + runtime_warning("Leaked %ld observer_proxy objects\n", long(the_observer_proxy_count)); + } + } +}; +// The proxy count checker shall be defined after __TBB_InitOnceHiddenInstance to check the count +// after auto termination. 
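+// Static objects within one translation unit are destroyed in reverse order of their
+// definitions, so defining the checker after __TBB_InitOnceHiddenInstance guarantees that
+// its destructor runs only after ~__TBB_InitOnce has completed auto termination.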
+static check_observer_proxy_count the_check_observer_proxy_count; +#endif /* TBB_USE_ASSERT */ + +//------------------------------------------------------------------------ +// __TBB_InitOnce +//------------------------------------------------------------------------ + +void __TBB_InitOnce::add_ref() { + if( ++count==1 ) + governor::acquire_resources(); +} + +void __TBB_InitOnce::remove_ref() { + int k = --count; + __TBB_ASSERT(k>=0,"removed __TBB_InitOnce ref that was not added?"); + if( k==0 ) { + governor::release_resources(); + ITT_FINI_ITTLIB(); + } +} + +//------------------------------------------------------------------------ +// One-time Initializations +//------------------------------------------------------------------------ + +//! Defined in cache_aligned_allocator.cpp +void initialize_cache_aligned_allocator(); + +//! Performs thread-safe lazy one-time general TBB initialization. +void DoOneTimeInitialization() { + __TBB_InitOnce::lock(); + // No fence required for load of InitializationDone, because we are inside a critical section. + if( !__TBB_InitOnce::InitializationDone ) { + __TBB_InitOnce::add_ref(); + if( GetBoolEnvironmentVariable("TBB_VERSION") ) + PrintVersion(); + bool itt_present = false; +#if __TBB_USE_ITT_NOTIFY + ITT_DoUnsafeOneTimeInitialization(); + itt_present = ITT_Present; +#endif /* __TBB_USE_ITT_NOTIFY */ + initialize_cache_aligned_allocator(); + governor::initialize_rml_factory(); + // Force processor groups support detection + governor::default_num_threads(); + // Force OS regular page size detection + governor::default_page_size(); + PrintExtraVersionInfo( "TOOLS SUPPORT", itt_present ? "enabled" : "disabled" ); + __TBB_InitOnce::InitializationDone = true; + } + __TBB_InitOnce::unlock(); +} + +#if (_WIN32||_WIN64) && !__TBB_SOURCE_DIRECTLY_INCLUDED +//! Windows "DllMain" that handles startup and shutdown of dynamic library. +extern "C" bool WINAPI DllMain( HANDLE /*hinstDLL*/, DWORD reason, LPVOID lpvReserved ) { + switch( reason ) { + case DLL_PROCESS_ATTACH: + __TBB_InitOnce::add_ref(); + break; + case DLL_PROCESS_DETACH: + // Since THREAD_DETACH is not called for the main thread, call auto-termination + // here as well - but not during process shutdown (due to risk of a deadlock). + if ( lpvReserved==NULL ) { // library unload + governor::terminate_external_thread(); + } + __TBB_InitOnce::remove_ref(); + // It is assumed that InitializationDone is not set after DLL_PROCESS_DETACH, + // and thus no race on InitializationDone is possible. + if ( __TBB_InitOnce::initialization_done() ) { + // Remove reference that we added in DoOneTimeInitialization. + __TBB_InitOnce::remove_ref(); + } + break; + case DLL_THREAD_DETACH: + governor::terminate_external_thread(); + break; + } + return true; +} +#endif /* (_WIN32||_WIN64) && !__TBB_SOURCE_DIRECTLY_INCLUDED */ + +} // namespace r1 +} // namespace detail +} // namespace tbb diff --git a/contrib/libs/tbb/src/tbb/main.h b/contrib/libs/tbb/src/tbb/main.h new file mode 100644 index 0000000000..c6f54bb47b --- /dev/null +++ b/contrib/libs/tbb/src/tbb/main.h @@ -0,0 +1,99 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef _TBB_main_H +#define _TBB_main_H + +#include "governor.h" + +#include <atomic> + +namespace tbb { +namespace detail { +namespace r1 { + +void DoOneTimeInitialization(); + +//------------------------------------------------------------------------ +// __TBB_InitOnce +//------------------------------------------------------------------------ + +// TODO (TBB_REVAMP_TODO): consider better names +//! Class that supports TBB initialization. +/** It handles acquisition and release of global resources (e.g. TLS) during startup and shutdown, + as well as synchronization for DoOneTimeInitialization. */ +class __TBB_InitOnce { + friend void DoOneTimeInitialization(); + friend void ITT_DoUnsafeOneTimeInitialization(); + + static std::atomic<int> count; + + //! Platform specific code to acquire resources. + static void acquire_resources(); + + //! Platform specific code to release resources. + static void release_resources(); + + //! Specifies if the one-time initializations has been done. + static std::atomic<bool> InitializationDone; + + //! Global initialization lock + /** Scenarios are possible when tools interop has to be initialized before the + TBB itself. This imposes a requirement that the global initialization lock + has to support valid static initialization, and does not issue any tool + notifications in any build mode. **/ + static std::atomic_flag InitializationLock; + +public: + static void lock() { + tbb::detail::atomic_backoff backoff; + while( InitializationLock.test_and_set() ) backoff.pause(); + } + + static void unlock() { InitializationLock.clear(std::memory_order_release); } + + static bool initialization_done() { return InitializationDone.load(std::memory_order_acquire); } + + //! Add initial reference to resources. + /** We assume that dynamic loading of the library prevents any other threads + from entering the library until this constructor has finished running. **/ + __TBB_InitOnce() { add_ref(); } + + //! Remove the initial reference to resources. + /** This is not necessarily the last reference if other threads are still running. **/ + ~__TBB_InitOnce() { + governor::terminate_external_thread(); // TLS dtor not called for the main thread + remove_ref(); + // We assume that InitializationDone is not set after file-scope destructors + // start running, and thus no race on InitializationDone is possible. + if ( initialization_done() ) { + // Remove an extra reference that was added in DoOneTimeInitialization. + remove_ref(); + } + } + //! Add reference to resources. If first reference added, acquire the resources. + static void add_ref(); + + //! Remove reference to resources. If last reference removed, release the resources. 
+ static void remove_ref(); + +}; // class __TBB_InitOnce + +} // namespace r1 +} // namespace detail +} // namespace tbb + +#endif /* _TBB_main_H */ diff --git a/contrib/libs/tbb/src/tbb/market.cpp b/contrib/libs/tbb/src/tbb/market.cpp new file mode 100644 index 0000000000..9259eaf588 --- /dev/null +++ b/contrib/libs/tbb/src/tbb/market.cpp @@ -0,0 +1,640 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "oneapi/tbb/global_control.h" // global_control::active_value + +#include "market.h" +#include "main.h" +#include "governor.h" +#include "arena.h" +#include "thread_data.h" +#include "itt_notify.h" + +#include <cstring> // std::memset() + +namespace tbb { +namespace detail { +namespace r1 { + +/** This method must be invoked under my_arenas_list_mutex. **/ +arena* market::select_next_arena( arena* hint ) { + unsigned next_arena_priority_level = num_priority_levels; + if ( hint ) + next_arena_priority_level = hint->my_priority_level; + for ( unsigned idx = 0; idx < next_arena_priority_level; ++idx ) { + if ( !my_arenas[idx].empty() ) + return &*my_arenas[idx].begin(); + } + // don't change if arena with higher priority is not found. + return hint; +} + +void market::insert_arena_into_list ( arena& a ) { + __TBB_ASSERT( a.my_priority_level < num_priority_levels, nullptr ); + my_arenas[a.my_priority_level].push_front( a ); + __TBB_ASSERT( !my_next_arena || my_next_arena->my_priority_level < num_priority_levels, nullptr ); + my_next_arena = select_next_arena( my_next_arena ); +} + +void market::remove_arena_from_list ( arena& a ) { + __TBB_ASSERT( a.my_priority_level < num_priority_levels, nullptr ); + my_arenas[a.my_priority_level].remove( a ); + if ( my_next_arena == &a ) + my_next_arena = nullptr; + my_next_arena = select_next_arena( my_next_arena ); +} + +//------------------------------------------------------------------------ +// market +//------------------------------------------------------------------------ + +market::market ( unsigned workers_soft_limit, unsigned workers_hard_limit, std::size_t stack_size ) + : my_num_workers_hard_limit(workers_hard_limit) + , my_num_workers_soft_limit(workers_soft_limit) + , my_next_arena(nullptr) + , my_ref_count(1) + , my_stack_size(stack_size) + , my_workers_soft_limit_to_report(workers_soft_limit) +{ + // Once created RML server will start initializing workers that will need + // global market instance to get worker stack size + my_server = governor::create_rml_server( *this ); + __TBB_ASSERT( my_server, "Failed to create RML server" ); +} + +static unsigned calc_workers_soft_limit(unsigned workers_soft_limit, unsigned workers_hard_limit) { + if( int soft_limit = market::app_parallelism_limit() ) + workers_soft_limit = soft_limit-1; + else // if user set no limits (yet), use market's parameter + workers_soft_limit = max( governor::default_num_threads() - 1, workers_soft_limit ); + if( workers_soft_limit >= workers_hard_limit ) + workers_soft_limit = workers_hard_limit-1; + return 
workers_soft_limit; +} + +bool market::add_ref_unsafe( global_market_mutex_type::scoped_lock& lock, bool is_public, unsigned workers_requested, std::size_t stack_size ) { + market *m = theMarket; + if( m ) { + ++m->my_ref_count; + const unsigned old_public_count = is_public ? m->my_public_ref_count++ : /*any non-zero value*/1; + lock.release(); + if( old_public_count==0 ) + set_active_num_workers( calc_workers_soft_limit(workers_requested, m->my_num_workers_hard_limit) ); + + // do not warn if default number of workers is requested + if( workers_requested != governor::default_num_threads()-1 ) { + __TBB_ASSERT( skip_soft_limit_warning > workers_requested, + "skip_soft_limit_warning must be larger than any valid workers_requested" ); + unsigned soft_limit_to_report = m->my_workers_soft_limit_to_report.load(std::memory_order_relaxed); + if( soft_limit_to_report < workers_requested ) { + runtime_warning( "The number of workers is currently limited to %u. " + "The request for %u workers is ignored. Further requests for more workers " + "will be silently ignored until the limit changes.\n", + soft_limit_to_report, workers_requested ); + // The race is possible when multiple threads report warnings. + // We are OK with that, as there are just multiple warnings. + unsigned expected_limit = soft_limit_to_report; + m->my_workers_soft_limit_to_report.compare_exchange_strong(expected_limit, skip_soft_limit_warning); + } + + } + if( m->my_stack_size < stack_size ) + runtime_warning( "Thread stack size has been already set to %u. " + "The request for larger stack (%u) cannot be satisfied.\n", m->my_stack_size, stack_size ); + return true; + } + return false; +} + +market& market::global_market(bool is_public, unsigned workers_requested, std::size_t stack_size) { + global_market_mutex_type::scoped_lock lock( theMarketMutex ); + if( !market::add_ref_unsafe(lock, is_public, workers_requested, stack_size) ) { + // TODO: A lot is done under theMarketMutex locked. Can anything be moved out? + if( stack_size == 0 ) + stack_size = global_control::active_value(global_control::thread_stack_size); + // Expecting that 4P is suitable for most applications. + // Limit to 2P for large thread number. + // TODO: ask RML for max concurrency and possibly correct hard_limit + const unsigned factor = governor::default_num_threads()<=128? 4 : 2; + // The requested number of threads is intentionally not considered in + // computation of the hard limit, in order to separate responsibilities + // and avoid complicated interactions between global_control and task_scheduler_init. + // The market guarantees that at least 256 threads might be created. 
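+        // Worked example (illustrative): with 32 hardware threads the factor is 4, so the
+        // base value is max(4*32, 256u) = 256; with 192 hardware threads the factor is 2
+        // and the base becomes max(2*192, 256u) = 384. app_parallelism_limit() can only
+        // raise the resulting hard limit further.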
+ const unsigned workers_hard_limit = max(max(factor*governor::default_num_threads(), 256u), app_parallelism_limit()); + const unsigned workers_soft_limit = calc_workers_soft_limit(workers_requested, workers_hard_limit); + // Create the global market instance + std::size_t size = sizeof(market); + __TBB_ASSERT( __TBB_offsetof(market, my_workers) + sizeof(thread_data*) == sizeof(market), + "my_workers must be the last data field of the market class"); + size += sizeof(thread_data*) * (workers_hard_limit - 1); + __TBB_InitOnce::add_ref(); + void* storage = cache_aligned_allocate(size); + std::memset( storage, 0, size ); + // Initialize and publish global market + market* m = new (storage) market( workers_soft_limit, workers_hard_limit, stack_size ); + if( is_public ) + m->my_public_ref_count.store(1, std::memory_order_relaxed); +#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE + if (market::is_lifetime_control_present()) { + ++m->my_public_ref_count; + ++m->my_ref_count; + } +#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE + theMarket = m; + // This check relies on the fact that for shared RML default_concurrency==max_concurrency + if ( !governor::UsePrivateRML && m->my_server->default_concurrency() < workers_soft_limit ) + runtime_warning( "RML might limit the number of workers to %u while %u is requested.\n" + , m->my_server->default_concurrency(), workers_soft_limit ); + } + return *theMarket; +} + +void market::destroy () { + this->market::~market(); // qualified to suppress warning + cache_aligned_deallocate( this ); + __TBB_InitOnce::remove_ref(); +} + +bool market::release ( bool is_public, bool blocking_terminate ) { + market::enforce([this] { return theMarket == this; }, "Global market instance was destroyed prematurely?"); + bool do_release = false; + { + global_market_mutex_type::scoped_lock lock( theMarketMutex ); + if ( blocking_terminate ) { + __TBB_ASSERT( is_public, "Only an object with a public reference can request the blocking terminate" ); + while ( my_public_ref_count.load(std::memory_order_relaxed) == 1 && + my_ref_count.load(std::memory_order_relaxed) > 1 ) { + lock.release(); + // To guarantee that request_close_connection() is called by the last external thread, we need to wait till all + // references are released. Re-read my_public_ref_count to limit waiting if new external threads are created. + // Theoretically, new private references to the market can be added during waiting making it potentially + // endless. + // TODO: revise why the weak scheduler needs market's pointer and try to remove this wait. + // Note that the market should know about its schedulers for cancellation/exception/priority propagation, + // see e.g. task_group_context::cancel_group_execution() + while ( my_public_ref_count.load(std::memory_order_acquire) == 1 && + my_ref_count.load(std::memory_order_acquire) > 1 ) { + yield(); + } + lock.acquire( theMarketMutex ); + } + } + if ( is_public ) { + __TBB_ASSERT( theMarket == this, "Global market instance was destroyed prematurely?" ); + __TBB_ASSERT( my_public_ref_count.load(std::memory_order_relaxed), NULL ); + --my_public_ref_count; + } + if ( --my_ref_count == 0 ) { + __TBB_ASSERT( !my_public_ref_count.load(std::memory_order_relaxed), NULL ); + do_release = true; + theMarket = NULL; + } + } + if( do_release ) { + __TBB_ASSERT( !my_public_ref_count.load(std::memory_order_relaxed), + "No public references remain if we remove the market." 
); + // inform RML that blocking termination is required + my_join_workers = blocking_terminate; + my_server->request_close_connection(); + return blocking_terminate; + } + return false; +} + +int market::update_workers_request() { + int old_request = my_num_workers_requested; + my_num_workers_requested = min(my_total_demand.load(std::memory_order_relaxed), + (int)my_num_workers_soft_limit.load(std::memory_order_relaxed)); +#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY + if (my_mandatory_num_requested > 0) { + __TBB_ASSERT(my_num_workers_soft_limit.load(std::memory_order_relaxed) == 0, NULL); + my_num_workers_requested = 1; + } +#endif + update_allotment(my_num_workers_requested); + return my_num_workers_requested - old_request; +} + +void market::set_active_num_workers ( unsigned soft_limit ) { + market *m; + + { + global_market_mutex_type::scoped_lock lock( theMarketMutex ); + if ( !theMarket ) + return; // actual value will be used at market creation + m = theMarket; + if (m->my_num_workers_soft_limit.load(std::memory_order_relaxed) == soft_limit) + return; + ++m->my_ref_count; + } + // have my_ref_count for market, use it safely + + int delta = 0; + { + arenas_list_mutex_type::scoped_lock lock( m->my_arenas_list_mutex ); + __TBB_ASSERT(soft_limit <= m->my_num_workers_hard_limit, NULL); + +#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY + arena_list_type* arenas = m->my_arenas; + + if (m->my_num_workers_soft_limit.load(std::memory_order_relaxed) == 0 && + m->my_mandatory_num_requested > 0) + { + for (unsigned level = 0; level < num_priority_levels; ++level ) + for (arena_list_type::iterator it = arenas[level].begin(); it != arenas[level].end(); ++it) + if (it->my_global_concurrency_mode.load(std::memory_order_relaxed)) + m->disable_mandatory_concurrency_impl(&*it); + } + __TBB_ASSERT(m->my_mandatory_num_requested == 0, NULL); +#endif + + m->my_num_workers_soft_limit.store(soft_limit, std::memory_order_release); + // report only once after new soft limit value is set + m->my_workers_soft_limit_to_report.store(soft_limit, std::memory_order_relaxed); + +#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY + if (m->my_num_workers_soft_limit.load(std::memory_order_relaxed) == 0) { + for (unsigned level = 0; level < num_priority_levels; ++level ) + for (arena_list_type::iterator it = arenas[level].begin(); it != arenas[level].end(); ++it) + if (it->has_enqueued_tasks()) + m->enable_mandatory_concurrency_impl(&*it); + } +#endif + + delta = m->update_workers_request(); + } + // adjust_job_count_estimate must be called outside of any locks + if( delta!=0 ) + m->my_server->adjust_job_count_estimate( delta ); + // release internal market reference to match ++m->my_ref_count above + m->release( /*is_public=*/false, /*blocking_terminate=*/false ); +} + +bool governor::does_client_join_workers (const rml::tbb_client &client) { + return ((const market&)client).must_join_workers(); +} + +arena* market::create_arena ( int num_slots, int num_reserved_slots, unsigned arena_priority_level, + std::size_t stack_size ) +{ + __TBB_ASSERT( num_slots > 0, NULL ); + __TBB_ASSERT( num_reserved_slots <= num_slots, NULL ); + // Add public market reference for an external thread/task_arena (that adds an internal reference in exchange). + market &m = global_market( /*is_public=*/true, num_slots-num_reserved_slots, stack_size ); + arena& a = arena::allocate_arena( m, num_slots, num_reserved_slots, arena_priority_level ); + // Add newly created arena into the existing market's list. 
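set_active_num_workers() above is the hook that reacts when the allowed parallelism changes at run time. For orientation, a small usage sketch of the public API that typically drives it, assuming the standard oneTBB global_control interface:

    #include <oneapi/tbb/global_control.h>
    #include <oneapi/tbb/parallel_for.h>

    int main() {
        // While `limit` is alive the scheduler may use at most two worker threads;
        // lowering the limit is the public-API path to the soft-limit update above.
        oneapi::tbb::global_control limit(
            oneapi::tbb::global_control::max_allowed_parallelism, 2);

        oneapi::tbb::parallel_for(0, 1000, [](int) { /* some work */ });
        return 0;
    }   // the previous limit is restored when `limit` is destroyed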
+ arenas_list_mutex_type::scoped_lock lock(m.my_arenas_list_mutex); + m.insert_arena_into_list(a); + return &a; +} + +/** This method must be invoked under my_arenas_list_mutex. **/ +void market::detach_arena ( arena& a ) { + market::enforce([this] { return theMarket == this; }, "Global market instance was destroyed prematurely?"); + __TBB_ASSERT( !a.my_slots[0].is_occupied(), NULL ); + if (a.my_global_concurrency_mode.load(std::memory_order_relaxed)) + disable_mandatory_concurrency_impl(&a); + + remove_arena_from_list(a); + if (a.my_aba_epoch == my_arenas_aba_epoch.load(std::memory_order_relaxed)) { + my_arenas_aba_epoch.store(my_arenas_aba_epoch.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed); + } +} + +void market::try_destroy_arena ( arena* a, uintptr_t aba_epoch, unsigned priority_level ) { + bool locked = true; + __TBB_ASSERT( a, NULL ); + // we hold reference to the market, so it cannot be destroyed at any moment here + market::enforce([this] { return theMarket == this; }, NULL); + __TBB_ASSERT( my_ref_count!=0, NULL ); + my_arenas_list_mutex.lock(); + arena_list_type::iterator it = my_arenas[priority_level].begin(); + for ( ; it != my_arenas[priority_level].end(); ++it ) { + if ( a == &*it ) { + if ( it->my_aba_epoch == aba_epoch ) { + // Arena is alive + if ( !a->my_num_workers_requested && !a->my_references.load(std::memory_order_relaxed) ) { + __TBB_ASSERT( + !a->my_num_workers_allotted.load(std::memory_order_relaxed) && + (a->my_pool_state == arena::SNAPSHOT_EMPTY || !a->my_max_num_workers), + "Inconsistent arena state" + ); + // Arena is abandoned. Destroy it. + detach_arena( *a ); + my_arenas_list_mutex.unlock(); + locked = false; + a->free_arena(); + } + } + if (locked) + my_arenas_list_mutex.unlock(); + return; + } + } + my_arenas_list_mutex.unlock(); +} + +/** This method must be invoked under my_arenas_list_mutex. **/ +arena* market::arena_in_need ( arena_list_type* arenas, arena* hint ) { + // TODO: make sure arena with higher priority returned only if there are available slots in it. 
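The loop that follows scans arenas round-robin: it walks the current priority list from a hint, wraps to the next non-empty priority level at the end, and stops once it returns to the starting point. A compact standalone model of that traversal, with invented names and a trivial stand-in predicate for "this arena wants another worker":

    #include <array>
    #include <cstddef>
    #include <cstdio>
    #include <vector>

    static const unsigned levels = 3;

    // Circular scan across per-priority lists, skipping empty levels.
    static int find_next(const std::array<std::vector<int>, levels>& lists,
                         unsigned start_level, std::size_t start_pos) {
        unsigned level = start_level;
        std::size_t pos = start_pos;
        do {
            int candidate = lists[level][pos];
            if (++pos == lists[level].size()) {        // end of this priority list
                do {
                    level = (level + 1) % levels;      // wrap to the next level
                } while (lists[level].empty());
                pos = 0;
            }
            if (candidate % 2 == 0)                    // stand-in for "needs a worker"
                return candidate;
        } while (level != start_level || pos != start_pos);
        return -1;                                     // nobody needs workers
    }

    int main() {
        std::array<std::vector<int>, levels> lists{{{1, 3, 4}, {}, {5, 6}}};
        std::printf("%d\n", find_next(lists, 0, 0));   // prints 4
        return 0;
    }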
+ hint = select_next_arena( hint ); + if ( !hint ) + return nullptr; + arena_list_type::iterator it = hint; + unsigned curr_priority_level = hint->my_priority_level; + __TBB_ASSERT( it != arenas[curr_priority_level].end(), nullptr ); + do { + arena& a = *it; + if ( ++it == arenas[curr_priority_level].end() ) { + do { + ++curr_priority_level %= num_priority_levels; + } while ( arenas[curr_priority_level].empty() ); + it = arenas[curr_priority_level].begin(); + } + if( a.num_workers_active() < a.my_num_workers_allotted.load(std::memory_order_relaxed) ) { + a.my_references += arena::ref_worker; + return &a; + } + } while ( it != hint ); + return nullptr; +} + +arena* market::arena_in_need(arena* prev) { + if (my_total_demand.load(std::memory_order_acquire) <= 0) + return nullptr; + arenas_list_mutex_type::scoped_lock lock(my_arenas_list_mutex, /*is_writer=*/false); + // TODO: introduce three state response: alive, not_alive, no_market_arenas + if ( is_arena_alive(prev) ) + return arena_in_need(my_arenas, prev); + return arena_in_need(my_arenas, my_next_arena); +} + +int market::update_allotment ( arena_list_type* arenas, int workers_demand, int max_workers ) { + __TBB_ASSERT( workers_demand > 0, nullptr ); + max_workers = min(workers_demand, max_workers); + int unassigned_workers = max_workers; + int assigned = 0; + int carry = 0; + unsigned max_priority_level = num_priority_levels; + for (unsigned list_idx = 0; list_idx < num_priority_levels; ++list_idx ) { + int assigned_per_priority = min(my_priority_level_demand[list_idx], unassigned_workers); + unassigned_workers -= assigned_per_priority; + for (arena_list_type::iterator it = arenas[list_idx].begin(); it != arenas[list_idx].end(); ++it) { + arena& a = *it; + __TBB_ASSERT(a.my_num_workers_requested >= 0, nullptr); + __TBB_ASSERT(a.my_num_workers_requested <= int(a.my_max_num_workers) + || (a.my_max_num_workers == 0 && a.my_local_concurrency_requests > 0 && a.my_num_workers_requested == 1), nullptr); + if (a.my_num_workers_requested == 0) { + __TBB_ASSERT(!a.my_num_workers_allotted.load(std::memory_order_relaxed), nullptr); + continue; + } + + if (max_priority_level == num_priority_levels) { + max_priority_level = list_idx; + } + + int allotted = 0; +#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY + if (my_num_workers_soft_limit.load(std::memory_order_relaxed) == 0) { + __TBB_ASSERT(max_workers == 0 || max_workers == 1, nullptr); + allotted = a.my_global_concurrency_mode.load(std::memory_order_relaxed) && + assigned < max_workers ? 1 : 0; + } else +#endif + { + int tmp = a.my_num_workers_requested * assigned_per_priority + carry; + allotted = tmp / my_priority_level_demand[list_idx]; + carry = tmp % my_priority_level_demand[list_idx]; + __TBB_ASSERT(allotted <= a.my_num_workers_requested, nullptr); + __TBB_ASSERT(allotted <= int(a.my_num_slots - a.my_num_reserved_slots), nullptr); + } + a.my_num_workers_allotted.store(allotted, std::memory_order_relaxed); + a.my_is_top_priority.store(list_idx == max_priority_level, std::memory_order_relaxed); + assigned += allotted; + } + } + __TBB_ASSERT( 0 <= assigned && assigned <= max_workers, nullptr ); + return assigned; +} + +/** This method must be invoked under my_arenas_list_mutex. **/ +bool market::is_arena_in_list( arena_list_type &arenas, arena *a ) { + __TBB_ASSERT( a, "Expected non-null pointer to arena." 
);
+ for ( arena_list_type::iterator it = arenas.begin(); it != arenas.end(); ++it )
+ if ( a == &*it )
+ return true;
+ return false;
+}
+
+/** This method must be invoked under my_arenas_list_mutex. **/
+bool market::is_arena_alive(arena* a) {
+ if ( !a )
+ return false;
+
+ // Still cannot access internals of the arena since the object itself might be destroyed.
+
+ for ( unsigned idx = 0; idx < num_priority_levels; ++idx ) {
+ if ( is_arena_in_list( my_arenas[idx], a ) )
+ return true;
+ }
+ return false;
+}
+
+#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY
+void market::enable_mandatory_concurrency_impl ( arena *a ) {
+ __TBB_ASSERT(!a->my_global_concurrency_mode.load(std::memory_order_relaxed), NULL);
+ __TBB_ASSERT(my_num_workers_soft_limit.load(std::memory_order_relaxed) == 0, NULL);
+
+ a->my_global_concurrency_mode.store(true, std::memory_order_relaxed);
+ my_mandatory_num_requested++;
+}
+
+void market::enable_mandatory_concurrency ( arena *a ) {
+ int delta = 0;
+ {
+ arenas_list_mutex_type::scoped_lock lock(my_arenas_list_mutex);
+ if (my_num_workers_soft_limit.load(std::memory_order_relaxed) != 0 ||
+ a->my_global_concurrency_mode.load(std::memory_order_relaxed))
+ return;
+
+ enable_mandatory_concurrency_impl(a);
+ delta = update_workers_request();
+ }
+
+ if (delta != 0)
+ my_server->adjust_job_count_estimate(delta);
+}
+
+void market::disable_mandatory_concurrency_impl(arena* a) {
+ __TBB_ASSERT(a->my_global_concurrency_mode.load(std::memory_order_relaxed), NULL);
+ __TBB_ASSERT(my_mandatory_num_requested > 0, NULL);
+
+ a->my_global_concurrency_mode.store(false, std::memory_order_relaxed);
+ my_mandatory_num_requested--;
+}
+
+void market::mandatory_concurrency_disable ( arena *a ) {
+ int delta = 0;
+ {
+ arenas_list_mutex_type::scoped_lock lock(my_arenas_list_mutex);
+ if (!a->my_global_concurrency_mode.load(std::memory_order_relaxed))
+ return;
+ // There is a racy window in advertise_new_work between mandatory concurrency enabling and
+ // setting SNAPSHOT_FULL. It gives a chance for a spawn request to disable mandatory concurrency.
+ // Therefore, we double-check that there are no enqueued tasks.
+ if (a->has_enqueued_tasks())
+ return;
+
+ __TBB_ASSERT(my_num_workers_soft_limit.load(std::memory_order_relaxed) == 0, NULL);
+ disable_mandatory_concurrency_impl(a);
+
+ delta = update_workers_request();
+ }
+ if (delta != 0)
+ my_server->adjust_job_count_estimate(delta);
+}
+#endif /* __TBB_ENQUEUE_ENFORCED_CONCURRENCY */
+
+void market::adjust_demand ( arena& a, int delta, bool mandatory ) {
+ if (!delta) {
+ return;
+ }
+ int target_epoch{};
+ {
+ arenas_list_mutex_type::scoped_lock lock(my_arenas_list_mutex);
+ __TBB_ASSERT(theMarket != nullptr, "market instance was destroyed prematurely?");
+#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY
+ if (mandatory) {
+ __TBB_ASSERT(delta == 1 || delta == -1, nullptr);
+ // Count the number of mandatory requests and proceed only for 0->1 and 1->0 transitions.
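That comment is easy to read past, so here is the same gate as a tiny standalone function (names invented for the illustration): the per-arena request counter changes on every call, but only the 0->1 and 1->0 edges are allowed to propagate further; the statements that follow apply this to a.my_local_concurrency_requests.

    #include <cassert>

    // Only the 0->1 and 1->0 transitions of the mandatory-request counter matter;
    // every other call is absorbed locally.
    static bool crosses_edge(int& requests, int delta) {
        assert(delta == 1 || delta == -1);
        requests += delta;
        return (delta > 0 && requests == 1) || (delta < 0 && requests == 0);
    }

    int main() {
        int requests = 0;
        assert( crosses_edge(requests, +1));   // 0 -> 1: enable mandatory concurrency
        assert(!crosses_edge(requests, +1));   // 1 -> 2: nothing to do
        assert(!crosses_edge(requests, -1));   // 2 -> 1: nothing to do
        assert( crosses_edge(requests, -1));   // 1 -> 0: disable mandatory concurrency
        return 0;
    }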
+ a.my_local_concurrency_requests += delta; + if ((delta > 0 && a.my_local_concurrency_requests != 1) || + (delta < 0 && a.my_local_concurrency_requests != 0)) + { + return; + } + } +#endif + a.my_total_num_workers_requested += delta; + int target_workers = 0; + // Cap target_workers into interval [0, a.my_max_num_workers] + if (a.my_total_num_workers_requested > 0) { +#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY + // At least one thread should be requested when mandatory concurrency + int max_num_workers = int(a.my_max_num_workers); + if (a.my_local_concurrency_requests > 0 && max_num_workers == 0) { + max_num_workers = 1; + } +#endif + target_workers = min(a.my_total_num_workers_requested, max_num_workers); + } + + delta = target_workers - a.my_num_workers_requested; + + if (delta == 0) { + return; + } + + a.my_num_workers_requested += delta; + if (a.my_num_workers_requested == 0) { + a.my_num_workers_allotted.store(0, std::memory_order_relaxed); + } + + int total_demand = my_total_demand.load(std::memory_order_relaxed) + delta; + my_total_demand.store(total_demand, std::memory_order_relaxed); + my_priority_level_demand[a.my_priority_level] += delta; + unsigned effective_soft_limit = my_num_workers_soft_limit.load(std::memory_order_relaxed); + if (my_mandatory_num_requested > 0) { + __TBB_ASSERT(effective_soft_limit == 0, NULL); + effective_soft_limit = 1; + } + + update_allotment(effective_soft_limit); + if (delta > 0) { + // can't overflow soft_limit, but remember values request by arenas in + // my_total_demand to not prematurely release workers to RML + if (my_num_workers_requested + delta > (int)effective_soft_limit) + delta = effective_soft_limit - my_num_workers_requested; + } + else { + // the number of workers should not be decreased below my_total_demand + if (my_num_workers_requested + delta < total_demand) + delta = min(total_demand, (int)effective_soft_limit) - my_num_workers_requested; + } + my_num_workers_requested += delta; + __TBB_ASSERT(my_num_workers_requested <= (int)effective_soft_limit, NULL); + + target_epoch = my_adjust_demand_target_epoch++; + } + + spin_wait_until_eq(my_adjust_demand_current_epoch, target_epoch); + // Must be called outside of any locks + my_server->adjust_job_count_estimate( delta ); + my_adjust_demand_current_epoch.store(target_epoch + 1, std::memory_order_release); +} + +void market::process( job& j ) { + thread_data& td = static_cast<thread_data&>(j); + // td.my_arena can be dead. Don't access it until arena_in_need is called + arena *a = td.my_arena; + for (int i = 0; i < 2; ++i) { + while ( (a = arena_in_need(a)) ) { + a->process(td); + } + // Workers leave market because there is no arena in need. It can happen earlier than + // adjust_job_count_estimate() decreases my_slack and RML can put this thread to sleep. + // It might result in a busy-loop checking for my_slack<0 and calling this method instantly. + // the yield refines this spinning. 
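Returning to adjust_demand() a few lines up: the epoch pair it uses deserves a standalone illustration. A ticket (the target epoch) is taken while the demand bookkeeping happens under the arenas-list lock, and the potentially blocking RML call is then serialized outside the lock, in ticket order. A minimal sketch of that pattern, with invented names:

    #include <atomic>
    #include <mutex>
    #include <thread>

    static std::mutex state_mutex;
    static int next_ticket = 0;                    // protected by state_mutex
    static std::atomic<int> current_epoch{0};      // ticket allowed to proceed now

    static void serialized_publish() {
        int my_ticket;
        {
            std::lock_guard<std::mutex> lock(state_mutex);
            // ... update shared demand state and compute a delta here ...
            my_ticket = next_ticket++;
        }
        // Wait for our turn without holding the lock.
        while (current_epoch.load(std::memory_order_acquire) != my_ticket)
            std::this_thread::yield();
        // The blocking call (adjust_job_count_estimate in the real code) runs here,
        // strictly in ticket order, then the next caller is released.
        current_epoch.store(my_ticket + 1, std::memory_order_release);
    }

    int main() {
        std::thread t1(serialized_publish), t2(serialized_publish);
        t1.join();
        t2.join();
        return 0;
    }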
+ if ( !i ) { + yield(); + } + } +} + +void market::cleanup( job& j) { + market::enforce([this] { return theMarket != this; }, NULL ); + governor::auto_terminate(&j); +} + +void market::acknowledge_close_connection() { + destroy(); +} + +::rml::job* market::create_one_job() { + unsigned short index = ++my_first_unused_worker_idx; + __TBB_ASSERT( index > 0, NULL ); + ITT_THREAD_SET_NAME(_T("TBB Worker Thread")); + // index serves as a hint decreasing conflicts between workers when they migrate between arenas + thread_data* td = new(cache_aligned_allocate(sizeof(thread_data))) thread_data{ index, true }; + __TBB_ASSERT( index <= my_num_workers_hard_limit, NULL ); + __TBB_ASSERT( my_workers[index - 1] == nullptr, NULL ); + my_workers[index - 1] = td; + return td; +} + +void market::add_external_thread(thread_data& td) { + context_state_propagation_mutex_type::scoped_lock lock(the_context_state_propagation_mutex); + my_masters.push_front(td); +} + +void market::remove_external_thread(thread_data& td) { + context_state_propagation_mutex_type::scoped_lock lock(the_context_state_propagation_mutex); + my_masters.remove(td); +} + +} // namespace r1 +} // namespace detail +} // namespace tbb diff --git a/contrib/libs/tbb/src/tbb/market.h b/contrib/libs/tbb/src/tbb/market.h new file mode 100644 index 0000000000..8443467447 --- /dev/null +++ b/contrib/libs/tbb/src/tbb/market.h @@ -0,0 +1,317 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef _TBB_market_H +#define _TBB_market_H + +#include "scheduler_common.h" +#include "concurrent_monitor.h" +#include "intrusive_list.h" +#include "rml_tbb.h" + +#include "oneapi/tbb/spin_rw_mutex.h" +#include "oneapi/tbb/task_group.h" + +#include <atomic> + +#if defined(_MSC_VER) && defined(_Wp64) + // Workaround for overzealous compiler warnings in /Wp64 mode + #pragma warning (push) + #pragma warning (disable: 4244) +#endif + +namespace tbb { +namespace detail { + +#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE +namespace d1 { +class task_scheduler_handle; +} +#endif + +namespace r1 { + +class task_arena_base; +class task_group_context; + +//------------------------------------------------------------------------ +// Class market +//------------------------------------------------------------------------ + +class market : no_copy, rml::tbb_client { + friend class arena; + friend class task_arena_base; + template<typename SchedulerTraits> friend class custom_scheduler; + friend class task_group_context; + friend class governor; +#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE + friend class lifetime_control; +#endif + +public: + //! Keys for the arena map array. The lower the value the higher priority of the arena list. 
+ static constexpr unsigned num_priority_levels = 3; + +private: + friend void ITT_DoUnsafeOneTimeInitialization (); +#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE + friend bool finalize_impl(d1::task_scheduler_handle& handle); +#endif + + typedef intrusive_list<arena> arena_list_type; + typedef intrusive_list<thread_data> thread_data_list_type; + + //! Currently active global market + static market* theMarket; + + typedef scheduler_mutex_type global_market_mutex_type; + + //! Mutex guarding creation/destruction of theMarket, insertions/deletions in my_arenas, and cancellation propagation + static global_market_mutex_type theMarketMutex; + + //! Lightweight mutex guarding accounting operations with arenas list + typedef spin_rw_mutex arenas_list_mutex_type; + // TODO: introduce fine-grained (per priority list) locking of arenas. + arenas_list_mutex_type my_arenas_list_mutex; + + //! Pointer to the RML server object that services this TBB instance. + rml::tbb_server* my_server; + + //! Waiting object for external and coroutine waiters. + extended_concurrent_monitor my_sleep_monitor; + + //! Maximal number of workers allowed for use by the underlying resource manager + /** It can't be changed after market creation. **/ + unsigned my_num_workers_hard_limit; + + //! Current application-imposed limit on the number of workers (see set_active_num_workers()) + /** It can't be more than my_num_workers_hard_limit. **/ + std::atomic<unsigned> my_num_workers_soft_limit; + + //! Number of workers currently requested from RML + int my_num_workers_requested; + + //! The target serialization epoch for callers of adjust_job_count_estimate + int my_adjust_demand_target_epoch; + + //! The current serialization epoch for callers of adjust_job_count_estimate + std::atomic<int> my_adjust_demand_current_epoch; + + //! First unused index of worker + /** Used to assign indices to the new workers coming from RML, and busy part + of my_workers array. **/ + std::atomic<unsigned> my_first_unused_worker_idx; + + //! Number of workers that were requested by all arenas on all priority levels + std::atomic<int> my_total_demand; + + //! Number of workers that were requested by arenas per single priority list item + int my_priority_level_demand[num_priority_levels]; + +#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY + //! How many times mandatory concurrency was requested from the market + int my_mandatory_num_requested; +#endif + + //! Per priority list of registered arenas + arena_list_type my_arenas[num_priority_levels]; + + //! The first arena to be checked when idle worker seeks for an arena to enter + /** The check happens in round-robin fashion. **/ + arena *my_next_arena; + + //! ABA prevention marker to assign to newly created arenas + std::atomic<uintptr_t> my_arenas_aba_epoch; + + //! Reference count controlling market object lifetime + std::atomic<unsigned> my_ref_count; + + //! Count of external threads attached + std::atomic<unsigned> my_public_ref_count; + + //! Stack size of worker threads + std::size_t my_stack_size; + + //! Shutdown mode + bool my_join_workers; + + //! The value indicating that the soft limit warning is unnecessary + static const unsigned skip_soft_limit_warning = ~0U; + + //! Either workers soft limit to be reported via runtime_warning() or skip_soft_limit_warning + std::atomic<unsigned> my_workers_soft_limit_to_report; + + //! Constructor + market ( unsigned workers_soft_limit, unsigned workers_hard_limit, std::size_t stack_size ); + + //! 
Destroys and deallocates market object created by market::global_market()
+ void destroy ();
+
+ //! Recalculates the number of workers requested from RML and updates the allotment.
+ int update_workers_request();
+
+ //! Recalculates the number of workers assigned to each arena in the list.
+ /** The actual number of workers servicing a particular arena may temporarily
+ deviate from the calculated value. **/
+ void update_allotment (unsigned effective_soft_limit) {
+ int total_demand = my_total_demand.load(std::memory_order_relaxed);
+ if (total_demand) {
+ update_allotment(my_arenas, total_demand, (int)effective_soft_limit);
+ }
+ }
+
+ //! Returns next arena that needs more workers, or NULL.
+ arena* arena_in_need(arena* prev);
+
+ template <typename Pred>
+ static void enforce (Pred pred, const char* msg) {
+ suppress_unused_warning(pred, msg);
+#if TBB_USE_ASSERT
+ global_market_mutex_type::scoped_lock lock(theMarketMutex);
+ __TBB_ASSERT(pred(), msg);
+#endif
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ // Helpers to unify code branches dependent on priority feature presence
+
+ arena* select_next_arena( arena* hint );
+
+ void insert_arena_into_list ( arena& a );
+
+ void remove_arena_from_list ( arena& a );
+
+ arena* arena_in_need ( arena_list_type* arenas, arena* hint );
+
+ int update_allotment ( arena_list_type* arenas, int total_demand, int max_workers );
+
+ bool is_arena_in_list( arena_list_type& arenas, arena* a );
+
+ bool is_arena_alive( arena* a );
+
+ ////////////////////////////////////////////////////////////////////////////////
+ // Implementation of rml::tbb_client interface methods
+
+ version_type version () const override { return 0; }
+
+ unsigned max_job_count () const override { return my_num_workers_hard_limit; }
+
+ std::size_t min_stack_size () const override { return worker_stack_size(); }
+
+ job* create_one_job () override;
+
+ void cleanup( job& j ) override;
+
+ void acknowledge_close_connection () override;
+
+ void process( job& j ) override;
+
+public:
+ //! Factory method creating new market object
+ static market& global_market( bool is_public, unsigned max_num_workers = 0, std::size_t stack_size = 0 );
+
+ //! Add reference to market if theMarket exists
+ static bool add_ref_unsafe( global_market_mutex_type::scoped_lock& lock, bool is_public, unsigned max_num_workers = 0, std::size_t stack_size = 0 );
+
+ //! Creates an arena object
+ /** If necessary, also creates global market instance, and boosts its ref count.
+ Each call to create_arena() must be matched by the call to arena::free_arena(). **/
+ static arena* create_arena ( int num_slots, int num_reserved_slots,
+ unsigned arena_index, std::size_t stack_size );
+
+ //! Removes the arena from the market's list
+ void try_destroy_arena ( arena*, uintptr_t aba_epoch, unsigned priority_level );
+
+ //! Removes the arena from the market's list
+ void detach_arena ( arena& );
+
+ //! Decrements market's refcount and destroys it when the count reaches zero
+ bool release ( bool is_public, bool blocking_terminate );
+
+ //! Return wait list
+ extended_concurrent_monitor& get_wait_list() { return my_sleep_monitor; }
+
+#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY
+ //! Implementation of mandatory concurrency enabling
+ void enable_mandatory_concurrency_impl ( arena *a );
+
+ //! Inform the external thread that there is an arena with mandatory concurrency
+ void enable_mandatory_concurrency ( arena *a );
+
+ //!
Inform the external thread that the arena is no more interested in mandatory concurrency + void disable_mandatory_concurrency_impl(arena* a); + + //! Inform the external thread that the arena is no more interested in mandatory concurrency + void mandatory_concurrency_disable ( arena *a ); +#endif /* __TBB_ENQUEUE_ENFORCED_CONCURRENCY */ + + //! Request that arena's need in workers should be adjusted. + /** Concurrent invocations are possible only on behalf of different arenas. **/ + void adjust_demand ( arena&, int delta, bool mandatory ); + + //! Used when RML asks for join mode during workers termination. + bool must_join_workers () const { return my_join_workers; } + + //! Returns the requested stack size of worker threads. + std::size_t worker_stack_size () const { return my_stack_size; } + + //! Set number of active workers + static void set_active_num_workers( unsigned w ); + + //! Reports active parallelism level according to user's settings + static unsigned app_parallelism_limit(); + +#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE + //! Reports if any active global lifetime references are present + static unsigned is_lifetime_control_present(); +#endif + + //! Finds all contexts affected by the state change and propagates the new state to them. + /** The propagation is relayed to the market because tasks created by one + external thread can be passed to and executed by other external threads. This means + that context trees can span several arenas at once and thus state change + propagation cannot be generally localized to one arena only. **/ + template <typename T> + bool propagate_task_group_state (std::atomic<T> d1::task_group_context::*mptr_state, d1::task_group_context& src, T new_state ); + + //! List of registered external threads + thread_data_list_type my_masters; + + //! Array of pointers to the registered workers + /** Used by cancellation propagation mechanism. + Must be the last data member of the class market. **/ + thread_data* my_workers[1]; + + static unsigned max_num_workers() { + global_market_mutex_type::scoped_lock lock( theMarketMutex ); + return theMarket? theMarket->my_num_workers_hard_limit : 0; + } + + void add_external_thread(thread_data& td); + + void remove_external_thread(thread_data& td); +}; // class market + +} // namespace r1 +} // namespace detail +} // namespace tbb + +#if defined(_MSC_VER) && defined(_Wp64) + // Workaround for overzealous compiler warnings in /Wp64 mode + #pragma warning (pop) +#endif // warning 4244 is back + +#endif /* _TBB_market_H */ diff --git a/contrib/libs/tbb/src/tbb/misc.cpp b/contrib/libs/tbb/src/tbb/misc.cpp new file mode 100644 index 0000000000..0e1d33a596 --- /dev/null +++ b/contrib/libs/tbb/src/tbb/misc.cpp @@ -0,0 +1,137 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +// Source file for miscellaneous entities that are infrequently referenced by +// an executing program. 
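One structural detail of the market class above deserves a note: my_workers is declared as a one-element array that must stay the last data member, because market objects are over-allocated to hold my_num_workers_hard_limit slots (see the size computation and offset assertion in market.cpp earlier in this diff). A minimal sketch of that size/offset relationship, with an invented struct name:

    #include <cstddef>
    #include <cstdio>

    // A struct ending in a one-element array; real allocations reserve room for
    // capacity - 1 extra slots right behind the object, which is only safe while
    // the array remains the last member -- hence the assertion in market.cpp.
    struct registry {
        std::size_t capacity;
        void* slots[1];      // must remain the last data member
    };

    int main() {
        static_assert(offsetof(registry, slots) + sizeof(void*) == sizeof(registry),
                      "slots must be the last data member");
        const std::size_t capacity = 8;
        const std::size_t bytes = sizeof(registry) + sizeof(void*) * (capacity - 1);
        std::printf("allocate %zu bytes for %zu slots\n", bytes, capacity);
        return 0;
    }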
+ +#include "oneapi/tbb/detail/_exception.h" +#include "oneapi/tbb/detail/_machine.h" + +#include "oneapi/tbb/version.h" + +#include "misc.h" +#include "governor.h" +#include "assert_impl.h" // Out-of-line TBB assertion handling routines are instantiated here. + +#include <cstdio> +#include <cstdlib> +#include <stdexcept> +#include <cstring> +#include <cstdarg> + +#if _WIN32||_WIN64 +#include <windows.h> +#endif + +#if !_WIN32 +#include <unistd.h> // sysconf(_SC_PAGESIZE) +#endif + +namespace tbb { +namespace detail { +namespace r1 { + +//------------------------------------------------------------------------ +// governor data +//------------------------------------------------------------------------ +cpu_features_type governor::cpu_features; + + +size_t DefaultSystemPageSize() { +#if _WIN32 + SYSTEM_INFO si; + GetSystemInfo(&si); + return si.dwPageSize; +#else + return sysconf(_SC_PAGESIZE); +#endif +} + +/** The leading "\0" is here so that applying "strings" to the binary delivers a clean result. */ +static const char VersionString[] = "\0" TBB_VERSION_STRINGS; + +static bool PrintVersionFlag = false; + +void PrintVersion() { + PrintVersionFlag = true; + std::fputs(VersionString+1,stderr); +} + +void PrintExtraVersionInfo( const char* category, const char* format, ... ) { + if( PrintVersionFlag ) { + char str[1024]; std::memset(str, 0, 1024); + va_list args; va_start(args, format); + // Note: correct vsnprintf definition obtained from tbb_assert_impl.h + std::vsnprintf( str, 1024-1, format, args); + va_end(args); + std::fprintf(stderr, "oneTBB: %s\t%s\n", category, str ); + } +} + +//! check for transaction support. +#if _MSC_VER +#include <intrin.h> // for __cpuid +#endif + +#if __TBB_x86_32 || __TBB_x86_64 +void check_cpuid(int leaf, int sub_leaf, int registers[4]) { +#if _MSC_VER + __cpuidex(registers, leaf, sub_leaf); +#else + int reg_eax = 0; + int reg_ebx = 0; + int reg_ecx = 0; + int reg_edx = 0; +#if __TBB_x86_32 && __PIC__ + // On 32-bit systems with position-independent code GCC fails to work around the stuff in EBX + // register. We help it using backup and restore. + __asm__("mov %%ebx, %%esi\n\t" + "cpuid\n\t" + "xchg %%ebx, %%esi" + : "=a"(reg_eax), "=S"(reg_ebx), "=c"(reg_ecx), "=d"(reg_edx) + : "0"(leaf), "2"(sub_leaf) // read value from eax and ecx + ); +#else + __asm__("cpuid" + : "=a"(reg_eax), "=b"(reg_ebx), "=c"(reg_ecx), "=d"(reg_edx) + : "0"(leaf), "2"(sub_leaf) // read value from eax and ecx + ); +#endif + registers[0] = reg_eax; + registers[1] = reg_ebx; + registers[2] = reg_ecx; + registers[3] = reg_edx; +#endif +} +#endif + +void detect_cpu_features(cpu_features_type& cpu_features) { + suppress_unused_warning(cpu_features); +#if __TBB_x86_32 || __TBB_x86_64 + const int rtm_ebx_mask = 1 << 11; + const int waitpkg_ecx_mask = 1 << 5; + int registers[4] = {0}; + + // Check RTM and WAITPKG + check_cpuid(7, 0, registers); + cpu_features.rtm_enabled = (registers[1] & rtm_ebx_mask) != 0; + cpu_features.waitpkg_enabled = (registers[2] & waitpkg_ecx_mask) != 0; +#endif /* (__TBB_x86_32 || __TBB_x86_64) */ +} + +} // namespace r1 +} // namespace detail +} // namespace tbb diff --git a/contrib/libs/tbb/src/tbb/misc.h b/contrib/libs/tbb/src/tbb/misc.h new file mode 100644 index 0000000000..6a3cf778a4 --- /dev/null +++ b/contrib/libs/tbb/src/tbb/misc.h @@ -0,0 +1,289 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef _TBB_tbb_misc_H +#define _TBB_tbb_misc_H + +#include "oneapi/tbb/detail/_config.h" +#include "oneapi/tbb/detail/_assert.h" +#include "oneapi/tbb/detail/_utils.h" + +#if __TBB_ARENA_BINDING +#include "oneapi/tbb/info.h" +#endif /*__TBB_ARENA_BINDING*/ + +#if __linux__ || __FreeBSD__ +#include <sys/param.h> // __FreeBSD_version +#if __FreeBSD_version >= 701000 +#include <sys/cpuset.h> +#endif +#endif + +#include <atomic> + +// Does the operating system have a system call to pin a thread to a set of OS processors? +#define __TBB_OS_AFFINITY_SYSCALL_PRESENT ((__linux__ && !__ANDROID__) || (__FreeBSD_version >= 701000)) +// On IBM* Blue Gene* CNK nodes, the affinity API has restrictions that prevent its usability for TBB, +// and also sysconf(_SC_NPROCESSORS_ONLN) already takes process affinity into account. +#define __TBB_USE_OS_AFFINITY_SYSCALL (__TBB_OS_AFFINITY_SYSCALL_PRESENT && !__bg__) + +namespace tbb { +namespace detail { +namespace r1 { + +void runtime_warning(const char* format, ... ); + +#if __TBB_ARENA_BINDING +class task_arena; +class task_scheduler_observer; +#endif /*__TBB_ARENA_BINDING*/ + +const std::size_t MByte = 1024*1024; + +#if __TBB_WIN8UI_SUPPORT && (_WIN32_WINNT < 0x0A00) +// In Win8UI mode (Windows 8 Store* applications), TBB uses a thread creation API +// that does not allow to specify the stack size. +// Still, the thread stack size value, either explicit or default, is used by the scheduler. +// So here we set the default value to match the platform's default of 1MB. +const std::size_t ThreadStackSize = 1*MByte; +#else +const std::size_t ThreadStackSize = (sizeof(uintptr_t) <= 4 ? 2 : 4 )*MByte; +#endif + +#ifndef __TBB_HardwareConcurrency + +//! Returns maximal parallelism level supported by the current OS configuration. +int AvailableHwConcurrency(); + +#else + +inline int AvailableHwConcurrency() { + int n = __TBB_HardwareConcurrency(); + return n > 0 ? n : 1; // Fail safety strap +} +#endif /* __TBB_HardwareConcurrency */ + +//! Returns OS regular memory page size +size_t DefaultSystemPageSize(); + +//! Returns number of processor groups in the current OS configuration. +/** AvailableHwConcurrency must be called at least once before calling this method. **/ +int NumberOfProcessorGroups(); + +#if _WIN32||_WIN64 + +//! Retrieves index of processor group containing processor with the given index +int FindProcessorGroupIndex ( int processorIndex ); + +//! Affinitizes the thread to the specified processor group +void MoveThreadIntoProcessorGroup( void* hThread, int groupIndex ); + +#endif /* _WIN32||_WIN64 */ + +//! Prints TBB version information on stderr +void PrintVersion(); + +//! Prints arbitrary extra TBB version information on stderr +void PrintExtraVersionInfo( const char* category, const char* format, ... ); + +//! A callback routine to print RML version information on stderr +void PrintRMLVersionInfo( void* arg, const char* server_info ); + +// For TBB compilation only; not to be used in public headers +#if defined(min) || defined(max) +#undef min +#undef max +#endif + +//! Utility template function returning lesser of the two values. 
+/** Provided here to avoid including not strict safe <algorithm>.\n + In case operands cause signed/unsigned or size mismatch warnings it is caller's + responsibility to do the appropriate cast before calling the function. **/ +template<typename T> +T min ( const T& val1, const T& val2 ) { + return val1 < val2 ? val1 : val2; +} + +//! Utility template function returning greater of the two values. +/** Provided here to avoid including not strict safe <algorithm>.\n + In case operands cause signed/unsigned or size mismatch warnings it is caller's + responsibility to do the appropriate cast before calling the function. **/ +template<typename T> +T max ( const T& val1, const T& val2 ) { + return val1 < val2 ? val2 : val1; +} + +//! Utility helper structure to ease overload resolution +template<int > struct int_to_type {}; + +//------------------------------------------------------------------------ +// FastRandom +//------------------------------------------------------------------------ + +//! A fast random number generator. +/** Uses linear congruential method. */ +class FastRandom { +private: + unsigned x, c; + static const unsigned a = 0x9e3779b1; // a big prime number +public: + //! Get a random number. + unsigned short get() { + return get(x); + } + //! Get a random number for the given seed; update the seed for next use. + unsigned short get( unsigned& seed ) { + unsigned short r = (unsigned short)(seed>>16); + __TBB_ASSERT(c&1, "c must be odd for big rng period"); + seed = seed*a+c; + return r; + } + //! Construct a random number generator. + FastRandom( void* unique_ptr ) { init(uintptr_t(unique_ptr)); } + + template <typename T> + void init( T seed ) { + init(seed,int_to_type<sizeof(seed)>()); + } + void init( uint64_t seed , int_to_type<8> ) { + init(uint32_t((seed>>32)+seed), int_to_type<4>()); + } + void init( uint32_t seed, int_to_type<4> ) { + // threads use different seeds for unique sequences + c = (seed|1)*0xba5703f5; // c must be odd, shuffle by a prime number + x = c^(seed>>1); // also shuffle x for the first get() invocation + } +}; + +//------------------------------------------------------------------------ +// Atomic extensions +//------------------------------------------------------------------------ + +//! Atomically replaces value of dst with newValue if they satisfy condition of compare predicate +/** Return value semantics is the same as for CAS. 
**/ +template<typename T1, class Pred> +T1 atomic_update(std::atomic<T1>& dst, T1 newValue, Pred compare) { + T1 oldValue = dst.load(std::memory_order_acquire); + while ( compare(oldValue, newValue) ) { + if ( dst.compare_exchange_strong(oldValue, newValue) ) + break; + } + return oldValue; +} + +#if __TBB_USE_OS_AFFINITY_SYSCALL + #if __linux__ + typedef cpu_set_t basic_mask_t; + #elif __FreeBSD_version >= 701000 + typedef cpuset_t basic_mask_t; + #else + #error affinity_helper is not implemented in this OS + #endif + class affinity_helper : no_copy { + basic_mask_t* threadMask; + int is_changed; + public: + affinity_helper() : threadMask(NULL), is_changed(0) {} + ~affinity_helper(); + void protect_affinity_mask( bool restore_process_mask ); + void dismiss(); + }; + void destroy_process_mask(); +#else + class affinity_helper : no_copy { + public: + void protect_affinity_mask( bool ) {} + void dismiss() {} + }; + inline void destroy_process_mask(){} +#endif /* __TBB_USE_OS_AFFINITY_SYSCALL */ + +struct cpu_features_type { + bool rtm_enabled{false}; + bool waitpkg_enabled{false}; +}; + +void detect_cpu_features(cpu_features_type& cpu_features); + +#if __TBB_ARENA_BINDING +class binding_handler; + +binding_handler* construct_binding_handler(int slot_num, int numa_id, int core_type_id, int max_threads_per_core); +void destroy_binding_handler(binding_handler* handler_ptr); +void apply_affinity_mask(binding_handler* handler_ptr, int slot_num); +void restore_affinity_mask(binding_handler* handler_ptr, int slot_num); + +#endif /*__TBB_ARENA_BINDING*/ + +// RTM specific section +// abort code for mutexes that detect a conflict with another thread. +enum { + speculation_not_supported = 0x00, + speculation_transaction_aborted = 0x01, + speculation_can_retry = 0x02, + speculation_memadd_conflict = 0x04, + speculation_buffer_overflow = 0x08, + speculation_breakpoint_hit = 0x10, + speculation_nested_abort = 0x20, + speculation_xabort_mask = 0xFF000000, + speculation_xabort_shift = 24, + speculation_xabort_not_free = 0xFF, // The value (0xFF) below comes from the Intel(R) 64 and IA-32 Architectures Optimization Reference Manual 12.4.5 lock not free + speculation_successful_begin = 0xFFFFFFFF, + speculation_retry = speculation_transaction_aborted + | speculation_can_retry + | speculation_memadd_conflict +}; + +// We suppose that successful transactions are sequentially ordered and +// do not require additional memory fences around them. +// Technically it can be achieved only if xbegin has implicit +// acquire memory semantics an xend/xabort has release memory semantics on compiler and hardware level. 
+// See the article: https://arxiv.org/pdf/1710.04839.pdf +static inline unsigned int begin_transaction() { +#if __TBB_TSX_INTRINSICS_PRESENT + return _xbegin(); +#else + return speculation_not_supported; // return unsuccessful code +#endif +} + +static inline void end_transaction() { +#if __TBB_TSX_INTRINSICS_PRESENT + _xend(); +#endif +} + +static inline void abort_transaction() { +#if __TBB_TSX_INTRINSICS_PRESENT + _xabort(speculation_xabort_not_free); +#endif +} + +#if TBB_USE_ASSERT +static inline unsigned char is_in_transaction() { +#if __TBB_TSX_INTRINSICS_PRESENT + return _xtest(); +#else + return 0; +#endif +} +#endif // TBB_USE_ASSERT + +} // namespace r1 +} // namespace detail +} // namespace tbb + +#endif /* _TBB_tbb_misc_H */ diff --git a/contrib/libs/tbb/src/tbb/misc_ex.cpp b/contrib/libs/tbb/src/tbb/misc_ex.cpp new file mode 100644 index 0000000000..177392bb65 --- /dev/null +++ b/contrib/libs/tbb/src/tbb/misc_ex.cpp @@ -0,0 +1,398 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +// Source file for miscellaneous entities that are infrequently referenced by +// an executing program, and implementation of which requires dynamic linking. + +#include "misc.h" + +#if !defined(__TBB_HardwareConcurrency) + +#include "dynamic_link.h" +#include <stdio.h> +#include <limits.h> + +#if _WIN32||_WIN64 +#include <windows.h> +#if __TBB_WIN8UI_SUPPORT +#include <thread> +#endif +#else +#include <unistd.h> +#if __linux__ +#include <sys/sysinfo.h> +#include <cstring> +#include <sched.h> +#include <cerrno> +#elif __sun +#include <sys/sysinfo.h> +#elif __FreeBSD__ +#include <cerrno> +#include <cstring> +#include <sys/param.h> // Required by <sys/cpuset.h> +#include <sys/cpuset.h> +#endif +#endif + +namespace tbb { +namespace detail { +namespace r1 { + +#if __TBB_USE_OS_AFFINITY_SYSCALL + +#if __linux__ +// Handlers for interoperation with libiomp +static int (*libiomp_try_restoring_original_mask)(); +// Table for mapping to libiomp entry points +static const dynamic_link_descriptor iompLinkTable[] = { + DLD_NOWEAK( kmp_set_thread_affinity_mask_initial, libiomp_try_restoring_original_mask ) +}; +#endif + +static void set_thread_affinity_mask( std::size_t maskSize, const basic_mask_t* threadMask ) { +#if __linux__ + if( sched_setaffinity( 0, maskSize, threadMask ) ) +#else /* FreeBSD */ + if( cpuset_setaffinity( CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, maskSize, threadMask ) ) +#endif + // Here and below the error severity is lowered from critical level + // because it may happen during TBB library unload because of not + // waiting for workers to complete (current RML policy, to be fixed). 
+ // handle_perror( errno, "setaffinity syscall" ); + runtime_warning( "setaffinity syscall failed" ); +} + +static void get_thread_affinity_mask( std::size_t maskSize, basic_mask_t* threadMask ) { +#if __linux__ + if( sched_getaffinity( 0, maskSize, threadMask ) ) +#else /* FreeBSD */ + if( cpuset_getaffinity( CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, maskSize, threadMask ) ) +#endif + runtime_warning( "getaffinity syscall failed" ); +} + +static basic_mask_t* process_mask; +static int num_masks; + +void destroy_process_mask() { + if( process_mask ) { + delete [] process_mask; + } +} + +#define curMaskSize sizeof(basic_mask_t) * num_masks +affinity_helper::~affinity_helper() { + if( threadMask ) { + if( is_changed ) { + set_thread_affinity_mask( curMaskSize, threadMask ); + } + delete [] threadMask; + } +} +void affinity_helper::protect_affinity_mask( bool restore_process_mask ) { + if( threadMask == NULL && num_masks ) { // TODO: assert num_masks validity? + threadMask = new basic_mask_t [num_masks]; + std::memset( threadMask, 0, curMaskSize ); + get_thread_affinity_mask( curMaskSize, threadMask ); + if( restore_process_mask ) { + __TBB_ASSERT( process_mask, "A process mask is requested but not yet stored" ); + is_changed = memcmp( process_mask, threadMask, curMaskSize ); + if( is_changed ) + set_thread_affinity_mask( curMaskSize, process_mask ); + } else { + // Assume that the mask will be changed by the caller. + is_changed = 1; + } + } +} +void affinity_helper::dismiss() { + if( threadMask ) { + delete [] threadMask; + threadMask = NULL; + } + is_changed = 0; +} +#undef curMaskSize + +static std::atomic<do_once_state> hardware_concurrency_info; + +static int theNumProcs; + +static void initialize_hardware_concurrency_info () { + int err; + int availableProcs = 0; + int numMasks = 1; +#if __linux__ + int maxProcs = sysconf(_SC_NPROCESSORS_ONLN); + int pid = getpid(); +#else /* FreeBSD >= 7.1 */ + int maxProcs = sysconf(_SC_NPROCESSORS_ONLN); +#endif + basic_mask_t* processMask; + const std::size_t BasicMaskSize = sizeof(basic_mask_t); + for (;;) { + const int curMaskSize = BasicMaskSize * numMasks; + processMask = new basic_mask_t[numMasks]; + std::memset( processMask, 0, curMaskSize ); +#if __linux__ + err = sched_getaffinity( pid, curMaskSize, processMask ); + if ( !err || errno != EINVAL || curMaskSize * CHAR_BIT >= 256 * 1024 ) + break; +#else /* FreeBSD >= 7.1 */ + // CPU_LEVEL_WHICH - anonymous (current) mask, CPU_LEVEL_CPUSET - assigned mask + err = cpuset_getaffinity( CPU_LEVEL_WHICH, CPU_WHICH_PID, -1, curMaskSize, processMask ); + if ( !err || errno != ERANGE || curMaskSize * CHAR_BIT >= 16 * 1024 ) + break; +#endif /* FreeBSD >= 7.1 */ + delete[] processMask; + numMasks <<= 1; + } + if ( !err ) { + // We have found the mask size and captured the process affinity mask into processMask. + num_masks = numMasks; // do here because it's needed for affinity_helper to work +#if __linux__ + // For better coexistence with libiomp which might have changed the mask already, + // check for its presence and ask it to restore the mask. + dynamic_link_handle libhandle; + if ( dynamic_link( "libiomp5.so", iompLinkTable, 1, &libhandle, DYNAMIC_LINK_GLOBAL ) ) { + // We have found the symbol provided by libiomp5 for restoring original thread affinity. + affinity_helper affhelp; + affhelp.protect_affinity_mask( /*restore_process_mask=*/false ); + if ( libiomp_try_restoring_original_mask()==0 ) { + // Now we have the right mask to capture, restored by libiomp. 
+ const int curMaskSize = BasicMaskSize * numMasks; + std::memset( processMask, 0, curMaskSize ); + get_thread_affinity_mask( curMaskSize, processMask ); + } else + affhelp.dismiss(); // thread mask has not changed + dynamic_unlink( libhandle ); + // Destructor of affinity_helper restores the thread mask (unless dismissed). + } +#endif + for ( int m = 0; availableProcs < maxProcs && m < numMasks; ++m ) { + for ( std::size_t i = 0; (availableProcs < maxProcs) && (i < BasicMaskSize * CHAR_BIT); ++i ) { + if ( CPU_ISSET( i, processMask + m ) ) + ++availableProcs; + } + } + process_mask = processMask; + } + else { + // Failed to get the process affinity mask; assume the whole machine can be used. + availableProcs = (maxProcs == INT_MAX) ? sysconf(_SC_NPROCESSORS_ONLN) : maxProcs; + delete[] processMask; + } + theNumProcs = availableProcs > 0 ? availableProcs : 1; // Fail safety strap + __TBB_ASSERT( theNumProcs <= sysconf(_SC_NPROCESSORS_ONLN), NULL ); +} + +int AvailableHwConcurrency() { + atomic_do_once( &initialize_hardware_concurrency_info, hardware_concurrency_info ); + return theNumProcs; +} + +/* End of __TBB_USE_OS_AFFINITY_SYSCALL implementation */ +#elif __ANDROID__ + +// Work-around for Android that reads the correct number of available CPUs since system calls are unreliable. +// Format of "present" file is: ([<int>-<int>|<int>],)+ +int AvailableHwConcurrency() { + FILE *fp = fopen("/sys/devices/system/cpu/present", "r"); + if (fp == NULL) return 1; + int num_args, lower, upper, num_cpus=0; + while ((num_args = fscanf(fp, "%u-%u", &lower, &upper)) != EOF) { + switch(num_args) { + case 2: num_cpus += upper - lower + 1; break; + case 1: num_cpus += 1; break; + } + fscanf(fp, ","); + } + return (num_cpus > 0) ? num_cpus : 1; +} + +#elif defined(_SC_NPROCESSORS_ONLN) + +int AvailableHwConcurrency() { + int n = sysconf(_SC_NPROCESSORS_ONLN); + return (n > 0) ? n : 1; +} + +#elif _WIN32||_WIN64 + +static std::atomic<do_once_state> hardware_concurrency_info; + +static const WORD TBB_ALL_PROCESSOR_GROUPS = 0xffff; + +// Statically allocate an array for processor group information. +// Windows 7 supports maximum 4 groups, but let's look ahead a little. +static const WORD MaxProcessorGroups = 64; + +struct ProcessorGroupInfo { + DWORD_PTR mask; ///< Affinity mask covering the whole group + int numProcs; ///< Number of processors in the group + int numProcsRunningTotal; ///< Subtotal of processors in this and preceding groups + + //! Total number of processor groups in the system + static int NumGroups; + + //! Index of the group with a slot reserved for the first external thread + /** In the context of multiple processor groups support current implementation + defines "the first external thread" as the first thread to invoke + AvailableHwConcurrency(). + + TODO: Implement a dynamic scheme remapping workers depending on the pending + external threads affinity. 
**/ + static int HoleIndex; +}; + +int ProcessorGroupInfo::NumGroups = 1; +int ProcessorGroupInfo::HoleIndex = 0; + +ProcessorGroupInfo theProcessorGroups[MaxProcessorGroups]; + +struct TBB_GROUP_AFFINITY { + DWORD_PTR Mask; + WORD Group; + WORD Reserved[3]; +}; + +static DWORD (WINAPI *TBB_GetActiveProcessorCount)( WORD groupIndex ) = NULL; +static WORD (WINAPI *TBB_GetActiveProcessorGroupCount)() = NULL; +static BOOL (WINAPI *TBB_SetThreadGroupAffinity)( HANDLE hThread, + const TBB_GROUP_AFFINITY* newAff, TBB_GROUP_AFFINITY *prevAff ); +static BOOL (WINAPI *TBB_GetThreadGroupAffinity)( HANDLE hThread, TBB_GROUP_AFFINITY* ); + +static const dynamic_link_descriptor ProcessorGroupsApiLinkTable[] = { + DLD(GetActiveProcessorCount, TBB_GetActiveProcessorCount) + , DLD(GetActiveProcessorGroupCount, TBB_GetActiveProcessorGroupCount) + , DLD(SetThreadGroupAffinity, TBB_SetThreadGroupAffinity) + , DLD(GetThreadGroupAffinity, TBB_GetThreadGroupAffinity) +}; + +static void initialize_hardware_concurrency_info () { +#if __TBB_WIN8UI_SUPPORT + // For these applications processor groups info is unavailable + // Setting up a number of processors for one processor group + theProcessorGroups[0].numProcs = theProcessorGroups[0].numProcsRunningTotal = std::thread::hardware_concurrency(); +#else /* __TBB_WIN8UI_SUPPORT */ + dynamic_link( "Kernel32.dll", ProcessorGroupsApiLinkTable, + sizeof(ProcessorGroupsApiLinkTable)/sizeof(dynamic_link_descriptor) ); + SYSTEM_INFO si; + GetNativeSystemInfo(&si); + DWORD_PTR pam, sam, m = 1; + GetProcessAffinityMask( GetCurrentProcess(), &pam, &sam ); + int nproc = 0; + for ( std::size_t i = 0; i < sizeof(DWORD_PTR) * CHAR_BIT; ++i, m <<= 1 ) { + if ( pam & m ) + ++nproc; + } + __TBB_ASSERT( nproc <= (int)si.dwNumberOfProcessors, NULL ); + // By default setting up a number of processors for one processor group + theProcessorGroups[0].numProcs = theProcessorGroups[0].numProcsRunningTotal = nproc; + // Setting up processor groups in case the process does not restrict affinity mask and more than one processor group is present + if ( nproc == (int)si.dwNumberOfProcessors && TBB_GetActiveProcessorCount ) { + // The process does not have restricting affinity mask and multiple processor groups are possible + ProcessorGroupInfo::NumGroups = (int)TBB_GetActiveProcessorGroupCount(); + __TBB_ASSERT( ProcessorGroupInfo::NumGroups <= MaxProcessorGroups, NULL ); + // Fail safety bootstrap. Release versions will limit available concurrency + // level, while debug ones would assert. + if ( ProcessorGroupInfo::NumGroups > MaxProcessorGroups ) + ProcessorGroupInfo::NumGroups = MaxProcessorGroups; + if ( ProcessorGroupInfo::NumGroups > 1 ) { + TBB_GROUP_AFFINITY ga; + if ( TBB_GetThreadGroupAffinity( GetCurrentThread(), &ga ) ) + ProcessorGroupInfo::HoleIndex = ga.Group; + int nprocs = 0; + for ( WORD i = 0; i < ProcessorGroupInfo::NumGroups; ++i ) { + ProcessorGroupInfo &pgi = theProcessorGroups[i]; + pgi.numProcs = (int)TBB_GetActiveProcessorCount(i); + __TBB_ASSERT( pgi.numProcs <= (int)sizeof(DWORD_PTR) * CHAR_BIT, NULL ); + pgi.mask = pgi.numProcs == sizeof(DWORD_PTR) * CHAR_BIT ? 
~(DWORD_PTR)0 : (DWORD_PTR(1) << pgi.numProcs) - 1; + pgi.numProcsRunningTotal = nprocs += pgi.numProcs; + } + __TBB_ASSERT( nprocs == (int)TBB_GetActiveProcessorCount( TBB_ALL_PROCESSOR_GROUPS ), NULL ); + } + } +#endif /* __TBB_WIN8UI_SUPPORT */ + + PrintExtraVersionInfo("Processor groups", "%d", ProcessorGroupInfo::NumGroups); + if (ProcessorGroupInfo::NumGroups>1) + for (int i=0; i<ProcessorGroupInfo::NumGroups; ++i) + PrintExtraVersionInfo( "----- Group", "%d: size %d", i, theProcessorGroups[i].numProcs); +} + +int NumberOfProcessorGroups() { + __TBB_ASSERT( hardware_concurrency_info == do_once_state::initialized, "NumberOfProcessorGroups is used before AvailableHwConcurrency" ); + return ProcessorGroupInfo::NumGroups; +} + +// Offset for the slot reserved for the first external thread +#define HoleAdjusted(procIdx, grpIdx) (procIdx + (holeIdx <= grpIdx)) + +int FindProcessorGroupIndex ( int procIdx ) { + // In case of oversubscription spread extra workers in a round robin manner + int holeIdx; + const int numProcs = theProcessorGroups[ProcessorGroupInfo::NumGroups - 1].numProcsRunningTotal; + if ( procIdx >= numProcs - 1 ) { + holeIdx = INT_MAX; + procIdx = (procIdx - numProcs + 1) % numProcs; + } + else + holeIdx = ProcessorGroupInfo::HoleIndex; + __TBB_ASSERT( hardware_concurrency_info == do_once_state::initialized, "FindProcessorGroupIndex is used before AvailableHwConcurrency" ); + // Approximate the likely group index assuming all groups are of the same size + int i = procIdx / theProcessorGroups[0].numProcs; + // Make sure the approximation is a valid group index + if (i >= ProcessorGroupInfo::NumGroups) i = ProcessorGroupInfo::NumGroups-1; + // Now adjust the approximation up or down + if ( theProcessorGroups[i].numProcsRunningTotal > HoleAdjusted(procIdx, i) ) { + while ( theProcessorGroups[i].numProcsRunningTotal - theProcessorGroups[i].numProcs > HoleAdjusted(procIdx, i) ) { + __TBB_ASSERT( i > 0, NULL ); + --i; + } + } + else { + do { + ++i; + } while ( theProcessorGroups[i].numProcsRunningTotal <= HoleAdjusted(procIdx, i) ); + } + __TBB_ASSERT( i < ProcessorGroupInfo::NumGroups, NULL ); + return i; +} + +void MoveThreadIntoProcessorGroup( void* hThread, int groupIndex ) { + __TBB_ASSERT( hardware_concurrency_info == do_once_state::initialized, "MoveThreadIntoProcessorGroup is used before AvailableHwConcurrency" ); + if ( !TBB_SetThreadGroupAffinity ) + return; + TBB_GROUP_AFFINITY ga = { theProcessorGroups[groupIndex].mask, (WORD)groupIndex, {0,0,0} }; + TBB_SetThreadGroupAffinity( hThread, &ga, NULL ); +} + +int AvailableHwConcurrency() { + atomic_do_once( &initialize_hardware_concurrency_info, hardware_concurrency_info ); + return theProcessorGroups[ProcessorGroupInfo::NumGroups - 1].numProcsRunningTotal; +} + +/* End of _WIN32||_WIN64 implementation */ +#else + #error AvailableHwConcurrency is not implemented for this OS +#endif + +} // namespace r1 +} // namespace detail +} // namespace tbb + +#endif /* !__TBB_HardwareConcurrency */ diff --git a/contrib/libs/tbb/src/tbb/observer_proxy.cpp b/contrib/libs/tbb/src/tbb/observer_proxy.cpp new file mode 100644 index 0000000000..4f7c07c266 --- /dev/null +++ b/contrib/libs/tbb/src/tbb/observer_proxy.cpp @@ -0,0 +1,322 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "oneapi/tbb/detail/_config.h" +#include "oneapi/tbb/detail/_utils.h" + +#include "observer_proxy.h" +#include "arena.h" +#include "main.h" +#include "thread_data.h" + +#include <atomic> + +namespace tbb { +namespace detail { +namespace r1 { + +#if TBB_USE_ASSERT +extern std::atomic<int> the_observer_proxy_count; +#endif /* TBB_USE_ASSERT */ + +observer_proxy::observer_proxy( d1::task_scheduler_observer& tso ) + : my_ref_count(1), my_list(NULL), my_next(NULL), my_prev(NULL), my_observer(&tso) +{ +#if TBB_USE_ASSERT + ++the_observer_proxy_count; +#endif /* TBB_USE_ASSERT */ +} + +observer_proxy::~observer_proxy() { + __TBB_ASSERT( !my_ref_count, "Attempt to destroy proxy still in use" ); + poison_value(my_ref_count); + poison_pointer(my_prev); + poison_pointer(my_next); +#if TBB_USE_ASSERT + --the_observer_proxy_count; +#endif /* TBB_USE_ASSERT */ +} + +void observer_list::clear() { + // Though the method will work fine for the empty list, we require the caller + // to check for the list emptiness before invoking it to avoid extra overhead. + __TBB_ASSERT( !empty(), NULL ); + { + scoped_lock lock(mutex(), /*is_writer=*/true); + observer_proxy *next = my_head.load(std::memory_order_relaxed); + while ( observer_proxy *p = next ) { + next = p->my_next; + // Both proxy p and observer p->my_observer (if non-null) are guaranteed + // to be alive while the list is locked. + d1::task_scheduler_observer *obs = p->my_observer; + // Make sure that possible concurrent observer destruction does not + // conflict with the proxy list cleanup. + if (!obs || !(p = obs->my_proxy.exchange(nullptr))) { + continue; + } + // accessing 'obs' after detaching of obs->my_proxy leads to the race with observer destruction + __TBB_ASSERT(!next || p == next->my_prev, nullptr); + __TBB_ASSERT(is_alive(p->my_ref_count), "Observer's proxy died prematurely"); + __TBB_ASSERT(p->my_ref_count.load(std::memory_order_relaxed) == 1, "Reference for observer is missing"); + poison_pointer(p->my_observer); + remove(p); + --p->my_ref_count; + delete p; + } + } + + // If observe(false) is called concurrently with the destruction of the arena, + // need to wait until all proxies are removed. 
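Before the implementation details below, it may help to see the public surface this proxy machinery serves. A usage sketch, assuming the standard oneTBB task_scheduler_observer interface; the derived class and its counter are invented for the example:

    #include <oneapi/tbb/parallel_for.h>
    #include <oneapi/tbb/task_scheduler_observer.h>

    #include <atomic>
    #include <cstdio>

    // Count how many times threads enter the scheduler while the observer is active.
    class entry_counter : public oneapi::tbb::task_scheduler_observer {
        std::atomic<int> entries{0};
    public:
        entry_counter() { observe(true); }              // start receiving callbacks
        ~entry_counter() override { observe(false); }   // stop before members go away
        void on_scheduler_entry(bool /*is_worker*/) override { ++entries; }
        void on_scheduler_exit(bool /*is_worker*/) override {}
        int count() const { return entries.load(); }
    };

    int main() {
        entry_counter counter;
        oneapi::tbb::parallel_for(0, 1000, [](int) { /* some work */ });
        std::printf("scheduler entries observed: %d\n", counter.count());
        return 0;
    }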
+ for (atomic_backoff backoff; ; backoff.pause()) { + scoped_lock lock(mutex(), /*is_writer=*/false); + if (my_head.load(std::memory_order_relaxed) == nullptr) { + break; + } + } + + __TBB_ASSERT(my_head.load(std::memory_order_relaxed) == nullptr && my_tail.load(std::memory_order_relaxed) == nullptr, nullptr); +} + +void observer_list::insert( observer_proxy* p ) { + scoped_lock lock(mutex(), /*is_writer=*/true); + if (my_head.load(std::memory_order_relaxed)) { + p->my_prev = my_tail.load(std::memory_order_relaxed); + my_tail.load(std::memory_order_relaxed)->my_next = p; + } else { + my_head.store(p, std::memory_order_relaxed); + } + my_tail.store(p, std::memory_order_relaxed); +} + +void observer_list::remove(observer_proxy* p) { + __TBB_ASSERT(my_head.load(std::memory_order_relaxed), "Attempt to remove an item from an empty list"); + __TBB_ASSERT(!my_tail.load(std::memory_order_relaxed)->my_next, "Last item's my_next must be NULL"); + if (p == my_tail.load(std::memory_order_relaxed)) { + __TBB_ASSERT(!p->my_next, nullptr); + my_tail.store(p->my_prev, std::memory_order_relaxed); + } else { + __TBB_ASSERT(p->my_next, nullptr); + p->my_next->my_prev = p->my_prev; + } + if (p == my_head.load(std::memory_order_relaxed)) { + __TBB_ASSERT(!p->my_prev, nullptr); + my_head.store(p->my_next, std::memory_order_relaxed); + } else { + __TBB_ASSERT(p->my_prev, nullptr); + p->my_prev->my_next = p->my_next; + } + __TBB_ASSERT((my_head.load(std::memory_order_relaxed) && my_tail.load(std::memory_order_relaxed)) || + (!my_head.load(std::memory_order_relaxed) && !my_tail.load(std::memory_order_relaxed)), nullptr); +} + +void observer_list::remove_ref(observer_proxy* p) { + std::uintptr_t r = p->my_ref_count.load(std::memory_order_acquire); + __TBB_ASSERT(is_alive(r), nullptr); + while (r > 1) { + if (p->my_ref_count.compare_exchange_strong(r, r - 1)) { + return; + } + } + __TBB_ASSERT(r == 1, nullptr); + // Reference count might go to zero + { + // Use lock to avoid resurrection by a thread concurrently walking the list + observer_list::scoped_lock lock(mutex(), /*is_writer=*/true); + r = --p->my_ref_count; + if (!r) { + remove(p); + } + } + __TBB_ASSERT(r || !p->my_ref_count, nullptr); + if (!r) { + delete p; + } +} + +void observer_list::do_notify_entry_observers(observer_proxy*& last, bool worker) { + // Pointer p marches though the list from last (exclusively) to the end. + observer_proxy* p = last, * prev = p; + for (;;) { + d1::task_scheduler_observer* tso = nullptr; + // Hold lock on list only long enough to advance to the next proxy in the list. + { + scoped_lock lock(mutex(), /*is_writer=*/false); + do { + if (p) { + // We were already processing the list. + if (observer_proxy* q = p->my_next) { + if (p == prev) { + remove_ref_fast(prev); // sets prev to NULL if successful + } + p = q; + } else { + // Reached the end of the list. + if (p == prev) { + // Keep the reference as we store the 'last' pointer in scheduler + __TBB_ASSERT(int(p->my_ref_count.load(std::memory_order_relaxed)) >= 1 + (p->my_observer ? 
1 : 0), nullptr); + } else { + // The last few proxies were empty + __TBB_ASSERT(int(p->my_ref_count.load(std::memory_order_relaxed)), nullptr); + ++p->my_ref_count; + if (prev) { + lock.release(); + remove_ref(prev); + } + } + last = p; + return; + } + } else { + // Starting pass through the list + p = my_head.load(std::memory_order_relaxed); + if (!p) { + return; + } + } + tso = p->my_observer; + } while (!tso); + ++p->my_ref_count; + ++tso->my_busy_count; + } + __TBB_ASSERT(!prev || p != prev, nullptr); + // Release the proxy pinned before p + if (prev) { + remove_ref(prev); + } + // Do not hold any locks on the list while calling user's code. + // Do not intercept any exceptions that may escape the callback so that + // they are either handled by the TBB scheduler or passed to the debugger. + tso->on_scheduler_entry(worker); + __TBB_ASSERT(p->my_ref_count.load(std::memory_order_relaxed), nullptr); + intptr_t bc = --tso->my_busy_count; + __TBB_ASSERT_EX(bc >= 0, "my_busy_count underflowed"); + prev = p; + } +} + +void observer_list::do_notify_exit_observers(observer_proxy* last, bool worker) { + // Pointer p marches though the list from the beginning to last (inclusively). + observer_proxy* p = nullptr, * prev = nullptr; + for (;;) { + d1::task_scheduler_observer* tso = nullptr; + // Hold lock on list only long enough to advance to the next proxy in the list. + { + scoped_lock lock(mutex(), /*is_writer=*/false); + do { + if (p) { + // We were already processing the list. + if (p != last) { + __TBB_ASSERT(p->my_next, "List items before 'last' must have valid my_next pointer"); + if (p == prev) + remove_ref_fast(prev); // sets prev to NULL if successful + p = p->my_next; + } else { + // remove the reference from the last item + remove_ref_fast(p); + if (p) { + lock.release(); + if (p != prev && prev) { + remove_ref(prev); + } + remove_ref(p); + } + return; + } + } else { + // Starting pass through the list + p = my_head.load(std::memory_order_relaxed); + __TBB_ASSERT(p, "Nonzero 'last' must guarantee that the global list is non-empty"); + } + tso = p->my_observer; + } while (!tso); + // The item is already refcounted + if (p != last) // the last is already referenced since entry notification + ++p->my_ref_count; + ++tso->my_busy_count; + } + __TBB_ASSERT(!prev || p != prev, nullptr); + if (prev) + remove_ref(prev); + // Do not hold any locks on the list while calling user's code. + // Do not intercept any exceptions that may escape the callback so that + // they are either handled by the TBB scheduler or passed to the debugger. 
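+        // The callback invoked next is user code. Through the public oneTBB
+        // interface such an observer looks roughly like the sketch below (the
+        // class name and callback bodies are illustrative, not part of TBB):
+        //
+        //     #include <oneapi/tbb/task_scheduler_observer.h>
+        //
+        //     class thread_setup_observer : public tbb::task_scheduler_observer {
+        //     public:
+        //         thread_setup_observer()  { observe(true);  }  // attach; ends up in r1::observe(tso, true)
+        //         ~thread_setup_observer() { observe(false); }  // detach; proxy removed as above
+        //         void on_scheduler_entry(bool /*worker*/) override { /* per-thread setup */ }
+        //         void on_scheduler_exit(bool /*worker*/)  override { /* per-thread cleanup */ }
+        //     };
+        //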
+ tso->on_scheduler_exit(worker); + __TBB_ASSERT(p->my_ref_count || p == last, nullptr); + intptr_t bc = --tso->my_busy_count; + __TBB_ASSERT_EX(bc >= 0, "my_busy_count underflowed"); + prev = p; + } +} + +void __TBB_EXPORTED_FUNC observe(d1::task_scheduler_observer &tso, bool enable) { + if( enable ) { + if( !tso.my_proxy.load(std::memory_order_relaxed) ) { + observer_proxy* p = new observer_proxy(tso); + tso.my_proxy.store(p, std::memory_order_relaxed); + tso.my_busy_count.store(0, std::memory_order_relaxed); + + thread_data* td = governor::get_thread_data_if_initialized(); + if (p->my_observer->my_task_arena == nullptr) { + if (!(td && td->my_arena)) { + td = governor::get_thread_data(); + } + __TBB_ASSERT(__TBB_InitOnce::initialization_done(), nullptr); + __TBB_ASSERT(td && td->my_arena, nullptr); + p->my_list = &td->my_arena->my_observers; + } else { + d1::task_arena* ta = p->my_observer->my_task_arena; + arena* a = ta->my_arena.load(std::memory_order_acquire); + if (a == nullptr) { // Avoid recursion during arena initialization + ta->initialize(); + a = ta->my_arena.load(std::memory_order_relaxed); + } + __TBB_ASSERT(a != nullptr, nullptr); + p->my_list = &a->my_observers; + } + p->my_list->insert(p); + // Notify newly activated observer and other pending ones if it belongs to current arena + if (td && td->my_arena && &td->my_arena->my_observers == p->my_list) { + p->my_list->notify_entry_observers(td->my_last_observer, td->my_is_worker); + } + } + } else { + // Make sure that possible concurrent proxy list cleanup does not conflict + // with the observer destruction here. + if ( observer_proxy* proxy = tso.my_proxy.exchange(nullptr) ) { + // List destruction should not touch this proxy after we've won the above interlocked exchange. + __TBB_ASSERT( proxy->my_observer == &tso, nullptr); + __TBB_ASSERT( is_alive(proxy->my_ref_count.load(std::memory_order_relaxed)), "Observer's proxy died prematurely" ); + __TBB_ASSERT( proxy->my_ref_count.load(std::memory_order_relaxed) >= 1, "reference for observer missing" ); + observer_list &list = *proxy->my_list; + { + // Ensure that none of the list walkers relies on observer pointer validity + observer_list::scoped_lock lock(list.mutex(), /*is_writer=*/true); + proxy->my_observer = nullptr; + // Proxy may still be held by other threads (to track the last notified observer) + if( !--proxy->my_ref_count ) {// nobody can increase it under exclusive lock + list.remove(proxy); + __TBB_ASSERT( !proxy->my_ref_count, NULL ); + delete proxy; + } + } + spin_wait_until_eq(tso.my_busy_count, 0); // other threads are still accessing the callback + } + } +} + +} // namespace r1 +} // namespace detail +} // namespace tbb diff --git a/contrib/libs/tbb/src/tbb/observer_proxy.h b/contrib/libs/tbb/src/tbb/observer_proxy.h new file mode 100644 index 0000000000..2450247ecd --- /dev/null +++ b/contrib/libs/tbb/src/tbb/observer_proxy.h @@ -0,0 +1,154 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_observer_proxy_H +#define __TBB_observer_proxy_H + +#include "oneapi/tbb/detail/_config.h" +#include "oneapi/tbb/detail/_aligned_space.h" + +#include "oneapi/tbb/task_scheduler_observer.h" +#include "oneapi/tbb/spin_rw_mutex.h" + +namespace tbb { +namespace detail { +namespace r1 { + +class observer_list { + friend class arena; + + // Mutex is wrapped with aligned_space to shut up warnings when its destructor + // is called while threads are still using it. + typedef aligned_space<spin_rw_mutex> my_mutex_type; + + //! Pointer to the head of this list. + std::atomic<observer_proxy*> my_head{nullptr}; + + //! Pointer to the tail of this list. + std::atomic<observer_proxy*> my_tail{nullptr}; + + //! Mutex protecting this list. + my_mutex_type my_mutex; + + //! Back-pointer to the arena this list belongs to. + arena* my_arena; + + //! Decrement refcount of the proxy p if there are other outstanding references. + /** In case of success sets p to NULL. Must be invoked from under the list lock. **/ + inline static void remove_ref_fast( observer_proxy*& p ); + + //! Implements notify_entry_observers functionality. + void do_notify_entry_observers( observer_proxy*& last, bool worker ); + + //! Implements notify_exit_observers functionality. + void do_notify_exit_observers( observer_proxy* last, bool worker ); + +public: + observer_list () = default; + + //! Removes and destroys all observer proxies from the list. + /** Cannot be used concurrently with other methods. **/ + void clear (); + + //! Add observer proxy to the tail of the list. + void insert ( observer_proxy* p ); + + //! Remove observer proxy from the list. + void remove ( observer_proxy* p ); + + //! Decrement refcount of the proxy and destroy it if necessary. + /** When refcount reaches zero removes the proxy from the list and destructs it. **/ + void remove_ref( observer_proxy* p ); + + //! Type of the scoped lock for the reader-writer mutex associated with the list. + typedef spin_rw_mutex::scoped_lock scoped_lock; + + //! Accessor to the reader-writer mutex associated with the list. + spin_rw_mutex& mutex () { return my_mutex.begin()[0]; } + + bool empty () const { return my_head.load(std::memory_order_relaxed) == nullptr; } + + //! Call entry notifications on observers added after last was notified. + /** Updates last to become the last notified observer proxy (in the global list) + or leaves it to be nullptr. The proxy has its refcount incremented. **/ + inline void notify_entry_observers( observer_proxy*& last, bool worker ); + + //! Call exit notifications on last and observers added before it. + inline void notify_exit_observers( observer_proxy*& last, bool worker ); +}; // class observer_list + +//! Wrapper for an observer object +/** To maintain shared lists of observers the scheduler first wraps each observer + object into a proxy so that a list item remained valid even after the corresponding + proxy object is destroyed by the user code. **/ +class observer_proxy { + friend class task_scheduler_observer; + friend class observer_list; + friend void observe(d1::task_scheduler_observer&, bool); + //! Reference count used for garbage collection. + /** 1 for reference from my task_scheduler_observer. + 1 for each task dispatcher's last observer pointer. + No accounting for neighbors in the shared list. */ + std::atomic<std::uintptr_t> my_ref_count; + //! Reference to the list this observer belongs to. + observer_list* my_list; + //! Pointer to next observer in the list specified by my_head. 
+ /** NULL for the last item in the list. **/ + observer_proxy* my_next; + //! Pointer to the previous observer in the list specified by my_head. + /** For the head of the list points to the last item. **/ + observer_proxy* my_prev; + //! Associated observer + d1::task_scheduler_observer* my_observer; + + //! Constructs proxy for the given observer and adds it to the specified list. + observer_proxy( d1::task_scheduler_observer& ); + + ~observer_proxy(); +}; // class observer_proxy + +void observer_list::remove_ref_fast( observer_proxy*& p ) { + if( p->my_observer ) { + // Can decrement refcount quickly, as it cannot drop to zero while under the lock. + std::uintptr_t r = --p->my_ref_count; + __TBB_ASSERT_EX( r, NULL ); + p = NULL; + } else { + // Use slow form of refcount decrementing, after the lock is released. + } +} + +void observer_list::notify_entry_observers(observer_proxy*& last, bool worker) { + if (last == my_tail.load(std::memory_order_relaxed)) + return; + do_notify_entry_observers(last, worker); +} + +void observer_list::notify_exit_observers( observer_proxy*& last, bool worker ) { + if (last == nullptr) { + return; + } + __TBB_ASSERT(!is_poisoned(last), NULL); + do_notify_exit_observers( last, worker ); + __TBB_ASSERT(last != nullptr, NULL); + poison_pointer(last); +} + +} // namespace r1 +} // namespace detail +} // namespace tbb + +#endif /* __TBB_observer_proxy_H */ diff --git a/contrib/libs/tbb/src/tbb/parallel_pipeline.cpp b/contrib/libs/tbb/src/tbb/parallel_pipeline.cpp new file mode 100644 index 0000000000..b7655c6b35 --- /dev/null +++ b/contrib/libs/tbb/src/tbb/parallel_pipeline.cpp @@ -0,0 +1,471 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "oneapi/tbb/parallel_pipeline.h" +#include "oneapi/tbb/spin_mutex.h" +#include "oneapi/tbb/tbb_allocator.h" +#include "oneapi/tbb/cache_aligned_allocator.h" +#include "itt_notify.h" +#include "tls.h" +#include "oneapi/tbb/detail/_exception.h" +#include "oneapi/tbb/detail/_small_object_pool.h" + +namespace tbb { +namespace detail { +namespace r1 { + +void handle_perror(int error_code, const char* aux_info); + +using Token = unsigned long; + +//! A processing pipeline that applies filters to items. +/** @ingroup algorithms */ +class pipeline { + friend void parallel_pipeline(d1::task_group_context&, std::size_t, const d1::filter_node&); +public: + + //! Construct empty pipeline. + pipeline(d1::task_group_context& cxt, std::size_t max_token) : + my_context(cxt), + first_filter(nullptr), + last_filter(nullptr), + input_tokens(Token(max_token)), + end_of_input(false), + wait_ctx(0) { + __TBB_ASSERT( max_token>0, "pipeline::run must have at least one token" ); + } + + ~pipeline(); + + //! Add filter to end of pipeline. + void add_filter( d1::base_filter& ); + + //! 
Traverse the tree of filter nodes in order and add a filter for each leaf
+    void fill_pipeline(const d1::filter_node& root) {
+        if( root.left && root.right ) {
+            fill_pipeline(*root.left);
+            fill_pipeline(*root.right);
+        }
+        else {
+            __TBB_ASSERT(!root.left && !root.right, "tree should be full");
+            add_filter(*root.create_filter());
+        }
+    }
+
+private:
+    friend class stage_task;
+    friend class base_filter;
+    friend void set_end_of_input(d1::base_filter& bf);
+
+    task_group_context& my_context;
+
+    //! Pointer to first filter in the pipeline.
+    d1::base_filter* first_filter;
+
+    //! Pointer to last filter in the pipeline.
+    d1::base_filter* last_filter;
+
+    //! Number of idle tokens waiting for input stage.
+    std::atomic<Token> input_tokens;
+
+    //! False until flow_control::stop() is called.
+    std::atomic<bool> end_of_input;
+
+    d1::wait_context wait_ctx;
+};
+
+//! This structure is used to store task information in an input buffer
+struct task_info {
+    void* my_object = nullptr;
+    //! Invalid unless a task went through an ordered stage.
+    Token my_token = 0;
+    //! False until my_token is set.
+    bool my_token_ready = false;
+    //! True if my_object is valid.
+    bool is_valid = false;
+    //! Set to initial state (no object, no token)
+    void reset() {
+        my_object = nullptr;
+        my_token = 0;
+        my_token_ready = false;
+        is_valid = false;
+    }
+};
+
+//! A buffer of input items for a filter.
+/** Each item is a task_info, inserted into a position in the buffer corresponding to a Token. */
+class input_buffer {
+    friend class base_filter;
+    friend class stage_task;
+    friend class pipeline;
+    friend void set_end_of_input(d1::base_filter& bf);
+
+    using size_type = Token;
+
+    //! Array of deferred tasks that cannot yet start executing.
+    task_info* array;
+
+    //! Size of array
+    /** Always 0 or a power of 2 */
+    size_type array_size;
+
+    //! Lowest token that can start executing.
+    /** All prior Token have already been seen. */
+    Token low_token;
+
+    //! Serializes updates.
+    spin_mutex array_mutex;
+
+    //! Resize "array".
+    /** Caller is responsible for acquiring a lock on "array_mutex". */
+    void grow( size_type minimum_size );
+
+    //! Initial size for "array"
+    /** Must be a power of 2 */
+    static const size_type initial_buffer_size = 4;
+
+    //! Used for out of order buffer, and for assigning my_token if is_ordered and my_token not already assigned
+    Token high_token;
+
+    //! True for ordered filter, false otherwise.
+    const bool is_ordered;
+
+    //! For parallel filters that accept NULLs, thread-local flag for reaching end_of_input
+    using end_of_input_tls_t = basic_tls<std::intptr_t>;
+    end_of_input_tls_t end_of_input_tls;
+    bool end_of_input_tls_allocated; // no way to test pthread creation of TLS
+
+public:
+    input_buffer(const input_buffer&) = delete;
+    input_buffer& operator=(const input_buffer&) = delete;
+
+    //! Construct empty buffer.
+    input_buffer( bool ordered) :
+        array(nullptr),
+        array_size(0),
+        low_token(0),
+        high_token(0),
+        is_ordered(ordered),
+        end_of_input_tls(),
+        end_of_input_tls_allocated(false) {
+        grow(initial_buffer_size);
+        __TBB_ASSERT( array, nullptr );
+    }
+
+    //! Destroy the buffer.
+    ~input_buffer() {
+        __TBB_ASSERT( array, nullptr );
+        cache_aligned_allocator<task_info>().deallocate(array,array_size);
+        poison_pointer( array );
+        if( end_of_input_tls_allocated ) {
+            destroy_my_tls();
+        }
+    }
+
+    //! Define order when the first filter is serial_in_order.
+    Token get_ordered_token(){
+        return high_token++;
+    }
+
+    //! Put a token into the buffer.
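+    /** The buffer is indexed directly by the token value: the slot for token t is
+        array[t & (array_size-1)], which is why array_size is kept a power of two.
+        A simplified sketch of just that indexing (an illustration; the real method
+        below adds locking, growth and ordered-token assignment):
+
+            std::size_t slot_for(std::size_t token, std::size_t array_size) {
+                return token & (array_size - 1);   // equivalent to token % array_size
+            }
+            // With array_size == 4, tokens 0,1,2,3,4,5 land in slots 0,1,2,3,0,1.
+    **/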
+ /** If task information was placed into buffer, returns true; + otherwise returns false, informing the caller to create and spawn a task. + */ + bool try_put_token( task_info& info ) { + info.is_valid = true; + spin_mutex::scoped_lock lock( array_mutex ); + Token token; + if( is_ordered ) { + if( !info.my_token_ready ) { + info.my_token = high_token++; + info.my_token_ready = true; + } + token = info.my_token; + } else + token = high_token++; + __TBB_ASSERT( (long)(token-low_token)>=0, nullptr ); + if( token!=low_token ) { + // Trying to put token that is beyond low_token. + // Need to wait until low_token catches up before dispatching. + if( token-low_token>=array_size ) + grow( token-low_token+1 ); + ITT_NOTIFY( sync_releasing, this ); + array[token&(array_size-1)] = info; + return true; + } + return false; + } + + //! Note that processing of a token is finished. + /** Fires up processing of the next token, if processing was deferred. */ + // Uses template to avoid explicit dependency on stage_task. + template<typename StageTask> + void try_to_spawn_task_for_next_token(StageTask& spawner, d1::execution_data& ed) { + task_info wakee; + { + spin_mutex::scoped_lock lock( array_mutex ); + // Wake the next task + task_info& item = array[++low_token & (array_size-1)]; + ITT_NOTIFY( sync_acquired, this ); + wakee = item; + item.is_valid = false; + } + if( wakee.is_valid ) + spawner.spawn_stage_task(wakee, ed); + } + + // end_of_input signal for parallel_pipeline, parallel input filters with 0 tokens allowed. + void create_my_tls() { + int status = end_of_input_tls.create(); + if(status) + handle_perror(status, "TLS not allocated for filter"); + end_of_input_tls_allocated = true; + } + void destroy_my_tls() { + int status = end_of_input_tls.destroy(); + if(status) + handle_perror(status, "Failed to destroy filter TLS"); + } + bool my_tls_end_of_input() { + return end_of_input_tls.get() != 0; + } + void set_my_tls_end_of_input() { + end_of_input_tls.set(1); + } +}; + +void input_buffer::grow( size_type minimum_size ) { + size_type old_size = array_size; + size_type new_size = old_size ? 2*old_size : initial_buffer_size; + while( new_size<minimum_size ) + new_size*=2; + task_info* new_array = cache_aligned_allocator<task_info>().allocate(new_size); + task_info* old_array = array; + for( size_type i=0; i<new_size; ++i ) + new_array[i].is_valid = false; + Token t=low_token; + for( size_type i=0; i<old_size; ++i, ++t ) + new_array[t&(new_size-1)] = old_array[t&(old_size-1)]; + array = new_array; + array_size = new_size; + if( old_array ) + cache_aligned_allocator<task_info>().deallocate(old_array,old_size); +} + +class stage_task : public d1::task, public task_info { +private: + friend class pipeline; + pipeline& my_pipeline; + d1::base_filter* my_filter; + d1::small_object_allocator m_allocator; + //! True if this task has not yet read the input. + bool my_at_start; + + //! True if this can be executed again. + bool execute_filter(d1::execution_data& ed); + + //! Spawn task if token is available. + void try_spawn_stage_task(d1::execution_data& ed) { + ITT_NOTIFY( sync_releasing, &my_pipeline.input_tokens ); + if( (my_pipeline.input_tokens.fetch_sub(1, std::memory_order_relaxed)) > 1 ) { + d1::small_object_allocator alloc{}; + r1::spawn( *alloc.new_object<stage_task>(ed, my_pipeline, alloc ), my_pipeline.my_context ); + } + } + +public: + + //! Construct stage_task for first stage in a pipeline. + /** Such a stage has not read any input yet. 
*/ + stage_task(pipeline& pipeline, d1::small_object_allocator& alloc ) : + my_pipeline(pipeline), + my_filter(pipeline.first_filter), + m_allocator(alloc), + my_at_start(true) + { + task_info::reset(); + my_pipeline.wait_ctx.reserve(); + } + //! Construct stage_task for a subsequent stage in a pipeline. + stage_task(pipeline& pipeline, d1::base_filter* filter, const task_info& info, d1::small_object_allocator& alloc) : + task_info(info), + my_pipeline(pipeline), + my_filter(filter), + m_allocator(alloc), + my_at_start(false) + { + my_pipeline.wait_ctx.reserve(); + } + //! Roughly equivalent to the constructor of input stage task + void reset() { + task_info::reset(); + my_filter = my_pipeline.first_filter; + my_at_start = true; + } + void finalize(d1::execution_data& ed) { + m_allocator.delete_object(this, ed); + } + //! The virtual task execution method + task* execute(d1::execution_data& ed) override { + if(!execute_filter(ed)) { + finalize(ed); + return nullptr; + } + return this; + } + task* cancel(d1::execution_data& ed) override { + finalize(ed); + return nullptr; + } + + ~stage_task() { + if ( my_filter && my_object ) { + my_filter->finalize(my_object); + my_object = nullptr; + } + my_pipeline.wait_ctx.release(); + } + //! Creates and spawns stage_task from task_info + void spawn_stage_task(const task_info& info, d1::execution_data& ed) { + d1::small_object_allocator alloc{}; + stage_task* clone = alloc.new_object<stage_task>(ed, my_pipeline, my_filter, info, alloc); + r1::spawn(*clone, my_pipeline.my_context); + } +}; + +bool stage_task::execute_filter(d1::execution_data& ed) { + __TBB_ASSERT( !my_at_start || !my_object, "invalid state of task" ); + if( my_at_start ) { + if( my_filter->is_serial() ) { + my_object = (*my_filter)(my_object); + if( my_object || ( my_filter->object_may_be_null() && !my_pipeline.end_of_input.load(std::memory_order_relaxed)) ) { + if( my_filter->is_ordered() ) { + my_token = my_filter->my_input_buffer->get_ordered_token(); + my_token_ready = true; + } + if( !my_filter->next_filter_in_pipeline ) { // we're only filter in pipeline + reset(); + return true; + } else { + try_spawn_stage_task(ed); + } + } else { + my_pipeline.end_of_input.store(true, std::memory_order_relaxed); + return false; + } + } else /*not is_serial*/ { + if ( my_pipeline.end_of_input.load(std::memory_order_relaxed) ) { + return false; + } + + try_spawn_stage_task(ed); + + my_object = (*my_filter)(my_object); + if( !my_object && (!my_filter->object_may_be_null() || my_filter->my_input_buffer->my_tls_end_of_input()) ){ + my_pipeline.end_of_input.store(true, std::memory_order_relaxed); + return false; + } + } + my_at_start = false; + } else { + my_object = (*my_filter)(my_object); + if( my_filter->is_serial() ) + my_filter->my_input_buffer->try_to_spawn_task_for_next_token(*this, ed); + } + my_filter = my_filter->next_filter_in_pipeline; + if( my_filter ) { + // There is another filter to execute. + if( my_filter->is_serial() ) { + // The next filter must execute tokens when they are available (in order for serial_in_order) + if( my_filter->my_input_buffer->try_put_token(*this) ){ + my_filter = nullptr; // To prevent deleting my_object twice if exception occurs + return false; + } + } + } else { + // Reached end of the pipe. 
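+        // Token accounting at the end of the pipe: this task returns its token and
+        // recycles itself as an input task only if every other token was in flight
+        // and input has not ended. The counter idiom in isolation (a sketch with
+        // plain std::atomic, not the exact types used here):
+        //
+        //     std::atomic<unsigned long> free_tokens{max_tokens};
+        //     // input side: take a token, spawn extra work only if one was left over
+        //     bool spawn_more = free_tokens.fetch_sub(1) > 1;
+        //     // output side: give the token back; 0 before the add means "all were busy"
+        //     bool recycle_as_input = free_tokens.fetch_add(1) == 0;
+        //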
+ std::size_t ntokens_avail = my_pipeline.input_tokens.fetch_add(1, std::memory_order_relaxed); + + if( ntokens_avail>0 // Only recycle if there is one available token + || my_pipeline.end_of_input.load(std::memory_order_relaxed) ) { + return false; // No need to recycle for new input + } + ITT_NOTIFY( sync_acquired, &my_pipeline.input_tokens ); + // Recycle as an input stage task. + reset(); + } + return true; +} + +pipeline:: ~pipeline() { + while( first_filter ) { + d1::base_filter* f = first_filter; + if( input_buffer* b = f->my_input_buffer ) { + b->~input_buffer(); + deallocate_memory(b); + } + first_filter = f->next_filter_in_pipeline; + f->~base_filter(); + deallocate_memory(f); + } +} + +void pipeline::add_filter( d1::base_filter& new_fitler ) { + __TBB_ASSERT( new_fitler.next_filter_in_pipeline==d1::base_filter::not_in_pipeline(), "filter already part of pipeline?" ); + new_fitler.my_pipeline = this; + if ( first_filter == nullptr ) + first_filter = &new_fitler; + else + last_filter->next_filter_in_pipeline = &new_fitler; + new_fitler.next_filter_in_pipeline = nullptr; + last_filter = &new_fitler; + if( new_fitler.is_serial() ) { + new_fitler.my_input_buffer = new (allocate_memory(sizeof(input_buffer))) input_buffer( new_fitler.is_ordered() ); + } else { + if( first_filter == &new_fitler && new_fitler.object_may_be_null() ) { + //TODO: buffer only needed to hold TLS; could improve + new_fitler.my_input_buffer = new (allocate_memory(sizeof(input_buffer))) input_buffer( /*is_ordered*/false ); + new_fitler.my_input_buffer->create_my_tls(); + } + } +} + +void __TBB_EXPORTED_FUNC parallel_pipeline(d1::task_group_context& cxt, std::size_t max_token, const d1::filter_node& fn) { + pipeline pipe(cxt, max_token); + + pipe.fill_pipeline(fn); + + d1::small_object_allocator alloc{}; + stage_task& st = *alloc.new_object<stage_task>(pipe, alloc); + + // Start execution of tasks + r1::execute_and_wait(st, cxt, pipe.wait_ctx, cxt); +} + +void __TBB_EXPORTED_FUNC set_end_of_input(d1::base_filter& bf) { + __TBB_ASSERT(bf.my_input_buffer, nullptr); + __TBB_ASSERT(bf.object_may_be_null(), nullptr); + if(bf.is_serial() ) { + bf.my_pipeline->end_of_input.store(true, std::memory_order_relaxed); + } else { + __TBB_ASSERT(bf.my_input_buffer->end_of_input_tls_allocated, nullptr); + bf.my_input_buffer->set_my_tls_end_of_input(); + } +} + +} // namespace r1 +} // namespace detail +} // namespace tbb diff --git a/contrib/libs/tbb/src/tbb/private_server.cpp b/contrib/libs/tbb/src/tbb/private_server.cpp new file mode 100644 index 0000000000..bc0af84bb4 --- /dev/null +++ b/contrib/libs/tbb/src/tbb/private_server.cpp @@ -0,0 +1,420 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#include "oneapi/tbb/cache_aligned_allocator.h" + +#include "rml_tbb.h" +#include "rml_thread_monitor.h" + +#include "scheduler_common.h" +#include "governor.h" +#include "misc.h" + +#include <atomic> + + +namespace tbb { +namespace detail { +namespace r1 { +namespace rml { + +using rml::internal::thread_monitor; +typedef thread_monitor::handle_type thread_handle; + +class private_server; + +class private_worker: no_copy { +private: + //! State in finite-state machine that controls the worker. + /** State diagram: + init --> starting --> normal + | | | + | V | + \------> quit <------/ + */ + enum state_t { + //! *this is initialized + st_init, + //! *this has associated thread that is starting up. + st_starting, + //! Associated thread is doing normal life sequence. + st_normal, + //! Associated thread has ended normal life sequence and promises to never touch *this again. + st_quit + }; + std::atomic<state_t> my_state; + + //! Associated server + private_server& my_server; + + //! Associated client + tbb_client& my_client; + + //! index used for avoiding the 64K aliasing problem + const std::size_t my_index; + + //! Monitor for sleeping when there is no work to do. + /** The invariant that holds for sleeping workers is: + "my_slack<=0 && my_state==st_normal && I am on server's list of asleep threads" */ + thread_monitor my_thread_monitor; + + //! Handle of the OS thread associated with this worker + thread_handle my_handle; + + //! Link for list of workers that are sleeping or have no associated thread. + private_worker* my_next; + + friend class private_server; + + //! Actions executed by the associated thread + void run() noexcept; + + //! Wake up associated thread (or launch a thread if there is none) + void wake_or_launch(); + + //! Called by a thread (usually not the associated thread) to commence termination. + void start_shutdown(); + + static __RML_DECL_THREAD_ROUTINE thread_routine( void* arg ); + + static void release_handle(thread_handle my_handle, bool join); + +protected: + private_worker( private_server& server, tbb_client& client, const std::size_t i ) : + my_state(st_init), my_server(server), my_client(client), my_index(i), + my_thread_monitor(), my_handle(), my_next() + {} +}; + +static const std::size_t cache_line_size = tbb::detail::max_nfs_size; + +#if _MSC_VER && !defined(__INTEL_COMPILER) + // Suppress overzealous compiler warnings about uninstantiable class + #pragma warning(push) + #pragma warning(disable:4510 4610) +#endif +class padded_private_worker: public private_worker { + char pad[cache_line_size - sizeof(private_worker)%cache_line_size]; +public: + padded_private_worker( private_server& server, tbb_client& client, const std::size_t i ) + : private_worker(server,client,i) { suppress_unused_warning(pad); } +}; +#if _MSC_VER && !defined(__INTEL_COMPILER) + #pragma warning(pop) +#endif + +class private_server: public tbb_server, no_copy { +private: + tbb_client& my_client; + //! Maximum number of threads to be created. + /** Threads are created lazily, so maximum might not actually be reached. */ + const tbb_client::size_type my_n_thread; + + //! Stack size for each thread. */ + const std::size_t my_stack_size; + + //! Number of jobs that could use their associated thread minus number of active threads. + /** If negative, indicates oversubscription. + If positive, indicates that more threads should run. 
+ Can be lowered asynchronously, but must be raised only while holding my_asleep_list_mutex, + because raising it impacts the invariant for sleeping threads. */ + std::atomic<int> my_slack; + + //! Counter used to determine when to delete this. + std::atomic<int> my_ref_count; + + padded_private_worker* my_thread_array; + + //! List of workers that are asleep or committed to sleeping until notified by another thread. + std::atomic<private_worker*> my_asleep_list_root; + + //! Protects my_asleep_list_root + typedef scheduler_mutex_type asleep_list_mutex_type; + asleep_list_mutex_type my_asleep_list_mutex; + +#if TBB_USE_ASSERT + std::atomic<int> my_net_slack_requests; +#endif /* TBB_USE_ASSERT */ + + //! Wake up to two sleeping workers, if there are any sleeping. + /** The call is used to propagate a chain reaction where each thread wakes up two threads, + which in turn each wake up two threads, etc. */ + void propagate_chain_reaction() { + // First test of a double-check idiom. Second test is inside wake_some(0). + if( my_asleep_list_root.load(std::memory_order_acquire) ) + wake_some(0); + } + + //! Try to add t to list of sleeping workers + bool try_insert_in_asleep_list( private_worker& t ); + + //! Equivalent of adding additional_slack to my_slack and waking up to 2 threads if my_slack permits. + void wake_some( int additional_slack ); + + virtual ~private_server(); + + void remove_server_ref() { + if( --my_ref_count==0 ) { + my_client.acknowledge_close_connection(); + this->~private_server(); + tbb::cache_aligned_allocator<private_server>().deallocate( this, 1 ); + } + } + + friend class private_worker; +public: + private_server( tbb_client& client ); + + version_type version() const override { + return 0; + } + + void request_close_connection( bool /*exiting*/ ) override { + for( std::size_t i=0; i<my_n_thread; ++i ) + my_thread_array[i].start_shutdown(); + remove_server_ref(); + } + + void yield() override { d0::yield(); } + + void independent_thread_number_changed( int ) override {__TBB_ASSERT(false,NULL);} + + unsigned default_concurrency() const override { return governor::default_num_threads() - 1; } + + void adjust_job_count_estimate( int delta ) override; + +#if _WIN32||_WIN64 + void register_external_thread ( ::rml::server::execution_resource_t& ) override {} + void unregister_external_thread ( ::rml::server::execution_resource_t ) override {} +#endif /* _WIN32||_WIN64 */ +}; + +//------------------------------------------------------------------------ +// Methods of private_worker +//------------------------------------------------------------------------ +#if _MSC_VER && !defined(__INTEL_COMPILER) + // Suppress overzealous compiler warnings about an initialized variable 'sink_for_alloca' not referenced + #pragma warning(push) + #pragma warning(disable:4189) +#endif +#if __MINGW32__ && __GNUC__==4 &&__GNUC_MINOR__>=2 && !__MINGW64__ +// ensure that stack is properly aligned for TBB threads +__attribute__((force_align_arg_pointer)) +#endif +__RML_DECL_THREAD_ROUTINE private_worker::thread_routine( void* arg ) { + private_worker* self = static_cast<private_worker*>(arg); + AVOID_64K_ALIASING( self->my_index ); + self->run(); + return 0; +} +#if _MSC_VER && !defined(__INTEL_COMPILER) + #pragma warning(pop) +#endif + +void private_worker::release_handle(thread_handle handle, bool join) { + if (join) + thread_monitor::join(handle); + else + thread_monitor::detach_thread(handle); +} + +void private_worker::start_shutdown() { + state_t expected_state = 
my_state.load(std::memory_order_acquire); + __TBB_ASSERT( expected_state!=st_quit, NULL ); + + while( !my_state.compare_exchange_strong( expected_state, st_quit ) ); + + if( expected_state==st_normal || expected_state==st_starting ) { + // May have invalidated invariant for sleeping, so wake up the thread. + // Note that the notify() here occurs without maintaining invariants for my_slack. + // It does not matter, because my_state==st_quit overrides checking of my_slack. + my_thread_monitor.notify(); + // Do not need release handle in st_init state, + // because in this case the thread wasn't started yet. + // For st_starting release is done at launch site. + if (expected_state==st_normal) + release_handle(my_handle, governor::does_client_join_workers(my_client)); + } else if( expected_state==st_init ) { + // Perform action that otherwise would be performed by associated thread when it quits. + my_server.remove_server_ref(); + } +} + +void private_worker::run() noexcept { + my_server.propagate_chain_reaction(); + + // Transiting to st_normal here would require setting my_handle, + // which would create race with the launching thread and + // complications in handle management on Windows. + + ::rml::job& j = *my_client.create_one_job(); + while( my_state.load(std::memory_order_acquire)!=st_quit ) { + if( my_server.my_slack.load(std::memory_order_acquire)>=0 ) { + my_client.process(j); + } else { + thread_monitor::cookie c; + // Prepare to wait + my_thread_monitor.prepare_wait(c); + // Check/set the invariant for sleeping + if( my_state.load(std::memory_order_acquire)!=st_quit && my_server.try_insert_in_asleep_list(*this) ) { + my_thread_monitor.commit_wait(c); + __TBB_ASSERT( my_state==st_quit || !my_next, "Thread monitor missed a spurious wakeup?" ); + my_server.propagate_chain_reaction(); + } else { + // Invariant broken + my_thread_monitor.cancel_wait(); + } + } + } + my_client.cleanup(j); + + ++my_server.my_slack; + my_server.remove_server_ref(); +} + +inline void private_worker::wake_or_launch() { + state_t expected_state = st_init; + if( my_state.compare_exchange_strong( expected_state, st_starting ) ) { + // after this point, remove_server_ref() must be done by created thread +#if __TBB_USE_WINAPI + my_handle = thread_monitor::launch( thread_routine, this, my_server.my_stack_size, &this->my_index ); +#elif __TBB_USE_POSIX + { + affinity_helper fpa; + fpa.protect_affinity_mask( /*restore_process_mask=*/true ); + my_handle = thread_monitor::launch( thread_routine, this, my_server.my_stack_size ); + // Implicit destruction of fpa resets original affinity mask. + } +#endif /* __TBB_USE_POSIX */ + expected_state = st_starting; + if ( !my_state.compare_exchange_strong( expected_state, st_normal ) ) { + // Do shutdown during startup. my_handle can't be released + // by start_shutdown, because my_handle value might be not set yet + // at time of transition from st_starting to st_quit. 
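+            // The launch handshake, reduced to its two CAS steps (a sketch with plain
+            // std::atomic; the state names mirror the enum defined above, the helper
+            // names are illustrative):
+            //
+            //     std::atomic<state_t> s{st_init};
+            //     state_t e = st_init;
+            //     if (s.compare_exchange_strong(e, st_starting)) {   // we own thread start-up
+            //         launch_thread();                               // (illustrative helper)
+            //         e = st_starting;
+            //         if (!s.compare_exchange_strong(e, st_normal))  // failed => start_shutdown()
+            //             release_thread_handle();                   //   already stored st_quit
+            //     }
+            //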
+ __TBB_ASSERT( expected_state==st_quit, NULL ); + release_handle(my_handle, governor::does_client_join_workers(my_client)); + } + } + else { + __TBB_ASSERT( !my_next, "Should not wake a thread while it's still in asleep list" ); + my_thread_monitor.notify(); + } +} + +//------------------------------------------------------------------------ +// Methods of private_server +//------------------------------------------------------------------------ +private_server::private_server( tbb_client& client ) : + my_client(client), + my_n_thread(client.max_job_count()), + my_stack_size(client.min_stack_size()), + my_slack(0), + my_ref_count(my_n_thread+1), + my_thread_array(NULL), + my_asleep_list_root(NULL) +#if TBB_USE_ASSERT + , my_net_slack_requests(0) +#endif /* TBB_USE_ASSERT */ +{ + my_thread_array = tbb::cache_aligned_allocator<padded_private_worker>().allocate( my_n_thread ); + for( std::size_t i=0; i<my_n_thread; ++i ) { + private_worker* t = new( &my_thread_array[i] ) padded_private_worker( *this, client, i ); + t->my_next = my_asleep_list_root.exchange(t, std::memory_order_relaxed); + } +} + +private_server::~private_server() { + __TBB_ASSERT( my_net_slack_requests==0, NULL ); + for( std::size_t i=my_n_thread; i--; ) + my_thread_array[i].~padded_private_worker(); + tbb::cache_aligned_allocator<padded_private_worker>().deallocate( my_thread_array, my_n_thread ); + tbb::detail::poison_pointer( my_thread_array ); +} + +inline bool private_server::try_insert_in_asleep_list( private_worker& t ) { + asleep_list_mutex_type::scoped_lock lock; + if( !lock.try_acquire(my_asleep_list_mutex) ) + return false; + // Contribute to slack under lock so that if another takes that unit of slack, + // it sees us sleeping on the list and wakes us up. + int k = ++my_slack; + if( k<=0 ) { + t.my_next = my_asleep_list_root.exchange(&t, std::memory_order_relaxed); + return true; + } else { + --my_slack; + return false; + } +} + +void private_server::wake_some( int additional_slack ) { + __TBB_ASSERT( additional_slack>=0, NULL ); + private_worker* wakee[2]; + private_worker**w = wakee; + { + asleep_list_mutex_type::scoped_lock lock(my_asleep_list_mutex); + while( my_asleep_list_root.load(std::memory_order_relaxed) && w<wakee+2 ) { + if( additional_slack>0 ) { + // additional demand does not exceed surplus supply + if ( additional_slack+my_slack.load(std::memory_order_acquire)<=0 ) + break; + --additional_slack; + } else { + // Chain reaction; Try to claim unit of slack + int old = my_slack; + do { + if( old<=0 ) goto done; + } while( !my_slack.compare_exchange_strong(old,old-1) ); + } + // Pop sleeping worker to combine with claimed unit of slack + auto old = my_asleep_list_root.load(std::memory_order_relaxed); + my_asleep_list_root.store(old->my_next, std::memory_order_relaxed); + *w++ = old; + } + if( additional_slack ) { + // Contribute our unused slack to my_slack. + my_slack += additional_slack; + } + } +done: + while( w>wakee ) { + private_worker* ww = *--w; + ww->my_next = NULL; + ww->wake_or_launch(); + } +} + +void private_server::adjust_job_count_estimate( int delta ) { +#if TBB_USE_ASSERT + my_net_slack_requests+=delta; +#endif /* TBB_USE_ASSERT */ + if( delta<0 ) { + my_slack+=delta; + } else if( delta>0 ) { + wake_some( delta ); + } +} + +//! Factory method called from task.cpp to create a private_server. 
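+// It uses the allocate-then-construct-in-place idiom, mirrored by the explicit
+// destructor call and deallocate() in remove_server_ref() above. The pattern in
+// isolation (a sketch; T and the helper names are illustrative):
+//
+//     #include <new>
+//     #include <utility>
+//     #include <oneapi/tbb/cache_aligned_allocator.h>
+//
+//     template <typename T, typename... Args>
+//     T* make_cache_aligned(Args&&... args) {
+//         T* raw = tbb::cache_aligned_allocator<T>().allocate(1);   // raw, cache-aligned storage
+//         return new (raw) T(std::forward<Args>(args)...);          // construct in place
+//     }
+//
+//     template <typename T>
+//     void destroy_cache_aligned(T* p) {
+//         p->~T();                                                  // run the destructor
+//         tbb::cache_aligned_allocator<T>().deallocate(p, 1);       // release the storage
+//     }
+//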
+tbb_server* make_private_server( tbb_client& client ) { + return new( tbb::cache_aligned_allocator<private_server>().allocate(1) ) private_server(client); +} + +} // namespace rml +} // namespace r1 +} // namespace detail +} // namespace tbb + diff --git a/contrib/libs/tbb/src/tbb/profiling.cpp b/contrib/libs/tbb/src/tbb/profiling.cpp new file mode 100644 index 0000000000..2603f35b88 --- /dev/null +++ b/contrib/libs/tbb/src/tbb/profiling.cpp @@ -0,0 +1,265 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "oneapi/tbb/detail/_config.h" +#include "oneapi/tbb/detail/_template_helpers.h" + +#include "main.h" +#include "itt_notify.h" + +#include "oneapi/tbb/profiling.h" + +#include <string.h> + +namespace tbb { +namespace detail { +namespace r1 { + +#if __TBB_USE_ITT_NOTIFY +bool ITT_Present; +static std::atomic<bool> ITT_InitializationDone; + +static __itt_domain *tbb_domains[d1::ITT_NUM_DOMAINS] = {}; + +struct resource_string { + const char *str; + __itt_string_handle *itt_str_handle; +}; + +// +// populate resource strings +// +#define TBB_STRING_RESOURCE( index_name, str ) { str, nullptr }, +static resource_string strings_for_itt[] = { + #include "oneapi/tbb/detail/_string_resource.h" + { "num_resource_strings", nullptr } +}; +#undef TBB_STRING_RESOURCE + +static __itt_string_handle* ITT_get_string_handle(std::uintptr_t idx) { + __TBB_ASSERT(idx < NUM_STRINGS, "string handle out of valid range"); + return idx < NUM_STRINGS ? strings_for_itt[idx].itt_str_handle : NULL; +} + +static void ITT_init_domains() { + tbb_domains[d1::ITT_DOMAIN_MAIN] = __itt_domain_create( _T("tbb") ); + tbb_domains[d1::ITT_DOMAIN_MAIN]->flags = 1; + tbb_domains[d1::ITT_DOMAIN_FLOW] = __itt_domain_create( _T("tbb.flow") ); + tbb_domains[d1::ITT_DOMAIN_FLOW]->flags = 1; + tbb_domains[d1::ITT_DOMAIN_ALGO] = __itt_domain_create( _T("tbb.algorithm") ); + tbb_domains[d1::ITT_DOMAIN_ALGO]->flags = 1; +} + +static void ITT_init_strings() { + for ( std::uintptr_t i = 0; i < NUM_STRINGS; ++i ) { +#if _WIN32||_WIN64 + strings_for_itt[i].itt_str_handle = __itt_string_handle_createA( strings_for_itt[i].str ); +#else + strings_for_itt[i].itt_str_handle = __itt_string_handle_create( strings_for_itt[i].str ); +#endif + } +} + +static void ITT_init() { + ITT_init_domains(); + ITT_init_strings(); +} + +/** Thread-unsafe lazy one-time initialization of tools interop. + Used by both dummy handlers and general TBB one-time initialization routine. **/ +void ITT_DoUnsafeOneTimeInitialization () { + // Double check ITT_InitializationDone is necessary because the first check + // in ITT_DoOneTimeInitialization is not guarded with the __TBB_InitOnce lock. + if ( !ITT_InitializationDone ) { + ITT_Present = (__TBB_load_ittnotify()!=0); + if (ITT_Present) ITT_init(); + ITT_InitializationDone = true; + } +} + +/** Thread-safe lazy one-time initialization of tools interop. + Used by dummy handlers only. 
**/ +extern "C" +void ITT_DoOneTimeInitialization() { + if ( !ITT_InitializationDone ) { + __TBB_InitOnce::lock(); + ITT_DoUnsafeOneTimeInitialization(); + __TBB_InitOnce::unlock(); + } +} + +void create_itt_sync(void* ptr, const tchar* objtype, const tchar* objname) { + ITT_SYNC_CREATE(ptr, objtype, objname); +} + +void call_itt_notify(int t, void *ptr) { + switch (t) { + case 0: ITT_NOTIFY(sync_prepare, ptr); break; + case 1: ITT_NOTIFY(sync_cancel, ptr); break; + case 2: ITT_NOTIFY(sync_acquired, ptr); break; + case 3: ITT_NOTIFY(sync_releasing, ptr); break; + case 4: ITT_NOTIFY(sync_destroy, ptr); break; + } +} + +void itt_set_sync_name(void* obj, const tchar* name) { + __itt_sync_rename(obj, name); +} + +const __itt_id itt_null_id = { 0, 0, 0 }; + +static inline __itt_domain* get_itt_domain(d1::itt_domain_enum idx) { + if (tbb_domains[idx] == NULL) { + ITT_DoOneTimeInitialization(); + } + return tbb_domains[idx]; +} + +static inline void itt_id_make(__itt_id* id, void* addr, unsigned long long extra) { + *id = __itt_id_make(addr, extra); +} + +static inline void itt_id_create(const __itt_domain* domain, __itt_id id) { + __itt_id_create(domain, id); +} + +void itt_make_task_group(d1::itt_domain_enum domain, void* group, unsigned long long group_extra, + void* parent, unsigned long long parent_extra, string_resource_index name_index) { + if (__itt_domain* d = get_itt_domain(domain)) { + __itt_id group_id = itt_null_id; + __itt_id parent_id = itt_null_id; + itt_id_make(&group_id, group, group_extra); + itt_id_create(d, group_id); + if (parent) { + itt_id_make(&parent_id, parent, parent_extra); + } + __itt_string_handle* n = ITT_get_string_handle(name_index); + __itt_task_group(d, group_id, parent_id, n); + } +} + +void __TBB_EXPORTED_FUNC itt_metadata_str_add(d1::itt_domain_enum domain, void *addr, unsigned long long addr_extra, + string_resource_index key, const char *value ) { + if ( __itt_domain *d = get_itt_domain( domain ) ) { + __itt_id id = itt_null_id; + itt_id_make( &id, addr, addr_extra ); + __itt_string_handle *k = ITT_get_string_handle(key); + size_t value_length = strlen( value ); +#if _WIN32||_WIN64 + __itt_metadata_str_addA(d, id, k, value, value_length); +#else + __itt_metadata_str_add(d, id, k, value, value_length); +#endif + } +} + +void __TBB_EXPORTED_FUNC itt_metadata_ptr_add(d1::itt_domain_enum domain, void *addr, unsigned long long addr_extra, + string_resource_index key, void *value ) { + if ( __itt_domain *d = get_itt_domain( domain ) ) { + __itt_id id = itt_null_id; + itt_id_make( &id, addr, addr_extra ); + __itt_string_handle *k = ITT_get_string_handle(key); +#if __TBB_x86_32 + __itt_metadata_add(d, id, k, __itt_metadata_u32, 1, value); +#else + __itt_metadata_add(d, id, k, __itt_metadata_u64, 1, value); +#endif + } +} + +void __TBB_EXPORTED_FUNC itt_relation_add(d1::itt_domain_enum domain, void *addr0, unsigned long long addr0_extra, + itt_relation relation, void *addr1, unsigned long long addr1_extra ) { + if ( __itt_domain *d = get_itt_domain( domain ) ) { + __itt_id id0 = itt_null_id; + __itt_id id1 = itt_null_id; + itt_id_make( &id0, addr0, addr0_extra ); + itt_id_make( &id1, addr1, addr1_extra ); + __itt_relation_add( d, id0, (__itt_relation)relation, id1 ); + } +} + +void __TBB_EXPORTED_FUNC itt_task_begin(d1::itt_domain_enum domain, void* task, unsigned long long task_extra, + void* parent, unsigned long long parent_extra, string_resource_index name_index) { + if (__itt_domain* d = get_itt_domain(domain)) { + __itt_id task_id = itt_null_id; + __itt_id 
parent_id = itt_null_id; + if (task) { + itt_id_make(&task_id, task, task_extra); + } + if (parent) { + itt_id_make(&parent_id, parent, parent_extra); + } + __itt_string_handle* n = ITT_get_string_handle(name_index); + __itt_task_begin(d, task_id, parent_id, n); + } +} + +void __TBB_EXPORTED_FUNC itt_task_end(d1::itt_domain_enum domain) { + if (__itt_domain* d = get_itt_domain(domain)) { + __itt_task_end(d); + } +} + +void __TBB_EXPORTED_FUNC itt_region_begin(d1::itt_domain_enum domain, void *region, unsigned long long region_extra, + void *parent, unsigned long long parent_extra, string_resource_index /* name_index */ ) { + if ( __itt_domain *d = get_itt_domain( domain ) ) { + __itt_id region_id = itt_null_id; + __itt_id parent_id = itt_null_id; + itt_id_make( ®ion_id, region, region_extra ); + if ( parent ) { + itt_id_make( &parent_id, parent, parent_extra ); + } + __itt_region_begin( d, region_id, parent_id, NULL ); + } +} + +void __TBB_EXPORTED_FUNC itt_region_end(d1::itt_domain_enum domain, void *region, unsigned long long region_extra ) { + if ( __itt_domain *d = get_itt_domain( domain ) ) { + __itt_id region_id = itt_null_id; + itt_id_make( ®ion_id, region, region_extra ); + __itt_region_end( d, region_id ); + } +} + +#else +void create_itt_sync(void* /*ptr*/, const tchar* /*objtype*/, const tchar* /*objname*/) {} +void call_itt_notify(int /*t*/, void* /*ptr*/) {} +void itt_set_sync_name(void* /*obj*/, const tchar* /*name*/) {} +void itt_make_task_group(d1::itt_domain_enum /*domain*/, void* /*group*/, unsigned long long /*group_extra*/, + void* /*parent*/, unsigned long long /*parent_extra*/, string_resource_index /*name_index*/) {} +void itt_metadata_str_add(d1::itt_domain_enum /*domain*/, void* /*addr*/, unsigned long long /*addr_extra*/, + string_resource_index /*key*/, const char* /*value*/ ) { } +void itt_metadata_ptr_add(d1::itt_domain_enum /*domain*/, void * /*addr*/, unsigned long long /*addr_extra*/, + string_resource_index /*key*/, void * /*value*/ ) {} +void itt_relation_add(d1::itt_domain_enum /*domain*/, void* /*addr0*/, unsigned long long /*addr0_extra*/, + itt_relation /*relation*/, void* /*addr1*/, unsigned long long /*addr1_extra*/ ) { } +void itt_task_begin(d1::itt_domain_enum /*domain*/, void* /*task*/, unsigned long long /*task_extra*/, + void* /*parent*/, unsigned long long /*parent_extra*/, string_resource_index /*name_index*/ ) { } +void itt_task_end(d1::itt_domain_enum /*domain*/ ) { } +void itt_region_begin(d1::itt_domain_enum /*domain*/, void* /*region*/, unsigned long long /*region_extra*/, + void* /*parent*/, unsigned long long /*parent_extra*/, string_resource_index /*name_index*/ ) { } +void itt_region_end(d1::itt_domain_enum /*domain*/, void* /*region*/, unsigned long long /*region_extra*/ ) { } +#endif /* __TBB_USE_ITT_NOTIFY */ + +const tchar + *SyncType_Scheduler = _T("%Constant") + ; +const tchar + *SyncObj_ContextsList = _T("TBB Scheduler") + ; +} // namespace r1 +} // namespace detail +} // namespace tbb diff --git a/contrib/libs/tbb/src/tbb/queuing_rw_mutex.cpp b/contrib/libs/tbb/src/tbb/queuing_rw_mutex.cpp new file mode 100644 index 0000000000..cfdc4d3c2a --- /dev/null +++ b/contrib/libs/tbb/src/tbb/queuing_rw_mutex.cpp @@ -0,0 +1,558 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +/** Before making any changes in the implementation, please emulate algorithmic changes + with SPIN tool using <TBB directory>/tools/spin_models/ReaderWriterMutex.pml. + There could be some code looking as "can be restructured" but its structure does matter! */ + +#include "oneapi/tbb/queuing_rw_mutex.h" +#include "oneapi/tbb/detail/_assert.h" +#include "oneapi/tbb/detail/_utils.h" +#include "itt_notify.h" + +namespace tbb { +namespace detail { +namespace r1 { + +#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) + // Workaround for overzealous compiler warnings + #pragma warning (push) + #pragma warning (disable: 4311 4312) +#endif + +//! A view of a T* with additional functionality for twiddling low-order bits. +template<typename T> +class tricky_atomic_pointer { +public: + using word = uintptr_t; + + static T* fetch_add( std::atomic<word>& location, word addend, std::memory_order memory_order ) { + return reinterpret_cast<T*>(location.fetch_add(addend, memory_order)); + } + + static T* exchange( std::atomic<word>& location, T* value, std::memory_order memory_order ) { + return reinterpret_cast<T*>(location.exchange(reinterpret_cast<word>(value), memory_order)); + } + + static T* compare_exchange_strong( std::atomic<word>& obj, const T* expected, const T* desired, std::memory_order memory_order ) { + word expd = reinterpret_cast<word>(expected); + obj.compare_exchange_strong(expd, reinterpret_cast<word>(desired), memory_order); + return reinterpret_cast<T*>(expd); + } + + static void store( std::atomic<word>& location, const T* value, std::memory_order memory_order ) { + location.store(reinterpret_cast<word>(value), memory_order); + } + + static T* load( std::atomic<word>& location, std::memory_order memory_order ) { + return reinterpret_cast<T*>(location.load(memory_order)); + } + + static void spin_wait_while_eq(const std::atomic<word>& location, const T* value) { + tbb::detail::d0::spin_wait_while_eq(location, reinterpret_cast<word>(value) ); + } + + T* & ref; + tricky_atomic_pointer( T*& original ) : ref(original) {}; + tricky_atomic_pointer(const tricky_atomic_pointer&) = delete; + tricky_atomic_pointer& operator=(const tricky_atomic_pointer&) = delete; + T* operator&( const word operand2 ) const { + return reinterpret_cast<T*>( reinterpret_cast<word>(ref) & operand2 ); + } + T* operator|( const word operand2 ) const { + return reinterpret_cast<T*>( reinterpret_cast<word>(ref) | operand2 ); + } +}; + +using tricky_pointer = tricky_atomic_pointer<queuing_rw_mutex::scoped_lock>; + +#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) + // Workaround for overzealous compiler warnings + #pragma warning (pop) +#endif + +//! Flag bits in a state_t that specify information about a locking request. 
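+// These flags are used alongside the one-bit tag packed into the low-order bit of
+// scoped_lock pointers (see tricky_atomic_pointer above and the FLAG constant
+// defined further below). That tagging trick in isolation (a sketch; it relies on
+// the pointee being at least 2-byte aligned so bit 0 is always free):
+//
+//     #include <cstdint>
+//
+//     inline std::uintptr_t tag(void* p)       { return reinterpret_cast<std::uintptr_t>(p) | 1u; }
+//     inline bool tagged(std::uintptr_t v)     { return (v & 1u) != 0; }
+//     inline void* untag(std::uintptr_t v)     { return reinterpret_cast<void*>(v & ~std::uintptr_t(1)); }
+//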
+enum state_t_flags : unsigned char { + STATE_NONE = 0, + STATE_WRITER = 1<<0, + STATE_READER = 1<<1, + STATE_READER_UNBLOCKNEXT = 1<<2, + STATE_ACTIVEREADER = 1<<3, + STATE_UPGRADE_REQUESTED = 1<<4, + STATE_UPGRADE_WAITING = 1<<5, + STATE_UPGRADE_LOSER = 1<<6, + STATE_COMBINED_WAITINGREADER = STATE_READER | STATE_READER_UNBLOCKNEXT, + STATE_COMBINED_READER = STATE_COMBINED_WAITINGREADER | STATE_ACTIVEREADER, + STATE_COMBINED_UPGRADING = STATE_UPGRADE_WAITING | STATE_UPGRADE_LOSER +}; + +static const unsigned char RELEASED = 0; +static const unsigned char ACQUIRED = 1; + +struct queuing_rw_mutex_impl { + //! Try to acquire the internal lock + /** Returns true if lock was successfully acquired. */ + static bool try_acquire_internal_lock(d1::queuing_rw_mutex::scoped_lock& s) + { + auto expected = RELEASED; + return s.my_internal_lock.compare_exchange_strong(expected, ACQUIRED); + } + + //! Acquire the internal lock + static void acquire_internal_lock(d1::queuing_rw_mutex::scoped_lock& s) + { + // Usually, we would use the test-test-and-set idiom here, with exponential backoff. + // But so far, experiments indicate there is no value in doing so here. + while( !try_acquire_internal_lock(s) ) { + machine_pause(1); + } + } + + //! Release the internal lock + static void release_internal_lock(d1::queuing_rw_mutex::scoped_lock& s) + { + s.my_internal_lock.store(RELEASED, std::memory_order_release); + } + + //! Wait for internal lock to be released + static void wait_for_release_of_internal_lock(d1::queuing_rw_mutex::scoped_lock& s) + { + spin_wait_until_eq(s.my_internal_lock, RELEASED); + } + + //! A helper function + static void unblock_or_wait_on_internal_lock(d1::queuing_rw_mutex::scoped_lock& s, uintptr_t flag ) { + if( flag ) { + wait_for_release_of_internal_lock(s); + } + else { + release_internal_lock(s); + } + } + + //! Mask for low order bit of a pointer. + static const tricky_pointer::word FLAG = 0x1; + + static uintptr_t get_flag( d1::queuing_rw_mutex::scoped_lock* ptr ) { + return reinterpret_cast<uintptr_t>(ptr) & FLAG; + } + + //------------------------------------------------------------------------ + // Methods of queuing_rw_mutex::scoped_lock + //------------------------------------------------------------------------ + + //! A method to acquire queuing_rw_mutex lock + static void acquire(d1::queuing_rw_mutex& m, d1::queuing_rw_mutex::scoped_lock& s, bool write) + { + __TBB_ASSERT( !s.my_mutex, "scoped_lock is already holding a mutex"); + + // Must set all fields before the exchange, because once the + // exchange executes, *this becomes accessible to other threads. + s.my_mutex = &m; + s.my_prev.store(0U, std::memory_order_relaxed); + s.my_next.store(0U, std::memory_order_relaxed); + s.my_going.store(0U, std::memory_order_relaxed); + s.my_state.store(d1::queuing_rw_mutex::scoped_lock::state_t(write ? STATE_WRITER : STATE_READER), std::memory_order_relaxed); + s.my_internal_lock.store(RELEASED, std::memory_order_relaxed); + + queuing_rw_mutex::scoped_lock* predecessor = m.q_tail.exchange(&s, std::memory_order_release); + + if( write ) { // Acquiring for write + + if( predecessor ) { + ITT_NOTIFY(sync_prepare, s.my_mutex); + predecessor = tricky_pointer(predecessor) & ~FLAG; + __TBB_ASSERT( !( tricky_pointer(predecessor) & FLAG ), "use of corrupted pointer!" 
); + #if TBB_USE_ASSERT + atomic_fence(std::memory_order_seq_cst); // on "m.q_tail" + __TBB_ASSERT( !predecessor->my_next, "the predecessor has another successor!"); + #endif + tricky_pointer::store(predecessor->my_next, &s, std::memory_order_release); + spin_wait_until_eq(s.my_going, 1U); + } + + } else { // Acquiring for read + #if __TBB_USE_ITT_NOTIFY + bool sync_prepare_done = false; + #endif + if( predecessor ) { + unsigned char pred_state; + __TBB_ASSERT( !s.my_prev, "the predecessor is already set" ); + if( tricky_pointer(predecessor) & FLAG ) { + /* this is only possible if predecessor is an upgrading reader and it signals us to wait */ + pred_state = STATE_UPGRADE_WAITING; + predecessor = tricky_pointer(predecessor) & ~FLAG; + } else { + // Load predecessor->my_state now, because once predecessor->my_next becomes + // non-NULL, we must assume that *predecessor might be destroyed. + pred_state = STATE_READER; + predecessor->my_state.compare_exchange_strong(pred_state, STATE_READER_UNBLOCKNEXT, std::memory_order_acq_rel); + } + tricky_pointer::store(s.my_prev, predecessor, std::memory_order_relaxed); + __TBB_ASSERT( !( tricky_pointer(predecessor) & FLAG ), "use of corrupted pointer!" ); + #if TBB_USE_ASSERT + atomic_fence(std::memory_order_seq_cst); // on "m.q_tail" + __TBB_ASSERT( !predecessor->my_next, "the predecessor has another successor!"); + #endif + tricky_pointer::store(predecessor->my_next, &s, std::memory_order_release); + if( pred_state != STATE_ACTIVEREADER ) { + #if __TBB_USE_ITT_NOTIFY + sync_prepare_done = true; + ITT_NOTIFY(sync_prepare, s.my_mutex); + #endif + spin_wait_until_eq(s.my_going, 1U); + } + } + + // The protected state must have been acquired here before it can be further released to any other reader(s): + unsigned char old_state = STATE_READER; + s.my_state.compare_exchange_strong(old_state, STATE_ACTIVEREADER, std::memory_order_acq_rel); + if( old_state!=STATE_READER ) { +#if __TBB_USE_ITT_NOTIFY + if( !sync_prepare_done ) + ITT_NOTIFY(sync_prepare, s.my_mutex); +#endif + // Failed to become active reader -> need to unblock the next waiting reader first + __TBB_ASSERT( s.my_state==STATE_READER_UNBLOCKNEXT, "unexpected state" ); + spin_wait_while_eq(s.my_next, 0U); + /* my_state should be changed before unblocking the next otherwise it might finish + and another thread can get our old state and left blocked */ + s.my_state.store(STATE_ACTIVEREADER, std::memory_order_relaxed); + tricky_pointer::load(s.my_next, std::memory_order_relaxed)->my_going.store(1U, std::memory_order_release); + } + __TBB_ASSERT( s.my_state==STATE_ACTIVEREADER, "unlocked reader is active reader" ); + } + + ITT_NOTIFY(sync_acquired, s.my_mutex); + + // Force acquire so that user's critical section receives correct values + // from processor that was previously in the user's critical section. + atomic_fence(std::memory_order_acquire); + } + + //! A method to acquire queuing_rw_mutex if it is free + static bool try_acquire(d1::queuing_rw_mutex& m, d1::queuing_rw_mutex::scoped_lock& s, bool write) + { + __TBB_ASSERT( !s.my_mutex, "scoped_lock is already holding a mutex"); + + if( m.q_tail.load(std::memory_order_relaxed) ) + return false; // Someone already took the lock + + // Must set all fields before the exchange, because once the + // exchange executes, *this becomes accessible to other threads. + s.my_prev.store(0U, std::memory_order_relaxed); + s.my_next.store(0U, std::memory_order_relaxed); + s.my_going.store(0U, std::memory_order_relaxed); // TODO: remove dead assignment? 
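+        // Editorial note (not upstream text): unlike acquire(), the state below is set
+        // directly to STATE_WRITER or STATE_ACTIVEREADER, since a successful try_acquire
+        // has no predecessor that could later unblock or promote this node.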
+ s.my_state.store(d1::queuing_rw_mutex::scoped_lock::state_t(write ? STATE_WRITER : STATE_ACTIVEREADER), std::memory_order_relaxed); + s.my_internal_lock.store(RELEASED, std::memory_order_relaxed); + + // The CAS must have release semantics, because we are + // "sending" the fields initialized above to other processors. + d1::queuing_rw_mutex::scoped_lock* expected = nullptr; + if( !m.q_tail.compare_exchange_strong(expected, &s, std::memory_order_release) ) + return false; // Someone already took the lock + // Force acquire so that user's critical section receives correct values + // from processor that was previously in the user's critical section. + atomic_fence(std::memory_order_acquire); + s.my_mutex = &m; + ITT_NOTIFY(sync_acquired, s.my_mutex); + return true; + } + + //! A method to release queuing_rw_mutex lock + static void release(d1::queuing_rw_mutex::scoped_lock& s) { + __TBB_ASSERT(s.my_mutex!=nullptr, "no lock acquired"); + + ITT_NOTIFY(sync_releasing, s.my_mutex); + + if( s.my_state.load(std::memory_order_relaxed) == STATE_WRITER ) { // Acquired for write + + // The logic below is the same as "writerUnlock", but elides + // "return" from the middle of the routine. + // In the statement below, acquire semantics of reading my_next is required + // so that following operations with fields of my_next are safe. + d1::queuing_rw_mutex::scoped_lock* next = tricky_pointer::load(s.my_next, std::memory_order_acquire); + if( !next ) { + d1::queuing_rw_mutex::scoped_lock* expected = &s; + if( s.my_mutex->q_tail.compare_exchange_strong(expected, nullptr, std::memory_order_release) ) { + // this was the only item in the queue, and the queue is now empty. + goto done; + } + spin_wait_while_eq( s.my_next, 0U ); + next = tricky_pointer::load(s.my_next, std::memory_order_acquire); + } + next->my_going.store(2U, std::memory_order_relaxed); // protect next queue node from being destroyed too early + if( next->my_state==STATE_UPGRADE_WAITING ) { + // the next waiting for upgrade means this writer was upgraded before. + acquire_internal_lock(s); + // Responsibility transition, the one who reads uncorrupted my_prev will do release. + d1::queuing_rw_mutex::scoped_lock* tmp = tricky_pointer::exchange(next->my_prev, nullptr, std::memory_order_release); + next->my_state.store(STATE_UPGRADE_LOSER, std::memory_order_relaxed); + next->my_going.store(1U, std::memory_order_release); + unblock_or_wait_on_internal_lock(s, get_flag(tmp)); + } else { + // next->state cannot be STATE_UPGRADE_REQUESTED + __TBB_ASSERT( next->my_state & (STATE_COMBINED_WAITINGREADER | STATE_WRITER), "unexpected state" ); + __TBB_ASSERT( !( next->my_prev.load() & FLAG ), "use of corrupted pointer!" ); + tricky_pointer::store(next->my_prev, nullptr, std::memory_order_relaxed); + next->my_going.store(1U, std::memory_order_release); + } + + } else { // Acquired for read + + queuing_rw_mutex::scoped_lock *tmp = nullptr; + retry: + // Addition to the original paper: Mark my_prev as in use + queuing_rw_mutex::scoped_lock *predecessor = tricky_pointer::fetch_add(s.my_prev, FLAG, std::memory_order_acquire); + + if( predecessor ) { + if( !(try_acquire_internal_lock(*predecessor)) ) + { + // Failed to acquire the lock on predecessor. The predecessor either unlinks or upgrades. + // In the second case, it could or could not know my "in use" flag - need to check + // Responsibility transition, the one who reads uncorrupted my_prev will do release. 
+ tmp = tricky_pointer::compare_exchange_strong(s.my_prev, tricky_pointer(predecessor) | FLAG, predecessor, std::memory_order_release); + if( !(tricky_pointer(tmp) & FLAG) ) { + // Wait for the predecessor to change my_prev (e.g. during unlink) + // TODO: spin_wait condition seems never reachable + tricky_pointer::spin_wait_while_eq( s.my_prev, tricky_pointer(predecessor)|FLAG ); + // Now owner of predecessor is waiting for _us_ to release its lock + release_internal_lock(*predecessor); + } + // else the "in use" flag is back -> the predecessor didn't get it and will release itself; nothing to do + + tmp = nullptr; + goto retry; + } + __TBB_ASSERT(predecessor && predecessor->my_internal_lock.load(std::memory_order_relaxed)==ACQUIRED, "predecessor's lock is not acquired"); + tricky_pointer::store(s.my_prev, predecessor, std::memory_order_relaxed); + acquire_internal_lock(s); + + tricky_pointer::store(predecessor->my_next, nullptr, std::memory_order_release); + + d1::queuing_rw_mutex::scoped_lock* expected = &s; + if( !tricky_pointer::load(s.my_next, std::memory_order_relaxed) && !s.my_mutex->q_tail.compare_exchange_strong(expected, predecessor, std::memory_order_release) ) { + spin_wait_while_eq( s.my_next, 0U ); + } + __TBB_ASSERT( !(s.my_next.load() & FLAG), "use of corrupted pointer" ); + + // ensure acquire semantics of reading 'my_next' + if(d1::queuing_rw_mutex::scoped_lock *const l_next = tricky_pointer::load(s.my_next, std::memory_order_acquire) ) { // I->next != nil, TODO: rename to next after clearing up and adapting the n in the comment two lines below + // Equivalent to I->next->prev = I->prev but protected against (prev[n]&FLAG)!=0 + tmp = tricky_pointer::exchange(l_next->my_prev, predecessor, std::memory_order_release); + // I->prev->next = I->next; + __TBB_ASSERT(tricky_pointer::load(s.my_prev, std::memory_order_relaxed)==predecessor, nullptr); + predecessor->my_next.store(s.my_next.load(std::memory_order_relaxed), std::memory_order_release); + } + // Safe to release in the order opposite to acquiring which makes the code simpler + release_internal_lock(*predecessor); + + } else { // No predecessor when we looked + acquire_internal_lock(s); // "exclusiveLock(&I->EL)" + d1::queuing_rw_mutex::scoped_lock* next = tricky_pointer::load(s.my_next, std::memory_order_acquire); + if( !next ) { + d1::queuing_rw_mutex::scoped_lock* expected = &s; + if( !s.my_mutex->q_tail.compare_exchange_strong(expected, nullptr, std::memory_order_release) ) { + spin_wait_while_eq( s.my_next, 0U ); + next = tricky_pointer::load(s.my_next, std::memory_order_relaxed); + } else { + goto unlock_self; + } + } + next->my_going.store(2U, std::memory_order_relaxed); + // Responsibility transition, the one who reads uncorrupted my_prev will do release. + tmp = tricky_pointer::exchange(next->my_prev, nullptr, std::memory_order_release); + next->my_going.store(1U, std::memory_order_release); + } + unlock_self: + unblock_or_wait_on_internal_lock(s, get_flag(tmp)); + } + done: + spin_wait_while_eq( s.my_going, 2U ); + + s.initialize(); + } + + static bool downgrade_to_reader(d1::queuing_rw_mutex::scoped_lock& s) { + if ( s.my_state.load(std::memory_order_relaxed) == STATE_ACTIVEREADER ) return true; // Already a reader + + ITT_NOTIFY(sync_releasing, s.my_mutex); + s.my_state.store(STATE_READER, std::memory_order_relaxed); + if( ! 
tricky_pointer::load(s.my_next, std::memory_order_relaxed)) { + // the following load of q_tail must not be reordered with setting STATE_READER above + if( &s==s.my_mutex->q_tail.load() ) { + unsigned char old_state = STATE_READER; + s.my_state.compare_exchange_strong(old_state, STATE_ACTIVEREADER, std::memory_order_release); + if( old_state==STATE_READER ) + return true; // Downgrade completed + } + /* wait for the next to register */ + spin_wait_while_eq( s.my_next, 0U ); + } + d1::queuing_rw_mutex::scoped_lock *const next = tricky_pointer::load(s.my_next, std::memory_order_acquire); + __TBB_ASSERT( next, "still no successor at this point!" ); + if( next->my_state & STATE_COMBINED_WAITINGREADER ) + next->my_going.store(1U, std::memory_order_release); + else if( next->my_state==STATE_UPGRADE_WAITING ) + // the next waiting for upgrade means this writer was upgraded before. + next->my_state.store(STATE_UPGRADE_LOSER, std::memory_order_relaxed); + s.my_state.store(STATE_ACTIVEREADER, std::memory_order_relaxed);; + return true; + } + + static bool upgrade_to_writer(d1::queuing_rw_mutex::scoped_lock& s) { + if ( s.my_state.load(std::memory_order_relaxed) == STATE_WRITER ) return true; // Already a writer + + __TBB_ASSERT( s.my_state==STATE_ACTIVEREADER, "only active reader can be updated" ); + + queuing_rw_mutex::scoped_lock * tmp; + queuing_rw_mutex::scoped_lock * me = &s; + + ITT_NOTIFY(sync_releasing, s.my_mutex); + s.my_state.store(STATE_UPGRADE_REQUESTED, std::memory_order_relaxed); + requested: + __TBB_ASSERT( !(s.my_next.load() & FLAG), "use of corrupted pointer!" ); + acquire_internal_lock(s); + d1::queuing_rw_mutex::scoped_lock* expected = &s; + if( !s.my_mutex->q_tail.compare_exchange_strong(expected, tricky_pointer(me)|FLAG, std::memory_order_release) ) { + spin_wait_while_eq( s.my_next, 0U ); + queuing_rw_mutex::scoped_lock * next; + next = tricky_pointer::fetch_add(s.my_next, FLAG, std::memory_order_acquire); + unsigned short n_state = next->my_state; + /* the next reader can be blocked by our state. the best thing to do is to unblock it */ + if( n_state & STATE_COMBINED_WAITINGREADER ) + next->my_going.store(1U, std::memory_order_release); + // Responsibility transition, the one who reads uncorrupted my_prev will do release. + tmp = tricky_pointer::exchange(next->my_prev, &s, std::memory_order_release); + unblock_or_wait_on_internal_lock(s, get_flag(tmp)); + if( n_state & (STATE_COMBINED_READER | STATE_UPGRADE_REQUESTED) ) { + // save next|FLAG for simplicity of following comparisons + tmp = tricky_pointer(next)|FLAG; + for( atomic_backoff b; tricky_pointer::load(s.my_next, std::memory_order_relaxed)==tmp; b.pause() ) { + if( s.my_state & STATE_COMBINED_UPGRADING ) { + if( tricky_pointer::load(s.my_next, std::memory_order_acquire)==tmp ) + tricky_pointer::store(s.my_next, next, std::memory_order_relaxed); + goto waiting; + } + } + __TBB_ASSERT(tricky_pointer::load(s.my_next, std::memory_order_relaxed) != (tricky_pointer(next)|FLAG), nullptr); + goto requested; + } else { + __TBB_ASSERT( n_state & (STATE_WRITER | STATE_UPGRADE_WAITING), "unexpected state"); + __TBB_ASSERT( (tricky_pointer(next)|FLAG) == tricky_pointer::load(s.my_next, std::memory_order_relaxed), nullptr); + tricky_pointer::store(s.my_next, next, std::memory_order_relaxed); + } + } else { + /* We are in the tail; whoever comes next is blocked by q_tail&FLAG */ + release_internal_lock(s); + } // if( this != my_mutex->q_tail... 
) + { + unsigned char old_state = STATE_UPGRADE_REQUESTED; + s.my_state.compare_exchange_strong(old_state, STATE_UPGRADE_WAITING, std::memory_order_acquire); + } + waiting: + __TBB_ASSERT( !( s.my_next.load(std::memory_order_relaxed) & FLAG ), "use of corrupted pointer!" ); + __TBB_ASSERT( s.my_state & STATE_COMBINED_UPGRADING, "wrong state at upgrade waiting_retry" ); + __TBB_ASSERT( me==&s, nullptr ); + ITT_NOTIFY(sync_prepare, s.my_mutex); + /* if no one was blocked by the "corrupted" q_tail, turn it back */ + expected = tricky_pointer(me)|FLAG; + s.my_mutex->q_tail.compare_exchange_strong(expected, &s, std::memory_order_release); + queuing_rw_mutex::scoped_lock * predecessor; + // Mark my_prev as 'in use' to prevent predecessor from releasing + predecessor = tricky_pointer::fetch_add(s.my_prev, FLAG, std::memory_order_acquire); + if( predecessor ) { + bool success = try_acquire_internal_lock(*predecessor); + { + // While the predecessor pointer (my_prev) is in use (FLAG is set), we can safely update the node`s state. + // Corrupted pointer transitions responsibility to release the predecessor`s node on us. + unsigned char old_state = STATE_UPGRADE_REQUESTED; + predecessor->my_state.compare_exchange_strong(old_state, STATE_UPGRADE_WAITING, std::memory_order_release); + } + if( !success ) { + // Responsibility transition, the one who reads uncorrupted my_prev will do release. + tmp = tricky_pointer::compare_exchange_strong(s.my_prev, tricky_pointer(predecessor)|FLAG, predecessor, std::memory_order_release); + if( tricky_pointer(tmp) & FLAG ) { + tricky_pointer::spin_wait_while_eq(s.my_prev, predecessor); + predecessor = tricky_pointer::load(s.my_prev, std::memory_order_relaxed); + } else { + // TODO: spin_wait condition seems never reachable + tricky_pointer::spin_wait_while_eq(s.my_prev, tricky_pointer(predecessor)|FLAG); + release_internal_lock(*predecessor); + } + } else { + tricky_pointer::store(s.my_prev, predecessor, std::memory_order_relaxed); + release_internal_lock(*predecessor); + tricky_pointer::spin_wait_while_eq(s.my_prev, predecessor); + predecessor = tricky_pointer::load(s.my_prev, std::memory_order_relaxed); + } + if( predecessor ) + goto waiting; + } else { + tricky_pointer::store(s.my_prev, nullptr, std::memory_order_relaxed); + } + __TBB_ASSERT( !predecessor && !s.my_prev, nullptr ); + + // additional lifetime issue prevention checks + // wait for the successor to finish working with my fields + wait_for_release_of_internal_lock(s); + // now wait for the predecessor to finish working with my fields + spin_wait_while_eq( s.my_going, 2U ); + + // Acquire critical section indirectly from previous owner or directly from predecessor (TODO: not clear). 
+ atomic_fence(std::memory_order_acquire); // on either "my_mutex->q_tail" or "my_going" (TODO: not clear) + + bool result = ( s.my_state != STATE_UPGRADE_LOSER ); + s.my_state.store(STATE_WRITER, std::memory_order_relaxed); + s.my_going.store(1U, std::memory_order_relaxed); + + ITT_NOTIFY(sync_acquired, s.my_mutex); + return result; + } + + static void construct(d1::queuing_rw_mutex& m) { + suppress_unused_warning(m); + ITT_SYNC_CREATE(&m, _T("tbb::queuing_rw_mutex"), _T("")); + } +}; + +void __TBB_EXPORTED_FUNC acquire(d1::queuing_rw_mutex& m, d1::queuing_rw_mutex::scoped_lock& s, bool write) { + queuing_rw_mutex_impl::acquire(m, s, write); +} + +bool __TBB_EXPORTED_FUNC try_acquire(d1::queuing_rw_mutex& m, d1::queuing_rw_mutex::scoped_lock& s, bool write) { + return queuing_rw_mutex_impl::try_acquire(m, s, write); +} + +void __TBB_EXPORTED_FUNC release(d1::queuing_rw_mutex::scoped_lock& s) { + queuing_rw_mutex_impl::release(s); +} + +bool __TBB_EXPORTED_FUNC upgrade_to_writer(d1::queuing_rw_mutex::scoped_lock& s) { + return queuing_rw_mutex_impl::upgrade_to_writer(s); +} + +bool __TBB_EXPORTED_FUNC downgrade_to_reader(d1::queuing_rw_mutex::scoped_lock& s) { + return queuing_rw_mutex_impl::downgrade_to_reader(s); +} + +void __TBB_EXPORTED_FUNC construct(d1::queuing_rw_mutex& m) { + queuing_rw_mutex_impl::construct(m); +} + +} // namespace r1 +} // namespace detail +} // namespace tbb diff --git a/contrib/libs/tbb/src/tbb/rml_base.h b/contrib/libs/tbb/src/tbb/rml_base.h new file mode 100644 index 0000000000..9e1705837c --- /dev/null +++ b/contrib/libs/tbb/src/tbb/rml_base.h @@ -0,0 +1,163 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +// Header guard and namespace names follow rml conventions. + +#ifndef __RML_rml_base_H +#define __RML_rml_base_H + +#include <cstddef> + +#if _WIN32||_WIN64 +#include <windows.h> +#endif /* _WIN32||_WIN64 */ + +#ifdef RML_PURE_VIRTUAL_HANDLER +#define RML_PURE(T) {RML_PURE_VIRTUAL_HANDLER(); return (T)0;} +#else +#define RML_PURE(T) = 0; +#endif + +namespace rml { + +class server; + +class versioned_object { +public: + //! A version number + typedef unsigned version_type; + + virtual ~versioned_object() {} + + //! Get version of this object + /** The version number is incremented when a incompatible change is introduced. + The version number is invariant for the lifetime of the object. */ + virtual version_type version() const RML_PURE(version_type) + +}; + +//! Represents a client's job for an execution context. +/** A job object is constructed by the client. + Not derived from versioned_object because version is same as for client. */ +class job { + friend class server; +}; + +//! Information that client provides to server when asking for a server. +/** The instance must endure at least until acknowledge_close_connection is called. */ +class client: public versioned_object { +public: + //! Typedef for convenience of derived classes in other namespaces. + typedef ::rml::job job; + + //! 
Index of a job in a job pool + typedef unsigned size_type; + + //! Maximum number of threads that client can exploit profitably if nothing else is running on the machine. + /** The returned value should remain invariant for the lifetime of the connection. [idempotent] */ + virtual size_type max_job_count() const RML_PURE(size_type) + + //! Minimum stack size for each job. 0 means to use default stack size. [idempotent] + virtual std::size_t min_stack_size() const RML_PURE(std::size_t) + + //! Server calls this routine when it needs client to create a job object. + virtual job* create_one_job() RML_PURE(job*) + + //! Acknowledge that all jobs have been cleaned up. + /** Called by server in response to request_close_connection + after cleanup(job) has been called for each job. */ + virtual void acknowledge_close_connection() RML_PURE(void) + + //! Inform client that server is done with *this. + /** Client should destroy the job. + Not necessarily called by execution context represented by *this. + Never called while any other thread is working on the job. */ + virtual void cleanup( job& ) RML_PURE(void) + + // In general, we should not add new virtual methods, because that would + // break derived classes. Think about reserving some vtable slots. +}; + +// Information that server provides to client. +// Virtual functions are routines provided by the server for the client to call. +class server: public versioned_object { +public: + //! Typedef for convenience of derived classes. + typedef ::rml::job job; + +#if _WIN32||_WIN64 + typedef void* execution_resource_t; +#endif + + //! Request that connection to server be closed. + /** Causes each job associated with the client to have its cleanup method called, + possibly by a thread different than the thread that created the job. + This method can return before all cleanup methods return. + Actions that have to wait after all cleanup methods return should be part of + client::acknowledge_close_connection. + Pass true as exiting if request_close_connection() is called because exit() is + called. In that case, it is the client's responsibility to make sure all threads + are terminated. In all other cases, pass false. */ + virtual void request_close_connection( bool exiting = false ) = 0; + + //! Called by client thread when it reaches a point where it cannot make progress until other threads do. + virtual void yield() = 0; + + //! Called by client to indicate a change in the number of non-RML threads that are running. + /** This is a performance hint to the RML to adjust how many threads it should let run + concurrently. The delta is the change in the number of non-RML threads that are running. + For example, a value of 1 means the client has started running another thread, and a value + of -1 indicates that the client has blocked or terminated one of its threads. */ + virtual void independent_thread_number_changed( int delta ) = 0; + + //! Default level of concurrency for which RML strives when there are no non-RML threads running. + /** Normally, the value is the hardware concurrency minus one. + The "minus one" accounts for the thread created by main(). */ + virtual unsigned default_concurrency() const = 0; +}; + +class factory { +public: + //! status results + enum status_type { + st_success=0, + st_connection_exists, + st_not_found, + st_incompatible + }; + +protected: + //! Pointer to routine that waits for server to indicate when client can close itself. + status_type (*my_wait_to_close_routine)( factory& ); + +public: + //! 
Library handle for use by RML. +#if _WIN32||_WIN64 + HMODULE library_handle; +#else + void* library_handle; +#endif /* _WIN32||_WIN64 */ + + //! Special marker to keep dll from being unloaded prematurely + static const std::size_t c_dont_unload = 1; +}; + +//! Typedef for callback functions to print server info +typedef void (*server_info_callback_t)( void* arg, const char* server_info ); + +} // namespace rml + +#endif /* __RML_rml_base_H */ diff --git a/contrib/libs/tbb/src/tbb/rml_tbb.cpp b/contrib/libs/tbb/src/tbb/rml_tbb.cpp new file mode 100644 index 0000000000..122e2709f7 --- /dev/null +++ b/contrib/libs/tbb/src/tbb/rml_tbb.cpp @@ -0,0 +1,113 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "oneapi/tbb/detail/_assert.h" + +#include "rml_tbb.h" +#include "dynamic_link.h" + +namespace tbb { +namespace detail { +namespace r1 { +namespace rml { + +#define MAKE_SERVER(x) DLD(__TBB_make_rml_server,x) +#define GET_INFO(x) DLD(__TBB_call_with_my_server_info,x) +#define SERVER tbb_server +#define CLIENT tbb_client +#define FACTORY tbb_factory + +#if __TBB_WEAK_SYMBOLS_PRESENT + #pragma weak __TBB_make_rml_server + #pragma weak __TBB_call_with_my_server_info + extern "C" { + ::rml::factory::status_type __TBB_make_rml_server( rml::tbb_factory& f, rml::tbb_server*& server, rml::tbb_client& client ); + void __TBB_call_with_my_server_info( ::rml::server_info_callback_t cb, void* arg ); + } +#endif /* __TBB_WEAK_SYMBOLS_PRESENT */ + +#if TBB_USE_DEBUG +#define DEBUG_SUFFIX "_debug" +#else +#define DEBUG_SUFFIX +#endif /* TBB_USE_DEBUG */ + +// RML_SERVER_NAME is the name of the RML server library. +#if _WIN32 || _WIN64 +#define RML_SERVER_NAME "irml" DEBUG_SUFFIX ".dll" +#elif __APPLE__ +#define RML_SERVER_NAME "libirml" DEBUG_SUFFIX ".dylib" +#elif __linux__ +#define RML_SERVER_NAME "libirml" DEBUG_SUFFIX ".so.1" +#elif __FreeBSD__ || __NetBSD__ || __OpenBSD__ || __sun || _AIX +#define RML_SERVER_NAME "libirml" DEBUG_SUFFIX ".so" +#else +#error Unknown OS +#endif + +const ::rml::versioned_object::version_type CLIENT_VERSION = 2; + +#if __TBB_WEAK_SYMBOLS_PRESENT + #pragma weak __RML_open_factory + #pragma weak __RML_close_factory + extern "C" { + ::rml::factory::status_type __RML_open_factory ( ::rml::factory&, ::rml::versioned_object::version_type&, ::rml::versioned_object::version_type ); + void __RML_close_factory( ::rml::factory& f ); + } +#endif /* __TBB_WEAK_SYMBOLS_PRESENT */ + +::rml::factory::status_type FACTORY::open() { + // Failure of following assertion indicates that factory is already open, or not zero-inited. 
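+    // Editorial note (derived from the code below, not upstream text): dynamic_link()
+    // must resolve all four entries of server_link_table from RML_SERVER_NAME,
+    // otherwise library_handle stays NULL and open() reports st_not_found.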
+ __TBB_ASSERT_EX( !library_handle, NULL ); + status_type (*open_factory_routine)( factory&, version_type&, version_type ); + dynamic_link_descriptor server_link_table[4] = { + DLD(__RML_open_factory,open_factory_routine), + MAKE_SERVER(my_make_server_routine), + DLD(__RML_close_factory,my_wait_to_close_routine), + GET_INFO(my_call_with_server_info_routine), + }; + status_type result; + if ( dynamic_link( RML_SERVER_NAME, server_link_table, 4, &library_handle ) ) { + version_type server_version; + result = (*open_factory_routine)( *this, server_version, CLIENT_VERSION ); + // server_version can be checked here for incompatibility if necessary. + } else { + library_handle = NULL; + result = st_not_found; + } + return result; +} + +void FACTORY::close() { + if ( library_handle ) + (*my_wait_to_close_routine)(*this); + if ( (size_t)library_handle>FACTORY::c_dont_unload ) { + dynamic_unlink(library_handle); + library_handle = NULL; + } +} + +::rml::factory::status_type FACTORY::make_server( SERVER*& s, CLIENT& c) { + // Failure of following assertion means that factory was not successfully opened. + __TBB_ASSERT_EX( my_make_server_routine, NULL ); + return (*my_make_server_routine)(*this,s,c); +} + +} // namespace rml +} // namespace r1 +} // namespace detail +} // namespace tbb + diff --git a/contrib/libs/tbb/src/tbb/rml_tbb.h b/contrib/libs/tbb/src/tbb/rml_tbb.h new file mode 100644 index 0000000000..de923be1b2 --- /dev/null +++ b/contrib/libs/tbb/src/tbb/rml_tbb.h @@ -0,0 +1,94 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +// Header guard and namespace names follow TBB conventions. + +#ifndef __TBB_rml_tbb_H +#define __TBB_rml_tbb_H + +#include "oneapi/tbb/version.h" +#include "rml_base.h" + +namespace tbb { +namespace detail { +namespace r1 { +namespace rml { + +//------------------------------------------------------------------------ +// Classes instantiated by the server +//------------------------------------------------------------------------ + +//! Represents a set of oneTBB worker threads provided by the server. +class tbb_server: public ::rml::server { +public: + //! Inform server of adjustments in the number of workers that the client can profitably use. + virtual void adjust_job_count_estimate( int delta ) = 0; + +#if _WIN32||_WIN64 + //! Inform server of a oneTBB external thread. + virtual void register_external_thread( execution_resource_t& v ) = 0; + + //! Inform server that the oneTBB external thread is done with its work. + virtual void unregister_external_thread( execution_resource_t v ) = 0; +#endif /* _WIN32||_WIN64 */ +}; + +//------------------------------------------------------------------------ +// Classes instantiated by the client +//------------------------------------------------------------------------ + +class tbb_client: public ::rml::client { +public: + //! Defined by TBB to steal a task and execute it. + /** Called by server when it wants an execution context to do some TBB work. 
+ The method should return when it is okay for the thread to yield indefinitely. */ + virtual void process( job& ) RML_PURE(void) +}; + +/** Client must ensure that instance is zero-inited, typically by being a file-scope object. */ +class tbb_factory: public ::rml::factory { + + //! Pointer to routine that creates an RML server. + status_type (*my_make_server_routine)( tbb_factory&, tbb_server*&, tbb_client& ); + + //! Pointer to routine that calls callback function with server version info. + void (*my_call_with_server_info_routine)( ::rml::server_info_callback_t cb, void* arg ); + +public: + typedef ::rml::versioned_object::version_type version_type; + typedef tbb_client client_type; + typedef tbb_server server_type; + + //! Open factory. + /** Dynamically links against RML library. + Returns st_success, st_incompatible, or st_not_found. */ + status_type open(); + + //! Factory method to be called by client to create a server object. + /** Factory must be open. + Returns st_success, or st_incompatible . */ + status_type make_server( server_type*&, client_type& ); + + //! Close factory + void close(); +}; + +} // namespace rml +} // namespace r1 +} // namespace detail +} // namespace tbb + +#endif /*__TBB_rml_tbb_H */ diff --git a/contrib/libs/tbb/src/tbb/rml_thread_monitor.h b/contrib/libs/tbb/src/tbb/rml_thread_monitor.h new file mode 100644 index 0000000000..613ec72e98 --- /dev/null +++ b/contrib/libs/tbb/src/tbb/rml_thread_monitor.h @@ -0,0 +1,258 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +// All platform-specific threading support is encapsulated here. */ + +#ifndef __RML_thread_monitor_H +#define __RML_thread_monitor_H + +#if __TBB_USE_WINAPI +#include <windows.h> +#include <process.h> +#include <malloc.h> //_alloca +#include "misc.h" // support for processor groups +#if __TBB_WIN8UI_SUPPORT && (_WIN32_WINNT < 0x0A00) +#include <thread> +#endif +#elif __TBB_USE_POSIX +#include <pthread.h> +#include <cstring> +#include <cstdlib> +#else +#error Unsupported platform +#endif +#include <cstdio> + +#include "oneapi/tbb/detail/_template_helpers.h" + +#include "itt_notify.h" +#include "semaphore.h" + +// All platform-specific threading support is in this header. + +#if (_WIN32||_WIN64)&&!__TBB_ipf +// Deal with 64K aliasing. The formula for "offset" is a Fibonacci hash function, +// which has the desirable feature of spreading out the offsets fairly evenly +// without knowing the total number of offsets, and furthermore unlikely to +// accidentally cancel out other 64K aliasing schemes that Microsoft might implement later. +// See Knuth Vol 3. "Theorem S" for details on Fibonacci hashing. +// The second statement is really does need "volatile", otherwise the compiler might remove the _alloca. +#define AVOID_64K_ALIASING(idx) \ + std::size_t offset = (idx+1) * 40503U % (1U<<16); \ + void* volatile sink_for_alloca = _alloca(offset); \ + __TBB_ASSERT_EX(sink_for_alloca, "_alloca failed"); +#else +// Linux thread allocators avoid 64K aliasing. 
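+// Editorial worked example for the Windows formula above (not upstream text):
+// offset = (idx+1)*40503 % 65536 yields 40503, 15470, 55973, ... bytes for
+// idx = 0, 1, 2, spreading worker stacks across the 64K alias window.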
+#define AVOID_64K_ALIASING(idx) tbb::detail::suppress_unused_warning(idx) +#endif /* _WIN32||_WIN64 */ + +namespace tbb { +namespace detail { +namespace r1 { + +// Forward declaration: throws std::runtime_error with what() returning error_code description prefixed with aux_info +void handle_perror(int error_code, const char* aux_info); + +namespace rml { +namespace internal { + +#if __TBB_USE_ITT_NOTIFY +static const ::tbb::detail::r1::tchar *SyncType_RML = _T("%Constant"); +static const ::tbb::detail::r1::tchar *SyncObj_ThreadMonitor = _T("RML Thr Monitor"); +#endif /* __TBB_USE_ITT_NOTIFY */ + +//! Monitor with limited two-phase commit form of wait. +/** At most one thread should wait on an instance at a time. */ +class thread_monitor { +public: + class cookie { + friend class thread_monitor; + std::atomic<std::size_t> my_epoch{0}; + }; + thread_monitor() : skipped_wakeup(false), my_sema() { + ITT_SYNC_CREATE(&my_sema, SyncType_RML, SyncObj_ThreadMonitor); + } + ~thread_monitor() {} + + //! If a thread is waiting or started a two-phase wait, notify it. + /** Can be called by any thread. */ + void notify(); + + //! Begin two-phase wait. + /** Should only be called by thread that owns the monitor. + The caller must either complete the wait or cancel it. */ + void prepare_wait( cookie& c ); + + //! Complete a two-phase wait and wait until notification occurs after the earlier prepare_wait. + void commit_wait( cookie& c ); + + //! Cancel a two-phase wait. + void cancel_wait(); + +#if __TBB_USE_WINAPI + typedef HANDLE handle_type; + + #define __RML_DECL_THREAD_ROUTINE unsigned WINAPI + typedef unsigned (WINAPI *thread_routine_type)(void*); + + //! Launch a thread + static handle_type launch( thread_routine_type thread_routine, void* arg, std::size_t stack_size, const size_t* worker_index = NULL ); + +#elif __TBB_USE_POSIX + typedef pthread_t handle_type; + + #define __RML_DECL_THREAD_ROUTINE void* + typedef void*(*thread_routine_type)(void*); + + //! Launch a thread + static handle_type launch( thread_routine_type thread_routine, void* arg, std::size_t stack_size ); +#endif /* __TBB_USE_POSIX */ + + //! Join thread + static void join(handle_type handle); + + //! Detach thread + static void detach_thread(handle_type handle); +private: + cookie my_cookie; // epoch counter + std::atomic<bool> in_wait{false}; + bool skipped_wakeup; + binary_semaphore my_sema; +#if __TBB_USE_POSIX + static void check( int error_code, const char* routine ); +#endif +}; + +#if __TBB_USE_WINAPI + +#ifndef STACK_SIZE_PARAM_IS_A_RESERVATION +#define STACK_SIZE_PARAM_IS_A_RESERVATION 0x00010000 +#endif + +// _beginthreadex API is not available in Windows 8 Store* applications, so use std::thread instead +#if __TBB_WIN8UI_SUPPORT && (_WIN32_WINNT < 0x0A00) +inline thread_monitor::handle_type thread_monitor::launch( thread_routine_type thread_function, void* arg, std::size_t, const std::size_t*) { +//TODO: check that exception thrown from std::thread is not swallowed silently + std::thread* thread_tmp=new std::thread(thread_function, arg); + return thread_tmp->native_handle(); +} +#else +inline thread_monitor::handle_type thread_monitor::launch( thread_routine_type thread_routine, void* arg, std::size_t stack_size, const std::size_t* worker_index ) { + unsigned thread_id; + int number_of_processor_groups = ( worker_index ) ? NumberOfProcessorGroups() : 0; + unsigned create_flags = ( number_of_processor_groups > 1 ) ? 
CREATE_SUSPENDED : 0; + HANDLE h = (HANDLE)_beginthreadex( NULL, unsigned(stack_size), thread_routine, arg, STACK_SIZE_PARAM_IS_A_RESERVATION | create_flags, &thread_id ); + if( !h ) { + handle_perror(0, "thread_monitor::launch: _beginthreadex failed\n"); + } + if ( number_of_processor_groups > 1 ) { + MoveThreadIntoProcessorGroup( h, FindProcessorGroupIndex( static_cast<int>(*worker_index) ) ); + ResumeThread( h ); + } + return h; +} +#endif //__TBB_WIN8UI_SUPPORT && (_WIN32_WINNT < 0x0A00) + +void thread_monitor::join(handle_type handle) { +#if TBB_USE_ASSERT + DWORD res = +#endif + WaitForSingleObjectEx(handle, INFINITE, FALSE); + __TBB_ASSERT( res==WAIT_OBJECT_0, NULL ); +#if TBB_USE_ASSERT + BOOL val = +#endif + CloseHandle(handle); + __TBB_ASSERT( val, NULL ); +} + +void thread_monitor::detach_thread(handle_type handle) { +#if TBB_USE_ASSERT + BOOL val = +#endif + CloseHandle(handle); + __TBB_ASSERT( val, NULL ); +} + +#endif /* __TBB_USE_WINAPI */ + +#if __TBB_USE_POSIX +inline void thread_monitor::check( int error_code, const char* routine ) { + if( error_code ) { + handle_perror(error_code, routine); + } +} + +inline thread_monitor::handle_type thread_monitor::launch( void* (*thread_routine)(void*), void* arg, std::size_t stack_size ) { + // FIXME - consider more graceful recovery than just exiting if a thread cannot be launched. + // Note that there are some tricky situations to deal with, such that the thread is already + // grabbed as part of an OpenMP team. + pthread_attr_t s; + check(pthread_attr_init( &s ), "pthread_attr_init has failed"); + if( stack_size>0 ) + check(pthread_attr_setstacksize( &s, stack_size ), "pthread_attr_setstack_size has failed" ); + pthread_t handle; + check( pthread_create( &handle, &s, thread_routine, arg ), "pthread_create has failed" ); + check( pthread_attr_destroy( &s ), "pthread_attr_destroy has failed" ); + return handle; +} + +void thread_monitor::join(handle_type handle) { + check(pthread_join(handle, NULL), "pthread_join has failed"); +} + +void thread_monitor::detach_thread(handle_type handle) { + check(pthread_detach(handle), "pthread_detach has failed"); +} +#endif /* __TBB_USE_POSIX */ + +inline void thread_monitor::notify() { + my_cookie.my_epoch.store(my_cookie.my_epoch.load(std::memory_order_acquire) + 1, std::memory_order_release); + bool do_signal = in_wait.exchange( false ); + if( do_signal ) + my_sema.V(); +} + +inline void thread_monitor::prepare_wait( cookie& c ) { + if( skipped_wakeup ) { + // Lazily consume a signal that was skipped due to cancel_wait + skipped_wakeup = false; + my_sema.P(); // does not really wait on the semaphore + } + // Former c = my_cookie + c.my_epoch.store(my_cookie.my_epoch.load(std::memory_order_acquire), std::memory_order_release); + in_wait.store( true, std::memory_order_seq_cst ); +} + +inline void thread_monitor::commit_wait( cookie& c ) { + bool do_it = ( c.my_epoch.load(std::memory_order_relaxed) == my_cookie.my_epoch.load(std::memory_order_relaxed) ); + if( do_it ) my_sema.P(); + else cancel_wait(); +} + +inline void thread_monitor::cancel_wait() { + // if not in_wait, then some thread has sent us a signal; + // it will be consumed by the next prepare_wait call + skipped_wakeup = ! 
in_wait.exchange( false ); +} + +} // namespace internal +} // namespace rml +} // namespace r1 +} // namespace detail +} // namespace tbb + +#endif /* __RML_thread_monitor_H */ diff --git a/contrib/libs/tbb/src/tbb/rtm_mutex.cpp b/contrib/libs/tbb/src/tbb/rtm_mutex.cpp new file mode 100644 index 0000000000..fe7fb66dc8 --- /dev/null +++ b/contrib/libs/tbb/src/tbb/rtm_mutex.cpp @@ -0,0 +1,120 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "oneapi/tbb/detail/_assert.h" +#include "oneapi/tbb/detail/_rtm_mutex.h" +#include "itt_notify.h" +#include "governor.h" +#include "misc.h" + +#include <atomic> + +namespace tbb { +namespace detail { +namespace r1 { + +// maximum number of times to retry +// TODO: experiment on retry values. +static constexpr int retry_threshold = 10; + +struct rtm_mutex_impl { + //! Release speculative mutex + static void release(d1::rtm_mutex::scoped_lock& s) { + switch(s.m_transaction_state) { + case d1::rtm_mutex::rtm_state::rtm_transacting: + __TBB_ASSERT(is_in_transaction(), "m_transaction_state && not speculating"); + end_transaction(); + s.m_mutex = nullptr; + break; + case d1::rtm_mutex::rtm_state::rtm_real: + s.m_mutex->unlock(); + s.m_mutex = nullptr; + break; + case d1::rtm_mutex::rtm_state::rtm_none: + __TBB_ASSERT(false, "mutex is not locked, but in release"); + break; + default: + __TBB_ASSERT(false, "invalid m_transaction_state"); + } + s.m_transaction_state = d1::rtm_mutex::rtm_state::rtm_none; + } + + //! Acquire lock on the given mutex. + static void acquire(d1::rtm_mutex& m, d1::rtm_mutex::scoped_lock& s, bool only_speculate) { + __TBB_ASSERT(s.m_transaction_state == d1::rtm_mutex::rtm_state::rtm_none, "scoped_lock already in transaction"); + if(governor::speculation_enabled()) { + int num_retries = 0; + unsigned int abort_code = 0; + do { + if(m.m_flag.load(std::memory_order_acquire)) { + if(only_speculate) return; + spin_wait_while_eq(m.m_flag, true); + } + // _xbegin returns -1 on success or the abort code, so capture it + if((abort_code = begin_transaction()) == speculation_successful_begin) + { + // started speculation + if(m.m_flag.load(std::memory_order_relaxed)) { + abort_transaction(); + } + s.m_transaction_state = d1::rtm_mutex::rtm_state::rtm_transacting; + // Don not wrap the following assignment to a function, + // because it can abort the transaction in debug. Need mutex for release(). + s.m_mutex = &m; + return; // successfully started speculation + } + ++num_retries; + } while((abort_code & speculation_retry) != 0 && (num_retries < retry_threshold)); + } + + if(only_speculate) return; + s.m_mutex = &m; + s.m_mutex->lock(); + s.m_transaction_state = d1::rtm_mutex::rtm_state::rtm_real; + return; + } + + //! Try to acquire lock on the given mutex. 
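+    // Editorial note (derived from the code below): this call never blocks -- if a
+    // transaction cannot be started it falls back to try_lock() on the real mutex,
+    // so returning false means the mutex was actually contended.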
+ static bool try_acquire(d1::rtm_mutex& m, d1::rtm_mutex::scoped_lock& s) { + acquire(m, s, /*only_speculate=*/true); + if (s.m_transaction_state == d1::rtm_mutex::rtm_state::rtm_transacting) { + return true; + } + __TBB_ASSERT(s.m_transaction_state == d1::rtm_mutex::rtm_state::rtm_none, NULL); + // transacting acquire failed. try_lock the real mutex + if (m.try_lock()) { + s.m_mutex = &m; + s.m_transaction_state = d1::rtm_mutex::rtm_state::rtm_real; + return true; + } + return false; + } +}; + +void __TBB_EXPORTED_FUNC acquire(d1::rtm_mutex& m, d1::rtm_mutex::scoped_lock& s, bool only_speculate) { + rtm_mutex_impl::acquire(m, s, only_speculate); +} +bool __TBB_EXPORTED_FUNC try_acquire(d1::rtm_mutex& m, d1::rtm_mutex::scoped_lock& s) { + return rtm_mutex_impl::try_acquire(m, s); +} +void __TBB_EXPORTED_FUNC release(d1::rtm_mutex::scoped_lock& s) { + rtm_mutex_impl::release(s); +} + +} // namespace r1 +} // namespace detail +} // namespace tbb + diff --git a/contrib/libs/tbb/src/tbb/rtm_rw_mutex.cpp b/contrib/libs/tbb/src/tbb/rtm_rw_mutex.cpp new file mode 100644 index 0000000000..5e50de4c39 --- /dev/null +++ b/contrib/libs/tbb/src/tbb/rtm_rw_mutex.cpp @@ -0,0 +1,271 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "oneapi/tbb/detail/_assert.h" +#include "oneapi/tbb/detail/_rtm_rw_mutex.h" +#include "itt_notify.h" +#include "governor.h" +#include "misc.h" + +#include <atomic> + +namespace tbb { +namespace detail { +namespace r1 { + +struct rtm_rw_mutex_impl { + // maximum number of times to retry + // TODO: experiment on retry values. + static constexpr int retry_threshold_read = 10; + static constexpr int retry_threshold_write = 10; + + //! Release speculative mutex + static void release(d1::rtm_rw_mutex::scoped_lock& s) { + switch(s.m_transaction_state) { + case d1::rtm_rw_mutex::rtm_type::rtm_transacting_writer: + case d1::rtm_rw_mutex::rtm_type::rtm_transacting_reader: + __TBB_ASSERT(is_in_transaction(), "m_transaction_state && not speculating"); + end_transaction(); + s.m_mutex = nullptr; + break; + case d1::rtm_rw_mutex::rtm_type::rtm_real_reader: + __TBB_ASSERT(!s.m_mutex->write_flag.load(std::memory_order_relaxed), "write_flag set but read lock acquired"); + s.m_mutex->unlock_shared(); + s.m_mutex = nullptr; + break; + case d1::rtm_rw_mutex::rtm_type::rtm_real_writer: + __TBB_ASSERT(s.m_mutex->write_flag.load(std::memory_order_relaxed), "write_flag unset but write lock acquired"); + s.m_mutex->write_flag.store(false, std::memory_order_relaxed); + s.m_mutex->unlock(); + s.m_mutex = nullptr; + break; + case d1::rtm_rw_mutex::rtm_type::rtm_not_in_mutex: + __TBB_ASSERT(false, "rtm_not_in_mutex, but in release"); + break; + default: + __TBB_ASSERT(false, "invalid m_transaction_state"); + } + s.m_transaction_state = d1::rtm_rw_mutex::rtm_type::rtm_not_in_mutex; + } + + //! Acquire write lock on the given mutex. 
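+    // Editorial note: only_speculate==true makes this the speculative half of a
+    // try_acquire -- if a transaction cannot be started, the routine returns without
+    // touching the underlying spin_rw_mutex.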
+ static void acquire_writer(d1::rtm_rw_mutex& m, d1::rtm_rw_mutex::scoped_lock& s, bool only_speculate) { + __TBB_ASSERT(s.m_transaction_state == d1::rtm_rw_mutex::rtm_type::rtm_not_in_mutex, "scoped_lock already in transaction"); + if(governor::speculation_enabled()) { + int num_retries = 0; + unsigned int abort_code = 0; + do { + if(m.m_state.load(std::memory_order_acquire)) { + if(only_speculate) return; + spin_wait_until_eq(m.m_state, d1::rtm_rw_mutex::state_type(0)); + } + // _xbegin returns -1 on success or the abort code, so capture it + if((abort_code = begin_transaction()) == speculation_successful_begin) + { + // started speculation + if(m.m_state.load(std::memory_order_relaxed)) { // add spin_rw_mutex to read-set. + // reader or writer grabbed the lock, so abort. + abort_transaction(); + } + s.m_transaction_state = d1::rtm_rw_mutex::rtm_type::rtm_transacting_writer; + // Don not wrap the following assignment to a function, + // because it can abort the transaction in debug. Need mutex for release(). + s.m_mutex = &m; + return; // successfully started speculation + } + ++num_retries; + } while((abort_code & speculation_retry) != 0 && (num_retries < retry_threshold_write)); + } + + if(only_speculate) return; + s.m_mutex = &m; // should apply a real try_lock... + s.m_mutex->lock(); // kill transactional writers + __TBB_ASSERT(!m.write_flag.load(std::memory_order_relaxed), "After acquire for write, write_flag already true"); + m.write_flag.store(true, std::memory_order_relaxed); // kill transactional readers + s.m_transaction_state = d1::rtm_rw_mutex::rtm_type::rtm_real_writer; + return; + } + + //! Acquire read lock on given mutex. + // only_speculate : true if we are doing a try_acquire. If true and we fail to speculate, don't + // really acquire the lock, return and do a try_acquire on the contained spin_rw_mutex. If + // the lock is already held by a writer, just return. + static void acquire_reader(d1::rtm_rw_mutex& m, d1::rtm_rw_mutex::scoped_lock& s, bool only_speculate) { + __TBB_ASSERT(s.m_transaction_state == d1::rtm_rw_mutex::rtm_type::rtm_not_in_mutex, "scoped_lock already in transaction"); + if(governor::speculation_enabled()) { + int num_retries = 0; + unsigned int abort_code = 0; + do { + // if in try_acquire, and lock is held as writer, don't attempt to speculate. + if(m.write_flag.load(std::memory_order_acquire)) { + if(only_speculate) return; + spin_wait_while_eq(m.write_flag, true); + } + // _xbegin returns -1 on success or the abort code, so capture it + if((abort_code = begin_transaction()) == speculation_successful_begin) + { + // started speculation + if(m.write_flag.load(std::memory_order_relaxed)) { // add write_flag to read-set. + abort_transaction(); // writer grabbed the lock, so abort. + } + s.m_transaction_state = d1::rtm_rw_mutex::rtm_type::rtm_transacting_reader; + // Don not wrap the following assignment to a function, + // because it can abort the transaction in debug. Need mutex for release(). + s.m_mutex = &m; + return; // successfully started speculation + } + // fallback path + // retry only if there is any hope of getting into a transaction soon + // Retry in the following cases (from Section 8.3.5 of + // Intel(R) Architecture Instruction Set Extensions Programming Reference): + // 1. abort caused by XABORT instruction (bit 0 of EAX register is set) + // 2. the transaction may succeed on a retry (bit 1 of EAX register is set) + // 3. 
if another logical processor conflicted with a memory address + // that was part of the transaction that aborted (bit 2 of EAX register is set) + // That is, retry if (abort_code & 0x7) is non-zero + ++num_retries; + } while((abort_code & speculation_retry) != 0 && (num_retries < retry_threshold_read)); + } + + if(only_speculate) return; + s.m_mutex = &m; + s.m_mutex->lock_shared(); + s.m_transaction_state = d1::rtm_rw_mutex::rtm_type::rtm_real_reader; + } + + //! Upgrade reader to become a writer. + /** Returns whether the upgrade happened without releasing and re-acquiring the lock */ + static bool upgrade(d1::rtm_rw_mutex::scoped_lock& s) { + switch(s.m_transaction_state) { + case d1::rtm_rw_mutex::rtm_type::rtm_real_reader: { + s.m_transaction_state = d1::rtm_rw_mutex::rtm_type::rtm_real_writer; + bool no_release = s.m_mutex->upgrade(); + __TBB_ASSERT(!s.m_mutex->write_flag.load(std::memory_order_relaxed), "After upgrade, write_flag already true"); + s.m_mutex->write_flag.store(true, std::memory_order_relaxed); + return no_release; + } + case d1::rtm_rw_mutex::rtm_type::rtm_transacting_reader: { + d1::rtm_rw_mutex& m = *s.m_mutex; + if(m.m_state.load(std::memory_order_acquire)) { // add spin_rw_mutex to read-set. + // Real reader or writer holds the lock; so commit the read and re-acquire for write. + release(s); + acquire_writer(m, s, false); + return false; + } else + { + s.m_transaction_state = d1::rtm_rw_mutex::rtm_type::rtm_transacting_writer; + return true; + } + } + default: + __TBB_ASSERT(false, "Invalid state for upgrade"); + return false; + } + } + + //! Downgrade writer to a reader. + static bool downgrade(d1::rtm_rw_mutex::scoped_lock& s) { + switch (s.m_transaction_state) { + case d1::rtm_rw_mutex::rtm_type::rtm_real_writer: + s.m_transaction_state = d1::rtm_rw_mutex::rtm_type::rtm_real_reader; + __TBB_ASSERT(s.m_mutex->write_flag.load(std::memory_order_relaxed), "Before downgrade write_flag not true"); + s.m_mutex->write_flag.store(false, std::memory_order_relaxed); + s.m_mutex->downgrade(); + return true; + case d1::rtm_rw_mutex::rtm_type::rtm_transacting_writer: + s.m_transaction_state = d1::rtm_rw_mutex::rtm_type::rtm_transacting_reader; + return true; + default: + __TBB_ASSERT(false, "Invalid state for downgrade"); + return false; + } + } + + //! Try to acquire write lock on the given mutex. + // There may be reader(s) which acquired the spin_rw_mutex, as well as possibly + // transactional reader(s). If this is the case, the acquire will fail, and assigning + // write_flag will kill the transactors. So we only assign write_flag if we have successfully + // acquired the lock. + static bool try_acquire_writer(d1::rtm_rw_mutex& m, d1::rtm_rw_mutex::scoped_lock& s) { + acquire_writer(m, s, /*only_speculate=*/true); + if (s.m_transaction_state == d1::rtm_rw_mutex::rtm_type::rtm_transacting_writer) { + return true; + } + __TBB_ASSERT(s.m_transaction_state == d1::rtm_rw_mutex::rtm_type::rtm_not_in_mutex, NULL); + // transacting write acquire failed. try_lock the real mutex + if (m.try_lock()) { + s.m_mutex = &m; + // only shoot down readers if we're not transacting ourselves + __TBB_ASSERT(!m.write_flag.load(std::memory_order_relaxed), "After try_acquire_writer, write_flag already true"); + m.write_flag.store(true, std::memory_order_relaxed); + s.m_transaction_state = d1::rtm_rw_mutex::rtm_type::rtm_real_writer; + return true; + } + return false; + } + + //! Try to acquire read lock on the given mutex. 
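+    // Editorial note: like try_acquire_writer above, this never blocks; a failed
+    // speculation simply falls through to try_lock_shared on the real mutex.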
+ static bool try_acquire_reader(d1::rtm_rw_mutex& m, d1::rtm_rw_mutex::scoped_lock& s) { + // speculatively acquire the lock. If this fails, do try_lock_shared on the spin_rw_mutex. + acquire_reader(m, s, /*only_speculate=*/true); + if (s.m_transaction_state == d1::rtm_rw_mutex::rtm_type::rtm_transacting_reader) { + return true; + } + __TBB_ASSERT(s.m_transaction_state == d1::rtm_rw_mutex::rtm_type::rtm_not_in_mutex, NULL); + // transacting read acquire failed. try_lock_shared the real mutex + if (m.try_lock_shared()) { + s.m_mutex = &m; + s.m_transaction_state = d1::rtm_rw_mutex::rtm_type::rtm_real_reader; + return true; + } + return false; + } +}; + +void __TBB_EXPORTED_FUNC acquire_writer(d1::rtm_rw_mutex& m, d1::rtm_rw_mutex::scoped_lock& s, bool only_speculate) { + rtm_rw_mutex_impl::acquire_writer(m, s, only_speculate); +} +//! Internal acquire read lock. +// only_speculate == true if we're doing a try_lock, else false. +void __TBB_EXPORTED_FUNC acquire_reader(d1::rtm_rw_mutex& m, d1::rtm_rw_mutex::scoped_lock& s, bool only_speculate) { + rtm_rw_mutex_impl::acquire_reader(m, s, only_speculate); +} +//! Internal upgrade reader to become a writer. +bool __TBB_EXPORTED_FUNC upgrade(d1::rtm_rw_mutex::scoped_lock& s) { + return rtm_rw_mutex_impl::upgrade(s); +} +//! Internal downgrade writer to become a reader. +bool __TBB_EXPORTED_FUNC downgrade(d1::rtm_rw_mutex::scoped_lock& s) { + return rtm_rw_mutex_impl::downgrade(s); +} +//! Internal try_acquire write lock. +bool __TBB_EXPORTED_FUNC try_acquire_writer(d1::rtm_rw_mutex& m, d1::rtm_rw_mutex::scoped_lock& s) { + return rtm_rw_mutex_impl::try_acquire_writer(m, s); +} +//! Internal try_acquire read lock. +bool __TBB_EXPORTED_FUNC try_acquire_reader(d1::rtm_rw_mutex& m, d1::rtm_rw_mutex::scoped_lock& s) { + return rtm_rw_mutex_impl::try_acquire_reader(m, s); +} +//! Internal release lock. +void __TBB_EXPORTED_FUNC release(d1::rtm_rw_mutex::scoped_lock& s) { + rtm_rw_mutex_impl::release(s); +} + +} // namespace r1 +} // namespace detail +} // namespace tbb + + diff --git a/contrib/libs/tbb/src/tbb/scheduler_common.h b/contrib/libs/tbb/src/tbb/scheduler_common.h new file mode 100644 index 0000000000..ee13dbf981 --- /dev/null +++ b/contrib/libs/tbb/src/tbb/scheduler_common.h @@ -0,0 +1,505 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef _TBB_scheduler_common_H +#define _TBB_scheduler_common_H + +#include "oneapi/tbb/detail/_utils.h" +#include "oneapi/tbb/detail/_template_helpers.h" +#include "oneapi/tbb/detail/_task.h" +#include "oneapi/tbb/detail/_machine.h" +#include "oneapi/tbb/task_group.h" +#include "oneapi/tbb/cache_aligned_allocator.h" +#include "itt_notify.h" +#include "co_context.h" +#include "misc.h" +#include "governor.h" + +#ifndef __TBB_SCHEDULER_MUTEX_TYPE +#define __TBB_SCHEDULER_MUTEX_TYPE tbb::spin_mutex +#endif +// TODO: add conditional inclusion based on specified type +#include "oneapi/tbb/spin_mutex.h" + +#if TBB_USE_ASSERT +#include <atomic> +#endif + +#include <cstdint> +#include <exception> + +//! Mutex type for global locks in the scheduler +using scheduler_mutex_type = __TBB_SCHEDULER_MUTEX_TYPE; + +#if _MSC_VER && !defined(__INTEL_COMPILER) + // Workaround for overzealous compiler warnings + // These particular warnings are so ubiquitous that no attempt is made to narrow + // the scope of the warnings. + #pragma warning (disable: 4100 4127 4312 4244 4267 4706) +#endif + +namespace tbb { +namespace detail { +namespace r1 { + +class arena; +class mail_inbox; +class mail_outbox; +class market; +class observer_proxy; + +enum task_stream_accessor_type { front_accessor = 0, back_nonnull_accessor }; +template<task_stream_accessor_type> class task_stream; + +using isolation_type = std::intptr_t; +constexpr isolation_type no_isolation = 0; + +//------------------------------------------------------------------------ +// Extended execute data +//------------------------------------------------------------------------ + +//! Execute data used on a task dispatcher side, reflects a current execution state +struct execution_data_ext : d1::execution_data { + task_dispatcher* task_disp{}; + isolation_type isolation{}; + d1::wait_context* wait_ctx{}; +}; + +//------------------------------------------------------------------------ +// Task accessor +//------------------------------------------------------------------------ + +//! Interpretation of reserved task fields inside a task dispatcher +struct task_accessor { + static constexpr std::uint64_t proxy_task_trait = 1; + static constexpr std::uint64_t resume_task_trait = 2; + static d1::task_group_context*& context(d1::task& t) { + task_group_context** tgc = reinterpret_cast<task_group_context**>(&t.m_reserved[0]); + return *tgc; + } + static isolation_type& isolation(d1::task& t) { + isolation_type* tag = reinterpret_cast<isolation_type*>(&t.m_reserved[2]); + return *tag; + } + static void set_proxy_trait(d1::task& t) { + // TODO: refactor proxy tasks not to work on uninitialized memory. + //__TBB_ASSERT((t.m_version_and_traits & proxy_task_trait) == 0, nullptr); + t.m_version_and_traits |= proxy_task_trait; + } + static bool is_proxy_task(d1::task& t) { + return (t.m_version_and_traits & proxy_task_trait) != 0; + } + static void set_resume_trait(d1::task& t) { + __TBB_ASSERT((t.m_version_and_traits & resume_task_trait) == 0, nullptr); + t.m_version_and_traits |= resume_task_trait; + } + static bool is_resume_task(d1::task& t) { + return (t.m_version_and_traits & resume_task_trait) != 0; + } +}; + +//------------------------------------------------------------------------ +//! Extended variant of the standard offsetof macro +/** The standard offsetof macro is not sufficient for TBB as it can be used for + POD-types only. The constant 0x1000 (not NULL) is necessary to appease GCC. 
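+    For illustration, with a hypothetical non-POD type the macro yields a plain byte
+    offset that __TBB_get_object_ref can undo:
+        struct node { virtual ~node() = default; int key; };
+        __TBB_offsetof(node, key) is the byte offset of 'key' inside 'node', so
+        __TBB_get_object_ref(node, key, &n.key) recovers a reference to the enclosing 'n'.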
**/ +#define __TBB_offsetof(class_name, member_name) \ + ((ptrdiff_t)&(reinterpret_cast<class_name*>(0x1000)->member_name) - 0x1000) + +//! Returns address of the object containing a member with the given name and address +#define __TBB_get_object_ref(class_name, member_name, member_addr) \ + (*reinterpret_cast<class_name*>((char*)member_addr - __TBB_offsetof(class_name, member_name))) + +//! Helper class for tracking floating point context and task group context switches +/** Assuming presence of an itt collector, in addition to keeping track of floating + point context, this class emits itt events to indicate begin and end of task group + context execution **/ +template <bool report_tasks> +class context_guard_helper { + const d1::task_group_context* curr_ctx; + d1::cpu_ctl_env guard_cpu_ctl_env; + d1::cpu_ctl_env curr_cpu_ctl_env; +public: + context_guard_helper() : curr_ctx(NULL) { + guard_cpu_ctl_env.get_env(); + curr_cpu_ctl_env = guard_cpu_ctl_env; + } + ~context_guard_helper() { + if (curr_cpu_ctl_env != guard_cpu_ctl_env) + guard_cpu_ctl_env.set_env(); + if (report_tasks && curr_ctx) + ITT_TASK_END; + } + // The function is called from bypass dispatch loop on the hot path. + // Consider performance issues when refactoring. + void set_ctx(const d1::task_group_context* ctx) { + if (!ctx) + return; + const d1::cpu_ctl_env* ctl = reinterpret_cast<const d1::cpu_ctl_env*>(&ctx->my_cpu_ctl_env); + // Compare the FPU settings directly because the context can be reused between parallel algorithms. + if (*ctl != curr_cpu_ctl_env) { + curr_cpu_ctl_env = *ctl; + curr_cpu_ctl_env.set_env(); + } + if (report_tasks && ctx != curr_ctx) { + // if task group context was active, report end of current execution frame. + if (curr_ctx) + ITT_TASK_END; + // reporting begin of new task group context execution frame. + // using address of task group context object to group tasks (parent). + // id of task execution frame is NULL and reserved for future use. + ITT_TASK_BEGIN(ctx, ctx->my_name, NULL); + curr_ctx = ctx; + } + } +#if _WIN64 + void restore_default() { + if (curr_cpu_ctl_env != guard_cpu_ctl_env) { + guard_cpu_ctl_env.set_env(); + curr_cpu_ctl_env = guard_cpu_ctl_env; + } + } +#endif // _WIN64 +}; + +#if (_WIN32 || _WIN64 || __linux__) && (__TBB_x86_32 || __TBB_x86_64) +#if _MSC_VER +#pragma intrinsic(__rdtsc) +#endif +inline std::uint64_t machine_time_stamp() { +#if __INTEL_COMPILER + return _rdtsc(); +#elif _MSC_VER + return __rdtsc(); +#else + std::uint32_t hi, lo; + __asm__ __volatile__("rdtsc" : "=d"(hi), "=a"(lo)); + return (std::uint64_t(hi) << 32) | lo; +#endif +} + +inline void prolonged_pause_impl() { + // Assumption based on practice: 1000-2000 ticks seems to be a suitable invariant for the + // majority of platforms. Currently, skip platforms that define __TBB_STEALING_PAUSE + // because these platforms require very careful tuning. + std::uint64_t prev = machine_time_stamp(); + const std::uint64_t finish = prev + 1000; + atomic_backoff backoff; + do { + backoff.bounded_pause(); + std::uint64_t curr = machine_time_stamp(); + if (curr <= prev) + // Possibly, the current logical thread is moved to another hardware thread or overflow is occurred. + break; + prev = curr; + } while (prev < finish); +} +#else +inline void prolonged_pause_impl() { +#ifdef __TBB_ipf + static const long PauseTime = 1500; +#else + static const long PauseTime = 80; +#endif + // TODO IDEA: Update PauseTime adaptively? 
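+    // On targets without a usable time-stamp counter the pause length is a fixed
+    // iteration count: machine_pause() essentially issues the architecture's pause/yield
+    // hint PauseTime times, instead of spinning until a tick budget expires as the
+    // rdtsc-based variant above does.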
+ machine_pause(PauseTime); +} +#endif + +inline void prolonged_pause() { +#if __TBB_WAITPKG_INTRINSICS_PRESENT && (_WIN32 || _WIN64 || __linux__) && (__TBB_x86_32 || __TBB_x86_64) + if (governor::wait_package_enabled()) { + std::uint64_t time_stamp = machine_time_stamp(); + // _tpause function directs the processor to enter an implementation-dependent optimized state + // until the Time Stamp Counter reaches or exceeds the value specified in second parameter. + // Constant "700" is ticks to wait for. + // First parameter 0 selects between a lower power (cleared) or faster wakeup (set) optimized state. + _tpause(0, time_stamp + 700); + } + else +#endif + prolonged_pause_impl(); +} + +class stealing_loop_backoff { + const int my_pause_threshold; + const int my_yield_threshold; + int my_pause_count; + int my_yield_count; +public: + // my_yield_threshold = 100 is an experimental value. Ideally, once we start calling __TBB_Yield(), + // the time spent spinning before calling is_out_of_work() should be approximately + // the time it takes for a thread to be woken up. Doing so would guarantee that we do + // no worse than 2x the optimal spin time. Or perhaps a time-slice quantum is the right amount. + stealing_loop_backoff(int num_workers) + : my_pause_threshold{ 2 * (num_workers + 1) } +#if __APPLE__ + // threshold value tuned separately for macOS due to high cost of sched_yield there + , my_yield_threshold{10} +#else + , my_yield_threshold{100} +#endif + , my_pause_count{} + , my_yield_count{} + {} + bool pause() { + prolonged_pause(); + if (my_pause_count++ >= my_pause_threshold) { + my_pause_count = my_pause_threshold; + d0::yield(); + if (my_yield_count++ >= my_yield_threshold) { + my_yield_count = my_yield_threshold; + return true; + } + } + return false; + } + void reset_wait() { + my_pause_count = my_yield_count = 0; + } +}; + +//------------------------------------------------------------------------ +// Exception support +//------------------------------------------------------------------------ +//! Task group state change propagation global epoch +/** Together with generic_scheduler::my_context_state_propagation_epoch forms + cross-thread signaling mechanism that allows to avoid locking at the hot path + of normal execution flow. + + When a descendant task group context is registered or unregistered, the global + and local epochs are compared. If they differ, a state change is being propagated, + and thus registration/deregistration routines take slower branch that may block + (at most one thread of the pool can be blocked at any moment). Otherwise the + control path is lock-free and fast. **/ +extern std::atomic<std::uintptr_t> the_context_state_propagation_epoch; + +//! Mutex guarding state change propagation across task groups forest. +/** Also protects modification of related data structures. **/ +typedef scheduler_mutex_type context_state_propagation_mutex_type; +extern context_state_propagation_mutex_type the_context_state_propagation_mutex; + +class tbb_exception_ptr { + std::exception_ptr my_ptr; +public: + static tbb_exception_ptr* allocate() noexcept; + + //! Destroys this objects + /** Note that objects of this type can be created only by the allocate() method. **/ + void destroy() noexcept; + + //! Throws the contained exception . 
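+    // The class is a thin wrapper over the standard capture/rethrow idiom. A minimal
+    // stand-alone sketch of that idiom (illustrative; not this class's interface):
+    //
+    //   std::exception_ptr captured;
+    //   try { throw std::runtime_error("boom"); }
+    //   catch (...) { captured = std::current_exception(); }  // capture on one thread
+    //   if (captured) std::rethrow_exception(captured);       // rethrow, possibly elsewhere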
+ void throw_self(); + +private: + tbb_exception_ptr(const std::exception_ptr& src) : my_ptr(src) {} +}; // class tbb_exception_ptr + +//------------------------------------------------------------------------ +// Debugging support +//------------------------------------------------------------------------ + +#if TBB_USE_ASSERT +static const std::uintptr_t venom = tbb::detail::select_size_t_constant<0xDEADBEEFU, 0xDDEEAADDDEADBEEFULL>::value; + +inline void poison_value(std::uintptr_t& val) { val = venom; } + +inline void poison_value(std::atomic<std::uintptr_t>& val) { val.store(venom, std::memory_order_relaxed); } + +/** Expected to be used in assertions only, thus no empty form is defined. **/ +inline bool is_alive(std::uintptr_t v) { return v != venom; } + +/** Logically, this method should be a member of class task. + But we do not want to publish it, so it is here instead. */ +inline void assert_task_valid(const d1::task* t) { + assert_pointer_valid(t); +} +#else /* !TBB_USE_ASSERT */ + +/** In contrast to debug version poison_value() is a macro here because + the variable used as its argument may be undefined in release builds. **/ +#define poison_value(g) ((void)0) + +inline void assert_task_valid(const d1::task*) {} + +#endif /* !TBB_USE_ASSERT */ + +struct suspend_point_type { +#if __TBB_RESUMABLE_TASKS + //! The arena related to this task_dispatcher + arena* m_arena{ nullptr }; + //! The random for the resume task + FastRandom m_random; + //! The flag is raised when the original owner should return to this task dispatcher. + std::atomic<bool> m_is_owner_recalled{ false }; + //! Inicates if the resume task should be placed to the critical task stream. + bool m_is_critical{ false }; + //! Associated coroutine + co_context m_co_context; + + struct resume_task final : public d1::task { + task_dispatcher& m_target; + explicit resume_task(task_dispatcher& target) : m_target(target) { + task_accessor::set_resume_trait(*this); + } + d1::task* execute(d1::execution_data& ed) override; + d1::task* cancel(d1::execution_data&) override { + __TBB_ASSERT(false, "The resume task cannot be canceled"); + return nullptr; + } + } m_resume_task; + + suspend_point_type(arena* a, std::size_t stack_size, task_dispatcher& target); +#endif /*__TBB_RESUMABLE_TASKS */ +}; + +class alignas (max_nfs_size) task_dispatcher { +public: + // TODO: reconsider low level design to better organize dependencies and files. + friend class thread_data; + friend class arena_slot; + friend class nested_arena_context; + friend class delegated_task; + friend struct base_waiter; + + //! The data of the current thread attached to this task_dispatcher + thread_data* m_thread_data{ nullptr }; + + //! The current execution data + execution_data_ext m_execute_data_ext; + + //! Properties + struct properties { + bool outermost{ true }; + bool fifo_tasks_allowed{ true }; + bool critical_task_allowed{ true }; + } m_properties; + + //! Position in the call stack when stealing is still allowed. + std::uintptr_t m_stealing_threshold{}; + + //! Suspend point (null if this task dispatcher has been never suspended) + suspend_point_type* m_suspend_point{ nullptr }; + + //! Attempt to get a task from the mailbox. + /** Gets a task only if it has not been executed by its sender or a thief + that has stolen it from the sender's task pool. Otherwise returns NULL. + This method is intended to be used only by the thread extracting the proxy + from its mailbox. (In contrast to local task pool, mailbox can be read only + by its owner). 
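+        A mailed task arrives as a proxy that is published both in the sender's task pool
+        and in this mailbox; whichever side extracts it first gets the task, while the
+        other side later finds an already-claimed proxy and simply discards it.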
**/ + d1::task* get_mailbox_task(mail_inbox& my_inbox, execution_data_ext& ed, isolation_type isolation); + + d1::task* get_critical_task(d1::task*, execution_data_ext&, isolation_type, bool); + + template <bool ITTPossible, typename Waiter> + d1::task* receive_or_steal_task(thread_data& tls, execution_data_ext& ed, Waiter& waiter, + isolation_type isolation, bool outermost, bool criticality_absence); + + template <bool ITTPossible, typename Waiter> + d1::task* local_wait_for_all(d1::task * t, Waiter& waiter); + + task_dispatcher(const task_dispatcher&) = delete; + + bool can_steal(); +public: + task_dispatcher(arena* a); + + ~task_dispatcher() { + if (m_suspend_point) { + m_suspend_point->~suspend_point_type(); + cache_aligned_deallocate(m_suspend_point); + } + poison_pointer(m_thread_data); + poison_pointer(m_suspend_point); + } + + template <typename Waiter> + d1::task* local_wait_for_all(d1::task* t, Waiter& waiter); + + bool allow_fifo_task(bool new_state) { + bool old_state = m_properties.fifo_tasks_allowed; + m_properties.fifo_tasks_allowed = new_state; + return old_state; + } + + isolation_type set_isolation(isolation_type isolation) { + isolation_type prev = m_execute_data_ext.isolation; + m_execute_data_ext.isolation = isolation; + return prev; + } + + thread_data& get_thread_data() { + __TBB_ASSERT(m_thread_data, nullptr); + return *m_thread_data; + } + + static void execute_and_wait(d1::task* t, d1::wait_context& wait_ctx, d1::task_group_context& w_ctx); + + void set_stealing_threshold(std::uintptr_t stealing_threshold) { + bool assert_condition = (stealing_threshold == 0 && m_stealing_threshold != 0) || + (stealing_threshold != 0 && m_stealing_threshold == 0); + __TBB_ASSERT_EX( assert_condition, nullptr ); + m_stealing_threshold = stealing_threshold; + } + + d1::task* get_inbox_or_critical_task(execution_data_ext&, mail_inbox&, isolation_type, bool); + d1::task* get_stream_or_critical_task(execution_data_ext&, arena&, task_stream<front_accessor>&, + unsigned& /*hint_for_stream*/, isolation_type, + bool /*critical_allowed*/); + d1::task* steal_or_get_critical(execution_data_ext&, arena&, unsigned /*arena_index*/, FastRandom&, + isolation_type, bool /*critical_allowed*/); + +#if __TBB_RESUMABLE_TASKS + /* [[noreturn]] */ void co_local_wait_for_all() noexcept; + void suspend(suspend_callback_type suspend_callback, void* user_callback); + void resume(task_dispatcher& target); + suspend_point_type* get_suspend_point(); + void init_suspend_point(arena* a, std::size_t stack_size); + friend void internal_resume(suspend_point_type*); + void recall_point(); +#endif /* __TBB_RESUMABLE_TASKS */ +}; + +inline std::uintptr_t calculate_stealing_threshold(std::uintptr_t base, std::size_t stack_size) { + return base - stack_size / 2; +} + +struct task_group_context_impl { + static void destroy(d1::task_group_context&); + static void initialize(d1::task_group_context&); + static void register_with(d1::task_group_context&, thread_data*); + static void bind_to_impl(d1::task_group_context&, thread_data*); + static void bind_to(d1::task_group_context&, thread_data*); + template <typename T> + static void propagate_task_group_state(d1::task_group_context&, std::atomic<T> d1::task_group_context::*, d1::task_group_context&, T); + static bool cancel_group_execution(d1::task_group_context&); + static bool is_group_execution_cancelled(const d1::task_group_context&); + static void reset(d1::task_group_context&); + static void capture_fp_settings(d1::task_group_context&); + static void 
copy_fp_settings(d1::task_group_context& ctx, const d1::task_group_context& src); +}; + + +//! Forward declaration for scheduler entities +bool gcc_rethrow_exception_broken(); +void fix_broken_rethrow(); +//! Forward declaration: throws std::runtime_error with what() returning error_code description prefixed with aux_info +void handle_perror(int error_code, const char* aux_info); + +} // namespace r1 +} // namespace detail +} // namespace tbb + +#endif /* _TBB_scheduler_common_H */ diff --git a/contrib/libs/tbb/src/tbb/semaphore.cpp b/contrib/libs/tbb/src/tbb/semaphore.cpp new file mode 100644 index 0000000000..92c9e675ab --- /dev/null +++ b/contrib/libs/tbb/src/tbb/semaphore.cpp @@ -0,0 +1,92 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "semaphore.h" +#if __TBB_USE_SRWLOCK +#include "dynamic_link.h" // Refers to src/tbb, not include/tbb +#error #include "tbb_misc.h" +#endif + +namespace tbb { +namespace detail { +namespace r1 { + +// TODO: For new win UI port, we can use SRWLock API without dynamic_link etc. +#if __TBB_USE_SRWLOCK + +static std::atomic<do_once_state> concmon_module_inited; + +void WINAPI init_binsem_using_event( SRWLOCK* h_ ) +{ + srwl_or_handle* shptr = (srwl_or_handle*) h_; + shptr->h = CreateEventEx( NULL, NULL, 0, EVENT_ALL_ACCESS|SEMAPHORE_ALL_ACCESS ); +} + +void WINAPI acquire_binsem_using_event( SRWLOCK* h_ ) +{ + srwl_or_handle* shptr = (srwl_or_handle*) h_; + WaitForSingleObjectEx( shptr->h, INFINITE, FALSE ); +} + +void WINAPI release_binsem_using_event( SRWLOCK* h_ ) +{ + srwl_or_handle* shptr = (srwl_or_handle*) h_; + SetEvent( shptr->h ); +} + +static void (WINAPI *__TBB_init_binsem)( SRWLOCK* ) = (void (WINAPI *)(SRWLOCK*))&init_binsem_using_event; +static void (WINAPI *__TBB_acquire_binsem)( SRWLOCK* ) = (void (WINAPI *)(SRWLOCK*))&acquire_binsem_using_event; +static void (WINAPI *__TBB_release_binsem)( SRWLOCK* ) = (void (WINAPI *)(SRWLOCK*))&release_binsem_using_event; + +//! Table describing the how to link the handlers. 
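+// The table below follows the usual "resolve at run time, otherwise keep the fallback"
+// pattern: the pointers above start out at the event-based emulation and are overwritten
+// only if Kernel32 actually exports the SRWLock API. A minimal sketch of that pattern in
+// plain Win32 calls (illustrative; not the dynamic_link interface used here):
+//
+//   using init_fn = void (WINAPI*)(SRWLOCK*);
+//   init_fn resolve_or_fallback(init_fn fallback) {
+//       HMODULE k32 = GetModuleHandleA("Kernel32.dll");
+//       FARPROC p = k32 ? GetProcAddress(k32, "InitializeSRWLock") : nullptr;
+//       return p ? reinterpret_cast<init_fn>(p) : fallback;
+//   }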
+static const dynamic_link_descriptor SRWLLinkTable[] = { + DLD(InitializeSRWLock, __TBB_init_binsem), + DLD(AcquireSRWLockExclusive, __TBB_acquire_binsem), + DLD(ReleaseSRWLockExclusive, __TBB_release_binsem) +}; + +inline void init_concmon_module() +{ + __TBB_ASSERT( (uintptr_t)__TBB_init_binsem==(uintptr_t)&init_binsem_using_event, NULL ); + if( dynamic_link( "Kernel32.dll", SRWLLinkTable, sizeof(SRWLLinkTable)/sizeof(dynamic_link_descriptor) ) ) { + __TBB_ASSERT( (uintptr_t)__TBB_init_binsem!=(uintptr_t)&init_binsem_using_event, NULL ); + __TBB_ASSERT( (uintptr_t)__TBB_acquire_binsem!=(uintptr_t)&acquire_binsem_using_event, NULL ); + __TBB_ASSERT( (uintptr_t)__TBB_release_binsem!=(uintptr_t)&release_binsem_using_event, NULL ); + } +} + +binary_semaphore::binary_semaphore() { + atomic_do_once( &init_concmon_module, concmon_module_inited ); + + __TBB_init_binsem( &my_sem.lock ); + if( (uintptr_t)__TBB_init_binsem!=(uintptr_t)&init_binsem_using_event ) + P(); +} + +binary_semaphore::~binary_semaphore() { + if( (uintptr_t)__TBB_init_binsem==(uintptr_t)&init_binsem_using_event ) + CloseHandle( my_sem.h ); +} + +void binary_semaphore::P() { __TBB_acquire_binsem( &my_sem.lock ); } + +void binary_semaphore::V() { __TBB_release_binsem( &my_sem.lock ); } + +#endif /* __TBB_USE_SRWLOCK */ + +} // namespace r1 +} // namespace detail +} // namespace tbb diff --git a/contrib/libs/tbb/src/tbb/semaphore.h b/contrib/libs/tbb/src/tbb/semaphore.h new file mode 100644 index 0000000000..0a88536e36 --- /dev/null +++ b/contrib/libs/tbb/src/tbb/semaphore.h @@ -0,0 +1,335 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_semaphore_H +#define __TBB_semaphore_H + +#include "oneapi/tbb/detail/_utils.h" + +#if _WIN32||_WIN64 +#include <windows.h> +#elif __APPLE__ +#include <mach/semaphore.h> +#include <mach/task.h> +#include <mach/mach_init.h> +#include <mach/error.h> +#else +#include <semaphore.h> +#ifdef TBB_USE_DEBUG +#include <cerrno> +#endif +#endif /*_WIN32||_WIN64*/ + +#include <atomic> + +#if __linux__ || __FreeBSD__ || __NetBSD__ || __OpenBSD__ + +/* Futex definitions */ +#include <unistd.h> +#include <sys/syscall.h> + +#if defined(SYS_futex) + +/* This section is included for Linux and some other systems that may support futexes.*/ + +#define __TBB_USE_FUTEX 1 + +#if defined(__has_include) +#define __TBB_has_include __has_include +#else +#define __TBB_has_include(x) 0 +#endif + +/* +If available, use typical headers where futex API is defined. While Linux and OpenBSD +are known to provide such headers, other systems might have them as well. +*/ +#if defined(__linux__) || __TBB_has_include(<linux/futex.h>) +#include <linux/futex.h> +#elif defined(__OpenBSD__) || __TBB_has_include(<sys/futex.h>) +#error #include <sys/futex.h> +#endif + +#include <climits> +#include <cerrno> + +/* +Some systems might not define the macros or use different names. In such case we expect +the actual parameter values to match Linux: 0 for wait, 1 for wake. 
+*/ +#if defined(FUTEX_WAIT_PRIVATE) +#define __TBB_FUTEX_WAIT FUTEX_WAIT_PRIVATE +#elif defined(FUTEX_WAIT) +#define __TBB_FUTEX_WAIT FUTEX_WAIT +#else +#define __TBB_FUTEX_WAIT 0 +#endif + +#if defined(FUTEX_WAKE_PRIVATE) +#define __TBB_FUTEX_WAKE FUTEX_WAKE_PRIVATE +#elif defined(FUTEX_WAKE) +#define __TBB_FUTEX_WAKE FUTEX_WAKE +#else +#define __TBB_FUTEX_WAKE 1 +#endif + +#endif // SYS_futex +#endif // __linux__ || __FreeBSD__ || __NetBSD__ || __OpenBSD__ + +namespace tbb { +namespace detail { +namespace r1 { + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Futex implementation +//////////////////////////////////////////////////////////////////////////////////////////////////// + +#if __TBB_USE_FUTEX + +static inline int futex_wait( void *futex, int comparand ) { + int r = ::syscall( SYS_futex,futex,__TBB_FUTEX_WAIT,comparand,NULL,NULL,0 ); +#if TBB_USE_ASSERT + int e = errno; + __TBB_ASSERT( r==0||r==EWOULDBLOCK||(r==-1&&(e==EAGAIN||e==EINTR)), "futex_wait failed." ); +#endif /* TBB_USE_ASSERT */ + return r; +} + +static inline int futex_wakeup_one( void *futex ) { + int r = ::syscall( SYS_futex,futex,__TBB_FUTEX_WAKE,1,NULL,NULL,0 ); + __TBB_ASSERT( r==0||r==1, "futex_wakeup_one: more than one thread woken up?" ); + return r; +} + +// Additional possible methods that are not required right now +// static inline int futex_wakeup_all( void *futex ) { +// int r = ::syscall( SYS_futex,futex,__TBB_FUTEX_WAKE,INT_MAX,NULL,NULL,0 ); +// __TBB_ASSERT( r>=0, "futex_wakeup_all: error in waking up threads" ); +// return r; +// } + +#endif // __TBB_USE_FUTEX + +//////////////////////////////////////////////////////////////////////////////////////////////////// +#if _WIN32||_WIN64 +typedef LONG sem_count_t; +//! Edsger Dijkstra's counting semaphore +class semaphore : no_copy { + static const int max_semaphore_cnt = MAXLONG; +public: + //! ctor + semaphore(size_t start_cnt_ = 0) {init_semaphore(start_cnt_);} + //! dtor + ~semaphore() {CloseHandle( sem );} + //! wait/acquire + void P() {WaitForSingleObjectEx( sem, INFINITE, FALSE );} + //! post/release + void V() {ReleaseSemaphore( sem, 1, NULL );} +private: + HANDLE sem; + void init_semaphore(size_t start_cnt_) { + sem = CreateSemaphoreEx( NULL, LONG(start_cnt_), max_semaphore_cnt, NULL, 0, SEMAPHORE_ALL_ACCESS ); + } +}; +#elif __APPLE__ +//! Edsger Dijkstra's counting semaphore +class semaphore : no_copy { +public: + //! ctor + semaphore(int start_cnt_ = 0) : sem(start_cnt_) { init_semaphore(start_cnt_); } + //! dtor + ~semaphore() { + kern_return_t ret = semaphore_destroy( mach_task_self(), sem ); + __TBB_ASSERT_EX( ret==err_none, NULL ); + } + //! wait/acquire + void P() { + int ret; + do { + ret = semaphore_wait( sem ); + } while( ret==KERN_ABORTED ); + __TBB_ASSERT( ret==KERN_SUCCESS, "semaphore_wait() failed" ); + } + //! post/release + void V() { semaphore_signal( sem ); } +private: + semaphore_t sem; + void init_semaphore(int start_cnt_) { + kern_return_t ret = semaphore_create( mach_task_self(), &sem, SYNC_POLICY_FIFO, start_cnt_ ); + __TBB_ASSERT_EX( ret==err_none, "failed to create a semaphore" ); + } +}; +#else /* Linux/Unix */ +typedef uint32_t sem_count_t; +//! Edsger Dijkstra's counting semaphore +class semaphore : no_copy { +public: + //! ctor + semaphore(int start_cnt_ = 0 ) { init_semaphore( start_cnt_ ); } + + //! dtor + ~semaphore() { + int ret = sem_destroy( &sem ); + __TBB_ASSERT_EX( !ret, NULL ); + } + //! 
wait/acquire + void P() { + while( sem_wait( &sem )!=0 ) + __TBB_ASSERT( errno==EINTR, NULL ); + } + //! post/release + void V() { sem_post( &sem ); } +private: + sem_t sem; + void init_semaphore(int start_cnt_) { + int ret = sem_init( &sem, /*shared among threads*/ 0, start_cnt_ ); + __TBB_ASSERT_EX( !ret, NULL ); + } +}; +#endif /* _WIN32||_WIN64 */ + + +//! for performance reasons, we want specialized binary_semaphore +#if _WIN32||_WIN64 +#if !__TBB_USE_SRWLOCK +//! binary_semaphore for concurrent_monitor +class binary_semaphore : no_copy { +public: + //! ctor + binary_semaphore() { my_sem = CreateEventEx( NULL, NULL, 0, EVENT_ALL_ACCESS ); } + //! dtor + ~binary_semaphore() { CloseHandle( my_sem ); } + //! wait/acquire + void P() { WaitForSingleObjectEx( my_sem, INFINITE, FALSE ); } + //! post/release + void V() { SetEvent( my_sem ); } +private: + HANDLE my_sem; +}; +#else /* __TBB_USE_SRWLOCK */ + +union srwl_or_handle { + SRWLOCK lock; + HANDLE h; +}; + +//! binary_semaphore for concurrent_monitor +class binary_semaphore : no_copy { +public: + //! ctor + binary_semaphore(); + //! dtor + ~binary_semaphore(); + //! wait/acquire + void P(); + //! post/release + void V(); +private: + srwl_or_handle my_sem; +}; +#endif /* !__TBB_USE_SRWLOCK */ +#elif __APPLE__ +//! binary_semaphore for concurrent monitor +class binary_semaphore : no_copy { +public: + //! ctor + binary_semaphore() : my_sem(0) { + kern_return_t ret = semaphore_create( mach_task_self(), &my_sem, SYNC_POLICY_FIFO, 0 ); + __TBB_ASSERT_EX( ret==err_none, "failed to create a semaphore" ); + } + //! dtor + ~binary_semaphore() { + kern_return_t ret = semaphore_destroy( mach_task_self(), my_sem ); + __TBB_ASSERT_EX( ret==err_none, NULL ); + } + //! wait/acquire + void P() { + int ret; + do { + ret = semaphore_wait( my_sem ); + } while( ret==KERN_ABORTED ); + __TBB_ASSERT( ret==KERN_SUCCESS, "semaphore_wait() failed" ); + } + //! post/release + void V() { semaphore_signal( my_sem ); } +private: + semaphore_t my_sem; +}; +#else /* Linux/Unix */ + +#if __TBB_USE_FUTEX +class binary_semaphore : no_copy { +// The implementation is equivalent to the "Mutex, Take 3" one +// in the paper "Futexes Are Tricky" by Ulrich Drepper +public: + //! ctor + binary_semaphore() { my_sem = 1; } + //! dtor + ~binary_semaphore() {} + //! wait/acquire + void P() { + int s = 0; + if( !my_sem.compare_exchange_strong( s, 1 ) ) { + if( s!=2 ) + s = my_sem.exchange( 2 ); + while( s!=0 ) { // This loop deals with spurious wakeup + futex_wait( &my_sem, 2 ); + s = my_sem.exchange( 2 ); + } + } + } + //! post/release + void V() { + __TBB_ASSERT( my_sem.load(std::memory_order_relaxed)>=1, "multiple V()'s in a row?" ); + if( my_sem.exchange( 0 )==2 ) + futex_wakeup_one( &my_sem ); + } +private: + std::atomic<int> my_sem; // 0 - open; 1 - closed, no waits; 2 - closed, possible waits +}; +#else +typedef uint32_t sem_count_t; +//! binary_semaphore for concurrent monitor +class binary_semaphore : no_copy { +public: + //! ctor + binary_semaphore() { + int ret = sem_init( &my_sem, /*shared among threads*/ 0, 0 ); + __TBB_ASSERT_EX( !ret, NULL ); + } + //! dtor + ~binary_semaphore() { + int ret = sem_destroy( &my_sem ); + __TBB_ASSERT_EX( !ret, NULL ); + } + //! wait/acquire + void P() { + while( sem_wait( &my_sem )!=0 ) + __TBB_ASSERT( errno==EINTR, NULL ); + } + //! 
post/release + void V() { sem_post( &my_sem ); } +private: + sem_t my_sem; +}; +#endif /* __TBB_USE_FUTEX */ +#endif /* _WIN32||_WIN64 */ + +} // namespace r1 +} // namespace detail +} // namespace tbb + +#endif /* __TBB_semaphore_H */ diff --git a/contrib/libs/tbb/src/tbb/small_object_pool.cpp b/contrib/libs/tbb/src/tbb/small_object_pool.cpp new file mode 100644 index 0000000000..28d11d011d --- /dev/null +++ b/contrib/libs/tbb/src/tbb/small_object_pool.cpp @@ -0,0 +1,154 @@ +/* + Copyright (c) 2020-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "oneapi/tbb/cache_aligned_allocator.h" +#include "oneapi/tbb/detail/_small_object_pool.h" +#include "oneapi/tbb/detail/_task.h" +#include "governor.h" +#include "thread_data.h" +#include "task_dispatcher.h" + +#include <cstddef> + +namespace tbb { +namespace detail { +namespace r1 { + +small_object_pool_impl::small_object* const small_object_pool_impl::dead_public_list = + reinterpret_cast<small_object_pool_impl::small_object*>(1); + +void* __TBB_EXPORTED_FUNC allocate(d1::small_object_pool*& allocator, std::size_t number_of_bytes, const d1::execution_data& ed) { + auto& tls = static_cast<const execution_data_ext&>(ed).task_disp->get_thread_data(); + auto pool = tls.my_small_object_pool; + return pool->allocate_impl(allocator, number_of_bytes); +} + +void* __TBB_EXPORTED_FUNC allocate(d1::small_object_pool*& allocator, std::size_t number_of_bytes) { + // TODO: optimize if the allocator contains a valid pool. + auto tls = governor::get_thread_data(); + auto pool = tls->my_small_object_pool; + return pool->allocate_impl(allocator, number_of_bytes); +} + +void* small_object_pool_impl::allocate_impl(d1::small_object_pool*& allocator, std::size_t number_of_bytes) +{ + small_object* obj{nullptr}; + + if (number_of_bytes <= small_object_size) { + if (m_private_list) { + obj = m_private_list; + m_private_list = m_private_list->next; + } else if (m_public_list.load(std::memory_order_relaxed)) { + // No fence required for read of my_public_list above, because std::atomic::exchange() has a fence. + obj = m_public_list.exchange(nullptr); + __TBB_ASSERT( obj, "another thread emptied the my_public_list" ); + m_private_list = obj->next; + } else { + obj = new (cache_aligned_allocate(small_object_size)) small_object{nullptr}; + ++m_private_counter; + } + } else { + obj = new (cache_aligned_allocate(number_of_bytes)) small_object{nullptr}; + } + allocator = this; + + // Return uninitialized memory for further construction on user side. 
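+    // (The block comes from a classic two-list scheme: a private LIFO list touched only
+    // by the owning thread, plus a lock-free public list that foreign threads push freed
+    // objects onto in deallocate_impl; when the private list runs dry the owner drains
+    // the whole public list with a single exchange(nullptr), as done above.)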
+ obj->~small_object(); + return obj; +} + +void __TBB_EXPORTED_FUNC deallocate(d1::small_object_pool& allocator, void* ptr, std::size_t number_of_bytes) { + auto pool = static_cast<small_object_pool_impl*>(&allocator); + auto tls = governor::get_thread_data(); + pool->deallocate_impl(ptr, number_of_bytes, *tls); +} + +void __TBB_EXPORTED_FUNC deallocate(d1::small_object_pool& allocator, void* ptr, std::size_t number_of_bytes, const d1::execution_data& ed) { + auto& tls = static_cast<const execution_data_ext&>(ed).task_disp->get_thread_data(); + auto pool = static_cast<small_object_pool_impl*>(&allocator); + pool->deallocate_impl(ptr, number_of_bytes, tls); +} + +void small_object_pool_impl::deallocate_impl(void* ptr, std::size_t number_of_bytes, thread_data& td) { + __TBB_ASSERT(ptr != nullptr, "pointer to deallocate should not be null"); + __TBB_ASSERT(number_of_bytes >= sizeof(small_object), "number of bytes should be at least sizeof(small_object)"); + + if (number_of_bytes <= small_object_size) { + auto obj = new (ptr) small_object{nullptr}; + if (td.my_small_object_pool == this) { + obj->next = m_private_list; + m_private_list = obj; + } else { + auto old_public_list = m_public_list.load(std::memory_order_relaxed); + + for (;;) { + if (old_public_list == dead_public_list) { + obj->~small_object(); + cache_aligned_deallocate(obj); + if (++m_public_counter == 0) + { + this->~small_object_pool_impl(); + cache_aligned_deallocate(this); + } + break; + } + obj->next = old_public_list; + if (m_public_list.compare_exchange_strong(old_public_list, obj)) { + break; + } + } + } + } else { + cache_aligned_deallocate(ptr); + } +} + +std::int64_t small_object_pool_impl::cleanup_list(small_object* list) +{ + std::int64_t removed_count{}; + + while (list) { + small_object* current = list; + list = list->next; + current->~small_object(); + cache_aligned_deallocate(current); + ++removed_count; + } + return removed_count; +} + +void small_object_pool_impl::destroy() +{ + // clean up private list and subtract the removed count from private counter + m_private_counter -= cleanup_list(m_private_list); + // Grab public list and place dead mark + small_object* public_list = m_public_list.exchange(dead_public_list); + // clean up public list and subtract from private (intentionally) counter + m_private_counter -= cleanup_list(public_list); + __TBB_ASSERT(m_private_counter >= 0, "Private counter may not be less than 0"); + // Equivalent to fetch_sub(m_private_counter) - m_private_counter. But we need to do it + // atomically with operator-= not to access m_private_counter after the subtraction. + auto new_value = m_public_counter -= m_private_counter; + // check if this method is responsible to clean up the resources + if (new_value == 0) { + this->~small_object_pool_impl(); + cache_aligned_deallocate(this); + } +} + +} // namespace r1 +} // namespace detail +} // namespace tbb diff --git a/contrib/libs/tbb/src/tbb/small_object_pool_impl.h b/contrib/libs/tbb/src/tbb/small_object_pool_impl.h new file mode 100644 index 0000000000..a6b664beab --- /dev/null +++ b/contrib/libs/tbb/src/tbb/small_object_pool_impl.h @@ -0,0 +1,59 @@ +/* + Copyright (c) 2020-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_small_object_pool_impl_H +#define __TBB_small_object_pool_impl_H + +#include "oneapi/tbb/detail/_small_object_pool.h" +#include "oneapi/tbb/detail/_utils.h" + +#include <cstddef> +#include <cstdint> +#include <atomic> + + +namespace tbb { +namespace detail { +namespace r1 { + +class thread_data; + +class small_object_pool_impl : public d1::small_object_pool +{ + static constexpr std::size_t small_object_size = 256; + struct small_object { + small_object* next; + }; + static small_object* const dead_public_list; +public: + void* allocate_impl(small_object_pool*& allocator, std::size_t number_of_bytes); + void deallocate_impl(void* ptr, std::size_t number_of_bytes, thread_data& td); + void destroy(); +private: + static std::int64_t cleanup_list(small_object* list); + ~small_object_pool_impl() = default; +private: + alignas(max_nfs_size) small_object* m_private_list; + std::int64_t m_private_counter{}; + alignas(max_nfs_size) std::atomic<small_object*> m_public_list; + std::atomic<std::int64_t> m_public_counter{}; +}; + +} // namespace r1 +} // namespace detail +} // namespace tbb + +#endif /* __TBB_small_object_pool_impl_H */ diff --git a/contrib/libs/tbb/src/tbb/task.cpp b/contrib/libs/tbb/src/tbb/task.cpp new file mode 100644 index 0000000000..129614447a --- /dev/null +++ b/contrib/libs/tbb/src/tbb/task.cpp @@ -0,0 +1,225 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +// Do not include task.h directly. Use scheduler_common.h instead +#include "scheduler_common.h" +#include "governor.h" +#include "arena.h" +#include "thread_data.h" +#include "task_dispatcher.h" +#include "waiters.h" +#include "itt_notify.h" + +#include "oneapi/tbb/detail/_task.h" +#include "oneapi/tbb/partitioner.h" +#include "oneapi/tbb/task.h" + +#include <cstring> + +namespace tbb { +namespace detail { +namespace r1 { + +//------------------------------------------------------------------------ +// resumable tasks +//------------------------------------------------------------------------ +#if __TBB_RESUMABLE_TASKS + +void suspend(suspend_callback_type suspend_callback, void* user_callback) { + thread_data& td = *governor::get_thread_data(); + td.my_task_dispatcher->suspend(suspend_callback, user_callback); + // Do not access td after suspend. 
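+    // (By the time the call returns, this stack may have been resumed by a different
+    // worker thread, so the thread_data reference captured at entry may no longer
+    // describe the thread that is currently running.)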
+} + +void resume(suspend_point_type* sp) { + assert_pointers_valid(sp, sp->m_arena); + task_dispatcher& task_disp = sp->m_resume_task.m_target; + __TBB_ASSERT(task_disp.m_thread_data == nullptr, nullptr); + + // TODO: remove this work-around + // Prolong the arena's lifetime while all coroutines are alive + // (otherwise the arena can be destroyed while some tasks are suspended). + arena& a = *sp->m_arena; + a.my_references += arena::ref_external; + + if (task_disp.m_properties.critical_task_allowed) { + // The target is not in the process of executing critical task, so the resume task is not critical. + a.my_resume_task_stream.push(&sp->m_resume_task, random_lane_selector(sp->m_random)); + } else { +#if __TBB_PREVIEW_CRITICAL_TASKS + // The target is in the process of executing critical task, so the resume task is critical. + a.my_critical_task_stream.push(&sp->m_resume_task, random_lane_selector(sp->m_random)); +#endif + } + + // Do not access target after that point. + a.advertise_new_work<arena::wakeup>(); + + // Release our reference to my_arena. + a.on_thread_leaving<arena::ref_external>(); +} + +suspend_point_type* current_suspend_point() { + thread_data& td = *governor::get_thread_data(); + return td.my_task_dispatcher->get_suspend_point(); +} + +static task_dispatcher& create_coroutine(thread_data& td) { + // We may have some task dispatchers cached + task_dispatcher* task_disp = td.my_arena->my_co_cache.pop(); + if (!task_disp) { + void* ptr = cache_aligned_allocate(sizeof(task_dispatcher)); + task_disp = new(ptr) task_dispatcher(td.my_arena); + task_disp->init_suspend_point(td.my_arena, td.my_arena->my_market->worker_stack_size()); + } + // Prolong the arena's lifetime until all coroutines is alive + // (otherwise the arena can be destroyed while some tasks are suspended). + // TODO: consider behavior if there are more than 4K external references. + td.my_arena->my_references += arena::ref_external; + return *task_disp; +} + +void task_dispatcher::suspend(suspend_callback_type suspend_callback, void* user_callback) { + __TBB_ASSERT(suspend_callback != nullptr, nullptr); + __TBB_ASSERT(user_callback != nullptr, nullptr); + __TBB_ASSERT(m_thread_data != nullptr, nullptr); + + arena_slot* slot = m_thread_data->my_arena_slot; + __TBB_ASSERT(slot != nullptr, nullptr); + + task_dispatcher& default_task_disp = slot->default_task_dispatcher(); + // TODO: simplify the next line, e.g. is_task_dispatcher_recalled( task_dispatcher& ) + bool is_recalled = default_task_disp.get_suspend_point()->m_is_owner_recalled.load(std::memory_order_acquire); + task_dispatcher& target = is_recalled ? default_task_disp : create_coroutine(*m_thread_data); + + thread_data::suspend_callback_wrapper callback = { suspend_callback, user_callback, get_suspend_point() }; + m_thread_data->set_post_resume_action(thread_data::post_resume_action::callback, &callback); + resume(target); + + if (m_properties.outermost) { + recall_point(); + } +} + +void task_dispatcher::resume(task_dispatcher& target) { + // Do not create non-trivial objects on the stack of this function. 
They might never be destroyed + { + thread_data* td = m_thread_data; + __TBB_ASSERT(&target != this, "We cannot resume to ourself"); + __TBB_ASSERT(td != nullptr, "This task dispatcher must be attach to a thread data"); + __TBB_ASSERT(td->my_task_dispatcher == this, "Thread data must be attached to this task dispatcher"); + __TBB_ASSERT(td->my_post_resume_action != thread_data::post_resume_action::none, "The post resume action must be set"); + __TBB_ASSERT(td->my_post_resume_arg, "The post resume action must have an argument"); + + // Change the task dispatcher + td->detach_task_dispatcher(); + td->attach_task_dispatcher(target); + } + __TBB_ASSERT(m_suspend_point != nullptr, "Suspend point must be created"); + __TBB_ASSERT(target.m_suspend_point != nullptr, "Suspend point must be created"); + // Swap to the target coroutine. + m_suspend_point->m_co_context.resume(target.m_suspend_point->m_co_context); + // Pay attention that m_thread_data can be changed after resume + { + thread_data* td = m_thread_data; + __TBB_ASSERT(td != nullptr, "This task dispatcher must be attach to a thread data"); + __TBB_ASSERT(td->my_task_dispatcher == this, "Thread data must be attached to this task dispatcher"); + td->do_post_resume_action(); + + // Remove the recall flag if the thread in its original task dispatcher + arena_slot* slot = td->my_arena_slot; + __TBB_ASSERT(slot != nullptr, nullptr); + if (this == slot->my_default_task_dispatcher) { + __TBB_ASSERT(m_suspend_point != nullptr, nullptr); + m_suspend_point->m_is_owner_recalled.store(false, std::memory_order_relaxed); + } + } +} + +void thread_data::do_post_resume_action() { + __TBB_ASSERT(my_post_resume_action != thread_data::post_resume_action::none, "The post resume action must be set"); + __TBB_ASSERT(my_post_resume_arg, "The post resume action must have an argument"); + + switch (my_post_resume_action) { + case post_resume_action::register_waiter: + { + static_cast<extended_concurrent_monitor::resume_context*>(my_post_resume_arg)->notify(); + break; + } + case post_resume_action::resume: + { + r1::resume(static_cast<suspend_point_type*>(my_post_resume_arg)); + break; + } + case post_resume_action::callback: + { + suspend_callback_wrapper callback = *static_cast<suspend_callback_wrapper*>(my_post_resume_arg); + callback(); + break; + } + case post_resume_action::cleanup: + { + task_dispatcher* to_cleanup = static_cast<task_dispatcher*>(my_post_resume_arg); + // Release coroutine's reference to my_arena. + my_arena->on_thread_leaving<arena::ref_external>(); + // Cache the coroutine for possible later re-usage + my_arena->my_co_cache.push(to_cleanup); + break; + } + case post_resume_action::notify: + { + std::atomic<bool>& owner_recall_flag = *static_cast<std::atomic<bool>*>(my_post_resume_arg); + owner_recall_flag.store(true, std::memory_order_release); + // Do not access recall_flag because it can be destroyed after the notification. 
+ break; + } + default: + __TBB_ASSERT(false, "Unknown post resume action"); + } + + my_post_resume_action = post_resume_action::none; + my_post_resume_arg = nullptr; +} + +#else + +void suspend(suspend_callback_type, void*) { + __TBB_ASSERT_RELEASE(false, "Resumable tasks are unsupported on this platform"); +} + +void resume(suspend_point_type*) { + __TBB_ASSERT_RELEASE(false, "Resumable tasks are unsupported on this platform"); +} + +suspend_point_type* current_suspend_point() { + __TBB_ASSERT_RELEASE(false, "Resumable tasks are unsupported on this platform"); + return nullptr; +} + +#endif /* __TBB_RESUMABLE_TASKS */ + +void notify_waiters(std::uintptr_t wait_ctx_addr) { + auto is_related_wait_ctx = [&] (extended_context context) { + return wait_ctx_addr == context.my_uniq_addr; + }; + + r1::governor::get_thread_data()->my_arena->my_market->get_wait_list().notify(is_related_wait_ctx); +} + +} // namespace r1 +} // namespace detail +} // namespace tbb + diff --git a/contrib/libs/tbb/src/tbb/task_dispatcher.cpp b/contrib/libs/tbb/src/tbb/task_dispatcher.cpp new file mode 100644 index 0000000000..86818af1d1 --- /dev/null +++ b/contrib/libs/tbb/src/tbb/task_dispatcher.cpp @@ -0,0 +1,240 @@ +/* + Copyright (c) 2020-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#include "task_dispatcher.h" +#include "waiters.h" + +namespace tbb { +namespace detail { +namespace r1 { + +static inline void spawn_and_notify(d1::task& t, arena_slot* slot, arena* a) { + slot->spawn(t); + a->advertise_new_work<arena::work_spawned>(); + // TODO: TBB_REVAMP_TODO slot->assert_task_pool_valid(); +} + +void __TBB_EXPORTED_FUNC spawn(d1::task& t, d1::task_group_context& ctx) { + thread_data* tls = governor::get_thread_data(); + task_group_context_impl::bind_to(ctx, tls); + arena* a = tls->my_arena; + arena_slot* slot = tls->my_arena_slot; + // Capture current context + task_accessor::context(t) = &ctx; + // Mark isolation + task_accessor::isolation(t) = tls->my_task_dispatcher->m_execute_data_ext.isolation; + spawn_and_notify(t, slot, a); +} + +void __TBB_EXPORTED_FUNC spawn(d1::task& t, d1::task_group_context& ctx, d1::slot_id id) { + thread_data* tls = governor::get_thread_data(); + task_group_context_impl::bind_to(ctx, tls); + arena* a = tls->my_arena; + arena_slot* slot = tls->my_arena_slot; + execution_data_ext& ed = tls->my_task_dispatcher->m_execute_data_ext; + + // Capture context + task_accessor::context(t) = &ctx; + // Mark isolation + task_accessor::isolation(t) = ed.isolation; + + if ( id != d1::no_slot && id != tls->my_arena_index ) { + // Allocate proxy task + d1::small_object_allocator alloc{}; + auto proxy = alloc.new_object<task_proxy>(static_cast<d1::execution_data&>(ed)); + // Mark as a proxy + task_accessor::set_proxy_trait(*proxy); + // Mark isolation for the proxy task + task_accessor::isolation(*proxy) = ed.isolation; + // Deallocation hint (tls) from the task allocator + proxy->allocator = alloc; + proxy->slot = id; + proxy->outbox = &a->mailbox(id); + // Mark proxy as present in both locations (sender's task pool and destination mailbox) + proxy->task_and_tag = intptr_t(&t) | task_proxy::location_mask; + // Mail the proxy - after this point t may be destroyed by another thread at any moment. + proxy->outbox->push(proxy); + // Spawn proxy to the local task pool + spawn_and_notify(*proxy, slot, a); + } else { + spawn_and_notify(t, slot, a); + } +} + +void __TBB_EXPORTED_FUNC submit(d1::task& t, d1::task_group_context& ctx, arena* a, std::uintptr_t as_critical) { + suppress_unused_warning(as_critical); + assert_pointer_valid(a); + thread_data& tls = *governor::get_thread_data(); + + // TODO revamp: for each use case investigate neccesity to make this call + task_group_context_impl::bind_to(ctx, &tls); + task_accessor::context(t) = &ctx; + // TODO revamp: consider respecting task isolation if this call is being made by external thread + task_accessor::isolation(t) = tls.my_task_dispatcher->m_execute_data_ext.isolation; + + // TODO: consider code refactoring when lane selection mechanism is unified. + + if ( tls.is_attached_to(a) ) { + arena_slot* slot = tls.my_arena_slot; +#if __TBB_PREVIEW_CRITICAL_TASKS + if( as_critical ) { + a->my_critical_task_stream.push( &t, subsequent_lane_selector(slot->critical_hint()) ); + } else +#endif + { + slot->spawn(t); + } + } else { + random_lane_selector lane_selector{tls.my_random}; +#if !__TBB_PREVIEW_CRITICAL_TASKS + suppress_unused_warning(as_critical); +#else + if ( as_critical ) { + a->my_critical_task_stream.push( &t, lane_selector ); + } else +#endif + { + // Avoid joining the arena the thread is not currently in. + a->my_fifo_task_stream.push( &t, lane_selector ); + } + } + // It is assumed that some thread will explicitly wait in the arena the task is submitted + // into. 
Therefore, no need to utilize mandatory concurrency here. + a->advertise_new_work<arena::work_spawned>(); +} + +void __TBB_EXPORTED_FUNC execute_and_wait(d1::task& t, d1::task_group_context& t_ctx, d1::wait_context& wait_ctx, d1::task_group_context& w_ctx) { + task_accessor::context(t) = &t_ctx; + task_dispatcher::execute_and_wait(&t, wait_ctx, w_ctx); +} + +void __TBB_EXPORTED_FUNC wait(d1::wait_context& wait_ctx, d1::task_group_context& w_ctx) { + // Enter the task dispatch loop without a task + task_dispatcher::execute_and_wait(nullptr, wait_ctx, w_ctx); +} + +d1::slot_id __TBB_EXPORTED_FUNC execution_slot(const d1::execution_data* ed) { + if (ed) { + const execution_data_ext* ed_ext = static_cast<const execution_data_ext*>(ed); + assert_pointers_valid(ed_ext->task_disp, ed_ext->task_disp->m_thread_data); + return ed_ext->task_disp->m_thread_data->my_arena_index; + } else { + thread_data* td = governor::get_thread_data_if_initialized(); + return td ? int(td->my_arena_index) : -1; + } +} + +d1::task_group_context* __TBB_EXPORTED_FUNC current_context() { + thread_data* td = governor::get_thread_data(); + assert_pointers_valid(td, td->my_task_dispatcher); + + task_dispatcher* task_disp = td->my_task_dispatcher; + if (task_disp->m_properties.outermost) { + // No one task is executed, so no execute_data. + return nullptr; + } else { + return td->my_task_dispatcher->m_execute_data_ext.context; + } +} + +void task_dispatcher::execute_and_wait(d1::task* t, d1::wait_context& wait_ctx, d1::task_group_context& w_ctx) { + // Get an associated task dispatcher + thread_data* tls = governor::get_thread_data(); + __TBB_ASSERT(tls->my_task_dispatcher != nullptr, nullptr); + task_dispatcher& local_td = *tls->my_task_dispatcher; + + // TODO: factor out the binding to execute_and_wait_impl + if (t) { + task_group_context_impl::bind_to(*task_accessor::context(*t), tls); + // Propagate the isolation to the task executed without spawn. + task_accessor::isolation(*t) = tls->my_task_dispatcher->m_execute_data_ext.isolation; + } + + // Waiting on special object tied to a waiting thread. + external_waiter waiter{ *tls->my_arena, wait_ctx }; + t = local_td.local_wait_for_all(t, waiter); + __TBB_ASSERT_EX(t == nullptr, "External waiter must not leave dispatch loop with a task"); + + // The external thread couldn't exit the dispatch loop in an idle state + if (local_td.m_thread_data->my_inbox.is_idle_state(true)) { + local_td.m_thread_data->my_inbox.set_is_idle(false); + } + + if (w_ctx.my_exception) { + __TBB_ASSERT(w_ctx.is_group_execution_cancelled(), "The task group context with an exception should be canceled."); + w_ctx.my_exception->throw_self(); + } +} + +#if __TBB_RESUMABLE_TASKS + +#if _WIN32 +/* [[noreturn]] */ void __stdcall co_local_wait_for_all(void* arg) noexcept +#else +/* [[noreturn]] */ void co_local_wait_for_all(void* arg) noexcept +#endif +{ + // Do not create non-trivial objects on the stack of this function. They will never be destroyed. + __TBB_ASSERT(arg != nullptr, nullptr); + task_dispatcher& task_disp = *static_cast<task_dispatcher*>(arg); + + assert_pointers_valid(task_disp.m_thread_data, task_disp.m_thread_data->my_arena); + task_disp.set_stealing_threshold(task_disp.m_thread_data->my_arena->calculate_stealing_threshold()); + __TBB_ASSERT(task_disp.can_steal(), nullptr); + task_disp.co_local_wait_for_all(); + // This code is unreachable +} + +/* [[noreturn]] */ void task_dispatcher::co_local_wait_for_all() noexcept { + // Do not create non-trivial objects on the stack of this function. 
They will never be destroyed. + assert_pointer_valid(m_thread_data); + + // Basically calls the user callback passed to the tbb::task::suspend function + m_thread_data->do_post_resume_action(); + + // Endless loop here because coroutine could be reused + for (;;) { + arena* a = m_thread_data->my_arena; + coroutine_waiter waiter(*a); + d1::task* resume_task = local_wait_for_all(nullptr, waiter); + assert_task_valid(resume_task); + __TBB_ASSERT(this == m_thread_data->my_task_dispatcher, nullptr); + + m_thread_data->set_post_resume_action(thread_data::post_resume_action::cleanup, this); + resume(static_cast<suspend_point_type::resume_task*>(resume_task)->m_target); + } + // This code is unreachable +} + +d1::suspend_point task_dispatcher::get_suspend_point() { + if (m_suspend_point == nullptr) { + assert_pointer_valid(m_thread_data); + // 0 means that we attach this task dispatcher to the current stack + init_suspend_point(m_thread_data->my_arena, 0); + } + assert_pointer_valid(m_suspend_point); + return m_suspend_point; +} +void task_dispatcher::init_suspend_point(arena* a, std::size_t stack_size) { + __TBB_ASSERT(m_suspend_point == nullptr, nullptr); + m_suspend_point = new(cache_aligned_allocate(sizeof(suspend_point_type))) + suspend_point_type(a, stack_size, *this); +} +#endif /* __TBB_RESUMABLE_TASKS */ +} // namespace r1 +} // namespace detail +} // namespace tbb + diff --git a/contrib/libs/tbb/src/tbb/task_dispatcher.h b/contrib/libs/tbb/src/tbb/task_dispatcher.h new file mode 100644 index 0000000000..54a6c0d934 --- /dev/null +++ b/contrib/libs/tbb/src/tbb/task_dispatcher.h @@ -0,0 +1,465 @@ +/* + Copyright (c) 2020-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef _TBB_task_dispatcher_H +#define _TBB_task_dispatcher_H + +#include "oneapi/tbb/detail/_utils.h" +#include "oneapi/tbb/detail/_task.h" +#include "oneapi/tbb/global_control.h" + +#include "scheduler_common.h" +#include "waiters.h" +#include "arena_slot.h" +#include "arena.h" +#include "thread_data.h" +#include "mailbox.h" +#include "itt_notify.h" +#include "concurrent_monitor.h" + +#include <atomic> + +#if !__TBB_CPU_CTL_ENV_PRESENT +#include <fenv.h> // +#endif + +namespace tbb { +namespace detail { +namespace r1 { + +inline d1::task* get_self_recall_task(arena_slot& slot) { + suppress_unused_warning(slot); + d1::task* t = nullptr; +#if __TBB_RESUMABLE_TASKS + suspend_point_type* sp = slot.default_task_dispatcher().m_suspend_point; + if (sp && sp->m_is_owner_recalled.load(std::memory_order_acquire)) { + t = &sp->m_resume_task; + __TBB_ASSERT(sp->m_resume_task.m_target.m_thread_data == nullptr, nullptr); + } +#endif /* __TBB_RESUMABLE_TASKS */ + return t; +} + +// Defined in exception.cpp +/*[[noreturn]]*/void do_throw_noexcept(void (*throw_exception)()) noexcept; + +//------------------------------------------------------------------------ +// Suspend point +//------------------------------------------------------------------------ +#if __TBB_RESUMABLE_TASKS + +inline d1::task* suspend_point_type::resume_task::execute(d1::execution_data& ed) { + execution_data_ext& ed_ext = static_cast<execution_data_ext&>(ed); + + if (ed_ext.wait_ctx) { + extended_concurrent_monitor::resume_context monitor_node{{std::uintptr_t(ed_ext.wait_ctx), nullptr}, ed_ext, m_target}; + // The wait_ctx is present only in external_waiter. In that case we leave the current stack + // in the abandoned state to resume when waiting completes. + thread_data* td = ed_ext.task_disp->m_thread_data; + td->set_post_resume_action(thread_data::post_resume_action::register_waiter, &monitor_node); + + extended_concurrent_monitor& wait_list = td->my_arena->my_market->get_wait_list(); + + if (wait_list.wait([&] { return !ed_ext.wait_ctx->continue_execution(); }, monitor_node)) { + return nullptr; + } + + td->clear_post_resume_action(); + td->set_post_resume_action(thread_data::post_resume_action::resume, ed_ext.task_disp->get_suspend_point()); + } else { + // If wait_ctx is null, it can be only a worker thread on outermost level because + // coroutine_waiter interrupts bypass loop before the resume_task execution. + ed_ext.task_disp->m_thread_data->set_post_resume_action(thread_data::post_resume_action::notify, + &ed_ext.task_disp->get_suspend_point()->m_is_owner_recalled); + } + // Do not access this task because it might be destroyed + ed_ext.task_disp->resume(m_target); + return nullptr; +} + +inline suspend_point_type::suspend_point_type(arena* a, size_t stack_size, task_dispatcher& task_disp) + : m_arena(a) + , m_random(this) + , m_co_context(stack_size, &task_disp) + , m_resume_task(task_disp) +{ + assert_pointer_valid(m_arena); + assert_pointer_valid(m_arena->my_default_ctx); + task_accessor::context(m_resume_task) = m_arena->my_default_ctx; + task_accessor::isolation(m_resume_task) = no_isolation; + // Initialize the itt_caller for the context of the resume task. + // It will be bound to the stack of the first suspend call. 
+ task_group_context_impl::bind_to(*task_accessor::context(m_resume_task), task_disp.m_thread_data); +} + +#endif /* __TBB_RESUMABLE_TASKS */ + +//------------------------------------------------------------------------ +// Task Dispatcher +//------------------------------------------------------------------------ +inline task_dispatcher::task_dispatcher(arena* a) { + m_execute_data_ext.context = a->my_default_ctx; + m_execute_data_ext.task_disp = this; +} + +inline bool task_dispatcher::can_steal() { + __TBB_ASSERT(m_stealing_threshold != 0, nullptr); + stack_anchor_type anchor{}; + return reinterpret_cast<std::uintptr_t>(&anchor) > m_stealing_threshold; +} + +inline d1::task* task_dispatcher::get_inbox_or_critical_task( + execution_data_ext& ed, mail_inbox& inbox, isolation_type isolation, bool critical_allowed) +{ + if (inbox.empty()) + return nullptr; + d1::task* result = get_critical_task(nullptr, ed, isolation, critical_allowed); + if (result) + return result; + // Check if there are tasks mailed to this thread via task-to-thread affinity mechanism. + result = get_mailbox_task(inbox, ed, isolation); + // There is a race with a thread adding a new task (possibly with suitable isolation) + // to our mailbox, so the below conditions might result in a false positive. + // Then set_is_idle(false) allows that task to be stolen; it's OK. + if (isolation != no_isolation && !result && !inbox.empty() && inbox.is_idle_state(true)) { + // We have proxy tasks in our mailbox but the isolation blocks their execution. + // So publish the proxy tasks in mailbox to be available for stealing from owner's task pool. + inbox.set_is_idle( false ); + } + return result; +} + +inline d1::task* task_dispatcher::get_stream_or_critical_task( + execution_data_ext& ed, arena& a, task_stream<front_accessor>& stream, unsigned& hint, + isolation_type isolation, bool critical_allowed) +{ + if (stream.empty()) + return nullptr; + d1::task* result = get_critical_task(nullptr, ed, isolation, critical_allowed); + if (result) + return result; + return a.get_stream_task(stream, hint); +} + +inline d1::task* task_dispatcher::steal_or_get_critical( + execution_data_ext& ed, arena& a, unsigned arena_index, FastRandom& random, + isolation_type isolation, bool critical_allowed) +{ + if (d1::task* t = a.steal_task(arena_index, random, ed, isolation)) { + ed.context = task_accessor::context(*t); + ed.isolation = task_accessor::isolation(*t); + return get_critical_task(t, ed, isolation, critical_allowed); + } + return nullptr; +} + +template <bool ITTPossible, typename Waiter> +d1::task* task_dispatcher::receive_or_steal_task( + thread_data& tls, execution_data_ext& ed, Waiter& waiter, isolation_type isolation, + bool fifo_allowed, bool critical_allowed) +{ + __TBB_ASSERT(governor::is_thread_data_set(&tls), NULL); + // Task to return + d1::task* t = nullptr; + // Get tls data (again) + arena& a = *tls.my_arena; + arena_slot& slot = *tls.my_arena_slot; + unsigned arena_index = tls.my_arena_index; + mail_inbox& inbox = tls.my_inbox; + task_stream<front_accessor>& resume_stream = a.my_resume_task_stream; + unsigned& resume_hint = slot.hint_for_resume_stream; + task_stream<front_accessor>& fifo_stream = a.my_fifo_task_stream; + unsigned& fifo_hint = slot.hint_for_fifo_stream; + + waiter.reset_wait(); + // Thread is in idle state now + inbox.set_is_idle(true); + + bool stealing_is_allowed = can_steal(); + + // Stealing loop mailbox/enqueue/other_slots + for (;;) { + __TBB_ASSERT(t == nullptr, nullptr); + // Check if the resource 
manager requires our arena to relinquish some threads + // For the external thread restore idle state to true after dispatch loop + if (!waiter.continue_execution(slot, t)) { + __TBB_ASSERT(t == nullptr, nullptr); + break; + } + // Start searching + if (t != nullptr) { + // continue_execution returned a task + } + else if ((t = get_inbox_or_critical_task(ed, inbox, isolation, critical_allowed))) { + // Successfully got the task from mailbox or critical task + } + else if ((t = get_stream_or_critical_task(ed, a, resume_stream, resume_hint, isolation, critical_allowed))) { + // Successfully got the resume or critical task + } + else if (fifo_allowed && isolation == no_isolation + && (t = get_stream_or_critical_task(ed, a, fifo_stream, fifo_hint, isolation, critical_allowed))) { + // Checked if there are tasks in starvation-resistant stream. Only allowed at the outermost dispatch level without isolation. + } + else if (stealing_is_allowed + && (t = steal_or_get_critical(ed, a, arena_index, tls.my_random, isolation, critical_allowed))) { + // Stole a task from a random arena slot + } + else { + t = get_critical_task(t, ed, isolation, critical_allowed); + } + + if (t != nullptr) { + ed.context = task_accessor::context(*t); + ed.isolation = task_accessor::isolation(*t); + a.my_observers.notify_entry_observers(tls.my_last_observer, tls.my_is_worker); + break; // Stealing success, end of stealing attempt + } + // Nothing to do, pause a little. + waiter.pause(slot); + } // end of nonlocal task retrieval loop + if (inbox.is_idle_state(true)) { + inbox.set_is_idle(false); + } + return t; +} + +template <bool ITTPossible, typename Waiter> +d1::task* task_dispatcher::local_wait_for_all(d1::task* t, Waiter& waiter ) { + assert_pointer_valid(m_thread_data); + __TBB_ASSERT(m_thread_data->my_task_dispatcher == this, nullptr); + + // Guard an outer/default execution state + struct dispatch_loop_guard { + task_dispatcher& task_disp; + execution_data_ext old_execute_data_ext; + properties old_properties; + + ~dispatch_loop_guard() { + task_disp.m_execute_data_ext = old_execute_data_ext; + task_disp.m_properties = old_properties; + + __TBB_ASSERT(task_disp.m_thread_data && governor::is_thread_data_set(task_disp.m_thread_data), nullptr); + __TBB_ASSERT(task_disp.m_thread_data->my_task_dispatcher == &task_disp, nullptr); + } + } dl_guard{ *this, m_execute_data_ext, m_properties }; + + // The context guard to track fp setting and itt tasks. + context_guard_helper</*report_tasks=*/ITTPossible> context_guard; + + // Current isolation context + const isolation_type isolation = dl_guard.old_execute_data_ext.isolation; + + // Critical work inflection point. Once turned false current execution context has taken + // critical task on the previous stack frame and cannot take more until that critical path is + // finished. + bool critical_allowed = dl_guard.old_properties.critical_task_allowed; + + // Extended execution data that is used for dispatching. + // Base version is passed to the task::execute method. + execution_data_ext& ed = m_execute_data_ext; + ed.context = t ? 
task_accessor::context(*t) : nullptr; + ed.original_slot = m_thread_data->my_arena_index; + ed.affinity_slot = d1::no_slot; + ed.task_disp = this; + ed.wait_ctx = waiter.wait_ctx(); + + m_properties.outermost = false; + m_properties.fifo_tasks_allowed = false; + + t = get_critical_task(t, ed, isolation, critical_allowed); + + // Infinite exception loop + for (;;) { + try { + // Main execution loop + do { + // We assume that bypass tasks are from the same task group. + context_guard.set_ctx(ed.context); + // Inner level evaluates tasks coming from nesting loops and those returned + // by just executed tasks (bypassing spawn or enqueue calls). + while (t != nullptr) { + assert_task_valid(t); + assert_pointer_valid</*alignment = */alignof(void*)>(ed.context); + __TBB_ASSERT(ed.context->my_lifetime_state > d1::task_group_context::lifetime_state::locked && + ed.context->my_lifetime_state < d1::task_group_context::lifetime_state::dying, nullptr); + __TBB_ASSERT(m_thread_data->my_inbox.is_idle_state(false), nullptr); + __TBB_ASSERT(task_accessor::is_resume_task(*t) || isolation == no_isolation || isolation == ed.isolation, nullptr); + // Check premature leave + if (Waiter::postpone_execution(*t)) { + __TBB_ASSERT(task_accessor::is_resume_task(*t) && dl_guard.old_properties.outermost, + "Currently, the bypass loop can be interrupted only for resume task on outermost level"); + return t; + } + // Copy itt_caller to a stack because the context might be destroyed after t->execute. + void* itt_caller = ed.context->my_itt_caller; + suppress_unused_warning(itt_caller); + + ITT_CALLEE_ENTER(ITTPossible, t, itt_caller); + + if (ed.context->is_group_execution_cancelled()) { + t = t->cancel(ed); + } else { + t = t->execute(ed); + } + + ITT_CALLEE_LEAVE(ITTPossible, itt_caller); + + // The task affinity in execution data is set for affinitized tasks. + // So drop it after the task execution. + ed.affinity_slot = d1::no_slot; + // Reset task owner id for bypassed task + ed.original_slot = m_thread_data->my_arena_index; + t = get_critical_task(t, ed, isolation, critical_allowed); + } + __TBB_ASSERT(m_thread_data && governor::is_thread_data_set(m_thread_data), nullptr); + __TBB_ASSERT(m_thread_data->my_task_dispatcher == this, nullptr); + // When refactoring, pay attention that m_thread_data can be changed after t->execute() + __TBB_ASSERT(m_thread_data->my_arena_slot != nullptr, nullptr); + arena_slot& slot = *m_thread_data->my_arena_slot; + if (!waiter.continue_execution(slot, t)) { + break; + } + // Retrieve the task from local task pool + if (t || (slot.is_task_pool_published() && (t = slot.get_task(ed, isolation)))) { + __TBB_ASSERT(ed.original_slot == m_thread_data->my_arena_index, NULL); + ed.context = task_accessor::context(*t); + ed.isolation = task_accessor::isolation(*t); + continue; + } + // Retrieve the task from global sources + t = receive_or_steal_task<ITTPossible>( + *m_thread_data, ed, waiter, isolation, dl_guard.old_properties.fifo_tasks_allowed, + critical_allowed + ); + } while (t != nullptr); // main dispatch loop + break; // Exit exception loop; + } catch (...) { + if (global_control::active_value(global_control::terminate_on_exception) == 1) { + do_throw_noexcept([] { throw; }); + } + if (ed.context->cancel_group_execution()) { + /* We are the first to signal cancellation, so store the exception that caused it. 
*/ + ed.context->my_exception = tbb_exception_ptr::allocate(); + } + } + } // Infinite exception loop + __TBB_ASSERT(t == nullptr, nullptr); + + +#if __TBB_RESUMABLE_TASKS + if (dl_guard.old_properties.outermost) { + recall_point(); + } +#endif /* __TBB_RESUMABLE_TASKS */ + + return nullptr; +} + +#if __TBB_RESUMABLE_TASKS +inline void task_dispatcher::recall_point() { + if (this != &m_thread_data->my_arena_slot->default_task_dispatcher()) { + __TBB_ASSERT(m_suspend_point != nullptr, nullptr); + __TBB_ASSERT(m_suspend_point->m_is_owner_recalled.load(std::memory_order_relaxed) == false, nullptr); + d1::suspend([](suspend_point_type* sp) { + sp->m_is_owner_recalled.store(true, std::memory_order_release); + auto is_related_suspend_point = [sp] (extended_context context) { + std::uintptr_t sp_addr = std::uintptr_t(sp); + return sp_addr == context.my_uniq_addr; + }; + sp->m_arena->my_market->get_wait_list().notify(is_related_suspend_point); + }); + + if (m_thread_data->my_inbox.is_idle_state(true)) { + m_thread_data->my_inbox.set_is_idle(false); + } + } +} +#endif /* __TBB_RESUMABLE_TASKS */ + +#if __TBB_PREVIEW_CRITICAL_TASKS +inline d1::task* task_dispatcher::get_critical_task(d1::task* t, execution_data_ext& ed, isolation_type isolation, bool critical_allowed) { + __TBB_ASSERT( critical_allowed || !m_properties.critical_task_allowed, nullptr ); + + if (!critical_allowed) { + // The stack is already in the process of critical path execution. Cannot take another + // critical work until finish with the current one. + __TBB_ASSERT(!m_properties.critical_task_allowed, nullptr); + return t; + } + + assert_pointers_valid(m_thread_data, m_thread_data->my_arena, m_thread_data->my_arena_slot); + thread_data& td = *m_thread_data; + arena& a = *td.my_arena; + arena_slot& slot = *td.my_arena_slot; + + d1::task* crit_t = a.get_critical_task(slot.hint_for_critical_stream, isolation); + if (crit_t != nullptr) { + assert_task_valid(crit_t); + if (t != nullptr) { + assert_pointer_valid</*alignment = */alignof(void*)>(ed.context); + r1::spawn(*t, *ed.context); + } + ed.context = task_accessor::context(*crit_t); + ed.isolation = task_accessor::isolation(*crit_t); + + // We cannot execute more than one critical task on the same stack. + // In other words, we prevent nested critical tasks. + m_properties.critical_task_allowed = false; + + // TODO: add a test that the observer is called when critical task is taken. + a.my_observers.notify_entry_observers(td.my_last_observer, td.my_is_worker); + t = crit_t; + } else { + // Was unable to find critical work in the queue. Allow inspecting the queue in nested + // invocations. Handles the case when critical task has been just completed. + m_properties.critical_task_allowed = true; + } + return t; +} +#else +inline d1::task* task_dispatcher::get_critical_task(d1::task* t, execution_data_ext&, isolation_type, bool /*critical_allowed*/) { + return t; +} +#endif + +inline d1::task* task_dispatcher::get_mailbox_task(mail_inbox& my_inbox, execution_data_ext& ed, isolation_type isolation) { + while (task_proxy* const tp = my_inbox.pop(isolation)) { + if (d1::task* result = tp->extract_task<task_proxy::mailbox_bit>()) { + ed.original_slot = (unsigned short)(-2); + ed.affinity_slot = ed.task_disp->m_thread_data->my_arena_index; + return result; + } + // We have exclusive access to the proxy, and can destroy it. 
+ tp->allocator.delete_object(tp, ed); + } + return NULL; +} + +template <typename Waiter> +d1::task* task_dispatcher::local_wait_for_all(d1::task* t, Waiter& waiter) { + if (governor::is_itt_present()) { + return local_wait_for_all</*ITTPossible = */ true>(t, waiter); + } else { + return local_wait_for_all</*ITTPossible = */ false>(t, waiter); + } +} + +} // namespace r1 +} // namespace detail +} // namespace tbb + +#endif // _TBB_task_dispatcher_H + diff --git a/contrib/libs/tbb/src/tbb/task_group_context.cpp b/contrib/libs/tbb/src/tbb/task_group_context.cpp new file mode 100644 index 0000000000..3c296648ec --- /dev/null +++ b/contrib/libs/tbb/src/tbb/task_group_context.cpp @@ -0,0 +1,493 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "oneapi/tbb/detail/_config.h" +#include "oneapi/tbb/tbb_allocator.h" +#include "oneapi/tbb/task_group.h" +#include "governor.h" +#include "thread_data.h" +#include "scheduler_common.h" +#include "itt_notify.h" +#include "task_dispatcher.h" + +#include <type_traits> + +namespace tbb { +namespace detail { +namespace r1 { + +//------------------------------------------------------------------------ +// tbb_exception_ptr +//------------------------------------------------------------------------ +tbb_exception_ptr* tbb_exception_ptr::allocate() noexcept { + tbb_exception_ptr* eptr = (tbb_exception_ptr*)allocate_memory(sizeof(tbb_exception_ptr)); + return eptr ? new (eptr) tbb_exception_ptr(std::current_exception()) : nullptr; +} + +void tbb_exception_ptr::destroy() noexcept { + this->~tbb_exception_ptr(); + deallocate_memory(this); +} + +void tbb_exception_ptr::throw_self() { + if (governor::rethrow_exception_broken()) fix_broken_rethrow(); + std::rethrow_exception(my_ptr); +} + +//------------------------------------------------------------------------ +// task_group_context +//------------------------------------------------------------------------ + +void task_group_context_impl::destroy(d1::task_group_context& ctx) { + __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL); + + auto ctx_lifetime_state = ctx.my_lifetime_state.load(std::memory_order_relaxed); + __TBB_ASSERT(ctx_lifetime_state != d1::task_group_context::lifetime_state::locked, nullptr); + + if (ctx_lifetime_state == d1::task_group_context::lifetime_state::bound) { + // The owner can be destroyed at any moment. Access the associate data with caution. + thread_data* owner = ctx.my_owner.load(std::memory_order_relaxed); + if (governor::is_thread_data_set(owner)) { + thread_data::context_list_state& cls = owner->my_context_list_state; + // We are the owner, so cls is valid. + // Local update of the context list + std::uintptr_t local_count_snapshot = cls.epoch.load(std::memory_order_relaxed); + // The sequentially-consistent store to prevent load of nonlocal update flag + // from being hoisted before the store to local update flag. 
+ cls.local_update = 1; + if (cls.nonlocal_update.load(std::memory_order_relaxed)) { + spin_mutex::scoped_lock lock(cls.mutex); + ctx.my_node.remove_relaxed(); + cls.local_update.store(0, std::memory_order_relaxed); + } else { + ctx.my_node.remove_relaxed(); + // Release fence is necessary so that update of our neighbors in + // the context list was committed when possible concurrent destroyer + // proceeds after local update flag is reset by the following store. + cls.local_update.store(0, std::memory_order_release); + if (local_count_snapshot != the_context_state_propagation_epoch.load(std::memory_order_relaxed)) { + // Another thread was propagating cancellation request when we removed + // ourselves from the list. We must ensure that it is not accessing us + // when this destructor finishes. We'll be able to acquire the lock + // below only after the other thread finishes with us. + spin_mutex::scoped_lock lock(cls.mutex); + } + } + } else { + d1::task_group_context::lifetime_state expected = d1::task_group_context::lifetime_state::bound; + if ( +#if defined(__INTEL_COMPILER) && __INTEL_COMPILER <= 1910 + !((std::atomic<typename std::underlying_type<d1::task_group_context::lifetime_state>::type>&)ctx.my_lifetime_state).compare_exchange_strong( + (typename std::underlying_type<d1::task_group_context::lifetime_state>::type&)expected, + (typename std::underlying_type<d1::task_group_context::lifetime_state>::type)d1::task_group_context::lifetime_state::locked) +#else + !ctx.my_lifetime_state.compare_exchange_strong(expected, d1::task_group_context::lifetime_state::locked) +#endif + ) { + __TBB_ASSERT(expected == d1::task_group_context::lifetime_state::detached, nullptr); + // The "owner" local variable can be a dangling pointer here. Do not access it. + owner = nullptr; + spin_wait_until_eq(ctx.my_owner, nullptr); + // It is unsafe to remove the node because its neighbors might be already destroyed. + // TODO: reconsider the logic. 
+ // ctx.my_node.remove_relaxed(); + } + else { + __TBB_ASSERT(expected == d1::task_group_context::lifetime_state::bound, nullptr); + __TBB_ASSERT(ctx.my_owner.load(std::memory_order_relaxed) != nullptr, nullptr); + thread_data::context_list_state& cls = owner->my_context_list_state; + __TBB_ASSERT(is_alive(cls.nonlocal_update.load(std::memory_order_relaxed)), "The owner should be alive."); + + ++cls.nonlocal_update; + ctx.my_lifetime_state.store(d1::task_group_context::lifetime_state::dying, std::memory_order_release); + spin_wait_until_eq(cls.local_update, 0u); + { + spin_mutex::scoped_lock lock(cls.mutex); + ctx.my_node.remove_relaxed(); + } + --cls.nonlocal_update; + } + } + } + + if (ctx_lifetime_state == d1::task_group_context::lifetime_state::detached) { + spin_wait_until_eq(ctx.my_owner, nullptr); + } + + d1::cpu_ctl_env* ctl = reinterpret_cast<d1::cpu_ctl_env*>(&ctx.my_cpu_ctl_env); +#if _MSC_VER && _MSC_VER <= 1900 && !__INTEL_COMPILER + suppress_unused_warning(ctl); +#endif + ctl->~cpu_ctl_env(); + + if (ctx.my_exception) + ctx.my_exception->destroy(); + ITT_STACK_DESTROY(ctx.my_itt_caller); + + poison_pointer(ctx.my_parent); + poison_pointer(ctx.my_parent); + poison_pointer(ctx.my_owner); + poison_pointer(ctx.my_node.next); + poison_pointer(ctx.my_node.prev); + poison_pointer(ctx.my_exception); + poison_pointer(ctx.my_itt_caller); +} + +void task_group_context_impl::initialize(d1::task_group_context& ctx) { + ITT_TASK_GROUP(&ctx, ctx.my_name, nullptr); + + ctx.my_cpu_ctl_env = 0; + ctx.my_cancellation_requested = 0; + ctx.my_state.store(0, std::memory_order_relaxed); + // Set the created state to bound at the first usage. + ctx.my_lifetime_state.store(d1::task_group_context::lifetime_state::created, std::memory_order_relaxed); + ctx.my_parent = nullptr; + ctx.my_owner = nullptr; + ctx.my_node.next.store(nullptr, std::memory_order_relaxed); + ctx.my_node.next.store(nullptr, std::memory_order_relaxed); + ctx.my_exception = nullptr; + ctx.my_itt_caller = nullptr; + + static_assert(sizeof(d1::cpu_ctl_env) <= sizeof(ctx.my_cpu_ctl_env), "FPU settings storage does not fit to uint64_t"); + d1::cpu_ctl_env* ctl = new (&ctx.my_cpu_ctl_env) d1::cpu_ctl_env; + if (ctx.my_traits.fp_settings) + ctl->get_env(); +} + +void task_group_context_impl::register_with(d1::task_group_context& ctx, thread_data* td) { + __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL); + __TBB_ASSERT(td, NULL); + ctx.my_owner.store(td, std::memory_order_relaxed); + thread_data::context_list_state& cls = td->my_context_list_state; + // state propagation logic assumes new contexts are bound to head of the list + ctx.my_node.prev.store(&cls.head, std::memory_order_relaxed); + // Notify threads that may be concurrently destroying contexts registered + // in this scheduler's list that local list update is underway. + // Prevent load of global propagation epoch counter from being hoisted before + // speculative stores above, as well as load of nonlocal update flag from + // being hoisted before the store to local update flag. 
+ cls.local_update = 1; + // Finalize local context list update + if (cls.nonlocal_update.load(std::memory_order_relaxed)) { + spin_mutex::scoped_lock lock(cls.mutex); + d1::context_list_node* head_next = cls.head.next.load(std::memory_order_relaxed); + head_next->prev.store(&ctx.my_node, std::memory_order_relaxed); + ctx.my_node.next.store(head_next, std::memory_order_relaxed); + cls.local_update.store(0, std::memory_order_relaxed); + cls.head.next.store(&ctx.my_node, std::memory_order_relaxed); + } else { + d1::context_list_node* head_next = cls.head.next.load(std::memory_order_relaxed); + head_next->prev.store(&ctx.my_node, std::memory_order_relaxed); + ctx.my_node.next.store(head_next, std::memory_order_relaxed); + cls.local_update.store(0, std::memory_order_release); + // Thread-local list of contexts allows concurrent traversal by another thread + // while propagating state change. To ensure visibility of ctx.my_node's members + // to the concurrently traversing thread, the list's head is updated by means + // of store-with-release. + cls.head.next.store(&ctx.my_node, std::memory_order_release); + } +} + +void task_group_context_impl::bind_to_impl(d1::task_group_context& ctx, thread_data* td) { + __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL); + __TBB_ASSERT(ctx.my_lifetime_state.load(std::memory_order_relaxed) == d1::task_group_context::lifetime_state::locked, "The context can be bound only under the lock."); + __TBB_ASSERT(!ctx.my_parent, "Parent is set before initial binding"); + + ctx.my_parent = td->my_task_dispatcher->m_execute_data_ext.context; + __TBB_ASSERT(ctx.my_parent, NULL); + + // Inherit FPU settings only if the context has not captured FPU settings yet. + if (!ctx.my_traits.fp_settings) + copy_fp_settings(ctx, *ctx.my_parent); + + // Condition below prevents unnecessary thrashing parent context's cache line + if (ctx.my_parent->my_state.load(std::memory_order_relaxed) != d1::task_group_context::may_have_children) { + ctx.my_parent->my_state.store(d1::task_group_context::may_have_children, std::memory_order_relaxed); // full fence is below + } + if (ctx.my_parent->my_parent) { + // Even if this context were made accessible for state change propagation + // (by placing store_with_release(td->my_context_list_state.head.my_next, &ctx.my_node) + // above), it still could be missed if state propagation from a grand-ancestor + // was underway concurrently with binding. + // Speculative propagation from the parent together with epoch counters + // detecting possibility of such a race allow to avoid taking locks when + // there is no contention. + + // Acquire fence is necessary to prevent reordering subsequent speculative + // loads of parent state data out of the scope where epoch counters comparison + // can reliably validate it. + uintptr_t local_count_snapshot = ctx.my_parent->my_owner.load(std::memory_order_relaxed)->my_context_list_state.epoch.load(std::memory_order_acquire); + // Speculative propagation of parent's state. The speculation will be + // validated by the epoch counters check further on. + ctx.my_cancellation_requested.store(ctx.my_parent->my_cancellation_requested.load(std::memory_order_relaxed), std::memory_order_relaxed); + register_with(ctx, td); // Issues full fence + + // If no state propagation was detected by the following condition, the above + // full fence guarantees that the parent had correct state during speculative + // propagation before the fence. Otherwise the propagation from parent is + // repeated under the lock. 
+ if (local_count_snapshot != the_context_state_propagation_epoch.load(std::memory_order_relaxed)) { + // Another thread may be propagating state change right now. So resort to lock. + context_state_propagation_mutex_type::scoped_lock lock(the_context_state_propagation_mutex); + ctx.my_cancellation_requested.store(ctx.my_parent->my_cancellation_requested.load(std::memory_order_relaxed), std::memory_order_relaxed); + } + } else { + register_with(ctx, td); // Issues full fence + // As we do not have grand-ancestors, concurrent state propagation (if any) + // may originate only from the parent context, and thus it is safe to directly + // copy the state from it. + ctx.my_cancellation_requested.store(ctx.my_parent->my_cancellation_requested.load(std::memory_order_relaxed), std::memory_order_relaxed); + } + + ctx.my_lifetime_state.store(d1::task_group_context::lifetime_state::bound, std::memory_order_release); +} + +void task_group_context_impl::bind_to(d1::task_group_context& ctx, thread_data* td) { + __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL); + d1::task_group_context::lifetime_state state = ctx.my_lifetime_state.load(std::memory_order_acquire); + if (state <= d1::task_group_context::lifetime_state::locked) { + if (state == d1::task_group_context::lifetime_state::created && +#if defined(__INTEL_COMPILER) && __INTEL_COMPILER <= 1910 + ((std::atomic<typename std::underlying_type<d1::task_group_context::lifetime_state>::type>&)ctx.my_lifetime_state).compare_exchange_strong( + (typename std::underlying_type<d1::task_group_context::lifetime_state>::type&)state, + (typename std::underlying_type<d1::task_group_context::lifetime_state>::type)d1::task_group_context::lifetime_state::locked) +#else + ctx.my_lifetime_state.compare_exchange_strong(state, d1::task_group_context::lifetime_state::locked) +#endif + ) { + // If we are in the outermost task dispatch loop of an external thread, then + // there is nothing to bind this context to, and we skip the binding part + // treating the context as isolated. + __TBB_ASSERT(td->my_task_dispatcher->m_execute_data_ext.context != nullptr, nullptr); + if (td->my_task_dispatcher->m_execute_data_ext.context == td->my_arena->my_default_ctx || !ctx.my_traits.bound) { + if (!ctx.my_traits.fp_settings) { + copy_fp_settings(ctx, *td->my_arena->my_default_ctx); + } + ctx.my_lifetime_state.store(d1::task_group_context::lifetime_state::isolated, std::memory_order_release); + } else { + bind_to_impl(ctx, td); + } + ITT_STACK_CREATE(ctx.my_itt_caller); + } + spin_wait_while_eq(ctx.my_lifetime_state, d1::task_group_context::lifetime_state::locked); + } + __TBB_ASSERT(ctx.my_lifetime_state.load(std::memory_order_relaxed) != d1::task_group_context::lifetime_state::created, NULL); + __TBB_ASSERT(ctx.my_lifetime_state.load(std::memory_order_relaxed) != d1::task_group_context::lifetime_state::locked, NULL); +} + +template <typename T> +void task_group_context_impl::propagate_task_group_state(d1::task_group_context& ctx, std::atomic<T> d1::task_group_context::* mptr_state, d1::task_group_context& src, T new_state) { + __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL); + if ((ctx.*mptr_state).load(std::memory_order_relaxed) == new_state) { + // Nothing to do, whether descending from "src" or not, so no need to scan. + // Hopefully this happens often thanks to earlier invocations. 
+ // This optimization is enabled by LIFO order in the context lists: + // - new contexts are bound to the beginning of lists; + // - descendants are newer than ancestors; + // - earlier invocations are therefore likely to "paint" long chains. + } else if (&ctx == &src) { + // This clause is disjunct from the traversal below, which skips src entirely. + // Note that src.*mptr_state is not necessarily still equal to new_state (another thread may have changed it again). + // Such interference is probably not frequent enough to aim for optimisation by writing new_state again (to make the other thread back down). + // Letting the other thread prevail may also be fairer. + } else { + for (d1::task_group_context* ancestor = ctx.my_parent; ancestor != NULL; ancestor = ancestor->my_parent) { + if (ancestor == &src) { + for (d1::task_group_context* c = &ctx; c != ancestor; c = c->my_parent) + (c->*mptr_state).store(new_state, std::memory_order_relaxed); + break; + } + } + } +} + +bool task_group_context_impl::cancel_group_execution(d1::task_group_context& ctx) { + __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL); + __TBB_ASSERT(ctx.my_cancellation_requested.load(std::memory_order_relaxed) <= 1, "The cancellation state can be either 0 or 1"); + if (ctx.my_cancellation_requested.load(std::memory_order_relaxed) || ctx.my_cancellation_requested.exchange(1)) { + // This task group and any descendants have already been canceled. + // (A newly added descendant would inherit its parent's ctx.my_cancellation_requested, + // not missing out on any cancellation still being propagated, and a context cannot be uncanceled.) + return false; + } + governor::get_thread_data()->my_arena->my_market->propagate_task_group_state(&d1::task_group_context::my_cancellation_requested, ctx, uint32_t(1)); + return true; +} + +bool task_group_context_impl::is_group_execution_cancelled(const d1::task_group_context& ctx) { + return ctx.my_cancellation_requested.load(std::memory_order_relaxed) != 0; +} + +// IMPORTANT: It is assumed that this method is not used concurrently! +void task_group_context_impl::reset(d1::task_group_context& ctx) { + __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL); + //! TODO: Add assertion that this context does not have children + // No fences are necessary since this context can be accessed from another thread + // only after stealing happened (which means necessary fences were used). + if (ctx.my_exception) { + ctx.my_exception->destroy(); + ctx.my_exception = NULL; + } + ctx.my_cancellation_requested = 0; +} + +// IMPORTANT: It is assumed that this method is not used concurrently! +void task_group_context_impl::capture_fp_settings(d1::task_group_context& ctx) { + __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL); + //! TODO: Add assertion that this context does not have children + // No fences are necessary since this context can be accessed from another thread + // only after stealing happened (which means necessary fences were used). 
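+    // Construct the in-place cpu_ctl_env on the first capture, then snapshot the calling
+    // thread's current FPU control settings into it.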
+ d1::cpu_ctl_env* ctl = reinterpret_cast<d1::cpu_ctl_env*>(&ctx.my_cpu_ctl_env); + if (!ctx.my_traits.fp_settings) { + ctl = new (&ctx.my_cpu_ctl_env) d1::cpu_ctl_env; + ctx.my_traits.fp_settings = true; + } + ctl->get_env(); +} + +void task_group_context_impl::copy_fp_settings(d1::task_group_context& ctx, const d1::task_group_context& src) { + __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL); + __TBB_ASSERT(!ctx.my_traits.fp_settings, "The context already has FPU settings."); + __TBB_ASSERT(src.my_traits.fp_settings, "The source context does not have FPU settings."); + + const d1::cpu_ctl_env* src_ctl = reinterpret_cast<const d1::cpu_ctl_env*>(&src.my_cpu_ctl_env); + new (&ctx.my_cpu_ctl_env) d1::cpu_ctl_env(*src_ctl); + ctx.my_traits.fp_settings = true; +} + +template <typename T> +void thread_data::propagate_task_group_state(std::atomic<T> d1::task_group_context::* mptr_state, d1::task_group_context& src, T new_state) { + spin_mutex::scoped_lock lock(my_context_list_state.mutex); + // Acquire fence is necessary to ensure that the subsequent node->my_next load + // returned the correct value in case it was just inserted in another thread. + // The fence also ensures visibility of the correct ctx.my_parent value. + d1::context_list_node* node = my_context_list_state.head.next.load(std::memory_order_acquire); + while (node != &my_context_list_state.head) { + d1::task_group_context& ctx = __TBB_get_object_ref(d1::task_group_context, my_node, node); + if ((ctx.*mptr_state).load(std::memory_order_relaxed) != new_state) + task_group_context_impl::propagate_task_group_state(ctx, mptr_state, src, new_state); + node = node->next.load(std::memory_order_relaxed); + } + // Sync up local propagation epoch with the global one. Release fence prevents + // reordering of possible store to *mptr_state after the sync point. + my_context_list_state.epoch.store(the_context_state_propagation_epoch.load(std::memory_order_relaxed), std::memory_order_release); +} + +template <typename T> +bool market::propagate_task_group_state(std::atomic<T> d1::task_group_context::* mptr_state, d1::task_group_context& src, T new_state) { + if (src.my_state.load(std::memory_order_relaxed) != d1::task_group_context::may_have_children) + return true; + // The whole propagation algorithm is under the lock in order to ensure correctness + // in case of concurrent state changes at the different levels of the context tree. + // See comment at the bottom of scheduler.cpp + context_state_propagation_mutex_type::scoped_lock lock(the_context_state_propagation_mutex); + if ((src.*mptr_state).load(std::memory_order_relaxed) != new_state) + // Another thread has concurrently changed the state. Back down. + return false; + // Advance global state propagation epoch + ++the_context_state_propagation_epoch; + // Propagate to all workers and external threads and sync up their local epochs with the global one + unsigned num_workers = my_first_unused_worker_idx; + for (unsigned i = 0; i < num_workers; ++i) { + thread_data* td = my_workers[i]; + // If the worker is only about to be registered, skip it. + if (td) + td->propagate_task_group_state(mptr_state, src, new_state); + } + // Propagate to all external threads + // The whole propagation sequence is locked, thus no contention is expected + for (thread_data_list_type::iterator it = my_masters.begin(); it != my_masters.end(); it++) + it->propagate_task_group_state(mptr_state, src, new_state); + return true; +} + +/* + Comments: + +1. 
The premise of the cancellation support implementation is that cancellations are + not part of the hot path of the program execution. Therefore all changes in its + implementation in order to reduce the overhead of the cancellation control flow + should be done only in ways that do not increase overhead of the normal execution. + + In general, contexts are used by all threads and their descendants are created in + different threads as well. In order to minimize impact of the cross-thread tree + maintenance (first of all because of the synchronization), the tree of contexts + is split into pieces, each of which is handled by a single thread. Such pieces + are represented as lists of contexts, members of which are contexts that were + bound to their parents in the given thread. + + The context tree maintenance and cancellation propagation algorithms are designed + in such a manner that cross-thread access to a context list will take place only + when cancellation signal is sent (by user or when an exception happens), and + synchronization is necessary only then. Thus the normal execution flow (without + exceptions and cancellation) remains free from any synchronization done on + behalf of exception handling and cancellation support. + +2. Consider parallel cancellations at the different levels of the context tree: + + Ctx1 <- Cancelled by Thread1 |- Thread2 started processing + | | + Ctx2 |- Thread1 started processing + | T1 |- Thread2 finishes and syncs up local counters + Ctx3 <- Cancelled by Thread2 | + | |- Ctx5 is bound to Ctx2 + Ctx4 | + T2 |- Thread1 reaches Ctx2 + + Thread-propagator of each cancellation increments global counter. However the thread + propagating the cancellation from the outermost context (Thread1) may be the last + to finish. Which means that the local counters may be synchronized earlier (by Thread2, + at Time1) than it propagated cancellation into Ctx2 (at time Time2). If a new context + (Ctx5) is created and bound to Ctx2 between Time1 and Time2, checking its parent only + (Ctx2) may result in cancellation request being lost. + + This issue is solved by doing the whole propagation under the lock. + + If we need more concurrency while processing parallel cancellations, we could try + the following modification of the propagation algorithm: + + advance global counter and remember it + for each thread: + scan thread's list of contexts + for each thread: + sync up its local counter only if the global counter has not been changed + + However this version of the algorithm requires more analysis and verification. 
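+
+   For reference, the propagation algorithm as actually implemented above
+   (market::propagate_task_group_state) is, in outline:
+
+       take the_context_state_propagation_mutex
+       if the state of src has already been changed again, back down
+       advance the global state propagation epoch
+       for each known worker and external thread:
+           walk its context list and propagate new_state to the descendants of src
+           sync up the thread's local epoch with the global one
+       release the lock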
+*/ + +void __TBB_EXPORTED_FUNC initialize(d1::task_group_context& ctx) { + task_group_context_impl::initialize(ctx); +} +void __TBB_EXPORTED_FUNC destroy(d1::task_group_context& ctx) { + task_group_context_impl::destroy(ctx); +} +void __TBB_EXPORTED_FUNC reset(d1::task_group_context& ctx) { + task_group_context_impl::reset(ctx); +} +bool __TBB_EXPORTED_FUNC cancel_group_execution(d1::task_group_context& ctx) { + return task_group_context_impl::cancel_group_execution(ctx); +} +bool __TBB_EXPORTED_FUNC is_group_execution_cancelled(d1::task_group_context& ctx) { + return task_group_context_impl::is_group_execution_cancelled(ctx); +} +void __TBB_EXPORTED_FUNC capture_fp_settings(d1::task_group_context& ctx) { + task_group_context_impl::capture_fp_settings(ctx); +} + +} // namespace r1 +} // namespace detail +} // namespace tbb + diff --git a/contrib/libs/tbb/src/tbb/task_stream.h b/contrib/libs/tbb/src/tbb/task_stream.h new file mode 100644 index 0000000000..f32ef94e80 --- /dev/null +++ b/contrib/libs/tbb/src/tbb/task_stream.h @@ -0,0 +1,288 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef _TBB_task_stream_H +#define _TBB_task_stream_H + +//! This file is a possible future replacement for the task_stream class implemented in +//! task_stream.h. It refactors the code and extends task_stream capabilities by moving lane +//! management during operations on caller side. Despite the fact that new implementation should not +//! affect performance of the original task stream, analysis on this subject was not made at the +//! time it was developed. In addition, it is not clearly seen at the moment that this container +//! would be suitable for critical tasks due to linear time complexity on its operations. + +#include "oneapi/tbb/detail/_utils.h" + +#include "oneapi/tbb/spin_mutex.h" +#include "oneapi/tbb/cache_aligned_allocator.h" + +#include "scheduler_common.h" +#include "misc.h" // for FastRandom + +#include <deque> +#include <climits> +#include <atomic> + +namespace tbb { +namespace detail { +namespace r1 { + +//! Essentially, this is just a pair of a queue and a mutex to protect the queue. +/** The reason std::pair is not used is that the code would look less clean + if field names were replaced with 'first' and 'second'. 
**/ +template< typename T, typename mutex_t > +struct alignas(max_nfs_size) queue_and_mutex { + typedef std::deque< T, cache_aligned_allocator<T> > queue_base_t; + + queue_base_t my_queue{}; + mutex_t my_mutex{}; +}; + +using population_t = uintptr_t; +const population_t one = 1; + +inline void set_one_bit( std::atomic<population_t>& dest, int pos ) { + __TBB_ASSERT( pos>=0, NULL ); + __TBB_ASSERT( pos<int(sizeof(population_t)*CHAR_BIT), NULL ); + dest.fetch_or( one<<pos ); +} + +inline void clear_one_bit( std::atomic<population_t>& dest, int pos ) { + __TBB_ASSERT( pos>=0, NULL ); + __TBB_ASSERT( pos<int(sizeof(population_t)*CHAR_BIT), NULL ); + dest.fetch_and( ~(one<<pos) ); +} + +inline bool is_bit_set( population_t val, int pos ) { + __TBB_ASSERT( pos>=0, NULL ); + __TBB_ASSERT( pos<int(sizeof(population_t)*CHAR_BIT), NULL ); + return (val & (one<<pos)) != 0; +} + +struct random_lane_selector : +#if __INTEL_COMPILER == 1110 || __INTEL_COMPILER == 1500 + no_assign +#else + no_copy +#endif +{ + random_lane_selector( FastRandom& random ) : my_random( random ) {} + unsigned operator()( unsigned out_of ) const { + __TBB_ASSERT( ((out_of-1) & out_of) == 0, "number of lanes is not power of two." ); + return my_random.get() & (out_of-1); + } +private: + FastRandom& my_random; +}; + +struct lane_selector_base : +#if __INTEL_COMPILER == 1110 || __INTEL_COMPILER == 1500 + no_assign +#else + no_copy +#endif +{ + unsigned& my_previous; + lane_selector_base( unsigned& previous ) : my_previous( previous ) {} +}; + +struct subsequent_lane_selector : lane_selector_base { + subsequent_lane_selector( unsigned& previous ) : lane_selector_base( previous ) {} + unsigned operator()( unsigned out_of ) const { + __TBB_ASSERT( ((out_of-1) & out_of) == 0, "number of lanes is not power of two." ); + return (++my_previous &= out_of-1); + } +}; + +struct preceding_lane_selector : lane_selector_base { + preceding_lane_selector( unsigned& previous ) : lane_selector_base( previous ) {} + unsigned operator()( unsigned out_of ) const { + __TBB_ASSERT( ((out_of-1) & out_of) == 0, "number of lanes is not power of two." ); + return (--my_previous &= (out_of-1)); + } +}; + +//! Specializes from which side of the underlying container elements are retrieved. Method must be +//! called under corresponding mutex locked. +template<task_stream_accessor_type accessor> +class task_stream_accessor : no_copy { +protected: + using lane_t = queue_and_mutex <d1::task*, spin_mutex>; + d1::task* get_item( lane_t::queue_base_t& queue ) { + d1::task* result = queue.front(); + queue.pop_front(); + return result; + } +}; + +template<> +class task_stream_accessor< back_nonnull_accessor > : no_copy { +protected: + using lane_t = queue_and_mutex <d1::task*, spin_mutex>; + d1::task* get_item( lane_t::queue_base_t& queue ) { + d1::task* result = nullptr; + __TBB_ASSERT(!queue.empty(), nullptr); + // Isolated task can put zeros in queue see look_specific + do { + result = queue.back(); + queue.pop_back(); + } while ( !result && !queue.empty() ); + + __TBB_ASSERT_RELEASE(result, nullptr); + return result; + } +}; + +//! The container for "fairness-oriented" aka "enqueued" tasks. 
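+//! Tasks are spread across a power-of-two number of lanes, each lane being a deque protected by
+//! its own spin mutex. A companion bitmask ("population") records which lanes are non-empty, so
+//! the pop paths can skip empty lanes without acquiring their locks.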
+template<task_stream_accessor_type accessor> +class task_stream : public task_stream_accessor< accessor > { + using lane_t = typename task_stream_accessor<accessor>::lane_t; + std::atomic<population_t> population{}; + lane_t* lanes{nullptr}; + unsigned N{}; + +public: + task_stream() = default; + + void initialize( unsigned n_lanes ) { + const unsigned max_lanes = sizeof(population_t) * CHAR_BIT; + + N = n_lanes >= max_lanes ? max_lanes : n_lanes > 2 ? 1 << (tbb::detail::log2(n_lanes - 1) + 1) : 2; + __TBB_ASSERT( N == max_lanes || (N >= n_lanes && ((N - 1) & N) == 0), "number of lanes miscalculated" ); + __TBB_ASSERT( N <= sizeof(population_t) * CHAR_BIT, NULL ); + lanes = static_cast<lane_t*>(cache_aligned_allocate(sizeof(lane_t) * N)); + for (unsigned i = 0; i < N; ++i) { + new (lanes + i) lane_t; + } + __TBB_ASSERT( !population.load(std::memory_order_relaxed), NULL ); + } + + ~task_stream() { + if (lanes) { + for (unsigned i = 0; i < N; ++i) { + lanes[i].~lane_t(); + } + cache_aligned_deallocate(lanes); + } + } + + //! Push a task into a lane. Lane selection is performed by passed functor. + template<typename lane_selector_t> + void push(d1::task* source, const lane_selector_t& next_lane ) { + bool succeed = false; + unsigned lane = 0; + do { + lane = next_lane( /*out_of=*/N ); + __TBB_ASSERT( lane < N, "Incorrect lane index." ); + } while( ! (succeed = try_push( source, lane )) ); + } + + //! Try finding and popping a task using passed functor for lane selection. Last used lane is + //! updated inside lane selector. + template<typename lane_selector_t> + d1::task* pop( const lane_selector_t& next_lane ) { + d1::task* popped = NULL; + unsigned lane = 0; + do { + lane = next_lane( /*out_of=*/N ); + __TBB_ASSERT( lane < N, "Incorrect lane index." ); + } while( !empty() && !(popped = try_pop( lane )) ); + return popped; + } + + //! Try finding and popping a related task. + d1::task* pop_specific( unsigned& last_used_lane, isolation_type isolation ) { + d1::task* result = NULL; + // Lane selection is round-robin in backward direction. + unsigned idx = last_used_lane & (N-1); + do { + if( is_bit_set( population.load(std::memory_order_relaxed), idx ) ) { + lane_t& lane = lanes[idx]; + spin_mutex::scoped_lock lock; + if( lock.try_acquire(lane.my_mutex) && !lane.my_queue.empty() ) { + result = look_specific( lane.my_queue, isolation ); + if( lane.my_queue.empty() ) + clear_one_bit( population, idx ); + if( result ) + break; + } + } + idx=(idx-1)&(N-1); + } while( !empty() && idx != last_used_lane ); + last_used_lane = idx; + return result; + } + + //! Checks existence of a task. + bool empty() { + return !population.load(std::memory_order_relaxed); + } + +private: + //! Returns true on successful push, otherwise - false. + bool try_push(d1::task* source, unsigned lane_idx ) { + spin_mutex::scoped_lock lock; + if( lock.try_acquire( lanes[lane_idx].my_mutex ) ) { + lanes[lane_idx].my_queue.push_back( source ); + set_one_bit( population, lane_idx ); // TODO: avoid atomic op if the bit is already set + return true; + } + return false; + } + + //! Returns pointer to task on successful pop, otherwise - NULL. 
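+    // The lane's population bit is checked first so that empty lanes are skipped without taking
+    // their mutexes; the bit is cleared again once the lane has been drained.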
+ d1::task* try_pop( unsigned lane_idx ) { + if( !is_bit_set( population.load(std::memory_order_relaxed), lane_idx ) ) + return NULL; + d1::task* result = NULL; + lane_t& lane = lanes[lane_idx]; + spin_mutex::scoped_lock lock; + if( lock.try_acquire( lane.my_mutex ) && !lane.my_queue.empty() ) { + result = this->get_item( lane.my_queue ); + if( lane.my_queue.empty() ) + clear_one_bit( population, lane_idx ); + } + return result; + } + + // TODO: unify '*_specific' logic with 'pop' methods above + d1::task* look_specific( typename lane_t::queue_base_t& queue, isolation_type isolation ) { + __TBB_ASSERT( !queue.empty(), NULL ); + // TODO: add a worst-case performance test and consider an alternative container with better + // performance for isolation search. + typename lane_t::queue_base_t::iterator curr = queue.end(); + do { + // TODO: consider logic from get_task to simplify the code. + d1::task* result = *--curr; + if( result && task_accessor::isolation(*result) == isolation ) { + if( queue.end() - curr == 1 ) + queue.pop_back(); // a little of housekeeping along the way + else + *curr = 0; // grabbing task with the same isolation + // TODO: move one of the container's ends instead if the task has been found there + return result; + } + } while( curr != queue.begin() ); + return NULL; + } + +}; // task_stream + +} // namespace r1 +} // namespace detail +} // namespace tbb + +#endif /* _TBB_task_stream_H */ diff --git a/contrib/libs/tbb/src/tbb/thread_data.h b/contrib/libs/tbb/src/tbb/thread_data.h new file mode 100644 index 0000000000..41d4a0cf60 --- /dev/null +++ b/contrib/libs/tbb/src/tbb/thread_data.h @@ -0,0 +1,273 @@ +/* + Copyright (c) 2020-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_thread_data_H +#define __TBB_thread_data_H + +#include "oneapi/tbb/detail/_task.h" +#include "oneapi/tbb/task.h" + +#include "rml_base.h" // rml::job + +#include "scheduler_common.h" +#include "arena.h" +#include "concurrent_monitor.h" +#include "mailbox.h" +#include "misc.h" // FastRandom +#include "small_object_pool_impl.h" + +#include <atomic> + +namespace tbb { +namespace detail { +namespace r1 { + +class task; +class arena_slot; +class task_group_context; +class task_dispatcher; + +//------------------------------------------------------------------------ +// Thread Data +//------------------------------------------------------------------------ +class thread_data : public ::rml::job + , public intrusive_list_node + , no_copy { +public: + thread_data(unsigned short index, bool is_worker) + : my_arena_index{ index } + , my_is_worker{ is_worker } + , my_task_dispatcher{ nullptr } + , my_arena{} + , my_arena_slot{} + , my_inbox{} + , my_random{ this } + , my_last_observer{ nullptr } + , my_small_object_pool{new (cache_aligned_allocate(sizeof(small_object_pool_impl))) small_object_pool_impl{}} + , my_context_list_state{} +#if __TBB_RESUMABLE_TASKS + , my_post_resume_action{ post_resume_action::none } + , my_post_resume_arg{nullptr} +#endif /* __TBB_RESUMABLE_TASKS */ + { + ITT_SYNC_CREATE(&my_context_list_state.mutex, SyncType_Scheduler, SyncObj_ContextsList); + my_context_list_state.head.next.store(&my_context_list_state.head, std::memory_order_relaxed); + my_context_list_state.head.prev.store(&my_context_list_state.head, std::memory_order_relaxed); + } + + ~thread_data() { + context_list_cleanup(); + my_small_object_pool->destroy(); + poison_pointer(my_task_dispatcher); + poison_pointer(my_arena); + poison_pointer(my_arena_slot); + poison_pointer(my_last_observer); + poison_pointer(my_small_object_pool); +#if __TBB_RESUMABLE_TASKS + poison_pointer(my_post_resume_arg); +#endif /* __TBB_RESUMABLE_TASKS */ + poison_value(my_context_list_state.epoch); + poison_value(my_context_list_state.local_update); + poison_value(my_context_list_state.nonlocal_update); + } + + void attach_arena(arena& a, std::size_t index); + bool is_attached_to(arena*); + void attach_task_dispatcher(task_dispatcher&); + void detach_task_dispatcher(); + void context_list_cleanup(); + template <typename T> + void propagate_task_group_state(std::atomic<T> d1::task_group_context::* mptr_state, d1::task_group_context& src, T new_state); + + //! Index of the arena slot the scheduler occupies now, or occupied last time + unsigned short my_arena_index; + + //! Indicates if the thread is created by RML + const bool my_is_worker; + + //! The current task dipsatcher + task_dispatcher* my_task_dispatcher; + + //! The arena that I own (if external thread) or am servicing at the moment (if worker) + arena* my_arena; + + //! Pointer to the slot in the arena we own at the moment + arena_slot* my_arena_slot; + + //! The mailbox (affinity mechanism) the current thread attached to + mail_inbox my_inbox; + + //! The random generator + FastRandom my_random; + + //! Last observer in the observers list processed on this slot + observer_proxy* my_last_observer; + + //! Pool of small object for fast task allocation + small_object_pool_impl* my_small_object_pool; + + struct context_list_state { + //! Head of the thread specific list of task group contexts. + d1::context_list_node head{}; + + //! Mutex protecting access to the list of task group contexts. 
+ // TODO: check whether it can be deadly preempted and replace by spinning/sleeping mutex + spin_mutex mutex{}; + + //! Last state propagation epoch known to this thread + /** Together with the_context_state_propagation_epoch constitute synchronization protocol + that keeps hot path of task group context construction destruction mostly + lock-free. + When local epoch equals the global one, the state of task group contexts + registered with this thread is consistent with that of the task group trees + they belong to. **/ + std::atomic<std::uintptr_t> epoch{}; + + //! Flag indicating that a context is being destructed by its owner thread + /** Together with my_nonlocal_ctx_list_update constitute synchronization protocol + that keeps hot path of context destruction (by the owner thread) mostly + lock-free. **/ + std::atomic<std::uintptr_t> local_update{}; + + //! Flag indicating that a context is being destructed by non-owner thread. + /** See also my_local_update. **/ + std::atomic<std::uintptr_t> nonlocal_update{}; + } my_context_list_state; + +#if __TBB_RESUMABLE_TASKS + //! The list of possible post resume actions. + enum class post_resume_action { + invalid, + register_waiter, + resume, + callback, + cleanup, + notify, + none + }; + + //! The callback to call the user callback passed to tbb::suspend. + struct suspend_callback_wrapper { + suspend_callback_type suspend_callback; + void* user_callback; + suspend_point_type* tag; + + void operator()() { + __TBB_ASSERT(suspend_callback && user_callback && tag, nullptr); + suspend_callback(user_callback, tag); + } + }; + + //! Suspends the current coroutine (task_dispatcher). + void suspend(void* suspend_callback, void* user_callback); + + //! Resumes the target task_dispatcher. + void resume(task_dispatcher& target); + + //! Set post resume action to perform after resume. + void set_post_resume_action(post_resume_action pra, void* arg) { + __TBB_ASSERT(my_post_resume_action == post_resume_action::none, "The Post resume action must not be set"); + __TBB_ASSERT(!my_post_resume_arg, "The post resume action must not have an argument"); + my_post_resume_action = pra; + my_post_resume_arg = arg; + } + + void clear_post_resume_action() { + my_post_resume_action = thread_data::post_resume_action::none; + my_post_resume_arg = nullptr; + } + + //! Performs post resume action. + void do_post_resume_action(); + + //! The post resume action requested after the swap contexts. + post_resume_action my_post_resume_action; + + //! The post resume action argument. + void* my_post_resume_arg; +#endif /* __TBB_RESUMABLE_TASKS */ + + //! The default context + // TODO: consider using common default context because it is used only to simplify + // cancellation check. + d1::task_group_context my_default_context; +}; + +inline void thread_data::attach_arena(arena& a, std::size_t index) { + my_arena = &a; + my_arena_index = static_cast<unsigned short>(index); + my_arena_slot = a.my_slots + index; + // Read the current slot mail_outbox and attach it to the mail_inbox (remove inbox later maybe) + my_inbox.attach(my_arena->mailbox(index)); +} + +inline bool thread_data::is_attached_to(arena* a) { return my_arena == a; } + +inline void thread_data::context_list_cleanup() { + // Detach contexts remaining in the local list. 
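+    // Each remaining context is switched from the 'bound' to the 'detached' state so that a later
+    // destroy() knows its owner thread is gone; if a concurrent destroyer has already locked the
+    // context, wait until it reaches the 'dying' state instead.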
+ { + spin_mutex::scoped_lock lock(my_context_list_state.mutex); + d1::context_list_node* node = my_context_list_state.head.next.load(std::memory_order_relaxed); + while (node != &my_context_list_state.head) { + using state_t = d1::task_group_context::lifetime_state; + + d1::task_group_context& ctx = __TBB_get_object_ref(d1::task_group_context, my_node, node); + std::atomic<state_t>& state = ctx.my_lifetime_state; + + node = node->next.load(std::memory_order_relaxed); + + __TBB_ASSERT(ctx.my_owner == this, "The context should belong to the current thread."); + state_t expected = state_t::bound; + if ( +#if defined(__INTEL_COMPILER) && __INTEL_COMPILER <= 1910 + !((std::atomic<typename std::underlying_type<state_t>::type>&)state).compare_exchange_strong( + (typename std::underlying_type<state_t>::type&)expected, + (typename std::underlying_type<state_t>::type)state_t::detached) +#else + !state.compare_exchange_strong(expected, state_t::detached) +#endif + ) { + __TBB_ASSERT(expected == state_t::locked || expected == state_t::dying, nullptr); + spin_wait_until_eq(state, state_t::dying); + } else { + __TBB_ASSERT(expected == state_t::bound, nullptr); + ctx.my_owner.store(nullptr, std::memory_order_release); + } + } + } + spin_wait_until_eq(my_context_list_state.nonlocal_update, 0u); +} + +inline void thread_data::attach_task_dispatcher(task_dispatcher& task_disp) { + __TBB_ASSERT(my_task_dispatcher == nullptr, nullptr); + __TBB_ASSERT(task_disp.m_thread_data == nullptr, nullptr); + task_disp.m_thread_data = this; + my_task_dispatcher = &task_disp; +} + +inline void thread_data::detach_task_dispatcher() { + __TBB_ASSERT(my_task_dispatcher != nullptr, nullptr); + __TBB_ASSERT(my_task_dispatcher->m_thread_data == this, nullptr); + my_task_dispatcher->m_thread_data = nullptr; + my_task_dispatcher = nullptr; +} + +} // namespace r1 +} // namespace detail +} // namespace tbb + +#endif // __TBB_thread_data_H + diff --git a/contrib/libs/tbb/src/tbb/tls.h b/contrib/libs/tbb/src/tbb/tls.h new file mode 100644 index 0000000000..5d28ca4dae --- /dev/null +++ b/contrib/libs/tbb/src/tbb/tls.h @@ -0,0 +1,93 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef _TBB_tls_H +#define _TBB_tls_H + +#include "oneapi/tbb/detail/_config.h" + +#if __TBB_USE_POSIX +#include <pthread.h> +#else /* assume __TBB_USE_WINAPI */ +#include <windows.h> +#endif + +namespace tbb { +namespace detail { +namespace r1 { + +typedef void (*tls_dtor_t)(void*); + +//! Basic cross-platform wrapper class for TLS operations. 
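+//!
+//! Illustrative usage sketch (not part of the original header); the wrapper
+//! stores one pointer-sized value per thread and must be created and destroyed
+//! explicitly (the RAII-style tls<T> class below does this automatically):
+//! @code
+//!     basic_tls<void*> key;
+//!     key.create();              // allocate a TLS slot (pthread_key_create / TlsAlloc / FlsAlloc)
+//!     key.set(&some_object);     // store a per-thread, pointer-sized value; some_object is a placeholder
+//!     void* p = key.get();       // read it back on the same thread
+//!     key.destroy();             // release the slot
+//! @endcode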
+template <typename T> +class basic_tls { +#if __TBB_USE_POSIX + typedef pthread_key_t tls_key_t; +public: + int create( tls_dtor_t dtor = NULL ) { + return pthread_key_create(&my_key, dtor); + } + int destroy() { return pthread_key_delete(my_key); } + void set( T value ) { pthread_setspecific(my_key, (void*)value); } + T get() { return (T)pthread_getspecific(my_key); } +#else /* __TBB_USE_WINAPI */ + typedef DWORD tls_key_t; +public: +#if !__TBB_WIN8UI_SUPPORT + int create() { + tls_key_t tmp = TlsAlloc(); + if( tmp==TLS_OUT_OF_INDEXES ) + return TLS_OUT_OF_INDEXES; + my_key = tmp; + return 0; + } + int destroy() { TlsFree(my_key); my_key=0; return 0; } + void set( T value ) { TlsSetValue(my_key, (LPVOID)value); } + T get() { return (T)TlsGetValue(my_key); } +#else /*!__TBB_WIN8UI_SUPPORT*/ + int create() { + tls_key_t tmp = FlsAlloc(NULL); + if( tmp== (DWORD)0xFFFFFFFF ) + return (DWORD)0xFFFFFFFF; + my_key = tmp; + return 0; + } + int destroy() { FlsFree(my_key); my_key=0; return 0; } + void set( T value ) { FlsSetValue(my_key, (LPVOID)value); } + T get() { return (T)FlsGetValue(my_key); } +#endif /* !__TBB_WIN8UI_SUPPORT */ +#endif /* __TBB_USE_WINAPI */ +private: + tls_key_t my_key; +}; + +//! More advanced TLS support template class. +/** It supports RAII and to some extent mimic __declspec(thread) variables. */ +template <typename T> +class tls : public basic_tls<T> { + typedef basic_tls<T> base; +public: + tls() { base::create(); } + ~tls() { base::destroy(); } + T operator=(T value) { base::set(value); return value; } + operator T() { return base::get(); } +}; + +} // namespace r1 +} // namespace detail +} // namespace tbb + +#endif /* _TBB_tls_H */ diff --git a/contrib/libs/tbb/src/tbb/tools_api/disable_warnings.h b/contrib/libs/tbb/src/tbb/tools_api/disable_warnings.h new file mode 100644 index 0000000000..e1ba837404 --- /dev/null +++ b/contrib/libs/tbb/src/tbb/tools_api/disable_warnings.h @@ -0,0 +1,35 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "ittnotify_config.h" + +#if ITT_PLATFORM==ITT_PLATFORM_WIN + +#pragma warning (disable: 593) /* parameter "XXXX" was set but never used */ +#pragma warning (disable: 344) /* typedef name has already been declared (with same type) */ +#pragma warning (disable: 174) /* expression has no effect */ +#pragma warning (disable: 4127) /* conditional expression is constant */ +#pragma warning (disable: 4306) /* conversion from '?' to '?' 
of greater size */ + +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +#if defined __INTEL_COMPILER + +#pragma warning (disable: 869) /* parameter "XXXXX" was never referenced */ +#pragma warning (disable: 1418) /* external function definition with no prior declaration */ +#pragma warning (disable: 1419) /* external declaration in primary source file */ + +#endif /* __INTEL_COMPILER */ diff --git a/contrib/libs/tbb/src/tbb/tools_api/ittnotify.h b/contrib/libs/tbb/src/tbb/tools_api/ittnotify.h new file mode 100644 index 0000000000..993b7b0bfd --- /dev/null +++ b/contrib/libs/tbb/src/tbb/tools_api/ittnotify.h @@ -0,0 +1,4165 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef _ITTNOTIFY_H_ +#define _ITTNOTIFY_H_ + +/** +@file +@brief Public User API functions and types +@mainpage + +The Instrumentation and Tracing Technology API (ITT API) is used to +annotate a user's program with additional information +that can be used by correctness and performance tools. The user inserts +calls in their program. Those calls generate information that is collected +at runtime, and used by Intel(R) Threading Tools. + +@section API Concepts +The following general concepts are used throughout the API. + +@subsection Unicode Support +Many API functions take character string arguments. On Windows, there +are two versions of each such function. The function name is suffixed +by W if Unicode support is enabled, and by A otherwise. Any API function +that takes a character string argument adheres to this convention. + +@subsection Conditional Compilation +Many users prefer having an option to modify ITT API code when linking it +inside their runtimes. ITT API header file provides a mechanism to replace +ITT API function names inside your code with empty strings. To do this, +define the macros INTEL_NO_ITTNOTIFY_API during compilation and remove the +static library from the linker script. + +@subsection Domains +[see domains] +Domains provide a way to separate notification for different modules or +libraries in a program. Domains are specified by dotted character strings, +e.g. TBB.Internal.Control. + +A mechanism (to be specified) is provided to enable and disable +domains. By default, all domains are enabled. +@subsection Named Entities and Instances +Named entities (frames, regions, tasks, and markers) communicate +information about the program to the analysis tools. A named entity often +refers to a section of program code, or to some set of logical concepts +that the programmer wants to group together. + +Named entities relate to the programmer's static view of the program. When +the program actually executes, many instances of a given named entity +may be created. + +The API annotations denote instances of named entities. The actual +named entities are displayed using the analysis tools. In other words, +the named entities come into existence when instances are created. + +Instances of named entities may have instance identifiers (IDs). 
Some +API calls use instance identifiers to create relationships between +different instances of named entities. Other API calls associate data +with instances of named entities. + +Some named entities must always have instance IDs. In particular, regions +and frames always have IDs. Task and markers need IDs only if the ID is +needed in another API call (such as adding a relation or metadata). + +The lifetime of instance IDs is distinct from the lifetime of +instances. This allows various relationships to be specified separate +from the actual execution of instances. This flexibility comes at the +expense of extra API calls. + +The same ID may not be reused for different instances, unless a previous +[ref] __itt_id_destroy call for that ID has been issued. +*/ + +/** @cond exclude_from_documentation */ +#ifndef ITT_OS_WIN +# define ITT_OS_WIN 1 +#endif /* ITT_OS_WIN */ + +#ifndef ITT_OS_LINUX +# define ITT_OS_LINUX 2 +#endif /* ITT_OS_LINUX */ + +#ifndef ITT_OS_MAC +# define ITT_OS_MAC 3 +#endif /* ITT_OS_MAC */ + +#ifndef ITT_OS_FREEBSD +# define ITT_OS_FREEBSD 4 +#endif /* ITT_OS_FREEBSD */ + +#ifndef ITT_OS +# if defined WIN32 || defined _WIN32 +# define ITT_OS ITT_OS_WIN +# elif defined( __APPLE__ ) && defined( __MACH__ ) +# define ITT_OS ITT_OS_MAC +# elif defined( __FreeBSD__ ) +# define ITT_OS ITT_OS_FREEBSD +# else +# define ITT_OS ITT_OS_LINUX +# endif +#endif /* ITT_OS */ + +#ifndef ITT_PLATFORM_WIN +# define ITT_PLATFORM_WIN 1 +#endif /* ITT_PLATFORM_WIN */ + +#ifndef ITT_PLATFORM_POSIX +# define ITT_PLATFORM_POSIX 2 +#endif /* ITT_PLATFORM_POSIX */ + +#ifndef ITT_PLATFORM_MAC +# define ITT_PLATFORM_MAC 3 +#endif /* ITT_PLATFORM_MAC */ + +#ifndef ITT_PLATFORM_FREEBSD +# define ITT_PLATFORM_FREEBSD 4 +#endif /* ITT_PLATFORM_FREEBSD */ + +#ifndef ITT_PLATFORM +# if ITT_OS==ITT_OS_WIN +# define ITT_PLATFORM ITT_PLATFORM_WIN +# elif ITT_OS==ITT_OS_MAC +# define ITT_PLATFORM ITT_PLATFORM_MAC +# elif ITT_OS==ITT_OS_FREEBSD +# define ITT_PLATFORM ITT_PLATFORM_FREEBSD +# else +# define ITT_PLATFORM ITT_PLATFORM_POSIX +# endif +#endif /* ITT_PLATFORM */ + +#if defined(_UNICODE) && !defined(UNICODE) +#define UNICODE +#endif + +#include <stddef.h> +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#include <tchar.h> +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#include <stdint.h> +#if defined(UNICODE) || defined(_UNICODE) +#include <wchar.h> +#endif /* UNICODE || _UNICODE */ +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +#ifndef ITTAPI_CDECL +# if ITT_PLATFORM==ITT_PLATFORM_WIN +# define ITTAPI_CDECL __cdecl +# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +# if defined _M_IX86 || defined __i386__ +# define ITTAPI_CDECL __attribute__ ((cdecl)) +# else /* _M_IX86 || __i386__ */ +# define ITTAPI_CDECL /* actual only on x86 platform */ +# endif /* _M_IX86 || __i386__ */ +# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* ITTAPI_CDECL */ + +#ifndef STDCALL +# if ITT_PLATFORM==ITT_PLATFORM_WIN +# define STDCALL __stdcall +# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +# if defined _M_IX86 || defined __i386__ +# define STDCALL __attribute__ ((stdcall)) +# else /* _M_IX86 || __i386__ */ +# define STDCALL /* supported only on x86 platform */ +# endif /* _M_IX86 || __i386__ */ +# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* STDCALL */ + +#define ITTAPI ITTAPI_CDECL +#define LIBITTAPI ITTAPI_CDECL + +/* TODO: Temporary for compatibility! 
*/ +#define ITTAPI_CALL ITTAPI_CDECL +#define LIBITTAPI_CALL ITTAPI_CDECL + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +/* use __forceinline (VC++ specific) */ +#define ITT_INLINE __forceinline +#define ITT_INLINE_ATTRIBUTE /* nothing */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +/* + * Generally, functions are not inlined unless optimization is specified. + * For functions declared inline, this attribute inlines the function even + * if no optimization level was specified. + */ +#ifdef __STRICT_ANSI__ +#define ITT_INLINE static +#define ITT_INLINE_ATTRIBUTE __attribute__((unused)) +#else /* __STRICT_ANSI__ */ +#define ITT_INLINE static inline +#define ITT_INLINE_ATTRIBUTE __attribute__((always_inline, unused)) +#endif /* __STRICT_ANSI__ */ +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +/** @endcond */ + +#ifdef INTEL_ITTNOTIFY_ENABLE_LEGACY +# if ITT_PLATFORM==ITT_PLATFORM_WIN +# pragma message("WARNING!!! Deprecated API is used. Please undefine INTEL_ITTNOTIFY_ENABLE_LEGACY macro") +# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +// #warning usage leads to ICC's compilation error +// # warning "Deprecated API is used. Please undefine INTEL_ITTNOTIFY_ENABLE_LEGACY macro" +# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +# include "legacy/ittnotify.h" +#endif /* INTEL_ITTNOTIFY_ENABLE_LEGACY */ + +/** @cond exclude_from_documentation */ +/* Helper macro for joining tokens */ +#define ITT_JOIN_AUX(p,n) p##n +#define ITT_JOIN(p,n) ITT_JOIN_AUX(p,n) + +#ifdef ITT_MAJOR +#undef ITT_MAJOR +#endif +#ifdef ITT_MINOR +#undef ITT_MINOR +#endif +#define ITT_MAJOR 3 +#define ITT_MINOR 0 + +/* Standard versioning of a token with major and minor version numbers */ +#define ITT_VERSIONIZE(x) \ + ITT_JOIN(x, \ + ITT_JOIN(_, \ + ITT_JOIN(ITT_MAJOR, \ + ITT_JOIN(_, ITT_MINOR)))) + +#ifndef INTEL_ITTNOTIFY_PREFIX +# define INTEL_ITTNOTIFY_PREFIX __itt_ +#endif /* INTEL_ITTNOTIFY_PREFIX */ +#ifndef INTEL_ITTNOTIFY_POSTFIX +# define INTEL_ITTNOTIFY_POSTFIX _ptr_ +#endif /* INTEL_ITTNOTIFY_POSTFIX */ + +#define ITTNOTIFY_NAME_AUX(n) ITT_JOIN(INTEL_ITTNOTIFY_PREFIX,n) +#define ITTNOTIFY_NAME(n) ITT_VERSIONIZE(ITTNOTIFY_NAME_AUX(ITT_JOIN(n,INTEL_ITTNOTIFY_POSTFIX))) + +#define ITTNOTIFY_VOID(n) (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n) +#define ITTNOTIFY_DATA(n) (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n) + +#define ITTNOTIFY_VOID_D0(n,d) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d) +#define ITTNOTIFY_VOID_D1(n,d,x) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x) +#define ITTNOTIFY_VOID_D2(n,d,x,y) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y) +#define ITTNOTIFY_VOID_D3(n,d,x,y,z) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z) +#define ITTNOTIFY_VOID_D4(n,d,x,y,z,a) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a) +#define ITTNOTIFY_VOID_D5(n,d,x,y,z,a,b) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b) +#define ITTNOTIFY_VOID_D6(n,d,x,y,z,a,b,c) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c) +#define ITTNOTIFY_DATA_D0(n,d) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d) +#define ITTNOTIFY_DATA_D1(n,d,x) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x) +#define ITTNOTIFY_DATA_D2(n,d,x,y) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 
0 : ITTNOTIFY_NAME(n)(d,x,y) +#define ITTNOTIFY_DATA_D3(n,d,x,y,z) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z) +#define ITTNOTIFY_DATA_D4(n,d,x,y,z,a) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a) +#define ITTNOTIFY_DATA_D5(n,d,x,y,z,a,b) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b) +#define ITTNOTIFY_DATA_D6(n,d,x,y,z,a,b,c) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c) + +#ifdef ITT_STUB +#undef ITT_STUB +#endif +#ifdef ITT_STUBV +#undef ITT_STUBV +#endif +#define ITT_STUBV(api,type,name,args) \ + typedef type (api* ITT_JOIN(ITTNOTIFY_NAME(name),_t)) args; \ + extern ITT_JOIN(ITTNOTIFY_NAME(name),_t) ITTNOTIFY_NAME(name); +#define ITT_STUB ITT_STUBV +/** @endcond */ + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/** @cond exclude_from_gpa_documentation */ +/** + * @defgroup public Public API + * @{ + * @} + */ + +/** + * @defgroup control Collection Control + * @ingroup public + * General behavior: application continues to run, but no profiling information is being collected + * + * Pausing occurs not only for the current thread but for all process as well as spawned processes + * - Intel(R) Parallel Inspector and Intel(R) Inspector XE: + * - Does not analyze or report errors that involve memory access. + * - Other errors are reported as usual. Pausing data collection in + * Intel(R) Parallel Inspector and Intel(R) Inspector XE + * only pauses tracing and analyzing memory access. + * It does not pause tracing or analyzing threading APIs. + * . + * - Intel(R) Parallel Amplifier and Intel(R) VTune(TM) Amplifier XE: + * - Does continue to record when new threads are started. + * . + * - Other effects: + * - Possible reduction of runtime overhead. + * . + * @{ + */ +/** @brief Pause collection */ +void ITTAPI __itt_pause(void); +/** @brief Resume collection */ +void ITTAPI __itt_resume(void); +/** @brief Detach collection */ +void ITTAPI __itt_detach(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, pause, (void)) +ITT_STUBV(ITTAPI, void, resume, (void)) +ITT_STUBV(ITTAPI, void, detach, (void)) +#define __itt_pause ITTNOTIFY_VOID(pause) +#define __itt_pause_ptr ITTNOTIFY_NAME(pause) +#define __itt_resume ITTNOTIFY_VOID(resume) +#define __itt_resume_ptr ITTNOTIFY_NAME(resume) +#define __itt_detach ITTNOTIFY_VOID(detach) +#define __itt_detach_ptr ITTNOTIFY_NAME(detach) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_pause() +#define __itt_pause_ptr 0 +#define __itt_resume() +#define __itt_resume_ptr 0 +#define __itt_detach() +#define __itt_detach_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_pause_ptr 0 +#define __itt_resume_ptr 0 +#define __itt_detach_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} control group */ +/** @endcond */ + +/** + * @defgroup Intel Processor Trace control + * API from this group provides control over collection and analysis of Intel Processor Trace (Intel PT) data + * Information about Intel Processor Trace technology can be found here (Volume 3 chapter 35): + * https://software.intel.com/sites/default/files/managed/39/c5/325462-sdm-vol-1-2abcd-3abcd.pdf + * Use this API to mark particular code regions for loading detailed performance statistics. + * This mode makes your analysis faster and more accurate. 
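+ *
+ * Illustrative usage sketch (not part of the original header): create a named
+ * region once, then bracket the code of interest with the begin/end marks.
+ * The region name "hot_loop" is only an example.
+ * @code
+ *     __itt_pt_region hot_loop = __itt_pt_region_create("hot_loop");
+ *
+ *     __itt_mark_pt_region_begin(hot_loop);
+ *     // ... code region targeted for Intel PT analysis ...
+ *     __itt_mark_pt_region_end(hot_loop);
+ * @endcode
+ * (Narrow-string form shown; on Windows with UNICODE defined the W variant of
+ * __itt_pt_region_create applies.)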
+ * @{ +*/ +typedef unsigned char __itt_pt_region; + +/** + * @brief function saves a region name marked with Intel PT API and returns a region id. + * Only 7 names can be registered. Attempts to register more names will be ignored and a region id with auto names will be returned. + * For automatic naming of regions pass NULL as function parameter +*/ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +__itt_pt_region ITTAPI __itt_pt_region_createA(const char *name); +__itt_pt_region ITTAPI __itt_pt_region_createW(const wchar_t *name); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_pt_region_create __itt_pt_region_createW +#else /* UNICODE */ +# define __itt_pt_region_create __itt_pt_region_createA +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +__itt_pt_region ITTAPI __itt_pt_region_create(const char *name); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_pt_region, pt_region_createA, (const char *name)) +ITT_STUB(ITTAPI, __itt_pt_region, pt_region_createW, (const wchar_t *name)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_pt_region, pt_region_create, (const char *name)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_pt_region_createA ITTNOTIFY_DATA(pt_region_createA) +#define __itt_pt_region_createA_ptr ITTNOTIFY_NAME(pt_region_createA) +#define __itt_pt_region_createW ITTNOTIFY_DATA(pt_region_createW) +#define __itt_pt_region_createW_ptr ITTNOTIFY_NAME(pt_region_createW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_pt_region_create ITTNOTIFY_DATA(pt_region_create) +#define __itt_pt_region_create_ptr ITTNOTIFY_NAME(pt_region_create) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_pt_region_createA(name) (__itt_pt_region)0 +#define __itt_pt_region_createA_ptr 0 +#define __itt_pt_region_createW(name) (__itt_pt_region)0 +#define __itt_pt_region_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_pt_region_create(name) (__itt_pt_region)0 +#define __itt_pt_region_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_pt_region_createA_ptr 0 +#define __itt_pt_region_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_pt_region_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief function contains a special code pattern identified on the post-processing stage and + * marks the beginning of a code region targeted for Intel PT analysis + * @param[in] region - region id, 0 <= region < 8 +*/ +void __itt_mark_pt_region_begin(__itt_pt_region region); +/** + * @brief function contains a special code pattern identified on the post-processing stage and + * marks the end of a code region targeted for Intel PT analysis + * @param[in] region - region id, 0 <= region < 8 +*/ +void __itt_mark_pt_region_end(__itt_pt_region region); +/** @} Intel PT control group*/ + +/** + * @defgroup threads Threads + * @ingroup public + * Give names to threads + * @{ + */ +/** + * @brief Sets thread name of calling thread + * @param[in] name - name of thread + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +void ITTAPI __itt_thread_set_nameA(const char 
*name); +void ITTAPI __itt_thread_set_nameW(const wchar_t *name); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_thread_set_name __itt_thread_set_nameW +# define __itt_thread_set_name_ptr __itt_thread_set_nameW_ptr +#else /* UNICODE */ +# define __itt_thread_set_name __itt_thread_set_nameA +# define __itt_thread_set_name_ptr __itt_thread_set_nameA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +void ITTAPI __itt_thread_set_name(const char *name); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUBV(ITTAPI, void, thread_set_nameA, (const char *name)) +ITT_STUBV(ITTAPI, void, thread_set_nameW, (const wchar_t *name)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUBV(ITTAPI, void, thread_set_name, (const char *name)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_thread_set_nameA ITTNOTIFY_VOID(thread_set_nameA) +#define __itt_thread_set_nameA_ptr ITTNOTIFY_NAME(thread_set_nameA) +#define __itt_thread_set_nameW ITTNOTIFY_VOID(thread_set_nameW) +#define __itt_thread_set_nameW_ptr ITTNOTIFY_NAME(thread_set_nameW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_thread_set_name ITTNOTIFY_VOID(thread_set_name) +#define __itt_thread_set_name_ptr ITTNOTIFY_NAME(thread_set_name) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_thread_set_nameA(name) +#define __itt_thread_set_nameA_ptr 0 +#define __itt_thread_set_nameW(name) +#define __itt_thread_set_nameW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_thread_set_name(name) +#define __itt_thread_set_name_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_thread_set_nameA_ptr 0 +#define __itt_thread_set_nameW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_thread_set_name_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @cond exclude_from_gpa_documentation */ + +/** + * @brief Mark current thread as ignored from this point on, for the duration of its existence. 
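+ *
+ * Illustrative usage sketch (not part of the original header): a background
+ * helper thread that should not show up in analysis results calls this once
+ * right after it starts; helper_thread_entry is a placeholder name.
+ * @code
+ *     void helper_thread_entry(void)
+ *     {
+ *         __itt_thread_ignore();
+ *         // ... background work that tools should not report ...
+ *     }
+ * @endcode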
+ */ +void ITTAPI __itt_thread_ignore(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, thread_ignore, (void)) +#define __itt_thread_ignore ITTNOTIFY_VOID(thread_ignore) +#define __itt_thread_ignore_ptr ITTNOTIFY_NAME(thread_ignore) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_thread_ignore() +#define __itt_thread_ignore_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_thread_ignore_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} threads group */ + +/** + * @defgroup suppress Error suppression + * @ingroup public + * General behavior: application continues to run, but errors are suppressed + * + * @{ + */ + +/*****************************************************************//** + * @name group of functions used for error suppression in correctness tools + *********************************************************************/ +/** @{ */ +/** + * @hideinitializer + * @brief possible value for suppression mask + */ +#define __itt_suppress_all_errors 0x7fffffff + +/** + * @hideinitializer + * @brief possible value for suppression mask (suppresses errors from threading analysis) + */ +#define __itt_suppress_threading_errors 0x000000ff + +/** + * @hideinitializer + * @brief possible value for suppression mask (suppresses errors from memory analysis) + */ +#define __itt_suppress_memory_errors 0x0000ff00 + +/** + * @brief Start suppressing errors identified in mask on this thread + */ +void ITTAPI __itt_suppress_push(unsigned int mask); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, suppress_push, (unsigned int mask)) +#define __itt_suppress_push ITTNOTIFY_VOID(suppress_push) +#define __itt_suppress_push_ptr ITTNOTIFY_NAME(suppress_push) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_suppress_push(mask) +#define __itt_suppress_push_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_suppress_push_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Undo the effects of the matching call to __itt_suppress_push + */ +void ITTAPI __itt_suppress_pop(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, suppress_pop, (void)) +#define __itt_suppress_pop ITTNOTIFY_VOID(suppress_pop) +#define __itt_suppress_pop_ptr ITTNOTIFY_NAME(suppress_pop) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_suppress_pop() +#define __itt_suppress_pop_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_suppress_pop_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @enum __itt_model_disable + * @brief Enumerator for the disable methods + */ +typedef enum __itt_suppress_mode { + __itt_unsuppress_range, + __itt_suppress_range +} __itt_suppress_mode_t; + +/** + * @brief Mark a range of memory for error suppression or unsuppression for error types included in mask + */ +void ITTAPI __itt_suppress_mark_range(__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, suppress_mark_range, (__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size)) +#define __itt_suppress_mark_range ITTNOTIFY_VOID(suppress_mark_range) +#define 
__itt_suppress_mark_range_ptr ITTNOTIFY_NAME(suppress_mark_range) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_suppress_mark_range(mask) +#define __itt_suppress_mark_range_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_suppress_mark_range_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Undo the effect of a matching call to __itt_suppress_mark_range. If not matching + * call is found, nothing is changed. + */ +void ITTAPI __itt_suppress_clear_range(__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, suppress_clear_range, (__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size)) +#define __itt_suppress_clear_range ITTNOTIFY_VOID(suppress_clear_range) +#define __itt_suppress_clear_range_ptr ITTNOTIFY_NAME(suppress_clear_range) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_suppress_clear_range(mask) +#define __itt_suppress_clear_range_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_suppress_clear_range_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} */ +/** @} suppress group */ + +/** + * @defgroup sync Synchronization + * @ingroup public + * Indicate user-written synchronization code + * @{ + */ +/** + * @hideinitializer + * @brief possible value of attribute argument for sync object type + */ +#define __itt_attr_barrier 1 + +/** + * @hideinitializer + * @brief possible value of attribute argument for sync object type + */ +#define __itt_attr_mutex 2 + +/** +@brief Name a synchronization object +@param[in] addr Handle for the synchronization object. You should +use a real address to uniquely identify the synchronization object. +@param[in] objtype null-terminated object type string. If NULL is +passed, the name will be "User Synchronization". +@param[in] objname null-terminated object name string. If NULL, +no name will be assigned to the object. 
+@param[in] attribute one of [#__itt_attr_barrier, #__itt_attr_mutex] + */ + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +void ITTAPI __itt_sync_createA(void *addr, const char *objtype, const char *objname, int attribute); +void ITTAPI __itt_sync_createW(void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_sync_create __itt_sync_createW +# define __itt_sync_create_ptr __itt_sync_createW_ptr +#else /* UNICODE */ +# define __itt_sync_create __itt_sync_createA +# define __itt_sync_create_ptr __itt_sync_createA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +void ITTAPI __itt_sync_create (void *addr, const char *objtype, const char *objname, int attribute); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUBV(ITTAPI, void, sync_createA, (void *addr, const char *objtype, const char *objname, int attribute)) +ITT_STUBV(ITTAPI, void, sync_createW, (void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUBV(ITTAPI, void, sync_create, (void *addr, const char* objtype, const char* objname, int attribute)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_sync_createA ITTNOTIFY_VOID(sync_createA) +#define __itt_sync_createA_ptr ITTNOTIFY_NAME(sync_createA) +#define __itt_sync_createW ITTNOTIFY_VOID(sync_createW) +#define __itt_sync_createW_ptr ITTNOTIFY_NAME(sync_createW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_sync_create ITTNOTIFY_VOID(sync_create) +#define __itt_sync_create_ptr ITTNOTIFY_NAME(sync_create) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_sync_createA(addr, objtype, objname, attribute) +#define __itt_sync_createA_ptr 0 +#define __itt_sync_createW(addr, objtype, objname, attribute) +#define __itt_sync_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_sync_create(addr, objtype, objname, attribute) +#define __itt_sync_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_sync_createA_ptr 0 +#define __itt_sync_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_sync_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** +@brief Rename a synchronization object + +You can use the rename call to assign or reassign a name to a given +synchronization object. +@param[in] addr handle for the synchronization object. +@param[in] name null-terminated object name string. 
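+
+Illustrative usage sketch (not part of the original header); my_lock stands for
+an application-defined synchronization object previously named with
+__itt_sync_create (narrow-string form shown):
+@code
+    __itt_sync_rename(&my_lock, "Renamed lock");
+@endcode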
+*/ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +void ITTAPI __itt_sync_renameA(void *addr, const char *name); +void ITTAPI __itt_sync_renameW(void *addr, const wchar_t *name); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_sync_rename __itt_sync_renameW +# define __itt_sync_rename_ptr __itt_sync_renameW_ptr +#else /* UNICODE */ +# define __itt_sync_rename __itt_sync_renameA +# define __itt_sync_rename_ptr __itt_sync_renameA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +void ITTAPI __itt_sync_rename(void *addr, const char *name); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUBV(ITTAPI, void, sync_renameA, (void *addr, const char *name)) +ITT_STUBV(ITTAPI, void, sync_renameW, (void *addr, const wchar_t *name)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUBV(ITTAPI, void, sync_rename, (void *addr, const char *name)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_sync_renameA ITTNOTIFY_VOID(sync_renameA) +#define __itt_sync_renameA_ptr ITTNOTIFY_NAME(sync_renameA) +#define __itt_sync_renameW ITTNOTIFY_VOID(sync_renameW) +#define __itt_sync_renameW_ptr ITTNOTIFY_NAME(sync_renameW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_sync_rename ITTNOTIFY_VOID(sync_rename) +#define __itt_sync_rename_ptr ITTNOTIFY_NAME(sync_rename) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_sync_renameA(addr, name) +#define __itt_sync_renameA_ptr 0 +#define __itt_sync_renameW(addr, name) +#define __itt_sync_renameW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_sync_rename(addr, name) +#define __itt_sync_rename_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_sync_renameA_ptr 0 +#define __itt_sync_renameW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_sync_rename_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + @brief Destroy a synchronization object. + @param addr Handle for the synchronization object. 
+ */ +void ITTAPI __itt_sync_destroy(void *addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, sync_destroy, (void *addr)) +#define __itt_sync_destroy ITTNOTIFY_VOID(sync_destroy) +#define __itt_sync_destroy_ptr ITTNOTIFY_NAME(sync_destroy) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_sync_destroy(addr) +#define __itt_sync_destroy_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_sync_destroy_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/*****************************************************************//** + * @name group of functions is used for performance measurement tools + *********************************************************************/ +/** @{ */ +/** + * @brief Enter spin loop on user-defined sync object + */ +void ITTAPI __itt_sync_prepare(void* addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, sync_prepare, (void *addr)) +#define __itt_sync_prepare ITTNOTIFY_VOID(sync_prepare) +#define __itt_sync_prepare_ptr ITTNOTIFY_NAME(sync_prepare) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_sync_prepare(addr) +#define __itt_sync_prepare_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_sync_prepare_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Quit spin loop without acquiring spin object + */ +void ITTAPI __itt_sync_cancel(void *addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, sync_cancel, (void *addr)) +#define __itt_sync_cancel ITTNOTIFY_VOID(sync_cancel) +#define __itt_sync_cancel_ptr ITTNOTIFY_NAME(sync_cancel) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_sync_cancel(addr) +#define __itt_sync_cancel_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_sync_cancel_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Successful spin loop completion (sync object acquired) + */ +void ITTAPI __itt_sync_acquired(void *addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, sync_acquired, (void *addr)) +#define __itt_sync_acquired ITTNOTIFY_VOID(sync_acquired) +#define __itt_sync_acquired_ptr ITTNOTIFY_NAME(sync_acquired) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_sync_acquired(addr) +#define __itt_sync_acquired_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_sync_acquired_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Start sync object releasing code. Is called before the lock release call. 
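+ *
+ * Illustrative usage sketch (not part of the original header) showing how the
+ * calls in this group typically bracket a user-written spin lock; try_lock()
+ * and unlock() stand for the application's own primitives, and the
+ * narrow-string form of __itt_sync_create is shown:
+ * @code
+ *     __itt_sync_create(&lock, "UserSpinLock", "my_lock", __itt_attr_mutex);
+ *
+ *     __itt_sync_prepare(&lock);          // about to spin on the object
+ *     while (!try_lock(&lock)) { }        // user-defined acquisition loop
+ *     __itt_sync_acquired(&lock);         // spin loop completed successfully
+ *     // ... critical section ...
+ *     __itt_sync_releasing(&lock);        // called before the actual release
+ *     unlock(&lock);                      // user-defined release
+ *
+ *     __itt_sync_destroy(&lock);          // when the object is no longer used
+ * @endcode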
+ */ +void ITTAPI __itt_sync_releasing(void* addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, sync_releasing, (void *addr)) +#define __itt_sync_releasing ITTNOTIFY_VOID(sync_releasing) +#define __itt_sync_releasing_ptr ITTNOTIFY_NAME(sync_releasing) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_sync_releasing(addr) +#define __itt_sync_releasing_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_sync_releasing_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} */ + +/** @} sync group */ + +/**************************************************************//** + * @name group of functions is used for correctness checking tools + ******************************************************************/ +/** @{ */ +/** + * @ingroup legacy + * @deprecated Legacy API + * @brief Fast synchronization which does no require spinning. + * - This special function is to be used by TBB and OpenMP libraries only when they know + * there is no spin but they need to suppress TC warnings about shared variable modifications. + * - It only has corresponding pointers in static library and does not have corresponding function + * in dynamic library. + * @see void __itt_sync_prepare(void* addr); + */ +void ITTAPI __itt_fsync_prepare(void* addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, fsync_prepare, (void *addr)) +#define __itt_fsync_prepare ITTNOTIFY_VOID(fsync_prepare) +#define __itt_fsync_prepare_ptr ITTNOTIFY_NAME(fsync_prepare) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_fsync_prepare(addr) +#define __itt_fsync_prepare_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_fsync_prepare_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup legacy + * @deprecated Legacy API + * @brief Fast synchronization which does no require spinning. + * - This special function is to be used by TBB and OpenMP libraries only when they know + * there is no spin but they need to suppress TC warnings about shared variable modifications. + * - It only has corresponding pointers in static library and does not have corresponding function + * in dynamic library. + * @see void __itt_sync_cancel(void *addr); + */ +void ITTAPI __itt_fsync_cancel(void *addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, fsync_cancel, (void *addr)) +#define __itt_fsync_cancel ITTNOTIFY_VOID(fsync_cancel) +#define __itt_fsync_cancel_ptr ITTNOTIFY_NAME(fsync_cancel) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_fsync_cancel(addr) +#define __itt_fsync_cancel_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_fsync_cancel_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup legacy + * @deprecated Legacy API + * @brief Fast synchronization which does no require spinning. + * - This special function is to be used by TBB and OpenMP libraries only when they know + * there is no spin but they need to suppress TC warnings about shared variable modifications. + * - It only has corresponding pointers in static library and does not have corresponding function + * in dynamic library. 
+ * @see void __itt_sync_acquired(void *addr); + */ +void ITTAPI __itt_fsync_acquired(void *addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, fsync_acquired, (void *addr)) +#define __itt_fsync_acquired ITTNOTIFY_VOID(fsync_acquired) +#define __itt_fsync_acquired_ptr ITTNOTIFY_NAME(fsync_acquired) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_fsync_acquired(addr) +#define __itt_fsync_acquired_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_fsync_acquired_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup legacy + * @deprecated Legacy API + * @brief Fast synchronization which does no require spinning. + * - This special function is to be used by TBB and OpenMP libraries only when they know + * there is no spin but they need to suppress TC warnings about shared variable modifications. + * - It only has corresponding pointers in static library and does not have corresponding function + * in dynamic library. + * @see void __itt_sync_releasing(void* addr); + */ +void ITTAPI __itt_fsync_releasing(void* addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, fsync_releasing, (void *addr)) +#define __itt_fsync_releasing ITTNOTIFY_VOID(fsync_releasing) +#define __itt_fsync_releasing_ptr ITTNOTIFY_NAME(fsync_releasing) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_fsync_releasing(addr) +#define __itt_fsync_releasing_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_fsync_releasing_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} */ + +/** + * @defgroup model Modeling by Intel(R) Parallel Advisor + * @ingroup public + * This is the subset of itt used for modeling by Intel(R) Parallel Advisor. + * This API is called ONLY using annotate.h, by "Annotation" macros + * the user places in their sources during the parallelism modeling steps. + * + * site_begin/end and task_begin/end take the address of handle variables, + * which are writeable by the API. Handles must be 0 initialized prior + * to the first call to begin, or may cause a run-time failure. + * The handles are initialized in a multi-thread safe way by the API if + * the handle is 0. The commonly expected idiom is one static handle to + * identify a site or task. If a site or task of the same name has already + * been started during this collection, the same handle MAY be returned, + * but is not required to be - it is unspecified if data merging is done + * based on name. These routines also take an instance variable. Like + * the lexical instance, these must be 0 initialized. Unlike the lexical + * instance, this is used to track a single dynamic instance. + * + * API used by the Intel(R) Parallel Advisor to describe potential concurrency + * and related activities. User-added source annotations expand to calls + * to these procedures to enable modeling of a hypothetical concurrent + * execution serially. 
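+ *
+ * Illustrative usage sketch (not part of the original header), calling the raw
+ * entry points directly rather than through the annotate.h macros; the names
+ * "parallel_site" and "independent_work" are placeholders:
+ * @code
+ *     static __itt_model_site          site;            // zero-initialized, as required
+ *     static __itt_model_site_instance site_instance;
+ *     static __itt_model_task          task;
+ *     static __itt_model_task_instance task_instance;
+ *
+ *     __itt_model_site_begin(&site, &site_instance, "parallel_site");
+ *     __itt_model_task_begin(&task, &task_instance, "independent_work");
+ *     // ... work that could execute as a concurrent task ...
+ *     __itt_model_task_end(&task, &task_instance);
+ *     __itt_model_site_end(&site, &site_instance);
+ * @endcode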
+ * @{ + */ +#if !defined(_ADVISOR_ANNOTATE_H_) || defined(ANNOTATE_EXPAND_NULL) + +typedef void* __itt_model_site; /*!< @brief handle for lexical site */ +typedef void* __itt_model_site_instance; /*!< @brief handle for dynamic instance */ +typedef void* __itt_model_task; /*!< @brief handle for lexical site */ +typedef void* __itt_model_task_instance; /*!< @brief handle for dynamic instance */ + +/** + * @enum __itt_model_disable + * @brief Enumerator for the disable methods + */ +typedef enum { + __itt_model_disable_observation, + __itt_model_disable_collection +} __itt_model_disable; + +#endif /* !_ADVISOR_ANNOTATE_H_ || ANNOTATE_EXPAND_NULL */ + +/** + * @brief ANNOTATE_SITE_BEGIN/ANNOTATE_SITE_END support. + * + * site_begin/end model a potential concurrency site. + * site instances may be recursively nested with themselves. + * site_end exits the most recently started but unended site for the current + * thread. The handle passed to end may be used to validate structure. + * Instances of a site encountered on different threads concurrently + * are considered completely distinct. If the site name for two different + * lexical sites match, it is unspecified whether they are treated as the + * same or different for data presentation. + */ +void ITTAPI __itt_model_site_begin(__itt_model_site *site, __itt_model_site_instance *instance, const char *name); +#if ITT_PLATFORM==ITT_PLATFORM_WIN +void ITTAPI __itt_model_site_beginW(const wchar_t *name); +#endif +void ITTAPI __itt_model_site_beginA(const char *name); +void ITTAPI __itt_model_site_beginAL(const char *name, size_t siteNameLen); +void ITTAPI __itt_model_site_end (__itt_model_site *site, __itt_model_site_instance *instance); +void ITTAPI __itt_model_site_end_2(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, model_site_begin, (__itt_model_site *site, __itt_model_site_instance *instance, const char *name)) +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUBV(ITTAPI, void, model_site_beginW, (const wchar_t *name)) +#endif +ITT_STUBV(ITTAPI, void, model_site_beginA, (const char *name)) +ITT_STUBV(ITTAPI, void, model_site_beginAL, (const char *name, size_t siteNameLen)) +ITT_STUBV(ITTAPI, void, model_site_end, (__itt_model_site *site, __itt_model_site_instance *instance)) +ITT_STUBV(ITTAPI, void, model_site_end_2, (void)) +#define __itt_model_site_begin ITTNOTIFY_VOID(model_site_begin) +#define __itt_model_site_begin_ptr ITTNOTIFY_NAME(model_site_begin) +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_model_site_beginW ITTNOTIFY_VOID(model_site_beginW) +#define __itt_model_site_beginW_ptr ITTNOTIFY_NAME(model_site_beginW) +#endif +#define __itt_model_site_beginA ITTNOTIFY_VOID(model_site_beginA) +#define __itt_model_site_beginA_ptr ITTNOTIFY_NAME(model_site_beginA) +#define __itt_model_site_beginAL ITTNOTIFY_VOID(model_site_beginAL) +#define __itt_model_site_beginAL_ptr ITTNOTIFY_NAME(model_site_beginAL) +#define __itt_model_site_end ITTNOTIFY_VOID(model_site_end) +#define __itt_model_site_end_ptr ITTNOTIFY_NAME(model_site_end) +#define __itt_model_site_end_2 ITTNOTIFY_VOID(model_site_end_2) +#define __itt_model_site_end_2_ptr ITTNOTIFY_NAME(model_site_end_2) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_model_site_begin(site, instance, name) +#define __itt_model_site_begin_ptr 0 +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_model_site_beginW(name) +#define __itt_model_site_beginW_ptr 0 +#endif +#define __itt_model_site_beginA(name) +#define 
__itt_model_site_beginA_ptr 0 +#define __itt_model_site_beginAL(name, siteNameLen) +#define __itt_model_site_beginAL_ptr 0 +#define __itt_model_site_end(site, instance) +#define __itt_model_site_end_ptr 0 +#define __itt_model_site_end_2() +#define __itt_model_site_end_2_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_model_site_begin_ptr 0 +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_model_site_beginW_ptr 0 +#endif +#define __itt_model_site_beginA_ptr 0 +#define __itt_model_site_beginAL_ptr 0 +#define __itt_model_site_end_ptr 0 +#define __itt_model_site_end_2_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief ANNOTATE_TASK_BEGIN/ANNOTATE_TASK_END support + * + * task_begin/end model a potential task, which is contained within the most + * closely enclosing dynamic site. task_end exits the most recently started + * but unended task. The handle passed to end may be used to validate + * structure. It is unspecified if bad dynamic nesting is detected. If it + * is, it should be encoded in the resulting data collection. The collector + * should not fail due to construct nesting issues, nor attempt to directly + * indicate the problem. + */ +void ITTAPI __itt_model_task_begin(__itt_model_task *task, __itt_model_task_instance *instance, const char *name); +#if ITT_PLATFORM==ITT_PLATFORM_WIN +void ITTAPI __itt_model_task_beginW(const wchar_t *name); +void ITTAPI __itt_model_iteration_taskW(const wchar_t *name); +#endif +void ITTAPI __itt_model_task_beginA(const char *name); +void ITTAPI __itt_model_task_beginAL(const char *name, size_t taskNameLen); +void ITTAPI __itt_model_iteration_taskA(const char *name); +void ITTAPI __itt_model_iteration_taskAL(const char *name, size_t taskNameLen); +void ITTAPI __itt_model_task_end (__itt_model_task *task, __itt_model_task_instance *instance); +void ITTAPI __itt_model_task_end_2(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, model_task_begin, (__itt_model_task *task, __itt_model_task_instance *instance, const char *name)) +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUBV(ITTAPI, void, model_task_beginW, (const wchar_t *name)) +ITT_STUBV(ITTAPI, void, model_iteration_taskW, (const wchar_t *name)) +#endif +ITT_STUBV(ITTAPI, void, model_task_beginA, (const char *name)) +ITT_STUBV(ITTAPI, void, model_task_beginAL, (const char *name, size_t taskNameLen)) +ITT_STUBV(ITTAPI, void, model_iteration_taskA, (const char *name)) +ITT_STUBV(ITTAPI, void, model_iteration_taskAL, (const char *name, size_t taskNameLen)) +ITT_STUBV(ITTAPI, void, model_task_end, (__itt_model_task *task, __itt_model_task_instance *instance)) +ITT_STUBV(ITTAPI, void, model_task_end_2, (void)) +#define __itt_model_task_begin ITTNOTIFY_VOID(model_task_begin) +#define __itt_model_task_begin_ptr ITTNOTIFY_NAME(model_task_begin) +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_model_task_beginW ITTNOTIFY_VOID(model_task_beginW) +#define __itt_model_task_beginW_ptr ITTNOTIFY_NAME(model_task_beginW) +#define __itt_model_iteration_taskW ITTNOTIFY_VOID(model_iteration_taskW) +#define __itt_model_iteration_taskW_ptr ITTNOTIFY_NAME(model_iteration_taskW) +#endif +#define __itt_model_task_beginA ITTNOTIFY_VOID(model_task_beginA) +#define __itt_model_task_beginA_ptr ITTNOTIFY_NAME(model_task_beginA) +#define __itt_model_task_beginAL ITTNOTIFY_VOID(model_task_beginAL) +#define __itt_model_task_beginAL_ptr ITTNOTIFY_NAME(model_task_beginAL) +#define 
__itt_model_iteration_taskA ITTNOTIFY_VOID(model_iteration_taskA) +#define __itt_model_iteration_taskA_ptr ITTNOTIFY_NAME(model_iteration_taskA) +#define __itt_model_iteration_taskAL ITTNOTIFY_VOID(model_iteration_taskAL) +#define __itt_model_iteration_taskAL_ptr ITTNOTIFY_NAME(model_iteration_taskAL) +#define __itt_model_task_end ITTNOTIFY_VOID(model_task_end) +#define __itt_model_task_end_ptr ITTNOTIFY_NAME(model_task_end) +#define __itt_model_task_end_2 ITTNOTIFY_VOID(model_task_end_2) +#define __itt_model_task_end_2_ptr ITTNOTIFY_NAME(model_task_end_2) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_model_task_begin(task, instance, name) +#define __itt_model_task_begin_ptr 0 +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_model_task_beginW(name) +#define __itt_model_task_beginW_ptr 0 +#endif +#define __itt_model_task_beginA(name) +#define __itt_model_task_beginA_ptr 0 +#define __itt_model_task_beginAL(name, siteNameLen) +#define __itt_model_task_beginAL_ptr 0 +#define __itt_model_iteration_taskA(name) +#define __itt_model_iteration_taskA_ptr 0 +#define __itt_model_iteration_taskAL(name, siteNameLen) +#define __itt_model_iteration_taskAL_ptr 0 +#define __itt_model_task_end(task, instance) +#define __itt_model_task_end_ptr 0 +#define __itt_model_task_end_2() +#define __itt_model_task_end_2_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_model_task_begin_ptr 0 +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_model_task_beginW_ptr 0 +#endif +#define __itt_model_task_beginA_ptr 0 +#define __itt_model_task_beginAL_ptr 0 +#define __itt_model_iteration_taskA_ptr 0 +#define __itt_model_iteration_taskAL_ptr 0 +#define __itt_model_task_end_ptr 0 +#define __itt_model_task_end_2_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief ANNOTATE_LOCK_ACQUIRE/ANNOTATE_LOCK_RELEASE support + * + * lock_acquire/release model a potential lock for both lockset and + * performance modeling. Each unique address is modeled as a separate + * lock, with invalid addresses being valid lock IDs. Specifically: + * no storage is accessed by the API at the specified address - it is only + * used for lock identification. Lock acquires may be self-nested and are + * unlocked by a corresponding number of releases. + * (These closely correspond to __itt_sync_acquired/__itt_sync_releasing, + * but may not have identical semantics.) 
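+ *
+ * Illustrative usage sketch (not part of the original header); my_lock is any
+ * application variable whose address identifies the modeled lock (no storage
+ * is accessed through it):
+ * @code
+ *     __itt_model_lock_acquire(&my_lock);
+ *     // ... operations modeled as protected by the lock ...
+ *     __itt_model_lock_release(&my_lock);
+ * @endcode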
+ */ +void ITTAPI __itt_model_lock_acquire(void *lock); +void ITTAPI __itt_model_lock_acquire_2(void *lock); +void ITTAPI __itt_model_lock_release(void *lock); +void ITTAPI __itt_model_lock_release_2(void *lock); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, model_lock_acquire, (void *lock)) +ITT_STUBV(ITTAPI, void, model_lock_acquire_2, (void *lock)) +ITT_STUBV(ITTAPI, void, model_lock_release, (void *lock)) +ITT_STUBV(ITTAPI, void, model_lock_release_2, (void *lock)) +#define __itt_model_lock_acquire ITTNOTIFY_VOID(model_lock_acquire) +#define __itt_model_lock_acquire_ptr ITTNOTIFY_NAME(model_lock_acquire) +#define __itt_model_lock_acquire_2 ITTNOTIFY_VOID(model_lock_acquire_2) +#define __itt_model_lock_acquire_2_ptr ITTNOTIFY_NAME(model_lock_acquire_2) +#define __itt_model_lock_release ITTNOTIFY_VOID(model_lock_release) +#define __itt_model_lock_release_ptr ITTNOTIFY_NAME(model_lock_release) +#define __itt_model_lock_release_2 ITTNOTIFY_VOID(model_lock_release_2) +#define __itt_model_lock_release_2_ptr ITTNOTIFY_NAME(model_lock_release_2) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_model_lock_acquire(lock) +#define __itt_model_lock_acquire_ptr 0 +#define __itt_model_lock_acquire_2(lock) +#define __itt_model_lock_acquire_2_ptr 0 +#define __itt_model_lock_release(lock) +#define __itt_model_lock_release_ptr 0 +#define __itt_model_lock_release_2(lock) +#define __itt_model_lock_release_2_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_model_lock_acquire_ptr 0 +#define __itt_model_lock_acquire_2_ptr 0 +#define __itt_model_lock_release_ptr 0 +#define __itt_model_lock_release_2_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief ANNOTATE_RECORD_ALLOCATION/ANNOTATE_RECORD_DEALLOCATION support + * + * record_allocation/deallocation describe user-defined memory allocator + * behavior, which may be required for correctness modeling to understand + * when storage is not expected to be actually reused across threads. 
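+ *
+ * Illustrative usage sketch (not part of the original header), placed inside a
+ * hypothetical user-defined allocator (my_pool_allocate / my_pool_free are
+ * placeholders):
+ * @code
+ *     void* p = my_pool_allocate(size);
+ *     __itt_model_record_allocation(p, size);
+ *     // ... the storage is used and eventually retired ...
+ *     __itt_model_record_deallocation(p);
+ *     my_pool_free(p);
+ * @endcode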
+ */ +void ITTAPI __itt_model_record_allocation (void *addr, size_t size); +void ITTAPI __itt_model_record_deallocation(void *addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, model_record_allocation, (void *addr, size_t size)) +ITT_STUBV(ITTAPI, void, model_record_deallocation, (void *addr)) +#define __itt_model_record_allocation ITTNOTIFY_VOID(model_record_allocation) +#define __itt_model_record_allocation_ptr ITTNOTIFY_NAME(model_record_allocation) +#define __itt_model_record_deallocation ITTNOTIFY_VOID(model_record_deallocation) +#define __itt_model_record_deallocation_ptr ITTNOTIFY_NAME(model_record_deallocation) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_model_record_allocation(addr, size) +#define __itt_model_record_allocation_ptr 0 +#define __itt_model_record_deallocation(addr) +#define __itt_model_record_deallocation_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_model_record_allocation_ptr 0 +#define __itt_model_record_deallocation_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief ANNOTATE_INDUCTION_USES support + * + * Note particular storage is inductive through the end of the current site + */ +void ITTAPI __itt_model_induction_uses(void* addr, size_t size); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, model_induction_uses, (void *addr, size_t size)) +#define __itt_model_induction_uses ITTNOTIFY_VOID(model_induction_uses) +#define __itt_model_induction_uses_ptr ITTNOTIFY_NAME(model_induction_uses) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_model_induction_uses(addr, size) +#define __itt_model_induction_uses_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_model_induction_uses_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief ANNOTATE_REDUCTION_USES support + * + * Note particular storage is used for reduction through the end + * of the current site + */ +void ITTAPI __itt_model_reduction_uses(void* addr, size_t size); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, model_reduction_uses, (void *addr, size_t size)) +#define __itt_model_reduction_uses ITTNOTIFY_VOID(model_reduction_uses) +#define __itt_model_reduction_uses_ptr ITTNOTIFY_NAME(model_reduction_uses) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_model_reduction_uses(addr, size) +#define __itt_model_reduction_uses_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_model_reduction_uses_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief ANNOTATE_OBSERVE_USES support + * + * Have correctness modeling record observations about uses of storage + * through the end of the current site + */ +void ITTAPI __itt_model_observe_uses(void* addr, size_t size); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, model_observe_uses, (void *addr, size_t size)) +#define __itt_model_observe_uses ITTNOTIFY_VOID(model_observe_uses) +#define __itt_model_observe_uses_ptr ITTNOTIFY_NAME(model_observe_uses) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_model_observe_uses(addr, size) +#define __itt_model_observe_uses_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define 
__itt_model_observe_uses_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief ANNOTATE_CLEAR_USES support + * + * Clear the special handling of a piece of storage related to induction, + * reduction or observe_uses + */ +void ITTAPI __itt_model_clear_uses(void* addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, model_clear_uses, (void *addr)) +#define __itt_model_clear_uses ITTNOTIFY_VOID(model_clear_uses) +#define __itt_model_clear_uses_ptr ITTNOTIFY_NAME(model_clear_uses) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_model_clear_uses(addr) +#define __itt_model_clear_uses_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_model_clear_uses_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief ANNOTATE_DISABLE_*_PUSH/ANNOTATE_DISABLE_*_POP support + * + * disable_push/disable_pop push and pop disabling based on a parameter. + * Disabling observations stops processing of memory references during + * correctness modeling, and all annotations that occur in the disabled + * region. This allows description of code that is expected to be handled + * specially during conversion to parallelism or that is not recognized + * by tools (e.g. some kinds of synchronization operations.) + * This mechanism causes all annotations in the disabled region, other + * than disable_push and disable_pop, to be ignored. (For example, this + * might validly be used to disable an entire parallel site and the contained + * tasks and locking in it for data collection purposes.) + * The disable for collection is a more expensive operation, but reduces + * collector overhead significantly. This applies to BOTH correctness data + * collection and performance data collection. For example, a site + * containing a task might only enable data collection for the first 10 + * iterations. Both performance and correctness data should reflect this, + * and the program should run as close to full speed as possible when + * collection is disabled. 
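To make the push/pop semantics concrete, here is a sketch that collects data only for the first 10 iterations of a loop; it assumes the __itt_model_disable enumeration (with the __itt_model_disable_collection value) declared earlier in this header, and do_iteration is hypothetical:

    #include "ittnotify.h"

    extern void do_iteration(int i);   /* hypothetical work function */

    void process(int iterations)
    {
        int i;
        for (i = 0; i < iterations; ++i) {
            if (i == 10)   /* stop correctness and performance collection after 10 iterations */
                __itt_model_disable_push(__itt_model_disable_collection);
            do_iteration(i);
        }
        if (iterations > 10)
            __itt_model_disable_pop();   /* restore the previous collection state */
    }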
+ */ +void ITTAPI __itt_model_disable_push(__itt_model_disable x); +void ITTAPI __itt_model_disable_pop(void); +void ITTAPI __itt_model_aggregate_task(size_t x); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, model_disable_push, (__itt_model_disable x)) +ITT_STUBV(ITTAPI, void, model_disable_pop, (void)) +ITT_STUBV(ITTAPI, void, model_aggregate_task, (size_t x)) +#define __itt_model_disable_push ITTNOTIFY_VOID(model_disable_push) +#define __itt_model_disable_push_ptr ITTNOTIFY_NAME(model_disable_push) +#define __itt_model_disable_pop ITTNOTIFY_VOID(model_disable_pop) +#define __itt_model_disable_pop_ptr ITTNOTIFY_NAME(model_disable_pop) +#define __itt_model_aggregate_task ITTNOTIFY_VOID(model_aggregate_task) +#define __itt_model_aggregate_task_ptr ITTNOTIFY_NAME(model_aggregate_task) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_model_disable_push(x) +#define __itt_model_disable_push_ptr 0 +#define __itt_model_disable_pop() +#define __itt_model_disable_pop_ptr 0 +#define __itt_model_aggregate_task(x) +#define __itt_model_aggregate_task_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_model_disable_push_ptr 0 +#define __itt_model_disable_pop_ptr 0 +#define __itt_model_aggregate_task_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} model group */ + +/** + * @defgroup heap Heap + * @ingroup public + * Heap group + * @{ + */ + +typedef void* __itt_heap_function; + +/** + * @brief Create an identification for heap function + * @return non-zero identifier or NULL + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +__itt_heap_function ITTAPI __itt_heap_function_createA(const char* name, const char* domain); +__itt_heap_function ITTAPI __itt_heap_function_createW(const wchar_t* name, const wchar_t* domain); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_heap_function_create __itt_heap_function_createW +# define __itt_heap_function_create_ptr __itt_heap_function_createW_ptr +#else +# define __itt_heap_function_create __itt_heap_function_createA +# define __itt_heap_function_create_ptr __itt_heap_function_createA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +__itt_heap_function ITTAPI __itt_heap_function_create(const char* name, const char* domain); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_heap_function, heap_function_createA, (const char* name, const char* domain)) +ITT_STUB(ITTAPI, __itt_heap_function, heap_function_createW, (const wchar_t* name, const wchar_t* domain)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_heap_function, heap_function_create, (const char* name, const char* domain)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_heap_function_createA ITTNOTIFY_DATA(heap_function_createA) +#define __itt_heap_function_createA_ptr ITTNOTIFY_NAME(heap_function_createA) +#define __itt_heap_function_createW ITTNOTIFY_DATA(heap_function_createW) +#define __itt_heap_function_createW_ptr ITTNOTIFY_NAME(heap_function_createW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_heap_function_create ITTNOTIFY_DATA(heap_function_create) +#define __itt_heap_function_create_ptr ITTNOTIFY_NAME(heap_function_create) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ 
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_heap_function_createA(name, domain) (__itt_heap_function)0
+#define __itt_heap_function_createA_ptr 0
+#define __itt_heap_function_createW(name, domain) (__itt_heap_function)0
+#define __itt_heap_function_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_heap_function_create(name, domain) (__itt_heap_function)0
+#define __itt_heap_function_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_heap_function_createA_ptr 0
+#define __itt_heap_function_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_heap_function_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record an allocation begin occurrence.
+ */
+void ITTAPI __itt_heap_allocate_begin(__itt_heap_function h, size_t size, int initialized);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_allocate_begin, (__itt_heap_function h, size_t size, int initialized))
+#define __itt_heap_allocate_begin ITTNOTIFY_VOID(heap_allocate_begin)
+#define __itt_heap_allocate_begin_ptr ITTNOTIFY_NAME(heap_allocate_begin)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_allocate_begin(h, size, initialized)
+#define __itt_heap_allocate_begin_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_allocate_begin_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record an allocation end occurrence.
+ */
+void ITTAPI __itt_heap_allocate_end(__itt_heap_function h, void** addr, size_t size, int initialized);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_allocate_end, (__itt_heap_function h, void** addr, size_t size, int initialized))
+#define __itt_heap_allocate_end ITTNOTIFY_VOID(heap_allocate_end)
+#define __itt_heap_allocate_end_ptr ITTNOTIFY_NAME(heap_allocate_end)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_allocate_end(h, addr, size, initialized)
+#define __itt_heap_allocate_end_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_allocate_end_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record a free begin occurrence.
+ */
+void ITTAPI __itt_heap_free_begin(__itt_heap_function h, void* addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_free_begin, (__itt_heap_function h, void* addr))
+#define __itt_heap_free_begin ITTNOTIFY_VOID(heap_free_begin)
+#define __itt_heap_free_begin_ptr ITTNOTIFY_NAME(heap_free_begin)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_free_begin(h, addr)
+#define __itt_heap_free_begin_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_free_begin_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record a free end occurrence.
+ */
+void ITTAPI __itt_heap_free_end(__itt_heap_function h, void* addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_free_end, (__itt_heap_function h, void* addr))
+#define __itt_heap_free_end ITTNOTIFY_VOID(heap_free_end)
+#define __itt_heap_free_end_ptr ITTNOTIFY_NAME(heap_free_end)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_free_end(h, addr)
+#define __itt_heap_free_end_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_free_end_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record a reallocation begin occurrence.
+ */
+void ITTAPI __itt_heap_reallocate_begin(__itt_heap_function h, void* addr, size_t new_size, int initialized);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_reallocate_begin, (__itt_heap_function h, void* addr, size_t new_size, int initialized))
+#define __itt_heap_reallocate_begin ITTNOTIFY_VOID(heap_reallocate_begin)
+#define __itt_heap_reallocate_begin_ptr ITTNOTIFY_NAME(heap_reallocate_begin)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_reallocate_begin(h, addr, new_size, initialized)
+#define __itt_heap_reallocate_begin_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_reallocate_begin_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record a reallocation end occurrence.
+ */
+void ITTAPI __itt_heap_reallocate_end(__itt_heap_function h, void* addr, void** new_addr, size_t new_size, int initialized);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_reallocate_end, (__itt_heap_function h, void* addr, void** new_addr, size_t new_size, int initialized))
+#define __itt_heap_reallocate_end ITTNOTIFY_VOID(heap_reallocate_end)
+#define __itt_heap_reallocate_end_ptr ITTNOTIFY_NAME(heap_reallocate_end)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_reallocate_end(h, addr, new_addr, new_size, initialized)
+#define __itt_heap_reallocate_end_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_reallocate_end_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @brief internal access begin */
+void ITTAPI __itt_heap_internal_access_begin(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_internal_access_begin, (void))
+#define __itt_heap_internal_access_begin ITTNOTIFY_VOID(heap_internal_access_begin)
+#define __itt_heap_internal_access_begin_ptr ITTNOTIFY_NAME(heap_internal_access_begin)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_internal_access_begin()
+#define __itt_heap_internal_access_begin_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_internal_access_begin_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @brief internal access end */
+void ITTAPI __itt_heap_internal_access_end(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_internal_access_end, (void))
+#define __itt_heap_internal_access_end ITTNOTIFY_VOID(heap_internal_access_end)
+#define __itt_heap_internal_access_end_ptr ITTNOTIFY_NAME(heap_internal_access_end)
+#else /* 
INTEL_NO_ITTNOTIFY_API */ +#define __itt_heap_internal_access_end() +#define __itt_heap_internal_access_end_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_internal_access_end_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @brief record memory growth begin */ +void ITTAPI __itt_heap_record_memory_growth_begin(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_record_memory_growth_begin, (void)) +#define __itt_heap_record_memory_growth_begin ITTNOTIFY_VOID(heap_record_memory_growth_begin) +#define __itt_heap_record_memory_growth_begin_ptr ITTNOTIFY_NAME(heap_record_memory_growth_begin) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_heap_record_memory_growth_begin() +#define __itt_heap_record_memory_growth_begin_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_record_memory_growth_begin_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @brief record memory growth end */ +void ITTAPI __itt_heap_record_memory_growth_end(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_record_memory_growth_end, (void)) +#define __itt_heap_record_memory_growth_end ITTNOTIFY_VOID(heap_record_memory_growth_end) +#define __itt_heap_record_memory_growth_end_ptr ITTNOTIFY_NAME(heap_record_memory_growth_end) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_heap_record_memory_growth_end() +#define __itt_heap_record_memory_growth_end_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_record_memory_growth_end_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Specify the type of heap detection/reporting to modify. + */ +/** + * @hideinitializer + * @brief Report on memory leaks. + */ +#define __itt_heap_leaks 0x00000001 + +/** + * @hideinitializer + * @brief Report on memory growth. 
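Putting the heap hooks above together, a traced malloc/free wrapper might look like the following sketch (narrow-character __itt_heap_function_create assumed; the "my_heap" domain string is arbitrary):

    #include "ittnotify.h"
    #include <stdlib.h>

    static __itt_heap_function g_alloc_fn;
    static __itt_heap_function g_free_fn;

    void traced_heap_init(void)
    {
        g_alloc_fn = __itt_heap_function_create("traced_malloc", "my_heap");
        g_free_fn  = __itt_heap_function_create("traced_free", "my_heap");
    }

    void* traced_malloc(size_t size)
    {
        void* p;
        __itt_heap_allocate_begin(g_alloc_fn, size, 0 /* uninitialized */);
        p = malloc(size);
        __itt_heap_allocate_end(g_alloc_fn, &p, size, 0);
        return p;
    }

    void traced_free(void* p)
    {
        __itt_heap_free_begin(g_free_fn, p);
        free(p);
        __itt_heap_free_end(g_free_fn, p);
    }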
+ */ +#define __itt_heap_growth 0x00000002 + + +/** @brief heap reset detection */ +void ITTAPI __itt_heap_reset_detection(unsigned int reset_mask); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_reset_detection, (unsigned int reset_mask)) +#define __itt_heap_reset_detection ITTNOTIFY_VOID(heap_reset_detection) +#define __itt_heap_reset_detection_ptr ITTNOTIFY_NAME(heap_reset_detection) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_heap_reset_detection() +#define __itt_heap_reset_detection_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_reset_detection_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @brief report */ +void ITTAPI __itt_heap_record(unsigned int record_mask); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, heap_record, (unsigned int record_mask)) +#define __itt_heap_record ITTNOTIFY_VOID(heap_record) +#define __itt_heap_record_ptr ITTNOTIFY_NAME(heap_record) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_heap_record() +#define __itt_heap_record_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_heap_record_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @} heap group */ +/** @endcond */ +/* ========================================================================== */ + +/** + * @defgroup domains Domains + * @ingroup public + * Domains group + * @{ + */ + +/** @cond exclude_from_documentation */ +#pragma pack(push, 8) + +typedef struct ___itt_domain +{ + volatile int flags; /*!< Zero if disabled, non-zero if enabled. The meaning of different non-zero values is reserved to the runtime */ + const char* nameA; /*!< Copy of original name in ASCII. */ +#if defined(UNICODE) || defined(_UNICODE) + const wchar_t* nameW; /*!< Copy of original name in UNICODE. */ +#else /* UNICODE || _UNICODE */ + void* nameW; +#endif /* UNICODE || _UNICODE */ + int extra1; /*!< Reserved to the runtime */ + void* extra2; /*!< Reserved to the runtime */ + struct ___itt_domain* next; +} __itt_domain; + +#pragma pack(pop) +/** @endcond */ + +/** + * @ingroup domains + * @brief Create a domain. + * Create domain using some domain name: the URI naming style is recommended. + * Because the set of domains is expected to be static over the application's + * execution time, there is no mechanism to destroy a domain. + * Any domain can be accessed by any thread in the process, regardless of + * which thread created the domain. This call is thread-safe. 
+ * @param[in] name name of domain + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +__itt_domain* ITTAPI __itt_domain_createA(const char *name); +__itt_domain* ITTAPI __itt_domain_createW(const wchar_t *name); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_domain_create __itt_domain_createW +# define __itt_domain_create_ptr __itt_domain_createW_ptr +#else /* UNICODE */ +# define __itt_domain_create __itt_domain_createA +# define __itt_domain_create_ptr __itt_domain_createA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +__itt_domain* ITTAPI __itt_domain_create(const char *name); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_domain*, domain_createA, (const char *name)) +ITT_STUB(ITTAPI, __itt_domain*, domain_createW, (const wchar_t *name)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_domain*, domain_create, (const char *name)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_domain_createA ITTNOTIFY_DATA(domain_createA) +#define __itt_domain_createA_ptr ITTNOTIFY_NAME(domain_createA) +#define __itt_domain_createW ITTNOTIFY_DATA(domain_createW) +#define __itt_domain_createW_ptr ITTNOTIFY_NAME(domain_createW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_domain_create ITTNOTIFY_DATA(domain_create) +#define __itt_domain_create_ptr ITTNOTIFY_NAME(domain_create) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_domain_createA(name) (__itt_domain*)0 +#define __itt_domain_createA_ptr 0 +#define __itt_domain_createW(name) (__itt_domain*)0 +#define __itt_domain_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_domain_create(name) (__itt_domain*)0 +#define __itt_domain_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_domain_createA_ptr 0 +#define __itt_domain_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_domain_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} domains group */ + +/** + * @defgroup ids IDs + * @ingroup public + * IDs group + * @{ + */ + +/** @cond exclude_from_documentation */ +#pragma pack(push, 8) + +typedef struct ___itt_id +{ + unsigned long long d1, d2, d3; +} __itt_id; + +#pragma pack(pop) +/** @endcond */ + +static const __itt_id __itt_null = { 0, 0, 0 }; + +/** + * @ingroup ids + * @brief A convenience function is provided to create an ID without domain control. + * @brief This is a convenience function to initialize an __itt_id structure. This function + * does not affect the collector runtime in any way. After you make the ID with this + * function, you still must create it with the __itt_id_create function before using the ID + * to identify a named entity. + * @param[in] addr The address of object; high QWORD of the ID value. + * @param[in] extra The extra data to unique identify object; low QWORD of the ID value. 
+ */ + +ITT_INLINE __itt_id ITTAPI __itt_id_make(void* addr, unsigned long long extra) ITT_INLINE_ATTRIBUTE; +ITT_INLINE __itt_id ITTAPI __itt_id_make(void* addr, unsigned long long extra) +{ + __itt_id id = __itt_null; + id.d1 = (unsigned long long)((uintptr_t)addr); + id.d2 = (unsigned long long)extra; + id.d3 = (unsigned long long)0; /* Reserved. Must be zero */ + return id; +} + +/** + * @ingroup ids + * @brief Create an instance of identifier. + * This establishes the beginning of the lifetime of an instance of + * the given ID in the trace. Once this lifetime starts, the ID + * can be used to tag named entity instances in calls such as + * __itt_task_begin, and to specify relationships among + * identified named entity instances, using the \ref relations APIs. + * Instance IDs are not domain specific! + * @param[in] domain The domain controlling the execution of this call. + * @param[in] id The ID to create. + */ +void ITTAPI __itt_id_create(const __itt_domain *domain, __itt_id id); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, id_create, (const __itt_domain *domain, __itt_id id)) +#define __itt_id_create(d,x) ITTNOTIFY_VOID_D1(id_create,d,x) +#define __itt_id_create_ptr ITTNOTIFY_NAME(id_create) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_id_create(domain,id) +#define __itt_id_create_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_id_create_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup ids + * @brief Destroy an instance of identifier. + * This ends the lifetime of the current instance of the given ID value in the trace. + * Any relationships that are established after this lifetime ends are invalid. + * This call must be performed before the given ID value can be reused for a different + * named entity instance. + * @param[in] domain The domain controlling the execution of this call. + * @param[in] id The ID to destroy. + */ +void ITTAPI __itt_id_destroy(const __itt_domain *domain, __itt_id id); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, id_destroy, (const __itt_domain *domain, __itt_id id)) +#define __itt_id_destroy(d,x) ITTNOTIFY_VOID_D1(id_destroy,d,x) +#define __itt_id_destroy_ptr ITTNOTIFY_NAME(id_destroy) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_id_destroy(domain,id) +#define __itt_id_destroy_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_id_destroy_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} ids group */ + +/** + * @defgroup handless String Handles + * @ingroup public + * String Handles group + * @{ + */ + +/** @cond exclude_from_documentation */ +#pragma pack(push, 8) + +typedef struct ___itt_string_handle +{ + const char* strA; /*!< Copy of original string in ASCII. */ +#if defined(UNICODE) || defined(_UNICODE) + const wchar_t* strW; /*!< Copy of original string in UNICODE. */ +#else /* UNICODE || _UNICODE */ + void* strW; +#endif /* UNICODE || _UNICODE */ + int extra1; /*!< Reserved. Must be zero */ + void* extra2; /*!< Reserved. Must be zero */ + struct ___itt_string_handle* next; +} __itt_string_handle; + +#pragma pack(pop) +/** @endcond */ + +/** + * @ingroup handles + * @brief Create a string handle. + * Create and return handle value that can be associated with a string. 
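A short sketch of the ID lifetime described above (the domain name is a placeholder; a narrow-character, non-UNICODE build is assumed):

    #include "ittnotify.h"

    void tag_object(void* obj)
    {
        __itt_domain* d = __itt_domain_create("com.example.ids");   /* placeholder name */
        __itt_id id = __itt_id_make(obj, 0);   /* address + extra value form the ID */
        __itt_id_create(d, id);                /* begin the instance's lifetime in the trace */
        /* ... use id to tag tasks or to build relations ... */
        __itt_id_destroy(d, id);               /* end the lifetime; the value may be reused later */
    }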
+ * Consecutive calls to __itt_string_handle_create with the same name + * return the same value. Because the set of string handles is expected to remain + * static during the application's execution time, there is no mechanism to destroy a string handle. + * Any string handle can be accessed by any thread in the process, regardless of which thread created + * the string handle. This call is thread-safe. + * @param[in] name The input string + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +__itt_string_handle* ITTAPI __itt_string_handle_createA(const char *name); +__itt_string_handle* ITTAPI __itt_string_handle_createW(const wchar_t *name); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_string_handle_create __itt_string_handle_createW +# define __itt_string_handle_create_ptr __itt_string_handle_createW_ptr +#else /* UNICODE */ +# define __itt_string_handle_create __itt_string_handle_createA +# define __itt_string_handle_create_ptr __itt_string_handle_createA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +__itt_string_handle* ITTAPI __itt_string_handle_create(const char *name); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_createA, (const char *name)) +ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_createW, (const wchar_t *name)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_create, (const char *name)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_string_handle_createA ITTNOTIFY_DATA(string_handle_createA) +#define __itt_string_handle_createA_ptr ITTNOTIFY_NAME(string_handle_createA) +#define __itt_string_handle_createW ITTNOTIFY_DATA(string_handle_createW) +#define __itt_string_handle_createW_ptr ITTNOTIFY_NAME(string_handle_createW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_string_handle_create ITTNOTIFY_DATA(string_handle_create) +#define __itt_string_handle_create_ptr ITTNOTIFY_NAME(string_handle_create) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_string_handle_createA(name) (__itt_string_handle*)0 +#define __itt_string_handle_createA_ptr 0 +#define __itt_string_handle_createW(name) (__itt_string_handle*)0 +#define __itt_string_handle_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_string_handle_create(name) (__itt_string_handle*)0 +#define __itt_string_handle_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_string_handle_createA_ptr 0 +#define __itt_string_handle_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_string_handle_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} handles group */ + +/** @cond exclude_from_documentation */ +typedef unsigned long long __itt_timestamp; +/** @endcond */ + +#define __itt_timestamp_none ((__itt_timestamp)-1LL) + +/** @cond exclude_from_gpa_documentation */ + +/** + * @ingroup timestamps + * @brief Return timestamp corresponding to the current moment. 
+ * This returns the timestamp in the format that is the most relevant for the current + * host or platform (RDTSC, QPC, and others). You can use the "<" operator to + * compare __itt_timestamp values. + */ +__itt_timestamp ITTAPI __itt_get_timestamp(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(ITTAPI, __itt_timestamp, get_timestamp, (void)) +#define __itt_get_timestamp ITTNOTIFY_DATA(get_timestamp) +#define __itt_get_timestamp_ptr ITTNOTIFY_NAME(get_timestamp) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_get_timestamp() +#define __itt_get_timestamp_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_get_timestamp_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} timestamps */ +/** @endcond */ + +/** @cond exclude_from_gpa_documentation */ + +/** + * @defgroup regions Regions + * @ingroup public + * Regions group + * @{ + */ +/** + * @ingroup regions + * @brief Begin of region instance. + * Successive calls to __itt_region_begin with the same ID are ignored + * until a call to __itt_region_end with the same ID + * @param[in] domain The domain for this region instance + * @param[in] id The instance ID for this region instance. Must not be __itt_null + * @param[in] parentid The instance ID for the parent of this region instance, or __itt_null + * @param[in] name The name of this region + */ +void ITTAPI __itt_region_begin(const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name); + +/** + * @ingroup regions + * @brief End of region instance. + * The first call to __itt_region_end with a given ID ends the + * region. Successive calls with the same ID are ignored, as are + * calls that do not have a matching __itt_region_begin call. + * @param[in] domain The domain for this region instance + * @param[in] id The instance ID for this region instance + */ +void ITTAPI __itt_region_end(const __itt_domain *domain, __itt_id id); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, region_begin, (const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name)) +ITT_STUBV(ITTAPI, void, region_end, (const __itt_domain *domain, __itt_id id)) +#define __itt_region_begin(d,x,y,z) ITTNOTIFY_VOID_D3(region_begin,d,x,y,z) +#define __itt_region_begin_ptr ITTNOTIFY_NAME(region_begin) +#define __itt_region_end(d,x) ITTNOTIFY_VOID_D1(region_end,d,x) +#define __itt_region_end_ptr ITTNOTIFY_NAME(region_end) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_region_begin(d,x,y,z) +#define __itt_region_begin_ptr 0 +#define __itt_region_end(d,x) +#define __itt_region_end_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_region_begin_ptr 0 +#define __itt_region_end_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} regions group */ + +/** + * @defgroup frames Frames + * @ingroup public + * Frames are similar to regions, but are intended to be easier to use and to implement. + * In particular: + * - Frames always represent periods of elapsed time + * - By default, frames have no nesting relationships + * @{ + */ + +/** + * @ingroup frames + * @brief Begin a frame instance. + * Successive calls to __itt_frame_begin with the + * same ID are ignored until a call to __itt_frame_end with the same ID. 
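For the region calls above, a usage sketch (domain, name, and ID values are placeholders):

    #include "ittnotify.h"

    void run_phase(void* phase_object)
    {
        __itt_domain* d = __itt_domain_create("com.example.regions");   /* placeholder */
        __itt_string_handle* name = __itt_string_handle_create("Phase1");
        __itt_id id = __itt_id_make(phase_object, 0);   /* any unique address/extra pair */
        __itt_region_begin(d, id, __itt_null, name);
        /* ... work attributed to this region instance ... */
        __itt_region_end(d, id);
    }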
+ * @param[in] domain The domain for this frame instance + * @param[in] id The instance ID for this frame instance or NULL + */ +void ITTAPI __itt_frame_begin_v3(const __itt_domain *domain, __itt_id *id); + +/** + * @ingroup frames + * @brief End a frame instance. + * The first call to __itt_frame_end with a given ID + * ends the frame. Successive calls with the same ID are ignored, as are + * calls that do not have a matching __itt_frame_begin call. + * @param[in] domain The domain for this frame instance + * @param[in] id The instance ID for this frame instance or NULL for current + */ +void ITTAPI __itt_frame_end_v3(const __itt_domain *domain, __itt_id *id); + +/** + * @ingroup frames + * @brief Submits a frame instance. + * Successive calls to __itt_frame_begin or __itt_frame_submit with the + * same ID are ignored until a call to __itt_frame_end or __itt_frame_submit + * with the same ID. + * Passing special __itt_timestamp_none value as "end" argument means + * take the current timestamp as the end timestamp. + * @param[in] domain The domain for this frame instance + * @param[in] id The instance ID for this frame instance or NULL + * @param[in] begin Timestamp of the beginning of the frame + * @param[in] end Timestamp of the end of the frame + */ +void ITTAPI __itt_frame_submit_v3(const __itt_domain *domain, __itt_id *id, + __itt_timestamp begin, __itt_timestamp end); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, frame_begin_v3, (const __itt_domain *domain, __itt_id *id)) +ITT_STUBV(ITTAPI, void, frame_end_v3, (const __itt_domain *domain, __itt_id *id)) +ITT_STUBV(ITTAPI, void, frame_submit_v3, (const __itt_domain *domain, __itt_id *id, __itt_timestamp begin, __itt_timestamp end)) +#define __itt_frame_begin_v3(d,x) ITTNOTIFY_VOID_D1(frame_begin_v3,d,x) +#define __itt_frame_begin_v3_ptr ITTNOTIFY_NAME(frame_begin_v3) +#define __itt_frame_end_v3(d,x) ITTNOTIFY_VOID_D1(frame_end_v3,d,x) +#define __itt_frame_end_v3_ptr ITTNOTIFY_NAME(frame_end_v3) +#define __itt_frame_submit_v3(d,x,b,e) ITTNOTIFY_VOID_D3(frame_submit_v3,d,x,b,e) +#define __itt_frame_submit_v3_ptr ITTNOTIFY_NAME(frame_submit_v3) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_frame_begin_v3(domain,id) +#define __itt_frame_begin_v3_ptr 0 +#define __itt_frame_end_v3(domain,id) +#define __itt_frame_end_v3_ptr 0 +#define __itt_frame_submit_v3(domain,id,begin,end) +#define __itt_frame_submit_v3_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_frame_begin_v3_ptr 0 +#define __itt_frame_end_v3_ptr 0 +#define __itt_frame_submit_v3_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} frames group */ +/** @endcond */ + +/** + * @defgroup taskgroup Task Group + * @ingroup public + * Task Group + * @{ + */ +/** + * @ingroup task_groups + * @brief Denotes a task_group instance. + * Successive calls to __itt_task_group with the same ID are ignored. + * @param[in] domain The domain for this task_group instance + * @param[in] id The instance ID for this task_group instance. Must not be __itt_null. + * @param[in] parentid The instance ID for the parent of this task_group instance, or __itt_null. 
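A sketch of the two ways to report a frame with the declarations above: bracketing begin/end calls, or submitting explicit timestamps afterwards (draw_frame and the domain name are placeholders):

    #include "ittnotify.h"
    #include <stddef.h>

    extern void draw_frame(int i);   /* hypothetical */

    void render(int n)
    {
        __itt_domain* d = __itt_domain_create("com.example.frames");   /* placeholder */
        int i;
        for (i = 0; i < n; ++i) {
            __itt_frame_begin_v3(d, NULL);   /* NULL id: frames are matched per domain */
            draw_frame(i);
            __itt_frame_end_v3(d, NULL);
        }

        /* Alternatively, measure first and submit the interval afterwards. */
        {
            __itt_timestamp begin = __itt_get_timestamp();
            draw_frame(0);
            __itt_frame_submit_v3(d, NULL, begin, __itt_timestamp_none);   /* none == "now" */
        }
    }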
+ * @param[in] name The name of this task_group
+ */
+void ITTAPI __itt_task_group(const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, task_group, (const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name))
+#define __itt_task_group(d,x,y,z) ITTNOTIFY_VOID_D3(task_group,d,x,y,z)
+#define __itt_task_group_ptr ITTNOTIFY_NAME(task_group)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_task_group(d,x,y,z)
+#define __itt_task_group_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_task_group_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} taskgroup group */
+
+/**
+ * @defgroup tasks Tasks
+ * @ingroup public
+ * A task instance represents a piece of work performed by a particular
+ * thread for a period of time. A call to __itt_task_begin creates a
+ * task instance. This becomes the current instance for that task on that
+ * thread. A following call to __itt_task_end on the same thread ends the
+ * instance. There may be multiple simultaneous instances of tasks with the
+ * same name on different threads. If an ID is specified, the task instance
+ * receives that ID. Nested tasks are allowed.
+ *
+ * Note: The task is defined by the bracketing of __itt_task_begin and
+ * __itt_task_end on the same thread. If some scheduling mechanism causes
+ * task switching (the thread executes a different user task) or thread
+ * switching (the user task switches to a different thread) then this breaks
+ * the notion of current instance. Additional API calls are required to
+ * deal with that possibility.
+ * @{
+ */
+
+/**
+ * @ingroup tasks
+ * @brief Begin a task instance.
+ * @param[in] domain The domain for this task
+ * @param[in] taskid The instance ID for this task instance, or __itt_null
+ * @param[in] parentid The parent instance to which this task instance belongs, or __itt_null
+ * @param[in] name The name of this task
+ */
+void ITTAPI __itt_task_begin(const __itt_domain *domain, __itt_id taskid, __itt_id parentid, __itt_string_handle *name);
+
+/**
+ * @ingroup tasks
+ * @brief Begin a task instance.
+ * @param[in] domain The domain for this task
+ * @param[in] taskid The identifier for this task instance (may be 0)
+ * @param[in] parentid The parent of this task (may be 0)
+ * @param[in] fn The pointer to the function you are tracing
+ */
+void ITTAPI __itt_task_begin_fn(const __itt_domain *domain, __itt_id taskid, __itt_id parentid, void* fn);
+
+/**
+ * @ingroup tasks
+ * @brief End the current task instance.
+ * @param[in] domain The domain for this task
+ */
+void ITTAPI __itt_task_end(const __itt_domain *domain);
+
+/**
+ * @ingroup tasks
+ * @brief Begin an overlapped task instance.
+ * @param[in] domain The domain for this task.
+ * @param[in] taskid The identifier for this task instance, *cannot* be __itt_null.
+ * @param[in] parentid The parent of this task, or __itt_null.
+ * @param[in] name The name of this task.
+ */
+void ITTAPI __itt_task_begin_overlapped(const __itt_domain* domain, __itt_id taskid, __itt_id parentid, __itt_string_handle* name);
+
+/**
+ * @ingroup tasks
+ * @brief End an overlapped task instance.
+ * @param[in] domain The domain for this task
+ * @param[in] taskid Explicit ID of finished task
+ */
+void ITTAPI __itt_task_end_overlapped(const __itt_domain *domain, __itt_id taskid);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, task_begin, (const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name))
+ITT_STUBV(ITTAPI, void, task_begin_fn, (const __itt_domain *domain, __itt_id id, __itt_id parentid, void* fn))
+ITT_STUBV(ITTAPI, void, task_end, (const __itt_domain *domain))
+ITT_STUBV(ITTAPI, void, task_begin_overlapped, (const __itt_domain *domain, __itt_id taskid, __itt_id parentid, __itt_string_handle *name))
+ITT_STUBV(ITTAPI, void, task_end_overlapped, (const __itt_domain *domain, __itt_id taskid))
+#define __itt_task_begin(d,x,y,z) ITTNOTIFY_VOID_D3(task_begin,d,x,y,z)
+#define __itt_task_begin_ptr ITTNOTIFY_NAME(task_begin)
+#define __itt_task_begin_fn(d,x,y,z) ITTNOTIFY_VOID_D3(task_begin_fn,d,x,y,z)
+#define __itt_task_begin_fn_ptr ITTNOTIFY_NAME(task_begin_fn)
+#define __itt_task_end(d) ITTNOTIFY_VOID_D0(task_end,d)
+#define __itt_task_end_ptr ITTNOTIFY_NAME(task_end)
+#define __itt_task_begin_overlapped(d,x,y,z) ITTNOTIFY_VOID_D3(task_begin_overlapped,d,x,y,z)
+#define __itt_task_begin_overlapped_ptr ITTNOTIFY_NAME(task_begin_overlapped)
+#define __itt_task_end_overlapped(d,x) ITTNOTIFY_VOID_D1(task_end_overlapped,d,x)
+#define __itt_task_end_overlapped_ptr ITTNOTIFY_NAME(task_end_overlapped)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_task_begin(domain,id,parentid,name)
+#define __itt_task_begin_ptr 0
+#define __itt_task_begin_fn(domain,id,parentid,fn)
+#define __itt_task_begin_fn_ptr 0
+#define __itt_task_end(domain)
+#define __itt_task_end_ptr 0
+#define __itt_task_begin_overlapped(domain,taskid,parentid,name)
+#define __itt_task_begin_overlapped_ptr 0
+#define __itt_task_end_overlapped(domain,taskid)
+#define __itt_task_end_overlapped_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_task_begin_ptr 0
+#define __itt_task_begin_fn_ptr 0
+#define __itt_task_end_ptr 0
+#define __itt_task_begin_overlapped_ptr 0
+#define __itt_task_end_overlapped_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} tasks group */
+
+
+/**
+ * @defgroup markers Markers
+ * Markers represent a single discrete event in time. Markers have a scope,
+ * described by an enumerated type __itt_scope. Markers are created by
+ * the API call __itt_marker. A marker instance can be given an ID for use in
+ * adding metadata.
+ * @{
+ */
+
+/**
+ * @brief Describes the scope of an event object in the trace.
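To make the begin/end bracketing concrete, a minimal task sketch (domain and task names are placeholders; do_step_work is hypothetical):

    #include "ittnotify.h"

    extern void do_step_work(void);   /* hypothetical */

    static __itt_domain* g_domain;
    static __itt_string_handle* g_step_name;

    void traced_step(void)
    {
        if (!g_domain) {
            g_domain = __itt_domain_create("com.example.tasks");   /* placeholder */
            g_step_name = __itt_string_handle_create("step");
        }
        __itt_task_begin(g_domain, __itt_null, __itt_null, g_step_name);
        do_step_work();              /* nested __itt_task_begin calls are allowed here */
        __itt_task_end(g_domain);    /* ends the innermost open task on this thread */
    }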
+ */
+typedef enum
+{
+ __itt_scope_unknown = 0,
+ __itt_scope_global,
+ __itt_scope_track_group,
+ __itt_scope_track,
+ __itt_scope_task,
+ __itt_scope_marker
+} __itt_scope;
+
+/** @cond exclude_from_documentation */
+#define __itt_marker_scope_unknown __itt_scope_unknown
+#define __itt_marker_scope_global __itt_scope_global
+#define __itt_marker_scope_process __itt_scope_track_group
+#define __itt_marker_scope_thread __itt_scope_track
+#define __itt_marker_scope_task __itt_scope_task
+/** @endcond */
+
+/**
+ * @ingroup markers
+ * @brief Create a marker instance
+ * @param[in] domain The domain for this marker
+ * @param[in] id The instance ID for this marker or __itt_null
+ * @param[in] name The name for this marker
+ * @param[in] scope The scope for this marker
+ */
+void ITTAPI __itt_marker(const __itt_domain *domain, __itt_id id, __itt_string_handle *name, __itt_scope scope);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, marker, (const __itt_domain *domain, __itt_id id, __itt_string_handle *name, __itt_scope scope))
+#define __itt_marker(d,x,y,z) ITTNOTIFY_VOID_D3(marker,d,x,y,z)
+#define __itt_marker_ptr ITTNOTIFY_NAME(marker)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_marker(domain,id,name,scope)
+#define __itt_marker_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_marker_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} markers group */
+
+/**
+ * @defgroup metadata Metadata
+ * The metadata API is used to attach extra information to named
+ * entities. Metadata can be attached to an identified named entity by ID,
+ * or to the current entity (which is always a task).
+ *
+ * Conceptually metadata has a type (what kind of metadata), a key (the
+ * name of the metadata), and a value (the actual data). The encoding of
+ * the value depends on the type of the metadata.
+ *
+ * The type of metadata is specified by an enumerated type __itt_metadata_type.
+ * @{
+ */
+
+/**
+ * @ingroup parameters
+ * @brief Describes the type of metadata
+ */
+typedef enum {
+ __itt_metadata_unknown = 0,
+ __itt_metadata_u64, /**< Unsigned 64-bit integer */
+ __itt_metadata_s64, /**< Signed 64-bit integer */
+ __itt_metadata_u32, /**< Unsigned 32-bit integer */
+ __itt_metadata_s32, /**< Signed 32-bit integer */
+ __itt_metadata_u16, /**< Unsigned 16-bit integer */
+ __itt_metadata_s16, /**< Signed 16-bit integer */
+ __itt_metadata_float, /**< Signed 32-bit floating-point */
+ __itt_metadata_double /**< Signed 64-bit floating-point */
+} __itt_metadata_type;
+
+/**
+ * @ingroup parameters
+ * @brief Add metadata to an instance of a named entity.
+ * @param[in] domain The domain controlling the call
+ * @param[in] id The identifier of the instance to which the metadata is to be added, or __itt_null to add to the current task
+ * @param[in] key The name of the metadata
+ * @param[in] type The type of the metadata
+ * @param[in] count The number of elements of the given type. If count == 0, no metadata will be added.
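A sketch of emitting a marker with the scope values above (the name is a placeholder):

    #include "ittnotify.h"

    void note_checkpoint(__itt_domain* d)
    {
        __itt_string_handle* name = __itt_string_handle_create("checkpoint");   /* placeholder */
        /* Global-scope marker with no instance ID; pass a real ID instead of __itt_null
           if metadata will be attached to this marker later. */
        __itt_marker(d, __itt_null, name, __itt_scope_global);
    }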
+ * @param[in] data The metadata itself +*/ +void ITTAPI __itt_metadata_add(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, metadata_add, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data)) +#define __itt_metadata_add(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(metadata_add,d,x,y,z,a,b) +#define __itt_metadata_add_ptr ITTNOTIFY_NAME(metadata_add) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_metadata_add(d,x,y,z,a,b) +#define __itt_metadata_add_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_metadata_add_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup parameters + * @brief Add string metadata to an instance of a named entity. + * @param[in] domain The domain controlling the call + * @param[in] id The identifier of the instance to which the metadata is to be added, or __itt_null to add to the current task + * @param[in] key The name of the metadata + * @param[in] data The metadata itself + * @param[in] length The number of characters in the string, or -1 if the length is unknown but the string is null-terminated +*/ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +void ITTAPI __itt_metadata_str_addA(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length); +void ITTAPI __itt_metadata_str_addW(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const wchar_t *data, size_t length); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_metadata_str_add __itt_metadata_str_addW +# define __itt_metadata_str_add_ptr __itt_metadata_str_addW_ptr +#else /* UNICODE */ +# define __itt_metadata_str_add __itt_metadata_str_addA +# define __itt_metadata_str_add_ptr __itt_metadata_str_addA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +void ITTAPI __itt_metadata_str_add(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length); +#endif + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUBV(ITTAPI, void, metadata_str_addA, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length)) +ITT_STUBV(ITTAPI, void, metadata_str_addW, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const wchar_t *data, size_t length)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUBV(ITTAPI, void, metadata_str_add, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_metadata_str_addA(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_addA,d,x,y,z,a) +#define __itt_metadata_str_addA_ptr ITTNOTIFY_NAME(metadata_str_addA) +#define __itt_metadata_str_addW(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_addW,d,x,y,z,a) +#define __itt_metadata_str_addW_ptr ITTNOTIFY_NAME(metadata_str_addW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_metadata_str_add(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_add,d,x,y,z,a) +#define __itt_metadata_str_add_ptr ITTNOTIFY_NAME(metadata_str_add) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN 
+#define __itt_metadata_str_addA(d,x,y,z,a) +#define __itt_metadata_str_addA_ptr 0 +#define __itt_metadata_str_addW(d,x,y,z,a) +#define __itt_metadata_str_addW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_metadata_str_add(d,x,y,z,a) +#define __itt_metadata_str_add_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_metadata_str_addA_ptr 0 +#define __itt_metadata_str_addW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_metadata_str_add_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup parameters + * @brief Add metadata to an instance of a named entity. + * @param[in] domain The domain controlling the call + * @param[in] scope The scope of the instance to which the metadata is to be added + + * @param[in] id The identifier of the instance to which the metadata is to be added, or __itt_null to add to the current task + + * @param[in] key The name of the metadata + * @param[in] type The type of the metadata + * @param[in] count The number of elements of the given type. If count == 0, no metadata will be added. + * @param[in] data The metadata itself +*/ +void ITTAPI __itt_metadata_add_with_scope(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, metadata_add_with_scope, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data)) +#define __itt_metadata_add_with_scope(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(metadata_add_with_scope,d,x,y,z,a,b) +#define __itt_metadata_add_with_scope_ptr ITTNOTIFY_NAME(metadata_add_with_scope) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_metadata_add_with_scope(d,x,y,z,a,b) +#define __itt_metadata_add_with_scope_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_metadata_add_with_scope_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup parameters + * @brief Add string metadata to an instance of a named entity. 
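A sketch of attaching numeric and string metadata to the current task with the calls above (key names are placeholders; the narrow-character __itt_metadata_str_add variant is assumed):

    #include "ittnotify.h"
    #include <string.h>

    void report_progress(__itt_domain* d, unsigned long long bytes)
    {
        __itt_string_handle* k_bytes = __itt_string_handle_create("bytes_processed");
        __itt_string_handle* k_stage = __itt_string_handle_create("stage");

        /* __itt_null attaches the metadata to the current task on this thread. */
        __itt_metadata_add(d, __itt_null, k_bytes, __itt_metadata_u64, 1, &bytes);
        __itt_metadata_str_add(d, __itt_null, k_stage, "decode", strlen("decode"));
    }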
+ * @param[in] domain The domain controlling the call + * @param[in] scope The scope of the instance to which the metadata is to be added + + * @param[in] id The identifier of the instance to which the metadata is to be added, or __itt_null to add to the current task + + * @param[in] key The name of the metadata + * @param[in] data The metadata itself + * @param[in] length The number of characters in the string, or -1 if the length is unknown but the string is null-terminated +*/ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +void ITTAPI __itt_metadata_str_add_with_scopeA(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length); +void ITTAPI __itt_metadata_str_add_with_scopeW(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const wchar_t *data, size_t length); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_metadata_str_add_with_scope __itt_metadata_str_add_with_scopeW +# define __itt_metadata_str_add_with_scope_ptr __itt_metadata_str_add_with_scopeW_ptr +#else /* UNICODE */ +# define __itt_metadata_str_add_with_scope __itt_metadata_str_add_with_scopeA +# define __itt_metadata_str_add_with_scope_ptr __itt_metadata_str_add_with_scopeA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +void ITTAPI __itt_metadata_str_add_with_scope(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length); +#endif + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUBV(ITTAPI, void, metadata_str_add_with_scopeA, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length)) +ITT_STUBV(ITTAPI, void, metadata_str_add_with_scopeW, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const wchar_t *data, size_t length)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUBV(ITTAPI, void, metadata_str_add_with_scope, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_metadata_str_add_with_scopeA(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_add_with_scopeA,d,x,y,z,a) +#define __itt_metadata_str_add_with_scopeA_ptr ITTNOTIFY_NAME(metadata_str_add_with_scopeA) +#define __itt_metadata_str_add_with_scopeW(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_add_with_scopeW,d,x,y,z,a) +#define __itt_metadata_str_add_with_scopeW_ptr ITTNOTIFY_NAME(metadata_str_add_with_scopeW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_metadata_str_add_with_scope(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_add_with_scope,d,x,y,z,a) +#define __itt_metadata_str_add_with_scope_ptr ITTNOTIFY_NAME(metadata_str_add_with_scope) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_metadata_str_add_with_scopeA(d,x,y,z,a) +#define __itt_metadata_str_add_with_scopeA_ptr 0 +#define __itt_metadata_str_add_with_scopeW(d,x,y,z,a) +#define __itt_metadata_str_add_with_scopeW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_metadata_str_add_with_scope(d,x,y,z,a) +#define __itt_metadata_str_add_with_scope_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_metadata_str_add_with_scopeA_ptr 0 
+#define __itt_metadata_str_add_with_scopeW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_metadata_str_add_with_scope_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @} metadata group */ + +/** + * @defgroup relations Relations + * Instances of named entities can be explicitly associated with other + * instances using instance IDs and the relationship API calls. + * + * @{ + */ + +/** + * @ingroup relations + * @brief The kind of relation between two instances is specified by the enumerated type __itt_relation. + * Relations between instances can be added with an API call. The relation + * API uses instance IDs. Relations can be added before or after the actual + * instances are created and persist independently of the instances. This + * is the motivation for having different lifetimes for instance IDs and + * the actual instances. + */ +typedef enum +{ + __itt_relation_is_unknown = 0, + __itt_relation_is_dependent_on, /**< "A is dependent on B" means that A cannot start until B completes */ + __itt_relation_is_sibling_of, /**< "A is sibling of B" means that A and B were created as a group */ + __itt_relation_is_parent_of, /**< "A is parent of B" means that A created B */ + __itt_relation_is_continuation_of, /**< "A is continuation of B" means that A assumes the dependencies of B */ + __itt_relation_is_child_of, /**< "A is child of B" means that A was created by B (inverse of is_parent_of) */ + __itt_relation_is_continued_by, /**< "A is continued by B" means that B assumes the dependencies of A (inverse of is_continuation_of) */ + __itt_relation_is_predecessor_to /**< "A is predecessor to B" means that B cannot start until A completes (inverse of is_dependent_on) */ +} __itt_relation; + +/** + * @ingroup relations + * @brief Add a relation to the current task instance. + * The current task instance is the head of the relation. + * @param[in] domain The domain controlling this call + * @param[in] relation The kind of relation + * @param[in] tail The ID for the tail of the relation + */ +void ITTAPI __itt_relation_add_to_current(const __itt_domain *domain, __itt_relation relation, __itt_id tail); + +/** + * @ingroup relations + * @brief Add a relation between two instance identifiers. 
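A sketch of recording a dependency between two instance IDs using the relation kinds above (__itt_relation_add itself is declared immediately below):

    #include "ittnotify.h"

    void link_tasks(__itt_domain* d, void* producer, void* consumer)
    {
        __itt_id prod_id = __itt_id_make(producer, 0);
        __itt_id cons_id = __itt_id_make(consumer, 0);
        __itt_id_create(d, prod_id);
        __itt_id_create(d, cons_id);

        /* "consumer is dependent on producer": it cannot start until the producer completes. */
        __itt_relation_add(d, cons_id, __itt_relation_is_dependent_on, prod_id);
    }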
+ * @param[in] domain The domain controlling this call + * @param[in] head The ID for the head of the relation + * @param[in] relation The kind of relation + * @param[in] tail The ID for the tail of the relation + */ +void ITTAPI __itt_relation_add(const __itt_domain *domain, __itt_id head, __itt_relation relation, __itt_id tail); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, relation_add_to_current, (const __itt_domain *domain, __itt_relation relation, __itt_id tail)) +ITT_STUBV(ITTAPI, void, relation_add, (const __itt_domain *domain, __itt_id head, __itt_relation relation, __itt_id tail)) +#define __itt_relation_add_to_current(d,x,y) ITTNOTIFY_VOID_D2(relation_add_to_current,d,x,y) +#define __itt_relation_add_to_current_ptr ITTNOTIFY_NAME(relation_add_to_current) +#define __itt_relation_add(d,x,y,z) ITTNOTIFY_VOID_D3(relation_add,d,x,y,z) +#define __itt_relation_add_ptr ITTNOTIFY_NAME(relation_add) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_relation_add_to_current(d,x,y) +#define __itt_relation_add_to_current_ptr 0 +#define __itt_relation_add(d,x,y,z) +#define __itt_relation_add_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_relation_add_to_current_ptr 0 +#define __itt_relation_add_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} relations group */ + +/** @cond exclude_from_documentation */ +#pragma pack(push, 8) + +typedef struct ___itt_clock_info +{ + unsigned long long clock_freq; /*!< Clock domain frequency */ + unsigned long long clock_base; /*!< Clock domain base timestamp */ +} __itt_clock_info; + +#pragma pack(pop) +/** @endcond */ + +/** @cond exclude_from_documentation */ +typedef void (ITTAPI *__itt_get_clock_info_fn)(__itt_clock_info* clock_info, void* data); +/** @endcond */ + +/** @cond exclude_from_documentation */ +#pragma pack(push, 8) + +typedef struct ___itt_clock_domain +{ + __itt_clock_info info; /*!< Most recent clock domain info */ + __itt_get_clock_info_fn fn; /*!< Callback function pointer */ + void* fn_data; /*!< Input argument for the callback function */ + int extra1; /*!< Reserved. Must be zero */ + void* extra2; /*!< Reserved. Must be zero */ + struct ___itt_clock_domain* next; +} __itt_clock_domain; + +#pragma pack(pop) +/** @endcond */ + +/** + * @ingroup clockdomains + * @brief Create a clock domain. + * Certain applications require the capability to trace their application using + * a clock domain different than the CPU, for instance the instrumentation of events + * that occur on a GPU. + * Because the set of domains is expected to be static over the application's execution time, + * there is no mechanism to destroy a domain. + * Any domain can be accessed by any thread in the process, regardless of which thread created + * the domain. This call is thread-safe. 
+ * @param[in] fn A pointer to a callback function which retrieves alternative CPU timestamps + * @param[in] fn_data Argument for a callback function; may be NULL + */ +__itt_clock_domain* ITTAPI __itt_clock_domain_create(__itt_get_clock_info_fn fn, void* fn_data); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(ITTAPI, __itt_clock_domain*, clock_domain_create, (__itt_get_clock_info_fn fn, void* fn_data)) +#define __itt_clock_domain_create ITTNOTIFY_DATA(clock_domain_create) +#define __itt_clock_domain_create_ptr ITTNOTIFY_NAME(clock_domain_create) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_clock_domain_create(fn,fn_data) (__itt_clock_domain*)0 +#define __itt_clock_domain_create_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_clock_domain_create_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup clockdomains + * @brief Recalculate clock domains frequencies and clock base timestamps. + */ +void ITTAPI __itt_clock_domain_reset(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, clock_domain_reset, (void)) +#define __itt_clock_domain_reset ITTNOTIFY_VOID(clock_domain_reset) +#define __itt_clock_domain_reset_ptr ITTNOTIFY_NAME(clock_domain_reset) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_clock_domain_reset() +#define __itt_clock_domain_reset_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_clock_domain_reset_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup clockdomain + * @brief Create an instance of identifier. This establishes the beginning of the lifetime of + * an instance of the given ID in the trace. Once this lifetime starts, the ID can be used to + * tag named entity instances in calls such as __itt_task_begin, and to specify relationships among + * identified named entity instances, using the \ref relations APIs. + * @param[in] domain The domain controlling the execution of this call. + * @param[in] clock_domain The clock domain controlling the execution of this call. + * @param[in] timestamp The user defined timestamp. + * @param[in] id The ID to create. + */ +void ITTAPI __itt_id_create_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id); + +/** + * @ingroup clockdomain + * @brief Destroy an instance of identifier. This ends the lifetime of the current instance of the + * given ID value in the trace. Any relationships that are established after this lifetime ends are + * invalid. This call must be performed before the given ID value can be reused for a different + * named entity instance. + * @param[in] domain The domain controlling the execution of this call. + * @param[in] clock_domain The clock domain controlling the execution of this call. + * @param[in] timestamp The user defined timestamp. + * @param[in] id The ID to destroy. 
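+ *
+ * A minimal sketch of the clock-domain ID lifetime (illustrative only, not part
+ * of the original header; the callback, timestamp source, domain and object are
+ * hypothetical):
+ * @code
+ *     static void ITTAPI get_gpu_clock(__itt_clock_info* info, void* data)
+ *     {
+ *         (void)data;
+ *         info->clock_freq = 1000000000ULL;        // example: 1 GHz device clock
+ *         info->clock_base = my_read_gpu_timestamp();
+ *     }
+ *
+ *     __itt_clock_domain* cd = __itt_clock_domain_create(get_gpu_clock, NULL);
+ *     __itt_id id = __itt_id_make(&my_object, 0);
+ *     __itt_id_create_ex(domain, cd, my_read_gpu_timestamp(), id);
+ *     // ... tag tasks/relations with this ID ...
+ *     __itt_id_destroy_ex(domain, cd, my_read_gpu_timestamp(), id);
+ * @endcode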
+ */ +void ITTAPI __itt_id_destroy_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, id_create_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id)) +ITT_STUBV(ITTAPI, void, id_destroy_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id)) +#define __itt_id_create_ex(d,x,y,z) ITTNOTIFY_VOID_D3(id_create_ex,d,x,y,z) +#define __itt_id_create_ex_ptr ITTNOTIFY_NAME(id_create_ex) +#define __itt_id_destroy_ex(d,x,y,z) ITTNOTIFY_VOID_D3(id_destroy_ex,d,x,y,z) +#define __itt_id_destroy_ex_ptr ITTNOTIFY_NAME(id_destroy_ex) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_id_create_ex(domain,clock_domain,timestamp,id) +#define __itt_id_create_ex_ptr 0 +#define __itt_id_destroy_ex(domain,clock_domain,timestamp,id) +#define __itt_id_destroy_ex_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_id_create_ex_ptr 0 +#define __itt_id_destroy_ex_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup clockdomain + * @brief Begin a task instance. + * @param[in] domain The domain for this task + * @param[in] clock_domain The clock domain controlling the execution of this call. + * @param[in] timestamp The user defined timestamp. + * @param[in] taskid The instance ID for this task instance, or __itt_null + * @param[in] parentid The parent instance to which this task instance belongs, or __itt_null + * @param[in] name The name of this task + */ +void ITTAPI __itt_task_begin_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, __itt_string_handle* name); + +/** + * @ingroup clockdomain + * @brief Begin a task instance. + * @param[in] domain The domain for this task + * @param[in] clock_domain The clock domain controlling the execution of this call. + * @param[in] timestamp The user defined timestamp. + * @param[in] taskid The identifier for this task instance, or __itt_null + * @param[in] parentid The parent of this task, or __itt_null + * @param[in] fn The pointer to the function you are tracing + */ +void ITTAPI __itt_task_begin_fn_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, void* fn); + +/** + * @ingroup clockdomain + * @brief End the current task instance. + * @param[in] domain The domain for this task + * @param[in] clock_domain The clock domain controlling the execution of this call. + * @param[in] timestamp The user defined timestamp. 
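+ *
+ * Typically paired with __itt_task_begin_ex on the same clock domain (an
+ * illustrative sketch, not part of the original header; the domain, clock
+ * domain, timestamps and name handle are hypothetical):
+ * @code
+ *     __itt_task_begin_ex(domain, cd, start_ts, __itt_null, __itt_null, task_name);
+ *     // ... work being traced ...
+ *     __itt_task_end_ex(domain, cd, end_ts);
+ * @endcode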
+ */ +void ITTAPI __itt_task_end_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, task_begin_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_id parentid, __itt_string_handle *name)) +ITT_STUBV(ITTAPI, void, task_begin_fn_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_id parentid, void* fn)) +ITT_STUBV(ITTAPI, void, task_end_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp)) +#define __itt_task_begin_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(task_begin_ex,d,x,y,z,a,b) +#define __itt_task_begin_ex_ptr ITTNOTIFY_NAME(task_begin_ex) +#define __itt_task_begin_fn_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(task_begin_fn_ex,d,x,y,z,a,b) +#define __itt_task_begin_fn_ex_ptr ITTNOTIFY_NAME(task_begin_fn_ex) +#define __itt_task_end_ex(d,x,y) ITTNOTIFY_VOID_D2(task_end_ex,d,x,y) +#define __itt_task_end_ex_ptr ITTNOTIFY_NAME(task_end_ex) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_task_begin_ex(domain,clock_domain,timestamp,id,parentid,name) +#define __itt_task_begin_ex_ptr 0 +#define __itt_task_begin_fn_ex(domain,clock_domain,timestamp,id,parentid,fn) +#define __itt_task_begin_fn_ex_ptr 0 +#define __itt_task_end_ex(domain,clock_domain,timestamp) +#define __itt_task_end_ex_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_task_begin_ex_ptr 0 +#define __itt_task_begin_fn_ex_ptr 0 +#define __itt_task_end_ex_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @defgroup counters Counters + * @ingroup public + * Counters are user-defined objects with a monotonically increasing + * value. Counter values are 64-bit unsigned integers. + * Counters have names that can be displayed in + * the tools. 
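+ *
+ * A minimal usage sketch (illustrative only, not part of the original header;
+ * the counter name and domain string are hypothetical):
+ * @code
+ *     __itt_counter bytes_sent = __itt_counter_create("bytes.sent", "network");
+ *     __itt_counter_inc_delta(bytes_sent, 1024);
+ *     __itt_counter_destroy(bytes_sent);
+ * @endcode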
+ * @{ + */ + +/** + * @brief opaque structure for counter identification + */ +/** @cond exclude_from_documentation */ + +typedef struct ___itt_counter* __itt_counter; + +/** + * @brief Create an unsigned 64 bits integer counter with given name/domain + * + * After __itt_counter_create() is called, __itt_counter_inc(id), __itt_counter_inc_delta(id, delta), + * __itt_counter_set_value(id, value_ptr) or __itt_counter_set_value_ex(id, clock_domain, timestamp, value_ptr) + * can be used to change the value of the counter, where value_ptr is a pointer to an unsigned 64 bits integer + * + * The call is equal to __itt_counter_create_typed(name, domain, __itt_metadata_u64) + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +__itt_counter ITTAPI __itt_counter_createA(const char *name, const char *domain); +__itt_counter ITTAPI __itt_counter_createW(const wchar_t *name, const wchar_t *domain); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_counter_create __itt_counter_createW +# define __itt_counter_create_ptr __itt_counter_createW_ptr +#else /* UNICODE */ +# define __itt_counter_create __itt_counter_createA +# define __itt_counter_create_ptr __itt_counter_createA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +__itt_counter ITTAPI __itt_counter_create(const char *name, const char *domain); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_counter, counter_createA, (const char *name, const char *domain)) +ITT_STUB(ITTAPI, __itt_counter, counter_createW, (const wchar_t *name, const wchar_t *domain)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_counter, counter_create, (const char *name, const char *domain)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_counter_createA ITTNOTIFY_DATA(counter_createA) +#define __itt_counter_createA_ptr ITTNOTIFY_NAME(counter_createA) +#define __itt_counter_createW ITTNOTIFY_DATA(counter_createW) +#define __itt_counter_createW_ptr ITTNOTIFY_NAME(counter_createW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_counter_create ITTNOTIFY_DATA(counter_create) +#define __itt_counter_create_ptr ITTNOTIFY_NAME(counter_create) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_counter_createA(name, domain) +#define __itt_counter_createA_ptr 0 +#define __itt_counter_createW(name, domain) +#define __itt_counter_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_counter_create(name, domain) +#define __itt_counter_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_counter_createA_ptr 0 +#define __itt_counter_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_counter_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Increment the unsigned 64 bits integer counter value + * + * Calling this function to non-unsigned 64 bits integer counters has no effect + */ +void ITTAPI __itt_counter_inc(__itt_counter id); + +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, counter_inc, (__itt_counter id)) +#define __itt_counter_inc ITTNOTIFY_VOID(counter_inc) +#define __itt_counter_inc_ptr ITTNOTIFY_NAME(counter_inc) 
+#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_counter_inc(id) +#define __itt_counter_inc_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_counter_inc_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** + * @brief Increment the unsigned 64 bits integer counter value with x + * + * Calling this function to non-unsigned 64 bits integer counters has no effect + */ +void ITTAPI __itt_counter_inc_delta(__itt_counter id, unsigned long long value); + +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, counter_inc_delta, (__itt_counter id, unsigned long long value)) +#define __itt_counter_inc_delta ITTNOTIFY_VOID(counter_inc_delta) +#define __itt_counter_inc_delta_ptr ITTNOTIFY_NAME(counter_inc_delta) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_counter_inc_delta(id, value) +#define __itt_counter_inc_delta_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_counter_inc_delta_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Decrement the unsigned 64 bits integer counter value + * + * Calling this function to non-unsigned 64 bits integer counters has no effect + */ +void ITTAPI __itt_counter_dec(__itt_counter id); + +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, counter_dec, (__itt_counter id)) +#define __itt_counter_dec ITTNOTIFY_VOID(counter_dec) +#define __itt_counter_dec_ptr ITTNOTIFY_NAME(counter_dec) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_counter_dec(id) +#define __itt_counter_dec_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_counter_dec_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** + * @brief Decrement the unsigned 64 bits integer counter value with x + * + * Calling this function to non-unsigned 64 bits integer counters has no effect + */ +void ITTAPI __itt_counter_dec_delta(__itt_counter id, unsigned long long value); + +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, counter_dec_delta, (__itt_counter id, unsigned long long value)) +#define __itt_counter_dec_delta ITTNOTIFY_VOID(counter_dec_delta) +#define __itt_counter_dec_delta_ptr ITTNOTIFY_NAME(counter_dec_delta) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_counter_dec_delta(id, value) +#define __itt_counter_dec_delta_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_counter_dec_delta_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup counters + * @brief Increment a counter by one. + * The first call with a given name creates a counter by that name and sets its + * value to zero. Successive calls increment the counter value. + * @param[in] domain The domain controlling the call. Counter names are not domain specific. + * The domain argument is used only to enable or disable the API calls. + * @param[in] name The name of the counter + */ +void ITTAPI __itt_counter_inc_v3(const __itt_domain *domain, __itt_string_handle *name); + +/** + * @ingroup counters + * @brief Increment a counter by the value specified in delta. + * @param[in] domain The domain controlling the call. Counter names are not domain specific. + * The domain argument is used only to enable or disable the API calls. 
+ * @param[in] name The name of the counter + * @param[in] delta The amount by which to increment the counter + */ +void ITTAPI __itt_counter_inc_delta_v3(const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta); + +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, counter_inc_v3, (const __itt_domain *domain, __itt_string_handle *name)) +ITT_STUBV(ITTAPI, void, counter_inc_delta_v3, (const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta)) +#define __itt_counter_inc_v3(d,x) ITTNOTIFY_VOID_D1(counter_inc_v3,d,x) +#define __itt_counter_inc_v3_ptr ITTNOTIFY_NAME(counter_inc_v3) +#define __itt_counter_inc_delta_v3(d,x,y) ITTNOTIFY_VOID_D2(counter_inc_delta_v3,d,x,y) +#define __itt_counter_inc_delta_v3_ptr ITTNOTIFY_NAME(counter_inc_delta_v3) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_counter_inc_v3(domain,name) +#define __itt_counter_inc_v3_ptr 0 +#define __itt_counter_inc_delta_v3(domain,name,delta) +#define __itt_counter_inc_delta_v3_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_counter_inc_v3_ptr 0 +#define __itt_counter_inc_delta_v3_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + + +/** + * @ingroup counters + * @brief Decrement a counter by one. + * The first call with a given name creates a counter by that name and sets its + * value to zero. Successive calls decrement the counter value. + * @param[in] domain The domain controlling the call. Counter names are not domain specific. + * The domain argument is used only to enable or disable the API calls. + * @param[in] name The name of the counter + */ +void ITTAPI __itt_counter_dec_v3(const __itt_domain *domain, __itt_string_handle *name); + +/** + * @ingroup counters + * @brief Decrement a counter by the value specified in delta. + * @param[in] domain The domain controlling the call. Counter names are not domain specific. + * The domain argument is used only to enable or disable the API calls. 
+ * @param[in] name The name of the counter + * @param[in] delta The amount by which to decrement the counter + */ +void ITTAPI __itt_counter_dec_delta_v3(const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta); + +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, counter_dec_v3, (const __itt_domain *domain, __itt_string_handle *name)) +ITT_STUBV(ITTAPI, void, counter_dec_delta_v3, (const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta)) +#define __itt_counter_dec_v3(d,x) ITTNOTIFY_VOID_D1(counter_dec_v3,d,x) +#define __itt_counter_dec_v3_ptr ITTNOTIFY_NAME(counter_dec_v3) +#define __itt_counter_dec_delta_v3(d,x,y) ITTNOTIFY_VOID_D2(counter_dec_delta_v3,d,x,y) +#define __itt_counter_dec_delta_v3_ptr ITTNOTIFY_NAME(counter_dec_delta_v3) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_counter_dec_v3(domain,name) +#define __itt_counter_dec_v3_ptr 0 +#define __itt_counter_dec_delta_v3(domain,name,delta) +#define __itt_counter_dec_delta_v3_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_counter_dec_v3_ptr 0 +#define __itt_counter_dec_delta_v3_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @} counters group */ + + +/** + * @brief Set the counter value + */ +void ITTAPI __itt_counter_set_value(__itt_counter id, void *value_ptr); + +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, counter_set_value, (__itt_counter id, void *value_ptr)) +#define __itt_counter_set_value ITTNOTIFY_VOID(counter_set_value) +#define __itt_counter_set_value_ptr ITTNOTIFY_NAME(counter_set_value) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_counter_set_value(id, value_ptr) +#define __itt_counter_set_value_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_counter_set_value_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Set the counter value + */ +void ITTAPI __itt_counter_set_value_ex(__itt_counter id, __itt_clock_domain *clock_domain, unsigned long long timestamp, void *value_ptr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, counter_set_value_ex, (__itt_counter id, __itt_clock_domain *clock_domain, unsigned long long timestamp, void *value_ptr)) +#define __itt_counter_set_value_ex ITTNOTIFY_VOID(counter_set_value_ex) +#define __itt_counter_set_value_ex_ptr ITTNOTIFY_NAME(counter_set_value_ex) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_counter_set_value_ex(id, clock_domain, timestamp, value_ptr) +#define __itt_counter_set_value_ex_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_counter_set_value_ex_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Create a typed counter with given name/domain + * + * After __itt_counter_create_typed() is called, __itt_counter_inc(id), __itt_counter_inc_delta(id, delta), + * __itt_counter_set_value(id, value_ptr) or __itt_counter_set_value_ex(id, clock_domain, timestamp, value_ptr) + * can be used to change the value of the counter + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +__itt_counter ITTAPI __itt_counter_create_typedA(const char *name, const char *domain, __itt_metadata_type type); +__itt_counter ITTAPI __itt_counter_create_typedW(const wchar_t *name, const wchar_t *domain, __itt_metadata_type type); +#if defined(UNICODE) || defined(_UNICODE) +# define 
__itt_counter_create_typed __itt_counter_create_typedW +# define __itt_counter_create_typed_ptr __itt_counter_create_typedW_ptr +#else /* UNICODE */ +# define __itt_counter_create_typed __itt_counter_create_typedA +# define __itt_counter_create_typed_ptr __itt_counter_create_typedA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +__itt_counter ITTAPI __itt_counter_create_typed(const char *name, const char *domain, __itt_metadata_type type); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_counter, counter_create_typedA, (const char *name, const char *domain, __itt_metadata_type type)) +ITT_STUB(ITTAPI, __itt_counter, counter_create_typedW, (const wchar_t *name, const wchar_t *domain, __itt_metadata_type type)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_counter, counter_create_typed, (const char *name, const char *domain, __itt_metadata_type type)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_counter_create_typedA ITTNOTIFY_DATA(counter_create_typedA) +#define __itt_counter_create_typedA_ptr ITTNOTIFY_NAME(counter_create_typedA) +#define __itt_counter_create_typedW ITTNOTIFY_DATA(counter_create_typedW) +#define __itt_counter_create_typedW_ptr ITTNOTIFY_NAME(counter_create_typedW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_counter_create_typed ITTNOTIFY_DATA(counter_create_typed) +#define __itt_counter_create_typed_ptr ITTNOTIFY_NAME(counter_create_typed) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_counter_create_typedA(name, domain, type) +#define __itt_counter_create_typedA_ptr 0 +#define __itt_counter_create_typedW(name, domain, type) +#define __itt_counter_create_typedW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_counter_create_typed(name, domain, type) +#define __itt_counter_create_typed_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_counter_create_typedA_ptr 0 +#define __itt_counter_create_typedW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_counter_create_typed_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Destroy the counter identified by the pointer previously returned by __itt_counter_create() or + * __itt_counter_create_typed() + */ +void ITTAPI __itt_counter_destroy(__itt_counter id); + +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, counter_destroy, (__itt_counter id)) +#define __itt_counter_destroy ITTNOTIFY_VOID(counter_destroy) +#define __itt_counter_destroy_ptr ITTNOTIFY_NAME(counter_destroy) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_counter_destroy(id) +#define __itt_counter_destroy_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_counter_destroy_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} counters group */ + +/** + * @ingroup markers + * @brief Create a marker instance. + * @param[in] domain The domain for this marker + * @param[in] clock_domain The clock domain controlling the execution of this call. + * @param[in] timestamp The user defined timestamp. 
+ * @param[in] id The instance ID for this marker, or __itt_null + * @param[in] name The name for this marker + * @param[in] scope The scope for this marker + */ +void ITTAPI __itt_marker_ex(const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_string_handle *name, __itt_scope scope); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, marker_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_string_handle *name, __itt_scope scope)) +#define __itt_marker_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(marker_ex,d,x,y,z,a,b) +#define __itt_marker_ex_ptr ITTNOTIFY_NAME(marker_ex) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_marker_ex(domain,clock_domain,timestamp,id,name,scope) +#define __itt_marker_ex_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_marker_ex_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @ingroup clockdomain + * @brief Add a relation to the current task instance. + * The current task instance is the head of the relation. + * @param[in] domain The domain controlling this call + * @param[in] clock_domain The clock domain controlling the execution of this call. + * @param[in] timestamp The user defined timestamp. + * @param[in] relation The kind of relation + * @param[in] tail The ID for the tail of the relation + */ +void ITTAPI __itt_relation_add_to_current_ex(const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_relation relation, __itt_id tail); + +/** + * @ingroup clockdomain + * @brief Add a relation between two instance identifiers. + * @param[in] domain The domain controlling this call + * @param[in] clock_domain The clock domain controlling the execution of this call. + * @param[in] timestamp The user defined timestamp. 
+ * @param[in] head The ID for the head of the relation + * @param[in] relation The kind of relation + * @param[in] tail The ID for the tail of the relation + */ +void ITTAPI __itt_relation_add_ex(const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id head, __itt_relation relation, __itt_id tail); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, relation_add_to_current_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_relation relation, __itt_id tail)) +ITT_STUBV(ITTAPI, void, relation_add_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id head, __itt_relation relation, __itt_id tail)) +#define __itt_relation_add_to_current_ex(d,x,y,z,a) ITTNOTIFY_VOID_D4(relation_add_to_current_ex,d,x,y,z,a) +#define __itt_relation_add_to_current_ex_ptr ITTNOTIFY_NAME(relation_add_to_current_ex) +#define __itt_relation_add_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(relation_add_ex,d,x,y,z,a,b) +#define __itt_relation_add_ex_ptr ITTNOTIFY_NAME(relation_add_ex) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_relation_add_to_current_ex(domain,clock_domain,timestame,relation,tail) +#define __itt_relation_add_to_current_ex_ptr 0 +#define __itt_relation_add_ex(domain,clock_domain,timestamp,head,relation,tail) +#define __itt_relation_add_ex_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_relation_add_to_current_ex_ptr 0 +#define __itt_relation_add_ex_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @cond exclude_from_documentation */ +typedef enum ___itt_track_group_type +{ + __itt_track_group_type_normal = 0 +} __itt_track_group_type; +/** @endcond */ + +/** @cond exclude_from_documentation */ +#pragma pack(push, 8) + +typedef struct ___itt_track_group +{ + __itt_string_handle* name; /*!< Name of the track group */ + struct ___itt_track* track; /*!< List of child tracks */ + __itt_track_group_type tgtype; /*!< Type of the track group */ + int extra1; /*!< Reserved. Must be zero */ + void* extra2; /*!< Reserved. Must be zero */ + struct ___itt_track_group* next; +} __itt_track_group; + +#pragma pack(pop) +/** @endcond */ + +/** + * @brief Placeholder for custom track types. Currently, "normal" custom track + * is the only available track type. + */ +typedef enum ___itt_track_type +{ + __itt_track_type_normal = 0 +#ifdef INTEL_ITTNOTIFY_API_PRIVATE + , __itt_track_type_queue +#endif /* INTEL_ITTNOTIFY_API_PRIVATE */ +} __itt_track_type; + +/** @cond exclude_from_documentation */ +#pragma pack(push, 8) + +typedef struct ___itt_track +{ + __itt_string_handle* name; /*!< Name of the track group */ + __itt_track_group* group; /*!< Parent group to a track */ + __itt_track_type ttype; /*!< Type of the track */ + int extra1; /*!< Reserved. Must be zero */ + void* extra2; /*!< Reserved. Must be zero */ + struct ___itt_track* next; +} __itt_track; + +#pragma pack(pop) +/** @endcond */ + +/** + * @brief Create logical track group. 
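+ *
+ * Tracks are usually created once at start-up and then selected per thread
+ * (an illustrative sketch, not part of the original header; the names are
+ * hypothetical):
+ * @code
+ *     __itt_track_group* tg = __itt_track_group_create(
+ *         __itt_string_handle_create("My Engine"), __itt_track_group_type_normal);
+ *     __itt_track* tr = __itt_track_create(
+ *         tg, __itt_string_handle_create("Render Queue"), __itt_track_type_normal);
+ *     __itt_set_track(tr);   // subsequent events on this thread land on the track
+ * @endcode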
+ */ +__itt_track_group* ITTAPI __itt_track_group_create(__itt_string_handle* name, __itt_track_group_type track_group_type); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(ITTAPI, __itt_track_group*, track_group_create, (__itt_string_handle* name, __itt_track_group_type track_group_type)) +#define __itt_track_group_create ITTNOTIFY_DATA(track_group_create) +#define __itt_track_group_create_ptr ITTNOTIFY_NAME(track_group_create) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_track_group_create(name) (__itt_track_group*)0 +#define __itt_track_group_create_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_track_group_create_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Create logical track. + */ +__itt_track* ITTAPI __itt_track_create(__itt_track_group* track_group, __itt_string_handle* name, __itt_track_type track_type); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(ITTAPI, __itt_track*, track_create, (__itt_track_group* track_group,__itt_string_handle* name, __itt_track_type track_type)) +#define __itt_track_create ITTNOTIFY_DATA(track_create) +#define __itt_track_create_ptr ITTNOTIFY_NAME(track_create) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_track_create(track_group,name,track_type) (__itt_track*)0 +#define __itt_track_create_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_track_create_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Set the logical track. + */ +void ITTAPI __itt_set_track(__itt_track* track); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, set_track, (__itt_track *track)) +#define __itt_set_track ITTNOTIFY_VOID(set_track) +#define __itt_set_track_ptr ITTNOTIFY_NAME(set_track) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_set_track(track) +#define __itt_set_track_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_set_track_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/* ========================================================================== */ +/** @cond exclude_from_gpa_documentation */ +/** + * @defgroup events Events + * @ingroup public + * Events group + * @{ + */ +/** @brief user event type */ +typedef int __itt_event; + +/** + * @brief Create an event notification + * @note name or namelen being null/name and namelen not matching, user event feature not enabled + * @return non-zero event identifier upon success and __itt_err otherwise + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +__itt_event LIBITTAPI __itt_event_createA(const char *name, int namelen); +__itt_event LIBITTAPI __itt_event_createW(const wchar_t *name, int namelen); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_event_create __itt_event_createW +# define __itt_event_create_ptr __itt_event_createW_ptr +#else +# define __itt_event_create __itt_event_createA +# define __itt_event_create_ptr __itt_event_createA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +__itt_event LIBITTAPI __itt_event_create(const char *name, int namelen); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(LIBITTAPI, __itt_event, 
event_createA, (const char *name, int namelen)) +ITT_STUB(LIBITTAPI, __itt_event, event_createW, (const wchar_t *name, int namelen)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(LIBITTAPI, __itt_event, event_create, (const char *name, int namelen)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_event_createA ITTNOTIFY_DATA(event_createA) +#define __itt_event_createA_ptr ITTNOTIFY_NAME(event_createA) +#define __itt_event_createW ITTNOTIFY_DATA(event_createW) +#define __itt_event_createW_ptr ITTNOTIFY_NAME(event_createW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_event_create ITTNOTIFY_DATA(event_create) +#define __itt_event_create_ptr ITTNOTIFY_NAME(event_create) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_event_createA(name, namelen) (__itt_event)0 +#define __itt_event_createA_ptr 0 +#define __itt_event_createW(name, namelen) (__itt_event)0 +#define __itt_event_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_event_create(name, namelen) (__itt_event)0 +#define __itt_event_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_event_createA_ptr 0 +#define __itt_event_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_event_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Record an event occurrence. + * @return __itt_err upon failure (invalid event id/user event feature not enabled) + */ +int LIBITTAPI __itt_event_start(__itt_event event); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(LIBITTAPI, int, event_start, (__itt_event event)) +#define __itt_event_start ITTNOTIFY_DATA(event_start) +#define __itt_event_start_ptr ITTNOTIFY_NAME(event_start) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_event_start(event) (int)0 +#define __itt_event_start_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_event_start_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Record an event end occurrence. + * @note It is optional if events do not have durations. 
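+ *
+ * Typical begin/end pairing (an illustrative sketch, not part of the original
+ * header; the event name is hypothetical):
+ * @code
+ *     __itt_event frame = __itt_event_create("frame", 5);
+ *     __itt_event_start(frame);
+ *     // ... duration being measured ...
+ *     __itt_event_end(frame);
+ * @endcode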
+ * @return __itt_err upon failure (invalid event id/user event feature not enabled) + */ +int LIBITTAPI __itt_event_end(__itt_event event); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(LIBITTAPI, int, event_end, (__itt_event event)) +#define __itt_event_end ITTNOTIFY_DATA(event_end) +#define __itt_event_end_ptr ITTNOTIFY_NAME(event_end) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_event_end(event) (int)0 +#define __itt_event_end_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_event_end_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} events group */ + + +/** + * @defgroup arrays Arrays Visualizer + * @ingroup public + * Visualize arrays + * @{ + */ + +/** + * @enum __itt_av_data_type + * @brief Defines types of arrays data (for C/C++ intrinsic types) + */ +typedef enum +{ + __itt_e_first = 0, + __itt_e_char = 0, /* 1-byte integer */ + __itt_e_uchar, /* 1-byte unsigned integer */ + __itt_e_int16, /* 2-byte integer */ + __itt_e_uint16, /* 2-byte unsigned integer */ + __itt_e_int32, /* 4-byte integer */ + __itt_e_uint32, /* 4-byte unsigned integer */ + __itt_e_int64, /* 8-byte integer */ + __itt_e_uint64, /* 8-byte unsigned integer */ + __itt_e_float, /* 4-byte floating */ + __itt_e_double, /* 8-byte floating */ + __itt_e_last = __itt_e_double +} __itt_av_data_type; + +/** + * @brief Save an array data to a file. + * Output format is defined by the file extension. The csv and bmp formats are supported (bmp - for 2-dimensional array only). + * @param[in] data - pointer to the array data + * @param[in] rank - the rank of the array + * @param[in] dimensions - pointer to an array of integers, which specifies the array dimensions. + * The size of dimensions must be equal to the rank + * @param[in] type - the type of the array, specified as one of the __itt_av_data_type values (for intrinsic types) + * @param[in] filePath - the file path; the output format is defined by the file extension + * @param[in] columnOrder - defines how the array is stored in the linear memory. + * It should be 1 for column-major order (e.g. in FORTRAN) or 0 - for row-major order (e.g. in C). 
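+ *
+ * For example, to dump a row-major 2-D float array to a .csv file (an
+ * illustrative sketch, not part of the original header; the array and path
+ * are hypothetical, and the char-based path matches the non-UNICODE variant):
+ * @code
+ *     float grid[64][64];
+ *     int dims[2] = { 64, 64 };
+ *     __itt_av_save(grid, 2, dims, __itt_e_float, "grid.csv", 0);
+ * @endcode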
+ */ + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +int ITTAPI __itt_av_saveA(void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder); +int ITTAPI __itt_av_saveW(void *data, int rank, const int *dimensions, int type, const wchar_t *filePath, int columnOrder); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_av_save __itt_av_saveW +# define __itt_av_save_ptr __itt_av_saveW_ptr +#else /* UNICODE */ +# define __itt_av_save __itt_av_saveA +# define __itt_av_save_ptr __itt_av_saveA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +int ITTAPI __itt_av_save(void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, int, av_saveA, (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder)) +ITT_STUB(ITTAPI, int, av_saveW, (void *data, int rank, const int *dimensions, int type, const wchar_t *filePath, int columnOrder)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, int, av_save, (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_av_saveA ITTNOTIFY_DATA(av_saveA) +#define __itt_av_saveA_ptr ITTNOTIFY_NAME(av_saveA) +#define __itt_av_saveW ITTNOTIFY_DATA(av_saveW) +#define __itt_av_saveW_ptr ITTNOTIFY_NAME(av_saveW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_av_save ITTNOTIFY_DATA(av_save) +#define __itt_av_save_ptr ITTNOTIFY_NAME(av_save) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_av_saveA(name) +#define __itt_av_saveA_ptr 0 +#define __itt_av_saveW(name) +#define __itt_av_saveW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_av_save(name) +#define __itt_av_save_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_av_saveA_ptr 0 +#define __itt_av_saveW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_av_save_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +void ITTAPI __itt_enable_attach(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, enable_attach, (void)) +#define __itt_enable_attach ITTNOTIFY_VOID(enable_attach) +#define __itt_enable_attach_ptr ITTNOTIFY_NAME(enable_attach) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_enable_attach() +#define __itt_enable_attach_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_enable_attach_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @cond exclude_from_gpa_documentation */ + +/** @} arrays group */ + +/** @endcond */ + +/** + * @brief Module load info + * This API is used to report necessary information in case of module relocation + * @param[in] start_addr - relocated module start address + * @param[in] end_addr - relocated module end address + * @param[in] path - file system path to the module + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +void ITTAPI __itt_module_loadA(void *start_addr, void *end_addr, const char *path); 
+void ITTAPI __itt_module_loadW(void *start_addr, void *end_addr, const wchar_t *path); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_module_load __itt_module_loadW +# define __itt_module_load_ptr __itt_module_loadW_ptr +#else /* UNICODE */ +# define __itt_module_load __itt_module_loadA +# define __itt_module_load_ptr __itt_module_loadA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +void ITTAPI __itt_module_load(void *start_addr, void *end_addr, const char *path); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, void, module_loadA, (void *start_addr, void *end_addr, const char *path)) +ITT_STUB(ITTAPI, void, module_loadW, (void *start_addr, void *end_addr, const wchar_t *path)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, void, module_load, (void *start_addr, void *end_addr, const char *path)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_module_loadA ITTNOTIFY_VOID(module_loadA) +#define __itt_module_loadA_ptr ITTNOTIFY_NAME(module_loadA) +#define __itt_module_loadW ITTNOTIFY_VOID(module_loadW) +#define __itt_module_loadW_ptr ITTNOTIFY_NAME(module_loadW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_module_load ITTNOTIFY_VOID(module_load) +#define __itt_module_load_ptr ITTNOTIFY_NAME(module_load) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_module_loadA(start_addr, end_addr, path) +#define __itt_module_loadA_ptr 0 +#define __itt_module_loadW(start_addr, end_addr, path) +#define __itt_module_loadW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_module_load(start_addr, end_addr, path) +#define __itt_module_load_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_module_loadA_ptr 0 +#define __itt_module_loadW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_module_load_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + + + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* _ITTNOTIFY_H_ */ + +#ifdef INTEL_ITTNOTIFY_API_PRIVATE + +#ifndef _ITTNOTIFY_PRIVATE_ +#define _ITTNOTIFY_PRIVATE_ + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/** + * @ingroup clockdomain + * @brief Begin an overlapped task instance. + * @param[in] domain The domain for this task + * @param[in] clock_domain The clock domain controlling the execution of this call. + * @param[in] timestamp The user defined timestamp. + * @param[in] taskid The identifier for this task instance, *cannot* be __itt_null. + * @param[in] parentid The parent of this task, or __itt_null. + * @param[in] name The name of this task. + */ +void ITTAPI __itt_task_begin_overlapped_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, __itt_string_handle* name); + +/** + * @ingroup clockdomain + * @brief End an overlapped task instance. + * @param[in] domain The domain for this task + * @param[in] clock_domain The clock domain controlling the execution of this call. + * @param[in] timestamp The user defined timestamp. 
+ * @param[in] taskid Explicit ID of finished task + */ +void ITTAPI __itt_task_end_overlapped_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, task_begin_overlapped_ex, (const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, __itt_string_handle* name)) +ITT_STUBV(ITTAPI, void, task_end_overlapped_ex, (const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid)) +#define __itt_task_begin_overlapped_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(task_begin_overlapped_ex,d,x,y,z,a,b) +#define __itt_task_begin_overlapped_ex_ptr ITTNOTIFY_NAME(task_begin_overlapped_ex) +#define __itt_task_end_overlapped_ex(d,x,y,z) ITTNOTIFY_VOID_D3(task_end_overlapped_ex,d,x,y,z) +#define __itt_task_end_overlapped_ex_ptr ITTNOTIFY_NAME(task_end_overlapped_ex) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_task_begin_overlapped_ex(domain,clock_domain,timestamp,taskid,parentid,name) +#define __itt_task_begin_overlapped_ex_ptr 0 +#define __itt_task_end_overlapped_ex(domain,clock_domain,timestamp,taskid) +#define __itt_task_end_overlapped_ex_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_task_begin_overlapped_ex_ptr 0 +#define __itt_task_end_overlapped_ptr 0 +#define __itt_task_end_overlapped_ex_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @defgroup makrs_internal Marks + * @ingroup internal + * Marks group + * @warning Internal API: + * - It is not shipped to outside of Intel + * - It is delivered to internal Intel teams using e-mail or SVN access only + * @{ + */ +/** @brief user mark type */ +typedef int __itt_mark_type; + +/** + * @brief Creates a user mark type with the specified name using char or Unicode string. 
+ * @param[in] name - name of mark to create
+ * @return Returns a handle to the mark type
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+__itt_mark_type ITTAPI __itt_mark_createA(const char *name);
+__itt_mark_type ITTAPI __itt_mark_createW(const wchar_t *name);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_mark_create __itt_mark_createW
+# define __itt_mark_create_ptr __itt_mark_createW_ptr
+#else /* UNICODE */
+# define __itt_mark_create __itt_mark_createA
+# define __itt_mark_create_ptr __itt_mark_createA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_mark_type ITTAPI __itt_mark_create(const char *name);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_mark_type, mark_createA, (const char *name))
+ITT_STUB(ITTAPI, __itt_mark_type, mark_createW, (const wchar_t *name))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_mark_type, mark_create, (const char *name))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_mark_createA ITTNOTIFY_DATA(mark_createA)
+#define __itt_mark_createA_ptr ITTNOTIFY_NAME(mark_createA)
+#define __itt_mark_createW ITTNOTIFY_DATA(mark_createW)
+#define __itt_mark_createW_ptr ITTNOTIFY_NAME(mark_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark_create ITTNOTIFY_DATA(mark_create)
+#define __itt_mark_create_ptr ITTNOTIFY_NAME(mark_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_mark_createA(name) (__itt_mark_type)0
+#define __itt_mark_createA_ptr 0
+#define __itt_mark_createW(name) (__itt_mark_type)0
+#define __itt_mark_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark_create(name) (__itt_mark_type)0
+#define __itt_mark_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_mark_createA_ptr 0
+#define __itt_mark_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Creates a "discrete" user mark type of the specified type and an optional parameter using char or Unicode string.
+ *
+ * - The mark of "discrete" type is placed to collection results in case of success. It appears in overtime view(s) as a special tick sign.
+ * - The call is "synchronous" - the function returns after the mark is actually added to the results.
+ * - This function is useful, for example, to mark different phases of the application
+ *   (the beginning of the next mark automatically means the end of the current region).
+ * - Can be used together with "continuous" marks (see below) at the same collection session.
+ * @param[in] mt - mark, created by __itt_mark_create(const char* name) function
+ * @param[in] parameter - string parameter of mark
+ * @return Returns zero value in case of success, non-zero value otherwise.
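+ *
+ * For example (an illustrative sketch, not part of the original header; the
+ * mark name and parameter are hypothetical):
+ * @code
+ *     __itt_mark_type phase = __itt_mark_create("phase");
+ *     __itt_mark(phase, "initialization done");
+ * @endcode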
+ */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +int ITTAPI __itt_markA(__itt_mark_type mt, const char *parameter); +int ITTAPI __itt_markW(__itt_mark_type mt, const wchar_t *parameter); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_mark __itt_markW +# define __itt_mark_ptr __itt_markW_ptr +#else /* UNICODE */ +# define __itt_mark __itt_markA +# define __itt_mark_ptr __itt_markA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +int ITTAPI __itt_mark(__itt_mark_type mt, const char *parameter); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, int, markA, (__itt_mark_type mt, const char *parameter)) +ITT_STUB(ITTAPI, int, markW, (__itt_mark_type mt, const wchar_t *parameter)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, int, mark, (__itt_mark_type mt, const char *parameter)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_markA ITTNOTIFY_DATA(markA) +#define __itt_markA_ptr ITTNOTIFY_NAME(markA) +#define __itt_markW ITTNOTIFY_DATA(markW) +#define __itt_markW_ptr ITTNOTIFY_NAME(markW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_mark ITTNOTIFY_DATA(mark) +#define __itt_mark_ptr ITTNOTIFY_NAME(mark) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_markA(mt, parameter) (int)0 +#define __itt_markA_ptr 0 +#define __itt_markW(mt, parameter) (int)0 +#define __itt_markW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_mark(mt, parameter) (int)0 +#define __itt_mark_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_markA_ptr 0 +#define __itt_markW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_mark_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Use this if necessary to create a "discrete" user event type (mark) for process + * rather then for one thread + * @see int __itt_mark(__itt_mark_type mt, const char* parameter); + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +int ITTAPI __itt_mark_globalA(__itt_mark_type mt, const char *parameter); +int ITTAPI __itt_mark_globalW(__itt_mark_type mt, const wchar_t *parameter); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_mark_global __itt_mark_globalW +# define __itt_mark_global_ptr __itt_mark_globalW_ptr +#else /* UNICODE */ +# define __itt_mark_global __itt_mark_globalA +# define __itt_mark_global_ptr __itt_mark_globalA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +int ITTAPI __itt_mark_global(__itt_mark_type mt, const char *parameter); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, int, mark_globalA, (__itt_mark_type mt, const char *parameter)) +ITT_STUB(ITTAPI, int, mark_globalW, (__itt_mark_type mt, const wchar_t *parameter)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, int, mark_global, (__itt_mark_type mt, const char *parameter)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_mark_globalA ITTNOTIFY_DATA(mark_globalA) +#define 
__itt_mark_globalA_ptr ITTNOTIFY_NAME(mark_globalA) +#define __itt_mark_globalW ITTNOTIFY_DATA(mark_globalW) +#define __itt_mark_globalW_ptr ITTNOTIFY_NAME(mark_globalW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_mark_global ITTNOTIFY_DATA(mark_global) +#define __itt_mark_global_ptr ITTNOTIFY_NAME(mark_global) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_mark_globalA(mt, parameter) (int)0 +#define __itt_mark_globalA_ptr 0 +#define __itt_mark_globalW(mt, parameter) (int)0 +#define __itt_mark_globalW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_mark_global(mt, parameter) (int)0 +#define __itt_mark_global_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_mark_globalA_ptr 0 +#define __itt_mark_globalW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_mark_global_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Creates an "end" point for "continuous" mark with specified name. + * + * - Returns zero value in case of success, non-zero value otherwise. + * Also returns non-zero value when preceding "begin" point for the + * mark with the same name failed to be created or not created. + * - The mark of "continuous" type is placed to collection results in + * case of success. It appears in overtime view(s) as a special tick + * sign (different from "discrete" mark) together with line from + * corresponding "begin" mark to "end" mark. + * @note Continuous marks can overlap and be nested inside each other. + * Discrete mark can be nested inside marked region + * @param[in] mt - mark, created by __itt_mark_create(const char* name) function + * @return Returns zero value in case of success, non-zero value otherwise. 
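+ *
+ * Illustrative sketch only (not part of the original header; it assumes the
+ * preceding __itt_mark() call on the same mark type serves as the "begin"
+ * point of the continuous region, and the name is hypothetical):
+ * @code
+ *     __itt_mark_type region = __itt_mark_create("io.phase");
+ *     __itt_mark(region, "begin");   // assumed begin point of the region
+ *     // ... region of interest ...
+ *     __itt_mark_off(region);        // end point
+ * @endcode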
+ */ +int ITTAPI __itt_mark_off(__itt_mark_type mt); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(ITTAPI, int, mark_off, (__itt_mark_type mt)) +#define __itt_mark_off ITTNOTIFY_DATA(mark_off) +#define __itt_mark_off_ptr ITTNOTIFY_NAME(mark_off) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_mark_off(mt) (int)0 +#define __itt_mark_off_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_mark_off_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Use this if necessary to create an "end" point for mark of process + * @see int __itt_mark_off(__itt_mark_type mt); + */ +int ITTAPI __itt_mark_global_off(__itt_mark_type mt); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(ITTAPI, int, mark_global_off, (__itt_mark_type mt)) +#define __itt_mark_global_off ITTNOTIFY_DATA(mark_global_off) +#define __itt_mark_global_off_ptr ITTNOTIFY_NAME(mark_global_off) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_mark_global_off(mt) (int)0 +#define __itt_mark_global_off_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_mark_global_off_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} marks group */ + +/** + * @defgroup counters_internal Counters + * @ingroup internal + * Counters group + * @{ + */ + + +/** + * @defgroup stitch Stack Stitching + * @ingroup internal + * Stack Stitching group + * @{ + */ +/** + * @brief opaque structure for counter identification + */ +typedef struct ___itt_caller *__itt_caller; + +/** + * @brief Create the stitch point e.g. a point in call stack where other stacks should be stitched to. + * The function returns a unique identifier which is used to match the cut points with corresponding stitch points. + */ +__itt_caller ITTAPI __itt_stack_caller_create(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(ITTAPI, __itt_caller, stack_caller_create, (void)) +#define __itt_stack_caller_create ITTNOTIFY_DATA(stack_caller_create) +#define __itt_stack_caller_create_ptr ITTNOTIFY_NAME(stack_caller_create) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_stack_caller_create() (__itt_caller)0 +#define __itt_stack_caller_create_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_stack_caller_create_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Destroy the information about stitch point identified by the pointer previously returned by __itt_stack_caller_create() + */ +void ITTAPI __itt_stack_caller_destroy(__itt_caller id); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, stack_caller_destroy, (__itt_caller id)) +#define __itt_stack_caller_destroy ITTNOTIFY_VOID(stack_caller_destroy) +#define __itt_stack_caller_destroy_ptr ITTNOTIFY_NAME(stack_caller_destroy) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_stack_caller_destroy(id) +#define __itt_stack_caller_destroy_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_stack_caller_destroy_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Sets the cut point. 
Stack from each event which occurs after this call will be cut + * at the same stack level the function was called and stitched to the corresponding stitch point. + */ +void ITTAPI __itt_stack_callee_enter(__itt_caller id); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, stack_callee_enter, (__itt_caller id)) +#define __itt_stack_callee_enter ITTNOTIFY_VOID(stack_callee_enter) +#define __itt_stack_callee_enter_ptr ITTNOTIFY_NAME(stack_callee_enter) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_stack_callee_enter(id) +#define __itt_stack_callee_enter_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_stack_callee_enter_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief This function eliminates the cut point which was set by latest __itt_stack_callee_enter(). + */ +void ITTAPI __itt_stack_callee_leave(__itt_caller id); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, stack_callee_leave, (__itt_caller id)) +#define __itt_stack_callee_leave ITTNOTIFY_VOID(stack_callee_leave) +#define __itt_stack_callee_leave_ptr ITTNOTIFY_NAME(stack_callee_leave) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_stack_callee_leave(id) +#define __itt_stack_callee_leave_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_stack_callee_leave_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @} stitch group */ + +/* ***************************************************************************************************************************** */ + +#include <stdarg.h> + +/** @cond exclude_from_documentation */ +typedef enum __itt_error_code +{ + __itt_error_success = 0, /*!< no error */ + __itt_error_no_module = 1, /*!< module can't be loaded */ + /* %1$s -- library name; win: %2$d -- system error code; unx: %2$s -- system error message. */ + __itt_error_no_symbol = 2, /*!< symbol not found */ + /* %1$s -- library name, %2$s -- symbol name. */ + __itt_error_unknown_group = 3, /*!< unknown group specified */ + /* %1$s -- env var name, %2$s -- group name. */ + __itt_error_cant_read_env = 4, /*!< GetEnvironmentVariable() failed */ + /* %1$s -- env var name, %2$d -- system error. */ + __itt_error_env_too_long = 5, /*!< variable value too long */ + /* %1$s -- env var name, %2$d -- actual length of the var, %3$d -- max allowed length. */ + __itt_error_system = 6 /*!< pthread_mutexattr_init or pthread_mutex_init failed */ + /* %1$s -- function name, %2$d -- errno. 
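+
+   A handler matching __itt_error_handler_t (declared just below) can be
+   installed with __itt_set_error_handler(); minimal sketch, with a
+   hypothetical handler name and assuming <stdio.h> is available:
+
+       static void my_itt_error_handler(__itt_error_code code, va_list args)
+       {
+           (void)args;
+           fprintf(stderr, "ittnotify error: code %d\n", (int)code);
+       }
+       ...
+       __itt_set_error_handler(&my_itt_error_handler);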
*/ +} __itt_error_code; + +typedef void (__itt_error_handler_t)(__itt_error_code code, va_list); +__itt_error_handler_t* __itt_set_error_handler(__itt_error_handler_t*); + +const char* ITTAPI __itt_api_version(void); +/** @endcond */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#define __itt_error_handler ITT_JOIN(INTEL_ITTNOTIFY_PREFIX, error_handler) +void __itt_error_handler(__itt_error_code code, va_list args); +extern const int ITTNOTIFY_NAME(err); +#define __itt_err ITTNOTIFY_NAME(err) +ITT_STUB(ITTAPI, const char*, api_version, (void)) +#define __itt_api_version ITTNOTIFY_DATA(api_version) +#define __itt_api_version_ptr ITTNOTIFY_NAME(api_version) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_api_version() (const char*)0 +#define __itt_api_version_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_api_version_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* _ITTNOTIFY_PRIVATE_ */ + +#endif /* INTEL_ITTNOTIFY_API_PRIVATE */ diff --git a/contrib/libs/tbb/src/tbb/tools_api/ittnotify_config.h b/contrib/libs/tbb/src/tbb/tools_api/ittnotify_config.h new file mode 100644 index 0000000000..c25730d522 --- /dev/null +++ b/contrib/libs/tbb/src/tbb/tools_api/ittnotify_config.h @@ -0,0 +1,585 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef _ITTNOTIFY_CONFIG_H_ +#define _ITTNOTIFY_CONFIG_H_ + +/** @cond exclude_from_documentation */ +#ifndef ITT_OS_WIN +# define ITT_OS_WIN 1 +#endif /* ITT_OS_WIN */ + +#ifndef ITT_OS_LINUX +# define ITT_OS_LINUX 2 +#endif /* ITT_OS_LINUX */ + +#ifndef ITT_OS_MAC +# define ITT_OS_MAC 3 +#endif /* ITT_OS_MAC */ + +#ifndef ITT_OS_FREEBSD +# define ITT_OS_FREEBSD 4 +#endif /* ITT_OS_FREEBSD */ + +#ifndef ITT_OS +# if defined WIN32 || defined _WIN32 +# define ITT_OS ITT_OS_WIN +# elif defined( __APPLE__ ) && defined( __MACH__ ) +# define ITT_OS ITT_OS_MAC +# elif defined( __FreeBSD__ ) +# define ITT_OS ITT_OS_FREEBSD +# else +# define ITT_OS ITT_OS_LINUX +# endif +#endif /* ITT_OS */ + +#ifndef ITT_PLATFORM_WIN +# define ITT_PLATFORM_WIN 1 +#endif /* ITT_PLATFORM_WIN */ + +#ifndef ITT_PLATFORM_POSIX +# define ITT_PLATFORM_POSIX 2 +#endif /* ITT_PLATFORM_POSIX */ + +#ifndef ITT_PLATFORM_MAC +# define ITT_PLATFORM_MAC 3 +#endif /* ITT_PLATFORM_MAC */ + +#ifndef ITT_PLATFORM_FREEBSD +# define ITT_PLATFORM_FREEBSD 4 +#endif /* ITT_PLATFORM_FREEBSD */ + +#ifndef ITT_PLATFORM +# if ITT_OS==ITT_OS_WIN +# define ITT_PLATFORM ITT_PLATFORM_WIN +# elif ITT_OS==ITT_OS_MAC +# define ITT_PLATFORM ITT_PLATFORM_MAC +# elif ITT_OS==ITT_OS_FREEBSD +# define ITT_PLATFORM ITT_PLATFORM_FREEBSD +# else +# define ITT_PLATFORM ITT_PLATFORM_POSIX +# endif +#endif /* ITT_PLATFORM */ + +#if defined(_UNICODE) && !defined(UNICODE) +#define UNICODE +#endif + +#include <stddef.h> +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#include <tchar.h> +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#include <stdint.h> +#if defined(UNICODE) || defined(_UNICODE) +#include <wchar.h> +#endif /* UNICODE || _UNICODE */ +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +#ifndef ITTAPI_CDECL +# if ITT_PLATFORM==ITT_PLATFORM_WIN +# define ITTAPI_CDECL __cdecl +# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +# if defined _M_IX86 || defined __i386__ +# define ITTAPI_CDECL __attribute__ ((cdecl)) +# else /* _M_IX86 || __i386__ */ +# define ITTAPI_CDECL /* actual only on x86 platform */ +# endif /* _M_IX86 || __i386__ */ +# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* ITTAPI_CDECL */ + +#ifndef STDCALL +# if ITT_PLATFORM==ITT_PLATFORM_WIN +# define STDCALL __stdcall +# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +# if defined _M_IX86 || defined __i386__ +# define STDCALL __attribute__ ((stdcall)) +# else /* _M_IX86 || __i386__ */ +# define STDCALL /* supported only on x86 platform */ +# endif /* _M_IX86 || __i386__ */ +# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* STDCALL */ + +#define ITTAPI ITTAPI_CDECL +#define LIBITTAPI ITTAPI_CDECL + +/* TODO: Temporary for compatibility! */ +#define ITTAPI_CALL ITTAPI_CDECL +#define LIBITTAPI_CALL ITTAPI_CDECL + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +/* use __forceinline (VC++ specific) */ +#define ITT_INLINE __forceinline +#define ITT_INLINE_ATTRIBUTE /* nothing */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +/* + * Generally, functions are not inlined unless optimization is specified. + * For functions declared inline, this attribute inlines the function even + * if no optimization level was specified. 
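+ *
+ * The two macros are used together in the declare-then-define pattern that
+ * appears later in this header, for example (hypothetical helper name):
+ *
+ *     ITT_INLINE long helper(volatile long* p) ITT_INLINE_ATTRIBUTE;
+ *     ITT_INLINE long helper(volatile long* p) { return *p + 1; }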
+ */ +#ifdef __STRICT_ANSI__ +#define ITT_INLINE static +#define ITT_INLINE_ATTRIBUTE __attribute__((unused)) +#else /* __STRICT_ANSI__ */ +#define ITT_INLINE static inline +#define ITT_INLINE_ATTRIBUTE __attribute__((always_inline, unused)) +#endif /* __STRICT_ANSI__ */ +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +/** @endcond */ + +#ifndef ITT_ARCH_IA32 +# define ITT_ARCH_IA32 1 +#endif /* ITT_ARCH_IA32 */ + +#ifndef ITT_ARCH_IA32E +# define ITT_ARCH_IA32E 2 +#endif /* ITT_ARCH_IA32E */ + +#ifndef ITT_ARCH_ARM +# define ITT_ARCH_ARM 4 +#endif /* ITT_ARCH_ARM */ + +#ifndef ITT_ARCH_PPC64 +# define ITT_ARCH_PPC64 5 +#endif /* ITT_ARCH_PPC64 */ + +#ifndef ITT_ARCH +# if defined _M_IX86 || defined __i386__ +# define ITT_ARCH ITT_ARCH_IA32 +# elif defined _M_X64 || defined _M_AMD64 || defined __x86_64__ +# define ITT_ARCH ITT_ARCH_IA32E +# elif defined _M_IA64 || defined __ia64__ +# define ITT_ARCH ITT_ARCH_IA64 +# elif defined _M_ARM || defined __arm__ +# define ITT_ARCH ITT_ARCH_ARM +# elif defined __powerpc64__ +# define ITT_ARCH ITT_ARCH_PPC64 +# endif +#endif + +#ifdef __cplusplus +# define ITT_EXTERN_C extern "C" +# define ITT_EXTERN_C_BEGIN extern "C" { +# define ITT_EXTERN_C_END } +#else +# define ITT_EXTERN_C /* nothing */ +# define ITT_EXTERN_C_BEGIN /* nothing */ +# define ITT_EXTERN_C_END /* nothing */ +#endif /* __cplusplus */ + +#define ITT_TO_STR_AUX(x) #x +#define ITT_TO_STR(x) ITT_TO_STR_AUX(x) + +#define __ITT_BUILD_ASSERT(expr, suffix) do { \ + static char __itt_build_check_##suffix[(expr) ? 1 : -1]; \ + __itt_build_check_##suffix[0] = 0; \ +} while(0) +#define _ITT_BUILD_ASSERT(expr, suffix) __ITT_BUILD_ASSERT((expr), suffix) +#define ITT_BUILD_ASSERT(expr) _ITT_BUILD_ASSERT((expr), __LINE__) + +#define ITT_MAGIC { 0xED, 0xAB, 0xAB, 0xEC, 0x0D, 0xEE, 0xDA, 0x30 } + +/* Replace with snapshot date YYYYMMDD for promotion build. 
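+
+   With the default API_VERSION_NUM of 0.0.0 defined just below, the
+   API_VERSION string expands to "ITT-API-Version 0.0.0 (20180723)".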
*/ +#define API_VERSION_BUILD 20180723 + +#ifndef API_VERSION_NUM +#define API_VERSION_NUM 0.0.0 +#endif /* API_VERSION_NUM */ + +#define API_VERSION "ITT-API-Version " ITT_TO_STR(API_VERSION_NUM) \ + " (" ITT_TO_STR(API_VERSION_BUILD) ")" + +/* OS communication functions */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#include <windows.h> +typedef HMODULE lib_t; +typedef DWORD TIDT; +typedef CRITICAL_SECTION mutex_t; +#define MUTEX_INITIALIZER { 0 } +#define strong_alias(name, aliasname) /* empty for Windows */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#include <dlfcn.h> +#if defined(UNICODE) || defined(_UNICODE) +#include <wchar.h> +#endif /* UNICODE */ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE 1 /* need for PTHREAD_MUTEX_RECURSIVE */ +#endif /* _GNU_SOURCE */ +#ifndef __USE_UNIX98 +#define __USE_UNIX98 1 /* need for PTHREAD_MUTEX_RECURSIVE, on SLES11.1 with gcc 4.3.4 wherein pthread.h missing dependency on __USE_XOPEN2K8 */ +#endif /*__USE_UNIX98*/ +#include <pthread.h> +typedef void* lib_t; +typedef pthread_t TIDT; +typedef pthread_mutex_t mutex_t; +#define MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER +#define _strong_alias(name, aliasname) \ + extern __typeof (name) aliasname __attribute__ ((alias (#name))); +#define strong_alias(name, aliasname) _strong_alias(name, aliasname) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_get_proc(lib, name) GetProcAddress(lib, name) +#define __itt_mutex_init(mutex) InitializeCriticalSection(mutex) +#define __itt_mutex_lock(mutex) EnterCriticalSection(mutex) +#define __itt_mutex_unlock(mutex) LeaveCriticalSection(mutex) +#define __itt_load_lib(name) LoadLibraryA(name) +#define __itt_unload_lib(handle) FreeLibrary(handle) +#define __itt_system_error() (int)GetLastError() +#define __itt_fstrcmp(s1, s2) lstrcmpA(s1, s2) +#define __itt_fstrnlen(s, l) strnlen_s(s, l) +#define __itt_fstrcpyn(s1, b, s2, l) strncpy_s(s1, b, s2, l) +#define __itt_fstrdup(s) _strdup(s) +#define __itt_thread_id() GetCurrentThreadId() +#define __itt_thread_yield() SwitchToThread() +#ifndef ITT_SIMPLE_INIT +ITT_INLINE long +__itt_interlocked_increment(volatile long* ptr) ITT_INLINE_ATTRIBUTE; +ITT_INLINE long __itt_interlocked_increment(volatile long* ptr) +{ + return InterlockedIncrement(ptr); +} +#endif /* ITT_SIMPLE_INIT */ + +#define DL_SYMBOLS (1) +#define PTHREAD_SYMBOLS (1) + +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +#define __itt_get_proc(lib, name) dlsym(lib, name) +#define __itt_mutex_init(mutex) {\ + pthread_mutexattr_t mutex_attr; \ + int error_code = pthread_mutexattr_init(&mutex_attr); \ + if (error_code) \ + __itt_report_error(__itt_error_system, "pthread_mutexattr_init", \ + error_code); \ + error_code = pthread_mutexattr_settype(&mutex_attr, \ + PTHREAD_MUTEX_RECURSIVE); \ + if (error_code) \ + __itt_report_error(__itt_error_system, "pthread_mutexattr_settype", \ + error_code); \ + error_code = pthread_mutex_init(mutex, &mutex_attr); \ + if (error_code) \ + __itt_report_error(__itt_error_system, "pthread_mutex_init", \ + error_code); \ + error_code = pthread_mutexattr_destroy(&mutex_attr); \ + if (error_code) \ + __itt_report_error(__itt_error_system, "pthread_mutexattr_destroy", \ + error_code); \ +} +#define __itt_mutex_lock(mutex) pthread_mutex_lock(mutex) +#define __itt_mutex_unlock(mutex) pthread_mutex_unlock(mutex) +#define __itt_load_lib(name) dlopen(name, RTLD_LAZY) +#define __itt_unload_lib(handle) dlclose(handle) +#define __itt_system_error() errno +#define __itt_fstrcmp(s1, s2) strcmp(s1, s2) + +/* makes 
customer code define safe APIs for SDL_STRNLEN_S and SDL_STRNCPY_S */ +#ifdef SDL_STRNLEN_S +#define __itt_fstrnlen(s, l) SDL_STRNLEN_S(s, l) +#else +#define __itt_fstrnlen(s, l) strlen(s) +#endif /* SDL_STRNLEN_S */ +#ifdef SDL_STRNCPY_S +#define __itt_fstrcpyn(s1, b, s2, l) SDL_STRNCPY_S(s1, b, s2, l) +#else +#define __itt_fstrcpyn(s1, b, s2, l) { \ + if (b > 0) { \ + /* 'volatile' is used to suppress the warning that a destination */ \ + /* bound depends on the length of the source. */ \ + volatile size_t num_to_copy = (size_t)(b - 1) < (size_t)(l) ? \ + (size_t)(b - 1) : (size_t)(l); \ + strncpy(s1, s2, num_to_copy); \ + s1[num_to_copy] = 0; \ + } \ +} +#endif /* SDL_STRNCPY_S */ + +#define __itt_fstrdup(s) strdup(s) +#define __itt_thread_id() pthread_self() +#define __itt_thread_yield() sched_yield() +#if ITT_ARCH==ITT_ARCH_IA64 +#ifdef __INTEL_COMPILER +#define __TBB_machine_fetchadd4(addr, val) __fetchadd4_acq((void *)addr, val) +#else /* __INTEL_COMPILER */ +/* TODO: Add Support for not Intel compilers for IA-64 architecture */ +#endif /* __INTEL_COMPILER */ +#elif ITT_ARCH==ITT_ARCH_IA32 || ITT_ARCH==ITT_ARCH_IA32E /* ITT_ARCH!=ITT_ARCH_IA64 */ +ITT_INLINE long +__TBB_machine_fetchadd4(volatile void* ptr, long addend) ITT_INLINE_ATTRIBUTE; +ITT_INLINE long __TBB_machine_fetchadd4(volatile void* ptr, long addend) +{ + long result; + __asm__ __volatile__("lock\nxadd %0,%1" + : "=r"(result),"=m"(*(int*)ptr) + : "0"(addend), "m"(*(int*)ptr) + : "memory"); + return result; +} +#elif ITT_ARCH==ITT_ARCH_ARM || ITT_ARCH==ITT_ARCH_PPC64 +#define __TBB_machine_fetchadd4(addr, val) __sync_fetch_and_add(addr, val) +#endif /* ITT_ARCH==ITT_ARCH_IA64 */ +#ifndef ITT_SIMPLE_INIT +ITT_INLINE long +__itt_interlocked_increment(volatile long* ptr) ITT_INLINE_ATTRIBUTE; +ITT_INLINE long __itt_interlocked_increment(volatile long* ptr) +{ + return __TBB_machine_fetchadd4(ptr, 1) + 1L; +} +#endif /* ITT_SIMPLE_INIT */ + +void* dlopen(const char*, int) __attribute__((weak)); +void* dlsym(void*, const char*) __attribute__((weak)); +int dlclose(void*) __attribute__((weak)); +#define DL_SYMBOLS (dlopen && dlsym && dlclose) + +int pthread_mutex_init(pthread_mutex_t*, const pthread_mutexattr_t*) __attribute__((weak)); +int pthread_mutex_lock(pthread_mutex_t*) __attribute__((weak)); +int pthread_mutex_unlock(pthread_mutex_t*) __attribute__((weak)); +int pthread_mutex_destroy(pthread_mutex_t*) __attribute__((weak)); +int pthread_mutexattr_init(pthread_mutexattr_t*) __attribute__((weak)); +int pthread_mutexattr_settype(pthread_mutexattr_t*, int) __attribute__((weak)); +int pthread_mutexattr_destroy(pthread_mutexattr_t*) __attribute__((weak)); +pthread_t pthread_self(void) __attribute__((weak)); +#define PTHREAD_SYMBOLS (pthread_mutex_init && pthread_mutex_lock && pthread_mutex_unlock && pthread_mutex_destroy && pthread_mutexattr_init && pthread_mutexattr_settype && pthread_mutexattr_destroy && pthread_self) + +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +typedef enum { + __itt_collection_normal = 0, + __itt_collection_paused = 1 +} __itt_collection_state; + +typedef enum { + __itt_thread_normal = 0, + __itt_thread_ignored = 1 +} __itt_thread_state; + +#pragma pack(push, 8) + +typedef struct ___itt_thread_info +{ + const char* nameA; /*!< Copy of original name in ASCII. */ +#if defined(UNICODE) || defined(_UNICODE) + const wchar_t* nameW; /*!< Copy of original name in UNICODE. 
*/ +#else /* UNICODE || _UNICODE */ + void* nameW; +#endif /* UNICODE || _UNICODE */ + TIDT tid; + __itt_thread_state state; /*!< Thread state (paused or normal) */ + int extra1; /*!< Reserved to the runtime */ + void* extra2; /*!< Reserved to the runtime */ + struct ___itt_thread_info* next; +} __itt_thread_info; + +#include "ittnotify_types.h" /* For __itt_group_id definition */ + +typedef struct ___itt_api_info_20101001 +{ + const char* name; + void** func_ptr; + void* init_func; + __itt_group_id group; +} __itt_api_info_20101001; + +typedef struct ___itt_api_info +{ + const char* name; + void** func_ptr; + void* init_func; + void* null_func; + __itt_group_id group; +} __itt_api_info; + +typedef struct __itt_counter_info +{ + const char* nameA; /*!< Copy of original name in ASCII. */ +#if defined(UNICODE) || defined(_UNICODE) + const wchar_t* nameW; /*!< Copy of original name in UNICODE. */ +#else /* UNICODE || _UNICODE */ + void* nameW; +#endif /* UNICODE || _UNICODE */ + const char* domainA; /*!< Copy of original name in ASCII. */ +#if defined(UNICODE) || defined(_UNICODE) + const wchar_t* domainW; /*!< Copy of original name in UNICODE. */ +#else /* UNICODE || _UNICODE */ + void* domainW; +#endif /* UNICODE || _UNICODE */ + int type; + long index; + int extra1; /*!< Reserved to the runtime */ + void* extra2; /*!< Reserved to the runtime */ + struct __itt_counter_info* next; +} __itt_counter_info_t; + +struct ___itt_domain; +struct ___itt_string_handle; + +typedef struct ___itt_global +{ + unsigned char magic[8]; + unsigned long version_major; + unsigned long version_minor; + unsigned long version_build; + volatile long api_initialized; + volatile long mutex_initialized; + volatile long atomic_counter; + mutex_t mutex; + lib_t lib; + void* error_handler; + const char** dll_path_ptr; + __itt_api_info* api_list_ptr; + struct ___itt_global* next; + /* Joinable structures below */ + __itt_thread_info* thread_list; + struct ___itt_domain* domain_list; + struct ___itt_string_handle* string_list; + __itt_collection_state state; + __itt_counter_info_t* counter_list; + unsigned int ipt_collect_events; +} __itt_global; + +#pragma pack(pop) + +#define NEW_THREAD_INFO_W(gptr,h,h_tail,t,s,n) { \ + h = (__itt_thread_info*)malloc(sizeof(__itt_thread_info)); \ + if (h != NULL) { \ + h->tid = t; \ + h->nameA = NULL; \ + h->nameW = n ? _wcsdup(n) : NULL; \ + h->state = s; \ + h->extra1 = 0; /* reserved */ \ + h->extra2 = NULL; /* reserved */ \ + h->next = NULL; \ + if (h_tail == NULL) \ + (gptr)->thread_list = h; \ + else \ + h_tail->next = h; \ + } \ +} + +#define NEW_THREAD_INFO_A(gptr,h,h_tail,t,s,n) { \ + h = (__itt_thread_info*)malloc(sizeof(__itt_thread_info)); \ + if (h != NULL) { \ + h->tid = t; \ + h->nameA = n ? __itt_fstrdup(n) : NULL; \ + h->nameW = NULL; \ + h->state = s; \ + h->extra1 = 0; /* reserved */ \ + h->extra2 = NULL; /* reserved */ \ + h->next = NULL; \ + if (h_tail == NULL) \ + (gptr)->thread_list = h; \ + else \ + h_tail->next = h; \ + } \ +} + +#define NEW_DOMAIN_W(gptr,h,h_tail,name) { \ + h = (__itt_domain*)malloc(sizeof(__itt_domain)); \ + if (h != NULL) { \ + h->flags = 1; /* domain is enabled by default */ \ + h->nameA = NULL; \ + h->nameW = name ? 
_wcsdup(name) : NULL; \ + h->extra1 = 0; /* reserved */ \ + h->extra2 = NULL; /* reserved */ \ + h->next = NULL; \ + if (h_tail == NULL) \ + (gptr)->domain_list = h; \ + else \ + h_tail->next = h; \ + } \ +} + +#define NEW_DOMAIN_A(gptr,h,h_tail,name) { \ + h = (__itt_domain*)malloc(sizeof(__itt_domain)); \ + if (h != NULL) { \ + h->flags = 1; /* domain is enabled by default */ \ + h->nameA = name ? __itt_fstrdup(name) : NULL; \ + h->nameW = NULL; \ + h->extra1 = 0; /* reserved */ \ + h->extra2 = NULL; /* reserved */ \ + h->next = NULL; \ + if (h_tail == NULL) \ + (gptr)->domain_list = h; \ + else \ + h_tail->next = h; \ + } \ +} + +#define NEW_STRING_HANDLE_W(gptr,h,h_tail,name) { \ + h = (__itt_string_handle*)malloc(sizeof(__itt_string_handle)); \ + if (h != NULL) { \ + h->strA = NULL; \ + h->strW = name ? _wcsdup(name) : NULL; \ + h->extra1 = 0; /* reserved */ \ + h->extra2 = NULL; /* reserved */ \ + h->next = NULL; \ + if (h_tail == NULL) \ + (gptr)->string_list = h; \ + else \ + h_tail->next = h; \ + } \ +} + +#define NEW_STRING_HANDLE_A(gptr,h,h_tail,name) { \ + h = (__itt_string_handle*)malloc(sizeof(__itt_string_handle)); \ + if (h != NULL) { \ + h->strA = name ? __itt_fstrdup(name) : NULL; \ + h->strW = NULL; \ + h->extra1 = 0; /* reserved */ \ + h->extra2 = NULL; /* reserved */ \ + h->next = NULL; \ + if (h_tail == NULL) \ + (gptr)->string_list = h; \ + else \ + h_tail->next = h; \ + } \ +} + +#define NEW_COUNTER_W(gptr,h,h_tail,name,domain,type) { \ + h = (__itt_counter_info_t*)malloc(sizeof(__itt_counter_info_t)); \ + if (h != NULL) { \ + h->nameA = NULL; \ + h->nameW = name ? _wcsdup(name) : NULL; \ + h->domainA = NULL; \ + h->domainW = name ? _wcsdup(domain) : NULL; \ + h->type = type; \ + h->index = 0; \ + h->next = NULL; \ + if (h_tail == NULL) \ + (gptr)->counter_list = h; \ + else \ + h_tail->next = h; \ + } \ +} + +#define NEW_COUNTER_A(gptr,h,h_tail,name,domain,type) { \ + h = (__itt_counter_info_t*)malloc(sizeof(__itt_counter_info_t)); \ + if (h != NULL) { \ + h->nameA = name ? __itt_fstrdup(name) : NULL; \ + h->nameW = NULL; \ + h->domainA = domain ? __itt_fstrdup(domain) : NULL; \ + h->domainW = NULL; \ + h->type = type; \ + h->index = 0; \ + h->next = NULL; \ + if (h_tail == NULL) \ + (gptr)->counter_list = h; \ + else \ + h_tail->next = h; \ + } \ +} + +#endif /* _ITTNOTIFY_CONFIG_H_ */ diff --git a/contrib/libs/tbb/src/tbb/tools_api/ittnotify_static.c b/contrib/libs/tbb/src/tbb/tools_api/ittnotify_static.c new file mode 100644 index 0000000000..dd8ca8e755 --- /dev/null +++ b/contrib/libs/tbb/src/tbb/tools_api/ittnotify_static.c @@ -0,0 +1,1244 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#include "ittnotify_config.h" + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define PATH_MAX 512 +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +#include <limits.h> +#include <dlfcn.h> +#include <errno.h> +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#include <stdio.h> +#include <stdlib.h> +#include <stdarg.h> +#include <string.h> + +#define INTEL_NO_MACRO_BODY +#define INTEL_ITTNOTIFY_API_PRIVATE +#include "ittnotify.h" +#include "legacy/ittnotify.h" + +#include "disable_warnings.h" + +static const char api_version[] = API_VERSION "\0\n@(#) $Revision$\n"; + +#define _N_(n) ITT_JOIN(INTEL_ITTNOTIFY_PREFIX,n) + +#if ITT_OS==ITT_OS_WIN +static const char* ittnotify_lib_name = "libittnotify.dll"; +#elif ITT_OS==ITT_OS_LINUX || ITT_OS==ITT_OS_FREEBSD +static const char* ittnotify_lib_name = "libittnotify.so"; +#elif ITT_OS==ITT_OS_MAC +static const char* ittnotify_lib_name = "libittnotify.dylib"; +#else +#error Unsupported or unknown OS. +#endif + +#ifdef __ANDROID__ +#include <android/log.h> +#include <stdio.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <linux/limits.h> + +#ifdef ITT_ANDROID_LOG + #define ITT_ANDROID_LOG_TAG "INTEL_VTUNE_USERAPI" + #define ITT_ANDROID_LOGI(...) ((void)__android_log_print(ANDROID_LOG_INFO, ITT_ANDROID_LOG_TAG, __VA_ARGS__)) + #define ITT_ANDROID_LOGW(...) ((void)__android_log_print(ANDROID_LOG_WARN, ITT_ANDROID_LOG_TAG, __VA_ARGS__)) + #define ITT_ANDROID_LOGE(...) ((void)__android_log_print(ANDROID_LOG_ERROR,ITT_ANDROID_LOG_TAG, __VA_ARGS__)) + #define ITT_ANDROID_LOGD(...) ((void)__android_log_print(ANDROID_LOG_DEBUG,ITT_ANDROID_LOG_TAG, __VA_ARGS__)) +#else + #define ITT_ANDROID_LOGI(...) + #define ITT_ANDROID_LOGW(...) + #define ITT_ANDROID_LOGE(...) + #define ITT_ANDROID_LOGD(...) +#endif + +/* default location of userapi collector on Android */ +#define ANDROID_ITTNOTIFY_DEFAULT_PATH_MASK(x) "/data/data/com.intel.vtune/perfrun/lib" \ + #x "/runtime/libittnotify.so" + +#if ITT_ARCH==ITT_ARCH_IA32 || ITT_ARCH==ITT_ARCH_ARM +#define ANDROID_ITTNOTIFY_DEFAULT_PATH ANDROID_ITTNOTIFY_DEFAULT_PATH_MASK(32) +#else +#define ANDROID_ITTNOTIFY_DEFAULT_PATH ANDROID_ITTNOTIFY_DEFAULT_PATH_MASK(64) +#endif + +#endif + + +#ifndef LIB_VAR_NAME +#if ITT_ARCH==ITT_ARCH_IA32 || ITT_ARCH==ITT_ARCH_ARM +#define LIB_VAR_NAME INTEL_LIBITTNOTIFY32 +#else +#define LIB_VAR_NAME INTEL_LIBITTNOTIFY64 +#endif +#endif /* LIB_VAR_NAME */ + +#define ITT_MUTEX_INIT_AND_LOCK(p) { \ + if (PTHREAD_SYMBOLS) \ + { \ + if (!p.mutex_initialized) \ + { \ + if (__itt_interlocked_increment(&p.atomic_counter) == 1) \ + { \ + __itt_mutex_init(&p.mutex); \ + p.mutex_initialized = 1; \ + } \ + else \ + while (!p.mutex_initialized) \ + __itt_thread_yield(); \ + } \ + __itt_mutex_lock(&p.mutex); \ + } \ +} + +const int _N_(err) = 0; + +typedef int (__itt_init_ittlib_t)(const char*, __itt_group_id); + +/* this define used to control initialization function name. */ +#ifndef __itt_init_ittlib_name +ITT_EXTERN_C int _N_(init_ittlib)(const char*, __itt_group_id); +static __itt_init_ittlib_t* __itt_init_ittlib_ptr = _N_(init_ittlib); +#define __itt_init_ittlib_name __itt_init_ittlib_ptr +#endif /* __itt_init_ittlib_name */ + +typedef void (__itt_fini_ittlib_t)(void); + +/* this define used to control finalization function name. 
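+
+   The ITT_STUB/ITT_STUBV expansions below generate, for each API entry, a
+   function pointer that initially targets a lazy-init trampoline; the first
+   call loads the collector library and, if the pointer got rebound, forwards
+   to it. Simplified conceptual sketch (all names hypothetical, not the code
+   actually generated):
+
+       static int example_init(int x);              forward-declared trampoline
+       int (*example_ptr)(int) = example_init;      pointer starts at the trampoline
+       static int example_init(int x)
+       {
+           ensure_ittlib_loaded();                  may rebind example_ptr
+           if (example_ptr != example_init)
+               return example_ptr(x);               forward to the real entry
+           return 0;                                collector absent: no-op
+       }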
*/ +#ifndef __itt_fini_ittlib_name +ITT_EXTERN_C void _N_(fini_ittlib)(void); +static __itt_fini_ittlib_t* __itt_fini_ittlib_ptr = _N_(fini_ittlib); +#define __itt_fini_ittlib_name __itt_fini_ittlib_ptr +#endif /* __itt_fini_ittlib_name */ + +extern __itt_global _N_(_ittapi_global); + +/* building pointers to imported funcs */ +#undef ITT_STUBV +#undef ITT_STUB +#define ITT_STUB(api,type,name,args,params,ptr,group,format) \ +static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args;\ +typedef type api ITT_JOIN(_N_(name),_t) args; \ +ITT_EXTERN_C_BEGIN ITT_JOIN(_N_(name),_t)* ITTNOTIFY_NAME(name) = ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)); ITT_EXTERN_C_END \ +static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args \ +{ \ + if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list == NULL) \ + __itt_init_ittlib_name(NULL, __itt_group_all); \ + if (ITTNOTIFY_NAME(name) && ITTNOTIFY_NAME(name) != ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init))) \ + return ITTNOTIFY_NAME(name) params; \ + else \ + return (type)0; \ +} + +#define ITT_STUBV(api,type,name,args,params,ptr,group,format) \ +static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args;\ +typedef type api ITT_JOIN(_N_(name),_t) args; \ +ITT_EXTERN_C_BEGIN ITT_JOIN(_N_(name),_t)* ITTNOTIFY_NAME(name) = ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)); ITT_EXTERN_C_END \ +static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args \ +{ \ + if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list == NULL) \ + __itt_init_ittlib_name(NULL, __itt_group_all); \ + if (ITTNOTIFY_NAME(name) && ITTNOTIFY_NAME(name) != ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init))) \ + ITTNOTIFY_NAME(name) params; \ + else \ + return; \ +} + +#undef __ITT_INTERNAL_INIT +#include "ittnotify_static.h" + +#undef ITT_STUB +#undef ITT_STUBV +#define ITT_STUB(api,type,name,args,params,ptr,group,format) \ +static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args;\ +typedef type api ITT_JOIN(_N_(name),_t) args; \ +ITT_EXTERN_C_BEGIN ITT_JOIN(_N_(name),_t)* ITTNOTIFY_NAME(name) = ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)); ITT_EXTERN_C_END + +#define ITT_STUBV(api,type,name,args,params,ptr,group,format) \ +static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args;\ +typedef type api ITT_JOIN(_N_(name),_t) args; \ +ITT_EXTERN_C_BEGIN ITT_JOIN(_N_(name),_t)* ITTNOTIFY_NAME(name) = ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)); ITT_EXTERN_C_END + +#define __ITT_INTERNAL_INIT +#include "ittnotify_static.h" +#undef __ITT_INTERNAL_INIT + +ITT_GROUP_LIST(group_list); + +#pragma pack(push, 8) + +typedef struct ___itt_group_alias +{ + const char* env_var; + __itt_group_id groups; +} __itt_group_alias; + +static __itt_group_alias group_alias[] = { + { "KMP_FOR_TPROFILE", (__itt_group_id)(__itt_group_control | __itt_group_thread | __itt_group_sync | __itt_group_mark) }, + { "KMP_FOR_TCHECK", (__itt_group_id)(__itt_group_control | __itt_group_thread | __itt_group_sync | __itt_group_fsync | __itt_group_mark | __itt_group_suppress) }, + { NULL, (__itt_group_none) }, + { api_version, (__itt_group_none) } /* !!! Just to avoid unused code elimination !!! 
*/ +}; + +#pragma pack(pop) + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#pragma warning(push) +#pragma warning(disable: 4054) /* warning C4054: 'type cast' : from function pointer 'XXX' to data pointer 'void *' */ +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +static __itt_api_info api_list[] = { +/* Define functions with static implementation */ +#undef ITT_STUB +#undef ITT_STUBV +#define ITT_STUB(api,type,name,args,params,nameindll,group,format) { ITT_TO_STR(ITT_JOIN(__itt_,nameindll)), (void**)(void*)&ITTNOTIFY_NAME(name), (void*)(size_t)&ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)), (void*)(size_t)&ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)), (__itt_group_id)(group)}, +#define ITT_STUBV ITT_STUB +#define __ITT_INTERNAL_INIT +#include "ittnotify_static.h" +#undef __ITT_INTERNAL_INIT +/* Define functions without static implementation */ +#undef ITT_STUB +#undef ITT_STUBV +#define ITT_STUB(api,type,name,args,params,nameindll,group,format) {ITT_TO_STR(ITT_JOIN(__itt_,nameindll)), (void**)(void*)&ITTNOTIFY_NAME(name), (void*)(size_t)&ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)), NULL, (__itt_group_id)(group)}, +#define ITT_STUBV ITT_STUB +#include "ittnotify_static.h" + {NULL, NULL, NULL, NULL, __itt_group_none} +}; + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#pragma warning(pop) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +static const char dll_path[PATH_MAX] = { 0 }; + +/* static part descriptor which handles. all notification api attributes. */ +__itt_global _N_(_ittapi_global) = { + ITT_MAGIC, /* identification info */ + ITT_MAJOR, ITT_MINOR, API_VERSION_BUILD, /* version info */ + 0, /* api_initialized */ + 0, /* mutex_initialized */ + 0, /* atomic_counter */ + MUTEX_INITIALIZER, /* mutex */ + NULL, /* dynamic library handle */ + NULL, /* error_handler */ + (const char**)&dll_path, /* dll_path_ptr */ + (__itt_api_info*)&api_list, /* api_list_ptr */ + NULL, /* next __itt_global */ + NULL, /* thread_list */ + NULL, /* domain_list */ + NULL, /* string_list */ + __itt_collection_normal, /* collection state */ + NULL, /* counter_list */ + 0 /* ipt_collect_events */ +}; + +typedef void (__itt_api_init_t)(__itt_global*, __itt_group_id); +typedef void (__itt_api_fini_t)(__itt_global*); + +/* ========================================================================= */ + +#ifdef ITT_NOTIFY_EXT_REPORT +ITT_EXTERN_C void _N_(error_handler)(__itt_error_code, va_list args); +#endif /* ITT_NOTIFY_EXT_REPORT */ + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#pragma warning(push) +#pragma warning(disable: 4055) /* warning C4055: 'type cast' : from data pointer 'void *' to function pointer 'XXX' */ +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +static void __itt_report_error_impl(int code, ...) { + va_list args; + va_start(args, code); + if (_N_(_ittapi_global).error_handler != NULL) + { + __itt_error_handler_t* handler = (__itt_error_handler_t*)(size_t)_N_(_ittapi_global).error_handler; + handler((__itt_error_code)code, args); + } +#ifdef ITT_NOTIFY_EXT_REPORT + _N_(error_handler)(code, args); +#endif /* ITT_NOTIFY_EXT_REPORT */ + va_end(args); +} + +//va_start cannot take enum (__itt_error_code) on clang, so it is necessary to transform it to int +#define __itt_report_error(code, ...) 
\ + __itt_report_error_impl((int)code,__VA_ARGS__) + + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#pragma warning(pop) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +static __itt_domain* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(domain_createW),_init))(const wchar_t* name) +{ + __itt_domain *h_tail = NULL, *h = NULL; + + if (name == NULL) + { + return NULL; + } + + ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); + if (_N_(_ittapi_global).api_initialized) + { + if (ITTNOTIFY_NAME(domain_createW) && ITTNOTIFY_NAME(domain_createW) != ITT_VERSIONIZE(ITT_JOIN(_N_(domain_createW),_init))) + { + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return ITTNOTIFY_NAME(domain_createW)(name); + } + } + for (h_tail = NULL, h = _N_(_ittapi_global).domain_list; h != NULL; h_tail = h, h = h->next) + { + if (h->nameW != NULL && !wcscmp(h->nameW, name)) break; + } + if (h == NULL) + { + NEW_DOMAIN_W(&_N_(_ittapi_global),h,h_tail,name); + } + if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return h; +} + +static __itt_domain* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(domain_createA),_init))(const char* name) +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +static __itt_domain* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(domain_create),_init))(const char* name) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +{ + __itt_domain *h_tail = NULL, *h = NULL; + + if (name == NULL) + { + return NULL; + } + + ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); + if (_N_(_ittapi_global).api_initialized) + { +#if ITT_PLATFORM==ITT_PLATFORM_WIN + if (ITTNOTIFY_NAME(domain_createA) && ITTNOTIFY_NAME(domain_createA) != ITT_VERSIONIZE(ITT_JOIN(_N_(domain_createA),_init))) + { + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return ITTNOTIFY_NAME(domain_createA)(name); + } +#else + if (ITTNOTIFY_NAME(domain_create) && ITTNOTIFY_NAME(domain_create) != ITT_VERSIONIZE(ITT_JOIN(_N_(domain_create),_init))) + { + if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return ITTNOTIFY_NAME(domain_create)(name); + } +#endif + } + for (h_tail = NULL, h = _N_(_ittapi_global).domain_list; h != NULL; h_tail = h, h = h->next) + { + if (h->nameA != NULL && !__itt_fstrcmp(h->nameA, name)) break; + } + if (h == NULL) + { + NEW_DOMAIN_A(&_N_(_ittapi_global),h,h_tail,name); + } + if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return h; +} + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +static __itt_string_handle* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_createW),_init))(const wchar_t* name) +{ + __itt_string_handle *h_tail = NULL, *h = NULL; + + if (name == NULL) + { + return NULL; + } + + ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); + if (_N_(_ittapi_global).api_initialized) + { + if (ITTNOTIFY_NAME(string_handle_createW) && ITTNOTIFY_NAME(string_handle_createW) != ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_createW),_init))) + { + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return ITTNOTIFY_NAME(string_handle_createW)(name); + } + } + for (h_tail = NULL, h = _N_(_ittapi_global).string_list; h != NULL; h_tail = h, h = h->next) + { + if (h->strW != NULL && !wcscmp(h->strW, name)) break; + } + if (h == NULL) + { + NEW_STRING_HANDLE_W(&_N_(_ittapi_global),h,h_tail,name); + } + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return h; +} + +static __itt_string_handle* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_createA),_init))(const char* name) +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +static __itt_string_handle* ITTAPI 
ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_create),_init))(const char* name) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +{ + __itt_string_handle *h_tail = NULL, *h = NULL; + + if (name == NULL) + { + return NULL; + } + + ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); + if (_N_(_ittapi_global).api_initialized) + { +#if ITT_PLATFORM==ITT_PLATFORM_WIN + if (ITTNOTIFY_NAME(string_handle_createA) && ITTNOTIFY_NAME(string_handle_createA) != ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_createA),_init))) + { + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return ITTNOTIFY_NAME(string_handle_createA)(name); + } +#else + if (ITTNOTIFY_NAME(string_handle_create) && ITTNOTIFY_NAME(string_handle_create) != ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_create),_init))) + { + if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return ITTNOTIFY_NAME(string_handle_create)(name); + } +#endif + } + for (h_tail = NULL, h = _N_(_ittapi_global).string_list; h != NULL; h_tail = h, h = h->next) + { + if (h->strA != NULL && !__itt_fstrcmp(h->strA, name)) break; + } + if (h == NULL) + { + NEW_STRING_HANDLE_A(&_N_(_ittapi_global),h,h_tail,name); + } + if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return h; +} + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_createW),_init))(const wchar_t *name, const wchar_t *domain) +{ + __itt_counter_info_t *h_tail = NULL, *h = NULL; + __itt_metadata_type type = __itt_metadata_u64; + + if (name == NULL) + { + return NULL; + } + + ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); + if (_N_(_ittapi_global).api_initialized) + { + if (ITTNOTIFY_NAME(counter_createW) && ITTNOTIFY_NAME(counter_createW) != ITT_VERSIONIZE(ITT_JOIN(_N_(counter_createW),_init))) + { + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return ITTNOTIFY_NAME(counter_createW)(name, domain); + } + } + for (h_tail = NULL, h = _N_(_ittapi_global).counter_list; h != NULL; h_tail = h, h = h->next) + { + if (h->nameW != NULL && h->type == type && !wcscmp(h->nameW, name) && ((h->domainW == NULL && domain == NULL) || + (h->domainW != NULL && domain != NULL && !wcscmp(h->domainW, domain)))) break; + + } + if (h == NULL) + { + NEW_COUNTER_W(&_N_(_ittapi_global),h,h_tail,name,domain,type); + } + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return (__itt_counter)h; +} + +static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_createA),_init))(const char *name, const char *domain) +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create),_init))(const char *name, const char *domain) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +{ + __itt_counter_info_t *h_tail = NULL, *h = NULL; + __itt_metadata_type type = __itt_metadata_u64; + + if (name == NULL) + { + return NULL; + } + + ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); + if (_N_(_ittapi_global).api_initialized) + { +#if ITT_PLATFORM==ITT_PLATFORM_WIN + if (ITTNOTIFY_NAME(counter_createA) && ITTNOTIFY_NAME(counter_createA) != ITT_VERSIONIZE(ITT_JOIN(_N_(counter_createA),_init))) + { + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return ITTNOTIFY_NAME(counter_createA)(name, domain); + } +#else + if (ITTNOTIFY_NAME(counter_create) && ITTNOTIFY_NAME(counter_create) != ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create),_init))) + { + if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return ITTNOTIFY_NAME(counter_create)(name, domain); + } +#endif + } + for (h_tail = NULL, h = 
_N_(_ittapi_global).counter_list; h != NULL; h_tail = h, h = h->next) + { + if (h->nameA != NULL && h->type == type && !__itt_fstrcmp(h->nameA, name) && ((h->domainA == NULL && domain == NULL) || + (h->domainA != NULL && domain != NULL && !__itt_fstrcmp(h->domainA, domain)))) break; + } + if (h == NULL) + { + NEW_COUNTER_A(&_N_(_ittapi_global),h,h_tail,name,domain,type); + } + if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return (__itt_counter)h; +} + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create_typedW),_init))(const wchar_t *name, const wchar_t *domain, __itt_metadata_type type) +{ + __itt_counter_info_t *h_tail = NULL, *h = NULL; + + if (name == NULL) + { + return NULL; + } + + ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); + if (_N_(_ittapi_global).api_initialized) + { + if (ITTNOTIFY_NAME(counter_create_typedW) && ITTNOTIFY_NAME(counter_create_typedW) != ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create_typedW),_init))) + { + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return ITTNOTIFY_NAME(counter_create_typedW)(name, domain, type); + } + } + for (h_tail = NULL, h = _N_(_ittapi_global).counter_list; h != NULL; h_tail = h, h = h->next) + { + if (h->nameW != NULL && h->type == type && !wcscmp(h->nameW, name) && ((h->domainW == NULL && domain == NULL) || + (h->domainW != NULL && domain != NULL && !wcscmp(h->domainW, domain)))) break; + + } + if (h == NULL) + { + NEW_COUNTER_W(&_N_(_ittapi_global),h,h_tail,name,domain,type); + } + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return (__itt_counter)h; +} + +static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create_typedA),_init))(const char *name, const char *domain, __itt_metadata_type type) +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create_typed),_init))(const char *name, const char *domain, __itt_metadata_type type) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +{ + __itt_counter_info_t *h_tail = NULL, *h = NULL; + + if (name == NULL) + { + return NULL; + } + + ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); + if (_N_(_ittapi_global).api_initialized) + { +#if ITT_PLATFORM==ITT_PLATFORM_WIN + if (ITTNOTIFY_NAME(counter_create_typedA) && ITTNOTIFY_NAME(counter_create_typedA) != ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create_typedA),_init))) + { + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return ITTNOTIFY_NAME(counter_create_typedA)(name, domain, type); + } +#else + if (ITTNOTIFY_NAME(counter_create_typed) && ITTNOTIFY_NAME(counter_create_typed) != ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create_typed),_init))) + { + if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return ITTNOTIFY_NAME(counter_create_typed)(name, domain, type); + } +#endif + } + for (h_tail = NULL, h = _N_(_ittapi_global).counter_list; h != NULL; h_tail = h, h = h->next) + { + if (h->nameA != NULL && h->type == type && !__itt_fstrcmp(h->nameA, name) && ((h->domainA == NULL && domain == NULL) || + (h->domainA != NULL && domain != NULL && !__itt_fstrcmp(h->domainA, domain)))) break; + } + if (h == NULL) + { + NEW_COUNTER_A(&_N_(_ittapi_global),h,h_tail,name,domain,type); + } + if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return (__itt_counter)h; +} + +/* -------------------------------------------------------------------------- */ + +static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(pause),_init))(void) +{ + if (!_N_(_ittapi_global).api_initialized && 
_N_(_ittapi_global).thread_list == NULL) + { + __itt_init_ittlib_name(NULL, __itt_group_all); + } + if (ITTNOTIFY_NAME(pause) && ITTNOTIFY_NAME(pause) != ITT_VERSIONIZE(ITT_JOIN(_N_(pause),_init))) + { + ITTNOTIFY_NAME(pause)(); + } + else + { + _N_(_ittapi_global).state = __itt_collection_paused; + } +} + +static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(resume),_init))(void) +{ + if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list == NULL) + { + __itt_init_ittlib_name(NULL, __itt_group_all); + } + if (ITTNOTIFY_NAME(resume) && ITTNOTIFY_NAME(resume) != ITT_VERSIONIZE(ITT_JOIN(_N_(resume),_init))) + { + ITTNOTIFY_NAME(resume)(); + } + else + { + _N_(_ittapi_global).state = __itt_collection_normal; + } +} + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameW),_init))(const wchar_t* name) +{ + if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list == NULL) + { + __itt_init_ittlib_name(NULL, __itt_group_all); + } + if (ITTNOTIFY_NAME(thread_set_nameW) && ITTNOTIFY_NAME(thread_set_nameW) != ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameW),_init))) + { + ITTNOTIFY_NAME(thread_set_nameW)(name); + } +} + +static int ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thr_name_setW),_init))(const wchar_t* name, int namelen) +{ + (void)namelen; + ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameW),_init))(name); + return 0; +} + +static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameA),_init))(const char* name) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_name),_init))(const char* name) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +{ + if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list == NULL) + { + __itt_init_ittlib_name(NULL, __itt_group_all); + } +#if ITT_PLATFORM==ITT_PLATFORM_WIN + if (ITTNOTIFY_NAME(thread_set_nameA) && ITTNOTIFY_NAME(thread_set_nameA) != ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameA),_init))) + { + ITTNOTIFY_NAME(thread_set_nameA)(name); + } +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + if (ITTNOTIFY_NAME(thread_set_name) && ITTNOTIFY_NAME(thread_set_name) != ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_name),_init))) + { + ITTNOTIFY_NAME(thread_set_name)(name); + } +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +} + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +static int ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thr_name_setA),_init))(const char* name, int namelen) +{ + (void)namelen; + ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameA),_init))(name); + return 0; +} +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +static int ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thr_name_set),_init))(const char* name, int namelen) +{ + (void)namelen; + ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_name),_init))(name); + return 0; +} +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thread_ignore),_init))(void) +{ + if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list == NULL) + { + __itt_init_ittlib_name(NULL, __itt_group_all); + } + if (ITTNOTIFY_NAME(thread_ignore) && ITTNOTIFY_NAME(thread_ignore) != ITT_VERSIONIZE(ITT_JOIN(_N_(thread_ignore),_init))) + { + ITTNOTIFY_NAME(thread_ignore)(); + } +} + +static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thr_ignore),_init))(void) +{ + ITT_VERSIONIZE(ITT_JOIN(_N_(thread_ignore),_init))(); +} + +static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(enable_attach),_init))(void) +{ +#ifdef __ANDROID__ + /* + * if LIB_VAR_NAME env variable were set before then stay previous 
value + * else set default path + */ + setenv(ITT_TO_STR(LIB_VAR_NAME), ANDROID_ITTNOTIFY_DEFAULT_PATH, 0); +#endif +} + +/* -------------------------------------------------------------------------- */ + +static const char* __itt_fsplit(const char* s, const char* sep, const char** out, int* len) +{ + int i; + int j; + + if (!s || !sep || !out || !len) + return NULL; + + for (i = 0; s[i]; i++) + { + int b = 0; + for (j = 0; sep[j]; j++) + if (s[i] == sep[j]) + { + b = 1; + break; + } + if (!b) + break; + } + + if (!s[i]) + return NULL; + + *len = 0; + *out = &s[i]; + + for (; s[i]; i++, (*len)++) + { + int b = 0; + for (j = 0; sep[j]; j++) + if (s[i] == sep[j]) + { + b = 1; + break; + } + if (b) + break; + } + + for (; s[i]; i++) + { + int b = 0; + for (j = 0; sep[j]; j++) + if (s[i] == sep[j]) + { + b = 1; + break; + } + if (!b) + break; + } + + return &s[i]; +} + +/* This function return value of env variable that placed into static buffer. + * !!! The same static buffer is used for subsequent calls. !!! + * This was done to avoid dynamic allocation for few calls. + * Actually we need this function only four times. + */ +static const char* __itt_get_env_var(const char* name) +{ +#define MAX_ENV_VALUE_SIZE 4086 + static char env_buff[MAX_ENV_VALUE_SIZE]; + static char* env_value = (char*)env_buff; + + if (name != NULL) + { +#if ITT_PLATFORM==ITT_PLATFORM_WIN + size_t max_len = MAX_ENV_VALUE_SIZE - (size_t)(env_value - env_buff); + DWORD rc = GetEnvironmentVariableA(name, env_value, (DWORD)max_len); + if (rc >= max_len) + __itt_report_error(__itt_error_env_too_long, name, (size_t)rc - 1, (size_t)(max_len - 1)); + else if (rc > 0) + { + const char* ret = (const char*)env_value; + env_value += rc + 1; + return ret; + } + else + { + /* If environment variable is empty, GetEnvirornmentVariables() + * returns zero (number of characters (not including terminating null), + * and GetLastError() returns ERROR_SUCCESS. 
*/ + DWORD err = GetLastError(); + if (err == ERROR_SUCCESS) + return env_value; + + if (err != ERROR_ENVVAR_NOT_FOUND) + __itt_report_error(__itt_error_cant_read_env, name, (int)err); + } +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ + char* env = getenv(name); + if (env != NULL) + { + size_t len = __itt_fstrnlen(env, MAX_ENV_VALUE_SIZE); + size_t max_len = MAX_ENV_VALUE_SIZE - (size_t)(env_value - env_buff); + if (len < max_len) + { + const char* ret = (const char*)env_value; + __itt_fstrcpyn(env_value, max_len, env, len + 1); + env_value += len + 1; + return ret; + } else + __itt_report_error(__itt_error_env_too_long, name, (size_t)len, (size_t)(max_len - 1)); + } +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + } + return NULL; +} + +static const char* __itt_get_lib_name(void) +{ + const char* lib_name = __itt_get_env_var(ITT_TO_STR(LIB_VAR_NAME)); + +#ifdef __ANDROID__ + if (lib_name == NULL) + { + +#if ITT_ARCH==ITT_ARCH_IA32 || ITT_ARCH==ITT_ARCH_ARM + const char* const marker_filename = "com.intel.itt.collector_lib_32"; +#else + const char* const marker_filename = "com.intel.itt.collector_lib_64"; +#endif + + char system_wide_marker_filename[PATH_MAX] = {0}; + int itt_marker_file_fd = -1; + ssize_t res = 0; + + res = snprintf(system_wide_marker_filename, PATH_MAX - 1, "%s%s", "/data/local/tmp/", marker_filename); + if (res < 0) + { + ITT_ANDROID_LOGE("Unable to concatenate marker file string."); + return lib_name; + } + itt_marker_file_fd = open(system_wide_marker_filename, O_RDONLY); + + if (itt_marker_file_fd == -1) + { + const pid_t my_pid = getpid(); + char cmdline_path[PATH_MAX] = {0}; + char package_name[PATH_MAX] = {0}; + char app_sandbox_file[PATH_MAX] = {0}; + int cmdline_fd = 0; + + ITT_ANDROID_LOGI("Unable to open system-wide marker file."); + res = snprintf(cmdline_path, PATH_MAX - 1, "/proc/%d/cmdline", my_pid); + if (res < 0) + { + ITT_ANDROID_LOGE("Unable to get cmdline path string."); + return lib_name; + } + + ITT_ANDROID_LOGI("CMD file: %s\n", cmdline_path); + cmdline_fd = open(cmdline_path, O_RDONLY); + if (cmdline_fd == -1) + { + ITT_ANDROID_LOGE("Unable to open %s file!", cmdline_path); + return lib_name; + } + res = read(cmdline_fd, package_name, PATH_MAX - 1); + if (res == -1) + { + ITT_ANDROID_LOGE("Unable to read %s file!", cmdline_path); + res = close(cmdline_fd); + if (res == -1) + { + ITT_ANDROID_LOGE("Unable to close %s file!", cmdline_path); + } + return lib_name; + } + res = close(cmdline_fd); + if (res == -1) + { + ITT_ANDROID_LOGE("Unable to close %s file!", cmdline_path); + return lib_name; + } + ITT_ANDROID_LOGI("Package name: %s\n", package_name); + res = snprintf(app_sandbox_file, PATH_MAX - 1, "/data/data/%s/%s", package_name, marker_filename); + if (res < 0) + { + ITT_ANDROID_LOGE("Unable to concatenate marker file string."); + return lib_name; + } + + ITT_ANDROID_LOGI("Lib marker file name: %s\n", app_sandbox_file); + itt_marker_file_fd = open(app_sandbox_file, O_RDONLY); + if (itt_marker_file_fd == -1) + { + ITT_ANDROID_LOGE("Unable to open app marker file!"); + return lib_name; + } + } + + { + char itt_lib_name[PATH_MAX] = {0}; + + res = read(itt_marker_file_fd, itt_lib_name, PATH_MAX - 1); + if (res == -1) + { + ITT_ANDROID_LOGE("Unable to read %s file!", itt_marker_file_fd); + res = close(itt_marker_file_fd); + if (res == -1) + { + ITT_ANDROID_LOGE("Unable to close %s file!", itt_marker_file_fd); + } + return lib_name; + } + ITT_ANDROID_LOGI("ITT Lib path: %s", itt_lib_name); + res = close(itt_marker_file_fd); + if (res == -1) + { + 
ITT_ANDROID_LOGE("Unable to close %s file!", itt_marker_file_fd); + return lib_name; + } + ITT_ANDROID_LOGI("Set env %s to %s", ITT_TO_STR(LIB_VAR_NAME), itt_lib_name); + res = setenv(ITT_TO_STR(LIB_VAR_NAME), itt_lib_name, 0); + if (res == -1) + { + ITT_ANDROID_LOGE("Unable to set env var!"); + return lib_name; + } + lib_name = __itt_get_env_var(ITT_TO_STR(LIB_VAR_NAME)); + ITT_ANDROID_LOGI("ITT Lib path from env: %s", lib_name); + } + } +#endif + + return lib_name; +} + +/* Avoid clashes with std::min */ +#define __itt_min(a,b) ((a) < (b) ? (a) : (b)) + +static __itt_group_id __itt_get_groups(void) +{ + int i; + __itt_group_id res = __itt_group_none; + const char* var_name = "INTEL_ITTNOTIFY_GROUPS"; + const char* group_str = __itt_get_env_var(var_name); + + if (group_str != NULL) + { + int len; + char gr[255]; + const char* chunk; + while ((group_str = __itt_fsplit(group_str, ",; ", &chunk, &len)) != NULL) + { + int min_len = __itt_min(len, (int)(sizeof(gr) - 1)); + __itt_fstrcpyn(gr, sizeof(gr) - 1, chunk, min_len); + gr[min_len] = 0; + + for (i = 0; group_list[i].name != NULL; i++) + { + if (!__itt_fstrcmp(gr, group_list[i].name)) + { + res = (__itt_group_id)(res | group_list[i].id); + break; + } + } + } + /* TODO: !!! Workaround for bug with warning for unknown group !!! + * Should be fixed in new initialization scheme. + * Now the following groups should be set always. */ + for (i = 0; group_list[i].id != __itt_group_none; i++) + if (group_list[i].id != __itt_group_all && + group_list[i].id > __itt_group_splitter_min && + group_list[i].id < __itt_group_splitter_max) + res = (__itt_group_id)(res | group_list[i].id); + return res; + } + else + { + for (i = 0; group_alias[i].env_var != NULL; i++) + if (__itt_get_env_var(group_alias[i].env_var) != NULL) + return group_alias[i].groups; + } + + return res; +} + +#undef __itt_min + +static int __itt_lib_version(lib_t lib) +{ + if (lib == NULL) + return 0; + if (__itt_get_proc(lib, "__itt_api_init")) + return 2; + if (__itt_get_proc(lib, "__itt_api_version")) + return 1; + return 0; +} + +/* It's not used right now! Comment it out to avoid warnings. 
+static void __itt_reinit_all_pointers(void) +{ + register int i; + // Fill all pointers with initial stubs + for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++) + *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].init_func; +} +*/ + +static void __itt_nullify_all_pointers(void) +{ + int i; + /* Nulify all pointers except domain_create, string_handle_create and counter_create */ + for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++) + *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func; +} + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#pragma warning(push) +#pragma warning(disable: 4054) /* warning C4054: 'type cast' : from function pointer 'XXX' to data pointer 'void *' */ +#pragma warning(disable: 4055) /* warning C4055: 'type cast' : from data pointer 'void *' to function pointer 'XXX' */ +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +ITT_EXTERN_C void _N_(fini_ittlib)(void) +{ + __itt_api_fini_t* __itt_api_fini_ptr = NULL; + static volatile TIDT current_thread = 0; + + if (_N_(_ittapi_global).api_initialized) + { + ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); + if (_N_(_ittapi_global).api_initialized) + { + if (current_thread == 0) + { + if (PTHREAD_SYMBOLS) current_thread = __itt_thread_id(); + if (_N_(_ittapi_global).lib != NULL) + { + __itt_api_fini_ptr = (__itt_api_fini_t*)(size_t)__itt_get_proc(_N_(_ittapi_global).lib, "__itt_api_fini"); + } + if (__itt_api_fini_ptr) + { + __itt_api_fini_ptr(&_N_(_ittapi_global)); + } + + __itt_nullify_all_pointers(); + + /* TODO: !!! not safe !!! don't support unload so far. + * if (_N_(_ittapi_global).lib != NULL) + * __itt_unload_lib(_N_(_ittapi_global).lib); + * _N_(_ittapi_global).lib = NULL; + */ + _N_(_ittapi_global).api_initialized = 0; + current_thread = 0; + } + } + if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + } +} + +ITT_EXTERN_C int _N_(init_ittlib)(const char* lib_name, __itt_group_id init_groups) +{ + int i; + __itt_group_id groups; +#ifdef ITT_COMPLETE_GROUP + __itt_group_id zero_group = __itt_group_none; +#endif /* ITT_COMPLETE_GROUP */ + static volatile TIDT current_thread = 0; + + if (!_N_(_ittapi_global).api_initialized) + { +#ifndef ITT_SIMPLE_INIT + ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); +#endif /* ITT_SIMPLE_INIT */ + + if (!_N_(_ittapi_global).api_initialized) + { + if (current_thread == 0) + { + if (PTHREAD_SYMBOLS) current_thread = __itt_thread_id(); + if (lib_name == NULL) + { + lib_name = __itt_get_lib_name(); + } + groups = __itt_get_groups(); + if (DL_SYMBOLS && (groups != __itt_group_none || lib_name != NULL)) + { + _N_(_ittapi_global).lib = __itt_load_lib((lib_name == NULL) ? 
ittnotify_lib_name : lib_name); + + if (_N_(_ittapi_global).lib != NULL) + { + __itt_api_init_t* __itt_api_init_ptr; + int lib_version = __itt_lib_version(_N_(_ittapi_global).lib); + + switch (lib_version) { + case 0: + groups = __itt_group_legacy; + /* Falls through */ + case 1: + /* Fill all pointers from dynamic library */ + for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++) + { + if (_N_(_ittapi_global).api_list_ptr[i].group & groups & init_groups) + { + *_N_(_ittapi_global).api_list_ptr[i].func_ptr = (void*)__itt_get_proc(_N_(_ittapi_global).lib, _N_(_ittapi_global).api_list_ptr[i].name); + if (*_N_(_ittapi_global).api_list_ptr[i].func_ptr == NULL) + { + /* Restore pointers for function with static implementation */ + *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func; + __itt_report_error(__itt_error_no_symbol, lib_name, _N_(_ittapi_global).api_list_ptr[i].name); +#ifdef ITT_COMPLETE_GROUP + zero_group = (__itt_group_id)(zero_group | _N_(_ittapi_global).api_list_ptr[i].group); +#endif /* ITT_COMPLETE_GROUP */ + } + } + else + *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func; + } + + if (groups == __itt_group_legacy) + { + /* Compatibility with legacy tools */ + ITTNOTIFY_NAME(thread_ignore) = ITTNOTIFY_NAME(thr_ignore); +#if ITT_PLATFORM==ITT_PLATFORM_WIN + ITTNOTIFY_NAME(sync_createA) = ITTNOTIFY_NAME(sync_set_nameA); + ITTNOTIFY_NAME(sync_createW) = ITTNOTIFY_NAME(sync_set_nameW); +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ + ITTNOTIFY_NAME(sync_create) = ITTNOTIFY_NAME(sync_set_name); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + ITTNOTIFY_NAME(sync_prepare) = ITTNOTIFY_NAME(notify_sync_prepare); + ITTNOTIFY_NAME(sync_cancel) = ITTNOTIFY_NAME(notify_sync_cancel); + ITTNOTIFY_NAME(sync_acquired) = ITTNOTIFY_NAME(notify_sync_acquired); + ITTNOTIFY_NAME(sync_releasing) = ITTNOTIFY_NAME(notify_sync_releasing); + } + +#ifdef ITT_COMPLETE_GROUP + for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++) + if (_N_(_ittapi_global).api_list_ptr[i].group & zero_group) + *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func; +#endif /* ITT_COMPLETE_GROUP */ + break; + case 2: + __itt_api_init_ptr = (__itt_api_init_t*)(size_t)__itt_get_proc(_N_(_ittapi_global).lib, "__itt_api_init"); + if (__itt_api_init_ptr) + __itt_api_init_ptr(&_N_(_ittapi_global), init_groups); + break; + } + } + else + { + __itt_nullify_all_pointers(); + +#if ITT_PLATFORM==ITT_PLATFORM_WIN + int error = __itt_system_error(); +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + const char* error = dlerror(); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + __itt_report_error(__itt_error_no_module, lib_name, error); + } + } + else + { + __itt_nullify_all_pointers(); + } + _N_(_ittapi_global).api_initialized = 1; + current_thread = 0; + /* !!! Just to avoid unused code elimination !!! 
*/ + if (__itt_fini_ittlib_ptr == _N_(fini_ittlib)) current_thread = 0; + } + } + +#ifndef ITT_SIMPLE_INIT + if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); +#endif /* ITT_SIMPLE_INIT */ + } + + /* Evaluating if any function ptr is non empty and it's in init_groups */ + for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++) + { + if (*_N_(_ittapi_global).api_list_ptr[i].func_ptr != _N_(_ittapi_global).api_list_ptr[i].null_func && + _N_(_ittapi_global).api_list_ptr[i].group & init_groups) + { + return 1; + } + } + return 0; +} + +ITT_EXTERN_C __itt_error_handler_t* _N_(set_error_handler)(__itt_error_handler_t* handler) +{ + __itt_error_handler_t* prev = (__itt_error_handler_t*)(size_t)_N_(_ittapi_global).error_handler; + _N_(_ittapi_global).error_handler = (void*)(size_t)handler; + return prev; +} + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#pragma warning(pop) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** __itt_mark_pt_region functions marks region of interest + * region parameter defines different regions. + * 0 <= region < 8 */ + +#if defined(ITT_API_IPT_SUPPORT) && (ITT_PLATFORM==ITT_PLATFORM_WIN || ITT_PLATFORM==ITT_PLATFORM_POSIX) && !defined(__ANDROID__) +void __itt_pt_mark(__itt_pt_region region); +void __itt_pt_mark_event(__itt_pt_region region); +#endif + +ITT_EXTERN_C void _N_(mark_pt_region_begin)(__itt_pt_region region) +{ +#if defined(ITT_API_IPT_SUPPORT) && (ITT_PLATFORM==ITT_PLATFORM_WIN || ITT_PLATFORM==ITT_PLATFORM_POSIX) && !defined(__ANDROID__) + if (_N_(_ittapi_global).ipt_collect_events == 1) + { + __itt_pt_mark_event(2*region); + } + else + { + __itt_pt_mark(2*region); + } +#else + (void)region; +#endif +} + +ITT_EXTERN_C void _N_(mark_pt_region_end)(__itt_pt_region region) +{ +#if defined(ITT_API_IPT_SUPPORT) && (ITT_PLATFORM==ITT_PLATFORM_WIN || ITT_PLATFORM==ITT_PLATFORM_POSIX) && !defined(__ANDROID__) + if (_N_(_ittapi_global).ipt_collect_events == 1) + { + __itt_pt_mark_event(2*region + 1); + } + else + { + __itt_pt_mark(2*region + 1); + } +#else + (void)region; +#endif +} + diff --git a/contrib/libs/tbb/src/tbb/tools_api/ittnotify_static.h b/contrib/libs/tbb/src/tbb/tools_api/ittnotify_static.h new file mode 100644 index 0000000000..67cf683880 --- /dev/null +++ b/contrib/libs/tbb/src/tbb/tools_api/ittnotify_static.h @@ -0,0 +1,354 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#include "ittnotify_config.h" + +#ifndef ITT_FORMAT_DEFINED +# ifndef ITT_FORMAT +# define ITT_FORMAT +# endif /* ITT_FORMAT */ +# ifndef ITT_NO_PARAMS +# define ITT_NO_PARAMS +# endif /* ITT_NO_PARAMS */ +#endif /* ITT_FORMAT_DEFINED */ + +/* + * parameters for macro expected: + * ITT_STUB(api, type, func_name, arguments, params, func_name_in_dll, group, printf_fmt) + */ +#ifdef __ITT_INTERNAL_INIT + +#ifndef __ITT_INTERNAL_BODY +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_domain*, domain_createA, (const char *name), (ITT_FORMAT name), domain_createA, __itt_group_structure, "\"%s\"") +ITT_STUB(ITTAPI, __itt_domain*, domain_createW, (const wchar_t *name), (ITT_FORMAT name), domain_createW, __itt_group_structure, "\"%S\"") +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_domain*, domain_create, (const char *name), (ITT_FORMAT name), domain_create, __itt_group_structure, "\"%s\"") +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_createA, (const char *name), (ITT_FORMAT name), string_handle_createA, __itt_group_structure, "\"%s\"") +ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_createW, (const wchar_t *name), (ITT_FORMAT name), string_handle_createW, __itt_group_structure, "\"%S\"") +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_create, (const char *name), (ITT_FORMAT name), string_handle_create, __itt_group_structure, "\"%s\"") +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_counter, counter_createA, (const char *name, const char *domain), (ITT_FORMAT name, domain), counter_createA, __itt_group_counter, "\"%s\", \"%s\"") +ITT_STUB(ITTAPI, __itt_counter, counter_createW, (const wchar_t *name, const wchar_t *domain), (ITT_FORMAT name, domain), counter_createW, __itt_group_counter, "\"%s\", \"%s\"") +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_counter, counter_create, (const char *name, const char *domain), (ITT_FORMAT name, domain), counter_create, __itt_group_counter, "\"%s\", \"%s\"") +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_counter, counter_create_typedA, (const char *name, const char *domain, __itt_metadata_type type), (ITT_FORMAT name, domain, type), counter_create_typedA, __itt_group_counter, "\"%s\", \"%s\", %d") +ITT_STUB(ITTAPI, __itt_counter, counter_create_typedW, (const wchar_t *name, const wchar_t *domain, __itt_metadata_type type), (ITT_FORMAT name, domain, type), counter_create_typedW, __itt_group_counter, "\"%s\", \"%s\", %d") +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_counter, counter_create_typed, (const char *name, const char *domain, __itt_metadata_type type), (ITT_FORMAT name, domain, type), counter_create_typed, __itt_group_counter, "\"%s\", \"%s\", %d") +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + + +ITT_STUBV(ITTAPI, void, pause, (void), (ITT_NO_PARAMS), pause, __itt_group_control | __itt_group_legacy, "no args") +ITT_STUBV(ITTAPI, void, resume, (void), (ITT_NO_PARAMS), resume, __itt_group_control | __itt_group_legacy, "no args") + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUBV(ITTAPI, void, thread_set_nameA, (const char *name), (ITT_FORMAT name), thread_set_nameA, __itt_group_thread, "\"%s\"") +ITT_STUBV(ITTAPI, void, thread_set_nameW, (const wchar_t *name), (ITT_FORMAT name), thread_set_nameW, __itt_group_thread, 
"\"%S\"") +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +ITT_STUBV(ITTAPI, void, thread_set_name, (const char *name), (ITT_FORMAT name), thread_set_name, __itt_group_thread, "\"%s\"") +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUBV(ITTAPI, void, thread_ignore, (void), (ITT_NO_PARAMS), thread_ignore, __itt_group_thread, "no args") + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(LIBITTAPI, int, thr_name_setA, (const char *name, int namelen), (ITT_FORMAT name, namelen), thr_name_setA, __itt_group_thread | __itt_group_legacy, "\"%s\", %d") +ITT_STUB(LIBITTAPI, int, thr_name_setW, (const wchar_t *name, int namelen), (ITT_FORMAT name, namelen), thr_name_setW, __itt_group_thread | __itt_group_legacy, "\"%S\", %d") +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +ITT_STUB(LIBITTAPI, int, thr_name_set, (const char *name, int namelen), (ITT_FORMAT name, namelen), thr_name_set, __itt_group_thread | __itt_group_legacy, "\"%s\", %d") +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUBV(LIBITTAPI, void, thr_ignore, (void), (ITT_NO_PARAMS), thr_ignore, __itt_group_thread | __itt_group_legacy, "no args") +#endif /* __ITT_INTERNAL_BODY */ + +ITT_STUBV(ITTAPI, void, enable_attach, (void), (ITT_NO_PARAMS), enable_attach, __itt_group_all, "no args") + +#else /* __ITT_INTERNAL_INIT */ + +ITT_STUBV(ITTAPI, void, detach, (void), (ITT_NO_PARAMS), detach, __itt_group_control | __itt_group_legacy, "no args") + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUBV(ITTAPI, void, sync_createA, (void *addr, const char *objtype, const char *objname, int attribute), (ITT_FORMAT addr, objtype, objname, attribute), sync_createA, __itt_group_sync | __itt_group_fsync, "%p, \"%s\", \"%s\", %x") +ITT_STUBV(ITTAPI, void, sync_createW, (void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute), (ITT_FORMAT addr, objtype, objname, attribute), sync_createW, __itt_group_sync | __itt_group_fsync, "%p, \"%S\", \"%S\", %x") +ITT_STUBV(ITTAPI, void, sync_renameA, (void *addr, const char *name), (ITT_FORMAT addr, name), sync_renameA, __itt_group_sync | __itt_group_fsync, "%p, \"%s\"") +ITT_STUBV(ITTAPI, void, sync_renameW, (void *addr, const wchar_t *name), (ITT_FORMAT addr, name), sync_renameW, __itt_group_sync | __itt_group_fsync, "%p, \"%S\"") +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +ITT_STUBV(ITTAPI, void, sync_create, (void *addr, const char *objtype, const char *objname, int attribute), (ITT_FORMAT addr, objtype, objname, attribute), sync_create, __itt_group_sync | __itt_group_fsync, "%p, \"%s\", \"%s\", %x") +ITT_STUBV(ITTAPI, void, sync_rename, (void *addr, const char *name), (ITT_FORMAT addr, name), sync_rename, __itt_group_sync | __itt_group_fsync, "%p, \"%s\"") +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUBV(ITTAPI, void, sync_destroy, (void *addr), (ITT_FORMAT addr), sync_destroy, __itt_group_sync | __itt_group_fsync, "%p") + +ITT_STUBV(ITTAPI, void, sync_prepare, (void* addr), (ITT_FORMAT addr), sync_prepare, __itt_group_sync, "%p") +ITT_STUBV(ITTAPI, void, sync_cancel, (void *addr), (ITT_FORMAT addr), sync_cancel, __itt_group_sync, "%p") +ITT_STUBV(ITTAPI, void, sync_acquired, (void *addr), (ITT_FORMAT addr), sync_acquired, __itt_group_sync, "%p") +ITT_STUBV(ITTAPI, void, sync_releasing, (void* addr), (ITT_FORMAT addr), sync_releasing, __itt_group_sync, "%p") + +ITT_STUBV(ITTAPI, void, suppress_push, (unsigned int mask), (ITT_FORMAT mask), suppress_push, __itt_group_suppress, "%p") +ITT_STUBV(ITTAPI, void, suppress_pop, (void), (ITT_NO_PARAMS), suppress_pop, __itt_group_suppress, "no args") 
+ITT_STUBV(ITTAPI, void, suppress_mark_range, (__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size),(ITT_FORMAT mode, mask, address, size), suppress_mark_range, __itt_group_suppress, "%d, %p, %p, %d") +ITT_STUBV(ITTAPI, void, suppress_clear_range,(__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size),(ITT_FORMAT mode, mask, address, size), suppress_clear_range,__itt_group_suppress, "%d, %p, %p, %d") + +ITT_STUBV(ITTAPI, void, fsync_prepare, (void* addr), (ITT_FORMAT addr), sync_prepare, __itt_group_fsync, "%p") +ITT_STUBV(ITTAPI, void, fsync_cancel, (void *addr), (ITT_FORMAT addr), sync_cancel, __itt_group_fsync, "%p") +ITT_STUBV(ITTAPI, void, fsync_acquired, (void *addr), (ITT_FORMAT addr), sync_acquired, __itt_group_fsync, "%p") +ITT_STUBV(ITTAPI, void, fsync_releasing, (void* addr), (ITT_FORMAT addr), sync_releasing, __itt_group_fsync, "%p") + +ITT_STUBV(ITTAPI, void, model_site_begin, (__itt_model_site *site, __itt_model_site_instance *instance, const char *name), (ITT_FORMAT site, instance, name), model_site_begin, __itt_group_model, "%p, %p, \"%s\"") +ITT_STUBV(ITTAPI, void, model_site_end, (__itt_model_site *site, __itt_model_site_instance *instance), (ITT_FORMAT site, instance), model_site_end, __itt_group_model, "%p, %p") +ITT_STUBV(ITTAPI, void, model_task_begin, (__itt_model_task *task, __itt_model_task_instance *instance, const char *name), (ITT_FORMAT task, instance, name), model_task_begin, __itt_group_model, "%p, %p, \"%s\"") +ITT_STUBV(ITTAPI, void, model_task_end, (__itt_model_task *task, __itt_model_task_instance *instance), (ITT_FORMAT task, instance), model_task_end, __itt_group_model, "%p, %p") +ITT_STUBV(ITTAPI, void, model_lock_acquire, (void *lock), (ITT_FORMAT lock), model_lock_acquire, __itt_group_model, "%p") +ITT_STUBV(ITTAPI, void, model_lock_release, (void *lock), (ITT_FORMAT lock), model_lock_release, __itt_group_model, "%p") +ITT_STUBV(ITTAPI, void, model_record_allocation, (void *addr, size_t size), (ITT_FORMAT addr, size), model_record_allocation, __itt_group_model, "%p, %d") +ITT_STUBV(ITTAPI, void, model_record_deallocation, (void *addr), (ITT_FORMAT addr), model_record_deallocation, __itt_group_model, "%p") +ITT_STUBV(ITTAPI, void, model_induction_uses, (void* addr, size_t size), (ITT_FORMAT addr, size), model_induction_uses, __itt_group_model, "%p, %d") +ITT_STUBV(ITTAPI, void, model_reduction_uses, (void* addr, size_t size), (ITT_FORMAT addr, size), model_reduction_uses, __itt_group_model, "%p, %d") +ITT_STUBV(ITTAPI, void, model_observe_uses, (void* addr, size_t size), (ITT_FORMAT addr, size), model_observe_uses, __itt_group_model, "%p, %d") +ITT_STUBV(ITTAPI, void, model_clear_uses, (void* addr), (ITT_FORMAT addr), model_clear_uses, __itt_group_model, "%p") + +#ifndef __ITT_INTERNAL_BODY +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUBV(ITTAPI, void, model_site_beginW, (const wchar_t *name), (ITT_FORMAT name), model_site_beginW, __itt_group_model, "\"%s\"") +ITT_STUBV(ITTAPI, void, model_task_beginW, (const wchar_t *name), (ITT_FORMAT name), model_task_beginW, __itt_group_model, "\"%s\"") +ITT_STUBV(ITTAPI, void, model_iteration_taskW, (const wchar_t *name), (ITT_FORMAT name), model_iteration_taskW, __itt_group_model, "\"%s\"") +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUBV(ITTAPI, void, model_site_beginA, (const char *name), (ITT_FORMAT name), model_site_beginA, __itt_group_model, "\"%s\"") +ITT_STUBV(ITTAPI, void, model_site_beginAL, (const char *name, size_t len), (ITT_FORMAT name, len), 
model_site_beginAL, __itt_group_model, "\"%s\", %d") +ITT_STUBV(ITTAPI, void, model_task_beginA, (const char *name), (ITT_FORMAT name), model_task_beginA, __itt_group_model, "\"%s\"") +ITT_STUBV(ITTAPI, void, model_task_beginAL, (const char *name, size_t len), (ITT_FORMAT name, len), model_task_beginAL, __itt_group_model, "\"%s\", %d") +ITT_STUBV(ITTAPI, void, model_iteration_taskA, (const char *name), (ITT_FORMAT name), model_iteration_taskA, __itt_group_model, "\"%s\"") +ITT_STUBV(ITTAPI, void, model_iteration_taskAL, (const char *name, size_t len), (ITT_FORMAT name, len), model_iteration_taskAL, __itt_group_model, "\"%s\", %d") +ITT_STUBV(ITTAPI, void, model_site_end_2, (void), (ITT_NO_PARAMS), model_site_end_2, __itt_group_model, "no args") +ITT_STUBV(ITTAPI, void, model_task_end_2, (void), (ITT_NO_PARAMS), model_task_end_2, __itt_group_model, "no args") +ITT_STUBV(ITTAPI, void, model_lock_acquire_2, (void *lock), (ITT_FORMAT lock), model_lock_acquire_2, __itt_group_model, "%p") +ITT_STUBV(ITTAPI, void, model_lock_release_2, (void *lock), (ITT_FORMAT lock), model_lock_release_2, __itt_group_model, "%p") +ITT_STUBV(ITTAPI, void, model_aggregate_task, (size_t count), (ITT_FORMAT count), model_aggregate_task, __itt_group_model, "%d") +ITT_STUBV(ITTAPI, void, model_disable_push, (__itt_model_disable x), (ITT_FORMAT x), model_disable_push, __itt_group_model, "%p") +ITT_STUBV(ITTAPI, void, model_disable_pop, (void), (ITT_NO_PARAMS), model_disable_pop, __itt_group_model, "no args") +#endif /* __ITT_INTERNAL_BODY */ + +#ifndef __ITT_INTERNAL_BODY +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_heap_function, heap_function_createA, (const char *name, const char *domain), (ITT_FORMAT name, domain), heap_function_createA, __itt_group_heap, "\"%s\", \"%s\"") +ITT_STUB(ITTAPI, __itt_heap_function, heap_function_createW, (const wchar_t *name, const wchar_t *domain), (ITT_FORMAT name, domain), heap_function_createW, __itt_group_heap, "\"%s\", \"%s\"") +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_heap_function, heap_function_create, (const char *name, const char *domain), (ITT_FORMAT name, domain), heap_function_create, __itt_group_heap, "\"%s\", \"%s\"") +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* __ITT_INTERNAL_BODY */ +ITT_STUBV(ITTAPI, void, heap_allocate_begin, (__itt_heap_function h, size_t size, int initialized), (ITT_FORMAT h, size, initialized), heap_allocate_begin, __itt_group_heap, "%p, %lu, %d") +ITT_STUBV(ITTAPI, void, heap_allocate_end, (__itt_heap_function h, void** addr, size_t size, int initialized), (ITT_FORMAT h, addr, size, initialized), heap_allocate_end, __itt_group_heap, "%p, %p, %lu, %d") +ITT_STUBV(ITTAPI, void, heap_free_begin, (__itt_heap_function h, void* addr), (ITT_FORMAT h, addr), heap_free_begin, __itt_group_heap, "%p, %p") +ITT_STUBV(ITTAPI, void, heap_free_end, (__itt_heap_function h, void* addr), (ITT_FORMAT h, addr), heap_free_end, __itt_group_heap, "%p, %p") +ITT_STUBV(ITTAPI, void, heap_reallocate_begin, (__itt_heap_function h, void* addr, size_t new_size, int initialized), (ITT_FORMAT h, addr, new_size, initialized), heap_reallocate_begin, __itt_group_heap, "%p, %p, %lu, %d") +ITT_STUBV(ITTAPI, void, heap_reallocate_end, (__itt_heap_function h, void* addr, void** new_addr, size_t new_size, int initialized), (ITT_FORMAT h, addr, new_addr, new_size, initialized), heap_reallocate_end, __itt_group_heap, "%p, %p, %p, %lu, %d") +ITT_STUBV(ITTAPI, void, heap_internal_access_begin, (void), (ITT_NO_PARAMS), 
heap_internal_access_begin, __itt_group_heap, "no args") +ITT_STUBV(ITTAPI, void, heap_internal_access_end, (void), (ITT_NO_PARAMS), heap_internal_access_end, __itt_group_heap, "no args") +ITT_STUBV(ITTAPI, void, heap_record_memory_growth_begin, (void), (ITT_NO_PARAMS), heap_record_memory_growth_begin, __itt_group_heap, "no args") +ITT_STUBV(ITTAPI, void, heap_record_memory_growth_end, (void), (ITT_NO_PARAMS), heap_record_memory_growth_end, __itt_group_heap, "no args") +ITT_STUBV(ITTAPI, void, heap_reset_detection, (unsigned int reset_mask), (ITT_FORMAT reset_mask), heap_reset_detection, __itt_group_heap, "%u") +ITT_STUBV(ITTAPI, void, heap_record, (unsigned int record_mask), (ITT_FORMAT record_mask), heap_record, __itt_group_heap, "%u") + +ITT_STUBV(ITTAPI, void, id_create, (const __itt_domain *domain, __itt_id id), (ITT_FORMAT domain, id), id_create, __itt_group_structure, "%p, %lu") +ITT_STUBV(ITTAPI, void, id_destroy, (const __itt_domain *domain, __itt_id id), (ITT_FORMAT domain, id), id_destroy, __itt_group_structure, "%p, %lu") + +ITT_STUB(ITTAPI, __itt_timestamp, get_timestamp, (void), (ITT_NO_PARAMS), get_timestamp, __itt_group_structure, "no args") + +ITT_STUBV(ITTAPI, void, region_begin, (const __itt_domain *domain, __itt_id id, __itt_id parent, __itt_string_handle *name), (ITT_FORMAT domain, id, parent, name), region_begin, __itt_group_structure, "%p, %lu, %lu, %p") +ITT_STUBV(ITTAPI, void, region_end, (const __itt_domain *domain, __itt_id id), (ITT_FORMAT domain, id), region_end, __itt_group_structure, "%p, %lu") + +#ifndef __ITT_INTERNAL_BODY +ITT_STUBV(ITTAPI, void, frame_begin_v3, (const __itt_domain *domain, __itt_id *id), (ITT_FORMAT domain, id), frame_begin_v3, __itt_group_structure, "%p, %p") +ITT_STUBV(ITTAPI, void, frame_end_v3, (const __itt_domain *domain, __itt_id *id), (ITT_FORMAT domain, id), frame_end_v3, __itt_group_structure, "%p, %p") +ITT_STUBV(ITTAPI, void, frame_submit_v3, (const __itt_domain *domain, __itt_id *id, __itt_timestamp begin, __itt_timestamp end), (ITT_FORMAT domain, id, begin, end), frame_submit_v3, __itt_group_structure, "%p, %p, %lu, %lu") +#endif /* __ITT_INTERNAL_BODY */ + +ITT_STUBV(ITTAPI, void, task_group, (const __itt_domain *domain, __itt_id id, __itt_id parent, __itt_string_handle *name), (ITT_FORMAT domain, id, parent, name), task_group, __itt_group_structure, "%p, %lu, %lu, %p") + +ITT_STUBV(ITTAPI, void, task_begin, (const __itt_domain *domain, __itt_id id, __itt_id parent, __itt_string_handle *name), (ITT_FORMAT domain, id, parent, name), task_begin, __itt_group_structure, "%p, %lu, %lu, %p") +ITT_STUBV(ITTAPI, void, task_begin_fn, (const __itt_domain *domain, __itt_id id, __itt_id parent, void* fn), (ITT_FORMAT domain, id, parent, fn), task_begin_fn, __itt_group_structure, "%p, %lu, %lu, %p") +ITT_STUBV(ITTAPI, void, task_end, (const __itt_domain *domain), (ITT_FORMAT domain), task_end, __itt_group_structure, "%p") + +ITT_STUBV(ITTAPI, void, counter_inc_v3, (const __itt_domain *domain, __itt_string_handle *name), (ITT_FORMAT domain, name), counter_inc_v3, __itt_group_structure, "%p, %p") +ITT_STUBV(ITTAPI, void, counter_inc_delta_v3, (const __itt_domain *domain, __itt_string_handle *name, unsigned long long value), (ITT_FORMAT domain, name, value), counter_inc_delta_v3, __itt_group_structure, "%p, %p, %lu") +ITT_STUBV(ITTAPI, void, counter_dec_v3, (const __itt_domain *domain, __itt_string_handle *name), (ITT_FORMAT domain, name), counter_dec_v3, __itt_group_structure, "%p, %p") +ITT_STUBV(ITTAPI, void, counter_dec_delta_v3, (const 
__itt_domain *domain, __itt_string_handle *name, unsigned long long value), (ITT_FORMAT domain, name, value), counter_dec_delta_v3, __itt_group_structure, "%p, %p, %lu") + +ITT_STUBV(ITTAPI, void, marker, (const __itt_domain *domain, __itt_id id, __itt_string_handle *name, __itt_scope scope), (ITT_FORMAT domain, id, name, scope), marker, __itt_group_structure, "%p, %lu, %p, %d") + +ITT_STUBV(ITTAPI, void, metadata_add, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data), (ITT_FORMAT domain, id, key, type, count, data), metadata_add, __itt_group_structure, "%p, %lu, %p, %d, %lu, %p") +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUBV(ITTAPI, void, metadata_str_addA, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char* data, size_t length), (ITT_FORMAT domain, id, key, data, length), metadata_str_addA, __itt_group_structure, "%p, %lu, %p, %p, %lu") +ITT_STUBV(ITTAPI, void, metadata_str_addW, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const wchar_t* data, size_t length), (ITT_FORMAT domain, id, key, data, length), metadata_str_addW, __itt_group_structure, "%p, %lu, %p, %p, %lu") +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +ITT_STUBV(ITTAPI, void, metadata_str_add, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char* data, size_t length), (ITT_FORMAT domain, id, key, data, length), metadata_str_add, __itt_group_structure, "%p, %lu, %p, %p, %lu") +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +ITT_STUBV(ITTAPI, void, relation_add_to_current, (const __itt_domain *domain, __itt_relation relation, __itt_id tail), (ITT_FORMAT domain, relation, tail), relation_add_to_current, __itt_group_structure, "%p, %lu, %p") +ITT_STUBV(ITTAPI, void, relation_add, (const __itt_domain *domain, __itt_id head, __itt_relation relation, __itt_id tail), (ITT_FORMAT domain, head, relation, tail), relation_add, __itt_group_structure, "%p, %p, %lu, %p") + +#ifndef __ITT_INTERNAL_BODY +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(LIBITTAPI, __itt_event, event_createA, (const char *name, int namelen), (ITT_FORMAT name, namelen), event_createA, __itt_group_mark | __itt_group_legacy, "\"%s\", %d") +ITT_STUB(LIBITTAPI, __itt_event, event_createW, (const wchar_t *name, int namelen), (ITT_FORMAT name, namelen), event_createW, __itt_group_mark | __itt_group_legacy, "\"%S\", %d") +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +ITT_STUB(LIBITTAPI, __itt_event, event_create, (const char *name, int namelen), (ITT_FORMAT name, namelen), event_create, __itt_group_mark | __itt_group_legacy, "\"%s\", %d") +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(LIBITTAPI, int, event_start, (__itt_event event), (ITT_FORMAT event), event_start, __itt_group_mark | __itt_group_legacy, "%d") +ITT_STUB(LIBITTAPI, int, event_end, (__itt_event event), (ITT_FORMAT event), event_end, __itt_group_mark | __itt_group_legacy, "%d") +#endif /* __ITT_INTERNAL_BODY */ + +#ifndef __ITT_INTERNAL_BODY +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUBV(ITTAPI, void, sync_set_nameA, (void *addr, const char *objtype, const char *objname, int attribute), (ITT_FORMAT addr, objtype, objname, attribute), sync_set_nameA, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p, \"%s\", \"%s\", %x") +ITT_STUBV(ITTAPI, void, sync_set_nameW, (void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute), (ITT_FORMAT addr, objtype, objname, attribute), sync_set_nameW, __itt_group_sync | __itt_group_fsync | 
__itt_group_legacy, "%p, \"%S\", \"%S\", %x") +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +ITT_STUBV(ITTAPI, void, sync_set_name, (void *addr, const char *objtype, const char *objname, int attribute), (ITT_FORMAT addr, objtype, objname, attribute), sync_set_name, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "p, \"%s\", \"%s\", %x") +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(LIBITTAPI, int, notify_sync_nameA, (void *p, const char *objtype, int typelen, const char *objname, int namelen, int attribute), (ITT_FORMAT p, objtype, typelen, objname, namelen, attribute), notify_sync_nameA, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p, \"%s\", %d, \"%s\", %d, %x") +ITT_STUB(LIBITTAPI, int, notify_sync_nameW, (void *p, const wchar_t *objtype, int typelen, const wchar_t *objname, int namelen, int attribute), (ITT_FORMAT p, objtype, typelen, objname, namelen, attribute), notify_sync_nameW, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p, \"%S\", %d, \"%S\", %d, %x") +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +ITT_STUB(LIBITTAPI, int, notify_sync_name, (void *p, const char *objtype, int typelen, const char *objname, int namelen, int attribute), (ITT_FORMAT p, objtype, typelen, objname, namelen, attribute), notify_sync_name, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p, \"%s\", %d, \"%s\", %d, %x") +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +ITT_STUBV(LIBITTAPI, void, notify_sync_prepare, (void *p), (ITT_FORMAT p), notify_sync_prepare, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p") +ITT_STUBV(LIBITTAPI, void, notify_sync_cancel, (void *p), (ITT_FORMAT p), notify_sync_cancel, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p") +ITT_STUBV(LIBITTAPI, void, notify_sync_acquired, (void *p), (ITT_FORMAT p), notify_sync_acquired, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p") +ITT_STUBV(LIBITTAPI, void, notify_sync_releasing, (void *p), (ITT_FORMAT p), notify_sync_releasing, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p") +#endif /* __ITT_INTERNAL_BODY */ + +ITT_STUBV(LIBITTAPI, void, memory_read, (void *addr, size_t size), (ITT_FORMAT addr, size), memory_read, __itt_group_legacy, "%p, %lu") +ITT_STUBV(LIBITTAPI, void, memory_write, (void *addr, size_t size), (ITT_FORMAT addr, size), memory_write, __itt_group_legacy, "%p, %lu") +ITT_STUBV(LIBITTAPI, void, memory_update, (void *addr, size_t size), (ITT_FORMAT addr, size), memory_update, __itt_group_legacy, "%p, %lu") + +ITT_STUB(LIBITTAPI, __itt_state_t, state_get, (void), (ITT_NO_PARAMS), state_get, __itt_group_legacy, "no args") +ITT_STUB(LIBITTAPI, __itt_state_t, state_set, (__itt_state_t s), (ITT_FORMAT s), state_set, __itt_group_legacy, "%d") +ITT_STUB(LIBITTAPI, __itt_obj_state_t, obj_mode_set, (__itt_obj_prop_t p, __itt_obj_state_t s), (ITT_FORMAT p, s), obj_mode_set, __itt_group_legacy, "%d, %d") +ITT_STUB(LIBITTAPI, __itt_thr_state_t, thr_mode_set, (__itt_thr_prop_t p, __itt_thr_state_t s), (ITT_FORMAT p, s), thr_mode_set, __itt_group_legacy, "%d, %d") + +#ifndef __ITT_INTERNAL_BODY +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_frame, frame_createA, (const char *domain), (ITT_FORMAT domain), frame_createA, __itt_group_frame, "\"%s\"") +ITT_STUB(ITTAPI, __itt_frame, frame_createW, (const wchar_t *domain), (ITT_FORMAT domain), frame_createW, __itt_group_frame, "\"%s\"") +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_frame, frame_create, 
(const char *domain), (ITT_FORMAT domain), frame_create, __itt_group_frame, "\"%s\"") +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_pt_region, pt_region_createA, (const char *name), (ITT_FORMAT name), pt_region_createA, __itt_group_structure, "\"%s\"") +ITT_STUB(ITTAPI, __itt_pt_region, pt_region_createW, (const wchar_t *name), (ITT_FORMAT name), pt_region_createW, __itt_group_structure, "\"%S\"") +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_pt_region, pt_region_create, (const char *name), (ITT_FORMAT name), pt_region_create, __itt_group_structure, "\"%s\"") +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* __ITT_INTERNAL_BODY */ +ITT_STUBV(ITTAPI, void, frame_begin, (__itt_frame frame), (ITT_FORMAT frame), frame_begin, __itt_group_frame, "%p") +ITT_STUBV(ITTAPI, void, frame_end, (__itt_frame frame), (ITT_FORMAT frame), frame_end, __itt_group_frame, "%p") + +ITT_STUBV(ITTAPI, void, counter_destroy, (__itt_counter id), (ITT_FORMAT id), counter_destroy, __itt_group_counter, "%p") +ITT_STUBV(ITTAPI, void, counter_inc, (__itt_counter id), (ITT_FORMAT id), counter_inc, __itt_group_counter, "%p") +ITT_STUBV(ITTAPI, void, counter_inc_delta, (__itt_counter id, unsigned long long value), (ITT_FORMAT id, value), counter_inc_delta, __itt_group_counter, "%p, %lu") +ITT_STUBV(ITTAPI, void, counter_dec, (__itt_counter id), (ITT_FORMAT id), counter_dec, __itt_group_counter, "%p") +ITT_STUBV(ITTAPI, void, counter_dec_delta, (__itt_counter id, unsigned long long value), (ITT_FORMAT id, value), counter_dec_delta, __itt_group_counter, "%p, %lu") +ITT_STUBV(ITTAPI, void, counter_set_value, (__itt_counter id, void *value_ptr), (ITT_FORMAT id, value_ptr), counter_set_value, __itt_group_counter, "%p, %p") +ITT_STUBV(ITTAPI, void, counter_set_value_ex, (__itt_counter id, __itt_clock_domain *clock_domain, unsigned long long timestamp, void *value_ptr), (ITT_FORMAT id, clock_domain, timestamp, value_ptr), counter_set_value_ex, __itt_group_counter, "%p, %p, %llu, %p") + +#ifndef __ITT_INTERNAL_BODY +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_mark_type, mark_createA, (const char *name), (ITT_FORMAT name), mark_createA, __itt_group_mark, "\"%s\"") +ITT_STUB(ITTAPI, __itt_mark_type, mark_createW, (const wchar_t *name), (ITT_FORMAT name), mark_createW, __itt_group_mark, "\"%S\"") +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_mark_type, mark_create, (const char *name), (ITT_FORMAT name), mark_create, __itt_group_mark, "\"%s\"") +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* __ITT_INTERNAL_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, int, markA, (__itt_mark_type mt, const char *parameter), (ITT_FORMAT mt, parameter), markA, __itt_group_mark, "%d, \"%s\"") +ITT_STUB(ITTAPI, int, markW, (__itt_mark_type mt, const wchar_t *parameter), (ITT_FORMAT mt, parameter), markW, __itt_group_mark, "%d, \"%S\"") +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, int, mark, (__itt_mark_type mt, const char *parameter), (ITT_FORMAT mt, parameter), mark, __itt_group_mark, "%d, \"%s\"") +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, int, mark_off, (__itt_mark_type mt), (ITT_FORMAT mt), mark_off, __itt_group_mark, "%d") +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, int, mark_globalA, (__itt_mark_type mt, const char *parameter), (ITT_FORMAT mt, parameter), mark_globalA, __itt_group_mark, "%d, \"%s\"") +ITT_STUB(ITTAPI, int, mark_globalW, (__itt_mark_type mt, 
const wchar_t *parameter), (ITT_FORMAT mt, parameter), mark_globalW, __itt_group_mark, "%d, \"%S\"") +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, int, mark_global, (__itt_mark_type mt, const char *parameter), (ITT_FORMAT mt, parameter), mark_global, __itt_group_mark, "%d, \"%S\"") +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, int, mark_global_off, (__itt_mark_type mt), (ITT_FORMAT mt), mark_global_off, __itt_group_mark, "%d") + +#ifndef __ITT_INTERNAL_BODY +ITT_STUB(ITTAPI, __itt_caller, stack_caller_create, (void), (ITT_NO_PARAMS), stack_caller_create, __itt_group_stitch, "no args") +#endif /* __ITT_INTERNAL_BODY */ +ITT_STUBV(ITTAPI, void, stack_caller_destroy, (__itt_caller id), (ITT_FORMAT id), stack_caller_destroy, __itt_group_stitch, "%p") +ITT_STUBV(ITTAPI, void, stack_callee_enter, (__itt_caller id), (ITT_FORMAT id), stack_callee_enter, __itt_group_stitch, "%p") +ITT_STUBV(ITTAPI, void, stack_callee_leave, (__itt_caller id), (ITT_FORMAT id), stack_callee_leave, __itt_group_stitch, "%p") + +ITT_STUB(ITTAPI, __itt_clock_domain*, clock_domain_create, (__itt_get_clock_info_fn fn, void* fn_data), (ITT_FORMAT fn, fn_data), clock_domain_create, __itt_group_structure, "%p, %p") +ITT_STUBV(ITTAPI, void, clock_domain_reset, (void), (ITT_NO_PARAMS), clock_domain_reset, __itt_group_structure, "no args") +ITT_STUBV(ITTAPI, void, id_create_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id), (ITT_FORMAT domain, clock_domain, timestamp, id), id_create_ex, __itt_group_structure, "%p, %p, %lu, %lu") +ITT_STUBV(ITTAPI, void, id_destroy_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id), (ITT_FORMAT domain, clock_domain, timestamp, id), id_destroy_ex, __itt_group_structure, "%p, %p, %lu, %lu") +ITT_STUBV(ITTAPI, void, task_begin_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_id parentid, __itt_string_handle *name), (ITT_FORMAT domain, clock_domain, timestamp, id, parentid, name), task_begin_ex, __itt_group_structure, "%p, %p, %lu, %lu, %lu, %p") +ITT_STUBV(ITTAPI, void, task_begin_fn_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_id parentid, void* fn), (ITT_FORMAT domain, clock_domain, timestamp, id, parentid, fn), task_begin_fn_ex, __itt_group_structure, "%p, %p, %lu, %lu, %lu, %p") +ITT_STUBV(ITTAPI, void, task_end_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp), (ITT_FORMAT domain, clock_domain, timestamp), task_end_ex, __itt_group_structure, "%p, %p, %lu") +ITT_STUBV(ITTAPI, void, task_begin_overlapped, (const __itt_domain *domain, __itt_id id, __itt_id parent, __itt_string_handle *name), (ITT_FORMAT domain, id, parent, name), task_begin_overlapped, __itt_group_structure, "%p, %lu, %lu, %p") +ITT_STUBV(ITTAPI, void, task_begin_overlapped_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_id parentid, __itt_string_handle *name), (ITT_FORMAT domain, clock_domain, timestamp, id, parentid, name), task_begin_overlapped_ex, __itt_group_structure, "%p, %p, %lu, %lu, %lu, %p") +ITT_STUBV(ITTAPI, void, task_end_overlapped, (const __itt_domain *domain, __itt_id id), (ITT_FORMAT domain, id), task_end_overlapped, __itt_group_structure, "%p, %lu") +ITT_STUBV(ITTAPI, void, task_end_overlapped_ex, (const 
__itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id), (ITT_FORMAT domain, clock_domain, timestamp, id), task_end_overlapped_ex, __itt_group_structure, "%p, %p, %lu, %lu") +ITT_STUBV(ITTAPI, void, marker_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_string_handle *name, __itt_scope scope), (ITT_FORMAT domain, clock_domain, timestamp, id, name, scope), marker_ex, __itt_group_structure, "%p, %p, %lu, %lu, %p, %d") +ITT_STUBV(ITTAPI, void, metadata_add_with_scope, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data), (ITT_FORMAT domain, scope, key, type, count, data), metadata_add_with_scope, __itt_group_structure, "%p, %d, %p, %d, %lu, %p") +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUBV(ITTAPI, void, metadata_str_add_with_scopeA, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length), (ITT_FORMAT domain, scope, key, data, length), metadata_str_add_with_scopeA, __itt_group_structure, "%p, %d, %p, %p, %lu") +ITT_STUBV(ITTAPI, void, metadata_str_add_with_scopeW, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const wchar_t *data, size_t length), (ITT_FORMAT domain, scope, key, data, length), metadata_str_add_with_scopeW, __itt_group_structure, "%p, %d, %p, %p, %lu") +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +ITT_STUBV(ITTAPI, void, metadata_str_add_with_scope, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length), (ITT_FORMAT domain, scope, key, data, length), metadata_str_add_with_scope, __itt_group_structure, "%p, %d, %p, %p, %lu") +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUBV(ITTAPI, void, relation_add_to_current_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_relation relation, __itt_id tail), (ITT_FORMAT domain, clock_domain, timestamp, relation, tail), relation_add_to_current_ex, __itt_group_structure, "%p, %p, %lu, %d, %lu") +ITT_STUBV(ITTAPI, void, relation_add_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id head, __itt_relation relation, __itt_id tail), (ITT_FORMAT domain, clock_domain, timestamp, head, relation, tail), relation_add_ex, __itt_group_structure, "%p, %p, %lu, %lu, %d, %lu") +ITT_STUB(ITTAPI, __itt_track_group*, track_group_create, (__itt_string_handle* name, __itt_track_group_type track_group_type), (ITT_FORMAT name, track_group_type), track_group_create, __itt_group_structure, "%p, %d") +ITT_STUB(ITTAPI, __itt_track*, track_create, (__itt_track_group* track_group,__itt_string_handle* name, __itt_track_type track_type), (ITT_FORMAT track_group, name, track_type), track_create, __itt_group_structure, "%p, %p, %d") +ITT_STUBV(ITTAPI, void, set_track, (__itt_track *track), (ITT_FORMAT track), set_track, __itt_group_structure, "%p") + +#ifndef __ITT_INTERNAL_BODY +ITT_STUB(ITTAPI, const char*, api_version, (void), (ITT_NO_PARAMS), api_version, __itt_group_all & ~__itt_group_legacy, "no args") +#endif /* __ITT_INTERNAL_BODY */ + +#ifndef __ITT_INTERNAL_BODY +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, int, av_saveA, (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder), (ITT_FORMAT data, rank, dimensions, type, filePath, columnOrder), av_saveA, __itt_group_arrays, "%p, %d, %p, %d, \"%s\", 
%d") +ITT_STUB(ITTAPI, int, av_saveW, (void *data, int rank, const int *dimensions, int type, const wchar_t *filePath, int columnOrder), (ITT_FORMAT data, rank, dimensions, type, filePath, columnOrder), av_saveW, __itt_group_arrays, "%p, %d, %p, %d, \"%S\", %d") +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, int, av_save, (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder), (ITT_FORMAT data, rank, dimensions, type, filePath, columnOrder), av_save, __itt_group_arrays, "%p, %d, %p, %d, \"%s\", %d") +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* __ITT_INTERNAL_BODY */ + +#ifndef __ITT_INTERNAL_BODY +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUBV(ITTAPI, void, module_loadA, (void *start_addr, void* end_addr, const char *path), (ITT_FORMAT start_addr, end_addr, path), module_loadA, __itt_group_none, "%p, %p, %p") +ITT_STUBV(ITTAPI, void, module_loadW, (void *start_addr, void* end_addr, const wchar_t *path), (ITT_FORMAT start_addr, end_addr, path), module_loadW, __itt_group_none, "%p, %p, %p") +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +ITT_STUBV(ITTAPI, void, module_load, (void *start_addr, void *end_addr, const char *path), (ITT_FORMAT start_addr, end_addr, path), module_load, __itt_group_none, "%p, %p, %p") +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* __ITT_INTERNAL_BODY */ + + +#endif /* __ITT_INTERNAL_INIT */ diff --git a/contrib/libs/tbb/src/tbb/tools_api/ittnotify_types.h b/contrib/libs/tbb/src/tbb/tools_api/ittnotify_types.h new file mode 100644 index 0000000000..3849452c27 --- /dev/null +++ b/contrib/libs/tbb/src/tbb/tools_api/ittnotify_types.h @@ -0,0 +1,73 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef _ITTNOTIFY_TYPES_H_ +#define _ITTNOTIFY_TYPES_H_ + +typedef enum ___itt_group_id +{ + __itt_group_none = 0, + __itt_group_legacy = 1<<0, + __itt_group_control = 1<<1, + __itt_group_thread = 1<<2, + __itt_group_mark = 1<<3, + __itt_group_sync = 1<<4, + __itt_group_fsync = 1<<5, + __itt_group_jit = 1<<6, + __itt_group_model = 1<<7, + __itt_group_splitter_min = 1<<7, + __itt_group_counter = 1<<8, + __itt_group_frame = 1<<9, + __itt_group_stitch = 1<<10, + __itt_group_heap = 1<<11, + __itt_group_splitter_max = 1<<12, + __itt_group_structure = 1<<12, + __itt_group_suppress = 1<<13, + __itt_group_arrays = 1<<14, + __itt_group_all = -1 +} __itt_group_id; + +#pragma pack(push, 8) + +typedef struct ___itt_group_list +{ + __itt_group_id id; + const char* name; +} __itt_group_list; + +#pragma pack(pop) + +#define ITT_GROUP_LIST(varname) \ + static __itt_group_list varname[] = { \ + { __itt_group_all, "all" }, \ + { __itt_group_control, "control" }, \ + { __itt_group_thread, "thread" }, \ + { __itt_group_mark, "mark" }, \ + { __itt_group_sync, "sync" }, \ + { __itt_group_fsync, "fsync" }, \ + { __itt_group_jit, "jit" }, \ + { __itt_group_model, "model" }, \ + { __itt_group_counter, "counter" }, \ + { __itt_group_frame, "frame" }, \ + { __itt_group_stitch, "stitch" }, \ + { __itt_group_heap, "heap" }, \ + { __itt_group_structure, "structure" }, \ + { __itt_group_suppress, "suppress" }, \ + { __itt_group_arrays, "arrays" }, \ + { __itt_group_none, NULL } \ + } + +#endif /* _ITTNOTIFY_TYPES_H_ */ diff --git a/contrib/libs/tbb/src/tbb/tools_api/legacy/ittnotify.h b/contrib/libs/tbb/src/tbb/tools_api/legacy/ittnotify.h new file mode 100644 index 0000000000..b05a199d1f --- /dev/null +++ b/contrib/libs/tbb/src/tbb/tools_api/legacy/ittnotify.h @@ -0,0 +1,998 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef _LEGACY_ITTNOTIFY_H_ +#define _LEGACY_ITTNOTIFY_H_ + +/** + * @file + * @brief Legacy User API functions and types + */ + +/** @cond exclude_from_documentation */ +#ifndef ITT_OS_WIN +# define ITT_OS_WIN 1 +#endif /* ITT_OS_WIN */ + +#ifndef ITT_OS_LINUX +# define ITT_OS_LINUX 2 +#endif /* ITT_OS_LINUX */ + +#ifndef ITT_OS_MAC +# define ITT_OS_MAC 3 +#endif /* ITT_OS_MAC */ + +#ifndef ITT_OS_FREEBSD +# define ITT_OS_FREEBSD 4 +#endif /* ITT_OS_FREEBSD */ + +#ifndef ITT_OS +# if defined WIN32 || defined _WIN32 +# define ITT_OS ITT_OS_WIN +# elif defined( __APPLE__ ) && defined( __MACH__ ) +# define ITT_OS ITT_OS_MAC +# elif defined( __FreeBSD__ ) +# define ITT_OS ITT_OS_FREEBSD +# else +# define ITT_OS ITT_OS_LINUX +# endif +#endif /* ITT_OS */ + +#ifndef ITT_PLATFORM_WIN +# define ITT_PLATFORM_WIN 1 +#endif /* ITT_PLATFORM_WIN */ + +#ifndef ITT_PLATFORM_POSIX +# define ITT_PLATFORM_POSIX 2 +#endif /* ITT_PLATFORM_POSIX */ + +#ifndef ITT_PLATFORM_MAC +# define ITT_PLATFORM_MAC 3 +#endif /* ITT_PLATFORM_MAC */ + +#ifndef ITT_PLATFORM_FREEBSD +# define ITT_PLATFORM_FREEBSD 4 +#endif /* ITT_PLATFORM_FREEBSD */ + +#ifndef ITT_PLATFORM +# if ITT_OS==ITT_OS_WIN +# define ITT_PLATFORM ITT_PLATFORM_WIN +# elif ITT_OS==ITT_OS_MAC +# define ITT_PLATFORM ITT_PLATFORM_MAC +# elif ITT_OS==ITT_OS_FREEBSD +# define ITT_PLATFORM ITT_PLATFORM_FREEBSD +# else +# define ITT_PLATFORM ITT_PLATFORM_POSIX +# endif +#endif /* ITT_PLATFORM */ + +#if defined(_UNICODE) && !defined(UNICODE) +#define UNICODE +#endif + +#include <stddef.h> +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#include <tchar.h> +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#include <stdint.h> +#if defined(UNICODE) || defined(_UNICODE) +#include <wchar.h> +#endif /* UNICODE || _UNICODE */ +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +#ifndef ITTAPI_CDECL +# if ITT_PLATFORM==ITT_PLATFORM_WIN +# define ITTAPI_CDECL __cdecl +# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +# if defined _M_IX86 || defined __i386__ +# define ITTAPI_CDECL __attribute__ ((cdecl)) +# else /* _M_IX86 || __i386__ */ +# define ITTAPI_CDECL /* actual only on x86 platform */ +# endif /* _M_IX86 || __i386__ */ +# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* ITTAPI_CDECL */ + +#ifndef STDCALL +# if ITT_PLATFORM==ITT_PLATFORM_WIN +# define STDCALL __stdcall +# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +# if defined _M_IX86 || defined __i386__ +# define STDCALL __attribute__ ((stdcall)) +# else /* _M_IX86 || __i386__ */ +# define STDCALL /* supported only on x86 platform */ +# endif /* _M_IX86 || __i386__ */ +# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* STDCALL */ + +#define ITTAPI ITTAPI_CDECL +#define LIBITTAPI ITTAPI_CDECL + +/* TODO: Temporary for compatibility! */ +#define ITTAPI_CALL ITTAPI_CDECL +#define LIBITTAPI_CALL ITTAPI_CDECL + +#if ITT_PLATFORM==ITT_PLATFORM_WIN +/* use __forceinline (VC++ specific) */ +#define ITT_INLINE __forceinline +#define ITT_INLINE_ATTRIBUTE /* nothing */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +/* + * Generally, functions are not inlined unless optimization is specified. + * For functions declared inline, this attribute inlines the function even + * if no optimization level was specified. 
+ */ +#ifdef __STRICT_ANSI__ +#define ITT_INLINE static +#define ITT_INLINE_ATTRIBUTE __attribute__((unused)) +#else /* __STRICT_ANSI__ */ +#define ITT_INLINE static inline +#define ITT_INLINE_ATTRIBUTE __attribute__((always_inline, unused)) +#endif /* __STRICT_ANSI__ */ +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +/** @endcond */ + +/** @cond exclude_from_documentation */ +/* Helper macro for joining tokens */ +#define ITT_JOIN_AUX(p,n) p##n +#define ITT_JOIN(p,n) ITT_JOIN_AUX(p,n) + +#ifdef ITT_MAJOR +#undef ITT_MAJOR +#endif +#ifdef ITT_MINOR +#undef ITT_MINOR +#endif +#define ITT_MAJOR 3 +#define ITT_MINOR 0 + +/* Standard versioning of a token with major and minor version numbers */ +#define ITT_VERSIONIZE(x) \ + ITT_JOIN(x, \ + ITT_JOIN(_, \ + ITT_JOIN(ITT_MAJOR, \ + ITT_JOIN(_, ITT_MINOR)))) + +#ifndef INTEL_ITTNOTIFY_PREFIX +# define INTEL_ITTNOTIFY_PREFIX __itt_ +#endif /* INTEL_ITTNOTIFY_PREFIX */ +#ifndef INTEL_ITTNOTIFY_POSTFIX +# define INTEL_ITTNOTIFY_POSTFIX _ptr_ +#endif /* INTEL_ITTNOTIFY_POSTFIX */ + +#define ITTNOTIFY_NAME_AUX(n) ITT_JOIN(INTEL_ITTNOTIFY_PREFIX,n) +#define ITTNOTIFY_NAME(n) ITT_VERSIONIZE(ITTNOTIFY_NAME_AUX(ITT_JOIN(n,INTEL_ITTNOTIFY_POSTFIX))) + +#define ITTNOTIFY_VOID(n) (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n) +#define ITTNOTIFY_DATA(n) (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n) + +#define ITTNOTIFY_VOID_D0(n,d) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d) +#define ITTNOTIFY_VOID_D1(n,d,x) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x) +#define ITTNOTIFY_VOID_D2(n,d,x,y) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y) +#define ITTNOTIFY_VOID_D3(n,d,x,y,z) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z) +#define ITTNOTIFY_VOID_D4(n,d,x,y,z,a) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a) +#define ITTNOTIFY_VOID_D5(n,d,x,y,z,a,b) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b) +#define ITTNOTIFY_VOID_D6(n,d,x,y,z,a,b,c) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c) +#define ITTNOTIFY_DATA_D0(n,d) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d) +#define ITTNOTIFY_DATA_D1(n,d,x) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x) +#define ITTNOTIFY_DATA_D2(n,d,x,y) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y) +#define ITTNOTIFY_DATA_D3(n,d,x,y,z) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z) +#define ITTNOTIFY_DATA_D4(n,d,x,y,z,a) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a) +#define ITTNOTIFY_DATA_D5(n,d,x,y,z,a,b) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b) +#define ITTNOTIFY_DATA_D6(n,d,x,y,z,a,b,c) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 
0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c) + +#ifdef ITT_STUB +#undef ITT_STUB +#endif +#ifdef ITT_STUBV +#undef ITT_STUBV +#endif +#define ITT_STUBV(api,type,name,args) \ + typedef type (api* ITT_JOIN(ITTNOTIFY_NAME(name),_t)) args; \ + extern ITT_JOIN(ITTNOTIFY_NAME(name),_t) ITTNOTIFY_NAME(name); +#define ITT_STUB ITT_STUBV +/** @endcond */ + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/** + * @defgroup legacy Legacy API + * @{ + * @} + */ + +/** + * @defgroup legacy_control Collection Control + * @ingroup legacy + * General behavior: application continues to run, but no profiling information is being collected + * + * Pausing occurs not only for the current thread but for all process as well as spawned processes + * - Intel(R) Parallel Inspector and Intel(R) Inspector XE: + * - Does not analyze or report errors that involve memory access. + * - Other errors are reported as usual. Pausing data collection in + * Intel(R) Parallel Inspector and Intel(R) Inspector XE + * only pauses tracing and analyzing memory access. + * It does not pause tracing or analyzing threading APIs. + * . + * - Intel(R) Parallel Amplifier and Intel(R) VTune(TM) Amplifier XE: + * - Does continue to record when new threads are started. + * . + * - Other effects: + * - Possible reduction of runtime overhead. + * . + * @{ + */ +#ifndef _ITTNOTIFY_H_ +/** @brief Pause collection */ +void ITTAPI __itt_pause(void); +/** @brief Resume collection */ +void ITTAPI __itt_resume(void); +/** @brief Detach collection */ +void ITTAPI __itt_detach(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, pause, (void)) +ITT_STUBV(ITTAPI, void, resume, (void)) +ITT_STUBV(ITTAPI, void, detach, (void)) +#define __itt_pause ITTNOTIFY_VOID(pause) +#define __itt_pause_ptr ITTNOTIFY_NAME(pause) +#define __itt_resume ITTNOTIFY_VOID(resume) +#define __itt_resume_ptr ITTNOTIFY_NAME(resume) +#define __itt_detach ITTNOTIFY_VOID(detach) +#define __itt_detach_ptr ITTNOTIFY_NAME(detach) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_pause() +#define __itt_pause_ptr 0 +#define __itt_resume() +#define __itt_resume_ptr 0 +#define __itt_detach() +#define __itt_detach_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_pause_ptr 0 +#define __itt_resume_ptr 0 +#define __itt_detach_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +#endif /* _ITTNOTIFY_H_ */ +/** @} legacy_control group */ + +/** + * @defgroup legacy_threads Threads + * @ingroup legacy + * Threads group + * @warning Legacy API + * @{ + */ +/** + * @deprecated Legacy API + * @brief Set name to be associated with thread in analysis GUI. 
+ * @return __itt_err upon failure (name or namelen being null,name and namelen mismatched) + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +int LIBITTAPI __itt_thr_name_setA(const char *name, int namelen); +int LIBITTAPI __itt_thr_name_setW(const wchar_t *name, int namelen); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_thr_name_set __itt_thr_name_setW +# define __itt_thr_name_set_ptr __itt_thr_name_setW_ptr +#else +# define __itt_thr_name_set __itt_thr_name_setA +# define __itt_thr_name_set_ptr __itt_thr_name_setA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +int LIBITTAPI __itt_thr_name_set(const char *name, int namelen); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(LIBITTAPI, int, thr_name_setA, (const char *name, int namelen)) +ITT_STUB(LIBITTAPI, int, thr_name_setW, (const wchar_t *name, int namelen)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(LIBITTAPI, int, thr_name_set, (const char *name, int namelen)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_thr_name_setA ITTNOTIFY_DATA(thr_name_setA) +#define __itt_thr_name_setA_ptr ITTNOTIFY_NAME(thr_name_setA) +#define __itt_thr_name_setW ITTNOTIFY_DATA(thr_name_setW) +#define __itt_thr_name_setW_ptr ITTNOTIFY_NAME(thr_name_setW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_thr_name_set ITTNOTIFY_DATA(thr_name_set) +#define __itt_thr_name_set_ptr ITTNOTIFY_NAME(thr_name_set) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_thr_name_setA(name, namelen) +#define __itt_thr_name_setA_ptr 0 +#define __itt_thr_name_setW(name, namelen) +#define __itt_thr_name_setW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_thr_name_set(name, namelen) +#define __itt_thr_name_set_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_thr_name_setA_ptr 0 +#define __itt_thr_name_setW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_thr_name_set_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @deprecated Legacy API + * @brief Mark current thread as ignored from this point on, for the duration of its existence. 
+ */ +void LIBITTAPI __itt_thr_ignore(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(LIBITTAPI, void, thr_ignore, (void)) +#define __itt_thr_ignore ITTNOTIFY_VOID(thr_ignore) +#define __itt_thr_ignore_ptr ITTNOTIFY_NAME(thr_ignore) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_thr_ignore() +#define __itt_thr_ignore_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_thr_ignore_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} legacy_threads group */ + +/** + * @defgroup legacy_sync Synchronization + * @ingroup legacy + * Synchronization group + * @warning Legacy API + * @{ + */ +/** + * @hideinitializer + * @brief possible value of attribute argument for sync object type + */ +#define __itt_attr_barrier 1 + +/** + * @hideinitializer + * @brief possible value of attribute argument for sync object type + */ +#define __itt_attr_mutex 2 + +/** + * @deprecated Legacy API + * @brief Assign a name to a sync object using char or Unicode string + * @param[in] addr - pointer to the sync object. You should use a real pointer to your object + * to make sure that the values don't clash with other object addresses + * @param[in] objtype - null-terminated object type string. If NULL is passed, the object will + * be assumed to be of generic "User Synchronization" type + * @param[in] objname - null-terminated object name string. If NULL, no name will be assigned + * to the object -- you can use the __itt_sync_rename call later to assign + * the name + * @param[in] attribute - one of [#__itt_attr_barrier, #__itt_attr_mutex] values which defines the + * exact semantics of how prepare/acquired/releasing calls work. + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +void ITTAPI __itt_sync_set_nameA(void *addr, const char *objtype, const char *objname, int attribute); +void ITTAPI __itt_sync_set_nameW(void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_sync_set_name __itt_sync_set_nameW +# define __itt_sync_set_name_ptr __itt_sync_set_nameW_ptr +#else /* UNICODE */ +# define __itt_sync_set_name __itt_sync_set_nameA +# define __itt_sync_set_name_ptr __itt_sync_set_nameA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +void ITTAPI __itt_sync_set_name(void *addr, const char* objtype, const char* objname, int attribute); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUBV(ITTAPI, void, sync_set_nameA, (void *addr, const char *objtype, const char *objname, int attribute)) +ITT_STUBV(ITTAPI, void, sync_set_nameW, (void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUBV(ITTAPI, void, sync_set_name, (void *addr, const char *objtype, const char *objname, int attribute)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_sync_set_nameA ITTNOTIFY_VOID(sync_set_nameA) +#define __itt_sync_set_nameA_ptr ITTNOTIFY_NAME(sync_set_nameA) +#define __itt_sync_set_nameW ITTNOTIFY_VOID(sync_set_nameW) +#define __itt_sync_set_nameW_ptr ITTNOTIFY_NAME(sync_set_nameW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_sync_set_name ITTNOTIFY_VOID(sync_set_name) +#define __itt_sync_set_name_ptr ITTNOTIFY_NAME(sync_set_name) +#endif /* 
ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_sync_set_nameA(addr, objtype, objname, attribute) +#define __itt_sync_set_nameA_ptr 0 +#define __itt_sync_set_nameW(addr, objtype, objname, attribute) +#define __itt_sync_set_nameW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_sync_set_name(addr, objtype, objname, attribute) +#define __itt_sync_set_name_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_sync_set_nameA_ptr 0 +#define __itt_sync_set_nameW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_sync_set_name_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @deprecated Legacy API + * @brief Assign a name and type to a sync object using char or Unicode string + * @param[in] addr - pointer to the sync object. You should use a real pointer to your object + * to make sure that the values don't clash with other object addresses + * @param[in] objtype - null-terminated object type string. If NULL is passed, the object will + * be assumed to be of generic "User Synchronization" type + * @param[in] objname - null-terminated object name string. If NULL, no name will be assigned + * to the object -- you can use the __itt_sync_rename call later to assign + * the name + * @param[in] typelen, namelen - a length of string for appropriate objtype and objname parameter + * @param[in] attribute - one of [#__itt_attr_barrier, #__itt_attr_mutex] values which defines the + * exact semantics of how prepare/acquired/releasing calls work. + * @return __itt_err upon failure (name or namelen being null,name and namelen mismatched) + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +int LIBITTAPI __itt_notify_sync_nameA(void *addr, const char *objtype, int typelen, const char *objname, int namelen, int attribute); +int LIBITTAPI __itt_notify_sync_nameW(void *addr, const wchar_t *objtype, int typelen, const wchar_t *objname, int namelen, int attribute); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_notify_sync_name __itt_notify_sync_nameW +#else +# define __itt_notify_sync_name __itt_notify_sync_nameA +#endif +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +int LIBITTAPI __itt_notify_sync_name(void *addr, const char *objtype, int typelen, const char *objname, int namelen, int attribute); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(LIBITTAPI, int, notify_sync_nameA, (void *addr, const char *objtype, int typelen, const char *objname, int namelen, int attribute)) +ITT_STUB(LIBITTAPI, int, notify_sync_nameW, (void *addr, const wchar_t *objtype, int typelen, const wchar_t *objname, int namelen, int attribute)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(LIBITTAPI, int, notify_sync_name, (void *addr, const char *objtype, int typelen, const char *objname, int namelen, int attribute)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_notify_sync_nameA ITTNOTIFY_DATA(notify_sync_nameA) +#define __itt_notify_sync_nameA_ptr ITTNOTIFY_NAME(notify_sync_nameA) +#define __itt_notify_sync_nameW ITTNOTIFY_DATA(notify_sync_nameW) +#define __itt_notify_sync_nameW_ptr ITTNOTIFY_NAME(notify_sync_nameW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN 
*/ +#define __itt_notify_sync_name ITTNOTIFY_DATA(notify_sync_name) +#define __itt_notify_sync_name_ptr ITTNOTIFY_NAME(notify_sync_name) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_notify_sync_nameA(addr, objtype, typelen, objname, namelen, attribute) +#define __itt_notify_sync_nameA_ptr 0 +#define __itt_notify_sync_nameW(addr, objtype, typelen, objname, namelen, attribute) +#define __itt_notify_sync_nameW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_notify_sync_name(addr, objtype, typelen, objname, namelen, attribute) +#define __itt_notify_sync_name_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_notify_sync_nameA_ptr 0 +#define __itt_notify_sync_nameW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_notify_sync_name_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @deprecated Legacy API + * @brief Enter spin loop on user-defined sync object + */ +void LIBITTAPI __itt_notify_sync_prepare(void* addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(LIBITTAPI, void, notify_sync_prepare, (void *addr)) +#define __itt_notify_sync_prepare ITTNOTIFY_VOID(notify_sync_prepare) +#define __itt_notify_sync_prepare_ptr ITTNOTIFY_NAME(notify_sync_prepare) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_notify_sync_prepare(addr) +#define __itt_notify_sync_prepare_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_notify_sync_prepare_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @deprecated Legacy API + * @brief Quit spin loop without acquiring spin object + */ +void LIBITTAPI __itt_notify_sync_cancel(void *addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(LIBITTAPI, void, notify_sync_cancel, (void *addr)) +#define __itt_notify_sync_cancel ITTNOTIFY_VOID(notify_sync_cancel) +#define __itt_notify_sync_cancel_ptr ITTNOTIFY_NAME(notify_sync_cancel) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_notify_sync_cancel(addr) +#define __itt_notify_sync_cancel_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_notify_sync_cancel_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @deprecated Legacy API + * @brief Successful spin loop completion (sync object acquired) + */ +void LIBITTAPI __itt_notify_sync_acquired(void *addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(LIBITTAPI, void, notify_sync_acquired, (void *addr)) +#define __itt_notify_sync_acquired ITTNOTIFY_VOID(notify_sync_acquired) +#define __itt_notify_sync_acquired_ptr ITTNOTIFY_NAME(notify_sync_acquired) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_notify_sync_acquired(addr) +#define __itt_notify_sync_acquired_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_notify_sync_acquired_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @deprecated Legacy API + * @brief Start sync object releasing code. Is called before the lock release call. 
+ */ +void LIBITTAPI __itt_notify_sync_releasing(void* addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(LIBITTAPI, void, notify_sync_releasing, (void *addr)) +#define __itt_notify_sync_releasing ITTNOTIFY_VOID(notify_sync_releasing) +#define __itt_notify_sync_releasing_ptr ITTNOTIFY_NAME(notify_sync_releasing) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_notify_sync_releasing(addr) +#define __itt_notify_sync_releasing_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_notify_sync_releasing_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} legacy_sync group */ + +#ifndef _ITTNOTIFY_H_ +/** + * @defgroup legacy_events Events + * @ingroup legacy + * Events group + * @{ + */ + +/** @brief user event type */ +typedef int __itt_event; + +/** + * @brief Create an event notification + * @note name or namelen being null/name and namelen not matching, user event feature not enabled + * @return non-zero event identifier upon success and __itt_err otherwise + */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +__itt_event LIBITTAPI __itt_event_createA(const char *name, int namelen); +__itt_event LIBITTAPI __itt_event_createW(const wchar_t *name, int namelen); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_event_create __itt_event_createW +# define __itt_event_create_ptr __itt_event_createW_ptr +#else +# define __itt_event_create __itt_event_createA +# define __itt_event_create_ptr __itt_event_createA_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +__itt_event LIBITTAPI __itt_event_create(const char *name, int namelen); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(LIBITTAPI, __itt_event, event_createA, (const char *name, int namelen)) +ITT_STUB(LIBITTAPI, __itt_event, event_createW, (const wchar_t *name, int namelen)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(LIBITTAPI, __itt_event, event_create, (const char *name, int namelen)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_event_createA ITTNOTIFY_DATA(event_createA) +#define __itt_event_createA_ptr ITTNOTIFY_NAME(event_createA) +#define __itt_event_createW ITTNOTIFY_DATA(event_createW) +#define __itt_event_createW_ptr ITTNOTIFY_NAME(event_createW) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_event_create ITTNOTIFY_DATA(event_create) +#define __itt_event_create_ptr ITTNOTIFY_NAME(event_create) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_event_createA(name, namelen) (__itt_event)0 +#define __itt_event_createA_ptr 0 +#define __itt_event_createW(name, namelen) (__itt_event)0 +#define __itt_event_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_event_create(name, namelen) (__itt_event)0 +#define __itt_event_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_event_createA_ptr 0 +#define __itt_event_createW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_event_create_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Record an event occurrence. 
+ * @return __itt_err upon failure (invalid event id/user event feature not enabled) + */ +int LIBITTAPI __itt_event_start(__itt_event event); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(LIBITTAPI, int, event_start, (__itt_event event)) +#define __itt_event_start ITTNOTIFY_DATA(event_start) +#define __itt_event_start_ptr ITTNOTIFY_NAME(event_start) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_event_start(event) (int)0 +#define __itt_event_start_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_event_start_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Record an event end occurrence. + * @note It is optional if events do not have durations. + * @return __itt_err upon failure (invalid event id/user event feature not enabled) + */ +int LIBITTAPI __itt_event_end(__itt_event event); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(LIBITTAPI, int, event_end, (__itt_event event)) +#define __itt_event_end ITTNOTIFY_DATA(event_end) +#define __itt_event_end_ptr ITTNOTIFY_NAME(event_end) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_event_end(event) (int)0 +#define __itt_event_end_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_event_end_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} legacy_events group */ +#endif /* _ITTNOTIFY_H_ */ + +/** + * @defgroup legacy_memory Memory Accesses + * @ingroup legacy + */ + +/** + * @deprecated Legacy API + * @brief Inform the tool of memory accesses on reading + */ +void LIBITTAPI __itt_memory_read(void *addr, size_t size); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(LIBITTAPI, void, memory_read, (void *addr, size_t size)) +#define __itt_memory_read ITTNOTIFY_VOID(memory_read) +#define __itt_memory_read_ptr ITTNOTIFY_NAME(memory_read) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_memory_read(addr, size) +#define __itt_memory_read_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_memory_read_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @deprecated Legacy API + * @brief Inform the tool of memory accesses on writing + */ +void LIBITTAPI __itt_memory_write(void *addr, size_t size); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(LIBITTAPI, void, memory_write, (void *addr, size_t size)) +#define __itt_memory_write ITTNOTIFY_VOID(memory_write) +#define __itt_memory_write_ptr ITTNOTIFY_NAME(memory_write) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_memory_write(addr, size) +#define __itt_memory_write_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_memory_write_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @deprecated Legacy API + * @brief Inform the tool of memory accesses on updating + */ +void LIBITTAPI __itt_memory_update(void *address, size_t size); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(LIBITTAPI, void, memory_update, (void *addr, size_t size)) +#define __itt_memory_update ITTNOTIFY_VOID(memory_update) +#define __itt_memory_update_ptr ITTNOTIFY_NAME(memory_update) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_memory_update(addr, size) 
+#define __itt_memory_update_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_memory_update_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} legacy_memory group */ + +/** + * @defgroup legacy_state Thread and Object States + * @ingroup legacy + */ + +/** @brief state type */ +typedef int __itt_state_t; + +/** @cond exclude_from_documentation */ +typedef enum __itt_obj_state { + __itt_obj_state_err = 0, + __itt_obj_state_clr = 1, + __itt_obj_state_set = 2, + __itt_obj_state_use = 3 +} __itt_obj_state_t; + +typedef enum __itt_thr_state { + __itt_thr_state_err = 0, + __itt_thr_state_clr = 1, + __itt_thr_state_set = 2 +} __itt_thr_state_t; + +typedef enum __itt_obj_prop { + __itt_obj_prop_watch = 1, + __itt_obj_prop_ignore = 2, + __itt_obj_prop_sharable = 3 +} __itt_obj_prop_t; + +typedef enum __itt_thr_prop { + __itt_thr_prop_quiet = 1 +} __itt_thr_prop_t; +/** @endcond */ + +/** + * @deprecated Legacy API + * @brief managing thread and object states + */ +__itt_state_t LIBITTAPI __itt_state_get(void); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(ITTAPI, __itt_state_t, state_get, (void)) +#define __itt_state_get ITTNOTIFY_DATA(state_get) +#define __itt_state_get_ptr ITTNOTIFY_NAME(state_get) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_state_get(void) (__itt_state_t)0 +#define __itt_state_get_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_state_get_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @deprecated Legacy API + * @brief managing thread and object states + */ +__itt_state_t LIBITTAPI __itt_state_set(__itt_state_t s); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(ITTAPI, __itt_state_t, state_set, (__itt_state_t s)) +#define __itt_state_set ITTNOTIFY_DATA(state_set) +#define __itt_state_set_ptr ITTNOTIFY_NAME(state_set) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_state_set(s) (__itt_state_t)0 +#define __itt_state_set_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_state_set_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @deprecated Legacy API + * @brief managing thread and object modes + */ +__itt_thr_state_t LIBITTAPI __itt_thr_mode_set(__itt_thr_prop_t p, __itt_thr_state_t s); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(ITTAPI, __itt_thr_state_t, thr_mode_set, (__itt_thr_prop_t p, __itt_thr_state_t s)) +#define __itt_thr_mode_set ITTNOTIFY_DATA(thr_mode_set) +#define __itt_thr_mode_set_ptr ITTNOTIFY_NAME(thr_mode_set) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_thr_mode_set(p, s) (__itt_thr_state_t)0 +#define __itt_thr_mode_set_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_thr_mode_set_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @deprecated Legacy API + * @brief managing thread and object modes + */ +__itt_obj_state_t LIBITTAPI __itt_obj_mode_set(__itt_obj_prop_t p, __itt_obj_state_t s); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUB(ITTAPI, __itt_obj_state_t, obj_mode_set, (__itt_obj_prop_t p, __itt_obj_state_t s)) +#define __itt_obj_mode_set ITTNOTIFY_DATA(obj_mode_set) +#define __itt_obj_mode_set_ptr ITTNOTIFY_NAME(obj_mode_set) +#else /* 
INTEL_NO_ITTNOTIFY_API */
+#define __itt_obj_mode_set(p, s) (__itt_obj_state_t)0
+#define __itt_obj_mode_set_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_obj_mode_set_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} legacy_state group */
+
+/**
+ * @defgroup frames Frames
+ * @ingroup legacy
+ * Frames group
+ * @{
+ */
+/**
+ * @brief opaque structure for frame identification
+ */
+typedef struct __itt_frame_t *__itt_frame;
+
+/**
+ * @brief Create a global frame with given domain
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+__itt_frame ITTAPI __itt_frame_createA(const char *domain);
+__itt_frame ITTAPI __itt_frame_createW(const wchar_t *domain);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_frame_create __itt_frame_createW
+# define __itt_frame_create_ptr __itt_frame_createW_ptr
+#else /* UNICODE */
+# define __itt_frame_create __itt_frame_createA
+# define __itt_frame_create_ptr __itt_frame_createA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_frame ITTAPI __itt_frame_create(const char *domain);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_frame, frame_createA, (const char *domain))
+ITT_STUB(ITTAPI, __itt_frame, frame_createW, (const wchar_t *domain))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_frame, frame_create, (const char *domain))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_frame_createA ITTNOTIFY_DATA(frame_createA)
+#define __itt_frame_createA_ptr ITTNOTIFY_NAME(frame_createA)
+#define __itt_frame_createW ITTNOTIFY_DATA(frame_createW)
+#define __itt_frame_createW_ptr ITTNOTIFY_NAME(frame_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_frame_create ITTNOTIFY_DATA(frame_create)
+#define __itt_frame_create_ptr ITTNOTIFY_NAME(frame_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_frame_createA(domain)
+#define __itt_frame_createA_ptr 0
+#define __itt_frame_createW(domain)
+#define __itt_frame_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_frame_create(domain)
+#define __itt_frame_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_frame_createA_ptr 0
+#define __itt_frame_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_frame_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @brief Record a frame begin occurrence. */
+void ITTAPI __itt_frame_begin(__itt_frame frame);
+/** @brief Record a frame end occurrence.
*/ +void ITTAPI __itt_frame_end (__itt_frame frame); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, frame_begin, (__itt_frame frame)) +ITT_STUBV(ITTAPI, void, frame_end, (__itt_frame frame)) +#define __itt_frame_begin ITTNOTIFY_VOID(frame_begin) +#define __itt_frame_begin_ptr ITTNOTIFY_NAME(frame_begin) +#define __itt_frame_end ITTNOTIFY_VOID(frame_end) +#define __itt_frame_end_ptr ITTNOTIFY_NAME(frame_end) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_frame_begin(frame) +#define __itt_frame_begin_ptr 0 +#define __itt_frame_end(frame) +#define __itt_frame_end_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_frame_begin_ptr 0 +#define __itt_frame_end_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ +/** @} frames group */ + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* _LEGACY_ITTNOTIFY_H_ */ diff --git a/contrib/libs/tbb/src/tbb/version.cpp b/contrib/libs/tbb/src/tbb/version.cpp new file mode 100644 index 0000000000..ca113372f1 --- /dev/null +++ b/contrib/libs/tbb/src/tbb/version.cpp @@ -0,0 +1,26 @@ +/* + Copyright (c) 2020-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "oneapi/tbb/version.h" + +extern "C" int TBB_runtime_interface_version() { + return TBB_INTERFACE_VERSION; +} + +extern "C" const char* TBB_runtime_version() { + static const char version_str[] = TBB_VERSION_STRING; + return version_str; +} diff --git a/contrib/libs/tbb/src/tbb/waiters.h b/contrib/libs/tbb/src/tbb/waiters.h new file mode 100644 index 0000000000..07ee5ab4f0 --- /dev/null +++ b/contrib/libs/tbb/src/tbb/waiters.h @@ -0,0 +1,204 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/
+
+#ifndef _TBB_waiters_H
+#define _TBB_waiters_H
+
+#include "oneapi/tbb/detail/_task.h"
+#include "scheduler_common.h"
+#include "arena.h"
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+
+inline d1::task* get_self_recall_task(arena_slot& slot);
+
+class waiter_base {
+public:
+    waiter_base(arena& a) : my_arena(a), my_backoff(int(a.my_num_slots)) {}
+
+    bool pause() {
+        if (my_backoff.pause()) {
+            my_arena.is_out_of_work();
+            return true;
+        }
+
+        return false;
+    }
+
+    void reset_wait() {
+        my_backoff.reset_wait();
+    }
+
+protected:
+    arena& my_arena;
+    stealing_loop_backoff my_backoff;
+};
+
+class outermost_worker_waiter : public waiter_base {
+public:
+    using waiter_base::waiter_base;
+
+    bool continue_execution(arena_slot& slot, d1::task*& t) const {
+        __TBB_ASSERT(t == nullptr, nullptr);
+
+        if (is_worker_should_leave(slot)) {
+            // Leave dispatch loop
+            return false;
+        }
+
+        t = get_self_recall_task(slot);
+        return true;
+    }
+
+    void pause(arena_slot&) {
+        waiter_base::pause();
+    }
+
+    d1::wait_context* wait_ctx() {
+        return nullptr;
+    }
+
+    static bool postpone_execution(d1::task&) {
+        return false;
+    }
+
+private:
+    using base_type = waiter_base;
+
+    bool is_worker_should_leave(arena_slot& slot) const {
+        bool is_top_priority_arena = my_arena.my_is_top_priority.load(std::memory_order_relaxed);
+        bool is_task_pool_empty = slot.task_pool.load(std::memory_order_relaxed) == EmptyTaskPool;
+
+        if (is_top_priority_arena) {
+            // Workers in the top-priority arena do not leave the arena until all work in the task pool is done
+            if (is_task_pool_empty && my_arena.is_recall_requested()) {
+                return true;
+            }
+        } else {
+            if (my_arena.is_recall_requested()) {
+                // If the worker still has work in its task pool, notify other threads
+                // so that their wakeup is not missed
+                if (!is_task_pool_empty) {
+                    my_arena.advertise_new_work<arena::wakeup>();
+                }
+                return true;
+            }
+        }
+
+        return false;
+    }
+};
+
+class sleep_waiter : public waiter_base {
+protected:
+    using waiter_base::waiter_base;
+
+    bool is_arena_empty() {
+        return my_arena.my_pool_state.load(std::memory_order_relaxed) == arena::SNAPSHOT_EMPTY;
+    }
+
+    template <typename Pred>
+    void sleep(std::uintptr_t uniq_tag, Pred wakeup_condition) {
+        my_arena.my_market->get_wait_list().wait<extended_concurrent_monitor::thread_context>(wakeup_condition,
+            extended_context{uniq_tag, &my_arena});
+    }
+};
+
+class external_waiter : public sleep_waiter {
+public:
+    external_waiter(arena& a, d1::wait_context& wo)
+        : sleep_waiter(a), my_wait_ctx(wo)
+    {}
+
+    bool continue_execution(arena_slot& slot, d1::task*& t) const {
+        __TBB_ASSERT(t == nullptr, nullptr);
+        if (!my_wait_ctx.continue_execution())
+            return false;
+        t = get_self_recall_task(slot);
+        return true;
+    }
+
+    void pause(arena_slot&) {
+        if (!sleep_waiter::pause()) {
+            return;
+        }
+
+        auto wakeup_condition = [&] { return !is_arena_empty() || !my_wait_ctx.continue_execution(); };
+
+        sleep(std::uintptr_t(&my_wait_ctx), wakeup_condition);
+        my_backoff.reset_wait();
+    }
+
+    d1::wait_context* wait_ctx() {
+        return &my_wait_ctx;
+    }
+
+    static bool postpone_execution(d1::task&) {
+        return false;
+    }
+
+private:
+    d1::wait_context& my_wait_ctx;
+};
+
+#if __TBB_RESUMABLE_TASKS
+
+class coroutine_waiter : public sleep_waiter {
+public:
+    using sleep_waiter::sleep_waiter;
+
+    bool continue_execution(arena_slot& slot, d1::task*& t) const {
+        __TBB_ASSERT(t == nullptr, nullptr);
+        t = get_self_recall_task(slot);
+        return true;
+    }
+
+    void pause(arena_slot& slot) {
+        if (!sleep_waiter::pause()) {
+            return;
+        }
+
+        suspend_point_type* sp = slot.default_task_dispatcher().m_suspend_point;
+
+        auto wakeup_condition = [&] { return !is_arena_empty() || sp->m_is_owner_recalled.load(std::memory_order_relaxed); };
+
+        sleep(std::uintptr_t(sp), wakeup_condition);
+        my_backoff.reset_wait();
+    }
+
+    void reset_wait() {
+        my_backoff.reset_wait();
+    }
+
+    d1::wait_context* wait_ctx() {
+        return nullptr;
+    }
+
+    static bool postpone_execution(d1::task& t) {
+        return task_accessor::is_resume_task(t);
+    }
+};
+
+#endif // __TBB_RESUMABLE_TASKS
+
+} // namespace r1
+} // namespace detail
+} // namespace tbb
+
+#endif // _TBB_waiters_H
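
The ITTNOTIFY_VOID and ITTNOTIFY_DATA macros defined earlier route every __itt_* call through a versioned function pointer that stays null until a collector fills it in via dynamic linking, so instrumentation costs a single branch when no tool is attached. A minimal standalone C++ sketch of that dispatch pattern follows; the DEMO_* names are illustrative and are not part of the ITT API.

    #include <cstdio>

    // Function-pointer slot that a collector would patch at run time.
    typedef void (*demo_pause_t)(void);
    static demo_pause_t demo_pause_ptr = nullptr;

    // Same shape as ITTNOTIFY_VOID: call only if the pointer has been filled in.
    #define DEMO_VOID(fn) (!(fn##_ptr)) ? (void)0 : fn##_ptr()

    static void collector_pause(void) { std::puts("collection paused"); }

    int main() {
        DEMO_VOID(demo_pause);             // no collector attached: expands to (void)0
        demo_pause_ptr = &collector_pause; // a collector "attaches" and patches the slot
        DEMO_VOID(demo_pause);             // now forwards to the collector
        return 0;
    }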
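
A hedged usage sketch of the legacy collection-control and user-event API declared above: it assumes the legacy header is reachable on the include path (the include line is illustrative) and that a collector may or may not be attached; with no collector, every call collapses to the no-op or zero macro bodies defined in the header. On Windows builds with UNICODE defined, __itt_event_create resolves to the wide-character variant, so the narrow string literal below applies only to the non-UNICODE/POSIX case.

    #include "legacy/ittnotify.h"   // illustrative include path for the header above
    #include <cstring>

    static long hot_work() { long s = 0; for (int i = 0; i < 1000000; ++i) s += i; return s; }

    int main() {
        const char* name = "phase:hot_loop";
        __itt_event ev = __itt_event_create(name, (int)std::strlen(name));

        __itt_event_start(ev);   // mark the beginning of the user-defined event
        long r = hot_work();
        __itt_event_end(ev);     // optional if the event carries no duration

        __itt_pause();           // suspend data collection around uninteresting work
        hot_work();
        __itt_resume();          // collection continues from here

        return (int)(r & 1);
    }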
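
The waiter classes in waiters.h all follow the same spin-then-block discipline: spin with a bounded backoff (waiter_base::pause), and only park the thread on the market's wait list once the backoff is exhausted and the wakeup condition still does not hold. The sketch below reproduces that shape with standard C++ primitives only; std::condition_variable stands in for TBB's extended_concurrent_monitor, and every name is illustrative rather than taken from the TBB sources.

    #include <atomic>
    #include <condition_variable>
    #include <mutex>
    #include <thread>

    struct demo_waiter {
        std::mutex m;
        std::condition_variable cv;
        std::atomic<bool> has_work{false};

        void wait_for_work() {
            // Phase 1: bounded spin, analogous to waiter_base::pause().
            for (int spin = 0; spin < 100; ++spin) {
                if (has_work.load(std::memory_order_acquire)) return;
                std::this_thread::yield();
            }
            // Phase 2: block until the wakeup condition holds, analogous to sleep_waiter::sleep().
            std::unique_lock<std::mutex> lock(m);
            cv.wait(lock, [&] { return has_work.load(std::memory_order_acquire); });
        }

        void submit_work() {
            {
                std::lock_guard<std::mutex> lock(m);
                has_work.store(true, std::memory_order_release);
            }
            cv.notify_one();   // mirrors the arena advertising new work to sleeping threads
        }
    };

    int main() {
        demo_waiter w;
        std::thread t([&] { w.wait_for_work(); });
        w.submit_work();
        t.join();
        return 0;
    }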