author     Devtools Arcadia <arcadia-devtools@yandex-team.ru>  2022-02-07 18:08:42 +0300
committer  Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net>  2022-02-07 18:08:42 +0300
commit     1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
tree       e26c9fed0de5d9873cce7e00bc214573dc2195b7  /contrib/libs/tbb/src
download   ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
intermediate changes
ref: cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'contrib/libs/tbb/src')
-rw-r--r--  contrib/libs/tbb/src/tbb/allocator.cpp  234
-rw-r--r--  contrib/libs/tbb/src/tbb/arena.cpp  757
-rw-r--r--  contrib/libs/tbb/src/tbb/arena.h  616
-rw-r--r--  contrib/libs/tbb/src/tbb/arena_slot.cpp  219
-rw-r--r--  contrib/libs/tbb/src/tbb/arena_slot.h  409
-rw-r--r--  contrib/libs/tbb/src/tbb/assert_impl.h  71
-rw-r--r--  contrib/libs/tbb/src/tbb/co_context.h  222
-rw-r--r--  contrib/libs/tbb/src/tbb/concurrent_bounded_queue.cpp  84
-rw-r--r--  contrib/libs/tbb/src/tbb/concurrent_monitor.h  529
-rw-r--r--  contrib/libs/tbb/src/tbb/def/lin64-tbb.def  153
-rw-r--r--  contrib/libs/tbb/src/tbb/dynamic_link.cpp  477
-rw-r--r--  contrib/libs/tbb/src/tbb/dynamic_link.h  115
-rw-r--r--  contrib/libs/tbb/src/tbb/environment.h  81
-rw-r--r--  contrib/libs/tbb/src/tbb/exception.cpp  162
-rw-r--r--  contrib/libs/tbb/src/tbb/global_control.cpp  275
-rw-r--r--  contrib/libs/tbb/src/tbb/governor.cpp  526
-rw-r--r--  contrib/libs/tbb/src/tbb/governor.h  158
-rw-r--r--  contrib/libs/tbb/src/tbb/intrusive_list.h  242
-rw-r--r--  contrib/libs/tbb/src/tbb/itt_notify.cpp  69
-rw-r--r--  contrib/libs/tbb/src/tbb/itt_notify.h  114
-rw-r--r--  contrib/libs/tbb/src/tbb/mailbox.h  249
-rw-r--r--  contrib/libs/tbb/src/tbb/main.cpp  171
-rw-r--r--  contrib/libs/tbb/src/tbb/main.h  99
-rw-r--r--  contrib/libs/tbb/src/tbb/market.cpp  640
-rw-r--r--  contrib/libs/tbb/src/tbb/market.h  317
-rw-r--r--  contrib/libs/tbb/src/tbb/misc.cpp  137
-rw-r--r--  contrib/libs/tbb/src/tbb/misc.h  289
-rw-r--r--  contrib/libs/tbb/src/tbb/misc_ex.cpp  398
-rw-r--r--  contrib/libs/tbb/src/tbb/observer_proxy.cpp  322
-rw-r--r--  contrib/libs/tbb/src/tbb/observer_proxy.h  154
-rw-r--r--  contrib/libs/tbb/src/tbb/parallel_pipeline.cpp  471
-rw-r--r--  contrib/libs/tbb/src/tbb/private_server.cpp  420
-rw-r--r--  contrib/libs/tbb/src/tbb/profiling.cpp  265
-rw-r--r--  contrib/libs/tbb/src/tbb/queuing_rw_mutex.cpp  558
-rw-r--r--  contrib/libs/tbb/src/tbb/rml_base.h  163
-rw-r--r--  contrib/libs/tbb/src/tbb/rml_tbb.cpp  113
-rw-r--r--  contrib/libs/tbb/src/tbb/rml_tbb.h  94
-rw-r--r--  contrib/libs/tbb/src/tbb/rml_thread_monitor.h  258
-rw-r--r--  contrib/libs/tbb/src/tbb/rtm_mutex.cpp  120
-rw-r--r--  contrib/libs/tbb/src/tbb/rtm_rw_mutex.cpp  271
-rw-r--r--  contrib/libs/tbb/src/tbb/scheduler_common.h  505
-rw-r--r--  contrib/libs/tbb/src/tbb/semaphore.cpp  92
-rw-r--r--  contrib/libs/tbb/src/tbb/semaphore.h  335
-rw-r--r--  contrib/libs/tbb/src/tbb/small_object_pool.cpp  154
-rw-r--r--  contrib/libs/tbb/src/tbb/small_object_pool_impl.h  59
-rw-r--r--  contrib/libs/tbb/src/tbb/task.cpp  225
-rw-r--r--  contrib/libs/tbb/src/tbb/task_dispatcher.cpp  240
-rw-r--r--  contrib/libs/tbb/src/tbb/task_dispatcher.h  465
-rw-r--r--  contrib/libs/tbb/src/tbb/task_group_context.cpp  493
-rw-r--r--  contrib/libs/tbb/src/tbb/task_stream.h  288
-rw-r--r--  contrib/libs/tbb/src/tbb/thread_data.h  273
-rw-r--r--  contrib/libs/tbb/src/tbb/tls.h  93
-rw-r--r--  contrib/libs/tbb/src/tbb/tools_api/disable_warnings.h  35
-rw-r--r--  contrib/libs/tbb/src/tbb/tools_api/ittnotify.h  4165
-rw-r--r--  contrib/libs/tbb/src/tbb/tools_api/ittnotify_config.h  585
-rw-r--r--  contrib/libs/tbb/src/tbb/tools_api/ittnotify_static.c  1244
-rw-r--r--  contrib/libs/tbb/src/tbb/tools_api/ittnotify_static.h  354
-rw-r--r--  contrib/libs/tbb/src/tbb/tools_api/ittnotify_types.h  73
-rw-r--r--  contrib/libs/tbb/src/tbb/tools_api/legacy/ittnotify.h  998
-rw-r--r--  contrib/libs/tbb/src/tbb/version.cpp  26
-rw-r--r--  contrib/libs/tbb/src/tbb/waiters.h  204
61 files changed, 21928 insertions, 0 deletions
diff --git a/contrib/libs/tbb/src/tbb/allocator.cpp b/contrib/libs/tbb/src/tbb/allocator.cpp
new file mode 100644
index 0000000000..6bf5a0be01
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/allocator.cpp
@@ -0,0 +1,234 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "oneapi/tbb/version.h"
+
+#include "oneapi/tbb/detail/_exception.h"
+#include "oneapi/tbb/detail/_assert.h"
+#include "oneapi/tbb/detail/_utils.h"
+
+#include "dynamic_link.h"
+#include "misc.h"
+
+#include <cstdlib>
+
+#if _WIN32 || _WIN64
+#include <Windows.h>
+#else
+#include <dlfcn.h>
+#endif /* _WIN32||_WIN64 */
+
+#if __TBB_WEAK_SYMBOLS_PRESENT
+
+#pragma weak scalable_malloc
+#pragma weak scalable_free
+#pragma weak scalable_aligned_malloc
+#pragma weak scalable_aligned_free
+
+extern "C" {
+ void* scalable_malloc(std::size_t);
+ void scalable_free(void*);
+ void* scalable_aligned_malloc(std::size_t, std::size_t);
+ void scalable_aligned_free(void*);
+}
+
+#endif /* __TBB_WEAK_SYMBOLS_PRESENT */
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+
+//! Initialization routine used for first indirect call via allocate_handler.
+static void* initialize_allocate_handler(std::size_t size);
+
+//! Handler for memory allocation
+static void* (*allocate_handler)(std::size_t size) = &initialize_allocate_handler;
+
+//! Handler for memory deallocation
+static void (*deallocate_handler)(void* pointer) = nullptr;
+
+//! Initialization routine used for first indirect call via cache_aligned_allocate_handler.
+static void* initialize_cache_aligned_allocate_handler(std::size_t n, std::size_t alignment);
+
+//! Allocates memory using standard malloc. It is used when scalable_allocator is not available
+static void* std_cache_aligned_allocate(std::size_t n, std::size_t alignment);
+
+//! Deallocates memory using standard free. It is used when scalable_allocator is not available
+static void std_cache_aligned_deallocate(void* p);
+
+//! Handler for padded memory allocation
+static void* (*cache_aligned_allocate_handler)(std::size_t n, std::size_t alignment) = &initialize_cache_aligned_allocate_handler;
+
+//! Handler for padded memory deallocation
+static void (*cache_aligned_deallocate_handler)(void* p) = nullptr;
+
+//! Table describing how to link the handlers.
+static const dynamic_link_descriptor MallocLinkTable[] = {
+ DLD(scalable_malloc, allocate_handler),
+ DLD(scalable_free, deallocate_handler),
+ DLD(scalable_aligned_malloc, cache_aligned_allocate_handler),
+ DLD(scalable_aligned_free, cache_aligned_deallocate_handler),
+};
+
+
+#if TBB_USE_DEBUG
+#define DEBUG_SUFFIX "_debug"
+#else
+#define DEBUG_SUFFIX
+#endif /* TBB_USE_DEBUG */
+
+// MALLOCLIB_NAME is the name of the oneTBB memory allocator library.
+#if _WIN32||_WIN64
+#define MALLOCLIB_NAME "tbbmalloc" DEBUG_SUFFIX ".dll"
+#elif __APPLE__
+#define MALLOCLIB_NAME "libtbbmalloc" DEBUG_SUFFIX ".dylib"
+#elif __FreeBSD__ || __NetBSD__ || __OpenBSD__ || __sun || _AIX || __ANDROID__
+#define MALLOCLIB_NAME "libtbbmalloc" DEBUG_SUFFIX ".so"
+#elif __linux__ // Note that order of these #elif's is important!
+#define MALLOCLIB_NAME "libtbbmalloc" DEBUG_SUFFIX ".so.2"
+#else
+#error Unknown OS
+#endif
+
+//! Initialize the allocation/free handler pointers.
+/** Caller is responsible for ensuring this routine is called exactly once.
+ The routine attempts to dynamically link with the TBB memory allocator.
+ If that allocator is not found, it links to malloc and free. */
+void initialize_handler_pointers() {
+ __TBB_ASSERT(allocate_handler == &initialize_allocate_handler, NULL);
+ bool success = dynamic_link(MALLOCLIB_NAME, MallocLinkTable, 4);
+ if(!success) {
+ // If unsuccessful, set the handlers to the default routines.
+ // This must be done now, and not before FillDynamicLinks runs, because if other
+ // threads call the handlers, we want them to go through the DoOneTimeInitializations logic,
+ // which forces them to wait.
+ allocate_handler = &std::malloc;
+ deallocate_handler = &std::free;
+ cache_aligned_allocate_handler = &std_cache_aligned_allocate;
+ cache_aligned_deallocate_handler = &std_cache_aligned_deallocate;
+ }
+
+ PrintExtraVersionInfo( "ALLOCATOR", success?"scalable_malloc":"malloc" );
+}
+
+static std::once_flag initialization_state;
+void initialize_cache_aligned_allocator() {
+ std::call_once(initialization_state, &initialize_handler_pointers);
+}
+
+//! Executed on very first call through allocate_handler
+static void* initialize_allocate_handler(std::size_t size) {
+ initialize_cache_aligned_allocator();
+ __TBB_ASSERT(allocate_handler != &initialize_allocate_handler, NULL);
+ return (*allocate_handler)(size);
+}
+
+//! Executed on very first call through cache_aligned_allocate_handler
+static void* initialize_cache_aligned_allocate_handler(std::size_t bytes, std::size_t alignment) {
+ initialize_cache_aligned_allocator();
+ __TBB_ASSERT(cache_aligned_allocate_handler != &initialize_cache_aligned_allocate_handler, NULL);
+ return (*cache_aligned_allocate_handler)(bytes, alignment);
+}
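The allocation entry points above rely on a self-replacing function pointer: allocate_handler initially points at initialize_allocate_handler, which performs the one-time setup under std::call_once and then forwards the call to whichever handler the setup installed (scalable_malloc on successful dynamic linking, std::malloc otherwise). Below is a minimal standalone sketch of that bootstrap pattern; the names (bootstrap_alloc, install_handlers, alloc_handler) are hypothetical, and std::malloc stands in for the dynamically linked allocator.

#include <cstdlib>
#include <mutex>

// Hypothetical sketch of the lazy handler bootstrap used above, not TBB code.
static void* bootstrap_alloc(std::size_t n);                   // forward declaration
static void* (*alloc_handler)(std::size_t) = &bootstrap_alloc; // first call bootstraps

static std::once_flag init_flag;

static void install_handlers() {
    // The real code first tries dynamic_link(MALLOCLIB_NAME, ...);
    // this sketch simply installs the malloc fallback.
    alloc_handler = &std::malloc;
}

static void* bootstrap_alloc(std::size_t n) {
    // The first call through alloc_handler lands here; call_once makes sure the
    // handlers are installed exactly once, then the call is forwarded.
    std::call_once(init_flag, install_handlers);
    return (*alloc_handler)(n);
}

int main() {
    void* p = alloc_handler(64);   // triggers initialization on first use
    std::free(p);
    return 0;
}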
+
+// TODO: use CPUID to find actual line size, though consider backward compatibility
+// nfs - no false sharing
+static constexpr std::size_t nfs_size = 128;
+
+std::size_t __TBB_EXPORTED_FUNC cache_line_size() {
+ return nfs_size;
+}
+
+void* __TBB_EXPORTED_FUNC cache_aligned_allocate(std::size_t size) {
+ const std::size_t cache_line_size = nfs_size;
+ __TBB_ASSERT(is_power_of_two(cache_line_size), "must be power of two");
+
+ // Check for overflow
+ if (size + cache_line_size < size) {
+ throw_exception(exception_id::bad_alloc);
+ }
+ // scalable_aligned_malloc considers zero size request an error, and returns NULL
+ if (size == 0) size = 1;
+
+ void* result = cache_aligned_allocate_handler(size, cache_line_size);
+ if (!result) {
+ throw_exception(exception_id::bad_alloc);
+ }
+ __TBB_ASSERT(is_aligned(result, cache_line_size), "The returned address isn't aligned");
+ return result;
+}
+
+void __TBB_EXPORTED_FUNC cache_aligned_deallocate(void* p) {
+ __TBB_ASSERT(cache_aligned_deallocate_handler, "Initialization has not been done yet.");
+ (*cache_aligned_deallocate_handler)(p);
+}
+
+static void* std_cache_aligned_allocate(std::size_t bytes, std::size_t alignment) {
+ // TODO: make it common with cache_aligned_resource
+ std::size_t space = alignment + bytes;
+ std::uintptr_t base = reinterpret_cast<std::uintptr_t>(std::malloc(space));
+ if (!base) {
+ return nullptr;
+ }
+ std::uintptr_t result = (base + nfs_size) & ~(nfs_size - 1);
+ // Round up to the next cache line (align the base address)
+ __TBB_ASSERT((result - base) >= sizeof(std::uintptr_t), "Cannot store a base pointer to the header");
+ __TBB_ASSERT(space - (result - base) >= bytes, "Not enough space for the storage");
+
+ // Record where block actually starts.
+ (reinterpret_cast<std::uintptr_t*>(result))[-1] = base;
+ return reinterpret_cast<void*>(result);
+}
+
+static void std_cache_aligned_deallocate(void* p) {
+ if (p) {
+ __TBB_ASSERT(reinterpret_cast<std::uintptr_t>(p) >= 0x4096, "attempt to free block not obtained from cache_aligned_allocator");
+ // Recover where block actually starts
+ std::uintptr_t base = (reinterpret_cast<std::uintptr_t*>(p))[-1];
+ __TBB_ASSERT(((base + nfs_size) & ~(nfs_size - 1)) == reinterpret_cast<std::uintptr_t>(p), "Incorrect alignment or not allocated by std_cache_aligned_deallocate?");
+ std::free(reinterpret_cast<void*>(base));
+ }
+}
+
+void* __TBB_EXPORTED_FUNC allocate_memory(std::size_t size) {
+ void* result = (*allocate_handler)(size);
+ if (!result) {
+ throw_exception(exception_id::bad_alloc);
+ }
+ return result;
+}
+
+void __TBB_EXPORTED_FUNC deallocate_memory(void* p) {
+ if (p) {
+ __TBB_ASSERT(deallocate_handler, "Initialization has not been done yet.");
+ (*deallocate_handler)(p);
+ }
+}
+
+bool __TBB_EXPORTED_FUNC is_tbbmalloc_used() {
+ if (allocate_handler == &initialize_allocate_handler) {
+ void* void_ptr = allocate_handler(1);
+ deallocate_handler(void_ptr);
+ }
+ __TBB_ASSERT(allocate_handler != &initialize_allocate_handler && deallocate_handler != nullptr, NULL);
+ // Cast to void avoids type mismatch errors on some compilers (e.g. __IBMCPP__)
+ __TBB_ASSERT((reinterpret_cast<void*>(allocate_handler) == reinterpret_cast<void*>(&std::malloc)) == (reinterpret_cast<void*>(deallocate_handler) == reinterpret_cast<void*>(&std::free)),
+ "Both shim pointers must refer to routines from the same package (either TBB or CRT)");
+ return reinterpret_cast<void*>(allocate_handler) == reinterpret_cast<void*>(&std::malloc);
+}
+
+} // namespace r1
+} // namespace detail
+} // namespace tbb
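std_cache_aligned_allocate and std_cache_aligned_deallocate above use the classic fallback scheme of over-allocating with std::malloc, rounding the address up to the requested alignment, and stashing the original base pointer one word below the returned address so it can be recovered on free. A self-contained sketch of the same idea follows; demo_aligned_alloc/demo_aligned_free are hypothetical names, and it assumes the alignment is a power of two at least as large as what std::malloc already guarantees, so the hidden header always has room for one pointer.

#include <cassert>
#include <cstdint>
#include <cstdlib>

// Hypothetical demo: over-allocate, align up, and remember the malloc'ed
// base address one word below the pointer handed back to the caller.
static void* demo_aligned_alloc(std::size_t bytes, std::size_t alignment) {
    assert((alignment & (alignment - 1)) == 0 && "alignment must be a power of two");
    std::uintptr_t base = reinterpret_cast<std::uintptr_t>(std::malloc(alignment + bytes));
    if (!base) return nullptr;
    // Round up past at least one header word to the next alignment boundary.
    std::uintptr_t result = (base + alignment) & ~(alignment - 1);
    reinterpret_cast<std::uintptr_t*>(result)[-1] = base;   // hidden header
    return reinterpret_cast<void*>(result);
}

static void demo_aligned_free(void* p) {
    if (!p) return;
    // Recover the real start of the block from the hidden header and free it.
    std::free(reinterpret_cast<void*>(reinterpret_cast<std::uintptr_t*>(p)[-1]));
}

int main() {
    void* p = demo_aligned_alloc(100, 128);
    assert(reinterpret_cast<std::uintptr_t>(p) % 128 == 0);
    demo_aligned_free(p);
    return 0;
}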
diff --git a/contrib/libs/tbb/src/tbb/arena.cpp b/contrib/libs/tbb/src/tbb/arena.cpp
new file mode 100644
index 0000000000..1ddab36ff5
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/arena.cpp
@@ -0,0 +1,757 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "task_dispatcher.h"
+#include "governor.h"
+#include "arena.h"
+#include "itt_notify.h"
+#include "semaphore.h"
+#include "waiters.h"
+#include "oneapi/tbb/detail/_task.h"
+#include "oneapi/tbb/info.h"
+#include "oneapi/tbb/tbb_allocator.h"
+
+#include <atomic>
+#include <cstring>
+#include <functional>
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+
+#if __TBB_ARENA_BINDING
+class numa_binding_observer : public tbb::task_scheduler_observer {
+ binding_handler* my_binding_handler;
+public:
+ numa_binding_observer( d1::task_arena* ta, int num_slots, int numa_id, core_type_id core_type, int max_threads_per_core )
+ : task_scheduler_observer(*ta)
+ , my_binding_handler(construct_binding_handler(num_slots, numa_id, core_type, max_threads_per_core))
+ {}
+
+ void on_scheduler_entry( bool ) override {
+ apply_affinity_mask(my_binding_handler, this_task_arena::current_thread_index());
+ }
+
+ void on_scheduler_exit( bool ) override {
+ restore_affinity_mask(my_binding_handler, this_task_arena::current_thread_index());
+ }
+
+ ~numa_binding_observer(){
+ destroy_binding_handler(my_binding_handler);
+ }
+};
+
+numa_binding_observer* construct_binding_observer( d1::task_arena* ta, int num_slots, int numa_id, core_type_id core_type, int max_threads_per_core ) {
+ numa_binding_observer* binding_observer = nullptr;
+ if ((core_type >= 0 && core_type_count() > 1) || (numa_id >= 0 && numa_node_count() > 1) || max_threads_per_core > 0) {
+ binding_observer = new(allocate_memory(sizeof(numa_binding_observer))) numa_binding_observer(ta, num_slots, numa_id, core_type, max_threads_per_core);
+ __TBB_ASSERT(binding_observer, "Failure during NUMA binding observer allocation and construction");
+ binding_observer->observe(true);
+ }
+ return binding_observer;
+}
+
+void destroy_binding_observer( numa_binding_observer* binding_observer ) {
+ __TBB_ASSERT(binding_observer, "Trying to deallocate NULL pointer");
+ binding_observer->observe(false);
+ binding_observer->~numa_binding_observer();
+ deallocate_memory(binding_observer);
+}
+#endif /*__TBB_ARENA_BINDING*/
+
+std::size_t arena::occupy_free_slot_in_range( thread_data& tls, std::size_t lower, std::size_t upper ) {
+ if ( lower >= upper ) return out_of_arena;
+ // Start search for an empty slot from the one we occupied the last time
+ std::size_t index = tls.my_arena_index;
+ if ( index < lower || index >= upper ) index = tls.my_random.get() % (upper - lower) + lower;
+ __TBB_ASSERT( index >= lower && index < upper, NULL );
+ // Find a free slot
+ for ( std::size_t i = index; i < upper; ++i )
+ if (my_slots[i].try_occupy()) return i;
+ for ( std::size_t i = lower; i < index; ++i )
+ if (my_slots[i].try_occupy()) return i;
+ return out_of_arena;
+}
+
+template <bool as_worker>
+std::size_t arena::occupy_free_slot(thread_data& tls) {
+ // Firstly, external threads try to occupy reserved slots
+ std::size_t index = as_worker ? out_of_arena : occupy_free_slot_in_range( tls, 0, my_num_reserved_slots );
+ if ( index == out_of_arena ) {
+ // Secondly, all threads try to occupy all non-reserved slots
+ index = occupy_free_slot_in_range(tls, my_num_reserved_slots, my_num_slots );
+ // Likely this arena is already saturated
+ if ( index == out_of_arena )
+ return out_of_arena;
+ }
+
+ atomic_update( my_limit, (unsigned)(index + 1), std::less<unsigned>() );
+ return index;
+}
+
+std::uintptr_t arena::calculate_stealing_threshold() {
+ stack_anchor_type anchor;
+ return r1::calculate_stealing_threshold(reinterpret_cast<std::uintptr_t>(&anchor), my_market->worker_stack_size());
+}
+
+void arena::process(thread_data& tls) {
+ governor::set_thread_data(tls); // TODO: consider moving to create_one_job.
+ __TBB_ASSERT( is_alive(my_guard), nullptr);
+ __TBB_ASSERT( my_num_slots > 1, nullptr);
+
+ std::size_t index = occupy_free_slot</*as_worker*/true>(tls);
+ if (index == out_of_arena) {
+ on_thread_leaving<ref_worker>();
+ return;
+ }
+ __TBB_ASSERT( index >= my_num_reserved_slots, "Workers cannot occupy reserved slots" );
+ tls.attach_arena(*this, index);
+
+ task_dispatcher& task_disp = tls.my_arena_slot->default_task_dispatcher();
+ task_disp.set_stealing_threshold(calculate_stealing_threshold());
+ __TBB_ASSERT(task_disp.can_steal(), nullptr);
+ tls.attach_task_dispatcher(task_disp);
+
+ __TBB_ASSERT( !tls.my_last_observer, "There cannot be notified local observers when entering arena" );
+ my_observers.notify_entry_observers(tls.my_last_observer, tls.my_is_worker);
+
+ // Waiting on special object tied to this arena
+ outermost_worker_waiter waiter(*this);
+ d1::task* t = tls.my_task_dispatcher->local_wait_for_all(nullptr, waiter);
+ __TBB_ASSERT_EX(t == nullptr, "Outermost worker must not leave dispatch loop with a task");
+ __TBB_ASSERT(governor::is_thread_data_set(&tls), nullptr);
+ __TBB_ASSERT(tls.my_task_dispatcher == &task_disp, nullptr);
+
+ my_observers.notify_exit_observers(tls.my_last_observer, tls.my_is_worker);
+ tls.my_last_observer = nullptr;
+
+ task_disp.set_stealing_threshold(0);
+ tls.detach_task_dispatcher();
+
+ // Arena slot detach (arena may be used in market::process)
+ // TODO: Consider moving several calls below into a new method(e.g.detach_arena).
+ tls.my_arena_slot->release();
+ tls.my_arena_slot = nullptr;
+ tls.my_inbox.detach();
+ __TBB_ASSERT(tls.my_inbox.is_idle_state(true), nullptr);
+ __TBB_ASSERT(is_alive(my_guard), nullptr);
+
+ // In contrast to earlier versions of TBB (before 3.0 U5), it is now possible
+ // that the arena may be temporarily left unpopulated by threads. See comments in
+ // arena::on_thread_leaving() for more details.
+ on_thread_leaving<ref_worker>();
+ __TBB_ASSERT(tls.my_arena == this, "my_arena is used as a hint when searching the arena to join");
+}
+
+arena::arena ( market& m, unsigned num_slots, unsigned num_reserved_slots, unsigned priority_level )
+{
+ __TBB_ASSERT( !my_guard, "improperly allocated arena?" );
+ __TBB_ASSERT( sizeof(my_slots[0]) % cache_line_size()==0, "arena::slot size not multiple of cache line size" );
+ __TBB_ASSERT( is_aligned(this, cache_line_size()), "arena misaligned" );
+ my_market = &m;
+ my_limit = 1;
+ // Two slots are mandatory: for the external thread, and for 1 worker (required to support starvation resistant tasks).
+ my_num_slots = num_arena_slots(num_slots);
+ my_num_reserved_slots = num_reserved_slots;
+ my_max_num_workers = num_slots-num_reserved_slots;
+ my_priority_level = priority_level;
+ my_references = ref_external; // accounts for the external thread
+ my_aba_epoch = m.my_arenas_aba_epoch.load(std::memory_order_relaxed);
+ my_observers.my_arena = this;
+ my_co_cache.init(4 * num_slots);
+ __TBB_ASSERT ( my_max_num_workers <= my_num_slots, NULL );
+ // Initialize the default context. It should be allocated before task_dispatch construction.
+ my_default_ctx = new (cache_aligned_allocate(sizeof(d1::task_group_context)))
+ d1::task_group_context{ d1::task_group_context::isolated, d1::task_group_context::fp_settings };
+ // Construct slots. Mark internal synchronization elements for the tools.
+ task_dispatcher* base_td_pointer = reinterpret_cast<task_dispatcher*>(my_slots + my_num_slots);
+ for( unsigned i = 0; i < my_num_slots; ++i ) {
+ // __TBB_ASSERT( !my_slots[i].my_scheduler && !my_slots[i].task_pool, NULL );
+ __TBB_ASSERT( !my_slots[i].task_pool_ptr, NULL );
+ __TBB_ASSERT( !my_slots[i].my_task_pool_size, NULL );
+ mailbox(i).construct();
+ my_slots[i].init_task_streams(i);
+ my_slots[i].my_default_task_dispatcher = new(base_td_pointer + i) task_dispatcher(this);
+ my_slots[i].my_is_occupied.store(false, std::memory_order_relaxed);
+ }
+ my_fifo_task_stream.initialize(my_num_slots);
+ my_resume_task_stream.initialize(my_num_slots);
+#if __TBB_PREVIEW_CRITICAL_TASKS
+ my_critical_task_stream.initialize(my_num_slots);
+#endif
+#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY
+ my_local_concurrency_requests = 0;
+ my_local_concurrency_flag.clear();
+ my_global_concurrency_mode.store(false, std::memory_order_relaxed);
+#endif
+}
+
+arena& arena::allocate_arena( market& m, unsigned num_slots, unsigned num_reserved_slots,
+ unsigned priority_level )
+{
+ __TBB_ASSERT( sizeof(base_type) + sizeof(arena_slot) == sizeof(arena), "All arena data fields must go to arena_base" );
+ __TBB_ASSERT( sizeof(base_type) % cache_line_size() == 0, "arena slots area misaligned: wrong padding" );
+ __TBB_ASSERT( sizeof(mail_outbox) == max_nfs_size, "Mailbox padding is wrong" );
+ std::size_t n = allocation_size(num_arena_slots(num_slots));
+ unsigned char* storage = (unsigned char*)cache_aligned_allocate(n);
+ // Zero all slots to indicate that they are empty
+ std::memset( storage, 0, n );
+ return *new( storage + num_arena_slots(num_slots) * sizeof(mail_outbox) )
+ arena(m, num_slots, num_reserved_slots, priority_level);
+}
+
+void arena::free_arena () {
+ __TBB_ASSERT( is_alive(my_guard), NULL );
+ __TBB_ASSERT( !my_references.load(std::memory_order_relaxed), "There are threads in the dying arena" );
+ __TBB_ASSERT( !my_num_workers_requested && !my_num_workers_allotted, "Dying arena requests workers" );
+ __TBB_ASSERT( my_pool_state.load(std::memory_order_relaxed) == SNAPSHOT_EMPTY || !my_max_num_workers,
+ "Inconsistent state of a dying arena" );
+#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY
+ __TBB_ASSERT( !my_global_concurrency_mode, NULL );
+#endif
+ poison_value( my_guard );
+ std::intptr_t drained = 0;
+ for ( unsigned i = 0; i < my_num_slots; ++i ) {
+ // __TBB_ASSERT( !my_slots[i].my_scheduler, "arena slot is not empty" );
+ // TODO: understand the assertion and modify
+ // __TBB_ASSERT( my_slots[i].task_pool == EmptyTaskPool, NULL );
+ __TBB_ASSERT( my_slots[i].head == my_slots[i].tail, NULL ); // TODO: replace by is_quiescent_local_task_pool_empty
+ my_slots[i].free_task_pool();
+ drained += mailbox(i).drain();
+ my_slots[i].my_default_task_dispatcher->~task_dispatcher();
+ }
+ __TBB_ASSERT(my_fifo_task_stream.empty(), "Not all enqueued tasks were executed");
+ __TBB_ASSERT(my_resume_task_stream.empty(), "Not all enqueued tasks were executed");
+ // Cleanup coroutines/schedulers cache
+ my_co_cache.cleanup();
+ my_default_ctx->~task_group_context();
+ cache_aligned_deallocate(my_default_ctx);
+#if __TBB_PREVIEW_CRITICAL_TASKS
+ __TBB_ASSERT( my_critical_task_stream.empty(), "Not all critical tasks were executed");
+#endif
+ // remove an internal reference
+ my_market->release( /*is_public=*/false, /*blocking_terminate=*/false );
+ if ( !my_observers.empty() ) {
+ my_observers.clear();
+ }
+ void* storage = &mailbox(my_num_slots-1);
+ __TBB_ASSERT( my_references.load(std::memory_order_relaxed) == 0, NULL );
+ __TBB_ASSERT( my_pool_state.load(std::memory_order_relaxed) == SNAPSHOT_EMPTY || !my_max_num_workers, NULL );
+ this->~arena();
+#if TBB_USE_ASSERT > 1
+ std::memset( storage, 0, allocation_size(my_num_slots) );
+#endif /* TBB_USE_ASSERT */
+ cache_aligned_deallocate( storage );
+}
+
+bool arena::has_enqueued_tasks() {
+ return !my_fifo_task_stream.empty();
+}
+
+bool arena::is_out_of_work() {
+#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY
+ if (my_local_concurrency_flag.try_clear_if([this] {
+ return !has_enqueued_tasks();
+ })) {
+ my_market->adjust_demand(*this, /* delta = */ -1, /* mandatory = */ true);
+ }
+#endif
+
+ // TODO: rework it to return at least a hint about where a task was found; better if the task itself.
+ switch (my_pool_state.load(std::memory_order_acquire)) {
+ case SNAPSHOT_EMPTY:
+ return true;
+ case SNAPSHOT_FULL: {
+ // Use unique id for "busy" in order to avoid ABA problems.
+ const pool_state_t busy = pool_state_t(&busy);
+ // Helper for CAS execution
+ pool_state_t expected_state;
+
+ // Request permission to take snapshot
+ expected_state = SNAPSHOT_FULL;
+ if (my_pool_state.compare_exchange_strong(expected_state, busy)) {
+ // Got permission. Take the snapshot.
+ // NOTE: This is not a lock, as the state can be set to FULL at
+ // any moment by a thread that spawns/enqueues new task.
+ std::size_t n = my_limit.load(std::memory_order_acquire);
+ // Make local copies of volatile parameters. Their change during
+ // snapshot taking procedure invalidates the attempt, and returns
+ // this thread into the dispatch loop.
+ std::size_t k;
+ for (k = 0; k < n; ++k) {
+ if (my_slots[k].task_pool.load(std::memory_order_relaxed) != EmptyTaskPool &&
+ my_slots[k].head.load(std::memory_order_relaxed) < my_slots[k].tail.load(std::memory_order_relaxed))
+ {
+ // k-th primary task pool is nonempty and does contain tasks.
+ break;
+ }
+ if (my_pool_state.load(std::memory_order_acquire) != busy)
+ return false; // the work was published
+ }
+ bool work_absent = k == n;
+ // Test and test-and-set.
+ if (my_pool_state.load(std::memory_order_acquire) == busy) {
+ bool no_stream_tasks = !has_enqueued_tasks() && my_resume_task_stream.empty();
+#if __TBB_PREVIEW_CRITICAL_TASKS
+ no_stream_tasks = no_stream_tasks && my_critical_task_stream.empty();
+#endif
+ work_absent = work_absent && no_stream_tasks;
+ if (work_absent) {
+ // save current demand value before setting SNAPSHOT_EMPTY,
+ // to avoid race with advertise_new_work.
+ int current_demand = (int)my_max_num_workers;
+ expected_state = busy;
+ if (my_pool_state.compare_exchange_strong(expected_state, SNAPSHOT_EMPTY)) {
+ // This thread transitioned pool to empty state, and thus is
+ // responsible for telling the market that there is no work to do.
+ my_market->adjust_demand(*this, -current_demand, /* mandatory = */ false);
+ return true;
+ }
+ return false;
+ }
+ // Undo previous transition SNAPSHOT_FULL-->busy, unless another thread undid it.
+ expected_state = busy;
+ my_pool_state.compare_exchange_strong(expected_state, SNAPSHOT_FULL);
+ }
+ }
+ return false;
+ }
+ default:
+ // Another thread is taking a snapshot.
+ return false;
+ }
+}
+
+void arena::enqueue_task(d1::task& t, d1::task_group_context& ctx, thread_data& td) {
+ task_group_context_impl::bind_to(ctx, &td);
+ task_accessor::context(t) = &ctx;
+ task_accessor::isolation(t) = no_isolation;
+ my_fifo_task_stream.push( &t, random_lane_selector(td.my_random) );
+ advertise_new_work<work_enqueued>();
+}
+
+} // namespace r1
+} // namespace detail
+} // namespace tbb
+
+// Enable task_arena.h
+#include "oneapi/tbb/task_arena.h" // task_arena_base
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+
+#if TBB_USE_ASSERT
+void assert_arena_priority_valid( tbb::task_arena::priority a_priority ) {
+ bool is_arena_priority_correct =
+ a_priority == tbb::task_arena::priority::high ||
+ a_priority == tbb::task_arena::priority::normal ||
+ a_priority == tbb::task_arena::priority::low;
+ __TBB_ASSERT( is_arena_priority_correct,
+ "Task arena priority should be equal to one of the predefined values." );
+}
+#else
+void assert_arena_priority_valid( tbb::task_arena::priority ) {}
+#endif
+
+unsigned arena_priority_level( tbb::task_arena::priority a_priority ) {
+ assert_arena_priority_valid( a_priority );
+ return market::num_priority_levels - unsigned(int(a_priority) / d1::priority_stride);
+}
+
+tbb::task_arena::priority arena_priority( unsigned priority_level ) {
+ auto priority = tbb::task_arena::priority(
+ (market::num_priority_levels - priority_level) * d1::priority_stride
+ );
+ assert_arena_priority_valid( priority );
+ return priority;
+}
+
+struct task_arena_impl {
+ static void initialize(d1::task_arena_base&);
+ static void terminate(d1::task_arena_base&);
+ static bool attach(d1::task_arena_base&);
+ static void execute(d1::task_arena_base&, d1::delegate_base&);
+ static void wait(d1::task_arena_base&);
+ static int max_concurrency(const d1::task_arena_base*);
+ static void enqueue(d1::task&, d1::task_arena_base*);
+};
+
+void __TBB_EXPORTED_FUNC initialize(d1::task_arena_base& ta) {
+ task_arena_impl::initialize(ta);
+}
+void __TBB_EXPORTED_FUNC terminate(d1::task_arena_base& ta) {
+ task_arena_impl::terminate(ta);
+}
+bool __TBB_EXPORTED_FUNC attach(d1::task_arena_base& ta) {
+ return task_arena_impl::attach(ta);
+}
+void __TBB_EXPORTED_FUNC execute(d1::task_arena_base& ta, d1::delegate_base& d) {
+ task_arena_impl::execute(ta, d);
+}
+void __TBB_EXPORTED_FUNC wait(d1::task_arena_base& ta) {
+ task_arena_impl::wait(ta);
+}
+
+int __TBB_EXPORTED_FUNC max_concurrency(const d1::task_arena_base* ta) {
+ return task_arena_impl::max_concurrency(ta);
+}
+
+void __TBB_EXPORTED_FUNC enqueue(d1::task& t, d1::task_arena_base* ta) {
+ task_arena_impl::enqueue(t, ta);
+}
+
+void task_arena_impl::initialize(d1::task_arena_base& ta) {
+ governor::one_time_init();
+ if (ta.my_max_concurrency < 1) {
+#if __TBB_ARENA_BINDING
+
+#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT
+ d1::constraints arena_constraints = d1::constraints{}
+ .set_core_type(ta.core_type())
+ .set_max_threads_per_core(ta.max_threads_per_core())
+ .set_numa_id(ta.my_numa_id);
+ ta.my_max_concurrency = (int)default_concurrency(arena_constraints);
+#else /*!__TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT*/
+ ta.my_max_concurrency = (int)default_concurrency(ta.my_numa_id);
+#endif /*!__TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT*/
+
+#else /*!__TBB_ARENA_BINDING*/
+ ta.my_max_concurrency = (int)governor::default_num_threads();
+#endif /*!__TBB_ARENA_BINDING*/
+ }
+
+ __TBB_ASSERT(ta.my_arena.load(std::memory_order_relaxed) == nullptr, "Arena already initialized");
+ unsigned priority_level = arena_priority_level(ta.my_priority);
+ arena* a = market::create_arena(ta.my_max_concurrency, ta.my_num_reserved_slots, priority_level, /* stack_size = */ 0);
+ ta.my_arena.store(a, std::memory_order_release);
+ // add an internal market reference; a public reference was added in create_arena
+ market::global_market( /*is_public=*/false);
+#if __TBB_ARENA_BINDING
+ a->my_numa_binding_observer = construct_binding_observer(
+ static_cast<d1::task_arena*>(&ta), a->my_num_slots, ta.my_numa_id, ta.core_type(), ta.max_threads_per_core());
+#endif /*__TBB_ARENA_BINDING*/
+}
+
+void task_arena_impl::terminate(d1::task_arena_base& ta) {
+ arena* a = ta.my_arena.load(std::memory_order_relaxed);
+ assert_pointer_valid(a);
+#if __TBB_ARENA_BINDING
+ if(a->my_numa_binding_observer != nullptr ) {
+ destroy_binding_observer(a->my_numa_binding_observer);
+ a->my_numa_binding_observer = nullptr;
+ }
+#endif /*__TBB_ARENA_BINDING*/
+ a->my_market->release( /*is_public=*/true, /*blocking_terminate=*/false );
+ a->on_thread_leaving<arena::ref_external>();
+ ta.my_arena.store(nullptr, std::memory_order_relaxed);
+}
+
+bool task_arena_impl::attach(d1::task_arena_base& ta) {
+ __TBB_ASSERT(!ta.my_arena.load(std::memory_order_relaxed), nullptr);
+ thread_data* td = governor::get_thread_data_if_initialized();
+ if( td && td->my_arena ) {
+ arena* a = td->my_arena;
+ // There is an active arena to attach to.
+ // It is still referenced by this thread, so it won't be destroyed right away.
+ __TBB_ASSERT(a->my_references > 0, NULL );
+ a->my_references += arena::ref_external;
+ ta.my_num_reserved_slots = a->my_num_reserved_slots;
+ ta.my_priority = arena_priority(a->my_priority_level);
+ ta.my_max_concurrency = ta.my_num_reserved_slots + a->my_max_num_workers;
+ __TBB_ASSERT(arena::num_arena_slots(ta.my_max_concurrency) == a->my_num_slots, NULL);
+ ta.my_arena.store(a, std::memory_order_release);
+ // increases market's ref count for task_arena
+ market::global_market( /*is_public=*/true );
+ return true;
+ }
+ return false;
+}
+
+void task_arena_impl::enqueue(d1::task& t, d1::task_arena_base* ta) {
+ thread_data* td = governor::get_thread_data(); // thread data is only needed for FastRandom instance
+ arena* a = ta->my_arena.load(std::memory_order_relaxed);
+ assert_pointers_valid(ta, a, a->my_default_ctx, td);
+ // Is there a better place for checking the state of my_default_ctx?
+ __TBB_ASSERT(!a->my_default_ctx->is_group_execution_cancelled(),
+ "The task will not be executed because default task_group_context of task_arena is cancelled. Has previously enqueued task thrown an exception?");
+ a->enqueue_task(t, *a->my_default_ctx, *td);
+}
+
+class nested_arena_context : no_copy {
+public:
+ nested_arena_context(thread_data& td, arena& nested_arena, std::size_t slot_index)
+ : m_orig_execute_data_ext(td.my_task_dispatcher->m_execute_data_ext)
+ {
+ if (td.my_arena != &nested_arena) {
+ m_orig_arena = td.my_arena;
+ m_orig_slot_index = td.my_arena_index;
+ m_orig_last_observer = td.my_last_observer;
+
+ td.detach_task_dispatcher();
+ td.attach_arena(nested_arena, slot_index);
+ task_dispatcher& task_disp = td.my_arena_slot->default_task_dispatcher();
+ task_disp.set_stealing_threshold(m_orig_execute_data_ext.task_disp->m_stealing_threshold);
+ td.attach_task_dispatcher(task_disp);
+
+ // If the calling thread occupies a slot outside the external thread reserve, we need to notify the
+ // market that this arena requires one worker less.
+ if (td.my_arena_index >= td.my_arena->my_num_reserved_slots) {
+ td.my_arena->my_market->adjust_demand(*td.my_arena, /* delta = */ -1, /* mandatory = */ false);
+ }
+
+ td.my_last_observer = nullptr;
+ // The task_arena::execute method considers each calling thread as an external thread.
+ td.my_arena->my_observers.notify_entry_observers(td.my_last_observer, /* worker*/false);
+ }
+
+ m_task_dispatcher = td.my_task_dispatcher;
+ m_orig_fifo_tasks_allowed = m_task_dispatcher->allow_fifo_task(true);
+ m_orig_critical_task_allowed = m_task_dispatcher->m_properties.critical_task_allowed;
+ m_task_dispatcher->m_properties.critical_task_allowed = true;
+
+ execution_data_ext& ed_ext = td.my_task_dispatcher->m_execute_data_ext;
+ ed_ext.context = td.my_arena->my_default_ctx;
+ ed_ext.original_slot = td.my_arena_index;
+ ed_ext.affinity_slot = d1::no_slot;
+ ed_ext.task_disp = td.my_task_dispatcher;
+ ed_ext.isolation = no_isolation;
+
+ __TBB_ASSERT(td.my_arena_slot, nullptr);
+ __TBB_ASSERT(td.my_arena_slot->is_occupied(), nullptr);
+ __TBB_ASSERT(td.my_task_dispatcher, nullptr);
+ }
+ ~nested_arena_context() {
+ thread_data& td = *m_task_dispatcher->m_thread_data;
+ __TBB_ASSERT(governor::is_thread_data_set(&td), nullptr);
+ m_task_dispatcher->allow_fifo_task(m_orig_fifo_tasks_allowed);
+ m_task_dispatcher->m_properties.critical_task_allowed = m_orig_critical_task_allowed;
+ if (m_orig_arena) {
+ td.my_arena->my_observers.notify_exit_observers(td.my_last_observer, /*worker*/ false);
+ td.my_last_observer = m_orig_last_observer;
+
+ // Notify the market that this thread is releasing one slot
+ // that can be used by a worker thread.
+ if (td.my_arena_index >= td.my_arena->my_num_reserved_slots) {
+ td.my_arena->my_market->adjust_demand(*td.my_arena, /* delta = */ 1, /* mandatory = */ false);
+ }
+
+ td.my_task_dispatcher->set_stealing_threshold(0);
+ td.detach_task_dispatcher();
+ td.my_arena_slot->release();
+ td.my_arena->my_exit_monitors.notify_one(); // do not relax!
+
+ td.attach_arena(*m_orig_arena, m_orig_slot_index);
+ td.attach_task_dispatcher(*m_orig_execute_data_ext.task_disp);
+ }
+ td.my_task_dispatcher->m_execute_data_ext = m_orig_execute_data_ext;
+ }
+
+private:
+ execution_data_ext m_orig_execute_data_ext{};
+ arena* m_orig_arena{ nullptr };
+ observer_proxy* m_orig_last_observer{ nullptr };
+ task_dispatcher* m_task_dispatcher{ nullptr };
+ unsigned m_orig_slot_index{};
+ bool m_orig_fifo_tasks_allowed{};
+ bool m_orig_critical_task_allowed{};
+};
+
+class delegated_task : public d1::task {
+ d1::delegate_base& m_delegate;
+ concurrent_monitor& m_monitor;
+ d1::wait_context& m_wait_ctx;
+ std::atomic<bool> m_completed;
+ d1::task* execute(d1::execution_data& ed) override {
+ const execution_data_ext& ed_ext = static_cast<const execution_data_ext&>(ed);
+ execution_data_ext orig_execute_data_ext = ed_ext.task_disp->m_execute_data_ext;
+ __TBB_ASSERT(&ed_ext.task_disp->m_execute_data_ext == &ed,
+ "The execute data shall point to the current task dispatcher execute data");
+ __TBB_ASSERT(ed_ext.task_disp->m_execute_data_ext.isolation == no_isolation, nullptr);
+
+ ed_ext.task_disp->m_execute_data_ext.context = ed_ext.task_disp->get_thread_data().my_arena->my_default_ctx;
+ bool fifo_task_allowed = ed_ext.task_disp->allow_fifo_task(true);
+ try_call([&] {
+ m_delegate();
+ }).on_completion([&] {
+ ed_ext.task_disp->m_execute_data_ext = orig_execute_data_ext;
+ ed_ext.task_disp->allow_fifo_task(fifo_task_allowed);
+ });
+
+ finalize();
+ return nullptr;
+ }
+ d1::task* cancel(d1::execution_data&) override {
+ finalize();
+ return nullptr;
+ }
+ void finalize() {
+ m_wait_ctx.release(); // must precede the wakeup
+ m_monitor.notify([this](std::uintptr_t ctx) {
+ return ctx == std::uintptr_t(&m_delegate);
+ }); // do not relax, it needs a fence!
+ m_completed.store(true, std::memory_order_release);
+ }
+public:
+ delegated_task(d1::delegate_base& d, concurrent_monitor& s, d1::wait_context& wo)
+ : m_delegate(d), m_monitor(s), m_wait_ctx(wo), m_completed{ false }{}
+ ~delegated_task() {
+ // The destructor can be called before m_monitor is notified
+ // because the waiting thread can be released after m_wait_ctx.release_wait.
+ // To close that race we wait for the m_completed signal.
+ spin_wait_until_eq(m_completed, true);
+ }
+};
+
+void task_arena_impl::execute(d1::task_arena_base& ta, d1::delegate_base& d) {
+ arena* a = ta.my_arena.load(std::memory_order_relaxed);
+ __TBB_ASSERT(a != nullptr, nullptr);
+ thread_data* td = governor::get_thread_data();
+
+ bool same_arena = td->my_arena == a;
+ std::size_t index1 = td->my_arena_index;
+ if (!same_arena) {
+ index1 = a->occupy_free_slot</*as_worker */false>(*td);
+ if (index1 == arena::out_of_arena) {
+ concurrent_monitor::thread_context waiter((std::uintptr_t)&d);
+ d1::wait_context wo(1);
+ d1::task_group_context exec_context(d1::task_group_context::isolated);
+ task_group_context_impl::copy_fp_settings(exec_context, *a->my_default_ctx);
+
+ delegated_task dt(d, a->my_exit_monitors, wo);
+ a->enqueue_task( dt, exec_context, *td);
+ size_t index2 = arena::out_of_arena;
+ do {
+ a->my_exit_monitors.prepare_wait(waiter);
+ if (!wo.continue_execution()) {
+ a->my_exit_monitors.cancel_wait(waiter);
+ break;
+ }
+ index2 = a->occupy_free_slot</*as_worker*/false>(*td);
+ if (index2 != arena::out_of_arena) {
+ a->my_exit_monitors.cancel_wait(waiter);
+ nested_arena_context scope(*td, *a, index2 );
+ r1::wait(wo, exec_context);
+ __TBB_ASSERT(!exec_context.my_exception, NULL); // exception can be thrown above, not deferred
+ break;
+ }
+ a->my_exit_monitors.commit_wait(waiter);
+ } while (wo.continue_execution());
+ if (index2 == arena::out_of_arena) {
+ // notify a waiting thread even if this thread did not enter arena,
+ // in case it was woken by a leaving thread but did not need to enter
+ a->my_exit_monitors.notify_one(); // do not relax!
+ }
+ // process possible exception
+ if (exec_context.my_exception) {
+ __TBB_ASSERT(exec_context.is_group_execution_cancelled(), "The task group context with an exception should be canceled.");
+ exec_context.my_exception->throw_self();
+ }
+ __TBB_ASSERT(governor::is_thread_data_set(td), nullptr);
+ return;
+ } // if (index1 == arena::out_of_arena)
+ } // if (!same_arena)
+
+ context_guard_helper</*report_tasks=*/false> context_guard;
+ context_guard.set_ctx(a->my_default_ctx);
+ nested_arena_context scope(*td, *a, index1);
+#if _WIN64
+ try {
+#endif
+ d();
+ __TBB_ASSERT(same_arena || governor::is_thread_data_set(td), nullptr);
+#if _WIN64
+ } catch (...) {
+ context_guard.restore_default();
+ throw;
+ }
+#endif
+}
+
+void task_arena_impl::wait(d1::task_arena_base& ta) {
+ arena* a = ta.my_arena.load(std::memory_order_relaxed);
+ __TBB_ASSERT(a != nullptr, nullptr);
+ thread_data* td = governor::get_thread_data();
+ __TBB_ASSERT_EX(td, "Scheduler is not initialized");
+ __TBB_ASSERT(td->my_arena != a || td->my_arena_index == 0, "internal_wait is not supported within a worker context" );
+ if (a->my_max_num_workers != 0) {
+ while (a->num_workers_active() || a->my_pool_state.load(std::memory_order_acquire) != arena::SNAPSHOT_EMPTY) {
+ yield();
+ }
+ }
+}
+
+int task_arena_impl::max_concurrency(const d1::task_arena_base *ta) {
+ arena* a = nullptr;
+ if( ta ) // for special cases of ta->max_concurrency()
+ a = ta->my_arena.load(std::memory_order_relaxed);
+ else if( thread_data* td = governor::get_thread_data_if_initialized() )
+ a = td->my_arena; // the current arena if any
+
+ if( a ) { // Get parameters from the arena
+ __TBB_ASSERT( !ta || ta->my_max_concurrency==1, NULL );
+ return a->my_num_reserved_slots + a->my_max_num_workers
+#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY
+ + (a->my_local_concurrency_flag.test() ? 1 : 0)
+#endif
+ ;
+ }
+
+ if (ta && ta->my_max_concurrency == 1) {
+ return 1;
+ }
+
+#if __TBB_ARENA_BINDING
+ if (ta) {
+#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT
+ d1::constraints arena_constraints = d1::constraints{}
+ .set_numa_id(ta->my_numa_id)
+ .set_core_type(ta->core_type())
+ .set_max_threads_per_core(ta->max_threads_per_core());
+ return (int)default_concurrency(arena_constraints);
+#else /*!__TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT*/
+ return (int)default_concurrency(ta->my_numa_id);
+#endif /*!__TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT*/
+ }
+#endif /*!__TBB_ARENA_BINDING*/
+
+ __TBB_ASSERT(!ta || ta->my_max_concurrency==d1::task_arena_base::automatic, NULL );
+ return int(governor::default_num_threads());
+}
+
+void isolate_within_arena(d1::delegate_base& d, std::intptr_t isolation) {
+ // TODO: Decide what to do if the scheduler is not initialized. Is there a use case for it?
+ thread_data* tls = governor::get_thread_data();
+ assert_pointers_valid(tls, tls->my_task_dispatcher);
+ task_dispatcher* dispatcher = tls->my_task_dispatcher;
+ isolation_type previous_isolation = dispatcher->m_execute_data_ext.isolation;
+ try_call([&] {
+ // We temporarily change the isolation tag of the currently running task. It will be restored in the destructor of the guard.
+ isolation_type current_isolation = isolation ? isolation : reinterpret_cast<isolation_type>(&d);
+ // Save the current isolation value and set new one
+ previous_isolation = dispatcher->set_isolation(current_isolation);
+ // Isolation within this callable
+ d();
+ }).on_completion([&] {
+ __TBB_ASSERT(governor::get_thread_data()->my_task_dispatcher == dispatcher, NULL);
+ dispatcher->set_isolation(previous_isolation);
+ });
+}
+
+} // namespace r1
+} // namespace detail
+} // namespace tbb
+
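arena::occupy_free_slot_in_range above probes the slot array starting at the slot this thread occupied last time (or a random slot inside the range) and wraps around once, so threads joining concurrently tend to spread across the slots instead of all contending for the lowest index. The following self-contained sketch reproduces just that probing strategy; the names are hypothetical and a plain atomic flag stands in for arena_slot::try_occupy().

#include <atomic>
#include <cstddef>
#include <random>
#include <vector>

constexpr std::size_t out_of_range = ~std::size_t(0);

struct demo_slot {
    std::atomic<bool> occupied{false};
    bool try_occupy() {
        bool expected = false;                       // claim the slot atomically
        return occupied.compare_exchange_strong(expected, true);
    }
};

std::size_t occupy_in_range(std::vector<demo_slot>& slots, std::size_t preferred,
                            std::size_t lower, std::size_t upper) {
    if (lower >= upper) return out_of_range;
    std::size_t index = preferred;
    if (index < lower || index >= upper) {           // no usable hint: pick a random start
        static thread_local std::mt19937 rng{std::random_device{}()};
        index = lower + rng() % (upper - lower);
    }
    // Probe [index, upper), then wrap around to [lower, index).
    for (std::size_t i = index; i < upper; ++i)
        if (slots[i].try_occupy()) return i;
    for (std::size_t i = lower; i < index; ++i)
        if (slots[i].try_occupy()) return i;
    return out_of_range;
}

int main() {
    std::vector<demo_slot> slots(8);
    return occupy_in_range(slots, /*preferred=*/5, /*lower=*/2, /*upper=*/8) == out_of_range ? 1 : 0;
}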
diff --git a/contrib/libs/tbb/src/tbb/arena.h b/contrib/libs/tbb/src/tbb/arena.h
new file mode 100644
index 0000000000..b1b9c3dc93
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/arena.h
@@ -0,0 +1,616 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#ifndef _TBB_arena_H
+#define _TBB_arena_H
+
+#include <atomic>
+#include <cstring>
+
+#include "oneapi/tbb/detail/_task.h"
+
+#include "scheduler_common.h"
+#include "intrusive_list.h"
+#include "task_stream.h"
+#include "arena_slot.h"
+#include "rml_tbb.h"
+#include "mailbox.h"
+#include "market.h"
+#include "governor.h"
+#include "concurrent_monitor.h"
+#include "observer_proxy.h"
+#include "oneapi/tbb/spin_mutex.h"
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+
+class task_dispatcher;
+class task_group_context;
+class allocate_root_with_context_proxy;
+
+#if __TBB_ARENA_BINDING
+class numa_binding_observer;
+#endif /*__TBB_ARENA_BINDING*/
+
+//! Bounded LIFO ring-buffer cache of coroutine task dispatchers
+class arena_co_cache {
+ //! Ring buffer storage
+ task_dispatcher** my_co_scheduler_cache;
+ //! Current cache index
+ unsigned my_head;
+ //! Cache capacity for arena
+ unsigned my_max_index;
+ //! Accessor lock for modification operations
+ tbb::spin_mutex my_co_cache_mutex;
+
+ unsigned next_index() {
+ return ( my_head == my_max_index ) ? 0 : my_head + 1;
+ }
+
+ unsigned prev_index() {
+ return ( my_head == 0 ) ? my_max_index : my_head - 1;
+ }
+
+ bool internal_empty() {
+ return my_co_scheduler_cache[prev_index()] == nullptr;
+ }
+
+ void internal_task_dispatcher_cleanup(task_dispatcher* to_cleanup) {
+ to_cleanup->~task_dispatcher();
+ cache_aligned_deallocate(to_cleanup);
+ }
+
+public:
+ void init(unsigned cache_capacity) {
+ std::size_t alloc_size = cache_capacity * sizeof(task_dispatcher*);
+ my_co_scheduler_cache = (task_dispatcher**)cache_aligned_allocate(alloc_size);
+ std::memset( my_co_scheduler_cache, 0, alloc_size );
+ my_head = 0;
+ my_max_index = cache_capacity - 1;
+ }
+
+ void cleanup() {
+ while (task_dispatcher* to_cleanup = pop()) {
+ internal_task_dispatcher_cleanup(to_cleanup);
+ }
+ cache_aligned_deallocate(my_co_scheduler_cache);
+ }
+
+ //! Insert a task dispatcher into the currently available slot,
+ //! replacing an old value if necessary.
+ void push(task_dispatcher* s) {
+ task_dispatcher* to_cleanup = nullptr;
+ {
+ tbb::spin_mutex::scoped_lock lock(my_co_cache_mutex);
+ // Check if we are replacing an existing buffer entry
+ if (my_co_scheduler_cache[my_head] != nullptr) {
+ to_cleanup = my_co_scheduler_cache[my_head];
+ }
+ // Store the cached value
+ my_co_scheduler_cache[my_head] = s;
+ // Move head index to the next slot
+ my_head = next_index();
+ }
+ // Cleanup replaced buffer if any
+ if (to_cleanup) {
+ internal_task_dispatcher_cleanup(to_cleanup);
+ }
+ }
+
+ //! Get a cached scheduler if any
+ task_dispatcher* pop() {
+ tbb::spin_mutex::scoped_lock lock(my_co_cache_mutex);
+ // No cached coroutine
+ if (internal_empty()) {
+ return nullptr;
+ }
+ // Move head index to the currently available value
+ my_head = prev_index();
+ // Retrieve the value from the buffer
+ task_dispatcher* to_return = my_co_scheduler_cache[my_head];
+ // Clear the previous entry
+ my_co_scheduler_cache[my_head] = nullptr;
+ return to_return;
+ }
+};
+
+struct stack_anchor_type {
+ stack_anchor_type() = default;
+ stack_anchor_type(const stack_anchor_type&) = delete;
+};
+
+#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY
+class atomic_flag {
+ static const std::uintptr_t SET = 1;
+ static const std::uintptr_t EMPTY = 0;
+ std::atomic<std::uintptr_t> my_state;
+public:
+ bool test_and_set() {
+ std::uintptr_t state = my_state.load(std::memory_order_acquire);
+ switch (state) {
+ case SET:
+ return false;
+ default: /* busy */
+ if (my_state.compare_exchange_strong(state, SET)) {
+ // We interrupted clear transaction
+ return false;
+ }
+ if (state != EMPTY) {
+ // We lost our epoch
+ return false;
+ }
+ // We are too late but still in the same epoch
+ __TBB_fallthrough;
+ case EMPTY:
+ return my_state.compare_exchange_strong(state, SET);
+ }
+ }
+ template <typename Pred>
+ bool try_clear_if(Pred&& pred) {
+ std::uintptr_t busy = std::uintptr_t(&busy);
+ std::uintptr_t state = my_state.load(std::memory_order_acquire);
+ if (state == SET && my_state.compare_exchange_strong(state, busy)) {
+ if (pred()) {
+ return my_state.compare_exchange_strong(busy, EMPTY);
+ }
+ // The result of the next operation is discarded, always false should be returned.
+ my_state.compare_exchange_strong(busy, SET);
+ }
+ return false;
+ }
+ void clear() {
+ my_state.store(EMPTY, std::memory_order_release);
+ }
+ bool test() {
+ return my_state.load(std::memory_order_acquire) != EMPTY;
+ }
+};
+#endif
+
+//! The structure of an arena, except the array of slots.
+/** Separated in order to simplify padding.
+ Intrusive list node base class is used by market to form a list of arenas. **/
+struct arena_base : padded<intrusive_list_node> {
+ //! The number of workers that have been marked out by the resource manager to service the arena.
+ std::atomic<unsigned> my_num_workers_allotted; // heavy use in stealing loop
+
+ //! Reference counter for the arena.
+ /** Worker and external thread references are counted separately: first several bits are for references
+ from external threads or explicit task_arenas (see arena::ref_external_bits below);
+ the rest counts the number of workers servicing the arena. */
+ std::atomic<unsigned> my_references; // heavy use in stealing loop
+
+ //! The maximal number of currently busy slots.
+ std::atomic<unsigned> my_limit; // heavy use in stealing loop
+
+ //! Task pool for the tasks scheduled via task::enqueue() method.
+ /** Such scheduling guarantees eventual execution even if
+ - new tasks are constantly coming (by extracting scheduled tasks in
+ relaxed FIFO order);
+ - the enqueuing thread does not call any of wait_for_all methods. **/
+ task_stream<front_accessor> my_fifo_task_stream; // heavy use in stealing loop
+
+ //! Task pool for the tasks scheduled via tbb::resume() function.
+ task_stream<front_accessor> my_resume_task_stream; // heavy use in stealing loop
+
+#if __TBB_PREVIEW_CRITICAL_TASKS
+ //! Task pool for the tasks with critical property set.
+ /** Critical tasks are scheduled for execution ahead of other sources (including local task pool
+ and even bypassed tasks) unless the thread already executes a critical task in an outer
+ dispatch loop **/
+ // used on the hot path of the task dispatch loop
+ task_stream<back_nonnull_accessor> my_critical_task_stream;
+#endif
+
+ //! The number of workers requested by the external thread owning the arena.
+ unsigned my_max_num_workers;
+
+ //! The total number of workers that are requested from the resource manager.
+ int my_total_num_workers_requested;
+
+ //! The number of workers that are really requested from the resource manager.
+ //! Possible values are in [0, my_max_num_workers]
+ int my_num_workers_requested;
+
+ //! The index in the array of per priority lists of arenas this object is in.
+ /*const*/ unsigned my_priority_level;
+
+ //! The max priority level of arena in market.
+ std::atomic<bool> my_is_top_priority{false};
+
+ //! Current task pool state and estimate of available tasks amount.
+ /** The estimate is either 0 (SNAPSHOT_EMPTY) or infinity (SNAPSHOT_FULL).
+ Special state is "busy" (any other unsigned value).
+ Note that the implementation of arena::is_busy_or_empty() requires
+ my_pool_state to be unsigned. */
+ using pool_state_t = std::uintptr_t ;
+ std::atomic<pool_state_t> my_pool_state;
+
+ //! The list of local observers attached to this arena.
+ observer_list my_observers;
+
+#if __TBB_ARENA_BINDING
+ //! Pointer to the internal observer that binds threads in the arena to a certain NUMA node.
+ numa_binding_observer* my_numa_binding_observer;
+#endif /*__TBB_ARENA_BINDING*/
+
+ // Below are rarely modified members
+
+ //! The market that owns this arena.
+ market* my_market;
+
+ //! ABA prevention marker.
+ std::uintptr_t my_aba_epoch;
+
+ //! Default task group context.
+ d1::task_group_context* my_default_ctx;
+
+ //! The number of slots in the arena.
+ unsigned my_num_slots;
+
+ //! The number of reserved slots (can be occupied only by external threads).
+ unsigned my_num_reserved_slots;
+
+#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY
+ // arena needs an extra worker despite the arena limit
+ atomic_flag my_local_concurrency_flag;
+ // the number of local mandatory concurrency requests
+ int my_local_concurrency_requests;
+ // arena needs an extra worker despite a global limit
+ std::atomic<bool> my_global_concurrency_mode;
+#endif /* __TBB_ENQUEUE_ENFORCED_CONCURRENCY */
+
+ //! Waiting object for external threads that cannot join the arena.
+ concurrent_monitor my_exit_monitors;
+
+ //! Coroutine (task_dispatcher) cache buffer
+ arena_co_cache my_co_cache;
+
+#if TBB_USE_ASSERT
+ //! Used to trap accesses to the object after its destruction.
+ std::uintptr_t my_guard;
+#endif /* TBB_USE_ASSERT */
+}; // struct arena_base
+
+class arena: public padded<arena_base>
+{
+public:
+ using base_type = padded<arena_base>;
+
+ //! Types of work advertised by advertise_new_work()
+ enum new_work_type {
+ work_spawned,
+ wakeup,
+ work_enqueued
+ };
+
+ //! Constructor
+ arena ( market& m, unsigned max_num_workers, unsigned num_reserved_slots, unsigned priority_level);
+
+ //! Allocate an instance of arena.
+ static arena& allocate_arena( market& m, unsigned num_slots, unsigned num_reserved_slots,
+ unsigned priority_level );
+
+ static unsigned num_arena_slots ( unsigned num_slots ) {
+ return max(2u, num_slots);
+ }
+
+ static int allocation_size ( unsigned num_slots ) {
+ return sizeof(base_type) + num_slots * (sizeof(mail_outbox) + sizeof(arena_slot) + sizeof(task_dispatcher));
+ }
+
+ //! Get reference to mailbox corresponding to given slot_id
+ mail_outbox& mailbox( d1::slot_id slot ) {
+ __TBB_ASSERT( slot != d1::no_slot, "affinity should be specified" );
+
+ return reinterpret_cast<mail_outbox*>(this)[-(int)(slot+1)]; // cast to 'int' is redundant but left for readability
+ }
+
+ //! Completes arena shutdown, destructs and deallocates it.
+ void free_arena ();
+
+ //! No tasks to steal since last snapshot was taken
+ static const pool_state_t SNAPSHOT_EMPTY = 0;
+
+ //! At least one task has been offered for stealing since the last snapshot started
+ static const pool_state_t SNAPSHOT_FULL = pool_state_t(-1);
+
+ //! The number of least significant bits for external references
+ static const unsigned ref_external_bits = 12; // up to 4095 external and 1M workers
+
+ //! Reference increment values for externals and workers
+ static const unsigned ref_external = 1;
+ static const unsigned ref_worker = 1 << ref_external_bits;
+
+ //! No tasks to steal or snapshot is being taken.
+ static bool is_busy_or_empty( pool_state_t s ) { return s < SNAPSHOT_FULL; }
+
+ //! The number of workers active in the arena.
+ unsigned num_workers_active() const {
+ return my_references.load(std::memory_order_acquire) >> ref_external_bits;
+ }
+
+ //! Check if the recall is requested by the market.
+ bool is_recall_requested() const {
+ return num_workers_active() > my_num_workers_allotted.load(std::memory_order_relaxed);
+ }
+
+ //! If necessary, raise a flag that there is new job in arena.
+ template<arena::new_work_type work_type> void advertise_new_work();
+
+ //! Attempts to steal a task from a randomly chosen arena slot
+ d1::task* steal_task(unsigned arena_index, FastRandom& frnd, execution_data_ext& ed, isolation_type isolation);
+
+ //! Get a task from a global starvation resistant queue
+ template<task_stream_accessor_type accessor>
+ d1::task* get_stream_task(task_stream<accessor>& stream, unsigned& hint);
+
+#if __TBB_PREVIEW_CRITICAL_TASKS
+ //! Tries to find a critical task in global critical task stream
+ d1::task* get_critical_task(unsigned& hint, isolation_type isolation);
+#endif
+
+ //! Check if there is job anywhere in arena.
+ /** Return true if no job or if arena is being cleaned up. */
+ bool is_out_of_work();
+
+    //! enqueue a task into the starvation-resistant queue
+ void enqueue_task(d1::task&, d1::task_group_context&, thread_data&);
+
+ //! Registers the worker with the arena and enters TBB scheduler dispatch loop
+ void process(thread_data&);
+
+ //! Notification that the thread leaves its arena
+ template<unsigned ref_param>
+ inline void on_thread_leaving ( );
+
+ //! Check for the presence of enqueued tasks at all priority levels
+ bool has_enqueued_tasks();
+
+ static const std::size_t out_of_arena = ~size_t(0);
+ //! Tries to occupy a slot in the arena. On success, returns the slot index; if no slot is available, returns out_of_arena.
+ template <bool as_worker>
+ std::size_t occupy_free_slot(thread_data&);
+ //! Tries to occupy a slot in the specified range.
+ std::size_t occupy_free_slot_in_range(thread_data& tls, std::size_t lower, std::size_t upper);
+
+ std::uintptr_t calculate_stealing_threshold();
+
+ /** Must be the last data field */
+ arena_slot my_slots[1];
+}; // class arena
+
+template<unsigned ref_param>
+inline void arena::on_thread_leaving ( ) {
+ //
+ // Implementation of arena destruction synchronization logic contained various
+ // bugs/flaws at the different stages of its evolution, so below is a detailed
+ // description of the issues taken into consideration in the framework of the
+ // current design.
+ //
+ // In case of using fire-and-forget tasks (scheduled via task::enqueue())
+ // external thread is allowed to leave its arena before all its work is executed,
+ // and market may temporarily revoke all workers from this arena. Since revoked
+ // workers never attempt to reset arena state to EMPTY and cancel its request
+ // to RML for threads, the arena object is destroyed only when both the last
+ // thread is leaving it and arena's state is EMPTY (that is its external thread
+ // left and it does not contain any work).
+ // Thus resetting arena to EMPTY state (as earlier TBB versions did) should not
+    // be done here (or anywhere else in the external thread, for that matter); doing so
+ // can result either in arena's premature destruction (at least without
+ // additional costly checks in workers) or in unnecessary arena state changes
+ // (and ensuing workers migration).
+ //
+ // A worker that checks for work presence and transitions arena to the EMPTY
+ // state (in snapshot taking procedure arena::is_out_of_work()) updates
+ // arena::my_pool_state first and only then arena::my_num_workers_requested.
+ // So the check for work absence must be done against the latter field.
+ //
+    // In the time window between decrementing the active threads count and checking
+    // for an outstanding request for workers, a new worker thread may arrive, finish
+    // the remaining work, set the arena state to empty, and leave, decrementing its
+    // refcount and destroying the arena. Then the current thread would destroy the
+    // arena a second time. To preclude this, a local copy of the outstanding request
+    // value can be stored before decrementing the active threads count.
+ //
+    // But this technique may cause two other problems. When the stored request is
+    // zero, it is possible that the arena still has threads, and they can generate new
+    // tasks and thus re-establish non-zero requests. Then all the threads can be
+    // revoked (as described above) leaving this thread the last one, and causing
+    // it to destroy a non-empty arena.
+ //
+ // The other problem takes place when the stored request is non-zero. Another
+ // thread may complete the work, set arena state to empty, and leave without
+ // arena destruction before this thread decrements the refcount. This thread
+ // cannot destroy the arena either. Thus the arena may be "orphaned".
+ //
+ // In both cases we cannot dereference arena pointer after the refcount is
+ // decremented, as our arena may already be destroyed.
+ //
+    // If this is the external thread, the market is protected by the refcount it holds.
+    // In the case of workers, the market's liveness is ensured by the RML connection
+ // rundown protocol, according to which the client (i.e. the market) lives
+ // until RML server notifies it about connection termination, and this
+ // notification is fired only after all workers return into RML.
+ //
+ // Thus if we decremented refcount to zero we ask the market to check arena
+ // state (including the fact if it is alive) under the lock.
+ //
+ std::uintptr_t aba_epoch = my_aba_epoch;
+ unsigned priority_level = my_priority_level;
+ market* m = my_market;
+ __TBB_ASSERT(my_references.load(std::memory_order_relaxed) >= ref_param, "broken arena reference counter");
+#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY
+    // When there are no workers, someone must free the arena, because without
+    // workers no one calls is_out_of_work().
+    // Skip workerless arenas because they have no demand for workers.
+    // TODO: consider stricter conditions for the cleanup, because it can create
+    // demand for workers while the arena may already be empty (and thus ready
+    // for destruction).
+    // TODO: fix the race: the soft limit may change while we are checking it.
+ if( ref_param==ref_external && my_num_slots != my_num_reserved_slots
+ && 0 == m->my_num_workers_soft_limit.load(std::memory_order_relaxed) &&
+ !my_global_concurrency_mode.load(std::memory_order_relaxed) ) {
+ is_out_of_work();
+        // We expect that, in the worst case, num_priority_levels-1 calls are enough
+        // to restore priorities, plus one more is_out_of_work() to confirm that no
+        // work was found. But since market::set_active_num_workers() can be called
+        // concurrently, we cannot guarantee that the last is_out_of_work() returns true.
+ }
+#endif
+ if ( (my_references -= ref_param ) == 0 )
+ m->try_destroy_arena( this, aba_epoch, priority_level );
+}
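+
+// The pattern above in isolation: everything the leaving thread needs must be copied
+// to locals *before* the reference counter is decremented, because right after the
+// decrement the object may already be destroyed by another thread. A minimal sketch
+// follows (kept under "#if 0"); registry_t and try_destroy() are illustrative names,
+// not TBB entities, and only <atomic>/<cstdint> are assumed.
+#if 0
+struct registry_t { void try_destroy(void* obj, std::uintptr_t epoch); }; // hypothetical owner
+struct ref_counted {
+    std::atomic<unsigned> my_refs{1};
+    std::uintptr_t my_aba_epoch{0};
+    registry_t* my_owner{nullptr};
+
+    void on_leaving(unsigned delta) {
+        // Copy the fields needed after the decrement while the object is still alive.
+        std::uintptr_t epoch = my_aba_epoch;
+        registry_t* owner = my_owner;
+        if ((my_refs -= delta) == 0) {
+            // 'this' may be dangling for any purpose other than identity;
+            // the owner re-checks liveness (via the epoch) under its own lock.
+            owner->try_destroy(this, epoch);
+        }
+    }
+};
+#endif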
+
+template<arena::new_work_type work_type>
+void arena::advertise_new_work() {
+ auto is_related_arena = [&] (extended_context context) {
+ return this == context.my_arena_addr;
+ };
+
+ if( work_type == work_enqueued ) {
+ atomic_fence(std::memory_order_seq_cst);
+#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY
+ if ( my_market->my_num_workers_soft_limit.load(std::memory_order_acquire) == 0 &&
+ my_global_concurrency_mode.load(std::memory_order_acquire) == false )
+ my_market->enable_mandatory_concurrency(this);
+
+ if (my_max_num_workers == 0 && my_num_reserved_slots == 1 && my_local_concurrency_flag.test_and_set()) {
+ my_market->adjust_demand(*this, /* delta = */ 1, /* mandatory = */ true);
+ }
+#endif /* __TBB_ENQUEUE_ENFORCED_CONCURRENCY */
+ // Local memory fence here and below is required to avoid missed wakeups; see the comment below.
+ // Starvation resistant tasks require concurrency, so missed wakeups are unacceptable.
+ }
+ else if( work_type == wakeup ) {
+ atomic_fence(std::memory_order_seq_cst);
+ }
+
+ // Double-check idiom that, in case of spawning, is deliberately sloppy about memory fences.
+ // Technically, to avoid missed wakeups, there should be a full memory fence between the point we
+ // released the task pool (i.e. spawned task) and read the arena's state. However, adding such a
+ // fence might hurt overall performance more than it helps, because the fence would be executed
+ // on every task pool release, even when stealing does not occur. Since TBB allows parallelism,
+ // but never promises parallelism, the missed wakeup is not a correctness problem.
+ pool_state_t snapshot = my_pool_state.load(std::memory_order_acquire);
+ if( is_busy_or_empty(snapshot) ) {
+        // Attempt to mark as full. The compare_exchange below is a little unusual because
+        // its result is compared to a value that can be different from the expected argument.
+ pool_state_t expected_state = snapshot;
+ my_pool_state.compare_exchange_strong( expected_state, SNAPSHOT_FULL );
+ if( expected_state == SNAPSHOT_EMPTY ) {
+ if( snapshot != SNAPSHOT_EMPTY ) {
+                // This thread read "busy" into snapshot, and then another thread transitioned
+                // my_pool_state to "empty" in the meantime, which caused the compare_exchange above
+                // to fail. Attempt to transition my_pool_state from "empty" to "full".
+ expected_state = SNAPSHOT_EMPTY;
+ if( !my_pool_state.compare_exchange_strong( expected_state, SNAPSHOT_FULL ) ) {
+ // Some other thread transitioned my_pool_state from "empty", and hence became
+ // responsible for waking up workers.
+ return;
+ }
+ }
+ // This thread transitioned pool from empty to full state, and thus is responsible for
+ // telling the market that there is work to do.
+#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY
+ if( work_type == work_spawned ) {
+ if ( my_global_concurrency_mode.load(std::memory_order_acquire) == true )
+ my_market->mandatory_concurrency_disable( this );
+ }
+#endif /* __TBB_ENQUEUE_ENFORCED_CONCURRENCY */
+ // TODO: investigate adjusting of arena's demand by a single worker.
+ my_market->adjust_demand(*this, my_max_num_workers, /* mandatory = */ false);
+
+ // Notify all sleeping threads that work has appeared in the arena.
+ my_market->get_wait_list().notify(is_related_arena);
+ }
+ }
+}
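+
+// A simplified standalone sketch of the EMPTY -> FULL hand-off used above: many threads
+// may observe an empty pool state, but exactly one wins the compare_exchange and becomes
+// responsible for requesting workers. The names (pool_state_sketch, request_workers) are
+// illustrative only, and the busy -> FULL path of the real code is omitted for brevity.
+#if 0
+namespace sketch {
+    using state_t = std::uintptr_t;
+    constexpr state_t EMPTY = 0;
+    constexpr state_t FULL  = state_t(-1);
+
+    std::atomic<state_t> pool_state_sketch{EMPTY};
+    void request_workers(); // hypothetical
+
+    void on_new_work() {
+        state_t snapshot = pool_state_sketch.load(std::memory_order_acquire);
+        if (snapshot == FULL)
+            return;                 // work is already advertised, nothing to do
+        state_t expected = EMPTY;
+        if (pool_state_sketch.compare_exchange_strong(expected, FULL)) {
+            // Exactly one thread per EMPTY period reaches this point.
+            request_workers();
+        }
+    }
+} // namespace sketch
+#endif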
+
+inline d1::task* arena::steal_task(unsigned arena_index, FastRandom& frnd, execution_data_ext& ed, isolation_type isolation) {
+ auto slot_num_limit = my_limit.load(std::memory_order_relaxed);
+ if (slot_num_limit == 1) {
+ // No slots to steal from
+ return nullptr;
+ }
+ // Try to steal a task from a random victim.
+ std::size_t k = frnd.get() % (slot_num_limit - 1);
+    // The following condition excludes the external thread that might have
+    // already taken our previous place in the arena from the list
+    // of potential victims. But since such a situation can take place only
+    // in case of significant oversubscription, keeping the checks simple
+    // seems preferable to complicating the code.
+ if (k >= arena_index) {
+ ++k; // Adjusts random distribution to exclude self
+ }
+ arena_slot* victim = &my_slots[k];
+ d1::task **pool = victim->task_pool.load(std::memory_order_relaxed);
+ d1::task *t = nullptr;
+ if (pool == EmptyTaskPool || !(t = victim->steal_task(*this, isolation))) {
+ return nullptr;
+ }
+ if (task_accessor::is_proxy_task(*t)) {
+ task_proxy &tp = *(task_proxy*)t;
+ d1::slot_id slot = tp.slot;
+ t = tp.extract_task<task_proxy::pool_bit>();
+ if (!t) {
+ // Proxy was empty, so it's our responsibility to free it
+ tp.allocator.delete_object(&tp, ed);
+ return nullptr;
+ }
+        // Note: the affinity slot is set for any stolen task (proxy or general)
+ ed.affinity_slot = slot;
+ } else {
+        // Note: the affinity slot is set for any stolen task (proxy or general)
+ ed.affinity_slot = d1::any_slot;
+ }
+ // Update task owner thread id to identify stealing
+ ed.original_slot = k;
+ return t;
+}
+
+template<task_stream_accessor_type accessor>
+inline d1::task* arena::get_stream_task(task_stream<accessor>& stream, unsigned& hint) {
+ if (stream.empty())
+ return nullptr;
+ return stream.pop(subsequent_lane_selector(hint));
+}
+
+#if __TBB_PREVIEW_CRITICAL_TASKS
+// Retrieves critical task respecting isolation level, if provided. The rule is:
+// 1) If no outer critical task and no isolation => take any critical task
+// 2) If working on an outer critical task and no isolation => cannot take any critical task
+// 3) If no outer critical task but isolated => respect isolation
+// 4) If working on an outer critical task and isolated => respect isolation
+// Hint is used to keep some LIFO-ness, start search with the lane that was used during push operation.
+inline d1::task* arena::get_critical_task(unsigned& hint, isolation_type isolation) {
+ if (my_critical_task_stream.empty())
+ return nullptr;
+
+ if ( isolation != no_isolation ) {
+ return my_critical_task_stream.pop_specific( hint, isolation );
+ } else {
+ return my_critical_task_stream.pop(preceding_lane_selector(hint));
+ }
+}
+#endif // __TBB_PREVIEW_CRITICAL_TASKS
+
+} // namespace r1
+} // namespace detail
+} // namespace tbb
+
+#endif /* _TBB_arena_H */
diff --git a/contrib/libs/tbb/src/tbb/arena_slot.cpp b/contrib/libs/tbb/src/tbb/arena_slot.cpp
new file mode 100644
index 0000000000..72706b3de5
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/arena_slot.cpp
@@ -0,0 +1,219 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "arena_slot.h"
+#include "arena.h"
+#include "thread_data.h"
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+
+//------------------------------------------------------------------------
+// Arena Slot
+//------------------------------------------------------------------------
+d1::task* arena_slot::get_task_impl(size_t T, execution_data_ext& ed, bool& tasks_omitted, isolation_type isolation) {
+ __TBB_ASSERT(tail.load(std::memory_order_relaxed) <= T || is_local_task_pool_quiescent(),
+ "Is it safe to get a task at position T?");
+
+ d1::task* result = task_pool_ptr[T];
+ __TBB_ASSERT(!is_poisoned( result ), "The poisoned task is going to be processed");
+
+ if (!result) {
+ return nullptr;
+ }
+ bool omit = isolation != no_isolation && isolation != task_accessor::isolation(*result);
+ if (!omit && !task_accessor::is_proxy_task(*result)) {
+ return result;
+ } else if (omit) {
+ tasks_omitted = true;
+ return nullptr;
+ }
+
+ task_proxy& tp = static_cast<task_proxy&>(*result);
+ d1::slot_id aff_id = tp.slot;
+ if ( d1::task *t = tp.extract_task<task_proxy::pool_bit>() ) {
+ ed.affinity_slot = aff_id;
+ return t;
+ }
+ // Proxy was empty, so it's our responsibility to free it
+ tp.allocator.delete_object(&tp, ed);
+
+ if ( tasks_omitted ) {
+ task_pool_ptr[T] = nullptr;
+ }
+ return nullptr;
+}
+
+d1::task* arena_slot::get_task(execution_data_ext& ed, isolation_type isolation) {
+ __TBB_ASSERT(is_task_pool_published(), nullptr);
+ // The current task position in the task pool.
+ std::size_t T0 = tail.load(std::memory_order_relaxed);
+ // The bounds of available tasks in the task pool. H0 is only used when the head bound is reached.
+ std::size_t H0 = (std::size_t)-1, T = T0;
+ d1::task* result = nullptr;
+ bool task_pool_empty = false;
+ bool tasks_omitted = false;
+ do {
+ __TBB_ASSERT( !result, nullptr );
+ // The full fence is required to sync the store of `tail` with the load of `head` (write-read barrier)
+ T = --tail;
+ // The acquire load of head is required to guarantee consistency of our task pool
+ // when a thief rolls back the head.
+ if ( (std::intptr_t)( head.load(std::memory_order_acquire) ) > (std::intptr_t)T ) {
+ acquire_task_pool();
+ H0 = head.load(std::memory_order_relaxed);
+ if ( (std::intptr_t)H0 > (std::intptr_t)T ) {
+ // The thief has not backed off - nothing to grab.
+ __TBB_ASSERT( H0 == head.load(std::memory_order_relaxed)
+ && T == tail.load(std::memory_order_relaxed)
+ && H0 == T + 1, "victim/thief arbitration algorithm failure" );
+ reset_task_pool_and_leave();
+ // No tasks in the task pool.
+ task_pool_empty = true;
+ break;
+ } else if ( H0 == T ) {
+ // There is only one task in the task pool.
+ reset_task_pool_and_leave();
+ task_pool_empty = true;
+ } else {
+ // Release task pool if there are still some tasks.
+ // After the release, the tail will be less than T, thus a thief
+ // will not attempt to get a task at position T.
+ release_task_pool();
+ }
+ }
+ result = get_task_impl( T, ed, tasks_omitted, isolation );
+ if ( result ) {
+ poison_pointer( task_pool_ptr[T] );
+ break;
+ } else if ( !tasks_omitted ) {
+ poison_pointer( task_pool_ptr[T] );
+ __TBB_ASSERT( T0 == T+1, nullptr );
+ T0 = T;
+ }
+ } while ( !result && !task_pool_empty );
+
+ if ( tasks_omitted ) {
+ if ( task_pool_empty ) {
+ // All tasks have been checked. The task pool should be in reset state.
+ // We just restore the bounds for the available tasks.
+            // TODO: Does it make sense to move them to the beginning of the task pool?
+ __TBB_ASSERT( is_quiescent_local_task_pool_reset(), nullptr );
+ if ( result ) {
+ // If we have a task, it should be at H0 position.
+ __TBB_ASSERT( H0 == T, nullptr );
+ ++H0;
+ }
+ __TBB_ASSERT( H0 <= T0, nullptr );
+ if ( H0 < T0 ) {
+ // Restore the task pool if there are some tasks.
+ head.store(H0, std::memory_order_relaxed);
+ tail.store(T0, std::memory_order_relaxed);
+ // The release fence is used in publish_task_pool.
+ publish_task_pool();
+ // Synchronize with snapshot as we published some tasks.
+ ed.task_disp->m_thread_data->my_arena->advertise_new_work<arena::wakeup>();
+ }
+ } else {
+ // A task has been obtained. We need to make a hole in position T.
+ __TBB_ASSERT( is_task_pool_published(), nullptr );
+ __TBB_ASSERT( result, nullptr );
+ task_pool_ptr[T] = nullptr;
+ tail.store(T0, std::memory_order_release);
+ // Synchronize with snapshot as we published some tasks.
+ // TODO: consider some approach not to call wakeup for each time. E.g. check if the tail reached the head.
+ ed.task_disp->m_thread_data->my_arena->advertise_new_work<arena::wakeup>();
+ }
+ }
+
+ __TBB_ASSERT( (std::intptr_t)tail.load(std::memory_order_relaxed) >= 0, nullptr );
+ __TBB_ASSERT( result || tasks_omitted || is_quiescent_local_task_pool_reset(), nullptr );
+ return result;
+}
+
+d1::task* arena_slot::steal_task(arena& a, isolation_type isolation) {
+ d1::task** victim_pool = lock_task_pool();
+ if (!victim_pool) {
+ return nullptr;
+ }
+ d1::task* result = nullptr;
+ std::size_t H = head.load(std::memory_order_relaxed); // mirror
+ std::size_t H0 = H;
+ bool tasks_omitted = false;
+ do {
+ // The full fence is required to sync the store of `head` with the load of `tail` (write-read barrier)
+ H = ++head;
+ // The acquire load of tail is required to guarantee consistency of victim_pool
+ // because the owner synchronizes task spawning via tail.
+ if ((std::intptr_t)H > (std::intptr_t)(tail.load(std::memory_order_acquire))) {
+            // Stealing attempt failed, deque contents have not been changed by us
+ head.store( /*dead: H = */ H0, std::memory_order_relaxed );
+ __TBB_ASSERT( !result, nullptr );
+ goto unlock;
+ }
+ result = victim_pool[H-1];
+ __TBB_ASSERT( !is_poisoned( result ), nullptr );
+
+ if (result) {
+ if (isolation == no_isolation || isolation == task_accessor::isolation(*result)) {
+ if (!task_accessor::is_proxy_task(*result)) {
+ break;
+ }
+ task_proxy& tp = *static_cast<task_proxy*>(result);
+ // If mailed task is likely to be grabbed by its destination thread, skip it.
+ if ( !(task_proxy::is_shared( tp.task_and_tag ) && tp.outbox->recipient_is_idle()) ) {
+ break;
+ }
+ }
+ // The task cannot be executed either due to isolation or proxy constraints.
+ result = nullptr;
+ tasks_omitted = true;
+ } else if (!tasks_omitted) {
+            // Clean up holes in the task pool until a task is skipped.
+ __TBB_ASSERT( H0 == H-1, nullptr );
+ poison_pointer( victim_pool[H0] );
+ H0 = H;
+ }
+ } while (!result);
+ __TBB_ASSERT( result, nullptr );
+
+ // emit "task was consumed" signal
+ poison_pointer( victim_pool[H-1] );
+ if (tasks_omitted) {
+ // Some proxies in the task pool have been omitted. Set the stolen task to nullptr.
+ victim_pool[H-1] = nullptr;
+        // The release store publishes the victim_pool update (the store of nullptr).
+ head.store( /*dead: H = */ H0, std::memory_order_release );
+ }
+unlock:
+ unlock_task_pool(victim_pool);
+
+#if __TBB_PREFETCHING
+    __TBB_cl_evict(&head);
+    __TBB_cl_evict(&tail);
+#endif
+ if (tasks_omitted) {
+        // Synchronize with snapshot as head and tail may have been bumped, which can falsely trigger the EMPTY state
+ a.advertise_new_work<arena::wakeup>();
+ }
+ return result;
+}
+
+} // namespace r1
+} // namespace detail
+} // namespace tbb
+
diff --git a/contrib/libs/tbb/src/tbb/arena_slot.h b/contrib/libs/tbb/src/tbb/arena_slot.h
new file mode 100644
index 0000000000..83d61d2197
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/arena_slot.h
@@ -0,0 +1,409 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#ifndef _TBB_arena_slot_H
+#define _TBB_arena_slot_H
+
+#include "oneapi/tbb/detail/_config.h"
+#include "oneapi/tbb/detail/_utils.h"
+#include "oneapi/tbb/detail/_template_helpers.h"
+#include "oneapi/tbb/detail/_task.h"
+
+#include "oneapi/tbb/cache_aligned_allocator.h"
+
+#include "misc.h"
+#include "mailbox.h"
+#include "scheduler_common.h"
+
+#include <atomic>
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+
+class arena;
+class task_group_context;
+
+//--------------------------------------------------------------------------------------------------------
+// Arena Slot
+//--------------------------------------------------------------------------------------------------------
+
+static d1::task** const EmptyTaskPool = nullptr;
+static d1::task** const LockedTaskPool = reinterpret_cast<d1::task**>(~std::intptr_t(0));
+
+struct alignas(max_nfs_size) arena_slot_shared_state {
+ //! Scheduler of the thread attached to the slot
+ /** Marks the slot as busy, and is used to iterate through the schedulers belonging to this arena **/
+ std::atomic<bool> my_is_occupied;
+
+ // Synchronization of access to Task pool
+ /** Also is used to specify if the slot is empty or locked:
+ 0 - empty
+ -1 - locked **/
+ std::atomic<d1::task**> task_pool;
+
+ //! Index of the first ready task in the deque.
+ /** Modified by thieves, and by the owner during compaction/reallocation **/
+ std::atomic<std::size_t> head;
+};
+
+struct alignas(max_nfs_size) arena_slot_private_state {
+ //! Hint provided for operations with the container of starvation-resistant tasks.
+ /** Modified by the owner thread (during these operations). **/
+ unsigned hint_for_fifo_stream;
+
+#if __TBB_PREVIEW_CRITICAL_TASKS
+ //! Similar to 'hint_for_fifo_stream' but for critical tasks.
+ unsigned hint_for_critical_stream;
+#endif
+
+ //! Similar to 'hint_for_fifo_stream' but for the resume tasks.
+ unsigned hint_for_resume_stream;
+
+ //! Index of the element following the last ready task in the deque.
+ /** Modified by the owner thread. **/
+ std::atomic<std::size_t> tail;
+
+ //! Capacity of the primary task pool (number of elements - pointers to task).
+ std::size_t my_task_pool_size;
+
+ //! Task pool of the scheduler that owns this slot
+    // TODO: previously was task**__TBB_atomic, but it seems it is not accessed by other threads
+ d1::task** task_pool_ptr;
+};
+
+class arena_slot : private arena_slot_shared_state, private arena_slot_private_state {
+ friend class arena;
+ friend class outermost_worker_waiter;
+ friend class task_dispatcher;
+ friend class thread_data;
+ friend class nested_arena_context;
+
+    //! The original task dispatcher associated with this slot
+ task_dispatcher* my_default_task_dispatcher;
+
+#if TBB_USE_ASSERT
+ void fill_with_canary_pattern ( std::size_t first, std::size_t last ) {
+ for ( std::size_t i = first; i < last; ++i )
+ poison_pointer(task_pool_ptr[i]);
+ }
+#else
+    void fill_with_canary_pattern ( std::size_t, std::size_t ) {}
+#endif /* TBB_USE_ASSERT */
+
+ static constexpr std::size_t min_task_pool_size = 64;
+
+ void allocate_task_pool( std::size_t n ) {
+ std::size_t byte_size = ((n * sizeof(d1::task*) + max_nfs_size - 1) / max_nfs_size) * max_nfs_size;
+ my_task_pool_size = byte_size / sizeof(d1::task*);
+ task_pool_ptr = (d1::task**)cache_aligned_allocate(byte_size);
+ // No need to clear the fresh deque since valid items are designated by the head and tail members.
+ // But fill it with a canary pattern in the high vigilance debug mode.
+ fill_with_canary_pattern( 0, my_task_pool_size );
+ }
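+
+    // The byte size computed above is the classic "round up to a multiple" formula.
+    // A standalone restatement, for illustration only (kept under "#if 0", assuming <cstddef>):
+#if 0
+    constexpr std::size_t round_up(std::size_t bytes, std::size_t granularity) {
+        return ((bytes + granularity - 1) / granularity) * granularity;
+    }
+    static_assert(round_up(65, 64) == 128, "65 bytes occupy two 64-byte lines");
+    static_assert(round_up(64, 64) == 64,  "an exact multiple is unchanged");
+#endif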
+
+public:
+ //! Deallocate task pool that was allocated by means of allocate_task_pool.
+ void free_task_pool( ) {
+ // TODO: understand the assertion and modify
+ // __TBB_ASSERT( !task_pool /* TODO: == EmptyTaskPool */, NULL);
+ if( task_pool_ptr ) {
+ __TBB_ASSERT( my_task_pool_size, NULL);
+ cache_aligned_deallocate( task_pool_ptr );
+ task_pool_ptr = NULL;
+ my_task_pool_size = 0;
+ }
+ }
+
+ //! Get a task from the local pool.
+ /** Called only by the pool owner.
+ Returns the pointer to the task or NULL if a suitable task is not found.
+ Resets the pool if it is empty. **/
+ d1::task* get_task(execution_data_ext&, isolation_type);
+
+ //! Steal task from slot's ready pool
+ d1::task* steal_task(arena&, isolation_type);
+
+ //! Some thread is now the owner of this slot
+ void occupy() {
+ __TBB_ASSERT(!my_is_occupied.load(std::memory_order_relaxed), nullptr);
+ my_is_occupied.store(true, std::memory_order_release);
+ }
+
+ //! Try to occupy the slot
+ bool try_occupy() {
+ return !is_occupied() && my_is_occupied.exchange(true) == false;
+ }
+
+    //! The owner thread releases this slot
+ void release() {
+ __TBB_ASSERT(my_is_occupied.load(std::memory_order_relaxed), nullptr);
+ my_is_occupied.store(false, std::memory_order_release);
+ }
+
+ //! Spawn newly created tasks
+ void spawn(d1::task& t) {
+ std::size_t T = prepare_task_pool(1);
+ __TBB_ASSERT(is_poisoned(task_pool_ptr[T]), NULL);
+ task_pool_ptr[T] = &t;
+ commit_spawned_tasks(T + 1);
+ if (!is_task_pool_published()) {
+ publish_task_pool();
+ }
+ }
+
+ bool is_task_pool_published() const {
+ return task_pool.load(std::memory_order_relaxed) != EmptyTaskPool;
+ }
+
+ bool is_occupied() const {
+ return my_is_occupied.load(std::memory_order_relaxed);
+ }
+
+ task_dispatcher& default_task_dispatcher() {
+ __TBB_ASSERT(my_default_task_dispatcher != nullptr, nullptr);
+ return *my_default_task_dispatcher;
+ }
+
+ void init_task_streams(unsigned h) {
+ hint_for_fifo_stream = h;
+#if __TBB_RESUMABLE_TASKS
+ hint_for_resume_stream = h;
+#endif
+#if __TBB_PREVIEW_CRITICAL_TASKS
+ hint_for_critical_stream = h;
+#endif
+ }
+
+#if __TBB_PREVIEW_CRITICAL_TASKS
+ unsigned& critical_hint() {
+ return hint_for_critical_stream;
+ }
+#endif
+private:
+ //! Get a task from the local pool at specified location T.
+ /** Returns the pointer to the task or NULL if the task cannot be executed,
+ e.g. proxy has been deallocated or isolation constraint is not met.
+ tasks_omitted tells if some tasks have been omitted.
+ Called only by the pool owner. The caller should guarantee that the
+ position T is not available for a thief. **/
+ d1::task* get_task_impl(size_t T, execution_data_ext& ed, bool& tasks_omitted, isolation_type isolation);
+
+    //! Makes sure that the task pool can accommodate at least num_tasks more elements
+    /** If necessary relocates existing task pointers or grows the ready task deque.
+        Returns the (possibly updated) tail index (not accounting for num_tasks). **/
+ std::size_t prepare_task_pool(std::size_t num_tasks) {
+ std::size_t T = tail.load(std::memory_order_relaxed); // mirror
+ if ( T + num_tasks <= my_task_pool_size ) {
+ return T;
+ }
+
+ std::size_t new_size = num_tasks;
+ if ( !my_task_pool_size ) {
+ __TBB_ASSERT( !is_task_pool_published() && is_quiescent_local_task_pool_reset(), NULL );
+ __TBB_ASSERT( !task_pool_ptr, NULL );
+ if ( num_tasks < min_task_pool_size ) new_size = min_task_pool_size;
+ allocate_task_pool( new_size );
+ return 0;
+ }
+ acquire_task_pool();
+ std::size_t H = head.load(std::memory_order_relaxed); // mirror
+        d1::task** new_task_pool = task_pool_ptr;
+ __TBB_ASSERT( my_task_pool_size >= min_task_pool_size, NULL );
+ // Count not skipped tasks. Consider using std::count_if.
+ for ( std::size_t i = H; i < T; ++i )
+ if ( new_task_pool[i] ) ++new_size;
+ // If the free space at the beginning of the task pool is too short, we
+ // are likely facing a pathological single-producer-multiple-consumers
+ // scenario, and thus it's better to expand the task pool
+ bool allocate = new_size > my_task_pool_size - min_task_pool_size/4;
+ if ( allocate ) {
+ // Grow task pool. As this operation is rare, and its cost is asymptotically
+ // amortizable, we can tolerate new task pool allocation done under the lock.
+ if ( new_size < 2 * my_task_pool_size )
+ new_size = 2 * my_task_pool_size;
+ allocate_task_pool( new_size ); // updates my_task_pool_size
+ }
+ // Filter out skipped tasks. Consider using std::copy_if.
+ std::size_t T1 = 0;
+ for ( std::size_t i = H; i < T; ++i ) {
+ if ( new_task_pool[i] ) {
+ task_pool_ptr[T1++] = new_task_pool[i];
+ }
+ }
+ // Deallocate the previous task pool if a new one has been allocated.
+ if ( allocate )
+ cache_aligned_deallocate( new_task_pool );
+ else
+ fill_with_canary_pattern( T1, tail );
+ // Publish the new state.
+ commit_relocated_tasks( T1 );
+ // assert_task_pool_valid();
+ return T1;
+ }
+
+ //! Makes newly spawned tasks visible to thieves
+ void commit_spawned_tasks(std::size_t new_tail) {
+ __TBB_ASSERT (new_tail <= my_task_pool_size, "task deque end was overwritten");
+ // emit "task was released" signal
+ // Release fence is necessary to make sure that previously stored task pointers
+ // are visible to thieves.
+ tail.store(new_tail, std::memory_order_release);
+ }
+
+ //! Used by workers to enter the task pool
+    /** Does not lock the task pool in case the arena slot has been successfully grabbed. **/
+ void publish_task_pool() {
+ __TBB_ASSERT ( task_pool == EmptyTaskPool, "someone else grabbed my arena slot?" );
+ __TBB_ASSERT ( head.load(std::memory_order_relaxed) < tail.load(std::memory_order_relaxed),
+ "entering arena without tasks to share" );
+ // Release signal on behalf of previously spawned tasks (when this thread was not in arena yet)
+ task_pool.store(task_pool_ptr, std::memory_order_release );
+ }
+
+ //! Locks the local task pool
+ /** Garbles task_pool for the duration of the lock. Requires correctly set task_pool_ptr.
+ ATTENTION: This method is mostly the same as generic_scheduler::lock_task_pool(), with
+        slightly different logic of slot state checks (the slot is either locked or points
+ to our task pool). Thus if either of them is changed, consider changing the counterpart as well. **/
+ void acquire_task_pool() {
+ if (!is_task_pool_published()) {
+ return; // we are not in arena - nothing to lock
+ }
+ bool sync_prepare_done = false;
+ for( atomic_backoff b;;b.pause() ) {
+#if TBB_USE_ASSERT
+ // Local copy of the arena slot task pool pointer is necessary for the next
+ // assertion to work correctly to exclude asynchronous state transition effect.
+ d1::task** tp = task_pool.load(std::memory_order_relaxed);
+ __TBB_ASSERT( tp == LockedTaskPool || tp == task_pool_ptr, "slot ownership corrupt?" );
+#endif
+ d1::task** expected = task_pool_ptr;
+ if( task_pool.load(std::memory_order_relaxed) != LockedTaskPool &&
+ task_pool.compare_exchange_strong(expected, LockedTaskPool ) ) {
+ // We acquired our own slot
+ break;
+ } else if( !sync_prepare_done ) {
+ // Start waiting
+ sync_prepare_done = true;
+ }
+ // Someone else acquired a lock, so pause and do exponential backoff.
+ }
+ __TBB_ASSERT( task_pool.load(std::memory_order_relaxed) == LockedTaskPool, "not really acquired task pool" );
+ }
+
+ //! Unlocks the local task pool
+ /** Restores task_pool munged by acquire_task_pool. Requires
+ correctly set task_pool_ptr. **/
+ void release_task_pool() {
+        if ( task_pool.load(std::memory_order_relaxed) == EmptyTaskPool )
+ return; // we are not in arena - nothing to unlock
+ __TBB_ASSERT( task_pool.load(std::memory_order_relaxed) == LockedTaskPool, "arena slot is not locked" );
+ task_pool.store( task_pool_ptr, std::memory_order_release );
+ }
+
+ //! Locks victim's task pool, and returns pointer to it. The pointer can be NULL.
+ /** Garbles victim_arena_slot->task_pool for the duration of the lock. **/
+ d1::task** lock_task_pool() {
+ d1::task** victim_task_pool;
+ for ( atomic_backoff backoff;; /*backoff pause embedded in the loop*/) {
+ victim_task_pool = task_pool.load(std::memory_order_relaxed);
+            // Microbenchmarks demonstrated that aborting the stealing attempt when the
+            // victim's task pool is locked degrades performance.
+ // NOTE: Do not use comparison of head and tail indices to check for
+ // the presence of work in the victim's task pool, as they may give
+ // incorrect indication because of task pool relocations and resizes.
+ if (victim_task_pool == EmptyTaskPool) {
+ break;
+ }
+ d1::task** expected = victim_task_pool;
+ if (victim_task_pool != LockedTaskPool && task_pool.compare_exchange_strong(expected, LockedTaskPool) ) {
+ // We've locked victim's task pool
+ break;
+ }
+ // Someone else acquired a lock, so pause and do exponential backoff.
+ backoff.pause();
+ }
+ __TBB_ASSERT(victim_task_pool == EmptyTaskPool ||
+ (task_pool.load(std::memory_order_relaxed) == LockedTaskPool &&
+ victim_task_pool != LockedTaskPool), "not really locked victim's task pool?");
+ return victim_task_pool;
+ }
+
+ //! Unlocks victim's task pool
+ /** Restores victim_arena_slot->task_pool munged by lock_task_pool. **/
+ void unlock_task_pool(d1::task** victim_task_pool) {
+ __TBB_ASSERT(task_pool.load(std::memory_order_relaxed) == LockedTaskPool, "victim arena slot is not locked");
+ __TBB_ASSERT(victim_task_pool != LockedTaskPool, NULL);
+ task_pool.store(victim_task_pool, std::memory_order_release);
+ }
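+
+    // Both acquire_task_pool() and lock_task_pool() above are instances of the same
+    // shape: lock by atomically swapping in a sentinel value, spin with exponential
+    // backoff on contention, unlock with a release store. A generic sketch of that
+    // shape, for illustration only (the real code locks the pool *pointer* with the
+    // LockedTaskPool sentinel rather than a bool):
+#if 0
+    struct backoff_lock_sketch {
+        std::atomic<bool> locked{false};
+        void lock() {
+            for (atomic_backoff b; ; b.pause()) {            // exponential backoff, then yield
+                if (!locked.load(std::memory_order_relaxed) &&
+                    !locked.exchange(true, std::memory_order_acquire)) {
+                    break;                                   // we own the lock
+                }
+            }
+        }
+        void unlock() { locked.store(false, std::memory_order_release); }
+    };
+#endif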
+
+#if TBB_USE_ASSERT
+ bool is_local_task_pool_quiescent() const {
+ d1::task** tp = task_pool.load(std::memory_order_relaxed);
+ return tp == EmptyTaskPool || tp == LockedTaskPool;
+ }
+
+ bool is_quiescent_local_task_pool_empty() const {
+ __TBB_ASSERT(is_local_task_pool_quiescent(), "Task pool is not quiescent");
+ return head.load(std::memory_order_relaxed) == tail.load(std::memory_order_relaxed);
+ }
+
+ bool is_quiescent_local_task_pool_reset() const {
+ __TBB_ASSERT(is_local_task_pool_quiescent(), "Task pool is not quiescent");
+ return head.load(std::memory_order_relaxed) == 0 && tail.load(std::memory_order_relaxed) == 0;
+ }
+#endif // TBB_USE_ASSERT
+
+ //! Leave the task pool
+ /** Leaving task pool automatically releases the task pool if it is locked. **/
+ void leave_task_pool() {
+ __TBB_ASSERT(is_task_pool_published(), "Not in arena");
+ // Do not reset my_arena_index. It will be used to (attempt to) re-acquire the slot next time
+ __TBB_ASSERT(task_pool.load(std::memory_order_relaxed) == LockedTaskPool, "Task pool must be locked when leaving arena");
+ __TBB_ASSERT(is_quiescent_local_task_pool_empty(), "Cannot leave arena when the task pool is not empty");
+ // No release fence is necessary here as this assignment precludes external
+        // accesses to the local task pool once it becomes visible. Thus it is harmless
+ // if it gets hoisted above preceding local bookkeeping manipulations.
+ task_pool.store(EmptyTaskPool, std::memory_order_relaxed);
+ }
+
+ //! Resets head and tail indices to 0, and leaves task pool
+ /** The task pool must be locked by the owner (via acquire_task_pool).**/
+ void reset_task_pool_and_leave() {
+ __TBB_ASSERT(task_pool.load(std::memory_order_relaxed) == LockedTaskPool, "Task pool must be locked when resetting task pool");
+ tail.store(0, std::memory_order_relaxed);
+ head.store(0, std::memory_order_relaxed);
+ leave_task_pool();
+ }
+
+ //! Makes relocated tasks visible to thieves and releases the local task pool.
+ /** Obviously, the task pool must be locked when calling this method. **/
+ void commit_relocated_tasks(std::size_t new_tail) {
+ __TBB_ASSERT(is_local_task_pool_quiescent(), "Task pool must be locked when calling commit_relocated_tasks()");
+ head.store(0, std::memory_order_relaxed);
+        // Tail is updated last to minimize the probability that a thread taking an arena
+        // snapshot is misguided into thinking that this task pool is empty.
+ tail.store(new_tail, std::memory_order_release);
+ release_task_pool();
+ }
+};
+
+} // namespace r1
+} // namespace detail
+} // namespace tbb
+
+#endif // _TBB_arena_slot_H
diff --git a/contrib/libs/tbb/src/tbb/assert_impl.h b/contrib/libs/tbb/src/tbb/assert_impl.h
new file mode 100644
index 0000000000..7f411e06f7
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/assert_impl.h
@@ -0,0 +1,71 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#ifndef __TBB_assert_impl_H
+#define __TBB_assert_impl_H
+
+#include "oneapi/tbb/detail/_config.h"
+
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <cstdarg>
+#if _MSC_VER && _DEBUG
+#include <crtdbg.h>
+#endif
+
+#include <mutex>
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+
+// TODO: consider extension for formatted error description string
+static void assertion_failure_impl(const char* filename, int line, const char* expression, const char* comment) {
+ std::fprintf(stderr, "Assertion %s failed on line %d of file %s\n", expression, line, filename);
+ if (comment) {
+ std::fprintf(stderr, "Detailed description: %s\n", comment);
+ }
+#if _MSC_VER && _DEBUG
+ if (1 == _CrtDbgReport(_CRT_ASSERT, filename, line, "tbb_debug.dll", "%s\r\n%s", expression, comment?comment:"")) {
+ _CrtDbgBreak();
+ }
+#else
+ std::fflush(stderr);
+ std::abort();
+#endif
+}
+
+void __TBB_EXPORTED_FUNC assertion_failure(const char* filename, int line, const char* expression, const char* comment) {
+ static std::once_flag flag;
+ std::call_once(flag, [&](){ assertion_failure_impl(filename, line, expression, comment); });
+}
+
+//! Report a runtime warning.
+void runtime_warning( const char* format, ... ) {
+ char str[1024]; std::memset(str, 0, 1024);
+ va_list args; va_start(args, format);
+ vsnprintf( str, 1024-1, format, args);
+ va_end(args);
+ fprintf(stderr, "TBB Warning: %s\n", str);
+}
+
+} // namespace r1
+} // namespace detail
+} // namespace tbb
+
+#endif // __TBB_assert_impl_H
+
diff --git a/contrib/libs/tbb/src/tbb/co_context.h b/contrib/libs/tbb/src/tbb/co_context.h
new file mode 100644
index 0000000000..552dec356b
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/co_context.h
@@ -0,0 +1,222 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#ifndef _TBB_co_context_H
+#define _TBB_co_context_H
+
+#include "oneapi/tbb/detail/_config.h"
+
+#if __TBB_RESUMABLE_TASKS
+
+#include <cstddef>
+#include <cstdint>
+
+#if _WIN32 || _WIN64
+#include <windows.h>
+#else
+// ucontext.h API is deprecated since macOS 10.6
+#if __APPLE__
+ #if __INTEL_COMPILER
+ #pragma warning(push)
+ #pragma warning(disable:1478)
+ #elif __clang__
+ #pragma clang diagnostic push
+ #pragma clang diagnostic ignored "-Wdeprecated-declarations"
+ #endif
+#endif // __APPLE__
+
+#include <ucontext.h>
+#include <sys/mman.h> // mprotect
+
+#include "governor.h" // default_page_size()
+
+#ifndef MAP_STACK
+// macOS* does not define MAP_STACK
+#define MAP_STACK 0
+#endif
+#ifndef MAP_ANONYMOUS
+// macOS* defines MAP_ANON, which is deprecated in Linux*.
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+#endif // _WIN32 || _WIN64
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+
+#if _WIN32 || _WIN64
+ typedef LPVOID coroutine_type;
+#else
+ struct coroutine_type {
+ coroutine_type() : my_context(), my_stack(), my_stack_size() {}
+ ucontext_t my_context;
+ void* my_stack;
+ std::size_t my_stack_size;
+ };
+#endif
+
+ // Forward declaration of the coroutine API.
+ void create_coroutine(coroutine_type& c, std::size_t stack_size, void* arg);
+ void current_coroutine(coroutine_type& c);
+ void swap_coroutine(coroutine_type& prev_coroutine, coroutine_type& new_coroutine);
+ void destroy_coroutine(coroutine_type& c);
+
+class co_context {
+ enum co_state {
+ co_invalid,
+ co_suspended,
+ co_executing,
+ co_destroyed
+ };
+ coroutine_type my_coroutine;
+ co_state my_state;
+
+public:
+ co_context(std::size_t stack_size, void* arg)
+ : my_state(stack_size ? co_suspended : co_executing)
+ {
+ if (stack_size) {
+ __TBB_ASSERT(arg != 0, nullptr);
+ create_coroutine(my_coroutine, stack_size, arg);
+ } else {
+ current_coroutine(my_coroutine);
+ }
+ }
+
+ ~co_context() {
+ __TBB_ASSERT(1 << my_state & (1 << co_suspended | 1 << co_executing), NULL);
+ if (my_state == co_suspended)
+ destroy_coroutine(my_coroutine);
+ my_state = co_destroyed;
+ }
+
+ void resume(co_context& target) {
+ // Do not create non-trivial objects on the stack of this function. They might never be destroyed.
+ __TBB_ASSERT(my_state == co_executing, NULL);
+ __TBB_ASSERT(target.my_state == co_suspended, NULL);
+
+ my_state = co_suspended;
+ target.my_state = co_executing;
+
+ // 'target' can reference an invalid object after swap_coroutine. Do not access it.
+ swap_coroutine(my_coroutine, target.my_coroutine);
+
+ __TBB_ASSERT(my_state == co_executing, NULL);
+ }
+};
+
+#if _WIN32 || _WIN64
+/* [[noreturn]] */ void __stdcall co_local_wait_for_all(void* arg) noexcept;
+#else
+/* [[noreturn]] */ void co_local_wait_for_all(void* arg) noexcept;
+#endif
+
+#if _WIN32 || _WIN64
+inline void create_coroutine(coroutine_type& c, std::size_t stack_size, void* arg) {
+ __TBB_ASSERT(arg, NULL);
+ c = CreateFiber(stack_size, co_local_wait_for_all, arg);
+ __TBB_ASSERT(c, NULL);
+}
+
+inline void current_coroutine(coroutine_type& c) {
+ c = IsThreadAFiber() ? GetCurrentFiber() :
+ ConvertThreadToFiberEx(nullptr, FIBER_FLAG_FLOAT_SWITCH);
+ __TBB_ASSERT(c, NULL);
+}
+
+inline void swap_coroutine(coroutine_type& prev_coroutine, coroutine_type& new_coroutine) {
+ if (!IsThreadAFiber()) {
+ ConvertThreadToFiberEx(nullptr, FIBER_FLAG_FLOAT_SWITCH);
+ }
+ __TBB_ASSERT(new_coroutine, NULL);
+ prev_coroutine = GetCurrentFiber();
+ __TBB_ASSERT(prev_coroutine, NULL);
+ SwitchToFiber(new_coroutine);
+}
+
+inline void destroy_coroutine(coroutine_type& c) {
+ __TBB_ASSERT(c, NULL);
+ DeleteFiber(c);
+}
+#else // !(_WIN32 || _WIN64)
+
+inline void create_coroutine(coroutine_type& c, std::size_t stack_size, void* arg) {
+ const std::size_t REG_PAGE_SIZE = governor::default_page_size();
+ const std::size_t page_aligned_stack_size = (stack_size + (REG_PAGE_SIZE - 1)) & ~(REG_PAGE_SIZE - 1);
+ const std::size_t protected_stack_size = page_aligned_stack_size + 2 * REG_PAGE_SIZE;
+
+ // Allocate the stack with protection property
+ std::uintptr_t stack_ptr = (std::uintptr_t)mmap(NULL, protected_stack_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
+ __TBB_ASSERT((void*)stack_ptr != MAP_FAILED, NULL);
+
+ // Allow read write on our stack (guarded pages are still protected)
+ int err = mprotect((void*)(stack_ptr + REG_PAGE_SIZE), page_aligned_stack_size, PROT_READ | PROT_WRITE);
+ __TBB_ASSERT_EX(!err, NULL);
+
+ // Remember the stack state
+ c.my_stack = (void*)(stack_ptr + REG_PAGE_SIZE);
+ c.my_stack_size = page_aligned_stack_size;
+
+ err = getcontext(&c.my_context);
+ __TBB_ASSERT_EX(!err, NULL);
+
+ c.my_context.uc_link = 0;
+ // cast to char* to disable FreeBSD clang-3.4.1 'incompatible type' error
+ c.my_context.uc_stack.ss_sp = (char*)c.my_stack;
+ c.my_context.uc_stack.ss_size = c.my_stack_size;
+ c.my_context.uc_stack.ss_flags = 0;
+
+ typedef void(*coroutine_func_t)();
+ makecontext(&c.my_context, (coroutine_func_t)co_local_wait_for_all, sizeof(arg) / sizeof(int), arg);
+}
+
+inline void current_coroutine(coroutine_type& c) {
+ int err = getcontext(&c.my_context);
+ __TBB_ASSERT_EX(!err, NULL);
+}
+
+inline void swap_coroutine(coroutine_type& prev_coroutine, coroutine_type& new_coroutine) {
+ int err = swapcontext(&prev_coroutine.my_context, &new_coroutine.my_context);
+ __TBB_ASSERT_EX(!err, NULL);
+}
+
+inline void destroy_coroutine(coroutine_type& c) {
+ const std::size_t REG_PAGE_SIZE = governor::default_page_size();
+ // Free stack memory with guarded pages
+ munmap((void*)((std::uintptr_t)c.my_stack - REG_PAGE_SIZE), c.my_stack_size + 2 * REG_PAGE_SIZE);
+ // Clear the stack state afterwards
+ c.my_stack = NULL;
+ c.my_stack_size = 0;
+}
+
+#if __APPLE__
+ #if __INTEL_COMPILER
+ #pragma warning(pop) // 1478 warning
+ #elif __clang__
+ #pragma clang diagnostic pop // "-Wdeprecated-declarations"
+ #endif
+#endif
+
+#endif // _WIN32 || _WIN64
+
+} // namespace r1
+} // namespace detail
+} // namespace tbb
+
+#endif /* __TBB_RESUMABLE_TASKS */
+
+#endif /* _TBB_co_context_H */
+
diff --git a/contrib/libs/tbb/src/tbb/concurrent_bounded_queue.cpp b/contrib/libs/tbb/src/tbb/concurrent_bounded_queue.cpp
new file mode 100644
index 0000000000..90077936f6
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/concurrent_bounded_queue.cpp
@@ -0,0 +1,84 @@
+/*
+ Copyright (c) 2020-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "oneapi/tbb/detail/_utils.h"
+#include "oneapi/tbb/concurrent_queue.h"
+#include "oneapi/tbb/cache_aligned_allocator.h"
+#include "concurrent_monitor.h"
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+
+static constexpr std::size_t monitors_number = 2;
+
+std::uint8_t* __TBB_EXPORTED_FUNC allocate_bounded_queue_rep( std::size_t queue_rep_size )
+{
+ std::size_t monitors_mem_size = sizeof(concurrent_monitor) * monitors_number;
+ std::uint8_t* mem = static_cast<std::uint8_t*>(cache_aligned_allocate(queue_rep_size + monitors_mem_size));
+
+ concurrent_monitor* monitors = reinterpret_cast<concurrent_monitor*>(mem + queue_rep_size);
+ for (std::size_t i = 0; i < monitors_number; ++i) {
+ new (monitors + i) concurrent_monitor();
+ }
+
+ return mem;
+}
+
+void __TBB_EXPORTED_FUNC deallocate_bounded_queue_rep( std::uint8_t* mem, std::size_t queue_rep_size )
+{
+ concurrent_monitor* monitors = reinterpret_cast<concurrent_monitor*>(mem + queue_rep_size);
+ for (std::size_t i = 0; i < monitors_number; ++i) {
+ monitors[i].~concurrent_monitor();
+ }
+
+ cache_aligned_deallocate(mem);
+}
+
+void __TBB_EXPORTED_FUNC wait_bounded_queue_monitor( concurrent_monitor* monitors, std::size_t monitor_tag,
+ std::ptrdiff_t target, d1::delegate_base& predicate )
+{
+ __TBB_ASSERT(monitor_tag < monitors_number, nullptr);
+ concurrent_monitor& monitor = monitors[monitor_tag];
+
+ monitor.wait<concurrent_monitor::thread_context>([&] { return !predicate(); }, std::uintptr_t(target));
+}
+
+void __TBB_EXPORTED_FUNC abort_bounded_queue_monitors( concurrent_monitor* monitors ) {
+ concurrent_monitor& items_avail = monitors[d1::cbq_items_avail_tag];
+ concurrent_monitor& slots_avail = monitors[d1::cbq_slots_avail_tag];
+
+ items_avail.abort_all();
+ slots_avail.abort_all();
+}
+
+struct predicate_leq {
+ std::size_t my_ticket;
+ predicate_leq( std::size_t ticket ) : my_ticket(ticket) {}
+ bool operator() ( std::uintptr_t ticket ) const { return static_cast<std::size_t>(ticket) <= my_ticket; }
+};
+
+void __TBB_EXPORTED_FUNC notify_bounded_queue_monitor( concurrent_monitor* monitors,
+ std::size_t monitor_tag, std::size_t ticket)
+{
+ __TBB_ASSERT(monitor_tag < monitors_number, nullptr);
+ concurrent_monitor& monitor = monitors[monitor_tag];
+ monitor.notify(predicate_leq(ticket));
+}
+
+} // namespace r1
+} // namespace detail
+} // namespace tbb
diff --git a/contrib/libs/tbb/src/tbb/concurrent_monitor.h b/contrib/libs/tbb/src/tbb/concurrent_monitor.h
new file mode 100644
index 0000000000..cb1885a5d0
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/concurrent_monitor.h
@@ -0,0 +1,529 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#ifndef __TBB_concurrent_monitor_H
+#define __TBB_concurrent_monitor_H
+
+#include "oneapi/tbb/spin_mutex.h"
+#include "oneapi/tbb/detail/_exception.h"
+#include "oneapi/tbb/detail/_aligned_space.h"
+#include "oneapi/tbb/detail/_template_helpers.h"
+#include "scheduler_common.h"
+
+#include "semaphore.h"
+
+#include <atomic>
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+
+//! Circular doubly-linked list with sentinel
+/** head.next points to the front and head.prev points to the back */
+class circular_doubly_linked_list_with_sentinel : no_copy {
+public:
+ struct base_node {
+ base_node* next;
+ base_node* prev;
+ explicit base_node() : next((base_node*)(uintptr_t)0xcdcdcdcd), prev((base_node*)(uintptr_t)0xcdcdcdcd) {}
+ };
+
+ // ctor
+ circular_doubly_linked_list_with_sentinel() { clear(); }
+ // dtor
+ ~circular_doubly_linked_list_with_sentinel() {
+ __TBB_ASSERT(head.next == &head && head.prev == &head, "the list is not empty");
+ }
+
+ inline std::size_t size() const { return count.load(std::memory_order_relaxed); }
+ inline bool empty() const { return size() == 0; }
+ inline base_node* front() const { return head.next; }
+ inline base_node* last() const { return head.prev; }
+ inline const base_node* end() const { return &head; }
+
+ //! add to the back of the list
+ inline void add( base_node* n ) {
+ count.store(count.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed);
+ n->prev = head.prev;
+ n->next = &head;
+ head.prev->next = n;
+ head.prev = n;
+ }
+
+ //! remove node 'n'
+ inline void remove( base_node& n ) {
+ __TBB_ASSERT(count.load(std::memory_order_relaxed) > 0, "attempt to remove an item from an empty list");
+ count.store(count.load( std::memory_order_relaxed ) - 1, std::memory_order_relaxed);
+ n.prev->next = n.next;
+ n.next->prev = n.prev;
+ }
+
+ //! move all elements to 'lst' and initialize the 'this' list
+ inline void flush_to( circular_doubly_linked_list_with_sentinel& lst ) {
+ const std::size_t l_count = size();
+ if (l_count > 0) {
+ lst.count.store(l_count, std::memory_order_relaxed);
+ lst.head.next = head.next;
+ lst.head.prev = head.prev;
+ head.next->prev = &lst.head;
+ head.prev->next = &lst.head;
+ clear();
+ }
+ }
+
+ void clear() {
+ head.next = &head;
+ head.prev = &head;
+ count.store(0, std::memory_order_relaxed);
+ }
+private:
+ std::atomic<std::size_t> count;
+ base_node head;
+};
+
+using base_list = circular_doubly_linked_list_with_sentinel;
+using base_node = circular_doubly_linked_list_with_sentinel::base_node;
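+
+// A tiny usage sketch of the sentinel list above (illustrative only): nodes are linked
+// by address, so they must stay alive while they are on the list, and the list must be
+// emptied before it is destroyed (the destructor asserts that).
+#if 0
+inline void sentinel_list_usage_sketch() {
+    base_list list;
+    base_node a, b;
+    list.add(&a);
+    list.add(&b);
+    __TBB_ASSERT(list.front() == &a && list.last() == &b, nullptr);
+    list.remove(a);
+    __TBB_ASSERT(list.size() == 1, nullptr);
+    list.remove(b);
+    __TBB_ASSERT(list.empty(), nullptr);
+}
+#endif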
+
+template <typename Context>
+class concurrent_monitor_base;
+
+template <typename Context>
+class wait_node : public base_node {
+public:
+
+#if __TBB_GLIBCXX_VERSION >= 40800 && __TBB_GLIBCXX_VERSION < 40900
+ wait_node(Context ctx) : my_context(ctx), my_is_in_list(false) {}
+#else
+ wait_node(Context ctx) : my_context(ctx) {}
+#endif
+
+ virtual ~wait_node() = default;
+
+ virtual void init() {
+ __TBB_ASSERT(!my_initialized, nullptr);
+ my_initialized = true;
+ }
+
+ virtual void wait() = 0;
+
+ virtual void reset() {
+ __TBB_ASSERT(my_skipped_wakeup, nullptr);
+ my_skipped_wakeup = false;
+ }
+
+ virtual void notify() = 0;
+
+protected:
+ friend class concurrent_monitor_base<Context>;
+ friend class thread_data;
+
+ Context my_context{};
+#if __TBB_GLIBCXX_VERSION >= 40800 && __TBB_GLIBCXX_VERSION < 40900
+ std::atomic<bool> my_is_in_list;
+#else
+ std::atomic<bool> my_is_in_list{false};
+#endif
+
+ bool my_initialized{false};
+ bool my_skipped_wakeup{false};
+ bool my_aborted{false};
+ unsigned my_epoch{0};
+};
+
+template <typename Context>
+class sleep_node : public wait_node<Context> {
+ using base_type = wait_node<Context>;
+public:
+ using base_type::base_type;
+
+ // Make it virtual due to Intel Compiler warning
+ virtual ~sleep_node() {
+ if (this->my_initialized) {
+ if (this->my_skipped_wakeup) semaphore().P();
+ semaphore().~binary_semaphore();
+ }
+ }
+
+ binary_semaphore& semaphore() { return *sema.begin(); }
+
+ void init() override {
+ if (!this->my_initialized) {
+ new (sema.begin()) binary_semaphore;
+ base_type::init();
+ }
+ }
+
+ void wait() override {
+ __TBB_ASSERT(this->my_initialized,
+ "Use of commit_wait() without prior prepare_wait()");
+ semaphore().P();
+ __TBB_ASSERT(!this->my_is_in_list.load(std::memory_order_relaxed), "Still in the queue?");
+ if (this->my_aborted)
+ throw_exception(exception_id::user_abort);
+ }
+
+ void reset() override {
+ base_type::reset();
+ semaphore().P();
+ }
+
+ void notify() override {
+ semaphore().V();
+ }
+
+private:
+ tbb::detail::aligned_space<binary_semaphore> sema;
+};
+
+//! concurrent_monitor
+/** fine-grained concurrent_monitor implementation */
+template <typename Context>
+class concurrent_monitor_base : no_copy {
+public:
+ //! ctor
+ concurrent_monitor_base() : my_epoch{}
+ {}
+
+ //! dtor
+ ~concurrent_monitor_base() {
+ abort_all();
+ __TBB_ASSERT(my_waitset.empty(), "waitset not empty?");
+ }
+
+    //! prepare wait by inserting 'node' into the wait queue
+ void prepare_wait( wait_node<Context>& node) {
+        // TODO: consider even lazier instantiation of the semaphore, i.e. only when it is actually needed, e.g. move it into node::wait()
+ if (!node.my_initialized) {
+ node.init();
+ }
+        // this is a good place to pump a previously skipped wakeup
+ else if (node.my_skipped_wakeup) {
+ node.reset();
+ }
+
+ node.my_is_in_list.store(true, std::memory_order_relaxed);
+
+ {
+ tbb::spin_mutex::scoped_lock l(my_mutex);
+ node.my_epoch = my_epoch.load(std::memory_order_relaxed);
+ my_waitset.add(&node);
+ }
+
+        // prepare_wait() must guarantee a write-read memory barrier;
+        // in C++ only a full fence provides this kind of barrier.
+ atomic_fence(std::memory_order_seq_cst);
+ }
+
+ //! Commit wait if event count has not changed; otherwise, cancel wait.
+ /** Returns true if committed, false if canceled. */
+ inline bool commit_wait( wait_node<Context>& node ) {
+ const bool do_it = node.my_epoch == my_epoch.load(std::memory_order_relaxed);
+ // this check is just an optimization
+ if (do_it) {
+ node.wait();
+ } else {
+ cancel_wait( node );
+ }
+ return do_it;
+ }
+
+ //! Cancel the wait. Removes the thread from the wait queue if not removed yet.
+ void cancel_wait( wait_node<Context>& node ) {
+ // possible skipped wakeup will be pumped in the following prepare_wait()
+ node.my_skipped_wakeup = true;
+ // try to remove node from waitset
+ // Cancel wait guarantees acquire memory barrier.
+ bool in_list = node.my_is_in_list.load(std::memory_order_acquire);
+ if (in_list) {
+ tbb::spin_mutex::scoped_lock l(my_mutex);
+ if (node.my_is_in_list.load(std::memory_order_relaxed)) {
+ my_waitset.remove(node);
+ // node is removed from waitset, so there will be no wakeup
+ node.my_is_in_list.store(false, std::memory_order_relaxed);
+ node.my_skipped_wakeup = false;
+ }
+ }
+ }
+
+    //! Wait until the predicate is satisfied, using the given wait node and its waiting-on context
+ template <typename NodeType, typename Pred>
+ bool wait(Pred&& pred, NodeType&& node) {
+ prepare_wait(node);
+ while (!guarded_call(std::forward<Pred>(pred), node)) {
+ if (commit_wait(node)) {
+ return true;
+ }
+
+ prepare_wait(node);
+ }
+
+ cancel_wait(node);
+ return false;
+ }
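+
+    // The prepare/commit/cancel protocol above lets a thread publish itself as a waiter,
+    // re-check the condition, and then either sleep or cancel without missing a notification
+    // that arrives in between. The same epoch idea, re-expressed with a std::condition_variable
+    // for illustration only (tiny_monitor is not a TBB class; assumes <mutex> and
+    // <condition_variable>):
+#if 0
+    struct tiny_monitor {
+        std::mutex m;
+        std::condition_variable cv;
+        unsigned epoch = 0;
+
+        template <typename Pred>
+        void wait(Pred pred) {
+            std::unique_lock<std::mutex> lock(m);
+            unsigned observed = epoch;               // analogue of prepare_wait()
+            while (!pred()) {
+                if (observed == epoch)               // commit only if nothing happened
+                    cv.wait(lock);                   //   since the epoch was sampled
+                observed = epoch;                    // analogue of re-preparing the wait
+            }
+        }
+
+        void notify_all() {
+            { std::lock_guard<std::mutex> lock(m); ++epoch; }
+            cv.notify_all();
+        }
+    };
+#endif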
+
+ //! Notify one thread about the event
+ void notify_one() {
+ atomic_fence(std::memory_order_seq_cst);
+ notify_one_relaxed();
+ }
+
+ //! Notify one thread about the event. Relaxed version.
+ void notify_one_relaxed() {
+ if (my_waitset.empty()) {
+ return;
+ }
+
+ base_node* n;
+ const base_node* end = my_waitset.end();
+ {
+ tbb::spin_mutex::scoped_lock l(my_mutex);
+ my_epoch.store(my_epoch.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed);
+ n = my_waitset.front();
+ if (n != end) {
+ my_waitset.remove(*n);
+ to_wait_node(n)->my_is_in_list.store(false, std::memory_order_relaxed);
+ }
+ }
+
+ if (n != end) {
+ to_wait_node(n)->notify();
+ }
+ }
+
+ //! Notify all waiting threads of the event
+ void notify_all() {
+ atomic_fence(std::memory_order_seq_cst);
+ notify_all_relaxed();
+ }
+
+ //! Notify all waiting threads of the event; relaxed version
+ void notify_all_relaxed() {
+ if (my_waitset.empty()) {
+ return;
+ }
+
+ base_list temp;
+ const base_node* end;
+ {
+ tbb::spin_mutex::scoped_lock l(my_mutex);
+ my_epoch.store(my_epoch.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed);
+ // TODO: possible optimization: do not change the node state under the lock, just do the flush
+ my_waitset.flush_to(temp);
+ end = temp.end();
+ for (base_node* n = temp.front(); n != end; n = n->next) {
+ to_wait_node(n)->my_is_in_list.store(false, std::memory_order_relaxed);
+ }
+ }
+
+ base_node* nxt;
+ for (base_node* n = temp.front(); n != end; n=nxt) {
+ nxt = n->next;
+ to_wait_node(n)->notify();
+ }
+#if TBB_USE_ASSERT
+ temp.clear();
+#endif
+ }
+
+ //! Notify waiting threads of the event that satisfies the given predicate
+ template <typename P>
+ void notify( const P& predicate ) {
+ atomic_fence(std::memory_order_seq_cst);
+ notify_relaxed( predicate );
+ }
+
+ //! Notify waiting threads of the event that satisfies the given predicate;
+ //! the predicate is called under the lock. Relaxed version.
+ template<typename P>
+ void notify_relaxed( const P& predicate ) {
+ if (my_waitset.empty()) {
+ return;
+ }
+
+ base_list temp;
+ base_node* nxt;
+ const base_node* end = my_waitset.end();
+ {
+ tbb::spin_mutex::scoped_lock l(my_mutex);
+ my_epoch.store(my_epoch.load( std::memory_order_relaxed ) + 1, std::memory_order_relaxed);
+ for (base_node* n = my_waitset.last(); n != end; n = nxt) {
+ nxt = n->prev;
+ auto* node = static_cast<wait_node<Context>*>(n);
+ if (predicate(node->my_context)) {
+ my_waitset.remove(*n);
+ node->my_is_in_list.store(false, std::memory_order_relaxed);
+ temp.add(n);
+ }
+ }
+ }
+
+ end = temp.end();
+ for (base_node* n=temp.front(); n != end; n = nxt) {
+ nxt = n->next;
+ to_wait_node(n)->notify();
+ }
+#if TBB_USE_ASSERT
+ temp.clear();
+#endif
+ }
+
+ //! Abort any sleeping threads at the time of the call
+ void abort_all() {
+ atomic_fence( std::memory_order_seq_cst );
+ abort_all_relaxed();
+ }
+
+ //! Abort any sleeping threads at the time of the call; Relaxed version
+ void abort_all_relaxed() {
+ if (my_waitset.empty()) {
+ return;
+ }
+
+ base_list temp;
+ const base_node* end;
+ {
+ tbb::spin_mutex::scoped_lock l(my_mutex);
+ my_epoch.store(my_epoch.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed);
+ my_waitset.flush_to(temp);
+ end = temp.end();
+ for (base_node* n = temp.front(); n != end; n = n->next) {
+ to_wait_node(n)->my_is_in_list.store(false, std::memory_order_relaxed);
+ }
+ }
+
+ base_node* nxt;
+ for (base_node* n = temp.front(); n != end; n = nxt) {
+ nxt = n->next;
+ to_wait_node(n)->my_aborted = true;
+ to_wait_node(n)->notify();
+ }
+#if TBB_USE_ASSERT
+ temp.clear();
+#endif
+ }
+
+private:
+ template <typename NodeType, typename Pred>
+ bool guarded_call(Pred&& predicate, NodeType& node) {
+ bool res = false;
+ tbb::detail::d0::try_call( [&] {
+ res = std::forward<Pred>(predicate)();
+ }).on_exception( [&] {
+ cancel_wait(node);
+ });
+
+ return res;
+ }
+
+ tbb::spin_mutex my_mutex;
+ base_list my_waitset;
+ std::atomic<unsigned> my_epoch;
+
+ wait_node<Context>* to_wait_node( base_node* node ) { return static_cast<wait_node<Context>*>(node); }
+};
+
+class concurrent_monitor : public concurrent_monitor_base<std::uintptr_t> {
+ using base_type = concurrent_monitor_base<std::uintptr_t>;
+public:
+ using base_type::base_type;
+ /** per-thread descriptor for concurrent_monitor */
+ using thread_context = sleep_node<std::uintptr_t>;
+};
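+
+// Illustrative sketch (not part of the library): the waiting protocol a
+// concurrent_monitor client is expected to follow. The monitor object and the
+// data_is_ready() predicate are hypothetical placeholders; the loop mirrors the
+// wait(pred, node) helper defined above.
+//
+//     concurrent_monitor mon;
+//     concurrent_monitor::thread_context node{/*unique context*/ 0};
+//     mon.prepare_wait(node);
+//     while (!data_is_ready()) {
+//         if (mon.commit_wait(node)) return;  // slept and was notified
+//         mon.prepare_wait(node);             // epoch changed; re-arm and re-check
+//     }
+//     mon.cancel_wait(node);                  // predicate already true; leave the waitset
+//
+// A producer publishes its data first and then calls mon.notify_one() or
+// mon.notify_all().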
+
+struct extended_context {
+ extended_context() = default;
+
+ extended_context(std::uintptr_t first_addr, arena* a) :
+ my_uniq_addr(first_addr), my_arena_addr(a)
+ {}
+
+ std::uintptr_t my_uniq_addr{0};
+ arena* my_arena_addr{nullptr};
+};
+
+
+#if __TBB_RESUMABLE_TASKS
+class resume_node : public wait_node<extended_context> {
+ using base_type = wait_node<extended_context>;
+public:
+ resume_node(extended_context ctx, execution_data_ext& ed_ext, task_dispatcher& target)
+ : base_type(ctx), my_curr_dispatcher(ed_ext.task_disp), my_target_dispatcher(&target)
+ , my_suspend_point(my_curr_dispatcher->get_suspend_point())
+ {}
+
+ virtual ~resume_node() {
+ if (this->my_skipped_wakeup) {
+ spin_wait_until_eq(this->my_notify_calls, 1);
+ }
+
+ poison_pointer(my_curr_dispatcher);
+ poison_pointer(my_target_dispatcher);
+ poison_pointer(my_suspend_point);
+ }
+
+ void init() override {
+ base_type::init();
+ }
+
+ void wait() override {
+ my_curr_dispatcher->resume(*my_target_dispatcher);
+ __TBB_ASSERT(!this->my_is_in_list.load(std::memory_order_relaxed), "Still in the queue?");
+ }
+
+ void reset() override {
+ base_type::reset();
+ spin_wait_until_eq(this->my_notify_calls, 1);
+ my_notify_calls.store(0, std::memory_order_relaxed);
+ }
+
+ // notify() is called (perhaps concurrently) twice, from:
+ // - concurrent_monitor::notify
+ // - post_resume_action::register_waiter
+ // The second notify() comes after the thread has switched stacks
+ // (because resume cannot be called while the stack is still occupied).
+ // resume() must be called only once both notifications have been performed.
+ void notify() override {
+ if (++my_notify_calls == 2) {
+ r1::resume(my_suspend_point);
+ }
+ }
+
+private:
+ friend class thread_data;
+ friend struct suspend_point_type::resume_task;
+ task_dispatcher* my_curr_dispatcher;
+ task_dispatcher* my_target_dispatcher;
+ suspend_point_type* my_suspend_point;
+ std::atomic<int> my_notify_calls{0};
+};
+#endif // __TBB_RESUMABLE_TASKS
+
+class extended_concurrent_monitor : public concurrent_monitor_base<extended_context> {
+ using base_type = concurrent_monitor_base<extended_context>;
+public:
+ using base_type::base_type;
+ /** per-thread descriptor for concurrent_monitor */
+ using thread_context = sleep_node<extended_context>;
+#if __TBB_RESUMABLE_TASKS
+ using resume_context = resume_node;
+#endif
+};
+
+} // namespace r1
+} // namespace detail
+} // namespace tbb
+
+#endif /* __TBB_concurrent_monitor_H */
diff --git a/contrib/libs/tbb/src/tbb/def/lin64-tbb.def b/contrib/libs/tbb/src/tbb/def/lin64-tbb.def
new file mode 100644
index 0000000000..09e7753ad4
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/def/lin64-tbb.def
@@ -0,0 +1,153 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+{
+global:
+
+/* Assertions (assert.cpp) */
+_ZN3tbb6detail2r117assertion_failureEPKciS3_S3_;
+
+/* ITT (profiling.cpp) */
+_ZN3tbb6detail2r112itt_task_endENS0_2d115itt_domain_enumE;
+_ZN3tbb6detail2r114itt_region_endENS0_2d115itt_domain_enumEPvy;
+_ZN3tbb6detail2r114itt_task_beginENS0_2d115itt_domain_enumEPvyS4_yNS0_2d021string_resource_indexE;
+_ZN3tbb6detail2r115call_itt_notifyEiPv;
+_ZN3tbb6detail2r115create_itt_syncEPvPKcS4_;
+_ZN3tbb6detail2r116itt_region_beginENS0_2d115itt_domain_enumEPvyS4_yNS0_2d021string_resource_indexE;
+_ZN3tbb6detail2r116itt_relation_addENS0_2d115itt_domain_enumEPvyNS0_2d012itt_relationES4_y;
+_ZN3tbb6detail2r117itt_set_sync_nameEPvPKc;
+_ZN3tbb6detail2r119itt_make_task_groupENS0_2d115itt_domain_enumEPvyS4_yNS0_2d021string_resource_indexE;
+_ZN3tbb6detail2r120itt_metadata_str_addENS0_2d115itt_domain_enumEPvyNS0_2d021string_resource_indexEPKc;
+_ZN3tbb6detail2r120itt_metadata_ptr_addENS0_2d115itt_domain_enumEPvyNS0_2d021string_resource_indexES4_;
+
+/* Allocators (allocator.cpp) */
+_ZN3tbb6detail2r115allocate_memoryEm;
+_ZN3tbb6detail2r117deallocate_memoryEPv;
+_ZN3tbb6detail2r122cache_aligned_allocateEm;
+_ZN3tbb6detail2r124cache_aligned_deallocateEPv;
+_ZN3tbb6detail2r115cache_line_sizeEv;
+_ZN3tbb6detail2r117is_tbbmalloc_usedEv;
+
+/* Small object pool (small_object_pool.cpp) */
+_ZN3tbb6detail2r18allocateERPNS0_2d117small_object_poolEm;
+_ZN3tbb6detail2r18allocateERPNS0_2d117small_object_poolEmRKNS2_14execution_dataE;
+_ZN3tbb6detail2r110deallocateERNS0_2d117small_object_poolEPvm;
+_ZN3tbb6detail2r110deallocateERNS0_2d117small_object_poolEPvmRKNS2_14execution_dataE;
+
+/* Error handling (exception.cpp) */
+_ZN3tbb6detail2r115throw_exceptionENS0_2d012exception_idE;
+_ZTIN3tbb6detail2r114bad_last_allocE;
+_ZTVN3tbb6detail2r114bad_last_allocE;
+_ZTIN3tbb6detail2r112missing_waitE;
+_ZTVN3tbb6detail2r112missing_waitE;
+_ZTIN3tbb6detail2r110user_abortE;
+_ZTVN3tbb6detail2r110user_abortE;
+_ZTIN3tbb6detail2r111unsafe_waitE;
+_ZTVN3tbb6detail2r111unsafe_waitE;
+
+/* RTM Mutex (rtm_mutex.cpp) */
+_ZN3tbb6detail2r17acquireERNS0_2d19rtm_mutexERNS3_11scoped_lockEb;
+_ZN3tbb6detail2r17releaseERNS0_2d19rtm_mutex11scoped_lockE;
+_ZN3tbb6detail2r111try_acquireERNS0_2d19rtm_mutexERNS3_11scoped_lockE;
+
+/* RTM RW Mutex (rtm_rw_mutex.cpp) */
+_ZN3tbb6detail2r114acquire_readerERNS0_2d112rtm_rw_mutexERNS3_11scoped_lockEb;
+_ZN3tbb6detail2r114acquire_writerERNS0_2d112rtm_rw_mutexERNS3_11scoped_lockEb;
+_ZN3tbb6detail2r118try_acquire_readerERNS0_2d112rtm_rw_mutexERNS3_11scoped_lockE;
+_ZN3tbb6detail2r118try_acquire_writerERNS0_2d112rtm_rw_mutexERNS3_11scoped_lockE;
+_ZN3tbb6detail2r17releaseERNS0_2d112rtm_rw_mutex11scoped_lockE;
+_ZN3tbb6detail2r17upgradeERNS0_2d112rtm_rw_mutex11scoped_lockE;
+_ZN3tbb6detail2r19downgradeERNS0_2d112rtm_rw_mutex11scoped_lockE;
+
+/* Tasks and partitioners (task.cpp) */
+_ZN3tbb6detail2r17suspendEPFvPvPNS1_18suspend_point_typeEES2_;
+_ZN3tbb6detail2r16resumeEPNS1_18suspend_point_typeE;
+_ZN3tbb6detail2r121current_suspend_pointEv;
+_ZN3tbb6detail2r114notify_waitersEm;
+
+/* Task dispatcher (task_dispatcher.cpp) */
+_ZN3tbb6detail2r114execution_slotEPKNS0_2d114execution_dataE;
+_ZN3tbb6detail2r14waitERNS0_2d112wait_contextERNS2_18task_group_contextE;
+_ZN3tbb6detail2r15spawnERNS0_2d14taskERNS2_18task_group_contextE;
+_ZN3tbb6detail2r15spawnERNS0_2d14taskERNS2_18task_group_contextEt;
+_ZN3tbb6detail2r116execute_and_waitERNS0_2d14taskERNS2_18task_group_contextERNS2_12wait_contextES6_;
+_ZN3tbb6detail2r16submitERNS0_2d14taskERNS2_18task_group_contextEPNS1_5arenaEm;
+_ZN3tbb6detail2r115current_contextEv;
+
+/* Task group context (task_group_context.cpp) */
+_ZN3tbb6detail2r110initializeERNS0_2d118task_group_contextE;
+_ZN3tbb6detail2r122cancel_group_executionERNS0_2d118task_group_contextE;
+_ZN3tbb6detail2r128is_group_execution_cancelledERNS0_2d118task_group_contextE;
+_ZN3tbb6detail2r15resetERNS0_2d118task_group_contextE;
+_ZN3tbb6detail2r17destroyERNS0_2d118task_group_contextE;
+_ZN3tbb6detail2r119capture_fp_settingsERNS0_2d118task_group_contextE;
+
+/* Task arena (arena.cpp) */
+_ZN3tbb6detail2r115max_concurrencyEPKNS0_2d115task_arena_baseE;
+_ZN3tbb6detail2r110initializeERNS0_2d115task_arena_baseE;
+_ZN3tbb6detail2r16attachERNS0_2d115task_arena_baseE;
+_ZN3tbb6detail2r17executeERNS0_2d115task_arena_baseERNS2_13delegate_baseE;
+_ZN3tbb6detail2r19terminateERNS0_2d115task_arena_baseE;
+_ZN3tbb6detail2r120isolate_within_arenaERNS0_2d113delegate_baseEl;
+_ZN3tbb6detail2r17enqueueERNS0_2d14taskEPNS2_15task_arena_baseE;
+_ZN3tbb6detail2r14waitERNS0_2d115task_arena_baseE;
+
+/* System topology parsing and threads pinning (governor.cpp) */
+_ZN3tbb6detail2r115numa_node_countEv;
+_ZN3tbb6detail2r117fill_numa_indicesEPi;
+_ZN3tbb6detail2r115core_type_countEl;
+_ZN3tbb6detail2r122fill_core_type_indicesEPil;
+_ZN3tbb6detail2r131constraints_default_concurrencyERKNS0_2d111constraintsEl;
+_ZN3tbb6detail2r128constraints_threads_per_coreERKNS0_2d111constraintsEl;
+_ZN3tbb6detail2r124numa_default_concurrencyEi;
+
+/* Observer (observer_proxy.cpp) */
+_ZN3tbb6detail2r17observeERNS0_2d123task_scheduler_observerEb;
+
+/* Queuing RW Mutex (queuing_rw_mutex.cpp) */
+_ZN3tbb6detail2r111try_acquireERNS0_2d116queuing_rw_mutexERNS3_11scoped_lockEb;
+_ZN3tbb6detail2r117upgrade_to_writerERNS0_2d116queuing_rw_mutex11scoped_lockE;
+_ZN3tbb6detail2r119downgrade_to_readerERNS0_2d116queuing_rw_mutex11scoped_lockE;
+_ZN3tbb6detail2r17acquireERNS0_2d116queuing_rw_mutexERNS3_11scoped_lockEb;
+_ZN3tbb6detail2r17releaseERNS0_2d116queuing_rw_mutex11scoped_lockE;
+_ZN3tbb6detail2r19constructERNS0_2d116queuing_rw_mutexE;
+
+/* Global control (global_control.cpp) */
+_ZN3tbb6detail2r16createERNS0_2d114global_controlE;
+_ZN3tbb6detail2r17destroyERNS0_2d114global_controlE;
+_ZN3tbb6detail2r127global_control_active_valueEi;
+_ZN3tbb6detail2r18finalizeERNS0_2d121task_scheduler_handleEl;
+_ZN3tbb6detail2r13getERNS0_2d121task_scheduler_handleE;
+
+/* Parallel pipeline (parallel_pipeline.cpp) */
+_ZN3tbb6detail2r117parallel_pipelineERNS0_2d118task_group_contextEmRKNS2_11filter_nodeE;
+_ZN3tbb6detail2r116set_end_of_inputERNS0_2d111base_filterE;
+
+/* Concurrent bounded queue (concurrent_bounded_queue.cpp) */
+_ZN3tbb6detail2r126allocate_bounded_queue_repEm;
+_ZN3tbb6detail2r126wait_bounded_queue_monitorEPNS1_18concurrent_monitorEmlRNS0_2d113delegate_baseE;
+_ZN3tbb6detail2r128abort_bounded_queue_monitorsEPNS1_18concurrent_monitorE;
+_ZN3tbb6detail2r128deallocate_bounded_queue_repEPhm;
+_ZN3tbb6detail2r128notify_bounded_queue_monitorEPNS1_18concurrent_monitorEmm;
+
+/* Versioning (version.cpp) */
+TBB_runtime_interface_version;
+TBB_runtime_version;
+
+local:
+/* TODO: fill more precisely */
+*;
+};
diff --git a/contrib/libs/tbb/src/tbb/dynamic_link.cpp b/contrib/libs/tbb/src/tbb/dynamic_link.cpp
new file mode 100644
index 0000000000..d5c5c7be7d
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/dynamic_link.cpp
@@ -0,0 +1,477 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "dynamic_link.h"
+
+#include "oneapi/tbb/detail/_template_helpers.h"
+#include "oneapi/tbb/detail/_utils.h"
+
+/*
+ This file is used by both TBB and OpenMP RTL. Do not use __TBB_ASSERT() macro
+ and runtime_warning() function because they are not available in OpenMP. Use
+ __TBB_ASSERT_EX and DYNAMIC_LINK_WARNING instead.
+*/
+
+#include <cstdarg> // va_list etc.
+#if _WIN32
+ #include <malloc.h>
+
+ // Unify system calls
+ #define dlopen( name, flags ) LoadLibrary( name )
+ #define dlsym( handle, name ) GetProcAddress( handle, name )
+ #define dlclose( handle ) ( ! FreeLibrary( handle ) )
+ #define dlerror() GetLastError()
+#ifndef PATH_MAX
+ #define PATH_MAX MAX_PATH
+#endif
+#else /* _WIN32 */
+ #include <dlfcn.h>
+ #include <unistd.h>
+
+ #include <cstring>
+ #include <climits>
+ #include <cstdlib>
+#endif /* _WIN32 */
+
+#if __TBB_WEAK_SYMBOLS_PRESENT && !__TBB_DYNAMIC_LOAD_ENABLED
+ //TODO: use function attribute for weak symbols instead of the pragma.
+ #pragma weak dlopen
+ #pragma weak dlsym
+ #pragma weak dlclose
+#endif /* __TBB_WEAK_SYMBOLS_PRESENT && !__TBB_DYNAMIC_LOAD_ENABLED */
+
+
+#define __USE_STATIC_DL_INIT ( !__ANDROID__ )
+
+
+/*
+dynamic_link is a common interface for searching for required symbols in an
+executable and dynamic libraries.
+
+dynamic_link provides certain guarantees:
+ 1. Either all or none of the requested symbols are resolved. Moreover, if
+ symbols are not resolved, the dynamic_link_descriptor table is not modified;
+ 2. All returned symbols have secured lifetime: this means that none of them
+ can be invalidated until dynamic_unlink is called;
+ 3. Any loaded library is loaded only via the full path. The full path is that
+ from which the runtime itself was loaded. (This is done to avoid security
+ issues caused by loading libraries from insecure paths).
+
+dynamic_link searches for the requested symbols in three stages, stopping as
+soon as all of the symbols have been resolved.
+
+ 1. Search the global scope:
+ a. On Windows: dynamic_link tries to obtain the handle of the requested
+ library and if it succeeds it resolves the symbols via that handle.
+ b. On Linux: dynamic_link tries to search for the symbols in the global
+ scope via the main program handle. If the symbols are present in the global
+ scope their lifetime is not guaranteed (since dynamic_link does not know
+ anything about the library from which they are exported). Therefore it
+ tries to "pin" the symbols by obtaining the library name and reopening it.
+ dlopen may fail to reopen the library in two cases:
+ i. The symbols are exported from the executable. Currently dynamic_link
+ cannot handle this situation, so it will not find these symbols in this
+ step.
+ ii. The necessary library has been unloaded and cannot be reloaded. It
+ seems there is nothing that can be done in this case. No symbols are
+ returned.
+
+ 2. Dynamic load: an attempt is made to load the requested library via the
+ full path.
+ The full path used is that from which the runtime itself was loaded. If the
+ library can be loaded, then an attempt is made to resolve the requested
+ symbols in the newly loaded library.
+ If the symbols are not found the library is unloaded.
+
+ 3. Weak symbols: if weak symbols are available they are returned.
+*/
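+
+/*
+Illustrative mapping (a restatement of the stages above, not additional behaviour):
+the stages correspond to the DYNAMIC_LINK_* flags from dynamic_link.h and are tried
+in order by dynamic_link() at the bottom of this file, stopping at the first stage
+that resolves every required descriptor.
+
+    stage 1: DYNAMIC_LINK_GLOBAL -> global_symbols_link()
+    stage 2: DYNAMIC_LINK_LOAD   -> dynamic_load() via the full path
+    stage 3: DYNAMIC_LINK_WEAK   -> weak_symbol_link()
+    DYNAMIC_LINK_ALL             -> all three stages enabled
+*/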
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+
+#if __TBB_WEAK_SYMBOLS_PRESENT || __TBB_DYNAMIC_LOAD_ENABLED
+
+#if !defined(DYNAMIC_LINK_WARNING) && !__TBB_WIN8UI_SUPPORT && __TBB_DYNAMIC_LOAD_ENABLED
+ // Report runtime errors and continue.
+ #define DYNAMIC_LINK_WARNING dynamic_link_warning
+ static void dynamic_link_warning( dynamic_link_error_t code, ... ) {
+ suppress_unused_warning(code);
+ } // dynamic_link_warning
+#endif /* !defined(DYNAMIC_LINK_WARNING) && !__TBB_WIN8UI_SUPPORT && __TBB_DYNAMIC_LOAD_ENABLED */
+
+ static bool resolve_symbols( dynamic_link_handle module, const dynamic_link_descriptor descriptors[], std::size_t required )
+ {
+ if ( !module )
+ return false;
+
+ #if !__TBB_DYNAMIC_LOAD_ENABLED /* only __TBB_WEAK_SYMBOLS_PRESENT is defined */
+ if ( !dlsym ) return false;
+ #endif /* !__TBB_DYNAMIC_LOAD_ENABLED */
+
+ const std::size_t n_desc=20; // Usually we don't have more than 20 descriptors per library
+ __TBB_ASSERT_EX( required <= n_desc, "Too many descriptors are required" );
+ if ( required > n_desc ) return false;
+ pointer_to_handler h[n_desc];
+
+ for ( std::size_t k = 0; k < required; ++k ) {
+ dynamic_link_descriptor const & desc = descriptors[k];
+ pointer_to_handler addr = (pointer_to_handler)dlsym( module, desc.name );
+ if ( !addr ) {
+ return false;
+ }
+ h[k] = addr;
+ }
+
+ // Commit the entry points.
+ // Cannot use memset here, because the writes must be atomic.
+ for( std::size_t k = 0; k < required; ++k )
+ *descriptors[k].handler = h[k];
+ return true;
+ }
+
+#if __TBB_WIN8UI_SUPPORT
+ bool dynamic_link( const char* library, const dynamic_link_descriptor descriptors[], std::size_t required, dynamic_link_handle*, int flags ) {
+ dynamic_link_handle tmp_handle = NULL;
+ TCHAR wlibrary[256];
+ if ( MultiByteToWideChar(CP_UTF8, 0, library, -1, wlibrary, 255) == 0 ) return false;
+ if ( flags & DYNAMIC_LINK_LOAD )
+ tmp_handle = LoadPackagedLibrary( wlibrary, 0 );
+ if (tmp_handle != NULL){
+ return resolve_symbols(tmp_handle, descriptors, required);
+ }else{
+ return false;
+ }
+ }
+ void dynamic_unlink( dynamic_link_handle ) {}
+ void dynamic_unlink_all() {}
+#else
+#if __TBB_DYNAMIC_LOAD_ENABLED
+/*
+ There is a security issue on Windows: LoadLibrary() may load and execute malicious code.
+ See http://www.microsoft.com/technet/security/advisory/2269637.mspx for details.
+ To avoid the issue, we have to pass the full path (not just the library name) to LoadLibrary.
+ This function constructs the full path to the specified library (it is assumed that the
+ library is located side-by-side with tbb.dll).
+
+ The function constructs an absolute path for a given relative path. Important: the base directory
+ is not the current one; it is the directory tbb.dll was loaded from.
+
+ Example:
+ Let us assume "tbb.dll" is located in "c:\program files\common\intel\" directory, e.g.
+ absolute path of the library is "c:\program files\common\intel\tbb.dll". Absolute path for
+ "tbbmalloc.dll" would be "c:\program files\common\intel\tbbmalloc.dll". Absolute path for
+ "malloc\tbbmalloc.dll" would be "c:\program files\common\intel\malloc\tbbmalloc.dll".
+*/
+
+ // Struct handle_storage is used by dynamic_link routine to store handles of
+ // all loaded or pinned dynamic libraries. When TBB is shut down, it calls
+ // dynamic_unlink_all() that unloads modules referenced by handle_storage.
+ // This struct should not have any constructors since it may be used before
+ // the constructor is called.
+ #define MAX_LOADED_MODULES 8 // The maximum number of modules that can be loaded
+
+ using atomic_incrementer = std::atomic<std::size_t>;
+
+ static struct handles_t {
+ atomic_incrementer my_size;
+ dynamic_link_handle my_handles[MAX_LOADED_MODULES];
+
+ void add(const dynamic_link_handle &handle) {
+ const std::size_t ind = my_size++;
+ __TBB_ASSERT_EX( ind < MAX_LOADED_MODULES, "Too many modules are loaded" );
+ my_handles[ind] = handle;
+ }
+
+ void free() {
+ const std::size_t size = my_size;
+ for (std::size_t i=0; i<size; ++i)
+ dynamic_unlink( my_handles[i] );
+ }
+ } handles;
+
+ static std::once_flag init_dl_data_state;
+
+ static struct ap_data_t {
+ char _path[PATH_MAX+1];
+ std::size_t _len;
+ } ap_data;
+
+ static void init_ap_data() {
+ #if _WIN32
+ // Get handle of our DLL first.
+ HMODULE handle;
+ BOOL brc = GetModuleHandleEx(
+ GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
+ (LPCSTR)( & dynamic_link ), // any function inside the library can be used for the address
+ & handle
+ );
+ if ( !brc ) { // Error occurred.
+ int err = GetLastError();
+ DYNAMIC_LINK_WARNING( dl_sys_fail, "GetModuleHandleEx", err );
+ return;
+ }
+ // Now get path to our DLL.
+ DWORD drc = GetModuleFileName( handle, ap_data._path, static_cast< DWORD >( PATH_MAX ) );
+ if ( drc == 0 ) { // Error occurred.
+ int err = GetLastError();
+ DYNAMIC_LINK_WARNING( dl_sys_fail, "GetModuleFileName", err );
+ return;
+ }
+ if ( drc >= PATH_MAX ) { // Buffer too short.
+ DYNAMIC_LINK_WARNING( dl_buff_too_small );
+ return;
+ }
+ // Find the position of the last backslash.
+ char *backslash = std::strrchr( ap_data._path, '\\' );
+
+ if ( !backslash ) { // Backslash not found.
+ __TBB_ASSERT_EX( backslash!=NULL, "Unbelievable.");
+ return;
+ }
+ __TBB_ASSERT_EX( backslash >= ap_data._path, "Unbelievable.");
+ ap_data._len = (std::size_t)(backslash - ap_data._path) + 1;
+ *(backslash+1) = 0;
+ #else
+ // Get the library path
+ Dl_info dlinfo;
+ int res = dladdr( (void*)&dynamic_link, &dlinfo ); // any function inside the library can be used for the address
+ if ( !res ) {
+ char const * err = dlerror();
+ DYNAMIC_LINK_WARNING( dl_sys_fail, "dladdr", err );
+ return;
+ } else {
+ __TBB_ASSERT_EX( dlinfo.dli_fname!=NULL, "Unbelievable." );
+ }
+
+ char const *slash = std::strrchr( dlinfo.dli_fname, '/' );
+ std::size_t fname_len=0;
+ if ( slash ) {
+ __TBB_ASSERT_EX( slash >= dlinfo.dli_fname, "Unbelievable.");
+ fname_len = (std::size_t)(slash - dlinfo.dli_fname) + 1;
+ }
+
+ std::size_t rc;
+ if ( dlinfo.dli_fname[0]=='/' ) {
+ // The library path is absolute
+ rc = 0;
+ ap_data._len = 0;
+ } else {
+ // The library path is relative so get the current working directory
+ if ( !getcwd( ap_data._path, sizeof(ap_data._path)/sizeof(ap_data._path[0]) ) ) {
+ DYNAMIC_LINK_WARNING( dl_buff_too_small );
+ return;
+ }
+ ap_data._len = std::strlen( ap_data._path );
+ ap_data._path[ap_data._len++]='/';
+ rc = ap_data._len;
+ }
+
+ if ( fname_len>0 ) {
+ if ( ap_data._len>PATH_MAX ) {
+ DYNAMIC_LINK_WARNING( dl_buff_too_small );
+ ap_data._len=0;
+ return;
+ }
+ std::strncpy( ap_data._path+rc, dlinfo.dli_fname, fname_len );
+ ap_data._len += fname_len;
+ ap_data._path[ap_data._len]=0;
+ }
+ #endif /* _WIN32 */
+ }
+
+ static void init_dl_data() {
+ init_ap_data();
+ }
+
+ /*
+ The function constructs an absolute path for the given relative path. Important: the base directory
+ is not the current one; it is the directory libtbb.so was loaded from.
+
+ Arguments:
+ in name -- Name of a file (may be with relative path; it must not be an absolute one).
+ out path -- Buffer to save result (absolute path) to.
+ in len -- Size of buffer.
+ ret -- 0 -- Error occurred.
+ > len -- Buffer too short, required size returned.
+ otherwise -- Ok, number of characters (incl. terminating null) written to buffer.
+ */
+ static std::size_t abs_path( char const * name, char * path, std::size_t len ) {
+ if ( ap_data._len == 0 )
+ return 0;
+
+ std::size_t name_len = std::strlen( name );
+ std::size_t full_len = name_len+ap_data._len;
+ if ( full_len < len ) {
+ __TBB_ASSERT( ap_data._path[ap_data._len] == 0, NULL);
+ __TBB_ASSERT( std::strlen(ap_data._path) == ap_data._len, NULL);
+ std::strncpy( path, ap_data._path, ap_data._len + 1 );
+ __TBB_ASSERT( path[ap_data._len] == 0, NULL );
+ std::strncat( path, name, len - ap_data._len );
+ __TBB_ASSERT( std::strlen(path) == full_len, NULL );
+ }
+ return full_len+1; // +1 for null character
+ }
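+
+ // Illustrative sketch (not part of the library): the calling convention implied by the
+ // contract above; the library name is a hypothetical placeholder, and dynamic_load()
+ // below uses the same pattern.
+ //
+ //     char path[PATH_MAX + 1];
+ //     std::size_t rc = abs_path( "libdummy.so", path, sizeof(path) );
+ //     if ( rc == 0 )                // init_ap_data() failed earlier; a warning was already issued
+ //         ...
+ //     else if ( rc > sizeof(path) ) // buffer too short; rc is the required size
+ //         ...
+ //     else                          // 'path' holds the null-terminated absolute path
+ //         ...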
+#endif // __TBB_DYNAMIC_LOAD_ENABLED
+ void init_dynamic_link_data() {
+ #if __TBB_DYNAMIC_LOAD_ENABLED
+ std::call_once( init_dl_data_state, init_dl_data );
+ #endif
+ }
+
+ #if __USE_STATIC_DL_INIT
+ // The ap_data structure is initialized with the current directory on Linux,
+ // so it should be initialized as soon as possible since the current directory may change.
+ // The static_init_dl_data object below provides this initialization during library loading.
+ static struct static_init_dl_data_t {
+ static_init_dl_data_t() {
+ init_dynamic_link_data();
+ }
+ } static_init_dl_data;
+ #endif
+
+ #if __TBB_WEAK_SYMBOLS_PRESENT
+ static bool weak_symbol_link( const dynamic_link_descriptor descriptors[], std::size_t required )
+ {
+ // Check if the required entries are present in what was loaded into our process.
+ for ( std::size_t k = 0; k < required; ++k )
+ if ( !descriptors[k].ptr )
+ return false;
+ // Commit the entry points.
+ for ( std::size_t k = 0; k < required; ++k )
+ *descriptors[k].handler = (pointer_to_handler) descriptors[k].ptr;
+ return true;
+ }
+ #else
+ static bool weak_symbol_link( const dynamic_link_descriptor[], std::size_t ) {
+ return false;
+ }
+ #endif /* __TBB_WEAK_SYMBOLS_PRESENT */
+
+ void dynamic_unlink( dynamic_link_handle handle ) {
+ #if !__TBB_DYNAMIC_LOAD_ENABLED /* only __TBB_WEAK_SYMBOLS_PRESENT is defined */
+ if ( !dlclose ) return;
+ #endif
+ if ( handle ) {
+ dlclose( handle );
+ }
+ }
+
+ void dynamic_unlink_all() {
+ #if __TBB_DYNAMIC_LOAD_ENABLED
+ handles.free();
+ #endif
+ }
+
+ static dynamic_link_handle global_symbols_link( const char* library, const dynamic_link_descriptor descriptors[], std::size_t required ) {
+ dynamic_link_handle library_handle{};
+#if _WIN32
+ bool res = GetModuleHandleEx(0, library, &library_handle);
+ __TBB_ASSERT_EX(res && library_handle || !res && !library_handle, nullptr);
+#else /* _WIN32 */
+ #if !__TBB_DYNAMIC_LOAD_ENABLED /* only __TBB_WEAK_SYMBOLS_PRESENT is defined */
+ if ( !dlopen ) return 0;
+ #endif /* !__TBB_DYNAMIC_LOAD_ENABLED */
+ // RTLD_GLOBAL - to guarantee that old TBB will find the loaded library
+ // RTLD_NOLOAD - do not actually load the library; only obtain a handle if it is already
+ // loaded (this avoids loading a library by bare name, i.e. without the full path)
+ library_handle = dlopen(library, RTLD_LAZY | RTLD_GLOBAL | RTLD_NOLOAD);
+#endif /* _WIN32 */
+ if (library_handle) {
+ if (!resolve_symbols(library_handle, descriptors, required)) {
+ dynamic_unlink(library_handle);
+ library_handle = nullptr;
+ }
+ }
+ return library_handle;
+ }
+
+ static void save_library_handle( dynamic_link_handle src, dynamic_link_handle *dst ) {
+ __TBB_ASSERT_EX( src, "The library handle to store must be non-zero" );
+ if ( dst )
+ *dst = src;
+ #if __TBB_DYNAMIC_LOAD_ENABLED
+ else
+ handles.add( src );
+ #endif /* __TBB_DYNAMIC_LOAD_ENABLED */
+ }
+
+ dynamic_link_handle dynamic_load( const char* library, const dynamic_link_descriptor descriptors[], std::size_t required ) {
+ ::tbb::detail::suppress_unused_warning( library, descriptors, required );
+#if __TBB_DYNAMIC_LOAD_ENABLED
+
+ std::size_t const len = PATH_MAX + 1;
+ char path[ len ];
+ std::size_t rc = abs_path( library, path, len );
+ if ( 0 < rc && rc <= len ) {
+#if _WIN32
+ // Prevent Windows from displaying silly message boxes if it fails to load library
+ // (e.g. because of MS runtime problems - one of those crazy manifest related ones)
+ UINT prev_mode = SetErrorMode (SEM_FAILCRITICALERRORS);
+#endif /* _WIN32 */
+ dynamic_link_handle library_handle = dlopen( path, RTLD_NOW | RTLD_GLOBAL );
+#if _WIN32
+ SetErrorMode (prev_mode);
+#endif /* _WIN32 */
+ if( library_handle ) {
+ if( !resolve_symbols( library_handle, descriptors, required ) ) {
+ // The loaded library does not contain all the expected entry points
+ dynamic_unlink( library_handle );
+ library_handle = NULL;
+ }
+ } else
+ DYNAMIC_LINK_WARNING( dl_lib_not_found, path, dlerror() );
+ return library_handle;
+ } else if ( rc>len )
+ DYNAMIC_LINK_WARNING( dl_buff_too_small );
+ // rc == 0 means that init_ap_data() failed, so the warning has already been issued.
+
+#endif /* __TBB_DYNAMIC_LOAD_ENABLED */
+ return 0;
+ }
+
+ bool dynamic_link( const char* library, const dynamic_link_descriptor descriptors[], std::size_t required, dynamic_link_handle *handle, int flags ) {
+ init_dynamic_link_data();
+
+ // TODO: May global_symbols_link find weak symbols?
+ dynamic_link_handle library_handle = ( flags & DYNAMIC_LINK_GLOBAL ) ? global_symbols_link( library, descriptors, required ) : 0;
+
+ if ( !library_handle && ( flags & DYNAMIC_LINK_LOAD ) )
+ library_handle = dynamic_load( library, descriptors, required );
+
+ if ( !library_handle && ( flags & DYNAMIC_LINK_WEAK ) )
+ return weak_symbol_link( descriptors, required );
+
+ if ( library_handle ) {
+ save_library_handle( library_handle, handle );
+ return true;
+ }
+ return false;
+ }
+
+#endif /*__TBB_WIN8UI_SUPPORT*/
+#else /* __TBB_WEAK_SYMBOLS_PRESENT || __TBB_DYNAMIC_LOAD_ENABLED */
+ bool dynamic_link( const char*, const dynamic_link_descriptor*, std::size_t, dynamic_link_handle *handle, int ) {
+ if ( handle )
+ *handle=0;
+ return false;
+ }
+ void dynamic_unlink( dynamic_link_handle ) {}
+ void dynamic_unlink_all() {}
+#endif /* __TBB_WEAK_SYMBOLS_PRESENT || __TBB_DYNAMIC_LOAD_ENABLED */
+
+} // namespace r1
+} // namespace detail
+} // namespace tbb
diff --git a/contrib/libs/tbb/src/tbb/dynamic_link.h b/contrib/libs/tbb/src/tbb/dynamic_link.h
new file mode 100644
index 0000000000..91adcc507c
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/dynamic_link.h
@@ -0,0 +1,115 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#ifndef __TBB_dynamic_link
+#define __TBB_dynamic_link
+
+// Support for dynamic loading entry points from other shared libraries.
+
+#include "oneapi/tbb/detail/_config.h"
+
+#include <atomic>
+#include <mutex>
+
+/** Symbols declared and defined here go into namespace tbb::detail::r1. **/
+
+#include <cstddef>
+#if _WIN32
+#include <Windows.h>
+#endif /* _WIN32 */
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+
+//! Type definition for a pointer to a function of type void somefunc(void)
+typedef void (*pointer_to_handler)();
+
+//! Helper macros to construct a dynamic_link_descriptor structure.
+// The double cast through void* in the DLD macro is necessary to
+// prevent warnings from some compilers (e.g. g++ 4.1).
+#if __TBB_WEAK_SYMBOLS_PRESENT
+#define DLD(s,h) {#s, (pointer_to_handler*)(void*)(&h), (pointer_to_handler)&s}
+#define DLD_NOWEAK(s,h) {#s, (pointer_to_handler*)(void*)(&h), NULL}
+#else
+#define DLD(s,h) {#s, (pointer_to_handler*)(void*)(&h)}
+#define DLD_NOWEAK(s,h) DLD(s,h)
+#endif /* __TBB_WEAK_SYMBOLS_PRESENT */
+//! Association between a handler name and location of pointer to it.
+struct dynamic_link_descriptor {
+ //! Name of the handler
+ const char* name;
+ //! Pointer to the handler
+ pointer_to_handler* handler;
+#if __TBB_WEAK_SYMBOLS_PRESENT
+ //! Weak symbol
+ pointer_to_handler ptr;
+#endif
+};
+
+#if _WIN32
+using dynamic_link_handle = HMODULE;
+#else
+using dynamic_link_handle = void*;
+#endif /* _WIN32 */
+
+const int DYNAMIC_LINK_GLOBAL = 0x01;
+const int DYNAMIC_LINK_LOAD = 0x02;
+const int DYNAMIC_LINK_WEAK = 0x04;
+const int DYNAMIC_LINK_ALL = DYNAMIC_LINK_GLOBAL | DYNAMIC_LINK_LOAD | DYNAMIC_LINK_WEAK;
+
+//! Fill in dynamically linked handlers.
+/** 'library' is the name of the requested library. It should not contain a full
+ path since dynamic_link adds the full path (from which the runtime itself
+ was loaded) to the library name.
+ 'required' is the number of the initial entries in the array descriptors[]
+ that have to be found in order for the call to succeed. If the library and
+ all the required handlers are found, then the corresponding handler
+ pointers are set, and the return value is true. Otherwise the original
+ array of descriptors is left untouched and the return value is false.
+ 'required' is limited to 20 (exceeding this value results in a failure to
+ load the symbols, and the return value will be false).
+ 'handle' is the handle of the library if it is loaded. Otherwise it is left
+ untouched.
+ 'flags' is the set of DYNAMIC_LINK_* flags. Each of the DYNAMIC_LINK_* flags
+ allows its corresponding linking stage.
+**/
+bool dynamic_link( const char* library,
+ const dynamic_link_descriptor descriptors[],
+ std::size_t required,
+ dynamic_link_handle* handle = 0,
+ int flags = DYNAMIC_LINK_ALL );
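+
+//! Example of intended usage (an illustrative sketch, not part of the library).
+/** The symbol name 'foo_v1', the handler variable, and the library name are
+    hypothetical placeholders; real descriptor tables are built the same way
+    with the DLD macro.
+
+        static void (*foo_handler)() = nullptr;
+        static const dynamic_link_descriptor FooLinkTable[] = {
+            DLD(foo_v1, foo_handler)
+        };
+
+        // Resolve 1 required entry point from libfoo: on success foo_handler is
+        // set and may be called; on failure the table is left untouched.
+        bool ok = dynamic_link("libfoo.so.1", FooLinkTable, 1);
+**/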
+
+void dynamic_unlink( dynamic_link_handle handle );
+
+void dynamic_unlink_all();
+
+enum dynamic_link_error_t {
+ dl_success = 0,
+ dl_lib_not_found, // char const * lib, dlerr_t err
+ dl_sym_not_found, // char const * sym, dlerr_t err
+ // Note: dlerr_t depends on OS: it is char const * on Linux* and macOS*, int on Windows*.
+ dl_sys_fail, // char const * func, int err
+ dl_buff_too_small // none
+}; // dynamic_link_error_t
+
+} // namespace r1
+} // namespace detail
+} // namespace tbb
+
+#endif /* __TBB_dynamic_link */
diff --git a/contrib/libs/tbb/src/tbb/environment.h b/contrib/libs/tbb/src/tbb/environment.h
new file mode 100644
index 0000000000..8886ef09e1
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/environment.h
@@ -0,0 +1,81 @@
+/*
+ Copyright (c) 2018-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#ifndef __TBB_tbb_environment_H
+#define __TBB_tbb_environment_H
+
+#include <cstdlib>
+#include <cstring>
+#include <cerrno>
+#include <cctype>
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+
+#if __TBB_WIN8UI_SUPPORT
+static inline bool GetBoolEnvironmentVariable( const char * ) {
+ return false;
+}
+
+static inline long GetIntegralEnvironmentVariable( const char * ) {
+ return -1;
+}
+#else /* __TBB_WIN8UI_SUPPORT */
+static inline bool GetBoolEnvironmentVariable( const char * name ) {
+ if ( const char* s = std::getenv(name) ) {
+ // The result is defined as true only if the environment variable contains
+ // no characters except one '1' character and an arbitrary number of spaces
+ // (including the absence of spaces).
+ size_t index = std::strspn(s, " ");
+ if (s[index] != '1') return false;
+ index++;
+ // Accessing the memory after incrementing the index is safe because getenv()
+ // returns a null-terminated string: even if the character at 'index' is '1'
+ // and it is the last character of the string, the incremented index points
+ // at the terminating '\0'.
+ index += std::strspn(&s[index], " ");
+ return !s[index];
+ }
+ return false;
+}
+
+static inline long GetIntegralEnvironmentVariable( const char * name ) {
+ if ( const char* s = std::getenv(name) ) {
+ char* end = NULL;
+ errno = 0;
+ long value = std::strtol(s, &end, 10);
+
+ // The value is out of range or negative, or the string is not convertible
+ if ( errno == ERANGE || value < 0 || end==s ) {
+ return -1;
+ }
+ for ( ; *end != '\0'; end++ ) {
+ if ( !std::isspace(*end) ) {
+ return -1;
+ }
+ }
+ return value;
+ }
+ return -1;
+}
+#endif /* __TBB_WIN8UI_SUPPORT */
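+
+// Illustrative sketch (not part of the library): the parsing rules implemented
+// above, shown on hypothetical variable values.
+//
+//     GetBoolEnvironmentVariable(name) with value " 1 "  -> true
+//     GetBoolEnvironmentVariable(name) with value "1x"   -> false (extra non-space character)
+//     GetBoolEnvironmentVariable(name) with value "0"    -> false
+//     GetIntegralEnvironmentVariable(name) with " 42 "   -> 42
+//     GetIntegralEnvironmentVariable(name) with "-5"     -> -1 (negative values are rejected)
+//     GetIntegralEnvironmentVariable(name) with "abc"    -> -1 (not convertible)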
+
+} // namespace r1
+} // namespace detail
+} // namespace tbb
+
+#endif // __TBB_tbb_environment_H
diff --git a/contrib/libs/tbb/src/tbb/exception.cpp b/contrib/libs/tbb/src/tbb/exception.cpp
new file mode 100644
index 0000000000..c3e95d6d97
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/exception.cpp
@@ -0,0 +1,162 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "oneapi/tbb/detail/_exception.h"
+#include "oneapi/tbb/detail/_assert.h"
+#include "oneapi/tbb/detail/_template_helpers.h"
+
+#include <cstring>
+#include <cstdio>
+#include <stdexcept> // std::runtime_error
+#include <new>
+#include <stdexcept>
+
+#define __TBB_STD_RETHROW_EXCEPTION_POSSIBLY_BROKEN \
+ (__GLIBCXX__ && __TBB_GLIBCXX_VERSION>=40700 && __TBB_GLIBCXX_VERSION<60000 && TBB_USE_EXCEPTIONS)
+
+#if __TBB_STD_RETHROW_EXCEPTION_POSSIBLY_BROKEN
+// GCC ABI declarations necessary for a workaround
+#include <cxxabi.h>
+#endif
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+
+const char* bad_last_alloc::what() const noexcept(true) { return "bad allocation in previous or concurrent attempt"; }
+const char* user_abort::what() const noexcept(true) { return "User-initiated abort has terminated this operation"; }
+const char* missing_wait::what() const noexcept(true) { return "wait() was not called on the structured_task_group"; }
+
+#if TBB_USE_EXCEPTIONS
+ template <typename F>
+ /*[[noreturn]]*/ void do_throw_noexcept(F throw_func) noexcept {
+ throw_func();
+ }
+
+ /*[[noreturn]]*/ void do_throw_noexcept(void (*throw_func)()) noexcept {
+ throw_func();
+ }
+
+ bool terminate_on_exception(); // defined in global_control.cpp and ipc_server.cpp
+
+ template <typename F>
+ /*[[noreturn]]*/ void do_throw(F throw_func) {
+ if (terminate_on_exception()) {
+ do_throw_noexcept(throw_func);
+ }
+ throw_func();
+ }
+
+ #define DO_THROW(exc, init_args) do_throw( []{ throw exc init_args; } );
+#else /* !TBB_USE_EXCEPTIONS */
+ #define PRINT_ERROR_AND_ABORT(exc_name, msg) \
+ std::fprintf (stderr, "Exception %s with message %s would have been thrown, " \
+ "if exception handling had not been disabled. Aborting.\n", exc_name, msg); \
+ std::fflush(stderr); \
+ std::abort();
+ #define DO_THROW(exc, init_args) PRINT_ERROR_AND_ABORT(#exc, #init_args)
+#endif /* !TBB_USE_EXCEPTIONS */
+
+void throw_exception ( exception_id eid ) {
+ switch ( eid ) {
+ case exception_id::bad_alloc: DO_THROW(std::bad_alloc, ()); break;
+ case exception_id::bad_last_alloc: DO_THROW(bad_last_alloc, ()); break;
+ case exception_id::user_abort: DO_THROW( user_abort, () ); break;
+ case exception_id::nonpositive_step: DO_THROW(std::invalid_argument, ("Step must be positive") ); break;
+ case exception_id::out_of_range: DO_THROW(std::out_of_range, ("Index out of requested size range")); break;
+ case exception_id::reservation_length_error: DO_THROW(std::length_error, ("Attempt to exceed implementation defined length limits")); break;
+ case exception_id::missing_wait: DO_THROW(missing_wait, ()); break;
+ case exception_id::invalid_load_factor: DO_THROW(std::out_of_range, ("Invalid hash load factor")); break;
+ case exception_id::invalid_key: DO_THROW(std::out_of_range, ("invalid key")); break;
+ case exception_id::bad_tagged_msg_cast: DO_THROW(std::runtime_error, ("Illegal tagged_msg cast")); break;
+#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE
+ case exception_id::unsafe_wait: DO_THROW(unsafe_wait, ("Unsafe to wait further")); break;
+#endif
+ default: __TBB_ASSERT ( false, "Unknown exception ID" );
+ }
+ __TBB_ASSERT(false, "Unreachable code");
+}
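+
+// Illustrative note (not part of the library): a typical call site inside the runtime.
+// For example, sleep_node::wait() in concurrent_monitor.h reports a user-initiated
+// abort this way:
+//
+//     if (this->my_aborted)
+//         throw_exception(exception_id::user_abort);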
+
+/* The "what" string should be fairly short, not more than about 128 characters.
+ Because we control all the call sites of handle_perror, it is pointless
+ to bullet-proof it for very long strings.
+
+ Design note: ADR put this routine off to the side in tbb_misc.cpp instead of
+ Task.cpp because the throw generates a pathetic lot of code, and ADR wanted
+ this large chunk of code to be placed on a cold page. */
+void handle_perror( int error_code, const char* what ) {
+ const int BUF_SIZE = 255;
+ char buf[BUF_SIZE + 1] = { 0 };
+ std::strncat(buf, what, BUF_SIZE);
+ std::size_t buf_len = std::strlen(buf);
+ if (error_code) {
+ std::strncat(buf, ": ", BUF_SIZE - buf_len);
+ buf_len = std::strlen(buf);
+ std::strncat(buf, std::strerror(error_code), BUF_SIZE - buf_len);
+ buf_len = std::strlen(buf);
+ }
+ __TBB_ASSERT(buf_len <= BUF_SIZE && buf[buf_len] == 0, nullptr);
+#if TBB_USE_EXCEPTIONS
+ do_throw([&buf] { throw std::runtime_error(buf); });
+#else
+ PRINT_ERROR_AND_ABORT( "runtime_error", buf);
+#endif /* !TBB_USE_EXCEPTIONS */
+}
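+
+// Illustrative sketch (not part of the library): a typical call site. The function name
+// in the message is a hypothetical example. With a non-zero error code this throws
+// std::runtime_error("pthread_setspecific: <strerror text>") when exceptions are enabled,
+// and prints the same message and aborts otherwise.
+//
+//     int status = pthread_setspecific(key, value);
+//     if (status)
+//         handle_perror(status, "pthread_setspecific");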
+
+#if __TBB_STD_RETHROW_EXCEPTION_POSSIBLY_BROKEN
+// Runtime detection and workaround for the GCC bug 62258.
+// The problem is that std::rethrow_exception() does not increment a counter
+// of active exceptions, causing std::uncaught_exception() to return a wrong value.
+// The code is created after, and roughly reflects, the workaround
+// at https://gcc.gnu.org/bugzilla/attachment.cgi?id=34683
+
+void fix_broken_rethrow() {
+ struct gcc_eh_data {
+ void * caughtExceptions;
+ unsigned int uncaughtExceptions;
+ };
+ gcc_eh_data* eh_data = punned_cast<gcc_eh_data*>( abi::__cxa_get_globals() );
+ ++eh_data->uncaughtExceptions;
+}
+
+bool gcc_rethrow_exception_broken() {
+ bool is_broken;
+ __TBB_ASSERT( !std::uncaught_exception(),
+ "gcc_rethrow_exception_broken() must not be called when an exception is active" );
+ try {
+ // Throw, catch, and rethrow an exception
+ try {
+ throw __TBB_GLIBCXX_VERSION;
+ } catch(...) {
+ std::rethrow_exception( std::current_exception() );
+ }
+ } catch(...) {
+ // Check the bug presence
+ is_broken = std::uncaught_exception();
+ }
+ if( is_broken ) fix_broken_rethrow();
+ __TBB_ASSERT( !std::uncaught_exception(), NULL );
+ return is_broken;
+}
+#else
+void fix_broken_rethrow() {}
+bool gcc_rethrow_exception_broken() { return false; }
+#endif /* __TBB_STD_RETHROW_EXCEPTION_POSSIBLY_BROKEN */
+
+} // namespace r1
+} // namespace detail
+} // namespace tbb
+
diff --git a/contrib/libs/tbb/src/tbb/global_control.cpp b/contrib/libs/tbb/src/tbb/global_control.cpp
new file mode 100644
index 0000000000..a9eac2cbc3
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/global_control.cpp
@@ -0,0 +1,275 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "oneapi/tbb/detail/_config.h"
+#include "oneapi/tbb/detail/_template_helpers.h"
+
+#include "oneapi/tbb/global_control.h"
+#include "oneapi/tbb/tbb_allocator.h"
+#include "oneapi/tbb/spin_mutex.h"
+
+#include "governor.h"
+#include "market.h"
+#include "misc.h"
+
+#include <atomic>
+#include <set>
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+
+//! Comparator for a set of global_control objects
+struct control_storage_comparator {
+ bool operator()(const global_control* lhs, const global_control* rhs) const;
+};
+
+class control_storage {
+ friend struct global_control_impl;
+ friend std::size_t global_control_active_value(int);
+protected:
+ std::size_t my_active_value{0};
+ std::set<global_control*, control_storage_comparator, tbb_allocator<global_control*>> my_list{};
+ spin_mutex my_list_mutex{};
+public:
+ virtual std::size_t default_value() const = 0;
+ virtual void apply_active(std::size_t new_active) {
+ my_active_value = new_active;
+ }
+ virtual bool is_first_arg_preferred(std::size_t a, std::size_t b) const {
+ return a>b; // prefer max by default
+ }
+ virtual std::size_t active_value() {
+ spin_mutex::scoped_lock lock(my_list_mutex); // protect my_list.empty() call
+ return !my_list.empty() ? my_active_value : default_value();
+ }
+};
+
+class alignas(max_nfs_size) allowed_parallelism_control : public control_storage {
+ virtual std::size_t default_value() const override {
+ return max(1U, governor::default_num_threads());
+ }
+ virtual bool is_first_arg_preferred(std::size_t a, std::size_t b) const override {
+ return a<b; // prefer min allowed parallelism
+ }
+ virtual void apply_active(std::size_t new_active) override {
+ control_storage::apply_active(new_active);
+ __TBB_ASSERT( my_active_value>=1, NULL );
+ // -1 to take external thread into account
+ market::set_active_num_workers( my_active_value-1 );
+ }
+ virtual std::size_t active_value() override {
+ spin_mutex::scoped_lock lock(my_list_mutex); // protect my_list.empty() call
+ if (my_list.empty())
+ return default_value();
+ // non-zero, if market is active
+ const std::size_t workers = market::max_num_workers();
+ // We can't exceed market's maximal number of workers.
+ // +1 to take external thread into account
+ return workers? min(workers+1, my_active_value): my_active_value;
+ }
+public:
+ std::size_t active_value_if_present() const {
+ return !my_list.empty() ? my_active_value : 0;
+ }
+};
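+
+// Illustrative sketch (not part of the library): how the "prefer min" policy above
+// behaves when several global_control objects coexist (the values are hypothetical):
+//
+//     tbb::global_control a(tbb::global_control::max_allowed_parallelism, 8);
+//     tbb::global_control b(tbb::global_control::max_allowed_parallelism, 4);
+//     // The active value is 4: the smallest requested limit wins.
+//     // Destroying 'b' restores 8; destroying both restores the default,
+//     // i.e. governor::default_num_threads().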
+
+class alignas(max_nfs_size) stack_size_control : public control_storage {
+ virtual std::size_t default_value() const override {
+ return ThreadStackSize;
+ }
+ virtual void apply_active(std::size_t new_active) override {
+ control_storage::apply_active(new_active);
+#if __TBB_WIN8UI_SUPPORT && (_WIN32_WINNT < 0x0A00)
+ __TBB_ASSERT( false, "For Windows 8 Store* apps we must not set stack size" );
+#endif
+ }
+};
+
+class alignas(max_nfs_size) terminate_on_exception_control : public control_storage {
+ virtual std::size_t default_value() const override {
+ return 0;
+ }
+};
+
+#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE
+class alignas(max_nfs_size) lifetime_control : public control_storage {
+ virtual bool is_first_arg_preferred(std::size_t, std::size_t) const override {
+ return false; // not interested
+ }
+ virtual std::size_t default_value() const override {
+ return 0;
+ }
+ virtual void apply_active(std::size_t new_active) override {
+ if (new_active == 1) {
+ // reserve the market reference
+ market::global_market_mutex_type::scoped_lock lock( market::theMarketMutex );
+ if (market::theMarket) {
+ market::add_ref_unsafe(lock, /*is_public*/ true);
+ }
+ } else if (new_active == 0) {
+ // release the market reference
+ market::global_market_mutex_type::scoped_lock lock( market::theMarketMutex );
+ if (market::theMarket != nullptr) {
+ lock.release();
+ market::theMarket->release(/*is_public*/ true, /*blocking_terminate*/ false);
+ }
+ }
+ control_storage::apply_active(new_active);
+ }
+
+public:
+ bool is_empty() {
+ spin_mutex::scoped_lock lock(my_list_mutex);
+ return my_list.empty();
+ }
+};
+#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE
+
+static allowed_parallelism_control allowed_parallelism_ctl;
+static stack_size_control stack_size_ctl;
+static terminate_on_exception_control terminate_on_exception_ctl;
+#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE
+static lifetime_control lifetime_ctl;
+static control_storage *controls[] = {&allowed_parallelism_ctl, &stack_size_ctl, &terminate_on_exception_ctl, &lifetime_ctl};
+#else
+static control_storage *controls[] = {&allowed_parallelism_ctl, &stack_size_ctl, &terminate_on_exception_ctl};
+#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE
+
+//! Comparator for a set of global_control objects
+inline bool control_storage_comparator::operator()(const global_control* lhs, const global_control* rhs) const {
+ __TBB_ASSERT_RELEASE(lhs->my_param < global_control::parameter_max , NULL);
+ return lhs->my_value < rhs->my_value || (lhs->my_value == rhs->my_value && lhs < rhs);
+}
+
+unsigned market::app_parallelism_limit() {
+ return allowed_parallelism_ctl.active_value_if_present();
+}
+
+bool terminate_on_exception() {
+ return global_control::active_value(global_control::terminate_on_exception) == 1;
+}
+
+#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE
+unsigned market::is_lifetime_control_present() {
+ return !lifetime_ctl.is_empty();
+}
+#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE
+
+struct global_control_impl {
+private:
+ static bool erase_if_present(control_storage* const c, d1::global_control& gc) {
+ auto it = c->my_list.find(&gc);
+ if (it != c->my_list.end()) {
+ c->my_list.erase(it);
+ return true;
+ }
+ return false;
+ }
+
+public:
+
+ static void create(d1::global_control& gc) {
+ __TBB_ASSERT_RELEASE(gc.my_param < global_control::parameter_max, NULL);
+ control_storage* const c = controls[gc.my_param];
+
+ spin_mutex::scoped_lock lock(c->my_list_mutex);
+ if (c->my_list.empty() || c->is_first_arg_preferred(gc.my_value, c->my_active_value)) {
+ // To guarantee that apply_active() is called with the current active value,
+ // call it here and in destroy() under my_list_mutex.
+ c->apply_active(gc.my_value);
+ }
+ c->my_list.insert(&gc);
+ }
+
+ static void destroy(d1::global_control& gc) {
+ __TBB_ASSERT_RELEASE(gc.my_param < global_control::parameter_max, NULL);
+ control_storage* const c = controls[gc.my_param];
+#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE
+ __TBB_ASSERT(gc.my_param == global_control::scheduler_handle || !c->my_list.empty(), NULL);
+#else
+ __TBB_ASSERT(!c->my_list.empty(), NULL);
+#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE
+ // Concurrent reading and changing global parameter is possible.
+ spin_mutex::scoped_lock lock(c->my_list_mutex);
+ std::size_t new_active = (std::size_t)(-1), old_active = c->my_active_value;
+
+ if (!erase_if_present(c, gc)) {
+#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE
+ __TBB_ASSERT(gc.my_param == global_control::scheduler_handle , NULL);
+ return;
+#else
+ __TBB_ASSERT(false, "Unreachable code");
+#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE
+ }
+ if (c->my_list.empty()) {
+ __TBB_ASSERT(new_active == (std::size_t) - 1, NULL);
+ new_active = c->default_value();
+ } else {
+ new_active = (*c->my_list.begin())->my_value;
+ }
+ if (new_active != old_active) {
+ c->apply_active(new_active);
+ }
+ }
+
+ static bool remove_and_check_if_empty(d1::global_control& gc) {
+ __TBB_ASSERT_RELEASE(gc.my_param < global_control::parameter_max, NULL);
+ control_storage* const c = controls[gc.my_param];
+ __TBB_ASSERT(!c->my_list.empty(), NULL);
+
+ spin_mutex::scoped_lock lock(c->my_list_mutex);
+ erase_if_present(c, gc);
+ return c->my_list.empty();
+ }
+#if TBB_USE_ASSERT
+ static bool is_present(d1::global_control& gc) {
+ __TBB_ASSERT_RELEASE(gc.my_param < global_control::parameter_max, NULL);
+ control_storage* const c = controls[gc.my_param];
+
+ spin_mutex::scoped_lock lock(c->my_list_mutex);
+ auto it = c->my_list.find(&gc);
+ if (it != c->my_list.end()) {
+ return true;
+ }
+ return false;
+ }
+#endif // TBB_USE_ASSERT
+};
+
+void __TBB_EXPORTED_FUNC create(d1::global_control& gc) {
+ global_control_impl::create(gc);
+}
+void __TBB_EXPORTED_FUNC destroy(d1::global_control& gc) {
+ global_control_impl::destroy(gc);
+}
+
+bool remove_and_check_if_empty(d1::global_control& gc) {
+ return global_control_impl::remove_and_check_if_empty(gc);
+}
+#if TBB_USE_ASSERT
+bool is_present(d1::global_control& gc) {
+ return global_control_impl::is_present(gc);
+}
+#endif // TBB_USE_ASSERT
+std::size_t __TBB_EXPORTED_FUNC global_control_active_value(int param) {
+ __TBB_ASSERT_RELEASE(param < global_control::parameter_max, NULL);
+ return controls[param]->active_value();
+}
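+
+// Illustrative sketch (not part of the library): how the public RAII object reaches
+// these entry points (assuming, as in the public headers, that the d1::global_control
+// constructor calls create() and its destructor calls destroy()):
+//
+//     {
+//         tbb::global_control limit(tbb::global_control::max_allowed_parallelism, 4);
+//         // global_control_active_value(max_allowed_parallelism) now reports 4
+//     } // destroy() runs here; the previous limit (or the default) is restored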
+
+} // namespace r1
+} // namespace detail
+} // namespace tbb
diff --git a/contrib/libs/tbb/src/tbb/governor.cpp b/contrib/libs/tbb/src/tbb/governor.cpp
new file mode 100644
index 0000000000..b75b91a75c
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/governor.cpp
@@ -0,0 +1,526 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "governor.h"
+#include "main.h"
+#include "thread_data.h"
+#include "market.h"
+#include "arena.h"
+#include "dynamic_link.h"
+
+#include "oneapi/tbb/task_group.h"
+#include "oneapi/tbb/global_control.h"
+#include "oneapi/tbb/tbb_allocator.h"
+#include "oneapi/tbb/info.h"
+
+#include "task_dispatcher.h"
+
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <atomic>
+#include <algorithm>
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+
+#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE
+//! Defined in global_control.cpp
+bool remove_and_check_if_empty(d1::global_control& gc);
+bool is_present(d1::global_control& gc);
+#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE
+
+namespace rml {
+tbb_server* make_private_server( tbb_client& client );
+} // namespace rml
+
+//------------------------------------------------------------------------
+// governor
+//------------------------------------------------------------------------
+
+void governor::acquire_resources () {
+#if __TBB_USE_POSIX
+ int status = theTLS.create(auto_terminate);
+#else
+ int status = theTLS.create();
+#endif
+ if( status )
+ handle_perror(status, "TBB failed to initialize task scheduler TLS\n");
+ detect_cpu_features(cpu_features);
+ is_rethrow_broken = gcc_rethrow_exception_broken();
+}
+
+void governor::release_resources () {
+ theRMLServerFactory.close();
+ destroy_process_mask();
+
+ __TBB_ASSERT(!(__TBB_InitOnce::initialization_done() && theTLS.get()), "TBB is unloaded while thread data still alive?");
+
+ int status = theTLS.destroy();
+ if( status )
+ runtime_warning("failed to destroy task scheduler TLS: %s", std::strerror(status));
+ dynamic_unlink_all();
+}
+
+rml::tbb_server* governor::create_rml_server ( rml::tbb_client& client ) {
+ rml::tbb_server* server = NULL;
+ if( !UsePrivateRML ) {
+ ::rml::factory::status_type status = theRMLServerFactory.make_server( server, client );
+ if( status != ::rml::factory::st_success ) {
+ UsePrivateRML = true;
+ runtime_warning( "rml::tbb_factory::make_server failed with status %x, falling back on private rml", status );
+ }
+ }
+ if ( !server ) {
+ __TBB_ASSERT( UsePrivateRML, NULL );
+ server = rml::make_private_server( client );
+ }
+ __TBB_ASSERT( server, "Failed to create RML server" );
+ return server;
+}
+
+void governor::one_time_init() {
+ if ( !__TBB_InitOnce::initialization_done() ) {
+ DoOneTimeInitialization();
+ }
+}
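+
+/* Editorial note (not part of the original sources): one_time_init() together with
+   DoOneTimeInitialization() (main.cpp) forms a double-checked initialization: a cheap
+   acquire-load fast path, and a re-check under the global lock on the slow path.
+   A generic sketch of the pattern, under those assumptions:
+
+       std::atomic<bool> done{false};
+       std::mutex m;
+
+       void ensure_initialized() {
+           if (!done.load(std::memory_order_acquire)) {      // fast path
+               std::lock_guard<std::mutex> lock(m);
+               if (!done.load(std::memory_order_relaxed)) {  // re-check under the lock
+                   // ... perform the one-time setup ...
+                   done.store(true, std::memory_order_release);
+               }
+           }
+       }
+*/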
+
+/*
+ There is no portable way to get the stack base address in POSIX; however, modern
+ Linux versions provide the pthread_getattr_np API that can be used to obtain a thread's
+ stack size and base address. Unfortunately, even this function does not provide
+ enough information for the main thread on the IA-64 architecture (the RSE spill area
+ and the memory stack are allocated as two separate discontinuous chunks of memory),
+ and there is no portable way to discern the main and the secondary threads.
+ Thus for macOS* and for the IA-64 architecture on Linux* OS we use the TBB worker stack size for
+ all threads and use the current stack top as the stack base. This simplified
+ approach is based on the following assumptions:
+ 1) If the default stack size is insufficient for the user app's needs, the
+ required amount will be explicitly specified by the user at the point of
+ TBB scheduler initialization (as an argument to the tbb::task_scheduler_init
+ constructor).
+ 2) When an external thread initializes the scheduler, it has enough space on its
+ stack. Here "enough" means "at least as much as worker threads have".
+ 3) If the user app strives to conserve memory by cutting the stack size, it
+ should do this for TBB workers too (as in #1).
+*/
+static std::uintptr_t get_stack_base(std::size_t stack_size) {
+ // Stacks grow top-down. The highest address is called the "stack base",
+ // and the lowest is the "stack limit".
+#if USE_WINTHREAD
+ suppress_unused_warning(stack_size);
+ NT_TIB* pteb = (NT_TIB*)NtCurrentTeb();
+ __TBB_ASSERT(&pteb < pteb->StackBase && &pteb > pteb->StackLimit, "invalid stack info in TEB");
+ return reinterpret_cast<std::uintptr_t>(pteb->StackBase);
+#else /* USE_PTHREAD */
+ // There is no portable way to get the stack base address in POSIX, so we use a
+ // non-portable method (available on all modern Linux systems) or a simplified approach
+ // based on common-sense assumptions. The most important assumption
+ // is that the main thread's stack size is not less than that of other threads.
+
+ // Points to the lowest addressable byte of a stack.
+ void* stack_limit = nullptr;
+#if __linux__ && !__bg__
+ size_t np_stack_size = 0;
+ pthread_attr_t np_attr_stack;
+ if (0 == pthread_getattr_np(pthread_self(), &np_attr_stack)) {
+ if (0 == pthread_attr_getstack(&np_attr_stack, &stack_limit, &np_stack_size)) {
+ __TBB_ASSERT( &stack_limit > stack_limit, "stack size must be positive" );
+ }
+ pthread_attr_destroy(&np_attr_stack);
+ }
+#endif /* __linux__ */
+ std::uintptr_t stack_base{};
+ if (stack_limit) {
+ stack_base = reinterpret_cast<std::uintptr_t>(stack_limit) + stack_size;
+ } else {
+ // Use an anchor as a base stack address.
+ int anchor{};
+ stack_base = reinterpret_cast<std::uintptr_t>(&anchor);
+ }
+ return stack_base;
+#endif /* USE_PTHREAD */
+}
+
+void governor::init_external_thread() {
+ one_time_init();
+ // Create new scheduler instance with arena
+ int num_slots = default_num_threads();
+ // TODO_REVAMP: support an external thread without an implicit arena
+ int num_reserved_slots = 1;
+ unsigned arena_priority_level = 1; // corresponds to tbb::task_arena::priority::normal
+ std::size_t stack_size = 0;
+ arena& a = *market::create_arena(num_slots, num_reserved_slots, arena_priority_level, stack_size);
+ // We need an internal reference to the market. TODO: is it legacy?
+ market::global_market(false);
+ // External thread always occupies the first slot
+ thread_data& td = *new(cache_aligned_allocate(sizeof(thread_data))) thread_data(0, false);
+ td.attach_arena(a, /*slot index*/ 0);
+
+ stack_size = a.my_market->worker_stack_size();
+ std::uintptr_t stack_base = get_stack_base(stack_size);
+ task_dispatcher& task_disp = td.my_arena_slot->default_task_dispatcher();
+ task_disp.set_stealing_threshold(calculate_stealing_threshold(stack_base, stack_size));
+ td.attach_task_dispatcher(task_disp);
+
+ td.my_arena_slot->occupy();
+ a.my_market->add_external_thread(td);
+ set_thread_data(td);
+}
+
+void governor::auto_terminate(void* tls) {
+ __TBB_ASSERT(get_thread_data_if_initialized() == nullptr ||
+ get_thread_data_if_initialized() == tls, NULL);
+ if (tls) {
+ thread_data* td = static_cast<thread_data*>(tls);
+
+ // Only external thread can be inside an arena during termination.
+ if (td->my_arena_slot) {
+ arena* a = td->my_arena;
+ market* m = a->my_market;
+
+ a->my_observers.notify_exit_observers(td->my_last_observer, td->my_is_worker);
+
+ td->my_task_dispatcher->m_stealing_threshold = 0;
+ td->detach_task_dispatcher();
+ td->my_arena_slot->release();
+ // Release an arena
+ a->on_thread_leaving<arena::ref_external>();
+
+ m->remove_external_thread(*td);
+ // If there was an associated arena, it added a public market reference
+ m->release( /*is_public*/ true, /*blocking_terminate*/ false);
+ }
+
+ td->~thread_data();
+ cache_aligned_deallocate(td);
+
+ clear_thread_data();
+ }
+ __TBB_ASSERT(get_thread_data_if_initialized() == nullptr, NULL);
+}
+
+void governor::initialize_rml_factory () {
+ ::rml::factory::status_type res = theRMLServerFactory.open();
+ UsePrivateRML = res != ::rml::factory::st_success;
+}
+
+#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE
+void __TBB_EXPORTED_FUNC get(d1::task_scheduler_handle& handle) {
+ handle.m_ctl = new(allocate_memory(sizeof(global_control))) global_control(global_control::scheduler_handle, 1);
+}
+
+void release_impl(d1::task_scheduler_handle& handle) {
+ if (handle.m_ctl != nullptr) {
+ handle.m_ctl->~global_control();
+ deallocate_memory(handle.m_ctl);
+ handle.m_ctl = nullptr;
+ }
+}
+
+bool finalize_impl(d1::task_scheduler_handle& handle) {
+ market::global_market_mutex_type::scoped_lock lock( market::theMarketMutex );
+ bool ok = true; // ok if theMarket does not exist yet
+ market* m = market::theMarket; // read the state of theMarket
+ if (m != nullptr) {
+ lock.release();
+ __TBB_ASSERT(is_present(*handle.m_ctl), "finalize or release was already called on this object");
+ thread_data* td = governor::get_thread_data_if_initialized();
+ if (td) {
+ task_dispatcher* task_disp = td->my_task_dispatcher;
+ __TBB_ASSERT(task_disp, nullptr);
+ if (task_disp->m_properties.outermost && !td->my_is_worker) { // is not inside a parallel region
+ governor::auto_terminate(td);
+ }
+ }
+ if (remove_and_check_if_empty(*handle.m_ctl)) {
+ ok = m->release(/*is_public*/ true, /*blocking_terminate*/ true);
+ } else {
+ ok = false;
+ }
+ }
+ return ok;
+}
+
+bool __TBB_EXPORTED_FUNC finalize(d1::task_scheduler_handle& handle, std::intptr_t mode) {
+ if (mode == d1::release_nothrowing) {
+ release_impl(handle);
+ return true;
+ } else {
+ bool ok = finalize_impl(handle);
+ // TODO: this is unsafe when finalize is called concurrently and the library is unloaded afterwards
+ release_impl(handle);
+ if (mode == d1::finalize_throwing && !ok) {
+ throw_exception(exception_id::unsafe_wait);
+ }
+ return ok;
+ }
+}
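+
+/* Editorial note (not part of the original sources): a hedged sketch of the public
+   usage that drives get()/finalize() above; the exact public spelling (a static get()
+   vs. an attach-style constructor) depends on the oneTBB version in use.
+
+       tbb::task_scheduler_handle handle = tbb::task_scheduler_handle::get();
+       // ... run parallel algorithms ...
+       // Blocking finalization: waits for worker threads to complete; the
+       // finalize_throwing path above throws if the wait cannot be done safely.
+       tbb::finalize(handle);
+*/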
+#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE
+
+#if __TBB_ARENA_BINDING
+
+#if __TBB_WEAK_SYMBOLS_PRESENT
+#pragma weak __TBB_internal_initialize_system_topology
+#pragma weak __TBB_internal_allocate_binding_handler
+#pragma weak __TBB_internal_deallocate_binding_handler
+#pragma weak __TBB_internal_apply_affinity
+#pragma weak __TBB_internal_restore_affinity
+#pragma weak __TBB_internal_get_default_concurrency
+
+extern "C" {
+void __TBB_internal_initialize_system_topology(
+ size_t groups_num,
+ int& numa_nodes_count, int*& numa_indexes_list,
+ int& core_types_count, int*& core_types_indexes_list
+);
+
+//TODO: consider renaming to `create_binding_handler` and `destroy_binding_handler`
+binding_handler* __TBB_internal_allocate_binding_handler( int slot_num, int numa_id, int core_type_id, int max_threads_per_core );
+void __TBB_internal_deallocate_binding_handler( binding_handler* handler_ptr );
+
+void __TBB_internal_apply_affinity( binding_handler* handler_ptr, int slot_num );
+void __TBB_internal_restore_affinity( binding_handler* handler_ptr, int slot_num );
+
+int __TBB_internal_get_default_concurrency( int numa_id, int core_type_id, int max_threads_per_core );
+}
+#endif /* __TBB_WEAK_SYMBOLS_PRESENT */
+
+// Stubs that will be used if TBBbind library is unavailable.
+static binding_handler* dummy_allocate_binding_handler ( int, int, int, int ) { return nullptr; }
+static void dummy_deallocate_binding_handler ( binding_handler* ) { }
+static void dummy_apply_affinity ( binding_handler*, int ) { }
+static void dummy_restore_affinity ( binding_handler*, int ) { }
+static int dummy_get_default_concurrency( int, int, int ) { return governor::default_num_threads(); }
+
+// Handlers for communication with TBBbind
+static void (*initialize_system_topology_ptr)(
+ size_t groups_num,
+ int& numa_nodes_count, int*& numa_indexes_list,
+ int& core_types_count, int*& core_types_indexes_list
+) = nullptr;
+
+static binding_handler* (*allocate_binding_handler_ptr)( int slot_num, int numa_id, int core_type_id, int max_threads_per_core )
+ = dummy_allocate_binding_handler;
+static void (*deallocate_binding_handler_ptr)( binding_handler* handler_ptr )
+ = dummy_deallocate_binding_handler;
+static void (*apply_affinity_ptr)( binding_handler* handler_ptr, int slot_num )
+ = dummy_apply_affinity;
+static void (*restore_affinity_ptr)( binding_handler* handler_ptr, int slot_num )
+ = dummy_restore_affinity;
+int (*get_default_concurrency_ptr)( int numa_id, int core_type_id, int max_threads_per_core )
+ = dummy_get_default_concurrency;
+
+#if _WIN32 || _WIN64 || __linux__
+// Table describing how to link the handlers.
+static const dynamic_link_descriptor TbbBindLinkTable[] = {
+ DLD(__TBB_internal_initialize_system_topology, initialize_system_topology_ptr),
+ DLD(__TBB_internal_allocate_binding_handler, allocate_binding_handler_ptr),
+ DLD(__TBB_internal_deallocate_binding_handler, deallocate_binding_handler_ptr),
+ DLD(__TBB_internal_apply_affinity, apply_affinity_ptr),
+ DLD(__TBB_internal_restore_affinity, restore_affinity_ptr),
+ DLD(__TBB_internal_get_default_concurrency, get_default_concurrency_ptr)
+};
+
+static const unsigned LinkTableSize = sizeof(TbbBindLinkTable) / sizeof(dynamic_link_descriptor);
+
+#if TBB_USE_DEBUG
+#define DEBUG_SUFFIX "_debug"
+#else
+#define DEBUG_SUFFIX
+#endif /* TBB_USE_DEBUG */
+
+#if _WIN32 || _WIN64
+#define LIBRARY_EXTENSION ".dll"
+#define LIBRARY_PREFIX
+#elif __linux__
+#define LIBRARY_EXTENSION __TBB_STRING(.so.3)
+#define LIBRARY_PREFIX "lib"
+#endif /* __linux__ */
+
+#define TBBBIND_NAME LIBRARY_PREFIX "tbbbind" DEBUG_SUFFIX LIBRARY_EXTENSION
+#define TBBBIND_2_0_NAME LIBRARY_PREFIX "tbbbind_2_0" DEBUG_SUFFIX LIBRARY_EXTENSION
+#define TBBBIND_2_4_NAME LIBRARY_PREFIX "tbbbind_2_4" DEBUG_SUFFIX LIBRARY_EXTENSION
+#endif /* _WIN32 || _WIN64 || __linux__ */
+
+// Representation of system hardware topology information on the TBB side.
+// System topology may be initialized by third-party component (e.g. hwloc)
+// or just filled in with default stubs.
+namespace system_topology {
+
+constexpr int automatic = -1;
+
+static std::atomic<do_once_state> initialization_state;
+
+namespace {
+int numa_nodes_count = 0;
+int* numa_nodes_indexes = nullptr;
+
+int core_types_count = 0;
+int* core_types_indexes = nullptr;
+
+const char* load_tbbbind_shared_object() {
+#if _WIN32 || _WIN64 || __linux__
+#if _WIN32 && !_WIN64
+ // For 32-bit Windows applications, process affinity masks can only support up to 32 logical CPUs.
+ SYSTEM_INFO si;
+ GetNativeSystemInfo(&si);
+ if (si.dwNumberOfProcessors > 32) return nullptr;
+#endif /* _WIN32 && !_WIN64 */
+ for (const auto& tbbbind_version : {TBBBIND_2_4_NAME, TBBBIND_2_0_NAME, TBBBIND_NAME}) {
+ if (dynamic_link(tbbbind_version, TbbBindLinkTable, LinkTableSize)) {
+ return tbbbind_version;
+ }
+ }
+#endif /* _WIN32 || _WIN64 || __linux__ */
+ return nullptr;
+}
+
+int processor_groups_num() {
+#if _WIN32
+ return NumberOfProcessorGroups();
+#else
+ // Stub to improve code readability by reducing the number of compile-time conditions
+ return 1;
+#endif
+}
+} // internal namespace
+
+// Tries to load the TBBbind library API; on success, obtains the NUMA topology information from it,
+// otherwise fills the NUMA topology with stubs.
+void initialization_impl() {
+ governor::one_time_init();
+
+ if (const char* tbbbind_name = load_tbbbind_shared_object()) {
+ initialize_system_topology_ptr(
+ processor_groups_num(),
+ numa_nodes_count, numa_nodes_indexes,
+ core_types_count, core_types_indexes
+ );
+
+ PrintExtraVersionInfo("TBBBIND", tbbbind_name);
+ return;
+ }
+
+ static int dummy_index = automatic;
+
+ numa_nodes_count = 1;
+ numa_nodes_indexes = &dummy_index;
+
+ core_types_count = 1;
+ core_types_indexes = &dummy_index;
+
+ PrintExtraVersionInfo("TBBBIND", "UNAVAILABLE");
+}
+
+void initialize() {
+ atomic_do_once(initialization_impl, initialization_state);
+}
+} // namespace system_topology
+
+binding_handler* construct_binding_handler(int slot_num, int numa_id, int core_type_id, int max_threads_per_core) {
+ system_topology::initialize();
+ return allocate_binding_handler_ptr(slot_num, numa_id, core_type_id, max_threads_per_core);
+}
+
+void destroy_binding_handler(binding_handler* handler_ptr) {
+ __TBB_ASSERT(deallocate_binding_handler_ptr, "tbbbind loading was not performed");
+ deallocate_binding_handler_ptr(handler_ptr);
+}
+
+void apply_affinity_mask(binding_handler* handler_ptr, int slot_index) {
+ __TBB_ASSERT(slot_index >= 0, "Negative thread index");
+ __TBB_ASSERT(apply_affinity_ptr, "tbbbind loading was not performed");
+ apply_affinity_ptr(handler_ptr, slot_index);
+}
+
+void restore_affinity_mask(binding_handler* handler_ptr, int slot_index) {
+ __TBB_ASSERT(slot_index >= 0, "Negative thread index");
+ __TBB_ASSERT(restore_affinity_ptr, "tbbbind loading was not performed");
+ restore_affinity_ptr(handler_ptr, slot_index);
+}
+
+unsigned __TBB_EXPORTED_FUNC numa_node_count() {
+ system_topology::initialize();
+ return system_topology::numa_nodes_count;
+}
+
+void __TBB_EXPORTED_FUNC fill_numa_indices(int* index_array) {
+ system_topology::initialize();
+ std::memcpy(index_array, system_topology::numa_nodes_indexes, system_topology::numa_nodes_count * sizeof(int));
+}
+
+int __TBB_EXPORTED_FUNC numa_default_concurrency(int node_id) {
+ if (node_id >= 0) {
+ system_topology::initialize();
+ int result = get_default_concurrency_ptr(
+ node_id,
+ /*core_type*/system_topology::automatic,
+ /*threads_per_core*/system_topology::automatic
+ );
+ if (result > 0) return result;
+ }
+ return governor::default_num_threads();
+}
+
+unsigned __TBB_EXPORTED_FUNC core_type_count(intptr_t /*reserved*/) {
+ system_topology::initialize();
+ return system_topology::core_types_count;
+}
+
+void __TBB_EXPORTED_FUNC fill_core_type_indices(int* index_array, intptr_t /*reserved*/) {
+ system_topology::initialize();
+ std::memcpy(index_array, system_topology::core_types_indexes, system_topology::core_types_count * sizeof(int));
+}
+
+void constraints_assertion(d1::constraints c) {
+ bool is_topology_initialized = system_topology::initialization_state == do_once_state::initialized;
+ __TBB_ASSERT_RELEASE(c.max_threads_per_core == system_topology::automatic || c.max_threads_per_core > 0,
+ "Wrong max_threads_per_core constraints field value.");
+
+ auto numa_nodes_begin = system_topology::numa_nodes_indexes;
+ auto numa_nodes_end = system_topology::numa_nodes_indexes + system_topology::numa_nodes_count;
+ __TBB_ASSERT_RELEASE(
+ c.numa_id == system_topology::automatic ||
+ (is_topology_initialized && std::find(numa_nodes_begin, numa_nodes_end, c.numa_id) != numa_nodes_end),
+ "The constraints::numa_id value is not known to the library. Use tbb::info::numa_nodes() to get the list of possible values.");
+
+ int* core_types_begin = system_topology::core_types_indexes;
+ int* core_types_end = system_topology::core_types_indexes + system_topology::core_types_count;
+ __TBB_ASSERT_RELEASE(c.core_type == system_topology::automatic ||
+ (is_topology_initialized && std::find(core_types_begin, core_types_end, c.core_type) != core_types_end),
+ "The constraints::core_type value is not known to the library. Use tbb::info::core_types() to get the list of possible values.");
+}
+
+int __TBB_EXPORTED_FUNC constraints_default_concurrency(const d1::constraints& c, intptr_t /*reserved*/) {
+ constraints_assertion(c);
+
+ if (c.numa_id >= 0 || c.core_type >= 0 || c.max_threads_per_core > 0) {
+ system_topology::initialize();
+ return get_default_concurrency_ptr(c.numa_id, c.core_type, c.max_threads_per_core);
+ }
+ return governor::default_num_threads();
+}
+
+int __TBB_EXPORTED_FUNC constraints_threads_per_core(const d1::constraints&, intptr_t /*reserved*/) {
+ return system_topology::automatic;
+}
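+
+/* Editorial note (not part of the original sources): a hedged sketch of the public
+   tbb::info / task_arena constraints usage that ultimately calls the exported functions
+   above; names follow the oneTBB public headers and are illustrative.
+
+       #include <oneapi/tbb/info.h>
+       #include <oneapi/tbb/task_arena.h>
+       #include <vector>
+
+       void pin_to_first_numa_node() {
+           // numa_node_count() / fill_numa_indices() back this query.
+           std::vector<tbb::numa_node_id> nodes = tbb::info::numa_nodes();
+           // constraints_default_concurrency() supplies the arena concurrency.
+           tbb::task_arena arena(tbb::task_arena::constraints{}.set_numa_id(nodes.front()));
+           arena.execute([]{ });  // work bound to that NUMA node
+       }
+*/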
+#endif /* __TBB_ARENA_BINDING */
+
+} // namespace r1
+} // namespace detail
+} // namespace tbb
diff --git a/contrib/libs/tbb/src/tbb/governor.h b/contrib/libs/tbb/src/tbb/governor.h
new file mode 100644
index 0000000000..0ff4781414
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/governor.h
@@ -0,0 +1,158 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#ifndef _TBB_governor_H
+#define _TBB_governor_H
+
+#include "rml_tbb.h"
+
+#include "misc.h" // for AvailableHwConcurrency
+#include "tls.h"
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+
+class market;
+class thread_data;
+class __TBB_InitOnce;
+
+#if __TBB_USE_ITT_NOTIFY
+//! Defined in profiling.cpp
+extern bool ITT_Present;
+#endif
+
+typedef std::size_t stack_size_type;
+
+//------------------------------------------------------------------------
+// Class governor
+//------------------------------------------------------------------------
+
+//! The class handles access to the single instance of market, and to TLS to keep scheduler instances.
+/** It also supports automatic on-demand initialization of the TBB scheduler.
+ The class contains only static data members and methods.*/
+class governor {
+private:
+ friend class __TBB_InitOnce;
+ friend class market;
+
+ // TODO: consider using thread_local (measure performance and side effects)
+ //! TLS for scheduler instances associated with individual threads
+ static basic_tls<thread_data*> theTLS;
+
+ //! Caches the maximal level of parallelism supported by the hardware
+ static unsigned DefaultNumberOfThreads;
+
+ //! Caches the size of OS regular memory page
+ static std::size_t DefaultPageSize;
+
+ // TODO (TBB_REVAMP_TODO): reconsider constant names
+ static rml::tbb_factory theRMLServerFactory;
+
+ static bool UsePrivateRML;
+
+ // Flags for runtime-specific conditions
+ static cpu_features_type cpu_features;
+ static bool is_rethrow_broken;
+
+ //! Create key for thread-local storage and initialize RML.
+ static void acquire_resources ();
+
+ //! Destroy the thread-local storage key and deinitialize RML.
+ static void release_resources ();
+
+ static rml::tbb_server* create_rml_server ( rml::tbb_client& );
+
+public:
+ static unsigned default_num_threads () {
+ // No memory fence required, because at worst each invoking thread calls AvailableHwConcurrency once.
+ return DefaultNumberOfThreads ? DefaultNumberOfThreads :
+ DefaultNumberOfThreads = AvailableHwConcurrency();
+ }
+ static std::size_t default_page_size () {
+ return DefaultPageSize ? DefaultPageSize :
+ DefaultPageSize = DefaultSystemPageSize();
+ }
+ static void one_time_init();
+ //! Processes a scheduler initialization request (possibly nested) in an external thread
+ /** If necessary, creates a new arena instance and the thread data for the calling thread. **/
+ static void init_external_thread();
+
+ //! The routine to undo automatic initialization.
+ /** The signature is written with void* so that the routine
+ can be the destructor argument to pthread_key_create. */
+ static void auto_terminate(void* tls);
+
+ //! Obtain the thread-local instance of the thread data.
+ /** If the scheduler has not been initialized yet, initialization is done automatically.
+ Note that auto-initialized scheduler instance is destroyed only when its thread terminates. **/
+ static thread_data* get_thread_data() {
+ thread_data* td = theTLS.get();
+ if (td) {
+ return td;
+ }
+ init_external_thread();
+ td = theTLS.get();
+ __TBB_ASSERT(td, NULL);
+ return td;
+ }
+
+ static void set_thread_data(thread_data& td) {
+ theTLS.set(&td);
+ }
+
+ static void clear_thread_data() {
+ theTLS.set(nullptr);
+ }
+
+ static thread_data* get_thread_data_if_initialized () {
+ return theTLS.get();
+ }
+
+ static bool is_thread_data_set(thread_data* td) {
+ return theTLS.get() == td;
+ }
+
+ //! Undo automatic initialization if necessary; call when a thread exits.
+ static void terminate_external_thread() {
+ auto_terminate(get_thread_data_if_initialized());
+ }
+
+ static void initialize_rml_factory ();
+
+ static bool does_client_join_workers (const rml::tbb_client &client);
+
+ static bool speculation_enabled() { return cpu_features.rtm_enabled; }
+
+ static bool wait_package_enabled() { return cpu_features.waitpkg_enabled; }
+
+ static bool rethrow_exception_broken() { return is_rethrow_broken; }
+
+ static bool is_itt_present() {
+#if __TBB_USE_ITT_NOTIFY
+ return ITT_Present;
+#else
+ return false;
+#endif
+ }
+}; // class governor
+
+} // namespace r1
+} // namespace detail
+} // namespace tbb
+
+#endif /* _TBB_governor_H */
diff --git a/contrib/libs/tbb/src/tbb/intrusive_list.h b/contrib/libs/tbb/src/tbb/intrusive_list.h
new file mode 100644
index 0000000000..699bc149aa
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/intrusive_list.h
@@ -0,0 +1,242 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#ifndef _TBB_intrusive_list_H
+#define _TBB_intrusive_list_H
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+
+//! Data structure to be inherited by the types that can form intrusive lists.
+/** An intrusive list is formed by means of the intrusive_list<T> or memptr_intrusive_list<T, U, NodePtr>
+ template classes defined below. Note that type T must either derive from intrusive_list_node
+ publicly or declare the corresponding list instantiation as a friend.
+ The list classes implement a limited subset of the std::list interface. **/
+struct intrusive_list_node {
+ intrusive_list_node* my_prev_node{};
+ intrusive_list_node* my_next_node{};
+#if TBB_USE_ASSERT
+ intrusive_list_node() { my_prev_node = my_next_node = this; }
+#endif /* TBB_USE_ASSERT */
+};
+
+//! List of elements of type T, where T embeds or derives from intrusive_list_node
+/** The class is not thread safe. **/
+template <class List, class T>
+class intrusive_list_base {
+ //! Sentinel head node of the list
+ intrusive_list_node my_head;
+
+ //! Number of list elements
+ std::size_t my_size;
+
+ static intrusive_list_node& node ( T& item ) { return List::node(item); }
+
+ static T& item ( intrusive_list_node* node ) { return List::item(node); }
+
+ static const T& item( const intrusive_list_node* node ) { return List::item(node); }
+
+ template <typename DereferenceType>
+ class iterator_impl {
+ static_assert(std::is_same<DereferenceType, T>::value ||
+ std::is_same<DereferenceType, const T>::value,
+ "Incorrect DereferenceType in iterator_impl");
+
+ using pointer_type = typename std::conditional<std::is_same<DereferenceType, T>::value,
+ intrusive_list_node*,
+ const intrusive_list_node*>::type;
+
+ public:
+ iterator_impl() : my_pos(nullptr) {}
+
+ iterator_impl( pointer_type pos ) : my_pos(pos) {}
+
+ iterator_impl& operator++() {
+ my_pos = my_pos->my_next_node;
+ return *this;
+ }
+
+ iterator_impl operator++( int ) {
+ iterator_impl it(*this);
+ ++*this;
+ return it;
+ }
+
+ iterator_impl& operator--() {
+ my_pos = my_pos->my_prev_node;
+ return *this;
+ }
+
+ iterator_impl operator--( int ) {
+ iterator_impl it(*this);
+ --*this;
+ return it;
+ }
+
+ bool operator==( const iterator_impl& rhs ) const {
+ return my_pos == rhs.my_pos;
+ }
+
+ bool operator!=( const iterator_impl& rhs ) const {
+ return my_pos != rhs.my_pos;
+ }
+
+ DereferenceType& operator*() const {
+ return intrusive_list_base::item(my_pos);
+ }
+
+ DereferenceType* operator->() const {
+ return &intrusive_list_base::item(my_pos);
+ }
+ private:
+ // Node the iterator points to at the moment
+ pointer_type my_pos;
+ }; // class iterator_impl
+
+ void assert_ok () const {
+ __TBB_ASSERT( (my_head.my_prev_node == &my_head && !my_size) ||
+ (my_head.my_next_node != &my_head && my_size >0), "intrusive_list_base corrupted" );
+#if TBB_USE_ASSERT >= 2
+ std::size_t i = 0;
+ for ( intrusive_list_node *n = my_head.my_next_node; n != &my_head; n = n->my_next_node )
+ ++i;
+ __TBB_ASSERT( my_size == i, "Wrong size" );
+#endif /* TBB_USE_ASSERT >= 2 */
+ }
+
+public:
+ using iterator = iterator_impl<T>;
+ using const_iterator = iterator_impl<const T>;
+
+ intrusive_list_base () : my_size(0) {
+ my_head.my_prev_node = &my_head;
+ my_head.my_next_node = &my_head;
+ }
+
+ bool empty () const { return my_head.my_next_node == &my_head; }
+
+ std::size_t size () const { return my_size; }
+
+ iterator begin () { return iterator(my_head.my_next_node); }
+
+ iterator end () { return iterator(&my_head); }
+
+ const_iterator begin () const { return const_iterator(my_head.my_next_node); }
+
+ const_iterator end () const { return const_iterator(&my_head); }
+
+ void push_front ( T& val ) {
+ __TBB_ASSERT( node(val).my_prev_node == &node(val) && node(val).my_next_node == &node(val),
+ "Object with intrusive list node can be part of only one intrusive list simultaneously" );
+ // An object can be part of only one intrusive list at the given moment via the given node member
+ node(val).my_prev_node = &my_head;
+ node(val).my_next_node = my_head.my_next_node;
+ my_head.my_next_node->my_prev_node = &node(val);
+ my_head.my_next_node = &node(val);
+ ++my_size;
+ assert_ok();
+ }
+
+ void remove( T& val ) {
+ __TBB_ASSERT( node(val).my_prev_node != &node(val) && node(val).my_next_node != &node(val), "Element to remove is not in the list" );
+ __TBB_ASSERT( node(val).my_prev_node->my_next_node == &node(val) && node(val).my_next_node->my_prev_node == &node(val), "Element to remove is not in the list" );
+ --my_size;
+ node(val).my_next_node->my_prev_node = node(val).my_prev_node;
+ node(val).my_prev_node->my_next_node = node(val).my_next_node;
+#if TBB_USE_ASSERT
+ node(val).my_prev_node = node(val).my_next_node = &node(val);
+#endif
+ assert_ok();
+ }
+
+ iterator erase ( iterator it ) {
+ T& val = *it;
+ ++it;
+ remove( val );
+ return it;
+ }
+
+}; // intrusive_list_base
+
+#if __TBB_TODO
+// With standard compliant compilers memptr_intrusive_list could be named simply intrusive_list,
+// and inheritance based intrusive_list version would become its partial specialization.
+// Here are the corresponding declarations:
+
+struct dummy_intrusive_list_item { intrusive_list_node my_node; };
+
+template <class T, class U = dummy_intrusive_list_item, intrusive_list_node U::*NodePtr = &dummy_intrusive_list_item::my_node>
+class intrusive_list : public intrusive_list_base<intrusive_list<T, U, NodePtr>, T>;
+
+template <class T>
+class intrusive_list<T, dummy_intrusive_list_item, &dummy_intrusive_list_item::my_node>
+ : public intrusive_list_base<intrusive_list<T>, T>;
+
+#endif /* __TBB_TODO */
+
+//! Double linked list of items of type T containing a member of type intrusive_list_node.
+/** NodePtr is a member pointer to the node data field. Class U is either T or
+ a base class of T containing the node member. Default values exist for the sake
+ of a partial specialization that handles the inheritance case.
+
+ The list does not have ownership of its items. Its purpose is to avoid dynamic
+ memory allocation when forming lists of existing objects.
+
+ The class is not thread safe. **/
+template <class T, class U, intrusive_list_node U::*NodePtr>
+class memptr_intrusive_list : public intrusive_list_base<memptr_intrusive_list<T, U, NodePtr>, T>
+{
+ friend class intrusive_list_base<memptr_intrusive_list<T, U, NodePtr>, T>;
+
+ static intrusive_list_node& node ( T& val ) { return val.*NodePtr; }
+
+ static T& item ( intrusive_list_node* node ) {
+ // Cannot use __TBB_offsetof (and consequently __TBB_get_object_ref) macro
+ // with *NodePtr argument because gcc refuses to interpret pasted "->" and "*"
+ // as member pointer dereferencing operator, and explicit usage of ## in
+ // __TBB_offsetof implementation breaks operations with normal member names.
+ return *reinterpret_cast<T*>((char*)node - ((ptrdiff_t)&(reinterpret_cast<T*>(0x1000)->*NodePtr) - 0x1000));
+ }
+
+ static const T& item( const intrusive_list_node* node ) {
+ return item(const_cast<intrusive_list_node*>(node));
+ }
+
+}; // intrusive_list<T, U, NodePtr>
+
+//! Double linked list of items of type T that is derived from intrusive_list_node class.
+/** The list does not have ownership of its items. Its purpose is to avoid dynamic
+ memory allocation when forming lists of existing objects.
+
+ The class is not thread safe. **/
+template <class T>
+class intrusive_list : public intrusive_list_base<intrusive_list<T>, T>
+{
+ friend class intrusive_list_base<intrusive_list<T>, T>;
+
+ static intrusive_list_node& node ( T& val ) { return val; }
+
+ static T& item ( intrusive_list_node* node ) { return *static_cast<T*>(node); }
+
+ static const T& item( const intrusive_list_node* node ) { return *static_cast<const T*>(node); }
+}; // intrusive_list<T>
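+
+/* Editorial note (not part of the original sources): an illustrative sketch of the two
+   list flavors defined above; the item types "job" and "worker" are hypothetical.
+
+       struct job : public intrusive_list_node {     // inheritance-based flavor
+           int id = 0;
+       };
+       intrusive_list<job> ready_jobs;
+       job j1, j2;
+       ready_jobs.push_front(j1);                    // no dynamic allocation happens
+       ready_jobs.push_front(j2);
+       for (job& j : ready_jobs) { (void)j.id; }     // iterate the linked items
+       ready_jobs.remove(j1);
+
+       struct worker {                               // member-pointer flavor
+           intrusive_list_node my_node;
+           int index = 0;
+       };
+       memptr_intrusive_list<worker, worker, &worker::my_node> idle_workers;
+*/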
+
+} // namespace r1
+} // namespace detail
+} // namespace tbb
+
+#endif /* _TBB_intrusive_list_H */
diff --git a/contrib/libs/tbb/src/tbb/itt_notify.cpp b/contrib/libs/tbb/src/tbb/itt_notify.cpp
new file mode 100644
index 0000000000..0e60579a62
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/itt_notify.cpp
@@ -0,0 +1,69 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#if __TBB_USE_ITT_NOTIFY
+
+#if _WIN32||_WIN64
+ #ifndef UNICODE
+ #define UNICODE
+ #endif
+#else
+ #pragma weak dlopen
+ #pragma weak dlsym
+ #pragma weak dlerror
+#endif /* WIN */
+
+#if __TBB_BUILD
+
+extern "C" void ITT_DoOneTimeInitialization();
+#define __itt_init_ittlib_name(x,y) (ITT_DoOneTimeInitialization(), true)
+
+#elif __TBBMALLOC_BUILD
+
+extern "C" void MallocInitializeITT();
+#define __itt_init_ittlib_name(x,y) (MallocInitializeITT(), true)
+
+#else
+#error This file is expected to be used for either TBB or TBB allocator build.
+#endif // __TBB_BUILD
+
+#include "tools_api/ittnotify_static.c"
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+
+/** This extra proxy method is necessary since __itt_init_lib is declared as static **/
+int __TBB_load_ittnotify() {
+#if !(_WIN32||_WIN64)
+ // tool_api crashes without dlopen, check that it's present. Common case
+ // for lack of dlopen is static binaries, i.e. ones built with -static.
+ if (dlopen == NULL)
+ return 0;
+#endif
+ return __itt_init_ittlib(NULL, // groups for:
+ (__itt_group_id)(__itt_group_sync // prepare/cancel/acquired/releasing
+ | __itt_group_thread // name threads
+ | __itt_group_stitch // stack stitching
+ | __itt_group_structure
+ ));
+}
+
+} //namespace r1
+} //namespace detail
+} // namespace tbb
+
+#endif /* __TBB_USE_ITT_NOTIFY */
diff --git a/contrib/libs/tbb/src/tbb/itt_notify.h b/contrib/libs/tbb/src/tbb/itt_notify.h
new file mode 100644
index 0000000000..9978bcd7cb
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/itt_notify.h
@@ -0,0 +1,114 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#ifndef _TBB_ITT_NOTIFY
+#define _TBB_ITT_NOTIFY
+
+#include "oneapi/tbb/detail/_config.h"
+
+#if __TBB_USE_ITT_NOTIFY
+
+#if _WIN32||_WIN64
+ #ifndef UNICODE
+ #define UNICODE
+ #endif
+#endif /* WIN */
+
+#ifndef INTEL_ITTNOTIFY_API_PRIVATE
+#define INTEL_ITTNOTIFY_API_PRIVATE
+#endif
+
+#include "tools_api/ittnotify.h"
+#include "tools_api/legacy/ittnotify.h"
+extern "C" void __itt_fini_ittlib(void);
+
+#if _WIN32||_WIN64
+ #undef _T
+#endif /* WIN */
+
+#endif /* __TBB_USE_ITT_NOTIFY */
+
+#if !ITT_CALLER_NULL
+#define ITT_CALLER_NULL ((__itt_caller)0)
+#endif
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+
+//! Unicode support
+#if (_WIN32||_WIN64) && !__MINGW32__
+ //! Unicode character type. Always wchar_t on Windows.
+ /** We do not use typedefs from the Windows TCHAR family to keep consistency with the TBB coding style. **/
+ using tchar = wchar_t;
+ //! Standard Windows macro to markup the string literals.
+ #define _T(string_literal) L ## string_literal
+#else /* !WIN */
+ using tchar = char;
+ //! Standard Windows style macro to markup the string literals.
+ #define _T(string_literal) string_literal
+#endif /* !WIN */
+
+//! Display names of internal synchronization types
+extern const tchar
+ *SyncType_Scheduler;
+//! Display names of internal synchronization components/scenarios
+extern const tchar
+ *SyncObj_ContextsList
+ ;
+
+#if __TBB_USE_ITT_NOTIFY
+// const_cast<void*>() is necessary to cast off volatility
+#define ITT_NOTIFY(name,obj) __itt_##name(const_cast<void*>(static_cast<volatile void*>(obj)))
+#define ITT_THREAD_SET_NAME(name) __itt_thread_set_name(name)
+#define ITT_FINI_ITTLIB() __itt_fini_ittlib()
+#define ITT_SYNC_CREATE(obj, type, name) __itt_sync_create((void*)(obj), type, name, 2)
+#define ITT_STACK_CREATE(obj) obj = __itt_stack_caller_create()
+#define ITT_STACK_DESTROY(obj) (obj!=nullptr) ? __itt_stack_caller_destroy(static_cast<__itt_caller>(obj)) : ((void)0)
+#define ITT_CALLEE_ENTER(cond, t, obj) if(cond) {\
+ __itt_stack_callee_enter(static_cast<__itt_caller>(obj));\
+ __itt_sync_acquired(t);\
+ }
+#define ITT_CALLEE_LEAVE(cond, obj) (cond) ? __itt_stack_callee_leave(static_cast<__itt_caller>(obj)) : ((void)0)
+
+#define ITT_TASK_GROUP(obj,name,parent) r1::itt_make_task_group(d1::ITT_DOMAIN_MAIN,(void*)(obj),ALGORITHM,(void*)(parent),(parent!=nullptr) ? ALGORITHM : FLOW_NULL,name)
+#define ITT_TASK_BEGIN(obj,name,id) r1::itt_task_begin(d1::ITT_DOMAIN_MAIN,(void*)(id),ALGORITHM,(void*)(obj),ALGORITHM,name)
+#define ITT_TASK_END r1::itt_task_end(d1::ITT_DOMAIN_MAIN)
+
+
+#else /* !__TBB_USE_ITT_NOTIFY */
+
+#define ITT_NOTIFY(name,obj) ((void)0)
+#define ITT_THREAD_SET_NAME(name) ((void)0)
+#define ITT_FINI_ITTLIB() ((void)0)
+#define ITT_SYNC_CREATE(obj, type, name) ((void)0)
+#define ITT_STACK_CREATE(obj) ((void)0)
+#define ITT_STACK_DESTROY(obj) ((void)0)
+#define ITT_CALLEE_ENTER(cond, t, obj) ((void)0)
+#define ITT_CALLEE_LEAVE(cond, obj) ((void)0)
+#define ITT_TASK_GROUP(type,name,parent) ((void)0)
+#define ITT_TASK_BEGIN(type,name,id) ((void)0)
+#define ITT_TASK_END ((void)0)
+
+#endif /* !__TBB_USE_ITT_NOTIFY */
+
+int __TBB_load_ittnotify();
+
+} // namespace r1
+} // namespace detail
+} // namespace tbb
+
+#endif /* _TBB_ITT_NOTIFY */
diff --git a/contrib/libs/tbb/src/tbb/mailbox.h b/contrib/libs/tbb/src/tbb/mailbox.h
new file mode 100644
index 0000000000..2f49e9b35e
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/mailbox.h
@@ -0,0 +1,249 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#ifndef _TBB_mailbox_H
+#define _TBB_mailbox_H
+
+#include "oneapi/tbb/cache_aligned_allocator.h"
+#include "oneapi/tbb/detail/_small_object_pool.h"
+
+#include "arena_slot.h"
+#include "scheduler_common.h"
+
+#include <atomic>
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+
+struct task_proxy : public d1::task {
+ static const intptr_t pool_bit = 1<<0;
+ static const intptr_t mailbox_bit = 1<<1;
+ static const intptr_t location_mask = pool_bit | mailbox_bit;
+ /* All but two low-order bits represent a (task*).
+ Two low-order bits mean:
+ 1 = proxy is/was/will be in task pool
+ 2 = proxy is/was/will be in mailbox */
+ std::atomic<intptr_t> task_and_tag;
+
+ //! Pointer to next task_proxy in a mailbox
+ std::atomic<task_proxy*> next_in_mailbox;
+
+ //! Mailbox to which this was mailed.
+ mail_outbox* outbox;
+
+ //! Task affinity id which is referenced
+ d1::slot_id slot;
+
+ d1::small_object_allocator allocator;
+
+ //! True if the proxy is stored both in its sender's pool and in the destination mailbox.
+ static bool is_shared ( intptr_t tat ) {
+ return (tat & location_mask) == location_mask;
+ }
+
+ //! Returns a pointer to the encapsulated task or nullptr.
+ static task* task_ptr ( intptr_t tat ) {
+ return (task*)(tat & ~location_mask);
+ }
+
+ //! Returns a pointer to the encapsulated task or nullptr, and frees proxy if necessary.
+ template<intptr_t from_bit>
+ inline task* extract_task () {
+ // __TBB_ASSERT( prefix().extra_state == es_task_proxy, "Normal task misinterpreted as a proxy?" );
+ intptr_t tat = task_and_tag.load(std::memory_order_acquire);
+ __TBB_ASSERT( tat == from_bit || (is_shared(tat) && task_ptr(tat)),
+ "Proxy's tag cannot specify both locations if the proxy "
+ "was retrieved from one of its original locations" );
+ if ( tat != from_bit ) {
+ const intptr_t cleaner_bit = location_mask & ~from_bit;
+ // Attempt to transition the proxy to the "empty" state with
+ // cleaner_bit specifying entity responsible for its eventual freeing.
+ // Explicit cast to void* is to work around a seeming ICC 11.1 bug.
+ if ( task_and_tag.compare_exchange_strong(tat, cleaner_bit) ) {
+ // Successfully grabbed the task, and left new owner with the job of freeing the proxy
+ return task_ptr(tat);
+ }
+ }
+ // Proxied task has already been claimed from another proxy location.
+ __TBB_ASSERT( task_and_tag.load(std::memory_order_relaxed) == from_bit, "Empty proxy cannot contain non-zero task pointer" );
+ return nullptr;
+ }
+
+ virtual task* execute(d1::execution_data&) {
+ __TBB_ASSERT_RELEASE(false, nullptr);
+ return nullptr;
+ }
+ virtual task* cancel(d1::execution_data&) {
+ __TBB_ASSERT_RELEASE(false, nullptr);
+ return nullptr;
+ }
+}; // struct task_proxy
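+
+/* Editorial note (not part of the original sources): an illustrative sketch of the
+   pointer tagging used by task_and_tag. A task pointer is at least 4-byte aligned,
+   so its two low-order bits are free to record which locations still reference the proxy.
+
+       d1::task* t = get_some_task();                              // hypothetical helper
+       intptr_t tat = intptr_t(t) | task_proxy::pool_bit
+                                  | task_proxy::mailbox_bit;       // held by both locations
+       bool shared = task_proxy::is_shared(tat);                   // true
+       d1::task* original = task_proxy::task_ptr(tat);             // tag bits stripped
+*/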
+
+//! Internal representation of mail_outbox, without padding.
+class unpadded_mail_outbox {
+protected:
+ typedef std::atomic<task_proxy*> atomic_proxy_ptr;
+
+ //! Pointer to first task_proxy in mailbox, or nullptr if box is empty.
+ atomic_proxy_ptr my_first;
+
+ //! Pointer to pointer that will point to next item in the queue. Never nullptr.
+ std::atomic<atomic_proxy_ptr*> my_last;
+
+ //! Owner of mailbox is not executing a task, and has drained its own task pool.
+ std::atomic<bool> my_is_idle;
+};
+
+// TODO: - consider moving to arena slot
+//! Class representing where mail is put.
+/** Padded to occupy a cache line. */
+class mail_outbox : padded<unpadded_mail_outbox> {
+
+ task_proxy* internal_pop( isolation_type isolation ) {
+ task_proxy* curr = my_first.load(std::memory_order_acquire);
+ if ( !curr )
+ return nullptr;
+ atomic_proxy_ptr* prev_ptr = &my_first;
+ if ( isolation != no_isolation ) {
+ while ( task_accessor::isolation(*curr) != isolation ) {
+ prev_ptr = &curr->next_in_mailbox;
+ // The next_in_mailbox should be read with acquire to guarantee (*curr) consistency.
+ curr = curr->next_in_mailbox.load(std::memory_order_acquire);
+ if ( !curr )
+ return nullptr;
+ }
+ }
+ // There is a first item in the mailbox. See if there is a second.
+ // The next_in_mailbox should be read with acquire to guarantee (*second) consistency.
+ if ( task_proxy* second = curr->next_in_mailbox.load(std::memory_order_acquire) ) {
+ // There are at least two items, so first item can be popped easily.
+ prev_ptr->store(second, std::memory_order_relaxed);
+ } else {
+ // There is only one item. Some care is required to pop it.
+
+ prev_ptr->store(nullptr, std::memory_order_relaxed);
+ atomic_proxy_ptr* expected = &curr->next_in_mailbox;
+ if ( my_last.compare_exchange_strong( expected, prev_ptr ) ) {
+ // Successfully transitioned mailbox from having one item to having none.
+ __TBB_ASSERT( !curr->next_in_mailbox.load(std::memory_order_relaxed), nullptr);
+ } else {
+ // Some other thread updated my_last but has not filled in curr->next_in_mailbox yet.
+ // Wait until the first item points to the second item.
+ atomic_backoff backoff;
+ // The next_in_mailbox should be read with acquire to guarantee (*second) consistency.
+ while ( !(second = curr->next_in_mailbox.load(std::memory_order_acquire)) ) backoff.pause();
+ prev_ptr->store( second, std::memory_order_relaxed);
+ }
+ }
+ assert_pointer_valid(curr);
+ return curr;
+ }
+public:
+ friend class mail_inbox;
+
+ //! Push task_proxy onto the mailbox queue of another thread.
+ /** Implementation is wait-free. */
+ void push( task_proxy* t ) {
+ assert_pointer_valid(t);
+ t->next_in_mailbox.store(nullptr, std::memory_order_relaxed);
+ atomic_proxy_ptr* const link = my_last.exchange(&t->next_in_mailbox);
+ // Logically, the release fence is not required because the exchange above provides the
+ // release-acquire semantic that guarantees that (*t) will be consistent when another thread
+ // loads the link atomic. However, the C++11 memory model guarantees consistency of (*t) only
+ // when the same atomic is used for synchronization.
+ link->store(t, std::memory_order_release);
+ }
+
+ //! Return true if mailbox is empty
+ bool empty() {
+ return my_first.load(std::memory_order_relaxed) == nullptr;
+ }
+
+ //! Construct *this as a mailbox from zeroed memory.
+ /** Raise assertion if *this is not previously zeroed, or sizeof(*this) is wrong.
+ This method is provided instead of a full constructor since we know the object
+ will be constructed in zeroed memory. */
+ void construct() {
+ __TBB_ASSERT( sizeof(*this)==max_nfs_size, nullptr );
+ __TBB_ASSERT( !my_first.load(std::memory_order_relaxed), nullptr );
+ __TBB_ASSERT( !my_last.load(std::memory_order_relaxed), nullptr );
+ __TBB_ASSERT( !my_is_idle.load(std::memory_order_relaxed), nullptr );
+ my_last = &my_first;
+ suppress_unused_warning(pad);
+ }
+
+ //! Drain the mailbox
+ intptr_t drain() {
+ intptr_t k = 0;
+ // No fences here because other threads have already quit.
+ for( ; task_proxy* t = my_first; ++k ) {
+ my_first.store(t->next_in_mailbox, std::memory_order_relaxed);
+ // cache_aligned_deallocate((char*)t - task_prefix_reservation_size);
+ }
+ return k;
+ }
+
+ //! True if thread that owns this mailbox is looking for work.
+ bool recipient_is_idle() {
+ return my_is_idle.load(std::memory_order_relaxed);
+ }
+}; // class mail_outbox
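+
+/* Editorial note (not part of the original sources): the push() above follows the classic
+   wait-free multi-producer enqueue: atomically swing the tail to the new node's link first,
+   then publish the node through the previous link. A minimal generic sketch of the pattern:
+
+       struct node { std::atomic<node*> next{nullptr}; };
+
+       struct mpsc_queue {
+           std::atomic<node*> first{nullptr};
+           std::atomic<std::atomic<node*>*> last{&first};
+
+           void push(node* n) {                       // wait-free for every producer
+               n->next.store(nullptr, std::memory_order_relaxed);
+               std::atomic<node*>* prev_link = last.exchange(&n->next);
+               prev_link->store(n, std::memory_order_release);   // publish the node
+           }
+       };
+*/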
+
+//! Class representing source of mail.
+class mail_inbox {
+ //! Corresponding sink where mail that we receive will be put.
+ mail_outbox* my_putter;
+public:
+ //! Construct unattached inbox
+ mail_inbox() : my_putter(nullptr) {}
+
+ //! Attach inbox to a corresponding outbox.
+ void attach( mail_outbox& putter ) {
+ my_putter = &putter;
+ }
+ //! Detach inbox from its outbox
+ void detach() {
+ __TBB_ASSERT(my_putter,"not attached");
+ my_putter = nullptr;
+ }
+ //! Get next piece of mail, or nullptr if mailbox is empty.
+ task_proxy* pop( isolation_type isolation ) {
+ return my_putter->internal_pop( isolation );
+ }
+ //! Return true if mailbox is empty
+ bool empty() {
+ return my_putter->empty();
+ }
+ //! Indicate whether thread that reads this mailbox is idle.
+ /** Raises assertion failure if mailbox is redundantly marked as not idle. */
+ void set_is_idle( bool value ) {
+ if( my_putter ) {
+ __TBB_ASSERT( my_putter->my_is_idle.load(std::memory_order_relaxed) || value, "attempt to redundantly mark mailbox as not idle" );
+ my_putter->my_is_idle.store(value, std::memory_order_relaxed);
+ }
+ }
+ //! Return true if the idle state of the mailbox owner matches the given value (an unattached inbox matches any state).
+ bool is_idle_state ( bool value ) const {
+ return !my_putter || my_putter->my_is_idle.load(std::memory_order_relaxed) == value;
+ }
+}; // class mail_inbox
+
+} // namespace r1
+} // namespace detail
+} // namespace tbb
+
+#endif /* _TBB_mailbox_H */
diff --git a/contrib/libs/tbb/src/tbb/main.cpp b/contrib/libs/tbb/src/tbb/main.cpp
new file mode 100644
index 0000000000..ec6c98d682
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/main.cpp
@@ -0,0 +1,171 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "oneapi/tbb/detail/_config.h"
+
+#include "main.h"
+#include "governor.h"
+#include "environment.h"
+#include "market.h"
+#include "misc.h"
+#include "itt_notify.h"
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+
+//------------------------------------------------------------------------
+// Begin shared data layout.
+// The following global data items are mostly read-only after initialization.
+//------------------------------------------------------------------------
+
+//------------------------------------------------------------------------
+// governor data
+basic_tls<thread_data*> governor::theTLS;
+unsigned governor::DefaultNumberOfThreads;
+size_t governor::DefaultPageSize;
+rml::tbb_factory governor::theRMLServerFactory;
+bool governor::UsePrivateRML;
+bool governor::is_rethrow_broken;
+
+//------------------------------------------------------------------------
+// market data
+market* market::theMarket;
+market::global_market_mutex_type market::theMarketMutex;
+
+//------------------------------------------------------------------------
+// context propagation data
+context_state_propagation_mutex_type the_context_state_propagation_mutex;
+std::atomic<uintptr_t> the_context_state_propagation_epoch{};
+
+//------------------------------------------------------------------------
+// One time initialization data
+
+//! Counter of references to global shared resources such as TLS.
+std::atomic<int> __TBB_InitOnce::count{};
+
+std::atomic_flag __TBB_InitOnce::InitializationLock = ATOMIC_FLAG_INIT;
+
+//! Flag that is set to true after one-time initializations are done.
+std::atomic<bool> __TBB_InitOnce::InitializationDone{};
+
+#if __TBB_USE_ITT_NOTIFY
+//! Defined in profiling.cpp
+extern bool ITT_Present;
+void ITT_DoUnsafeOneTimeInitialization();
+#endif
+
+#if !(_WIN32||_WIN64) || __TBB_SOURCE_DIRECTLY_INCLUDED
+static __TBB_InitOnce __TBB_InitOnceHiddenInstance;
+#endif
+
+#if TBB_USE_ASSERT
+std::atomic<int> the_observer_proxy_count;
+
+struct check_observer_proxy_count {
+ ~check_observer_proxy_count() {
+ if (the_observer_proxy_count != 0) {
+ runtime_warning("Leaked %ld observer_proxy objects\n", long(the_observer_proxy_count));
+ }
+ }
+};
+// The proxy count checker shall be defined after __TBB_InitOnceHiddenInstance to check the count
+// after auto termination.
+static check_observer_proxy_count the_check_observer_proxy_count;
+#endif /* TBB_USE_ASSERT */
+
+//------------------------------------------------------------------------
+// __TBB_InitOnce
+//------------------------------------------------------------------------
+
+void __TBB_InitOnce::add_ref() {
+ if( ++count==1 )
+ governor::acquire_resources();
+}
+
+void __TBB_InitOnce::remove_ref() {
+ int k = --count;
+ __TBB_ASSERT(k>=0,"removed __TBB_InitOnce ref that was not added?");
+ if( k==0 ) {
+ governor::release_resources();
+ ITT_FINI_ITTLIB();
+ }
+}
+
+//------------------------------------------------------------------------
+// One-time Initializations
+//------------------------------------------------------------------------
+
+//! Defined in cache_aligned_allocator.cpp
+void initialize_cache_aligned_allocator();
+
+//! Performs thread-safe lazy one-time general TBB initialization.
+void DoOneTimeInitialization() {
+ __TBB_InitOnce::lock();
+ // No fence required for load of InitializationDone, because we are inside a critical section.
+ if( !__TBB_InitOnce::InitializationDone ) {
+ __TBB_InitOnce::add_ref();
+ if( GetBoolEnvironmentVariable("TBB_VERSION") )
+ PrintVersion();
+ bool itt_present = false;
+#if __TBB_USE_ITT_NOTIFY
+ ITT_DoUnsafeOneTimeInitialization();
+ itt_present = ITT_Present;
+#endif /* __TBB_USE_ITT_NOTIFY */
+ initialize_cache_aligned_allocator();
+ governor::initialize_rml_factory();
+ // Force processor groups support detection
+ governor::default_num_threads();
+ // Force OS regular page size detection
+ governor::default_page_size();
+ PrintExtraVersionInfo( "TOOLS SUPPORT", itt_present ? "enabled" : "disabled" );
+ __TBB_InitOnce::InitializationDone = true;
+ }
+ __TBB_InitOnce::unlock();
+}
+
+#if (_WIN32||_WIN64) && !__TBB_SOURCE_DIRECTLY_INCLUDED
+//! Windows "DllMain" that handles startup and shutdown of dynamic library.
+extern "C" bool WINAPI DllMain( HANDLE /*hinstDLL*/, DWORD reason, LPVOID lpvReserved ) {
+ switch( reason ) {
+ case DLL_PROCESS_ATTACH:
+ __TBB_InitOnce::add_ref();
+ break;
+ case DLL_PROCESS_DETACH:
+ // Since DLL_THREAD_DETACH is not called for the main thread, call auto-termination
+ // here as well - but not during process shutdown (due to risk of a deadlock).
+ if ( lpvReserved==NULL ) { // library unload
+ governor::terminate_external_thread();
+ }
+ __TBB_InitOnce::remove_ref();
+ // It is assumed that InitializationDone is not set after DLL_PROCESS_DETACH,
+ // and thus no race on InitializationDone is possible.
+ if ( __TBB_InitOnce::initialization_done() ) {
+ // Remove reference that we added in DoOneTimeInitialization.
+ __TBB_InitOnce::remove_ref();
+ }
+ break;
+ case DLL_THREAD_DETACH:
+ governor::terminate_external_thread();
+ break;
+ }
+ return true;
+}
+#endif /* (_WIN32||_WIN64) && !__TBB_SOURCE_DIRECTLY_INCLUDED */
+
+} // namespace r1
+} // namespace detail
+} // namespace tbb
diff --git a/contrib/libs/tbb/src/tbb/main.h b/contrib/libs/tbb/src/tbb/main.h
new file mode 100644
index 0000000000..c6f54bb47b
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/main.h
@@ -0,0 +1,99 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#ifndef _TBB_main_H
+#define _TBB_main_H
+
+#include "governor.h"
+
+#include <atomic>
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+
+void DoOneTimeInitialization();
+
+//------------------------------------------------------------------------
+// __TBB_InitOnce
+//------------------------------------------------------------------------
+
+// TODO (TBB_REVAMP_TODO): consider better names
+//! Class that supports TBB initialization.
+/** It handles acquisition and release of global resources (e.g. TLS) during startup and shutdown,
+ as well as synchronization for DoOneTimeInitialization. */
+class __TBB_InitOnce {
+ friend void DoOneTimeInitialization();
+ friend void ITT_DoUnsafeOneTimeInitialization();
+
+ static std::atomic<int> count;
+
+ //! Platform specific code to acquire resources.
+ static void acquire_resources();
+
+ //! Platform specific code to release resources.
+ static void release_resources();
+
+ //! Specifies whether the one-time initialization has been done.
+ static std::atomic<bool> InitializationDone;
+
+ //! Global initialization lock
+ /** Scenarios are possible when tools interop has to be initialized before the
+ TBB itself. This imposes a requirement that the global initialization lock
+ has to support valid static initialization, and does not issue any tool
+ notifications in any build mode. **/
+ static std::atomic_flag InitializationLock;
+
+public:
+ static void lock() {
+ tbb::detail::atomic_backoff backoff;
+ while( InitializationLock.test_and_set() ) backoff.pause();
+ }
+
+ static void unlock() { InitializationLock.clear(std::memory_order_release); }
+
+ static bool initialization_done() { return InitializationDone.load(std::memory_order_acquire); }
+
+ //! Add initial reference to resources.
+ /** We assume that dynamic loading of the library prevents any other threads
+ from entering the library until this constructor has finished running. **/
+ __TBB_InitOnce() { add_ref(); }
+
+ //! Remove the initial reference to resources.
+ /** This is not necessarily the last reference if other threads are still running. **/
+ ~__TBB_InitOnce() {
+ governor::terminate_external_thread(); // TLS dtor not called for the main thread
+ remove_ref();
+ // We assume that InitializationDone is not set after file-scope destructors
+ // start running, and thus no race on InitializationDone is possible.
+ if ( initialization_done() ) {
+ // Remove an extra reference that was added in DoOneTimeInitialization.
+ remove_ref();
+ }
+ }
+ //! Add reference to resources. If first reference added, acquire the resources.
+ static void add_ref();
+
+ //! Remove reference to resources. If last reference removed, release the resources.
+ static void remove_ref();
+
+}; // class __TBB_InitOnce
+
+} // namespace r1
+} // namespace detail
+} // namespace tbb
+
+#endif /* _TBB_main_H */
diff --git a/contrib/libs/tbb/src/tbb/market.cpp b/contrib/libs/tbb/src/tbb/market.cpp
new file mode 100644
index 0000000000..9259eaf588
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/market.cpp
@@ -0,0 +1,640 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "oneapi/tbb/global_control.h" // global_control::active_value
+
+#include "market.h"
+#include "main.h"
+#include "governor.h"
+#include "arena.h"
+#include "thread_data.h"
+#include "itt_notify.h"
+
+#include <cstring> // std::memset()
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+
+/** This method must be invoked under my_arenas_list_mutex. **/
+arena* market::select_next_arena( arena* hint ) {
+ unsigned next_arena_priority_level = num_priority_levels;
+ if ( hint )
+ next_arena_priority_level = hint->my_priority_level;
+ for ( unsigned idx = 0; idx < next_arena_priority_level; ++idx ) {
+ if ( !my_arenas[idx].empty() )
+ return &*my_arenas[idx].begin();
+ }
+    // Don't change the hint if no arena with a higher priority is found.
+ return hint;
+}
+
+void market::insert_arena_into_list ( arena& a ) {
+ __TBB_ASSERT( a.my_priority_level < num_priority_levels, nullptr );
+ my_arenas[a.my_priority_level].push_front( a );
+ __TBB_ASSERT( !my_next_arena || my_next_arena->my_priority_level < num_priority_levels, nullptr );
+ my_next_arena = select_next_arena( my_next_arena );
+}
+
+void market::remove_arena_from_list ( arena& a ) {
+ __TBB_ASSERT( a.my_priority_level < num_priority_levels, nullptr );
+ my_arenas[a.my_priority_level].remove( a );
+ if ( my_next_arena == &a )
+ my_next_arena = nullptr;
+ my_next_arena = select_next_arena( my_next_arena );
+}
+
+//------------------------------------------------------------------------
+// market
+//------------------------------------------------------------------------
+
+market::market ( unsigned workers_soft_limit, unsigned workers_hard_limit, std::size_t stack_size )
+ : my_num_workers_hard_limit(workers_hard_limit)
+ , my_num_workers_soft_limit(workers_soft_limit)
+ , my_next_arena(nullptr)
+ , my_ref_count(1)
+ , my_stack_size(stack_size)
+ , my_workers_soft_limit_to_report(workers_soft_limit)
+{
+    // Once created, the RML server starts initializing workers, which need the
+    // global market instance to obtain the worker stack size.
+ my_server = governor::create_rml_server( *this );
+ __TBB_ASSERT( my_server, "Failed to create RML server" );
+}
+
+static unsigned calc_workers_soft_limit(unsigned workers_soft_limit, unsigned workers_hard_limit) {
+ if( int soft_limit = market::app_parallelism_limit() )
+ workers_soft_limit = soft_limit-1;
+ else // if user set no limits (yet), use market's parameter
+ workers_soft_limit = max( governor::default_num_threads() - 1, workers_soft_limit );
+ if( workers_soft_limit >= workers_hard_limit )
+ workers_soft_limit = workers_hard_limit-1;
+ return workers_soft_limit;
+}
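+
+// Editorial note (not part of the original sources): a worked example of the computation
+// above, assuming app_parallelism_limit() reflects a user-set limit via global_control.
+// On a machine where governor::default_num_threads() == 8 and workers_hard_limit == 256:
+//  - no user limit, workers_soft_limit == 0  ->  max(8-1, 0) == 7 workers;
+//  - user limit of 4                         ->  4-1 == 3 workers;
+// in both cases the result stays below workers_hard_limit, so no clamping occurs.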
+
+bool market::add_ref_unsafe( global_market_mutex_type::scoped_lock& lock, bool is_public, unsigned workers_requested, std::size_t stack_size ) {
+ market *m = theMarket;
+ if( m ) {
+ ++m->my_ref_count;
+ const unsigned old_public_count = is_public ? m->my_public_ref_count++ : /*any non-zero value*/1;
+ lock.release();
+ if( old_public_count==0 )
+ set_active_num_workers( calc_workers_soft_limit(workers_requested, m->my_num_workers_hard_limit) );
+
+ // do not warn if default number of workers is requested
+ if( workers_requested != governor::default_num_threads()-1 ) {
+ __TBB_ASSERT( skip_soft_limit_warning > workers_requested,
+ "skip_soft_limit_warning must be larger than any valid workers_requested" );
+ unsigned soft_limit_to_report = m->my_workers_soft_limit_to_report.load(std::memory_order_relaxed);
+ if( soft_limit_to_report < workers_requested ) {
+ runtime_warning( "The number of workers is currently limited to %u. "
+ "The request for %u workers is ignored. Further requests for more workers "
+ "will be silently ignored until the limit changes.\n",
+ soft_limit_to_report, workers_requested );
+ // The race is possible when multiple threads report warnings.
+ // We are OK with that, as there are just multiple warnings.
+ unsigned expected_limit = soft_limit_to_report;
+ m->my_workers_soft_limit_to_report.compare_exchange_strong(expected_limit, skip_soft_limit_warning);
+ }
+
+ }
+ if( m->my_stack_size < stack_size )
+ runtime_warning( "Thread stack size has been already set to %u. "
+ "The request for larger stack (%u) cannot be satisfied.\n", m->my_stack_size, stack_size );
+ return true;
+ }
+ return false;
+}
+
+market& market::global_market(bool is_public, unsigned workers_requested, std::size_t stack_size) {
+ global_market_mutex_type::scoped_lock lock( theMarketMutex );
+ if( !market::add_ref_unsafe(lock, is_public, workers_requested, stack_size) ) {
+ // TODO: A lot is done under theMarketMutex locked. Can anything be moved out?
+ if( stack_size == 0 )
+ stack_size = global_control::active_value(global_control::thread_stack_size);
+        // Expect that 4P is suitable for most applications.
+        // Limit to 2P for large thread counts.
+ // TODO: ask RML for max concurrency and possibly correct hard_limit
+ const unsigned factor = governor::default_num_threads()<=128? 4 : 2;
+ // The requested number of threads is intentionally not considered in
+ // computation of the hard limit, in order to separate responsibilities
+ // and avoid complicated interactions between global_control and task_scheduler_init.
+        // The market guarantees that at least 256 threads can be created.
+ const unsigned workers_hard_limit = max(max(factor*governor::default_num_threads(), 256u), app_parallelism_limit());
+ const unsigned workers_soft_limit = calc_workers_soft_limit(workers_requested, workers_hard_limit);
+ // Create the global market instance
+ std::size_t size = sizeof(market);
+ __TBB_ASSERT( __TBB_offsetof(market, my_workers) + sizeof(thread_data*) == sizeof(market),
+ "my_workers must be the last data field of the market class");
+ size += sizeof(thread_data*) * (workers_hard_limit - 1);
+ __TBB_InitOnce::add_ref();
+ void* storage = cache_aligned_allocate(size);
+ std::memset( storage, 0, size );
+ // Initialize and publish global market
+ market* m = new (storage) market( workers_soft_limit, workers_hard_limit, stack_size );
+ if( is_public )
+ m->my_public_ref_count.store(1, std::memory_order_relaxed);
+#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE
+ if (market::is_lifetime_control_present()) {
+ ++m->my_public_ref_count;
+ ++m->my_ref_count;
+ }
+#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE
+ theMarket = m;
+ // This check relies on the fact that for shared RML default_concurrency==max_concurrency
+ if ( !governor::UsePrivateRML && m->my_server->default_concurrency() < workers_soft_limit )
+ runtime_warning( "RML might limit the number of workers to %u while %u is requested.\n"
+ , m->my_server->default_concurrency(), workers_soft_limit );
+ }
+ return *theMarket;
+}
+
+void market::destroy () {
+ this->market::~market(); // qualified to suppress warning
+ cache_aligned_deallocate( this );
+ __TBB_InitOnce::remove_ref();
+}
+
+bool market::release ( bool is_public, bool blocking_terminate ) {
+ market::enforce([this] { return theMarket == this; }, "Global market instance was destroyed prematurely?");
+ bool do_release = false;
+ {
+ global_market_mutex_type::scoped_lock lock( theMarketMutex );
+ if ( blocking_terminate ) {
+ __TBB_ASSERT( is_public, "Only an object with a public reference can request the blocking terminate" );
+ while ( my_public_ref_count.load(std::memory_order_relaxed) == 1 &&
+ my_ref_count.load(std::memory_order_relaxed) > 1 ) {
+ lock.release();
+                // To guarantee that request_close_connection() is called by the last external thread, we need to wait until all
+                // references are released. Re-read my_public_ref_count to limit the wait if new external threads are created.
+                // Theoretically, new private references to the market can be added during the wait, making it potentially
+ // endless.
+ // TODO: revise why the weak scheduler needs market's pointer and try to remove this wait.
+ // Note that the market should know about its schedulers for cancellation/exception/priority propagation,
+ // see e.g. task_group_context::cancel_group_execution()
+ while ( my_public_ref_count.load(std::memory_order_acquire) == 1 &&
+ my_ref_count.load(std::memory_order_acquire) > 1 ) {
+ yield();
+ }
+ lock.acquire( theMarketMutex );
+ }
+ }
+ if ( is_public ) {
+ __TBB_ASSERT( theMarket == this, "Global market instance was destroyed prematurely?" );
+ __TBB_ASSERT( my_public_ref_count.load(std::memory_order_relaxed), NULL );
+ --my_public_ref_count;
+ }
+ if ( --my_ref_count == 0 ) {
+ __TBB_ASSERT( !my_public_ref_count.load(std::memory_order_relaxed), NULL );
+ do_release = true;
+ theMarket = NULL;
+ }
+ }
+ if( do_release ) {
+ __TBB_ASSERT( !my_public_ref_count.load(std::memory_order_relaxed),
+ "No public references remain if we remove the market." );
+ // inform RML that blocking termination is required
+ my_join_workers = blocking_terminate;
+ my_server->request_close_connection();
+ return blocking_terminate;
+ }
+ return false;
+}
+
+int market::update_workers_request() {
+ int old_request = my_num_workers_requested;
+ my_num_workers_requested = min(my_total_demand.load(std::memory_order_relaxed),
+ (int)my_num_workers_soft_limit.load(std::memory_order_relaxed));
+#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY
+ if (my_mandatory_num_requested > 0) {
+ __TBB_ASSERT(my_num_workers_soft_limit.load(std::memory_order_relaxed) == 0, NULL);
+ my_num_workers_requested = 1;
+ }
+#endif
+ update_allotment(my_num_workers_requested);
+ return my_num_workers_requested - old_request;
+}
+
+void market::set_active_num_workers ( unsigned soft_limit ) {
+ market *m;
+
+ {
+ global_market_mutex_type::scoped_lock lock( theMarketMutex );
+ if ( !theMarket )
+ return; // actual value will be used at market creation
+ m = theMarket;
+ if (m->my_num_workers_soft_limit.load(std::memory_order_relaxed) == soft_limit)
+ return;
+ ++m->my_ref_count;
+ }
+    // We now hold a reference to the market (my_ref_count), so it can be used safely.
+
+ int delta = 0;
+ {
+ arenas_list_mutex_type::scoped_lock lock( m->my_arenas_list_mutex );
+ __TBB_ASSERT(soft_limit <= m->my_num_workers_hard_limit, NULL);
+
+#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY
+ arena_list_type* arenas = m->my_arenas;
+
+ if (m->my_num_workers_soft_limit.load(std::memory_order_relaxed) == 0 &&
+ m->my_mandatory_num_requested > 0)
+ {
+ for (unsigned level = 0; level < num_priority_levels; ++level )
+ for (arena_list_type::iterator it = arenas[level].begin(); it != arenas[level].end(); ++it)
+ if (it->my_global_concurrency_mode.load(std::memory_order_relaxed))
+ m->disable_mandatory_concurrency_impl(&*it);
+ }
+ __TBB_ASSERT(m->my_mandatory_num_requested == 0, NULL);
+#endif
+
+ m->my_num_workers_soft_limit.store(soft_limit, std::memory_order_release);
+ // report only once after new soft limit value is set
+ m->my_workers_soft_limit_to_report.store(soft_limit, std::memory_order_relaxed);
+
+#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY
+ if (m->my_num_workers_soft_limit.load(std::memory_order_relaxed) == 0) {
+ for (unsigned level = 0; level < num_priority_levels; ++level )
+ for (arena_list_type::iterator it = arenas[level].begin(); it != arenas[level].end(); ++it)
+ if (it->has_enqueued_tasks())
+ m->enable_mandatory_concurrency_impl(&*it);
+ }
+#endif
+
+ delta = m->update_workers_request();
+ }
+ // adjust_job_count_estimate must be called outside of any locks
+ if( delta!=0 )
+ m->my_server->adjust_job_count_estimate( delta );
+ // release internal market reference to match ++m->my_ref_count above
+ m->release( /*is_public=*/false, /*blocking_terminate=*/false );
+}
+
+bool governor::does_client_join_workers (const rml::tbb_client &client) {
+ return ((const market&)client).must_join_workers();
+}
+
+arena* market::create_arena ( int num_slots, int num_reserved_slots, unsigned arena_priority_level,
+ std::size_t stack_size )
+{
+ __TBB_ASSERT( num_slots > 0, NULL );
+ __TBB_ASSERT( num_reserved_slots <= num_slots, NULL );
+ // Add public market reference for an external thread/task_arena (that adds an internal reference in exchange).
+ market &m = global_market( /*is_public=*/true, num_slots-num_reserved_slots, stack_size );
+ arena& a = arena::allocate_arena( m, num_slots, num_reserved_slots, arena_priority_level );
+ // Add newly created arena into the existing market's list.
+ arenas_list_mutex_type::scoped_lock lock(m.my_arenas_list_mutex);
+ m.insert_arena_into_list(a);
+ return &a;
+}
+
+/** This method must be invoked under my_arenas_list_mutex. **/
+void market::detach_arena ( arena& a ) {
+ market::enforce([this] { return theMarket == this; }, "Global market instance was destroyed prematurely?");
+ __TBB_ASSERT( !a.my_slots[0].is_occupied(), NULL );
+ if (a.my_global_concurrency_mode.load(std::memory_order_relaxed))
+ disable_mandatory_concurrency_impl(&a);
+
+ remove_arena_from_list(a);
+ if (a.my_aba_epoch == my_arenas_aba_epoch.load(std::memory_order_relaxed)) {
+ my_arenas_aba_epoch.store(my_arenas_aba_epoch.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed);
+ }
+}
+
+void market::try_destroy_arena ( arena* a, uintptr_t aba_epoch, unsigned priority_level ) {
+ bool locked = true;
+ __TBB_ASSERT( a, NULL );
+    // We hold a reference to the market, so it cannot be destroyed at any moment here.
+ market::enforce([this] { return theMarket == this; }, NULL);
+ __TBB_ASSERT( my_ref_count!=0, NULL );
+ my_arenas_list_mutex.lock();
+ arena_list_type::iterator it = my_arenas[priority_level].begin();
+ for ( ; it != my_arenas[priority_level].end(); ++it ) {
+ if ( a == &*it ) {
+ if ( it->my_aba_epoch == aba_epoch ) {
+ // Arena is alive
+ if ( !a->my_num_workers_requested && !a->my_references.load(std::memory_order_relaxed) ) {
+ __TBB_ASSERT(
+ !a->my_num_workers_allotted.load(std::memory_order_relaxed) &&
+ (a->my_pool_state == arena::SNAPSHOT_EMPTY || !a->my_max_num_workers),
+ "Inconsistent arena state"
+ );
+ // Arena is abandoned. Destroy it.
+ detach_arena( *a );
+ my_arenas_list_mutex.unlock();
+ locked = false;
+ a->free_arena();
+ }
+ }
+ if (locked)
+ my_arenas_list_mutex.unlock();
+ return;
+ }
+ }
+ my_arenas_list_mutex.unlock();
+}
+
+/** This method must be invoked under my_arenas_list_mutex. **/
+arena* market::arena_in_need ( arena_list_type* arenas, arena* hint ) {
+    // TODO: make sure an arena with higher priority is returned only if it has available slots.
+ hint = select_next_arena( hint );
+ if ( !hint )
+ return nullptr;
+ arena_list_type::iterator it = hint;
+ unsigned curr_priority_level = hint->my_priority_level;
+ __TBB_ASSERT( it != arenas[curr_priority_level].end(), nullptr );
+ do {
+ arena& a = *it;
+ if ( ++it == arenas[curr_priority_level].end() ) {
+ do {
+ ++curr_priority_level %= num_priority_levels;
+ } while ( arenas[curr_priority_level].empty() );
+ it = arenas[curr_priority_level].begin();
+ }
+ if( a.num_workers_active() < a.my_num_workers_allotted.load(std::memory_order_relaxed) ) {
+ a.my_references += arena::ref_worker;
+ return &a;
+ }
+ } while ( it != hint );
+ return nullptr;
+}
+
+arena* market::arena_in_need(arena* prev) {
+ if (my_total_demand.load(std::memory_order_acquire) <= 0)
+ return nullptr;
+ arenas_list_mutex_type::scoped_lock lock(my_arenas_list_mutex, /*is_writer=*/false);
+ // TODO: introduce three state response: alive, not_alive, no_market_arenas
+ if ( is_arena_alive(prev) )
+ return arena_in_need(my_arenas, prev);
+ return arena_in_need(my_arenas, my_next_arena);
+}
+
+int market::update_allotment ( arena_list_type* arenas, int workers_demand, int max_workers ) {
+ __TBB_ASSERT( workers_demand > 0, nullptr );
+ max_workers = min(workers_demand, max_workers);
+ int unassigned_workers = max_workers;
+ int assigned = 0;
+ int carry = 0;
+ unsigned max_priority_level = num_priority_levels;
+ for (unsigned list_idx = 0; list_idx < num_priority_levels; ++list_idx ) {
+ int assigned_per_priority = min(my_priority_level_demand[list_idx], unassigned_workers);
+ unassigned_workers -= assigned_per_priority;
+ for (arena_list_type::iterator it = arenas[list_idx].begin(); it != arenas[list_idx].end(); ++it) {
+ arena& a = *it;
+ __TBB_ASSERT(a.my_num_workers_requested >= 0, nullptr);
+ __TBB_ASSERT(a.my_num_workers_requested <= int(a.my_max_num_workers)
+ || (a.my_max_num_workers == 0 && a.my_local_concurrency_requests > 0 && a.my_num_workers_requested == 1), nullptr);
+ if (a.my_num_workers_requested == 0) {
+ __TBB_ASSERT(!a.my_num_workers_allotted.load(std::memory_order_relaxed), nullptr);
+ continue;
+ }
+
+ if (max_priority_level == num_priority_levels) {
+ max_priority_level = list_idx;
+ }
+
+ int allotted = 0;
+#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY
+ if (my_num_workers_soft_limit.load(std::memory_order_relaxed) == 0) {
+ __TBB_ASSERT(max_workers == 0 || max_workers == 1, nullptr);
+ allotted = a.my_global_concurrency_mode.load(std::memory_order_relaxed) &&
+ assigned < max_workers ? 1 : 0;
+ } else
+#endif
+ {
+ int tmp = a.my_num_workers_requested * assigned_per_priority + carry;
+ allotted = tmp / my_priority_level_demand[list_idx];
+ carry = tmp % my_priority_level_demand[list_idx];
+ __TBB_ASSERT(allotted <= a.my_num_workers_requested, nullptr);
+ __TBB_ASSERT(allotted <= int(a.my_num_slots - a.my_num_reserved_slots), nullptr);
+ }
+ a.my_num_workers_allotted.store(allotted, std::memory_order_relaxed);
+ a.my_is_top_priority.store(list_idx == max_priority_level, std::memory_order_relaxed);
+ assigned += allotted;
+ }
+ }
+ __TBB_ASSERT( 0 <= assigned && assigned <= max_workers, nullptr );
+ return assigned;
+}
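+
+/* Editorial note (not part of the original sources): a worked example of the proportional
+   split with carry above. Suppose one priority level has my_priority_level_demand == 7,
+   arena A requests 4 workers, arena B requests 3, and assigned_per_priority == 5:
+       A: tmp = 4*5 + 0 = 20;  allotted = 20/7 = 2;  carry = 6
+       B: tmp = 3*5 + 6 = 21;  allotted = 21/7 = 3;  carry = 0
+   The carry makes the integer shares add up to exactly assigned_per_priority (2 + 3 == 5),
+   while each arena still gets a share proportional to its request. */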
+
+/** This method must be invoked under my_arenas_list_mutex. **/
+bool market::is_arena_in_list( arena_list_type &arenas, arena *a ) {
+ __TBB_ASSERT( a, "Expected non-null pointer to arena." );
+ for ( arena_list_type::iterator it = arenas.begin(); it != arenas.end(); ++it )
+ if ( a == &*it )
+ return true;
+ return false;
+}
+
+/** This method must be invoked under my_arenas_list_mutex. **/
+bool market::is_arena_alive(arena* a) {
+ if ( !a )
+ return false;
+
+ // Still cannot access internals of the arena since the object itself might be destroyed.
+
+ for ( unsigned idx = 0; idx < num_priority_levels; ++idx ) {
+ if ( is_arena_in_list( my_arenas[idx], a ) )
+ return true;
+ }
+ return false;
+}
+
+#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY
+void market::enable_mandatory_concurrency_impl ( arena *a ) {
+ __TBB_ASSERT(!a->my_global_concurrency_mode.load(std::memory_order_relaxed), NULL);
+ __TBB_ASSERT(my_num_workers_soft_limit.load(std::memory_order_relaxed) == 0, NULL);
+
+ a->my_global_concurrency_mode.store(true, std::memory_order_relaxed);
+ my_mandatory_num_requested++;
+}
+
+void market::enable_mandatory_concurrency ( arena *a ) {
+ int delta = 0;
+ {
+ arenas_list_mutex_type::scoped_lock lock(my_arenas_list_mutex);
+ if (my_num_workers_soft_limit.load(std::memory_order_relaxed) != 0 ||
+ a->my_global_concurrency_mode.load(std::memory_order_relaxed))
+ return;
+
+ enable_mandatory_concurrency_impl(a);
+ delta = update_workers_request();
+ }
+
+ if (delta != 0)
+ my_server->adjust_job_count_estimate(delta);
+}
+
+void market::disable_mandatory_concurrency_impl(arena* a) {
+ __TBB_ASSERT(a->my_global_concurrency_mode.load(std::memory_order_relaxed), NULL);
+ __TBB_ASSERT(my_mandatory_num_requested > 0, NULL);
+
+ a->my_global_concurrency_mode.store(false, std::memory_order_relaxed);
+ my_mandatory_num_requested--;
+}
+
+void market::mandatory_concurrency_disable ( arena *a ) {
+ int delta = 0;
+ {
+ arenas_list_mutex_type::scoped_lock lock(my_arenas_list_mutex);
+ if (!a->my_global_concurrency_mode.load(std::memory_order_relaxed))
+ return;
+        // There is a racy window in advertise_new_work between enabling mandatory concurrency and
+        // setting SNAPSHOT_FULL. It gives a spawn request a chance to disable mandatory concurrency.
+        // Therefore, we double-check that there are no enqueued tasks.
+ if (a->has_enqueued_tasks())
+ return;
+
+ __TBB_ASSERT(my_num_workers_soft_limit.load(std::memory_order_relaxed) == 0, NULL);
+ disable_mandatory_concurrency_impl(a);
+
+ delta = update_workers_request();
+ }
+ if (delta != 0)
+ my_server->adjust_job_count_estimate(delta);
+}
+#endif /* __TBB_ENQUEUE_ENFORCED_CONCURRENCY */
+
+void market::adjust_demand ( arena& a, int delta, bool mandatory ) {
+ if (!delta) {
+ return;
+ }
+ int target_epoch{};
+ {
+ arenas_list_mutex_type::scoped_lock lock(my_arenas_list_mutex);
+ __TBB_ASSERT(theMarket != nullptr, "market instance was destroyed prematurely?");
+#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY
+ if (mandatory) {
+ __TBB_ASSERT(delta == 1 || delta == -1, nullptr);
+ // Count the number of mandatory requests and proceed only for 0->1 and 1->0 transitions.
+ a.my_local_concurrency_requests += delta;
+ if ((delta > 0 && a.my_local_concurrency_requests != 1) ||
+ (delta < 0 && a.my_local_concurrency_requests != 0))
+ {
+ return;
+ }
+ }
+#endif
+ a.my_total_num_workers_requested += delta;
+ int target_workers = 0;
+ // Cap target_workers into interval [0, a.my_max_num_workers]
+ if (a.my_total_num_workers_requested > 0) {
+#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY
+            // At least one thread should be requested when mandatory concurrency is enabled.
+ int max_num_workers = int(a.my_max_num_workers);
+ if (a.my_local_concurrency_requests > 0 && max_num_workers == 0) {
+ max_num_workers = 1;
+ }
+#endif
+ target_workers = min(a.my_total_num_workers_requested, max_num_workers);
+ }
+
+ delta = target_workers - a.my_num_workers_requested;
+
+ if (delta == 0) {
+ return;
+ }
+
+ a.my_num_workers_requested += delta;
+ if (a.my_num_workers_requested == 0) {
+ a.my_num_workers_allotted.store(0, std::memory_order_relaxed);
+ }
+
+ int total_demand = my_total_demand.load(std::memory_order_relaxed) + delta;
+ my_total_demand.store(total_demand, std::memory_order_relaxed);
+ my_priority_level_demand[a.my_priority_level] += delta;
+ unsigned effective_soft_limit = my_num_workers_soft_limit.load(std::memory_order_relaxed);
+ if (my_mandatory_num_requested > 0) {
+ __TBB_ASSERT(effective_soft_limit == 0, NULL);
+ effective_soft_limit = 1;
+ }
+
+ update_allotment(effective_soft_limit);
+ if (delta > 0) {
+            // We can't overflow soft_limit, but remember the values requested by arenas in
+            // my_total_demand so that workers are not prematurely released to RML.
+ if (my_num_workers_requested + delta > (int)effective_soft_limit)
+ delta = effective_soft_limit - my_num_workers_requested;
+ }
+ else {
+ // the number of workers should not be decreased below my_total_demand
+ if (my_num_workers_requested + delta < total_demand)
+ delta = min(total_demand, (int)effective_soft_limit) - my_num_workers_requested;
+ }
+ my_num_workers_requested += delta;
+ __TBB_ASSERT(my_num_workers_requested <= (int)effective_soft_limit, NULL);
+
+ target_epoch = my_adjust_demand_target_epoch++;
+ }
+
+ spin_wait_until_eq(my_adjust_demand_current_epoch, target_epoch);
+ // Must be called outside of any locks
+ my_server->adjust_job_count_estimate( delta );
+ my_adjust_demand_current_epoch.store(target_epoch + 1, std::memory_order_release);
+}
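+
+/* Editorial note (not part of the original sources): the epoch pair above works like a
+   ticket lock that orders calls to adjust_job_count_estimate() without holding
+   my_arenas_list_mutex across them: each caller takes a ticket (target_epoch) while the
+   mutex is held, waits until my_adjust_demand_current_epoch reaches that ticket, performs
+   the RML call, and then passes the turn to the next ticket holder. This guarantees that
+   RML observes the deltas in the same order in which they were computed under the lock. */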
+
+void market::process( job& j ) {
+ thread_data& td = static_cast<thread_data&>(j);
+ // td.my_arena can be dead. Don't access it until arena_in_need is called
+ arena *a = td.my_arena;
+ for (int i = 0; i < 2; ++i) {
+ while ( (a = arena_in_need(a)) ) {
+ a->process(td);
+ }
+        // Workers leave the market because there is no arena in need. This can happen before
+        // adjust_job_count_estimate() decreases my_slack and RML puts this thread to sleep.
+        // It might result in a busy loop that checks for my_slack<0 and calls this method immediately.
+        // The yield refines this spinning.
+ if ( !i ) {
+ yield();
+ }
+ }
+}
+
+void market::cleanup( job& j) {
+ market::enforce([this] { return theMarket != this; }, NULL );
+ governor::auto_terminate(&j);
+}
+
+void market::acknowledge_close_connection() {
+ destroy();
+}
+
+::rml::job* market::create_one_job() {
+ unsigned short index = ++my_first_unused_worker_idx;
+ __TBB_ASSERT( index > 0, NULL );
+ ITT_THREAD_SET_NAME(_T("TBB Worker Thread"));
+ // index serves as a hint decreasing conflicts between workers when they migrate between arenas
+ thread_data* td = new(cache_aligned_allocate(sizeof(thread_data))) thread_data{ index, true };
+ __TBB_ASSERT( index <= my_num_workers_hard_limit, NULL );
+ __TBB_ASSERT( my_workers[index - 1] == nullptr, NULL );
+ my_workers[index - 1] = td;
+ return td;
+}
+
+void market::add_external_thread(thread_data& td) {
+ context_state_propagation_mutex_type::scoped_lock lock(the_context_state_propagation_mutex);
+ my_masters.push_front(td);
+}
+
+void market::remove_external_thread(thread_data& td) {
+ context_state_propagation_mutex_type::scoped_lock lock(the_context_state_propagation_mutex);
+ my_masters.remove(td);
+}
+
+} // namespace r1
+} // namespace detail
+} // namespace tbb
diff --git a/contrib/libs/tbb/src/tbb/market.h b/contrib/libs/tbb/src/tbb/market.h
new file mode 100644
index 0000000000..8443467447
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/market.h
@@ -0,0 +1,317 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#ifndef _TBB_market_H
+#define _TBB_market_H
+
+#include "scheduler_common.h"
+#include "concurrent_monitor.h"
+#include "intrusive_list.h"
+#include "rml_tbb.h"
+
+#include "oneapi/tbb/spin_rw_mutex.h"
+#include "oneapi/tbb/task_group.h"
+
+#include <atomic>
+
+#if defined(_MSC_VER) && defined(_Wp64)
+ // Workaround for overzealous compiler warnings in /Wp64 mode
+ #pragma warning (push)
+ #pragma warning (disable: 4244)
+#endif
+
+namespace tbb {
+namespace detail {
+
+#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE
+namespace d1 {
+class task_scheduler_handle;
+}
+#endif
+
+namespace r1 {
+
+class task_arena_base;
+class task_group_context;
+
+//------------------------------------------------------------------------
+// Class market
+//------------------------------------------------------------------------
+
+class market : no_copy, rml::tbb_client {
+ friend class arena;
+ friend class task_arena_base;
+ template<typename SchedulerTraits> friend class custom_scheduler;
+ friend class task_group_context;
+ friend class governor;
+#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE
+ friend class lifetime_control;
+#endif
+
+public:
+    //! Keys for the arena map array. The lower the value, the higher the priority of the arena list.
+ static constexpr unsigned num_priority_levels = 3;
+
+private:
+ friend void ITT_DoUnsafeOneTimeInitialization ();
+#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE
+ friend bool finalize_impl(d1::task_scheduler_handle& handle);
+#endif
+
+ typedef intrusive_list<arena> arena_list_type;
+ typedef intrusive_list<thread_data> thread_data_list_type;
+
+ //! Currently active global market
+ static market* theMarket;
+
+ typedef scheduler_mutex_type global_market_mutex_type;
+
+ //! Mutex guarding creation/destruction of theMarket, insertions/deletions in my_arenas, and cancellation propagation
+ static global_market_mutex_type theMarketMutex;
+
+ //! Lightweight mutex guarding accounting operations with arenas list
+ typedef spin_rw_mutex arenas_list_mutex_type;
+ // TODO: introduce fine-grained (per priority list) locking of arenas.
+ arenas_list_mutex_type my_arenas_list_mutex;
+
+ //! Pointer to the RML server object that services this TBB instance.
+ rml::tbb_server* my_server;
+
+ //! Waiting object for external and coroutine waiters.
+ extended_concurrent_monitor my_sleep_monitor;
+
+ //! Maximal number of workers allowed for use by the underlying resource manager
+ /** It can't be changed after market creation. **/
+ unsigned my_num_workers_hard_limit;
+
+ //! Current application-imposed limit on the number of workers (see set_active_num_workers())
+ /** It can't be more than my_num_workers_hard_limit. **/
+ std::atomic<unsigned> my_num_workers_soft_limit;
+
+ //! Number of workers currently requested from RML
+ int my_num_workers_requested;
+
+ //! The target serialization epoch for callers of adjust_job_count_estimate
+ int my_adjust_demand_target_epoch;
+
+ //! The current serialization epoch for callers of adjust_job_count_estimate
+ std::atomic<int> my_adjust_demand_current_epoch;
+
+ //! First unused index of worker
+    /** Used to assign indices to new workers coming from RML, and marks the busy part
+        of the my_workers array. **/
+ std::atomic<unsigned> my_first_unused_worker_idx;
+
+ //! Number of workers that were requested by all arenas on all priority levels
+ std::atomic<int> my_total_demand;
+
+ //! Number of workers that were requested by arenas per single priority list item
+ int my_priority_level_demand[num_priority_levels];
+
+#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY
+ //! How many times mandatory concurrency was requested from the market
+ int my_mandatory_num_requested;
+#endif
+
+ //! Per priority list of registered arenas
+ arena_list_type my_arenas[num_priority_levels];
+
+    //! The first arena to be checked when an idle worker seeks an arena to enter
+ /** The check happens in round-robin fashion. **/
+ arena *my_next_arena;
+
+ //! ABA prevention marker to assign to newly created arenas
+ std::atomic<uintptr_t> my_arenas_aba_epoch;
+
+ //! Reference count controlling market object lifetime
+ std::atomic<unsigned> my_ref_count;
+
+ //! Count of external threads attached
+ std::atomic<unsigned> my_public_ref_count;
+
+ //! Stack size of worker threads
+ std::size_t my_stack_size;
+
+ //! Shutdown mode
+ bool my_join_workers;
+
+ //! The value indicating that the soft limit warning is unnecessary
+ static const unsigned skip_soft_limit_warning = ~0U;
+
+ //! Either workers soft limit to be reported via runtime_warning() or skip_soft_limit_warning
+ std::atomic<unsigned> my_workers_soft_limit_to_report;
+
+ //! Constructor
+ market ( unsigned workers_soft_limit, unsigned workers_hard_limit, std::size_t stack_size );
+
+ //! Destroys and deallocates market object created by market::create()
+ void destroy ();
+
+ //! Recalculates the number of workers requested from RML and updates the allotment.
+ int update_workers_request();
+
+ //! Recalculates the number of workers assigned to each arena in the list.
+ /** The actual number of workers servicing a particular arena may temporarily
+ deviate from the calculated value. **/
+ void update_allotment (unsigned effective_soft_limit) {
+ int total_demand = my_total_demand.load(std::memory_order_relaxed);
+ if (total_demand) {
+ update_allotment(my_arenas, total_demand, (int)effective_soft_limit);
+ }
+ }
+
+ //! Returns next arena that needs more workers, or NULL.
+ arena* arena_in_need(arena* prev);
+
+ template <typename Pred>
+ static void enforce (Pred pred, const char* msg) {
+ suppress_unused_warning(pred, msg);
+#if TBB_USE_ASSERT
+ global_market_mutex_type::scoped_lock lock(theMarketMutex);
+ __TBB_ASSERT(pred(), msg);
+#endif
+ }
+
+ ////////////////////////////////////////////////////////////////////////////////
+ // Helpers to unify code branches dependent on priority feature presence
+
+ arena* select_next_arena( arena* hint );
+
+ void insert_arena_into_list ( arena& a );
+
+ void remove_arena_from_list ( arena& a );
+
+ arena* arena_in_need ( arena_list_type* arenas, arena* hint );
+
+ int update_allotment ( arena_list_type* arenas, int total_demand, int max_workers );
+
+ bool is_arena_in_list( arena_list_type& arenas, arena* a );
+
+ bool is_arena_alive( arena* a );
+
+ ////////////////////////////////////////////////////////////////////////////////
+ // Implementation of rml::tbb_client interface methods
+
+ version_type version () const override { return 0; }
+
+ unsigned max_job_count () const override { return my_num_workers_hard_limit; }
+
+ std::size_t min_stack_size () const override { return worker_stack_size(); }
+
+ job* create_one_job () override;
+
+ void cleanup( job& j ) override;
+
+ void acknowledge_close_connection () override;
+
+ void process( job& j ) override;
+
+public:
+ //! Factory method creating new market object
+ static market& global_market( bool is_public, unsigned max_num_workers = 0, std::size_t stack_size = 0 );
+
+ //! Add reference to market if theMarket exists
+ static bool add_ref_unsafe( global_market_mutex_type::scoped_lock& lock, bool is_public, unsigned max_num_workers = 0, std::size_t stack_size = 0 );
+
+ //! Creates an arena object
+    /** If necessary, also creates the global market instance and boosts its ref count.
+        Each call to create_arena() must be matched by a call to arena::free_arena(). **/
+    static arena* create_arena ( int num_slots, int num_reserved_slots,
+                                 unsigned arena_priority_level, std::size_t stack_size );
+
+ //! Removes the arena from the market's list
+    void try_destroy_arena ( arena*, uintptr_t aba_epoch, unsigned priority_level );
+
+ //! Removes the arena from the market's list
+ void detach_arena ( arena& );
+
+ //! Decrements market's refcount and destroys it in the end
+ bool release ( bool is_public, bool blocking_terminate );
+
+ //! Return wait list
+ extended_concurrent_monitor& get_wait_list() { return my_sleep_monitor; }
+
+#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY
+    //! Implementation of mandatory concurrency enabling
+ void enable_mandatory_concurrency_impl ( arena *a );
+
+ //! Inform the external thread that there is an arena with mandatory concurrency
+ void enable_mandatory_concurrency ( arena *a );
+
+    //! Implementation of mandatory concurrency disabling
+    void disable_mandatory_concurrency_impl(arena* a);
+
+    //! Inform the external thread that the arena is no longer interested in mandatory concurrency
+    void mandatory_concurrency_disable ( arena *a );
+#endif /* __TBB_ENQUEUE_ENFORCED_CONCURRENCY */
+
+    //! Request that the arena's demand for workers be adjusted.
+ /** Concurrent invocations are possible only on behalf of different arenas. **/
+ void adjust_demand ( arena&, int delta, bool mandatory );
+
+ //! Used when RML asks for join mode during workers termination.
+ bool must_join_workers () const { return my_join_workers; }
+
+ //! Returns the requested stack size of worker threads.
+ std::size_t worker_stack_size () const { return my_stack_size; }
+
+ //! Set number of active workers
+ static void set_active_num_workers( unsigned w );
+
+ //! Reports active parallelism level according to user's settings
+ static unsigned app_parallelism_limit();
+
+#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE
+ //! Reports if any active global lifetime references are present
+ static unsigned is_lifetime_control_present();
+#endif
+
+ //! Finds all contexts affected by the state change and propagates the new state to them.
+ /** The propagation is relayed to the market because tasks created by one
+ external thread can be passed to and executed by other external threads. This means
+ that context trees can span several arenas at once and thus state change
+ propagation cannot be generally localized to one arena only. **/
+ template <typename T>
+ bool propagate_task_group_state (std::atomic<T> d1::task_group_context::*mptr_state, d1::task_group_context& src, T new_state );
+
+ //! List of registered external threads
+ thread_data_list_type my_masters;
+
+ //! Array of pointers to the registered workers
+ /** Used by cancellation propagation mechanism.
+ Must be the last data member of the class market. **/
+ thread_data* my_workers[1];
+
+ static unsigned max_num_workers() {
+ global_market_mutex_type::scoped_lock lock( theMarketMutex );
+ return theMarket? theMarket->my_num_workers_hard_limit : 0;
+ }
+
+ void add_external_thread(thread_data& td);
+
+ void remove_external_thread(thread_data& td);
+}; // class market
+
+} // namespace r1
+} // namespace detail
+} // namespace tbb
+
+#if defined(_MSC_VER) && defined(_Wp64)
+ // Workaround for overzealous compiler warnings in /Wp64 mode
+ #pragma warning (pop)
+#endif // warning 4244 is back
+
+#endif /* _TBB_market_H */
diff --git a/contrib/libs/tbb/src/tbb/misc.cpp b/contrib/libs/tbb/src/tbb/misc.cpp
new file mode 100644
index 0000000000..0e1d33a596
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/misc.cpp
@@ -0,0 +1,137 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+// Source file for miscellaneous entities that are infrequently referenced by
+// an executing program.
+
+#include "oneapi/tbb/detail/_exception.h"
+#include "oneapi/tbb/detail/_machine.h"
+
+#include "oneapi/tbb/version.h"
+
+#include "misc.h"
+#include "governor.h"
+#include "assert_impl.h" // Out-of-line TBB assertion handling routines are instantiated here.
+
+#include <cstdio>
+#include <cstdlib>
+#include <stdexcept>
+#include <cstring>
+#include <cstdarg>
+
+#if _WIN32||_WIN64
+#include <windows.h>
+#endif
+
+#if !_WIN32
+#include <unistd.h> // sysconf(_SC_PAGESIZE)
+#endif
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+
+//------------------------------------------------------------------------
+// governor data
+//------------------------------------------------------------------------
+cpu_features_type governor::cpu_features;
+
+
+size_t DefaultSystemPageSize() {
+#if _WIN32
+ SYSTEM_INFO si;
+ GetSystemInfo(&si);
+ return si.dwPageSize;
+#else
+ return sysconf(_SC_PAGESIZE);
+#endif
+}
+
+/** The leading "\0" is here so that applying "strings" to the binary delivers a clean result. */
+static const char VersionString[] = "\0" TBB_VERSION_STRINGS;
+
+static bool PrintVersionFlag = false;
+
+void PrintVersion() {
+ PrintVersionFlag = true;
+ std::fputs(VersionString+1,stderr);
+}
+
+void PrintExtraVersionInfo( const char* category, const char* format, ... ) {
+ if( PrintVersionFlag ) {
+ char str[1024]; std::memset(str, 0, 1024);
+ va_list args; va_start(args, format);
+ // Note: correct vsnprintf definition obtained from tbb_assert_impl.h
+ std::vsnprintf( str, 1024-1, format, args);
+ va_end(args);
+ std::fprintf(stderr, "oneTBB: %s\t%s\n", category, str );
+ }
+}
+
+//! check for transaction support.
+#if _MSC_VER
+#include <intrin.h> // for __cpuid
+#endif
+
+#if __TBB_x86_32 || __TBB_x86_64
+void check_cpuid(int leaf, int sub_leaf, int registers[4]) {
+#if _MSC_VER
+ __cpuidex(registers, leaf, sub_leaf);
+#else
+ int reg_eax = 0;
+ int reg_ebx = 0;
+ int reg_ecx = 0;
+ int reg_edx = 0;
+#if __TBB_x86_32 && __PIC__
+    // On 32-bit systems with position-independent code, GCC cannot freely clobber the EBX
+    // register (it is reserved for the GOT pointer). We help it by backing EBX up and restoring it.
+ __asm__("mov %%ebx, %%esi\n\t"
+ "cpuid\n\t"
+ "xchg %%ebx, %%esi"
+ : "=a"(reg_eax), "=S"(reg_ebx), "=c"(reg_ecx), "=d"(reg_edx)
+ : "0"(leaf), "2"(sub_leaf) // read value from eax and ecx
+ );
+#else
+ __asm__("cpuid"
+ : "=a"(reg_eax), "=b"(reg_ebx), "=c"(reg_ecx), "=d"(reg_edx)
+ : "0"(leaf), "2"(sub_leaf) // read value from eax and ecx
+ );
+#endif
+ registers[0] = reg_eax;
+ registers[1] = reg_ebx;
+ registers[2] = reg_ecx;
+ registers[3] = reg_edx;
+#endif
+}
+#endif
+
+void detect_cpu_features(cpu_features_type& cpu_features) {
+ suppress_unused_warning(cpu_features);
+#if __TBB_x86_32 || __TBB_x86_64
+ const int rtm_ebx_mask = 1 << 11;
+ const int waitpkg_ecx_mask = 1 << 5;
+ int registers[4] = {0};
+
+ // Check RTM and WAITPKG
+ check_cpuid(7, 0, registers);
+ cpu_features.rtm_enabled = (registers[1] & rtm_ebx_mask) != 0;
+ cpu_features.waitpkg_enabled = (registers[2] & waitpkg_ecx_mask) != 0;
+#endif /* (__TBB_x86_32 || __TBB_x86_64) */
+}
+
+} // namespace r1
+} // namespace detail
+} // namespace tbb
diff --git a/contrib/libs/tbb/src/tbb/misc.h b/contrib/libs/tbb/src/tbb/misc.h
new file mode 100644
index 0000000000..6a3cf778a4
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/misc.h
@@ -0,0 +1,289 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#ifndef _TBB_tbb_misc_H
+#define _TBB_tbb_misc_H
+
+#include "oneapi/tbb/detail/_config.h"
+#include "oneapi/tbb/detail/_assert.h"
+#include "oneapi/tbb/detail/_utils.h"
+
+#if __TBB_ARENA_BINDING
+#include "oneapi/tbb/info.h"
+#endif /*__TBB_ARENA_BINDING*/
+
+#if __linux__ || __FreeBSD__
+#include <sys/param.h> // __FreeBSD_version
+#if __FreeBSD_version >= 701000
+#include <sys/cpuset.h>
+#endif
+#endif
+
+#include <atomic>
+
+// Does the operating system have a system call to pin a thread to a set of OS processors?
+#define __TBB_OS_AFFINITY_SYSCALL_PRESENT ((__linux__ && !__ANDROID__) || (__FreeBSD_version >= 701000))
+// On IBM* Blue Gene* CNK nodes, the affinity API has restrictions that prevent its usability for TBB,
+// and also sysconf(_SC_NPROCESSORS_ONLN) already takes process affinity into account.
+#define __TBB_USE_OS_AFFINITY_SYSCALL (__TBB_OS_AFFINITY_SYSCALL_PRESENT && !__bg__)
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+
+void runtime_warning(const char* format, ... );
+
+#if __TBB_ARENA_BINDING
+class task_arena;
+class task_scheduler_observer;
+#endif /*__TBB_ARENA_BINDING*/
+
+const std::size_t MByte = 1024*1024;
+
+#if __TBB_WIN8UI_SUPPORT && (_WIN32_WINNT < 0x0A00)
+// In Win8UI mode (Windows 8 Store* applications), TBB uses a thread creation API
+// that does not allow specifying the stack size.
+// Still, the thread stack size value, either explicit or default, is used by the scheduler.
+// So here we set the default value to match the platform's default of 1MB.
+const std::size_t ThreadStackSize = 1*MByte;
+#else
+const std::size_t ThreadStackSize = (sizeof(uintptr_t) <= 4 ? 2 : 4 )*MByte;
+#endif
+
+#ifndef __TBB_HardwareConcurrency
+
+//! Returns maximal parallelism level supported by the current OS configuration.
+int AvailableHwConcurrency();
+
+#else
+
+inline int AvailableHwConcurrency() {
+ int n = __TBB_HardwareConcurrency();
+ return n > 0 ? n : 1; // Fail safety strap
+}
+#endif /* __TBB_HardwareConcurrency */
+
+//! Returns OS regular memory page size
+size_t DefaultSystemPageSize();
+
+//! Returns number of processor groups in the current OS configuration.
+/** AvailableHwConcurrency must be called at least once before calling this method. **/
+int NumberOfProcessorGroups();
+
+#if _WIN32||_WIN64
+
+//! Retrieves index of processor group containing processor with the given index
+int FindProcessorGroupIndex ( int processorIndex );
+
+//! Affinitizes the thread to the specified processor group
+void MoveThreadIntoProcessorGroup( void* hThread, int groupIndex );
+
+#endif /* _WIN32||_WIN64 */
+
+//! Prints TBB version information on stderr
+void PrintVersion();
+
+//! Prints arbitrary extra TBB version information on stderr
+void PrintExtraVersionInfo( const char* category, const char* format, ... );
+
+//! A callback routine to print RML version information on stderr
+void PrintRMLVersionInfo( void* arg, const char* server_info );
+
+// For TBB compilation only; not to be used in public headers
+#if defined(min) || defined(max)
+#undef min
+#undef max
+#endif
+
+//! Utility template function returning the lesser of two values.
+/** Provided here to avoid including the not strictly safe <algorithm> header.\n
+    If the operands cause signed/unsigned or size mismatch warnings, it is the caller's
+    responsibility to do the appropriate cast before calling the function. **/
+template<typename T>
+T min ( const T& val1, const T& val2 ) {
+ return val1 < val2 ? val1 : val2;
+}
+
+//! Utility template function returning the greater of two values.
+/** Provided here to avoid including the not strictly safe <algorithm> header.\n
+    If the operands cause signed/unsigned or size mismatch warnings, it is the caller's
+    responsibility to do the appropriate cast before calling the function. **/
+template<typename T>
+T max ( const T& val1, const T& val2 ) {
+ return val1 < val2 ? val2 : val1;
+}
+
+//! Utility helper structure to ease overload resolution
+template<int > struct int_to_type {};
+
+//------------------------------------------------------------------------
+// FastRandom
+//------------------------------------------------------------------------
+
+//! A fast random number generator.
+/** Uses linear congruential method. */
+class FastRandom {
+private:
+ unsigned x, c;
+ static const unsigned a = 0x9e3779b1; // a big prime number
+public:
+ //! Get a random number.
+ unsigned short get() {
+ return get(x);
+ }
+ //! Get a random number for the given seed; update the seed for next use.
+ unsigned short get( unsigned& seed ) {
+ unsigned short r = (unsigned short)(seed>>16);
+ __TBB_ASSERT(c&1, "c must be odd for big rng period");
+ seed = seed*a+c;
+ return r;
+ }
+ //! Construct a random number generator.
+ FastRandom( void* unique_ptr ) { init(uintptr_t(unique_ptr)); }
+
+ template <typename T>
+ void init( T seed ) {
+ init(seed,int_to_type<sizeof(seed)>());
+ }
+ void init( uint64_t seed , int_to_type<8> ) {
+ init(uint32_t((seed>>32)+seed), int_to_type<4>());
+ }
+ void init( uint32_t seed, int_to_type<4> ) {
+ // threads use different seeds for unique sequences
+ c = (seed|1)*0xba5703f5; // c must be odd, shuffle by a prime number
+ x = c^(seed>>1); // also shuffle x for the first get() invocation
+ }
+};
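+
+/* Editorial sketch (not part of the original sources): typical usage seeds each generator
+   with a value unique to the caller, e.g. an object address, so that threads produce
+   different sequences; num_slots here is a hypothetical bound:
+
+       FastRandom random( this );                        // the address serves as the seed
+       unsigned victim = random.get() % num_slots;       // e.g. pick a random victim slot
+*/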
+
+//------------------------------------------------------------------------
+// Atomic extensions
+//------------------------------------------------------------------------
+
+//! Atomically replaces the value of dst with newValue while the current and new values satisfy the compare predicate
+/** Return value semantics are the same as for CAS. **/
+template<typename T1, class Pred>
+T1 atomic_update(std::atomic<T1>& dst, T1 newValue, Pred compare) {
+ T1 oldValue = dst.load(std::memory_order_acquire);
+ while ( compare(oldValue, newValue) ) {
+ if ( dst.compare_exchange_strong(oldValue, newValue) )
+ break;
+ }
+ return oldValue;
+}
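+
+/* Editorial sketch (not part of the original sources): with a "less than" predicate,
+   atomic_update() implements a lock-free "store maximum"; current_measurement() is a
+   hypothetical source of new values:
+
+       std::atomic<std::size_t> high_water_mark{0};
+       std::size_t sample = current_measurement();
+       // Replaces high_water_mark with sample only while high_water_mark < sample.
+       atomic_update( high_water_mark, sample,
+                      []( std::size_t current, std::size_t candidate ) { return current < candidate; } );
+*/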
+
+#if __TBB_USE_OS_AFFINITY_SYSCALL
+ #if __linux__
+ typedef cpu_set_t basic_mask_t;
+ #elif __FreeBSD_version >= 701000
+ typedef cpuset_t basic_mask_t;
+ #else
+ #error affinity_helper is not implemented in this OS
+ #endif
+ class affinity_helper : no_copy {
+ basic_mask_t* threadMask;
+ int is_changed;
+ public:
+ affinity_helper() : threadMask(NULL), is_changed(0) {}
+ ~affinity_helper();
+ void protect_affinity_mask( bool restore_process_mask );
+ void dismiss();
+ };
+ void destroy_process_mask();
+#else
+ class affinity_helper : no_copy {
+ public:
+ void protect_affinity_mask( bool ) {}
+ void dismiss() {}
+ };
+ inline void destroy_process_mask(){}
+#endif /* __TBB_USE_OS_AFFINITY_SYSCALL */
+
+struct cpu_features_type {
+ bool rtm_enabled{false};
+ bool waitpkg_enabled{false};
+};
+
+void detect_cpu_features(cpu_features_type& cpu_features);
+
+#if __TBB_ARENA_BINDING
+class binding_handler;
+
+binding_handler* construct_binding_handler(int slot_num, int numa_id, int core_type_id, int max_threads_per_core);
+void destroy_binding_handler(binding_handler* handler_ptr);
+void apply_affinity_mask(binding_handler* handler_ptr, int slot_num);
+void restore_affinity_mask(binding_handler* handler_ptr, int slot_num);
+
+#endif /*__TBB_ARENA_BINDING*/
+
+// RTM specific section
+// abort code for mutexes that detect a conflict with another thread.
+enum {
+ speculation_not_supported = 0x00,
+ speculation_transaction_aborted = 0x01,
+ speculation_can_retry = 0x02,
+ speculation_memadd_conflict = 0x04,
+ speculation_buffer_overflow = 0x08,
+ speculation_breakpoint_hit = 0x10,
+ speculation_nested_abort = 0x20,
+ speculation_xabort_mask = 0xFF000000,
+ speculation_xabort_shift = 24,
+    speculation_xabort_not_free = 0xFF, // This value (0xFF) comes from the Intel(R) 64 and IA-32 Architectures Optimization Reference Manual, 12.4.5, lock not free
+ speculation_successful_begin = 0xFFFFFFFF,
+ speculation_retry = speculation_transaction_aborted
+ | speculation_can_retry
+ | speculation_memadd_conflict
+};
+
+// We suppose that successful transactions are sequentially ordered and
+// do not require additional memory fences around them.
+// Technically, this can be achieved only if xbegin has implicit
+// acquire memory semantics and xend/xabort have release memory semantics at the compiler and hardware level.
+// See the article: https://arxiv.org/pdf/1710.04839.pdf
+static inline unsigned int begin_transaction() {
+#if __TBB_TSX_INTRINSICS_PRESENT
+ return _xbegin();
+#else
+ return speculation_not_supported; // return unsuccessful code
+#endif
+}
+
+static inline void end_transaction() {
+#if __TBB_TSX_INTRINSICS_PRESENT
+ _xend();
+#endif
+}
+
+static inline void abort_transaction() {
+#if __TBB_TSX_INTRINSICS_PRESENT
+ _xabort(speculation_xabort_not_free);
+#endif
+}
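+
+/* Editorial sketch (not part of the original sources): the helpers above support the usual
+   lock-elision loop, roughly as follows; is_locked() and acquire_real_lock() are hypothetical
+   placeholders for the real mutex internals:
+
+       for (;;) {
+           unsigned int status = begin_transaction();
+           if ( status == speculation_successful_begin ) {
+               if ( !is_locked() )
+                   break;               // run the critical section transactionally;
+                                        // end_transaction() later commits it
+               abort_transaction();     // lock is busy: abort; execution resumes at the
+                                        // _xbegin inside begin_transaction() with the abort status
+           } else if ( !(status & speculation_retry) ) {
+               acquire_real_lock();     // speculation is not worth retrying: fall back to the lock
+               break;
+           }
+       }
+*/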
+
+#if TBB_USE_ASSERT
+static inline unsigned char is_in_transaction() {
+#if __TBB_TSX_INTRINSICS_PRESENT
+ return _xtest();
+#else
+ return 0;
+#endif
+}
+#endif // TBB_USE_ASSERT
+
+} // namespace r1
+} // namespace detail
+} // namespace tbb
+
+#endif /* _TBB_tbb_misc_H */
diff --git a/contrib/libs/tbb/src/tbb/misc_ex.cpp b/contrib/libs/tbb/src/tbb/misc_ex.cpp
new file mode 100644
index 0000000000..177392bb65
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/misc_ex.cpp
@@ -0,0 +1,398 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+// Source file for miscellaneous entities that are infrequently referenced by
+// an executing program and whose implementation requires dynamic linking.
+
+#include "misc.h"
+
+#if !defined(__TBB_HardwareConcurrency)
+
+#include "dynamic_link.h"
+#include <stdio.h>
+#include <limits.h>
+
+#if _WIN32||_WIN64
+#include <windows.h>
+#if __TBB_WIN8UI_SUPPORT
+#include <thread>
+#endif
+#else
+#include <unistd.h>
+#if __linux__
+#include <sys/sysinfo.h>
+#include <cstring>
+#include <sched.h>
+#include <cerrno>
+#elif __sun
+#include <sys/sysinfo.h>
+#elif __FreeBSD__
+#include <cerrno>
+#include <cstring>
+#include <sys/param.h> // Required by <sys/cpuset.h>
+#include <sys/cpuset.h>
+#endif
+#endif
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+
+#if __TBB_USE_OS_AFFINITY_SYSCALL
+
+#if __linux__
+// Handlers for interoperation with libiomp
+static int (*libiomp_try_restoring_original_mask)();
+// Table for mapping to libiomp entry points
+static const dynamic_link_descriptor iompLinkTable[] = {
+ DLD_NOWEAK( kmp_set_thread_affinity_mask_initial, libiomp_try_restoring_original_mask )
+};
+#endif
+
+static void set_thread_affinity_mask( std::size_t maskSize, const basic_mask_t* threadMask ) {
+#if __linux__
+ if( sched_setaffinity( 0, maskSize, threadMask ) )
+#else /* FreeBSD */
+ if( cpuset_setaffinity( CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, maskSize, threadMask ) )
+#endif
+    // Here and below the error severity is lowered from the critical level
+    // because the failure may happen during TBB library unload, since we do not
+    // wait for workers to complete (current RML policy, to be fixed).
+ // handle_perror( errno, "setaffinity syscall" );
+ runtime_warning( "setaffinity syscall failed" );
+}
+
+static void get_thread_affinity_mask( std::size_t maskSize, basic_mask_t* threadMask ) {
+#if __linux__
+ if( sched_getaffinity( 0, maskSize, threadMask ) )
+#else /* FreeBSD */
+ if( cpuset_getaffinity( CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, maskSize, threadMask ) )
+#endif
+ runtime_warning( "getaffinity syscall failed" );
+}
+
+static basic_mask_t* process_mask;
+static int num_masks;
+
+void destroy_process_mask() {
+ if( process_mask ) {
+ delete [] process_mask;
+ }
+}
+
+#define curMaskSize sizeof(basic_mask_t) * num_masks
+affinity_helper::~affinity_helper() {
+ if( threadMask ) {
+ if( is_changed ) {
+ set_thread_affinity_mask( curMaskSize, threadMask );
+ }
+ delete [] threadMask;
+ }
+}
+void affinity_helper::protect_affinity_mask( bool restore_process_mask ) {
+ if( threadMask == NULL && num_masks ) { // TODO: assert num_masks validity?
+ threadMask = new basic_mask_t [num_masks];
+ std::memset( threadMask, 0, curMaskSize );
+ get_thread_affinity_mask( curMaskSize, threadMask );
+ if( restore_process_mask ) {
+ __TBB_ASSERT( process_mask, "A process mask is requested but not yet stored" );
+ is_changed = memcmp( process_mask, threadMask, curMaskSize );
+ if( is_changed )
+ set_thread_affinity_mask( curMaskSize, process_mask );
+ } else {
+ // Assume that the mask will be changed by the caller.
+ is_changed = 1;
+ }
+ }
+}
+void affinity_helper::dismiss() {
+ if( threadMask ) {
+ delete [] threadMask;
+ threadMask = NULL;
+ }
+ is_changed = 0;
+}
+#undef curMaskSize
+
+static std::atomic<do_once_state> hardware_concurrency_info;
+
+static int theNumProcs;
+
+static void initialize_hardware_concurrency_info () {
+ int err;
+ int availableProcs = 0;
+ int numMasks = 1;
+#if __linux__
+ int maxProcs = sysconf(_SC_NPROCESSORS_ONLN);
+ int pid = getpid();
+#else /* FreeBSD >= 7.1 */
+ int maxProcs = sysconf(_SC_NPROCESSORS_ONLN);
+#endif
+ basic_mask_t* processMask;
+ const std::size_t BasicMaskSize = sizeof(basic_mask_t);
+ for (;;) {
+ const int curMaskSize = BasicMaskSize * numMasks;
+ processMask = new basic_mask_t[numMasks];
+ std::memset( processMask, 0, curMaskSize );
+#if __linux__
+ err = sched_getaffinity( pid, curMaskSize, processMask );
+ if ( !err || errno != EINVAL || curMaskSize * CHAR_BIT >= 256 * 1024 )
+ break;
+#else /* FreeBSD >= 7.1 */
+ // CPU_LEVEL_WHICH - anonymous (current) mask, CPU_LEVEL_CPUSET - assigned mask
+ err = cpuset_getaffinity( CPU_LEVEL_WHICH, CPU_WHICH_PID, -1, curMaskSize, processMask );
+ if ( !err || errno != ERANGE || curMaskSize * CHAR_BIT >= 16 * 1024 )
+ break;
+#endif /* FreeBSD >= 7.1 */
+ delete[] processMask;
+ numMasks <<= 1;
+ }
+ if ( !err ) {
+ // We have found the mask size and captured the process affinity mask into processMask.
+ num_masks = numMasks; // do here because it's needed for affinity_helper to work
+#if __linux__
+ // For better coexistence with libiomp which might have changed the mask already,
+ // check for its presence and ask it to restore the mask.
+ dynamic_link_handle libhandle;
+ if ( dynamic_link( "libiomp5.so", iompLinkTable, 1, &libhandle, DYNAMIC_LINK_GLOBAL ) ) {
+ // We have found the symbol provided by libiomp5 for restoring original thread affinity.
+ affinity_helper affhelp;
+ affhelp.protect_affinity_mask( /*restore_process_mask=*/false );
+ if ( libiomp_try_restoring_original_mask()==0 ) {
+ // Now we have the right mask to capture, restored by libiomp.
+ const int curMaskSize = BasicMaskSize * numMasks;
+ std::memset( processMask, 0, curMaskSize );
+ get_thread_affinity_mask( curMaskSize, processMask );
+ } else
+ affhelp.dismiss(); // thread mask has not changed
+ dynamic_unlink( libhandle );
+ // Destructor of affinity_helper restores the thread mask (unless dismissed).
+ }
+#endif
+ for ( int m = 0; availableProcs < maxProcs && m < numMasks; ++m ) {
+ for ( std::size_t i = 0; (availableProcs < maxProcs) && (i < BasicMaskSize * CHAR_BIT); ++i ) {
+ if ( CPU_ISSET( i, processMask + m ) )
+ ++availableProcs;
+ }
+ }
+ process_mask = processMask;
+ }
+ else {
+ // Failed to get the process affinity mask; assume the whole machine can be used.
+ availableProcs = (maxProcs == INT_MAX) ? sysconf(_SC_NPROCESSORS_ONLN) : maxProcs;
+ delete[] processMask;
+ }
+ theNumProcs = availableProcs > 0 ? availableProcs : 1; // Fail safety strap
+ __TBB_ASSERT( theNumProcs <= sysconf(_SC_NPROCESSORS_ONLN), NULL );
+}
+
+int AvailableHwConcurrency() {
+ atomic_do_once( &initialize_hardware_concurrency_info, hardware_concurrency_info );
+ return theNumProcs;
+}
+
+/* End of __TBB_USE_OS_AFFINITY_SYSCALL implementation */
+#elif __ANDROID__
+
+// Workaround for Android: read the number of available CPUs from sysfs, since the system calls are unreliable there.
+// Format of "present" file is: ([<int>-<int>|<int>],)+
+int AvailableHwConcurrency() {
+ FILE *fp = fopen("/sys/devices/system/cpu/present", "r");
+ if (fp == NULL) return 1;
+ int num_args, lower, upper, num_cpus=0;
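+ // Each entry is either a "lower-upper" range (two fields parsed) or a single CPU index (one field).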
+ while ((num_args = fscanf(fp, "%u-%u", &lower, &upper)) != EOF) {
+ switch(num_args) {
+ case 2: num_cpus += upper - lower + 1; break;
+ case 1: num_cpus += 1; break;
+ }
+ fscanf(fp, ",");
+ }
+ fclose(fp);
+ return (num_cpus > 0) ? num_cpus : 1;
+}
+
+#elif defined(_SC_NPROCESSORS_ONLN)
+
+int AvailableHwConcurrency() {
+ int n = sysconf(_SC_NPROCESSORS_ONLN);
+ return (n > 0) ? n : 1;
+}
+
+#elif _WIN32||_WIN64
+
+static std::atomic<do_once_state> hardware_concurrency_info;
+
+static const WORD TBB_ALL_PROCESSOR_GROUPS = 0xffff;
+
+// Statically allocate an array for processor group information.
+// Windows 7 supports at most 4 groups, but let's look ahead a little.
+static const WORD MaxProcessorGroups = 64;
+
+struct ProcessorGroupInfo {
+ DWORD_PTR mask; ///< Affinity mask covering the whole group
+ int numProcs; ///< Number of processors in the group
+ int numProcsRunningTotal; ///< Subtotal of processors in this and preceding groups
+
+ //! Total number of processor groups in the system
+ static int NumGroups;
+
+ //! Index of the group with a slot reserved for the first external thread
+ /** With support for multiple processor groups, the current implementation
+ defines "the first external thread" as the first thread to invoke
+ AvailableHwConcurrency().
+
+ TODO: Implement a dynamic scheme remapping workers depending on the pending
+ external threads affinity. **/
+ static int HoleIndex;
+};
+
+int ProcessorGroupInfo::NumGroups = 1;
+int ProcessorGroupInfo::HoleIndex = 0;
+
+ProcessorGroupInfo theProcessorGroups[MaxProcessorGroups];
+
+struct TBB_GROUP_AFFINITY {
+ DWORD_PTR Mask;
+ WORD Group;
+ WORD Reserved[3];
+};
+
+static DWORD (WINAPI *TBB_GetActiveProcessorCount)( WORD groupIndex ) = NULL;
+static WORD (WINAPI *TBB_GetActiveProcessorGroupCount)() = NULL;
+static BOOL (WINAPI *TBB_SetThreadGroupAffinity)( HANDLE hThread,
+ const TBB_GROUP_AFFINITY* newAff, TBB_GROUP_AFFINITY *prevAff );
+static BOOL (WINAPI *TBB_GetThreadGroupAffinity)( HANDLE hThread, TBB_GROUP_AFFINITY* );
+
+static const dynamic_link_descriptor ProcessorGroupsApiLinkTable[] = {
+ DLD(GetActiveProcessorCount, TBB_GetActiveProcessorCount)
+ , DLD(GetActiveProcessorGroupCount, TBB_GetActiveProcessorGroupCount)
+ , DLD(SetThreadGroupAffinity, TBB_SetThreadGroupAffinity)
+ , DLD(GetThreadGroupAffinity, TBB_GetThreadGroupAffinity)
+};
+
+static void initialize_hardware_concurrency_info () {
+#if __TBB_WIN8UI_SUPPORT
+ // Processor group information is unavailable for such applications,
+ // so set up the processor count for a single group.
+ theProcessorGroups[0].numProcs = theProcessorGroups[0].numProcsRunningTotal = std::thread::hardware_concurrency();
+#else /* __TBB_WIN8UI_SUPPORT */
+ dynamic_link( "Kernel32.dll", ProcessorGroupsApiLinkTable,
+ sizeof(ProcessorGroupsApiLinkTable)/sizeof(dynamic_link_descriptor) );
+ SYSTEM_INFO si;
+ GetNativeSystemInfo(&si);
+ DWORD_PTR pam, sam, m = 1;
+ GetProcessAffinityMask( GetCurrentProcess(), &pam, &sam );
+ int nproc = 0;
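+ // Count the bits set in the process affinity mask.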
+ for ( std::size_t i = 0; i < sizeof(DWORD_PTR) * CHAR_BIT; ++i, m <<= 1 ) {
+ if ( pam & m )
+ ++nproc;
+ }
+ __TBB_ASSERT( nproc <= (int)si.dwNumberOfProcessors, NULL );
+ // By default, set up the processor count for a single processor group.
+ theProcessorGroups[0].numProcs = theProcessorGroups[0].numProcsRunningTotal = nproc;
+ // Set up processor groups if the process affinity mask is unrestricted and more than one group may be present.
+ if ( nproc == (int)si.dwNumberOfProcessors && TBB_GetActiveProcessorCount ) {
+ // The process does not have restricting affinity mask and multiple processor groups are possible
+ ProcessorGroupInfo::NumGroups = (int)TBB_GetActiveProcessorGroupCount();
+ __TBB_ASSERT( ProcessorGroupInfo::NumGroups <= MaxProcessorGroups, NULL );
+ // Fail-safety bootstrap: release builds limit the available concurrency
+ // level, while debug builds assert.
+ if ( ProcessorGroupInfo::NumGroups > MaxProcessorGroups )
+ ProcessorGroupInfo::NumGroups = MaxProcessorGroups;
+ if ( ProcessorGroupInfo::NumGroups > 1 ) {
+ TBB_GROUP_AFFINITY ga;
+ if ( TBB_GetThreadGroupAffinity( GetCurrentThread(), &ga ) )
+ ProcessorGroupInfo::HoleIndex = ga.Group;
+ int nprocs = 0;
+ for ( WORD i = 0; i < ProcessorGroupInfo::NumGroups; ++i ) {
+ ProcessorGroupInfo &pgi = theProcessorGroups[i];
+ pgi.numProcs = (int)TBB_GetActiveProcessorCount(i);
+ __TBB_ASSERT( pgi.numProcs <= (int)sizeof(DWORD_PTR) * CHAR_BIT, NULL );
+ pgi.mask = pgi.numProcs == sizeof(DWORD_PTR) * CHAR_BIT ? ~(DWORD_PTR)0 : (DWORD_PTR(1) << pgi.numProcs) - 1;
+ pgi.numProcsRunningTotal = nprocs += pgi.numProcs;
+ }
+ __TBB_ASSERT( nprocs == (int)TBB_GetActiveProcessorCount( TBB_ALL_PROCESSOR_GROUPS ), NULL );
+ }
+ }
+#endif /* __TBB_WIN8UI_SUPPORT */
+
+ PrintExtraVersionInfo("Processor groups", "%d", ProcessorGroupInfo::NumGroups);
+ if (ProcessorGroupInfo::NumGroups>1)
+ for (int i=0; i<ProcessorGroupInfo::NumGroups; ++i)
+ PrintExtraVersionInfo( "----- Group", "%d: size %d", i, theProcessorGroups[i].numProcs);
+}
+
+int NumberOfProcessorGroups() {
+ __TBB_ASSERT( hardware_concurrency_info == do_once_state::initialized, "NumberOfProcessorGroups is used before AvailableHwConcurrency" );
+ return ProcessorGroupInfo::NumGroups;
+}
+
+// Offset for the slot reserved for the first external thread
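+// HoleAdjusted adds one to procIdx for groups at or after the group holding the reserved slot.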
+#define HoleAdjusted(procIdx, grpIdx) (procIdx + (holeIdx <= grpIdx))
+
+int FindProcessorGroupIndex ( int procIdx ) {
+ // In case of oversubscription, spread extra workers in a round-robin manner.
+ int holeIdx;
+ const int numProcs = theProcessorGroups[ProcessorGroupInfo::NumGroups - 1].numProcsRunningTotal;
+ if ( procIdx >= numProcs - 1 ) {
+ holeIdx = INT_MAX;
+ procIdx = (procIdx - numProcs + 1) % numProcs;
+ }
+ else
+ holeIdx = ProcessorGroupInfo::HoleIndex;
+ __TBB_ASSERT( hardware_concurrency_info == do_once_state::initialized, "FindProcessorGroupIndex is used before AvailableHwConcurrency" );
+ // Approximate the likely group index assuming all groups are of the same size
+ int i = procIdx / theProcessorGroups[0].numProcs;
+ // Make sure the approximation is a valid group index
+ if (i >= ProcessorGroupInfo::NumGroups) i = ProcessorGroupInfo::NumGroups-1;
+ // Now adjust the approximation up or down
+ if ( theProcessorGroups[i].numProcsRunningTotal > HoleAdjusted(procIdx, i) ) {
+ while ( theProcessorGroups[i].numProcsRunningTotal - theProcessorGroups[i].numProcs > HoleAdjusted(procIdx, i) ) {
+ __TBB_ASSERT( i > 0, NULL );
+ --i;
+ }
+ }
+ else {
+ do {
+ ++i;
+ } while ( theProcessorGroups[i].numProcsRunningTotal <= HoleAdjusted(procIdx, i) );
+ }
+ __TBB_ASSERT( i < ProcessorGroupInfo::NumGroups, NULL );
+ return i;
+}
+
+void MoveThreadIntoProcessorGroup( void* hThread, int groupIndex ) {
+ __TBB_ASSERT( hardware_concurrency_info == do_once_state::initialized, "MoveThreadIntoProcessorGroup is used before AvailableHwConcurrency" );
+ if ( !TBB_SetThreadGroupAffinity )
+ return;
+ TBB_GROUP_AFFINITY ga = { theProcessorGroups[groupIndex].mask, (WORD)groupIndex, {0,0,0} };
+ TBB_SetThreadGroupAffinity( hThread, &ga, NULL );
+}
+
+int AvailableHwConcurrency() {
+ atomic_do_once( &initialize_hardware_concurrency_info, hardware_concurrency_info );
+ return theProcessorGroups[ProcessorGroupInfo::NumGroups - 1].numProcsRunningTotal;
+}
+
+/* End of _WIN32||_WIN64 implementation */
+#else
+ #error AvailableHwConcurrency is not implemented for this OS
+#endif
+
+} // namespace r1
+} // namespace detail
+} // namespace tbb
+
+#endif /* !__TBB_HardwareConcurrency */
diff --git a/contrib/libs/tbb/src/tbb/observer_proxy.cpp b/contrib/libs/tbb/src/tbb/observer_proxy.cpp
new file mode 100644
index 0000000000..4f7c07c266
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/observer_proxy.cpp
@@ -0,0 +1,322 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "oneapi/tbb/detail/_config.h"
+#include "oneapi/tbb/detail/_utils.h"
+
+#include "observer_proxy.h"
+#include "arena.h"
+#include "main.h"
+#include "thread_data.h"
+
+#include <atomic>
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+
+#if TBB_USE_ASSERT
+extern std::atomic<int> the_observer_proxy_count;
+#endif /* TBB_USE_ASSERT */
+
+observer_proxy::observer_proxy( d1::task_scheduler_observer& tso )
+ : my_ref_count(1), my_list(NULL), my_next(NULL), my_prev(NULL), my_observer(&tso)
+{
+#if TBB_USE_ASSERT
+ ++the_observer_proxy_count;
+#endif /* TBB_USE_ASSERT */
+}
+
+observer_proxy::~observer_proxy() {
+ __TBB_ASSERT( !my_ref_count, "Attempt to destroy proxy still in use" );
+ poison_value(my_ref_count);
+ poison_pointer(my_prev);
+ poison_pointer(my_next);
+#if TBB_USE_ASSERT
+ --the_observer_proxy_count;
+#endif /* TBB_USE_ASSERT */
+}
+
+void observer_list::clear() {
+ // Though the method works fine on an empty list, we require the caller
+ // to check for emptiness before invoking it to avoid extra overhead.
+ __TBB_ASSERT( !empty(), NULL );
+ {
+ scoped_lock lock(mutex(), /*is_writer=*/true);
+ observer_proxy *next = my_head.load(std::memory_order_relaxed);
+ while ( observer_proxy *p = next ) {
+ next = p->my_next;
+ // Both proxy p and observer p->my_observer (if non-null) are guaranteed
+ // to be alive while the list is locked.
+ d1::task_scheduler_observer *obs = p->my_observer;
+ // Make sure that possible concurrent observer destruction does not
+ // conflict with the proxy list cleanup.
+ if (!obs || !(p = obs->my_proxy.exchange(nullptr))) {
+ continue;
+ }
+ // Accessing 'obs' after detaching obs->my_proxy would race with observer destruction.
+ __TBB_ASSERT(!next || p == next->my_prev, nullptr);
+ __TBB_ASSERT(is_alive(p->my_ref_count), "Observer's proxy died prematurely");
+ __TBB_ASSERT(p->my_ref_count.load(std::memory_order_relaxed) == 1, "Reference for observer is missing");
+ poison_pointer(p->my_observer);
+ remove(p);
+ --p->my_ref_count;
+ delete p;
+ }
+ }
+
+ // If observe(false) is called concurrently with the destruction of the arena,
+ // need to wait until all proxies are removed.
+ for (atomic_backoff backoff; ; backoff.pause()) {
+ scoped_lock lock(mutex(), /*is_writer=*/false);
+ if (my_head.load(std::memory_order_relaxed) == nullptr) {
+ break;
+ }
+ }
+
+ __TBB_ASSERT(my_head.load(std::memory_order_relaxed) == nullptr && my_tail.load(std::memory_order_relaxed) == nullptr, nullptr);
+}
+
+void observer_list::insert( observer_proxy* p ) {
+ scoped_lock lock(mutex(), /*is_writer=*/true);
+ if (my_head.load(std::memory_order_relaxed)) {
+ p->my_prev = my_tail.load(std::memory_order_relaxed);
+ my_tail.load(std::memory_order_relaxed)->my_next = p;
+ } else {
+ my_head.store(p, std::memory_order_relaxed);
+ }
+ my_tail.store(p, std::memory_order_relaxed);
+}
+
+void observer_list::remove(observer_proxy* p) {
+ __TBB_ASSERT(my_head.load(std::memory_order_relaxed), "Attempt to remove an item from an empty list");
+ __TBB_ASSERT(!my_tail.load(std::memory_order_relaxed)->my_next, "Last item's my_next must be NULL");
+ if (p == my_tail.load(std::memory_order_relaxed)) {
+ __TBB_ASSERT(!p->my_next, nullptr);
+ my_tail.store(p->my_prev, std::memory_order_relaxed);
+ } else {
+ __TBB_ASSERT(p->my_next, nullptr);
+ p->my_next->my_prev = p->my_prev;
+ }
+ if (p == my_head.load(std::memory_order_relaxed)) {
+ __TBB_ASSERT(!p->my_prev, nullptr);
+ my_head.store(p->my_next, std::memory_order_relaxed);
+ } else {
+ __TBB_ASSERT(p->my_prev, nullptr);
+ p->my_prev->my_next = p->my_next;
+ }
+ __TBB_ASSERT((my_head.load(std::memory_order_relaxed) && my_tail.load(std::memory_order_relaxed)) ||
+ (!my_head.load(std::memory_order_relaxed) && !my_tail.load(std::memory_order_relaxed)), nullptr);
+}
+
+void observer_list::remove_ref(observer_proxy* p) {
+ std::uintptr_t r = p->my_ref_count.load(std::memory_order_acquire);
+ __TBB_ASSERT(is_alive(r), nullptr);
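+ // On failure, compare_exchange_strong reloads r, so the loop retries with the current value.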
+ while (r > 1) {
+ if (p->my_ref_count.compare_exchange_strong(r, r - 1)) {
+ return;
+ }
+ }
+ __TBB_ASSERT(r == 1, nullptr);
+ // Reference count might go to zero
+ {
+ // Use lock to avoid resurrection by a thread concurrently walking the list
+ observer_list::scoped_lock lock(mutex(), /*is_writer=*/true);
+ r = --p->my_ref_count;
+ if (!r) {
+ remove(p);
+ }
+ }
+ __TBB_ASSERT(r || !p->my_ref_count, nullptr);
+ if (!r) {
+ delete p;
+ }
+}
+
+void observer_list::do_notify_entry_observers(observer_proxy*& last, bool worker) {
+ // Pointer p marches through the list from last (exclusive) to the end.
+ observer_proxy* p = last, * prev = p;
+ for (;;) {
+ d1::task_scheduler_observer* tso = nullptr;
+ // Hold lock on list only long enough to advance to the next proxy in the list.
+ {
+ scoped_lock lock(mutex(), /*is_writer=*/false);
+ do {
+ if (p) {
+ // We were already processing the list.
+ if (observer_proxy* q = p->my_next) {
+ if (p == prev) {
+ remove_ref_fast(prev); // sets prev to NULL if successful
+ }
+ p = q;
+ } else {
+ // Reached the end of the list.
+ if (p == prev) {
+ // Keep the reference as we store the 'last' pointer in scheduler
+ __TBB_ASSERT(int(p->my_ref_count.load(std::memory_order_relaxed)) >= 1 + (p->my_observer ? 1 : 0), nullptr);
+ } else {
+ // The last few proxies were empty
+ __TBB_ASSERT(int(p->my_ref_count.load(std::memory_order_relaxed)), nullptr);
+ ++p->my_ref_count;
+ if (prev) {
+ lock.release();
+ remove_ref(prev);
+ }
+ }
+ last = p;
+ return;
+ }
+ } else {
+ // Starting pass through the list
+ p = my_head.load(std::memory_order_relaxed);
+ if (!p) {
+ return;
+ }
+ }
+ tso = p->my_observer;
+ } while (!tso);
+ ++p->my_ref_count;
+ ++tso->my_busy_count;
+ }
+ __TBB_ASSERT(!prev || p != prev, nullptr);
+ // Release the proxy pinned before p
+ if (prev) {
+ remove_ref(prev);
+ }
+ // Do not hold any locks on the list while calling user's code.
+ // Do not intercept any exceptions that may escape the callback so that
+ // they are either handled by the TBB scheduler or passed to the debugger.
+ tso->on_scheduler_entry(worker);
+ __TBB_ASSERT(p->my_ref_count.load(std::memory_order_relaxed), nullptr);
+ intptr_t bc = --tso->my_busy_count;
+ __TBB_ASSERT_EX(bc >= 0, "my_busy_count underflowed");
+ prev = p;
+ }
+}
+
+void observer_list::do_notify_exit_observers(observer_proxy* last, bool worker) {
+ // Pointer p marches through the list from the beginning to last (inclusive).
+ observer_proxy* p = nullptr, * prev = nullptr;
+ for (;;) {
+ d1::task_scheduler_observer* tso = nullptr;
+ // Hold lock on list only long enough to advance to the next proxy in the list.
+ {
+ scoped_lock lock(mutex(), /*is_writer=*/false);
+ do {
+ if (p) {
+ // We were already processing the list.
+ if (p != last) {
+ __TBB_ASSERT(p->my_next, "List items before 'last' must have valid my_next pointer");
+ if (p == prev)
+ remove_ref_fast(prev); // sets prev to NULL if successful
+ p = p->my_next;
+ } else {
+ // remove the reference from the last item
+ remove_ref_fast(p);
+ if (p) {
+ lock.release();
+ if (p != prev && prev) {
+ remove_ref(prev);
+ }
+ remove_ref(p);
+ }
+ return;
+ }
+ } else {
+ // Starting pass through the list
+ p = my_head.load(std::memory_order_relaxed);
+ __TBB_ASSERT(p, "Nonzero 'last' must guarantee that the global list is non-empty");
+ }
+ tso = p->my_observer;
+ } while (!tso);
+ // The item is already refcounted
+ if (p != last) // the last is already referenced since entry notification
+ ++p->my_ref_count;
+ ++tso->my_busy_count;
+ }
+ __TBB_ASSERT(!prev || p != prev, nullptr);
+ if (prev)
+ remove_ref(prev);
+ // Do not hold any locks on the list while calling user's code.
+ // Do not intercept any exceptions that may escape the callback so that
+ // they are either handled by the TBB scheduler or passed to the debugger.
+ tso->on_scheduler_exit(worker);
+ __TBB_ASSERT(p->my_ref_count || p == last, nullptr);
+ intptr_t bc = --tso->my_busy_count;
+ __TBB_ASSERT_EX(bc >= 0, "my_busy_count underflowed");
+ prev = p;
+ }
+}
+
+void __TBB_EXPORTED_FUNC observe(d1::task_scheduler_observer &tso, bool enable) {
+ if( enable ) {
+ if( !tso.my_proxy.load(std::memory_order_relaxed) ) {
+ observer_proxy* p = new observer_proxy(tso);
+ tso.my_proxy.store(p, std::memory_order_relaxed);
+ tso.my_busy_count.store(0, std::memory_order_relaxed);
+
+ thread_data* td = governor::get_thread_data_if_initialized();
+ if (p->my_observer->my_task_arena == nullptr) {
+ if (!(td && td->my_arena)) {
+ td = governor::get_thread_data();
+ }
+ __TBB_ASSERT(__TBB_InitOnce::initialization_done(), nullptr);
+ __TBB_ASSERT(td && td->my_arena, nullptr);
+ p->my_list = &td->my_arena->my_observers;
+ } else {
+ d1::task_arena* ta = p->my_observer->my_task_arena;
+ arena* a = ta->my_arena.load(std::memory_order_acquire);
+ if (a == nullptr) { // Avoid recursion during arena initialization
+ ta->initialize();
+ a = ta->my_arena.load(std::memory_order_relaxed);
+ }
+ __TBB_ASSERT(a != nullptr, nullptr);
+ p->my_list = &a->my_observers;
+ }
+ p->my_list->insert(p);
+ // Notify newly activated observer and other pending ones if it belongs to current arena
+ if (td && td->my_arena && &td->my_arena->my_observers == p->my_list) {
+ p->my_list->notify_entry_observers(td->my_last_observer, td->my_is_worker);
+ }
+ }
+ } else {
+ // Make sure that possible concurrent proxy list cleanup does not conflict
+ // with the observer destruction here.
+ if ( observer_proxy* proxy = tso.my_proxy.exchange(nullptr) ) {
+ // List destruction should not touch this proxy after we've won the above interlocked exchange.
+ __TBB_ASSERT( proxy->my_observer == &tso, nullptr);
+ __TBB_ASSERT( is_alive(proxy->my_ref_count.load(std::memory_order_relaxed)), "Observer's proxy died prematurely" );
+ __TBB_ASSERT( proxy->my_ref_count.load(std::memory_order_relaxed) >= 1, "reference for observer missing" );
+ observer_list &list = *proxy->my_list;
+ {
+ // Ensure that none of the list walkers relies on observer pointer validity
+ observer_list::scoped_lock lock(list.mutex(), /*is_writer=*/true);
+ proxy->my_observer = nullptr;
+ // Proxy may still be held by other threads (to track the last notified observer)
+ if( !--proxy->my_ref_count ) {// nobody can increase it under exclusive lock
+ list.remove(proxy);
+ __TBB_ASSERT( !proxy->my_ref_count, NULL );
+ delete proxy;
+ }
+ }
+ spin_wait_until_eq(tso.my_busy_count, 0); // other threads are still accessing the callback
+ }
+ }
+}
+
+} // namespace r1
+} // namespace detail
+} // namespace tbb
diff --git a/contrib/libs/tbb/src/tbb/observer_proxy.h b/contrib/libs/tbb/src/tbb/observer_proxy.h
new file mode 100644
index 0000000000..2450247ecd
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/observer_proxy.h
@@ -0,0 +1,154 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#ifndef __TBB_observer_proxy_H
+#define __TBB_observer_proxy_H
+
+#include "oneapi/tbb/detail/_config.h"
+#include "oneapi/tbb/detail/_aligned_space.h"
+
+#include "oneapi/tbb/task_scheduler_observer.h"
+#include "oneapi/tbb/spin_rw_mutex.h"
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+
+class observer_list {
+ friend class arena;
+
+ // Mutex is wrapped with aligned_space to shut up warnings when its destructor
+ // is called while threads are still using it.
+ typedef aligned_space<spin_rw_mutex> my_mutex_type;
+
+ //! Pointer to the head of this list.
+ std::atomic<observer_proxy*> my_head{nullptr};
+
+ //! Pointer to the tail of this list.
+ std::atomic<observer_proxy*> my_tail{nullptr};
+
+ //! Mutex protecting this list.
+ my_mutex_type my_mutex;
+
+ //! Back-pointer to the arena this list belongs to.
+ arena* my_arena;
+
+ //! Decrement refcount of the proxy p if there are other outstanding references.
+ /** In case of success sets p to NULL. Must be invoked from under the list lock. **/
+ inline static void remove_ref_fast( observer_proxy*& p );
+
+ //! Implements notify_entry_observers functionality.
+ void do_notify_entry_observers( observer_proxy*& last, bool worker );
+
+ //! Implements notify_exit_observers functionality.
+ void do_notify_exit_observers( observer_proxy* last, bool worker );
+
+public:
+ observer_list () = default;
+
+ //! Removes and destroys all observer proxies from the list.
+ /** Cannot be used concurrently with other methods. **/
+ void clear ();
+
+ //! Add observer proxy to the tail of the list.
+ void insert ( observer_proxy* p );
+
+ //! Remove observer proxy from the list.
+ void remove ( observer_proxy* p );
+
+ //! Decrement refcount of the proxy and destroy it if necessary.
+ /** When refcount reaches zero removes the proxy from the list and destructs it. **/
+ void remove_ref( observer_proxy* p );
+
+ //! Type of the scoped lock for the reader-writer mutex associated with the list.
+ typedef spin_rw_mutex::scoped_lock scoped_lock;
+
+ //! Accessor to the reader-writer mutex associated with the list.
+ spin_rw_mutex& mutex () { return my_mutex.begin()[0]; }
+
+ bool empty () const { return my_head.load(std::memory_order_relaxed) == nullptr; }
+
+ //! Call entry notifications on observers added after last was notified.
+ /** Updates last to become the last notified observer proxy (in the global list)
+ or leaves it to be nullptr. The proxy has its refcount incremented. **/
+ inline void notify_entry_observers( observer_proxy*& last, bool worker );
+
+ //! Call exit notifications on last and observers added before it.
+ inline void notify_exit_observers( observer_proxy*& last, bool worker );
+}; // class observer_list
+
+//! Wrapper for an observer object
+/** To maintain shared lists of observers, the scheduler first wraps each observer
+ object into a proxy so that a list item remains valid even after the corresponding
+ observer object is destroyed by the user code. **/
+class observer_proxy {
+ friend class task_scheduler_observer;
+ friend class observer_list;
+ friend void observe(d1::task_scheduler_observer&, bool);
+ //! Reference count used for garbage collection.
+ /** 1 for reference from my task_scheduler_observer.
+ 1 for each task dispatcher's last observer pointer.
+ No accounting for neighbors in the shared list. */
+ std::atomic<std::uintptr_t> my_ref_count;
+ //! Reference to the list this observer belongs to.
+ observer_list* my_list;
+ //! Pointer to next observer in the list specified by my_head.
+ /** NULL for the last item in the list. **/
+ observer_proxy* my_next;
+ //! Pointer to the previous observer in the list specified by my_head.
+ /** For the head of the list points to the last item. **/
+ observer_proxy* my_prev;
+ //! Associated observer
+ d1::task_scheduler_observer* my_observer;
+
+ //! Constructs proxy for the given observer and adds it to the specified list.
+ observer_proxy( d1::task_scheduler_observer& );
+
+ ~observer_proxy();
+}; // class observer_proxy
+
+void observer_list::remove_ref_fast( observer_proxy*& p ) {
+ if( p->my_observer ) {
+ // Can decrement refcount quickly, as it cannot drop to zero while under the lock.
+ std::uintptr_t r = --p->my_ref_count;
+ __TBB_ASSERT_EX( r, NULL );
+ p = NULL;
+ } else {
+ // Use slow form of refcount decrementing, after the lock is released.
+ }
+}
+
+void observer_list::notify_entry_observers(observer_proxy*& last, bool worker) {
+ if (last == my_tail.load(std::memory_order_relaxed))
+ return;
+ do_notify_entry_observers(last, worker);
+}
+
+void observer_list::notify_exit_observers( observer_proxy*& last, bool worker ) {
+ if (last == nullptr) {
+ return;
+ }
+ __TBB_ASSERT(!is_poisoned(last), NULL);
+ do_notify_exit_observers( last, worker );
+ __TBB_ASSERT(last != nullptr, NULL);
+ poison_pointer(last);
+}
+
+} // namespace r1
+} // namespace detail
+} // namespace tbb
+
+#endif /* __TBB_observer_proxy_H */
diff --git a/contrib/libs/tbb/src/tbb/parallel_pipeline.cpp b/contrib/libs/tbb/src/tbb/parallel_pipeline.cpp
new file mode 100644
index 0000000000..b7655c6b35
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/parallel_pipeline.cpp
@@ -0,0 +1,471 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "oneapi/tbb/parallel_pipeline.h"
+#include "oneapi/tbb/spin_mutex.h"
+#include "oneapi/tbb/tbb_allocator.h"
+#include "oneapi/tbb/cache_aligned_allocator.h"
+#include "itt_notify.h"
+#include "tls.h"
+#include "oneapi/tbb/detail/_exception.h"
+#include "oneapi/tbb/detail/_small_object_pool.h"
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+
+void handle_perror(int error_code, const char* aux_info);
+
+using Token = unsigned long;
+
+//! A processing pipeline that applies filters to items.
+/** @ingroup algorithms */
+class pipeline {
+ friend void parallel_pipeline(d1::task_group_context&, std::size_t, const d1::filter_node&);
+public:
+
+ //! Construct empty pipeline.
+ pipeline(d1::task_group_context& cxt, std::size_t max_token) :
+ my_context(cxt),
+ first_filter(nullptr),
+ last_filter(nullptr),
+ input_tokens(Token(max_token)),
+ end_of_input(false),
+ wait_ctx(0) {
+ __TBB_ASSERT( max_token>0, "pipeline::run must have at least one token" );
+ }
+
+ ~pipeline();
+
+ //! Add filter to end of pipeline.
+ void add_filter( d1::base_filter& );
+
+ //! Traverse the filter-node tree in order and add a filter for each leaf
+ void fill_pipeline(const d1::filter_node& root) {
+ if( root.left && root.right ) {
+ fill_pipeline(*root.left);
+ fill_pipeline(*root.right);
+ }
+ else {
+ __TBB_ASSERT(!root.left && !root.right, "tree should be full");
+ add_filter(*root.create_filter());
+ }
+ }
+
+private:
+ friend class stage_task;
+ friend class base_filter;
+ friend void set_end_of_input(d1::base_filter& bf);
+
+ task_group_context& my_context;
+
+ //! Pointer to first filter in the pipeline.
+ d1::base_filter* first_filter;
+
+ //! Pointer to last filter in the pipeline.
+ d1::base_filter* last_filter;
+
+ //! Number of idle tokens waiting for input stage.
+ std::atomic<Token> input_tokens;
+
+ //! False until flow_control::stop() is called.
+ std::atomic<bool> end_of_input;
+
+ d1::wait_context wait_ctx;
+};
+
+//! This structure is used to store task information in an input buffer
+struct task_info {
+ void* my_object = nullptr;
+ //! Invalid unless a task went through an ordered stage.
+ Token my_token = 0;
+ //! False until my_token is set.
+ bool my_token_ready = false;
+ //! True if my_object is valid.
+ bool is_valid = false;
+ //! Set to initial state (no object, no token)
+ void reset() {
+ my_object = nullptr;
+ my_token = 0;
+ my_token_ready = false;
+ is_valid = false;
+ }
+};
+
+//! A buffer of input items for a filter.
+/** Each item is a task_info, inserted into a position in the buffer corresponding to a Token. */
+class input_buffer {
+ friend class base_filter;
+ friend class stage_task;
+ friend class pipeline;
+ friend void set_end_of_input(d1::base_filter& bf);
+
+ using size_type = Token;
+
+ //! Array of deferred tasks that cannot yet start executing.
+ task_info* array;
+
+ //! Size of array
+ /** Always 0 or a power of 2 */
+ size_type array_size;
+
+ //! Lowest token that can start executing.
+ /** All prior Token have already been seen. */
+ Token low_token;
+
+ //! Serializes updates.
+ spin_mutex array_mutex;
+
+ //! Resize "array".
+ /** Caller is responsible for acquiring a lock on "array_mutex". */
+ void grow( size_type minimum_size );
+
+ //! Initial size for "array"
+ /** Must be a power of 2 */
+ static const size_type initial_buffer_size = 4;
+
+ //! Used for out of order buffer, and for assigning my_token if is_ordered and my_token not already assigned
+ Token high_token;
+
+ //! True for ordered filter, false otherwise.
+ const bool is_ordered;
+
+ //! For parallel filters that accept NULLs: thread-local flag for reaching end_of_input
+ using end_of_input_tls_t = basic_tls<std::intptr_t>;
+ end_of_input_tls_t end_of_input_tls;
+ bool end_of_input_tls_allocated; // no way to test pthread creation of TLS
+
+public:
+ input_buffer(const input_buffer&) = delete;
+ input_buffer& operator=(const input_buffer&) = delete;
+
+ //! Construct empty buffer.
+ input_buffer( bool ordered) :
+ array(nullptr),
+ array_size(0),
+ low_token(0),
+ high_token(0),
+ is_ordered(ordered),
+ end_of_input_tls(),
+ end_of_input_tls_allocated(false) {
+ grow(initial_buffer_size);
+ __TBB_ASSERT( array, nullptr );
+ }
+
+ //! Destroy the buffer.
+ ~input_buffer() {
+ __TBB_ASSERT( array, nullptr );
+ cache_aligned_allocator<task_info>().deallocate(array,array_size);
+ poison_pointer( array );
+ if( end_of_input_tls_allocated ) {
+ destroy_my_tls();
+ }
+ }
+
+ //! Define order when the first filter is serial_in_order.
+ Token get_ordered_token(){
+ return high_token++;
+ }
+
+ //! Put a token into the buffer.
+ /** If task information was placed into buffer, returns true;
+ otherwise returns false, informing the caller to create and spawn a task.
+ */
+ bool try_put_token( task_info& info ) {
+ info.is_valid = true;
+ spin_mutex::scoped_lock lock( array_mutex );
+ Token token;
+ if( is_ordered ) {
+ if( !info.my_token_ready ) {
+ info.my_token = high_token++;
+ info.my_token_ready = true;
+ }
+ token = info.my_token;
+ } else
+ token = high_token++;
+ __TBB_ASSERT( (long)(token-low_token)>=0, nullptr );
+ if( token!=low_token ) {
+ // Trying to put token that is beyond low_token.
+ // Need to wait until low_token catches up before dispatching.
+ if( token-low_token>=array_size )
+ grow( token-low_token+1 );
+ ITT_NOTIFY( sync_releasing, this );
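+ // array_size is a power of two, so masking maps the token onto its slot in the circular buffer.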
+ array[token&(array_size-1)] = info;
+ return true;
+ }
+ return false;
+ }
+
+ //! Note that processing of a token is finished.
+ /** Fires up processing of the next token, if processing was deferred. */
+ // Uses template to avoid explicit dependency on stage_task.
+ template<typename StageTask>
+ void try_to_spawn_task_for_next_token(StageTask& spawner, d1::execution_data& ed) {
+ task_info wakee;
+ {
+ spin_mutex::scoped_lock lock( array_mutex );
+ // Wake the next task
+ task_info& item = array[++low_token & (array_size-1)];
+ ITT_NOTIFY( sync_acquired, this );
+ wakee = item;
+ item.is_valid = false;
+ }
+ if( wakee.is_valid )
+ spawner.spawn_stage_task(wakee, ed);
+ }
+
+ // end_of_input signal for parallel_pipeline, parallel input filters with 0 tokens allowed.
+ void create_my_tls() {
+ int status = end_of_input_tls.create();
+ if(status)
+ handle_perror(status, "TLS not allocated for filter");
+ end_of_input_tls_allocated = true;
+ }
+ void destroy_my_tls() {
+ int status = end_of_input_tls.destroy();
+ if(status)
+ handle_perror(status, "Failed to destroy filter TLS");
+ }
+ bool my_tls_end_of_input() {
+ return end_of_input_tls.get() != 0;
+ }
+ void set_my_tls_end_of_input() {
+ end_of_input_tls.set(1);
+ }
+};
+
+void input_buffer::grow( size_type minimum_size ) {
+ size_type old_size = array_size;
+ size_type new_size = old_size ? 2*old_size : initial_buffer_size;
+ while( new_size<minimum_size )
+ new_size*=2;
+ task_info* new_array = cache_aligned_allocator<task_info>().allocate(new_size);
+ task_info* old_array = array;
+ for( size_type i=0; i<new_size; ++i )
+ new_array[i].is_valid = false;
+ Token t=low_token;
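+ // Re-insert the old items starting at low_token so each token keeps its slot modulo the new size.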
+ for( size_type i=0; i<old_size; ++i, ++t )
+ new_array[t&(new_size-1)] = old_array[t&(old_size-1)];
+ array = new_array;
+ array_size = new_size;
+ if( old_array )
+ cache_aligned_allocator<task_info>().deallocate(old_array,old_size);
+}
+
+class stage_task : public d1::task, public task_info {
+private:
+ friend class pipeline;
+ pipeline& my_pipeline;
+ d1::base_filter* my_filter;
+ d1::small_object_allocator m_allocator;
+ //! True if this task has not yet read the input.
+ bool my_at_start;
+
+ //! True if this can be executed again.
+ bool execute_filter(d1::execution_data& ed);
+
+ //! Spawn task if token is available.
+ void try_spawn_stage_task(d1::execution_data& ed) {
+ ITT_NOTIFY( sync_releasing, &my_pipeline.input_tokens );
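+ // fetch_sub returns the previous value; spawn another input task only if at least one more token remains.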
+ if( (my_pipeline.input_tokens.fetch_sub(1, std::memory_order_relaxed)) > 1 ) {
+ d1::small_object_allocator alloc{};
+ r1::spawn( *alloc.new_object<stage_task>(ed, my_pipeline, alloc ), my_pipeline.my_context );
+ }
+ }
+
+public:
+
+ //! Construct stage_task for first stage in a pipeline.
+ /** Such a stage has not read any input yet. */
+ stage_task(pipeline& pipeline, d1::small_object_allocator& alloc ) :
+ my_pipeline(pipeline),
+ my_filter(pipeline.first_filter),
+ m_allocator(alloc),
+ my_at_start(true)
+ {
+ task_info::reset();
+ my_pipeline.wait_ctx.reserve();
+ }
+ //! Construct stage_task for a subsequent stage in a pipeline.
+ stage_task(pipeline& pipeline, d1::base_filter* filter, const task_info& info, d1::small_object_allocator& alloc) :
+ task_info(info),
+ my_pipeline(pipeline),
+ my_filter(filter),
+ m_allocator(alloc),
+ my_at_start(false)
+ {
+ my_pipeline.wait_ctx.reserve();
+ }
+ //! Roughly equivalent to the constructor of input stage task
+ void reset() {
+ task_info::reset();
+ my_filter = my_pipeline.first_filter;
+ my_at_start = true;
+ }
+ void finalize(d1::execution_data& ed) {
+ m_allocator.delete_object(this, ed);
+ }
+ //! The virtual task execution method
+ task* execute(d1::execution_data& ed) override {
+ if(!execute_filter(ed)) {
+ finalize(ed);
+ return nullptr;
+ }
+ return this;
+ }
+ task* cancel(d1::execution_data& ed) override {
+ finalize(ed);
+ return nullptr;
+ }
+
+ ~stage_task() {
+ if ( my_filter && my_object ) {
+ my_filter->finalize(my_object);
+ my_object = nullptr;
+ }
+ my_pipeline.wait_ctx.release();
+ }
+ //! Creates and spawns stage_task from task_info
+ void spawn_stage_task(const task_info& info, d1::execution_data& ed) {
+ d1::small_object_allocator alloc{};
+ stage_task* clone = alloc.new_object<stage_task>(ed, my_pipeline, my_filter, info, alloc);
+ r1::spawn(*clone, my_pipeline.my_context);
+ }
+};
+
+bool stage_task::execute_filter(d1::execution_data& ed) {
+ __TBB_ASSERT( !my_at_start || !my_object, "invalid state of task" );
+ if( my_at_start ) {
+ if( my_filter->is_serial() ) {
+ my_object = (*my_filter)(my_object);
+ if( my_object || ( my_filter->object_may_be_null() && !my_pipeline.end_of_input.load(std::memory_order_relaxed)) ) {
+ if( my_filter->is_ordered() ) {
+ my_token = my_filter->my_input_buffer->get_ordered_token();
+ my_token_ready = true;
+ }
+ if( !my_filter->next_filter_in_pipeline ) { // we're the only filter in the pipeline
+ reset();
+ return true;
+ } else {
+ try_spawn_stage_task(ed);
+ }
+ } else {
+ my_pipeline.end_of_input.store(true, std::memory_order_relaxed);
+ return false;
+ }
+ } else /*not is_serial*/ {
+ if ( my_pipeline.end_of_input.load(std::memory_order_relaxed) ) {
+ return false;
+ }
+
+ try_spawn_stage_task(ed);
+
+ my_object = (*my_filter)(my_object);
+ if( !my_object && (!my_filter->object_may_be_null() || my_filter->my_input_buffer->my_tls_end_of_input()) ){
+ my_pipeline.end_of_input.store(true, std::memory_order_relaxed);
+ return false;
+ }
+ }
+ my_at_start = false;
+ } else {
+ my_object = (*my_filter)(my_object);
+ if( my_filter->is_serial() )
+ my_filter->my_input_buffer->try_to_spawn_task_for_next_token(*this, ed);
+ }
+ my_filter = my_filter->next_filter_in_pipeline;
+ if( my_filter ) {
+ // There is another filter to execute.
+ if( my_filter->is_serial() ) {
+ // The next filter must execute tokens when they are available (in order for serial_in_order)
+ if( my_filter->my_input_buffer->try_put_token(*this) ){
+ my_filter = nullptr; // To prevent deleting my_object twice if exception occurs
+ return false;
+ }
+ }
+ } else {
+ // Reached end of the pipe.
+ std::size_t ntokens_avail = my_pipeline.input_tokens.fetch_add(1, std::memory_order_relaxed);
+
+ if( ntokens_avail>0 // Only recycle if there is one available token
+ || my_pipeline.end_of_input.load(std::memory_order_relaxed) ) {
+ return false; // No need to recycle for new input
+ }
+ ITT_NOTIFY( sync_acquired, &my_pipeline.input_tokens );
+ // Recycle as an input stage task.
+ reset();
+ }
+ return true;
+}
+
+pipeline:: ~pipeline() {
+ while( first_filter ) {
+ d1::base_filter* f = first_filter;
+ if( input_buffer* b = f->my_input_buffer ) {
+ b->~input_buffer();
+ deallocate_memory(b);
+ }
+ first_filter = f->next_filter_in_pipeline;
+ f->~base_filter();
+ deallocate_memory(f);
+ }
+}
+
+void pipeline::add_filter( d1::base_filter& new_filter ) {
+ __TBB_ASSERT( new_filter.next_filter_in_pipeline==d1::base_filter::not_in_pipeline(), "filter already part of pipeline?" );
+ new_filter.my_pipeline = this;
+ if ( first_filter == nullptr )
+ first_filter = &new_filter;
+ else
+ last_filter->next_filter_in_pipeline = &new_filter;
+ new_filter.next_filter_in_pipeline = nullptr;
+ last_filter = &new_filter;
+ if( new_filter.is_serial() ) {
+ new_filter.my_input_buffer = new (allocate_memory(sizeof(input_buffer))) input_buffer( new_filter.is_ordered() );
+ } else {
+ if( first_filter == &new_filter && new_filter.object_may_be_null() ) {
+ //TODO: buffer only needed to hold TLS; could improve
+ new_filter.my_input_buffer = new (allocate_memory(sizeof(input_buffer))) input_buffer( /*is_ordered*/false );
+ new_filter.my_input_buffer->create_my_tls();
+ }
+ }
+}
+
+void __TBB_EXPORTED_FUNC parallel_pipeline(d1::task_group_context& cxt, std::size_t max_token, const d1::filter_node& fn) {
+ pipeline pipe(cxt, max_token);
+
+ pipe.fill_pipeline(fn);
+
+ d1::small_object_allocator alloc{};
+ stage_task& st = *alloc.new_object<stage_task>(pipe, alloc);
+
+ // Start execution of tasks
+ r1::execute_and_wait(st, cxt, pipe.wait_ctx, cxt);
+}
+
+void __TBB_EXPORTED_FUNC set_end_of_input(d1::base_filter& bf) {
+ __TBB_ASSERT(bf.my_input_buffer, nullptr);
+ __TBB_ASSERT(bf.object_may_be_null(), nullptr);
+ if(bf.is_serial() ) {
+ bf.my_pipeline->end_of_input.store(true, std::memory_order_relaxed);
+ } else {
+ __TBB_ASSERT(bf.my_input_buffer->end_of_input_tls_allocated, nullptr);
+ bf.my_input_buffer->set_my_tls_end_of_input();
+ }
+}
+
+} // namespace r1
+} // namespace detail
+} // namespace tbb
diff --git a/contrib/libs/tbb/src/tbb/private_server.cpp b/contrib/libs/tbb/src/tbb/private_server.cpp
new file mode 100644
index 0000000000..bc0af84bb4
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/private_server.cpp
@@ -0,0 +1,420 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "oneapi/tbb/cache_aligned_allocator.h"
+
+#include "rml_tbb.h"
+#include "rml_thread_monitor.h"
+
+#include "scheduler_common.h"
+#include "governor.h"
+#include "misc.h"
+
+#include <atomic>
+
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+namespace rml {
+
+using rml::internal::thread_monitor;
+typedef thread_monitor::handle_type thread_handle;
+
+class private_server;
+
+class private_worker: no_copy {
+private:
+ //! State in finite-state machine that controls the worker.
+ /** State diagram:
+ init --> starting --> normal
+ | | |
+ | V |
+ \------> quit <------/
+ */
+ enum state_t {
+ //! *this is initialized
+ st_init,
+ //! *this has associated thread that is starting up.
+ st_starting,
+ //! Associated thread is doing normal life sequence.
+ st_normal,
+ //! Associated thread has ended normal life sequence and promises to never touch *this again.
+ st_quit
+ };
+ std::atomic<state_t> my_state;
+
+ //! Associated server
+ private_server& my_server;
+
+ //! Associated client
+ tbb_client& my_client;
+
+ //! index used for avoiding the 64K aliasing problem
+ const std::size_t my_index;
+
+ //! Monitor for sleeping when there is no work to do.
+ /** The invariant that holds for sleeping workers is:
+ "my_slack<=0 && my_state==st_normal && I am on server's list of asleep threads" */
+ thread_monitor my_thread_monitor;
+
+ //! Handle of the OS thread associated with this worker
+ thread_handle my_handle;
+
+ //! Link for list of workers that are sleeping or have no associated thread.
+ private_worker* my_next;
+
+ friend class private_server;
+
+ //! Actions executed by the associated thread
+ void run() noexcept;
+
+ //! Wake up associated thread (or launch a thread if there is none)
+ void wake_or_launch();
+
+ //! Called by a thread (usually not the associated thread) to commence termination.
+ void start_shutdown();
+
+ static __RML_DECL_THREAD_ROUTINE thread_routine( void* arg );
+
+ static void release_handle(thread_handle my_handle, bool join);
+
+protected:
+ private_worker( private_server& server, tbb_client& client, const std::size_t i ) :
+ my_state(st_init), my_server(server), my_client(client), my_index(i),
+ my_thread_monitor(), my_handle(), my_next()
+ {}
+};
+
+static const std::size_t cache_line_size = tbb::detail::max_nfs_size;
+
+#if _MSC_VER && !defined(__INTEL_COMPILER)
+ // Suppress overzealous compiler warnings about uninstantiable class
+ #pragma warning(push)
+ #pragma warning(disable:4510 4610)
+#endif
+class padded_private_worker: public private_worker {
+ char pad[cache_line_size - sizeof(private_worker)%cache_line_size];
+public:
+ padded_private_worker( private_server& server, tbb_client& client, const std::size_t i )
+ : private_worker(server,client,i) { suppress_unused_warning(pad); }
+};
+#if _MSC_VER && !defined(__INTEL_COMPILER)
+ #pragma warning(pop)
+#endif
+
+class private_server: public tbb_server, no_copy {
+private:
+ tbb_client& my_client;
+ //! Maximum number of threads to be created.
+ /** Threads are created lazily, so maximum might not actually be reached. */
+ const tbb_client::size_type my_n_thread;
+
+ //! Stack size for each thread.
+ const std::size_t my_stack_size;
+
+ //! Number of jobs that could use their associated thread minus number of active threads.
+ /** If negative, indicates oversubscription.
+ If positive, indicates that more threads should run.
+ Can be lowered asynchronously, but must be raised only while holding my_asleep_list_mutex,
+ because raising it impacts the invariant for sleeping threads. */
+ std::atomic<int> my_slack;
+
+ //! Counter used to determine when to delete this.
+ std::atomic<int> my_ref_count;
+
+ padded_private_worker* my_thread_array;
+
+ //! List of workers that are asleep or committed to sleeping until notified by another thread.
+ std::atomic<private_worker*> my_asleep_list_root;
+
+ //! Protects my_asleep_list_root
+ typedef scheduler_mutex_type asleep_list_mutex_type;
+ asleep_list_mutex_type my_asleep_list_mutex;
+
+#if TBB_USE_ASSERT
+ std::atomic<int> my_net_slack_requests;
+#endif /* TBB_USE_ASSERT */
+
+ //! Wake up to two sleeping workers, if there are any sleeping.
+ /** The call is used to propagate a chain reaction where each thread wakes up two threads,
+ which in turn each wake up two threads, etc. */
+ void propagate_chain_reaction() {
+ // First test of a double-check idiom. Second test is inside wake_some(0).
+ if( my_asleep_list_root.load(std::memory_order_acquire) )
+ wake_some(0);
+ }
+
+ //! Try to add t to list of sleeping workers
+ bool try_insert_in_asleep_list( private_worker& t );
+
+ //! Equivalent of adding additional_slack to my_slack and waking up to 2 threads if my_slack permits.
+ void wake_some( int additional_slack );
+
+ virtual ~private_server();
+
+ void remove_server_ref() {
+ if( --my_ref_count==0 ) {
+ my_client.acknowledge_close_connection();
+ this->~private_server();
+ tbb::cache_aligned_allocator<private_server>().deallocate( this, 1 );
+ }
+ }
+
+ friend class private_worker;
+public:
+ private_server( tbb_client& client );
+
+ version_type version() const override {
+ return 0;
+ }
+
+ void request_close_connection( bool /*exiting*/ ) override {
+ for( std::size_t i=0; i<my_n_thread; ++i )
+ my_thread_array[i].start_shutdown();
+ remove_server_ref();
+ }
+
+ void yield() override { d0::yield(); }
+
+ void independent_thread_number_changed( int ) override {__TBB_ASSERT(false,NULL);}
+
+ unsigned default_concurrency() const override { return governor::default_num_threads() - 1; }
+
+ void adjust_job_count_estimate( int delta ) override;
+
+#if _WIN32||_WIN64
+ void register_external_thread ( ::rml::server::execution_resource_t& ) override {}
+ void unregister_external_thread ( ::rml::server::execution_resource_t ) override {}
+#endif /* _WIN32||_WIN64 */
+};
+
+//------------------------------------------------------------------------
+// Methods of private_worker
+//------------------------------------------------------------------------
+#if _MSC_VER && !defined(__INTEL_COMPILER)
+ // Suppress overzealous compiler warnings about an initialized variable 'sink_for_alloca' not referenced
+ #pragma warning(push)
+ #pragma warning(disable:4189)
+#endif
+#if __MINGW32__ && __GNUC__==4 &&__GNUC_MINOR__>=2 && !__MINGW64__
+// ensure that stack is properly aligned for TBB threads
+__attribute__((force_align_arg_pointer))
+#endif
+__RML_DECL_THREAD_ROUTINE private_worker::thread_routine( void* arg ) {
+ private_worker* self = static_cast<private_worker*>(arg);
+ AVOID_64K_ALIASING( self->my_index );
+ self->run();
+ return 0;
+}
+#if _MSC_VER && !defined(__INTEL_COMPILER)
+ #pragma warning(pop)
+#endif
+
+void private_worker::release_handle(thread_handle handle, bool join) {
+ if (join)
+ thread_monitor::join(handle);
+ else
+ thread_monitor::detach_thread(handle);
+}
+
+void private_worker::start_shutdown() {
+ state_t expected_state = my_state.load(std::memory_order_acquire);
+ __TBB_ASSERT( expected_state!=st_quit, NULL );
+
+ while( !my_state.compare_exchange_strong( expected_state, st_quit ) );
+
+ if( expected_state==st_normal || expected_state==st_starting ) {
+ // May have invalidated invariant for sleeping, so wake up the thread.
+ // Note that the notify() here occurs without maintaining invariants for my_slack.
+ // It does not matter, because my_state==st_quit overrides checking of my_slack.
+ my_thread_monitor.notify();
+ // Do not need release handle in st_init state,
+ // because in this case the thread wasn't started yet.
+ // For st_starting release is done at launch site.
+ if (expected_state==st_normal)
+ release_handle(my_handle, governor::does_client_join_workers(my_client));
+ } else if( expected_state==st_init ) {
+ // Perform action that otherwise would be performed by associated thread when it quits.
+ my_server.remove_server_ref();
+ }
+}
+
+void private_worker::run() noexcept {
+ my_server.propagate_chain_reaction();
+
+ // Transiting to st_normal here would require setting my_handle,
+ // which would create race with the launching thread and
+ // complications in handle management on Windows.
+
+ ::rml::job& j = *my_client.create_one_job();
+ while( my_state.load(std::memory_order_acquire)!=st_quit ) {
+ if( my_server.my_slack.load(std::memory_order_acquire)>=0 ) {
+ my_client.process(j);
+ } else {
+ thread_monitor::cookie c;
+ // Prepare to wait
+ my_thread_monitor.prepare_wait(c);
+ // Check/set the invariant for sleeping
+ if( my_state.load(std::memory_order_acquire)!=st_quit && my_server.try_insert_in_asleep_list(*this) ) {
+ my_thread_monitor.commit_wait(c);
+ __TBB_ASSERT( my_state==st_quit || !my_next, "Thread monitor missed a spurious wakeup?" );
+ my_server.propagate_chain_reaction();
+ } else {
+ // Invariant broken
+ my_thread_monitor.cancel_wait();
+ }
+ }
+ }
+ my_client.cleanup(j);
+
+ ++my_server.my_slack;
+ my_server.remove_server_ref();
+}
+
+inline void private_worker::wake_or_launch() {
+ state_t expected_state = st_init;
+ if( my_state.compare_exchange_strong( expected_state, st_starting ) ) {
+ // after this point, remove_server_ref() must be done by created thread
+#if __TBB_USE_WINAPI
+ my_handle = thread_monitor::launch( thread_routine, this, my_server.my_stack_size, &this->my_index );
+#elif __TBB_USE_POSIX
+ {
+ affinity_helper fpa;
+ fpa.protect_affinity_mask( /*restore_process_mask=*/true );
+ my_handle = thread_monitor::launch( thread_routine, this, my_server.my_stack_size );
+ // Implicit destruction of fpa resets original affinity mask.
+ }
+#endif /* __TBB_USE_POSIX */
+ expected_state = st_starting;
+ if ( !my_state.compare_exchange_strong( expected_state, st_normal ) ) {
+ // Do shutdown during startup. my_handle can't be released
+ // by start_shutdown, because my_handle value might be not set yet
+ // at time of transition from st_starting to st_quit.
+ __TBB_ASSERT( expected_state==st_quit, NULL );
+ release_handle(my_handle, governor::does_client_join_workers(my_client));
+ }
+ }
+ else {
+ __TBB_ASSERT( !my_next, "Should not wake a thread while it's still in asleep list" );
+ my_thread_monitor.notify();
+ }
+}
+
+//------------------------------------------------------------------------
+// Methods of private_server
+//------------------------------------------------------------------------
+private_server::private_server( tbb_client& client ) :
+ my_client(client),
+ my_n_thread(client.max_job_count()),
+ my_stack_size(client.min_stack_size()),
+ my_slack(0),
+ my_ref_count(my_n_thread+1),
+ my_thread_array(NULL),
+ my_asleep_list_root(NULL)
+#if TBB_USE_ASSERT
+ , my_net_slack_requests(0)
+#endif /* TBB_USE_ASSERT */
+{
+ my_thread_array = tbb::cache_aligned_allocator<padded_private_worker>().allocate( my_n_thread );
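+ // Construct all workers up front and chain them onto the asleep list; their OS threads are launched lazily by wake_or_launch().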
+ for( std::size_t i=0; i<my_n_thread; ++i ) {
+ private_worker* t = new( &my_thread_array[i] ) padded_private_worker( *this, client, i );
+ t->my_next = my_asleep_list_root.exchange(t, std::memory_order_relaxed);
+ }
+}
+
+private_server::~private_server() {
+ __TBB_ASSERT( my_net_slack_requests==0, NULL );
+ for( std::size_t i=my_n_thread; i--; )
+ my_thread_array[i].~padded_private_worker();
+ tbb::cache_aligned_allocator<padded_private_worker>().deallocate( my_thread_array, my_n_thread );
+ tbb::detail::poison_pointer( my_thread_array );
+}
+
+inline bool private_server::try_insert_in_asleep_list( private_worker& t ) {
+ asleep_list_mutex_type::scoped_lock lock;
+ if( !lock.try_acquire(my_asleep_list_mutex) )
+ return false;
+ // Contribute to slack under the lock so that if another thread takes that unit of slack,
+ // it sees us sleeping on the list and wakes us up.
+ int k = ++my_slack;
+ if( k<=0 ) {
+ t.my_next = my_asleep_list_root.exchange(&t, std::memory_order_relaxed);
+ return true;
+ } else {
+ --my_slack;
+ return false;
+ }
+}
+
+void private_server::wake_some( int additional_slack ) {
+ __TBB_ASSERT( additional_slack>=0, NULL );
+ private_worker* wakee[2];
+ private_worker**w = wakee;
+ {
+ asleep_list_mutex_type::scoped_lock lock(my_asleep_list_mutex);
+ while( my_asleep_list_root.load(std::memory_order_relaxed) && w<wakee+2 ) {
+ if( additional_slack>0 ) {
+ // additional demand does not exceed surplus supply
+ if ( additional_slack+my_slack.load(std::memory_order_acquire)<=0 )
+ break;
+ --additional_slack;
+ } else {
+ // Chain reaction; Try to claim unit of slack
+ int old = my_slack;
+ do {
+ if( old<=0 ) goto done;
+ } while( !my_slack.compare_exchange_strong(old,old-1) );
+ }
+ // Pop sleeping worker to combine with claimed unit of slack
+ auto old = my_asleep_list_root.load(std::memory_order_relaxed);
+ my_asleep_list_root.store(old->my_next, std::memory_order_relaxed);
+ *w++ = old;
+ }
+ if( additional_slack ) {
+ // Contribute our unused slack to my_slack.
+ my_slack += additional_slack;
+ }
+ }
+done:
+ while( w>wakee ) {
+ private_worker* ww = *--w;
+ ww->my_next = NULL;
+ ww->wake_or_launch();
+ }
+}
+
+void private_server::adjust_job_count_estimate( int delta ) {
+#if TBB_USE_ASSERT
+ my_net_slack_requests+=delta;
+#endif /* TBB_USE_ASSERT */
+ if( delta<0 ) {
+ my_slack+=delta;
+ } else if( delta>0 ) {
+ wake_some( delta );
+ }
+}
+
+//! Factory method called from task.cpp to create a private_server.
+tbb_server* make_private_server( tbb_client& client ) {
+ return new( tbb::cache_aligned_allocator<private_server>().allocate(1) ) private_server(client);
+}
+
+} // namespace rml
+} // namespace r1
+} // namespace detail
+} // namespace tbb
+
diff --git a/contrib/libs/tbb/src/tbb/profiling.cpp b/contrib/libs/tbb/src/tbb/profiling.cpp
new file mode 100644
index 0000000000..2603f35b88
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/profiling.cpp
@@ -0,0 +1,265 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "oneapi/tbb/detail/_config.h"
+#include "oneapi/tbb/detail/_template_helpers.h"
+
+#include "main.h"
+#include "itt_notify.h"
+
+#include "oneapi/tbb/profiling.h"
+
+#include <string.h>
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+
+#if __TBB_USE_ITT_NOTIFY
+bool ITT_Present;
+static std::atomic<bool> ITT_InitializationDone;
+
+static __itt_domain *tbb_domains[d1::ITT_NUM_DOMAINS] = {};
+
+struct resource_string {
+ const char *str;
+ __itt_string_handle *itt_str_handle;
+};
+
+//
+// populate resource strings
+//
+#define TBB_STRING_RESOURCE( index_name, str ) { str, nullptr },
+static resource_string strings_for_itt[] = {
+ #include "oneapi/tbb/detail/_string_resource.h"
+ { "num_resource_strings", nullptr }
+};
+#undef TBB_STRING_RESOURCE
+
+static __itt_string_handle* ITT_get_string_handle(std::uintptr_t idx) {
+ __TBB_ASSERT(idx < NUM_STRINGS, "string handle out of valid range");
+ return idx < NUM_STRINGS ? strings_for_itt[idx].itt_str_handle : NULL;
+}
+
+static void ITT_init_domains() {
+ tbb_domains[d1::ITT_DOMAIN_MAIN] = __itt_domain_create( _T("tbb") );
+ tbb_domains[d1::ITT_DOMAIN_MAIN]->flags = 1;
+ tbb_domains[d1::ITT_DOMAIN_FLOW] = __itt_domain_create( _T("tbb.flow") );
+ tbb_domains[d1::ITT_DOMAIN_FLOW]->flags = 1;
+ tbb_domains[d1::ITT_DOMAIN_ALGO] = __itt_domain_create( _T("tbb.algorithm") );
+ tbb_domains[d1::ITT_DOMAIN_ALGO]->flags = 1;
+}
+
+static void ITT_init_strings() {
+ for ( std::uintptr_t i = 0; i < NUM_STRINGS; ++i ) {
+#if _WIN32||_WIN64
+ strings_for_itt[i].itt_str_handle = __itt_string_handle_createA( strings_for_itt[i].str );
+#else
+ strings_for_itt[i].itt_str_handle = __itt_string_handle_create( strings_for_itt[i].str );
+#endif
+ }
+}
+
+static void ITT_init() {
+ ITT_init_domains();
+ ITT_init_strings();
+}
+
+/** Thread-unsafe lazy one-time initialization of tools interop.
+    Used by both dummy handlers and the general TBB one-time initialization routine. **/
+void ITT_DoUnsafeOneTimeInitialization () {
+    // The double check of ITT_InitializationDone is necessary because the first check
+    // in ITT_DoOneTimeInitialization is not guarded by the __TBB_InitOnce lock.
+ if ( !ITT_InitializationDone ) {
+ ITT_Present = (__TBB_load_ittnotify()!=0);
+ if (ITT_Present) ITT_init();
+ ITT_InitializationDone = true;
+ }
+}
+
+/** Thread-safe lazy one-time initialization of tools interop.
+ Used by dummy handlers only. **/
+extern "C"
+void ITT_DoOneTimeInitialization() {
+ if ( !ITT_InitializationDone ) {
+ __TBB_InitOnce::lock();
+ ITT_DoUnsafeOneTimeInitialization();
+ __TBB_InitOnce::unlock();
+ }
+}
+
+void create_itt_sync(void* ptr, const tchar* objtype, const tchar* objname) {
+ ITT_SYNC_CREATE(ptr, objtype, objname);
+}
+
+void call_itt_notify(int t, void *ptr) {
+ switch (t) {
+ case 0: ITT_NOTIFY(sync_prepare, ptr); break;
+ case 1: ITT_NOTIFY(sync_cancel, ptr); break;
+ case 2: ITT_NOTIFY(sync_acquired, ptr); break;
+ case 3: ITT_NOTIFY(sync_releasing, ptr); break;
+ case 4: ITT_NOTIFY(sync_destroy, ptr); break;
+ }
+}
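+
+// Illustrative mapping: wrapping a contended acquire, a caller would emit
+// call_itt_notify(0, &lock) before blocking and call_itt_notify(2, &lock) once the
+// lock is obtained, i.e. the sync_prepare/sync_acquired pair above (the integer
+// codes are assumed to come from the matching enumeration on the caller's side).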
+
+void itt_set_sync_name(void* obj, const tchar* name) {
+ __itt_sync_rename(obj, name);
+}
+
+const __itt_id itt_null_id = { 0, 0, 0 };
+
+static inline __itt_domain* get_itt_domain(d1::itt_domain_enum idx) {
+ if (tbb_domains[idx] == NULL) {
+ ITT_DoOneTimeInitialization();
+ }
+ return tbb_domains[idx];
+}
+
+static inline void itt_id_make(__itt_id* id, void* addr, unsigned long long extra) {
+ *id = __itt_id_make(addr, extra);
+}
+
+static inline void itt_id_create(const __itt_domain* domain, __itt_id id) {
+ __itt_id_create(domain, id);
+}
+
+void itt_make_task_group(d1::itt_domain_enum domain, void* group, unsigned long long group_extra,
+ void* parent, unsigned long long parent_extra, string_resource_index name_index) {
+ if (__itt_domain* d = get_itt_domain(domain)) {
+ __itt_id group_id = itt_null_id;
+ __itt_id parent_id = itt_null_id;
+ itt_id_make(&group_id, group, group_extra);
+ itt_id_create(d, group_id);
+ if (parent) {
+ itt_id_make(&parent_id, parent, parent_extra);
+ }
+ __itt_string_handle* n = ITT_get_string_handle(name_index);
+ __itt_task_group(d, group_id, parent_id, n);
+ }
+}
+
+void __TBB_EXPORTED_FUNC itt_metadata_str_add(d1::itt_domain_enum domain, void *addr, unsigned long long addr_extra,
+ string_resource_index key, const char *value ) {
+ if ( __itt_domain *d = get_itt_domain( domain ) ) {
+ __itt_id id = itt_null_id;
+ itt_id_make( &id, addr, addr_extra );
+ __itt_string_handle *k = ITT_get_string_handle(key);
+ size_t value_length = strlen( value );
+#if _WIN32||_WIN64
+ __itt_metadata_str_addA(d, id, k, value, value_length);
+#else
+ __itt_metadata_str_add(d, id, k, value, value_length);
+#endif
+ }
+}
+
+void __TBB_EXPORTED_FUNC itt_metadata_ptr_add(d1::itt_domain_enum domain, void *addr, unsigned long long addr_extra,
+ string_resource_index key, void *value ) {
+ if ( __itt_domain *d = get_itt_domain( domain ) ) {
+ __itt_id id = itt_null_id;
+ itt_id_make( &id, addr, addr_extra );
+ __itt_string_handle *k = ITT_get_string_handle(key);
+#if __TBB_x86_32
+ __itt_metadata_add(d, id, k, __itt_metadata_u32, 1, value);
+#else
+ __itt_metadata_add(d, id, k, __itt_metadata_u64, 1, value);
+#endif
+ }
+}
+
+void __TBB_EXPORTED_FUNC itt_relation_add(d1::itt_domain_enum domain, void *addr0, unsigned long long addr0_extra,
+ itt_relation relation, void *addr1, unsigned long long addr1_extra ) {
+ if ( __itt_domain *d = get_itt_domain( domain ) ) {
+ __itt_id id0 = itt_null_id;
+ __itt_id id1 = itt_null_id;
+ itt_id_make( &id0, addr0, addr0_extra );
+ itt_id_make( &id1, addr1, addr1_extra );
+ __itt_relation_add( d, id0, (__itt_relation)relation, id1 );
+ }
+}
+
+void __TBB_EXPORTED_FUNC itt_task_begin(d1::itt_domain_enum domain, void* task, unsigned long long task_extra,
+ void* parent, unsigned long long parent_extra, string_resource_index name_index) {
+ if (__itt_domain* d = get_itt_domain(domain)) {
+ __itt_id task_id = itt_null_id;
+ __itt_id parent_id = itt_null_id;
+ if (task) {
+ itt_id_make(&task_id, task, task_extra);
+ }
+ if (parent) {
+ itt_id_make(&parent_id, parent, parent_extra);
+ }
+ __itt_string_handle* n = ITT_get_string_handle(name_index);
+ __itt_task_begin(d, task_id, parent_id, n);
+ }
+}
+
+void __TBB_EXPORTED_FUNC itt_task_end(d1::itt_domain_enum domain) {
+ if (__itt_domain* d = get_itt_domain(domain)) {
+ __itt_task_end(d);
+ }
+}
+
+void __TBB_EXPORTED_FUNC itt_region_begin(d1::itt_domain_enum domain, void *region, unsigned long long region_extra,
+ void *parent, unsigned long long parent_extra, string_resource_index /* name_index */ ) {
+ if ( __itt_domain *d = get_itt_domain( domain ) ) {
+ __itt_id region_id = itt_null_id;
+ __itt_id parent_id = itt_null_id;
+ itt_id_make( &region_id, region, region_extra );
+ if ( parent ) {
+ itt_id_make( &parent_id, parent, parent_extra );
+ }
+ __itt_region_begin( d, region_id, parent_id, NULL );
+ }
+}
+
+void __TBB_EXPORTED_FUNC itt_region_end(d1::itt_domain_enum domain, void *region, unsigned long long region_extra ) {
+ if ( __itt_domain *d = get_itt_domain( domain ) ) {
+ __itt_id region_id = itt_null_id;
+ itt_id_make( &region_id, region, region_extra );
+ __itt_region_end( d, region_id );
+ }
+}
+
+#else
+void create_itt_sync(void* /*ptr*/, const tchar* /*objtype*/, const tchar* /*objname*/) {}
+void call_itt_notify(int /*t*/, void* /*ptr*/) {}
+void itt_set_sync_name(void* /*obj*/, const tchar* /*name*/) {}
+void itt_make_task_group(d1::itt_domain_enum /*domain*/, void* /*group*/, unsigned long long /*group_extra*/,
+ void* /*parent*/, unsigned long long /*parent_extra*/, string_resource_index /*name_index*/) {}
+void itt_metadata_str_add(d1::itt_domain_enum /*domain*/, void* /*addr*/, unsigned long long /*addr_extra*/,
+ string_resource_index /*key*/, const char* /*value*/ ) { }
+void itt_metadata_ptr_add(d1::itt_domain_enum /*domain*/, void * /*addr*/, unsigned long long /*addr_extra*/,
+ string_resource_index /*key*/, void * /*value*/ ) {}
+void itt_relation_add(d1::itt_domain_enum /*domain*/, void* /*addr0*/, unsigned long long /*addr0_extra*/,
+ itt_relation /*relation*/, void* /*addr1*/, unsigned long long /*addr1_extra*/ ) { }
+void itt_task_begin(d1::itt_domain_enum /*domain*/, void* /*task*/, unsigned long long /*task_extra*/,
+ void* /*parent*/, unsigned long long /*parent_extra*/, string_resource_index /*name_index*/ ) { }
+void itt_task_end(d1::itt_domain_enum /*domain*/ ) { }
+void itt_region_begin(d1::itt_domain_enum /*domain*/, void* /*region*/, unsigned long long /*region_extra*/,
+ void* /*parent*/, unsigned long long /*parent_extra*/, string_resource_index /*name_index*/ ) { }
+void itt_region_end(d1::itt_domain_enum /*domain*/, void* /*region*/, unsigned long long /*region_extra*/ ) { }
+#endif /* __TBB_USE_ITT_NOTIFY */
+
+const tchar
+ *SyncType_Scheduler = _T("%Constant")
+ ;
+const tchar
+ *SyncObj_ContextsList = _T("TBB Scheduler")
+ ;
+} // namespace r1
+} // namespace detail
+} // namespace tbb
diff --git a/contrib/libs/tbb/src/tbb/queuing_rw_mutex.cpp b/contrib/libs/tbb/src/tbb/queuing_rw_mutex.cpp
new file mode 100644
index 0000000000..cfdc4d3c2a
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/queuing_rw_mutex.cpp
@@ -0,0 +1,558 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+/** Before making any changes in the implementation, please emulate algorithmic changes
+    with the SPIN tool using <TBB directory>/tools/spin_models/ReaderWriterMutex.pml.
+    Some code may look as if it "can be restructured", but its structure does matter! */
+
+#include "oneapi/tbb/queuing_rw_mutex.h"
+#include "oneapi/tbb/detail/_assert.h"
+#include "oneapi/tbb/detail/_utils.h"
+#include "itt_notify.h"
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+
+#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
+ // Workaround for overzealous compiler warnings
+ #pragma warning (push)
+ #pragma warning (disable: 4311 4312)
+#endif
+
+//! A view of a T* with additional functionality for twiddling low-order bits.
+template<typename T>
+class tricky_atomic_pointer {
+public:
+ using word = uintptr_t;
+
+ static T* fetch_add( std::atomic<word>& location, word addend, std::memory_order memory_order ) {
+ return reinterpret_cast<T*>(location.fetch_add(addend, memory_order));
+ }
+
+ static T* exchange( std::atomic<word>& location, T* value, std::memory_order memory_order ) {
+ return reinterpret_cast<T*>(location.exchange(reinterpret_cast<word>(value), memory_order));
+ }
+
+ static T* compare_exchange_strong( std::atomic<word>& obj, const T* expected, const T* desired, std::memory_order memory_order ) {
+ word expd = reinterpret_cast<word>(expected);
+ obj.compare_exchange_strong(expd, reinterpret_cast<word>(desired), memory_order);
+ return reinterpret_cast<T*>(expd);
+ }
+
+ static void store( std::atomic<word>& location, const T* value, std::memory_order memory_order ) {
+ location.store(reinterpret_cast<word>(value), memory_order);
+ }
+
+ static T* load( std::atomic<word>& location, std::memory_order memory_order ) {
+ return reinterpret_cast<T*>(location.load(memory_order));
+ }
+
+ static void spin_wait_while_eq(const std::atomic<word>& location, const T* value) {
+ tbb::detail::d0::spin_wait_while_eq(location, reinterpret_cast<word>(value) );
+ }
+
+ T* & ref;
+ tricky_atomic_pointer( T*& original ) : ref(original) {};
+ tricky_atomic_pointer(const tricky_atomic_pointer&) = delete;
+ tricky_atomic_pointer& operator=(const tricky_atomic_pointer&) = delete;
+ T* operator&( const word operand2 ) const {
+ return reinterpret_cast<T*>( reinterpret_cast<word>(ref) & operand2 );
+ }
+ T* operator|( const word operand2 ) const {
+ return reinterpret_cast<T*>( reinterpret_cast<word>(ref) | operand2 );
+ }
+};
+
+using tricky_pointer = tricky_atomic_pointer<queuing_rw_mutex::scoped_lock>;
+
+#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
+ // Workaround for overzealous compiler warnings
+ #pragma warning (pop)
+#endif
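+
+// Usage sketch (illustrative only; mirrors how the code below tags queue links).
+// Because scoped_lock objects are at least 2-byte aligned, the lowest bit of a stored
+// pointer is free to carry a status flag; get_node() is a hypothetical source of a node:
+//
+//     std::atomic<tricky_pointer::word> link{0};
+//     queuing_rw_mutex::scoped_lock* node = get_node();
+//     tricky_pointer::store(link, tricky_pointer(node) | 0x1,   // publish with flag set
+//                           std::memory_order_release);
+//     queuing_rw_mutex::scoped_lock* p =
+//         tricky_pointer::load(link, std::memory_order_acquire);
+//     bool flagged = reinterpret_cast<tricky_pointer::word>(p) & 0x1;
+//     p = tricky_pointer(p) & ~tricky_pointer::word(1);         // strip flag before use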
+
+//! Flag bits in a state_t that specify information about a locking request.
+enum state_t_flags : unsigned char {
+ STATE_NONE = 0,
+ STATE_WRITER = 1<<0,
+ STATE_READER = 1<<1,
+ STATE_READER_UNBLOCKNEXT = 1<<2,
+ STATE_ACTIVEREADER = 1<<3,
+ STATE_UPGRADE_REQUESTED = 1<<4,
+ STATE_UPGRADE_WAITING = 1<<5,
+ STATE_UPGRADE_LOSER = 1<<6,
+ STATE_COMBINED_WAITINGREADER = STATE_READER | STATE_READER_UNBLOCKNEXT,
+ STATE_COMBINED_READER = STATE_COMBINED_WAITINGREADER | STATE_ACTIVEREADER,
+ STATE_COMBINED_UPGRADING = STATE_UPGRADE_WAITING | STATE_UPGRADE_LOSER
+};
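+
+// Illustrative use of the combined masks (this is how release() and upgrade_to_writer()
+// below classify a successor in one test instead of comparing individual values):
+//
+//     if( next->my_state & STATE_COMBINED_WAITINGREADER )      // STATE_READER or
+//         next->my_going.store(1U, std::memory_order_release); // STATE_READER_UNBLOCKNEXT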
+
+static const unsigned char RELEASED = 0;
+static const unsigned char ACQUIRED = 1;
+
+struct queuing_rw_mutex_impl {
+ //! Try to acquire the internal lock
+ /** Returns true if lock was successfully acquired. */
+ static bool try_acquire_internal_lock(d1::queuing_rw_mutex::scoped_lock& s)
+ {
+ auto expected = RELEASED;
+ return s.my_internal_lock.compare_exchange_strong(expected, ACQUIRED);
+ }
+
+ //! Acquire the internal lock
+ static void acquire_internal_lock(d1::queuing_rw_mutex::scoped_lock& s)
+ {
+ // Usually, we would use the test-test-and-set idiom here, with exponential backoff.
+ // But so far, experiments indicate there is no value in doing so here.
+ while( !try_acquire_internal_lock(s) ) {
+ machine_pause(1);
+ }
+ }
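+
+    // For reference, the test-and-test-and-set variant mentioned above would look
+    // roughly like this (illustrative only; rejected because it showed no benefit here):
+    //
+    //     atomic_backoff backoff;
+    //     while( !try_acquire_internal_lock(s) ) {                                       // ...-and-set
+    //         do {
+    //             backoff.pause();
+    //         } while( s.my_internal_lock.load(std::memory_order_relaxed) != RELEASED ); // test first
+    //     }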
+
+ //! Release the internal lock
+ static void release_internal_lock(d1::queuing_rw_mutex::scoped_lock& s)
+ {
+ s.my_internal_lock.store(RELEASED, std::memory_order_release);
+ }
+
+ //! Wait for internal lock to be released
+ static void wait_for_release_of_internal_lock(d1::queuing_rw_mutex::scoped_lock& s)
+ {
+ spin_wait_until_eq(s.my_internal_lock, RELEASED);
+ }
+
+ //! A helper function
+ static void unblock_or_wait_on_internal_lock(d1::queuing_rw_mutex::scoped_lock& s, uintptr_t flag ) {
+ if( flag ) {
+ wait_for_release_of_internal_lock(s);
+ }
+ else {
+ release_internal_lock(s);
+ }
+ }
+
+ //! Mask for low order bit of a pointer.
+ static const tricky_pointer::word FLAG = 0x1;
+
+ static uintptr_t get_flag( d1::queuing_rw_mutex::scoped_lock* ptr ) {
+ return reinterpret_cast<uintptr_t>(ptr) & FLAG;
+ }
+
+ //------------------------------------------------------------------------
+ // Methods of queuing_rw_mutex::scoped_lock
+ //------------------------------------------------------------------------
+
+ //! A method to acquire queuing_rw_mutex lock
+ static void acquire(d1::queuing_rw_mutex& m, d1::queuing_rw_mutex::scoped_lock& s, bool write)
+ {
+ __TBB_ASSERT( !s.my_mutex, "scoped_lock is already holding a mutex");
+
+ // Must set all fields before the exchange, because once the
+ // exchange executes, *this becomes accessible to other threads.
+ s.my_mutex = &m;
+ s.my_prev.store(0U, std::memory_order_relaxed);
+ s.my_next.store(0U, std::memory_order_relaxed);
+ s.my_going.store(0U, std::memory_order_relaxed);
+ s.my_state.store(d1::queuing_rw_mutex::scoped_lock::state_t(write ? STATE_WRITER : STATE_READER), std::memory_order_relaxed);
+ s.my_internal_lock.store(RELEASED, std::memory_order_relaxed);
+
+ queuing_rw_mutex::scoped_lock* predecessor = m.q_tail.exchange(&s, std::memory_order_release);
+
+ if( write ) { // Acquiring for write
+
+ if( predecessor ) {
+ ITT_NOTIFY(sync_prepare, s.my_mutex);
+ predecessor = tricky_pointer(predecessor) & ~FLAG;
+ __TBB_ASSERT( !( tricky_pointer(predecessor) & FLAG ), "use of corrupted pointer!" );
+ #if TBB_USE_ASSERT
+ atomic_fence(std::memory_order_seq_cst); // on "m.q_tail"
+ __TBB_ASSERT( !predecessor->my_next, "the predecessor has another successor!");
+ #endif
+ tricky_pointer::store(predecessor->my_next, &s, std::memory_order_release);
+ spin_wait_until_eq(s.my_going, 1U);
+ }
+
+ } else { // Acquiring for read
+ #if __TBB_USE_ITT_NOTIFY
+ bool sync_prepare_done = false;
+ #endif
+ if( predecessor ) {
+ unsigned char pred_state;
+ __TBB_ASSERT( !s.my_prev, "the predecessor is already set" );
+ if( tricky_pointer(predecessor) & FLAG ) {
+ /* this is only possible if predecessor is an upgrading reader and it signals us to wait */
+ pred_state = STATE_UPGRADE_WAITING;
+ predecessor = tricky_pointer(predecessor) & ~FLAG;
+ } else {
+ // Load predecessor->my_state now, because once predecessor->my_next becomes
+ // non-NULL, we must assume that *predecessor might be destroyed.
+ pred_state = STATE_READER;
+ predecessor->my_state.compare_exchange_strong(pred_state, STATE_READER_UNBLOCKNEXT, std::memory_order_acq_rel);
+ }
+ tricky_pointer::store(s.my_prev, predecessor, std::memory_order_relaxed);
+ __TBB_ASSERT( !( tricky_pointer(predecessor) & FLAG ), "use of corrupted pointer!" );
+ #if TBB_USE_ASSERT
+ atomic_fence(std::memory_order_seq_cst); // on "m.q_tail"
+ __TBB_ASSERT( !predecessor->my_next, "the predecessor has another successor!");
+ #endif
+ tricky_pointer::store(predecessor->my_next, &s, std::memory_order_release);
+ if( pred_state != STATE_ACTIVEREADER ) {
+ #if __TBB_USE_ITT_NOTIFY
+ sync_prepare_done = true;
+ ITT_NOTIFY(sync_prepare, s.my_mutex);
+ #endif
+ spin_wait_until_eq(s.my_going, 1U);
+ }
+ }
+
+ // The protected state must have been acquired here before it can be further released to any other reader(s):
+ unsigned char old_state = STATE_READER;
+ s.my_state.compare_exchange_strong(old_state, STATE_ACTIVEREADER, std::memory_order_acq_rel);
+ if( old_state!=STATE_READER ) {
+#if __TBB_USE_ITT_NOTIFY
+ if( !sync_prepare_done )
+ ITT_NOTIFY(sync_prepare, s.my_mutex);
+#endif
+ // Failed to become active reader -> need to unblock the next waiting reader first
+ __TBB_ASSERT( s.my_state==STATE_READER_UNBLOCKNEXT, "unexpected state" );
+ spin_wait_while_eq(s.my_next, 0U);
+                /* my_state must be changed before unblocking the next waiter; otherwise that waiter
+                   might finish, and another thread could pick up our old state and stay blocked */
+ s.my_state.store(STATE_ACTIVEREADER, std::memory_order_relaxed);
+ tricky_pointer::load(s.my_next, std::memory_order_relaxed)->my_going.store(1U, std::memory_order_release);
+ }
+ __TBB_ASSERT( s.my_state==STATE_ACTIVEREADER, "unlocked reader is active reader" );
+ }
+
+ ITT_NOTIFY(sync_acquired, s.my_mutex);
+
+ // Force acquire so that user's critical section receives correct values
+ // from processor that was previously in the user's critical section.
+ atomic_fence(std::memory_order_acquire);
+ }
+
+ //! A method to acquire queuing_rw_mutex if it is free
+ static bool try_acquire(d1::queuing_rw_mutex& m, d1::queuing_rw_mutex::scoped_lock& s, bool write)
+ {
+ __TBB_ASSERT( !s.my_mutex, "scoped_lock is already holding a mutex");
+
+ if( m.q_tail.load(std::memory_order_relaxed) )
+ return false; // Someone already took the lock
+
+ // Must set all fields before the exchange, because once the
+ // exchange executes, *this becomes accessible to other threads.
+ s.my_prev.store(0U, std::memory_order_relaxed);
+ s.my_next.store(0U, std::memory_order_relaxed);
+ s.my_going.store(0U, std::memory_order_relaxed); // TODO: remove dead assignment?
+ s.my_state.store(d1::queuing_rw_mutex::scoped_lock::state_t(write ? STATE_WRITER : STATE_ACTIVEREADER), std::memory_order_relaxed);
+ s.my_internal_lock.store(RELEASED, std::memory_order_relaxed);
+
+ // The CAS must have release semantics, because we are
+ // "sending" the fields initialized above to other processors.
+ d1::queuing_rw_mutex::scoped_lock* expected = nullptr;
+ if( !m.q_tail.compare_exchange_strong(expected, &s, std::memory_order_release) )
+ return false; // Someone already took the lock
+ // Force acquire so that user's critical section receives correct values
+ // from processor that was previously in the user's critical section.
+ atomic_fence(std::memory_order_acquire);
+ s.my_mutex = &m;
+ ITT_NOTIFY(sync_acquired, s.my_mutex);
+ return true;
+ }
+
+ //! A method to release queuing_rw_mutex lock
+ static void release(d1::queuing_rw_mutex::scoped_lock& s) {
+ __TBB_ASSERT(s.my_mutex!=nullptr, "no lock acquired");
+
+ ITT_NOTIFY(sync_releasing, s.my_mutex);
+
+ if( s.my_state.load(std::memory_order_relaxed) == STATE_WRITER ) { // Acquired for write
+
+ // The logic below is the same as "writerUnlock", but elides
+ // "return" from the middle of the routine.
+ // In the statement below, acquire semantics of reading my_next is required
+ // so that following operations with fields of my_next are safe.
+ d1::queuing_rw_mutex::scoped_lock* next = tricky_pointer::load(s.my_next, std::memory_order_acquire);
+ if( !next ) {
+ d1::queuing_rw_mutex::scoped_lock* expected = &s;
+ if( s.my_mutex->q_tail.compare_exchange_strong(expected, nullptr, std::memory_order_release) ) {
+ // this was the only item in the queue, and the queue is now empty.
+ goto done;
+ }
+ spin_wait_while_eq( s.my_next, 0U );
+ next = tricky_pointer::load(s.my_next, std::memory_order_acquire);
+ }
+ next->my_going.store(2U, std::memory_order_relaxed); // protect next queue node from being destroyed too early
+ if( next->my_state==STATE_UPGRADE_WAITING ) {
+            // The next node waiting for an upgrade means this writer was upgraded earlier.
+ acquire_internal_lock(s);
+ // Responsibility transition, the one who reads uncorrupted my_prev will do release.
+ d1::queuing_rw_mutex::scoped_lock* tmp = tricky_pointer::exchange(next->my_prev, nullptr, std::memory_order_release);
+ next->my_state.store(STATE_UPGRADE_LOSER, std::memory_order_relaxed);
+ next->my_going.store(1U, std::memory_order_release);
+ unblock_or_wait_on_internal_lock(s, get_flag(tmp));
+ } else {
+ // next->state cannot be STATE_UPGRADE_REQUESTED
+ __TBB_ASSERT( next->my_state & (STATE_COMBINED_WAITINGREADER | STATE_WRITER), "unexpected state" );
+ __TBB_ASSERT( !( next->my_prev.load() & FLAG ), "use of corrupted pointer!" );
+ tricky_pointer::store(next->my_prev, nullptr, std::memory_order_relaxed);
+ next->my_going.store(1U, std::memory_order_release);
+ }
+
+ } else { // Acquired for read
+
+ queuing_rw_mutex::scoped_lock *tmp = nullptr;
+ retry:
+ // Addition to the original paper: Mark my_prev as in use
+ queuing_rw_mutex::scoped_lock *predecessor = tricky_pointer::fetch_add(s.my_prev, FLAG, std::memory_order_acquire);
+
+ if( predecessor ) {
+ if( !(try_acquire_internal_lock(*predecessor)) )
+ {
+ // Failed to acquire the lock on predecessor. The predecessor either unlinks or upgrades.
+ // In the second case, it could or could not know my "in use" flag - need to check
+ // Responsibility transition, the one who reads uncorrupted my_prev will do release.
+ tmp = tricky_pointer::compare_exchange_strong(s.my_prev, tricky_pointer(predecessor) | FLAG, predecessor, std::memory_order_release);
+ if( !(tricky_pointer(tmp) & FLAG) ) {
+ // Wait for the predecessor to change my_prev (e.g. during unlink)
+ // TODO: spin_wait condition seems never reachable
+ tricky_pointer::spin_wait_while_eq( s.my_prev, tricky_pointer(predecessor)|FLAG );
+ // Now owner of predecessor is waiting for _us_ to release its lock
+ release_internal_lock(*predecessor);
+ }
+ // else the "in use" flag is back -> the predecessor didn't get it and will release itself; nothing to do
+
+ tmp = nullptr;
+ goto retry;
+ }
+ __TBB_ASSERT(predecessor && predecessor->my_internal_lock.load(std::memory_order_relaxed)==ACQUIRED, "predecessor's lock is not acquired");
+ tricky_pointer::store(s.my_prev, predecessor, std::memory_order_relaxed);
+ acquire_internal_lock(s);
+
+ tricky_pointer::store(predecessor->my_next, nullptr, std::memory_order_release);
+
+ d1::queuing_rw_mutex::scoped_lock* expected = &s;
+ if( !tricky_pointer::load(s.my_next, std::memory_order_relaxed) && !s.my_mutex->q_tail.compare_exchange_strong(expected, predecessor, std::memory_order_release) ) {
+ spin_wait_while_eq( s.my_next, 0U );
+ }
+ __TBB_ASSERT( !(s.my_next.load() & FLAG), "use of corrupted pointer" );
+
+ // ensure acquire semantics of reading 'my_next'
+ if(d1::queuing_rw_mutex::scoped_lock *const l_next = tricky_pointer::load(s.my_next, std::memory_order_acquire) ) { // I->next != nil, TODO: rename to next after clearing up and adapting the n in the comment two lines below
+ // Equivalent to I->next->prev = I->prev but protected against (prev[n]&FLAG)!=0
+ tmp = tricky_pointer::exchange(l_next->my_prev, predecessor, std::memory_order_release);
+ // I->prev->next = I->next;
+ __TBB_ASSERT(tricky_pointer::load(s.my_prev, std::memory_order_relaxed)==predecessor, nullptr);
+ predecessor->my_next.store(s.my_next.load(std::memory_order_relaxed), std::memory_order_release);
+ }
+ // Safe to release in the order opposite to acquiring which makes the code simpler
+ release_internal_lock(*predecessor);
+
+ } else { // No predecessor when we looked
+ acquire_internal_lock(s); // "exclusiveLock(&I->EL)"
+ d1::queuing_rw_mutex::scoped_lock* next = tricky_pointer::load(s.my_next, std::memory_order_acquire);
+ if( !next ) {
+ d1::queuing_rw_mutex::scoped_lock* expected = &s;
+ if( !s.my_mutex->q_tail.compare_exchange_strong(expected, nullptr, std::memory_order_release) ) {
+ spin_wait_while_eq( s.my_next, 0U );
+ next = tricky_pointer::load(s.my_next, std::memory_order_relaxed);
+ } else {
+ goto unlock_self;
+ }
+ }
+ next->my_going.store(2U, std::memory_order_relaxed);
+ // Responsibility transition, the one who reads uncorrupted my_prev will do release.
+ tmp = tricky_pointer::exchange(next->my_prev, nullptr, std::memory_order_release);
+ next->my_going.store(1U, std::memory_order_release);
+ }
+ unlock_self:
+ unblock_or_wait_on_internal_lock(s, get_flag(tmp));
+ }
+ done:
+ spin_wait_while_eq( s.my_going, 2U );
+
+ s.initialize();
+ }
+
+ static bool downgrade_to_reader(d1::queuing_rw_mutex::scoped_lock& s) {
+ if ( s.my_state.load(std::memory_order_relaxed) == STATE_ACTIVEREADER ) return true; // Already a reader
+
+ ITT_NOTIFY(sync_releasing, s.my_mutex);
+ s.my_state.store(STATE_READER, std::memory_order_relaxed);
+ if( ! tricky_pointer::load(s.my_next, std::memory_order_relaxed)) {
+ // the following load of q_tail must not be reordered with setting STATE_READER above
+ if( &s==s.my_mutex->q_tail.load() ) {
+ unsigned char old_state = STATE_READER;
+ s.my_state.compare_exchange_strong(old_state, STATE_ACTIVEREADER, std::memory_order_release);
+ if( old_state==STATE_READER )
+ return true; // Downgrade completed
+ }
+ /* wait for the next to register */
+ spin_wait_while_eq( s.my_next, 0U );
+ }
+ d1::queuing_rw_mutex::scoped_lock *const next = tricky_pointer::load(s.my_next, std::memory_order_acquire);
+ __TBB_ASSERT( next, "still no successor at this point!" );
+ if( next->my_state & STATE_COMBINED_WAITINGREADER )
+ next->my_going.store(1U, std::memory_order_release);
+ else if( next->my_state==STATE_UPGRADE_WAITING )
+            // The next node waiting for an upgrade means this writer was upgraded earlier.
+ next->my_state.store(STATE_UPGRADE_LOSER, std::memory_order_relaxed);
+        s.my_state.store(STATE_ACTIVEREADER, std::memory_order_relaxed);
+ return true;
+ }
+
+ static bool upgrade_to_writer(d1::queuing_rw_mutex::scoped_lock& s) {
+ if ( s.my_state.load(std::memory_order_relaxed) == STATE_WRITER ) return true; // Already a writer
+
+        __TBB_ASSERT( s.my_state==STATE_ACTIVEREADER, "only an active reader can be upgraded" );
+
+ queuing_rw_mutex::scoped_lock * tmp;
+ queuing_rw_mutex::scoped_lock * me = &s;
+
+ ITT_NOTIFY(sync_releasing, s.my_mutex);
+ s.my_state.store(STATE_UPGRADE_REQUESTED, std::memory_order_relaxed);
+ requested:
+ __TBB_ASSERT( !(s.my_next.load() & FLAG), "use of corrupted pointer!" );
+ acquire_internal_lock(s);
+ d1::queuing_rw_mutex::scoped_lock* expected = &s;
+ if( !s.my_mutex->q_tail.compare_exchange_strong(expected, tricky_pointer(me)|FLAG, std::memory_order_release) ) {
+ spin_wait_while_eq( s.my_next, 0U );
+ queuing_rw_mutex::scoped_lock * next;
+ next = tricky_pointer::fetch_add(s.my_next, FLAG, std::memory_order_acquire);
+ unsigned short n_state = next->my_state;
+            /* the next reader can be blocked by our state; the best thing to do is to unblock it */
+ if( n_state & STATE_COMBINED_WAITINGREADER )
+ next->my_going.store(1U, std::memory_order_release);
+ // Responsibility transition, the one who reads uncorrupted my_prev will do release.
+ tmp = tricky_pointer::exchange(next->my_prev, &s, std::memory_order_release);
+ unblock_or_wait_on_internal_lock(s, get_flag(tmp));
+ if( n_state & (STATE_COMBINED_READER | STATE_UPGRADE_REQUESTED) ) {
+ // save next|FLAG for simplicity of following comparisons
+ tmp = tricky_pointer(next)|FLAG;
+ for( atomic_backoff b; tricky_pointer::load(s.my_next, std::memory_order_relaxed)==tmp; b.pause() ) {
+ if( s.my_state & STATE_COMBINED_UPGRADING ) {
+ if( tricky_pointer::load(s.my_next, std::memory_order_acquire)==tmp )
+ tricky_pointer::store(s.my_next, next, std::memory_order_relaxed);
+ goto waiting;
+ }
+ }
+ __TBB_ASSERT(tricky_pointer::load(s.my_next, std::memory_order_relaxed) != (tricky_pointer(next)|FLAG), nullptr);
+ goto requested;
+ } else {
+ __TBB_ASSERT( n_state & (STATE_WRITER | STATE_UPGRADE_WAITING), "unexpected state");
+ __TBB_ASSERT( (tricky_pointer(next)|FLAG) == tricky_pointer::load(s.my_next, std::memory_order_relaxed), nullptr);
+ tricky_pointer::store(s.my_next, next, std::memory_order_relaxed);
+ }
+ } else {
+ /* We are in the tail; whoever comes next is blocked by q_tail&FLAG */
+ release_internal_lock(s);
+ } // if( this != my_mutex->q_tail... )
+ {
+ unsigned char old_state = STATE_UPGRADE_REQUESTED;
+ s.my_state.compare_exchange_strong(old_state, STATE_UPGRADE_WAITING, std::memory_order_acquire);
+ }
+ waiting:
+ __TBB_ASSERT( !( s.my_next.load(std::memory_order_relaxed) & FLAG ), "use of corrupted pointer!" );
+ __TBB_ASSERT( s.my_state & STATE_COMBINED_UPGRADING, "wrong state at upgrade waiting_retry" );
+ __TBB_ASSERT( me==&s, nullptr );
+ ITT_NOTIFY(sync_prepare, s.my_mutex);
+ /* if no one was blocked by the "corrupted" q_tail, turn it back */
+ expected = tricky_pointer(me)|FLAG;
+ s.my_mutex->q_tail.compare_exchange_strong(expected, &s, std::memory_order_release);
+ queuing_rw_mutex::scoped_lock * predecessor;
+ // Mark my_prev as 'in use' to prevent predecessor from releasing
+ predecessor = tricky_pointer::fetch_add(s.my_prev, FLAG, std::memory_order_acquire);
+ if( predecessor ) {
+ bool success = try_acquire_internal_lock(*predecessor);
+ {
+                // While the predecessor pointer (my_prev) is in use (FLAG is set), we can safely update the node's state.
+                // A corrupted pointer transfers the responsibility for releasing the predecessor's node to us.
+ unsigned char old_state = STATE_UPGRADE_REQUESTED;
+ predecessor->my_state.compare_exchange_strong(old_state, STATE_UPGRADE_WAITING, std::memory_order_release);
+ }
+ if( !success ) {
+ // Responsibility transition, the one who reads uncorrupted my_prev will do release.
+ tmp = tricky_pointer::compare_exchange_strong(s.my_prev, tricky_pointer(predecessor)|FLAG, predecessor, std::memory_order_release);
+ if( tricky_pointer(tmp) & FLAG ) {
+ tricky_pointer::spin_wait_while_eq(s.my_prev, predecessor);
+ predecessor = tricky_pointer::load(s.my_prev, std::memory_order_relaxed);
+ } else {
+ // TODO: spin_wait condition seems never reachable
+ tricky_pointer::spin_wait_while_eq(s.my_prev, tricky_pointer(predecessor)|FLAG);
+ release_internal_lock(*predecessor);
+ }
+ } else {
+ tricky_pointer::store(s.my_prev, predecessor, std::memory_order_relaxed);
+ release_internal_lock(*predecessor);
+ tricky_pointer::spin_wait_while_eq(s.my_prev, predecessor);
+ predecessor = tricky_pointer::load(s.my_prev, std::memory_order_relaxed);
+ }
+ if( predecessor )
+ goto waiting;
+ } else {
+ tricky_pointer::store(s.my_prev, nullptr, std::memory_order_relaxed);
+ }
+ __TBB_ASSERT( !predecessor && !s.my_prev, nullptr );
+
+ // additional lifetime issue prevention checks
+ // wait for the successor to finish working with my fields
+ wait_for_release_of_internal_lock(s);
+ // now wait for the predecessor to finish working with my fields
+ spin_wait_while_eq( s.my_going, 2U );
+
+ // Acquire critical section indirectly from previous owner or directly from predecessor (TODO: not clear).
+ atomic_fence(std::memory_order_acquire); // on either "my_mutex->q_tail" or "my_going" (TODO: not clear)
+
+ bool result = ( s.my_state != STATE_UPGRADE_LOSER );
+ s.my_state.store(STATE_WRITER, std::memory_order_relaxed);
+ s.my_going.store(1U, std::memory_order_relaxed);
+
+ ITT_NOTIFY(sync_acquired, s.my_mutex);
+ return result;
+ }
+
+ static void construct(d1::queuing_rw_mutex& m) {
+ suppress_unused_warning(m);
+ ITT_SYNC_CREATE(&m, _T("tbb::queuing_rw_mutex"), _T(""));
+ }
+};
+
+void __TBB_EXPORTED_FUNC acquire(d1::queuing_rw_mutex& m, d1::queuing_rw_mutex::scoped_lock& s, bool write) {
+ queuing_rw_mutex_impl::acquire(m, s, write);
+}
+
+bool __TBB_EXPORTED_FUNC try_acquire(d1::queuing_rw_mutex& m, d1::queuing_rw_mutex::scoped_lock& s, bool write) {
+ return queuing_rw_mutex_impl::try_acquire(m, s, write);
+}
+
+void __TBB_EXPORTED_FUNC release(d1::queuing_rw_mutex::scoped_lock& s) {
+ queuing_rw_mutex_impl::release(s);
+}
+
+bool __TBB_EXPORTED_FUNC upgrade_to_writer(d1::queuing_rw_mutex::scoped_lock& s) {
+ return queuing_rw_mutex_impl::upgrade_to_writer(s);
+}
+
+bool __TBB_EXPORTED_FUNC downgrade_to_reader(d1::queuing_rw_mutex::scoped_lock& s) {
+ return queuing_rw_mutex_impl::downgrade_to_reader(s);
+}
+
+void __TBB_EXPORTED_FUNC construct(d1::queuing_rw_mutex& m) {
+ queuing_rw_mutex_impl::construct(m);
+}
+
+} // namespace r1
+} // namespace detail
+} // namespace tbb
diff --git a/contrib/libs/tbb/src/tbb/rml_base.h b/contrib/libs/tbb/src/tbb/rml_base.h
new file mode 100644
index 0000000000..9e1705837c
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/rml_base.h
@@ -0,0 +1,163 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+// Header guard and namespace names follow rml conventions.
+
+#ifndef __RML_rml_base_H
+#define __RML_rml_base_H
+
+#include <cstddef>
+
+#if _WIN32||_WIN64
+#include <windows.h>
+#endif /* _WIN32||_WIN64 */
+
+#ifdef RML_PURE_VIRTUAL_HANDLER
+#define RML_PURE(T) {RML_PURE_VIRTUAL_HANDLER(); return (T)0;}
+#else
+#define RML_PURE(T) = 0;
+#endif
+
+namespace rml {
+
+class server;
+
+class versioned_object {
+public:
+ //! A version number
+ typedef unsigned version_type;
+
+ virtual ~versioned_object() {}
+
+ //! Get version of this object
+    /** The version number is incremented when an incompatible change is introduced.
+ The version number is invariant for the lifetime of the object. */
+ virtual version_type version() const RML_PURE(version_type)
+
+};
+
+//! Represents a client's job for an execution context.
+/** A job object is constructed by the client.
+ Not derived from versioned_object because version is same as for client. */
+class job {
+ friend class server;
+};
+
+//! Information that client provides to server when asking for a server.
+/** The instance must endure at least until acknowledge_close_connection is called. */
+class client: public versioned_object {
+public:
+ //! Typedef for convenience of derived classes in other namespaces.
+ typedef ::rml::job job;
+
+ //! Index of a job in a job pool
+ typedef unsigned size_type;
+
+ //! Maximum number of threads that client can exploit profitably if nothing else is running on the machine.
+ /** The returned value should remain invariant for the lifetime of the connection. [idempotent] */
+ virtual size_type max_job_count() const RML_PURE(size_type)
+
+ //! Minimum stack size for each job. 0 means to use default stack size. [idempotent]
+ virtual std::size_t min_stack_size() const RML_PURE(std::size_t)
+
+ //! Server calls this routine when it needs client to create a job object.
+ virtual job* create_one_job() RML_PURE(job*)
+
+ //! Acknowledge that all jobs have been cleaned up.
+ /** Called by server in response to request_close_connection
+ after cleanup(job) has been called for each job. */
+ virtual void acknowledge_close_connection() RML_PURE(void)
+
+ //! Inform client that server is done with *this.
+ /** Client should destroy the job.
+ Not necessarily called by execution context represented by *this.
+ Never called while any other thread is working on the job. */
+ virtual void cleanup( job& ) RML_PURE(void)
+
+ // In general, we should not add new virtual methods, because that would
+ // break derived classes. Think about reserving some vtable slots.
+};
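+
+// A minimal client skeleton, illustrating the contract above (my_job and my_client are
+// hypothetical names, not part of this header):
+//
+//     struct my_job : rml::job {};
+//     class my_client : public rml::client {
+//         version_type version() const override { return 1; }
+//         size_type max_job_count() const override { return 4; }
+//         std::size_t min_stack_size() const override { return 0; }   // use default stack size
+//         job* create_one_job() override { return new my_job; }
+//         void cleanup( job& j ) override { delete static_cast<my_job*>(&j); }
+//         void acknowledge_close_connection() override { /* all jobs are cleaned up */ }
+//     };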
+
+// Information that server provides to client.
+// Virtual functions are routines provided by the server for the client to call.
+class server: public versioned_object {
+public:
+ //! Typedef for convenience of derived classes.
+ typedef ::rml::job job;
+
+#if _WIN32||_WIN64
+ typedef void* execution_resource_t;
+#endif
+
+ //! Request that connection to server be closed.
+ /** Causes each job associated with the client to have its cleanup method called,
+ possibly by a thread different than the thread that created the job.
+ This method can return before all cleanup methods return.
+ Actions that have to wait after all cleanup methods return should be part of
+ client::acknowledge_close_connection.
+ Pass true as exiting if request_close_connection() is called because exit() is
+ called. In that case, it is the client's responsibility to make sure all threads
+ are terminated. In all other cases, pass false. */
+ virtual void request_close_connection( bool exiting = false ) = 0;
+
+ //! Called by client thread when it reaches a point where it cannot make progress until other threads do.
+ virtual void yield() = 0;
+
+ //! Called by client to indicate a change in the number of non-RML threads that are running.
+ /** This is a performance hint to the RML to adjust how many threads it should let run
+ concurrently. The delta is the change in the number of non-RML threads that are running.
+ For example, a value of 1 means the client has started running another thread, and a value
+ of -1 indicates that the client has blocked or terminated one of its threads. */
+ virtual void independent_thread_number_changed( int delta ) = 0;
+
+ //! Default level of concurrency for which RML strives when there are no non-RML threads running.
+ /** Normally, the value is the hardware concurrency minus one.
+ The "minus one" accounts for the thread created by main(). */
+ virtual unsigned default_concurrency() const = 0;
+};
+
+class factory {
+public:
+ //! status results
+ enum status_type {
+ st_success=0,
+ st_connection_exists,
+ st_not_found,
+ st_incompatible
+ };
+
+protected:
+ //! Pointer to routine that waits for server to indicate when client can close itself.
+ status_type (*my_wait_to_close_routine)( factory& );
+
+public:
+ //! Library handle for use by RML.
+#if _WIN32||_WIN64
+ HMODULE library_handle;
+#else
+ void* library_handle;
+#endif /* _WIN32||_WIN64 */
+
+ //! Special marker to keep dll from being unloaded prematurely
+ static const std::size_t c_dont_unload = 1;
+};
+
+//! Typedef for callback functions to print server info
+typedef void (*server_info_callback_t)( void* arg, const char* server_info );
+
+} // namespace rml
+
+#endif /* __RML_rml_base_H */
diff --git a/contrib/libs/tbb/src/tbb/rml_tbb.cpp b/contrib/libs/tbb/src/tbb/rml_tbb.cpp
new file mode 100644
index 0000000000..122e2709f7
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/rml_tbb.cpp
@@ -0,0 +1,113 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "oneapi/tbb/detail/_assert.h"
+
+#include "rml_tbb.h"
+#include "dynamic_link.h"
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+namespace rml {
+
+#define MAKE_SERVER(x) DLD(__TBB_make_rml_server,x)
+#define GET_INFO(x) DLD(__TBB_call_with_my_server_info,x)
+#define SERVER tbb_server
+#define CLIENT tbb_client
+#define FACTORY tbb_factory
+
+#if __TBB_WEAK_SYMBOLS_PRESENT
+ #pragma weak __TBB_make_rml_server
+ #pragma weak __TBB_call_with_my_server_info
+ extern "C" {
+ ::rml::factory::status_type __TBB_make_rml_server( rml::tbb_factory& f, rml::tbb_server*& server, rml::tbb_client& client );
+ void __TBB_call_with_my_server_info( ::rml::server_info_callback_t cb, void* arg );
+ }
+#endif /* __TBB_WEAK_SYMBOLS_PRESENT */
+
+#if TBB_USE_DEBUG
+#define DEBUG_SUFFIX "_debug"
+#else
+#define DEBUG_SUFFIX
+#endif /* TBB_USE_DEBUG */
+
+// RML_SERVER_NAME is the name of the RML server library.
+#if _WIN32 || _WIN64
+#define RML_SERVER_NAME "irml" DEBUG_SUFFIX ".dll"
+#elif __APPLE__
+#define RML_SERVER_NAME "libirml" DEBUG_SUFFIX ".dylib"
+#elif __linux__
+#define RML_SERVER_NAME "libirml" DEBUG_SUFFIX ".so.1"
+#elif __FreeBSD__ || __NetBSD__ || __OpenBSD__ || __sun || _AIX
+#define RML_SERVER_NAME "libirml" DEBUG_SUFFIX ".so"
+#else
+#error Unknown OS
+#endif
+
+const ::rml::versioned_object::version_type CLIENT_VERSION = 2;
+
+#if __TBB_WEAK_SYMBOLS_PRESENT
+ #pragma weak __RML_open_factory
+ #pragma weak __RML_close_factory
+ extern "C" {
+ ::rml::factory::status_type __RML_open_factory ( ::rml::factory&, ::rml::versioned_object::version_type&, ::rml::versioned_object::version_type );
+ void __RML_close_factory( ::rml::factory& f );
+ }
+#endif /* __TBB_WEAK_SYMBOLS_PRESENT */
+
+::rml::factory::status_type FACTORY::open() {
+    // Failure of the following assertion indicates that the factory is already open or was not zero-initialized.
+ __TBB_ASSERT_EX( !library_handle, NULL );
+ status_type (*open_factory_routine)( factory&, version_type&, version_type );
+ dynamic_link_descriptor server_link_table[4] = {
+ DLD(__RML_open_factory,open_factory_routine),
+ MAKE_SERVER(my_make_server_routine),
+ DLD(__RML_close_factory,my_wait_to_close_routine),
+ GET_INFO(my_call_with_server_info_routine),
+ };
+ status_type result;
+ if ( dynamic_link( RML_SERVER_NAME, server_link_table, 4, &library_handle ) ) {
+ version_type server_version;
+ result = (*open_factory_routine)( *this, server_version, CLIENT_VERSION );
+ // server_version can be checked here for incompatibility if necessary.
+ } else {
+ library_handle = NULL;
+ result = st_not_found;
+ }
+ return result;
+}
+
+void FACTORY::close() {
+ if ( library_handle )
+ (*my_wait_to_close_routine)(*this);
+ if ( (size_t)library_handle>FACTORY::c_dont_unload ) {
+ dynamic_unlink(library_handle);
+ library_handle = NULL;
+ }
+}
+
+::rml::factory::status_type FACTORY::make_server( SERVER*& s, CLIENT& c) {
+    // Failure of the following assertion means that the factory was not successfully opened.
+ __TBB_ASSERT_EX( my_make_server_routine, NULL );
+ return (*my_make_server_routine)(*this,s,c);
+}
+
+} // namespace rml
+} // namespace r1
+} // namespace detail
+} // namespace tbb
+
diff --git a/contrib/libs/tbb/src/tbb/rml_tbb.h b/contrib/libs/tbb/src/tbb/rml_tbb.h
new file mode 100644
index 0000000000..de923be1b2
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/rml_tbb.h
@@ -0,0 +1,94 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+// Header guard and namespace names follow TBB conventions.
+
+#ifndef __TBB_rml_tbb_H
+#define __TBB_rml_tbb_H
+
+#include "oneapi/tbb/version.h"
+#include "rml_base.h"
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+namespace rml {
+
+//------------------------------------------------------------------------
+// Classes instantiated by the server
+//------------------------------------------------------------------------
+
+//! Represents a set of oneTBB worker threads provided by the server.
+class tbb_server: public ::rml::server {
+public:
+ //! Inform server of adjustments in the number of workers that the client can profitably use.
+ virtual void adjust_job_count_estimate( int delta ) = 0;
+
+#if _WIN32||_WIN64
+ //! Inform server of a oneTBB external thread.
+ virtual void register_external_thread( execution_resource_t& v ) = 0;
+
+ //! Inform server that the oneTBB external thread is done with its work.
+ virtual void unregister_external_thread( execution_resource_t v ) = 0;
+#endif /* _WIN32||_WIN64 */
+};
+
+//------------------------------------------------------------------------
+// Classes instantiated by the client
+//------------------------------------------------------------------------
+
+class tbb_client: public ::rml::client {
+public:
+ //! Defined by TBB to steal a task and execute it.
+ /** Called by server when it wants an execution context to do some TBB work.
+ The method should return when it is okay for the thread to yield indefinitely. */
+ virtual void process( job& ) RML_PURE(void)
+};
+
+/** Client must ensure that instance is zero-inited, typically by being a file-scope object. */
+class tbb_factory: public ::rml::factory {
+
+ //! Pointer to routine that creates an RML server.
+ status_type (*my_make_server_routine)( tbb_factory&, tbb_server*&, tbb_client& );
+
+ //! Pointer to routine that calls callback function with server version info.
+ void (*my_call_with_server_info_routine)( ::rml::server_info_callback_t cb, void* arg );
+
+public:
+ typedef ::rml::versioned_object::version_type version_type;
+ typedef tbb_client client_type;
+ typedef tbb_server server_type;
+
+ //! Open factory.
+ /** Dynamically links against RML library.
+ Returns st_success, st_incompatible, or st_not_found. */
+ status_type open();
+
+ //! Factory method to be called by client to create a server object.
+ /** Factory must be open.
+        Returns st_success or st_incompatible. */
+ status_type make_server( server_type*&, client_type& );
+
+ //! Close factory
+ void close();
+};
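+
+// Typical connection lifecycle, sketched from the interfaces above (illustrative;
+// my_client stands for some tbb_client implementation and is not defined here):
+//
+//     static tbb_factory factory;                  // file-scope object => zero-inited
+//     if( factory.open() == ::rml::factory::st_success ) {
+//         tbb_server* server = nullptr;
+//         factory.make_server( server, my_client );
+//         server->adjust_job_count_estimate( 4 );  // ask for up to four workers
+//         // ... server calls my_client.process(job) on its worker threads ...
+//         server->request_close_connection();      // cleanup() per job, then acknowledge_close_connection()
+//         factory.close();
+//     }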
+
+} // namespace rml
+} // namespace r1
+} // namespace detail
+} // namespace tbb
+
+#endif /*__TBB_rml_tbb_H */
diff --git a/contrib/libs/tbb/src/tbb/rml_thread_monitor.h b/contrib/libs/tbb/src/tbb/rml_thread_monitor.h
new file mode 100644
index 0000000000..613ec72e98
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/rml_thread_monitor.h
@@ -0,0 +1,258 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+// All platform-specific threading support is encapsulated here.
+
+#ifndef __RML_thread_monitor_H
+#define __RML_thread_monitor_H
+
+#if __TBB_USE_WINAPI
+#include <windows.h>
+#include <process.h>
+#include <malloc.h> //_alloca
+#include "misc.h" // support for processor groups
+#if __TBB_WIN8UI_SUPPORT && (_WIN32_WINNT < 0x0A00)
+#include <thread>
+#endif
+#elif __TBB_USE_POSIX
+#include <pthread.h>
+#include <cstring>
+#include <cstdlib>
+#else
+#error Unsupported platform
+#endif
+#include <cstdio>
+
+#include "oneapi/tbb/detail/_template_helpers.h"
+
+#include "itt_notify.h"
+#include "semaphore.h"
+
+// All platform-specific threading support is in this header.
+
+#if (_WIN32||_WIN64)&&!__TBB_ipf
+// Deal with 64K aliasing. The formula for "offset" is a Fibonacci hash function,
+// which has the desirable feature of spreading out the offsets fairly evenly
+// without knowing the total number of offsets, and furthermore unlikely to
+// accidentally cancel out other 64K aliasing schemes that Microsoft might implement later.
+// See Knuth Vol 3. "Theorem S" for details on Fibonacci hashing.
+// The second statement really does need "volatile"; otherwise the compiler might remove the _alloca.
+#define AVOID_64K_ALIASING(idx) \
+ std::size_t offset = (idx+1) * 40503U % (1U<<16); \
+ void* volatile sink_for_alloca = _alloca(offset); \
+ __TBB_ASSERT_EX(sink_for_alloca, "_alloca failed");
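+
+// For the first few worker indices the formula spreads the offsets well apart
+// within the 64K window (illustrative arithmetic):
+//     idx = 0:  1*40503 % 65536 = 40503
+//     idx = 1:  2*40503 % 65536 = 15470
+//     idx = 2:  3*40503 % 65536 = 55973
+//     idx = 3:  4*40503 % 65536 = 30940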
+#else
+// Linux thread allocators avoid 64K aliasing.
+#define AVOID_64K_ALIASING(idx) tbb::detail::suppress_unused_warning(idx)
+#endif /* _WIN32||_WIN64 */
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+
+// Forward declaration: throws std::runtime_error with what() returning error_code description prefixed with aux_info
+void handle_perror(int error_code, const char* aux_info);
+
+namespace rml {
+namespace internal {
+
+#if __TBB_USE_ITT_NOTIFY
+static const ::tbb::detail::r1::tchar *SyncType_RML = _T("%Constant");
+static const ::tbb::detail::r1::tchar *SyncObj_ThreadMonitor = _T("RML Thr Monitor");
+#endif /* __TBB_USE_ITT_NOTIFY */
+
+//! Monitor with limited two-phase commit form of wait.
+/** At most one thread should wait on an instance at a time. */
+class thread_monitor {
+public:
+ class cookie {
+ friend class thread_monitor;
+ std::atomic<std::size_t> my_epoch{0};
+ };
+ thread_monitor() : skipped_wakeup(false), my_sema() {
+ ITT_SYNC_CREATE(&my_sema, SyncType_RML, SyncObj_ThreadMonitor);
+ }
+ ~thread_monitor() {}
+
+ //! If a thread is waiting or started a two-phase wait, notify it.
+ /** Can be called by any thread. */
+ void notify();
+
+ //! Begin two-phase wait.
+ /** Should only be called by thread that owns the monitor.
+ The caller must either complete the wait or cancel it. */
+ void prepare_wait( cookie& c );
+
+ //! Complete a two-phase wait and wait until notification occurs after the earlier prepare_wait.
+ void commit_wait( cookie& c );
+
+ //! Cancel a two-phase wait.
+ void cancel_wait();
+
+#if __TBB_USE_WINAPI
+ typedef HANDLE handle_type;
+
+ #define __RML_DECL_THREAD_ROUTINE unsigned WINAPI
+ typedef unsigned (WINAPI *thread_routine_type)(void*);
+
+ //! Launch a thread
+ static handle_type launch( thread_routine_type thread_routine, void* arg, std::size_t stack_size, const size_t* worker_index = NULL );
+
+#elif __TBB_USE_POSIX
+ typedef pthread_t handle_type;
+
+ #define __RML_DECL_THREAD_ROUTINE void*
+ typedef void*(*thread_routine_type)(void*);
+
+ //! Launch a thread
+ static handle_type launch( thread_routine_type thread_routine, void* arg, std::size_t stack_size );
+#endif /* __TBB_USE_POSIX */
+
+ //! Join thread
+ static void join(handle_type handle);
+
+ //! Detach thread
+ static void detach_thread(handle_type handle);
+private:
+ cookie my_cookie; // epoch counter
+ std::atomic<bool> in_wait{false};
+ bool skipped_wakeup;
+ binary_semaphore my_sema;
+#if __TBB_USE_POSIX
+ static void check( int error_code, const char* routine );
+#endif
+};
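+
+// Typical use by the owning thread (illustrative sketch; work_available() is a
+// placeholder predicate, not part of this header):
+//
+//     thread_monitor::cookie c;
+//     monitor.prepare_wait(c);
+//     if( work_available() )
+//         monitor.cancel_wait();   // something arrived between the check and the wait
+//     else
+//         monitor.commit_wait(c);  // blocks until another thread calls monitor.notify()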
+
+#if __TBB_USE_WINAPI
+
+#ifndef STACK_SIZE_PARAM_IS_A_RESERVATION
+#define STACK_SIZE_PARAM_IS_A_RESERVATION 0x00010000
+#endif
+
+// _beginthreadex API is not available in Windows 8 Store* applications, so use std::thread instead
+#if __TBB_WIN8UI_SUPPORT && (_WIN32_WINNT < 0x0A00)
+inline thread_monitor::handle_type thread_monitor::launch( thread_routine_type thread_function, void* arg, std::size_t, const std::size_t*) {
+//TODO: check that exception thrown from std::thread is not swallowed silently
+ std::thread* thread_tmp=new std::thread(thread_function, arg);
+ return thread_tmp->native_handle();
+}
+#else
+inline thread_monitor::handle_type thread_monitor::launch( thread_routine_type thread_routine, void* arg, std::size_t stack_size, const std::size_t* worker_index ) {
+ unsigned thread_id;
+ int number_of_processor_groups = ( worker_index ) ? NumberOfProcessorGroups() : 0;
+ unsigned create_flags = ( number_of_processor_groups > 1 ) ? CREATE_SUSPENDED : 0;
+ HANDLE h = (HANDLE)_beginthreadex( NULL, unsigned(stack_size), thread_routine, arg, STACK_SIZE_PARAM_IS_A_RESERVATION | create_flags, &thread_id );
+ if( !h ) {
+ handle_perror(0, "thread_monitor::launch: _beginthreadex failed\n");
+ }
+ if ( number_of_processor_groups > 1 ) {
+ MoveThreadIntoProcessorGroup( h, FindProcessorGroupIndex( static_cast<int>(*worker_index) ) );
+ ResumeThread( h );
+ }
+ return h;
+}
+#endif //__TBB_WIN8UI_SUPPORT && (_WIN32_WINNT < 0x0A00)
+
+void thread_monitor::join(handle_type handle) {
+#if TBB_USE_ASSERT
+ DWORD res =
+#endif
+ WaitForSingleObjectEx(handle, INFINITE, FALSE);
+ __TBB_ASSERT( res==WAIT_OBJECT_0, NULL );
+#if TBB_USE_ASSERT
+ BOOL val =
+#endif
+ CloseHandle(handle);
+ __TBB_ASSERT( val, NULL );
+}
+
+void thread_monitor::detach_thread(handle_type handle) {
+#if TBB_USE_ASSERT
+ BOOL val =
+#endif
+ CloseHandle(handle);
+ __TBB_ASSERT( val, NULL );
+}
+
+#endif /* __TBB_USE_WINAPI */
+
+#if __TBB_USE_POSIX
+inline void thread_monitor::check( int error_code, const char* routine ) {
+ if( error_code ) {
+ handle_perror(error_code, routine);
+ }
+}
+
+inline thread_monitor::handle_type thread_monitor::launch( void* (*thread_routine)(void*), void* arg, std::size_t stack_size ) {
+ // FIXME - consider more graceful recovery than just exiting if a thread cannot be launched.
+    // Note that there are some tricky situations to deal with, such as when the thread has already
+    // been grabbed as part of an OpenMP team.
+ pthread_attr_t s;
+ check(pthread_attr_init( &s ), "pthread_attr_init has failed");
+ if( stack_size>0 )
+        check(pthread_attr_setstacksize( &s, stack_size ), "pthread_attr_setstacksize has failed" );
+ pthread_t handle;
+ check( pthread_create( &handle, &s, thread_routine, arg ), "pthread_create has failed" );
+ check( pthread_attr_destroy( &s ), "pthread_attr_destroy has failed" );
+ return handle;
+}
+
+void thread_monitor::join(handle_type handle) {
+ check(pthread_join(handle, NULL), "pthread_join has failed");
+}
+
+void thread_monitor::detach_thread(handle_type handle) {
+ check(pthread_detach(handle), "pthread_detach has failed");
+}
+#endif /* __TBB_USE_POSIX */
+
+inline void thread_monitor::notify() {
+ my_cookie.my_epoch.store(my_cookie.my_epoch.load(std::memory_order_acquire) + 1, std::memory_order_release);
+ bool do_signal = in_wait.exchange( false );
+ if( do_signal )
+ my_sema.V();
+}
+
+inline void thread_monitor::prepare_wait( cookie& c ) {
+ if( skipped_wakeup ) {
+ // Lazily consume a signal that was skipped due to cancel_wait
+ skipped_wakeup = false;
+ my_sema.P(); // does not really wait on the semaphore
+ }
+ // Former c = my_cookie
+ c.my_epoch.store(my_cookie.my_epoch.load(std::memory_order_acquire), std::memory_order_release);
+ in_wait.store( true, std::memory_order_seq_cst );
+}
+
+inline void thread_monitor::commit_wait( cookie& c ) {
+ bool do_it = ( c.my_epoch.load(std::memory_order_relaxed) == my_cookie.my_epoch.load(std::memory_order_relaxed) );
+ if( do_it ) my_sema.P();
+ else cancel_wait();
+}
+
+inline void thread_monitor::cancel_wait() {
+ // if not in_wait, then some thread has sent us a signal;
+ // it will be consumed by the next prepare_wait call
+ skipped_wakeup = ! in_wait.exchange( false );
+}
+
+} // namespace internal
+} // namespace rml
+} // namespace r1
+} // namespace detail
+} // namespace tbb
+
+#endif /* __RML_thread_monitor_H */
diff --git a/contrib/libs/tbb/src/tbb/rtm_mutex.cpp b/contrib/libs/tbb/src/tbb/rtm_mutex.cpp
new file mode 100644
index 0000000000..fe7fb66dc8
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/rtm_mutex.cpp
@@ -0,0 +1,120 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "oneapi/tbb/detail/_assert.h"
+#include "oneapi/tbb/detail/_rtm_mutex.h"
+#include "itt_notify.h"
+#include "governor.h"
+#include "misc.h"
+
+#include <atomic>
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+
+// maximum number of times to retry
+// TODO: experiment on retry values.
+static constexpr int retry_threshold = 10;
+
+struct rtm_mutex_impl {
+ //! Release speculative mutex
+ static void release(d1::rtm_mutex::scoped_lock& s) {
+ switch(s.m_transaction_state) {
+ case d1::rtm_mutex::rtm_state::rtm_transacting:
+ __TBB_ASSERT(is_in_transaction(), "m_transaction_state && not speculating");
+ end_transaction();
+ s.m_mutex = nullptr;
+ break;
+ case d1::rtm_mutex::rtm_state::rtm_real:
+ s.m_mutex->unlock();
+ s.m_mutex = nullptr;
+ break;
+ case d1::rtm_mutex::rtm_state::rtm_none:
+ __TBB_ASSERT(false, "mutex is not locked, but in release");
+ break;
+ default:
+ __TBB_ASSERT(false, "invalid m_transaction_state");
+ }
+ s.m_transaction_state = d1::rtm_mutex::rtm_state::rtm_none;
+ }
+
+ //! Acquire lock on the given mutex.
+ static void acquire(d1::rtm_mutex& m, d1::rtm_mutex::scoped_lock& s, bool only_speculate) {
+ __TBB_ASSERT(s.m_transaction_state == d1::rtm_mutex::rtm_state::rtm_none, "scoped_lock already in transaction");
+ if(governor::speculation_enabled()) {
+ int num_retries = 0;
+ unsigned int abort_code = 0;
+ do {
+ if(m.m_flag.load(std::memory_order_acquire)) {
+ if(only_speculate) return;
+ spin_wait_while_eq(m.m_flag, true);
+ }
+ // _xbegin returns -1 on success or the abort code, so capture it
+ if((abort_code = begin_transaction()) == speculation_successful_begin)
+ {
+ // started speculation
+ if(m.m_flag.load(std::memory_order_relaxed)) {
+ abort_transaction();
+ }
+ s.m_transaction_state = d1::rtm_mutex::rtm_state::rtm_transacting;
+ // Do not wrap the following assignment in a function,
+ // because it can abort the transaction in debug builds. The mutex pointer is needed for release().
+ s.m_mutex = &m;
+ return; // successfully started speculation
+ }
+ ++num_retries;
+ } while((abort_code & speculation_retry) != 0 && (num_retries < retry_threshold));
+ }
+
+ if(only_speculate) return;
+ s.m_mutex = &m;
+ s.m_mutex->lock();
+ s.m_transaction_state = d1::rtm_mutex::rtm_state::rtm_real;
+ return;
+ }
+
+ //! Try to acquire lock on the given mutex.
+ static bool try_acquire(d1::rtm_mutex& m, d1::rtm_mutex::scoped_lock& s) {
+ acquire(m, s, /*only_speculate=*/true);
+ if (s.m_transaction_state == d1::rtm_mutex::rtm_state::rtm_transacting) {
+ return true;
+ }
+ __TBB_ASSERT(s.m_transaction_state == d1::rtm_mutex::rtm_state::rtm_none, NULL);
+ // transacting acquire failed. try_lock the real mutex
+ if (m.try_lock()) {
+ s.m_mutex = &m;
+ s.m_transaction_state = d1::rtm_mutex::rtm_state::rtm_real;
+ return true;
+ }
+ return false;
+ }
+};
+
+void __TBB_EXPORTED_FUNC acquire(d1::rtm_mutex& m, d1::rtm_mutex::scoped_lock& s, bool only_speculate) {
+ rtm_mutex_impl::acquire(m, s, only_speculate);
+}
+bool __TBB_EXPORTED_FUNC try_acquire(d1::rtm_mutex& m, d1::rtm_mutex::scoped_lock& s) {
+ return rtm_mutex_impl::try_acquire(m, s);
+}
+void __TBB_EXPORTED_FUNC release(d1::rtm_mutex::scoped_lock& s) {
+ rtm_mutex_impl::release(s);
+}
+
+} // namespace r1
+} // namespace detail
+} // namespace tbb
+
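rtm_mutex_impl::acquire() above follows the standard RTM lock-elision recipe: spin until the lock looks free, start a transaction, read the lock word so that a real acquisition by another thread aborts the transaction, and fall back to the real lock after a bounded number of retries. The sketch below shows that recipe over a plain atomic spinlock. It is not the TBB implementation (elided_spinlock and elided_guard are invented names), it uses only the documented <immintrin.h> RTM intrinsics, it needs an RTM-capable CPU plus -mrtm under GCC/Clang, and it assumes RTM support has already been verified, as governor::speculation_enabled() does above.

    // Lock elision sketch: the critical section between acquire() and release()
    // runs either inside a hardware transaction or under the real spinlock.
    #include <atomic>
    #include <immintrin.h>

    struct elided_spinlock {
        std::atomic<bool> locked{false};
    };

    class elided_guard {                       // plays the role of rtm_mutex::scoped_lock
        elided_spinlock* m = nullptr;
        bool speculating = false;
    public:
        explicit elided_guard(elided_spinlock& lk) { acquire(lk); }
        ~elided_guard() { release(); }

        void acquire(elided_spinlock& lk) {
            for (int retries = 0; retries < 10; ++retries) {
                while (lk.locked.load(std::memory_order_acquire))   // do not speculate while held
                    _mm_pause();
                unsigned status = _xbegin();
                if (status == _XBEGIN_STARTED) {
                    if (lk.locked.load(std::memory_order_relaxed))  // puts the flag into the read set
                        _xabort(0xff);                              // a real owner appeared: abort
                    m = &lk;
                    speculating = true;
                    return;                                         // critical section runs transactionally
                }
                if (!(status & (_XABORT_EXPLICIT | _XABORT_RETRY | _XABORT_CONFLICT)))
                    break;                                          // abort reason not worth retrying
            }
            // Fallback: take the real lock, which also aborts concurrent speculative owners.
            bool expected = false;
            while (!lk.locked.compare_exchange_weak(expected, true, std::memory_order_acquire)) {
                expected = false;
                _mm_pause();
            }
            m = &lk;
            speculating = false;
        }

        void release() {
            if (!m) return;
            if (speculating) _xend();                               // commit: the flag was never written
            else m->locked.store(false, std::memory_order_release);
            m = nullptr;
        }
    };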
diff --git a/contrib/libs/tbb/src/tbb/rtm_rw_mutex.cpp b/contrib/libs/tbb/src/tbb/rtm_rw_mutex.cpp
new file mode 100644
index 0000000000..5e50de4c39
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/rtm_rw_mutex.cpp
@@ -0,0 +1,271 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "oneapi/tbb/detail/_assert.h"
+#include "oneapi/tbb/detail/_rtm_rw_mutex.h"
+#include "itt_notify.h"
+#include "governor.h"
+#include "misc.h"
+
+#include <atomic>
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+
+struct rtm_rw_mutex_impl {
+ // maximum number of times to retry
+ // TODO: experiment on retry values.
+ static constexpr int retry_threshold_read = 10;
+ static constexpr int retry_threshold_write = 10;
+
+ //! Release speculative mutex
+ static void release(d1::rtm_rw_mutex::scoped_lock& s) {
+ switch(s.m_transaction_state) {
+ case d1::rtm_rw_mutex::rtm_type::rtm_transacting_writer:
+ case d1::rtm_rw_mutex::rtm_type::rtm_transacting_reader:
+ __TBB_ASSERT(is_in_transaction(), "m_transaction_state && not speculating");
+ end_transaction();
+ s.m_mutex = nullptr;
+ break;
+ case d1::rtm_rw_mutex::rtm_type::rtm_real_reader:
+ __TBB_ASSERT(!s.m_mutex->write_flag.load(std::memory_order_relaxed), "write_flag set but read lock acquired");
+ s.m_mutex->unlock_shared();
+ s.m_mutex = nullptr;
+ break;
+ case d1::rtm_rw_mutex::rtm_type::rtm_real_writer:
+ __TBB_ASSERT(s.m_mutex->write_flag.load(std::memory_order_relaxed), "write_flag unset but write lock acquired");
+ s.m_mutex->write_flag.store(false, std::memory_order_relaxed);
+ s.m_mutex->unlock();
+ s.m_mutex = nullptr;
+ break;
+ case d1::rtm_rw_mutex::rtm_type::rtm_not_in_mutex:
+ __TBB_ASSERT(false, "rtm_not_in_mutex, but in release");
+ break;
+ default:
+ __TBB_ASSERT(false, "invalid m_transaction_state");
+ }
+ s.m_transaction_state = d1::rtm_rw_mutex::rtm_type::rtm_not_in_mutex;
+ }
+
+ //! Acquire write lock on the given mutex.
+ static void acquire_writer(d1::rtm_rw_mutex& m, d1::rtm_rw_mutex::scoped_lock& s, bool only_speculate) {
+ __TBB_ASSERT(s.m_transaction_state == d1::rtm_rw_mutex::rtm_type::rtm_not_in_mutex, "scoped_lock already in transaction");
+ if(governor::speculation_enabled()) {
+ int num_retries = 0;
+ unsigned int abort_code = 0;
+ do {
+ if(m.m_state.load(std::memory_order_acquire)) {
+ if(only_speculate) return;
+ spin_wait_until_eq(m.m_state, d1::rtm_rw_mutex::state_type(0));
+ }
+ // _xbegin returns -1 on success or the abort code, so capture it
+ if((abort_code = begin_transaction()) == speculation_successful_begin)
+ {
+ // started speculation
+ if(m.m_state.load(std::memory_order_relaxed)) { // add spin_rw_mutex to read-set.
+ // reader or writer grabbed the lock, so abort.
+ abort_transaction();
+ }
+ s.m_transaction_state = d1::rtm_rw_mutex::rtm_type::rtm_transacting_writer;
+ // Do not wrap the following assignment in a function,
+ // because it can abort the transaction in debug builds. The mutex pointer is needed for release().
+ s.m_mutex = &m;
+ return; // successfully started speculation
+ }
+ ++num_retries;
+ } while((abort_code & speculation_retry) != 0 && (num_retries < retry_threshold_write));
+ }
+
+ if(only_speculate) return;
+ s.m_mutex = &m; // should apply a real try_lock...
+ s.m_mutex->lock(); // kill transactional writers
+ __TBB_ASSERT(!m.write_flag.load(std::memory_order_relaxed), "After acquire for write, write_flag already true");
+ m.write_flag.store(true, std::memory_order_relaxed); // kill transactional readers
+ s.m_transaction_state = d1::rtm_rw_mutex::rtm_type::rtm_real_writer;
+ return;
+ }
+
+ //! Acquire read lock on given mutex.
+ // only_speculate : true if we are doing a try_acquire. If true and we fail to speculate, don't
+ // really acquire the lock, return and do a try_acquire on the contained spin_rw_mutex. If
+ // the lock is already held by a writer, just return.
+ static void acquire_reader(d1::rtm_rw_mutex& m, d1::rtm_rw_mutex::scoped_lock& s, bool only_speculate) {
+ __TBB_ASSERT(s.m_transaction_state == d1::rtm_rw_mutex::rtm_type::rtm_not_in_mutex, "scoped_lock already in transaction");
+ if(governor::speculation_enabled()) {
+ int num_retries = 0;
+ unsigned int abort_code = 0;
+ do {
+ // if in try_acquire, and lock is held as writer, don't attempt to speculate.
+ if(m.write_flag.load(std::memory_order_acquire)) {
+ if(only_speculate) return;
+ spin_wait_while_eq(m.write_flag, true);
+ }
+ // _xbegin returns -1 on success or the abort code, so capture it
+ if((abort_code = begin_transaction()) == speculation_successful_begin)
+ {
+ // started speculation
+ if(m.write_flag.load(std::memory_order_relaxed)) { // add write_flag to read-set.
+ abort_transaction(); // writer grabbed the lock, so abort.
+ }
+ s.m_transaction_state = d1::rtm_rw_mutex::rtm_type::rtm_transacting_reader;
+ // Do not wrap the following assignment in a function,
+ // because it can abort the transaction in debug builds. The mutex pointer is needed for release().
+ s.m_mutex = &m;
+ return; // successfully started speculation
+ }
+ // fallback path
+ // retry only if there is any hope of getting into a transaction soon
+ // Retry in the following cases (from Section 8.3.5 of
+ // Intel(R) Architecture Instruction Set Extensions Programming Reference):
+ // 1. abort caused by XABORT instruction (bit 0 of EAX register is set)
+ // 2. the transaction may succeed on a retry (bit 1 of EAX register is set)
+ // 3. if another logical processor conflicted with a memory address
+ // that was part of the transaction that aborted (bit 2 of EAX register is set)
+ // That is, retry if (abort_code & 0x7) is non-zero
+ ++num_retries;
+ } while((abort_code & speculation_retry) != 0 && (num_retries < retry_threshold_read));
+ }
+
+ if(only_speculate) return;
+ s.m_mutex = &m;
+ s.m_mutex->lock_shared();
+ s.m_transaction_state = d1::rtm_rw_mutex::rtm_type::rtm_real_reader;
+ }
+
+ //! Upgrade reader to become a writer.
+ /** Returns whether the upgrade happened without releasing and re-acquiring the lock */
+ static bool upgrade(d1::rtm_rw_mutex::scoped_lock& s) {
+ switch(s.m_transaction_state) {
+ case d1::rtm_rw_mutex::rtm_type::rtm_real_reader: {
+ s.m_transaction_state = d1::rtm_rw_mutex::rtm_type::rtm_real_writer;
+ bool no_release = s.m_mutex->upgrade();
+ __TBB_ASSERT(!s.m_mutex->write_flag.load(std::memory_order_relaxed), "After upgrade, write_flag already true");
+ s.m_mutex->write_flag.store(true, std::memory_order_relaxed);
+ return no_release;
+ }
+ case d1::rtm_rw_mutex::rtm_type::rtm_transacting_reader: {
+ d1::rtm_rw_mutex& m = *s.m_mutex;
+ if(m.m_state.load(std::memory_order_acquire)) { // add spin_rw_mutex to read-set.
+ // Real reader or writer holds the lock; so commit the read and re-acquire for write.
+ release(s);
+ acquire_writer(m, s, false);
+ return false;
+ } else
+ {
+ s.m_transaction_state = d1::rtm_rw_mutex::rtm_type::rtm_transacting_writer;
+ return true;
+ }
+ }
+ default:
+ __TBB_ASSERT(false, "Invalid state for upgrade");
+ return false;
+ }
+ }
+
+ //! Downgrade writer to a reader.
+ static bool downgrade(d1::rtm_rw_mutex::scoped_lock& s) {
+ switch (s.m_transaction_state) {
+ case d1::rtm_rw_mutex::rtm_type::rtm_real_writer:
+ s.m_transaction_state = d1::rtm_rw_mutex::rtm_type::rtm_real_reader;
+ __TBB_ASSERT(s.m_mutex->write_flag.load(std::memory_order_relaxed), "Before downgrade write_flag not true");
+ s.m_mutex->write_flag.store(false, std::memory_order_relaxed);
+ s.m_mutex->downgrade();
+ return true;
+ case d1::rtm_rw_mutex::rtm_type::rtm_transacting_writer:
+ s.m_transaction_state = d1::rtm_rw_mutex::rtm_type::rtm_transacting_reader;
+ return true;
+ default:
+ __TBB_ASSERT(false, "Invalid state for downgrade");
+ return false;
+ }
+ }
+
+ //! Try to acquire write lock on the given mutex.
+ // There may be reader(s) which acquired the spin_rw_mutex, as well as possibly
+ // transactional reader(s). If this is the case, the acquire will fail, and assigning
+ // write_flag will kill the transactors. So we only assign write_flag if we have successfully
+ // acquired the lock.
+ static bool try_acquire_writer(d1::rtm_rw_mutex& m, d1::rtm_rw_mutex::scoped_lock& s) {
+ acquire_writer(m, s, /*only_speculate=*/true);
+ if (s.m_transaction_state == d1::rtm_rw_mutex::rtm_type::rtm_transacting_writer) {
+ return true;
+ }
+ __TBB_ASSERT(s.m_transaction_state == d1::rtm_rw_mutex::rtm_type::rtm_not_in_mutex, NULL);
+ // transacting write acquire failed. try_lock the real mutex
+ if (m.try_lock()) {
+ s.m_mutex = &m;
+ // only shoot down readers if we're not transacting ourselves
+ __TBB_ASSERT(!m.write_flag.load(std::memory_order_relaxed), "After try_acquire_writer, write_flag already true");
+ m.write_flag.store(true, std::memory_order_relaxed);
+ s.m_transaction_state = d1::rtm_rw_mutex::rtm_type::rtm_real_writer;
+ return true;
+ }
+ return false;
+ }
+
+ //! Try to acquire read lock on the given mutex.
+ static bool try_acquire_reader(d1::rtm_rw_mutex& m, d1::rtm_rw_mutex::scoped_lock& s) {
+ // speculatively acquire the lock. If this fails, do try_lock_shared on the spin_rw_mutex.
+ acquire_reader(m, s, /*only_speculate=*/true);
+ if (s.m_transaction_state == d1::rtm_rw_mutex::rtm_type::rtm_transacting_reader) {
+ return true;
+ }
+ __TBB_ASSERT(s.m_transaction_state == d1::rtm_rw_mutex::rtm_type::rtm_not_in_mutex, NULL);
+ // transacting read acquire failed. try_lock_shared the real mutex
+ if (m.try_lock_shared()) {
+ s.m_mutex = &m;
+ s.m_transaction_state = d1::rtm_rw_mutex::rtm_type::rtm_real_reader;
+ return true;
+ }
+ return false;
+ }
+};
+
+void __TBB_EXPORTED_FUNC acquire_writer(d1::rtm_rw_mutex& m, d1::rtm_rw_mutex::scoped_lock& s, bool only_speculate) {
+ rtm_rw_mutex_impl::acquire_writer(m, s, only_speculate);
+}
+//! Internal acquire read lock.
+// only_speculate == true if we're doing a try_lock, else false.
+void __TBB_EXPORTED_FUNC acquire_reader(d1::rtm_rw_mutex& m, d1::rtm_rw_mutex::scoped_lock& s, bool only_speculate) {
+ rtm_rw_mutex_impl::acquire_reader(m, s, only_speculate);
+}
+//! Internal upgrade reader to become a writer.
+bool __TBB_EXPORTED_FUNC upgrade(d1::rtm_rw_mutex::scoped_lock& s) {
+ return rtm_rw_mutex_impl::upgrade(s);
+}
+//! Internal downgrade writer to become a reader.
+bool __TBB_EXPORTED_FUNC downgrade(d1::rtm_rw_mutex::scoped_lock& s) {
+ return rtm_rw_mutex_impl::downgrade(s);
+}
+//! Internal try_acquire write lock.
+bool __TBB_EXPORTED_FUNC try_acquire_writer(d1::rtm_rw_mutex& m, d1::rtm_rw_mutex::scoped_lock& s) {
+ return rtm_rw_mutex_impl::try_acquire_writer(m, s);
+}
+//! Internal try_acquire read lock.
+bool __TBB_EXPORTED_FUNC try_acquire_reader(d1::rtm_rw_mutex& m, d1::rtm_rw_mutex::scoped_lock& s) {
+ return rtm_rw_mutex_impl::try_acquire_reader(m, s);
+}
+//! Internal release lock.
+void __TBB_EXPORTED_FUNC release(d1::rtm_rw_mutex::scoped_lock& s) {
+ rtm_rw_mutex_impl::release(s);
+}
+
+} // namespace r1
+} // namespace detail
+} // namespace tbb
+
+
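The comment inside acquire_reader() above lists the abort reasons worth retrying, i.e. bits 0-2 of the status word returned by _xbegin(). TBB's speculation_retry mask is defined elsewhere in the library; expressed with the documented <immintrin.h> abort-status macros, the test described by that comment looks roughly like this:

    #include <immintrin.h>

    // Retry only if there is some hope that a new transaction can succeed.
    inline bool worth_retrying(unsigned abort_code) {
        // _XABORT_EXPLICIT: aborted by an _xabort() instruction         (bit 0)
        // _XABORT_RETRY   : the hardware hints that a retry may succeed (bit 1)
        // _XABORT_CONFLICT: another logical processor touched our data  (bit 2)
        return (abort_code & (_XABORT_EXPLICIT | _XABORT_RETRY | _XABORT_CONFLICT)) != 0;
    }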
diff --git a/contrib/libs/tbb/src/tbb/scheduler_common.h b/contrib/libs/tbb/src/tbb/scheduler_common.h
new file mode 100644
index 0000000000..ee13dbf981
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/scheduler_common.h
@@ -0,0 +1,505 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#ifndef _TBB_scheduler_common_H
+#define _TBB_scheduler_common_H
+
+#include "oneapi/tbb/detail/_utils.h"
+#include "oneapi/tbb/detail/_template_helpers.h"
+#include "oneapi/tbb/detail/_task.h"
+#include "oneapi/tbb/detail/_machine.h"
+#include "oneapi/tbb/task_group.h"
+#include "oneapi/tbb/cache_aligned_allocator.h"
+#include "itt_notify.h"
+#include "co_context.h"
+#include "misc.h"
+#include "governor.h"
+
+#ifndef __TBB_SCHEDULER_MUTEX_TYPE
+#define __TBB_SCHEDULER_MUTEX_TYPE tbb::spin_mutex
+#endif
+// TODO: add conditional inclusion based on specified type
+#include "oneapi/tbb/spin_mutex.h"
+
+#if TBB_USE_ASSERT
+#include <atomic>
+#endif
+
+#include <cstdint>
+#include <exception>
+
+//! Mutex type for global locks in the scheduler
+using scheduler_mutex_type = __TBB_SCHEDULER_MUTEX_TYPE;
+
+#if _MSC_VER && !defined(__INTEL_COMPILER)
+ // Workaround for overzealous compiler warnings
+ // These particular warnings are so ubiquitous that no attempt is made to narrow
+ // the scope of the warnings.
+ #pragma warning (disable: 4100 4127 4312 4244 4267 4706)
+#endif
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+
+class arena;
+class mail_inbox;
+class mail_outbox;
+class market;
+class observer_proxy;
+
+enum task_stream_accessor_type { front_accessor = 0, back_nonnull_accessor };
+template<task_stream_accessor_type> class task_stream;
+
+using isolation_type = std::intptr_t;
+constexpr isolation_type no_isolation = 0;
+
+//------------------------------------------------------------------------
+// Extended execute data
+//------------------------------------------------------------------------
+
+//! Execute data used on a task dispatcher side, reflects a current execution state
+struct execution_data_ext : d1::execution_data {
+ task_dispatcher* task_disp{};
+ isolation_type isolation{};
+ d1::wait_context* wait_ctx{};
+};
+
+//------------------------------------------------------------------------
+// Task accessor
+//------------------------------------------------------------------------
+
+//! Interpretation of reserved task fields inside a task dispatcher
+struct task_accessor {
+ static constexpr std::uint64_t proxy_task_trait = 1;
+ static constexpr std::uint64_t resume_task_trait = 2;
+ static d1::task_group_context*& context(d1::task& t) {
+ task_group_context** tgc = reinterpret_cast<task_group_context**>(&t.m_reserved[0]);
+ return *tgc;
+ }
+ static isolation_type& isolation(d1::task& t) {
+ isolation_type* tag = reinterpret_cast<isolation_type*>(&t.m_reserved[2]);
+ return *tag;
+ }
+ static void set_proxy_trait(d1::task& t) {
+ // TODO: refactor proxy tasks not to work on uninitialized memory.
+ //__TBB_ASSERT((t.m_version_and_traits & proxy_task_trait) == 0, nullptr);
+ t.m_version_and_traits |= proxy_task_trait;
+ }
+ static bool is_proxy_task(d1::task& t) {
+ return (t.m_version_and_traits & proxy_task_trait) != 0;
+ }
+ static void set_resume_trait(d1::task& t) {
+ __TBB_ASSERT((t.m_version_and_traits & resume_task_trait) == 0, nullptr);
+ t.m_version_and_traits |= resume_task_trait;
+ }
+ static bool is_resume_task(d1::task& t) {
+ return (t.m_version_and_traits & resume_task_trait) != 0;
+ }
+};
+
+//------------------------------------------------------------------------
+//! Extended variant of the standard offsetof macro
+/** The standard offsetof macro is not sufficient for TBB as it can be used for
+ POD-types only. The constant 0x1000 (not NULL) is necessary to appease GCC. **/
+#define __TBB_offsetof(class_name, member_name) \
+ ((ptrdiff_t)&(reinterpret_cast<class_name*>(0x1000)->member_name) - 0x1000)
+
+//! Returns address of the object containing a member with the given name and address
+#define __TBB_get_object_ref(class_name, member_name, member_addr) \
+ (*reinterpret_cast<class_name*>((char*)member_addr - __TBB_offsetof(class_name, member_name)))
+
+//! Helper class for tracking floating point context and task group context switches
+/** Assuming presence of an itt collector, in addition to keeping track of floating
+ point context, this class emits itt events to indicate begin and end of task group
+ context execution **/
+template <bool report_tasks>
+class context_guard_helper {
+ const d1::task_group_context* curr_ctx;
+ d1::cpu_ctl_env guard_cpu_ctl_env;
+ d1::cpu_ctl_env curr_cpu_ctl_env;
+public:
+ context_guard_helper() : curr_ctx(NULL) {
+ guard_cpu_ctl_env.get_env();
+ curr_cpu_ctl_env = guard_cpu_ctl_env;
+ }
+ ~context_guard_helper() {
+ if (curr_cpu_ctl_env != guard_cpu_ctl_env)
+ guard_cpu_ctl_env.set_env();
+ if (report_tasks && curr_ctx)
+ ITT_TASK_END;
+ }
+ // This function is called from the bypass dispatch loop on the hot path.
+ // Consider performance implications when refactoring.
+ void set_ctx(const d1::task_group_context* ctx) {
+ if (!ctx)
+ return;
+ const d1::cpu_ctl_env* ctl = reinterpret_cast<const d1::cpu_ctl_env*>(&ctx->my_cpu_ctl_env);
+ // Compare the FPU settings directly because the context can be reused between parallel algorithms.
+ if (*ctl != curr_cpu_ctl_env) {
+ curr_cpu_ctl_env = *ctl;
+ curr_cpu_ctl_env.set_env();
+ }
+ if (report_tasks && ctx != curr_ctx) {
+ // if task group context was active, report end of current execution frame.
+ if (curr_ctx)
+ ITT_TASK_END;
+ // reporting begin of new task group context execution frame.
+ // using address of task group context object to group tasks (parent).
+ // id of task execution frame is NULL and reserved for future use.
+ ITT_TASK_BEGIN(ctx, ctx->my_name, NULL);
+ curr_ctx = ctx;
+ }
+ }
+#if _WIN64
+ void restore_default() {
+ if (curr_cpu_ctl_env != guard_cpu_ctl_env) {
+ guard_cpu_ctl_env.set_env();
+ curr_cpu_ctl_env = guard_cpu_ctl_env;
+ }
+ }
+#endif // _WIN64
+};
+
+#if (_WIN32 || _WIN64 || __linux__) && (__TBB_x86_32 || __TBB_x86_64)
+#if _MSC_VER
+#pragma intrinsic(__rdtsc)
+#endif
+inline std::uint64_t machine_time_stamp() {
+#if __INTEL_COMPILER
+ return _rdtsc();
+#elif _MSC_VER
+ return __rdtsc();
+#else
+ std::uint32_t hi, lo;
+ __asm__ __volatile__("rdtsc" : "=d"(hi), "=a"(lo));
+ return (std::uint64_t(hi) << 32) | lo;
+#endif
+}
+
+inline void prolonged_pause_impl() {
+ // Assumption based on practice: 1000-2000 ticks seems to be a suitable invariant for the
+ // majority of platforms. Currently, skip platforms that define __TBB_STEALING_PAUSE
+ // because these platforms require very careful tuning.
+ std::uint64_t prev = machine_time_stamp();
+ const std::uint64_t finish = prev + 1000;
+ atomic_backoff backoff;
+ do {
+ backoff.bounded_pause();
+ std::uint64_t curr = machine_time_stamp();
+ if (curr <= prev)
+ // Possibly, the current logical thread was moved to another hardware thread, or the counter overflowed.
+ break;
+ prev = curr;
+ } while (prev < finish);
+}
+#else
+inline void prolonged_pause_impl() {
+#ifdef __TBB_ipf
+ static const long PauseTime = 1500;
+#else
+ static const long PauseTime = 80;
+#endif
+ // TODO IDEA: Update PauseTime adaptively?
+ machine_pause(PauseTime);
+}
+#endif
+
+inline void prolonged_pause() {
+#if __TBB_WAITPKG_INTRINSICS_PRESENT && (_WIN32 || _WIN64 || __linux__) && (__TBB_x86_32 || __TBB_x86_64)
+ if (governor::wait_package_enabled()) {
+ std::uint64_t time_stamp = machine_time_stamp();
+ // _tpause function directs the processor to enter an implementation-dependent optimized state
+ // until the Time Stamp Counter reaches or exceeds the value specified in second parameter.
+ // Constant "700" is ticks to wait for.
+ // First parameter 0 selects between a lower power (cleared) or faster wakeup (set) optimized state.
+ _tpause(0, time_stamp + 700);
+ }
+ else
+#endif
+ prolonged_pause_impl();
+}
+
+class stealing_loop_backoff {
+ const int my_pause_threshold;
+ const int my_yield_threshold;
+ int my_pause_count;
+ int my_yield_count;
+public:
+ // my_yield_threshold = 100 is an experimental value. Ideally, once we start calling __TBB_Yield(),
+ // the time spent spinning before calling is_out_of_work() should be approximately
+ // the time it takes for a thread to be woken up. Doing so would guarantee that we do
+ // no worse than 2x the optimal spin time. Or perhaps a time-slice quantum is the right amount.
+ stealing_loop_backoff(int num_workers)
+ : my_pause_threshold{ 2 * (num_workers + 1) }
+#if __APPLE__
+ // threshold value tuned separately for macOS due to high cost of sched_yield there
+ , my_yield_threshold{10}
+#else
+ , my_yield_threshold{100}
+#endif
+ , my_pause_count{}
+ , my_yield_count{}
+ {}
+ bool pause() {
+ prolonged_pause();
+ if (my_pause_count++ >= my_pause_threshold) {
+ my_pause_count = my_pause_threshold;
+ d0::yield();
+ if (my_yield_count++ >= my_yield_threshold) {
+ my_yield_count = my_yield_threshold;
+ return true;
+ }
+ }
+ return false;
+ }
+ void reset_wait() {
+ my_pause_count = my_yield_count = 0;
+ }
+};
+
+//------------------------------------------------------------------------
+// Exception support
+//------------------------------------------------------------------------
+//! Task group state change propagation global epoch
+/** Together with generic_scheduler::my_context_state_propagation_epoch, forms a
+ cross-thread signaling mechanism that makes it possible to avoid locking on the hot path
+ of the normal execution flow.
+
+ When a descendant task group context is registered or unregistered, the global
+ and local epochs are compared. If they differ, a state change is being propagated,
+ and thus registration/deregistration routines take the slower branch that may block
+ (at most one thread of the pool can be blocked at any moment). Otherwise the
+ control path is lock-free and fast. **/
+extern std::atomic<std::uintptr_t> the_context_state_propagation_epoch;
+
+//! Mutex guarding state change propagation across task groups forest.
+/** Also protects modification of related data structures. **/
+typedef scheduler_mutex_type context_state_propagation_mutex_type;
+extern context_state_propagation_mutex_type the_context_state_propagation_mutex;
+
+class tbb_exception_ptr {
+ std::exception_ptr my_ptr;
+public:
+ static tbb_exception_ptr* allocate() noexcept;
+
+ //! Destroys this object
+ /** Note that objects of this type can be created only by the allocate() method. **/
+ void destroy() noexcept;
+
+ //! Throws the contained exception.
+ void throw_self();
+
+private:
+ tbb_exception_ptr(const std::exception_ptr& src) : my_ptr(src) {}
+}; // class tbb_exception_ptr
+
+//------------------------------------------------------------------------
+// Debugging support
+//------------------------------------------------------------------------
+
+#if TBB_USE_ASSERT
+static const std::uintptr_t venom = tbb::detail::select_size_t_constant<0xDEADBEEFU, 0xDDEEAADDDEADBEEFULL>::value;
+
+inline void poison_value(std::uintptr_t& val) { val = venom; }
+
+inline void poison_value(std::atomic<std::uintptr_t>& val) { val.store(venom, std::memory_order_relaxed); }
+
+/** Expected to be used in assertions only, thus no empty form is defined. **/
+inline bool is_alive(std::uintptr_t v) { return v != venom; }
+
+/** Logically, this method should be a member of class task.
+ But we do not want to publish it, so it is here instead. */
+inline void assert_task_valid(const d1::task* t) {
+ assert_pointer_valid(t);
+}
+#else /* !TBB_USE_ASSERT */
+
+/** In contrast to the debug version, poison_value() is a macro here because
+ the variable used as its argument may be undefined in release builds. **/
+#define poison_value(g) ((void)0)
+
+inline void assert_task_valid(const d1::task*) {}
+
+#endif /* !TBB_USE_ASSERT */
+
+struct suspend_point_type {
+#if __TBB_RESUMABLE_TASKS
+ //! The arena related to this task_dispatcher
+ arena* m_arena{ nullptr };
+ //! The random for the resume task
+ FastRandom m_random;
+ //! The flag is raised when the original owner should return to this task dispatcher.
+ std::atomic<bool> m_is_owner_recalled{ false };
+ //! Indicates whether the resume task should be placed into the critical task stream.
+ bool m_is_critical{ false };
+ //! Associated coroutine
+ co_context m_co_context;
+
+ struct resume_task final : public d1::task {
+ task_dispatcher& m_target;
+ explicit resume_task(task_dispatcher& target) : m_target(target) {
+ task_accessor::set_resume_trait(*this);
+ }
+ d1::task* execute(d1::execution_data& ed) override;
+ d1::task* cancel(d1::execution_data&) override {
+ __TBB_ASSERT(false, "The resume task cannot be canceled");
+ return nullptr;
+ }
+ } m_resume_task;
+
+ suspend_point_type(arena* a, std::size_t stack_size, task_dispatcher& target);
+#endif /*__TBB_RESUMABLE_TASKS */
+};
+
+class alignas (max_nfs_size) task_dispatcher {
+public:
+ // TODO: reconsider low level design to better organize dependencies and files.
+ friend class thread_data;
+ friend class arena_slot;
+ friend class nested_arena_context;
+ friend class delegated_task;
+ friend struct base_waiter;
+
+ //! The data of the current thread attached to this task_dispatcher
+ thread_data* m_thread_data{ nullptr };
+
+ //! The current execution data
+ execution_data_ext m_execute_data_ext;
+
+ //! Properties
+ struct properties {
+ bool outermost{ true };
+ bool fifo_tasks_allowed{ true };
+ bool critical_task_allowed{ true };
+ } m_properties;
+
+ //! Position in the call stack when stealing is still allowed.
+ std::uintptr_t m_stealing_threshold{};
+
+ //! Suspend point (null if this task dispatcher has never been suspended)
+ suspend_point_type* m_suspend_point{ nullptr };
+
+ //! Attempt to get a task from the mailbox.
+ /** Gets a task only if it has not been executed by its sender or a thief
+ that has stolen it from the sender's task pool. Otherwise returns NULL.
+ This method is intended to be used only by the thread extracting the proxy
+ from its mailbox. (In contrast to local task pool, mailbox can be read only
+ by its owner). **/
+ d1::task* get_mailbox_task(mail_inbox& my_inbox, execution_data_ext& ed, isolation_type isolation);
+
+ d1::task* get_critical_task(d1::task*, execution_data_ext&, isolation_type, bool);
+
+ template <bool ITTPossible, typename Waiter>
+ d1::task* receive_or_steal_task(thread_data& tls, execution_data_ext& ed, Waiter& waiter,
+ isolation_type isolation, bool outermost, bool criticality_absence);
+
+ template <bool ITTPossible, typename Waiter>
+ d1::task* local_wait_for_all(d1::task * t, Waiter& waiter);
+
+ task_dispatcher(const task_dispatcher&) = delete;
+
+ bool can_steal();
+public:
+ task_dispatcher(arena* a);
+
+ ~task_dispatcher() {
+ if (m_suspend_point) {
+ m_suspend_point->~suspend_point_type();
+ cache_aligned_deallocate(m_suspend_point);
+ }
+ poison_pointer(m_thread_data);
+ poison_pointer(m_suspend_point);
+ }
+
+ template <typename Waiter>
+ d1::task* local_wait_for_all(d1::task* t, Waiter& waiter);
+
+ bool allow_fifo_task(bool new_state) {
+ bool old_state = m_properties.fifo_tasks_allowed;
+ m_properties.fifo_tasks_allowed = new_state;
+ return old_state;
+ }
+
+ isolation_type set_isolation(isolation_type isolation) {
+ isolation_type prev = m_execute_data_ext.isolation;
+ m_execute_data_ext.isolation = isolation;
+ return prev;
+ }
+
+ thread_data& get_thread_data() {
+ __TBB_ASSERT(m_thread_data, nullptr);
+ return *m_thread_data;
+ }
+
+ static void execute_and_wait(d1::task* t, d1::wait_context& wait_ctx, d1::task_group_context& w_ctx);
+
+ void set_stealing_threshold(std::uintptr_t stealing_threshold) {
+ bool assert_condition = (stealing_threshold == 0 && m_stealing_threshold != 0) ||
+ (stealing_threshold != 0 && m_stealing_threshold == 0);
+ __TBB_ASSERT_EX( assert_condition, nullptr );
+ m_stealing_threshold = stealing_threshold;
+ }
+
+ d1::task* get_inbox_or_critical_task(execution_data_ext&, mail_inbox&, isolation_type, bool);
+ d1::task* get_stream_or_critical_task(execution_data_ext&, arena&, task_stream<front_accessor>&,
+ unsigned& /*hint_for_stream*/, isolation_type,
+ bool /*critical_allowed*/);
+ d1::task* steal_or_get_critical(execution_data_ext&, arena&, unsigned /*arena_index*/, FastRandom&,
+ isolation_type, bool /*critical_allowed*/);
+
+#if __TBB_RESUMABLE_TASKS
+ /* [[noreturn]] */ void co_local_wait_for_all() noexcept;
+ void suspend(suspend_callback_type suspend_callback, void* user_callback);
+ void resume(task_dispatcher& target);
+ suspend_point_type* get_suspend_point();
+ void init_suspend_point(arena* a, std::size_t stack_size);
+ friend void internal_resume(suspend_point_type*);
+ void recall_point();
+#endif /* __TBB_RESUMABLE_TASKS */
+};
+
+inline std::uintptr_t calculate_stealing_threshold(std::uintptr_t base, std::size_t stack_size) {
+ return base - stack_size / 2;
+}
+
+struct task_group_context_impl {
+ static void destroy(d1::task_group_context&);
+ static void initialize(d1::task_group_context&);
+ static void register_with(d1::task_group_context&, thread_data*);
+ static void bind_to_impl(d1::task_group_context&, thread_data*);
+ static void bind_to(d1::task_group_context&, thread_data*);
+ template <typename T>
+ static void propagate_task_group_state(d1::task_group_context&, std::atomic<T> d1::task_group_context::*, d1::task_group_context&, T);
+ static bool cancel_group_execution(d1::task_group_context&);
+ static bool is_group_execution_cancelled(const d1::task_group_context&);
+ static void reset(d1::task_group_context&);
+ static void capture_fp_settings(d1::task_group_context&);
+ static void copy_fp_settings(d1::task_group_context& ctx, const d1::task_group_context& src);
+};
+
+
+//! Forward declaration for scheduler entities
+bool gcc_rethrow_exception_broken();
+void fix_broken_rethrow();
+//! Forward declaration: throws std::runtime_error with what() returning error_code description prefixed with aux_info
+void handle_perror(int error_code, const char* aux_info);
+
+} // namespace r1
+} // namespace detail
+} // namespace tbb
+
+#endif /* _TBB_scheduler_common_H */
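__TBB_offsetof and __TBB_get_object_ref above implement the familiar container-of idiom: recover the enclosing object from the address of one of its embedded members, without the POD/standard-layout restriction of plain offsetof. A minimal standalone illustration follows; the EX_ macros mirror the ones above, and the two structs are invented for the example.

    #include <cstddef>

    struct queue_node { int payload; };
    struct task_proxy {
        int id;
        queue_node node;          // a task_proxy is later recovered from &node
    };

    #define EX_offsetof(class_name, member_name) \
        ((ptrdiff_t)&(reinterpret_cast<class_name*>(0x1000)->member_name) - 0x1000)

    #define EX_get_object_ref(class_name, member_name, member_addr) \
        (*reinterpret_cast<class_name*>((char*)member_addr - EX_offsetof(class_name, member_name)))

    int owner_id(queue_node* n) {
        // Walk back from the embedded member to the enclosing task_proxy.
        return EX_get_object_ref(task_proxy, node, n).id;
    }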
diff --git a/contrib/libs/tbb/src/tbb/semaphore.cpp b/contrib/libs/tbb/src/tbb/semaphore.cpp
new file mode 100644
index 0000000000..92c9e675ab
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/semaphore.cpp
@@ -0,0 +1,92 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "semaphore.h"
+#if __TBB_USE_SRWLOCK
+#include "dynamic_link.h" // Refers to src/tbb, not include/tbb
+#error #include "tbb_misc.h"
+#endif
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+
+// TODO: For new win UI port, we can use SRWLock API without dynamic_link etc.
+#if __TBB_USE_SRWLOCK
+
+static std::atomic<do_once_state> concmon_module_inited;
+
+void WINAPI init_binsem_using_event( SRWLOCK* h_ )
+{
+ srwl_or_handle* shptr = (srwl_or_handle*) h_;
+ shptr->h = CreateEventEx( NULL, NULL, 0, EVENT_ALL_ACCESS|SEMAPHORE_ALL_ACCESS );
+}
+
+void WINAPI acquire_binsem_using_event( SRWLOCK* h_ )
+{
+ srwl_or_handle* shptr = (srwl_or_handle*) h_;
+ WaitForSingleObjectEx( shptr->h, INFINITE, FALSE );
+}
+
+void WINAPI release_binsem_using_event( SRWLOCK* h_ )
+{
+ srwl_or_handle* shptr = (srwl_or_handle*) h_;
+ SetEvent( shptr->h );
+}
+
+static void (WINAPI *__TBB_init_binsem)( SRWLOCK* ) = (void (WINAPI *)(SRWLOCK*))&init_binsem_using_event;
+static void (WINAPI *__TBB_acquire_binsem)( SRWLOCK* ) = (void (WINAPI *)(SRWLOCK*))&acquire_binsem_using_event;
+static void (WINAPI *__TBB_release_binsem)( SRWLOCK* ) = (void (WINAPI *)(SRWLOCK*))&release_binsem_using_event;
+
+//! Table describing how to link the handlers.
+static const dynamic_link_descriptor SRWLLinkTable[] = {
+ DLD(InitializeSRWLock, __TBB_init_binsem),
+ DLD(AcquireSRWLockExclusive, __TBB_acquire_binsem),
+ DLD(ReleaseSRWLockExclusive, __TBB_release_binsem)
+};
+
+inline void init_concmon_module()
+{
+ __TBB_ASSERT( (uintptr_t)__TBB_init_binsem==(uintptr_t)&init_binsem_using_event, NULL );
+ if( dynamic_link( "Kernel32.dll", SRWLLinkTable, sizeof(SRWLLinkTable)/sizeof(dynamic_link_descriptor) ) ) {
+ __TBB_ASSERT( (uintptr_t)__TBB_init_binsem!=(uintptr_t)&init_binsem_using_event, NULL );
+ __TBB_ASSERT( (uintptr_t)__TBB_acquire_binsem!=(uintptr_t)&acquire_binsem_using_event, NULL );
+ __TBB_ASSERT( (uintptr_t)__TBB_release_binsem!=(uintptr_t)&release_binsem_using_event, NULL );
+ }
+}
+
+binary_semaphore::binary_semaphore() {
+ atomic_do_once( &init_concmon_module, concmon_module_inited );
+
+ __TBB_init_binsem( &my_sem.lock );
+ if( (uintptr_t)__TBB_init_binsem!=(uintptr_t)&init_binsem_using_event )
+ P();
+}
+
+binary_semaphore::~binary_semaphore() {
+ if( (uintptr_t)__TBB_init_binsem==(uintptr_t)&init_binsem_using_event )
+ CloseHandle( my_sem.h );
+}
+
+void binary_semaphore::P() { __TBB_acquire_binsem( &my_sem.lock ); }
+
+void binary_semaphore::V() { __TBB_release_binsem( &my_sem.lock ); }
+
+#endif /* __TBB_USE_SRWLOCK */
+
+} // namespace r1
+} // namespace detail
+} // namespace tbb
diff --git a/contrib/libs/tbb/src/tbb/semaphore.h b/contrib/libs/tbb/src/tbb/semaphore.h
new file mode 100644
index 0000000000..0a88536e36
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/semaphore.h
@@ -0,0 +1,335 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#ifndef __TBB_semaphore_H
+#define __TBB_semaphore_H
+
+#include "oneapi/tbb/detail/_utils.h"
+
+#if _WIN32||_WIN64
+#include <windows.h>
+#elif __APPLE__
+#include <mach/semaphore.h>
+#include <mach/task.h>
+#include <mach/mach_init.h>
+#include <mach/error.h>
+#else
+#include <semaphore.h>
+#ifdef TBB_USE_DEBUG
+#include <cerrno>
+#endif
+#endif /*_WIN32||_WIN64*/
+
+#include <atomic>
+
+#if __linux__ || __FreeBSD__ || __NetBSD__ || __OpenBSD__
+
+/* Futex definitions */
+#include <unistd.h>
+#include <sys/syscall.h>
+
+#if defined(SYS_futex)
+
+/* This section is included for Linux and some other systems that may support futexes. */
+
+#define __TBB_USE_FUTEX 1
+
+#if defined(__has_include)
+#define __TBB_has_include __has_include
+#else
+#define __TBB_has_include(x) 0
+#endif
+
+/*
+If available, use typical headers where futex API is defined. While Linux and OpenBSD
+are known to provide such headers, other systems might have them as well.
+*/
+#if defined(__linux__) || __TBB_has_include(<linux/futex.h>)
+#include <linux/futex.h>
+#elif defined(__OpenBSD__) || __TBB_has_include(<sys/futex.h>)
+#error #include <sys/futex.h>
+#endif
+
+#include <climits>
+#include <cerrno>
+
+/*
+Some systems might not define the macros or may use different names. In such a case we expect
+the actual parameter values to match Linux: 0 for wait, 1 for wake.
+*/
+#if defined(FUTEX_WAIT_PRIVATE)
+#define __TBB_FUTEX_WAIT FUTEX_WAIT_PRIVATE
+#elif defined(FUTEX_WAIT)
+#define __TBB_FUTEX_WAIT FUTEX_WAIT
+#else
+#define __TBB_FUTEX_WAIT 0
+#endif
+
+#if defined(FUTEX_WAKE_PRIVATE)
+#define __TBB_FUTEX_WAKE FUTEX_WAKE_PRIVATE
+#elif defined(FUTEX_WAKE)
+#define __TBB_FUTEX_WAKE FUTEX_WAKE
+#else
+#define __TBB_FUTEX_WAKE 1
+#endif
+
+#endif // SYS_futex
+#endif // __linux__ || __FreeBSD__ || __NetBSD__ || __OpenBSD__
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+// Futex implementation
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+#if __TBB_USE_FUTEX
+
+static inline int futex_wait( void *futex, int comparand ) {
+ int r = ::syscall( SYS_futex,futex,__TBB_FUTEX_WAIT,comparand,NULL,NULL,0 );
+#if TBB_USE_ASSERT
+ int e = errno;
+ __TBB_ASSERT( r==0||r==EWOULDBLOCK||(r==-1&&(e==EAGAIN||e==EINTR)), "futex_wait failed." );
+#endif /* TBB_USE_ASSERT */
+ return r;
+}
+
+static inline int futex_wakeup_one( void *futex ) {
+ int r = ::syscall( SYS_futex,futex,__TBB_FUTEX_WAKE,1,NULL,NULL,0 );
+ __TBB_ASSERT( r==0||r==1, "futex_wakeup_one: more than one thread woken up?" );
+ return r;
+}
+
+// Additional possible methods that are not required right now
+// static inline int futex_wakeup_all( void *futex ) {
+// int r = ::syscall( SYS_futex,futex,__TBB_FUTEX_WAKE,INT_MAX,NULL,NULL,0 );
+// __TBB_ASSERT( r>=0, "futex_wakeup_all: error in waking up threads" );
+// return r;
+// }
+
+#endif // __TBB_USE_FUTEX
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+#if _WIN32||_WIN64
+typedef LONG sem_count_t;
+//! Edsger Dijkstra's counting semaphore
+class semaphore : no_copy {
+ static const int max_semaphore_cnt = MAXLONG;
+public:
+ //! ctor
+ semaphore(size_t start_cnt_ = 0) {init_semaphore(start_cnt_);}
+ //! dtor
+ ~semaphore() {CloseHandle( sem );}
+ //! wait/acquire
+ void P() {WaitForSingleObjectEx( sem, INFINITE, FALSE );}
+ //! post/release
+ void V() {ReleaseSemaphore( sem, 1, NULL );}
+private:
+ HANDLE sem;
+ void init_semaphore(size_t start_cnt_) {
+ sem = CreateSemaphoreEx( NULL, LONG(start_cnt_), max_semaphore_cnt, NULL, 0, SEMAPHORE_ALL_ACCESS );
+ }
+};
+#elif __APPLE__
+//! Edsger Dijkstra's counting semaphore
+class semaphore : no_copy {
+public:
+ //! ctor
+ semaphore(int start_cnt_ = 0) : sem(start_cnt_) { init_semaphore(start_cnt_); }
+ //! dtor
+ ~semaphore() {
+ kern_return_t ret = semaphore_destroy( mach_task_self(), sem );
+ __TBB_ASSERT_EX( ret==err_none, NULL );
+ }
+ //! wait/acquire
+ void P() {
+ int ret;
+ do {
+ ret = semaphore_wait( sem );
+ } while( ret==KERN_ABORTED );
+ __TBB_ASSERT( ret==KERN_SUCCESS, "semaphore_wait() failed" );
+ }
+ //! post/release
+ void V() { semaphore_signal( sem ); }
+private:
+ semaphore_t sem;
+ void init_semaphore(int start_cnt_) {
+ kern_return_t ret = semaphore_create( mach_task_self(), &sem, SYNC_POLICY_FIFO, start_cnt_ );
+ __TBB_ASSERT_EX( ret==err_none, "failed to create a semaphore" );
+ }
+};
+#else /* Linux/Unix */
+typedef uint32_t sem_count_t;
+//! Edsger Dijkstra's counting semaphore
+class semaphore : no_copy {
+public:
+ //! ctor
+ semaphore(int start_cnt_ = 0 ) { init_semaphore( start_cnt_ ); }
+
+ //! dtor
+ ~semaphore() {
+ int ret = sem_destroy( &sem );
+ __TBB_ASSERT_EX( !ret, NULL );
+ }
+ //! wait/acquire
+ void P() {
+ while( sem_wait( &sem )!=0 )
+ __TBB_ASSERT( errno==EINTR, NULL );
+ }
+ //! post/release
+ void V() { sem_post( &sem ); }
+private:
+ sem_t sem;
+ void init_semaphore(int start_cnt_) {
+ int ret = sem_init( &sem, /*shared among threads*/ 0, start_cnt_ );
+ __TBB_ASSERT_EX( !ret, NULL );
+ }
+};
+#endif /* _WIN32||_WIN64 */
+
+
+//! For performance reasons, we want a specialized binary_semaphore
+#if _WIN32||_WIN64
+#if !__TBB_USE_SRWLOCK
+//! binary_semaphore for concurrent_monitor
+class binary_semaphore : no_copy {
+public:
+ //! ctor
+ binary_semaphore() { my_sem = CreateEventEx( NULL, NULL, 0, EVENT_ALL_ACCESS ); }
+ //! dtor
+ ~binary_semaphore() { CloseHandle( my_sem ); }
+ //! wait/acquire
+ void P() { WaitForSingleObjectEx( my_sem, INFINITE, FALSE ); }
+ //! post/release
+ void V() { SetEvent( my_sem ); }
+private:
+ HANDLE my_sem;
+};
+#else /* __TBB_USE_SRWLOCK */
+
+union srwl_or_handle {
+ SRWLOCK lock;
+ HANDLE h;
+};
+
+//! binary_semaphore for concurrent_monitor
+class binary_semaphore : no_copy {
+public:
+ //! ctor
+ binary_semaphore();
+ //! dtor
+ ~binary_semaphore();
+ //! wait/acquire
+ void P();
+ //! post/release
+ void V();
+private:
+ srwl_or_handle my_sem;
+};
+#endif /* !__TBB_USE_SRWLOCK */
+#elif __APPLE__
+//! binary_semaphore for concurrent monitor
+class binary_semaphore : no_copy {
+public:
+ //! ctor
+ binary_semaphore() : my_sem(0) {
+ kern_return_t ret = semaphore_create( mach_task_self(), &my_sem, SYNC_POLICY_FIFO, 0 );
+ __TBB_ASSERT_EX( ret==err_none, "failed to create a semaphore" );
+ }
+ //! dtor
+ ~binary_semaphore() {
+ kern_return_t ret = semaphore_destroy( mach_task_self(), my_sem );
+ __TBB_ASSERT_EX( ret==err_none, NULL );
+ }
+ //! wait/acquire
+ void P() {
+ int ret;
+ do {
+ ret = semaphore_wait( my_sem );
+ } while( ret==KERN_ABORTED );
+ __TBB_ASSERT( ret==KERN_SUCCESS, "semaphore_wait() failed" );
+ }
+ //! post/release
+ void V() { semaphore_signal( my_sem ); }
+private:
+ semaphore_t my_sem;
+};
+#else /* Linux/Unix */
+
+#if __TBB_USE_FUTEX
+class binary_semaphore : no_copy {
+// The implementation is equivalent to the "Mutex, Take 3" one
+// in the paper "Futexes Are Tricky" by Ulrich Drepper
+public:
+ //! ctor
+ binary_semaphore() { my_sem = 1; }
+ //! dtor
+ ~binary_semaphore() {}
+ //! wait/acquire
+ void P() {
+ int s = 0;
+ if( !my_sem.compare_exchange_strong( s, 1 ) ) {
+ if( s!=2 )
+ s = my_sem.exchange( 2 );
+ while( s!=0 ) { // This loop deals with spurious wakeup
+ futex_wait( &my_sem, 2 );
+ s = my_sem.exchange( 2 );
+ }
+ }
+ }
+ //! post/release
+ void V() {
+ __TBB_ASSERT( my_sem.load(std::memory_order_relaxed)>=1, "multiple V()'s in a row?" );
+ if( my_sem.exchange( 0 )==2 )
+ futex_wakeup_one( &my_sem );
+ }
+private:
+ std::atomic<int> my_sem; // 0 - open; 1 - closed, no waits; 2 - closed, possible waits
+};
+#else
+typedef uint32_t sem_count_t;
+//! binary_semaphore for concurrent monitor
+class binary_semaphore : no_copy {
+public:
+ //! ctor
+ binary_semaphore() {
+ int ret = sem_init( &my_sem, /*shared among threads*/ 0, 0 );
+ __TBB_ASSERT_EX( !ret, NULL );
+ }
+ //! dtor
+ ~binary_semaphore() {
+ int ret = sem_destroy( &my_sem );
+ __TBB_ASSERT_EX( !ret, NULL );
+ }
+ //! wait/acquire
+ void P() {
+ while( sem_wait( &my_sem )!=0 )
+ __TBB_ASSERT( errno==EINTR, NULL );
+ }
+ //! post/release
+ void V() { sem_post( &my_sem ); }
+private:
+ sem_t my_sem;
+};
+#endif /* __TBB_USE_FUTEX */
+#endif /* _WIN32||_WIN64 */
+
+} // namespace r1
+} // namespace detail
+} // namespace tbb
+
+#endif /* __TBB_semaphore_H */
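For readers who have not seen the "Mutex, Take 3" scheme from Ulrich Drepper's "Futexes Are Tricky", here is a typical two-thread trace of the futex-based binary_semaphore above, with my_sem moving through its three states (0 = open, 1 = closed with no waiters, 2 = closed with possible waiters). This is only a walk-through of the code above, not additional API:

    Initial state: my_sem == 1 (constructed closed, nobody waiting)

    Thread W calls P():
        compare_exchange_strong(0 -> 1) fails (value is 1), so s == 1
        s != 2, hence s = exchange(2)         -> my_sem == 2, s == 1
        s != 0, hence futex_wait(&my_sem, 2)  -> W sleeps while the value is still 2

    Thread S calls V():
        exchange(0) returns 2                 -> my_sem == 0; 2 meant "possible waiters"
        futex_wakeup_one(&my_sem)             -> W is woken

    Thread W resumes inside P():
        s = exchange(2)                       -> s == 0, the loop exits; my_sem == 2 again

After W returns from P() the value is 2 rather than 1, so the next V() issues one possibly spurious futex_wakeup_one(); that is harmless and is the price the scheme pays for never losing a wakeup.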
diff --git a/contrib/libs/tbb/src/tbb/small_object_pool.cpp b/contrib/libs/tbb/src/tbb/small_object_pool.cpp
new file mode 100644
index 0000000000..28d11d011d
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/small_object_pool.cpp
@@ -0,0 +1,154 @@
+/*
+ Copyright (c) 2020-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "oneapi/tbb/cache_aligned_allocator.h"
+#include "oneapi/tbb/detail/_small_object_pool.h"
+#include "oneapi/tbb/detail/_task.h"
+#include "governor.h"
+#include "thread_data.h"
+#include "task_dispatcher.h"
+
+#include <cstddef>
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+
+small_object_pool_impl::small_object* const small_object_pool_impl::dead_public_list =
+ reinterpret_cast<small_object_pool_impl::small_object*>(1);
+
+void* __TBB_EXPORTED_FUNC allocate(d1::small_object_pool*& allocator, std::size_t number_of_bytes, const d1::execution_data& ed) {
+ auto& tls = static_cast<const execution_data_ext&>(ed).task_disp->get_thread_data();
+ auto pool = tls.my_small_object_pool;
+ return pool->allocate_impl(allocator, number_of_bytes);
+}
+
+void* __TBB_EXPORTED_FUNC allocate(d1::small_object_pool*& allocator, std::size_t number_of_bytes) {
+ // TODO: optimize if the allocator contains a valid pool.
+ auto tls = governor::get_thread_data();
+ auto pool = tls->my_small_object_pool;
+ return pool->allocate_impl(allocator, number_of_bytes);
+}
+
+void* small_object_pool_impl::allocate_impl(d1::small_object_pool*& allocator, std::size_t number_of_bytes)
+{
+ small_object* obj{nullptr};
+
+ if (number_of_bytes <= small_object_size) {
+ if (m_private_list) {
+ obj = m_private_list;
+ m_private_list = m_private_list->next;
+ } else if (m_public_list.load(std::memory_order_relaxed)) {
+ // No fence required for the read of m_public_list above, because std::atomic::exchange() has a fence.
+ obj = m_public_list.exchange(nullptr);
+ __TBB_ASSERT( obj, "another thread emptied m_public_list" );
+ m_private_list = obj->next;
+ } else {
+ obj = new (cache_aligned_allocate(small_object_size)) small_object{nullptr};
+ ++m_private_counter;
+ }
+ } else {
+ obj = new (cache_aligned_allocate(number_of_bytes)) small_object{nullptr};
+ }
+ allocator = this;
+
+ // Return uninitialized memory for further construction on the user side.
+ obj->~small_object();
+ return obj;
+}
+
+void __TBB_EXPORTED_FUNC deallocate(d1::small_object_pool& allocator, void* ptr, std::size_t number_of_bytes) {
+ auto pool = static_cast<small_object_pool_impl*>(&allocator);
+ auto tls = governor::get_thread_data();
+ pool->deallocate_impl(ptr, number_of_bytes, *tls);
+}
+
+void __TBB_EXPORTED_FUNC deallocate(d1::small_object_pool& allocator, void* ptr, std::size_t number_of_bytes, const d1::execution_data& ed) {
+ auto& tls = static_cast<const execution_data_ext&>(ed).task_disp->get_thread_data();
+ auto pool = static_cast<small_object_pool_impl*>(&allocator);
+ pool->deallocate_impl(ptr, number_of_bytes, tls);
+}
+
+void small_object_pool_impl::deallocate_impl(void* ptr, std::size_t number_of_bytes, thread_data& td) {
+ __TBB_ASSERT(ptr != nullptr, "pointer to deallocate should not be null");
+ __TBB_ASSERT(number_of_bytes >= sizeof(small_object), "number of bytes should be at least sizeof(small_object)");
+
+ if (number_of_bytes <= small_object_size) {
+ auto obj = new (ptr) small_object{nullptr};
+ if (td.my_small_object_pool == this) {
+ obj->next = m_private_list;
+ m_private_list = obj;
+ } else {
+ auto old_public_list = m_public_list.load(std::memory_order_relaxed);
+
+ for (;;) {
+ if (old_public_list == dead_public_list) {
+ obj->~small_object();
+ cache_aligned_deallocate(obj);
+ if (++m_public_counter == 0)
+ {
+ this->~small_object_pool_impl();
+ cache_aligned_deallocate(this);
+ }
+ break;
+ }
+ obj->next = old_public_list;
+ if (m_public_list.compare_exchange_strong(old_public_list, obj)) {
+ break;
+ }
+ }
+ }
+ } else {
+ cache_aligned_deallocate(ptr);
+ }
+}
+
+std::int64_t small_object_pool_impl::cleanup_list(small_object* list)
+{
+ std::int64_t removed_count{};
+
+ while (list) {
+ small_object* current = list;
+ list = list->next;
+ current->~small_object();
+ cache_aligned_deallocate(current);
+ ++removed_count;
+ }
+ return removed_count;
+}
+
+void small_object_pool_impl::destroy()
+{
+ // clean up private list and subtract the removed count from private counter
+ m_private_counter -= cleanup_list(m_private_list);
+ // Grab public list and place dead mark
+ small_object* public_list = m_public_list.exchange(dead_public_list);
+ // clean up the public list and (intentionally) subtract the removed count from the private counter
+ m_private_counter -= cleanup_list(public_list);
+ __TBB_ASSERT(m_private_counter >= 0, "Private counter may not be less than 0");
+ // Equivalent to fetch_sub(m_private_counter) - m_private_counter: operator-= atomically
+ // returns the new value, so m_private_counter is never accessed again after the subtraction.
+ auto new_value = m_public_counter -= m_private_counter;
+ // check if this method is responsible to clean up the resources
+ if (new_value == 0) {
+ this->~small_object_pool_impl();
+ cache_aligned_deallocate(this);
+ }
+}
+
+} // namespace r1
+} // namespace detail
+} // namespace tbb
diff --git a/contrib/libs/tbb/src/tbb/small_object_pool_impl.h b/contrib/libs/tbb/src/tbb/small_object_pool_impl.h
new file mode 100644
index 0000000000..a6b664beab
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/small_object_pool_impl.h
@@ -0,0 +1,59 @@
+/*
+ Copyright (c) 2020-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#ifndef __TBB_small_object_pool_impl_H
+#define __TBB_small_object_pool_impl_H
+
+#include "oneapi/tbb/detail/_small_object_pool.h"
+#include "oneapi/tbb/detail/_utils.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <atomic>
+
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+
+class thread_data;
+
+class small_object_pool_impl : public d1::small_object_pool
+{
+ static constexpr std::size_t small_object_size = 256;
+ struct small_object {
+ small_object* next;
+ };
+ static small_object* const dead_public_list;
+public:
+ void* allocate_impl(small_object_pool*& allocator, std::size_t number_of_bytes);
+ void deallocate_impl(void* ptr, std::size_t number_of_bytes, thread_data& td);
+ void destroy();
+private:
+ static std::int64_t cleanup_list(small_object* list);
+ ~small_object_pool_impl() = default;
+private:
+ alignas(max_nfs_size) small_object* m_private_list;
+ std::int64_t m_private_counter{};
+ alignas(max_nfs_size) std::atomic<small_object*> m_public_list;
+ std::atomic<std::int64_t> m_public_counter{};
+};
+
+} // namespace r1
+} // namespace detail
+} // namespace tbb
+
+#endif /* __TBB_small_object_pool_impl_H */
diff --git a/contrib/libs/tbb/src/tbb/task.cpp b/contrib/libs/tbb/src/tbb/task.cpp
new file mode 100644
index 0000000000..129614447a
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/task.cpp
@@ -0,0 +1,225 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+// Do not include task.h directly. Use scheduler_common.h instead
+#include "scheduler_common.h"
+#include "governor.h"
+#include "arena.h"
+#include "thread_data.h"
+#include "task_dispatcher.h"
+#include "waiters.h"
+#include "itt_notify.h"
+
+#include "oneapi/tbb/detail/_task.h"
+#include "oneapi/tbb/partitioner.h"
+#include "oneapi/tbb/task.h"
+
+#include <cstring>
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+
+//------------------------------------------------------------------------
+// resumable tasks
+//------------------------------------------------------------------------
+#if __TBB_RESUMABLE_TASKS
+
+void suspend(suspend_callback_type suspend_callback, void* user_callback) {
+ thread_data& td = *governor::get_thread_data();
+ td.my_task_dispatcher->suspend(suspend_callback, user_callback);
+ // Do not access td after suspend.
+}
+
+void resume(suspend_point_type* sp) {
+ assert_pointers_valid(sp, sp->m_arena);
+ task_dispatcher& task_disp = sp->m_resume_task.m_target;
+ __TBB_ASSERT(task_disp.m_thread_data == nullptr, nullptr);
+
+ // TODO: remove this work-around
+ // Prolong the arena's lifetime while all coroutines are alive
+ // (otherwise the arena can be destroyed while some tasks are suspended).
+ arena& a = *sp->m_arena;
+ a.my_references += arena::ref_external;
+
+ if (task_disp.m_properties.critical_task_allowed) {
+ // The target is not in the process of executing critical task, so the resume task is not critical.
+ a.my_resume_task_stream.push(&sp->m_resume_task, random_lane_selector(sp->m_random));
+ } else {
+#if __TBB_PREVIEW_CRITICAL_TASKS
+ // The target is in the process of executing critical task, so the resume task is critical.
+ a.my_critical_task_stream.push(&sp->m_resume_task, random_lane_selector(sp->m_random));
+#endif
+ }
+
+ // Do not access target after that point.
+ a.advertise_new_work<arena::wakeup>();
+
+ // Release our reference to my_arena.
+ a.on_thread_leaving<arena::ref_external>();
+}
+
+suspend_point_type* current_suspend_point() {
+ thread_data& td = *governor::get_thread_data();
+ return td.my_task_dispatcher->get_suspend_point();
+}
+
+static task_dispatcher& create_coroutine(thread_data& td) {
+ // We may have some task dispatchers cached
+ task_dispatcher* task_disp = td.my_arena->my_co_cache.pop();
+ if (!task_disp) {
+ void* ptr = cache_aligned_allocate(sizeof(task_dispatcher));
+ task_disp = new(ptr) task_dispatcher(td.my_arena);
+ task_disp->init_suspend_point(td.my_arena, td.my_arena->my_market->worker_stack_size());
+ }
+ // Prolong the arena's lifetime while any coroutine is alive
+ // (otherwise the arena can be destroyed while some tasks are suspended).
+ // TODO: consider behavior if there are more than 4K external references.
+ td.my_arena->my_references += arena::ref_external;
+ return *task_disp;
+}
+
+void task_dispatcher::suspend(suspend_callback_type suspend_callback, void* user_callback) {
+ __TBB_ASSERT(suspend_callback != nullptr, nullptr);
+ __TBB_ASSERT(user_callback != nullptr, nullptr);
+ __TBB_ASSERT(m_thread_data != nullptr, nullptr);
+
+ arena_slot* slot = m_thread_data->my_arena_slot;
+ __TBB_ASSERT(slot != nullptr, nullptr);
+
+ task_dispatcher& default_task_disp = slot->default_task_dispatcher();
+ // TODO: simplify the next line, e.g. is_task_dispatcher_recalled( task_dispatcher& )
+ bool is_recalled = default_task_disp.get_suspend_point()->m_is_owner_recalled.load(std::memory_order_acquire);
+ task_dispatcher& target = is_recalled ? default_task_disp : create_coroutine(*m_thread_data);
+
+ thread_data::suspend_callback_wrapper callback = { suspend_callback, user_callback, get_suspend_point() };
+ m_thread_data->set_post_resume_action(thread_data::post_resume_action::callback, &callback);
+ resume(target);
+
+ if (m_properties.outermost) {
+ recall_point();
+ }
+}
+
+void task_dispatcher::resume(task_dispatcher& target) {
+ // Do not create non-trivial objects on the stack of this function. They might never be destroyed
+ {
+ thread_data* td = m_thread_data;
+        __TBB_ASSERT(&target != this, "We cannot resume ourselves");
+        __TBB_ASSERT(td != nullptr, "This task dispatcher must be attached to thread data");
+ __TBB_ASSERT(td->my_task_dispatcher == this, "Thread data must be attached to this task dispatcher");
+ __TBB_ASSERT(td->my_post_resume_action != thread_data::post_resume_action::none, "The post resume action must be set");
+ __TBB_ASSERT(td->my_post_resume_arg, "The post resume action must have an argument");
+
+ // Change the task dispatcher
+ td->detach_task_dispatcher();
+ td->attach_task_dispatcher(target);
+ }
+ __TBB_ASSERT(m_suspend_point != nullptr, "Suspend point must be created");
+ __TBB_ASSERT(target.m_suspend_point != nullptr, "Suspend point must be created");
+ // Swap to the target coroutine.
+ m_suspend_point->m_co_context.resume(target.m_suspend_point->m_co_context);
+    // Note that m_thread_data can change after the resume
+ {
+ thread_data* td = m_thread_data;
+ __TBB_ASSERT(td != nullptr, "This task dispatcher must be attach to a thread data");
+ __TBB_ASSERT(td->my_task_dispatcher == this, "Thread data must be attached to this task dispatcher");
+ td->do_post_resume_action();
+
+        // Remove the recall flag if the thread is in its original task dispatcher
+ arena_slot* slot = td->my_arena_slot;
+ __TBB_ASSERT(slot != nullptr, nullptr);
+ if (this == slot->my_default_task_dispatcher) {
+ __TBB_ASSERT(m_suspend_point != nullptr, nullptr);
+ m_suspend_point->m_is_owner_recalled.store(false, std::memory_order_relaxed);
+ }
+ }
+}
+
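+// Post-resume action protocol (summarized from the switch below). Each action is paired with
+// an argument stored via set_post_resume_action() before the context switch:
+//   register_waiter - extended_concurrent_monitor::resume_context* to notify;
+//   resume          - suspend_point_type* to resume;
+//   callback        - suspend_callback_wrapper* that invokes the user callback;
+//   cleanup         - task_dispatcher* whose coroutine is returned to the arena's cache;
+//   notify          - std::atomic<bool>* owner-recall flag to set.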
+void thread_data::do_post_resume_action() {
+ __TBB_ASSERT(my_post_resume_action != thread_data::post_resume_action::none, "The post resume action must be set");
+ __TBB_ASSERT(my_post_resume_arg, "The post resume action must have an argument");
+
+ switch (my_post_resume_action) {
+ case post_resume_action::register_waiter:
+ {
+ static_cast<extended_concurrent_monitor::resume_context*>(my_post_resume_arg)->notify();
+ break;
+ }
+ case post_resume_action::resume:
+ {
+ r1::resume(static_cast<suspend_point_type*>(my_post_resume_arg));
+ break;
+ }
+ case post_resume_action::callback:
+ {
+ suspend_callback_wrapper callback = *static_cast<suspend_callback_wrapper*>(my_post_resume_arg);
+ callback();
+ break;
+ }
+ case post_resume_action::cleanup:
+ {
+ task_dispatcher* to_cleanup = static_cast<task_dispatcher*>(my_post_resume_arg);
+ // Release coroutine's reference to my_arena.
+ my_arena->on_thread_leaving<arena::ref_external>();
+ // Cache the coroutine for possible later re-usage
+ my_arena->my_co_cache.push(to_cleanup);
+ break;
+ }
+ case post_resume_action::notify:
+ {
+ std::atomic<bool>& owner_recall_flag = *static_cast<std::atomic<bool>*>(my_post_resume_arg);
+ owner_recall_flag.store(true, std::memory_order_release);
+ // Do not access recall_flag because it can be destroyed after the notification.
+ break;
+ }
+ default:
+ __TBB_ASSERT(false, "Unknown post resume action");
+ }
+
+ my_post_resume_action = post_resume_action::none;
+ my_post_resume_arg = nullptr;
+}
+
+#else
+
+void suspend(suspend_callback_type, void*) {
+ __TBB_ASSERT_RELEASE(false, "Resumable tasks are unsupported on this platform");
+}
+
+void resume(suspend_point_type*) {
+ __TBB_ASSERT_RELEASE(false, "Resumable tasks are unsupported on this platform");
+}
+
+suspend_point_type* current_suspend_point() {
+ __TBB_ASSERT_RELEASE(false, "Resumable tasks are unsupported on this platform");
+ return nullptr;
+}
+
+#endif /* __TBB_RESUMABLE_TASKS */
+
+void notify_waiters(std::uintptr_t wait_ctx_addr) {
+ auto is_related_wait_ctx = [&] (extended_context context) {
+ return wait_ctx_addr == context.my_uniq_addr;
+ };
+
+ r1::governor::get_thread_data()->my_arena->my_market->get_wait_list().notify(is_related_wait_ctx);
+}
+
+} // namespace r1
+} // namespace detail
+} // namespace tbb
+
diff --git a/contrib/libs/tbb/src/tbb/task_dispatcher.cpp b/contrib/libs/tbb/src/tbb/task_dispatcher.cpp
new file mode 100644
index 0000000000..86818af1d1
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/task_dispatcher.cpp
@@ -0,0 +1,240 @@
+/*
+ Copyright (c) 2020-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "task_dispatcher.h"
+#include "waiters.h"
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+
+static inline void spawn_and_notify(d1::task& t, arena_slot* slot, arena* a) {
+ slot->spawn(t);
+ a->advertise_new_work<arena::work_spawned>();
+ // TODO: TBB_REVAMP_TODO slot->assert_task_pool_valid();
+}
+
+void __TBB_EXPORTED_FUNC spawn(d1::task& t, d1::task_group_context& ctx) {
+ thread_data* tls = governor::get_thread_data();
+ task_group_context_impl::bind_to(ctx, tls);
+ arena* a = tls->my_arena;
+ arena_slot* slot = tls->my_arena_slot;
+ // Capture current context
+ task_accessor::context(t) = &ctx;
+ // Mark isolation
+ task_accessor::isolation(t) = tls->my_task_dispatcher->m_execute_data_ext.isolation;
+ spawn_and_notify(t, slot, a);
+}
+
+void __TBB_EXPORTED_FUNC spawn(d1::task& t, d1::task_group_context& ctx, d1::slot_id id) {
+ thread_data* tls = governor::get_thread_data();
+ task_group_context_impl::bind_to(ctx, tls);
+ arena* a = tls->my_arena;
+ arena_slot* slot = tls->my_arena_slot;
+ execution_data_ext& ed = tls->my_task_dispatcher->m_execute_data_ext;
+
+ // Capture context
+ task_accessor::context(t) = &ctx;
+ // Mark isolation
+ task_accessor::isolation(t) = ed.isolation;
+
+ if ( id != d1::no_slot && id != tls->my_arena_index ) {
+ // Allocate proxy task
+ d1::small_object_allocator alloc{};
+ auto proxy = alloc.new_object<task_proxy>(static_cast<d1::execution_data&>(ed));
+ // Mark as a proxy
+ task_accessor::set_proxy_trait(*proxy);
+ // Mark isolation for the proxy task
+ task_accessor::isolation(*proxy) = ed.isolation;
+ // Deallocation hint (tls) from the task allocator
+ proxy->allocator = alloc;
+ proxy->slot = id;
+ proxy->outbox = &a->mailbox(id);
+ // Mark proxy as present in both locations (sender's task pool and destination mailbox)
+ proxy->task_and_tag = intptr_t(&t) | task_proxy::location_mask;
+ // Mail the proxy - after this point t may be destroyed by another thread at any moment.
+ proxy->outbox->push(proxy);
+ // Spawn proxy to the local task pool
+ spawn_and_notify(*proxy, slot, a);
+ } else {
+ spawn_and_notify(t, slot, a);
+ }
+}
+
+void __TBB_EXPORTED_FUNC submit(d1::task& t, d1::task_group_context& ctx, arena* a, std::uintptr_t as_critical) {
+ suppress_unused_warning(as_critical);
+ assert_pointer_valid(a);
+ thread_data& tls = *governor::get_thread_data();
+
+    // TODO revamp: for each use case investigate the necessity of making this call
+ task_group_context_impl::bind_to(ctx, &tls);
+ task_accessor::context(t) = &ctx;
+ // TODO revamp: consider respecting task isolation if this call is being made by external thread
+ task_accessor::isolation(t) = tls.my_task_dispatcher->m_execute_data_ext.isolation;
+
+ // TODO: consider code refactoring when lane selection mechanism is unified.
+
+ if ( tls.is_attached_to(a) ) {
+ arena_slot* slot = tls.my_arena_slot;
+#if __TBB_PREVIEW_CRITICAL_TASKS
+ if( as_critical ) {
+ a->my_critical_task_stream.push( &t, subsequent_lane_selector(slot->critical_hint()) );
+ } else
+#endif
+ {
+ slot->spawn(t);
+ }
+ } else {
+ random_lane_selector lane_selector{tls.my_random};
+#if !__TBB_PREVIEW_CRITICAL_TASKS
+ suppress_unused_warning(as_critical);
+#else
+ if ( as_critical ) {
+ a->my_critical_task_stream.push( &t, lane_selector );
+ } else
+#endif
+ {
+ // Avoid joining the arena the thread is not currently in.
+ a->my_fifo_task_stream.push( &t, lane_selector );
+ }
+ }
+ // It is assumed that some thread will explicitly wait in the arena the task is submitted
+ // into. Therefore, no need to utilize mandatory concurrency here.
+ a->advertise_new_work<arena::work_spawned>();
+}
+
+void __TBB_EXPORTED_FUNC execute_and_wait(d1::task& t, d1::task_group_context& t_ctx, d1::wait_context& wait_ctx, d1::task_group_context& w_ctx) {
+ task_accessor::context(t) = &t_ctx;
+ task_dispatcher::execute_and_wait(&t, wait_ctx, w_ctx);
+}
+
+void __TBB_EXPORTED_FUNC wait(d1::wait_context& wait_ctx, d1::task_group_context& w_ctx) {
+ // Enter the task dispatch loop without a task
+ task_dispatcher::execute_and_wait(nullptr, wait_ctx, w_ctx);
+}
+
+d1::slot_id __TBB_EXPORTED_FUNC execution_slot(const d1::execution_data* ed) {
+ if (ed) {
+ const execution_data_ext* ed_ext = static_cast<const execution_data_ext*>(ed);
+ assert_pointers_valid(ed_ext->task_disp, ed_ext->task_disp->m_thread_data);
+ return ed_ext->task_disp->m_thread_data->my_arena_index;
+ } else {
+ thread_data* td = governor::get_thread_data_if_initialized();
+ return td ? int(td->my_arena_index) : -1;
+ }
+}
+
+d1::task_group_context* __TBB_EXPORTED_FUNC current_context() {
+ thread_data* td = governor::get_thread_data();
+ assert_pointers_valid(td, td->my_task_dispatcher);
+
+ task_dispatcher* task_disp = td->my_task_dispatcher;
+ if (task_disp->m_properties.outermost) {
+        // No task is being executed, so there is no execute_data.
+ return nullptr;
+ } else {
+ return td->my_task_dispatcher->m_execute_data_ext.context;
+ }
+}
+
+void task_dispatcher::execute_and_wait(d1::task* t, d1::wait_context& wait_ctx, d1::task_group_context& w_ctx) {
+ // Get an associated task dispatcher
+ thread_data* tls = governor::get_thread_data();
+ __TBB_ASSERT(tls->my_task_dispatcher != nullptr, nullptr);
+ task_dispatcher& local_td = *tls->my_task_dispatcher;
+
+ // TODO: factor out the binding to execute_and_wait_impl
+ if (t) {
+ task_group_context_impl::bind_to(*task_accessor::context(*t), tls);
+ // Propagate the isolation to the task executed without spawn.
+ task_accessor::isolation(*t) = tls->my_task_dispatcher->m_execute_data_ext.isolation;
+ }
+
+ // Waiting on special object tied to a waiting thread.
+ external_waiter waiter{ *tls->my_arena, wait_ctx };
+ t = local_td.local_wait_for_all(t, waiter);
+ __TBB_ASSERT_EX(t == nullptr, "External waiter must not leave dispatch loop with a task");
+
+    // The external thread must not leave the dispatch loop in the idle state
+ if (local_td.m_thread_data->my_inbox.is_idle_state(true)) {
+ local_td.m_thread_data->my_inbox.set_is_idle(false);
+ }
+
+ if (w_ctx.my_exception) {
+ __TBB_ASSERT(w_ctx.is_group_execution_cancelled(), "The task group context with an exception should be canceled.");
+ w_ctx.my_exception->throw_self();
+ }
+}
+
+#if __TBB_RESUMABLE_TASKS
+
+#if _WIN32
+/* [[noreturn]] */ void __stdcall co_local_wait_for_all(void* arg) noexcept
+#else
+/* [[noreturn]] */ void co_local_wait_for_all(void* arg) noexcept
+#endif
+{
+ // Do not create non-trivial objects on the stack of this function. They will never be destroyed.
+ __TBB_ASSERT(arg != nullptr, nullptr);
+ task_dispatcher& task_disp = *static_cast<task_dispatcher*>(arg);
+
+ assert_pointers_valid(task_disp.m_thread_data, task_disp.m_thread_data->my_arena);
+ task_disp.set_stealing_threshold(task_disp.m_thread_data->my_arena->calculate_stealing_threshold());
+ __TBB_ASSERT(task_disp.can_steal(), nullptr);
+ task_disp.co_local_wait_for_all();
+ // This code is unreachable
+}
+
+/* [[noreturn]] */ void task_dispatcher::co_local_wait_for_all() noexcept {
+ // Do not create non-trivial objects on the stack of this function. They will never be destroyed.
+ assert_pointer_valid(m_thread_data);
+
+ // Basically calls the user callback passed to the tbb::task::suspend function
+ m_thread_data->do_post_resume_action();
+
+    // Endless loop because the coroutine can be reused
+ for (;;) {
+ arena* a = m_thread_data->my_arena;
+ coroutine_waiter waiter(*a);
+ d1::task* resume_task = local_wait_for_all(nullptr, waiter);
+ assert_task_valid(resume_task);
+ __TBB_ASSERT(this == m_thread_data->my_task_dispatcher, nullptr);
+
+ m_thread_data->set_post_resume_action(thread_data::post_resume_action::cleanup, this);
+ resume(static_cast<suspend_point_type::resume_task*>(resume_task)->m_target);
+ }
+ // This code is unreachable
+}
+
+d1::suspend_point task_dispatcher::get_suspend_point() {
+ if (m_suspend_point == nullptr) {
+ assert_pointer_valid(m_thread_data);
+ // 0 means that we attach this task dispatcher to the current stack
+ init_suspend_point(m_thread_data->my_arena, 0);
+ }
+ assert_pointer_valid(m_suspend_point);
+ return m_suspend_point;
+}
+void task_dispatcher::init_suspend_point(arena* a, std::size_t stack_size) {
+ __TBB_ASSERT(m_suspend_point == nullptr, nullptr);
+ m_suspend_point = new(cache_aligned_allocate(sizeof(suspend_point_type)))
+ suspend_point_type(a, stack_size, *this);
+}
+#endif /* __TBB_RESUMABLE_TASKS */
+} // namespace r1
+} // namespace detail
+} // namespace tbb
+
diff --git a/contrib/libs/tbb/src/tbb/task_dispatcher.h b/contrib/libs/tbb/src/tbb/task_dispatcher.h
new file mode 100644
index 0000000000..54a6c0d934
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/task_dispatcher.h
@@ -0,0 +1,465 @@
+/*
+ Copyright (c) 2020-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#ifndef _TBB_task_dispatcher_H
+#define _TBB_task_dispatcher_H
+
+#include "oneapi/tbb/detail/_utils.h"
+#include "oneapi/tbb/detail/_task.h"
+#include "oneapi/tbb/global_control.h"
+
+#include "scheduler_common.h"
+#include "waiters.h"
+#include "arena_slot.h"
+#include "arena.h"
+#include "thread_data.h"
+#include "mailbox.h"
+#include "itt_notify.h"
+#include "concurrent_monitor.h"
+
+#include <atomic>
+
+#if !__TBB_CPU_CTL_ENV_PRESENT
+#include <fenv.h>
+#endif
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+
+inline d1::task* get_self_recall_task(arena_slot& slot) {
+ suppress_unused_warning(slot);
+ d1::task* t = nullptr;
+#if __TBB_RESUMABLE_TASKS
+ suspend_point_type* sp = slot.default_task_dispatcher().m_suspend_point;
+ if (sp && sp->m_is_owner_recalled.load(std::memory_order_acquire)) {
+ t = &sp->m_resume_task;
+ __TBB_ASSERT(sp->m_resume_task.m_target.m_thread_data == nullptr, nullptr);
+ }
+#endif /* __TBB_RESUMABLE_TASKS */
+ return t;
+}
+
+// Defined in exception.cpp
+/*[[noreturn]]*/void do_throw_noexcept(void (*throw_exception)()) noexcept;
+
+//------------------------------------------------------------------------
+// Suspend point
+//------------------------------------------------------------------------
+#if __TBB_RESUMABLE_TASKS
+
+inline d1::task* suspend_point_type::resume_task::execute(d1::execution_data& ed) {
+ execution_data_ext& ed_ext = static_cast<execution_data_ext&>(ed);
+
+ if (ed_ext.wait_ctx) {
+ extended_concurrent_monitor::resume_context monitor_node{{std::uintptr_t(ed_ext.wait_ctx), nullptr}, ed_ext, m_target};
+ // The wait_ctx is present only in external_waiter. In that case we leave the current stack
+ // in the abandoned state to resume when waiting completes.
+ thread_data* td = ed_ext.task_disp->m_thread_data;
+ td->set_post_resume_action(thread_data::post_resume_action::register_waiter, &monitor_node);
+
+ extended_concurrent_monitor& wait_list = td->my_arena->my_market->get_wait_list();
+
+ if (wait_list.wait([&] { return !ed_ext.wait_ctx->continue_execution(); }, monitor_node)) {
+ return nullptr;
+ }
+
+ td->clear_post_resume_action();
+ td->set_post_resume_action(thread_data::post_resume_action::resume, ed_ext.task_disp->get_suspend_point());
+ } else {
+        // If wait_ctx is null, this can only be a worker thread at the outermost level, because
+        // coroutine_waiter interrupts the bypass loop before the resume_task is executed.
+ ed_ext.task_disp->m_thread_data->set_post_resume_action(thread_data::post_resume_action::notify,
+ &ed_ext.task_disp->get_suspend_point()->m_is_owner_recalled);
+ }
+ // Do not access this task because it might be destroyed
+ ed_ext.task_disp->resume(m_target);
+ return nullptr;
+}
+
+inline suspend_point_type::suspend_point_type(arena* a, size_t stack_size, task_dispatcher& task_disp)
+ : m_arena(a)
+ , m_random(this)
+ , m_co_context(stack_size, &task_disp)
+ , m_resume_task(task_disp)
+{
+ assert_pointer_valid(m_arena);
+ assert_pointer_valid(m_arena->my_default_ctx);
+ task_accessor::context(m_resume_task) = m_arena->my_default_ctx;
+ task_accessor::isolation(m_resume_task) = no_isolation;
+ // Initialize the itt_caller for the context of the resume task.
+ // It will be bound to the stack of the first suspend call.
+ task_group_context_impl::bind_to(*task_accessor::context(m_resume_task), task_disp.m_thread_data);
+}
+
+#endif /* __TBB_RESUMABLE_TASKS */
+
+//------------------------------------------------------------------------
+// Task Dispatcher
+//------------------------------------------------------------------------
+inline task_dispatcher::task_dispatcher(arena* a) {
+ m_execute_data_ext.context = a->my_default_ctx;
+ m_execute_data_ext.task_disp = this;
+}
+
+inline bool task_dispatcher::can_steal() {
+ __TBB_ASSERT(m_stealing_threshold != 0, nullptr);
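+    // The address of a local variable approximates the current stack position. Since the stack
+    // grows downwards, stealing is allowed only while this address stays above the precomputed
+    // threshold, i.e. while enough stack space remains to process a stolen task.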
+ stack_anchor_type anchor{};
+ return reinterpret_cast<std::uintptr_t>(&anchor) > m_stealing_threshold;
+}
+
+inline d1::task* task_dispatcher::get_inbox_or_critical_task(
+ execution_data_ext& ed, mail_inbox& inbox, isolation_type isolation, bool critical_allowed)
+{
+ if (inbox.empty())
+ return nullptr;
+ d1::task* result = get_critical_task(nullptr, ed, isolation, critical_allowed);
+ if (result)
+ return result;
+ // Check if there are tasks mailed to this thread via task-to-thread affinity mechanism.
+ result = get_mailbox_task(inbox, ed, isolation);
+ // There is a race with a thread adding a new task (possibly with suitable isolation)
+ // to our mailbox, so the below conditions might result in a false positive.
+ // Then set_is_idle(false) allows that task to be stolen; it's OK.
+ if (isolation != no_isolation && !result && !inbox.empty() && inbox.is_idle_state(true)) {
+        // We have proxy tasks in our mailbox, but the isolation blocks their execution.
+        // So publish the proxy tasks in the mailbox to make them available for stealing from the owner's task pool.
+ inbox.set_is_idle( false );
+ }
+ return result;
+}
+
+inline d1::task* task_dispatcher::get_stream_or_critical_task(
+ execution_data_ext& ed, arena& a, task_stream<front_accessor>& stream, unsigned& hint,
+ isolation_type isolation, bool critical_allowed)
+{
+ if (stream.empty())
+ return nullptr;
+ d1::task* result = get_critical_task(nullptr, ed, isolation, critical_allowed);
+ if (result)
+ return result;
+ return a.get_stream_task(stream, hint);
+}
+
+inline d1::task* task_dispatcher::steal_or_get_critical(
+ execution_data_ext& ed, arena& a, unsigned arena_index, FastRandom& random,
+ isolation_type isolation, bool critical_allowed)
+{
+ if (d1::task* t = a.steal_task(arena_index, random, ed, isolation)) {
+ ed.context = task_accessor::context(*t);
+ ed.isolation = task_accessor::isolation(*t);
+ return get_critical_task(t, ed, isolation, critical_allowed);
+ }
+ return nullptr;
+}
+
+template <bool ITTPossible, typename Waiter>
+d1::task* task_dispatcher::receive_or_steal_task(
+ thread_data& tls, execution_data_ext& ed, Waiter& waiter, isolation_type isolation,
+ bool fifo_allowed, bool critical_allowed)
+{
+ __TBB_ASSERT(governor::is_thread_data_set(&tls), NULL);
+ // Task to return
+ d1::task* t = nullptr;
+ // Get tls data (again)
+ arena& a = *tls.my_arena;
+ arena_slot& slot = *tls.my_arena_slot;
+ unsigned arena_index = tls.my_arena_index;
+ mail_inbox& inbox = tls.my_inbox;
+ task_stream<front_accessor>& resume_stream = a.my_resume_task_stream;
+ unsigned& resume_hint = slot.hint_for_resume_stream;
+ task_stream<front_accessor>& fifo_stream = a.my_fifo_task_stream;
+ unsigned& fifo_hint = slot.hint_for_fifo_stream;
+
+ waiter.reset_wait();
+ // Thread is in idle state now
+ inbox.set_is_idle(true);
+
+ bool stealing_is_allowed = can_steal();
+
+ // Stealing loop mailbox/enqueue/other_slots
+ for (;;) {
+ __TBB_ASSERT(t == nullptr, nullptr);
+ // Check if the resource manager requires our arena to relinquish some threads
+ // For the external thread restore idle state to true after dispatch loop
+ if (!waiter.continue_execution(slot, t)) {
+ __TBB_ASSERT(t == nullptr, nullptr);
+ break;
+ }
+ // Start searching
+ if (t != nullptr) {
+ // continue_execution returned a task
+ }
+ else if ((t = get_inbox_or_critical_task(ed, inbox, isolation, critical_allowed))) {
+ // Successfully got the task from mailbox or critical task
+ }
+ else if ((t = get_stream_or_critical_task(ed, a, resume_stream, resume_hint, isolation, critical_allowed))) {
+ // Successfully got the resume or critical task
+ }
+ else if (fifo_allowed && isolation == no_isolation
+ && (t = get_stream_or_critical_task(ed, a, fifo_stream, fifo_hint, isolation, critical_allowed))) {
+            // Successfully got a task from the starvation-resistant stream. Allowed only at the outermost dispatch level without isolation.
+ }
+ else if (stealing_is_allowed
+ && (t = steal_or_get_critical(ed, a, arena_index, tls.my_random, isolation, critical_allowed))) {
+ // Stole a task from a random arena slot
+ }
+ else {
+ t = get_critical_task(t, ed, isolation, critical_allowed);
+ }
+
+ if (t != nullptr) {
+ ed.context = task_accessor::context(*t);
+ ed.isolation = task_accessor::isolation(*t);
+ a.my_observers.notify_entry_observers(tls.my_last_observer, tls.my_is_worker);
+ break; // Stealing success, end of stealing attempt
+ }
+ // Nothing to do, pause a little.
+ waiter.pause(slot);
+ } // end of nonlocal task retrieval loop
+ if (inbox.is_idle_state(true)) {
+ inbox.set_is_idle(false);
+ }
+ return t;
+}
+
+template <bool ITTPossible, typename Waiter>
+d1::task* task_dispatcher::local_wait_for_all(d1::task* t, Waiter& waiter ) {
+ assert_pointer_valid(m_thread_data);
+ __TBB_ASSERT(m_thread_data->my_task_dispatcher == this, nullptr);
+
+ // Guard an outer/default execution state
+ struct dispatch_loop_guard {
+ task_dispatcher& task_disp;
+ execution_data_ext old_execute_data_ext;
+ properties old_properties;
+
+ ~dispatch_loop_guard() {
+ task_disp.m_execute_data_ext = old_execute_data_ext;
+ task_disp.m_properties = old_properties;
+
+ __TBB_ASSERT(task_disp.m_thread_data && governor::is_thread_data_set(task_disp.m_thread_data), nullptr);
+ __TBB_ASSERT(task_disp.m_thread_data->my_task_dispatcher == &task_disp, nullptr);
+ }
+ } dl_guard{ *this, m_execute_data_ext, m_properties };
+
+ // The context guard to track fp setting and itt tasks.
+ context_guard_helper</*report_tasks=*/ITTPossible> context_guard;
+
+ // Current isolation context
+ const isolation_type isolation = dl_guard.old_execute_data_ext.isolation;
+
+    // Critical work inflection point. Once it turns false, the current execution context has taken
+    // a critical task on a previous stack frame and cannot take more until that critical path is
+    // finished.
+ bool critical_allowed = dl_guard.old_properties.critical_task_allowed;
+
+ // Extended execution data that is used for dispatching.
+ // Base version is passed to the task::execute method.
+ execution_data_ext& ed = m_execute_data_ext;
+ ed.context = t ? task_accessor::context(*t) : nullptr;
+ ed.original_slot = m_thread_data->my_arena_index;
+ ed.affinity_slot = d1::no_slot;
+ ed.task_disp = this;
+ ed.wait_ctx = waiter.wait_ctx();
+
+ m_properties.outermost = false;
+ m_properties.fifo_tasks_allowed = false;
+
+ t = get_critical_task(t, ed, isolation, critical_allowed);
+
+ // Infinite exception loop
+ for (;;) {
+ try {
+ // Main execution loop
+ do {
+ // We assume that bypass tasks are from the same task group.
+ context_guard.set_ctx(ed.context);
+ // Inner level evaluates tasks coming from nesting loops and those returned
+ // by just executed tasks (bypassing spawn or enqueue calls).
+ while (t != nullptr) {
+ assert_task_valid(t);
+ assert_pointer_valid</*alignment = */alignof(void*)>(ed.context);
+ __TBB_ASSERT(ed.context->my_lifetime_state > d1::task_group_context::lifetime_state::locked &&
+ ed.context->my_lifetime_state < d1::task_group_context::lifetime_state::dying, nullptr);
+ __TBB_ASSERT(m_thread_data->my_inbox.is_idle_state(false), nullptr);
+ __TBB_ASSERT(task_accessor::is_resume_task(*t) || isolation == no_isolation || isolation == ed.isolation, nullptr);
+ // Check premature leave
+ if (Waiter::postpone_execution(*t)) {
+ __TBB_ASSERT(task_accessor::is_resume_task(*t) && dl_guard.old_properties.outermost,
+ "Currently, the bypass loop can be interrupted only for resume task on outermost level");
+ return t;
+ }
+ // Copy itt_caller to a stack because the context might be destroyed after t->execute.
+ void* itt_caller = ed.context->my_itt_caller;
+ suppress_unused_warning(itt_caller);
+
+ ITT_CALLEE_ENTER(ITTPossible, t, itt_caller);
+
+ if (ed.context->is_group_execution_cancelled()) {
+ t = t->cancel(ed);
+ } else {
+ t = t->execute(ed);
+ }
+
+ ITT_CALLEE_LEAVE(ITTPossible, itt_caller);
+
+ // The task affinity in execution data is set for affinitized tasks.
+ // So drop it after the task execution.
+ ed.affinity_slot = d1::no_slot;
+ // Reset task owner id for bypassed task
+ ed.original_slot = m_thread_data->my_arena_index;
+ t = get_critical_task(t, ed, isolation, critical_allowed);
+ }
+ __TBB_ASSERT(m_thread_data && governor::is_thread_data_set(m_thread_data), nullptr);
+ __TBB_ASSERT(m_thread_data->my_task_dispatcher == this, nullptr);
+ // When refactoring, pay attention that m_thread_data can be changed after t->execute()
+ __TBB_ASSERT(m_thread_data->my_arena_slot != nullptr, nullptr);
+ arena_slot& slot = *m_thread_data->my_arena_slot;
+ if (!waiter.continue_execution(slot, t)) {
+ break;
+ }
+ // Retrieve the task from local task pool
+ if (t || (slot.is_task_pool_published() && (t = slot.get_task(ed, isolation)))) {
+ __TBB_ASSERT(ed.original_slot == m_thread_data->my_arena_index, NULL);
+ ed.context = task_accessor::context(*t);
+ ed.isolation = task_accessor::isolation(*t);
+ continue;
+ }
+ // Retrieve the task from global sources
+ t = receive_or_steal_task<ITTPossible>(
+ *m_thread_data, ed, waiter, isolation, dl_guard.old_properties.fifo_tasks_allowed,
+ critical_allowed
+ );
+ } while (t != nullptr); // main dispatch loop
+ break; // Exit exception loop;
+ } catch (...) {
+ if (global_control::active_value(global_control::terminate_on_exception) == 1) {
+ do_throw_noexcept([] { throw; });
+ }
+ if (ed.context->cancel_group_execution()) {
+ /* We are the first to signal cancellation, so store the exception that caused it. */
+ ed.context->my_exception = tbb_exception_ptr::allocate();
+ }
+ }
+ } // Infinite exception loop
+ __TBB_ASSERT(t == nullptr, nullptr);
+
+
+#if __TBB_RESUMABLE_TASKS
+ if (dl_guard.old_properties.outermost) {
+ recall_point();
+ }
+#endif /* __TBB_RESUMABLE_TASKS */
+
+ return nullptr;
+}
+
+#if __TBB_RESUMABLE_TASKS
+inline void task_dispatcher::recall_point() {
+ if (this != &m_thread_data->my_arena_slot->default_task_dispatcher()) {
+ __TBB_ASSERT(m_suspend_point != nullptr, nullptr);
+ __TBB_ASSERT(m_suspend_point->m_is_owner_recalled.load(std::memory_order_relaxed) == false, nullptr);
+ d1::suspend([](suspend_point_type* sp) {
+ sp->m_is_owner_recalled.store(true, std::memory_order_release);
+ auto is_related_suspend_point = [sp] (extended_context context) {
+ std::uintptr_t sp_addr = std::uintptr_t(sp);
+ return sp_addr == context.my_uniq_addr;
+ };
+ sp->m_arena->my_market->get_wait_list().notify(is_related_suspend_point);
+ });
+
+ if (m_thread_data->my_inbox.is_idle_state(true)) {
+ m_thread_data->my_inbox.set_is_idle(false);
+ }
+ }
+}
+#endif /* __TBB_RESUMABLE_TASKS */
+
+#if __TBB_PREVIEW_CRITICAL_TASKS
+inline d1::task* task_dispatcher::get_critical_task(d1::task* t, execution_data_ext& ed, isolation_type isolation, bool critical_allowed) {
+ __TBB_ASSERT( critical_allowed || !m_properties.critical_task_allowed, nullptr );
+
+ if (!critical_allowed) {
+        // The stack is already in the process of critical path execution. It cannot take more
+        // critical work until the current critical task is finished.
+ __TBB_ASSERT(!m_properties.critical_task_allowed, nullptr);
+ return t;
+ }
+
+ assert_pointers_valid(m_thread_data, m_thread_data->my_arena, m_thread_data->my_arena_slot);
+ thread_data& td = *m_thread_data;
+ arena& a = *td.my_arena;
+ arena_slot& slot = *td.my_arena_slot;
+
+ d1::task* crit_t = a.get_critical_task(slot.hint_for_critical_stream, isolation);
+ if (crit_t != nullptr) {
+ assert_task_valid(crit_t);
+ if (t != nullptr) {
+ assert_pointer_valid</*alignment = */alignof(void*)>(ed.context);
+ r1::spawn(*t, *ed.context);
+ }
+ ed.context = task_accessor::context(*crit_t);
+ ed.isolation = task_accessor::isolation(*crit_t);
+
+ // We cannot execute more than one critical task on the same stack.
+ // In other words, we prevent nested critical tasks.
+ m_properties.critical_task_allowed = false;
+
+ // TODO: add a test that the observer is called when critical task is taken.
+ a.my_observers.notify_entry_observers(td.my_last_observer, td.my_is_worker);
+ t = crit_t;
+ } else {
+        // Was unable to find critical work in the queue. Allow inspecting the queue in nested
+        // invocations. This handles the case when a critical task has just been completed.
+ m_properties.critical_task_allowed = true;
+ }
+ return t;
+}
+#else
+inline d1::task* task_dispatcher::get_critical_task(d1::task* t, execution_data_ext&, isolation_type, bool /*critical_allowed*/) {
+ return t;
+}
+#endif
+
+inline d1::task* task_dispatcher::get_mailbox_task(mail_inbox& my_inbox, execution_data_ext& ed, isolation_type isolation) {
+ while (task_proxy* const tp = my_inbox.pop(isolation)) {
+ if (d1::task* result = tp->extract_task<task_proxy::mailbox_bit>()) {
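+            // The original slot is set to a value that cannot match any real arena slot; this
+            // appears to act as a sentinel marking the task as obtained through the mailbox
+            // (affinity) mechanism rather than from a task pool (assumption).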
+ ed.original_slot = (unsigned short)(-2);
+ ed.affinity_slot = ed.task_disp->m_thread_data->my_arena_index;
+ return result;
+ }
+ // We have exclusive access to the proxy, and can destroy it.
+ tp->allocator.delete_object(tp, ed);
+ }
+ return NULL;
+}
+
+template <typename Waiter>
+d1::task* task_dispatcher::local_wait_for_all(d1::task* t, Waiter& waiter) {
+ if (governor::is_itt_present()) {
+ return local_wait_for_all</*ITTPossible = */ true>(t, waiter);
+ } else {
+ return local_wait_for_all</*ITTPossible = */ false>(t, waiter);
+ }
+}
+
+} // namespace r1
+} // namespace detail
+} // namespace tbb
+
+#endif // _TBB_task_dispatcher_H
+
diff --git a/contrib/libs/tbb/src/tbb/task_group_context.cpp b/contrib/libs/tbb/src/tbb/task_group_context.cpp
new file mode 100644
index 0000000000..3c296648ec
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/task_group_context.cpp
@@ -0,0 +1,493 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "oneapi/tbb/detail/_config.h"
+#include "oneapi/tbb/tbb_allocator.h"
+#include "oneapi/tbb/task_group.h"
+#include "governor.h"
+#include "thread_data.h"
+#include "scheduler_common.h"
+#include "itt_notify.h"
+#include "task_dispatcher.h"
+
+#include <type_traits>
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+
+//------------------------------------------------------------------------
+// tbb_exception_ptr
+//------------------------------------------------------------------------
+tbb_exception_ptr* tbb_exception_ptr::allocate() noexcept {
+ tbb_exception_ptr* eptr = (tbb_exception_ptr*)allocate_memory(sizeof(tbb_exception_ptr));
+ return eptr ? new (eptr) tbb_exception_ptr(std::current_exception()) : nullptr;
+}
+
+void tbb_exception_ptr::destroy() noexcept {
+ this->~tbb_exception_ptr();
+ deallocate_memory(this);
+}
+
+void tbb_exception_ptr::throw_self() {
+ if (governor::rethrow_exception_broken()) fix_broken_rethrow();
+ std::rethrow_exception(my_ptr);
+}
+
+//------------------------------------------------------------------------
+// task_group_context
+//------------------------------------------------------------------------
+
+void task_group_context_impl::destroy(d1::task_group_context& ctx) {
+ __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL);
+
+ auto ctx_lifetime_state = ctx.my_lifetime_state.load(std::memory_order_relaxed);
+ __TBB_ASSERT(ctx_lifetime_state != d1::task_group_context::lifetime_state::locked, nullptr);
+
+ if (ctx_lifetime_state == d1::task_group_context::lifetime_state::bound) {
+ // The owner can be destroyed at any moment. Access the associate data with caution.
+ thread_data* owner = ctx.my_owner.load(std::memory_order_relaxed);
+ if (governor::is_thread_data_set(owner)) {
+ thread_data::context_list_state& cls = owner->my_context_list_state;
+ // We are the owner, so cls is valid.
+ // Local update of the context list
+ std::uintptr_t local_count_snapshot = cls.epoch.load(std::memory_order_relaxed);
+            // The sequentially consistent store prevents the load of the nonlocal update flag
+            // from being hoisted before the store to the local update flag.
+ cls.local_update = 1;
+ if (cls.nonlocal_update.load(std::memory_order_relaxed)) {
+ spin_mutex::scoped_lock lock(cls.mutex);
+ ctx.my_node.remove_relaxed();
+ cls.local_update.store(0, std::memory_order_relaxed);
+ } else {
+ ctx.my_node.remove_relaxed();
+                // A release fence is necessary so that the update of our neighbors in
+                // the context list is committed before a possible concurrent destroyer
+                // proceeds once the local update flag is reset by the following store.
+ cls.local_update.store(0, std::memory_order_release);
+ if (local_count_snapshot != the_context_state_propagation_epoch.load(std::memory_order_relaxed)) {
+ // Another thread was propagating cancellation request when we removed
+ // ourselves from the list. We must ensure that it is not accessing us
+ // when this destructor finishes. We'll be able to acquire the lock
+ // below only after the other thread finishes with us.
+ spin_mutex::scoped_lock lock(cls.mutex);
+ }
+ }
+ } else {
+ d1::task_group_context::lifetime_state expected = d1::task_group_context::lifetime_state::bound;
+ if (
+#if defined(__INTEL_COMPILER) && __INTEL_COMPILER <= 1910
+ !((std::atomic<typename std::underlying_type<d1::task_group_context::lifetime_state>::type>&)ctx.my_lifetime_state).compare_exchange_strong(
+ (typename std::underlying_type<d1::task_group_context::lifetime_state>::type&)expected,
+ (typename std::underlying_type<d1::task_group_context::lifetime_state>::type)d1::task_group_context::lifetime_state::locked)
+#else
+ !ctx.my_lifetime_state.compare_exchange_strong(expected, d1::task_group_context::lifetime_state::locked)
+#endif
+ ) {
+ __TBB_ASSERT(expected == d1::task_group_context::lifetime_state::detached, nullptr);
+ // The "owner" local variable can be a dangling pointer here. Do not access it.
+ owner = nullptr;
+ spin_wait_until_eq(ctx.my_owner, nullptr);
+ // It is unsafe to remove the node because its neighbors might be already destroyed.
+ // TODO: reconsider the logic.
+ // ctx.my_node.remove_relaxed();
+ }
+ else {
+ __TBB_ASSERT(expected == d1::task_group_context::lifetime_state::bound, nullptr);
+ __TBB_ASSERT(ctx.my_owner.load(std::memory_order_relaxed) != nullptr, nullptr);
+ thread_data::context_list_state& cls = owner->my_context_list_state;
+ __TBB_ASSERT(is_alive(cls.nonlocal_update.load(std::memory_order_relaxed)), "The owner should be alive.");
+
+ ++cls.nonlocal_update;
+ ctx.my_lifetime_state.store(d1::task_group_context::lifetime_state::dying, std::memory_order_release);
+ spin_wait_until_eq(cls.local_update, 0u);
+ {
+ spin_mutex::scoped_lock lock(cls.mutex);
+ ctx.my_node.remove_relaxed();
+ }
+ --cls.nonlocal_update;
+ }
+ }
+ }
+
+ if (ctx_lifetime_state == d1::task_group_context::lifetime_state::detached) {
+ spin_wait_until_eq(ctx.my_owner, nullptr);
+ }
+
+ d1::cpu_ctl_env* ctl = reinterpret_cast<d1::cpu_ctl_env*>(&ctx.my_cpu_ctl_env);
+#if _MSC_VER && _MSC_VER <= 1900 && !__INTEL_COMPILER
+ suppress_unused_warning(ctl);
+#endif
+ ctl->~cpu_ctl_env();
+
+ if (ctx.my_exception)
+ ctx.my_exception->destroy();
+ ITT_STACK_DESTROY(ctx.my_itt_caller);
+
+    poison_pointer(ctx.my_parent);
+ poison_pointer(ctx.my_owner);
+ poison_pointer(ctx.my_node.next);
+ poison_pointer(ctx.my_node.prev);
+ poison_pointer(ctx.my_exception);
+ poison_pointer(ctx.my_itt_caller);
+}
+
+void task_group_context_impl::initialize(d1::task_group_context& ctx) {
+ ITT_TASK_GROUP(&ctx, ctx.my_name, nullptr);
+
+ ctx.my_cpu_ctl_env = 0;
+ ctx.my_cancellation_requested = 0;
+ ctx.my_state.store(0, std::memory_order_relaxed);
+ // Set the created state to bound at the first usage.
+ ctx.my_lifetime_state.store(d1::task_group_context::lifetime_state::created, std::memory_order_relaxed);
+ ctx.my_parent = nullptr;
+ ctx.my_owner = nullptr;
+    ctx.my_node.next.store(nullptr, std::memory_order_relaxed);
+    ctx.my_node.prev.store(nullptr, std::memory_order_relaxed);
+ ctx.my_exception = nullptr;
+ ctx.my_itt_caller = nullptr;
+
+ static_assert(sizeof(d1::cpu_ctl_env) <= sizeof(ctx.my_cpu_ctl_env), "FPU settings storage does not fit to uint64_t");
+ d1::cpu_ctl_env* ctl = new (&ctx.my_cpu_ctl_env) d1::cpu_ctl_env;
+ if (ctx.my_traits.fp_settings)
+ ctl->get_env();
+}
+
+void task_group_context_impl::register_with(d1::task_group_context& ctx, thread_data* td) {
+ __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL);
+ __TBB_ASSERT(td, NULL);
+ ctx.my_owner.store(td, std::memory_order_relaxed);
+ thread_data::context_list_state& cls = td->my_context_list_state;
+    // State propagation logic assumes new contexts are bound to the head of the list.
+ ctx.my_node.prev.store(&cls.head, std::memory_order_relaxed);
+ // Notify threads that may be concurrently destroying contexts registered
+ // in this scheduler's list that local list update is underway.
+ // Prevent load of global propagation epoch counter from being hoisted before
+ // speculative stores above, as well as load of nonlocal update flag from
+ // being hoisted before the store to local update flag.
+ cls.local_update = 1;
+ // Finalize local context list update
+ if (cls.nonlocal_update.load(std::memory_order_relaxed)) {
+ spin_mutex::scoped_lock lock(cls.mutex);
+ d1::context_list_node* head_next = cls.head.next.load(std::memory_order_relaxed);
+ head_next->prev.store(&ctx.my_node, std::memory_order_relaxed);
+ ctx.my_node.next.store(head_next, std::memory_order_relaxed);
+ cls.local_update.store(0, std::memory_order_relaxed);
+ cls.head.next.store(&ctx.my_node, std::memory_order_relaxed);
+ } else {
+ d1::context_list_node* head_next = cls.head.next.load(std::memory_order_relaxed);
+ head_next->prev.store(&ctx.my_node, std::memory_order_relaxed);
+ ctx.my_node.next.store(head_next, std::memory_order_relaxed);
+ cls.local_update.store(0, std::memory_order_release);
+ // Thread-local list of contexts allows concurrent traversal by another thread
+ // while propagating state change. To ensure visibility of ctx.my_node's members
+ // to the concurrently traversing thread, the list's head is updated by means
+ // of store-with-release.
+ cls.head.next.store(&ctx.my_node, std::memory_order_release);
+ }
+}
+
+void task_group_context_impl::bind_to_impl(d1::task_group_context& ctx, thread_data* td) {
+ __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL);
+ __TBB_ASSERT(ctx.my_lifetime_state.load(std::memory_order_relaxed) == d1::task_group_context::lifetime_state::locked, "The context can be bound only under the lock.");
+ __TBB_ASSERT(!ctx.my_parent, "Parent is set before initial binding");
+
+ ctx.my_parent = td->my_task_dispatcher->m_execute_data_ext.context;
+ __TBB_ASSERT(ctx.my_parent, NULL);
+
+ // Inherit FPU settings only if the context has not captured FPU settings yet.
+ if (!ctx.my_traits.fp_settings)
+ copy_fp_settings(ctx, *ctx.my_parent);
+
+    // The condition below prevents unnecessary thrashing of the parent context's cache line
+ if (ctx.my_parent->my_state.load(std::memory_order_relaxed) != d1::task_group_context::may_have_children) {
+ ctx.my_parent->my_state.store(d1::task_group_context::may_have_children, std::memory_order_relaxed); // full fence is below
+ }
+ if (ctx.my_parent->my_parent) {
+ // Even if this context were made accessible for state change propagation
+ // (by placing store_with_release(td->my_context_list_state.head.my_next, &ctx.my_node)
+ // above), it still could be missed if state propagation from a grand-ancestor
+ // was underway concurrently with binding.
+        // Speculative propagation from the parent, together with epoch counters that
+        // detect the possibility of such a race, allows us to avoid taking locks when
+        // there is no contention.
+
+ // Acquire fence is necessary to prevent reordering subsequent speculative
+ // loads of parent state data out of the scope where epoch counters comparison
+ // can reliably validate it.
+ uintptr_t local_count_snapshot = ctx.my_parent->my_owner.load(std::memory_order_relaxed)->my_context_list_state.epoch.load(std::memory_order_acquire);
+ // Speculative propagation of parent's state. The speculation will be
+ // validated by the epoch counters check further on.
+ ctx.my_cancellation_requested.store(ctx.my_parent->my_cancellation_requested.load(std::memory_order_relaxed), std::memory_order_relaxed);
+ register_with(ctx, td); // Issues full fence
+
+ // If no state propagation was detected by the following condition, the above
+ // full fence guarantees that the parent had correct state during speculative
+ // propagation before the fence. Otherwise the propagation from parent is
+ // repeated under the lock.
+ if (local_count_snapshot != the_context_state_propagation_epoch.load(std::memory_order_relaxed)) {
+ // Another thread may be propagating state change right now. So resort to lock.
+ context_state_propagation_mutex_type::scoped_lock lock(the_context_state_propagation_mutex);
+ ctx.my_cancellation_requested.store(ctx.my_parent->my_cancellation_requested.load(std::memory_order_relaxed), std::memory_order_relaxed);
+ }
+ } else {
+ register_with(ctx, td); // Issues full fence
+ // As we do not have grand-ancestors, concurrent state propagation (if any)
+ // may originate only from the parent context, and thus it is safe to directly
+ // copy the state from it.
+ ctx.my_cancellation_requested.store(ctx.my_parent->my_cancellation_requested.load(std::memory_order_relaxed), std::memory_order_relaxed);
+ }
+
+ ctx.my_lifetime_state.store(d1::task_group_context::lifetime_state::bound, std::memory_order_release);
+}
+
+void task_group_context_impl::bind_to(d1::task_group_context& ctx, thread_data* td) {
+ __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL);
+ d1::task_group_context::lifetime_state state = ctx.my_lifetime_state.load(std::memory_order_acquire);
+ if (state <= d1::task_group_context::lifetime_state::locked) {
+ if (state == d1::task_group_context::lifetime_state::created &&
+#if defined(__INTEL_COMPILER) && __INTEL_COMPILER <= 1910
+ ((std::atomic<typename std::underlying_type<d1::task_group_context::lifetime_state>::type>&)ctx.my_lifetime_state).compare_exchange_strong(
+ (typename std::underlying_type<d1::task_group_context::lifetime_state>::type&)state,
+ (typename std::underlying_type<d1::task_group_context::lifetime_state>::type)d1::task_group_context::lifetime_state::locked)
+#else
+ ctx.my_lifetime_state.compare_exchange_strong(state, d1::task_group_context::lifetime_state::locked)
+#endif
+ ) {
+ // If we are in the outermost task dispatch loop of an external thread, then
+ // there is nothing to bind this context to, and we skip the binding part
+ // treating the context as isolated.
+ __TBB_ASSERT(td->my_task_dispatcher->m_execute_data_ext.context != nullptr, nullptr);
+ if (td->my_task_dispatcher->m_execute_data_ext.context == td->my_arena->my_default_ctx || !ctx.my_traits.bound) {
+ if (!ctx.my_traits.fp_settings) {
+ copy_fp_settings(ctx, *td->my_arena->my_default_ctx);
+ }
+ ctx.my_lifetime_state.store(d1::task_group_context::lifetime_state::isolated, std::memory_order_release);
+ } else {
+ bind_to_impl(ctx, td);
+ }
+ ITT_STACK_CREATE(ctx.my_itt_caller);
+ }
+ spin_wait_while_eq(ctx.my_lifetime_state, d1::task_group_context::lifetime_state::locked);
+ }
+ __TBB_ASSERT(ctx.my_lifetime_state.load(std::memory_order_relaxed) != d1::task_group_context::lifetime_state::created, NULL);
+ __TBB_ASSERT(ctx.my_lifetime_state.load(std::memory_order_relaxed) != d1::task_group_context::lifetime_state::locked, NULL);
+}
+
+template <typename T>
+void task_group_context_impl::propagate_task_group_state(d1::task_group_context& ctx, std::atomic<T> d1::task_group_context::* mptr_state, d1::task_group_context& src, T new_state) {
+ __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL);
+ if ((ctx.*mptr_state).load(std::memory_order_relaxed) == new_state) {
+ // Nothing to do, whether descending from "src" or not, so no need to scan.
+ // Hopefully this happens often thanks to earlier invocations.
+ // This optimization is enabled by LIFO order in the context lists:
+ // - new contexts are bound to the beginning of lists;
+ // - descendants are newer than ancestors;
+ // - earlier invocations are therefore likely to "paint" long chains.
+ } else if (&ctx == &src) {
+ // This clause is disjunct from the traversal below, which skips src entirely.
+ // Note that src.*mptr_state is not necessarily still equal to new_state (another thread may have changed it again).
+ // Such interference is probably not frequent enough to aim for optimisation by writing new_state again (to make the other thread back down).
+ // Letting the other thread prevail may also be fairer.
+ } else {
+ for (d1::task_group_context* ancestor = ctx.my_parent; ancestor != NULL; ancestor = ancestor->my_parent) {
+ if (ancestor == &src) {
+ for (d1::task_group_context* c = &ctx; c != ancestor; c = c->my_parent)
+ (c->*mptr_state).store(new_state, std::memory_order_relaxed);
+ break;
+ }
+ }
+ }
+}
+
+bool task_group_context_impl::cancel_group_execution(d1::task_group_context& ctx) {
+ __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL);
+ __TBB_ASSERT(ctx.my_cancellation_requested.load(std::memory_order_relaxed) <= 1, "The cancellation state can be either 0 or 1");
+ if (ctx.my_cancellation_requested.load(std::memory_order_relaxed) || ctx.my_cancellation_requested.exchange(1)) {
+ // This task group and any descendants have already been canceled.
+ // (A newly added descendant would inherit its parent's ctx.my_cancellation_requested,
+ // not missing out on any cancellation still being propagated, and a context cannot be uncanceled.)
+ return false;
+ }
+ governor::get_thread_data()->my_arena->my_market->propagate_task_group_state(&d1::task_group_context::my_cancellation_requested, ctx, uint32_t(1));
+ return true;
+}
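+
+/* A minimal usage sketch (not part of the library) of how this cancellation path is typically
+   reached from user code, assuming the public tbb::task_group_context interface; should_stop
+   is a hypothetical user predicate:
+
+       #include "oneapi/tbb/parallel_for.h"
+       #include "oneapi/tbb/task_group.h"
+
+       tbb::task_group_context ctx;
+       tbb::parallel_for(0, 1000000, [&ctx](int i) {
+           if (should_stop(i))
+               ctx.cancel_group_execution();   // the first successful caller returns true
+       }, ctx);
+       bool cancelled = ctx.is_group_execution_cancelled();
+
+   Cancellation is propagated to descendant contexts bound to ctx, which is what
+   market::propagate_task_group_state below implements. */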
+
+bool task_group_context_impl::is_group_execution_cancelled(const d1::task_group_context& ctx) {
+ return ctx.my_cancellation_requested.load(std::memory_order_relaxed) != 0;
+}
+
+// IMPORTANT: It is assumed that this method is not used concurrently!
+void task_group_context_impl::reset(d1::task_group_context& ctx) {
+ __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL);
+ //! TODO: Add assertion that this context does not have children
+ // No fences are necessary since this context can be accessed from another thread
+ // only after stealing happened (which means necessary fences were used).
+ if (ctx.my_exception) {
+ ctx.my_exception->destroy();
+ ctx.my_exception = NULL;
+ }
+ ctx.my_cancellation_requested = 0;
+}
+
+// IMPORTANT: It is assumed that this method is not used concurrently!
+void task_group_context_impl::capture_fp_settings(d1::task_group_context& ctx) {
+ __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL);
+ //! TODO: Add assertion that this context does not have children
+ // No fences are necessary since this context can be accessed from another thread
+ // only after stealing happened (which means necessary fences were used).
+ d1::cpu_ctl_env* ctl = reinterpret_cast<d1::cpu_ctl_env*>(&ctx.my_cpu_ctl_env);
+ if (!ctx.my_traits.fp_settings) {
+ ctl = new (&ctx.my_cpu_ctl_env) d1::cpu_ctl_env;
+ ctx.my_traits.fp_settings = true;
+ }
+ ctl->get_env();
+}
+
+void task_group_context_impl::copy_fp_settings(d1::task_group_context& ctx, const d1::task_group_context& src) {
+ __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL);
+ __TBB_ASSERT(!ctx.my_traits.fp_settings, "The context already has FPU settings.");
+ __TBB_ASSERT(src.my_traits.fp_settings, "The source context does not have FPU settings.");
+
+ const d1::cpu_ctl_env* src_ctl = reinterpret_cast<const d1::cpu_ctl_env*>(&src.my_cpu_ctl_env);
+ new (&ctx.my_cpu_ctl_env) d1::cpu_ctl_env(*src_ctl);
+ ctx.my_traits.fp_settings = true;
+}
+
+template <typename T>
+void thread_data::propagate_task_group_state(std::atomic<T> d1::task_group_context::* mptr_state, d1::task_group_context& src, T new_state) {
+ spin_mutex::scoped_lock lock(my_context_list_state.mutex);
+ // Acquire fence is necessary to ensure that the subsequent node->my_next load
+ // returned the correct value in case it was just inserted in another thread.
+ // The fence also ensures visibility of the correct ctx.my_parent value.
+ d1::context_list_node* node = my_context_list_state.head.next.load(std::memory_order_acquire);
+ while (node != &my_context_list_state.head) {
+ d1::task_group_context& ctx = __TBB_get_object_ref(d1::task_group_context, my_node, node);
+ if ((ctx.*mptr_state).load(std::memory_order_relaxed) != new_state)
+ task_group_context_impl::propagate_task_group_state(ctx, mptr_state, src, new_state);
+ node = node->next.load(std::memory_order_relaxed);
+ }
+ // Sync up local propagation epoch with the global one. Release fence prevents
+ // reordering of possible store to *mptr_state after the sync point.
+ my_context_list_state.epoch.store(the_context_state_propagation_epoch.load(std::memory_order_relaxed), std::memory_order_release);
+}
+
+template <typename T>
+bool market::propagate_task_group_state(std::atomic<T> d1::task_group_context::* mptr_state, d1::task_group_context& src, T new_state) {
+ if (src.my_state.load(std::memory_order_relaxed) != d1::task_group_context::may_have_children)
+ return true;
+ // The whole propagation algorithm is under the lock in order to ensure correctness
+ // in case of concurrent state changes at the different levels of the context tree.
+    // See the comment at the bottom of this file.
+ context_state_propagation_mutex_type::scoped_lock lock(the_context_state_propagation_mutex);
+ if ((src.*mptr_state).load(std::memory_order_relaxed) != new_state)
+ // Another thread has concurrently changed the state. Back down.
+ return false;
+ // Advance global state propagation epoch
+ ++the_context_state_propagation_epoch;
+ // Propagate to all workers and external threads and sync up their local epochs with the global one
+ unsigned num_workers = my_first_unused_worker_idx;
+ for (unsigned i = 0; i < num_workers; ++i) {
+ thread_data* td = my_workers[i];
+ // If the worker is only about to be registered, skip it.
+ if (td)
+ td->propagate_task_group_state(mptr_state, src, new_state);
+ }
+ // Propagate to all external threads
+ // The whole propagation sequence is locked, thus no contention is expected
+ for (thread_data_list_type::iterator it = my_masters.begin(); it != my_masters.end(); it++)
+ it->propagate_task_group_state(mptr_state, src, new_state);
+ return true;
+}
+
+/*
+ Comments:
+
+1. The premise of the cancellation support implementation is that cancellations are
+ not part of the hot path of the program execution. Therefore all changes in its
+ implementation in order to reduce the overhead of the cancellation control flow
+ should be done only in ways that do not increase overhead of the normal execution.
+
+ In general, contexts are used by all threads and their descendants are created in
+ different threads as well. In order to minimize impact of the cross-thread tree
+ maintenance (first of all because of the synchronization), the tree of contexts
+ is split into pieces, each of which is handled by a single thread. Such pieces
+ are represented as lists of contexts, members of which are contexts that were
+ bound to their parents in the given thread.
+
+ The context tree maintenance and cancellation propagation algorithms are designed
+ in such a manner that cross-thread access to a context list will take place only
+ when cancellation signal is sent (by user or when an exception happens), and
+ synchronization is necessary only then. Thus the normal execution flow (without
+ exceptions and cancellation) remains free from any synchronization done on
+ behalf of exception handling and cancellation support.
+
+2. Consider parallel cancellations at the different levels of the context tree:
+
+ Ctx1 <- Cancelled by Thread1 |- Thread2 started processing
+ | |
+ Ctx2 |- Thread1 started processing
+ | T1 |- Thread2 finishes and syncs up local counters
+ Ctx3 <- Cancelled by Thread2 |
+ | |- Ctx5 is bound to Ctx2
+ Ctx4 |
+ T2 |- Thread1 reaches Ctx2
+
+    The thread propagating each cancellation increments the global counter. However, the
+    thread propagating the cancellation from the outermost context (Thread1) may be the
+    last to finish, which means that the local counters may be synchronized earlier (by
+    Thread2, at Time1) than the cancellation is propagated into Ctx2 (at Time2). If a new
+    context (Ctx5) is created and bound to Ctx2 between Time1 and Time2, checking only its
+    parent (Ctx2) may result in the cancellation request being lost.
+
+ This issue is solved by doing the whole propagation under the lock.
+
+ If we need more concurrency while processing parallel cancellations, we could try
+ the following modification of the propagation algorithm:
+
+ advance global counter and remember it
+ for each thread:
+ scan thread's list of contexts
+ for each thread:
+ sync up its local counter only if the global counter has not been changed
+
+ However this version of the algorithm requires more analysis and verification.
+*/
+
+void __TBB_EXPORTED_FUNC initialize(d1::task_group_context& ctx) {
+ task_group_context_impl::initialize(ctx);
+}
+void __TBB_EXPORTED_FUNC destroy(d1::task_group_context& ctx) {
+ task_group_context_impl::destroy(ctx);
+}
+void __TBB_EXPORTED_FUNC reset(d1::task_group_context& ctx) {
+ task_group_context_impl::reset(ctx);
+}
+bool __TBB_EXPORTED_FUNC cancel_group_execution(d1::task_group_context& ctx) {
+ return task_group_context_impl::cancel_group_execution(ctx);
+}
+bool __TBB_EXPORTED_FUNC is_group_execution_cancelled(d1::task_group_context& ctx) {
+ return task_group_context_impl::is_group_execution_cancelled(ctx);
+}
+void __TBB_EXPORTED_FUNC capture_fp_settings(d1::task_group_context& ctx) {
+ task_group_context_impl::capture_fp_settings(ctx);
+}
+
+} // namespace r1
+} // namespace detail
+} // namespace tbb
+
diff --git a/contrib/libs/tbb/src/tbb/task_stream.h b/contrib/libs/tbb/src/tbb/task_stream.h
new file mode 100644
index 0000000000..f32ef94e80
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/task_stream.h
@@ -0,0 +1,288 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#ifndef _TBB_task_stream_H
+#define _TBB_task_stream_H
+
+//! This file is a possible future replacement for the task_stream class implemented in
+//! task_stream.h. It refactors the code and extends task_stream capabilities by moving lane
+//! management to the caller side of the operations. Although the new implementation is not
+//! expected to affect the performance of the original task stream, no analysis of this was
+//! made at the time it was developed. In addition, it is not yet clear that this container
+//! would be suitable for critical tasks, given the linear time complexity of its operations.
+
+#include "oneapi/tbb/detail/_utils.h"
+
+#include "oneapi/tbb/spin_mutex.h"
+#include "oneapi/tbb/cache_aligned_allocator.h"
+
+#include "scheduler_common.h"
+#include "misc.h" // for FastRandom
+
+#include <deque>
+#include <climits>
+#include <atomic>
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+
+//! Essentially, this is just a pair of a queue and a mutex to protect the queue.
+/** The reason std::pair is not used is that the code would look less clean
+ if field names were replaced with 'first' and 'second'. **/
+template< typename T, typename mutex_t >
+struct alignas(max_nfs_size) queue_and_mutex {
+ typedef std::deque< T, cache_aligned_allocator<T> > queue_base_t;
+
+ queue_base_t my_queue{};
+ mutex_t my_mutex{};
+};
+
+using population_t = uintptr_t;
+const population_t one = 1;
+
+inline void set_one_bit( std::atomic<population_t>& dest, int pos ) {
+ __TBB_ASSERT( pos>=0, NULL );
+ __TBB_ASSERT( pos<int(sizeof(population_t)*CHAR_BIT), NULL );
+ dest.fetch_or( one<<pos );
+}
+
+inline void clear_one_bit( std::atomic<population_t>& dest, int pos ) {
+ __TBB_ASSERT( pos>=0, NULL );
+ __TBB_ASSERT( pos<int(sizeof(population_t)*CHAR_BIT), NULL );
+ dest.fetch_and( ~(one<<pos) );
+}
+
+inline bool is_bit_set( population_t val, int pos ) {
+ __TBB_ASSERT( pos>=0, NULL );
+ __TBB_ASSERT( pos<int(sizeof(population_t)*CHAR_BIT), NULL );
+ return (val & (one<<pos)) != 0;
+}
+
+struct random_lane_selector :
+#if __INTEL_COMPILER == 1110 || __INTEL_COMPILER == 1500
+ no_assign
+#else
+ no_copy
+#endif
+{
+ random_lane_selector( FastRandom& random ) : my_random( random ) {}
+ unsigned operator()( unsigned out_of ) const {
+ __TBB_ASSERT( ((out_of-1) & out_of) == 0, "number of lanes is not a power of two." );
+ return my_random.get() & (out_of-1);
+ }
+private:
+ FastRandom& my_random;
+};
+
+struct lane_selector_base :
+#if __INTEL_COMPILER == 1110 || __INTEL_COMPILER == 1500
+ no_assign
+#else
+ no_copy
+#endif
+{
+ unsigned& my_previous;
+ lane_selector_base( unsigned& previous ) : my_previous( previous ) {}
+};
+
+struct subsequent_lane_selector : lane_selector_base {
+ subsequent_lane_selector( unsigned& previous ) : lane_selector_base( previous ) {}
+ unsigned operator()( unsigned out_of ) const {
+ __TBB_ASSERT( ((out_of-1) & out_of) == 0, "number of lanes is not a power of two." );
+ return (++my_previous &= out_of-1);
+ }
+};
+
+struct preceding_lane_selector : lane_selector_base {
+ preceding_lane_selector( unsigned& previous ) : lane_selector_base( previous ) {}
+ unsigned operator()( unsigned out_of ) const {
+ __TBB_ASSERT( ((out_of-1) & out_of) == 0, "number of lanes is not a power of two." );
+ return (--my_previous &= (out_of-1));
+ }
+};
+
+//! Specifies from which side of the underlying container elements are retrieved. The method must be
+//! called with the corresponding mutex locked.
+template<task_stream_accessor_type accessor>
+class task_stream_accessor : no_copy {
+protected:
+ using lane_t = queue_and_mutex <d1::task*, spin_mutex>;
+ d1::task* get_item( lane_t::queue_base_t& queue ) {
+ d1::task* result = queue.front();
+ queue.pop_front();
+ return result;
+ }
+};
+
+template<>
+class task_stream_accessor< back_nonnull_accessor > : no_copy {
+protected:
+ using lane_t = queue_and_mutex <d1::task*, spin_mutex>;
+ d1::task* get_item( lane_t::queue_base_t& queue ) {
+ d1::task* result = nullptr;
+ __TBB_ASSERT(!queue.empty(), nullptr);
+ // An isolated task can put zeros in the queue; see look_specific.
+ do {
+ result = queue.back();
+ queue.pop_back();
+ } while ( !result && !queue.empty() );
+
+ __TBB_ASSERT_RELEASE(result, nullptr);
+ return result;
+ }
+};
+
+//! The container for "fairness-oriented" aka "enqueued" tasks.
+template<task_stream_accessor_type accessor>
+class task_stream : public task_stream_accessor< accessor > {
+ using lane_t = typename task_stream_accessor<accessor>::lane_t;
+ std::atomic<population_t> population{};
+ lane_t* lanes{nullptr};
+ unsigned N{};
+
+public:
+ task_stream() = default;
+
+ void initialize( unsigned n_lanes ) {
+ const unsigned max_lanes = sizeof(population_t) * CHAR_BIT;
+
+ N = n_lanes >= max_lanes ? max_lanes : n_lanes > 2 ? 1 << (tbb::detail::log2(n_lanes - 1) + 1) : 2;
+ __TBB_ASSERT( N == max_lanes || (N >= n_lanes && ((N - 1) & N) == 0), "number of lanes miscalculated" );
+ __TBB_ASSERT( N <= sizeof(population_t) * CHAR_BIT, NULL );
+ lanes = static_cast<lane_t*>(cache_aligned_allocate(sizeof(lane_t) * N));
+ for (unsigned i = 0; i < N; ++i) {
+ new (lanes + i) lane_t;
+ }
+ __TBB_ASSERT( !population.load(std::memory_order_relaxed), NULL );
+ }
+
+ ~task_stream() {
+ if (lanes) {
+ for (unsigned i = 0; i < N; ++i) {
+ lanes[i].~lane_t();
+ }
+ cache_aligned_deallocate(lanes);
+ }
+ }
+
+ //! Push a task into a lane. Lane selection is performed by the passed functor.
+ template<typename lane_selector_t>
+ void push(d1::task* source, const lane_selector_t& next_lane ) {
+ bool succeed = false;
+ unsigned lane = 0;
+ do {
+ lane = next_lane( /*out_of=*/N );
+ __TBB_ASSERT( lane < N, "Incorrect lane index." );
+ } while( ! (succeed = try_push( source, lane )) );
+ }
+
+ //! Try finding and popping a task using the passed functor for lane selection. The last used
+ //! lane is updated inside the lane selector.
+ template<typename lane_selector_t>
+ d1::task* pop( const lane_selector_t& next_lane ) {
+ d1::task* popped = NULL;
+ unsigned lane = 0;
+ do {
+ lane = next_lane( /*out_of=*/N );
+ __TBB_ASSERT( lane < N, "Incorrect lane index." );
+ } while( !empty() && !(popped = try_pop( lane )) );
+ return popped;
+ }
+
+ //! Try finding and popping a related task.
+ d1::task* pop_specific( unsigned& last_used_lane, isolation_type isolation ) {
+ d1::task* result = NULL;
+ // Lane selection is round-robin in backward direction.
+ unsigned idx = last_used_lane & (N-1);
+ do {
+ if( is_bit_set( population.load(std::memory_order_relaxed), idx ) ) {
+ lane_t& lane = lanes[idx];
+ spin_mutex::scoped_lock lock;
+ if( lock.try_acquire(lane.my_mutex) && !lane.my_queue.empty() ) {
+ result = look_specific( lane.my_queue, isolation );
+ if( lane.my_queue.empty() )
+ clear_one_bit( population, idx );
+ if( result )
+ break;
+ }
+ }
+ idx=(idx-1)&(N-1);
+ } while( !empty() && idx != last_used_lane );
+ last_used_lane = idx;
+ return result;
+ }
+
+ //! Checks existence of a task.
+ bool empty() {
+ return !population.load(std::memory_order_relaxed);
+ }
+
+private:
+ //! Returns true on successful push, otherwise - false.
+ bool try_push(d1::task* source, unsigned lane_idx ) {
+ spin_mutex::scoped_lock lock;
+ if( lock.try_acquire( lanes[lane_idx].my_mutex ) ) {
+ lanes[lane_idx].my_queue.push_back( source );
+ set_one_bit( population, lane_idx ); // TODO: avoid atomic op if the bit is already set
+ return true;
+ }
+ return false;
+ }
+
+ //! Returns pointer to task on successful pop, otherwise - NULL.
+ d1::task* try_pop( unsigned lane_idx ) {
+ if( !is_bit_set( population.load(std::memory_order_relaxed), lane_idx ) )
+ return NULL;
+ d1::task* result = NULL;
+ lane_t& lane = lanes[lane_idx];
+ spin_mutex::scoped_lock lock;
+ if( lock.try_acquire( lane.my_mutex ) && !lane.my_queue.empty() ) {
+ result = this->get_item( lane.my_queue );
+ if( lane.my_queue.empty() )
+ clear_one_bit( population, lane_idx );
+ }
+ return result;
+ }
+
+ // TODO: unify '*_specific' logic with 'pop' methods above
+ d1::task* look_specific( typename lane_t::queue_base_t& queue, isolation_type isolation ) {
+ __TBB_ASSERT( !queue.empty(), NULL );
+ // TODO: add a worst-case performance test and consider an alternative container with better
+ // performance for isolation search.
+ typename lane_t::queue_base_t::iterator curr = queue.end();
+ do {
+ // TODO: consider logic from get_task to simplify the code.
+ d1::task* result = *--curr;
+ if( result && task_accessor::isolation(*result) == isolation ) {
+ if( queue.end() - curr == 1 )
+ queue.pop_back(); // a little of housekeeping along the way
+ else
+ *curr = 0; // grabbing task with the same isolation
+ // TODO: move one of the container's ends instead if the task has been found there
+ return result;
+ }
+ } while( curr != queue.begin() );
+ return NULL;
+ }
+
+}; // task_stream
+
+} // namespace r1
+} // namespace detail
+} // namespace tbb
+
+#endif /* _TBB_task_stream_H */
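To show how a caller is expected to drive this container, here is a hedged usage sketch (not library code). It assumes that front_accessor is the other enumerator of task_stream_accessor_type declared alongside back_nonnull_accessor in scheduler_common.h, that FastRandom can be seeded from a pointer (as thread_data does with my_random{this}), and that some_task is an existing d1::task*.

    // Sketch only; 'some_task' is a placeholder for a real d1::task*.
    task_stream<front_accessor> stream;
    stream.initialize(6);                       // 6 lanes round up to the next power of two: N == 8

    FastRandom random{&stream};                 // pointer-seeded, mirroring thread_data's my_random{this}
    random_lane_selector push_selector{random};
    stream.push(some_task, push_selector);      // retries random lanes until try_push succeeds

    unsigned last_lane = 0;
    preceding_lane_selector pop_selector{last_lane};
    d1::task* t = stream.pop(pop_selector);     // walks lanes backwards until a task is found or the stream is empty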
diff --git a/contrib/libs/tbb/src/tbb/thread_data.h b/contrib/libs/tbb/src/tbb/thread_data.h
new file mode 100644
index 0000000000..41d4a0cf60
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/thread_data.h
@@ -0,0 +1,273 @@
+/*
+ Copyright (c) 2020-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#ifndef __TBB_thread_data_H
+#define __TBB_thread_data_H
+
+#include "oneapi/tbb/detail/_task.h"
+#include "oneapi/tbb/task.h"
+
+#include "rml_base.h" // rml::job
+
+#include "scheduler_common.h"
+#include "arena.h"
+#include "concurrent_monitor.h"
+#include "mailbox.h"
+#include "misc.h" // FastRandom
+#include "small_object_pool_impl.h"
+
+#include <atomic>
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+
+class task;
+class arena_slot;
+class task_group_context;
+class task_dispatcher;
+
+//------------------------------------------------------------------------
+// Thread Data
+//------------------------------------------------------------------------
+class thread_data : public ::rml::job
+ , public intrusive_list_node
+ , no_copy {
+public:
+ thread_data(unsigned short index, bool is_worker)
+ : my_arena_index{ index }
+ , my_is_worker{ is_worker }
+ , my_task_dispatcher{ nullptr }
+ , my_arena{}
+ , my_arena_slot{}
+ , my_inbox{}
+ , my_random{ this }
+ , my_last_observer{ nullptr }
+ , my_small_object_pool{new (cache_aligned_allocate(sizeof(small_object_pool_impl))) small_object_pool_impl{}}
+ , my_context_list_state{}
+#if __TBB_RESUMABLE_TASKS
+ , my_post_resume_action{ post_resume_action::none }
+ , my_post_resume_arg{nullptr}
+#endif /* __TBB_RESUMABLE_TASKS */
+ {
+ ITT_SYNC_CREATE(&my_context_list_state.mutex, SyncType_Scheduler, SyncObj_ContextsList);
+ my_context_list_state.head.next.store(&my_context_list_state.head, std::memory_order_relaxed);
+ my_context_list_state.head.prev.store(&my_context_list_state.head, std::memory_order_relaxed);
+ }
+
+ ~thread_data() {
+ context_list_cleanup();
+ my_small_object_pool->destroy();
+ poison_pointer(my_task_dispatcher);
+ poison_pointer(my_arena);
+ poison_pointer(my_arena_slot);
+ poison_pointer(my_last_observer);
+ poison_pointer(my_small_object_pool);
+#if __TBB_RESUMABLE_TASKS
+ poison_pointer(my_post_resume_arg);
+#endif /* __TBB_RESUMABLE_TASKS */
+ poison_value(my_context_list_state.epoch);
+ poison_value(my_context_list_state.local_update);
+ poison_value(my_context_list_state.nonlocal_update);
+ }
+
+ void attach_arena(arena& a, std::size_t index);
+ bool is_attached_to(arena*);
+ void attach_task_dispatcher(task_dispatcher&);
+ void detach_task_dispatcher();
+ void context_list_cleanup();
+ template <typename T>
+ void propagate_task_group_state(std::atomic<T> d1::task_group_context::* mptr_state, d1::task_group_context& src, T new_state);
+
+ //! Index of the arena slot the scheduler occupies now, or occupied last time
+ unsigned short my_arena_index;
+
+ //! Indicates if the thread is created by RML
+ const bool my_is_worker;
+
+ //! The current task dispatcher
+ task_dispatcher* my_task_dispatcher;
+
+ //! The arena that I own (if external thread) or am servicing at the moment (if worker)
+ arena* my_arena;
+
+ //! Pointer to the slot in the arena we own at the moment
+ arena_slot* my_arena_slot;
+
+ //! The mailbox (affinity mechanism) the current thread is attached to
+ mail_inbox my_inbox;
+
+ //! The random generator
+ FastRandom my_random;
+
+ //! Last observer in the observers list processed on this slot
+ observer_proxy* my_last_observer;
+
+ //! Pool of small objects for fast task allocation
+ small_object_pool_impl* my_small_object_pool;
+
+ struct context_list_state {
+ //! Head of the thread specific list of task group contexts.
+ d1::context_list_node head{};
+
+ //! Mutex protecting access to the list of task group contexts.
+ // TODO: check whether it can be deadly preempted and replace by spinning/sleeping mutex
+ spin_mutex mutex{};
+
+ //! Last state propagation epoch known to this thread
+ /** Together with the_context_state_propagation_epoch, constitutes the synchronization protocol
+ that keeps the hot path of task group context construction/destruction mostly
+ lock-free.
+ When the local epoch equals the global one, the state of the task group contexts
+ registered with this thread is consistent with that of the task group trees
+ they belong to. **/
+ std::atomic<std::uintptr_t> epoch{};
+
+ //! Flag indicating that a context is being destructed by its owner thread
+ /** Together with nonlocal_update, constitutes the synchronization protocol
+ that keeps the hot path of context destruction (by the owner thread) mostly
+ lock-free. **/
+ std::atomic<std::uintptr_t> local_update{};
+
+ //! Flag indicating that a context is being destructed by a non-owner thread.
+ /** See also local_update. **/
+ std::atomic<std::uintptr_t> nonlocal_update{};
+ } my_context_list_state;
+
+#if __TBB_RESUMABLE_TASKS
+ //! The list of possible post resume actions.
+ enum class post_resume_action {
+ invalid,
+ register_waiter,
+ resume,
+ callback,
+ cleanup,
+ notify,
+ none
+ };
+
+ //! A wrapper that invokes the user callback passed to tbb::suspend.
+ struct suspend_callback_wrapper {
+ suspend_callback_type suspend_callback;
+ void* user_callback;
+ suspend_point_type* tag;
+
+ void operator()() {
+ __TBB_ASSERT(suspend_callback && user_callback && tag, nullptr);
+ suspend_callback(user_callback, tag);
+ }
+ };
+
+ //! Suspends the current coroutine (task_dispatcher).
+ void suspend(void* suspend_callback, void* user_callback);
+
+ //! Resumes the target task_dispatcher.
+ void resume(task_dispatcher& target);
+
+ //! Set post resume action to perform after resume.
+ void set_post_resume_action(post_resume_action pra, void* arg) {
+ __TBB_ASSERT(my_post_resume_action == post_resume_action::none, "The Post resume action must not be set");
+ __TBB_ASSERT(!my_post_resume_arg, "The post resume action must not have an argument");
+ my_post_resume_action = pra;
+ my_post_resume_arg = arg;
+ }
+
+ void clear_post_resume_action() {
+ my_post_resume_action = thread_data::post_resume_action::none;
+ my_post_resume_arg = nullptr;
+ }
+
+ //! Performs post resume action.
+ void do_post_resume_action();
+
+ //! The post resume action requested after the swap contexts.
+ post_resume_action my_post_resume_action;
+
+ //! The post resume action argument.
+ void* my_post_resume_arg;
+#endif /* __TBB_RESUMABLE_TASKS */
+
+ //! The default context
+ // TODO: consider using a common default context because it is used only to simplify
+ // the cancellation check.
+ d1::task_group_context my_default_context;
+};
+
+inline void thread_data::attach_arena(arena& a, std::size_t index) {
+ my_arena = &a;
+ my_arena_index = static_cast<unsigned short>(index);
+ my_arena_slot = a.my_slots + index;
+ // Read the current slot mail_outbox and attach it to the mail_inbox (remove inbox later maybe)
+ my_inbox.attach(my_arena->mailbox(index));
+}
+
+inline bool thread_data::is_attached_to(arena* a) { return my_arena == a; }
+
+inline void thread_data::context_list_cleanup() {
+ // Detach contexts remaining in the local list.
+ {
+ spin_mutex::scoped_lock lock(my_context_list_state.mutex);
+ d1::context_list_node* node = my_context_list_state.head.next.load(std::memory_order_relaxed);
+ while (node != &my_context_list_state.head) {
+ using state_t = d1::task_group_context::lifetime_state;
+
+ d1::task_group_context& ctx = __TBB_get_object_ref(d1::task_group_context, my_node, node);
+ std::atomic<state_t>& state = ctx.my_lifetime_state;
+
+ node = node->next.load(std::memory_order_relaxed);
+
+ __TBB_ASSERT(ctx.my_owner == this, "The context should belong to the current thread.");
+ state_t expected = state_t::bound;
+ if (
+#if defined(__INTEL_COMPILER) && __INTEL_COMPILER <= 1910
+ !((std::atomic<typename std::underlying_type<state_t>::type>&)state).compare_exchange_strong(
+ (typename std::underlying_type<state_t>::type&)expected,
+ (typename std::underlying_type<state_t>::type)state_t::detached)
+#else
+ !state.compare_exchange_strong(expected, state_t::detached)
+#endif
+ ) {
+ __TBB_ASSERT(expected == state_t::locked || expected == state_t::dying, nullptr);
+ spin_wait_until_eq(state, state_t::dying);
+ } else {
+ __TBB_ASSERT(expected == state_t::bound, nullptr);
+ ctx.my_owner.store(nullptr, std::memory_order_release);
+ }
+ }
+ }
+ spin_wait_until_eq(my_context_list_state.nonlocal_update, 0u);
+}
+
+inline void thread_data::attach_task_dispatcher(task_dispatcher& task_disp) {
+ __TBB_ASSERT(my_task_dispatcher == nullptr, nullptr);
+ __TBB_ASSERT(task_disp.m_thread_data == nullptr, nullptr);
+ task_disp.m_thread_data = this;
+ my_task_dispatcher = &task_disp;
+}
+
+inline void thread_data::detach_task_dispatcher() {
+ __TBB_ASSERT(my_task_dispatcher != nullptr, nullptr);
+ __TBB_ASSERT(my_task_dispatcher->m_thread_data == this, nullptr);
+ my_task_dispatcher->m_thread_data = nullptr;
+ my_task_dispatcher = nullptr;
+}
+
+} // namespace r1
+} // namespace detail
+} // namespace tbb
+
+#endif // __TBB_thread_data_H
+
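The epoch fields above are what keep the hot path lock-free: a thread only needs to revisit its context list when its local epoch lags the global one. A hedged sketch of that check follows; the extern declaration and the helper name are assumptions for illustration, since the global counter is only referred to by name in the comments above.

    #include <atomic>
    #include <cstdint>

    // Assumed to exist elsewhere in the scheduler; declared here only for the sketch.
    extern std::atomic<std::uintptr_t> the_context_state_propagation_epoch;

    // Hot path: one relaxed load and a comparison, no locking.
    inline bool context_state_needs_refresh(const thread_data& td) {
        return td.my_context_list_state.epoch.load(std::memory_order_relaxed)
               != the_context_state_propagation_epoch.load(std::memory_order_relaxed);
    }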
diff --git a/contrib/libs/tbb/src/tbb/tls.h b/contrib/libs/tbb/src/tbb/tls.h
new file mode 100644
index 0000000000..5d28ca4dae
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/tls.h
@@ -0,0 +1,93 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#ifndef _TBB_tls_H
+#define _TBB_tls_H
+
+#include "oneapi/tbb/detail/_config.h"
+
+#if __TBB_USE_POSIX
+#include <pthread.h>
+#else /* assume __TBB_USE_WINAPI */
+#include <windows.h>
+#endif
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+
+typedef void (*tls_dtor_t)(void*);
+
+//! Basic cross-platform wrapper class for TLS operations.
+template <typename T>
+class basic_tls {
+#if __TBB_USE_POSIX
+ typedef pthread_key_t tls_key_t;
+public:
+ int create( tls_dtor_t dtor = NULL ) {
+ return pthread_key_create(&my_key, dtor);
+ }
+ int destroy() { return pthread_key_delete(my_key); }
+ void set( T value ) { pthread_setspecific(my_key, (void*)value); }
+ T get() { return (T)pthread_getspecific(my_key); }
+#else /* __TBB_USE_WINAPI */
+ typedef DWORD tls_key_t;
+public:
+#if !__TBB_WIN8UI_SUPPORT
+ int create() {
+ tls_key_t tmp = TlsAlloc();
+ if( tmp==TLS_OUT_OF_INDEXES )
+ return TLS_OUT_OF_INDEXES;
+ my_key = tmp;
+ return 0;
+ }
+ int destroy() { TlsFree(my_key); my_key=0; return 0; }
+ void set( T value ) { TlsSetValue(my_key, (LPVOID)value); }
+ T get() { return (T)TlsGetValue(my_key); }
+#else /*!__TBB_WIN8UI_SUPPORT*/
+ int create() {
+ tls_key_t tmp = FlsAlloc(NULL);
+ if( tmp== (DWORD)0xFFFFFFFF )
+ return (DWORD)0xFFFFFFFF;
+ my_key = tmp;
+ return 0;
+ }
+ int destroy() { FlsFree(my_key); my_key=0; return 0; }
+ void set( T value ) { FlsSetValue(my_key, (LPVOID)value); }
+ T get() { return (T)FlsGetValue(my_key); }
+#endif /* !__TBB_WIN8UI_SUPPORT */
+#endif /* __TBB_USE_WINAPI */
+private:
+ tls_key_t my_key;
+};
+
+//! More advanced TLS support template class.
+/** It supports RAII and to some extent mimics __declspec(thread) variables. */
+template <typename T>
+class tls : public basic_tls<T> {
+ typedef basic_tls<T> base;
+public:
+ tls() { base::create(); }
+ ~tls() { base::destroy(); }
+ T operator=(T value) { base::set(value); return value; }
+ operator T() { return base::get(); }
+};
+
+} // namespace r1
+} // namespace detail
+} // namespace tbb
+
+#endif /* _TBB_tls_H */
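A brief usage sketch for the wrappers above (not part of the library). The stored value must fit in a pointer, since set()/get() cast through void*.

    // RAII form: the key is created in the constructor and destroyed in the destructor.
    tls<int*> current_buffer;
    int scratch = 0;
    current_buffer = &scratch;        // operator= forwards to basic_tls::set
    int* p = current_buffer;          // operator T() forwards to basic_tls::get

    // Manual form.
    basic_tls<void*> raw;
    raw.create();
    raw.set(&scratch);
    void* v = raw.get();
    raw.destroy();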
diff --git a/contrib/libs/tbb/src/tbb/tools_api/disable_warnings.h b/contrib/libs/tbb/src/tbb/tools_api/disable_warnings.h
new file mode 100644
index 0000000000..e1ba837404
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/tools_api/disable_warnings.h
@@ -0,0 +1,35 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "ittnotify_config.h"
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+
+#pragma warning (disable: 593) /* parameter "XXXX" was set but never used */
+#pragma warning (disable: 344) /* typedef name has already been declared (with same type) */
+#pragma warning (disable: 174) /* expression has no effect */
+#pragma warning (disable: 4127) /* conditional expression is constant */
+#pragma warning (disable: 4306) /* conversion from '?' to '?' of greater size */
+
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+#if defined __INTEL_COMPILER
+
+#pragma warning (disable: 869) /* parameter "XXXXX" was never referenced */
+#pragma warning (disable: 1418) /* external function definition with no prior declaration */
+#pragma warning (disable: 1419) /* external declaration in primary source file */
+
+#endif /* __INTEL_COMPILER */
diff --git a/contrib/libs/tbb/src/tbb/tools_api/ittnotify.h b/contrib/libs/tbb/src/tbb/tools_api/ittnotify.h
new file mode 100644
index 0000000000..993b7b0bfd
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/tools_api/ittnotify.h
@@ -0,0 +1,4165 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#ifndef _ITTNOTIFY_H_
+#define _ITTNOTIFY_H_
+
+/**
+@file
+@brief Public User API functions and types
+@mainpage
+
+The Instrumentation and Tracing Technology API (ITT API) is used to
+annotate a user's program with additional information
+that can be used by correctness and performance tools. The user inserts
+calls in their program. Those calls generate information that is collected
+at runtime, and used by Intel(R) Threading Tools.
+
+@section API Concepts
+The following general concepts are used throughout the API.
+
+@subsection Unicode Support
+Many API functions take character string arguments. On Windows, there
+are two versions of each such function. The function name is suffixed
+by W if Unicode support is enabled, and by A otherwise. Any API function
+that takes a character string argument adheres to this convention.
+
+@subsection Conditional Compilation
+Many users prefer having an option to modify ITT API code when linking it
+inside their runtimes. The ITT API header file provides a mechanism to replace
+ITT API function names inside your code with empty strings. To do this,
+define the macro INTEL_NO_ITTNOTIFY_API during compilation and remove the
+static library from the linker script.
+
+@subsection Domains
+[see domains]
+Domains provide a way to separate notification for different modules or
+libraries in a program. Domains are specified by dotted character strings,
+e.g. TBB.Internal.Control.
+
+A mechanism (to be specified) is provided to enable and disable
+domains. By default, all domains are enabled.
+@subsection Named Entities and Instances
+Named entities (frames, regions, tasks, and markers) communicate
+information about the program to the analysis tools. A named entity often
+refers to a section of program code, or to some set of logical concepts
+that the programmer wants to group together.
+
+Named entities relate to the programmer's static view of the program. When
+the program actually executes, many instances of a given named entity
+may be created.
+
+The API annotations denote instances of named entities. The actual
+named entities are displayed using the analysis tools. In other words,
+the named entities come into existence when instances are created.
+
+Instances of named entities may have instance identifiers (IDs). Some
+API calls use instance identifiers to create relationships between
+different instances of named entities. Other API calls associate data
+with instances of named entities.
+
+Some named entities must always have instance IDs. In particular, regions
+and frames always have IDs. Task and markers need IDs only if the ID is
+needed in another API call (such as adding a relation or metadata).
+
+The lifetime of instance IDs is distinct from the lifetime of
+instances. This allows various relationships to be specified separate
+from the actual execution of instances. This flexibility comes at the
+expense of extra API calls.
+
+The same ID may not be reused for different instances, unless a previous
+[ref] __itt_id_destroy call for that ID has been issued.
+*/
+
+/** @cond exclude_from_documentation */
+#ifndef ITT_OS_WIN
+# define ITT_OS_WIN 1
+#endif /* ITT_OS_WIN */
+
+#ifndef ITT_OS_LINUX
+# define ITT_OS_LINUX 2
+#endif /* ITT_OS_LINUX */
+
+#ifndef ITT_OS_MAC
+# define ITT_OS_MAC 3
+#endif /* ITT_OS_MAC */
+
+#ifndef ITT_OS_FREEBSD
+# define ITT_OS_FREEBSD 4
+#endif /* ITT_OS_FREEBSD */
+
+#ifndef ITT_OS
+# if defined WIN32 || defined _WIN32
+# define ITT_OS ITT_OS_WIN
+# elif defined( __APPLE__ ) && defined( __MACH__ )
+# define ITT_OS ITT_OS_MAC
+# elif defined( __FreeBSD__ )
+# define ITT_OS ITT_OS_FREEBSD
+# else
+# define ITT_OS ITT_OS_LINUX
+# endif
+#endif /* ITT_OS */
+
+#ifndef ITT_PLATFORM_WIN
+# define ITT_PLATFORM_WIN 1
+#endif /* ITT_PLATFORM_WIN */
+
+#ifndef ITT_PLATFORM_POSIX
+# define ITT_PLATFORM_POSIX 2
+#endif /* ITT_PLATFORM_POSIX */
+
+#ifndef ITT_PLATFORM_MAC
+# define ITT_PLATFORM_MAC 3
+#endif /* ITT_PLATFORM_MAC */
+
+#ifndef ITT_PLATFORM_FREEBSD
+# define ITT_PLATFORM_FREEBSD 4
+#endif /* ITT_PLATFORM_FREEBSD */
+
+#ifndef ITT_PLATFORM
+# if ITT_OS==ITT_OS_WIN
+# define ITT_PLATFORM ITT_PLATFORM_WIN
+# elif ITT_OS==ITT_OS_MAC
+# define ITT_PLATFORM ITT_PLATFORM_MAC
+# elif ITT_OS==ITT_OS_FREEBSD
+# define ITT_PLATFORM ITT_PLATFORM_FREEBSD
+# else
+# define ITT_PLATFORM ITT_PLATFORM_POSIX
+# endif
+#endif /* ITT_PLATFORM */
+
+#if defined(_UNICODE) && !defined(UNICODE)
+#define UNICODE
+#endif
+
+#include <stddef.h>
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#include <tchar.h>
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#include <stdint.h>
+#if defined(UNICODE) || defined(_UNICODE)
+#include <wchar.h>
+#endif /* UNICODE || _UNICODE */
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+#ifndef ITTAPI_CDECL
+# if ITT_PLATFORM==ITT_PLATFORM_WIN
+# define ITTAPI_CDECL __cdecl
+# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+# if defined _M_IX86 || defined __i386__
+# define ITTAPI_CDECL __attribute__ ((cdecl))
+# else /* _M_IX86 || __i386__ */
+# define ITTAPI_CDECL /* supported only on x86 platform */
+# endif /* _M_IX86 || __i386__ */
+# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* ITTAPI_CDECL */
+
+#ifndef STDCALL
+# if ITT_PLATFORM==ITT_PLATFORM_WIN
+# define STDCALL __stdcall
+# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+# if defined _M_IX86 || defined __i386__
+# define STDCALL __attribute__ ((stdcall))
+# else /* _M_IX86 || __i386__ */
+# define STDCALL /* supported only on x86 platform */
+# endif /* _M_IX86 || __i386__ */
+# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* STDCALL */
+
+#define ITTAPI ITTAPI_CDECL
+#define LIBITTAPI ITTAPI_CDECL
+
+/* TODO: Temporary for compatibility! */
+#define ITTAPI_CALL ITTAPI_CDECL
+#define LIBITTAPI_CALL ITTAPI_CDECL
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+/* use __forceinline (VC++ specific) */
+#define ITT_INLINE __forceinline
+#define ITT_INLINE_ATTRIBUTE /* nothing */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+/*
+ * Generally, functions are not inlined unless optimization is specified.
+ * For functions declared inline, this attribute inlines the function even
+ * if no optimization level was specified.
+ */
+#ifdef __STRICT_ANSI__
+#define ITT_INLINE static
+#define ITT_INLINE_ATTRIBUTE __attribute__((unused))
+#else /* __STRICT_ANSI__ */
+#define ITT_INLINE static inline
+#define ITT_INLINE_ATTRIBUTE __attribute__((always_inline, unused))
+#endif /* __STRICT_ANSI__ */
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+/** @endcond */
+
+#ifdef INTEL_ITTNOTIFY_ENABLE_LEGACY
+# if ITT_PLATFORM==ITT_PLATFORM_WIN
+# pragma message("WARNING!!! Deprecated API is used. Please undefine INTEL_ITTNOTIFY_ENABLE_LEGACY macro")
+# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+// #warning usage leads to ICC's compilation error
+// # warning "Deprecated API is used. Please undefine INTEL_ITTNOTIFY_ENABLE_LEGACY macro"
+# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+# include "legacy/ittnotify.h"
+#endif /* INTEL_ITTNOTIFY_ENABLE_LEGACY */
+
+/** @cond exclude_from_documentation */
+/* Helper macro for joining tokens */
+#define ITT_JOIN_AUX(p,n) p##n
+#define ITT_JOIN(p,n) ITT_JOIN_AUX(p,n)
+
+#ifdef ITT_MAJOR
+#undef ITT_MAJOR
+#endif
+#ifdef ITT_MINOR
+#undef ITT_MINOR
+#endif
+#define ITT_MAJOR 3
+#define ITT_MINOR 0
+
+/* Standard versioning of a token with major and minor version numbers */
+#define ITT_VERSIONIZE(x) \
+ ITT_JOIN(x, \
+ ITT_JOIN(_, \
+ ITT_JOIN(ITT_MAJOR, \
+ ITT_JOIN(_, ITT_MINOR))))
+
+#ifndef INTEL_ITTNOTIFY_PREFIX
+# define INTEL_ITTNOTIFY_PREFIX __itt_
+#endif /* INTEL_ITTNOTIFY_PREFIX */
+#ifndef INTEL_ITTNOTIFY_POSTFIX
+# define INTEL_ITTNOTIFY_POSTFIX _ptr_
+#endif /* INTEL_ITTNOTIFY_POSTFIX */
+
+#define ITTNOTIFY_NAME_AUX(n) ITT_JOIN(INTEL_ITTNOTIFY_PREFIX,n)
+#define ITTNOTIFY_NAME(n) ITT_VERSIONIZE(ITTNOTIFY_NAME_AUX(ITT_JOIN(n,INTEL_ITTNOTIFY_POSTFIX)))
+
+#define ITTNOTIFY_VOID(n) (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)
+#define ITTNOTIFY_DATA(n) (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)
+
+#define ITTNOTIFY_VOID_D0(n,d) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d)
+#define ITTNOTIFY_VOID_D1(n,d,x) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x)
+#define ITTNOTIFY_VOID_D2(n,d,x,y) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y)
+#define ITTNOTIFY_VOID_D3(n,d,x,y,z) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z)
+#define ITTNOTIFY_VOID_D4(n,d,x,y,z,a) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a)
+#define ITTNOTIFY_VOID_D5(n,d,x,y,z,a,b) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b)
+#define ITTNOTIFY_VOID_D6(n,d,x,y,z,a,b,c) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c)
+#define ITTNOTIFY_DATA_D0(n,d) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d)
+#define ITTNOTIFY_DATA_D1(n,d,x) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x)
+#define ITTNOTIFY_DATA_D2(n,d,x,y) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y)
+#define ITTNOTIFY_DATA_D3(n,d,x,y,z) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z)
+#define ITTNOTIFY_DATA_D4(n,d,x,y,z,a) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a)
+#define ITTNOTIFY_DATA_D5(n,d,x,y,z,a,b) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b)
+#define ITTNOTIFY_DATA_D6(n,d,x,y,z,a,b,c) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c)
+
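To make the token pasting above concrete, here is the approximate expansion for the pause entry point, assuming the default prefix/postfix and the 3.0 version macros defined above (shown as a comment, not as library code):

    // ITTNOTIFY_NAME(pause)
    //   -> ITT_VERSIONIZE(ITTNOTIFY_NAME_AUX(ITT_JOIN(pause, _ptr_)))
    //   -> ITT_VERSIONIZE(__itt_pause_ptr_)
    //   -> __itt_pause_ptr__3_0                 // the versioned function pointer
    //
    // ITTNOTIFY_VOID(pause), used below as the body of __itt_pause, therefore becomes roughly:
    //   (!__itt_pause_ptr__3_0) ? (void)0 : __itt_pause_ptr__3_0
    // so a call site __itt_pause() collapses to a no-op when the pointer is unset.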
+#ifdef ITT_STUB
+#undef ITT_STUB
+#endif
+#ifdef ITT_STUBV
+#undef ITT_STUBV
+#endif
+#define ITT_STUBV(api,type,name,args) \
+ typedef type (api* ITT_JOIN(ITTNOTIFY_NAME(name),_t)) args; \
+ extern ITT_JOIN(ITTNOTIFY_NAME(name),_t) ITTNOTIFY_NAME(name);
+#define ITT_STUB ITT_STUBV
+/** @endcond */
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/** @cond exclude_from_gpa_documentation */
+/**
+ * @defgroup public Public API
+ * @{
+ * @}
+ */
+
+/**
+ * @defgroup control Collection Control
+ * @ingroup public
+ * General behavior: application continues to run, but no profiling information is being collected
+ *
+ * Pausing occurs not only for the current thread but for the whole process as well as spawned processes
+ * - Intel(R) Parallel Inspector and Intel(R) Inspector XE:
+ * - Does not analyze or report errors that involve memory access.
+ * - Other errors are reported as usual. Pausing data collection in
+ * Intel(R) Parallel Inspector and Intel(R) Inspector XE
+ * only pauses tracing and analyzing memory access.
+ * It does not pause tracing or analyzing threading APIs.
+ * .
+ * - Intel(R) Parallel Amplifier and Intel(R) VTune(TM) Amplifier XE:
+ * - Does continue to record when new threads are started.
+ * .
+ * - Other effects:
+ * - Possible reduction of runtime overhead.
+ * .
+ * @{
+ */
+/** @brief Pause collection */
+void ITTAPI __itt_pause(void);
+/** @brief Resume collection */
+void ITTAPI __itt_resume(void);
+/** @brief Detach collection */
+void ITTAPI __itt_detach(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, pause, (void))
+ITT_STUBV(ITTAPI, void, resume, (void))
+ITT_STUBV(ITTAPI, void, detach, (void))
+#define __itt_pause ITTNOTIFY_VOID(pause)
+#define __itt_pause_ptr ITTNOTIFY_NAME(pause)
+#define __itt_resume ITTNOTIFY_VOID(resume)
+#define __itt_resume_ptr ITTNOTIFY_NAME(resume)
+#define __itt_detach ITTNOTIFY_VOID(detach)
+#define __itt_detach_ptr ITTNOTIFY_NAME(detach)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_pause()
+#define __itt_pause_ptr 0
+#define __itt_resume()
+#define __itt_resume_ptr 0
+#define __itt_detach()
+#define __itt_detach_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_pause_ptr 0
+#define __itt_resume_ptr 0
+#define __itt_detach_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} control group */
+/** @endcond */
+
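A minimal usage sketch of the collection-control calls declared above; uninteresting_setup() and analyzed_work() are placeholders, not part of the ITT API.

    void run_with_focused_collection() {
        __itt_pause();           // stop collecting around setup we do not want profiled
        uninteresting_setup();
        __itt_resume();          // collect data only for the region of interest
        analyzed_work();
    }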
+/**
+ * @defgroup Intel Processor Trace control
+ * The API in this group provides control over the collection and analysis of Intel Processor Trace (Intel PT) data.
+ * Information about Intel Processor Trace technology can be found here (Volume 3 chapter 35):
+ * https://software.intel.com/sites/default/files/managed/39/c5/325462-sdm-vol-1-2abcd-3abcd.pdf
+ * Use this API to mark particular code regions for loading detailed performance statistics.
+ * This mode makes your analysis faster and more accurate.
+ * @{
+*/
+typedef unsigned char __itt_pt_region;
+
+/**
+ * @brief Saves a region name marked with the Intel PT API and returns a region id.
+ * Only 7 names can be registered; attempts to register more names are ignored, and a region id with an automatic name is returned.
+ * For automatic naming of regions, pass NULL as the function parameter.
+*/
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+__itt_pt_region ITTAPI __itt_pt_region_createA(const char *name);
+__itt_pt_region ITTAPI __itt_pt_region_createW(const wchar_t *name);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_pt_region_create __itt_pt_region_createW
+#else /* UNICODE */
+# define __itt_pt_region_create __itt_pt_region_createA
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_pt_region ITTAPI __itt_pt_region_create(const char *name);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_pt_region, pt_region_createA, (const char *name))
+ITT_STUB(ITTAPI, __itt_pt_region, pt_region_createW, (const wchar_t *name))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_pt_region, pt_region_create, (const char *name))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_pt_region_createA ITTNOTIFY_DATA(pt_region_createA)
+#define __itt_pt_region_createA_ptr ITTNOTIFY_NAME(pt_region_createA)
+#define __itt_pt_region_createW ITTNOTIFY_DATA(pt_region_createW)
+#define __itt_pt_region_createW_ptr ITTNOTIFY_NAME(pt_region_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_pt_region_create ITTNOTIFY_DATA(pt_region_create)
+#define __itt_pt_region_create_ptr ITTNOTIFY_NAME(pt_region_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_pt_region_createA(name) (__itt_pt_region)0
+#define __itt_pt_region_createA_ptr 0
+#define __itt_pt_region_createW(name) (__itt_pt_region)0
+#define __itt_pt_region_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_pt_region_create(name) (__itt_pt_region)0
+#define __itt_pt_region_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_pt_region_createA_ptr 0
+#define __itt_pt_region_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_pt_region_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief The function contains a special code pattern identified at the post-processing stage and
+ * marks the beginning of a code region targeted for Intel PT analysis.
+ * @param[in] region - region id, 0 <= region < 8
+*/
+void __itt_mark_pt_region_begin(__itt_pt_region region);
+/**
+ * @brief The function contains a special code pattern identified at the post-processing stage and
+ * marks the end of a code region targeted for Intel PT analysis.
+ * @param[in] region - region id, 0 <= region < 8
+*/
+void __itt_mark_pt_region_end(__itt_pt_region region);
+/** @} Intel PT control group*/
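A short usage sketch of the Intel PT region API declared above (the region name is arbitrary and error handling is omitted):

    __itt_pt_region hot_loop = __itt_pt_region_create("hot_loop");
    __itt_mark_pt_region_begin(hot_loop);
    // ... code whose execution should be captured for Intel PT analysis ...
    __itt_mark_pt_region_end(hot_loop);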
+
+/**
+ * @defgroup threads Threads
+ * @ingroup public
+ * Give names to threads
+ * @{
+ */
+/**
+ * @brief Sets thread name of calling thread
+ * @param[in] name - name of thread
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+void ITTAPI __itt_thread_set_nameA(const char *name);
+void ITTAPI __itt_thread_set_nameW(const wchar_t *name);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_thread_set_name __itt_thread_set_nameW
+# define __itt_thread_set_name_ptr __itt_thread_set_nameW_ptr
+#else /* UNICODE */
+# define __itt_thread_set_name __itt_thread_set_nameA
+# define __itt_thread_set_name_ptr __itt_thread_set_nameA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+void ITTAPI __itt_thread_set_name(const char *name);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, thread_set_nameA, (const char *name))
+ITT_STUBV(ITTAPI, void, thread_set_nameW, (const wchar_t *name))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, thread_set_name, (const char *name))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_thread_set_nameA ITTNOTIFY_VOID(thread_set_nameA)
+#define __itt_thread_set_nameA_ptr ITTNOTIFY_NAME(thread_set_nameA)
+#define __itt_thread_set_nameW ITTNOTIFY_VOID(thread_set_nameW)
+#define __itt_thread_set_nameW_ptr ITTNOTIFY_NAME(thread_set_nameW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_thread_set_name ITTNOTIFY_VOID(thread_set_name)
+#define __itt_thread_set_name_ptr ITTNOTIFY_NAME(thread_set_name)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_thread_set_nameA(name)
+#define __itt_thread_set_nameA_ptr 0
+#define __itt_thread_set_nameW(name)
+#define __itt_thread_set_nameW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_thread_set_name(name)
+#define __itt_thread_set_name_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_thread_set_nameA_ptr 0
+#define __itt_thread_set_nameW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_thread_set_name_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @cond exclude_from_gpa_documentation */
+
+/**
+ * @brief Mark current thread as ignored from this point on, for the duration of its existence.
+ */
+void ITTAPI __itt_thread_ignore(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, thread_ignore, (void))
+#define __itt_thread_ignore ITTNOTIFY_VOID(thread_ignore)
+#define __itt_thread_ignore_ptr ITTNOTIFY_NAME(thread_ignore)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_thread_ignore()
+#define __itt_thread_ignore_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_thread_ignore_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} threads group */
+
+/**
+ * @defgroup suppress Error suppression
+ * @ingroup public
+ * General behavior: application continues to run, but errors are suppressed
+ *
+ * @{
+ */
+
+/*****************************************************************//**
+ * @name group of functions used for error suppression in correctness tools
+ *********************************************************************/
+/** @{ */
+/**
+ * @hideinitializer
+ * @brief possible value for suppression mask
+ */
+#define __itt_suppress_all_errors 0x7fffffff
+
+/**
+ * @hideinitializer
+ * @brief possible value for suppression mask (suppresses errors from threading analysis)
+ */
+#define __itt_suppress_threading_errors 0x000000ff
+
+/**
+ * @hideinitializer
+ * @brief possible value for suppression mask (suppresses errors from memory analysis)
+ */
+#define __itt_suppress_memory_errors 0x0000ff00
+
+/**
+ * @brief Start suppressing errors identified in mask on this thread
+ */
+void ITTAPI __itt_suppress_push(unsigned int mask);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, suppress_push, (unsigned int mask))
+#define __itt_suppress_push ITTNOTIFY_VOID(suppress_push)
+#define __itt_suppress_push_ptr ITTNOTIFY_NAME(suppress_push)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_suppress_push(mask)
+#define __itt_suppress_push_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_suppress_push_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Undo the effects of the matching call to __itt_suppress_push
+ */
+void ITTAPI __itt_suppress_pop(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, suppress_pop, (void))
+#define __itt_suppress_pop ITTNOTIFY_VOID(suppress_pop)
+#define __itt_suppress_pop_ptr ITTNOTIFY_NAME(suppress_pop)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_suppress_pop()
+#define __itt_suppress_pop_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_suppress_pop_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @enum __itt_model_disable
+ * @brief Enumerator for the disable methods
+ */
+typedef enum __itt_suppress_mode {
+ __itt_unsuppress_range,
+ __itt_suppress_range
+} __itt_suppress_mode_t;
+
+/**
+ * @brief Mark a range of memory for error suppression or unsuppression for error types included in mask
+ */
+void ITTAPI __itt_suppress_mark_range(__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, suppress_mark_range, (__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size))
+#define __itt_suppress_mark_range ITTNOTIFY_VOID(suppress_mark_range)
+#define __itt_suppress_mark_range_ptr ITTNOTIFY_NAME(suppress_mark_range)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_suppress_mark_range(mask)
+#define __itt_suppress_mark_range_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_suppress_mark_range_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Undo the effect of a matching call to __itt_suppress_mark_range. If no matching
+ * call is found, nothing is changed.
+ */
+void ITTAPI __itt_suppress_clear_range(__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, suppress_clear_range, (__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size))
+#define __itt_suppress_clear_range ITTNOTIFY_VOID(suppress_clear_range)
+#define __itt_suppress_clear_range_ptr ITTNOTIFY_NAME(suppress_clear_range)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_suppress_clear_range(mask)
+#define __itt_suppress_clear_range_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_suppress_clear_range_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} */
+/** @} suppress group */
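A hedged usage sketch of the suppression API above; tolerated_racy_read() is a placeholder for code whose memory-analysis reports should be silenced.

    __itt_suppress_push(__itt_suppress_memory_errors);   // suppress memory-analysis errors on this thread
    tolerated_racy_read();
    __itt_suppress_pop();                                 // restore normal reporting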
+
+/**
+ * @defgroup sync Synchronization
+ * @ingroup public
+ * Indicate user-written synchronization code
+ * @{
+ */
+/**
+ * @hideinitializer
+ * @brief possible value of attribute argument for sync object type
+ */
+#define __itt_attr_barrier 1
+
+/**
+ * @hideinitializer
+ * @brief possible value of attribute argument for sync object type
+ */
+#define __itt_attr_mutex 2
+
+/**
+@brief Name a synchronization object
+@param[in] addr Handle for the synchronization object. You should
+use a real address to uniquely identify the synchronization object.
+@param[in] objtype null-terminated object type string. If NULL is
+passed, the name will be "User Synchronization".
+@param[in] objname null-terminated object name string. If NULL,
+no name will be assigned to the object.
+@param[in] attribute one of [#__itt_attr_barrier, #__itt_attr_mutex]
+ */
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+void ITTAPI __itt_sync_createA(void *addr, const char *objtype, const char *objname, int attribute);
+void ITTAPI __itt_sync_createW(void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_sync_create __itt_sync_createW
+# define __itt_sync_create_ptr __itt_sync_createW_ptr
+#else /* UNICODE */
+# define __itt_sync_create __itt_sync_createA
+# define __itt_sync_create_ptr __itt_sync_createA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+void ITTAPI __itt_sync_create (void *addr, const char *objtype, const char *objname, int attribute);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, sync_createA, (void *addr, const char *objtype, const char *objname, int attribute))
+ITT_STUBV(ITTAPI, void, sync_createW, (void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, sync_create, (void *addr, const char* objtype, const char* objname, int attribute))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_sync_createA ITTNOTIFY_VOID(sync_createA)
+#define __itt_sync_createA_ptr ITTNOTIFY_NAME(sync_createA)
+#define __itt_sync_createW ITTNOTIFY_VOID(sync_createW)
+#define __itt_sync_createW_ptr ITTNOTIFY_NAME(sync_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_sync_create ITTNOTIFY_VOID(sync_create)
+#define __itt_sync_create_ptr ITTNOTIFY_NAME(sync_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_sync_createA(addr, objtype, objname, attribute)
+#define __itt_sync_createA_ptr 0
+#define __itt_sync_createW(addr, objtype, objname, attribute)
+#define __itt_sync_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_sync_create(addr, objtype, objname, attribute)
+#define __itt_sync_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_sync_createA_ptr 0
+#define __itt_sync_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_sync_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+@brief Rename a synchronization object
+
+You can use the rename call to assign or reassign a name to a given
+synchronization object.
+@param[in] addr handle for the synchronization object.
+@param[in] name null-terminated object name string.
+*/
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+void ITTAPI __itt_sync_renameA(void *addr, const char *name);
+void ITTAPI __itt_sync_renameW(void *addr, const wchar_t *name);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_sync_rename __itt_sync_renameW
+# define __itt_sync_rename_ptr __itt_sync_renameW_ptr
+#else /* UNICODE */
+# define __itt_sync_rename __itt_sync_renameA
+# define __itt_sync_rename_ptr __itt_sync_renameA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+void ITTAPI __itt_sync_rename(void *addr, const char *name);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, sync_renameA, (void *addr, const char *name))
+ITT_STUBV(ITTAPI, void, sync_renameW, (void *addr, const wchar_t *name))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, sync_rename, (void *addr, const char *name))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_sync_renameA ITTNOTIFY_VOID(sync_renameA)
+#define __itt_sync_renameA_ptr ITTNOTIFY_NAME(sync_renameA)
+#define __itt_sync_renameW ITTNOTIFY_VOID(sync_renameW)
+#define __itt_sync_renameW_ptr ITTNOTIFY_NAME(sync_renameW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_sync_rename ITTNOTIFY_VOID(sync_rename)
+#define __itt_sync_rename_ptr ITTNOTIFY_NAME(sync_rename)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_sync_renameA(addr, name)
+#define __itt_sync_renameA_ptr 0
+#define __itt_sync_renameW(addr, name)
+#define __itt_sync_renameW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_sync_rename(addr, name)
+#define __itt_sync_rename_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_sync_renameA_ptr 0
+#define __itt_sync_renameW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_sync_rename_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ @brief Destroy a synchronization object.
+ @param addr Handle for the synchronization object.
+ */
+void ITTAPI __itt_sync_destroy(void *addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, sync_destroy, (void *addr))
+#define __itt_sync_destroy ITTNOTIFY_VOID(sync_destroy)
+#define __itt_sync_destroy_ptr ITTNOTIFY_NAME(sync_destroy)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_sync_destroy(addr)
+#define __itt_sync_destroy_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_sync_destroy_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/*****************************************************************//**
+ * @name group of functions used for performance measurement tools
+ *********************************************************************/
+/** @{ */
+/**
+ * @brief Enter spin loop on user-defined sync object
+ */
+void ITTAPI __itt_sync_prepare(void* addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, sync_prepare, (void *addr))
+#define __itt_sync_prepare ITTNOTIFY_VOID(sync_prepare)
+#define __itt_sync_prepare_ptr ITTNOTIFY_NAME(sync_prepare)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_sync_prepare(addr)
+#define __itt_sync_prepare_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_sync_prepare_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Quit spin loop without acquiring spin object
+ */
+void ITTAPI __itt_sync_cancel(void *addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, sync_cancel, (void *addr))
+#define __itt_sync_cancel ITTNOTIFY_VOID(sync_cancel)
+#define __itt_sync_cancel_ptr ITTNOTIFY_NAME(sync_cancel)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_sync_cancel(addr)
+#define __itt_sync_cancel_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_sync_cancel_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Successful spin loop completion (sync object acquired)
+ */
+void ITTAPI __itt_sync_acquired(void *addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, sync_acquired, (void *addr))
+#define __itt_sync_acquired ITTNOTIFY_VOID(sync_acquired)
+#define __itt_sync_acquired_ptr ITTNOTIFY_NAME(sync_acquired)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_sync_acquired(addr)
+#define __itt_sync_acquired_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_sync_acquired_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Marks the start of the sync object releasing code. Called before the lock release call.
+ */
+void ITTAPI __itt_sync_releasing(void* addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, sync_releasing, (void *addr))
+#define __itt_sync_releasing ITTNOTIFY_VOID(sync_releasing)
+#define __itt_sync_releasing_ptr ITTNOTIFY_NAME(sync_releasing)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_sync_releasing(addr)
+#define __itt_sync_releasing_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_sync_releasing_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
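+
+/*
+ * Illustrative usage sketch (not part of the original header): annotating a
+ * hand-rolled test-and-set spin lock so that profiling tools can attribute
+ * the time spent spinning. `my_lock_t`, `my_try_lock` and `my_unlock` are
+ * hypothetical user code; only the __itt_sync_* calls come from this API.
+ *
+ *     void my_spin_lock(my_lock_t *lock)
+ *     {
+ *         __itt_sync_prepare(lock);           // entering the spin loop
+ *         while (!my_try_lock(lock))
+ *             ;                               // spin
+ *         __itt_sync_acquired(lock);          // spin finished, lock held
+ *     }
+ *
+ *     int my_spin_trylock(my_lock_t *lock)
+ *     {
+ *         __itt_sync_prepare(lock);
+ *         if (my_try_lock(lock)) { __itt_sync_acquired(lock); return 1; }
+ *         __itt_sync_cancel(lock);            // gave up without acquiring
+ *         return 0;
+ *     }
+ *
+ *     void my_spin_unlock(my_lock_t *lock)
+ *     {
+ *         __itt_sync_releasing(lock);         // about to release the lock
+ *         my_unlock(lock);
+ *     }
+ */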
+/** @} */
+
+/** @} sync group */
+
+/**************************************************************//**
+ * @name Group of functions used by correctness checking tools
+ ******************************************************************/
+/** @{ */
+/**
+ * @ingroup legacy
+ * @deprecated Legacy API
+ * @brief Fast synchronization which does not require spinning.
+ * - This special function is to be used by TBB and OpenMP libraries only when they know
+ * there is no spinning but they need to suppress TC warnings about shared variable modifications.
+ * - It only has corresponding pointers in the static library and has no corresponding function
+ * in the dynamic library.
+ * @see void __itt_sync_prepare(void* addr);
+ */
+void ITTAPI __itt_fsync_prepare(void* addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, fsync_prepare, (void *addr))
+#define __itt_fsync_prepare ITTNOTIFY_VOID(fsync_prepare)
+#define __itt_fsync_prepare_ptr ITTNOTIFY_NAME(fsync_prepare)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_fsync_prepare(addr)
+#define __itt_fsync_prepare_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_fsync_prepare_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup legacy
+ * @deprecated Legacy API
+ * @brief Fast synchronization which does not require spinning.
+ * - This special function is to be used by TBB and OpenMP libraries only when they know
+ * there is no spinning but they need to suppress TC warnings about shared variable modifications.
+ * - It only has corresponding pointers in the static library and has no corresponding function
+ * in the dynamic library.
+ * @see void __itt_sync_cancel(void *addr);
+ */
+void ITTAPI __itt_fsync_cancel(void *addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, fsync_cancel, (void *addr))
+#define __itt_fsync_cancel ITTNOTIFY_VOID(fsync_cancel)
+#define __itt_fsync_cancel_ptr ITTNOTIFY_NAME(fsync_cancel)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_fsync_cancel(addr)
+#define __itt_fsync_cancel_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_fsync_cancel_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup legacy
+ * @deprecated Legacy API
+ * @brief Fast synchronization which does not require spinning.
+ * - This special function is to be used by TBB and OpenMP libraries only when they know
+ * there is no spinning but they need to suppress TC warnings about shared variable modifications.
+ * - It only has corresponding pointers in the static library and has no corresponding function
+ * in the dynamic library.
+ * @see void __itt_sync_acquired(void *addr);
+ */
+void ITTAPI __itt_fsync_acquired(void *addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, fsync_acquired, (void *addr))
+#define __itt_fsync_acquired ITTNOTIFY_VOID(fsync_acquired)
+#define __itt_fsync_acquired_ptr ITTNOTIFY_NAME(fsync_acquired)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_fsync_acquired(addr)
+#define __itt_fsync_acquired_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_fsync_acquired_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup legacy
+ * @deprecated Legacy API
+ * @brief Fast synchronization which does not require spinning.
+ * - This special function is to be used by TBB and OpenMP libraries only when they know
+ * there is no spinning but they need to suppress TC warnings about shared variable modifications.
+ * - It only has corresponding pointers in the static library and has no corresponding function
+ * in the dynamic library.
+ * @see void __itt_sync_releasing(void* addr);
+ */
+void ITTAPI __itt_fsync_releasing(void* addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, fsync_releasing, (void *addr))
+#define __itt_fsync_releasing ITTNOTIFY_VOID(fsync_releasing)
+#define __itt_fsync_releasing_ptr ITTNOTIFY_NAME(fsync_releasing)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_fsync_releasing(addr)
+#define __itt_fsync_releasing_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_fsync_releasing_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} */
+
+/**
+ * @defgroup model Modeling by Intel(R) Parallel Advisor
+ * @ingroup public
+ * This is the subset of itt used for modeling by Intel(R) Parallel Advisor.
+ * This API is called ONLY using annotate.h, by "Annotation" macros
+ * the user places in their sources during the parallelism modeling steps.
+ *
+ * site_begin/end and task_begin/end take the address of handle variables,
+ * which are writeable by the API. Handles must be 0 initialized prior
+ * to the first call to begin; otherwise a run-time failure may occur.
+ * The handles are initialized in a multi-thread safe way by the API if
+ * the handle is 0. The commonly expected idiom is one static handle to
+ * identify a site or task. If a site or task of the same name has already
+ * been started during this collection, the same handle MAY be returned,
+ * but is not required to be - it is unspecified if data merging is done
+ * based on name. These routines also take an instance variable. Like
+ * the lexical instance, these must be 0 initialized. Unlike the lexical
+ * instance, this is used to track a single dynamic instance.
+ *
+ * API used by the Intel(R) Parallel Advisor to describe potential concurrency
+ * and related activities. User-added source annotations expand to calls
+ * to these procedures to enable modeling of a hypothetical concurrent
+ * execution serially.
+ * @{
+ */
+#if !defined(_ADVISOR_ANNOTATE_H_) || defined(ANNOTATE_EXPAND_NULL)
+
+typedef void* __itt_model_site; /*!< @brief handle for lexical site */
+typedef void* __itt_model_site_instance; /*!< @brief handle for dynamic instance */
+typedef void* __itt_model_task; /*!< @brief handle for lexical task */
+typedef void* __itt_model_task_instance; /*!< @brief handle for dynamic instance */
+
+/**
+ * @enum __itt_model_disable
+ * @brief Enumerator for the disable methods
+ */
+typedef enum {
+ __itt_model_disable_observation,
+ __itt_model_disable_collection
+} __itt_model_disable;
+
+#endif /* !_ADVISOR_ANNOTATE_H_ || ANNOTATE_EXPAND_NULL */
+
+/**
+ * @brief ANNOTATE_SITE_BEGIN/ANNOTATE_SITE_END support.
+ *
+ * site_begin/end model a potential concurrency site.
+ * site instances may be recursively nested with themselves.
+ * site_end exits the most recently started but unended site for the current
+ * thread. The handle passed to end may be used to validate structure.
+ * Instances of a site encountered on different threads concurrently
+ * are considered completely distinct. If the site name for two different
+ * lexical sites match, it is unspecified whether they are treated as the
+ * same or different for data presentation.
+ */
+void ITTAPI __itt_model_site_begin(__itt_model_site *site, __itt_model_site_instance *instance, const char *name);
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+void ITTAPI __itt_model_site_beginW(const wchar_t *name);
+#endif
+void ITTAPI __itt_model_site_beginA(const char *name);
+void ITTAPI __itt_model_site_beginAL(const char *name, size_t siteNameLen);
+void ITTAPI __itt_model_site_end (__itt_model_site *site, __itt_model_site_instance *instance);
+void ITTAPI __itt_model_site_end_2(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_site_begin, (__itt_model_site *site, __itt_model_site_instance *instance, const char *name))
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, model_site_beginW, (const wchar_t *name))
+#endif
+ITT_STUBV(ITTAPI, void, model_site_beginA, (const char *name))
+ITT_STUBV(ITTAPI, void, model_site_beginAL, (const char *name, size_t siteNameLen))
+ITT_STUBV(ITTAPI, void, model_site_end, (__itt_model_site *site, __itt_model_site_instance *instance))
+ITT_STUBV(ITTAPI, void, model_site_end_2, (void))
+#define __itt_model_site_begin ITTNOTIFY_VOID(model_site_begin)
+#define __itt_model_site_begin_ptr ITTNOTIFY_NAME(model_site_begin)
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_model_site_beginW ITTNOTIFY_VOID(model_site_beginW)
+#define __itt_model_site_beginW_ptr ITTNOTIFY_NAME(model_site_beginW)
+#endif
+#define __itt_model_site_beginA ITTNOTIFY_VOID(model_site_beginA)
+#define __itt_model_site_beginA_ptr ITTNOTIFY_NAME(model_site_beginA)
+#define __itt_model_site_beginAL ITTNOTIFY_VOID(model_site_beginAL)
+#define __itt_model_site_beginAL_ptr ITTNOTIFY_NAME(model_site_beginAL)
+#define __itt_model_site_end ITTNOTIFY_VOID(model_site_end)
+#define __itt_model_site_end_ptr ITTNOTIFY_NAME(model_site_end)
+#define __itt_model_site_end_2 ITTNOTIFY_VOID(model_site_end_2)
+#define __itt_model_site_end_2_ptr ITTNOTIFY_NAME(model_site_end_2)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_model_site_begin(site, instance, name)
+#define __itt_model_site_begin_ptr 0
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_model_site_beginW(name)
+#define __itt_model_site_beginW_ptr 0
+#endif
+#define __itt_model_site_beginA(name)
+#define __itt_model_site_beginA_ptr 0
+#define __itt_model_site_beginAL(name, siteNameLen)
+#define __itt_model_site_beginAL_ptr 0
+#define __itt_model_site_end(site, instance)
+#define __itt_model_site_end_ptr 0
+#define __itt_model_site_end_2()
+#define __itt_model_site_end_2_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_model_site_begin_ptr 0
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_model_site_beginW_ptr 0
+#endif
+#define __itt_model_site_beginA_ptr 0
+#define __itt_model_site_beginAL_ptr 0
+#define __itt_model_site_end_ptr 0
+#define __itt_model_site_end_2_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
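+
+/*
+ * Illustrative usage sketch (not part of the original header): modeling a
+ * loop as a potential parallel site. As described above, the lexical and
+ * instance handles are zero-initialized statics that the API fills in on
+ * first use; `n` and `process(i)` are hypothetical user code.
+ *
+ *     static __itt_model_site          site_handle;
+ *     static __itt_model_site_instance site_instance;
+ *
+ *     __itt_model_site_begin(&site_handle, &site_instance, "process_loop");
+ *     for (int i = 0; i < n; ++i)
+ *         process(i);
+ *     __itt_model_site_end(&site_handle, &site_instance);
+ */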
+
+/**
+ * @brief ANNOTATE_TASK_BEGIN/ANNOTATE_TASK_END support
+ *
+ * task_begin/end model a potential task, which is contained within the most
+ * closely enclosing dynamic site. task_end exits the most recently started
+ * but unended task. The handle passed to end may be used to validate
+ * structure. It is unspecified if bad dynamic nesting is detected. If it
+ * is, it should be encoded in the resulting data collection. The collector
+ * should not fail due to construct nesting issues, nor attempt to directly
+ * indicate the problem.
+ */
+void ITTAPI __itt_model_task_begin(__itt_model_task *task, __itt_model_task_instance *instance, const char *name);
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+void ITTAPI __itt_model_task_beginW(const wchar_t *name);
+void ITTAPI __itt_model_iteration_taskW(const wchar_t *name);
+#endif
+void ITTAPI __itt_model_task_beginA(const char *name);
+void ITTAPI __itt_model_task_beginAL(const char *name, size_t taskNameLen);
+void ITTAPI __itt_model_iteration_taskA(const char *name);
+void ITTAPI __itt_model_iteration_taskAL(const char *name, size_t taskNameLen);
+void ITTAPI __itt_model_task_end (__itt_model_task *task, __itt_model_task_instance *instance);
+void ITTAPI __itt_model_task_end_2(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_task_begin, (__itt_model_task *task, __itt_model_task_instance *instance, const char *name))
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, model_task_beginW, (const wchar_t *name))
+ITT_STUBV(ITTAPI, void, model_iteration_taskW, (const wchar_t *name))
+#endif
+ITT_STUBV(ITTAPI, void, model_task_beginA, (const char *name))
+ITT_STUBV(ITTAPI, void, model_task_beginAL, (const char *name, size_t taskNameLen))
+ITT_STUBV(ITTAPI, void, model_iteration_taskA, (const char *name))
+ITT_STUBV(ITTAPI, void, model_iteration_taskAL, (const char *name, size_t taskNameLen))
+ITT_STUBV(ITTAPI, void, model_task_end, (__itt_model_task *task, __itt_model_task_instance *instance))
+ITT_STUBV(ITTAPI, void, model_task_end_2, (void))
+#define __itt_model_task_begin ITTNOTIFY_VOID(model_task_begin)
+#define __itt_model_task_begin_ptr ITTNOTIFY_NAME(model_task_begin)
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_model_task_beginW ITTNOTIFY_VOID(model_task_beginW)
+#define __itt_model_task_beginW_ptr ITTNOTIFY_NAME(model_task_beginW)
+#define __itt_model_iteration_taskW ITTNOTIFY_VOID(model_iteration_taskW)
+#define __itt_model_iteration_taskW_ptr ITTNOTIFY_NAME(model_iteration_taskW)
+#endif
+#define __itt_model_task_beginA ITTNOTIFY_VOID(model_task_beginA)
+#define __itt_model_task_beginA_ptr ITTNOTIFY_NAME(model_task_beginA)
+#define __itt_model_task_beginAL ITTNOTIFY_VOID(model_task_beginAL)
+#define __itt_model_task_beginAL_ptr ITTNOTIFY_NAME(model_task_beginAL)
+#define __itt_model_iteration_taskA ITTNOTIFY_VOID(model_iteration_taskA)
+#define __itt_model_iteration_taskA_ptr ITTNOTIFY_NAME(model_iteration_taskA)
+#define __itt_model_iteration_taskAL ITTNOTIFY_VOID(model_iteration_taskAL)
+#define __itt_model_iteration_taskAL_ptr ITTNOTIFY_NAME(model_iteration_taskAL)
+#define __itt_model_task_end ITTNOTIFY_VOID(model_task_end)
+#define __itt_model_task_end_ptr ITTNOTIFY_NAME(model_task_end)
+#define __itt_model_task_end_2 ITTNOTIFY_VOID(model_task_end_2)
+#define __itt_model_task_end_2_ptr ITTNOTIFY_NAME(model_task_end_2)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_model_task_begin(task, instance, name)
+#define __itt_model_task_begin_ptr 0
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_model_task_beginW(name)
+#define __itt_model_task_beginW_ptr 0
+#endif
+#define __itt_model_task_beginA(name)
+#define __itt_model_task_beginA_ptr 0
+#define __itt_model_task_beginAL(name, siteNameLen)
+#define __itt_model_task_beginAL_ptr 0
+#define __itt_model_iteration_taskA(name)
+#define __itt_model_iteration_taskA_ptr 0
+#define __itt_model_iteration_taskAL(name, siteNameLen)
+#define __itt_model_iteration_taskAL_ptr 0
+#define __itt_model_task_end(task, instance)
+#define __itt_model_task_end_ptr 0
+#define __itt_model_task_end_2()
+#define __itt_model_task_end_2_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_model_task_begin_ptr 0
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_model_task_beginW_ptr 0
+#endif
+#define __itt_model_task_beginA_ptr 0
+#define __itt_model_task_beginAL_ptr 0
+#define __itt_model_iteration_taskA_ptr 0
+#define __itt_model_iteration_taskAL_ptr 0
+#define __itt_model_task_end_ptr 0
+#define __itt_model_task_end_2_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
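+
+/*
+ * Illustrative usage sketch (not part of the original header): marking each
+ * iteration of the site from the previous sketch as a potential task, so the
+ * modeling tool can evaluate running the iterations concurrently. The task
+ * handles are zero-initialized statics, as for sites; `process(i)` is a
+ * hypothetical user function.
+ *
+ *     static __itt_model_task          task_handle;
+ *     static __itt_model_task_instance task_instance;
+ *
+ *     __itt_model_site_begin(&site_handle, &site_instance, "process_loop");
+ *     for (int i = 0; i < n; ++i) {
+ *         __itt_model_task_begin(&task_handle, &task_instance, "process_iter");
+ *         process(i);
+ *         __itt_model_task_end(&task_handle, &task_instance);
+ *     }
+ *     __itt_model_site_end(&site_handle, &site_instance);
+ */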
+
+/**
+ * @brief ANNOTATE_LOCK_ACQUIRE/ANNOTATE_LOCK_RELEASE support
+ *
+ * lock_acquire/release model a potential lock for both lockset and
+ * performance modeling. Each unique address is modeled as a separate
+ * lock, with invalid addresses being valid lock IDs. Specifically:
+ * no storage is accessed by the API at the specified address - it is only
+ * used for lock identification. Lock acquires may be self-nested and are
+ * unlocked by a corresponding number of releases.
+ * (These closely correspond to __itt_sync_acquired/__itt_sync_releasing,
+ * but may not have identical semantics.)
+ */
+void ITTAPI __itt_model_lock_acquire(void *lock);
+void ITTAPI __itt_model_lock_acquire_2(void *lock);
+void ITTAPI __itt_model_lock_release(void *lock);
+void ITTAPI __itt_model_lock_release_2(void *lock);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_lock_acquire, (void *lock))
+ITT_STUBV(ITTAPI, void, model_lock_acquire_2, (void *lock))
+ITT_STUBV(ITTAPI, void, model_lock_release, (void *lock))
+ITT_STUBV(ITTAPI, void, model_lock_release_2, (void *lock))
+#define __itt_model_lock_acquire ITTNOTIFY_VOID(model_lock_acquire)
+#define __itt_model_lock_acquire_ptr ITTNOTIFY_NAME(model_lock_acquire)
+#define __itt_model_lock_acquire_2 ITTNOTIFY_VOID(model_lock_acquire_2)
+#define __itt_model_lock_acquire_2_ptr ITTNOTIFY_NAME(model_lock_acquire_2)
+#define __itt_model_lock_release ITTNOTIFY_VOID(model_lock_release)
+#define __itt_model_lock_release_ptr ITTNOTIFY_NAME(model_lock_release)
+#define __itt_model_lock_release_2 ITTNOTIFY_VOID(model_lock_release_2)
+#define __itt_model_lock_release_2_ptr ITTNOTIFY_NAME(model_lock_release_2)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_model_lock_acquire(lock)
+#define __itt_model_lock_acquire_ptr 0
+#define __itt_model_lock_acquire_2(lock)
+#define __itt_model_lock_acquire_2_ptr 0
+#define __itt_model_lock_release(lock)
+#define __itt_model_lock_release_ptr 0
+#define __itt_model_lock_release_2(lock)
+#define __itt_model_lock_release_2_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_model_lock_acquire_ptr 0
+#define __itt_model_lock_acquire_2_ptr 0
+#define __itt_model_lock_release_ptr 0
+#define __itt_model_lock_release_2_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
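+
+/*
+ * Illustrative usage sketch (not part of the original header): describing a
+ * potential lock to the modeling tool. Only the address identifies the lock;
+ * no storage at that address is touched by the API. The `account` object and
+ * its update are hypothetical user code.
+ *
+ *     __itt_model_lock_acquire(&account);     // a lock would be taken here
+ *     account.balance += amount;              // ...protecting this update
+ *     __itt_model_lock_release(&account);     // ...and released here
+ */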
+
+/**
+ * @brief ANNOTATE_RECORD_ALLOCATION/ANNOTATE_RECORD_DEALLOCATION support
+ *
+ * record_allocation/deallocation describe user-defined memory allocator
+ * behavior, which may be required for correctness modeling to understand
+ * when storage is not expected to be actually reused across threads.
+ */
+void ITTAPI __itt_model_record_allocation (void *addr, size_t size);
+void ITTAPI __itt_model_record_deallocation(void *addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_record_allocation, (void *addr, size_t size))
+ITT_STUBV(ITTAPI, void, model_record_deallocation, (void *addr))
+#define __itt_model_record_allocation ITTNOTIFY_VOID(model_record_allocation)
+#define __itt_model_record_allocation_ptr ITTNOTIFY_NAME(model_record_allocation)
+#define __itt_model_record_deallocation ITTNOTIFY_VOID(model_record_deallocation)
+#define __itt_model_record_deallocation_ptr ITTNOTIFY_NAME(model_record_deallocation)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_model_record_allocation(addr, size)
+#define __itt_model_record_allocation_ptr 0
+#define __itt_model_record_deallocation(addr)
+#define __itt_model_record_deallocation_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_model_record_allocation_ptr 0
+#define __itt_model_record_deallocation_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
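+
+/*
+ * Illustrative usage sketch (not part of the original header): a user pool
+ * allocator telling the correctness model when a block is handed out and
+ * returned, so recycled storage is not mistaken for a cross-thread conflict.
+ * `pool_take` and `pool_give_back` are hypothetical user functions.
+ *
+ *     void *block = pool_take(&pool, size);
+ *     __itt_model_record_allocation(block, size);
+ *     // ...use block...
+ *     __itt_model_record_deallocation(block);
+ *     pool_give_back(&pool, block);
+ */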
+
+/**
+ * @brief ANNOTATE_INDUCTION_USES support
+ *
+ * Note that particular storage is inductive through the end of the current site
+ */
+void ITTAPI __itt_model_induction_uses(void* addr, size_t size);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_induction_uses, (void *addr, size_t size))
+#define __itt_model_induction_uses ITTNOTIFY_VOID(model_induction_uses)
+#define __itt_model_induction_uses_ptr ITTNOTIFY_NAME(model_induction_uses)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_model_induction_uses(addr, size)
+#define __itt_model_induction_uses_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_model_induction_uses_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief ANNOTATE_REDUCTION_USES support
+ *
+ * Note that particular storage is used for reduction through the end
+ * of the current site
+ */
+void ITTAPI __itt_model_reduction_uses(void* addr, size_t size);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_reduction_uses, (void *addr, size_t size))
+#define __itt_model_reduction_uses ITTNOTIFY_VOID(model_reduction_uses)
+#define __itt_model_reduction_uses_ptr ITTNOTIFY_NAME(model_reduction_uses)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_model_reduction_uses(addr, size)
+#define __itt_model_reduction_uses_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_model_reduction_uses_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief ANNOTATE_OBSERVE_USES support
+ *
+ * Have correctness modeling record observations about uses of storage
+ * through the end of the current site
+ */
+void ITTAPI __itt_model_observe_uses(void* addr, size_t size);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_observe_uses, (void *addr, size_t size))
+#define __itt_model_observe_uses ITTNOTIFY_VOID(model_observe_uses)
+#define __itt_model_observe_uses_ptr ITTNOTIFY_NAME(model_observe_uses)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_model_observe_uses(addr, size)
+#define __itt_model_observe_uses_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_model_observe_uses_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief ANNOTATE_CLEAR_USES support
+ *
+ * Clear the special handling of a piece of storage related to induction,
+ * reduction or observe_uses
+ */
+void ITTAPI __itt_model_clear_uses(void* addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_clear_uses, (void *addr))
+#define __itt_model_clear_uses ITTNOTIFY_VOID(model_clear_uses)
+#define __itt_model_clear_uses_ptr ITTNOTIFY_NAME(model_clear_uses)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_model_clear_uses(addr)
+#define __itt_model_clear_uses_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_model_clear_uses_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
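+
+/*
+ * Illustrative usage sketch (not part of the original header): telling the
+ * correctness model that `sum` is a reduction inside the modeled site, and
+ * clearing that special handling afterwards. `sum`, `n` and `data` are
+ * hypothetical user variables.
+ *
+ *     double sum = 0.0;
+ *     __itt_model_reduction_uses(&sum, sizeof(sum));
+ *     for (int i = 0; i < n; ++i)
+ *         sum += data[i];               // would be a reduction when parallel
+ *     __itt_model_clear_uses(&sum);
+ */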
+
+/**
+ * @brief ANNOTATE_DISABLE_*_PUSH/ANNOTATE_DISABLE_*_POP support
+ *
+ * disable_push/disable_pop push and pop disabling based on a parameter.
+ * Disabling observations stops processing of memory references during
+ * correctness modeling, and all annotations that occur in the disabled
+ * region. This allows description of code that is expected to be handled
+ * specially during conversion to parallelism or that is not recognized
+ * by tools (e.g. some kinds of synchronization operations.)
+ * This mechanism causes all annotations in the disabled region, other
+ * than disable_push and disable_pop, to be ignored. (For example, this
+ * might validly be used to disable an entire parallel site and the contained
+ * tasks and locking in it for data collection purposes.)
+ * The disable for collection is a more expensive operation, but reduces
+ * collector overhead significantly. This applies to BOTH correctness data
+ * collection and performance data collection. For example, a site
+ * containing a task might only enable data collection for the first 10
+ * iterations. Both performance and correctness data should reflect this,
+ * and the program should run as close to full speed as possible when
+ * collection is disabled.
+ */
+void ITTAPI __itt_model_disable_push(__itt_model_disable x);
+void ITTAPI __itt_model_disable_pop(void);
+void ITTAPI __itt_model_aggregate_task(size_t x);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, model_disable_push, (__itt_model_disable x))
+ITT_STUBV(ITTAPI, void, model_disable_pop, (void))
+ITT_STUBV(ITTAPI, void, model_aggregate_task, (size_t x))
+#define __itt_model_disable_push ITTNOTIFY_VOID(model_disable_push)
+#define __itt_model_disable_push_ptr ITTNOTIFY_NAME(model_disable_push)
+#define __itt_model_disable_pop ITTNOTIFY_VOID(model_disable_pop)
+#define __itt_model_disable_pop_ptr ITTNOTIFY_NAME(model_disable_pop)
+#define __itt_model_aggregate_task ITTNOTIFY_VOID(model_aggregate_task)
+#define __itt_model_aggregate_task_ptr ITTNOTIFY_NAME(model_aggregate_task)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_model_disable_push(x)
+#define __itt_model_disable_push_ptr 0
+#define __itt_model_disable_pop()
+#define __itt_model_disable_pop_ptr 0
+#define __itt_model_aggregate_task(x)
+#define __itt_model_aggregate_task_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_model_disable_push_ptr 0
+#define __itt_model_disable_pop_ptr 0
+#define __itt_model_aggregate_task_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
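+
+/*
+ * Illustrative usage sketch (not part of the original header): following the
+ * example in the text above, collection is disabled after the first 10
+ * iterations so the rest of the loop runs at close to full speed. `n` and the
+ * annotated work are hypothetical.
+ *
+ *     for (int i = 0; i < n; ++i) {
+ *         if (i == 10)
+ *             __itt_model_disable_push(__itt_model_disable_collection);
+ *         // ...annotated site/task work...
+ *     }
+ *     if (n > 10)
+ *         __itt_model_disable_pop();
+ */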
+/** @} model group */
+
+/**
+ * @defgroup heap Heap
+ * @ingroup public
+ * Heap group
+ * @{
+ */
+
+typedef void* __itt_heap_function;
+
+/**
+ * @brief Create an identification for a heap function
+ * @return non-zero identifier or NULL
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+__itt_heap_function ITTAPI __itt_heap_function_createA(const char* name, const char* domain);
+__itt_heap_function ITTAPI __itt_heap_function_createW(const wchar_t* name, const wchar_t* domain);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_heap_function_create __itt_heap_function_createW
+# define __itt_heap_function_create_ptr __itt_heap_function_createW_ptr
+#else
+# define __itt_heap_function_create __itt_heap_function_createA
+# define __itt_heap_function_create_ptr __itt_heap_function_createA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_heap_function ITTAPI __itt_heap_function_create(const char* name, const char* domain);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_heap_function, heap_function_createA, (const char* name, const char* domain))
+ITT_STUB(ITTAPI, __itt_heap_function, heap_function_createW, (const wchar_t* name, const wchar_t* domain))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_heap_function, heap_function_create, (const char* name, const char* domain))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_heap_function_createA ITTNOTIFY_DATA(heap_function_createA)
+#define __itt_heap_function_createA_ptr ITTNOTIFY_NAME(heap_function_createA)
+#define __itt_heap_function_createW ITTNOTIFY_DATA(heap_function_createW)
+#define __itt_heap_function_createW_ptr ITTNOTIFY_NAME(heap_function_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_heap_function_create ITTNOTIFY_DATA(heap_function_create)
+#define __itt_heap_function_create_ptr ITTNOTIFY_NAME(heap_function_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_heap_function_createA(name, domain) (__itt_heap_function)0
+#define __itt_heap_function_createA_ptr 0
+#define __itt_heap_function_createW(name, domain) (__itt_heap_function)0
+#define __itt_heap_function_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_heap_function_create(name, domain) (__itt_heap_function)0
+#define __itt_heap_function_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_heap_function_createA_ptr 0
+#define __itt_heap_function_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_heap_function_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record an allocation begin occurrence.
+ */
+void ITTAPI __itt_heap_allocate_begin(__itt_heap_function h, size_t size, int initialized);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_allocate_begin, (__itt_heap_function h, size_t size, int initialized))
+#define __itt_heap_allocate_begin ITTNOTIFY_VOID(heap_allocate_begin)
+#define __itt_heap_allocate_begin_ptr ITTNOTIFY_NAME(heap_allocate_begin)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_allocate_begin(h, size, initialized)
+#define __itt_heap_allocate_begin_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_allocate_begin_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record an allocation end occurrence.
+ */
+void ITTAPI __itt_heap_allocate_end(__itt_heap_function h, void** addr, size_t size, int initialized);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_allocate_end, (__itt_heap_function h, void** addr, size_t size, int initialized))
+#define __itt_heap_allocate_end ITTNOTIFY_VOID(heap_allocate_end)
+#define __itt_heap_allocate_end_ptr ITTNOTIFY_NAME(heap_allocate_end)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_allocate_end(h, addr, size, initialized)
+#define __itt_heap_allocate_end_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_allocate_end_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record a free begin occurrence.
+ */
+void ITTAPI __itt_heap_free_begin(__itt_heap_function h, void* addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_free_begin, (__itt_heap_function h, void* addr))
+#define __itt_heap_free_begin ITTNOTIFY_VOID(heap_free_begin)
+#define __itt_heap_free_begin_ptr ITTNOTIFY_NAME(heap_free_begin)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_free_begin(h, addr)
+#define __itt_heap_free_begin_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_free_begin_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record a free end occurrence.
+ */
+void ITTAPI __itt_heap_free_end(__itt_heap_function h, void* addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_free_end, (__itt_heap_function h, void* addr))
+#define __itt_heap_free_end ITTNOTIFY_VOID(heap_free_end)
+#define __itt_heap_free_end_ptr ITTNOTIFY_NAME(heap_free_end)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_free_end(h, addr)
+#define __itt_heap_free_end_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_free_end_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
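+
+/*
+ * Illustrative usage sketch (not part of the original header): wrapping a
+ * custom allocator so heap analysis tools can track it like malloc. The heap
+ * function handle is created once; the names "my_malloc"/"my_allocator" and
+ * the `pool_alloc`/`pool_free` backing calls are hypothetical.
+ *
+ *     static __itt_heap_function h;          // created lazily below
+ *
+ *     void *my_malloc(size_t size)
+ *     {
+ *         if (h == NULL)
+ *             h = __itt_heap_function_create("my_malloc", "my_allocator");
+ *         __itt_heap_allocate_begin(h, size, 0);   // 0: memory not initialized
+ *         void *p = pool_alloc(size);
+ *         __itt_heap_allocate_end(h, &p, size, 0);
+ *         return p;
+ *     }
+ *
+ *     void my_free(void *p)
+ *     {
+ *         __itt_heap_free_begin(h, p);
+ *         pool_free(p);
+ *         __itt_heap_free_end(h, p);
+ *     }
+ */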
+
+/**
+ * @brief Record a reallocation begin occurrence.
+ */
+void ITTAPI __itt_heap_reallocate_begin(__itt_heap_function h, void* addr, size_t new_size, int initialized);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_reallocate_begin, (__itt_heap_function h, void* addr, size_t new_size, int initialized))
+#define __itt_heap_reallocate_begin ITTNOTIFY_VOID(heap_reallocate_begin)
+#define __itt_heap_reallocate_begin_ptr ITTNOTIFY_NAME(heap_reallocate_begin)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_reallocate_begin(h, addr, new_size, initialized)
+#define __itt_heap_reallocate_begin_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_reallocate_begin_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record a reallocation end occurrence.
+ */
+void ITTAPI __itt_heap_reallocate_end(__itt_heap_function h, void* addr, void** new_addr, size_t new_size, int initialized);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_reallocate_end, (__itt_heap_function h, void* addr, void** new_addr, size_t new_size, int initialized))
+#define __itt_heap_reallocate_end ITTNOTIFY_VOID(heap_reallocate_end)
+#define __itt_heap_reallocate_end_ptr ITTNOTIFY_NAME(heap_reallocate_end)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_reallocate_end(h, addr, new_addr, new_size, initialized)
+#define __itt_heap_reallocate_end_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_reallocate_end_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @brief internal access begin */
+void ITTAPI __itt_heap_internal_access_begin(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_internal_access_begin, (void))
+#define __itt_heap_internal_access_begin ITTNOTIFY_VOID(heap_internal_access_begin)
+#define __itt_heap_internal_access_begin_ptr ITTNOTIFY_NAME(heap_internal_access_begin)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_internal_access_begin()
+#define __itt_heap_internal_access_begin_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_internal_access_begin_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @brief internal access end */
+void ITTAPI __itt_heap_internal_access_end(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_internal_access_end, (void))
+#define __itt_heap_internal_access_end ITTNOTIFY_VOID(heap_internal_access_end)
+#define __itt_heap_internal_access_end_ptr ITTNOTIFY_NAME(heap_internal_access_end)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_internal_access_end()
+#define __itt_heap_internal_access_end_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_internal_access_end_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @brief record memory growth begin */
+void ITTAPI __itt_heap_record_memory_growth_begin(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_record_memory_growth_begin, (void))
+#define __itt_heap_record_memory_growth_begin ITTNOTIFY_VOID(heap_record_memory_growth_begin)
+#define __itt_heap_record_memory_growth_begin_ptr ITTNOTIFY_NAME(heap_record_memory_growth_begin)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_record_memory_growth_begin()
+#define __itt_heap_record_memory_growth_begin_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_record_memory_growth_begin_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @brief record memory growth end */
+void ITTAPI __itt_heap_record_memory_growth_end(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_record_memory_growth_end, (void))
+#define __itt_heap_record_memory_growth_end ITTNOTIFY_VOID(heap_record_memory_growth_end)
+#define __itt_heap_record_memory_growth_end_ptr ITTNOTIFY_NAME(heap_record_memory_growth_end)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_record_memory_growth_end()
+#define __itt_heap_record_memory_growth_end_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_record_memory_growth_end_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Specify the type of heap detection/reporting to modify.
+ */
+/**
+ * @hideinitializer
+ * @brief Report on memory leaks.
+ */
+#define __itt_heap_leaks 0x00000001
+
+/**
+ * @hideinitializer
+ * @brief Report on memory growth.
+ */
+#define __itt_heap_growth 0x00000002
+
+
+/** @brief heap reset detection */
+void ITTAPI __itt_heap_reset_detection(unsigned int reset_mask);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_reset_detection, (unsigned int reset_mask))
+#define __itt_heap_reset_detection ITTNOTIFY_VOID(heap_reset_detection)
+#define __itt_heap_reset_detection_ptr ITTNOTIFY_NAME(heap_reset_detection)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_reset_detection(reset_mask)
+#define __itt_heap_reset_detection_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_reset_detection_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @brief Report heap state for the detection types selected in record_mask */
+void ITTAPI __itt_heap_record(unsigned int record_mask);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, heap_record, (unsigned int record_mask))
+#define __itt_heap_record ITTNOTIFY_VOID(heap_record)
+#define __itt_heap_record_ptr ITTNOTIFY_NAME(heap_record)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_heap_record(record_mask)
+#define __itt_heap_record_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_heap_record_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
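+
+/*
+ * Illustrative usage sketch (not part of the original header): resetting leak
+ * and growth tracking before a phase of interest and asking for a report
+ * afterwards. `run_phase()` is a hypothetical workload; the exact reporting
+ * behaviour depends on the analysis tool.
+ *
+ *     __itt_heap_reset_detection(__itt_heap_leaks | __itt_heap_growth);
+ *     run_phase();
+ *     __itt_heap_record(__itt_heap_leaks | __itt_heap_growth);
+ */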
+
+/** @} heap group */
+/** @endcond */
+/* ========================================================================== */
+
+/**
+ * @defgroup domains Domains
+ * @ingroup public
+ * Domains group
+ * @{
+ */
+
+/** @cond exclude_from_documentation */
+#pragma pack(push, 8)
+
+typedef struct ___itt_domain
+{
+ volatile int flags; /*!< Zero if disabled, non-zero if enabled. The meaning of different non-zero values is reserved to the runtime */
+ const char* nameA; /*!< Copy of original name in ASCII. */
+#if defined(UNICODE) || defined(_UNICODE)
+ const wchar_t* nameW; /*!< Copy of original name in UNICODE. */
+#else /* UNICODE || _UNICODE */
+ void* nameW;
+#endif /* UNICODE || _UNICODE */
+ int extra1; /*!< Reserved to the runtime */
+ void* extra2; /*!< Reserved to the runtime */
+ struct ___itt_domain* next;
+} __itt_domain;
+
+#pragma pack(pop)
+/** @endcond */
+
+/**
+ * @ingroup domains
+ * @brief Create a domain.
+ * Create a domain using a domain name; the URI naming style is recommended.
+ * Because the set of domains is expected to be static over the application's
+ * execution time, there is no mechanism to destroy a domain.
+ * Any domain can be accessed by any thread in the process, regardless of
+ * which thread created the domain. This call is thread-safe.
+ * @param[in] name name of domain
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+__itt_domain* ITTAPI __itt_domain_createA(const char *name);
+__itt_domain* ITTAPI __itt_domain_createW(const wchar_t *name);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_domain_create __itt_domain_createW
+# define __itt_domain_create_ptr __itt_domain_createW_ptr
+#else /* UNICODE */
+# define __itt_domain_create __itt_domain_createA
+# define __itt_domain_create_ptr __itt_domain_createA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_domain* ITTAPI __itt_domain_create(const char *name);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_domain*, domain_createA, (const char *name))
+ITT_STUB(ITTAPI, __itt_domain*, domain_createW, (const wchar_t *name))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_domain*, domain_create, (const char *name))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_domain_createA ITTNOTIFY_DATA(domain_createA)
+#define __itt_domain_createA_ptr ITTNOTIFY_NAME(domain_createA)
+#define __itt_domain_createW ITTNOTIFY_DATA(domain_createW)
+#define __itt_domain_createW_ptr ITTNOTIFY_NAME(domain_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_domain_create ITTNOTIFY_DATA(domain_create)
+#define __itt_domain_create_ptr ITTNOTIFY_NAME(domain_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_domain_createA(name) (__itt_domain*)0
+#define __itt_domain_createA_ptr 0
+#define __itt_domain_createW(name) (__itt_domain*)0
+#define __itt_domain_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_domain_create(name) (__itt_domain*)0
+#define __itt_domain_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_domain_createA_ptr 0
+#define __itt_domain_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_domain_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
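+
+/*
+ * Illustrative usage sketch (not part of the original header): domains are
+ * typically created once and cached, since there is no way to destroy them.
+ * The URI-style name "com.example.renderer" is a hypothetical example.
+ *
+ *     static __itt_domain* domain;
+ *     if (domain == NULL)
+ *         domain = __itt_domain_create("com.example.renderer");
+ */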
+/** @} domains group */
+
+/**
+ * @defgroup ids IDs
+ * @ingroup public
+ * IDs group
+ * @{
+ */
+
+/** @cond exclude_from_documentation */
+#pragma pack(push, 8)
+
+typedef struct ___itt_id
+{
+ unsigned long long d1, d2, d3;
+} __itt_id;
+
+#pragma pack(pop)
+/** @endcond */
+
+static const __itt_id __itt_null = { 0, 0, 0 };
+
+/**
+ * @ingroup ids
+ * @brief A convenience function to create an ID without domain control.
+ * This function initializes an __itt_id structure and does not affect the
+ * collector runtime in any way. After you make the ID with this function,
+ * you still must create it with the __itt_id_create function before using
+ * the ID to identify a named entity.
+ * @param[in] addr The address of the object; high QWORD of the ID value.
+ * @param[in] extra The extra data to uniquely identify the object; low QWORD of the ID value.
+ */
+
+ITT_INLINE __itt_id ITTAPI __itt_id_make(void* addr, unsigned long long extra) ITT_INLINE_ATTRIBUTE;
+ITT_INLINE __itt_id ITTAPI __itt_id_make(void* addr, unsigned long long extra)
+{
+ __itt_id id = __itt_null;
+ id.d1 = (unsigned long long)((uintptr_t)addr);
+ id.d2 = (unsigned long long)extra;
+ id.d3 = (unsigned long long)0; /* Reserved. Must be zero */
+ return id;
+}
+
+/**
+ * @ingroup ids
+ * @brief Create an instance of identifier.
+ * This establishes the beginning of the lifetime of an instance of
+ * the given ID in the trace. Once this lifetime starts, the ID
+ * can be used to tag named entity instances in calls such as
+ * __itt_task_begin, and to specify relationships among
+ * identified named entity instances, using the \ref relations APIs.
+ * Instance IDs are not domain specific!
+ * @param[in] domain The domain controlling the execution of this call.
+ * @param[in] id The ID to create.
+ */
+void ITTAPI __itt_id_create(const __itt_domain *domain, __itt_id id);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, id_create, (const __itt_domain *domain, __itt_id id))
+#define __itt_id_create(d,x) ITTNOTIFY_VOID_D1(id_create,d,x)
+#define __itt_id_create_ptr ITTNOTIFY_NAME(id_create)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_id_create(domain,id)
+#define __itt_id_create_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_id_create_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup ids
+ * @brief Destroy an instance of identifier.
+ * This ends the lifetime of the current instance of the given ID value in the trace.
+ * Any relationships that are established after this lifetime ends are invalid.
+ * This call must be performed before the given ID value can be reused for a different
+ * named entity instance.
+ * @param[in] domain The domain controlling the execution of this call.
+ * @param[in] id The ID to destroy.
+ */
+void ITTAPI __itt_id_destroy(const __itt_domain *domain, __itt_id id);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, id_destroy, (const __itt_domain *domain, __itt_id id))
+#define __itt_id_destroy(d,x) ITTNOTIFY_VOID_D1(id_destroy,d,x)
+#define __itt_id_destroy_ptr ITTNOTIFY_NAME(id_destroy)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_id_destroy(domain,id)
+#define __itt_id_destroy_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_id_destroy_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
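+
+/*
+ * Illustrative usage sketch (not part of the original header): giving an
+ * object instance an ID for the object's lifetime. `domain` is assumed to
+ * have been created with __itt_domain_create and `obj` is a hypothetical
+ * user object; its address is used for the ID and the extra field is left 0.
+ *
+ *     __itt_id id = __itt_id_make(obj, 0);
+ *     __itt_id_create(domain, id);        // ID lifetime begins
+ *     // ...tag tasks/relations with id...
+ *     __itt_id_destroy(domain, id);       // ID lifetime ends; id may be reused
+ */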
+/** @} ids group */
+
+/**
+ * @defgroup handles String Handles
+ * @ingroup public
+ * String Handles group
+ * @{
+ */
+
+/** @cond exclude_from_documentation */
+#pragma pack(push, 8)
+
+typedef struct ___itt_string_handle
+{
+ const char* strA; /*!< Copy of original string in ASCII. */
+#if defined(UNICODE) || defined(_UNICODE)
+ const wchar_t* strW; /*!< Copy of original string in UNICODE. */
+#else /* UNICODE || _UNICODE */
+ void* strW;
+#endif /* UNICODE || _UNICODE */
+ int extra1; /*!< Reserved. Must be zero */
+ void* extra2; /*!< Reserved. Must be zero */
+ struct ___itt_string_handle* next;
+} __itt_string_handle;
+
+#pragma pack(pop)
+/** @endcond */
+
+/**
+ * @ingroup handles
+ * @brief Create a string handle.
+ * Create and return handle value that can be associated with a string.
+ * Consecutive calls to __itt_string_handle_create with the same name
+ * return the same value. Because the set of string handles is expected to remain
+ * static during the application's execution time, there is no mechanism to destroy a string handle.
+ * Any string handle can be accessed by any thread in the process, regardless of which thread created
+ * the string handle. This call is thread-safe.
+ * @param[in] name The input string
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+__itt_string_handle* ITTAPI __itt_string_handle_createA(const char *name);
+__itt_string_handle* ITTAPI __itt_string_handle_createW(const wchar_t *name);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_string_handle_create __itt_string_handle_createW
+# define __itt_string_handle_create_ptr __itt_string_handle_createW_ptr
+#else /* UNICODE */
+# define __itt_string_handle_create __itt_string_handle_createA
+# define __itt_string_handle_create_ptr __itt_string_handle_createA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_string_handle* ITTAPI __itt_string_handle_create(const char *name);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_createA, (const char *name))
+ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_createW, (const wchar_t *name))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_create, (const char *name))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_string_handle_createA ITTNOTIFY_DATA(string_handle_createA)
+#define __itt_string_handle_createA_ptr ITTNOTIFY_NAME(string_handle_createA)
+#define __itt_string_handle_createW ITTNOTIFY_DATA(string_handle_createW)
+#define __itt_string_handle_createW_ptr ITTNOTIFY_NAME(string_handle_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_string_handle_create ITTNOTIFY_DATA(string_handle_create)
+#define __itt_string_handle_create_ptr ITTNOTIFY_NAME(string_handle_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_string_handle_createA(name) (__itt_string_handle*)0
+#define __itt_string_handle_createA_ptr 0
+#define __itt_string_handle_createW(name) (__itt_string_handle*)0
+#define __itt_string_handle_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_string_handle_create(name) (__itt_string_handle*)0
+#define __itt_string_handle_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_string_handle_createA_ptr 0
+#define __itt_string_handle_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_string_handle_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
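+
+/*
+ * Illustrative usage sketch (not part of the original header): string handles
+ * are created once and reused; as described above, repeated calls with the
+ * same name return the same handle, so caching it in a static is the usual
+ * idiom. The name "compute" is a hypothetical example.
+ *
+ *     static __itt_string_handle* name;
+ *     if (name == NULL)
+ *         name = __itt_string_handle_create("compute");
+ */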
+/** @} handles group */
+
+/** @cond exclude_from_documentation */
+typedef unsigned long long __itt_timestamp;
+/** @endcond */
+
+#define __itt_timestamp_none ((__itt_timestamp)-1LL)
+
+/** @cond exclude_from_gpa_documentation */
+
+/**
+ * @ingroup timestamps
+ * @brief Return timestamp corresponding to the current moment.
+ * This returns the timestamp in the format that is the most relevant for the current
+ * host or platform (RDTSC, QPC, and others). You can use the "<" operator to
+ * compare __itt_timestamp values.
+ */
+__itt_timestamp ITTAPI __itt_get_timestamp(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(ITTAPI, __itt_timestamp, get_timestamp, (void))
+#define __itt_get_timestamp ITTNOTIFY_DATA(get_timestamp)
+#define __itt_get_timestamp_ptr ITTNOTIFY_NAME(get_timestamp)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_get_timestamp() ((__itt_timestamp)0)
+#define __itt_get_timestamp_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_get_timestamp_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} timestamps */
+/** @endcond */
+
+/** @cond exclude_from_gpa_documentation */
+
+/**
+ * @defgroup regions Regions
+ * @ingroup public
+ * Regions group
+ * @{
+ */
+/**
+ * @ingroup regions
+ * @brief Begin of region instance.
+ * Successive calls to __itt_region_begin with the same ID are ignored
+ * until a call to __itt_region_end with the same ID
+ * @param[in] domain The domain for this region instance
+ * @param[in] id The instance ID for this region instance. Must not be __itt_null
+ * @param[in] parentid The instance ID for the parent of this region instance, or __itt_null
+ * @param[in] name The name of this region
+ */
+void ITTAPI __itt_region_begin(const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name);
+
+/**
+ * @ingroup regions
+ * @brief End of region instance.
+ * The first call to __itt_region_end with a given ID ends the
+ * region. Successive calls with the same ID are ignored, as are
+ * calls that do not have a matching __itt_region_begin call.
+ * @param[in] domain The domain for this region instance
+ * @param[in] id The instance ID for this region instance
+ */
+void ITTAPI __itt_region_end(const __itt_domain *domain, __itt_id id);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, region_begin, (const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name))
+ITT_STUBV(ITTAPI, void, region_end, (const __itt_domain *domain, __itt_id id))
+#define __itt_region_begin(d,x,y,z) ITTNOTIFY_VOID_D3(region_begin,d,x,y,z)
+#define __itt_region_begin_ptr ITTNOTIFY_NAME(region_begin)
+#define __itt_region_end(d,x) ITTNOTIFY_VOID_D1(region_end,d,x)
+#define __itt_region_end_ptr ITTNOTIFY_NAME(region_end)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_region_begin(d,x,y,z)
+#define __itt_region_begin_ptr 0
+#define __itt_region_end(d,x)
+#define __itt_region_end_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_region_begin_ptr 0
+#define __itt_region_end_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
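+
+/*
+ * Illustrative usage sketch (not part of the original header): bracketing a
+ * long-running activity as a region instance. `domain` and `name` are
+ * assumed to have been created as in the domain and string-handle sketches
+ * above; `request` and `handle_request()` are hypothetical user code.
+ *
+ *     __itt_id region_id = __itt_id_make(&request, 0);
+ *     __itt_region_begin(domain, region_id, __itt_null, name);
+ *     handle_request(&request);
+ *     __itt_region_end(domain, region_id);
+ */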
+/** @} regions group */
+
+/**
+ * @defgroup frames Frames
+ * @ingroup public
+ * Frames are similar to regions, but are intended to be easier to use and to implement.
+ * In particular:
+ * - Frames always represent periods of elapsed time
+ * - By default, frames have no nesting relationships
+ * @{
+ */
+
+/**
+ * @ingroup frames
+ * @brief Begin a frame instance.
+ * Successive calls to __itt_frame_begin with the
+ * same ID are ignored until a call to __itt_frame_end with the same ID.
+ * @param[in] domain The domain for this frame instance
+ * @param[in] id The instance ID for this frame instance or NULL
+ */
+void ITTAPI __itt_frame_begin_v3(const __itt_domain *domain, __itt_id *id);
+
+/**
+ * @ingroup frames
+ * @brief End a frame instance.
+ * The first call to __itt_frame_end with a given ID
+ * ends the frame. Successive calls with the same ID are ignored, as are
+ * calls that do not have a matching __itt_frame_begin call.
+ * @param[in] domain The domain for this frame instance
+ * @param[in] id The instance ID for this frame instance or NULL for current
+ */
+void ITTAPI __itt_frame_end_v3(const __itt_domain *domain, __itt_id *id);
+
+/**
+ * @ingroup frames
+ * @brief Submits a frame instance.
+ * Successive calls to __itt_frame_begin or __itt_frame_submit with the
+ * same ID are ignored until a call to __itt_frame_end or __itt_frame_submit
+ * with the same ID.
+ * Passing the special __itt_timestamp_none value as the "end" argument means
+ * that the current timestamp is taken as the end timestamp.
+ * @param[in] domain The domain for this frame instance
+ * @param[in] id The instance ID for this frame instance or NULL
+ * @param[in] begin Timestamp of the beginning of the frame
+ * @param[in] end Timestamp of the end of the frame
+ */
+void ITTAPI __itt_frame_submit_v3(const __itt_domain *domain, __itt_id *id,
+ __itt_timestamp begin, __itt_timestamp end);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, frame_begin_v3, (const __itt_domain *domain, __itt_id *id))
+ITT_STUBV(ITTAPI, void, frame_end_v3, (const __itt_domain *domain, __itt_id *id))
+ITT_STUBV(ITTAPI, void, frame_submit_v3, (const __itt_domain *domain, __itt_id *id, __itt_timestamp begin, __itt_timestamp end))
+#define __itt_frame_begin_v3(d,x) ITTNOTIFY_VOID_D1(frame_begin_v3,d,x)
+#define __itt_frame_begin_v3_ptr ITTNOTIFY_NAME(frame_begin_v3)
+#define __itt_frame_end_v3(d,x) ITTNOTIFY_VOID_D1(frame_end_v3,d,x)
+#define __itt_frame_end_v3_ptr ITTNOTIFY_NAME(frame_end_v3)
+#define __itt_frame_submit_v3(d,x,b,e) ITTNOTIFY_VOID_D3(frame_submit_v3,d,x,b,e)
+#define __itt_frame_submit_v3_ptr ITTNOTIFY_NAME(frame_submit_v3)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_frame_begin_v3(domain,id)
+#define __itt_frame_begin_v3_ptr 0
+#define __itt_frame_end_v3(domain,id)
+#define __itt_frame_end_v3_ptr 0
+#define __itt_frame_submit_v3(domain,id,begin,end)
+#define __itt_frame_submit_v3_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_frame_begin_v3_ptr 0
+#define __itt_frame_end_v3_ptr 0
+#define __itt_frame_submit_v3_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
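+
+/*
+ * Illustrative usage sketch (not part of the original header): two ways to
+ * mark one frame of a render loop. `domain` is assumed to exist and
+ * `render()` is a hypothetical user function; the parameter descriptions
+ * above allow NULL for the frame instance ID.
+ *
+ *     // Variant 1: bracket the work directly.
+ *     __itt_frame_begin_v3(domain, NULL);
+ *     render();
+ *     __itt_frame_end_v3(domain, NULL);
+ *
+ *     // Variant 2: measure first, submit afterwards.
+ *     __itt_timestamp t0 = __itt_get_timestamp();
+ *     render();
+ *     __itt_frame_submit_v3(domain, NULL, t0, __itt_get_timestamp());
+ */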
+/** @} frames group */
+/** @endcond */
+
+/**
+ * @defgroup taskgroup Task Group
+ * @ingroup public
+ * Task Group
+ * @{
+ */
+/**
+ * @ingroup taskgroup
+ * @brief Denotes a task_group instance.
+ * Successive calls to __itt_task_group with the same ID are ignored.
+ * @param[in] domain The domain for this task_group instance
+ * @param[in] id The instance ID for this task_group instance. Must not be __itt_null.
+ * @param[in] parentid The instance ID for the parent of this task_group instance, or __itt_null.
+ * @param[in] name The name of this task_group
+ */
+void ITTAPI __itt_task_group(const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, task_group, (const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name))
+#define __itt_task_group(d,x,y,z) ITTNOTIFY_VOID_D3(task_group,d,x,y,z)
+#define __itt_task_group_ptr ITTNOTIFY_NAME(task_group)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_task_group(d,x,y,z)
+#define __itt_task_group_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_task_group_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} taskgroup group */
+
+/**
+ * @defgroup tasks Tasks
+ * @ingroup public
+ * A task instance represents a piece of work performed by a particular
+ * thread for a period of time. A call to __itt_task_begin creates a
+ * task instance. This becomes the current instance for that task on that
+ * thread. A following call to __itt_task_end on the same thread ends the
+ * instance. There may be multiple simultaneous instances of tasks with the
+ * same name on different threads. If an ID is specified, the task instance
+ * receives that ID. Nested tasks are allowed.
+ *
+ * Note: The task is defined by the bracketing of __itt_task_begin and
+ * __itt_task_end on the same thread. If some scheduling mechanism causes
+ * task switching (the thread executes a different user task) or thread
+ * switching (the user task switches to a different thread), then this breaks
+ * the notion of current instance. Additional API calls are required to
+ * deal with that possibility.
+ * @{
+ */
+
+/**
+ * @ingroup tasks
+ * @brief Begin a task instance.
+ * @param[in] domain The domain for this task
+ * @param[in] taskid The instance ID for this task instance, or __itt_null
+ * @param[in] parentid The parent instance to which this task instance belongs, or __itt_null
+ * @param[in] name The name of this task
+ */
+void ITTAPI __itt_task_begin(const __itt_domain *domain, __itt_id taskid, __itt_id parentid, __itt_string_handle *name);
+
+/**
+ * @ingroup tasks
+ * @brief Begin a task instance.
+ * @param[in] domain The domain for this task
+ * @param[in] taskid The identifier for this task instance (may be 0)
+ * @param[in] parentid The parent of this task (may be 0)
+ * @param[in] fn The pointer to the function you are tracing
+ */
+void ITTAPI __itt_task_begin_fn(const __itt_domain *domain, __itt_id taskid, __itt_id parentid, void* fn);
+
+/**
+ * @ingroup tasks
+ * @brief End the current task instance.
+ * @param[in] domain The domain for this task
+ */
+void ITTAPI __itt_task_end(const __itt_domain *domain);
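+/*
+ * Example (illustrative sketch): a minimal task region on the current thread, assuming
+ * a domain and string handle created with __itt_domain_create() and
+ * __itt_string_handle_create() as declared earlier in this header; do_work() is a
+ * hypothetical user function.
+ *
+ *     __itt_domain* d = __itt_domain_create("Example.Domain");
+ *     __itt_string_handle* h = __itt_string_handle_create("MyTask");
+ *     __itt_task_begin(d, __itt_null, __itt_null, h);
+ *     do_work();
+ *     __itt_task_end(d);
+ */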
+
+/**
+ * @ingroup tasks
+ * @brief Begin an overlapped task instance.
+ * @param[in] domain The domain for this task.
+ * @param[in] taskid The identifier for this task instance, *cannot* be __itt_null.
+ * @param[in] parentid The parent of this task, or __itt_null.
+ * @param[in] name The name of this task.
+ */
+void ITTAPI __itt_task_begin_overlapped(const __itt_domain* domain, __itt_id taskid, __itt_id parentid, __itt_string_handle* name);
+
+/**
+ * @ingroup tasks
+ * @brief End an overlapped task instance.
+ * @param[in] domain The domain for this task
+ * @param[in] taskid Explicit ID of finished task
+ */
+void ITTAPI __itt_task_end_overlapped(const __itt_domain *domain, __itt_id taskid);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, task_begin, (const __itt_domain *domain, __itt_id id, __itt_id parentid, __itt_string_handle *name))
+ITT_STUBV(ITTAPI, void, task_begin_fn, (const __itt_domain *domain, __itt_id id, __itt_id parentid, void* fn))
+ITT_STUBV(ITTAPI, void, task_end, (const __itt_domain *domain))
+ITT_STUBV(ITTAPI, void, task_begin_overlapped, (const __itt_domain *domain, __itt_id taskid, __itt_id parentid, __itt_string_handle *name))
+ITT_STUBV(ITTAPI, void, task_end_overlapped, (const __itt_domain *domain, __itt_id taskid))
+#define __itt_task_begin(d,x,y,z) ITTNOTIFY_VOID_D3(task_begin,d,x,y,z)
+#define __itt_task_begin_ptr ITTNOTIFY_NAME(task_begin)
+#define __itt_task_begin_fn(d,x,y,z) ITTNOTIFY_VOID_D3(task_begin_fn,d,x,y,z)
+#define __itt_task_begin_fn_ptr ITTNOTIFY_NAME(task_begin_fn)
+#define __itt_task_end(d) ITTNOTIFY_VOID_D0(task_end,d)
+#define __itt_task_end_ptr ITTNOTIFY_NAME(task_end)
+#define __itt_task_begin_overlapped(d,x,y,z) ITTNOTIFY_VOID_D3(task_begin_overlapped,d,x,y,z)
+#define __itt_task_begin_overlapped_ptr ITTNOTIFY_NAME(task_begin_overlapped)
+#define __itt_task_end_overlapped(d,x) ITTNOTIFY_VOID_D1(task_end_overlapped,d,x)
+#define __itt_task_end_overlapped_ptr ITTNOTIFY_NAME(task_end_overlapped)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_task_begin(domain,id,parentid,name)
+#define __itt_task_begin_ptr 0
+#define __itt_task_begin_fn(domain,id,parentid,fn)
+#define __itt_task_begin_fn_ptr 0
+#define __itt_task_end(domain)
+#define __itt_task_end_ptr 0
+#define __itt_task_begin_overlapped(domain,taskid,parentid,name)
+#define __itt_task_begin_overlapped_ptr 0
+#define __itt_task_end_overlapped(domain,taskid)
+#define __itt_task_end_overlapped_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_task_begin_ptr 0
+#define __itt_task_begin_fn_ptr 0
+#define __itt_task_end_ptr 0
+#define __itt_task_begin_overlapped_ptr 0
+#define __itt_task_end_overlapped_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} tasks group */
+
+
+/**
+ * @defgroup markers Markers
+ * Markers represent a single discrete event in time. Markers have a scope,
+ * described by an enumerated type __itt_scope. Markers are created by
+ * the API call __itt_marker. A marker instance can be given an ID for use in
+ * adding metadata.
+ * @{
+ */
+
+/**
+ * @brief Describes the scope of an event object in the trace.
+ */
+typedef enum
+{
+ __itt_scope_unknown = 0,
+ __itt_scope_global,
+ __itt_scope_track_group,
+ __itt_scope_track,
+ __itt_scope_task,
+ __itt_scope_marker
+} __itt_scope;
+
+/** @cond exclude_from_documentation */
+#define __itt_marker_scope_unknown __itt_scope_unknown
+#define __itt_marker_scope_global __itt_scope_global
+#define __itt_marker_scope_process __itt_scope_track_group
+#define __itt_marker_scope_thread __itt_scope_track
+#define __itt_marker_scope_task __itt_scope_task
+/** @endcond */
+
+/**
+ * @ingroup markers
+ * @brief Create a marker instance
+ * @param[in] domain The domain for this marker
+ * @param[in] id The instance ID for this marker or __itt_null
+ * @param[in] name The name for this marker
+ * @param[in] scope The scope for this marker
+ */
+void ITTAPI __itt_marker(const __itt_domain *domain, __itt_id id, __itt_string_handle *name, __itt_scope scope);
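+/*
+ * Example (illustrative sketch): emitting a task-scoped marker, assuming the domain 'd'
+ * and string-handle helpers shown in the task example above.
+ *
+ *     __itt_marker(d, __itt_null, __itt_string_handle_create("Checkpoint"), __itt_scope_task);
+ */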
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, marker, (const __itt_domain *domain, __itt_id id, __itt_string_handle *name, __itt_scope scope))
+#define __itt_marker(d,x,y,z) ITTNOTIFY_VOID_D3(marker,d,x,y,z)
+#define __itt_marker_ptr ITTNOTIFY_NAME(marker)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_marker(domain,id,name,scope)
+#define __itt_marker_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_marker_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} markers group */
+
+/**
+ * @defgroup metadata Metadata
+ * The metadata API is used to attach extra information to named
+ * entities. Metadata can be attached to an identified named entity by ID,
+ * or to the current entity (which is always a task).
+ *
+ * Conceptually metadata has a type (what kind of metadata), a key (the
+ * name of the metadata), and a value (the actual data). The encoding of
+ * the value depends on the type of the metadata.
+ *
+ * The type of metadata is specified by the enumerated type __itt_metadata_type.
+ * @{
+ */
+
+/**
+ * @ingroup parameters
+ * @brief describes the type of metadata
+ */
+typedef enum {
+ __itt_metadata_unknown = 0,
+ __itt_metadata_u64, /**< Unsigned 64-bit integer */
+ __itt_metadata_s64, /**< Signed 64-bit integer */
+ __itt_metadata_u32, /**< Unsigned 32-bit integer */
+ __itt_metadata_s32, /**< Signed 32-bit integer */
+ __itt_metadata_u16, /**< Unsigned 16-bit integer */
+ __itt_metadata_s16, /**< Signed 16-bit integer */
+ __itt_metadata_float, /**< Signed 32-bit floating-point */
+    __itt_metadata_double /**< Signed 64-bit floating-point */
+} __itt_metadata_type;
+
+/**
+ * @ingroup parameters
+ * @brief Add metadata to an instance of a named entity.
+ * @param[in] domain The domain controlling the call
+ * @param[in] id The identifier of the instance to which the metadata is to be added, or __itt_null to add to the current task
+ * @param[in] key The name of the metadata
+ * @param[in] type The type of the metadata
+ * @param[in] count The number of elements of the given type. If count == 0, no metadata will be added.
+ * @param[in] data The metadata itself
+*/
+void ITTAPI __itt_metadata_add(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data);
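+/*
+ * Example (illustrative sketch): attaching an unsigned 64-bit value and a string to the
+ * current task, assuming a domain 'd' and string handles created earlier. The string
+ * variant __itt_metadata_str_add is declared further below in this header.
+ *
+ *     unsigned long long bytes = 4096ULL;
+ *     __itt_metadata_add(d, __itt_null, __itt_string_handle_create("bytes"),
+ *                        __itt_metadata_u64, 1, &bytes);
+ *     __itt_metadata_str_add(d, __itt_null, __itt_string_handle_create("stage"),
+ *                            "prefetch", 8);   // 8 = number of characters in "prefetch"
+ */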
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, metadata_add, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data))
+#define __itt_metadata_add(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(metadata_add,d,x,y,z,a,b)
+#define __itt_metadata_add_ptr ITTNOTIFY_NAME(metadata_add)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_metadata_add(d,x,y,z,a,b)
+#define __itt_metadata_add_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_metadata_add_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup parameters
+ * @brief Add string metadata to an instance of a named entity.
+ * @param[in] domain The domain controlling the call
+ * @param[in] id The identifier of the instance to which the metadata is to be added, or __itt_null to add to the current task
+ * @param[in] key The name of the metadata
+ * @param[in] data The metadata itself
+ * @param[in] length The number of characters in the string, or -1 if the length is unknown but the string is null-terminated
+*/
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+void ITTAPI __itt_metadata_str_addA(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length);
+void ITTAPI __itt_metadata_str_addW(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const wchar_t *data, size_t length);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_metadata_str_add __itt_metadata_str_addW
+# define __itt_metadata_str_add_ptr __itt_metadata_str_addW_ptr
+#else /* UNICODE */
+# define __itt_metadata_str_add __itt_metadata_str_addA
+# define __itt_metadata_str_add_ptr __itt_metadata_str_addA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+void ITTAPI __itt_metadata_str_add(const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length);
+#endif
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, metadata_str_addA, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length))
+ITT_STUBV(ITTAPI, void, metadata_str_addW, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const wchar_t *data, size_t length))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, metadata_str_add, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char *data, size_t length))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_metadata_str_addA(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_addA,d,x,y,z,a)
+#define __itt_metadata_str_addA_ptr ITTNOTIFY_NAME(metadata_str_addA)
+#define __itt_metadata_str_addW(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_addW,d,x,y,z,a)
+#define __itt_metadata_str_addW_ptr ITTNOTIFY_NAME(metadata_str_addW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_metadata_str_add(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_add,d,x,y,z,a)
+#define __itt_metadata_str_add_ptr ITTNOTIFY_NAME(metadata_str_add)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_metadata_str_addA(d,x,y,z,a)
+#define __itt_metadata_str_addA_ptr 0
+#define __itt_metadata_str_addW(d,x,y,z,a)
+#define __itt_metadata_str_addW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_metadata_str_add(d,x,y,z,a)
+#define __itt_metadata_str_add_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_metadata_str_addA_ptr 0
+#define __itt_metadata_str_addW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_metadata_str_add_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup parameters
+ * @brief Add metadata to an instance of a named entity.
+ * @param[in] domain The domain controlling the call
+ * @param[in] scope The scope of the instance to which the metadata is to be added
+ * @param[in] key The name of the metadata
+ * @param[in] type The type of the metadata
+ * @param[in] count The number of elements of the given type. If count == 0, no metadata will be added.
+ * @param[in] data The metadata itself
+*/
+void ITTAPI __itt_metadata_add_with_scope(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, metadata_add_with_scope, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data))
+#define __itt_metadata_add_with_scope(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(metadata_add_with_scope,d,x,y,z,a,b)
+#define __itt_metadata_add_with_scope_ptr ITTNOTIFY_NAME(metadata_add_with_scope)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_metadata_add_with_scope(d,x,y,z,a,b)
+#define __itt_metadata_add_with_scope_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_metadata_add_with_scope_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup parameters
+ * @brief Add string metadata to an instance of a named entity.
+ * @param[in] domain The domain controlling the call
+ * @param[in] scope The scope of the instance to which the metadata is to be added
+ * @param[in] key The name of the metadata
+ * @param[in] data The metadata itself
+ * @param[in] length The number of characters in the string, or -1 if the length is unknown but the string is null-terminated
+*/
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+void ITTAPI __itt_metadata_str_add_with_scopeA(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length);
+void ITTAPI __itt_metadata_str_add_with_scopeW(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const wchar_t *data, size_t length);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_metadata_str_add_with_scope __itt_metadata_str_add_with_scopeW
+# define __itt_metadata_str_add_with_scope_ptr __itt_metadata_str_add_with_scopeW_ptr
+#else /* UNICODE */
+# define __itt_metadata_str_add_with_scope __itt_metadata_str_add_with_scopeA
+# define __itt_metadata_str_add_with_scope_ptr __itt_metadata_str_add_with_scopeA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+void ITTAPI __itt_metadata_str_add_with_scope(const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length);
+#endif
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, metadata_str_add_with_scopeA, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length))
+ITT_STUBV(ITTAPI, void, metadata_str_add_with_scopeW, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const wchar_t *data, size_t length))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, metadata_str_add_with_scope, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_metadata_str_add_with_scopeA(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_add_with_scopeA,d,x,y,z,a)
+#define __itt_metadata_str_add_with_scopeA_ptr ITTNOTIFY_NAME(metadata_str_add_with_scopeA)
+#define __itt_metadata_str_add_with_scopeW(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_add_with_scopeW,d,x,y,z,a)
+#define __itt_metadata_str_add_with_scopeW_ptr ITTNOTIFY_NAME(metadata_str_add_with_scopeW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_metadata_str_add_with_scope(d,x,y,z,a) ITTNOTIFY_VOID_D4(metadata_str_add_with_scope,d,x,y,z,a)
+#define __itt_metadata_str_add_with_scope_ptr ITTNOTIFY_NAME(metadata_str_add_with_scope)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_metadata_str_add_with_scopeA(d,x,y,z,a)
+#define __itt_metadata_str_add_with_scopeA_ptr 0
+#define __itt_metadata_str_add_with_scopeW(d,x,y,z,a)
+#define __itt_metadata_str_add_with_scopeW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_metadata_str_add_with_scope(d,x,y,z,a)
+#define __itt_metadata_str_add_with_scope_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_metadata_str_add_with_scopeA_ptr 0
+#define __itt_metadata_str_add_with_scopeW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_metadata_str_add_with_scope_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @} metadata group */
+
+/**
+ * @defgroup relations Relations
+ * Instances of named entities can be explicitly associated with other
+ * instances using instance IDs and the relationship API calls.
+ *
+ * @{
+ */
+
+/**
+ * @ingroup relations
+ * @brief The kind of relation between two instances is specified by the enumerated type __itt_relation.
+ * Relations between instances can be added with an API call. The relation
+ * API uses instance IDs. Relations can be added before or after the actual
+ * instances are created and persist independently of the instances. This
+ * is the motivation for having different lifetimes for instance IDs and
+ * the actual instances.
+ */
+typedef enum
+{
+ __itt_relation_is_unknown = 0,
+ __itt_relation_is_dependent_on, /**< "A is dependent on B" means that A cannot start until B completes */
+ __itt_relation_is_sibling_of, /**< "A is sibling of B" means that A and B were created as a group */
+ __itt_relation_is_parent_of, /**< "A is parent of B" means that A created B */
+ __itt_relation_is_continuation_of, /**< "A is continuation of B" means that A assumes the dependencies of B */
+ __itt_relation_is_child_of, /**< "A is child of B" means that A was created by B (inverse of is_parent_of) */
+ __itt_relation_is_continued_by, /**< "A is continued by B" means that B assumes the dependencies of A (inverse of is_continuation_of) */
+ __itt_relation_is_predecessor_to /**< "A is predecessor to B" means that B cannot start until A completes (inverse of is_dependent_on) */
+} __itt_relation;
+
+/**
+ * @ingroup relations
+ * @brief Add a relation to the current task instance.
+ * The current task instance is the head of the relation.
+ * @param[in] domain The domain controlling this call
+ * @param[in] relation The kind of relation
+ * @param[in] tail The ID for the tail of the relation
+ */
+void ITTAPI __itt_relation_add_to_current(const __itt_domain *domain, __itt_relation relation, __itt_id tail);
+
+/**
+ * @ingroup relations
+ * @brief Add a relation between two instance identifiers.
+ * @param[in] domain The domain controlling this call
+ * @param[in] head The ID for the head of the relation
+ * @param[in] relation The kind of relation
+ * @param[in] tail The ID for the tail of the relation
+ */
+void ITTAPI __itt_relation_add(const __itt_domain *domain, __itt_id head, __itt_relation relation, __itt_id tail);
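+/*
+ * Example (illustrative sketch): recording that task A cannot start until task B
+ * completes, assuming a domain 'd' and IDs built with __itt_id_make() (declared earlier
+ * in this header); task_a and task_b are hypothetical user objects.
+ *
+ *     __itt_id id_a = __itt_id_make(&task_a, 0);
+ *     __itt_id id_b = __itt_id_make(&task_b, 0);
+ *     __itt_relation_add(d, id_a, __itt_relation_is_dependent_on, id_b);
+ */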
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, relation_add_to_current, (const __itt_domain *domain, __itt_relation relation, __itt_id tail))
+ITT_STUBV(ITTAPI, void, relation_add, (const __itt_domain *domain, __itt_id head, __itt_relation relation, __itt_id tail))
+#define __itt_relation_add_to_current(d,x,y) ITTNOTIFY_VOID_D2(relation_add_to_current,d,x,y)
+#define __itt_relation_add_to_current_ptr ITTNOTIFY_NAME(relation_add_to_current)
+#define __itt_relation_add(d,x,y,z) ITTNOTIFY_VOID_D3(relation_add,d,x,y,z)
+#define __itt_relation_add_ptr ITTNOTIFY_NAME(relation_add)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_relation_add_to_current(d,x,y)
+#define __itt_relation_add_to_current_ptr 0
+#define __itt_relation_add(d,x,y,z)
+#define __itt_relation_add_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_relation_add_to_current_ptr 0
+#define __itt_relation_add_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} relations group */
+
+/** @cond exclude_from_documentation */
+#pragma pack(push, 8)
+
+typedef struct ___itt_clock_info
+{
+ unsigned long long clock_freq; /*!< Clock domain frequency */
+ unsigned long long clock_base; /*!< Clock domain base timestamp */
+} __itt_clock_info;
+
+#pragma pack(pop)
+/** @endcond */
+
+/** @cond exclude_from_documentation */
+typedef void (ITTAPI *__itt_get_clock_info_fn)(__itt_clock_info* clock_info, void* data);
+/** @endcond */
+
+/** @cond exclude_from_documentation */
+#pragma pack(push, 8)
+
+typedef struct ___itt_clock_domain
+{
+ __itt_clock_info info; /*!< Most recent clock domain info */
+ __itt_get_clock_info_fn fn; /*!< Callback function pointer */
+ void* fn_data; /*!< Input argument for the callback function */
+ int extra1; /*!< Reserved. Must be zero */
+ void* extra2; /*!< Reserved. Must be zero */
+ struct ___itt_clock_domain* next;
+} __itt_clock_domain;
+
+#pragma pack(pop)
+/** @endcond */
+
+/**
+ * @ingroup clockdomains
+ * @brief Create a clock domain.
+ * Certain applications require the capability to trace their application using
+ * a clock domain different from that of the CPU, for instance when instrumenting
+ * events that occur on a GPU.
+ * Because the set of domains is expected to be static over the application's execution time,
+ * there is no mechanism to destroy a domain.
+ * Any domain can be accessed by any thread in the process, regardless of which thread created
+ * the domain. This call is thread-safe.
+ * @param[in] fn A pointer to a callback function which retrieves alternative CPU timestamps
+ * @param[in] fn_data Argument for a callback function; may be NULL
+ */
+__itt_clock_domain* ITTAPI __itt_clock_domain_create(__itt_get_clock_info_fn fn, void* fn_data);
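+/*
+ * Example (illustrative sketch): a clock-domain callback reporting a hypothetical
+ * device timer; the frequency and base values shown are placeholders that a real
+ * callback would query from the device.
+ *
+ *     static void ITTAPI example_clock_info(__itt_clock_info* info, void* data)
+ *     {
+ *         (void)data;
+ *         info->clock_freq = 1000000000ULL;  // ticks per second of the device clock
+ *         info->clock_base = 0ULL;           // device timestamp taken at this call
+ *     }
+ *
+ *     __itt_clock_domain* cd = __itt_clock_domain_create(example_clock_info, NULL);
+ */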
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(ITTAPI, __itt_clock_domain*, clock_domain_create, (__itt_get_clock_info_fn fn, void* fn_data))
+#define __itt_clock_domain_create ITTNOTIFY_DATA(clock_domain_create)
+#define __itt_clock_domain_create_ptr ITTNOTIFY_NAME(clock_domain_create)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_clock_domain_create(fn,fn_data) (__itt_clock_domain*)0
+#define __itt_clock_domain_create_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_clock_domain_create_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup clockdomains
+ * @brief Recalculate the frequencies and base timestamps of all clock domains.
+ */
+void ITTAPI __itt_clock_domain_reset(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, clock_domain_reset, (void))
+#define __itt_clock_domain_reset ITTNOTIFY_VOID(clock_domain_reset)
+#define __itt_clock_domain_reset_ptr ITTNOTIFY_NAME(clock_domain_reset)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_clock_domain_reset()
+#define __itt_clock_domain_reset_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_clock_domain_reset_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup clockdomain
+ * @brief Create an instance of an identifier. This establishes the beginning of the lifetime of
+ * an instance of the given ID in the trace. Once this lifetime starts, the ID can be used to
+ * tag named entity instances in calls such as __itt_task_begin, and to specify relationships among
+ * identified named entity instances, using the \ref relations APIs.
+ * @param[in] domain The domain controlling the execution of this call.
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ * @param[in] id The ID to create.
+ */
+void ITTAPI __itt_id_create_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id);
+
+/**
+ * @ingroup clockdomain
+ * @brief Destroy an instance of an identifier. This ends the lifetime of the current instance of the
+ * given ID value in the trace. Any relationships that are established after this lifetime ends are
+ * invalid. This call must be performed before the given ID value can be reused for a different
+ * named entity instance.
+ * @param[in] domain The domain controlling the execution of this call.
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ * @param[in] id The ID to destroy.
+ */
+void ITTAPI __itt_id_destroy_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, id_create_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id))
+ITT_STUBV(ITTAPI, void, id_destroy_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id))
+#define __itt_id_create_ex(d,x,y,z) ITTNOTIFY_VOID_D3(id_create_ex,d,x,y,z)
+#define __itt_id_create_ex_ptr ITTNOTIFY_NAME(id_create_ex)
+#define __itt_id_destroy_ex(d,x,y,z) ITTNOTIFY_VOID_D3(id_destroy_ex,d,x,y,z)
+#define __itt_id_destroy_ex_ptr ITTNOTIFY_NAME(id_destroy_ex)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_id_create_ex(domain,clock_domain,timestamp,id)
+#define __itt_id_create_ex_ptr 0
+#define __itt_id_destroy_ex(domain,clock_domain,timestamp,id)
+#define __itt_id_destroy_ex_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_id_create_ex_ptr 0
+#define __itt_id_destroy_ex_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup clockdomain
+ * @brief Begin a task instance.
+ * @param[in] domain The domain for this task
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ * @param[in] taskid The instance ID for this task instance, or __itt_null
+ * @param[in] parentid The parent instance to which this task instance belongs, or __itt_null
+ * @param[in] name The name of this task
+ */
+void ITTAPI __itt_task_begin_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, __itt_string_handle* name);
+
+/**
+ * @ingroup clockdomain
+ * @brief Begin a task instance.
+ * @param[in] domain The domain for this task
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ * @param[in] taskid The identifier for this task instance, or __itt_null
+ * @param[in] parentid The parent of this task, or __itt_null
+ * @param[in] fn The pointer to the function you are tracing
+ */
+void ITTAPI __itt_task_begin_fn_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, void* fn);
+
+/**
+ * @ingroup clockdomain
+ * @brief End the current task instance.
+ * @param[in] domain The domain for this task
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ */
+void ITTAPI __itt_task_end_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp);
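+/*
+ * Example (illustrative sketch): replaying a device-side task with user-supplied
+ * timestamps, assuming the clock domain 'cd' from the example above; begin_ts and
+ * end_ts are hypothetical device timestamps obtained elsewhere.
+ *
+ *     __itt_task_begin_ex(d, cd, begin_ts, __itt_null, __itt_null,
+ *                         __itt_string_handle_create("KernelLaunch"));
+ *     __itt_task_end_ex(d, cd, end_ts);
+ */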
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, task_begin_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_id parentid, __itt_string_handle *name))
+ITT_STUBV(ITTAPI, void, task_begin_fn_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_id parentid, void* fn))
+ITT_STUBV(ITTAPI, void, task_end_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp))
+#define __itt_task_begin_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(task_begin_ex,d,x,y,z,a,b)
+#define __itt_task_begin_ex_ptr ITTNOTIFY_NAME(task_begin_ex)
+#define __itt_task_begin_fn_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(task_begin_fn_ex,d,x,y,z,a,b)
+#define __itt_task_begin_fn_ex_ptr ITTNOTIFY_NAME(task_begin_fn_ex)
+#define __itt_task_end_ex(d,x,y) ITTNOTIFY_VOID_D2(task_end_ex,d,x,y)
+#define __itt_task_end_ex_ptr ITTNOTIFY_NAME(task_end_ex)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_task_begin_ex(domain,clock_domain,timestamp,id,parentid,name)
+#define __itt_task_begin_ex_ptr 0
+#define __itt_task_begin_fn_ex(domain,clock_domain,timestamp,id,parentid,fn)
+#define __itt_task_begin_fn_ex_ptr 0
+#define __itt_task_end_ex(domain,clock_domain,timestamp)
+#define __itt_task_end_ex_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_task_begin_ex_ptr 0
+#define __itt_task_begin_fn_ex_ptr 0
+#define __itt_task_end_ex_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @defgroup counters Counters
+ * @ingroup public
+ * Counters are user-defined objects with a monotonically increasing
+ * value. Counter values are 64-bit unsigned integers.
+ * Counters have names that can be displayed in
+ * the tools.
+ * @{
+ */
+
+/**
+ * @brief opaque structure for counter identification
+ */
+/** @cond exclude_from_documentation */
+
+typedef struct ___itt_counter* __itt_counter;
+
+/**
+ * @brief Create an unsigned 64-bit integer counter with the given name/domain
+ *
+ * After __itt_counter_create() is called, __itt_counter_inc(id), __itt_counter_inc_delta(id, delta),
+ * __itt_counter_set_value(id, value_ptr) or __itt_counter_set_value_ex(id, clock_domain, timestamp, value_ptr)
+ * can be used to change the value of the counter, where value_ptr is a pointer to an unsigned 64-bit integer.
+ *
+ * This call is equivalent to __itt_counter_create_typed(name, domain, __itt_metadata_u64)
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+__itt_counter ITTAPI __itt_counter_createA(const char *name, const char *domain);
+__itt_counter ITTAPI __itt_counter_createW(const wchar_t *name, const wchar_t *domain);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_counter_create __itt_counter_createW
+# define __itt_counter_create_ptr __itt_counter_createW_ptr
+#else /* UNICODE */
+# define __itt_counter_create __itt_counter_createA
+# define __itt_counter_create_ptr __itt_counter_createA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_counter ITTAPI __itt_counter_create(const char *name, const char *domain);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
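+/*
+ * Example (illustrative sketch): a 64-bit counter tracking allocations; the name and
+ * domain strings are arbitrary, and on Windows builds with UNICODE defined the
+ * wide-character variant expects wchar_t strings. __itt_counter_inc,
+ * __itt_counter_inc_delta and __itt_counter_destroy are declared below.
+ *
+ *     __itt_counter allocs = __itt_counter_create("allocations", "Example.Domain");
+ *     __itt_counter_inc(allocs);
+ *     __itt_counter_inc_delta(allocs, 16);
+ *     __itt_counter_destroy(allocs);
+ */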
+
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_counter, counter_createA, (const char *name, const char *domain))
+ITT_STUB(ITTAPI, __itt_counter, counter_createW, (const wchar_t *name, const wchar_t *domain))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_counter, counter_create, (const char *name, const char *domain))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_counter_createA ITTNOTIFY_DATA(counter_createA)
+#define __itt_counter_createA_ptr ITTNOTIFY_NAME(counter_createA)
+#define __itt_counter_createW ITTNOTIFY_DATA(counter_createW)
+#define __itt_counter_createW_ptr ITTNOTIFY_NAME(counter_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_counter_create ITTNOTIFY_DATA(counter_create)
+#define __itt_counter_create_ptr ITTNOTIFY_NAME(counter_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_counter_createA(name, domain)
+#define __itt_counter_createA_ptr 0
+#define __itt_counter_createW(name, domain)
+#define __itt_counter_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_counter_create(name, domain)
+#define __itt_counter_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_counter_createA_ptr 0
+#define __itt_counter_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_counter_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Increment the unsigned 64-bit integer counter value
+ *
+ * Calling this function on counters that are not unsigned 64-bit integers has no effect
+ */
+void ITTAPI __itt_counter_inc(__itt_counter id);
+
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, counter_inc, (__itt_counter id))
+#define __itt_counter_inc ITTNOTIFY_VOID(counter_inc)
+#define __itt_counter_inc_ptr ITTNOTIFY_NAME(counter_inc)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_counter_inc(id)
+#define __itt_counter_inc_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_counter_inc_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/**
+ * @brief Increment the unsigned 64-bit integer counter value by the given delta
+ *
+ * Calling this function on counters that are not unsigned 64-bit integers has no effect
+ */
+void ITTAPI __itt_counter_inc_delta(__itt_counter id, unsigned long long value);
+
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, counter_inc_delta, (__itt_counter id, unsigned long long value))
+#define __itt_counter_inc_delta ITTNOTIFY_VOID(counter_inc_delta)
+#define __itt_counter_inc_delta_ptr ITTNOTIFY_NAME(counter_inc_delta)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_counter_inc_delta(id, value)
+#define __itt_counter_inc_delta_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_counter_inc_delta_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Decrement the unsigned 64-bit integer counter value
+ *
+ * Calling this function on counters that are not unsigned 64-bit integers has no effect
+ */
+void ITTAPI __itt_counter_dec(__itt_counter id);
+
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, counter_dec, (__itt_counter id))
+#define __itt_counter_dec ITTNOTIFY_VOID(counter_dec)
+#define __itt_counter_dec_ptr ITTNOTIFY_NAME(counter_dec)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_counter_dec(id)
+#define __itt_counter_dec_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_counter_dec_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/**
+ * @brief Decrement the unsigned 64-bit integer counter value by the given delta
+ *
+ * Calling this function on counters that are not unsigned 64-bit integers has no effect
+ */
+void ITTAPI __itt_counter_dec_delta(__itt_counter id, unsigned long long value);
+
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, counter_dec_delta, (__itt_counter id, unsigned long long value))
+#define __itt_counter_dec_delta ITTNOTIFY_VOID(counter_dec_delta)
+#define __itt_counter_dec_delta_ptr ITTNOTIFY_NAME(counter_dec_delta)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_counter_dec_delta(id, value)
+#define __itt_counter_dec_delta_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_counter_dec_delta_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup counters
+ * @brief Increment a counter by one.
+ * The first call with a given name creates a counter by that name and sets its
+ * value to zero. Successive calls increment the counter value.
+ * @param[in] domain The domain controlling the call. Counter names are not domain specific.
+ * The domain argument is used only to enable or disable the API calls.
+ * @param[in] name The name of the counter
+ */
+void ITTAPI __itt_counter_inc_v3(const __itt_domain *domain, __itt_string_handle *name);
+
+/**
+ * @ingroup counters
+ * @brief Increment a counter by the value specified in delta.
+ * @param[in] domain The domain controlling the call. Counter names are not domain specific.
+ * The domain argument is used only to enable or disable the API calls.
+ * @param[in] name The name of the counter
+ * @param[in] delta The amount by which to increment the counter
+ */
+void ITTAPI __itt_counter_inc_delta_v3(const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta);
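+/*
+ * Example (illustrative sketch): domain-based counters identified by name only,
+ * assuming a domain 'd' created with __itt_domain_create().
+ *
+ *     __itt_string_handle* items = __itt_string_handle_create("items_processed");
+ *     __itt_counter_inc_v3(d, items);
+ *     __itt_counter_inc_delta_v3(d, items, 10);
+ */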
+
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, counter_inc_v3, (const __itt_domain *domain, __itt_string_handle *name))
+ITT_STUBV(ITTAPI, void, counter_inc_delta_v3, (const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta))
+#define __itt_counter_inc_v3(d,x) ITTNOTIFY_VOID_D1(counter_inc_v3,d,x)
+#define __itt_counter_inc_v3_ptr ITTNOTIFY_NAME(counter_inc_v3)
+#define __itt_counter_inc_delta_v3(d,x,y) ITTNOTIFY_VOID_D2(counter_inc_delta_v3,d,x,y)
+#define __itt_counter_inc_delta_v3_ptr ITTNOTIFY_NAME(counter_inc_delta_v3)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_counter_inc_v3(domain,name)
+#define __itt_counter_inc_v3_ptr 0
+#define __itt_counter_inc_delta_v3(domain,name,delta)
+#define __itt_counter_inc_delta_v3_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_counter_inc_v3_ptr 0
+#define __itt_counter_inc_delta_v3_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+
+/**
+ * @ingroup counters
+ * @brief Decrement a counter by one.
+ * The first call with a given name creates a counter by that name and sets its
+ * value to zero. Successive calls decrement the counter value.
+ * @param[in] domain The domain controlling the call. Counter names are not domain specific.
+ * The domain argument is used only to enable or disable the API calls.
+ * @param[in] name The name of the counter
+ */
+void ITTAPI __itt_counter_dec_v3(const __itt_domain *domain, __itt_string_handle *name);
+
+/**
+ * @ingroup counters
+ * @brief Decrement a counter by the value specified in delta.
+ * @param[in] domain The domain controlling the call. Counter names are not domain specific.
+ * The domain argument is used only to enable or disable the API calls.
+ * @param[in] name The name of the counter
+ * @param[in] delta The amount by which to decrement the counter
+ */
+void ITTAPI __itt_counter_dec_delta_v3(const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta);
+
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, counter_dec_v3, (const __itt_domain *domain, __itt_string_handle *name))
+ITT_STUBV(ITTAPI, void, counter_dec_delta_v3, (const __itt_domain *domain, __itt_string_handle *name, unsigned long long delta))
+#define __itt_counter_dec_v3(d,x) ITTNOTIFY_VOID_D1(counter_dec_v3,d,x)
+#define __itt_counter_dec_v3_ptr ITTNOTIFY_NAME(counter_dec_v3)
+#define __itt_counter_dec_delta_v3(d,x,y) ITTNOTIFY_VOID_D2(counter_dec_delta_v3,d,x,y)
+#define __itt_counter_dec_delta_v3_ptr ITTNOTIFY_NAME(counter_dec_delta_v3)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_counter_dec_v3(domain,name)
+#define __itt_counter_dec_v3_ptr 0
+#define __itt_counter_dec_delta_v3(domain,name,delta)
+#define __itt_counter_dec_delta_v3_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_counter_dec_v3_ptr 0
+#define __itt_counter_dec_delta_v3_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+
+
+/**
+ * @brief Set the counter value
+ */
+void ITTAPI __itt_counter_set_value(__itt_counter id, void *value_ptr);
+
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, counter_set_value, (__itt_counter id, void *value_ptr))
+#define __itt_counter_set_value ITTNOTIFY_VOID(counter_set_value)
+#define __itt_counter_set_value_ptr ITTNOTIFY_NAME(counter_set_value)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_counter_set_value(id, value_ptr)
+#define __itt_counter_set_value_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_counter_set_value_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Set the counter value using the given clock domain and timestamp
+ */
+void ITTAPI __itt_counter_set_value_ex(__itt_counter id, __itt_clock_domain *clock_domain, unsigned long long timestamp, void *value_ptr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, counter_set_value_ex, (__itt_counter id, __itt_clock_domain *clock_domain, unsigned long long timestamp, void *value_ptr))
+#define __itt_counter_set_value_ex ITTNOTIFY_VOID(counter_set_value_ex)
+#define __itt_counter_set_value_ex_ptr ITTNOTIFY_NAME(counter_set_value_ex)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_counter_set_value_ex(id, clock_domain, timestamp, value_ptr)
+#define __itt_counter_set_value_ex_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_counter_set_value_ex_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Create a typed counter with the given name/domain
+ *
+ * After __itt_counter_create_typed() is called, __itt_counter_inc(id), __itt_counter_inc_delta(id, delta),
+ * __itt_counter_set_value(id, value_ptr) or __itt_counter_set_value_ex(id, clock_domain, timestamp, value_ptr)
+ * can be used to change the value of the counter
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+__itt_counter ITTAPI __itt_counter_create_typedA(const char *name, const char *domain, __itt_metadata_type type);
+__itt_counter ITTAPI __itt_counter_create_typedW(const wchar_t *name, const wchar_t *domain, __itt_metadata_type type);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_counter_create_typed __itt_counter_create_typedW
+# define __itt_counter_create_typed_ptr __itt_counter_create_typedW_ptr
+#else /* UNICODE */
+# define __itt_counter_create_typed __itt_counter_create_typedA
+# define __itt_counter_create_typed_ptr __itt_counter_create_typedA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_counter ITTAPI __itt_counter_create_typed(const char *name, const char *domain, __itt_metadata_type type);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
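+/*
+ * Example (illustrative sketch): a double-typed counter updated through
+ * __itt_counter_set_value(), declared above; the name and domain strings are arbitrary.
+ *
+ *     __itt_counter temp = __itt_counter_create_typed("temperature", "Example.Domain",
+ *                                                     __itt_metadata_double);
+ *     double celsius = 36.6;
+ *     __itt_counter_set_value(temp, &celsius);
+ */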
+
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_counter, counter_create_typedA, (const char *name, const char *domain, __itt_metadata_type type))
+ITT_STUB(ITTAPI, __itt_counter, counter_create_typedW, (const wchar_t *name, const wchar_t *domain, __itt_metadata_type type))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_counter, counter_create_typed, (const char *name, const char *domain, __itt_metadata_type type))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_counter_create_typedA ITTNOTIFY_DATA(counter_create_typedA)
+#define __itt_counter_create_typedA_ptr ITTNOTIFY_NAME(counter_create_typedA)
+#define __itt_counter_create_typedW ITTNOTIFY_DATA(counter_create_typedW)
+#define __itt_counter_create_typedW_ptr ITTNOTIFY_NAME(counter_create_typedW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_counter_create_typed ITTNOTIFY_DATA(counter_create_typed)
+#define __itt_counter_create_typed_ptr ITTNOTIFY_NAME(counter_create_typed)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_counter_create_typedA(name, domain, type)
+#define __itt_counter_create_typedA_ptr 0
+#define __itt_counter_create_typedW(name, domain, type)
+#define __itt_counter_create_typedW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_counter_create_typed(name, domain, type)
+#define __itt_counter_create_typed_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_counter_create_typedA_ptr 0
+#define __itt_counter_create_typedW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_counter_create_typed_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Destroy the counter identified by the pointer previously returned by __itt_counter_create() or
+ * __itt_counter_create_typed()
+ */
+void ITTAPI __itt_counter_destroy(__itt_counter id);
+
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, counter_destroy, (__itt_counter id))
+#define __itt_counter_destroy ITTNOTIFY_VOID(counter_destroy)
+#define __itt_counter_destroy_ptr ITTNOTIFY_NAME(counter_destroy)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_counter_destroy(id)
+#define __itt_counter_destroy_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_counter_destroy_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} counters group */
+
+/**
+ * @ingroup markers
+ * @brief Create a marker instance.
+ * @param[in] domain The domain for this marker
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ * @param[in] id The instance ID for this marker, or __itt_null
+ * @param[in] name The name for this marker
+ * @param[in] scope The scope for this marker
+ */
+void ITTAPI __itt_marker_ex(const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_string_handle *name, __itt_scope scope);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, marker_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_string_handle *name, __itt_scope scope))
+#define __itt_marker_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(marker_ex,d,x,y,z,a,b)
+#define __itt_marker_ex_ptr ITTNOTIFY_NAME(marker_ex)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_marker_ex(domain,clock_domain,timestamp,id,name,scope)
+#define __itt_marker_ex_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_marker_ex_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @ingroup clockdomain
+ * @brief Add a relation to the current task instance.
+ * The current task instance is the head of the relation.
+ * @param[in] domain The domain controlling this call
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ * @param[in] relation The kind of relation
+ * @param[in] tail The ID for the tail of the relation
+ */
+void ITTAPI __itt_relation_add_to_current_ex(const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_relation relation, __itt_id tail);
+
+/**
+ * @ingroup clockdomain
+ * @brief Add a relation between two instance identifiers.
+ * @param[in] domain The domain controlling this call
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ * @param[in] head The ID for the head of the relation
+ * @param[in] relation The kind of relation
+ * @param[in] tail The ID for the tail of the relation
+ */
+void ITTAPI __itt_relation_add_ex(const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id head, __itt_relation relation, __itt_id tail);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, relation_add_to_current_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_relation relation, __itt_id tail))
+ITT_STUBV(ITTAPI, void, relation_add_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id head, __itt_relation relation, __itt_id tail))
+#define __itt_relation_add_to_current_ex(d,x,y,z,a) ITTNOTIFY_VOID_D4(relation_add_to_current_ex,d,x,y,z,a)
+#define __itt_relation_add_to_current_ex_ptr ITTNOTIFY_NAME(relation_add_to_current_ex)
+#define __itt_relation_add_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(relation_add_ex,d,x,y,z,a,b)
+#define __itt_relation_add_ex_ptr ITTNOTIFY_NAME(relation_add_ex)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_relation_add_to_current_ex(domain,clock_domain,timestamp,relation,tail)
+#define __itt_relation_add_to_current_ex_ptr 0
+#define __itt_relation_add_ex(domain,clock_domain,timestamp,head,relation,tail)
+#define __itt_relation_add_ex_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_relation_add_to_current_ex_ptr 0
+#define __itt_relation_add_ex_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @cond exclude_from_documentation */
+typedef enum ___itt_track_group_type
+{
+ __itt_track_group_type_normal = 0
+} __itt_track_group_type;
+/** @endcond */
+
+/** @cond exclude_from_documentation */
+#pragma pack(push, 8)
+
+typedef struct ___itt_track_group
+{
+ __itt_string_handle* name; /*!< Name of the track group */
+ struct ___itt_track* track; /*!< List of child tracks */
+ __itt_track_group_type tgtype; /*!< Type of the track group */
+ int extra1; /*!< Reserved. Must be zero */
+ void* extra2; /*!< Reserved. Must be zero */
+ struct ___itt_track_group* next;
+} __itt_track_group;
+
+#pragma pack(pop)
+/** @endcond */
+
+/**
+ * @brief Placeholder for custom track types. Currently, "normal" custom track
+ * is the only available track type.
+ */
+typedef enum ___itt_track_type
+{
+ __itt_track_type_normal = 0
+#ifdef INTEL_ITTNOTIFY_API_PRIVATE
+ , __itt_track_type_queue
+#endif /* INTEL_ITTNOTIFY_API_PRIVATE */
+} __itt_track_type;
+
+/** @cond exclude_from_documentation */
+#pragma pack(push, 8)
+
+typedef struct ___itt_track
+{
+    __itt_string_handle* name; /*!< Name of the track */
+ __itt_track_group* group; /*!< Parent group to a track */
+ __itt_track_type ttype; /*!< Type of the track */
+ int extra1; /*!< Reserved. Must be zero */
+ void* extra2; /*!< Reserved. Must be zero */
+ struct ___itt_track* next;
+} __itt_track;
+
+#pragma pack(pop)
+/** @endcond */
+
+/**
+ * @brief Create logical track group.
+ */
+__itt_track_group* ITTAPI __itt_track_group_create(__itt_string_handle* name, __itt_track_group_type track_group_type);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(ITTAPI, __itt_track_group*, track_group_create, (__itt_string_handle* name, __itt_track_group_type track_group_type))
+#define __itt_track_group_create ITTNOTIFY_DATA(track_group_create)
+#define __itt_track_group_create_ptr ITTNOTIFY_NAME(track_group_create)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_track_group_create(name,track_group_type) (__itt_track_group*)0
+#define __itt_track_group_create_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_track_group_create_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Create logical track.
+ */
+__itt_track* ITTAPI __itt_track_create(__itt_track_group* track_group, __itt_string_handle* name, __itt_track_type track_type);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(ITTAPI, __itt_track*, track_create, (__itt_track_group* track_group,__itt_string_handle* name, __itt_track_type track_type))
+#define __itt_track_create ITTNOTIFY_DATA(track_create)
+#define __itt_track_create_ptr ITTNOTIFY_NAME(track_create)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_track_create(track_group,name,track_type) (__itt_track*)0
+#define __itt_track_create_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_track_create_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Set the logical track.
+ */
+void ITTAPI __itt_set_track(__itt_track* track);
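+/*
+ * Example (illustrative sketch): grouping subsequent events under a custom track,
+ * assuming string handles created with __itt_string_handle_create(); whether passing
+ * NULL to __itt_set_track() restores the default track is an assumption here.
+ *
+ *     __itt_track_group* g = __itt_track_group_create(__itt_string_handle_create("GPU"),
+ *                                                     __itt_track_group_type_normal);
+ *     __itt_track* t = __itt_track_create(g, __itt_string_handle_create("Queue 0"),
+ *                                         __itt_track_type_normal);
+ *     __itt_set_track(t);
+ *     // ... tasks and markers recorded here are attributed to the track ...
+ *     __itt_set_track(NULL);
+ */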
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, set_track, (__itt_track *track))
+#define __itt_set_track ITTNOTIFY_VOID(set_track)
+#define __itt_set_track_ptr ITTNOTIFY_NAME(set_track)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_set_track(track)
+#define __itt_set_track_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_set_track_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/* ========================================================================== */
+/** @cond exclude_from_gpa_documentation */
+/**
+ * @defgroup events Events
+ * @ingroup public
+ * Events group
+ * @{
+ */
+/** @brief user event type */
+typedef int __itt_event;
+
+/**
+ * @brief Create an event notification
+ * @note Fails if name is null, if namelen does not match the name length, or if the user event feature is not enabled
+ * @return non-zero event identifier upon success and __itt_err otherwise
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+__itt_event LIBITTAPI __itt_event_createA(const char *name, int namelen);
+__itt_event LIBITTAPI __itt_event_createW(const wchar_t *name, int namelen);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_event_create __itt_event_createW
+# define __itt_event_create_ptr __itt_event_createW_ptr
+#else
+# define __itt_event_create __itt_event_createA
+# define __itt_event_create_ptr __itt_event_createA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_event LIBITTAPI __itt_event_create(const char *name, int namelen);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(LIBITTAPI, __itt_event, event_createA, (const char *name, int namelen))
+ITT_STUB(LIBITTAPI, __itt_event, event_createW, (const wchar_t *name, int namelen))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(LIBITTAPI, __itt_event, event_create, (const char *name, int namelen))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_event_createA ITTNOTIFY_DATA(event_createA)
+#define __itt_event_createA_ptr ITTNOTIFY_NAME(event_createA)
+#define __itt_event_createW ITTNOTIFY_DATA(event_createW)
+#define __itt_event_createW_ptr ITTNOTIFY_NAME(event_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_event_create ITTNOTIFY_DATA(event_create)
+#define __itt_event_create_ptr ITTNOTIFY_NAME(event_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_event_createA(name, namelen) (__itt_event)0
+#define __itt_event_createA_ptr 0
+#define __itt_event_createW(name, namelen) (__itt_event)0
+#define __itt_event_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_event_create(name, namelen) (__itt_event)0
+#define __itt_event_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_event_createA_ptr 0
+#define __itt_event_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_event_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record an event occurrence.
+ * @return __itt_err upon failure (invalid event id/user event feature not enabled)
+ */
+int LIBITTAPI __itt_event_start(__itt_event event);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(LIBITTAPI, int, event_start, (__itt_event event))
+#define __itt_event_start ITTNOTIFY_DATA(event_start)
+#define __itt_event_start_ptr ITTNOTIFY_NAME(event_start)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_event_start(event) (int)0
+#define __itt_event_start_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_event_start_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record an event end occurrence.
+ * @note Calling __itt_event_end is optional if events do not have durations.
+ * @return __itt_err upon failure (invalid event id/user event feature not enabled)
+ */
+int LIBITTAPI __itt_event_end(__itt_event event);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(LIBITTAPI, int, event_end, (__itt_event event))
+#define __itt_event_end ITTNOTIFY_DATA(event_end)
+#define __itt_event_end_ptr ITTNOTIFY_NAME(event_end)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_event_end(event) (int)0
+#define __itt_event_end_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_event_end_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
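+
+/**
+ * A minimal usage sketch (illustrative only) of the user-event calls above,
+ * assuming a non-UNICODE build and a collector with the user event feature
+ * enabled; the event name "frame" is made up for the example.
+ * @code
+ *     __itt_event frame = __itt_event_create("frame", 5);
+ *     if (frame != __itt_err) {
+ *         __itt_event_start(frame);
+ *         // ... work attributed to the event ...
+ *         __itt_event_end(frame);
+ *     }
+ * @endcode
+ */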
+/** @} events group */
+
+
+/**
+ * @defgroup arrays Arrays Visualizer
+ * @ingroup public
+ * Visualize arrays
+ * @{
+ */
+
+/**
+ * @enum __itt_av_data_type
+ * @brief Defines the types of array data (for C/C++ intrinsic types)
+ */
+typedef enum
+{
+ __itt_e_first = 0,
+ __itt_e_char = 0, /* 1-byte integer */
+ __itt_e_uchar, /* 1-byte unsigned integer */
+ __itt_e_int16, /* 2-byte integer */
+ __itt_e_uint16, /* 2-byte unsigned integer */
+ __itt_e_int32, /* 4-byte integer */
+ __itt_e_uint32, /* 4-byte unsigned integer */
+ __itt_e_int64, /* 8-byte integer */
+ __itt_e_uint64, /* 8-byte unsigned integer */
+ __itt_e_float, /* 4-byte floating */
+ __itt_e_double, /* 8-byte floating */
+ __itt_e_last = __itt_e_double
+} __itt_av_data_type;
+
+/**
+ * @brief Save array data to a file.
+ * The output format is defined by the file extension. The csv and bmp formats are supported (bmp is for 2-dimensional arrays only).
+ * @param[in] data - pointer to the array data
+ * @param[in] rank - the rank of the array
+ * @param[in] dimensions - pointer to an array of integers that specifies the array dimensions.
+ * The size of dimensions must be equal to the rank
+ * @param[in] type - the type of the array, specified as one of the __itt_av_data_type values (for intrinsic types)
+ * @param[in] filePath - the file path; the output format is defined by the file extension
+ * @param[in] columnOrder - defines how the array is stored in linear memory.
+ * It should be 1 for column-major order (e.g. in FORTRAN) or 0 for row-major order (e.g. in C).
+ */
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+int ITTAPI __itt_av_saveA(void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder);
+int ITTAPI __itt_av_saveW(void *data, int rank, const int *dimensions, int type, const wchar_t *filePath, int columnOrder);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_av_save __itt_av_saveW
+# define __itt_av_save_ptr __itt_av_saveW_ptr
+#else /* UNICODE */
+# define __itt_av_save __itt_av_saveA
+# define __itt_av_save_ptr __itt_av_saveA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+int ITTAPI __itt_av_save(void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, int, av_saveA, (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder))
+ITT_STUB(ITTAPI, int, av_saveW, (void *data, int rank, const int *dimensions, int type, const wchar_t *filePath, int columnOrder))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, int, av_save, (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_av_saveA ITTNOTIFY_DATA(av_saveA)
+#define __itt_av_saveA_ptr ITTNOTIFY_NAME(av_saveA)
+#define __itt_av_saveW ITTNOTIFY_DATA(av_saveW)
+#define __itt_av_saveW_ptr ITTNOTIFY_NAME(av_saveW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_av_save ITTNOTIFY_DATA(av_save)
+#define __itt_av_save_ptr ITTNOTIFY_NAME(av_save)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_av_saveA(name)
+#define __itt_av_saveA_ptr 0
+#define __itt_av_saveW(name)
+#define __itt_av_saveW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_av_save(name)
+#define __itt_av_save_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_av_saveA_ptr 0
+#define __itt_av_saveW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_av_save_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
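+
+/**
+ * A minimal usage sketch (illustrative only) for __itt_av_save, assuming a
+ * non-UNICODE build: saving a hypothetical 2-dimensional float array stored
+ * in row-major (C) order; the output format follows from the ".csv" extension.
+ * @code
+ *     static float grid[64][64];        // filled elsewhere
+ *     int dims[2] = { 64, 64 };
+ *     __itt_av_save(grid, 2, dims, __itt_e_float, "grid.csv", 0);  // 0 = row-major
+ * @endcode
+ */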
+
+void ITTAPI __itt_enable_attach(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, enable_attach, (void))
+#define __itt_enable_attach ITTNOTIFY_VOID(enable_attach)
+#define __itt_enable_attach_ptr ITTNOTIFY_NAME(enable_attach)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_enable_attach()
+#define __itt_enable_attach_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_enable_attach_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @cond exclude_from_gpa_documentation */
+
+/** @} arrays group */
+
+/** @endcond */
+
+/**
+ * @brief Module load info
+ * This API is used to report the necessary information when a module is relocated
+ * @param[in] start_addr - relocated module start address
+ * @param[in] end_addr - relocated module end address
+ * @param[in] path - file system path to the module
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+void ITTAPI __itt_module_loadA(void *start_addr, void *end_addr, const char *path);
+void ITTAPI __itt_module_loadW(void *start_addr, void *end_addr, const wchar_t *path);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_module_load __itt_module_loadW
+# define __itt_module_load_ptr __itt_module_loadW_ptr
+#else /* UNICODE */
+# define __itt_module_load __itt_module_loadA
+# define __itt_module_load_ptr __itt_module_loadA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+void ITTAPI __itt_module_load(void *start_addr, void *end_addr, const char *path);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, void, module_loadA, (void *start_addr, void *end_addr, const char *path))
+ITT_STUB(ITTAPI, void, module_loadW, (void *start_addr, void *end_addr, const wchar_t *path))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, void, module_load, (void *start_addr, void *end_addr, const char *path))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_module_loadA ITTNOTIFY_VOID(module_loadA)
+#define __itt_module_loadA_ptr ITTNOTIFY_NAME(module_loadA)
+#define __itt_module_loadW ITTNOTIFY_VOID(module_loadW)
+#define __itt_module_loadW_ptr ITTNOTIFY_NAME(module_loadW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_module_load ITTNOTIFY_VOID(module_load)
+#define __itt_module_load_ptr ITTNOTIFY_NAME(module_load)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_module_loadA(start_addr, end_addr, path)
+#define __itt_module_loadA_ptr 0
+#define __itt_module_loadW(start_addr, end_addr, path)
+#define __itt_module_loadW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_module_load(start_addr, end_addr, path)
+#define __itt_module_load_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_module_loadA_ptr 0
+#define __itt_module_loadW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_module_load_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
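+
+/**
+ * A minimal usage sketch (illustrative only) for __itt_module_load; the
+ * addresses and path below are hypothetical placeholders.
+ * @code
+ *     char* base = (char*)0x10000000;          // hypothetical load address
+ *     __itt_module_load(base, base + 0x200000, // hypothetical module end
+ *                       "/opt/app/libplugin.so");
+ * @endcode
+ */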
+
+
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* _ITTNOTIFY_H_ */
+
+#ifdef INTEL_ITTNOTIFY_API_PRIVATE
+
+#ifndef _ITTNOTIFY_PRIVATE_
+#define _ITTNOTIFY_PRIVATE_
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/**
+ * @ingroup clockdomain
+ * @brief Begin an overlapped task instance.
+ * @param[in] domain The domain for this task
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ * @param[in] taskid The identifier for this task instance, *cannot* be __itt_null.
+ * @param[in] parentid The parent of this task, or __itt_null.
+ * @param[in] name The name of this task.
+ */
+void ITTAPI __itt_task_begin_overlapped_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, __itt_string_handle* name);
+
+/**
+ * @ingroup clockdomain
+ * @brief End an overlapped task instance.
+ * @param[in] domain The domain for this task
+ * @param[in] clock_domain The clock domain controlling the execution of this call.
+ * @param[in] timestamp The user defined timestamp.
+ * @param[in] taskid Explicit ID of finished task
+ */
+void ITTAPI __itt_task_end_overlapped_ex(const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, task_begin_overlapped_ex, (const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid, __itt_id parentid, __itt_string_handle* name))
+ITT_STUBV(ITTAPI, void, task_end_overlapped_ex, (const __itt_domain* domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id taskid))
+#define __itt_task_begin_overlapped_ex(d,x,y,z,a,b) ITTNOTIFY_VOID_D5(task_begin_overlapped_ex,d,x,y,z,a,b)
+#define __itt_task_begin_overlapped_ex_ptr ITTNOTIFY_NAME(task_begin_overlapped_ex)
+#define __itt_task_end_overlapped_ex(d,x,y,z) ITTNOTIFY_VOID_D3(task_end_overlapped_ex,d,x,y,z)
+#define __itt_task_end_overlapped_ex_ptr ITTNOTIFY_NAME(task_end_overlapped_ex)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_task_begin_overlapped_ex(domain,clock_domain,timestamp,taskid,parentid,name)
+#define __itt_task_begin_overlapped_ex_ptr 0
+#define __itt_task_end_overlapped_ex(domain,clock_domain,timestamp,taskid)
+#define __itt_task_end_overlapped_ex_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_task_begin_overlapped_ex_ptr 0
+#define __itt_task_end_overlapped_ptr 0
+#define __itt_task_end_overlapped_ex_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
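+
+/**
+ * A minimal usage sketch (illustrative only) for overlapped tasks on a user
+ * clock domain. The domain, clock domain, string handle and timestamps are
+ * assumed to have been created earlier with the corresponding public APIs.
+ * @code
+ *     static int request;                          // object used to derive an ID
+ *     __itt_id id = __itt_id_make(&request, 1);    // must not be __itt_null
+ *     __itt_task_begin_overlapped_ex(domain, clock_dom, ts_begin, id,
+ *                                    __itt_null, name_handle);
+ *     // ... other overlapping tasks may begin and end here ...
+ *     __itt_task_end_overlapped_ex(domain, clock_dom, ts_end, id);
+ * @endcode
+ */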
+
+/**
+ * @defgroup makrs_internal Marks
+ * @ingroup internal
+ * Marks group
+ * @warning Internal API:
+ * - It is not shipped to outside of Intel
+ * - It is delivered to internal Intel teams using e-mail or SVN access only
+ * @{
+ */
+/** @brief user mark type */
+typedef int __itt_mark_type;
+
+/**
+ * @brief Creates a user mark type with the specified name using char or Unicode string.
+ * @param[in] name - name of mark to create
+ * @return Returns a handle to the mark type
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+__itt_mark_type ITTAPI __itt_mark_createA(const char *name);
+__itt_mark_type ITTAPI __itt_mark_createW(const wchar_t *name);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_mark_create __itt_mark_createW
+# define __itt_mark_create_ptr __itt_mark_createW_ptr
+#else /* UNICODE */
+# define __itt_mark_create __itt_mark_createA
+# define __itt_mark_create_ptr __itt_mark_createA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_mark_type ITTAPI __itt_mark_create(const char *name);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_mark_type, mark_createA, (const char *name))
+ITT_STUB(ITTAPI, __itt_mark_type, mark_createW, (const wchar_t *name))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_mark_type, mark_create, (const char *name))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_mark_createA ITTNOTIFY_DATA(mark_createA)
+#define __itt_mark_createA_ptr ITTNOTIFY_NAME(mark_createA)
+#define __itt_mark_createW ITTNOTIFY_DATA(mark_createW)
+#define __itt_mark_createW_ptr ITTNOTIFY_NAME(mark_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark_create ITTNOTIFY_DATA(mark_create)
+#define __itt_mark_create_ptr ITTNOTIFY_NAME(mark_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_mark_createA(name) (__itt_mark_type)0
+#define __itt_mark_createA_ptr 0
+#define __itt_mark_createW(name) (__itt_mark_type)0
+#define __itt_mark_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark_create(name) (__itt_mark_type)0
+#define __itt_mark_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_mark_createA_ptr 0
+#define __itt_mark_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Places a "discrete" user mark of the specified type with an optional string parameter (char or Unicode).
+ *
+ * - On success, the "discrete" mark is placed into the collection results. It appears in overtime view(s) as a special tick sign.
+ * - The call is "synchronous": the function returns only after the mark has actually been added to the results.
+ * - This function is useful, for example, to mark different phases of an application
+ *   (the beginning of the next mark automatically means the end of the current region).
+ * - Can be used together with "continuous" marks (see below) within the same collection session.
+ * @param[in] mt - mark type, created by the __itt_mark_create(const char* name) function
+ * @param[in] parameter - string parameter of the mark
+ * @return Returns zero in case of success, a non-zero value otherwise.
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+int ITTAPI __itt_markA(__itt_mark_type mt, const char *parameter);
+int ITTAPI __itt_markW(__itt_mark_type mt, const wchar_t *parameter);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_mark __itt_markW
+# define __itt_mark_ptr __itt_markW_ptr
+#else /* UNICODE */
+# define __itt_mark __itt_markA
+# define __itt_mark_ptr __itt_markA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+int ITTAPI __itt_mark(__itt_mark_type mt, const char *parameter);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, int, markA, (__itt_mark_type mt, const char *parameter))
+ITT_STUB(ITTAPI, int, markW, (__itt_mark_type mt, const wchar_t *parameter))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, int, mark, (__itt_mark_type mt, const char *parameter))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_markA ITTNOTIFY_DATA(markA)
+#define __itt_markA_ptr ITTNOTIFY_NAME(markA)
+#define __itt_markW ITTNOTIFY_DATA(markW)
+#define __itt_markW_ptr ITTNOTIFY_NAME(markW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark ITTNOTIFY_DATA(mark)
+#define __itt_mark_ptr ITTNOTIFY_NAME(mark)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_markA(mt, parameter) (int)0
+#define __itt_markA_ptr 0
+#define __itt_markW(mt, parameter) (int)0
+#define __itt_markW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark(mt, parameter) (int)0
+#define __itt_mark_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_markA_ptr 0
+#define __itt_markW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
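+
+/**
+ * A minimal usage sketch (illustrative only) of a "discrete" mark, assuming a
+ * non-UNICODE build; the mark name and parameter are made up for the example.
+ * @code
+ *     __itt_mark_type phase = __itt_mark_create("phase");
+ *     __itt_mark(phase, "initialization done");    // synchronous, returns 0 on success
+ * @endcode
+ */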
+
+/**
+ * @brief Use this if necessary to create a "discrete" user event type (mark) for the process
+ * rather than for one thread
+ * @see int __itt_mark(__itt_mark_type mt, const char* parameter);
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+int ITTAPI __itt_mark_globalA(__itt_mark_type mt, const char *parameter);
+int ITTAPI __itt_mark_globalW(__itt_mark_type mt, const wchar_t *parameter);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_mark_global __itt_mark_globalW
+# define __itt_mark_global_ptr __itt_mark_globalW_ptr
+#else /* UNICODE */
+# define __itt_mark_global __itt_mark_globalA
+# define __itt_mark_global_ptr __itt_mark_globalA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+int ITTAPI __itt_mark_global(__itt_mark_type mt, const char *parameter);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, int, mark_globalA, (__itt_mark_type mt, const char *parameter))
+ITT_STUB(ITTAPI, int, mark_globalW, (__itt_mark_type mt, const wchar_t *parameter))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, int, mark_global, (__itt_mark_type mt, const char *parameter))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_mark_globalA ITTNOTIFY_DATA(mark_globalA)
+#define __itt_mark_globalA_ptr ITTNOTIFY_NAME(mark_globalA)
+#define __itt_mark_globalW ITTNOTIFY_DATA(mark_globalW)
+#define __itt_mark_globalW_ptr ITTNOTIFY_NAME(mark_globalW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark_global ITTNOTIFY_DATA(mark_global)
+#define __itt_mark_global_ptr ITTNOTIFY_NAME(mark_global)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_mark_globalA(mt, parameter) (int)0
+#define __itt_mark_globalA_ptr 0
+#define __itt_mark_globalW(mt, parameter) (int)0
+#define __itt_mark_globalW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark_global(mt, parameter) (int)0
+#define __itt_mark_global_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_mark_globalA_ptr 0
+#define __itt_mark_globalW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_mark_global_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Creates an "end" point for a "continuous" mark with the specified name.
+ *
+ * - Returns zero in case of success, a non-zero value otherwise.
+ *   It also returns a non-zero value when the preceding "begin" point for the
+ *   mark with the same name failed to be created or was never created.
+ * - On success, the "continuous" mark is placed into the collection results.
+ *   It appears in overtime view(s) as a special tick sign (different from the
+ *   "discrete" mark) together with a line from the corresponding "begin" mark
+ *   to the "end" mark.
+ * @note Continuous marks can overlap and be nested inside each other.
+ *       A discrete mark can be nested inside a marked region.
+ * @param[in] mt - mark type, created by the __itt_mark_create(const char* name) function
+ * @return Returns zero in case of success, a non-zero value otherwise.
+ */
+int ITTAPI __itt_mark_off(__itt_mark_type mt);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(ITTAPI, int, mark_off, (__itt_mark_type mt))
+#define __itt_mark_off ITTNOTIFY_DATA(mark_off)
+#define __itt_mark_off_ptr ITTNOTIFY_NAME(mark_off)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_mark_off(mt) (int)0
+#define __itt_mark_off_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_mark_off_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Use this if necessary to create an "end" point for a mark of the process
+ * @see int __itt_mark_off(__itt_mark_type mt);
+ */
+int ITTAPI __itt_mark_global_off(__itt_mark_type mt);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(ITTAPI, int, mark_global_off, (__itt_mark_type mt))
+#define __itt_mark_global_off ITTNOTIFY_DATA(mark_global_off)
+#define __itt_mark_global_off_ptr ITTNOTIFY_NAME(mark_global_off)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_mark_global_off(mt) (int)0
+#define __itt_mark_global_off_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_mark_global_off_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} marks group */
+
+/**
+ * @defgroup counters_internal Counters
+ * @ingroup internal
+ * Counters group
+ * @{
+ */
+
+
+/**
+ * @defgroup stitch Stack Stitching
+ * @ingroup internal
+ * Stack Stitching group
+ * @{
+ */
+/**
+ * @brief Opaque structure identifying a stitch point (caller)
+ */
+typedef struct ___itt_caller *__itt_caller;
+
+/**
+ * @brief Create a stitch point, i.e. a point in the call stack to which other stacks should be stitched.
+ * The function returns a unique identifier which is used to match cut points with the corresponding stitch point.
+ */
+__itt_caller ITTAPI __itt_stack_caller_create(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(ITTAPI, __itt_caller, stack_caller_create, (void))
+#define __itt_stack_caller_create ITTNOTIFY_DATA(stack_caller_create)
+#define __itt_stack_caller_create_ptr ITTNOTIFY_NAME(stack_caller_create)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_stack_caller_create() (__itt_caller)0
+#define __itt_stack_caller_create_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_stack_caller_create_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Destroy the information about the stitch point identified by the pointer previously returned by __itt_stack_caller_create()
+ */
+void ITTAPI __itt_stack_caller_destroy(__itt_caller id);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, stack_caller_destroy, (__itt_caller id))
+#define __itt_stack_caller_destroy ITTNOTIFY_VOID(stack_caller_destroy)
+#define __itt_stack_caller_destroy_ptr ITTNOTIFY_NAME(stack_caller_destroy)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_stack_caller_destroy(id)
+#define __itt_stack_caller_destroy_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_stack_caller_destroy_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Sets the cut point. The stack of each event that occurs after this call will be cut
+ * at the stack level at which this function was called, and stitched to the corresponding stitch point.
+ */
+void ITTAPI __itt_stack_callee_enter(__itt_caller id);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, stack_callee_enter, (__itt_caller id))
+#define __itt_stack_callee_enter ITTNOTIFY_VOID(stack_callee_enter)
+#define __itt_stack_callee_enter_ptr ITTNOTIFY_NAME(stack_callee_enter)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_stack_callee_enter(id)
+#define __itt_stack_callee_enter_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_stack_callee_enter_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief This function eliminates the cut point that was set by the latest __itt_stack_callee_enter() call.
+ */
+void ITTAPI __itt_stack_callee_leave(__itt_caller id);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, stack_callee_leave, (__itt_caller id))
+#define __itt_stack_callee_leave ITTNOTIFY_VOID(stack_callee_leave)
+#define __itt_stack_callee_leave_ptr ITTNOTIFY_NAME(stack_callee_leave)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_stack_callee_leave(id)
+#define __itt_stack_callee_leave_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_stack_callee_leave_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
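+
+/**
+ * A minimal usage sketch (illustrative only) of stack stitching around
+ * deferred work, e.g. a task submitted on one thread and executed later,
+ * possibly on another thread.
+ * @code
+ *     // at submission time:
+ *     __itt_caller site = __itt_stack_caller_create();
+ *
+ *     // at execution time:
+ *     __itt_stack_callee_enter(site);
+ *     // ... stacks of events here are cut and stitched to the creation site ...
+ *     __itt_stack_callee_leave(site);
+ *
+ *     // once the stitch point is no longer needed:
+ *     __itt_stack_caller_destroy(site);
+ * @endcode
+ */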
+
+/** @} stitch group */
+
+/* ***************************************************************************************************************************** */
+
+#include <stdarg.h>
+
+/** @cond exclude_from_documentation */
+typedef enum __itt_error_code
+{
+ __itt_error_success = 0, /*!< no error */
+ __itt_error_no_module = 1, /*!< module can't be loaded */
+ /* %1$s -- library name; win: %2$d -- system error code; unx: %2$s -- system error message. */
+ __itt_error_no_symbol = 2, /*!< symbol not found */
+ /* %1$s -- library name, %2$s -- symbol name. */
+ __itt_error_unknown_group = 3, /*!< unknown group specified */
+ /* %1$s -- env var name, %2$s -- group name. */
+ __itt_error_cant_read_env = 4, /*!< GetEnvironmentVariable() failed */
+ /* %1$s -- env var name, %2$d -- system error. */
+ __itt_error_env_too_long = 5, /*!< variable value too long */
+ /* %1$s -- env var name, %2$d -- actual length of the var, %3$d -- max allowed length. */
+ __itt_error_system = 6 /*!< pthread_mutexattr_init or pthread_mutex_init failed */
+ /* %1$s -- function name, %2$d -- errno. */
+} __itt_error_code;
+
+typedef void (__itt_error_handler_t)(__itt_error_code code, va_list);
+__itt_error_handler_t* __itt_set_error_handler(__itt_error_handler_t*);
+
+const char* ITTAPI __itt_api_version(void);
+/** @endcond */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#define __itt_error_handler ITT_JOIN(INTEL_ITTNOTIFY_PREFIX, error_handler)
+void __itt_error_handler(__itt_error_code code, va_list args);
+extern const int ITTNOTIFY_NAME(err);
+#define __itt_err ITTNOTIFY_NAME(err)
+ITT_STUB(ITTAPI, const char*, api_version, (void))
+#define __itt_api_version ITTNOTIFY_DATA(api_version)
+#define __itt_api_version_ptr ITTNOTIFY_NAME(api_version)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_api_version() (const char*)0
+#define __itt_api_version_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_api_version_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
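+
+/**
+ * A minimal usage sketch (illustrative only) of installing a custom error
+ * handler and querying the API version string; the handler body is a made-up
+ * example and assumes <stdio.h> is available.
+ * @code
+ *     static void my_itt_error_handler(__itt_error_code code, va_list args)
+ *     {
+ *         (void)args;
+ *         fprintf(stderr, "ittnotify error: %d\n", (int)code);
+ *     }
+ *
+ *     // later, e.g. during application startup:
+ *     __itt_error_handler_t* prev = __itt_set_error_handler(my_itt_error_handler);
+ *     const char* version = __itt_api_version();
+ * @endcode
+ */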
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* _ITTNOTIFY_PRIVATE_ */
+
+#endif /* INTEL_ITTNOTIFY_API_PRIVATE */
diff --git a/contrib/libs/tbb/src/tbb/tools_api/ittnotify_config.h b/contrib/libs/tbb/src/tbb/tools_api/ittnotify_config.h
new file mode 100644
index 0000000000..c25730d522
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/tools_api/ittnotify_config.h
@@ -0,0 +1,585 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#ifndef _ITTNOTIFY_CONFIG_H_
+#define _ITTNOTIFY_CONFIG_H_
+
+/** @cond exclude_from_documentation */
+#ifndef ITT_OS_WIN
+# define ITT_OS_WIN 1
+#endif /* ITT_OS_WIN */
+
+#ifndef ITT_OS_LINUX
+# define ITT_OS_LINUX 2
+#endif /* ITT_OS_LINUX */
+
+#ifndef ITT_OS_MAC
+# define ITT_OS_MAC 3
+#endif /* ITT_OS_MAC */
+
+#ifndef ITT_OS_FREEBSD
+# define ITT_OS_FREEBSD 4
+#endif /* ITT_OS_FREEBSD */
+
+#ifndef ITT_OS
+# if defined WIN32 || defined _WIN32
+# define ITT_OS ITT_OS_WIN
+# elif defined( __APPLE__ ) && defined( __MACH__ )
+# define ITT_OS ITT_OS_MAC
+# elif defined( __FreeBSD__ )
+# define ITT_OS ITT_OS_FREEBSD
+# else
+# define ITT_OS ITT_OS_LINUX
+# endif
+#endif /* ITT_OS */
+
+#ifndef ITT_PLATFORM_WIN
+# define ITT_PLATFORM_WIN 1
+#endif /* ITT_PLATFORM_WIN */
+
+#ifndef ITT_PLATFORM_POSIX
+# define ITT_PLATFORM_POSIX 2
+#endif /* ITT_PLATFORM_POSIX */
+
+#ifndef ITT_PLATFORM_MAC
+# define ITT_PLATFORM_MAC 3
+#endif /* ITT_PLATFORM_MAC */
+
+#ifndef ITT_PLATFORM_FREEBSD
+# define ITT_PLATFORM_FREEBSD 4
+#endif /* ITT_PLATFORM_FREEBSD */
+
+#ifndef ITT_PLATFORM
+# if ITT_OS==ITT_OS_WIN
+# define ITT_PLATFORM ITT_PLATFORM_WIN
+# elif ITT_OS==ITT_OS_MAC
+# define ITT_PLATFORM ITT_PLATFORM_MAC
+# elif ITT_OS==ITT_OS_FREEBSD
+# define ITT_PLATFORM ITT_PLATFORM_FREEBSD
+# else
+# define ITT_PLATFORM ITT_PLATFORM_POSIX
+# endif
+#endif /* ITT_PLATFORM */
+
+#if defined(_UNICODE) && !defined(UNICODE)
+#define UNICODE
+#endif
+
+#include <stddef.h>
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#include <tchar.h>
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#include <stdint.h>
+#if defined(UNICODE) || defined(_UNICODE)
+#include <wchar.h>
+#endif /* UNICODE || _UNICODE */
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+#ifndef ITTAPI_CDECL
+# if ITT_PLATFORM==ITT_PLATFORM_WIN
+# define ITTAPI_CDECL __cdecl
+# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+# if defined _M_IX86 || defined __i386__
+# define ITTAPI_CDECL __attribute__ ((cdecl))
+# else /* _M_IX86 || __i386__ */
+# define ITTAPI_CDECL /* actual only on x86 platform */
+# endif /* _M_IX86 || __i386__ */
+# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* ITTAPI_CDECL */
+
+#ifndef STDCALL
+# if ITT_PLATFORM==ITT_PLATFORM_WIN
+# define STDCALL __stdcall
+# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+# if defined _M_IX86 || defined __i386__
+# define STDCALL __attribute__ ((stdcall))
+# else /* _M_IX86 || __i386__ */
+# define STDCALL /* supported only on x86 platform */
+# endif /* _M_IX86 || __i386__ */
+# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* STDCALL */
+
+#define ITTAPI ITTAPI_CDECL
+#define LIBITTAPI ITTAPI_CDECL
+
+/* TODO: Temporary for compatibility! */
+#define ITTAPI_CALL ITTAPI_CDECL
+#define LIBITTAPI_CALL ITTAPI_CDECL
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+/* use __forceinline (VC++ specific) */
+#define ITT_INLINE __forceinline
+#define ITT_INLINE_ATTRIBUTE /* nothing */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+/*
+ * Generally, functions are not inlined unless optimization is specified.
+ * For functions declared inline, this attribute inlines the function even
+ * if no optimization level was specified.
+ */
+#ifdef __STRICT_ANSI__
+#define ITT_INLINE static
+#define ITT_INLINE_ATTRIBUTE __attribute__((unused))
+#else /* __STRICT_ANSI__ */
+#define ITT_INLINE static inline
+#define ITT_INLINE_ATTRIBUTE __attribute__((always_inline, unused))
+#endif /* __STRICT_ANSI__ */
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+/** @endcond */
+
+#ifndef ITT_ARCH_IA32
+# define ITT_ARCH_IA32 1
+#endif /* ITT_ARCH_IA32 */
+
+#ifndef ITT_ARCH_IA32E
+# define ITT_ARCH_IA32E 2
+#endif /* ITT_ARCH_IA32E */
+
+#ifndef ITT_ARCH_ARM
+# define ITT_ARCH_ARM 4
+#endif /* ITT_ARCH_ARM */
+
+#ifndef ITT_ARCH_PPC64
+# define ITT_ARCH_PPC64 5
+#endif /* ITT_ARCH_PPC64 */
+
+#ifndef ITT_ARCH
+# if defined _M_IX86 || defined __i386__
+# define ITT_ARCH ITT_ARCH_IA32
+# elif defined _M_X64 || defined _M_AMD64 || defined __x86_64__
+# define ITT_ARCH ITT_ARCH_IA32E
+# elif defined _M_IA64 || defined __ia64__
+# define ITT_ARCH ITT_ARCH_IA64
+# elif defined _M_ARM || defined __arm__
+# define ITT_ARCH ITT_ARCH_ARM
+# elif defined __powerpc64__
+# define ITT_ARCH ITT_ARCH_PPC64
+# endif
+#endif
+
+#ifdef __cplusplus
+# define ITT_EXTERN_C extern "C"
+# define ITT_EXTERN_C_BEGIN extern "C" {
+# define ITT_EXTERN_C_END }
+#else
+# define ITT_EXTERN_C /* nothing */
+# define ITT_EXTERN_C_BEGIN /* nothing */
+# define ITT_EXTERN_C_END /* nothing */
+#endif /* __cplusplus */
+
+#define ITT_TO_STR_AUX(x) #x
+#define ITT_TO_STR(x) ITT_TO_STR_AUX(x)
+
+#define __ITT_BUILD_ASSERT(expr, suffix) do { \
+ static char __itt_build_check_##suffix[(expr) ? 1 : -1]; \
+ __itt_build_check_##suffix[0] = 0; \
+} while(0)
+#define _ITT_BUILD_ASSERT(expr, suffix) __ITT_BUILD_ASSERT((expr), suffix)
+#define ITT_BUILD_ASSERT(expr) _ITT_BUILD_ASSERT((expr), __LINE__)
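+
+/* A minimal usage sketch (illustrative only): ITT_BUILD_ASSERT is intended for
+ * statement context (inside a function body) and fails compilation via a
+ * negatively sized array when the condition is false, e.g.
+ *
+ *     ITT_BUILD_ASSERT(sizeof(long) >= 4);
+ */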
+
+#define ITT_MAGIC { 0xED, 0xAB, 0xAB, 0xEC, 0x0D, 0xEE, 0xDA, 0x30 }
+
+/* Replace with snapshot date YYYYMMDD for promotion build. */
+#define API_VERSION_BUILD 20180723
+
+#ifndef API_VERSION_NUM
+#define API_VERSION_NUM 0.0.0
+#endif /* API_VERSION_NUM */
+
+#define API_VERSION "ITT-API-Version " ITT_TO_STR(API_VERSION_NUM) \
+ " (" ITT_TO_STR(API_VERSION_BUILD) ")"
+
+/* OS communication functions */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#include <windows.h>
+typedef HMODULE lib_t;
+typedef DWORD TIDT;
+typedef CRITICAL_SECTION mutex_t;
+#define MUTEX_INITIALIZER { 0 }
+#define strong_alias(name, aliasname) /* empty for Windows */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#include <dlfcn.h>
+#if defined(UNICODE) || defined(_UNICODE)
+#include <wchar.h>
+#endif /* UNICODE */
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE 1 /* need for PTHREAD_MUTEX_RECURSIVE */
+#endif /* _GNU_SOURCE */
+#ifndef __USE_UNIX98
+#define __USE_UNIX98 1 /* need for PTHREAD_MUTEX_RECURSIVE, on SLES11.1 with gcc 4.3.4 wherein pthread.h missing dependency on __USE_XOPEN2K8 */
+#endif /*__USE_UNIX98*/
+#include <pthread.h>
+typedef void* lib_t;
+typedef pthread_t TIDT;
+typedef pthread_mutex_t mutex_t;
+#define MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER
+#define _strong_alias(name, aliasname) \
+ extern __typeof (name) aliasname __attribute__ ((alias (#name)));
+#define strong_alias(name, aliasname) _strong_alias(name, aliasname)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_get_proc(lib, name) GetProcAddress(lib, name)
+#define __itt_mutex_init(mutex) InitializeCriticalSection(mutex)
+#define __itt_mutex_lock(mutex) EnterCriticalSection(mutex)
+#define __itt_mutex_unlock(mutex) LeaveCriticalSection(mutex)
+#define __itt_load_lib(name) LoadLibraryA(name)
+#define __itt_unload_lib(handle) FreeLibrary(handle)
+#define __itt_system_error() (int)GetLastError()
+#define __itt_fstrcmp(s1, s2) lstrcmpA(s1, s2)
+#define __itt_fstrnlen(s, l) strnlen_s(s, l)
+#define __itt_fstrcpyn(s1, b, s2, l) strncpy_s(s1, b, s2, l)
+#define __itt_fstrdup(s) _strdup(s)
+#define __itt_thread_id() GetCurrentThreadId()
+#define __itt_thread_yield() SwitchToThread()
+#ifndef ITT_SIMPLE_INIT
+ITT_INLINE long
+__itt_interlocked_increment(volatile long* ptr) ITT_INLINE_ATTRIBUTE;
+ITT_INLINE long __itt_interlocked_increment(volatile long* ptr)
+{
+ return InterlockedIncrement(ptr);
+}
+#endif /* ITT_SIMPLE_INIT */
+
+#define DL_SYMBOLS (1)
+#define PTHREAD_SYMBOLS (1)
+
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+#define __itt_get_proc(lib, name) dlsym(lib, name)
+#define __itt_mutex_init(mutex) {\
+ pthread_mutexattr_t mutex_attr; \
+ int error_code = pthread_mutexattr_init(&mutex_attr); \
+ if (error_code) \
+ __itt_report_error(__itt_error_system, "pthread_mutexattr_init", \
+ error_code); \
+ error_code = pthread_mutexattr_settype(&mutex_attr, \
+ PTHREAD_MUTEX_RECURSIVE); \
+ if (error_code) \
+ __itt_report_error(__itt_error_system, "pthread_mutexattr_settype", \
+ error_code); \
+ error_code = pthread_mutex_init(mutex, &mutex_attr); \
+ if (error_code) \
+ __itt_report_error(__itt_error_system, "pthread_mutex_init", \
+ error_code); \
+ error_code = pthread_mutexattr_destroy(&mutex_attr); \
+ if (error_code) \
+ __itt_report_error(__itt_error_system, "pthread_mutexattr_destroy", \
+ error_code); \
+}
+#define __itt_mutex_lock(mutex) pthread_mutex_lock(mutex)
+#define __itt_mutex_unlock(mutex) pthread_mutex_unlock(mutex)
+#define __itt_load_lib(name) dlopen(name, RTLD_LAZY)
+#define __itt_unload_lib(handle) dlclose(handle)
+#define __itt_system_error() errno
+#define __itt_fstrcmp(s1, s2) strcmp(s1, s2)
+
+/* customer code may define the safe string APIs SDL_STRNLEN_S and SDL_STRNCPY_S to be used instead of the defaults below */
+#ifdef SDL_STRNLEN_S
+#define __itt_fstrnlen(s, l) SDL_STRNLEN_S(s, l)
+#else
+#define __itt_fstrnlen(s, l) strlen(s)
+#endif /* SDL_STRNLEN_S */
+#ifdef SDL_STRNCPY_S
+#define __itt_fstrcpyn(s1, b, s2, l) SDL_STRNCPY_S(s1, b, s2, l)
+#else
+#define __itt_fstrcpyn(s1, b, s2, l) { \
+ if (b > 0) { \
+ /* 'volatile' is used to suppress the warning that a destination */ \
+ /* bound depends on the length of the source. */ \
+ volatile size_t num_to_copy = (size_t)(b - 1) < (size_t)(l) ? \
+ (size_t)(b - 1) : (size_t)(l); \
+ strncpy(s1, s2, num_to_copy); \
+ s1[num_to_copy] = 0; \
+ } \
+}
+#endif /* SDL_STRNCPY_S */
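+
+/* A minimal usage sketch (illustrative only) for the fallback __itt_fstrcpyn
+ * above: at most b-1 characters are copied and the destination is always
+ * 0-terminated, e.g.
+ *
+ *     char buf[16];
+ *     __itt_fstrcpyn(buf, sizeof(buf), "a fairly long source string", 27);
+ */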
+
+#define __itt_fstrdup(s) strdup(s)
+#define __itt_thread_id() pthread_self()
+#define __itt_thread_yield() sched_yield()
+#if ITT_ARCH==ITT_ARCH_IA64
+#ifdef __INTEL_COMPILER
+#define __TBB_machine_fetchadd4(addr, val) __fetchadd4_acq((void *)addr, val)
+#else /* __INTEL_COMPILER */
+/* TODO: Add support for non-Intel compilers on the IA-64 architecture */
+#endif /* __INTEL_COMPILER */
+#elif ITT_ARCH==ITT_ARCH_IA32 || ITT_ARCH==ITT_ARCH_IA32E /* ITT_ARCH!=ITT_ARCH_IA64 */
+ITT_INLINE long
+__TBB_machine_fetchadd4(volatile void* ptr, long addend) ITT_INLINE_ATTRIBUTE;
+ITT_INLINE long __TBB_machine_fetchadd4(volatile void* ptr, long addend)
+{
+ long result;
+ __asm__ __volatile__("lock\nxadd %0,%1"
+ : "=r"(result),"=m"(*(int*)ptr)
+ : "0"(addend), "m"(*(int*)ptr)
+ : "memory");
+ return result;
+}
+#elif ITT_ARCH==ITT_ARCH_ARM || ITT_ARCH==ITT_ARCH_PPC64
+#define __TBB_machine_fetchadd4(addr, val) __sync_fetch_and_add(addr, val)
+#endif /* ITT_ARCH==ITT_ARCH_IA64 */
+#ifndef ITT_SIMPLE_INIT
+ITT_INLINE long
+__itt_interlocked_increment(volatile long* ptr) ITT_INLINE_ATTRIBUTE;
+ITT_INLINE long __itt_interlocked_increment(volatile long* ptr)
+{
+ return __TBB_machine_fetchadd4(ptr, 1) + 1L;
+}
+#endif /* ITT_SIMPLE_INIT */
+
+void* dlopen(const char*, int) __attribute__((weak));
+void* dlsym(void*, const char*) __attribute__((weak));
+int dlclose(void*) __attribute__((weak));
+#define DL_SYMBOLS (dlopen && dlsym && dlclose)
+
+int pthread_mutex_init(pthread_mutex_t*, const pthread_mutexattr_t*) __attribute__((weak));
+int pthread_mutex_lock(pthread_mutex_t*) __attribute__((weak));
+int pthread_mutex_unlock(pthread_mutex_t*) __attribute__((weak));
+int pthread_mutex_destroy(pthread_mutex_t*) __attribute__((weak));
+int pthread_mutexattr_init(pthread_mutexattr_t*) __attribute__((weak));
+int pthread_mutexattr_settype(pthread_mutexattr_t*, int) __attribute__((weak));
+int pthread_mutexattr_destroy(pthread_mutexattr_t*) __attribute__((weak));
+pthread_t pthread_self(void) __attribute__((weak));
+#define PTHREAD_SYMBOLS (pthread_mutex_init && pthread_mutex_lock && pthread_mutex_unlock && pthread_mutex_destroy && pthread_mutexattr_init && pthread_mutexattr_settype && pthread_mutexattr_destroy && pthread_self)
+
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+typedef enum {
+ __itt_collection_normal = 0,
+ __itt_collection_paused = 1
+} __itt_collection_state;
+
+typedef enum {
+ __itt_thread_normal = 0,
+ __itt_thread_ignored = 1
+} __itt_thread_state;
+
+#pragma pack(push, 8)
+
+typedef struct ___itt_thread_info
+{
+ const char* nameA; /*!< Copy of original name in ASCII. */
+#if defined(UNICODE) || defined(_UNICODE)
+ const wchar_t* nameW; /*!< Copy of original name in UNICODE. */
+#else /* UNICODE || _UNICODE */
+ void* nameW;
+#endif /* UNICODE || _UNICODE */
+ TIDT tid;
+ __itt_thread_state state; /*!< Thread state (paused or normal) */
+ int extra1; /*!< Reserved to the runtime */
+ void* extra2; /*!< Reserved to the runtime */
+ struct ___itt_thread_info* next;
+} __itt_thread_info;
+
+#include "ittnotify_types.h" /* For __itt_group_id definition */
+
+typedef struct ___itt_api_info_20101001
+{
+ const char* name;
+ void** func_ptr;
+ void* init_func;
+ __itt_group_id group;
+} __itt_api_info_20101001;
+
+typedef struct ___itt_api_info
+{
+ const char* name;
+ void** func_ptr;
+ void* init_func;
+ void* null_func;
+ __itt_group_id group;
+} __itt_api_info;
+
+typedef struct __itt_counter_info
+{
+ const char* nameA; /*!< Copy of original name in ASCII. */
+#if defined(UNICODE) || defined(_UNICODE)
+ const wchar_t* nameW; /*!< Copy of original name in UNICODE. */
+#else /* UNICODE || _UNICODE */
+ void* nameW;
+#endif /* UNICODE || _UNICODE */
+ const char* domainA; /*!< Copy of original name in ASCII. */
+#if defined(UNICODE) || defined(_UNICODE)
+ const wchar_t* domainW; /*!< Copy of original name in UNICODE. */
+#else /* UNICODE || _UNICODE */
+ void* domainW;
+#endif /* UNICODE || _UNICODE */
+ int type;
+ long index;
+ int extra1; /*!< Reserved to the runtime */
+ void* extra2; /*!< Reserved to the runtime */
+ struct __itt_counter_info* next;
+} __itt_counter_info_t;
+
+struct ___itt_domain;
+struct ___itt_string_handle;
+
+typedef struct ___itt_global
+{
+ unsigned char magic[8];
+ unsigned long version_major;
+ unsigned long version_minor;
+ unsigned long version_build;
+ volatile long api_initialized;
+ volatile long mutex_initialized;
+ volatile long atomic_counter;
+ mutex_t mutex;
+ lib_t lib;
+ void* error_handler;
+ const char** dll_path_ptr;
+ __itt_api_info* api_list_ptr;
+ struct ___itt_global* next;
+ /* Joinable structures below */
+ __itt_thread_info* thread_list;
+ struct ___itt_domain* domain_list;
+ struct ___itt_string_handle* string_list;
+ __itt_collection_state state;
+ __itt_counter_info_t* counter_list;
+ unsigned int ipt_collect_events;
+} __itt_global;
+
+#pragma pack(pop)
+
+#define NEW_THREAD_INFO_W(gptr,h,h_tail,t,s,n) { \
+ h = (__itt_thread_info*)malloc(sizeof(__itt_thread_info)); \
+ if (h != NULL) { \
+ h->tid = t; \
+ h->nameA = NULL; \
+ h->nameW = n ? _wcsdup(n) : NULL; \
+ h->state = s; \
+ h->extra1 = 0; /* reserved */ \
+ h->extra2 = NULL; /* reserved */ \
+ h->next = NULL; \
+ if (h_tail == NULL) \
+ (gptr)->thread_list = h; \
+ else \
+ h_tail->next = h; \
+ } \
+}
+
+#define NEW_THREAD_INFO_A(gptr,h,h_tail,t,s,n) { \
+ h = (__itt_thread_info*)malloc(sizeof(__itt_thread_info)); \
+ if (h != NULL) { \
+ h->tid = t; \
+ h->nameA = n ? __itt_fstrdup(n) : NULL; \
+ h->nameW = NULL; \
+ h->state = s; \
+ h->extra1 = 0; /* reserved */ \
+ h->extra2 = NULL; /* reserved */ \
+ h->next = NULL; \
+ if (h_tail == NULL) \
+ (gptr)->thread_list = h; \
+ else \
+ h_tail->next = h; \
+ } \
+}
+
+#define NEW_DOMAIN_W(gptr,h,h_tail,name) { \
+ h = (__itt_domain*)malloc(sizeof(__itt_domain)); \
+ if (h != NULL) { \
+ h->flags = 1; /* domain is enabled by default */ \
+ h->nameA = NULL; \
+ h->nameW = name ? _wcsdup(name) : NULL; \
+ h->extra1 = 0; /* reserved */ \
+ h->extra2 = NULL; /* reserved */ \
+ h->next = NULL; \
+ if (h_tail == NULL) \
+ (gptr)->domain_list = h; \
+ else \
+ h_tail->next = h; \
+ } \
+}
+
+#define NEW_DOMAIN_A(gptr,h,h_tail,name) { \
+ h = (__itt_domain*)malloc(sizeof(__itt_domain)); \
+ if (h != NULL) { \
+ h->flags = 1; /* domain is enabled by default */ \
+ h->nameA = name ? __itt_fstrdup(name) : NULL; \
+ h->nameW = NULL; \
+ h->extra1 = 0; /* reserved */ \
+ h->extra2 = NULL; /* reserved */ \
+ h->next = NULL; \
+ if (h_tail == NULL) \
+ (gptr)->domain_list = h; \
+ else \
+ h_tail->next = h; \
+ } \
+}
+
+#define NEW_STRING_HANDLE_W(gptr,h,h_tail,name) { \
+ h = (__itt_string_handle*)malloc(sizeof(__itt_string_handle)); \
+ if (h != NULL) { \
+ h->strA = NULL; \
+ h->strW = name ? _wcsdup(name) : NULL; \
+ h->extra1 = 0; /* reserved */ \
+ h->extra2 = NULL; /* reserved */ \
+ h->next = NULL; \
+ if (h_tail == NULL) \
+ (gptr)->string_list = h; \
+ else \
+ h_tail->next = h; \
+ } \
+}
+
+#define NEW_STRING_HANDLE_A(gptr,h,h_tail,name) { \
+ h = (__itt_string_handle*)malloc(sizeof(__itt_string_handle)); \
+ if (h != NULL) { \
+ h->strA = name ? __itt_fstrdup(name) : NULL; \
+ h->strW = NULL; \
+ h->extra1 = 0; /* reserved */ \
+ h->extra2 = NULL; /* reserved */ \
+ h->next = NULL; \
+ if (h_tail == NULL) \
+ (gptr)->string_list = h; \
+ else \
+ h_tail->next = h; \
+ } \
+}
+
+#define NEW_COUNTER_W(gptr,h,h_tail,name,domain,type) { \
+ h = (__itt_counter_info_t*)malloc(sizeof(__itt_counter_info_t)); \
+ if (h != NULL) { \
+ h->nameA = NULL; \
+ h->nameW = name ? _wcsdup(name) : NULL; \
+ h->domainA = NULL; \
+ h->domainW = name ? _wcsdup(domain) : NULL; \
+ h->type = type; \
+ h->index = 0; \
+ h->next = NULL; \
+ if (h_tail == NULL) \
+ (gptr)->counter_list = h; \
+ else \
+ h_tail->next = h; \
+ } \
+}
+
+#define NEW_COUNTER_A(gptr,h,h_tail,name,domain,type) { \
+ h = (__itt_counter_info_t*)malloc(sizeof(__itt_counter_info_t)); \
+ if (h != NULL) { \
+ h->nameA = name ? __itt_fstrdup(name) : NULL; \
+ h->nameW = NULL; \
+ h->domainA = domain ? __itt_fstrdup(domain) : NULL; \
+ h->domainW = NULL; \
+ h->type = type; \
+ h->index = 0; \
+ h->next = NULL; \
+ if (h_tail == NULL) \
+ (gptr)->counter_list = h; \
+ else \
+ h_tail->next = h; \
+ } \
+}
+
+#endif /* _ITTNOTIFY_CONFIG_H_ */
diff --git a/contrib/libs/tbb/src/tbb/tools_api/ittnotify_static.c b/contrib/libs/tbb/src/tbb/tools_api/ittnotify_static.c
new file mode 100644
index 0000000000..dd8ca8e755
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/tools_api/ittnotify_static.c
@@ -0,0 +1,1244 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "ittnotify_config.h"
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define PATH_MAX 512
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+#include <limits.h>
+#include <dlfcn.h>
+#include <errno.h>
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+
+#define INTEL_NO_MACRO_BODY
+#define INTEL_ITTNOTIFY_API_PRIVATE
+#include "ittnotify.h"
+#include "legacy/ittnotify.h"
+
+#include "disable_warnings.h"
+
+static const char api_version[] = API_VERSION "\0\n@(#) $Revision$\n";
+
+#define _N_(n) ITT_JOIN(INTEL_ITTNOTIFY_PREFIX,n)
+
+#if ITT_OS==ITT_OS_WIN
+static const char* ittnotify_lib_name = "libittnotify.dll";
+#elif ITT_OS==ITT_OS_LINUX || ITT_OS==ITT_OS_FREEBSD
+static const char* ittnotify_lib_name = "libittnotify.so";
+#elif ITT_OS==ITT_OS_MAC
+static const char* ittnotify_lib_name = "libittnotify.dylib";
+#else
+#error Unsupported or unknown OS.
+#endif
+
+#ifdef __ANDROID__
+#include <android/log.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <linux/limits.h>
+
+#ifdef ITT_ANDROID_LOG
+ #define ITT_ANDROID_LOG_TAG "INTEL_VTUNE_USERAPI"
+ #define ITT_ANDROID_LOGI(...) ((void)__android_log_print(ANDROID_LOG_INFO, ITT_ANDROID_LOG_TAG, __VA_ARGS__))
+ #define ITT_ANDROID_LOGW(...) ((void)__android_log_print(ANDROID_LOG_WARN, ITT_ANDROID_LOG_TAG, __VA_ARGS__))
+ #define ITT_ANDROID_LOGE(...) ((void)__android_log_print(ANDROID_LOG_ERROR,ITT_ANDROID_LOG_TAG, __VA_ARGS__))
+ #define ITT_ANDROID_LOGD(...) ((void)__android_log_print(ANDROID_LOG_DEBUG,ITT_ANDROID_LOG_TAG, __VA_ARGS__))
+#else
+ #define ITT_ANDROID_LOGI(...)
+ #define ITT_ANDROID_LOGW(...)
+ #define ITT_ANDROID_LOGE(...)
+ #define ITT_ANDROID_LOGD(...)
+#endif
+
+/* default location of userapi collector on Android */
+#define ANDROID_ITTNOTIFY_DEFAULT_PATH_MASK(x) "/data/data/com.intel.vtune/perfrun/lib" \
+ #x "/runtime/libittnotify.so"
+
+#if ITT_ARCH==ITT_ARCH_IA32 || ITT_ARCH==ITT_ARCH_ARM
+#define ANDROID_ITTNOTIFY_DEFAULT_PATH ANDROID_ITTNOTIFY_DEFAULT_PATH_MASK(32)
+#else
+#define ANDROID_ITTNOTIFY_DEFAULT_PATH ANDROID_ITTNOTIFY_DEFAULT_PATH_MASK(64)
+#endif
+
+#endif
+
+
+#ifndef LIB_VAR_NAME
+#if ITT_ARCH==ITT_ARCH_IA32 || ITT_ARCH==ITT_ARCH_ARM
+#define LIB_VAR_NAME INTEL_LIBITTNOTIFY32
+#else
+#define LIB_VAR_NAME INTEL_LIBITTNOTIFY64
+#endif
+#endif /* LIB_VAR_NAME */
+
+#define ITT_MUTEX_INIT_AND_LOCK(p) { \
+ if (PTHREAD_SYMBOLS) \
+ { \
+ if (!p.mutex_initialized) \
+ { \
+ if (__itt_interlocked_increment(&p.atomic_counter) == 1) \
+ { \
+ __itt_mutex_init(&p.mutex); \
+ p.mutex_initialized = 1; \
+ } \
+ else \
+ while (!p.mutex_initialized) \
+ __itt_thread_yield(); \
+ } \
+ __itt_mutex_lock(&p.mutex); \
+ } \
+}
+
+const int _N_(err) = 0;
+
+typedef int (__itt_init_ittlib_t)(const char*, __itt_group_id);
+
+/* This define is used to control the initialization function name. */
+#ifndef __itt_init_ittlib_name
+ITT_EXTERN_C int _N_(init_ittlib)(const char*, __itt_group_id);
+static __itt_init_ittlib_t* __itt_init_ittlib_ptr = _N_(init_ittlib);
+#define __itt_init_ittlib_name __itt_init_ittlib_ptr
+#endif /* __itt_init_ittlib_name */
+
+typedef void (__itt_fini_ittlib_t)(void);
+
+/* This define is used to control the finalization function name. */
+#ifndef __itt_fini_ittlib_name
+ITT_EXTERN_C void _N_(fini_ittlib)(void);
+static __itt_fini_ittlib_t* __itt_fini_ittlib_ptr = _N_(fini_ittlib);
+#define __itt_fini_ittlib_name __itt_fini_ittlib_ptr
+#endif /* __itt_fini_ittlib_name */
+
+extern __itt_global _N_(_ittapi_global);
+
+/* building pointers to imported funcs */
+#undef ITT_STUBV
+#undef ITT_STUB
+#define ITT_STUB(api,type,name,args,params,ptr,group,format) \
+static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args;\
+typedef type api ITT_JOIN(_N_(name),_t) args; \
+ITT_EXTERN_C_BEGIN ITT_JOIN(_N_(name),_t)* ITTNOTIFY_NAME(name) = ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)); ITT_EXTERN_C_END \
+static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args \
+{ \
+ if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list == NULL) \
+ __itt_init_ittlib_name(NULL, __itt_group_all); \
+ if (ITTNOTIFY_NAME(name) && ITTNOTIFY_NAME(name) != ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init))) \
+ return ITTNOTIFY_NAME(name) params; \
+ else \
+ return (type)0; \
+}
+
+#define ITT_STUBV(api,type,name,args,params,ptr,group,format) \
+static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args;\
+typedef type api ITT_JOIN(_N_(name),_t) args; \
+ITT_EXTERN_C_BEGIN ITT_JOIN(_N_(name),_t)* ITTNOTIFY_NAME(name) = ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)); ITT_EXTERN_C_END \
+static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args \
+{ \
+ if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list == NULL) \
+ __itt_init_ittlib_name(NULL, __itt_group_all); \
+ if (ITTNOTIFY_NAME(name) && ITTNOTIFY_NAME(name) != ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init))) \
+ ITTNOTIFY_NAME(name) params; \
+ else \
+ return; \
+}
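+
+/* Illustrative sketch (not normative): for a hypothetical entry
+ *
+ *     ITT_STUB(ITTAPI, int, foo, (int x), (x), foo, __itt_group_none, "%d")
+ *
+ * the ITT_STUB macro above roughly generates a lazy-initialization thunk, a
+ * function-pointer typedef, and a dispatch pointer initialized to the thunk:
+ *
+ *     static int ITTAPI __itt_foo_init(int x);      // lazy-init thunk
+ *     typedef int ITTAPI __itt_foo_t(int x);
+ *     __itt_foo_t* __itt_foo_ptr = __itt_foo_init;  // dispatch pointer
+ *
+ * The thunk loads the collector on first use, forwards through the dispatch
+ * pointer if it was repointed during initialization, and otherwise returns
+ * (type)0. The exact generated names depend on INTEL_ITTNOTIFY_PREFIX,
+ * ITT_VERSIONIZE and ITTNOTIFY_NAME; the names above are simplified.
+ */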
+
+#undef __ITT_INTERNAL_INIT
+#include "ittnotify_static.h"
+
+#undef ITT_STUB
+#undef ITT_STUBV
+#define ITT_STUB(api,type,name,args,params,ptr,group,format) \
+static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args;\
+typedef type api ITT_JOIN(_N_(name),_t) args; \
+ITT_EXTERN_C_BEGIN ITT_JOIN(_N_(name),_t)* ITTNOTIFY_NAME(name) = ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)); ITT_EXTERN_C_END
+
+#define ITT_STUBV(api,type,name,args,params,ptr,group,format) \
+static type api ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)) args;\
+typedef type api ITT_JOIN(_N_(name),_t) args; \
+ITT_EXTERN_C_BEGIN ITT_JOIN(_N_(name),_t)* ITTNOTIFY_NAME(name) = ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)); ITT_EXTERN_C_END
+
+#define __ITT_INTERNAL_INIT
+#include "ittnotify_static.h"
+#undef __ITT_INTERNAL_INIT
+
+ITT_GROUP_LIST(group_list);
+
+#pragma pack(push, 8)
+
+typedef struct ___itt_group_alias
+{
+ const char* env_var;
+ __itt_group_id groups;
+} __itt_group_alias;
+
+static __itt_group_alias group_alias[] = {
+ { "KMP_FOR_TPROFILE", (__itt_group_id)(__itt_group_control | __itt_group_thread | __itt_group_sync | __itt_group_mark) },
+ { "KMP_FOR_TCHECK", (__itt_group_id)(__itt_group_control | __itt_group_thread | __itt_group_sync | __itt_group_fsync | __itt_group_mark | __itt_group_suppress) },
+ { NULL, (__itt_group_none) },
+ { api_version, (__itt_group_none) } /* !!! Just to avoid unused code elimination !!! */
+};
+
+#pragma pack(pop)
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#pragma warning(push)
+#pragma warning(disable: 4054) /* warning C4054: 'type cast' : from function pointer 'XXX' to data pointer 'void *' */
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+static __itt_api_info api_list[] = {
+/* Define functions with static implementation */
+#undef ITT_STUB
+#undef ITT_STUBV
+#define ITT_STUB(api,type,name,args,params,nameindll,group,format) { ITT_TO_STR(ITT_JOIN(__itt_,nameindll)), (void**)(void*)&ITTNOTIFY_NAME(name), (void*)(size_t)&ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)), (void*)(size_t)&ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)), (__itt_group_id)(group)},
+#define ITT_STUBV ITT_STUB
+#define __ITT_INTERNAL_INIT
+#include "ittnotify_static.h"
+#undef __ITT_INTERNAL_INIT
+/* Define functions without static implementation */
+#undef ITT_STUB
+#undef ITT_STUBV
+#define ITT_STUB(api,type,name,args,params,nameindll,group,format) {ITT_TO_STR(ITT_JOIN(__itt_,nameindll)), (void**)(void*)&ITTNOTIFY_NAME(name), (void*)(size_t)&ITT_VERSIONIZE(ITT_JOIN(_N_(name),_init)), NULL, (__itt_group_id)(group)},
+#define ITT_STUBV ITT_STUB
+#include "ittnotify_static.h"
+ {NULL, NULL, NULL, NULL, __itt_group_none}
+};
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#pragma warning(pop)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+static const char dll_path[PATH_MAX] = { 0 };
+
+/* static part descriptor which handles all notification API attributes. */
+__itt_global _N_(_ittapi_global) = {
+ ITT_MAGIC, /* identification info */
+ ITT_MAJOR, ITT_MINOR, API_VERSION_BUILD, /* version info */
+ 0, /* api_initialized */
+ 0, /* mutex_initialized */
+ 0, /* atomic_counter */
+ MUTEX_INITIALIZER, /* mutex */
+ NULL, /* dynamic library handle */
+ NULL, /* error_handler */
+ (const char**)&dll_path, /* dll_path_ptr */
+ (__itt_api_info*)&api_list, /* api_list_ptr */
+ NULL, /* next __itt_global */
+ NULL, /* thread_list */
+ NULL, /* domain_list */
+ NULL, /* string_list */
+ __itt_collection_normal, /* collection state */
+ NULL, /* counter_list */
+ 0 /* ipt_collect_events */
+};
+
+typedef void (__itt_api_init_t)(__itt_global*, __itt_group_id);
+typedef void (__itt_api_fini_t)(__itt_global*);
+
+/* ========================================================================= */
+
+#ifdef ITT_NOTIFY_EXT_REPORT
+ITT_EXTERN_C void _N_(error_handler)(__itt_error_code, va_list args);
+#endif /* ITT_NOTIFY_EXT_REPORT */
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#pragma warning(push)
+#pragma warning(disable: 4055) /* warning C4055: 'type cast' : from data pointer 'void *' to function pointer 'XXX' */
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+static void __itt_report_error_impl(int code, ...) {
+ va_list args;
+ va_start(args, code);
+ if (_N_(_ittapi_global).error_handler != NULL)
+ {
+ __itt_error_handler_t* handler = (__itt_error_handler_t*)(size_t)_N_(_ittapi_global).error_handler;
+ handler((__itt_error_code)code, args);
+ }
+#ifdef ITT_NOTIFY_EXT_REPORT
+ _N_(error_handler)(code, args);
+#endif /* ITT_NOTIFY_EXT_REPORT */
+ va_end(args);
+}
+
+// va_start cannot take an enum (__itt_error_code) on clang, so it is necessary to convert it to int
+#define __itt_report_error(code, ...) \
+ __itt_report_error_impl((int)code,__VA_ARGS__)
+
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#pragma warning(pop)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+static __itt_domain* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(domain_createW),_init))(const wchar_t* name)
+{
+ __itt_domain *h_tail = NULL, *h = NULL;
+
+ if (name == NULL)
+ {
+ return NULL;
+ }
+
+ ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global));
+ if (_N_(_ittapi_global).api_initialized)
+ {
+ if (ITTNOTIFY_NAME(domain_createW) && ITTNOTIFY_NAME(domain_createW) != ITT_VERSIONIZE(ITT_JOIN(_N_(domain_createW),_init)))
+ {
+ __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
+ return ITTNOTIFY_NAME(domain_createW)(name);
+ }
+ }
+ for (h_tail = NULL, h = _N_(_ittapi_global).domain_list; h != NULL; h_tail = h, h = h->next)
+ {
+ if (h->nameW != NULL && !wcscmp(h->nameW, name)) break;
+ }
+ if (h == NULL)
+ {
+ NEW_DOMAIN_W(&_N_(_ittapi_global),h,h_tail,name);
+ }
+ if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
+ return h;
+}
+
+static __itt_domain* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(domain_createA),_init))(const char* name)
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+static __itt_domain* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(domain_create),_init))(const char* name)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+{
+ __itt_domain *h_tail = NULL, *h = NULL;
+
+ if (name == NULL)
+ {
+ return NULL;
+ }
+
+ ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global));
+ if (_N_(_ittapi_global).api_initialized)
+ {
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ if (ITTNOTIFY_NAME(domain_createA) && ITTNOTIFY_NAME(domain_createA) != ITT_VERSIONIZE(ITT_JOIN(_N_(domain_createA),_init)))
+ {
+ __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
+ return ITTNOTIFY_NAME(domain_createA)(name);
+ }
+#else
+ if (ITTNOTIFY_NAME(domain_create) && ITTNOTIFY_NAME(domain_create) != ITT_VERSIONIZE(ITT_JOIN(_N_(domain_create),_init)))
+ {
+ if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
+ return ITTNOTIFY_NAME(domain_create)(name);
+ }
+#endif
+ }
+ for (h_tail = NULL, h = _N_(_ittapi_global).domain_list; h != NULL; h_tail = h, h = h->next)
+ {
+ if (h->nameA != NULL && !__itt_fstrcmp(h->nameA, name)) break;
+ }
+ if (h == NULL)
+ {
+ NEW_DOMAIN_A(&_N_(_ittapi_global),h,h_tail,name);
+ }
+ if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
+ return h;
+}
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+static __itt_string_handle* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_createW),_init))(const wchar_t* name)
+{
+ __itt_string_handle *h_tail = NULL, *h = NULL;
+
+ if (name == NULL)
+ {
+ return NULL;
+ }
+
+ ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global));
+ if (_N_(_ittapi_global).api_initialized)
+ {
+ if (ITTNOTIFY_NAME(string_handle_createW) && ITTNOTIFY_NAME(string_handle_createW) != ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_createW),_init)))
+ {
+ __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
+ return ITTNOTIFY_NAME(string_handle_createW)(name);
+ }
+ }
+ for (h_tail = NULL, h = _N_(_ittapi_global).string_list; h != NULL; h_tail = h, h = h->next)
+ {
+ if (h->strW != NULL && !wcscmp(h->strW, name)) break;
+ }
+ if (h == NULL)
+ {
+ NEW_STRING_HANDLE_W(&_N_(_ittapi_global),h,h_tail,name);
+ }
+ __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
+ return h;
+}
+
+static __itt_string_handle* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_createA),_init))(const char* name)
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+static __itt_string_handle* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_create),_init))(const char* name)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+{
+ __itt_string_handle *h_tail = NULL, *h = NULL;
+
+ if (name == NULL)
+ {
+ return NULL;
+ }
+
+ ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global));
+ if (_N_(_ittapi_global).api_initialized)
+ {
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ if (ITTNOTIFY_NAME(string_handle_createA) && ITTNOTIFY_NAME(string_handle_createA) != ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_createA),_init)))
+ {
+ __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
+ return ITTNOTIFY_NAME(string_handle_createA)(name);
+ }
+#else
+ if (ITTNOTIFY_NAME(string_handle_create) && ITTNOTIFY_NAME(string_handle_create) != ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_create),_init)))
+ {
+ if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
+ return ITTNOTIFY_NAME(string_handle_create)(name);
+ }
+#endif
+ }
+ for (h_tail = NULL, h = _N_(_ittapi_global).string_list; h != NULL; h_tail = h, h = h->next)
+ {
+ if (h->strA != NULL && !__itt_fstrcmp(h->strA, name)) break;
+ }
+ if (h == NULL)
+ {
+ NEW_STRING_HANDLE_A(&_N_(_ittapi_global),h,h_tail,name);
+ }
+ if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
+ return h;
+}
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_createW),_init))(const wchar_t *name, const wchar_t *domain)
+{
+ __itt_counter_info_t *h_tail = NULL, *h = NULL;
+ __itt_metadata_type type = __itt_metadata_u64;
+
+ if (name == NULL)
+ {
+ return NULL;
+ }
+
+ ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global));
+ if (_N_(_ittapi_global).api_initialized)
+ {
+ if (ITTNOTIFY_NAME(counter_createW) && ITTNOTIFY_NAME(counter_createW) != ITT_VERSIONIZE(ITT_JOIN(_N_(counter_createW),_init)))
+ {
+ __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
+ return ITTNOTIFY_NAME(counter_createW)(name, domain);
+ }
+ }
+ for (h_tail = NULL, h = _N_(_ittapi_global).counter_list; h != NULL; h_tail = h, h = h->next)
+ {
+ if (h->nameW != NULL && h->type == type && !wcscmp(h->nameW, name) && ((h->domainW == NULL && domain == NULL) ||
+ (h->domainW != NULL && domain != NULL && !wcscmp(h->domainW, domain)))) break;
+
+ }
+ if (h == NULL)
+ {
+ NEW_COUNTER_W(&_N_(_ittapi_global),h,h_tail,name,domain,type);
+ }
+ __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
+ return (__itt_counter)h;
+}
+
+static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_createA),_init))(const char *name, const char *domain)
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create),_init))(const char *name, const char *domain)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+{
+ __itt_counter_info_t *h_tail = NULL, *h = NULL;
+ __itt_metadata_type type = __itt_metadata_u64;
+
+ if (name == NULL)
+ {
+ return NULL;
+ }
+
+ ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global));
+ if (_N_(_ittapi_global).api_initialized)
+ {
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ if (ITTNOTIFY_NAME(counter_createA) && ITTNOTIFY_NAME(counter_createA) != ITT_VERSIONIZE(ITT_JOIN(_N_(counter_createA),_init)))
+ {
+ __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
+ return ITTNOTIFY_NAME(counter_createA)(name, domain);
+ }
+#else
+ if (ITTNOTIFY_NAME(counter_create) && ITTNOTIFY_NAME(counter_create) != ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create),_init)))
+ {
+ if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
+ return ITTNOTIFY_NAME(counter_create)(name, domain);
+ }
+#endif
+ }
+ for (h_tail = NULL, h = _N_(_ittapi_global).counter_list; h != NULL; h_tail = h, h = h->next)
+ {
+ if (h->nameA != NULL && h->type == type && !__itt_fstrcmp(h->nameA, name) && ((h->domainA == NULL && domain == NULL) ||
+ (h->domainA != NULL && domain != NULL && !__itt_fstrcmp(h->domainA, domain)))) break;
+ }
+ if (h == NULL)
+ {
+ NEW_COUNTER_A(&_N_(_ittapi_global),h,h_tail,name,domain,type);
+ }
+ if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
+ return (__itt_counter)h;
+}
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create_typedW),_init))(const wchar_t *name, const wchar_t *domain, __itt_metadata_type type)
+{
+ __itt_counter_info_t *h_tail = NULL, *h = NULL;
+
+ if (name == NULL)
+ {
+ return NULL;
+ }
+
+ ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global));
+ if (_N_(_ittapi_global).api_initialized)
+ {
+ if (ITTNOTIFY_NAME(counter_create_typedW) && ITTNOTIFY_NAME(counter_create_typedW) != ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create_typedW),_init)))
+ {
+ __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
+ return ITTNOTIFY_NAME(counter_create_typedW)(name, domain, type);
+ }
+ }
+ for (h_tail = NULL, h = _N_(_ittapi_global).counter_list; h != NULL; h_tail = h, h = h->next)
+ {
+ if (h->nameW != NULL && h->type == type && !wcscmp(h->nameW, name) && ((h->domainW == NULL && domain == NULL) ||
+ (h->domainW != NULL && domain != NULL && !wcscmp(h->domainW, domain)))) break;
+
+ }
+ if (h == NULL)
+ {
+ NEW_COUNTER_W(&_N_(_ittapi_global),h,h_tail,name,domain,type);
+ }
+ __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
+ return (__itt_counter)h;
+}
+
+static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create_typedA),_init))(const char *name, const char *domain, __itt_metadata_type type)
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create_typed),_init))(const char *name, const char *domain, __itt_metadata_type type)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+{
+ __itt_counter_info_t *h_tail = NULL, *h = NULL;
+
+ if (name == NULL)
+ {
+ return NULL;
+ }
+
+ ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global));
+ if (_N_(_ittapi_global).api_initialized)
+ {
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ if (ITTNOTIFY_NAME(counter_create_typedA) && ITTNOTIFY_NAME(counter_create_typedA) != ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create_typedA),_init)))
+ {
+ __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
+ return ITTNOTIFY_NAME(counter_create_typedA)(name, domain, type);
+ }
+#else
+ if (ITTNOTIFY_NAME(counter_create_typed) && ITTNOTIFY_NAME(counter_create_typed) != ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create_typed),_init)))
+ {
+ if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
+ return ITTNOTIFY_NAME(counter_create_typed)(name, domain, type);
+ }
+#endif
+ }
+ for (h_tail = NULL, h = _N_(_ittapi_global).counter_list; h != NULL; h_tail = h, h = h->next)
+ {
+ if (h->nameA != NULL && h->type == type && !__itt_fstrcmp(h->nameA, name) && ((h->domainA == NULL && domain == NULL) ||
+ (h->domainA != NULL && domain != NULL && !__itt_fstrcmp(h->domainA, domain)))) break;
+ }
+ if (h == NULL)
+ {
+ NEW_COUNTER_A(&_N_(_ittapi_global),h,h_tail,name,domain,type);
+ }
+ if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
+ return (__itt_counter)h;
+}
+
+/* -------------------------------------------------------------------------- */
+
+static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(pause),_init))(void)
+{
+ if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list == NULL)
+ {
+ __itt_init_ittlib_name(NULL, __itt_group_all);
+ }
+ if (ITTNOTIFY_NAME(pause) && ITTNOTIFY_NAME(pause) != ITT_VERSIONIZE(ITT_JOIN(_N_(pause),_init)))
+ {
+ ITTNOTIFY_NAME(pause)();
+ }
+ else
+ {
+ _N_(_ittapi_global).state = __itt_collection_paused;
+ }
+}
+
+static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(resume),_init))(void)
+{
+ if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list == NULL)
+ {
+ __itt_init_ittlib_name(NULL, __itt_group_all);
+ }
+ if (ITTNOTIFY_NAME(resume) && ITTNOTIFY_NAME(resume) != ITT_VERSIONIZE(ITT_JOIN(_N_(resume),_init)))
+ {
+ ITTNOTIFY_NAME(resume)();
+ }
+ else
+ {
+ _N_(_ittapi_global).state = __itt_collection_normal;
+ }
+}
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameW),_init))(const wchar_t* name)
+{
+ if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list == NULL)
+ {
+ __itt_init_ittlib_name(NULL, __itt_group_all);
+ }
+ if (ITTNOTIFY_NAME(thread_set_nameW) && ITTNOTIFY_NAME(thread_set_nameW) != ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameW),_init)))
+ {
+ ITTNOTIFY_NAME(thread_set_nameW)(name);
+ }
+}
+
+static int ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thr_name_setW),_init))(const wchar_t* name, int namelen)
+{
+ (void)namelen;
+ ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameW),_init))(name);
+ return 0;
+}
+
+static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameA),_init))(const char* name)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_name),_init))(const char* name)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+{
+ if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list == NULL)
+ {
+ __itt_init_ittlib_name(NULL, __itt_group_all);
+ }
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ if (ITTNOTIFY_NAME(thread_set_nameA) && ITTNOTIFY_NAME(thread_set_nameA) != ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameA),_init)))
+ {
+ ITTNOTIFY_NAME(thread_set_nameA)(name);
+ }
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ if (ITTNOTIFY_NAME(thread_set_name) && ITTNOTIFY_NAME(thread_set_name) != ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_name),_init)))
+ {
+ ITTNOTIFY_NAME(thread_set_name)(name);
+ }
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+}
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+static int ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thr_name_setA),_init))(const char* name, int namelen)
+{
+ (void)namelen;
+ ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameA),_init))(name);
+ return 0;
+}
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+static int ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thr_name_set),_init))(const char* name, int namelen)
+{
+ (void)namelen;
+ ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_name),_init))(name);
+ return 0;
+}
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thread_ignore),_init))(void)
+{
+ if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list == NULL)
+ {
+ __itt_init_ittlib_name(NULL, __itt_group_all);
+ }
+ if (ITTNOTIFY_NAME(thread_ignore) && ITTNOTIFY_NAME(thread_ignore) != ITT_VERSIONIZE(ITT_JOIN(_N_(thread_ignore),_init)))
+ {
+ ITTNOTIFY_NAME(thread_ignore)();
+ }
+}
+
+static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thr_ignore),_init))(void)
+{
+ ITT_VERSIONIZE(ITT_JOIN(_N_(thread_ignore),_init))();
+}
+
+static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(enable_attach),_init))(void)
+{
+#ifdef __ANDROID__
+    /*
+     * If the LIB_VAR_NAME environment variable was already set, keep its
+     * previous value; otherwise set the default path.
+     */
+ setenv(ITT_TO_STR(LIB_VAR_NAME), ANDROID_ITTNOTIFY_DEFAULT_PATH, 0);
+#endif
+}
+
+/* -------------------------------------------------------------------------- */
+
+static const char* __itt_fsplit(const char* s, const char* sep, const char** out, int* len)
+{
+ int i;
+ int j;
+
+ if (!s || !sep || !out || !len)
+ return NULL;
+
+ for (i = 0; s[i]; i++)
+ {
+ int b = 0;
+ for (j = 0; sep[j]; j++)
+ if (s[i] == sep[j])
+ {
+ b = 1;
+ break;
+ }
+ if (!b)
+ break;
+ }
+
+ if (!s[i])
+ return NULL;
+
+ *len = 0;
+ *out = &s[i];
+
+ for (; s[i]; i++, (*len)++)
+ {
+ int b = 0;
+ for (j = 0; sep[j]; j++)
+ if (s[i] == sep[j])
+ {
+ b = 1;
+ break;
+ }
+ if (b)
+ break;
+ }
+
+ for (; s[i]; i++)
+ {
+ int b = 0;
+ for (j = 0; sep[j]; j++)
+ if (s[i] == sep[j])
+ {
+ b = 1;
+ break;
+ }
+ if (!b)
+ break;
+ }
+
+ return &s[i];
+}
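+
+/* Illustrative behavior (not part of the upstream sources): __itt_fsplit() is a small
+ * tokenizer -- it skips leading separator characters, reports the next token through
+ * *out/*len, and returns a pointer positioned at the start of the following token
+ * (or NULL when the input is exhausted).  For example, calling it repeatedly on
+ * "foo,bar baz" with sep ",; " would yield the chunks "foo", "bar" and "baz". */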
+
+/* This function returns the value of an environment variable, placed into a static buffer.
+ * !!! The same static buffer is reused for subsequent calls. !!!
+ * This avoids dynamic allocation; the function is actually needed only four times.
+ */
+static const char* __itt_get_env_var(const char* name)
+{
+#define MAX_ENV_VALUE_SIZE 4086
+ static char env_buff[MAX_ENV_VALUE_SIZE];
+ static char* env_value = (char*)env_buff;
+
+ if (name != NULL)
+ {
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ size_t max_len = MAX_ENV_VALUE_SIZE - (size_t)(env_value - env_buff);
+ DWORD rc = GetEnvironmentVariableA(name, env_value, (DWORD)max_len);
+ if (rc >= max_len)
+ __itt_report_error(__itt_error_env_too_long, name, (size_t)rc - 1, (size_t)(max_len - 1));
+ else if (rc > 0)
+ {
+ const char* ret = (const char*)env_value;
+ env_value += rc + 1;
+ return ret;
+ }
+ else
+ {
+            /* If the environment variable is empty, GetEnvironmentVariableA()
+             * returns zero (the number of characters, not including the terminating null),
+             * and GetLastError() returns ERROR_SUCCESS. */
+ DWORD err = GetLastError();
+ if (err == ERROR_SUCCESS)
+ return env_value;
+
+ if (err != ERROR_ENVVAR_NOT_FOUND)
+ __itt_report_error(__itt_error_cant_read_env, name, (int)err);
+ }
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+ char* env = getenv(name);
+ if (env != NULL)
+ {
+ size_t len = __itt_fstrnlen(env, MAX_ENV_VALUE_SIZE);
+ size_t max_len = MAX_ENV_VALUE_SIZE - (size_t)(env_value - env_buff);
+ if (len < max_len)
+ {
+ const char* ret = (const char*)env_value;
+ __itt_fstrcpyn(env_value, max_len, env, len + 1);
+ env_value += len + 1;
+ return ret;
+ } else
+ __itt_report_error(__itt_error_env_too_long, name, (size_t)len, (size_t)(max_len - 1));
+ }
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ }
+ return NULL;
+}
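+
+/* Note (summary, not part of the upstream sources): every successful lookup advances
+ * env_value, so previously returned strings stay valid while the 4086-byte buffer is
+ * consumed monotonically and never reclaimed; this is only safe because the function
+ * is called just a handful of times during initialization. */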
+
+static const char* __itt_get_lib_name(void)
+{
+ const char* lib_name = __itt_get_env_var(ITT_TO_STR(LIB_VAR_NAME));
+
+#ifdef __ANDROID__
+ if (lib_name == NULL)
+ {
+
+#if ITT_ARCH==ITT_ARCH_IA32 || ITT_ARCH==ITT_ARCH_ARM
+ const char* const marker_filename = "com.intel.itt.collector_lib_32";
+#else
+ const char* const marker_filename = "com.intel.itt.collector_lib_64";
+#endif
+
+ char system_wide_marker_filename[PATH_MAX] = {0};
+ int itt_marker_file_fd = -1;
+ ssize_t res = 0;
+
+ res = snprintf(system_wide_marker_filename, PATH_MAX - 1, "%s%s", "/data/local/tmp/", marker_filename);
+ if (res < 0)
+ {
+ ITT_ANDROID_LOGE("Unable to concatenate marker file string.");
+ return lib_name;
+ }
+ itt_marker_file_fd = open(system_wide_marker_filename, O_RDONLY);
+
+ if (itt_marker_file_fd == -1)
+ {
+ const pid_t my_pid = getpid();
+ char cmdline_path[PATH_MAX] = {0};
+ char package_name[PATH_MAX] = {0};
+ char app_sandbox_file[PATH_MAX] = {0};
+ int cmdline_fd = 0;
+
+ ITT_ANDROID_LOGI("Unable to open system-wide marker file.");
+ res = snprintf(cmdline_path, PATH_MAX - 1, "/proc/%d/cmdline", my_pid);
+ if (res < 0)
+ {
+ ITT_ANDROID_LOGE("Unable to get cmdline path string.");
+ return lib_name;
+ }
+
+ ITT_ANDROID_LOGI("CMD file: %s\n", cmdline_path);
+ cmdline_fd = open(cmdline_path, O_RDONLY);
+ if (cmdline_fd == -1)
+ {
+ ITT_ANDROID_LOGE("Unable to open %s file!", cmdline_path);
+ return lib_name;
+ }
+ res = read(cmdline_fd, package_name, PATH_MAX - 1);
+ if (res == -1)
+ {
+ ITT_ANDROID_LOGE("Unable to read %s file!", cmdline_path);
+ res = close(cmdline_fd);
+ if (res == -1)
+ {
+ ITT_ANDROID_LOGE("Unable to close %s file!", cmdline_path);
+ }
+ return lib_name;
+ }
+ res = close(cmdline_fd);
+ if (res == -1)
+ {
+ ITT_ANDROID_LOGE("Unable to close %s file!", cmdline_path);
+ return lib_name;
+ }
+ ITT_ANDROID_LOGI("Package name: %s\n", package_name);
+ res = snprintf(app_sandbox_file, PATH_MAX - 1, "/data/data/%s/%s", package_name, marker_filename);
+ if (res < 0)
+ {
+ ITT_ANDROID_LOGE("Unable to concatenate marker file string.");
+ return lib_name;
+ }
+
+ ITT_ANDROID_LOGI("Lib marker file name: %s\n", app_sandbox_file);
+ itt_marker_file_fd = open(app_sandbox_file, O_RDONLY);
+ if (itt_marker_file_fd == -1)
+ {
+ ITT_ANDROID_LOGE("Unable to open app marker file!");
+ return lib_name;
+ }
+ }
+
+ {
+ char itt_lib_name[PATH_MAX] = {0};
+
+ res = read(itt_marker_file_fd, itt_lib_name, PATH_MAX - 1);
+ if (res == -1)
+ {
+ ITT_ANDROID_LOGE("Unable to read %s file!", itt_marker_file_fd);
+ res = close(itt_marker_file_fd);
+ if (res == -1)
+ {
+ ITT_ANDROID_LOGE("Unable to close %s file!", itt_marker_file_fd);
+ }
+ return lib_name;
+ }
+ ITT_ANDROID_LOGI("ITT Lib path: %s", itt_lib_name);
+ res = close(itt_marker_file_fd);
+ if (res == -1)
+ {
+ ITT_ANDROID_LOGE("Unable to close %s file!", itt_marker_file_fd);
+ return lib_name;
+ }
+ ITT_ANDROID_LOGI("Set env %s to %s", ITT_TO_STR(LIB_VAR_NAME), itt_lib_name);
+ res = setenv(ITT_TO_STR(LIB_VAR_NAME), itt_lib_name, 0);
+ if (res == -1)
+ {
+ ITT_ANDROID_LOGE("Unable to set env var!");
+ return lib_name;
+ }
+ lib_name = __itt_get_env_var(ITT_TO_STR(LIB_VAR_NAME));
+ ITT_ANDROID_LOGI("ITT Lib path from env: %s", lib_name);
+ }
+ }
+#endif
+
+ return lib_name;
+}
+
+/* Avoid clashes with std::min */
+#define __itt_min(a,b) ((a) < (b) ? (a) : (b))
+
+static __itt_group_id __itt_get_groups(void)
+{
+ int i;
+ __itt_group_id res = __itt_group_none;
+ const char* var_name = "INTEL_ITTNOTIFY_GROUPS";
+ const char* group_str = __itt_get_env_var(var_name);
+
+ if (group_str != NULL)
+ {
+ int len;
+ char gr[255];
+ const char* chunk;
+ while ((group_str = __itt_fsplit(group_str, ",; ", &chunk, &len)) != NULL)
+ {
+ int min_len = __itt_min(len, (int)(sizeof(gr) - 1));
+ __itt_fstrcpyn(gr, sizeof(gr) - 1, chunk, min_len);
+ gr[min_len] = 0;
+
+ for (i = 0; group_list[i].name != NULL; i++)
+ {
+ if (!__itt_fstrcmp(gr, group_list[i].name))
+ {
+ res = (__itt_group_id)(res | group_list[i].id);
+ break;
+ }
+ }
+ }
+        /* TODO: !!! Workaround for a bug that warns about an unknown group !!!
+         * Should be fixed in the new initialization scheme.
+         * For now the following groups must always be set. */
+ for (i = 0; group_list[i].id != __itt_group_none; i++)
+ if (group_list[i].id != __itt_group_all &&
+ group_list[i].id > __itt_group_splitter_min &&
+ group_list[i].id < __itt_group_splitter_max)
+ res = (__itt_group_id)(res | group_list[i].id);
+ return res;
+ }
+ else
+ {
+ for (i = 0; group_alias[i].env_var != NULL; i++)
+ if (__itt_get_env_var(group_alias[i].env_var) != NULL)
+ return group_alias[i].groups;
+ }
+
+ return res;
+}
+
+#undef __itt_min
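+
+/* Illustrative usage (not part of the upstream sources): instrumented groups can be
+ * restricted through the environment, e.g. something like
+ *     INTEL_ITTNOTIFY_GROUPS="sync,heap" ./app
+ * where the accepted names are the ones listed in group_list; unrecognized chunks are
+ * simply ignored by the matching loop above. */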
+
+static int __itt_lib_version(lib_t lib)
+{
+ if (lib == NULL)
+ return 0;
+ if (__itt_get_proc(lib, "__itt_api_init"))
+ return 2;
+ if (__itt_get_proc(lib, "__itt_api_version"))
+ return 1;
+ return 0;
+}
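+
+/* Note (summary, not part of the upstream sources): the returned version selects the
+ * initialization path in init_ittlib() below -- 2 means the collector exports
+ * __itt_api_init and fills the table itself, 1 means the pointers are resolved one by
+ * one from the library, and 0 is treated as a legacy collector (__itt_group_legacy). */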
+
+/* It is not used right now; commented out to avoid warnings.
+static void __itt_reinit_all_pointers(void)
+{
+ register int i;
+ // Fill all pointers with initial stubs
+ for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++)
+ *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].init_func;
+}
+*/
+
+static void __itt_nullify_all_pointers(void)
+{
+ int i;
+    /* Nullify all pointers except domain_create, string_handle_create and counter_create */
+ for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++)
+ *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func;
+}
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#pragma warning(push)
+#pragma warning(disable: 4054) /* warning C4054: 'type cast' : from function pointer 'XXX' to data pointer 'void *' */
+#pragma warning(disable: 4055) /* warning C4055: 'type cast' : from data pointer 'void *' to function pointer 'XXX' */
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+ITT_EXTERN_C void _N_(fini_ittlib)(void)
+{
+ __itt_api_fini_t* __itt_api_fini_ptr = NULL;
+ static volatile TIDT current_thread = 0;
+
+ if (_N_(_ittapi_global).api_initialized)
+ {
+ ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global));
+ if (_N_(_ittapi_global).api_initialized)
+ {
+ if (current_thread == 0)
+ {
+ if (PTHREAD_SYMBOLS) current_thread = __itt_thread_id();
+ if (_N_(_ittapi_global).lib != NULL)
+ {
+ __itt_api_fini_ptr = (__itt_api_fini_t*)(size_t)__itt_get_proc(_N_(_ittapi_global).lib, "__itt_api_fini");
+ }
+ if (__itt_api_fini_ptr)
+ {
+ __itt_api_fini_ptr(&_N_(_ittapi_global));
+ }
+
+ __itt_nullify_all_pointers();
+
+                /* TODO: !!! not safe !!! unloading is not supported so far.
+ * if (_N_(_ittapi_global).lib != NULL)
+ * __itt_unload_lib(_N_(_ittapi_global).lib);
+ * _N_(_ittapi_global).lib = NULL;
+ */
+ _N_(_ittapi_global).api_initialized = 0;
+ current_thread = 0;
+ }
+ }
+ if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
+ }
+}
+
+ITT_EXTERN_C int _N_(init_ittlib)(const char* lib_name, __itt_group_id init_groups)
+{
+ int i;
+ __itt_group_id groups;
+#ifdef ITT_COMPLETE_GROUP
+ __itt_group_id zero_group = __itt_group_none;
+#endif /* ITT_COMPLETE_GROUP */
+ static volatile TIDT current_thread = 0;
+
+ if (!_N_(_ittapi_global).api_initialized)
+ {
+#ifndef ITT_SIMPLE_INIT
+ ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global));
+#endif /* ITT_SIMPLE_INIT */
+
+ if (!_N_(_ittapi_global).api_initialized)
+ {
+ if (current_thread == 0)
+ {
+ if (PTHREAD_SYMBOLS) current_thread = __itt_thread_id();
+ if (lib_name == NULL)
+ {
+ lib_name = __itt_get_lib_name();
+ }
+ groups = __itt_get_groups();
+ if (DL_SYMBOLS && (groups != __itt_group_none || lib_name != NULL))
+ {
+ _N_(_ittapi_global).lib = __itt_load_lib((lib_name == NULL) ? ittnotify_lib_name : lib_name);
+
+ if (_N_(_ittapi_global).lib != NULL)
+ {
+ __itt_api_init_t* __itt_api_init_ptr;
+ int lib_version = __itt_lib_version(_N_(_ittapi_global).lib);
+
+ switch (lib_version) {
+ case 0:
+ groups = __itt_group_legacy;
+ /* Falls through */
+ case 1:
+ /* Fill all pointers from dynamic library */
+ for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++)
+ {
+ if (_N_(_ittapi_global).api_list_ptr[i].group & groups & init_groups)
+ {
+ *_N_(_ittapi_global).api_list_ptr[i].func_ptr = (void*)__itt_get_proc(_N_(_ittapi_global).lib, _N_(_ittapi_global).api_list_ptr[i].name);
+ if (*_N_(_ittapi_global).api_list_ptr[i].func_ptr == NULL)
+ {
+                                    /* Restore pointers for functions with a static implementation */
+ *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func;
+ __itt_report_error(__itt_error_no_symbol, lib_name, _N_(_ittapi_global).api_list_ptr[i].name);
+#ifdef ITT_COMPLETE_GROUP
+ zero_group = (__itt_group_id)(zero_group | _N_(_ittapi_global).api_list_ptr[i].group);
+#endif /* ITT_COMPLETE_GROUP */
+ }
+ }
+ else
+ *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func;
+ }
+
+ if (groups == __itt_group_legacy)
+ {
+ /* Compatibility with legacy tools */
+ ITTNOTIFY_NAME(thread_ignore) = ITTNOTIFY_NAME(thr_ignore);
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ ITTNOTIFY_NAME(sync_createA) = ITTNOTIFY_NAME(sync_set_nameA);
+ ITTNOTIFY_NAME(sync_createW) = ITTNOTIFY_NAME(sync_set_nameW);
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+ ITTNOTIFY_NAME(sync_create) = ITTNOTIFY_NAME(sync_set_name);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ ITTNOTIFY_NAME(sync_prepare) = ITTNOTIFY_NAME(notify_sync_prepare);
+ ITTNOTIFY_NAME(sync_cancel) = ITTNOTIFY_NAME(notify_sync_cancel);
+ ITTNOTIFY_NAME(sync_acquired) = ITTNOTIFY_NAME(notify_sync_acquired);
+ ITTNOTIFY_NAME(sync_releasing) = ITTNOTIFY_NAME(notify_sync_releasing);
+ }
+
+#ifdef ITT_COMPLETE_GROUP
+ for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++)
+ if (_N_(_ittapi_global).api_list_ptr[i].group & zero_group)
+ *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func;
+#endif /* ITT_COMPLETE_GROUP */
+ break;
+ case 2:
+ __itt_api_init_ptr = (__itt_api_init_t*)(size_t)__itt_get_proc(_N_(_ittapi_global).lib, "__itt_api_init");
+ if (__itt_api_init_ptr)
+ __itt_api_init_ptr(&_N_(_ittapi_global), init_groups);
+ break;
+ }
+ }
+ else
+ {
+ __itt_nullify_all_pointers();
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ int error = __itt_system_error();
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ const char* error = dlerror();
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ __itt_report_error(__itt_error_no_module, lib_name, error);
+ }
+ }
+ else
+ {
+ __itt_nullify_all_pointers();
+ }
+ _N_(_ittapi_global).api_initialized = 1;
+ current_thread = 0;
+ /* !!! Just to avoid unused code elimination !!! */
+ if (__itt_fini_ittlib_ptr == _N_(fini_ittlib)) current_thread = 0;
+ }
+ }
+
+#ifndef ITT_SIMPLE_INIT
+ if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex);
+#endif /* ITT_SIMPLE_INIT */
+ }
+
+    /* Check whether any function pointer is non-empty and belongs to init_groups */
+ for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++)
+ {
+ if (*_N_(_ittapi_global).api_list_ptr[i].func_ptr != _N_(_ittapi_global).api_list_ptr[i].null_func &&
+ _N_(_ittapi_global).api_list_ptr[i].group & init_groups)
+ {
+ return 1;
+ }
+ }
+ return 0;
+}
+
+ITT_EXTERN_C __itt_error_handler_t* _N_(set_error_handler)(__itt_error_handler_t* handler)
+{
+ __itt_error_handler_t* prev = (__itt_error_handler_t*)(size_t)_N_(_ittapi_global).error_handler;
+ _N_(_ittapi_global).error_handler = (void*)(size_t)handler;
+ return prev;
+}
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#pragma warning(pop)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** The __itt_mark_pt_region functions mark a region of interest.
+ * The region parameter selects one of the regions: 0 <= region < 8. */
+
+#if defined(ITT_API_IPT_SUPPORT) && (ITT_PLATFORM==ITT_PLATFORM_WIN || ITT_PLATFORM==ITT_PLATFORM_POSIX) && !defined(__ANDROID__)
+void __itt_pt_mark(__itt_pt_region region);
+void __itt_pt_mark_event(__itt_pt_region region);
+#endif
+
+ITT_EXTERN_C void _N_(mark_pt_region_begin)(__itt_pt_region region)
+{
+#if defined(ITT_API_IPT_SUPPORT) && (ITT_PLATFORM==ITT_PLATFORM_WIN || ITT_PLATFORM==ITT_PLATFORM_POSIX) && !defined(__ANDROID__)
+ if (_N_(_ittapi_global).ipt_collect_events == 1)
+ {
+ __itt_pt_mark_event(2*region);
+ }
+ else
+ {
+ __itt_pt_mark(2*region);
+ }
+#else
+ (void)region;
+#endif
+}
+
+ITT_EXTERN_C void _N_(mark_pt_region_end)(__itt_pt_region region)
+{
+#if defined(ITT_API_IPT_SUPPORT) && (ITT_PLATFORM==ITT_PLATFORM_WIN || ITT_PLATFORM==ITT_PLATFORM_POSIX) && !defined(__ANDROID__)
+ if (_N_(_ittapi_global).ipt_collect_events == 1)
+ {
+ __itt_pt_mark_event(2*region + 1);
+ }
+ else
+ {
+ __itt_pt_mark(2*region + 1);
+ }
+#else
+ (void)region;
+#endif
+}
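+
+/* Note (summary, not part of the upstream sources): begin and end events share one mark
+ * space -- region r is encoded as PT mark 2*r on entry and 2*r + 1 on exit, which keeps
+ * the eight allowed regions (0 <= r < 8) within 16 distinct mark values. */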
+
diff --git a/contrib/libs/tbb/src/tbb/tools_api/ittnotify_static.h b/contrib/libs/tbb/src/tbb/tools_api/ittnotify_static.h
new file mode 100644
index 0000000000..67cf683880
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/tools_api/ittnotify_static.h
@@ -0,0 +1,354 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "ittnotify_config.h"
+
+#ifndef ITT_FORMAT_DEFINED
+# ifndef ITT_FORMAT
+# define ITT_FORMAT
+# endif /* ITT_FORMAT */
+# ifndef ITT_NO_PARAMS
+# define ITT_NO_PARAMS
+# endif /* ITT_NO_PARAMS */
+#endif /* ITT_FORMAT_DEFINED */
+
+/*
+ * Expected parameters for the macro:
+ * ITT_STUB(api, type, func_name, arguments, params, func_name_in_dll, group, printf_fmt)
+ */
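+
+/*
+ * Illustrative expansion (not part of the upstream sources): under the ITT_STUB
+ * definition used while building api_list in ittnotify_static.c, each line below
+ * becomes one __itt_api_info row -- roughly
+ *     { "__itt_<name>", &<global function pointer>, &<static _init stub>,
+ *       &<static _init stub or NULL>, <group> }
+ * -- while other inclusions of this header redefine ITT_STUB/ITT_STUBV to declare
+ * the prototypes or the function pointers themselves.
+ */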
+#ifdef __ITT_INTERNAL_INIT
+
+#ifndef __ITT_INTERNAL_BODY
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_domain*, domain_createA, (const char *name), (ITT_FORMAT name), domain_createA, __itt_group_structure, "\"%s\"")
+ITT_STUB(ITTAPI, __itt_domain*, domain_createW, (const wchar_t *name), (ITT_FORMAT name), domain_createW, __itt_group_structure, "\"%S\"")
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_domain*, domain_create, (const char *name), (ITT_FORMAT name), domain_create, __itt_group_structure, "\"%s\"")
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_createA, (const char *name), (ITT_FORMAT name), string_handle_createA, __itt_group_structure, "\"%s\"")
+ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_createW, (const wchar_t *name), (ITT_FORMAT name), string_handle_createW, __itt_group_structure, "\"%S\"")
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_create, (const char *name), (ITT_FORMAT name), string_handle_create, __itt_group_structure, "\"%s\"")
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_counter, counter_createA, (const char *name, const char *domain), (ITT_FORMAT name, domain), counter_createA, __itt_group_counter, "\"%s\", \"%s\"")
+ITT_STUB(ITTAPI, __itt_counter, counter_createW, (const wchar_t *name, const wchar_t *domain), (ITT_FORMAT name, domain), counter_createW, __itt_group_counter, "\"%s\", \"%s\"")
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_counter, counter_create, (const char *name, const char *domain), (ITT_FORMAT name, domain), counter_create, __itt_group_counter, "\"%s\", \"%s\"")
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_counter, counter_create_typedA, (const char *name, const char *domain, __itt_metadata_type type), (ITT_FORMAT name, domain, type), counter_create_typedA, __itt_group_counter, "\"%s\", \"%s\", %d")
+ITT_STUB(ITTAPI, __itt_counter, counter_create_typedW, (const wchar_t *name, const wchar_t *domain, __itt_metadata_type type), (ITT_FORMAT name, domain, type), counter_create_typedW, __itt_group_counter, "\"%s\", \"%s\", %d")
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_counter, counter_create_typed, (const char *name, const char *domain, __itt_metadata_type type), (ITT_FORMAT name, domain, type), counter_create_typed, __itt_group_counter, "\"%s\", \"%s\", %d")
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+
+ITT_STUBV(ITTAPI, void, pause, (void), (ITT_NO_PARAMS), pause, __itt_group_control | __itt_group_legacy, "no args")
+ITT_STUBV(ITTAPI, void, resume, (void), (ITT_NO_PARAMS), resume, __itt_group_control | __itt_group_legacy, "no args")
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, thread_set_nameA, (const char *name), (ITT_FORMAT name), thread_set_nameA, __itt_group_thread, "\"%s\"")
+ITT_STUBV(ITTAPI, void, thread_set_nameW, (const wchar_t *name), (ITT_FORMAT name), thread_set_nameW, __itt_group_thread, "\"%S\"")
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, thread_set_name, (const char *name), (ITT_FORMAT name), thread_set_name, __itt_group_thread, "\"%s\"")
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, thread_ignore, (void), (ITT_NO_PARAMS), thread_ignore, __itt_group_thread, "no args")
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(LIBITTAPI, int, thr_name_setA, (const char *name, int namelen), (ITT_FORMAT name, namelen), thr_name_setA, __itt_group_thread | __itt_group_legacy, "\"%s\", %d")
+ITT_STUB(LIBITTAPI, int, thr_name_setW, (const wchar_t *name, int namelen), (ITT_FORMAT name, namelen), thr_name_setW, __itt_group_thread | __itt_group_legacy, "\"%S\", %d")
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+ITT_STUB(LIBITTAPI, int, thr_name_set, (const char *name, int namelen), (ITT_FORMAT name, namelen), thr_name_set, __itt_group_thread | __itt_group_legacy, "\"%s\", %d")
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUBV(LIBITTAPI, void, thr_ignore, (void), (ITT_NO_PARAMS), thr_ignore, __itt_group_thread | __itt_group_legacy, "no args")
+#endif /* __ITT_INTERNAL_BODY */
+
+ITT_STUBV(ITTAPI, void, enable_attach, (void), (ITT_NO_PARAMS), enable_attach, __itt_group_all, "no args")
+
+#else /* __ITT_INTERNAL_INIT */
+
+ITT_STUBV(ITTAPI, void, detach, (void), (ITT_NO_PARAMS), detach, __itt_group_control | __itt_group_legacy, "no args")
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, sync_createA, (void *addr, const char *objtype, const char *objname, int attribute), (ITT_FORMAT addr, objtype, objname, attribute), sync_createA, __itt_group_sync | __itt_group_fsync, "%p, \"%s\", \"%s\", %x")
+ITT_STUBV(ITTAPI, void, sync_createW, (void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute), (ITT_FORMAT addr, objtype, objname, attribute), sync_createW, __itt_group_sync | __itt_group_fsync, "%p, \"%S\", \"%S\", %x")
+ITT_STUBV(ITTAPI, void, sync_renameA, (void *addr, const char *name), (ITT_FORMAT addr, name), sync_renameA, __itt_group_sync | __itt_group_fsync, "%p, \"%s\"")
+ITT_STUBV(ITTAPI, void, sync_renameW, (void *addr, const wchar_t *name), (ITT_FORMAT addr, name), sync_renameW, __itt_group_sync | __itt_group_fsync, "%p, \"%S\"")
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, sync_create, (void *addr, const char *objtype, const char *objname, int attribute), (ITT_FORMAT addr, objtype, objname, attribute), sync_create, __itt_group_sync | __itt_group_fsync, "%p, \"%s\", \"%s\", %x")
+ITT_STUBV(ITTAPI, void, sync_rename, (void *addr, const char *name), (ITT_FORMAT addr, name), sync_rename, __itt_group_sync | __itt_group_fsync, "%p, \"%s\"")
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, sync_destroy, (void *addr), (ITT_FORMAT addr), sync_destroy, __itt_group_sync | __itt_group_fsync, "%p")
+
+ITT_STUBV(ITTAPI, void, sync_prepare, (void* addr), (ITT_FORMAT addr), sync_prepare, __itt_group_sync, "%p")
+ITT_STUBV(ITTAPI, void, sync_cancel, (void *addr), (ITT_FORMAT addr), sync_cancel, __itt_group_sync, "%p")
+ITT_STUBV(ITTAPI, void, sync_acquired, (void *addr), (ITT_FORMAT addr), sync_acquired, __itt_group_sync, "%p")
+ITT_STUBV(ITTAPI, void, sync_releasing, (void* addr), (ITT_FORMAT addr), sync_releasing, __itt_group_sync, "%p")
+
+ITT_STUBV(ITTAPI, void, suppress_push, (unsigned int mask), (ITT_FORMAT mask), suppress_push, __itt_group_suppress, "%p")
+ITT_STUBV(ITTAPI, void, suppress_pop, (void), (ITT_NO_PARAMS), suppress_pop, __itt_group_suppress, "no args")
+ITT_STUBV(ITTAPI, void, suppress_mark_range, (__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size),(ITT_FORMAT mode, mask, address, size), suppress_mark_range, __itt_group_suppress, "%d, %p, %p, %d")
+ITT_STUBV(ITTAPI, void, suppress_clear_range,(__itt_suppress_mode_t mode, unsigned int mask, void * address, size_t size),(ITT_FORMAT mode, mask, address, size), suppress_clear_range,__itt_group_suppress, "%d, %p, %p, %d")
+
+ITT_STUBV(ITTAPI, void, fsync_prepare, (void* addr), (ITT_FORMAT addr), sync_prepare, __itt_group_fsync, "%p")
+ITT_STUBV(ITTAPI, void, fsync_cancel, (void *addr), (ITT_FORMAT addr), sync_cancel, __itt_group_fsync, "%p")
+ITT_STUBV(ITTAPI, void, fsync_acquired, (void *addr), (ITT_FORMAT addr), sync_acquired, __itt_group_fsync, "%p")
+ITT_STUBV(ITTAPI, void, fsync_releasing, (void* addr), (ITT_FORMAT addr), sync_releasing, __itt_group_fsync, "%p")
+
+ITT_STUBV(ITTAPI, void, model_site_begin, (__itt_model_site *site, __itt_model_site_instance *instance, const char *name), (ITT_FORMAT site, instance, name), model_site_begin, __itt_group_model, "%p, %p, \"%s\"")
+ITT_STUBV(ITTAPI, void, model_site_end, (__itt_model_site *site, __itt_model_site_instance *instance), (ITT_FORMAT site, instance), model_site_end, __itt_group_model, "%p, %p")
+ITT_STUBV(ITTAPI, void, model_task_begin, (__itt_model_task *task, __itt_model_task_instance *instance, const char *name), (ITT_FORMAT task, instance, name), model_task_begin, __itt_group_model, "%p, %p, \"%s\"")
+ITT_STUBV(ITTAPI, void, model_task_end, (__itt_model_task *task, __itt_model_task_instance *instance), (ITT_FORMAT task, instance), model_task_end, __itt_group_model, "%p, %p")
+ITT_STUBV(ITTAPI, void, model_lock_acquire, (void *lock), (ITT_FORMAT lock), model_lock_acquire, __itt_group_model, "%p")
+ITT_STUBV(ITTAPI, void, model_lock_release, (void *lock), (ITT_FORMAT lock), model_lock_release, __itt_group_model, "%p")
+ITT_STUBV(ITTAPI, void, model_record_allocation, (void *addr, size_t size), (ITT_FORMAT addr, size), model_record_allocation, __itt_group_model, "%p, %d")
+ITT_STUBV(ITTAPI, void, model_record_deallocation, (void *addr), (ITT_FORMAT addr), model_record_deallocation, __itt_group_model, "%p")
+ITT_STUBV(ITTAPI, void, model_induction_uses, (void* addr, size_t size), (ITT_FORMAT addr, size), model_induction_uses, __itt_group_model, "%p, %d")
+ITT_STUBV(ITTAPI, void, model_reduction_uses, (void* addr, size_t size), (ITT_FORMAT addr, size), model_reduction_uses, __itt_group_model, "%p, %d")
+ITT_STUBV(ITTAPI, void, model_observe_uses, (void* addr, size_t size), (ITT_FORMAT addr, size), model_observe_uses, __itt_group_model, "%p, %d")
+ITT_STUBV(ITTAPI, void, model_clear_uses, (void* addr), (ITT_FORMAT addr), model_clear_uses, __itt_group_model, "%p")
+
+#ifndef __ITT_INTERNAL_BODY
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, model_site_beginW, (const wchar_t *name), (ITT_FORMAT name), model_site_beginW, __itt_group_model, "\"%s\"")
+ITT_STUBV(ITTAPI, void, model_task_beginW, (const wchar_t *name), (ITT_FORMAT name), model_task_beginW, __itt_group_model, "\"%s\"")
+ITT_STUBV(ITTAPI, void, model_iteration_taskW, (const wchar_t *name), (ITT_FORMAT name), model_iteration_taskW, __itt_group_model, "\"%s\"")
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, model_site_beginA, (const char *name), (ITT_FORMAT name), model_site_beginA, __itt_group_model, "\"%s\"")
+ITT_STUBV(ITTAPI, void, model_site_beginAL, (const char *name, size_t len), (ITT_FORMAT name, len), model_site_beginAL, __itt_group_model, "\"%s\", %d")
+ITT_STUBV(ITTAPI, void, model_task_beginA, (const char *name), (ITT_FORMAT name), model_task_beginA, __itt_group_model, "\"%s\"")
+ITT_STUBV(ITTAPI, void, model_task_beginAL, (const char *name, size_t len), (ITT_FORMAT name, len), model_task_beginAL, __itt_group_model, "\"%s\", %d")
+ITT_STUBV(ITTAPI, void, model_iteration_taskA, (const char *name), (ITT_FORMAT name), model_iteration_taskA, __itt_group_model, "\"%s\"")
+ITT_STUBV(ITTAPI, void, model_iteration_taskAL, (const char *name, size_t len), (ITT_FORMAT name, len), model_iteration_taskAL, __itt_group_model, "\"%s\", %d")
+ITT_STUBV(ITTAPI, void, model_site_end_2, (void), (ITT_NO_PARAMS), model_site_end_2, __itt_group_model, "no args")
+ITT_STUBV(ITTAPI, void, model_task_end_2, (void), (ITT_NO_PARAMS), model_task_end_2, __itt_group_model, "no args")
+ITT_STUBV(ITTAPI, void, model_lock_acquire_2, (void *lock), (ITT_FORMAT lock), model_lock_acquire_2, __itt_group_model, "%p")
+ITT_STUBV(ITTAPI, void, model_lock_release_2, (void *lock), (ITT_FORMAT lock), model_lock_release_2, __itt_group_model, "%p")
+ITT_STUBV(ITTAPI, void, model_aggregate_task, (size_t count), (ITT_FORMAT count), model_aggregate_task, __itt_group_model, "%d")
+ITT_STUBV(ITTAPI, void, model_disable_push, (__itt_model_disable x), (ITT_FORMAT x), model_disable_push, __itt_group_model, "%p")
+ITT_STUBV(ITTAPI, void, model_disable_pop, (void), (ITT_NO_PARAMS), model_disable_pop, __itt_group_model, "no args")
+#endif /* __ITT_INTERNAL_BODY */
+
+#ifndef __ITT_INTERNAL_BODY
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_heap_function, heap_function_createA, (const char *name, const char *domain), (ITT_FORMAT name, domain), heap_function_createA, __itt_group_heap, "\"%s\", \"%s\"")
+ITT_STUB(ITTAPI, __itt_heap_function, heap_function_createW, (const wchar_t *name, const wchar_t *domain), (ITT_FORMAT name, domain), heap_function_createW, __itt_group_heap, "\"%s\", \"%s\"")
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_heap_function, heap_function_create, (const char *name, const char *domain), (ITT_FORMAT name, domain), heap_function_create, __itt_group_heap, "\"%s\", \"%s\"")
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* __ITT_INTERNAL_BODY */
+ITT_STUBV(ITTAPI, void, heap_allocate_begin, (__itt_heap_function h, size_t size, int initialized), (ITT_FORMAT h, size, initialized), heap_allocate_begin, __itt_group_heap, "%p, %lu, %d")
+ITT_STUBV(ITTAPI, void, heap_allocate_end, (__itt_heap_function h, void** addr, size_t size, int initialized), (ITT_FORMAT h, addr, size, initialized), heap_allocate_end, __itt_group_heap, "%p, %p, %lu, %d")
+ITT_STUBV(ITTAPI, void, heap_free_begin, (__itt_heap_function h, void* addr), (ITT_FORMAT h, addr), heap_free_begin, __itt_group_heap, "%p, %p")
+ITT_STUBV(ITTAPI, void, heap_free_end, (__itt_heap_function h, void* addr), (ITT_FORMAT h, addr), heap_free_end, __itt_group_heap, "%p, %p")
+ITT_STUBV(ITTAPI, void, heap_reallocate_begin, (__itt_heap_function h, void* addr, size_t new_size, int initialized), (ITT_FORMAT h, addr, new_size, initialized), heap_reallocate_begin, __itt_group_heap, "%p, %p, %lu, %d")
+ITT_STUBV(ITTAPI, void, heap_reallocate_end, (__itt_heap_function h, void* addr, void** new_addr, size_t new_size, int initialized), (ITT_FORMAT h, addr, new_addr, new_size, initialized), heap_reallocate_end, __itt_group_heap, "%p, %p, %p, %lu, %d")
+ITT_STUBV(ITTAPI, void, heap_internal_access_begin, (void), (ITT_NO_PARAMS), heap_internal_access_begin, __itt_group_heap, "no args")
+ITT_STUBV(ITTAPI, void, heap_internal_access_end, (void), (ITT_NO_PARAMS), heap_internal_access_end, __itt_group_heap, "no args")
+ITT_STUBV(ITTAPI, void, heap_record_memory_growth_begin, (void), (ITT_NO_PARAMS), heap_record_memory_growth_begin, __itt_group_heap, "no args")
+ITT_STUBV(ITTAPI, void, heap_record_memory_growth_end, (void), (ITT_NO_PARAMS), heap_record_memory_growth_end, __itt_group_heap, "no args")
+ITT_STUBV(ITTAPI, void, heap_reset_detection, (unsigned int reset_mask), (ITT_FORMAT reset_mask), heap_reset_detection, __itt_group_heap, "%u")
+ITT_STUBV(ITTAPI, void, heap_record, (unsigned int record_mask), (ITT_FORMAT record_mask), heap_record, __itt_group_heap, "%u")
+
+ITT_STUBV(ITTAPI, void, id_create, (const __itt_domain *domain, __itt_id id), (ITT_FORMAT domain, id), id_create, __itt_group_structure, "%p, %lu")
+ITT_STUBV(ITTAPI, void, id_destroy, (const __itt_domain *domain, __itt_id id), (ITT_FORMAT domain, id), id_destroy, __itt_group_structure, "%p, %lu")
+
+ITT_STUB(ITTAPI, __itt_timestamp, get_timestamp, (void), (ITT_NO_PARAMS), get_timestamp, __itt_group_structure, "no args")
+
+ITT_STUBV(ITTAPI, void, region_begin, (const __itt_domain *domain, __itt_id id, __itt_id parent, __itt_string_handle *name), (ITT_FORMAT domain, id, parent, name), region_begin, __itt_group_structure, "%p, %lu, %lu, %p")
+ITT_STUBV(ITTAPI, void, region_end, (const __itt_domain *domain, __itt_id id), (ITT_FORMAT domain, id), region_end, __itt_group_structure, "%p, %lu")
+
+#ifndef __ITT_INTERNAL_BODY
+ITT_STUBV(ITTAPI, void, frame_begin_v3, (const __itt_domain *domain, __itt_id *id), (ITT_FORMAT domain, id), frame_begin_v3, __itt_group_structure, "%p, %p")
+ITT_STUBV(ITTAPI, void, frame_end_v3, (const __itt_domain *domain, __itt_id *id), (ITT_FORMAT domain, id), frame_end_v3, __itt_group_structure, "%p, %p")
+ITT_STUBV(ITTAPI, void, frame_submit_v3, (const __itt_domain *domain, __itt_id *id, __itt_timestamp begin, __itt_timestamp end), (ITT_FORMAT domain, id, begin, end), frame_submit_v3, __itt_group_structure, "%p, %p, %lu, %lu")
+#endif /* __ITT_INTERNAL_BODY */
+
+ITT_STUBV(ITTAPI, void, task_group, (const __itt_domain *domain, __itt_id id, __itt_id parent, __itt_string_handle *name), (ITT_FORMAT domain, id, parent, name), task_group, __itt_group_structure, "%p, %lu, %lu, %p")
+
+ITT_STUBV(ITTAPI, void, task_begin, (const __itt_domain *domain, __itt_id id, __itt_id parent, __itt_string_handle *name), (ITT_FORMAT domain, id, parent, name), task_begin, __itt_group_structure, "%p, %lu, %lu, %p")
+ITT_STUBV(ITTAPI, void, task_begin_fn, (const __itt_domain *domain, __itt_id id, __itt_id parent, void* fn), (ITT_FORMAT domain, id, parent, fn), task_begin_fn, __itt_group_structure, "%p, %lu, %lu, %p")
+ITT_STUBV(ITTAPI, void, task_end, (const __itt_domain *domain), (ITT_FORMAT domain), task_end, __itt_group_structure, "%p")
+
+ITT_STUBV(ITTAPI, void, counter_inc_v3, (const __itt_domain *domain, __itt_string_handle *name), (ITT_FORMAT domain, name), counter_inc_v3, __itt_group_structure, "%p, %p")
+ITT_STUBV(ITTAPI, void, counter_inc_delta_v3, (const __itt_domain *domain, __itt_string_handle *name, unsigned long long value), (ITT_FORMAT domain, name, value), counter_inc_delta_v3, __itt_group_structure, "%p, %p, %lu")
+ITT_STUBV(ITTAPI, void, counter_dec_v3, (const __itt_domain *domain, __itt_string_handle *name), (ITT_FORMAT domain, name), counter_dec_v3, __itt_group_structure, "%p, %p")
+ITT_STUBV(ITTAPI, void, counter_dec_delta_v3, (const __itt_domain *domain, __itt_string_handle *name, unsigned long long value), (ITT_FORMAT domain, name, value), counter_dec_delta_v3, __itt_group_structure, "%p, %p, %lu")
+
+ITT_STUBV(ITTAPI, void, marker, (const __itt_domain *domain, __itt_id id, __itt_string_handle *name, __itt_scope scope), (ITT_FORMAT domain, id, name, scope), marker, __itt_group_structure, "%p, %lu, %p, %d")
+
+ITT_STUBV(ITTAPI, void, metadata_add, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data), (ITT_FORMAT domain, id, key, type, count, data), metadata_add, __itt_group_structure, "%p, %lu, %p, %d, %lu, %p")
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, metadata_str_addA, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char* data, size_t length), (ITT_FORMAT domain, id, key, data, length), metadata_str_addA, __itt_group_structure, "%p, %lu, %p, %p, %lu")
+ITT_STUBV(ITTAPI, void, metadata_str_addW, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const wchar_t* data, size_t length), (ITT_FORMAT domain, id, key, data, length), metadata_str_addW, __itt_group_structure, "%p, %lu, %p, %p, %lu")
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, metadata_str_add, (const __itt_domain *domain, __itt_id id, __itt_string_handle *key, const char* data, size_t length), (ITT_FORMAT domain, id, key, data, length), metadata_str_add, __itt_group_structure, "%p, %lu, %p, %p, %lu")
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+ITT_STUBV(ITTAPI, void, relation_add_to_current, (const __itt_domain *domain, __itt_relation relation, __itt_id tail), (ITT_FORMAT domain, relation, tail), relation_add_to_current, __itt_group_structure, "%p, %lu, %p")
+ITT_STUBV(ITTAPI, void, relation_add, (const __itt_domain *domain, __itt_id head, __itt_relation relation, __itt_id tail), (ITT_FORMAT domain, head, relation, tail), relation_add, __itt_group_structure, "%p, %p, %lu, %p")
+
+#ifndef __ITT_INTERNAL_BODY
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(LIBITTAPI, __itt_event, event_createA, (const char *name, int namelen), (ITT_FORMAT name, namelen), event_createA, __itt_group_mark | __itt_group_legacy, "\"%s\", %d")
+ITT_STUB(LIBITTAPI, __itt_event, event_createW, (const wchar_t *name, int namelen), (ITT_FORMAT name, namelen), event_createW, __itt_group_mark | __itt_group_legacy, "\"%S\", %d")
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+ITT_STUB(LIBITTAPI, __itt_event, event_create, (const char *name, int namelen), (ITT_FORMAT name, namelen), event_create, __itt_group_mark | __itt_group_legacy, "\"%s\", %d")
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(LIBITTAPI, int, event_start, (__itt_event event), (ITT_FORMAT event), event_start, __itt_group_mark | __itt_group_legacy, "%d")
+ITT_STUB(LIBITTAPI, int, event_end, (__itt_event event), (ITT_FORMAT event), event_end, __itt_group_mark | __itt_group_legacy, "%d")
+#endif /* __ITT_INTERNAL_BODY */
+
+#ifndef __ITT_INTERNAL_BODY
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, sync_set_nameA, (void *addr, const char *objtype, const char *objname, int attribute), (ITT_FORMAT addr, objtype, objname, attribute), sync_set_nameA, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p, \"%s\", \"%s\", %x")
+ITT_STUBV(ITTAPI, void, sync_set_nameW, (void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute), (ITT_FORMAT addr, objtype, objname, attribute), sync_set_nameW, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p, \"%S\", \"%S\", %x")
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, sync_set_name, (void *addr, const char *objtype, const char *objname, int attribute), (ITT_FORMAT addr, objtype, objname, attribute), sync_set_name, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "p, \"%s\", \"%s\", %x")
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(LIBITTAPI, int, notify_sync_nameA, (void *p, const char *objtype, int typelen, const char *objname, int namelen, int attribute), (ITT_FORMAT p, objtype, typelen, objname, namelen, attribute), notify_sync_nameA, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p, \"%s\", %d, \"%s\", %d, %x")
+ITT_STUB(LIBITTAPI, int, notify_sync_nameW, (void *p, const wchar_t *objtype, int typelen, const wchar_t *objname, int namelen, int attribute), (ITT_FORMAT p, objtype, typelen, objname, namelen, attribute), notify_sync_nameW, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p, \"%S\", %d, \"%S\", %d, %x")
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+ITT_STUB(LIBITTAPI, int, notify_sync_name, (void *p, const char *objtype, int typelen, const char *objname, int namelen, int attribute), (ITT_FORMAT p, objtype, typelen, objname, namelen, attribute), notify_sync_name, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p, \"%s\", %d, \"%s\", %d, %x")
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+ITT_STUBV(LIBITTAPI, void, notify_sync_prepare, (void *p), (ITT_FORMAT p), notify_sync_prepare, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p")
+ITT_STUBV(LIBITTAPI, void, notify_sync_cancel, (void *p), (ITT_FORMAT p), notify_sync_cancel, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p")
+ITT_STUBV(LIBITTAPI, void, notify_sync_acquired, (void *p), (ITT_FORMAT p), notify_sync_acquired, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p")
+ITT_STUBV(LIBITTAPI, void, notify_sync_releasing, (void *p), (ITT_FORMAT p), notify_sync_releasing, __itt_group_sync | __itt_group_fsync | __itt_group_legacy, "%p")
+#endif /* __ITT_INTERNAL_BODY */
+
+ITT_STUBV(LIBITTAPI, void, memory_read, (void *addr, size_t size), (ITT_FORMAT addr, size), memory_read, __itt_group_legacy, "%p, %lu")
+ITT_STUBV(LIBITTAPI, void, memory_write, (void *addr, size_t size), (ITT_FORMAT addr, size), memory_write, __itt_group_legacy, "%p, %lu")
+ITT_STUBV(LIBITTAPI, void, memory_update, (void *addr, size_t size), (ITT_FORMAT addr, size), memory_update, __itt_group_legacy, "%p, %lu")
+
+ITT_STUB(LIBITTAPI, __itt_state_t, state_get, (void), (ITT_NO_PARAMS), state_get, __itt_group_legacy, "no args")
+ITT_STUB(LIBITTAPI, __itt_state_t, state_set, (__itt_state_t s), (ITT_FORMAT s), state_set, __itt_group_legacy, "%d")
+ITT_STUB(LIBITTAPI, __itt_obj_state_t, obj_mode_set, (__itt_obj_prop_t p, __itt_obj_state_t s), (ITT_FORMAT p, s), obj_mode_set, __itt_group_legacy, "%d, %d")
+ITT_STUB(LIBITTAPI, __itt_thr_state_t, thr_mode_set, (__itt_thr_prop_t p, __itt_thr_state_t s), (ITT_FORMAT p, s), thr_mode_set, __itt_group_legacy, "%d, %d")
+
+#ifndef __ITT_INTERNAL_BODY
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_frame, frame_createA, (const char *domain), (ITT_FORMAT domain), frame_createA, __itt_group_frame, "\"%s\"")
+ITT_STUB(ITTAPI, __itt_frame, frame_createW, (const wchar_t *domain), (ITT_FORMAT domain), frame_createW, __itt_group_frame, "\"%s\"")
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_frame, frame_create, (const char *domain), (ITT_FORMAT domain), frame_create, __itt_group_frame, "\"%s\"")
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_pt_region, pt_region_createA, (const char *name), (ITT_FORMAT name), pt_region_createA, __itt_group_structure, "\"%s\"")
+ITT_STUB(ITTAPI, __itt_pt_region, pt_region_createW, (const wchar_t *name), (ITT_FORMAT name), pt_region_createW, __itt_group_structure, "\"%S\"")
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_pt_region, pt_region_create, (const char *name), (ITT_FORMAT name), pt_region_create, __itt_group_structure, "\"%s\"")
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* __ITT_INTERNAL_BODY */
+ITT_STUBV(ITTAPI, void, frame_begin, (__itt_frame frame), (ITT_FORMAT frame), frame_begin, __itt_group_frame, "%p")
+ITT_STUBV(ITTAPI, void, frame_end, (__itt_frame frame), (ITT_FORMAT frame), frame_end, __itt_group_frame, "%p")
+
+ITT_STUBV(ITTAPI, void, counter_destroy, (__itt_counter id), (ITT_FORMAT id), counter_destroy, __itt_group_counter, "%p")
+ITT_STUBV(ITTAPI, void, counter_inc, (__itt_counter id), (ITT_FORMAT id), counter_inc, __itt_group_counter, "%p")
+ITT_STUBV(ITTAPI, void, counter_inc_delta, (__itt_counter id, unsigned long long value), (ITT_FORMAT id, value), counter_inc_delta, __itt_group_counter, "%p, %lu")
+ITT_STUBV(ITTAPI, void, counter_dec, (__itt_counter id), (ITT_FORMAT id), counter_dec, __itt_group_counter, "%p")
+ITT_STUBV(ITTAPI, void, counter_dec_delta, (__itt_counter id, unsigned long long value), (ITT_FORMAT id, value), counter_dec_delta, __itt_group_counter, "%p, %lu")
+ITT_STUBV(ITTAPI, void, counter_set_value, (__itt_counter id, void *value_ptr), (ITT_FORMAT id, value_ptr), counter_set_value, __itt_group_counter, "%p, %p")
+ITT_STUBV(ITTAPI, void, counter_set_value_ex, (__itt_counter id, __itt_clock_domain *clock_domain, unsigned long long timestamp, void *value_ptr), (ITT_FORMAT id, clock_domain, timestamp, value_ptr), counter_set_value_ex, __itt_group_counter, "%p, %p, %llu, %p")
+
+#ifndef __ITT_INTERNAL_BODY
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_mark_type, mark_createA, (const char *name), (ITT_FORMAT name), mark_createA, __itt_group_mark, "\"%s\"")
+ITT_STUB(ITTAPI, __itt_mark_type, mark_createW, (const wchar_t *name), (ITT_FORMAT name), mark_createW, __itt_group_mark, "\"%S\"")
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_mark_type, mark_create, (const char *name), (ITT_FORMAT name), mark_create, __itt_group_mark, "\"%s\"")
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* __ITT_INTERNAL_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, int, markA, (__itt_mark_type mt, const char *parameter), (ITT_FORMAT mt, parameter), markA, __itt_group_mark, "%d, \"%s\"")
+ITT_STUB(ITTAPI, int, markW, (__itt_mark_type mt, const wchar_t *parameter), (ITT_FORMAT mt, parameter), markW, __itt_group_mark, "%d, \"%S\"")
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, int, mark, (__itt_mark_type mt, const char *parameter), (ITT_FORMAT mt, parameter), mark, __itt_group_mark, "%d, \"%s\"")
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, int, mark_off, (__itt_mark_type mt), (ITT_FORMAT mt), mark_off, __itt_group_mark, "%d")
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, int, mark_globalA, (__itt_mark_type mt, const char *parameter), (ITT_FORMAT mt, parameter), mark_globalA, __itt_group_mark, "%d, \"%s\"")
+ITT_STUB(ITTAPI, int, mark_globalW, (__itt_mark_type mt, const wchar_t *parameter), (ITT_FORMAT mt, parameter), mark_globalW, __itt_group_mark, "%d, \"%S\"")
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, int, mark_global, (__itt_mark_type mt, const char *parameter), (ITT_FORMAT mt, parameter), mark_global, __itt_group_mark, "%d, \"%S\"")
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, int, mark_global_off, (__itt_mark_type mt), (ITT_FORMAT mt), mark_global_off, __itt_group_mark, "%d")
+
+#ifndef __ITT_INTERNAL_BODY
+ITT_STUB(ITTAPI, __itt_caller, stack_caller_create, (void), (ITT_NO_PARAMS), stack_caller_create, __itt_group_stitch, "no args")
+#endif /* __ITT_INTERNAL_BODY */
+ITT_STUBV(ITTAPI, void, stack_caller_destroy, (__itt_caller id), (ITT_FORMAT id), stack_caller_destroy, __itt_group_stitch, "%p")
+ITT_STUBV(ITTAPI, void, stack_callee_enter, (__itt_caller id), (ITT_FORMAT id), stack_callee_enter, __itt_group_stitch, "%p")
+ITT_STUBV(ITTAPI, void, stack_callee_leave, (__itt_caller id), (ITT_FORMAT id), stack_callee_leave, __itt_group_stitch, "%p")
+
+ITT_STUB(ITTAPI, __itt_clock_domain*, clock_domain_create, (__itt_get_clock_info_fn fn, void* fn_data), (ITT_FORMAT fn, fn_data), clock_domain_create, __itt_group_structure, "%p, %p")
+ITT_STUBV(ITTAPI, void, clock_domain_reset, (void), (ITT_NO_PARAMS), clock_domain_reset, __itt_group_structure, "no args")
+ITT_STUBV(ITTAPI, void, id_create_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id), (ITT_FORMAT domain, clock_domain, timestamp, id), id_create_ex, __itt_group_structure, "%p, %p, %lu, %lu")
+ITT_STUBV(ITTAPI, void, id_destroy_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id), (ITT_FORMAT domain, clock_domain, timestamp, id), id_destroy_ex, __itt_group_structure, "%p, %p, %lu, %lu")
+ITT_STUBV(ITTAPI, void, task_begin_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_id parentid, __itt_string_handle *name), (ITT_FORMAT domain, clock_domain, timestamp, id, parentid, name), task_begin_ex, __itt_group_structure, "%p, %p, %lu, %lu, %lu, %p")
+ITT_STUBV(ITTAPI, void, task_begin_fn_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_id parentid, void* fn), (ITT_FORMAT domain, clock_domain, timestamp, id, parentid, fn), task_begin_fn_ex, __itt_group_structure, "%p, %p, %lu, %lu, %lu, %p")
+ITT_STUBV(ITTAPI, void, task_end_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp), (ITT_FORMAT domain, clock_domain, timestamp), task_end_ex, __itt_group_structure, "%p, %p, %lu")
+ITT_STUBV(ITTAPI, void, task_begin_overlapped, (const __itt_domain *domain, __itt_id id, __itt_id parent, __itt_string_handle *name), (ITT_FORMAT domain, id, parent, name), task_begin_overlapped, __itt_group_structure, "%p, %lu, %lu, %p")
+ITT_STUBV(ITTAPI, void, task_begin_overlapped_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_id parentid, __itt_string_handle *name), (ITT_FORMAT domain, clock_domain, timestamp, id, parentid, name), task_begin_overlapped_ex, __itt_group_structure, "%p, %p, %lu, %lu, %lu, %p")
+ITT_STUBV(ITTAPI, void, task_end_overlapped, (const __itt_domain *domain, __itt_id id), (ITT_FORMAT domain, id), task_end_overlapped, __itt_group_structure, "%p, %lu")
+ITT_STUBV(ITTAPI, void, task_end_overlapped_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id), (ITT_FORMAT domain, clock_domain, timestamp, id), task_end_overlapped_ex, __itt_group_structure, "%p, %p, %lu, %lu")
+ITT_STUBV(ITTAPI, void, marker_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id id, __itt_string_handle *name, __itt_scope scope), (ITT_FORMAT domain, clock_domain, timestamp, id, name, scope), marker_ex, __itt_group_structure, "%p, %p, %lu, %lu, %p, %d")
+ITT_STUBV(ITTAPI, void, metadata_add_with_scope, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, __itt_metadata_type type, size_t count, void *data), (ITT_FORMAT domain, scope, key, type, count, data), metadata_add_with_scope, __itt_group_structure, "%p, %d, %p, %d, %lu, %p")
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, metadata_str_add_with_scopeA, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length), (ITT_FORMAT domain, scope, key, data, length), metadata_str_add_with_scopeA, __itt_group_structure, "%p, %d, %p, %p, %lu")
+ITT_STUBV(ITTAPI, void, metadata_str_add_with_scopeW, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const wchar_t *data, size_t length), (ITT_FORMAT domain, scope, key, data, length), metadata_str_add_with_scopeW, __itt_group_structure, "%p, %d, %p, %p, %lu")
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, metadata_str_add_with_scope, (const __itt_domain *domain, __itt_scope scope, __itt_string_handle *key, const char *data, size_t length), (ITT_FORMAT domain, scope, key, data, length), metadata_str_add_with_scope, __itt_group_structure, "%p, %d, %p, %p, %lu")
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, relation_add_to_current_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_relation relation, __itt_id tail), (ITT_FORMAT domain, clock_domain, timestamp, relation, tail), relation_add_to_current_ex, __itt_group_structure, "%p, %p, %lu, %d, %lu")
+ITT_STUBV(ITTAPI, void, relation_add_ex, (const __itt_domain *domain, __itt_clock_domain* clock_domain, unsigned long long timestamp, __itt_id head, __itt_relation relation, __itt_id tail), (ITT_FORMAT domain, clock_domain, timestamp, head, relation, tail), relation_add_ex, __itt_group_structure, "%p, %p, %lu, %lu, %d, %lu")
+ITT_STUB(ITTAPI, __itt_track_group*, track_group_create, (__itt_string_handle* name, __itt_track_group_type track_group_type), (ITT_FORMAT name, track_group_type), track_group_create, __itt_group_structure, "%p, %d")
+ITT_STUB(ITTAPI, __itt_track*, track_create, (__itt_track_group* track_group,__itt_string_handle* name, __itt_track_type track_type), (ITT_FORMAT track_group, name, track_type), track_create, __itt_group_structure, "%p, %p, %d")
+ITT_STUBV(ITTAPI, void, set_track, (__itt_track *track), (ITT_FORMAT track), set_track, __itt_group_structure, "%p")
+
+#ifndef __ITT_INTERNAL_BODY
+ITT_STUB(ITTAPI, const char*, api_version, (void), (ITT_NO_PARAMS), api_version, __itt_group_all & ~__itt_group_legacy, "no args")
+#endif /* __ITT_INTERNAL_BODY */
+
+#ifndef __ITT_INTERNAL_BODY
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, int, av_saveA, (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder), (ITT_FORMAT data, rank, dimensions, type, filePath, columnOrder), av_saveA, __itt_group_arrays, "%p, %d, %p, %d, \"%s\", %d")
+ITT_STUB(ITTAPI, int, av_saveW, (void *data, int rank, const int *dimensions, int type, const wchar_t *filePath, int columnOrder), (ITT_FORMAT data, rank, dimensions, type, filePath, columnOrder), av_saveW, __itt_group_arrays, "%p, %d, %p, %d, \"%S\", %d")
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, int, av_save, (void *data, int rank, const int *dimensions, int type, const char *filePath, int columnOrder), (ITT_FORMAT data, rank, dimensions, type, filePath, columnOrder), av_save, __itt_group_arrays, "%p, %d, %p, %d, \"%s\", %d")
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* __ITT_INTERNAL_BODY */
+
+#ifndef __ITT_INTERNAL_BODY
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, module_loadA, (void *start_addr, void* end_addr, const char *path), (ITT_FORMAT start_addr, end_addr, path), module_loadA, __itt_group_none, "%p, %p, %p")
+ITT_STUBV(ITTAPI, void, module_loadW, (void *start_addr, void* end_addr, const wchar_t *path), (ITT_FORMAT start_addr, end_addr, path), module_loadW, __itt_group_none, "%p, %p, %p")
+#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, module_load, (void *start_addr, void *end_addr, const char *path), (ITT_FORMAT start_addr, end_addr, path), module_load, __itt_group_none, "%p, %p, %p")
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* __ITT_INTERNAL_BODY */
+
+
+#endif /* __ITT_INTERNAL_INIT */
diff --git a/contrib/libs/tbb/src/tbb/tools_api/ittnotify_types.h b/contrib/libs/tbb/src/tbb/tools_api/ittnotify_types.h
new file mode 100644
index 0000000000..3849452c27
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/tools_api/ittnotify_types.h
@@ -0,0 +1,73 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#ifndef _ITTNOTIFY_TYPES_H_
+#define _ITTNOTIFY_TYPES_H_
+
+typedef enum ___itt_group_id
+{
+ __itt_group_none = 0,
+ __itt_group_legacy = 1<<0,
+ __itt_group_control = 1<<1,
+ __itt_group_thread = 1<<2,
+ __itt_group_mark = 1<<3,
+ __itt_group_sync = 1<<4,
+ __itt_group_fsync = 1<<5,
+ __itt_group_jit = 1<<6,
+ __itt_group_model = 1<<7,
+ __itt_group_splitter_min = 1<<7,
+ __itt_group_counter = 1<<8,
+ __itt_group_frame = 1<<9,
+ __itt_group_stitch = 1<<10,
+ __itt_group_heap = 1<<11,
+ __itt_group_splitter_max = 1<<12,
+ __itt_group_structure = 1<<12,
+ __itt_group_suppress = 1<<13,
+ __itt_group_arrays = 1<<14,
+ __itt_group_all = -1
+} __itt_group_id;
+
+#pragma pack(push, 8)
+
+typedef struct ___itt_group_list
+{
+ __itt_group_id id;
+ const char* name;
+} __itt_group_list;
+
+#pragma pack(pop)
+
+#define ITT_GROUP_LIST(varname) \
+ static __itt_group_list varname[] = { \
+ { __itt_group_all, "all" }, \
+ { __itt_group_control, "control" }, \
+ { __itt_group_thread, "thread" }, \
+ { __itt_group_mark, "mark" }, \
+ { __itt_group_sync, "sync" }, \
+ { __itt_group_fsync, "fsync" }, \
+ { __itt_group_jit, "jit" }, \
+ { __itt_group_model, "model" }, \
+ { __itt_group_counter, "counter" }, \
+ { __itt_group_frame, "frame" }, \
+ { __itt_group_stitch, "stitch" }, \
+ { __itt_group_heap, "heap" }, \
+ { __itt_group_structure, "structure" }, \
+ { __itt_group_suppress, "suppress" }, \
+ { __itt_group_arrays, "arrays" }, \
+ { __itt_group_none, NULL } \
+ }
+
+#endif /* _ITTNOTIFY_TYPES_H_ */
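Editor's note: the group bitmask and the ITT_GROUP_LIST table above are what the static ittnotify layer uses to decide which API groups to initialize (for example, from an environment variable). A minimal sketch of that kind of resolution, assuming the tools_api directory is on the include path; parse_groups() is an illustrative helper, not part of the API:

#include <cstring>
#include "ittnotify_types.h"   // assumed include path

// Resolve a comma-separated group string such as "sync,frame" into a bitmask.
static __itt_group_id parse_groups(const char* csv) {
    ITT_GROUP_LIST(group_list);                 // expands to a static, NULL-terminated table
    int mask = __itt_group_none;
    for (const char* p = csv; p != nullptr && *p != '\0'; ) {
        const std::size_t len = std::strcspn(p, ",");
        for (int i = 0; group_list[i].name != nullptr; ++i) {
            if (std::strlen(group_list[i].name) == len &&
                std::strncmp(p, group_list[i].name, len) == 0) {
                mask |= group_list[i].id;       // "all" maps to __itt_group_all (-1): every bit set
            }
        }
        p += len;
        if (*p == ',') ++p;
    }
    return static_cast<__itt_group_id>(mask);
}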
diff --git a/contrib/libs/tbb/src/tbb/tools_api/legacy/ittnotify.h b/contrib/libs/tbb/src/tbb/tools_api/legacy/ittnotify.h
new file mode 100644
index 0000000000..b05a199d1f
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/tools_api/legacy/ittnotify.h
@@ -0,0 +1,998 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#ifndef _LEGACY_ITTNOTIFY_H_
+#define _LEGACY_ITTNOTIFY_H_
+
+/**
+ * @file
+ * @brief Legacy User API functions and types
+ */
+
+/** @cond exclude_from_documentation */
+#ifndef ITT_OS_WIN
+# define ITT_OS_WIN 1
+#endif /* ITT_OS_WIN */
+
+#ifndef ITT_OS_LINUX
+# define ITT_OS_LINUX 2
+#endif /* ITT_OS_LINUX */
+
+#ifndef ITT_OS_MAC
+# define ITT_OS_MAC 3
+#endif /* ITT_OS_MAC */
+
+#ifndef ITT_OS_FREEBSD
+# define ITT_OS_FREEBSD 4
+#endif /* ITT_OS_FREEBSD */
+
+#ifndef ITT_OS
+# if defined WIN32 || defined _WIN32
+# define ITT_OS ITT_OS_WIN
+# elif defined( __APPLE__ ) && defined( __MACH__ )
+# define ITT_OS ITT_OS_MAC
+# elif defined( __FreeBSD__ )
+# define ITT_OS ITT_OS_FREEBSD
+# else
+# define ITT_OS ITT_OS_LINUX
+# endif
+#endif /* ITT_OS */
+
+#ifndef ITT_PLATFORM_WIN
+# define ITT_PLATFORM_WIN 1
+#endif /* ITT_PLATFORM_WIN */
+
+#ifndef ITT_PLATFORM_POSIX
+# define ITT_PLATFORM_POSIX 2
+#endif /* ITT_PLATFORM_POSIX */
+
+#ifndef ITT_PLATFORM_MAC
+# define ITT_PLATFORM_MAC 3
+#endif /* ITT_PLATFORM_MAC */
+
+#ifndef ITT_PLATFORM_FREEBSD
+# define ITT_PLATFORM_FREEBSD 4
+#endif /* ITT_PLATFORM_FREEBSD */
+
+#ifndef ITT_PLATFORM
+# if ITT_OS==ITT_OS_WIN
+# define ITT_PLATFORM ITT_PLATFORM_WIN
+# elif ITT_OS==ITT_OS_MAC
+# define ITT_PLATFORM ITT_PLATFORM_MAC
+# elif ITT_OS==ITT_OS_FREEBSD
+# define ITT_PLATFORM ITT_PLATFORM_FREEBSD
+# else
+# define ITT_PLATFORM ITT_PLATFORM_POSIX
+# endif
+#endif /* ITT_PLATFORM */
+
+#if defined(_UNICODE) && !defined(UNICODE)
+#define UNICODE
+#endif
+
+#include <stddef.h>
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#include <tchar.h>
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#include <stdint.h>
+#if defined(UNICODE) || defined(_UNICODE)
+#include <wchar.h>
+#endif /* UNICODE || _UNICODE */
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+#ifndef ITTAPI_CDECL
+# if ITT_PLATFORM==ITT_PLATFORM_WIN
+# define ITTAPI_CDECL __cdecl
+# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+# if defined _M_IX86 || defined __i386__
+# define ITTAPI_CDECL __attribute__ ((cdecl))
+# else /* _M_IX86 || __i386__ */
+#   define ITTAPI_CDECL /* supported only on x86 platform */
+# endif /* _M_IX86 || __i386__ */
+# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* ITTAPI_CDECL */
+
+#ifndef STDCALL
+# if ITT_PLATFORM==ITT_PLATFORM_WIN
+# define STDCALL __stdcall
+# else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+# if defined _M_IX86 || defined __i386__
+# define STDCALL __attribute__ ((stdcall))
+# else /* _M_IX86 || __i386__ */
+# define STDCALL /* supported only on x86 platform */
+# endif /* _M_IX86 || __i386__ */
+# endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* STDCALL */
+
+#define ITTAPI ITTAPI_CDECL
+#define LIBITTAPI ITTAPI_CDECL
+
+/* TODO: Temporary for compatibility! */
+#define ITTAPI_CALL ITTAPI_CDECL
+#define LIBITTAPI_CALL ITTAPI_CDECL
+
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+/* use __forceinline (VC++ specific) */
+#define ITT_INLINE __forceinline
+#define ITT_INLINE_ATTRIBUTE /* nothing */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+/*
+ * Generally, functions are not inlined unless optimization is specified.
+ * For functions declared inline, this attribute inlines the function even
+ * if no optimization level was specified.
+ */
+#ifdef __STRICT_ANSI__
+#define ITT_INLINE static
+#define ITT_INLINE_ATTRIBUTE __attribute__((unused))
+#else /* __STRICT_ANSI__ */
+#define ITT_INLINE static inline
+#define ITT_INLINE_ATTRIBUTE __attribute__((always_inline, unused))
+#endif /* __STRICT_ANSI__ */
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+/** @endcond */
+
+/** @cond exclude_from_documentation */
+/* Helper macro for joining tokens */
+#define ITT_JOIN_AUX(p,n) p##n
+#define ITT_JOIN(p,n) ITT_JOIN_AUX(p,n)
+
+#ifdef ITT_MAJOR
+#undef ITT_MAJOR
+#endif
+#ifdef ITT_MINOR
+#undef ITT_MINOR
+#endif
+#define ITT_MAJOR 3
+#define ITT_MINOR 0
+
+/* Standard versioning of a token with major and minor version numbers */
+#define ITT_VERSIONIZE(x) \
+ ITT_JOIN(x, \
+ ITT_JOIN(_, \
+ ITT_JOIN(ITT_MAJOR, \
+ ITT_JOIN(_, ITT_MINOR))))
+
+#ifndef INTEL_ITTNOTIFY_PREFIX
+# define INTEL_ITTNOTIFY_PREFIX __itt_
+#endif /* INTEL_ITTNOTIFY_PREFIX */
+#ifndef INTEL_ITTNOTIFY_POSTFIX
+# define INTEL_ITTNOTIFY_POSTFIX _ptr_
+#endif /* INTEL_ITTNOTIFY_POSTFIX */
+
+#define ITTNOTIFY_NAME_AUX(n) ITT_JOIN(INTEL_ITTNOTIFY_PREFIX,n)
+#define ITTNOTIFY_NAME(n) ITT_VERSIONIZE(ITTNOTIFY_NAME_AUX(ITT_JOIN(n,INTEL_ITTNOTIFY_POSTFIX)))
+
+#define ITTNOTIFY_VOID(n) (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)
+#define ITTNOTIFY_DATA(n) (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)
+
+#define ITTNOTIFY_VOID_D0(n,d) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d)
+#define ITTNOTIFY_VOID_D1(n,d,x) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x)
+#define ITTNOTIFY_VOID_D2(n,d,x,y) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y)
+#define ITTNOTIFY_VOID_D3(n,d,x,y,z) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z)
+#define ITTNOTIFY_VOID_D4(n,d,x,y,z,a) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a)
+#define ITTNOTIFY_VOID_D5(n,d,x,y,z,a,b) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b)
+#define ITTNOTIFY_VOID_D6(n,d,x,y,z,a,b,c) (!(d)->flags) ? (void)0 : (!ITTNOTIFY_NAME(n)) ? (void)0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c)
+#define ITTNOTIFY_DATA_D0(n,d) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d)
+#define ITTNOTIFY_DATA_D1(n,d,x) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x)
+#define ITTNOTIFY_DATA_D2(n,d,x,y) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y)
+#define ITTNOTIFY_DATA_D3(n,d,x,y,z) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z)
+#define ITTNOTIFY_DATA_D4(n,d,x,y,z,a) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a)
+#define ITTNOTIFY_DATA_D5(n,d,x,y,z,a,b) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b)
+#define ITTNOTIFY_DATA_D6(n,d,x,y,z,a,b,c) (!(d)->flags) ? 0 : (!ITTNOTIFY_NAME(n)) ? 0 : ITTNOTIFY_NAME(n)(d,x,y,z,a,b,c)
+
+#ifdef ITT_STUB
+#undef ITT_STUB
+#endif
+#ifdef ITT_STUBV
+#undef ITT_STUBV
+#endif
+#define ITT_STUBV(api,type,name,args) \
+ typedef type (api* ITT_JOIN(ITTNOTIFY_NAME(name),_t)) args; \
+ extern ITT_JOIN(ITTNOTIFY_NAME(name),_t) ITTNOTIFY_NAME(name);
+#define ITT_STUB ITT_STUBV
+/** @endcond */
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/**
+ * @defgroup legacy Legacy API
+ * @{
+ * @}
+ */
+
+/**
+ * @defgroup legacy_control Collection Control
+ * @ingroup legacy
+ * General behavior: application continues to run, but no profiling information is being collected
+ *
+ * Pausing occurs not only for the current thread but for the whole process, as well as any spawned processes
+ * - Intel(R) Parallel Inspector and Intel(R) Inspector XE:
+ * - Does not analyze or report errors that involve memory access.
+ * - Other errors are reported as usual. Pausing data collection in
+ * Intel(R) Parallel Inspector and Intel(R) Inspector XE
+ * only pauses tracing and analyzing memory access.
+ * It does not pause tracing or analyzing threading APIs.
+ * .
+ * - Intel(R) Parallel Amplifier and Intel(R) VTune(TM) Amplifier XE:
+ * - Does continue to record when new threads are started.
+ * .
+ * - Other effects:
+ * - Possible reduction of runtime overhead.
+ * .
+ * @{
+ */
+#ifndef _ITTNOTIFY_H_
+/** @brief Pause collection */
+void ITTAPI __itt_pause(void);
+/** @brief Resume collection */
+void ITTAPI __itt_resume(void);
+/** @brief Detach collection */
+void ITTAPI __itt_detach(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, pause, (void))
+ITT_STUBV(ITTAPI, void, resume, (void))
+ITT_STUBV(ITTAPI, void, detach, (void))
+#define __itt_pause ITTNOTIFY_VOID(pause)
+#define __itt_pause_ptr ITTNOTIFY_NAME(pause)
+#define __itt_resume ITTNOTIFY_VOID(resume)
+#define __itt_resume_ptr ITTNOTIFY_NAME(resume)
+#define __itt_detach ITTNOTIFY_VOID(detach)
+#define __itt_detach_ptr ITTNOTIFY_NAME(detach)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_pause()
+#define __itt_pause_ptr 0
+#define __itt_resume()
+#define __itt_resume_ptr 0
+#define __itt_detach()
+#define __itt_detach_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_pause_ptr 0
+#define __itt_resume_ptr 0
+#define __itt_detach_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+#endif /* _ITTNOTIFY_H_ */
+/** @} legacy_control group */
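Editor's note: a minimal sketch of the collection-control calls declared above, assuming the tools_api directory is on the include path; when no tool is attached the macros collapse to cheap null-pointer checks. warm_up() and measured_phase() are placeholders, not part of the API:

#include "legacy/ittnotify.h"   // assumed include path

static void warm_up()        { /* placeholder: uninteresting start-up work */ }
static void measured_phase() { /* placeholder: the region worth profiling   */ }

int main() {
    __itt_pause();      // suspend data collection for the whole process
    warm_up();
    __itt_resume();     // collection continues from here on
    measured_phase();
    return 0;
}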
+
+/**
+ * @defgroup legacy_threads Threads
+ * @ingroup legacy
+ * Threads group
+ * @warning Legacy API
+ * @{
+ */
+/**
+ * @deprecated Legacy API
+ * @brief Set name to be associated with thread in analysis GUI.
+ * @return __itt_err upon failure (name or namelen is null, or name and namelen are mismatched)
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+int LIBITTAPI __itt_thr_name_setA(const char *name, int namelen);
+int LIBITTAPI __itt_thr_name_setW(const wchar_t *name, int namelen);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_thr_name_set __itt_thr_name_setW
+# define __itt_thr_name_set_ptr __itt_thr_name_setW_ptr
+#else
+# define __itt_thr_name_set __itt_thr_name_setA
+# define __itt_thr_name_set_ptr __itt_thr_name_setA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+int LIBITTAPI __itt_thr_name_set(const char *name, int namelen);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(LIBITTAPI, int, thr_name_setA, (const char *name, int namelen))
+ITT_STUB(LIBITTAPI, int, thr_name_setW, (const wchar_t *name, int namelen))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(LIBITTAPI, int, thr_name_set, (const char *name, int namelen))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_thr_name_setA ITTNOTIFY_DATA(thr_name_setA)
+#define __itt_thr_name_setA_ptr ITTNOTIFY_NAME(thr_name_setA)
+#define __itt_thr_name_setW ITTNOTIFY_DATA(thr_name_setW)
+#define __itt_thr_name_setW_ptr ITTNOTIFY_NAME(thr_name_setW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_thr_name_set ITTNOTIFY_DATA(thr_name_set)
+#define __itt_thr_name_set_ptr ITTNOTIFY_NAME(thr_name_set)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_thr_name_setA(name, namelen)
+#define __itt_thr_name_setA_ptr 0
+#define __itt_thr_name_setW(name, namelen)
+#define __itt_thr_name_setW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_thr_name_set(name, namelen)
+#define __itt_thr_name_set_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_thr_name_setA_ptr 0
+#define __itt_thr_name_setW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_thr_name_set_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @deprecated Legacy API
+ * @brief Mark current thread as ignored from this point on, for the duration of its existence.
+ */
+void LIBITTAPI __itt_thr_ignore(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(LIBITTAPI, void, thr_ignore, (void))
+#define __itt_thr_ignore ITTNOTIFY_VOID(thr_ignore)
+#define __itt_thr_ignore_ptr ITTNOTIFY_NAME(thr_ignore)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_thr_ignore()
+#define __itt_thr_ignore_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_thr_ignore_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} legacy_threads group */
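Editor's note: a short sketch of the legacy thread-naming call above. In a UNICODE Windows build __itt_thr_name_set resolves to the wide-character variant, so the char overload below assumes a non-UNICODE build:

#include <cstring>
#include "legacy/ittnotify.h"   // assumed include path

static void name_current_thread(const char* name) {
    // The legacy call takes an explicit length and returns __itt_err on failure.
    int rc = __itt_thr_name_set(name, static_cast<int>(std::strlen(name)));
    (void)rc;
}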
+
+/**
+ * @defgroup legacy_sync Synchronization
+ * @ingroup legacy
+ * Synchronization group
+ * @warning Legacy API
+ * @{
+ */
+/**
+ * @hideinitializer
+ * @brief possible value of attribute argument for sync object type
+ */
+#define __itt_attr_barrier 1
+
+/**
+ * @hideinitializer
+ * @brief possible value of attribute argument for sync object type
+ */
+#define __itt_attr_mutex 2
+
+/**
+ * @deprecated Legacy API
+ * @brief Assign a name to a sync object using char or Unicode string
+ * @param[in] addr - pointer to the sync object. You should use a real pointer to your object
+ * to make sure that the values don't clash with other object addresses
+ * @param[in] objtype - null-terminated object type string. If NULL is passed, the object will
+ * be assumed to be of generic "User Synchronization" type
+ * @param[in] objname - null-terminated object name string. If NULL, no name will be assigned
+ * to the object -- you can use the __itt_sync_rename call later to assign
+ * the name
+ * @param[in] attribute - one of [#__itt_attr_barrier, #__itt_attr_mutex] values which defines the
+ * exact semantics of how prepare/acquired/releasing calls work.
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+void ITTAPI __itt_sync_set_nameA(void *addr, const char *objtype, const char *objname, int attribute);
+void ITTAPI __itt_sync_set_nameW(void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_sync_set_name __itt_sync_set_nameW
+# define __itt_sync_set_name_ptr __itt_sync_set_nameW_ptr
+#else /* UNICODE */
+# define __itt_sync_set_name __itt_sync_set_nameA
+# define __itt_sync_set_name_ptr __itt_sync_set_nameA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+void ITTAPI __itt_sync_set_name(void *addr, const char* objtype, const char* objname, int attribute);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUBV(ITTAPI, void, sync_set_nameA, (void *addr, const char *objtype, const char *objname, int attribute))
+ITT_STUBV(ITTAPI, void, sync_set_nameW, (void *addr, const wchar_t *objtype, const wchar_t *objname, int attribute))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUBV(ITTAPI, void, sync_set_name, (void *addr, const char *objtype, const char *objname, int attribute))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_sync_set_nameA ITTNOTIFY_VOID(sync_set_nameA)
+#define __itt_sync_set_nameA_ptr ITTNOTIFY_NAME(sync_set_nameA)
+#define __itt_sync_set_nameW ITTNOTIFY_VOID(sync_set_nameW)
+#define __itt_sync_set_nameW_ptr ITTNOTIFY_NAME(sync_set_nameW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_sync_set_name ITTNOTIFY_VOID(sync_set_name)
+#define __itt_sync_set_name_ptr ITTNOTIFY_NAME(sync_set_name)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_sync_set_nameA(addr, objtype, objname, attribute)
+#define __itt_sync_set_nameA_ptr 0
+#define __itt_sync_set_nameW(addr, objtype, objname, attribute)
+#define __itt_sync_set_nameW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_sync_set_name(addr, objtype, objname, attribute)
+#define __itt_sync_set_name_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_sync_set_nameA_ptr 0
+#define __itt_sync_set_nameW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_sync_set_name_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @deprecated Legacy API
+ * @brief Assign a name and type to a sync object using char or Unicode string
+ * @param[in] addr - pointer to the sync object. You should use a real pointer to your object
+ * to make sure that the values don't clash with other object addresses
+ * @param[in] objtype - null-terminated object type string. If NULL is passed, the object will
+ * be assumed to be of generic "User Synchronization" type
+ * @param[in] objname - null-terminated object name string. If NULL, no name will be assigned
+ * to the object -- you can use the __itt_sync_rename call later to assign
+ * the name
+ * @param[in] typelen, namelen - the lengths of the objtype and objname strings, respectively
+ * @param[in] attribute - one of [#__itt_attr_barrier, #__itt_attr_mutex] values which defines the
+ * exact semantics of how prepare/acquired/releasing calls work.
+ * @return __itt_err upon failure (name or namelen is null, or name and namelen are mismatched)
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+int LIBITTAPI __itt_notify_sync_nameA(void *addr, const char *objtype, int typelen, const char *objname, int namelen, int attribute);
+int LIBITTAPI __itt_notify_sync_nameW(void *addr, const wchar_t *objtype, int typelen, const wchar_t *objname, int namelen, int attribute);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_notify_sync_name __itt_notify_sync_nameW
+#else
+# define __itt_notify_sync_name __itt_notify_sync_nameA
+#endif
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+int LIBITTAPI __itt_notify_sync_name(void *addr, const char *objtype, int typelen, const char *objname, int namelen, int attribute);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(LIBITTAPI, int, notify_sync_nameA, (void *addr, const char *objtype, int typelen, const char *objname, int namelen, int attribute))
+ITT_STUB(LIBITTAPI, int, notify_sync_nameW, (void *addr, const wchar_t *objtype, int typelen, const wchar_t *objname, int namelen, int attribute))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(LIBITTAPI, int, notify_sync_name, (void *addr, const char *objtype, int typelen, const char *objname, int namelen, int attribute))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_notify_sync_nameA ITTNOTIFY_DATA(notify_sync_nameA)
+#define __itt_notify_sync_nameA_ptr ITTNOTIFY_NAME(notify_sync_nameA)
+#define __itt_notify_sync_nameW ITTNOTIFY_DATA(notify_sync_nameW)
+#define __itt_notify_sync_nameW_ptr ITTNOTIFY_NAME(notify_sync_nameW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_notify_sync_name ITTNOTIFY_DATA(notify_sync_name)
+#define __itt_notify_sync_name_ptr ITTNOTIFY_NAME(notify_sync_name)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_notify_sync_nameA(addr, objtype, typelen, objname, namelen, attribute)
+#define __itt_notify_sync_nameA_ptr 0
+#define __itt_notify_sync_nameW(addr, objtype, typelen, objname, namelen, attribute)
+#define __itt_notify_sync_nameW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_notify_sync_name(addr, objtype, typelen, objname, namelen, attribute)
+#define __itt_notify_sync_name_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_notify_sync_nameA_ptr 0
+#define __itt_notify_sync_nameW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_notify_sync_name_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @deprecated Legacy API
+ * @brief Enter spin loop on user-defined sync object
+ */
+void LIBITTAPI __itt_notify_sync_prepare(void* addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(LIBITTAPI, void, notify_sync_prepare, (void *addr))
+#define __itt_notify_sync_prepare ITTNOTIFY_VOID(notify_sync_prepare)
+#define __itt_notify_sync_prepare_ptr ITTNOTIFY_NAME(notify_sync_prepare)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_notify_sync_prepare(addr)
+#define __itt_notify_sync_prepare_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_notify_sync_prepare_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @deprecated Legacy API
+ * @brief Quit spin loop without acquiring spin object
+ */
+void LIBITTAPI __itt_notify_sync_cancel(void *addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(LIBITTAPI, void, notify_sync_cancel, (void *addr))
+#define __itt_notify_sync_cancel ITTNOTIFY_VOID(notify_sync_cancel)
+#define __itt_notify_sync_cancel_ptr ITTNOTIFY_NAME(notify_sync_cancel)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_notify_sync_cancel(addr)
+#define __itt_notify_sync_cancel_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_notify_sync_cancel_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @deprecated Legacy API
+ * @brief Successful spin loop completion (sync object acquired)
+ */
+void LIBITTAPI __itt_notify_sync_acquired(void *addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(LIBITTAPI, void, notify_sync_acquired, (void *addr))
+#define __itt_notify_sync_acquired ITTNOTIFY_VOID(notify_sync_acquired)
+#define __itt_notify_sync_acquired_ptr ITTNOTIFY_NAME(notify_sync_acquired)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_notify_sync_acquired(addr)
+#define __itt_notify_sync_acquired_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_notify_sync_acquired_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @deprecated Legacy API
+ * @brief Start sync object releasing code. Called before the lock release call.
+ */
+void LIBITTAPI __itt_notify_sync_releasing(void* addr);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(LIBITTAPI, void, notify_sync_releasing, (void *addr))
+#define __itt_notify_sync_releasing ITTNOTIFY_VOID(notify_sync_releasing)
+#define __itt_notify_sync_releasing_ptr ITTNOTIFY_NAME(notify_sync_releasing)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_notify_sync_releasing(addr)
+#define __itt_notify_sync_releasing_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_notify_sync_releasing_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} legacy_sync group */
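Editor's note: the prepare/acquired/releasing protocol above is what tools expect to see around a user-level lock. A hedged sketch on a trivial spin lock (non-UNICODE build assumed so the char variant of __itt_sync_set_name is selected; the type and name strings are illustrative):

#include <atomic>
#include "legacy/ittnotify.h"   // assumed include path

class annotated_spin_lock {
    std::atomic_flag my_flag = ATOMIC_FLAG_INIT;
public:
    annotated_spin_lock() {
        // Register the object address once, with mutex semantics for prepare/acquired/releasing.
        __itt_sync_set_name(this, "annotated_spin_lock", "example lock", __itt_attr_mutex);
    }
    void lock() {
        __itt_notify_sync_prepare(this);     // about to enter the spin loop
        while (my_flag.test_and_set(std::memory_order_acquire)) { /* spin */ }
        __itt_notify_sync_acquired(this);    // spin loop completed successfully
    }
    void unlock() {
        __itt_notify_sync_releasing(this);   // called before the actual release
        my_flag.clear(std::memory_order_release);
    }
};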
+
+#ifndef _ITTNOTIFY_H_
+/**
+ * @defgroup legacy_events Events
+ * @ingroup legacy
+ * Events group
+ * @{
+ */
+
+/** @brief user event type */
+typedef int __itt_event;
+
+/**
+ * @brief Create an event notification
+ * @note Fails (returning __itt_err) if name or namelen is null, name and namelen do not match, or the user event feature is not enabled
+ * @return non-zero event identifier upon success and __itt_err otherwise
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+__itt_event LIBITTAPI __itt_event_createA(const char *name, int namelen);
+__itt_event LIBITTAPI __itt_event_createW(const wchar_t *name, int namelen);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_event_create __itt_event_createW
+# define __itt_event_create_ptr __itt_event_createW_ptr
+#else
+# define __itt_event_create __itt_event_createA
+# define __itt_event_create_ptr __itt_event_createA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_event LIBITTAPI __itt_event_create(const char *name, int namelen);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(LIBITTAPI, __itt_event, event_createA, (const char *name, int namelen))
+ITT_STUB(LIBITTAPI, __itt_event, event_createW, (const wchar_t *name, int namelen))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(LIBITTAPI, __itt_event, event_create, (const char *name, int namelen))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_event_createA ITTNOTIFY_DATA(event_createA)
+#define __itt_event_createA_ptr ITTNOTIFY_NAME(event_createA)
+#define __itt_event_createW ITTNOTIFY_DATA(event_createW)
+#define __itt_event_createW_ptr ITTNOTIFY_NAME(event_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_event_create ITTNOTIFY_DATA(event_create)
+#define __itt_event_create_ptr ITTNOTIFY_NAME(event_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_event_createA(name, namelen) (__itt_event)0
+#define __itt_event_createA_ptr 0
+#define __itt_event_createW(name, namelen) (__itt_event)0
+#define __itt_event_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_event_create(name, namelen) (__itt_event)0
+#define __itt_event_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_event_createA_ptr 0
+#define __itt_event_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_event_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record an event occurrence.
+ * @return __itt_err upon failure (invalid event id/user event feature not enabled)
+ */
+int LIBITTAPI __itt_event_start(__itt_event event);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(LIBITTAPI, int, event_start, (__itt_event event))
+#define __itt_event_start ITTNOTIFY_DATA(event_start)
+#define __itt_event_start_ptr ITTNOTIFY_NAME(event_start)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_event_start(event) (int)0
+#define __itt_event_start_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_event_start_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @brief Record an event end occurrence.
+ * @note It is optional if events do not have durations.
+ * @return __itt_err upon failure (invalid event id/user event feature not enabled)
+ */
+int LIBITTAPI __itt_event_end(__itt_event event);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(LIBITTAPI, int, event_end, (__itt_event event))
+#define __itt_event_end ITTNOTIFY_DATA(event_end)
+#define __itt_event_end_ptr ITTNOTIFY_NAME(event_end)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_event_end(event) (int)0
+#define __itt_event_end_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_event_end_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} legacy_events group */
+#endif /* _ITTNOTIFY_H_ */
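Editor's note: a sketch of the legacy event API declared above (again assuming a non-UNICODE build so __itt_event_create maps to the char variant); render_one_frame() and the event name are placeholders:

#include <cstring>
#include "legacy/ittnotify.h"   // assumed include path

static const __itt_event my_event =
    __itt_event_create("iteration", static_cast<int>(std::strlen("iteration")));

static void render_one_frame() {
    __itt_event_start(my_event);
    /* ... placeholder work ... */
    __itt_event_end(my_event);   // optional for events without a duration
}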
+
+/**
+ * @defgroup legacy_memory Memory Accesses
+ * @ingroup legacy
+ */
+
+/**
+ * @deprecated Legacy API
+ * @brief Inform the tool of memory accesses on reading
+ */
+void LIBITTAPI __itt_memory_read(void *addr, size_t size);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(LIBITTAPI, void, memory_read, (void *addr, size_t size))
+#define __itt_memory_read ITTNOTIFY_VOID(memory_read)
+#define __itt_memory_read_ptr ITTNOTIFY_NAME(memory_read)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_memory_read(addr, size)
+#define __itt_memory_read_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_memory_read_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @deprecated Legacy API
+ * @brief Inform the tool of memory accesses on writing
+ */
+void LIBITTAPI __itt_memory_write(void *addr, size_t size);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(LIBITTAPI, void, memory_write, (void *addr, size_t size))
+#define __itt_memory_write ITTNOTIFY_VOID(memory_write)
+#define __itt_memory_write_ptr ITTNOTIFY_NAME(memory_write)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_memory_write(addr, size)
+#define __itt_memory_write_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_memory_write_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @deprecated Legacy API
+ * @brief Inform the tool of memory accesses on updating
+ */
+void LIBITTAPI __itt_memory_update(void *address, size_t size);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(LIBITTAPI, void, memory_update, (void *addr, size_t size))
+#define __itt_memory_update ITTNOTIFY_VOID(memory_update)
+#define __itt_memory_update_ptr ITTNOTIFY_NAME(memory_update)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_memory_update(addr, size)
+#define __itt_memory_update_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_memory_update_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} legacy_memory group */
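Editor's note: a sketch of the memory-access hints above wrapped around a plain copy; the legacy signatures take non-const pointers, hence the const_cast:

#include <cstring>
#include "legacy/ittnotify.h"   // assumed include path

static void annotated_copy(void* dst, const void* src, std::size_t n) {
    __itt_memory_read(const_cast<void*>(src), n);   // the tool is told n bytes are read from src
    __itt_memory_write(dst, n);                     // ... and n bytes are written to dst
    std::memcpy(dst, src, n);
}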
+
+/**
+ * @defgroup legacy_state Thread and Object States
+ * @ingroup legacy
+ */
+
+/** @brief state type */
+typedef int __itt_state_t;
+
+/** @cond exclude_from_documentation */
+typedef enum __itt_obj_state {
+ __itt_obj_state_err = 0,
+ __itt_obj_state_clr = 1,
+ __itt_obj_state_set = 2,
+ __itt_obj_state_use = 3
+} __itt_obj_state_t;
+
+typedef enum __itt_thr_state {
+ __itt_thr_state_err = 0,
+ __itt_thr_state_clr = 1,
+ __itt_thr_state_set = 2
+} __itt_thr_state_t;
+
+typedef enum __itt_obj_prop {
+ __itt_obj_prop_watch = 1,
+ __itt_obj_prop_ignore = 2,
+ __itt_obj_prop_sharable = 3
+} __itt_obj_prop_t;
+
+typedef enum __itt_thr_prop {
+ __itt_thr_prop_quiet = 1
+} __itt_thr_prop_t;
+/** @endcond */
+
+/**
+ * @deprecated Legacy API
+ * @brief managing thread and object states
+ */
+__itt_state_t LIBITTAPI __itt_state_get(void);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(ITTAPI, __itt_state_t, state_get, (void))
+#define __itt_state_get ITTNOTIFY_DATA(state_get)
+#define __itt_state_get_ptr ITTNOTIFY_NAME(state_get)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_state_get(void) (__itt_state_t)0
+#define __itt_state_get_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_state_get_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @deprecated Legacy API
+ * @brief managing thread and object states
+ */
+__itt_state_t LIBITTAPI __itt_state_set(__itt_state_t s);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(ITTAPI, __itt_state_t, state_set, (__itt_state_t s))
+#define __itt_state_set ITTNOTIFY_DATA(state_set)
+#define __itt_state_set_ptr ITTNOTIFY_NAME(state_set)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_state_set(s) (__itt_state_t)0
+#define __itt_state_set_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_state_set_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @deprecated Legacy API
+ * @brief managing thread and object modes
+ */
+__itt_thr_state_t LIBITTAPI __itt_thr_mode_set(__itt_thr_prop_t p, __itt_thr_state_t s);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(ITTAPI, __itt_thr_state_t, thr_mode_set, (__itt_thr_prop_t p, __itt_thr_state_t s))
+#define __itt_thr_mode_set ITTNOTIFY_DATA(thr_mode_set)
+#define __itt_thr_mode_set_ptr ITTNOTIFY_NAME(thr_mode_set)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_thr_mode_set(p, s) (__itt_thr_state_t)0
+#define __itt_thr_mode_set_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_thr_mode_set_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/**
+ * @deprecated Legacy API
+ * @brief managing thread and object modes
+ */
+__itt_obj_state_t LIBITTAPI __itt_obj_mode_set(__itt_obj_prop_t p, __itt_obj_state_t s);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUB(ITTAPI, __itt_obj_state_t, obj_mode_set, (__itt_obj_prop_t p, __itt_obj_state_t s))
+#define __itt_obj_mode_set ITTNOTIFY_DATA(obj_mode_set)
+#define __itt_obj_mode_set_ptr ITTNOTIFY_NAME(obj_mode_set)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_obj_mode_set(p, s) (__itt_obj_state_t)0
+#define __itt_obj_mode_set_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_obj_mode_set_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} legacy_state group */
+
+/**
+ * @defgroup frames Frames
+ * @ingroup legacy
+ * Frames group
+ * @{
+ */
+/**
+ * @brief opaque structure for frame identification
+ */
+typedef struct __itt_frame_t *__itt_frame;
+
+/**
+ * @brief Create a global frame with the given domain
+ */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+__itt_frame ITTAPI __itt_frame_createA(const char *domain);
+__itt_frame ITTAPI __itt_frame_createW(const wchar_t *domain);
+#if defined(UNICODE) || defined(_UNICODE)
+# define __itt_frame_create __itt_frame_createW
+# define __itt_frame_create_ptr __itt_frame_createW_ptr
+#else /* UNICODE */
+# define __itt_frame_create __itt_frame_createA
+# define __itt_frame_create_ptr __itt_frame_createA_ptr
+#endif /* UNICODE */
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+__itt_frame ITTAPI __itt_frame_create(const char *domain);
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+ITT_STUB(ITTAPI, __itt_frame, frame_createA, (const char *domain))
+ITT_STUB(ITTAPI, __itt_frame, frame_createW, (const wchar_t *domain))
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+ITT_STUB(ITTAPI, __itt_frame, frame_create, (const char *domain))
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_frame_createA ITTNOTIFY_DATA(frame_createA)
+#define __itt_frame_createA_ptr ITTNOTIFY_NAME(frame_createA)
+#define __itt_frame_createW ITTNOTIFY_DATA(frame_createW)
+#define __itt_frame_createW_ptr ITTNOTIFY_NAME(frame_createW)
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_frame_create ITTNOTIFY_DATA(frame_create)
+#define __itt_frame_create_ptr ITTNOTIFY_NAME(frame_create)
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#else /* INTEL_NO_ITTNOTIFY_API */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_frame_createA(domain)
+#define __itt_frame_createA_ptr 0
+#define __itt_frame_createW(domain)
+#define __itt_frame_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_frame_create(domain)
+#define __itt_frame_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#define __itt_frame_createA_ptr 0
+#define __itt_frame_createW_ptr 0
+#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#define __itt_frame_create_ptr 0
+#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+
+/** @brief Record a frame begin occurrence. */
+void ITTAPI __itt_frame_begin(__itt_frame frame);
+/** @brief Record a frame end occurrence. */
+void ITTAPI __itt_frame_end (__itt_frame frame);
+
+/** @cond exclude_from_documentation */
+#ifndef INTEL_NO_MACRO_BODY
+#ifndef INTEL_NO_ITTNOTIFY_API
+ITT_STUBV(ITTAPI, void, frame_begin, (__itt_frame frame))
+ITT_STUBV(ITTAPI, void, frame_end, (__itt_frame frame))
+#define __itt_frame_begin ITTNOTIFY_VOID(frame_begin)
+#define __itt_frame_begin_ptr ITTNOTIFY_NAME(frame_begin)
+#define __itt_frame_end ITTNOTIFY_VOID(frame_end)
+#define __itt_frame_end_ptr ITTNOTIFY_NAME(frame_end)
+#else /* INTEL_NO_ITTNOTIFY_API */
+#define __itt_frame_begin(frame)
+#define __itt_frame_begin_ptr 0
+#define __itt_frame_end(frame)
+#define __itt_frame_end_ptr 0
+#endif /* INTEL_NO_ITTNOTIFY_API */
+#else /* INTEL_NO_MACRO_BODY */
+#define __itt_frame_begin_ptr 0
+#define __itt_frame_end_ptr 0
+#endif /* INTEL_NO_MACRO_BODY */
+/** @endcond */
+/** @} frames group */
+
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* _LEGACY_ITTNOTIFY_H_ */
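Editor's note: a sketch of the legacy frame API from the group above — one frame object per domain, begin/end once per loop iteration (non-UNICODE build assumed; the domain string is illustrative):

#include "legacy/ittnotify.h"   // assumed include path

static void run_main_loop(int iterations) {
    __itt_frame frame = __itt_frame_create("com.example.mainloop");
    for (int i = 0; i < iterations; ++i) {
        __itt_frame_begin(frame);
        /* ... one iteration of work ... */
        __itt_frame_end(frame);
    }
}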
diff --git a/contrib/libs/tbb/src/tbb/version.cpp b/contrib/libs/tbb/src/tbb/version.cpp
new file mode 100644
index 0000000000..ca113372f1
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/version.cpp
@@ -0,0 +1,26 @@
+/*
+ Copyright (c) 2020-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "oneapi/tbb/version.h"
+
+extern "C" int TBB_runtime_interface_version() {
+ return TBB_INTERFACE_VERSION;
+}
+
+extern "C" const char* TBB_runtime_version() {
+ static const char version_str[] = TBB_VERSION_STRING;
+ return version_str;
+}
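Editor's note: these two extern "C" entry points let an application report which TBB binary it actually loaded at run time, independently of the headers it was compiled against. A minimal check:

#include <cstdio>

extern "C" int TBB_runtime_interface_version();
extern "C" const char* TBB_runtime_version();

int main() {
    std::printf("TBB runtime %s, interface version %d\n",
                TBB_runtime_version(), TBB_runtime_interface_version());
    return 0;
}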
diff --git a/contrib/libs/tbb/src/tbb/waiters.h b/contrib/libs/tbb/src/tbb/waiters.h
new file mode 100644
index 0000000000..07ee5ab4f0
--- /dev/null
+++ b/contrib/libs/tbb/src/tbb/waiters.h
@@ -0,0 +1,204 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#ifndef _TBB_waiters_H
+#define _TBB_waiters_H
+
+#include "oneapi/tbb/detail/_task.h"
+#include "scheduler_common.h"
+#include "arena.h"
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+
+inline d1::task* get_self_recall_task(arena_slot& slot);
+
+class waiter_base {
+public:
+ waiter_base(arena& a) : my_arena(a), my_backoff(int(a.my_num_slots)) {}
+
+ bool pause() {
+ if (my_backoff.pause()) {
+ my_arena.is_out_of_work();
+ return true;
+ }
+
+ return false;
+ }
+
+ void reset_wait() {
+ my_backoff.reset_wait();
+ }
+
+protected:
+ arena& my_arena;
+ stealing_loop_backoff my_backoff;
+};
+
+class outermost_worker_waiter : public waiter_base {
+public:
+ using waiter_base::waiter_base;
+
+ bool continue_execution(arena_slot& slot, d1::task*& t) const {
+ __TBB_ASSERT(t == nullptr, nullptr);
+
+ if (is_worker_should_leave(slot)) {
+ // Leave dispatch loop
+ return false;
+ }
+
+ t = get_self_recall_task(slot);
+ return true;
+ }
+
+ void pause(arena_slot&) {
+ waiter_base::pause();
+ }
+
+
+ d1::wait_context* wait_ctx() {
+ return nullptr;
+ }
+
+ static bool postpone_execution(d1::task&) {
+ return false;
+ }
+
+private:
+ using base_type = waiter_base;
+
+ bool is_worker_should_leave(arena_slot& slot) const {
+ bool is_top_priority_arena = my_arena.my_is_top_priority.load(std::memory_order_relaxed);
+ bool is_task_pool_empty = slot.task_pool.load(std::memory_order_relaxed) == EmptyTaskPool;
+
+ if (is_top_priority_arena) {
+ // Workers in the top-priority arena do not leave the arena until all work in the task_pool is done
+ if (is_task_pool_empty && my_arena.is_recall_requested()) {
+ return true;
+ }
+ } else {
+ if (my_arena.is_recall_requested()) {
+ // If the worker still has work in its task pool, we must notify other threads,
+ // because otherwise they may miss a wakeup
+ if (!is_task_pool_empty) {
+ my_arena.advertise_new_work<arena::wakeup>();
+ }
+ return true;
+ }
+ }
+
+ return false;
+ }
+};
+
+class sleep_waiter : public waiter_base {
+protected:
+ using waiter_base::waiter_base;
+
+ bool is_arena_empty() {
+ return my_arena.my_pool_state.load(std::memory_order_relaxed) == arena::SNAPSHOT_EMPTY;
+ }
+
+ template <typename Pred>
+ void sleep(std::uintptr_t uniq_tag, Pred wakeup_condition) {
+ my_arena.my_market->get_wait_list().wait<extended_concurrent_monitor::thread_context>(wakeup_condition,
+ extended_context{uniq_tag, &my_arena});
+ }
+};
+
+class external_waiter : public sleep_waiter {
+public:
+ external_waiter(arena& a, d1::wait_context& wo)
+ : sleep_waiter(a), my_wait_ctx(wo)
+ {}
+
+ bool continue_execution(arena_slot& slot, d1::task*& t) const {
+ __TBB_ASSERT(t == nullptr, nullptr);
+ if (!my_wait_ctx.continue_execution())
+ return false;
+ t = get_self_recall_task(slot);
+ return true;
+ }
+
+ void pause(arena_slot&) {
+ if (!sleep_waiter::pause()) {
+ return;
+ }
+
+ auto wakeup_condition = [&] { return !is_arena_empty() || !my_wait_ctx.continue_execution(); };
+
+ sleep(std::uintptr_t(&my_wait_ctx), wakeup_condition);
+ my_backoff.reset_wait();
+ }
+
+ d1::wait_context* wait_ctx() {
+ return &my_wait_ctx;
+ }
+
+ static bool postpone_execution(d1::task&) {
+ return false;
+ }
+
+private:
+ d1::wait_context& my_wait_ctx;
+};
+
+#if __TBB_RESUMABLE_TASKS
+
+class coroutine_waiter : public sleep_waiter {
+public:
+ using sleep_waiter::sleep_waiter;
+
+ bool continue_execution(arena_slot& slot, d1::task*& t) const {
+ __TBB_ASSERT(t == nullptr, nullptr);
+ t = get_self_recall_task(slot);
+ return true;
+ }
+
+ void pause(arena_slot& slot) {
+ if (!sleep_waiter::pause()) {
+ return;
+ }
+
+ suspend_point_type* sp = slot.default_task_dispatcher().m_suspend_point;
+
+ auto wakeup_condition = [&] { return !is_arena_empty() || sp->m_is_owner_recalled.load(std::memory_order_relaxed); };
+
+ sleep(std::uintptr_t(sp), wakeup_condition);
+ my_backoff.reset_wait();
+ }
+
+ void reset_wait() {
+ my_backoff.reset_wait();
+ }
+
+ d1::wait_context* wait_ctx() {
+ return nullptr;
+ }
+
+ static bool postpone_execution(d1::task& t) {
+ return task_accessor::is_resume_task(t);
+ }
+};
+
+#endif // __TBB_RESUMABLE_TASKS
+
+} // namespace r1
+} // namespace detail
+} // namespace tbb
+
+#endif // _TBB_waiters_H
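Editor's note: the three waiter classes above share an implicit interface (continue_execution, pause, reset_wait, wait_ctx, postpone_execution) that the task dispatcher is templated over. A rough sketch of the calling pattern only, with the real dispatch and stealing machinery abstracted behind callables; this is not the actual task_dispatcher code:

// Sketch of the loop shape a Waiter is designed for; Slot, Task and the
// callables are template parameters so the sketch stays self-contained.
template <typename Waiter, typename Slot, typename Task, typename Execute, typename GetTask>
void wait_loop_sketch(Slot& slot, Waiter& waiter, Execute execute, GetTask get_task) {
    Task* t = nullptr;
    while (waiter.continue_execution(slot, t)) {   // false: leave the loop (recall requested or wait finished)
        if (!t)
            t = get_task(slot);                    // e.g. take a local task or steal one
        if (t) {
            execute(*t);
            waiter.reset_wait();                   // progress was made: restart the backoff
        } else {
            waiter.pause(slot);                    // back off; sleep_waiter variants may block on the market wait list
        }
        t = nullptr;                               // continue_execution() asserts a null task on entry
    }
}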