| author | Alexander Smirnov <alex@ydb.tech> | 2025-02-27 00:51:37 +0000 |
| --- | --- | --- |
| committer | Alexander Smirnov <alex@ydb.tech> | 2025-02-27 00:51:37 +0000 |
| commit | 73c5b3617320d761190ec40aed6f6ce9c24270f3 (patch) | |
| tree | 6abf1b4a2c847e9a119fc5582a610c4cf7ab2de0 /contrib | |
| parent | 3f91ea9163746386488746f9bdfb4a8f0eda110d (diff) | |
| parent | 629114ad6e988a524969a8b3f6afd20d1d720928 (diff) | |
| download | ydb-73c5b3617320d761190ec40aed6f6ce9c24270f3.tar.gz | |
Merge branch 'rightlib' into merge-libs-250227-0050
Diffstat (limited to 'contrib')
104 files changed, 6205 insertions, 2314 deletions
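The first hunk below reworks libcxxrt's `bt_terminate_handler` so that the demangled type and `what()` message of an uncaught exception are printed before `abort()`. As a minimal standalone sketch of that reporting pattern — not the vendored libcxxrt code itself, assuming a GCC/Clang toolchain where `<cxxabi.h>` exposes `abi::__cxa_demangle` and `abi::__cxa_current_exception_type`, and with a hypothetical handler name — it looks roughly like this:

```cpp
#include <cxxabi.h>     // abi::__cxa_demangle, abi::__cxa_current_exception_type
#include <cstdio>
#include <cstdlib>
#include <exception>
#include <stdexcept>
#include <typeinfo>

// Hypothetical terminate handler: report the active exception's demangled type
// and what() message, then abort. It mirrors the shape of the libcxxrt change
// below, not its exact code (libcxxrt walks its own __cxa_exception records).
static void report_uncaught_and_abort() {
    if (const std::type_info* ti = abi::__cxa_current_exception_type()) {
        int status = 0;
        // __cxa_demangle allocates the result with malloc; the caller frees it.
        char* demangled = abi::__cxa_demangle(ti->name(), nullptr, nullptr, &status);
        std::fprintf(stderr, "uncaught exception:\n    type -> %s\n",
                     status == 0 ? demangled : ti->name());
        std::free(demangled);
        try {
            throw;  // rethrow the active exception so we can inspect it
        } catch (const std::exception& e) {
            std::fprintf(stderr, "    what() -> \"%s\"\n", e.what());
        } catch (...) {
            // not derived from std::exception; nothing more to report
        }
    }
    std::abort();
}

int main() {
    std::set_terminate(report_uncaught_and_abort);
    throw std::runtime_error("demo");  // never caught, so the handler runs
}
```

The vendored handler inspects the `__cxa_exception` record directly and uses `__do_upcast` instead of rethrowing, but the allocate-and-free contract of `__cxa_demangle` it relies on is the same.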
diff --git a/contrib/libs/cxxsupp/libcxxrt/exception.cc b/contrib/libs/cxxsupp/libcxxrt/exception.cc
index ea19474857..dc8d507655 100644
--- a/contrib/libs/cxxsupp/libcxxrt/exception.cc
+++ b/contrib/libs/cxxsupp/libcxxrt/exception.cc
@@ -287,39 +287,62 @@ namespace std
 using namespace ABI_NAMESPACE;

+/**
+ * Callback function used with _Unwind_Backtrace().
+ *
+ * Prints a stack trace. Used only for debugging help.
+ *
+ * Note: As of FreeBSD 8.1, dladd() still doesn't work properly, so this only
+ * correctly prints function names from public, relocatable, symbols.
+ */
+static _Unwind_Reason_Code trace(struct _Unwind_Context *context, void *c)
+{
+    Dl_info myinfo;
+    int mylookup =
+        dladdr(reinterpret_cast<void *>(__cxa_current_exception_type), &myinfo);
+    void *ip = reinterpret_cast<void*>(_Unwind_GetIP(context));
+    Dl_info info;
+    if (dladdr(ip, &info) != 0)
+    {
+        if (mylookup == 0 || strcmp(info.dli_fname, myinfo.dli_fname) != 0)
+        {
+            printf("%p:%s() in %s\n", ip, info.dli_sname, info.dli_fname);
+        }
+    }
+    return _URC_CONTINUE_UNWIND;
+}
+
 static void bt_terminate_handler() {
     __cxa_eh_globals* globals = __cxa_get_globals();
     __cxa_exception* thrown_exception = globals->caughtExceptions;

-    if (!thrown_exception) {
-        abort();
-    }
+    if (thrown_exception) {
+        fprintf(stderr, "uncaught exception:\n address -> %p\n", (void*)thrown_exception);
+        thrown_exception = realExceptionFromException(thrown_exception);

-    fprintf(stderr, "uncaught exception:\n address -> %p\n", (void*)thrown_exception);
-    thrown_exception = realExceptionFromException(thrown_exception);
+        const __class_type_info *e_ti = static_cast<const __class_type_info*>(&typeid(std::exception));
+        const __class_type_info *throw_ti = dynamic_cast<const __class_type_info*>(thrown_exception->exceptionType);

-    const __class_type_info *e_ti = static_cast<const __class_type_info*>(&typeid(std::exception));
-    const __class_type_info *throw_ti = dynamic_cast<const __class_type_info*>(thrown_exception->exceptionType);
+        if (throw_ti) {
+            void* ptr = thrown_exception + 1;

-    if (throw_ti) {
-        void* ptr = thrown_exception + 1;
+            if (throw_ti->__do_upcast(e_ti, &ptr)) {
+                std::exception* e = static_cast<std::exception*>(ptr);

-        if (throw_ti->__do_upcast(e_ti, &ptr)) {
-            std::exception* e = static_cast<std::exception*>(ptr);
-
-            if (e) {
-                fprintf(stderr, " what() -> \"%s\"\n", e->what());
-            }
-        }
-    }
+                if (e) {
+                    fprintf(stderr, " what() -> \"%s\"\n", e->what());
+                }
+            }
+        }

-    size_t bufferSize = 128;
-    char *demangled = static_cast<char*>(malloc(bufferSize));
-    const char *mangled = thrown_exception->exceptionType->name();
-    int status;
-    demangled = __cxa_demangle(mangled, demangled, &bufferSize, &status);
-    fprintf(stderr, " type -> %s\n", status == 0 ? demangled : mangled);
-    if (status == 0) { free(demangled); }
+        size_t bufferSize = 128;
+        char *demangled = static_cast<char*>(malloc(bufferSize));
+        const char *mangled = thrown_exception->exceptionType->name();
+        int status;
+        demangled = __cxa_demangle(mangled, demangled, &bufferSize, &status);
+        fprintf(stderr, " type -> %s\n", status == 0 ? demangled : mangled);
+        if (status == 0) { free(demangled); }
+    }

     abort();
 }
@@ -760,31 +783,6 @@ void __cxa_free_dependent_exception(void *thrown_exception)
 }

 /**
- * Callback function used with _Unwind_Backtrace().
- *
- * Prints a stack trace. Used only for debugging help.
- *
- * Note: As of FreeBSD 8.1, dladd() still doesn't work properly, so this only
- * correctly prints function names from public, relocatable, symbols.
- */ -static _Unwind_Reason_Code trace(struct _Unwind_Context *context, void *c) -{ - Dl_info myinfo; - int mylookup = - dladdr(reinterpret_cast<void *>(__cxa_current_exception_type), &myinfo); - void *ip = reinterpret_cast<void*>(_Unwind_GetIP(context)); - Dl_info info; - if (dladdr(ip, &info) != 0) - { - if (mylookup == 0 || strcmp(info.dli_fname, myinfo.dli_fname) != 0) - { - printf("%p:%s() in %s\n", ip, info.dli_sname, info.dli_fname); - } - } - return _URC_CONTINUE_UNWIND; -} - -/** * Report a failure that occurred when attempting to throw an exception. * * If the failure happened by falling off the end of the stack without finding diff --git a/contrib/libs/tbb/.yandex_meta/devtools.copyrights.report b/contrib/libs/tbb/.yandex_meta/devtools.copyrights.report index 041a1b725e..693b11977a 100644 --- a/contrib/libs/tbb/.yandex_meta/devtools.copyrights.report +++ b/contrib/libs/tbb/.yandex_meta/devtools.copyrights.report @@ -38,25 +38,39 @@ BELONGS ya.make Score : 100.00 Match type : COPYRIGHT Files with this license: - include/oneapi/tbb/detail/_flow_graph_node_set_impl.h [2:2] include/oneapi/tbb/detail/_namespace_injection.h [2:2] include/oneapi/tbb/detail/_small_object_pool.h [2:2] - include/oneapi/tbb/detail/_task_handle.h [2:2] src/tbb/concurrent_bounded_queue.cpp [2:2] src/tbb/small_object_pool.cpp [2:2] src/tbb/small_object_pool_impl.h [2:2] src/tbb/version.cpp [2:2] -KEEP COPYRIGHT_SERVICE_LABEL 3866399ed19ebc1f304703a69bd54dfb +KEEP COPYRIGHT_SERVICE_LABEL 183dae8f315abe7f52eca9c76d0e9cbb BELONGS ya.make License text: - Copyright (c) 2020-2023 Intel Corporation + Copyright (c) 2020-2024 Intel Corporation Scancode info: Original SPDX id: COPYRIGHT_SERVICE_LABEL Score : 100.00 Match type : COPYRIGHT Files with this license: + include/oneapi/tbb/detail/_flow_graph_node_set_impl.h [2:2] include/oneapi/tbb/detail/_task.h [2:2] + include/oneapi/tbb/detail/_task_handle.h [2:2] + src/tbb/task_dispatcher.h [2:2] + src/tbb/thread_data.h [2:2] + +KEEP COPYRIGHT_SERVICE_LABEL 37f751c87d04968f0f9f061a184f9a3b +BELONGS ya.make + License text: + Copyright (c) 2023-2024 Intel Corporation + Scancode info: + Original SPDX id: COPYRIGHT_SERVICE_LABEL + Score : 100.00 + Match type : COPYRIGHT + Files with this license: + src/tbb/tcm.h [2:2] + src/tbb/tcm_adaptor.cpp [2:2] KEEP COPYRIGHT_SERVICE_LABEL 386e9140344d54f8b754c4445aff38ff BELONGS ya.make @@ -67,27 +81,24 @@ BELONGS ya.make Score : 100.00 Match type : COPYRIGHT Files with this license: + include/oneapi/tbb.h [2:2] include/oneapi/tbb/concurrent_queue.h [2:2] - include/oneapi/tbb/detail/_concurrent_unordered_base.h [2:2] - include/oneapi/tbb/detail/_config.h [2:2] - include/oneapi/tbb/detail/_flow_graph_body_impl.h [2:2] - include/oneapi/tbb/detail/_flow_graph_node_impl.h [2:2] - include/oneapi/tbb/detail/_flow_graph_tagged_buffer_impl.h [2:2] - include/oneapi/tbb/detail/_machine.h [2:2] - include/oneapi/tbb/detail/_pipeline_filters.h [2:2] include/oneapi/tbb/detail/_pipeline_filters_deduction.h [2:2] include/oneapi/tbb/detail/_template_helpers.h [2:2] include/oneapi/tbb/detail/_utils.h [2:2] - include/oneapi/tbb/flow_graph.h [2:2] - include/oneapi/tbb/parallel_for.h [2:2] - include/oneapi/tbb/parallel_for_each.h [2:2] - include/oneapi/tbb/parallel_reduce.h [2:2] - include/oneapi/tbb/parallel_scan.h [2:2] + include/oneapi/tbb/parallel_invoke.h [2:2] include/oneapi/tbb/partitioner.h [2:2] include/oneapi/tbb/profiling.h [2:2] - include/oneapi/tbb/version.h [2:2] - src/tbb/dynamic_link.cpp [2:2] - src/tbb/tools_api/ittnotify_config.h [2:2] + 
include/oneapi/tbb/scalable_allocator.h [2:2] + include/oneapi/tbb/task_arena.h [2:2] + src/tbb/arena_slot.h [2:2] + src/tbb/concurrent_monitor.h [2:2] + src/tbb/market.cpp [2:2] + src/tbb/market.h [2:2] + src/tbb/misc_ex.cpp [2:2] + src/tbb/rml_tbb.cpp [2:2] + src/tbb/task_group_context.cpp [2:2] + src/tbb/tools_api/ittnotify_static.h [2:2] KEEP COPYRIGHT_SERVICE_LABEL 3abceee12813f5f5c1ed7a506777af26 BELONGS ya.make @@ -98,15 +109,11 @@ BELONGS ya.make Score : 100.00 Match type : COPYRIGHT Files with this license: - include/oneapi/tbb/collaborative_call_once.h [2:2] include/oneapi/tbb/detail/_attach.h [2:2] include/oneapi/tbb/detail/_mutex_common.h [2:2] - include/oneapi/tbb/detail/_waitable_atomic.h [2:2] - include/oneapi/tbb/mutex.h [2:2] include/oneapi/tbb/rw_mutex.h [2:2] include/tbb/collaborative_call_once.h [2:2] src/tbb/address_waiter.cpp [2:2] - src/tbb/market_concurrent_monitor.h [2:2] KEEP COPYRIGHT_SERVICE_LABEL 58712991d6dcb330c78b0224b7cdd4db BELONGS ya.make @@ -124,6 +131,30 @@ BELONGS ya.make include/tbb/concurrent_set.h [2:2] include/tbb/info.h [2:2] +KEEP COPYRIGHT_SERVICE_LABEL 5a49c5cb24d81c860e0cb98b5868178d +BELONGS ya.make + License text: + Copyright (c) 2021-2024 Intel Corporation + Scancode info: + Original SPDX id: COPYRIGHT_SERVICE_LABEL + Score : 100.00 + Match type : COPYRIGHT + Files with this license: + include/oneapi/tbb/collaborative_call_once.h [2:2] + +KEEP COPYRIGHT_SERVICE_LABEL 5d79465eeb8872701c89074169ac01e5 +BELONGS ya.make + License text: + Copyright (c) 2023 Intel Corporation + Scancode info: + Original SPDX id: COPYRIGHT_SERVICE_LABEL + Score : 100.00 + Match type : COPYRIGHT + Files with this license: + include/tbb/mutex.h [2:2] + include/tbb/rw_mutex.h [2:2] + src/tbb/tcm_adaptor.h [2:2] + KEEP COPYRIGHT_SERVICE_LABEL 766614077ece9f54dcc96e471051491a BELONGS ya.make License text: @@ -137,66 +168,34 @@ BELONGS ya.make include/oneapi/tbb/concurrent_hash_map.h [2:2] include/oneapi/tbb/concurrent_lru_cache.h [2:2] include/oneapi/tbb/concurrent_priority_queue.h [2:2] - include/oneapi/tbb/concurrent_unordered_map.h [2:2] - include/oneapi/tbb/concurrent_unordered_set.h [2:2] include/oneapi/tbb/concurrent_vector.h [2:2] include/oneapi/tbb/detail/_aggregator.h [2:2] include/oneapi/tbb/detail/_assert.h [2:2] include/oneapi/tbb/detail/_concurrent_queue_base.h [2:2] - include/oneapi/tbb/detail/_flow_graph_cache_impl.h [2:2] - include/oneapi/tbb/detail/_flow_graph_impl.h [2:2] - include/oneapi/tbb/detail/_flow_graph_item_buffer_impl.h [2:2] - include/oneapi/tbb/detail/_flow_graph_join_impl.h [2:2] - include/oneapi/tbb/detail/_flow_graph_trace_impl.h [2:2] - include/oneapi/tbb/detail/_flow_graph_types_impl.h [2:2] include/oneapi/tbb/detail/_scoped_lock.h [2:2] include/oneapi/tbb/detail/_segment_table.h [2:2] - include/oneapi/tbb/enumerable_thread_specific.h [2:2] - include/oneapi/tbb/memory_pool.h [2:2] include/oneapi/tbb/queuing_mutex.h [2:2] include/oneapi/tbb/queuing_rw_mutex.h [2:2] - include/oneapi/tbb/task_arena.h [2:2] - include/oneapi/tbb/task_group.h [2:2] - src/tbb/allocator.cpp [2:2] - src/tbb/arena.cpp [2:2] - src/tbb/arena.h [2:2] - src/tbb/arena_slot.h [2:2] src/tbb/co_context.h [2:2] src/tbb/dynamic_link.h [2:2] src/tbb/exception.cpp [2:2] - src/tbb/global_control.cpp [2:2] - src/tbb/governor.cpp [2:2] - src/tbb/governor.h [2:2] src/tbb/itt_notify.cpp [2:2] src/tbb/itt_notify.h [2:2] - src/tbb/main.cpp [2:2] - src/tbb/market.cpp [2:2] - src/tbb/market.h [2:2] - src/tbb/misc.h [2:2] - src/tbb/misc_ex.cpp [2:2] src/tbb/observer_proxy.cpp [2:2] 
src/tbb/observer_proxy.h [2:2] src/tbb/parallel_pipeline.cpp [2:2] src/tbb/private_server.cpp [2:2] src/tbb/profiling.cpp [2:2] src/tbb/queuing_rw_mutex.cpp [2:2] - src/tbb/rml_tbb.cpp [2:2] src/tbb/rml_thread_monitor.h [2:2] src/tbb/rtm_mutex.cpp [2:2] src/tbb/rtm_rw_mutex.cpp [2:2] - src/tbb/scheduler_common.h [2:2] src/tbb/semaphore.cpp [2:2] src/tbb/semaphore.h [2:2] - src/tbb/task.cpp [2:2] - src/tbb/task_group_context.cpp [2:2] src/tbb/task_stream.h [2:2] src/tbb/tls.h [2:2] src/tbb/tools_api/disable_warnings.h [2:2] - src/tbb/tools_api/ittnotify.h [2:2] - src/tbb/tools_api/ittnotify_static.c [2:2] - src/tbb/tools_api/ittnotify_static.h [2:2] src/tbb/tools_api/ittnotify_types.h [2:2] - src/tbb/tools_api/legacy/ittnotify.h [2:2] KEEP COPYRIGHT_SERVICE_LABEL 868e48910e495ab3c256b805ace087a1 BELONGS ya.make @@ -209,6 +208,20 @@ BELONGS ya.make Files with this license: src/tbb/environment.h [2:2] +KEEP COPYRIGHT_SERVICE_LABEL 884bb2a5dd7cd901adf43ea931101bc4 +BELONGS ya.make + License text: + Copyright (c) 2022-2023 Intel Corporation + Scancode info: + Original SPDX id: COPYRIGHT_SERVICE_LABEL + Score : 100.00 + Match type : COPYRIGHT + Files with this license: + src/tbb/cancellation_disseminator.h [2:2] + src/tbb/permit_manager.h [2:2] + src/tbb/pm_client.h [2:2] + src/tbb/threading_control_client.h [2:2] + KEEP COPYRIGHT_SERVICE_LABEL 8f05eaf1a9c2ff98a245e5fbb90aa09f BELONGS ya.make License text: @@ -221,6 +234,74 @@ BELONGS ya.make include/oneapi/tbb/detail/_concurrent_skip_list.h [2:2] include/oneapi/tbb/info.h [2:2] +KEEP COPYRIGHT_SERVICE_LABEL a64887874f771449c9238549356cdf97 +BELONGS ya.make + License text: + Copyright (c) 2005-2024 Intel Corporation + Scancode info: + Original SPDX id: COPYRIGHT_SERVICE_LABEL + Score : 100.00 + Match type : COPYRIGHT + Files with this license: + include/oneapi/tbb/concurrent_unordered_map.h [2:2] + include/oneapi/tbb/concurrent_unordered_set.h [2:2] + include/oneapi/tbb/detail/_concurrent_unordered_base.h [2:2] + include/oneapi/tbb/detail/_config.h [2:2] + include/oneapi/tbb/detail/_flow_graph_body_impl.h [2:2] + include/oneapi/tbb/detail/_flow_graph_cache_impl.h [2:2] + include/oneapi/tbb/detail/_flow_graph_impl.h [2:2] + include/oneapi/tbb/detail/_flow_graph_indexer_impl.h [2:2] + include/oneapi/tbb/detail/_flow_graph_item_buffer_impl.h [2:2] + include/oneapi/tbb/detail/_flow_graph_join_impl.h [2:2] + include/oneapi/tbb/detail/_flow_graph_node_impl.h [2:2] + include/oneapi/tbb/detail/_flow_graph_nodes_deduction.h [2:2] + include/oneapi/tbb/detail/_flow_graph_tagged_buffer_impl.h [2:2] + include/oneapi/tbb/detail/_flow_graph_trace_impl.h [2:2] + include/oneapi/tbb/detail/_flow_graph_types_impl.h [2:2] + include/oneapi/tbb/detail/_machine.h [2:2] + include/oneapi/tbb/detail/_pipeline_filters.h [2:2] + include/oneapi/tbb/enumerable_thread_specific.h [2:2] + include/oneapi/tbb/flow_graph.h [2:2] + include/oneapi/tbb/flow_graph_abstractions.h [2:2] + include/oneapi/tbb/memory_pool.h [2:2] + include/oneapi/tbb/parallel_for.h [2:2] + include/oneapi/tbb/parallel_for_each.h [2:2] + include/oneapi/tbb/parallel_reduce.h [2:2] + include/oneapi/tbb/parallel_scan.h [2:2] + include/oneapi/tbb/task_group.h [2:2] + include/oneapi/tbb/version.h [2:2] + src/tbb/allocator.cpp [2:2] + src/tbb/arena.cpp [2:2] + src/tbb/arena.h [2:2] + src/tbb/def/lin64-tbb.def [2:2] + src/tbb/dynamic_link.cpp [2:2] + src/tbb/global_control.cpp [2:2] + src/tbb/governor.cpp [2:2] + src/tbb/governor.h [2:2] + src/tbb/main.cpp [2:2] + src/tbb/misc.cpp [2:2] + src/tbb/misc.h [2:2] + 
src/tbb/scheduler_common.h [2:2] + src/tbb/task.cpp [2:2] + src/tbb/tools_api/ittnotify.h [2:2] + src/tbb/tools_api/ittnotify_config.h [2:2] + src/tbb/tools_api/ittnotify_static.c [2:2] + src/tbb/tools_api/legacy/ittnotify.h [2:2] + src/tbb/waiters.h [2:2] + +KEEP COPYRIGHT_SERVICE_LABEL bae1a7c05aa5cd16a0a147c10d11af9a +BELONGS ya.make + License text: + Copyright (c) 2021-2023 Intel Corporation + Scancode info: + Original SPDX id: COPYRIGHT_SERVICE_LABEL + Score : 100.00 + Match type : COPYRIGHT + Files with this license: + include/oneapi/tbb/detail/_waitable_atomic.h [2:2] + include/oneapi/tbb/mutex.h [2:2] + src/tbb/thread_control_monitor.h [2:2] + KEEP COPYRIGHT_SERVICE_LABEL ca359ffcc18a725e17810fbe93800626 BELONGS ya.make License text: @@ -230,7 +311,6 @@ BELONGS ya.make Score : 100.00 Match type : COPYRIGHT Files with this license: - include/oneapi/tbb.h [2:2] include/oneapi/tbb/blocked_range.h [2:2] include/oneapi/tbb/blocked_range2d.h [2:2] include/oneapi/tbb/blocked_range3d.h [2:2] @@ -240,22 +320,17 @@ BELONGS ya.make include/oneapi/tbb/detail/_containers_helpers.h [2:2] include/oneapi/tbb/detail/_exception.h [2:2] include/oneapi/tbb/detail/_export.h [2:2] - include/oneapi/tbb/detail/_flow_graph_indexer_impl.h [2:2] - include/oneapi/tbb/detail/_flow_graph_nodes_deduction.h [2:2] include/oneapi/tbb/detail/_hash_compare.h [2:2] include/oneapi/tbb/detail/_intrusive_list_node.h [2:2] include/oneapi/tbb/detail/_range_common.h [2:2] include/oneapi/tbb/detail/_rtm_mutex.h [2:2] include/oneapi/tbb/detail/_rtm_rw_mutex.h [2:2] include/oneapi/tbb/detail/_string_resource.h [2:2] - include/oneapi/tbb/flow_graph_abstractions.h [2:2] include/oneapi/tbb/global_control.h [2:2] include/oneapi/tbb/null_mutex.h [2:2] include/oneapi/tbb/null_rw_mutex.h [2:2] - include/oneapi/tbb/parallel_invoke.h [2:2] include/oneapi/tbb/parallel_pipeline.h [2:2] include/oneapi/tbb/parallel_sort.h [2:2] - include/oneapi/tbb/scalable_allocator.h [2:2] include/oneapi/tbb/spin_mutex.h [2:2] include/oneapi/tbb/spin_rw_mutex.h [2:2] include/oneapi/tbb/task.h [2:2] @@ -307,16 +382,12 @@ BELONGS ya.make include/tbb/version.h [2:2] src/tbb/arena_slot.cpp [2:2] src/tbb/assert_impl.h [2:2] - src/tbb/concurrent_monitor.h [2:2] src/tbb/concurrent_monitor_mutex.h [2:2] - src/tbb/def/lin64-tbb.def [2:2] src/tbb/intrusive_list.h [2:2] src/tbb/mailbox.h [2:2] src/tbb/main.h [2:2] - src/tbb/misc.cpp [2:2] src/tbb/rml_base.h [2:2] src/tbb/rml_tbb.h [2:2] - src/tbb/waiters.h [2:2] KEEP COPYRIGHT_SERVICE_LABEL da3083268e79dd67d6ab0935b4c82192 BELONGS ya.make @@ -328,8 +399,6 @@ BELONGS ya.make Match type : COPYRIGHT Files with this license: src/tbb/task_dispatcher.cpp [2:2] - src/tbb/task_dispatcher.h [2:2] - src/tbb/thread_data.h [2:2] KEEP COPYRIGHT_SERVICE_LABEL e8223cce660b0cdff8448d0d968f2688 BELONGS ya.make @@ -342,3 +411,20 @@ BELONGS ya.make Files with this license: include/oneapi/tbb/blocked_rangeNd.h [2:2] include/tbb/blocked_rangeNd.h [2:2] + +KEEP COPYRIGHT_SERVICE_LABEL f3c560deab83212d24ce88e384481cf3 +BELONGS ya.make + License text: + Copyright (c) 2022-2024 Intel Corporation + Scancode info: + Original SPDX id: COPYRIGHT_SERVICE_LABEL + Score : 100.00 + Match type : COPYRIGHT + Files with this license: + src/tbb/thread_dispatcher.cpp [2:2] + src/tbb/thread_dispatcher.h [2:2] + src/tbb/thread_dispatcher_client.h [2:2] + src/tbb/thread_request_serializer.cpp [2:2] + src/tbb/thread_request_serializer.h [2:2] + src/tbb/threading_control.cpp [2:2] + src/tbb/threading_control.h [2:2] diff --git 
a/contrib/libs/tbb/.yandex_meta/devtools.licenses.report b/contrib/libs/tbb/.yandex_meta/devtools.licenses.report index 7231077852..33a74ab6e6 100644 --- a/contrib/libs/tbb/.yandex_meta/devtools.licenses.report +++ b/contrib/libs/tbb/.yandex_meta/devtools.licenses.report @@ -84,7 +84,7 @@ BELONGS ya.make Match type : NOTICE Links : http://www.apache.org/licenses/, http://www.apache.org/licenses/LICENSE-2.0, https://spdx.org/licenses/Apache-2.0 Files with this license: - README.md [48:49] + README.md [62:63] KEEP Apache-2.0 AND Apache-2.0 640d2de508aaacd65f56f04b562671d3 BELONGS ya.make @@ -125,6 +125,8 @@ BELONGS ya.make Match type : NOTICE Links : http://www.apache.org/licenses/, http://www.apache.org/licenses/LICENSE-2.0, https://spdx.org/licenses/Apache-2.0 Files with this license: + RELEASE_NOTES.md [4:14] + SUPPORT.md [4:14] SYSTEM_REQUIREMENTS.md [4:14] KEEP Apache-2.0 c151e01d926ee27aa3edf0604861b96d @@ -251,6 +253,7 @@ BELONGS ya.make include/tbb/global_control.h [4:14] include/tbb/info.h [4:14] include/tbb/memory_pool.h [4:14] + include/tbb/mutex.h [4:14] include/tbb/null_mutex.h [4:14] include/tbb/null_rw_mutex.h [4:14] include/tbb/parallel_for.h [4:14] @@ -264,6 +267,7 @@ BELONGS ya.make include/tbb/profiling.h [4:14] include/tbb/queuing_mutex.h [4:14] include/tbb/queuing_rw_mutex.h [4:14] + include/tbb/rw_mutex.h [4:14] include/tbb/scalable_allocator.h [4:14] include/tbb/spin_mutex.h [4:14] include/tbb/spin_rw_mutex.h [4:14] @@ -283,6 +287,7 @@ BELONGS ya.make src/tbb/arena_slot.cpp [4:14] src/tbb/arena_slot.h [4:14] src/tbb/assert_impl.h [4:14] + src/tbb/cancellation_disseminator.h [4:14] src/tbb/co_context.h [4:14] src/tbb/concurrent_bounded_queue.cpp [4:14] src/tbb/concurrent_monitor.h [4:14] @@ -303,13 +308,14 @@ BELONGS ya.make src/tbb/main.h [4:14] src/tbb/market.cpp [4:14] src/tbb/market.h [4:14] - src/tbb/market_concurrent_monitor.h [4:14] src/tbb/misc.cpp [4:14] src/tbb/misc.h [4:14] src/tbb/misc_ex.cpp [4:14] src/tbb/observer_proxy.cpp [4:14] src/tbb/observer_proxy.h [4:14] src/tbb/parallel_pipeline.cpp [4:14] + src/tbb/permit_manager.h [4:14] + src/tbb/pm_client.h [4:14] src/tbb/private_server.cpp [4:14] src/tbb/profiling.cpp [4:14] src/tbb/queuing_rw_mutex.cpp [4:14] @@ -329,7 +335,19 @@ BELONGS ya.make src/tbb/task_dispatcher.h [4:14] src/tbb/task_group_context.cpp [4:14] src/tbb/task_stream.h [4:14] + src/tbb/tcm.h [4:14] + src/tbb/tcm_adaptor.cpp [4:14] + src/tbb/tcm_adaptor.h [4:14] + src/tbb/thread_control_monitor.h [4:14] src/tbb/thread_data.h [4:14] + src/tbb/thread_dispatcher.cpp [4:14] + src/tbb/thread_dispatcher.h [4:14] + src/tbb/thread_dispatcher_client.h [4:14] + src/tbb/thread_request_serializer.cpp [4:14] + src/tbb/thread_request_serializer.h [4:14] + src/tbb/threading_control.cpp [4:14] + src/tbb/threading_control.h [4:14] + src/tbb/threading_control_client.h [4:14] src/tbb/tls.h [4:14] src/tbb/tools_api/disable_warnings.h [4:14] src/tbb/tools_api/ittnotify.h [4:14] @@ -350,5 +368,4 @@ BELONGS ya.make Links : http://www.apache.org/licenses/, http://www.apache.org/licenses/LICENSE-2.0, https://spdx.org/licenses/Apache-2.0 Files with this license: CONTRIBUTING.md [4:14] - RELEASE_NOTES.md [4:14] WASM_Support.md [4:14] diff --git a/contrib/libs/tbb/.yandex_meta/licenses.list.txt b/contrib/libs/tbb/.yandex_meta/licenses.list.txt index c2abc130b0..90d43155cb 100644 --- a/contrib/libs/tbb/.yandex_meta/licenses.list.txt +++ b/contrib/libs/tbb/.yandex_meta/licenses.list.txt @@ -255,6 +255,10 @@ Licensing is very important to open source projects. 
It helps ensure the softwar ====================COPYRIGHT==================== + Copyright (c) 2005-2024 Intel Corporation + + +====================COPYRIGHT==================== Copyright (c) 2017-2021 Intel Corporation @@ -279,8 +283,32 @@ Licensing is very important to open source projects. It helps ensure the softwar ====================COPYRIGHT==================== - Copyright (c) 2020-2023 Intel Corporation + Copyright (c) 2020-2024 Intel Corporation ====================COPYRIGHT==================== Copyright (c) 2021 Intel Corporation + + +====================COPYRIGHT==================== + Copyright (c) 2021-2023 Intel Corporation + + +====================COPYRIGHT==================== + Copyright (c) 2021-2024 Intel Corporation + + +====================COPYRIGHT==================== + Copyright (c) 2022-2023 Intel Corporation + + +====================COPYRIGHT==================== + Copyright (c) 2022-2024 Intel Corporation + + +====================COPYRIGHT==================== + Copyright (c) 2023 Intel Corporation + + +====================COPYRIGHT==================== + Copyright (c) 2023-2024 Intel Corporation diff --git a/contrib/libs/tbb/.yandex_meta/override.nix b/contrib/libs/tbb/.yandex_meta/override.nix index 928ebd5da0..07f78fb784 100644 --- a/contrib/libs/tbb/.yandex_meta/override.nix +++ b/contrib/libs/tbb/.yandex_meta/override.nix @@ -1,11 +1,11 @@ pkgs: attrs: with pkgs; rec { - version = "2021.10.0"; + version = "2022.0.0"; src = fetchFromGitHub { owner = "uxlfoundation"; repo = "oneTBB"; rev = "v${version}"; - hash = "sha256-HhZ4TBXqIZKkMB6bafOs8kt4EqkqStFjpoVQ3G+Rn4M="; + hash = "sha256-XOlC1+rf65oEGKDba9N561NuFo1YJhn3Q1CTGtvkn7A="; }; patches = []; diff --git a/contrib/libs/tbb/CODEOWNERS b/contrib/libs/tbb/CODEOWNERS new file mode 100644 index 0000000000..78105ac7e8 --- /dev/null +++ b/contrib/libs/tbb/CODEOWNERS @@ -0,0 +1,27 @@ +# Where component owners are known, add them here. + +/oneTBB/src/tbb/ @pavelkumbrasev +/oneTBB/src/tbb/ @dnmokhov +/oneTBB/src/tbb/ @JhaShweta1 +/oneTBB/src/tbb/ @sarathnandu +/oneTBB/include/oneapi/tbb/parallel_* @pavelkumbrasev +/oneTBB/include/oneapi/tbb/concurrent_* @kboyarinov +/oneTBB/include/oneapi/tbb/flow_graph* @kboyarinov +/oneTBB/include/oneapi/tbb/flow_graph* @aleksei-fedotov +/oneTBB/include/oneapi/tbb/detail/_flow_graph* @kboyarinov +/oneTBB/include/oneapi/tbb/detail/_flow_graph* @aleksei-fedotov +/oneTBB/include/oneapi/tbb/detail/_concurrent* @kboyarinov +/oneTBB/src/doc @aepanchi +/oneTBB/src/tbbbind/ @isaevil +/oneTBB/src/tbbmalloc/ @lplewa +/oneTBB/src/tbbmalloc_proxy/ @lplewa +/oneTBB/cmake/ @isaevil +/oneTBB/*CMakeLists.txt @isaevil +/oneTBB/python/ @sarathnandu +/oneTBB/python/ @isaevil + +# Bazel build related files. 
+/oneTBB/.bazelversion @Vertexwahn +/oneTBB/Bazel.md @Vertexwahn +/oneTBB/BUILD.bazel @Vertexwahn +/oneTBB/MODULE.bazel @Vertexwahn diff --git a/contrib/libs/tbb/CODE_OF_CONDUCT.md b/contrib/libs/tbb/CODE_OF_CONDUCT.md new file mode 100644 index 0000000000..c169707396 --- /dev/null +++ b/contrib/libs/tbb/CODE_OF_CONDUCT.md @@ -0,0 +1,134 @@ + +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, ethnicity, sex characteristics, gender +identity and expression, level of experience, education, socio-economic status, +nationality, personal appearance, race, caste, color, religion, or sexual +identity and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. + +## Our Standards + +Examples of behavior that contributes to a positive environment for our +community include: + +* Demonstrating empathy and kindness toward other people +* Being respectful of differing opinions, viewpoints, and experiences +* Giving and gracefully accepting constructive feedback +* Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +* Focusing on what is best not just for us as individuals, but for the overall + community + +Examples of unacceptable behavior include: + +* The use of sexualized language or imagery, and sexual attention or advances of + any kind +* Trolling, insulting or derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or email address, + without their explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Enforcement Responsibilities + +Community leaders are responsible for clarifying and enforcing our standards of +acceptable behavior and will take appropriate and fair corrective action in +response to any behavior that they deem inappropriate, threatening, offensive, +or harmful. + +Community leaders have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, and will communicate reasons for moderation +decisions when appropriate. + +## Scope + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. +Examples of representing our community include using an official email address, +posting via an official social media account, or acting as an appointed +representative at an online or offline event. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported to the community leaders responsible for enforcement at +oneTBBCodeOfConduct At intel DOT com. +All complaints will be reviewed and investigated promptly and fairly. + +All community leaders are obligated to respect the privacy and security of the +reporter of any incident. + +## Enforcement Guidelines + +Community leaders will follow these Community Impact Guidelines in determining +the consequences for any action they deem in violation of this Code of Conduct: + +### 1. 
Correction + +**Community Impact**: Use of inappropriate language or other behavior deemed +unprofessional or unwelcome in the community. + +**Consequence**: A private, written warning from community leaders, providing +clarity around the nature of the violation and an explanation of why the +behavior was inappropriate. A public apology may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series of +actions. + +**Consequence**: A warning with consequences for continued behavior. No +interaction with the people involved, including unsolicited interaction with +those enforcing the Code of Conduct, for a specified period of time. This +includes avoiding interactions in community spaces as well as external channels +like social media. Violating these terms may lead to a temporary or permanent +ban. + +### 3. Temporary Ban + +**Community Impact**: A serious violation of community standards, including +sustained inappropriate behavior. + +**Consequence**: A temporary ban from any sort of interaction or public +communication with the community for a specified period of time. No public or +private interaction with the people involved, including unsolicited interaction +with those enforcing the Code of Conduct, is allowed during this period. +Violating these terms may lead to a permanent ban. + +### 4. Permanent Ban + +**Community Impact**: Demonstrating a pattern of violation of community +standards, including sustained inappropriate behavior, harassment of an +individual, or aggression toward or disparagement of classes of individuals. + +**Consequence**: A permanent ban from any sort of public interaction within the +community. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 2.1, available at +[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1]. + +Community Impact Guidelines were inspired by +[Mozilla's code of conduct enforcement ladder][Mozilla CoC]. + +For answers to common questions about this code of conduct, see the FAQ at +[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at +[https://www.contributor-covenant.org/translations][translations]. + +[homepage]: https://www.contributor-covenant.org +[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html +[Mozilla CoC]: https://github.com/mozilla/diversity +[FAQ]: https://www.contributor-covenant.org/faq +[translations]: https://www.contributor-covenant.org/translations + diff --git a/contrib/libs/tbb/CONTRIBUTING.md b/contrib/libs/tbb/CONTRIBUTING.md index c8b437083a..b2b6a968cd 100644 --- a/contrib/libs/tbb/CONTRIBUTING.md +++ b/contrib/libs/tbb/CONTRIBUTING.md @@ -21,28 +21,15 @@ As an open source project, we welcome community contributions to oneAPI Threadin Licensing is very important to open source projects. It helps ensure the software continues to be available under the terms that the author desired. The oneTBB project uses the [Apache 2.0 License](https://github.com/oneapi-src/oneTBB/blob/master/LICENSE.txt), a permissive open source license that allows you to freely use, modify, and distribute your own products that include Apache 2.0 licensed software. By contributing to the oneTBB project, you agree to the license and copyright terms therein and release your own contributions under these terms. 
-Some imported or reused components within oneTBB use other licenses, as described in [third-party-programs.txt](https://github.com/oneapi-src/oneTBB/blob/master/third-party-programs.txt). By carefully reviewing potential contributions and enforcing a [Developer Certification of Origin (DCO)](https://developercertificate.org/) for contributed code, we can ensure that the community can develop products with oneTBB without concerns over patent or copyright issues. - -The DCO is an attestation attached to every contribution made by every developer. In the commit message of the contribution, (described later), the developer simply adds a Signed-off-by statement and thereby agrees to the DCO. +Some imported or reused components within oneTBB use other licenses, as described in [third-party-programs.txt](https://github.com/oneapi-src/oneTBB/blob/master/third-party-programs.txt). By carefully reviewing potential contributions, we can ensure that the community can develop products with oneTBB without concerns over patent or copyright issues. ## Prerequisites As a contributor, you’ll want to be familiar with the oneTBB project and the repository layout. You should also know how to use it as explained in the [oneTBB documentation](https://oneapi-src.github.io/oneTBB/) and how to set up your build development environment to configure, build, and test oneTBB as explained in the [oneTBB Build System Description](cmake/README.md). -## Issues -If you face a problem, first check out open [oneTBB GitHub issues](https://github.com/oneapi-src/oneTBB/issues) to see if the issue you’d like to address is already reported. You may find users that have encountered the bug you’re finding or have similar ideas for changes or additions. - -You can use issues to report a problem, make a feature request, or add comments on an existing issue. - ## Pull Requests You can find all [open oneTBB pull requests](https://github.com/oneapi-src/oneTBB/pulls) on GitHub. - -No anonymous contributions are accepted. The name in the commit message Signed-off-by line and your email must match the change authorship information. Make sure your .gitconfig is set up correctly so you can use `git commit -s` for signing your patches: - -`git config --global user.name "Taylor Developer"` - -`git config --global user.email taylor.developer@company.com` ### Before contributing changes directly to the oneTBB repository diff --git a/contrib/libs/tbb/INSTALL.md b/contrib/libs/tbb/INSTALL.md index 3c63c9fd84..0ac95f8755 100644 --- a/contrib/libs/tbb/INSTALL.md +++ b/contrib/libs/tbb/INSTALL.md @@ -61,7 +61,7 @@ You can use the ``install`` components for partial installation. The following install components are supported: - `runtime` - oneTBB runtime package (core shared libraries and `.dll` files on Windows* OS). - `devel` - oneTBB development package (header files, CMake integration files, library symbolic links, and `.lib` files on Windows* OS). -- `tbb4py` - [oneTBB Module for Python](#onetbb-python-module-support). +- `tbb4py` - [oneTBB Module for Python](https://github.com/oneapi-src/oneTBB/blob/master/python/README.md). 
If you want to install specific components after configuration and build, run: diff --git a/contrib/libs/tbb/MODULE.bazel.lock b/contrib/libs/tbb/MODULE.bazel.lock new file mode 100644 index 0000000000..06f9098032 --- /dev/null +++ b/contrib/libs/tbb/MODULE.bazel.lock @@ -0,0 +1,65 @@ +{ + "lockFileVersion": 11, + "registryFileHashes": { + "https://bcr.bazel.build/bazel_registry.json": "8a28e4aff06ee60aed2a8c281907fb8bcbf3b753c91fb5a5c57da3215d5b3497", + "https://bcr.bazel.build/modules/abseil-cpp/20210324.2/MODULE.bazel": "7cd0312e064fde87c8d1cd79ba06c876bd23630c83466e9500321be55c96ace2", + "https://bcr.bazel.build/modules/abseil-cpp/20211102.0/MODULE.bazel": "70390338f7a5106231d20620712f7cccb659cd0e9d073d1991c038eb9fc57589", + "https://bcr.bazel.build/modules/abseil-cpp/20211102.0/source.json": "7e3a9adf473e9af076ae485ed649d5641ad50ec5c11718103f34de03170d94ad", + "https://bcr.bazel.build/modules/apple_support/1.5.0/MODULE.bazel": "50341a62efbc483e8a2a6aec30994a58749bd7b885e18dd96aa8c33031e558ef", + "https://bcr.bazel.build/modules/apple_support/1.5.0/source.json": "eb98a7627c0bc486b57f598ad8da50f6625d974c8f723e9ea71bd39f709c9862", + "https://bcr.bazel.build/modules/bazel_features/1.11.0/MODULE.bazel": "f9382337dd5a474c3b7d334c2f83e50b6eaedc284253334cf823044a26de03e8", + "https://bcr.bazel.build/modules/bazel_features/1.11.0/source.json": "c9320aa53cd1c441d24bd6b716da087ad7e4ff0d9742a9884587596edfe53015", + "https://bcr.bazel.build/modules/bazel_skylib/1.0.3/MODULE.bazel": "bcb0fd896384802d1ad283b4e4eb4d718eebd8cb820b0a2c3a347fb971afd9d8", + "https://bcr.bazel.build/modules/bazel_skylib/1.2.1/MODULE.bazel": "f35baf9da0efe45fa3da1696ae906eea3d615ad41e2e3def4aeb4e8bc0ef9a7a", + "https://bcr.bazel.build/modules/bazel_skylib/1.3.0/MODULE.bazel": "20228b92868bf5cfc41bda7afc8a8ba2a543201851de39d990ec957b513579c5", + "https://bcr.bazel.build/modules/bazel_skylib/1.6.1/MODULE.bazel": "8fdee2dbaace6c252131c00e1de4b165dc65af02ea278476187765e1a617b917", + "https://bcr.bazel.build/modules/bazel_skylib/1.6.1/source.json": "082ed5f9837901fada8c68c2f3ddc958bb22b6d654f71dd73f3df30d45d4b749", + "https://bcr.bazel.build/modules/buildozer/7.1.2/MODULE.bazel": "2e8dd40ede9c454042645fd8d8d0cd1527966aa5c919de86661e62953cd73d84", + "https://bcr.bazel.build/modules/buildozer/7.1.2/source.json": "c9028a501d2db85793a6996205c8de120944f50a0d570438fcae0457a5f9d1f8", + "https://bcr.bazel.build/modules/googletest/1.11.0/MODULE.bazel": "3a83f095183f66345ca86aa13c58b59f9f94a2f81999c093d4eeaa2d262d12f4", + "https://bcr.bazel.build/modules/googletest/1.11.0/source.json": "c73d9ef4268c91bd0c1cd88f1f9dfa08e814b1dbe89b5f594a9f08ba0244d206", + "https://bcr.bazel.build/modules/platforms/0.0.10/MODULE.bazel": "8cb8efaf200bdeb2150d93e162c40f388529a25852b332cec879373771e48ed5", + "https://bcr.bazel.build/modules/platforms/0.0.10/source.json": "f22828ff4cf021a6b577f1bf6341cb9dcd7965092a439f64fc1bb3b7a5ae4bd5", + "https://bcr.bazel.build/modules/platforms/0.0.4/MODULE.bazel": "9b328e31ee156f53f3c416a64f8491f7eb731742655a47c9eec4703a71644aee", + "https://bcr.bazel.build/modules/platforms/0.0.5/MODULE.bazel": "5733b54ea419d5eaf7997054bb55f6a1d0b5ff8aedf0176fef9eea44f3acda37", + "https://bcr.bazel.build/modules/platforms/0.0.6/MODULE.bazel": "ad6eeef431dc52aefd2d77ed20a4b353f8ebf0f4ecdd26a807d2da5aa8cd0615", + "https://bcr.bazel.build/modules/platforms/0.0.7/MODULE.bazel": "72fd4a0ede9ee5c021f6a8dd92b503e089f46c227ba2813ff183b71616034814", + "https://bcr.bazel.build/modules/platforms/0.0.9/MODULE.bazel": 
"4a87a60c927b56ddd67db50c89acaa62f4ce2a1d2149ccb63ffd871d5ce29ebc", + "https://bcr.bazel.build/modules/protobuf/21.7/MODULE.bazel": "a5a29bb89544f9b97edce05642fac225a808b5b7be74038ea3640fae2f8e66a7", + "https://bcr.bazel.build/modules/protobuf/21.7/source.json": "bbe500720421e582ff2d18b0802464205138c06056f443184de39fbb8187b09b", + "https://bcr.bazel.build/modules/protobuf/3.19.0/MODULE.bazel": "6b5fbb433f760a99a22b18b6850ed5784ef0e9928a72668b66e4d7ccd47db9b0", + "https://bcr.bazel.build/modules/protobuf/3.19.6/MODULE.bazel": "9233edc5e1f2ee276a60de3eaa47ac4132302ef9643238f23128fea53ea12858", + "https://bcr.bazel.build/modules/rules_cc/0.0.1/MODULE.bazel": "cb2aa0747f84c6c3a78dad4e2049c154f08ab9d166b1273835a8174940365647", + "https://bcr.bazel.build/modules/rules_cc/0.0.2/MODULE.bazel": "6915987c90970493ab97393024c156ea8fb9f3bea953b2f3ec05c34f19b5695c", + "https://bcr.bazel.build/modules/rules_cc/0.0.8/MODULE.bazel": "964c85c82cfeb6f3855e6a07054fdb159aced38e99a5eecf7bce9d53990afa3e", + "https://bcr.bazel.build/modules/rules_cc/0.0.9/MODULE.bazel": "836e76439f354b89afe6a911a7adf59a6b2518fafb174483ad78a2a2fde7b1c5", + "https://bcr.bazel.build/modules/rules_cc/0.0.9/source.json": "1f1ba6fea244b616de4a554a0f4983c91a9301640c8fe0dd1d410254115c8430", + "https://bcr.bazel.build/modules/rules_java/4.0.0/MODULE.bazel": "5a78a7ae82cd1a33cef56dc578c7d2a46ed0dca12643ee45edbb8417899e6f74", + "https://bcr.bazel.build/modules/rules_java/7.6.1/MODULE.bazel": "2f14b7e8a1aa2f67ae92bc69d1ec0fa8d9f827c4e17ff5e5f02e91caa3b2d0fe", + "https://bcr.bazel.build/modules/rules_java/7.6.1/source.json": "8f3f3076554e1558e8e468b2232991c510ecbcbed9e6f8c06ac31c93bcf38362", + "https://bcr.bazel.build/modules/rules_jvm_external/4.4.2/MODULE.bazel": "a56b85e418c83eb1839819f0b515c431010160383306d13ec21959ac412d2fe7", + "https://bcr.bazel.build/modules/rules_jvm_external/4.4.2/source.json": "a075731e1b46bc8425098512d038d416e966ab19684a10a34f4741295642fc35", + "https://bcr.bazel.build/modules/rules_license/0.0.3/MODULE.bazel": "627e9ab0247f7d1e05736b59dbb1b6871373de5ad31c3011880b4133cafd4bd0", + "https://bcr.bazel.build/modules/rules_license/0.0.7/MODULE.bazel": "088fbeb0b6a419005b89cf93fe62d9517c0a2b8bb56af3244af65ecfe37e7d5d", + "https://bcr.bazel.build/modules/rules_license/0.0.7/source.json": "355cc5737a0f294e560d52b1b7a6492d4fff2caf0bef1a315df5a298fca2d34a", + "https://bcr.bazel.build/modules/rules_pkg/0.7.0/MODULE.bazel": "df99f03fc7934a4737122518bb87e667e62d780b610910f0447665a7e2be62dc", + "https://bcr.bazel.build/modules/rules_pkg/0.7.0/source.json": "c2557066e0c0342223ba592510ad3d812d4963b9024831f7f66fd0584dd8c66c", + "https://bcr.bazel.build/modules/rules_proto/4.0.0/MODULE.bazel": "a7a7b6ce9bee418c1a760b3d84f83a299ad6952f9903c67f19e4edd964894e06", + "https://bcr.bazel.build/modules/rules_proto/5.3.0-21.7/MODULE.bazel": "e8dff86b0971688790ae75528fe1813f71809b5afd57facb44dad9e8eca631b7", + "https://bcr.bazel.build/modules/rules_proto/5.3.0-21.7/source.json": "d57902c052424dfda0e71646cb12668d39c4620ee0544294d9d941e7d12bc3a9", + "https://bcr.bazel.build/modules/rules_python/0.10.2/MODULE.bazel": "cc82bc96f2997baa545ab3ce73f196d040ffb8756fd2d66125a530031cd90e5f", + "https://bcr.bazel.build/modules/rules_python/0.22.1/MODULE.bazel": "26114f0c0b5e93018c0c066d6673f1a2c3737c7e90af95eff30cfee38d0bbac7", + "https://bcr.bazel.build/modules/rules_python/0.22.1/source.json": "57226905e783bae7c37c2dd662be078728e48fa28ee4324a7eabcafb5a43d014", + "https://bcr.bazel.build/modules/rules_python/0.4.0/MODULE.bazel": 
"9208ee05fd48bf09ac60ed269791cf17fb343db56c8226a720fbb1cdf467166c", + "https://bcr.bazel.build/modules/stardoc/0.5.1/MODULE.bazel": "1a05d92974d0c122f5ccf09291442580317cdd859f07a8655f1db9a60374f9f8", + "https://bcr.bazel.build/modules/stardoc/0.5.1/source.json": "a96f95e02123320aa015b956f29c00cb818fa891ef823d55148e1a362caacf29", + "https://bcr.bazel.build/modules/upb/0.0.0-20220923-a547704/MODULE.bazel": "7298990c00040a0e2f121f6c32544bab27d4452f80d9ce51349b1a28f3005c43", + "https://bcr.bazel.build/modules/upb/0.0.0-20220923-a547704/source.json": "f1ef7d3f9e0e26d4b23d1c39b5f5de71f584dd7d1b4ef83d9bbba6ec7a6a6459", + "https://bcr.bazel.build/modules/zlib/1.2.11/MODULE.bazel": "07b389abc85fdbca459b69e2ec656ae5622873af3f845e1c9d80fe179f3effa0", + "https://bcr.bazel.build/modules/zlib/1.2.12/MODULE.bazel": "3b1a8834ada2a883674be8cbd36ede1b6ec481477ada359cd2d3ddc562340b27", + "https://bcr.bazel.build/modules/zlib/1.3/MODULE.bazel": "6a9c02f19a24dcedb05572b2381446e27c272cd383aed11d41d99da9e3167a72", + "https://bcr.bazel.build/modules/zlib/1.3/source.json": "b6b43d0737af846022636e6e255fd4a96fee0d34f08f3830e6e0bac51465c37c" + }, + "selectedYankedVersions": {}, + "moduleExtensions": {} +} diff --git a/contrib/libs/tbb/README.md b/contrib/libs/tbb/README.md index b96e1fb000..2e7c2e81ba 100644 --- a/contrib/libs/tbb/README.md +++ b/contrib/libs/tbb/README.md @@ -1,5 +1,8 @@ -# oneAPI Threading Building Blocks +# oneAPI Threading Building Blocks (oneTBB) <img align="right" width="200" height="100" src="https://raw.githubusercontent.com/uxlfoundation/artwork/e98f1a7a3d305c582d02c5f532e41487b710d470/foundation/uxl-foundation-logo-horizontal-color.svg"> [](LICENSE.txt) [](https://github.com/oneapi-src/oneTBB/actions/workflows/ci.yml?query=branch%3Amaster) +[](https://github.com/oneapi-src/oneTBB/discussions) +[](https://www.bestpractices.dev/projects/9125) +[](https://securityscorecards.dev/viewer/?uri=github.com/oneapi-src/oneTBB) oneTBB is a flexible C++ library that simplifies the work of adding parallelism to complex applications, even if you are not a threading expert. @@ -18,12 +21,13 @@ The library differs from typical threading packages in the following ways: Refer to oneTBB [examples](examples) and [samples](https://github.com/oneapi-src/oneAPI-samples/tree/master/Libraries/oneTBB) to see how you can use the library. -oneTBB is a part of [oneAPI](https://oneapi.io). The current branch implements version 1.1 of oneAPI Specification. +oneTBB is a part of the [UXL Foundation](http://www.uxlfoundation.org) and is an implementation of [oneAPI specification](https://oneapi.io). > **_NOTE:_** Threading Building Blocks (TBB) is now called oneAPI Threading Building Blocks (oneTBB) to highlight that the tool is a part of the oneAPI ecosystem. ## Release Information -Here are [Release Notes](RELEASE_NOTES.md) and [System Requirements](SYSTEM_REQUIREMENTS.md). + +See [Release Notes](RELEASE_NOTES.md) and [System Requirements](SYSTEM_REQUIREMENTS.md). ## Documentation * [oneTBB Specification](https://spec.oneapi.com/versions/latest/elements/oneTBB/source/nested-index.html) @@ -38,18 +42,27 @@ Here are [Release Notes](RELEASE_NOTES.md) and [System Requirements](SYSTEM_REQU ## Installation See [Installation from Sources](INSTALL.md) to learn how to install oneTBB. +## Governance + +The oneTBB project is governed by the UXL Foundation. 
+You can get involved in this project in following ways: +* Join the [Open Source and Specification Working Group](https://github.com/uxlfoundation/foundation/tree/main?tab=readme-ov-file#working-groups) meetings. +* Join the mailing lists for the [UXL Foundation](https://lists.uxlfoundation.org/g/main/subgroups) to receive meetings schedule and latest updates. +* Contribute to oneTBB project or oneTBB specification. Read [CONTRIBUTING](./CONTRIBUTING.md) for more information. + ## Support -Please report issues and suggestions via [GitHub issues](https://github.com/oneapi-src/oneTBB/issues). See our [documentation](./CONTRIBUTING.md##Issues) to learn how to work with them. +See our [documentation](./SUPPORT.md) to learn how to request help. ## How to Contribute We welcome community contributions, so check our [Contributing Guidelines](CONTRIBUTING.md) to learn more. +Use GitHub Issues for feature requests, bug reports, and minor inquiries. For broader questions and development-related discussions, use GitHub Discussions. + ## License oneAPI Threading Building Blocks is licensed under [Apache License, Version 2.0](LICENSE.txt). By its terms, contributions submitted to the project are also done under that license. - ## Engineering team contacts * [Email us.](mailto:inteltbbdevelopers@intel.com) diff --git a/contrib/libs/tbb/RELEASE_NOTES.md b/contrib/libs/tbb/RELEASE_NOTES.md index 57258416fe..c9b8e97135 100644 --- a/contrib/libs/tbb/RELEASE_NOTES.md +++ b/contrib/libs/tbb/RELEASE_NOTES.md @@ -18,26 +18,25 @@ This document contains changes of oneTBB compared to the last release. ## Table of Contents <!-- omit in toc --> -- [New Features](#new-features) - [Known Limitations](#known-limitations) - [Fixed Issues](#fixed-issues) -## :tada: New Features -- Since C++17, parallel algorithms and Flow Graph nodes are allowed to accept pointers to the member functions and member objects as the user-provided callables. -- Added missed member functions, such as assignment operators and swap function, to the ``concurrent_queue`` and ``concurrent_bounded_queue`` containers. - ## :rotating_light: Known Limitations -- A static assert will cause compilation failures in oneTBB headers when compiling with clang 12.0.0 or newer if using the LLVM standard library with ``-ffreestanding`` and C++11/14 compiler options. -- An application using Parallel STL algorithms in libstdc++ versions 9 and 10 may fail to compile due to incompatible interface changes between earlier versions of Threading Building Blocks (TBB) and oneAPI Threading Building Blocks (oneTBB). Disable support for Parallel STL algorithms by defining ``PSTL_USE_PARALLEL_POLICIES`` (in libstdc++ 9) or ``_GLIBCXX_USE_TBB_PAR_BACKEND`` (in libstdc++ 10) macro to zero before inclusion of the first standard header file in each translation unit. -- On Linux* OS, if oneAPI Threading Building Blocks (oneTBB) or Threading Building Blocks (TBB) are installed in a system folder like ``/usr/lib64``, the application may fail to link due to the order in which the linker searches for libraries. Use the ``-L`` linker option to specify the correct location of oneTBB library. This issue does not affect the program execution. -- The ``oneapi::tbb::info`` namespace interfaces might unexpectedly change the process affinity mask on Windows* OS systems (see https://github.com/open-mpi/hwloc/issues/366 for details) when using hwloc* version lower than 2.5. -- Using a hwloc* version other than 1.11, 2.0, or 2.5 may cause an undefined behavior on Windows* OS. 
See https://github.com/open-mpi/hwloc/issues/477 for details. -- The NUMA* topology may be detected incorrectly on Windows* OS machines where the number of NUMA* node threads exceeds the size of 1 processor group. -- On Windows* OS on ARM64*, when compiling an application using oneTBB with the Microsoft* Compiler, the compiler issues a warning C4324 that a structure was padded due to the alignment specifier. Consider suppressing the warning by specifying ``/wd4324`` to the compiler command line. -- oneTBB does not support ``fork()``, to work-around the issue, consider using task_scheduler_handle to join oneTBB worker threads before using fork(). -- C++ exception handling mechanism on Windows* OS on ARM64* might corrupt memory if an exception is thrown from any oneTBB parallel algorithm (see Windows* OS on ARM64* compiler issue: https://developercommunity.visualstudio.com/t/ARM64-incorrect-stack-unwinding-for-alig/1544293). +- The ``oneapi::tbb::info`` namespace interfaces might unexpectedly change the process affinity mask on Windows* OS systems (see https://github.com/open-mpi/hwloc/issues/366 for details) when using hwloc version lower than 2.5. +- Using a hwloc version other than 1.11, 2.0, or 2.5 may cause an undefined behavior on Windows OS. See https://github.com/open-mpi/hwloc/issues/477 for details. +- The NUMA topology may be detected incorrectly on Windows* OS machines where the number of NUMA node threads exceeds the size of 1 processor group. +- On Windows OS on ARM64*, when compiling an application using oneTBB with the Microsoft* Compiler, the compiler issues a warning C4324 that a structure was padded due to the alignment specifier. Consider suppressing the warning by specifying /wd4324 to the compiler command line. +- C++ exception handling mechanism on Windows* OS on ARM64* might corrupt memory if an exception is thrown from any oneTBB parallel algorithm (see Windows* OS on ARM64* compiler issue: https://developercommunity.visualstudio.com/t/ARM64-incorrect-stack-unwinding-for-alig/1544293. +- When CPU resource coordination is enabled, tasks from a lower-priority ``task_arena`` might be executed before tasks from a higher-priority ``task_arena``. + +> **_NOTE:_** To see known limitations that impact all versions of oneTBB, refer to [oneTBB Documentation](https://oneapi-src.github.io/oneTBB/main/intro/limitations.html). + ## :hammer: Fixed Issues -- Fixed the hang in the reserve method of concurrent unordered containers ([GitHub* #1056](http://github.com/oneapi-src/oneTBB/issues/1056)). -- Fixed the C++20 three-way comparison feature detection ([GitHub* #1093](http://github.com/oneapi-src/oneTBB/issues/1093)). -- Fixed oneTBB integration with CMake* in the Conda* environment. +- Fixed ``parallel_for_each`` algorithm behavior for iterators defining ``iterator_concept`` trait instead of ``iterator_category``. +- Fixed the redefinition issue for ``std::min`` and ``std::max`` on Windows* OS ([GitHub* #832](https://github.com/oneapi-src/oneTBB/issues/832)). +- Fixed the incorrect binary search order in ``TBBConfig.cmake``. +- Enabled the oneTBB library search using the pkg-config tool in Conda packages. + +## :octocat: Open-source Contributions Integrated +- Fixed the compiler warning for missing virtual destructor. Contributed by Elias Engelbert Plank (https://github.com/oneapi-src/oneTBB/pull/1215). 
diff --git a/contrib/libs/tbb/SECURITY.md b/contrib/libs/tbb/SECURITY.md index c4a49dd553..4926041fc2 100644 --- a/contrib/libs/tbb/SECURITY.md +++ b/contrib/libs/tbb/SECURITY.md @@ -1,7 +1,66 @@ # Security Policy -Intel is committed to rapidly addressing security vulnerabilities affecting our customers and providing clear guidance on the solution, -impact, severity and mitigation. +As an open-source project, we understand the importance of and responsibility +for security. This Security Policy outlines our guidelines and procedures to +ensure the highest level of security and trust for oneTBB users. -## Reporting a Vulnerability -Please report any security vulnerabilities in this project -[utilizing the guidelines here](https://www.intel.com/content/www/us/en/security-center/vulnerability-handling-guidelines.html). +## Supported Versions +Security vulnerabilities are fixed in the [latest version][1] +and delivered as a patch release. We don't guarantee security fixes to be +back-ported to older oneTBB versions. + +## Report a Vulnerability +We are very grateful to the security researchers and users that report back +security vulnerabilities. We investigate every report thoroughly. +We strongly encourage you to report security vulnerabilities to us privately, +before disclosing them on public forums or opening a public GitHub* issue. + +Report a vulnerability to us in one of two ways: +* Open a draft **[GitHub* Security Advisory][2]** +* Send an e-mail to: **security@uxlfoundation.org**. +Along with the report, provide the following info: + * A descriptive title. + * Your name and affiliation (if any). + * A description of the technical details of the vulnerabilities. + * A minimal example of the vulnerability so we can reproduce your findings. + * An explanation of who can exploit this vulnerability, and what they gain + doing so. + * Whether this vulnerability is public or known to third parties. If it is, + provide details. + +### When Should I Report a Vulnerability? +* You think you discovered a potential security vulnerability in oneTBB. +* You are unsure how the potential vulnerability affects oneTBB. +* You think you discovered a vulnerability in another project or 3rd party +component on which oneTBB depends. If the issue is not fixed in the 3rd party +component, try to report directly there first. + +### When Should I NOT Report a Vulnerability? +* You got an automated scan hit and are unable to provide details. +* You need help using oneTBB for security. +* You need help applying security-related updates. +* Your issue is not security-related. + +## Security Reports Review Process +We aim to respond quickly to your inquiry and coordinate a fix and +disclosure with you. All confirmed security vulnerabilities will be addressed +according to severity level and impact on oneTBB. Normally, security issues +are fixed in the next planned release. + +## Disclosure Policy +We will publish security advisories using the +[**GitHub Security Advisories feature**][3] +to keep our community well-informed, and will credit you for your findings +unless you prefer to stay anonymous. We request that you refrain from +exploiting the vulnerability or making it public before the official disclosure. + +We will disclose the vulnerabilities and bugs as soon as possible once +mitigation is implemented and available. + +## Feedback on This Policy +If you have any suggestions on how this Policy could be improved, submit +an issue or a pull request to this repository. 
**Do not** report +potential vulnerabilities or security flaws via a pull request. + +[1]: https://github.com/oneapi-src/oneTBB/releases/latest +[2]: https://github.com/oneapi-src/oneTBB/security/advisories/new +[3]: https://github.com/oneapi-src/oneTBB/security/advisories diff --git a/contrib/libs/tbb/SUPPORT.md b/contrib/libs/tbb/SUPPORT.md new file mode 100644 index 0000000000..47bb60a538 --- /dev/null +++ b/contrib/libs/tbb/SUPPORT.md @@ -0,0 +1,35 @@ +<!-- +****************************************************************************** +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +*     http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/--> + +# oneTBB Support + +We are committed to providing support and assistance to help you make the most out of oneTBB. +Use the following methods if you face any challenges. + +## Issues + +If you have a problem, check out the [GitHub Issues](https://github.com/oneapi-src/oneTBB/issues) to see if the issue you want to address is already reported. +You may find users that have encountered the same bug or have similar ideas for changes or updates. + +You can use issues to report a problem, make a feature request, or add comments on an existing issue. + +## Discussions + +Visit the [GitHub Discussions](https://github.com/oneapi-src/oneTBB/discussions) to engage with the community, ask questions, or help others. + +## Email + +Reach out to us privately via [email](mailto:inteltbbdevelopers@intel.com).
\ No newline at end of file diff --git a/contrib/libs/tbb/SYSTEM_REQUIREMENTS.md b/contrib/libs/tbb/SYSTEM_REQUIREMENTS.md index 803041c641..7f9d816161 100644 --- a/contrib/libs/tbb/SYSTEM_REQUIREMENTS.md +++ b/contrib/libs/tbb/SYSTEM_REQUIREMENTS.md @@ -44,10 +44,10 @@ This document provides details about hardware, operating system, and software pr - Microsoft* Windows* Server 2022 - Systems with Linux* operating systems: - Oracle Linux* 8 - - Amazon* Linux* 2 + - Amazon* Linux 2, 2022 - Debian* 9, 10, 11 - - Fedora* 36, 37 - - Rocky* Linux* 9 + - Fedora* 36, 37, 38 + - Rocky* Linux* 8, 9 - Red Hat* Enterprise Linux* 8, 9 - SuSE* Linux* Enterprise Server 15 - Ubuntu* 20.04, 22.04 @@ -64,12 +64,12 @@ This document provides details about hardware, operating system, and software pr ### Supported Compilers - Intel* oneAPI DPC++/C++ Compiler -- Intel* C++ Compiler 19.0 and 19.1 version +- Intel® C++ Compiler Classic 2021.1 - 2021.9 - Microsoft* Visual C++ 14.2 (Microsoft* Visual Studio* 2019, Windows* OS only) - Microsoft* Visual C++ 14.3 (Microsoft* Visual Studio* 2022, Windows* OS only) - For each supported Linux* operating system, the standard gcc version provided with that operating system is supported: - - GNU Compilers (gcc) 4.8.5 - 11.2.1 - - GNU C Library (glibc) version 2.17 - 2.34 + - GNU Compilers (gcc) 8.x – 12.x + - GNU C Library (glibc) version 2.28 – 2.36 - Clang* 6.0.0 - 13.0.0 ## Limitations diff --git a/contrib/libs/tbb/WASM_Support.md b/contrib/libs/tbb/WASM_Support.md index 67925ee496..6306620d7c 100644 --- a/contrib/libs/tbb/WASM_Support.md +++ b/contrib/libs/tbb/WASM_Support.md @@ -16,16 +16,66 @@ # WASM Support +oneTBB extends its capabilities by offering robust support for ``WASM`` (see ``Limitation`` sections). + ``WASM`` stands for WebAssembly, a low-level binary format for executing code in web browsers. -It is designed to be a portable target for compilers and to be efficient to parse and execute. +It is designed to be a portable target for compilers and efficient to parse and execute. + +Using oneTBB with WASM, you can take full advantage of parallelism and concurrency while working on web-based applications, interactive websites, and a variety of other WASM-compatible platforms. + +oneTBB offers WASM support through the integration with [Emscripten*](https://emscripten.org/docs/introducing_emscripten/index.html), a powerful toolchain for compiling C and C++ code into WASM-compatible runtimes. + +## Build + +**Prerequisites:** Download and install Emscripten*. See the [instructions](https://emscripten.org/docs/getting_started/downloads.html). + +To build the system, run: + +``` +mkdir build && cd build +emcmake cmake .. -DCMAKE_CXX_COMPILER=em++ -DCMAKE_C_COMPILER=emcc -DTBB_STRICT=OFF -DCMAKE_CXX_FLAGS=-Wno-unused-command-line-argument -DTBB_DISABLE_HWLOC_AUTOMATIC_SEARCH=ON -DBUILD_SHARED_LIBS=ON -DTBB_EXAMPLES=ON -DTBB_TEST=ON +``` +To compile oneTBB without ``pthreads``, set the flag ``-DEMSCRIPTEN_WITHOUT_PTHREAD=true`` in the command above. By default, oneTBB uses the ``pthreads``. +``` +cmake --build . <options> +cmake --install . <options> +``` +Where: + +* ``emcmake`` - a tool that sets up the environment for Emscripten*. +* ``-DCMAKE_CXX_COMPILER=em++`` - specifies the C++ compiler as Emscripten* C++ compiler. +* ``-DCMAKE_C_COMPILER=emcc`` - specifies the C compiler as Emscripten* C compiler. + + +> **_NOTE:_** See [CMake documentation](https://github.com/oneapi-src/oneTBB/blob/master/cmake/README.md) to learn about other options. 
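As a quick sanity check of the build described above, a minimal program like the following can be compiled with `em++`. This is only a sketch: the file name, include path, and link line are illustrative assumptions, and the exact flags depend on where the install step placed oneTBB in your environment.

```cpp
// hello_tbb_wasm.cpp (hypothetical example, for illustration only)
// Sums 0..n-1 with oneTBB; under Emscripten the work is spread across Web Workers.
#include <oneapi/tbb/blocked_range.h>
#include <oneapi/tbb/parallel_reduce.h>
#include <cstdio>
#include <functional>

int main() {
    const long n = 1000000;
    long sum = oneapi::tbb::parallel_reduce(
        oneapi::tbb::blocked_range<long>(0, n), 0L,
        [](const oneapi::tbb::blocked_range<long>& r, long acc) {
            for (long i = r.begin(); i != r.end(); ++i) acc += i;
            return acc;
        },
        std::plus<long>());
    std::printf("sum = %ld\n", sum);
    return 0;
}
```
A possible (assumed) compile line is `em++ -O2 -pthread hello_tbb_wasm.cpp -I<install>/include -L<install>/lib -ltbb -o hello.js`; adjust the paths, the threading flags, and the output target to match your install and the Emscripten options discussed above.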
-WebAssembly aims to provide a fast, efficient, and safe way to run code in web browsers without needing plugins or other software. Code written in a variety of programming languages, including C, C++, Rust and others, can be compiled into WebAssembly format for use in web pages. This allows you to write high-performance applications that run directly in the browser. -We currently have an [under development branch that provides you with WASM support](https://github.com/oneapi-src/oneTBB/tree/tbb_wasm). +## Run Test -By using WASM, you can: -* Create highly performant and scalable applications that can meet the demands of modern web-based systems. -* Take advantage of oneTBB features to optimize the performance of your web-based applications. +To run tests, use: +``` +ctest +``` +# Limitations +You can successfully build your application with oneTBB using WASM, but you may not achieve optimal performance immediately. This is due to the limitation with nested Web Workers: a Web Worker cannot schedule another worker without help from a browser thread. This can lead to unexpected performance outcomes, such as the application running in serial. +Find more information in the [issue](https://github.com/emscripten-core/emscripten/discussions/21963) in the Emscripten repository. +To workaround this issue, try one of the following ways: +1. **Recommended Solution: Use the ``-sPROXY_TO_PTHREAD`` Flag**. +This flag splits the initial thread into a browser thread and a main thread (proxied by a Web Worker), effectively resolving the issue as the browser thread is always present in the event loop and can participate in Web Workers scheduling. Refer to the [Emscripten documentation](https://emscripten.org/docs/porting/pthreads.html) for more details about ``-sPROXY_TO_PTHREAD`` since using this flag may require refactoring the code. +2. **Alternative Solution: Warm Up the oneTBB Thread Pool** +Initialize the oneTBB thread pool before making the first call to oneTBB. This approach forces the browser thread to participate in Web Workers scheduling. +```cpp + int num_threads = tbb::this_task_arena::max_concurrency(); + std::atomic<int> barrier{num_threads}; + tbb::parallel_for(0, num_threads, [&barrier] (int) { + barrier--; + while (barrier > 0) { + // Send browser thread to event loop + std::this_thread::yield(); + } + }, tbb::static_partitioner{}); +``` +> **_NOTE:_** Be aware that it might cause delays on the browser side. diff --git a/contrib/libs/tbb/include/oneapi/tbb.h b/contrib/libs/tbb/include/oneapi/tbb.h index 3782c74dcb..ad96011373 100644 --- a/contrib/libs/tbb/include/oneapi/tbb.h +++ b/contrib/libs/tbb/include/oneapi/tbb.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2023 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -63,6 +63,8 @@ #include "oneapi/tbb/queuing_rw_mutex.h" #include "oneapi/tbb/spin_mutex.h" #include "oneapi/tbb/spin_rw_mutex.h" +#include "oneapi/tbb/mutex.h" +#include "oneapi/tbb/rw_mutex.h" #include "oneapi/tbb/task.h" #include "oneapi/tbb/task_arena.h" #include "oneapi/tbb/task_group.h" diff --git a/contrib/libs/tbb/include/oneapi/tbb/collaborative_call_once.h b/contrib/libs/tbb/include/oneapi/tbb/collaborative_call_once.h index db082f891a..18e3bbb245 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/collaborative_call_once.h +++ b/contrib/libs/tbb/include/oneapi/tbb/collaborative_call_once.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2021 Intel Corporation + Copyright (c) 2021-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -32,6 +32,27 @@ namespace d1 { #pragma warning (disable: 4324) #endif +template <typename F> +class collaborative_call_stack_task : public task { + const F& m_func; + wait_context& m_wait_ctx; + + void finalize() { + m_wait_ctx.release(); + } + task* execute(d1::execution_data&) override { + task* res = d2::task_ptr_or_nullptr(m_func); + finalize(); + return res; + } + task* cancel(d1::execution_data&) override { + finalize(); + return nullptr; + } +public: + collaborative_call_stack_task(const F& f, wait_context& wctx) : m_func(f), m_wait_ctx(wctx) {} +}; + constexpr std::uintptr_t collaborative_once_max_references = max_nfs_size; constexpr std::uintptr_t collaborative_once_references_mask = collaborative_once_max_references-1; @@ -103,7 +124,7 @@ public: task_group_context context{ task_group_context::bound, task_group_context::default_traits | task_group_context::concurrent_wait }; - function_stack_task<F> t{ std::forward<F>(f), m_storage.m_wait_context }; + collaborative_call_stack_task<F> t{ std::forward<F>(f), m_storage.m_wait_context }; // Set the ready flag after entering the execute body to prevent // moonlighting threads from occupying all slots inside the arena. @@ -151,7 +172,7 @@ class collaborative_once_flag : no_copy { spin_wait_until_eq(m_state, expected); } while (!m_state.compare_exchange_strong(expected, desired)); } - + template <typename Fn> void do_collaborative_call_once(Fn&& f) { std::uintptr_t expected = m_state.load(std::memory_order_acquire); diff --git a/contrib/libs/tbb/include/oneapi/tbb/concurrent_queue.h b/contrib/libs/tbb/include/oneapi/tbb/concurrent_queue.h index 9e8a02f64f..3dca932893 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/concurrent_queue.h +++ b/contrib/libs/tbb/include/oneapi/tbb/concurrent_queue.h @@ -693,7 +693,7 @@ concurrent_bounded_queue( It, It, Alloc = Alloc() ) #endif /* __TBB_CPP17_DEDUCTION_GUIDES_PRESENT */ } //namespace d2 -} // namesapce detail +} // namespace detail inline namespace v1 { diff --git a/contrib/libs/tbb/include/oneapi/tbb/concurrent_unordered_map.h b/contrib/libs/tbb/include/oneapi/tbb/concurrent_unordered_map.h index 336425cc8f..9cade0a94e 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/concurrent_unordered_map.h +++ b/contrib/libs/tbb/include/oneapi/tbb/concurrent_unordered_map.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2022 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -24,14 +24,14 @@ namespace tbb { namespace detail { -namespace d1 { +namespace d2 { template <typename Key, typename T, typename Hash, typename KeyEqual, typename Allocator, bool AllowMultimapping> struct concurrent_unordered_map_traits { using value_type = std::pair<const Key, T>; using key_type = Key; using allocator_type = Allocator; - using hash_compare_type = hash_compare<Key, Hash, KeyEqual>; + using hash_compare_type = d1::hash_compare<Key, Hash, KeyEqual>; static constexpr bool allow_multimapping = AllowMultimapping; static constexpr const key_type& get_key( const value_type& value ) { @@ -399,13 +399,13 @@ void swap( concurrent_unordered_multimap<Key, T, Hash, KeyEqual, Allocator>& lhs lhs.swap(rhs); } -} // namespace d1 +} // namespace d2 } // namespace detail inline namespace v1 { -using detail::d1::concurrent_unordered_map; -using detail::d1::concurrent_unordered_multimap; +using detail::d2::concurrent_unordered_map; +using detail::d2::concurrent_unordered_multimap; using detail::split; } // inline namespace v1 diff --git a/contrib/libs/tbb/include/oneapi/tbb/concurrent_unordered_set.h b/contrib/libs/tbb/include/oneapi/tbb/concurrent_unordered_set.h index c135b92222..b7e4b4cafc 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/concurrent_unordered_set.h +++ b/contrib/libs/tbb/include/oneapi/tbb/concurrent_unordered_set.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2022 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -23,14 +23,14 @@ namespace tbb { namespace detail { -namespace d1 { +namespace d2 { template <typename Key, typename Hash, typename KeyEqual, typename Allocator, bool AllowMultimapping> struct concurrent_unordered_set_traits { using key_type = Key; using value_type = key_type; using allocator_type = Allocator; - using hash_compare_type = hash_compare<key_type, Hash, KeyEqual>; + using hash_compare_type = d1::hash_compare<key_type, Hash, KeyEqual>; static constexpr bool allow_multimapping = AllowMultimapping; static constexpr const key_type& get_key( const value_type& value ) { @@ -318,13 +318,13 @@ void swap( concurrent_unordered_multiset<Key, Hash, KeyEqual, Allocator>& lhs, lhs.swap(rhs); } -} // namespace d1 +} // namespace d2 } // namespace detail inline namespace v1 { -using detail::d1::concurrent_unordered_set; -using detail::d1::concurrent_unordered_multiset; +using detail::d2::concurrent_unordered_set; +using detail::d2::concurrent_unordered_multiset; using detail::split; } // inline namespace v1 diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_concurrent_unordered_base.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_concurrent_unordered_base.h index ade91c330f..85f54d0a57 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_concurrent_unordered_base.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_concurrent_unordered_base.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2023 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -46,7 +46,7 @@ namespace tbb { namespace detail { -namespace d1 { +namespace d2 { template <typename Traits> class concurrent_unordered_base; @@ -171,7 +171,7 @@ public: value_node( sokey_type ord_key ) : base_type(ord_key) {} ~value_node() {} value_type* storage() { - return reinterpret_cast<value_type*>(&my_value); + return &my_value; } value_type& value() { @@ -179,8 +179,9 @@ public: } private: - using aligned_storage_type = typename std::aligned_storage<sizeof(value_type)>::type; - aligned_storage_type my_value; + union { + value_type my_value; + }; }; // class value_node template <typename Traits> @@ -237,7 +238,7 @@ private: template <typename T> using is_transparent = dependent_bool<has_transparent_key_equal<key_type, hasher, key_equal>, T>; public: - using node_type = node_handle<key_type, value_type, value_node_type, allocator_type>; + using node_type = d1::node_handle<key_type, value_type, value_node_type, allocator_type>; explicit concurrent_unordered_base( size_type bucket_count, const hasher& hash = hasher(), const key_equal& equal = key_equal(), const allocator_type& alloc = allocator_type() ) @@ -441,7 +442,7 @@ public: std::pair<iterator, bool> insert( node_type&& nh ) { if (!nh.empty()) { - value_node_ptr insert_node = node_handle_accessor::get_node_ptr(nh); + value_node_ptr insert_node = d1::node_handle_accessor::get_node_ptr(nh); auto init_node = [&insert_node]( sokey_type order_key )->value_node_ptr { insert_node->init(order_key); return insert_node; @@ -451,7 +452,7 @@ public: // If the insertion succeeded - set node handle to the empty state __TBB_ASSERT(insert_result.remaining_node == nullptr, "internal_insert_node should not return the remaining node if the insertion succeeded"); - node_handle_accessor::deactivate(nh); + d1::node_handle_accessor::deactivate(nh); } return { iterator(insert_result.node_with_equal_key), insert_result.inserted }; } @@ -521,12 +522,12 @@ public: node_type unsafe_extract( const_iterator pos ) { internal_extract(pos.get_node_ptr()); - return node_handle_accessor::construct<node_type>(pos.get_node_ptr()); + return d1::node_handle_accessor::construct<node_type>(pos.get_node_ptr()); } node_type unsafe_extract( iterator pos ) { internal_extract(pos.get_node_ptr()); - return node_handle_accessor::construct<node_type>(pos.get_node_ptr()); + return d1::node_handle_accessor::construct<node_type>(pos.get_node_ptr()); } node_type unsafe_extract( const key_type& key ) { @@ -787,11 +788,11 @@ private: static constexpr size_type pointers_per_embedded_table = sizeof(size_type) * 8 - 1; class unordered_segment_table - : public segment_table<std::atomic<node_ptr>, allocator_type, unordered_segment_table, pointers_per_embedded_table> + : public d1::segment_table<std::atomic<node_ptr>, allocator_type, unordered_segment_table, pointers_per_embedded_table> { using self_type = unordered_segment_table; using atomic_node_ptr = std::atomic<node_ptr>; - using base_type = segment_table<std::atomic<node_ptr>, allocator_type, unordered_segment_table, pointers_per_embedded_table>; + using base_type = d1::segment_table<std::atomic<node_ptr>, allocator_type, unordered_segment_table, pointers_per_embedded_table>; using segment_type = typename base_type::segment_type; using base_allocator_type = typename base_type::allocator_type; @@ -921,7 +922,7 @@ private: node_allocator_traits::deallocate(dummy_node_allocator, node, 1); } else { // GCC 11.1 issues a warning here that incorrect destructor might be called for dummy_nodes - #if (__TBB_GCC_VERSION >= 110100 && 
__TBB_GCC_VERSION < 130000 ) && !__clang__ && !__INTEL_COMPILER + #if (__TBB_GCC_VERSION >= 110100 && __TBB_GCC_VERSION < 150000 ) && !__clang__ && !__INTEL_COMPILER volatile #endif value_node_ptr val_node = static_cast<value_node_ptr>(node); @@ -1212,7 +1213,7 @@ protected: // Node handle with curr cannot be used directly in insert call, because // the destructor of node_type will destroy curr - node_type curr_node = node_handle_accessor::construct<node_type>(curr); + node_type curr_node = d1::node_handle_accessor::construct<node_type>(curr); // If the insertion fails - return ownership of the node to the source if (!insert(std::move(curr_node)).second) { @@ -1230,7 +1231,7 @@ protected: curr->set_next(next_node); source_prev->set_next(curr); source_prev = curr; - node_handle_accessor::deactivate(curr_node); + d1::node_handle_accessor::deactivate(curr_node); } else { source.my_size.fetch_sub(1, std::memory_order_relaxed); } @@ -1507,7 +1508,7 @@ bool operator!=( const concurrent_unordered_base<Traits>& lhs, #pragma warning(pop) // warning 4127 is back #endif -} // namespace d1 +} // namespace d2 } // namespace detail } // namespace tbb diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_config.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_config.h index 04deb630f0..2cb9d4d77e 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_config.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_config.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2023 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -188,7 +188,7 @@ /** __TBB_DYNAMIC_LOAD_ENABLED describes the system possibility to load shared libraries at run time **/ #ifndef __TBB_DYNAMIC_LOAD_ENABLED - #define __TBB_DYNAMIC_LOAD_ENABLED 1 + #define __TBB_DYNAMIC_LOAD_ENABLED (!__EMSCRIPTEN__) #endif /** __TBB_WIN8UI_SUPPORT enables support of Windows* Store Apps and limit a possibility to load @@ -201,7 +201,7 @@ /** __TBB_WEAK_SYMBOLS_PRESENT denotes that the system supports the weak symbol mechanism **/ #ifndef __TBB_WEAK_SYMBOLS_PRESENT - #define __TBB_WEAK_SYMBOLS_PRESENT ( !_WIN32 && !__APPLE__ && !__sun && (__TBB_GCC_VERSION >= 40000 || __INTEL_COMPILER ) ) + #define __TBB_WEAK_SYMBOLS_PRESENT ( !__EMSCRIPTEN__ && !_WIN32 && !__APPLE__ && !__sun && (__TBB_GCC_VERSION >= 40000 || __INTEL_COMPILER ) ) #endif /** Presence of compiler features **/ @@ -339,7 +339,7 @@ #define __TBB_TSX_INTRINSICS_PRESENT (__RTM__ || __INTEL_COMPILER || (_MSC_VER>=1700 && (__TBB_x86_64 || __TBB_x86_32))) -#define __TBB_WAITPKG_INTRINSICS_PRESENT ((__INTEL_COMPILER >= 1900 || __TBB_GCC_VERSION >= 110000 || __TBB_CLANG_VERSION >= 120000) \ +#define __TBB_WAITPKG_INTRINSICS_PRESENT ((__INTEL_COMPILER >= 1900 || (__TBB_GCC_VERSION >= 110000 && __TBB_GNU_ASM_VERSION >= 2032) || __TBB_CLANG_VERSION >= 120000) \ && (_WIN32 || _WIN64 || __unix__ || __APPLE__) && (__TBB_x86_32 || __TBB_x86_64) && !__ANDROID__) /** Internal TBB features & modes **/ @@ -384,6 +384,9 @@ #define __TBB_ARENA_BINDING 1 #endif +// Thread pinning is not available on macOS* +#define __TBB_CPUBIND_PRESENT (__TBB_ARENA_BINDING && !__APPLE__) + #ifndef __TBB_ENQUEUE_ENFORCED_CONCURRENCY #define __TBB_ENQUEUE_ENFORCED_CONCURRENCY 1 #endif @@ -522,6 +525,11 @@ #define __TBB_PREVIEW_FLOW_GRAPH_NODE_SET (TBB_PREVIEW_FLOW_GRAPH_FEATURES) #endif +#ifndef __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT +#define __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT 
(TBB_PREVIEW_FLOW_GRAPH_FEATURES \ + || TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT) +#endif + #if TBB_PREVIEW_CONCURRENT_HASH_MAP_EXTENSIONS #define __TBB_PREVIEW_CONCURRENT_HASH_MAP_EXTENSIONS 1 #endif diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_body_impl.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_body_impl.h index 8ac11211f6..21da06ce03 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_body_impl.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_body_impl.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2023 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -21,7 +21,7 @@ #error Do not #include this internal file directly; use public TBB headers instead. #endif -// included in namespace tbb::detail::d1 (in flow_graph.h) +// included in namespace tbb::detail::d2 (in flow_graph.h) typedef std::uint64_t tag_value; @@ -53,7 +53,7 @@ namespace graph_policy_namespace { // K == type of field used for key-matching. Each tag-matching port will be provided // functor that, given an object accepted by the port, will return the /// field of type K being used for matching. - template<typename K, typename KHash=tbb_hash_compare<typename std::decay<K>::type > > + template<typename K, typename KHash=d1::tbb_hash_compare<typename std::decay<K>::type > > __TBB_requires(tbb::detail::hash_compare<KHash, K>) struct key_matching { typedef K key_type; @@ -77,7 +77,7 @@ template< typename Output > class input_body : no_assign { public: virtual ~input_body() {} - virtual Output operator()(flow_control& fc) = 0; + virtual Output operator()(d1::flow_control& fc) = 0; virtual input_body* clone() = 0; }; @@ -86,7 +86,7 @@ template< typename Output, typename Body> class input_body_leaf : public input_body<Output> { public: input_body_leaf( const Body &_body ) : body(_body) { } - Output operator()(flow_control& fc) override { return body(fc); } + Output operator()(d1::flow_control& fc) override { return body(fc); } input_body_leaf* clone() override { return new input_body_leaf< Output, Body >(body); } @@ -249,12 +249,12 @@ template< typename NodeType > class forward_task_bypass : public graph_task { NodeType &my_node; public: - forward_task_bypass( graph& g, small_object_allocator& allocator, NodeType &n + forward_task_bypass( graph& g, d1::small_object_allocator& allocator, NodeType &n , node_priority_t node_priority = no_priority ) : graph_task(g, allocator, node_priority), my_node(n) {} - task* execute(execution_data& ed) override { + d1::task* execute(d1::execution_data& ed) override { graph_task* next_task = my_node.forward_task(); if (SUCCESSFULLY_ENQUEUED == next_task) next_task = nullptr; @@ -264,7 +264,7 @@ public: return next_task; } - task* cancel(execution_data& ed) override { + d1::task* cancel(d1::execution_data& ed) override { finalize<forward_task_bypass>(ed); return nullptr; } @@ -272,29 +272,57 @@ public: //! 
A task that calls a node's apply_body_bypass function, passing in an input of type Input // return the task* unless it is SUCCESSFULLY_ENQUEUED, in which case return nullptr -template< typename NodeType, typename Input > -class apply_body_task_bypass : public graph_task { +template< typename NodeType, typename Input, typename BaseTaskType = graph_task> +class apply_body_task_bypass + : public BaseTaskType +{ NodeType &my_node; Input my_input; + + using check_metainfo = std::is_same<BaseTaskType, graph_task>; + using without_metainfo = std::true_type; + using with_metainfo = std::false_type; + + graph_task* call_apply_body_bypass_impl(without_metainfo) { + return my_node.apply_body_bypass(my_input + __TBB_FLOW_GRAPH_METAINFO_ARG(message_metainfo{})); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + graph_task* call_apply_body_bypass_impl(with_metainfo) { + return my_node.apply_body_bypass(my_input, message_metainfo{this->get_msg_wait_context_vertices()}); + } +#endif + + graph_task* call_apply_body_bypass() { + return call_apply_body_bypass_impl(check_metainfo{}); + } + public: +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + template <typename Metainfo> + apply_body_task_bypass( graph& g, d1::small_object_allocator& allocator, NodeType &n, const Input &i, + node_priority_t node_priority, Metainfo&& metainfo ) + : BaseTaskType(g, allocator, node_priority, std::forward<Metainfo>(metainfo).waiters()) + , my_node(n), my_input(i) {} +#endif - apply_body_task_bypass( graph& g, small_object_allocator& allocator, NodeType &n, const Input &i - , node_priority_t node_priority = no_priority - ) : graph_task(g, allocator, node_priority), - my_node(n), my_input(i) {} + apply_body_task_bypass( graph& g, d1::small_object_allocator& allocator, NodeType& n, const Input& i, + node_priority_t node_priority = no_priority ) + : BaseTaskType(g, allocator, node_priority), my_node(n), my_input(i) {} - task* execute(execution_data& ed) override { - graph_task* next_task = my_node.apply_body_bypass( my_input ); + d1::task* execute(d1::execution_data& ed) override { + graph_task* next_task = call_apply_body_bypass(); if (SUCCESSFULLY_ENQUEUED == next_task) next_task = nullptr; else if (next_task) next_task = prioritize_task(my_node.graph_reference(), *next_task); - finalize<apply_body_task_bypass>(ed); + BaseTaskType::template finalize<apply_body_task_bypass>(ed); return next_task; } - task* cancel(execution_data& ed) override { - finalize<apply_body_task_bypass>(ed); + d1::task* cancel(d1::execution_data& ed) override { + BaseTaskType::template finalize<apply_body_task_bypass>(ed); return nullptr; } }; @@ -304,10 +332,10 @@ template< typename NodeType > class input_node_task_bypass : public graph_task { NodeType &my_node; public: - input_node_task_bypass( graph& g, small_object_allocator& allocator, NodeType &n ) + input_node_task_bypass( graph& g, d1::small_object_allocator& allocator, NodeType &n ) : graph_task(g, allocator), my_node(n) {} - task* execute(execution_data& ed) override { + d1::task* execute(d1::execution_data& ed) override { graph_task* next_task = my_node.apply_body_bypass( ); if (SUCCESSFULLY_ENQUEUED == next_task) next_task = nullptr; @@ -317,7 +345,7 @@ public: return next_task; } - task* cancel(execution_data& ed) override { + d1::task* cancel(d1::execution_data& ed) override { finalize<input_node_task_bypass>(ed); return nullptr; } @@ -343,6 +371,15 @@ protected: return result; } +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + // Intentionally ignore the metainformation + // If there 
are more items associated with passed metainfo to be processed + // They should be stored in the buffer before the limiter_node + graph_task* try_put_task(const DecrementType& value, const message_metainfo&) override { + return try_put_task(value); + } +#endif + graph& graph_reference() const override { return my_node->my_graph; } @@ -361,7 +398,14 @@ class threshold_regulator<T, continue_msg, void> : public continue_receiver, no_ T *my_node; +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + // Intentionally ignore the metainformation + // If there are more items associated with passed metainfo to be processed + // They should be stored in the buffer before the limiter_node + graph_task* execute(const message_metainfo&) override { +#else graph_task* execute() override { +#endif return my_node->decrement_counter( 1 ); } diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_cache_impl.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_cache_impl.h index 059f198055..647f3dc1b6 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_cache_impl.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_cache_impl.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2022 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -21,7 +21,7 @@ #error Do not #include this internal file directly; use public TBB headers instead. #endif -// included in namespace tbb::detail::d1 (in flow_graph.h) +// included in namespace tbb::detail::d2 (in flow_graph.h) //! A node_cache maintains a std::queue of elements of type T. Each operation is protected by a lock. template< typename T, typename M=spin_mutex > @@ -98,9 +98,12 @@ public: // Do not work with the passed pointer here as it may not be fully initialized yet } - bool get_item( output_type& v ) { +private: + bool get_item_impl( output_type& v + __TBB_FLOW_GRAPH_METAINFO_ARG(message_metainfo* metainfo_ptr = nullptr) ) + { - bool msg = false; + bool successful_get = false; do { predecessor_type *src; @@ -113,18 +116,35 @@ public: } // Try to get from this sender - msg = src->try_get( v ); +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + if (metainfo_ptr) { + successful_get = src->try_get( v, *metainfo_ptr ); + } else +#endif + { + successful_get = src->try_get( v ); + } - if (msg == false) { + if (successful_get == false) { // Relinquish ownership of the edge register_successor(*src, *my_owner); } else { // Retain ownership of the edge this->add(*src); } - } while ( msg == false ); - return msg; + } while ( successful_get == false ); + return successful_get; } +public: + bool get_item( output_type& v ) { + return get_item_impl(v); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + bool get_item( output_type& v, message_metainfo& metainfo ) { + return get_item_impl(v, &metainfo); + } +#endif // If we are removing arcs (rf_clear_edges), call clear() rather than reset(). 
void reset() { @@ -157,8 +177,9 @@ public: // Do not work with the passed pointer here as it may not be fully initialized yet } - bool try_reserve( output_type &v ) { - bool msg = false; +private: + bool try_reserve_impl( output_type &v __TBB_FLOW_GRAPH_METAINFO_ARG(message_metainfo* metainfo) ) { + bool successful_reserve = false; do { predecessor_type* pred = nullptr; @@ -172,9 +193,16 @@ public: } // Try to get from this sender - msg = pred->try_reserve( v ); +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + if (metainfo) { + successful_reserve = pred->try_reserve( v, *metainfo ); + } else +#endif + { + successful_reserve = pred->try_reserve( v ); + } - if (msg == false) { + if (successful_reserve == false) { typename mutex_type::scoped_lock lock(this->my_mutex); // Relinquish ownership of the edge register_successor( *pred, *this->my_owner ); @@ -183,11 +211,21 @@ public: // Retain ownership of the edge this->add( *pred); } - } while ( msg == false ); + } while ( successful_reserve == false ); - return msg; + return successful_reserve; + } +public: + bool try_reserve( output_type& v ) { + return try_reserve_impl(v __TBB_FLOW_GRAPH_METAINFO_ARG(nullptr)); } +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + bool try_reserve( output_type& v, message_metainfo& metainfo ) { + return try_reserve_impl(v, &metainfo); + } +#endif + bool try_release() { reserved_src.load(std::memory_order_relaxed)->try_release(); reserved_src.store(nullptr, std::memory_order_relaxed); @@ -268,6 +306,9 @@ public: } virtual graph_task* try_put_task( const T& t ) = 0; +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + virtual graph_task* try_put_task( const T& t, const message_metainfo& metainfo ) = 0; +#endif }; // successor_cache<T> //! An abstract cache of successors, specialized to continue_msg @@ -327,6 +368,9 @@ public: } virtual graph_task* try_put_task( const continue_msg& t ) = 0; +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + virtual graph_task* try_put_task( const continue_msg& t, const message_metainfo& metainfo ) = 0; +#endif }; // successor_cache< continue_msg > //! 
A cache of successors that are broadcast to @@ -336,19 +380,12 @@ class broadcast_cache : public successor_cache<T, M> { typedef M mutex_type; typedef typename successor_cache<T,M>::successors_type successors_type; -public: - - broadcast_cache( typename base_type::owner_type* owner ): base_type(owner) { - // Do not work with the passed pointer here as it may not be fully initialized yet - } - - // as above, but call try_put_task instead, and return the last task we received (if any) - graph_task* try_put_task( const T &t ) override { + graph_task* try_put_task_impl( const T& t __TBB_FLOW_GRAPH_METAINFO_ARG(const message_metainfo& metainfo) ) { graph_task * last_task = nullptr; typename mutex_type::scoped_lock l(this->my_mutex, /*write=*/true); typename successors_type::iterator i = this->my_successors.begin(); while ( i != this->my_successors.end() ) { - graph_task *new_task = (*i)->try_put_task(t); + graph_task *new_task = (*i)->try_put_task(t __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo)); // workaround for icc bug graph& graph_ref = (*i)->graph_reference(); last_task = combine_tasks(graph_ref, last_task, new_task); // enqueue if necessary @@ -365,6 +402,21 @@ public: } return last_task; } +public: + + broadcast_cache( typename base_type::owner_type* owner ): base_type(owner) { + // Do not work with the passed pointer here as it may not be fully initialized yet + } + + graph_task* try_put_task( const T &t ) override { + return try_put_task_impl(t __TBB_FLOW_GRAPH_METAINFO_ARG(message_metainfo{})); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + graph_task* try_put_task( const T &t, const message_metainfo& metainfo ) override { + return try_put_task_impl(t, metainfo); + } +#endif // call try_put_task and return list of received tasks bool gather_successful_try_puts( const T &t, graph_task_list& tasks ) { @@ -411,11 +463,15 @@ public: return this->my_successors.size(); } - graph_task* try_put_task( const T &t ) override { +private: + + graph_task* try_put_task_impl( const T &t + __TBB_FLOW_GRAPH_METAINFO_ARG(const message_metainfo& metainfo) ) + { typename mutex_type::scoped_lock l(this->my_mutex, /*write=*/true); typename successors_type::iterator i = this->my_successors.begin(); while ( i != this->my_successors.end() ) { - graph_task* new_task = (*i)->try_put_task(t); + graph_task* new_task = (*i)->try_put_task(t __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo)); if ( new_task ) { return new_task; } else { @@ -429,6 +485,17 @@ public: } return nullptr; } + +public: + graph_task* try_put_task(const T& t) override { + return try_put_task_impl(t __TBB_FLOW_GRAPH_METAINFO_ARG(message_metainfo{})); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + graph_task* try_put_task( const T& t, const message_metainfo& metainfo ) override { + return try_put_task_impl(t, metainfo); + } +#endif }; #endif // __TBB__flow_graph_cache_impl_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_impl.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_impl.h index 8207667f37..55063b93e1 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_impl.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_impl.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2022 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -30,7 +30,7 @@ namespace tbb { namespace detail { -namespace d1 { +namespace d2 { class graph_task; static graph_task* const SUCCESSFULLY_ENQUEUED = (graph_task*)-1; @@ -123,32 +123,98 @@ void enqueue_in_graph_arena(graph &g, graph_task& arena_task); class graph; //! Base class for tasks generated by graph nodes. -class graph_task : public task { +class graph_task : public d1::task { public: - graph_task(graph& g, small_object_allocator& allocator - , node_priority_t node_priority = no_priority - ) - : my_graph(g) - , priority(node_priority) - , my_allocator(allocator) - {} + graph_task(graph& g, d1::small_object_allocator& allocator, + node_priority_t node_priority = no_priority); + graph& my_graph; // graph instance the task belongs to // TODO revamp: rename to my_priority node_priority_t priority; template <typename DerivedType> - void destruct_and_deallocate(const execution_data& ed); + void destruct_and_deallocate(const d1::execution_data& ed); protected: template <typename DerivedType> - void finalize(const execution_data& ed); + void finalize(const d1::execution_data& ed); private: // To organize task_list graph_task* my_next{ nullptr }; - small_object_allocator my_allocator; + d1::small_object_allocator my_allocator; + d1::wait_tree_vertex_interface* my_reference_vertex; // TODO revamp: elaborate internal interfaces to avoid friends declarations friend class graph_task_list; friend graph_task* prioritize_task(graph& g, graph_task& gt); }; +inline bool is_this_thread_in_graph_arena(graph& g); + +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT +class trackable_messages_graph_task : public graph_task { +public: + trackable_messages_graph_task(graph& g, d1::small_object_allocator& allocator, + node_priority_t node_priority, + const std::forward_list<d1::wait_context_vertex*>& msg_waiters) + : graph_task(g, allocator, node_priority) + , my_msg_wait_context_vertices(msg_waiters) + { + auto last_iterator = my_msg_reference_vertices.cbefore_begin(); + + for (auto& msg_waiter : my_msg_wait_context_vertices) { + // If the task is created by the thread outside the graph arena, the lifetime of the thread reference vertex + // may be shorter that the lifetime of the task, so thread reference vertex approach cannot be used + // and the task should be associated with the msg wait context itself + d1::wait_tree_vertex_interface* ref_vertex = is_this_thread_in_graph_arena(g) ? + r1::get_thread_reference_vertex(msg_waiter) : + msg_waiter; + last_iterator = my_msg_reference_vertices.emplace_after(last_iterator, + ref_vertex); + ref_vertex->reserve(1); + } + } + + trackable_messages_graph_task(graph& g, d1::small_object_allocator& allocator, + node_priority_t node_priority, + std::forward_list<d1::wait_context_vertex*>&& msg_waiters) + : graph_task(g, allocator, node_priority) + , my_msg_wait_context_vertices(std::move(msg_waiters)) + { + } + + const std::forward_list<d1::wait_context_vertex*> get_msg_wait_context_vertices() const { + return my_msg_wait_context_vertices; + } + +protected: + template <typename DerivedType> + void finalize(const d1::execution_data& ed) { + auto wait_context_vertices = std::move(my_msg_wait_context_vertices); + auto msg_reference_vertices = std::move(my_msg_reference_vertices); + graph_task::finalize<DerivedType>(ed); + + // If there is no thread reference vertices associated with the task + // then this task was created by transferring the ownership from other metainfo + // instance (e.g. 
while taking from the buffer) + if (msg_reference_vertices.empty()) { + for (auto& msg_waiter : wait_context_vertices) { + msg_waiter->release(1); + } + } else { + for (auto& msg_waiter : msg_reference_vertices) { + msg_waiter->release(1); + } + } + } +private: + // Each task that holds information about single message wait_contexts should hold two lists + // The first one is wait_contexts associated with the message itself. They are needed + // to be able to broadcast the list of wait_contexts to the node successors while executing the task. + // The second list is a list of reference vertices for each wait_context_vertex in the first list + // to support the distributed reference counting schema + std::forward_list<d1::wait_context_vertex*> my_msg_wait_context_vertices; + std::forward_list<d1::wait_tree_vertex_interface*> my_msg_reference_vertices; +}; // class trackable_messages_graph_task +#endif // __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + struct graph_task_comparator { bool operator()(const graph_task* left, const graph_task* right) { return left->priority < right->priority; @@ -157,18 +223,18 @@ struct graph_task_comparator { typedef tbb::concurrent_priority_queue<graph_task*, graph_task_comparator> graph_task_priority_queue_t; -class priority_task_selector : public task { +class priority_task_selector : public d1::task { public: - priority_task_selector(graph_task_priority_queue_t& priority_queue, small_object_allocator& allocator) + priority_task_selector(graph_task_priority_queue_t& priority_queue, d1::small_object_allocator& allocator) : my_priority_queue(priority_queue), my_allocator(allocator), my_task() {} - task* execute(execution_data& ed) override { + task* execute(d1::execution_data& ed) override { next_task(); __TBB_ASSERT(my_task, nullptr); task* t_next = my_task->execute(ed); my_allocator.delete_object(this, ed); return t_next; } - task* cancel(execution_data& ed) override { + task* cancel(d1::execution_data& ed) override { if (!my_task) { next_task(); } @@ -190,7 +256,7 @@ private: } graph_task_priority_queue_t& my_priority_queue; - small_object_allocator my_allocator; + d1::small_object_allocator my_allocator; graph_task* my_task; }; @@ -281,7 +347,7 @@ public: caught_exception = false; try_call([this] { my_task_arena->execute([this] { - wait(my_wait_context, *my_context); + d1::wait(my_wait_context_vertex.get_context(), *my_context); }); cancelled = my_context->is_group_execution_cancelled(); }).on_exception([this] { @@ -332,7 +398,7 @@ public: bool exception_thrown() { return caught_exception; } private: - wait_context my_wait_context; + d1::wait_context_vertex my_wait_context_vertex; task_group_context *my_context; bool own_context; bool cancelled; @@ -349,19 +415,25 @@ private: graph_task_priority_queue_t my_priority_queue; + d1::wait_context_vertex& get_wait_context_vertex() { return my_wait_context_vertex; } + friend void activate_graph(graph& g); friend void deactivate_graph(graph& g); friend bool is_graph_active(graph& g); + friend bool is_this_thread_in_graph_arena(graph& g); friend graph_task* prioritize_task(graph& g, graph_task& arena_task); friend void spawn_in_graph_arena(graph& g, graph_task& arena_task); friend void enqueue_in_graph_arena(graph &g, graph_task& arena_task); - friend class task_arena_base; + friend class d1::task_arena_base; + friend class graph_task; + template <typename T> + friend class receiver; }; // class graph template<typename DerivedType> -inline void graph_task::destruct_and_deallocate(const execution_data& ed) { +inline 
void graph_task::destruct_and_deallocate(const d1::execution_data& ed) { auto allocator = my_allocator; // TODO: investigate if direct call of derived destructor gives any benefits. this->~graph_task(); @@ -369,10 +441,27 @@ inline void graph_task::destruct_and_deallocate(const execution_data& ed) { } template<typename DerivedType> -inline void graph_task::finalize(const execution_data& ed) { - graph& g = my_graph; +inline void graph_task::finalize(const d1::execution_data& ed) { + d1::wait_tree_vertex_interface* reference_vertex = my_reference_vertex; destruct_and_deallocate<DerivedType>(ed); - g.release_wait(); + reference_vertex->release(); +} + +inline graph_task::graph_task(graph& g, d1::small_object_allocator& allocator, + node_priority_t node_priority) + : my_graph(g) + , priority(node_priority) + , my_allocator(allocator) +{ + // If the task is created by the thread outside the graph arena, the lifetime of the thread reference vertex + // may be shorter that the lifetime of the task, so thread reference vertex approach cannot be used + // and the task should be associated with the graph wait context itself + // TODO: consider how reference counting can be improved for such a use case. Most common example is the async_node + d1::wait_context_vertex* graph_wait_context_vertex = &my_graph.get_wait_context_vertex(); + my_reference_vertex = is_this_thread_in_graph_arena(g) ? r1::get_thread_reference_vertex(graph_wait_context_vertex) + : graph_wait_context_vertex; + __TBB_ASSERT(my_reference_vertex, nullptr); + my_reference_vertex->reserve(); } //******************************************************************************** @@ -424,15 +513,20 @@ inline bool is_graph_active(graph& g) { return g.my_is_active; } +inline bool is_this_thread_in_graph_arena(graph& g) { + __TBB_ASSERT(g.my_task_arena && g.my_task_arena->is_active(), nullptr); + return r1::execution_slot(*g.my_task_arena) != d1::slot_id(-1); +} + inline graph_task* prioritize_task(graph& g, graph_task& gt) { if( no_priority == gt.priority ) return > //! Non-preemptive priority pattern. The original task is submitted as a work item to the //! priority queue, and a new critical task is created to take and execute a work item with - //! the highest known priority. The reference counting responsibility is transferred (via - //! allocate_continuation) to the new task. - task* critical_task = gt.my_allocator.new_object<priority_task_selector>(g.my_priority_queue, gt.my_allocator); + //! the highest known priority. The reference counting responsibility is transferred to + //! the new task. + d1::task* critical_task = gt.my_allocator.new_object<priority_task_selector>(g.my_priority_queue, gt.my_allocator); __TBB_ASSERT( critical_task, "bad_alloc?" ); g.my_priority_queue.push(>); using tbb::detail::d1::submit; @@ -443,7 +537,7 @@ inline graph_task* prioritize_task(graph& g, graph_task& gt) { //! Spawns a task inside graph arena inline void spawn_in_graph_arena(graph& g, graph_task& arena_task) { if (is_graph_active(g)) { - task* gt = prioritize_task(g, arena_task); + d1::task* gt = prioritize_task(g, arena_task); if( !gt ) return; @@ -464,12 +558,12 @@ inline void enqueue_in_graph_arena(graph &g, graph_task& arena_task) { __TBB_ASSERT( g.my_task_arena && g.my_task_arena->is_active(), "Is graph's arena initialized and active?" 
); // TODO revamp: decide on the approach that does not postpone critical task - if( task* gt = prioritize_task(g, arena_task) ) + if( d1::task* gt = prioritize_task(g, arena_task) ) submit( *gt, *g.my_task_arena, *g.my_context, /*as_critical=*/false); } } -} // namespace d1 +} // namespace d2 } // namespace detail } // namespace tbb diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_indexer_impl.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_indexer_impl.h index f4f55a6c7a..a743310079 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_indexer_impl.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_indexer_impl.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -21,7 +21,7 @@ #error Do not #include this internal file directly; use public TBB headers instead. #endif -// included in namespace tbb::detail::d1 +// included in namespace tbb::detail::d2 #include "_flow_graph_types_impl.h" @@ -31,9 +31,9 @@ // successor. template<typename IndexerNodeBaseType, typename T, size_t K> - graph_task* do_try_put(const T &v, void *p) { + graph_task* do_try_put(const T &v, void *p __TBB_FLOW_GRAPH_METAINFO_ARG(const message_metainfo& metainfo)) { typename IndexerNodeBaseType::output_type o(K, v); - return reinterpret_cast<IndexerNodeBaseType *>(p)->try_put_task(&o); + return reinterpret_cast<IndexerNodeBaseType *>(p)->try_put_task(&o __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo)); } template<typename TupleTypes,int N> @@ -41,7 +41,7 @@ template<typename IndexerNodeBaseType, typename PortTuple> static inline void set_indexer_node_pointer(PortTuple &my_input, IndexerNodeBaseType *p, graph& g) { typedef typename std::tuple_element<N-1, TupleTypes>::type T; - graph_task* (*indexer_node_put_task)(const T&, void *) = do_try_put<IndexerNodeBaseType, T, N-1>; + auto indexer_node_put_task = do_try_put<IndexerNodeBaseType, T, N-1>; std::get<N-1>(my_input).set_up(p, indexer_node_put_task, g); indexer_helper<TupleTypes,N-1>::template set_indexer_node_pointer<IndexerNodeBaseType,PortTuple>(my_input, p, g); } @@ -52,7 +52,7 @@ template<typename IndexerNodeBaseType, typename PortTuple> static inline void set_indexer_node_pointer(PortTuple &my_input, IndexerNodeBaseType *p, graph& g) { typedef typename std::tuple_element<0, TupleTypes>::type T; - graph_task* (*indexer_node_put_task)(const T&, void *) = do_try_put<IndexerNodeBaseType, T, 0>; + auto indexer_node_put_task = do_try_put<IndexerNodeBaseType, T, 0>; std::get<0>(my_input).set_up(p, indexer_node_put_task, g); } }; @@ -61,7 +61,8 @@ class indexer_input_port : public receiver<T> { private: void* my_indexer_ptr; - typedef graph_task* (* forward_function_ptr)(T const &, void* ); + typedef graph_task* (* forward_function_ptr)(T const &, void* + __TBB_FLOW_GRAPH_METAINFO_ARG(const message_metainfo&)); forward_function_ptr my_try_put_task; graph* my_graph; public: @@ -76,9 +77,15 @@ template<typename X, typename Y> friend class broadcast_cache; template<typename X, typename Y> friend class round_robin_cache; graph_task* try_put_task(const T &v) override { - return my_try_put_task(v, my_indexer_ptr); + return my_try_put_task(v, my_indexer_ptr __TBB_FLOW_GRAPH_METAINFO_ARG(message_metainfo{})); } +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + graph_task* try_put_task(const T& v, const message_metainfo& metainfo) override { + return 
my_try_put_task(v, my_indexer_ptr, metainfo); + } +#endif + graph& graph_reference() const override { return *my_graph; } @@ -118,7 +125,7 @@ }; typedef indexer_node_base<InputTuple,output_type,StructTypes> class_type; - class indexer_node_base_operation : public aggregated_operation<indexer_node_base_operation> { + class indexer_node_base_operation : public d1::aggregated_operation<indexer_node_base_operation> { public: char type; union { @@ -126,15 +133,23 @@ successor_type *my_succ; graph_task* bypass_t; }; +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + message_metainfo const* metainfo; +#endif indexer_node_base_operation(const output_type* e, op_type t) : - type(char(t)), my_arg(e) {} + type(char(t)), my_arg(e) __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo(nullptr)) + {} +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + indexer_node_base_operation(const output_type* e, op_type t, const message_metainfo& info) + : type(char(t)), my_arg(e), metainfo(&info) {} +#endif indexer_node_base_operation(const successor_type &s, op_type t) : type(char(t)), my_succ(const_cast<successor_type *>(&s)) {} }; - typedef aggregating_functor<class_type, indexer_node_base_operation> handler_type; - friend class aggregating_functor<class_type, indexer_node_base_operation>; - aggregator<handler_type, indexer_node_base_operation> my_aggregator; + typedef d1::aggregating_functor<class_type, indexer_node_base_operation> handler_type; + friend class d1::aggregating_functor<class_type, indexer_node_base_operation>; + d1::aggregator<handler_type, indexer_node_base_operation> my_aggregator; void handle_operations(indexer_node_base_operation* op_list) { indexer_node_base_operation *current; @@ -153,7 +168,8 @@ current->status.store( SUCCEEDED, std::memory_order_release); break; case try__put_task: { - current->bypass_t = my_successors.try_put_task(*(current->my_arg)); + current->bypass_t = my_successors.try_put_task(*(current->my_arg) + __TBB_FLOW_GRAPH_METAINFO_ARG(*(current->metainfo))); current->status.store( SUCCEEDED, std::memory_order_release); // return of try_put_task actual return value } break; @@ -186,8 +202,11 @@ return op_data.status == SUCCEEDED; } - graph_task* try_put_task(output_type const *v) { // not a virtual method in this class - indexer_node_base_operation op_data(v, try__put_task); + // not a virtual method in this class + graph_task* try_put_task(output_type const *v + __TBB_FLOW_GRAPH_METAINFO_ARG(const message_metainfo& metainfo)) + { + indexer_node_base_operation op_data(v, try__put_task __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo)); my_aggregator.execute(&op_data); return op_data.bypass_t; } diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_item_buffer_impl.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_item_buffer_impl.h index 423033b1d5..cf7c54b852 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_item_buffer_impl.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_item_buffer_impl.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2022 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -37,8 +37,14 @@ public: typedef T item_type; enum buffer_item_state { no_item=0, has_item=1, reserved_item=2 }; protected: + struct aligned_space_item { + item_type item; + buffer_item_state state; +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + message_metainfo metainfo; +#endif + }; typedef size_t size_type; - typedef std::pair<item_type, buffer_item_state> aligned_space_item; typedef aligned_space<aligned_space_item> buffer_item_type; typedef typename allocator_traits<A>::template rebind_alloc<buffer_item_type> allocator_type; buffer_item_type *my_array; @@ -49,45 +55,89 @@ protected: bool buffer_empty() const { return my_head == my_tail; } - aligned_space_item &item(size_type i) { - __TBB_ASSERT(!(size_type(&(my_array[i&(my_array_size-1)].begin()->second))%alignment_of<buffer_item_state>::value), nullptr); - __TBB_ASSERT(!(size_type(&(my_array[i&(my_array_size-1)].begin()->first))%alignment_of<item_type>::value), nullptr); + aligned_space_item &element(size_type i) { + __TBB_ASSERT(!(size_type(&(my_array[i&(my_array_size-1)].begin()->state))%alignment_of<buffer_item_state>::value), nullptr); + __TBB_ASSERT(!(size_type(&(my_array[i&(my_array_size-1)].begin()->item))%alignment_of<item_type>::value), nullptr); +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + __TBB_ASSERT(!(size_type(&(my_array[i&(my_array_size-1)].begin()->metainfo))%alignment_of<message_metainfo>::value), nullptr); +#endif return *my_array[i & (my_array_size - 1) ].begin(); } - const aligned_space_item &item(size_type i) const { - __TBB_ASSERT(!(size_type(&(my_array[i&(my_array_size-1)].begin()->second))%alignment_of<buffer_item_state>::value), nullptr); - __TBB_ASSERT(!(size_type(&(my_array[i&(my_array_size-1)].begin()->first))%alignment_of<item_type>::value), nullptr); + const aligned_space_item &element(size_type i) const { + __TBB_ASSERT(!(size_type(&(my_array[i&(my_array_size-1)].begin()->state))%alignment_of<buffer_item_state>::value), nullptr); + __TBB_ASSERT(!(size_type(&(my_array[i&(my_array_size-1)].begin()->item))%alignment_of<item_type>::value), nullptr); +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + __TBB_ASSERT(!(size_type(&(my_array[i&(my_array_size-1)].begin()->metainfo))%alignment_of<message_metainfo>::value), nullptr); +#endif return *my_array[i & (my_array_size-1)].begin(); } - bool my_item_valid(size_type i) const { return (i < my_tail) && (i >= my_head) && (item(i).second != no_item); } + bool my_item_valid(size_type i) const { return (i < my_tail) && (i >= my_head) && (element(i).state != no_item); } #if TBB_USE_ASSERT - bool my_item_reserved(size_type i) const { return item(i).second == reserved_item; } + bool my_item_reserved(size_type i) const { return element(i).state == reserved_item; } #endif // object management in buffer const item_type &get_my_item(size_t i) const { __TBB_ASSERT(my_item_valid(i),"attempt to get invalid item"); - item_type* itm = const_cast<item_type*>(reinterpret_cast<const item_type*>(&item(i).first)); - return *itm; + return element(i).item; } +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + message_metainfo& get_my_metainfo(size_t i) { + __TBB_ASSERT(my_item_valid(i), "attempt to get invalid item"); + return element(i).metainfo; + } +#endif + // may be called with an empty slot or a slot that has already been constructed into. 
- void set_my_item(size_t i, const item_type &o) { - if(item(i).second != no_item) { + void set_my_item(size_t i, const item_type &o + __TBB_FLOW_GRAPH_METAINFO_ARG(const message_metainfo& metainfo)) + { + if(element(i).state != no_item) { destroy_item(i); } - new(&(item(i).first)) item_type(o); - item(i).second = has_item; + new(&(element(i).item)) item_type(o); + element(i).state = has_item; +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + new(&element(i).metainfo) message_metainfo(metainfo); + + for (auto& waiter : metainfo.waiters()) { + waiter->reserve(1); + } +#endif + } + +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + void set_my_item(size_t i, const item_type& o, message_metainfo&& metainfo) { + if(element(i).state != no_item) { + destroy_item(i); + } + + new(&(element(i).item)) item_type(o); + new(&element(i).metainfo) message_metainfo(std::move(metainfo)); + // Skipping the reservation on metainfo.waiters since the ownership + // is moving from metainfo to the cache + element(i).state = has_item; } +#endif // destructively-fetch an object from the buffer +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + void fetch_item(size_t i, item_type& o, message_metainfo& metainfo) { + __TBB_ASSERT(my_item_valid(i), "Trying to fetch an empty slot"); + o = get_my_item(i); // could have std::move assign semantics + metainfo = std::move(get_my_metainfo(i)); + destroy_item(i); + } +#else void fetch_item(size_t i, item_type &o) { __TBB_ASSERT(my_item_valid(i), "Trying to fetch an empty slot"); o = get_my_item(i); // could have std::move assign semantics destroy_item(i); } +#endif // move an existing item from one slot to another. The moved-to slot must be unoccupied, // the moved-from slot must exist and not be reserved. The after, from will be empty, @@ -95,12 +145,22 @@ protected: void move_item(size_t to, size_t from) { __TBB_ASSERT(!my_item_valid(to), "Trying to move to a non-empty slot"); __TBB_ASSERT(my_item_valid(from), "Trying to move from an empty slot"); - set_my_item(to, get_my_item(from)); // could have std::move semantics + // could have std::move semantics + set_my_item(to, get_my_item(from) __TBB_FLOW_GRAPH_METAINFO_ARG(get_my_metainfo(from))); destroy_item(from); - } // put an item in an empty slot. 
Return true if successful, else false +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + template <typename Metainfo> + bool place_item(size_t here, const item_type &me, Metainfo&& metainfo) { +#if !TBB_DEPRECATED_SEQUENCER_DUPLICATES + if(my_item_valid(here)) return false; +#endif + set_my_item(here, me, std::forward<Metainfo>(metainfo)); + return true; + } +#else bool place_item(size_t here, const item_type &me) { #if !TBB_DEPRECATED_SEQUENCER_DUPLICATES if(my_item_valid(here)) return false; @@ -108,19 +168,36 @@ protected: set_my_item(here, me); return true; } +#endif // could be implemented with std::move semantics void swap_items(size_t i, size_t j) { __TBB_ASSERT(my_item_valid(i) && my_item_valid(j), "attempt to swap invalid item(s)"); item_type temp = get_my_item(i); +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + message_metainfo temp_metainfo = get_my_metainfo(i); + set_my_item(i, get_my_item(j), get_my_metainfo(j)); + set_my_item(j, temp, temp_metainfo); +#else set_my_item(i, get_my_item(j)); set_my_item(j, temp); +#endif } void destroy_item(size_type i) { __TBB_ASSERT(my_item_valid(i), "destruction of invalid item"); - item(i).first.~item_type(); - item(i).second = no_item; + + auto& e = element(i); + e.item.~item_type(); + e.state = no_item; + +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + for (auto& msg_waiter : e.metainfo.waiters()) { + msg_waiter->release(1); + } + + e.metainfo.~message_metainfo(); +#endif } // returns the front element @@ -130,6 +207,14 @@ protected: return get_my_item(my_head); } +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + const message_metainfo& front_metainfo() const + { + __TBB_ASSERT(my_item_valid(my_head), "attempt to fetch head non-item"); + return element(my_head).metainfo; + } +#endif + // returns the back element const item_type& back() const { @@ -137,9 +222,23 @@ protected: return get_my_item(my_tail - 1); } +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + const message_metainfo& back_metainfo() const { + __TBB_ASSERT(my_item_valid(my_tail - 1), "attempt to fetch head non-item"); + return element(my_tail - 1).metainfo; + } +#endif + // following methods are for reservation of the front of a buffer. 
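The reserve(1)/release(1) loops in set_my_item and destroy_item above form a reference-count handshake: every copy of a message_metainfo held by the buffer keeps its waiters alive while the slot is occupied, and the move overload transfers that count instead of bumping it. A reduced sketch of the contract, using hypothetical names rather than the real TBB wait-context types:

#include <cassert>

// Hypothetical waiter exposing the reserve()/release() counter the buffer relies on.
struct example_waiter {
    int refs = 0;
    void reserve(int n) { refs += n; }                     // a slot pins the waiter
    void release(int n) { refs -= n; assert(refs >= 0); }  // a slot lets it go
    bool still_waited_on() const { return refs > 0; }
};

// Hypothetical slot mirroring the set_my_item/destroy_item behaviour.
struct example_slot {
    example_waiter* waiter = nullptr;
    void store(example_waiter& w) { waiter = &w; w.reserve(1); }  // copy into buffer
    void adopt(example_waiter& w) { waiter = &w; }                // move: count travels with ownership
    void destroy() { if (waiter) { waiter->release(1); waiter = nullptr; } }
};

int main() {
    example_waiter w;
    example_slot s;
    s.store(w);                      // buffered copy pins the waiter
    assert(w.still_waited_on());
    s.destroy();                     // consuming the slot unpins it
    assert(!w.still_waited_on());
}
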
- void reserve_item(size_type i) { __TBB_ASSERT(my_item_valid(i) && !my_item_reserved(i), "item cannot be reserved"); item(i).second = reserved_item; } - void release_item(size_type i) { __TBB_ASSERT(my_item_reserved(i), "item is not reserved"); item(i).second = has_item; } + void reserve_item(size_type i) { + __TBB_ASSERT(my_item_valid(i) && !my_item_reserved(i), "item cannot be reserved"); + element(i).state = reserved_item; + } + + void release_item(size_type i) { + __TBB_ASSERT(my_item_reserved(i), "item is not reserved"); + element(i).state = has_item; + } void destroy_front() { destroy_item(my_head); ++my_head; } void destroy_back() { destroy_item(my_tail-1); --my_tail; } @@ -163,14 +262,18 @@ protected: buffer_item_type* new_array = allocator_type().allocate(new_size); // initialize validity to "no" - for( size_type i=0; i<new_size; ++i ) { new_array[i].begin()->second = no_item; } + for( size_type i=0; i<new_size; ++i ) { new_array[i].begin()->state = no_item; } for( size_type i=my_head; i<my_tail; ++i) { if(my_item_valid(i)) { // sequencer_node may have empty slots // placement-new copy-construct; could be std::move - char *new_space = (char *)&(new_array[i&(new_size-1)].begin()->first); + char *new_space = (char *)&(new_array[i&(new_size-1)].begin()->item); (void)new(new_space) item_type(get_my_item(i)); - new_array[i&(new_size-1)].begin()->second = item(i).second; + new_array[i&(new_size-1)].begin()->state = element(i).state; +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + char* meta_space = (char *)&(new_array[i&(new_size-1)].begin()->metainfo); + ::new(meta_space) message_metainfo(std::move(element(i).metainfo)); +#endif } } @@ -180,33 +283,61 @@ protected: my_array_size = new_size; } - bool push_back(item_type &v) { - if(buffer_full()) { + bool push_back(item_type& v + __TBB_FLOW_GRAPH_METAINFO_ARG(const message_metainfo& metainfo)) + { + if (buffer_full()) { grow_my_array(size() + 1); } - set_my_item(my_tail, v); + set_my_item(my_tail, v __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo)); ++my_tail; return true; } - bool pop_back(item_type &v) { - if (!my_item_valid(my_tail-1)) { + bool pop_back(item_type& v + __TBB_FLOW_GRAPH_METAINFO_ARG(message_metainfo& metainfo)) + { + if (!my_item_valid(my_tail - 1)) { return false; } - v = this->back(); + auto& e = element(my_tail - 1); + v = e.item; +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + metainfo = std::move(e.metainfo); +#endif + destroy_back(); return true; } - bool pop_front(item_type &v) { - if(!my_item_valid(my_head)) { + bool pop_front(item_type& v + __TBB_FLOW_GRAPH_METAINFO_ARG(message_metainfo& metainfo)) + { + if (!my_item_valid(my_head)) { return false; } - v = this->front(); + auto& e = element(my_head); + v = e.item; +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + metainfo = std::move(e.metainfo); +#endif + destroy_front(); return true; } +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + bool pop_back(item_type& v) { + message_metainfo metainfo; + return pop_back(v, metainfo); + } + + bool pop_front(item_type& v) { + message_metainfo metainfo; + return pop_front(v, metainfo); + } +#endif + // This is used both for reset and for grow_my_array. In the case of grow_my_array // we want to retain the values of the head and tail. 
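All of this plumbing exists to support the try_put_and_wait preview, where a producer blocks until one specific message has been fully processed by the graph rather than waiting for the whole graph to drain. Assuming the public preview switch and member name match the internal feature macro used throughout this patch (neither spelling is confirmed by the diff itself), usage would look roughly like:

#define TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT 1   // assumed public switch for the preview
#include <oneapi/tbb/flow_graph.h>
#include <iostream>

int main() {
    using namespace oneapi::tbb::flow;
    graph g;
    function_node<int, int> square(g, unlimited, [](int v) { return v * v; });
    function_node<int, continue_msg> sink(g, unlimited,
        [](int v) { std::cout << v << '\n'; return continue_msg(); });
    make_edge(square, sink);

    // Assumed preview call: returns once this particular message and the work it
    // spawned have been processed, instead of waiting for every in-flight message.
    square.try_put_and_wait(3);

    g.wait_for_all();   // still required for anything else that may be in flight
}
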
void clean_up_buffer(bool reset_pointers) { @@ -261,6 +392,18 @@ protected: return true; } +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + bool reserve_front(T& v, message_metainfo& metainfo) { + if (my_reserved || !my_item_valid(this->my_head)) return false; + my_reserved = true; + // reserving the head + v = this->front(); + metainfo = this->front_metainfo(); + this->reserve_item(this->my_head); + return true; + } +#endif + void consume_front() { __TBB_ASSERT(my_reserved, "Attempt to consume a non-reserved item"); this->destroy_front(); diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_join_impl.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_join_impl.h index 5515421ede..8bca9a2c41 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_join_impl.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_join_impl.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2022 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -21,7 +21,7 @@ #error Do not #include this internal file directly; use public TBB headers instead. #endif -// included into namespace tbb::detail::d1 +// included into namespace tbb::detail::d2 struct forwarding_base : no_assign { forwarding_base(graph &g) : graph_ref(g) {} @@ -89,17 +89,49 @@ return true; } +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + template <typename InputTuple, typename OutputTuple> + static inline bool reserve(InputTuple& my_input, OutputTuple& out, message_metainfo& metainfo) { + message_metainfo element_metainfo; + if (!std::get<N - 1>(my_input).reserve(std::get<N - 1>(out), element_metainfo)) return false; + if (!join_helper<N - 1>::reserve(my_input, out, metainfo)) { + release_my_reservation(my_input); + return false; + } + metainfo.merge(element_metainfo); + return true; + + } +#endif + template<typename InputTuple, typename OutputTuple> static inline bool get_my_item( InputTuple &my_input, OutputTuple &out) { bool res = std::get<N-1>(my_input).get_item(std::get<N-1>(out) ); // may fail return join_helper<N-1>::get_my_item(my_input, out) && res; // do get on other inputs before returning } +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + template <typename InputTuple, typename OutputTuple> + static inline bool get_my_item(InputTuple& my_input, OutputTuple& out, message_metainfo& metainfo) { + message_metainfo element_metainfo; + bool res = std::get<N-1>(my_input).get_item(std::get<N-1>(out), element_metainfo); + metainfo.merge(element_metainfo); + return join_helper<N-1>::get_my_item(my_input, out, metainfo) && res; + } +#endif + template<typename InputTuple, typename OutputTuple> static inline bool get_items(InputTuple &my_input, OutputTuple &out) { return get_my_item(my_input, out); } +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + template <typename InputTuple, typename OutputTuple> + static inline bool get_items(InputTuple& my_input, OutputTuple& out, message_metainfo& metainfo) { + return get_my_item(my_input, out, metainfo); + } +#endif + template<typename InputTuple> static inline void reset_my_port(InputTuple &my_input) { join_helper<N-1>::reset_my_port(my_input); @@ -163,16 +195,43 @@ return std::get<0>( my_input ).reserve( std::get<0>( out ) ); } +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + template <typename InputTuple, typename OutputTuple> + static inline bool reserve(InputTuple& my_input, OutputTuple& out, message_metainfo& metainfo) { + 
message_metainfo element_metainfo; + bool result = std::get<0>(my_input).reserve(std::get<0>(out), element_metainfo); + metainfo.merge(element_metainfo); + return result; + } +#endif + template<typename InputTuple, typename OutputTuple> static inline bool get_my_item( InputTuple &my_input, OutputTuple &out) { return std::get<0>(my_input).get_item(std::get<0>(out)); } +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + template <typename InputTuple, typename OutputTuple> + static inline bool get_my_item(InputTuple& my_input, OutputTuple& out, message_metainfo& metainfo) { + message_metainfo element_metainfo; + bool res = std::get<0>(my_input).get_item(std::get<0>(out), element_metainfo); + metainfo.merge(element_metainfo); + return res; + } +#endif + template<typename InputTuple, typename OutputTuple> static inline bool get_items(InputTuple &my_input, OutputTuple &out) { return get_my_item(my_input, out); } +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + template <typename InputTuple, typename OutputTuple> + static inline bool get_items(InputTuple& my_input, OutputTuple& out, message_metainfo& metainfo) { + return get_my_item(my_input, out, metainfo); + } +#endif + template<typename InputTuple> static inline void reset_my_port(InputTuple &my_input) { std::get<0>(my_input).reset_port(); @@ -216,23 +275,31 @@ }; typedef reserving_port<T> class_type; - class reserving_port_operation : public aggregated_operation<reserving_port_operation> { + class reserving_port_operation : public d1::aggregated_operation<reserving_port_operation> { public: char type; union { T *my_arg; predecessor_type *my_pred; }; - reserving_port_operation(const T& e, op_type t) : - type(char(t)), my_arg(const_cast<T*>(&e)) {} +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + message_metainfo* metainfo; +#endif + reserving_port_operation(const T& e, op_type t __TBB_FLOW_GRAPH_METAINFO_ARG(message_metainfo& info)) : + type(char(t)), my_arg(const_cast<T*>(&e)) + __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo(&info)) {} +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + reserving_port_operation(const T& e, op_type t) + : type(char(t)), my_arg(const_cast<T*>(&e)), metainfo(nullptr) {} +#endif reserving_port_operation(const predecessor_type &s, op_type t) : type(char(t)), my_pred(const_cast<predecessor_type *>(&s)) {} reserving_port_operation(op_type t) : type(char(t)) {} }; - typedef aggregating_functor<class_type, reserving_port_operation> handler_type; - friend class aggregating_functor<class_type, reserving_port_operation>; - aggregator<handler_type, reserving_port_operation> my_aggregator; + typedef d1::aggregating_functor<class_type, reserving_port_operation> handler_type; + friend class d1::aggregating_functor<class_type, reserving_port_operation>; + d1::aggregator<handler_type, reserving_port_operation> my_aggregator; void handle_operations(reserving_port_operation* op_list) { reserving_port_operation *current; @@ -262,14 +329,26 @@ if ( reserved ) { current->status.store( FAILED, std::memory_order_release); } - else if ( my_predecessors.try_reserve( *(current->my_arg) ) ) { - reserved = true; - current->status.store( SUCCEEDED, std::memory_order_release); - } else { - if ( my_predecessors.empty() ) { - my_join->increment_port_count(); + else { + bool reserve_result = false; +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + if (current->metainfo) { + reserve_result = my_predecessors.try_reserve(*(current->my_arg), + *(current->metainfo)); + } else +#endif + { + reserve_result = my_predecessors.try_reserve(*(current->my_arg)); + } + if 
(reserve_result) { + reserved = true; + current->status.store( SUCCEEDED, std::memory_order_release); + } else { + if ( my_predecessors.empty() ) { + my_join->increment_port_count(); + } + current->status.store( FAILED, std::memory_order_release); } - current->status.store( FAILED, std::memory_order_release); } break; case rel_res: @@ -294,6 +373,10 @@ return nullptr; } +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + graph_task* try_put_task(const T&, const message_metainfo&) override { return nullptr; } +#endif + graph& graph_reference() const override { return my_join->graph_ref; } @@ -333,6 +416,14 @@ return op_data.status == SUCCEEDED; } +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + bool reserve( T& v, message_metainfo& metainfo ) { + reserving_port_operation op_data(v, res_item, metainfo); + my_aggregator.execute(&op_data); + return op_data.status == SUCCEEDED; + } +#endif + //! Release the port void release( ) { reserving_port_operation op_data(rel_res); @@ -376,31 +467,42 @@ enum op_type { get__item, res_port, try__put_task }; - class queueing_port_operation : public aggregated_operation<queueing_port_operation> { + class queueing_port_operation : public d1::aggregated_operation<queueing_port_operation> { public: char type; T my_val; T* my_arg; graph_task* bypass_t; +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + message_metainfo* metainfo; +#endif // constructor for value parameter - queueing_port_operation(const T& e, op_type t) : - type(char(t)), my_val(e), my_arg(nullptr) + queueing_port_operation(const T& e, op_type t __TBB_FLOW_GRAPH_METAINFO_ARG(const message_metainfo& info)) + : type(char(t)), my_val(e), my_arg(nullptr) , bypass_t(nullptr) + __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo(const_cast<message_metainfo*>(&info))) {} // constructor for pointer parameter - queueing_port_operation(const T* p, op_type t) : + queueing_port_operation(const T* p, op_type t __TBB_FLOW_GRAPH_METAINFO_ARG(message_metainfo& info)) : type(char(t)), my_arg(const_cast<T*>(p)) , bypass_t(nullptr) + __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo(&info)) + {} +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + queueing_port_operation(const T* p, op_type t) + : type(char(t)), my_arg(const_cast<T*>(p)), bypass_t(nullptr), metainfo(nullptr) {} +#endif // constructor with no parameter queueing_port_operation(op_type t) : type(char(t)), my_arg(nullptr) , bypass_t(nullptr) + __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo(nullptr)) {} }; - typedef aggregating_functor<class_type, queueing_port_operation> handler_type; - friend class aggregating_functor<class_type, queueing_port_operation>; - aggregator<handler_type, queueing_port_operation> my_aggregator; + typedef d1::aggregating_functor<class_type, queueing_port_operation> handler_type; + friend class d1::aggregating_functor<class_type, queueing_port_operation>; + d1::aggregator<handler_type, queueing_port_operation> my_aggregator; void handle_operations(queueing_port_operation* op_list) { queueing_port_operation *current; @@ -412,7 +514,12 @@ case try__put_task: { graph_task* rtask = nullptr; was_empty = this->buffer_empty(); +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + __TBB_ASSERT(current->metainfo, nullptr); + this->push_back(current->my_val, *(current->metainfo)); +#else this->push_back(current->my_val); +#endif if (was_empty) rtask = my_join->decrement_port_count(false); else rtask = SUCCESSFULLY_ENQUEUED; @@ -424,6 +531,11 @@ if(!this->buffer_empty()) { __TBB_ASSERT(current->my_arg, nullptr); *(current->my_arg) = this->front(); +#if 
__TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + if (current->metainfo) { + *(current->metainfo) = this->front_metainfo(); + } +#endif current->status.store( SUCCEEDED, std::memory_order_release); } else { @@ -447,14 +559,27 @@ template< typename R, typename B > friend class run_and_put_task; template<typename X, typename Y> friend class broadcast_cache; template<typename X, typename Y> friend class round_robin_cache; - graph_task* try_put_task(const T &v) override { - queueing_port_operation op_data(v, try__put_task); + + private: + graph_task* try_put_task_impl(const T& v __TBB_FLOW_GRAPH_METAINFO_ARG(const message_metainfo& metainfo)) { + queueing_port_operation op_data(v, try__put_task __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo)); my_aggregator.execute(&op_data); __TBB_ASSERT(op_data.status == SUCCEEDED || !op_data.bypass_t, "inconsistent return from aggregator"); if(!op_data.bypass_t) return SUCCESSFULLY_ENQUEUED; return op_data.bypass_t; } + protected: + graph_task* try_put_task(const T &v) override { + return try_put_task_impl(v __TBB_FLOW_GRAPH_METAINFO_ARG(message_metainfo{})); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + graph_task* try_put_task(const T& v, const message_metainfo& metainfo) override { + return try_put_task_impl(v, metainfo); + } +#endif + graph& graph_reference() const override { return my_join->graph_ref; } @@ -481,6 +606,14 @@ return op_data.status == SUCCEEDED; } +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + bool get_item( T& v, message_metainfo& metainfo ) { + queueing_port_operation op_data(&v, get__item, metainfo); + my_aggregator.execute(&op_data); + return op_data.status == SUCCEEDED; + } +#endif + // reset_port is called when item is accepted by successor, but // is initiated by join_node. void reset_port() { @@ -517,13 +650,23 @@ const K& operator()(const table_item_type& v) { return v.my_key; } }; + template <typename K, typename T, typename TtoK, typename KHash> + struct key_matching_port_base { +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + using type = metainfo_hash_buffer<K, T, TtoK, KHash>; +#else + using type = hash_buffer<K, T, TtoK, KHash>; +#endif + }; + // the ports can have only one template parameter. 
We wrap the types needed in // a traits type template< class TraitsType > class key_matching_port : public receiver<typename TraitsType::T>, - public hash_buffer< typename TraitsType::K, typename TraitsType::T, typename TraitsType::TtoK, - typename TraitsType::KHash > { + public key_matching_port_base< typename TraitsType::K, typename TraitsType::T, typename TraitsType::TtoK, + typename TraitsType::KHash >::type + { public: typedef TraitsType traits; typedef key_matching_port<traits> class_type; @@ -533,7 +676,7 @@ typedef typename receiver<input_type>::predecessor_type predecessor_type; typedef typename TraitsType::TtoK type_to_key_func_type; typedef typename TraitsType::KHash hash_compare_type; - typedef hash_buffer< key_type, input_type, type_to_key_func_type, hash_compare_type > buffer_type; + typedef typename key_matching_port_base<key_type, input_type, type_to_key_func_type, hash_compare_type>::type buffer_type; private: // ----------- Aggregator ------------ @@ -541,24 +684,33 @@ enum op_type { try__put, get__item, res_port }; - class key_matching_port_operation : public aggregated_operation<key_matching_port_operation> { + class key_matching_port_operation : public d1::aggregated_operation<key_matching_port_operation> { public: char type; input_type my_val; input_type *my_arg; +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + message_metainfo* metainfo = nullptr; +#endif // constructor for value parameter - key_matching_port_operation(const input_type& e, op_type t) : - type(char(t)), my_val(e), my_arg(nullptr) {} + key_matching_port_operation(const input_type& e, op_type t + __TBB_FLOW_GRAPH_METAINFO_ARG(const message_metainfo& info)) + : type(char(t)), my_val(e), my_arg(nullptr) + __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo(const_cast<message_metainfo*>(&info))) {} + // constructor for pointer parameter - key_matching_port_operation(const input_type* p, op_type t) : - type(char(t)), my_arg(const_cast<input_type*>(p)) {} + key_matching_port_operation(const input_type* p, op_type t + __TBB_FLOW_GRAPH_METAINFO_ARG(message_metainfo& info)) + : type(char(t)), my_arg(const_cast<input_type*>(p)) + __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo(&info)) {} + // constructor with no parameter key_matching_port_operation(op_type t) : type(char(t)), my_arg(nullptr) {} }; - typedef aggregating_functor<class_type, key_matching_port_operation> handler_type; - friend class aggregating_functor<class_type, key_matching_port_operation>; - aggregator<handler_type, key_matching_port_operation> my_aggregator; + typedef d1::aggregating_functor<class_type, key_matching_port_operation> handler_type; + friend class d1::aggregating_functor<class_type, key_matching_port_operation>; + d1::aggregator<handler_type, key_matching_port_operation> my_aggregator; void handle_operations(key_matching_port_operation* op_list) { key_matching_port_operation *current; @@ -567,18 +719,35 @@ op_list = op_list->next; switch(current->type) { case try__put: { +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + __TBB_ASSERT(current->metainfo, nullptr); + bool was_inserted = this->insert_with_key(current->my_val, *(current->metainfo)); +#else bool was_inserted = this->insert_with_key(current->my_val); +#endif // return failure if a duplicate insertion occurs current->status.store( was_inserted ? 
SUCCEEDED : FAILED, std::memory_order_release); } break; - case get__item: + case get__item: { // use current_key from FE for item __TBB_ASSERT(current->my_arg, nullptr); - if(!this->find_with_key(my_join->current_key, *(current->my_arg))) { +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + __TBB_ASSERT(current->metainfo, nullptr); + bool find_result = this->find_with_key(my_join->current_key, *(current->my_arg), + *(current->metainfo)); +#else + bool find_result = this->find_with_key(my_join->current_key, *(current->my_arg)); +#endif +#if TBB_USE_DEBUG + if (!find_result) { __TBB_ASSERT(false, "Failed to find item corresponding to current_key."); } +#else + tbb::detail::suppress_unused_warning(find_result); +#endif current->status.store( SUCCEEDED, std::memory_order_release); + } break; case res_port: // use current_key from FE for item @@ -593,17 +762,28 @@ template< typename R, typename B > friend class run_and_put_task; template<typename X, typename Y> friend class broadcast_cache; template<typename X, typename Y> friend class round_robin_cache; - graph_task* try_put_task(const input_type& v) override { - key_matching_port_operation op_data(v, try__put); + private: + graph_task* try_put_task_impl(const input_type& v __TBB_FLOW_GRAPH_METAINFO_ARG(const message_metainfo& metainfo)) { + key_matching_port_operation op_data(v, try__put __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo)); graph_task* rtask = nullptr; my_aggregator.execute(&op_data); if(op_data.status == SUCCEEDED) { - rtask = my_join->increment_key_count((*(this->get_key_func()))(v)); // may spawn + rtask = my_join->increment_key_count((*(this->get_key_func()))(v)); // may spawn // rtask has to reflect the return status of the try_put if(!rtask) rtask = SUCCESSFULLY_ENQUEUED; } return rtask; } + protected: + graph_task* try_put_task(const input_type& v) override { + return try_put_task_impl(v __TBB_FLOW_GRAPH_METAINFO_ARG(message_metainfo{})); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + graph_task* try_put_task(const input_type& v, const message_metainfo& metainfo) override { + return try_put_task_impl(v, metainfo); + } +#endif graph& graph_reference() const override { return my_join->graph_ref; @@ -640,6 +820,15 @@ return op_data.status == SUCCEEDED; } +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + bool get_item( input_type& v, message_metainfo& metainfo ) { + // aggregator uses current_key from FE for Key + key_matching_port_operation op_data(&v, get__item, metainfo); + my_aggregator.execute(&op_data); + return op_data.status == SUCCEEDED; + } +#endif + // reset_port is called when item is accepted by successor, but // is initiated by join_node. 
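Every operation on these ports (try__put, get__item, res_port) goes through a d1::aggregator: the caller fills in a small operation record, submits it, and whichever thread takes the handler role drains the queued records serially in handle_operations while the submitters wait on the status field. A stripped-down sketch of that pattern with hypothetical names, standing in for the real d1 classes:

#include <atomic>
#include <mutex>

enum class op_status { waiting, succeeded, failed };

// Minimal stand-in for aggregated_operation: a record the submitter can wait on.
struct example_operation {
    int type = 0;
    std::atomic<op_status> status{op_status::waiting};
};

class example_aggregator {
    std::mutex serializer;   // stands in for the handler-election logic of the real aggregator
public:
    template <typename Handler>
    void execute(example_operation* op, Handler handle_operations) {
        // The real aggregator links op into a lock-free list and the elected thread
        // drains the whole batch; a mutex gives the same "handled exactly once,
        // serially" guarantee in the simplest possible form.
        std::lock_guard<std::mutex> lock(serializer);
        handle_operations(op);
    }
};

int main() {
    example_aggregator agg;
    example_operation op;
    op.type = 1;
    agg.execute(&op, [](example_operation* o) {
        // the handler publishes the outcome, just like handle_operations above
        o->status.store(op_status::succeeded, std::memory_order_release);
    });
    return op.status.load(std::memory_order_acquire) == op_status::succeeded ? 0 : 1;
}
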
void reset_port() { @@ -695,10 +884,9 @@ graph_task* decrement_port_count() override { if(ports_with_no_inputs.fetch_sub(1) == 1) { if(is_graph_active(this->graph_ref)) { - small_object_allocator allocator{}; + d1::small_object_allocator allocator{}; typedef forward_task_bypass<base_node_type> task_type; graph_task* t = allocator.new_object<task_type>(graph_ref, allocator, *my_node); - graph_ref.reserve_wait(); spawn_in_graph_arena(this->graph_ref, *t); } } @@ -726,6 +914,13 @@ return join_helper<N>::reserve(my_inputs, out); } +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + bool try_to_make_tuple(output_type &out, message_metainfo& metainfo) { + if (ports_with_no_inputs) return false; + return join_helper<N>::reserve(my_inputs, out, metainfo); + } +#endif + void tuple_accepted() { join_helper<N>::consume_reservations(my_inputs); } @@ -768,10 +963,9 @@ { if(ports_with_no_items.fetch_sub(1) == 1) { if(is_graph_active(this->graph_ref)) { - small_object_allocator allocator{}; + d1::small_object_allocator allocator{}; typedef forward_task_bypass<base_node_type> task_type; graph_task* t = allocator.new_object<task_type>(graph_ref, allocator, *my_node); - graph_ref.reserve_wait(); if( !handle_task ) return t; spawn_in_graph_arena(this->graph_ref, *t); @@ -800,6 +994,13 @@ return join_helper<N>::get_items(my_inputs, out); } +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + bool try_to_make_tuple(output_type &out, message_metainfo& metainfo) { + if(ports_with_no_items) return false; + return join_helper<N>::get_items(my_inputs, out, metainfo); + } +#endif + void tuple_accepted() { reset_port_count(); join_helper<N>::reset_ports(my_inputs); @@ -854,23 +1055,30 @@ enum op_type { res_count, inc_count, may_succeed, try_make }; typedef join_node_FE<key_matching<key_type,key_hash_compare>, InputTuple, OutputTuple> class_type; - class key_matching_FE_operation : public aggregated_operation<key_matching_FE_operation> { + class key_matching_FE_operation : public d1::aggregated_operation<key_matching_FE_operation> { public: char type; unref_key_type my_val; output_type* my_output; graph_task* bypass_t; +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + message_metainfo* metainfo = nullptr; +#endif // constructor for value parameter key_matching_FE_operation(const unref_key_type& e , op_type t) : type(char(t)), my_val(e), my_output(nullptr), bypass_t(nullptr) {} key_matching_FE_operation(output_type *p, op_type t) : type(char(t)), my_output(p), bypass_t(nullptr) {} +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + key_matching_FE_operation(output_type *p, op_type t, message_metainfo& info) + : type(char(t)), my_output(p), bypass_t(nullptr), metainfo(&info) {} +#endif // constructor with no parameter key_matching_FE_operation(op_type t) : type(char(t)), my_output(nullptr), bypass_t(nullptr) {} }; - typedef aggregating_functor<class_type, key_matching_FE_operation> handler_type; - friend class aggregating_functor<class_type, key_matching_FE_operation>; - aggregator<handler_type, key_matching_FE_operation> my_aggregator; + typedef d1::aggregating_functor<class_type, key_matching_FE_operation> handler_type; + friend class d1::aggregating_functor<class_type, key_matching_FE_operation>; + d1::aggregator<handler_type, key_matching_FE_operation> my_aggregator; // called from aggregator, so serialized // returns a task pointer if the a task would have been enqueued but we asked that @@ -881,13 +1089,15 @@ bool do_fwd = this->buffer_empty() && is_graph_active(this->graph_ref); this->current_key = t; 
this->delete_with_key(this->current_key); // remove the key - if(join_helper<N>::get_items(my_inputs, l_out)) { // <== call back - this->push_back(l_out); +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + message_metainfo metainfo; +#endif + if(join_helper<N>::get_items(my_inputs, l_out __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo))) { // <== call back + this->push_back(l_out __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo)); if(do_fwd) { // we enqueue if receiving an item from predecessor, not if successor asks for item - small_object_allocator allocator{}; + d1::small_object_allocator allocator{}; typedef forward_task_bypass<base_node_type> task_type; rtask = allocator.new_object<task_type>(this->graph_ref, allocator, *my_node); - this->graph_ref.reserve_wait(); do_fwd = false; } // retire the input values @@ -937,6 +1147,11 @@ } else { *(current->my_output) = this->front(); +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + if (current->metainfo) { + *(current->metainfo) = this->front_metainfo(); + } +#endif current->status.store( SUCCEEDED, std::memory_order_release); } break; @@ -1010,6 +1225,14 @@ return op_data.status == SUCCEEDED; } +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + bool try_to_make_tuple(output_type &out, message_metainfo& metainfo) { + key_matching_FE_operation op_data(&out, try_make, metainfo); + my_aggregator.execute(&op_data); + return op_data.status == SUCCEEDED; + } +#endif + void tuple_accepted() { reset_port_count(); // reset current_key after ports reset. } @@ -1044,7 +1267,7 @@ }; typedef join_node_base<JP,InputTuple,OutputTuple> class_type; - class join_node_base_operation : public aggregated_operation<join_node_base_operation> { + class join_node_base_operation : public d1::aggregated_operation<join_node_base_operation> { public: char type; union { @@ -1052,17 +1275,25 @@ successor_type *my_succ; }; graph_task* bypass_t; - join_node_base_operation(const output_type& e, op_type t) : type(char(t)), - my_arg(const_cast<output_type*>(&e)), bypass_t(nullptr) {} +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + message_metainfo* metainfo; +#endif + join_node_base_operation(const output_type& e, op_type t __TBB_FLOW_GRAPH_METAINFO_ARG(message_metainfo& info)) + : type(char(t)), my_arg(const_cast<output_type*>(&e)), bypass_t(nullptr) + __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo(&info)) {} +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + join_node_base_operation(const output_type& e, op_type t) + : type(char(t)), my_arg(const_cast<output_type*>(&e)), bypass_t(nullptr), metainfo(nullptr) {} +#endif join_node_base_operation(const successor_type &s, op_type t) : type(char(t)), my_succ(const_cast<successor_type *>(&s)), bypass_t(nullptr) {} join_node_base_operation(op_type t) : type(char(t)), bypass_t(nullptr) {} }; - typedef aggregating_functor<class_type, join_node_base_operation> handler_type; - friend class aggregating_functor<class_type, join_node_base_operation>; + typedef d1::aggregating_functor<class_type, join_node_base_operation> handler_type; + friend class d1::aggregating_functor<class_type, join_node_base_operation>; bool forwarder_busy; - aggregator<handler_type, join_node_base_operation> my_aggregator; + d1::aggregator<handler_type, join_node_base_operation> my_aggregator; void handle_operations(join_node_base_operation* op_list) { join_node_base_operation *current; @@ -1073,10 +1304,9 @@ case reg_succ: { my_successors.register_successor(*(current->my_succ)); if(tuple_build_may_succeed() && !forwarder_busy && is_graph_active(my_graph)) { - small_object_allocator allocator{}; 
+ d1::small_object_allocator allocator{}; typedef forward_task_bypass< join_node_base<JP, InputTuple, OutputTuple> > task_type; graph_task* t = allocator.new_object<task_type>(my_graph, allocator, *this); - my_graph.reserve_wait(); spawn_in_graph_arena(my_graph, *t); forwarder_busy = true; } @@ -1089,7 +1319,26 @@ break; case try__get: if(tuple_build_may_succeed()) { - if(try_to_make_tuple(*(current->my_arg))) { + bool make_tuple_result = false; +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + if (current->metainfo) { + make_tuple_result = try_to_make_tuple(*(current->my_arg), *(current->metainfo)); + } else +#endif + { + make_tuple_result = try_to_make_tuple(*(current->my_arg)); + } + if(make_tuple_result) { +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + if (current->metainfo) { + // Since elements would be removed from queues while calling to tuple_accepted + // together with corresponding message_metainfo objects + // we need to prolong the wait until the successor would create a task for removed elements + for (auto waiter : current->metainfo->waiters()) { + waiter->reserve(1); + } + } +#endif tuple_accepted(); current->status.store( SUCCEEDED, std::memory_order_release); } @@ -1110,9 +1359,14 @@ // them from the input ports after forwarding is complete? if(tuple_build_may_succeed()) { // checks output queue of FE do { - build_succeeded = try_to_make_tuple(out); // fetch front_end of queue +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + message_metainfo metainfo; +#endif + // fetch front_end of queue + build_succeeded = try_to_make_tuple(out __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo)); if(build_succeeded) { - graph_task *new_task = my_successors.try_put_task(out); + graph_task *new_task = + my_successors.try_put_task(out __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo)); last_task = combine_tasks(my_graph, last_task, new_task); if(new_task) { tuple_accepted(); @@ -1175,6 +1429,14 @@ return op_data.status == SUCCEEDED; } +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + bool try_get( output_type &v, message_metainfo& metainfo) override { + join_node_base_operation op_data(v, try__get, metainfo); + my_aggregator.execute(&op_data); + return op_data.status == SUCCEEDED; + } +#endif + protected: void reset_node(reset_flags f) override { input_ports_type::reset(f); diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_node_impl.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_node_impl.h index b79c53ddbf..336cb069c6 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_node_impl.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_node_impl.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2023 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -34,6 +34,12 @@ public: return this->item_buffer<T, A>::front(); } +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + const message_metainfo& front_metainfo() const { + return this->item_buffer<T,A>::front_metainfo(); + } +#endif + void pop() { this->destroy_front(); } @@ -41,6 +47,12 @@ public: bool push( T& t ) { return this->push_back( t ); } + +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + bool push( T& t, const message_metainfo& metainfo ) { + return this->push_back(t, metainfo); + } +#endif }; //! 
Input and scheduling for a function node that takes a type Input as input @@ -87,11 +99,14 @@ public: } graph_task* try_put_task( const input_type& t) override { - if ( my_is_no_throw ) - return try_put_task_impl(t, has_policy<lightweight, Policy>()); - else - return try_put_task_impl(t, std::false_type()); + return try_put_task_base(t __TBB_FLOW_GRAPH_METAINFO_ARG(message_metainfo{})); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + graph_task* try_put_task( const input_type& t, const message_metainfo& metainfo ) override { + return try_put_task_base(t, metainfo); } +#endif // __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT //! Adds src to the list of cached predecessors. bool register_predecessor( predecessor_type &src ) override { @@ -148,9 +163,12 @@ protected: private: friend class apply_body_task_bypass< class_type, input_type >; +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + friend class apply_body_task_bypass< class_type, input_type, trackable_messages_graph_task >; +#endif friend class forward_task_bypass< class_type >; - class operation_type : public aggregated_operation< operation_type > { + class operation_type : public d1::aggregated_operation< operation_type > { public: char type; union { @@ -158,31 +176,49 @@ private: predecessor_type *r; }; graph_task* bypass_t; +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + message_metainfo* metainfo; +#endif operation_type(const input_type& e, op_type t) : - type(char(t)), elem(const_cast<input_type*>(&e)), bypass_t(nullptr) {} + type(char(t)), elem(const_cast<input_type*>(&e)), bypass_t(nullptr) +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + , metainfo(nullptr) +#endif + {} +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + operation_type(const input_type& e, op_type t, const message_metainfo& info) : + type(char(t)), elem(const_cast<input_type*>(&e)), bypass_t(nullptr), + metainfo(const_cast<message_metainfo*>(&info)) {} +#endif operation_type(op_type t) : type(char(t)), r(nullptr), bypass_t(nullptr) {} }; bool forwarder_busy; - typedef aggregating_functor<class_type, operation_type> handler_type; - friend class aggregating_functor<class_type, operation_type>; - aggregator< handler_type, operation_type > my_aggregator; + typedef d1::aggregating_functor<class_type, operation_type> handler_type; + friend class d1::aggregating_functor<class_type, operation_type>; + d1::aggregator< handler_type, operation_type > my_aggregator; graph_task* perform_queued_requests() { graph_task* new_task = nullptr; if(my_queue) { if(!my_queue->empty()) { ++my_concurrency; - new_task = create_body_task(my_queue->front()); + // TODO: consider removing metainfo from the queue using move semantics to avoid + // ref counter increase + new_task = create_body_task(my_queue->front() + __TBB_FLOW_GRAPH_METAINFO_ARG(my_queue->front_metainfo())); my_queue->pop(); } } else { input_type i; - if(my_predecessors.get_item(i)) { +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + message_metainfo metainfo; +#endif + if(my_predecessors.get_item(i __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo))) { ++my_concurrency; - new_task = create_body_task(i); + new_task = create_body_task(i __TBB_FLOW_GRAPH_METAINFO_ARG(std::move(metainfo))); } } return new_task; @@ -233,10 +269,13 @@ private: __TBB_ASSERT(my_max_concurrency != 0, nullptr); if (my_concurrency < my_max_concurrency) { ++my_concurrency; - graph_task * new_task = create_body_task(*(op->elem)); + graph_task* new_task = create_body_task(*(op->elem) + __TBB_FLOW_GRAPH_METAINFO_ARG(*(op->metainfo))); op->bypass_t = new_task; 
op->status.store(SUCCEEDED, std::memory_order_release); - } else if ( my_queue && my_queue->push(*(op->elem)) ) { + } else if ( my_queue && my_queue->push(*(op->elem) + __TBB_FLOW_GRAPH_METAINFO_ARG(*(op->metainfo))) ) + { op->bypass_t = SUCCESSFULLY_ENQUEUED; op->status.store(SUCCEEDED, std::memory_order_release); } else { @@ -258,8 +297,10 @@ private: } } - graph_task* internal_try_put_bypass( const input_type& t ) { - operation_type op_data(t, tryput_bypass); + graph_task* internal_try_put_bypass( const input_type& t + __TBB_FLOW_GRAPH_METAINFO_ARG(const message_metainfo& metainfo)) + { + operation_type op_data(t, tryput_bypass __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo)); my_aggregator.execute(&op_data); if( op_data.status == SUCCEEDED ) { return op_data.bypass_t; @@ -267,43 +308,75 @@ private: return nullptr; } - graph_task* try_put_task_impl( const input_type& t, /*lightweight=*/std::true_type ) { + graph_task* try_put_task_base(const input_type& t + __TBB_FLOW_GRAPH_METAINFO_ARG(const message_metainfo& metainfo)) + { + if ( my_is_no_throw ) + return try_put_task_impl(t, has_policy<lightweight, Policy>() + __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo)); + else + return try_put_task_impl(t, std::false_type() + __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo)); + } + + graph_task* try_put_task_impl( const input_type& t, /*lightweight=*/std::true_type + __TBB_FLOW_GRAPH_METAINFO_ARG(const message_metainfo& metainfo)) + { if( my_max_concurrency == 0 ) { - return apply_body_bypass(t); + return apply_body_bypass(t __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo)); } else { operation_type check_op(t, occupy_concurrency); my_aggregator.execute(&check_op); if( check_op.status == SUCCEEDED ) { - return apply_body_bypass(t); + return apply_body_bypass(t __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo)); } - return internal_try_put_bypass(t); + return internal_try_put_bypass(t __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo)); } } - graph_task* try_put_task_impl( const input_type& t, /*lightweight=*/std::false_type ) { + graph_task* try_put_task_impl( const input_type& t, /*lightweight=*/std::false_type + __TBB_FLOW_GRAPH_METAINFO_ARG(const message_metainfo& metainfo)) + { if( my_max_concurrency == 0 ) { - return create_body_task(t); + return create_body_task(t __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo)); } else { - return internal_try_put_bypass(t); + return internal_try_put_bypass(t __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo)); } } //! Applies the body to the provided input // then decides if more work is available - graph_task* apply_body_bypass( const input_type &i ) { - return static_cast<ImplType *>(this)->apply_body_impl_bypass(i); + graph_task* apply_body_bypass( const input_type &i + __TBB_FLOW_GRAPH_METAINFO_ARG(const message_metainfo& metainfo)) + + { + return static_cast<ImplType *>(this)->apply_body_impl_bypass(i __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo)); } //! 
allocates a task to apply a body - graph_task* create_body_task( const input_type &input ) { +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + template <typename Metainfo> + graph_task* create_body_task( const input_type &input, Metainfo&& metainfo ) +#else + graph_task* create_body_task( const input_type &input ) +#endif + { if (!is_graph_active(my_graph_ref)) { return nullptr; } // TODO revamp: extract helper for common graph task allocation part - small_object_allocator allocator{}; - typedef apply_body_task_bypass<class_type, input_type> task_type; - graph_task* t = allocator.new_object<task_type>( my_graph_ref, allocator, *this, input, my_priority ); - graph_reference().reserve_wait(); + d1::small_object_allocator allocator{}; + graph_task* t = nullptr; +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + if (!metainfo.empty()) { + using task_type = apply_body_task_bypass<class_type, input_type, trackable_messages_graph_task>; + t = allocator.new_object<task_type>(my_graph_ref, allocator, *this, input, my_priority, std::forward<Metainfo>(metainfo)); + } else +#endif + { + using task_type = apply_body_task_bypass<class_type, input_type>; + t = allocator.new_object<task_type>(my_graph_ref, allocator, *this, input, my_priority); + } return t; } @@ -327,10 +400,9 @@ private: if (!is_graph_active(my_graph_ref)) { return nullptr; } - small_object_allocator allocator{}; + d1::small_object_allocator allocator{}; typedef forward_task_bypass<class_type> task_type; graph_task* t = allocator.new_object<task_type>( graph_reference(), allocator, *this, my_priority ); - graph_reference().reserve_wait(); return t; } @@ -398,7 +470,9 @@ public: } //TODO: consider moving into the base class - graph_task* apply_body_impl_bypass( const input_type &i) { + graph_task* apply_body_impl_bypass( const input_type &i + __TBB_FLOW_GRAPH_METAINFO_ARG(const message_metainfo& metainfo)) + { output_type v = apply_body_impl(i); graph_task* postponed_task = nullptr; if( base_type::my_max_concurrency != 0 ) { @@ -410,7 +484,7 @@ public: // execution policy spawn_in_graph_arena(base_type::graph_reference(), *postponed_task); } - graph_task* successor_task = successors().try_put_task(v); + graph_task* successor_task = successors().try_put_task(v __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo)); #if _MSC_VER && !__INTEL_COMPILER #pragma warning (push) #pragma warning (disable: 4127) /* suppress conditional expression is constant */ @@ -524,7 +598,9 @@ public: // for multifunction nodes we do not have a single successor as such. So we just tell // the task we were successful. 
//TODO: consider moving common parts with implementation in function_input into separate function - graph_task* apply_body_impl_bypass( const input_type &i ) { + graph_task* apply_body_impl_bypass( const input_type &i + __TBB_FLOW_GRAPH_METAINFO_ARG(const message_metainfo&) ) + { fgt_begin_body( my_body ); (*my_body)(i, my_output_ports); fgt_end_body( my_body ); @@ -578,6 +654,18 @@ struct emit_element { check_task_and_spawn(g, last_task); return emit_element<N-1>::emit_this(g,t,p); } + +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + template <typename TupleType, typename PortsType> + static graph_task* emit_this(graph& g, const TupleType& t, PortsType& p, + const message_metainfo& metainfo) + { + // TODO: consider to collect all the tasks in task_list and spawn them all at once + graph_task* last_task = std::get<N-1>(p).try_put_task(std::get<N-1>(t), metainfo); + check_task_and_spawn(g, last_task); + return emit_element<N-1>::emit_this(g, t, p, metainfo); + } +#endif }; template<> @@ -588,6 +676,17 @@ struct emit_element<1> { check_task_and_spawn(g, last_task); return SUCCESSFULLY_ENQUEUED; } + +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + template <typename TupleType, typename PortsType> + static graph_task* emit_this(graph& g, const TupleType& t, PortsType& ports, + const message_metainfo& metainfo) + { + graph_task* last_task = std::get<0>(ports).try_put_task(std::get<0>(t), metainfo); + check_task_and_spawn(g, last_task); + return SUCCESSFULLY_ENQUEUED; + } +#endif }; //! Implements methods for an executable node that takes continue_msg as input @@ -654,18 +753,25 @@ protected: virtual broadcast_cache<output_type > &successors() = 0; friend class apply_body_task_bypass< class_type, continue_msg >; +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + friend class apply_body_task_bypass< class_type, continue_msg, trackable_messages_graph_task >; +#endif //! 
Applies the body to the provided input - graph_task* apply_body_bypass( input_type ) { + graph_task* apply_body_bypass( input_type __TBB_FLOW_GRAPH_METAINFO_ARG(const message_metainfo& metainfo) ) { // There is an extra copied needed to capture the // body execution without the try_put fgt_begin_body( my_body ); output_type v = (*my_body)( continue_msg() ); fgt_end_body( my_body ); - return successors().try_put_task( v ); + return successors().try_put_task( v __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo) ); } +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + graph_task* execute(const message_metainfo& metainfo) override { +#else graph_task* execute() override { +#endif if(!is_graph_active(my_graph_ref)) { return nullptr; } @@ -677,13 +783,21 @@ protected: #if _MSC_VER && !__INTEL_COMPILER #pragma warning (pop) #endif - return apply_body_bypass( continue_msg() ); + return apply_body_bypass( continue_msg() __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo) ); } else { - small_object_allocator allocator{}; - typedef apply_body_task_bypass<class_type, continue_msg> task_type; - graph_task* t = allocator.new_object<task_type>( graph_reference(), allocator, *this, continue_msg(), my_priority ); - graph_reference().reserve_wait(); + d1::small_object_allocator allocator{}; + graph_task* t = nullptr; +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + if (!metainfo.empty()) { + using task_type = apply_body_task_bypass<class_type, continue_msg, trackable_messages_graph_task>; + t = allocator.new_object<task_type>( graph_reference(), allocator, *this, continue_msg(), my_priority, metainfo ); + } else +#endif + { + using task_type = apply_body_task_bypass<class_type, continue_msg>; + t = allocator.new_object<task_type>( graph_reference(), allocator, *this, continue_msg(), my_priority ); + } return t; } } @@ -755,6 +869,12 @@ protected: return my_successors.try_put_task(i); } +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + graph_task* try_put_task(const output_type& i, const message_metainfo& metainfo) { + return my_successors.try_put_task(i, metainfo); + } +#endif + template <int N> friend struct emit_element; }; // multifunction_output diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_node_set_impl.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_node_set_impl.h index ce867121f9..8440bd7008 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_node_set_impl.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_node_set_impl.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2020-2021 Intel Corporation + Copyright (c) 2020-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -21,7 +21,7 @@ #error Do not #include this internal file directly; use public TBB headers instead. 
#endif -// Included in namespace tbb::detail::d1 (in flow_graph.h) +// Included in namespace tbb::detail::d2 (in flow_graph.h) #if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET // Visual Studio 2019 reports an error while calling predecessor_selector::get and successor_selector::get diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_nodes_deduction.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_nodes_deduction.h index 8c20993795..47ecfb2a84 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_nodes_deduction.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_nodes_deduction.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -21,7 +21,7 @@ namespace tbb { namespace detail { -namespace d1 { +namespace d2 { template <typename Input, typename Output> struct declare_body_types { @@ -51,10 +51,10 @@ template <typename T, typename Input, typename Output> struct body_types<Output (T::*)(Input&)> : declare_body_types<Input, Output> {}; template <typename T, typename Output> -struct body_types<Output (T::*)(flow_control&) const> : declare_body_types<NoInputBody, Output> {}; +struct body_types<Output (T::*)(d1::flow_control&) const> : declare_body_types<NoInputBody, Output> {}; template <typename T, typename Output> -struct body_types<Output (T::*)(flow_control&)> : declare_body_types<NoInputBody, Output> {}; +struct body_types<Output (T::*)(d1::flow_control&)> : declare_body_types<NoInputBody, Output> {}; template <typename Input, typename Output> struct body_types<Output (*)(Input&)> : declare_body_types<Input, Output> {}; @@ -63,7 +63,7 @@ template <typename Input, typename Output> struct body_types<Output (*)(const Input&)> : declare_body_types<Input, Output> {}; template <typename Output> -struct body_types<Output (*)(flow_control&)> : declare_body_types<NoInputBody, Output> {}; +struct body_types<Output (*)(d1::flow_control&)> : declare_body_types<NoInputBody, Output> {}; template <typename Body> using input_t = typename body_types<Body>::input_type; @@ -100,7 +100,7 @@ decltype(decide_on_operator_overload(std::declval<Body>())) decide_on_callable_t template <typename GraphOrSet, typename Body> input_node(GraphOrSet&&, Body) ->input_node<output_t<decltype(decide_on_callable_type<Body>(0))>>; - + #if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET template <typename NodeSet> @@ -268,7 +268,7 @@ template <typename NodeSet> write_once_node(const NodeSet&) ->write_once_node<decide_on_set_t<NodeSet>>; #endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET -} // namespace d1 +} // namespace d2 } // namespace detail } // namespace tbb diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_tagged_buffer_impl.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_tagged_buffer_impl.h index 0d9de17654..0f7c0d174f 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_tagged_buffer_impl.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_tagged_buffer_impl.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2023 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -30,32 +30,88 @@ // elements in the table are a simple list; we need pointer to next element to // traverse the chain -template<typename ValueType> -struct buffer_element_type { - // the second parameter below is void * because we can't forward-declare the type - // itself, so we just reinterpret_cast below. - typedef typename aligned_pair<ValueType, void *>::type type; + +template <typename Key, typename ValueType> +struct hash_buffer_element : public aligned_pair<ValueType, void*> { + using key_type = Key; + using value_type = ValueType; + + value_type* get_value_ptr() { return reinterpret_cast<value_type*>(this->first); } + hash_buffer_element* get_next() { return reinterpret_cast<hash_buffer_element*>(this->second); } + void set_next(hash_buffer_element* new_next) { this->second = reinterpret_cast<void*>(new_next); } + + void create_element(const value_type& v) { + ::new(this->first) value_type(v); + } + + void create_element(hash_buffer_element&& other) { + ::new(this->first) value_type(std::move(*other.get_value_ptr())); + } + + void destroy_element() { + get_value_ptr()->~value_type(); + } +}; + +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT +template <typename Key, typename ValueType> +struct metainfo_hash_buffer_element : public aligned_triple<ValueType, void*, message_metainfo> { + using key_type = Key; + using value_type = ValueType; + + value_type* get_value_ptr() { return reinterpret_cast<value_type*>(this->first); } + metainfo_hash_buffer_element* get_next() { + return reinterpret_cast<metainfo_hash_buffer_element*>(this->second); + } + void set_next(metainfo_hash_buffer_element* new_next) { this->second = reinterpret_cast<void*>(new_next); } + message_metainfo& get_metainfo() { return this->third; } + + void create_element(const value_type& v, const message_metainfo& metainfo) { + __TBB_ASSERT(this->third.empty(), nullptr); + ::new(this->first) value_type(v); + this->third = metainfo; + + for (auto waiter : metainfo.waiters()) { + waiter->reserve(1); + } + } + + void create_element(metainfo_hash_buffer_element&& other) { + __TBB_ASSERT(this->third.empty(), nullptr); + ::new(this->first) value_type(std::move(*other.get_value_ptr())); + this->third = std::move(other.get_metainfo()); + } + + void destroy_element() { + get_value_ptr()->~value_type(); + + for (auto waiter : get_metainfo().waiters()) { + waiter->release(1); + } + get_metainfo() = message_metainfo{}; + } }; +#endif template < - typename Key, // type of key within ValueType - typename ValueType, + typename ElementType, typename ValueToKey, // abstract method that returns "const Key" or "const Key&" given ValueType typename HashCompare, // has hash and equal - typename Allocator=tbb::cache_aligned_allocator< typename aligned_pair<ValueType, void *>::type > + typename Allocator=tbb::cache_aligned_allocator<ElementType> > -class hash_buffer : public HashCompare { +class hash_buffer_impl : public HashCompare { public: static const size_t INITIAL_SIZE = 8; // initial size of the hash pointer table - typedef ValueType value_type; - typedef typename buffer_element_type< value_type >::type element_type; + typedef typename ElementType::key_type key_type; + typedef typename ElementType::value_type value_type; + typedef ElementType element_type; typedef value_type *pointer_type; typedef element_type *list_array_type; // array we manage manually typedef list_array_type *pointer_array_type; typedef typename std::allocator_traits<Allocator>::template rebind_alloc<list_array_type> pointer_array_allocator_type; typedef typename 
std::allocator_traits<Allocator>::template rebind_alloc<element_type> elements_array_allocator; - typedef typename std::decay<Key>::type Knoref; + typedef typename std::decay<key_type>::type Knoref; private: ValueToKey *my_key; @@ -69,9 +125,9 @@ private: void set_up_free_list( element_type **p_free_list, list_array_type la, size_t sz) { for(size_t i=0; i < sz - 1; ++i ) { // construct free list - la[i].second = &(la[i+1]); + la[i].set_next(&(la[i + 1])); } - la[sz-1].second = nullptr; + la[sz - 1].set_next(nullptr); *p_free_list = (element_type *)&(la[0]); } @@ -101,15 +157,18 @@ private: { DoCleanup my_cleanup(new_pointer_array, new_elements_array, new_size); new_elements_array = elements_array_allocator().allocate(my_size); +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + for (std::size_t i = 0; i < my_size; ++i) { + ::new(new_elements_array + i) element_type(); + } +#endif new_pointer_array = pointer_array_allocator_type().allocate(new_size); for(size_t i=0; i < new_size; ++i) new_pointer_array[i] = nullptr; set_up_free_list(&new_free_list, new_elements_array, my_size ); for(size_t i=0; i < my_size; ++i) { - for( element_type* op = pointer_array[i]; op; op = (element_type *)(op->second)) { - value_type *ov = reinterpret_cast<value_type *>(&(op->first)); - // could have std::move semantics - internal_insert_with_key(new_pointer_array, new_size, new_free_list, *ov); + for( element_type* op = pointer_array[i]; op; op = (element_type *)(op->get_next())) { + internal_insert_with_key(new_pointer_array, new_size, new_free_list, std::move(*op)); } } my_cleanup.my_pa = nullptr; @@ -126,15 +185,26 @@ private: // v should have perfect forwarding if std::move implemented. // we use this method to move elements in grow_array, so can't use class fields + template <typename Value, typename... Args> + const value_type& get_value_from_pack(const Value& value, const Args&...) { + return value; + } + + template <typename Element> + const value_type& get_value_from_pack(Element&& element) { + return *(element.get_value_ptr()); + } + + template <typename... Args> void internal_insert_with_key( element_type **p_pointer_array, size_t p_sz, list_array_type &p_free_list, - const value_type &v) { + Args&&... 
args) { size_t l_mask = p_sz-1; __TBB_ASSERT(my_key, "Error: value-to-key functor not provided"); - size_t h = this->hash(tbb::detail::invoke(*my_key, v)) & l_mask; + size_t h = this->hash(tbb::detail::invoke(*my_key, get_value_from_pack(args...))) & l_mask; __TBB_ASSERT(p_free_list, "Error: free list not set up."); - element_type* my_elem = p_free_list; p_free_list = (element_type *)(p_free_list->second); - (void) new(&(my_elem->first)) value_type(v); - my_elem->second = p_pointer_array[h]; + element_type* my_elem = p_free_list; p_free_list = (element_type *)(p_free_list->get_next()); + my_elem->create_element(std::forward<Args>(args)...); + my_elem->set_next(p_pointer_array[h]); p_pointer_array[h] = my_elem; } @@ -142,6 +212,11 @@ private: pointer_array = pointer_array_allocator_type().allocate(my_size); for(size_t i = 0; i < my_size; ++i) pointer_array[i] = nullptr; elements_array = elements_array_allocator().allocate(my_size / 2); +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + for (std::size_t i = 0; i < my_size / 2; ++i) { + ::new(elements_array + i) element_type(); + } +#endif set_up_free_list(&free_list, elements_array, my_size / 2); } @@ -151,13 +226,8 @@ private: for(size_t i = 0; i < sz; ++i ) { element_type *p_next; for( element_type *p = pa[i]; p; p = p_next) { - p_next = (element_type *)p->second; - // TODO revamp: make sure type casting is correct. - void* ptr = (void*)(p->first); -#if _MSC_VER && _MSC_VER <= 1900 && !__INTEL_COMPILER - suppress_unused_warning(ptr); -#endif - ((value_type*)ptr)->~value_type(); + p_next = p->get_next(); + p->destroy_element(); } } pointer_array_allocator_type().deallocate(pa, sz); @@ -166,6 +236,11 @@ private: // Separate test (if allocation of pa throws, el may be allocated. // but no elements will be constructed.) if(el) { +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + for (std::size_t i = 0; i < sz / 2; ++i) { + (el + i)->~element_type(); + } +#endif elements_array_allocator().deallocate(el, sz / 2); el = nullptr; } @@ -174,17 +249,17 @@ private: } public: - hash_buffer() : my_key(nullptr), my_size(INITIAL_SIZE), nelements(0) { + hash_buffer_impl() : my_key(nullptr), my_size(INITIAL_SIZE), nelements(0) { internal_initialize_buffer(); } - ~hash_buffer() { + ~hash_buffer_impl() { internal_free_buffer(pointer_array, elements_array, my_size, nelements); delete my_key; my_key = nullptr; } - hash_buffer(const hash_buffer&) = delete; - hash_buffer& operator=(const hash_buffer&) = delete; + hash_buffer_impl(const hash_buffer_impl&) = delete; + hash_buffer_impl& operator=(const hash_buffer_impl&) = delete; void reset() { internal_free_buffer(pointer_array, elements_array, my_size, nelements); @@ -197,34 +272,41 @@ public: // pointer is used to clone() ValueToKey* get_key_func() { return my_key; } - bool insert_with_key(const value_type &v) { - pointer_type p = nullptr; + template <typename... Args> + bool insert_with_key(const value_type &v, Args&&... 
args) { + element_type* p = nullptr; __TBB_ASSERT(my_key, "Error: value-to-key functor not provided"); - if(find_ref_with_key(tbb::detail::invoke(*my_key, v), p)) { - p->~value_type(); - (void) new(p) value_type(v); // copy-construct into the space + if(find_element_ref_with_key(tbb::detail::invoke(*my_key, v), p)) { + p->destroy_element(); + p->create_element(v, std::forward<Args>(args)...); return false; } ++nelements; if(nelements*2 > my_size) grow_array(); - internal_insert_with_key(pointer_array, my_size, free_list, v); + internal_insert_with_key(pointer_array, my_size, free_list, v, std::forward<Args>(args)...); return true; } - // returns true and sets v to array element if found, else returns false. - bool find_ref_with_key(const Knoref& k, pointer_type &v) { + bool find_element_ref_with_key(const Knoref& k, element_type*& v) { size_t i = this->hash(k) & mask(); - for(element_type* p = pointer_array[i]; p; p = (element_type *)(p->second)) { - pointer_type pv = reinterpret_cast<pointer_type>(&(p->first)); + for(element_type* p = pointer_array[i]; p; p = (element_type *)(p->get_next())) { __TBB_ASSERT(my_key, "Error: value-to-key functor not provided"); - if(this->equal(tbb::detail::invoke(*my_key, *pv), k)) { - v = pv; + if(this->equal(tbb::detail::invoke(*my_key, *p->get_value_ptr()), k)) { + v = p; return true; } } return false; } + // returns true and sets v to array element if found, else returns false. + bool find_ref_with_key(const Knoref& k, pointer_type &v) { + element_type* element_ptr = nullptr; + bool res = find_element_ref_with_key(k, element_ptr); + v = element_ptr->get_value_ptr(); + return res; + } + bool find_with_key( const Knoref& k, value_type &v) { value_type *p; if(find_ref_with_key(k, p)) { @@ -238,14 +320,14 @@ public: void delete_with_key(const Knoref& k) { size_t h = this->hash(k) & mask(); element_type* prev = nullptr; - for(element_type* p = pointer_array[h]; p; prev = p, p = (element_type *)(p->second)) { - value_type *vp = reinterpret_cast<value_type *>(&(p->first)); + for(element_type* p = pointer_array[h]; p; prev = p, p = (element_type *)(p->get_next())) { + value_type *vp = p->get_value_ptr(); __TBB_ASSERT(my_key, "Error: value-to-key functor not provided"); if(this->equal(tbb::detail::invoke(*my_key, *vp), k)) { - vp->~value_type(); - if(prev) prev->second = p->second; - else pointer_array[h] = (element_type *)(p->second); - p->second = free_list; + p->destroy_element(); + if(prev) prev->set_next(p->get_next()); + else pointer_array[h] = (element_type *)(p->get_next()); + p->set_next(free_list); free_list = p; --nelements; return; @@ -254,4 +336,45 @@ public: __TBB_ASSERT(false, "key not found for delete"); } }; + +template + < + typename Key, // type of key within ValueType + typename ValueType, + typename ValueToKey, // abstract method that returns "const Key" or "const Key&" given ValueType + typename HashCompare, // has hash and equal + typename Allocator=tbb::cache_aligned_allocator<hash_buffer_element<Key, ValueType>> + > +using hash_buffer = hash_buffer_impl<hash_buffer_element<Key, ValueType>, + ValueToKey, HashCompare, Allocator>; + +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT +template + < + typename Key, // type of key within ValueType + typename ValueType, + typename ValueToKey, // abstract method that returns "const Key" or "const Key&" given ValueType + typename HashCompare, // has hash and equal + typename Allocator=tbb::cache_aligned_allocator<metainfo_hash_buffer_element<Key, ValueType>> + > +struct metainfo_hash_buffer : public 
hash_buffer_impl<metainfo_hash_buffer_element<Key, ValueType>, + ValueToKey, HashCompare, Allocator> +{ +private: + using base_type = hash_buffer_impl<metainfo_hash_buffer_element<Key, ValueType>, + ValueToKey, HashCompare, Allocator>; +public: + bool find_with_key(const typename base_type::Knoref& k, + typename base_type::value_type& v, message_metainfo& metainfo) + { + typename base_type::element_type* p = nullptr; + bool result = this->find_element_ref_with_key(k, p); + if (result) { + v = *(p->get_value_ptr()); + metainfo = p->get_metainfo(); + } + return result; + } +}; +#endif // __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT #endif // __TBB__flow_graph_hash_buffer_impl_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_trace_impl.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_trace_impl.h index a161dd0362..74ebf08456 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_trace_impl.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_trace_impl.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2022 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -24,7 +24,7 @@ namespace tbb { namespace detail { -namespace d1 { +namespace d2 { template< typename T > class sender; template< typename T > class receiver; @@ -44,29 +44,29 @@ template< typename T > class receiver; static inline void fgt_alias_port(void *node, void *p, bool visible) { if(visible) - itt_relation_add( ITT_DOMAIN_FLOW, node, FLOW_NODE, __itt_relation_is_parent_of, p, FLOW_NODE ); + itt_relation_add( d1::ITT_DOMAIN_FLOW, node, FLOW_NODE, __itt_relation_is_parent_of, p, FLOW_NODE ); else - itt_relation_add( ITT_DOMAIN_FLOW, p, FLOW_NODE, __itt_relation_is_child_of, node, FLOW_NODE ); + itt_relation_add( d1::ITT_DOMAIN_FLOW, p, FLOW_NODE, __itt_relation_is_child_of, node, FLOW_NODE ); } static inline void fgt_composite ( void* codeptr, void *node, void *graph ) { - itt_make_task_group( ITT_DOMAIN_FLOW, node, FLOW_NODE, graph, FLOW_GRAPH, FLOW_COMPOSITE_NODE ); + itt_make_task_group( d1::ITT_DOMAIN_FLOW, node, FLOW_NODE, graph, FLOW_GRAPH, FLOW_COMPOSITE_NODE ); suppress_unused_warning( codeptr ); #if __TBB_FLOW_TRACE_CODEPTR if (codeptr != nullptr) { - register_node_addr(ITT_DOMAIN_FLOW, node, FLOW_NODE, CODE_ADDRESS, &codeptr); + register_node_addr(d1::ITT_DOMAIN_FLOW, node, FLOW_NODE, CODE_ADDRESS, &codeptr); } #endif } static inline void fgt_internal_alias_input_port( void *node, void *p, string_resource_index name_index ) { - itt_make_task_group( ITT_DOMAIN_FLOW, p, FLOW_INPUT_PORT, node, FLOW_NODE, name_index ); - itt_relation_add( ITT_DOMAIN_FLOW, node, FLOW_NODE, __itt_relation_is_parent_of, p, FLOW_INPUT_PORT ); + itt_make_task_group( d1::ITT_DOMAIN_FLOW, p, FLOW_INPUT_PORT, node, FLOW_NODE, name_index ); + itt_relation_add( d1::ITT_DOMAIN_FLOW, node, FLOW_NODE, __itt_relation_is_parent_of, p, FLOW_INPUT_PORT ); } static inline void fgt_internal_alias_output_port( void *node, void *p, string_resource_index name_index ) { - itt_make_task_group( ITT_DOMAIN_FLOW, p, FLOW_OUTPUT_PORT, node, FLOW_NODE, name_index ); - itt_relation_add( ITT_DOMAIN_FLOW, node, FLOW_NODE, __itt_relation_is_parent_of, p, FLOW_OUTPUT_PORT ); + itt_make_task_group( d1::ITT_DOMAIN_FLOW, p, FLOW_OUTPUT_PORT, node, FLOW_NODE, name_index ); + itt_relation_add( d1::ITT_DOMAIN_FLOW, node, FLOW_NODE, __itt_relation_is_parent_of, p, FLOW_OUTPUT_PORT ); } template<typename 
InputType> @@ -109,15 +109,15 @@ struct fgt_internal_output_alias_helper<PortsTuple, 0> { }; static inline void fgt_internal_create_input_port( void *node, void *p, string_resource_index name_index ) { - itt_make_task_group( ITT_DOMAIN_FLOW, p, FLOW_INPUT_PORT, node, FLOW_NODE, name_index ); + itt_make_task_group( d1::ITT_DOMAIN_FLOW, p, FLOW_INPUT_PORT, node, FLOW_NODE, name_index ); } static inline void fgt_internal_create_output_port( void* codeptr, void *node, void *p, string_resource_index name_index ) { - itt_make_task_group(ITT_DOMAIN_FLOW, p, FLOW_OUTPUT_PORT, node, FLOW_NODE, name_index); + itt_make_task_group(d1::ITT_DOMAIN_FLOW, p, FLOW_OUTPUT_PORT, node, FLOW_NODE, name_index); suppress_unused_warning( codeptr ); #if __TBB_FLOW_TRACE_CODEPTR if (codeptr != nullptr) { - register_node_addr(ITT_DOMAIN_FLOW, node, FLOW_NODE, CODE_ADDRESS, &codeptr); + register_node_addr(d1::ITT_DOMAIN_FLOW, node, FLOW_NODE, CODE_ADDRESS, &codeptr); } #endif } @@ -167,40 +167,40 @@ struct fgt_internal_output_helper<PortsTuple,1> { template< typename NodeType > void fgt_multioutput_node_desc( const NodeType *node, const char *desc ) { void *addr = (void *)( static_cast< receiver< typename NodeType::input_type > * >(const_cast< NodeType *>(node)) ); - itt_metadata_str_add( ITT_DOMAIN_FLOW, addr, FLOW_NODE, FLOW_OBJECT_NAME, desc ); + itt_metadata_str_add( d1::ITT_DOMAIN_FLOW, addr, FLOW_NODE, FLOW_OBJECT_NAME, desc ); } template< typename NodeType > void fgt_multiinput_multioutput_node_desc( const NodeType *node, const char *desc ) { void *addr = const_cast<NodeType *>(node); - itt_metadata_str_add( ITT_DOMAIN_FLOW, addr, FLOW_NODE, FLOW_OBJECT_NAME, desc ); + itt_metadata_str_add( d1::ITT_DOMAIN_FLOW, addr, FLOW_NODE, FLOW_OBJECT_NAME, desc ); } template< typename NodeType > static inline void fgt_node_desc( const NodeType *node, const char *desc ) { void *addr = (void *)( static_cast< sender< typename NodeType::output_type > * >(const_cast< NodeType *>(node)) ); - itt_metadata_str_add( ITT_DOMAIN_FLOW, addr, FLOW_NODE, FLOW_OBJECT_NAME, desc ); + itt_metadata_str_add( d1::ITT_DOMAIN_FLOW, addr, FLOW_NODE, FLOW_OBJECT_NAME, desc ); } static inline void fgt_graph_desc( const void *g, const char *desc ) { void *addr = const_cast< void *>(g); - itt_metadata_str_add( ITT_DOMAIN_FLOW, addr, FLOW_GRAPH, FLOW_OBJECT_NAME, desc ); + itt_metadata_str_add( d1::ITT_DOMAIN_FLOW, addr, FLOW_GRAPH, FLOW_OBJECT_NAME, desc ); } static inline void fgt_body( void *node, void *body ) { - itt_relation_add( ITT_DOMAIN_FLOW, body, FLOW_BODY, __itt_relation_is_child_of, node, FLOW_NODE ); + itt_relation_add( d1::ITT_DOMAIN_FLOW, body, FLOW_BODY, __itt_relation_is_child_of, node, FLOW_NODE ); } template< int N, typename PortsTuple > static inline void fgt_multioutput_node(void* codeptr, string_resource_index t, void *g, void *input_port, PortsTuple &ports ) { - itt_make_task_group( ITT_DOMAIN_FLOW, input_port, FLOW_NODE, g, FLOW_GRAPH, t ); + itt_make_task_group( d1::ITT_DOMAIN_FLOW, input_port, FLOW_NODE, g, FLOW_GRAPH, t ); fgt_internal_create_input_port( input_port, input_port, FLOW_INPUT_PORT_0 ); fgt_internal_output_helper<PortsTuple, N>::register_port(codeptr, input_port, ports ); } template< int N, typename PortsTuple > static inline void fgt_multioutput_node_with_body( void* codeptr, string_resource_index t, void *g, void *input_port, PortsTuple &ports, void *body ) { - itt_make_task_group( ITT_DOMAIN_FLOW, input_port, FLOW_NODE, g, FLOW_GRAPH, t ); + itt_make_task_group( d1::ITT_DOMAIN_FLOW, input_port, FLOW_NODE, 
g, FLOW_GRAPH, t ); fgt_internal_create_input_port( input_port, input_port, FLOW_INPUT_PORT_0 ); fgt_internal_output_helper<PortsTuple, N>::register_port( codeptr, input_port, ports ); fgt_body( input_port, body ); @@ -208,28 +208,28 @@ static inline void fgt_multioutput_node_with_body( void* codeptr, string_resourc template< int N, typename PortsTuple > static inline void fgt_multiinput_node( void* codeptr, string_resource_index t, void *g, PortsTuple &ports, void *output_port) { - itt_make_task_group( ITT_DOMAIN_FLOW, output_port, FLOW_NODE, g, FLOW_GRAPH, t ); + itt_make_task_group( d1::ITT_DOMAIN_FLOW, output_port, FLOW_NODE, g, FLOW_GRAPH, t ); fgt_internal_create_output_port( codeptr, output_port, output_port, FLOW_OUTPUT_PORT_0 ); fgt_internal_input_helper<PortsTuple, N>::register_port( output_port, ports ); } static inline void fgt_multiinput_multioutput_node( void* codeptr, string_resource_index t, void *n, void *g ) { - itt_make_task_group( ITT_DOMAIN_FLOW, n, FLOW_NODE, g, FLOW_GRAPH, t ); + itt_make_task_group( d1::ITT_DOMAIN_FLOW, n, FLOW_NODE, g, FLOW_GRAPH, t ); suppress_unused_warning( codeptr ); #if __TBB_FLOW_TRACE_CODEPTR if (codeptr != nullptr) { - register_node_addr(ITT_DOMAIN_FLOW, n, FLOW_NODE, CODE_ADDRESS, &codeptr); + register_node_addr(d1::ITT_DOMAIN_FLOW, n, FLOW_NODE, CODE_ADDRESS, &codeptr); } #endif } static inline void fgt_node( void* codeptr, string_resource_index t, void *g, void *output_port ) { - itt_make_task_group( ITT_DOMAIN_FLOW, output_port, FLOW_NODE, g, FLOW_GRAPH, t ); + itt_make_task_group( d1::ITT_DOMAIN_FLOW, output_port, FLOW_NODE, g, FLOW_GRAPH, t ); fgt_internal_create_output_port( codeptr, output_port, output_port, FLOW_OUTPUT_PORT_0 ); } static void fgt_node_with_body( void* codeptr, string_resource_index t, void *g, void *output_port, void *body ) { - itt_make_task_group( ITT_DOMAIN_FLOW, output_port, FLOW_NODE, g, FLOW_GRAPH, t ); + itt_make_task_group( d1::ITT_DOMAIN_FLOW, output_port, FLOW_NODE, g, FLOW_GRAPH, t ); fgt_internal_create_output_port(codeptr, output_port, output_port, FLOW_OUTPUT_PORT_0 ); fgt_body( output_port, body ); } @@ -251,47 +251,47 @@ static inline void fgt_node( void* codeptr, string_resource_index t, void *g, v } static inline void fgt_make_edge( void *output_port, void *input_port ) { - itt_relation_add( ITT_DOMAIN_FLOW, output_port, FLOW_OUTPUT_PORT, __itt_relation_is_predecessor_to, input_port, FLOW_INPUT_PORT); + itt_relation_add( d1::ITT_DOMAIN_FLOW, output_port, FLOW_OUTPUT_PORT, __itt_relation_is_predecessor_to, input_port, FLOW_INPUT_PORT); } static inline void fgt_remove_edge( void *output_port, void *input_port ) { - itt_relation_add( ITT_DOMAIN_FLOW, output_port, FLOW_OUTPUT_PORT, __itt_relation_is_sibling_of, input_port, FLOW_INPUT_PORT); + itt_relation_add( d1::ITT_DOMAIN_FLOW, output_port, FLOW_OUTPUT_PORT, __itt_relation_is_sibling_of, input_port, FLOW_INPUT_PORT); } static inline void fgt_graph( void *g ) { - itt_make_task_group( ITT_DOMAIN_FLOW, g, FLOW_GRAPH, nullptr, FLOW_NULL, FLOW_GRAPH ); + itt_make_task_group( d1::ITT_DOMAIN_FLOW, g, FLOW_GRAPH, nullptr, FLOW_NULL, FLOW_GRAPH ); } static inline void fgt_begin_body( void *body ) { - itt_task_begin( ITT_DOMAIN_FLOW, body, FLOW_BODY, nullptr, FLOW_NULL, FLOW_BODY ); + itt_task_begin( d1::ITT_DOMAIN_FLOW, body, FLOW_BODY, nullptr, FLOW_NULL, FLOW_BODY ); } static inline void fgt_end_body( void * ) { - itt_task_end( ITT_DOMAIN_FLOW ); + itt_task_end( d1::ITT_DOMAIN_FLOW ); } static inline void fgt_async_try_put_begin( void *node, void *port ) 
{ - itt_task_begin( ITT_DOMAIN_FLOW, port, FLOW_OUTPUT_PORT, node, FLOW_NODE, FLOW_OUTPUT_PORT ); + itt_task_begin( d1::ITT_DOMAIN_FLOW, port, FLOW_OUTPUT_PORT, node, FLOW_NODE, FLOW_OUTPUT_PORT ); } static inline void fgt_async_try_put_end( void *, void * ) { - itt_task_end( ITT_DOMAIN_FLOW ); + itt_task_end( d1::ITT_DOMAIN_FLOW ); } static inline void fgt_async_reserve( void *node, void *graph ) { - itt_region_begin( ITT_DOMAIN_FLOW, node, FLOW_NODE, graph, FLOW_GRAPH, FLOW_NULL ); + itt_region_begin( d1::ITT_DOMAIN_FLOW, node, FLOW_NODE, graph, FLOW_GRAPH, FLOW_NULL ); } static inline void fgt_async_commit( void *node, void * /*graph*/) { - itt_region_end( ITT_DOMAIN_FLOW, node, FLOW_NODE ); + itt_region_end( d1::ITT_DOMAIN_FLOW, node, FLOW_NODE ); } static inline void fgt_reserve_wait( void *graph ) { - itt_region_begin( ITT_DOMAIN_FLOW, graph, FLOW_GRAPH, nullptr, FLOW_NULL, FLOW_NULL ); + itt_region_begin( d1::ITT_DOMAIN_FLOW, graph, FLOW_GRAPH, nullptr, FLOW_NULL, FLOW_NULL ); } static inline void fgt_release_wait( void *graph ) { - itt_region_end( ITT_DOMAIN_FLOW, graph, FLOW_GRAPH ); + itt_region_end( d1::ITT_DOMAIN_FLOW, graph, FLOW_GRAPH ); } #else // TBB_USE_PROFILING_TOOLS @@ -357,7 +357,7 @@ struct fgt_internal_output_alias_helper { #endif // TBB_USE_PROFILING_TOOLS -} // d1 +} // d2 } // namespace detail } // namespace tbb diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_types_impl.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_types_impl.h index 4827551d85..e361b23e7b 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_types_impl.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_types_impl.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2022 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -21,7 +21,7 @@ #error Do not #include this internal file directly; use public TBB headers instead. #endif -// included in namespace tbb::detail::d1 +// included in namespace tbb::detail::d2 // the change to key_matching (adding a K and KHash template parameter, making it a class) // means we have to pass this data to the key_matching_port. All the ports have only one @@ -73,40 +73,55 @@ struct make_sequence < 0, S... > { typedef sequence<S...> type; }; -//! type mimicking std::pair but with trailing fill to ensure each element of an array -//* will have the correct alignment -template<typename T1, typename T2, size_t REM> -struct type_plus_align { - char first[sizeof(T1)]; - T2 second; - char fill1[REM]; +template<class U> struct alignment_of { + typedef struct { char t; U padded; } test_alignment; + static const size_t value = sizeof(test_alignment) - sizeof(U); }; -template<typename T1, typename T2> -struct type_plus_align<T1,T2,0> { - char first[sizeof(T1)]; - T2 second; +template <typename... Types> +struct max_alignment_helper; + +template <typename T1, typename... Types> +struct max_alignment_helper<T1, Types...> { + using type = typename max_alignment_helper<T1, typename max_alignment_helper<Types...>::type>::type; }; -template<class U> struct alignment_of { - typedef struct { char t; U padded; } test_alignment; - static const size_t value = sizeof(test_alignment) - sizeof(U); +template <typename T1, typename T2> +struct max_alignment_helper<T1, T2> { + using type = typename std::conditional<alignof(T1) < alignof(T2), T2, T1>::type; }; +template <typename... 
Types> +using max_alignment_helper_t = typename max_alignment_helper<Types...>::type; + +#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) +#pragma warning(push) +#pragma warning(disable: 4324) // warning C4324: structure was padded due to alignment specifier +#endif + // T1, T2 are actual types stored. The space defined for T1 in the type returned // is a char array of the correct size. Type T2 should be trivially-constructible, // T1 must be explicitly managed. -template<typename T1, typename T2> -struct aligned_pair { - static const size_t t1_align = alignment_of<T1>::value; - static const size_t t2_align = alignment_of<T2>::value; - typedef type_plus_align<T1, T2, 0 > just_pair; - static const size_t max_align = t1_align < t2_align ? t2_align : t1_align; - static const size_t extra_bytes = sizeof(just_pair) % max_align; - static const size_t remainder = extra_bytes ? max_align - extra_bytes : 0; -public: - typedef type_plus_align<T1,T2,remainder> type; -}; // aligned_pair + +template <typename T1, typename T2> +struct alignas(alignof(max_alignment_helper_t<T1, T2>)) aligned_pair { + char first[sizeof(T1)]; + T2 second; +}; + +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT +template <typename T1, typename T2, typename T3> +struct alignas(alignof(max_alignment_helper_t<T1, T2, T3>)) aligned_triple { + char first[sizeof(T1)]; + T2 second; + T3 third; +}; +#endif + + +#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) +#pragma warning(pop) // warning 4324 is back +#endif // support for variant type // type we use when we're not storing a value diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_machine.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_machine.h index 7a4a1e31cb..ca481380c5 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_machine.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_machine.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2023 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -96,7 +96,7 @@ static inline void machine_pause(int32_t delay) { #if __TBB_x86_64 || __TBB_x86_32 while (delay-- > 0) { _mm_pause(); } #elif __ARM_ARCH_7A__ || __aarch64__ - while (delay-- > 0) { __asm__ __volatile__("yield" ::: "memory"); } + while (delay-- > 0) { __asm__ __volatile__("isb sy" ::: "memory"); } #else /* Generic */ (void)delay; // suppress without including _template_helpers.h yield(); diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_pipeline_filters.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_pipeline_filters.h index 46e7b95d6c..8121946729 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_pipeline_filters.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_pipeline_filters.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2023 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
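The reworked aligned_pair above drops the old trailing-fill arithmetic and instead derives an alignas specifier from the most strictly aligned member type. A minimal standalone sketch of the same storage idea, using only the standard library (the names raw_pair and more_aligned_t are illustrative, not the TBB ones):

#include <new>
#include <type_traits>

// Pick whichever of T1/T2 has the stricter alignment requirement.
template <typename T1, typename T2>
using more_aligned_t = typename std::conditional<(alignof(T1) < alignof(T2)), T2, T1>::type;

// Raw storage for a T1 plus a trivially usable T2 field, aligned so that
// placement-new of a T1 into `first` is always well-formed.
template <typename T1, typename T2>
struct alignas(alignof(more_aligned_t<T1, T2>)) raw_pair {
    char first[sizeof(T1)];
    T2   second;
};

int main() {
    using value_t = double;
    raw_pair<value_t, void*> p{};
    static_assert(alignof(raw_pair<value_t, void*>) >= alignof(value_t),
                  "storage must satisfy T1 alignment");
    value_t* v = ::new (static_cast<void*>(p.first)) value_t(3.14);  // construct T1 in place
    v->~value_t();                                                   // T1 lifetime is managed manually
    return 0;
}
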
@@ -32,6 +32,12 @@ namespace d1 { class base_filter; } +namespace d2 { +template <typename Output> +__TBB_requires(std::copyable<Output>) +class input_node; +} + namespace r1 { TBB_EXPORT void __TBB_EXPORTED_FUNC set_end_of_input(d1::base_filter&); class pipeline; @@ -131,7 +137,7 @@ class flow_control { template<typename Body, typename InputType, typename OutputType > friend class concrete_filter; template<typename Output> __TBB_requires(std::copyable<Output>) - friend class input_node; + friend class d2::input_node; public: void stop() { is_pipeline_stopped = true; } }; diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_task.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_task.h index 636aea97b4..e1bb70c5be 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_task.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_task.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2020-2023 Intel Corporation + Copyright (c) 2020-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -43,6 +43,13 @@ class task; class wait_context; class task_group_context; struct execution_data; +class wait_tree_vertex_interface; +class task_arena_base; +} + +namespace d2 { +class task_group; +class task_group_base; } namespace r1 { @@ -52,7 +59,9 @@ TBB_EXPORT void __TBB_EXPORTED_FUNC spawn(d1::task& t, d1::task_group_context& c TBB_EXPORT void __TBB_EXPORTED_FUNC execute_and_wait(d1::task& t, d1::task_group_context& t_ctx, d1::wait_context&, d1::task_group_context& w_ctx); TBB_EXPORT void __TBB_EXPORTED_FUNC wait(d1::wait_context&, d1::task_group_context& ctx); TBB_EXPORT d1::slot_id __TBB_EXPORTED_FUNC execution_slot(const d1::execution_data*); +TBB_EXPORT d1::slot_id __TBB_EXPORTED_FUNC execution_slot(const d1::task_arena_base&); TBB_EXPORT d1::task_group_context* __TBB_EXPORTED_FUNC current_context(); +TBB_EXPORT d1::wait_tree_vertex_interface* get_thread_reference_vertex(d1::wait_tree_vertex_interface* wc); // Do not place under __TBB_RESUMABLE_TASKS. It is a stub for unsupported platforms. 
struct suspend_point_type; @@ -124,8 +133,7 @@ class wait_context { friend class r1::thread_data; friend class r1::task_dispatcher; friend class r1::external_waiter; - friend class task_group; - friend class task_group_base; + friend class wait_context_vertex; friend struct r1::task_arena_impl; friend struct r1::suspend_point_type; public: @@ -147,6 +155,67 @@ public: } }; +class wait_tree_vertex_interface { +public: + virtual void reserve(std::uint32_t delta = 1) = 0; + virtual void release(std::uint32_t delta = 1) = 0; + +protected: + virtual ~wait_tree_vertex_interface() = default; +}; + +class wait_context_vertex : public wait_tree_vertex_interface { +public: + wait_context_vertex(std::uint32_t ref = 0) : m_wait(ref) {} + + void reserve(std::uint32_t delta = 1) override { + m_wait.reserve(delta); + } + + void release(std::uint32_t delta = 1) override { + m_wait.release(delta); + } + + wait_context& get_context() { + return m_wait; + } +private: + friend class d2::task_group; + friend class d2::task_group_base; + + bool continue_execution() const { + return m_wait.continue_execution(); + } + + wait_context m_wait; +}; + +class reference_vertex : public wait_tree_vertex_interface { +public: + reference_vertex(wait_tree_vertex_interface* parent, std::uint32_t ref_count) : my_parent{parent}, m_ref_count{ref_count} + {} + + void reserve(std::uint32_t delta = 1) override { + if (m_ref_count.fetch_add(static_cast<std::uint64_t>(delta)) == 0) { + my_parent->reserve(); + } + } + + void release(std::uint32_t delta = 1) override { + std::uint64_t ref = m_ref_count.fetch_sub(static_cast<std::uint64_t>(delta)) - static_cast<std::uint64_t>(delta); + if (ref == 0) { + my_parent->release(); + } + } + + std::uint32_t get_num_child() { + return static_cast<std::uint32_t>(m_ref_count.load(std::memory_order_acquire)); + } +private: + wait_tree_vertex_interface* my_parent; + std::atomic<std::uint64_t> m_ref_count; +}; + struct execution_data { task_group_context* context{}; slot_id original_slot{}; diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_task_handle.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_task_handle.h index e32154f409..26212b462c 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_task_handle.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_task_handle.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2020-2021 Intel Corporation + Copyright (c) 2020-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
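The wait_tree_vertex_interface / wait_context_vertex / reference_vertex trio introduced above forms a small reference-counting tree: a child vertex forwards only its first reserve() and its last release() to the parent, so the parent's count tracks live subtrees rather than every individual reference. A rough standalone sketch of that propagation pattern, with illustrative names and none of the TBB plumbing:

#include <atomic>
#include <cstdint>
#include <iostream>

struct wait_vertex {
    virtual void reserve(std::uint32_t delta = 1) = 0;
    virtual void release(std::uint32_t delta = 1) = 0;
    virtual ~wait_vertex() = default;
};

// Root: simply counts outstanding work.
struct root_vertex : wait_vertex {
    std::atomic<std::uint64_t> count{0};
    void reserve(std::uint32_t delta = 1) override { count += delta; }
    void release(std::uint32_t delta = 1) override { count -= delta; }
};

// Child: forwards only the 0 -> 1 and last -> 0 transitions to its parent.
struct child_vertex : wait_vertex {
    explicit child_vertex(wait_vertex* parent) : my_parent(parent) {}
    void reserve(std::uint32_t delta = 1) override {
        if (refs.fetch_add(delta) == 0) my_parent->reserve();
    }
    void release(std::uint32_t delta = 1) override {
        if (refs.fetch_sub(delta) - delta == 0) my_parent->release();
    }
private:
    wait_vertex* my_parent;
    std::atomic<std::uint64_t> refs{0};
};

int main() {
    root_vertex root;
    child_vertex child(&root);
    child.reserve();                           // first reference: root now sees one live subtree
    child.reserve();                           // further references stay local to the child
    std::cout << root.count.load() << "\n";    // prints 1
    child.release();
    child.release();                           // last reference: the subtree signals the root
    std::cout << root.count.load() << "\n";    // prints 0
    return 0;
}
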
@@ -34,7 +34,7 @@ class task_handle; class task_handle_task : public d1::task { std::uint64_t m_version_and_traits{}; - d1::wait_context& m_wait_ctx; + d1::wait_tree_vertex_interface* m_wait_tree_vertex; d1::task_group_context& m_ctx; d1::small_object_allocator m_allocator; public: @@ -46,15 +46,16 @@ public: } } - task_handle_task(d1::wait_context& wo, d1::task_group_context& ctx, d1::small_object_allocator& alloc) - : m_wait_ctx(wo) + task_handle_task(d1::wait_tree_vertex_interface* vertex, d1::task_group_context& ctx, d1::small_object_allocator& alloc) + : m_wait_tree_vertex(vertex) , m_ctx(ctx) , m_allocator(alloc) { suppress_unused_warning(m_version_and_traits); + m_wait_tree_vertex->reserve(); } ~task_handle_task() override { - m_wait_ctx.release(); + m_wait_tree_vertex->release(); } d1::task_group_context& ctx() const { return m_ctx; } diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_template_helpers.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_template_helpers.h index 3491371047..50ce3d2d3b 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_template_helpers.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_template_helpers.h @@ -401,4 +401,3 @@ using type_identity_t = typename type_identity<T>::type; } // namespace tbb #endif // __TBB_detail__template_helpers_H - diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_utils.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_utils.h index 1ac2e3baa6..1f480702f5 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_utils.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_utils.h @@ -133,6 +133,12 @@ bool timed_spin_wait_until(Condition condition) { } template <typename T> +T clamp(T value, T lower_bound, T upper_bound) { + __TBB_ASSERT(lower_bound <= upper_bound, "Incorrect bounds"); + return value > lower_bound ? (value > upper_bound ? upper_bound : value) : lower_bound; +} + +template <typename T> std::uintptr_t log2(T in) { __TBB_ASSERT(in > 0, "The logarithm of a non-positive value is undefined."); return machine_log2(in); diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_waitable_atomic.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_waitable_atomic.h index fa7280a577..1b18d11e5a 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_waitable_atomic.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_waitable_atomic.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2021 Intel Corporation + Copyright (c) 2021-2023 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
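The clamp() helper added to _utils.h above behaves like std::clamp: values below the lower bound are raised to it, values above the upper bound are lowered to it, and in-range values pass through, with an assertion that the bounds are ordered. A quick illustration using a plain reimplementation of the same expression rather than the internal header:

#include <cassert>

// Same shape as the helper in the diff: returns lower_bound for small values,
// upper_bound for large ones, and the value itself when already in range.
template <typename T>
T clamp(T value, T lower_bound, T upper_bound) {
    return value > lower_bound ? (value > upper_bound ? upper_bound : value) : lower_bound;
}

int main() {
    assert(clamp(5, 0, 10) == 5);    // already in range
    assert(clamp(-3, 0, 10) == 0);   // clipped to the lower bound
    assert(clamp(42, 0, 10) == 10);  // clipped to the upper bound
    return 0;
}
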
@@ -69,22 +69,6 @@ public: } } - void wait_until(T expected, std::uintptr_t context, std::memory_order order) { - auto wakeup_condition = [&] { return my_atomic.load(order) == expected; }; - if (!timed_spin_wait_until(wakeup_condition)) { - // We need to use while here, because notify_all() will wake up all threads - // But predicate for them might be false - d1::delegated_function<decltype(wakeup_condition)> pred(wakeup_condition); - do { - r1::wait_on_address(this, pred, context); - } while (!wakeup_condition()); - } - } - - void notify_relaxed(std::uintptr_t context) { - r1::notify_by_address(this, context); - } - void notify_one_relaxed() { r1::notify_by_address_one(this); } @@ -92,6 +76,8 @@ public: // TODO: consider adding following interfaces: // store(desired, memory_order) // notify_all_relaxed() + // wait_until(T, std::uintptr_t, std::memory_order) + // notify_relaxed(std::uintptr_t context) private: std::atomic<T> my_atomic{}; diff --git a/contrib/libs/tbb/include/oneapi/tbb/enumerable_thread_specific.h b/contrib/libs/tbb/include/oneapi/tbb/enumerable_thread_specific.h index 34bcab6821..caa53fa0d6 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/enumerable_thread_specific.h +++ b/contrib/libs/tbb/include/oneapi/tbb/enumerable_thread_specific.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2022 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -36,7 +36,15 @@ #include "task.h" // for task::suspend_point #if _WIN32 || _WIN64 +#ifndef NOMINMAX +#define NOMINMAX +#define __TBB_DEFINED_NOMINMAX 1 +#endif #include <windows.h> +#if __TBB_DEFINED_NOMINMAX +#undef NOMINMAX +#undef __TBB_DEFINED_NOMINMAX +#endif #else #include <pthread.h> #endif diff --git a/contrib/libs/tbb/include/oneapi/tbb/flow_graph.h b/contrib/libs/tbb/include/oneapi/tbb/flow_graph.h index 2df4b14050..5b438faabf 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/flow_graph.h +++ b/contrib/libs/tbb/include/oneapi/tbb/flow_graph.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2023 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -52,6 +52,7 @@ #include <tuple> #include <list> +#include <forward_list> #include <queue> #if __TBB_CPP20_CONCEPTS_PRESENT #include <concepts> @@ -70,7 +71,7 @@ namespace tbb { namespace detail { -namespace d1 { +namespace d2 { //! An enumeration the provides the two most common concurrency levels: unlimited and serial enum concurrency { unlimited = 0, serial = 1 }; @@ -81,19 +82,19 @@ struct null_type {}; //! 
An empty class used for messages that mean "I'm done" class continue_msg {}; -} // namespace d1 +} // namespace d2 #if __TBB_CPP20_CONCEPTS_PRESENT namespace d0 { template <typename ReturnType, typename OutputType> -concept node_body_return_type = std::same_as<OutputType, tbb::detail::d1::continue_msg> || +concept node_body_return_type = std::same_as<OutputType, tbb::detail::d2::continue_msg> || std::convertible_to<OutputType, ReturnType>; // TODO: consider using std::invocable here template <typename Body, typename Output> concept continue_node_body = std::copy_constructible<Body> && - requires( Body& body, const tbb::detail::d1::continue_msg& v ) { + requires( Body& body, const tbb::detail::d2::continue_msg& v ) { { body(v) } -> node_body_return_type<Output>; }; @@ -129,7 +130,7 @@ concept async_node_body = std::copy_constructible<Body> && } // namespace d0 #endif // __TBB_CPP20_CONCEPTS_PRESENT -namespace d1 { +namespace d2 { //! Forward declaration section template< typename T > class sender; @@ -153,7 +154,7 @@ template<typename Order, typename... Args> struct node_set; #endif -} // namespace d1 +} // namespace d2 } // namespace detail } // namespace tbb @@ -162,7 +163,7 @@ template<typename Order, typename... Args> struct node_set; namespace tbb { namespace detail { -namespace d1 { +namespace d2 { static inline std::pair<graph_task*, graph_task*> order_tasks(graph_task* first, graph_task* second) { if (second->priority > first->priority) @@ -187,6 +188,37 @@ static inline graph_task* combine_tasks(graph& g, graph_task* left, graph_task* return left; } +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT +class message_metainfo { +public: + using waiters_type = std::forward_list<d1::wait_context_vertex*>; + + message_metainfo() = default; + + message_metainfo(const waiters_type& waiters) : my_waiters(waiters) {} + message_metainfo(waiters_type&& waiters) : my_waiters(std::move(waiters)) {} + + const waiters_type& waiters() const & { return my_waiters; } + waiters_type&& waiters() && { return std::move(my_waiters); } + + bool empty() const { return my_waiters.empty(); } + + void merge(const message_metainfo& other) { + // TODO: should we avoid duplications on merging + my_waiters.insert_after(my_waiters.before_begin(), + other.waiters().begin(), + other.waiters().end()); + } +private: + waiters_type my_waiters; +}; // class message_metainfo + +#define __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo) , metainfo + +#else +#define __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo) +#endif // __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + //! Pure virtual template class that defines a sender of messages of type T template< typename T > class sender { @@ -196,9 +228,17 @@ public: //! Request an item from the sender virtual bool try_get( T & ) { return false; } +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + virtual bool try_get( T &, message_metainfo& ) { return false; } +#endif + //! Reserves an item in the sender virtual bool try_reserve( T & ) { return false; } +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + virtual bool try_reserve( T &, message_metainfo& ) { return false; } +#endif + //! Releases the reserved item virtual bool try_release( ) { return false; } @@ -238,17 +278,38 @@ bool remove_successor(sender<C>& s, receiver<C>& r) { //! Pure virtual template class that defines a receiver of messages of type T template< typename T > class receiver { +private: + template <typename... TryPutTaskArgs> + bool internal_try_put(const T& t, TryPutTaskArgs&&... 
args) { + graph_task* res = try_put_task(t, std::forward<TryPutTaskArgs>(args)...); + if (!res) return false; + if (res != SUCCESSFULLY_ENQUEUED) spawn_in_graph_arena(graph_reference(), *res); + return true; + } + public: //! Destructor virtual ~receiver() {} //! Put an item to the receiver bool try_put( const T& t ) { - graph_task *res = try_put_task(t); - if (!res) return false; - if (res != SUCCESSFULLY_ENQUEUED) spawn_in_graph_arena(graph_reference(), *res); - return true; + return internal_try_put(t); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + //! Put an item to the receiver and wait for completion + bool try_put_and_wait( const T& t ) { + // Since try_put_and_wait is a blocking call, it is safe to create wait_context on stack + d1::wait_context_vertex msg_wait_vertex{}; + + bool res = internal_try_put(t, message_metainfo{message_metainfo::waiters_type{&msg_wait_vertex}}); + if (res) { + __TBB_ASSERT(graph_reference().my_context != nullptr, "No wait_context associated with the Flow Graph"); + d1::wait(msg_wait_vertex.get_context(), *graph_reference().my_context); + } + return res; } +#endif //! put item to successor; return task to run the successor if possible. protected: @@ -262,6 +323,9 @@ protected: template< typename X, typename Y > friend class broadcast_cache; template< typename X, typename Y > friend class round_robin_cache; virtual graph_task *try_put_task(const T& t) = 0; +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + virtual graph_task *try_put_task(const T& t, const message_metainfo&) = 0; +#endif virtual graph& graph_reference() const = 0; template<typename TT, typename M> friend class successor_cache; @@ -337,23 +401,61 @@ protected: template< typename R, typename B > friend class run_and_put_task; template<typename X, typename Y> friend class broadcast_cache; template<typename X, typename Y> friend class round_robin_cache; + +private: // execute body is supposed to be too small to create a task for. - graph_task* try_put_task( const input_type & ) override { + graph_task* try_put_task_impl( const input_type& __TBB_FLOW_GRAPH_METAINFO_ARG(const message_metainfo& metainfo) ) { +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + message_metainfo predecessor_metainfo; +#endif { spin_mutex::scoped_lock l(my_mutex); +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + // Prolong the wait and store the metainfo until receiving signals from all the predecessors + for (auto waiter : metainfo.waiters()) { + waiter->reserve(1); + } + my_current_metainfo.merge(metainfo); +#endif if ( ++my_current_count < my_predecessor_count ) return SUCCESSFULLY_ENQUEUED; - else + else { my_current_count = 0; +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + predecessor_metainfo = my_current_metainfo; + my_current_metainfo = message_metainfo{}; +#endif + } + } +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + graph_task* res = execute(predecessor_metainfo); + for (auto waiter : predecessor_metainfo.waiters()) { + waiter->release(1); } +#else graph_task* res = execute(); +#endif return res? 
res : SUCCESSFULLY_ENQUEUED; } +protected: + graph_task* try_put_task( const input_type& input ) override { + return try_put_task_impl(input __TBB_FLOW_GRAPH_METAINFO_ARG(message_metainfo{})); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + graph_task* try_put_task( const input_type& input, const message_metainfo& metainfo ) override { + return try_put_task_impl(input, metainfo); + } +#endif + spin_mutex my_mutex; int my_predecessor_count; int my_current_count; int my_initial_predecessor_count; +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + message_metainfo my_current_metainfo; +#endif node_priority_t my_priority; // the friend declaration in the base class did not eliminate the "protected class" // error in gcc 4.1.2 @@ -369,7 +471,11 @@ protected: //! Does whatever should happen when the threshold is reached /** This should be very fast or else spawn a task. This is called while the sender is blocked in the try_put(). */ +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + virtual graph_task* execute(const message_metainfo& metainfo) = 0; +#else virtual graph_task* execute() = 0; +#endif template<typename TT, typename M> friend class successor_cache; bool is_continue_receiver() override { return true; } @@ -392,7 +498,7 @@ protected: namespace tbb { namespace detail { -namespace d1 { +namespace d2 { #include "detail/_flow_graph_body_impl.h" #include "detail/_flow_graph_cache_impl.h" @@ -424,7 +530,7 @@ void graph_iterator<C,N>::internal_forward() { } //! Constructs a graph with isolated task_group_context -inline graph::graph() : my_wait_context(0), my_nodes(nullptr), my_nodes_last(nullptr), my_task_arena(nullptr) { +inline graph::graph() : my_wait_context_vertex(0), my_nodes(nullptr), my_nodes_last(nullptr), my_task_arena(nullptr) { prepare_task_arena(); own_context = true; cancelled = false; @@ -435,7 +541,7 @@ inline graph::graph() : my_wait_context(0), my_nodes(nullptr), my_nodes_last(nul } inline graph::graph(task_group_context& use_this_context) : - my_wait_context(0), my_context(&use_this_context), my_nodes(nullptr), my_nodes_last(nullptr), my_task_arena(nullptr) { + my_wait_context_vertex(0), my_context(&use_this_context), my_nodes(nullptr), my_nodes_last(nullptr), my_task_arena(nullptr) { prepare_task_arena(); own_context = false; cancelled = false; @@ -454,13 +560,13 @@ inline graph::~graph() { } inline void graph::reserve_wait() { - my_wait_context.reserve(); + my_wait_context_vertex.reserve(); fgt_reserve_wait(this); } inline void graph::release_wait() { fgt_release_wait(this); - my_wait_context.release(); + my_wait_context_vertex.release(); } inline void graph::register_node(graph_node *n) { @@ -633,6 +739,18 @@ public: } } +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT +private: + bool try_reserve( output_type& v, message_metainfo& ) override { + return try_reserve(v); + } + + bool try_get( output_type& v, message_metainfo& ) override { + return try_get(v); + } +public: +#endif + //! Release a reserved item. 
/** true = item has been released and so remains in sender, dest must request or reserve future items */ bool try_release( ) override { @@ -703,7 +821,7 @@ private: return false; } if ( !my_has_cached_item ) { - flow_control control; + d1::flow_control control; fgt_begin_body( my_body ); @@ -722,10 +840,9 @@ private: } graph_task* create_put_task() { - small_object_allocator allocator{}; + d1::small_object_allocator allocator{}; typedef input_node_task_bypass< input_node<output_type> > task_type; graph_task* t = allocator.new_object<task_type>(my_graph, allocator, *this); - my_graph.reserve_wait(); return t; } @@ -962,6 +1079,14 @@ protected: // Also, we do not have successors here. So we just tell the task returned here is successful. return emit_element<N>::emit_this(this->my_graph, t, output_ports()); } +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + graph_task* try_put_task(const TupleType& t, const message_metainfo& metainfo) override { + // Sending split messages in parallel is not justified, as overheads would prevail. + // Also, we do not have successors here. So we just tell the task returned here is successful. + return emit_element<N>::emit_this(this->my_graph, t, output_ports(), metainfo); + } +#endif + void reset_node(reset_flags f) override { if (f & rf_clear_edges) clear_element<N>::clear_this(my_output_ports); @@ -1119,17 +1244,28 @@ public: return true; } +private: + graph_task* try_put_task_impl(const T& t __TBB_FLOW_GRAPH_METAINFO_ARG(const message_metainfo& metainfo)) { + graph_task* new_task = my_successors.try_put_task(t __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo)); + if (!new_task) new_task = SUCCESSFULLY_ENQUEUED; + return new_task; + } + protected: template< typename R, typename B > friend class run_and_put_task; template<typename X, typename Y> friend class broadcast_cache; template<typename X, typename Y> friend class round_robin_cache; //! build a task to run the successor if possible. Default is old behavior. 
- graph_task *try_put_task(const T& t) override { - graph_task *new_task = my_successors.try_put_task(t); - if (!new_task) new_task = SUCCESSFULLY_ENQUEUED; - return new_task; + graph_task* try_put_task(const T& t) override { + return try_put_task_impl(t __TBB_FLOW_GRAPH_METAINFO_ARG(message_metainfo{})); } +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + graph_task* try_put_task(const T& t, const message_metainfo& metainfo) override { + return try_put_task_impl(t, metainfo); + } +#endif + graph& graph_reference() const override { return my_graph; } @@ -1168,24 +1304,37 @@ protected: }; // implements the aggregator_operation concept - class buffer_operation : public aggregated_operation< buffer_operation > { + class buffer_operation : public d1::aggregated_operation< buffer_operation > { public: char type; T* elem; graph_task* ltask; successor_type *r; +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + message_metainfo* metainfo{ nullptr }; +#endif buffer_operation(const T& e, op_type t) : type(char(t)) , elem(const_cast<T*>(&e)) , ltask(nullptr) , r(nullptr) {} + +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + buffer_operation(const T& e, op_type t, const message_metainfo& info) + : type(char(t)), elem(const_cast<T*>(&e)), ltask(nullptr), r(nullptr) + , metainfo(const_cast<message_metainfo*>(&info)) + {} + + buffer_operation(op_type t, message_metainfo& info) + : type(char(t)), elem(nullptr), ltask(nullptr), r(nullptr), metainfo(&info) {} +#endif buffer_operation(op_type t) : type(char(t)), elem(nullptr), ltask(nullptr), r(nullptr) {} }; bool forwarder_busy; - typedef aggregating_functor<class_type, buffer_operation> handler_type; - friend class aggregating_functor<class_type, buffer_operation>; - aggregator< handler_type, buffer_operation> my_aggregator; + typedef d1::aggregating_functor<class_type, buffer_operation> handler_type; + friend class d1::aggregating_functor<class_type, buffer_operation>; + d1::aggregator< handler_type, buffer_operation> my_aggregator; virtual void handle_operations(buffer_operation *op_list) { handle_operations_impl(op_list, this); @@ -1218,9 +1367,8 @@ protected: if(is_graph_active(this->my_graph)) { forwarder_busy = true; typedef forward_task_bypass<class_type> task_type; - small_object_allocator allocator{}; + d1::small_object_allocator allocator{}; graph_task* new_task = allocator.new_object<task_type>(graph_reference(), allocator, *this); - my_graph.reserve_wait(); // tmp should point to the last item handled by the aggregator. This is the operation // the handling thread enqueued. So modifying that record will be okay. 
// TODO revamp: check that the issue is still present @@ -1286,7 +1434,8 @@ private: } void try_put_and_add_task(graph_task*& last_task) { - graph_task *new_task = my_successors.try_put_task(this->back()); + graph_task* new_task = my_successors.try_put_task(this->back() + __TBB_FLOW_GRAPH_METAINFO_ARG(this->back_metainfo())); if (new_task) { // workaround for icc bug graph& g = this->my_graph; @@ -1328,14 +1477,25 @@ protected: virtual bool internal_push(buffer_operation *op) { __TBB_ASSERT(op->elem, nullptr); +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + __TBB_ASSERT(op->metainfo, nullptr); + this->push_back(*(op->elem), (*op->metainfo)); +#else this->push_back(*(op->elem)); +#endif op->status.store(SUCCEEDED, std::memory_order_release); return true; } virtual void internal_pop(buffer_operation *op) { __TBB_ASSERT(op->elem, nullptr); - if(this->pop_back(*(op->elem))) { +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + bool pop_result = op->metainfo ? this->pop_back(*(op->elem), *(op->metainfo)) + : this->pop_back(*(op->elem)); +#else + bool pop_result = this->pop_back(*(op->elem)); +#endif + if (pop_result) { op->status.store(SUCCEEDED, std::memory_order_release); } else { @@ -1345,7 +1505,13 @@ protected: virtual void internal_reserve(buffer_operation *op) { __TBB_ASSERT(op->elem, nullptr); - if(this->reserve_front(*(op->elem))) { +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + bool reserve_result = op->metainfo ? this->reserve_front(*(op->elem), *(op->metainfo)) + : this->reserve_front(*(op->elem)); +#else + bool reserve_result = this->reserve_front(*(op->elem)); +#endif + if (reserve_result) { op->status.store(SUCCEEDED, std::memory_order_release); } else { @@ -1403,7 +1569,7 @@ public: It also calls r.remove_predecessor(*this) to remove this node as a predecessor. */ bool remove_successor( successor_type &r ) override { // TODO revamp: investigate why full qualification is necessary here - tbb::detail::d1::remove_predecessor(r, *this); + tbb::detail::d2::remove_predecessor(r, *this); buffer_operation op_data(rem_succ); op_data.r = &r; my_aggregator.execute(&op_data); @@ -1425,6 +1591,16 @@ public: return (op_data.status==SUCCEEDED); } +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + bool try_get( T &v, message_metainfo& metainfo ) override { + buffer_operation op_data(req_item, metainfo); + op_data.elem = &v; + my_aggregator.execute(&op_data); + (void)enqueue_forwarding_task(op_data); + return (op_data.status==SUCCEEDED); + } +#endif + //! Reserves an item. /** false = no item can be reserved<BR> true = an item is reserved */ @@ -1436,6 +1612,16 @@ public: return (op_data.status==SUCCEEDED); } +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + bool try_reserve( output_type& v, message_metainfo& metainfo ) override { + buffer_operation op_data(res_item, metainfo); + op_data.elem = &v; + my_aggregator.execute(&op_data); + (void)enqueue_forwarding_task(op_data); + return op_data.status==SUCCEEDED; + } +#endif + //! Release a reserved item. /** true = item has been released and so remains in sender */ bool try_release() override { @@ -1454,14 +1640,9 @@ public: return true; } -protected: - - template< typename R, typename B > friend class run_and_put_task; - template<typename X, typename Y> friend class broadcast_cache; - template<typename X, typename Y> friend class round_robin_cache; - //! 
receive an item, return a task *if possible - graph_task *try_put_task(const T &t) override { - buffer_operation op_data(t, put_item); +private: + graph_task* try_put_task_impl(const T& t __TBB_FLOW_GRAPH_METAINFO_ARG(const message_metainfo& metainfo)) { + buffer_operation op_data(t, put_item __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo)); my_aggregator.execute(&op_data); graph_task *ft = grab_forwarding_task(op_data); // sequencer_nodes can return failure (if an item has been previously inserted) @@ -1479,6 +1660,22 @@ protected: return ft; } +protected: + + template< typename R, typename B > friend class run_and_put_task; + template<typename X, typename Y> friend class broadcast_cache; + template<typename X, typename Y> friend class round_robin_cache; + //! receive an item, return a task *if possible + graph_task *try_put_task(const T &t) override { + return try_put_task_impl(t __TBB_FLOW_GRAPH_METAINFO_ARG(message_metainfo{})); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + graph_task* try_put_task(const T& t, const message_metainfo& metainfo) override { + return try_put_task_impl(t, metainfo); + } +#endif + graph& graph_reference() const override { return my_graph; } @@ -1511,7 +1708,9 @@ private: } void try_put_and_add_task(graph_task*& last_task) { - graph_task *new_task = this->my_successors.try_put_task(this->front()); + graph_task* new_task = this->my_successors.try_put_task(this->front() + __TBB_FLOW_GRAPH_METAINFO_ARG(this->front_metainfo())); + if (new_task) { // workaround for icc bug graph& graph_ref = this->graph_reference(); @@ -1530,7 +1729,14 @@ protected: op->status.store(FAILED, std::memory_order_release); } else { - this->pop_front(*(op->elem)); +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + if (op->metainfo) { + this->pop_front(*(op->elem), *(op->metainfo)); + } else +#endif + { + this->pop_front(*(op->elem)); + } op->status.store(SUCCEEDED, std::memory_order_release); } } @@ -1539,7 +1745,15 @@ protected: op->status.store(FAILED, std::memory_order_release); } else { - this->reserve_front(*(op->elem)); +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + if (op->metainfo) { + this->reserve_front(*(op->elem), *(op->metainfo)); + } + else +#endif + { + this->reserve_front(*(op->elem)); + } op->status.store(SUCCEEDED, std::memory_order_release); } } @@ -1647,7 +1861,13 @@ private: } this->my_tail = new_tail; +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + __TBB_ASSERT(op->metainfo, nullptr); + bool place_item_result = this->place_item(tag, *(op->elem), *(op->metainfo)); + const op_stat res = place_item_result ? SUCCEEDED : FAILED; +#else const op_stat res = this->place_item(tag, *(op->elem)) ? 
SUCCEEDED : FAILED; +#endif op->status.store(res, std::memory_order_release); return res ==SUCCEEDED; } @@ -1710,7 +1930,12 @@ protected: } bool internal_push(prio_operation *op) override { +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + __TBB_ASSERT(op->metainfo, nullptr); + prio_push(*(op->elem), *(op->metainfo)); +#else prio_push(*(op->elem)); +#endif op->status.store(SUCCEEDED, std::memory_order_release); return true; } @@ -1723,6 +1948,11 @@ protected: } *(op->elem) = prio(); +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + if (op->metainfo) { + *(op->metainfo) = std::move(prio_metainfo()); + } +#endif op->status.store(SUCCEEDED, std::memory_order_release); prio_pop(); @@ -1736,6 +1966,12 @@ protected: } this->my_reserved = true; *(op->elem) = prio(); +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + if (op->metainfo) { + *(op->metainfo) = std::move(prio_metainfo()); + reserved_metainfo = *(op->metainfo); + } +#endif reserved_item = *(op->elem); op->status.store(SUCCEEDED, std::memory_order_release); prio_pop(); @@ -1745,13 +1981,27 @@ protected: op->status.store(SUCCEEDED, std::memory_order_release); this->my_reserved = false; reserved_item = input_type(); +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + for (auto waiter : reserved_metainfo.waiters()) { + waiter->release(1); + } + + reserved_metainfo = message_metainfo{}; +#endif } void internal_release(prio_operation *op) override { op->status.store(SUCCEEDED, std::memory_order_release); - prio_push(reserved_item); + prio_push(reserved_item __TBB_FLOW_GRAPH_METAINFO_ARG(reserved_metainfo)); this->my_reserved = false; reserved_item = input_type(); +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + for (auto waiter : reserved_metainfo.waiters()) { + waiter->release(1); + } + + reserved_metainfo = message_metainfo{}; +#endif } private: @@ -1767,7 +2017,8 @@ private: } void try_put_and_add_task(graph_task*& last_task) { - graph_task * new_task = this->my_successors.try_put_task(this->prio()); + graph_task* new_task = this->my_successors.try_put_task(this->prio() + __TBB_FLOW_GRAPH_METAINFO_ARG(this->prio_metainfo())); if (new_task) { // workaround for icc bug graph& graph_ref = this->graph_reference(); @@ -1781,6 +2032,9 @@ private: size_type mark; input_type reserved_item; +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + message_metainfo reserved_metainfo; +#endif // in case a reheap has not been done after a push, check if the mark item is higher than the 0'th item bool prio_use_tail() { @@ -1789,10 +2043,10 @@ private: } // prio_push: checks that the item will fit, expand array if necessary, put at end - void prio_push(const T &src) { + void prio_push(const T &src __TBB_FLOW_GRAPH_METAINFO_ARG(const message_metainfo& metainfo)) { if ( this->my_tail >= this->my_array_size ) this->grow_my_array( this->my_tail + 1 ); - (void) this->place_item(this->my_tail, src); + (void) this->place_item(this->my_tail, src __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo)); ++(this->my_tail); __TBB_ASSERT(mark < this->my_tail, "mark outside bounds after push"); } @@ -1826,6 +2080,12 @@ private: return this->get_my_item(prio_use_tail() ? this->my_tail-1 : 0); } +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + message_metainfo& prio_metainfo() { + return this->get_my_metainfo(prio_use_tail() ? 
this->my_tail-1 : 0); + } +#endif + // turn array into heap void heapify() { if(this->my_tail == 0) { @@ -1836,7 +2096,10 @@ private: for (; mark<this->my_tail; ++mark) { // for each unheaped element size_type cur_pos = mark; input_type to_place; - this->fetch_item(mark,to_place); +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + message_metainfo metainfo; +#endif + this->fetch_item(mark, to_place __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo)); do { // push to_place up the heap size_type parent = (cur_pos-1)>>1; if (!compare(this->get_my_item(parent), to_place)) @@ -1844,7 +2107,7 @@ private: this->move_item(cur_pos, parent); cur_pos = parent; } while( cur_pos ); - (void) this->place_item(cur_pos, to_place); + this->place_item(cur_pos, to_place __TBB_FLOW_GRAPH_METAINFO_ARG(std::move(metainfo))); } } @@ -1944,9 +2207,12 @@ private: //SUCCESS // if we can reserve and can put, we consume the reservation // we increment the count and decrement the tries - if ( (my_predecessors.try_reserve(v)) == true ) { +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + message_metainfo metainfo; +#endif + if ( (my_predecessors.try_reserve(v __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo))) == true ) { reserved = true; - if ( (rval = my_successors.try_put_task(v)) != nullptr ) { + if ( (rval = my_successors.try_put_task(v __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo))) != nullptr ) { { spin_mutex::scoped_lock lock(my_mutex); ++my_count; @@ -1965,9 +2231,8 @@ private: if ( check_conditions() ) { if ( is_graph_active(this->my_graph) ) { typedef forward_task_bypass<limiter_node<T, DecrementType>> task_type; - small_object_allocator allocator{}; + d1::small_object_allocator allocator{}; graph_task* rtask = allocator.new_object<task_type>( my_graph, allocator, *this ); - my_graph.reserve_wait(); spawn_in_graph_arena(graph_reference(), *rtask); } } @@ -1984,10 +2249,9 @@ private: if (reserved) my_predecessors.try_release(); if ( check_conditions() ) { if ( is_graph_active(this->my_graph) ) { - small_object_allocator allocator{}; + d1::small_object_allocator allocator{}; typedef forward_task_bypass<limiter_node<T, DecrementType>> task_type; graph_task* t = allocator.new_object<task_type>(my_graph, allocator, *this); - my_graph.reserve_wait(); __TBB_ASSERT(!rval, "Have two tasks to handle"); return t; } @@ -2035,10 +2299,9 @@ public: //spawn a forward task if this is the only successor if ( was_empty && !my_predecessors.empty() && my_count + my_tries < my_threshold ) { if ( is_graph_active(this->my_graph) ) { - small_object_allocator allocator{}; + d1::small_object_allocator allocator{}; typedef forward_task_bypass<limiter_node<T, DecrementType>> task_type; graph_task* t = allocator.new_object<task_type>(my_graph, allocator, *this); - my_graph.reserve_wait(); spawn_in_graph_arena(graph_reference(), *t); } } @@ -2049,7 +2312,7 @@ public: /** r.remove_predecessor(*this) is also called. 
*/ bool remove_successor( successor_type &r ) override { // TODO revamp: investigate why qualification is needed for remove_predecessor() call - tbb::detail::d1::remove_predecessor(r, *this); + tbb::detail::d2::remove_predecessor(r, *this); my_successors.remove_successor(r); return true; } @@ -2059,10 +2322,9 @@ public: spin_mutex::scoped_lock lock(my_mutex); my_predecessors.add( src ); if ( my_count + my_tries < my_threshold && !my_successors.empty() && is_graph_active(this->my_graph) ) { - small_object_allocator allocator{}; + d1::small_object_allocator allocator{}; typedef forward_task_bypass<limiter_node<T, DecrementType>> task_type; graph_task* t = allocator.new_object<task_type>(my_graph, allocator, *this); - my_graph.reserve_wait(); spawn_in_graph_arena(graph_reference(), *t); } return true; @@ -2079,8 +2341,10 @@ protected: template< typename R, typename B > friend class run_and_put_task; template<typename X, typename Y> friend class broadcast_cache; template<typename X, typename Y> friend class round_robin_cache; + +private: //! Puts an item to this receiver - graph_task* try_put_task( const T &t ) override { + graph_task* try_put_task_impl( const T &t __TBB_FLOW_GRAPH_METAINFO_ARG(const message_metainfo& metainfo) ) { { spin_mutex::scoped_lock lock(my_mutex); if ( my_count + my_tries >= my_threshold ) @@ -2089,15 +2353,14 @@ protected: ++my_tries; } - graph_task* rtask = my_successors.try_put_task(t); + graph_task* rtask = my_successors.try_put_task(t __TBB_FLOW_GRAPH_METAINFO_ARG(metainfo)); if ( !rtask ) { // try_put_task failed. spin_mutex::scoped_lock lock(my_mutex); --my_tries; if (check_conditions() && is_graph_active(this->my_graph)) { - small_object_allocator allocator{}; + d1::small_object_allocator allocator{}; typedef forward_task_bypass<limiter_node<T, DecrementType>> task_type; rtask = allocator.new_object<task_type>(my_graph, allocator, *this); - my_graph.reserve_wait(); } } else { @@ -2118,6 +2381,16 @@ protected: return rtask; } +protected: + graph_task* try_put_task(const T& t) override { + return try_put_task_impl(t __TBB_FLOW_GRAPH_METAINFO_ARG(message_metainfo{})); + } +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + graph_task* try_put_task(const T& t, const message_metainfo& metainfo) override { + return try_put_task_impl(t, metainfo); + } +#endif + graph& graph_reference() const override { return my_graph; } void reset_node( reset_flags f ) override { @@ -3054,10 +3327,9 @@ public: // because failed reserve does not mean that register_successor is not ready to put a message immediately. // We have some sort of infinite loop: reserving node tries to set pull state for the edge, // but overwrite_node tries to return push state back. That is why we have to break this loop with task creation. 
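Editorial note: throughout the buffer, sequencer, priority-queue and limiter hunks above, an optional message_metainfo parameter is threaded through the internal try_put_task protocol via the __TBB_FLOW_GRAPH_METAINFO_ARG macro, so the same code compiles with and without the __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT preview feature that tracks a message until its processing completes. A minimal, hypothetical sketch of that conditional-argument technique (names below are made up, not the oneTBB definitions):

#include <cstdio>

// Stand-in toggle for __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT.
#define FEATURE_TRY_PUT_AND_WAIT 1

#if FEATURE_TRY_PUT_AND_WAIT
    // When the preview feature is on, the macro injects a trailing ", arg".
    #define METAINFO_ARG(arg) , arg
#else
    // When it is off, the same call sites compile without the extra parameter.
    #define METAINFO_ARG(arg)
#endif

struct metainfo { int waiters; };   // stand-in for message_metainfo

// One declaration serves both configurations: try_put(int) or try_put(int, const metainfo&).
void try_put(int value METAINFO_ARG(const metainfo& info)) {
#if FEATURE_TRY_PUT_AND_WAIT
    std::printf("value=%d waiters=%d\n", value, info.waiters);
#else
    std::printf("value=%d\n", value);
#endif
}

int main() {
    try_put(42 METAINFO_ARG(metainfo{3}));
    return 0;
}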
- small_object_allocator allocator{}; + d1::small_object_allocator allocator{}; typedef register_predecessor_task task_type; graph_task* t = allocator.new_object<task_type>(graph_reference(), allocator, *this, s); - graph_reference().reserve_wait(); spawn_in_graph_arena( my_graph, *t ); } } else { @@ -3082,11 +3354,45 @@ public: return false; } +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + bool try_get( input_type &v, message_metainfo& metainfo ) override { + spin_mutex::scoped_lock l( my_mutex ); + if (my_buffer_is_valid) { + v = my_buffer; + metainfo = my_buffered_metainfo; + + // Since the successor of the node will use move semantics while wrapping the metainfo + // that is designed to transfer the ownership of the value from single-push buffer to the task + // It is required to reserve one more reference here because the value keeps in the buffer + // and the ownership is not transferred + for (auto msg_waiter : metainfo.waiters()) { + msg_waiter->reserve(1); + } + return true; + } + return false; + } +#endif + //! Reserves an item bool try_reserve( T &v ) override { return try_get(v); } +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT +private: + bool try_reserve(T& v, message_metainfo& metainfo) override { + spin_mutex::scoped_lock l( my_mutex ); + if (my_buffer_is_valid) { + v = my_buffer; + metainfo = my_buffered_metainfo; + return true; + } + return false; + } +public: +#endif + //! Releases the reserved item bool try_release() override { return true; } @@ -3101,6 +3407,12 @@ public: void clear() { spin_mutex::scoped_lock l( my_mutex ); my_buffer_is_valid = false; +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + for (auto msg_waiter : my_buffered_metainfo.waiters()) { + msg_waiter->release(1); + } + my_buffered_metainfo = message_metainfo{}; +#endif } protected: @@ -3110,13 +3422,33 @@ protected: template<typename X, typename Y> friend class round_robin_cache; graph_task* try_put_task( const input_type &v ) override { spin_mutex::scoped_lock l( my_mutex ); - return try_put_task_impl(v); + return try_put_task_impl(v __TBB_FLOW_GRAPH_METAINFO_ARG(message_metainfo{})); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + graph_task* try_put_task(const input_type& v, const message_metainfo& metainfo) override { + spin_mutex::scoped_lock l( my_mutex ); + return try_put_task_impl(v, metainfo); } +#endif - graph_task * try_put_task_impl(const input_type &v) { + graph_task * try_put_task_impl(const input_type &v __TBB_FLOW_GRAPH_METAINFO_ARG(const message_metainfo& metainfo)) { my_buffer = v; my_buffer_is_valid = true; - graph_task* rtask = my_successors.try_put_task(v); +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + // Since the new item is pushed to the buffer - reserving the waiters + for (auto msg_waiter : metainfo.waiters()) { + msg_waiter->reserve(1); + } + + // Since the item is taken out from the buffer - releasing the stored waiters + for (auto msg_waiter : my_buffered_metainfo.waiters()) { + msg_waiter->release(1); + } + + my_buffered_metainfo = metainfo; +#endif + graph_task* rtask = my_successors.try_put_task(v __TBB_FLOW_GRAPH_METAINFO_ARG(my_buffered_metainfo) ); if (!rtask) rtask = SUCCESSFULLY_ENQUEUED; return rtask; } @@ -3128,13 +3460,13 @@ protected: //! 
Breaks an infinite loop between the node reservation and register_successor call struct register_predecessor_task : public graph_task { register_predecessor_task( - graph& g, small_object_allocator& allocator, predecessor_type& owner, successor_type& succ) + graph& g, d1::small_object_allocator& allocator, predecessor_type& owner, successor_type& succ) : graph_task(g, allocator), o(owner), s(succ) {}; - task* execute(execution_data& ed) override { + d1::task* execute(d1::execution_data& ed) override { // TODO revamp: investigate why qualification is needed for register_successor() call - using tbb::detail::d1::register_predecessor; - using tbb::detail::d1::register_successor; + using tbb::detail::d2::register_predecessor; + using tbb::detail::d2::register_successor; if ( !register_predecessor(s, o) ) { register_successor(o, s); } @@ -3142,7 +3474,7 @@ protected: return nullptr; } - task* cancel(execution_data& ed) override { + d1::task* cancel(d1::execution_data& ed) override { finalize<register_predecessor_task>(ed); return nullptr; } @@ -3154,6 +3486,9 @@ protected: spin_mutex my_mutex; broadcast_cache< input_type, null_rw_mutex > my_successors; input_type my_buffer; +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + message_metainfo my_buffered_metainfo; +#endif bool my_buffer_is_valid; void reset_node( reset_flags f) override { @@ -3200,8 +3535,15 @@ protected: template<typename X, typename Y> friend class round_robin_cache; graph_task *try_put_task( const T &v ) override { spin_mutex::scoped_lock l( this->my_mutex ); - return this->my_buffer_is_valid ? nullptr : this->try_put_task_impl(v); + return this->my_buffer_is_valid ? nullptr : this->try_put_task_impl(v __TBB_FLOW_GRAPH_METAINFO_ARG(message_metainfo{})); } + +#if __TBB_PREVIEW_FLOW_GRAPH_TRY_PUT_AND_WAIT + graph_task* try_put_task(const T& v, const message_metainfo& metainfo) override { + spin_mutex::scoped_lock l( this->my_mutex ); + return this->my_buffer_is_valid ? 
nullptr : this->try_put_task_impl(v, metainfo); + } +#endif }; // write_once_node inline void set_name(const graph& g, const char *name) { @@ -3293,7 +3635,7 @@ inline void set_name(const async_node<Input, Output, Policy>& node, const char * { fgt_multioutput_node_desc(&node, name); } -} // d1 +} // d2 } // detail } // tbb @@ -3304,56 +3646,56 @@ inline void set_name(const async_node<Input, Output, Policy>& node, const char * namespace tbb { namespace flow { inline namespace v1 { - using detail::d1::receiver; - using detail::d1::sender; - - using detail::d1::serial; - using detail::d1::unlimited; - - using detail::d1::reset_flags; - using detail::d1::rf_reset_protocol; - using detail::d1::rf_reset_bodies; - using detail::d1::rf_clear_edges; - - using detail::d1::graph; - using detail::d1::graph_node; - using detail::d1::continue_msg; - - using detail::d1::input_node; - using detail::d1::function_node; - using detail::d1::multifunction_node; - using detail::d1::split_node; - using detail::d1::output_port; - using detail::d1::indexer_node; - using detail::d1::tagged_msg; - using detail::d1::cast_to; - using detail::d1::is_a; - using detail::d1::continue_node; - using detail::d1::overwrite_node; - using detail::d1::write_once_node; - using detail::d1::broadcast_node; - using detail::d1::buffer_node; - using detail::d1::queue_node; - using detail::d1::sequencer_node; - using detail::d1::priority_queue_node; - using detail::d1::limiter_node; - using namespace detail::d1::graph_policy_namespace; - using detail::d1::join_node; - using detail::d1::input_port; - using detail::d1::copy_body; - using detail::d1::make_edge; - using detail::d1::remove_edge; - using detail::d1::tag_value; - using detail::d1::composite_node; - using detail::d1::async_node; - using detail::d1::node_priority_t; - using detail::d1::no_priority; + using detail::d2::receiver; + using detail::d2::sender; + + using detail::d2::serial; + using detail::d2::unlimited; + + using detail::d2::reset_flags; + using detail::d2::rf_reset_protocol; + using detail::d2::rf_reset_bodies; + using detail::d2::rf_clear_edges; + + using detail::d2::graph; + using detail::d2::graph_node; + using detail::d2::continue_msg; + + using detail::d2::input_node; + using detail::d2::function_node; + using detail::d2::multifunction_node; + using detail::d2::split_node; + using detail::d2::output_port; + using detail::d2::indexer_node; + using detail::d2::tagged_msg; + using detail::d2::cast_to; + using detail::d2::is_a; + using detail::d2::continue_node; + using detail::d2::overwrite_node; + using detail::d2::write_once_node; + using detail::d2::broadcast_node; + using detail::d2::buffer_node; + using detail::d2::queue_node; + using detail::d2::sequencer_node; + using detail::d2::priority_queue_node; + using detail::d2::limiter_node; + using namespace detail::d2::graph_policy_namespace; + using detail::d2::join_node; + using detail::d2::input_port; + using detail::d2::copy_body; + using detail::d2::make_edge; + using detail::d2::remove_edge; + using detail::d2::tag_value; + using detail::d2::composite_node; + using detail::d2::async_node; + using detail::d2::node_priority_t; + using detail::d2::no_priority; #if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET - using detail::d1::follows; - using detail::d1::precedes; - using detail::d1::make_node_set; - using detail::d1::make_edges; + using detail::d2::follows; + using detail::d2::precedes; + using detail::d2::make_node_set; + using detail::d2::make_edges; #endif } // v1 @@ -3362,7 +3704,7 @@ inline namespace v1 { using 
detail::d1::flow_control; namespace profiling { - using detail::d1::set_name; + using detail::d2::set_name; } // profiling } // tbb diff --git a/contrib/libs/tbb/include/oneapi/tbb/flow_graph_abstractions.h b/contrib/libs/tbb/include/oneapi/tbb/flow_graph_abstractions.h index 121f167c4d..329e75c43e 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/flow_graph_abstractions.h +++ b/contrib/libs/tbb/include/oneapi/tbb/flow_graph_abstractions.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -19,7 +19,7 @@ namespace tbb { namespace detail { -namespace d1 { +namespace d2 { //! Pure virtual template classes that define interfaces for async communication class graph_proxy { @@ -43,7 +43,7 @@ public: virtual bool try_put(const input_type&) = 0; }; -} // d1 +} // d2 } // detail diff --git a/contrib/libs/tbb/include/oneapi/tbb/memory_pool.h b/contrib/libs/tbb/include/oneapi/tbb/memory_pool.h index b2e6b05191..5ece879002 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/memory_pool.h +++ b/contrib/libs/tbb/include/oneapi/tbb/memory_pool.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2022 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -97,10 +97,10 @@ public: typedef memory_pool_allocator<U, P> other; }; - explicit memory_pool_allocator(pool_type &pool) throw() : my_pool(&pool) {} - memory_pool_allocator(const memory_pool_allocator& src) throw() : my_pool(src.my_pool) {} + explicit memory_pool_allocator(pool_type &pool) noexcept : my_pool(&pool) {} + memory_pool_allocator(const memory_pool_allocator& src) noexcept : my_pool(src.my_pool) {} template<typename U> - memory_pool_allocator(const memory_pool_allocator<U,P>& src) throw() : my_pool(src.my_pool) {} + memory_pool_allocator(const memory_pool_allocator<U,P>& src) noexcept : my_pool(src.my_pool) {} pointer address(reference x) const { return &x; } const_pointer address(const_reference x) const { return &x; } @@ -117,7 +117,7 @@ public: my_pool->free(p); } //! Largest value for which method allocate might succeed. - size_type max_size() const throw() { + size_type max_size() const noexcept { size_type max = static_cast<size_type>(-1) / sizeof (value_type); return (max > 0 ? 
max : 1); } @@ -149,10 +149,10 @@ public: typedef memory_pool_allocator<U, P> other; }; - explicit memory_pool_allocator( pool_type &pool) throw() : my_pool(&pool) {} - memory_pool_allocator( const memory_pool_allocator& src) throw() : my_pool(src.my_pool) {} + explicit memory_pool_allocator( pool_type &pool) noexcept : my_pool(&pool) {} + memory_pool_allocator( const memory_pool_allocator& src) noexcept : my_pool(src.my_pool) {} template<typename U> - memory_pool_allocator(const memory_pool_allocator<U,P>& src) throw() : my_pool(src.my_pool) {} + memory_pool_allocator(const memory_pool_allocator<U,P>& src) noexcept : my_pool(src.my_pool) {} protected: pool_type *my_pool; diff --git a/contrib/libs/tbb/include/oneapi/tbb/mutex.h b/contrib/libs/tbb/include/oneapi/tbb/mutex.h index a4d2a9a3de..169b7a3ca9 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/mutex.h +++ b/contrib/libs/tbb/include/oneapi/tbb/mutex.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2021 Intel Corporation + Copyright (c) 2021-2023 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -36,9 +36,7 @@ public: }; //! Destructor - ~mutex() { - __TBB_ASSERT(!my_flag.load(std::memory_order_relaxed), "destruction of an acquired mutex"); - } + ~mutex() = default; //! No Copy mutex(const mutex&) = delete; diff --git a/contrib/libs/tbb/include/oneapi/tbb/parallel_for.h b/contrib/libs/tbb/include/oneapi/tbb/parallel_for.h index 91c7c44c87..37a2613508 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/parallel_for.h +++ b/contrib/libs/tbb/include/oneapi/tbb/parallel_for.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2023 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -319,7 +319,7 @@ void parallel_for_impl(Index first, Index last, Index step, const Function& f, P template <typename Index, typename Function> __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>) void parallel_for(Index first, Index last, Index step, const Function& f) { - parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, auto_partitioner()); + parallel_for_impl<Index,Function,const __TBB_DEFAULT_PARTITIONER>(first, last, step, f, __TBB_DEFAULT_PARTITIONER()); } //! Parallel iteration over a range of integers with a step provided and simple partitioner template <typename Index, typename Function> @@ -350,7 +350,7 @@ void parallel_for(Index first, Index last, Index step, const Function& f, affini template <typename Index, typename Function> __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>) void parallel_for(Index first, Index last, const Function& f) { - parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, auto_partitioner()); + parallel_for_impl<Index,Function,const __TBB_DEFAULT_PARTITIONER>(first, last, static_cast<Index>(1), f, __TBB_DEFAULT_PARTITIONER()); } //! 
Parallel iteration over a range of integers with a default step value and simple partitioner template <typename Index, typename Function> @@ -395,7 +395,7 @@ void parallel_for_impl(Index first, Index last, Index step, const Function& f, P template <typename Index, typename Function> __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>) void parallel_for(Index first, Index last, Index step, const Function& f, task_group_context &context) { - parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, auto_partitioner(), context); + parallel_for_impl<Index,Function,const __TBB_DEFAULT_PARTITIONER>(first, last, step, f, __TBB_DEFAULT_PARTITIONER(), context); } //! Parallel iteration over a range of integers with explicit step, task group context, and simple partitioner template <typename Index, typename Function> @@ -426,7 +426,7 @@ void parallel_for(Index first, Index last, Index step, const Function& f, affini template <typename Index, typename Function> __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>) void parallel_for(Index first, Index last, const Function& f, task_group_context &context) { - parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, auto_partitioner(), context); + parallel_for_impl<Index,Function,const __TBB_DEFAULT_PARTITIONER>(first, last, static_cast<Index>(1), f, __TBB_DEFAULT_PARTITIONER(), context); } //! Parallel iteration over a range of integers with a default step value, explicit task group context, and simple partitioner template <typename Index, typename Function> diff --git a/contrib/libs/tbb/include/oneapi/tbb/parallel_for_each.h b/contrib/libs/tbb/include/oneapi/tbb/parallel_for_each.h index 56dbeb4101..85c0269196 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/parallel_for_each.h +++ b/contrib/libs/tbb/include/oneapi/tbb/parallel_for_each.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2023 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -118,14 +118,17 @@ struct feeder_item_task: public task { using feeder_type = feeder_impl<Body, Item>; template <typename ItemType> - feeder_item_task(ItemType&& input_item, feeder_type& feeder, small_object_allocator& alloc) : + feeder_item_task(ItemType&& input_item, feeder_type& feeder, small_object_allocator& alloc, wait_tree_vertex_interface& wait_vertex) : item(std::forward<ItemType>(input_item)), my_feeder(feeder), - my_allocator(alloc) - {} + my_allocator(alloc), + m_wait_tree_vertex(r1::get_thread_reference_vertex(&wait_vertex)) + { + m_wait_tree_vertex->reserve(); + } void finalize(const execution_data& ed) { - my_feeder.my_wait_context.release(); + m_wait_tree_vertex->release(); my_allocator.delete_object(this, ed); } @@ -160,6 +163,7 @@ struct feeder_item_task: public task { Item item; feeder_type& my_feeder; small_object_allocator my_allocator; + wait_tree_vertex_interface* m_wait_tree_vertex; }; // class feeder_item_task /** Implements new task adding procedure. 
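Editorial note: the feeder_item_task changes above swap the feeder's single wait_context for a per-thread wait_tree_vertex reference, but the user-facing feeder interface is unchanged. A usage sketch (assuming a oneTBB build with these headers) of the feeder that this machinery backs:

#include <oneapi/tbb/parallel_for_each.h>
#include <atomic>
#include <iostream>
#include <vector>

int main() {
    std::vector<int> seeds{10, 20, 30};
    std::atomic<int> processed{0};

    tbb::parallel_for_each(seeds.begin(), seeds.end(),
        [&](int value, tbb::feeder<int>& feeder) {
            ++processed;
            if (value >= 10)
                feeder.add(value / 10);   // extra work item, awaited by the same root wait context
        });

    std::cout << processed.load() << " items processed\n";   // 6 with this input
    return 0;
}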
@@ -170,9 +174,8 @@ class feeder_impl : public feeder<Item> { void internal_add_copy_impl(std::true_type, const Item& item) { using feeder_task = feeder_item_task<Body, Item>; small_object_allocator alloc; - auto task = alloc.new_object<feeder_task>(item, *this, alloc); + auto task = alloc.new_object<feeder_task>(item, *this, alloc, my_wait_context); - my_wait_context.reserve(); spawn(*task, my_execution_context); } @@ -187,20 +190,19 @@ class feeder_impl : public feeder<Item> { void internal_add_move(Item&& item) override { using feeder_task = feeder_item_task<Body, Item>; small_object_allocator alloc{}; - auto task = alloc.new_object<feeder_task>(std::move(item), *this, alloc); + auto task = alloc.new_object<feeder_task>(std::move(item), *this, alloc, my_wait_context); - my_wait_context.reserve(); spawn(*task, my_execution_context); } public: - feeder_impl(const Body& body, wait_context& w_context, task_group_context &context) + feeder_impl(const Body& body, wait_context_vertex& w_context, task_group_context &context) : my_body(body), my_wait_context(w_context) , my_execution_context(context) {} const Body& my_body; - wait_context& my_wait_context; + wait_context_vertex& my_wait_context; task_group_context& my_execution_context; }; // class feeder_impl @@ -263,7 +265,7 @@ struct input_block_handling_task : public task { using iteration_task_iterator_type = typename input_iteration_task_iterator_helper<Body, Item>::type; using iteration_task = for_each_iteration_task<iteration_task_iterator_type, Body, Item>; - input_block_handling_task(wait_context& root_wait_context, task_group_context& e_context, + input_block_handling_task(wait_context_vertex& root_wait_context, task_group_context& e_context, const Body& body, feeder_impl<Body, Item>* feeder_ptr, small_object_allocator& alloc) :my_size(0), my_wait_context(0), my_root_wait_context(root_wait_context), my_execution_context(e_context), my_allocator(alloc) @@ -312,7 +314,7 @@ struct input_block_handling_task : public task { aligned_space<iteration_task, max_block_size> task_pool; std::size_t my_size; wait_context my_wait_context; - wait_context& my_root_wait_context; + wait_context_vertex& my_root_wait_context; task_group_context& my_execution_context; small_object_allocator my_allocator; }; // class input_block_handling_task @@ -326,7 +328,7 @@ struct forward_block_handling_task : public task { using iteration_task = for_each_iteration_task<Iterator, Body, Item>; forward_block_handling_task(Iterator first, std::size_t size, - wait_context& w_context, task_group_context& e_context, + wait_context_vertex& w_context, task_group_context& e_context, const Body& body, feeder_impl<Body, Item>* feeder_ptr, small_object_allocator& alloc) : my_size(size), my_wait_context(0), my_root_wait_context(w_context), @@ -373,7 +375,7 @@ struct forward_block_handling_task : public task { aligned_space<iteration_task, max_block_size> task_pool; std::size_t my_size; wait_context my_wait_context; - wait_context& my_root_wait_context; + wait_context_vertex& my_root_wait_context; task_group_context& my_execution_context; small_object_allocator my_allocator; }; // class forward_block_handling_task @@ -407,6 +409,34 @@ public: template<typename It> using tag = typename std::iterator_traits<It>::iterator_category; +#if __TBB_CPP20_PRESENT +template <typename It> +struct move_iterator_dispatch_helper { + using type = It; +}; + +// Until C++23, std::move_iterator::iterator_concept always defines +// to std::input_iterator_tag and hence std::forward_iterator concept +// 
always evaluates to false, so std::move_iterator dispatch should be +// made according to the base iterator type. +template <typename It> +struct move_iterator_dispatch_helper<std::move_iterator<It>> { + using type = It; +}; + +template <typename It> +using iterator_tag_dispatch_impl = + std::conditional_t<std::random_access_iterator<It>, + std::random_access_iterator_tag, + std::conditional_t<std::forward_iterator<It>, + std::forward_iterator_tag, + std::input_iterator_tag>>; + +template <typename It> +using iterator_tag_dispatch = + iterator_tag_dispatch_impl<typename move_iterator_dispatch_helper<It>::type>; + +#else template<typename It> using iterator_tag_dispatch = typename std::conditional< @@ -418,6 +448,7 @@ using iterator_tag_dispatch = typename std::input_iterator_tag >::type >::type; +#endif // __TBB_CPP20_PRESENT template <typename Body, typename Iterator, typename Item> using feeder_is_required = tbb::detail::void_t<decltype(tbb::detail::invoke(std::declval<const Body>(), @@ -427,7 +458,7 @@ using feeder_is_required = tbb::detail::void_t<decltype(tbb::detail::invoke(std: // Creates feeder object only if the body can accept it template <typename Iterator, typename Body, typename Item, typename = void> struct feeder_holder { - feeder_holder( wait_context&, task_group_context&, const Body& ) {} + feeder_holder( wait_context_vertex&, task_group_context&, const Body& ) {} feeder_impl<Body, Item>* feeder_ptr() { return nullptr; } }; // class feeder_holder @@ -435,7 +466,7 @@ struct feeder_holder { template <typename Iterator, typename Body, typename Item> class feeder_holder<Iterator, Body, Item, feeder_is_required<Body, Iterator, Item>> { public: - feeder_holder( wait_context& w_context, task_group_context& context, const Body& body ) + feeder_holder( wait_context_vertex& w_context, task_group_context& context, const Body& body ) : my_feeder(body, w_context, context) {} feeder_impl<Body, Item>* feeder_ptr() { return &my_feeder; } @@ -446,7 +477,7 @@ private: template <typename Iterator, typename Body, typename Item> class for_each_root_task_base : public task { public: - for_each_root_task_base(Iterator first, Iterator last, const Body& body, wait_context& w_context, task_group_context& e_context) + for_each_root_task_base(Iterator first, Iterator last, const Body& body, wait_context_vertex& w_context, task_group_context& e_context) : my_first(first), my_last(last), my_wait_context(w_context), my_execution_context(e_context), my_body(body), my_feeder_holder(my_wait_context, my_execution_context, my_body) { @@ -460,7 +491,7 @@ private: protected: Iterator my_first; Iterator my_last; - wait_context& my_wait_context; + wait_context_vertex& my_wait_context; task_group_context& my_execution_context; const Body& my_body; feeder_holder<Iterator, Body, Item> my_feeder_holder; @@ -595,11 +626,11 @@ void run_parallel_for_each( Iterator first, Iterator last, const Body& body, tas { if (!(first == last)) { using ItemType = get_item_type<Body, typename std::iterator_traits<Iterator>::value_type>; - wait_context w_context(0); + wait_context_vertex w_context(0); for_each_root_task<Iterator, Body, ItemType> root_task(first, last, body, w_context, context); - execute_and_wait(root_task, context, w_context, context); + execute_and_wait(root_task, context, w_context.get_context(), context); } } diff --git a/contrib/libs/tbb/include/oneapi/tbb/parallel_invoke.h b/contrib/libs/tbb/include/oneapi/tbb/parallel_invoke.h index 6eb0f2e530..4bc5d85339 100644 --- 
a/contrib/libs/tbb/include/oneapi/tbb/parallel_invoke.h +++ b/contrib/libs/tbb/include/oneapi/tbb/parallel_invoke.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2023 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -59,7 +59,7 @@ struct function_invoker : public task { }; // struct function_invoker //! Task object for managing subroots in trinary task trees. -// Endowed with additional synchronization logic (compatible with wait object intefaces) to support +// Endowed with additional synchronization logic (compatible with wait object interfaces) to support // continuation passing execution. This task spawns 2 function_invoker tasks with first and second functors // and then executes first functor by itself. But only the last executed functor must destruct and deallocate // the subroot task. diff --git a/contrib/libs/tbb/include/oneapi/tbb/parallel_reduce.h b/contrib/libs/tbb/include/oneapi/tbb/parallel_reduce.h index 401ad00467..205c97ef95 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/parallel_reduce.h +++ b/contrib/libs/tbb/include/oneapi/tbb/parallel_reduce.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2023 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -42,16 +42,16 @@ concept parallel_reduce_body = splittable<Body> && template <typename Function, typename Range, typename Value> concept parallel_reduce_function = std::invocable<const std::remove_reference_t<Function>&, - const Range&, const Value&> && + const Range&, Value&&> && std::convertible_to<std::invoke_result_t<const std::remove_reference_t<Function>&, - const Range&, const Value&>, + const Range&, Value&&>, Value>; template <typename Combine, typename Value> concept parallel_reduce_combine = std::invocable<const std::remove_reference_t<Combine>&, - const Value&, const Value&> && + Value&&, Value&&> && std::convertible_to<std::invoke_result_t<const std::remove_reference_t<Combine>&, - const Value&, const Value&>, + Value&&, Value&&>, Value>; } // namespace d0 @@ -390,14 +390,15 @@ public: , my_value(other.my_identity_element) { } void operator()(Range& range) { - my_value = tbb::detail::invoke(my_real_body, range, const_cast<const Value&>(my_value)); + my_value = tbb::detail::invoke(my_real_body, range, std::move(my_value)); } + void join( lambda_reduce_body& rhs ) { - my_value = tbb::detail::invoke(my_reduction, const_cast<const Value&>(my_value), - const_cast<const Value&>(rhs.my_value)); + my_value = tbb::detail::invoke(my_reduction, std::move(my_value), std::move(rhs.my_value)); } - Value result() const { - return my_value; + + __TBB_nodiscard Value&& result() && noexcept { + return std::move(my_value); } }; @@ -514,7 +515,7 @@ Value parallel_reduce( const Range& range, const Value& identity, const RealBody lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER> ::run(range, body, __TBB_DEFAULT_PARTITIONER() ); - return body.result(); + return std::move(body).result(); } //! Parallel iteration with reduction and simple_partitioner. 
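Editorial note: the parallel_reduce hunks above relax the concepts to accept rvalues and make lambda_reduce_body move its running value into the user functors and out of result(). For callers this mostly means fewer copies of heavyweight Value types; a usage sketch with std::string (sizes and names here are illustrative only):

#include <oneapi/tbb/blocked_range.h>
#include <oneapi/tbb/parallel_reduce.h>
#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

int main() {
    std::vector<std::string> words(1000, "x");

    std::string all = tbb::parallel_reduce(
        tbb::blocked_range<std::size_t>(0, words.size()),
        std::string{},                                        // identity value
        [&](const tbb::blocked_range<std::size_t>& r, std::string acc) {
            for (std::size_t i = r.begin(); i != r.end(); ++i)
                acc += words[i];                              // acc arrives as a moved-in rvalue
            return acc;
        },
        [](std::string left, std::string right) {             // combiner; both arguments may be moved in
            left += right;
            return left;
        });

    std::cout << all.size() << '\n';                          // 1000
    return 0;
}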
@@ -527,7 +528,7 @@ Value parallel_reduce( const Range& range, const Value& identity, const RealBody lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner> ::run(range, body, partitioner ); - return body.result(); + return std::move(body).result(); } //! Parallel iteration with reduction and auto_partitioner @@ -540,7 +541,7 @@ Value parallel_reduce( const Range& range, const Value& identity, const RealBody lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner> ::run( range, body, partitioner ); - return body.result(); + return std::move(body).result(); } //! Parallel iteration with reduction and static_partitioner @@ -553,7 +554,7 @@ Value parallel_reduce( const Range& range, const Value& identity, const RealBody lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const static_partitioner> ::run( range, body, partitioner ); - return body.result(); + return std::move(body).result(); } //! Parallel iteration with reduction and affinity_partitioner @@ -566,7 +567,7 @@ Value parallel_reduce( const Range& range, const Value& identity, const RealBody lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner> ::run( range, body, partitioner ); - return body.result(); + return std::move(body).result(); } //! Parallel iteration with reduction, default partitioner and user-supplied context. @@ -579,7 +580,7 @@ Value parallel_reduce( const Range& range, const Value& identity, const RealBody lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER> ::run( range, body, __TBB_DEFAULT_PARTITIONER(), context ); - return body.result(); + return std::move(body).result(); } //! Parallel iteration with reduction, simple partitioner and user-supplied context. @@ -592,7 +593,7 @@ Value parallel_reduce( const Range& range, const Value& identity, const RealBody lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner> ::run( range, body, partitioner, context ); - return body.result(); + return std::move(body).result(); } //! Parallel iteration with reduction, auto_partitioner and user-supplied context @@ -605,7 +606,7 @@ Value parallel_reduce( const Range& range, const Value& identity, const RealBody lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner> ::run( range, body, partitioner, context ); - return body.result(); + return std::move(body).result(); } //! 
Parallel iteration with reduction, static_partitioner and user-supplied context @@ -618,7 +619,7 @@ Value parallel_reduce( const Range& range, const Value& identity, const RealBody lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const static_partitioner> ::run( range, body, partitioner, context ); - return body.result(); + return std::move(body).result(); } //! Parallel iteration with reduction, affinity_partitioner and user-supplied context @@ -631,7 +632,7 @@ Value parallel_reduce( const Range& range, const Value& identity, const RealBody lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner> ::run( range, body, partitioner, context ); - return body.result(); + return std::move(body).result(); } //! Parallel iteration with deterministic reduction and default simple partitioner. @@ -704,7 +705,7 @@ Value parallel_deterministic_reduce( const Range& range, const Value& identity, lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); start_deterministic_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>, const simple_partitioner> ::run(range, body, partitioner); - return body.result(); + return std::move(body).result(); } //! Parallel iteration with deterministic reduction and static partitioner. @@ -716,7 +717,7 @@ Value parallel_deterministic_reduce( const Range& range, const Value& identity, lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction); start_deterministic_reduce<Range, lambda_reduce_body<Range, Value, RealBody, Reduction>, const static_partitioner> ::run(range, body, partitioner); - return body.result(); + return std::move(body).result(); } //! Parallel iteration with deterministic reduction, default simple partitioner and user-supplied context. @@ -739,7 +740,7 @@ Value parallel_deterministic_reduce( const Range& range, const Value& identity, lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction); start_deterministic_reduce<Range, lambda_reduce_body<Range, Value, RealBody, Reduction>, const simple_partitioner> ::run(range, body, partitioner, context); - return body.result(); + return std::move(body).result(); } //! Parallel iteration with deterministic reduction, static partitioner and user-supplied context. @@ -752,7 +753,7 @@ Value parallel_deterministic_reduce( const Range& range, const Value& identity, lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction); start_deterministic_reduce<Range, lambda_reduce_body<Range, Value, RealBody, Reduction>, const static_partitioner> ::run(range, body, partitioner, context); - return body.result(); + return std::move(body).result(); } //@} diff --git a/contrib/libs/tbb/include/oneapi/tbb/parallel_scan.h b/contrib/libs/tbb/include/oneapi/tbb/parallel_scan.h index 6d2a4d6401..d624f7ebdb 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/parallel_scan.h +++ b/contrib/libs/tbb/include/oneapi/tbb/parallel_scan.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2023 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
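Editorial note: the same move-based result() calls also appear in the deterministic overloads above. parallel_deterministic_reduce trades some load balancing for run-to-run identical range splits, which matters for floating-point reductions; a usage sketch:

#include <oneapi/tbb/blocked_range.h>
#include <oneapi/tbb/parallel_reduce.h>
#include <cstddef>
#include <functional>
#include <iostream>
#include <vector>

int main() {
    std::vector<double> data(1u << 20, 0.1);

    double sum = tbb::parallel_deterministic_reduce(
        tbb::blocked_range<std::size_t>(0, data.size()),
        0.0,
        [&](const tbb::blocked_range<std::size_t>& r, double acc) {
            for (std::size_t i = r.begin(); i != r.end(); ++i)
                acc += data[i];
            return acc;
        },
        std::plus<double>());

    std::cout << sum << '\n';   // identical result on every run
    return 0;
}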
@@ -562,7 +562,7 @@ public: template<typename Range, typename Body> __TBB_requires(tbb_range<Range> && parallel_scan_body<Body, Range>) void parallel_scan( const Range& range, Body& body ) { - start_scan<Range, Body, auto_partitioner>::run(range,body,__TBB_DEFAULT_PARTITIONER()); + start_scan<Range, Body, __TBB_DEFAULT_PARTITIONER>::run(range,body,__TBB_DEFAULT_PARTITIONER()); } //! Parallel prefix with simple_partitioner diff --git a/contrib/libs/tbb/include/oneapi/tbb/partitioner.h b/contrib/libs/tbb/include/oneapi/tbb/partitioner.h index 98de0d42b7..f09786c022 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/partitioner.h +++ b/contrib/libs/tbb/include/oneapi/tbb/partitioner.h @@ -340,7 +340,7 @@ struct proportional_mode : adaptive_mode<Partition> { // Create the proportion from partitioner internal resources (threads) that would be used: // - into proportional_mode constructor to split the partitioner // - if Range supports the proportional_split constructor it would use proposed proportion, - // otherwise, the tbb::proportional_split object will be implicitly (for Range implementor) + // otherwise, the tbb::proportional_split object will be implicitly (for Range implementer) // casted to tbb::split std::size_t n = self().my_divisor / my_partition::factor; diff --git a/contrib/libs/tbb/include/oneapi/tbb/scalable_allocator.h b/contrib/libs/tbb/include/oneapi/tbb/scalable_allocator.h index de589fe4e8..4e1af991fd 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/scalable_allocator.h +++ b/contrib/libs/tbb/include/oneapi/tbb/scalable_allocator.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2023 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -20,6 +20,7 @@ #ifdef __cplusplus #include "oneapi/tbb/detail/_config.h" #include "oneapi/tbb/detail/_utils.h" +#include "oneapi/tbb/detail/_namespace_injection.h" #include <cstdlib> #include <utility> #include <new> /* std::bad_alloc() */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/task_arena.h b/contrib/libs/tbb/include/oneapi/tbb/task_arena.h index 0de49aef07..5ce41d99c9 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/task_arena.h +++ b/contrib/libs/tbb/include/oneapi/tbb/task_arena.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2022 Intel Corporation + Copyright (c) 2005-2023 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -110,7 +110,8 @@ inline void enqueue_impl(task_handle&& th, d1::task_arena_base* ta) { namespace d1 { -static constexpr int priority_stride = INT_MAX / 4; +static constexpr unsigned num_priority_levels = 3; +static constexpr int priority_stride = INT_MAX / (num_priority_levels + 1); class task_arena_base { friend struct r1::task_arena_impl; diff --git a/contrib/libs/tbb/include/oneapi/tbb/task_group.h b/contrib/libs/tbb/include/oneapi/tbb/task_group.h index 2bbacd5578..c0811c8502 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/task_group.h +++ b/contrib/libs/tbb/include/oneapi/tbb/task_group.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2022 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
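Editorial note: in the task_arena.h hunk above, priority_stride is now derived from an explicit num_priority_levels constant. Those three levels presumably correspond to the public task_arena::priority values; a usage sketch:

#include <oneapi/tbb/parallel_for.h>
#include <oneapi/tbb/task_arena.h>

int main() {
    // max_concurrency = 4, one slot reserved for the calling thread, high priority.
    tbb::task_arena high_arena(4, /*reserved_for_masters=*/1, tbb::task_arena::priority::high);

    high_arena.execute([] {
        tbb::parallel_for(0, 1000, [](int) { /* latency-sensitive work */ });
    });
    return 0;
}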
@@ -45,13 +45,12 @@ namespace d1 { class delegate_base; class task_arena_base; class task_group_context; -class task_group_base; } namespace r1 { // Forward declarations class tbb_exception_ptr; -class market; +class cancellation_disseminator; class thread_data; class task_dispatcher; template <bool> @@ -97,8 +96,8 @@ private: } public: template<typename FF> - function_task(FF&& f, d1::wait_context& wo, d1::task_group_context& ctx, d1::small_object_allocator& alloc) - : task_handle_task{wo, ctx, alloc}, + function_task(FF&& f, d1::wait_tree_vertex_interface* vertex, d1::task_group_context& ctx, d1::small_object_allocator& alloc) + : task_handle_task{vertex, ctx, alloc}, m_func(std::forward<FF>(f)) {} }; @@ -407,18 +406,27 @@ public: } private: //// TODO: cleanup friends - friend class r1::market; + friend class r1::cancellation_disseminator; friend class r1::thread_data; friend class r1::task_dispatcher; template <bool> friend class r1::context_guard_helper; friend struct r1::task_arena_impl; friend struct r1::task_group_context_impl; - friend class task_group_base; + friend class d2::task_group_base; }; // class task_group_context static_assert(sizeof(task_group_context) == 128, "Wrong size of task_group_context"); +inline bool is_current_task_group_canceling() { + task_group_context* ctx = current_context(); + return ctx ? ctx->is_group_execution_cancelled() : false; +} + +} // namespace d1 + +namespace d2 { + enum task_group_status { not_complete, complete, @@ -431,77 +439,41 @@ class structured_task_group; class isolated_task_group; #endif -template<typename F> -class function_task : public task { - const F m_func; - wait_context& m_wait_ctx; - small_object_allocator m_allocator; - - void finalize(const execution_data& ed) { - // Make a local reference not to access this after destruction. - wait_context& wo = m_wait_ctx; - // Copy allocator to the stack - auto allocator = m_allocator; - // Destroy user functor before release wait. 
- this->~function_task(); - wo.release(); - - allocator.deallocate(this, ed); - } - task* execute(execution_data& ed) override { - task* res = d2::task_ptr_or_nullptr(m_func); - finalize(ed); - return res; - } - task* cancel(execution_data& ed) override { - finalize(ed); - return nullptr; - } -public: - function_task(const F& f, wait_context& wo, small_object_allocator& alloc) - : m_func(f) - , m_wait_ctx(wo) - , m_allocator(alloc) {} - - function_task(F&& f, wait_context& wo, small_object_allocator& alloc) - : m_func(std::move(f)) - , m_wait_ctx(wo) - , m_allocator(alloc) {} -}; - template <typename F> -class function_stack_task : public task { +class function_stack_task : public d1::task { const F& m_func; - wait_context& m_wait_ctx; + d1::wait_tree_vertex_interface* m_wait_tree_vertex; void finalize() { - m_wait_ctx.release(); + m_wait_tree_vertex->release(); } - task* execute(execution_data&) override { + task* execute(d1::execution_data&) override { task* res = d2::task_ptr_or_nullptr(m_func); finalize(); return res; } - task* cancel(execution_data&) override { + task* cancel(d1::execution_data&) override { finalize(); return nullptr; } public: - function_stack_task(const F& f, wait_context& wo) : m_func(f), m_wait_ctx(wo) {} + function_stack_task(const F& f, d1::wait_tree_vertex_interface* vertex) : m_func(f), m_wait_tree_vertex(vertex) { + m_wait_tree_vertex->reserve(); + } }; class task_group_base : no_copy { protected: - wait_context m_wait_ctx; - task_group_context m_context; + d1::wait_context_vertex m_wait_vertex; + d1::task_group_context m_context; template<typename F> task_group_status internal_run_and_wait(const F& f) { - function_stack_task<F> t{ f, m_wait_ctx }; - m_wait_ctx.reserve(); + function_stack_task<F> t{ f, r1::get_thread_reference_vertex(&m_wait_vertex) }; + bool cancellation_status = false; try_call([&] { - execute_and_wait(t, context(), m_wait_ctx, context()); + execute_and_wait(t, context(), m_wait_vertex.get_context(), context()); }).on_completion([&] { // TODO: the reset method is not thread-safe. Ensure the correct behavior. cancellation_status = context().is_group_execution_cancelled(); @@ -518,7 +490,7 @@ protected: bool cancellation_status = false; try_call([&] { - execute_and_wait(*acs::release(h), context(), m_wait_ctx, context()); + execute_and_wait(*acs::release(h), context(), m_wait_vertex.get_context(), context()); }).on_completion([&] { // TODO: the reset method is not thread-safe. Ensure the correct behavior. 
cancellation_status = context().is_group_execution_cancelled(); @@ -528,39 +500,39 @@ protected: } template<typename F> - task* prepare_task(F&& f) { - m_wait_ctx.reserve(); - small_object_allocator alloc{}; - return alloc.new_object<function_task<typename std::decay<F>::type>>(std::forward<F>(f), m_wait_ctx, alloc); + d1::task* prepare_task(F&& f) { + d1::small_object_allocator alloc{}; + return alloc.new_object<function_task<typename std::decay<F>::type>>(std::forward<F>(f), + r1::get_thread_reference_vertex(&m_wait_vertex), context(), alloc); } - task_group_context& context() noexcept { + d1::task_group_context& context() noexcept { return m_context.actual_context(); } template<typename F> d2::task_handle prepare_task_handle(F&& f) { - m_wait_ctx.reserve(); - small_object_allocator alloc{}; + d1::small_object_allocator alloc{}; using function_task_t = d2::function_task<typename std::decay<F>::type>; - d2::task_handle_task* function_task_p = alloc.new_object<function_task_t>(std::forward<F>(f), m_wait_ctx, context(), alloc); + d2::task_handle_task* function_task_p = alloc.new_object<function_task_t>(std::forward<F>(f), + r1::get_thread_reference_vertex(&m_wait_vertex), context(), alloc); return d2::task_handle_accessor::construct(function_task_p); } public: task_group_base(uintptr_t traits = 0) - : m_wait_ctx(0) - , m_context(task_group_context::bound, task_group_context::default_traits | traits) + : m_wait_vertex(0) + , m_context(d1::task_group_context::bound, d1::task_group_context::default_traits | traits) {} - task_group_base(task_group_context& ctx) - : m_wait_ctx(0) + task_group_base(d1::task_group_context& ctx) + : m_wait_vertex(0) , m_context(&ctx) {} ~task_group_base() noexcept(false) { - if (m_wait_ctx.continue_execution()) { + if (m_wait_vertex.continue_execution()) { #if __TBB_CPP17_UNCAUGHT_EXCEPTIONS_PRESENT bool stack_unwinding_in_progress = std::uncaught_exceptions() > 0; #else @@ -570,7 +542,7 @@ public: // in case of missing wait (for the sake of better testability & debuggability) if (!context().is_group_execution_cancelled()) cancel(); - d1::wait(m_wait_ctx, context()); + d1::wait(m_wait_vertex.get_context(), context()); if (!stack_unwinding_in_progress) throw_exception(exception_id::missing_wait); } @@ -579,7 +551,7 @@ public: task_group_status wait() { bool cancellation_status = false; try_call([&] { - d1::wait(m_wait_ctx, context()); + d1::wait(m_wait_vertex.get_context(), context()); }).on_completion([&] { // TODO: the reset method is not thread-safe. Ensure the correct behavior. 
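Editorial note: the task_group changes above move the implementation into namespace d2 and replace the single wait_context with a per-thread wait_tree_vertex obtained via r1::get_thread_reference_vertex(). The public interface is unchanged; a usage sketch:

#include <oneapi/tbb/task_group.h>
#include <iostream>

int main() {
    tbb::task_group tg;
    int a = 0, b = 0;

    tg.run([&] { a = 1; });                  // goes through prepare_task() above
    tg.run([&] { b = 2; });
    tbb::task_group_status st = tg.wait();   // blocks on the wait vertex shown above

    std::cout << a + b << (st == tbb::complete ? " complete" : " not complete") << '\n';
    return 0;
}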
cancellation_status = m_context.is_group_execution_cancelled(); @@ -595,12 +567,12 @@ public: class task_group : public task_group_base { public: - task_group() : task_group_base(task_group_context::concurrent_wait) {} - task_group(task_group_context& ctx) : task_group_base(ctx) {} + task_group() : task_group_base(d1::task_group_context::concurrent_wait) {} + task_group(d1::task_group_context& ctx) : task_group_base(ctx) {} template<typename F> void run(F&& f) { - spawn(*prepare_task(std::forward<F>(f)), context()); + d1::spawn(*prepare_task(std::forward<F>(f)), context()); } void run(d2::task_handle&& h) { @@ -609,7 +581,7 @@ public: using acs = d2::task_handle_accessor; __TBB_ASSERT(&acs::ctx_of(h) == &context(), "Attempt to schedule task_handle into different task_group"); - spawn(*acs::release(h), context()); + d1::spawn(*acs::release(h), context()); } template<typename F> @@ -629,20 +601,20 @@ public: }; // class task_group #if TBB_PREVIEW_ISOLATED_TASK_GROUP -class spawn_delegate : public delegate_base { - task* task_to_spawn; - task_group_context& context; +class spawn_delegate : public d1::delegate_base { + d1::task* task_to_spawn; + d1::task_group_context& context; bool operator()() const override { spawn(*task_to_spawn, context); return true; } public: - spawn_delegate(task* a_task, task_group_context& ctx) + spawn_delegate(d1::task* a_task, d1::task_group_context& ctx) : task_to_spawn(a_task), context(ctx) {} }; -class wait_delegate : public delegate_base { +class wait_delegate : public d1::delegate_base { bool operator()() const override { status = tg.wait(); return true; @@ -674,7 +646,7 @@ class isolated_task_group : public task_group { public: isolated_task_group() : task_group() {} - isolated_task_group(task_group_context& ctx) : task_group(ctx) {} + isolated_task_group(d1::task_group_context& ctx) : task_group(ctx) {} template<typename F> void run(F&& f) { @@ -710,26 +682,20 @@ public: } }; // class isolated_task_group #endif // TBB_PREVIEW_ISOLATED_TASK_GROUP - -inline bool is_current_task_group_canceling() { - task_group_context* ctx = current_context(); - return ctx ? ctx->is_group_execution_cancelled() : false; -} - -} // namespace d1 +} // namespace d2 } // namespace detail inline namespace v1 { using detail::d1::task_group_context; -using detail::d1::task_group; +using detail::d2::task_group; #if TBB_PREVIEW_ISOLATED_TASK_GROUP -using detail::d1::isolated_task_group; +using detail::d2::isolated_task_group; #endif -using detail::d1::task_group_status; -using detail::d1::not_complete; -using detail::d1::complete; -using detail::d1::canceled; +using detail::d2::task_group_status; +using detail::d2::not_complete; +using detail::d2::complete; +using detail::d2::canceled; using detail::d1::is_current_task_group_canceling; using detail::r1::missing_wait; diff --git a/contrib/libs/tbb/include/oneapi/tbb/version.h b/contrib/libs/tbb/include/oneapi/tbb/version.h index 965af129a8..c8f3ad50e3 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/version.h +++ b/contrib/libs/tbb/include/oneapi/tbb/version.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2023 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -27,20 +27,24 @@ #endif // Product version -#define TBB_VERSION_MAJOR 2021 +#define TBB_VERSION_MAJOR 2022 // Update version -#define TBB_VERSION_MINOR 10 +#define TBB_VERSION_MINOR 0 // "Patch" version for custom releases #define TBB_VERSION_PATCH 0 // Suffix string #define __TBB_VERSION_SUFFIX "" // Full official version string -#define TBB_VERSION_STRING __TBB_STRING(TBB_VERSION_MAJOR) "." __TBB_STRING(TBB_VERSION_MINOR) __TBB_VERSION_SUFFIX +#define TBB_VERSION_STRING \ + __TBB_STRING(TBB_VERSION_MAJOR) "." \ + __TBB_STRING(TBB_VERSION_MINOR) "." \ + __TBB_STRING(TBB_VERSION_PATCH) \ + __TBB_VERSION_SUFFIX // OneAPI oneTBB specification version #define ONETBB_SPEC_VERSION "1.0" // Full interface version -#define TBB_INTERFACE_VERSION 12100 +#define TBB_INTERFACE_VERSION 12140 // Major interface version #define TBB_INTERFACE_VERSION_MAJOR (TBB_INTERFACE_VERSION/1000) // Minor interface version @@ -51,37 +55,37 @@ #define __TBB_BINARY_VERSION 12 //! TBB_VERSION support -#ifndef ENDL -#define ENDL "\n" +#ifndef TBB_ENDL +#define TBB_ENDL "\n" #endif //TBB_REVAMP_TODO: consider enabling version_string.ver generation //TBB_REVAMP_TODO: #include "version_string.ver" -#define __TBB_ONETBB_SPEC_VERSION(N) #N ": SPECIFICATION VERSION\t" ONETBB_SPEC_VERSION ENDL -#define __TBB_VERSION_NUMBER(N) #N ": VERSION\t\t" TBB_VERSION_STRING ENDL -#define __TBB_INTERFACE_VERSION_NUMBER(N) #N ": INTERFACE VERSION\t" __TBB_STRING(TBB_INTERFACE_VERSION) ENDL +#define __TBB_ONETBB_SPEC_VERSION(N) #N ": SPECIFICATION VERSION\t" ONETBB_SPEC_VERSION TBB_ENDL +#define __TBB_VERSION_NUMBER(N) #N ": VERSION\t\t" TBB_VERSION_STRING TBB_ENDL +#define __TBB_INTERFACE_VERSION_NUMBER(N) #N ": INTERFACE VERSION\t" __TBB_STRING(TBB_INTERFACE_VERSION) TBB_ENDL #ifndef TBB_USE_DEBUG - #define __TBB_VERSION_USE_DEBUG(N) #N ": TBB_USE_DEBUG\tundefined" ENDL + #define __TBB_VERSION_USE_DEBUG(N) #N ": TBB_USE_DEBUG\tundefined" TBB_ENDL #elif TBB_USE_DEBUG==0 - #define __TBB_VERSION_USE_DEBUG(N) #N ": TBB_USE_DEBUG\t0" ENDL + #define __TBB_VERSION_USE_DEBUG(N) #N ": TBB_USE_DEBUG\t0" TBB_ENDL #elif TBB_USE_DEBUG==1 - #define __TBB_VERSION_USE_DEBUG(N) #N ": TBB_USE_DEBUG\t1" ENDL + #define __TBB_VERSION_USE_DEBUG(N) #N ": TBB_USE_DEBUG\t1" TBB_ENDL #elif TBB_USE_DEBUG==2 - #define __TBB_VERSION_USE_DEBUG(N) #N ": TBB_USE_DEBUG\t2" ENDL + #define __TBB_VERSION_USE_DEBUG(N) #N ": TBB_USE_DEBUG\t2" TBB_ENDL #else #error Unexpected value for TBB_USE_DEBUG #endif #ifndef TBB_USE_ASSERT - #define __TBB_VERSION_USE_ASSERT(N) #N ": TBB_USE_ASSERT\tundefined" ENDL + #define __TBB_VERSION_USE_ASSERT(N) #N ": TBB_USE_ASSERT\tundefined" TBB_ENDL #elif TBB_USE_ASSERT==0 - #define __TBB_VERSION_USE_ASSERT(N) #N ": TBB_USE_ASSERT\t0" ENDL + #define __TBB_VERSION_USE_ASSERT(N) #N ": TBB_USE_ASSERT\t0" TBB_ENDL #elif TBB_USE_ASSERT==1 - #define __TBB_VERSION_USE_ASSERT(N) #N ": TBB_USE_ASSERT\t1" ENDL + #define __TBB_VERSION_USE_ASSERT(N) #N ": TBB_USE_ASSERT\t1" TBB_ENDL #elif TBB_USE_ASSERT==2 - #define __TBB_VERSION_USE_ASSERT(N) #N ": TBB_USE_ASSERT\t2" ENDL + #define __TBB_VERSION_USE_ASSERT(N) #N ": TBB_USE_ASSERT\t2" TBB_ENDL #else #error Unexpected value for TBB_USE_ASSERT #endif diff --git a/contrib/libs/tbb/include/tbb/mutex.h b/contrib/libs/tbb/include/tbb/mutex.h new file mode 100644 index 0000000000..91dbee0fa0 --- /dev/null +++ b/contrib/libs/tbb/include/tbb/mutex.h @@ -0,0 +1,17 @@ +/* + Copyright (c) 2023 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in 
compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "../oneapi/tbb/mutex.h" diff --git a/contrib/libs/tbb/include/tbb/rw_mutex.h b/contrib/libs/tbb/include/tbb/rw_mutex.h new file mode 100644 index 0000000000..f2499ebace --- /dev/null +++ b/contrib/libs/tbb/include/tbb/rw_mutex.h @@ -0,0 +1,17 @@ +/* + Copyright (c) 2023 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "../oneapi/tbb/rw_mutex.h" diff --git a/contrib/libs/tbb/patches/pr1543-fix-lookup.patch b/contrib/libs/tbb/patches/pr1543-fix-lookup.patch new file mode 100644 index 0000000000..1f09ddf5cd --- /dev/null +++ b/contrib/libs/tbb/patches/pr1543-fix-lookup.patch @@ -0,0 +1,37 @@ +From c767a91536942a39497afc65c1e320852d3a0c0f Mon Sep 17 00:00:00 2001 +From: "Isaev, Ilya" <ilya.isaev@intel.com> +Date: Thu, 31 Oct 2024 17:28:58 +0100 +Subject: [PATCH] Fix flow_graph tests build when compiling with GCC 13.3 + +Signed-off-by: Isaev, Ilya <ilya.isaev@intel.com> +--- + include/oneapi/tbb/detail/_flow_graph_impl.h | 2 +- + include/oneapi/tbb/flow_graph.h | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/include/oneapi/tbb/detail/_flow_graph_impl.h b/include/oneapi/tbb/detail/_flow_graph_impl.h +index 19e00a8ef1..55063b93e1 100644 +--- a/include/oneapi/tbb/detail/_flow_graph_impl.h ++++ b/include/oneapi/tbb/detail/_flow_graph_impl.h +@@ -347,7 +347,7 @@ class graph : no_copy, public graph_proxy { + caught_exception = false; + try_call([this] { + my_task_arena->execute([this] { +- wait(my_wait_context_vertex.get_context(), *my_context); ++ d1::wait(my_wait_context_vertex.get_context(), *my_context); + }); + cancelled = my_context->is_group_execution_cancelled(); + }).on_exception([this] { +diff --git a/include/oneapi/tbb/flow_graph.h b/include/oneapi/tbb/flow_graph.h +index 20916fa7c2..5b438faabf 100644 +--- a/include/oneapi/tbb/flow_graph.h ++++ b/include/oneapi/tbb/flow_graph.h +@@ -305,7 +305,7 @@ class receiver { + bool res = internal_try_put(t, message_metainfo{message_metainfo::waiters_type{&msg_wait_vertex}}); + if (res) { + __TBB_ASSERT(graph_reference().my_context != nullptr, "No wait_context associated with the Flow Graph"); +- wait(msg_wait_vertex.get_context(), *graph_reference().my_context); ++ d1::wait(msg_wait_vertex.get_context(), *graph_reference().my_context); + } + return res; + } diff --git a/contrib/libs/tbb/src/tbb/allocator.cpp b/contrib/libs/tbb/src/tbb/allocator.cpp index 5453aeab12..689c51255d 100644 --- a/contrib/libs/tbb/src/tbb/allocator.cpp +++ b/contrib/libs/tbb/src/tbb/allocator.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2022 Intel Corporation + Copyright (c) 2005-2024 
Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -118,7 +118,7 @@ static const dynamic_link_descriptor MallocLinkTable[] = { #if _WIN32||_WIN64 #define MALLOCLIB_NAME "tbbmalloc" DEBUG_SUFFIX ".dll" #elif __APPLE__ -#define MALLOCLIB_NAME "libtbbmalloc" DEBUG_SUFFIX ".dylib" +#define MALLOCLIB_NAME "libtbbmalloc" DEBUG_SUFFIX ".2.dylib" #elif __FreeBSD__ || __NetBSD__ || __OpenBSD__ || __sun || _AIX || __ANDROID__ #define MALLOCLIB_NAME "libtbbmalloc" DEBUG_SUFFIX ".so" #elif __unix__ // Note that order of these #elif's is important! @@ -157,6 +157,14 @@ void initialize_cache_aligned_allocator() { } //! Executed on very first call through allocate_handler +/** Only one of initialize_allocate_handler() and initialize_cache_aligned_allocate_handler() + is called, since each one of them also initializes the other. + + In the current implementation of oneTBB library initialization, cache_aligned_allocate() is + used, which in turn calls initialize_cache_aligned_allocate_handler(). As mentioned above, + that also initializes the regular allocate_handler. + + Therefore, initialize_allocate_handler() is not called in the current library implementation. */ static void* initialize_allocate_handler(std::size_t size) { initialize_cache_aligned_allocator(); __TBB_ASSERT(allocate_handler != &initialize_allocate_handler, nullptr); diff --git a/contrib/libs/tbb/src/tbb/arena.cpp b/contrib/libs/tbb/src/tbb/arena.cpp index e79f689b82..6ca062d02f 100644 --- a/contrib/libs/tbb/src/tbb/arena.cpp +++ b/contrib/libs/tbb/src/tbb/arena.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2022 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -16,6 +16,7 @@ #include "task_dispatcher.h" #include "governor.h" +#include "threading_control.h" #include "arena.h" #include "itt_notify.h" #include "semaphore.h" @@ -59,7 +60,6 @@ numa_binding_observer* construct_binding_observer( d1::task_arena* ta, int num_s if ((core_type >= 0 && core_type_count() > 1) || (numa_id >= 0 && numa_node_count() > 1) || max_threads_per_core > 0) { binding_observer = new(allocate_memory(sizeof(numa_binding_observer))) numa_binding_observer(ta, num_slots, numa_id, core_type, max_threads_per_core); __TBB_ASSERT(binding_observer, "Failure during NUMA binding observer allocation and construction"); - binding_observer->observe(true); } return binding_observer; } @@ -72,6 +72,83 @@ void destroy_binding_observer( numa_binding_observer* binding_observer ) { } #endif /*!__TBB_ARENA_BINDING*/ +void arena::on_thread_leaving(unsigned ref_param) { + // + // Implementation of arena destruction synchronization logic contained various + // bugs/flaws at the different stages of its evolution, so below is a detailed + // description of the issues taken into consideration in the framework of the + // current design. + // + // In case of using fire-and-forget tasks (scheduled via task::enqueue()) + // external thread is allowed to leave its arena before all its work is executed, + // and market may temporarily revoke all workers from this arena. 
Since revoked + // workers never attempt to reset arena state to EMPTY and cancel its request + // to RML for threads, the arena object is destroyed only when both the last + // thread is leaving it and arena's state is EMPTY (that is its external thread + // left and it does not contain any work). + // Thus resetting arena to EMPTY state (as earlier TBB versions did) should not + // be done here (or anywhere else in the external thread to that matter); doing so + // can result either in arena's premature destruction (at least without + // additional costly checks in workers) or in unnecessary arena state changes + // (and ensuing workers migration). + // + // A worker that checks for work presence and transitions arena to the EMPTY + // state (in snapshot taking procedure arena::out_of_work()) updates + // arena::my_pool_state first and only then arena::my_num_workers_requested. + // So the check for work absence must be done against the latter field. + // + // In a time window between decrementing the active threads count and checking + // if there is an outstanding request for workers. New worker thread may arrive, + // finish remaining work, set arena state to empty, and leave decrementing its + // refcount and destroying. Then the current thread will destroy the arena + // the second time. To preclude it a local copy of the outstanding request + // value can be stored before decrementing active threads count. + // + // But this technique may cause two other problem. When the stored request is + // zero, it is possible that arena still has threads and they can generate new + // tasks and thus re-establish non-zero requests. Then all the threads can be + // revoked (as described above) leaving this thread the last one, and causing + // it to destroy non-empty arena. + // + // The other problem takes place when the stored request is non-zero. Another + // thread may complete the work, set arena state to empty, and leave without + // arena destruction before this thread decrements the refcount. This thread + // cannot destroy the arena either. Thus the arena may be "orphaned". + // + // In both cases we cannot dereference arena pointer after the refcount is + // decremented, as our arena may already be destroyed. + // + // If this is the external thread, the market is protected by refcount to it. + // In case of workers market's liveness is ensured by the RML connection + // rundown protocol, according to which the client (i.e. the market) lives + // until RML server notifies it about connection termination, and this + // notification is fired only after all workers return into RML. + // + // Thus if we decremented refcount to zero we ask the market to check arena + // state (including the fact if it is alive) under the lock. + // + + __TBB_ASSERT(my_references.load(std::memory_order_relaxed) >= ref_param, "broken arena reference counter"); + + // When there is no workers someone must free arena, as + // without workers, no one calls out_of_work(). 
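(Editorial aside, not part of the diff: a minimal sketch of the release ordering that the long comment above spells out. It assumes only <atomic>; the type and function names are illustrative, not TBB code. The point is that anything needed from the shared object must be captured before the reference-count decrement, because another thread may destroy the object as soon as the decrement becomes visible.)

#include <atomic>

// Illustrative only: an object whose last releaser is responsible for destruction.
struct ref_counted_sketch {
    std::atomic<unsigned> references{1};

    void retain(unsigned n = 1) { references.fetch_add(n, std::memory_order_relaxed); }

    // Returns true iff the caller dropped the last reference and must destroy the object.
    // Nothing reachable through `this` may be touched after the fetch_sub below:
    // a concurrent release() may already have freed the object.
    bool release(unsigned n = 1) {
        unsigned remaining = references.fetch_sub(n, std::memory_order_acq_rel) - n;
        return remaining == 0;
    }
};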
+ if (ref_param == ref_external && !my_mandatory_concurrency.test()) { + out_of_work(); + } + + threading_control* tc = my_threading_control; + auto tc_client_snapshot = tc->prepare_client_destruction(my_tc_client); + // Release our reference to sync with destroy_client + unsigned remaining_ref = my_references.fetch_sub(ref_param, std::memory_order_release) - ref_param; + // do not access `this` it might be destroyed already + if (remaining_ref == 0) { + if (tc->try_destroy_client(tc_client_snapshot)) { + // We are requested to destroy ourself + free_arena(); + } + } +} + std::size_t arena::occupy_free_slot_in_range( thread_data& tls, std::size_t lower, std::size_t upper ) { if ( lower >= upper ) return out_of_arena; // Start search for an empty slot from the one we occupied the last time @@ -104,19 +181,20 @@ std::size_t arena::occupy_free_slot(thread_data& tls) { std::uintptr_t arena::calculate_stealing_threshold() { stack_anchor_type anchor; - return r1::calculate_stealing_threshold(reinterpret_cast<std::uintptr_t>(&anchor), my_market->worker_stack_size()); + return r1::calculate_stealing_threshold(reinterpret_cast<std::uintptr_t>(&anchor), my_threading_control->worker_stack_size()); } void arena::process(thread_data& tls) { governor::set_thread_data(tls); // TODO: consider moving to create_one_job. __TBB_ASSERT( is_alive(my_guard), nullptr); - __TBB_ASSERT( my_num_slots > 1, nullptr); + __TBB_ASSERT( my_num_slots >= 1, nullptr); std::size_t index = occupy_free_slot</*as_worker*/true>(tls); if (index == out_of_arena) { - on_thread_leaving<ref_worker>(); + on_thread_leaving(ref_worker); return; } + __TBB_ASSERT( index >= my_num_reserved_slots, "Workers cannot occupy reserved slots" ); tls.attach_arena(*this, index); // worker thread enters the dispatch loop to look for a work @@ -159,24 +237,22 @@ void arena::process(thread_data& tls) { // In contrast to earlier versions of TBB (before 3.0 U5) now it is possible // that arena may be temporarily left unpopulated by threads. See comments in // arena::on_thread_leaving() for more details. - on_thread_leaving<ref_worker>(); + on_thread_leaving(ref_worker); __TBB_ASSERT(tls.my_arena == this, "my_arena is used as a hint when searching the arena to join"); } -arena::arena ( market& m, unsigned num_slots, unsigned num_reserved_slots, unsigned priority_level ) -{ +arena::arena(threading_control* control, unsigned num_slots, unsigned num_reserved_slots, unsigned priority_level) { __TBB_ASSERT( !my_guard, "improperly allocated arena?" ); __TBB_ASSERT( sizeof(my_slots[0]) % cache_line_size()==0, "arena::slot size not multiple of cache line size" ); __TBB_ASSERT( is_aligned(this, cache_line_size()), "arena misaligned" ); - my_market = &m; + my_threading_control = control; my_limit = 1; // Two slots are mandatory: for the external thread, and for 1 worker (required to support starvation resistant tasks). 
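(Editorial aside, not part of the diff: a worked illustration of the slot-count rule referenced by the "two slots are mandatory" comment above. It mirrors the two-argument num_arena_slots() definition added later in this diff, in arena.h; the standalone function name is made up for the example.)

#include <algorithm>

// Mirrors: num_reserved_slots == 0 ? num_slots : max(2u, num_slots)
static unsigned num_arena_slots_sketch(unsigned num_slots, unsigned num_reserved_slots) {
    // When an external-thread (reserved) slot exists, a second slot is always kept so a
    // worker can service enqueued, starvation-resistant tasks.
    return num_reserved_slots == 0 ? num_slots : std::max(2u, num_slots);
}

// num_arena_slots_sketch(1, 1) == 2   // external thread plus one mandatory worker slot
// num_arena_slots_sketch(4, 1) == 4   // already large enough, no padding needed
// num_arena_slots_sketch(3, 0) == 3   // no reserved slots: the minimum of 2 is not enforced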
- my_num_slots = num_arena_slots(num_slots); + my_num_slots = num_arena_slots(num_slots, num_reserved_slots); my_num_reserved_slots = num_reserved_slots; my_max_num_workers = num_slots-num_reserved_slots; my_priority_level = priority_level; my_references = ref_external; // accounts for the external thread - my_aba_epoch = m.my_arenas_aba_epoch.load(std::memory_order_relaxed); my_observers.my_arena = this; my_co_cache.init(4 * num_slots); __TBB_ASSERT ( my_max_num_workers <= my_num_slots, nullptr); @@ -199,36 +275,29 @@ arena::arena ( market& m, unsigned num_slots, unsigned num_reserved_slots, unsig #if __TBB_PREVIEW_CRITICAL_TASKS my_critical_task_stream.initialize(my_num_slots); #endif -#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY - my_local_concurrency_requests = 0; - my_local_concurrency_flag.clear(); - my_global_concurrency_mode.store(false, std::memory_order_relaxed); -#endif + my_mandatory_requests = 0; } -arena& arena::allocate_arena( market& m, unsigned num_slots, unsigned num_reserved_slots, - unsigned priority_level ) +arena& arena::allocate_arena(threading_control* control, unsigned num_slots, unsigned num_reserved_slots, + unsigned priority_level) { __TBB_ASSERT( sizeof(base_type) + sizeof(arena_slot) == sizeof(arena), "All arena data fields must go to arena_base" ); __TBB_ASSERT( sizeof(base_type) % cache_line_size() == 0, "arena slots area misaligned: wrong padding" ); __TBB_ASSERT( sizeof(mail_outbox) == max_nfs_size, "Mailbox padding is wrong" ); - std::size_t n = allocation_size(num_arena_slots(num_slots)); + std::size_t n = allocation_size(num_arena_slots(num_slots, num_reserved_slots)); unsigned char* storage = (unsigned char*)cache_aligned_allocate(n); // Zero all slots to indicate that they are empty std::memset( storage, 0, n ); - return *new( storage + num_arena_slots(num_slots) * sizeof(mail_outbox) ) - arena(m, num_slots, num_reserved_slots, priority_level); + + return *new( storage + num_arena_slots(num_slots, num_reserved_slots) * sizeof(mail_outbox) ) + arena(control, num_slots, num_reserved_slots, priority_level); } void arena::free_arena () { __TBB_ASSERT( is_alive(my_guard), nullptr); __TBB_ASSERT( !my_references.load(std::memory_order_relaxed), "There are threads in the dying arena" ); - __TBB_ASSERT( !my_num_workers_requested && !my_num_workers_allotted, "Dying arena requests workers" ); - __TBB_ASSERT( my_pool_state.load(std::memory_order_relaxed) == SNAPSHOT_EMPTY || !my_max_num_workers, - "Inconsistent state of a dying arena" ); -#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY - __TBB_ASSERT( !my_global_concurrency_mode, nullptr); -#endif + __TBB_ASSERT( !my_total_num_workers_requested && !my_num_workers_allotted, "Dying arena requests workers" ); + __TBB_ASSERT( is_empty(), "Inconsistent state of a dying arena" ); #if __TBB_ARENA_BINDING if (my_numa_binding_observer != nullptr) { destroy_binding_observer(my_numa_binding_observer); @@ -254,15 +323,11 @@ void arena::free_arena () { #if __TBB_PREVIEW_CRITICAL_TASKS __TBB_ASSERT( my_critical_task_stream.empty(), "Not all critical tasks were executed"); #endif - // remove an internal reference - my_market->release( /*is_public=*/false, /*blocking_terminate=*/false ); - // Clear enfources synchronization with observe(false) my_observers.clear(); void* storage = &mailbox(my_num_slots-1); __TBB_ASSERT( my_references.load(std::memory_order_relaxed) == 0, nullptr); - __TBB_ASSERT( my_pool_state.load(std::memory_order_relaxed) == SNAPSHOT_EMPTY || !my_max_num_workers, nullptr); this->~arena(); #if TBB_USE_ASSERT > 1 std::memset( 
storage, 0, allocation_size(my_num_slots) ); @@ -274,80 +339,102 @@ bool arena::has_enqueued_tasks() { return !my_fifo_task_stream.empty(); } -bool arena::is_out_of_work() { -#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY - if (my_local_concurrency_flag.try_clear_if([this] { - return !has_enqueued_tasks(); - })) { - my_market->adjust_demand(*this, /* delta = */ -1, /* mandatory = */ true); +void arena::request_workers(int mandatory_delta, int workers_delta, bool wakeup_threads) { + my_threading_control->adjust_demand(my_tc_client, mandatory_delta, workers_delta); + + if (wakeup_threads) { + // Notify all sleeping threads that work has appeared in the arena. + get_waiting_threads_monitor().notify([&] (market_context context) { + return this == context.my_arena_addr; + }); } -#endif +} +bool arena::has_tasks() { // TODO: rework it to return at least a hint about where a task was found; better if the task itself. - switch (my_pool_state.load(std::memory_order_acquire)) { - case SNAPSHOT_EMPTY: - return true; - case SNAPSHOT_FULL: { - // Use unique id for "busy" in order to avoid ABA problems. - const pool_state_t busy = pool_state_t(&busy); - // Helper for CAS execution - pool_state_t expected_state; - - // Request permission to take snapshot - expected_state = SNAPSHOT_FULL; - if (my_pool_state.compare_exchange_strong(expected_state, busy)) { - // Got permission. Take the snapshot. - // NOTE: This is not a lock, as the state can be set to FULL at - // any moment by a thread that spawns/enqueues new task. - std::size_t n = my_limit.load(std::memory_order_acquire); - // Make local copies of volatile parameters. Their change during - // snapshot taking procedure invalidates the attempt, and returns - // this thread into the dispatch loop. - std::size_t k; - for (k = 0; k < n; ++k) { - if (my_slots[k].task_pool.load(std::memory_order_relaxed) != EmptyTaskPool && - my_slots[k].head.load(std::memory_order_relaxed) < my_slots[k].tail.load(std::memory_order_relaxed)) - { - // k-th primary task pool is nonempty and does contain tasks. - break; - } - if (my_pool_state.load(std::memory_order_acquire) != busy) - return false; // the work was published - } - bool work_absent = k == n; - // Test and test-and-set. - if (my_pool_state.load(std::memory_order_acquire) == busy) { - bool no_stream_tasks = !has_enqueued_tasks() && my_resume_task_stream.empty(); + std::size_t n = my_limit.load(std::memory_order_acquire); + bool tasks_are_available = false; + for (std::size_t k = 0; k < n && !tasks_are_available; ++k) { + tasks_are_available = !my_slots[k].is_empty(); + } + tasks_are_available = tasks_are_available || has_enqueued_tasks() || !my_resume_task_stream.empty(); #if __TBB_PREVIEW_CRITICAL_TASKS - no_stream_tasks = no_stream_tasks && my_critical_task_stream.empty(); + tasks_are_available = tasks_are_available || !my_critical_task_stream.empty(); #endif - work_absent = work_absent && no_stream_tasks; - if (work_absent) { - // save current demand value before setting SNAPSHOT_EMPTY, - // to avoid race with advertise_new_work. - int current_demand = (int)my_max_num_workers; - expected_state = busy; - if (my_pool_state.compare_exchange_strong(expected_state, SNAPSHOT_EMPTY)) { - // This thread transitioned pool to empty state, and thus is - // responsible for telling the market that there is no work to do. - my_market->adjust_demand(*this, -current_demand, /* mandatory = */ false); - return true; - } - return false; - } - // Undo previous transition SNAPSHOT_FULL-->busy, unless another thread undid it. 
- expected_state = busy; - my_pool_state.compare_exchange_strong(expected_state, SNAPSHOT_FULL); - } + return tasks_are_available; +} + +void arena::out_of_work() { + // We should try unset my_pool_state first due to keep arena invariants in consistent state + // Otherwise, we might have my_pool_state = false and my_mandatory_concurrency = true that is broken invariant + bool disable_mandatory = my_mandatory_concurrency.try_clear_if([this] { return !has_enqueued_tasks(); }); + bool release_workers = my_pool_state.try_clear_if([this] { return !has_tasks(); }); + + if (disable_mandatory || release_workers) { + int mandatory_delta = disable_mandatory ? -1 : 0; + int workers_delta = release_workers ? -(int)my_max_num_workers : 0; + + if (disable_mandatory && is_arena_workerless()) { + // We had set workers_delta to 1 when enabled mandatory concurrency, so revert it now + workers_delta = -1; } - return false; + request_workers(mandatory_delta, workers_delta); + } +} + +void arena::set_top_priority(bool is_top_priority) { + my_is_top_priority.store(is_top_priority, std::memory_order_relaxed); +} + +bool arena::is_top_priority() const { + return my_is_top_priority.load(std::memory_order_relaxed); +} + +bool arena::try_join() { + if (is_joinable()) { + my_references += arena::ref_worker; + return true; } - default: - // Another thread is taking a snapshot. - return false; + return false; +} + +void arena::set_allotment(unsigned allotment) { + if (my_num_workers_allotted.load(std::memory_order_relaxed) != allotment) { + my_num_workers_allotted.store(allotment, std::memory_order_relaxed); } } +int arena::update_concurrency(unsigned allotment) { + int delta = allotment - my_num_workers_allotted.load(std::memory_order_relaxed); + if (delta != 0) { + my_num_workers_allotted.store(allotment, std::memory_order_relaxed); + } + return delta; +} + +std::pair<int, int> arena::update_request(int mandatory_delta, int workers_delta) { + __TBB_ASSERT(-1 <= mandatory_delta && mandatory_delta <= 1, nullptr); + + int min_workers_request = 0; + int max_workers_request = 0; + + // Calculate min request + my_mandatory_requests += mandatory_delta; + min_workers_request = my_mandatory_requests > 0 ? 1 : 0; + + // Calculate max request + my_total_num_workers_requested += workers_delta; + // Clamp worker request into interval [0, my_max_num_workers] + max_workers_request = clamp(my_total_num_workers_requested, 0, + min_workers_request > 0 && is_arena_workerless() ? 1 : (int)my_max_num_workers); + + return { min_workers_request, max_workers_request }; +} + +thread_control_monitor& arena::get_waiting_threads_monitor() { + return my_threading_control->get_waiting_threads_monitor(); +} + void arena::enqueue_task(d1::task& t, d1::task_group_context& ctx, thread_data& td) { task_group_context_impl::bind_to(ctx, &td); task_accessor::context(t) = &ctx; @@ -356,6 +443,17 @@ void arena::enqueue_task(d1::task& t, d1::task_group_context& ctx, thread_data& advertise_new_work<work_enqueued>(); } +arena& arena::create(threading_control* control, unsigned num_slots, unsigned num_reserved_slots, unsigned arena_priority_level, d1::constraints constraints) { + __TBB_ASSERT(num_slots > 0, NULL); + __TBB_ASSERT(num_reserved_slots <= num_slots, NULL); + // Add public market reference for an external thread/task_arena (that adds an internal reference in exchange). 
+ arena& a = arena::allocate_arena(control, num_slots, num_reserved_slots, arena_priority_level); + a.my_tc_client = control->create_client(a); + // We should not publish arena until all fields are initialized + control->publish_client(a.my_tc_client, constraints); + return a; +} + } // namespace r1 } // namespace detail } // namespace tbb @@ -382,12 +480,12 @@ void assert_arena_priority_valid( tbb::task_arena::priority ) {} unsigned arena_priority_level( tbb::task_arena::priority a_priority ) { assert_arena_priority_valid( a_priority ); - return market::num_priority_levels - unsigned(int(a_priority) / d1::priority_stride); + return d1::num_priority_levels - unsigned(int(a_priority) / d1::priority_stride); } tbb::task_arena::priority arena_priority( unsigned priority_level ) { auto priority = tbb::task_arena::priority( - (market::num_priority_levels - priority_level) * d1::priority_stride + (d1::num_priority_levels - priority_level) * d1::priority_stride ); assert_arena_priority_valid( priority ); return priority; @@ -401,6 +499,7 @@ struct task_arena_impl { static void wait(d1::task_arena_base&); static int max_concurrency(const d1::task_arena_base*); static void enqueue(d1::task&, d1::task_group_context*, d1::task_arena_base*); + static d1::slot_id execution_slot(const d1::task_arena_base&); }; void __TBB_EXPORTED_FUNC initialize(d1::task_arena_base& ta) { @@ -431,38 +530,61 @@ void __TBB_EXPORTED_FUNC enqueue(d1::task& t, d1::task_group_context& ctx, d1::t task_arena_impl::enqueue(t, &ctx, ta); } +d1::slot_id __TBB_EXPORTED_FUNC execution_slot(const d1::task_arena_base& arena) { + return task_arena_impl::execution_slot(arena); +} + void task_arena_impl::initialize(d1::task_arena_base& ta) { // Enforce global market initialization to properly initialize soft limit (void)governor::get_thread_data(); + d1::constraints arena_constraints; + +#if __TBB_ARENA_BINDING + arena_constraints = d1::constraints{} + .set_core_type(ta.core_type()) + .set_max_threads_per_core(ta.max_threads_per_core()) + .set_numa_id(ta.my_numa_id); +#endif /*__TBB_ARENA_BINDING*/ + if (ta.my_max_concurrency < 1) { #if __TBB_ARENA_BINDING - d1::constraints arena_constraints = d1::constraints{} - .set_core_type(ta.core_type()) - .set_max_threads_per_core(ta.max_threads_per_core()) - .set_numa_id(ta.my_numa_id); ta.my_max_concurrency = (int)default_concurrency(arena_constraints); #else /*!__TBB_ARENA_BINDING*/ ta.my_max_concurrency = (int)governor::default_num_threads(); #endif /*!__TBB_ARENA_BINDING*/ } +#if __TBB_CPUBIND_PRESENT + numa_binding_observer* observer = construct_binding_observer( + static_cast<d1::task_arena*>(&ta), arena::num_arena_slots(ta.my_max_concurrency, ta.my_num_reserved_slots), + ta.my_numa_id, ta.core_type(), ta.max_threads_per_core()); + if (observer) { + // TODO: Consider lazy initialization for internal arena so + // the direct calls to observer might be omitted until actual initialization. 
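(Editorial aside, not part of the diff: the user-facing counterpart of the constraints object threaded through task_arena_impl::initialize() above, shown for orientation. It follows the documented oneTBB NUMA-support pattern; tbb::info::numa_nodes() and the constraints constructor taking a numa_node_id are public API, while the surrounding function is a made-up example.)

#include <oneapi/tbb/info.h>
#include <oneapi/tbb/task_arena.h>
#include <oneapi/tbb/parallel_for.h>
#include <vector>

void run_on_first_numa_node() {
    // Enumerate NUMA nodes visible to oneTBB (a single placeholder entry when topology is unknown).
    std::vector<tbb::numa_node_id> nodes = tbb::info::numa_nodes();

    // An arena constrained to the first node; threads joining it are bound via the
    // numa_binding_observer constructed in initialize() above.
    tbb::task_arena arena(tbb::task_arena::constraints(nodes[0]));

    arena.execute([] {
        tbb::parallel_for(0, 1000, [](int /*i*/) { /* per-iteration work */ });
    });
}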
+ observer->on_scheduler_entry(true); + } +#endif /*__TBB_CPUBIND_PRESENT*/ + __TBB_ASSERT(ta.my_arena.load(std::memory_order_relaxed) == nullptr, "Arena already initialized"); unsigned priority_level = arena_priority_level(ta.my_priority); - arena* a = market::create_arena(ta.my_max_concurrency, ta.my_num_reserved_slots, priority_level, /* stack_size = */ 0); - ta.my_arena.store(a, std::memory_order_release); - // add an internal market reference; a public reference was added in create_arena - market::global_market( /*is_public=*/false); -#if __TBB_ARENA_BINDING - a->my_numa_binding_observer = construct_binding_observer( - static_cast<d1::task_arena*>(&ta), a->my_num_slots, ta.my_numa_id, ta.core_type(), ta.max_threads_per_core()); -#endif /*__TBB_ARENA_BINDING*/ + threading_control* thr_control = threading_control::register_public_reference(); + arena& a = arena::create(thr_control, unsigned(ta.my_max_concurrency), ta.my_num_reserved_slots, priority_level, arena_constraints); + + ta.my_arena.store(&a, std::memory_order_release); +#if __TBB_CPUBIND_PRESENT + a.my_numa_binding_observer = observer; + if (observer) { + observer->on_scheduler_exit(true); + observer->observe(true); + } +#endif /*__TBB_CPUBIND_PRESENT*/ } void task_arena_impl::terminate(d1::task_arena_base& ta) { arena* a = ta.my_arena.load(std::memory_order_relaxed); assert_pointer_valid(a); - a->my_market->release( /*is_public=*/true, /*blocking_terminate=*/false ); - a->on_thread_leaving<arena::ref_external>(); + threading_control::unregister_public_reference(/*blocking_terminate=*/false); + a->on_thread_leaving(arena::ref_external); ta.my_arena.store(nullptr, std::memory_order_relaxed); } @@ -478,10 +600,10 @@ bool task_arena_impl::attach(d1::task_arena_base& ta) { ta.my_num_reserved_slots = a->my_num_reserved_slots; ta.my_priority = arena_priority(a->my_priority_level); ta.my_max_concurrency = ta.my_num_reserved_slots + a->my_max_num_workers; - __TBB_ASSERT(arena::num_arena_slots(ta.my_max_concurrency) == a->my_num_slots, nullptr); + __TBB_ASSERT(arena::num_arena_slots(ta.my_max_concurrency, ta.my_num_reserved_slots) == a->my_num_slots, nullptr); ta.my_arena.store(a, std::memory_order_release); - // increases market's ref count for task_arena - market::global_market( /*is_public=*/true ); + // increases threading_control's ref count for task_arena + threading_control::register_public_reference(); return true; } return false; @@ -503,6 +625,14 @@ void task_arena_impl::enqueue(d1::task& t, d1::task_group_context* c, d1::task_a a->enqueue_task(t, *ctx, *td); } +d1::slot_id task_arena_impl::execution_slot(const d1::task_arena_base& ta) { + thread_data* td = governor::get_thread_data_if_initialized(); + if (td && (td->is_attached_to(ta.my_arena.load(std::memory_order_relaxed)))) { + return td->my_arena_index; + } + return d1::slot_id(-1); +} + class nested_arena_context : no_copy { public: nested_arena_context(thread_data& td, arena& nested_arena, std::size_t slot_index) @@ -512,9 +642,11 @@ public: m_orig_arena = td.my_arena; m_orig_slot_index = td.my_arena_index; m_orig_last_observer = td.my_last_observer; + m_orig_is_thread_registered = td.my_is_registered; td.detach_task_dispatcher(); td.attach_arena(nested_arena, slot_index); + td.my_is_registered = false; if (td.my_inbox.is_idle_state(true)) td.my_inbox.set_is_idle(false); task_dispatcher& task_disp = td.my_arena_slot->default_task_dispatcher(); @@ -523,7 +655,7 @@ public: // If the calling thread occupies the slots out of external thread reserve we need to notify the // 
market that this arena requires one worker less. if (td.my_arena_index >= td.my_arena->my_num_reserved_slots) { - td.my_arena->my_market->adjust_demand(*td.my_arena, /* delta = */ -1, /* mandatory = */ false); + td.my_arena->request_workers(/* mandatory_delta = */ 0, /* workers_delta = */ -1); } td.my_last_observer = nullptr; @@ -559,13 +691,13 @@ public: // Notify the market that this thread releasing a one slot // that can be used by a worker thread. if (td.my_arena_index >= td.my_arena->my_num_reserved_slots) { - td.my_arena->my_market->adjust_demand(*td.my_arena, /* delta = */ 1, /* mandatory = */ false); + td.my_arena->request_workers(/* mandatory_delta = */ 0, /* workers_delta = */ 1); } td.leave_task_dispatcher(); td.my_arena_slot->release(); td.my_arena->my_exit_monitors.notify_one(); // do not relax! - + td.my_is_registered = m_orig_is_thread_registered; td.attach_arena(*m_orig_arena, m_orig_slot_index); td.attach_task_dispatcher(*m_orig_execute_data_ext.task_disp); __TBB_ASSERT(td.my_inbox.is_idle_state(false), nullptr); @@ -581,6 +713,7 @@ private: unsigned m_orig_slot_index{}; bool m_orig_fifo_tasks_allowed{}; bool m_orig_critical_task_allowed{}; + bool m_orig_is_thread_registered{}; }; class delegated_task : public d1::task { @@ -613,7 +746,7 @@ class delegated_task : public d1::task { } void finalize() { m_wait_ctx.release(); // must precede the wakeup - m_monitor.notify([this](std::uintptr_t ctx) { + m_monitor.notify([this] (std::uintptr_t ctx) { return ctx == std::uintptr_t(&m_delegate); }); // do not relax, it needs a fence! m_completed.store(true, std::memory_order_release); @@ -702,7 +835,7 @@ void task_arena_impl::wait(d1::task_arena_base& ta) { __TBB_ASSERT_EX(td, "Scheduler is not initialized"); __TBB_ASSERT(td->my_arena != a || td->my_arena_index == 0, "internal_wait is not supported within a worker context" ); if (a->my_max_num_workers != 0) { - while (a->num_workers_active() || a->my_pool_state.load(std::memory_order_acquire) != arena::SNAPSHOT_EMPTY) { + while (a->num_workers_active() || !a->is_empty()) { yield(); } } @@ -717,11 +850,11 @@ int task_arena_impl::max_concurrency(const d1::task_arena_base *ta) { if( a ) { // Get parameters from the arena __TBB_ASSERT( !ta || ta->my_max_concurrency==1, nullptr); - return a->my_num_reserved_slots + a->my_max_num_workers -#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY - + (a->my_local_concurrency_flag.test() ? 1 : 0) -#endif - ; + int mandatory_worker = 0; + if (a->is_arena_workerless() && a->my_num_reserved_slots == 1) { + mandatory_worker = a->my_mandatory_concurrency.test() ? 1 : 0; + } + return a->my_num_reserved_slots + a->my_max_num_workers + mandatory_worker; } if (ta && ta->my_max_concurrency == 1) { diff --git a/contrib/libs/tbb/src/tbb/arena.h b/contrib/libs/tbb/src/tbb/arena.h index 0f4165d506..1e95f117b2 100644 --- a/contrib/libs/tbb/src/tbb/arena.h +++ b/contrib/libs/tbb/src/tbb/arena.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2022 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -21,6 +21,8 @@ #include <cstring> #include "oneapi/tbb/detail/_task.h" +#include "oneapi/tbb/detail/_utils.h" +#include "oneapi/tbb/spin_mutex.h" #include "scheduler_common.h" #include "intrusive_list.h" @@ -28,11 +30,11 @@ #include "arena_slot.h" #include "rml_tbb.h" #include "mailbox.h" -#include "market.h" #include "governor.h" #include "concurrent_monitor.h" #include "observer_proxy.h" -#include "oneapi/tbb/spin_mutex.h" +#include "thread_control_monitor.h" +#include "threading_control_client.h" namespace tbb { namespace detail { @@ -40,6 +42,7 @@ namespace r1 { class task_dispatcher; class task_group_context; +class threading_control; class allocate_root_with_context_proxy; #if __TBB_ARENA_BINDING @@ -133,11 +136,10 @@ struct stack_anchor_type { stack_anchor_type(const stack_anchor_type&) = delete; }; -#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY class atomic_flag { static const std::uintptr_t SET = 1; - static const std::uintptr_t EMPTY = 0; - std::atomic<std::uintptr_t> my_state; + static const std::uintptr_t UNSET = 0; + std::atomic<std::uintptr_t> my_state{UNSET}; public: bool test_and_set() { std::uintptr_t state = my_state.load(std::memory_order_acquire); @@ -149,13 +151,13 @@ public: // We interrupted clear transaction return false; } - if (state != EMPTY) { + if (state != UNSET) { // We lost our epoch return false; } // We are too late but still in the same epoch __TBB_fallthrough; - case EMPTY: + case UNSET: return my_state.compare_exchange_strong(state, SET); } } @@ -165,21 +167,17 @@ public: std::uintptr_t state = my_state.load(std::memory_order_acquire); if (state == SET && my_state.compare_exchange_strong(state, busy)) { if (pred()) { - return my_state.compare_exchange_strong(busy, EMPTY); + return my_state.compare_exchange_strong(busy, UNSET); } // The result of the next operation is discarded, always false should be returned. my_state.compare_exchange_strong(busy, SET); } return false; } - void clear() { - my_state.store(EMPTY, std::memory_order_release); - } - bool test() { - return my_state.load(std::memory_order_acquire) != EMPTY; + bool test(std::memory_order order = std::memory_order_acquire) { + return my_state.load(order) != UNSET; } }; -#endif //! The structure of an arena, except the array of slots. /** Separated in order to simplify padding. @@ -220,60 +218,41 @@ struct arena_base : padded<intrusive_list_node> { //! The total number of workers that are requested from the resource manager. int my_total_num_workers_requested; - //! The number of workers that are really requested from the resource manager. - //! Possible values are in [0, my_max_num_workers] - int my_num_workers_requested; - //! The index in the array of per priority lists of arenas this object is in. /*const*/ unsigned my_priority_level; - //! The max priority level of arena in market. + //! The max priority level of arena in permit manager. std::atomic<bool> my_is_top_priority{false}; //! Current task pool state and estimate of available tasks amount. - /** The estimate is either 0 (SNAPSHOT_EMPTY) or infinity (SNAPSHOT_FULL). - Special state is "busy" (any other unsigned value). - Note that the implementation of arena::is_busy_or_empty() requires - my_pool_state to be unsigned. */ - using pool_state_t = std::uintptr_t ; - std::atomic<pool_state_t> my_pool_state; + atomic_flag my_pool_state; //! The list of local observers attached to this arena. observer_list my_observers; #if __TBB_ARENA_BINDING //! Pointer to internal observer that allows to bind threads in arena to certain NUMA node. 
- numa_binding_observer* my_numa_binding_observer; + numa_binding_observer* my_numa_binding_observer{nullptr}; #endif /*__TBB_ARENA_BINDING*/ // Below are rarely modified members - //! The market that owns this arena. - market* my_market; + threading_control* my_threading_control; //! Default task group context. d1::task_group_context* my_default_ctx; -#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY - // arena needs an extra worker despite a global limit - std::atomic<bool> my_global_concurrency_mode; -#endif /* __TBB_ENQUEUE_ENFORCED_CONCURRENCY */ - //! Waiting object for external threads that cannot join the arena. concurrent_monitor my_exit_monitors; //! Coroutines (task_dispathers) cache buffer arena_co_cache my_co_cache; -#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY // arena needs an extra worker despite the arena limit - atomic_flag my_local_concurrency_flag; + atomic_flag my_mandatory_concurrency; // the number of local mandatory concurrency requests - int my_local_concurrency_requests; -#endif /* __TBB_ENQUEUE_ENFORCED_CONCURRENCY*/ + int my_mandatory_requests; - //! ABA prevention marker. - std::uintptr_t my_aba_epoch; //! The number of slots in the arena. unsigned my_num_slots; //! The number of reserved slots (can be occupied only by external threads). @@ -281,11 +260,7 @@ struct arena_base : padded<intrusive_list_node> { //! The number of workers requested by the external thread owning the arena. unsigned my_max_num_workers; - //! The target serialization epoch for callers of adjust_job_count_estimate - int my_adjust_demand_target_epoch; - - //! The current serialization epoch for callers of adjust_job_count_estimate - d1::waitable_atomic<int> my_adjust_demand_current_epoch; + threading_control_client my_tc_client; #if TBB_USE_ASSERT //! Used to trap accesses to the object after its destruction. @@ -306,17 +281,19 @@ public: }; //! Constructor - arena ( market& m, unsigned max_num_workers, unsigned num_reserved_slots, unsigned priority_level); + arena(threading_control* control, unsigned max_num_workers, unsigned num_reserved_slots, unsigned priority_level); //! Allocate an instance of arena. - static arena& allocate_arena( market& m, unsigned num_slots, unsigned num_reserved_slots, - unsigned priority_level ); + static arena& allocate_arena(threading_control* control, unsigned num_slots, unsigned num_reserved_slots, + unsigned priority_level); + + static arena& create(threading_control* control, unsigned num_slots, unsigned num_reserved_slots, unsigned arena_priority_level, d1::constraints constraints = d1::constraints{}); - static int unsigned num_arena_slots ( unsigned num_slots ) { - return max(2u, num_slots); + static int unsigned num_arena_slots ( unsigned num_slots, unsigned num_reserved_slots ) { + return num_reserved_slots == 0 ? num_slots : max(2u, num_slots); } - static int allocation_size ( unsigned num_slots ) { + static int allocation_size( unsigned num_slots ) { return sizeof(base_type) + num_slots * (sizeof(mail_outbox) + sizeof(arena_slot) + sizeof(task_dispatcher)); } @@ -328,13 +305,7 @@ public: } //! Completes arena shutdown, destructs and deallocates it. - void free_arena (); - - //! No tasks to steal since last snapshot was taken - static const pool_state_t SNAPSHOT_EMPTY = 0; - - //! At least one task has been offered for stealing since the last snapshot started - static const pool_state_t SNAPSHOT_FULL = pool_state_t(-1); + void free_arena(); //! 
The number of least significant bits for external references static const unsigned ref_external_bits = 12; // up to 4095 external and 1M workers @@ -343,9 +314,6 @@ public: static const unsigned ref_external = 1; static const unsigned ref_worker = 1 << ref_external_bits; - //! No tasks to steal or snapshot is being taken. - static bool is_busy_or_empty( pool_state_t s ) { return s < SNAPSHOT_FULL; } - //! The number of workers active in the arena. unsigned num_workers_active() const { return my_references.load(std::memory_order_acquire) >> ref_external_bits; @@ -356,6 +324,8 @@ public: return num_workers_active() > my_num_workers_allotted.load(std::memory_order_relaxed); } + void request_workers(int mandatory_delta, int workers_delta, bool wakeup_threads = false); + //! If necessary, raise a flag that there is new job in arena. template<arena::new_work_type work_type> void advertise_new_work(); @@ -372,8 +342,7 @@ public: #endif //! Check if there is job anywhere in arena. - /** Return true if no job or if arena is being cleaned up. */ - bool is_out_of_work(); + void out_of_work(); //! enqueue a task into starvation-resistance queue void enqueue_task(d1::task&, d1::task_group_context&, thread_data&); @@ -382,12 +351,19 @@ public: void process(thread_data&); //! Notification that the thread leaves its arena - template<unsigned ref_param> - inline void on_thread_leaving ( ); - //! Check for the presence of enqueued tasks at all priority levels + void on_thread_leaving(unsigned ref_param); + + //! Check for the presence of enqueued tasks bool has_enqueued_tasks(); + //! Check for the presence of any tasks + bool has_tasks(); + + bool is_empty() { return my_pool_state.test() == /* EMPTY */ false; } + + thread_control_monitor& get_waiting_threads_monitor(); + static const std::size_t out_of_arena = ~size_t(0); //! Tries to occupy a slot in the arena. On success, returns the slot index; if no slot is available, returns out_of_arena. template <bool as_worker> @@ -397,158 +373,67 @@ public: std::uintptr_t calculate_stealing_threshold(); - /** Must be the last data field */ - arena_slot my_slots[1]; -}; // class arena + unsigned priority_level() { return my_priority_level; } -template<unsigned ref_param> -inline void arena::on_thread_leaving ( ) { - // - // Implementation of arena destruction synchronization logic contained various - // bugs/flaws at the different stages of its evolution, so below is a detailed - // description of the issues taken into consideration in the framework of the - // current design. - // - // In case of using fire-and-forget tasks (scheduled via task::enqueue()) - // external thread is allowed to leave its arena before all its work is executed, - // and market may temporarily revoke all workers from this arena. Since revoked - // workers never attempt to reset arena state to EMPTY and cancel its request - // to RML for threads, the arena object is destroyed only when both the last - // thread is leaving it and arena's state is EMPTY (that is its external thread - // left and it does not contain any work). - // Thus resetting arena to EMPTY state (as earlier TBB versions did) should not - // be done here (or anywhere else in the external thread to that matter); doing so - // can result either in arena's premature destruction (at least without - // additional costly checks in workers) or in unnecessary arena state changes - // (and ensuing workers migration). 
- // - // A worker that checks for work presence and transitions arena to the EMPTY - // state (in snapshot taking procedure arena::is_out_of_work()) updates - // arena::my_pool_state first and only then arena::my_num_workers_requested. - // So the check for work absence must be done against the latter field. - // - // In a time window between decrementing the active threads count and checking - // if there is an outstanding request for workers. New worker thread may arrive, - // finish remaining work, set arena state to empty, and leave decrementing its - // refcount and destroying. Then the current thread will destroy the arena - // the second time. To preclude it a local copy of the outstanding request - // value can be stored before decrementing active threads count. - // - // But this technique may cause two other problem. When the stored request is - // zero, it is possible that arena still has threads and they can generate new - // tasks and thus re-establish non-zero requests. Then all the threads can be - // revoked (as described above) leaving this thread the last one, and causing - // it to destroy non-empty arena. - // - // The other problem takes place when the stored request is non-zero. Another - // thread may complete the work, set arena state to empty, and leave without - // arena destruction before this thread decrements the refcount. This thread - // cannot destroy the arena either. Thus the arena may be "orphaned". - // - // In both cases we cannot dereference arena pointer after the refcount is - // decremented, as our arena may already be destroyed. - // - // If this is the external thread, the market is protected by refcount to it. - // In case of workers market's liveness is ensured by the RML connection - // rundown protocol, according to which the client (i.e. the market) lives - // until RML server notifies it about connection termination, and this - // notification is fired only after all workers return into RML. - // - // Thus if we decremented refcount to zero we ask the market to check arena - // state (including the fact if it is alive) under the lock. - // - std::uintptr_t aba_epoch = my_aba_epoch; - unsigned priority_level = my_priority_level; - market* m = my_market; - __TBB_ASSERT(my_references.load(std::memory_order_relaxed) >= ref_param, "broken arena reference counter"); -#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY - // When there is no workers someone must free arena, as - // without workers, no one calls is_out_of_work(). - // Skip workerless arenas because they have no demand for workers. - // TODO: consider more strict conditions for the cleanup, - // because it can create the demand of workers, - // but the arena can be already empty (and so ready for destroying) - // TODO: Fix the race: while we check soft limit and it might be changed. - if( ref_param==ref_external && my_num_slots != my_num_reserved_slots - && 0 == m->my_num_workers_soft_limit.load(std::memory_order_relaxed) && - !my_global_concurrency_mode.load(std::memory_order_relaxed) ) { - is_out_of_work(); - // We expect, that in worst case it's enough to have num_priority_levels-1 - // calls to restore priorities and yet another is_out_of_work() to conform - // that no work was found. But as market::set_active_num_workers() can be called - // concurrently, can't guarantee last is_out_of_work() return true. 
- } -#endif + bool has_request() { return my_total_num_workers_requested; } + + unsigned references() const { return my_references.load(std::memory_order_acquire); } + + bool is_arena_workerless() const { return my_max_num_workers == 0; } - // Release our reference to sync with arena destroy - unsigned remaining_ref = my_references.fetch_sub(ref_param, std::memory_order_release) - ref_param; - if (remaining_ref == 0) { - m->try_destroy_arena( this, aba_epoch, priority_level ); + void set_top_priority(bool); + + bool is_top_priority() const; + + bool is_joinable() const { + return num_workers_active() < my_num_workers_allotted.load(std::memory_order_relaxed); } -} -template<arena::new_work_type work_type> -void arena::advertise_new_work() { - auto is_related_arena = [&] (market_context context) { - return this == context.my_arena_addr; - }; + bool try_join(); - if( work_type == work_enqueued ) { - atomic_fence_seq_cst(); -#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY - if ( my_market->my_num_workers_soft_limit.load(std::memory_order_acquire) == 0 && - my_global_concurrency_mode.load(std::memory_order_acquire) == false ) - my_market->enable_mandatory_concurrency(this); + void set_allotment(unsigned allotment); - if (my_max_num_workers == 0 && my_num_reserved_slots == 1 && my_local_concurrency_flag.test_and_set()) { - my_market->adjust_demand(*this, /* delta = */ 1, /* mandatory = */ true); - } -#endif /* __TBB_ENQUEUE_ENFORCED_CONCURRENCY */ + int update_concurrency(unsigned concurrency); + + std::pair</*min workers = */ int, /*max workers = */ int> update_request(int mandatory_delta, int workers_delta); + + /** Must be the last data field */ + arena_slot my_slots[1]; +}; // class arena + +template <arena::new_work_type work_type> +void arena::advertise_new_work() { + bool is_mandatory_needed = false; + bool are_workers_needed = false; + + if (work_type != work_spawned) { // Local memory fence here and below is required to avoid missed wakeups; see the comment below. // Starvation resistant tasks require concurrency, so missed wakeups are unacceptable. - } - else if( work_type == wakeup ) { atomic_fence_seq_cst(); } + if (work_type == work_enqueued && my_num_slots > my_num_reserved_slots) { + is_mandatory_needed = my_mandatory_concurrency.test_and_set(); + } + // Double-check idiom that, in case of spawning, is deliberately sloppy about memory fences. // Technically, to avoid missed wakeups, there should be a full memory fence between the point we // released the task pool (i.e. spawned task) and read the arena's state. However, adding such a // fence might hurt overall performance more than it helps, because the fence would be executed // on every task pool release, even when stealing does not occur. Since TBB allows parallelism, // but never promises parallelism, the missed wakeup is not a correctness problem. - pool_state_t snapshot = my_pool_state.load(std::memory_order_acquire); - if( is_busy_or_empty(snapshot) ) { - // Attempt to mark as full. The compare_and_swap below is a little unusual because the - // result is compared to a value that can be different than the comparand argument. - pool_state_t expected_state = snapshot; - my_pool_state.compare_exchange_strong( expected_state, SNAPSHOT_FULL ); - if( expected_state == SNAPSHOT_EMPTY ) { - if( snapshot != SNAPSHOT_EMPTY ) { - // This thread read "busy" into snapshot, and then another thread transitioned - // my_pool_state to "empty" in the meantime, which caused the compare_and_swap above - // to fail. 
Attempt to transition my_pool_state from "empty" to "full". - expected_state = SNAPSHOT_EMPTY; - if( !my_pool_state.compare_exchange_strong( expected_state, SNAPSHOT_FULL ) ) { - // Some other thread transitioned my_pool_state from "empty", and hence became - // responsible for waking up workers. - return; - } - } - // This thread transitioned pool from empty to full state, and thus is responsible for - // telling the market that there is work to do. -#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY - if( work_type == work_spawned ) { - if ( my_global_concurrency_mode.load(std::memory_order_acquire) == true ) - my_market->mandatory_concurrency_disable( this ); - } -#endif /* __TBB_ENQUEUE_ENFORCED_CONCURRENCY */ - // TODO: investigate adjusting of arena's demand by a single worker. - my_market->adjust_demand(*this, my_max_num_workers, /* mandatory = */ false); + are_workers_needed = my_pool_state.test_and_set(); - // Notify all sleeping threads that work has appeared in the arena. - my_market->get_wait_list().notify(is_related_arena); + if (is_mandatory_needed || are_workers_needed) { + int mandatory_delta = is_mandatory_needed ? 1 : 0; + int workers_delta = are_workers_needed ? my_max_num_workers : 0; + + if (is_mandatory_needed && is_arena_workerless()) { + // Set workers_delta to 1 to keep arena invariants consistent + workers_delta = 1; } + + request_workers(mandatory_delta, workers_delta, /* wakeup_threads = */ true); } } diff --git a/contrib/libs/tbb/src/tbb/arena_slot.h b/contrib/libs/tbb/src/tbb/arena_slot.h index cdd91902d1..c526e47433 100644 --- a/contrib/libs/tbb/src/tbb/arena_slot.h +++ b/contrib/libs/tbb/src/tbb/arena_slot.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2022 Intel Corporation + Copyright (c) 2005-2023 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -169,6 +169,11 @@ public: return task_pool.load(std::memory_order_relaxed) != EmptyTaskPool; } + bool is_empty() const { + return task_pool.load(std::memory_order_relaxed) == EmptyTaskPool || + head.load(std::memory_order_relaxed) >= tail.load(std::memory_order_relaxed); + } + bool is_occupied() const { return my_is_occupied.load(std::memory_order_relaxed); } diff --git a/contrib/libs/tbb/src/tbb/cancellation_disseminator.h b/contrib/libs/tbb/src/tbb/cancellation_disseminator.h new file mode 100644 index 0000000000..7244582964 --- /dev/null +++ b/contrib/libs/tbb/src/tbb/cancellation_disseminator.h @@ -0,0 +1,85 @@ +/* + Copyright (c) 2022-2023 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef _TBB_cancellation_disseminator_H +#define _TBB_cancellation_disseminator_H + +#include "oneapi/tbb/mutex.h" +#include "oneapi/tbb/task_group.h" + +#include "intrusive_list.h" +#include "thread_data.h" + +namespace tbb { +namespace detail { +namespace r1 { + +class cancellation_disseminator { +public: + //! Finds all contexts affected by the state change and propagates the new state to them. 
+ /* The propagation is relayed to the cancellation_disseminator because tasks created by one + external thread can be passed to and executed by other external threads. This means + that context trees can span several arenas at once and thus state change + propagation cannot be generally localized to one arena only. + */ + bool propagate_task_group_state(std::atomic<uint32_t> d1::task_group_context::*mptr_state, d1::task_group_context& src, uint32_t new_state) { + if (src.my_may_have_children.load(std::memory_order_relaxed) != d1::task_group_context::may_have_children) { + return true; + } + + // The whole propagation algorithm is under the lock in order to ensure correctness + // in case of concurrent state changes at the different levels of the context tree. + threads_list_mutex_type::scoped_lock lock(my_threads_list_mutex); + // TODO: consider to use double-check idiom + if ((src.*mptr_state).load(std::memory_order_relaxed) != new_state) { + // Another thread has concurrently changed the state. Back down. + return false; + } + + // Advance global state propagation epoch + ++the_context_state_propagation_epoch; + // Propagate to all workers and external threads and sync up their local epochs with the global one + // The whole propagation sequence is locked, thus no contention is expected + for (auto& thr_data : my_threads_list) { + thr_data.propagate_task_group_state(mptr_state, src, new_state); + } + + return true; + } + + void register_thread(thread_data& td) { + threads_list_mutex_type::scoped_lock lock(my_threads_list_mutex); + my_threads_list.push_front(td); + } + + void unregister_thread(thread_data& td) { + threads_list_mutex_type::scoped_lock lock(my_threads_list_mutex); + my_threads_list.remove(td); + } + +private: + using thread_data_list_type = intrusive_list<thread_data>; + using threads_list_mutex_type = d1::mutex; + + threads_list_mutex_type my_threads_list_mutex; + thread_data_list_type my_threads_list; +}; + +} // namespace r1 +} // namespace detail +} // namespace tbb + +#endif // _TBB_cancellation_disseminator_H diff --git a/contrib/libs/tbb/src/tbb/concurrent_monitor.h b/contrib/libs/tbb/src/tbb/concurrent_monitor.h index 3d20ef5b98..3e5c4bebe8 100644 --- a/contrib/libs/tbb/src/tbb/concurrent_monitor.h +++ b/contrib/libs/tbb/src/tbb/concurrent_monitor.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2023 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -290,7 +290,17 @@ public: n = my_waitset.front(); if (n != end) { my_waitset.remove(*n); + +// GCC 12.x-13.x issues a warning here that to_wait_node(n)->my_is_in_list might have size 0, since n is +// a base_node pointer. (This cannot happen, because only wait_node pointers are added to my_waitset.) 
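(Editorial aside, not part of the diff: what the numeric bounds in the guard below encode, assuming the conventional major*10000 + minor*100 + patch packing that __TBB_GCC_VERSION follows in TBB's config headers. The helper is made up for the example.)

// Compile-time illustration of the version encoding used by the #if guard below.
constexpr long gcc_version(int major, int minor, int patch) {
    return major * 10000L + minor * 100L + patch;
}
static_assert(gcc_version(12, 1, 0) == 120100, "lower bound: first affected release is GCC 12.1");
static_assert(gcc_version(13, 3, 0) <  140000, "GCC 13.3 falls within the suppressed range");
static_assert(gcc_version(14, 0, 0) == 140000, "upper bound: GCC 14.x is excluded from the suppression");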
+#if (__TBB_GCC_VERSION >= 120100 && __TBB_GCC_VERSION < 140000 ) && !__clang__ && !__INTEL_COMPILER +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wstringop-overflow" +#endif to_wait_node(n)->my_is_in_list.store(false, std::memory_order_relaxed); +#if (__TBB_GCC_VERSION >= 120100 && __TBB_GCC_VERSION < 140000 ) && !__clang__ && !__INTEL_COMPILER +#pragma GCC diagnostic pop +#endif } } diff --git a/contrib/libs/tbb/src/tbb/def/lin64-tbb.def b/contrib/libs/tbb/src/tbb/def/lin64-tbb.def index 119eea1348..41aca2e932 100644 --- a/contrib/libs/tbb/src/tbb/def/lin64-tbb.def +++ b/contrib/libs/tbb/src/tbb/def/lin64-tbb.def @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -77,6 +77,7 @@ _ZN3tbb6detail2r17suspendEPFvPvPNS1_18suspend_point_typeEES2_; _ZN3tbb6detail2r16resumeEPNS1_18suspend_point_typeE; _ZN3tbb6detail2r121current_suspend_pointEv; _ZN3tbb6detail2r114notify_waitersEm; +_ZN3tbb6detail2r127get_thread_reference_vertexEPNS0_2d126wait_tree_vertex_interfaceE; /* Task dispatcher (task_dispatcher.cpp) */ _ZN3tbb6detail2r114execution_slotEPKNS0_2d114execution_dataE; @@ -105,6 +106,7 @@ _ZN3tbb6detail2r120isolate_within_arenaERNS0_2d113delegate_baseEl; _ZN3tbb6detail2r17enqueueERNS0_2d14taskEPNS2_15task_arena_baseE; _ZN3tbb6detail2r17enqueueERNS0_2d14taskERNS2_18task_group_contextEPNS2_15task_arena_baseE; _ZN3tbb6detail2r14waitERNS0_2d115task_arena_baseE; +_ZN3tbb6detail2r114execution_slotERKNS0_2d115task_arena_baseE; /* System topology parsing and threads pinning (governor.cpp) */ _ZN3tbb6detail2r115numa_node_countEv; diff --git a/contrib/libs/tbb/src/tbb/dynamic_link.cpp b/contrib/libs/tbb/src/tbb/dynamic_link.cpp index 2d88f8bc5d..a21beb5aeb 100644 --- a/contrib/libs/tbb/src/tbb/dynamic_link.cpp +++ b/contrib/libs/tbb/src/tbb/dynamic_link.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2023 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -34,7 +34,8 @@ // Unify system calls #define dlopen( name, flags ) LoadLibrary( name ) #define dlsym( handle, name ) GetProcAddress( handle, name ) - #define dlclose( handle ) ( ! FreeLibrary( handle ) ) + // FreeLibrary return bool value that is not used. + #define dlclose( handle ) (void)( ! FreeLibrary( handle ) ) #define dlerror() GetLastError() #ifndef PATH_MAX #define PATH_MAX MAX_PATH diff --git a/contrib/libs/tbb/src/tbb/global_control.cpp b/contrib/libs/tbb/src/tbb/global_control.cpp index 1bc3c22c1f..f45c66b87f 100644 --- a/contrib/libs/tbb/src/tbb/global_control.cpp +++ b/contrib/libs/tbb/src/tbb/global_control.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2022 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -17,11 +17,13 @@ #include "oneapi/tbb/detail/_config.h" #include "oneapi/tbb/detail/_template_helpers.h" +#include "oneapi/tbb/cache_aligned_allocator.h" #include "oneapi/tbb/global_control.h" #include "oneapi/tbb/tbb_allocator.h" #include "oneapi/tbb/spin_mutex.h" #include "governor.h" +#include "threading_control.h" #include "market.h" #include "misc.h" @@ -34,17 +36,21 @@ namespace r1 { //! 
Comparator for a set of global_control objects struct control_storage_comparator { - bool operator()(const global_control* lhs, const global_control* rhs) const; + bool operator()(const d1::global_control* lhs, const d1::global_control* rhs) const; }; class control_storage { friend struct global_control_impl; friend std::size_t global_control_active_value(int); + friend void global_control_lock(); + friend void global_control_unlock(); + friend std::size_t global_control_active_value_unsafe(d1::global_control::parameter); protected: std::size_t my_active_value{0}; - std::set<global_control*, control_storage_comparator, tbb_allocator<global_control*>> my_list{}; + std::set<d1::global_control*, control_storage_comparator, tbb_allocator<d1::global_control*>> my_list{}; spin_mutex my_list_mutex{}; public: + virtual ~control_storage() = default; virtual std::size_t default_value() const = 0; virtual void apply_active(std::size_t new_active) { my_active_value = new_active; @@ -56,6 +62,10 @@ public: spin_mutex::scoped_lock lock(my_list_mutex); // protect my_list.empty() call return !my_list.empty() ? my_active_value : default_value(); } + + std::size_t active_value_unsafe() { + return !my_list.empty() ? my_active_value : default_value(); + } }; class alignas(max_nfs_size) allowed_parallelism_control : public control_storage { @@ -67,23 +77,21 @@ class alignas(max_nfs_size) allowed_parallelism_control : public control_storage } void apply_active(std::size_t new_active) override { control_storage::apply_active(new_active); - __TBB_ASSERT( my_active_value>=1, nullptr); + __TBB_ASSERT(my_active_value >= 1, nullptr); // -1 to take external thread into account - market::set_active_num_workers( my_active_value-1 ); + threading_control::set_active_num_workers(my_active_value - 1); } std::size_t active_value() override { spin_mutex::scoped_lock lock(my_list_mutex); // protect my_list.empty() call - if (my_list.empty()) + if (my_list.empty()) { return default_value(); + } + // non-zero, if market is active - const std::size_t workers = market::max_num_workers(); + const std::size_t workers = threading_control::max_num_workers(); // We can't exceed market's maximal number of workers. // +1 to take external thread into account - return workers? min(workers+1, my_active_value): my_active_value; - } -public: - std::size_t active_value_if_present() const { - return !my_list.empty() ? my_active_value : 0; + return workers ? 
min(workers + 1, my_active_value) : my_active_value; } }; @@ -96,6 +104,8 @@ class alignas(max_nfs_size) stack_size_control : public control_storage { return hi - lo; }(); return ThreadStackSizeDefault; +#elif defined(EMSCRIPTEN) + return __TBB_EMSCRIPTEN_STACK_SIZE; #else return ThreadStackSize; #endif @@ -124,50 +134,57 @@ class alignas(max_nfs_size) lifetime_control : public control_storage { void apply_active(std::size_t new_active) override { if (new_active == 1) { // reserve the market reference - market::global_market_mutex_type::scoped_lock lock( market::theMarketMutex ); - if (market::theMarket) { - market::add_ref_unsafe(lock, /*is_public*/ true); - } + threading_control::register_lifetime_control(); } else if (new_active == 0) { // new_active == 0 - // release the market reference - market::global_market_mutex_type::scoped_lock lock( market::theMarketMutex ); - if (market::theMarket != nullptr) { - lock.release(); - market::theMarket->release(/*is_public*/ true, /*blocking_terminate*/ false); - } + threading_control::unregister_lifetime_control(/*blocking_terminate*/ false); } control_storage::apply_active(new_active); } +}; -public: - bool is_empty() { - spin_mutex::scoped_lock lock(my_list_mutex); - return my_list.empty(); +static control_storage* controls[] = {nullptr, nullptr, nullptr, nullptr}; + +void global_control_acquire() { + controls[0] = new (cache_aligned_allocate(sizeof(allowed_parallelism_control))) allowed_parallelism_control{}; + controls[1] = new (cache_aligned_allocate(sizeof(stack_size_control))) stack_size_control{}; + controls[2] = new (cache_aligned_allocate(sizeof(terminate_on_exception_control))) terminate_on_exception_control{}; + controls[3] = new (cache_aligned_allocate(sizeof(lifetime_control))) lifetime_control{}; +} + +void global_control_release() { + for (auto& ptr : controls) { + ptr->~control_storage(); + cache_aligned_deallocate(ptr); + ptr = nullptr; } -}; +} -static allowed_parallelism_control allowed_parallelism_ctl; -static stack_size_control stack_size_ctl; -static terminate_on_exception_control terminate_on_exception_ctl; -static lifetime_control lifetime_ctl; -static control_storage *controls[] = {&allowed_parallelism_ctl, &stack_size_ctl, &terminate_on_exception_ctl, &lifetime_ctl}; +void global_control_lock() { + for (auto& ctl : controls) { + ctl->my_list_mutex.lock(); + } +} -//! Comparator for a set of global_control objects -inline bool control_storage_comparator::operator()(const global_control* lhs, const global_control* rhs) const { - __TBB_ASSERT_RELEASE(lhs->my_param < global_control::parameter_max , nullptr); - return lhs->my_value < rhs->my_value || (lhs->my_value == rhs->my_value && lhs < rhs); +void global_control_unlock() { + int N = std::distance(std::begin(controls), std::end(controls)); + for (int i = N - 1; i >= 0; --i) { + controls[i]->my_list_mutex.unlock(); + } } -unsigned market::app_parallelism_limit() { - return allowed_parallelism_ctl.active_value_if_present(); +std::size_t global_control_active_value_unsafe(d1::global_control::parameter param) { + __TBB_ASSERT_RELEASE(param < d1::global_control::parameter_max, nullptr); + return controls[param]->active_value_unsafe(); } -bool terminate_on_exception() { - return global_control::active_value(global_control::terminate_on_exception) == 1; +//! 
Comparator for a set of global_control objects +inline bool control_storage_comparator::operator()(const d1::global_control* lhs, const d1::global_control* rhs) const { + __TBB_ASSERT_RELEASE(lhs->my_param < d1::global_control::parameter_max , nullptr); + return lhs->my_value < rhs->my_value || (lhs->my_value == rhs->my_value && lhs < rhs); } -unsigned market::is_lifetime_control_present() { - return !lifetime_ctl.is_empty(); +bool terminate_on_exception() { + return d1::global_control::active_value(d1::global_control::terminate_on_exception) == 1; } struct global_control_impl { @@ -184,7 +201,7 @@ private: public: static void create(d1::global_control& gc) { - __TBB_ASSERT_RELEASE(gc.my_param < global_control::parameter_max, nullptr); + __TBB_ASSERT_RELEASE(gc.my_param < d1::global_control::parameter_max, nullptr); control_storage* const c = controls[gc.my_param]; spin_mutex::scoped_lock lock(c->my_list_mutex); @@ -197,15 +214,15 @@ public: } static void destroy(d1::global_control& gc) { - __TBB_ASSERT_RELEASE(gc.my_param < global_control::parameter_max, nullptr); + __TBB_ASSERT_RELEASE(gc.my_param < d1::global_control::parameter_max, nullptr); control_storage* const c = controls[gc.my_param]; // Concurrent reading and changing global parameter is possible. spin_mutex::scoped_lock lock(c->my_list_mutex); - __TBB_ASSERT(gc.my_param == global_control::scheduler_handle || !c->my_list.empty(), nullptr); + __TBB_ASSERT(gc.my_param == d1::global_control::scheduler_handle || !c->my_list.empty(), nullptr); std::size_t new_active = (std::size_t)(-1), old_active = c->my_active_value; if (!erase_if_present(c, gc)) { - __TBB_ASSERT(gc.my_param == global_control::scheduler_handle , nullptr); + __TBB_ASSERT(gc.my_param == d1::global_control::scheduler_handle , nullptr); return; } if (c->my_list.empty()) { @@ -220,7 +237,7 @@ public: } static bool remove_and_check_if_empty(d1::global_control& gc) { - __TBB_ASSERT_RELEASE(gc.my_param < global_control::parameter_max, nullptr); + __TBB_ASSERT_RELEASE(gc.my_param < d1::global_control::parameter_max, nullptr); control_storage* const c = controls[gc.my_param]; spin_mutex::scoped_lock lock(c->my_list_mutex); @@ -230,7 +247,7 @@ public: } #if TBB_USE_ASSERT static bool is_present(d1::global_control& gc) { - __TBB_ASSERT_RELEASE(gc.my_param < global_control::parameter_max, nullptr); + __TBB_ASSERT_RELEASE(gc.my_param < d1::global_control::parameter_max, nullptr); control_storage* const c = controls[gc.my_param]; spin_mutex::scoped_lock lock(c->my_list_mutex); @@ -259,7 +276,7 @@ bool is_present(d1::global_control& gc) { } #endif // TBB_USE_ASSERT std::size_t __TBB_EXPORTED_FUNC global_control_active_value(int param) { - __TBB_ASSERT_RELEASE(param < global_control::parameter_max, nullptr); + __TBB_ASSERT_RELEASE(param < d1::global_control::parameter_max, nullptr); return controls[param]->active_value(); } diff --git a/contrib/libs/tbb/src/tbb/governor.cpp b/contrib/libs/tbb/src/tbb/governor.cpp index 3111ab3e7b..218a2bc533 100644 --- a/contrib/libs/tbb/src/tbb/governor.cpp +++ b/contrib/libs/tbb/src/tbb/governor.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2022 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
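[Editor's sketch, not part of the patch] The global_control_lock()/global_control_unlock() helpers introduced above take the per-parameter list mutexes in a fixed ascending order and release them in strict reverse order; that ordering is what makes locking the whole set deadlock-free. A minimal illustration of the same idiom with plain std::mutex (names are hypothetical, assuming four control slots as in the patch):

// Every thread acquires the mutexes in the same fixed order and releases them
// in reverse, so no two threads can hold disjoint parts of the set while each
// waits for a mutex the other already owns.
#include <mutex>

static std::mutex slot_mutexes[4];   // hypothetical stand-ins for controls[i]->my_list_mutex

void lock_all_slots() {
    for (auto& m : slot_mutexes) {   // fixed ascending order
        m.lock();
    }
}

void unlock_all_slots() {
    for (int i = 3; i >= 0; --i) {   // strict reverse order
        slot_mutexes[i].unlock();
    }
}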
@@ -15,12 +15,14 @@ */ #include "governor.h" +#include "threading_control.h" #include "main.h" #include "thread_data.h" #include "market.h" #include "arena.h" #include "dynamic_link.h" #include "concurrent_monitor.h" +#include "thread_dispatcher.h" #include "oneapi/tbb/task_group.h" #include "oneapi/tbb/global_control.h" @@ -35,11 +37,21 @@ #include <atomic> #include <algorithm> +#ifdef EMSCRIPTEN +#include <emscripten/stack.h> +#endif + namespace tbb { namespace detail { namespace r1 { +#if TBB_USE_ASSERT +std::atomic<int> the_observer_proxy_count; +#endif /* TBB_USE_ASSERT */ + void clear_address_waiter_table(); +void global_control_acquire(); +void global_control_release(); //! global_control.cpp contains definition bool remove_and_check_if_empty(d1::global_control& gc); @@ -58,6 +70,7 @@ namespace system_topology { //------------------------------------------------------------------------ void governor::acquire_resources () { + global_control_acquire(); #if __TBB_USE_POSIX int status = theTLS.create(auto_terminate); #else @@ -81,8 +94,15 @@ void governor::release_resources () { runtime_warning("failed to destroy task scheduler TLS: %s", std::strerror(status)); clear_address_waiter_table(); +#if TBB_USE_ASSERT + if (the_observer_proxy_count != 0) { + runtime_warning("Leaked %ld observer_proxy objects\n", long(the_observer_proxy_count)); + } +#endif /* TBB_USE_ASSERT */ + system_topology::destroy(); dynamic_unlink_all(); + global_control_release(); } rml::tbb_server* governor::create_rml_server ( rml::tbb_client& client ) { @@ -108,6 +128,10 @@ void governor::one_time_init() { } } +bool governor::does_client_join_workers(const rml::tbb_client &client) { + return ((const thread_dispatcher&)client).must_join_workers(); +} + /* There is no portable way to get stack base address in Posix, however the modern Linux versions provide pthread_attr_np API that can be used to obtain thread's @@ -135,6 +159,9 @@ static std::uintptr_t get_stack_base(std::size_t stack_size) { NT_TIB* pteb = (NT_TIB*)NtCurrentTeb(); __TBB_ASSERT(&pteb < pteb->StackBase && &pteb > pteb->StackLimit, "invalid stack info in TEB"); return reinterpret_cast<std::uintptr_t>(pteb->StackBase); +#elif defined(EMSCRIPTEN) + suppress_unused_warning(stack_size); + return reinterpret_cast<std::uintptr_t>(emscripten_stack_get_base()); #else // There is no portable way to get stack base address in Posix, so we use // non-portable method (on all modern Linux) or the simplified approach @@ -185,21 +212,20 @@ void governor::init_external_thread() { int num_reserved_slots = 1; unsigned arena_priority_level = 1; // corresponds to tbb::task_arena::priority::normal std::size_t stack_size = 0; - arena& a = *market::create_arena(num_slots, num_reserved_slots, arena_priority_level, stack_size); - // We need an internal reference to the market. TODO: is it legacy? 
- market::global_market(false); + threading_control* thr_control = threading_control::register_public_reference(); + arena& a = arena::create(thr_control, num_slots, num_reserved_slots, arena_priority_level); // External thread always occupies the first slot thread_data& td = *new(cache_aligned_allocate(sizeof(thread_data))) thread_data(0, false); td.attach_arena(a, /*slot index*/ 0); __TBB_ASSERT(td.my_inbox.is_idle_state(false), nullptr); - stack_size = a.my_market->worker_stack_size(); + stack_size = a.my_threading_control->worker_stack_size(); std::uintptr_t stack_base = get_stack_base(stack_size); task_dispatcher& task_disp = td.my_arena_slot->default_task_dispatcher(); td.enter_task_dispatcher(task_disp, calculate_stealing_threshold(stack_base, stack_size)); td.my_arena_slot->occupy(); - a.my_market->add_external_thread(td); + thr_control->register_thread(td); set_thread_data(td); #if (_WIN32||_WIN64) && !__TBB_DYNAMIC_LOAD_ENABLED // The external thread destructor is called from dllMain but it is not available with a static build. @@ -223,7 +249,7 @@ void governor::auto_terminate(void* tls) { // Only external thread can be inside an arena during termination. if (td->my_arena_slot) { arena* a = td->my_arena; - market* m = a->my_market; + threading_control* thr_control = a->my_threading_control; // If the TLS slot is already cleared by OS or underlying concurrency // runtime, restore its value to properly clean up arena @@ -236,16 +262,16 @@ void governor::auto_terminate(void* tls) { td->leave_task_dispatcher(); td->my_arena_slot->release(); // Release an arena - a->on_thread_leaving<arena::ref_external>(); + a->on_thread_leaving(arena::ref_external); - m->remove_external_thread(*td); + thr_control->unregister_thread(*td); // The tls should be cleared before market::release because // market can destroy the tls key if we keep the last reference clear_tls(); // If there was an associated arena, it added a public market reference - m->release( /*is_public*/ true, /*blocking_terminate*/ false); + thr_control->unregister_public_reference(/* blocking terminate =*/ false); } else { clear_tls(); } @@ -272,12 +298,10 @@ void release_impl(d1::task_scheduler_handle& handle) { bool finalize_impl(d1::task_scheduler_handle& handle) { __TBB_ASSERT_RELEASE(handle, "trying to finalize with null handle"); - market::global_market_mutex_type::scoped_lock lock( market::theMarketMutex ); - bool ok = true; // ok if theMarket does not exist yet - market* m = market::theMarket; // read the state of theMarket - if (m != nullptr) { - lock.release(); - __TBB_ASSERT(is_present(*handle.m_ctl), "finalize or release was already called on this object"); + __TBB_ASSERT(is_present(*handle.m_ctl), "finalize or release was already called on this object"); + + bool ok = true; // ok if threading_control does not exist yet + if (threading_control::is_present()) { thread_data* td = governor::get_thread_data_if_initialized(); if (td) { task_dispatcher* task_disp = td->my_task_dispatcher; @@ -286,12 +310,14 @@ bool finalize_impl(d1::task_scheduler_handle& handle) { governor::auto_terminate(td); } } + if (remove_and_check_if_empty(*handle.m_ctl)) { - ok = m->release(/*is_public*/ true, /*blocking_terminate*/ true); + ok = threading_control::unregister_lifetime_control(/*blocking_terminate*/ true); } else { ok = false; } } + return ok; } @@ -367,15 +393,18 @@ static void (*restore_affinity_ptr)( binding_handler* handler_ptr, int slot_num int (*get_default_concurrency_ptr)( int numa_id, int core_type_id, int max_threads_per_core 
) = dummy_get_default_concurrency; -#if _WIN32 || _WIN64 || __unix__ +#if _WIN32 || _WIN64 || __unix__ || __APPLE__ + // Table describing how to link the handlers. static const dynamic_link_descriptor TbbBindLinkTable[] = { DLD(__TBB_internal_initialize_system_topology, initialize_system_topology_ptr), DLD(__TBB_internal_destroy_system_topology, destroy_system_topology_ptr), +#if __TBB_CPUBIND_PRESENT DLD(__TBB_internal_allocate_binding_handler, allocate_binding_handler_ptr), DLD(__TBB_internal_deallocate_binding_handler, deallocate_binding_handler_ptr), DLD(__TBB_internal_apply_affinity, apply_affinity_ptr), DLD(__TBB_internal_restore_affinity, restore_affinity_ptr), +#endif DLD(__TBB_internal_get_default_concurrency, get_default_concurrency_ptr) }; @@ -390,6 +419,9 @@ static const unsigned LinkTableSize = sizeof(TbbBindLinkTable) / sizeof(dynamic_ #if _WIN32 || _WIN64 #define LIBRARY_EXTENSION ".dll" #define LIBRARY_PREFIX +#elif __APPLE__ +#define LIBRARY_EXTENSION __TBB_STRING(.3.dylib) +#define LIBRARY_PREFIX "lib" #elif __unix__ #define LIBRARY_EXTENSION __TBB_STRING(.so.3) #define LIBRARY_PREFIX "lib" @@ -418,7 +450,7 @@ int core_types_count = 0; int* core_types_indexes = nullptr; const char* load_tbbbind_shared_object() { -#if _WIN32 || _WIN64 || __unix__ +#if _WIN32 || _WIN64 || __unix__ || __APPLE__ #if _WIN32 && !_WIN64 // For 32-bit Windows applications, process affinity masks can only support up to 32 logical CPUs. SYSTEM_INFO si; @@ -430,7 +462,7 @@ const char* load_tbbbind_shared_object() { return tbbbind_version; } } -#endif /* _WIN32 || _WIN64 || __unix__ */ +#endif /* _WIN32 || _WIN64 || __unix__ || __APPLE__ */ return nullptr; } diff --git a/contrib/libs/tbb/src/tbb/governor.h b/contrib/libs/tbb/src/tbb/governor.h index 3d861e5323..573443d729 100644 --- a/contrib/libs/tbb/src/tbb/governor.h +++ b/contrib/libs/tbb/src/tbb/governor.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2022 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -47,7 +47,8 @@ typedef std::size_t stack_size_type; class governor { private: friend class __TBB_InitOnce; - friend class market; + friend class thread_dispatcher; + friend class threading_control_impl; // TODO: consider using thread_local (measure performance and side effects) //! TLS for scheduler instances associated with individual threads @@ -137,6 +138,8 @@ public: static bool wait_package_enabled() { return cpu_features.waitpkg_enabled; } #endif + static bool hybrid_cpu() { return cpu_features.hybrid; } + static bool rethrow_exception_broken() { return is_rethrow_broken; } static bool is_itt_present() { diff --git a/contrib/libs/tbb/src/tbb/main.cpp b/contrib/libs/tbb/src/tbb/main.cpp index 8a1dc89320..f43c33f5b7 100644 --- a/contrib/libs/tbb/src/tbb/main.cpp +++ b/contrib/libs/tbb/src/tbb/main.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2022 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
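[Editor's sketch, not part of the patch] The TbbBindLinkTable above pairs each __TBB_internal_* symbol with a function pointer that is pre-initialized to a dummy implementation, so the library keeps working when the optional tbbbind shared object is absent. A hedged illustration of that stub-fallback pattern with plain POSIX dlopen/dlsym (function and library names here are hypothetical, not the library's actual loader):

// The pointer starts out at a safe dummy; dlsym() only overwrites it when the
// optional shared object and the symbol are actually present, so callers never
// need to test for nullptr.
#include <dlfcn.h>

static int dummy_default_concurrency(int, int, int) { return -1; }  // hypothetical stub

static int (*default_concurrency_ptr)(int, int, int) = dummy_default_concurrency;

void try_link_optional_library(const char* soname, const char* symbol) {
    if (void* handle = dlopen(soname, RTLD_NOW | RTLD_LOCAL)) {
        if (void* sym = dlsym(handle, symbol)) {
            default_concurrency_ptr = reinterpret_cast<int (*)(int, int, int)>(sym);
        }
        // On failure the dummy simply stays in place.
    }
}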
@@ -18,8 +18,10 @@ #include "main.h" #include "governor.h" +#include "threading_control.h" #include "environment.h" #include "market.h" +#include "tcm_adaptor.h" #include "misc.h" #include "itt_notify.h" @@ -40,9 +42,9 @@ bool governor::UsePrivateRML; bool governor::is_rethrow_broken; //------------------------------------------------------------------------ -// market data -market* market::theMarket; -market::global_market_mutex_type market::theMarketMutex; +// threading_control data +threading_control* threading_control::g_threading_control; +threading_control::global_mutex_type threading_control::g_threading_control_mutex; //------------------------------------------------------------------------ // context propagation data @@ -70,28 +72,15 @@ void ITT_DoUnsafeOneTimeInitialization(); static __TBB_InitOnce __TBB_InitOnceHiddenInstance; #endif -#if TBB_USE_ASSERT -std::atomic<int> the_observer_proxy_count; - -struct check_observer_proxy_count { - ~check_observer_proxy_count() { - if (the_observer_proxy_count != 0) { - runtime_warning("Leaked %ld observer_proxy objects\n", long(the_observer_proxy_count)); - } - } -}; -// The proxy count checker shall be defined after __TBB_InitOnceHiddenInstance to check the count -// after auto termination. -static check_observer_proxy_count the_check_observer_proxy_count; -#endif /* TBB_USE_ASSERT */ - //------------------------------------------------------------------------ // __TBB_InitOnce //------------------------------------------------------------------------ void __TBB_InitOnce::add_ref() { - if( ++count==1 ) + if (++count == 1) { governor::acquire_resources(); + tcm_adaptor::initialize(); + } } void __TBB_InitOnce::remove_ref() { @@ -117,8 +106,10 @@ void DoOneTimeInitialization() { // No fence required for load of InitializationDone, because we are inside a critical section. if( !__TBB_InitOnce::InitializationDone ) { __TBB_InitOnce::add_ref(); - if( GetBoolEnvironmentVariable("TBB_VERSION") ) + if( GetBoolEnvironmentVariable("TBB_VERSION") ) { PrintVersion(); + tcm_adaptor::print_version(); + } bool itt_present = false; #if __TBB_USE_ITT_NOTIFY ITT_DoUnsafeOneTimeInitialization(); diff --git a/contrib/libs/tbb/src/tbb/market.cpp b/contrib/libs/tbb/src/tbb/market.cpp index b6504e0f3d..ae3fadd47e 100644 --- a/contrib/libs/tbb/src/tbb/market.cpp +++ b/contrib/libs/tbb/src/tbb/market.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2022 Intel Corporation + Copyright (c) 2005-2023 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,380 +14,65 @@ limitations under the License. */ -#include "oneapi/tbb/global_control.h" // global_control::active_value - -#include "market.h" -#include "main.h" -#include "governor.h" #include "arena.h" -#include "thread_data.h" -#include "itt_notify.h" +#include "market.h" -#include <cstring> // std::memset() +#include <algorithm> // std::find namespace tbb { namespace detail { namespace r1 { -/** This method must be invoked under my_arenas_list_mutex. **/ -arena* market::select_next_arena( arena* hint ) { - unsigned next_arena_priority_level = num_priority_levels; - if ( hint ) - next_arena_priority_level = hint->my_priority_level; - for ( unsigned idx = 0; idx < next_arena_priority_level; ++idx ) { - if ( !my_arenas[idx].empty() ) - return &*my_arenas[idx].begin(); - } - // don't change if arena with higher priority is not found. 
- return hint; -} -void market::insert_arena_into_list ( arena& a ) { - __TBB_ASSERT( a.my_priority_level < num_priority_levels, nullptr ); - my_arenas[a.my_priority_level].push_front( a ); - __TBB_ASSERT( !my_next_arena || my_next_arena->my_priority_level < num_priority_levels, nullptr ); - my_next_arena = select_next_arena( my_next_arena ); -} +class tbb_permit_manager_client : public pm_client { +public: + tbb_permit_manager_client(arena& a) : pm_client(a) {} -void market::remove_arena_from_list ( arena& a ) { - __TBB_ASSERT( a.my_priority_level < num_priority_levels, nullptr ); - my_arenas[a.my_priority_level].remove( a ); - if ( my_next_arena == &a ) - my_next_arena = nullptr; - my_next_arena = select_next_arena( my_next_arena ); -} + void register_thread() override {} + + void unregister_thread() override {} + + void set_allotment(unsigned allotment) { + my_arena.set_allotment(allotment); + } +}; //------------------------------------------------------------------------ // market //------------------------------------------------------------------------ -market::market ( unsigned workers_soft_limit, unsigned workers_hard_limit, std::size_t stack_size ) - : my_num_workers_hard_limit(workers_hard_limit) - , my_num_workers_soft_limit(workers_soft_limit) - , my_next_arena(nullptr) - , my_ref_count(1) - , my_stack_size(stack_size) - , my_workers_soft_limit_to_report(workers_soft_limit) -{ - // Once created RML server will start initializing workers that will need - // global market instance to get worker stack size - my_server = governor::create_rml_server( *this ); - __TBB_ASSERT( my_server, "Failed to create RML server" ); -} +market::market(unsigned workers_soft_limit) + : my_num_workers_soft_limit(workers_soft_limit) +{} -market::~market() { - poison_pointer(my_server); - poison_pointer(my_next_arena); +pm_client* market::create_client(arena& a) { + return new (cache_aligned_allocate(sizeof(tbb_permit_manager_client))) tbb_permit_manager_client(a); } -static unsigned calc_workers_soft_limit(unsigned workers_soft_limit, unsigned workers_hard_limit) { - if( int soft_limit = market::app_parallelism_limit() ) - workers_soft_limit = soft_limit-1; - else // if user set no limits (yet), use market's parameter - workers_soft_limit = max( governor::default_num_threads() - 1, workers_soft_limit ); - if( workers_soft_limit >= workers_hard_limit ) - workers_soft_limit = workers_hard_limit-1; - return workers_soft_limit; +void market::register_client(pm_client* c, d1::constraints&) { + mutex_type::scoped_lock lock(my_mutex); + my_clients[c->priority_level()].push_back(c); } -bool market::add_ref_unsafe( global_market_mutex_type::scoped_lock& lock, bool is_public, unsigned workers_requested, std::size_t stack_size ) { - market *m = theMarket; - if( m ) { - ++m->my_ref_count; - const unsigned old_public_count = is_public ? m->my_public_ref_count++ : /*any non-zero value*/1; - lock.release(); - if( old_public_count==0 ) - set_active_num_workers( calc_workers_soft_limit(workers_requested, m->my_num_workers_hard_limit) ); - - // do not warn if default number of workers is requested - if( workers_requested != governor::default_num_threads()-1 ) { - __TBB_ASSERT( skip_soft_limit_warning > workers_requested, - "skip_soft_limit_warning must be larger than any valid workers_requested" ); - unsigned soft_limit_to_report = m->my_workers_soft_limit_to_report.load(std::memory_order_relaxed); - if( soft_limit_to_report < workers_requested ) { - runtime_warning( "The number of workers is currently limited to %u. 
" - "The request for %u workers is ignored. Further requests for more workers " - "will be silently ignored until the limit changes.\n", - soft_limit_to_report, workers_requested ); - // The race is possible when multiple threads report warnings. - // We are OK with that, as there are just multiple warnings. - unsigned expected_limit = soft_limit_to_report; - m->my_workers_soft_limit_to_report.compare_exchange_strong(expected_limit, skip_soft_limit_warning); - } - - } - if( m->my_stack_size < stack_size ) - runtime_warning( "Thread stack size has been already set to %u. " - "The request for larger stack (%u) cannot be satisfied.\n", m->my_stack_size, stack_size ); - return true; - } - return false; -} - -market& market::global_market(bool is_public, unsigned workers_requested, std::size_t stack_size) { - global_market_mutex_type::scoped_lock lock( theMarketMutex ); - if( !market::add_ref_unsafe(lock, is_public, workers_requested, stack_size) ) { - // TODO: A lot is done under theMarketMutex locked. Can anything be moved out? - if( stack_size == 0 ) - stack_size = global_control::active_value(global_control::thread_stack_size); - // Expecting that 4P is suitable for most applications. - // Limit to 2P for large thread number. - // TODO: ask RML for max concurrency and possibly correct hard_limit - const unsigned factor = governor::default_num_threads()<=128? 4 : 2; - // The requested number of threads is intentionally not considered in - // computation of the hard limit, in order to separate responsibilities - // and avoid complicated interactions between global_control and task_scheduler_init. - // The market guarantees that at least 256 threads might be created. - const unsigned workers_hard_limit = max(max(factor*governor::default_num_threads(), 256u), app_parallelism_limit()); - const unsigned workers_soft_limit = calc_workers_soft_limit(workers_requested, workers_hard_limit); - // Create the global market instance - std::size_t size = sizeof(market); - __TBB_ASSERT( __TBB_offsetof(market, my_workers) + sizeof(std::atomic<thread_data*>) == sizeof(market), - "my_workers must be the last data field of the market class"); - size += sizeof(std::atomic<thread_data*>) * (workers_hard_limit - 1); - __TBB_InitOnce::add_ref(); - void* storage = cache_aligned_allocate(size); - std::memset( storage, 0, size ); - // Initialize and publish global market - market* m = new (storage) market( workers_soft_limit, workers_hard_limit, stack_size ); - if( is_public ) - m->my_public_ref_count.store(1, std::memory_order_relaxed); - if (market::is_lifetime_control_present()) { - ++m->my_public_ref_count; - ++m->my_ref_count; - } - theMarket = m; - // This check relies on the fact that for shared RML default_concurrency==max_concurrency - if ( !governor::UsePrivateRML && m->my_server->default_concurrency() < workers_soft_limit ) - runtime_warning( "RML might limit the number of workers to %u while %u is requested.\n" - , m->my_server->default_concurrency(), workers_soft_limit ); - } - return *theMarket; -} - -void market::destroy () { - this->market::~market(); // qualified to suppress warning - cache_aligned_deallocate( this ); - __TBB_InitOnce::remove_ref(); -} - -bool market::release ( bool is_public, bool blocking_terminate ) { - market::enforce([this] { return theMarket == this; }, "Global market instance was destroyed prematurely?"); - bool do_release = false; +void market::unregister_and_destroy_client(pm_client& c) { { - global_market_mutex_type::scoped_lock lock( theMarketMutex ); - if ( 
blocking_terminate ) { - __TBB_ASSERT( is_public, "Only an object with a public reference can request the blocking terminate" ); - while ( my_public_ref_count.load(std::memory_order_relaxed) == 1 && - my_ref_count.load(std::memory_order_relaxed) > 1 ) { - lock.release(); - // To guarantee that request_close_connection() is called by the last external thread, we need to wait till all - // references are released. Re-read my_public_ref_count to limit waiting if new external threads are created. - // Theoretically, new private references to the market can be added during waiting making it potentially - // endless. - // TODO: revise why the weak scheduler needs market's pointer and try to remove this wait. - // Note that the market should know about its schedulers for cancellation/exception/priority propagation, - // see e.g. task_group_context::cancel_group_execution() - while ( my_public_ref_count.load(std::memory_order_acquire) == 1 && - my_ref_count.load(std::memory_order_acquire) > 1 ) { - yield(); - } - lock.acquire( theMarketMutex ); - } - } - if ( is_public ) { - __TBB_ASSERT( theMarket == this, "Global market instance was destroyed prematurely?" ); - __TBB_ASSERT( my_public_ref_count.load(std::memory_order_relaxed), nullptr); - --my_public_ref_count; - } - if ( --my_ref_count == 0 ) { - __TBB_ASSERT( !my_public_ref_count.load(std::memory_order_relaxed), nullptr); - do_release = true; - theMarket = nullptr; - } - } - if( do_release ) { - __TBB_ASSERT( !my_public_ref_count.load(std::memory_order_relaxed), - "No public references remain if we remove the market." ); - // inform RML that blocking termination is required - my_join_workers = blocking_terminate; - my_server->request_close_connection(); - return blocking_terminate; + mutex_type::scoped_lock lock(my_mutex); + auto& clients = my_clients[c.priority_level()]; + auto it = std::find(clients.begin(), clients.end(), &c); + __TBB_ASSERT(it != clients.end(), "Destroying of an unregistered client"); + clients.erase(it); } - return false; -} - -int market::update_workers_request() { - int old_request = my_num_workers_requested; - my_num_workers_requested = min(my_total_demand.load(std::memory_order_relaxed), - (int)my_num_workers_soft_limit.load(std::memory_order_relaxed)); -#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY - if (my_mandatory_num_requested > 0) { - __TBB_ASSERT(my_num_workers_soft_limit.load(std::memory_order_relaxed) == 0, nullptr); - my_num_workers_requested = 1; - } -#endif - update_allotment(my_num_workers_requested); - return my_num_workers_requested - old_request; -} - -void market::set_active_num_workers ( unsigned soft_limit ) { - market *m; - - { - global_market_mutex_type::scoped_lock lock( theMarketMutex ); - if ( !theMarket ) - return; // actual value will be used at market creation - m = theMarket; - if (m->my_num_workers_soft_limit.load(std::memory_order_relaxed) == soft_limit) - return; - ++m->my_ref_count; - } - // have my_ref_count for market, use it safely - - int delta = 0; - { - arenas_list_mutex_type::scoped_lock lock( m->my_arenas_list_mutex ); - __TBB_ASSERT(soft_limit <= m->my_num_workers_hard_limit, nullptr); - -#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY - arena_list_type* arenas = m->my_arenas; - - if (m->my_num_workers_soft_limit.load(std::memory_order_relaxed) == 0 && - m->my_mandatory_num_requested > 0) - { - for (unsigned level = 0; level < num_priority_levels; ++level ) - for (arena_list_type::iterator it = arenas[level].begin(); it != arenas[level].end(); ++it) - if 
(it->my_global_concurrency_mode.load(std::memory_order_relaxed)) - m->disable_mandatory_concurrency_impl(&*it); - } - __TBB_ASSERT(m->my_mandatory_num_requested == 0, nullptr); -#endif - - m->my_num_workers_soft_limit.store(soft_limit, std::memory_order_release); - // report only once after new soft limit value is set - m->my_workers_soft_limit_to_report.store(soft_limit, std::memory_order_relaxed); -#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY - if (m->my_num_workers_soft_limit.load(std::memory_order_relaxed) == 0) { - for (unsigned level = 0; level < num_priority_levels; ++level ) - for (arena_list_type::iterator it = arenas[level].begin(); it != arenas[level].end(); ++it) - if (it->has_enqueued_tasks()) - m->enable_mandatory_concurrency_impl(&*it); - } -#endif - - delta = m->update_workers_request(); - } - // adjust_job_count_estimate must be called outside of any locks - if( delta!=0 ) - m->my_server->adjust_job_count_estimate( delta ); - // release internal market reference to match ++m->my_ref_count above - m->release( /*is_public=*/false, /*blocking_terminate=*/false ); -} - -bool governor::does_client_join_workers (const rml::tbb_client &client) { - return ((const market&)client).must_join_workers(); -} - -arena* market::create_arena ( int num_slots, int num_reserved_slots, unsigned arena_priority_level, - std::size_t stack_size ) -{ - __TBB_ASSERT( num_slots > 0, nullptr); - __TBB_ASSERT( num_reserved_slots <= num_slots, nullptr); - // Add public market reference for an external thread/task_arena (that adds an internal reference in exchange). - market &m = global_market( /*is_public=*/true, num_slots-num_reserved_slots, stack_size ); - arena& a = arena::allocate_arena( m, num_slots, num_reserved_slots, arena_priority_level ); - // Add newly created arena into the existing market's list. - arenas_list_mutex_type::scoped_lock lock(m.my_arenas_list_mutex); - m.insert_arena_into_list(a); - return &a; + auto client = static_cast<tbb_permit_manager_client*>(&c); + client->~tbb_permit_manager_client(); + cache_aligned_deallocate(client); } -/** This method must be invoked under my_arenas_list_mutex. 
**/ -void market::detach_arena ( arena& a ) { - market::enforce([this] { return theMarket == this; }, "Global market instance was destroyed prematurely?"); - __TBB_ASSERT( !a.my_slots[0].is_occupied(), nullptr); - if (a.my_global_concurrency_mode.load(std::memory_order_relaxed)) - disable_mandatory_concurrency_impl(&a); - - remove_arena_from_list(a); - if (a.my_aba_epoch == my_arenas_aba_epoch.load(std::memory_order_relaxed)) { - my_arenas_aba_epoch.store(my_arenas_aba_epoch.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed); - } -} - -void market::try_destroy_arena ( arena* a, uintptr_t aba_epoch, unsigned priority_level ) { - bool locked = true; - __TBB_ASSERT( a, nullptr); - // we hold reference to the server, so market cannot be destroyed at any moment here - __TBB_ASSERT(!is_poisoned(my_server), nullptr); - my_arenas_list_mutex.lock(); - arena_list_type::iterator it = my_arenas[priority_level].begin(); - for ( ; it != my_arenas[priority_level].end(); ++it ) { - if ( a == &*it ) { - if ( it->my_aba_epoch == aba_epoch ) { - // Arena is alive - // Acquire my_references to sync with threads that just left the arena - if (!a->my_num_workers_requested && !a->my_references.load(std::memory_order_acquire)) { - __TBB_ASSERT( - !a->my_num_workers_allotted.load(std::memory_order_relaxed) && - (a->my_pool_state == arena::SNAPSHOT_EMPTY || !a->my_max_num_workers), - "Inconsistent arena state" - ); - // Arena is abandoned. Destroy it. - detach_arena( *a ); - my_arenas_list_mutex.unlock(); - locked = false; - a->free_arena(); - } - } - if (locked) - my_arenas_list_mutex.unlock(); - return; - } - } - my_arenas_list_mutex.unlock(); -} +void market::update_allotment() { + int effective_soft_limit = my_mandatory_num_requested > 0 && my_num_workers_soft_limit == 0 ? 1 : my_num_workers_soft_limit; + int max_workers = min(my_total_demand, effective_soft_limit); + __TBB_ASSERT(max_workers >= 0, nullptr); -/** This method must be invoked under my_arenas_list_mutex. **/ -arena* market::arena_in_need ( arena_list_type* arenas, arena* hint ) { - // TODO: make sure arena with higher priority returned only if there are available slots in it. 
- hint = select_next_arena( hint ); - if ( !hint ) - return nullptr; - arena_list_type::iterator it = hint; - unsigned curr_priority_level = hint->my_priority_level; - __TBB_ASSERT( it != arenas[curr_priority_level].end(), nullptr ); - do { - arena& a = *it; - if ( ++it == arenas[curr_priority_level].end() ) { - do { - ++curr_priority_level %= num_priority_levels; - } while ( arenas[curr_priority_level].empty() ); - it = arenas[curr_priority_level].begin(); - } - if( a.num_workers_active() < a.my_num_workers_allotted.load(std::memory_order_relaxed) ) { - a.my_references += arena::ref_worker; - return &a; - } - } while ( it != hint ); - return nullptr; -} - -arena* market::arena_in_need(arena* prev) { - if (my_total_demand.load(std::memory_order_acquire) <= 0) - return nullptr; - arenas_list_mutex_type::scoped_lock lock(my_arenas_list_mutex, /*is_writer=*/false); - // TODO: introduce three state response: alive, not_alive, no_market_arenas - if ( is_arena_alive(prev) ) - return arena_in_need(my_arenas, prev); - return arena_in_need(my_arenas, my_next_arena); -} - -int market::update_allotment ( arena_list_type* arenas, int workers_demand, int max_workers ) { - __TBB_ASSERT( workers_demand > 0, nullptr ); - max_workers = min(workers_demand, max_workers); int unassigned_workers = max_workers; int assigned = 0; int carry = 0; @@ -395,13 +80,11 @@ int market::update_allotment ( arena_list_type* arenas, int workers_demand, int for (unsigned list_idx = 0; list_idx < num_priority_levels; ++list_idx ) { int assigned_per_priority = min(my_priority_level_demand[list_idx], unassigned_workers); unassigned_workers -= assigned_per_priority; - for (arena_list_type::iterator it = arenas[list_idx].begin(); it != arenas[list_idx].end(); ++it) { - arena& a = *it; - __TBB_ASSERT(a.my_num_workers_requested >= 0, nullptr); - __TBB_ASSERT(a.my_num_workers_requested <= int(a.my_max_num_workers) - || (a.my_max_num_workers == 0 && a.my_local_concurrency_requests > 0 && a.my_num_workers_requested == 1), nullptr); - if (a.my_num_workers_requested == 0) { - __TBB_ASSERT(!a.my_num_workers_allotted.load(std::memory_order_relaxed), nullptr); + // We use reverse iterator there to serve last added clients first + for (auto it = my_clients[list_idx].rbegin(); it != my_clients[list_idx].rend(); ++it) { + tbb_permit_manager_client& client = static_cast<tbb_permit_manager_client&>(**it); + if (client.max_workers() == 0) { + client.set_allotment(0); continue; } @@ -410,233 +93,49 @@ int market::update_allotment ( arena_list_type* arenas, int workers_demand, int } int allotted = 0; -#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY - if (my_num_workers_soft_limit.load(std::memory_order_relaxed) == 0) { + if (my_num_workers_soft_limit == 0) { __TBB_ASSERT(max_workers == 0 || max_workers == 1, nullptr); - allotted = a.my_global_concurrency_mode.load(std::memory_order_relaxed) && - assigned < max_workers ? 1 : 0; - } else -#endif - { - int tmp = a.my_num_workers_requested * assigned_per_priority + carry; + allotted = client.min_workers() > 0 && assigned < max_workers ? 
1 : 0; + } else { + int tmp = client.max_workers() * assigned_per_priority + carry; allotted = tmp / my_priority_level_demand[list_idx]; carry = tmp % my_priority_level_demand[list_idx]; - __TBB_ASSERT(allotted <= a.my_num_workers_requested, nullptr); - __TBB_ASSERT(allotted <= int(a.my_num_slots - a.my_num_reserved_slots), nullptr); + __TBB_ASSERT(allotted <= client.max_workers(), nullptr); } - a.my_num_workers_allotted.store(allotted, std::memory_order_relaxed); - a.my_is_top_priority.store(list_idx == max_priority_level, std::memory_order_relaxed); + client.set_allotment(allotted); + client.set_top_priority(list_idx == max_priority_level); assigned += allotted; } } - __TBB_ASSERT( 0 <= assigned && assigned <= max_workers, nullptr ); - return assigned; -} - -/** This method must be invoked under my_arenas_list_mutex. **/ -bool market::is_arena_in_list( arena_list_type &arenas, arena *a ) { - __TBB_ASSERT( a, "Expected non-null pointer to arena." ); - for ( arena_list_type::iterator it = arenas.begin(); it != arenas.end(); ++it ) - if ( a == &*it ) - return true; - return false; -} - -/** This method must be invoked under my_arenas_list_mutex. **/ -bool market::is_arena_alive(arena* a) { - if ( !a ) - return false; - - // Still cannot access internals of the arena since the object itself might be destroyed. - - for ( unsigned idx = 0; idx < num_priority_levels; ++idx ) { - if ( is_arena_in_list( my_arenas[idx], a ) ) - return true; - } - return false; -} - -#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY -void market::enable_mandatory_concurrency_impl ( arena *a ) { - __TBB_ASSERT(!a->my_global_concurrency_mode.load(std::memory_order_relaxed), nullptr); - __TBB_ASSERT(my_num_workers_soft_limit.load(std::memory_order_relaxed) == 0, nullptr); - - a->my_global_concurrency_mode.store(true, std::memory_order_relaxed); - my_mandatory_num_requested++; + __TBB_ASSERT(assigned == max_workers, nullptr); } -void market::enable_mandatory_concurrency ( arena *a ) { - int delta = 0; - { - arenas_list_mutex_type::scoped_lock lock(my_arenas_list_mutex); - if (my_num_workers_soft_limit.load(std::memory_order_relaxed) != 0 || - a->my_global_concurrency_mode.load(std::memory_order_relaxed)) - return; - - enable_mandatory_concurrency_impl(a); - delta = update_workers_request(); +void market::set_active_num_workers(int soft_limit) { + mutex_type::scoped_lock lock(my_mutex); + if (my_num_workers_soft_limit != soft_limit) { + my_num_workers_soft_limit = soft_limit; + update_allotment(); } - - if (delta != 0) - my_server->adjust_job_count_estimate(delta); -} - -void market::disable_mandatory_concurrency_impl(arena* a) { - __TBB_ASSERT(a->my_global_concurrency_mode.load(std::memory_order_relaxed), nullptr); - __TBB_ASSERT(my_mandatory_num_requested > 0, nullptr); - - a->my_global_concurrency_mode.store(false, std::memory_order_relaxed); - my_mandatory_num_requested--; } -void market::mandatory_concurrency_disable ( arena *a ) { - int delta = 0; - { - arenas_list_mutex_type::scoped_lock lock(my_arenas_list_mutex); - if (!a->my_global_concurrency_mode.load(std::memory_order_relaxed)) - return; - // There is a racy window in advertise_new_work between mandtory concurrency enabling and - // setting SNAPSHOT_FULL. It gives a chance to spawn request to disable mandatory concurrency. - // Therefore, we double check that there is no enqueued tasks. 
- if (a->has_enqueued_tasks()) - return; - - __TBB_ASSERT(my_num_workers_soft_limit.load(std::memory_order_relaxed) == 0, nullptr); - disable_mandatory_concurrency_impl(a); +void market::adjust_demand(pm_client& c, int mandatory_delta, int workers_delta) { + __TBB_ASSERT(-1 <= mandatory_delta && mandatory_delta <= 1, nullptr); - delta = update_workers_request(); - } - if (delta != 0) - my_server->adjust_job_count_estimate(delta); -} -#endif /* __TBB_ENQUEUE_ENFORCED_CONCURRENCY */ - -void market::adjust_demand ( arena& a, int delta, bool mandatory ) { - if (!delta) { - return; - } - int target_epoch{}; + int delta{}; { - arenas_list_mutex_type::scoped_lock lock(my_arenas_list_mutex); - __TBB_ASSERT(theMarket != nullptr, "market instance was destroyed prematurely?"); -#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY - if (mandatory) { - __TBB_ASSERT(delta == 1 || delta == -1, nullptr); - // Count the number of mandatory requests and proceed only for 0->1 and 1->0 transitions. - a.my_local_concurrency_requests += delta; - if ((delta > 0 && a.my_local_concurrency_requests != 1) || - (delta < 0 && a.my_local_concurrency_requests != 0)) - { - return; - } - } -#endif - a.my_total_num_workers_requested += delta; - int target_workers = 0; - // Cap target_workers into interval [0, a.my_max_num_workers] - if (a.my_total_num_workers_requested > 0) { -#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY - // At least one thread should be requested when mandatory concurrency - int max_num_workers = int(a.my_max_num_workers); - if (a.my_local_concurrency_requests > 0 && max_num_workers == 0) { - max_num_workers = 1; - } -#endif - target_workers = min(a.my_total_num_workers_requested, max_num_workers); - } - - delta = target_workers - a.my_num_workers_requested; - - if (delta == 0) { - return; - } - - a.my_num_workers_requested += delta; - if (a.my_num_workers_requested == 0) { - a.my_num_workers_allotted.store(0, std::memory_order_relaxed); - } - - int total_demand = my_total_demand.load(std::memory_order_relaxed) + delta; - my_total_demand.store(total_demand, std::memory_order_relaxed); - my_priority_level_demand[a.my_priority_level] += delta; - unsigned effective_soft_limit = my_num_workers_soft_limit.load(std::memory_order_relaxed); - if (my_mandatory_num_requested > 0) { - __TBB_ASSERT(effective_soft_limit == 0, nullptr); - effective_soft_limit = 1; - } - - update_allotment(effective_soft_limit); - if (delta > 0) { - // can't overflow soft_limit, but remember values request by arenas in - // my_total_demand to not prematurely release workers to RML - if (my_num_workers_requested + delta > (int)effective_soft_limit) - delta = effective_soft_limit - my_num_workers_requested; - } - else { - // the number of workers should not be decreased below my_total_demand - if (my_num_workers_requested + delta < total_demand) - delta = min(total_demand, (int)effective_soft_limit) - my_num_workers_requested; - } - my_num_workers_requested += delta; - __TBB_ASSERT(my_num_workers_requested <= (int)effective_soft_limit, nullptr); + mutex_type::scoped_lock lock(my_mutex); + // Update client's state + delta = c.update_request(mandatory_delta, workers_delta); - target_epoch = a.my_adjust_demand_target_epoch++; - } - - a.my_adjust_demand_current_epoch.wait_until(target_epoch, /* context = */ target_epoch, std::memory_order_relaxed); - // Must be called outside of any locks - my_server->adjust_job_count_estimate( delta ); - a.my_adjust_demand_current_epoch.exchange(target_epoch + 1); - a.my_adjust_demand_current_epoch.notify_relaxed(target_epoch 
+ 1); -} + // Update market's state + my_total_demand += delta; + my_priority_level_demand[c.priority_level()] += delta; + my_mandatory_num_requested += mandatory_delta; -void market::process( job& j ) { - thread_data& td = static_cast<thread_data&>(j); - // td.my_arena can be dead. Don't access it until arena_in_need is called - arena *a = td.my_arena; - for (int i = 0; i < 2; ++i) { - while ( (a = arena_in_need(a)) ) { - a->process(td); - } - // Workers leave market because there is no arena in need. It can happen earlier than - // adjust_job_count_estimate() decreases my_slack and RML can put this thread to sleep. - // It might result in a busy-loop checking for my_slack<0 and calling this method instantly. - // the yield refines this spinning. - if ( !i ) { - yield(); - } + update_allotment(); } -} - -void market::cleanup( job& j) { - market::enforce([this] { return theMarket != this; }, nullptr ); - governor::auto_terminate(&j); -} - -void market::acknowledge_close_connection() { - destroy(); -} - -::rml::job* market::create_one_job() { - unsigned short index = ++my_first_unused_worker_idx; - __TBB_ASSERT( index > 0, nullptr); - ITT_THREAD_SET_NAME(_T("TBB Worker Thread")); - // index serves as a hint decreasing conflicts between workers when they migrate between arenas - thread_data* td = new(cache_aligned_allocate(sizeof(thread_data))) thread_data{ index, true }; - __TBB_ASSERT( index <= my_num_workers_hard_limit, nullptr); - __TBB_ASSERT( my_workers[index - 1].load(std::memory_order_relaxed) == nullptr, nullptr); - my_workers[index - 1].store(td, std::memory_order_release); - return td; -} - -void market::add_external_thread(thread_data& td) { - context_state_propagation_mutex_type::scoped_lock lock(the_context_state_propagation_mutex); - my_masters.push_front(td); -} -void market::remove_external_thread(thread_data& td) { - context_state_propagation_mutex_type::scoped_lock lock(the_context_state_propagation_mutex); - my_masters.remove(td); + notify_thread_request(delta); } } // namespace r1 diff --git a/contrib/libs/tbb/src/tbb/market.h b/contrib/libs/tbb/src/tbb/market.h index f3891df305..85532ff106 100644 --- a/contrib/libs/tbb/src/tbb/market.h +++ b/contrib/libs/tbb/src/tbb/market.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2022 Intel Corporation + Copyright (c) 2005-2023 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
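[Editor's sketch, not part of the patch] The rewritten market::update_allotment() above splits at most max_workers among clients in proportion to their demand using integer division with a running carry, so rounding never loses or invents a worker. A minimal worked example of that recurrence for a single priority level (hypothetical names):

// With demands {3, 2, 1} (total 6) and 4 available workers the loop yields
// {2, 1, 1}: the remainder of each division is carried into the next client's
// share, so the allotments always sum to exactly 'available'.
#include <vector>
#include <cstdio>

std::vector<int> split_proportionally(const std::vector<int>& demand, int available) {
    int total = 0;
    for (int d : demand) total += d;
    std::vector<int> allotment(demand.size(), 0);
    if (total == 0) return allotment;
    if (available > total) available = total;    // cannot assign more than is asked for
    int carry = 0;
    for (std::size_t i = 0; i < demand.size(); ++i) {
        int tmp = demand[i] * available + carry; // same recurrence as in the patch
        allotment[i] = tmp / total;
        carry = tmp % total;
    }
    return allotment;                            // sums to exactly 'available'
}

int main() {
    for (int a : split_proportionally({3, 2, 1}, 4)) std::printf("%d ", a);  // prints "2 1 1"
}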
@@ -17,291 +17,62 @@ #ifndef _TBB_market_H #define _TBB_market_H -#include "scheduler_common.h" -#include "market_concurrent_monitor.h" -#include "intrusive_list.h" -#include "rml_tbb.h" #include "oneapi/tbb/rw_mutex.h" +#include "oneapi/tbb/tbb_allocator.h" +#include "oneapi/tbb/task_arena.h" -#include "oneapi/tbb/spin_rw_mutex.h" -#include "oneapi/tbb/task_group.h" +#include "permit_manager.h" +#include "pm_client.h" #include <atomic> - -#if defined(_MSC_VER) && defined(_Wp64) - // Workaround for overzealous compiler warnings in /Wp64 mode - #pragma warning (push) - #pragma warning (disable: 4244) -#endif +#include <vector> namespace tbb { namespace detail { - -namespace d1 { -class task_scheduler_handle; -} - namespace r1 { -class task_arena_base; -class task_group_context; - -//------------------------------------------------------------------------ -// Class market -//------------------------------------------------------------------------ - -class market : no_copy, rml::tbb_client { - friend class arena; - friend class task_arena_base; - template<typename SchedulerTraits> friend class custom_scheduler; - friend class task_group_context; - friend class governor; - friend class lifetime_control; - +class market : public permit_manager { public: - //! Keys for the arena map array. The lower the value the higher priority of the arena list. - static constexpr unsigned num_priority_levels = 3; - -private: - friend void ITT_DoUnsafeOneTimeInitialization (); - friend bool finalize_impl(d1::task_scheduler_handle& handle); - - typedef intrusive_list<arena> arena_list_type; - typedef intrusive_list<thread_data> thread_data_list_type; + market(unsigned soft_limit); - //! Currently active global market - static market* theMarket; + pm_client* create_client(arena& a) override; + void register_client(pm_client* client, d1::constraints&) override; + void unregister_and_destroy_client(pm_client& c) override; - typedef scheduler_mutex_type global_market_mutex_type; - - //! Mutex guarding creation/destruction of theMarket, insertions/deletions in my_arenas, and cancellation propagation - static global_market_mutex_type theMarketMutex; - - //! Lightweight mutex guarding accounting operations with arenas list - typedef rw_mutex arenas_list_mutex_type; - // TODO: introduce fine-grained (per priority list) locking of arenas. - arenas_list_mutex_type my_arenas_list_mutex; - - //! Pointer to the RML server object that services this TBB instance. - rml::tbb_server* my_server; - - //! Waiting object for external and coroutine waiters. - market_concurrent_monitor my_sleep_monitor; + //! Request that arena's need in workers should be adjusted. + void adjust_demand(pm_client&, int mandatory_delta, int workers_delta) override; - //! Maximal number of workers allowed for use by the underlying resource manager - /** It can't be changed after market creation. **/ - unsigned my_num_workers_hard_limit; + //! Set number of active workers + void set_active_num_workers(int soft_limit) override; +private: + //! Recalculates the number of workers assigned to each arena in the list. + void update_allotment(); - //! Current application-imposed limit on the number of workers (see set_active_num_workers()) - /** It can't be more than my_num_workers_hard_limit. **/ - std::atomic<unsigned> my_num_workers_soft_limit; + //! Keys for the arena map array. The lower the value the higher priority of the arena list. + static constexpr unsigned num_priority_levels = d1::num_priority_levels; - //! 
Number of workers currently requested from RML - int my_num_workers_requested; + using mutex_type = d1::rw_mutex; + mutex_type my_mutex; - //! First unused index of worker - /** Used to assign indices to the new workers coming from RML, and busy part - of my_workers array. **/ - std::atomic<unsigned> my_first_unused_worker_idx; + //! Current application-imposed limit on the number of workers + int my_num_workers_soft_limit; //! Number of workers that were requested by all arenas on all priority levels - std::atomic<int> my_total_demand; + int my_total_demand{0}; //! Number of workers that were requested by arenas per single priority list item - int my_priority_level_demand[num_priority_levels]; + int my_priority_level_demand[num_priority_levels] = {0}; -#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY //! How many times mandatory concurrency was requested from the market - int my_mandatory_num_requested; -#endif + int my_mandatory_num_requested{0}; //! Per priority list of registered arenas - arena_list_type my_arenas[num_priority_levels]; - - //! The first arena to be checked when idle worker seeks for an arena to enter - /** The check happens in round-robin fashion. **/ - arena *my_next_arena; - - //! ABA prevention marker to assign to newly created arenas - std::atomic<uintptr_t> my_arenas_aba_epoch; - - //! Reference count controlling market object lifetime - std::atomic<unsigned> my_ref_count; - - //! Count of external threads attached - std::atomic<unsigned> my_public_ref_count; - - //! Stack size of worker threads - std::size_t my_stack_size; - - //! Shutdown mode - bool my_join_workers; - - //! The value indicating that the soft limit warning is unnecessary - static const unsigned skip_soft_limit_warning = ~0U; - - //! Either workers soft limit to be reported via runtime_warning() or skip_soft_limit_warning - std::atomic<unsigned> my_workers_soft_limit_to_report; - - //! Constructor - market ( unsigned workers_soft_limit, unsigned workers_hard_limit, std::size_t stack_size ); - - //! Destructor - ~market(); - - //! Destroys and deallocates market object created by market::create() - void destroy (); - - //! Recalculates the number of workers requested from RML and updates the allotment. - int update_workers_request(); - - //! Recalculates the number of workers assigned to each arena in the list. - /** The actual number of workers servicing a particular arena may temporarily - deviate from the calculated value. **/ - void update_allotment (unsigned effective_soft_limit) { - int total_demand = my_total_demand.load(std::memory_order_relaxed); - if (total_demand) { - update_allotment(my_arenas, total_demand, (int)effective_soft_limit); - } - } - - //! Returns next arena that needs more workers, or nullptr. 
- arena* arena_in_need(arena* prev); - - template <typename Pred> - static void enforce (Pred pred, const char* msg) { - suppress_unused_warning(pred, msg); -#if TBB_USE_ASSERT - global_market_mutex_type::scoped_lock lock(theMarketMutex); - __TBB_ASSERT(pred(), msg); -#endif - } - - //////////////////////////////////////////////////////////////////////////////// - // Helpers to unify code branches dependent on priority feature presence - - arena* select_next_arena( arena* hint ); - - void insert_arena_into_list ( arena& a ); - - void remove_arena_from_list ( arena& a ); - - arena* arena_in_need ( arena_list_type* arenas, arena* hint ); - - int update_allotment ( arena_list_type* arenas, int total_demand, int max_workers ); - - bool is_arena_in_list( arena_list_type& arenas, arena* a ); - - bool is_arena_alive( arena* a ); - - //////////////////////////////////////////////////////////////////////////////// - // Implementation of rml::tbb_client interface methods - - version_type version () const override { return 0; } - - unsigned max_job_count () const override { return my_num_workers_hard_limit; } - - std::size_t min_stack_size () const override { return worker_stack_size(); } - - job* create_one_job () override; - - void cleanup( job& j ) override; - - void acknowledge_close_connection () override; - - void process( job& j ) override; - -public: - //! Factory method creating new market object - static market& global_market( bool is_public, unsigned max_num_workers = 0, std::size_t stack_size = 0 ); - - //! Add reference to market if theMarket exists - static bool add_ref_unsafe( global_market_mutex_type::scoped_lock& lock, bool is_public, unsigned max_num_workers = 0, std::size_t stack_size = 0 ); - - //! Creates an arena object - /** If necessary, also creates global market instance, and boosts its ref count. - Each call to create_arena() must be matched by the call to arena::free_arena(). **/ - static arena* create_arena ( int num_slots, int num_reserved_slots, - unsigned arena_index, std::size_t stack_size ); - - //! Removes the arena from the market's list - void try_destroy_arena ( arena*, uintptr_t aba_epoch, unsigned priority_level ); - - //! Removes the arena from the market's list - void detach_arena ( arena& ); - - //! Decrements market's refcount and destroys it in the end - bool release ( bool is_public, bool blocking_terminate ); - - //! Return wait list - market_concurrent_monitor& get_wait_list() { return my_sleep_monitor; } - -#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY - //! Imlpementation of mandatory concurrency enabling - void enable_mandatory_concurrency_impl ( arena *a ); - - //! Inform the external thread that there is an arena with mandatory concurrency - void enable_mandatory_concurrency ( arena *a ); - - //! Inform the external thread that the arena is no more interested in mandatory concurrency - void disable_mandatory_concurrency_impl(arena* a); - - //! Inform the external thread that the arena is no more interested in mandatory concurrency - void mandatory_concurrency_disable ( arena *a ); -#endif /* __TBB_ENQUEUE_ENFORCED_CONCURRENCY */ - - //! Request that arena's need in workers should be adjusted. - /** Concurrent invocations are possible only on behalf of different arenas. **/ - void adjust_demand ( arena&, int delta, bool mandatory ); - - //! Used when RML asks for join mode during workers termination. - bool must_join_workers () const { return my_join_workers; } - - //! Returns the requested stack size of worker threads. 
- std::size_t worker_stack_size () const { return my_stack_size; } - - //! Set number of active workers - static void set_active_num_workers( unsigned w ); - - //! Reports active parallelism level according to user's settings - static unsigned app_parallelism_limit(); - - //! Reports if any active global lifetime references are present - static unsigned is_lifetime_control_present(); - - //! Finds all contexts affected by the state change and propagates the new state to them. - /** The propagation is relayed to the market because tasks created by one - external thread can be passed to and executed by other external threads. This means - that context trees can span several arenas at once and thus state change - propagation cannot be generally localized to one arena only. **/ - template <typename T> - bool propagate_task_group_state (std::atomic<T> d1::task_group_context::*mptr_state, d1::task_group_context& src, T new_state ); - - //! List of registered external threads - thread_data_list_type my_masters; - - //! Array of pointers to the registered workers - /** Used by cancellation propagation mechanism. - Must be the last data member of the class market. **/ - std::atomic<thread_data*> my_workers[1]; - - static unsigned max_num_workers() { - global_market_mutex_type::scoped_lock lock( theMarketMutex ); - return theMarket? theMarket->my_num_workers_hard_limit : 0; - } - - void add_external_thread(thread_data& td); - - void remove_external_thread(thread_data& td); + using clients_container_type = std::vector<pm_client*, tbb::tbb_allocator<pm_client*>>; + clients_container_type my_clients[num_priority_levels]; }; // class market } // namespace r1 } // namespace detail } // namespace tbb -#if defined(_MSC_VER) && defined(_Wp64) - // Workaround for overzealous compiler warnings in /Wp64 mode - #pragma warning (pop) -#endif // warning 4244 is back - #endif /* _TBB_market_H */ diff --git a/contrib/libs/tbb/src/tbb/misc.cpp b/contrib/libs/tbb/src/tbb/misc.cpp index 17da1238f8..115a5f3885 100644 --- a/contrib/libs/tbb/src/tbb/misc.cpp +++ b/contrib/libs/tbb/src/tbb/misc.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -92,6 +92,8 @@ void PrintExtraVersionInfo( const char* category, const char* format, ... ) { //! check for transaction support. 
#if _MSC_VER #include <intrin.h> // for __cpuid +#elif __APPLE__ +#include <sys/sysctl.h> #endif #if __TBB_x86_32 || __TBB_x86_64 @@ -131,13 +133,22 @@ void detect_cpu_features(cpu_features_type& cpu_features) { #if __TBB_x86_32 || __TBB_x86_64 const int rtm_ebx_mask = 1 << 11; const int waitpkg_ecx_mask = 1 << 5; + const int hybrid_edx_mask = 1 << 15; int registers[4] = {0}; - // Check RTM and WAITPKG + // Check RTM, WAITPKG, HYBRID check_cpuid(7, 0, registers); cpu_features.rtm_enabled = (registers[1] & rtm_ebx_mask) != 0; cpu_features.waitpkg_enabled = (registers[2] & waitpkg_ecx_mask) != 0; -#endif /* (__TBB_x86_32 || __TBB_x86_64) */ + cpu_features.hybrid = (registers[3] & hybrid_edx_mask) != 0; +#elif __APPLE__ + // Check HYBRID (hw.nperflevels > 1) + uint64_t nperflevels = 0; + size_t nperflevels_size = sizeof(nperflevels); + if (!sysctlbyname("hw.nperflevels", &nperflevels, &nperflevels_size, nullptr, 0)) { + cpu_features.hybrid = (nperflevels > 1); + } +#endif } } // namespace r1 diff --git a/contrib/libs/tbb/src/tbb/misc.h b/contrib/libs/tbb/src/tbb/misc.h index b11c0029ef..988c29b17b 100644 --- a/contrib/libs/tbb/src/tbb/misc.h +++ b/contrib/libs/tbb/src/tbb/misc.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2022 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -211,6 +211,7 @@ T1 atomic_update(std::atomic<T1>& dst, T1 newValue, Pred compare) { struct cpu_features_type { bool rtm_enabled{false}; bool waitpkg_enabled{false}; + bool hybrid{false}; }; void detect_cpu_features(cpu_features_type& cpu_features); diff --git a/contrib/libs/tbb/src/tbb/misc_ex.cpp b/contrib/libs/tbb/src/tbb/misc_ex.cpp index 55be0af3f3..13b7b04fb1 100644 --- a/contrib/libs/tbb/src/tbb/misc_ex.cpp +++ b/contrib/libs/tbb/src/tbb/misc_ex.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2022 Intel Corporation + Copyright (c) 2005-2023 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -215,6 +215,7 @@ int AvailableHwConcurrency() { } fscanf(fp, ","); } + fclose(fp); return (num_cpus > 0) ? num_cpus : 1; } diff --git a/contrib/libs/tbb/src/tbb/permit_manager.h b/contrib/libs/tbb/src/tbb/permit_manager.h new file mode 100644 index 0000000000..0a6a737c4d --- /dev/null +++ b/contrib/libs/tbb/src/tbb/permit_manager.h @@ -0,0 +1,61 @@ +/* + Copyright (c) 2022-2023 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef _TBB_permit_manager_H +#define _TBB_permit_manager_H + +#include "oneapi/tbb/info.h" +#include "oneapi/tbb/detail/_utils.h" +#include "thread_request_serializer.h" + +namespace tbb { +namespace detail { +namespace r1 { + +class arena; +class pm_client; + +class permit_manager : no_copy { +public: + virtual ~permit_manager() {} + virtual pm_client* create_client(arena& a) = 0; + virtual void register_client(pm_client* client, d1::constraints& constraints) = 0; + virtual void unregister_and_destroy_client(pm_client& c) = 0; + + virtual void set_active_num_workers(int soft_limit) = 0; + virtual void adjust_demand(pm_client&, int mandatory_delta, int workers_delta) = 0; + + void set_thread_request_observer(thread_request_observer& tr_observer) { + __TBB_ASSERT(!my_thread_request_observer, "set_thread_request_observer was called already?"); + my_thread_request_observer = &tr_observer; + } +protected: + void notify_thread_request(int delta) { + __TBB_ASSERT(my_thread_request_observer, "set_thread_request_observer was not called?"); + if (delta) { + my_thread_request_observer->update(delta); + } + } +private: + thread_request_observer* my_thread_request_observer{nullptr}; +}; + + +} // namespace r1 +} // namespace detail +} // namespace tbb + +#endif // _TBB_permit_manager_H diff --git a/contrib/libs/tbb/src/tbb/pm_client.h b/contrib/libs/tbb/src/tbb/pm_client.h new file mode 100644 index 0000000000..d08af82490 --- /dev/null +++ b/contrib/libs/tbb/src/tbb/pm_client.h @@ -0,0 +1,76 @@ +/* + Copyright (c) 2022-2023 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef _TBB_pm_client_H +#define _TBB_pm_client_H + +#include "arena.h" + +namespace tbb { +namespace detail { +namespace r1 { + +class pm_client { +public: + pm_client(arena& a) : my_arena(a) {} + virtual ~pm_client() {} + + unsigned priority_level() { + return my_arena.priority_level(); + } + + void set_top_priority(bool b) { + my_arena.set_top_priority(b); + } + + int min_workers() const { + return my_min_workers; + } + + int max_workers() const { + return my_max_workers; + } + + int update_request(int mandatory_delta, int workers_delta) { + auto min_max_workers = my_arena.update_request(mandatory_delta, workers_delta); + int delta = min_max_workers.second - my_max_workers; + set_workers(min_max_workers.first, min_max_workers.second); + return delta; + } + + virtual void register_thread() = 0; + + virtual void unregister_thread() = 0; + + +protected: + void set_workers(int mn_w, int mx_w) { + __TBB_ASSERT(mn_w >= 0, nullptr); + __TBB_ASSERT(mx_w >= 0, nullptr); + my_min_workers = mn_w; + my_max_workers = mx_w; + } + + arena& my_arena; + int my_min_workers{0}; + int my_max_workers{0}; +}; + +} // namespace r1 +} // namespace detail +} // namespace tbb + +#endif // _TBB_pm_client_H diff --git a/contrib/libs/tbb/src/tbb/rml_tbb.cpp b/contrib/libs/tbb/src/tbb/rml_tbb.cpp index 4c772eae06..d1cd285c1a 100644 --- a/contrib/libs/tbb/src/tbb/rml_tbb.cpp +++ b/contrib/libs/tbb/src/tbb/rml_tbb.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2022 Intel Corporation + Copyright (c) 2005-2023 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -49,7 +49,7 @@ namespace rml { #if _WIN32 || _WIN64 #define RML_SERVER_NAME "irml" DEBUG_SUFFIX ".dll" #elif __APPLE__ -#define RML_SERVER_NAME "libirml" DEBUG_SUFFIX ".dylib" +#define RML_SERVER_NAME "libirml" DEBUG_SUFFIX ".1.dylib" #elif __FreeBSD__ || __NetBSD__ || __OpenBSD__ || __sun || _AIX #define RML_SERVER_NAME "libirml" DEBUG_SUFFIX ".so" #elif __unix__ diff --git a/contrib/libs/tbb/src/tbb/scheduler_common.h b/contrib/libs/tbb/src/tbb/scheduler_common.h index 9e10365736..e4686e1673 100644 --- a/contrib/libs/tbb/src/tbb/scheduler_common.h +++ b/contrib/libs/tbb/src/tbb/scheduler_common.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2022 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -23,6 +23,7 @@ #include "oneapi/tbb/detail/_machine.h" #include "oneapi/tbb/task_group.h" #include "oneapi/tbb/cache_aligned_allocator.h" +#include "oneapi/tbb/tbb_allocator.h" #include "itt_notify.h" #include "co_context.h" #include "misc.h" @@ -41,6 +42,8 @@ #include <cstdint> #include <exception> +#include <memory> // unique_ptr +#include <unordered_map> //! 
Mutex type for global locks in the scheduler using scheduler_mutex_type = __TBB_SCHEDULER_MUTEX_TYPE; @@ -68,6 +71,22 @@ template<task_stream_accessor_type> class task_stream; using isolation_type = std::intptr_t; constexpr isolation_type no_isolation = 0; +struct cache_aligned_deleter { + template <typename T> + void operator() (T* ptr) const { + ptr->~T(); + cache_aligned_deallocate(ptr); + } +}; + +template <typename T> +using cache_aligned_unique_ptr = std::unique_ptr<T, cache_aligned_deleter>; + +template <typename T, typename ...Args> +cache_aligned_unique_ptr<T> make_cache_aligned_unique(Args&& ...args) { + return cache_aligned_unique_ptr<T>(new (cache_aligned_allocate(sizeof(T))) T(std::forward<Args>(args)...)); +} + //------------------------------------------------------------------------ // Extended execute data //------------------------------------------------------------------------ @@ -225,9 +244,10 @@ inline void prolonged_pause() { std::uint64_t time_stamp = machine_time_stamp(); // _tpause function directs the processor to enter an implementation-dependent optimized state // until the Time Stamp Counter reaches or exceeds the value specified in second parameter. - // Constant "700" is ticks to wait for. + // Constant "1000" is ticks to wait for. + // TODO : Modify this parameter based on empirical study of benchmarks. // First parameter 0 selects between a lower power (cleared) or faster wakeup (set) optimized state. - _tpause(0, time_stamp + 700); + _tpause(0, time_stamp + 1000); } else #endif @@ -245,17 +265,12 @@ class stealing_loop_backoff { int my_yield_count; public: // my_yield_threshold = 100 is an experimental value. Ideally, once we start calling __TBB_Yield(), - // the time spent spinning before calling is_out_of_work() should be approximately + // the time spent spinning before calling out_of_work() should be approximately // the time it takes for a thread to be woken up. Doing so would guarantee that we do // no worse than 2x the optimal spin time. Or perhaps a time-slice quantum is the right amount. stealing_loop_backoff(int num_workers, int yields_multiplier) : my_pause_threshold{ 2 * (num_workers + 1) } -#if __APPLE__ - // threshold value tuned separately for macOS due to high cost of sched_yield there - , my_yield_threshold{10 * yields_multiplier} -#else , my_yield_threshold{100 * yields_multiplier} -#endif , my_pause_count{} , my_yield_count{} {} @@ -382,7 +397,7 @@ struct suspend_point_type { void finilize_resume() { m_stack_state.store(stack_state::active, std::memory_order_relaxed); - // Set the suspended state for the stack that we left. If the state is already notified, it means that + // Set the suspended state for the stack that we left. If the state is already notified, it means that // someone already tried to resume our previous stack but failed. So, we need to resume it. // m_prev_suspend_point might be nullptr when destroying co_context based on threads if (m_prev_suspend_point && m_prev_suspend_point->m_stack_state.exchange(stack_state::suspended) == stack_state::notified) { @@ -461,6 +476,13 @@ public: //! Suspend point (null if this task dispatcher has been never suspended) suspend_point_type* m_suspend_point{ nullptr }; + //! 
Used to improve scalability of d1::wait_context by using per thread reference_counter + std::unordered_map<d1::wait_tree_vertex_interface*, d1::reference_vertex*, + std::hash<d1::wait_tree_vertex_interface*>, std::equal_to<d1::wait_tree_vertex_interface*>, + tbb_allocator<std::pair<d1::wait_tree_vertex_interface* const, d1::reference_vertex*>> + > + m_reference_vertex_map; + //! Attempt to get a task from the mailbox. /** Gets a task only if it has not been executed by its sender or a thief that has stolen it from the sender's task pool. Otherwise returns nullptr. @@ -489,6 +511,14 @@ public: m_suspend_point->~suspend_point_type(); cache_aligned_deallocate(m_suspend_point); } + + for (auto& elem : m_reference_vertex_map) { + d1::reference_vertex*& node = elem.second; + node->~reference_vertex(); + cache_aligned_deallocate(node); + poison_pointer(node); + } + poison_pointer(m_thread_data); poison_pointer(m_suspend_point); } @@ -548,6 +578,7 @@ public: #endif inline std::uintptr_t calculate_stealing_threshold(std::uintptr_t base, std::size_t stack_size) { + __TBB_ASSERT(stack_size != 0, "Stack size cannot be zero"); __TBB_ASSERT(base > stack_size / 2, "Stack anchor calculation overflow"); return base - stack_size / 2; } @@ -558,8 +589,7 @@ struct task_group_context_impl { static void register_with(d1::task_group_context&, thread_data*); static void bind_to_impl(d1::task_group_context&, thread_data*); static void bind_to(d1::task_group_context&, thread_data*); - template <typename T> - static void propagate_task_group_state(d1::task_group_context&, std::atomic<T> d1::task_group_context::*, d1::task_group_context&, T); + static void propagate_task_group_state(d1::task_group_context&, std::atomic<uint32_t> d1::task_group_context::*, d1::task_group_context&, uint32_t); static bool cancel_group_execution(d1::task_group_context&); static bool is_group_execution_cancelled(const d1::task_group_context&); static void reset(d1::task_group_context&); diff --git a/contrib/libs/tbb/src/tbb/task.cpp b/contrib/libs/tbb/src/tbb/task.cpp index bd4e32dfe5..84b4278f0a 100644 --- a/contrib/libs/tbb/src/tbb/task.cpp +++ b/contrib/libs/tbb/src/tbb/task.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2022 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -53,7 +53,7 @@ void resume(suspend_point_type* sp) { // Prolong the arena's lifetime while all coroutines are alive // (otherwise the arena can be destroyed while some tasks are suspended). arena& a = *sp->m_arena; - a.my_references += arena::ref_external; + a.my_references += arena::ref_worker; if (task_disp.m_properties.critical_task_allowed) { // The target is not in the process of executing critical task, so the resume task is not critical. @@ -67,7 +67,7 @@ void resume(suspend_point_type* sp) { // Do not access target after that point. a.advertise_new_work<arena::wakeup>(); // Release our reference to my_arena. 
- a.on_thread_leaving<arena::ref_external>(); + a.on_thread_leaving(arena::ref_worker); } } @@ -77,13 +77,13 @@ suspend_point_type* current_suspend_point() { return td.my_task_dispatcher->get_suspend_point(); } -static task_dispatcher& create_coroutine(thread_data& td) { +task_dispatcher& create_coroutine(thread_data& td) { // We may have some task dispatchers cached task_dispatcher* task_disp = td.my_arena->my_co_cache.pop(); if (!task_disp) { void* ptr = cache_aligned_allocate(sizeof(task_dispatcher)); task_disp = new(ptr) task_dispatcher(td.my_arena); - task_disp->init_suspend_point(td.my_arena, td.my_arena->my_market->worker_stack_size()); + task_disp->init_suspend_point(td.my_arena, td.my_arena->my_threading_control->worker_stack_size()); } // Prolong the arena's lifetime until all coroutines is alive // (otherwise the arena can be destroyed while some tasks are suspended). @@ -163,7 +163,7 @@ void task_dispatcher::do_post_resume_action() { case post_resume_action::register_waiter: { __TBB_ASSERT(td->my_post_resume_arg, "The post resume action must have an argument"); - static_cast<market_concurrent_monitor::resume_context*>(td->my_post_resume_arg)->notify(); + static_cast<thread_control_monitor::resume_context*>(td->my_post_resume_arg)->notify(); break; } case post_resume_action::cleanup: @@ -171,7 +171,7 @@ void task_dispatcher::do_post_resume_action() { __TBB_ASSERT(td->my_post_resume_arg, "The post resume action must have an argument"); task_dispatcher* to_cleanup = static_cast<task_dispatcher*>(td->my_post_resume_arg); // Release coroutine's reference to my_arena - td->my_arena->on_thread_leaving<arena::ref_external>(); + td->my_arena->on_thread_leaving(arena::ref_external); // Cache the coroutine for possible later re-usage td->my_arena->my_co_cache.push(to_cleanup); break; @@ -186,7 +186,7 @@ void task_dispatcher::do_post_resume_action() { auto is_our_suspend_point = [sp] (market_context ctx) { return std::uintptr_t(sp) == ctx.my_uniq_addr; }; - td->my_arena->my_market->get_wait_list().notify(is_our_suspend_point); + td->my_arena->get_waiting_threads_monitor().notify(is_our_suspend_point); break; } default: @@ -218,10 +218,40 @@ void notify_waiters(std::uintptr_t wait_ctx_addr) { return wait_ctx_addr == context.my_uniq_addr; }; - r1::governor::get_thread_data()->my_arena->my_market->get_wait_list().notify(is_related_wait_ctx); + governor::get_thread_data()->my_arena->get_waiting_threads_monitor().notify(is_related_wait_ctx); +} + +d1::wait_tree_vertex_interface* get_thread_reference_vertex(d1::wait_tree_vertex_interface* top_wait_context) { + __TBB_ASSERT(top_wait_context, nullptr); + auto& dispatcher = *governor::get_thread_data()->my_task_dispatcher; + + d1::reference_vertex* ref_counter{nullptr}; + auto& reference_map = dispatcher.m_reference_vertex_map; + auto pos = reference_map.find(top_wait_context); + if (pos != reference_map.end()) { + ref_counter = pos->second; + } else { + constexpr std::size_t max_reference_vertex_map_size = 1000; + if (reference_map.size() > max_reference_vertex_map_size) { + // TODO: Research the possibility of using better approach for a clean-up + for (auto it = reference_map.begin(); it != reference_map.end();) { + if (it->second->get_num_child() == 0) { + it->second->~reference_vertex(); + cache_aligned_deallocate(it->second); + it = reference_map.erase(it); + } else { + ++it; + } + } + } + + reference_map[top_wait_context] = ref_counter = + new (cache_aligned_allocate(sizeof(d1::reference_vertex))) d1::reference_vertex(top_wait_context, 0); + 
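        // The map lookup above makes the reference counter per-thread: each task_dispatcher
        // caches one d1::reference_vertex per top wait vertex, so repeated waits on the same
        // wait_context bump a thread-local counter instead of contending on the shared one.
        // The max_reference_vertex_map_size check only evicts vertices whose child count has
        // dropped to zero, so a vertex still referenced by in-flight waiters survives the clean-up.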
} + + return ref_counter; } } // namespace r1 } // namespace detail } // namespace tbb - diff --git a/contrib/libs/tbb/src/tbb/task_dispatcher.h b/contrib/libs/tbb/src/tbb/task_dispatcher.h index f6ff3f173c..c818934e5a 100644 --- a/contrib/libs/tbb/src/tbb/task_dispatcher.h +++ b/contrib/libs/tbb/src/tbb/task_dispatcher.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2020-2022 Intel Corporation + Copyright (c) 2020-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -29,6 +29,7 @@ #include "mailbox.h" #include "itt_notify.h" #include "concurrent_monitor.h" +#include "threading_control.h" #include <atomic> @@ -65,13 +66,13 @@ inline d1::task* suspend_point_type::resume_task::execute(d1::execution_data& ed execution_data_ext& ed_ext = static_cast<execution_data_ext&>(ed); if (ed_ext.wait_ctx) { - market_concurrent_monitor::resume_context monitor_node{{std::uintptr_t(ed_ext.wait_ctx), nullptr}, ed_ext, m_target}; + thread_control_monitor::resume_context monitor_node{{std::uintptr_t(ed_ext.wait_ctx), nullptr}, ed_ext, m_target}; // The wait_ctx is present only in external_waiter. In that case we leave the current stack // in the abandoned state to resume when waiting completes. thread_data* td = ed_ext.task_disp->m_thread_data; td->set_post_resume_action(task_dispatcher::post_resume_action::register_waiter, &monitor_node); - market_concurrent_monitor& wait_list = td->my_arena->my_market->get_wait_list(); + thread_control_monitor& wait_list = td->my_arena->get_waiting_threads_monitor(); if (wait_list.wait([&] { return !ed_ext.wait_ctx->continue_execution(); }, monitor_node)) { return nullptr; @@ -248,15 +249,21 @@ d1::task* task_dispatcher::local_wait_for_all(d1::task* t, Waiter& waiter ) { task_dispatcher& task_disp; execution_data_ext old_execute_data_ext; properties old_properties; + bool is_initially_registered; ~dispatch_loop_guard() { task_disp.m_execute_data_ext = old_execute_data_ext; task_disp.m_properties = old_properties; + if (!is_initially_registered) { + task_disp.m_thread_data->my_arena->my_tc_client.get_pm_client()->unregister_thread(); + task_disp.m_thread_data->my_is_registered = false; + } + __TBB_ASSERT(task_disp.m_thread_data && governor::is_thread_data_set(task_disp.m_thread_data), nullptr); __TBB_ASSERT(task_disp.m_thread_data->my_task_dispatcher == &task_disp, nullptr); } - } dl_guard{ *this, m_execute_data_ext, m_properties }; + } dl_guard{ *this, m_execute_data_ext, m_properties, m_thread_data->my_is_registered }; // The context guard to track fp setting and itt tasks. context_guard_helper</*report_tasks=*/ITTPossible> context_guard; @@ -281,6 +288,11 @@ d1::task* task_dispatcher::local_wait_for_all(d1::task* t, Waiter& waiter ) { m_properties.outermost = false; m_properties.fifo_tasks_allowed = false; + if (!dl_guard.is_initially_registered) { + m_thread_data->my_arena->my_tc_client.get_pm_client()->register_thread(); + m_thread_data->my_is_registered = true; + } + t = get_critical_task(t, ed, isolation, critical_allowed); if (t && m_thread_data->my_inbox.is_idle_state(true)) { // The thread has a work to do. 
Therefore, marking its inbox as not idle so that diff --git a/contrib/libs/tbb/src/tbb/task_group_context.cpp b/contrib/libs/tbb/src/tbb/task_group_context.cpp index 177dd555b8..c20b2790f8 100644 --- a/contrib/libs/tbb/src/tbb/task_group_context.cpp +++ b/contrib/libs/tbb/src/tbb/task_group_context.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2022 Intel Corporation + Copyright (c) 2005-2023 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -197,8 +197,7 @@ void task_group_context_impl::bind_to(d1::task_group_context& ctx, thread_data* __TBB_ASSERT(ctx.my_state.load(std::memory_order_relaxed) != d1::task_group_context::state::locked, nullptr); } -template <typename T> -void task_group_context_impl::propagate_task_group_state(d1::task_group_context& ctx, std::atomic<T> d1::task_group_context::* mptr_state, d1::task_group_context& src, T new_state) { +void task_group_context_impl::propagate_task_group_state(d1::task_group_context& ctx, std::atomic<std::uint32_t> d1::task_group_context::* mptr_state, d1::task_group_context& src, std::uint32_t new_state) { __TBB_ASSERT(!is_poisoned(ctx.my_context_list), nullptr); /* 1. if ((ctx.*mptr_state).load(std::memory_order_relaxed) == new_state): Nothing to do, whether descending from "src" or not, so no need to scan. @@ -224,50 +223,6 @@ void task_group_context_impl::propagate_task_group_state(d1::task_group_context& } } -template <typename T> -void thread_data::propagate_task_group_state(std::atomic<T> d1::task_group_context::* mptr_state, d1::task_group_context& src, T new_state) { - mutex::scoped_lock lock(my_context_list->m_mutex); - // Acquire fence is necessary to ensure that the subsequent node->my_next load - // returned the correct value in case it was just inserted in another thread. - // The fence also ensures visibility of the correct ctx.my_parent value. - for (context_list::iterator it = my_context_list->begin(); it != my_context_list->end(); ++it) { - d1::task_group_context& ctx = __TBB_get_object_ref(d1::task_group_context, my_node, &(*it)); - if ((ctx.*mptr_state).load(std::memory_order_relaxed) != new_state) - task_group_context_impl::propagate_task_group_state(ctx, mptr_state, src, new_state); - } - // Sync up local propagation epoch with the global one. Release fence prevents - // reordering of possible store to *mptr_state after the sync point. - my_context_list->epoch.store(the_context_state_propagation_epoch.load(std::memory_order_relaxed), std::memory_order_release); -} - -template <typename T> -bool market::propagate_task_group_state(std::atomic<T> d1::task_group_context::* mptr_state, d1::task_group_context& src, T new_state) { - if (src.my_may_have_children.load(std::memory_order_relaxed) != d1::task_group_context::may_have_children) - return true; - // The whole propagation algorithm is under the lock in order to ensure correctness - // in case of concurrent state changes at the different levels of the context tree. - // See comment at the bottom of scheduler.cpp - context_state_propagation_mutex_type::scoped_lock lock(the_context_state_propagation_mutex); - if ((src.*mptr_state).load(std::memory_order_relaxed) != new_state) - // Another thread has concurrently changed the state. Back down. 
- return false; - // Advance global state propagation epoch - ++the_context_state_propagation_epoch; - // Propagate to all workers and external threads and sync up their local epochs with the global one - unsigned num_workers = my_first_unused_worker_idx; - for (unsigned i = 0; i < num_workers; ++i) { - thread_data* td = my_workers[i].load(std::memory_order_acquire); - // If the worker is only about to be registered, skip it. - if (td) - td->propagate_task_group_state(mptr_state, src, new_state); - } - // Propagate to all external threads - // The whole propagation sequence is locked, thus no contention is expected - for (thread_data_list_type::iterator it = my_masters.begin(); it != my_masters.end(); it++) - it->propagate_task_group_state(mptr_state, src, new_state); - return true; -} - bool task_group_context_impl::cancel_group_execution(d1::task_group_context& ctx) { __TBB_ASSERT(!is_poisoned(ctx.my_context_list), nullptr); __TBB_ASSERT(ctx.my_cancellation_requested.load(std::memory_order_relaxed) <= 1, "The cancellation state can be either 0 or 1"); @@ -277,7 +232,7 @@ bool task_group_context_impl::cancel_group_execution(d1::task_group_context& ctx // not missing out on any cancellation still being propagated, and a context cannot be uncanceled.) return false; } - governor::get_thread_data()->my_arena->my_market->propagate_task_group_state(&d1::task_group_context::my_cancellation_requested, ctx, uint32_t(1)); + governor::get_thread_data()->my_arena->my_threading_control->propagate_task_group_state(&d1::task_group_context::my_cancellation_requested, ctx, uint32_t(1)); return true; } diff --git a/contrib/libs/tbb/src/tbb/tcm.h b/contrib/libs/tbb/src/tbb/tcm.h new file mode 100644 index 0000000000..66ee18a2f0 --- /dev/null +++ b/contrib/libs/tbb/src/tbb/tcm.h @@ -0,0 +1,174 @@ +/* + Copyright (c) 2023-2024 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef _TBB_tcm_H +#define _TBB_tcm_H + +#include <stdint.h> +#include <stdbool.h> + +#ifdef __cplusplus +extern "C" { +#endif + +// Support for the TCM API return value + +typedef enum _tcm_result_t { + TCM_RESULT_SUCCESS = 0x0, + TCM_RESULT_ERROR_INVALID_ARGUMENT = 0x78000004, + TCM_RESULT_ERROR_UNKNOWN = 0x7ffffffe +} tcm_result_t; + +// Support for permit states + +enum tcm_permit_states_t { + TCM_PERMIT_STATE_VOID, + TCM_PERMIT_STATE_INACTIVE, + TCM_PERMIT_STATE_PENDING, + TCM_PERMIT_STATE_IDLE, + TCM_PERMIT_STATE_ACTIVE +}; + +typedef uint8_t tcm_permit_state_t; + +// Support for permit flags + +typedef struct _tcm_permit_flags_t { + uint32_t stale : 1; + uint32_t rigid_concurrency : 1; + uint32_t exclusive : 1; + uint32_t request_as_inactive : 1; + uint32_t reserved : 28; +} tcm_permit_flags_t; + +typedef struct _tcm_callback_flags_t { + uint32_t new_concurrency : 1; + uint32_t new_state : 1; + uint32_t reserved : 30; +} tcm_callback_flags_t; + +// Support for cpu masks + +struct hwloc_bitmap_s; +typedef struct hwloc_bitmap_s* hwloc_bitmap_t; +typedef hwloc_bitmap_t tcm_cpu_mask_t; + +// Support for ids + +typedef uint64_t tcm_client_id_t; + +// Support for permits + +typedef struct _tcm_permit_t { + uint32_t* concurrencies; + tcm_cpu_mask_t* cpu_masks; + uint32_t size; + tcm_permit_state_t state; + tcm_permit_flags_t flags; +} tcm_permit_t; + +// Support for permit handle + +typedef struct tcm_permit_rep_t* tcm_permit_handle_t; + +// Support for constraints + +typedef int32_t tcm_numa_node_t; +typedef int32_t tcm_core_type_t; + +const int8_t tcm_automatic = -1; +const int8_t tcm_any = -2; + +#define TCM_PERMIT_REQUEST_CONSTRAINTS_INITIALIZER {tcm_automatic, tcm_automatic, NULL, \ + tcm_automatic, tcm_automatic, tcm_automatic} + +typedef struct _tcm_cpu_constraints_t { + int32_t min_concurrency; + int32_t max_concurrency; + tcm_cpu_mask_t mask; + tcm_numa_node_t numa_id; + tcm_core_type_t core_type_id; + int32_t threads_per_core; +} tcm_cpu_constraints_t; + +// Support for priorities + +enum tcm_request_priorities_t { + TCM_REQUEST_PRIORITY_LOW = (INT32_MAX / 4) * 1, + TCM_REQUEST_PRIORITY_NORMAL = (INT32_MAX / 4) * 2, + TCM_REQUEST_PRIORITY_HIGH = (INT32_MAX / 4) * 3 +}; + +typedef int32_t tcm_request_priority_t; + +// Support for requests + +#define TCM_PERMIT_REQUEST_INITIALIZER {tcm_automatic, tcm_automatic, \ + NULL, 0, TCM_REQUEST_PRIORITY_NORMAL, {}, {}} + +typedef struct _tcm_permit_request_t { + int32_t min_sw_threads; + int32_t max_sw_threads; + tcm_cpu_constraints_t* cpu_constraints; + uint32_t constraints_size; + tcm_request_priority_t priority; + tcm_permit_flags_t flags; + char reserved[4]; +} tcm_permit_request_t; + +// Support for client callback + +typedef tcm_result_t (*tcm_callback_t)(tcm_permit_handle_t p, void* callback_arg, tcm_callback_flags_t); + +#if _WIN32 + #define __TCM_EXPORT __declspec(dllexport) +#else + #define __TCM_EXPORT +#endif + + +__TCM_EXPORT tcm_result_t tcmConnect(tcm_callback_t callback, + tcm_client_id_t *client_id); +__TCM_EXPORT tcm_result_t tcmDisconnect(tcm_client_id_t client_id); + +__TCM_EXPORT tcm_result_t tcmRequestPermit(tcm_client_id_t client_id, + tcm_permit_request_t request, + void* callback_arg, + tcm_permit_handle_t* permit_handle, + tcm_permit_t* permit); + +__TCM_EXPORT tcm_result_t tcmGetPermitData(tcm_permit_handle_t permit_handle, + tcm_permit_t* permit); + +__TCM_EXPORT tcm_result_t tcmReleasePermit(tcm_permit_handle_t permit); + +__TCM_EXPORT tcm_result_t tcmIdlePermit(tcm_permit_handle_t permit_handle); + 
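/* For orientation: tcm_adaptor.cpp further down in this change drives these entry points
   roughly as follows: tcmConnect() once per adaptor with a renegotiation callback,
   tcmRequestPermit() whenever an arena's worker demand changes, tcmDeactivatePermit() when
   that demand drops to zero, tcmGetPermitData() from the callback to read the granted
   concurrency, tcmRegisterThread()/tcmUnregisterThread() as workers enter and leave the
   dispatch loop, and tcmReleasePermit()/tcmDisconnect() on client and adaptor destruction. */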
+__TCM_EXPORT tcm_result_t tcmDeactivatePermit(tcm_permit_handle_t permit_handle); + +__TCM_EXPORT tcm_result_t tcmActivatePermit(tcm_permit_handle_t permit_handle); + +__TCM_EXPORT tcm_result_t tcmRegisterThread(tcm_permit_handle_t permit_handle); + +__TCM_EXPORT tcm_result_t tcmUnregisterThread(); + +__TCM_EXPORT tcm_result_t tcmGetVersionInfo(char* buffer, uint32_t buffer_size); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif /* _TBB_tcm_H */ diff --git a/contrib/libs/tbb/src/tbb/tcm_adaptor.cpp b/contrib/libs/tbb/src/tbb/tcm_adaptor.cpp new file mode 100644 index 0000000000..85ca125b4e --- /dev/null +++ b/contrib/libs/tbb/src/tbb/tcm_adaptor.cpp @@ -0,0 +1,327 @@ +/* + Copyright (c) 2023-2024 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "oneapi/tbb/detail/_intrusive_list_node.h" +#include "oneapi/tbb/detail/_template_helpers.h" +#include "oneapi/tbb/task_arena.h" + +#include "pm_client.h" +#include "dynamic_link.h" +#include "misc.h" +#include "tcm.h" +#include "tcm_adaptor.h" + +#include <iostream> + +namespace tbb { +namespace detail { +namespace r1 { + +namespace { +#if __TBB_WEAK_SYMBOLS_PRESENT +#pragma weak tcmConnect +#pragma weak tcmDisconnect +#pragma weak tcmRequestPermit +#pragma weak tcmGetPermitData +#pragma weak tcmReleasePermit +#pragma weak tcmIdlePermit +#pragma weak tcmDeactivatePermit +#pragma weak tcmActivatePermit +#pragma weak tcmRegisterThread +#pragma weak tcmUnregisterThread +#pragma weak tcmGetVersionInfo +#endif /* __TBB_WEAK_SYMBOLS_PRESENT */ + +tcm_result_t(*tcm_connect)(tcm_callback_t callback, tcm_client_id_t* client_id){nullptr}; +tcm_result_t(*tcm_disconnect)(tcm_client_id_t client_id){ nullptr }; +tcm_result_t(*tcm_request_permit)(tcm_client_id_t client_id, tcm_permit_request_t request, + void* callback_arg, tcm_permit_handle_t* permit_handle, tcm_permit_t* permit){nullptr}; +tcm_result_t(*tcm_get_permit_data)(tcm_permit_handle_t permit_handle, tcm_permit_t* permit){nullptr}; +tcm_result_t(*tcm_release_permit)(tcm_permit_handle_t permit){nullptr}; +tcm_result_t(*tcm_idle_permit)(tcm_permit_handle_t permit_handle){nullptr}; +tcm_result_t(*tcm_deactivate_permit)(tcm_permit_handle_t permit_handle){nullptr}; +tcm_result_t(*tcm_activate_permit)(tcm_permit_handle_t permit_handle){nullptr}; +tcm_result_t(*tcm_register_thread)(tcm_permit_handle_t permit_handle){nullptr}; +tcm_result_t(*tcm_unregister_thread)(){nullptr}; +tcm_result_t (*tcm_get_version_info)(char* buffer, uint32_t buffer_size){nullptr}; + +static const dynamic_link_descriptor tcm_link_table[] = { + DLD(tcmConnect, tcm_connect), + DLD(tcmDisconnect, tcm_disconnect), + DLD(tcmRequestPermit, tcm_request_permit), + DLD(tcmGetPermitData, tcm_get_permit_data), + DLD(tcmReleasePermit, tcm_release_permit), + DLD(tcmIdlePermit, tcm_idle_permit), + DLD(tcmDeactivatePermit, tcm_deactivate_permit), + DLD(tcmActivatePermit, tcm_activate_permit), + DLD(tcmRegisterThread, tcm_register_thread), + DLD(tcmUnregisterThread, tcm_unregister_thread), + DLD(tcmGetVersionInfo, 
tcm_get_version_info) +}; + +#if TBB_USE_DEBUG +#define DEBUG_SUFFIX "_debug" +#else +#define DEBUG_SUFFIX +#endif /* TBB_USE_DEBUG */ + +#if _WIN32 || _WIN64 +#define LIBRARY_EXTENSION ".dll" +#define LIBRARY_PREFIX +#elif __unix__ +#define LIBRARY_EXTENSION ".so.1" +#define LIBRARY_PREFIX "lib" +#else +#define LIBRARY_EXTENSION +#define LIBRARY_PREFIX +#endif /* __unix__ */ + +#define TCMLIB_NAME LIBRARY_PREFIX "tcm" DEBUG_SUFFIX LIBRARY_EXTENSION + +static bool tcm_functions_loaded{ false }; +} + +class tcm_client : public pm_client { + using tcm_client_mutex_type = d1::mutex; +public: + tcm_client(tcm_adaptor& adaptor, arena& a) : pm_client(a), my_tcm_adaptor(adaptor) {} + + ~tcm_client() { + if (my_permit_handle) { + __TBB_ASSERT(tcm_release_permit, nullptr); + auto res = tcm_release_permit(my_permit_handle); + __TBB_ASSERT_EX(res == TCM_RESULT_SUCCESS, nullptr); + } + } + + int update_concurrency(uint32_t concurrency) { + return my_arena.update_concurrency(concurrency); + } + + unsigned priority_level() { + return my_arena.priority_level(); + } + + tcm_permit_request_t& permit_request() { + return my_permit_request; + } + + tcm_permit_handle_t& permit_handle() { + return my_permit_handle; + } + + void actualize_permit() { + __TBB_ASSERT(tcm_get_permit_data, nullptr); + int delta{}; + { + tcm_client_mutex_type::scoped_lock lock(my_permit_mutex); + + uint32_t new_concurrency{}; + tcm_permit_t new_permit{ &new_concurrency, nullptr, 1, TCM_PERMIT_STATE_VOID, {} }; + auto res = tcm_get_permit_data(my_permit_handle, &new_permit); + __TBB_ASSERT_EX(res == TCM_RESULT_SUCCESS, nullptr); + + // The permit has changed during the reading, so the callback will be invoked soon one more time and + // we can just skip this renegotiation iteration. + if (!new_permit.flags.stale) { + // If there is no other demand in TCM, the permit may still have granted concurrency but + // be in the deactivated state thus we enforce 0 allotment to preserve arena invariants. + delta = update_concurrency(new_permit.state != TCM_PERMIT_STATE_INACTIVE ? new_concurrency : 0); + } + } + if (delta) { + my_tcm_adaptor.notify_thread_request(delta); + } + } + + void request_permit(tcm_client_id_t client_id) { + __TBB_ASSERT(tcm_request_permit, nullptr); + + my_permit_request.max_sw_threads = max_workers(); + my_permit_request.min_sw_threads = my_permit_request.max_sw_threads == 0 ? 
0 : min_workers(); + + if (my_permit_request.constraints_size > 0) { + my_permit_request.cpu_constraints->min_concurrency = my_permit_request.min_sw_threads; + my_permit_request.cpu_constraints->max_concurrency = my_permit_request.max_sw_threads; + } + + __TBB_ASSERT(my_permit_request.max_sw_threads >= my_permit_request.min_sw_threads, nullptr); + + tcm_result_t res = tcm_request_permit(client_id, my_permit_request, this, &my_permit_handle, nullptr); + __TBB_ASSERT_EX(res == TCM_RESULT_SUCCESS, nullptr); + } + + void deactivate_permit() { + __TBB_ASSERT(tcm_deactivate_permit, nullptr); + tcm_result_t res = tcm_deactivate_permit(my_permit_handle); + __TBB_ASSERT_EX(res == TCM_RESULT_SUCCESS, nullptr); + } + + void init(tcm_client_id_t client_id, d1::constraints& constraints) { + __TBB_ASSERT(tcm_request_permit, nullptr); + __TBB_ASSERT(tcm_deactivate_permit, nullptr); + + if (constraints.core_type != d1::task_arena::automatic || + constraints.numa_id != d1::task_arena::automatic || + constraints.max_threads_per_core != d1::task_arena::automatic) + { + my_permit_constraints.max_concurrency = constraints.max_concurrency; + my_permit_constraints.min_concurrency = 0; + my_permit_constraints.core_type_id = constraints.core_type; + my_permit_constraints.numa_id = constraints.numa_id; + my_permit_constraints.threads_per_core = constraints.max_threads_per_core; + + my_permit_request.cpu_constraints = &my_permit_constraints; + my_permit_request.constraints_size = 1; + } + + my_permit_request.min_sw_threads = 0; + my_permit_request.max_sw_threads = 0; + my_permit_request.flags.request_as_inactive = 1; + + tcm_result_t res = tcm_request_permit(client_id, my_permit_request, this, &my_permit_handle, nullptr); + __TBB_ASSERT_EX(res == TCM_RESULT_SUCCESS, nullptr); + + my_permit_request.flags.request_as_inactive = 0; + } + + void register_thread() override { + __TBB_ASSERT(tcm_register_thread, nullptr); + auto return_code = tcm_register_thread(my_permit_handle); + __TBB_ASSERT_EX(return_code == TCM_RESULT_SUCCESS, nullptr); + } + + void unregister_thread() override { + __TBB_ASSERT(tcm_unregister_thread, nullptr); + auto return_code = tcm_unregister_thread(); + __TBB_ASSERT_EX(return_code == TCM_RESULT_SUCCESS, nullptr); + } + +private: + tcm_cpu_constraints_t my_permit_constraints = TCM_PERMIT_REQUEST_CONSTRAINTS_INITIALIZER; + tcm_permit_request_t my_permit_request = TCM_PERMIT_REQUEST_INITIALIZER; + tcm_permit_handle_t my_permit_handle{}; + tcm_client_mutex_type my_permit_mutex; + tcm_adaptor& my_tcm_adaptor; +}; + +//------------------------------------------------------------------------ +// tcm_adaptor_impl +//------------------------------------------------------------------------ + +struct tcm_adaptor_impl { + using demand_mutex_type = d1::mutex; + demand_mutex_type my_demand_mutex; + tcm_client_id_t client_id{}; + + tcm_adaptor_impl(tcm_client_id_t id) : client_id(id) + {} +}; + +//------------------------------------------------------------------------ +// tcm_adaptor +//------------------------------------------------------------------------ + +tcm_result_t renegotiation_callback(tcm_permit_handle_t, void* client_ptr, tcm_callback_flags_t) { + __TBB_ASSERT(client_ptr, nullptr); + static_cast<tcm_client*>(client_ptr)->actualize_permit(); + return TCM_RESULT_SUCCESS; +} + +void tcm_adaptor::initialize() { + tcm_functions_loaded = dynamic_link(TCMLIB_NAME, tcm_link_table, /* tcm_link_table size = */ 11); +} + +bool tcm_adaptor::is_initialized() { + return tcm_functions_loaded; +} + +void 
tcm_adaptor::print_version() { + if (is_initialized()) { + __TBB_ASSERT(tcm_get_version_info, nullptr); + char buffer[1024]; + tcm_get_version_info(buffer, 1024); + std::fprintf(stderr, "%.*s", 1024, buffer); + } +} + +tcm_adaptor::tcm_adaptor() { + __TBB_ASSERT(tcm_connect, nullptr); + tcm_client_id_t client_id{}; + auto return_code = tcm_connect(renegotiation_callback, &client_id); + if (return_code == TCM_RESULT_SUCCESS) { + my_impl = make_cache_aligned_unique<tcm_adaptor_impl>(client_id); + } +} + +tcm_adaptor::~tcm_adaptor() { + if (my_impl) { + __TBB_ASSERT(tcm_disconnect, nullptr); + auto return_code = tcm_disconnect(my_impl->client_id); + __TBB_ASSERT_EX(return_code == TCM_RESULT_SUCCESS, nullptr); + my_impl = nullptr; + } +} + +bool tcm_adaptor::is_connected() { + return my_impl != nullptr; +} + +pm_client* tcm_adaptor::create_client(arena& a) { + return new (cache_aligned_allocate(sizeof(tcm_client))) tcm_client(*this, a); +} + +void tcm_adaptor::register_client(pm_client* c, d1::constraints& constraints) { + static_cast<tcm_client*>(c)->init(my_impl->client_id, constraints); +} + +void tcm_adaptor::unregister_and_destroy_client(pm_client& c) { + auto& client = static_cast<tcm_client&>(c); + + { + tcm_adaptor_impl::demand_mutex_type::scoped_lock lock(my_impl->my_demand_mutex); + client.~tcm_client(); + } + cache_aligned_deallocate(&client); +} + +void tcm_adaptor::set_active_num_workers(int) {} + + +void tcm_adaptor::adjust_demand(pm_client& c, int mandatory_delta, int workers_delta) { + __TBB_ASSERT(-1 <= mandatory_delta && mandatory_delta <= 1, nullptr); + + auto& client = static_cast<tcm_client&>(c); + { + tcm_adaptor_impl::demand_mutex_type::scoped_lock lock(my_impl->my_demand_mutex); + + // Update client's state + workers_delta = client.update_request(mandatory_delta, workers_delta); + if (workers_delta == 0) return; + + if (client.max_workers() == 0) { + client.deactivate_permit(); + } else { + client.request_permit(my_impl->client_id); + } + } + + client.actualize_permit(); +} + +} // namespace r1 +} // namespace detail +} // namespace tbb diff --git a/contrib/libs/tbb/src/tbb/tcm_adaptor.h b/contrib/libs/tbb/src/tbb/tcm_adaptor.h new file mode 100644 index 0000000000..f9f4d5277b --- /dev/null +++ b/contrib/libs/tbb/src/tbb/tcm_adaptor.h @@ -0,0 +1,63 @@ +/* + Copyright (c) 2023 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef _TBB_tcm_adaptor_H +#define _TBB_tcm_adaptor_H + +#include "scheduler_common.h" + +#include "permit_manager.h" +#include "pm_client.h" + +namespace tbb { +namespace detail { +namespace r1 { + +struct tcm_adaptor_impl; + +//------------------------------------------------------------------------ +// Class tcm_adaptor +//------------------------------------------------------------------------ + +class tcm_adaptor : public permit_manager { +public: + tcm_adaptor(); + ~tcm_adaptor(); + + pm_client* create_client(arena& a) override; + void register_client(pm_client* client, d1::constraints& constraints) override; + void unregister_and_destroy_client(pm_client& c) override; + + void set_active_num_workers(int soft_limit) override; + + void adjust_demand(pm_client& c, int mandatory_delta, int workers_delta) override; + + bool is_connected(); + + static void initialize(); + static bool is_initialized(); + static void print_version(); +private: + cache_aligned_unique_ptr<tcm_adaptor_impl> my_impl; + + friend class tcm_client; +}; // class tcm_adaptor + +} // namespace r1 +} // namespace detail +} // namespace tbb + +#endif /* _TBB_tcm_adaptor_H */ diff --git a/contrib/libs/tbb/src/tbb/market_concurrent_monitor.h b/contrib/libs/tbb/src/tbb/thread_control_monitor.h index 37927617ba..f9c3cacc97 100644 --- a/contrib/libs/tbb/src/tbb/market_concurrent_monitor.h +++ b/contrib/libs/tbb/src/tbb/thread_control_monitor.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2021 Intel Corporation + Copyright (c) 2021-2023 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -14,8 +14,8 @@ limitations under the License. */ -#ifndef __TBB_market_concurrent_monitor_H -#define __TBB_market_concurrent_monitor_H +#ifndef __TBB_thread_control_monitor_H +#define __TBB_thread_control_monitor_H #include "concurrent_monitor.h" #include "scheduler_common.h" @@ -93,12 +93,12 @@ private: }; #endif // __TBB_RESUMABLE_TASKS -class market_concurrent_monitor : public concurrent_monitor_base<market_context> { +class thread_control_monitor : public concurrent_monitor_base<market_context> { using base_type = concurrent_monitor_base<market_context>; public: using base_type::base_type; - ~market_concurrent_monitor() { + ~thread_control_monitor() { destroy(); } @@ -113,4 +113,4 @@ public: } // namespace detail } // namespace tbb -#endif // __TBB_market_concurrent_monitor_H +#endif // __TBB_thread_control_monitor_H diff --git a/contrib/libs/tbb/src/tbb/thread_data.h b/contrib/libs/tbb/src/tbb/thread_data.h index 808f3cc311..422ec694ec 100644 --- a/contrib/libs/tbb/src/tbb/thread_data.h +++ b/contrib/libs/tbb/src/tbb/thread_data.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2020-2022 Intel Corporation + Copyright (c) 2020-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
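The tcm_adaptor declared above is one concrete permit_manager; the contract it implements (introduced in permit_manager.h and pm_client.h earlier in this diff) is small enough that a minimal manager fits in a few lines. The sketch below is illustrative only and not part of this commit: the names simple_client and simple_permit_manager are hypothetical, and it assumes the in-tree r1 headers are on the include path.

// Hypothetical sketch of a trivial permit manager built on the new interfaces;
// it grants every client exactly what it asks for and forwards the resulting
// change in requested workers to the registered thread_request_observer.
#include "permit_manager.h"
#include "pm_client.h"

namespace tbb { namespace detail { namespace r1 {

class simple_client : public pm_client {
public:
    explicit simple_client(arena& a) : pm_client(a) {}
    void register_thread() override {}
    void unregister_thread() override {}
};

class simple_permit_manager : public permit_manager {
public:
    pm_client* create_client(arena& a) override {
        return new (cache_aligned_allocate(sizeof(simple_client))) simple_client(a);
    }
    void register_client(pm_client*, d1::constraints&) override {}
    void unregister_and_destroy_client(pm_client& c) override {
        c.~pm_client();
        cache_aligned_deallocate(&c);
    }
    void set_active_num_workers(int) override {}
    void adjust_demand(pm_client& c, int mandatory_delta, int workers_delta) override {
        // update_request() recomputes the client's min/max workers and returns the
        // change in its maximum, which is what the thread request observer consumes.
        notify_thread_request(c.update_request(mandatory_delta, workers_delta));
    }
};

}}} // namespace tbb::detail::r1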
@@ -28,6 +28,7 @@ #include "mailbox.h" #include "misc.h" // FastRandom #include "small_object_pool_impl.h" +#include "intrusive_list.h" #include <atomic> @@ -39,8 +40,9 @@ class task; class arena_slot; class task_group_context; class task_dispatcher; +class thread_dispatcher_client; -class context_list : public intrusive_list<intrusive_list_node> { +class context_list : public intrusive_list<d1::intrusive_list_node> { public: bool orphaned{false}; @@ -61,10 +63,10 @@ public: cache_aligned_deallocate(this); } - void remove(intrusive_list_node& val) { + void remove(d1::intrusive_list_node& val) { mutex::scoped_lock lock(m_mutex); - intrusive_list<intrusive_list_node>::remove(val); + intrusive_list<d1::intrusive_list_node>::remove(val); if (orphaned && empty()) { lock.release(); @@ -72,10 +74,10 @@ public: } } - void push_front(intrusive_list_node& val) { + void push_front(d1::intrusive_list_node& val) { mutex::scoped_lock lock(m_mutex); - intrusive_list<intrusive_list_node>::push_front(val); + intrusive_list<d1::intrusive_list_node>::push_front(val); } void orphan() { @@ -93,14 +95,16 @@ public: // Thread Data //------------------------------------------------------------------------ class thread_data : public ::rml::job - , public intrusive_list_node + , public d1::intrusive_list_node , no_copy { public: thread_data(unsigned short index, bool is_worker) : my_arena_index{ index } , my_is_worker{ is_worker } + , my_is_registered { false } , my_task_dispatcher{ nullptr } - , my_arena{} + , my_arena{ nullptr } + , my_last_client{ nullptr } , my_arena_slot{} , my_random{ this } , my_last_observer{ nullptr } @@ -134,8 +138,7 @@ public: void detach_task_dispatcher(); void enter_task_dispatcher(task_dispatcher& task_disp, std::uintptr_t stealing_threshold); void leave_task_dispatcher(); - template <typename T> - void propagate_task_group_state(std::atomic<T> d1::task_group_context::* mptr_state, d1::task_group_context& src, T new_state); + void propagate_task_group_state(std::atomic<uint32_t> d1::task_group_context::* mptr_state, d1::task_group_context& src, uint32_t new_state); //! Index of the arena slot the scheduler occupies now, or occupied last time unsigned short my_arena_index; @@ -143,12 +146,16 @@ public: //! Indicates if the thread is created by RML const bool my_is_worker; + bool my_is_registered; + //! The current task dipsatcher task_dispatcher* my_task_dispatcher; //! The arena that I own (if external thread) or am servicing at the moment (if worker) arena* my_arena; + thread_dispatcher_client* my_last_client; + //! Pointer to the slot in the arena we own at the moment arena_slot* my_arena_slot; @@ -232,6 +239,21 @@ inline void thread_data::leave_task_dispatcher() { detach_task_dispatcher(); } +inline void thread_data::propagate_task_group_state(std::atomic<std::uint32_t> d1::task_group_context::* mptr_state, d1::task_group_context& src, std::uint32_t new_state) { + mutex::scoped_lock lock(my_context_list->m_mutex); + // Acquire fence is necessary to ensure that the subsequent node->my_next load + // returned the correct value in case it was just inserted in another thread. + // The fence also ensures visibility of the correct ctx.my_parent value. 
+ for (context_list::iterator it = my_context_list->begin(); it != my_context_list->end(); ++it) { + d1::task_group_context& ctx = __TBB_get_object_ref(d1::task_group_context, my_node, &(*it)); + if ((ctx.*mptr_state).load(std::memory_order_relaxed) != new_state) + task_group_context_impl::propagate_task_group_state(ctx, mptr_state, src, new_state); + } + // Sync up local propagation epoch with the global one. Release fence prevents + // reordering of possible store to *mptr_state after the sync point. + my_context_list->epoch.store(the_context_state_propagation_epoch.load(std::memory_order_relaxed), std::memory_order_release); +} + } // namespace r1 } // namespace detail } // namespace tbb diff --git a/contrib/libs/tbb/src/tbb/thread_dispatcher.cpp b/contrib/libs/tbb/src/tbb/thread_dispatcher.cpp new file mode 100644 index 0000000000..69a108d6f2 --- /dev/null +++ b/contrib/libs/tbb/src/tbb/thread_dispatcher.cpp @@ -0,0 +1,236 @@ +/* + Copyright (c) 2022-2024 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "thread_dispatcher.h" +#include "threading_control.h" + +namespace tbb { +namespace detail { +namespace r1 { + +thread_dispatcher::thread_dispatcher(threading_control& tc, unsigned hard_limit, std::size_t stack_size) + : my_threading_control(tc) + , my_num_workers_hard_limit(hard_limit) + , my_stack_size(stack_size) +{ + my_server = governor::create_rml_server( *this ); + __TBB_ASSERT( my_server, "Failed to create RML server" ); +} + +thread_dispatcher::~thread_dispatcher() { + poison_pointer(my_server); +} + +thread_dispatcher_client* thread_dispatcher::select_next_client(thread_dispatcher_client* hint) { + unsigned next_client_priority_level = num_priority_levels; + if (hint) { + next_client_priority_level = hint->priority_level(); + } + + for (unsigned idx = 0; idx < next_client_priority_level; ++idx) { + if (!my_client_list[idx].empty()) { + return &*my_client_list[idx].begin(); + } + } + + return hint; +} + +thread_dispatcher_client* thread_dispatcher::create_client(arena& a) { + return new (cache_aligned_allocate(sizeof(thread_dispatcher_client))) thread_dispatcher_client(a, my_clients_aba_epoch); +} + + +void thread_dispatcher::register_client(thread_dispatcher_client* client) { + client_list_mutex_type::scoped_lock lock(my_list_mutex); + insert_client(*client); +} + +bool thread_dispatcher::try_unregister_client(thread_dispatcher_client* client, std::uint64_t aba_epoch, unsigned priority) { + __TBB_ASSERT(client, nullptr); + // we hold reference to the server, so market cannot be destroyed at any moment here + __TBB_ASSERT(!is_poisoned(my_server), nullptr); + my_list_mutex.lock(); + for (auto& it : my_client_list[priority]) { + if (client == &it) { + if (it.get_aba_epoch() == aba_epoch) { + // Client is alive + // Acquire my_references to sync with threads that just left the arena + // Pay attention that references should be read before workers_requested because + // if references is no zero some other thread might call adjust_demand and lead to + // a race over 
workers_requested + if (!client->references() && !client->has_request()) { + // Client is abandoned. Destroy it. + remove_client(*client); + ++my_clients_aba_epoch; + + my_list_mutex.unlock(); + destroy_client(client); + + return true; + } + } + break; + } + } + my_list_mutex.unlock(); + return false; +} + +void thread_dispatcher::destroy_client(thread_dispatcher_client* client) { + client->~thread_dispatcher_client(); + cache_aligned_deallocate(client); +} + +// Should be called under lock +void thread_dispatcher::insert_client(thread_dispatcher_client& client) { + __TBB_ASSERT(client.priority_level() < num_priority_levels, nullptr); + my_client_list[client.priority_level()].push_front(client); + + __TBB_ASSERT(!my_next_client || my_next_client->priority_level() < num_priority_levels, nullptr); + my_next_client = select_next_client(my_next_client); +} + +// Should be called under lock +void thread_dispatcher::remove_client(thread_dispatcher_client& client) { + __TBB_ASSERT(client.priority_level() < num_priority_levels, nullptr); + my_client_list[client.priority_level()].remove(client); + + if (my_next_client == &client) { + my_next_client = nullptr; + } + my_next_client = select_next_client(my_next_client); +} + +bool thread_dispatcher::is_client_alive(thread_dispatcher_client* client) { + if (!client) { + return false; + } + + // Still cannot access internals of the client since the object itself might be destroyed. + for (auto& priority_list : my_client_list) { + for (auto& c : priority_list) { + if (client == &c) { + return true; + } + } + } + return false; +} + +thread_dispatcher_client* thread_dispatcher::client_in_need(client_list_type* clients, thread_dispatcher_client* hint) { + // TODO: make sure client with higher priority returned only if there are available slots in it. + hint = select_next_client(hint); + if (!hint) { + return nullptr; + } + + client_list_type::iterator it = hint; + unsigned curr_priority_level = hint->priority_level(); + __TBB_ASSERT(it != clients[curr_priority_level].end(), nullptr); + do { + thread_dispatcher_client& t = *it; + if (++it == clients[curr_priority_level].end()) { + do { + ++curr_priority_level %= num_priority_levels; + } while (clients[curr_priority_level].empty()); + it = clients[curr_priority_level].begin(); + } + if (t.try_join()) { + return &t; + } + } while (it != hint); + return nullptr; +} + +thread_dispatcher_client* thread_dispatcher::client_in_need(thread_dispatcher_client* prev) { + client_list_mutex_type::scoped_lock lock(my_list_mutex, /*is_writer=*/false); + if (is_client_alive(prev)) { + return client_in_need(my_client_list, prev); + } + return client_in_need(my_client_list, my_next_client); +} + +bool thread_dispatcher::is_any_client_in_need() { + client_list_mutex_type::scoped_lock lock(my_list_mutex, /*is_writer=*/false); + for (auto& priority_list : my_client_list) { + for (auto& client : priority_list) { + if (client.is_joinable()) { + return true; + } + } + } + return false; +} + +void thread_dispatcher::adjust_job_count_estimate(int delta) { + my_server->adjust_job_count_estimate(delta); +} + +void thread_dispatcher::release(bool blocking_terminate) { + my_join_workers = blocking_terminate; + my_server->request_close_connection(); +} + +void thread_dispatcher::process(job& j) { + thread_data& td = static_cast<thread_data&>(j); + // td.my_last_client can be dead. 
Don't access it until client_in_need is called + thread_dispatcher_client* client = td.my_last_client; + for (int i = 0; i < 2; ++i) { + while ((client = client_in_need(client)) ) { + td.my_last_client = client; + client->process(td); + } + // Workers leave thread_dispatcher because there is no client in need. It can happen earlier than + // adjust_job_count_estimate() decreases my_slack and RML can put this thread to sleep. + // It might result in a busy-loop checking for my_slack<0 and calling this method instantly. + // the yield refines this spinning. + if ( !i ) { + yield(); + } + } +} + + +//! Used when RML asks for join mode during workers termination. +bool thread_dispatcher::must_join_workers() const { return my_join_workers; } + +//! Returns the requested stack size of worker threads. +std::size_t thread_dispatcher::worker_stack_size() const { return my_stack_size; } + +void thread_dispatcher::acknowledge_close_connection() { + my_threading_control.destroy(); +} + +::rml::job* thread_dispatcher::create_one_job() { + unsigned short index = ++my_first_unused_worker_idx; + __TBB_ASSERT(index > 0, nullptr); + ITT_THREAD_SET_NAME(_T("TBB Worker Thread")); + // index serves as a hint decreasing conflicts between workers when they migrate between arenas + thread_data* td = new (cache_aligned_allocate(sizeof(thread_data))) thread_data{ index, true }; + __TBB_ASSERT(index <= my_num_workers_hard_limit, nullptr); + my_threading_control.register_thread(*td); + return td; +} + +void thread_dispatcher::cleanup(job& j) { + my_threading_control.unregister_thread(static_cast<thread_data&>(j)); + governor::auto_terminate(&j); +} + +} // namespace r1 +} // namespace detail +} // namespace tbb diff --git a/contrib/libs/tbb/src/tbb/thread_dispatcher.h b/contrib/libs/tbb/src/tbb/thread_dispatcher.h new file mode 100644 index 0000000000..e511e2b737 --- /dev/null +++ b/contrib/libs/tbb/src/tbb/thread_dispatcher.h @@ -0,0 +1,107 @@ +/* + Copyright (c) 2022-2024 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
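The client_in_need() scan in thread_dispatcher.cpp above walks the per-priority client lists in a circular fashion, starting from a hint and wrapping around until some client accepts a worker. A minimal standalone sketch of that round-robin idea, using plain standard containers and hypothetical names rather than TBB's intrusive lists:

#include <array>
#include <list>
#include <cstddef>

struct Client {
    unsigned priority;   // index into the priority table
    bool joinable;       // stands in for arena::try_join()
};

constexpr std::size_t num_levels = 3;
using ClientLists = std::array<std::list<Client*>, num_levels>;

// Scan all levels starting from 'start_level', wrapping around, and return
// the first client that can accept a worker; nullptr if none can.
Client* pick_client(ClientLists& lists, unsigned start_level) {
    for (std::size_t step = 0; step < num_levels; ++step) {
        unsigned level = (start_level + step) % num_levels;
        for (Client* c : lists[level]) {
            if (c->joinable) {
                return c;    // analogous to try_join() succeeding
            }
        }
    }
    return nullptr;
}

The real code additionally resumes from the exact list position of the hint and re-checks under a reader lock that the hint is still alive before dereferencing it.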
+*/ + +#ifndef _TBB_thread_dispatcher_H +#define _TBB_thread_dispatcher_H + +#include "oneapi/tbb/detail/_config.h" +#include "oneapi/tbb/detail/_utils.h" +#include "oneapi/tbb/rw_mutex.h" +#include "oneapi/tbb/task_arena.h" + +#include "arena.h" +#include "governor.h" +#include "thread_data.h" +#include "rml_tbb.h" +#include "thread_dispatcher_client.h" + +namespace tbb { +namespace detail { +namespace r1 { + +class threading_control_impl; + +class thread_dispatcher : no_copy, rml::tbb_client { + using client_list_type = intrusive_list<thread_dispatcher_client>; + using client_list_mutex_type = d1::rw_mutex; +public: + thread_dispatcher(threading_control& tc, unsigned hard_limit, std::size_t stack_size); + ~thread_dispatcher(); + + thread_dispatcher_client* create_client(arena& a); + void register_client(thread_dispatcher_client* client); + bool try_unregister_client(thread_dispatcher_client* client, std::uint64_t aba_epoch, unsigned priority); + bool is_any_client_in_need(); + + void adjust_job_count_estimate(int delta); + void release(bool blocking_terminate); + void process(job& j) override; + //! Used when RML asks for join mode during workers termination. + bool must_join_workers() const; + //! Returns the requested stack size of worker threads. + std::size_t worker_stack_size() const; + +private: + version_type version () const override { return 0; } + unsigned max_job_count () const override { return my_num_workers_hard_limit; } + std::size_t min_stack_size () const override { return worker_stack_size(); } + void cleanup(job& j) override; + void acknowledge_close_connection() override; + ::rml::job* create_one_job() override; + + thread_dispatcher_client* select_next_client(thread_dispatcher_client* hint); + void destroy_client(thread_dispatcher_client* client); + void insert_client(thread_dispatcher_client& client); + void remove_client(thread_dispatcher_client& client); + bool is_client_alive(thread_dispatcher_client* client); + thread_dispatcher_client* client_in_need(client_list_type* clients, thread_dispatcher_client* hint); + thread_dispatcher_client* client_in_need(thread_dispatcher_client* prev); + + friend class threading_control_impl; + static constexpr unsigned num_priority_levels = d1::num_priority_levels; + client_list_mutex_type my_list_mutex; + client_list_type my_client_list[num_priority_levels]; + + thread_dispatcher_client* my_next_client{nullptr}; + + //! Shutdown mode + bool my_join_workers{false}; + + threading_control& my_threading_control; + + //! ABA prevention marker to assign to newly created clients + std::atomic<std::uint64_t> my_clients_aba_epoch{0}; + + //! Maximal number of workers allowed for use by the underlying resource manager + /** It can't be changed after thread_dispatcher creation. **/ + unsigned my_num_workers_hard_limit{0}; + + //! Stack size of worker threads + std::size_t my_stack_size{0}; + + //! First unused index of worker + /** Used to assign indices to the new workers coming from RML **/ + std::atomic<unsigned> my_first_unused_worker_idx{0}; + + //! Pointer to the RML server object that services this TBB instance. 
+ rml::tbb_server* my_server{nullptr}; +}; + +} // namespace r1 +} // namespace detail +} // namespace tbb + +#endif // _TBB_thread_dispatcher_H diff --git a/contrib/libs/tbb/src/tbb/thread_dispatcher_client.h b/contrib/libs/tbb/src/tbb/thread_dispatcher_client.h new file mode 100644 index 0000000000..f7c199cb86 --- /dev/null +++ b/contrib/libs/tbb/src/tbb/thread_dispatcher_client.h @@ -0,0 +1,69 @@ +/* + Copyright (c) 2022-2024 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef _TBB_thread_dispatcher_client_H +#define _TBB_thread_dispatcher_client_H + +#include "oneapi/tbb/detail/_intrusive_list_node.h" +#include "arena.h" + +namespace tbb { +namespace detail { +namespace r1 { + +class thread_dispatcher_client : public d1::intrusive_list_node /* Need for list in thread pool */ { +public: + thread_dispatcher_client(arena& a, std::uint64_t aba_epoch) : my_arena(a), my_aba_epoch(aba_epoch) {} + + // Interface of communication with thread pool + bool try_join() { + return my_arena.try_join(); + } + + bool is_joinable() { + return my_arena.is_joinable(); + } + + void process(thread_data& td) { + my_arena.process(td); + } + + unsigned priority_level() { + return my_arena.priority_level(); + } + + std::uint64_t get_aba_epoch() { + return my_aba_epoch; + } + + unsigned references() { + return my_arena.references(); + } + + bool has_request() { + return my_arena.has_request(); + } + +private: + arena& my_arena; + std::uint64_t my_aba_epoch; +}; + +} // namespace r1 +} // namespace detail +} // namespace tbb + +#endif // _TBB_thread_dispatcher_client_H diff --git a/contrib/libs/tbb/src/tbb/thread_request_serializer.cpp b/contrib/libs/tbb/src/tbb/thread_request_serializer.cpp new file mode 100644 index 0000000000..41cf51b0b0 --- /dev/null +++ b/contrib/libs/tbb/src/tbb/thread_request_serializer.cpp @@ -0,0 +1,139 @@ +/* + Copyright (c) 2022-2024 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
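thread_dispatcher_client above stores my_aba_epoch so that try_unregister_client() can tell whether the client it finds in the list is still the one the caller observed, and not a newer client that happens to reuse the same address. A small sketch of that epoch check, with a simplified hypothetical registry in place of the dispatcher's per-priority intrusive lists:

#include <cstdint>
#include <mutex>
#include <vector>

struct Client {
    std::uint64_t aba_epoch;   // stamped at creation time
    int refs = 0;              // outstanding users
};

struct Registry {
    std::mutex mtx;
    std::vector<Client*> clients;
    std::uint64_t next_epoch = 0;

    // Destroy the client only if it is still registered *and* its epoch matches
    // the one the caller captured; otherwise the pointer may already refer to a
    // different client that reused the same storage.
    bool try_unregister(Client* c, std::uint64_t expected_epoch) {
        std::lock_guard<std::mutex> lock(mtx);
        for (auto it = clients.begin(); it != clients.end(); ++it) {
            if (*it == c && (*it)->aba_epoch == expected_epoch && c->refs == 0) {
                clients.erase(it);
                ++next_epoch;   // future clients get a different stamp
                return true;
            }
        }
        return false;
    }
};

In the dispatcher itself the epoch and priority level are captured before the lock is taken, and the client is destroyed only when the epoch matches and no references or outstanding requests remain.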
+*/
+
+#include "misc.h"
+#include "thread_request_serializer.h"
+
+namespace tbb {
+namespace detail {
+namespace r1 {
+
+thread_request_serializer::thread_request_serializer(thread_dispatcher& td, int soft_limit)
+    : my_thread_dispatcher(td)
+    , my_soft_limit(soft_limit)
+{}
+
+void thread_request_serializer::update(int delta) {
+    constexpr std::uint64_t delta_mask = (pending_delta_base << 1) - 1;
+    constexpr std::uint64_t counter_value = delta_mask + 1;
+
+    int prev_pending_delta = my_pending_delta.fetch_add(counter_value + delta);
+
+    // my_pending_delta acts as a pseudo request aggregator: only the thread that sees
+    // pending_delta_base in my_pending_delta enters the critical section and calls adjust_job_count_estimate
+    if (prev_pending_delta == pending_delta_base) {
+        delta = int(my_pending_delta.exchange(pending_delta_base) & delta_mask) - int(pending_delta_base);
+        mutex_type::scoped_lock lock(my_mutex);
+        my_total_request.store(my_total_request.load(std::memory_order_relaxed) + delta, std::memory_order_relaxed);
+        delta = limit_delta(delta, my_soft_limit, my_total_request.load(std::memory_order_relaxed));
+        my_thread_dispatcher.adjust_job_count_estimate(delta);
+    }
+}
+
+void thread_request_serializer::set_active_num_workers(int soft_limit) {
+    mutex_type::scoped_lock lock(my_mutex);
+    int delta = soft_limit - my_soft_limit;
+    delta = limit_delta(delta, my_total_request.load(std::memory_order_relaxed), soft_limit);
+    my_thread_dispatcher.adjust_job_count_estimate(delta);
+    my_soft_limit = soft_limit;
+}
+
+int thread_request_serializer::limit_delta(int delta, int limit, int new_value) {
+    // This method can be described with the following pseudocode:
+    // bool above_limit = prev_value >= limit && new_value >= limit;
+    // bool below_limit = prev_value <= limit && new_value <= limit;
+    // enum request_type { ABOVE_LIMIT, CROSS_LIMIT, BELOW_LIMIT };
+    // request = above_limit ? ABOVE_LIMIT : below_limit ? BELOW_LIMIT : CROSS_LIMIT;
+
+    // switch (request) {
+    // case ABOVE_LIMIT:
+    //     delta = 0;
+    // case CROSS_LIMIT:
+    //     delta = delta > 0 ? 
limit - prev_value : new_value - limit; + // case BELOW_LIMIT: + // // No changes to delta + // } + + int prev_value = new_value - delta; + + // actual new_value and prev_value cannot exceed the limit + new_value = min(limit, new_value); + prev_value = min(limit, prev_value); + return new_value - prev_value; +} + + +thread_request_serializer_proxy::thread_request_serializer_proxy(thread_dispatcher& td, int soft_limit) : my_serializer(td, soft_limit) +{} + +void thread_request_serializer_proxy::register_mandatory_request(int mandatory_delta) { + if (mandatory_delta != 0) { + mutex_type::scoped_lock lock(my_mutex, /* is_write = */ false); + int prev_value = my_num_mandatory_requests.fetch_add(mandatory_delta); + + const bool should_try_enable = mandatory_delta > 0 && prev_value == 0; + const bool should_try_disable = mandatory_delta < 0 && prev_value == 1; + + if (should_try_enable) { + enable_mandatory_concurrency(lock); + } else if (should_try_disable) { + disable_mandatory_concurrency(lock); + } + } +} + +void thread_request_serializer_proxy::set_active_num_workers(int soft_limit) { + mutex_type::scoped_lock lock(my_mutex, /* is_write = */ true); + + if (soft_limit != 0) { + my_is_mandatory_concurrency_enabled = false; + } else if (my_num_mandatory_requests > 0) { + my_is_mandatory_concurrency_enabled = true; + soft_limit = 1; + } + + my_serializer.set_active_num_workers(soft_limit); +} + +int thread_request_serializer_proxy::num_workers_requested() { return my_serializer.num_workers_requested(); } + +void thread_request_serializer_proxy::update(int delta) { my_serializer.update(delta); } + +void thread_request_serializer_proxy::enable_mandatory_concurrency(mutex_type::scoped_lock& lock) { + lock.upgrade_to_writer(); + bool still_should_enable = my_num_mandatory_requests.load(std::memory_order_relaxed) > 0 && + !my_is_mandatory_concurrency_enabled && my_serializer.is_no_workers_avaliable(); + + if (still_should_enable) { + my_is_mandatory_concurrency_enabled = true; + my_serializer.set_active_num_workers(1); + } +} + +void thread_request_serializer_proxy::disable_mandatory_concurrency(mutex_type::scoped_lock& lock) { + lock.upgrade_to_writer(); + bool still_should_disable = my_num_mandatory_requests.load(std::memory_order_relaxed) <= 0 && + my_is_mandatory_concurrency_enabled && !my_serializer.is_no_workers_avaliable(); + + if (still_should_disable) { + my_is_mandatory_concurrency_enabled = false; + my_serializer.set_active_num_workers(0); + } +} + +} // r1 +} // detail +} // tbb diff --git a/contrib/libs/tbb/src/tbb/thread_request_serializer.h b/contrib/libs/tbb/src/tbb/thread_request_serializer.h new file mode 100644 index 0000000000..9dc9799e1a --- /dev/null +++ b/contrib/libs/tbb/src/tbb/thread_request_serializer.h @@ -0,0 +1,84 @@ +/* + Copyright (c) 2022-2024 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
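thread_request_serializer::update() above folds concurrent requests into a single atomic: the low 16 bits hold the accumulated delta biased by pending_delta_base (so negative deltas are representable), while the upper bits count how many calls are currently folded in; only the thread whose fetch_add() observed the idle value drains the aggregate and talks to the thread dispatcher. A compact sketch of the same bit-packing, with hypothetical names and a print in place of adjust_job_count_estimate():

#include <atomic>
#include <cstdint>
#include <cstdio>
#include <mutex>

// Biased aggregator: low 16 bits hold (accumulated delta + base),
// the upper bits count how many update() calls are currently folded in.
constexpr std::uint64_t base       = 1u << 15;
constexpr std::uint64_t delta_mask = (base << 1) - 1;   // low 16 bits
constexpr std::uint64_t call_tick  = delta_mask + 1;    // one call in the upper bits

std::atomic<std::uint64_t> pending{base};
std::mutex apply_mutex;

void apply_to_rml(int delta) { std::printf("adjust by %d\n", delta); }

void update(int delta) {
    std::uint64_t prev = pending.fetch_add(call_tick + std::uint64_t(delta));
    // Only the thread that saw the aggregator in its idle state drains it;
    // concurrent callers just fold their deltas in and return.
    if (prev == base) {
        int net = int(pending.exchange(base) & delta_mask) - int(base);
        std::lock_guard<std::mutex> lock(apply_mutex);
        apply_to_rml(net);
    }
}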
+*/ + +#ifndef _TBB_thread_serializer_handlers_H +#define _TBB_thread_serializer_handlers_H + +#include "oneapi/tbb/mutex.h" +#include "oneapi/tbb/rw_mutex.h" + +#include "thread_dispatcher.h" + +namespace tbb { +namespace detail { +namespace r1 { + +class thread_request_observer { +protected: + virtual ~thread_request_observer() {} +public: + virtual void update(int delta) = 0; +}; + + +class thread_request_serializer : public thread_request_observer { + using mutex_type = d1::mutex; +public: + thread_request_serializer(thread_dispatcher& td, int soft_limit); + void set_active_num_workers(int soft_limit); + int num_workers_requested() { return my_total_request.load(std::memory_order_relaxed); } + bool is_no_workers_avaliable() { return my_soft_limit == 0; } + +private: + friend class thread_request_serializer_proxy; + void update(int delta) override; + static int limit_delta(int delta, int limit, int new_value); + + thread_dispatcher& my_thread_dispatcher; + int my_soft_limit{ 0 }; + std::atomic<int> my_total_request{ 0 }; + // my_pending_delta is set to pending_delta_base to have ability to hold negative values + // consider increase base since thead number will be bigger than 1 << 15 + static constexpr std::uint64_t pending_delta_base = 1 << 15; + std::atomic<std::uint64_t> my_pending_delta{ pending_delta_base }; + mutex_type my_mutex; +}; + +// Handles mandatory concurrency i.e. enables worker threads for enqueue tasks +class thread_request_serializer_proxy : public thread_request_observer { + using mutex_type = d1::rw_mutex; +public: + thread_request_serializer_proxy(thread_dispatcher& td, int soft_limit); + void register_mandatory_request(int mandatory_delta); + void set_active_num_workers(int soft_limit); + int num_workers_requested(); + +private: + void update(int delta) override; + void enable_mandatory_concurrency(mutex_type::scoped_lock& lock); + void disable_mandatory_concurrency(mutex_type::scoped_lock& lock); + + std::atomic<int> my_num_mandatory_requests{0}; + bool my_is_mandatory_concurrency_enabled{false}; + thread_request_serializer my_serializer; + mutex_type my_mutex; +}; + +} // namespace r1 +} // namespace detail +} // namespace tbb + +#endif // _TBB_thread_serializer_handlers_H diff --git a/contrib/libs/tbb/src/tbb/threading_control.cpp b/contrib/libs/tbb/src/tbb/threading_control.cpp new file mode 100644 index 0000000000..7a62b337f6 --- /dev/null +++ b/contrib/libs/tbb/src/tbb/threading_control.cpp @@ -0,0 +1,411 @@ +/* + Copyright (c) 2022-2024 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
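limit_delta(), declared in the serializer header above, clamps a requested change so that the value forwarded to the thread dispatcher never exceeds the current soft limit; only the part of a change that is visible below the limit is passed on. A self-contained illustration with a few sample values (std::min stands in for the internal min helper):

#include <algorithm>
#include <cassert>

// Mirror of the clamping idea: clamp both the old and the new totals to
// 'limit' and forward only the difference that is visible below the limit.
int limit_delta(int delta, int limit, int new_value) {
    int prev_value = new_value - delta;
    new_value  = std::min(limit, new_value);
    prev_value = std::min(limit, prev_value);
    return new_value - prev_value;
}

int main() {
    // Soft limit of 4 workers.
    assert(limit_delta(+3, 4, 3)  == 3);   // 0 -> 3, fully below the limit
    assert(limit_delta(+6, 4, 8)  == 2);   // 2 -> 8 crosses the limit: only 2 -> 4 is forwarded
    assert(limit_delta(-5, 4, 3)  == -1);  // 8 -> 3: only the visible 4 -> 3 part is forwarded
    assert(limit_delta(+2, 4, 10) == 0);   // 8 -> 10, entirely above the limit
    return 0;
}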
+*/ + +#include "threading_control.h" +#include "permit_manager.h" +#include "market.h" +#include "tcm_adaptor.h" +#include "thread_dispatcher.h" +#include "governor.h" +#include "thread_dispatcher_client.h" + +namespace tbb { +namespace detail { +namespace r1 { + +// ---------------------------------------- threading_control_impl -------------------------------------------------------------- + +std::size_t global_control_active_value_unsafe(d1::global_control::parameter); + +std::pair<unsigned, unsigned> threading_control_impl::calculate_workers_limits() { + // Expecting that 4P is suitable for most applications. + // Limit to 2P for large thread number. + // TODO: ask RML for max concurrency and possibly correct hard_limit + unsigned factor = governor::default_num_threads() <= 128 ? 4 : 2; + + // The requested number of threads is intentionally not considered in + // computation of the hard limit, in order to separate responsibilities + // and avoid complicated interactions between global_control and task_scheduler_init. + // The threading control guarantees that at least 256 threads might be created. + unsigned workers_app_limit = global_control_active_value_unsafe(global_control::max_allowed_parallelism); + unsigned workers_hard_limit = max(max(factor * governor::default_num_threads(), 256u), workers_app_limit); + unsigned workers_soft_limit = calc_workers_soft_limit(workers_hard_limit); + + return std::make_pair(workers_soft_limit, workers_hard_limit); +} + +unsigned threading_control_impl::calc_workers_soft_limit(unsigned workers_hard_limit) { + unsigned workers_soft_limit{}; + unsigned soft_limit = global_control_active_value_unsafe(global_control::max_allowed_parallelism); + + // if user set no limits (yet), use default value + workers_soft_limit = soft_limit != 0 ? 
soft_limit - 1 : governor::default_num_threads() - 1; + + if (workers_soft_limit >= workers_hard_limit) { + workers_soft_limit = workers_hard_limit - 1; + } + + return workers_soft_limit; +} + +cache_aligned_unique_ptr<permit_manager> threading_control_impl::make_permit_manager(unsigned workers_soft_limit) { + if (tcm_adaptor::is_initialized()) { + auto tcm = make_cache_aligned_unique<tcm_adaptor>(); + if (tcm->is_connected()) { + return tcm; + } + } + return make_cache_aligned_unique<market>(workers_soft_limit); +} + +cache_aligned_unique_ptr<thread_dispatcher> threading_control_impl::make_thread_dispatcher(threading_control& tc, + unsigned workers_soft_limit, + unsigned workers_hard_limit) +{ + stack_size_type stack_size = global_control_active_value_unsafe(global_control::thread_stack_size); + + cache_aligned_unique_ptr<thread_dispatcher> td = + make_cache_aligned_unique<thread_dispatcher>(tc, workers_hard_limit, stack_size); + // This check relies on the fact that for shared RML default_concurrency == max_concurrency + if (!governor::UsePrivateRML && td->my_server->default_concurrency() < workers_soft_limit) { + runtime_warning("RML might limit the number of workers to %u while %u is requested.\n", + td->my_server->default_concurrency(), workers_soft_limit); + } + + return td; +} + +threading_control_impl::threading_control_impl(threading_control* tc) { + unsigned workers_soft_limit{}, workers_hard_limit{}; + std::tie(workers_soft_limit, workers_hard_limit) = calculate_workers_limits(); + + my_permit_manager = make_permit_manager(workers_soft_limit); + my_thread_dispatcher = make_thread_dispatcher(*tc, workers_soft_limit, workers_hard_limit); + my_thread_request_serializer = + make_cache_aligned_unique<thread_request_serializer_proxy>(*my_thread_dispatcher, workers_soft_limit); + my_permit_manager->set_thread_request_observer(*my_thread_request_serializer); + + my_cancellation_disseminator = make_cache_aligned_unique<cancellation_disseminator>(); + my_waiting_threads_monitor = make_cache_aligned_unique<thread_control_monitor>(); +} + +void threading_control_impl::release(bool blocking_terminate) { + my_thread_dispatcher->release(blocking_terminate); +} + +void threading_control_impl::set_active_num_workers(unsigned soft_limit) { + __TBB_ASSERT(soft_limit <= my_thread_dispatcher->my_num_workers_hard_limit, nullptr); + my_thread_request_serializer->set_active_num_workers(soft_limit); + my_permit_manager->set_active_num_workers(soft_limit); +} + +threading_control_client threading_control_impl::create_client(arena& a) { + pm_client* pm_client = my_permit_manager->create_client(a); + thread_dispatcher_client* td_client = my_thread_dispatcher->create_client(a); + + return threading_control_client{pm_client, td_client}; +} + +threading_control_impl::client_snapshot threading_control_impl::prepare_client_destruction(threading_control_client client) { + auto td_client = client.get_thread_dispatcher_client(); + return {td_client->get_aba_epoch(), td_client->priority_level(), td_client, client.get_pm_client()}; +} + +bool threading_control_impl::try_destroy_client(threading_control_impl::client_snapshot snapshot) { + if (my_thread_dispatcher->try_unregister_client(snapshot.my_td_client, snapshot.aba_epoch, snapshot.priority_level)) { + my_permit_manager->unregister_and_destroy_client(*snapshot.my_pm_client); + return true; + } + return false; +} + +void threading_control_impl::publish_client(threading_control_client tc_client, d1::constraints& constraints) { + 
my_permit_manager->register_client(tc_client.get_pm_client(), constraints); + my_thread_dispatcher->register_client(tc_client.get_thread_dispatcher_client()); +} + +void threading_control_impl::register_thread(thread_data& td) { + my_cancellation_disseminator->register_thread(td); +} +void threading_control_impl::unregister_thread(thread_data& td) { + my_cancellation_disseminator->unregister_thread(td); +} + +void threading_control_impl::propagate_task_group_state(std::atomic<uint32_t> d1::task_group_context::*mptr_state, + d1::task_group_context& src, uint32_t new_state) +{ + my_cancellation_disseminator->propagate_task_group_state(mptr_state, src, new_state); +} + +std::size_t threading_control_impl::worker_stack_size() { + return my_thread_dispatcher->worker_stack_size(); +} + +unsigned threading_control_impl::max_num_workers() { + return my_thread_dispatcher->my_num_workers_hard_limit; +} + +void threading_control_impl::adjust_demand(threading_control_client tc_client, int mandatory_delta, int workers_delta) { + auto& c = *tc_client.get_pm_client(); + my_thread_request_serializer->register_mandatory_request(mandatory_delta); + my_permit_manager->adjust_demand(c, mandatory_delta, workers_delta); +} + +bool threading_control_impl::is_any_other_client_active() { + return my_thread_request_serializer->num_workers_requested() > 0 ? my_thread_dispatcher->is_any_client_in_need() : false; +} + +thread_control_monitor& threading_control_impl::get_waiting_threads_monitor() { + return *my_waiting_threads_monitor; +} + +// ---------------------------------------- threading_control ------------------------------------------------------------------- + +// Defined in global_control.cpp +void global_control_lock(); +void global_control_unlock(); + +void threading_control::add_ref(bool is_public) { + ++my_ref_count; + if (is_public) { + my_public_ref_count++; + } +} + +bool threading_control::remove_ref(bool is_public) { + if (is_public) { + __TBB_ASSERT(g_threading_control == this, "Global threading control instance was destroyed prematurely?"); + __TBB_ASSERT(my_public_ref_count.load(std::memory_order_relaxed), nullptr); + --my_public_ref_count; + } + + bool is_last_ref = --my_ref_count == 0; + if (is_last_ref) { + __TBB_ASSERT(!my_public_ref_count.load(std::memory_order_relaxed), nullptr); + g_threading_control = nullptr; + } + + return is_last_ref; +} + +threading_control* threading_control::get_threading_control(bool is_public) { + threading_control* control = g_threading_control; + if (control) { + control->add_ref(is_public); + } + + return control; +} + +threading_control* threading_control::create_threading_control() { + // Global control should be locked before threading_control_impl + global_control_lock(); + + threading_control* thr_control{ nullptr }; + try_call([&] { + global_mutex_type::scoped_lock lock(g_threading_control_mutex); + + thr_control = get_threading_control(/*public = */ true); + if (thr_control == nullptr) { + thr_control = new (cache_aligned_allocate(sizeof(threading_control))) threading_control(/*public_ref = */ 1, /*private_ref = */ 1); + thr_control->my_pimpl = make_cache_aligned_unique<threading_control_impl>(thr_control); + + __TBB_InitOnce::add_ref(); + + if (global_control_active_value_unsafe(global_control::scheduler_handle)) { + ++thr_control->my_public_ref_count; + ++thr_control->my_ref_count; + } + + g_threading_control = thr_control; + } + }).on_exception([&] { + global_control_unlock(); + + cache_aligned_deleter deleter{}; + deleter(thr_control); + }); + + 
global_control_unlock(); + return thr_control; +} + +void threading_control::destroy () { + cache_aligned_deleter deleter; + deleter(this); + __TBB_InitOnce::remove_ref(); +} + +void threading_control::wait_last_reference(global_mutex_type::scoped_lock& lock) { + while (my_public_ref_count.load(std::memory_order_relaxed) == 1 && my_ref_count.load(std::memory_order_relaxed) > 1) { + lock.release(); + // To guarantee that request_close_connection() is called by the last external thread, we need to wait till all + // references are released. Re-read my_public_ref_count to limit waiting if new external threads are created. + // Theoretically, new private references to the threading control can be added during waiting making it potentially + // endless. + // TODO: revise why the weak scheduler needs threading control's pointer and try to remove this wait. + // Note that the threading control should know about its schedulers for cancellation/exception/priority propagation, + // see e.g. task_group_context::cancel_group_execution() + while (my_public_ref_count.load(std::memory_order_acquire) == 1 && my_ref_count.load(std::memory_order_acquire) > 1) { + yield(); + } + lock.acquire(g_threading_control_mutex); + } +} + +bool threading_control::release(bool is_public, bool blocking_terminate) { + bool do_release = false; + { + global_mutex_type::scoped_lock lock(g_threading_control_mutex); + if (blocking_terminate) { + __TBB_ASSERT(is_public, "Only an object with a public reference can request the blocking terminate"); + wait_last_reference(lock); + } + do_release = remove_ref(is_public); + } + + if (do_release) { + __TBB_ASSERT(!my_public_ref_count.load(std::memory_order_relaxed), "No public references must remain if we remove the threading control."); + // inform RML that blocking termination is required + my_pimpl->release(blocking_terminate); + return blocking_terminate; + } + return false; +} + +threading_control::threading_control(unsigned public_ref, unsigned ref) : my_public_ref_count(public_ref), my_ref_count(ref) +{} + +threading_control* threading_control::register_public_reference() { + threading_control* control{nullptr}; + global_mutex_type::scoped_lock lock(g_threading_control_mutex); + control = get_threading_control(/*public = */ true); + if (!control) { + // We are going to create threading_control_impl, we should acquire mutexes in right order + lock.release(); + control = create_threading_control(); + } + + return control; +} + +bool threading_control::unregister_public_reference(bool blocking_terminate) { + __TBB_ASSERT(g_threading_control, "Threading control should exist until last public reference"); + __TBB_ASSERT(g_threading_control->my_public_ref_count.load(std::memory_order_relaxed), nullptr); + return g_threading_control->release(/*public = */ true, /*blocking_terminate = */ blocking_terminate); +} + +threading_control_client threading_control::create_client(arena& a) { + { + global_mutex_type::scoped_lock lock(g_threading_control_mutex); + add_ref(/*public = */ false); + } + + return my_pimpl->create_client(a); +} + +void threading_control::publish_client(threading_control_client client, d1::constraints& constraints) { + return my_pimpl->publish_client(client, constraints); +} + +threading_control::client_snapshot threading_control::prepare_client_destruction(threading_control_client client) { + return my_pimpl->prepare_client_destruction(client); +} + +bool threading_control::try_destroy_client(threading_control::client_snapshot deleter) { + bool res = 
my_pimpl->try_destroy_client(deleter); + if (res) { + release(/*public = */ false, /*blocking_terminate = */ false); + } + return res; +} + +void threading_control::set_active_num_workers(unsigned soft_limit) { + threading_control* thr_control{nullptr}; + { + global_mutex_type::scoped_lock lock(g_threading_control_mutex); + thr_control = get_threading_control(/*public = */ false); + } + + if (thr_control != nullptr) { + thr_control->my_pimpl->set_active_num_workers(soft_limit); + thr_control->release(/*is_public=*/false, /*blocking_terminate=*/false); + } +} + +bool threading_control::is_present() { + global_mutex_type::scoped_lock lock(g_threading_control_mutex); + return g_threading_control != nullptr; +} + +bool threading_control::register_lifetime_control() { + global_mutex_type::scoped_lock lock(g_threading_control_mutex); + return get_threading_control(/*public = */ true) != nullptr; +} + +bool threading_control::unregister_lifetime_control(bool blocking_terminate) { + threading_control* thr_control{nullptr}; + { + global_mutex_type::scoped_lock lock(g_threading_control_mutex); + thr_control = g_threading_control; + } + + bool released{true}; + if (thr_control) { + released = thr_control->release(/*public = */ true, /*blocking_terminate = */ blocking_terminate); + } + + return released; +} + +void threading_control::register_thread(thread_data& td) { + my_pimpl->register_thread(td); +} + +void threading_control::unregister_thread(thread_data& td) { + my_pimpl->unregister_thread(td); +} + +void threading_control::propagate_task_group_state(std::atomic<uint32_t> d1::task_group_context::*mptr_state, + d1::task_group_context& src, uint32_t new_state) +{ + my_pimpl->propagate_task_group_state(mptr_state, src, new_state); +} + +std::size_t threading_control::worker_stack_size() { + return my_pimpl->worker_stack_size(); +} + +unsigned threading_control::max_num_workers() { + global_mutex_type::scoped_lock lock(g_threading_control_mutex); + return g_threading_control ? g_threading_control->my_pimpl->max_num_workers() : 0; +} + +void threading_control::adjust_demand(threading_control_client client, int mandatory_delta, int workers_delta) { + my_pimpl->adjust_demand(client, mandatory_delta, workers_delta); +} + +bool threading_control::is_any_other_client_active() { + return my_pimpl->is_any_other_client_active(); +} + +thread_control_monitor& threading_control::get_waiting_threads_monitor() { + return my_pimpl->get_waiting_threads_monitor(); +} + +} // r1 +} // detail +} // tbb diff --git a/contrib/libs/tbb/src/tbb/threading_control.h b/contrib/libs/tbb/src/tbb/threading_control.h new file mode 100644 index 0000000000..7381b2978d --- /dev/null +++ b/contrib/libs/tbb/src/tbb/threading_control.h @@ -0,0 +1,154 @@ +/* + Copyright (c) 2022-2024 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
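threading_control above keeps two counters: my_ref_count for every user of the object and my_public_ref_count for external, user-visible references; the instance is torn down only when the overall count reaches zero, and blocking termination additionally waits until the terminating public reference is the only one left. A minimal sketch of the dual-counter idea, with simplified hypothetical names and without the global mutex:

#include <atomic>
#include <cassert>

class Lifetime {
public:
    void add_ref(bool is_public) {
        ++total_refs;
        if (is_public) ++public_refs;
    }

    // Returns true when the caller dropped the very last reference and is
    // therefore responsible for destroying the shared object.
    bool remove_ref(bool is_public) {
        if (is_public) {
            assert(public_refs.load() > 0);
            --public_refs;
        }
        return --total_refs == 0;
    }

private:
    std::atomic<unsigned> public_refs{0};  // external (user-facing) references
    std::atomic<unsigned> total_refs{0};   // public + internal references
};

int main() {
    Lifetime lt;
    lt.add_ref(true);                 // public handle
    lt.add_ref(false);                // an internal user (e.g. an arena)
    assert(!lt.remove_ref(false));    // internal user goes away first
    assert(lt.remove_ref(true));      // last (public) reference triggers destruction
}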
+*/ + +#ifndef _TBB_threading_control_H +#define _TBB_threading_control_H + +#include "oneapi/tbb/mutex.h" +#include "oneapi/tbb/global_control.h" + +#include "threading_control_client.h" +#include "intrusive_list.h" +#include "main.h" +#include "permit_manager.h" +#include "pm_client.h" +#include "thread_dispatcher.h" +#include "cancellation_disseminator.h" +#include "thread_request_serializer.h" +#include "scheduler_common.h" + +namespace tbb { +namespace detail { +namespace r1 { + +class arena; +class thread_data; + +class threading_control; + +class threading_control_impl { +public: + threading_control_impl(threading_control*); + +public: + void release(bool blocking_terminate); + + threading_control_client create_client(arena& a); + void publish_client(threading_control_client client, d1::constraints& constraints); + + struct client_snapshot { + std::uint64_t aba_epoch; + unsigned priority_level; + thread_dispatcher_client* my_td_client; + pm_client* my_pm_client; + }; + + client_snapshot prepare_client_destruction(threading_control_client client); + bool try_destroy_client(client_snapshot deleter); + + void register_thread(thread_data& td); + void unregister_thread(thread_data& td); + void propagate_task_group_state(std::atomic<uint32_t> d1::task_group_context::*mptr_state, + d1::task_group_context& src, uint32_t new_state); + + void set_active_num_workers(unsigned soft_limit); + std::size_t worker_stack_size(); + unsigned max_num_workers(); + + void adjust_demand(threading_control_client, int mandatory_delta, int workers_delta); + bool is_any_other_client_active(); + + thread_control_monitor& get_waiting_threads_monitor(); + +private: + static unsigned calc_workers_soft_limit(unsigned workers_hard_limit); + static std::pair<unsigned, unsigned> calculate_workers_limits(); + static cache_aligned_unique_ptr<permit_manager> make_permit_manager(unsigned workers_soft_limit); + static cache_aligned_unique_ptr<thread_dispatcher> make_thread_dispatcher(threading_control& control, + unsigned workers_soft_limit, + unsigned workers_hard_limit); + + // TODO: Consider allocation one chunk of memory and construct objects on it + cache_aligned_unique_ptr<permit_manager> my_permit_manager{nullptr}; + cache_aligned_unique_ptr<thread_dispatcher> my_thread_dispatcher{nullptr}; + cache_aligned_unique_ptr<thread_request_serializer_proxy> my_thread_request_serializer{nullptr}; + cache_aligned_unique_ptr<cancellation_disseminator> my_cancellation_disseminator{nullptr}; + cache_aligned_unique_ptr<thread_control_monitor> my_waiting_threads_monitor{nullptr}; +}; + + +class threading_control { + using global_mutex_type = d1::mutex; +public: + using client_snapshot = threading_control_impl::client_snapshot; + + static threading_control* register_public_reference(); + static bool unregister_public_reference(bool blocking_terminate); + + static bool is_present(); + static void set_active_num_workers(unsigned soft_limit); + static bool register_lifetime_control(); + static bool unregister_lifetime_control(bool blocking_terminate); + + threading_control_client create_client(arena& a); + void publish_client(threading_control_client client, d1::constraints& constraints); + client_snapshot prepare_client_destruction(threading_control_client client); + bool try_destroy_client(client_snapshot deleter); + + void register_thread(thread_data& td); + void unregister_thread(thread_data& td); + void propagate_task_group_state(std::atomic<uint32_t> d1::task_group_context::*mptr_state, + d1::task_group_context& src, uint32_t 
new_state); + + std::size_t worker_stack_size(); + static unsigned max_num_workers(); + + void adjust_demand(threading_control_client client, int mandatory_delta, int workers_delta); + bool is_any_other_client_active(); + + thread_control_monitor& get_waiting_threads_monitor(); + +private: + threading_control(unsigned public_ref, unsigned ref); + void add_ref(bool is_public); + bool remove_ref(bool is_public); + + static threading_control* get_threading_control(bool is_public); + static threading_control* create_threading_control(); + + bool release(bool is_public, bool blocking_terminate); + void wait_last_reference(global_mutex_type::scoped_lock& lock); + void destroy(); + + friend class thread_dispatcher; + + static threading_control* g_threading_control; + //! Mutex guarding creation/destruction of g_threading_control, insertions/deletions in my_arenas, and cancellation propagation + static global_mutex_type g_threading_control_mutex; + + cache_aligned_unique_ptr<threading_control_impl> my_pimpl{nullptr}; + //! Count of external threads attached + std::atomic<unsigned> my_public_ref_count{0}; + //! Reference count controlling threading_control object lifetime + std::atomic<unsigned> my_ref_count{0}; +}; + +} // r1 +} // detail +} // tbb + + +#endif // _TBB_threading_control_H diff --git a/contrib/libs/tbb/src/tbb/threading_control_client.h b/contrib/libs/tbb/src/tbb/threading_control_client.h new file mode 100644 index 0000000000..4ff9359cf7 --- /dev/null +++ b/contrib/libs/tbb/src/tbb/threading_control_client.h @@ -0,0 +1,58 @@ +/* + Copyright (c) 2022-2023 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
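propagate_task_group_state() in the header above takes a pointer to a data member (std::atomic<uint32_t> d1::task_group_context::*), so a single propagation routine can broadcast either the cancellation flag or another state word depending on which member the caller names. A tiny standalone example of that pointer-to-member technique, with a simplified context type:

#include <atomic>
#include <cstdint>
#include <vector>

struct Context {
    std::atomic<std::uint32_t> cancel_state{0};
    std::atomic<std::uint32_t> exec_state{0};
};

// Write 'new_state' into whichever member 'field' names, for every context.
void propagate(std::vector<Context*>& all,
               std::atomic<std::uint32_t> Context::*field,
               std::uint32_t new_state) {
    for (Context* ctx : all) {
        (ctx->*field).store(new_state, std::memory_order_relaxed);
    }
}

int main() {
    Context a, b;
    std::vector<Context*> contexts{&a, &b};
    propagate(contexts, &Context::cancel_state, 1);  // broadcast "cancelled"
    propagate(contexts, &Context::exec_state, 2);    // same routine, different member
}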
+*/ + +#ifndef _TBB_threading_control_client_H +#define _TBB_threading_control_client_H + +#include "oneapi/tbb/detail/_assert.h" + +namespace tbb { +namespace detail { +namespace r1 { + +class pm_client; +class thread_dispatcher_client; + +class threading_control_client { +public: + threading_control_client() = default; + threading_control_client(const threading_control_client&) = default; + threading_control_client& operator=(const threading_control_client&) = default; + + threading_control_client(pm_client* p, thread_dispatcher_client* t) : my_pm_client(p), my_thread_dispatcher_client(t) { + __TBB_ASSERT(my_pm_client, nullptr); + __TBB_ASSERT(my_thread_dispatcher_client, nullptr); + } + + pm_client* get_pm_client() { + return my_pm_client; + } + + thread_dispatcher_client* get_thread_dispatcher_client() { + return my_thread_dispatcher_client; + } + +private: + pm_client* my_pm_client{nullptr}; + thread_dispatcher_client* my_thread_dispatcher_client{nullptr}; +}; + + +} +} +} + +#endif // _TBB_threading_control_client_H diff --git a/contrib/libs/tbb/src/tbb/tools_api/ittnotify.h b/contrib/libs/tbb/src/tbb/tools_api/ittnotify.h index e701980f6a..eb1571dc8b 100644 --- a/contrib/libs/tbb/src/tbb/tools_api/ittnotify.h +++ b/contrib/libs/tbb/src/tbb/tools_api/ittnotify.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2022 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -101,6 +101,11 @@ The same ID may not be reused for different instances, unless a previous # define ITT_OS_FREEBSD 4 #endif /* ITT_OS_FREEBSD */ +#ifndef ITT_OS_OPENBSD +# define ITT_OS_OPENBSD 5 +#endif /* ITT_OS_OPENBSD */ + + #ifndef ITT_OS # if defined WIN32 || defined _WIN32 # define ITT_OS ITT_OS_WIN @@ -108,6 +113,8 @@ The same ID may not be reused for different instances, unless a previous # define ITT_OS ITT_OS_MAC # elif defined( __FreeBSD__ ) # define ITT_OS ITT_OS_FREEBSD +# elif defined( __OpenBSD__ ) +# define ITT_OS ITT_OS_OPENBSD # else # define ITT_OS ITT_OS_LINUX # endif @@ -129,6 +136,10 @@ The same ID may not be reused for different instances, unless a previous # define ITT_PLATFORM_FREEBSD 4 #endif /* ITT_PLATFORM_FREEBSD */ +#ifndef ITT_PLATFORM_OPENBSD +# define ITT_PLATFORM_OPENBSD 5 +#endif /* ITT_PLATFORM_OPENBSD */ + #ifndef ITT_PLATFORM # if ITT_OS==ITT_OS_WIN # define ITT_PLATFORM ITT_PLATFORM_WIN @@ -136,6 +147,8 @@ The same ID may not be reused for different instances, unless a previous # define ITT_PLATFORM ITT_PLATFORM_MAC # elif ITT_OS==ITT_OS_FREEBSD # define ITT_PLATFORM ITT_PLATFORM_FREEBSD +# elif ITT_OS==ITT_OS_OPENBSD +# define ITT_PLATFORM ITT_PLATFORM_OPENBSD # else # define ITT_PLATFORM ITT_PLATFORM_POSIX # endif @@ -305,7 +318,7 @@ extern "C" { * only pauses tracing and analyzing memory access. * It does not pause tracing or analyzing threading APIs. * . - * - Intel(R) Parallel Amplifier and Intel(R) VTune(TM) Amplifier XE: + * Intel(R) VTune(TM) Profiler: * - Does continue to record when new threads are started. * . 
* - Other effects: @@ -320,30 +333,57 @@ void ITTAPI __itt_resume(void); /** @brief Detach collection */ void ITTAPI __itt_detach(void); +/** + * @enum __itt_collection_scope + * @brief Enumerator for collection scopes + */ +typedef enum { + __itt_collection_scope_host = 1 << 0, + __itt_collection_scope_offload = 1 << 1, + __itt_collection_scope_all = 0x7FFFFFFF +} __itt_collection_scope; + +/** @brief Pause scoped collection */ +void ITTAPI __itt_pause_scoped(__itt_collection_scope); +/** @brief Resume scoped collection */ +void ITTAPI __itt_resume_scoped(__itt_collection_scope); + /** @cond exclude_from_documentation */ #ifndef INTEL_NO_MACRO_BODY #ifndef INTEL_NO_ITTNOTIFY_API -ITT_STUBV(ITTAPI, void, pause, (void)) -ITT_STUBV(ITTAPI, void, resume, (void)) -ITT_STUBV(ITTAPI, void, detach, (void)) -#define __itt_pause ITTNOTIFY_VOID(pause) -#define __itt_pause_ptr ITTNOTIFY_NAME(pause) -#define __itt_resume ITTNOTIFY_VOID(resume) -#define __itt_resume_ptr ITTNOTIFY_NAME(resume) -#define __itt_detach ITTNOTIFY_VOID(detach) -#define __itt_detach_ptr ITTNOTIFY_NAME(detach) +ITT_STUBV(ITTAPI, void, pause, (void)) +ITT_STUBV(ITTAPI, void, pause_scoped, (__itt_collection_scope)) +ITT_STUBV(ITTAPI, void, resume, (void)) +ITT_STUBV(ITTAPI, void, resume_scoped, (__itt_collection_scope)) +ITT_STUBV(ITTAPI, void, detach, (void)) +#define __itt_pause ITTNOTIFY_VOID(pause) +#define __itt_pause_ptr ITTNOTIFY_NAME(pause) +#define __itt_pause_scoped ITTNOTIFY_VOID(pause_scoped) +#define __itt_pause_scoped_ptr ITTNOTIFY_NAME(pause_scoped) +#define __itt_resume ITTNOTIFY_VOID(resume) +#define __itt_resume_ptr ITTNOTIFY_NAME(resume) +#define __itt_resume_scoped ITTNOTIFY_VOID(resume_scoped) +#define __itt_resume_scoped_ptr ITTNOTIFY_NAME(resume_scoped) +#define __itt_detach ITTNOTIFY_VOID(detach) +#define __itt_detach_ptr ITTNOTIFY_NAME(detach) #else /* INTEL_NO_ITTNOTIFY_API */ #define __itt_pause() -#define __itt_pause_ptr 0 +#define __itt_pause_ptr 0 +#define __itt_pause_scoped(scope) +#define __itt_pause_scoped_ptr 0 #define __itt_resume() -#define __itt_resume_ptr 0 +#define __itt_resume_ptr 0 +#define __itt_resume_scoped(scope) +#define __itt_resume_scoped_ptr 0 #define __itt_detach() -#define __itt_detach_ptr 0 +#define __itt_detach_ptr 0 #endif /* INTEL_NO_ITTNOTIFY_API */ #else /* INTEL_NO_MACRO_BODY */ -#define __itt_pause_ptr 0 -#define __itt_resume_ptr 0 -#define __itt_detach_ptr 0 +#define __itt_pause_ptr 0 +#define __itt_pause_scoped_ptr 0 +#define __itt_resume_ptr 0 +#define __itt_resume_scoped_ptr 0 +#define __itt_detach_ptr 0 #endif /* INTEL_NO_MACRO_BODY */ /** @endcond */ /** @} control group */ @@ -353,7 +393,7 @@ ITT_STUBV(ITTAPI, void, detach, (void)) * @defgroup Intel Processor Trace control * API from this group provides control over collection and analysis of Intel Processor Trace (Intel PT) data * Information about Intel Processor Trace technology can be found here (Volume 3 chapter 35): - * https://software.intel.com/sites/default/files/managed/39/c5/325462-sdm-vol-1-2abcd-3abcd.pdf + * https://github.com/tpn/pdfs/blob/master/Intel%2064%20and%20IA-32%20Architectures%20Software%20Developer's%20Manual%20-%20Combined%20Volumes%201-4%20-%20May%202018%20(325462-sdm-vol-1-2abcd-3abcd).pdf * Use this API to mark particular code regions for loading detailed performance statistics. * This mode makes your analysis faster and more accurate. 
* @{ @@ -587,8 +627,8 @@ ITT_STUBV(ITTAPI, void, suppress_pop, (void)) /** @endcond */ /** - * @enum __itt_model_disable - * @brief Enumerator for the disable methods + * @enum __itt_suppress_mode + * @brief Enumerator for the suppressing modes */ typedef enum __itt_suppress_mode { __itt_unsuppress_range, @@ -597,12 +637,12 @@ typedef enum __itt_suppress_mode { /** * @enum __itt_collection_state - * @brief Enumerator for collection state. All non-work states have negative values. + * @brief Enumerator for collection state. */ typedef enum { __itt_collection_uninitialized = 0, /* uninitialized */ __itt_collection_init_fail = 1, /* failed to init */ - __itt_collection_collector_absent = 2, /* non work state collector exists */ + __itt_collection_collector_absent = 2, /* non work state collector is absent */ __itt_collection_collector_exists = 3, /* work state collector exists */ __itt_collection_init_successful = 4 /* success to init */ } __itt_collection_state; @@ -2345,7 +2385,7 @@ ITT_STUBV(ITTAPI, void, task_end_overlapped, (const __itt_domain *domain, __it /** * @defgroup markers Markers - * Markers represent a single discreet event in time. Markers have a scope, + * Markers represent a single discrete event in time. Markers have a scope, * described by an enumerated type __itt_scope. Markers are created by * the API call __itt_marker. A marker instance can be given an ID for use in * adding metadata. @@ -4005,6 +4045,173 @@ __itt_collection_state __itt_get_collection_state(void); void __itt_release_resources(void); /** @endcond */ +/** + * @brief Create a typed counter with given domain pointer, string name and counter type +*/ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +__itt_counter ITTAPI __itt_counter_createA_v3(const __itt_domain* domain, const char* name, __itt_metadata_type type); +__itt_counter ITTAPI __itt_counter_createW_v3(const __itt_domain* domain, const wchar_t* name, __itt_metadata_type type); +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_counter_create_v3 __itt_counter_createW_v3 +# define __itt_counter_create_v3_ptr __itt_counter_createW_v3_ptr +#else /* UNICODE */ +# define __itt_counter_create_v3 __itt_counter_createA_v3 +# define __itt_counter_create_v3_ptr __itt_counter_createA_v3_ptr +#endif /* UNICODE */ +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +__itt_counter ITTAPI __itt_counter_create_v3(const __itt_domain* domain, const char* name, __itt_metadata_type type); +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +#if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_counter, counter_createA_v3, (const __itt_domain* domain, const char* name, __itt_metadata_type type)) +ITT_STUB(ITTAPI, __itt_counter, counter_createW_v3, (const __itt_domain* domain, const wchar_t* name, __itt_metadata_type type)) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_counter, counter_create_v3, (const __itt_domain* domain, const char* name, __itt_metadata_type type)) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_counter_createA_v3 ITTNOTIFY_DATA(counter_createA_v3) +#define __itt_counter_createA_v3_ptr ITTNOTIFY_NAME(counter_createA_v3) +#define __itt_counter_createW_v3 ITTNOTIFY_DATA(counter_createW_v3) +#define __itt_counter_createW_v3_ptr ITTNOTIFY_NAME(counter_createW_v3) +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_counter_create_v3 ITTNOTIFY_DATA(counter_create_v3) +#define __itt_counter_create_v3_ptr 
ITTNOTIFY_NAME(counter_create_v3) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#else /* INTEL_NO_ITTNOTIFY_API */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_counter_createA_v3(domain, name, type) (__itt_counter)0 +#define __itt_counter_createA_v3_ptr 0 +#define __itt_counter_createW_v3(domain, name, type) (__itt_counter)0 +#define __itt_counter_create_typedW_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_counter_create_v3(domain, name, type) (__itt_counter)0 +#define __itt_counter_create_v3_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#if ITT_PLATFORM==ITT_PLATFORM_WIN +#define __itt_counter_createA_v3_ptr 0 +#define __itt_counter_createW_v3_ptr 0 +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#define __itt_counter_create_v3_ptr 0 +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Set the counter value api + */ +void ITTAPI __itt_counter_set_value_v3(__itt_counter counter, void *value_ptr); + +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, counter_set_value_v3, (__itt_counter counter, void *value_ptr)) +#define __itt_counter_set_value_v3 ITTNOTIFY_VOID(counter_set_value_v3) +#define __itt_counter_set_value_v3_ptr ITTNOTIFY_NAME(counter_set_value_v3) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_counter_set_value_v3(counter, value_ptr) +#define __itt_counter_set_value_v3_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_counter_set_value_v3_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief describes the type of context metadata +*/ +typedef enum { + __itt_context_unknown = 0, /*!< Undefined type */ + __itt_context_nameA, /*!< ASCII string char* type */ + __itt_context_nameW, /*!< Unicode string wchar_t* type */ + __itt_context_deviceA, /*!< ASCII string char* type */ + __itt_context_deviceW, /*!< Unicode string wchar_t* type */ + __itt_context_unitsA, /*!< ASCII string char* type */ + __itt_context_unitsW, /*!< Unicode string wchar_t* type */ + __itt_context_pci_addrA, /*!< ASCII string char* type */ + __itt_context_pci_addrW, /*!< Unicode string wchar_t* type */ + __itt_context_tid, /*!< Unsigned 64-bit integer type */ + __itt_context_max_val, /*!< Unsigned 64-bit integer type */ + __itt_context_bandwidth_flag, /*!< Unsigned 64-bit integer type */ + __itt_context_latency_flag, /*!< Unsigned 64-bit integer type */ + __itt_context_occupancy_flag, /*!< Unsigned 64-bit integer type */ + __itt_context_on_thread_flag, /*!< Unsigned 64-bit integer type */ + __itt_context_is_abs_val_flag, /*!< Unsigned 64-bit integer type */ + __itt_context_cpu_instructions_flag, /*!< Unsigned 64-bit integer type */ + __itt_context_cpu_cycles_flag /*!< Unsigned 64-bit integer type */ +} __itt_context_type; + +#if defined(UNICODE) || defined(_UNICODE) +# define __itt_context_name __itt_context_nameW +# define __itt_context_device __itt_context_deviceW +# define __itt_context_units __itt_context_unitsW +# define __itt_context_pci_addr __itt_context_pci_addrW +#else /* UNICODE || _UNICODE */ +# define __itt_context_name __itt_context_nameA +# define __itt_context_device __itt_context_deviceA +# define __itt_context_units __itt_context_unitsA +# define __itt_context_pci_addr __itt_context_pci_addrA +#endif /* UNICODE || _UNICODE */ + +/** @cond exclude_from_documentation */ +#pragma pack(push, 8) + +typedef struct ___itt_context_metadata +{ + 
__itt_context_type type; /*!< Type of the context metadata value */ + void* value; /*!< Pointer to context metadata value itself */ +} __itt_context_metadata; + +#pragma pack(pop) +/** @endcond */ + +/** @cond exclude_from_documentation */ +#pragma pack(push, 8) + +typedef struct ___itt_counter_metadata +{ + __itt_counter counter; /*!< Associated context metadata counter */ + __itt_context_type type; /*!< Type of the context metadata value */ + const char* str_valueA; /*!< String context metadata value */ +#if defined(UNICODE) || defined(_UNICODE) + const wchar_t* str_valueW; +#else /* UNICODE || _UNICODE */ + void* str_valueW; +#endif /* UNICODE || _UNICODE */ + unsigned long long value; /*!< Numeric context metadata value */ + int extra1; /*!< Reserved to the runtime */ + void* extra2; /*!< Reserved to the runtime */ + struct ___itt_counter_metadata* next; +} __itt_counter_metadata; + +#pragma pack(pop) +/** @endcond */ + +/** + * @brief Bind context metadata to counter instance + * @param[in] counter Pointer to the counter instance to which the context metadata is to be associated. + * @param[in] length The number of elements in context metadata array. + * @param[in] metadata The context metadata itself. +*/ +void ITTAPI __itt_bind_context_metadata_to_counter(__itt_counter counter, size_t length, __itt_context_metadata* metadata); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, bind_context_metadata_to_counter, (__itt_counter counter, size_t length, __itt_context_metadata* metadata)) +#define __itt_bind_context_metadata_to_counter ITTNOTIFY_VOID(bind_context_metadata_to_counter) +#define __itt_bind_context_metadata_to_counter_ptr ITTNOTIFY_NAME(bind_context_metadata_to_counter) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_bind_context_metadata_to_counter(counter, length, metadata) +#define __itt_bind_context_metadata_to_counter_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_bind_context_metadata_to_counter_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + #ifdef __cplusplus } #endif /* __cplusplus */ @@ -4423,7 +4630,7 @@ typedef enum __itt_error_code { __itt_error_success = 0, /*!< no error */ __itt_error_no_module = 1, /*!< module can't be loaded */ - /* %1$s -- library name; win: %2$d -- system error code; unx: %2$s -- system error message. */ + /* %1$s -- library name; win: %2$d -- system error code; unix: %2$s -- system error message. */ __itt_error_no_symbol = 2, /*!< symbol not found */ /* %1$s -- library name, %2$s -- symbol name. */ __itt_error_unknown_group = 3, /*!< unknown group specified */ diff --git a/contrib/libs/tbb/src/tbb/tools_api/ittnotify_config.h b/contrib/libs/tbb/src/tbb/tools_api/ittnotify_config.h index 0f5d80f659..001d42e0e5 100644 --- a/contrib/libs/tbb/src/tbb/tools_api/ittnotify_config.h +++ b/contrib/libs/tbb/src/tbb/tools_api/ittnotify_config.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2023 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
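The v3 counter API added to ittnotify.h above pairs a typed counter (__itt_counter_create_v3) with context metadata bound via __itt_bind_context_metadata_to_counter. A hypothetical usage sketch, assuming the usual ITT domain API and the __itt_metadata_u64 value type; the exact metadata entries a given tool expects are not specified here:

#include "ittnotify.h"

// Hypothetical example: create a typed counter and attach descriptive
// context metadata before reporting values to the collector.
void register_bandwidth_counter() {
    const __itt_domain* domain = __itt_domain_create("my.module");
    __itt_counter counter =
        __itt_counter_create_v3(domain, "memory_bandwidth", __itt_metadata_u64);

    __itt_context_metadata metadata[] = {
        { __itt_context_name,  (void*)"DRAM read bandwidth" },
        { __itt_context_units, (void*)"GB/s" }
    };
    __itt_bind_context_metadata_to_counter(counter, 2, metadata);

    unsigned long long value = 42;
    __itt_counter_set_value_v3(counter, &value);
}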
@@ -34,6 +34,10 @@ # define ITT_OS_FREEBSD 4 #endif /* ITT_OS_FREEBSD */ +#ifndef ITT_OS_OPENBSD +# define ITT_OS_OPENBSD 5 +#endif /* ITT_OS_OPENBSD */ + #ifndef ITT_OS # if defined WIN32 || defined _WIN32 # define ITT_OS ITT_OS_WIN @@ -41,6 +45,8 @@ # define ITT_OS ITT_OS_MAC # elif defined( __FreeBSD__ ) # define ITT_OS ITT_OS_FREEBSD +# elif defined( __OpenBSD__ ) +# define ITT_OS ITT_OS_OPENBSD # else # define ITT_OS ITT_OS_LINUX # endif @@ -62,6 +68,10 @@ # define ITT_PLATFORM_FREEBSD 4 #endif /* ITT_PLATFORM_FREEBSD */ +#ifndef ITT_PLATFORM_OPENBSD +# define ITT_PLATFORM_OPENBSD 5 +#endif /* ITT_PLATFORM_OPENBSD */ + #ifndef ITT_PLATFORM # if ITT_OS==ITT_OS_WIN # define ITT_PLATFORM ITT_PLATFORM_WIN @@ -69,6 +79,8 @@ # define ITT_PLATFORM ITT_PLATFORM_MAC # elif ITT_OS==ITT_OS_FREEBSD # define ITT_PLATFORM ITT_PLATFORM_FREEBSD +# elif ITT_OS==ITT_OS_OPENBSD +# define ITT_PLATFORM ITT_PLATFORM_OPENBSD # else # define ITT_PLATFORM ITT_PLATFORM_POSIX # endif @@ -232,10 +244,10 @@ #define ITT_MAGIC { 0xED, 0xAB, 0xAB, 0xEC, 0x0D, 0xEE, 0xDA, 0x30 } /* Replace with snapshot date YYYYMMDD for promotion build. */ -#define API_VERSION_BUILD 20180723 +#define API_VERSION_BUILD 20230630 #ifndef API_VERSION_NUM -#define API_VERSION_NUM 3.23.0 +#define API_VERSION_NUM 3.24.4 #endif /* API_VERSION_NUM */ #define API_VERSION "ITT-API-Version " ITT_TO_STR(API_VERSION_NUM) \ @@ -494,6 +506,7 @@ typedef struct __itt_counter_info struct ___itt_domain; struct ___itt_string_handle; struct ___itt_histogram; +struct ___itt_counter_metadata; #include "ittnotify.h" @@ -520,6 +533,7 @@ typedef struct ___itt_global __itt_counter_info_t* counter_list; unsigned int ipt_collect_events; struct ___itt_histogram* histogram_list; + struct ___itt_counter_metadata* counter_metadata_list; } __itt_global; #pragma pack(pop) @@ -632,7 +646,7 @@ typedef struct ___itt_global h->nameA = NULL; \ h->nameW = name ? _wcsdup(name) : NULL; \ h->domainA = NULL; \ - h->domainW = name ? _wcsdup(domain) : NULL; \ + h->domainW = domain ? 
_wcsdup(domain) : NULL; \ h->type = type; \ h->index = 0; \ h->next = NULL; \ @@ -674,6 +688,7 @@ typedef struct ___itt_global h->y_type = y_type; \ h->extra1 = 0; \ h->extra2 = NULL; \ + h->next = NULL; \ if (h_tail == NULL) \ (gptr)->histogram_list = h; \ else \ @@ -693,6 +708,7 @@ typedef struct ___itt_global h->y_type = y_type; \ h->extra1 = 0; \ h->extra2 = NULL; \ + h->next = NULL; \ if (h_tail == NULL) \ (gptr)->histogram_list = h; \ else \ @@ -700,4 +716,60 @@ typedef struct ___itt_global } \ } +#define NEW_COUNTER_METADATA_NUM(gptr,h,h_tail,counter,type,value) { \ + h = (__itt_counter_metadata*)malloc(sizeof(__itt_counter_metadata)); \ + if (h != NULL) { \ + h->counter = counter; \ + h->type = type; \ + h->str_valueA = NULL; \ + h->str_valueW = NULL; \ + h->value = value; \ + h->extra1 = 0; \ + h->extra2 = NULL; \ + h->next = NULL; \ + if (h_tail == NULL) \ + (gptr)->counter_metadata_list = h; \ + else \ + h_tail->next = h; \ + } \ +} + +#define NEW_COUNTER_METADATA_STR_A(gptr,h,h_tail,counter,type,str_valueA) { \ + h = (__itt_counter_metadata*)malloc(sizeof(__itt_counter_metadata)); \ + if (h != NULL) { \ + h->counter = counter; \ + h->type = type; \ + char *str_value_copy = NULL; \ + __itt_fstrdup(str_valueA, str_value_copy); \ + h->str_valueA = str_value_copy; \ + h->str_valueW = NULL; \ + h->value = 0; \ + h->extra1 = 0; \ + h->extra2 = NULL; \ + h->next = NULL; \ + if (h_tail == NULL) \ + (gptr)->counter_metadata_list = h; \ + else \ + h_tail->next = h; \ + } \ +} + +#define NEW_COUNTER_METADATA_STR_W(gptr,h,h_tail,counter,type,str_valueW) { \ + h = (__itt_counter_metadata*)malloc(sizeof(__itt_counter_metadata)); \ + if (h != NULL) { \ + h->counter = counter; \ + h->type = type; \ + h->str_valueA = NULL; \ + h->str_valueW = str_valueW ? _wcsdup(str_valueW) : NULL; \ + h->value = 0; \ + h->extra1 = 0; \ + h->extra2 = NULL; \ + h->next = NULL; \ + if (h_tail == NULL) \ + (gptr)->counter_metadata_list = h; \ + else \ + h_tail->next = h; \ + } \ +} + #endif /* _ITTNOTIFY_CONFIG_H_ */ diff --git a/contrib/libs/tbb/src/tbb/tools_api/ittnotify_static.c b/contrib/libs/tbb/src/tbb/tools_api/ittnotify_static.c index 0b9aa492ac..c3a53bf0a6 100644 --- a/contrib/libs/tbb/src/tbb/tools_api/ittnotify_static.c +++ b/contrib/libs/tbb/src/tbb/tools_api/ittnotify_static.c @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2022 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
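The NEW_COUNTER_METADATA_* macros added above all append a freshly malloc'ed node to the global counter_metadata_list, using a tail pointer supplied by the caller. Written out as a plain function, the numeric variant reduces to roughly the sketch below (field and type names as in the patch; locking is the caller's responsibility, exactly as with the macro):

#include <stdlib.h>
#include "ittnotify_config.h"   /* for __itt_global and __itt_counter_metadata, as in the patched file */

/* Sketch of what NEW_COUNTER_METADATA_NUM does once expanded: allocate a node,
 * fill it, and link it after the current tail (or make it the list head when
 * the list is still empty). */
static __itt_counter_metadata* append_counter_metadata_num(
    __itt_global* gptr, __itt_counter_metadata* tail,
    __itt_counter counter, __itt_context_type type, unsigned long long value)
{
    __itt_counter_metadata* h =
        (__itt_counter_metadata*)malloc(sizeof(__itt_counter_metadata));
    if (h == NULL) return NULL;            /* the macro silently skips on allocation failure */
    h->counter    = counter;
    h->type       = type;
    h->str_valueA = NULL;
    h->str_valueW = NULL;
    h->value      = value;
    h->extra1     = 0;
    h->extra2     = NULL;
    h->next       = NULL;
    if (tail == NULL)
        gptr->counter_metadata_list = h;   /* first node becomes the list head */
    else
        tail->next = h;                    /* otherwise append after the tail */
    return h;
}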
@@ -81,7 +81,7 @@ static const char api_version[] = API_VERSION "\0\n@(#) $Revision$\n"; #if ITT_OS==ITT_OS_WIN static const char* ittnotify_lib_name = "libittnotify.dll"; -#elif ITT_OS==ITT_OS_LINUX || ITT_OS==ITT_OS_FREEBSD +#elif ITT_OS==ITT_OS_LINUX || ITT_OS==ITT_OS_FREEBSD|| ITT_OS==ITT_OS_OPENBSD static const char* ittnotify_lib_name = "libittnotify.so"; #elif ITT_OS==ITT_OS_MAC static const char* ittnotify_lib_name = "libittnotify.dylib"; @@ -305,7 +305,8 @@ __itt_global _N_(_ittapi_global) = { __itt_collection_uninitialized, /* collection state */ NULL, /* counter_list */ 0, /* ipt_collect_events */ - NULL /* histogram_list */ + NULL, /* histogram_list */ + NULL /* counter_metadata_list */ }; typedef void (__itt_api_init_t)(__itt_global*, __itt_group_id); @@ -850,6 +851,169 @@ static __itt_histogram* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(histogram_create),_in return (__itt_histogram*)h; } +#if ITT_PLATFORM==ITT_PLATFORM_WIN +static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_createW_v3),_init))(const __itt_domain* domain, const wchar_t* name, __itt_metadata_type type) +{ + __itt_counter_info_t *h_tail = NULL, *h = NULL; + + if (name == NULL || domain == NULL) + { + return NULL; + } + + ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); + if (_N_(_ittapi_global).api_initialized) + { + if (ITTNOTIFY_NAME(counter_createW_v3) && ITTNOTIFY_NAME(counter_createW_v3) != ITT_VERSIONIZE(ITT_JOIN(_N_(counter_createW_v3),_init))) + { + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return ITTNOTIFY_NAME(counter_createW_v3)(domain, name, type); + } + else + { + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return NULL; + } + } + if (__itt_is_collector_available()) + { + for (h_tail = NULL, h = _N_(_ittapi_global).counter_list; h != NULL; h_tail = h, h = h->next) + { + if (h->nameW != NULL && h->type == (int)type && !wcscmp(h->nameW, name) && ((h->domainW == NULL && domain->nameW == NULL) || + (h->domainW != NULL && domain->nameW != NULL && !wcscmp(h->domainW, domain->nameW)))) break; + + } + if (h == NULL) + { + NEW_COUNTER_W(&_N_(_ittapi_global),h,h_tail,name,domain->nameW,type); + } + } + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return (__itt_counter)h; +} + +static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_createA_v3),_init))(const __itt_domain* domain, const char* name, __itt_metadata_type type) +#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ +static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create_v3),_init))(const __itt_domain* domain, const char* name, __itt_metadata_type type) +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +{ + __itt_counter_info_t *h_tail = NULL, *h = NULL; + + if (name == NULL || domain == NULL) + { + return NULL; + } + + ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); + if (_N_(_ittapi_global).api_initialized) + { +#if ITT_PLATFORM==ITT_PLATFORM_WIN + if (ITTNOTIFY_NAME(counter_createA_v3) && ITTNOTIFY_NAME(counter_createA_v3) != ITT_VERSIONIZE(ITT_JOIN(_N_(counter_createA_v3),_init))) + { + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return ITTNOTIFY_NAME(counter_createA_v3)(domain, name, type); + } +#else + if (ITTNOTIFY_NAME(counter_create_v3) && ITTNOTIFY_NAME(counter_create_v3) != ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create_v3),_init))) + { + if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return ITTNOTIFY_NAME(counter_create_v3)(domain, name, type); + } +#endif + else + { +#if ITT_PLATFORM==ITT_PLATFORM_WIN + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); +#else + if (PTHREAD_SYMBOLS) 
__itt_mutex_unlock(&_N_(_ittapi_global).mutex); +#endif + return NULL; + } + } + if (__itt_is_collector_available()) + { + for (h_tail = NULL, h = _N_(_ittapi_global).counter_list; h != NULL; h_tail = h, h = h->next) + { + if (h->nameA != NULL && h->type == (int)type && !__itt_fstrcmp(h->nameA, name) && ((h->domainA == NULL && domain->nameA == NULL) || + (h->domainA != NULL && domain->nameA != NULL && !__itt_fstrcmp(h->domainA, domain->nameA)))) break; + } + if (h == NULL) + { + NEW_COUNTER_A(&_N_(_ittapi_global),h,h_tail,name,domain->nameA,type); + } + } + if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return (__itt_counter)h; +} + +static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(bind_context_metadata_to_counter),_init))(__itt_counter counter, size_t length, __itt_context_metadata* metadata) +{ + __itt_counter_metadata *h_tail = NULL, *h = NULL; + + if (counter == NULL || length == 0 || metadata == NULL) + { + return; + } + + ITT_MUTEX_INIT_AND_LOCK(_N_(_ittapi_global)); + if (_N_(_ittapi_global).api_initialized) + { + if (ITTNOTIFY_NAME(bind_context_metadata_to_counter) && ITTNOTIFY_NAME(bind_context_metadata_to_counter) != ITT_VERSIONIZE(ITT_JOIN(_N_(bind_context_metadata_to_counter),_init))) + { + if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + ITTNOTIFY_NAME(bind_context_metadata_to_counter)(counter, length, metadata); + } + else + { +#if ITT_PLATFORM==ITT_PLATFORM_WIN + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); +#else + if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); +#endif + return; + } + } + if (__itt_is_collector_available()) + { + size_t item; + char* str_valueA = NULL; +#if ITT_PLATFORM==ITT_PLATFORM_WIN + wchar_t* str_valueW = NULL; +#endif + unsigned long long value = 0; + __itt_context_type type = __itt_context_unknown; + + for (item = 0; item < length; item++) + { + type = metadata[item].type; + for (h_tail = NULL, h = _N_(_ittapi_global).counter_metadata_list; h != NULL; h_tail = h, h = h->next) + { + if (h->counter != NULL && h->counter == counter && h->type == type) break; + } + if (h == NULL && counter != NULL && type != __itt_context_unknown) + { + if (type == __itt_context_nameA || type == __itt_context_deviceA || type == __itt_context_unitsA || type == __itt_context_pci_addrA) + { + str_valueA = (char*)(metadata[item].value); + NEW_COUNTER_METADATA_STR_A(&_N_(_ittapi_global),h,h_tail,counter,type,str_valueA); + } +#if ITT_PLATFORM==ITT_PLATFORM_WIN + else if (type == __itt_context_nameW || type == __itt_context_deviceW || type == __itt_context_unitsW || type == __itt_context_pci_addrW) + { + str_valueW = (wchar_t*)(metadata[item].value); + NEW_COUNTER_METADATA_STR_W(&_N_(_ittapi_global),h,h_tail,counter,type,str_valueW); + } +#endif + else if (type >= __itt_context_tid && type <= __itt_context_cpu_cycles_flag) + { + value = *(unsigned long long*)(metadata[item].value); + NEW_COUNTER_METADATA_NUM(&_N_(_ittapi_global),h,h_tail,counter,type,value); + } + } + } + } + if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); +} /* -------------------------------------------------------------------------- */ static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(pause),_init))(void) @@ -876,6 +1040,30 @@ static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(resume),_init))(void) } } +static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(pause_scoped),_init))(__itt_collection_scope scope) +{ + if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list == NULL) + { + __itt_init_ittlib_name(NULL, 
__itt_group_all); + } + if (ITTNOTIFY_NAME(pause_scoped) && ITTNOTIFY_NAME(pause_scoped) != ITT_VERSIONIZE(ITT_JOIN(_N_(pause_scoped),_init))) + { + ITTNOTIFY_NAME(pause_scoped)(scope); + } +} + +static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(resume_scoped),_init))(__itt_collection_scope scope) +{ + if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list == NULL) + { + __itt_init_ittlib_name(NULL, __itt_group_all); + } + if (ITTNOTIFY_NAME(resume_scoped) && ITTNOTIFY_NAME(resume_scoped) != ITT_VERSIONIZE(ITT_JOIN(_N_(resume_scoped),_init))) + { + ITTNOTIFY_NAME(resume_scoped)(scope); + } +} + #if ITT_PLATFORM==ITT_PLATFORM_WIN static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(thread_set_nameW),_init))(const wchar_t* name) { @@ -1393,6 +1581,20 @@ static void __itt_free_allocated_resources(void) current_histogram = tmp; } _N_(_ittapi_global).histogram_list = NULL; + + + __itt_counter_metadata* current_counter_metadata = _N_(_ittapi_global).counter_metadata_list; + while (current_counter_metadata != NULL) + { + __itt_counter_metadata* tmp = current_counter_metadata->next; + free((char*)current_counter_metadata->str_valueA); +#if ITT_PLATFORM==ITT_PLATFORM_WIN + free((wchar_t*)current_counter_metadata->str_valueW); +#endif + free(current_counter_metadata); + current_counter_metadata = tmp; + } + _N_(_ittapi_global).counter_metadata_list = NULL; } ITT_EXTERN_C int _N_(init_ittlib)(const char* lib_name, __itt_group_id init_groups) diff --git a/contrib/libs/tbb/src/tbb/tools_api/ittnotify_static.h b/contrib/libs/tbb/src/tbb/tools_api/ittnotify_static.h index d59bfac1a2..7f57291401 100644 --- a/contrib/libs/tbb/src/tbb/tools_api/ittnotify_static.h +++ b/contrib/libs/tbb/src/tbb/tools_api/ittnotify_static.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2022 Intel Corporation + Copyright (c) 2005-2023 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
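The *_init bodies added above (counter_create*_v3, bind_context_metadata_to_counter, pause_scoped, resume_scoped) follow the static part's usual pattern: once the API is initialized, forward to the function pointer a loaded collector installed; otherwise record the request in the process-global lists so it can be replayed later. A simplified skeleton of that control flow, with the versioning, mutex, and list-recording macros stripped out (names here are illustrative, not the literal expansion):

#include <cstddef>
#include "ittnotify.h"

/* Illustrative skeleton of an ittnotify "static part" init stub.
 * real_impl is the pointer a loaded collector may have installed;
 * the local-record step stands for the NEW_COUNTER_METADATA_* bookkeeping. */
typedef void (*bind_fn_t)(__itt_counter, size_t, __itt_context_metadata*);

static void bind_metadata_init_sketch(__itt_counter counter, size_t length,
                                      __itt_context_metadata* metadata,
                                      bind_fn_t real_impl, bool api_initialized)
{
    if (counter == NULL || length == 0 || metadata == NULL)
        return;                                    /* argument validation comes first */
    if (api_initialized) {
        if (real_impl != NULL)
            real_impl(counter, length, metadata);  /* collector present: forward the call */
        return;                                    /* collector absent: nothing to record */
    }
    /* No collector attached yet: remember the metadata locally
       (in the real code: walk counter_metadata_list and append a node). */
}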
@@ -66,6 +66,8 @@ ITT_STUB(ITTAPI, __itt_counter, counter_create_typed, (const char *name, con ITT_STUBV(ITTAPI, void, pause, (void), (ITT_NO_PARAMS), pause, __itt_group_control | __itt_group_legacy, "no args") ITT_STUBV(ITTAPI, void, resume, (void), (ITT_NO_PARAMS), resume, __itt_group_control | __itt_group_legacy, "no args") +ITT_STUBV(ITTAPI, void, pause_scoped, (__itt_collection_scope scope), (ITT_FORMAT scope), pause_scoped, __itt_group_control, "%d") +ITT_STUBV(ITTAPI, void, resume_scoped, (__itt_collection_scope scope), (ITT_FORMAT scope), resume_scoped, __itt_group_control, "%d") #if ITT_PLATFORM==ITT_PLATFORM_WIN ITT_STUBV(ITTAPI, void, thread_set_nameA, (const char *name), (ITT_FORMAT name), thread_set_nameA, __itt_group_thread, "\"%s\"") @@ -90,6 +92,15 @@ ITT_STUB(ITTAPI, __itt_histogram*, histogram_createW, (const __itt_domain* domai ITT_STUB(ITTAPI, __itt_histogram*, histogram_create, (const __itt_domain* domain, const char* name, __itt_metadata_type x_type, __itt_metadata_type y_type), (ITT_FORMAT domain, name, x_type, y_type), histogram_create, __itt_group_structure, "%p, \"%s\", %d, %d") #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + #if ITT_PLATFORM==ITT_PLATFORM_WIN +ITT_STUB(ITTAPI, __itt_counter, counter_createA_v3, (const __itt_domain* domain, const char *name, __itt_metadata_type type), (ITT_FORMAT domain, name, type), counter_createA_v3, __itt_group_counter, "%p, \"%s\", %d") +ITT_STUB(ITTAPI, __itt_counter, counter_createW_v3, (const __itt_domain* domain, const wchar_t *name, __itt_metadata_type type), (ITT_FORMAT domain, name, type), counter_createW_v3, __itt_group_counter, "%p, \"%s\", %d") +#else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUB(ITTAPI, __itt_counter, counter_create_v3, (const __itt_domain* domain, const char *name, __itt_metadata_type type), (ITT_FORMAT domain, name, type), counter_create_v3, __itt_group_counter, "%p, \"%s\", %d") +#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ + +ITT_STUBV(ITTAPI, void, bind_context_metadata_to_counter, (__itt_counter counter, size_t length, __itt_context_metadata* metadata), (ITT_FORMAT counter, length, metadata), bind_context_metadata_to_counter, __itt_group_structure, "%p, %lu, %p") + #endif /* __ITT_INTERNAL_BODY */ ITT_STUBV(ITTAPI, void, enable_attach, (void), (ITT_NO_PARAMS), enable_attach, __itt_group_all, "no args") @@ -362,4 +373,6 @@ ITT_STUBV(ITTAPI, void, module_unload, (void *start_addr), (ITT_FORMAT start_add ITT_STUBV(ITTAPI, void, histogram_submit, (__itt_histogram* histogram, size_t length, void* x_data, void* y_data), (ITT_FORMAT histogram, length, x_data, y_data), histogram_submit, __itt_group_structure, "%p, %lu, %p, %p") +ITT_STUBV(ITTAPI, void, counter_set_value_v3, (__itt_counter counter, void *value_ptr), (ITT_FORMAT counter, value_ptr), counter_set_value_v3, __itt_group_counter, "%p, %p") + #endif /* __ITT_INTERNAL_INIT */ diff --git a/contrib/libs/tbb/src/tbb/tools_api/legacy/ittnotify.h b/contrib/libs/tbb/src/tbb/tools_api/legacy/ittnotify.h index 1c40c28884..837bc48008 100644 --- a/contrib/libs/tbb/src/tbb/tools_api/legacy/ittnotify.h +++ b/contrib/libs/tbb/src/tbb/tools_api/legacy/ittnotify.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2022 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
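The stub table above also declares counter_create*_v3 and counter_set_value_v3. Assuming the usual __itt_-prefixed wrappers are generated from these stub names (the exact public spelling depends on the generated ittnotify.h), updating a 64-bit counter created through the v3 API would look roughly like this:

#include "ittnotify.h"

/* Illustrative only: wrapper names assumed from the stubs above. */
static void report_bytes_sent(const __itt_domain* domain, unsigned long long bytes)
{
    /* counter_create_v3 stub signature: (domain, name, metadata type) */
    __itt_counter counter =
        __itt_counter_create_v3(domain, "bytes_sent", __itt_metadata_u64);

    /* counter_set_value_v3 stub signature: the value is passed by pointer,
       its width being defined by the metadata type used at creation time */
    __itt_counter_set_value_v3(counter, &bytes);
}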
@@ -39,6 +39,10 @@ # define ITT_OS_FREEBSD 4 #endif /* ITT_OS_FREEBSD */ +#ifndef ITT_OS_OPENBSD +# define ITT_OS_OPENBSD 5 +#endif /* ITT_OS_OPENBSD */ + #ifndef ITT_OS # if defined WIN32 || defined _WIN32 # define ITT_OS ITT_OS_WIN @@ -46,6 +50,8 @@ # define ITT_OS ITT_OS_MAC # elif defined( __FreeBSD__ ) # define ITT_OS ITT_OS_FREEBSD +# elif defined( __OpenBSD__ ) +# define ITT_OS ITT_OS_OPENBSD # else # define ITT_OS ITT_OS_LINUX # endif @@ -67,6 +73,10 @@ # define ITT_PLATFORM_FREEBSD 4 #endif /* ITT_PLATFORM_FREEBSD */ +#ifndef ITT_PLATFORM_OPENBSD +# define ITT_PLATFORM_OPENBSD 5 +#endif /* ITT_PLATFORM_OPENBSD */ + #ifndef ITT_PLATFORM # if ITT_OS==ITT_OS_WIN # define ITT_PLATFORM ITT_PLATFORM_WIN @@ -74,6 +84,8 @@ # define ITT_PLATFORM ITT_PLATFORM_MAC # elif ITT_OS==ITT_OS_FREEBSD # define ITT_PLATFORM ITT_PLATFORM_FREEBSD +# elif ITT_OS==ITT_OS_OPENBSD +# define ITT_PLATFORM ITT_PLATFORM_OPENBSD # else # define ITT_PLATFORM ITT_PLATFORM_POSIX # endif @@ -233,7 +245,7 @@ extern "C" { * only pauses tracing and analyzing memory access. * It does not pause tracing or analyzing threading APIs. * . - * - Intel(R) Parallel Amplifier and Intel(R) VTune(TM) Amplifier XE: + * - Intel(R) VTune(TM) Profiler: * - Does continue to record when new threads are started. * . * - Other effects: diff --git a/contrib/libs/tbb/src/tbb/waiters.h b/contrib/libs/tbb/src/tbb/waiters.h index 7e0906bee9..8ed431f857 100644 --- a/contrib/libs/tbb/src/tbb/waiters.h +++ b/contrib/libs/tbb/src/tbb/waiters.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2024 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -20,6 +20,7 @@ #include "oneapi/tbb/detail/_task.h" #include "scheduler_common.h" #include "arena.h" +#include "threading_control.h" namespace tbb { namespace detail { @@ -33,7 +34,7 @@ public: bool pause() { if (my_backoff.pause()) { - my_arena.is_out_of_work(); + my_arena.out_of_work(); return true; } @@ -57,6 +58,24 @@ public: __TBB_ASSERT(t == nullptr, nullptr); if (is_worker_should_leave(slot)) { + if (!governor::hybrid_cpu()) { + static constexpr std::chrono::microseconds worker_wait_leave_duration(1000); + static_assert(worker_wait_leave_duration > std::chrono::steady_clock::duration(1), "Clock resolution is not enough for measured interval."); + + for (auto t1 = std::chrono::steady_clock::now(), t2 = t1; + std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1) < worker_wait_leave_duration; + t2 = std::chrono::steady_clock::now()) + { + if (!my_arena.is_empty() && !my_arena.is_recall_requested()) { + return true; + } + + if (my_arena.my_threading_control->is_any_other_client_active()) { + break; + } + d0::yield(); + } + } // Leave dispatch loop return false; } @@ -82,7 +101,7 @@ private: using base_type = waiter_base; bool is_worker_should_leave(arena_slot& slot) const { - bool is_top_priority_arena = my_arena.my_is_top_priority.load(std::memory_order_relaxed); + bool is_top_priority_arena = my_arena.is_top_priority(); bool is_task_pool_empty = slot.task_pool.load(std::memory_order_relaxed) == EmptyTaskPool; if (is_top_priority_arena) { @@ -109,14 +128,11 @@ class sleep_waiter : public waiter_base { protected: using waiter_base::waiter_base; - bool is_arena_empty() { - return my_arena.my_pool_state.load(std::memory_order_relaxed) == arena::SNAPSHOT_EMPTY; - } - template <typename Pred> void sleep(std::uintptr_t uniq_tag, Pred wakeup_condition) { - 
my_arena.my_market->get_wait_list().wait<market_concurrent_monitor::thread_context>(wakeup_condition, + my_arena.get_waiting_threads_monitor().wait<thread_control_monitor::thread_context>(wakeup_condition, market_context{uniq_tag, &my_arena}); + reset_wait(); } }; @@ -139,10 +155,9 @@ public: return; } - auto wakeup_condition = [&] { return !is_arena_empty() || !my_wait_ctx.continue_execution(); }; + auto wakeup_condition = [&] { return !my_arena.is_empty() || !my_wait_ctx.continue_execution(); }; sleep(std::uintptr_t(&my_wait_ctx), wakeup_condition); - my_backoff.reset_wait(); } d1::wait_context* wait_ctx() { @@ -176,14 +191,9 @@ public: suspend_point_type* sp = slot.default_task_dispatcher().m_suspend_point; - auto wakeup_condition = [&] { return !is_arena_empty() || sp->m_is_owner_recalled.load(std::memory_order_relaxed); }; + auto wakeup_condition = [&] { return !my_arena.is_empty() || sp->m_is_owner_recalled.load(std::memory_order_relaxed); }; sleep(std::uintptr_t(sp), wakeup_condition); - my_backoff.reset_wait(); - } - - void reset_wait() { - my_backoff.reset_wait(); } d1::wait_context* wait_ctx() { diff --git a/contrib/libs/tbb/ya.make b/contrib/libs/tbb/ya.make index f5093b99b7..586db7b6b9 100644 --- a/contrib/libs/tbb/ya.make +++ b/contrib/libs/tbb/ya.make @@ -2,9 +2,9 @@ LIBRARY() -VERSION(2021.10.0) +VERSION(2022.0.0) -ORIGINAL_SOURCE(https://github.com/uxlfoundation/oneTBB/archive/v2021.10.0.tar.gz) +ORIGINAL_SOURCE(https://github.com/uxlfoundation/oneTBB/archive/v2022.0.0.tar.gz) LICENSE(Apache-2.0) @@ -20,6 +20,7 @@ NO_UTIL() CFLAGS( -D__TBB_BUILD + -D__TBB_GNU_ASM_VERSION=2041 ) SRCS( @@ -49,6 +50,10 @@ SRCS( src/tbb/task.cpp src/tbb/task_dispatcher.cpp src/tbb/task_group_context.cpp + src/tbb/tcm_adaptor.cpp + src/tbb/thread_dispatcher.cpp + src/tbb/thread_request_serializer.cpp + src/tbb/threading_control.cpp src/tbb/version.cpp ) |
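The waiters.h hunk above makes an outermost worker linger for roughly a millisecond before leaving the dispatch loop: it yields while the arena stays empty, returns to work if tasks appear and no recall was requested, and gives up early when another client has active demand. Detached from the arena and threading-control types, the bounded-spin skeleton looks roughly like this (the two predicates are placeholders for the arena/threading-control queries used in the patch):

#include <chrono>
#include <thread>
#include <functional>

// Sketch of the bounded "wait before leaving" loop added to the worker waiter:
// spin for up to ~1 ms, yielding, and bail out early on either condition.
bool wait_before_leaving(const std::function<bool()>& work_appeared,        // ~ !arena.is_empty() && !recall_requested
                         const std::function<bool()>& other_demand_exists)  // ~ some other client is active
{
    constexpr std::chrono::microseconds wait_leave_duration(1000);
    const auto start = std::chrono::steady_clock::now();
    while (std::chrono::steady_clock::now() - start < wait_leave_duration) {
        if (work_appeared())
            return true;               // stay in the dispatch loop: new work arrived
        if (other_demand_exists())
            break;                     // another client needs the thread: leave immediately
        std::this_thread::yield();     // stand-in for d0::yield() in the patch
    }
    return false;                      // leave the dispatch loop
}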