diff options
author | robot-contrib <robot-contrib@yandex-team.com> | 2022-12-03 18:06:02 +0300 |
---|---|---|
committer | robot-contrib <robot-contrib@yandex-team.com> | 2022-12-03 18:06:02 +0300 |
commit | f549b3d7dc37323cac277957d13084624df1a59f (patch) | |
tree | 084450f16894937538bdc83175c8b920479441b2 /contrib | |
parent | 803a34938e5cab1f2fe4b0079b073a91e7326782 (diff) | |
download | ydb-f549b3d7dc37323cac277957d13084624df1a59f.tar.gz |
Update contrib/libs/tbb to 2021.7.0
Diffstat (limited to 'contrib')
105 files changed, 4430 insertions, 1963 deletions
diff --git a/contrib/libs/tbb/CMakeLists.darwin.txt b/contrib/libs/tbb/CMakeLists.darwin.txt index 3440164918..10a6fb56cb 100644 --- a/contrib/libs/tbb/CMakeLists.darwin.txt +++ b/contrib/libs/tbb/CMakeLists.darwin.txt @@ -24,6 +24,7 @@ target_link_libraries(contrib-libs-tbb PUBLIC contrib-libs-cxxsupp ) target_sources(contrib-libs-tbb PRIVATE + ${CMAKE_SOURCE_DIR}/contrib/libs/tbb/src/tbb/address_waiter.cpp ${CMAKE_SOURCE_DIR}/contrib/libs/tbb/src/tbb/allocator.cpp ${CMAKE_SOURCE_DIR}/contrib/libs/tbb/src/tbb/arena.cpp ${CMAKE_SOURCE_DIR}/contrib/libs/tbb/src/tbb/arena_slot.cpp diff --git a/contrib/libs/tbb/CMakeLists.linux-aarch64.txt b/contrib/libs/tbb/CMakeLists.linux-aarch64.txt index a786d878d4..c071bba85d 100644 --- a/contrib/libs/tbb/CMakeLists.linux-aarch64.txt +++ b/contrib/libs/tbb/CMakeLists.linux-aarch64.txt @@ -20,6 +20,7 @@ target_link_libraries(contrib-libs-tbb PUBLIC contrib-libs-cxxsupp ) target_sources(contrib-libs-tbb PRIVATE + ${CMAKE_SOURCE_DIR}/contrib/libs/tbb/src/tbb/address_waiter.cpp ${CMAKE_SOURCE_DIR}/contrib/libs/tbb/src/tbb/allocator.cpp ${CMAKE_SOURCE_DIR}/contrib/libs/tbb/src/tbb/arena.cpp ${CMAKE_SOURCE_DIR}/contrib/libs/tbb/src/tbb/arena_slot.cpp diff --git a/contrib/libs/tbb/CMakeLists.linux.txt b/contrib/libs/tbb/CMakeLists.linux.txt index 3440164918..10a6fb56cb 100644 --- a/contrib/libs/tbb/CMakeLists.linux.txt +++ b/contrib/libs/tbb/CMakeLists.linux.txt @@ -24,6 +24,7 @@ target_link_libraries(contrib-libs-tbb PUBLIC contrib-libs-cxxsupp ) target_sources(contrib-libs-tbb PRIVATE + ${CMAKE_SOURCE_DIR}/contrib/libs/tbb/src/tbb/address_waiter.cpp ${CMAKE_SOURCE_DIR}/contrib/libs/tbb/src/tbb/allocator.cpp ${CMAKE_SOURCE_DIR}/contrib/libs/tbb/src/tbb/arena.cpp ${CMAKE_SOURCE_DIR}/contrib/libs/tbb/src/tbb/arena_slot.cpp diff --git a/contrib/libs/tbb/CONTRIBUTING.md b/contrib/libs/tbb/CONTRIBUTING.md new file mode 100644 index 0000000000..c8b437083a --- /dev/null +++ b/contrib/libs/tbb/CONTRIBUTING.md @@ -0,0 +1,58 @@ +<!-- 
+****************************************************************************** +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/--> + +# How to Contribute +As an open source project, we welcome community contributions to oneAPI Threading Building Blocks (oneTBB). This document explains how to participate in project conversations, log bugs and enhancement requests, and submit code patches to the project. + +## Licensing + +Licensing is very important to open source projects. It helps ensure the software continues to be available under the terms that the author desired. The oneTBB project uses the [Apache 2.0 License](https://github.com/oneapi-src/oneTBB/blob/master/LICENSE.txt), a permissive open source license that allows you to freely use, modify, and distribute your own products that include Apache 2.0 licensed software. By contributing to the oneTBB project, you agree to the license and copyright terms therein and release your own contributions under these terms. + +Some imported or reused components within oneTBB use other licenses, as described in [third-party-programs.txt](https://github.com/oneapi-src/oneTBB/blob/master/third-party-programs.txt). 
By carefully reviewing potential contributions and enforcing a [Developer Certification of Origin (DCO)](https://developercertificate.org/) for contributed code, we can ensure that the community can develop products with oneTBB without concerns over patent or copyright issues. + +The DCO is an attestation attached to every contribution made by every developer. In the commit message of the contribution, (described later), the developer simply adds a Signed-off-by statement and thereby agrees to the DCO. + +## Prerequisites + +As a contributor, you’ll want to be familiar with the oneTBB project and the repository layout. You should also know how to use it as explained in the [oneTBB documentation](https://oneapi-src.github.io/oneTBB/) and how to set up your build development environment to configure, build, and test oneTBB as explained in the [oneTBB Build System Description](cmake/README.md). + +## Issues +If you face a problem, first check out open [oneTBB GitHub issues](https://github.com/oneapi-src/oneTBB/issues) to see if the issue you’d like to address is already reported. You may find users that have encountered the bug you’re finding or have similar ideas for changes or additions. + +You can use issues to report a problem, make a feature request, or add comments on an existing issue. + +## Pull Requests + +You can find all [open oneTBB pull requests](https://github.com/oneapi-src/oneTBB/pulls) on GitHub. + +No anonymous contributions are accepted. The name in the commit message Signed-off-by line and your email must match the change authorship information. Make sure your .gitconfig is set up correctly so you can use `git commit -s` for signing your patches: + +`git config --global user.name "Taylor Developer"` + +`git config --global user.email taylor.developer@company.com` + +### Before contributing changes directly to the oneTBB repository + +* Make sure you can build the product and run all the tests with your patch. 
+* For a larger feature, provide a relevant test. +* Document your code. The oneTBB project uses reStructuredText for documentation. +* Update the copyright year in the first line of the changing file(s). + For example, if you commit your changes in 2022: + * the copyright year should be `2005-2022` for existing files + * the copyright year should be `2022` for new files +* Submit a pull request into the master branch. You can submit changes with a pull request (preferred) or by sending an email patch. + +Continuous Integration (CI) testing is enabled for the repository. Your pull request must pass all checks before it can be merged. We will review your contribution and may provide feedback to guide you if any additional fixes or modifications are necessary. When reviewed and accepted, your pull request will be merged into our GitHub repository. diff --git a/contrib/libs/tbb/INSTALL.md b/contrib/libs/tbb/INSTALL.md new file mode 100644 index 0000000000..6fff47a165 --- /dev/null +++ b/contrib/libs/tbb/INSTALL.md @@ -0,0 +1,122 @@ +# Installation from Sources + + +## Prerequisites + + - Make sure you have installed CMake version 3.1 (or newer) on your system. oneTBB uses CMake build configuration. + - Configure and build oneTBB. To work with build configurations, see [Build System Description](cmake/README.md). + + +## Configure oneTBB + +At the command prompt, type: +``` +cmake <options> <repo_root> +``` + +You may want to use some additional options for configuration: + +| Option | Purpose | Description | +| ------ |------ | ------ | +| `-G <generator>` | Specify project generator | For more information, run `cmake --help`. | +|`-DCMAKE_BUILD_TYPE=Debug` | Specify for Debug build | Not applicable for multi-configuration generators such as Visual Studio generator. | + + +## Build oneTBB + +To build the system, run: +``` +cmake --build . <options> +``` + +Some useful build options: +- `--target <target>` - specific target, "all" is default. 
+- `--config <Release|Debug>` - build configuration, applicable only for multi-config generators such as Visual Studio generator. + + +## Install and Pack oneTBB + +--- +**NOTE** + +Be careful with the installation prefix. It defaults to `/usr/local` on UNIX* and `c:/Program Files/${PROJECT_NAME}` on Windows* OS. +You can define custom `CMAKE_INSTALL_PREFIX` during configuration: + +``` +cmake -DCMAKE_INSTALL_PREFIX=/my/install/prefix .. +``` + +--- + +Installation can also be done using: + +``` +cmake --install <project-binary-dir> +``` + +The special ``install`` target can alternatively be used for installation, e.g. ``make install``. + +You can use the ``install`` components for partial installation. + +The following install components are supported: +- `runtime` - oneTBB runtime package (core shared libraries and `.dll` files on Windows* OS). +- `devel` - oneTBB development package (header files, CMake integration files, library symbolic links, and `.lib` files on Windows* OS). +- `tbb4py` - [oneTBB Module for Python](#onetbb-python-module-support). + +If you want to install specific components after configuration and build, run: + +```bash +cmake -DCOMPONENT=<component> [-DBUILD_TYPE=<build-type>] -P cmake_install.cmake +``` + +Simple packaging using CPack is supported. +The following commands allow you to create a simple portable package that includes header files, libraries, and integration files for CMake: + +```bash +cmake <options> .. +cpack +``` + +## Example of Installation + +### Single-configuration generators + +The following example demonstrates how to install oneTBB for single-configuration generators (e.g. GNU Make, Ninja, etc.). 
+```bash +# Do our experiments in /tmp +cd /tmp +# Clone oneTBB repository +git clone https://github.com/oneapi-src/oneTBB.git +cd oneTBB +# Create binary directory for out-of-source build +mkdir build && cd build +# Configure: customize CMAKE_INSTALL_PREFIX and disable TBB_TEST to avoid tests build +cmake -DCMAKE_INSTALL_PREFIX=/tmp/my_installed_onetbb -DTBB_TEST=OFF .. +# Build +cmake --build . +# Install +cmake --install . +# Well done! Your installed oneTBB is in /tmp/my_installed_onetbb +``` + +### Multi-configuration generators + +The following example demonstrates how to install oneTBB for multi-configuration generators such as Visual Studio*. + +Choose the configuration during the build and install steps: +```batch +REM Do our experiments in %TMP% +cd %TMP% +REM Clone oneTBB repository +git clone https://github.com/oneapi-src/oneTBB.git +cd oneTBB +REM Create binary directory for out-of-source build +mkdir build && cd build +REM Configure: customize CMAKE_INSTALL_PREFIX and disable TBB_TEST to avoid tests build +cmake -DCMAKE_INSTALL_PREFIX=%TMP%\my_installed_onetbb -DTBB_TEST=OFF .. +REM Build "release with debug information" configuration +cmake --build . --config relwithdebinfo +REM Install "release with debug information" configuration +cmake --install . --config relwithdebinfo +REM Well done! 
Your installed oneTBB is in %TMP%\my_installed_onetbb +``` diff --git a/contrib/libs/tbb/README.md b/contrib/libs/tbb/README.md index cfd8112141..1c506464e0 100644 --- a/contrib/libs/tbb/README.md +++ b/contrib/libs/tbb/README.md @@ -1,49 +1,53 @@ # oneAPI Threading Building Blocks -[![Apache License Version 2.0](https://img.shields.io/badge/license-Apache_2.0-green.svg)](LICENSE.txt) +[![Apache License Version 2.0](https://img.shields.io/badge/license-Apache_2.0-green.svg)](LICENSE.txt) [![oneTBB CI](https://github.com/oneapi-src/oneTBB/actions/workflows/ci.yml/badge.svg)](https://github.com/oneapi-src/oneTBB/actions/workflows/ci.yml?query=branch%3Amaster) -oneAPI Threading Building Blocks (oneTBB) lets you easily write parallel C++ programs that take -full advantage of multicore performance, that are portable, composable and have future-proof scalability. +oneTBB is a flexible C++ library that simplifies the work of adding parallelism +to complex applications, even if you are not a threading expert. + +The library lets you easily write parallel programs that take full advantage of the multi-core performance. Such programs are portable, +composable and have a future-proof scalability. oneTBB provides you with functions, interfaces, and classes to parallelize and scale the code. +All you have to do is to use the templates. + +The library differs from typical threading packages in the following ways: +* oneTBB enables you to specify logical parallelism instead of threads. +* oneTBB targets threading for performance. +* oneTBB is compatible with other threading packages. +* oneTBB emphasizes scalable, data parallel programming. +* oneTBB relies on generic programming. + + +Refer to oneTBB [examples](examples) and [samples](https://github.com/oneapi-src/oneAPI-samples/tree/master/Libraries/oneTBB) to see how you can use the library. + +oneTBB is a part of [oneAPI](https://oneapi.io). The current branch implements version 1.1 of oneAPI Specification. 
## Release Information -Here are [Release Notes]( https://software.intel.com/en-us/articles/intel-oneapi-threading-building-blocks-release-notes) and -[System Requirements](https://software.intel.com/en-us/articles/intel-oneapi-threading-building-blocks-system-requirements). +Here are [Release Notes](RELEASE_NOTES.md) and [System Requirements](SYSTEM_REQUIREMENTS.md). ## Documentation -* [oneTBB documentation](https://software.intel.com/en-us/oneapi-tbb-documentation) -* README for build system: [cmake/README.md](cmake/README.md) +* [oneTBB Specification](https://spec.oneapi.com/versions/latest/elements/oneTBB/source/nested-index.html) +* [oneTBB Developer Guide and Reference](https://oneapi-src.github.io/oneTBB) +* [Migrating from TBB to oneTBB](https://oneapi-src.github.io/oneTBB/main/tbb_userguide/Migration_Guide.html) +* [README for the CMake build system](cmake/README.md) +* [Basic support for the Bazel build system](Bazel.md) +* [oneTBB Discussions](https://github.com/oneapi-src/oneTBB/discussions) + +## Installation +See [Installation from Sources](INSTALL.md) to learn how to install oneTBB. ## Support -Please report issues and suggestions via -[GitHub issues](https://github.com/oneapi-src/oneTBB/issues) or start a topic on the -[oneTBB forum](https://community.intel.com/t5/Intel-oneAPI-Threading-Building/bd-p/oneapi-threading-building-blocks). +Please report issues and suggestions via [GitHub issues](https://github.com/oneapi-src/oneTBB/issues). See our [documentation](./CONTRIBUTING.md#issues) to learn how to work with them. ## How to Contribute -To contribute to oneTBB, please open a GitHub pull request (preferred) or send us a patch by e-mail. - -Please use the sign-off line at the end of the patch. -Your signature certifies that you wrote the patch or -otherwise have the right to pass it on as an open-source patch. 
-The rules are pretty simple: -if you can certify the https://developercertificate.org then you just add a line to every git commit message: -``` -Signed-off-by: Name Surname <user@email.com> -``` -Use your real name (sorry, no pseudonyms or anonymous contributions). - -If you set your `user.name` and `user.email` git configs, you can sign your -commit automatically with `git commit -s`. +We welcome community contributions, so check our [Contributing Guidelines](CONTRIBUTING.md) +to learn more. ## License oneAPI Threading Building Blocks is licensed under [Apache License, Version 2.0](LICENSE.txt). By its terms, contributions submitted to the project are also done under that license. -## Security -To report security issues please go to https://intel.com/security. ## Engineering team contacts -* [E-mail us.](mailto:inteltbbdevelopers@intel.com) +* [Email us.](mailto:inteltbbdevelopers@intel.com) ------------------------------------------------------------------------ -Intel and the Intel logo are trademarks of Intel Corporation or its subsidiaries in the U.S. and/or other countries. - -\* Other names and brands may be claimed as the property of others. +\* All names and brands may be claimed as the property of others. diff --git a/contrib/libs/tbb/RELEASE_NOTES.md b/contrib/libs/tbb/RELEASE_NOTES.md new file mode 100644 index 0000000000..6e7093059a --- /dev/null +++ b/contrib/libs/tbb/RELEASE_NOTES.md @@ -0,0 +1,53 @@ +<!-- +****************************************************************************** +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/--> + +# Release Notes <!-- omit in toc --> +This document contains changes of oneTBB compared to the last release. + +## Table of Contents <!-- omit in toc --> +- [New Features](#new_features) +- [Known Limitations](#known-limitations) +- [Fixed Issues](#fixed-issues) +- [Open-source Contributions Integrated](#open-source-contributions-integrated) + +## :white_check_mark: New Features +- Improved support and use of the latest C++ standards for parallel_sort that allows using this algorithm with user-defined and standard library-defined objects with modern semantics. +- The following features are now fully functional: task_arena extensions, collaborative_call_once, adaptive mutexes, heterogeneous overloads for concurrent_hash_map, and task_scheduler_handle. +- Added support for Windows* Server 2022 and Python 3.10. + +## :rotating_light: Known Limitations +- An application using Parallel STL algorithms in libstdc++ versions 9 and 10 may fail to compile due to incompatible interface changes between earlier versions of Threading Building Blocks (TBB) and oneAPI Threading Building Blocks (oneTBB). Disable support for Parallel STL algorithms by defining PSTL_USE_PARALLEL_POLICIES (in libstdc++ 9) or _GLIBCXX_USE_TBB_PAR_BACKEND (in libstdc++ 10) macro to zero before inclusion of the first standard header file in each translation unit. +- On Linux* OS, if oneAPI Threading Building Blocks (oneTBB) or Threading Building Blocks (TBB) are installed in a system folder like /usr/lib64, the application may fail to link due to the order in which the linker searches for libraries. Use the -L linker option to specify the correct location of oneTBB library. This issue does not affect the program execution. 
+- The oneapi::tbb::info namespace interfaces might unexpectedly change the process affinity mask on Windows* OS systems (see https://github.com/open-mpi/hwloc/issues/366 for details) when using hwloc version lower than 2.5. +- Using a hwloc version other than 1.11, 2.0, or 2.5 may cause undefined behavior on Windows OS. See https://github.com/open-mpi/hwloc/issues/477 for details. +- The NUMA topology may be detected incorrectly on Windows OS machines where the number of NUMA node threads exceeds the size of 1 processor group. +- On Windows OS on ARM64*, when compiling an application using oneTBB with the Microsoft* Compiler, the compiler issues a warning C4324 that a structure was padded due to the alignment specifier. Consider suppressing the warning by specifying /wd4324 to the compiler command line. +- oneTBB does not support fork(). To work around the issue, consider using task_scheduler_handle to join oneTBB worker threads before using fork(). +- C++ exception handling mechanism on Windows* OS on ARM64* might corrupt memory if an exception is thrown from any oneTBB parallel algorithm (see Windows* OS on ARM64* compiler issue: https://developercommunity.visualstudio.com/t/ARM64-incorrect-stack-unwinding-for-alig/1544293). + +## :hammer: Fixed Issues +- Memory allocator crash on a system with an incomplete /proc/meminfo (GitHub* [#584](https://github.com/oneapi-src/oneTBB/issues/584)). +- Incorrect blocking of task stealing (GitHub* [#478](https://github.com/oneapi-src/oneTBB/issues/478)). +- Hang due to incorrect decrement of a limiter_node (GitHub* [#634](https://github.com/oneapi-src/oneTBB/issues/634)). +- Memory corruption in some rare cases when passing big messages in a flow graph (GitHub* [#639](https://github.com/oneapi-src/oneTBB/issues/639)). +- Possible deadlock in a throwable flow graph node with a lightweight policy. 
The lightweight policy is now ignored for functors that can throw exceptions (GitHub* [#420](https://github.com/oneapi-src/oneTBB/issues/420)). +- Crash when obtaining a range from empty ordered and unordered containers (GitHub* [#641](https://github.com/oneapi-src/oneTBB/issues/641)). +- Deadlock in a concurrent_vector resize() that could happen when the new size is less than the previous size (GitHub* [#733](https://github.com/oneapi-src/oneTBB/issues/733)). + +## :octocat: Open-source Contributions Integrated +- Improved aligned memory allocation. Contributed by Andrey Semashev (https://github.com/oneapi-src/oneTBB/pull/671). +- Optimized usage of atomic_fence on IA-32 and Intel(R) 64 architectures. Contributed by Andrey Semashev (https://github.com/oneapi-src/oneTBB/pull/328). +- Fixed incorrect definition of the assignment operator in containers. Contributed by Andrey Semashev (https://github.com/oneapi-src/oneTBB/issues/372). diff --git a/contrib/libs/tbb/SYSTEM_REQUIREMENTS.md b/contrib/libs/tbb/SYSTEM_REQUIREMENTS.md new file mode 100644 index 0000000000..c6a6c0bea5 --- /dev/null +++ b/contrib/libs/tbb/SYSTEM_REQUIREMENTS.md @@ -0,0 +1,65 @@ +<!-- +****************************************************************************** +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*******************************************************************************/--> + +# System Requirements <!-- omit in toc --> +This document provides details about hardware, operating system, and software prerequisites for the oneAPI Threading Building Blocks (oneTBB). + +## Table of Contents <!-- omit in toc --> +- [Supported Hardware](#supported-hardware) +- [Software](#software) + - [Supported Operating Systems](#supported-operating-systems) + - [Supported Compilers](#supported-compilers) + + +## Supported Hardware +- Intel(R) Celeron(R) processor family +- Intel(R) Core* processor family +- Intel(R) Xeon(R) processor family +- Intel(R) Xeon Phi* processor family +- Intel(R) Atom* processor family +- Non-Intel(R) processors compatible with the processors listed above + + +## Software + +### Supported Operating Systems +- Systems with Microsoft* Windows* operating systems + - Microsoft* Windows* 10 + - Microsoft* Windows* Server 2016 + - Microsoft* Windows* Server 2019 +- Systems with Linux* operating systems + - Clear Linux* + - Amazon* Linux 2 + - CentOS* 8 + - Debian* 10 + - Fedora* 34 + - Red Hat* Enterprise Linux* 7, 8 + - SuSE* Linux* Enterprise Server 15 + - Ubuntu* 18.04 LTS, 20.04, 21.04 +- Systems with macOS* operating systems + - macOS* 10.15, 11.x +- Systems with Android* operating systems + - Android* 9 + +### Supported Compilers +- Intel* oneAPI DPC++/C++ Compiler +- Intel* C++ Compiler 19.0 and 19.1 version +- Microsoft* Visual C++ 14.1 (Microsoft* Visual Studio* 2017, Windows* OS only) +- Microsoft* Visual C++ 14.2 (Microsoft* Visual Studio* 2019, Windows* OS only) +- For each supported Linux* operating system, the standard gcc version provided with that operating system is supported + - GNU Compilers (gcc) 4.8.5 - 11.1.1 + - GNU C Library (glibc) version 2.17 - 2.33 + - Clang* 6.0.0 - 12.0.0 diff --git a/contrib/libs/tbb/include/oneapi/tbb/blocked_range.h b/contrib/libs/tbb/include/oneapi/tbb/blocked_range.h index f6612fb4e3..12862fa2a1 
100644 --- a/contrib/libs/tbb/include/oneapi/tbb/blocked_range.h +++ b/contrib/libs/tbb/include/oneapi/tbb/blocked_range.h @@ -40,6 +40,7 @@ namespace d1 { //! A range over which to iterate. /** @ingroup algorithms */ template<typename Value> + __TBB_requires(blocked_range_value<Value>) class blocked_range { public: //! Type of a value @@ -139,12 +140,18 @@ private: } template<typename RowValue, typename ColValue> + __TBB_requires(blocked_range_value<RowValue> && + blocked_range_value<ColValue>) friend class blocked_range2d; template<typename RowValue, typename ColValue, typename PageValue> + __TBB_requires(blocked_range_value<RowValue> && + blocked_range_value<ColValue> && + blocked_range_value<PageValue>) friend class blocked_range3d; template<typename DimValue, unsigned int N, typename> + __TBB_requires(blocked_range_value<DimValue>) friend class blocked_rangeNd_impl; }; diff --git a/contrib/libs/tbb/include/oneapi/tbb/cache_aligned_allocator.h b/contrib/libs/tbb/include/oneapi/tbb/cache_aligned_allocator.h index 645f3fbd2e..f23f99abf6 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/cache_aligned_allocator.h +++ b/contrib/libs/tbb/include/oneapi/tbb/cache_aligned_allocator.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -30,9 +30,9 @@ namespace tbb { namespace detail { namespace r1 { -void* __TBB_EXPORTED_FUNC cache_aligned_allocate(std::size_t size); -void __TBB_EXPORTED_FUNC cache_aligned_deallocate(void* p); -std::size_t __TBB_EXPORTED_FUNC cache_line_size(); +TBB_EXPORT void* __TBB_EXPORTED_FUNC cache_aligned_allocate(std::size_t size); +TBB_EXPORT void __TBB_EXPORTED_FUNC cache_aligned_deallocate(void* p); +TBB_EXPORT std::size_t __TBB_EXPORTED_FUNC cache_line_size(); } namespace d1 { @@ -123,7 +123,7 @@ private: std::size_t cache_line_alignment = correct_alignment(alignment); std::size_t space = correct_size(bytes) + cache_line_alignment; std::uintptr_t base = reinterpret_cast<std::uintptr_t>(m_upstream->allocate(space)); - __TBB_ASSERT(base != 0, "Upstream resource returned NULL."); + __TBB_ASSERT(base != 0, "Upstream resource returned nullptr."); // Round up to the next cache line (align the base address) std::uintptr_t result = (base + cache_line_alignment) & ~(cache_line_alignment - 1); diff --git a/contrib/libs/tbb/include/oneapi/tbb/concurrent_queue.h b/contrib/libs/tbb/include/oneapi/tbb/concurrent_queue.h index c8ae7afff7..18f5bc80cf 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/concurrent_queue.h +++ b/contrib/libs/tbb/include/oneapi/tbb/concurrent_queue.h @@ -26,7 +26,7 @@ namespace tbb { namespace detail { -namespace d1 { +namespace d2 { // A high-performance thread-safe non-blocking concurrent queue. // Multiple threads may each push and pop concurrently. 
@@ -57,7 +57,7 @@ public: my_allocator(a), my_queue_representation(nullptr) { my_queue_representation = static_cast<queue_representation_type*>(r1::cache_aligned_allocate(sizeof(queue_representation_type))); - queue_allocator_traits::construct(my_allocator, my_queue_representation, my_allocator); + queue_allocator_traits::construct(my_allocator, my_queue_representation); __TBB_ASSERT(is_aligned(my_queue_representation, max_nfs_size), "alignment error" ); __TBB_ASSERT(is_aligned(&my_queue_representation->head_counter, max_nfs_size), "alignment error" ); @@ -76,13 +76,13 @@ public: concurrent_queue(const concurrent_queue& src, const allocator_type& a) : concurrent_queue(a) { - my_queue_representation->assign(*src.my_queue_representation, copy_construct_item); + my_queue_representation->assign(*src.my_queue_representation, my_allocator, copy_construct_item); } concurrent_queue(const concurrent_queue& src) : concurrent_queue(queue_allocator_traits::select_on_container_copy_construction(src.get_allocator())) { - my_queue_representation->assign(*src.my_queue_representation, copy_construct_item); + my_queue_representation->assign(*src.my_queue_representation, my_allocator, copy_construct_item); } // Move constructors @@ -101,7 +101,7 @@ public: internal_swap(src); } else { // allocators are different => performing per-element move - my_queue_representation->assign(*src.my_queue_representation, move_construct_item); + my_queue_representation->assign(*src.my_queue_representation, my_allocator, move_construct_item); src.clear(); } } @@ -109,7 +109,7 @@ public: // Destroy queue ~concurrent_queue() { clear(); - my_queue_representation->clear(); + my_queue_representation->clear(my_allocator); queue_allocator_traits::destroy(my_allocator, my_queue_representation); r1::cache_aligned_deallocate(my_queue_representation); } @@ -177,7 +177,7 @@ private: template <typename... Args> void internal_push( Args&&... 
args ) { ticket_type k = my_queue_representation->tail_counter++; - my_queue_representation->choose(k).push(k, *my_queue_representation, std::forward<Args>(args)...); + my_queue_representation->choose(k).push(k, *my_queue_representation, my_allocator, std::forward<Args>(args)...); } bool internal_try_pop( void* dst ) { @@ -193,7 +193,7 @@ private: // Queue had item with ticket k when we looked. Attempt to get that item. // Another thread snatched the item, retry. } while (!my_queue_representation->head_counter.compare_exchange_strong(k, k + 1)); - } while (!my_queue_representation->choose(k).pop(dst, k, *my_queue_representation)); + } while (!my_queue_representation->choose(k).pop(dst, k, *my_queue_representation, my_allocator)); return true; } @@ -227,39 +227,26 @@ concurrent_queue( It, It, Alloc = Alloc() ) class concurrent_monitor; -template <typename FuncType> -class delegated_function : public delegate_base { -public: - delegated_function(FuncType& f) : my_func(f) {} - - bool operator()() const override { - return my_func(); - } - -private: - FuncType &my_func; -}; // class delegated_function - // The concurrent monitor tags for concurrent_bounded_queue. 
static constexpr std::size_t cbq_slots_avail_tag = 0; static constexpr std::size_t cbq_items_avail_tag = 1; -} // namespace d1 +} // namespace d2 namespace r1 { class concurrent_monitor; - std::uint8_t* __TBB_EXPORTED_FUNC allocate_bounded_queue_rep( std::size_t queue_rep_size ); - void __TBB_EXPORTED_FUNC deallocate_bounded_queue_rep( std::uint8_t* mem, std::size_t queue_rep_size ); - void __TBB_EXPORTED_FUNC abort_bounded_queue_monitors( concurrent_monitor* monitors ); - void __TBB_EXPORTED_FUNC notify_bounded_queue_monitor( concurrent_monitor* monitors, std::size_t monitor_tag + TBB_EXPORT std::uint8_t* __TBB_EXPORTED_FUNC allocate_bounded_queue_rep( std::size_t queue_rep_size ); + TBB_EXPORT void __TBB_EXPORTED_FUNC deallocate_bounded_queue_rep( std::uint8_t* mem, std::size_t queue_rep_size ); + TBB_EXPORT void __TBB_EXPORTED_FUNC abort_bounded_queue_monitors( concurrent_monitor* monitors ); + TBB_EXPORT void __TBB_EXPORTED_FUNC notify_bounded_queue_monitor( concurrent_monitor* monitors, std::size_t monitor_tag , std::size_t ticket ); - void __TBB_EXPORTED_FUNC wait_bounded_queue_monitor( concurrent_monitor* monitors, std::size_t monitor_tag, + TBB_EXPORT void __TBB_EXPORTED_FUNC wait_bounded_queue_monitor( concurrent_monitor* monitors, std::size_t monitor_tag, std::ptrdiff_t target, d1::delegate_base& predicate ); } // namespace r1 -namespace d1 { +namespace d2 { // A high-performance thread-safe blocking concurrent bounded queue. // Supports boundedness and blocking semantics. // Multiple threads may each push and pop concurrently. 
@@ -273,7 +260,7 @@ class concurrent_bounded_queue { template <typename FuncType> void internal_wait(r1::concurrent_monitor* monitors, std::size_t monitor_tag, std::ptrdiff_t target, FuncType pred) { - delegated_function<FuncType> func(pred); + d1::delegated_function<FuncType> func(pred); r1::wait_bounded_queue_monitor(monitors, monitor_tag, target, func); } public: @@ -298,7 +285,7 @@ public: my_queue_representation = reinterpret_cast<queue_representation_type*>( r1::allocate_bounded_queue_rep(sizeof(queue_representation_type))); my_monitors = reinterpret_cast<r1::concurrent_monitor*>(my_queue_representation + 1); - queue_allocator_traits::construct(my_allocator, my_queue_representation, my_allocator); + queue_allocator_traits::construct(my_allocator, my_queue_representation); my_capacity = std::size_t(-1) / (queue_representation_type::item_size > 1 ? queue_representation_type::item_size : 2); __TBB_ASSERT(is_aligned(my_queue_representation, max_nfs_size), "alignment error" ); @@ -318,13 +305,13 @@ public: concurrent_bounded_queue( const concurrent_bounded_queue& src, const allocator_type& a ) : concurrent_bounded_queue(a) { - my_queue_representation->assign(*src.my_queue_representation, copy_construct_item); + my_queue_representation->assign(*src.my_queue_representation, my_allocator, copy_construct_item); } concurrent_bounded_queue( const concurrent_bounded_queue& src ) : concurrent_bounded_queue(queue_allocator_traits::select_on_container_copy_construction(src.get_allocator())) { - my_queue_representation->assign(*src.my_queue_representation, copy_construct_item); + my_queue_representation->assign(*src.my_queue_representation, my_allocator, copy_construct_item); } // Move constructors @@ -343,7 +330,7 @@ public: internal_swap(src); } else { // allocators are different => performing per-element move - my_queue_representation->assign(*src.my_queue_representation, move_construct_item); + my_queue_representation->assign(*src.my_queue_representation, my_allocator, 
move_construct_item); src.clear(); } } @@ -351,7 +338,7 @@ public: // Destroy queue ~concurrent_bounded_queue() { clear(); - my_queue_representation->clear(); + my_queue_representation->clear(my_allocator); queue_allocator_traits::destroy(my_allocator, my_queue_representation); r1::deallocate_bounded_queue_rep(reinterpret_cast<std::uint8_t*>(my_queue_representation), sizeof(queue_representation_type)); @@ -469,12 +456,12 @@ private: try_call( [&] { internal_wait(my_monitors, cbq_slots_avail_tag, target, pred); }).on_exception( [&] { - my_queue_representation->choose(ticket).abort_push(ticket, *my_queue_representation); + my_queue_representation->choose(ticket).abort_push(ticket, *my_queue_representation, my_allocator); }); } __TBB_ASSERT((static_cast<std::ptrdiff_t>(my_queue_representation->head_counter.load(std::memory_order_relaxed)) > target), nullptr); - my_queue_representation->choose(ticket).push(ticket, *my_queue_representation, std::forward<Args>(args)...); + my_queue_representation->choose(ticket).push(ticket, *my_queue_representation, my_allocator, std::forward<Args>(args)...); r1::notify_bounded_queue_monitor(my_monitors, cbq_items_avail_tag, ticket); } @@ -490,7 +477,7 @@ private: // Another thread claimed the slot, so retry. 
} while (!my_queue_representation->tail_counter.compare_exchange_strong(ticket, ticket + 1)); - my_queue_representation->choose(ticket).push(ticket, *my_queue_representation, std::forward<Args>(args)...); + my_queue_representation->choose(ticket).push(ticket, *my_queue_representation, my_allocator, std::forward<Args>(args)...); r1::notify_bounded_queue_monitor(my_monitors, cbq_items_avail_tag, ticket); return true; } @@ -518,7 +505,7 @@ private: }); } __TBB_ASSERT(static_cast<std::ptrdiff_t>(my_queue_representation->tail_counter.load(std::memory_order_relaxed)) > target, nullptr); - } while (!my_queue_representation->choose(target).pop(dst, target, *my_queue_representation)); + } while (!my_queue_representation->choose(target).pop(dst, target, *my_queue_representation, my_allocator)); r1::notify_bounded_queue_monitor(my_monitors, cbq_slots_avail_tag, target); return true; @@ -536,7 +523,7 @@ private: // Queue had item with ticket k when we looked. Attempt to get that item. // Another thread snatched the item, retry. 
} while (!my_queue_representation->head_counter.compare_exchange_strong(ticket, ticket + 1)); - } while (!my_queue_representation->choose(ticket).pop(dst, ticket, *my_queue_representation)); + } while (!my_queue_representation->choose(ticket).pop(dst, ticket, *my_queue_representation, my_allocator)); r1::notify_bounded_queue_monitor(my_monitors, cbq_slots_avail_tag, ticket); return true; @@ -576,13 +563,13 @@ concurrent_bounded_queue( It, It, Alloc = Alloc() ) #endif /* __TBB_CPP17_DEDUCTION_GUIDES_PRESENT */ -} //namespace d1 +} //namespace d2 } // namesapce detail inline namespace v1 { -using detail::d1::concurrent_queue; -using detail::d1::concurrent_bounded_queue; +using detail::d2::concurrent_queue; +using detail::d2::concurrent_bounded_queue; using detail::r1::user_abort; using detail::r1::bad_last_alloc; diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_assert.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_assert.h index 4116386a92..fce714ffec 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_assert.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_assert.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -19,25 +19,37 @@ #include "_config.h" +#if __TBBMALLOC_BUILD +namespace rml { namespace internal { +#else namespace tbb { namespace detail { namespace r1 { +#endif //! Process an assertion failure. /** Normally called from __TBB_ASSERT macro. If assertion handler is null, print message for assertion failure and abort. Otherwise call the assertion handler. 
*/ -void __TBB_EXPORTED_FUNC assertion_failure(const char* filename, int line, const char* expression, const char* comment); +TBB_EXPORT void __TBB_EXPORTED_FUNC assertion_failure(const char* location, int line, const char* expression, const char* comment); +#if __TBBMALLOC_BUILD +}} // namespaces rml::internal +#else } // namespace r1 } // namespace detail } // namespace tbb +#endif +#if __TBBMALLOC_BUILD //! Release version of assertions -#define __TBB_ASSERT_RELEASE(predicate,message) ((predicate)?((void)0) : tbb::detail::r1::assertion_failure(__FILE__,__LINE__,#predicate,message)) +#define __TBB_ASSERT_RELEASE(predicate,message) ((predicate)?((void)0) : rml::internal::assertion_failure(__func__,__LINE__,#predicate,message)) +#else +#define __TBB_ASSERT_RELEASE(predicate,message) ((predicate)?((void)0) : tbb::detail::r1::assertion_failure(__func__,__LINE__,#predicate,message)) +#endif #if TBB_USE_ASSERT //! Assert that predicate is true. /** If predicate is false, print assertion failure message. - If the comment argument is not NULL, it is printed as part of the failure message. + If the comment argument is not nullptr, it is printed as part of the failure message. The comment argument has no other effect. */ #define __TBB_ASSERT(predicate,message) __TBB_ASSERT_RELEASE(predicate,message) //! "Extended" version diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_attach.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_attach.h new file mode 100644 index 0000000000..45f29727a0 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_attach.h @@ -0,0 +1,32 @@ +/* + Copyright (c) 2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_detail__attach_H +#define __TBB_detail__attach_H + +#include "_config.h" + +namespace tbb { +namespace detail { +namespace d1 { + + struct attach {}; + +} // namespace d1 +} // namespace detail +} // namespace tbb + +#endif // __TBB_detail__attach_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_concurrent_queue_base.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_concurrent_queue_base.h index 6289632601..8bdf213230 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_concurrent_queue_base.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_concurrent_queue_base.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -30,7 +30,7 @@ namespace tbb { namespace detail { -namespace d1 { +namespace d2 { using ticket_type = std::size_t; @@ -67,6 +67,7 @@ public: using allocator_type = Allocator; using allocator_traits_type = tbb::detail::allocator_traits<allocator_type>; + using queue_allocator_type = typename allocator_traits_type::template rebind_alloc<queue_rep_type>; static constexpr size_type item_size = sizeof(T); static constexpr size_type items_per_page = item_size <= 8 ? 
32 : @@ -123,7 +124,7 @@ public: } if (tail_counter.load(std::memory_order_relaxed) != k) spin_wait_until_my_turn(tail_counter, k, base); - call_itt_notify(acquired, &tail_counter); + d1::call_itt_notify(d1::acquired, &tail_counter); if (p) { spin_mutex::scoped_lock lock( page_mutex ); @@ -133,7 +134,7 @@ public: } else { head_page.store(p, std::memory_order_relaxed); } - tail_page.store(p, std::memory_order_relaxed);; + tail_page.store(p, std::memory_order_release); } else { p = tail_page.load(std::memory_order_acquire); // TODO may be relaxed ? } @@ -141,10 +142,10 @@ public: } template<typename... Args> - void push( ticket_type k, queue_rep_type& base, Args&&... args ) + void push( ticket_type k, queue_rep_type& base, queue_allocator_type& allocator, Args&&... args ) { padded_page* p = nullptr; - page_allocator_type page_allocator(base.get_allocator()); + page_allocator_type page_allocator(allocator); size_type index = prepare_page(k, base, page_allocator, p); __TBB_ASSERT(p != nullptr, "Page was not prepared"); @@ -152,38 +153,38 @@ public: // variadic capture on GCC 4.8.5 auto value_guard = make_raii_guard([&] { ++base.n_invalid_entries; - call_itt_notify(releasing, &tail_counter); + d1::call_itt_notify(d1::releasing, &tail_counter); tail_counter.fetch_add(queue_rep_type::n_queue); }); page_allocator_traits::construct(page_allocator, &(*p)[index], std::forward<Args>(args)...); // If no exception was thrown, mark item as present. 
p->mask.store(p->mask.load(std::memory_order_relaxed) | uintptr_t(1) << index, std::memory_order_relaxed); - call_itt_notify(releasing, &tail_counter); + d1::call_itt_notify(d1::releasing, &tail_counter); value_guard.dismiss(); tail_counter.fetch_add(queue_rep_type::n_queue); } - void abort_push( ticket_type k, queue_rep_type& base) { + void abort_push( ticket_type k, queue_rep_type& base, queue_allocator_type& allocator ) { padded_page* p = nullptr; - prepare_page(k, base, base.get_allocator(), p); + prepare_page(k, base, allocator, p); ++base.n_invalid_entries; tail_counter.fetch_add(queue_rep_type::n_queue); } - bool pop( void* dst, ticket_type k, queue_rep_type& base ) { + bool pop( void* dst, ticket_type k, queue_rep_type& base, queue_allocator_type& allocator) { k &= -queue_rep_type::n_queue; - if (head_counter.load(std::memory_order_relaxed) != k) spin_wait_until_eq(head_counter, k); - call_itt_notify(acquired, &head_counter); - if (tail_counter.load(std::memory_order_relaxed) == k) spin_wait_while_eq(tail_counter, k); - call_itt_notify(acquired, &tail_counter); + spin_wait_until_eq(head_counter, k); + d1::call_itt_notify(d1::acquired, &head_counter); + spin_wait_while_eq(tail_counter, k); + d1::call_itt_notify(d1::acquired, &tail_counter); padded_page *p = head_page.load(std::memory_order_acquire); __TBB_ASSERT( p, nullptr ); size_type index = modulo_power_of_two( k/queue_rep_type::n_queue, items_per_page ); bool success = false; { - page_allocator_type page_allocator(base.get_allocator()); + page_allocator_type page_allocator(allocator); micro_queue_pop_finalizer<self_type, value_type, page_allocator_type> finalizer(*this, page_allocator, k + queue_rep_type::n_queue, index == items_per_page - 1 ? 
p : nullptr ); if (p->mask.load(std::memory_order_relaxed) & (std::uintptr_t(1) << index)) { @@ -196,7 +197,7 @@ public: return success; } - micro_queue& assign( const micro_queue& src, queue_rep_type& base, + micro_queue& assign( const micro_queue& src, queue_allocator_type& allocator, item_constructor_type construct_item ) { head_counter.store(src.head_counter.load(std::memory_order_relaxed), std::memory_order_relaxed); @@ -211,7 +212,7 @@ public: size_type end_in_first_page = (index+n_items < items_per_page) ? (index + n_items) : items_per_page; try_call( [&] { - head_page.store(make_copy(base, srcp, index, end_in_first_page, g_index, construct_item), std::memory_order_relaxed); + head_page.store(make_copy(allocator, srcp, index, end_in_first_page, g_index, construct_item), std::memory_order_relaxed); }).on_exception( [&] { head_counter.store(0, std::memory_order_relaxed); tail_counter.store(0, std::memory_order_relaxed); @@ -221,7 +222,7 @@ public: try_call( [&] { if (srcp != src.tail_page.load(std::memory_order_relaxed)) { for (srcp = srcp->next; srcp != src.tail_page.load(std::memory_order_relaxed); srcp=srcp->next ) { - cur_page->next = make_copy( base, srcp, 0, items_per_page, g_index, construct_item ); + cur_page->next = make_copy( allocator, srcp, 0, items_per_page, g_index, construct_item ); cur_page = cur_page->next; } @@ -229,7 +230,7 @@ public: size_type last_index = modulo_power_of_two(tail_counter.load(std::memory_order_relaxed) / queue_rep_type::n_queue, items_per_page); if( last_index==0 ) last_index = items_per_page; - cur_page->next = make_copy( base, srcp, 0, last_index, g_index, construct_item ); + cur_page->next = make_copy( allocator, srcp, 0, last_index, g_index, construct_item ); cur_page = cur_page->next; } tail_page.store(cur_page, std::memory_order_relaxed); @@ -244,10 +245,10 @@ public: return *this; } - padded_page* make_copy( queue_rep_type& base, const padded_page* src_page, size_type begin_in_page, + padded_page* make_copy( 
queue_allocator_type& allocator, const padded_page* src_page, size_type begin_in_page, size_type end_in_page, ticket_type& g_index, item_constructor_type construct_item ) { - page_allocator_type page_allocator(base.get_allocator()); + page_allocator_type page_allocator(allocator); padded_page* new_page = page_allocator_traits::allocate(page_allocator, 1); new_page->next = nullptr; new_page->mask.store(src_page->mask.load(std::memory_order_relaxed), std::memory_order_relaxed); @@ -287,10 +288,10 @@ public: tail_page.store(pg, std::memory_order_relaxed); } - void clear(queue_rep_type& base) { + void clear(queue_allocator_type& allocator ) { padded_page* curr_page = head_page.load(std::memory_order_relaxed); std::size_t index = head_counter.load(std::memory_order_relaxed); - page_allocator_type page_allocator(base.get_allocator()); + page_allocator_type page_allocator(allocator); while (curr_page) { for (; index != items_per_page - 1; ++index) { @@ -377,12 +378,12 @@ public: if( is_valid_page(p) ) { spin_mutex::scoped_lock lock( my_queue.page_mutex ); padded_page* q = p->next; - my_queue.head_page.store(q, std::memory_order_relaxed); + my_queue.head_page.store(q, std::memory_order_release); if( !is_valid_page(q) ) { - my_queue.tail_page.store(nullptr, std::memory_order_relaxed); + my_queue.tail_page.store(nullptr, std::memory_order_release); } } - my_queue.head_counter.store(my_ticket_type, std::memory_order_relaxed); + my_queue.head_counter.store(my_ticket_type, std::memory_order_release); if ( is_valid_page(p) ) { allocator_traits_type::destroy(allocator, static_cast<padded_page*>(p)); allocator_traits_type::deallocate(allocator, static_cast<padded_page*>(p), 1); @@ -423,14 +424,13 @@ public: static constexpr size_type item_size = micro_queue_type::item_size; static constexpr size_type items_per_page = micro_queue_type::items_per_page; - concurrent_queue_rep( queue_allocator_type& alloc ) : my_queue_allocator(alloc) - {} + concurrent_queue_rep() {} 
concurrent_queue_rep( const concurrent_queue_rep& ) = delete; concurrent_queue_rep& operator=( const concurrent_queue_rep& ) = delete; - void clear() { - page_allocator_type page_allocator(my_queue_allocator); + void clear( queue_allocator_type& alloc ) { + page_allocator_type page_allocator(alloc); for (size_type i = 0; i < n_queue; ++i) { padded_page* tail_page = array[i].get_tail_page(); if( is_valid_page(tail_page) ) { @@ -444,7 +444,7 @@ public: } } - void assign( const concurrent_queue_rep& src, item_constructor_type construct_item ) { + void assign( const concurrent_queue_rep& src, queue_allocator_type& alloc, item_constructor_type construct_item ) { head_counter.store(src.head_counter.load(std::memory_order_relaxed), std::memory_order_relaxed); tail_counter.store(src.tail_counter.load(std::memory_order_relaxed), std::memory_order_relaxed); n_invalid_entries.store(src.n_invalid_entries.load(std::memory_order_relaxed), std::memory_order_relaxed); @@ -453,11 +453,11 @@ public: size_type queue_idx = 0; try_call( [&] { for (; queue_idx < n_queue; ++queue_idx) { - array[queue_idx].assign(src.array[queue_idx], *this, construct_item); + array[queue_idx].assign(src.array[queue_idx], alloc, construct_item); } }).on_exception( [&] { for (size_type i = 0; i < queue_idx + 1; ++i) { - array[i].clear(*this); + array[i].clear(alloc); } head_counter.store(0, std::memory_order_relaxed); tail_counter.store(0, std::memory_order_relaxed); @@ -478,7 +478,7 @@ public: } std::ptrdiff_t size() const { - __TBB_ASSERT(sizeof(std::ptrdiff_t) <= sizeof(size_type), NULL); + __TBB_ASSERT(sizeof(std::ptrdiff_t) <= sizeof(size_type), nullptr); std::ptrdiff_t hc = head_counter.load(std::memory_order_acquire); std::ptrdiff_t tc = tail_counter.load(std::memory_order_relaxed); std::ptrdiff_t nie = n_invalid_entries.load(std::memory_order_relaxed); @@ -486,10 +486,6 @@ public: return tc - hc - nie; } - queue_allocator_type& get_allocator() { - return my_queue_allocator; - } - friend class 
micro_queue<T, Allocator>; // Map ticket_type to an array index @@ -507,7 +503,6 @@ public: alignas(max_nfs_size) std::atomic<ticket_type> head_counter{}; alignas(max_nfs_size) std::atomic<ticket_type> tail_counter{}; alignas(max_nfs_size) std::atomic<size_type> n_invalid_entries{}; - queue_allocator_type& my_queue_allocator; }; // class concurrent_queue_rep #if _MSC_VER && !defined(__INTEL_COMPILER) @@ -588,7 +583,7 @@ protected: Value* my_item{ nullptr }; queue_rep_type* my_queue_rep{ nullptr }; ticket_type my_head_counter{}; - padded_page* my_array[queue_rep_type::n_queue]; + padded_page* my_array[queue_rep_type::n_queue]{}; }; // class concurrent_queue_iterator_base struct concurrent_queue_iterator_provider { @@ -652,7 +647,7 @@ private: friend struct concurrent_queue_iterator_provider; }; // class concurrent_queue_iterator -} // namespace d1 +} // namespace d2 } // namespace detail } // tbb diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_config.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_config.h index 251ebb8d82..7c96ad658e 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_config.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_config.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -28,6 +28,8 @@ /* Check which standard library we use. 
*/ #include <cstddef> +#include "_export.h" + #if _MSC_VER #define __TBB_EXPORTED_FUNC __cdecl #define __TBB_EXPORTED_METHOD __thiscall @@ -44,7 +46,7 @@ #define __TBB_CPP14_PRESENT (__TBB_LANG >= 201402L) #define __TBB_CPP17_PRESENT (__TBB_LANG >= 201703L) -#define __TBB_CPP20_PRESENT (__TBB_LANG >= 201709L) +#define __TBB_CPP20_PRESENT (__TBB_LANG >= 202002L) #if __INTEL_COMPILER || _MSC_VER #define __TBB_NOINLINE(decl) __declspec(noinline) decl @@ -92,7 +94,7 @@ #define __TBB_IS_MACRO_EMPTY(A,IGNORED) __TBB_CONCAT_AUX(__TBB_MACRO_EMPTY,A) #define __TBB_MACRO_EMPTY 1 -#if _M_X64 +#if _M_X64 || _M_ARM64 #define __TBB_W(name) name##64 #else #define __TBB_W(name) name @@ -206,6 +208,15 @@ #define __TBB_USE_OPTIONAL_RTTI (__GXX_RTTI || __RTTI || __INTEL_RTTI__) #endif +/** Address sanitizer detection **/ +#ifdef __SANITIZE_ADDRESS__ + #define __TBB_USE_ADDRESS_SANITIZER 1 +#elif defined(__has_feature) +#if __has_feature(address_sanitizer) + #define __TBB_USE_ADDRESS_SANITIZER 1 +#endif +#endif + /** Library features presence macros **/ #define __TBB_CPP14_INTEGER_SEQUENCE_PRESENT (__TBB_LANG >= 201402L) @@ -249,14 +260,15 @@ #define __TBB_CPP17_LOGICAL_OPERATIONS_PRESENT (__TBB_LANG >= 201703L) #define __TBB_CPP17_ALLOCATOR_IS_ALWAYS_EQUAL_PRESENT (__TBB_LANG >= 201703L) #define __TBB_CPP17_IS_SWAPPABLE_PRESENT (__TBB_LANG >= 201703L) -#define __TBB_CPP20_COMPARISONS_PRESENT __TBB_CPP20_PRESENT -#if (!__TBB_WIN8UI_SUPPORT && !__ANDROID__ && !__APPLE__ && !defined(_musl_)) -#define __TBB_RESUMABLE_TASKS 1 +#if defined(__cpp_impl_three_way_comparison) && defined(__cpp_lib_three_way_comparison) + #define __TBB_CPP20_COMPARISONS_PRESENT ((__cpp_impl_three_way_comparison >= 201907L) && (__cpp_lib_three_way_comparison >= 201907L)) #else -#define __TBB_RESUMABLE_TASKS 0 + #define __TBB_CPP20_COMPARISONS_PRESENT __TBB_CPP20_PRESENT #endif +#define __TBB_RESUMABLE_TASKS (!__TBB_WIN8UI_SUPPORT && !__ANDROID__ && !__QNXNTO__ && (!__linux__ || __GLIBC__)) + /* This macro marks 
incomplete code or comments describing ideas which are considered for the future. * See also for plain comment with TODO and FIXME marks for small improvement opportunities. */ @@ -287,6 +299,10 @@ #define __TBB_GCC_WARNING_IGNORED_ATTRIBUTES_PRESENT (__TBB_GCC_VERSION >= 60100) #endif +#if __GNUC__ && !__INTEL_COMPILER && !__clang__ + #define __TBB_GCC_PARAMETER_PACK_IN_LAMBDAS_BROKEN (__TBB_GCC_VERSION <= 40805) +#endif + #define __TBB_CPP17_FALLTHROUGH_PRESENT (__TBB_LANG >= 201703L) #define __TBB_CPP17_NODISCARD_PRESENT (__TBB_LANG >= 201703L) #define __TBB_FALLTHROUGH_PRESENT (__TBB_GCC_VERSION >= 70000 && !__INTEL_COMPILER) @@ -310,10 +326,10 @@ #define __TBB_CPP17_UNCAUGHT_EXCEPTIONS_PRESENT (_MSC_VER >= 1900 || __GLIBCXX__ && __cpp_lib_uncaught_exceptions \ || _LIBCPP_VERSION >= 3700 && (!__TBB_MACOS_TARGET_VERSION || __TBB_MACOS_TARGET_VERSION >= 101200) && !__TBB_IOS) - #define __TBB_TSX_INTRINSICS_PRESENT ((__RTM__ || (_MSC_VER>=1700 && !__clang__) || __INTEL_COMPILER>=1300) && !__TBB_DEFINE_MIC && !__ANDROID__) -#define __TBB_WAITPKG_INTRINSICS_PRESENT ((__INTEL_COMPILER >= 1900 || __TBB_GCC_VERSION >= 110000 || __TBB_CLANG_VERSION >= 120000) && !__ANDROID__) +#define __TBB_WAITPKG_INTRINSICS_PRESENT ((__INTEL_COMPILER >= 1900 || __TBB_GCC_VERSION >= 110000 || __TBB_CLANG_VERSION >= 120000) \ + && (_WIN32 || _WIN64 || __unix__ || __APPLE__) && (__TBB_x86_32 || __TBB_x86_64) && !__ANDROID__) /** Internal TBB features & modes **/ @@ -357,10 +373,6 @@ #define __TBB_ARENA_BINDING 1 #endif -#if TBB_PREVIEW_WAITING_FOR_WORKERS || __TBB_BUILD - #define __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE 1 -#endif - #if (TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION || __TBB_BUILD) && __TBB_ARENA_BINDING #define __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT 1 #endif @@ -390,7 +402,7 @@ // instantiation site, which is too late for suppression of the corresponding messages for internal // stuff. 
#if !defined(__INTEL_COMPILER) && (!defined(TBB_SUPPRESS_DEPRECATED_MESSAGES) || (TBB_SUPPRESS_DEPRECATED_MESSAGES == 0)) - #if (__TBB_LANG >= 201402L) + #if (__TBB_LANG >= 201402L && (!defined(_MSC_VER) || _MSC_VER >= 1920)) #define __TBB_DEPRECATED [[deprecated]] #define __TBB_DEPRECATED_MSG(msg) [[deprecated(msg)]] #elif _MSC_VER @@ -434,6 +446,36 @@ #endif #endif +#if __SANITIZE_THREAD__ + #define __TBB_USE_THREAD_SANITIZER 1 +#elif defined(__has_feature) +#if __has_feature(thread_sanitizer) + #define __TBB_USE_THREAD_SANITIZER 1 +#endif +#endif + +#ifndef __TBB_USE_SANITIZERS +#define __TBB_USE_SANITIZERS (__TBB_USE_THREAD_SANITIZER || __TBB_USE_ADDRESS_SANITIZER) +#endif + +#ifndef __TBB_RESUMABLE_TASKS_USE_THREADS +#define __TBB_RESUMABLE_TASKS_USE_THREADS __TBB_USE_SANITIZERS +#endif + +#ifndef __TBB_USE_CONSTRAINTS +#define __TBB_USE_CONSTRAINTS 1 +#endif + +#ifndef __TBB_STRICT_CONSTRAINTS +#define __TBB_STRICT_CONSTRAINTS 1 +#endif + +#if __TBB_CPP20_CONCEPTS_PRESENT && __TBB_USE_CONSTRAINTS + #define __TBB_requires(...) requires __VA_ARGS__ +#else // __TBB_CPP20_CONCEPTS_PRESENT + #define __TBB_requires(...) +#endif // __TBB_CPP20_CONCEPTS_PRESENT + /** Macros of the form __TBB_XXX_BROKEN denote known issues that are caused by the bugs in compilers, standard or OS specific libraries. 
They should be removed as soon as the corresponding bugs are fixed or the buggy OS/compiler @@ -473,6 +515,14 @@ #define __TBB_PREVIEW_FLOW_GRAPH_NODE_SET (TBB_PREVIEW_FLOW_GRAPH_FEATURES) #endif +#if TBB_PREVIEW_CONCURRENT_HASH_MAP_EXTENSIONS +#define __TBB_PREVIEW_CONCURRENT_HASH_MAP_EXTENSIONS 1 +#endif + +#if TBB_PREVIEW_TASK_GROUP_EXTENSIONS || __TBB_BUILD +#define __TBB_PREVIEW_TASK_GROUP_EXTENSIONS 1 +#endif + #if !defined(__APPLE__) || !defined(__MAC_OS_X_VERSION_MIN_REQUIRED) || __MAC_OS_X_VERSION_MIN_REQUIRED > 101500 #define __TBB_ALIGNAS_AVAILABLE 1 diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_exception.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_exception.h index 9764209fa8..21c61188d0 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_exception.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_exception.h @@ -21,9 +21,7 @@ #include <new> // std::bad_alloc #include <exception> // std::exception -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE #include <stdexcept> // std::runtime_error -#endif namespace tbb { namespace detail { @@ -44,37 +42,39 @@ enum class exception_id { }; } // namespace d0 +#if _MSC_VER + #pragma warning(disable: 4275) +#endif + namespace r1 { //! Exception for concurrent containers -class bad_last_alloc : public std::bad_alloc { +class TBB_EXPORT bad_last_alloc : public std::bad_alloc { public: const char* __TBB_EXPORTED_METHOD what() const noexcept(true) override; }; //! Exception for user-initiated abort -class user_abort : public std::exception { +class TBB_EXPORT user_abort : public std::exception { public: const char* __TBB_EXPORTED_METHOD what() const noexcept(true) override; }; //! Exception for missing wait on structured_task_group -class missing_wait : public std::exception { +class TBB_EXPORT missing_wait : public std::exception { public: const char* __TBB_EXPORTED_METHOD what() const noexcept(true) override; }; -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE //! 
Exception for impossible finalization of task_sheduler_handle -class unsafe_wait : public std::runtime_error { +class TBB_EXPORT unsafe_wait : public std::runtime_error { public: unsafe_wait(const char* msg) : std::runtime_error(msg) {} }; -#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE //! Gathers all throw operators in one place. /** Its purpose is to minimize code bloat that can be caused by throw operators scattered in multiple places, especially in templates. **/ -void __TBB_EXPORTED_FUNC throw_exception ( exception_id ); +TBB_EXPORT void __TBB_EXPORTED_FUNC throw_exception ( exception_id ); } // namespace r1 inline namespace d0 { diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_export.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_export.h new file mode 100644 index 0000000000..4c015223b5 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_export.h @@ -0,0 +1,46 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_detail__export_H +#define __TBB_detail__export_H + +#if defined(__MINGW32__) + #define _EXPORT __declspec(dllexport) +#elif defined(_WIN32) || defined(__unix__) || defined(__APPLE__) // Use .def files for these + #define _EXPORT +#else + #error "Unknown platform/compiler" +#endif + +#if __TBB_BUILD + #define TBB_EXPORT _EXPORT +#else + #define TBB_EXPORT +#endif + +#if __TBBMALLOC_BUILD + #define TBBMALLOC_EXPORT _EXPORT +#else + #define TBBMALLOC_EXPORT +#endif + +#if __TBBBIND_BUILD + #define TBBBIND_EXPORT _EXPORT +#else + #define TBBBIND_EXPORT +#endif + +#endif diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_intrusive_list_node.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_intrusive_list_node.h new file mode 100644 index 0000000000..69286c8fab --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_intrusive_list_node.h @@ -0,0 +1,41 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef _TBB_detail__intrusive_list_node_H +#define _TBB_detail__intrusive_list_node_H + +namespace tbb { +namespace detail { +namespace d1 { + +//! Data structure to be inherited by the types that can form intrusive lists. +/** Intrusive list is formed by means of the member_intrusive_list<T> template class. + Note that type T must derive from intrusive_list_node either publicly or + declare instantiation member_intrusive_list<T> as a friend. 
+ This class implements a limited subset of std::list interface. **/ +struct intrusive_list_node { + intrusive_list_node* my_prev_node{}; + intrusive_list_node* my_next_node{}; +#if TBB_USE_ASSERT + intrusive_list_node() { my_prev_node = my_next_node = this; } +#endif /* TBB_USE_ASSERT */ +}; + +} // namespace d1 +} // namespace detail +} // namespace tbb + +#endif // _TBB_detail__intrusive_list_node_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_machine.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_machine.h index 3270da786a..763bc65b11 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_machine.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_machine.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -25,14 +25,25 @@ #include <cstdint> #include <cstddef> -#ifdef _MSC_VER +#ifdef _WIN32 #include <intrin.h> +#ifdef __TBBMALLOC_BUILD +#define WIN32_LEAN_AND_MEAN +#ifndef NOMINMAX +#define NOMINMAX +#endif +#include <windows.h> // SwitchToThread() +#endif +#ifdef _MSC_VER +#if __TBB_x86_64 || __TBB_x86_32 #pragma intrinsic(__rdtsc) #endif +#endif +#endif #if __TBB_x86_64 || __TBB_x86_32 #include <immintrin.h> // _mm_pause #endif -#if (_WIN32 || _WIN64) +#if (_WIN32) #include <float.h> // _control87 #endif @@ -53,32 +64,28 @@ inline namespace d0 { #if __TBB_GLIBCXX_THIS_THREAD_YIELD_BROKEN static inline void yield() { int err = sched_yield(); - __TBB_ASSERT_EX(err == 0, "sched_yiled has failed"); + __TBB_ASSERT_EX(err == 0, "sched_yield has failed"); +} +#elif __TBBMALLOC_BUILD && _WIN32 +// Use Windows API for yield in tbbmalloc to avoid dependency on C++ runtime with some implementations. 
+static inline void yield() { + SwitchToThread(); } #else using std::this_thread::yield; #endif //-------------------------------------------------------------------------------------------------- -// atomic_fence implementation +// atomic_fence_seq_cst implementation //-------------------------------------------------------------------------------------------------- -#if (_WIN32 || _WIN64) -#pragma intrinsic(_mm_mfence) +static inline void atomic_fence_seq_cst() { +#if (__TBB_x86_64 || __TBB_x86_32) && defined(__GNUC__) && __GNUC__ < 11 + unsigned char dummy = 0u; + __asm__ __volatile__ ("lock; notb %0" : "+m" (dummy) :: "memory"); +#else + std::atomic_thread_fence(std::memory_order_seq_cst); #endif - -static inline void atomic_fence(std::memory_order order) { -#if (_WIN32 || _WIN64) - if (order == std::memory_order_seq_cst || - order == std::memory_order_acq_rel || - order == std::memory_order_acquire || - order == std::memory_order_release ) - { - _mm_mfence(); - return; - } -#endif /*(_WIN32 || _WIN64)*/ - std::atomic_thread_fence(order); } //-------------------------------------------------------------------------------------------------- @@ -229,7 +236,7 @@ T machine_reverse_bits(T src) { namespace d1 { -#if (_WIN32 || _WIN64) +#if (_WIN32) // API to retrieve/update FPU control setting #define __TBB_CPU_CTL_ENV_PRESENT 1 struct cpu_ctl_env { @@ -319,7 +326,7 @@ namespace d1 { class cpu_ctl_env { fenv_t *my_fenv_ptr; public: - cpu_ctl_env() : my_fenv_ptr(NULL) {} + cpu_ctl_env() : my_fenv_ptr(nullptr) {} ~cpu_ctl_env() { if ( my_fenv_ptr ) r1::cache_aligned_deallocate( (void*)my_fenv_ptr ); @@ -330,11 +337,11 @@ public: // dispatch loop may become invalid. // But do we really want to improve the fenv implementation? It seems to be better to replace the fenv implementation // with a platform specific implementation. 
- cpu_ctl_env( const cpu_ctl_env &src ) : my_fenv_ptr(NULL) { + cpu_ctl_env( const cpu_ctl_env &src ) : my_fenv_ptr(nullptr) { *this = src; } cpu_ctl_env& operator=( const cpu_ctl_env &src ) { - __TBB_ASSERT( src.my_fenv_ptr, NULL ); + __TBB_ASSERT( src.my_fenv_ptr, nullptr); if ( !my_fenv_ptr ) my_fenv_ptr = (fenv_t*)r1::cache_aligned_allocate(sizeof(fenv_t)); *my_fenv_ptr = *src.my_fenv_ptr; diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_mutex_common.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_mutex_common.h new file mode 100644 index 0000000000..4650c19268 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_mutex_common.h @@ -0,0 +1,61 @@ +/* + Copyright (c) 2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_detail__mutex_common_H +#define __TBB_detail__mutex_common_H + +#include "_config.h" +#include "_utils.h" + +#if __TBB_CPP20_CONCEPTS_PRESENT +#include <concepts> + +namespace tbb { +namespace detail { +inline namespace d0 { + +template <typename Lock, typename Mutex> +concept mutex_scoped_lock = std::default_initializable<Lock> && + std::constructible_from<Lock, Mutex&> && + requires( Lock& lock, Mutex& mutex ) { + lock.acquire(mutex); + { lock.try_acquire(mutex) } -> adaptive_same_as<bool>; + lock.release(); + }; + +template <typename Lock, typename Mutex> +concept rw_mutex_scoped_lock = mutex_scoped_lock<Lock, Mutex> && + std::constructible_from<Lock, Mutex&, bool> && + requires( Lock& lock, Mutex& mutex ) { + lock.acquire(mutex, false); + { lock.try_acquire(mutex, false) } -> adaptive_same_as<bool>; + { lock.upgrade_to_writer() } -> adaptive_same_as<bool>; + { lock.downgrade_to_reader() } -> adaptive_same_as<bool>; + }; + +template <typename Mutex> +concept scoped_lockable = mutex_scoped_lock<typename Mutex::scoped_lock, Mutex>; + +template <typename Mutex> +concept rw_scoped_lockable = scoped_lockable<Mutex> && + rw_mutex_scoped_lock<typename Mutex::scoped_lock, Mutex>; + +} // namespace d0 +} // namespace detail +} // namespace tbb + +#endif // __TBB_CPP20_CONCEPTS_PRESENT +#endif // __TBB_detail__mutex_common_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_pipeline_filters.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_pipeline_filters.h index 95a4d3dc96..149b7f46c2 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_pipeline_filters.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_pipeline_filters.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -33,7 +33,7 @@ class base_filter; } namespace r1 { -void __TBB_EXPORTED_FUNC set_end_of_input(d1::base_filter&); +TBB_EXPORT void __TBB_EXPORTED_FUNC set_end_of_input(d1::base_filter&); class pipeline; class stage_task; class input_buffer; @@ -129,7 +129,9 @@ class flow_control { bool is_pipeline_stopped = false; flow_control() = default; template<typename Body, typename InputType, typename OutputType > friend class concrete_filter; - template<typename Output> friend class input_node; + template<typename Output> + __TBB_requires(std::copyable<Output>) + friend class input_node; public: void stop() { is_pipeline_stopped = true; } }; @@ -418,7 +420,7 @@ inline void filter_node_ptr::operator=(filter_node_ptr && rhs) { } inline filter_node& filter_node_ptr::operator*() const{ - __TBB_ASSERT(my_node,"NULL node is used"); + __TBB_ASSERT(my_node,"nullptr node is used"); return *my_node; } diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_range_common.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_range_common.h index 36c4ca84ee..1011f029df 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_range_common.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_range_common.h @@ -19,6 +19,10 @@ #include "_config.h" #include "_utils.h" +#if __TBB_CPP20_CONCEPTS_PRESENT +#include <concepts> +#endif +#include <iterator> namespace tbb { namespace detail { @@ -69,6 +73,56 @@ auto get_range_split_object( PartitionerSplitType& split_obj ) return range_split_object_provider<Range>::get(split_obj); } +template <typename Range> +using range_iterator_type = decltype(std::begin(std::declval<Range&>())); + +#if __TBB_CPP20_CONCEPTS_PRESENT +template <typename Iterator> +using iterator_reference_type = typename std::iterator_traits<Iterator>::reference; + +template <typename Range> +using range_reference_type = iterator_reference_type<range_iterator_type<Range>>; + +template <typename Value> +concept blocked_range_value = std::copyable<Value> && + requires( const 
std::remove_reference_t<Value>& lhs, const std::remove_reference_t<Value>& rhs ) { + { lhs < rhs } -> relaxed_convertible_to<bool>; + { lhs - rhs } -> std::convertible_to<std::size_t>; + { lhs + (rhs - lhs) } -> std::convertible_to<Value>; + }; + +template <typename T> +concept splittable = std::constructible_from<T, T&, tbb::detail::split>; + +template <typename Range> +concept tbb_range = std::copy_constructible<Range> && + splittable<Range> && + requires( const std::remove_reference_t<Range>& range ) { + { range.empty() } -> relaxed_convertible_to<bool>; + { range.is_divisible() } -> relaxed_convertible_to<bool>; + }; + +template <typename Iterator> +constexpr bool iterator_concept_helper( std::input_iterator_tag ) { + return std::input_iterator<Iterator>; +} + +template <typename Iterator> +constexpr bool iterator_concept_helper( std::random_access_iterator_tag ) { + return std::random_access_iterator<Iterator>; +} + +template <typename Iterator, typename IteratorTag> +concept iterator_satisfies = requires (IteratorTag tag) { + requires iterator_concept_helper<Iterator>(tag); +}; + +template <typename Sequence, typename IteratorTag> +concept container_based_sequence = requires( Sequence& seq ) { + { std::begin(seq) } -> iterator_satisfies<IteratorTag>; + { std::end(seq) } -> iterator_satisfies<IteratorTag>; +}; +#endif // __TBB_CPP20_CONCEPTS_PRESENT } // namespace d0 } // namespace detail } // namespace tbb diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_rtm_mutex.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_rtm_mutex.h index 28ef9f042e..11583bad3a 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_rtm_mutex.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_rtm_mutex.h @@ -108,11 +108,11 @@ private: namespace r1 { //! Internal acquire lock. // only_speculate == true if we're doing a try_lock, else false. 
- void __TBB_EXPORTED_FUNC acquire(d1::rtm_mutex&, d1::rtm_mutex::scoped_lock&, bool only_speculate = false); + TBB_EXPORT void __TBB_EXPORTED_FUNC acquire(d1::rtm_mutex&, d1::rtm_mutex::scoped_lock&, bool only_speculate = false); //! Internal try_acquire lock. - bool __TBB_EXPORTED_FUNC try_acquire(d1::rtm_mutex&, d1::rtm_mutex::scoped_lock&); + TBB_EXPORT bool __TBB_EXPORTED_FUNC try_acquire(d1::rtm_mutex&, d1::rtm_mutex::scoped_lock&); //! Internal release lock. - void __TBB_EXPORTED_FUNC release(d1::rtm_mutex::scoped_lock&); + TBB_EXPORT void __TBB_EXPORTED_FUNC release(d1::rtm_mutex::scoped_lock&); } // namespace r1 namespace d1 { @@ -143,14 +143,14 @@ inline void rtm_mutex::scoped_lock::release() { inline void set_name(rtm_mutex& obj, const char* name) { itt_set_sync_name(&obj, name); } -#if (_WIN32||_WIN64) && !__MINGW32__ +#if (_WIN32||_WIN64) inline void set_name(rtm_mutex& obj, const wchar_t* name) { itt_set_sync_name(&obj, name); } #endif // WIN #else inline void set_name(rtm_mutex&, const char*) {} -#if (_WIN32||_WIN64) && !__MINGW32__ +#if (_WIN32||_WIN64) inline void set_name(rtm_mutex&, const wchar_t*) {} #endif // WIN #endif diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_rtm_rw_mutex.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_rtm_rw_mutex.h index b62e86bd0a..25a8fc1e5b 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_rtm_rw_mutex.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_rtm_rw_mutex.h @@ -100,6 +100,7 @@ public: //! Downgrade writer to become a reader. inline bool downgrade_to_reader(); + inline bool is_writer() const; private: rtm_rw_mutex* m_mutex; rtm_type m_transaction_state; @@ -123,20 +124,20 @@ private: namespace r1 { //! Internal acquire write lock. // only_speculate == true if we're doing a try_lock, else false. 
- void __TBB_EXPORTED_FUNC acquire_writer(d1::rtm_rw_mutex&, d1::rtm_rw_mutex::scoped_lock&, bool only_speculate = false); + TBB_EXPORT void __TBB_EXPORTED_FUNC acquire_writer(d1::rtm_rw_mutex&, d1::rtm_rw_mutex::scoped_lock&, bool only_speculate = false); //! Internal acquire read lock. // only_speculate == true if we're doing a try_lock, else false. - void __TBB_EXPORTED_FUNC acquire_reader(d1::rtm_rw_mutex&, d1::rtm_rw_mutex::scoped_lock&, bool only_speculate = false); + TBB_EXPORT void __TBB_EXPORTED_FUNC acquire_reader(d1::rtm_rw_mutex&, d1::rtm_rw_mutex::scoped_lock&, bool only_speculate = false); //! Internal upgrade reader to become a writer. - bool __TBB_EXPORTED_FUNC upgrade(d1::rtm_rw_mutex::scoped_lock&); + TBB_EXPORT bool __TBB_EXPORTED_FUNC upgrade(d1::rtm_rw_mutex::scoped_lock&); //! Internal downgrade writer to become a reader. - bool __TBB_EXPORTED_FUNC downgrade(d1::rtm_rw_mutex::scoped_lock&); + TBB_EXPORT bool __TBB_EXPORTED_FUNC downgrade(d1::rtm_rw_mutex::scoped_lock&); //! Internal try_acquire write lock. - bool __TBB_EXPORTED_FUNC try_acquire_writer(d1::rtm_rw_mutex&, d1::rtm_rw_mutex::scoped_lock&); + TBB_EXPORT bool __TBB_EXPORTED_FUNC try_acquire_writer(d1::rtm_rw_mutex&, d1::rtm_rw_mutex::scoped_lock&); //! Internal try_acquire read lock. - bool __TBB_EXPORTED_FUNC try_acquire_reader(d1::rtm_rw_mutex&, d1::rtm_rw_mutex::scoped_lock&); + TBB_EXPORT bool __TBB_EXPORTED_FUNC try_acquire_reader(d1::rtm_rw_mutex&, d1::rtm_rw_mutex::scoped_lock&); //! Internal release lock. 
- void __TBB_EXPORTED_FUNC release(d1::rtm_rw_mutex::scoped_lock&); + TBB_EXPORT void __TBB_EXPORTED_FUNC release(d1::rtm_rw_mutex::scoped_lock&); } namespace d1 { @@ -186,18 +187,23 @@ bool rtm_rw_mutex::scoped_lock::downgrade_to_reader() { return r1::downgrade(*this); } +bool rtm_rw_mutex::scoped_lock::is_writer() const { + __TBB_ASSERT(m_mutex, "lock is not acquired"); + return m_transaction_state == rtm_type::rtm_transacting_writer || m_transaction_state == rtm_type::rtm_real_writer; +} + #if TBB_USE_PROFILING_TOOLS inline void set_name(rtm_rw_mutex& obj, const char* name) { itt_set_sync_name(&obj, name); } -#if (_WIN32||_WIN64) && !__MINGW32__ +#if (_WIN32||_WIN64) inline void set_name(rtm_rw_mutex& obj, const wchar_t* name) { itt_set_sync_name(&obj, name); } #endif // WIN #else inline void set_name(rtm_rw_mutex&, const char*) {} -#if (_WIN32||_WIN64) && !__MINGW32__ +#if (_WIN32||_WIN64) inline void set_name(rtm_rw_mutex&, const wchar_t*) {} #endif // WIN #endif diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_scoped_lock.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_scoped_lock.h new file mode 100644 index 0000000000..a49dcdff53 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_scoped_lock.h @@ -0,0 +1,174 @@ +/* + Copyright (c) 2005-2022 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_detail_scoped_lock_H +#define __TBB_detail_scoped_lock_H + +namespace tbb { +namespace detail { +namespace d1 { + +// unique_scoped_lock supposes that Mutex operations never throw +template <typename Mutex> +class unique_scoped_lock { + //! Points to currently held Mutex, or nullptr if no lock is held. + Mutex* m_mutex{}; + +public: + //! Construct without acquiring a Mutex. + constexpr unique_scoped_lock() noexcept : m_mutex(nullptr) {} + + //! Construct and acquire lock on a Mutex. + unique_scoped_lock(Mutex& m) { + acquire(m); + } + + //! No Copy + unique_scoped_lock(const unique_scoped_lock&) = delete; + unique_scoped_lock& operator=(const unique_scoped_lock&) = delete; + + //! Acquire lock. + void acquire(Mutex& m) { + __TBB_ASSERT(m_mutex == nullptr, "The mutex is already acquired"); + m_mutex = &m; + m.lock(); + } + + //! Try acquiring lock (non-blocking) + /** Return true if lock acquired; false otherwise. */ + bool try_acquire(Mutex& m) { + __TBB_ASSERT(m_mutex == nullptr, "The mutex is already acquired"); + bool succeed = m.try_lock(); + if (succeed) { + m_mutex = &m; + } + return succeed; + } + + //! Release lock + void release() { + __TBB_ASSERT(m_mutex, "release on Mutex::unique_scoped_lock that is not holding a lock"); + m_mutex->unlock(); + m_mutex = nullptr; + } + + //! Destroy lock. If holding a lock, releases the lock first. + ~unique_scoped_lock() { + if (m_mutex) { + release(); + } + } +}; + +// rw_scoped_lock supposes that Mutex operations never throw +template <typename Mutex> +class rw_scoped_lock { +public: + //! Construct lock that has not acquired a mutex. + /** Equivalent to zero-initialization of *this. */ + constexpr rw_scoped_lock() noexcept {} + + //! Acquire lock on given mutex. + rw_scoped_lock(Mutex& m, bool write = true) { + acquire(m, write); + } + + //! Release lock (if lock is held). + ~rw_scoped_lock() { + if (m_mutex) { + release(); + } + } + + //! 
No Copy + rw_scoped_lock(const rw_scoped_lock&) = delete; + rw_scoped_lock& operator=(const rw_scoped_lock&) = delete; + + //! Acquire lock on given mutex. + void acquire(Mutex& m, bool write = true) { + __TBB_ASSERT(m_mutex == nullptr, "The mutex is already acquired"); + m_is_writer = write; + m_mutex = &m; + if (write) { + m_mutex->lock(); + } else { + m_mutex->lock_shared(); + } + } + + //! Try acquire lock on given mutex. + bool try_acquire(Mutex& m, bool write = true) { + bool succeed = write ? m.try_lock() : m.try_lock_shared(); + if (succeed) { + m_mutex = &m; + m_is_writer = write; + } + return succeed; + } + + //! Release lock. + void release() { + __TBB_ASSERT(m_mutex != nullptr, "The mutex is not acquired"); + Mutex* m = m_mutex; + m_mutex = nullptr; + + if (m_is_writer) { + m->unlock(); + } else { + m->unlock_shared(); + } + } + + //! Upgrade reader to become a writer. + /** Returns whether the upgrade happened without releasing and re-acquiring the lock */ + bool upgrade_to_writer() { + __TBB_ASSERT(m_mutex != nullptr, "The mutex is not acquired"); + if (m_is_writer) { + return true; // Already a writer + } + m_is_writer = true; + return m_mutex->upgrade(); + } + + //! Downgrade writer to become a reader. + bool downgrade_to_reader() { + __TBB_ASSERT(m_mutex != nullptr, "The mutex is not acquired"); + if (m_is_writer) { + m_mutex->downgrade(); + m_is_writer = false; + } + return true; + } + + bool is_writer() const { + __TBB_ASSERT(m_mutex != nullptr, "The mutex is not acquired"); + return m_is_writer; + } + +protected: + //! The pointer to the current mutex that is held, or nullptr if no mutex is held. + Mutex* m_mutex {nullptr}; + + //! If mutex != nullptr, then is_writer is true if holding a writer lock, false if holding a reader lock. + /** Not defined if not holding a lock. 
*/ + bool m_is_writer {false}; +}; + +} // namespace d1 +} // namespace detail +} // namespace tbb + +#endif // __TBB_detail_scoped_lock_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_small_object_pool.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_small_object_pool.h index 8a10a61e1a..7485b31c76 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_small_object_pool.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_small_object_pool.h @@ -37,12 +37,12 @@ struct execution_data; } namespace r1 { -void* __TBB_EXPORTED_FUNC allocate(d1::small_object_pool*& pool, std::size_t number_of_bytes, +TBB_EXPORT void* __TBB_EXPORTED_FUNC allocate(d1::small_object_pool*& pool, std::size_t number_of_bytes, const d1::execution_data& ed); -void* __TBB_EXPORTED_FUNC allocate(d1::small_object_pool*& pool, std::size_t number_of_bytes); -void __TBB_EXPORTED_FUNC deallocate(d1::small_object_pool& pool, void* ptr, std::size_t number_of_bytes, +TBB_EXPORT void* __TBB_EXPORTED_FUNC allocate(d1::small_object_pool*& pool, std::size_t number_of_bytes); +TBB_EXPORT void __TBB_EXPORTED_FUNC deallocate(d1::small_object_pool& pool, void* ptr, std::size_t number_of_bytes, const d1::execution_data& ed); -void __TBB_EXPORTED_FUNC deallocate(d1::small_object_pool& pool, void* ptr, std::size_t number_of_bytes); +TBB_EXPORT void __TBB_EXPORTED_FUNC deallocate(d1::small_object_pool& pool, void* ptr, std::size_t number_of_bytes); } namespace d1 { @@ -66,7 +66,7 @@ public: template <typename Type> void delete_object(Type* object, const execution_data& ed) { - // Copy this since the it can be the member of the passed object and + // Copy this since it can be a member of the passed object and // unintentionally destroyed when Type destructor is called below small_object_allocator alloc = *this; object->~Type(); @@ -75,7 +75,7 @@ public: template <typename Type> void delete_object(Type* object) { - // Copy this since the it can be the member of the passed object and + // Copy this since it 
can be a member of the passed object and // unintentionally destroyed when Type destructor is called below small_object_allocator alloc = *this; object->~Type(); diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_task.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_task.h index 7b4f8521c6..237c464465 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_task.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_task.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2020-2021 Intel Corporation + Copyright (c) 2020-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -47,21 +47,21 @@ struct execution_data; namespace r1 { //! Task spawn/wait entry points -void __TBB_EXPORTED_FUNC spawn(d1::task& t, d1::task_group_context& ctx); -void __TBB_EXPORTED_FUNC spawn(d1::task& t, d1::task_group_context& ctx, d1::slot_id id); -void __TBB_EXPORTED_FUNC execute_and_wait(d1::task& t, d1::task_group_context& t_ctx, d1::wait_context&, d1::task_group_context& w_ctx); -void __TBB_EXPORTED_FUNC wait(d1::wait_context&, d1::task_group_context& ctx); -d1::slot_id __TBB_EXPORTED_FUNC execution_slot(const d1::execution_data*); -d1::task_group_context* __TBB_EXPORTED_FUNC current_context(); +TBB_EXPORT void __TBB_EXPORTED_FUNC spawn(d1::task& t, d1::task_group_context& ctx); +TBB_EXPORT void __TBB_EXPORTED_FUNC spawn(d1::task& t, d1::task_group_context& ctx, d1::slot_id id); +TBB_EXPORT void __TBB_EXPORTED_FUNC execute_and_wait(d1::task& t, d1::task_group_context& t_ctx, d1::wait_context&, d1::task_group_context& w_ctx); +TBB_EXPORT void __TBB_EXPORTED_FUNC wait(d1::wait_context&, d1::task_group_context& ctx); +TBB_EXPORT d1::slot_id __TBB_EXPORTED_FUNC execution_slot(const d1::execution_data*); +TBB_EXPORT d1::task_group_context* __TBB_EXPORTED_FUNC current_context(); // Do not place under __TBB_RESUMABLE_TASKS. It is a stub for unsupported platforms. 
struct suspend_point_type; using suspend_callback_type = void(*)(void*, suspend_point_type*); //! The resumable tasks entry points -void __TBB_EXPORTED_FUNC suspend(suspend_callback_type suspend_callback, void* user_callback); -void __TBB_EXPORTED_FUNC resume(suspend_point_type* tag); -suspend_point_type* __TBB_EXPORTED_FUNC current_suspend_point(); -void __TBB_EXPORTED_FUNC notify_waiters(std::uintptr_t wait_ctx_addr); +TBB_EXPORT void __TBB_EXPORTED_FUNC suspend(suspend_callback_type suspend_callback, void* user_callback); +TBB_EXPORT void __TBB_EXPORTED_FUNC resume(suspend_point_type* tag); +TBB_EXPORT suspend_point_type* __TBB_EXPORTED_FUNC current_suspend_point(); +TBB_EXPORT void __TBB_EXPORTED_FUNC notify_waiters(std::uintptr_t wait_ctx_addr); class thread_data; class task_dispatcher; @@ -135,7 +135,7 @@ public: wait_context(const wait_context&) = delete; ~wait_context() { - __TBB_ASSERT(!continue_execution(), NULL); + __TBB_ASSERT(!continue_execution(), nullptr); } void reserve(std::uint32_t delta = 1) { @@ -145,11 +145,6 @@ public: void release(std::uint32_t delta = 1) { add_reference(-std::int64_t(delta)); } -#if __TBB_EXTRA_DEBUG - unsigned reference_count() const { - return unsigned(m_ref_count.load(std::memory_order_acquire)); - } -#endif }; struct execution_data { diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_task_handle.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_task_handle.h new file mode 100644 index 0000000000..e32154f409 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_task_handle.h @@ -0,0 +1,122 @@ +/* + Copyright (c) 2020-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + + +#ifndef __TBB_task_handle_H +#define __TBB_task_handle_H + +#include "_config.h" +#include "_task.h" +#include "_small_object_pool.h" +#include "_utils.h" +#include <memory> + +namespace tbb { +namespace detail { + +namespace d1 { class task_group_context; class wait_context; struct execution_data; } +namespace d2 { + +class task_handle; + +class task_handle_task : public d1::task { + std::uint64_t m_version_and_traits{}; + d1::wait_context& m_wait_ctx; + d1::task_group_context& m_ctx; + d1::small_object_allocator m_allocator; +public: + void finalize(const d1::execution_data* ed = nullptr) { + if (ed) { + m_allocator.delete_object(this, *ed); + } else { + m_allocator.delete_object(this); + } + } + + task_handle_task(d1::wait_context& wo, d1::task_group_context& ctx, d1::small_object_allocator& alloc) + : m_wait_ctx(wo) + , m_ctx(ctx) + , m_allocator(alloc) { + suppress_unused_warning(m_version_and_traits); + } + + ~task_handle_task() override { + m_wait_ctx.release(); + } + + d1::task_group_context& ctx() const { return m_ctx; } +}; + + +class task_handle { + struct task_handle_task_finalizer_t{ + void operator()(task_handle_task* p){ p->finalize(); } + }; + using handle_impl_t = std::unique_ptr<task_handle_task, task_handle_task_finalizer_t>; + + handle_impl_t m_handle = {nullptr}; +public: + task_handle() = default; + task_handle(task_handle&&) = default; + task_handle& operator=(task_handle&&) = default; + + explicit operator bool() const noexcept { return static_cast<bool>(m_handle); } + + friend bool operator==(task_handle const& th, std::nullptr_t) 
noexcept; + friend bool operator==(std::nullptr_t, task_handle const& th) noexcept; + + friend bool operator!=(task_handle const& th, std::nullptr_t) noexcept; + friend bool operator!=(std::nullptr_t, task_handle const& th) noexcept; + +private: + friend struct task_handle_accessor; + + task_handle(task_handle_task* t) : m_handle {t}{}; + + d1::task* release() { + return m_handle.release(); + } +}; + +struct task_handle_accessor { +static task_handle construct(task_handle_task* t) { return {t}; } +static d1::task* release(task_handle& th) { return th.release(); } +static d1::task_group_context& ctx_of(task_handle& th) { + __TBB_ASSERT(th.m_handle, "ctx_of does not expect empty task_handle."); + return th.m_handle->ctx(); +} +}; + +inline bool operator==(task_handle const& th, std::nullptr_t) noexcept { + return th.m_handle == nullptr; +} +inline bool operator==(std::nullptr_t, task_handle const& th) noexcept { + return th.m_handle == nullptr; +} + +inline bool operator!=(task_handle const& th, std::nullptr_t) noexcept { + return th.m_handle != nullptr; +} + +inline bool operator!=(std::nullptr_t, task_handle const& th) noexcept { + return th.m_handle != nullptr; +} + +} // namespace d2 +} // namespace detail +} // namespace tbb + +#endif /* __TBB_task_handle_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_template_helpers.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_template_helpers.h index 45a8ffede6..7221117182 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_template_helpers.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_template_helpers.h @@ -22,7 +22,7 @@ #include <cstddef> #include <cstdint> - +#include <utility> #include <type_traits> #include <memory> #include <iterator> @@ -182,7 +182,17 @@ using pack_element_t = typename pack_element<N, Args...>::type; template <typename Func> class raii_guard { public: - raii_guard( Func f ) : my_func(f), is_active(true) {} + static_assert( + std::is_nothrow_copy_constructible<Func>::value 
&& + std::is_nothrow_move_constructible<Func>::value, + "Throwing an exception during the Func copy or move construction cause an unexpected behavior." + ); + + raii_guard( Func f ) noexcept : my_func(f), is_active(true) {} + + raii_guard( raii_guard&& g ) noexcept : my_func(std::move(g.my_func)), is_active(g.is_active) { + g.is_active = false; + } ~raii_guard() { if (is_active) { diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_utils.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_utils.h index d1e02179f8..28fe1a1730 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/detail/_utils.h +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_utils.h @@ -37,6 +37,8 @@ template<typename... T> void suppress_unused_warning(T&&...) {} bound is more useful than a run-time exact answer. @ingroup memory_allocation */ constexpr size_t max_nfs_size = 128; +constexpr std::size_t max_nfs_size_exp = 7; +static_assert(1 << max_nfs_size_exp == max_nfs_size, "max_nfs_size_exp must be a log2(max_nfs_size)"); //! Class that implements exponential backoff. class atomic_backoff { @@ -90,25 +92,43 @@ public: //! Spin WHILE the condition is true. /** T and U should be comparable types. */ template <typename T, typename C> -void spin_wait_while_condition(const std::atomic<T>& location, C comp) { +T spin_wait_while(const std::atomic<T>& location, C comp, std::memory_order order) { atomic_backoff backoff; - while (comp(location.load(std::memory_order_acquire))) { + T snapshot = location.load(order); + while (comp(snapshot)) { backoff.pause(); + snapshot = location.load(order); } + return snapshot; } //! Spin WHILE the value of the variable is equal to a given value /** T and U should be comparable types. 
*/ template <typename T, typename U> -void spin_wait_while_eq(const std::atomic<T>& location, const U value) { - spin_wait_while_condition(location, [&value](T t) { return t == value; }); +T spin_wait_while_eq(const std::atomic<T>& location, const U value, std::memory_order order = std::memory_order_acquire) { + return spin_wait_while(location, [&value](T t) { return t == value; }, order); } //! Spin UNTIL the value of the variable is equal to a given value /** T and U should be comparable types. */ template<typename T, typename U> -void spin_wait_until_eq(const std::atomic<T>& location, const U value) { - spin_wait_while_condition(location, [&value](T t) { return t != value; }); +T spin_wait_until_eq(const std::atomic<T>& location, const U value, std::memory_order order = std::memory_order_acquire) { + return spin_wait_while(location, [&value](T t) { return t != value; }, order); +} + +//! Spin UNTIL the condition returns true or spinning time is up. +/** Returns what the passed functor returned last time it was invoked. */ +template <typename Condition> +bool timed_spin_wait_until(Condition condition) { + // 32 pauses + 32 yields are meausered as balanced spin time before sleep. 
+ bool finish = condition(); + for (int i = 1; !finish && i < 32; finish = condition(), i *= 2) { + machine_pause(i); + } + for (int i = 32; !finish && i < 64; finish = condition(), ++i) { + yield(); + } + return finish; } template <typename T> @@ -178,10 +198,7 @@ template<typename T> inline bool is_poisoned(const std::atomic<T*>& p) { return is_poisoned(p.load(std::memory_order_relaxed)); } #else template<typename T> -inline void poison_pointer(T* &) {/*do nothing*/} - -template<typename T> -inline void poison_pointer(std::atomic<T*>&) { /* do nothing */} +inline void poison_pointer(T&) {/*do nothing*/} #endif /* !TBB_USE_ASSERT */ template <std::size_t alignment = 0, typename T> @@ -309,6 +326,18 @@ using synthesized_three_way_result = decltype(synthesized_three_way_comparator{} std::declval<T2&>())); #endif // __TBB_CPP20_COMPARISONS_PRESENT + +// Check if the type T is implicitly OR explicitly convertible to U +template <typename T, typename U> +concept relaxed_convertible_to = std::constructible_from<U, T>; + +template <typename T, typename U> +concept adaptive_same_as = +#if __TBB_STRICT_CONSTRAINTS + std::same_as<T, U>; +#else + std::convertible_to<T, U>; +#endif #endif // __TBB_CPP20_CONCEPTS_PRESENT } // namespace d0 @@ -319,9 +348,21 @@ class delegate_base { public: virtual bool operator()() const = 0; virtual ~delegate_base() {} -}; // class delegate_base +}; -} // namespace d1 +template <typename FuncType> +class delegated_function : public delegate_base { +public: + delegated_function(FuncType& f) : my_func(f) {} + + bool operator()() const override { + return my_func(); + } + +private: + FuncType &my_func; +}; +} // namespace d1 } // namespace detail } // namespace tbb diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_waitable_atomic.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_waitable_atomic.h new file mode 100644 index 0000000000..fa7280a577 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_waitable_atomic.h @@ -0,0 +1,104 
@@ +/* + Copyright (c) 2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_detail__address_waiters_H +#define __TBB_detail__address_waiters_H + +#include "_utils.h" + +namespace tbb { +namespace detail { + +namespace r1 { +TBB_EXPORT void __TBB_EXPORTED_FUNC wait_on_address(void* address, d1::delegate_base& wakeup_condition, std::uintptr_t context); +TBB_EXPORT void __TBB_EXPORTED_FUNC notify_by_address(void* address, std::uintptr_t context); +TBB_EXPORT void __TBB_EXPORTED_FUNC notify_by_address_one(void* address); +TBB_EXPORT void __TBB_EXPORTED_FUNC notify_by_address_all(void* address); +} // namespace r1 + +namespace d1 { + +template <typename Predicate> +void adaptive_wait_on_address(void* address, Predicate wakeup_condition, std::uintptr_t context) { + if (!timed_spin_wait_until(wakeup_condition)) { + d1::delegated_function<Predicate> pred(wakeup_condition); + r1::wait_on_address(address, pred, context); + } +} + +template <typename T> +class waitable_atomic { +public: + waitable_atomic() = default; + + explicit waitable_atomic(T value) : my_atomic(value) {} + + waitable_atomic(const waitable_atomic&) = delete; + waitable_atomic& operator=(const waitable_atomic&) = delete; + + T load(std::memory_order order) const noexcept { + return my_atomic.load(order); + } + + T exchange(T desired) noexcept { + return my_atomic.exchange(desired); + } + + void wait(T old, std::uintptr_t context, std::memory_order order) { + auto wakeup_condition = 
[&] { return my_atomic.load(order) != old; }; + if (!timed_spin_wait_until(wakeup_condition)) { + // We need to use while here, because notify_all() will wake up all threads + // But predicate for them might be false + d1::delegated_function<decltype(wakeup_condition)> pred(wakeup_condition); + do { + r1::wait_on_address(this, pred, context); + } while (!wakeup_condition()); + } + } + + void wait_until(T expected, std::uintptr_t context, std::memory_order order) { + auto wakeup_condition = [&] { return my_atomic.load(order) == expected; }; + if (!timed_spin_wait_until(wakeup_condition)) { + // We need to use while here, because notify_all() will wake up all threads + // But predicate for them might be false + d1::delegated_function<decltype(wakeup_condition)> pred(wakeup_condition); + do { + r1::wait_on_address(this, pred, context); + } while (!wakeup_condition()); + } + } + + void notify_relaxed(std::uintptr_t context) { + r1::notify_by_address(this, context); + } + + void notify_one_relaxed() { + r1::notify_by_address_one(this); + } + + // TODO: consider adding following interfaces: + // store(desired, memory_order) + // notify_all_relaxed() + +private: + std::atomic<T> my_atomic{}; +}; + +} // namespace d1 +} // namespace detail +} // namespace tbb + +#endif // __TBB_detail__address_waiters_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/global_control.h b/contrib/libs/tbb/include/oneapi/tbb/global_control.h index 80177b6b82..57f3b9dbcd 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/global_control.h +++ b/contrib/libs/tbb/include/oneapi/tbb/global_control.h @@ -18,38 +18,34 @@ #define __TBB_global_control_H #include "detail/_config.h" -#include "detail/_namespace_injection.h" + #include "detail/_assert.h" -#include "detail/_template_helpers.h" +#include "detail/_attach.h" #include "detail/_exception.h" +#include "detail/_namespace_injection.h" +#include "detail/_template_helpers.h" -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE -#include <new> // 
std::nothrow_t -#endif #include <cstddef> +#include <new> // std::nothrow_t namespace tbb { namespace detail { namespace d1 { class global_control; -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE class task_scheduler_handle; -#endif } namespace r1 { -void __TBB_EXPORTED_FUNC create(d1::global_control&); -void __TBB_EXPORTED_FUNC destroy(d1::global_control&); -std::size_t __TBB_EXPORTED_FUNC global_control_active_value(int); +TBB_EXPORT void __TBB_EXPORTED_FUNC create(d1::global_control&); +TBB_EXPORT void __TBB_EXPORTED_FUNC destroy(d1::global_control&); +TBB_EXPORT std::size_t __TBB_EXPORTED_FUNC global_control_active_value(int); struct global_control_impl; struct control_storage_comparator; -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE void release_impl(d1::task_scheduler_handle& handle); bool finalize_impl(d1::task_scheduler_handle& handle); -void __TBB_EXPORTED_FUNC get(d1::task_scheduler_handle&); -bool __TBB_EXPORTED_FUNC finalize(d1::task_scheduler_handle&, std::intptr_t mode); -#endif +TBB_EXPORT void __TBB_EXPORTED_FUNC get(d1::task_scheduler_handle&); +TBB_EXPORT bool __TBB_EXPORTED_FUNC finalize(d1::task_scheduler_handle&, std::intptr_t mode); } namespace d1 { @@ -60,11 +56,7 @@ public: max_allowed_parallelism, thread_stack_size, terminate_on_exception, -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE scheduler_handle, // not a public parameter -#else - reserved1, // not a public parameter -#endif parameter_max // insert new parameters above this point }; @@ -109,7 +101,6 @@ private: friend struct r1::control_storage_comparator; }; -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE //! Finalization options. //! Outside of the class to avoid extensive friendship. static constexpr std::intptr_t release_nothrowing = 0; @@ -119,9 +110,17 @@ static constexpr std::intptr_t finalize_throwing = 2; //! User side wrapper for a task scheduler lifetime control object class task_scheduler_handle { public: + //! 
Creates an empty task_scheduler_handle task_scheduler_handle() = default; + + //! Creates an attached instance of task_scheduler_handle + task_scheduler_handle(attach) { + r1::get(*this); + } + + //! Release a reference if any ~task_scheduler_handle() { - release(*this); + release(); } //! No copy @@ -129,7 +128,7 @@ public: task_scheduler_handle& operator=(const task_scheduler_handle& other) = delete; //! Move only - task_scheduler_handle(task_scheduler_handle&& other) noexcept : m_ctl{nullptr} { + task_scheduler_handle(task_scheduler_handle&& other) noexcept { std::swap(m_ctl, other.m_ctl); } task_scheduler_handle& operator=(task_scheduler_handle&& other) noexcept { @@ -137,17 +136,16 @@ public: return *this; }; - //! Get and active instance of task_scheduler_handle - static task_scheduler_handle get() { - task_scheduler_handle handle; - r1::get(handle); - return handle; + //! Checks if the task_scheduler_handle is empty + explicit operator bool() const noexcept { + return m_ctl != nullptr; } //! Release the reference and deactivate handle - static void release(task_scheduler_handle& handle) { - if (handle.m_ctl != nullptr) { - r1::finalize(handle, release_nothrowing); + void release() { + if (m_ctl != nullptr) { + r1::finalize(*this, release_nothrowing); + m_ctl = nullptr; } } @@ -156,31 +154,45 @@ private: friend bool r1::finalize_impl(task_scheduler_handle& handle); friend void __TBB_EXPORTED_FUNC r1::get(task_scheduler_handle&); + friend void finalize(task_scheduler_handle&); + friend bool finalize(task_scheduler_handle&, const std::nothrow_t&) noexcept; + global_control* m_ctl{nullptr}; }; #if TBB_USE_EXCEPTIONS //! Waits for worker threads termination. Throws exception on error. 
inline void finalize(task_scheduler_handle& handle) { - r1::finalize(handle, finalize_throwing); + try_call([&] { + if (handle.m_ctl != nullptr) { + bool finalized = r1::finalize(handle, finalize_throwing); + __TBB_ASSERT_EX(finalized, "r1::finalize did not respect finalize_throwing ?"); + + } + }).on_completion([&] { + __TBB_ASSERT(!handle, "The handle should be empty after finalize"); + }); } #endif //! Waits for worker threads termination. Returns false on error. inline bool finalize(task_scheduler_handle& handle, const std::nothrow_t&) noexcept { - return r1::finalize(handle, finalize_nothrowing); + bool finalized = true; + if (handle.m_ctl != nullptr) { + finalized = r1::finalize(handle, finalize_nothrowing); + } + __TBB_ASSERT(!handle, "The handle should be empty after finalize"); + return finalized; } -#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE } // namespace d1 } // namespace detail inline namespace v1 { using detail::d1::global_control; -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE +using detail::d1::attach; using detail::d1::finalize; using detail::d1::task_scheduler_handle; using detail::r1::unsafe_wait; -#endif } // namespace v1 } // namespace tbb diff --git a/contrib/libs/tbb/include/oneapi/tbb/info.h b/contrib/libs/tbb/include/oneapi/tbb/info.h index 21475a4d00..5a68960a84 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/info.h +++ b/contrib/libs/tbb/include/oneapi/tbb/info.h @@ -22,6 +22,7 @@ #if __TBB_ARENA_BINDING #include <vector> +#include <cstdint> namespace tbb { namespace detail { @@ -37,10 +38,6 @@ struct constraints { constraints(numa_node_id id = -1, int maximal_concurrency = -1) : numa_id(id) , max_concurrency(maximal_concurrency) -#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT - , core_type(-1) - , max_threads_per_core(-1) -#endif {} #endif /*!__TBB_CPP20_PRESENT*/ @@ -74,17 +71,17 @@ struct constraints { } // namespace d1 namespace r1 { -unsigned __TBB_EXPORTED_FUNC numa_node_count(); -void __TBB_EXPORTED_FUNC 
fill_numa_indices(int* index_array); -int __TBB_EXPORTED_FUNC numa_default_concurrency(int numa_id); +TBB_EXPORT unsigned __TBB_EXPORTED_FUNC numa_node_count(); +TBB_EXPORT void __TBB_EXPORTED_FUNC fill_numa_indices(int* index_array); +TBB_EXPORT int __TBB_EXPORTED_FUNC numa_default_concurrency(int numa_id); // Reserved fields are required to save binary backward compatibility in case of future changes. // They must be defined to 0 at this moment. -unsigned __TBB_EXPORTED_FUNC core_type_count(intptr_t reserved = 0); -void __TBB_EXPORTED_FUNC fill_core_type_indices(int* index_array, intptr_t reserved = 0); +TBB_EXPORT unsigned __TBB_EXPORTED_FUNC core_type_count(intptr_t reserved = 0); +TBB_EXPORT void __TBB_EXPORTED_FUNC fill_core_type_indices(int* index_array, intptr_t reserved = 0); -int __TBB_EXPORTED_FUNC constraints_default_concurrency(const d1::constraints& c, intptr_t reserved = 0); -int __TBB_EXPORTED_FUNC constraints_threads_per_core(const d1::constraints& c, intptr_t reserved = 0); +TBB_EXPORT int __TBB_EXPORTED_FUNC constraints_default_concurrency(const d1::constraints& c, intptr_t reserved = 0); +TBB_EXPORT int __TBB_EXPORTED_FUNC constraints_threads_per_core(const d1::constraints& c, intptr_t reserved = 0); } // namespace r1 namespace d1 { @@ -107,6 +104,7 @@ inline std::vector<core_type_id> core_types() { } inline int default_concurrency(constraints c) { + if (c.max_concurrency > 0) { return c.max_concurrency; } return r1::constraints_default_concurrency(c); } #endif /*__TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT*/ diff --git a/contrib/libs/tbb/include/oneapi/tbb/mutex.h b/contrib/libs/tbb/include/oneapi/tbb/mutex.h new file mode 100644 index 0000000000..a4d2a9a3de --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/mutex.h @@ -0,0 +1,95 @@ +/* + Copyright (c) 2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_mutex_H +#define __TBB_mutex_H + +#include "detail/_namespace_injection.h" +#include "detail/_utils.h" +#include "detail/_scoped_lock.h" +#include "detail/_waitable_atomic.h" +#include "detail/_mutex_common.h" +#include "profiling.h" + +namespace tbb { +namespace detail { +namespace d1 { + +class mutex { +public: + //! Constructors + mutex() { + create_itt_sync(this, "tbb::mutex", ""); + }; + + //! Destructor + ~mutex() { + __TBB_ASSERT(!my_flag.load(std::memory_order_relaxed), "destruction of an acquired mutex"); + } + + //! No Copy + mutex(const mutex&) = delete; + mutex& operator=(const mutex&) = delete; + + using scoped_lock = unique_scoped_lock<mutex>; + + //! Mutex traits + static constexpr bool is_rw_mutex = false; + static constexpr bool is_recursive_mutex = false; + static constexpr bool is_fair_mutex = false; + + //! Acquire lock + /** Spin if the lock is taken */ + void lock() { + call_itt_notify(prepare, this); + while (!try_lock()) { + my_flag.wait(true, /* context = */ 0, std::memory_order_relaxed); + } + } + + //! Try acquiring lock (non-blocking) + /** Return true if lock acquired; false otherwise. */ + bool try_lock() { + bool result = !my_flag.load(std::memory_order_relaxed) && !my_flag.exchange(true); + if (result) { + call_itt_notify(acquired, this); + } + return result; + } + + //! Release lock + void unlock() { + call_itt_notify(releasing, this); + // We need Write Read memory barrier before notify that reads the waiter list. + // In C++ only full fence covers this type of barrier. 
+ my_flag.exchange(false); + my_flag.notify_one_relaxed(); + } + +private: + waitable_atomic<bool> my_flag{0}; +}; // class mutex + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::mutex; +} // namespace v1 + +} // namespace tbb + +#endif // __TBB_mutex_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/parallel_for.h b/contrib/libs/tbb/include/oneapi/tbb/parallel_for.h index ed137d4d09..d3c8fdd849 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/parallel_for.h +++ b/contrib/libs/tbb/include/oneapi/tbb/parallel_for.h @@ -33,6 +33,31 @@ namespace tbb { namespace detail { +#if __TBB_CPP20_CONCEPTS_PRESENT +inline namespace d0 { + +template <typename Body, typename Range> +concept parallel_for_body = std::copy_constructible<Body> && + requires( const std::remove_reference_t<Body>& body, Range& range ) { + body(range); + }; + +template <typename Index> +concept parallel_for_index = std::constructible_from<Index, int> && + std::copyable<Index> && + requires( const std::remove_reference_t<Index>& lhs, const std::remove_reference_t<Index>& rhs ) { + { lhs < rhs } -> adaptive_same_as<bool>; + { lhs - rhs } -> std::convertible_to<std::size_t>; + { lhs + (rhs - lhs) } -> std::convertible_to<Index>; + }; + +template <typename Function, typename Index> +concept parallel_for_function = requires( const std::remove_reference_t<Function>& func, Index index ) { + func(index); +}; + +} // namespace d0 +#endif // __TBB_CPP20_CONCEPTS_PRESENT namespace d1 { //! Task type used in parallel_for @@ -158,12 +183,12 @@ task* start_for<Range, Body, Partitioner>::cancel(execution_data& ed) { //! 
Calls the function with values from range [begin, end) with a step provided template<typename Function, typename Index> -class parallel_for_body : detail::no_assign { +class parallel_for_body_wrapper : detail::no_assign { const Function &my_func; const Index my_begin; const Index my_step; public: - parallel_for_body( const Function& _func, Index& _begin, Index& _step ) + parallel_for_body_wrapper( const Function& _func, Index& _begin, Index& _step ) : my_func(_func), my_begin(_begin), my_step(_step) {} void operator()( const blocked_range<Index>& r ) const { @@ -201,6 +226,7 @@ public: //! Parallel iteration over range with default partitioner. /** @ingroup algorithms **/ template<typename Range, typename Body> + __TBB_requires(tbb_range<Range> && parallel_for_body<Body, Range>) void parallel_for( const Range& range, const Body& body ) { start_for<Range,Body,const __TBB_DEFAULT_PARTITIONER>::run(range,body,__TBB_DEFAULT_PARTITIONER()); } @@ -208,6 +234,7 @@ void parallel_for( const Range& range, const Body& body ) { //! Parallel iteration over range with simple partitioner. /** @ingroup algorithms **/ template<typename Range, typename Body> + __TBB_requires(tbb_range<Range> && parallel_for_body<Body, Range>) void parallel_for( const Range& range, const Body& body, const simple_partitioner& partitioner ) { start_for<Range,Body,const simple_partitioner>::run(range,body,partitioner); } @@ -215,6 +242,7 @@ void parallel_for( const Range& range, const Body& body, const simple_partitione //! Parallel iteration over range with auto_partitioner. /** @ingroup algorithms **/ template<typename Range, typename Body> + __TBB_requires(tbb_range<Range> && parallel_for_body<Body, Range>) void parallel_for( const Range& range, const Body& body, const auto_partitioner& partitioner ) { start_for<Range,Body,const auto_partitioner>::run(range,body,partitioner); } @@ -222,6 +250,7 @@ void parallel_for( const Range& range, const Body& body, const auto_partitioner& //! 
Parallel iteration over range with static_partitioner. /** @ingroup algorithms **/ template<typename Range, typename Body> + __TBB_requires(tbb_range<Range> && parallel_for_body<Body, Range>) void parallel_for( const Range& range, const Body& body, const static_partitioner& partitioner ) { start_for<Range,Body,const static_partitioner>::run(range,body,partitioner); } @@ -229,6 +258,7 @@ void parallel_for( const Range& range, const Body& body, const static_partitione //! Parallel iteration over range with affinity_partitioner. /** @ingroup algorithms **/ template<typename Range, typename Body> + __TBB_requires(tbb_range<Range> && parallel_for_body<Body, Range>) void parallel_for( const Range& range, const Body& body, affinity_partitioner& partitioner ) { start_for<Range,Body,affinity_partitioner>::run(range,body,partitioner); } @@ -236,6 +266,7 @@ void parallel_for( const Range& range, const Body& body, affinity_partitioner& p //! Parallel iteration over range with default partitioner and user-supplied context. /** @ingroup algorithms **/ template<typename Range, typename Body> + __TBB_requires(tbb_range<Range> && parallel_for_body<Body, Range>) void parallel_for( const Range& range, const Body& body, task_group_context& context ) { start_for<Range,Body,const __TBB_DEFAULT_PARTITIONER>::run(range, body, __TBB_DEFAULT_PARTITIONER(), context); } @@ -243,6 +274,7 @@ void parallel_for( const Range& range, const Body& body, task_group_context& con //! Parallel iteration over range with simple partitioner and user-supplied context. 
/** @ingroup algorithms **/ template<typename Range, typename Body> + __TBB_requires(tbb_range<Range> && parallel_for_body<Body, Range>) void parallel_for( const Range& range, const Body& body, const simple_partitioner& partitioner, task_group_context& context ) { start_for<Range,Body,const simple_partitioner>::run(range, body, partitioner, context); } @@ -250,6 +282,7 @@ void parallel_for( const Range& range, const Body& body, const simple_partitione //! Parallel iteration over range with auto_partitioner and user-supplied context. /** @ingroup algorithms **/ template<typename Range, typename Body> + __TBB_requires(tbb_range<Range> && parallel_for_body<Body, Range>) void parallel_for( const Range& range, const Body& body, const auto_partitioner& partitioner, task_group_context& context ) { start_for<Range,Body,const auto_partitioner>::run(range, body, partitioner, context); } @@ -257,6 +290,7 @@ void parallel_for( const Range& range, const Body& body, const auto_partitioner& //! Parallel iteration over range with static_partitioner and user-supplied context. /** @ingroup algorithms **/ template<typename Range, typename Body> + __TBB_requires(tbb_range<Range> && parallel_for_body<Body, Range>) void parallel_for( const Range& range, const Body& body, const static_partitioner& partitioner, task_group_context& context ) { start_for<Range,Body,const static_partitioner>::run(range, body, partitioner, context); } @@ -264,6 +298,7 @@ void parallel_for( const Range& range, const Body& body, const static_partitione //! Parallel iteration over range with affinity_partitioner and user-supplied context. 
/** @ingroup algorithms **/ template<typename Range, typename Body> + __TBB_requires(tbb_range<Range> && parallel_for_body<Body, Range>) void parallel_for( const Range& range, const Body& body, affinity_partitioner& partitioner, task_group_context& context ) { start_for<Range,Body,affinity_partitioner>::run(range,body,partitioner, context); } @@ -273,63 +308,73 @@ template <typename Index, typename Function, typename Partitioner> void parallel_for_impl(Index first, Index last, Index step, const Function& f, Partitioner& partitioner) { if (step <= 0 ) throw_exception(exception_id::nonpositive_step); // throws std::invalid_argument - else if (last > first) { + else if (first < last) { // Above "else" avoids "potential divide by zero" warning on some platforms Index end = (last - first - Index(1)) / step + Index(1); blocked_range<Index> range(static_cast<Index>(0), end); - parallel_for_body<Function, Index> body(f, first, step); + parallel_for_body_wrapper<Function, Index> body(f, first, step); parallel_for(range, body, partitioner); } } //! Parallel iteration over a range of integers with a step provided and default partitioner template <typename Index, typename Function> + __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>) void parallel_for(Index first, Index last, Index step, const Function& f) { parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, auto_partitioner()); } //! Parallel iteration over a range of integers with a step provided and simple partitioner template <typename Index, typename Function> + __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>) void parallel_for(Index first, Index last, Index step, const Function& f, const simple_partitioner& partitioner) { parallel_for_impl<Index,Function,const simple_partitioner>(first, last, step, f, partitioner); } //! 
Parallel iteration over a range of integers with a step provided and auto partitioner template <typename Index, typename Function> + __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>) void parallel_for(Index first, Index last, Index step, const Function& f, const auto_partitioner& partitioner) { parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, partitioner); } //! Parallel iteration over a range of integers with a step provided and static partitioner template <typename Index, typename Function> + __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>) void parallel_for(Index first, Index last, Index step, const Function& f, const static_partitioner& partitioner) { parallel_for_impl<Index,Function,const static_partitioner>(first, last, step, f, partitioner); } //! Parallel iteration over a range of integers with a step provided and affinity partitioner template <typename Index, typename Function> + __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>) void parallel_for(Index first, Index last, Index step, const Function& f, affinity_partitioner& partitioner) { parallel_for_impl(first, last, step, f, partitioner); } //! Parallel iteration over a range of integers with a default step value and default partitioner template <typename Index, typename Function> + __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>) void parallel_for(Index first, Index last, const Function& f) { parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, auto_partitioner()); } //! 
Parallel iteration over a range of integers with a default step value and simple partitioner template <typename Index, typename Function> + __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>) void parallel_for(Index first, Index last, const Function& f, const simple_partitioner& partitioner) { parallel_for_impl<Index,Function,const simple_partitioner>(first, last, static_cast<Index>(1), f, partitioner); } //! Parallel iteration over a range of integers with a default step value and auto partitioner template <typename Index, typename Function> + __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>) void parallel_for(Index first, Index last, const Function& f, const auto_partitioner& partitioner) { parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, partitioner); } //! Parallel iteration over a range of integers with a default step value and static partitioner template <typename Index, typename Function> + __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>) void parallel_for(Index first, Index last, const Function& f, const static_partitioner& partitioner) { parallel_for_impl<Index,Function,const static_partitioner>(first, last, static_cast<Index>(1), f, partitioner); } //! 
Parallel iteration over a range of integers with a default step value and affinity partitioner template <typename Index, typename Function> + __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>) void parallel_for(Index first, Index last, const Function& f, affinity_partitioner& partitioner) { parallel_for_impl(first, last, static_cast<Index>(1), f, partitioner); } @@ -339,63 +384,73 @@ template <typename Index, typename Function, typename Partitioner> void parallel_for_impl(Index first, Index last, Index step, const Function& f, Partitioner& partitioner, task_group_context &context) { if (step <= 0 ) throw_exception(exception_id::nonpositive_step); // throws std::invalid_argument - else if (last > first) { + else if (first < last) { // Above "else" avoids "potential divide by zero" warning on some platforms Index end = (last - first - Index(1)) / step + Index(1); blocked_range<Index> range(static_cast<Index>(0), end); - parallel_for_body<Function, Index> body(f, first, step); + parallel_for_body_wrapper<Function, Index> body(f, first, step); parallel_for(range, body, partitioner, context); } } //! Parallel iteration over a range of integers with explicit step, task group context, and default partitioner template <typename Index, typename Function> + __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>) void parallel_for(Index first, Index last, Index step, const Function& f, task_group_context &context) { parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, auto_partitioner(), context); } //! 
Parallel iteration over a range of integers with explicit step, task group context, and simple partitioner template <typename Index, typename Function> + __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>) void parallel_for(Index first, Index last, Index step, const Function& f, const simple_partitioner& partitioner, task_group_context &context) { parallel_for_impl<Index,Function,const simple_partitioner>(first, last, step, f, partitioner, context); } //! Parallel iteration over a range of integers with explicit step, task group context, and auto partitioner template <typename Index, typename Function> + __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>) void parallel_for(Index first, Index last, Index step, const Function& f, const auto_partitioner& partitioner, task_group_context &context) { parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, partitioner, context); } //! Parallel iteration over a range of integers with explicit step, task group context, and static partitioner template <typename Index, typename Function> + __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>) void parallel_for(Index first, Index last, Index step, const Function& f, const static_partitioner& partitioner, task_group_context &context) { parallel_for_impl<Index,Function,const static_partitioner>(first, last, step, f, partitioner, context); } //! Parallel iteration over a range of integers with explicit step, task group context, and affinity partitioner template <typename Index, typename Function> + __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>) void parallel_for(Index first, Index last, Index step, const Function& f, affinity_partitioner& partitioner, task_group_context &context) { parallel_for_impl(first, last, step, f, partitioner, context); } //! 
Parallel iteration over a range of integers with a default step value, explicit task group context, and default partitioner template <typename Index, typename Function> + __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>) void parallel_for(Index first, Index last, const Function& f, task_group_context &context) { parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, auto_partitioner(), context); } //! Parallel iteration over a range of integers with a default step value, explicit task group context, and simple partitioner template <typename Index, typename Function> + __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>) void parallel_for(Index first, Index last, const Function& f, const simple_partitioner& partitioner, task_group_context &context) { parallel_for_impl<Index,Function,const simple_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context); } //! Parallel iteration over a range of integers with a default step value, explicit task group context, and auto partitioner template <typename Index, typename Function> + __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>) void parallel_for(Index first, Index last, const Function& f, const auto_partitioner& partitioner, task_group_context &context) { parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context); } //! 
Parallel iteration over a range of integers with a default step value, explicit task group context, and static partitioner template <typename Index, typename Function> + __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>) void parallel_for(Index first, Index last, const Function& f, const static_partitioner& partitioner, task_group_context &context) { parallel_for_impl<Index,Function,const static_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context); } //! Parallel iteration over a range of integers with a default step value, explicit task group context, and affinity_partitioner template <typename Index, typename Function> + __TBB_requires(parallel_for_index<Index> && parallel_for_function<Function, Index>) void parallel_for(Index first, Index last, const Function& f, affinity_partitioner& partitioner, task_group_context &context) { parallel_for_impl(first, last, static_cast<Index>(1), f, partitioner, context); } diff --git a/contrib/libs/tbb/include/oneapi/tbb/parallel_pipeline.h b/contrib/libs/tbb/include/oneapi/tbb/parallel_pipeline.h index 87a159c925..a204b9c4c6 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/parallel_pipeline.h +++ b/contrib/libs/tbb/include/oneapi/tbb/parallel_pipeline.h @@ -30,7 +30,7 @@ namespace tbb { namespace detail { namespace r1 { -void __TBB_EXPORTED_FUNC parallel_pipeline(task_group_context&, std::size_t, const d1::filter_node&); +TBB_EXPORT void __TBB_EXPORTED_FUNC parallel_pipeline(task_group_context&, std::size_t, const d1::filter_node&); } namespace d1 { diff --git a/contrib/libs/tbb/include/oneapi/tbb/partitioner.h b/contrib/libs/tbb/include/oneapi/tbb/partitioner.h index 37ac0a09d9..829ce84651 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/partitioner.h +++ b/contrib/libs/tbb/include/oneapi/tbb/partitioner.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the 
"License"); you may not use this file except in compliance with the License. @@ -78,7 +78,7 @@ class affinity_partitioner_base: no_copy { friend class affinity_partitioner; friend class affinity_partition_type; //! Array that remembers affinities of tree positions to affinity_id. - /** NULL if my_size==0. */ + /** nullptr if my_size==0. */ slot_id* my_array; //! Number of elements in my_array. std::size_t my_size; @@ -265,7 +265,7 @@ struct partition_type_base { template<typename StartType, typename Range> void work_balance(StartType &start, Range &range, const execution_data&) { - start.run_body( range ); // simple partitioner goes always here + start.run_body( range ); // static partitioner goes here } template<typename StartType, typename Range> @@ -301,6 +301,10 @@ struct adaptive_mode : partition_type_base<Partition> { static const unsigned factor = 1; adaptive_mode() : my_divisor(get_initial_auto_partitioner_divisor() / 4 * my_partition::factor) {} adaptive_mode(adaptive_mode &src, split) : my_divisor(do_split(src, split())) {} + adaptive_mode(adaptive_mode&, const proportional_split&) : my_divisor(0) + { + // left blank as my_divisor gets overridden in the successors' constructors + } /*! 
Override do_split methods in order to specify splitting strategy */ std::size_t do_split(adaptive_mode &src, split) { return src.my_divisor /= 2u; @@ -337,7 +341,11 @@ struct proportional_mode : adaptive_mode<Partition> { proportional_mode() : adaptive_mode<Partition>() {} proportional_mode(proportional_mode &src, split) : adaptive_mode<Partition>(src, split()) {} - proportional_mode(proportional_mode &src, const proportional_split& split_obj) { self().my_divisor = do_split(src, split_obj); } + proportional_mode(proportional_mode &src, const proportional_split& split_obj) + : adaptive_mode<Partition>(src, split_obj) + { + self().my_divisor = do_split(src, split_obj); + } std::size_t do_split(proportional_mode &src, const proportional_split& split_obj) { std::size_t portion = split_obj.right() * my_partition::factor; portion = (portion + my_partition::factor/2) & (0ul - my_partition::factor); @@ -430,13 +438,13 @@ struct dynamic_grainsize_mode : Mode { } depth_t max_depth() { return my_max_depth; } void align_depth(depth_t base) { - __TBB_ASSERT(base <= my_max_depth, 0); + __TBB_ASSERT(base <= my_max_depth, nullptr); my_max_depth -= base; } template<typename StartType, typename Range> void work_balance(StartType &start, Range &range, execution_data& ed) { if( !range.is_divisible() || !self().max_depth() ) { - start.run_body( range ); // simple partitioner goes always here + start.run_body( range ); } else { // do range pool range_vector<Range, range_pool_size> range_pool(range); @@ -478,8 +486,7 @@ struct dynamic_grainsize_mode : Mode { class auto_partition_type: public dynamic_grainsize_mode<adaptive_mode<auto_partition_type> > { public: - auto_partition_type( const auto_partitioner& ) - : dynamic_grainsize_mode<adaptive_mode<auto_partition_type> >() { + auto_partition_type( const auto_partitioner& ) { my_divisor *= __TBB_INITIAL_CHUNKS; } auto_partition_type( auto_partition_type& src, split) @@ -525,8 +532,7 @@ public: class static_partition_type : public 
linear_affinity_mode<static_partition_type> { public: typedef detail::proportional_split split_type; - static_partition_type( const static_partitioner& ) - : linear_affinity_mode<static_partition_type>() {} + static_partition_type( const static_partitioner& ) {} static_partition_type( static_partition_type& p, const proportional_split& split_obj ) : linear_affinity_mode<static_partition_type>(p, split_obj) {} }; @@ -537,13 +543,12 @@ class affinity_partition_type : public dynamic_grainsize_mode<linear_affinity_mo public: static const unsigned factor = 1 << factor_power; // number of slots in affinity array per task typedef detail::proportional_split split_type; - affinity_partition_type( affinity_partitioner_base& ap ) - : dynamic_grainsize_mode<linear_affinity_mode<affinity_partition_type> >() { + affinity_partition_type( affinity_partitioner_base& ap ) { __TBB_ASSERT( (factor&(factor-1))==0, "factor must be power of two" ); ap.resize(factor); my_array = ap.my_array; my_max_depth = factor_power + 1; - __TBB_ASSERT( my_max_depth < __TBB_RANGE_POOL_CAPACITY, 0 ); + __TBB_ASSERT( my_max_depth < __TBB_RANGE_POOL_CAPACITY, nullptr ); } affinity_partition_type(affinity_partition_type& p, split) : dynamic_grainsize_mode<linear_affinity_mode<affinity_partition_type> >(p, split()) diff --git a/contrib/libs/tbb/include/oneapi/tbb/profiling.h b/contrib/libs/tbb/include/oneapi/tbb/profiling.h index 4b62da2060..3bd2a42654 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/profiling.h +++ b/contrib/libs/tbb/include/oneapi/tbb/profiling.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -46,7 +46,7 @@ inline namespace d0 { }; //! Unicode support -#if (_WIN32||_WIN64) && !__MINGW32__ +#if (_WIN32||_WIN64) //! Unicode character type. Always wchar_t on Windows. 
using tchar = wchar_t; #else /* !WIN */ @@ -70,27 +70,27 @@ namespace d1 { } // namespace d1 namespace r1 { - void __TBB_EXPORTED_FUNC call_itt_notify(int t, void* ptr); - void __TBB_EXPORTED_FUNC create_itt_sync(void* ptr, const tchar* objtype, const tchar* objname); - void __TBB_EXPORTED_FUNC itt_make_task_group(d1::itt_domain_enum domain, void* group, unsigned long long group_extra, + TBB_EXPORT void __TBB_EXPORTED_FUNC call_itt_notify(int t, void* ptr); + TBB_EXPORT void __TBB_EXPORTED_FUNC create_itt_sync(void* ptr, const tchar* objtype, const tchar* objname); + TBB_EXPORT void __TBB_EXPORTED_FUNC itt_make_task_group(d1::itt_domain_enum domain, void* group, unsigned long long group_extra, void* parent, unsigned long long parent_extra, string_resource_index name_index); - void __TBB_EXPORTED_FUNC itt_task_begin(d1::itt_domain_enum domain, void* task, unsigned long long task_extra, + TBB_EXPORT void __TBB_EXPORTED_FUNC itt_task_begin(d1::itt_domain_enum domain, void* task, unsigned long long task_extra, void* parent, unsigned long long parent_extra, string_resource_index name_index); - void __TBB_EXPORTED_FUNC itt_task_end(d1::itt_domain_enum domain); - void __TBB_EXPORTED_FUNC itt_set_sync_name(void* obj, const tchar* name); - void __TBB_EXPORTED_FUNC itt_metadata_str_add(d1::itt_domain_enum domain, void* addr, unsigned long long addr_extra, + TBB_EXPORT void __TBB_EXPORTED_FUNC itt_task_end(d1::itt_domain_enum domain); + TBB_EXPORT void __TBB_EXPORTED_FUNC itt_set_sync_name(void* obj, const tchar* name); + TBB_EXPORT void __TBB_EXPORTED_FUNC itt_metadata_str_add(d1::itt_domain_enum domain, void* addr, unsigned long long addr_extra, string_resource_index key, const char* value); - void __TBB_EXPORTED_FUNC itt_metadata_ptr_add(d1::itt_domain_enum domain, void* addr, unsigned long long addr_extra, + TBB_EXPORT void __TBB_EXPORTED_FUNC itt_metadata_ptr_add(d1::itt_domain_enum domain, void* addr, unsigned long long addr_extra, string_resource_index key, void* 
value); - void __TBB_EXPORTED_FUNC itt_relation_add(d1::itt_domain_enum domain, void* addr0, unsigned long long addr0_extra, + TBB_EXPORT void __TBB_EXPORTED_FUNC itt_relation_add(d1::itt_domain_enum domain, void* addr0, unsigned long long addr0_extra, itt_relation relation, void* addr1, unsigned long long addr1_extra); - void __TBB_EXPORTED_FUNC itt_region_begin(d1::itt_domain_enum domain, void* region, unsigned long long region_extra, + TBB_EXPORT void __TBB_EXPORTED_FUNC itt_region_begin(d1::itt_domain_enum domain, void* region, unsigned long long region_extra, void* parent, unsigned long long parent_extra, string_resource_index /* name_index */); - void __TBB_EXPORTED_FUNC itt_region_end(d1::itt_domain_enum domain, void* region, unsigned long long region_extra); + TBB_EXPORT void __TBB_EXPORTED_FUNC itt_region_end(d1::itt_domain_enum domain, void* region, unsigned long long region_extra); } // namespace r1 namespace d1 { -#if TBB_USE_PROFILING_TOOLS && (_WIN32||_WIN64) && !__MINGW32__ +#if TBB_USE_PROFILING_TOOLS && (_WIN32||_WIN64) inline std::size_t multibyte_to_widechar(wchar_t* wcs, const char* mbs, std::size_t bufsize) { std::size_t len; mbstowcs_s(&len, wcs, bufsize, mbs, _TRUNCATE); @@ -100,7 +100,7 @@ namespace d1 { #if TBB_USE_PROFILING_TOOLS inline void create_itt_sync(void *ptr, const char *objtype, const char *objname) { -#if (_WIN32||_WIN64) && !__MINGW32__ +#if (_WIN32||_WIN64) std::size_t len_type = multibyte_to_widechar(nullptr, objtype, 0); wchar_t *type = new wchar_t[len_type]; multibyte_to_widechar(type, objtype, len_type); @@ -113,7 +113,7 @@ namespace d1 { #endif r1::create_itt_sync(ptr, type, name); -#if (_WIN32||_WIN64) && !__MINGW32__ +#if (_WIN32||_WIN64) delete[] type; delete[] name; #endif // WIN @@ -204,7 +204,7 @@ class event { const std::string my_name; static void emit_trace(const std::string &input) { - itt_metadata_str_add( ITT_DOMAIN_FLOW, NULL, FLOW_NULL, USER_EVENT, ( "FGA::DATAID::" + input ).c_str() ); + 
itt_metadata_str_add( ITT_DOMAIN_FLOW, nullptr, FLOW_NULL, USER_EVENT, ( "FGA::DATAID::" + input ).c_str() ); } public: diff --git a/contrib/libs/tbb/include/oneapi/tbb/queuing_rw_mutex.h b/contrib/libs/tbb/include/oneapi/tbb/queuing_rw_mutex.h index 6bb748f8a3..f8325dfd9c 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/queuing_rw_mutex.h +++ b/contrib/libs/tbb/include/oneapi/tbb/queuing_rw_mutex.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -20,6 +20,7 @@ #include "detail/_config.h" #include "detail/_namespace_injection.h" #include "detail/_assert.h" +#include "detail/_mutex_common.h" #include "profiling.h" @@ -45,7 +46,7 @@ public: create_itt_sync(this, "tbb::queuing_rw_mutex", ""); } - //! Destructor asserts if the mutex is acquired, i.e. q_tail is non-NULL + //! Destructor asserts if the mutex is acquired, i.e. q_tail is non-null ~queuing_rw_mutex() { __TBB_ASSERT(q_tail.load(std::memory_order_relaxed) == nullptr, "destruction of an acquired mutex"); } @@ -107,8 +108,10 @@ public: //! Downgrade writer to become a reader. bool downgrade_to_reader(); + bool is_writer() const; + private: - //! The pointer to the mutex owned, or NULL if not holding a mutex. + //! The pointer to the mutex owned, or nullptr if not holding a mutex. queuing_rw_mutex* my_mutex; //! 
The 'pointer' to the previous and next competitors for a mutex @@ -141,25 +144,26 @@ private: inline void set_name(queuing_rw_mutex& obj, const char* name) { itt_set_sync_name(&obj, name); } -#if (_WIN32||_WIN64) && !__MINGW32__ +#if (_WIN32||_WIN64) inline void set_name(queuing_rw_mutex& obj, const wchar_t* name) { itt_set_sync_name(&obj, name); } #endif //WIN #else inline void set_name(queuing_rw_mutex&, const char*) {} -#if (_WIN32||_WIN64) && !__MINGW32__ +#if (_WIN32||_WIN64) inline void set_name(queuing_rw_mutex&, const wchar_t*) {} #endif //WIN #endif } // namespace d1 namespace r1 { -void acquire(d1::queuing_rw_mutex&, d1::queuing_rw_mutex::scoped_lock&, bool); -bool try_acquire(d1::queuing_rw_mutex&, d1::queuing_rw_mutex::scoped_lock&, bool); -void release(d1::queuing_rw_mutex::scoped_lock&); -bool upgrade_to_writer(d1::queuing_rw_mutex::scoped_lock&); -bool downgrade_to_reader(d1::queuing_rw_mutex::scoped_lock&); +TBB_EXPORT void acquire(d1::queuing_rw_mutex&, d1::queuing_rw_mutex::scoped_lock&, bool); +TBB_EXPORT bool try_acquire(d1::queuing_rw_mutex&, d1::queuing_rw_mutex::scoped_lock&, bool); +TBB_EXPORT void release(d1::queuing_rw_mutex::scoped_lock&); +TBB_EXPORT bool upgrade_to_writer(d1::queuing_rw_mutex::scoped_lock&); +TBB_EXPORT bool downgrade_to_reader(d1::queuing_rw_mutex::scoped_lock&); +TBB_EXPORT bool is_writer(const d1::queuing_rw_mutex::scoped_lock&); } // namespace r1 namespace d1 { @@ -184,6 +188,10 @@ inline bool queuing_rw_mutex::scoped_lock::upgrade_to_writer() { inline bool queuing_rw_mutex::scoped_lock::downgrade_to_reader() { return r1::downgrade_to_reader(*this); } + +inline bool queuing_rw_mutex::scoped_lock::is_writer() const { + return r1::is_writer(*this); +} } // namespace d1 } // namespace detail diff --git a/contrib/libs/tbb/include/oneapi/tbb/rw_mutex.h b/contrib/libs/tbb/include/oneapi/tbb/rw_mutex.h new file mode 100644 index 0000000000..c3fbaf657a --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/rw_mutex.h @@ 
-0,0 +1,216 @@ +/* + Copyright (c) 2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_rw_mutex_H +#define __TBB_rw_mutex_H + +#include "detail/_namespace_injection.h" +#include "detail/_utils.h" +#include "detail/_waitable_atomic.h" +#include "detail/_scoped_lock.h" +#include "detail/_mutex_common.h" +#include "profiling.h" + +namespace tbb { +namespace detail { +namespace d1 { + +class rw_mutex { +public: + //! Constructors + rw_mutex() noexcept : m_state(0) { + create_itt_sync(this, "tbb::rw_mutex", ""); + } + + //! Destructor + ~rw_mutex() { + __TBB_ASSERT(!m_state.load(std::memory_order_relaxed), "destruction of an acquired mutex"); + } + + //! No Copy + rw_mutex(const rw_mutex&) = delete; + rw_mutex& operator=(const rw_mutex&) = delete; + + using scoped_lock = rw_scoped_lock<rw_mutex>; + + //! Mutex traits + static constexpr bool is_rw_mutex = true; + static constexpr bool is_recursive_mutex = false; + static constexpr bool is_fair_mutex = false; + + //! Acquire lock + void lock() { + call_itt_notify(prepare, this); + while (!try_lock()) { + if (!(m_state.load(std::memory_order_relaxed) & WRITER_PENDING)) { // no pending writers + m_state |= WRITER_PENDING; + } + + auto wakeup_condition = [&] { return !(m_state.load(std::memory_order_relaxed) & BUSY); }; + adaptive_wait_on_address(this, wakeup_condition, WRITER_CONTEXT); + } + + call_itt_notify(acquired, this); + } + + //! 
Try acquiring lock (non-blocking) + /** Return true if lock acquired; false otherwise. */ + bool try_lock() { + // for a writer: only possible to acquire if no active readers or writers + // Use relaxed memory fence is OK here because + // Acquire memory fence guaranteed by compare_exchange_strong() + state_type s = m_state.load(std::memory_order_relaxed); + if (!(s & BUSY)) { // no readers, no writers; mask is 1..1101 + if (m_state.compare_exchange_strong(s, WRITER)) { + call_itt_notify(acquired, this); + return true; // successfully stored writer flag + } + } + return false; + } + + //! Release lock + void unlock() { + call_itt_notify(releasing, this); + state_type curr_state = (m_state &= READERS | WRITER_PENDING); // Returns current state + + if (curr_state & WRITER_PENDING) { + r1::notify_by_address(this, WRITER_CONTEXT); + } else { + // It's possible that WRITER sleeps without WRITER_PENDING, + // because other thread might clear this bit at upgrade() + r1::notify_by_address_all(this); + } + } + + //! Lock shared ownership mutex + void lock_shared() { + call_itt_notify(prepare, this); + while (!try_lock_shared()) { + state_type has_writer = WRITER | WRITER_PENDING; + auto wakeup_condition = [&] { return !(m_state.load(std::memory_order_relaxed) & has_writer); }; + adaptive_wait_on_address(this, wakeup_condition, READER_CONTEXT); + } + __TBB_ASSERT(m_state.load(std::memory_order_relaxed) & READERS, "invalid state of a read lock: no readers"); + } + + //! 
Try lock shared ownership mutex + bool try_lock_shared() { + // for a reader: acquire if no active or waiting writers + // Use relaxed memory fence is OK here because + // Acquire memory fence guaranteed by fetch_add() + state_type has_writer = WRITER | WRITER_PENDING; + if (!(m_state.load(std::memory_order_relaxed) & has_writer)) { + if (m_state.fetch_add(ONE_READER) & has_writer) { + m_state -= ONE_READER; + r1::notify_by_address(this, WRITER_CONTEXT); + } else { + call_itt_notify(acquired, this); + return true; // successfully stored increased number of readers + } + } + return false; + } + + //! Unlock shared ownership mutex + void unlock_shared() { + __TBB_ASSERT(m_state.load(std::memory_order_relaxed) & READERS, "invalid state of a read lock: no readers"); + call_itt_notify(releasing, this); + + state_type curr_state = (m_state -= ONE_READER); // Returns current state + + if (curr_state & (WRITER_PENDING)) { + r1::notify_by_address(this, WRITER_CONTEXT); + } else { + // It's possible that WRITER sleeps without WRITER_PENDING, + // because other thread might clear this bit at upgrade() + r1::notify_by_address_all(this); + } + } + +private: + /** Internal non ISO C++ standard API **/ + //! This API is used through the scoped_lock class + + //! Upgrade reader to become a writer. + /** Returns whether the upgrade happened without releasing and re-acquiring the lock */ + bool upgrade() { + state_type s = m_state.load(std::memory_order_relaxed); + __TBB_ASSERT(s & READERS, "invalid state before upgrade: no readers "); + // Check and set writer-pending flag. 
+ // Required conditions: either no pending writers, or we are the only reader + // (with multiple readers and pending writer, another upgrade could have been requested) + while ((s & READERS) == ONE_READER || !(s & WRITER_PENDING)) { + if (m_state.compare_exchange_strong(s, s | WRITER | WRITER_PENDING)) { + auto wakeup_condition = [&] { return (m_state.load(std::memory_order_relaxed) & READERS) == ONE_READER; }; + while ((m_state.load(std::memory_order_relaxed) & READERS) != ONE_READER) { + adaptive_wait_on_address(this, wakeup_condition, WRITER_CONTEXT); + } + + __TBB_ASSERT((m_state.load(std::memory_order_relaxed) & (WRITER_PENDING|WRITER)) == (WRITER_PENDING | WRITER), + "invalid state when upgrading to writer"); + // Both new readers and writers are blocked at this time + m_state -= (ONE_READER + WRITER_PENDING); + return true; // successfully upgraded + } + } + // Slow reacquire + unlock_shared(); + lock(); + return false; + } + + //! Downgrade writer to a reader + void downgrade() { + __TBB_ASSERT(m_state.load(std::memory_order_relaxed) & WRITER, nullptr), + call_itt_notify(releasing, this); + m_state += (ONE_READER - WRITER); + + if (!(m_state & WRITER_PENDING)) { + r1::notify_by_address(this, READER_CONTEXT); + } + + __TBB_ASSERT(m_state.load(std::memory_order_relaxed) & READERS, "invalid state after downgrade: no readers"); + } + + using state_type = std::intptr_t; + static constexpr state_type WRITER = 1; + static constexpr state_type WRITER_PENDING = 2; + static constexpr state_type READERS = ~(WRITER | WRITER_PENDING); + static constexpr state_type ONE_READER = 4; + static constexpr state_type BUSY = WRITER | READERS; + + using context_type = std::uintptr_t; + static constexpr context_type WRITER_CONTEXT = 0; + static constexpr context_type READER_CONTEXT = 1; + friend scoped_lock; + //! 
State of lock + /** Bit 0 = writer is holding lock + Bit 1 = request by a writer to acquire lock (hint to readers to wait) + Bit 2..N = number of readers holding lock */ + std::atomic<state_type> m_state; +}; // class rw_mutex + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::rw_mutex; +} // namespace v1 + +} // namespace tbb + +#endif // __TBB_rw_mutex_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/spin_mutex.h b/contrib/libs/tbb/include/oneapi/tbb/spin_mutex.h index 7fde7e15af..e38c47c9d9 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/spin_mutex.h +++ b/contrib/libs/tbb/include/oneapi/tbb/spin_mutex.h @@ -18,11 +18,13 @@ #define __TBB_spin_mutex_H #include "detail/_namespace_injection.h" +#include "detail/_mutex_common.h" #include "profiling.h" #include "detail/_assert.h" #include "detail/_utils.h" +#include "detail/_scoped_lock.h" #include <atomic> @@ -54,54 +56,7 @@ public: spin_mutex(const spin_mutex&) = delete; spin_mutex& operator=(const spin_mutex&) = delete; - //! Represents acquisition of a mutex. - class scoped_lock { - //! Points to currently held mutex, or NULL if no lock is held. - spin_mutex* m_mutex; - - public: - //! Construct without acquiring a mutex. - constexpr scoped_lock() noexcept : m_mutex(nullptr) {} - - //! Construct and acquire lock on a mutex. - scoped_lock(spin_mutex& m) { - acquire(m); - } - - //! No Copy - scoped_lock(const scoped_lock&) = delete; - scoped_lock& operator=(const scoped_lock&) = delete; - - //! Acquire lock. - void acquire(spin_mutex& m) { - m_mutex = &m; - m.lock(); - } - - //! Try acquiring lock (non-blocking) - /** Return true if lock acquired; false otherwise. */ - bool try_acquire(spin_mutex& m) { - bool result = m.try_lock(); - if (result) { - m_mutex = &m; - } - return result; - } - - //! Release lock - void release() { - __TBB_ASSERT(m_mutex, "release on spin_mutex::scoped_lock that is not holding a lock"); - m_mutex->unlock(); - m_mutex = nullptr; - } - - //! 
Destroy lock. If holding a lock, releases the lock first. - ~scoped_lock() { - if (m_mutex) { - release(); - } - } - }; + using scoped_lock = unique_scoped_lock<spin_mutex>; //! Mutex traits static constexpr bool is_rw_mutex = false; @@ -141,14 +96,14 @@ protected: inline void set_name(spin_mutex& obj, const char* name) { itt_set_sync_name(&obj, name); } -#if (_WIN32||_WIN64) && !__MINGW32__ +#if (_WIN32||_WIN64) inline void set_name(spin_mutex& obj, const wchar_t* name) { itt_set_sync_name(&obj, name); } #endif //WIN #else inline void set_name(spin_mutex&, const char*) {} -#if (_WIN32||_WIN64) && !__MINGW32__ +#if (_WIN32||_WIN64) inline void set_name(spin_mutex&, const wchar_t*) {} #endif // WIN #endif diff --git a/contrib/libs/tbb/include/oneapi/tbb/spin_rw_mutex.h b/contrib/libs/tbb/include/oneapi/tbb/spin_rw_mutex.h index baf6b24b56..3fdae3500a 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/spin_rw_mutex.h +++ b/contrib/libs/tbb/include/oneapi/tbb/spin_rw_mutex.h @@ -18,11 +18,13 @@ #define __TBB_spin_rw_mutex_H #include "detail/_namespace_injection.h" +#include "detail/_mutex_common.h" #include "profiling.h" #include "detail/_assert.h" #include "detail/_utils.h" +#include "detail/_scoped_lock.h" #include <atomic> @@ -52,88 +54,7 @@ public: spin_rw_mutex(const spin_rw_mutex&) = delete; spin_rw_mutex& operator=(const spin_rw_mutex&) = delete; - //! The scoped locking pattern - /** It helps to avoid the common problem of forgetting to release lock. - It also nicely provides the "node" for queuing locks. */ - class scoped_lock { - public: - //! Construct lock that has not acquired a mutex. - /** Equivalent to zero-initialization of *this. */ - constexpr scoped_lock() noexcept : m_mutex(nullptr), m_is_writer(false) {} - - //! Acquire lock on given mutex. - scoped_lock(spin_rw_mutex& m, bool write = true) : m_mutex(nullptr) { - acquire(m, write); - } - - //! Release lock (if lock is held). - ~scoped_lock() { - if (m_mutex) { - release(); - } - } - - //! 
No Copy - scoped_lock(const scoped_lock&) = delete; - scoped_lock& operator=(const scoped_lock&) = delete; - - //! Acquire lock on given mutex. - void acquire(spin_rw_mutex& m, bool write = true) { - m_is_writer = write; - m_mutex = &m; - if (write) { - m_mutex->lock(); - } else { - m_mutex->lock_shared(); - } - } - - //! Try acquire lock on given mutex. - bool try_acquire(spin_rw_mutex& m, bool write = true) { - m_is_writer = write; - bool result = write ? m.try_lock() : m.try_lock_shared(); - if (result) { - m_mutex = &m; - } - return result; - } - - //! Release lock. - void release() { - spin_rw_mutex* m = m_mutex; - m_mutex = nullptr; - - if (m_is_writer) { - m->unlock(); - } else { - m->unlock_shared(); - } - } - - //! Upgrade reader to become a writer. - /** Returns whether the upgrade happened without releasing and re-acquiring the lock */ - bool upgrade_to_writer() { - if (m_is_writer) return true; // Already a writer - m_is_writer = true; - return m_mutex->upgrade(); - } - - //! Downgrade writer to become a reader. - bool downgrade_to_reader() { - if (!m_is_writer) return true; // Already a reader - m_mutex->downgrade(); - m_is_writer = false; - return true; - } - - protected: - //! The pointer to the current mutex that is held, or nullptr if no mutex is held. - spin_rw_mutex* m_mutex; - - //! If mutex != nullptr, then is_writer is true if holding a writer lock, false if holding a reader lock. - /** Not defined if not holding a lock. */ - bool m_is_writer; - }; + using scoped_lock = rw_scoped_lock<spin_rw_mutex>; //! Mutex traits static constexpr bool is_rw_mutex = true; @@ -258,6 +179,7 @@ protected: static constexpr state_type READERS = ~(WRITER | WRITER_PENDING); static constexpr state_type ONE_READER = 4; static constexpr state_type BUSY = WRITER | READERS; + friend scoped_lock; //! 
State of lock /** Bit 0 = writer is holding lock Bit 1 = request by a writer to acquire lock (hint to readers to wait) @@ -269,14 +191,14 @@ protected: inline void set_name(spin_rw_mutex& obj, const char* name) { itt_set_sync_name(&obj, name); } -#if (_WIN32||_WIN64) && !__MINGW32__ +#if (_WIN32||_WIN64) inline void set_name(spin_rw_mutex& obj, const wchar_t* name) { itt_set_sync_name(&obj, name); } #endif // WIN #else inline void set_name(spin_rw_mutex&, const char*) {} -#if (_WIN32||_WIN64) && !__MINGW32__ +#if (_WIN32||_WIN64) inline void set_name(spin_rw_mutex&, const wchar_t*) {} #endif // WIN #endif diff --git a/contrib/libs/tbb/include/oneapi/tbb/task_arena.h b/contrib/libs/tbb/include/oneapi/tbb/task_arena.h index f1d0f9dea3..69c8b94765 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/task_arena.h +++ b/contrib/libs/tbb/include/oneapi/tbb/task_arena.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -17,11 +17,16 @@ #ifndef __TBB_task_arena_H #define __TBB_task_arena_H -#include "detail/_namespace_injection.h" -#include "detail/_task.h" -#include "detail/_exception.h" +#include "detail/_config.h" + #include "detail/_aligned_space.h" +#include "detail/_attach.h" +#include "detail/_exception.h" +#include "detail/_namespace_injection.h" #include "detail/_small_object_pool.h" +#include "detail/_task.h" + +#include "detail/_task_handle.h" #if __TBB_ARENA_BINDING #include "info.h" @@ -78,19 +83,31 @@ namespace r1 { class arena; struct task_arena_impl; -void __TBB_EXPORTED_FUNC observe(d1::task_scheduler_observer&, bool); -void __TBB_EXPORTED_FUNC initialize(d1::task_arena_base&); -void __TBB_EXPORTED_FUNC terminate(d1::task_arena_base&); -bool __TBB_EXPORTED_FUNC attach(d1::task_arena_base&); -void __TBB_EXPORTED_FUNC execute(d1::task_arena_base&, d1::delegate_base&); -void __TBB_EXPORTED_FUNC wait(d1::task_arena_base&); -int __TBB_EXPORTED_FUNC max_concurrency(const d1::task_arena_base*); -void __TBB_EXPORTED_FUNC isolate_within_arena(d1::delegate_base& d, std::intptr_t); - -void __TBB_EXPORTED_FUNC enqueue(d1::task&, d1::task_arena_base*); -void __TBB_EXPORTED_FUNC submit(d1::task&, d1::task_group_context&, arena*, std::uintptr_t); +TBB_EXPORT void __TBB_EXPORTED_FUNC observe(d1::task_scheduler_observer&, bool); +TBB_EXPORT void __TBB_EXPORTED_FUNC initialize(d1::task_arena_base&); +TBB_EXPORT void __TBB_EXPORTED_FUNC terminate(d1::task_arena_base&); +TBB_EXPORT bool __TBB_EXPORTED_FUNC attach(d1::task_arena_base&); +TBB_EXPORT void __TBB_EXPORTED_FUNC execute(d1::task_arena_base&, d1::delegate_base&); +TBB_EXPORT void __TBB_EXPORTED_FUNC wait(d1::task_arena_base&); +TBB_EXPORT int __TBB_EXPORTED_FUNC max_concurrency(const d1::task_arena_base*); +TBB_EXPORT void __TBB_EXPORTED_FUNC isolate_within_arena(d1::delegate_base& d, std::intptr_t); + +TBB_EXPORT void __TBB_EXPORTED_FUNC enqueue(d1::task&, d1::task_arena_base*); +TBB_EXPORT void __TBB_EXPORTED_FUNC 
enqueue(d1::task&, d1::task_group_context&, d1::task_arena_base*); +TBB_EXPORT void __TBB_EXPORTED_FUNC submit(d1::task&, d1::task_group_context&, arena*, std::uintptr_t); } // namespace r1 +namespace d2 { +inline void enqueue_impl(task_handle&& th, d1::task_arena_base* ta) { + __TBB_ASSERT(th != nullptr, "Attempt to schedule empty task_handle"); + + auto& ctx = task_handle_accessor::ctx_of(th); + + // Do not access th after release + r1::enqueue(*task_handle_accessor::release(th), ctx, ta); +} +} //namespace d2 + namespace d1 { static constexpr int priority_stride = INT_MAX / 4; @@ -113,9 +130,9 @@ protected: std::atomic<do_once_state> my_initialization_state; - //! NULL if not currently initialized. + //! nullptr if not currently initialized. std::atomic<r1::arena*> my_arena; - static_assert(sizeof(std::atomic<r1::arena*>) == sizeof(r1::arena*), + static_assert(sizeof(std::atomic<r1::arena*>) == sizeof(r1::arena*), "To preserve backward compatibility we need the equal size of an atomic pointer and a pointer"); //! 
Concurrency level for deferred initialization @@ -192,6 +209,33 @@ R isolate_impl(F& f) { return func.consume_result(); } +template <typename F> +class enqueue_task : public task { + small_object_allocator m_allocator; + const F m_func; + + void finalize(const execution_data& ed) { + m_allocator.delete_object(this, ed); + } + task* execute(execution_data& ed) override { + m_func(); + finalize(ed); + return nullptr; + } + task* cancel(execution_data&) override { + __TBB_ASSERT_RELEASE(false, "Unhandled exception from enqueue task is caught"); + return nullptr; + } +public: + enqueue_task(const F& f, small_object_allocator& alloc) : m_allocator(alloc), m_func(f) {} + enqueue_task(F&& f, small_object_allocator& alloc) : m_allocator(alloc), m_func(std::move(f)) {} +}; + +template<typename F> +void enqueue_impl(F&& f, task_arena_base* ta) { + small_object_allocator alloc{}; + r1::enqueue(*alloc.new_object<enqueue_task<typename std::decay<F>::type>>(std::forward<F>(f), alloc), ta); +} /** 1-to-1 proxy representation class of scheduler's arena * Constructors set up settings only, real construction is deferred till the first method invocation * Destructor only removes one of the references to the inner arena representation. 
@@ -199,40 +243,11 @@ R isolate_impl(F& f) { */ class task_arena : public task_arena_base { - template <typename F> - class enqueue_task : public task { - small_object_allocator m_allocator; - const F m_func; - - void finalize(const execution_data& ed) { - m_allocator.delete_object(this, ed); - } - task* execute(execution_data& ed) override { - m_func(); - finalize(ed); - return nullptr; - } - task* cancel(execution_data&) override { - __TBB_ASSERT_RELEASE(false, "Unhandled exception from enqueue task is caught"); - return nullptr; - } - public: - enqueue_task(const F& f, small_object_allocator& alloc) : m_allocator(alloc), m_func(f) {} - enqueue_task(F&& f, small_object_allocator& alloc) : m_allocator(alloc), m_func(std::move(f)) {} - }; - void mark_initialized() { __TBB_ASSERT( my_arena.load(std::memory_order_relaxed), "task_arena initialization is incomplete" ); my_initialization_state.store(do_once_state::initialized, std::memory_order_release); } - template<typename F> - void enqueue_impl(F&& f) { - initialize(); - small_object_allocator alloc{}; - r1::enqueue(*alloc.new_object<enqueue_task<typename std::decay<F>::type>>(std::forward<F>(f), alloc), this); - } - template<typename R, typename F> R execute_impl(F& f) { initialize(); @@ -290,6 +305,11 @@ public: } } + //! Creates an instance of task_arena attached to the current arena of the thread + explicit task_arena(d1::attach) + : task_arena(attach{}) + {} + //! Forces allocation of the resources for the task_arena as specified in constructor arguments void initialize() { atomic_do_once([this]{ r1::initialize(*this); }, my_initialization_state); @@ -341,6 +361,11 @@ public: } } + //! Attaches this instance to the current arena of the thread + void initialize(d1::attach) { + initialize(attach{}); + } + //! Removes the reference to the internal arena representation. //! Not thread safe wrt concurrent invocations of other methods. 
void terminate() { @@ -367,7 +392,15 @@ public: template<typename F> void enqueue(F&& f) { - enqueue_impl(std::forward<F>(f)); + initialize(); + enqueue_impl(std::forward<F>(f), this); + } + + //! Enqueues a task into the arena to process a functor wrapped in task_handle, and immediately returns. + //! Does not require the calling thread to join the arena + void enqueue(d2::task_handle&& th) { + initialize(); + d2::enqueue_impl(std::move(th), this); } //! Joins the arena and executes a mutable functor, then returns @@ -423,15 +456,30 @@ inline auto isolate(F&& f) -> decltype(f()) { //! Returns the index, aka slot number, of the calling thread in its current arena inline int current_thread_index() { - int idx = r1::execution_slot(nullptr); - return idx == -1 ? task_arena_base::not_initialized : idx; + slot_id idx = r1::execution_slot(nullptr); + return idx == slot_id(-1) ? task_arena_base::not_initialized : int(idx); } +#if __TBB_PREVIEW_TASK_GROUP_EXTENSIONS +inline bool is_inside_task() { + return nullptr != current_context(); +} +#endif //__TBB_PREVIEW_TASK_GROUP_EXTENSIONS + //! 
Returns the maximal number of threads that can work inside the arena inline int max_concurrency() { return r1::max_concurrency(nullptr); } +inline void enqueue(d2::task_handle&& th) { + d2::enqueue_impl(std::move(th), nullptr); +} + +template<typename F> +inline void enqueue(F&& f) { + enqueue_impl(std::forward<F>(f), nullptr); +} + using r1::submit; } // namespace d1 @@ -439,11 +487,18 @@ using r1::submit; inline namespace v1 { using detail::d1::task_arena; +using detail::d1::attach; + +#if __TBB_PREVIEW_TASK_GROUP_EXTENSIONS +using detail::d1::is_inside_task; +#endif namespace this_task_arena { using detail::d1::current_thread_index; using detail::d1::max_concurrency; using detail::d1::isolate; + +using detail::d1::enqueue; } // namespace this_task_arena } // inline namespace v1 diff --git a/contrib/libs/tbb/include/oneapi/tbb/task_group.h b/contrib/libs/tbb/include/oneapi/tbb/task_group.h index e82553076a..2bbacd5578 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/task_group.h +++ b/contrib/libs/tbb/include/oneapi/tbb/task_group.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -19,15 +19,18 @@ #include "detail/_config.h" #include "detail/_namespace_injection.h" -#include "detail/_template_helpers.h" +#include "detail/_assert.h" #include "detail/_utils.h" +#include "detail/_template_helpers.h" #include "detail/_exception.h" #include "detail/_task.h" #include "detail/_small_object_pool.h" +#include "detail/_intrusive_list_node.h" +#include "detail/_task_handle.h" #include "profiling.h" -#include <functional> +#include <type_traits> #if _MSC_VER && !defined(__INTEL_COMPILER) // Suppress warning: structure was padded due to alignment specifier @@ -42,6 +45,7 @@ namespace d1 { class delegate_base; class task_arena_base; class task_group_context; +class task_group_base; } namespace r1 { @@ -53,32 +57,91 @@ class task_dispatcher; template <bool> class context_guard_helper; struct task_arena_impl; +class context_list; -void __TBB_EXPORTED_FUNC execute(d1::task_arena_base&, d1::delegate_base&); -void __TBB_EXPORTED_FUNC isolate_within_arena(d1::delegate_base&, std::intptr_t); +TBB_EXPORT void __TBB_EXPORTED_FUNC execute(d1::task_arena_base&, d1::delegate_base&); +TBB_EXPORT void __TBB_EXPORTED_FUNC isolate_within_arena(d1::delegate_base&, std::intptr_t); -void __TBB_EXPORTED_FUNC initialize(d1::task_group_context&); -void __TBB_EXPORTED_FUNC destroy(d1::task_group_context&); -void __TBB_EXPORTED_FUNC reset(d1::task_group_context&); -bool __TBB_EXPORTED_FUNC cancel_group_execution(d1::task_group_context&); -bool __TBB_EXPORTED_FUNC is_group_execution_cancelled(d1::task_group_context&); -void __TBB_EXPORTED_FUNC capture_fp_settings(d1::task_group_context&); +TBB_EXPORT void __TBB_EXPORTED_FUNC initialize(d1::task_group_context&); +TBB_EXPORT void __TBB_EXPORTED_FUNC destroy(d1::task_group_context&); +TBB_EXPORT void __TBB_EXPORTED_FUNC reset(d1::task_group_context&); +TBB_EXPORT bool __TBB_EXPORTED_FUNC cancel_group_execution(d1::task_group_context&); +TBB_EXPORT bool __TBB_EXPORTED_FUNC is_group_execution_cancelled(d1::task_group_context&); 
+TBB_EXPORT void __TBB_EXPORTED_FUNC capture_fp_settings(d1::task_group_context&); struct task_group_context_impl; } +namespace d2 { + +namespace { +template<typename F> +d1::task* task_ptr_or_nullptr(F&& f); +} + +template<typename F> +class function_task : public task_handle_task { + //TODO: apply empty base optimization here + const F m_func; + +private: + d1::task* execute(d1::execution_data& ed) override { + __TBB_ASSERT(ed.context == &this->ctx(), "The task group context should be used for all tasks"); + task* res = task_ptr_or_nullptr(m_func); + finalize(&ed); + return res; + } + d1::task* cancel(d1::execution_data& ed) override { + finalize(&ed); + return nullptr; + } +public: + template<typename FF> + function_task(FF&& f, d1::wait_context& wo, d1::task_group_context& ctx, d1::small_object_allocator& alloc) + : task_handle_task{wo, ctx, alloc}, + m_func(std::forward<FF>(f)) {} +}; + +#if __TBB_PREVIEW_TASK_GROUP_EXTENSIONS +namespace { + template<typename F> + d1::task* task_ptr_or_nullptr_impl(std::false_type, F&& f){ + task_handle th = std::forward<F>(f)(); + return task_handle_accessor::release(th); + } + + template<typename F> + d1::task* task_ptr_or_nullptr_impl(std::true_type, F&& f){ + std::forward<F>(f)(); + return nullptr; + } + + template<typename F> + d1::task* task_ptr_or_nullptr(F&& f){ + using is_void_t = std::is_void< + decltype(std::forward<F>(f)()) + >; + + return task_ptr_or_nullptr_impl(is_void_t{}, std::forward<F>(f)); + } +} +#else +namespace { + template<typename F> + d1::task* task_ptr_or_nullptr(F&& f){ + std::forward<F>(f)(); + return nullptr; + } +} // namespace +#endif // __TBB_PREVIEW_TASK_GROUP_EXTENSIONS +} // namespace d2 + namespace d1 { +// This structure is left here for backward compatibility check struct context_list_node { std::atomic<context_list_node*> prev{}; std::atomic<context_list_node*> next{}; - - void remove_relaxed() { - context_list_node* p = prev.load(std::memory_order_relaxed); - context_list_node* n = 
next.load(std::memory_order_relaxed); - p->next.store(n, std::memory_order_relaxed); - n->prev.store(p, std::memory_order_relaxed); - } }; //! Used to form groups of tasks @@ -118,47 +181,64 @@ private: //! Specifies whether cancellation was requested for this task group. std::atomic<std::uint32_t> my_cancellation_requested; - //! Version for run-time checks and behavioral traits of the context. - std::uint8_t my_version; + //! Versioning for run-time checks and behavioral traits of the context. + enum class task_group_context_version : std::uint8_t { + unused = 1 // ensure that new versions, if any, will not clash with previously used ones + }; + task_group_context_version my_version; //! The context traits. struct context_traits { bool fp_settings : 1; bool concurrent_wait : 1; bool bound : 1; + bool reserved1 : 1; + bool reserved2 : 1; + bool reserved3 : 1; + bool reserved4 : 1; + bool reserved5 : 1; } my_traits; static_assert(sizeof(context_traits) == 1, "Traits shall fit into one byte."); static constexpr std::uint8_t may_have_children = 1; //! The context internal state (currently only may_have_children). - std::atomic<std::uint8_t> my_state; + std::atomic<std::uint8_t> my_may_have_children; - enum class lifetime_state : std::uint8_t { + enum class state : std::uint8_t { created, locked, isolated, bound, - detached, - dying + dead, + proxy = std::uint8_t(-1) //the context is not the real one, but proxy to other one }; //! The synchronization machine state to manage lifetime. - std::atomic<lifetime_state> my_lifetime_state; + std::atomic<state> my_state; + + union { + //! Pointer to the context of the parent cancellation group. nullptr for isolated contexts. + task_group_context* my_parent; - //! Pointer to the context of the parent cancellation group. NULL for isolated contexts. - task_group_context* my_parent; + //! Pointer to the actual context 'this' context represents a proxy of. + task_group_context* my_actual_context; + }; //! 
Thread data instance that registered this context in its list. - std::atomic<r1::thread_data*> my_owner; + r1::context_list* my_context_list; + static_assert(sizeof(std::atomic<r1::thread_data*>) == sizeof(r1::context_list*), "To preserve backward compatibility these types should have the same size"); //! Used to form the thread specific list of contexts without additional memory allocation. /** A context is included into the list of the current thread when its binding to its parent happens. Any context can be present in the list of one thread only. **/ - context_list_node my_node; + intrusive_list_node my_node; + static_assert(sizeof(intrusive_list_node) == sizeof(context_list_node), "To preserve backward compatibility these types should have the same size"); //! Pointer to the container storing exception being propagated across this task group. - r1::tbb_exception_ptr* my_exception; + std::atomic<r1::tbb_exception_ptr*> my_exception; + static_assert(sizeof(std::atomic<r1::tbb_exception_ptr*>) == sizeof(r1::tbb_exception_ptr*), + "backward compatibility check"); //! Used to set and maintain stack stitching point for Intel Performance Tools. 
void* my_itt_caller; @@ -167,34 +247,68 @@ private: string_resource_index my_name; char padding[max_nfs_size - - sizeof(std::uint64_t) // my_cpu_ctl_env - - sizeof(std::atomic<std::uint32_t>) // my_cancellation_requested - - sizeof(std::uint8_t) // my_version - - sizeof(context_traits) // my_traits - - sizeof(std::atomic<std::uint8_t>) // my_state - - sizeof(std::atomic<lifetime_state>) // my_lifetime_state - - sizeof(task_group_context*) // my_parent - - sizeof(std::atomic<r1::thread_data*>) // my_owner - - sizeof(context_list_node) // my_node - - sizeof(r1::tbb_exception_ptr*) // my_exception - - sizeof(void*) // my_itt_caller - - sizeof(string_resource_index) // my_name + - sizeof(std::uint64_t) // my_cpu_ctl_env + - sizeof(std::atomic<std::uint32_t>) // my_cancellation_requested + - sizeof(std::uint8_t) // my_version + - sizeof(context_traits) // my_traits + - sizeof(std::atomic<std::uint8_t>) // my_state + - sizeof(std::atomic<state>) // my_state + - sizeof(task_group_context*) // my_parent + - sizeof(r1::context_list*) // my_context_list + - sizeof(intrusive_list_node) // my_node + - sizeof(std::atomic<r1::tbb_exception_ptr*>) // my_exception + - sizeof(void*) // my_itt_caller + - sizeof(string_resource_index) // my_name ]; task_group_context(context_traits t, string_resource_index name) - : my_version{}, my_name{ name } { + : my_version{task_group_context_version::unused}, my_name{name} + { my_traits = t; // GCC4.8 issues warning list initialization for bitset (missing-field-initializers) r1::initialize(*this); } + task_group_context(task_group_context* actual_context) + : my_version{task_group_context_version::unused} + , my_state{state::proxy} + , my_actual_context{actual_context} + { + __TBB_ASSERT(my_actual_context, "Passed pointer value points to nothing."); + my_name = actual_context->my_name; + + // no need to initialize 'this' context as it acts as a proxy for my_actual_context, which + // initialization is a user-side responsibility. 
+ } + static context_traits make_traits(kind_type relation_with_parent, std::uintptr_t user_traits) { context_traits ct; - ct.bound = relation_with_parent == bound; ct.fp_settings = (user_traits & fp_settings) == fp_settings; ct.concurrent_wait = (user_traits & concurrent_wait) == concurrent_wait; + ct.bound = relation_with_parent == bound; + ct.reserved1 = ct.reserved2 = ct.reserved3 = ct.reserved4 = ct.reserved5 = false; return ct; } + bool is_proxy() const { + return my_state.load(std::memory_order_relaxed) == state::proxy; + } + + task_group_context& actual_context() noexcept { + if (is_proxy()) { + __TBB_ASSERT(my_actual_context, "Actual task_group_context is not set."); + return *my_actual_context; + } + return *this; + } + + const task_group_context& actual_context() const noexcept { + if (is_proxy()) { + __TBB_ASSERT(my_actual_context, "Actual task_group_context is not set."); + return *my_actual_context; + } + return *this; + } + public: //! Default & binding constructor. /** By default a bound context is created. That is this context will be bound @@ -227,12 +341,17 @@ public: : task_group_context(make_traits(relation_with_parent, t), CUSTOM_CTX) {} // Custom constructor for instrumentation of oneTBB algorithm - task_group_context (string_resource_index name ) + task_group_context(string_resource_index name ) : task_group_context(make_traits(bound, default_traits), name) {} // Do not introduce any logic on user side since it might break state propagation assumptions ~task_group_context() { - r1::destroy(*this); + // When 'this' serves as a proxy, the initialization does not happen - nor should the + // destruction. + if (!is_proxy()) + { + r1::destroy(*this); + } } //! Forcefully reinitializes the context after the task tree it was associated with is completed. @@ -244,7 +363,7 @@ public: The method does not change the context's parent if it is set. **/ void reset() { - r1::reset(*this); + r1::reset(actual_context()); } //! 
Initiates cancellation of all tasks in this cancellation group and its subordinate groups. @@ -256,12 +375,12 @@ public: that when this method is concurrently called on the same not yet cancelled context, true will be returned by one and only one invocation. **/ bool cancel_group_execution() { - return r1::cancel_group_execution(*this); + return r1::cancel_group_execution(actual_context()); } //! Returns true if the context received cancellation request. bool is_group_execution_cancelled() { - return r1::is_group_execution_cancelled(*this); + return r1::is_group_execution_cancelled(actual_context()); } #if __TBB_FP_CONTEXT @@ -274,15 +393,16 @@ public: The method does not change the FPU control settings of the context's parent. **/ void capture_fp_settings() { - r1::capture_fp_settings(*this); + r1::capture_fp_settings(actual_context()); } #endif //! Returns the user visible context trait std::uintptr_t traits() const { std::uintptr_t t{}; - t |= my_traits.fp_settings ? fp_settings : 0; - t |= my_traits.concurrent_wait ? concurrent_wait : 0; + const task_group_context& ctx = actual_context(); + t |= ctx.my_traits.fp_settings ? fp_settings : 0; + t |= ctx.my_traits.concurrent_wait ? 
concurrent_wait : 0; return t; } private: @@ -294,6 +414,7 @@ private: friend class r1::context_guard_helper; friend struct r1::task_arena_impl; friend struct r1::task_group_context_impl; + friend class task_group_base; }; // class task_group_context static_assert(sizeof(task_group_context) == 128, "Wrong size of task_group_context"); @@ -328,9 +449,9 @@ class function_task : public task { allocator.deallocate(this, ed); } task* execute(execution_data& ed) override { - m_func(); + task* res = d2::task_ptr_or_nullptr(m_func); finalize(ed); - return nullptr; + return res; } task* cancel(execution_data& ed) override { finalize(ed); @@ -357,9 +478,9 @@ class function_stack_task : public task { m_wait_ctx.release(); } task* execute(execution_data&) override { - m_func(); + task* res = d2::task_ptr_or_nullptr(m_func); finalize(); - return nullptr; + return res; } task* cancel(execution_data&) override { finalize(); @@ -380,11 +501,28 @@ protected: m_wait_ctx.reserve(); bool cancellation_status = false; try_call([&] { - execute_and_wait(t, m_context, m_wait_ctx, m_context); + execute_and_wait(t, context(), m_wait_ctx, context()); }).on_completion([&] { // TODO: the reset method is not thread-safe. Ensure the correct behavior. - cancellation_status = m_context.is_group_execution_cancelled(); - m_context.reset(); + cancellation_status = context().is_group_execution_cancelled(); + context().reset(); + }); + return cancellation_status ? canceled : complete; + } + + task_group_status internal_run_and_wait(d2::task_handle&& h) { + __TBB_ASSERT(h != nullptr, "Attempt to schedule empty task_handle"); + + using acs = d2::task_handle_accessor; + __TBB_ASSERT(&acs::ctx_of(h) == &context(), "Attempt to schedule task_handle into different task_group"); + + bool cancellation_status = false; + try_call([&] { + execute_and_wait(*acs::release(h), context(), m_wait_ctx, context()); + }).on_completion([&] { + // TODO: the reset method is not thread-safe. Ensure the correct behavior. 
+ cancellation_status = context().is_group_execution_cancelled(); + context().reset(); }); return cancellation_status ? canceled : complete; } @@ -396,12 +534,30 @@ protected: return alloc.new_object<function_task<typename std::decay<F>::type>>(std::forward<F>(f), m_wait_ctx, alloc); } + task_group_context& context() noexcept { + return m_context.actual_context(); + } + + template<typename F> + d2::task_handle prepare_task_handle(F&& f) { + m_wait_ctx.reserve(); + small_object_allocator alloc{}; + using function_task_t = d2::function_task<typename std::decay<F>::type>; + d2::task_handle_task* function_task_p = alloc.new_object<function_task_t>(std::forward<F>(f), m_wait_ctx, context(), alloc); + + return d2::task_handle_accessor::construct(function_task_p); + } + public: task_group_base(uintptr_t traits = 0) : m_wait_ctx(0) , m_context(task_group_context::bound, task_group_context::default_traits | traits) - { - } + {} + + task_group_base(task_group_context& ctx) + : m_wait_ctx(0) + , m_context(&ctx) + {} ~task_group_base() noexcept(false) { if (m_wait_ctx.continue_execution()) { @@ -412,9 +568,9 @@ public: #endif // Always attempt to do proper cleanup to avoid inevitable memory corruption // in case of missing wait (for the sake of better testability & debuggability) - if (!m_context.is_group_execution_cancelled()) + if (!context().is_group_execution_cancelled()) cancel(); - d1::wait(m_wait_ctx, m_context); + d1::wait(m_wait_ctx, context()); if (!stack_unwinding_in_progress) throw_exception(exception_id::missing_wait); } @@ -423,33 +579,53 @@ public: task_group_status wait() { bool cancellation_status = false; try_call([&] { - d1::wait(m_wait_ctx, m_context); + d1::wait(m_wait_ctx, context()); }).on_completion([&] { // TODO: the reset method is not thread-safe. Ensure the correct behavior. cancellation_status = m_context.is_group_execution_cancelled(); - m_context.reset(); + context().reset(); }); return cancellation_status ? 
canceled : complete; } void cancel() { - m_context.cancel_group_execution(); + context().cancel_group_execution(); } }; // class task_group_base class task_group : public task_group_base { public: task_group() : task_group_base(task_group_context::concurrent_wait) {} + task_group(task_group_context& ctx) : task_group_base(ctx) {} template<typename F> void run(F&& f) { - spawn(*prepare_task(std::forward<F>(f)), m_context); + spawn(*prepare_task(std::forward<F>(f)), context()); + } + + void run(d2::task_handle&& h) { + __TBB_ASSERT(h != nullptr, "Attempt to schedule empty task_handle"); + + using acs = d2::task_handle_accessor; + __TBB_ASSERT(&acs::ctx_of(h) == &context(), "Attempt to schedule task_handle into different task_group"); + + spawn(*acs::release(h), context()); + } + + template<typename F> + d2::task_handle defer(F&& f) { + return prepare_task_handle(std::forward<F>(f)); + } template<typename F> task_group_status run_and_wait(const F& f) { return internal_run_and_wait(f); } + + task_group_status run_and_wait(d2::task_handle&& h) { + return internal_run_and_wait(std::move(h)); + } }; // class task_group #if TBB_PREVIEW_ISOLATED_TASK_GROUP @@ -496,11 +672,23 @@ class isolated_task_group : public task_group { return reinterpret_cast<intptr_t>(this); } public: - isolated_task_group () : task_group() {} + isolated_task_group() : task_group() {} + + isolated_task_group(task_group_context& ctx) : task_group(ctx) {} template<typename F> void run(F&& f) { - spawn_delegate sd(prepare_task(std::forward<F>(f)), m_context); + spawn_delegate sd(prepare_task(std::forward<F>(f)), context()); + r1::isolate_within_arena(sd, this_isolation()); + } + + void run(d2::task_handle&& h) { + __TBB_ASSERT(h != nullptr, "Attempt to schedule empty task_handle"); + + using acs = d2::task_handle_accessor; + __TBB_ASSERT(&acs::ctx_of(h) == &context(), "Attempt to schedule task_handle into different task_group"); + + spawn_delegate sd(acs::release(h), context()); 
r1::isolate_within_arena(sd, this_isolation()); } @@ -545,6 +733,8 @@ using detail::d1::canceled; using detail::d1::is_current_task_group_canceling; using detail::r1::missing_wait; + +using detail::d2::task_handle; } } // namespace tbb diff --git a/contrib/libs/tbb/include/oneapi/tbb/task_scheduler_observer.h b/contrib/libs/tbb/include/oneapi/tbb/task_scheduler_observer.h index 276ca70707..a228cfe1b9 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/task_scheduler_observer.h +++ b/contrib/libs/tbb/include/oneapi/tbb/task_scheduler_observer.h @@ -36,7 +36,7 @@ class observer_list; /** For local observers the method can be used only when the current thread has the task scheduler initialized or is attached to an arena. Repeated calls with the same state are no-ops. **/ -void __TBB_EXPORTED_FUNC observe(d1::task_scheduler_observer&, bool state = true); +TBB_EXPORT void __TBB_EXPORTED_FUNC observe(d1::task_scheduler_observer&, bool state = true); } namespace d1 { @@ -88,7 +88,7 @@ public: It is recommended to disable observation before destructor of a derived class starts, otherwise it can lead to concurrent notification callback on partly destroyed object **/ virtual ~task_scheduler_observer() { - if (my_proxy.load(std::memory_order_relaxed)) { + if (my_proxy.load(std::memory_order_acquire)) { observe(false); } } diff --git a/contrib/libs/tbb/include/oneapi/tbb/tbb_allocator.h b/contrib/libs/tbb/include/oneapi/tbb/tbb_allocator.h index 3da61a009d..82b968e978 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/tbb_allocator.h +++ b/contrib/libs/tbb/include/oneapi/tbb/tbb_allocator.h @@ -30,9 +30,9 @@ namespace tbb { namespace detail { namespace r1 { -void* __TBB_EXPORTED_FUNC allocate_memory(std::size_t size); -void __TBB_EXPORTED_FUNC deallocate_memory(void* p); -bool __TBB_EXPORTED_FUNC is_tbbmalloc_used(); +TBB_EXPORT void* __TBB_EXPORTED_FUNC allocate_memory(std::size_t size); +TBB_EXPORT void __TBB_EXPORTED_FUNC deallocate_memory(void* p); +TBB_EXPORT bool 
__TBB_EXPORTED_FUNC is_tbbmalloc_used(); } namespace d1 { diff --git a/contrib/libs/tbb/include/oneapi/tbb/version.h b/contrib/libs/tbb/include/oneapi/tbb/version.h index 1e3507cd9b..0c52c07878 100644 --- a/contrib/libs/tbb/include/oneapi/tbb/version.h +++ b/contrib/libs/tbb/include/oneapi/tbb/version.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -23,7 +23,7 @@ // Product version #define TBB_VERSION_MAJOR 2021 // Update version -#define TBB_VERSION_MINOR 2 +#define TBB_VERSION_MINOR 7 // "Patch" version for custom releases #define TBB_VERSION_PATCH 0 // Suffix string @@ -34,7 +34,7 @@ // OneAPI oneTBB specification version #define ONETBB_SPEC_VERSION "1.0" // Full interface version -#define TBB_INTERFACE_VERSION 12020 +#define TBB_INTERFACE_VERSION 12070 // Major interface version #define TBB_INTERFACE_VERSION_MAJOR (TBB_INTERFACE_VERSION/1000) // Minor interface version @@ -96,13 +96,13 @@ * The returned pointer is an address of a string in the shared library. * It can be different than the TBB_VERSION_STRING obtained at compile time. */ -extern "C" const char* __TBB_EXPORTED_FUNC TBB_runtime_version(); +extern "C" TBB_EXPORT const char* __TBB_EXPORTED_FUNC TBB_runtime_version(); //! The function returns the interface version of the oneTBB shared library being used. /** * The returned version is determined at runtime, not at compile/link time. * It can be different than the value of TBB_INTERFACE_VERSION obtained at compile time. 
*/ -extern "C" int __TBB_EXPORTED_FUNC TBB_runtime_interface_version(); +extern "C" TBB_EXPORT int __TBB_EXPORTED_FUNC TBB_runtime_interface_version(); #endif // __TBB_version_H diff --git a/contrib/libs/tbb/src/tbb/address_waiter.cpp b/contrib/libs/tbb/src/tbb/address_waiter.cpp new file mode 100644 index 0000000000..1fd3dea8e1 --- /dev/null +++ b/contrib/libs/tbb/src/tbb/address_waiter.cpp @@ -0,0 +1,106 @@ +/* + Copyright (c) 2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "oneapi/tbb/detail/_utils.h" +#include "governor.h" +#include "concurrent_monitor.h" +#include "oneapi/tbb/detail/_waitable_atomic.h" + +#include <type_traits> + +namespace tbb { +namespace detail { +namespace r1 { + +struct address_context { + address_context() = default; + + address_context(void* address, std::uintptr_t context) : + my_address(address), my_context(context) + {} + + void* my_address{nullptr}; + std::uintptr_t my_context{0}; +}; + +class address_waiter : public concurrent_monitor_base<address_context> { + using base_type = concurrent_monitor_base<address_context>; +public: + using base_type::base_type; + /** per-thread descriptor for concurrent_monitor */ + using thread_context = sleep_node<address_context>; +}; + +// 1024 is a rough estimate based on two assumptions: +// 1) there are no more than 1000 threads in the application; +// 2) the mutexes are optimized for short critical sections less than a couple of microseconds, +// which is less than 1/1000 
of a time slice. +// In the worst case, we have single mutex that is locked and its thread is preempted. +// Therefore, the probability of a collision while taking unrelated mutex is about 1/size of a table. +static constexpr std::size_t num_address_waiters = 2 << 10; +static_assert(std::is_standard_layout<address_waiter>::value, + "address_waiter must be with standard layout"); +static address_waiter address_waiter_table[num_address_waiters]; + +void clear_address_waiter_table() { + for (std::size_t i = 0; i < num_address_waiters; ++i) { + address_waiter_table[i].destroy(); + } +} + +static address_waiter& get_address_waiter(void* address) { + std::uintptr_t tag = std::uintptr_t(address); + return address_waiter_table[((tag >> 5) ^ tag) % num_address_waiters]; +} + +void wait_on_address(void* address, d1::delegate_base& predicate, std::uintptr_t context) { + address_waiter& waiter = get_address_waiter(address); + waiter.wait<address_waiter::thread_context>(predicate, address_context{address, context}); +} + +void notify_by_address(void* address, std::uintptr_t target_context) { + address_waiter& waiter = get_address_waiter(address); + + auto predicate = [address, target_context] (address_context ctx) { + return ctx.my_address == address && ctx.my_context == target_context; + }; + + waiter.notify_relaxed(predicate); +} + +void notify_by_address_one(void* address) { + address_waiter& waiter = get_address_waiter(address); + + auto predicate = [address] (address_context ctx) { + return ctx.my_address == address; + }; + + waiter.notify_one_relaxed(predicate); +} + +void notify_by_address_all(void* address) { + address_waiter& waiter = get_address_waiter(address); + + auto predicate = [address] (address_context ctx) { + return ctx.my_address == address; + }; + + waiter.notify_relaxed(predicate); +} + +} // namespace r1 +} // namespace detail +} // namespace tbb diff --git a/contrib/libs/tbb/src/tbb/allocator.cpp b/contrib/libs/tbb/src/tbb/allocator.cpp index 
6bf5a0be01..5453aeab12 100644 --- a/contrib/libs/tbb/src/tbb/allocator.cpp +++ b/contrib/libs/tbb/src/tbb/allocator.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -19,17 +19,37 @@ #include "oneapi/tbb/detail/_exception.h" #include "oneapi/tbb/detail/_assert.h" #include "oneapi/tbb/detail/_utils.h" +#include "oneapi/tbb/tbb_allocator.h" // Is this OK? +#include "oneapi/tbb/cache_aligned_allocator.h" #include "dynamic_link.h" #include "misc.h" #include <cstdlib> -#if _WIN32 || _WIN64 -#include <Windows.h> +#ifdef _WIN32 +#include <windows.h> #else #include <dlfcn.h> -#endif /* _WIN32||_WIN64 */ +#endif + +#if (!defined(_WIN32) && !defined(_WIN64)) || defined(__CYGWIN__) +#include <stdlib.h> // posix_memalign, free +// With glibc, uClibc and musl on Linux and bionic on Android it is safe to use memalign(), as the allocated memory +// can be freed with free(). It is also better to use memalign() since posix_memalign() is just a wrapper on top of +// memalign() and it offers nothing but overhead due to inconvenient interface. This is likely the case with other +// standard libraries as well, and more libraries can be added to the preprocessor check below. Unfortunately, we +// can't detect musl, so we simply enable memalign() on Linux and Android in general. +#if defined(linux) || defined(__linux) || defined(__linux__) || defined(__ANDROID__) +#include <malloc.h> // memalign +#define __TBB_USE_MEMALIGN +#else +#define __TBB_USE_POSIX_MEMALIGN +#endif +#elif defined(_MSC_VER) || defined(__MINGW32__) +#include <malloc.h> // _aligned_malloc, _aligned_free +#define __TBB_USE_MSVC_ALIGNED_MALLOC +#endif #if __TBB_WEAK_SYMBOLS_PRESENT @@ -55,7 +75,9 @@ namespace r1 { static void* initialize_allocate_handler(std::size_t size); //! 
Handler for memory allocation -static void* (*allocate_handler)(std::size_t size) = &initialize_allocate_handler; +using allocate_handler_type = void* (*)(std::size_t size); +static std::atomic<allocate_handler_type> allocate_handler{ &initialize_allocate_handler }; +allocate_handler_type allocate_handler_unsafe = nullptr; //! Handler for memory deallocation static void (*deallocate_handler)(void* pointer) = nullptr; @@ -63,23 +85,25 @@ static void (*deallocate_handler)(void* pointer) = nullptr; //! Initialization routine used for first indirect call via cache_aligned_allocate_handler. static void* initialize_cache_aligned_allocate_handler(std::size_t n, std::size_t alignment); -//! Allocates memory using standard malloc. It is used when scalable_allocator is not available +//! Allocates overaligned memory using standard memory allocator. It is used when scalable_allocator is not available. static void* std_cache_aligned_allocate(std::size_t n, std::size_t alignment); -//! Allocates memory using standard free. It is used when scalable_allocator is not available +//! Deallocates overaligned memory using standard memory allocator. It is used when scalable_allocator is not available. static void std_cache_aligned_deallocate(void* p); //! Handler for padded memory allocation -static void* (*cache_aligned_allocate_handler)(std::size_t n, std::size_t alignment) = &initialize_cache_aligned_allocate_handler; +using cache_aligned_allocate_handler_type = void* (*)(std::size_t n, std::size_t alignment); +static std::atomic<cache_aligned_allocate_handler_type> cache_aligned_allocate_handler{ &initialize_cache_aligned_allocate_handler }; +cache_aligned_allocate_handler_type cache_aligned_allocate_handler_unsafe = nullptr; //! Handler for padded memory deallocation static void (*cache_aligned_deallocate_handler)(void* p) = nullptr; //! Table describing how to link the handlers. 
static const dynamic_link_descriptor MallocLinkTable[] = { - DLD(scalable_malloc, allocate_handler), + DLD(scalable_malloc, allocate_handler_unsafe), DLD(scalable_free, deallocate_handler), - DLD(scalable_aligned_malloc, cache_aligned_allocate_handler), + DLD(scalable_aligned_malloc, cache_aligned_allocate_handler_unsafe), DLD(scalable_aligned_free, cache_aligned_deallocate_handler), }; @@ -97,7 +121,7 @@ static const dynamic_link_descriptor MallocLinkTable[] = { #define MALLOCLIB_NAME "libtbbmalloc" DEBUG_SUFFIX ".dylib" #elif __FreeBSD__ || __NetBSD__ || __OpenBSD__ || __sun || _AIX || __ANDROID__ #define MALLOCLIB_NAME "libtbbmalloc" DEBUG_SUFFIX ".so" -#elif __linux__ // Note that order of these #elif's is important! +#elif __unix__ // Note that order of these #elif's is important! #define MALLOCLIB_NAME "libtbbmalloc" DEBUG_SUFFIX ".so.2" #else #error Unknown OS @@ -108,19 +132,22 @@ static const dynamic_link_descriptor MallocLinkTable[] = { The routine attempts to dynamically link with the TBB memory allocator. If that allocator is not found, it links to malloc and free. */ void initialize_handler_pointers() { - __TBB_ASSERT(allocate_handler == &initialize_allocate_handler, NULL); + __TBB_ASSERT(allocate_handler == &initialize_allocate_handler, nullptr); bool success = dynamic_link(MALLOCLIB_NAME, MallocLinkTable, 4); if(!success) { // If unsuccessful, set the handlers to the default routines. // This must be done now, and not before FillDynamicLinks runs, because if other // threads call the handlers, we want them to go through the DoOneTimeInitializations logic, // which forces them to wait. 
- allocate_handler = &std::malloc; + allocate_handler_unsafe = &std::malloc; deallocate_handler = &std::free; - cache_aligned_allocate_handler = &std_cache_aligned_allocate; + cache_aligned_allocate_handler_unsafe = &std_cache_aligned_allocate; cache_aligned_deallocate_handler = &std_cache_aligned_deallocate; } + allocate_handler.store(allocate_handler_unsafe, std::memory_order_release); + cache_aligned_allocate_handler.store(cache_aligned_allocate_handler_unsafe, std::memory_order_release); + PrintExtraVersionInfo( "ALLOCATOR", success?"scalable_malloc":"malloc" ); } @@ -132,14 +159,14 @@ void initialize_cache_aligned_allocator() { //! Executed on very first call through allocate_handler static void* initialize_allocate_handler(std::size_t size) { initialize_cache_aligned_allocator(); - __TBB_ASSERT(allocate_handler != &initialize_allocate_handler, NULL); + __TBB_ASSERT(allocate_handler != &initialize_allocate_handler, nullptr); return (*allocate_handler)(size); } //! Executed on very first call through cache_aligned_allocate_handler static void* initialize_cache_aligned_allocate_handler(std::size_t bytes, std::size_t alignment) { initialize_cache_aligned_allocator(); - __TBB_ASSERT(cache_aligned_allocate_handler != &initialize_cache_aligned_allocate_handler, NULL); + __TBB_ASSERT(cache_aligned_allocate_handler != &initialize_cache_aligned_allocate_handler, nullptr); return (*cache_aligned_allocate_handler)(bytes, alignment); } @@ -159,10 +186,10 @@ void* __TBB_EXPORTED_FUNC cache_aligned_allocate(std::size_t size) { if (size + cache_line_size < size) { throw_exception(exception_id::bad_alloc); } - // scalable_aligned_malloc considers zero size request an error, and returns NULL + // scalable_aligned_malloc considers zero size request an error, and returns nullptr if (size == 0) size = 1; - void* result = cache_aligned_allocate_handler(size, cache_line_size); + void* result = cache_aligned_allocate_handler.load(std::memory_order_acquire)(size, cache_line_size); if 
(!result) { throw_exception(exception_id::bad_alloc); } @@ -176,6 +203,17 @@ void __TBB_EXPORTED_FUNC cache_aligned_deallocate(void* p) { } static void* std_cache_aligned_allocate(std::size_t bytes, std::size_t alignment) { +#if defined(__TBB_USE_MEMALIGN) + return memalign(alignment, bytes); +#elif defined(__TBB_USE_POSIX_MEMALIGN) + void* p = nullptr; + int res = posix_memalign(&p, alignment, bytes); + if (res != 0) + p = nullptr; + return p; +#elif defined(__TBB_USE_MSVC_ALIGNED_MALLOC) + return _aligned_malloc(bytes, alignment); +#else // TODO: make it common with cache_aligned_resource std::size_t space = alignment + bytes; std::uintptr_t base = reinterpret_cast<std::uintptr_t>(std::malloc(space)); @@ -190,9 +228,15 @@ static void* std_cache_aligned_allocate(std::size_t bytes, std::size_t alignment // Record where block actually starts. (reinterpret_cast<std::uintptr_t*>(result))[-1] = base; return reinterpret_cast<void*>(result); +#endif } static void std_cache_aligned_deallocate(void* p) { +#if defined(__TBB_USE_MEMALIGN) || defined(__TBB_USE_POSIX_MEMALIGN) + free(p); +#elif defined(__TBB_USE_MSVC_ALIGNED_MALLOC) + _aligned_free(p); +#else if (p) { __TBB_ASSERT(reinterpret_cast<std::uintptr_t>(p) >= 0x4096, "attempt to free block not obtained from cache_aligned_allocator"); // Recover where block actually starts @@ -200,10 +244,11 @@ static void std_cache_aligned_deallocate(void* p) { __TBB_ASSERT(((base + nfs_size) & ~(nfs_size - 1)) == reinterpret_cast<std::uintptr_t>(p), "Incorrect alignment or not allocated by std_cache_aligned_deallocate?"); std::free(reinterpret_cast<void*>(base)); } +#endif } void* __TBB_EXPORTED_FUNC allocate_memory(std::size_t size) { - void* result = (*allocate_handler)(size); + void* result = allocate_handler.load(std::memory_order_acquire)(size); if (!result) { throw_exception(exception_id::bad_alloc); } @@ -218,15 +263,16 @@ void __TBB_EXPORTED_FUNC deallocate_memory(void* p) { } bool __TBB_EXPORTED_FUNC is_tbbmalloc_used() { - 
if (allocate_handler == &initialize_allocate_handler) { - void* void_ptr = allocate_handler(1); - deallocate_handler(void_ptr); + auto handler_snapshot = allocate_handler.load(std::memory_order_acquire); + if (handler_snapshot == &initialize_allocate_handler) { + initialize_cache_aligned_allocator(); } - __TBB_ASSERT(allocate_handler != &initialize_allocate_handler && deallocate_handler != nullptr, NULL); + handler_snapshot = allocate_handler.load(std::memory_order_relaxed); + __TBB_ASSERT(handler_snapshot != &initialize_allocate_handler && deallocate_handler != nullptr, nullptr); // Cast to void avoids type mismatch errors on some compilers (e.g. __IBMCPP__) - __TBB_ASSERT((reinterpret_cast<void*>(allocate_handler) == reinterpret_cast<void*>(&std::malloc)) == (reinterpret_cast<void*>(deallocate_handler) == reinterpret_cast<void*>(&std::free)), + __TBB_ASSERT((reinterpret_cast<void*>(handler_snapshot) == reinterpret_cast<void*>(&std::malloc)) == (reinterpret_cast<void*>(deallocate_handler) == reinterpret_cast<void*>(&std::free)), "Both shim pointers must refer to routines from the same package (either TBB or CRT)"); - return reinterpret_cast<void*>(allocate_handler) == reinterpret_cast<void*>(&std::malloc); + return reinterpret_cast<void*>(handler_snapshot) == reinterpret_cast<void*>(&std::malloc); } } // namespace r1 diff --git a/contrib/libs/tbb/src/tbb/arena.cpp b/contrib/libs/tbb/src/tbb/arena.cpp index 1ddab36ff5..2161ed5dc2 100644 --- a/contrib/libs/tbb/src/tbb/arena.cpp +++ b/contrib/libs/tbb/src/tbb/arena.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -49,7 +49,7 @@ public: restore_affinity_mask(my_binding_handler, this_task_arena::current_thread_index()); } - ~numa_binding_observer(){ + ~numa_binding_observer() override{ destroy_binding_handler(my_binding_handler); } }; @@ -65,7 +65,7 @@ numa_binding_observer* construct_binding_observer( d1::task_arena* ta, int num_s } void destroy_binding_observer( numa_binding_observer* binding_observer ) { - __TBB_ASSERT(binding_observer, "Trying to deallocate NULL pointer"); + __TBB_ASSERT(binding_observer, "Trying to deallocate nullptr pointer"); binding_observer->observe(false); binding_observer->~numa_binding_observer(); deallocate_memory(binding_observer); @@ -77,7 +77,7 @@ std::size_t arena::occupy_free_slot_in_range( thread_data& tls, std::size_t lowe // Start search for an empty slot from the one we occupied the last time std::size_t index = tls.my_arena_index; if ( index < lower || index >= upper ) index = tls.my_random.get() % (upper - lower) + lower; - __TBB_ASSERT( index >= lower && index < upper, NULL ); + __TBB_ASSERT( index >= lower && index < upper, nullptr); // Find a free slot for ( std::size_t i = index; i < upper; ++i ) if (my_slots[i].try_occupy()) return i; @@ -119,11 +119,15 @@ void arena::process(thread_data& tls) { } __TBB_ASSERT( index >= my_num_reserved_slots, "Workers cannot occupy reserved slots" ); tls.attach_arena(*this, index); + // worker thread enters the dispatch loop to look for a work + tls.my_inbox.set_is_idle(true); + if (tls.my_arena_slot->is_task_pool_published()) { + tls.my_inbox.set_is_idle(false); + } task_dispatcher& task_disp = tls.my_arena_slot->default_task_dispatcher(); - task_disp.set_stealing_threshold(calculate_stealing_threshold()); + tls.enter_task_dispatcher(task_disp, calculate_stealing_threshold()); __TBB_ASSERT(task_disp.can_steal(), nullptr); - tls.attach_task_dispatcher(task_disp); __TBB_ASSERT( !tls.my_last_observer, "There cannot be notified local observers when entering arena" ); 
my_observers.notify_entry_observers(tls.my_last_observer, tls.my_is_worker); @@ -131,6 +135,10 @@ void arena::process(thread_data& tls) { // Waiting on special object tied to this arena outermost_worker_waiter waiter(*this); d1::task* t = tls.my_task_dispatcher->local_wait_for_all(nullptr, waiter); + // For purposes of affinity support, the slot's mailbox is considered idle while no thread is + // attached to it. + tls.my_inbox.set_is_idle(true); + __TBB_ASSERT_EX(t == nullptr, "Outermost worker must not leave dispatch loop with a task"); __TBB_ASSERT(governor::is_thread_data_set(&tls), nullptr); __TBB_ASSERT(tls.my_task_dispatcher == &task_disp, nullptr); @@ -138,8 +146,7 @@ void arena::process(thread_data& tls) { my_observers.notify_exit_observers(tls.my_last_observer, tls.my_is_worker); tls.my_last_observer = nullptr; - task_disp.set_stealing_threshold(0); - tls.detach_task_dispatcher(); + tls.leave_task_dispatcher(); // Arena slot detach (arena may be used in market::process) // TODO: Consider moving several calls below into a new method(e.g.detach_arena). @@ -172,16 +179,16 @@ arena::arena ( market& m, unsigned num_slots, unsigned num_reserved_slots, unsig my_aba_epoch = m.my_arenas_aba_epoch.load(std::memory_order_relaxed); my_observers.my_arena = this; my_co_cache.init(4 * num_slots); - __TBB_ASSERT ( my_max_num_workers <= my_num_slots, NULL ); + __TBB_ASSERT ( my_max_num_workers <= my_num_slots, nullptr); // Initialize the default context. It should be allocated before task_dispatch construction. my_default_ctx = new (cache_aligned_allocate(sizeof(d1::task_group_context))) d1::task_group_context{ d1::task_group_context::isolated, d1::task_group_context::fp_settings }; // Construct slots. Mark internal synchronization elements for the tools. 
task_dispatcher* base_td_pointer = reinterpret_cast<task_dispatcher*>(my_slots + my_num_slots); for( unsigned i = 0; i < my_num_slots; ++i ) { - // __TBB_ASSERT( !my_slots[i].my_scheduler && !my_slots[i].task_pool, NULL ); - __TBB_ASSERT( !my_slots[i].task_pool_ptr, NULL ); - __TBB_ASSERT( !my_slots[i].my_task_pool_size, NULL ); + // __TBB_ASSERT( !my_slots[i].my_scheduler && !my_slots[i].task_pool, nullptr); + __TBB_ASSERT( !my_slots[i].task_pool_ptr, nullptr); + __TBB_ASSERT( !my_slots[i].my_task_pool_size, nullptr); mailbox(i).construct(); my_slots[i].init_task_streams(i); my_slots[i].my_default_task_dispatcher = new(base_td_pointer + i) task_dispatcher(this); @@ -214,23 +221,28 @@ arena& arena::allocate_arena( market& m, unsigned num_slots, unsigned num_reserv } void arena::free_arena () { - __TBB_ASSERT( is_alive(my_guard), NULL ); + __TBB_ASSERT( is_alive(my_guard), nullptr); __TBB_ASSERT( !my_references.load(std::memory_order_relaxed), "There are threads in the dying arena" ); __TBB_ASSERT( !my_num_workers_requested && !my_num_workers_allotted, "Dying arena requests workers" ); __TBB_ASSERT( my_pool_state.load(std::memory_order_relaxed) == SNAPSHOT_EMPTY || !my_max_num_workers, "Inconsistent state of a dying arena" ); #if __TBB_ENQUEUE_ENFORCED_CONCURRENCY - __TBB_ASSERT( !my_global_concurrency_mode, NULL ); + __TBB_ASSERT( !my_global_concurrency_mode, nullptr); #endif +#if __TBB_ARENA_BINDING + if (my_numa_binding_observer != nullptr) { + destroy_binding_observer(my_numa_binding_observer); + my_numa_binding_observer = nullptr; + } +#endif /*__TBB_ARENA_BINDING*/ poison_value( my_guard ); - std::intptr_t drained = 0; for ( unsigned i = 0; i < my_num_slots; ++i ) { // __TBB_ASSERT( !my_slots[i].my_scheduler, "arena slot is not empty" ); // TODO: understand the assertion and modify - // __TBB_ASSERT( my_slots[i].task_pool == EmptyTaskPool, NULL ); - __TBB_ASSERT( my_slots[i].head == my_slots[i].tail, NULL ); // TODO: replace by 
is_quiescent_local_task_pool_empty + // __TBB_ASSERT( my_slots[i].task_pool == EmptyTaskPool, nullptr); + __TBB_ASSERT( my_slots[i].head == my_slots[i].tail, nullptr); // TODO: replace by is_quiescent_local_task_pool_empty my_slots[i].free_task_pool(); - drained += mailbox(i).drain(); + mailbox(i).drain(); my_slots[i].my_default_task_dispatcher->~task_dispatcher(); } __TBB_ASSERT(my_fifo_task_stream.empty(), "Not all enqueued tasks were executed"); @@ -244,12 +256,13 @@ void arena::free_arena () { #endif // remove an internal reference my_market->release( /*is_public=*/false, /*blocking_terminate=*/false ); - if ( !my_observers.empty() ) { - my_observers.clear(); - } + + // Clear enfources synchronization with observe(false) + my_observers.clear(); + void* storage = &mailbox(my_num_slots-1); - __TBB_ASSERT( my_references.load(std::memory_order_relaxed) == 0, NULL ); - __TBB_ASSERT( my_pool_state.load(std::memory_order_relaxed) == SNAPSHOT_EMPTY || !my_max_num_workers, NULL ); + __TBB_ASSERT( my_references.load(std::memory_order_relaxed) == 0, nullptr); + __TBB_ASSERT( my_pool_state.load(std::memory_order_relaxed) == SNAPSHOT_EMPTY || !my_max_num_workers, nullptr); this->~arena(); #if TBB_USE_ASSERT > 1 std::memset( storage, 0, allocation_size(my_num_slots) ); @@ -387,7 +400,7 @@ struct task_arena_impl { static void execute(d1::task_arena_base&, d1::delegate_base&); static void wait(d1::task_arena_base&); static int max_concurrency(const d1::task_arena_base*); - static void enqueue(d1::task&, d1::task_arena_base*); + static void enqueue(d1::task&, d1::task_group_context*, d1::task_arena_base*); }; void __TBB_EXPORTED_FUNC initialize(d1::task_arena_base& ta) { @@ -411,11 +424,16 @@ int __TBB_EXPORTED_FUNC max_concurrency(const d1::task_arena_base* ta) { } void __TBB_EXPORTED_FUNC enqueue(d1::task& t, d1::task_arena_base* ta) { - task_arena_impl::enqueue(t, ta); + task_arena_impl::enqueue(t, nullptr, ta); +} + +void __TBB_EXPORTED_FUNC enqueue(d1::task& t, 
d1::task_group_context& ctx, d1::task_arena_base* ta) { + task_arena_impl::enqueue(t, &ctx, ta); } void task_arena_impl::initialize(d1::task_arena_base& ta) { - governor::one_time_init(); + // Enforce global market initialization to properly initialize soft limit + (void)governor::get_thread_data(); if (ta.my_max_concurrency < 1) { #if __TBB_ARENA_BINDING @@ -449,12 +467,6 @@ void task_arena_impl::initialize(d1::task_arena_base& ta) { void task_arena_impl::terminate(d1::task_arena_base& ta) { arena* a = ta.my_arena.load(std::memory_order_relaxed); assert_pointer_valid(a); -#if __TBB_ARENA_BINDING - if(a->my_numa_binding_observer != nullptr ) { - destroy_binding_observer(a->my_numa_binding_observer); - a->my_numa_binding_observer = nullptr; - } -#endif /*__TBB_ARENA_BINDING*/ a->my_market->release( /*is_public=*/true, /*blocking_terminate=*/false ); a->on_thread_leaving<arena::ref_external>(); ta.my_arena.store(nullptr, std::memory_order_relaxed); @@ -467,12 +479,12 @@ bool task_arena_impl::attach(d1::task_arena_base& ta) { arena* a = td->my_arena; // There is an active arena to attach to. // It's still used by s, so won't be destroyed right away. 
- __TBB_ASSERT(a->my_references > 0, NULL ); + __TBB_ASSERT(a->my_references > 0, nullptr); a->my_references += arena::ref_external; ta.my_num_reserved_slots = a->my_num_reserved_slots; ta.my_priority = arena_priority(a->my_priority_level); ta.my_max_concurrency = ta.my_num_reserved_slots + a->my_max_num_workers; - __TBB_ASSERT(arena::num_arena_slots(ta.my_max_concurrency) == a->my_num_slots, NULL); + __TBB_ASSERT(arena::num_arena_slots(ta.my_max_concurrency) == a->my_num_slots, nullptr); ta.my_arena.store(a, std::memory_order_release); // increases market's ref count for task_arena market::global_market( /*is_public=*/true ); @@ -481,14 +493,20 @@ bool task_arena_impl::attach(d1::task_arena_base& ta) { return false; } -void task_arena_impl::enqueue(d1::task& t, d1::task_arena_base* ta) { +void task_arena_impl::enqueue(d1::task& t, d1::task_group_context* c, d1::task_arena_base* ta) { thread_data* td = governor::get_thread_data(); // thread data is only needed for FastRandom instance - arena* a = ta->my_arena.load(std::memory_order_relaxed); - assert_pointers_valid(ta, a, a->my_default_ctx, td); - // Is there a better place for checking the state of my_default_ctx? + assert_pointer_valid(td, "thread_data pointer should not be null"); + arena* a = ta ? + ta->my_arena.load(std::memory_order_relaxed) + : td->my_arena + ; + assert_pointer_valid(a, "arena pointer should not be null"); + auto* ctx = c ? c : a->my_default_ctx; + assert_pointer_valid(ctx, "context pointer should not be null"); + // Is there a better place for checking the state of ctx? __TBB_ASSERT(!a->my_default_ctx->is_group_execution_cancelled(), - "The task will not be executed because default task_group_context of task_arena is cancelled. 
Has previously enqueued task thrown an exception?"); - a->enqueue_task(t, *a->my_default_ctx, *td); + "The task will not be executed because its task_group_context is cancelled."); + a->enqueue_task(t, *ctx, *td); } class nested_arena_context : no_copy { @@ -503,9 +521,10 @@ public: td.detach_task_dispatcher(); td.attach_arena(nested_arena, slot_index); + if (td.my_inbox.is_idle_state(true)) + td.my_inbox.set_is_idle(false); task_dispatcher& task_disp = td.my_arena_slot->default_task_dispatcher(); - task_disp.set_stealing_threshold(m_orig_execute_data_ext.task_disp->m_stealing_threshold); - td.attach_task_dispatcher(task_disp); + td.enter_task_dispatcher(task_disp, m_orig_execute_data_ext.task_disp->m_stealing_threshold); // If the calling thread occupies the slots out of external thread reserve we need to notify the // market that this arena requires one worker less. @@ -549,13 +568,13 @@ public: td.my_arena->my_market->adjust_demand(*td.my_arena, /* delta = */ 1, /* mandatory = */ false); } - td.my_task_dispatcher->set_stealing_threshold(0); - td.detach_task_dispatcher(); + td.leave_task_dispatcher(); td.my_arena_slot->release(); td.my_arena->my_exit_monitors.notify_one(); // do not relax! td.attach_arena(*m_orig_arena, m_orig_slot_index); td.attach_task_dispatcher(*m_orig_execute_data_ext.task_disp); + __TBB_ASSERT(td.my_inbox.is_idle_state(false), nullptr); } td.my_task_dispatcher->m_execute_data_ext = m_orig_execute_data_ext; } @@ -608,7 +627,7 @@ class delegated_task : public d1::task { public: delegated_task(d1::delegate_base& d, concurrent_monitor& s, d1::wait_context& wo) : m_delegate(d), m_monitor(s), m_wait_ctx(wo), m_completed{ false }{} - ~delegated_task() { + ~delegated_task() override { // The destructor can be called earlier than the m_monitor is notified // because the waiting thread can be released after m_wait_ctx.release_wait. // To close that race we wait for the m_completed signal. 
@@ -645,7 +664,7 @@ void task_arena_impl::execute(d1::task_arena_base& ta, d1::delegate_base& d) { a->my_exit_monitors.cancel_wait(waiter); nested_arena_context scope(*td, *a, index2 ); r1::wait(wo, exec_context); - __TBB_ASSERT(!exec_context.my_exception, NULL); // exception can be thrown above, not deferred + __TBB_ASSERT(!exec_context.my_exception.load(std::memory_order_relaxed), nullptr); // exception can be thrown above, not deferred break; } a->my_exit_monitors.commit_wait(waiter); @@ -656,9 +675,10 @@ void task_arena_impl::execute(d1::task_arena_base& ta, d1::delegate_base& d) { a->my_exit_monitors.notify_one(); // do not relax! } // process possible exception - if (exec_context.my_exception) { + auto exception = exec_context.my_exception.load(std::memory_order_acquire); + if (exception) { __TBB_ASSERT(exec_context.is_group_execution_cancelled(), "The task group context with an exception should be canceled."); - exec_context.my_exception->throw_self(); + exception->throw_self(); } __TBB_ASSERT(governor::is_thread_data_set(td), nullptr); return; @@ -702,7 +722,7 @@ int task_arena_impl::max_concurrency(const d1::task_arena_base *ta) { a = td->my_arena; // the current arena if any if( a ) { // Get parameters from the arena - __TBB_ASSERT( !ta || ta->my_max_concurrency==1, NULL ); + __TBB_ASSERT( !ta || ta->my_max_concurrency==1, nullptr); return a->my_num_reserved_slots + a->my_max_num_workers #if __TBB_ENQUEUE_ENFORCED_CONCURRENCY + (a->my_local_concurrency_flag.test() ? 
1 : 0) @@ -728,7 +748,7 @@ int task_arena_impl::max_concurrency(const d1::task_arena_base *ta) { } #endif /*!__TBB_ARENA_BINDING*/ - __TBB_ASSERT(!ta || ta->my_max_concurrency==d1::task_arena_base::automatic, NULL ); + __TBB_ASSERT(!ta || ta->my_max_concurrency==d1::task_arena_base::automatic, nullptr); return int(governor::default_num_threads()); } @@ -746,7 +766,7 @@ void isolate_within_arena(d1::delegate_base& d, std::intptr_t isolation) { // Isolation within this callable d(); }).on_completion([&] { - __TBB_ASSERT(governor::get_thread_data()->my_task_dispatcher == dispatcher, NULL); + __TBB_ASSERT(governor::get_thread_data()->my_task_dispatcher == dispatcher, nullptr); dispatcher->set_isolation(previous_isolation); }); } @@ -754,4 +774,3 @@ void isolate_within_arena(d1::delegate_base& d, std::intptr_t isolation) { } // namespace r1 } // namespace detail } // namespace tbb - diff --git a/contrib/libs/tbb/src/tbb/arena.h b/contrib/libs/tbb/src/tbb/arena.h index b1b9c3dc93..0f4165d506 100644 --- a/contrib/libs/tbb/src/tbb/arena.h +++ b/contrib/libs/tbb/src/tbb/arena.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -184,6 +184,7 @@ public: //! The structure of an arena, except the array of slots. /** Separated in order to simplify padding. Intrusive list node base class is used by market to form a list of arenas. **/ +// TODO: Analyze arena_base cache lines placement struct arena_base : padded<intrusive_list_node> { //! The number of workers that have been marked out by the resource manager to service the arena. std::atomic<unsigned> my_num_workers_allotted; // heavy use in stealing loop @@ -216,9 +217,6 @@ struct arena_base : padded<intrusive_list_node> { task_stream<back_nonnull_accessor> my_critical_task_stream; #endif - //! 
The number of workers requested by the external thread owning the arena. - unsigned my_max_num_workers; - //! The total number of workers that are requested from the resource manager. int my_total_num_workers_requested; @@ -253,23 +251,10 @@ struct arena_base : padded<intrusive_list_node> { //! The market that owns this arena. market* my_market; - //! ABA prevention marker. - std::uintptr_t my_aba_epoch; - //! Default task group context. d1::task_group_context* my_default_ctx; - //! The number of slots in the arena. - unsigned my_num_slots; - - //! The number of reserved slots (can be occupied only by external threads). - unsigned my_num_reserved_slots; - #if __TBB_ENQUEUE_ENFORCED_CONCURRENCY - // arena needs an extra worker despite the arena limit - atomic_flag my_local_concurrency_flag; - // the number of local mandatory concurrency requests - int my_local_concurrency_requests; // arena needs an extra worker despite a global limit std::atomic<bool> my_global_concurrency_mode; #endif /* __TBB_ENQUEUE_ENFORCED_CONCURRENCY */ @@ -280,6 +265,28 @@ struct arena_base : padded<intrusive_list_node> { //! Coroutines (task_dispathers) cache buffer arena_co_cache my_co_cache; +#if __TBB_ENQUEUE_ENFORCED_CONCURRENCY + // arena needs an extra worker despite the arena limit + atomic_flag my_local_concurrency_flag; + // the number of local mandatory concurrency requests + int my_local_concurrency_requests; +#endif /* __TBB_ENQUEUE_ENFORCED_CONCURRENCY*/ + + //! ABA prevention marker. + std::uintptr_t my_aba_epoch; + //! The number of slots in the arena. + unsigned my_num_slots; + //! The number of reserved slots (can be occupied only by external threads). + unsigned my_num_reserved_slots; + //! The number of workers requested by the external thread owning the arena. + unsigned my_max_num_workers; + + //! The target serialization epoch for callers of adjust_job_count_estimate + int my_adjust_demand_target_epoch; + + //! 
The current serialization epoch for callers of adjust_job_count_estimate + d1::waitable_atomic<int> my_adjust_demand_current_epoch; + #if TBB_USE_ASSERT //! Used to trap accesses to the object after its destruction. std::uintptr_t my_guard; @@ -472,18 +479,22 @@ inline void arena::on_thread_leaving ( ) { // concurrently, can't guarantee last is_out_of_work() return true. } #endif - if ( (my_references -= ref_param ) == 0 ) + + // Release our reference to sync with arena destroy + unsigned remaining_ref = my_references.fetch_sub(ref_param, std::memory_order_release) - ref_param; + if (remaining_ref == 0) { m->try_destroy_arena( this, aba_epoch, priority_level ); + } } template<arena::new_work_type work_type> void arena::advertise_new_work() { - auto is_related_arena = [&] (extended_context context) { + auto is_related_arena = [&] (market_context context) { return this == context.my_arena_addr; }; if( work_type == work_enqueued ) { - atomic_fence(std::memory_order_seq_cst); + atomic_fence_seq_cst(); #if __TBB_ENQUEUE_ENFORCED_CONCURRENCY if ( my_market->my_num_workers_soft_limit.load(std::memory_order_acquire) == 0 && my_global_concurrency_mode.load(std::memory_order_acquire) == false ) @@ -497,7 +508,7 @@ void arena::advertise_new_work() { // Starvation resistant tasks require concurrency, so missed wakeups are unacceptable. } else if( work_type == wakeup ) { - atomic_fence(std::memory_order_seq_cst); + atomic_fence_seq_cst(); } // Double-check idiom that, in case of spawning, is deliberately sloppy about memory fences. 
@@ -560,7 +571,7 @@ inline d1::task* arena::steal_task(unsigned arena_index, FastRandom& frnd, execu arena_slot* victim = &my_slots[k]; d1::task **pool = victim->task_pool.load(std::memory_order_relaxed); d1::task *t = nullptr; - if (pool == EmptyTaskPool || !(t = victim->steal_task(*this, isolation))) { + if (pool == EmptyTaskPool || !(t = victim->steal_task(*this, isolation, k))) { return nullptr; } if (task_accessor::is_proxy_task(*t)) { @@ -572,10 +583,10 @@ inline d1::task* arena::steal_task(unsigned arena_index, FastRandom& frnd, execu tp.allocator.delete_object(&tp, ed); return nullptr; } - // Note affinity is called for any stealed task (proxy or general) + // Note affinity is called for any stolen task (proxy or general) ed.affinity_slot = slot; } else { - // Note affinity is called for any stealed task (proxy or general) + // Note affinity is called for any stolen task (proxy or general) ed.affinity_slot = d1::any_slot; } // Update task owner thread id to identify stealing diff --git a/contrib/libs/tbb/src/tbb/arena_slot.cpp b/contrib/libs/tbb/src/tbb/arena_slot.cpp index 72706b3de5..bce5701db3 100644 --- a/contrib/libs/tbb/src/tbb/arena_slot.cpp +++ b/contrib/libs/tbb/src/tbb/arena_slot.cpp @@ -145,7 +145,7 @@ d1::task* arena_slot::get_task(execution_data_ext& ed, isolation_type isolation) return result; } -d1::task* arena_slot::steal_task(arena& a, isolation_type isolation) { +d1::task* arena_slot::steal_task(arena& a, isolation_type isolation, std::size_t slot_index) { d1::task** victim_pool = lock_task_pool(); if (!victim_pool) { return nullptr; @@ -175,7 +175,7 @@ d1::task* arena_slot::steal_task(arena& a, isolation_type isolation) { } task_proxy& tp = *static_cast<task_proxy*>(result); // If mailed task is likely to be grabbed by its destination thread, skip it. 
- if ( !(task_proxy::is_shared( tp.task_and_tag ) && tp.outbox->recipient_is_idle()) ) { + if (!task_proxy::is_shared(tp.task_and_tag) || !tp.outbox->recipient_is_idle() || a.mailbox(slot_index).recipient_is_idle()) { break; } } @@ -216,4 +216,3 @@ unlock: } // namespace r1 } // namespace detail } // namespace tbb - diff --git a/contrib/libs/tbb/src/tbb/arena_slot.h b/contrib/libs/tbb/src/tbb/arena_slot.h index 83d61d2197..d9a70cfaf8 100644 --- a/contrib/libs/tbb/src/tbb/arena_slot.h +++ b/contrib/libs/tbb/src/tbb/arena_slot.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -119,23 +119,23 @@ public: //! Deallocate task pool that was allocated by means of allocate_task_pool. void free_task_pool( ) { // TODO: understand the assertion and modify - // __TBB_ASSERT( !task_pool /* TODO: == EmptyTaskPool */, NULL); + // __TBB_ASSERT( !task_pool /* TODO: == EmptyTaskPool */, nullptr); if( task_pool_ptr ) { - __TBB_ASSERT( my_task_pool_size, NULL); + __TBB_ASSERT( my_task_pool_size, nullptr); cache_aligned_deallocate( task_pool_ptr ); - task_pool_ptr = NULL; + task_pool_ptr = nullptr; my_task_pool_size = 0; } } //! Get a task from the local pool. /** Called only by the pool owner. - Returns the pointer to the task or NULL if a suitable task is not found. + Returns the pointer to the task or nullptr if a suitable task is not found. Resets the pool if it is empty. **/ d1::task* get_task(execution_data_ext&, isolation_type); //! Steal task from slot's ready pool - d1::task* steal_task(arena&, isolation_type); + d1::task* steal_task(arena&, isolation_type, std::size_t); //! Some thread is now the owner of this slot void occupy() { @@ -157,7 +157,7 @@ public: //! 
Spawn newly created tasks void spawn(d1::task& t) { std::size_t T = prepare_task_pool(1); - __TBB_ASSERT(is_poisoned(task_pool_ptr[T]), NULL); + __TBB_ASSERT(is_poisoned(task_pool_ptr[T]), nullptr); task_pool_ptr[T] = &t; commit_spawned_tasks(T + 1); if (!is_task_pool_published()) { @@ -195,7 +195,7 @@ public: #endif private: //! Get a task from the local pool at specified location T. - /** Returns the pointer to the task or NULL if the task cannot be executed, + /** Returns the pointer to the task or nullptr if the task cannot be executed, e.g. proxy has been deallocated or isolation constraint is not met. tasks_omitted tells if some tasks have been omitted. Called only by the pool owner. The caller should guarantee that the @@ -213,8 +213,8 @@ private: std::size_t new_size = num_tasks; if ( !my_task_pool_size ) { - __TBB_ASSERT( !is_task_pool_published() && is_quiescent_local_task_pool_reset(), NULL ); - __TBB_ASSERT( !task_pool_ptr, NULL ); + __TBB_ASSERT( !is_task_pool_published() && is_quiescent_local_task_pool_reset(), nullptr); + __TBB_ASSERT( !task_pool_ptr, nullptr); if ( num_tasks < min_task_pool_size ) new_size = min_task_pool_size; allocate_task_pool( new_size ); return 0; @@ -222,7 +222,7 @@ private: acquire_task_pool(); std::size_t H = head.load(std::memory_order_relaxed); // mirror d1::task** new_task_pool = task_pool_ptr;; - __TBB_ASSERT( my_task_pool_size >= min_task_pool_size, NULL ); + __TBB_ASSERT( my_task_pool_size >= min_task_pool_size, nullptr); // Count not skipped tasks. Consider using std::count_if. for ( std::size_t i = H; i < T; ++i ) if ( new_task_pool[i] ) ++new_size; @@ -315,7 +315,7 @@ private: task_pool.store( task_pool_ptr, std::memory_order_release ); } - //! Locks victim's task pool, and returns pointer to it. The pointer can be NULL. + //! Locks victim's task pool, and returns pointer to it. The pointer can be nullptr. /** Garbles victim_arena_slot->task_pool for the duration of the lock. 
**/ d1::task** lock_task_pool() { d1::task** victim_task_pool; @@ -347,7 +347,7 @@ private: /** Restores victim_arena_slot->task_pool munged by lock_task_pool. **/ void unlock_task_pool(d1::task** victim_task_pool) { __TBB_ASSERT(task_pool.load(std::memory_order_relaxed) == LockedTaskPool, "victim arena slot is not locked"); - __TBB_ASSERT(victim_task_pool != LockedTaskPool, NULL); + __TBB_ASSERT(victim_task_pool != LockedTaskPool, nullptr); task_pool.store(victim_task_pool, std::memory_order_release); } diff --git a/contrib/libs/tbb/src/tbb/assert_impl.h b/contrib/libs/tbb/src/tbb/assert_impl.h index 7f411e06f7..0064c462bf 100644 --- a/contrib/libs/tbb/src/tbb/assert_impl.h +++ b/contrib/libs/tbb/src/tbb/assert_impl.h @@ -18,6 +18,7 @@ #define __TBB_assert_impl_H #include "oneapi/tbb/detail/_config.h" +#include "oneapi/tbb/detail/_utils.h" #include <cstdio> #include <cstdlib> @@ -29,29 +30,50 @@ #include <mutex> +#if __TBBMALLOC_BUILD +namespace rml { namespace internal { +#else namespace tbb { namespace detail { namespace r1 { - +#endif // TODO: consider extension for formatted error description string -static void assertion_failure_impl(const char* filename, int line, const char* expression, const char* comment) { - std::fprintf(stderr, "Assertion %s failed on line %d of file %s\n", expression, line, filename); +static void assertion_failure_impl(const char* location, int line, const char* expression, const char* comment) { + + std::fprintf(stderr, "Assertion %s failed (located in the %s function, line in file: %d)\n", + expression, location, line); + if (comment) { std::fprintf(stderr, "Detailed description: %s\n", comment); } #if _MSC_VER && _DEBUG - if (1 == _CrtDbgReport(_CRT_ASSERT, filename, line, "tbb_debug.dll", "%s\r\n%s", expression, comment?comment:"")) { + if (1 == _CrtDbgReport(_CRT_ASSERT, location, line, "tbb_debug.dll", "%s\r\n%s", expression, comment?comment:"")) { _CrtDbgBreak(); - } -#else - std::fflush(stderr); - std::abort(); + } else #endif 
+ { + std::fflush(stderr); + std::abort(); + } } -void __TBB_EXPORTED_FUNC assertion_failure(const char* filename, int line, const char* expression, const char* comment) { - static std::once_flag flag; - std::call_once(flag, [&](){ assertion_failure_impl(filename, line, expression, comment); }); +// Do not move the definition into the assertion_failure function because it will require "magic statics". +// It will bring a dependency on C++ runtime on some platforms while assert_impl.h is reused in tbbmalloc +// that should not depend on C++ runtime +static std::atomic<tbb::detail::do_once_state> assertion_state; + +void __TBB_EXPORTED_FUNC assertion_failure(const char* location, int line, const char* expression, const char* comment) { +#if __TBB_MSVC_UNREACHABLE_CODE_IGNORED + // Workaround for erroneous "unreachable code" during assertion throwing using call_once + #pragma warning (push) + #pragma warning (disable: 4702) +#endif + // We cannot use std::call_once because it brings a dependency on C++ runtime on some platforms + // while assert_impl.h is reused in tbbmalloc that should not depend on C++ runtime + atomic_do_once([&](){ assertion_failure_impl(location, line, expression, comment); }, assertion_state); +#if __TBB_MSVC_UNREACHABLE_CODE_IGNORED + #pragma warning (pop) +#endif } //! Report a runtime warning. @@ -63,9 +85,13 @@ void runtime_warning( const char* format, ... 
) { fprintf(stderr, "TBB Warning: %s\n", str); } +#if __TBBMALLOC_BUILD +}} // namespaces rml::internal +#else } // namespace r1 } // namespace detail } // namespace tbb +#endif #endif // __TBB_assert_impl_H diff --git a/contrib/libs/tbb/src/tbb/co_context.h b/contrib/libs/tbb/src/tbb/co_context.h index 552dec356b..8640c08964 100644 --- a/contrib/libs/tbb/src/tbb/co_context.h +++ b/contrib/libs/tbb/src/tbb/co_context.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -24,9 +24,20 @@ #include <cstddef> #include <cstdint> +#if __TBB_RESUMABLE_TASKS_USE_THREADS + #if _WIN32 || _WIN64 #include <windows.h> #else +#include <pthread.h> +#endif + +#include <condition_variable> +#include "governor.h" + +#elif _WIN32 || _WIN64 +#include <windows.h> +#else // ucontext.h API is deprecated since macOS 10.6 #if __APPLE__ #if __INTEL_COMPILER @@ -36,6 +47,7 @@ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wdeprecated-declarations" #endif + #define _XOPEN_SOURCE 700 #endif // __APPLE__ #include <ucontext.h> @@ -57,7 +69,21 @@ namespace tbb { namespace detail { namespace r1 { +#if __TBB_RESUMABLE_TASKS_USE_THREADS + struct coroutine_type { #if _WIN32 || _WIN64 + using handle_type = HANDLE; +#else + using handle_type = pthread_t; +#endif + + handle_type my_thread; + std::condition_variable my_condvar; + std::mutex my_mutex; + thread_data* my_thread_data{ nullptr }; + bool my_is_active{ true }; + }; +#elif _WIN32 || _WIN64 typedef LPVOID coroutine_type; #else struct coroutine_type { @@ -89,7 +115,7 @@ public: : my_state(stack_size ? 
co_suspended : co_executing) { if (stack_size) { - __TBB_ASSERT(arg != 0, nullptr); + __TBB_ASSERT(arg != nullptr, nullptr); create_coroutine(my_coroutine, stack_size, arg); } else { current_coroutine(my_coroutine); @@ -97,16 +123,20 @@ public: } ~co_context() { - __TBB_ASSERT(1 << my_state & (1 << co_suspended | 1 << co_executing), NULL); - if (my_state == co_suspended) + __TBB_ASSERT(1 << my_state & (1 << co_suspended | 1 << co_executing), nullptr); + if (my_state == co_suspended) { +#if __TBB_RESUMABLE_TASKS_USE_THREADS + my_state = co_executing; +#endif destroy_coroutine(my_coroutine); + } my_state = co_destroyed; } void resume(co_context& target) { // Do not create non-trivial objects on the stack of this function. They might never be destroyed. - __TBB_ASSERT(my_state == co_executing, NULL); - __TBB_ASSERT(target.my_state == co_suspended, NULL); + __TBB_ASSERT(my_state == co_executing, nullptr); + __TBB_ASSERT(target.my_state == co_suspended, nullptr); my_state = co_suspended; target.my_state = co_executing; @@ -114,41 +144,163 @@ public: // 'target' can reference an invalid object after swap_coroutine. Do not access it. 
swap_coroutine(my_coroutine, target.my_coroutine); - __TBB_ASSERT(my_state == co_executing, NULL); + __TBB_ASSERT(my_state == co_executing, nullptr); } }; #if _WIN32 || _WIN64 /* [[noreturn]] */ void __stdcall co_local_wait_for_all(void* arg) noexcept; #else -/* [[noreturn]] */ void co_local_wait_for_all(void* arg) noexcept; +/* [[noreturn]] */ void co_local_wait_for_all(unsigned hi, unsigned lo) noexcept; +#endif + +#if __TBB_RESUMABLE_TASKS_USE_THREADS +void handle_perror(int error_code, const char* what); + +inline void check(int error_code, const char* routine) { + if (error_code) { + handle_perror(error_code, routine); + } +} + +using thread_data_t = std::pair<coroutine_type&, void*&>; + +#if _WIN32 || _WIN64 +inline unsigned WINAPI coroutine_thread_func(void* d) +#else +inline void* coroutine_thread_func(void* d) +#endif +{ + thread_data_t& data = *static_cast<thread_data_t*>(d); + coroutine_type& c = data.first; + void* arg = data.second; + { + std::unique_lock<std::mutex> lock(c.my_mutex); + __TBB_ASSERT(c.my_thread_data == nullptr, nullptr); + c.my_is_active = false; + + // We read the data notify the waiting thread + data.second = nullptr; + c.my_condvar.notify_one(); + + c.my_condvar.wait(lock, [&c] { return c.my_is_active == true; }); + } + __TBB_ASSERT(c.my_thread_data != nullptr, nullptr); + governor::set_thread_data(*c.my_thread_data); + +#if _WIN32 || _WIN64 + co_local_wait_for_all(arg); + + return 0; +#else + std::uintptr_t addr = std::uintptr_t(arg); + unsigned lo = unsigned(addr); + unsigned hi = unsigned(std::uint64_t(addr) >> 32); + __TBB_ASSERT(sizeof(addr) == 8 || hi == 0, nullptr); + + co_local_wait_for_all(hi, lo); + + return nullptr; +#endif +}; + +inline void create_coroutine(coroutine_type& c, std::size_t stack_size, void* arg) { + thread_data_t data{ c, arg }; + +#if _WIN32 || _WIN64 + c.my_thread = (HANDLE)_beginthreadex(nullptr, unsigned(stack_size), coroutine_thread_func, &data, STACK_SIZE_PARAM_IS_A_RESERVATION, nullptr); + if 
(!c.my_thread) { + handle_perror(0, "create_coroutine: _beginthreadex failed\n"); + } +#else + pthread_attr_t s; + check(pthread_attr_init(&s), "pthread_attr_init has failed"); + if (stack_size > 0) { + check(pthread_attr_setstacksize(&s, stack_size), "pthread_attr_setstack_size has failed"); + } + check(pthread_create(&c.my_thread, &s, coroutine_thread_func, &data), "pthread_create has failed"); + check(pthread_attr_destroy(&s), "pthread_attr_destroy has failed"); #endif + // Wait for the just created thread to read the data + std::unique_lock<std::mutex> lock(c.my_mutex); + c.my_condvar.wait(lock, [&arg] { return arg == nullptr; }); +} + +inline void current_coroutine(coroutine_type& c) { +#if _WIN32 || _WIN64 + c.my_thread = GetCurrentThread(); +#else + c.my_thread = pthread_self(); +#endif +} + +inline void swap_coroutine(coroutine_type& prev_coroutine, coroutine_type& new_coroutine) { + thread_data* td = governor::get_thread_data(); + __TBB_ASSERT(prev_coroutine.my_is_active == true, "The current thread should be active"); + + // Detach our state before notification other thread + // (because we might be notified just after other thread notification) + prev_coroutine.my_thread_data = nullptr; + prev_coroutine.my_is_active = false; + governor::clear_thread_data(); + + { + std::unique_lock<std::mutex> lock(new_coroutine.my_mutex); + __TBB_ASSERT(new_coroutine.my_is_active == false, "The sleeping thread should not be active"); + __TBB_ASSERT(new_coroutine.my_thread_data == nullptr, "The sleeping thread should not be active"); + + new_coroutine.my_thread_data = td; + new_coroutine.my_is_active = true; + new_coroutine.my_condvar.notify_one(); + } + + std::unique_lock<std::mutex> lock(prev_coroutine.my_mutex); + prev_coroutine.my_condvar.wait(lock, [&prev_coroutine] { return prev_coroutine.my_is_active == true; }); + __TBB_ASSERT(governor::get_thread_data() != nullptr, nullptr); + governor::set_thread_data(*prev_coroutine.my_thread_data); +} + +inline void 
destroy_coroutine(coroutine_type& c) { + { + std::unique_lock<std::mutex> lock(c.my_mutex); + __TBB_ASSERT(c.my_thread_data == nullptr, "The sleeping thread should not be active"); + __TBB_ASSERT(c.my_is_active == false, "The sleeping thread should not be active"); + c.my_is_active = true; + c.my_condvar.notify_one(); + } #if _WIN32 || _WIN64 + WaitForSingleObject(c.my_thread, INFINITE); + CloseHandle(c.my_thread); +#else + check(pthread_join(c.my_thread, nullptr), "pthread_join has failed"); +#endif +} +#elif _WIN32 || _WIN64 inline void create_coroutine(coroutine_type& c, std::size_t stack_size, void* arg) { - __TBB_ASSERT(arg, NULL); + __TBB_ASSERT(arg, nullptr); c = CreateFiber(stack_size, co_local_wait_for_all, arg); - __TBB_ASSERT(c, NULL); + __TBB_ASSERT(c, nullptr); } inline void current_coroutine(coroutine_type& c) { c = IsThreadAFiber() ? GetCurrentFiber() : ConvertThreadToFiberEx(nullptr, FIBER_FLAG_FLOAT_SWITCH); - __TBB_ASSERT(c, NULL); + __TBB_ASSERT(c, nullptr); } inline void swap_coroutine(coroutine_type& prev_coroutine, coroutine_type& new_coroutine) { if (!IsThreadAFiber()) { ConvertThreadToFiberEx(nullptr, FIBER_FLAG_FLOAT_SWITCH); } - __TBB_ASSERT(new_coroutine, NULL); + __TBB_ASSERT(new_coroutine, nullptr); prev_coroutine = GetCurrentFiber(); - __TBB_ASSERT(prev_coroutine, NULL); + __TBB_ASSERT(prev_coroutine, nullptr); SwitchToFiber(new_coroutine); } inline void destroy_coroutine(coroutine_type& c) { - __TBB_ASSERT(c, NULL); + __TBB_ASSERT(c, nullptr); DeleteFiber(c); } #else // !(_WIN32 || _WIN64) @@ -159,38 +311,44 @@ inline void create_coroutine(coroutine_type& c, std::size_t stack_size, void* ar const std::size_t protected_stack_size = page_aligned_stack_size + 2 * REG_PAGE_SIZE; // Allocate the stack with protection property - std::uintptr_t stack_ptr = (std::uintptr_t)mmap(NULL, protected_stack_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0); - __TBB_ASSERT((void*)stack_ptr != MAP_FAILED, NULL); + std::uintptr_t 
stack_ptr = (std::uintptr_t)mmap(nullptr, protected_stack_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0); + __TBB_ASSERT((void*)stack_ptr != MAP_FAILED, nullptr); // Allow read write on our stack (guarded pages are still protected) int err = mprotect((void*)(stack_ptr + REG_PAGE_SIZE), page_aligned_stack_size, PROT_READ | PROT_WRITE); - __TBB_ASSERT_EX(!err, NULL); + __TBB_ASSERT_EX(!err, nullptr); // Remember the stack state c.my_stack = (void*)(stack_ptr + REG_PAGE_SIZE); c.my_stack_size = page_aligned_stack_size; err = getcontext(&c.my_context); - __TBB_ASSERT_EX(!err, NULL); + __TBB_ASSERT_EX(!err, nullptr); - c.my_context.uc_link = 0; + c.my_context.uc_link = nullptr; // cast to char* to disable FreeBSD clang-3.4.1 'incompatible type' error c.my_context.uc_stack.ss_sp = (char*)c.my_stack; c.my_context.uc_stack.ss_size = c.my_stack_size; c.my_context.uc_stack.ss_flags = 0; typedef void(*coroutine_func_t)(); - makecontext(&c.my_context, (coroutine_func_t)co_local_wait_for_all, sizeof(arg) / sizeof(int), arg); + + std::uintptr_t addr = std::uintptr_t(arg); + unsigned lo = unsigned(addr); + unsigned hi = unsigned(std::uint64_t(addr) >> 32); + __TBB_ASSERT(sizeof(addr) == 8 || hi == 0, nullptr); + + makecontext(&c.my_context, (coroutine_func_t)co_local_wait_for_all, 2, hi, lo); } inline void current_coroutine(coroutine_type& c) { int err = getcontext(&c.my_context); - __TBB_ASSERT_EX(!err, NULL); + __TBB_ASSERT_EX(!err, nullptr); } inline void swap_coroutine(coroutine_type& prev_coroutine, coroutine_type& new_coroutine) { int err = swapcontext(&prev_coroutine.my_context, &new_coroutine.my_context); - __TBB_ASSERT_EX(!err, NULL); + __TBB_ASSERT_EX(!err, nullptr); } inline void destroy_coroutine(coroutine_type& c) { @@ -198,7 +356,7 @@ inline void destroy_coroutine(coroutine_type& c) { // Free stack memory with guarded pages munmap((void*)((std::uintptr_t)c.my_stack - REG_PAGE_SIZE), c.my_stack_size + 2 * REG_PAGE_SIZE); // Clear the stack state 
afterwards - c.my_stack = NULL; + c.my_stack = nullptr; c.my_stack_size = 0; } @@ -219,4 +377,3 @@ inline void destroy_coroutine(coroutine_type& c) { #endif /* __TBB_RESUMABLE_TASKS */ #endif /* _TBB_co_context_H */ - diff --git a/contrib/libs/tbb/src/tbb/concurrent_bounded_queue.cpp b/contrib/libs/tbb/src/tbb/concurrent_bounded_queue.cpp index 90077936f6..14617175dd 100644 --- a/contrib/libs/tbb/src/tbb/concurrent_bounded_queue.cpp +++ b/contrib/libs/tbb/src/tbb/concurrent_bounded_queue.cpp @@ -58,8 +58,8 @@ void __TBB_EXPORTED_FUNC wait_bounded_queue_monitor( concurrent_monitor* monitor } void __TBB_EXPORTED_FUNC abort_bounded_queue_monitors( concurrent_monitor* monitors ) { - concurrent_monitor& items_avail = monitors[d1::cbq_items_avail_tag]; - concurrent_monitor& slots_avail = monitors[d1::cbq_slots_avail_tag]; + concurrent_monitor& items_avail = monitors[d2::cbq_items_avail_tag]; + concurrent_monitor& slots_avail = monitors[d2::cbq_slots_avail_tag]; items_avail.abort_all(); slots_avail.abort_all(); diff --git a/contrib/libs/tbb/src/tbb/concurrent_monitor.h b/contrib/libs/tbb/src/tbb/concurrent_monitor.h index cb1885a5d0..3d20ef5b98 100644 --- a/contrib/libs/tbb/src/tbb/concurrent_monitor.h +++ b/contrib/libs/tbb/src/tbb/concurrent_monitor.h @@ -20,9 +20,7 @@ #include "oneapi/tbb/spin_mutex.h" #include "oneapi/tbb/detail/_exception.h" #include "oneapi/tbb/detail/_aligned_space.h" -#include "oneapi/tbb/detail/_template_helpers.h" -#include "scheduler_common.h" - +#include "concurrent_monitor_mutex.h" #include "semaphore.h" #include <atomic> @@ -33,20 +31,21 @@ namespace r1 { //! 
Circular doubly-linked list with sentinel /** head.next points to the front and head.prev points to the back */ -class circular_doubly_linked_list_with_sentinel : no_copy { +class circular_doubly_linked_list_with_sentinel { public: struct base_node { base_node* next; base_node* prev; + + constexpr base_node(base_node* n, base_node* p) : next(n), prev(p) {} explicit base_node() : next((base_node*)(uintptr_t)0xcdcdcdcd), prev((base_node*)(uintptr_t)0xcdcdcdcd) {} }; // ctor - circular_doubly_linked_list_with_sentinel() { clear(); } - // dtor - ~circular_doubly_linked_list_with_sentinel() { - __TBB_ASSERT(head.next == &head && head.prev == &head, "the list is not empty"); - } + constexpr circular_doubly_linked_list_with_sentinel() : count(0), head(&head, &head) {} + + circular_doubly_linked_list_with_sentinel(const circular_doubly_linked_list_with_sentinel&) = delete; + circular_doubly_linked_list_with_sentinel& operator=(const circular_doubly_linked_list_with_sentinel&) = delete; inline std::size_t size() const { return count.load(std::memory_order_relaxed); } inline bool empty() const { return size() == 0; } @@ -149,8 +148,7 @@ class sleep_node : public wait_node<Context> { public: using base_type::base_type; - // Make it virtual due to Intel Compiler warning - virtual ~sleep_node() { + ~sleep_node() override { if (this->my_initialized) { if (this->my_skipped_wakeup) semaphore().P(); semaphore().~binary_semaphore(); @@ -191,17 +189,15 @@ private: //! concurrent_monitor /** fine-grained concurrent_monitor implementation */ template <typename Context> -class concurrent_monitor_base : no_copy { +class concurrent_monitor_base { public: //! ctor - concurrent_monitor_base() : my_epoch{} - {} - + constexpr concurrent_monitor_base() {} //! 
dtor - ~concurrent_monitor_base() { - abort_all(); - __TBB_ASSERT(my_waitset.empty(), "waitset not empty?"); - } + ~concurrent_monitor_base() = default; + + concurrent_monitor_base(const concurrent_monitor_base&) = delete; + concurrent_monitor_base& operator=(const concurrent_monitor_base&) = delete; //! prepare wait by inserting 'thr' into the wait queue void prepare_wait( wait_node<Context>& node) { @@ -217,14 +213,14 @@ public: node.my_is_in_list.store(true, std::memory_order_relaxed); { - tbb::spin_mutex::scoped_lock l(my_mutex); + concurrent_monitor_mutex::scoped_lock l(my_mutex); node.my_epoch = my_epoch.load(std::memory_order_relaxed); my_waitset.add(&node); } // Prepare wait guarantees Write Read memory barrier. // In C++ only full fence covers this type of barrier. - atomic_fence(std::memory_order_seq_cst); + atomic_fence_seq_cst(); } //! Commit wait if event count has not changed; otherwise, cancel wait. @@ -248,7 +244,7 @@ public: // Cancel wait guarantees acquire memory barrier. bool in_list = node.my_is_in_list.load(std::memory_order_acquire); if (in_list) { - tbb::spin_mutex::scoped_lock l(my_mutex); + concurrent_monitor_mutex::scoped_lock l(my_mutex); if (node.my_is_in_list.load(std::memory_order_relaxed)) { my_waitset.remove(node); // node is removed from waitset, so there will be no wakeup @@ -276,7 +272,7 @@ public: //! Notify one thread about the event void notify_one() { - atomic_fence(std::memory_order_seq_cst); + atomic_fence_seq_cst(); notify_one_relaxed(); } @@ -289,7 +285,7 @@ public: base_node* n; const base_node* end = my_waitset.end(); { - tbb::spin_mutex::scoped_lock l(my_mutex); + concurrent_monitor_mutex::scoped_lock l(my_mutex); my_epoch.store(my_epoch.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed); n = my_waitset.front(); if (n != end) { @@ -305,7 +301,7 @@ public: //! 
Notify all waiting threads of the event void notify_all() { - atomic_fence(std::memory_order_seq_cst); + atomic_fence_seq_cst(); notify_all_relaxed(); } @@ -318,7 +314,7 @@ public: base_list temp; const base_node* end; { - tbb::spin_mutex::scoped_lock l(my_mutex); + concurrent_monitor_mutex::scoped_lock l(my_mutex); my_epoch.store(my_epoch.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed); // TODO: Possible optimization, don't change node state under lock, just do flush my_waitset.flush_to(temp); @@ -341,7 +337,7 @@ public: //! Notify waiting threads of the event that satisfies the given predicate template <typename P> void notify( const P& predicate ) { - atomic_fence(std::memory_order_seq_cst); + atomic_fence_seq_cst(); notify_relaxed( predicate ); } @@ -357,7 +353,7 @@ public: base_node* nxt; const base_node* end = my_waitset.end(); { - tbb::spin_mutex::scoped_lock l(my_mutex); + concurrent_monitor_mutex::scoped_lock l(my_mutex); my_epoch.store(my_epoch.load( std::memory_order_relaxed ) + 1, std::memory_order_relaxed); for (base_node* n = my_waitset.last(); n != end; n = nxt) { nxt = n->prev; @@ -380,9 +376,40 @@ public: #endif } + //! Notify waiting threads of the event that satisfies the given predicate; + //! the predicate is called under the lock. Relaxed version. 
+ template<typename P> + void notify_one_relaxed( const P& predicate ) { + if (my_waitset.empty()) { + return; + } + + base_node* tmp = nullptr; + base_node* next{}; + const base_node* end = my_waitset.end(); + { + concurrent_monitor_mutex::scoped_lock l(my_mutex); + my_epoch.store(my_epoch.load( std::memory_order_relaxed ) + 1, std::memory_order_relaxed); + for (base_node* n = my_waitset.last(); n != end; n = next) { + next = n->prev; + auto* node = static_cast<wait_node<Context>*>(n); + if (predicate(node->my_context)) { + my_waitset.remove(*n); + node->my_is_in_list.store(false, std::memory_order_relaxed); + tmp = n; + break; + } + } + } + + if (tmp) { + to_wait_node(tmp)->notify(); + } + } + //! Abort any sleeping threads at the time of the call void abort_all() { - atomic_fence( std::memory_order_seq_cst ); + atomic_fence_seq_cst(); abort_all_relaxed(); } @@ -395,7 +422,7 @@ public: base_list temp; const base_node* end; { - tbb::spin_mutex::scoped_lock l(my_mutex); + concurrent_monitor_mutex::scoped_lock l(my_mutex); my_epoch.store(my_epoch.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed); my_waitset.flush_to(temp); end = temp.end(); @@ -415,6 +442,12 @@ public: #endif } + void destroy() { + this->abort_all(); + my_mutex.destroy(); + __TBB_ASSERT(this->my_waitset.empty(), "waitset not empty?"); + } + private: template <typename NodeType, typename Pred> bool guarded_call(Pred&& predicate, NodeType& node) { @@ -428,9 +461,9 @@ private: return res; } - tbb::spin_mutex my_mutex; - base_list my_waitset; - std::atomic<unsigned> my_epoch; + concurrent_monitor_mutex my_mutex{}; + base_list my_waitset{}; + std::atomic<unsigned> my_epoch{}; wait_node<Context>* to_wait_node( base_node* node ) { return static_cast<wait_node<Context>*>(node); } }; @@ -439,87 +472,13 @@ class concurrent_monitor : public concurrent_monitor_base<std::uintptr_t> { using base_type = concurrent_monitor_base<std::uintptr_t>; public: using base_type::base_type; - /** per-thread 
descriptor for concurrent_monitor */ - using thread_context = sleep_node<std::uintptr_t>; -}; - -struct extended_context { - extended_context() = default; - - extended_context(std::uintptr_t first_addr, arena* a) : - my_uniq_addr(first_addr), my_arena_addr(a) - {} - - std::uintptr_t my_uniq_addr{0}; - arena* my_arena_addr{nullptr}; -}; - - -#if __TBB_RESUMABLE_TASKS -class resume_node : public wait_node<extended_context> { - using base_type = wait_node<extended_context>; -public: - resume_node(extended_context ctx, execution_data_ext& ed_ext, task_dispatcher& target) - : base_type(ctx), my_curr_dispatcher(ed_ext.task_disp), my_target_dispatcher(&target) - , my_suspend_point(my_curr_dispatcher->get_suspend_point()) - {} - - virtual ~resume_node() { - if (this->my_skipped_wakeup) { - spin_wait_until_eq(this->my_notify_calls, 1); - } - - poison_pointer(my_curr_dispatcher); - poison_pointer(my_target_dispatcher); - poison_pointer(my_suspend_point); - } - - void init() override { - base_type::init(); - } - - void wait() override { - my_curr_dispatcher->resume(*my_target_dispatcher); - __TBB_ASSERT(!this->my_is_in_list.load(std::memory_order_relaxed), "Still in the queue?"); - } - - void reset() override { - base_type::reset(); - spin_wait_until_eq(this->my_notify_calls, 1); - my_notify_calls.store(0, std::memory_order_relaxed); - } - // notify is called (perhaps, concurrently) twice from: - // - concurrent_monitor::notify - // - post_resume_action::register_waiter - // The second notify is called after thread switches the stack - // (Because we can not call resume while the stack is occupied) - // We need calling resume only when both notifications are performed. 
- void notify() override { - if (++my_notify_calls == 2) { - r1::resume(my_suspend_point); - } + ~concurrent_monitor() { + destroy(); } -private: - friend class thread_data; - friend struct suspend_point_type::resume_task; - task_dispatcher* my_curr_dispatcher; - task_dispatcher* my_target_dispatcher; - suspend_point_type* my_suspend_point; - std::atomic<int> my_notify_calls{0}; -}; -#endif // __TBB_RESUMABLE_TASKS - -class extended_concurrent_monitor : public concurrent_monitor_base<extended_context> { - using base_type = concurrent_monitor_base<extended_context>; -public: - using base_type::base_type; /** per-thread descriptor for concurrent_monitor */ - using thread_context = sleep_node<extended_context>; -#if __TBB_RESUMABLE_TASKS - using resume_context = resume_node; -#endif + using thread_context = sleep_node<std::uintptr_t>; }; } // namespace r1 diff --git a/contrib/libs/tbb/src/tbb/concurrent_monitor_mutex.h b/contrib/libs/tbb/src/tbb/concurrent_monitor_mutex.h new file mode 100644 index 0000000000..cae6526dde --- /dev/null +++ b/contrib/libs/tbb/src/tbb/concurrent_monitor_mutex.h @@ -0,0 +1,113 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_monitor_mutex_H +#define __TBB_monitor_mutex_H + +#include "oneapi/tbb/detail/_utils.h" +#include "oneapi/tbb/detail/_aligned_space.h" +#include "semaphore.h" + +#include <mutex> + +namespace tbb { +namespace detail { +namespace r1 { + +class concurrent_monitor_mutex { +public: + using scoped_lock = std::lock_guard<concurrent_monitor_mutex>; + + constexpr concurrent_monitor_mutex() {} + + ~concurrent_monitor_mutex() = default; + + void destroy() { +#if !__TBB_USE_FUTEX + if (my_init_flag.load(std::memory_order_relaxed)) { + get_semaphore().~semaphore(); + } +#endif + } + + void lock() { + auto wakeup_condition = [&] { + return my_flag.load(std::memory_order_relaxed) == 0; + }; + + while (my_flag.exchange(1)) { + if (!timed_spin_wait_until(wakeup_condition)) { + ++my_waiters; + while (!wakeup_condition()) { + wait(); + } + --my_waiters; + } + } + } + + void unlock() { + my_flag.exchange(0); // full fence, so the next load is relaxed + if (my_waiters.load(std::memory_order_relaxed)) { + wakeup(); + } + } + +private: + void wait() { +#if __TBB_USE_FUTEX + futex_wait(&my_flag, 1); +#else + get_semaphore().P(); +#endif + } + + void wakeup() { +#if __TBB_USE_FUTEX + futex_wakeup_one(&my_flag); +#else + get_semaphore().V(); +#endif + } + + // The flag should be int for the futex operations + std::atomic<int> my_flag{0}; + std::atomic<int> my_waiters{0}; + +#if !__TBB_USE_FUTEX + semaphore& get_semaphore() { + if (!my_init_flag.load(std::memory_order_acquire)) { + std::lock_guard<std::mutex> lock(my_init_mutex); + if (!my_init_flag.load(std::memory_order_relaxed)) { + new (my_semaphore.begin()) semaphore(); + my_init_flag.store(true, std::memory_order_release); + } + } + + return *my_semaphore.begin(); + } + + static std::mutex my_init_mutex; + std::atomic<bool> my_init_flag{false}; + aligned_space<semaphore> my_semaphore{}; +#endif +}; + +} // namespace r1 +} // namespace detail +} // namespace tbb + +#endif // __TBB_monitor_mutex_H diff --git 
a/contrib/libs/tbb/src/tbb/dynamic_link.cpp b/contrib/libs/tbb/src/tbb/dynamic_link.cpp index d5c5c7be7d..330415a829 100644 --- a/contrib/libs/tbb/src/tbb/dynamic_link.cpp +++ b/contrib/libs/tbb/src/tbb/dynamic_link.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -26,6 +26,7 @@ */ #include <cstdarg> // va_list etc. +#include <cstring> // strrchr #if _WIN32 #include <malloc.h> @@ -41,7 +42,6 @@ #include <dlfcn.h> #include <unistd.h> - #include <cstring> #include <climits> #include <cstdlib> #endif /* _WIN32 */ @@ -145,12 +145,12 @@ namespace r1 { #if __TBB_WIN8UI_SUPPORT bool dynamic_link( const char* library, const dynamic_link_descriptor descriptors[], std::size_t required, dynamic_link_handle*, int flags ) { - dynamic_link_handle tmp_handle = NULL; + dynamic_link_handle tmp_handle = nullptr; TCHAR wlibrary[256]; if ( MultiByteToWideChar(CP_UTF8, 0, library, -1, wlibrary, 255) == 0 ) return false; if ( flags & DYNAMIC_LINK_LOAD ) tmp_handle = LoadPackagedLibrary( wlibrary, 0 ); - if (tmp_handle != NULL){ + if (tmp_handle != nullptr){ return resolve_symbols(tmp_handle, descriptors, required); }else{ return false; @@ -239,7 +239,7 @@ namespace r1 { char *backslash = std::strrchr( ap_data._path, '\\' ); if ( !backslash ) { // Backslash not found. - __TBB_ASSERT_EX( backslash!=NULL, "Unbelievable."); + __TBB_ASSERT_EX( backslash != nullptr, "Unbelievable."); return; } __TBB_ASSERT_EX( backslash >= ap_data._path, "Unbelievable."); @@ -254,7 +254,7 @@ namespace r1 { DYNAMIC_LINK_WARNING( dl_sys_fail, "dladdr", err ); return; } else { - __TBB_ASSERT_EX( dlinfo.dli_fname!=NULL, "Unbelievable." ); + __TBB_ASSERT_EX( dlinfo.dli_fname!=nullptr, "Unbelievable." 
); } char const *slash = std::strrchr( dlinfo.dli_fname, '/' ); @@ -281,13 +281,13 @@ namespace r1 { } if ( fname_len>0 ) { + ap_data._len += fname_len; if ( ap_data._len>PATH_MAX ) { DYNAMIC_LINK_WARNING( dl_buff_too_small ); ap_data._len=0; return; } std::strncpy( ap_data._path+rc, dlinfo.dli_fname, fname_len ); - ap_data._len += fname_len; ap_data._path[ap_data._len]=0; } #endif /* _WIN32 */ @@ -316,12 +316,12 @@ namespace r1 { std::size_t name_len = std::strlen( name ); std::size_t full_len = name_len+ap_data._len; if ( full_len < len ) { - __TBB_ASSERT( ap_data._path[ap_data._len] == 0, NULL); - __TBB_ASSERT( std::strlen(ap_data._path) == ap_data._len, NULL); + __TBB_ASSERT( ap_data._path[ap_data._len] == 0, nullptr); + __TBB_ASSERT( std::strlen(ap_data._path) == ap_data._len, nullptr); std::strncpy( path, ap_data._path, ap_data._len + 1 ); - __TBB_ASSERT( path[ap_data._len] == 0, NULL ); + __TBB_ASSERT( path[ap_data._len] == 0, nullptr); std::strncat( path, name, len - ap_data._len ); - __TBB_ASSERT( std::strlen(path) == full_len, NULL ); + __TBB_ASSERT( std::strlen(path) == full_len, nullptr); } return full_len+1; // +1 for null character } @@ -379,8 +379,8 @@ namespace r1 { static dynamic_link_handle global_symbols_link( const char* library, const dynamic_link_descriptor descriptors[], std::size_t required ) { dynamic_link_handle library_handle{}; #if _WIN32 - bool res = GetModuleHandleEx(0, library, &library_handle); - __TBB_ASSERT_EX(res && library_handle || !res && !library_handle, nullptr); + auto res = GetModuleHandleEx(0, library, &library_handle); + __TBB_ASSERT_EX((res && library_handle) || (!res && !library_handle), nullptr); #else /* _WIN32 */ #if !__TBB_DYNAMIC_LOAD_ENABLED /* only __TBB_WEAK_SYMBOLS_PRESENT is defined */ if ( !dlopen ) return 0; @@ -408,10 +408,24 @@ namespace r1 { #endif /* __TBB_DYNAMIC_LOAD_ENABLED */ } - dynamic_link_handle dynamic_load( const char* library, const dynamic_link_descriptor descriptors[], std::size_t required ) 
{ - ::tbb::detail::suppress_unused_warning( library, descriptors, required ); -#if __TBB_DYNAMIC_LOAD_ENABLED +#if !_WIN32 + int loading_flags(bool local_binding) { + int flags = RTLD_NOW; + if (local_binding) { + flags = flags | RTLD_LOCAL; +#if (__linux__ && __GLIBC__) && !__TBB_USE_SANITIZERS + flags = flags | RTLD_DEEPBIND; +#endif + } else { + flags = flags | RTLD_GLOBAL; + } + return flags; + } +#endif + dynamic_link_handle dynamic_load( const char* library, const dynamic_link_descriptor descriptors[], std::size_t required, bool local_binding ) { + ::tbb::detail::suppress_unused_warning( library, descriptors, required, local_binding ); +#if __TBB_DYNAMIC_LOAD_ENABLED std::size_t const len = PATH_MAX + 1; char path[ len ]; std::size_t rc = abs_path( library, path, len ); @@ -421,7 +435,8 @@ namespace r1 { // (e.g. because of MS runtime problems - one of those crazy manifest related ones) UINT prev_mode = SetErrorMode (SEM_FAILCRITICALERRORS); #endif /* _WIN32 */ - dynamic_link_handle library_handle = dlopen( path, RTLD_NOW | RTLD_GLOBAL ); + // The second argument (loading_flags) is ignored on Windows + dynamic_link_handle library_handle = dlopen( path, loading_flags(local_binding) ); #if _WIN32 SetErrorMode (prev_mode); #endif /* _WIN32 */ @@ -429,7 +444,7 @@ namespace r1 { if( !resolve_symbols( library_handle, descriptors, required ) ) { // The loaded library does not contain all the expected entry points dynamic_unlink( library_handle ); - library_handle = NULL; + library_handle = nullptr; } } else DYNAMIC_LINK_WARNING( dl_lib_not_found, path, dlerror() ); @@ -439,18 +454,26 @@ namespace r1 { // rc == 0 means failing of init_ap_data so the warning has already been issued. 
#endif /* __TBB_DYNAMIC_LOAD_ENABLED */ - return 0; + return nullptr; } bool dynamic_link( const char* library, const dynamic_link_descriptor descriptors[], std::size_t required, dynamic_link_handle *handle, int flags ) { init_dynamic_link_data(); // TODO: May global_symbols_link find weak symbols? - dynamic_link_handle library_handle = ( flags & DYNAMIC_LINK_GLOBAL ) ? global_symbols_link( library, descriptors, required ) : 0; + dynamic_link_handle library_handle = ( flags & DYNAMIC_LINK_GLOBAL ) ? global_symbols_link( library, descriptors, required ) : nullptr; +#if defined(_MSC_VER) && _MSC_VER <= 1900 +#pragma warning (push) +// MSVC 2015 warning: 'int': forcing value to bool 'true' or 'false' +#pragma warning (disable: 4800) +#endif if ( !library_handle && ( flags & DYNAMIC_LINK_LOAD ) ) - library_handle = dynamic_load( library, descriptors, required ); + library_handle = dynamic_load( library, descriptors, required, flags & DYNAMIC_LINK_LOCAL ); +#if defined(_MSC_VER) && _MSC_VER <= 1900 +#pragma warning (pop) +#endif if ( !library_handle && ( flags & DYNAMIC_LINK_WEAK ) ) return weak_symbol_link( descriptors, required ); diff --git a/contrib/libs/tbb/src/tbb/dynamic_link.h b/contrib/libs/tbb/src/tbb/dynamic_link.h index 91adcc507c..f07750b665 100644 --- a/contrib/libs/tbb/src/tbb/dynamic_link.h +++ b/contrib/libs/tbb/src/tbb/dynamic_link.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -29,8 +29,8 @@ and CLOSE_INTERNAL_NAMESPACE to override the following default definitions. 
**/ #include <cstddef> -#if _WIN32 -#include <Windows.h> +#ifdef _WIN32 +#include <windows.h> #endif /* _WIN32 */ namespace tbb { @@ -45,7 +45,7 @@ typedef void (*pointer_to_handler)(); // prevent warnings from some compilers (g++ 4.1) #if __TBB_WEAK_SYMBOLS_PRESENT #define DLD(s,h) {#s, (pointer_to_handler*)(void*)(&h), (pointer_to_handler)&s} -#define DLD_NOWEAK(s,h) {#s, (pointer_to_handler*)(void*)(&h), NULL} +#define DLD_NOWEAK(s,h) {#s, (pointer_to_handler*)(void*)(&h), nullptr} #else #define DLD(s,h) {#s, (pointer_to_handler*)(void*)(&h)} #define DLD_NOWEAK(s,h) DLD(s,h) @@ -68,10 +68,13 @@ using dynamic_link_handle = HMODULE; using dynamic_link_handle = void*; #endif /* _WIN32 */ -const int DYNAMIC_LINK_GLOBAL = 0x01; -const int DYNAMIC_LINK_LOAD = 0x02; -const int DYNAMIC_LINK_WEAK = 0x04; -const int DYNAMIC_LINK_ALL = DYNAMIC_LINK_GLOBAL | DYNAMIC_LINK_LOAD | DYNAMIC_LINK_WEAK; +const int DYNAMIC_LINK_GLOBAL = 0x01; +const int DYNAMIC_LINK_LOAD = 0x02; +const int DYNAMIC_LINK_WEAK = 0x04; +const int DYNAMIC_LINK_LOCAL = 0x08; + +const int DYNAMIC_LINK_LOCAL_BINDING = DYNAMIC_LINK_LOCAL | DYNAMIC_LINK_LOAD; +const int DYNAMIC_LINK_DEFAULT = DYNAMIC_LINK_GLOBAL | DYNAMIC_LINK_LOAD | DYNAMIC_LINK_WEAK; //! Fill in dynamically linked handlers. /** 'library' is the name of the requested library. 
It should not contain a full @@ -92,8 +95,8 @@ const int DYNAMIC_LINK_ALL = DYNAMIC_LINK_GLOBAL | DYNAMIC_LINK_LOAD | DYNAMI bool dynamic_link( const char* library, const dynamic_link_descriptor descriptors[], std::size_t required, - dynamic_link_handle* handle = 0, - int flags = DYNAMIC_LINK_ALL ); + dynamic_link_handle* handle = nullptr, + int flags = DYNAMIC_LINK_DEFAULT ); void dynamic_unlink( dynamic_link_handle handle ); diff --git a/contrib/libs/tbb/src/tbb/environment.h b/contrib/libs/tbb/src/tbb/environment.h index 8886ef09e1..eac6f20239 100644 --- a/contrib/libs/tbb/src/tbb/environment.h +++ b/contrib/libs/tbb/src/tbb/environment.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2018-2021 Intel Corporation + Copyright (c) 2018-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -44,7 +44,7 @@ static inline bool GetBoolEnvironmentVariable( const char * name ) { if (s[index] != '1') return false; index++; // Memory access after incrementing is safe, since the getenv() returns a - // NULL terminated string, and even if the character getting by index is '1', + // null-terminated string, and even if the character getting by index is '1', // and this character is the end of string, after incrementing we will get // an index of character, that contains '\0' index += std::strspn(&s[index], " "); @@ -55,7 +55,7 @@ static inline bool GetBoolEnvironmentVariable( const char * name ) { static inline long GetIntegralEnvironmentVariable( const char * name ) { if ( const char* s = std::getenv(name) ) { - char* end = NULL; + char* end = nullptr; errno = 0; long value = std::strtol(s, &end, 10); diff --git a/contrib/libs/tbb/src/tbb/exception.cpp b/contrib/libs/tbb/src/tbb/exception.cpp index c3e95d6d97..efc9b2a4d6 100644 --- a/contrib/libs/tbb/src/tbb/exception.cpp +++ b/contrib/libs/tbb/src/tbb/exception.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright 
(c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -48,6 +48,12 @@ const char* missing_wait::what() const noexcept(true) { return "wait() was not c /*[[noreturn]]*/ void do_throw_noexcept(void (*throw_func)()) noexcept { throw_func(); +#if __GNUC__ == 7 + // In release, GCC 7 loses noexcept attribute during tail call optimization. + // The following statement prevents tail call optimization. + volatile bool reach_this_point = true; + suppress_unused_warning(reach_this_point); +#endif } bool terminate_on_exception(); // defined in global_control.cpp and ipc_server.cpp @@ -82,9 +88,7 @@ void throw_exception ( exception_id eid ) { case exception_id::invalid_load_factor: DO_THROW(std::out_of_range, ("Invalid hash load factor")); break; case exception_id::invalid_key: DO_THROW(std::out_of_range, ("invalid key")); break; case exception_id::bad_tagged_msg_cast: DO_THROW(std::runtime_error, ("Illegal tagged_msg cast")); break; -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE case exception_id::unsafe_wait: DO_THROW(unsafe_wait, ("Unsafe to wait further")); break; -#endif default: __TBB_ASSERT ( false, "Unknown exception ID" ); } __TBB_ASSERT(false, "Unreachable code"); @@ -148,7 +152,7 @@ bool gcc_rethrow_exception_broken() { is_broken = std::uncaught_exception(); } if( is_broken ) fix_broken_rethrow(); - __TBB_ASSERT( !std::uncaught_exception(), NULL ); + __TBB_ASSERT( !std::uncaught_exception(), nullptr); return is_broken; } #else diff --git a/contrib/libs/tbb/src/tbb/global_control.cpp b/contrib/libs/tbb/src/tbb/global_control.cpp index a9eac2cbc3..1bc3c22c1f 100644 --- a/contrib/libs/tbb/src/tbb/global_control.cpp +++ b/contrib/libs/tbb/src/tbb/global_control.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file 
except in compliance with the License. @@ -59,19 +59,19 @@ public: }; class alignas(max_nfs_size) allowed_parallelism_control : public control_storage { - virtual std::size_t default_value() const override { + std::size_t default_value() const override { return max(1U, governor::default_num_threads()); } - virtual bool is_first_arg_preferred(std::size_t a, std::size_t b) const override { + bool is_first_arg_preferred(std::size_t a, std::size_t b) const override { return a<b; // prefer min allowed parallelism } - virtual void apply_active(std::size_t new_active) override { + void apply_active(std::size_t new_active) override { control_storage::apply_active(new_active); - __TBB_ASSERT( my_active_value>=1, NULL ); + __TBB_ASSERT( my_active_value>=1, nullptr); // -1 to take external thread into account market::set_active_num_workers( my_active_value-1 ); } - virtual std::size_t active_value() override { + std::size_t active_value() override { spin_mutex::scoped_lock lock(my_list_mutex); // protect my_list.empty() call if (my_list.empty()) return default_value(); @@ -88,10 +88,19 @@ public: }; class alignas(max_nfs_size) stack_size_control : public control_storage { - virtual std::size_t default_value() const override { + std::size_t default_value() const override { +#if _WIN32_WINNT >= 0x0602 /* _WIN32_WINNT_WIN8 */ + static auto ThreadStackSizeDefault = [] { + ULONG_PTR hi, lo; + GetCurrentThreadStackLimits(&lo, &hi); + return hi - lo; + }(); + return ThreadStackSizeDefault; +#else return ThreadStackSize; +#endif } - virtual void apply_active(std::size_t new_active) override { + void apply_active(std::size_t new_active) override { control_storage::apply_active(new_active); #if __TBB_WIN8UI_SUPPORT && (_WIN32_WINNT < 0x0A00) __TBB_ASSERT( false, "For Windows 8 Store* apps we must not set stack size" ); @@ -100,20 +109,19 @@ class alignas(max_nfs_size) stack_size_control : public control_storage { }; class alignas(max_nfs_size) terminate_on_exception_control : public 
control_storage { - virtual std::size_t default_value() const override { + std::size_t default_value() const override { return 0; } }; -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE class alignas(max_nfs_size) lifetime_control : public control_storage { - virtual bool is_first_arg_preferred(std::size_t, std::size_t) const override { + bool is_first_arg_preferred(std::size_t, std::size_t) const override { return false; // not interested } - virtual std::size_t default_value() const override { + std::size_t default_value() const override { return 0; } - virtual void apply_active(std::size_t new_active) override { + void apply_active(std::size_t new_active) override { if (new_active == 1) { // reserve the market reference market::global_market_mutex_type::scoped_lock lock( market::theMarketMutex ); @@ -137,21 +145,16 @@ public: return my_list.empty(); } }; -#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE static allowed_parallelism_control allowed_parallelism_ctl; static stack_size_control stack_size_ctl; static terminate_on_exception_control terminate_on_exception_ctl; -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE static lifetime_control lifetime_ctl; static control_storage *controls[] = {&allowed_parallelism_ctl, &stack_size_ctl, &terminate_on_exception_ctl, &lifetime_ctl}; -#else -static control_storage *controls[] = {&allowed_parallelism_ctl, &stack_size_ctl, &terminate_on_exception_ctl}; -#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE //! 
Comparator for a set of global_control objects inline bool control_storage_comparator::operator()(const global_control* lhs, const global_control* rhs) const { - __TBB_ASSERT_RELEASE(lhs->my_param < global_control::parameter_max , NULL); + __TBB_ASSERT_RELEASE(lhs->my_param < global_control::parameter_max , nullptr); return lhs->my_value < rhs->my_value || (lhs->my_value == rhs->my_value && lhs < rhs); } @@ -163,11 +166,9 @@ bool terminate_on_exception() { return global_control::active_value(global_control::terminate_on_exception) == 1; } -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE unsigned market::is_lifetime_control_present() { return !lifetime_ctl.is_empty(); } -#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE struct global_control_impl { private: @@ -183,7 +184,7 @@ private: public: static void create(d1::global_control& gc) { - __TBB_ASSERT_RELEASE(gc.my_param < global_control::parameter_max, NULL); + __TBB_ASSERT_RELEASE(gc.my_param < global_control::parameter_max, nullptr); control_storage* const c = controls[gc.my_param]; spin_mutex::scoped_lock lock(c->my_list_mutex); @@ -196,27 +197,19 @@ public: } static void destroy(d1::global_control& gc) { - __TBB_ASSERT_RELEASE(gc.my_param < global_control::parameter_max, NULL); + __TBB_ASSERT_RELEASE(gc.my_param < global_control::parameter_max, nullptr); control_storage* const c = controls[gc.my_param]; -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE - __TBB_ASSERT(gc.my_param == global_control::scheduler_handle || !c->my_list.empty(), NULL); -#else - __TBB_ASSERT(!c->my_list.empty(), NULL); -#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE // Concurrent reading and changing global parameter is possible. 
spin_mutex::scoped_lock lock(c->my_list_mutex); + __TBB_ASSERT(gc.my_param == global_control::scheduler_handle || !c->my_list.empty(), nullptr); std::size_t new_active = (std::size_t)(-1), old_active = c->my_active_value; if (!erase_if_present(c, gc)) { -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE - __TBB_ASSERT(gc.my_param == global_control::scheduler_handle , NULL); + __TBB_ASSERT(gc.my_param == global_control::scheduler_handle , nullptr); return; -#else - __TBB_ASSERT(false, "Unreachable code"); -#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE } if (c->my_list.empty()) { - __TBB_ASSERT(new_active == (std::size_t) - 1, NULL); + __TBB_ASSERT(new_active == (std::size_t) - 1, nullptr); new_active = c->default_value(); } else { new_active = (*c->my_list.begin())->my_value; @@ -227,17 +220,17 @@ public: } static bool remove_and_check_if_empty(d1::global_control& gc) { - __TBB_ASSERT_RELEASE(gc.my_param < global_control::parameter_max, NULL); + __TBB_ASSERT_RELEASE(gc.my_param < global_control::parameter_max, nullptr); control_storage* const c = controls[gc.my_param]; - __TBB_ASSERT(!c->my_list.empty(), NULL); spin_mutex::scoped_lock lock(c->my_list_mutex); + __TBB_ASSERT(!c->my_list.empty(), nullptr); erase_if_present(c, gc); return c->my_list.empty(); } #if TBB_USE_ASSERT static bool is_present(d1::global_control& gc) { - __TBB_ASSERT_RELEASE(gc.my_param < global_control::parameter_max, NULL); + __TBB_ASSERT_RELEASE(gc.my_param < global_control::parameter_max, nullptr); control_storage* const c = controls[gc.my_param]; spin_mutex::scoped_lock lock(c->my_list_mutex); @@ -266,7 +259,7 @@ bool is_present(d1::global_control& gc) { } #endif // TBB_USE_ASSERT std::size_t __TBB_EXPORTED_FUNC global_control_active_value(int param) { - __TBB_ASSERT_RELEASE(param < global_control::parameter_max, NULL); + __TBB_ASSERT_RELEASE(param < global_control::parameter_max, nullptr); return controls[param]->active_value(); } diff --git a/contrib/libs/tbb/src/tbb/governor.cpp 
b/contrib/libs/tbb/src/tbb/governor.cpp index b75b91a75c..3111ab3e7b 100644 --- a/contrib/libs/tbb/src/tbb/governor.cpp +++ b/contrib/libs/tbb/src/tbb/governor.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -20,6 +20,7 @@ #include "market.h" #include "arena.h" #include "dynamic_link.h" +#include "concurrent_monitor.h" #include "oneapi/tbb/task_group.h" #include "oneapi/tbb/global_control.h" @@ -38,16 +39,20 @@ namespace tbb { namespace detail { namespace r1 { -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE +void clear_address_waiter_table(); + //! global_control.cpp contains definition bool remove_and_check_if_empty(d1::global_control& gc); bool is_present(d1::global_control& gc); -#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE namespace rml { tbb_server* make_private_server( tbb_client& client ); } // namespace rml +namespace system_topology { + void destroy(); +} + //------------------------------------------------------------------------ // governor //------------------------------------------------------------------------ @@ -61,6 +66,7 @@ void governor::acquire_resources () { if( status ) handle_perror(status, "TBB failed to initialize task scheduler TLS\n"); detect_cpu_features(cpu_features); + is_rethrow_broken = gcc_rethrow_exception_broken(); } @@ -73,11 +79,14 @@ void governor::release_resources () { int status = theTLS.destroy(); if( status ) runtime_warning("failed to destroy task scheduler TLS: %s", std::strerror(status)); + clear_address_waiter_table(); + + system_topology::destroy(); dynamic_unlink_all(); } rml::tbb_server* governor::create_rml_server ( rml::tbb_client& client ) { - rml::tbb_server* server = NULL; + rml::tbb_server* server = nullptr; if( !UsePrivateRML ) { ::rml::factory::status_type status = theRMLServerFactory.make_server( server, 
client ); if( status != ::rml::factory::st_success ) { @@ -86,7 +95,7 @@ rml::tbb_server* governor::create_rml_server ( rml::tbb_client& client ) { } } if ( !server ) { - __TBB_ASSERT( UsePrivateRML, NULL ); + __TBB_ASSERT( UsePrivateRML, nullptr); server = rml::make_private_server( client ); } __TBB_ASSERT( server, "Failed to create RML server" ); @@ -121,12 +130,12 @@ void governor::one_time_init() { static std::uintptr_t get_stack_base(std::size_t stack_size) { // Stacks are growing top-down. Highest address is called "stack base", // and the lowest is "stack limit". -#if USE_WINTHREAD +#if __TBB_USE_WINAPI suppress_unused_warning(stack_size); NT_TIB* pteb = (NT_TIB*)NtCurrentTeb(); __TBB_ASSERT(&pteb < pteb->StackBase && &pteb > pteb->StackLimit, "invalid stack info in TEB"); return reinterpret_cast<std::uintptr_t>(pteb->StackBase); -#else /* USE_PTHREAD */ +#else // There is no portable way to get stack base address in Posix, so we use // non-portable method (on all modern Linux) or the simplified approach // based on the common sense assumptions. 
The most important assumption @@ -153,8 +162,20 @@ static std::uintptr_t get_stack_base(std::size_t stack_size) { stack_base = reinterpret_cast<std::uintptr_t>(&anchor); } return stack_base; -#endif /* USE_PTHREAD */ +#endif /* __TBB_USE_WINAPI */ +} + +#if (_WIN32||_WIN64) && !__TBB_DYNAMIC_LOAD_ENABLED +static void register_external_thread_destructor() { + struct thread_destructor { + ~thread_destructor() { + governor::terminate_external_thread(); + } + }; + // ~thread_destructor() will be call during the calling thread termination + static thread_local thread_destructor thr_destructor; } +#endif // (_WIN32||_WIN64) && !__TBB_DYNAMIC_LOAD_ENABLED void governor::init_external_thread() { one_time_init(); @@ -170,48 +191,66 @@ void governor::init_external_thread() { // External thread always occupies the first slot thread_data& td = *new(cache_aligned_allocate(sizeof(thread_data))) thread_data(0, false); td.attach_arena(a, /*slot index*/ 0); + __TBB_ASSERT(td.my_inbox.is_idle_state(false), nullptr); stack_size = a.my_market->worker_stack_size(); std::uintptr_t stack_base = get_stack_base(stack_size); task_dispatcher& task_disp = td.my_arena_slot->default_task_dispatcher(); - task_disp.set_stealing_threshold(calculate_stealing_threshold(stack_base, stack_size)); - td.attach_task_dispatcher(task_disp); + td.enter_task_dispatcher(task_disp, calculate_stealing_threshold(stack_base, stack_size)); td.my_arena_slot->occupy(); a.my_market->add_external_thread(td); set_thread_data(td); +#if (_WIN32||_WIN64) && !__TBB_DYNAMIC_LOAD_ENABLED + // The external thread destructor is called from dllMain but it is not available with a static build. + // Therefore, we need to register the current thread to call the destructor during thread termination. 
+ register_external_thread_destructor(); +#endif } void governor::auto_terminate(void* tls) { __TBB_ASSERT(get_thread_data_if_initialized() == nullptr || - get_thread_data_if_initialized() == tls, NULL); + get_thread_data_if_initialized() == tls, nullptr); if (tls) { thread_data* td = static_cast<thread_data*>(tls); + auto clear_tls = [td] { + td->~thread_data(); + cache_aligned_deallocate(td); + clear_thread_data(); + }; + // Only external thread can be inside an arena during termination. if (td->my_arena_slot) { arena* a = td->my_arena; market* m = a->my_market; + // If the TLS slot is already cleared by OS or underlying concurrency + // runtime, restore its value to properly clean up arena + if (!is_thread_data_set(td)) { + set_thread_data(*td); + } + a->my_observers.notify_exit_observers(td->my_last_observer, td->my_is_worker); - td->my_task_dispatcher->m_stealing_threshold = 0; - td->detach_task_dispatcher(); + td->leave_task_dispatcher(); td->my_arena_slot->release(); // Release an arena a->on_thread_leaving<arena::ref_external>(); m->remove_external_thread(*td); + + // The tls should be cleared before market::release because + // market can destroy the tls key if we keep the last reference + clear_tls(); + // If there was an associated arena, it added a public market reference m->release( /*is_public*/ true, /*blocking_terminate*/ false); + } else { + clear_tls(); } - - td->~thread_data(); - cache_aligned_deallocate(td); - - clear_thread_data(); } - __TBB_ASSERT(get_thread_data_if_initialized() == nullptr, NULL); + __TBB_ASSERT(get_thread_data_if_initialized() == nullptr, nullptr); } void governor::initialize_rml_factory () { @@ -219,7 +258,6 @@ void governor::initialize_rml_factory () { UsePrivateRML = res != ::rml::factory::st_success; } -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE void __TBB_EXPORTED_FUNC get(d1::task_scheduler_handle& handle) { handle.m_ctl = new(allocate_memory(sizeof(global_control))) 
global_control(global_control::scheduler_handle, 1); } @@ -233,6 +271,7 @@ void release_impl(d1::task_scheduler_handle& handle) { } bool finalize_impl(d1::task_scheduler_handle& handle) { + __TBB_ASSERT_RELEASE(handle, "trying to finalize with null handle"); market::global_market_mutex_type::scoped_lock lock( market::theMarketMutex ); bool ok = true; // ok if theMarket does not exist yet market* m = market::theMarket; // read the state of theMarket @@ -270,12 +309,12 @@ bool __TBB_EXPORTED_FUNC finalize(d1::task_scheduler_handle& handle, std::intptr return ok; } } -#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE #if __TBB_ARENA_BINDING #if __TBB_WEAK_SYMBOLS_PRESENT #pragma weak __TBB_internal_initialize_system_topology +#pragma weak __TBB_internal_destroy_system_topology #pragma weak __TBB_internal_allocate_binding_handler #pragma weak __TBB_internal_deallocate_binding_handler #pragma weak __TBB_internal_apply_affinity @@ -288,6 +327,7 @@ void __TBB_internal_initialize_system_topology( int& numa_nodes_count, int*& numa_indexes_list, int& core_types_count, int*& core_types_indexes_list ); +void __TBB_internal_destroy_system_topology( ); //TODO: consider renaming to `create_binding_handler` and `destroy_binding_handler` binding_handler* __TBB_internal_allocate_binding_handler( int slot_num, int numa_id, int core_type_id, int max_threads_per_core ); @@ -301,6 +341,7 @@ int __TBB_internal_get_default_concurrency( int numa_id, int core_type_id, int m #endif /* __TBB_WEAK_SYMBOLS_PRESENT */ // Stubs that will be used if TBBbind library is unavailable. 
+static void dummy_destroy_system_topology ( ) { } static binding_handler* dummy_allocate_binding_handler ( int, int, int, int ) { return nullptr; } static void dummy_deallocate_binding_handler ( binding_handler* ) { } static void dummy_apply_affinity ( binding_handler*, int ) { } @@ -313,6 +354,7 @@ static void (*initialize_system_topology_ptr)( int& numa_nodes_count, int*& numa_indexes_list, int& core_types_count, int*& core_types_indexes_list ) = nullptr; +static void (*destroy_system_topology_ptr)( ) = dummy_destroy_system_topology; static binding_handler* (*allocate_binding_handler_ptr)( int slot_num, int numa_id, int core_type_id, int max_threads_per_core ) = dummy_allocate_binding_handler; @@ -325,10 +367,11 @@ static void (*restore_affinity_ptr)( binding_handler* handler_ptr, int slot_num int (*get_default_concurrency_ptr)( int numa_id, int core_type_id, int max_threads_per_core ) = dummy_get_default_concurrency; -#if _WIN32 || _WIN64 || __linux__ +#if _WIN32 || _WIN64 || __unix__ // Table describing how to link the handlers. 
static const dynamic_link_descriptor TbbBindLinkTable[] = { DLD(__TBB_internal_initialize_system_topology, initialize_system_topology_ptr), + DLD(__TBB_internal_destroy_system_topology, destroy_system_topology_ptr), DLD(__TBB_internal_allocate_binding_handler, allocate_binding_handler_ptr), DLD(__TBB_internal_deallocate_binding_handler, deallocate_binding_handler_ptr), DLD(__TBB_internal_apply_affinity, apply_affinity_ptr), @@ -347,15 +390,16 @@ static const unsigned LinkTableSize = sizeof(TbbBindLinkTable) / sizeof(dynamic_ #if _WIN32 || _WIN64 #define LIBRARY_EXTENSION ".dll" #define LIBRARY_PREFIX -#elif __linux__ +#elif __unix__ #define LIBRARY_EXTENSION __TBB_STRING(.so.3) #define LIBRARY_PREFIX "lib" -#endif /* __linux__ */ +#endif /* __unix__ */ #define TBBBIND_NAME LIBRARY_PREFIX "tbbbind" DEBUG_SUFFIX LIBRARY_EXTENSION #define TBBBIND_2_0_NAME LIBRARY_PREFIX "tbbbind_2_0" DEBUG_SUFFIX LIBRARY_EXTENSION -#define TBBBIND_2_4_NAME LIBRARY_PREFIX "tbbbind_2_4" DEBUG_SUFFIX LIBRARY_EXTENSION -#endif /* _WIN32 || _WIN64 || __linux__ */ + +#define TBBBIND_2_5_NAME LIBRARY_PREFIX "tbbbind_2_5" DEBUG_SUFFIX LIBRARY_EXTENSION +#endif /* _WIN32 || _WIN64 || __unix__ */ // Representation of system hardware topology information on the TBB side. // System topology may be initialized by third-party component (e.g. hwloc) @@ -374,19 +418,19 @@ int core_types_count = 0; int* core_types_indexes = nullptr; const char* load_tbbbind_shared_object() { -#if _WIN32 || _WIN64 || __linux__ +#if _WIN32 || _WIN64 || __unix__ #if _WIN32 && !_WIN64 // For 32-bit Windows applications, process affinity masks can only support up to 32 logical CPUs. 
SYSTEM_INFO si; GetNativeSystemInfo(&si); if (si.dwNumberOfProcessors > 32) return nullptr; #endif /* _WIN32 && !_WIN64 */ - for (const auto& tbbbind_version : {TBBBIND_2_4_NAME, TBBBIND_2_0_NAME, TBBBIND_NAME}) { - if (dynamic_link(tbbbind_version, TbbBindLinkTable, LinkTableSize)) { + for (const auto& tbbbind_version : {TBBBIND_2_5_NAME, TBBBIND_2_0_NAME, TBBBIND_NAME}) { + if (dynamic_link(tbbbind_version, TbbBindLinkTable, LinkTableSize, nullptr, DYNAMIC_LINK_LOCAL_BINDING)) { return tbbbind_version; } } -#endif /* _WIN32 || _WIN64 || __linux__ */ +#endif /* _WIN32 || _WIN64 || __unix__ */ return nullptr; } @@ -430,6 +474,10 @@ void initialization_impl() { void initialize() { atomic_do_once(initialization_impl, initialization_state); } + +void destroy() { + destroy_system_topology_ptr(); +} } // namespace system_topology binding_handler* construct_binding_handler(int slot_num, int numa_id, int core_type_id, int max_threads_per_core) { diff --git a/contrib/libs/tbb/src/tbb/governor.h b/contrib/libs/tbb/src/tbb/governor.h index 0ff4781414..3d861e5323 100644 --- a/contrib/libs/tbb/src/tbb/governor.h +++ b/contrib/libs/tbb/src/tbb/governor.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -53,12 +53,6 @@ private: //! TLS for scheduler instances associated with individual threads static basic_tls<thread_data*> theTLS; - //! Caches the maximal level of parallelism supported by the hardware - static unsigned DefaultNumberOfThreads; - - //! 
Caches the size of OS regular memory page - static std::size_t DefaultPageSize; - // TODO (TBB_REVAMP_TODO): reconsider constant names static rml::tbb_factory theRMLServerFactory; @@ -78,13 +72,14 @@ private: public: static unsigned default_num_threads () { - // No memory fence required, because at worst each invoking thread calls AvailableHwConcurrency once. - return DefaultNumberOfThreads ? DefaultNumberOfThreads : - DefaultNumberOfThreads = AvailableHwConcurrency(); + // Caches the maximal level of parallelism supported by the hardware + static unsigned num_threads = AvailableHwConcurrency(); + return num_threads; } static std::size_t default_page_size () { - return DefaultPageSize ? DefaultPageSize : - DefaultPageSize = DefaultSystemPageSize(); + // Caches the size of OS regular memory page + static std::size_t page_size = DefaultSystemPageSize(); + return page_size; } static void one_time_init(); //! Processes scheduler initialization request (possibly nested) in an external thread @@ -107,7 +102,7 @@ public: } init_external_thread(); td = theTLS.get(); - __TBB_ASSERT(td, NULL); + __TBB_ASSERT(td, nullptr); return td; } @@ -138,7 +133,9 @@ public: static bool speculation_enabled() { return cpu_features.rtm_enabled; } +#if __TBB_WAITPKG_INTRINSICS_PRESENT static bool wait_package_enabled() { return cpu_features.waitpkg_enabled; } +#endif static bool rethrow_exception_broken() { return is_rethrow_broken; } diff --git a/contrib/libs/tbb/src/tbb/intrusive_list.h b/contrib/libs/tbb/src/tbb/intrusive_list.h index 699bc149aa..d317f5554e 100644 --- a/contrib/libs/tbb/src/tbb/intrusive_list.h +++ b/contrib/libs/tbb/src/tbb/intrusive_list.h @@ -17,22 +17,13 @@ #ifndef _TBB_intrusive_list_H #define _TBB_intrusive_list_H +#include "oneapi/tbb/detail/_intrusive_list_node.h" + namespace tbb { namespace detail { namespace r1 { -//! Data structure to be inherited by the types that can form intrusive lists. 
-/** Intrusive list is formed by means of the member_intrusive_list<T> template class. - Note that type T must derive from intrusive_list_node either publicly or - declare instantiation member_intrusive_list<T> as a friend. - This class implements a limited subset of std::list interface. **/ -struct intrusive_list_node { - intrusive_list_node* my_prev_node{}; - intrusive_list_node* my_next_node{}; -#if TBB_USE_ASSERT - intrusive_list_node() { my_prev_node = my_next_node = this; } -#endif /* TBB_USE_ASSERT */ -}; +using d1::intrusive_list_node; //! List of element of type T, where T is derived from intrusive_list_node /** The class is not thread safe. **/ diff --git a/contrib/libs/tbb/src/tbb/itt_notify.cpp b/contrib/libs/tbb/src/tbb/itt_notify.cpp index 0e60579a62..eda5e6ad5e 100644 --- a/contrib/libs/tbb/src/tbb/itt_notify.cpp +++ b/contrib/libs/tbb/src/tbb/itt_notify.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -51,10 +51,10 @@ int __TBB_load_ittnotify() { #if !(_WIN32||_WIN64) // tool_api crashes without dlopen, check that it's present. Common case // for lack of dlopen is static binaries, i.e. ones build with -static. - if (dlopen == NULL) + if (dlopen == nullptr) return 0; #endif - return __itt_init_ittlib(NULL, // groups for: + return __itt_init_ittlib(nullptr, // groups for: (__itt_group_id)(__itt_group_sync // prepare/cancel/acquired/releasing | __itt_group_thread // name threads | __itt_group_stitch // stack stitching diff --git a/contrib/libs/tbb/src/tbb/itt_notify.h b/contrib/libs/tbb/src/tbb/itt_notify.h index 9978bcd7cb..5fc9d5424f 100644 --- a/contrib/libs/tbb/src/tbb/itt_notify.h +++ b/contrib/libs/tbb/src/tbb/itt_notify.h @@ -50,7 +50,7 @@ namespace detail { namespace r1 { //! Unicode support -#if (_WIN32||_WIN64) && !__MINGW32__ +#if (_WIN32||_WIN64) //! 
Unicode character type. Always wchar_t on Windows. /** We do not use typedefs from Windows TCHAR family to keep consistence of TBB coding style. **/ using tchar = wchar_t; diff --git a/contrib/libs/tbb/src/tbb/mailbox.h b/contrib/libs/tbb/src/tbb/mailbox.h index 2f49e9b35e..d9166c1219 100644 --- a/contrib/libs/tbb/src/tbb/mailbox.h +++ b/contrib/libs/tbb/src/tbb/mailbox.h @@ -20,7 +20,6 @@ #include "oneapi/tbb/cache_aligned_allocator.h" #include "oneapi/tbb/detail/_small_object_pool.h" -#include "arena_slot.h" #include "scheduler_common.h" #include <atomic> @@ -83,11 +82,11 @@ struct task_proxy : public d1::task { return nullptr; } - virtual task* execute(d1::execution_data&) { + task* execute(d1::execution_data&) override { __TBB_ASSERT_RELEASE(false, nullptr); return nullptr; } - virtual task* cancel(d1::execution_data&) { + task* cancel(d1::execution_data&) override { __TBB_ASSERT_RELEASE(false, nullptr); return nullptr; } @@ -187,14 +186,12 @@ public: } //! Drain the mailbox - intptr_t drain() { - intptr_t k = 0; + void drain() { // No fences here because other threads have already quit. - for( ; task_proxy* t = my_first; ++k ) { + for( ; task_proxy* t = my_first; ) { my_first.store(t->next_in_mailbox, std::memory_order_relaxed); - // cache_aligned_deallocate((char*)t - task_prefix_reservation_size); + t->allocator.delete_object(t); } - return k; } //! True if thread that owns this mailbox is looking for work. diff --git a/contrib/libs/tbb/src/tbb/main.cpp b/contrib/libs/tbb/src/tbb/main.cpp index ec6c98d682..d86c3b696b 100644 --- a/contrib/libs/tbb/src/tbb/main.cpp +++ b/contrib/libs/tbb/src/tbb/main.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -35,8 +35,6 @@ namespace r1 { //------------------------------------------------------------------------ // governor data basic_tls<thread_data*> governor::theTLS; -unsigned governor::DefaultNumberOfThreads; -size_t governor::DefaultPageSize; rml::tbb_factory governor::theRMLServerFactory; bool governor::UsePrivateRML; bool governor::is_rethrow_broken; @@ -147,7 +145,7 @@ extern "C" bool WINAPI DllMain( HANDLE /*hinstDLL*/, DWORD reason, LPVOID lpvRes case DLL_PROCESS_DETACH: // Since THREAD_DETACH is not called for the main thread, call auto-termination // here as well - but not during process shutdown (due to risk of a deadlock). - if ( lpvReserved==NULL ) { // library unload + if ( lpvReserved == nullptr ) { // library unload governor::terminate_external_thread(); } __TBB_InitOnce::remove_ref(); diff --git a/contrib/libs/tbb/src/tbb/market.cpp b/contrib/libs/tbb/src/tbb/market.cpp index 9259eaf588..b6504e0f3d 100644 --- a/contrib/libs/tbb/src/tbb/market.cpp +++ b/contrib/libs/tbb/src/tbb/market.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -75,6 +75,11 @@ market::market ( unsigned workers_soft_limit, unsigned workers_hard_limit, std:: __TBB_ASSERT( my_server, "Failed to create RML server" ); } +market::~market() { + poison_pointer(my_server); + poison_pointer(my_next_arena); +} + static unsigned calc_workers_soft_limit(unsigned workers_soft_limit, unsigned workers_hard_limit) { if( int soft_limit = market::app_parallelism_limit() ) workers_soft_limit = soft_limit-1; @@ -137,9 +142,9 @@ market& market::global_market(bool is_public, unsigned workers_requested, std::s const unsigned workers_soft_limit = calc_workers_soft_limit(workers_requested, workers_hard_limit); // Create the global market instance std::size_t size = sizeof(market); - __TBB_ASSERT( __TBB_offsetof(market, my_workers) + sizeof(thread_data*) == sizeof(market), + __TBB_ASSERT( __TBB_offsetof(market, my_workers) + sizeof(std::atomic<thread_data*>) == sizeof(market), "my_workers must be the last data field of the market class"); - size += sizeof(thread_data*) * (workers_hard_limit - 1); + size += sizeof(std::atomic<thread_data*>) * (workers_hard_limit - 1); __TBB_InitOnce::add_ref(); void* storage = cache_aligned_allocate(size); std::memset( storage, 0, size ); @@ -147,12 +152,10 @@ market& market::global_market(bool is_public, unsigned workers_requested, std::s market* m = new (storage) market( workers_soft_limit, workers_hard_limit, stack_size ); if( is_public ) m->my_public_ref_count.store(1, std::memory_order_relaxed); -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE if (market::is_lifetime_control_present()) { ++m->my_public_ref_count; ++m->my_ref_count; } -#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE theMarket = m; // This check relies on the fact that for shared RML default_concurrency==max_concurrency if ( !governor::UsePrivateRML && m->my_server->default_concurrency() < workers_soft_limit ) @@ -194,13 +197,13 @@ bool market::release ( bool is_public, bool blocking_terminate ) { } if ( is_public ) { __TBB_ASSERT( 
theMarket == this, "Global market instance was destroyed prematurely?" ); - __TBB_ASSERT( my_public_ref_count.load(std::memory_order_relaxed), NULL ); + __TBB_ASSERT( my_public_ref_count.load(std::memory_order_relaxed), nullptr); --my_public_ref_count; } if ( --my_ref_count == 0 ) { - __TBB_ASSERT( !my_public_ref_count.load(std::memory_order_relaxed), NULL ); + __TBB_ASSERT( !my_public_ref_count.load(std::memory_order_relaxed), nullptr); do_release = true; - theMarket = NULL; + theMarket = nullptr; } } if( do_release ) { @@ -220,7 +223,7 @@ int market::update_workers_request() { (int)my_num_workers_soft_limit.load(std::memory_order_relaxed)); #if __TBB_ENQUEUE_ENFORCED_CONCURRENCY if (my_mandatory_num_requested > 0) { - __TBB_ASSERT(my_num_workers_soft_limit.load(std::memory_order_relaxed) == 0, NULL); + __TBB_ASSERT(my_num_workers_soft_limit.load(std::memory_order_relaxed) == 0, nullptr); my_num_workers_requested = 1; } #endif @@ -245,7 +248,7 @@ void market::set_active_num_workers ( unsigned soft_limit ) { int delta = 0; { arenas_list_mutex_type::scoped_lock lock( m->my_arenas_list_mutex ); - __TBB_ASSERT(soft_limit <= m->my_num_workers_hard_limit, NULL); + __TBB_ASSERT(soft_limit <= m->my_num_workers_hard_limit, nullptr); #if __TBB_ENQUEUE_ENFORCED_CONCURRENCY arena_list_type* arenas = m->my_arenas; @@ -258,7 +261,7 @@ void market::set_active_num_workers ( unsigned soft_limit ) { if (it->my_global_concurrency_mode.load(std::memory_order_relaxed)) m->disable_mandatory_concurrency_impl(&*it); } - __TBB_ASSERT(m->my_mandatory_num_requested == 0, NULL); + __TBB_ASSERT(m->my_mandatory_num_requested == 0, nullptr); #endif m->my_num_workers_soft_limit.store(soft_limit, std::memory_order_release); @@ -290,8 +293,8 @@ bool governor::does_client_join_workers (const rml::tbb_client &client) { arena* market::create_arena ( int num_slots, int num_reserved_slots, unsigned arena_priority_level, std::size_t stack_size ) { - __TBB_ASSERT( num_slots > 0, NULL ); - __TBB_ASSERT( 
num_reserved_slots <= num_slots, NULL ); + __TBB_ASSERT( num_slots > 0, nullptr); + __TBB_ASSERT( num_reserved_slots <= num_slots, nullptr); // Add public market reference for an external thread/task_arena (that adds an internal reference in exchange). market &m = global_market( /*is_public=*/true, num_slots-num_reserved_slots, stack_size ); arena& a = arena::allocate_arena( m, num_slots, num_reserved_slots, arena_priority_level ); @@ -304,7 +307,7 @@ arena* market::create_arena ( int num_slots, int num_reserved_slots, unsigned ar /** This method must be invoked under my_arenas_list_mutex. **/ void market::detach_arena ( arena& a ) { market::enforce([this] { return theMarket == this; }, "Global market instance was destroyed prematurely?"); - __TBB_ASSERT( !a.my_slots[0].is_occupied(), NULL ); + __TBB_ASSERT( !a.my_slots[0].is_occupied(), nullptr); if (a.my_global_concurrency_mode.load(std::memory_order_relaxed)) disable_mandatory_concurrency_impl(&a); @@ -316,17 +319,17 @@ void market::detach_arena ( arena& a ) { void market::try_destroy_arena ( arena* a, uintptr_t aba_epoch, unsigned priority_level ) { bool locked = true; - __TBB_ASSERT( a, NULL ); - // we hold reference to the market, so it cannot be destroyed at any moment here - market::enforce([this] { return theMarket == this; }, NULL); - __TBB_ASSERT( my_ref_count!=0, NULL ); + __TBB_ASSERT( a, nullptr); + // we hold reference to the server, so market cannot be destroyed at any moment here + __TBB_ASSERT(!is_poisoned(my_server), nullptr); my_arenas_list_mutex.lock(); arena_list_type::iterator it = my_arenas[priority_level].begin(); for ( ; it != my_arenas[priority_level].end(); ++it ) { if ( a == &*it ) { if ( it->my_aba_epoch == aba_epoch ) { // Arena is alive - if ( !a->my_num_workers_requested && !a->my_references.load(std::memory_order_relaxed) ) { + // Acquire my_references to sync with threads that just left the arena + if (!a->my_num_workers_requested && 
!a->my_references.load(std::memory_order_acquire)) { __TBB_ASSERT( !a->my_num_workers_allotted.load(std::memory_order_relaxed) && (a->my_pool_state == arena::SNAPSHOT_EMPTY || !a->my_max_num_workers), @@ -455,8 +458,8 @@ bool market::is_arena_alive(arena* a) { #if __TBB_ENQUEUE_ENFORCED_CONCURRENCY void market::enable_mandatory_concurrency_impl ( arena *a ) { - __TBB_ASSERT(!a->my_global_concurrency_mode.load(std::memory_order_relaxed), NULL); - __TBB_ASSERT(my_num_workers_soft_limit.load(std::memory_order_relaxed) == 0, NULL); + __TBB_ASSERT(!a->my_global_concurrency_mode.load(std::memory_order_relaxed), nullptr); + __TBB_ASSERT(my_num_workers_soft_limit.load(std::memory_order_relaxed) == 0, nullptr); a->my_global_concurrency_mode.store(true, std::memory_order_relaxed); my_mandatory_num_requested++; @@ -479,8 +482,8 @@ void market::enable_mandatory_concurrency ( arena *a ) { } void market::disable_mandatory_concurrency_impl(arena* a) { - __TBB_ASSERT(a->my_global_concurrency_mode.load(std::memory_order_relaxed), NULL); - __TBB_ASSERT(my_mandatory_num_requested > 0, NULL); + __TBB_ASSERT(a->my_global_concurrency_mode.load(std::memory_order_relaxed), nullptr); + __TBB_ASSERT(my_mandatory_num_requested > 0, nullptr); a->my_global_concurrency_mode.store(false, std::memory_order_relaxed); my_mandatory_num_requested--; @@ -498,7 +501,7 @@ void market::mandatory_concurrency_disable ( arena *a ) { if (a->has_enqueued_tasks()) return; - __TBB_ASSERT(my_num_workers_soft_limit.load(std::memory_order_relaxed) == 0, NULL); + __TBB_ASSERT(my_num_workers_soft_limit.load(std::memory_order_relaxed) == 0, nullptr); disable_mandatory_concurrency_impl(a); delta = update_workers_request(); @@ -558,7 +561,7 @@ void market::adjust_demand ( arena& a, int delta, bool mandatory ) { my_priority_level_demand[a.my_priority_level] += delta; unsigned effective_soft_limit = my_num_workers_soft_limit.load(std::memory_order_relaxed); if (my_mandatory_num_requested > 0) { - 
__TBB_ASSERT(effective_soft_limit == 0, NULL); + __TBB_ASSERT(effective_soft_limit == 0, nullptr); effective_soft_limit = 1; } @@ -575,15 +578,16 @@ void market::adjust_demand ( arena& a, int delta, bool mandatory ) { delta = min(total_demand, (int)effective_soft_limit) - my_num_workers_requested; } my_num_workers_requested += delta; - __TBB_ASSERT(my_num_workers_requested <= (int)effective_soft_limit, NULL); + __TBB_ASSERT(my_num_workers_requested <= (int)effective_soft_limit, nullptr); - target_epoch = my_adjust_demand_target_epoch++; + target_epoch = a.my_adjust_demand_target_epoch++; } - spin_wait_until_eq(my_adjust_demand_current_epoch, target_epoch); + a.my_adjust_demand_current_epoch.wait_until(target_epoch, /* context = */ target_epoch, std::memory_order_relaxed); // Must be called outside of any locks my_server->adjust_job_count_estimate( delta ); - my_adjust_demand_current_epoch.store(target_epoch + 1, std::memory_order_release); + a.my_adjust_demand_current_epoch.exchange(target_epoch + 1); + a.my_adjust_demand_current_epoch.notify_relaxed(target_epoch + 1); } void market::process( job& j ) { @@ -605,7 +609,7 @@ void market::process( job& j ) { } void market::cleanup( job& j) { - market::enforce([this] { return theMarket != this; }, NULL ); + market::enforce([this] { return theMarket != this; }, nullptr ); governor::auto_terminate(&j); } @@ -615,13 +619,13 @@ void market::acknowledge_close_connection() { ::rml::job* market::create_one_job() { unsigned short index = ++my_first_unused_worker_idx; - __TBB_ASSERT( index > 0, NULL ); + __TBB_ASSERT( index > 0, nullptr); ITT_THREAD_SET_NAME(_T("TBB Worker Thread")); // index serves as a hint decreasing conflicts between workers when they migrate between arenas thread_data* td = new(cache_aligned_allocate(sizeof(thread_data))) thread_data{ index, true }; - __TBB_ASSERT( index <= my_num_workers_hard_limit, NULL ); - __TBB_ASSERT( my_workers[index - 1] == nullptr, NULL ); - my_workers[index - 1] = td; + 
__TBB_ASSERT( index <= my_num_workers_hard_limit, nullptr); + __TBB_ASSERT( my_workers[index - 1].load(std::memory_order_relaxed) == nullptr, nullptr); + my_workers[index - 1].store(td, std::memory_order_release); return td; } diff --git a/contrib/libs/tbb/src/tbb/market.h b/contrib/libs/tbb/src/tbb/market.h index 8443467447..f3891df305 100644 --- a/contrib/libs/tbb/src/tbb/market.h +++ b/contrib/libs/tbb/src/tbb/market.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -18,9 +18,10 @@ #define _TBB_market_H #include "scheduler_common.h" -#include "concurrent_monitor.h" +#include "market_concurrent_monitor.h" #include "intrusive_list.h" #include "rml_tbb.h" +#include "oneapi/tbb/rw_mutex.h" #include "oneapi/tbb/spin_rw_mutex.h" #include "oneapi/tbb/task_group.h" @@ -36,11 +37,9 @@ namespace tbb { namespace detail { -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE namespace d1 { class task_scheduler_handle; } -#endif namespace r1 { @@ -57,9 +56,7 @@ class market : no_copy, rml::tbb_client { template<typename SchedulerTraits> friend class custom_scheduler; friend class task_group_context; friend class governor; -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE friend class lifetime_control; -#endif public: //! Keys for the arena map array. The lower the value the higher priority of the arena list. @@ -67,9 +64,7 @@ public: private: friend void ITT_DoUnsafeOneTimeInitialization (); -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE friend bool finalize_impl(d1::task_scheduler_handle& handle); -#endif typedef intrusive_list<arena> arena_list_type; typedef intrusive_list<thread_data> thread_data_list_type; @@ -83,7 +78,7 @@ private: static global_market_mutex_type theMarketMutex; //! 
Lightweight mutex guarding accounting operations with arenas list - typedef spin_rw_mutex arenas_list_mutex_type; + typedef rw_mutex arenas_list_mutex_type; // TODO: introduce fine-grained (per priority list) locking of arenas. arenas_list_mutex_type my_arenas_list_mutex; @@ -91,7 +86,7 @@ private: rml::tbb_server* my_server; //! Waiting object for external and coroutine waiters. - extended_concurrent_monitor my_sleep_monitor; + market_concurrent_monitor my_sleep_monitor; //! Maximal number of workers allowed for use by the underlying resource manager /** It can't be changed after market creation. **/ @@ -104,12 +99,6 @@ private: //! Number of workers currently requested from RML int my_num_workers_requested; - //! The target serialization epoch for callers of adjust_job_count_estimate - int my_adjust_demand_target_epoch; - - //! The current serialization epoch for callers of adjust_job_count_estimate - std::atomic<int> my_adjust_demand_current_epoch; - //! First unused index of worker /** Used to assign indices to the new workers coming from RML, and busy part of my_workers array. **/ @@ -157,6 +146,9 @@ private: //! Constructor market ( unsigned workers_soft_limit, unsigned workers_hard_limit, std::size_t stack_size ); + //! Destructor + ~market(); + //! Destroys and deallocates market object created by market::create() void destroy (); @@ -173,7 +165,7 @@ private: } } - //! Returns next arena that needs more workers, or NULL. + //! Returns next arena that needs more workers, or nullptr. arena* arena_in_need(arena* prev); template <typename Pred> @@ -233,7 +225,7 @@ public: unsigned arena_index, std::size_t stack_size ); //! Removes the arena from the market's list - void try_destroy_arena ( arena*, uintptr_t aba_epoch, unsigned pririty_level ); + void try_destroy_arena ( arena*, uintptr_t aba_epoch, unsigned priority_level ); //! 
Removes the arena from the market's list void detach_arena ( arena& ); @@ -242,7 +234,7 @@ public: bool release ( bool is_public, bool blocking_terminate ); //! Return wait list - extended_concurrent_monitor& get_wait_list() { return my_sleep_monitor; } + market_concurrent_monitor& get_wait_list() { return my_sleep_monitor; } #if __TBB_ENQUEUE_ENFORCED_CONCURRENCY //! Imlpementation of mandatory concurrency enabling @@ -274,10 +266,8 @@ public: //! Reports active parallelism level according to user's settings static unsigned app_parallelism_limit(); -#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE //! Reports if any active global lifetime references are present static unsigned is_lifetime_control_present(); -#endif //! Finds all contexts affected by the state change and propagates the new state to them. /** The propagation is relayed to the market because tasks created by one @@ -293,7 +283,7 @@ public: //! Array of pointers to the registered workers /** Used by cancellation propagation mechanism. Must be the last data member of the class market. **/ - thread_data* my_workers[1]; + std::atomic<thread_data*> my_workers[1]; static unsigned max_num_workers() { global_market_mutex_type::scoped_lock lock( theMarketMutex ); diff --git a/contrib/libs/tbb/src/tbb/market_concurrent_monitor.h b/contrib/libs/tbb/src/tbb/market_concurrent_monitor.h new file mode 100644 index 0000000000..37927617ba --- /dev/null +++ b/contrib/libs/tbb/src/tbb/market_concurrent_monitor.h @@ -0,0 +1,116 @@ +/* + Copyright (c) 2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_market_concurrent_monitor_H +#define __TBB_market_concurrent_monitor_H + +#include "concurrent_monitor.h" +#include "scheduler_common.h" + +#include <atomic> + +namespace tbb { +namespace detail { +namespace r1 { + +struct market_context { + market_context() = default; + + market_context(std::uintptr_t first_addr, arena* a) : + my_uniq_addr(first_addr), my_arena_addr(a) + {} + + std::uintptr_t my_uniq_addr{0}; + arena* my_arena_addr{nullptr}; +}; + +#if __TBB_RESUMABLE_TASKS +class resume_node : public wait_node<market_context> { + using base_type = wait_node<market_context>; +public: + resume_node(market_context ctx, execution_data_ext& ed_ext, task_dispatcher& target) + : base_type(ctx), my_curr_dispatcher(ed_ext.task_disp), my_target_dispatcher(&target) + , my_suspend_point(my_curr_dispatcher->get_suspend_point()) + {} + + ~resume_node() override { + if (this->my_skipped_wakeup) { + spin_wait_until_eq(this->my_notify_calls, 1); + } + + poison_pointer(my_curr_dispatcher); + poison_pointer(my_target_dispatcher); + poison_pointer(my_suspend_point); + } + + void init() override { + base_type::init(); + } + + void wait() override { + my_curr_dispatcher->resume(*my_target_dispatcher); + __TBB_ASSERT(!this->my_is_in_list.load(std::memory_order_relaxed), "Still in the queue?"); + } + + void reset() override { + base_type::reset(); + spin_wait_until_eq(this->my_notify_calls, 1); + my_notify_calls.store(0, std::memory_order_relaxed); + } + + // notify is called (perhaps, concurrently) twice from: + // - concurrent_monitor::notify + // - post_resume_action::register_waiter + // The second notify is called after thread switches the stack + // (Because we can not call resume while the stack is occupied) + // We need calling resume only when both notifications are performed. 
+ void notify() override { + if (++my_notify_calls == 2) { + r1::resume(my_suspend_point); + } + } + +private: + friend class thread_data; + friend struct suspend_point_type::resume_task; + task_dispatcher* my_curr_dispatcher; + task_dispatcher* my_target_dispatcher; + suspend_point_type* my_suspend_point; + std::atomic<int> my_notify_calls{0}; +}; +#endif // __TBB_RESUMABLE_TASKS + +class market_concurrent_monitor : public concurrent_monitor_base<market_context> { + using base_type = concurrent_monitor_base<market_context>; +public: + using base_type::base_type; + + ~market_concurrent_monitor() { + destroy(); + } + + /** per-thread descriptor for concurrent_monitor */ + using thread_context = sleep_node<market_context>; +#if __TBB_RESUMABLE_TASKS + using resume_context = resume_node; +#endif +}; + +} // namespace r1 +} // namespace detail +} // namespace tbb + +#endif // __TBB_market_concurrent_monitor_H diff --git a/contrib/libs/tbb/src/tbb/misc.cpp b/contrib/libs/tbb/src/tbb/misc.cpp index 0e1d33a596..17da1238f8 100644 --- a/contrib/libs/tbb/src/tbb/misc.cpp +++ b/contrib/libs/tbb/src/tbb/misc.cpp @@ -25,6 +25,7 @@ #include "misc.h" #include "governor.h" #include "assert_impl.h" // Out-of-line TBB assertion handling routines are instantiated here. 
+#include "concurrent_monitor_mutex.h" #include <cstdio> #include <cstdlib> @@ -49,6 +50,13 @@ namespace r1 { //------------------------------------------------------------------------ cpu_features_type governor::cpu_features; +//------------------------------------------------------------------------ +// concurrent_monitor_mutex data +//------------------------------------------------------------------------ +#if !__TBB_USE_FUTEX +std::mutex concurrent_monitor_mutex::my_init_mutex; +#endif + size_t DefaultSystemPageSize() { #if _WIN32 diff --git a/contrib/libs/tbb/src/tbb/misc.h b/contrib/libs/tbb/src/tbb/misc.h index 6a3cf778a4..b11c0029ef 100644 --- a/contrib/libs/tbb/src/tbb/misc.h +++ b/contrib/libs/tbb/src/tbb/misc.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -25,7 +25,7 @@ #include "oneapi/tbb/info.h" #endif /*__TBB_ARENA_BINDING*/ -#if __linux__ || __FreeBSD__ +#if __unix__ #include <sys/param.h> // __FreeBSD_version #if __FreeBSD_version >= 701000 #include <sys/cpuset.h> @@ -53,11 +53,9 @@ class task_scheduler_observer; const std::size_t MByte = 1024*1024; -#if __TBB_WIN8UI_SUPPORT && (_WIN32_WINNT < 0x0A00) -// In Win8UI mode (Windows 8 Store* applications), TBB uses a thread creation API -// that does not allow to specify the stack size. -// Still, the thread stack size value, either explicit or default, is used by the scheduler. -// So here we set the default value to match the platform's default of 1MB. +#if __TBB_USE_WINAPI +// The Microsoft Documentation about Thread Stack Size states that +// "The default stack reservation size used by the linker is 1 MB" const std::size_t ThreadStackSize = 1*MByte; #else const std::size_t ThreadStackSize = (sizeof(uintptr_t) <= 4 ? 
2 : 4 )*MByte; @@ -196,7 +194,7 @@ T1 atomic_update(std::atomic<T1>& dst, T1 newValue, Pred compare) { basic_mask_t* threadMask; int is_changed; public: - affinity_helper() : threadMask(NULL), is_changed(0) {} + affinity_helper() : threadMask(nullptr), is_changed(0) {} ~affinity_helper(); void protect_affinity_mask( bool restore_process_mask ); void dismiss(); @@ -206,7 +204,6 @@ T1 atomic_update(std::atomic<T1>& dst, T1 newValue, Pred compare) { class affinity_helper : no_copy { public: void protect_affinity_mask( bool ) {} - void dismiss() {} }; inline void destroy_process_mask(){} #endif /* __TBB_USE_OS_AFFINITY_SYSCALL */ diff --git a/contrib/libs/tbb/src/tbb/misc_ex.cpp b/contrib/libs/tbb/src/tbb/misc_ex.cpp index 177392bb65..55be0af3f3 100644 --- a/contrib/libs/tbb/src/tbb/misc_ex.cpp +++ b/contrib/libs/tbb/src/tbb/misc_ex.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -32,8 +32,10 @@ #endif #else #include <unistd.h> +#if __unix__ #if __linux__ #include <sys/sysinfo.h> +#endif #include <cstring> #include <sched.h> #include <cerrno> @@ -53,7 +55,7 @@ namespace r1 { #if __TBB_USE_OS_AFFINITY_SYSCALL -#if __linux__ +#if __unix__ // Handlers for interoperation with libiomp static int (*libiomp_try_restoring_original_mask)(); // Table for mapping to libiomp entry points @@ -63,10 +65,10 @@ static const dynamic_link_descriptor iompLinkTable[] = { #endif static void set_thread_affinity_mask( std::size_t maskSize, const basic_mask_t* threadMask ) { -#if __linux__ - if( sched_setaffinity( 0, maskSize, threadMask ) ) -#else /* FreeBSD */ +#if __FreeBSD__ || __NetBSD__ || __OpenBSD__ if( cpuset_setaffinity( CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, maskSize, threadMask ) ) +#else /* __unix__ */ + if( sched_setaffinity( 0, maskSize, threadMask ) ) #endif // Here and below the error severity is lowered from critical level // because it may happen during TBB library unload because of not @@ -76,10 +78,10 @@ static void set_thread_affinity_mask( std::size_t maskSize, const basic_mask_t* } static void get_thread_affinity_mask( std::size_t maskSize, basic_mask_t* threadMask ) { -#if __linux__ - if( sched_getaffinity( 0, maskSize, threadMask ) ) -#else /* FreeBSD */ +#if __FreeBSD__ || __NetBSD__ || __OpenBSD__ if( cpuset_getaffinity( CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, maskSize, threadMask ) ) +#else /* __unix__ */ + if( sched_getaffinity( 0, maskSize, threadMask ) ) #endif runtime_warning( "getaffinity syscall failed" ); } @@ -88,9 +90,8 @@ static basic_mask_t* process_mask; static int num_masks; void destroy_process_mask() { - if( process_mask ) { - delete [] process_mask; - } + delete [] process_mask; + process_mask = nullptr; } #define curMaskSize sizeof(basic_mask_t) * num_masks @@ -103,7 +104,7 @@ affinity_helper::~affinity_helper() { } } void affinity_helper::protect_affinity_mask( bool restore_process_mask ) { - if( threadMask == NULL && 
num_masks ) { // TODO: assert num_masks validity? + if( threadMask == nullptr && num_masks ) { // TODO: assert num_masks validity? threadMask = new basic_mask_t [num_masks]; std::memset( threadMask, 0, curMaskSize ); get_thread_affinity_mask( curMaskSize, threadMask ); @@ -119,10 +120,8 @@ void affinity_helper::protect_affinity_mask( bool restore_process_mask ) { } } void affinity_helper::dismiss() { - if( threadMask ) { - delete [] threadMask; - threadMask = NULL; - } + delete [] threadMask; + threadMask = nullptr; is_changed = 0; } #undef curMaskSize @@ -135,35 +134,31 @@ static void initialize_hardware_concurrency_info () { int err; int availableProcs = 0; int numMasks = 1; -#if __linux__ int maxProcs = sysconf(_SC_NPROCESSORS_ONLN); - int pid = getpid(); -#else /* FreeBSD >= 7.1 */ - int maxProcs = sysconf(_SC_NPROCESSORS_ONLN); -#endif basic_mask_t* processMask; const std::size_t BasicMaskSize = sizeof(basic_mask_t); for (;;) { const int curMaskSize = BasicMaskSize * numMasks; processMask = new basic_mask_t[numMasks]; std::memset( processMask, 0, curMaskSize ); -#if __linux__ - err = sched_getaffinity( pid, curMaskSize, processMask ); - if ( !err || errno != EINVAL || curMaskSize * CHAR_BIT >= 256 * 1024 ) - break; -#else /* FreeBSD >= 7.1 */ +#if __FreeBSD__ || __NetBSD__ || __OpenBSD__ // CPU_LEVEL_WHICH - anonymous (current) mask, CPU_LEVEL_CPUSET - assigned mask err = cpuset_getaffinity( CPU_LEVEL_WHICH, CPU_WHICH_PID, -1, curMaskSize, processMask ); if ( !err || errno != ERANGE || curMaskSize * CHAR_BIT >= 16 * 1024 ) break; -#endif /* FreeBSD >= 7.1 */ +#else /* __unix__ */ + int pid = getpid(); + err = sched_getaffinity( pid, curMaskSize, processMask ); + if ( !err || errno != EINVAL || curMaskSize * CHAR_BIT >= 256 * 1024 ) + break; +#endif delete[] processMask; numMasks <<= 1; } if ( !err ) { // We have found the mask size and captured the process affinity mask into processMask. 
num_masks = numMasks; // do here because it's needed for affinity_helper to work -#if __linux__ +#if __unix__ // For better coexistence with libiomp which might have changed the mask already, // check for its presence and ask it to restore the mask. dynamic_link_handle libhandle; @@ -196,7 +191,7 @@ static void initialize_hardware_concurrency_info () { delete[] processMask; } theNumProcs = availableProcs > 0 ? availableProcs : 1; // Fail safety strap - __TBB_ASSERT( theNumProcs <= sysconf(_SC_NPROCESSORS_ONLN), NULL ); + __TBB_ASSERT( theNumProcs <= sysconf(_SC_NPROCESSORS_ONLN), nullptr); } int AvailableHwConcurrency() { @@ -211,7 +206,7 @@ int AvailableHwConcurrency() { // Format of "present" file is: ([<int>-<int>|<int>],)+ int AvailableHwConcurrency() { FILE *fp = fopen("/sys/devices/system/cpu/present", "r"); - if (fp == NULL) return 1; + if (fp == nullptr) return 1; int num_args, lower, upper, num_cpus=0; while ((num_args = fscanf(fp, "%u-%u", &lower, &upper)) != EOF) { switch(num_args) { @@ -262,15 +257,16 @@ int ProcessorGroupInfo::NumGroups = 1; int ProcessorGroupInfo::HoleIndex = 0; ProcessorGroupInfo theProcessorGroups[MaxProcessorGroups]; - +int calculate_numa[MaxProcessorGroups]; //Array needed for FindProcessorGroupIndex to calculate Processor Group when number of threads > number of cores to distribute threads evenly between processor groups +int numaSum; struct TBB_GROUP_AFFINITY { DWORD_PTR Mask; WORD Group; WORD Reserved[3]; }; -static DWORD (WINAPI *TBB_GetActiveProcessorCount)( WORD groupIndex ) = NULL; -static WORD (WINAPI *TBB_GetActiveProcessorGroupCount)() = NULL; +static DWORD (WINAPI *TBB_GetActiveProcessorCount)( WORD groupIndex ) = nullptr; +static WORD (WINAPI *TBB_GetActiveProcessorGroupCount)() = nullptr; static BOOL (WINAPI *TBB_SetThreadGroupAffinity)( HANDLE hThread, const TBB_GROUP_AFFINITY* newAff, TBB_GROUP_AFFINITY *prevAff ); static BOOL (WINAPI *TBB_GetThreadGroupAffinity)( HANDLE hThread, TBB_GROUP_AFFINITY* ); @@ -283,6 
+279,7 @@ static const dynamic_link_descriptor ProcessorGroupsApiLinkTable[] = { }; static void initialize_hardware_concurrency_info () { + suppress_unused_warning(TBB_ALL_PROCESSOR_GROUPS); #if __TBB_WIN8UI_SUPPORT // For these applications processor groups info is unavailable // Setting up a number of processors for one processor group @@ -299,14 +296,14 @@ static void initialize_hardware_concurrency_info () { if ( pam & m ) ++nproc; } - __TBB_ASSERT( nproc <= (int)si.dwNumberOfProcessors, NULL ); + __TBB_ASSERT( nproc <= (int)si.dwNumberOfProcessors, nullptr); // By default setting up a number of processors for one processor group theProcessorGroups[0].numProcs = theProcessorGroups[0].numProcsRunningTotal = nproc; // Setting up processor groups in case the process does not restrict affinity mask and more than one processor group is present if ( nproc == (int)si.dwNumberOfProcessors && TBB_GetActiveProcessorCount ) { // The process does not have restricting affinity mask and multiple processor groups are possible ProcessorGroupInfo::NumGroups = (int)TBB_GetActiveProcessorGroupCount(); - __TBB_ASSERT( ProcessorGroupInfo::NumGroups <= MaxProcessorGroups, NULL ); + __TBB_ASSERT( ProcessorGroupInfo::NumGroups <= MaxProcessorGroups, nullptr); // Fail safety bootstrap. Release versions will limit available concurrency // level, while debug ones would assert. 
if ( ProcessorGroupInfo::NumGroups > MaxProcessorGroups ) @@ -316,15 +313,27 @@ static void initialize_hardware_concurrency_info () { if ( TBB_GetThreadGroupAffinity( GetCurrentThread(), &ga ) ) ProcessorGroupInfo::HoleIndex = ga.Group; int nprocs = 0; + int min_procs = INT_MAX; for ( WORD i = 0; i < ProcessorGroupInfo::NumGroups; ++i ) { ProcessorGroupInfo &pgi = theProcessorGroups[i]; pgi.numProcs = (int)TBB_GetActiveProcessorCount(i); - __TBB_ASSERT( pgi.numProcs <= (int)sizeof(DWORD_PTR) * CHAR_BIT, NULL ); + if (pgi.numProcs < min_procs) min_procs = pgi.numProcs; //Finding the minimum number of processors in the Processor Groups + calculate_numa[i] = pgi.numProcs; + __TBB_ASSERT( pgi.numProcs <= (int)sizeof(DWORD_PTR) * CHAR_BIT, nullptr); pgi.mask = pgi.numProcs == sizeof(DWORD_PTR) * CHAR_BIT ? ~(DWORD_PTR)0 : (DWORD_PTR(1) << pgi.numProcs) - 1; pgi.numProcsRunningTotal = nprocs += pgi.numProcs; } - __TBB_ASSERT( nprocs == (int)TBB_GetActiveProcessorCount( TBB_ALL_PROCESSOR_GROUPS ), NULL ); + __TBB_ASSERT( nprocs == (int)TBB_GetActiveProcessorCount( TBB_ALL_PROCESSOR_GROUPS ), nullptr); + + calculate_numa[0] = (calculate_numa[0] / min_procs)-1; + for (WORD i = 1; i < ProcessorGroupInfo::NumGroups; ++i) { + calculate_numa[i] = calculate_numa[i-1] + (calculate_numa[i] / min_procs); + } + + numaSum = calculate_numa[ProcessorGroupInfo::NumGroups - 1]; + } + } #endif /* __TBB_WIN8UI_SUPPORT */ @@ -339,38 +348,29 @@ int NumberOfProcessorGroups() { return ProcessorGroupInfo::NumGroups; } -// Offset for the slot reserved for the first external thread -#define HoleAdjusted(procIdx, grpIdx) (procIdx + (holeIdx <= grpIdx)) - int FindProcessorGroupIndex ( int procIdx ) { - // In case of oversubscription spread extra workers in a round robin manner - int holeIdx; - const int numProcs = theProcessorGroups[ProcessorGroupInfo::NumGroups - 1].numProcsRunningTotal; - if ( procIdx >= numProcs - 1 ) { - holeIdx = INT_MAX; - procIdx = (procIdx - numProcs + 1) % numProcs; + int 
current_grp_idx = ProcessorGroupInfo::HoleIndex; + if (procIdx >= theProcessorGroups[current_grp_idx].numProcs && procIdx < theProcessorGroups[ProcessorGroupInfo::NumGroups - 1].numProcsRunningTotal) { + procIdx = procIdx - theProcessorGroups[current_grp_idx].numProcs; + do { + current_grp_idx = (current_grp_idx + 1) % (ProcessorGroupInfo::NumGroups); + procIdx = procIdx - theProcessorGroups[current_grp_idx].numProcs; + + } while (procIdx >= 0); } - else - holeIdx = ProcessorGroupInfo::HoleIndex; - __TBB_ASSERT( hardware_concurrency_info == do_once_state::initialized, "FindProcessorGroupIndex is used before AvailableHwConcurrency" ); - // Approximate the likely group index assuming all groups are of the same size - int i = procIdx / theProcessorGroups[0].numProcs; - // Make sure the approximation is a valid group index - if (i >= ProcessorGroupInfo::NumGroups) i = ProcessorGroupInfo::NumGroups-1; - // Now adjust the approximation up or down - if ( theProcessorGroups[i].numProcsRunningTotal > HoleAdjusted(procIdx, i) ) { - while ( theProcessorGroups[i].numProcsRunningTotal - theProcessorGroups[i].numProcs > HoleAdjusted(procIdx, i) ) { - __TBB_ASSERT( i > 0, NULL ); - --i; + else if (procIdx >= theProcessorGroups[ProcessorGroupInfo::NumGroups - 1].numProcsRunningTotal) { + int temp_grp_index = 0; + procIdx = procIdx - theProcessorGroups[ProcessorGroupInfo::NumGroups - 1].numProcsRunningTotal; + procIdx = procIdx % (numaSum+1); //ProcIdx to stay between 0 and numaSum + + while (procIdx - calculate_numa[temp_grp_index] > 0) { + temp_grp_index = (temp_grp_index + 1) % ProcessorGroupInfo::NumGroups; } + current_grp_idx = temp_grp_index; } - else { - do { - ++i; - } while ( theProcessorGroups[i].numProcsRunningTotal <= HoleAdjusted(procIdx, i) ); - } - __TBB_ASSERT( i < ProcessorGroupInfo::NumGroups, NULL ); - return i; + __TBB_ASSERT(current_grp_idx < ProcessorGroupInfo::NumGroups, nullptr); + + return current_grp_idx; } void MoveThreadIntoProcessorGroup( void* hThread, 
int groupIndex ) { @@ -378,7 +378,7 @@ void MoveThreadIntoProcessorGroup( void* hThread, int groupIndex ) { if ( !TBB_SetThreadGroupAffinity ) return; TBB_GROUP_AFFINITY ga = { theProcessorGroups[groupIndex].mask, (WORD)groupIndex, {0,0,0} }; - TBB_SetThreadGroupAffinity( hThread, &ga, NULL ); + TBB_SetThreadGroupAffinity( hThread, &ga, nullptr); } int AvailableHwConcurrency() { diff --git a/contrib/libs/tbb/src/tbb/observer_proxy.cpp b/contrib/libs/tbb/src/tbb/observer_proxy.cpp index 4f7c07c266..2717d7400c 100644 --- a/contrib/libs/tbb/src/tbb/observer_proxy.cpp +++ b/contrib/libs/tbb/src/tbb/observer_proxy.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -33,7 +33,7 @@ extern std::atomic<int> the_observer_proxy_count; #endif /* TBB_USE_ASSERT */ observer_proxy::observer_proxy( d1::task_scheduler_observer& tso ) - : my_ref_count(1), my_list(NULL), my_next(NULL), my_prev(NULL), my_observer(&tso) + : my_ref_count(1), my_list(nullptr), my_next(nullptr), my_prev(nullptr), my_observer(&tso) { #if TBB_USE_ASSERT ++the_observer_proxy_count; @@ -51,9 +51,6 @@ observer_proxy::~observer_proxy() { } void observer_list::clear() { - // Though the method will work fine for the empty list, we require the caller - // to check for the list emptiness before invoking it to avoid extra overhead. 
- __TBB_ASSERT( !empty(), NULL ); { scoped_lock lock(mutex(), /*is_writer=*/true); observer_proxy *next = my_head.load(std::memory_order_relaxed); @@ -103,7 +100,7 @@ void observer_list::insert( observer_proxy* p ) { void observer_list::remove(observer_proxy* p) { __TBB_ASSERT(my_head.load(std::memory_order_relaxed), "Attempt to remove an item from an empty list"); - __TBB_ASSERT(!my_tail.load(std::memory_order_relaxed)->my_next, "Last item's my_next must be NULL"); + __TBB_ASSERT(!my_tail.load(std::memory_order_relaxed)->my_next, "Last item's my_next must be nullptr"); if (p == my_tail.load(std::memory_order_relaxed)) { __TBB_ASSERT(!p->my_next, nullptr); my_tail.store(p->my_prev, std::memory_order_relaxed); @@ -159,7 +156,7 @@ void observer_list::do_notify_entry_observers(observer_proxy*& last, bool worker // We were already processing the list. if (observer_proxy* q = p->my_next) { if (p == prev) { - remove_ref_fast(prev); // sets prev to NULL if successful + remove_ref_fast(prev); // sets prev to nullptr if successful } p = q; } else { @@ -221,7 +218,7 @@ void observer_list::do_notify_exit_observers(observer_proxy* last, bool worker) if (p != last) { __TBB_ASSERT(p->my_next, "List items before 'last' must have valid my_next pointer"); if (p == prev) - remove_ref_fast(prev); // sets prev to NULL if successful + remove_ref_fast(prev); // sets prev to nullptr if successful p = p->my_next; } else { // remove the reference from the last item @@ -308,7 +305,7 @@ void __TBB_EXPORTED_FUNC observe(d1::task_scheduler_observer &tso, bool enable) // Proxy may still be held by other threads (to track the last notified observer) if( !--proxy->my_ref_count ) {// nobody can increase it under exclusive lock list.remove(proxy); - __TBB_ASSERT( !proxy->my_ref_count, NULL ); + __TBB_ASSERT( !proxy->my_ref_count, nullptr); delete proxy; } } diff --git a/contrib/libs/tbb/src/tbb/observer_proxy.h b/contrib/libs/tbb/src/tbb/observer_proxy.h index 2450247ecd..0ca2839d14 100644 --- 
a/contrib/libs/tbb/src/tbb/observer_proxy.h +++ b/contrib/libs/tbb/src/tbb/observer_proxy.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -47,7 +47,7 @@ class observer_list { arena* my_arena; //! Decrement refcount of the proxy p if there are other outstanding references. - /** In case of success sets p to NULL. Must be invoked from under the list lock. **/ + /** In case of success sets p to nullptr. Must be invoked from under the list lock. **/ inline static void remove_ref_fast( observer_proxy*& p ); //! Implements notify_entry_observers functionality. @@ -79,8 +79,6 @@ public: //! Accessor to the reader-writer mutex associated with the list. spin_rw_mutex& mutex () { return my_mutex.begin()[0]; } - bool empty () const { return my_head.load(std::memory_order_relaxed) == nullptr; } - //! Call entry notifications on observers added after last was notified. /** Updates last to become the last notified observer proxy (in the global list) or leaves it to be nullptr. The proxy has its refcount incremented. **/ @@ -95,7 +93,7 @@ public: object into a proxy so that a list item remained valid even after the corresponding proxy object is destroyed by the user code. **/ class observer_proxy { - friend class task_scheduler_observer; + friend class d1::task_scheduler_observer; friend class observer_list; friend void observe(d1::task_scheduler_observer&, bool); //! Reference count used for garbage collection. @@ -106,7 +104,7 @@ class observer_proxy { //! Reference to the list this observer belongs to. observer_list* my_list; //! Pointer to next observer in the list specified by my_head. - /** NULL for the last item in the list. **/ + /** nullptr for the last item in the list. **/ observer_proxy* my_next; //! Pointer to the previous observer in the list specified by my_head. 
/** For the head of the list points to the last item. **/ @@ -124,8 +122,8 @@ void observer_list::remove_ref_fast( observer_proxy*& p ) { if( p->my_observer ) { // Can decrement refcount quickly, as it cannot drop to zero while under the lock. std::uintptr_t r = --p->my_ref_count; - __TBB_ASSERT_EX( r, NULL ); - p = NULL; + __TBB_ASSERT_EX( r, nullptr); + p = nullptr; } else { // Use slow form of refcount decrementing, after the lock is released. } @@ -141,9 +139,9 @@ void observer_list::notify_exit_observers( observer_proxy*& last, bool worker ) if (last == nullptr) { return; } - __TBB_ASSERT(!is_poisoned(last), NULL); + __TBB_ASSERT(!is_poisoned(last), nullptr); do_notify_exit_observers( last, worker ); - __TBB_ASSERT(last != nullptr, NULL); + __TBB_ASSERT(last != nullptr, nullptr); poison_pointer(last); } diff --git a/contrib/libs/tbb/src/tbb/parallel_pipeline.cpp b/contrib/libs/tbb/src/tbb/parallel_pipeline.cpp index b7655c6b35..bb8587b92f 100644 --- a/contrib/libs/tbb/src/tbb/parallel_pipeline.cpp +++ b/contrib/libs/tbb/src/tbb/parallel_pipeline.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -87,7 +87,7 @@ private: d1::wait_context wait_ctx; }; -//! This structure is used to store task information in a input buffer +//! This structure is used to store task information in an input buffer struct task_info { void* my_object = nullptr; //! Invalid unless a task went through an ordered stage. @@ -143,8 +143,8 @@ class input_buffer { //! True for ordered filter, false otherwise. const bool is_ordered; - //! for parallel filters that accepts NULLs, thread-local flag for reaching end_of_input - using end_of_input_tls_t = basic_tls<std::intptr_t>; + //! 
for parallel filters that accepts nullptrs, thread-local flag for reaching end_of_input + using end_of_input_tls_t = basic_tls<input_buffer*>; end_of_input_tls_t end_of_input_tls; bool end_of_input_tls_allocated; // no way to test pthread creation of TLS @@ -240,10 +240,10 @@ public: handle_perror(status, "Failed to destroy filter TLS"); } bool my_tls_end_of_input() { - return end_of_input_tls.get() != 0; + return end_of_input_tls.get() != nullptr; } void set_my_tls_end_of_input() { - end_of_input_tls.set(1); + end_of_input_tls.set(this); } }; @@ -280,7 +280,7 @@ private: //! Spawn task if token is available. void try_spawn_stage_task(d1::execution_data& ed) { ITT_NOTIFY( sync_releasing, &my_pipeline.input_tokens ); - if( (my_pipeline.input_tokens.fetch_sub(1, std::memory_order_relaxed)) > 1 ) { + if( (my_pipeline.input_tokens.fetch_sub(1, std::memory_order_release)) > 1 ) { d1::small_object_allocator alloc{}; r1::spawn( *alloc.new_object<stage_task>(ed, my_pipeline, alloc ), my_pipeline.my_context ); } @@ -331,7 +331,7 @@ public: return nullptr; } - ~stage_task() { + ~stage_task() override { if ( my_filter && my_object ) { my_filter->finalize(my_object); my_object = nullptr; @@ -397,7 +397,7 @@ bool stage_task::execute_filter(d1::execution_data& ed) { } } else { // Reached end of the pipe. 
- std::size_t ntokens_avail = my_pipeline.input_tokens.fetch_add(1, std::memory_order_relaxed); + std::size_t ntokens_avail = my_pipeline.input_tokens.fetch_add(1, std::memory_order_acquire); if( ntokens_avail>0 // Only recycle if there is one available token || my_pipeline.end_of_input.load(std::memory_order_relaxed) ) { @@ -410,7 +410,7 @@ bool stage_task::execute_filter(d1::execution_data& ed) { return true; } -pipeline:: ~pipeline() { +pipeline::~pipeline() { while( first_filter ) { d1::base_filter* f = first_filter; if( input_buffer* b = f->my_input_buffer ) { diff --git a/contrib/libs/tbb/src/tbb/private_server.cpp b/contrib/libs/tbb/src/tbb/private_server.cpp index bc0af84bb4..fbdbaba1ef 100644 --- a/contrib/libs/tbb/src/tbb/private_server.cpp +++ b/contrib/libs/tbb/src/tbb/private_server.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,6 +15,7 @@ */ #include "oneapi/tbb/cache_aligned_allocator.h" +#include "oneapi/tbb/mutex.h" #include "rml_tbb.h" #include "rml_thread_monitor.h" @@ -95,7 +96,7 @@ private: protected: private_worker( private_server& server, tbb_client& client, const std::size_t i ) : my_state(st_init), my_server(server), my_client(client), my_index(i), - my_thread_monitor(), my_handle(), my_next() + my_handle(), my_next() {} }; @@ -142,7 +143,7 @@ private: std::atomic<private_worker*> my_asleep_list_root; //! Protects my_asleep_list_root - typedef scheduler_mutex_type asleep_list_mutex_type; + typedef mutex asleep_list_mutex_type; asleep_list_mutex_type my_asleep_list_mutex; #if TBB_USE_ASSERT @@ -154,7 +155,7 @@ private: which in turn each wake up two threads, etc. */ void propagate_chain_reaction() { // First test of a double-check idiom. Second test is inside wake_some(0). 
- if( my_asleep_list_root.load(std::memory_order_acquire) ) + if( my_asleep_list_root.load(std::memory_order_relaxed) ) wake_some(0); } @@ -164,7 +165,7 @@ private: //! Equivalent of adding additional_slack to my_slack and waking up to 2 threads if my_slack permits. void wake_some( int additional_slack ); - virtual ~private_server(); + ~private_server() override; void remove_server_ref() { if( --my_ref_count==0 ) { @@ -190,13 +191,13 @@ public: void yield() override { d0::yield(); } - void independent_thread_number_changed( int ) override {__TBB_ASSERT(false,NULL);} + void independent_thread_number_changed( int ) override {__TBB_ASSERT(false, nullptr);} unsigned default_concurrency() const override { return governor::default_num_threads() - 1; } void adjust_job_count_estimate( int delta ) override; -#if _WIN32||_WIN64 +#if _WIN32 || _WIN64 void register_external_thread ( ::rml::server::execution_resource_t& ) override {} void unregister_external_thread ( ::rml::server::execution_resource_t ) override {} #endif /* _WIN32||_WIN64 */ @@ -218,6 +219,7 @@ __RML_DECL_THREAD_ROUTINE private_worker::thread_routine( void* arg ) { private_worker* self = static_cast<private_worker*>(arg); AVOID_64K_ALIASING( self->my_index ); self->run(); + // return 0 instead of nullptr due to the difference in the type __RML_DECL_THREAD_ROUTINE on various OSs return 0; } #if _MSC_VER && !defined(__INTEL_COMPILER) @@ -232,12 +234,17 @@ void private_worker::release_handle(thread_handle handle, bool join) { } void private_worker::start_shutdown() { - state_t expected_state = my_state.load(std::memory_order_acquire); - __TBB_ASSERT( expected_state!=st_quit, NULL ); + __TBB_ASSERT(my_state.load(std::memory_order_relaxed) != st_quit, "The quit state is expected to be set only once"); - while( !my_state.compare_exchange_strong( expected_state, st_quit ) ); + // `acq` to acquire my_handle + // `rel` to release market state + state_t prev_state = my_state.exchange(st_quit, 
std::memory_order_acq_rel); - if( expected_state==st_normal || expected_state==st_starting ) { + if (prev_state == st_init) { + // Perform action that otherwise would be performed by associated thread when it quits. + my_server.remove_server_ref(); + } else { + __TBB_ASSERT(prev_state == st_normal || prev_state == st_starting, nullptr); // May have invalidated invariant for sleeping, so wake up the thread. // Note that the notify() here occurs without maintaining invariants for my_slack. // It does not matter, because my_state==st_quit overrides checking of my_slack. @@ -245,11 +252,8 @@ void private_worker::start_shutdown() { // Do not need release handle in st_init state, // because in this case the thread wasn't started yet. // For st_starting release is done at launch site. - if (expected_state==st_normal) + if (prev_state == st_normal) release_handle(my_handle, governor::does_client_join_workers(my_client)); - } else if( expected_state==st_init ) { - // Perform action that otherwise would be performed by associated thread when it quits. - my_server.remove_server_ref(); } } @@ -261,22 +265,14 @@ void private_worker::run() noexcept { // complications in handle management on Windows. ::rml::job& j = *my_client.create_one_job(); - while( my_state.load(std::memory_order_acquire)!=st_quit ) { + // memory_order_seq_cst to be strictly ordered after thread_monitor::wait on the next iteration + while( my_state.load(std::memory_order_seq_cst)!=st_quit ) { if( my_server.my_slack.load(std::memory_order_acquire)>=0 ) { my_client.process(j); - } else { - thread_monitor::cookie c; - // Prepare to wait - my_thread_monitor.prepare_wait(c); - // Check/set the invariant for sleeping - if( my_state.load(std::memory_order_acquire)!=st_quit && my_server.try_insert_in_asleep_list(*this) ) { - my_thread_monitor.commit_wait(c); - __TBB_ASSERT( my_state==st_quit || !my_next, "Thread monitor missed a spurious wakeup?" 
); - my_server.propagate_chain_reaction(); - } else { - // Invariant broken - my_thread_monitor.cancel_wait(); - } + } else if( my_server.try_insert_in_asleep_list(*this) ) { + my_thread_monitor.wait(); + __TBB_ASSERT(my_state.load(std::memory_order_relaxed) == st_quit || !my_next, "Thread monitor missed a spurious wakeup?" ); + my_server.propagate_chain_reaction(); } } my_client.cleanup(j); @@ -286,31 +282,42 @@ void private_worker::run() noexcept { } inline void private_worker::wake_or_launch() { - state_t expected_state = st_init; - if( my_state.compare_exchange_strong( expected_state, st_starting ) ) { - // after this point, remove_server_ref() must be done by created thread + state_t state = my_state.load(std::memory_order_relaxed); + + switch (state) { + case st_starting: + __TBB_fallthrough; + case st_normal: + __TBB_ASSERT(!my_next, "Should not wake a thread while it's still in asleep list"); + my_thread_monitor.notify(); + break; + case st_init: + if (my_state.compare_exchange_strong(state, st_starting)) { + // after this point, remove_server_ref() must be done by created thread #if __TBB_USE_WINAPI - my_handle = thread_monitor::launch( thread_routine, this, my_server.my_stack_size, &this->my_index ); + // Win thread_monitor::launch is designed on the assumption that the workers thread id go from 1 to Hard limit set by TBB market::global_market + const std::size_t worker_idx = my_server.my_n_thread - this->my_index; + my_handle = thread_monitor::launch(thread_routine, this, my_server.my_stack_size, &worker_idx); #elif __TBB_USE_POSIX - { - affinity_helper fpa; - fpa.protect_affinity_mask( /*restore_process_mask=*/true ); - my_handle = thread_monitor::launch( thread_routine, this, my_server.my_stack_size ); - // Implicit destruction of fpa resets original affinity mask. 
- } + { + affinity_helper fpa; + fpa.protect_affinity_mask( /*restore_process_mask=*/true); + my_handle = thread_monitor::launch(thread_routine, this, my_server.my_stack_size); + // Implicit destruction of fpa resets original affinity mask. + } #endif /* __TBB_USE_POSIX */ - expected_state = st_starting; - if ( !my_state.compare_exchange_strong( expected_state, st_normal ) ) { - // Do shutdown during startup. my_handle can't be released - // by start_shutdown, because my_handle value might be not set yet - // at time of transition from st_starting to st_quit. - __TBB_ASSERT( expected_state==st_quit, NULL ); - release_handle(my_handle, governor::does_client_join_workers(my_client)); + state = st_starting; + if (!my_state.compare_exchange_strong(state, st_normal)) { + // Do shutdown during startup. my_handle can't be released + // by start_shutdown, because my_handle value might be not set yet + // at time of transition from st_starting to st_quit. + __TBB_ASSERT(state == st_quit, nullptr); + release_handle(my_handle, governor::does_client_join_workers(my_client)); + } } - } - else { - __TBB_ASSERT( !my_next, "Should not wake a thread while it's still in asleep list" ); - my_thread_monitor.notify(); + break; + default: + __TBB_ASSERT(state == st_quit, nullptr); } } @@ -323,8 +330,8 @@ private_server::private_server( tbb_client& client ) : my_stack_size(client.min_stack_size()), my_slack(0), my_ref_count(my_n_thread+1), - my_thread_array(NULL), - my_asleep_list_root(NULL) + my_thread_array(nullptr), + my_asleep_list_root(nullptr) #if TBB_USE_ASSERT , my_net_slack_requests(0) #endif /* TBB_USE_ASSERT */ @@ -332,12 +339,13 @@ private_server::private_server( tbb_client& client ) : my_thread_array = tbb::cache_aligned_allocator<padded_private_worker>().allocate( my_n_thread ); for( std::size_t i=0; i<my_n_thread; ++i ) { private_worker* t = new( &my_thread_array[i] ) padded_private_worker( *this, client, i ); - t->my_next = my_asleep_list_root.exchange(t, 
std::memory_order_relaxed); + t->my_next = my_asleep_list_root.load(std::memory_order_relaxed); + my_asleep_list_root.store(t, std::memory_order_relaxed); } } private_server::~private_server() { - __TBB_ASSERT( my_net_slack_requests==0, NULL ); + __TBB_ASSERT( my_net_slack_requests==0, nullptr); for( std::size_t i=my_n_thread; i--; ) my_thread_array[i].~padded_private_worker(); tbb::cache_aligned_allocator<padded_private_worker>().deallocate( my_thread_array, my_n_thread ); @@ -350,49 +358,57 @@ inline bool private_server::try_insert_in_asleep_list( private_worker& t ) { return false; // Contribute to slack under lock so that if another takes that unit of slack, // it sees us sleeping on the list and wakes us up. - int k = ++my_slack; - if( k<=0 ) { - t.my_next = my_asleep_list_root.exchange(&t, std::memory_order_relaxed); - return true; - } else { - --my_slack; - return false; + auto expected = my_slack.load(std::memory_order_relaxed); + while (expected < 0) { + if (my_slack.compare_exchange_strong(expected, expected + 1)) { + t.my_next = my_asleep_list_root.load(std::memory_order_relaxed); + my_asleep_list_root.store(&t, std::memory_order_relaxed); + return true; + } } + + return false; } void private_server::wake_some( int additional_slack ) { - __TBB_ASSERT( additional_slack>=0, NULL ); + __TBB_ASSERT( additional_slack>=0, nullptr ); private_worker* wakee[2]; private_worker**w = wakee; - { + + if (additional_slack) { + // Contribute our unused slack to my_slack. 
+ my_slack += additional_slack; + } + + int allotted_slack = 0; + while (allotted_slack < 2) { + // Chain reaction; Try to claim unit of slack + int old = my_slack.load(std::memory_order_relaxed); + do { + if (old <= 0) goto done; + } while (!my_slack.compare_exchange_strong(old, old - 1)); + ++allotted_slack; + } +done: + + if (allotted_slack) { asleep_list_mutex_type::scoped_lock lock(my_asleep_list_mutex); - while( my_asleep_list_root.load(std::memory_order_relaxed) && w<wakee+2 ) { - if( additional_slack>0 ) { - // additional demand does not exceed surplus supply - if ( additional_slack+my_slack.load(std::memory_order_acquire)<=0 ) - break; - --additional_slack; - } else { - // Chain reaction; Try to claim unit of slack - int old = my_slack; - do { - if( old<=0 ) goto done; - } while( !my_slack.compare_exchange_strong(old,old-1) ); - } + auto root = my_asleep_list_root.load(std::memory_order_relaxed); + while( root && w<wakee+2 && allotted_slack) { + --allotted_slack; // Pop sleeping worker to combine with claimed unit of slack - auto old = my_asleep_list_root.load(std::memory_order_relaxed); - my_asleep_list_root.store(old->my_next, std::memory_order_relaxed); - *w++ = old; + *w++ = root; + root = root->my_next; } - if( additional_slack ) { + my_asleep_list_root.store(root, std::memory_order_relaxed); + if(allotted_slack) { // Contribute our unused slack to my_slack. 
- my_slack += additional_slack; + my_slack += allotted_slack; } } -done: while( w>wakee ) { private_worker* ww = *--w; - ww->my_next = NULL; + ww->my_next = nullptr; ww->wake_or_launch(); } } diff --git a/contrib/libs/tbb/src/tbb/profiling.cpp b/contrib/libs/tbb/src/tbb/profiling.cpp index 2603f35b88..3cf4da3cea 100644 --- a/contrib/libs/tbb/src/tbb/profiling.cpp +++ b/contrib/libs/tbb/src/tbb/profiling.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -51,7 +51,7 @@ static resource_string strings_for_itt[] = { static __itt_string_handle* ITT_get_string_handle(std::uintptr_t idx) { __TBB_ASSERT(idx < NUM_STRINGS, "string handle out of valid range"); - return idx < NUM_STRINGS ? strings_for_itt[idx].itt_str_handle : NULL; + return idx < NUM_STRINGS ? strings_for_itt[idx].itt_str_handle : nullptr; } static void ITT_init_domains() { @@ -122,7 +122,7 @@ void itt_set_sync_name(void* obj, const tchar* name) { const __itt_id itt_null_id = { 0, 0, 0 }; static inline __itt_domain* get_itt_domain(d1::itt_domain_enum idx) { - if (tbb_domains[idx] == NULL) { + if (tbb_domains[idx] == nullptr) { ITT_DoOneTimeInitialization(); } return tbb_domains[idx]; @@ -222,7 +222,7 @@ void __TBB_EXPORTED_FUNC itt_region_begin(d1::itt_domain_enum domain, void *regi if ( parent ) { itt_id_make( &parent_id, parent, parent_extra ); } - __itt_region_begin( d, region_id, parent_id, NULL ); + __itt_region_begin( d, region_id, parent_id, nullptr ); } } diff --git a/contrib/libs/tbb/src/tbb/queuing_rw_mutex.cpp b/contrib/libs/tbb/src/tbb/queuing_rw_mutex.cpp index cfdc4d3c2a..8818c51a20 100644 --- a/contrib/libs/tbb/src/tbb/queuing_rw_mutex.cpp +++ b/contrib/libs/tbb/src/tbb/queuing_rw_mutex.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed 
under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -168,20 +168,21 @@ struct queuing_rw_mutex_impl { s.my_state.store(d1::queuing_rw_mutex::scoped_lock::state_t(write ? STATE_WRITER : STATE_READER), std::memory_order_relaxed); s.my_internal_lock.store(RELEASED, std::memory_order_relaxed); - queuing_rw_mutex::scoped_lock* predecessor = m.q_tail.exchange(&s, std::memory_order_release); + + // The CAS must have release semantics, because we are + // "sending" the fields initialized above to other actors. + // We need acquire semantics, because we are acquiring the predecessor (or mutex if no predecessor) + queuing_rw_mutex::scoped_lock* predecessor = m.q_tail.exchange(&s, std::memory_order_acq_rel); if( write ) { // Acquiring for write if( predecessor ) { ITT_NOTIFY(sync_prepare, s.my_mutex); predecessor = tricky_pointer(predecessor) & ~FLAG; - __TBB_ASSERT( !( tricky_pointer(predecessor) & FLAG ), "use of corrupted pointer!" 
); - #if TBB_USE_ASSERT - atomic_fence(std::memory_order_seq_cst); // on "m.q_tail" __TBB_ASSERT( !predecessor->my_next, "the predecessor has another successor!"); - #endif tricky_pointer::store(predecessor->my_next, &s, std::memory_order_release); - spin_wait_until_eq(s.my_going, 1U); + // We are acquiring the mutex + spin_wait_until_eq(s.my_going, 1U, std::memory_order_acquire); } } else { // Acquiring for read @@ -189,58 +190,64 @@ struct queuing_rw_mutex_impl { bool sync_prepare_done = false; #endif if( predecessor ) { - unsigned char pred_state; - __TBB_ASSERT( !s.my_prev, "the predecessor is already set" ); + unsigned char pred_state{}; + __TBB_ASSERT( !s.my_prev.load(std::memory_order_relaxed), "the predecessor is already set" ); if( tricky_pointer(predecessor) & FLAG ) { /* this is only possible if predecessor is an upgrading reader and it signals us to wait */ pred_state = STATE_UPGRADE_WAITING; predecessor = tricky_pointer(predecessor) & ~FLAG; } else { // Load predecessor->my_state now, because once predecessor->my_next becomes - // non-NULL, we must assume that *predecessor might be destroyed. - pred_state = STATE_READER; - predecessor->my_state.compare_exchange_strong(pred_state, STATE_READER_UNBLOCKNEXT, std::memory_order_acq_rel); + // non-null, we must assume that *predecessor might be destroyed. + pred_state = predecessor->my_state.load(std::memory_order_relaxed); + if (pred_state == STATE_READER) { + // Notify the previous reader to unblock us. + predecessor->my_state.compare_exchange_strong(pred_state, STATE_READER_UNBLOCKNEXT, std::memory_order_relaxed); + } + if (pred_state == STATE_ACTIVEREADER) { // either we initially read it or CAS failed + // Active reader means that the predecessor already acquired the mutex and cannot notify us. + // Therefore, we need to acquire the mutex ourselves by re-reading predecessor state. 
+ (void)predecessor->my_state.load(std::memory_order_acquire); + } } tricky_pointer::store(s.my_prev, predecessor, std::memory_order_relaxed); __TBB_ASSERT( !( tricky_pointer(predecessor) & FLAG ), "use of corrupted pointer!" ); - #if TBB_USE_ASSERT - atomic_fence(std::memory_order_seq_cst); // on "m.q_tail" - __TBB_ASSERT( !predecessor->my_next, "the predecessor has another successor!"); - #endif + __TBB_ASSERT( !predecessor->my_next.load(std::memory_order_relaxed), "the predecessor has another successor!"); tricky_pointer::store(predecessor->my_next, &s, std::memory_order_release); if( pred_state != STATE_ACTIVEREADER ) { #if __TBB_USE_ITT_NOTIFY sync_prepare_done = true; ITT_NOTIFY(sync_prepare, s.my_mutex); #endif - spin_wait_until_eq(s.my_going, 1U); + // We are acquiring the mutex + spin_wait_until_eq(s.my_going, 1U, std::memory_order_acquire); } } // The protected state must have been acquired here before it can be further released to any other reader(s): unsigned char old_state = STATE_READER; - s.my_state.compare_exchange_strong(old_state, STATE_ACTIVEREADER, std::memory_order_acq_rel); + // When this reader is signaled by previous actor it acquires the mutex. + // We need to build happens-before relation with all other coming readers that will read our ACTIVEREADER + // without blocking on my_going. Therefore, we need to publish ACTIVEREADER with release semantics. + // On fail it is relaxed, because we will build happens-before on my_going. 
+ s.my_state.compare_exchange_strong(old_state, STATE_ACTIVEREADER, std::memory_order_release, std::memory_order_relaxed); if( old_state!=STATE_READER ) { #if __TBB_USE_ITT_NOTIFY if( !sync_prepare_done ) ITT_NOTIFY(sync_prepare, s.my_mutex); #endif // Failed to become active reader -> need to unblock the next waiting reader first - __TBB_ASSERT( s.my_state==STATE_READER_UNBLOCKNEXT, "unexpected state" ); - spin_wait_while_eq(s.my_next, 0U); + __TBB_ASSERT( s.my_state.load(std::memory_order_relaxed)==STATE_READER_UNBLOCKNEXT, "unexpected state" ); + spin_wait_while_eq(s.my_next, 0U, std::memory_order_acquire); /* my_state should be changed before unblocking the next otherwise it might finish and another thread can get our old state and left blocked */ s.my_state.store(STATE_ACTIVEREADER, std::memory_order_relaxed); tricky_pointer::load(s.my_next, std::memory_order_relaxed)->my_going.store(1U, std::memory_order_release); } - __TBB_ASSERT( s.my_state==STATE_ACTIVEREADER, "unlocked reader is active reader" ); + __TBB_ASSERT(s.my_state.load(std::memory_order_relaxed) == STATE_ACTIVEREADER, "unlocked reader is active reader"); } ITT_NOTIFY(sync_acquired, s.my_mutex); - - // Force acquire so that user's critical section receives correct values - // from processor that was previously in the user's critical section. - atomic_fence(std::memory_order_acquire); } //! A method to acquire queuing_rw_mutex if it is free @@ -260,13 +267,11 @@ struct queuing_rw_mutex_impl { s.my_internal_lock.store(RELEASED, std::memory_order_relaxed); // The CAS must have release semantics, because we are - // "sending" the fields initialized above to other processors. + // "sending" the fields initialized above to other actors. 
+ // We need acquire semantics, because we are acquiring the mutex d1::queuing_rw_mutex::scoped_lock* expected = nullptr; - if( !m.q_tail.compare_exchange_strong(expected, &s, std::memory_order_release) ) + if (!m.q_tail.compare_exchange_strong(expected, &s, std::memory_order_acq_rel)) return false; // Someone already took the lock - // Force acquire so that user's critical section receives correct values - // from processor that was previously in the user's critical section. - atomic_fence(std::memory_order_acquire); s.my_mutex = &m; ITT_NOTIFY(sync_acquired, s.my_mutex); return true; @@ -287,32 +292,50 @@ struct queuing_rw_mutex_impl { d1::queuing_rw_mutex::scoped_lock* next = tricky_pointer::load(s.my_next, std::memory_order_acquire); if( !next ) { d1::queuing_rw_mutex::scoped_lock* expected = &s; - if( s.my_mutex->q_tail.compare_exchange_strong(expected, nullptr, std::memory_order_release) ) { + // Release mutex on success otherwise wait for successor publication + if( s.my_mutex->q_tail.compare_exchange_strong(expected, nullptr, + std::memory_order_release, std::memory_order_relaxed) ) + { // this was the only item in the queue, and the queue is now empty. goto done; } - spin_wait_while_eq( s.my_next, 0U ); + spin_wait_while_eq(s.my_next, 0U, std::memory_order_relaxed); next = tricky_pointer::load(s.my_next, std::memory_order_acquire); } next->my_going.store(2U, std::memory_order_relaxed); // protect next queue node from being destroyed too early - if( next->my_state==STATE_UPGRADE_WAITING ) { + // If the next is STATE_UPGRADE_WAITING, it is expected to acquire all other released readers via release + // sequence in next->my_state. In that case, we need to preserve release sequence in next->my_state + // contributed by other reader. So, there are two approaches not to break the release sequence: + // 1. Use read-modify-write (exchange) operation to store with release the UPGRADE_LOSER state; + // 2. 
Acquire the release sequence and store the sequence and UPGRADE_LOSER state. + // The second approach seems better on x86 because it does not involve interlocked operations. + // Therefore, we read next->my_state with acquire while it is not required for else branch to get the + // release sequence. + if( next->my_state.load(std::memory_order_acquire)==STATE_UPGRADE_WAITING ) { // the next waiting for upgrade means this writer was upgraded before. acquire_internal_lock(s); // Responsibility transition, the one who reads uncorrupted my_prev will do release. + // Guarantee that above store of 2 into next->my_going happens-before resetting of next->my_prev d1::queuing_rw_mutex::scoped_lock* tmp = tricky_pointer::exchange(next->my_prev, nullptr, std::memory_order_release); - next->my_state.store(STATE_UPGRADE_LOSER, std::memory_order_relaxed); + // Pass the release sequence that we acquired with the above load of next->my_state. + next->my_state.store(STATE_UPGRADE_LOSER, std::memory_order_release); + // We are releasing the mutex next->my_going.store(1U, std::memory_order_release); unblock_or_wait_on_internal_lock(s, get_flag(tmp)); } else { // next->state cannot be STATE_UPGRADE_REQUESTED - __TBB_ASSERT( next->my_state & (STATE_COMBINED_WAITINGREADER | STATE_WRITER), "unexpected state" ); - __TBB_ASSERT( !( next->my_prev.load() & FLAG ), "use of corrupted pointer!" ); - tricky_pointer::store(next->my_prev, nullptr, std::memory_order_relaxed); + __TBB_ASSERT( next->my_state.load(std::memory_order_relaxed) & (STATE_COMBINED_WAITINGREADER | STATE_WRITER), "unexpected state" ); + __TBB_ASSERT( !( next->my_prev.load(std::memory_order_relaxed) & FLAG ), "use of corrupted pointer!" 
); + // Guarantee that above store of 2 into next->my_going happens-before resetting of next->my_prev + tricky_pointer::store(next->my_prev, nullptr, std::memory_order_release); + // We are releasing the mutex next->my_going.store(1U, std::memory_order_release); } } else { // Acquired for read - + // The basic idea it to build happens-before relation with left and right readers via prev and next. In addition, + // the first reader should acquire the left (prev) signal and propagate to right (next). To simplify, we always + // build happens-before relation between left and right (left is happened before right). queuing_rw_mutex::scoped_lock *tmp = nullptr; retry: // Addition to the original paper: Mark my_prev as in use @@ -324,11 +347,9 @@ struct queuing_rw_mutex_impl { // Failed to acquire the lock on predecessor. The predecessor either unlinks or upgrades. // In the second case, it could or could not know my "in use" flag - need to check // Responsibility transition, the one who reads uncorrupted my_prev will do release. - tmp = tricky_pointer::compare_exchange_strong(s.my_prev, tricky_pointer(predecessor) | FLAG, predecessor, std::memory_order_release); + tmp = tricky_pointer::compare_exchange_strong(s.my_prev, tricky_pointer(predecessor) | FLAG, predecessor, std::memory_order_acquire); if( !(tricky_pointer(tmp) & FLAG) ) { - // Wait for the predecessor to change my_prev (e.g. 
during unlink) - // TODO: spin_wait condition seems never reachable - tricky_pointer::spin_wait_while_eq( s.my_prev, tricky_pointer(predecessor)|FLAG ); + __TBB_ASSERT(tricky_pointer::load(s.my_prev, std::memory_order_relaxed) != (tricky_pointer(predecessor) | FLAG), nullptr); // Now owner of predecessor is waiting for _us_ to release its lock release_internal_lock(*predecessor); } @@ -344,13 +365,13 @@ struct queuing_rw_mutex_impl { tricky_pointer::store(predecessor->my_next, nullptr, std::memory_order_release); d1::queuing_rw_mutex::scoped_lock* expected = &s; - if( !tricky_pointer::load(s.my_next, std::memory_order_relaxed) && !s.my_mutex->q_tail.compare_exchange_strong(expected, predecessor, std::memory_order_release) ) { - spin_wait_while_eq( s.my_next, 0U ); + if( !tricky_pointer::load(s.my_next, std::memory_order_acquire) && !s.my_mutex->q_tail.compare_exchange_strong(expected, predecessor, std::memory_order_release) ) { + spin_wait_while_eq( s.my_next, 0U, std::memory_order_acquire ); } - __TBB_ASSERT( !(s.my_next.load() & FLAG), "use of corrupted pointer" ); + __TBB_ASSERT( !(s.my_next.load(std::memory_order_relaxed) & FLAG), "use of corrupted pointer" ); - // ensure acquire semantics of reading 'my_next' - if(d1::queuing_rw_mutex::scoped_lock *const l_next = tricky_pointer::load(s.my_next, std::memory_order_acquire) ) { // I->next != nil, TODO: rename to next after clearing up and adapting the n in the comment two lines below + // my_next is acquired either with load or spin_wait. 
+ if(d1::queuing_rw_mutex::scoped_lock *const l_next = tricky_pointer::load(s.my_next, std::memory_order_relaxed) ) { // I->next != nil, TODO: rename to next after clearing up and adapting the n in the comment two lines below // Equivalent to I->next->prev = I->prev but protected against (prev[n]&FLAG)!=0 tmp = tricky_pointer::exchange(l_next->my_prev, predecessor, std::memory_order_release); // I->prev->next = I->next; @@ -365,9 +386,12 @@ struct queuing_rw_mutex_impl { d1::queuing_rw_mutex::scoped_lock* next = tricky_pointer::load(s.my_next, std::memory_order_acquire); if( !next ) { d1::queuing_rw_mutex::scoped_lock* expected = &s; - if( !s.my_mutex->q_tail.compare_exchange_strong(expected, nullptr, std::memory_order_release) ) { - spin_wait_while_eq( s.my_next, 0U ); - next = tricky_pointer::load(s.my_next, std::memory_order_relaxed); + // Release mutex on success otherwise wait for successor publication + if( !s.my_mutex->q_tail.compare_exchange_strong(expected, nullptr, + std::memory_order_release, std::memory_order_relaxed) ) + { + spin_wait_while_eq( s.my_next, 0U, std::memory_order_relaxed ); + next = tricky_pointer::load(s.my_next, std::memory_order_acquire); } else { goto unlock_self; } @@ -381,7 +405,8 @@ struct queuing_rw_mutex_impl { unblock_or_wait_on_internal_lock(s, get_flag(tmp)); } done: - spin_wait_while_eq( s.my_going, 2U ); + // Lifetime synchronization, no need to build happens-before relation + spin_wait_while_eq( s.my_going, 2U, std::memory_order_relaxed ); s.initialize(); } @@ -390,48 +415,70 @@ struct queuing_rw_mutex_impl { if ( s.my_state.load(std::memory_order_relaxed) == STATE_ACTIVEREADER ) return true; // Already a reader ITT_NOTIFY(sync_releasing, s.my_mutex); - s.my_state.store(STATE_READER, std::memory_order_relaxed); - if( ! 
tricky_pointer::load(s.my_next, std::memory_order_relaxed)) { + d1::queuing_rw_mutex::scoped_lock* next = tricky_pointer::load(s.my_next, std::memory_order_acquire); + if( !next ) { + s.my_state.store(STATE_READER, std::memory_order_seq_cst); // the following load of q_tail must not be reordered with setting STATE_READER above - if( &s==s.my_mutex->q_tail.load() ) { + if( &s == s.my_mutex->q_tail.load(std::memory_order_seq_cst) ) { unsigned char old_state = STATE_READER; - s.my_state.compare_exchange_strong(old_state, STATE_ACTIVEREADER, std::memory_order_release); + // When this reader is signaled by previous actor it acquires the mutex. + // We need to build happens-before relation with all other coming readers that will read our ACTIVEREADER + // without blocking on my_going. Therefore, we need to publish ACTIVEREADER with release semantics. + // On fail it is relaxed, because we will build happens-before on my_going. + s.my_state.compare_exchange_strong(old_state, STATE_ACTIVEREADER, std::memory_order_release, std::memory_order_relaxed); if( old_state==STATE_READER ) return true; // Downgrade completed } /* wait for the next to register */ - spin_wait_while_eq( s.my_next, 0U ); + spin_wait_while_eq(s.my_next, 0U, std::memory_order_relaxed); + next = tricky_pointer::load(s.my_next, std::memory_order_acquire); } - d1::queuing_rw_mutex::scoped_lock *const next = tricky_pointer::load(s.my_next, std::memory_order_acquire); + __TBB_ASSERT( next, "still no successor at this point!" ); - if( next->my_state & STATE_COMBINED_WAITINGREADER ) + if( next->my_state.load(std::memory_order_relaxed) & STATE_COMBINED_WAITINGREADER ) next->my_going.store(1U, std::memory_order_release); - else if( next->my_state==STATE_UPGRADE_WAITING ) + // If the next is STATE_UPGRADE_WAITING, it is expected to acquire all other released readers via release + // sequence in next->my_state. In that case, we need to preserve release sequence in next->my_state + // contributed by other reader. 
So, there are two approaches not to break the release sequence: + // 1. Use read-modify-write (exchange) operation to store with release the UPGRADE_LOSER state; + // 2. Acquire the release sequence and store the sequence and UPGRADE_LOSER state. + // The second approach seems better on x86 because it does not involve interlocked operations. + // Therefore, we read next->my_state with acquire while it is not required for else branch to get the + // release sequence. + else if( next->my_state.load(std::memory_order_acquire)==STATE_UPGRADE_WAITING ) // the next waiting for upgrade means this writer was upgraded before. - next->my_state.store(STATE_UPGRADE_LOSER, std::memory_order_relaxed); - s.my_state.store(STATE_ACTIVEREADER, std::memory_order_relaxed);; + // To safe release sequence on next->my_state read it with acquire + next->my_state.store(STATE_UPGRADE_LOSER, std::memory_order_release); + s.my_state.store(STATE_ACTIVEREADER, std::memory_order_release); return true; } static bool upgrade_to_writer(d1::queuing_rw_mutex::scoped_lock& s) { - if ( s.my_state.load(std::memory_order_relaxed) == STATE_WRITER ) return true; // Already a writer + if (s.my_state.load(std::memory_order_relaxed) == STATE_WRITER) { + // Already a writer + return true; + } - __TBB_ASSERT( s.my_state==STATE_ACTIVEREADER, "only active reader can be updated" ); + __TBB_ASSERT(s.my_state.load(std::memory_order_relaxed) == STATE_ACTIVEREADER, "only active reader can be updated"); - queuing_rw_mutex::scoped_lock * tmp; - queuing_rw_mutex::scoped_lock * me = &s; + queuing_rw_mutex::scoped_lock* tmp{}; + queuing_rw_mutex::scoped_lock* me = &s; ITT_NOTIFY(sync_releasing, s.my_mutex); - s.my_state.store(STATE_UPGRADE_REQUESTED, std::memory_order_relaxed); + // Publish ourselves into my_state that other UPGRADE_WAITING actors can acquire our state. + s.my_state.store(STATE_UPGRADE_REQUESTED, std::memory_order_release); requested: - __TBB_ASSERT( !(s.my_next.load() & FLAG), "use of corrupted pointer!" 
); + __TBB_ASSERT( !(s.my_next.load(std::memory_order_relaxed) & FLAG), "use of corrupted pointer!" ); acquire_internal_lock(s); d1::queuing_rw_mutex::scoped_lock* expected = &s; - if( !s.my_mutex->q_tail.compare_exchange_strong(expected, tricky_pointer(me)|FLAG, std::memory_order_release) ) { - spin_wait_while_eq( s.my_next, 0U ); + if( !s.my_mutex->q_tail.compare_exchange_strong(expected, tricky_pointer(me)|FLAG, std::memory_order_acq_rel) ) { + spin_wait_while_eq( s.my_next, 0U, std::memory_order_relaxed ); queuing_rw_mutex::scoped_lock * next; next = tricky_pointer::fetch_add(s.my_next, FLAG, std::memory_order_acquire); - unsigned short n_state = next->my_state; + // While we were READER the next READER might reach STATE_UPGRADE_WAITING state. + // Therefore, it did not build happens before relation with us and we need to acquire the + // next->my_state to build the happens before relation ourselves + unsigned short n_state = next->my_state.load(std::memory_order_acquire); /* the next reader can be blocked by our state. the best thing to do is to unblock it */ if( n_state & STATE_COMBINED_WAITINGREADER ) next->my_going.store(1U, std::memory_order_release); @@ -442,7 +489,7 @@ struct queuing_rw_mutex_impl { // save next|FLAG for simplicity of following comparisons tmp = tricky_pointer(next)|FLAG; for( atomic_backoff b; tricky_pointer::load(s.my_next, std::memory_order_relaxed)==tmp; b.pause() ) { - if( s.my_state & STATE_COMBINED_UPGRADING ) { + if( s.my_state.load(std::memory_order_acquire) & STATE_COMBINED_UPGRADING ) { if( tricky_pointer::load(s.my_next, std::memory_order_acquire)==tmp ) tricky_pointer::store(s.my_next, next, std::memory_order_relaxed); goto waiting; @@ -461,7 +508,11 @@ struct queuing_rw_mutex_impl { } // if( this != my_mutex->q_tail... 
) { unsigned char old_state = STATE_UPGRADE_REQUESTED; - s.my_state.compare_exchange_strong(old_state, STATE_UPGRADE_WAITING, std::memory_order_acquire); + // If we reach STATE_UPGRADE_WAITING state we do not build happens-before relation with READER on + // left. We delegate this responsibility to READER on left when it try upgrading. Therefore, we are releasing + // on success. + // Otherwise, on fail, we already acquired the next->my_state. + s.my_state.compare_exchange_strong(old_state, STATE_UPGRADE_WAITING, std::memory_order_release, std::memory_order_relaxed); } waiting: __TBB_ASSERT( !( s.my_next.load(std::memory_order_relaxed) & FLAG ), "use of corrupted pointer!" ); @@ -480,11 +531,14 @@ struct queuing_rw_mutex_impl { // While the predecessor pointer (my_prev) is in use (FLAG is set), we can safely update the node`s state. // Corrupted pointer transitions responsibility to release the predecessor`s node on us. unsigned char old_state = STATE_UPGRADE_REQUESTED; - predecessor->my_state.compare_exchange_strong(old_state, STATE_UPGRADE_WAITING, std::memory_order_release); + // Try to build happens before with the upgrading READER on left. If fail, the predecessor state is not + // important for us because it will acquire our state. + predecessor->my_state.compare_exchange_strong(old_state, STATE_UPGRADE_WAITING, std::memory_order_release, + std::memory_order_relaxed); } if( !success ) { // Responsibility transition, the one who reads uncorrupted my_prev will do release. 
- tmp = tricky_pointer::compare_exchange_strong(s.my_prev, tricky_pointer(predecessor)|FLAG, predecessor, std::memory_order_release); + tmp = tricky_pointer::compare_exchange_strong(s.my_prev, tricky_pointer(predecessor)|FLAG, predecessor, std::memory_order_acquire); if( tricky_pointer(tmp) & FLAG ) { tricky_pointer::spin_wait_while_eq(s.my_prev, predecessor); predecessor = tricky_pointer::load(s.my_prev, std::memory_order_relaxed); @@ -512,9 +566,6 @@ struct queuing_rw_mutex_impl { // now wait for the predecessor to finish working with my fields spin_wait_while_eq( s.my_going, 2U ); - // Acquire critical section indirectly from previous owner or directly from predecessor (TODO: not clear). - atomic_fence(std::memory_order_acquire); // on either "my_mutex->q_tail" or "my_going" (TODO: not clear) - bool result = ( s.my_state != STATE_UPGRADE_LOSER ); s.my_state.store(STATE_WRITER, std::memory_order_relaxed); s.my_going.store(1U, std::memory_order_relaxed); @@ -523,6 +574,10 @@ struct queuing_rw_mutex_impl { return result; } + static bool is_writer(const d1::queuing_rw_mutex::scoped_lock& m) { + return m.my_state.load(std::memory_order_relaxed) == STATE_WRITER; + } + static void construct(d1::queuing_rw_mutex& m) { suppress_unused_warning(m); ITT_SYNC_CREATE(&m, _T("tbb::queuing_rw_mutex"), _T("")); @@ -545,6 +600,10 @@ bool __TBB_EXPORTED_FUNC upgrade_to_writer(d1::queuing_rw_mutex::scoped_lock& s) return queuing_rw_mutex_impl::upgrade_to_writer(s); } +bool __TBB_EXPORTED_FUNC is_writer(const d1::queuing_rw_mutex::scoped_lock& s) { + return queuing_rw_mutex_impl::is_writer(s); +} + bool __TBB_EXPORTED_FUNC downgrade_to_reader(d1::queuing_rw_mutex::scoped_lock& s) { return queuing_rw_mutex_impl::downgrade_to_reader(s); } diff --git a/contrib/libs/tbb/src/tbb/rml_tbb.cpp b/contrib/libs/tbb/src/tbb/rml_tbb.cpp index 122e2709f7..4c772eae06 100644 --- a/contrib/libs/tbb/src/tbb/rml_tbb.cpp +++ b/contrib/libs/tbb/src/tbb/rml_tbb.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 
2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -50,10 +50,10 @@ namespace rml { #define RML_SERVER_NAME "irml" DEBUG_SUFFIX ".dll" #elif __APPLE__ #define RML_SERVER_NAME "libirml" DEBUG_SUFFIX ".dylib" -#elif __linux__ -#define RML_SERVER_NAME "libirml" DEBUG_SUFFIX ".so.1" #elif __FreeBSD__ || __NetBSD__ || __OpenBSD__ || __sun || _AIX #define RML_SERVER_NAME "libirml" DEBUG_SUFFIX ".so" +#elif __unix__ +#define RML_SERVER_NAME "libirml" DEBUG_SUFFIX ".so.1" #else #error Unknown OS #endif @@ -71,7 +71,7 @@ const ::rml::versioned_object::version_type CLIENT_VERSION = 2; ::rml::factory::status_type FACTORY::open() { // Failure of following assertion indicates that factory is already open, or not zero-inited. - __TBB_ASSERT_EX( !library_handle, NULL ); + __TBB_ASSERT_EX( !library_handle, nullptr); status_type (*open_factory_routine)( factory&, version_type&, version_type ); dynamic_link_descriptor server_link_table[4] = { DLD(__RML_open_factory,open_factory_routine), @@ -85,7 +85,7 @@ const ::rml::versioned_object::version_type CLIENT_VERSION = 2; result = (*open_factory_routine)( *this, server_version, CLIENT_VERSION ); // server_version can be checked here for incompatibility if necessary. } else { - library_handle = NULL; + library_handle = nullptr; result = st_not_found; } return result; @@ -96,13 +96,13 @@ void FACTORY::close() { (*my_wait_to_close_routine)(*this); if ( (size_t)library_handle>FACTORY::c_dont_unload ) { dynamic_unlink(library_handle); - library_handle = NULL; + library_handle = nullptr; } } ::rml::factory::status_type FACTORY::make_server( SERVER*& s, CLIENT& c) { // Failure of following assertion means that factory was not successfully opened. 
- __TBB_ASSERT_EX( my_make_server_routine, NULL ); + __TBB_ASSERT_EX( my_make_server_routine, nullptr); return (*my_make_server_routine)(*this,s,c); } @@ -110,4 +110,3 @@ void FACTORY::close() { } // namespace r1 } // namespace detail } // namespace tbb - diff --git a/contrib/libs/tbb/src/tbb/rml_tbb.h b/contrib/libs/tbb/src/tbb/rml_tbb.h index de923be1b2..61176f8d76 100644 --- a/contrib/libs/tbb/src/tbb/rml_tbb.h +++ b/contrib/libs/tbb/src/tbb/rml_tbb.h @@ -37,7 +37,7 @@ public: //! Inform server of adjustments in the number of workers that the client can profitably use. virtual void adjust_job_count_estimate( int delta ) = 0; -#if _WIN32||_WIN64 +#if _WIN32 || _WIN64 //! Inform server of a oneTBB external thread. virtual void register_external_thread( execution_resource_t& v ) = 0; diff --git a/contrib/libs/tbb/src/tbb/rml_thread_monitor.h b/contrib/libs/tbb/src/tbb/rml_thread_monitor.h index 613ec72e98..13b556380f 100644 --- a/contrib/libs/tbb/src/tbb/rml_thread_monitor.h +++ b/contrib/libs/tbb/src/tbb/rml_thread_monitor.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -78,29 +78,17 @@ static const ::tbb::detail::r1::tchar *SyncObj_ThreadMonitor = _T("RML Thr Monit /** At most one thread should wait on an instance at a time. */ class thread_monitor { public: - class cookie { - friend class thread_monitor; - std::atomic<std::size_t> my_epoch{0}; - }; - thread_monitor() : skipped_wakeup(false), my_sema() { + thread_monitor() { ITT_SYNC_CREATE(&my_sema, SyncType_RML, SyncObj_ThreadMonitor); } ~thread_monitor() {} - //! If a thread is waiting or started a two-phase wait, notify it. + //! Notify waiting thread /** Can be called by any thread. */ void notify(); - //! Begin two-phase wait. - /** Should only be called by thread that owns the monitor. 
- The caller must either complete the wait or cancel it. */ - void prepare_wait( cookie& c ); - - //! Complete a two-phase wait and wait until notification occurs after the earlier prepare_wait. - void commit_wait( cookie& c ); - - //! Cancel a two-phase wait. - void cancel_wait(); + //! Wait for notification + void wait(); #if __TBB_USE_WINAPI typedef HANDLE handle_type; @@ -109,7 +97,7 @@ public: typedef unsigned (WINAPI *thread_routine_type)(void*); //! Launch a thread - static handle_type launch( thread_routine_type thread_routine, void* arg, std::size_t stack_size, const size_t* worker_index = NULL ); + static handle_type launch( thread_routine_type thread_routine, void* arg, std::size_t stack_size, const size_t* worker_index = nullptr ); #elif __TBB_USE_POSIX typedef pthread_t handle_type; @@ -127,9 +115,8 @@ public: //! Detach thread static void detach_thread(handle_type handle); private: - cookie my_cookie; // epoch counter - std::atomic<bool> in_wait{false}; - bool skipped_wakeup; + // The protection from double notification of the binary semaphore + std::atomic<bool> my_notified{ false }; binary_semaphore my_sema; #if __TBB_USE_POSIX static void check( int error_code, const char* routine ); @@ -154,7 +141,7 @@ inline thread_monitor::handle_type thread_monitor::launch( thread_routine_type t unsigned thread_id; int number_of_processor_groups = ( worker_index ) ? NumberOfProcessorGroups() : 0; unsigned create_flags = ( number_of_processor_groups > 1 ) ? 
CREATE_SUSPENDED : 0; - HANDLE h = (HANDLE)_beginthreadex( NULL, unsigned(stack_size), thread_routine, arg, STACK_SIZE_PARAM_IS_A_RESERVATION | create_flags, &thread_id ); + HANDLE h = (HANDLE)_beginthreadex( nullptr, unsigned(stack_size), thread_routine, arg, STACK_SIZE_PARAM_IS_A_RESERVATION | create_flags, &thread_id ); if( !h ) { handle_perror(0, "thread_monitor::launch: _beginthreadex failed\n"); } @@ -171,12 +158,12 @@ void thread_monitor::join(handle_type handle) { DWORD res = #endif WaitForSingleObjectEx(handle, INFINITE, FALSE); - __TBB_ASSERT( res==WAIT_OBJECT_0, NULL ); + __TBB_ASSERT( res==WAIT_OBJECT_0, nullptr); #if TBB_USE_ASSERT BOOL val = #endif CloseHandle(handle); - __TBB_ASSERT( val, NULL ); + __TBB_ASSERT( val, nullptr); } void thread_monitor::detach_thread(handle_type handle) { @@ -184,7 +171,7 @@ void thread_monitor::detach_thread(handle_type handle) { BOOL val = #endif CloseHandle(handle); - __TBB_ASSERT( val, NULL ); + __TBB_ASSERT( val, nullptr); } #endif /* __TBB_USE_WINAPI */ @@ -211,7 +198,7 @@ inline thread_monitor::handle_type thread_monitor::launch( void* (*thread_routin } void thread_monitor::join(handle_type handle) { - check(pthread_join(handle, NULL), "pthread_join has failed"); + check(pthread_join(handle, nullptr), "pthread_join has failed"); } void thread_monitor::detach_thread(handle_type handle) { @@ -220,33 +207,17 @@ void thread_monitor::detach_thread(handle_type handle) { #endif /* __TBB_USE_POSIX */ inline void thread_monitor::notify() { - my_cookie.my_epoch.store(my_cookie.my_epoch.load(std::memory_order_acquire) + 1, std::memory_order_release); - bool do_signal = in_wait.exchange( false ); - if( do_signal ) + // Check that the semaphore is not notified twice + if (!my_notified.exchange(true, std::memory_order_release)) { my_sema.V(); -} - -inline void thread_monitor::prepare_wait( cookie& c ) { - if( skipped_wakeup ) { - // Lazily consume a signal that was skipped due to cancel_wait - skipped_wakeup = false; - 
my_sema.P(); // does not really wait on the semaphore } - // Former c = my_cookie - c.my_epoch.store(my_cookie.my_epoch.load(std::memory_order_acquire), std::memory_order_release); - in_wait.store( true, std::memory_order_seq_cst ); -} - -inline void thread_monitor::commit_wait( cookie& c ) { - bool do_it = ( c.my_epoch.load(std::memory_order_relaxed) == my_cookie.my_epoch.load(std::memory_order_relaxed) ); - if( do_it ) my_sema.P(); - else cancel_wait(); } -inline void thread_monitor::cancel_wait() { - // if not in_wait, then some thread has sent us a signal; - // it will be consumed by the next prepare_wait call - skipped_wakeup = ! in_wait.exchange( false ); +inline void thread_monitor::wait() { + my_sema.P(); + // memory_order_seq_cst is required here to be ordered with + // further load checking shutdown state + my_notified.store(false, std::memory_order_seq_cst); } } // namespace internal diff --git a/contrib/libs/tbb/src/tbb/rtm_mutex.cpp b/contrib/libs/tbb/src/tbb/rtm_mutex.cpp index fe7fb66dc8..f386735b9a 100644 --- a/contrib/libs/tbb/src/tbb/rtm_mutex.cpp +++ b/contrib/libs/tbb/src/tbb/rtm_mutex.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -26,11 +26,13 @@ namespace tbb { namespace detail { namespace r1 { -// maximum number of times to retry -// TODO: experiment on retry values. -static constexpr int retry_threshold = 10; struct rtm_mutex_impl { + // maximum number of times to retry + // TODO: experiment on retry values. + static constexpr int retry_threshold = 10; + using transaction_result_type = decltype(begin_transaction()); + //! 
Release speculative mutex static void release(d1::rtm_mutex::scoped_lock& s) { switch(s.m_transaction_state) { @@ -57,14 +59,14 @@ struct rtm_mutex_impl { __TBB_ASSERT(s.m_transaction_state == d1::rtm_mutex::rtm_state::rtm_none, "scoped_lock already in transaction"); if(governor::speculation_enabled()) { int num_retries = 0; - unsigned int abort_code = 0; + transaction_result_type abort_code = 0; do { if(m.m_flag.load(std::memory_order_acquire)) { if(only_speculate) return; spin_wait_while_eq(m.m_flag, true); } // _xbegin returns -1 on success or the abort code, so capture it - if((abort_code = begin_transaction()) == speculation_successful_begin) + if((abort_code = begin_transaction()) == transaction_result_type(speculation_successful_begin)) { // started speculation if(m.m_flag.load(std::memory_order_relaxed)) { @@ -84,7 +86,6 @@ struct rtm_mutex_impl { s.m_mutex = &m; s.m_mutex->lock(); s.m_transaction_state = d1::rtm_mutex::rtm_state::rtm_real; - return; } //! Try to acquire lock on the given mutex. @@ -93,7 +94,7 @@ struct rtm_mutex_impl { if (s.m_transaction_state == d1::rtm_mutex::rtm_state::rtm_transacting) { return true; } - __TBB_ASSERT(s.m_transaction_state == d1::rtm_mutex::rtm_state::rtm_none, NULL); + __TBB_ASSERT(s.m_transaction_state == d1::rtm_mutex::rtm_state::rtm_none, nullptr); // transacting acquire failed. try_lock the real mutex if (m.try_lock()) { s.m_mutex = &m; diff --git a/contrib/libs/tbb/src/tbb/rtm_rw_mutex.cpp b/contrib/libs/tbb/src/tbb/rtm_rw_mutex.cpp index 5e50de4c39..fa87d0e393 100644 --- a/contrib/libs/tbb/src/tbb/rtm_rw_mutex.cpp +++ b/contrib/libs/tbb/src/tbb/rtm_rw_mutex.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -31,6 +31,7 @@ struct rtm_rw_mutex_impl { // TODO: experiment on retry values. 
static constexpr int retry_threshold_read = 10; static constexpr int retry_threshold_write = 10; + using transaction_result_type = decltype(begin_transaction()); //! Release speculative mutex static void release(d1::rtm_rw_mutex::scoped_lock& s) { @@ -66,14 +67,14 @@ struct rtm_rw_mutex_impl { __TBB_ASSERT(s.m_transaction_state == d1::rtm_rw_mutex::rtm_type::rtm_not_in_mutex, "scoped_lock already in transaction"); if(governor::speculation_enabled()) { int num_retries = 0; - unsigned int abort_code = 0; + transaction_result_type abort_code = 0; do { if(m.m_state.load(std::memory_order_acquire)) { if(only_speculate) return; spin_wait_until_eq(m.m_state, d1::rtm_rw_mutex::state_type(0)); } // _xbegin returns -1 on success or the abort code, so capture it - if((abort_code = begin_transaction()) == speculation_successful_begin) + if((abort_code = begin_transaction()) == transaction_result_type(speculation_successful_begin)) { // started speculation if(m.m_state.load(std::memory_order_relaxed)) { // add spin_rw_mutex to read-set. @@ -96,7 +97,6 @@ struct rtm_rw_mutex_impl { __TBB_ASSERT(!m.write_flag.load(std::memory_order_relaxed), "After acquire for write, write_flag already true"); m.write_flag.store(true, std::memory_order_relaxed); // kill transactional readers s.m_transaction_state = d1::rtm_rw_mutex::rtm_type::rtm_real_writer; - return; } //! Acquire read lock on given mutex. @@ -107,7 +107,7 @@ struct rtm_rw_mutex_impl { __TBB_ASSERT(s.m_transaction_state == d1::rtm_rw_mutex::rtm_type::rtm_not_in_mutex, "scoped_lock already in transaction"); if(governor::speculation_enabled()) { int num_retries = 0; - unsigned int abort_code = 0; + transaction_result_type abort_code = 0; do { // if in try_acquire, and lock is held as writer, don't attempt to speculate. 
if(m.write_flag.load(std::memory_order_acquire)) { @@ -115,7 +115,7 @@ struct rtm_rw_mutex_impl { spin_wait_while_eq(m.write_flag, true); } // _xbegin returns -1 on success or the abort code, so capture it - if((abort_code = begin_transaction()) == speculation_successful_begin) + if((abort_code = begin_transaction()) == transaction_result_type(speculation_successful_begin)) { // started speculation if(m.write_flag.load(std::memory_order_relaxed)) { // add write_flag to read-set. @@ -204,7 +204,7 @@ struct rtm_rw_mutex_impl { if (s.m_transaction_state == d1::rtm_rw_mutex::rtm_type::rtm_transacting_writer) { return true; } - __TBB_ASSERT(s.m_transaction_state == d1::rtm_rw_mutex::rtm_type::rtm_not_in_mutex, NULL); + __TBB_ASSERT(s.m_transaction_state == d1::rtm_rw_mutex::rtm_type::rtm_not_in_mutex, nullptr); // transacting write acquire failed. try_lock the real mutex if (m.try_lock()) { s.m_mutex = &m; @@ -224,7 +224,7 @@ struct rtm_rw_mutex_impl { if (s.m_transaction_state == d1::rtm_rw_mutex::rtm_type::rtm_transacting_reader) { return true; } - __TBB_ASSERT(s.m_transaction_state == d1::rtm_rw_mutex::rtm_type::rtm_not_in_mutex, NULL); + __TBB_ASSERT(s.m_transaction_state == d1::rtm_rw_mutex::rtm_type::rtm_not_in_mutex, nullptr); // transacting read acquire failed. try_lock_shared the real mutex if (m.try_lock_shared()) { s.m_mutex = &m; diff --git a/contrib/libs/tbb/src/tbb/scheduler_common.h b/contrib/libs/tbb/src/tbb/scheduler_common.h index ee13dbf981..9e10365736 100644 --- a/contrib/libs/tbb/src/tbb/scheduler_common.h +++ b/contrib/libs/tbb/src/tbb/scheduler_common.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -33,6 +33,7 @@ #endif // TODO: add conditional inclusion based on specified type #include "oneapi/tbb/spin_mutex.h" +#include "oneapi/tbb/mutex.h" #if TBB_USE_ASSERT #include <atomic> @@ -114,7 +115,7 @@ struct task_accessor { //------------------------------------------------------------------------ //! Extended variant of the standard offsetof macro /** The standard offsetof macro is not sufficient for TBB as it can be used for - POD-types only. The constant 0x1000 (not NULL) is necessary to appease GCC. **/ + POD-types only. The constant 0x1000 (not nullptr) is necessary to appease GCC. **/ #define __TBB_offsetof(class_name, member_name) \ ((ptrdiff_t)&(reinterpret_cast<class_name*>(0x1000)->member_name) - 0x1000) @@ -132,7 +133,7 @@ class context_guard_helper { d1::cpu_ctl_env guard_cpu_ctl_env; d1::cpu_ctl_env curr_cpu_ctl_env; public: - context_guard_helper() : curr_ctx(NULL) { + context_guard_helper() : curr_ctx(nullptr) { guard_cpu_ctl_env.get_env(); curr_cpu_ctl_env = guard_cpu_ctl_env; } @@ -159,8 +160,8 @@ public: ITT_TASK_END; // reporting begin of new task group context execution frame. // using address of task group context object to group tasks (parent). - // id of task execution frame is NULL and reserved for future use. - ITT_TASK_BEGIN(ctx, ctx->my_name, NULL); + // id of task execution frame is nullptr and reserved for future use. 
+ ITT_TASK_BEGIN(ctx, ctx->my_name, nullptr); curr_ctx = ctx; } } @@ -174,7 +175,7 @@ public: #endif // _WIN64 }; -#if (_WIN32 || _WIN64 || __linux__) && (__TBB_x86_32 || __TBB_x86_64) +#if (_WIN32 || _WIN64 || __unix__ || __APPLE__) && (__TBB_x86_32 || __TBB_x86_64) #if _MSC_VER #pragma intrinsic(__rdtsc) #endif @@ -219,7 +220,7 @@ inline void prolonged_pause_impl() { #endif inline void prolonged_pause() { -#if __TBB_WAITPKG_INTRINSICS_PRESENT && (_WIN32 || _WIN64 || __linux__) && (__TBB_x86_32 || __TBB_x86_64) +#if __TBB_WAITPKG_INTRINSICS_PRESENT if (governor::wait_package_enabled()) { std::uint64_t time_stamp = machine_time_stamp(); // _tpause function directs the processor to enter an implementation-dependent optimized state @@ -233,6 +234,10 @@ inline void prolonged_pause() { prolonged_pause_impl(); } +// TODO: investigate possibility to work with number of CPU cycles +// because for different configurations this number of pauses + yields +// will be calculated in different amount of CPU cycles +// for example use rdtsc for it class stealing_loop_backoff { const int my_pause_threshold; const int my_yield_threshold; @@ -243,13 +248,13 @@ public: // the time spent spinning before calling is_out_of_work() should be approximately // the time it takes for a thread to be woken up. Doing so would guarantee that we do // no worse than 2x the optimal spin time. Or perhaps a time-slice quantum is the right amount. - stealing_loop_backoff(int num_workers) + stealing_loop_backoff(int num_workers, int yields_multiplier) : my_pause_threshold{ 2 * (num_workers + 1) } #if __APPLE__ // threshold value tuned separately for macOS due to high cost of sched_yield there - , my_yield_threshold{10} + , my_yield_threshold{10 * yields_multiplier} #else - , my_yield_threshold{100} + , my_yield_threshold{100 * yields_multiplier} #endif , my_pause_count{} , my_yield_count{} @@ -348,6 +353,54 @@ struct suspend_point_type { bool m_is_critical{ false }; //! 
Associated coroutine co_context m_co_context; + //! Supend point before resume + suspend_point_type* m_prev_suspend_point{nullptr}; + + // Possible state transitions: + // A -> S -> N -> A + // A -> N -> S -> N -> A + enum class stack_state { + active, // some thread is working with this stack + suspended, // no thread is working with this stack + notified // some thread tried to resume this stack + }; + + //! The flag required to protect suspend finish and resume call + std::atomic<stack_state> m_stack_state{stack_state::active}; + + void resume(suspend_point_type* sp) { + __TBB_ASSERT(m_stack_state.load(std::memory_order_relaxed) != stack_state::suspended, "The stack is expected to be active"); + + sp->m_prev_suspend_point = this; + + // Do not access sp after resume + m_co_context.resume(sp->m_co_context); + __TBB_ASSERT(m_stack_state.load(std::memory_order_relaxed) != stack_state::active, nullptr); + + finilize_resume(); + } + + void finilize_resume() { + m_stack_state.store(stack_state::active, std::memory_order_relaxed); + // Set the suspended state for the stack that we left. If the state is already notified, it means that + // someone already tried to resume our previous stack but failed. So, we need to resume it. + // m_prev_suspend_point might be nullptr when destroying co_context based on threads + if (m_prev_suspend_point && m_prev_suspend_point->m_stack_state.exchange(stack_state::suspended) == stack_state::notified) { + r1::resume(m_prev_suspend_point); + } + m_prev_suspend_point = nullptr; + } + + bool try_notify_resume() { + // Check that stack is already suspended. Return false if not yet. 
+ return m_stack_state.exchange(stack_state::notified) == stack_state::suspended; + } + + void recall_owner() { + __TBB_ASSERT(m_stack_state.load(std::memory_order_relaxed) == stack_state::suspended, nullptr); + m_stack_state.store(stack_state::notified, std::memory_order_relaxed); + m_is_owner_recalled.store(true, std::memory_order_release); + } struct resume_task final : public d1::task { task_dispatcher& m_target; @@ -365,6 +418,12 @@ struct suspend_point_type { #endif /*__TBB_RESUMABLE_TASKS */ }; +#if _MSC_VER && !defined(__INTEL_COMPILER) +// structure was padded due to alignment specifier +#pragma warning( push ) +#pragma warning( disable: 4324 ) +#endif + class alignas (max_nfs_size) task_dispatcher { public: // TODO: reconsider low level design to better organize dependencies and files. @@ -374,6 +433,15 @@ public: friend class delegated_task; friend struct base_waiter; + //! The list of possible post resume actions. + enum class post_resume_action { + invalid, + register_waiter, + cleanup, + notify, + none + }; + //! The data of the current thread attached to this task_dispatcher thread_data* m_thread_data{ nullptr }; @@ -395,7 +463,7 @@ public: //! Attempt to get a task from the mailbox. /** Gets a task only if it has not been executed by its sender or a thief - that has stolen it from the sender's task pool. Otherwise returns NULL. + that has stolen it from the sender's task pool. Otherwise returns nullptr. This method is intended to be used only by the thread extracting the proxy from its mailbox. (In contrast to local task pool, mailbox can be read only by its owner). 
**/ @@ -464,7 +532,10 @@ public: #if __TBB_RESUMABLE_TASKS /* [[noreturn]] */ void co_local_wait_for_all() noexcept; void suspend(suspend_callback_type suspend_callback, void* user_callback); - void resume(task_dispatcher& target); + void internal_suspend(); + void do_post_resume_action(); + + bool resume(task_dispatcher& target); suspend_point_type* get_suspend_point(); void init_suspend_point(arena* a, std::size_t stack_size); friend void internal_resume(suspend_point_type*); @@ -472,7 +543,12 @@ public: #endif /* __TBB_RESUMABLE_TASKS */ }; +#if _MSC_VER && !defined(__INTEL_COMPILER) +#pragma warning( pop ) +#endif + inline std::uintptr_t calculate_stealing_threshold(std::uintptr_t base, std::size_t stack_size) { + __TBB_ASSERT(base > stack_size / 2, "Stack anchor calculation overflow"); return base - stack_size / 2; } diff --git a/contrib/libs/tbb/src/tbb/semaphore.cpp b/contrib/libs/tbb/src/tbb/semaphore.cpp index 92c9e675ab..db82442e01 100644 --- a/contrib/libs/tbb/src/tbb/semaphore.cpp +++ b/contrib/libs/tbb/src/tbb/semaphore.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -32,7 +32,7 @@ static std::atomic<do_once_state> concmon_module_inited; void WINAPI init_binsem_using_event( SRWLOCK* h_ ) { srwl_or_handle* shptr = (srwl_or_handle*) h_; - shptr->h = CreateEventEx( NULL, NULL, 0, EVENT_ALL_ACCESS|SEMAPHORE_ALL_ACCESS ); + shptr->h = CreateEventEx( nullptr, nullptr, 0, EVENT_ALL_ACCESS|SEMAPHORE_ALL_ACCESS ); } void WINAPI acquire_binsem_using_event( SRWLOCK* h_ ) @@ -60,11 +60,11 @@ static const dynamic_link_descriptor SRWLLinkTable[] = { inline void init_concmon_module() { - __TBB_ASSERT( (uintptr_t)__TBB_init_binsem==(uintptr_t)&init_binsem_using_event, NULL ); + __TBB_ASSERT( (uintptr_t)__TBB_init_binsem==(uintptr_t)&init_binsem_using_event, nullptr); if( dynamic_link( "Kernel32.dll", SRWLLinkTable, sizeof(SRWLLinkTable)/sizeof(dynamic_link_descriptor) ) ) { - __TBB_ASSERT( (uintptr_t)__TBB_init_binsem!=(uintptr_t)&init_binsem_using_event, NULL ); - __TBB_ASSERT( (uintptr_t)__TBB_acquire_binsem!=(uintptr_t)&acquire_binsem_using_event, NULL ); - __TBB_ASSERT( (uintptr_t)__TBB_release_binsem!=(uintptr_t)&release_binsem_using_event, NULL ); + __TBB_ASSERT( (uintptr_t)__TBB_init_binsem!=(uintptr_t)&init_binsem_using_event, nullptr); + __TBB_ASSERT( (uintptr_t)__TBB_acquire_binsem!=(uintptr_t)&acquire_binsem_using_event, nullptr); + __TBB_ASSERT( (uintptr_t)__TBB_release_binsem!=(uintptr_t)&release_binsem_using_event, nullptr); } } diff --git a/contrib/libs/tbb/src/tbb/semaphore.h b/contrib/libs/tbb/src/tbb/semaphore.h index 0a88536e36..8bc1924f03 100644 --- a/contrib/libs/tbb/src/tbb/semaphore.h +++ b/contrib/libs/tbb/src/tbb/semaphore.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -35,11 +35,18 @@ #include <atomic> -#if __linux__ || __FreeBSD__ || __NetBSD__ || __OpenBSD__ +#if __unix__ +#if defined(__has_include) +#define __TBB_has_include __has_include +#else +#define __TBB_has_include(x) 0 +#endif /* Futex definitions */ #include <unistd.h> +#if defined(__linux__) || __TBB_has_include(<sys/syscall.h>) #include <sys/syscall.h> +#endif #if defined(SYS_futex) @@ -47,12 +54,6 @@ #define __TBB_USE_FUTEX 1 -#if defined(__has_include) -#define __TBB_has_include __has_include -#else -#define __TBB_has_include(x) 0 -#endif - /* If available, use typical headers where futex API is defined. While Linux and OpenBSD are known to provide such headers, other systems might have them as well. @@ -87,7 +88,7 @@ the actual parameter values to match Linux: 0 for wait, 1 for wake. #endif #endif // SYS_futex -#endif // __linux__ || __FreeBSD__ || __NetBSD__ || __OpenBSD__ +#endif // __unix__ namespace tbb { namespace detail { @@ -100,23 +101,23 @@ namespace r1 { #if __TBB_USE_FUTEX static inline int futex_wait( void *futex, int comparand ) { - int r = ::syscall( SYS_futex,futex,__TBB_FUTEX_WAIT,comparand,NULL,NULL,0 ); + int r = ::syscall(SYS_futex, futex, __TBB_FUTEX_WAIT, comparand, nullptr, nullptr, 0); #if TBB_USE_ASSERT int e = errno; - __TBB_ASSERT( r==0||r==EWOULDBLOCK||(r==-1&&(e==EAGAIN||e==EINTR)), "futex_wait failed." ); + __TBB_ASSERT(r == 0 || r == EWOULDBLOCK || (r == -1 && (e == EAGAIN || e == EINTR)), "futex_wait failed."); #endif /* TBB_USE_ASSERT */ return r; } static inline int futex_wakeup_one( void *futex ) { - int r = ::syscall( SYS_futex,futex,__TBB_FUTEX_WAKE,1,NULL,NULL,0 ); - __TBB_ASSERT( r==0||r==1, "futex_wakeup_one: more than one thread woken up?" 
); + int r = ::syscall(SYS_futex, futex, __TBB_FUTEX_WAKE, 1, nullptr, nullptr, 0); + __TBB_ASSERT(r == 0 || r == 1, "futex_wakeup_one: more than one thread woken up?"); return r; } // Additional possible methods that are not required right now // static inline int futex_wakeup_all( void *futex ) { -// int r = ::syscall( SYS_futex,futex,__TBB_FUTEX_WAKE,INT_MAX,NULL,NULL,0 ); +// int r = ::syscall( SYS_futex,futex,__TBB_FUTEX_WAKE,INT_MAX,nullptr,nullptr,0 ); // __TBB_ASSERT( r>=0, "futex_wakeup_all: error in waking up threads" ); // return r; // } @@ -137,11 +138,11 @@ public: //! wait/acquire void P() {WaitForSingleObjectEx( sem, INFINITE, FALSE );} //! post/release - void V() {ReleaseSemaphore( sem, 1, NULL );} + void V() {ReleaseSemaphore( sem, 1, nullptr);} private: HANDLE sem; void init_semaphore(size_t start_cnt_) { - sem = CreateSemaphoreEx( NULL, LONG(start_cnt_), max_semaphore_cnt, NULL, 0, SEMAPHORE_ALL_ACCESS ); + sem = CreateSemaphoreEx( nullptr, LONG(start_cnt_), max_semaphore_cnt, nullptr, 0, SEMAPHORE_ALL_ACCESS ); } }; #elif __APPLE__ @@ -153,7 +154,7 @@ public: //! dtor ~semaphore() { kern_return_t ret = semaphore_destroy( mach_task_self(), sem ); - __TBB_ASSERT_EX( ret==err_none, NULL ); + __TBB_ASSERT_EX( ret==err_none, nullptr); } //! wait/acquire void P() { @@ -183,12 +184,12 @@ public: //! dtor ~semaphore() { int ret = sem_destroy( &sem ); - __TBB_ASSERT_EX( !ret, NULL ); + __TBB_ASSERT_EX( !ret, nullptr); } //! wait/acquire void P() { while( sem_wait( &sem )!=0 ) - __TBB_ASSERT( errno==EINTR, NULL ); + __TBB_ASSERT( errno==EINTR, nullptr); } //! post/release void V() { sem_post( &sem ); } @@ -196,7 +197,7 @@ private: sem_t sem; void init_semaphore(int start_cnt_) { int ret = sem_init( &sem, /*shared among threads*/ 0, start_cnt_ ); - __TBB_ASSERT_EX( !ret, NULL ); + __TBB_ASSERT_EX( !ret, nullptr); } }; #endif /* _WIN32||_WIN64 */ @@ -209,7 +210,7 @@ private: class binary_semaphore : no_copy { public: //! 
ctor - binary_semaphore() { my_sem = CreateEventEx( NULL, NULL, 0, EVENT_ALL_ACCESS ); } + binary_semaphore() { my_sem = CreateEventEx( nullptr, nullptr, 0, EVENT_ALL_ACCESS ); } //! dtor ~binary_semaphore() { CloseHandle( my_sem ); } //! wait/acquire @@ -253,7 +254,7 @@ public: //! dtor ~binary_semaphore() { kern_return_t ret = semaphore_destroy( mach_task_self(), my_sem ); - __TBB_ASSERT_EX( ret==err_none, NULL ); + __TBB_ASSERT_EX( ret==err_none, nullptr); } //! wait/acquire void P() { @@ -308,17 +309,17 @@ public: //! ctor binary_semaphore() { int ret = sem_init( &my_sem, /*shared among threads*/ 0, 0 ); - __TBB_ASSERT_EX( !ret, NULL ); + __TBB_ASSERT_EX( !ret, nullptr); } //! dtor ~binary_semaphore() { int ret = sem_destroy( &my_sem ); - __TBB_ASSERT_EX( !ret, NULL ); + __TBB_ASSERT_EX( !ret, nullptr); } //! wait/acquire void P() { while( sem_wait( &my_sem )!=0 ) - __TBB_ASSERT( errno==EINTR, NULL ); + __TBB_ASSERT( errno==EINTR, nullptr); } //! post/release void V() { sem_post( &my_sem ); } diff --git a/contrib/libs/tbb/src/tbb/task.cpp b/contrib/libs/tbb/src/tbb/task.cpp index 129614447a..bd4e32dfe5 100644 --- a/contrib/libs/tbb/src/tbb/task.cpp +++ b/contrib/libs/tbb/src/tbb/task.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -47,29 +47,29 @@ void suspend(suspend_callback_type suspend_callback, void* user_callback) { void resume(suspend_point_type* sp) { assert_pointers_valid(sp, sp->m_arena); task_dispatcher& task_disp = sp->m_resume_task.m_target; - __TBB_ASSERT(task_disp.m_thread_data == nullptr, nullptr); - // TODO: remove this work-around - // Prolong the arena's lifetime while all coroutines are alive - // (otherwise the arena can be destroyed while some tasks are suspended). 
- arena& a = *sp->m_arena; - a.my_references += arena::ref_external; - - if (task_disp.m_properties.critical_task_allowed) { - // The target is not in the process of executing critical task, so the resume task is not critical. - a.my_resume_task_stream.push(&sp->m_resume_task, random_lane_selector(sp->m_random)); - } else { -#if __TBB_PREVIEW_CRITICAL_TASKS - // The target is in the process of executing critical task, so the resume task is critical. - a.my_critical_task_stream.push(&sp->m_resume_task, random_lane_selector(sp->m_random)); -#endif + if (sp->try_notify_resume()) { + // TODO: remove this work-around + // Prolong the arena's lifetime while all coroutines are alive + // (otherwise the arena can be destroyed while some tasks are suspended). + arena& a = *sp->m_arena; + a.my_references += arena::ref_external; + + if (task_disp.m_properties.critical_task_allowed) { + // The target is not in the process of executing critical task, so the resume task is not critical. + a.my_resume_task_stream.push(&sp->m_resume_task, random_lane_selector(sp->m_random)); + } else { + #if __TBB_PREVIEW_CRITICAL_TASKS + // The target is in the process of executing critical task, so the resume task is critical. + a.my_critical_task_stream.push(&sp->m_resume_task, random_lane_selector(sp->m_random)); + #endif + } + // Do not access target after that point. + a.advertise_new_work<arena::wakeup>(); + // Release our reference to my_arena. + a.on_thread_leaving<arena::ref_external>(); } - // Do not access target after that point. - a.advertise_new_work<arena::wakeup>(); - - // Release our reference to my_arena. 
- a.on_thread_leaving<arena::ref_external>(); } suspend_point_type* current_suspend_point() { @@ -92,9 +92,7 @@ static task_dispatcher& create_coroutine(thread_data& td) { return *task_disp; } -void task_dispatcher::suspend(suspend_callback_type suspend_callback, void* user_callback) { - __TBB_ASSERT(suspend_callback != nullptr, nullptr); - __TBB_ASSERT(user_callback != nullptr, nullptr); +void task_dispatcher::internal_suspend() { __TBB_ASSERT(m_thread_data != nullptr, nullptr); arena_slot* slot = m_thread_data->my_arena_slot; @@ -105,8 +103,6 @@ void task_dispatcher::suspend(suspend_callback_type suspend_callback, void* user bool is_recalled = default_task_disp.get_suspend_point()->m_is_owner_recalled.load(std::memory_order_acquire); task_dispatcher& target = is_recalled ? default_task_disp : create_coroutine(*m_thread_data); - thread_data::suspend_callback_wrapper callback = { suspend_callback, user_callback, get_suspend_point() }; - m_thread_data->set_post_resume_action(thread_data::post_resume_action::callback, &callback); resume(target); if (m_properties.outermost) { @@ -114,15 +110,24 @@ void task_dispatcher::suspend(suspend_callback_type suspend_callback, void* user } } -void task_dispatcher::resume(task_dispatcher& target) { +void task_dispatcher::suspend(suspend_callback_type suspend_callback, void* user_callback) { + __TBB_ASSERT(suspend_callback != nullptr, nullptr); + __TBB_ASSERT(user_callback != nullptr, nullptr); + suspend_callback(user_callback, get_suspend_point()); + + __TBB_ASSERT(m_thread_data != nullptr, nullptr); + __TBB_ASSERT(m_thread_data->my_post_resume_action == post_resume_action::none, nullptr); + __TBB_ASSERT(m_thread_data->my_post_resume_arg == nullptr, nullptr); + internal_suspend(); +} + +bool task_dispatcher::resume(task_dispatcher& target) { // Do not create non-trivial objects on the stack of this function. 
They might never be destroyed { thread_data* td = m_thread_data; __TBB_ASSERT(&target != this, "We cannot resume to ourself"); __TBB_ASSERT(td != nullptr, "This task dispatcher must be attach to a thread data"); __TBB_ASSERT(td->my_task_dispatcher == this, "Thread data must be attached to this task dispatcher"); - __TBB_ASSERT(td->my_post_resume_action != thread_data::post_resume_action::none, "The post resume action must be set"); - __TBB_ASSERT(td->my_post_resume_arg, "The post resume action must have an argument"); // Change the task dispatcher td->detach_task_dispatcher(); @@ -131,13 +136,14 @@ void task_dispatcher::resume(task_dispatcher& target) { __TBB_ASSERT(m_suspend_point != nullptr, "Suspend point must be created"); __TBB_ASSERT(target.m_suspend_point != nullptr, "Suspend point must be created"); // Swap to the target coroutine. - m_suspend_point->m_co_context.resume(target.m_suspend_point->m_co_context); + + m_suspend_point->resume(target.m_suspend_point); // Pay attention that m_thread_data can be changed after resume - { + if (m_thread_data) { thread_data* td = m_thread_data; __TBB_ASSERT(td != nullptr, "This task dispatcher must be attach to a thread data"); __TBB_ASSERT(td->my_task_dispatcher == this, "Thread data must be attached to this task dispatcher"); - td->do_post_resume_action(); + do_post_resume_action(); // Remove the recall flag if the thread in its original task dispatcher arena_slot* slot = td->my_arena_slot; @@ -146,52 +152,48 @@ void task_dispatcher::resume(task_dispatcher& target) { __TBB_ASSERT(m_suspend_point != nullptr, nullptr); m_suspend_point->m_is_owner_recalled.store(false, std::memory_order_relaxed); } + return true; } + return false; } -void thread_data::do_post_resume_action() { - __TBB_ASSERT(my_post_resume_action != thread_data::post_resume_action::none, "The post resume action must be set"); - __TBB_ASSERT(my_post_resume_arg, "The post resume action must have an argument"); - - switch (my_post_resume_action) { +void 
task_dispatcher::do_post_resume_action() { + thread_data* td = m_thread_data; + switch (td->my_post_resume_action) { case post_resume_action::register_waiter: { - static_cast<extended_concurrent_monitor::resume_context*>(my_post_resume_arg)->notify(); - break; - } - case post_resume_action::resume: - { - r1::resume(static_cast<suspend_point_type*>(my_post_resume_arg)); - break; - } - case post_resume_action::callback: - { - suspend_callback_wrapper callback = *static_cast<suspend_callback_wrapper*>(my_post_resume_arg); - callback(); + __TBB_ASSERT(td->my_post_resume_arg, "The post resume action must have an argument"); + static_cast<market_concurrent_monitor::resume_context*>(td->my_post_resume_arg)->notify(); break; } case post_resume_action::cleanup: { - task_dispatcher* to_cleanup = static_cast<task_dispatcher*>(my_post_resume_arg); - // Release coroutine's reference to my_arena. - my_arena->on_thread_leaving<arena::ref_external>(); + __TBB_ASSERT(td->my_post_resume_arg, "The post resume action must have an argument"); + task_dispatcher* to_cleanup = static_cast<task_dispatcher*>(td->my_post_resume_arg); + // Release coroutine's reference to my_arena + td->my_arena->on_thread_leaving<arena::ref_external>(); // Cache the coroutine for possible later re-usage - my_arena->my_co_cache.push(to_cleanup); + td->my_arena->my_co_cache.push(to_cleanup); break; } case post_resume_action::notify: { - std::atomic<bool>& owner_recall_flag = *static_cast<std::atomic<bool>*>(my_post_resume_arg); - owner_recall_flag.store(true, std::memory_order_release); - // Do not access recall_flag because it can be destroyed after the notification. 
+ __TBB_ASSERT(td->my_post_resume_arg, "The post resume action must have an argument"); + suspend_point_type* sp = static_cast<suspend_point_type*>(td->my_post_resume_arg); + sp->recall_owner(); + // Do not access sp because it can be destroyed after recall + + auto is_our_suspend_point = [sp] (market_context ctx) { + return std::uintptr_t(sp) == ctx.my_uniq_addr; + }; + td->my_arena->my_market->get_wait_list().notify(is_our_suspend_point); break; } default: - __TBB_ASSERT(false, "Unknown post resume action"); + __TBB_ASSERT(td->my_post_resume_action == post_resume_action::none, "Unknown post resume action"); + __TBB_ASSERT(td->my_post_resume_arg == nullptr, "The post resume argument should not be set"); } - - my_post_resume_action = post_resume_action::none; - my_post_resume_arg = nullptr; + td->clear_post_resume_action(); } #else @@ -212,7 +214,7 @@ suspend_point_type* current_suspend_point() { #endif /* __TBB_RESUMABLE_TASKS */ void notify_waiters(std::uintptr_t wait_ctx_addr) { - auto is_related_wait_ctx = [&] (extended_context context) { + auto is_related_wait_ctx = [&] (market_context context) { return wait_ctx_addr == context.my_uniq_addr; }; diff --git a/contrib/libs/tbb/src/tbb/task_dispatcher.cpp b/contrib/libs/tbb/src/tbb/task_dispatcher.cpp index 86818af1d1..5ea7d3f534 100644 --- a/contrib/libs/tbb/src/tbb/task_dispatcher.cpp +++ b/contrib/libs/tbb/src/tbb/task_dispatcher.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2020-2021 Intel Corporation + Copyright (c) 2020-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -51,7 +51,7 @@ void __TBB_EXPORTED_FUNC spawn(d1::task& t, d1::task_group_context& ctx, d1::slo // Mark isolation task_accessor::isolation(t) = ed.isolation; - if ( id != d1::no_slot && id != tls->my_arena_index ) { + if ( id != d1::no_slot && id != tls->my_arena_index && id < a->my_num_slots) { // Allocate proxy task d1::small_object_allocator alloc{}; auto proxy = alloc.new_object<task_proxy>(static_cast<d1::execution_data&>(ed)); @@ -133,7 +133,7 @@ d1::slot_id __TBB_EXPORTED_FUNC execution_slot(const d1::execution_data* ed) { return ed_ext->task_disp->m_thread_data->my_arena_index; } else { thread_data* td = governor::get_thread_data_if_initialized(); - return td ? int(td->my_arena_index) : -1; + return td ? td->my_arena_index : d1::slot_id(-1); } } @@ -173,24 +173,27 @@ void task_dispatcher::execute_and_wait(d1::task* t, d1::wait_context& wait_ctx, local_td.m_thread_data->my_inbox.set_is_idle(false); } - if (w_ctx.my_exception) { + auto exception = w_ctx.my_exception.load(std::memory_order_acquire); + if (exception) { __TBB_ASSERT(w_ctx.is_group_execution_cancelled(), "The task group context with an exception should be canceled."); - w_ctx.my_exception->throw_self(); + exception->throw_self(); } } #if __TBB_RESUMABLE_TASKS #if _WIN32 -/* [[noreturn]] */ void __stdcall co_local_wait_for_all(void* arg) noexcept +/* [[noreturn]] */ void __stdcall co_local_wait_for_all(void* addr) noexcept #else -/* [[noreturn]] */ void co_local_wait_for_all(void* arg) noexcept +/* [[noreturn]] */ void co_local_wait_for_all(unsigned hi, unsigned lo) noexcept #endif { - // Do not create non-trivial objects on the stack of this function. They will never be destroyed. 
- __TBB_ASSERT(arg != nullptr, nullptr); - task_dispatcher& task_disp = *static_cast<task_dispatcher*>(arg); - +#if !_WIN32 + std::uintptr_t addr = lo; + __TBB_ASSERT(sizeof(addr) == 8 || hi == 0, nullptr); + addr += std::uintptr_t(std::uint64_t(hi) << 32); +#endif + task_dispatcher& task_disp = *reinterpret_cast<task_dispatcher*>(addr); assert_pointers_valid(task_disp.m_thread_data, task_disp.m_thread_data->my_arena); task_disp.set_stealing_threshold(task_disp.m_thread_data->my_arena->calculate_stealing_threshold()); __TBB_ASSERT(task_disp.can_steal(), nullptr); @@ -202,21 +205,23 @@ void task_dispatcher::execute_and_wait(d1::task* t, d1::wait_context& wait_ctx, // Do not create non-trivial objects on the stack of this function. They will never be destroyed. assert_pointer_valid(m_thread_data); + m_suspend_point->finilize_resume(); // Basically calls the user callback passed to the tbb::task::suspend function - m_thread_data->do_post_resume_action(); + do_post_resume_action(); // Endless loop here because coroutine could be reused - for (;;) { + d1::task* resume_task{}; + do { arena* a = m_thread_data->my_arena; coroutine_waiter waiter(*a); - d1::task* resume_task = local_wait_for_all(nullptr, waiter); + resume_task = local_wait_for_all(nullptr, waiter); assert_task_valid(resume_task); __TBB_ASSERT(this == m_thread_data->my_task_dispatcher, nullptr); - m_thread_data->set_post_resume_action(thread_data::post_resume_action::cleanup, this); - resume(static_cast<suspend_point_type::resume_task*>(resume_task)->m_target); - } - // This code is unreachable + m_thread_data->set_post_resume_action(post_resume_action::cleanup, this); + + } while (resume(static_cast<suspend_point_type::resume_task*>(resume_task)->m_target)); + // This code might be unreachable } d1::suspend_point task_dispatcher::get_suspend_point() { @@ -237,4 +242,3 @@ void task_dispatcher::init_suspend_point(arena* a, std::size_t stack_size) { } // namespace r1 } // namespace detail } // namespace tbb - 
diff --git a/contrib/libs/tbb/src/tbb/task_dispatcher.h b/contrib/libs/tbb/src/tbb/task_dispatcher.h index 54a6c0d934..f6ff3f173c 100644 --- a/contrib/libs/tbb/src/tbb/task_dispatcher.h +++ b/contrib/libs/tbb/src/tbb/task_dispatcher.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2020-2021 Intel Corporation + Copyright (c) 2020-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -65,25 +65,25 @@ inline d1::task* suspend_point_type::resume_task::execute(d1::execution_data& ed execution_data_ext& ed_ext = static_cast<execution_data_ext&>(ed); if (ed_ext.wait_ctx) { - extended_concurrent_monitor::resume_context monitor_node{{std::uintptr_t(ed_ext.wait_ctx), nullptr}, ed_ext, m_target}; + market_concurrent_monitor::resume_context monitor_node{{std::uintptr_t(ed_ext.wait_ctx), nullptr}, ed_ext, m_target}; // The wait_ctx is present only in external_waiter. In that case we leave the current stack // in the abandoned state to resume when waiting completes. thread_data* td = ed_ext.task_disp->m_thread_data; - td->set_post_resume_action(thread_data::post_resume_action::register_waiter, &monitor_node); + td->set_post_resume_action(task_dispatcher::post_resume_action::register_waiter, &monitor_node); - extended_concurrent_monitor& wait_list = td->my_arena->my_market->get_wait_list(); + market_concurrent_monitor& wait_list = td->my_arena->my_market->get_wait_list(); if (wait_list.wait([&] { return !ed_ext.wait_ctx->continue_execution(); }, monitor_node)) { return nullptr; } td->clear_post_resume_action(); - td->set_post_resume_action(thread_data::post_resume_action::resume, ed_ext.task_disp->get_suspend_point()); + r1::resume(ed_ext.task_disp->get_suspend_point()); } else { // If wait_ctx is null, it can be only a worker thread on outermost level because // coroutine_waiter interrupts bypass loop before the resume_task execution. 
- ed_ext.task_disp->m_thread_data->set_post_resume_action(thread_data::post_resume_action::notify, - &ed_ext.task_disp->get_suspend_point()->m_is_owner_recalled); + ed_ext.task_disp->m_thread_data->set_post_resume_action(task_dispatcher::post_resume_action::notify, + ed_ext.task_disp->get_suspend_point()); } // Do not access this task because it might be destroyed ed_ext.task_disp->resume(m_target); @@ -171,7 +171,7 @@ d1::task* task_dispatcher::receive_or_steal_task( thread_data& tls, execution_data_ext& ed, Waiter& waiter, isolation_type isolation, bool fifo_allowed, bool critical_allowed) { - __TBB_ASSERT(governor::is_thread_data_set(&tls), NULL); + __TBB_ASSERT(governor::is_thread_data_set(&tls), nullptr); // Task to return d1::task* t = nullptr; // Get tls data (again) @@ -230,6 +230,8 @@ d1::task* task_dispatcher::receive_or_steal_task( // Nothing to do, pause a little. waiter.pause(slot); } // end of nonlocal task retrieval loop + + __TBB_ASSERT(is_alive(a.my_guard), nullptr); if (inbox.is_idle_state(true)) { inbox.set_is_idle(false); } @@ -280,6 +282,11 @@ d1::task* task_dispatcher::local_wait_for_all(d1::task* t, Waiter& waiter ) { m_properties.fifo_tasks_allowed = false; t = get_critical_task(t, ed, isolation, critical_allowed); + if (t && m_thread_data->my_inbox.is_idle_state(true)) { + // The thread has a work to do. Therefore, marking its inbox as not idle so that + // affinitized tasks can be stolen from it. 
+ m_thread_data->my_inbox.set_is_idle(false); + } // Infinite exception loop for (;;) { @@ -293,8 +300,8 @@ d1::task* task_dispatcher::local_wait_for_all(d1::task* t, Waiter& waiter ) { while (t != nullptr) { assert_task_valid(t); assert_pointer_valid</*alignment = */alignof(void*)>(ed.context); - __TBB_ASSERT(ed.context->my_lifetime_state > d1::task_group_context::lifetime_state::locked && - ed.context->my_lifetime_state < d1::task_group_context::lifetime_state::dying, nullptr); + __TBB_ASSERT(ed.context->my_state == d1::task_group_context::state::bound || + ed.context->my_state == d1::task_group_context::state::isolated, nullptr); __TBB_ASSERT(m_thread_data->my_inbox.is_idle_state(false), nullptr); __TBB_ASSERT(task_accessor::is_resume_task(*t) || isolation == no_isolation || isolation == ed.isolation, nullptr); // Check premature leave @@ -334,7 +341,7 @@ d1::task* task_dispatcher::local_wait_for_all(d1::task* t, Waiter& waiter ) { } // Retrieve the task from local task pool if (t || (slot.is_task_pool_published() && (t = slot.get_task(ed, isolation)))) { - __TBB_ASSERT(ed.original_slot == m_thread_data->my_arena_index, NULL); + __TBB_ASSERT(ed.original_slot == m_thread_data->my_arena_index, nullptr); ed.context = task_accessor::context(*t); ed.isolation = task_accessor::isolation(*t); continue; @@ -352,7 +359,7 @@ d1::task* task_dispatcher::local_wait_for_all(d1::task* t, Waiter& waiter ) { } if (ed.context->cancel_group_execution()) { /* We are the first to signal cancellation, so store the exception that caused it. 
*/ - ed.context->my_exception = tbb_exception_ptr::allocate(); + ed.context->my_exception.store(tbb_exception_ptr::allocate(), std::memory_order_release); } } } // Infinite exception loop @@ -373,14 +380,9 @@ inline void task_dispatcher::recall_point() { if (this != &m_thread_data->my_arena_slot->default_task_dispatcher()) { __TBB_ASSERT(m_suspend_point != nullptr, nullptr); __TBB_ASSERT(m_suspend_point->m_is_owner_recalled.load(std::memory_order_relaxed) == false, nullptr); - d1::suspend([](suspend_point_type* sp) { - sp->m_is_owner_recalled.store(true, std::memory_order_release); - auto is_related_suspend_point = [sp] (extended_context context) { - std::uintptr_t sp_addr = std::uintptr_t(sp); - return sp_addr == context.my_uniq_addr; - }; - sp->m_arena->my_market->get_wait_list().notify(is_related_suspend_point); - }); + + m_thread_data->set_post_resume_action(post_resume_action::notify, get_suspend_point()); + internal_suspend(); if (m_thread_data->my_inbox.is_idle_state(true)) { m_thread_data->my_inbox.set_is_idle(false); @@ -445,7 +447,7 @@ inline d1::task* task_dispatcher::get_mailbox_task(mail_inbox& my_inbox, executi // We have exclusive access to the proxy, and can destroy it. tp->allocator.delete_object(tp, ed); } - return NULL; + return nullptr; } template <typename Waiter> diff --git a/contrib/libs/tbb/src/tbb/task_group_context.cpp b/contrib/libs/tbb/src/tbb/task_group_context.cpp index 3c296648ec..177dd555b8 100644 --- a/contrib/libs/tbb/src/tbb/task_group_context.cpp +++ b/contrib/libs/tbb/src/tbb/task_group_context.cpp @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -52,113 +52,48 @@ void tbb_exception_ptr::throw_self() { //------------------------------------------------------------------------ void task_group_context_impl::destroy(d1::task_group_context& ctx) { - __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL); + __TBB_ASSERT(!is_poisoned(ctx.my_context_list), nullptr); - auto ctx_lifetime_state = ctx.my_lifetime_state.load(std::memory_order_relaxed); - __TBB_ASSERT(ctx_lifetime_state != d1::task_group_context::lifetime_state::locked, nullptr); - - if (ctx_lifetime_state == d1::task_group_context::lifetime_state::bound) { + if (ctx.my_context_list != nullptr) { + __TBB_ASSERT(ctx.my_state.load(std::memory_order_relaxed) == d1::task_group_context::state::bound, nullptr); // The owner can be destroyed at any moment. Access the associate data with caution. - thread_data* owner = ctx.my_owner.load(std::memory_order_relaxed); - if (governor::is_thread_data_set(owner)) { - thread_data::context_list_state& cls = owner->my_context_list_state; - // We are the owner, so cls is valid. - // Local update of the context list - std::uintptr_t local_count_snapshot = cls.epoch.load(std::memory_order_relaxed); - // The sequentially-consistent store to prevent load of nonlocal update flag - // from being hoisted before the store to local update flag. - cls.local_update = 1; - if (cls.nonlocal_update.load(std::memory_order_relaxed)) { - spin_mutex::scoped_lock lock(cls.mutex); - ctx.my_node.remove_relaxed(); - cls.local_update.store(0, std::memory_order_relaxed); - } else { - ctx.my_node.remove_relaxed(); - // Release fence is necessary so that update of our neighbors in - // the context list was committed when possible concurrent destroyer - // proceeds after local update flag is reset by the following store. 
- cls.local_update.store(0, std::memory_order_release); - if (local_count_snapshot != the_context_state_propagation_epoch.load(std::memory_order_relaxed)) { - // Another thread was propagating cancellation request when we removed - // ourselves from the list. We must ensure that it is not accessing us - // when this destructor finishes. We'll be able to acquire the lock - // below only after the other thread finishes with us. - spin_mutex::scoped_lock lock(cls.mutex); - } - } - } else { - d1::task_group_context::lifetime_state expected = d1::task_group_context::lifetime_state::bound; - if ( -#if defined(__INTEL_COMPILER) && __INTEL_COMPILER <= 1910 - !((std::atomic<typename std::underlying_type<d1::task_group_context::lifetime_state>::type>&)ctx.my_lifetime_state).compare_exchange_strong( - (typename std::underlying_type<d1::task_group_context::lifetime_state>::type&)expected, - (typename std::underlying_type<d1::task_group_context::lifetime_state>::type)d1::task_group_context::lifetime_state::locked) -#else - !ctx.my_lifetime_state.compare_exchange_strong(expected, d1::task_group_context::lifetime_state::locked) -#endif - ) { - __TBB_ASSERT(expected == d1::task_group_context::lifetime_state::detached, nullptr); - // The "owner" local variable can be a dangling pointer here. Do not access it. - owner = nullptr; - spin_wait_until_eq(ctx.my_owner, nullptr); - // It is unsafe to remove the node because its neighbors might be already destroyed. - // TODO: reconsider the logic. 
- // ctx.my_node.remove_relaxed(); - } - else { - __TBB_ASSERT(expected == d1::task_group_context::lifetime_state::bound, nullptr); - __TBB_ASSERT(ctx.my_owner.load(std::memory_order_relaxed) != nullptr, nullptr); - thread_data::context_list_state& cls = owner->my_context_list_state; - __TBB_ASSERT(is_alive(cls.nonlocal_update.load(std::memory_order_relaxed)), "The owner should be alive."); - - ++cls.nonlocal_update; - ctx.my_lifetime_state.store(d1::task_group_context::lifetime_state::dying, std::memory_order_release); - spin_wait_until_eq(cls.local_update, 0u); - { - spin_mutex::scoped_lock lock(cls.mutex); - ctx.my_node.remove_relaxed(); - } - --cls.nonlocal_update; - } - } - } - - if (ctx_lifetime_state == d1::task_group_context::lifetime_state::detached) { - spin_wait_until_eq(ctx.my_owner, nullptr); + ctx.my_context_list->remove(ctx.my_node); } - d1::cpu_ctl_env* ctl = reinterpret_cast<d1::cpu_ctl_env*>(&ctx.my_cpu_ctl_env); #if _MSC_VER && _MSC_VER <= 1900 && !__INTEL_COMPILER suppress_unused_warning(ctl); #endif ctl->~cpu_ctl_env(); - if (ctx.my_exception) - ctx.my_exception->destroy(); + auto exception = ctx.my_exception.load(std::memory_order_relaxed); + if (exception) { + exception->destroy(); + } ITT_STACK_DESTROY(ctx.my_itt_caller); poison_pointer(ctx.my_parent); - poison_pointer(ctx.my_parent); - poison_pointer(ctx.my_owner); - poison_pointer(ctx.my_node.next); - poison_pointer(ctx.my_node.prev); + poison_pointer(ctx.my_context_list); + poison_pointer(ctx.my_node.my_next_node); + poison_pointer(ctx.my_node.my_prev_node); poison_pointer(ctx.my_exception); poison_pointer(ctx.my_itt_caller); + + ctx.my_state.store(d1::task_group_context::state::dead, std::memory_order_release); } void task_group_context_impl::initialize(d1::task_group_context& ctx) { ITT_TASK_GROUP(&ctx, ctx.my_name, nullptr); + ctx.my_node.my_next_node = &ctx.my_node; + ctx.my_node.my_prev_node = &ctx.my_node; ctx.my_cpu_ctl_env = 0; ctx.my_cancellation_requested = 0; - 
ctx.my_state.store(0, std::memory_order_relaxed); + ctx.my_may_have_children.store(0, std::memory_order_relaxed); // Set the created state to bound at the first usage. - ctx.my_lifetime_state.store(d1::task_group_context::lifetime_state::created, std::memory_order_relaxed); + ctx.my_state.store(d1::task_group_context::state::created, std::memory_order_relaxed); ctx.my_parent = nullptr; - ctx.my_owner = nullptr; - ctx.my_node.next.store(nullptr, std::memory_order_relaxed); - ctx.my_node.next.store(nullptr, std::memory_order_relaxed); - ctx.my_exception = nullptr; + ctx.my_context_list = nullptr; + ctx.my_exception.store(nullptr, std::memory_order_relaxed); ctx.my_itt_caller = nullptr; static_assert(sizeof(d1::cpu_ctl_env) <= sizeof(ctx.my_cpu_ctl_env), "FPU settings storage does not fit to uint64_t"); @@ -168,54 +103,28 @@ void task_group_context_impl::initialize(d1::task_group_context& ctx) { } void task_group_context_impl::register_with(d1::task_group_context& ctx, thread_data* td) { - __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL); - __TBB_ASSERT(td, NULL); - ctx.my_owner.store(td, std::memory_order_relaxed); - thread_data::context_list_state& cls = td->my_context_list_state; - // state propagation logic assumes new contexts are bound to head of the list - ctx.my_node.prev.store(&cls.head, std::memory_order_relaxed); - // Notify threads that may be concurrently destroying contexts registered - // in this scheduler's list that local list update is underway. - // Prevent load of global propagation epoch counter from being hoisted before - // speculative stores above, as well as load of nonlocal update flag from - // being hoisted before the store to local update flag. 
- cls.local_update = 1; - // Finalize local context list update - if (cls.nonlocal_update.load(std::memory_order_relaxed)) { - spin_mutex::scoped_lock lock(cls.mutex); - d1::context_list_node* head_next = cls.head.next.load(std::memory_order_relaxed); - head_next->prev.store(&ctx.my_node, std::memory_order_relaxed); - ctx.my_node.next.store(head_next, std::memory_order_relaxed); - cls.local_update.store(0, std::memory_order_relaxed); - cls.head.next.store(&ctx.my_node, std::memory_order_relaxed); - } else { - d1::context_list_node* head_next = cls.head.next.load(std::memory_order_relaxed); - head_next->prev.store(&ctx.my_node, std::memory_order_relaxed); - ctx.my_node.next.store(head_next, std::memory_order_relaxed); - cls.local_update.store(0, std::memory_order_release); - // Thread-local list of contexts allows concurrent traversal by another thread - // while propagating state change. To ensure visibility of ctx.my_node's members - // to the concurrently traversing thread, the list's head is updated by means - // of store-with-release. 
- cls.head.next.store(&ctx.my_node, std::memory_order_release); - } + __TBB_ASSERT(!is_poisoned(ctx.my_context_list), nullptr); + __TBB_ASSERT(td, nullptr); + ctx.my_context_list = td->my_context_list; + + ctx.my_context_list->push_front(ctx.my_node); } void task_group_context_impl::bind_to_impl(d1::task_group_context& ctx, thread_data* td) { - __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL); - __TBB_ASSERT(ctx.my_lifetime_state.load(std::memory_order_relaxed) == d1::task_group_context::lifetime_state::locked, "The context can be bound only under the lock."); + __TBB_ASSERT(!is_poisoned(ctx.my_context_list), nullptr); + __TBB_ASSERT(ctx.my_state.load(std::memory_order_relaxed) == d1::task_group_context::state::locked, "The context can be bound only under the lock."); __TBB_ASSERT(!ctx.my_parent, "Parent is set before initial binding"); ctx.my_parent = td->my_task_dispatcher->m_execute_data_ext.context; - __TBB_ASSERT(ctx.my_parent, NULL); + __TBB_ASSERT(ctx.my_parent, nullptr); // Inherit FPU settings only if the context has not captured FPU settings yet. 
if (!ctx.my_traits.fp_settings) copy_fp_settings(ctx, *ctx.my_parent); // Condition below prevents unnecessary thrashing parent context's cache line - if (ctx.my_parent->my_state.load(std::memory_order_relaxed) != d1::task_group_context::may_have_children) { - ctx.my_parent->my_state.store(d1::task_group_context::may_have_children, std::memory_order_relaxed); // full fence is below + if (ctx.my_parent->my_may_have_children.load(std::memory_order_relaxed) != d1::task_group_context::may_have_children) { + ctx.my_parent->my_may_have_children.store(d1::task_group_context::may_have_children, std::memory_order_relaxed); // full fence is below } if (ctx.my_parent->my_parent) { // Even if this context were made accessible for state change propagation @@ -229,7 +138,7 @@ void task_group_context_impl::bind_to_impl(d1::task_group_context& ctx, thread_d // Acquire fence is necessary to prevent reordering subsequent speculative // loads of parent state data out of the scope where epoch counters comparison // can reliably validate it. - uintptr_t local_count_snapshot = ctx.my_parent->my_owner.load(std::memory_order_relaxed)->my_context_list_state.epoch.load(std::memory_order_acquire); + uintptr_t local_count_snapshot = ctx.my_parent->my_context_list->epoch.load(std::memory_order_acquire); // Speculative propagation of parent's state. The speculation will be // validated by the epoch counters check further on. ctx.my_cancellation_requested.store(ctx.my_parent->my_cancellation_requested.load(std::memory_order_relaxed), std::memory_order_relaxed); @@ -251,60 +160,61 @@ void task_group_context_impl::bind_to_impl(d1::task_group_context& ctx, thread_d // copy the state from it. 
ctx.my_cancellation_requested.store(ctx.my_parent->my_cancellation_requested.load(std::memory_order_relaxed), std::memory_order_relaxed); } - - ctx.my_lifetime_state.store(d1::task_group_context::lifetime_state::bound, std::memory_order_release); } void task_group_context_impl::bind_to(d1::task_group_context& ctx, thread_data* td) { - __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL); - d1::task_group_context::lifetime_state state = ctx.my_lifetime_state.load(std::memory_order_acquire); - if (state <= d1::task_group_context::lifetime_state::locked) { - if (state == d1::task_group_context::lifetime_state::created && + d1::task_group_context::state state = ctx.my_state.load(std::memory_order_acquire); + if (state <= d1::task_group_context::state::locked) { + if (state == d1::task_group_context::state::created && #if defined(__INTEL_COMPILER) && __INTEL_COMPILER <= 1910 - ((std::atomic<typename std::underlying_type<d1::task_group_context::lifetime_state>::type>&)ctx.my_lifetime_state).compare_exchange_strong( - (typename std::underlying_type<d1::task_group_context::lifetime_state>::type&)state, - (typename std::underlying_type<d1::task_group_context::lifetime_state>::type)d1::task_group_context::lifetime_state::locked) + ((std::atomic<typename std::underlying_type<d1::task_group_context::state>::type>&)ctx.my_state).compare_exchange_strong( + (typename std::underlying_type<d1::task_group_context::state>::type&)state, + (typename std::underlying_type<d1::task_group_context::state>::type)d1::task_group_context::state::locked) #else - ctx.my_lifetime_state.compare_exchange_strong(state, d1::task_group_context::lifetime_state::locked) + ctx.my_state.compare_exchange_strong(state, d1::task_group_context::state::locked) #endif ) { // If we are in the outermost task dispatch loop of an external thread, then // there is nothing to bind this context to, and we skip the binding part // treating the context as isolated. 
__TBB_ASSERT(td->my_task_dispatcher->m_execute_data_ext.context != nullptr, nullptr); + d1::task_group_context::state release_state{}; if (td->my_task_dispatcher->m_execute_data_ext.context == td->my_arena->my_default_ctx || !ctx.my_traits.bound) { if (!ctx.my_traits.fp_settings) { copy_fp_settings(ctx, *td->my_arena->my_default_ctx); } - ctx.my_lifetime_state.store(d1::task_group_context::lifetime_state::isolated, std::memory_order_release); + release_state = d1::task_group_context::state::isolated; } else { bind_to_impl(ctx, td); + release_state = d1::task_group_context::state::bound; } ITT_STACK_CREATE(ctx.my_itt_caller); + ctx.my_state.store(release_state, std::memory_order_release); } - spin_wait_while_eq(ctx.my_lifetime_state, d1::task_group_context::lifetime_state::locked); + spin_wait_while_eq(ctx.my_state, d1::task_group_context::state::locked); } - __TBB_ASSERT(ctx.my_lifetime_state.load(std::memory_order_relaxed) != d1::task_group_context::lifetime_state::created, NULL); - __TBB_ASSERT(ctx.my_lifetime_state.load(std::memory_order_relaxed) != d1::task_group_context::lifetime_state::locked, NULL); + __TBB_ASSERT(ctx.my_state.load(std::memory_order_relaxed) != d1::task_group_context::state::created, nullptr); + __TBB_ASSERT(ctx.my_state.load(std::memory_order_relaxed) != d1::task_group_context::state::locked, nullptr); } template <typename T> void task_group_context_impl::propagate_task_group_state(d1::task_group_context& ctx, std::atomic<T> d1::task_group_context::* mptr_state, d1::task_group_context& src, T new_state) { - __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL); - if ((ctx.*mptr_state).load(std::memory_order_relaxed) == new_state) { - // Nothing to do, whether descending from "src" or not, so no need to scan. - // Hopefully this happens often thanks to earlier invocations. 
- // This optimization is enabled by LIFO order in the context lists: - // - new contexts are bound to the beginning of lists; - // - descendants are newer than ancestors; - // - earlier invocations are therefore likely to "paint" long chains. - } else if (&ctx == &src) { - // This clause is disjunct from the traversal below, which skips src entirely. - // Note that src.*mptr_state is not necessarily still equal to new_state (another thread may have changed it again). - // Such interference is probably not frequent enough to aim for optimisation by writing new_state again (to make the other thread back down). - // Letting the other thread prevail may also be fairer. - } else { - for (d1::task_group_context* ancestor = ctx.my_parent; ancestor != NULL; ancestor = ancestor->my_parent) { + __TBB_ASSERT(!is_poisoned(ctx.my_context_list), nullptr); + /* 1. if ((ctx.*mptr_state).load(std::memory_order_relaxed) == new_state): + Nothing to do, whether descending from "src" or not, so no need to scan. + Hopefully this happens often thanks to earlier invocations. + This optimization is enabled by LIFO order in the context lists: + - new contexts are bound to the beginning of lists; + - descendants are newer than ancestors; + - earlier invocations are therefore likely to "paint" long chains. + 2. if (&ctx != &src): + This clause is disjunct from the traversal below, which skips src entirely. + Note that src.*mptr_state is not necessarily still equal to new_state (another thread may have changed it again). + Such interference is probably not frequent enough to aim for optimisation by writing new_state again (to make the other thread back down). + Letting the other thread prevail may also be fairer. 
+ */ + if ((ctx.*mptr_state).load(std::memory_order_relaxed) != new_state && &ctx != &src) { + for (d1::task_group_context* ancestor = ctx.my_parent; ancestor != nullptr; ancestor = ancestor->my_parent) { if (ancestor == &src) { for (d1::task_group_context* c = &ctx; c != ancestor; c = c->my_parent) (c->*mptr_state).store(new_state, std::memory_order_relaxed); @@ -314,8 +224,52 @@ void task_group_context_impl::propagate_task_group_state(d1::task_group_context& } } +template <typename T> +void thread_data::propagate_task_group_state(std::atomic<T> d1::task_group_context::* mptr_state, d1::task_group_context& src, T new_state) { + mutex::scoped_lock lock(my_context_list->m_mutex); + // Acquire fence is necessary to ensure that the subsequent node->my_next load + // returned the correct value in case it was just inserted in another thread. + // The fence also ensures visibility of the correct ctx.my_parent value. + for (context_list::iterator it = my_context_list->begin(); it != my_context_list->end(); ++it) { + d1::task_group_context& ctx = __TBB_get_object_ref(d1::task_group_context, my_node, &(*it)); + if ((ctx.*mptr_state).load(std::memory_order_relaxed) != new_state) + task_group_context_impl::propagate_task_group_state(ctx, mptr_state, src, new_state); + } + // Sync up local propagation epoch with the global one. Release fence prevents + // reordering of possible store to *mptr_state after the sync point. 
+ my_context_list->epoch.store(the_context_state_propagation_epoch.load(std::memory_order_relaxed), std::memory_order_release); +} + +template <typename T> +bool market::propagate_task_group_state(std::atomic<T> d1::task_group_context::* mptr_state, d1::task_group_context& src, T new_state) { + if (src.my_may_have_children.load(std::memory_order_relaxed) != d1::task_group_context::may_have_children) + return true; + // The whole propagation algorithm is under the lock in order to ensure correctness + // in case of concurrent state changes at the different levels of the context tree. + // See comment at the bottom of scheduler.cpp + context_state_propagation_mutex_type::scoped_lock lock(the_context_state_propagation_mutex); + if ((src.*mptr_state).load(std::memory_order_relaxed) != new_state) + // Another thread has concurrently changed the state. Back down. + return false; + // Advance global state propagation epoch + ++the_context_state_propagation_epoch; + // Propagate to all workers and external threads and sync up their local epochs with the global one + unsigned num_workers = my_first_unused_worker_idx; + for (unsigned i = 0; i < num_workers; ++i) { + thread_data* td = my_workers[i].load(std::memory_order_acquire); + // If the worker is only about to be registered, skip it. 
+ if (td) + td->propagate_task_group_state(mptr_state, src, new_state); + } + // Propagate to all external threads + // The whole propagation sequence is locked, thus no contention is expected + for (thread_data_list_type::iterator it = my_masters.begin(); it != my_masters.end(); it++) + it->propagate_task_group_state(mptr_state, src, new_state); + return true; +} + bool task_group_context_impl::cancel_group_execution(d1::task_group_context& ctx) { - __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL); + __TBB_ASSERT(!is_poisoned(ctx.my_context_list), nullptr); __TBB_ASSERT(ctx.my_cancellation_requested.load(std::memory_order_relaxed) <= 1, "The cancellation state can be either 0 or 1"); if (ctx.my_cancellation_requested.load(std::memory_order_relaxed) || ctx.my_cancellation_requested.exchange(1)) { // This task group and any descendants have already been canceled. @@ -333,20 +287,22 @@ bool task_group_context_impl::is_group_execution_cancelled(const d1::task_group_ // IMPORTANT: It is assumed that this method is not used concurrently! void task_group_context_impl::reset(d1::task_group_context& ctx) { - __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL); + __TBB_ASSERT(!is_poisoned(ctx.my_context_list), nullptr); //! TODO: Add assertion that this context does not have children // No fences are necessary since this context can be accessed from another thread // only after stealing happened (which means necessary fences were used). - if (ctx.my_exception) { - ctx.my_exception->destroy(); - ctx.my_exception = NULL; + + auto exception = ctx.my_exception.load(std::memory_order_relaxed); + if (exception) { + exception->destroy(); + ctx.my_exception.store(nullptr, std::memory_order_relaxed); } ctx.my_cancellation_requested = 0; } // IMPORTANT: It is assumed that this method is not used concurrently! 
void task_group_context_impl::capture_fp_settings(d1::task_group_context& ctx) { - __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL); + __TBB_ASSERT(!is_poisoned(ctx.my_context_list), nullptr); //! TODO: Add assertion that this context does not have children // No fences are necessary since this context can be accessed from another thread // only after stealing happened (which means necessary fences were used). @@ -359,7 +315,7 @@ void task_group_context_impl::capture_fp_settings(d1::task_group_context& ctx) { } void task_group_context_impl::copy_fp_settings(d1::task_group_context& ctx, const d1::task_group_context& src) { - __TBB_ASSERT(!is_poisoned(ctx.my_owner), NULL); + __TBB_ASSERT(!is_poisoned(ctx.my_context_list), nullptr); __TBB_ASSERT(!ctx.my_traits.fp_settings, "The context already has FPU settings."); __TBB_ASSERT(src.my_traits.fp_settings, "The source context does not have FPU settings."); @@ -368,52 +324,6 @@ void task_group_context_impl::copy_fp_settings(d1::task_group_context& ctx, cons ctx.my_traits.fp_settings = true; } -template <typename T> -void thread_data::propagate_task_group_state(std::atomic<T> d1::task_group_context::* mptr_state, d1::task_group_context& src, T new_state) { - spin_mutex::scoped_lock lock(my_context_list_state.mutex); - // Acquire fence is necessary to ensure that the subsequent node->my_next load - // returned the correct value in case it was just inserted in another thread. - // The fence also ensures visibility of the correct ctx.my_parent value. 
- d1::context_list_node* node = my_context_list_state.head.next.load(std::memory_order_acquire); - while (node != &my_context_list_state.head) { - d1::task_group_context& ctx = __TBB_get_object_ref(d1::task_group_context, my_node, node); - if ((ctx.*mptr_state).load(std::memory_order_relaxed) != new_state) - task_group_context_impl::propagate_task_group_state(ctx, mptr_state, src, new_state); - node = node->next.load(std::memory_order_relaxed); - } - // Sync up local propagation epoch with the global one. Release fence prevents - // reordering of possible store to *mptr_state after the sync point. - my_context_list_state.epoch.store(the_context_state_propagation_epoch.load(std::memory_order_relaxed), std::memory_order_release); -} - -template <typename T> -bool market::propagate_task_group_state(std::atomic<T> d1::task_group_context::* mptr_state, d1::task_group_context& src, T new_state) { - if (src.my_state.load(std::memory_order_relaxed) != d1::task_group_context::may_have_children) - return true; - // The whole propagation algorithm is under the lock in order to ensure correctness - // in case of concurrent state changes at the different levels of the context tree. - // See comment at the bottom of scheduler.cpp - context_state_propagation_mutex_type::scoped_lock lock(the_context_state_propagation_mutex); - if ((src.*mptr_state).load(std::memory_order_relaxed) != new_state) - // Another thread has concurrently changed the state. Back down. - return false; - // Advance global state propagation epoch - ++the_context_state_propagation_epoch; - // Propagate to all workers and external threads and sync up their local epochs with the global one - unsigned num_workers = my_first_unused_worker_idx; - for (unsigned i = 0; i < num_workers; ++i) { - thread_data* td = my_workers[i]; - // If the worker is only about to be registered, skip it. 
- if (td) - td->propagate_task_group_state(mptr_state, src, new_state); - } - // Propagate to all external threads - // The whole propagation sequence is locked, thus no contention is expected - for (thread_data_list_type::iterator it = my_masters.begin(); it != my_masters.end(); it++) - it->propagate_task_group_state(mptr_state, src, new_state); - return true; -} - /* Comments: diff --git a/contrib/libs/tbb/src/tbb/task_stream.h b/contrib/libs/tbb/src/tbb/task_stream.h index f32ef94e80..dc0b6818bc 100644 --- a/contrib/libs/tbb/src/tbb/task_stream.h +++ b/contrib/libs/tbb/src/tbb/task_stream.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -25,9 +25,8 @@ //! would be suitable for critical tasks due to linear time complexity on its operations. #include "oneapi/tbb/detail/_utils.h" - -#include "oneapi/tbb/spin_mutex.h" #include "oneapi/tbb/cache_aligned_allocator.h" +#include "oneapi/tbb/mutex.h" #include "scheduler_common.h" #include "misc.h" // for FastRandom @@ -55,20 +54,20 @@ using population_t = uintptr_t; const population_t one = 1; inline void set_one_bit( std::atomic<population_t>& dest, int pos ) { - __TBB_ASSERT( pos>=0, NULL ); - __TBB_ASSERT( pos<int(sizeof(population_t)*CHAR_BIT), NULL ); + __TBB_ASSERT( pos>=0, nullptr); + __TBB_ASSERT( pos<int(sizeof(population_t)*CHAR_BIT), nullptr); dest.fetch_or( one<<pos ); } inline void clear_one_bit( std::atomic<population_t>& dest, int pos ) { - __TBB_ASSERT( pos>=0, NULL ); - __TBB_ASSERT( pos<int(sizeof(population_t)*CHAR_BIT), NULL ); + __TBB_ASSERT( pos>=0, nullptr); + __TBB_ASSERT( pos<int(sizeof(population_t)*CHAR_BIT), nullptr); dest.fetch_and( ~(one<<pos) ); } inline bool is_bit_set( population_t val, int pos ) { - __TBB_ASSERT( pos>=0, NULL ); - __TBB_ASSERT( pos<int(sizeof(population_t)*CHAR_BIT), NULL ); + 
__TBB_ASSERT( pos>=0, nullptr); + __TBB_ASSERT( pos<int(sizeof(population_t)*CHAR_BIT), nullptr); return (val & (one<<pos)) != 0; } @@ -120,7 +119,7 @@ struct preceding_lane_selector : lane_selector_base { template<task_stream_accessor_type accessor> class task_stream_accessor : no_copy { protected: - using lane_t = queue_and_mutex <d1::task*, spin_mutex>; + using lane_t = queue_and_mutex <d1::task*, mutex>; d1::task* get_item( lane_t::queue_base_t& queue ) { d1::task* result = queue.front(); queue.pop_front(); @@ -131,7 +130,7 @@ protected: template<> class task_stream_accessor< back_nonnull_accessor > : no_copy { protected: - using lane_t = queue_and_mutex <d1::task*, spin_mutex>; + using lane_t = queue_and_mutex <d1::task*, mutex>; d1::task* get_item( lane_t::queue_base_t& queue ) { d1::task* result = nullptr; __TBB_ASSERT(!queue.empty(), nullptr); @@ -140,8 +139,6 @@ protected: result = queue.back(); queue.pop_back(); } while ( !result && !queue.empty() ); - - __TBB_ASSERT_RELEASE(result, nullptr); return result; } }; @@ -162,12 +159,12 @@ public: N = n_lanes >= max_lanes ? max_lanes : n_lanes > 2 ? 1 << (tbb::detail::log2(n_lanes - 1) + 1) : 2; __TBB_ASSERT( N == max_lanes || (N >= n_lanes && ((N - 1) & N) == 0), "number of lanes miscalculated" ); - __TBB_ASSERT( N <= sizeof(population_t) * CHAR_BIT, NULL ); + __TBB_ASSERT( N <= sizeof(population_t) * CHAR_BIT, nullptr); lanes = static_cast<lane_t*>(cache_aligned_allocate(sizeof(lane_t) * N)); for (unsigned i = 0; i < N; ++i) { new (lanes + i) lane_t; } - __TBB_ASSERT( !population.load(std::memory_order_relaxed), NULL ); + __TBB_ASSERT( !population.load(std::memory_order_relaxed), nullptr); } ~task_stream() { @@ -194,7 +191,7 @@ public: //! updated inside lane selector. template<typename lane_selector_t> d1::task* pop( const lane_selector_t& next_lane ) { - d1::task* popped = NULL; + d1::task* popped = nullptr; unsigned lane = 0; do { lane = next_lane( /*out_of=*/N ); @@ -205,13 +202,13 @@ public: //! 
Try finding and popping a related task. d1::task* pop_specific( unsigned& last_used_lane, isolation_type isolation ) { - d1::task* result = NULL; + d1::task* result = nullptr; // Lane selection is round-robin in backward direction. unsigned idx = last_used_lane & (N-1); do { if( is_bit_set( population.load(std::memory_order_relaxed), idx ) ) { lane_t& lane = lanes[idx]; - spin_mutex::scoped_lock lock; + mutex::scoped_lock lock; if( lock.try_acquire(lane.my_mutex) && !lane.my_queue.empty() ) { result = look_specific( lane.my_queue, isolation ); if( lane.my_queue.empty() ) @@ -234,7 +231,7 @@ public: private: //! Returns true on successful push, otherwise - false. bool try_push(d1::task* source, unsigned lane_idx ) { - spin_mutex::scoped_lock lock; + mutex::scoped_lock lock; if( lock.try_acquire( lanes[lane_idx].my_mutex ) ) { lanes[lane_idx].my_queue.push_back( source ); set_one_bit( population, lane_idx ); // TODO: avoid atomic op if the bit is already set @@ -243,13 +240,13 @@ private: return false; } - //! Returns pointer to task on successful pop, otherwise - NULL. + //! Returns pointer to task on successful pop, otherwise - nullptr. d1::task* try_pop( unsigned lane_idx ) { if( !is_bit_set( population.load(std::memory_order_relaxed), lane_idx ) ) - return NULL; - d1::task* result = NULL; + return nullptr; + d1::task* result = nullptr; lane_t& lane = lanes[lane_idx]; - spin_mutex::scoped_lock lock; + mutex::scoped_lock lock; if( lock.try_acquire( lane.my_mutex ) && !lane.my_queue.empty() ) { result = this->get_item( lane.my_queue ); if( lane.my_queue.empty() ) @@ -260,7 +257,7 @@ private: // TODO: unify '*_specific' logic with 'pop' methods above d1::task* look_specific( typename lane_t::queue_base_t& queue, isolation_type isolation ) { - __TBB_ASSERT( !queue.empty(), NULL ); + __TBB_ASSERT( !queue.empty(), nullptr); // TODO: add a worst-case performance test and consider an alternative container with better // performance for isolation search. 
typename lane_t::queue_base_t::iterator curr = queue.end(); @@ -271,12 +268,12 @@ private: if( queue.end() - curr == 1 ) queue.pop_back(); // a little of housekeeping along the way else - *curr = 0; // grabbing task with the same isolation + *curr = nullptr; // grabbing task with the same isolation // TODO: move one of the container's ends instead if the task has been found there return result; } } while( curr != queue.begin() ); - return NULL; + return nullptr; } }; // task_stream diff --git a/contrib/libs/tbb/src/tbb/thread_data.h b/contrib/libs/tbb/src/tbb/thread_data.h index 41d4a0cf60..808f3cc311 100644 --- a/contrib/libs/tbb/src/tbb/thread_data.h +++ b/contrib/libs/tbb/src/tbb/thread_data.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2020-2021 Intel Corporation + Copyright (c) 2020-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -40,6 +40,55 @@ class arena_slot; class task_group_context; class task_dispatcher; +class context_list : public intrusive_list<intrusive_list_node> { +public: + bool orphaned{false}; + + //! Last state propagation epoch known to this thread + /** Together with the_context_state_propagation_epoch constitute synchronization protocol + that keeps hot path of task group context construction destruction mostly + lock-free. + When local epoch equals the global one, the state of task group contexts + registered with this thread is consistent with that of the task group trees + they belong to. **/ + std::atomic<std::uintptr_t> epoch{}; + + //! Mutex protecting access to the list of task group contexts. 
+ d1::mutex m_mutex{}; + + void destroy() { + this->~context_list(); + cache_aligned_deallocate(this); + } + + void remove(intrusive_list_node& val) { + mutex::scoped_lock lock(m_mutex); + + intrusive_list<intrusive_list_node>::remove(val); + + if (orphaned && empty()) { + lock.release(); + destroy(); + } + } + + void push_front(intrusive_list_node& val) { + mutex::scoped_lock lock(m_mutex); + + intrusive_list<intrusive_list_node>::push_front(val); + } + + void orphan() { + mutex::scoped_lock lock(m_mutex); + + orphaned = true; + if (empty()) { + lock.release(); + destroy(); + } + } +}; + //------------------------------------------------------------------------ // Thread Data //------------------------------------------------------------------------ @@ -53,42 +102,38 @@ public: , my_task_dispatcher{ nullptr } , my_arena{} , my_arena_slot{} - , my_inbox{} , my_random{ this } , my_last_observer{ nullptr } , my_small_object_pool{new (cache_aligned_allocate(sizeof(small_object_pool_impl))) small_object_pool_impl{}} - , my_context_list_state{} + , my_context_list(new (cache_aligned_allocate(sizeof(context_list))) context_list{}) #if __TBB_RESUMABLE_TASKS - , my_post_resume_action{ post_resume_action::none } + , my_post_resume_action{ task_dispatcher::post_resume_action::none } , my_post_resume_arg{nullptr} #endif /* __TBB_RESUMABLE_TASKS */ { - ITT_SYNC_CREATE(&my_context_list_state.mutex, SyncType_Scheduler, SyncObj_ContextsList); - my_context_list_state.head.next.store(&my_context_list_state.head, std::memory_order_relaxed); - my_context_list_state.head.prev.store(&my_context_list_state.head, std::memory_order_relaxed); + ITT_SYNC_CREATE(&my_context_list->m_mutex, SyncType_Scheduler, SyncObj_ContextsList); } ~thread_data() { - context_list_cleanup(); + my_context_list->orphan(); my_small_object_pool->destroy(); poison_pointer(my_task_dispatcher); poison_pointer(my_arena); poison_pointer(my_arena_slot); poison_pointer(my_last_observer); 
poison_pointer(my_small_object_pool); + poison_pointer(my_context_list); #if __TBB_RESUMABLE_TASKS poison_pointer(my_post_resume_arg); #endif /* __TBB_RESUMABLE_TASKS */ - poison_value(my_context_list_state.epoch); - poison_value(my_context_list_state.local_update); - poison_value(my_context_list_state.nonlocal_update); } void attach_arena(arena& a, std::size_t index); bool is_attached_to(arena*); void attach_task_dispatcher(task_dispatcher&); void detach_task_dispatcher(); - void context_list_cleanup(); + void enter_task_dispatcher(task_dispatcher& task_disp, std::uintptr_t stealing_threshold); + void leave_task_dispatcher(); template <typename T> void propagate_task_group_state(std::atomic<T> d1::task_group_context::* mptr_state, d1::task_group_context& src, T new_state); @@ -119,58 +164,8 @@ public: //! Pool of small object for fast task allocation small_object_pool_impl* my_small_object_pool; - struct context_list_state { - //! Head of the thread specific list of task group contexts. - d1::context_list_node head{}; - - //! Mutex protecting access to the list of task group contexts. - // TODO: check whether it can be deadly preempted and replace by spinning/sleeping mutex - spin_mutex mutex{}; - - //! Last state propagation epoch known to this thread - /** Together with the_context_state_propagation_epoch constitute synchronization protocol - that keeps hot path of task group context construction destruction mostly - lock-free. - When local epoch equals the global one, the state of task group contexts - registered with this thread is consistent with that of the task group trees - they belong to. **/ - std::atomic<std::uintptr_t> epoch{}; - - //! Flag indicating that a context is being destructed by its owner thread - /** Together with my_nonlocal_ctx_list_update constitute synchronization protocol - that keeps hot path of context destruction (by the owner thread) mostly - lock-free. **/ - std::atomic<std::uintptr_t> local_update{}; - - //! 
Flag indicating that a context is being destructed by non-owner thread. - /** See also my_local_update. **/ - std::atomic<std::uintptr_t> nonlocal_update{}; - } my_context_list_state; - + context_list* my_context_list; #if __TBB_RESUMABLE_TASKS - //! The list of possible post resume actions. - enum class post_resume_action { - invalid, - register_waiter, - resume, - callback, - cleanup, - notify, - none - }; - - //! The callback to call the user callback passed to tbb::suspend. - struct suspend_callback_wrapper { - suspend_callback_type suspend_callback; - void* user_callback; - suspend_point_type* tag; - - void operator()() { - __TBB_ASSERT(suspend_callback && user_callback && tag, nullptr); - suspend_callback(user_callback, tag); - } - }; - //! Suspends the current coroutine (task_dispatcher). void suspend(void* suspend_callback, void* user_callback); @@ -178,23 +173,20 @@ public: void resume(task_dispatcher& target); //! Set post resume action to perform after resume. - void set_post_resume_action(post_resume_action pra, void* arg) { - __TBB_ASSERT(my_post_resume_action == post_resume_action::none, "The Post resume action must not be set"); + void set_post_resume_action(task_dispatcher::post_resume_action pra, void* arg) { + __TBB_ASSERT(my_post_resume_action == task_dispatcher::post_resume_action::none, "The Post resume action must not be set"); __TBB_ASSERT(!my_post_resume_arg, "The post resume action must not have an argument"); my_post_resume_action = pra; my_post_resume_arg = arg; } void clear_post_resume_action() { - my_post_resume_action = thread_data::post_resume_action::none; + my_post_resume_action = task_dispatcher::post_resume_action::none; my_post_resume_arg = nullptr; } - //! Performs post resume action. - void do_post_resume_action(); - //! The post resume action requested after the swap contexts. - post_resume_action my_post_resume_action; + task_dispatcher::post_resume_action my_post_resume_action; //! The post resume action argument. 
void* my_post_resume_arg; @@ -216,41 +208,6 @@ inline void thread_data::attach_arena(arena& a, std::size_t index) { inline bool thread_data::is_attached_to(arena* a) { return my_arena == a; } -inline void thread_data::context_list_cleanup() { - // Detach contexts remaining in the local list. - { - spin_mutex::scoped_lock lock(my_context_list_state.mutex); - d1::context_list_node* node = my_context_list_state.head.next.load(std::memory_order_relaxed); - while (node != &my_context_list_state.head) { - using state_t = d1::task_group_context::lifetime_state; - - d1::task_group_context& ctx = __TBB_get_object_ref(d1::task_group_context, my_node, node); - std::atomic<state_t>& state = ctx.my_lifetime_state; - - node = node->next.load(std::memory_order_relaxed); - - __TBB_ASSERT(ctx.my_owner == this, "The context should belong to the current thread."); - state_t expected = state_t::bound; - if ( -#if defined(__INTEL_COMPILER) && __INTEL_COMPILER <= 1910 - !((std::atomic<typename std::underlying_type<state_t>::type>&)state).compare_exchange_strong( - (typename std::underlying_type<state_t>::type&)expected, - (typename std::underlying_type<state_t>::type)state_t::detached) -#else - !state.compare_exchange_strong(expected, state_t::detached) -#endif - ) { - __TBB_ASSERT(expected == state_t::locked || expected == state_t::dying, nullptr); - spin_wait_until_eq(state, state_t::dying); - } else { - __TBB_ASSERT(expected == state_t::bound, nullptr); - ctx.my_owner.store(nullptr, std::memory_order_release); - } - } - } - spin_wait_until_eq(my_context_list_state.nonlocal_update, 0u); -} - inline void thread_data::attach_task_dispatcher(task_dispatcher& task_disp) { __TBB_ASSERT(my_task_dispatcher == nullptr, nullptr); __TBB_ASSERT(task_disp.m_thread_data == nullptr, nullptr); @@ -265,6 +222,16 @@ inline void thread_data::detach_task_dispatcher() { my_task_dispatcher = nullptr; } +inline void thread_data::enter_task_dispatcher(task_dispatcher& task_disp, std::uintptr_t 
stealing_threshold) { + task_disp.set_stealing_threshold(stealing_threshold); + attach_task_dispatcher(task_disp); +} + +inline void thread_data::leave_task_dispatcher() { + my_task_dispatcher->set_stealing_threshold(0); + detach_task_dispatcher(); +} + } // namespace r1 } // namespace detail } // namespace tbb diff --git a/contrib/libs/tbb/src/tbb/tls.h b/contrib/libs/tbb/src/tbb/tls.h index 5d28ca4dae..e87a943aa2 100644 --- a/contrib/libs/tbb/src/tbb/tls.h +++ b/contrib/libs/tbb/src/tbb/tls.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2005-2021 Intel Corporation + Copyright (c) 2005-2022 Intel Corporation Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -37,7 +37,7 @@ class basic_tls { #if __TBB_USE_POSIX typedef pthread_key_t tls_key_t; public: - int create( tls_dtor_t dtor = NULL ) { + int create( tls_dtor_t dtor = nullptr ) { return pthread_key_create(&my_key, dtor); } int destroy() { return pthread_key_delete(my_key); } @@ -59,7 +59,7 @@ public: T get() { return (T)TlsGetValue(my_key); } #else /*!__TBB_WIN8UI_SUPPORT*/ int create() { - tls_key_t tmp = FlsAlloc(NULL); + tls_key_t tmp = FlsAlloc(nullptr); if( tmp== (DWORD)0xFFFFFFFF ) return (DWORD)0xFFFFFFFF; my_key = tmp; @@ -74,18 +74,6 @@ private: tls_key_t my_key; }; -//! More advanced TLS support template class. -/** It supports RAII and to some extent mimic __declspec(thread) variables. 
*/ -template <typename T> -class tls : public basic_tls<T> { - typedef basic_tls<T> base; -public: - tls() { base::create(); } - ~tls() { base::destroy(); } - T operator=(T value) { base::set(value); return value; } - operator T() { return base::get(); } -}; - } // namespace r1 } // namespace detail } // namespace tbb diff --git a/contrib/libs/tbb/src/tbb/tools_api/disable_warnings.h b/contrib/libs/tbb/src/tbb/tools_api/disable_warnings.h index e1ba837404..27aa3ee0ce 100644 --- a/contrib/libs/tbb/src/tbb/tools_api/disable_warnings.h +++ b/contrib/libs/tbb/src/tbb/tools_api/disable_warnings.h @@ -16,7 +16,7 @@ #include "ittnotify_config.h" -#if ITT_PLATFORM==ITT_PLATFORM_WIN +#if ITT_PLATFORM==ITT_PLATFORM_WIN && _MSC_VER #pragma warning (disable: 593) /* parameter "XXXX" was set but never used */ #pragma warning (disable: 344) /* typedef name has already been declared (with same type) */ diff --git a/contrib/libs/tbb/src/tbb/tools_api/ittnotify.h b/contrib/libs/tbb/src/tbb/tools_api/ittnotify.h index 993b7b0bfd..1eecd2faa7 100644 --- a/contrib/libs/tbb/src/tbb/tools_api/ittnotify.h +++ b/contrib/libs/tbb/src/tbb/tools_api/ittnotify.h @@ -188,7 +188,7 @@ The same ID may not be reused for different instances, unless a previous #if ITT_PLATFORM==ITT_PLATFORM_WIN /* use __forceinline (VC++ specific) */ -#define ITT_INLINE __forceinline +#define ITT_INLINE static __forceinline #define ITT_INLINE_ATTRIBUTE /* nothing */ #else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ /* @@ -210,8 +210,7 @@ The same ID may not be reused for different instances, unless a previous # if ITT_PLATFORM==ITT_PLATFORM_WIN # pragma message("WARNING!!! Deprecated API is used. Please undefine INTEL_ITTNOTIFY_ENABLE_LEGACY macro") # else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -// #warning usage leads to ICC's compilation error -// # warning "Deprecated API is used. Please undefine INTEL_ITTNOTIFY_ENABLE_LEGACY macro" +# warning "Deprecated API is used. 
Please undefine INTEL_ITTNOTIFY_ENABLE_LEGACY macro" # endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ # include "legacy/ittnotify.h" #endif /* INTEL_ITTNOTIFY_ENABLE_LEGACY */ @@ -1538,7 +1537,7 @@ ITT_STUBV(ITTAPI, void, heap_allocate_end, (__itt_heap_function h, void** addr, /** @endcond */ /** - * @brief Record an free begin occurrence. + * @brief Record a free begin occurrence. */ void ITTAPI __itt_heap_free_begin(__itt_heap_function h, void* addr); @@ -1558,7 +1557,7 @@ ITT_STUBV(ITTAPI, void, heap_free_begin, (__itt_heap_function h, void* addr)) /** @endcond */ /** - * @brief Record an free end occurrence. + * @brief Record a free end occurrence. */ void ITTAPI __itt_heap_free_end(__itt_heap_function h, void* addr); @@ -1578,7 +1577,7 @@ ITT_STUBV(ITTAPI, void, heap_free_end, (__itt_heap_function h, void* addr)) /** @endcond */ /** - * @brief Record an reallocation begin occurrence. + * @brief Record a reallocation begin occurrence. */ void ITTAPI __itt_heap_reallocate_begin(__itt_heap_function h, void* addr, size_t new_size, int initialized); @@ -1598,7 +1597,7 @@ ITT_STUBV(ITTAPI, void, heap_reallocate_begin, (__itt_heap_function h, void* add /** @endcond */ /** - * @brief Record an reallocation end occurrence. + * @brief Record a reallocation end occurrence. */ void ITTAPI __itt_heap_reallocate_end(__itt_heap_function h, void* addr, void** new_addr, size_t new_size, int initialized); @@ -3639,11 +3638,12 @@ ITT_STUBV(ITTAPI, void, enable_attach, (void)) /** @endcond */ /** - * @brief Module load info - * This API is used to report necessary information in case of module relocation - * @param[in] start_addr - relocated module start address - * @param[in] end_addr - relocated module end address - * @param[in] path - file system path to the module + * @brief Module load notification + * This API is used to report necessary information in case of bypassing default system loader. 
+ * Notification should be done immediately after this module is loaded to process memory. + * @param[in] start_addr - module start address + * @param[in] end_addr - module end address + * @param[in] path - file system full path to the module */ #if ITT_PLATFORM==ITT_PLATFORM_WIN void ITTAPI __itt_module_loadA(void *start_addr, void *end_addr, const char *path); @@ -3698,7 +3698,176 @@ ITT_STUB(ITTAPI, void, module_load, (void *start_addr, void *end_addr, const ch #endif /* INTEL_NO_MACRO_BODY */ /** @endcond */ +/** + * @brief Report module unload + * This API is used to report necessary information in case of bypassing default system loader. + * Notification should be done just before the module is unloaded from process memory. + * @param[in] addr - base address of loaded module + */ +void ITTAPI __itt_module_unload(void *addr); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, module_unload, (void *addr)) +#define __itt_module_unload ITTNOTIFY_VOID(module_unload) +#define __itt_module_unload_ptr ITTNOTIFY_NAME(module_unload) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_module_unload(addr) +#define __itt_module_unload_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_module_unload_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** @cond exclude_from_documentation */ +typedef enum +{ + __itt_module_type_unknown = 0, + __itt_module_type_elf, + __itt_module_type_coff +} __itt_module_type; +/** @endcond */ + +/** @cond exclude_from_documentation */ +typedef enum +{ + itt_section_type_unknown, + itt_section_type_bss, /* notifies that the section contains uninitialized data. These are the relevant section types and the modules that contain them: + * ELF module: SHT_NOBITS section type + * COFF module: IMAGE_SCN_CNT_UNINITIALIZED_DATA section type + */ + itt_section_type_data, /* notifies that section contains initialized data. 
These are the relevant section types and the modules that contain them: + * ELF module: SHT_PROGBITS section type + * COFF module: IMAGE_SCN_CNT_INITIALIZED_DATA section type + */ + itt_section_type_text /* notifies that the section contains executable code. These are the relevant section types and the modules that contain them: + * ELF module: SHT_PROGBITS section type + * COFF module: IMAGE_SCN_CNT_CODE section type + */ +} __itt_section_type; +/** @endcond */ + +/** + * @hideinitializer + * @brief bit-mask, detects a section attribute that indicates whether a section can be executed as code: + * These are the relevant section attributes and the modules that contain them: + * ELF module: PF_X section attribute + * COFF module: IMAGE_SCN_MEM_EXECUTE attribute + */ +#define __itt_section_exec 0x20000000 + +/** + * @hideinitializer + * @brief bit-mask, detects a section attribute that indicates whether a section can be read. + * These are the relevant section attributes and the modules that contain them: + * ELF module: PF_R attribute + * COFF module: IMAGE_SCN_MEM_READ attribute + */ +#define __itt_section_read 0x40000000 + +/** + * @hideinitializer + * @brief bit-mask, detects a section attribute that indicates whether a section can be written to. 
+ * These are the relevant section attributes and the modules that contain them: + * ELF module: PF_W attribute + * COFF module: IMAGE_SCN_MEM_WRITE attribute + */ +#define __itt_section_write 0x80000000 + +/** @cond exclude_from_documentation */ +#pragma pack(push, 8) +typedef struct ___itt_section_info +{ + const char* name; /*!< Section name in UTF8 */ + __itt_section_type type; /*!< Section content and semantics description */ + size_t flags; /*!< Section bit flags that describe attributes using bit mask + * Zero if disabled, non-zero if enabled + */ + void* start_addr; /*!< Section load(relocated) start address */ + size_t size; /*!< Section size */ + size_t file_offset; /*!< Section file offset */ +} __itt_section_info; + +#pragma pack(pop) +/** @endcond */ + +/** @cond exclude_from_documentation */ +#pragma pack(push, 8) + +typedef struct ___itt_module_object +{ + unsigned int version; /*!< API version */ + __itt_id module_id; /*!< Unique identifier. This is unchanged for sections that belong to the same module */ + __itt_module_type module_type; /*!< Binary module format */ + const char* module_name; /*!< Unique module name or path to module in UTF8 + * Contains module name when module_buffer and module_size exist + * Contains module path when module_buffer and module_size are absent + * module_name remains the same for a given module_id + */ + void* module_buffer; /*!< Module buffer content */ + size_t module_size; /*!< Module buffer size */ + /*!< If module_buffer and module_size exist, the binary module is dumped onto the system. + * If module_buffer and module_size do not exist, + * the binary module exists on the system already. + * The module_name parameter contains the path to the module. + */ + __itt_section_info* section_array; /*!< Reference to section information */ + size_t section_number; +} __itt_module_object; + +#pragma pack(pop) +/** @endcond */ + +/** + * @brief Load module content and its loaded(relocated) sections. 
+ * This API is useful to save a module, or specify its location on the system and report information about loaded sections. + * The target module is saved on the system if module buffer content and size are available. + * If module buffer content and size are unavailable, the module name contains the path to the existing binary module. + * @param[in] module_obj - provides module and section information, along with unique module identifiers (name,module ID) + * which bind the binary module to particular sections. + */ +void ITTAPI __itt_module_load_with_sections(__itt_module_object* module_obj); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, module_load_with_sections, (__itt_module_object* module_obj)) +#define __itt_module_load_with_sections ITTNOTIFY_VOID(module_load_with_sections) +#define __itt_module_load_with_sections_ptr ITTNOTIFY_NAME(module_load_with_sections) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_module_load_with_sections(module_obj) +#define __itt_module_load_with_sections_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_module_load_with_sections_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ + +/** + * @brief Unload a module and its loaded(relocated) sections. + * This API notifies that the module and its sections were unloaded. + * @param[in] module_obj - provides module and sections information, along with unique module identifiers (name,module ID) + * which bind the binary module to particular sections. 
+ */ +void ITTAPI __itt_module_unload_with_sections(__itt_module_object* module_obj); + +/** @cond exclude_from_documentation */ +#ifndef INTEL_NO_MACRO_BODY +#ifndef INTEL_NO_ITTNOTIFY_API +ITT_STUBV(ITTAPI, void, module_unload_with_sections, (__itt_module_object* module_obj)) +#define __itt_module_unload_with_sections ITTNOTIFY_VOID(module_unload_with_sections) +#define __itt_module_unload_with_sections_ptr ITTNOTIFY_NAME(module_unload_with_sections) +#else /* INTEL_NO_ITTNOTIFY_API */ +#define __itt_module_unload_with_sections(module_obj) +#define __itt_module_unload_with_sections_ptr 0 +#endif /* INTEL_NO_ITTNOTIFY_API */ +#else /* INTEL_NO_MACRO_BODY */ +#define __itt_module_unload_with_sections_ptr 0 +#endif /* INTEL_NO_MACRO_BODY */ +/** @endcond */ #ifdef __cplusplus } diff --git a/contrib/libs/tbb/src/tbb/tools_api/ittnotify_config.h b/contrib/libs/tbb/src/tbb/tools_api/ittnotify_config.h index c25730d522..f904a8e9d7 100644 --- a/contrib/libs/tbb/src/tbb/tools_api/ittnotify_config.h +++ b/contrib/libs/tbb/src/tbb/tools_api/ittnotify_config.h @@ -121,7 +121,7 @@ #if ITT_PLATFORM==ITT_PLATFORM_WIN /* use __forceinline (VC++ specific) */ -#define ITT_INLINE __forceinline +#define ITT_INLINE static __forceinline #define ITT_INLINE_ATTRIBUTE /* nothing */ #else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ /* @@ -147,6 +147,10 @@ # define ITT_ARCH_IA32E 2 #endif /* ITT_ARCH_IA32E */ +#ifndef ITT_ARCH_IA64 +# define ITT_ARCH_IA64 3 +#endif /* ITT_ARCH_IA64 */ + #ifndef ITT_ARCH_ARM # define ITT_ARCH_ARM 4 #endif /* ITT_ARCH_ARM */ @@ -155,6 +159,10 @@ # define ITT_ARCH_PPC64 5 #endif /* ITT_ARCH_PPC64 */ +#ifndef ITT_ARCH_ARM64 +# define ITT_ARCH_ARM64 6 +#endif /* ITT_ARCH_ARM64 */ + #ifndef ITT_ARCH # if defined _M_IX86 || defined __i386__ # define ITT_ARCH ITT_ARCH_IA32 @@ -164,6 +172,8 @@ # define ITT_ARCH ITT_ARCH_IA64 # elif defined _M_ARM || defined __arm__ # define ITT_ARCH ITT_ARCH_ARM +# elif defined __aarch64__ +# define ITT_ARCH ITT_ARCH_ARM64 # elif defined 
__powerpc64__ # define ITT_ARCH ITT_ARCH_PPC64 # endif @@ -195,7 +205,7 @@ #define API_VERSION_BUILD 20180723 #ifndef API_VERSION_NUM -#define API_VERSION_NUM 0.0.0 +#define API_VERSION_NUM 3.18.6 #endif /* API_VERSION_NUM */ #define API_VERSION "ITT-API-Version " ITT_TO_STR(API_VERSION_NUM) \ @@ -207,7 +217,11 @@ typedef HMODULE lib_t; typedef DWORD TIDT; typedef CRITICAL_SECTION mutex_t; +#ifdef __cplusplus +#define MUTEX_INITIALIZER {} +#else #define MUTEX_INITIALIZER { 0 } +#endif #define strong_alias(name, aliasname) /* empty for Windows */ #else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ #include <dlfcn.h> @@ -322,12 +336,12 @@ ITT_INLINE long __TBB_machine_fetchadd4(volatile void* ptr, long addend) { long result; __asm__ __volatile__("lock\nxadd %0,%1" - : "=r"(result),"=m"(*(int*)ptr) - : "0"(addend), "m"(*(int*)ptr) + : "=r"(result),"=m"(*(volatile int*)ptr) + : "0"(addend), "m"(*(volatile int*)ptr) : "memory"); return result; } -#elif ITT_ARCH==ITT_ARCH_ARM || ITT_ARCH==ITT_ARCH_PPC64 +#else #define __TBB_machine_fetchadd4(addr, val) __sync_fetch_and_add(addr, val) #endif /* ITT_ARCH==ITT_ARCH_IA64 */ #ifndef ITT_SIMPLE_INIT diff --git a/contrib/libs/tbb/src/tbb/tools_api/ittnotify_static.c b/contrib/libs/tbb/src/tbb/tools_api/ittnotify_static.c index dd8ca8e755..44dc8a027d 100644 --- a/contrib/libs/tbb/src/tbb/tools_api/ittnotify_static.c +++ b/contrib/libs/tbb/src/tbb/tools_api/ittnotify_static.c @@ -17,6 +17,9 @@ #include "ittnotify_config.h" #if ITT_PLATFORM==ITT_PLATFORM_WIN +#ifdef PATH_MAX +#undef PATH_MAX +#endif #define PATH_MAX 512 #else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ #include <limits.h> @@ -28,7 +31,7 @@ #include <stdarg.h> #include <string.h> -#define INTEL_NO_MACRO_BODY +#define INTEL_NO_MACRO_BODY #define INTEL_ITTNOTIFY_API_PRIVATE #include "ittnotify.h" #include "legacy/ittnotify.h" @@ -39,6 +42,44 @@ static const char api_version[] = API_VERSION "\0\n@(#) $Revision$\n"; #define _N_(n) ITT_JOIN(INTEL_ITTNOTIFY_PREFIX,n) +#ifndef 
HAS_CPP_ATTR +#if defined(__cplusplus) && defined(__has_cpp_attribute) +#define HAS_CPP_ATTR(X) __has_cpp_attribute(X) +#else +#define HAS_CPP_ATTR(X) 0 +#endif +#endif + +#ifndef HAS_C_ATTR +#if defined(__STDC__) && defined(__has_c_attribute) +#define HAS_C_ATTR(X) __has_c_attribute(X) +#else +#define HAS_C_ATTR(X) 0 +#endif +#endif + +#ifndef HAS_GNU_ATTR +#if defined(__has_attribute) +#define HAS_GNU_ATTR(X) __has_attribute(X) +#else +#define HAS_GNU_ATTR(X) 0 +#endif +#endif + +#ifndef ITT_ATTRIBUTE_FALLTHROUGH +#if (HAS_CPP_ATTR(fallthrough) || HAS_C_ATTR(fallthrough)) && (__cplusplus >= 201703L || _MSVC_LANG >= 201703L) +#define ITT_ATTRIBUTE_FALLTHROUGH [[fallthrough]] +#elif HAS_CPP_ATTR(gnu::fallthrough) +#define ITT_ATTRIBUTE_FALLTHROUGH [[gnu::fallthrough]] +#elif HAS_CPP_ATTR(clang::fallthrough) +#define ITT_ATTRIBUTE_FALLTHROUGH [[clang::fallthrough]] +#elif HAS_GNU_ATTR(fallthrough) && !__INTEL_COMPILER +#define ITT_ATTRIBUTE_FALLTHROUGH __attribute__((fallthrough)) +#else +#define ITT_ATTRIBUTE_FALLTHROUGH +#endif +#endif + #if ITT_OS==ITT_OS_WIN static const char* ittnotify_lib_name = "libittnotify.dll"; #elif ITT_OS==ITT_OS_LINUX || ITT_OS==ITT_OS_FREEBSD @@ -110,7 +151,7 @@ static const char* ittnotify_lib_name = "libittnotify.dylib"; } \ } -const int _N_(err) = 0; +#define ITT_MODULE_OBJECT_VERSION 1 typedef int (__itt_init_ittlib_t)(const char*, __itt_group_id); @@ -201,7 +242,7 @@ static __itt_group_alias group_alias[] = { #pragma pack(pop) -#if ITT_PLATFORM==ITT_PLATFORM_WIN +#if ITT_PLATFORM==ITT_PLATFORM_WIN && _MSC_VER #pragma warning(push) #pragma warning(disable: 4054) /* warning C4054: 'type cast' : from function pointer 'XXX' to data pointer 'void *' */ #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ @@ -224,12 +265,10 @@ static __itt_api_info api_list[] = { {NULL, NULL, NULL, NULL, __itt_group_none} }; -#if ITT_PLATFORM==ITT_PLATFORM_WIN +#if ITT_PLATFORM==ITT_PLATFORM_WIN && _MSC_VER #pragma warning(pop) #endif /* 
ITT_PLATFORM==ITT_PLATFORM_WIN */ -static const char dll_path[PATH_MAX] = { 0 }; - /* static part descriptor which handles. all notification api attributes. */ __itt_global _N_(_ittapi_global) = { ITT_MAGIC, /* identification info */ @@ -240,7 +279,7 @@ __itt_global _N_(_ittapi_global) = { MUTEX_INITIALIZER, /* mutex */ NULL, /* dynamic library handle */ NULL, /* error_handler */ - (const char**)&dll_path, /* dll_path_ptr */ + NULL, /* dll_path_ptr */ (__itt_api_info*)&api_list, /* api_list_ptr */ NULL, /* next __itt_global */ NULL, /* thread_list */ @@ -254,18 +293,20 @@ __itt_global _N_(_ittapi_global) = { typedef void (__itt_api_init_t)(__itt_global*, __itt_group_id); typedef void (__itt_api_fini_t)(__itt_global*); +static __itt_domain dummy_domain; /* ========================================================================= */ #ifdef ITT_NOTIFY_EXT_REPORT ITT_EXTERN_C void _N_(error_handler)(__itt_error_code, va_list args); #endif /* ITT_NOTIFY_EXT_REPORT */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN +#if ITT_PLATFORM==ITT_PLATFORM_WIN && _MSC_VER #pragma warning(push) #pragma warning(disable: 4055) /* warning C4055: 'type cast' : from data pointer 'void *' to function pointer 'XXX' */ #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -static void __itt_report_error_impl(int code, ...) { +static void __itt_report_error(int code, ...) +{ va_list args; va_start(args, code); if (_N_(_ittapi_global).error_handler != NULL) @@ -274,17 +315,12 @@ static void __itt_report_error_impl(int code, ...) { handler((__itt_error_code)code, args); } #ifdef ITT_NOTIFY_EXT_REPORT - _N_(error_handler)(code, args); + _N_(error_handler)((__itt_error_code)code, args); #endif /* ITT_NOTIFY_EXT_REPORT */ va_end(args); } -//va_start cannot take enum (__itt_error_code) on clang, so it is necessary to transform it to int -#define __itt_report_error(code, ...) 
\ - __itt_report_error_impl((int)code,__VA_ARGS__) - - -#if ITT_PLATFORM==ITT_PLATFORM_WIN +#if ITT_PLATFORM==ITT_PLATFORM_WIN && _MSC_VER #pragma warning(pop) #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ @@ -306,6 +342,11 @@ static __itt_domain* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(domain_createW),_init))( __itt_mutex_unlock(&_N_(_ittapi_global).mutex); return ITTNOTIFY_NAME(domain_createW)(name); } + else + { + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return &dummy_domain; + } } for (h_tail = NULL, h = _N_(_ittapi_global).domain_list; h != NULL; h_tail = h, h = h->next) { @@ -347,6 +388,15 @@ static __itt_domain* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(domain_create),_init))(c return ITTNOTIFY_NAME(domain_create)(name); } #endif + else + { +#if ITT_PLATFORM==ITT_PLATFORM_WIN + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); +#else + if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); +#endif + return &dummy_domain; + } } for (h_tail = NULL, h = _N_(_ittapi_global).domain_list; h != NULL; h_tail = h, h = h->next) { @@ -360,6 +410,38 @@ static __itt_domain* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(domain_create),_init))(c return h; } +static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(module_load_with_sections),_init))(__itt_module_object* module_obj) +{ + if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list == NULL) + { + __itt_init_ittlib_name(NULL, __itt_group_all); + } + if (ITTNOTIFY_NAME(module_load_with_sections) && ITTNOTIFY_NAME(module_load_with_sections) != ITT_VERSIONIZE(ITT_JOIN(_N_(module_load_with_sections),_init))) + { + if(module_obj != NULL) + { + module_obj->version = ITT_MODULE_OBJECT_VERSION; + ITTNOTIFY_NAME(module_load_with_sections)(module_obj); + } + } +} + +static void ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(module_unload_with_sections),_init))(__itt_module_object* module_obj) +{ + if (!_N_(_ittapi_global).api_initialized && _N_(_ittapi_global).thread_list == NULL) + { + __itt_init_ittlib_name(NULL, 
__itt_group_all); + } + if (ITTNOTIFY_NAME(module_unload_with_sections) && ITTNOTIFY_NAME(module_unload_with_sections) != ITT_VERSIONIZE(ITT_JOIN(_N_(module_unload_with_sections),_init))) + { + if(module_obj != NULL) + { + module_obj->version = ITT_MODULE_OBJECT_VERSION; + ITTNOTIFY_NAME(module_unload_with_sections)(module_obj); + } + } +} + #if ITT_PLATFORM==ITT_PLATFORM_WIN static __itt_string_handle* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_createW),_init))(const wchar_t* name) { @@ -378,6 +460,11 @@ static __itt_string_handle* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_cre __itt_mutex_unlock(&_N_(_ittapi_global).mutex); return ITTNOTIFY_NAME(string_handle_createW)(name); } + else + { + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return NULL; + } } for (h_tail = NULL, h = _N_(_ittapi_global).string_list; h != NULL; h_tail = h, h = h->next) { @@ -419,6 +506,15 @@ static __itt_string_handle* ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(string_handle_cre return ITTNOTIFY_NAME(string_handle_create)(name); } #endif + else + { +#if ITT_PLATFORM==ITT_PLATFORM_WIN + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); +#else + if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); +#endif + return NULL; + } } for (h_tail = NULL, h = _N_(_ittapi_global).string_list; h != NULL; h_tail = h, h = h->next) { @@ -451,10 +547,15 @@ static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_createW),_init)) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); return ITTNOTIFY_NAME(counter_createW)(name, domain); } + else + { + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return NULL; + } } for (h_tail = NULL, h = _N_(_ittapi_global).counter_list; h != NULL; h_tail = h, h = h->next) { - if (h->nameW != NULL && h->type == type && !wcscmp(h->nameW, name) && ((h->domainW == NULL && domain == NULL) || + if (h->nameW != NULL && h->type == (int)type && !wcscmp(h->nameW, name) && ((h->domainW == NULL && domain == NULL) || (h->domainW != NULL && domain != NULL && 
!wcscmp(h->domainW, domain)))) break; } @@ -485,7 +586,7 @@ static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create),_init))( #if ITT_PLATFORM==ITT_PLATFORM_WIN if (ITTNOTIFY_NAME(counter_createA) && ITTNOTIFY_NAME(counter_createA) != ITT_VERSIONIZE(ITT_JOIN(_N_(counter_createA),_init))) { - __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); return ITTNOTIFY_NAME(counter_createA)(name, domain); } #else @@ -495,10 +596,19 @@ static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create),_init))( return ITTNOTIFY_NAME(counter_create)(name, domain); } #endif + else + { +#if ITT_PLATFORM==ITT_PLATFORM_WIN + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); +#else + if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); +#endif + return NULL; + } } for (h_tail = NULL, h = _N_(_ittapi_global).counter_list; h != NULL; h_tail = h, h = h->next) { - if (h->nameA != NULL && h->type == type && !__itt_fstrcmp(h->nameA, name) && ((h->domainA == NULL && domain == NULL) || + if (h->nameA != NULL && h->type == (int)type && !__itt_fstrcmp(h->nameA, name) && ((h->domainA == NULL && domain == NULL) || (h->domainA != NULL && domain != NULL && !__itt_fstrcmp(h->domainA, domain)))) break; } if (h == NULL) @@ -527,10 +637,15 @@ static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create_typedW),_ __itt_mutex_unlock(&_N_(_ittapi_global).mutex); return ITTNOTIFY_NAME(counter_create_typedW)(name, domain, type); } + else + { + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); + return NULL; + } } for (h_tail = NULL, h = _N_(_ittapi_global).counter_list; h != NULL; h_tail = h, h = h->next) { - if (h->nameW != NULL && h->type == type && !wcscmp(h->nameW, name) && ((h->domainW == NULL && domain == NULL) || + if (h->nameW != NULL && h->type == (int)type && !wcscmp(h->nameW, name) && ((h->domainW == NULL && domain == NULL) || (h->domainW != NULL && domain != NULL && !wcscmp(h->domainW, domain)))) break; } 
@@ -570,10 +685,19 @@ static __itt_counter ITTAPI ITT_VERSIONIZE(ITT_JOIN(_N_(counter_create_typed),_i return ITTNOTIFY_NAME(counter_create_typed)(name, domain, type); } #endif + else + { +#if ITT_PLATFORM==ITT_PLATFORM_WIN + __itt_mutex_unlock(&_N_(_ittapi_global).mutex); +#else + if (PTHREAD_SYMBOLS) __itt_mutex_unlock(&_N_(_ittapi_global).mutex); +#endif + return NULL; + } } for (h_tail = NULL, h = _N_(_ittapi_global).counter_list; h != NULL; h_tail = h, h = h->next) { - if (h->nameA != NULL && h->type == type && !__itt_fstrcmp(h->nameA, name) && ((h->domainA == NULL && domain == NULL) || + if (h->nameA != NULL && h->type == (int)type && !__itt_fstrcmp(h->nameA, name) && ((h->domainA == NULL && domain == NULL) || (h->domainA != NULL && domain != NULL && !__itt_fstrcmp(h->domainA, domain)))) break; } if (h == NULL) @@ -788,7 +912,7 @@ static const char* __itt_get_env_var(const char* name) } else { - /* If environment variable is empty, GetEnvirornmentVariables() + /* If environment variable is empty, GetEnvironmentVariables() * returns zero (number of characters (not including terminating null), * and GetLastError() returns ERROR_SUCCESS. 
*/ DWORD err = GetLastError(); @@ -1019,7 +1143,7 @@ static void __itt_nullify_all_pointers(void) *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func; } -#if ITT_PLATFORM==ITT_PLATFORM_WIN +#if ITT_PLATFORM==ITT_PLATFORM_WIN && _MSC_VER #pragma warning(push) #pragma warning(disable: 4054) /* warning C4054: 'type cast' : from function pointer 'XXX' to data pointer 'void *' */ #pragma warning(disable: 4055) /* warning C4055: 'type cast' : from data pointer 'void *' to function pointer 'XXX' */ @@ -1096,10 +1220,11 @@ ITT_EXTERN_C int _N_(init_ittlib)(const char* lib_name, __itt_group_id init_grou __itt_api_init_t* __itt_api_init_ptr; int lib_version = __itt_lib_version(_N_(_ittapi_global).lib); - switch (lib_version) { + switch (lib_version) + { case 0: groups = __itt_group_legacy; - /* Falls through */ + ITT_ATTRIBUTE_FALLTHROUGH; case 1: /* Fill all pointers from dynamic library */ for (i = 0; _N_(_ittapi_global).api_list_ptr[i].name != NULL; i++) @@ -1154,12 +1279,13 @@ ITT_EXTERN_C int _N_(init_ittlib)(const char* lib_name, __itt_group_id init_grou { __itt_nullify_all_pointers(); + __itt_report_error(__itt_error_no_module, lib_name, #if ITT_PLATFORM==ITT_PLATFORM_WIN - int error = __itt_system_error(); + __itt_system_error() #else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - const char* error = dlerror(); + dlerror() #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - __itt_report_error(__itt_error_no_module, lib_name, error); + ); } } else @@ -1197,7 +1323,7 @@ ITT_EXTERN_C __itt_error_handler_t* _N_(set_error_handler)(__itt_error_handler_t return prev; } -#if ITT_PLATFORM==ITT_PLATFORM_WIN +#if ITT_PLATFORM==ITT_PLATFORM_WIN && _MSC_VER #pragma warning(pop) #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ @@ -1241,4 +1367,3 @@ ITT_EXTERN_C void _N_(mark_pt_region_end)(__itt_pt_region region) (void)region; #endif } - diff --git a/contrib/libs/tbb/src/tbb/tools_api/ittnotify_static.h b/contrib/libs/tbb/src/tbb/tools_api/ittnotify_static.h 
index 67cf683880..0aab7c87f1 100644 --- a/contrib/libs/tbb/src/tbb/tools_api/ittnotify_static.h +++ b/contrib/libs/tbb/src/tbb/tools_api/ittnotify_static.h @@ -39,6 +39,9 @@ ITT_STUB(ITTAPI, __itt_domain*, domain_createW, (const wchar_t *name), (ITT_FORM ITT_STUB(ITTAPI, __itt_domain*, domain_create, (const char *name), (ITT_FORMAT name), domain_create, __itt_group_structure, "\"%s\"") #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ +ITT_STUBV(ITTAPI, void, module_load_with_sections, (__itt_module_object* module_obj), (ITT_FORMAT module_obj), module_load_with_sections, __itt_group_module, "%p") +ITT_STUBV(ITTAPI, void, module_unload_with_sections, (__itt_module_object* module_obj), (ITT_FORMAT module_obj), module_unload_with_sections, __itt_group_module, "%p") + #if ITT_PLATFORM==ITT_PLATFORM_WIN ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_createA, (const char *name), (ITT_FORMAT name), string_handle_createA, __itt_group_structure, "\"%s\"") ITT_STUB(ITTAPI, __itt_string_handle*, string_handle_createW, (const wchar_t *name), (ITT_FORMAT name), string_handle_createW, __itt_group_structure, "\"%S\"") @@ -341,14 +344,13 @@ ITT_STUB(ITTAPI, int, av_save, (void *data, int rank, const int *dimensions, in #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ #endif /* __ITT_INTERNAL_BODY */ -#ifndef __ITT_INTERNAL_BODY #if ITT_PLATFORM==ITT_PLATFORM_WIN -ITT_STUBV(ITTAPI, void, module_loadA, (void *start_addr, void* end_addr, const char *path), (ITT_FORMAT start_addr, end_addr, path), module_loadA, __itt_group_none, "%p, %p, %p") -ITT_STUBV(ITTAPI, void, module_loadW, (void *start_addr, void* end_addr, const wchar_t *path), (ITT_FORMAT start_addr, end_addr, path), module_loadW, __itt_group_none, "%p, %p, %p") +ITT_STUBV(ITTAPI, void, module_loadA, (void *start_addr, void* end_addr, const char *path), (ITT_FORMAT start_addr, end_addr, path), module_loadA, __itt_group_module, "%p, %p, %p") +ITT_STUBV(ITTAPI, void, module_loadW, (void *start_addr, void* end_addr, const wchar_t 
*path), (ITT_FORMAT start_addr, end_addr, path), module_loadW, __itt_group_module, "%p, %p, %p") #else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ -ITT_STUBV(ITTAPI, void, module_load, (void *start_addr, void *end_addr, const char *path), (ITT_FORMAT start_addr, end_addr, path), module_load, __itt_group_none, "%p, %p, %p") +ITT_STUBV(ITTAPI, void, module_load, (void *start_addr, void *end_addr, const char *path), (ITT_FORMAT start_addr, end_addr, path), module_load, __itt_group_module, "%p, %p, %p") #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -#endif /* __ITT_INTERNAL_BODY */ +ITT_STUBV(ITTAPI, void, module_unload, (void *start_addr), (ITT_FORMAT start_addr), module_unload, __itt_group_module, "%p") #endif /* __ITT_INTERNAL_INIT */ diff --git a/contrib/libs/tbb/src/tbb/tools_api/ittnotify_types.h b/contrib/libs/tbb/src/tbb/tools_api/ittnotify_types.h index 3849452c27..7693c46f3d 100644 --- a/contrib/libs/tbb/src/tbb/tools_api/ittnotify_types.h +++ b/contrib/libs/tbb/src/tbb/tools_api/ittnotify_types.h @@ -19,25 +19,26 @@ typedef enum ___itt_group_id { - __itt_group_none = 0, - __itt_group_legacy = 1<<0, - __itt_group_control = 1<<1, - __itt_group_thread = 1<<2, - __itt_group_mark = 1<<3, - __itt_group_sync = 1<<4, - __itt_group_fsync = 1<<5, - __itt_group_jit = 1<<6, - __itt_group_model = 1<<7, - __itt_group_splitter_min = 1<<7, - __itt_group_counter = 1<<8, - __itt_group_frame = 1<<9, - __itt_group_stitch = 1<<10, - __itt_group_heap = 1<<11, - __itt_group_splitter_max = 1<<12, - __itt_group_structure = 1<<12, - __itt_group_suppress = 1<<13, - __itt_group_arrays = 1<<14, - __itt_group_all = -1 + __itt_group_none = 0, + __itt_group_legacy = 1<<0, + __itt_group_control = 1<<1, + __itt_group_thread = 1<<2, + __itt_group_mark = 1<<3, + __itt_group_sync = 1<<4, + __itt_group_fsync = 1<<5, + __itt_group_jit = 1<<6, + __itt_group_model = 1<<7, + __itt_group_splitter_min = 1<<7, + __itt_group_counter = 1<<8, + __itt_group_frame = 1<<9, + __itt_group_stitch = 1<<10, + 
__itt_group_heap = 1<<11, + __itt_group_splitter_max = 1<<12, + __itt_group_structure = 1<<12, + __itt_group_suppress = 1<<13, + __itt_group_arrays = 1<<14, + __itt_group_module = 1<<15, + __itt_group_all = -1 } __itt_group_id; #pragma pack(push, 8) @@ -67,6 +68,7 @@ typedef struct ___itt_group_list { __itt_group_structure, "structure" }, \ { __itt_group_suppress, "suppress" }, \ { __itt_group_arrays, "arrays" }, \ + { __itt_group_module, "module" }, \ { __itt_group_none, NULL } \ } diff --git a/contrib/libs/tbb/src/tbb/tools_api/legacy/ittnotify.h b/contrib/libs/tbb/src/tbb/tools_api/legacy/ittnotify.h index b05a199d1f..3d3561ecc4 100644 --- a/contrib/libs/tbb/src/tbb/tools_api/legacy/ittnotify.h +++ b/contrib/libs/tbb/src/tbb/tools_api/legacy/ittnotify.h @@ -97,7 +97,7 @@ # if ITT_PLATFORM==ITT_PLATFORM_WIN # define ITTAPI_CDECL __cdecl # else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ -# if defined _M_IX86 || defined __i386__ +# if defined _M_IX86 || defined __i386__ # define ITTAPI_CDECL __attribute__ ((cdecl)) # else /* _M_IX86 || __i386__ */ # define ITTAPI_CDECL /* actual only on x86 platform */ @@ -110,7 +110,7 @@ # define STDCALL __stdcall # else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ # if defined _M_IX86 || defined __i386__ -# define STDCALL __attribute__ ((stdcall)) +# define STDCALL __attribute__ ((stdcall)) # else /* _M_IX86 || __i386__ */ # define STDCALL /* supported only on x86 platform */ # endif /* _M_IX86 || __i386__ */ @@ -126,7 +126,7 @@ #if ITT_PLATFORM==ITT_PLATFORM_WIN /* use __forceinline (VC++ specific) */ -#define ITT_INLINE __forceinline +#define ITT_INLINE static __forceinline #define ITT_INLINE_ATTRIBUTE /* nothing */ #else /* ITT_PLATFORM==ITT_PLATFORM_WIN */ /* @@ -964,9 +964,9 @@ ITT_STUB(ITTAPI, __itt_frame, frame_create, (const char *domain)) #endif /* INTEL_NO_MACRO_BODY */ /** @endcond */ -/** @brief Record an frame begin occurrence. */ +/** @brief Record a frame begin occurrence. 
*/ void ITTAPI __itt_frame_begin(__itt_frame frame); -/** @brief Record an frame end occurrence. */ +/** @brief Record a frame end occurrence. */ void ITTAPI __itt_frame_end (__itt_frame frame); /** @cond exclude_from_documentation */ diff --git a/contrib/libs/tbb/src/tbb/waiters.h b/contrib/libs/tbb/src/tbb/waiters.h index 07ee5ab4f0..7e0906bee9 100644 --- a/contrib/libs/tbb/src/tbb/waiters.h +++ b/contrib/libs/tbb/src/tbb/waiters.h @@ -29,7 +29,7 @@ inline d1::task* get_self_recall_task(arena_slot& slot); class waiter_base { public: - waiter_base(arena& a) : my_arena(a), my_backoff(int(a.my_num_slots)) {} + waiter_base(arena& a, int yields_multiplier = 1) : my_arena(a), my_backoff(int(a.my_num_slots), yields_multiplier) {} bool pause() { if (my_backoff.pause()) { @@ -115,15 +115,15 @@ protected: template <typename Pred> void sleep(std::uintptr_t uniq_tag, Pred wakeup_condition) { - my_arena.my_market->get_wait_list().wait<extended_concurrent_monitor::thread_context>(wakeup_condition, - extended_context{uniq_tag, &my_arena}); + my_arena.my_market->get_wait_list().wait<market_concurrent_monitor::thread_context>(wakeup_condition, + market_context{uniq_tag, &my_arena}); } }; class external_waiter : public sleep_waiter { public: external_waiter(arena& a, d1::wait_context& wo) - : sleep_waiter(a), my_wait_ctx(wo) + : sleep_waiter(a, /*yields_multiplier*/10), my_wait_ctx(wo) {} bool continue_execution(arena_slot& slot, d1::task*& t) const { |