author    | Devtools Arcadia <arcadia-devtools@yandex-team.ru>           | 2022-02-07 18:08:42 +0300
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300
commit    | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
tree      | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /contrib/libs/tbb/include
download  | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'contrib/libs/tbb/include')
129 files changed, 32583 insertions, 0 deletions
diff --git a/contrib/libs/tbb/include/oneapi/tbb.h b/contrib/libs/tbb/include/oneapi/tbb.h new file mode 100644 index 0000000000..1ca41dc516 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb.h @@ -0,0 +1,73 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_tbb_H +#define __TBB_tbb_H + +/** + This header bulk-includes declarations or definitions of all the functionality + provided by TBB (save for tbbmalloc and 3rd party dependent headers). + + If you use only a few TBB constructs, consider including specific headers only. + Any header listed below can be included independently of others. +**/ + +#include "oneapi/tbb/blocked_range.h" +#include "oneapi/tbb/blocked_range2d.h" +#include "oneapi/tbb/blocked_range3d.h" +#if TBB_PREVIEW_BLOCKED_RANGE_ND +#include "tbb/blocked_rangeNd.h" +#endif +#include "oneapi/tbb/cache_aligned_allocator.h" +#include "oneapi/tbb/combinable.h" +#include "oneapi/tbb/concurrent_hash_map.h" +#if TBB_PREVIEW_CONCURRENT_LRU_CACHE +#include "tbb/concurrent_lru_cache.h" +#endif +#include "oneapi/tbb/concurrent_priority_queue.h" +#include "oneapi/tbb/concurrent_queue.h" +#include "oneapi/tbb/concurrent_unordered_map.h" +#include "oneapi/tbb/concurrent_unordered_set.h" +#include "oneapi/tbb/concurrent_map.h" +#include "oneapi/tbb/concurrent_set.h" +#include "oneapi/tbb/concurrent_vector.h" +#include "oneapi/tbb/enumerable_thread_specific.h" +#include "oneapi/tbb/flow_graph.h" +#include "oneapi/tbb/global_control.h" +#include "oneapi/tbb/info.h" +#include "oneapi/tbb/null_mutex.h" +#include "oneapi/tbb/null_rw_mutex.h" +#include "oneapi/tbb/parallel_for.h" +#include "oneapi/tbb/parallel_for_each.h" +#include "oneapi/tbb/parallel_invoke.h" +#include "oneapi/tbb/parallel_pipeline.h" +#include "oneapi/tbb/parallel_reduce.h" +#include "oneapi/tbb/parallel_scan.h" +#include "oneapi/tbb/parallel_sort.h" +#include "oneapi/tbb/partitioner.h" +#include "oneapi/tbb/queuing_mutex.h" +#include "oneapi/tbb/queuing_rw_mutex.h" +#include "oneapi/tbb/spin_mutex.h" +#include "oneapi/tbb/spin_rw_mutex.h" +#include "oneapi/tbb/task.h" +#include "oneapi/tbb/task_arena.h" +#include "oneapi/tbb/task_group.h" +#include "oneapi/tbb/task_scheduler_observer.h" +#include "oneapi/tbb/tbb_allocator.h" +#include "oneapi/tbb/tick_count.h" +#include "oneapi/tbb/version.h" + +#endif /* __TBB_tbb_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/blocked_range.h b/contrib/libs/tbb/include/oneapi/tbb/blocked_range.h new file mode 100644 index 0000000000..f6612fb4e3 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/blocked_range.h @@ -0,0 +1,163 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
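Editorial aside (not part of the patch): oneapi/tbb.h above is purely an aggregation header, so a translation unit that uses only one or two components can include those headers directly and save compile time; preview components are gated by macros, as the #if blocks show. A minimal sketch:

```cpp
// Define preview macros before the umbrella header if preview
// components such as concurrent_lru_cache are wanted.
#define TBB_PREVIEW_CONCURRENT_LRU_CACHE 1
#include "oneapi/tbb.h"

// Narrower alternative when only one algorithm is used:
// #include "oneapi/tbb/parallel_for.h"
// #include "oneapi/tbb/blocked_range.h"
```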
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_blocked_range_H +#define __TBB_blocked_range_H + +#include <cstddef> + +#include "detail/_range_common.h" +#include "detail/_namespace_injection.h" + +#include "version.h" + +namespace tbb { +namespace detail { +namespace d1 { + +/** \page range_req Requirements on range concept + Class \c R implementing the concept of range must define: + - \code R::R( const R& ); \endcode Copy constructor + - \code R::~R(); \endcode Destructor + - \code bool R::is_divisible() const; \endcode True if range can be partitioned into two subranges + - \code bool R::empty() const; \endcode True if range is empty + - \code R::R( R& r, split ); \endcode Split range \c r into two subranges. +**/ + +//! A range over which to iterate. +/** @ingroup algorithms */ +template<typename Value> +class blocked_range { +public: + //! Type of a value + /** Called a const_iterator for sake of algorithms that need to treat a blocked_range + as an STL container. */ + using const_iterator = Value; + + //! Type for size of a range + using size_type = std::size_t; + + //! Construct range over half-open interval [begin,end), with the given grainsize. + blocked_range( Value begin_, Value end_, size_type grainsize_=1 ) : + my_end(end_), my_begin(begin_), my_grainsize(grainsize_) + { + __TBB_ASSERT( my_grainsize>0, "grainsize must be positive" ); + } + + //! Beginning of range. + const_iterator begin() const { return my_begin; } + + //! One past last value in range. + const_iterator end() const { return my_end; } + + //! Size of the range + /** Unspecified if end()<begin(). */ + size_type size() const { + __TBB_ASSERT( !(end()<begin()), "size() unspecified if end()<begin()" ); + return size_type(my_end-my_begin); + } + + //! The grain size for this range. + size_type grainsize() const { return my_grainsize; } + + //------------------------------------------------------------------------ + // Methods that implement Range concept + //------------------------------------------------------------------------ + + //! True if range is empty. + bool empty() const { return !(my_begin<my_end); } + + //! True if range is divisible. + /** Unspecified if end()<begin(). */ + bool is_divisible() const { return my_grainsize<size(); } + + //! Split range. + /** The new Range *this has the second part, the old range r has the first part. + Unspecified if end()<begin() or !is_divisible(). */ + blocked_range( blocked_range& r, split ) : + my_end(r.my_end), + my_begin(do_split(r, split())), + my_grainsize(r.my_grainsize) + { + // only comparison 'less than' is required from values of blocked_range objects + __TBB_ASSERT( !(my_begin < r.my_end) && !(r.my_end < my_begin), "blocked_range has been split incorrectly" ); + } + + //! Split range. + /** The new Range *this has the second part split according to specified proportion, the old range r has the first part. + Unspecified if end()<begin() or !is_divisible(). 
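Usage sketch (editorial aside, not part of the patch): blocked_range is the 1-D iteration space consumed by TBB's loop templates, and the splitting constructors above are what the partitioner calls to bisect work. Assuming the headers from this import:

```cpp
#include "oneapi/tbb/parallel_for.h"
#include "oneapi/tbb/blocked_range.h"
#include <cstddef>
#include <vector>

void scale(std::vector<float>& v, float factor) {
    // grainsize (third constructor argument, default 1) bounds how far
    // the range may be split; each task body receives one subrange.
    tbb::parallel_for(tbb::blocked_range<std::size_t>(0, v.size()),
        [&](const tbb::blocked_range<std::size_t>& r) {
            for (std::size_t i = r.begin(); i != r.end(); ++i)
                v[i] *= factor;
        });
}
```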
*/ + blocked_range( blocked_range& r, proportional_split& proportion ) : + my_end(r.my_end), + my_begin(do_split(r, proportion)), + my_grainsize(r.my_grainsize) + { + // only comparison 'less than' is required from values of blocked_range objects + __TBB_ASSERT( !(my_begin < r.my_end) && !(r.my_end < my_begin), "blocked_range has been split incorrectly" ); + } + +private: + /** NOTE: my_end MUST be declared before my_begin, otherwise the splitting constructor will break. */ + Value my_end; + Value my_begin; + size_type my_grainsize; + + //! Auxiliary function used by the splitting constructor. + static Value do_split( blocked_range& r, split ) + { + __TBB_ASSERT( r.is_divisible(), "cannot split blocked_range that is not divisible" ); + Value middle = r.my_begin + (r.my_end - r.my_begin) / 2u; + r.my_end = middle; + return middle; + } + + static Value do_split( blocked_range& r, proportional_split& proportion ) + { + __TBB_ASSERT( r.is_divisible(), "cannot split blocked_range that is not divisible" ); + + // usage of 32-bit floating point arithmetic is not enough to handle ranges of + // more than 2^24 iterations accurately. However, even on ranges with 2^64 + // iterations the computational error approximately equals to 0.000001% which + // makes small impact on uniform distribution of such range's iterations (assuming + // all iterations take equal time to complete). See 'test_partitioner_whitebox' + // for implementation of an exact split algorithm + size_type right_part = size_type(float(r.size()) * float(proportion.right()) + / float(proportion.left() + proportion.right()) + 0.5f); + return r.my_end = Value(r.my_end - right_part); + } + + template<typename RowValue, typename ColValue> + friend class blocked_range2d; + + template<typename RowValue, typename ColValue, typename PageValue> + friend class blocked_range3d; + + template<typename DimValue, unsigned int N, typename> + friend class blocked_rangeNd_impl; +}; + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::blocked_range; +// Split types +using detail::split; +using detail::proportional_split; +} // namespace v1 + +} // namespace tbb + +#endif /* __TBB_blocked_range_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/blocked_range2d.h b/contrib/libs/tbb/include/oneapi/tbb/blocked_range2d.h new file mode 100644 index 0000000000..01ed17d859 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/blocked_range2d.h @@ -0,0 +1,108 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_blocked_range2d_H +#define __TBB_blocked_range2d_H + +#include <cstddef> + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" + +#include "blocked_range.h" + +namespace tbb { +namespace detail { +namespace d1 { + +//! A 2-dimensional range that models the Range concept. +/** @ingroup algorithms */ +template<typename RowValue, typename ColValue = RowValue> +class blocked_range2d { +public: + //! 
Type for size of an iteration range + using row_range_type = blocked_range<RowValue>; + using col_range_type = blocked_range<ColValue>; + +private: + row_range_type my_rows; + col_range_type my_cols; + +public: + blocked_range2d( RowValue row_begin, RowValue row_end, typename row_range_type::size_type row_grainsize, + ColValue col_begin, ColValue col_end, typename col_range_type::size_type col_grainsize ) : + my_rows(row_begin,row_end,row_grainsize), + my_cols(col_begin,col_end,col_grainsize) + {} + + blocked_range2d( RowValue row_begin, RowValue row_end, + ColValue col_begin, ColValue col_end ) : + my_rows(row_begin,row_end), + my_cols(col_begin,col_end) + {} + + //! True if range is empty + bool empty() const { + // Range is empty if at least one dimension is empty. + return my_rows.empty() || my_cols.empty(); + } + + //! True if range is divisible into two pieces. + bool is_divisible() const { + return my_rows.is_divisible() || my_cols.is_divisible(); + } + + blocked_range2d( blocked_range2d& r, split ) : + my_rows(r.my_rows), + my_cols(r.my_cols) + { + split split_obj; + do_split(r, split_obj); + } + + blocked_range2d( blocked_range2d& r, proportional_split& proportion ) : + my_rows(r.my_rows), + my_cols(r.my_cols) + { + do_split(r, proportion); + } + + //! The rows of the iteration space + const row_range_type& rows() const { return my_rows; } + + //! The columns of the iteration space + const col_range_type& cols() const { return my_cols; } + +private: + template <typename Split> + void do_split( blocked_range2d& r, Split& split_obj ) { + if ( my_rows.size()*double(my_cols.grainsize()) < my_cols.size()*double(my_rows.grainsize()) ) { + my_cols.my_begin = col_range_type::do_split(r.my_cols, split_obj); + } else { + my_rows.my_begin = row_range_type::do_split(r.my_rows, split_obj); + } + } +}; + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::blocked_range2d; +} // namespace v1 +} // namespace tbb + +#endif /* __TBB_blocked_range2d_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/blocked_range3d.h b/contrib/libs/tbb/include/oneapi/tbb/blocked_range3d.h new file mode 100644 index 0000000000..d4178050a8 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/blocked_range3d.h @@ -0,0 +1,127 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_blocked_range3d_H +#define __TBB_blocked_range3d_H + +#include <cstddef> + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" + +#include "blocked_range.h" + +namespace tbb { +namespace detail { +namespace d1 { + +//! A 3-dimensional range that models the Range concept. +/** @ingroup algorithms */ +template<typename PageValue, typename RowValue = PageValue, typename ColValue = RowValue> +class blocked_range3d { +public: + //! 
Type for size of an iteration range + using page_range_type = blocked_range<PageValue>; + using row_range_type = blocked_range<RowValue>; + using col_range_type = blocked_range<ColValue>; + +private: + page_range_type my_pages; + row_range_type my_rows; + col_range_type my_cols; + +public: + + blocked_range3d( PageValue page_begin, PageValue page_end, + RowValue row_begin, RowValue row_end, + ColValue col_begin, ColValue col_end ) : + my_pages(page_begin,page_end), + my_rows(row_begin,row_end), + my_cols(col_begin,col_end) + {} + + blocked_range3d( PageValue page_begin, PageValue page_end, typename page_range_type::size_type page_grainsize, + RowValue row_begin, RowValue row_end, typename row_range_type::size_type row_grainsize, + ColValue col_begin, ColValue col_end, typename col_range_type::size_type col_grainsize ) : + my_pages(page_begin,page_end,page_grainsize), + my_rows(row_begin,row_end,row_grainsize), + my_cols(col_begin,col_end,col_grainsize) + {} + + //! True if range is empty + bool empty() const { + // Range is empty if at least one dimension is empty. + return my_pages.empty() || my_rows.empty() || my_cols.empty(); + } + + //! True if range is divisible into two pieces. + bool is_divisible() const { + return my_pages.is_divisible() || my_rows.is_divisible() || my_cols.is_divisible(); + } + + blocked_range3d( blocked_range3d& r, split split_obj ) : + my_pages(r.my_pages), + my_rows(r.my_rows), + my_cols(r.my_cols) + { + do_split(r, split_obj); + } + + blocked_range3d( blocked_range3d& r, proportional_split& proportion ) : + my_pages(r.my_pages), + my_rows(r.my_rows), + my_cols(r.my_cols) + { + do_split(r, proportion); + } + + //! The pages of the iteration space + const page_range_type& pages() const { return my_pages; } + + //! The rows of the iteration space + const row_range_type& rows() const { return my_rows; } + + //! The columns of the iteration space + const col_range_type& cols() const { return my_cols; } + +private: + template <typename Split> + void do_split( blocked_range3d& r, Split& split_obj) { + if ( my_pages.size()*double(my_rows.grainsize()) < my_rows.size()*double(my_pages.grainsize()) ) { + if ( my_rows.size()*double(my_cols.grainsize()) < my_cols.size()*double(my_rows.grainsize()) ) { + my_cols.my_begin = col_range_type::do_split(r.my_cols, split_obj); + } else { + my_rows.my_begin = row_range_type::do_split(r.my_rows, split_obj); + } + } else { + if ( my_pages.size()*double(my_cols.grainsize()) < my_cols.size()*double(my_pages.grainsize()) ) { + my_cols.my_begin = col_range_type::do_split(r.my_cols, split_obj); + } else { + my_pages.my_begin = page_range_type::do_split(r.my_pages, split_obj); + } + } + } +}; + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::blocked_range3d; +} // namespace v1 +} // namespace tbb + +#endif /* __TBB_blocked_range3d_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/blocked_rangeNd.h b/contrib/libs/tbb/include/oneapi/tbb/blocked_rangeNd.h new file mode 100644 index 0000000000..37b71da8fe --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/blocked_rangeNd.h @@ -0,0 +1,144 @@ +/* + Copyright (c) 2017-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
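Usage sketch (editorial aside): blocked_range2d and blocked_range3d, added above, compose one blocked_range per dimension; their do_split helpers always bisect the dimension that is largest relative to its grainsize, which keeps subranges roughly block-shaped. For example, over a row-major matrix:

```cpp
#include "oneapi/tbb/parallel_for.h"
#include "oneapi/tbb/blocked_range2d.h"
#include <cstddef>

void zero(float* m, std::size_t rows, std::size_t cols) {
    tbb::parallel_for(tbb::blocked_range2d<std::size_t>(0, rows, 0, cols),
        [&](const tbb::blocked_range2d<std::size_t>& r) {
            for (std::size_t i = r.rows().begin(); i != r.rows().end(); ++i)
                for (std::size_t j = r.cols().begin(); j != r.cols().end(); ++j)
                    m[i * cols + j] = 0.0f;
        });
}
```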
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_blocked_rangeNd_H +#define __TBB_blocked_rangeNd_H + +#if !TBB_PREVIEW_BLOCKED_RANGE_ND + #error Set TBB_PREVIEW_BLOCKED_RANGE_ND to include blocked_rangeNd.h +#endif + +#include <algorithm> // std::any_of +#include <array> +#include <cstddef> +#include <type_traits> // std::is_same, std::enable_if + +#include "detail/_config.h" +#include "detail/_template_helpers.h" // index_sequence, make_index_sequence + +#include "blocked_range.h" + +namespace tbb { +namespace detail { +namespace d1 { + +/* + The blocked_rangeNd_impl uses make_index_sequence<N> to automatically generate a ctor with + exactly N arguments of the type tbb::blocked_range<Value>. Such ctor provides an opportunity + to use braced-init-list parameters to initialize each dimension. + Use of parameters, whose representation is a braced-init-list, but they're not + std::initializer_list or a reference to one, produces a non-deduced context + within template argument deduction. + + NOTE: blocked_rangeNd must be exactly a templated alias to the blocked_rangeNd_impl + (and not e.g. a derived class), otherwise it would need to declare its own ctor + facing the same problem that the impl class solves. +*/ + +template<typename Value, unsigned int N, typename = detail::make_index_sequence<N>> +class blocked_rangeNd_impl; + +template<typename Value, unsigned int N, std::size_t... Is> +class blocked_rangeNd_impl<Value, N, detail::index_sequence<Is...>> { +public: + //! Type of a value. + using value_type = Value; + +private: + //! Helper type to construct range with N tbb::blocked_range<value_type> objects. + template<std::size_t> + using dim_type_helper = tbb::blocked_range<value_type>; + +public: + blocked_rangeNd_impl() = delete; + + //! Constructs N-dimensional range over N half-open intervals each represented as tbb::blocked_range<Value>. + blocked_rangeNd_impl(const dim_type_helper<Is>&... args) : my_dims{ {args...} } {} + + //! Dimensionality of a range. + static constexpr unsigned int ndims() { return N; } + + //! Range in certain dimension. + const tbb::blocked_range<value_type>& dim(unsigned int dimension) const { + __TBB_ASSERT(dimension < N, "out of bound"); + return my_dims[dimension]; + } + + //------------------------------------------------------------------------ + // Methods that implement Range concept + //------------------------------------------------------------------------ + + //! True if at least one dimension is empty. + bool empty() const { + return std::any_of(my_dims.begin(), my_dims.end(), [](const tbb::blocked_range<value_type>& d) { + return d.empty(); + }); + } + + //! True if at least one dimension is divisible. 
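Illustrative sketch of the braced-init-list construction that the blocked_rangeNd_impl comment above describes (preview feature, so the macro must be defined first; the include path matches this import):

```cpp
#define TBB_PREVIEW_BLOCKED_RANGE_ND 1
#include "oneapi/tbb/blocked_rangeNd.h"

// Each braced group initializes one tbb::blocked_range<int> dimension:
// {begin, end} or {begin, end, grainsize}.
tbb::blocked_rangeNd<int, 3> space({0, 64}, {0, 128, /*grainsize*/ 8}, {0, 32});
static_assert(decltype(space)::ndims() == 3, "three dimensions");
```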
+ bool is_divisible() const { + return std::any_of(my_dims.begin(), my_dims.end(), [](const tbb::blocked_range<value_type>& d) { + return d.is_divisible(); + }); + } + + blocked_rangeNd_impl(blocked_rangeNd_impl& r, proportional_split proportion) : my_dims(r.my_dims) { + do_split(r, proportion); + } + + blocked_rangeNd_impl(blocked_rangeNd_impl& r, split proportion) : my_dims(r.my_dims) { + do_split(r, proportion); + } + +private: + static_assert(N != 0, "zero dimensional blocked_rangeNd can't be constructed"); + + //! Ranges in each dimension. + std::array<tbb::blocked_range<value_type>, N> my_dims; + + template<typename split_type> + void do_split(blocked_rangeNd_impl& r, split_type proportion) { + static_assert((std::is_same<split_type, split>::value || std::is_same<split_type, proportional_split>::value), "type of split object is incorrect"); + __TBB_ASSERT(r.is_divisible(), "can't split not divisible range"); + + auto my_it = std::max_element(my_dims.begin(), my_dims.end(), [](const tbb::blocked_range<value_type>& first, const tbb::blocked_range<value_type>& second) { + return (first.size() * second.grainsize() < second.size() * first.grainsize()); + }); + + auto r_it = r.my_dims.begin() + (my_it - my_dims.begin()); + + my_it->my_begin = tbb::blocked_range<value_type>::do_split(*r_it, proportion); + + // (!(my_it->my_begin < r_it->my_end) && !(r_it->my_end < my_it->my_begin)) equals to + // (my_it->my_begin == r_it->my_end), but we can't use operator== due to Value concept + __TBB_ASSERT(!(my_it->my_begin < r_it->my_end) && !(r_it->my_end < my_it->my_begin), + "blocked_range has been split incorrectly"); + } +}; + +template<typename Value, unsigned int N> +using blocked_rangeNd = blocked_rangeNd_impl<Value, N>; + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::blocked_rangeNd; +} // namespace v1 +} // namespace tbb + +#endif /* __TBB_blocked_rangeNd_H */ + diff --git a/contrib/libs/tbb/include/oneapi/tbb/cache_aligned_allocator.h b/contrib/libs/tbb/include/oneapi/tbb/cache_aligned_allocator.h new file mode 100644 index 0000000000..645f3fbd2e --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/cache_aligned_allocator.h @@ -0,0 +1,189 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_cache_aligned_allocator_H +#define __TBB_cache_aligned_allocator_H + +#include "detail/_utils.h" +#include "detail/_namespace_injection.h" +#include <cstdlib> +#include <utility> + +#if __TBB_CPP17_MEMORY_RESOURCE_PRESENT +#error #include <memory_resource> +#endif + +namespace tbb { +namespace detail { + +namespace r1 { +void* __TBB_EXPORTED_FUNC cache_aligned_allocate(std::size_t size); +void __TBB_EXPORTED_FUNC cache_aligned_deallocate(void* p); +std::size_t __TBB_EXPORTED_FUNC cache_line_size(); +} + +namespace d1 { + +template<typename T> +class cache_aligned_allocator { +public: + using value_type = T; + using propagate_on_container_move_assignment = std::true_type; + + //! 
Always defined for TBB containers (supported since C++17 for std containers) + using is_always_equal = std::true_type; + + cache_aligned_allocator() = default; + template<typename U> cache_aligned_allocator(const cache_aligned_allocator<U>&) noexcept {} + + //! Allocate space for n objects, starting on a cache/sector line. + __TBB_nodiscard T* allocate(std::size_t n) { + return static_cast<T*>(r1::cache_aligned_allocate(n * sizeof(value_type))); + } + + //! Free block of memory that starts on a cache line + void deallocate(T* p, std::size_t) { + r1::cache_aligned_deallocate(p); + } + + //! Largest value for which method allocate might succeed. + std::size_t max_size() const noexcept { + return (~std::size_t(0) - r1::cache_line_size()) / sizeof(value_type); + } + +#if TBB_ALLOCATOR_TRAITS_BROKEN + using pointer = value_type*; + using const_pointer = const value_type*; + using reference = value_type&; + using const_reference = const value_type&; + using difference_type = std::ptrdiff_t; + using size_type = std::size_t; + template<typename U> struct rebind { + using other = cache_aligned_allocator<U>; + }; + template<typename U, typename... Args> + void construct(U *p, Args&&... args) + { ::new (p) U(std::forward<Args>(args)...); } + void destroy(pointer p) { p->~value_type(); } + pointer address(reference x) const { return &x; } + const_pointer address(const_reference x) const { return &x; } +#endif // TBB_ALLOCATOR_TRAITS_BROKEN +}; + +#if TBB_ALLOCATOR_TRAITS_BROKEN + template<> + class cache_aligned_allocator<void> { + public: + using pointer = void*; + using const_pointer = const void*; + using value_type = void; + template<typename U> struct rebind { + using other = cache_aligned_allocator<U>; + }; + }; +#endif + +template<typename T, typename U> +bool operator==(const cache_aligned_allocator<T>&, const cache_aligned_allocator<U>&) noexcept { return true; } + +#if !__TBB_CPP20_COMPARISONS_PRESENT +template<typename T, typename U> +bool operator!=(const cache_aligned_allocator<T>&, const cache_aligned_allocator<U>&) noexcept { return false; } +#endif + +#if __TBB_CPP17_MEMORY_RESOURCE_PRESENT + +//! C++17 memory resource wrapper to ensure cache line size alignment +class cache_aligned_resource : public std::pmr::memory_resource { +public: + cache_aligned_resource() : cache_aligned_resource(std::pmr::get_default_resource()) {} + explicit cache_aligned_resource(std::pmr::memory_resource* upstream) : m_upstream(upstream) {} + + std::pmr::memory_resource* upstream_resource() const { + return m_upstream; + } + +private: + //! We don't know what memory resource set. Use padding to guarantee alignment + void* do_allocate(std::size_t bytes, std::size_t alignment) override { + // TODO: make it common with tbb_allocator.cpp + std::size_t cache_line_alignment = correct_alignment(alignment); + std::size_t space = correct_size(bytes) + cache_line_alignment; + std::uintptr_t base = reinterpret_cast<std::uintptr_t>(m_upstream->allocate(space)); + __TBB_ASSERT(base != 0, "Upstream resource returned NULL."); + + // Round up to the next cache line (align the base address) + std::uintptr_t result = (base + cache_line_alignment) & ~(cache_line_alignment - 1); + __TBB_ASSERT((result - base) >= sizeof(std::uintptr_t), "Can`t store a base pointer to the header"); + __TBB_ASSERT(space - (result - base) >= bytes, "Not enough space for the storage"); + + // Record where block actually starts. 
+ (reinterpret_cast<std::uintptr_t*>(result))[-1] = base; + return reinterpret_cast<void*>(result); + } + + void do_deallocate(void* ptr, std::size_t bytes, std::size_t alignment) override { + if (ptr) { + // Recover where block actually starts + std::uintptr_t base = (reinterpret_cast<std::uintptr_t*>(ptr))[-1]; + m_upstream->deallocate(reinterpret_cast<void*>(base), correct_size(bytes) + correct_alignment(alignment)); + } + } + + bool do_is_equal(const std::pmr::memory_resource& other) const noexcept override { + if (this == &other) { return true; } +#if __TBB_USE_OPTIONAL_RTTI + const cache_aligned_resource* other_res = dynamic_cast<const cache_aligned_resource*>(&other); + return other_res && (upstream_resource() == other_res->upstream_resource()); +#else + return false; +#endif + } + + std::size_t correct_alignment(std::size_t alignment) { + __TBB_ASSERT(tbb::detail::is_power_of_two(alignment), "Alignment is not a power of 2"); +#if __TBB_CPP17_HW_INTERFERENCE_SIZE_PRESENT + std::size_t cache_line_size = std::hardware_destructive_interference_size; +#else + std::size_t cache_line_size = r1::cache_line_size(); +#endif + return alignment < cache_line_size ? cache_line_size : alignment; + } + + std::size_t correct_size(std::size_t bytes) { + // To handle the case, when small size requested. There could be not + // enough space to store the original pointer. + return bytes < sizeof(std::uintptr_t) ? sizeof(std::uintptr_t) : bytes; + } + + std::pmr::memory_resource* m_upstream; +}; + +#endif // __TBB_CPP17_MEMORY_RESOURCE_PRESENT + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::cache_aligned_allocator; +#if __TBB_CPP17_MEMORY_RESOURCE_PRESENT +using detail::d1::cache_aligned_resource; +#endif +} // namespace v1 +} // namespace tbb + +#endif /* __TBB_cache_aligned_allocator_H */ + diff --git a/contrib/libs/tbb/include/oneapi/tbb/combinable.h b/contrib/libs/tbb/include/oneapi/tbb/combinable.h new file mode 100644 index 0000000000..b676a30cc0 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/combinable.h @@ -0,0 +1,69 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_combinable_H +#define __TBB_combinable_H + +#include "detail/_namespace_injection.h" + +#include "enumerable_thread_specific.h" +#include "cache_aligned_allocator.h" + +namespace tbb { +namespace detail { +namespace d1 { +/** \name combinable **/ +//@{ +//! 
Thread-local storage with optional reduction +/** @ingroup containers */ +template <typename T> +class combinable { + using my_alloc = typename tbb::cache_aligned_allocator<T>; + using my_ets_type = typename tbb::enumerable_thread_specific<T, my_alloc, ets_no_key>; + my_ets_type my_ets; + +public: + combinable() = default; + + template <typename Finit> + explicit combinable(Finit _finit) : my_ets(_finit) { } + + void clear() { my_ets.clear(); } + + T& local() { return my_ets.local(); } + + T& local(bool& exists) { return my_ets.local(exists); } + + // combine_func_t has signature T(T,T) or T(const T&, const T&) + template <typename CombineFunc> + T combine(CombineFunc f_combine) { return my_ets.combine(f_combine); } + + // combine_func_t has signature void(T) or void(const T&) + template <typename CombineFunc> + void combine_each(CombineFunc f_combine) { my_ets.combine_each(f_combine); } +}; + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::combinable; +} // inline namespace v1 + +} // namespace tbb + +#endif /* __TBB_combinable_H */ + diff --git a/contrib/libs/tbb/include/oneapi/tbb/concurrent_hash_map.h b/contrib/libs/tbb/include/oneapi/tbb/concurrent_hash_map.h new file mode 100644 index 0000000000..510557e9f2 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/concurrent_hash_map.h @@ -0,0 +1,1524 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
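Usage sketch (editorial aside, not part of the patch): combinable keeps one T per thread (backed by enumerable_thread_specific over cache_aligned_allocator, as shown above) and merges the per-thread copies on demand:

```cpp
#include "oneapi/tbb/combinable.h"
#include "oneapi/tbb/parallel_for.h"
#include "oneapi/tbb/blocked_range.h"
#include <cstddef>

long parallel_sum(const int* data, std::size_t n) {
    tbb::combinable<long> partial([] { return 0L; });  // per-thread initial value
    tbb::parallel_for(tbb::blocked_range<std::size_t>(0, n),
        [&](const tbb::blocked_range<std::size_t>& r) {
            long& local = partial.local();             // this thread's accumulator
            for (std::size_t i = r.begin(); i != r.end(); ++i)
                local += data[i];
        });
    return partial.combine([](long a, long b) { return a + b; });
}
```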
+*/ + +#ifndef __TBB_concurrent_hash_map_H +#define __TBB_concurrent_hash_map_H + +#include "detail/_namespace_injection.h" +#include "detail/_utils.h" +#include "detail/_assert.h" +#include "detail/_allocator_traits.h" +#include "detail/_containers_helpers.h" +#include "detail/_template_helpers.h" +#include "detail/_hash_compare.h" +#include "detail/_range_common.h" +#include "tbb_allocator.h" +#include "spin_rw_mutex.h" + +#include <atomic> +#include <initializer_list> +#include <tuple> +#include <iterator> +#include <utility> // Need std::pair +#include <cstring> // Need std::memset + +namespace tbb { +namespace detail { +namespace d1 { + +struct hash_map_node_base : no_copy { + using mutex_type = spin_rw_mutex; + // Scoped lock type for mutex + using scoped_type = mutex_type::scoped_lock; + // Next node in chain + hash_map_node_base* next; + mutex_type mutex; +}; + +// Incompleteness flag value +static hash_map_node_base* const rehash_req = reinterpret_cast<hash_map_node_base*>(std::size_t(3)); +// Rehashed empty bucket flag +static hash_map_node_base* const empty_rehashed = reinterpret_cast<hash_map_node_base*>(std::size_t(0)); + +// base class of concurrent_hash_map + +template <typename Allocator> +class hash_map_base { +public: + using size_type = std::size_t; + using hashcode_type = std::size_t; + using segment_index_type = std::size_t; + using node_base = hash_map_node_base; + + struct bucket : no_copy { + using mutex_type = spin_rw_mutex; + using scoped_type = mutex_type::scoped_lock; + + bucket() : node_list(nullptr) {} + bucket( node_base* ptr ) : node_list(ptr) {} + + mutex_type mutex; + std::atomic<node_base*> node_list; + }; + + using allocator_type = Allocator; + using allocator_traits_type = tbb::detail::allocator_traits<allocator_type>; + using bucket_allocator_type = typename allocator_traits_type::template rebind_alloc<bucket>; + using bucket_allocator_traits = tbb::detail::allocator_traits<bucket_allocator_type>; + + // Count of segments in the first block + static constexpr size_type embedded_block = 1; + // Count of segments in the first block + static constexpr size_type embedded_buckets = 1 << embedded_block; + // Count of segments in the first block + static constexpr size_type first_block = 8; //including embedded_block. perfect with bucket size 16, so the allocations are power of 4096 + // Size of a pointer / table size + static constexpr size_type pointers_per_table = sizeof(segment_index_type) * 8; // one segment per bit + + using segment_ptr_type = bucket*; + using atomic_segment_type = std::atomic<segment_ptr_type>; + using segments_table_type = atomic_segment_type[pointers_per_table]; + + hash_map_base( const allocator_type& alloc ) : my_allocator(alloc), my_mask(embedded_buckets - 1), my_size(0) { + for (size_type i = 0; i != embedded_buckets; ++i) { + my_embedded_segment[i].node_list.store(nullptr, std::memory_order_relaxed); + } + + for (size_type segment_index = 0; segment_index < pointers_per_table; ++segment_index) { + auto argument = segment_index < embedded_block ? 
my_embedded_segment + segment_base(segment_index) : nullptr; + my_table[segment_index].store(argument, std::memory_order_relaxed); + } + + __TBB_ASSERT( embedded_block <= first_block, "The first block number must include embedded blocks"); + } + + // segment index of given index in the array + static segment_index_type segment_index_of( size_type index ) { + return segment_index_type(tbb::detail::log2( index|1 )); + } + + // the first array index of given segment + static segment_index_type segment_base( segment_index_type k ) { + return (segment_index_type(1) << k & ~segment_index_type(1)); + } + + // segment size except for k == 0 + static size_type segment_size( segment_index_type k ) { + return size_type(1) << k; // fake value for k==0 + } + + // true if ptr is valid pointer + static bool is_valid( void* ptr ) { + return reinterpret_cast<uintptr_t>(ptr) > uintptr_t(63); + } + + template <typename... Args> + void init_buckets_impl( segment_ptr_type ptr, size_type sz, Args&&... args ) { + for (size_type i = 0; i < sz; ++i) { + bucket_allocator_traits::construct(my_allocator, ptr + i, std::forward<Args>(args)...); + } + } + + // Initialize buckets + void init_buckets( segment_ptr_type ptr, size_type sz, bool is_initial ) { + if (is_initial) { + init_buckets_impl(ptr, sz); + } else { + init_buckets_impl(ptr, sz, reinterpret_cast<node_base*>(rehash_req)); + } + } + + // Add node n to bucket b + static void add_to_bucket( bucket* b, node_base* n ) { + __TBB_ASSERT(b->node_list.load(std::memory_order_relaxed) != rehash_req, nullptr); + n->next = b->node_list.load(std::memory_order_relaxed); + b->node_list.store(n, std::memory_order_relaxed); // its under lock and flag is set + } + + const bucket_allocator_type& get_allocator() const { + return my_allocator; + } + + bucket_allocator_type& get_allocator() { + return my_allocator; + } + + // Enable segment + void enable_segment( segment_index_type k, bool is_initial = false ) { + __TBB_ASSERT( k, "Zero segment must be embedded" ); + size_type sz; + __TBB_ASSERT( !is_valid(my_table[k].load(std::memory_order_relaxed)), "Wrong concurrent assignment"); + if (k >= first_block) { + sz = segment_size(k); + segment_ptr_type ptr = nullptr; + try_call( [&] { + ptr = bucket_allocator_traits::allocate(my_allocator, sz); + } ).on_exception( [&] { + my_table[k].store(nullptr, std::memory_order_relaxed); + }); + + __TBB_ASSERT(ptr, nullptr); + init_buckets(ptr, sz, is_initial); + my_table[k].store(ptr, std::memory_order_release); + sz <<= 1;// double it to get entire capacity of the container + } else { // the first block + __TBB_ASSERT( k == embedded_block, "Wrong segment index" ); + sz = segment_size(first_block); + segment_ptr_type ptr = nullptr; + try_call( [&] { + ptr = bucket_allocator_traits::allocate(my_allocator, sz - embedded_buckets); + } ).on_exception( [&] { + my_table[k].store(nullptr, std::memory_order_relaxed); + }); + + __TBB_ASSERT(ptr, nullptr); + init_buckets(ptr, sz - embedded_buckets, is_initial); + ptr -= segment_base(embedded_block); + for(segment_index_type i = embedded_block; i < first_block; i++) // calc the offsets + my_table[i].store(ptr + segment_base(i), std::memory_order_release); + } + my_mask.store(sz-1, std::memory_order_release); + } + + void delete_segment( segment_index_type s ) { + segment_ptr_type buckets_ptr = my_table[s].load(std::memory_order_relaxed); + size_type sz = segment_size( s ? 
s : 1 ); + + size_type deallocate_size = 0; + + if (s >= first_block) { // the first segment or the next + deallocate_size = sz; + } else if (s == embedded_block && embedded_block != first_block) { + deallocate_size = segment_size(first_block) - embedded_buckets; + } + + for (size_type i = 0; i < deallocate_size; ++i) { + bucket_allocator_traits::destroy(my_allocator, buckets_ptr + i); + } + if (deallocate_size != 0) { + bucket_allocator_traits::deallocate(my_allocator, buckets_ptr, deallocate_size); + } + + if (s >= embedded_block) my_table[s].store(nullptr, std::memory_order_relaxed); + } + + // Get bucket by (masked) hashcode + bucket *get_bucket( hashcode_type h ) const noexcept { + segment_index_type s = segment_index_of( h ); + h -= segment_base(s); + segment_ptr_type seg = my_table[s].load(std::memory_order_acquire); + __TBB_ASSERT( is_valid(seg), "hashcode must be cut by valid mask for allocated segments" ); + return &seg[h]; + } + + // detail serial rehashing helper + void mark_rehashed_levels( hashcode_type h ) noexcept { + segment_index_type s = segment_index_of( h ); + while (segment_ptr_type seg = my_table[++s].load(std::memory_order_relaxed)) + if( seg[h].node_list.load(std::memory_order_relaxed) == rehash_req ) { + seg[h].node_list.store(empty_rehashed, std::memory_order_relaxed); + mark_rehashed_levels( h + ((hashcode_type)1<<s) ); // optimized segment_base(s) + } + } + + // Check for mask race + // Splitting into two functions should help inlining + inline bool check_mask_race( const hashcode_type h, hashcode_type &m ) const { + hashcode_type m_now, m_old = m; + m_now = my_mask.load(std::memory_order_acquire); + if (m_old != m_now) { + return check_rehashing_collision(h, m_old, m = m_now); + } + return false; + } + + // Process mask race, check for rehashing collision + bool check_rehashing_collision( const hashcode_type h, hashcode_type m_old, hashcode_type m ) const { + __TBB_ASSERT(m_old != m, nullptr); // TODO?: m arg could be optimized out by passing h = h&m + if( (h & m_old) != (h & m) ) { // mask changed for this hashcode, rare event + // condition above proves that 'h' has some other bits set beside 'm_old' + // find next applicable mask after m_old //TODO: look at bsl instruction + for( ++m_old; !(h & m_old); m_old <<= 1 ) // at maximum few rounds depending on the first block size + ; + m_old = (m_old<<1) - 1; // get full mask from a bit + __TBB_ASSERT((m_old&(m_old+1))==0 && m_old <= m, nullptr); + // check whether it is rehashing/ed + if( get_bucket(h & m_old)->node_list.load(std::memory_order_acquire) != rehash_req ) { + return true; + } + } + return false; + } + + // Insert a node and check for load factor. @return segment index to enable. 
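Editorial note on the segmented bucket table used by hash_map_base: segment k (for k >= 1) holds 2^k buckets starting at flat index 2^k, while the embedded segment 0 holds the first two, so the table can grow without relocating existing buckets. A small standalone check that mirrors segment_index_of/segment_base (illustration only, not the library code):

```cpp
#include <cassert>
#include <cstddef>

static std::size_t log2i(std::size_t x) { std::size_t r = 0; while (x >>= 1) ++r; return r; }
static std::size_t segment_index_of(std::size_t index) { return log2i(index | 1); }
static std::size_t segment_base(std::size_t k) { return (std::size_t(1) << k) & ~std::size_t(1); }

int main() {
    assert(segment_index_of(0) == 0 && segment_index_of(1) == 0); // embedded segment
    assert(segment_index_of(5) == 2 && segment_base(2) == 4);     // segment 2 starts at 4
    assert(5 - segment_base(segment_index_of(5)) == 1);           // offset inside segment
}
```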
+ segment_index_type insert_new_node( bucket *b, node_base *n, hashcode_type mask ) { + size_type sz = ++my_size; // prefix form is to enforce allocation after the first item inserted + add_to_bucket( b, n ); + // check load factor + if( sz >= mask ) { // TODO: add custom load_factor + segment_index_type new_seg = tbb::detail::log2( mask+1 ); //optimized segment_index_of + __TBB_ASSERT( is_valid(my_table[new_seg-1].load(std::memory_order_relaxed)), "new allocations must not publish new mask until segment has allocated"); + static const segment_ptr_type is_allocating = segment_ptr_type(2);; + segment_ptr_type disabled = nullptr; + if (!(my_table[new_seg].load(std::memory_order_acquire)) + && my_table[new_seg].compare_exchange_strong(disabled, is_allocating)) + return new_seg; // The value must be processed + } + return 0; + } + + // Prepare enough segments for number of buckets + void reserve(size_type buckets) { + if( !buckets-- ) return; + bool is_initial = !my_size.load(std::memory_order_relaxed); + for (size_type m = my_mask.load(std::memory_order_relaxed); buckets > m; + m = my_mask.load(std::memory_order_relaxed)) + { + enable_segment( segment_index_of( m+1 ), is_initial ); + } + } + + // Swap hash_map_bases + void internal_swap_content(hash_map_base &table) { + using std::swap; + swap_atomics_relaxed(my_mask, table.my_mask); + swap_atomics_relaxed(my_size, table.my_size); + + for(size_type i = 0; i < embedded_buckets; i++) { + auto temp = my_embedded_segment[i].node_list.load(std::memory_order_relaxed); + my_embedded_segment[i].node_list.store(table.my_embedded_segment[i].node_list.load(std::memory_order_relaxed), + std::memory_order_relaxed); + table.my_embedded_segment[i].node_list.store(temp, std::memory_order_relaxed); + } + for(size_type i = embedded_block; i < pointers_per_table; i++) { + auto temp = my_table[i].load(std::memory_order_relaxed); + my_table[i].store(table.my_table[i].load(std::memory_order_relaxed), + std::memory_order_relaxed); + table.my_table[i].store(temp, std::memory_order_relaxed); + } + } + + void internal_move(hash_map_base&& other) { + my_mask.store(other.my_mask.load(std::memory_order_relaxed), std::memory_order_relaxed); + other.my_mask.store(embedded_buckets - 1, std::memory_order_relaxed); + + my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); + other.my_size.store(0, std::memory_order_relaxed); + + for (size_type i = 0; i < embedded_buckets; ++i) { + my_embedded_segment[i].node_list.store(other.my_embedded_segment[i].node_list, std::memory_order_relaxed); + other.my_embedded_segment[i].node_list.store(nullptr, std::memory_order_relaxed); + } + + for (size_type i = embedded_block; i < pointers_per_table; ++i) { + my_table[i].store(other.my_table[i].load(std::memory_order_relaxed), + std::memory_order_relaxed); + other.my_table[i].store(nullptr, std::memory_order_relaxed); + } + } + +protected: + + bucket_allocator_type my_allocator; + // Hash mask = sum of allocated segment sizes - 1 + std::atomic<hashcode_type> my_mask; + // Size of container in stored items + std::atomic<size_type> my_size; // It must be in separate cache line from my_mask due to performance effects + // Zero segment + bucket my_embedded_segment[embedded_buckets]; + // Segment pointers table. 
Also prevents false sharing between my_mask and my_size + segments_table_type my_table; +}; + +template <typename Iterator> +class hash_map_range; + +// Meets requirements of a forward iterator for STL +// Value is either the T or const T type of the container. +template <typename Container, typename Value> +class hash_map_iterator { + using map_type = Container; + using node = typename Container::node; + using map_base = typename Container::base_type; + using node_base = typename map_base::node_base; + using bucket = typename map_base::bucket; +public: + using value_type = Value; + using size_type = typename Container::size_type; + using difference_type = typename Container::difference_type; + using pointer = value_type*; + using reference = value_type&; + using iterator_category = std::forward_iterator_tag; + + // Construct undefined iterator + hash_map_iterator(): my_map(), my_index(), my_bucket(), my_node() {} + hash_map_iterator( const hash_map_iterator<Container, typename Container::value_type>& other ) : + my_map(other.my_map), + my_index(other.my_index), + my_bucket(other.my_bucket), + my_node(other.my_node) + {} + + hash_map_iterator& operator=( const hash_map_iterator<Container, typename Container::value_type>& other ) { + my_map = other.my_map; + my_index = other.my_index; + my_bucket = other.my_bucket; + my_node = other.my_node; + return *this; + } + + Value& operator*() const { + __TBB_ASSERT( map_base::is_valid(my_node), "iterator uninitialized or at end of container?" ); + return my_node->value(); + } + + Value* operator->() const {return &operator*();} + + hash_map_iterator& operator++() { + my_node = static_cast<node*>( my_node->next ); + if( !my_node ) advance_to_next_bucket(); + return *this; + } + + // Post increment + hash_map_iterator operator++(int) { + hash_map_iterator old(*this); + operator++(); + return old; + } +private: + template <typename C, typename T, typename U> + friend bool operator==( const hash_map_iterator<C,T>& i, const hash_map_iterator<C,U>& j ); + + template <typename C, typename T, typename U> + friend bool operator!=( const hash_map_iterator<C,T>& i, const hash_map_iterator<C,U>& j ); + + template <typename C, typename T, typename U> + friend ptrdiff_t operator-( const hash_map_iterator<C,T>& i, const hash_map_iterator<C,U>& j ); + + template <typename C, typename U> + friend class hash_map_iterator; + + template <typename I> + friend class hash_map_range; + + void advance_to_next_bucket() { // TODO?: refactor to iterator_base class + size_t k = my_index+1; + __TBB_ASSERT( my_bucket, "advancing an invalid iterator?"); + while (k <= my_map->my_mask.load(std::memory_order_relaxed)) { + // Following test uses 2's-complement wizardry + if( k&(k-2) ) // not the beginning of a segment + ++my_bucket; + else my_bucket = my_map->get_bucket( k ); + my_node = static_cast<node*>( my_bucket->node_list.load(std::memory_order_relaxed) ); + if( map_base::is_valid(my_node) ) { + my_index = k; return; + } + ++k; + } + my_bucket = 0; my_node = 0; my_index = k; // the end + } + + template <typename Key, typename T, typename HashCompare, typename A> + friend class concurrent_hash_map; + + hash_map_iterator( const Container &map, std::size_t index, const bucket *b, node_base *n ) : + my_map(&map), my_index(index), my_bucket(b), my_node(static_cast<node*>(n)) + { + if( b && !map_base::is_valid(n) ) + advance_to_next_bucket(); + } + + // concurrent_hash_map over which we are iterating. 
+ const Container *my_map; + // Index in hash table for current item + size_t my_index; + // Pointer to bucket + const bucket* my_bucket; + // Pointer to node that has current item + node* my_node; +}; + +template <typename Container, typename T, typename U> +bool operator==( const hash_map_iterator<Container,T>& i, const hash_map_iterator<Container,U>& j ) { + return i.my_node == j.my_node && i.my_map == j.my_map; +} + +template <typename Container, typename T, typename U> +bool operator!=( const hash_map_iterator<Container,T>& i, const hash_map_iterator<Container,U>& j ) { + return i.my_node != j.my_node || i.my_map != j.my_map; +} + +// Range class used with concurrent_hash_map +template <typename Iterator> +class hash_map_range { + using map_type = typename Iterator::map_type; +public: + // Type for size of a range + using size_type = std::size_t; + using value_type = typename Iterator::value_type; + using reference = typename Iterator::reference; + using difference_type = typename Iterator::difference_type; + using iterator = Iterator; + + // True if range is empty. + bool empty() const {return my_begin == my_end;} + + // True if range can be partitioned into two subranges. + bool is_divisible() const { + return my_midpoint != my_end; + } + + // Split range. + hash_map_range( hash_map_range& r, split ) : + my_end(r.my_end), + my_grainsize(r.my_grainsize) + { + r.my_end = my_begin = r.my_midpoint; + __TBB_ASSERT( !empty(), "Splitting despite the range is not divisible" ); + __TBB_ASSERT( !r.empty(), "Splitting despite the range is not divisible" ); + set_midpoint(); + r.set_midpoint(); + } + + // Init range with container and grainsize specified + hash_map_range( const map_type &map, size_type grainsize_ = 1 ) : + my_begin( Iterator( map, 0, map.my_embedded_segment, map.my_embedded_segment->node_list.load(std::memory_order_relaxed) ) ), + my_end( Iterator( map, map.my_mask.load(std::memory_order_relaxed) + 1, 0, 0 ) ), + my_grainsize( grainsize_ ) + { + __TBB_ASSERT( grainsize_>0, "grainsize must be positive" ); + set_midpoint(); + } + + const Iterator begin() const { return my_begin; } + const Iterator end() const { return my_end; } + // The grain size for this range. + size_type grainsize() const { return my_grainsize; } + +private: + Iterator my_begin; + Iterator my_end; + mutable Iterator my_midpoint; + size_t my_grainsize; + // Set my_midpoint to point approximately half way between my_begin and my_end. 
+ void set_midpoint() const; + template <typename U> friend class hash_map_range; +}; + +template <typename Iterator> +void hash_map_range<Iterator>::set_midpoint() const { + // Split by groups of nodes + size_t m = my_end.my_index-my_begin.my_index; + if( m > my_grainsize ) { + m = my_begin.my_index + m/2u; + auto b = my_begin.my_map->get_bucket(m); + my_midpoint = Iterator(*my_begin.my_map,m,b,b->node_list.load(std::memory_order_relaxed)); + } else { + my_midpoint = my_end; + } + __TBB_ASSERT( my_begin.my_index <= my_midpoint.my_index, + "my_begin is after my_midpoint" ); + __TBB_ASSERT( my_midpoint.my_index <= my_end.my_index, + "my_midpoint is after my_end" ); + __TBB_ASSERT( my_begin != my_midpoint || my_begin == my_end, + "[my_begin, my_midpoint) range should not be empty" ); +} + +template <typename Key, typename T, + typename HashCompare = tbb_hash_compare<Key>, + typename Allocator = tbb_allocator<std::pair<const Key, T>>> +class concurrent_hash_map : protected hash_map_base<Allocator> { + template <typename Container, typename Value> + friend class hash_map_iterator; + + template <typename I> + friend class hash_map_range; + using allocator_traits_type = tbb::detail::allocator_traits<Allocator>; +public: + using base_type = hash_map_base<Allocator>; + using key_type = Key; + using mapped_type = T; + // type_identity is needed to disable implicit deduction guides for std::initializer_list constructors + // and copy/move constructor with explicit allocator argument + using allocator_type = tbb::detail::type_identity_t<Allocator>; + using hash_compare_type = tbb::detail::type_identity_t<HashCompare>; + using value_type = std::pair<const Key, T>; + using size_type = typename base_type::size_type; + using difference_type = std::ptrdiff_t; + + using pointer = typename allocator_traits_type::pointer; + using const_pointer = typename allocator_traits_type::const_pointer; + + using reference = value_type&; + using const_reference = const value_type&; + using iterator = hash_map_iterator<concurrent_hash_map, value_type>; + using const_iterator = hash_map_iterator<concurrent_hash_map, const value_type>; + using range_type = hash_map_range<iterator>; + using const_range_type = hash_map_range<const_iterator>; + +protected: + static_assert(std::is_same<value_type, typename Allocator::value_type>::value, + "value_type of the container must be the same as its allocator's"); + + friend class const_accessor; + class node; + using segment_index_type = typename base_type::segment_index_type; + using segment_ptr_type = typename base_type::segment_ptr_type; + using node_base = typename base_type::node_base; + using bucket = typename base_type::bucket; + using hashcode_type = typename base_type::hashcode_type; + using bucket_allocator_type = typename base_type::bucket_allocator_type; + using node_allocator_type = typename base_type::allocator_traits_type::template rebind_alloc<node>; + using node_allocator_traits = tbb::detail::allocator_traits<node_allocator_type>; + hash_compare_type my_hash_compare; + + class node : public node_base { + public: + node() {} + ~node() {} + pointer storage() { return &my_value; } + value_type& value() { return *storage(); } + private: + union { + value_type my_value; + }; + }; + + void delete_node( node_base *n ) { + node_allocator_type node_allocator(this->get_allocator()); + node_allocator_traits::destroy(node_allocator, static_cast<node*>(n)->storage()); + node_allocator_traits::destroy(node_allocator, static_cast<node*>(n)); + 
node_allocator_traits::deallocate(node_allocator, static_cast<node*>(n), 1); + } + + template <typename... Args> + static node* create_node(bucket_allocator_type& allocator, Args&&... args) { + node_allocator_type node_allocator(allocator); + node* node_ptr = node_allocator_traits::allocate(node_allocator, 1); + auto guard = make_raii_guard([&] { + node_allocator_traits::destroy(node_allocator, node_ptr); + node_allocator_traits::deallocate(node_allocator, node_ptr, 1); + }); + + node_allocator_traits::construct(node_allocator, node_ptr); + node_allocator_traits::construct(node_allocator, node_ptr->storage(), std::forward<Args>(args)...); + guard.dismiss(); + return node_ptr; + } + + static node* allocate_node_copy_construct(bucket_allocator_type& allocator, const Key &key, const T * t){ + return create_node(allocator, key, *t); + } + + static node* allocate_node_move_construct(bucket_allocator_type& allocator, const Key &key, const T * t){ + return create_node(allocator, key, std::move(*const_cast<T*>(t))); + } + + static node* allocate_node_default_construct(bucket_allocator_type& allocator, const Key &key, const T * ){ + // Emplace construct an empty T object inside the pair + return create_node(allocator, std::piecewise_construct, + std::forward_as_tuple(key), std::forward_as_tuple()); + } + + static node* do_not_allocate_node(bucket_allocator_type& , const Key &, const T * ){ + __TBB_ASSERT(false,"this dummy function should not be called"); + return nullptr; + } + + node *search_bucket( const key_type &key, bucket *b ) const { + node *n = static_cast<node*>( b->node_list.load(std::memory_order_relaxed) ); + while (this->is_valid(n) && !my_hash_compare.equal(key, n->value().first)) + n = static_cast<node*>( n->next ); + __TBB_ASSERT(n != rehash_req, "Search can be executed only for rehashed bucket"); + return n; + } + + // bucket accessor is to find, rehash, acquire a lock, and access a bucket + class bucket_accessor : public bucket::scoped_type { + bucket *my_b; + public: + bucket_accessor( concurrent_hash_map *base, const hashcode_type h, bool writer = false ) { acquire( base, h, writer ); } + // find a bucket by masked hashcode, optionally rehash, and acquire the lock + inline void acquire( concurrent_hash_map *base, const hashcode_type h, bool writer = false ) { + my_b = base->get_bucket( h ); + // TODO: actually, notification is unnecessary here, just hiding double-check + if( my_b->node_list.load(std::memory_order_acquire) == rehash_req + && bucket::scoped_type::try_acquire( my_b->mutex, /*write=*/true ) ) + { + if( my_b->node_list.load(std::memory_order_relaxed) == rehash_req ) base->rehash_bucket( my_b, h ); //recursive rehashing + } + else bucket::scoped_type::acquire( my_b->mutex, writer ); + __TBB_ASSERT( my_b->node_list.load(std::memory_order_relaxed) != rehash_req, nullptr); + } + // check whether bucket is locked for write + bool is_writer() { return bucket::scoped_type::m_is_writer; } + // get bucket pointer + bucket *operator() () { return my_b; } + }; + + // TODO refactor to hash_base + void rehash_bucket( bucket *b_new, const hashcode_type hash ) { + __TBB_ASSERT( *(intptr_t*)(&b_new->mutex), "b_new must be locked (for write)"); + __TBB_ASSERT( hash > 1, "The lowermost buckets can't be rehashed" ); + b_new->node_list.store(empty_rehashed, std::memory_order_release); // mark rehashed + hashcode_type mask = (1u << tbb::detail::log2(hash)) - 1; // get parent mask from the topmost bit + bucket_accessor b_old( this, hash & mask ); + + mask = (mask<<1) | 1; // get full mask 
for new bucket + __TBB_ASSERT( (mask&(mask+1))==0 && (hash & mask) == hash, nullptr ); + restart: + node_base* prev = nullptr; + node_base* curr = b_old()->node_list.load(std::memory_order_acquire); + while (this->is_valid(curr)) { + hashcode_type curr_node_hash = my_hash_compare.hash(static_cast<node*>(curr)->value().first); + + if ((curr_node_hash & mask) == hash) { + if (!b_old.is_writer()) { + if (!b_old.upgrade_to_writer()) { + goto restart; // node ptr can be invalid due to concurrent erase + } + } + node_base* next = curr->next; + // exclude from b_old + if (prev == nullptr) { + b_old()->node_list.store(curr->next, std::memory_order_relaxed); + } else { + prev->next = curr->next; + } + this->add_to_bucket(b_new, curr); + curr = next; + } else { + prev = curr; + curr = curr->next; + } + } + } + +public: + + class accessor; + // Combines data access, locking, and garbage collection. + class const_accessor : private node::scoped_type /*which derived from no_copy*/ { + friend class concurrent_hash_map<Key,T,HashCompare,Allocator>; + friend class accessor; + public: + // Type of value + using value_type = const typename concurrent_hash_map::value_type; + + // True if result is empty. + bool empty() const { return !my_node; } + + // Set to null + void release() { + if( my_node ) { + node::scoped_type::release(); + my_node = 0; + } + } + + // Return reference to associated value in hash table. + const_reference operator*() const { + __TBB_ASSERT( my_node, "attempt to dereference empty accessor" ); + return my_node->value(); + } + + // Return pointer to associated value in hash table. + const_pointer operator->() const { + return &operator*(); + } + + // Create empty result + const_accessor() : my_node(nullptr) {} + + // Destroy result after releasing the underlying reference. + ~const_accessor() { + my_node = nullptr; // scoped lock's release() is called in its destructor + } + protected: + bool is_writer() { return node::scoped_type::m_is_writer; } + node *my_node; + hashcode_type my_hash; + }; + + // Allows write access to elements and combines data access, locking, and garbage collection. + class accessor: public const_accessor { + public: + // Type of value + using value_type = typename concurrent_hash_map::value_type; + + // Return reference to associated value in hash table. + reference operator*() const { + __TBB_ASSERT( this->my_node, "attempt to dereference empty accessor" ); + return this->my_node->value(); + } + + // Return pointer to associated value in hash table. + pointer operator->() const { + return &operator*(); + } + }; + + explicit concurrent_hash_map( const hash_compare_type& compare, const allocator_type& a = allocator_type() ) + : base_type(a) + , my_hash_compare(compare) + {} + + concurrent_hash_map() : concurrent_hash_map(hash_compare_type()) {} + + explicit concurrent_hash_map( const allocator_type& a ) + : concurrent_hash_map(hash_compare_type(), a) + {} + + // Construct empty table with n preallocated buckets. This number serves also as initial concurrency level. 
+ concurrent_hash_map( size_type n, const allocator_type &a = allocator_type() ) + : concurrent_hash_map(a) + { + this->reserve(n); + } + + concurrent_hash_map( size_type n, const hash_compare_type& compare, const allocator_type& a = allocator_type() ) + : concurrent_hash_map(compare, a) + { + this->reserve(n); + } + + // Copy constructor + concurrent_hash_map( const concurrent_hash_map &table ) + : concurrent_hash_map(node_allocator_traits::select_on_container_copy_construction(table.get_allocator())) + { + try_call( [&] { + internal_copy(table); + }).on_exception( [&] { + this->clear(); + }); + } + + concurrent_hash_map( const concurrent_hash_map &table, const allocator_type &a) + : concurrent_hash_map(a) + { + try_call( [&] { + internal_copy(table); + }).on_exception( [&] { + this->clear(); + }); + } + + // Move constructor + concurrent_hash_map( concurrent_hash_map &&table ) + : concurrent_hash_map(std::move(table.get_allocator())) + { + this->internal_move(std::move(table)); + } + + // Move constructor + concurrent_hash_map( concurrent_hash_map &&table, const allocator_type &a ) + : concurrent_hash_map(a) + { + using is_equal_type = typename node_allocator_traits::is_always_equal; + internal_move_construct_with_allocator(std::move(table), a, is_equal_type()); + } + + // Construction with copying iteration range and given allocator instance + template <typename I> + concurrent_hash_map( I first, I last, const allocator_type &a = allocator_type() ) + : concurrent_hash_map(a) + { + try_call( [&] { + internal_copy(first, last, std::distance(first, last)); + }).on_exception( [&] { + this->clear(); + }); + } + + template <typename I> + concurrent_hash_map( I first, I last, const hash_compare_type& compare, const allocator_type& a = allocator_type() ) + : concurrent_hash_map(compare, a) + { + try_call( [&] { + internal_copy(first, last, std::distance(first, last)); + }).on_exception( [&] { + this->clear(); + }); + } + + concurrent_hash_map( std::initializer_list<value_type> il, const hash_compare_type& compare = hash_compare_type(), const allocator_type& a = allocator_type() ) + : concurrent_hash_map(compare, a) + { + try_call( [&] { + internal_copy(il.begin(), il.end(), il.size()); + }).on_exception( [&] { + this->clear(); + }); + } + + concurrent_hash_map( std::initializer_list<value_type> il, const allocator_type& a ) + : concurrent_hash_map(il, hash_compare_type(), a) {} + + // Assignment + concurrent_hash_map& operator=( const concurrent_hash_map &table ) { + if( this != &table ) { + clear(); + copy_assign_allocators(this->my_allocator, table.my_allocator); + internal_copy(table); + } + return *this; + } + + // Move Assignment + concurrent_hash_map& operator=( concurrent_hash_map &&table ) { + if( this != &table ) { + using pocma_type = typename node_allocator_traits::propagate_on_container_move_assignment; + using is_equal_type = typename node_allocator_traits::is_always_equal; + move_assign_allocators(this->my_allocator, table.my_allocator); + internal_move_assign(std::move(table), tbb::detail::disjunction<is_equal_type, pocma_type>()); + } + return *this; + } + + // Assignment + concurrent_hash_map& operator=( std::initializer_list<value_type> il ) { + clear(); + internal_copy(il.begin(), il.end(), il.size()); + return *this; + } + + // Rehashes and optionally resizes the whole table. + /** Useful to optimize performance before or after concurrent operations. + Also enables using of find() and count() concurrent methods in serial context. 
*/ + void rehash(size_type sz = 0) { + this->reserve(sz); // TODO: add reduction of number of buckets as well + hashcode_type mask = this->my_mask.load(std::memory_order_relaxed); + hashcode_type b = (mask+1)>>1; // size or first index of the last segment + __TBB_ASSERT((b&(b-1))==0, nullptr); // zero or power of 2 + bucket *bp = this->get_bucket( b ); // only the last segment should be scanned for rehashing + for(; b <= mask; b++, bp++ ) { + node_base *n = bp->node_list.load(std::memory_order_relaxed); + __TBB_ASSERT( this->is_valid(n) || n == empty_rehashed || n == rehash_req, "Broken detail structure" ); + __TBB_ASSERT( *reinterpret_cast<intptr_t*>(&bp->mutex) == 0, "concurrent or unexpectedly terminated operation during rehash() execution" ); + if( n == rehash_req ) { // rehash bucket, conditional because rehashing of a previous bucket may affect this one + hashcode_type h = b; bucket *b_old = bp; + do { + __TBB_ASSERT( h > 1, "The lowermost buckets can't be rehashed" ); + hashcode_type m = ( 1u<<tbb::detail::log2( h ) ) - 1; // get parent mask from the topmost bit + b_old = this->get_bucket( h &= m ); + } while( b_old->node_list.load(std::memory_order_relaxed) == rehash_req ); + // now h - is index of the root rehashed bucket b_old + this->mark_rehashed_levels( h ); // mark all non-rehashed children recursively across all segments + node_base* prev = nullptr; + node_base* curr = b_old->node_list.load(std::memory_order_relaxed); + while (this->is_valid(curr)) { + hashcode_type curr_node_hash = my_hash_compare.hash(static_cast<node*>(curr)->value().first); + + if ((curr_node_hash & mask) != h) { // should be rehashed + node_base* next = curr->next; + // exclude from b_old + if (prev == nullptr) { + b_old->node_list.store(curr->next, std::memory_order_relaxed); + } else { + prev->next = curr->next; + } + bucket *b_new = this->get_bucket(curr_node_hash & mask); + __TBB_ASSERT(b_new->node_list.load(std::memory_order_relaxed) != rehash_req, "hash() function changed for key in table or detail error" ); + this->add_to_bucket(b_new, curr); + curr = next; + } else { + prev = curr; + curr = curr->next; + } + } + } + } + } + + // Clear table + void clear() { + hashcode_type m = this->my_mask.load(std::memory_order_relaxed); + __TBB_ASSERT((m&(m+1))==0, "data structure is invalid"); + this->my_size.store(0, std::memory_order_relaxed); + segment_index_type s = this->segment_index_of( m ); + __TBB_ASSERT( s+1 == this->pointers_per_table || !this->my_table[s+1].load(std::memory_order_relaxed), "wrong mask or concurrent grow" ); + do { + __TBB_ASSERT(this->is_valid(this->my_table[s].load(std::memory_order_relaxed)), "wrong mask or concurrent grow" ); + segment_ptr_type buckets_ptr = this->my_table[s].load(std::memory_order_relaxed); + size_type sz = this->segment_size( s ? s : 1 ); + for( segment_index_type i = 0; i < sz; i++ ) + for( node_base *n = buckets_ptr[i].node_list.load(std::memory_order_relaxed); + this->is_valid(n); n = buckets_ptr[i].node_list.load(std::memory_order_relaxed) ) + { + buckets_ptr[i].node_list.store(n->next, std::memory_order_relaxed); + delete_node( n ); + } + this->delete_segment(s); + } while(s-- > 0); + this->my_mask.store(this->embedded_buckets - 1, std::memory_order_relaxed); + } + + // Clear table and destroy it. 
+ ~concurrent_hash_map() { clear(); } + + //------------------------------------------------------------------------ + // Parallel algorithm support + //------------------------------------------------------------------------ + range_type range( size_type grainsize=1 ) { + return range_type( *this, grainsize ); + } + const_range_type range( size_type grainsize=1 ) const { + return const_range_type( *this, grainsize ); + } + + //------------------------------------------------------------------------ + // STL support - not thread-safe methods + //------------------------------------------------------------------------ + iterator begin() { return iterator( *this, 0, this->my_embedded_segment, this->my_embedded_segment->node_list.load(std::memory_order_relaxed) ); } + const_iterator begin() const { return const_iterator( *this, 0, this->my_embedded_segment, this->my_embedded_segment->node_list.load(std::memory_order_relaxed) ); } + const_iterator cbegin() const { return const_iterator( *this, 0, this->my_embedded_segment, this->my_embedded_segment->node_list.load(std::memory_order_relaxed) ); } + iterator end() { return iterator( *this, 0, 0, 0 ); } + const_iterator end() const { return const_iterator( *this, 0, 0, 0 ); } + const_iterator cend() const { return const_iterator( *this, 0, 0, 0 ); } + std::pair<iterator, iterator> equal_range( const Key& key ) { return internal_equal_range( key, end() ); } + std::pair<const_iterator, const_iterator> equal_range( const Key& key ) const { return internal_equal_range( key, end() ); } + + // Number of items in table. + size_type size() const { return this->my_size.load(std::memory_order_acquire); } + + // True if size()==0. + __TBB_nodiscard bool empty() const { return size() == 0; } + + // Upper bound on size. + size_type max_size() const { + return allocator_traits_type::max_size(base_type::get_allocator()); + } + + // Returns the current number of buckets + size_type bucket_count() const { return this->my_mask.load(std::memory_order_relaxed) + 1; } + + // return allocator object + allocator_type get_allocator() const { return base_type::get_allocator(); } + + // swap two instances. Iterators are invalidated + void swap(concurrent_hash_map& table) { + using pocs_type = typename node_allocator_traits::propagate_on_container_swap; + using is_equal_type = typename node_allocator_traits::is_always_equal; + swap_allocators(this->my_allocator, table.my_allocator); + internal_swap(table, tbb::detail::disjunction<pocs_type, is_equal_type>()); + } + + //------------------------------------------------------------------------ + // concurrent map operations + //------------------------------------------------------------------------ + + // Return count of items (0 or 1) + size_type count( const Key &key ) const { + return const_cast<concurrent_hash_map*>(this)->lookup(/*insert*/false, key, nullptr, nullptr, /*write=*/false, &do_not_allocate_node ); + } + + // Find item and acquire a read lock on the item. + /** Return true if item is found, false otherwise. */ + bool find( const_accessor &result, const Key &key ) const { + result.release(); + return const_cast<concurrent_hash_map*>(this)->lookup(/*insert*/false, key, nullptr, &result, /*write=*/false, &do_not_allocate_node ); + } + + // Find item and acquire a write lock on the item. + /** Return true if item is found, false otherwise. 
*/ + bool find( accessor &result, const Key &key ) { + result.release(); + return lookup(/*insert*/false, key, nullptr, &result, /*write=*/true, &do_not_allocate_node ); + } + + // Insert item (if not already present) and acquire a read lock on the item. + /** Returns true if item is new. */ + bool insert( const_accessor &result, const Key &key ) { + result.release(); + return lookup(/*insert*/true, key, nullptr, &result, /*write=*/false, &allocate_node_default_construct ); + } + + // Insert item (if not already present) and acquire a write lock on the item. + /** Returns true if item is new. */ + bool insert( accessor &result, const Key &key ) { + result.release(); + return lookup(/*insert*/true, key, nullptr, &result, /*write=*/true, &allocate_node_default_construct ); + } + + // Insert item by copying if there is no such key present already and acquire a read lock on the item. + /** Returns true if item is new. */ + bool insert( const_accessor &result, const value_type &value ) { + result.release(); + return lookup(/*insert*/true, value.first, &value.second, &result, /*write=*/false, &allocate_node_copy_construct ); + } + + // Insert item by copying if there is no such key present already and acquire a write lock on the item. + /** Returns true if item is new. */ + bool insert( accessor &result, const value_type &value ) { + result.release(); + return lookup(/*insert*/true, value.first, &value.second, &result, /*write=*/true, &allocate_node_copy_construct ); + } + + // Insert item by copying if there is no such key present already + /** Returns true if item is inserted. */ + bool insert( const value_type &value ) { + return lookup(/*insert*/true, value.first, &value.second, nullptr, /*write=*/false, &allocate_node_copy_construct ); + } + + // Insert item by copying if there is no such key present already and acquire a read lock on the item. + /** Returns true if item is new. */ + bool insert( const_accessor &result, value_type && value ) { + return generic_move_insert(result, std::move(value)); + } + + // Insert item by copying if there is no such key present already and acquire a write lock on the item. + /** Returns true if item is new. */ + bool insert( accessor &result, value_type && value ) { + return generic_move_insert(result, std::move(value)); + } + + // Insert item by copying if there is no such key present already + /** Returns true if item is inserted. */ + bool insert( value_type && value ) { + return generic_move_insert(accessor_not_used(), std::move(value)); + } + + // Insert item by copying if there is no such key present already and acquire a read lock on the item. + /** Returns true if item is new. */ + template <typename... Args> + bool emplace( const_accessor &result, Args&&... args ) { + return generic_emplace(result, std::forward<Args>(args)...); + } + + // Insert item by copying if there is no such key present already and acquire a write lock on the item. + /** Returns true if item is new. */ + template <typename... Args> + bool emplace( accessor &result, Args&&... args ) { + return generic_emplace(result, std::forward<Args>(args)...); + } + + // Insert item by copying if there is no such key present already + /** Returns true if item is inserted. */ + template <typename... Args> + bool emplace( Args&&... 
args ) { + return generic_emplace(accessor_not_used(), std::forward<Args>(args)...); + } + + // Insert range [first, last) + template <typename I> + void insert( I first, I last ) { + for ( ; first != last; ++first ) + insert( *first ); + } + + // Insert initializer list + void insert( std::initializer_list<value_type> il ) { + insert( il.begin(), il.end() ); + } + + // Erase item. + /** Return true if item was erased by particularly this call. */ + bool erase( const Key &key ) { + node_base *erase_node; + hashcode_type const hash = my_hash_compare.hash(key); + hashcode_type mask = this->my_mask.load(std::memory_order_acquire); + restart: + {//lock scope + // get bucket + bucket_accessor b( this, hash & mask ); + search: + node_base* prev = nullptr; + erase_node = b()->node_list.load(std::memory_order_relaxed); + while (this->is_valid(erase_node) && !my_hash_compare.equal(key, static_cast<node*>(erase_node)->value().first ) ) { + prev = erase_node; + erase_node = erase_node->next; + } + + if (erase_node == nullptr) { // not found, but mask could be changed + if (this->check_mask_race(hash, mask)) + goto restart; + return false; + } else if (!b.is_writer() && !b.upgrade_to_writer()) { + if (this->check_mask_race(hash, mask)) // contended upgrade, check mask + goto restart; + goto search; + } + + // remove from container + if (prev == nullptr) { + b()->node_list.store(erase_node->next, std::memory_order_relaxed); + } else { + prev->next = erase_node->next; + } + this->my_size--; + } + { + typename node::scoped_type item_locker( erase_node->mutex, /*write=*/true ); + } + // note: there should be no threads pretending to acquire this mutex again, do not try to upgrade const_accessor! + delete_node(erase_node); // Only one thread can delete it due to write lock on the bucket + return true; + } + + // Erase item by const_accessor. + /** Return true if item was erased by particularly this call. */ + bool erase( const_accessor& item_accessor ) { + return exclude( item_accessor ); + } + + // Erase item by accessor. + /** Return true if item was erased by particularly this call. */ + bool erase( accessor& item_accessor ) { + return exclude( item_accessor ); + } + +protected: + // Insert or find item and optionally acquire a lock on the item. + bool lookup( bool op_insert, const Key &key, const T *t, const_accessor *result, bool write, node* (*allocate_node)(bucket_allocator_type&, + const Key&, const T*), node *tmp_n = 0) + { + __TBB_ASSERT( !result || !result->my_node, nullptr ); + bool return_value; + hashcode_type const h = my_hash_compare.hash( key ); + hashcode_type m = this->my_mask.load(std::memory_order_acquire); + segment_index_type grow_segment = 0; + node *n; + restart: + {//lock scope + __TBB_ASSERT((m&(m+1))==0, "data structure is invalid"); + return_value = false; + // get bucket + bucket_accessor b( this, h & m ); + // find a node + n = search_bucket( key, b() ); + if( op_insert ) { + // [opt] insert a key + if( !n ) { + if( !tmp_n ) { + tmp_n = allocate_node(base_type::get_allocator(), key, t); + } + if( !b.is_writer() && !b.upgrade_to_writer() ) { // TODO: improved insertion + // Rerun search_list, in case another thread inserted the item during the upgrade. + n = search_bucket( key, b() ); + if( this->is_valid(n) ) { // unfortunately, it did + b.downgrade_to_reader(); + goto exists; + } + } + if( this->check_mask_race(h, m) ) + goto restart; // b.release() is done in ~b(). 
+ // insert and set flag to grow the container + grow_segment = this->insert_new_node( b(), n = tmp_n, m ); + tmp_n = 0; + return_value = true; + } + } else { // find or count + if( !n ) { + if( this->check_mask_race( h, m ) ) + goto restart; // b.release() is done in ~b(). TODO: replace by continue + return false; + } + return_value = true; + } + exists: + if( !result ) goto check_growth; + // TODO: the following seems as generic/regular operation + // acquire the item + if( !result->try_acquire( n->mutex, write ) ) { + for( tbb::detail::atomic_backoff backoff(true);; ) { + if( result->try_acquire( n->mutex, write ) ) break; + if( !backoff.bounded_pause() ) { + // the wait takes really long, restart the operation + b.release(); + __TBB_ASSERT( !op_insert || !return_value, "Can't acquire new item in locked bucket?" ); + yield(); + m = this->my_mask.load(std::memory_order_acquire); + goto restart; + } + } + } + }//lock scope + result->my_node = n; + result->my_hash = h; + check_growth: + // [opt] grow the container + if( grow_segment ) { + this->enable_segment( grow_segment ); + } + if( tmp_n ) // if op_insert only + delete_node( tmp_n ); + return return_value; + } + + struct accessor_not_used { void release(){}}; + friend const_accessor* accessor_location( accessor_not_used const& ){ return nullptr;} + friend const_accessor* accessor_location( const_accessor & a ) { return &a;} + + friend bool is_write_access_needed( accessor const& ) { return true;} + friend bool is_write_access_needed( const_accessor const& ) { return false;} + friend bool is_write_access_needed( accessor_not_used const& ) { return false;} + + template <typename Accessor> + bool generic_move_insert( Accessor && result, value_type && value ) { + result.release(); + return lookup(/*insert*/true, value.first, &value.second, accessor_location(result), is_write_access_needed(result), &allocate_node_move_construct ); + } + + template <typename Accessor, typename... Args> + bool generic_emplace( Accessor && result, Args &&... 
args ) { + result.release(); + node * node_ptr = create_node(base_type::get_allocator(), std::forward<Args>(args)...); + return lookup(/*insert*/true, node_ptr->value().first, nullptr, accessor_location(result), is_write_access_needed(result), &do_not_allocate_node, node_ptr ); + } + + // delete item by accessor + bool exclude( const_accessor &item_accessor ) { + __TBB_ASSERT( item_accessor.my_node, nullptr ); + node_base *const exclude_node = item_accessor.my_node; + hashcode_type const hash = item_accessor.my_hash; + hashcode_type mask = this->my_mask.load(std::memory_order_acquire); + do { + // get bucket + bucket_accessor b( this, hash & mask, /*writer=*/true ); + node_base* prev = nullptr; + node_base* curr = b()->node_list.load(std::memory_order_relaxed); + + while (curr && curr != exclude_node) { + prev = curr; + curr = curr->next; + } + + if (curr == nullptr) { // someone else was first + if (this->check_mask_race(hash, mask)) + continue; + item_accessor.release(); + return false; + } + __TBB_ASSERT( curr == exclude_node, nullptr ); + // remove from container + if (prev == nullptr) { + b()->node_list.store(curr->next, std::memory_order_relaxed); + } else { + prev->next = curr->next; + } + + this->my_size--; + break; + } while(true); + if (!item_accessor.is_writer()) { // need to get exclusive lock + item_accessor.upgrade_to_writer(); // return value means nothing here + } + + item_accessor.release(); + delete_node(exclude_node); // Only one thread can delete it + return true; + } + + // Returns an iterator for an item defined by the key, or for the next item after it (if upper==true) + template <typename I> + std::pair<I, I> internal_equal_range( const Key& key, I end_ ) const { + hashcode_type h = my_hash_compare.hash( key ); + hashcode_type m = this->my_mask.load(std::memory_order_relaxed); + __TBB_ASSERT((m&(m+1))==0, "data structure is invalid"); + h &= m; + bucket *b = this->get_bucket( h ); + while ( b->node_list.load(std::memory_order_relaxed) == rehash_req ) { + m = ( 1u<<tbb::detail::log2( h ) ) - 1; // get parent mask from the topmost bit + b = this->get_bucket( h &= m ); + } + node *n = search_bucket( key, b ); + if( !n ) + return std::make_pair(end_, end_); + iterator lower(*this, h, b, n), upper(lower); + return std::make_pair(lower, ++upper); + } + + // Copy "source" to *this, where *this must start out empty. + void internal_copy( const concurrent_hash_map& source ) { + hashcode_type mask = source.my_mask.load(std::memory_order_relaxed); + if( this->my_mask.load(std::memory_order_relaxed) == mask ) { // optimized version + this->reserve(source.my_size.load(std::memory_order_relaxed)); // TODO: load_factor? 
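+            // Fast path: both tables share the same mask here, so their bucket layouts match.
+            // The loop below walks every bucket index k, refreshing the dst/src pointers only
+            // at segment boundaries, deep-copies each source node into the matching destination
+            // bucket, and carries over any rehash_req marker so the trailing rehash() call can
+            // restore buckets that the source had not rehashed yet.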
+ bucket *dst = 0, *src = 0; + bool rehash_required = false; + for( hashcode_type k = 0; k <= mask; k++ ) { + if( k & (k-2) ) ++dst,src++; // not the beginning of a segment + else { dst = this->get_bucket( k ); src = source.get_bucket( k ); } + __TBB_ASSERT( dst->node_list.load(std::memory_order_relaxed) != rehash_req, "Invalid bucket in destination table"); + node *n = static_cast<node*>( src->node_list.load(std::memory_order_relaxed) ); + if( n == rehash_req ) { // source is not rehashed, items are in previous buckets + rehash_required = true; + dst->node_list.store(rehash_req, std::memory_order_relaxed); + } else for(; n; n = static_cast<node*>( n->next ) ) { + node* node_ptr = create_node(base_type::get_allocator(), n->value().first, n->value().second); + this->add_to_bucket( dst, node_ptr); + this->my_size.fetch_add(1, std::memory_order_relaxed); + } + } + if( rehash_required ) rehash(); + } else internal_copy(source.begin(), source.end(), source.my_size.load(std::memory_order_relaxed)); + } + + template <typename I> + void internal_copy( I first, I last, size_type reserve_size ) { + this->reserve(reserve_size); // TODO: load_factor? + hashcode_type m = this->my_mask.load(std::memory_order_relaxed); + for(; first != last; ++first) { + hashcode_type h = my_hash_compare.hash( (*first).first ); + bucket *b = this->get_bucket( h & m ); + __TBB_ASSERT( b->node_list.load(std::memory_order_relaxed) != rehash_req, "Invalid bucket in destination table"); + node* node_ptr = create_node(base_type::get_allocator(), (*first).first, (*first).second); + this->add_to_bucket( b, node_ptr ); + ++this->my_size; // TODO: replace by non-atomic op + } + } + + void internal_move_construct_with_allocator( concurrent_hash_map&& other, const allocator_type&, + /*is_always_equal=*/std::true_type ) + { + this->internal_move(std::move(other)); + } + + void internal_move_construct_with_allocator( concurrent_hash_map&& other, const allocator_type& a, + /*is_always_equal=*/std::false_type ) + { + if (a == other.get_allocator()){ + this->internal_move(std::move(other)); + } else { + try_call( [&] { + internal_copy(std::make_move_iterator(other.begin()), std::make_move_iterator(other.end()), + other.size()); + }).on_exception( [&] { + this->clear(); + }); + } + } + + void internal_move_assign( concurrent_hash_map&& other, + /*is_always_equal || POCMA = */std::true_type) + { + this->internal_move(std::move(other)); + } + + void internal_move_assign(concurrent_hash_map&& other, /*is_always_equal=*/ std::false_type) { + if (this->my_allocator == other.my_allocator) { + this->internal_move(std::move(other)); + } else { + //do per element move + internal_copy(std::make_move_iterator(other.begin()), std::make_move_iterator(other.end()), + other.size()); + } + } + + void internal_swap(concurrent_hash_map& other, /*is_always_equal || POCS = */ std::true_type) { + this->internal_swap_content(other); + } + + void internal_swap(concurrent_hash_map& other, /*is_always_equal || POCS = */ std::false_type) { + __TBB_ASSERT(this->my_allocator == other.my_allocator, nullptr); + this->internal_swap_content(other); + } + + // Fast find when no concurrent erasure is used. For internal use inside TBB only! + /** Return pointer to item with given key, or nullptr if no such item exists. + Must not be called concurrently with erasure operations. 
*/ + const_pointer internal_fast_find( const Key& key ) const { + hashcode_type h = my_hash_compare.hash( key ); + hashcode_type m = this->my_mask.load(std::memory_order_acquire); + node *n; + restart: + __TBB_ASSERT((m&(m+1))==0, "data structure is invalid"); + bucket *b = this->get_bucket( h & m ); + // TODO: actually, notification is unnecessary here, just hiding double-check + if( b->node_list.load(std::memory_order_acquire) == rehash_req ) + { + typename bucket::scoped_type lock; + if( lock.try_acquire( b->mutex, /*write=*/true ) ) { + if( b->node_list.load(std::memory_order_relaxed) == rehash_req) + const_cast<concurrent_hash_map*>(this)->rehash_bucket( b, h & m ); //recursive rehashing + } + else lock.acquire( b->mutex, /*write=*/false ); + __TBB_ASSERT(b->node_list.load(std::memory_order_relaxed) != rehash_req,nullptr); + } + n = search_bucket( key, b ); + if( n ) + return n->storage(); + else if( this->check_mask_race( h, m ) ) + goto restart; + return 0; + } +}; + +#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT +template <typename It, + typename HashCompare = tbb_hash_compare<iterator_key_t<It>>, + typename Alloc = tbb_allocator<iterator_alloc_pair_t<It>>, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<HashCompare>>> +concurrent_hash_map( It, It, HashCompare = HashCompare(), Alloc = Alloc() ) +-> concurrent_hash_map<iterator_key_t<It>, iterator_mapped_t<It>, HashCompare, Alloc>; + +template <typename It, typename Alloc, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_hash_map( It, It, Alloc ) +-> concurrent_hash_map<iterator_key_t<It>, iterator_mapped_t<It>, tbb_hash_compare<iterator_key_t<It>>, Alloc>; + +template <typename Key, typename T, + typename HashCompare = tbb_hash_compare<std::remove_const_t<Key>>, + typename Alloc = tbb_allocator<std::pair<const Key, T>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<HashCompare>>> +concurrent_hash_map( std::initializer_list<std::pair<Key, T>>, HashCompare = HashCompare(), Alloc = Alloc() ) +-> concurrent_hash_map<std::remove_const_t<Key>, T, HashCompare, Alloc>; + +template <typename Key, typename T, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_hash_map( std::initializer_list<std::pair<Key, T>>, Alloc ) +-> concurrent_hash_map<std::remove_const_t<Key>, T, tbb_hash_compare<std::remove_const_t<Key>>, Alloc>; + +#endif /* __TBB_CPP17_DEDUCTION_GUIDES_PRESENT */ + +template <typename Key, typename T, typename HashCompare, typename A1, typename A2> +inline bool operator==(const concurrent_hash_map<Key, T, HashCompare, A1> &a, const concurrent_hash_map<Key, T, HashCompare, A2> &b) { + if(a.size() != b.size()) return false; + typename concurrent_hash_map<Key, T, HashCompare, A1>::const_iterator i(a.begin()), i_end(a.end()); + typename concurrent_hash_map<Key, T, HashCompare, A2>::const_iterator j, j_end(b.end()); + for(; i != i_end; ++i) { + j = b.equal_range(i->first).first; + if( j == j_end || !(i->second == j->second) ) return false; + } + return true; +} + +#if !__TBB_CPP20_COMPARISONS_PRESENT +template <typename Key, typename T, typename HashCompare, typename A1, typename A2> +inline bool operator!=(const concurrent_hash_map<Key, T, HashCompare, A1> &a, const concurrent_hash_map<Key, T, HashCompare, A2> &b) +{ return !(a == b); } +#endif // !__TBB_CPP20_COMPARISONS_PRESENT 
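+
+/*
+  A minimal usage sketch for the container above; word_count, add_word and lookup are
+  illustrative names, not part of this header, and <string> is assumed to be included.
+  It shows the accessor protocol documented above: insert()/find() through an accessor
+  hold a write lock on the element, a const_accessor holds a read lock, and the lock is
+  released when the accessor is destroyed or release() is called.
+
+      using map_type = tbb::concurrent_hash_map<std::string, int>;
+
+      void add_word(map_type& word_count, const std::string& word) {
+          map_type::accessor acc;          // write access while held
+          word_count.insert(acc, word);    // inserts {word, 0} if the key is absent
+          ++acc->second;                   // element stays locked here
+      }                                    // lock released by the accessor destructor
+
+      int lookup(const map_type& word_count, const std::string& word) {
+          map_type::const_accessor acc;    // read access while held
+          return word_count.find(acc, word) ? acc->second : 0;
+      }
+*/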
+ +template <typename Key, typename T, typename HashCompare, typename A> +inline void swap(concurrent_hash_map<Key, T, HashCompare, A> &a, concurrent_hash_map<Key, T, HashCompare, A> &b) +{ a.swap( b ); } + +} // namespace d1 +} // namespace detail + +inline namespace v1 { + using detail::split; + using detail::d1::concurrent_hash_map; + using detail::d1::tbb_hash_compare; +} // namespace v1 + +} // namespace tbb + +#endif /* __TBB_concurrent_hash_map_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/concurrent_lru_cache.h b/contrib/libs/tbb/include/oneapi/tbb/concurrent_lru_cache.h new file mode 100644 index 0000000000..b83dd5f8c1 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/concurrent_lru_cache.h @@ -0,0 +1,364 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_concurrent_lru_cache_H +#define __TBB_concurrent_lru_cache_H + +#if ! TBB_PREVIEW_CONCURRENT_LRU_CACHE + #error Set TBB_PREVIEW_CONCURRENT_LRU_CACHE to include concurrent_lru_cache.h +#endif + +#include "detail/_assert.h" +#include "detail/_aggregator.h" + +#include <map> // for std::map +#include <list> // for std::list +#include <utility> // for std::make_pair +#include <algorithm> // for std::find +#include <atomic> // for std::atomic<bool> + +namespace tbb { + +namespace detail { +namespace d1 { + +//----------------------------------------------------------------------------- +// Concurrent LRU cache +//----------------------------------------------------------------------------- + +template<typename KeyT, typename ValT, typename KeyToValFunctorT = ValT (*) (KeyT)> +class concurrent_lru_cache : no_assign { +// incapsulated helper classes +private: + struct handle_object; + struct storage_map_value_type; + + struct aggregator_operation; + struct retrieve_aggregator_operation; + struct signal_end_of_usage_aggregator_operation; + +// typedefs +public: + using key_type = KeyT; + using value_type = ValT; + using pointer = ValT*; + using reference = ValT&; + using const_pointer = const ValT*; + using const_reference = const ValT&; + + using value_function_type = KeyToValFunctorT; + using handle = handle_object; +private: + using lru_cache_type = concurrent_lru_cache<KeyT, ValT, KeyToValFunctorT>; + + using storage_map_type = std::map<key_type, storage_map_value_type>; + using storage_map_iterator_type = typename storage_map_type::iterator; + using storage_map_pointer_type = typename storage_map_type::pointer; + using storage_map_reference_type = typename storage_map_type::reference; + + using history_list_type = std::list<storage_map_iterator_type>; + using history_list_iterator_type = typename history_list_type::iterator; + + using aggregator_operation_type = aggregator_operation; + using aggregator_function_type = aggregating_functor<lru_cache_type, aggregator_operation_type>; + using aggregator_type = aggregator<aggregator_function_type, aggregator_operation_type>; + + friend class aggregating_functor<lru_cache_type,aggregator_operation_type>; + +// fields 
+private: + value_function_type my_value_function; + aggregator_type my_aggregator; + + storage_map_type my_storage_map; // storage map for used objects + history_list_type my_history_list; // history list for unused objects + const std::size_t my_history_list_capacity; // history list's allowed capacity + +// interface +public: + + concurrent_lru_cache(value_function_type value_function, std::size_t cache_capacity) + : my_value_function(value_function), my_history_list_capacity(cache_capacity) { + my_aggregator.initialize_handler(aggregator_function_type(this)); + } + + handle operator[](key_type key) { + retrieve_aggregator_operation op(key); + my_aggregator.execute(&op); + + if (op.is_new_value_needed()) { + op.result().second.my_value = my_value_function(key); + op.result().second.my_is_ready.store(true, std::memory_order_release); + } else { + spin_wait_while_eq(op.result().second.my_is_ready, false); + } + + return handle(*this, op.result()); + } + +private: + + void handle_operations(aggregator_operation* op_list) { + while (op_list) { + op_list->cast_and_handle(*this); + aggregator_operation* prev_op = op_list; + op_list = op_list->next; + + (prev_op->status).store(1, std::memory_order_release); + } + } + + void signal_end_of_usage(storage_map_reference_type map_record_ref) { + signal_end_of_usage_aggregator_operation op(map_record_ref); + my_aggregator.execute(&op); + } + + void signal_end_of_usage_serial(storage_map_reference_type map_record_ref) { + storage_map_iterator_type map_it = my_storage_map.find(map_record_ref.first); + + __TBB_ASSERT(map_it != my_storage_map.end(), + "cache should not return past-end iterators to outer world"); + __TBB_ASSERT(&(*map_it) == &map_record_ref, + "dangling reference has been returned to outside world: data race?"); + __TBB_ASSERT(std::find(my_history_list.begin(), my_history_list.end(), map_it) == my_history_list.end(), + "object in use should not be in list of unused objects "); + + // if it was the last reference, put it to the LRU history + if (! --(map_it->second.my_ref_counter)) { + // if the LRU history is full, evict the oldest items to get space + if (my_history_list.size() >= my_history_list_capacity) { + std::size_t number_of_elements_to_evict = 1 + my_history_list.size() - my_history_list_capacity; + + for (std::size_t i = 0; i < number_of_elements_to_evict; ++i) { + storage_map_iterator_type map_it_to_evict = my_history_list.back(); + + __TBB_ASSERT(map_it_to_evict->second.my_ref_counter == 0, + "item to be evicted should not have a live references"); + + // TODO: can we use forward_list instead of list? pop_front / insert_after last + my_history_list.pop_back(); + my_storage_map.erase(map_it_to_evict); + } + } + + // TODO: can we use forward_list instead of list? 
pop_front / insert_after last + my_history_list.push_front(map_it); + map_it->second.my_history_list_iterator = my_history_list.begin(); + } + } + + storage_map_reference_type retrieve_serial(key_type key, bool& is_new_value_needed) { + storage_map_iterator_type map_it = my_storage_map.find(key); + + if (map_it == my_storage_map.end()) { + map_it = my_storage_map.emplace_hint( + map_it, std::piecewise_construct, std::make_tuple(key), std::make_tuple(value_type(), 0, my_history_list.end(), false)); + is_new_value_needed = true; + } else { + history_list_iterator_type list_it = map_it->second.my_history_list_iterator; + if (list_it != my_history_list.end()) { + __TBB_ASSERT(map_it->second.my_ref_counter == 0, + "item to be evicted should not have a live references"); + + // Item is going to be used. Therefore it is not a subject for eviction, + // so we remove it from LRU history. + my_history_list.erase(list_it); + map_it->second.my_history_list_iterator = my_history_list.end(); + } + } + + ++(map_it->second.my_ref_counter); + return *map_it; + } +}; + +//----------------------------------------------------------------------------- +// Value type for storage map in concurrent LRU cache +//----------------------------------------------------------------------------- + +template<typename KeyT, typename ValT, typename KeyToValFunctorT> +struct concurrent_lru_cache<KeyT, ValT, KeyToValFunctorT>::storage_map_value_type { +//typedefs +public: + using ref_counter_type = std::size_t; + +// fields +public: + value_type my_value; + ref_counter_type my_ref_counter; + history_list_iterator_type my_history_list_iterator; + std::atomic<bool> my_is_ready; + +// interface +public: + storage_map_value_type( + value_type const& value, ref_counter_type ref_counter, + history_list_iterator_type history_list_iterator, bool is_ready) + : my_value(value), my_ref_counter(ref_counter), + my_history_list_iterator(history_list_iterator), my_is_ready(is_ready) {} +}; + +//----------------------------------------------------------------------------- +// Handle object for operator[] in concurrent LRU cache +//----------------------------------------------------------------------------- + +template<typename KeyT, typename ValT, typename KeyToValFunctorT> +struct concurrent_lru_cache<KeyT, ValT, KeyToValFunctorT>::handle_object { +// fields +private: + lru_cache_type* my_lru_cache_ptr; + storage_map_pointer_type my_map_record_ptr; + +// interface +public: + handle_object() + : my_lru_cache_ptr(nullptr), my_map_record_ptr(nullptr) {} + handle_object(lru_cache_type& lru_cache_ref, storage_map_reference_type map_record_ref) + : my_lru_cache_ptr(&lru_cache_ref), my_map_record_ptr(&map_record_ref) {} + + handle_object(handle_object&) = delete; + void operator=(handle_object&) = delete; + + handle_object(handle_object&& other) + : my_lru_cache_ptr(other.my_lru_cache_ptr), my_map_record_ptr(other.my_map_record_ptr) { + + __TBB_ASSERT( + bool(other.my_lru_cache_ptr) == bool(other.my_map_record_ptr), + "invalid state of moving object?"); + + other.my_lru_cache_ptr = nullptr; + other.my_map_record_ptr = nullptr; + } + + handle_object& operator=(handle_object&& other) { + __TBB_ASSERT( + bool(other.my_lru_cache_ptr) == bool(other.my_map_record_ptr), + "invalid state of moving object?"); + + if (my_lru_cache_ptr) + my_lru_cache_ptr->signal_end_of_usage(*my_map_record_ptr); + + my_lru_cache_ptr = other.my_lru_cache_ptr; + my_map_record_ptr = other.my_map_record_ptr; + other.my_lru_cache_ptr = nullptr; + other.my_map_record_ptr = 
nullptr; + + return *this; + } + + ~handle_object() { + if (my_lru_cache_ptr) + my_lru_cache_ptr->signal_end_of_usage(*my_map_record_ptr); + } + + operator bool() const { + return (my_lru_cache_ptr && my_map_record_ptr); + } + + value_type& value() { + __TBB_ASSERT(my_lru_cache_ptr, "get value from already moved object?"); + __TBB_ASSERT(my_map_record_ptr, "get value from an invalid or already moved object?"); + + return my_map_record_ptr->second.my_value; + } +}; + +//----------------------------------------------------------------------------- +// Aggregator operation for aggregator type in concurrent LRU cache +//----------------------------------------------------------------------------- + +template<typename KeyT, typename ValT, typename KeyToValFunctorT> +struct concurrent_lru_cache<KeyT, ValT, KeyToValFunctorT>::aggregator_operation + : aggregated_operation<aggregator_operation> { +// incapsulated helper classes +public: + enum class op_type { retrieve, signal_end_of_usage }; + +// fields +private: + op_type my_op; + +// interface +public: + aggregator_operation(op_type op) : my_op(op) {} + + // TODO: aggregator_operation can be implemented + // - as a statically typed variant type or CRTP? (static, dependent on the use case) + // - or use pointer to function and apply_visitor (dynamic) + // - or use virtual functions (dynamic) + void cast_and_handle(lru_cache_type& lru_cache_ref) { + if (my_op == op_type::retrieve) + static_cast<retrieve_aggregator_operation*>(this)->handle(lru_cache_ref); + else + static_cast<signal_end_of_usage_aggregator_operation*>(this)->handle(lru_cache_ref); + } +}; + +template<typename KeyT, typename ValT, typename KeyToValFunctorT> +struct concurrent_lru_cache<KeyT, ValT, KeyToValFunctorT>::retrieve_aggregator_operation + : aggregator_operation, private no_assign { +public: + key_type my_key; + storage_map_pointer_type my_map_record_ptr; + bool my_is_new_value_needed; + +public: + retrieve_aggregator_operation(key_type key) + : aggregator_operation(aggregator_operation::op_type::retrieve), + my_key(key), my_is_new_value_needed(false) {} + + void handle(lru_cache_type& lru_cache_ref) { + my_map_record_ptr = &lru_cache_ref.retrieve_serial(my_key, my_is_new_value_needed); + } + + storage_map_reference_type result() { return *my_map_record_ptr; } + + bool is_new_value_needed() { return my_is_new_value_needed; } +}; + +template<typename KeyT, typename ValT, typename KeyToValFunctorT> +struct concurrent_lru_cache<KeyT, ValT, KeyToValFunctorT>::signal_end_of_usage_aggregator_operation + : aggregator_operation, private no_assign { + +private: + storage_map_reference_type my_map_record_ref; + +public: + signal_end_of_usage_aggregator_operation(storage_map_reference_type map_record_ref) + : aggregator_operation(aggregator_operation::op_type::signal_end_of_usage), + my_map_record_ref(map_record_ref) {} + + void handle(lru_cache_type& lru_cache_ref) { + lru_cache_ref.signal_end_of_usage_serial(my_map_record_ref); + } +}; + +// TODO: if we have guarantees that KeyToValFunctorT always have +// ValT as a return type and KeyT as an argument type +// we can deduce template parameters of concurrent_lru_cache +// by pattern matching on KeyToValFunctorT + +} // namespace d1 +} // namespace detail + +inline namespace v1 { + +using detail::d1::concurrent_lru_cache; + +} // inline namespace v1 +} // namespace tbb + +#endif // __TBB_concurrent_lru_cache_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/concurrent_map.h b/contrib/libs/tbb/include/oneapi/tbb/concurrent_map.h new 
file mode 100644 index 0000000000..ae389d4f42 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/concurrent_map.h @@ -0,0 +1,342 @@ +/* + Copyright (c) 2019-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_concurrent_map_H +#define __TBB_concurrent_map_H + +#include "detail/_namespace_injection.h" +#include "detail/_concurrent_skip_list.h" +#include "tbb_allocator.h" +#include <functional> +#include <tuple> +#include <utility> + +namespace tbb { +namespace detail { +namespace d1 { + +template<typename Key, typename Value, typename KeyCompare, typename RandomGenerator, + typename Allocator, bool AllowMultimapping> +struct map_traits { + static constexpr std::size_t max_level = RandomGenerator::max_level; + using random_level_generator_type = RandomGenerator; + using key_type = Key; + using mapped_type = Value; + using compare_type = KeyCompare; + using value_type = std::pair<const key_type, mapped_type>; + using reference = value_type&; + using const_reference = const value_type&; + using allocator_type = Allocator; + + static constexpr bool allow_multimapping = AllowMultimapping; + + class value_compare { + public: + bool operator()(const value_type& lhs, const value_type& rhs) const { + return comp(lhs.first, rhs.first); + } + + protected: + value_compare(compare_type c) : comp(c) {} + + friend struct map_traits; + + compare_type comp; + }; + + static value_compare value_comp(compare_type comp) { return value_compare(comp); } + + static const key_type& get_key(const_reference val) { + return val.first; + } +}; // struct map_traits + +template <typename Key, typename Value, typename Compare, typename Allocator> +class concurrent_multimap; + +template <typename Key, typename Value, typename Compare = std::less<Key>, typename Allocator = tbb::tbb_allocator<std::pair<const Key, Value>>> +class concurrent_map : public concurrent_skip_list<map_traits<Key, Value, Compare, concurrent_geometric_level_generator<32>, Allocator, false>> { + using base_type = concurrent_skip_list<map_traits<Key, Value, Compare, concurrent_geometric_level_generator<32>, Allocator, false>>; +public: + using key_type = Key; + using mapped_type = Value; + using value_type = typename base_type::value_type; + using size_type = typename base_type::size_type; + using difference_type = typename base_type::difference_type; + using key_compare = Compare; + using value_compare = typename base_type::value_compare; + using allocator_type = Allocator; + + using reference = typename base_type::reference; + using const_reference = typename base_type::const_reference; + using pointer = typename base_type::pointer; + using const_pointer = typename base_type::const_pointer; + + using iterator = typename base_type::iterator; + using const_iterator = typename base_type::const_iterator; + + using node_type = typename base_type::node_type; + + // Include constructors of base type + using base_type::base_type; + using base_type::operator=; + + // Required for implicit deduction guides + concurrent_map() = 
default; + concurrent_map( const concurrent_map& ) = default; + concurrent_map( const concurrent_map& other, const allocator_type& alloc ) : base_type(other, alloc) {} + concurrent_map( concurrent_map&& ) = default; + concurrent_map( concurrent_map&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {} + // Required to respect the rule of 5 + concurrent_map& operator=( const concurrent_map& ) = default; + concurrent_map& operator=( concurrent_map&& ) = default; + + // Observers + mapped_type& at(const key_type& key) { + iterator it = this->find(key); + + if (it == this->end()) { + throw_exception(exception_id::invalid_key); + } + return it->second; + } + + const mapped_type& at(const key_type& key) const { + return const_cast<concurrent_map*>(this)->at(key); + } + + mapped_type& operator[](const key_type& key) { + iterator it = this->find(key); + + if (it == this->end()) { + it = this->emplace(std::piecewise_construct, std::forward_as_tuple(key), std::tuple<>()).first; + } + return it->second; + } + + mapped_type& operator[](key_type&& key) { + iterator it = this->find(key); + + if (it == this->end()) { + it = this->emplace(std::piecewise_construct, std::forward_as_tuple(std::move(key)), std::tuple<>()).first; + } + return it->second; + } + + using base_type::insert; + + template <typename P> + typename std::enable_if<std::is_constructible<value_type, P&&>::value, + std::pair<iterator, bool>>::type insert( P&& value ) + { + return this->emplace(std::forward<P>(value)); + } + + template <typename P> + typename std::enable_if<std::is_constructible<value_type, P&&>::value, + iterator>::type insert( const_iterator hint, P&& value ) + { + return this->emplace_hint(hint, std::forward<P>(value)); + } + + template<typename OtherCompare> + void merge(concurrent_map<key_type, mapped_type, OtherCompare, Allocator>& source) { + this->internal_merge(source); + } + + template<typename OtherCompare> + void merge(concurrent_map<key_type, mapped_type, OtherCompare, Allocator>&& source) { + this->internal_merge(std::move(source)); + } + + template<typename OtherCompare> + void merge(concurrent_multimap<key_type, mapped_type, OtherCompare, Allocator>& source) { + this->internal_merge(source); + } + + template<typename OtherCompare> + void merge(concurrent_multimap<key_type, mapped_type, OtherCompare, Allocator>&& source) { + this->internal_merge(std::move(source)); + } +}; // class concurrent_map + +#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +template <typename It, + typename Comp = std::less<iterator_key_t<It>>, + typename Alloc = tbb::tbb_allocator<iterator_alloc_pair_t<It>>, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Comp>>> +concurrent_map( It, It, Comp = Comp(), Alloc = Alloc() ) +-> concurrent_map<iterator_key_t<It>, iterator_mapped_t<It>, Comp, Alloc>; + +template <typename Key, typename T, + typename Comp = std::less<std::remove_const_t<Key>>, + typename Alloc = tbb::tbb_allocator<std::pair<const Key, T>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Comp>>> +concurrent_map( std::initializer_list<std::pair<Key, T>>, Comp = Comp(), Alloc = Alloc() ) +-> concurrent_map<std::remove_const_t<Key>, T, Comp, Alloc>; + +template <typename It, typename Alloc, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_map( It, It, Alloc ) +-> 
concurrent_map<iterator_key_t<It>, iterator_mapped_t<It>, + std::less<iterator_key_t<It>>, Alloc>; + +template <typename Key, typename T, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_map( std::initializer_list<std::pair<Key, T>>, Alloc ) +-> concurrent_map<std::remove_const_t<Key>, T, std::less<std::remove_const_t<Key>>, Alloc>; + +#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +template <typename Key, typename Value, typename Compare, typename Allocator> +void swap( concurrent_map<Key, Value, Compare, Allocator>& lhs, + concurrent_map<Key, Value, Compare, Allocator>& rhs ) +{ + lhs.swap(rhs); +} + +template <typename Key, typename Value, typename Compare = std::less<Key>, typename Allocator = tbb::tbb_allocator<std::pair<const Key, Value>>> +class concurrent_multimap : public concurrent_skip_list<map_traits<Key, Value, Compare, concurrent_geometric_level_generator<32>, Allocator, true>> { + using base_type = concurrent_skip_list<map_traits<Key, Value, Compare, concurrent_geometric_level_generator<32>, Allocator, true>>; +public: + using key_type = Key; + using mapped_type = Value; + using value_type = typename base_type::value_type; + using size_type = typename base_type::size_type; + using difference_type = typename base_type::difference_type; + using key_compare = Compare; + using value_compare = typename base_type::value_compare; + using allocator_type = Allocator; + + using reference = typename base_type::reference; + using const_reference = typename base_type::const_reference; + using pointer = typename base_type::pointer; + using const_pointer = typename base_type::const_pointer; + + using iterator = typename base_type::iterator; + using const_iterator = typename base_type::const_iterator; + + using node_type = typename base_type::node_type; + + // Include constructors of base_type + using base_type::base_type; + using base_type::insert; + using base_type::operator=; + + // Required for implicit deduction guides + concurrent_multimap() = default; + concurrent_multimap( const concurrent_multimap& ) = default; + concurrent_multimap( const concurrent_multimap& other, const allocator_type& alloc ) : base_type(other, alloc) {} + concurrent_multimap( concurrent_multimap&& ) = default; + concurrent_multimap( concurrent_multimap&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {} + // Required to respect the rule of 5 + concurrent_multimap& operator=( const concurrent_multimap& ) = default; + concurrent_multimap& operator=( concurrent_multimap&& ) = default; + + template <typename P> + typename std::enable_if<std::is_constructible<value_type, P&&>::value, + std::pair<iterator, bool>>::type insert( P&& value ) + { + return this->emplace(std::forward<P>(value)); + } + + template <typename P> + typename std::enable_if<std::is_constructible<value_type, P&&>::value, + iterator>::type insert( const_iterator hint, P&& value ) + { + return this->emplace_hint(hint, std::forward<P>(value)); + } + + template<typename OtherCompare> + void merge(concurrent_multimap<key_type, mapped_type, OtherCompare, Allocator>& source) { + this->internal_merge(source); + } + + template<typename OtherCompare> + void merge(concurrent_multimap<key_type, mapped_type, OtherCompare, Allocator>&& source) { + this->internal_merge(std::move(source)); + } + + template<typename OtherCompare> + void merge(concurrent_map<key_type, mapped_type, OtherCompare, Allocator>& source) { + this->internal_merge(source); + } + + template<typename OtherCompare> + void 
merge(concurrent_map<key_type, mapped_type, OtherCompare, Allocator>&& source) { + this->internal_merge(std::move(source)); + } +}; // class concurrent_multimap + +#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +template <typename It, + typename Comp = std::less<iterator_key_t<It>>, + typename Alloc = tbb::tbb_allocator<iterator_alloc_pair_t<It>>, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Comp>>> +concurrent_multimap( It, It, Comp = Comp(), Alloc = Alloc() ) +-> concurrent_multimap<iterator_key_t<It>, iterator_mapped_t<It>, Comp, Alloc>; + +template <typename Key, typename T, + typename Comp = std::less<std::remove_const_t<Key>>, + typename Alloc = tbb::tbb_allocator<std::pair<const Key, T>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Comp>>> +concurrent_multimap( std::initializer_list<std::pair<Key, T>>, Comp = Comp(), Alloc = Alloc() ) +-> concurrent_multimap<std::remove_const_t<Key>, T, Comp, Alloc>; + +template <typename It, typename Alloc, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_multimap( It, It, Alloc ) +-> concurrent_multimap<iterator_key_t<It>, iterator_mapped_t<It>, + std::less<iterator_key_t<It>>, Alloc>; + +template <typename Key, typename T, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_multimap( std::initializer_list<std::pair<Key, T>>, Alloc ) +-> concurrent_multimap<std::remove_const_t<Key>, T, std::less<std::remove_const_t<Key>>, Alloc>; + + +#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +template <typename Key, typename Value, typename Compare, typename Allocator> +void swap( concurrent_multimap<Key, Value, Compare, Allocator>& lhs, + concurrent_multimap<Key, Value, Compare, Allocator>& rhs ) +{ + lhs.swap(rhs); +} + +} // namespace d1 +} // namespace detail + +inline namespace v1 { + +using detail::d1::concurrent_map; +using detail::d1::concurrent_multimap; +using detail::split; + +} // inline namespace v1 +} // namespace tbb + +#endif // __TBB_concurrent_map_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/concurrent_priority_queue.h b/contrib/libs/tbb/include/oneapi/tbb/concurrent_priority_queue.h new file mode 100644 index 0000000000..a281740ad8 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/concurrent_priority_queue.h @@ -0,0 +1,490 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_concurrent_priority_queue_H +#define __TBB_concurrent_priority_queue_H + +#include "detail/_namespace_injection.h" +#include "detail/_aggregator.h" +#include "detail/_template_helpers.h" +#include "detail/_allocator_traits.h" +#include "detail/_range_common.h" +#include "detail/_exception.h" +#include "detail/_utils.h" +#include "detail/_containers_helpers.h" +#include "cache_aligned_allocator.h" +#include <vector> +#include <iterator> +#include <functional> +#include <utility> +#include <initializer_list> +#include <type_traits> + +namespace tbb { +namespace detail { +namespace d1 { + +template <typename T, typename Compare = std::less<T>, typename Allocator = cache_aligned_allocator<T>> +class concurrent_priority_queue { +public: + using value_type = T; + using reference = T&; + using const_reference = const T&; + + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + + using allocator_type = Allocator; + + concurrent_priority_queue() : concurrent_priority_queue(allocator_type{}) {} + + explicit concurrent_priority_queue( const allocator_type& alloc ) + : mark(0), my_size(0), my_compare(), data(alloc) + { + my_aggregator.initialize_handler(functor{this}); + } + + explicit concurrent_priority_queue( const Compare& compare, const allocator_type& alloc = allocator_type() ) + : mark(0), my_size(0), my_compare(compare), data(alloc) + { + my_aggregator.initialize_handler(functor{this}); + } + + explicit concurrent_priority_queue( size_type init_capacity, const allocator_type& alloc = allocator_type() ) + : mark(0), my_size(0), my_compare(), data(alloc) + { + data.reserve(init_capacity); + my_aggregator.initialize_handler(functor{this}); + } + + explicit concurrent_priority_queue( size_type init_capacity, const Compare& compare, const allocator_type& alloc = allocator_type() ) + : mark(0), my_size(0), my_compare(compare), data(alloc) + { + data.reserve(init_capacity); + my_aggregator.initialize_handler(functor{this}); + } + + template <typename InputIterator> + concurrent_priority_queue( InputIterator begin, InputIterator end, const Compare& compare, const allocator_type& alloc = allocator_type() ) + : mark(0), my_compare(compare), data(begin, end, alloc) + { + my_aggregator.initialize_handler(functor{this}); + heapify(); + my_size.store(data.size(), std::memory_order_relaxed); + } + + template <typename InputIterator> + concurrent_priority_queue( InputIterator begin, InputIterator end, const allocator_type& alloc = allocator_type() ) + : concurrent_priority_queue(begin, end, Compare(), alloc) {} + + concurrent_priority_queue( std::initializer_list<value_type> init, const Compare& compare, const allocator_type& alloc = allocator_type() ) + : concurrent_priority_queue(init.begin(), init.end(), compare, alloc) {} + + concurrent_priority_queue( std::initializer_list<value_type> init, const allocator_type& alloc = allocator_type() ) + : concurrent_priority_queue(init, Compare(), alloc) {} + + concurrent_priority_queue( const concurrent_priority_queue& other ) + : mark(other.mark), my_size(other.my_size.load(std::memory_order_relaxed)), my_compare(other.my_compare), + data(other.data) + { + my_aggregator.initialize_handler(functor{this}); + } + + concurrent_priority_queue( const concurrent_priority_queue& other, const allocator_type& alloc ) + : mark(other.mark), my_size(other.my_size.load(std::memory_order_relaxed)), my_compare(other.my_compare), + data(other.data, alloc) + { + my_aggregator.initialize_handler(functor{this}); + } + + 
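+    // Usage sketch (illustrative only, not part of the library header): the
+    // constructors above plus the push/try_pop operations declared further below
+    // are typically combined as follows; the name `cpq`, the sample values and
+    // the pushed value 9 are assumptions made for the example.
+    //
+    //     std::vector<int> src{3, 1, 4, 1, 5};
+    //     tbb::concurrent_priority_queue<int> cpq(src.begin(), src.end()); // heapified on construction
+    //     cpq.push(9);                 // safe concurrently with other push/try_pop/emplace calls
+    //     int top_value;
+    //     if (cpq.try_pop(top_value)) {
+    //         // with the default std::less<int>, top_value is the largest element (9 here)
+    //     }
+    //
+    // Construction itself is not concurrency-safe; the thread-safety guarantees
+    // apply only to operations on a fully constructed queue.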
concurrent_priority_queue( concurrent_priority_queue&& other ) + : mark(other.mark), my_size(other.my_size.load(std::memory_order_relaxed)), my_compare(other.my_compare), + data(std::move(other.data)) + { + my_aggregator.initialize_handler(functor{this}); + } + + concurrent_priority_queue( concurrent_priority_queue&& other, const allocator_type& alloc ) + : mark(other.mark), my_size(other.my_size.load(std::memory_order_relaxed)), my_compare(other.my_compare), + data(std::move(other.data), alloc) + { + my_aggregator.initialize_handler(functor{this}); + } + + concurrent_priority_queue& operator=( const concurrent_priority_queue& other ) { + if (this != &other) { + data = other.data; + mark = other.mark; + my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); + } + return *this; + } + + concurrent_priority_queue& operator=( concurrent_priority_queue&& other ) { + if (this != &other) { + // TODO: check if exceptions from std::vector::operator=(vector&&) should be handled separately + data = std::move(other.data); + mark = other.mark; + my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); + } + return *this; + } + + concurrent_priority_queue& operator=( std::initializer_list<value_type> init ) { + assign(init.begin(), init.end()); + return *this; + } + + template <typename InputIterator> + void assign( InputIterator begin, InputIterator end ) { + data.assign(begin, end); + mark = 0; + my_size.store(data.size(), std::memory_order_relaxed); + heapify(); + } + + void assign( std::initializer_list<value_type> init ) { + assign(init.begin(), init.end()); + } + + /* Returned value may not reflect results of pending operations. + This operation reads shared data and will trigger a race condition. */ + __TBB_nodiscard bool empty() const { return size() == 0; } + + // Returns the current number of elements contained in the queue + /* Returned value may not reflect results of pending operations. + This operation reads shared data and will trigger a race condition. */ + size_type size() const { return my_size.load(std::memory_order_relaxed); } + + /* This operation can be safely used concurrently with other push, try_pop or emplace operations. */ + void push( const value_type& value ) { + cpq_operation op_data(value, PUSH_OP); + my_aggregator.execute(&op_data); + if (op_data.status == FAILED) + throw_exception(exception_id::bad_alloc); + } + + /* This operation can be safely used concurrently with other push, try_pop or emplace operations. */ + void push( value_type&& value ) { + cpq_operation op_data(value, PUSH_RVALUE_OP); + my_aggregator.execute(&op_data); + if (op_data.status == FAILED) + throw_exception(exception_id::bad_alloc); + } + + /* This operation can be safely used concurrently with other push, try_pop or emplace operations. */ + template <typename... Args> + void emplace( Args&&... args ) { + // TODO: support uses allocator construction in this place + push(value_type(std::forward<Args>(args)...)); + } + + // Gets a reference to and removes highest priority element + /* If a highest priority element was found, sets elem and returns true, + otherwise returns false. + This operation can be safely used concurrently with other push, try_pop or emplace operations. 
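+        Unlike std::priority_queue there is no top()/peek: the only way to observe
+        the highest-priority element is to remove it via try_pop, which keeps the
+        observation and the removal atomic with respect to concurrent pushes.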
*/ + bool try_pop( value_type& value ) { + cpq_operation op_data(value, POP_OP); + my_aggregator.execute(&op_data); + return op_data.status == SUCCEEDED; + } + + // This operation affects the whole container => it is not thread-safe + void clear() { + data.clear(); + mark = 0; + my_size.store(0, std::memory_order_relaxed); + } + + // This operation affects the whole container => it is not thread-safe + void swap( concurrent_priority_queue& other ) { + if (this != &other) { + using std::swap; + swap(data, other.data); + swap(mark, other.mark); + + size_type sz = my_size.load(std::memory_order_relaxed); + my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); + other.my_size.store(sz, std::memory_order_relaxed); + } + } + + allocator_type get_allocator() const { return data.get_allocator(); } +private: + enum operation_type {INVALID_OP, PUSH_OP, POP_OP, PUSH_RVALUE_OP}; + enum operation_status {WAIT = 0, SUCCEEDED, FAILED}; + + class cpq_operation : public aggregated_operation<cpq_operation> { + public: + operation_type type; + union { + value_type* elem; + size_type sz; + }; + cpq_operation( const value_type& value, operation_type t ) + : type(t), elem(const_cast<value_type*>(&value)) {} + }; // class cpq_operation + + class functor { + concurrent_priority_queue* my_cpq; + public: + functor() : my_cpq(nullptr) {} + functor( concurrent_priority_queue* cpq ) : my_cpq(cpq) {} + + void operator()(cpq_operation* op_list) { + __TBB_ASSERT(my_cpq != nullptr, "Invalid functor"); + my_cpq->handle_operations(op_list); + } + }; // class functor + + void handle_operations( cpq_operation* op_list ) { + call_itt_notify(acquired, this); + cpq_operation* tmp, *pop_list = nullptr; + __TBB_ASSERT(mark == data.size(), NULL); + + // First pass processes all constant (amortized; reallocation may happen) time pushes and pops. + while(op_list) { + // ITT note: &(op_list->status) tag is used to cover accesses to op_list + // node. This thread is going to handle the operation, and so will acquire it + // and perform the associated operation w/o triggering a race condition; the + // thread that created the operation is waiting on the status field, so when + // this thread is done with the operation, it will perform a + // store_with_release to give control back to the waiting thread in + // aggregator::insert_operation. 
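+            // In the aggregator pattern used here, calling threads never touch the
+            // heap directly: each publishes a cpq_operation descriptor and waits on
+            // its status field, while the single thread that took the handler role
+            // drains the whole batch in this function, so the underlying heap is
+            // only ever mutated by one thread at a time.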
+ // TODO: enable + call_itt_notify(acquired, &(op_list->status)); + __TBB_ASSERT(op_list->type != INVALID_OP, NULL); + + tmp = op_list; + op_list = op_list->next.load(std::memory_order_relaxed); + if (tmp->type == POP_OP) { + if (mark < data.size() && + my_compare(data[0], data.back())) + { + // there are newly pushed elems and the last one is higher than top + *(tmp->elem) = std::move(data.back()); + my_size.store(my_size.load(std::memory_order_relaxed) - 1, std::memory_order_relaxed); + tmp->status.store(uintptr_t(SUCCEEDED), std::memory_order_release); + + data.pop_back(); + __TBB_ASSERT(mark <= data.size(), NULL); + } else { // no convenient item to pop; postpone + tmp->next.store(pop_list, std::memory_order_relaxed); + pop_list = tmp; + } + } else { // PUSH_OP or PUSH_RVALUE_OP + __TBB_ASSERT(tmp->type == PUSH_OP || tmp->type == PUSH_RVALUE_OP, "Unknown operation"); +#if TBB_USE_EXCEPTIONS + try +#endif + { + if (tmp->type == PUSH_OP) { + push_back_helper(*(tmp->elem)); + } else { + data.push_back(std::move(*(tmp->elem))); + } + my_size.store(my_size.load(std::memory_order_relaxed) + 1, std::memory_order_relaxed); + tmp->status.store(uintptr_t(SUCCEEDED), std::memory_order_release); + } +#if TBB_USE_EXCEPTIONS + catch(...) { + tmp->status.store(uintptr_t(FAILED), std::memory_order_release); + } +#endif + } + } + + // Second pass processes pop operations + while(pop_list) { + tmp = pop_list; + pop_list = pop_list->next.load(std::memory_order_relaxed); + __TBB_ASSERT(tmp->type == POP_OP, NULL); + if (data.empty()) { + tmp->status.store(uintptr_t(FAILED), std::memory_order_release); + } else { + __TBB_ASSERT(mark <= data.size(), NULL); + if (mark < data.size() && + my_compare(data[0], data.back())) + { + // there are newly pushed elems and the last one is higher than top + *(tmp->elem) = std::move(data.back()); + my_size.store(my_size.load(std::memory_order_relaxed) - 1, std::memory_order_relaxed); + tmp->status.store(uintptr_t(SUCCEEDED), std::memory_order_release); + data.pop_back(); + } else { // extract top and push last element down heap + *(tmp->elem) = std::move(data[0]); + my_size.store(my_size.load(std::memory_order_relaxed) - 1, std::memory_order_relaxed); + tmp->status.store(uintptr_t(SUCCEEDED), std::memory_order_release); + reheap(); + } + } + } + + // heapify any leftover pushed elements before doing the next + // batch of operations + if (mark < data.size()) heapify(); + __TBB_ASSERT(mark == data.size(), NULL); + call_itt_notify(releasing, this); + } + + // Merge unsorted elements into heap + void heapify() { + if (!mark && data.size() > 0) mark = 1; + for (; mark < data.size(); ++mark) { + // for each unheapified element under size + size_type cur_pos = mark; + value_type to_place = std::move(data[mark]); + do { // push to_place up the heap + size_type parent = (cur_pos - 1) >> 1; + if (!my_compare(data[parent], to_place)) + break; + data[cur_pos] = std::move(data[parent]); + cur_pos = parent; + } while(cur_pos); + data[cur_pos] = std::move(to_place); + } + } + + // Re-heapify after an extraction + // Re-heapify by pushing last element down the heap from the root. 
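+    // The sift-down below is deliberately bounded by 'mark' rather than data.size():
+    // elements at indices [mark, data.size()) have been pushed but not yet heapified
+    // (see the layout description next to the 'data' member below), so they must not
+    // be treated as heap children here.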
+ void reheap() { + size_type cur_pos = 0, child = 1; + + while(child < mark) { + size_type target = child; + if (child + 1 < mark && my_compare(data[child], data[child + 1])) + ++target; + // target now has the higher priority child + if (my_compare(data[target], data.back())) + break; + data[cur_pos] = std::move(data[target]); + cur_pos = target; + child = (cur_pos << 1) + 1; + } + if (cur_pos != data.size() - 1) + data[cur_pos] = std::move(data.back()); + data.pop_back(); + if (mark > data.size()) mark = data.size(); + } + + void push_back_helper( const T& value ) { + push_back_helper_impl(value, std::is_copy_constructible<T>{}); + } + + void push_back_helper_impl( const T& value, /*is_copy_constructible = */std::true_type ) { + data.push_back(value); + } + + void push_back_helper_impl( const T&, /*is_copy_constructible = */std::false_type ) { + __TBB_ASSERT(false, "error: calling tbb::concurrent_priority_queue.push(const value_type&) for move-only type"); + } + + using aggregator_type = aggregator<functor, cpq_operation>; + + aggregator_type my_aggregator; + // Padding added to avoid false sharing + char padding1[max_nfs_size - sizeof(aggregator_type)]; + // The point at which unsorted elements begin + size_type mark; + std::atomic<size_type> my_size; + Compare my_compare; + + // Padding added to avoid false sharing + char padding2[max_nfs_size - (2*sizeof(size_type)) - sizeof(Compare)]; + //! Storage for the heap of elements in queue, plus unheapified elements + /** data has the following structure: + + binary unheapified + heap elements + ____|_______|____ + | | | + v v v + [_|...|_|_|...|_| |...| ] + 0 ^ ^ ^ + | | |__capacity + | |__my_size + |__mark + + Thus, data stores the binary heap starting at position 0 through + mark-1 (it may be empty). Then there are 0 or more elements + that have not yet been inserted into the heap, in positions + mark through my_size-1. 
*/ + + using vector_type = std::vector<value_type, allocator_type>; + vector_type data; + + friend bool operator==( const concurrent_priority_queue& lhs, + const concurrent_priority_queue& rhs ) + { + return lhs.data == rhs.data; + } + +#if !__TBB_CPP20_COMPARISONS_PRESENT + friend bool operator!=( const concurrent_priority_queue& lhs, + const concurrent_priority_queue& rhs ) + { + return !(lhs == rhs); + } +#endif +}; // class concurrent_priority_queue + +#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT +template <typename It, + typename Comp = std::less<iterator_value_t<It>>, + typename Alloc = tbb::cache_aligned_allocator<iterator_value_t<It>>, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Comp>>> +concurrent_priority_queue( It, It, Comp = Comp(), Alloc = Alloc() ) +-> concurrent_priority_queue<iterator_value_t<It>, Comp, Alloc>; + +template <typename It, typename Alloc, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_priority_queue( It, It, Alloc ) +-> concurrent_priority_queue<iterator_value_t<It>, std::less<iterator_value_t<It>>, Alloc>; + +template <typename T, + typename Comp = std::less<T>, + typename Alloc = tbb::cache_aligned_allocator<T>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Comp>>> +concurrent_priority_queue( std::initializer_list<T>, Comp = Comp(), Alloc = Alloc() ) +-> concurrent_priority_queue<T, Comp, Alloc>; + +template <typename T, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_priority_queue( std::initializer_list<T>, Alloc ) +-> concurrent_priority_queue<T, std::less<T>, Alloc>; + +#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +template <typename T, typename Compare, typename Allocator> +void swap( concurrent_priority_queue<T, Compare, Allocator>& lhs, + concurrent_priority_queue<T, Compare, Allocator>& rhs ) +{ + lhs.swap(rhs); +} + +} // namespace d1 +} // namespace detail +inline namespace v1 { +using detail::d1::concurrent_priority_queue; + +} // inline namespace v1 +} // namespace tbb + +#endif // __TBB_concurrent_priority_queue_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/concurrent_queue.h b/contrib/libs/tbb/include/oneapi/tbb/concurrent_queue.h new file mode 100644 index 0000000000..c8ae7afff7 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/concurrent_queue.h @@ -0,0 +1,592 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_concurrent_queue_H +#define __TBB_concurrent_queue_H + +#include "detail/_namespace_injection.h" +#include "detail/_concurrent_queue_base.h" +#include "detail/_allocator_traits.h" +#include "detail/_exception.h" +#include "detail/_containers_helpers.h" +#include "cache_aligned_allocator.h" + +namespace tbb { +namespace detail { +namespace d1 { + +// A high-performance thread-safe non-blocking concurrent queue. +// Multiple threads may each push and pop concurrently. +// Assignment construction is not allowed. +template <typename T, typename Allocator = tbb::cache_aligned_allocator<T>> +class concurrent_queue { + using allocator_traits_type = tbb::detail::allocator_traits<Allocator>; + using queue_representation_type = concurrent_queue_rep<T, Allocator>; + using queue_allocator_type = typename allocator_traits_type::template rebind_alloc<queue_representation_type>; + using queue_allocator_traits = tbb::detail::allocator_traits<queue_allocator_type>; +public: + using size_type = std::size_t; + using value_type = T; + using reference = T&; + using const_reference = const T&; + using difference_type = std::ptrdiff_t; + + using allocator_type = Allocator; + using pointer = typename allocator_traits_type::pointer; + using const_pointer = typename allocator_traits_type::const_pointer; + + using iterator = concurrent_queue_iterator<concurrent_queue, T, Allocator>; + using const_iterator = concurrent_queue_iterator<concurrent_queue, const T, Allocator>; + + concurrent_queue() : concurrent_queue(allocator_type()) {} + + explicit concurrent_queue(const allocator_type& a) : + my_allocator(a), my_queue_representation(nullptr) + { + my_queue_representation = static_cast<queue_representation_type*>(r1::cache_aligned_allocate(sizeof(queue_representation_type))); + queue_allocator_traits::construct(my_allocator, my_queue_representation, my_allocator); + + __TBB_ASSERT(is_aligned(my_queue_representation, max_nfs_size), "alignment error" ); + __TBB_ASSERT(is_aligned(&my_queue_representation->head_counter, max_nfs_size), "alignment error" ); + __TBB_ASSERT(is_aligned(&my_queue_representation->tail_counter, max_nfs_size), "alignment error" ); + __TBB_ASSERT(is_aligned(&my_queue_representation->array, max_nfs_size), "alignment error" ); + } + + template <typename InputIterator> + concurrent_queue(InputIterator begin, InputIterator end, const allocator_type& a = allocator_type()) : + concurrent_queue(a) + { + for (; begin != end; ++begin) + push(*begin); + } + + concurrent_queue(const concurrent_queue& src, const allocator_type& a) : + concurrent_queue(a) + { + my_queue_representation->assign(*src.my_queue_representation, copy_construct_item); + } + + concurrent_queue(const concurrent_queue& src) : + concurrent_queue(queue_allocator_traits::select_on_container_copy_construction(src.get_allocator())) + { + my_queue_representation->assign(*src.my_queue_representation, copy_construct_item); + } + + // Move constructors + concurrent_queue(concurrent_queue&& src) : + concurrent_queue(std::move(src.my_allocator)) + { + internal_swap(src); + } + + concurrent_queue(concurrent_queue&& src, const allocator_type& a) : + concurrent_queue(a) + { + // checking that memory allocated by one instance of allocator can be deallocated + // with another + if (my_allocator == src.my_allocator) { + internal_swap(src); + } else { + // allocators are different => performing per-element move + my_queue_representation->assign(*src.my_queue_representation, move_construct_item); + src.clear(); + } + } + + // Destroy 
queue + ~concurrent_queue() { + clear(); + my_queue_representation->clear(); + queue_allocator_traits::destroy(my_allocator, my_queue_representation); + r1::cache_aligned_deallocate(my_queue_representation); + } + + // Enqueue an item at tail of queue. + void push(const T& value) { + internal_push(value); + } + + void push(T&& value) { + internal_push(std::move(value)); + } + + template <typename... Args> + void emplace( Args&&... args ) { + internal_push(std::forward<Args>(args)...); + } + + // Attempt to dequeue an item from head of queue. + /** Does not wait for item to become available. + Returns true if successful; false otherwise. */ + bool try_pop( T& result ) { + return internal_try_pop(&result); + } + + // Return the number of items in the queue; thread unsafe + size_type unsafe_size() const { + std::ptrdiff_t size = my_queue_representation->size(); + return size < 0 ? 0 : size_type(size); + } + + // Equivalent to size()==0. + __TBB_nodiscard bool empty() const { + return my_queue_representation->empty(); + } + + // Clear the queue. not thread-safe. + void clear() { + while (!empty()) { + T value; + try_pop(value); + } + } + + // Return allocator object + allocator_type get_allocator() const { return my_allocator; } + + //------------------------------------------------------------------------ + // The iterators are intended only for debugging. They are slow and not thread safe. + //------------------------------------------------------------------------ + + iterator unsafe_begin() { return concurrent_queue_iterator_provider::get<iterator>(*this); } + iterator unsafe_end() { return iterator(); } + const_iterator unsafe_begin() const { return concurrent_queue_iterator_provider::get<const_iterator>(*this); } + const_iterator unsafe_end() const { return const_iterator(); } + const_iterator unsafe_cbegin() const { return concurrent_queue_iterator_provider::get<const_iterator>(*this); } + const_iterator unsafe_cend() const { return const_iterator(); } + +private: + void internal_swap(concurrent_queue& src) { + using std::swap; + swap(my_queue_representation, src.my_queue_representation); + } + + template <typename... Args> + void internal_push( Args&&... args ) { + ticket_type k = my_queue_representation->tail_counter++; + my_queue_representation->choose(k).push(k, *my_queue_representation, std::forward<Args>(args)...); + } + + bool internal_try_pop( void* dst ) { + ticket_type k; + do { + k = my_queue_representation->head_counter.load(std::memory_order_relaxed); + do { + if (static_cast<std::ptrdiff_t>(my_queue_representation->tail_counter.load(std::memory_order_relaxed) - k) <= 0) { + // Queue is empty + return false; + } + + // Queue had item with ticket k when we looked. Attempt to get that item. + // Another thread snatched the item, retry. 
+ } while (!my_queue_representation->head_counter.compare_exchange_strong(k, k + 1)); + } while (!my_queue_representation->choose(k).pop(dst, k, *my_queue_representation)); + return true; + } + + template <typename Container, typename Value, typename A> + friend class concurrent_queue_iterator; + + static void copy_construct_item(T* location, const void* src) { + // TODO: use allocator_traits for copy construction + new (location) value_type(*static_cast<const value_type*>(src)); + // queue_allocator_traits::construct(my_allocator, location, *static_cast<const T*>(src)); + } + + static void move_construct_item(T* location, const void* src) { + // TODO: use allocator_traits for move construction + new (location) value_type(std::move(*static_cast<value_type*>(const_cast<void*>(src)))); + } + + queue_allocator_type my_allocator; + queue_representation_type* my_queue_representation; +}; // class concurrent_queue + +#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT +// Deduction guide for the constructor from two iterators +template <typename It, typename Alloc = tbb::cache_aligned_allocator<iterator_value_t<It>>, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_queue( It, It, Alloc = Alloc() ) +-> concurrent_queue<iterator_value_t<It>, Alloc>; + +#endif /* __TBB_CPP17_DEDUCTION_GUIDES_PRESENT */ + +class concurrent_monitor; + +template <typename FuncType> +class delegated_function : public delegate_base { +public: + delegated_function(FuncType& f) : my_func(f) {} + + bool operator()() const override { + return my_func(); + } + +private: + FuncType &my_func; +}; // class delegated_function + +// The concurrent monitor tags for concurrent_bounded_queue. +static constexpr std::size_t cbq_slots_avail_tag = 0; +static constexpr std::size_t cbq_items_avail_tag = 1; +} // namespace d1 + + +namespace r1 { + class concurrent_monitor; + + std::uint8_t* __TBB_EXPORTED_FUNC allocate_bounded_queue_rep( std::size_t queue_rep_size ); + void __TBB_EXPORTED_FUNC deallocate_bounded_queue_rep( std::uint8_t* mem, std::size_t queue_rep_size ); + void __TBB_EXPORTED_FUNC abort_bounded_queue_monitors( concurrent_monitor* monitors ); + void __TBB_EXPORTED_FUNC notify_bounded_queue_monitor( concurrent_monitor* monitors, std::size_t monitor_tag + , std::size_t ticket ); + void __TBB_EXPORTED_FUNC wait_bounded_queue_monitor( concurrent_monitor* monitors, std::size_t monitor_tag, + std::ptrdiff_t target, d1::delegate_base& predicate ); +} // namespace r1 + + +namespace d1 { +// A high-performance thread-safe blocking concurrent bounded queue. +// Supports boundedness and blocking semantics. +// Multiple threads may each push and pop concurrently. +// Assignment construction is not allowed. 
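+// Usage sketch (illustrative only; the thread bodies and the capacity value are
+// assumptions, not part of this header):
+//
+//     tbb::concurrent_bounded_queue<int> q;
+//     q.set_capacity(4);                    // push() now blocks while 4 items are in flight
+//     std::thread producer([&q] { for (int i = 0; i < 100; ++i) q.push(i); });
+//     std::thread consumer([&q] { int v; for (int i = 0; i < 100; ++i) q.pop(v); });
+//     producer.join(); consumer.join();
+//
+// try_push()/try_pop() are the non-blocking counterparts, and abort() wakes every
+// blocked push()/pop() by making it throw tbb::user_abort.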
+template <typename T, typename Allocator = tbb::cache_aligned_allocator<T>> +class concurrent_bounded_queue { + using allocator_traits_type = tbb::detail::allocator_traits<Allocator>; + using queue_representation_type = concurrent_queue_rep<T, Allocator>; + using queue_allocator_type = typename allocator_traits_type::template rebind_alloc<queue_representation_type>; + using queue_allocator_traits = tbb::detail::allocator_traits<queue_allocator_type>; + + template <typename FuncType> + void internal_wait(r1::concurrent_monitor* monitors, std::size_t monitor_tag, std::ptrdiff_t target, FuncType pred) { + delegated_function<FuncType> func(pred); + r1::wait_bounded_queue_monitor(monitors, monitor_tag, target, func); + } +public: + using size_type = std::ptrdiff_t; + using value_type = T; + using reference = T&; + using const_reference = const T&; + using difference_type = std::ptrdiff_t; + + using allocator_type = Allocator; + using pointer = typename allocator_traits_type::pointer; + using const_pointer = typename allocator_traits_type::const_pointer; + + using iterator = concurrent_queue_iterator<concurrent_bounded_queue, T, Allocator>; + using const_iterator = concurrent_queue_iterator<concurrent_bounded_queue, const T, Allocator> ; + + concurrent_bounded_queue() : concurrent_bounded_queue(allocator_type()) {} + + explicit concurrent_bounded_queue( const allocator_type& a ) : + my_allocator(a), my_capacity(0), my_abort_counter(0), my_queue_representation(nullptr) + { + my_queue_representation = reinterpret_cast<queue_representation_type*>( + r1::allocate_bounded_queue_rep(sizeof(queue_representation_type))); + my_monitors = reinterpret_cast<r1::concurrent_monitor*>(my_queue_representation + 1); + queue_allocator_traits::construct(my_allocator, my_queue_representation, my_allocator); + my_capacity = std::size_t(-1) / (queue_representation_type::item_size > 1 ? 
queue_representation_type::item_size : 2); + + __TBB_ASSERT(is_aligned(my_queue_representation, max_nfs_size), "alignment error" ); + __TBB_ASSERT(is_aligned(&my_queue_representation->head_counter, max_nfs_size), "alignment error" ); + __TBB_ASSERT(is_aligned(&my_queue_representation->tail_counter, max_nfs_size), "alignment error" ); + __TBB_ASSERT(is_aligned(&my_queue_representation->array, max_nfs_size), "alignment error" ); + } + + template <typename InputIterator> + concurrent_bounded_queue( InputIterator begin, InputIterator end, const allocator_type& a = allocator_type() ) : + concurrent_bounded_queue(a) + { + for (; begin != end; ++begin) + push(*begin); + } + + concurrent_bounded_queue( const concurrent_bounded_queue& src, const allocator_type& a ) : + concurrent_bounded_queue(a) + { + my_queue_representation->assign(*src.my_queue_representation, copy_construct_item); + } + + concurrent_bounded_queue( const concurrent_bounded_queue& src ) : + concurrent_bounded_queue(queue_allocator_traits::select_on_container_copy_construction(src.get_allocator())) + { + my_queue_representation->assign(*src.my_queue_representation, copy_construct_item); + } + + // Move constructors + concurrent_bounded_queue( concurrent_bounded_queue&& src ) : + concurrent_bounded_queue(std::move(src.my_allocator)) + { + internal_swap(src); + } + + concurrent_bounded_queue( concurrent_bounded_queue&& src, const allocator_type& a ) : + concurrent_bounded_queue(a) + { + // checking that memory allocated by one instance of allocator can be deallocated + // with another + if (my_allocator == src.my_allocator) { + internal_swap(src); + } else { + // allocators are different => performing per-element move + my_queue_representation->assign(*src.my_queue_representation, move_construct_item); + src.clear(); + } + } + + // Destroy queue + ~concurrent_bounded_queue() { + clear(); + my_queue_representation->clear(); + queue_allocator_traits::destroy(my_allocator, my_queue_representation); + r1::deallocate_bounded_queue_rep(reinterpret_cast<std::uint8_t*>(my_queue_representation), + sizeof(queue_representation_type)); + } + + // Enqueue an item at tail of queue. + void push( const T& value ) { + internal_push(value); + } + + void push( T&& value ) { + internal_push(std::move(value)); + } + + // Enqueue an item at tail of queue if queue is not already full. + // Does not wait for queue to become not full. + // Returns true if item is pushed; false if queue was already full. + bool try_push( const T& value ) { + return internal_push_if_not_full(value); + } + + bool try_push( T&& value ) { + return internal_push_if_not_full(std::move(value)); + } + + template <typename... Args> + void emplace( Args&&... args ) { + internal_push(std::forward<Args>(args)...); + } + + template <typename... Args> + bool try_emplace( Args&&... args ) { + return internal_push_if_not_full(std::forward<Args>(args)...); + } + + // Attempt to dequeue an item from head of queue. + /** Does not wait for item to become available. + Returns true if successful; false otherwise. */ + bool pop( T& result ) { + return internal_pop(&result); + } + + bool try_pop( T& result ) { + return internal_pop_if_present(&result); + } + + void abort() { + internal_abort(); + } + + // Return the number of items in the queue; thread unsafe + std::ptrdiff_t size() const { + return my_queue_representation->size(); + } + + void set_capacity( size_type new_capacity ) { + std::ptrdiff_t c = new_capacity < 0 ? 
infinite_capacity : new_capacity; + my_capacity = c; + } + + size_type capacity() const { + return my_capacity; + } + + // Equivalent to size()==0. + __TBB_nodiscard bool empty() const { + return my_queue_representation->empty(); + } + + // Clear the queue. not thread-safe. + void clear() { + while (!empty()) { + T value; + try_pop(value); + } + } + + // Return allocator object + allocator_type get_allocator() const { return my_allocator; } + + //------------------------------------------------------------------------ + // The iterators are intended only for debugging. They are slow and not thread safe. + //------------------------------------------------------------------------ + + iterator unsafe_begin() { return concurrent_queue_iterator_provider::get<iterator>(*this); } + iterator unsafe_end() { return iterator(); } + const_iterator unsafe_begin() const { return concurrent_queue_iterator_provider::get<const_iterator>(*this); } + const_iterator unsafe_end() const { return const_iterator(); } + const_iterator unsafe_cbegin() const { return concurrent_queue_iterator_provider::get<const_iterator>(*this); } + const_iterator unsafe_cend() const { return const_iterator(); } + +private: + void internal_swap( concurrent_bounded_queue& src ) { + std::swap(my_queue_representation, src.my_queue_representation); + std::swap(my_monitors, src.my_monitors); + } + + static constexpr std::ptrdiff_t infinite_capacity = std::ptrdiff_t(~size_type(0) / 2); + + template <typename... Args> + void internal_push( Args&&... args ) { + unsigned old_abort_counter = my_abort_counter.load(std::memory_order_relaxed); + ticket_type ticket = my_queue_representation->tail_counter++; + std::ptrdiff_t target = ticket - my_capacity; + + if (static_cast<std::ptrdiff_t>(my_queue_representation->head_counter.load(std::memory_order_relaxed)) <= target) { // queue is full + auto pred = [&] { + if (my_abort_counter.load(std::memory_order_relaxed) != old_abort_counter) { + throw_exception(exception_id::user_abort); + } + + return static_cast<std::ptrdiff_t>(my_queue_representation->head_counter.load(std::memory_order_relaxed)) <= target; + }; + + try_call( [&] { + internal_wait(my_monitors, cbq_slots_avail_tag, target, pred); + }).on_exception( [&] { + my_queue_representation->choose(ticket).abort_push(ticket, *my_queue_representation); + }); + + } + __TBB_ASSERT((static_cast<std::ptrdiff_t>(my_queue_representation->head_counter.load(std::memory_order_relaxed)) > target), nullptr); + my_queue_representation->choose(ticket).push(ticket, *my_queue_representation, std::forward<Args>(args)...); + r1::notify_bounded_queue_monitor(my_monitors, cbq_items_avail_tag, ticket); + } + + template <typename... Args> + bool internal_push_if_not_full( Args&&... args ) { + ticket_type ticket = my_queue_representation->tail_counter.load(std::memory_order_relaxed); + do { + if (static_cast<std::ptrdiff_t>(ticket - my_queue_representation->head_counter.load(std::memory_order_relaxed)) >= my_capacity) { + // Queue is full + return false; + } + // Queue had empty slot with ticket k when we looked. Attempt to claim that slot. + // Another thread claimed the slot, so retry. 
+ } while (!my_queue_representation->tail_counter.compare_exchange_strong(ticket, ticket + 1)); + + my_queue_representation->choose(ticket).push(ticket, *my_queue_representation, std::forward<Args>(args)...); + r1::notify_bounded_queue_monitor(my_monitors, cbq_items_avail_tag, ticket); + return true; + } + + bool internal_pop( void* dst ) { + std::ptrdiff_t target; + // This loop is a single pop operation; abort_counter should not be re-read inside + unsigned old_abort_counter = my_abort_counter.load(std::memory_order_relaxed); + + do { + target = my_queue_representation->head_counter++; + if (static_cast<std::ptrdiff_t>(my_queue_representation->tail_counter.load(std::memory_order_relaxed)) <= target) { + auto pred = [&] { + if (my_abort_counter.load(std::memory_order_relaxed) != old_abort_counter) { + throw_exception(exception_id::user_abort); + } + + return static_cast<std::ptrdiff_t>(my_queue_representation->tail_counter.load(std::memory_order_relaxed)) <= target; + }; + + try_call( [&] { + internal_wait(my_monitors, cbq_items_avail_tag, target, pred); + }).on_exception( [&] { + my_queue_representation->head_counter--; + }); + } + __TBB_ASSERT(static_cast<std::ptrdiff_t>(my_queue_representation->tail_counter.load(std::memory_order_relaxed)) > target, nullptr); + } while (!my_queue_representation->choose(target).pop(dst, target, *my_queue_representation)); + + r1::notify_bounded_queue_monitor(my_monitors, cbq_slots_avail_tag, target); + return true; + } + + bool internal_pop_if_present( void* dst ) { + ticket_type ticket; + do { + ticket = my_queue_representation->head_counter.load(std::memory_order_relaxed); + do { + if (static_cast<std::ptrdiff_t>(my_queue_representation->tail_counter.load(std::memory_order_relaxed) - ticket) <= 0) { // queue is empty + // Queue is empty + return false; + } + // Queue had item with ticket k when we looked. Attempt to get that item. + // Another thread snatched the item, retry. 
+ } while (!my_queue_representation->head_counter.compare_exchange_strong(ticket, ticket + 1)); + } while (!my_queue_representation->choose(ticket).pop(dst, ticket, *my_queue_representation)); + + r1::notify_bounded_queue_monitor(my_monitors, cbq_slots_avail_tag, ticket); + return true; + } + + void internal_abort() { + ++my_abort_counter; + r1::abort_bounded_queue_monitors(my_monitors); + } + + static void copy_construct_item(T* location, const void* src) { + // TODO: use allocator_traits for copy construction + new (location) value_type(*static_cast<const value_type*>(src)); + } + + static void move_construct_item(T* location, const void* src) { + // TODO: use allocator_traits for move construction + new (location) value_type(std::move(*static_cast<value_type*>(const_cast<void*>(src)))); + } + + template <typename Container, typename Value, typename A> + friend class concurrent_queue_iterator; + + queue_allocator_type my_allocator; + std::ptrdiff_t my_capacity; + std::atomic<unsigned> my_abort_counter; + queue_representation_type* my_queue_representation; + + r1::concurrent_monitor* my_monitors; +}; // class concurrent_bounded_queue + +#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT +// Deduction guide for the constructor from two iterators +template <typename It, typename Alloc = tbb::cache_aligned_allocator<iterator_value_t<It>>> +concurrent_bounded_queue( It, It, Alloc = Alloc() ) +-> concurrent_bounded_queue<iterator_value_t<It>, Alloc>; + +#endif /* __TBB_CPP17_DEDUCTION_GUIDES_PRESENT */ + +} //namespace d1 +} // namespace detail + +inline namespace v1 { + +using detail::d1::concurrent_queue; +using detail::d1::concurrent_bounded_queue; +using detail::r1::user_abort; +using detail::r1::bad_last_alloc; + +} // inline namespace v1 +} // namespace tbb + +#endif // __TBB_concurrent_queue_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/concurrent_set.h b/contrib/libs/tbb/include/oneapi/tbb/concurrent_set.h new file mode 100644 index 0000000000..c68fa6c362 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/concurrent_set.h @@ -0,0 +1,259 @@ +/* + Copyright (c) 2019-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_concurrent_set_H +#define __TBB_concurrent_set_H + +#include "detail/_namespace_injection.h" +#include "detail/_concurrent_skip_list.h" +#include "tbb_allocator.h" +#include <functional> +#include <utility> + +namespace tbb { +namespace detail { +namespace d1 { + +template<typename Key, typename KeyCompare, typename RandomGenerator, typename Allocator, bool AllowMultimapping> +struct set_traits { + static constexpr std::size_t max_level = RandomGenerator::max_level; + using random_level_generator_type = RandomGenerator; + using key_type = Key; + using value_type = key_type; + using compare_type = KeyCompare; + using value_compare = compare_type; + using reference = value_type&; + using const_reference = const value_type&; + using allocator_type = Allocator; + + static constexpr bool allow_multimapping = AllowMultimapping; + + static const key_type& get_key(const_reference val) { + return val; + } + + static value_compare value_comp(compare_type comp) { return comp; } +}; // struct set_traits + +template <typename Key, typename Compare, typename Allocator> +class concurrent_multiset; + +template <typename Key, typename Compare = std::less<Key>, typename Allocator = tbb::tbb_allocator<Key>> +class concurrent_set : public concurrent_skip_list<set_traits<Key, Compare, concurrent_geometric_level_generator<32>, Allocator, false>> { + using base_type = concurrent_skip_list<set_traits<Key, Compare, concurrent_geometric_level_generator<32>, Allocator, false>>; +public: + using key_type = Key; + using value_type = typename base_type::value_type; + using size_type = typename base_type::size_type; + using difference_type = typename base_type::difference_type; + using key_compare = Compare; + using value_compare = typename base_type::value_compare; + using allocator_type = Allocator; + + using reference = typename base_type::reference; + using const_reference = typename base_type::const_reference; + using pointer = typename base_type::pointer; + using const_pointer = typename base_type::const_pointer; + + using iterator = typename base_type::iterator; + using const_iterator = typename base_type::const_iterator; + + using node_type = typename base_type::node_type; + + // Include constructors of base_type + using base_type::base_type; + using base_type::operator=; + + // Required for implicit deduction guides + concurrent_set() = default; + concurrent_set( const concurrent_set& ) = default; + concurrent_set( const concurrent_set& other, const allocator_type& alloc ) : base_type(other, alloc) {} + concurrent_set( concurrent_set&& ) = default; + concurrent_set( concurrent_set&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {} + // Required to respect the rule of 5 + concurrent_set& operator=( const concurrent_set& ) = default; + concurrent_set& operator=( concurrent_set&& ) = default; + + template<typename OtherCompare> + void merge(concurrent_set<key_type, OtherCompare, Allocator>& source) { + this->internal_merge(source); + } + + template<typename OtherCompare> + void merge(concurrent_set<key_type, OtherCompare, Allocator>&& source) { + this->internal_merge(std::move(source)); + } + + template<typename OtherCompare> + void merge(concurrent_multiset<key_type, OtherCompare, Allocator>& source) { + this->internal_merge(source); + } + + template<typename OtherCompare> + void merge(concurrent_multiset<key_type, OtherCompare, Allocator>&& source) { + this->internal_merge(std::move(source)); + } +}; // class concurrent_set + +#if 
__TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +template <typename It, + typename Comp = std::less<iterator_value_t<It>>, + typename Alloc = tbb::tbb_allocator<iterator_value_t<It>>, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Comp>>> +concurrent_set( It, It, Comp = Comp(), Alloc = Alloc() ) +-> concurrent_set<iterator_value_t<It>, Comp, Alloc>; + +template <typename Key, + typename Comp = std::less<Key>, + typename Alloc = tbb::tbb_allocator<Key>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Comp>>> +concurrent_set( std::initializer_list<Key>, Comp = Comp(), Alloc = Alloc() ) +-> concurrent_set<Key, Comp, Alloc>; + +template <typename It, typename Alloc, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_set( It, It, Alloc ) +-> concurrent_set<iterator_value_t<It>, + std::less<iterator_value_t<It>>, Alloc>; + +template <typename Key, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_set( std::initializer_list<Key>, Alloc ) +-> concurrent_set<Key, std::less<Key>, Alloc>; + +#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +template <typename Key, typename Compare, typename Allocator> +void swap( concurrent_set<Key, Compare, Allocator>& lhs, + concurrent_set<Key, Compare, Allocator>& rhs ) +{ + lhs.swap(rhs); +} + +template <typename Key, typename Compare = std::less<Key>, typename Allocator = tbb::tbb_allocator<Key>> +class concurrent_multiset : public concurrent_skip_list<set_traits<Key, Compare, concurrent_geometric_level_generator<32>, Allocator, true>> { + using base_type = concurrent_skip_list<set_traits<Key, Compare, concurrent_geometric_level_generator<32>, Allocator, true>>; +public: + using key_type = Key; + using value_type = typename base_type::value_type; + using size_type = typename base_type::size_type; + using difference_type = typename base_type::difference_type; + using key_compare = Compare; + using value_compare = typename base_type::value_compare; + using allocator_type = Allocator; + + using reference = typename base_type::reference; + using const_reference = typename base_type::const_reference; + using pointer = typename base_type::pointer; + using const_pointer = typename base_type::const_pointer; + + using iterator = typename base_type::iterator; + using const_iterator = typename base_type::const_iterator; + + using node_type = typename base_type::node_type; + + // Include constructors of base_type; + using base_type::base_type; + using base_type::operator=; + + // Required for implicit deduction guides + concurrent_multiset() = default; + concurrent_multiset( const concurrent_multiset& ) = default; + concurrent_multiset( const concurrent_multiset& other, const allocator_type& alloc ) : base_type(other, alloc) {} + concurrent_multiset( concurrent_multiset&& ) = default; + concurrent_multiset( concurrent_multiset&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {} + // Required to respect the rule of 5 + concurrent_multiset& operator=( const concurrent_multiset& ) = default; + concurrent_multiset& operator=( concurrent_multiset&& ) = default; + + template<typename OtherCompare> + void merge(concurrent_set<key_type, OtherCompare, Allocator>& source) { + this->internal_merge(source); + } + + template<typename OtherCompare> + void merge(concurrent_set<key_type, OtherCompare, 
Allocator>&& source) { + this->internal_merge(std::move(source)); + } + + template<typename OtherCompare> + void merge(concurrent_multiset<key_type, OtherCompare, Allocator>& source) { + this->internal_merge(source); + } + + template<typename OtherCompare> + void merge(concurrent_multiset<key_type, OtherCompare, Allocator>&& source) { + this->internal_merge(std::move(source)); + } +}; // class concurrent_multiset + +#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +template <typename It, + typename Comp = std::less<iterator_value_t<It>>, + typename Alloc = tbb::tbb_allocator<iterator_value_t<It>>, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Comp>>> +concurrent_multiset( It, It, Comp = Comp(), Alloc = Alloc() ) +-> concurrent_multiset<iterator_value_t<It>, Comp, Alloc>; + +template <typename Key, + typename Comp = std::less<Key>, + typename Alloc = tbb::tbb_allocator<Key>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Comp>>> +concurrent_multiset( std::initializer_list<Key>, Comp = Comp(), Alloc = Alloc() ) +-> concurrent_multiset<Key, Comp, Alloc>; + +template <typename It, typename Alloc, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_multiset( It, It, Alloc ) +-> concurrent_multiset<iterator_value_t<It>, std::less<iterator_value_t<It>>, Alloc>; + +template <typename Key, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_multiset( std::initializer_list<Key>, Alloc ) +-> concurrent_multiset<Key, std::less<Key>, Alloc>; + +#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +template <typename Key, typename Compare, typename Allocator> +void swap( concurrent_multiset<Key, Compare, Allocator>& lhs, + concurrent_multiset<Key, Compare, Allocator>& rhs ) +{ + lhs.swap(rhs); +} + +} // namespace d1 +} // namespace detail + +inline namespace v1 { + +using detail::d1::concurrent_set; +using detail::d1::concurrent_multiset; +using detail::split; + +} // inline namespace v1 +} // namespace tbb + +#endif // __TBB_concurrent_set_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/concurrent_unordered_map.h b/contrib/libs/tbb/include/oneapi/tbb/concurrent_unordered_map.h new file mode 100644 index 0000000000..0c9c2cd79c --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/concurrent_unordered_map.h @@ -0,0 +1,387 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_concurrent_unordered_map_H +#define __TBB_concurrent_unordered_map_H + +#include "detail/_namespace_injection.h" +#include "detail/_concurrent_unordered_base.h" +#include "tbb_allocator.h" +#include <functional> + +namespace tbb { +namespace detail { +namespace d1 { + +template <typename Key, typename T, typename Hash, typename KeyEqual, typename Allocator, bool AllowMultimapping> +struct concurrent_unordered_map_traits { + using value_type = std::pair<const Key, T>; + using key_type = Key; + using allocator_type = Allocator; + using hash_compare_type = hash_compare<Key, Hash, KeyEqual>; + static constexpr bool allow_multimapping = AllowMultimapping; + + static constexpr const key_type& get_key( const value_type& value ) { + return value.first; + } +}; // struct concurrent_unordered_map_traits + +template <typename Key, typename T, typename Hash, typename KeyEqual, typename Allocator> +class concurrent_unordered_multimap; + +template <typename Key, typename T, typename Hash = std::hash<Key>, typename KeyEqual = std::equal_to<Key>, + typename Allocator = tbb::tbb_allocator<std::pair<const Key, T>> > +class concurrent_unordered_map + : public concurrent_unordered_base<concurrent_unordered_map_traits<Key, T, Hash, KeyEqual, Allocator, false>> +{ + using traits_type = concurrent_unordered_map_traits<Key, T, Hash, KeyEqual, Allocator, false>; + using base_type = concurrent_unordered_base<traits_type>; +public: + using key_type = typename base_type::key_type; + using mapped_type = T; + using value_type = typename base_type::value_type; + using size_type = typename base_type::size_type; + using difference_type = typename base_type::difference_type; + using hasher = typename base_type::hasher; + using key_equal = typename base_type::key_equal; + using allocator_type = typename base_type::allocator_type; + using reference = typename base_type::reference; + using const_reference = typename base_type::const_reference; + using pointer = typename base_type::pointer; + using const_pointer = typename base_type::const_pointer; + using iterator = typename base_type::iterator; + using const_iterator = typename base_type::const_iterator; + using local_iterator = typename base_type::local_iterator; + using const_local_iterator = typename base_type::const_local_iterator; + using node_type = typename base_type::node_type; + + // Include constructors of base type + using base_type::base_type; + using base_type::operator=; + + // Required for implicit deduction guides + concurrent_unordered_map() = default; + concurrent_unordered_map( const concurrent_unordered_map& ) = default; + concurrent_unordered_map( const concurrent_unordered_map& other, const allocator_type& alloc ) : base_type(other, alloc) {} + concurrent_unordered_map( concurrent_unordered_map&& ) = default; + concurrent_unordered_map( concurrent_unordered_map&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {} + // Required to respect the rule of 5 + concurrent_unordered_map& operator=( const concurrent_unordered_map& ) = default; + concurrent_unordered_map& operator=( concurrent_unordered_map&& ) = default; + + // Observers + mapped_type& operator[]( const key_type& key ) { + iterator where = this->find(key); + + if (where == this->end()) { + where = this->emplace(std::piecewise_construct, std::forward_as_tuple(key), std::tuple<>()).first; + } + return where->second; + } + + mapped_type& operator[]( key_type&& key ) { + iterator where = this->find(key); + + if (where == this->end()) { + where = 
this->emplace(std::piecewise_construct, std::forward_as_tuple(std::move(key)), std::tuple<>()).first; + } + return where->second; + } + + mapped_type& at( const key_type& key ) { + iterator where = this->find(key); + + if (where == this->end()) { + throw_exception(exception_id::invalid_key); + } + return where->second; + } + + const mapped_type& at( const key_type& key ) const { + const_iterator where = this->find(key); + + if (where == this->end()) { + throw_exception(exception_id::out_of_range); + } + return where->second; + } + + using base_type::insert; + + template<typename P> + typename std::enable_if<std::is_constructible<value_type, P&&>::value, + std::pair<iterator, bool>>::type insert( P&& value ) { + return this->emplace(std::forward<P>(value)); + } + + template<typename P> + typename std::enable_if<std::is_constructible<value_type, P&&>::value, + iterator>::type insert( const_iterator hint, P&& value ) { + return this->emplace_hint(hint, std::forward<P>(value)); + } + + template <typename OtherHash, typename OtherKeyEqual> + void merge( concurrent_unordered_map<key_type, mapped_type, OtherHash, OtherKeyEqual, allocator_type>& source ) { + this->internal_merge(source); + } + + template <typename OtherHash, typename OtherKeyEqual> + void merge( concurrent_unordered_map<key_type, mapped_type, OtherHash, OtherKeyEqual, allocator_type>&& source ) { + this->internal_merge(std::move(source)); + } + + template <typename OtherHash, typename OtherKeyEqual> + void merge( concurrent_unordered_multimap<key_type, mapped_type, OtherHash, OtherKeyEqual, allocator_type>& source ) { + this->internal_merge(source); + } + + template <typename OtherHash, typename OtherKeyEqual> + void merge( concurrent_unordered_multimap<key_type, mapped_type, OtherHash, OtherKeyEqual, allocator_type>&& source ) { + this->internal_merge(std::move(source)); + } +}; // class concurrent_unordered_map + +#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT +template <typename It, + typename Hash = std::hash<iterator_key_t<It>>, + typename KeyEq = std::equal_to<iterator_key_t<It>>, + typename Alloc = tbb::tbb_allocator<iterator_alloc_pair_t<It>>, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Hash>>, + typename = std::enable_if_t<!is_allocator_v<KeyEq>>, + typename = std::enable_if_t<!std::is_integral_v<Hash>>> +concurrent_unordered_map( It, It, std::size_t = {}, + Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() ) +-> concurrent_unordered_map<iterator_key_t<It>, iterator_mapped_t<It>, Hash, KeyEq, Alloc>; + +template <typename Key, typename T, + typename Hash = std::hash<std::remove_const_t<Key>>, + typename KeyEq = std::equal_to<std::remove_const_t<Key>>, + typename Alloc = tbb::tbb_allocator<std::pair<const Key, T>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Hash>>, + typename = std::enable_if_t<!is_allocator_v<KeyEq>>, + typename = std::enable_if_t<!std::is_integral_v<Hash>>> +concurrent_unordered_map( std::initializer_list<std::pair<Key, T>>, std::size_t = {}, + Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() ) +-> concurrent_unordered_map<std::remove_const_t<Key>, T, Hash, KeyEq, Alloc>; + +template <typename It, typename Alloc, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_unordered_map( It, It, std::size_t, Alloc ) +-> concurrent_unordered_map<iterator_key_t<It>, iterator_mapped_t<It>, 
+ std::hash<iterator_key_t<It>>, + std::equal_to<iterator_key_t<It>>, Alloc>; + +// TODO: investigate if a deduction guide for concurrent_unordered_map(It, It, Alloc) is needed + +template <typename It, typename Hash, typename Alloc, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Hash>>, + typename = std::enable_if_t<!std::is_integral_v<Hash>>> +concurrent_unordered_map( It, It, std::size_t, Hash, Alloc ) +-> concurrent_unordered_map<iterator_key_t<It>, iterator_mapped_t<It>, + Hash, std::equal_to<iterator_key_t<It>>, Alloc>; + +template <typename Key, typename T, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_unordered_map( std::initializer_list<std::pair<Key, T>>, std::size_t, Alloc ) +-> concurrent_unordered_map<std::remove_const_t<Key>, T, std::hash<std::remove_const_t<Key>>, + std::equal_to<std::remove_const_t<Key>>, Alloc>; + +template <typename Key, typename T, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_unordered_map( std::initializer_list<std::pair<Key, T>>, Alloc ) +-> concurrent_unordered_map<std::remove_const_t<Key>, T, std::hash<std::remove_const_t<Key>>, + std::equal_to<std::remove_const_t<Key>>, Alloc>; + +template <typename Key, typename T, typename Hash, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Hash>>, + typename = std::enable_if_t<!std::is_integral_v<Hash>>> +concurrent_unordered_map( std::initializer_list<std::pair<Key, T>>, std::size_t, Hash, Alloc ) +-> concurrent_unordered_map<std::remove_const_t<Key>, T, Hash, + std::equal_to<std::remove_const_t<Key>>, Alloc>; + +#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +template <typename Key, typename T, typename Hash, typename KeyEqual, typename Allocator> +void swap( concurrent_unordered_map<Key, T, Hash, KeyEqual, Allocator>& lhs, + concurrent_unordered_map<Key, T, Hash, KeyEqual, Allocator>& rhs ) { + lhs.swap(rhs); +} + +template <typename Key, typename T, typename Hash = std::hash<Key>, typename KeyEqual = std::equal_to<Key>, + typename Allocator = tbb::tbb_allocator<std::pair<const Key, T>> > +class concurrent_unordered_multimap + : public concurrent_unordered_base<concurrent_unordered_map_traits<Key, T, Hash, KeyEqual, Allocator, true>> +{ + using traits_type = concurrent_unordered_map_traits<Key, T, Hash, KeyEqual, Allocator, true>; + using base_type = concurrent_unordered_base<traits_type>; +public: + using key_type = typename base_type::key_type; + using mapped_type = T; + using value_type = typename base_type::value_type; + using size_type = typename base_type::size_type; + using difference_type = typename base_type::difference_type; + using hasher = typename base_type::hasher; + using key_equal = typename base_type::key_equal; + using allocator_type = typename base_type::allocator_type; + using reference = typename base_type::reference; + using const_reference = typename base_type::const_reference; + using pointer = typename base_type::pointer; + using const_pointer = typename base_type::const_pointer; + using iterator = typename base_type::iterator; + using const_iterator = typename base_type::const_iterator; + using local_iterator = typename base_type::local_iterator; + using const_local_iterator = typename base_type::const_local_iterator; + using node_type = typename base_type::node_type; + + // Include constructors of base type + using 
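// A minimal sketch (illustrative only, not part of the TBB sources) of the C++17
// deduction guides declared above; it assumes __TBB_CPP17_DEDUCTION_GUIDES_PRESENT.
// The function name example_cumap_ctad() and the sample data are assumptions.
#include "oneapi/tbb/concurrent_unordered_map.h"
#include <string>
#include <utility>
#include <vector>

void example_cumap_ctad() {
    std::vector<std::pair<int, std::string>> src{{1, "a"}, {2, "b"}};

    // Deduced as concurrent_unordered_map<int, std::string, std::hash<int>,
    // std::equal_to<int>, tbb::tbb_allocator<std::pair<const int, std::string>>>.
    tbb::concurrent_unordered_map m1(src.begin(), src.end());

    // The initializer_list guide deduces Key and T from std::pair<Key, T>.
    tbb::concurrent_unordered_map m2{std::pair<int, std::string>{1, "a"},
                                     std::pair<int, std::string>{2, "b"}};
}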
base_type::base_type; + using base_type::operator=; + using base_type::insert; + + // Required for implicit deduction guides + concurrent_unordered_multimap() = default; + concurrent_unordered_multimap( const concurrent_unordered_multimap& ) = default; + concurrent_unordered_multimap( const concurrent_unordered_multimap& other, const allocator_type& alloc ) : base_type(other, alloc) {} + concurrent_unordered_multimap( concurrent_unordered_multimap&& ) = default; + concurrent_unordered_multimap( concurrent_unordered_multimap&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {} + // Required to respect the rule of 5 + concurrent_unordered_multimap& operator=( const concurrent_unordered_multimap& ) = default; + concurrent_unordered_multimap& operator=( concurrent_unordered_multimap&& ) = default; + + template <typename P> + typename std::enable_if<std::is_constructible<value_type, P&&>::value, + std::pair<iterator, bool>>::type insert( P&& value ) { + return this->emplace(std::forward<P>(value)); + } + + template<typename P> + typename std::enable_if<std::is_constructible<value_type, P&&>::value, + iterator>::type insert( const_iterator hint, P&& value ) { + return this->emplace_hint(hint, std::forward<P&&>(value)); + } + + template <typename OtherHash, typename OtherKeyEqual> + void merge( concurrent_unordered_map<key_type, mapped_type, OtherHash, OtherKeyEqual, allocator_type>& source ) { + this->internal_merge(source); + } + + template <typename OtherHash, typename OtherKeyEqual> + void merge( concurrent_unordered_map<key_type, mapped_type, OtherHash, OtherKeyEqual, allocator_type>&& source ) { + this->internal_merge(std::move(source)); + } + + template <typename OtherHash, typename OtherKeyEqual> + void merge( concurrent_unordered_multimap<key_type, mapped_type, OtherHash, OtherKeyEqual, allocator_type>& source ) { + this->internal_merge(source); + } + + template <typename OtherHash, typename OtherKeyEqual> + void merge( concurrent_unordered_multimap<key_type, mapped_type, OtherHash, OtherKeyEqual, allocator_type>&& source ) { + this->internal_merge(std::move(source)); + } +}; // class concurrent_unordered_multimap + +#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +template <typename It, + typename Hash = std::hash<iterator_key_t<It>>, + typename KeyEq = std::equal_to<iterator_key_t<It>>, + typename Alloc = tbb::tbb_allocator<iterator_alloc_pair_t<It>>, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Hash>>, + typename = std::enable_if_t<!is_allocator_v<KeyEq>>, + typename = std::enable_if_t<!std::is_integral_v<Hash>>> +concurrent_unordered_multimap( It, It, std::size_t = {}, Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() ) +-> concurrent_unordered_multimap<iterator_key_t<It>, iterator_mapped_t<It>, Hash, KeyEq, Alloc>; + +template <typename Key, typename T, + typename Hash = std::hash<std::remove_const_t<Key>>, + typename KeyEq = std::equal_to<std::remove_const_t<Key>>, + typename Alloc = tbb::tbb_allocator<std::pair<const Key, T>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Hash>>, + typename = std::enable_if_t<!is_allocator_v<KeyEq>>, + typename = std::enable_if_t<!std::is_integral_v<Hash>>> +concurrent_unordered_multimap( std::initializer_list<std::pair<Key, T>>, std::size_t = {}, + Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() ) +-> concurrent_unordered_multimap<std::remove_const_t<Key>, T, Hash, 
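// A minimal sketch (illustrative only, not part of the TBB sources) of
// concurrent_unordered_multimap: duplicate keys are kept, and a multimap can accept every
// node offered by merge(). equal_range() comes from the shared concurrent_unordered_base.
// The function name example_cummap() and the sample data are assumptions.
#include "oneapi/tbb/concurrent_unordered_map.h"
#include <string>

void example_cummap() {
    tbb::concurrent_unordered_multimap<std::string, int> mm;
    mm.insert({"apple", 1});
    mm.insert({"apple", 2});                    // both mappings for "apple" are stored

    auto range = mm.equal_range("apple");       // iterate all mappings for one key
    for (auto it = range.first; it != range.second; ++it) {
        // it->first == "apple", it->second is 1 or 2
    }

    tbb::concurrent_unordered_map<std::string, int> m;
    m.insert({"pear", 3});
    mm.merge(m);                                // every node moves into the multimap; m becomes empty
}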
KeyEq, Alloc>; + +template <typename It, typename Alloc, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_unordered_multimap( It, It, std::size_t, Alloc ) +-> concurrent_unordered_multimap<iterator_key_t<It>, iterator_mapped_t<It>, + std::hash<iterator_key_t<It>>, + std::equal_to<iterator_key_t<It>>, Alloc>; + +template <typename It, typename Hash, typename Alloc, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Hash>>, + typename = std::enable_if_t<!std::is_integral_v<Hash>>> +concurrent_unordered_multimap( It, It, std::size_t, Hash, Alloc ) +-> concurrent_unordered_multimap<iterator_key_t<It>, iterator_mapped_t<It>, Hash, + std::equal_to<iterator_key_t<It>>, Alloc>; + +template <typename Key, typename T, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_unordered_multimap( std::initializer_list<std::pair<Key, T>>, std::size_t, Alloc ) +-> concurrent_unordered_multimap<std::remove_const_t<Key>, T, std::hash<std::remove_const_t<Key>>, + std::equal_to<std::remove_const_t<Key>>, Alloc>; + +template <typename Key, typename T, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_unordered_multimap( std::initializer_list<std::pair<Key, T>>, Alloc ) +-> concurrent_unordered_multimap<std::remove_const_t<Key>, T, std::hash<std::remove_const_t<Key>>, + std::equal_to<std::remove_const_t<Key>>, Alloc>; + +template <typename Key, typename T, typename Hash, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Hash>>, + typename = std::enable_if_t<!std::is_integral_v<Hash>>> +concurrent_unordered_multimap( std::initializer_list<std::pair<Key, T>>, std::size_t, Hash, Alloc ) +-> concurrent_unordered_multimap<std::remove_const_t<Key>, T, Hash, + std::equal_to<std::remove_const_t<Key>>, Alloc>; + +#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +template <typename Key, typename T, typename Hash, typename KeyEqual, typename Allocator> +void swap( concurrent_unordered_multimap<Key, T, Hash, KeyEqual, Allocator>& lhs, + concurrent_unordered_multimap<Key, T, Hash, KeyEqual, Allocator>& rhs ) { + lhs.swap(rhs); +} + +} // namespace d1 +} // namespace detail + +inline namespace v1 { + +using detail::d1::concurrent_unordered_map; +using detail::d1::concurrent_unordered_multimap; +using detail::split; + +} // inline namespace v1 +} // namespace tbb + +#endif // __TBB_concurrent_unordered_map_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/concurrent_unordered_set.h b/contrib/libs/tbb/include/oneapi/tbb/concurrent_unordered_set.h new file mode 100644 index 0000000000..ce6175294d --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/concurrent_unordered_set.h @@ -0,0 +1,306 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_concurrent_unordered_set_H +#define __TBB_concurrent_unordered_set_H + +#include "detail/_namespace_injection.h" +#include "detail/_concurrent_unordered_base.h" +#include "tbb_allocator.h" + +namespace tbb { +namespace detail { +namespace d1 { + +template <typename Key, typename Hash, typename KeyEqual, typename Allocator, bool AllowMultimapping> +struct concurrent_unordered_set_traits { + using key_type = Key; + using value_type = key_type; + using allocator_type = Allocator; + using hash_compare_type = hash_compare<key_type, Hash, KeyEqual>; + static constexpr bool allow_multimapping = AllowMultimapping; + + static constexpr const key_type& get_key( const value_type& value ) { + return value; + } +}; // class concurrent_unordered_set_traits + +template <typename Key, typename Hash, typename KeyEqual, typename Allocator> +class concurrent_unordered_multiset; + +template <typename Key, typename Hash = std::hash<Key>, typename KeyEqual = std::equal_to<Key>, + typename Allocator = tbb::tbb_allocator<Key>> +class concurrent_unordered_set + : public concurrent_unordered_base<concurrent_unordered_set_traits<Key, Hash, KeyEqual, Allocator, false>> +{ + using traits_type = concurrent_unordered_set_traits<Key, Hash, KeyEqual, Allocator, false>; + using base_type = concurrent_unordered_base<traits_type>; +public: + using key_type = typename base_type::key_type; + using value_type = typename base_type::value_type; + using size_type = typename base_type::size_type; + using difference_type = typename base_type::difference_type; + using hasher = typename base_type::hasher; + using key_equal = typename base_type::key_equal; + using allocator_type = typename base_type::allocator_type; + using reference = typename base_type::reference; + using const_reference = typename base_type::const_reference; + using pointer = typename base_type::pointer; + using const_pointer = typename base_type::const_pointer; + using iterator = typename base_type::iterator; + using const_iterator = typename base_type::const_iterator; + using local_iterator = typename base_type::local_iterator; + using const_local_iterator = typename base_type::const_local_iterator; + using node_type = typename base_type::node_type; + + // Include constructors of base_type; + using base_type::base_type; + using base_type::operator=; + // Required for implicit deduction guides + concurrent_unordered_set() = default; + concurrent_unordered_set( const concurrent_unordered_set& ) = default; + concurrent_unordered_set( const concurrent_unordered_set& other, const allocator_type& alloc ) : base_type(other, alloc) {} + concurrent_unordered_set( concurrent_unordered_set&& ) = default; + concurrent_unordered_set( concurrent_unordered_set&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {} + // Required to respect the rule of 5 + concurrent_unordered_set& operator=( const concurrent_unordered_set& ) = default; + concurrent_unordered_set& operator=( concurrent_unordered_set&& ) = default; + + template <typename OtherHash, typename OtherKeyEqual> + void merge( concurrent_unordered_set<key_type, OtherHash, OtherKeyEqual, allocator_type>& source ) { + this->internal_merge(source); + } + + template <typename OtherHash, typename OtherKeyEqual> + void merge( concurrent_unordered_set<key_type, OtherHash, OtherKeyEqual, allocator_type>&& source ) { + this->internal_merge(std::move(source)); + } + + template <typename OtherHash, typename OtherKeyEqual> + void merge( concurrent_unordered_multiset<key_type, OtherHash, 
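// A minimal sketch (illustrative only, not part of the TBB sources) of
// concurrent_unordered_set: insert() may be called from many threads at once, and
// duplicates are rejected. The parallel_for call, example_cuset() and the values are assumptions.
#include "oneapi/tbb/concurrent_unordered_set.h"
#include "oneapi/tbb/parallel_for.h"

void example_cuset() {
    tbb::concurrent_unordered_set<int> s;
    tbb::parallel_for(0, 1000, [&](int i) {
        s.insert(i % 100);                      // concurrent insertion; at most 100 distinct keys survive
    });
    bool has42 = (s.find(42) != s.end());
    (void)has42;
}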
OtherKeyEqual, allocator_type>& source ) { + this->internal_merge(source); + } + + template <typename OtherHash, typename OtherKeyEqual> + void merge( concurrent_unordered_multiset<key_type, OtherHash, OtherKeyEqual, allocator_type>&& source ) { + this->internal_merge(std::move(source)); + } +}; // class concurrent_unordered_set + +#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +template <typename It, + typename Hash = std::hash<iterator_value_t<It>>, + typename KeyEq = std::equal_to<iterator_value_t<It>>, + typename Alloc = tbb::tbb_allocator<iterator_value_t<It>>, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Hash>>, + typename = std::enable_if_t<!is_allocator_v<KeyEq>>, + typename = std::enable_if_t<!std::is_integral_v<Hash>>> +concurrent_unordered_set( It, It, std::size_t = {}, Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() ) +-> concurrent_unordered_set<iterator_value_t<It>, Hash, KeyEq, Alloc>; + +template <typename T, + typename Hash = std::hash<T>, + typename KeyEq = std::equal_to<T>, + typename Alloc = tbb::tbb_allocator<T>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Hash>>, + typename = std::enable_if_t<!is_allocator_v<KeyEq>>, + typename = std::enable_if_t<!std::is_integral_v<Hash>>> +concurrent_unordered_set( std::initializer_list<T>, std::size_t = {}, + Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() ) +-> concurrent_unordered_set<T, Hash, KeyEq, Alloc>; + +template <typename It, typename Alloc, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_unordered_set( It, It, std::size_t, Alloc ) +-> concurrent_unordered_set<iterator_value_t<It>, std::hash<iterator_value_t<It>>, + std::equal_to<iterator_value_t<It>>, Alloc>; + +template <typename It, typename Hash, typename Alloc, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Hash>>, + typename = std::enable_if_t<!std::is_integral_v<Hash>>> +concurrent_unordered_set( It, It, std::size_t, Hash, Alloc ) +-> concurrent_unordered_set<iterator_value_t<It>, Hash, std::equal_to<iterator_value_t<It>>, Alloc>; + +template <typename T, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_unordered_set( std::initializer_list<T>, std::size_t, Alloc ) +-> concurrent_unordered_set<T, std::hash<T>, std::equal_to<T>, Alloc>; + +template <typename T, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_unordered_set( std::initializer_list<T>, Alloc ) +-> concurrent_unordered_set<T, std::hash<T>, std::equal_to<T>, Alloc>; + +template <typename T, typename Hash, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Hash>>, + typename = std::enable_if_t<!std::is_integral_v<Hash>>> +concurrent_unordered_set( std::initializer_list<T>, std::size_t, Hash, Alloc ) +-> concurrent_unordered_set<T, Hash, std::equal_to<T>, Alloc>; + +#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +template <typename Key, typename Hash, typename KeyEqual, typename Allocator> +void swap( concurrent_unordered_set<Key, Hash, KeyEqual, Allocator>& lhs, + concurrent_unordered_set<Key, Hash, KeyEqual, Allocator>& rhs ) { + lhs.swap(rhs); +} + +template <typename Key, typename Hash = std::hash<Key>, typename KeyEqual = 
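// A minimal sketch (illustrative only, not part of the TBB sources) of the deduction
// guides for concurrent_unordered_set declared above (C++17 only). The function name
// example_cuset_ctad() and the sample data are assumptions.
#include "oneapi/tbb/concurrent_unordered_set.h"
#include <string>
#include <vector>

void example_cuset_ctad() {
    std::vector<std::string> words{"alpha", "beta", "alpha"};

    // Deduced as concurrent_unordered_set<std::string, std::hash<std::string>,
    // std::equal_to<std::string>, tbb::tbb_allocator<std::string>>.
    tbb::concurrent_unordered_set s1(words.begin(), words.end());

    // The initializer_list guide deduces the element type directly.
    tbb::concurrent_unordered_set s2{1, 2, 3};
}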
std::equal_to<Key>, + typename Allocator = tbb::tbb_allocator<Key>> +class concurrent_unordered_multiset + : public concurrent_unordered_base<concurrent_unordered_set_traits<Key, Hash, KeyEqual, Allocator, true>> +{ + using traits_type = concurrent_unordered_set_traits<Key, Hash, KeyEqual, Allocator, true>; + using base_type = concurrent_unordered_base<traits_type>; +public: + using key_type = typename base_type::key_type; + using value_type = typename base_type::value_type; + using size_type = typename base_type::size_type; + using difference_type = typename base_type::difference_type; + using hasher = typename base_type::hasher; + using key_equal = typename base_type::key_equal; + using allocator_type = typename base_type::allocator_type; + using reference = typename base_type::reference; + using const_reference = typename base_type::const_reference; + using pointer = typename base_type::pointer; + using const_pointer = typename base_type::const_pointer; + using iterator = typename base_type::iterator; + using const_iterator = typename base_type::const_iterator; + using local_iterator = typename base_type::local_iterator; + using const_local_iterator = typename base_type::const_local_iterator; + using node_type = typename base_type::node_type; + + // Include constructors of base_type; + using base_type::base_type; + using base_type::operator=; + + // Required for implicit deduction guides + concurrent_unordered_multiset() = default; + concurrent_unordered_multiset( const concurrent_unordered_multiset& ) = default; + concurrent_unordered_multiset( const concurrent_unordered_multiset& other, const allocator_type& alloc ) : base_type(other, alloc) {} + concurrent_unordered_multiset( concurrent_unordered_multiset&& ) = default; + concurrent_unordered_multiset( concurrent_unordered_multiset&& other, const allocator_type& alloc ) : base_type(std::move(other), alloc) {} + // Required to respect the rule of 5 + concurrent_unordered_multiset& operator=( const concurrent_unordered_multiset& ) = default; + concurrent_unordered_multiset& operator=( concurrent_unordered_multiset&& ) = default; + + template <typename OtherHash, typename OtherKeyEqual> + void merge( concurrent_unordered_set<key_type, OtherHash, OtherKeyEqual, allocator_type>& source ) { + this->internal_merge(source); + } + + template <typename OtherHash, typename OtherKeyEqual> + void merge( concurrent_unordered_set<key_type, OtherHash, OtherKeyEqual, allocator_type>&& source ) { + this->internal_merge(std::move(source)); + } + + template <typename OtherHash, typename OtherKeyEqual> + void merge( concurrent_unordered_multiset<key_type, OtherHash, OtherKeyEqual, allocator_type>& source ) { + this->internal_merge(source); + } + + template <typename OtherHash, typename OtherKeyEqual> + void merge( concurrent_unordered_multiset<key_type, OtherHash, OtherKeyEqual, allocator_type>&& source ) { + this->internal_merge(std::move(source)); + } +}; // class concurrent_unordered_multiset + +#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT +template <typename It, + typename Hash = std::hash<iterator_value_t<It>>, + typename KeyEq = std::equal_to<iterator_value_t<It>>, + typename Alloc = tbb::tbb_allocator<iterator_value_t<It>>, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Hash>>, + typename = std::enable_if_t<!is_allocator_v<KeyEq>>, + typename = std::enable_if_t<!std::is_integral_v<Hash>>> +concurrent_unordered_multiset( It, It, std::size_t = 
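// A minimal sketch (illustrative only, not part of the TBB sources) of
// concurrent_unordered_multiset: equal elements are all stored, and merge() moves nodes
// out of a plain set. count() comes from the shared concurrent_unordered_base.
// The function name example_cumset() and the sample values are assumptions.
#include "oneapi/tbb/concurrent_unordered_set.h"

void example_cumset() {
    tbb::concurrent_unordered_multiset<int> ms;
    ms.insert(7);
    ms.insert(7);                               // both elements are kept
    auto seven_count = ms.count(7);             // == 2 at this point

    tbb::concurrent_unordered_set<int> s;
    s.insert(7);
    ms.merge(s);                                // moves the 7 out of s; ms now holds three of them
    (void)seven_count;
}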
{}, Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() ) +-> concurrent_unordered_multiset<iterator_value_t<It>, Hash, KeyEq, Alloc>; + +template <typename T, + typename Hash = std::hash<T>, + typename KeyEq = std::equal_to<T>, + typename Alloc = tbb::tbb_allocator<T>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Hash>>, + typename = std::enable_if_t<!is_allocator_v<KeyEq>>, + typename = std::enable_if_t<!std::is_integral_v<Hash>>> +concurrent_unordered_multiset( std::initializer_list<T>, std::size_t = {}, + Hash = Hash(), KeyEq = KeyEq(), Alloc = Alloc() ) +-> concurrent_unordered_multiset<T, Hash, KeyEq, Alloc>; + +template <typename It, typename Alloc, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_unordered_multiset( It, It, std::size_t, Alloc ) +-> concurrent_unordered_multiset<iterator_value_t<It>, std::hash<iterator_value_t<It>>, + std::equal_to<iterator_value_t<It>>, Alloc>; + +template <typename It, typename Hash, typename Alloc, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Hash>>, + typename = std::enable_if_t<!std::is_integral_v<Hash>>> +concurrent_unordered_multiset( It, It, std::size_t, Hash, Alloc ) +-> concurrent_unordered_multiset<iterator_value_t<It>, Hash, std::equal_to<iterator_value_t<It>>, Alloc>; + +template <typename T, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_unordered_multiset( std::initializer_list<T>, std::size_t, Alloc ) +-> concurrent_unordered_multiset<T, std::hash<T>, std::equal_to<T>, Alloc>; + +template <typename T, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_unordered_multiset( std::initializer_list<T>, Alloc ) +-> concurrent_unordered_multiset<T, std::hash<T>, std::equal_to<T>, Alloc>; + +template <typename T, typename Hash, typename Alloc, + typename = std::enable_if_t<is_allocator_v<Alloc>>, + typename = std::enable_if_t<!is_allocator_v<Hash>>, + typename = std::enable_if_t<!std::is_integral_v<Hash>>> +concurrent_unordered_multiset( std::initializer_list<T>, std::size_t, Hash, Alloc ) +-> concurrent_unordered_multiset<T, Hash, std::equal_to<T>, Alloc>; + +#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +template <typename Key, typename Hash, typename KeyEqual, typename Allocator> +void swap( concurrent_unordered_multiset<Key, Hash, KeyEqual, Allocator>& lhs, + concurrent_unordered_multiset<Key, Hash, KeyEqual, Allocator>& rhs ) { + lhs.swap(rhs); +} + +} // namespace d1 +} // namespace detail + +inline namespace v1 { + +using detail::d1::concurrent_unordered_set; +using detail::d1::concurrent_unordered_multiset; +using detail::split; + +} // inline namespace v1 +} // namespace tbb + +#endif // __TBB_concurrent_unordered_set_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/concurrent_vector.h b/contrib/libs/tbb/include/oneapi/tbb/concurrent_vector.h new file mode 100644 index 0000000000..94a22b92c6 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/concurrent_vector.h @@ -0,0 +1,1114 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_concurrent_vector_H +#define __TBB_concurrent_vector_H + +#include "detail/_namespace_injection.h" +#include "detail/_utils.h" +#include "detail/_assert.h" +#include "detail/_allocator_traits.h" +#include "detail/_segment_table.h" +#include "detail/_containers_helpers.h" +#include "blocked_range.h" +#include "cache_aligned_allocator.h" + +#include <algorithm> +#include <utility> // std::move_if_noexcept +#include <algorithm> +#if __TBB_CPP20_COMPARISONS_PRESENT +#include <compare> +#endif + +namespace tbb { +namespace detail { +namespace d1 { + +template <typename Vector, typename Value> +class vector_iterator { + using vector_type = Vector; + +public: + using value_type = Value; + using size_type = typename vector_type::size_type; + using difference_type = typename vector_type::difference_type; + using pointer = value_type*; + using reference = value_type&; + using iterator_category = std::random_access_iterator_tag; + + template <typename Vec, typename Val> + friend vector_iterator<Vec, Val> operator+( typename vector_iterator<Vec, Val>::difference_type, const vector_iterator<Vec, Val>& ); + + template <typename Vec, typename Val1, typename Val2> + friend typename vector_iterator<Vec, Val1>::difference_type operator-( const vector_iterator<Vec, Val1>&, const vector_iterator<Vec, Val2>& ); + + template <typename Vec, typename Val1, typename Val2> + friend bool operator==( const vector_iterator<Vec, Val1>&, const vector_iterator<Vec, Val2>& ); + + template <typename Vec, typename Val1, typename Val2> + friend bool operator<( const vector_iterator<Vec, Val1>&, const vector_iterator<Vec, Val2>& ); + + template <typename Vec, typename Val> + friend class vector_iterator; + + template <typename T, typename Allocator> + friend class concurrent_vector; + +private: + vector_iterator( const vector_type& vector, size_type index, value_type* item = nullptr ) + : my_vector(const_cast<vector_type*>(&vector)), my_index(index), my_item(item) + {} + +public: + vector_iterator() : my_vector(nullptr), my_index(~size_type(0)), my_item(nullptr) + {} + + vector_iterator( const vector_iterator<vector_type, typename vector_type::value_type>& other ) + : my_vector(other.my_vector), my_index(other.my_index), my_item(other.my_item) + {} + + vector_iterator& operator=( const vector_iterator<vector_type, typename vector_type::value_type>& other ) { + my_vector = other.my_vector; + my_index = other.my_index; + my_item = other.my_item; + return *this; + } + + vector_iterator operator+( difference_type offset ) const { + return vector_iterator(*my_vector, my_index + offset); + } + + vector_iterator& operator+=( difference_type offset ) { + my_index += offset; + my_item = nullptr; + return *this; + } + + vector_iterator operator-( difference_type offset ) const { + return vector_iterator(*my_vector, my_index - offset); + } + + vector_iterator& operator-=( difference_type offset ) { + my_index -= offset; + my_item = nullptr; + return *this; + } + + reference operator*() const { + value_type *item = my_item; + if (item == nullptr) { + item = &my_vector->internal_subscript(my_index); + } else { + 
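// A minimal sketch (illustrative only, not part of the TBB sources) of iterating a
// concurrent_vector. Storage is segmented rather than contiguous, which is why the
// iterator above drops its cached element pointer at segment boundaries; the iterator
// itself still models random access. example_cv_iter() and the values are assumptions.
#include "oneapi/tbb/concurrent_vector.h"
#include <numeric>

void example_cv_iter() {
    tbb::concurrent_vector<int> v;
    v.grow_by(10, 1);                           // ten elements, each equal to 1

    auto it = v.begin() + 3;                    // random-access arithmetic
    auto dist = v.end() - v.begin();            // dist == 10

    int sum = std::accumulate(v.begin(), v.end(), 0);   // sum == 10
    (void)it; (void)dist; (void)sum;
}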
__TBB_ASSERT(item == &my_vector->internal_subscript(my_index), "corrupt cache"); + } + return *item; + } + + pointer operator->() const { return &(operator*()); } + + reference operator[]( difference_type k ) const { + return my_vector->internal_subscript(my_index + k); + } + + vector_iterator& operator++() { + ++my_index; + if (my_item != nullptr) { + if (vector_type::is_first_element_in_segment(my_index)) { + // If the iterator crosses a segment boundary, the pointer become invalid + // as possibly next segment is in another memory location + my_item = nullptr; + } else { + ++my_item; + } + } + return *this; + } + + vector_iterator operator++(int) { + vector_iterator result = *this; + ++(*this); + return result; + } + + vector_iterator& operator--() { + __TBB_ASSERT(my_index > 0, "operator--() applied to iterator already at beginning of concurrent_vector"); + --my_index; + if (my_item != nullptr) { + if (vector_type::is_first_element_in_segment(my_index)) { + // If the iterator crosses a segment boundary, the pointer become invalid + // as possibly next segment is in another memory location + my_item = nullptr; + } else { + --my_item; + } + } + return *this; + } + + vector_iterator operator--(int) { + vector_iterator result = *this; + --(*this); + return result; + } + +private: + // concurrent_vector over which we are iterating. + vector_type* my_vector; + + // Index into the vector + size_type my_index; + + // Caches my_vector *it; + // If my_item == nullptr cached value is not available use internal_subscript(my_index) + mutable value_type* my_item; +}; // class vector_iterator + +template <typename Vector, typename T> +vector_iterator<Vector, T> operator+( typename vector_iterator<Vector, T>::difference_type offset, + const vector_iterator<Vector, T>& v ) +{ + return vector_iterator<Vector, T>(*v.my_vector, v.my_index + offset); +} + +template <typename Vector, typename T, typename U> +typename vector_iterator<Vector, T>::difference_type operator-( const vector_iterator<Vector, T>& i, + const vector_iterator<Vector, U>& j ) +{ + using difference_type = typename vector_iterator<Vector, T>::difference_type; + return static_cast<difference_type>(i.my_index) - static_cast<difference_type>(j.my_index); +} + +template <typename Vector, typename T, typename U> +bool operator==( const vector_iterator<Vector, T>& i, const vector_iterator<Vector, U>& j ) { + return i.my_vector == j.my_vector && i.my_index == j.my_index; +} + +template <typename Vector, typename T, typename U> +bool operator!=( const vector_iterator<Vector, T>& i, const vector_iterator<Vector, U>& j ) { + return !(i == j); +} + +template <typename Vector, typename T, typename U> +bool operator<( const vector_iterator<Vector, T>& i, const vector_iterator<Vector, U>& j ) { + return i.my_index < j.my_index; +} + +template <typename Vector, typename T, typename U> +bool operator>( const vector_iterator<Vector, T>& i, const vector_iterator<Vector, U>& j ) { + return j < i; +} + +template <typename Vector, typename T, typename U> +bool operator>=( const vector_iterator<Vector, T>& i, const vector_iterator<Vector, U>& j ) { + return !(i < j); +} + +template <typename Vector, typename T, typename U> +bool operator<=( const vector_iterator<Vector, T>& i, const vector_iterator<Vector, U>& j ) { + return !(j < i); +} + +static constexpr std::size_t embedded_table_num_segments = 3; + +template <typename T, typename Allocator = tbb::cache_aligned_allocator<T>> +class concurrent_vector + : private segment_table<T, Allocator, 
concurrent_vector<T, Allocator>, embedded_table_num_segments> +{ + using self_type = concurrent_vector<T, Allocator>; + using base_type = segment_table<T, Allocator, self_type, embedded_table_num_segments>; + + friend class segment_table<T, Allocator, self_type, embedded_table_num_segments>; + + template <typename Iterator> + class generic_range_type : public tbb::blocked_range<Iterator> { + using base_type = tbb::blocked_range<Iterator>; + public: + using value_type = T; + using reference = T&; + using const_reference = const T&; + using iterator = Iterator; + using difference_type = std::ptrdiff_t; + + using base_type::base_type; + + template<typename U> + generic_range_type( const generic_range_type<U>& r) : blocked_range<Iterator>(r.begin(), r.end(), r.grainsize()) {} + generic_range_type( generic_range_type& r, split ) : blocked_range<Iterator>(r, split()) {} + }; // class generic_range_type + + static_assert(std::is_same<T, typename Allocator::value_type>::value, + "value_type of the container must be the same as its allocator's"); + using allocator_traits_type = tbb::detail::allocator_traits<Allocator>; + // Segment table for concurrent_vector can be extended + static constexpr bool allow_table_extending = true; + static constexpr bool is_noexcept_assignment = allocator_traits_type::propagate_on_container_move_assignment::value || + allocator_traits_type::is_always_equal::value; + static constexpr bool is_noexcept_swap = allocator_traits_type::propagate_on_container_swap::value || + allocator_traits_type::is_always_equal::value; + +public: + using value_type = T; + using allocator_type = Allocator; + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + using reference = value_type&; + using const_reference = const value_type&; + + using pointer = typename allocator_traits_type::pointer; + using const_pointer = typename allocator_traits_type::const_pointer; + + using iterator = vector_iterator<concurrent_vector, value_type>; + using const_iterator = vector_iterator<concurrent_vector, const value_type>; + using reverse_iterator = std::reverse_iterator<iterator>; + using const_reverse_iterator = std::reverse_iterator<const_iterator>; + + using range_type = generic_range_type<iterator>; + using const_range_type = generic_range_type<const_iterator>; + + concurrent_vector() : concurrent_vector(allocator_type()) {} + + explicit concurrent_vector( const allocator_type& alloc ) noexcept + : base_type(alloc) + {} + + explicit concurrent_vector( size_type count, const value_type& value, + const allocator_type& alloc = allocator_type() ) + : concurrent_vector(alloc) + { + try_call( [&] { + grow_by(count, value); + } ).on_exception( [&] { + base_type::clear(); + }); + } + + explicit concurrent_vector( size_type count, const allocator_type& alloc = allocator_type() ) + : concurrent_vector(alloc) + { + try_call( [&] { + grow_by(count); + } ).on_exception( [&] { + base_type::clear(); + }); + } + + template <typename InputIterator> + concurrent_vector( InputIterator first, InputIterator last, const allocator_type& alloc = allocator_type() ) + : concurrent_vector(alloc) + { + try_call( [&] { + grow_by(first, last); + } ).on_exception( [&] { + base_type::clear(); + }); + } + + concurrent_vector( const concurrent_vector& other ) + : base_type(segment_table_allocator_traits::select_on_container_copy_construction(other.get_allocator())) + { + try_call( [&] { + grow_by(other.begin(), other.end()); + } ).on_exception( [&] { + base_type::clear(); + }); + } + + concurrent_vector( const 
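// A minimal sketch (illustrative only, not part of the TBB sources) of the
// concurrent_vector constructors declared above; the container is built first and shared
// with other threads afterwards. example_cv_ctors() and the sample data are assumptions.
#include "oneapi/tbb/concurrent_vector.h"
#include <vector>

void example_cv_ctors() {
    tbb::concurrent_vector<double> a(5, 3.14);            // five elements, each 3.14

    std::vector<double> src{1.0, 2.0, 3.0};
    tbb::concurrent_vector<double> b(src.begin(), src.end());

    tbb::concurrent_vector<double> c{1.0, 2.0, 3.0};       // initializer_list constructor
    tbb::concurrent_vector<double> d(c);                   // copy construction
    (void)a; (void)b; (void)d;
}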
concurrent_vector& other, const allocator_type& alloc ) + : base_type(other, alloc) {} + + concurrent_vector(concurrent_vector&& other) noexcept + : base_type(std::move(other)) + {} + + concurrent_vector( concurrent_vector&& other, const allocator_type& alloc ) + : base_type(std::move(other), alloc) + {} + + concurrent_vector( std::initializer_list<value_type> init, + const allocator_type& alloc = allocator_type() ) + : concurrent_vector(init.begin(), init.end(), alloc) + {} + + ~concurrent_vector() {} + + // Assignment + concurrent_vector& operator=( const concurrent_vector& other ) { + base_type::operator=(other); + return *this; + } + + concurrent_vector& operator=( concurrent_vector&& other ) noexcept(is_noexcept_assignment) { + base_type::operator=(std::move(other)); + return *this; + } + + concurrent_vector& operator=( std::initializer_list<value_type> init ) { + assign(init); + return *this; + } + + void assign( size_type count, const value_type& value ) { + destroy_elements(); + grow_by(count, value); + } + + template <typename InputIterator> + typename std::enable_if<is_input_iterator<InputIterator>::value, void>::type + assign( InputIterator first, InputIterator last ) { + destroy_elements(); + grow_by(first, last); + } + + void assign( std::initializer_list<value_type> init ) { + destroy_elements(); + assign(init.begin(), init.end()); + } + + // Concurrent growth + iterator grow_by( size_type delta ) { + return internal_grow_by_delta(delta); + } + + iterator grow_by( size_type delta, const value_type& value ) { + return internal_grow_by_delta(delta, value); + } + + template <typename ForwardIterator> + typename std::enable_if<is_input_iterator<ForwardIterator>::value, iterator>::type + grow_by( ForwardIterator first, ForwardIterator last ) { + auto delta = std::distance(first, last); + return internal_grow_by_delta(delta, first, last); + } + + iterator grow_by( std::initializer_list<value_type> init ) { + return grow_by(init.begin(), init.end()); + } + + iterator grow_to_at_least( size_type n ) { + return internal_grow_to_at_least(n); + } + iterator grow_to_at_least( size_type n, const value_type& value ) { + return internal_grow_to_at_least(n, value); + } + + iterator push_back( const value_type& item ) { + return internal_emplace_back(item); + } + + iterator push_back( value_type&& item ) { + return internal_emplace_back(std::move(item)); + } + + template <typename... Args> + iterator emplace_back( Args&&... 
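// A minimal sketch (illustrative only, not part of the TBB sources) of the concurrent
// growth interface above: push_back/emplace_back/grow_by/grow_to_at_least all return an
// iterator into the newly appended region. example_cv_grow() and the values are assumptions.
#include "oneapi/tbb/concurrent_vector.h"
#include <string>

void example_cv_grow() {
    tbb::concurrent_vector<std::string> v;

    v.push_back("tail");                        // appends one element
    v.emplace_back(3, 'x');                     // constructs "xxx" in place

    auto first_new = v.grow_by(4);              // claims four consecutive new indices
    for (auto it = first_new; it != first_new + 4; ++it) {
        *it = "filled by the thread that grew the vector";
    }

    v.grow_to_at_least(100);                    // ensures v.size() >= 100
}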
args ) { + return internal_emplace_back(std::forward<Args>(args)...); + } + + // Items access + reference operator[]( size_type index ) { + return internal_subscript(index); + } + const_reference operator[]( size_type index ) const { + return internal_subscript(index); + } + + reference at( size_type index ) { + return internal_subscript_with_exceptions(index); + } + const_reference at( size_type index ) const { + return internal_subscript_with_exceptions(index); + } + + // Get range for iterating with parallel algorithms + range_type range( size_t grainsize = 1 ) { + return range_type(begin(), end(), grainsize); + } + + // Get const range for iterating with parallel algorithms + const_range_type range( size_t grainsize = 1 ) const { + return const_range_type(begin(), end(), grainsize); + } + + reference front() { + return internal_subscript(0); + } + + const_reference front() const { + return internal_subscript(0); + } + + reference back() { + return internal_subscript(size() - 1); + } + + const_reference back() const { + return internal_subscript(size() - 1); + } + + // Iterators + iterator begin() { return iterator(*this, 0); } + const_iterator begin() const { return const_iterator(*this, 0); } + const_iterator cbegin() const { return const_iterator(*this, 0); } + + iterator end() { return iterator(*this, size()); } + const_iterator end() const { return const_iterator(*this, size()); } + const_iterator cend() const { return const_iterator(*this, size()); } + + reverse_iterator rbegin() { return reverse_iterator(end()); } + const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); } + const_reverse_iterator crbegin() const { return const_reverse_iterator(cend()); } + + reverse_iterator rend() { return reverse_iterator(begin()); } + const_reverse_iterator rend() const { return const_reverse_iterator(begin()); } + const_reverse_iterator crend() const { return const_reverse_iterator(cbegin()); } + + allocator_type get_allocator() const { + return base_type::get_allocator(); + } + + // Storage + bool empty() const noexcept { + return 0 == size(); + } + + size_type size() const noexcept { + return std::min(this->my_size.load(std::memory_order_acquire), capacity()); + } + + size_type max_size() const noexcept { + return allocator_traits_type::max_size(base_type::get_allocator()); + } + + size_type capacity() const noexcept { + return base_type::capacity(); + } + + void reserve( size_type n ) { + if (n == 0) return; + + if (n > max_size()) { + tbb::detail::throw_exception(exception_id::reservation_length_error); + } + + this->assign_first_block_if_necessary(this->segment_index_of(n - 1) + 1); + base_type::reserve(n); + } + + void resize( size_type n ) { + internal_resize(n); + } + + void resize( size_type n, const value_type& val ) { + internal_resize(n, val); + } + + void shrink_to_fit() { + internal_compact(); + } + + void swap(concurrent_vector& other) noexcept(is_noexcept_swap) { + base_type::swap(other); + } + + void clear() { + destroy_elements(); + } + +private: + using segment_type = typename base_type::segment_type; + using segment_table_type = typename base_type::segment_table_type; + using segment_table_allocator_traits = typename base_type::segment_table_allocator_traits; + using segment_index_type = typename base_type::segment_index_type; + + using segment_element_type = typename base_type::value_type; + using segment_element_allocator_type = typename allocator_traits_type::template rebind_alloc<segment_element_type>; + using segment_element_allocator_traits = 
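// A minimal sketch (illustrative only, not part of the TBB sources) of feeding
// concurrent_vector::range() to a parallel algorithm; range_type is a blocked_range over
// the vector's iterators. example_cv_range() and the doubling transformation are assumptions.
#include "oneapi/tbb/concurrent_vector.h"
#include "oneapi/tbb/parallel_for.h"

void example_cv_range() {
    tbb::concurrent_vector<int> v(1000, 1);

    tbb::parallel_for(v.range(), [](const tbb::concurrent_vector<int>::range_type& r) {
        for (auto it = r.begin(); it != r.end(); ++it) {
            *it *= 2;                           // each chunk of the range is processed by one task
        }
    });
}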
tbb::detail::allocator_traits<segment_element_allocator_type>; + + segment_table_type allocate_long_table( const typename base_type::atomic_segment* embedded_table, size_type start_index ) { + __TBB_ASSERT(start_index <= this->embedded_table_size, "Start index out of embedded table"); + + // If other threads are trying to set pointers in the short segment, wait for them to finish their + // assignments before we copy the short segment to the long segment. Note: grow_to_at_least depends on it + for (segment_index_type i = 0; this->segment_base(i) < start_index; ++i) { + spin_wait_while_eq(embedded_table[i], segment_type(nullptr)); + } + + // It is possible that the table was extend by a thread allocating first_block, need to check this. + if (this->get_table() != embedded_table) { + return nullptr; + } + + // Allocate long segment table and fill with null pointers + segment_table_type new_segment_table = segment_table_allocator_traits::allocate(base_type::get_allocator(), this->pointers_per_long_table); + // Copy segment pointers from the embedded table + for (size_type segment_index = 0; segment_index < this->pointers_per_embedded_table; ++segment_index) { + segment_table_allocator_traits::construct(base_type::get_allocator(), &new_segment_table[segment_index], + embedded_table[segment_index].load(std::memory_order_relaxed)); + } + for (size_type segment_index = this->pointers_per_embedded_table; segment_index < this->pointers_per_long_table; ++segment_index) { + segment_table_allocator_traits::construct(base_type::get_allocator(), &new_segment_table[segment_index], nullptr); + } + + return new_segment_table; + } + + // create_segment function is required by the segment_table base class + segment_type create_segment( segment_table_type table, segment_index_type seg_index, size_type index ) { + size_type first_block = this->my_first_block.load(std::memory_order_relaxed); + // First block allocation + if (seg_index < first_block) { + // If 0 segment is already allocated, then it remains to wait until the segments are filled to requested + if (table[0].load(std::memory_order_acquire) != nullptr) { + spin_wait_while_eq(table[seg_index], segment_type(nullptr)); + return nullptr; + } + + segment_element_allocator_type segment_allocator(base_type::get_allocator()); + segment_type new_segment = nullptr; + size_type first_block_size = this->segment_size(first_block); + try_call( [&] { + new_segment = segment_element_allocator_traits::allocate(segment_allocator, first_block_size); + } ).on_exception( [&] { + segment_type disabled_segment = nullptr; + if (table[0].compare_exchange_strong(disabled_segment, this->segment_allocation_failure_tag)) { + size_type end_segment = table == this->my_embedded_table ? this->pointers_per_embedded_table : first_block; + for (size_type i = 1; i < end_segment; ++i) { + table[i].store(this->segment_allocation_failure_tag, std::memory_order_release); + } + } + }); + + segment_type disabled_segment = nullptr; + if (table[0].compare_exchange_strong(disabled_segment, new_segment)) { + this->extend_table_if_necessary(table, 0, first_block_size); + for (size_type i = 1; i < first_block; ++i) { + table[i].store(new_segment, std::memory_order_release); + } + + // Other threads can wait on a snapshot of an embedded table, need to fill it. 
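// A simplified, self-contained model (not TBB's actual code) of the publish-or-wait idea
// used by create_segment() above: every racing thread prepares a segment, exactly one
// publishes it with compare_exchange_strong, and the losers discard their allocation and
// use the winner's. TBB additionally spin-waits for segments the winner is still filling
// (spin_wait_while_eq above); that part is omitted here. g_segment, acquire_segment()
// and the use of new[]/delete[] are illustrative assumptions.
#include <atomic>
#include <cstddef>

std::atomic<int*> g_segment{nullptr};

int* acquire_segment(std::size_t n) {
    if (int* current = g_segment.load(std::memory_order_acquire)) {
        return current;                         // already published by another thread
    }
    int* fresh = new int[n]();
    int* expected = nullptr;
    if (g_segment.compare_exchange_strong(expected, fresh)) {
        return fresh;                           // this thread won the race and published its segment
    }
    delete[] fresh;                             // lost the race: drop the local allocation
    return expected;                            // compare_exchange_strong loaded the published pointer
}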
+ for (size_type i = 1; i < first_block && i < this->pointers_per_embedded_table; ++i) { + this->my_embedded_table[i].store(new_segment, std::memory_order_release); + } + } else if (new_segment != this->segment_allocation_failure_tag) { + // Deallocate the memory + segment_element_allocator_traits::deallocate(segment_allocator, new_segment, first_block_size); + // 0 segment is already allocated, then it remains to wait until the segments are filled to requested + spin_wait_while_eq(table[seg_index], segment_type(nullptr)); + } + } else { + size_type offset = this->segment_base(seg_index); + if (index == offset) { + __TBB_ASSERT(table[seg_index].load(std::memory_order_relaxed) == nullptr, "Only this thread can enable this segment"); + segment_element_allocator_type segment_allocator(base_type::get_allocator()); + segment_type new_segment = this->segment_allocation_failure_tag; + try_call( [&] { + new_segment = segment_element_allocator_traits::allocate(segment_allocator,this->segment_size(seg_index)); + // Shift base address to simplify access by index + new_segment -= this->segment_base(seg_index); + } ).on_completion( [&] { + table[seg_index].store(new_segment, std::memory_order_release); + }); + } else { + spin_wait_while_eq(table[seg_index], segment_type(nullptr)); + } + } + return nullptr; + } + + // Returns the number of elements in the segment to be destroy + size_type number_of_elements_in_segment( segment_index_type seg_index ) { + size_type curr_vector_size = this->my_size.load(std::memory_order_relaxed); + size_type curr_segment_base = this->segment_base(seg_index); + + if (seg_index == 0) { + return std::min(curr_vector_size, this->segment_size(seg_index)); + } else { + // Perhaps the segment is allocated, but there are no elements in it. + if (curr_vector_size < curr_segment_base) { + return 0; + } + return curr_segment_base * 2 > curr_vector_size ? curr_vector_size - curr_segment_base : curr_segment_base; + } + } + + void deallocate_segment( segment_type address, segment_index_type seg_index ) { + segment_element_allocator_type segment_allocator(base_type::get_allocator()); + size_type first_block = this->my_first_block.load(std::memory_order_relaxed); + if (seg_index >= first_block) { + segment_element_allocator_traits::deallocate(segment_allocator, address, this->segment_size(seg_index)); + } + else if (seg_index == 0) { + size_type elements_to_deallocate = first_block > 0 ? 
this->segment_size(first_block) : this->segment_size(0); + segment_element_allocator_traits::deallocate(segment_allocator, address, elements_to_deallocate); + } + } + + // destroy_segment function is required by the segment_table base class + void destroy_segment( segment_type address, segment_index_type seg_index ) { + size_type elements_to_destroy = number_of_elements_in_segment(seg_index); + segment_element_allocator_type segment_allocator(base_type::get_allocator()); + + for (size_type i = 0; i < elements_to_destroy; ++i) { + segment_element_allocator_traits::destroy(segment_allocator, address + i); + } + + deallocate_segment(address, seg_index); + } + + // copy_segment function is required by the segment_table base class + void copy_segment( segment_index_type seg_index, segment_type from, segment_type to ) { + size_type i = 0; + try_call( [&] { + for (; i != number_of_elements_in_segment(seg_index); ++i) { + segment_table_allocator_traits::construct(base_type::get_allocator(), to + i, from[i]); + } + } ).on_exception( [&] { + // Zero-initialize items left not constructed after the exception + zero_unconstructed_elements(this->get_segment(seg_index) + i, this->segment_size(seg_index) - i); + + segment_index_type last_segment = this->segment_index_of(this->my_size.load(std::memory_order_relaxed)); + auto table = this->get_table(); + for (segment_index_type j = seg_index + 1; j != last_segment; ++j) { + auto curr_segment = table[j].load(std::memory_order_relaxed); + if (curr_segment) { + zero_unconstructed_elements(curr_segment + this->segment_base(j), this->segment_size(j)); + } + } + this->my_size.store(this->segment_size(seg_index) + i, std::memory_order_relaxed); + }); + } + + // move_segment function is required by the segment_table base class + void move_segment( segment_index_type seg_index, segment_type from, segment_type to ) { + size_type i = 0; + try_call( [&] { + for (; i != number_of_elements_in_segment(seg_index); ++i) { + segment_table_allocator_traits::construct(base_type::get_allocator(), to + i, std::move(from[i])); + } + } ).on_exception( [&] { + // Zero-initialize items left not constructed after the exception + zero_unconstructed_elements(this->get_segment(seg_index) + i, this->segment_size(seg_index) - i); + + segment_index_type last_segment = this->segment_index_of(this->my_size.load(std::memory_order_relaxed)); + auto table = this->get_table(); + for (segment_index_type j = seg_index + 1; j != last_segment; ++j) { + auto curr_segment = table[j].load(std::memory_order_relaxed); + if (curr_segment) { + zero_unconstructed_elements(curr_segment + this->segment_base(j), this->segment_size(j)); + } + } + this->my_size.store(this->segment_size(seg_index) + i, std::memory_order_relaxed); + }); + } + + static constexpr bool is_first_element_in_segment( size_type index ) { + // An element is the first in a segment if its index is equal to a power of two + return is_power_of_two_at_least(index, 2); + } + + const_reference internal_subscript( size_type index ) const { + return const_cast<self_type*>(this)->internal_subscript(index); + } + + reference internal_subscript( size_type index ) { + __TBB_ASSERT(index < this->my_size.load(std::memory_order_relaxed), "Invalid subscript index"); + return base_type::template internal_subscript</*allow_out_of_range_access=*/false>(index); + } + + const_reference internal_subscript_with_exceptions( size_type index ) const { + return const_cast<self_type*>(this)->internal_subscript_with_exceptions(index); + } + + reference 
internal_subscript_with_exceptions( size_type index ) { + if (index >= this->my_size.load(std::memory_order_acquire)) { + tbb::detail::throw_exception(exception_id::out_of_range); + } + + segment_table_type table = this->my_segment_table.load(std::memory_order_acquire); + + size_type seg_index = this->segment_index_of(index); + if (base_type::number_of_segments(table) < seg_index) { + tbb::detail::throw_exception(exception_id::out_of_range); + } + + if (table[seg_index] <= this->segment_allocation_failure_tag) { + tbb::detail::throw_exception(exception_id::out_of_range); + } + + return base_type::template internal_subscript</*allow_out_of_range_access=*/false>(index); + } + + static void zero_unconstructed_elements( pointer start, size_type count ) { + std::memset(static_cast<void *>(start), 0, count * sizeof(value_type)); + } + + template <typename... Args> + iterator internal_emplace_back( Args&&... args ) { + size_type old_size = this->my_size++; + this->assign_first_block_if_necessary(default_first_block_size); + auto element_address = &base_type::template internal_subscript</*allow_out_of_range_access=*/true>(old_size); + + // try_call API is not convenient here due to broken + // variadic capture on GCC 4.8.5 + auto value_guard = make_raii_guard([&] { + zero_unconstructed_elements(element_address, /*count =*/1); + }); + + segment_table_allocator_traits::construct(base_type::get_allocator(), element_address, std::forward<Args>(args)...); + value_guard.dismiss(); + return iterator(*this, old_size, element_address); + } + + template <typename... Args> + void internal_loop_construct( segment_table_type table, size_type start_idx, size_type end_idx, const Args&... args ) { + static_assert(sizeof...(Args) < 2, "Too many parameters"); + for (size_type idx = start_idx; idx < end_idx; ++idx) { + auto element_address = &base_type::template internal_subscript</*allow_out_of_range_access=*/true>(idx); + // try_call API is not convenient here due to broken + // variadic capture on GCC 4.8.5 + auto value_guard = make_raii_guard( [&] { + segment_index_type last_allocated_segment = this->find_last_allocated_segment(table); + size_type segment_size = this->segment_size(last_allocated_segment); + end_idx = end_idx < segment_size ? end_idx : segment_size; + for (size_type i = idx; i < end_idx; ++i) { + zero_unconstructed_elements(&this->internal_subscript(i), /*count =*/1); + } + }); + segment_table_allocator_traits::construct(base_type::get_allocator(), element_address, args...); + value_guard.dismiss(); + } + } + + template <typename ForwardIterator> + void internal_loop_construct( segment_table_type table, size_type start_idx, size_type end_idx, ForwardIterator first, ForwardIterator ) { + for (size_type idx = start_idx; idx < end_idx; ++idx) { + auto element_address = &base_type::template internal_subscript</*allow_out_of_range_access=*/true>(idx); + try_call( [&] { + segment_table_allocator_traits::construct(base_type::get_allocator(), element_address, *first++); + } ).on_exception( [&] { + segment_index_type last_allocated_segment = this->find_last_allocated_segment(table); + size_type segment_size = this->segment_size(last_allocated_segment); + end_idx = end_idx < segment_size ? end_idx : segment_size; + for (size_type i = idx; i < end_idx; ++i) { + zero_unconstructed_elements(&this->internal_subscript(i), /*count =*/1); + } + }); + } + } + + template <typename... Args> + iterator internal_grow( size_type start_idx, size_type end_idx, const Args&... 
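// A minimal sketch (illustrative only, not part of the TBB sources) of checked versus
// unchecked element access: operator[] only asserts in debug builds, while at() goes
// through internal_subscript_with_exceptions() above and throws for bad indices
// (std::out_of_range in oneTBB's exception mapping, hence the broad catch below).
// example_cv_at() and the indices are assumptions.
#include "oneapi/tbb/concurrent_vector.h"
#include <exception>

void example_cv_at() {
    tbb::concurrent_vector<int> v(3, 0);

    v[2] = 42;                                  // unchecked access
    try {
        v.at(10) = 1;                           // checked access: index 10 is outside [0, size())
    } catch (const std::exception&) {
        // expected: the out-of-range exception raised by at()
    }
}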
args ) { + this->assign_first_block_if_necessary(this->segment_index_of(end_idx - 1) + 1); + size_type seg_index = this->segment_index_of(end_idx - 1); + segment_table_type table = this->get_table(); + this->extend_table_if_necessary(table, start_idx, end_idx); + + if (seg_index > this->my_first_block.load(std::memory_order_relaxed)) { + // So that other threads be able to work with the last segment of grow_by, allocate it immediately. + // If the last segment is not less than the first block + if (table[seg_index].load(std::memory_order_relaxed) == nullptr) { + size_type first_element = this->segment_base(seg_index); + if (first_element >= start_idx && first_element < end_idx) { + segment_type segment = table[seg_index].load(std::memory_order_relaxed); + base_type::enable_segment(segment, table, seg_index, first_element); + } + } + } + + internal_loop_construct(table, start_idx, end_idx, args...); + + return iterator(*this, start_idx, &base_type::template internal_subscript</*allow_out_of_range_access=*/false>(start_idx)); + } + + + template <typename... Args> + iterator internal_grow_by_delta( size_type delta, const Args&... args ) { + if (delta == size_type(0)) { + return end(); + } + size_type start_idx = this->my_size.fetch_add(delta); + size_type end_idx = start_idx + delta; + + return internal_grow(start_idx, end_idx, args...); + } + + template <typename... Args> + iterator internal_grow_to_at_least( size_type new_size, const Args&... args ) { + size_type old_size = this->my_size.load(std::memory_order_relaxed); + if (new_size == size_type(0)) return iterator(*this, 0); + while (old_size < new_size && !this->my_size.compare_exchange_weak(old_size, new_size)) + {} + + int delta = static_cast<int>(new_size) - static_cast<int>(old_size); + if (delta > 0) { + return internal_grow(old_size, new_size, args...); + } + + size_type end_segment = this->segment_index_of(new_size - 1); + + // Check/wait for segments allocation completes + if (end_segment >= this->pointers_per_embedded_table && + this->get_table() == this->my_embedded_table) + { + spin_wait_while_eq(this->my_segment_table, this->my_embedded_table); + } + + for (segment_index_type seg_idx = 0; seg_idx <= end_segment; ++seg_idx) { + if (this->get_table()[seg_idx].load(std::memory_order_relaxed) == nullptr) { + atomic_backoff backoff(true); + while (this->get_table()[seg_idx].load(std::memory_order_relaxed) == nullptr) { + backoff.pause(); + } + } + } + + #if TBB_USE_DEBUG + size_type cap = capacity(); + __TBB_ASSERT( cap >= new_size, NULL); + #endif + return iterator(*this, size()); + } + + template <typename... Args> + void internal_resize( size_type n, const Args&... 
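// A minimal sketch (illustrative only, not part of the TBB sources) of concurrent growth:
// the my_size.fetch_add() above gives each grow_by() caller its own disjoint block of
// indices, so two threads can append simultaneously without interleaving their elements.
// example_cv_parallel_grow(), the thread count and the tag values are assumptions.
#include "oneapi/tbb/concurrent_vector.h"
#include <thread>

void example_cv_parallel_grow() {
    tbb::concurrent_vector<int> v;

    auto producer = [&v](int tag) {
        auto it = v.grow_by(100, tag);          // atomically claims 100 consecutive indices
        (void)it;
    };

    std::thread t1(producer, 1);
    std::thread t2(producer, 2);
    t1.join();
    t2.join();
    // v.size() == 200: one contiguous block of 1s and one of 2s, in either order.
}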
args ) { + if (n == 0) { + clear(); + return; + } + + size_type old_size = this->my_size.load(std::memory_order_acquire); + if (n > old_size) { + reserve(n); + grow_to_at_least(n, args...); + } else { + if (old_size == n) { + return; + } + size_type last_segment = this->segment_index_of(old_size - 1); + // Delete segments + for (size_type seg_idx = this->segment_index_of(n - 1) + 1; seg_idx <= last_segment; ++seg_idx) { + this->delete_segment(seg_idx); + } + + // If n > segment_size(n) => we need to destroy all of the items in the first segment + // Otherwise, we need to destroy only items with the index < n + size_type n_segment = this->segment_index_of(n - 1); + size_type last_index_to_destroy = std::min(this->segment_base(n_segment) + this->segment_size(n_segment), old_size); + // Destroy elements in curr segment + for (size_type idx = n; idx < last_index_to_destroy; ++idx) { + segment_table_allocator_traits::destroy(base_type::get_allocator(), &base_type::template internal_subscript</*allow_out_of_range_access=*/false>(idx)); + } + this->my_size.store(n, std::memory_order_release); + } + } + + void destroy_elements() { + allocator_type alloc(base_type::get_allocator()); + for (size_type i = 0; i < this->my_size.load(std::memory_order_relaxed); ++i) { + allocator_traits_type::destroy(alloc, &base_type::template internal_subscript</*allow_out_of_range_access=*/false>(i)); + } + this->my_size.store(0, std::memory_order_relaxed); + } + + static bool incompact_predicate( size_type size ) { + // memory page size + const size_type page_size = 4096; + return size < page_size || ((size - 1) % page_size < page_size / 2 && size < page_size * 128); + } + + void internal_compact() { + const size_type curr_size = this->my_size.load(std::memory_order_relaxed); + segment_table_type table = this->get_table(); + const segment_index_type k_end = this->find_last_allocated_segment(table); // allocated segments + const segment_index_type k_stop = curr_size ? this->segment_index_of(curr_size - 1) + 1 : 0; // number of segments to store existing items: 0=>0; 1,2=>1; 3,4=>2; [5-8]=>3;.. 
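// A minimal sketch (illustrative only, not part of the TBB sources) of the whole-container
// operations implemented here. Unlike the growth methods, resize(), shrink_to_fit(),
// clear() and assignment are not concurrency-safe and are expected to run without
// overlapping other accesses. example_cv_reshape() and the sizes are assumptions.
#include "oneapi/tbb/concurrent_vector.h"

void example_cv_reshape() {
    tbb::concurrent_vector<int> v(1000, 7);

    v.resize(10);                               // destroys elements past index 9
    v.shrink_to_fit();                          // compacts the segmented storage (internal_compact)
    v.clear();                                  // destroys all elements; allocated segments are kept
}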
+ const segment_index_type first_block = this->my_first_block; // number of merged segments, getting values from atomics + + segment_index_type k = first_block; + if (k_stop < first_block) { + k = k_stop; + } + else { + while (k < k_stop && incompact_predicate(this->segment_size(k) * sizeof(value_type))) k++; + } + + if (k_stop == k_end && k == first_block) { + return; + } + + // First segment optimization + if (k != first_block && k) { + size_type max_block = std::max(first_block, k); + + auto buffer_table = segment_table_allocator_traits::allocate(base_type::get_allocator(), max_block); + + for (size_type seg_idx = 0; seg_idx < max_block; ++seg_idx) { + segment_table_allocator_traits::construct(base_type::get_allocator(), &buffer_table[seg_idx], + table[seg_idx].load(std::memory_order_relaxed)); + table[seg_idx].store(nullptr, std::memory_order_relaxed); + } + + this->my_first_block.store(k, std::memory_order_relaxed); + size_type index = 0; + try_call( [&] { + for (; index < std::min(this->segment_size(max_block), curr_size); ++index) { + auto element_address = &static_cast<base_type*>(this)->operator[](index); + segment_index_type seg_idx = this->segment_index_of(index); + segment_table_allocator_traits::construct(base_type::get_allocator(), element_address, + std::move_if_noexcept(buffer_table[seg_idx].load(std::memory_order_relaxed)[index])); + } + } ).on_exception( [&] { + segment_element_allocator_type allocator(base_type::get_allocator()); + for (size_type i = 0; i < index; ++i) { + auto element_address = &this->operator[](i); + segment_element_allocator_traits::destroy(allocator, element_address); + } + segment_element_allocator_traits::deallocate(allocator, + table[0].load(std::memory_order_relaxed), this->segment_size(max_block)); + + for (size_type seg_idx = 0; seg_idx < max_block; ++seg_idx) { + table[seg_idx].store(buffer_table[seg_idx].load(std::memory_order_relaxed), + std::memory_order_relaxed); + buffer_table[seg_idx].store(nullptr, std::memory_order_relaxed); + } + segment_table_allocator_traits::deallocate(base_type::get_allocator(), + buffer_table, max_block); + this->my_first_block.store(first_block, std::memory_order_relaxed); + }); + + // The old segments still have to be deallocated correctly. destroy_segment() respects the + // currently active first_block, so restore the previous first_block value first, destroy the + // unnecessary segments, and only then publish the new first_block size. 
+ this->my_first_block.store(first_block, std::memory_order_relaxed); + for (size_type seg_idx = max_block; seg_idx > 0 ; --seg_idx) { + auto curr_segment = buffer_table[seg_idx - 1].load(std::memory_order_relaxed); + if (curr_segment != nullptr) { + destroy_segment(buffer_table[seg_idx - 1].load(std::memory_order_relaxed) + this->segment_base(seg_idx - 1), + seg_idx - 1); + } + } + + this->my_first_block.store(k, std::memory_order_relaxed); + + for (size_type seg_idx = 0; seg_idx < max_block; ++seg_idx) { + segment_table_allocator_traits::destroy(base_type::get_allocator(), &buffer_table[seg_idx]); + } + + segment_table_allocator_traits::deallocate(base_type::get_allocator(), buffer_table, max_block); + } + // free unnecessary segments allocated by reserve() call + if (k_stop < k_end) { + for (size_type seg_idx = k_end; seg_idx != k_stop; --seg_idx) { + if (table[seg_idx - 1].load(std::memory_order_relaxed) != nullptr) { + this->delete_segment(seg_idx - 1); + } + } + if (!k) this->my_first_block.store(0, std::memory_order_relaxed);; + } + } + + // Lever for adjusting the size of first_block at the very first insertion. + // TODO: consider >1 value, check performance + static constexpr size_type default_first_block_size = 1; + + template <typename Vector, typename Value> + friend class vector_iterator; +}; // class concurrent_vector + +#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT +// Deduction guide for the constructor from two iterators +template <typename It, typename Alloc = tbb::cache_aligned_allocator<iterator_value_t<It>>, + typename = std::enable_if_t<is_input_iterator_v<It>>, + typename = std::enable_if_t<is_allocator_v<Alloc>>> +concurrent_vector( It, It, Alloc = Alloc() ) +-> concurrent_vector<iterator_value_t<It>, Alloc>; +#endif + +template <typename T, typename Allocator> +void swap(concurrent_vector<T, Allocator> &lhs, + concurrent_vector<T, Allocator> &rhs) +{ + lhs.swap(rhs); +} + +template <typename T, typename Allocator> +bool operator==(const concurrent_vector<T, Allocator> &lhs, + const concurrent_vector<T, Allocator> &rhs) +{ + return lhs.size() == rhs.size() && std::equal(lhs.begin(), lhs.end(), rhs.begin()); +} + +#if !__TBB_CPP20_COMPARISONS_PRESENT +template <typename T, typename Allocator> +bool operator!=(const concurrent_vector<T, Allocator> &lhs, + const concurrent_vector<T, Allocator> &rhs) +{ + return !(lhs == rhs); +} +#endif // !__TBB_CPP20_COMPARISONS_PRESENT + +#if __TBB_CPP20_COMPARISONS_PRESENT && __TBB_CPP20_CONCEPTS_PRESENT +template <typename T, typename Allocator> +tbb::detail::synthesized_three_way_result<typename concurrent_vector<T, Allocator>::value_type> +operator<=>(const concurrent_vector<T, Allocator> &lhs, + const concurrent_vector<T, Allocator> &rhs) +{ + return std::lexicographical_compare_three_way(lhs.begin(), lhs.end(), + rhs.begin(), rhs.end(), + tbb::detail::synthesized_three_way_comparator{}); +} + +#else + +template <typename T, typename Allocator> +bool operator<(const concurrent_vector<T, Allocator> &lhs, + const concurrent_vector<T, Allocator> &rhs) +{ + return std::lexicographical_compare(lhs.begin(), lhs.end(), rhs.begin(), rhs.end()); +} + +template <typename T, typename Allocator> +bool operator<=(const concurrent_vector<T, Allocator> &lhs, + const concurrent_vector<T, Allocator> &rhs) +{ + return !(rhs < lhs); +} + +template <typename T, typename Allocator> +bool operator>(const concurrent_vector<T, Allocator> &lhs, + const concurrent_vector<T, Allocator> &rhs) +{ + return rhs < lhs; +} + +template <typename T, typename 
Allocator> +bool operator>=(const concurrent_vector<T, Allocator> &lhs, + const concurrent_vector<T, Allocator> &rhs) +{ + return !(lhs < rhs); +} +#endif // __TBB_CPP20_COMPARISONS_PRESENT && __TBB_CPP20_CONCEPTS_PRESENT + +} // namespace d1 +} // namespace detail + +inline namespace v1 { + using detail::d1::concurrent_vector; +} // namespace v1 + +} // namespace tbb + +#endif // __TBB_concurrent_vector_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_aggregator.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_aggregator.h new file mode 100644 index 0000000000..40ba64e43d --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_aggregator.h @@ -0,0 +1,173 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + + +#ifndef __TBB_detail__aggregator_H +#define __TBB_detail__aggregator_H + +#include "_assert.h" +#include "_utils.h" +#include <atomic> +#if !__TBBMALLOC_BUILD // TODO: check this macro with TBB Malloc +#include "../profiling.h" +#endif + +namespace tbb { +namespace detail { +namespace d1 { + +// Base class for aggregated operation +template <typename Derived> +class aggregated_operation { +public: + // Zero value means "wait" status, all other values are "user" specified values and + // are defined into the scope of a class which uses "status" + std::atomic<uintptr_t> status; + + std::atomic<Derived*> next; + aggregated_operation() : status{}, next(nullptr) {} +}; // class aggregated_operation + +// Aggregator base class +/* An aggregator for collecting operations coming from multiple sources and executing + them serially on a single thread. OperationType must be derived from + aggregated_operation. The parameter HandlerType is a functor that will be passed the + list of operations and is expected to handle each operation appropriately, setting the + status of each operation to non-zero. */ +template <typename OperationType> +class aggregator_generic { +public: + aggregator_generic() : pending_operations(nullptr), handler_busy(false) {} + + // Execute an operation + /* Places an operation into the waitlist (pending_operations), and either handles the list, + or waits for the operation to complete, or returns. + The long_life_time parameter specifies the life time of the given operation object. + Operations with long_life_time == true may be accessed after execution. + A "short" life time operation (long_life_time == false) can be destroyed + during execution, and so any access to it after it was put into the waitlist, + including status check, is invalid. As a consequence, waiting for completion + of such operation causes undefined behavior. */ + template <typename HandlerType> + void execute( OperationType* op, HandlerType& handle_operations, bool long_life_time = true ) { + // op->status should be read before inserting the operation into the + // aggregator waitlist since it can become invalid after executing a + // handler (if the operation has 'short' life time.) 
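+ // (A status of zero means the operation has not been handled yet; the handler stores a + // non-zero value once the operation is complete, and that is what waiting threads spin on.)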
+ const uintptr_t status = op->status.load(std::memory_order_relaxed); + + // ITT note: &(op->status) tag is used to cover accesses to this op node. This + // thread has created the operation, and now releases it so that the handler + // thread may handle the associated operation w/o triggering a race condition; + // thus this tag will be acquired just before the operation is handled in the + // handle_operations functor. + call_itt_notify(releasing, &(op->status)); + // insert the operation in the queue. + OperationType* res = pending_operations.load(std::memory_order_relaxed); + do { + op->next.store(res, std::memory_order_relaxed); + } while (!pending_operations.compare_exchange_strong(res, op)); + if (!res) { // first in the list; handle the operations + // ITT note: &pending_operations tag covers access to the handler_busy flag, + // which this waiting handler thread will try to set before entering + // handle_operations. + call_itt_notify(acquired, &pending_operations); + start_handle_operations(handle_operations); + // The operation with 'short' life time can already be destroyed + if (long_life_time) + __TBB_ASSERT(op->status.load(std::memory_order_relaxed), NULL); + } + // Not first; wait for op to be ready + else if (!status) { // operation is blocking here. + __TBB_ASSERT(long_life_time, "Waiting for an operation object that might be destroyed during processing"); + call_itt_notify(prepare, &(op->status)); + spin_wait_while_eq(op->status, uintptr_t(0)); + } + } + +private: + // Trigger the handling of operations when the handler is free + template <typename HandlerType> + void start_handle_operations( HandlerType& handle_operations ) { + OperationType* op_list; + + // ITT note: &handler_busy tag covers access to pending_operations as it is passed + // between active and waiting handlers. Below, the waiting handler waits until + // the active handler releases, and the waiting handler acquires &handler_busy as + // it becomes the active_handler. The release point is at the end of this + // function, when all operations in pending_operations have been handled by the + // owner of this aggregator. + call_itt_notify(prepare, &handler_busy); + // get the handler_busy: + // only one thread can possibly spin here at a time + spin_wait_until_eq(handler_busy, uintptr_t(0)); + call_itt_notify(acquired, &handler_busy); + // acquire fence not necessary here due to causality rule and surrounding atomics + handler_busy.store(1, std::memory_order_relaxed); + + // ITT note: &pending_operations tag covers access to the handler_busy flag + // itself. Capturing the state of the pending_operations signifies that + // handler_busy has been set and a new active handler will now process that list's + // operations. 
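+ // (The exchange below atomically takes ownership of the whole list of queued operations; + // operations queued afterwards start a fresh list, and the thread that pushes onto an + // empty list becomes the next handler.)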
+ call_itt_notify(releasing, &pending_operations); + // grab pending_operations + op_list = pending_operations.exchange(nullptr); + + // handle all the operations + handle_operations(op_list); + + // release the handler + handler_busy.store(0, std::memory_order_release); + } + + // An atomically updated list (aka mailbox) of pending operations + std::atomic<OperationType*> pending_operations; + // Controls threads access to handle_operations + std::atomic<uintptr_t> handler_busy; +}; // class aggregator_generic + +template <typename HandlerType, typename OperationType> +class aggregator : public aggregator_generic<OperationType> { + HandlerType handle_operations; +public: + aggregator() = default; + + void initialize_handler( HandlerType h ) { handle_operations = h; } + + void execute(OperationType* op) { + aggregator_generic<OperationType>::execute(op, handle_operations); + } +}; // class aggregator + +// the most-compatible friend declaration (vs, gcc, icc) is +// template<class U, class V> friend class aggregating_functor; +template <typename AggregatingClass, typename OperationList> +class aggregating_functor { + AggregatingClass* my_object; +public: + aggregating_functor() = default; + aggregating_functor( AggregatingClass* object ) : my_object(object) { + __TBB_ASSERT(my_object, nullptr); + } + + void operator()( OperationList* op_list ) { my_object->handle_operations(op_list); } +}; // class aggregating_functor + + +} // namespace d1 +} // namespace detail +} // namespace tbb + +#endif // __TBB_detail__aggregator_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_aligned_space.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_aligned_space.h new file mode 100644 index 0000000000..13857c47cc --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_aligned_space.h @@ -0,0 +1,46 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ +#ifndef __TBB_aligned_space_H +#define __TBB_aligned_space_H + +#include <cstddef> + +#include "_template_helpers.h" + +namespace tbb { +namespace detail { +inline namespace d0 { + +//! Block of space aligned sufficiently to construct an array T with N elements. +/** The elements are not constructed or destroyed by this class. + @ingroup memory_allocation */ +template<typename T, std::size_t N = 1> +class aligned_space { + alignas(alignof(T)) std::uint8_t aligned_array[N * sizeof(T)]; + +public: + //! Pointer to beginning of array + T* begin() const { return punned_cast<T*>(&aligned_array); } + + //! Pointer to one past last element in array. 
+ T* end() const { return begin() + N; } +}; + +} // namespace d0 +} // namespace detail +} // namespace tbb + +#endif /* __TBB_aligned_space_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_allocator_traits.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_allocator_traits.h new file mode 100644 index 0000000000..8c60e25e7e --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_allocator_traits.h @@ -0,0 +1,107 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_detail__allocator_traits_H +#define __TBB_detail__allocator_traits_H + +#include "_config.h" +#include "_template_helpers.h" +#include <memory> +#include <type_traits> + +namespace tbb { +namespace detail { +inline namespace d0 { + +#if !__TBB_CPP17_ALLOCATOR_IS_ALWAYS_EQUAL_PRESENT +// Struct is_always_equal_detector provides the member type "type" which is +// Allocator::is_always_equal if it is present, std::false_type otherwise +template <typename Allocator, typename = void> +struct is_always_equal_detector { + using type = std::false_type; +}; + +template <typename Allocator> +struct is_always_equal_detector<Allocator, tbb::detail::void_t<typename Allocator::is_always_equal>> +{ + using type = typename Allocator::is_always_equal; +}; +#endif // !__TBB_CPP17_ALLOCATOR_IS_ALWAYS_EQUAL_PRESENT + +template <typename Allocator> +class allocator_traits : public std::allocator_traits<Allocator> +{ + using base_type = std::allocator_traits<Allocator>; +public: +#if !__TBB_CPP17_ALLOCATOR_IS_ALWAYS_EQUAL_PRESENT + using is_always_equal = typename is_always_equal_detector<Allocator>::type; +#endif + + template <typename T> + using rebind_traits = typename tbb::detail::allocator_traits<typename base_type::template rebind_alloc<T>>; +}; // struct allocator_traits + +template <typename Allocator> +void copy_assign_allocators_impl( Allocator& lhs, const Allocator& rhs, /*pocca = */std::true_type ) { + lhs = rhs; +} + +template <typename Allocator> +void copy_assign_allocators_impl( Allocator&, const Allocator&, /*pocca = */ std::false_type ) {} + +// Copy assigns allocators only if propagate_on_container_copy_assignment is true +template <typename Allocator> +void copy_assign_allocators( Allocator& lhs, const Allocator& rhs ) { + using pocca_type = typename allocator_traits<Allocator>::propagate_on_container_copy_assignment; + copy_assign_allocators_impl(lhs, rhs, pocca_type()); +} + +template <typename Allocator> +void move_assign_allocators_impl( Allocator& lhs, Allocator& rhs, /*pocma = */ std::true_type ) { + lhs = std::move(rhs); +} + +template <typename Allocator> +void move_assign_allocators_impl( Allocator&, Allocator&, /*pocma = */ std::false_type ) {} + +// Move assigns allocators only if propagate_on_container_move_assignment is true +template <typename Allocator> +void move_assign_allocators( Allocator& lhs, Allocator& rhs ) { + using pocma_type = typename allocator_traits<Allocator>::propagate_on_container_move_assignment; + 
move_assign_allocators_impl(lhs, rhs, pocma_type()); +} + +template <typename Allocator> +void swap_allocators_impl( Allocator& lhs, Allocator& rhs, /*pocs = */ std::true_type ) { + using std::swap; + swap(lhs, rhs); +} + +template <typename Allocator> +void swap_allocators_impl( Allocator&, Allocator&, /*pocs = */ std::false_type ) {} + +// Swaps allocators only if propagate_on_container_swap is true +template <typename Allocator> +void swap_allocators( Allocator& lhs, Allocator& rhs ) { + using pocs_type = typename allocator_traits<Allocator>::propagate_on_container_swap; + swap_allocators_impl(lhs, rhs, pocs_type()); +} + +} // inline namespace d0 +} // namespace detail +} // namespace tbb + +#endif // __TBB_detail__allocator_traits_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_assert.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_assert.h new file mode 100644 index 0000000000..4116386a92 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_assert.h @@ -0,0 +1,52 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_detail__assert_H +#define __TBB_detail__assert_H + +#include "_config.h" + +namespace tbb { +namespace detail { +namespace r1 { +//! Process an assertion failure. +/** Normally called from __TBB_ASSERT macro. + If assertion handler is null, print message for assertion failure and abort. + Otherwise call the assertion handler. */ +void __TBB_EXPORTED_FUNC assertion_failure(const char* filename, int line, const char* expression, const char* comment); +} // namespace r1 +} // namespace detail +} // namespace tbb + +//! Release version of assertions +#define __TBB_ASSERT_RELEASE(predicate,message) ((predicate)?((void)0) : tbb::detail::r1::assertion_failure(__FILE__,__LINE__,#predicate,message)) + +#if TBB_USE_ASSERT + //! Assert that predicate is true. + /** If predicate is false, print assertion failure message. + If the comment argument is not NULL, it is printed as part of the failure message. + The comment argument has no other effect. */ + #define __TBB_ASSERT(predicate,message) __TBB_ASSERT_RELEASE(predicate,message) + //! "Extended" version + #define __TBB_ASSERT_EX __TBB_ASSERT +#else + //! No-op version of __TBB_ASSERT. + #define __TBB_ASSERT(predicate,comment) ((void)0) + //! "Extended" version is useful to suppress warnings if a variable is only used with an assert + #define __TBB_ASSERT_EX(predicate,comment) ((void)(1 && (predicate))) +#endif // TBB_USE_ASSERT + +#endif // __TBB_detail__assert_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_concurrent_queue_base.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_concurrent_queue_base.h new file mode 100644 index 0000000000..6289632601 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_concurrent_queue_base.h @@ -0,0 +1,659 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_detail__concurrent_queue_base_H +#define __TBB_detail__concurrent_queue_base_H + +#include "_utils.h" +#include "_exception.h" +#include "_machine.h" +#include "_allocator_traits.h" + +#include "../profiling.h" +#include "../spin_mutex.h" +#include "../cache_aligned_allocator.h" + +#include <atomic> + +namespace tbb { +namespace detail { +namespace d1 { + +using ticket_type = std::size_t; + +template <typename Page> +inline bool is_valid_page(const Page p) { + return reinterpret_cast<std::uintptr_t>(p) > 1; +} + +template <typename T, typename Allocator> +struct concurrent_queue_rep; + +template <typename Container, typename T, typename Allocator> +class micro_queue_pop_finalizer; + +#if _MSC_VER && !defined(__INTEL_COMPILER) +// unary minus operator applied to unsigned type, result still unsigned +#pragma warning( push ) +#pragma warning( disable: 4146 ) +#endif + +// A queue using simple locking. +// For efficiency, this class has no constructor. +// The caller is expected to zero-initialize it. +template <typename T, typename Allocator> +class micro_queue { +private: + using queue_rep_type = concurrent_queue_rep<T, Allocator>; + using self_type = micro_queue<T, Allocator>; +public: + using size_type = std::size_t; + using value_type = T; + using reference = value_type&; + using const_reference = const value_type&; + + using allocator_type = Allocator; + using allocator_traits_type = tbb::detail::allocator_traits<allocator_type>; + + static constexpr size_type item_size = sizeof(T); + static constexpr size_type items_per_page = item_size <= 8 ? 32 : + item_size <= 16 ? 16 : + item_size <= 32 ? 8 : + item_size <= 64 ? 4 : + item_size <= 128 ? 
2 : 1; + + struct padded_page { + padded_page() {} + ~padded_page() {} + + reference operator[] (std::size_t index) { + __TBB_ASSERT(index < items_per_page, "Index out of range"); + return items[index]; + } + + const_reference operator[] (std::size_t index) const { + __TBB_ASSERT(index < items_per_page, "Index out of range"); + return items[index]; + } + + padded_page* next{ nullptr }; + std::atomic<std::uintptr_t> mask{}; + + union { + value_type items[items_per_page]; + }; + }; // struct padded_page + + using page_allocator_type = typename allocator_traits_type::template rebind_alloc<padded_page>; +protected: + using page_allocator_traits = tbb::detail::allocator_traits<page_allocator_type>; + +public: + using item_constructor_type = void (*)(value_type* location, const void* src); + micro_queue() = default; + micro_queue( const micro_queue& ) = delete; + micro_queue& operator=( const micro_queue& ) = delete; + + size_type prepare_page( ticket_type k, queue_rep_type& base, page_allocator_type page_allocator, + padded_page*& p ) { + __TBB_ASSERT(p == nullptr, "Invalid page argument for prepare_page"); + k &= -queue_rep_type::n_queue; + size_type index = modulo_power_of_two(k / queue_rep_type::n_queue, items_per_page); + if (!index) { + try_call( [&] { + p = page_allocator_traits::allocate(page_allocator, 1); + }).on_exception( [&] { + ++base.n_invalid_entries; + invalidate_page( k ); + }); + page_allocator_traits::construct(page_allocator, p); + } + + if (tail_counter.load(std::memory_order_relaxed) != k) spin_wait_until_my_turn(tail_counter, k, base); + call_itt_notify(acquired, &tail_counter); + + if (p) { + spin_mutex::scoped_lock lock( page_mutex ); + padded_page* q = tail_page.load(std::memory_order_relaxed); + if (is_valid_page(q)) { + q->next = p; + } else { + head_page.store(p, std::memory_order_relaxed); + } + tail_page.store(p, std::memory_order_relaxed);; + } else { + p = tail_page.load(std::memory_order_acquire); // TODO may be relaxed ? + } + return index; + } + + template<typename... Args> + void push( ticket_type k, queue_rep_type& base, Args&&... args ) + { + padded_page* p = nullptr; + page_allocator_type page_allocator(base.get_allocator()); + size_type index = prepare_page(k, base, page_allocator, p); + __TBB_ASSERT(p != nullptr, "Page was not prepared"); + + // try_call API is not convenient here due to broken + // variadic capture on GCC 4.8.5 + auto value_guard = make_raii_guard([&] { + ++base.n_invalid_entries; + call_itt_notify(releasing, &tail_counter); + tail_counter.fetch_add(queue_rep_type::n_queue); + }); + + page_allocator_traits::construct(page_allocator, &(*p)[index], std::forward<Args>(args)...); + // If no exception was thrown, mark item as present. 
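+ // (Each page keeps a bit mask with one bit per slot; pop() only returns slots whose bit + // is set, so slots whose construction threw are skipped and accounted for as invalid entries.)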
+ p->mask.store(p->mask.load(std::memory_order_relaxed) | uintptr_t(1) << index, std::memory_order_relaxed); + call_itt_notify(releasing, &tail_counter); + + value_guard.dismiss(); + tail_counter.fetch_add(queue_rep_type::n_queue); + } + + void abort_push( ticket_type k, queue_rep_type& base) { + padded_page* p = nullptr; + prepare_page(k, base, base.get_allocator(), p); + ++base.n_invalid_entries; + tail_counter.fetch_add(queue_rep_type::n_queue); + } + + bool pop( void* dst, ticket_type k, queue_rep_type& base ) { + k &= -queue_rep_type::n_queue; + if (head_counter.load(std::memory_order_relaxed) != k) spin_wait_until_eq(head_counter, k); + call_itt_notify(acquired, &head_counter); + if (tail_counter.load(std::memory_order_relaxed) == k) spin_wait_while_eq(tail_counter, k); + call_itt_notify(acquired, &tail_counter); + padded_page *p = head_page.load(std::memory_order_acquire); + __TBB_ASSERT( p, nullptr ); + size_type index = modulo_power_of_two( k/queue_rep_type::n_queue, items_per_page ); + bool success = false; + { + page_allocator_type page_allocator(base.get_allocator()); + micro_queue_pop_finalizer<self_type, value_type, page_allocator_type> finalizer(*this, page_allocator, + k + queue_rep_type::n_queue, index == items_per_page - 1 ? p : nullptr ); + if (p->mask.load(std::memory_order_relaxed) & (std::uintptr_t(1) << index)) { + success = true; + assign_and_destroy_item( dst, *p, index ); + } else { + --base.n_invalid_entries; + } + } + return success; + } + + micro_queue& assign( const micro_queue& src, queue_rep_type& base, + item_constructor_type construct_item ) + { + head_counter.store(src.head_counter.load(std::memory_order_relaxed), std::memory_order_relaxed); + tail_counter.store(src.tail_counter.load(std::memory_order_relaxed), std::memory_order_relaxed); + + const padded_page* srcp = src.head_page.load(std::memory_order_relaxed); + if( is_valid_page(srcp) ) { + ticket_type g_index = head_counter.load(std::memory_order_relaxed); + size_type n_items = (tail_counter.load(std::memory_order_relaxed) - head_counter.load(std::memory_order_relaxed)) + / queue_rep_type::n_queue; + size_type index = modulo_power_of_two(head_counter.load(std::memory_order_relaxed) / queue_rep_type::n_queue, items_per_page); + size_type end_in_first_page = (index+n_items < items_per_page) ? 
(index + n_items) : items_per_page; + + try_call( [&] { + head_page.store(make_copy(base, srcp, index, end_in_first_page, g_index, construct_item), std::memory_order_relaxed); + }).on_exception( [&] { + head_counter.store(0, std::memory_order_relaxed); + tail_counter.store(0, std::memory_order_relaxed); + }); + padded_page* cur_page = head_page.load(std::memory_order_relaxed); + + try_call( [&] { + if (srcp != src.tail_page.load(std::memory_order_relaxed)) { + for (srcp = srcp->next; srcp != src.tail_page.load(std::memory_order_relaxed); srcp=srcp->next ) { + cur_page->next = make_copy( base, srcp, 0, items_per_page, g_index, construct_item ); + cur_page = cur_page->next; + } + + __TBB_ASSERT(srcp == src.tail_page.load(std::memory_order_relaxed), nullptr ); + size_type last_index = modulo_power_of_two(tail_counter.load(std::memory_order_relaxed) / queue_rep_type::n_queue, items_per_page); + if( last_index==0 ) last_index = items_per_page; + + cur_page->next = make_copy( base, srcp, 0, last_index, g_index, construct_item ); + cur_page = cur_page->next; + } + tail_page.store(cur_page, std::memory_order_relaxed); + }).on_exception( [&] { + padded_page* invalid_page = reinterpret_cast<padded_page*>(std::uintptr_t(1)); + tail_page.store(invalid_page, std::memory_order_relaxed); + }); + } else { + head_page.store(nullptr, std::memory_order_relaxed); + tail_page.store(nullptr, std::memory_order_relaxed); + } + return *this; + } + + padded_page* make_copy( queue_rep_type& base, const padded_page* src_page, size_type begin_in_page, + size_type end_in_page, ticket_type& g_index, item_constructor_type construct_item ) + { + page_allocator_type page_allocator(base.get_allocator()); + padded_page* new_page = page_allocator_traits::allocate(page_allocator, 1); + new_page->next = nullptr; + new_page->mask.store(src_page->mask.load(std::memory_order_relaxed), std::memory_order_relaxed); + for (; begin_in_page!=end_in_page; ++begin_in_page, ++g_index) { + if (new_page->mask.load(std::memory_order_relaxed) & uintptr_t(1) << begin_in_page) { + copy_item(*new_page, begin_in_page, *src_page, begin_in_page, construct_item); + } + } + return new_page; + } + + void invalidate_page( ticket_type k ) { + // Append an invalid page at address 1 so that no more pushes are allowed. 
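+ // (Address 1 acts as a sentinel: is_valid_page() only accepts pointer values greater than 1, + // so both nullptr and this marker are treated as 'no valid page' while real pages pass.)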
+ padded_page* invalid_page = reinterpret_cast<padded_page*>(std::uintptr_t(1)); + { + spin_mutex::scoped_lock lock( page_mutex ); + tail_counter.store(k + queue_rep_type::n_queue + 1, std::memory_order_relaxed); + padded_page* q = tail_page.load(std::memory_order_relaxed); + if (is_valid_page(q)) { + q->next = invalid_page; + } else { + head_page.store(invalid_page, std::memory_order_relaxed); + } + tail_page.store(invalid_page, std::memory_order_relaxed); + } + } + + padded_page* get_tail_page() { + return tail_page.load(std::memory_order_relaxed); + } + + padded_page* get_head_page() { + return head_page.load(std::memory_order_relaxed); + } + + void set_tail_page( padded_page* pg ) { + tail_page.store(pg, std::memory_order_relaxed); + } + + void clear(queue_rep_type& base) { + padded_page* curr_page = head_page.load(std::memory_order_relaxed); + std::size_t index = head_counter.load(std::memory_order_relaxed); + page_allocator_type page_allocator(base.get_allocator()); + + while (curr_page) { + for (; index != items_per_page - 1; ++index) { + curr_page->operator[](index).~value_type(); + } + padded_page* next_page = curr_page->next; + page_allocator_traits::destroy(page_allocator, curr_page); + page_allocator_traits::deallocate(page_allocator, curr_page, 1); + curr_page = next_page; + } + + padded_page* invalid_page = reinterpret_cast<padded_page*>(std::uintptr_t(1)); + head_page.store(invalid_page, std::memory_order_relaxed); + tail_page.store(invalid_page, std::memory_order_relaxed); + } + +private: + // template <typename U, typename A> + friend class micro_queue_pop_finalizer<self_type, value_type, page_allocator_type>; + + // Class used to ensure exception-safety of method "pop" + class destroyer { + value_type& my_value; + public: + destroyer( reference value ) : my_value(value) {} + destroyer( const destroyer& ) = delete; + destroyer& operator=( const destroyer& ) = delete; + ~destroyer() {my_value.~T();} + }; // class destroyer + + void copy_item( padded_page& dst, size_type dindex, const padded_page& src, size_type sindex, + item_constructor_type construct_item ) + { + auto& src_item = src[sindex]; + construct_item( &dst[dindex], static_cast<const void*>(&src_item) ); + } + + void assign_and_destroy_item( void* dst, padded_page& src, size_type index ) { + auto& from = src[index]; + destroyer d(from); + *static_cast<T*>(dst) = std::move(from); + } + + void spin_wait_until_my_turn( std::atomic<ticket_type>& counter, ticket_type k, queue_rep_type& rb ) const { + for (atomic_backoff b(true);; b.pause()) { + ticket_type c = counter; + if (c == k) return; + else if (c & 1) { + ++rb.n_invalid_entries; + throw_exception( exception_id::bad_last_alloc); + } + } + } + + std::atomic<padded_page*> head_page{}; + std::atomic<ticket_type> head_counter{}; + + std::atomic<padded_page*> tail_page{}; + std::atomic<ticket_type> tail_counter{}; + + spin_mutex page_mutex{}; +}; // class micro_queue + +#if _MSC_VER && !defined(__INTEL_COMPILER) +#pragma warning( pop ) +#endif // warning 4146 is back + +template <typename Container, typename T, typename Allocator> +class micro_queue_pop_finalizer { +public: + using padded_page = typename Container::padded_page; + using allocator_type = Allocator; + using allocator_traits_type = tbb::detail::allocator_traits<allocator_type>; + + micro_queue_pop_finalizer( Container& queue, Allocator& alloc, ticket_type k, padded_page* p ) : + my_ticket_type(k), my_queue(queue), my_page(p), allocator(alloc) + {} + + micro_queue_pop_finalizer( const 
micro_queue_pop_finalizer& ) = delete; + micro_queue_pop_finalizer& operator=( const micro_queue_pop_finalizer& ) = delete; + + ~micro_queue_pop_finalizer() { + padded_page* p = my_page; + if( is_valid_page(p) ) { + spin_mutex::scoped_lock lock( my_queue.page_mutex ); + padded_page* q = p->next; + my_queue.head_page.store(q, std::memory_order_relaxed); + if( !is_valid_page(q) ) { + my_queue.tail_page.store(nullptr, std::memory_order_relaxed); + } + } + my_queue.head_counter.store(my_ticket_type, std::memory_order_relaxed); + if ( is_valid_page(p) ) { + allocator_traits_type::destroy(allocator, static_cast<padded_page*>(p)); + allocator_traits_type::deallocate(allocator, static_cast<padded_page*>(p), 1); + } + } +private: + ticket_type my_ticket_type; + Container& my_queue; + padded_page* my_page; + Allocator& allocator; +}; // class micro_queue_pop_finalizer + +#if _MSC_VER && !defined(__INTEL_COMPILER) +// structure was padded due to alignment specifier +#pragma warning( push ) +#pragma warning( disable: 4324 ) +#endif + +template <typename T, typename Allocator> +struct concurrent_queue_rep { + using self_type = concurrent_queue_rep<T, Allocator>; + using size_type = std::size_t; + using micro_queue_type = micro_queue<T, Allocator>; + using allocator_type = Allocator; + using allocator_traits_type = tbb::detail::allocator_traits<allocator_type>; + using padded_page = typename micro_queue_type::padded_page; + using page_allocator_type = typename micro_queue_type::page_allocator_type; + using item_constructor_type = typename micro_queue_type::item_constructor_type; +private: + using page_allocator_traits = tbb::detail::allocator_traits<page_allocator_type>; + using queue_allocator_type = typename allocator_traits_type::template rebind_alloc<self_type>; + +public: + // must be power of 2 + static constexpr size_type n_queue = 8; + // Approximately n_queue/golden ratio + static constexpr size_type phi = 3; + static constexpr size_type item_size = micro_queue_type::item_size; + static constexpr size_type items_per_page = micro_queue_type::items_per_page; + + concurrent_queue_rep( queue_allocator_type& alloc ) : my_queue_allocator(alloc) + {} + + concurrent_queue_rep( const concurrent_queue_rep& ) = delete; + concurrent_queue_rep& operator=( const concurrent_queue_rep& ) = delete; + + void clear() { + page_allocator_type page_allocator(my_queue_allocator); + for (size_type i = 0; i < n_queue; ++i) { + padded_page* tail_page = array[i].get_tail_page(); + if( is_valid_page(tail_page) ) { + __TBB_ASSERT(array[i].get_head_page() == tail_page, "at most one page should remain" ); + page_allocator_traits::destroy(page_allocator, static_cast<padded_page*>(tail_page)); + page_allocator_traits::deallocate(page_allocator, static_cast<padded_page*>(tail_page), 1); + array[i].set_tail_page(nullptr); + } else { + __TBB_ASSERT(!is_valid_page(array[i].get_head_page()), "head page pointer corrupt?"); + } + } + } + + void assign( const concurrent_queue_rep& src, item_constructor_type construct_item ) { + head_counter.store(src.head_counter.load(std::memory_order_relaxed), std::memory_order_relaxed); + tail_counter.store(src.tail_counter.load(std::memory_order_relaxed), std::memory_order_relaxed); + n_invalid_entries.store(src.n_invalid_entries.load(std::memory_order_relaxed), std::memory_order_relaxed); + + // copy or move micro_queues + size_type queue_idx = 0; + try_call( [&] { + for (; queue_idx < n_queue; ++queue_idx) { + array[queue_idx].assign(src.array[queue_idx], *this, construct_item); + } + 
}).on_exception( [&] { + for (size_type i = 0; i < queue_idx + 1; ++i) { + array[i].clear(*this); + } + head_counter.store(0, std::memory_order_relaxed); + tail_counter.store(0, std::memory_order_relaxed); + n_invalid_entries.store(0, std::memory_order_relaxed); + }); + + __TBB_ASSERT(head_counter.load(std::memory_order_relaxed) == src.head_counter.load(std::memory_order_relaxed) && + tail_counter.load(std::memory_order_relaxed) == src.tail_counter.load(std::memory_order_relaxed), + "the source concurrent queue should not be concurrently modified." ); + } + + bool empty() const { + ticket_type tc = tail_counter.load(std::memory_order_acquire); + ticket_type hc = head_counter.load(std::memory_order_relaxed); + // if tc!=r.tail_counter, the queue was not empty at some point between the two reads. + return tc == tail_counter.load(std::memory_order_relaxed) && + std::ptrdiff_t(tc - hc - n_invalid_entries.load(std::memory_order_relaxed)) <= 0; + } + + std::ptrdiff_t size() const { + __TBB_ASSERT(sizeof(std::ptrdiff_t) <= sizeof(size_type), NULL); + std::ptrdiff_t hc = head_counter.load(std::memory_order_acquire); + std::ptrdiff_t tc = tail_counter.load(std::memory_order_relaxed); + std::ptrdiff_t nie = n_invalid_entries.load(std::memory_order_relaxed); + + return tc - hc - nie; + } + + queue_allocator_type& get_allocator() { + return my_queue_allocator; + } + + friend class micro_queue<T, Allocator>; + + // Map ticket_type to an array index + static size_type index( ticket_type k ) { + return k * phi % n_queue; + } + + micro_queue_type& choose( ticket_type k ) { + // The formula here approximates LRU in a cache-oblivious way. + return array[index(k)]; + } + + alignas(max_nfs_size) micro_queue_type array[n_queue]; + + alignas(max_nfs_size) std::atomic<ticket_type> head_counter{}; + alignas(max_nfs_size) std::atomic<ticket_type> tail_counter{}; + alignas(max_nfs_size) std::atomic<size_type> n_invalid_entries{}; + queue_allocator_type& my_queue_allocator; +}; // class concurrent_queue_rep + +#if _MSC_VER && !defined(__INTEL_COMPILER) +#pragma warning( pop ) +#endif + +template <typename Value, typename Allocator> +class concurrent_queue_iterator_base { + using queue_rep_type = concurrent_queue_rep<Value, Allocator>; + using padded_page = typename queue_rep_type::padded_page; +protected: + concurrent_queue_iterator_base() = default; + + concurrent_queue_iterator_base( const concurrent_queue_iterator_base& other ) { + assign(other); + } + + concurrent_queue_iterator_base( queue_rep_type* queue_rep ) + : my_queue_rep(queue_rep), + my_head_counter(my_queue_rep->head_counter.load(std::memory_order_relaxed)) + { + for (std::size_t i = 0; i < queue_rep_type::n_queue; ++i) { + my_array[i] = my_queue_rep->array[i].get_head_page(); + } + + if (!get_item(my_item, my_head_counter)) advance(); + } + + void assign( const concurrent_queue_iterator_base& other ) { + my_item = other.my_item; + my_queue_rep = other.my_queue_rep; + + if (my_queue_rep != nullptr) { + my_head_counter = other.my_head_counter; + + for (std::size_t i = 0; i < queue_rep_type::n_queue; ++i) { + my_array[i] = other.my_array[i]; + } + } + } + + void advance() { + __TBB_ASSERT(my_item, "Attempt to increment iterator past end of the queue"); + std::size_t k = my_head_counter; +#if TBB_USE_ASSERT + Value* tmp; + get_item(tmp, k); + __TBB_ASSERT(my_item == tmp, nullptr); +#endif + std::size_t i = modulo_power_of_two(k / queue_rep_type::n_queue, my_queue_rep->items_per_page); + if (i == my_queue_rep->items_per_page - 1) { + padded_page*& root = 
my_array[queue_rep_type::index(k)]; + root = root->next; + } + // Advance k + my_head_counter = ++k; + if (!get_item(my_item, k)) advance(); + } + + concurrent_queue_iterator_base& operator=( const concurrent_queue_iterator_base& other ) { + this->assign(other); + return *this; + } + + bool get_item( Value*& item, std::size_t k ) { + if (k == my_queue_rep->tail_counter.load(std::memory_order_relaxed)) { + item = nullptr; + return true; + } else { + padded_page* p = my_array[queue_rep_type::index(k)]; + __TBB_ASSERT(p, nullptr); + std::size_t i = modulo_power_of_two(k / queue_rep_type::n_queue, my_queue_rep->items_per_page); + item = &(*p)[i]; + return (p->mask & uintptr_t(1) << i) != 0; + } + } + + Value* my_item{ nullptr }; + queue_rep_type* my_queue_rep{ nullptr }; + ticket_type my_head_counter{}; + padded_page* my_array[queue_rep_type::n_queue]; +}; // class concurrent_queue_iterator_base + +struct concurrent_queue_iterator_provider { + template <typename Iterator, typename Container> + static Iterator get( const Container& container ) { + return Iterator(container); + } +}; // struct concurrent_queue_iterator_provider + +template <typename Container, typename Value, typename Allocator> +class concurrent_queue_iterator : public concurrent_queue_iterator_base<typename std::remove_cv<Value>::type, Allocator> { + using base_type = concurrent_queue_iterator_base<typename std::remove_cv<Value>::type, Allocator>; +public: + using value_type = Value; + using pointer = value_type*; + using reference = value_type&; + using difference_type = std::ptrdiff_t; + using iterator_category = std::forward_iterator_tag; + + concurrent_queue_iterator() = default; + + /** If Value==Container::value_type, then this routine is the copy constructor. + If Value==const Container::value_type, then this routine is a conversion constructor. 
*/ + concurrent_queue_iterator( const concurrent_queue_iterator<Container, typename Container::value_type, Allocator>& other ) + : base_type(other) {} + +private: + concurrent_queue_iterator( const Container& container ) + : base_type(container.my_queue_representation) {} +public: + concurrent_queue_iterator& operator=( const concurrent_queue_iterator<Container, typename Container::value_type, Allocator>& other ) { + this->assign(other); + return *this; + } + + reference operator*() const { + return *static_cast<pointer>(this->my_item); + } + + pointer operator->() const { return &operator*(); } + + concurrent_queue_iterator& operator++() { + this->advance(); + return *this; + } + + concurrent_queue_iterator operator++(int) { + concurrent_queue_iterator tmp = *this; + ++*this; + return tmp; + } + + friend bool operator==( const concurrent_queue_iterator& lhs, const concurrent_queue_iterator& rhs ) { + return lhs.my_item == rhs.my_item; + } + + friend bool operator!=( const concurrent_queue_iterator& lhs, const concurrent_queue_iterator& rhs ) { + return lhs.my_item != rhs.my_item; + } +private: + friend struct concurrent_queue_iterator_provider; +}; // class concurrent_queue_iterator + +} // namespace d1 +} // namespace detail +} // tbb + +#endif // __TBB_detail__concurrent_queue_base_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_concurrent_skip_list.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_concurrent_skip_list.h new file mode 100644 index 0000000000..c4d4c627e0 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_concurrent_skip_list.h @@ -0,0 +1,1252 @@ +/* + Copyright (c) 2019-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_detail__concurrent_skip_list_H +#define __TBB_detail__concurrent_skip_list_H + +#if !defined(__TBB_concurrent_map_H) && !defined(__TBB_concurrent_set_H) +#error Do not #include this internal file directly; use public TBB headers instead. 
+#endif + +#include "_config.h" +#include "_range_common.h" +#include "_allocator_traits.h" +#include "_template_helpers.h" +#include "_node_handle.h" +#include "_containers_helpers.h" +#include "_assert.h" +#include "_exception.h" +#include "../enumerable_thread_specific.h" +#include <utility> +#include <initializer_list> +#include <atomic> +#include <array> +#include <type_traits> +#include <random> // Need std::geometric_distribution +#include <algorithm> // Need std::equal and std::lexicographical_compare +#include <cstdint> +#if __TBB_CPP20_COMPARISONS_PRESENT +#include <compare> +#endif + +#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) +#pragma warning(push) +#pragma warning(disable: 4127) // warning C4127: conditional expression is constant +#endif + +namespace tbb { +namespace detail { +namespace d1 { + +template <typename Value, typename Allocator> +class skip_list_node { + using node_ptr = skip_list_node*; +public: + using value_type = Value; + using atomic_node_ptr = std::atomic<node_ptr>; + using size_type = std::size_t; + using container_allocator_type = Allocator; + + using reference = value_type&; + using const_reference = const value_type&; +private: + using allocator_traits = tbb::detail::allocator_traits<container_allocator_type>; + + // Allocator is the same as the container allocator=> allocates unitptr_t + // It is required to rebind it to value_type to get the correct pointer and const_pointer + using value_allocator_traits = typename allocator_traits::template rebind_traits<value_type>; +public: + using pointer = typename value_allocator_traits::pointer; + using const_pointer = typename value_allocator_traits::const_pointer; + + skip_list_node( size_type levels, container_allocator_type& alloc ) + : my_container_allocator(alloc), my_height(levels), my_index_number(0) + { + for (size_type l = 0; l < my_height; ++l) { + allocator_traits::construct(my_container_allocator, &get_atomic_next(l), nullptr); + } + } + + ~skip_list_node() { + for (size_type l = 0; l < my_height; ++l) { + allocator_traits::destroy(my_container_allocator, &get_atomic_next(l)); + } + } + + skip_list_node( const skip_list_node& ) = delete; + skip_list_node( skip_list_node&& ) = delete; + skip_list_node& operator=( const skip_list_node& ) = delete; + skip_list_node& operator=( skip_list_node&& ) = delete; + + pointer storage() { + return &my_value; + } + + reference value() { + return *storage(); + } + + node_ptr next( size_type level ) const { + node_ptr res = get_atomic_next(level).load(std::memory_order_acquire); + __TBB_ASSERT(res == nullptr || res->height() > level, "Broken internal structure"); + return res; + } + + atomic_node_ptr& atomic_next( size_type level ) { + atomic_node_ptr& res = get_atomic_next(level); +#if TBB_USE_DEBUG + node_ptr node = res.load(std::memory_order_acquire); + __TBB_ASSERT(node == nullptr || node->height() > level, "Broken internal structure"); +#endif + return res; + } + + void set_next( size_type level, node_ptr n ) { + __TBB_ASSERT(n == nullptr || n->height() > level, "Broken internal structure"); + get_atomic_next(level).store(n, std::memory_order_relaxed); + } + + size_type height() const { + return my_height; + } + + void set_index_number( size_type index_num ) { + my_index_number = index_num; + } + + size_type index_number() const { + return my_index_number; + } + +private: + atomic_node_ptr& get_atomic_next( size_type level ) { + atomic_node_ptr* arr = reinterpret_cast<atomic_node_ptr*>(this + 1); + return arr[level]; + } + + const atomic_node_ptr& 
get_atomic_next( size_type level ) const { + const atomic_node_ptr* arr = reinterpret_cast<const atomic_node_ptr*>(this + 1); + return arr[level]; + } + + container_allocator_type& my_container_allocator; + union { + value_type my_value; + }; + size_type my_height; + size_type my_index_number; +}; // class skip_list_node + +template <typename NodeType, typename ValueType> +class skip_list_iterator { + using node_type = NodeType; + using node_ptr = node_type*; +public: + using iterator_category = std::forward_iterator_tag; + using value_type = ValueType; + + using difference_type = std::ptrdiff_t; + using pointer = value_type*; + using reference = value_type&; + + skip_list_iterator() : skip_list_iterator(nullptr) {} + + skip_list_iterator( const skip_list_iterator<node_type, typename node_type::value_type>& other ) + : my_node_ptr(other.my_node_ptr) {} + + skip_list_iterator& operator=( const skip_list_iterator<node_type, typename node_type::value_type>& other ) { + my_node_ptr = other.my_node_ptr; + return *this; + } + + reference operator*() const { return my_node_ptr->value(); } + pointer operator->() const { return my_node_ptr->storage(); } + + skip_list_iterator& operator++() { + __TBB_ASSERT(my_node_ptr != nullptr, nullptr); + my_node_ptr = my_node_ptr->next(0); + return *this; + } + + skip_list_iterator operator++(int) { + skip_list_iterator tmp = *this; + ++*this; + return tmp; + } + +private: + skip_list_iterator(node_type* n) : my_node_ptr(n) {} + + node_ptr my_node_ptr; + + template <typename Traits> + friend class concurrent_skip_list; + + template <typename N, typename V> + friend class skip_list_iterator; + + friend class const_range; + friend class range; + + friend bool operator==( const skip_list_iterator& lhs, const skip_list_iterator& rhs ) { + return lhs.my_node_ptr == rhs.my_node_ptr; + } + + friend bool operator!=( const skip_list_iterator& lhs, const skip_list_iterator& rhs ) { + return lhs.my_node_ptr != rhs.my_node_ptr; + } +}; // class skip_list_iterator + +template <typename Traits> +class concurrent_skip_list { +protected: + using container_traits = Traits; + using self_type = concurrent_skip_list<container_traits>; + using allocator_type = typename container_traits::allocator_type; + using allocator_traits_type = tbb::detail::allocator_traits<allocator_type>; + using key_compare = typename container_traits::compare_type; + using value_compare = typename container_traits::value_compare; + using key_type = typename container_traits::key_type; + using value_type = typename container_traits::value_type; + static_assert(std::is_same<value_type, typename allocator_type::value_type>::value, + "value_type of the container should be the same as its allocator"); + + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + + static constexpr size_type max_level = container_traits::max_level; + + using node_allocator_type = typename allocator_traits_type::template rebind_alloc<std::uint8_t>; + using node_allocator_traits = tbb::detail::allocator_traits<node_allocator_type>; + + using list_node_type = skip_list_node<value_type, node_allocator_type>; + using node_type = node_handle<key_type, value_type, list_node_type, allocator_type>; + + using iterator = skip_list_iterator<list_node_type, value_type>; + using const_iterator = skip_list_iterator<list_node_type, const value_type>; + + using reference = value_type&; + using const_reference = const value_type&; + using pointer = typename allocator_traits_type::pointer; + using const_pointer = typename 
allocator_traits_type::const_pointer; + + using random_level_generator_type = typename container_traits::random_level_generator_type; + + using node_ptr = list_node_type*; + + using array_type = std::array<node_ptr, max_level>; +private: + template <typename T> + using is_transparent = dependent_bool<comp_is_transparent<key_compare>, T>; +public: + static constexpr bool allow_multimapping = container_traits::allow_multimapping; + + concurrent_skip_list() : my_head_ptr(nullptr), my_size(0), my_max_height(0) {} + + explicit concurrent_skip_list( const key_compare& comp, const allocator_type& alloc = allocator_type() ) + : my_node_allocator(alloc), my_compare(comp), my_head_ptr(nullptr), my_size(0), my_max_height(0) {} + + explicit concurrent_skip_list( const allocator_type& alloc ) + : concurrent_skip_list(key_compare(), alloc) {} + + template<typename InputIterator> + concurrent_skip_list( InputIterator first, InputIterator last, const key_compare& comp = key_compare(), + const allocator_type& alloc = allocator_type() ) + : concurrent_skip_list(comp, alloc) + { + internal_copy(first, last); + } + + template <typename InputIterator> + concurrent_skip_list( InputIterator first, InputIterator last, const allocator_type& alloc ) + : concurrent_skip_list(first, last, key_compare(), alloc) {} + + concurrent_skip_list( std::initializer_list<value_type> init, const key_compare& comp = key_compare(), + const allocator_type& alloc = allocator_type() ) + : concurrent_skip_list(init.begin(), init.end(), comp, alloc) {} + + concurrent_skip_list( std::initializer_list<value_type> init, const allocator_type& alloc ) + : concurrent_skip_list(init, key_compare(), alloc) {} + + concurrent_skip_list( const concurrent_skip_list& other ) + : my_node_allocator(node_allocator_traits::select_on_container_copy_construction(other.get_allocator())), + my_compare(other.my_compare), my_rng(other.my_rng), my_head_ptr(nullptr), + my_size(0), my_max_height(0) + { + internal_copy(other); + __TBB_ASSERT(my_size == other.my_size, "Wrong size of copy-constructed container"); + } + + concurrent_skip_list( const concurrent_skip_list& other, const allocator_type& alloc ) + : my_node_allocator(alloc), my_compare(other.my_compare), my_rng(other.my_rng), my_head_ptr(nullptr), + my_size(0), my_max_height(0) + { + internal_copy(other); + __TBB_ASSERT(my_size == other.my_size, "Wrong size of copy-constructed container"); + } + + concurrent_skip_list( concurrent_skip_list&& other ) + : my_node_allocator(std::move(other.my_node_allocator)), my_compare(other.my_compare), + my_rng(std::move(other.my_rng)), my_head_ptr(nullptr) // my_head_ptr would be stored in internal_move + { + internal_move(std::move(other)); + } + + concurrent_skip_list( concurrent_skip_list&& other, const allocator_type& alloc ) + : my_node_allocator(alloc), my_compare(other.my_compare), + my_rng(std::move(other.my_rng)), my_head_ptr(nullptr) + { + using is_always_equal = typename allocator_traits_type::is_always_equal; + internal_move_construct_with_allocator(std::move(other), is_always_equal()); + } + + ~concurrent_skip_list() { + clear(); + node_ptr head = my_head_ptr.load(std::memory_order_relaxed); + if (head != nullptr) { + delete_node(head); + } + } + + concurrent_skip_list& operator=( const concurrent_skip_list& other ) { + if (this != &other) { + clear(); + copy_assign_allocators(my_node_allocator, other.my_node_allocator); + my_compare = other.my_compare; + my_rng = other.my_rng; + internal_copy(other); + } + return *this; + } + + concurrent_skip_list& 
operator=( concurrent_skip_list&& other ) { + if (this != &other) { + clear(); + my_compare = std::move(other.my_compare); + my_rng = std::move(other.my_rng); + + move_assign_allocators(my_node_allocator, other.my_node_allocator); + using pocma_type = typename node_allocator_traits::propagate_on_container_move_assignment; + using is_always_equal = typename node_allocator_traits::is_always_equal; + internal_move_assign(std::move(other), tbb::detail::disjunction<pocma_type, is_always_equal>()); + } + return *this; + } + + concurrent_skip_list& operator=( std::initializer_list<value_type> il ) + { + clear(); + insert(il.begin(),il.end()); + return *this; + } + + std::pair<iterator, bool> insert( const value_type& value ) { + return internal_insert(value); + } + + std::pair<iterator, bool> insert( value_type&& value ) { + return internal_insert(std::move(value)); + } + + iterator insert( const_iterator, const_reference value ) { + // Ignore hint + return insert(value).first; + } + + iterator insert( const_iterator, value_type&& value ) { + // Ignore hint + return insert(std::move(value)).first; + } + + template<typename InputIterator> + void insert( InputIterator first, InputIterator last ) { + while (first != last) { + insert(*first); + ++first; + } + } + + void insert( std::initializer_list<value_type> init ) { + insert(init.begin(), init.end()); + } + + std::pair<iterator, bool> insert( node_type&& nh ) { + if (!nh.empty()) { + auto insert_node = node_handle_accessor::get_node_ptr(nh); + std::pair<iterator, bool> insert_result = internal_insert_node(insert_node); + if (insert_result.second) { + node_handle_accessor::deactivate(nh); + } + return insert_result; + } + return std::pair<iterator, bool>(end(), false); + } + + iterator insert( const_iterator, node_type&& nh ) { + // Ignore hint + return insert(std::move(nh)).first; + } + + template<typename... Args> + std::pair<iterator, bool> emplace( Args&&... args ) { + return internal_insert(std::forward<Args>(args)...); + } + + template<typename... Args> + iterator emplace_hint( const_iterator, Args&&... args ) { + // Ignore hint + return emplace(std::forward<Args>(args)...).first; + } + + iterator unsafe_erase( iterator pos ) { + std::pair<node_ptr, node_ptr> extract_result = internal_extract(pos); + if (extract_result.first) { // node was extracted + delete_value_node(extract_result.first); + return extract_result.second; + } + return end(); + } + + iterator unsafe_erase( const_iterator pos ) { + return unsafe_erase(get_iterator(pos)); + } + + iterator unsafe_erase( const_iterator first, const_iterator last ) { + while (first != last) { + // Unsafe erase returns the iterator which follows the erased one + first = unsafe_erase(first); + } + return get_iterator(first); + } + + size_type unsafe_erase( const key_type& key ) { + return internal_erase(key); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value + && !std::is_convertible<K, const_iterator>::value + && !std::is_convertible<K, iterator>::value, + size_type>::type unsafe_erase( const K& key ) + { + return internal_erase(key); + } + + node_type unsafe_extract( const_iterator pos ) { + std::pair<node_ptr, node_ptr> extract_result = internal_extract(pos); + return extract_result.first ? 
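+        // If internal_extract found and unlinked a node, hand it over to a node handle
+        // (which then owns it); otherwise (pos == end()) an empty node handle is returned.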
node_handle_accessor::construct<node_type>(extract_result.first) : node_type(); + } + + node_type unsafe_extract( iterator pos ) { + return unsafe_extract(const_iterator(pos)); + } + + node_type unsafe_extract( const key_type& key ) { + return unsafe_extract(find(key)); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value + && !std::is_convertible<K, const_iterator>::value + && !std::is_convertible<K, iterator>::value, + node_type>::type unsafe_extract( const K& key ) + { + return unsafe_extract(find(key)); + } + + iterator lower_bound( const key_type& key ) { + return iterator(internal_get_bound(key, my_compare)); + } + + const_iterator lower_bound( const key_type& key ) const { + return const_iterator(internal_get_bound(key, my_compare)); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value, iterator>::type lower_bound( const K& key ) { + return iterator(internal_get_bound(key, my_compare)); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value, const_iterator>::type lower_bound( const K& key ) const { + return const_iterator(internal_get_bound(key, my_compare)); + } + + iterator upper_bound( const key_type& key ) { + return iterator(internal_get_bound(key, not_greater_compare(my_compare))); + } + + const_iterator upper_bound( const key_type& key ) const { + return const_iterator(internal_get_bound(key, not_greater_compare(my_compare))); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value, iterator>::type upper_bound( const K& key ) { + return iterator(internal_get_bound(key, not_greater_compare(my_compare))); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value, const_iterator>::type upper_bound( const K& key ) const { + return const_iterator(internal_get_bound(key, not_greater_compare(my_compare))); + } + + iterator find( const key_type& key ) { + return iterator(internal_find(key)); + } + + const_iterator find( const key_type& key ) const { + return const_iterator(internal_find(key)); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value, iterator>::type find( const K& key ) { + return iterator(internal_find(key)); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value, const_iterator>::type find( const K& key ) const { + return const_iterator(internal_find(key)); + } + + size_type count( const key_type& key ) const { + return internal_count(key); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value, size_type>::type count( const K& key ) const { + return internal_count(key); + } + + bool contains( const key_type& key ) const { + return find(key) != end(); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value, bool>::type contains( const K& key ) const { + return find(key) != end(); + } + + void clear() noexcept { + // clear is not thread safe - load can be relaxed + node_ptr head = my_head_ptr.load(std::memory_order_relaxed); + + if (head == nullptr) return; // Head is not allocated => container is empty + + node_ptr current = head->next(0); + + // Delete all value nodes in the container + while (current) { + node_ptr next = current->next(0); + delete_value_node(current); + current = next; + } + + for (size_type level = 0; level < head->height(); ++level) { + head->set_next(level, nullptr); + } + + my_size.store(0, std::memory_order_relaxed); + my_max_height.store(0, std::memory_order_relaxed); + } + + iterator begin() { + return 
iterator(internal_begin()); + } + + const_iterator begin() const { + return const_iterator(internal_begin()); + } + + const_iterator cbegin() const { + return const_iterator(internal_begin()); + } + + iterator end() { + return iterator(nullptr); + } + + const_iterator end() const { + return const_iterator(nullptr); + } + + const_iterator cend() const { + return const_iterator(nullptr); + } + + size_type size() const { + return my_size.load(std::memory_order_relaxed); + } + + size_type max_size() const { + return node_allocator_traits::max_size(my_node_allocator); + } + + __TBB_nodiscard bool empty() const { + return 0 == size(); + } + + allocator_type get_allocator() const { + return my_node_allocator; + } + + void swap(concurrent_skip_list& other) { + if (this != &other) { + using pocs_type = typename node_allocator_traits::propagate_on_container_swap; + using is_always_equal = typename node_allocator_traits::is_always_equal; + internal_swap(other, tbb::detail::disjunction<pocs_type, is_always_equal>()); + } + } + + std::pair<iterator, iterator> equal_range(const key_type& key) { + return internal_equal_range(key); + } + + std::pair<const_iterator, const_iterator> equal_range(const key_type& key) const { + return internal_equal_range(key); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value, std::pair<iterator, iterator>>::type equal_range( const K& key ) { + return internal_equal_range(key); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value, std::pair<const_iterator, const_iterator>>::type equal_range( const K& key ) const { + return internal_equal_range(key); + } + + key_compare key_comp() const { return my_compare; } + + value_compare value_comp() const { return container_traits::value_comp(my_compare); } + + class const_range_type { + public: + using size_type = typename concurrent_skip_list::size_type; + using value_type = typename concurrent_skip_list::value_type; + using iterator = typename concurrent_skip_list::const_iterator; + + bool empty() const { + return my_begin.my_node_ptr->next(0) == my_end.my_node_ptr; + } + + bool is_divisible() const { + return my_level != 0 ? 
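+            // The range is divisible when some node is linked at level my_level - 1 strictly
+            // before my_end; the splitting constructor uses that node as the new begin.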
my_begin.my_node_ptr->next(my_level - 1) != my_end.my_node_ptr : false; + } + + size_type size() const { return std::distance(my_begin, my_end); } + + const_range_type( const_range_type& r, split) + : my_end(r.my_end) { + my_begin = iterator(r.my_begin.my_node_ptr->next(r.my_level - 1)); + my_level = my_begin.my_node_ptr->height(); + r.my_end = my_begin; + } + + const_range_type( const concurrent_skip_list& l) + : my_end(l.end()), my_begin(l.begin()), my_level(my_begin.my_node_ptr->height() ) {} + + iterator begin() const { return my_begin; } + iterator end() const { return my_end; } + size_type grainsize() const { return 1; } + + private: + const_iterator my_end; + const_iterator my_begin; + size_type my_level; + }; // class const_range_type + + class range_type : public const_range_type { + public: + using iterator = typename concurrent_skip_list::iterator; + + range_type(range_type& r, split) : const_range_type(r, split()) {} + range_type(const concurrent_skip_list& l) : const_range_type(l) {} + + iterator begin() const { + node_ptr node = const_range_type::begin().my_node_ptr; + return iterator(node); + } + + iterator end() const { + node_ptr node = const_range_type::end().my_node_ptr; + return iterator(node); + } + }; // class range_type + + range_type range() { return range_type(*this); } + const_range_type range() const { return const_range_type(*this); } + +private: + node_ptr internal_begin() const { + node_ptr head = get_head(); + return head == nullptr ? head : head->next(0); + } + + void internal_move(concurrent_skip_list&& other) { + my_head_ptr.store(other.my_head_ptr.load(std::memory_order_relaxed), std::memory_order_relaxed); + other.my_head_ptr.store(nullptr, std::memory_order_relaxed); + + my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); + other.my_size.store(0, std::memory_order_relaxed); + + my_max_height.store(other.my_max_height.load(std::memory_order_relaxed), std::memory_order_relaxed); + other.my_max_height.store(0, std::memory_order_relaxed); + } + + void internal_move_construct_with_allocator(concurrent_skip_list&& other, + /*is_always_equal = */std::true_type) { + internal_move(std::move(other)); + } + + void internal_move_construct_with_allocator(concurrent_skip_list&& other, + /*is_always_equal = */std::false_type) { + if (my_node_allocator == other.get_allocator()) { + internal_move(std::move(other)); + } else { + my_size.store(0, std::memory_order_relaxed); + my_max_height.store(other.my_max_height.load(std::memory_order_relaxed), std::memory_order_relaxed); + internal_copy(std::make_move_iterator(other.begin()), std::make_move_iterator(other.end())); + } + } + + static const key_type& get_key( node_ptr n ) { + __TBB_ASSERT(n, nullptr); + return container_traits::get_key(static_cast<node_ptr>(n)->value()); + } + + template <typename K> + bool found( node_ptr node, const K& key ) const { + return node != nullptr && !my_compare(key, get_key(node)); + } + + template <typename K> + node_ptr internal_find(const K& key) const { + return allow_multimapping ? 
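+        // Containers that allow duplicate keys walk the levels explicitly (internal_find_multi);
+        // unique containers reuse lower_bound and check the found key for equivalence.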
internal_find_multi(key) : internal_find_unique(key); + } + + template <typename K> + node_ptr internal_find_multi( const K& key ) const { + node_ptr prev = get_head(); + if (prev == nullptr) return nullptr; // If the head node is not allocated - exit + + node_ptr curr = nullptr; + node_ptr old_curr = curr; + + for (size_type h = my_max_height.load(std::memory_order_acquire); h > 0; --h) { + curr = internal_find_position(h - 1, prev, key, my_compare); + + if (curr != old_curr && found(curr, key)) { + return curr; + } + old_curr = curr; + } + return nullptr; + } + + template <typename K> + node_ptr internal_find_unique( const K& key ) const { + const_iterator it = lower_bound(key); + return (it == end() || my_compare(key, container_traits::get_key(*it))) ? nullptr : it.my_node_ptr; + } + + template <typename K> + size_type internal_count( const K& key ) const { + if (allow_multimapping) { + // TODO: reimplement without double traversal + std::pair<const_iterator, const_iterator> r = equal_range(key); + return std::distance(r.first, r.second); + } + return size_type(contains(key) ? 1 : 0); + } + + template <typename K> + std::pair<iterator, iterator> internal_equal_range(const K& key) const { + iterator lb = get_iterator(lower_bound(key)); + auto result = std::make_pair(lb, lb); + + // If the lower bound points to the node with the requested key + if (found(lb.my_node_ptr, key)) { + + if (!allow_multimapping) { + // For unique containers - move the second iterator forward and exit + ++result.second; + } else { + // For multi containers - find the upper bound starting from the lower bound + node_ptr prev = lb.my_node_ptr; + node_ptr curr = nullptr; + not_greater_compare cmp(my_compare); + + // Start from the lower bound of the range + for (size_type h = prev->height(); h > 0; --h) { + curr = prev->next(h - 1); + while (curr && cmp(get_key(curr), key)) { + prev = curr; + // If the height of the next node is greater than the current one - jump to its height + if (h < curr->height()) { + h = curr->height(); + } + curr = prev->next(h - 1); + } + } + result.second = iterator(curr); + } + } + + return result; + } + + // Finds position on the level using comparator cmp starting from the node prev + template <typename K, typename Comparator> + node_ptr internal_find_position( size_type level, node_ptr& prev, const K& key, + const Comparator& cmp ) const { + __TBB_ASSERT(level < prev->height(), "Wrong level to find position"); + node_ptr curr = prev->next(level); + + while (curr && cmp(get_key(curr), key)) { + prev = curr; + __TBB_ASSERT(level < prev->height(), nullptr); + curr = prev->next(level); + } + + return curr; + } + + // The same as previous overload, but allows index_number comparison + template <typename Comparator> + node_ptr internal_find_position( size_type level, node_ptr& prev, node_ptr node, + const Comparator& cmp ) const { + __TBB_ASSERT(level < prev->height(), "Wrong level to find position"); + node_ptr curr = prev->next(level); + + while (curr && cmp(get_key(curr), get_key(node))) { + if (allow_multimapping && cmp(get_key(node), get_key(curr)) && curr->index_number() > node->index_number()) { + break; + } + + prev = curr; + __TBB_ASSERT(level < prev->height(), nullptr); + curr = prev->next(level); + } + return curr; + } + + template <typename Comparator> + void fill_prev_curr_arrays(array_type& prev_nodes, array_type& curr_nodes, node_ptr node, const key_type& key, + const Comparator& cmp, node_ptr head ) { + + size_type curr_max_height = 
my_max_height.load(std::memory_order_acquire); + size_type node_height = node->height(); + if (curr_max_height < node_height) { + std::fill(prev_nodes.begin() + curr_max_height, prev_nodes.begin() + node_height, head); + std::fill(curr_nodes.begin() + curr_max_height, curr_nodes.begin() + node_height, nullptr); + } + + node_ptr prev = head; + for (size_type level = curr_max_height; level > 0; --level) { + node_ptr curr = internal_find_position(level - 1, prev, key, cmp); + prev_nodes[level - 1] = prev; + curr_nodes[level - 1] = curr; + } + } + + void fill_prev_array_for_existing_node( array_type& prev_nodes, node_ptr node ) { + node_ptr head = create_head_if_necessary(); + prev_nodes.fill(head); + + node_ptr prev = head; + for (size_type level = node->height(); level > 0; --level) { + while (prev->next(level - 1) != node) { + prev = prev->next(level - 1); + } + prev_nodes[level - 1] = prev; + } + } + + struct not_greater_compare { + const key_compare& my_less_compare; + + not_greater_compare( const key_compare& less_compare ) : my_less_compare(less_compare) {} + + template <typename K1, typename K2> + bool operator()( const K1& first, const K2& second ) const { + return !my_less_compare(second, first); + } + }; + + not_greater_compare select_comparator( /*allow_multimapping = */ std::true_type ) { + return not_greater_compare(my_compare); + } + + key_compare select_comparator( /*allow_multimapping = */ std::false_type ) { + return my_compare; + } + + template<typename... Args> + std::pair<iterator, bool> internal_insert( Args&&... args ) { + node_ptr new_node = create_value_node(std::forward<Args>(args)...); + std::pair<iterator, bool> insert_result = internal_insert_node(new_node); + if (!insert_result.second) { + delete_value_node(new_node); + } + return insert_result; + } + + std::pair<iterator, bool> internal_insert_node( node_ptr new_node ) { + array_type prev_nodes; + array_type curr_nodes; + size_type new_height = new_node->height(); + auto compare = select_comparator(std::integral_constant<bool, allow_multimapping>{}); + + node_ptr head_node = create_head_if_necessary(); + + for (;;) { + fill_prev_curr_arrays(prev_nodes, curr_nodes, new_node, get_key(new_node), compare, head_node); + + node_ptr prev = prev_nodes[0]; + node_ptr next = curr_nodes[0]; + + if (allow_multimapping) { + new_node->set_index_number(prev->index_number() + 1); + } else { + if (found(next, get_key(new_node))) { + return std::pair<iterator, bool>(iterator(next), false); + } + } + + new_node->set_next(0, next); + if (!prev->atomic_next(0).compare_exchange_strong(next, new_node)) { + continue; + } + + // If the node was successfully linked on the first level - it will be linked on other levels + // Insertion cannot fail starting from this point + + // If the height of inserted node is greater than maximum - increase maximum + size_type max_height = my_max_height.load(std::memory_order_acquire); + for (;;) { + if (new_height <= max_height || my_max_height.compare_exchange_strong(max_height, new_height)) { + // If the maximum was successfully updated by current thread + // or by an other thread for the value, greater or equal to new_height + break; + } + } + + for (std::size_t level = 1; level < new_height; ++level) { + // Link the node on upper levels + for (;;) { + prev = prev_nodes[level]; + next = static_cast<node_ptr>(curr_nodes[level]); + + new_node->set_next(level, next); + __TBB_ASSERT(new_node->height() > level, "Internal structure break"); + if (prev->atomic_next(level).compare_exchange_strong(next, 
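+                    // Try to link the new node at this level with a CAS; if another thread has
+                    // changed prev->next in the meantime, the positions for the remaining levels
+                    // are recomputed below and the linking at this level is retried.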
new_node)) { + break; + } + + for (size_type lev = level; lev != new_height; ++lev ) { + curr_nodes[lev] = internal_find_position(lev, prev_nodes[lev], new_node, compare); + } + } + } + ++my_size; + return std::pair<iterator, bool>(iterator(new_node), true); + } + } + + template <typename K, typename Comparator> + node_ptr internal_get_bound( const K& key, const Comparator& cmp ) const { + node_ptr prev = get_head(); + if (prev == nullptr) return nullptr; // If the head node is not allocated - exit + + node_ptr curr = nullptr; + + for (size_type h = my_max_height.load(std::memory_order_acquire); h > 0; --h) { + curr = internal_find_position(h - 1, prev, key, cmp); + } + + return curr; + } + + template <typename K> + size_type internal_erase( const K& key ) { + auto eq = equal_range(key); + size_type old_size = size(); + unsafe_erase(eq.first, eq.second); + return old_size - size(); + } + + // Returns node_ptr to the extracted node and node_ptr to the next node after the extracted + std::pair<node_ptr, node_ptr> internal_extract( const_iterator it ) { + std::pair<node_ptr, node_ptr> result(nullptr, nullptr); + if ( it != end() ) { + array_type prev_nodes; + + node_ptr erase_node = it.my_node_ptr; + node_ptr next_node = erase_node->next(0); + fill_prev_array_for_existing_node(prev_nodes, erase_node); + + for (size_type level = 0; level < erase_node->height(); ++level) { + prev_nodes[level]->set_next(level, erase_node->next(level)); + erase_node->set_next(level, nullptr); + } + my_size.fetch_sub(1, std::memory_order_relaxed); + + result.first = erase_node; + result.second = next_node; + } + return result; + } + +protected: + template<typename SourceType> + void internal_merge( SourceType&& source ) { + using source_type = typename std::decay<SourceType>::type; + using source_iterator = typename source_type::iterator; + static_assert((std::is_same<node_type, typename source_type::node_type>::value), "Incompatible containers cannot be merged"); + + for (source_iterator it = source.begin(); it != source.end();) { + source_iterator where = it++; + if (allow_multimapping || !contains(container_traits::get_key(*where))) { + node_type handle = source.unsafe_extract(where); + __TBB_ASSERT(!handle.empty(), "Extracted handle in merge is empty"); + + if (!insert(std::move(handle)).second) { + //If the insertion fails - return the node into source + source.insert(std::move(handle)); + } + __TBB_ASSERT(handle.empty(), "Node handle should be empty after the insertion"); + } + } + } + +private: + void internal_copy( const concurrent_skip_list& other ) { + internal_copy(other.begin(), other.end()); + } + + template<typename Iterator> + void internal_copy( Iterator first, Iterator last ) { + try_call([&] { + for (auto it = first; it != last; ++it) { + insert(*it); + } + }).on_exception([&] { + clear(); + node_ptr head = my_head_ptr.load(std::memory_order_relaxed); + if (head != nullptr) { + delete_node(head); + } + }); + } + + static size_type calc_node_size( size_type height ) { + static_assert(alignof(list_node_type) >= alignof(typename list_node_type::atomic_node_ptr), "Incorrect alignment"); + return sizeof(list_node_type) + height * sizeof(typename list_node_type::atomic_node_ptr); + } + + node_ptr create_node( size_type height ) { + size_type sz = calc_node_size(height); + node_ptr node = reinterpret_cast<node_ptr>(node_allocator_traits::allocate(my_node_allocator, sz)); + node_allocator_traits::construct(my_node_allocator, node, height, my_node_allocator); + return node; + } + + template <typename... 
Args> + node_ptr create_value_node( Args&&... args ) { + node_ptr node = create_node(my_rng()); + + // try_call API is not convenient here due to broken + // variadic capture on GCC 4.8.5 + auto value_guard = make_raii_guard([&] { + delete_node(node); + }); + + // Construct the value inside the node + node_allocator_traits::construct(my_node_allocator, node->storage(), std::forward<Args>(args)...); + value_guard.dismiss(); + return node; + } + + node_ptr create_head_node() { + return create_node(max_level); + } + + void delete_node( node_ptr node ) { + size_type sz = calc_node_size(node->height()); + + // Destroy the node + node_allocator_traits::destroy(my_node_allocator, node); + // Deallocate the node + node_allocator_traits::deallocate(my_node_allocator, reinterpret_cast<std::uint8_t*>(node), sz); + } + + void delete_value_node( node_ptr node ) { + // Destroy the value inside the node + node_allocator_traits::destroy(my_node_allocator, node->storage()); + delete_node(node); + } + + node_ptr get_head() const { + return my_head_ptr.load(std::memory_order_acquire); + } + + node_ptr create_head_if_necessary() { + node_ptr current_head = get_head(); + if (current_head == nullptr) { + // Head node was not created - create it + node_ptr new_head = create_head_node(); + if (my_head_ptr.compare_exchange_strong(current_head, new_head)) { + current_head = new_head; + } else { + // If an other thread has already created the head node - destroy new_head + // current_head now points to the actual head node + delete_node(new_head); + } + } + __TBB_ASSERT(my_head_ptr.load(std::memory_order_relaxed) != nullptr, nullptr); + __TBB_ASSERT(current_head != nullptr, nullptr); + return current_head; + } + + static iterator get_iterator( const_iterator it ) { + return iterator(it.my_node_ptr); + } + + void internal_move_assign( concurrent_skip_list&& other, /*POCMA || is_always_equal =*/std::true_type ) { + internal_move(std::move(other)); + } + + void internal_move_assign( concurrent_skip_list&& other, /*POCMA || is_always_equal =*/std::false_type ) { + if (my_node_allocator == other.my_node_allocator) { + internal_move(std::move(other)); + } else { + internal_copy(std::make_move_iterator(other.begin()), std::make_move_iterator(other.end())); + } + } + + void internal_swap_fields( concurrent_skip_list& other ) { + using std::swap; + swap_allocators(my_node_allocator, other.my_node_allocator); + swap(my_compare, other.my_compare); + swap(my_rng, other.my_rng); + + swap_atomics_relaxed(my_head_ptr, other.my_head_ptr); + swap_atomics_relaxed(my_size, other.my_size); + swap_atomics_relaxed(my_max_height, other.my_max_height); + } + + void internal_swap( concurrent_skip_list& other, /*POCMA || is_always_equal =*/std::true_type ) { + internal_swap_fields(other); + } + + void internal_swap( concurrent_skip_list& other, /*POCMA || is_always_equal =*/std::false_type ) { + __TBB_ASSERT(my_node_allocator == other.my_node_allocator, "Swapping with unequal allocators is not allowed"); + internal_swap_fields(other); + } + + node_allocator_type my_node_allocator; + key_compare my_compare; + random_level_generator_type my_rng; + std::atomic<list_node_type*> my_head_ptr; + std::atomic<size_type> my_size; + std::atomic<size_type> my_max_height; + + template<typename OtherTraits> + friend class concurrent_skip_list; +}; // class concurrent_skip_list + +template <typename Traits> +bool operator==( const concurrent_skip_list<Traits>& lhs, const concurrent_skip_list<Traits>& rhs ) { + if (lhs.size() != rhs.size()) return false; 
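+    // The sizes are equal at this point, so an element-wise comparison of the two sorted
+    // sequences decides equality. Usage sketch (assuming the public concurrent_set wrapper
+    // built on top of this class):
+    //     tbb::concurrent_set<int> a{1, 2, 3}, b{1, 2, 3};
+    //     assert(a == b); // size check first, then element-wise comparison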
+#if _MSC_VER + // Passing "unchecked" iterators to std::equal with 3 parameters + // causes compiler warnings. + // The workaround is to use overload with 4 parameters, which is + // available since C++14 - minimally supported version on MSVC + return std::equal(lhs.begin(), lhs.end(), rhs.begin(), rhs.end()); +#else + return std::equal(lhs.begin(), lhs.end(), rhs.begin()); +#endif +} + +#if !__TBB_CPP20_COMPARISONS_PRESENT +template <typename Traits> +bool operator!=( const concurrent_skip_list<Traits>& lhs, const concurrent_skip_list<Traits>& rhs ) { + return !(lhs == rhs); +} +#endif + +#if __TBB_CPP20_COMPARISONS_PRESENT && __TBB_CPP20_CONCEPTS_PRESENT +template <typename Traits> +tbb::detail::synthesized_three_way_result<typename Traits::value_type> +operator<=>( const concurrent_skip_list<Traits>& lhs, const concurrent_skip_list<Traits>& rhs ) { + return std::lexicographical_compare_three_way(lhs.begin(), lhs.end(), + rhs.begin(), rhs.end(), + tbb::detail::synthesized_three_way_comparator{}); +} +#else +template <typename Traits> +bool operator<( const concurrent_skip_list<Traits>& lhs, const concurrent_skip_list<Traits>& rhs ) { + return std::lexicographical_compare(lhs.begin(), lhs.end(), rhs.begin(), rhs.end()); +} + +template <typename Traits> +bool operator>( const concurrent_skip_list<Traits>& lhs, const concurrent_skip_list<Traits>& rhs ) { + return rhs < lhs; +} + +template <typename Traits> +bool operator<=( const concurrent_skip_list<Traits>& lhs, const concurrent_skip_list<Traits>& rhs ) { + return !(rhs < lhs); +} + +template <typename Traits> +bool operator>=( const concurrent_skip_list<Traits>& lhs, const concurrent_skip_list<Traits>& rhs ) { + return !(lhs < rhs); +} +#endif // __TBB_CPP20_COMPARISONS_PRESENT && __TBB_CPP20_CONCEPTS_PRESENT + +// Generates a number from the interval [0, MaxLevel). +template <std::size_t MaxLevel> +class concurrent_geometric_level_generator { +public: + static constexpr std::size_t max_level = MaxLevel; + // TODO: modify the algorithm to accept other values of max_level + static_assert(max_level == 32, "Incompatible max_level for rng"); + + concurrent_geometric_level_generator() : engines(std::minstd_rand::result_type(time(nullptr))) {} + + std::size_t operator()() { + // +1 is required to pass at least 1 into log2 (log2(0) is undefined) + // -1 is required to have an ability to return 0 from the generator (max_level - log2(2^31) - 1) + std::size_t result = max_level - std::size_t(tbb::detail::log2(engines.local()() + 1)) - 1; + __TBB_ASSERT(result <= max_level, nullptr); + return result; + } + +private: + tbb::enumerable_thread_specific<std::minstd_rand> engines; +}; + +} // namespace d1 +} // namespace detail +} // namespace tbb + +#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) +#pragma warning(pop) // warning 4127 is back +#endif + +#endif // __TBB_detail__concurrent_skip_list_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_concurrent_unordered_base.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_concurrent_unordered_base.h new file mode 100644 index 0000000000..3abcce2b29 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_concurrent_unordered_base.h @@ -0,0 +1,1500 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_detail__concurrent_unordered_base_H +#define __TBB_detail__concurrent_unordered_base_H + +#if !defined(__TBB_concurrent_unordered_map_H) && !defined(__TBB_concurrent_unordered_set_H) +#error Do not #include this internal file directly; use public TBB headers instead. +#endif + +#include "_range_common.h" +#include "_containers_helpers.h" +#include "_segment_table.h" +#include "_hash_compare.h" +#include "_allocator_traits.h" +#include "_node_handle.h" +#include "_assert.h" +#include "_utils.h" +#include "_exception.h" +#include <iterator> +#include <utility> +#include <functional> +#include <initializer_list> +#include <atomic> +#include <type_traits> +#include <memory> +#include <algorithm> + +#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) +#pragma warning(push) +#pragma warning(disable: 4127) // warning C4127: conditional expression is constant +#endif + +namespace tbb { +namespace detail { +namespace d1 { + +template <typename Traits> +class concurrent_unordered_base; + +template<typename Container, typename Value> +class solist_iterator { +private: + using node_ptr = typename Container::value_node_ptr; + template <typename T, typename Allocator> + friend class split_ordered_list; + template<typename M, typename V> + friend class solist_iterator; + template <typename Traits> + friend class concurrent_unordered_base; + template<typename M, typename T, typename U> + friend bool operator==( const solist_iterator<M,T>& i, const solist_iterator<M,U>& j ); + template<typename M, typename T, typename U> + friend bool operator!=( const solist_iterator<M,T>& i, const solist_iterator<M,U>& j ); +public: + using value_type = Value; + using difference_type = typename Container::difference_type; + using pointer = value_type*; + using reference = value_type&; + using iterator_category = std::forward_iterator_tag; + + solist_iterator() : my_node_ptr(nullptr) {} + solist_iterator( const solist_iterator<Container, typename Container::value_type>& other ) + : my_node_ptr(other.my_node_ptr) {} + + solist_iterator& operator=( const solist_iterator<Container, typename Container::value_type>& other ) { + my_node_ptr = other.my_node_ptr; + return *this; + } + + reference operator*() const { + return my_node_ptr->value(); + } + + pointer operator->() const { + return my_node_ptr->storage(); + } + + solist_iterator& operator++() { + auto next_node = my_node_ptr->next(); + while(next_node && next_node->is_dummy()) { + next_node = next_node->next(); + } + my_node_ptr = static_cast<node_ptr>(next_node); + return *this; + } + + solist_iterator operator++(int) { + solist_iterator tmp = *this; + ++*this; + return tmp; + } + +private: + solist_iterator( node_ptr pnode ) : my_node_ptr(pnode) {} + + node_ptr get_node_ptr() const { return my_node_ptr; } + + node_ptr my_node_ptr; +}; + +template<typename Solist, typename T, typename U> +bool operator==( const solist_iterator<Solist, T>& i, const solist_iterator<Solist, U>& j ) { + return i.my_node_ptr == j.my_node_ptr; +} + +template<typename Solist, typename T, typename U> +bool operator!=( const solist_iterator<Solist, T>& i, const 
solist_iterator<Solist, U>& j ) { + return i.my_node_ptr != j.my_node_ptr; +} + +template <typename SokeyType> +class list_node { +public: + using node_ptr = list_node*; + using sokey_type = SokeyType; + + list_node(sokey_type key) : my_next(nullptr), my_order_key(key) {} + + void init( sokey_type key ) { + my_order_key = key; + } + + sokey_type order_key() const { + return my_order_key; + } + + bool is_dummy() { + // The last bit of order key is unset for dummy nodes + return (my_order_key & 0x1) == 0; + } + + node_ptr next() const { + return my_next.load(std::memory_order_acquire); + } + + void set_next( node_ptr next_node ) { + my_next.store(next_node, std::memory_order_release); + } + + bool try_set_next( node_ptr expected_next, node_ptr new_next ) { + return my_next.compare_exchange_strong(expected_next, new_next); + } + +private: + std::atomic<node_ptr> my_next; + sokey_type my_order_key; +}; // class list_node + +template <typename ValueType, typename SokeyType> +class value_node : public list_node<SokeyType> +{ +public: + using base_type = list_node<SokeyType>; + using sokey_type = typename base_type::sokey_type; + using value_type = ValueType; + + value_node( sokey_type ord_key ) : base_type(ord_key) {} + ~value_node() {} + value_type* storage() { + return reinterpret_cast<value_type*>(&my_value); + } + + value_type& value() { + return *storage(); + } + +private: + using aligned_storage_type = typename std::aligned_storage<sizeof(value_type)>::type; + aligned_storage_type my_value; +}; // class value_node + +template <typename Traits> +class concurrent_unordered_base { + using self_type = concurrent_unordered_base<Traits>; + using traits_type = Traits; + using hash_compare_type = typename traits_type::hash_compare_type; + class unordered_segment_table; +public: + using value_type = typename traits_type::value_type; + using key_type = typename traits_type::key_type; + using allocator_type = typename traits_type::allocator_type; + +private: + using allocator_traits_type = tbb::detail::allocator_traits<allocator_type>; + // TODO: check assert conditions for different C++ standards + static_assert(std::is_same<typename allocator_traits_type::value_type, value_type>::value, + "value_type of the container must be the same as its allocator"); + using sokey_type = std::size_t; + +public: + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + + using iterator = solist_iterator<self_type, value_type>; + using const_iterator = solist_iterator<self_type, const value_type>; + using local_iterator = iterator; + using const_local_iterator = const_iterator; + + using reference = value_type&; + using const_reference = const value_type&; + using pointer = typename allocator_traits_type::pointer; + using const_pointer = typename allocator_traits_type::const_pointer; + + using hasher = typename hash_compare_type::hasher; + using key_equal = typename hash_compare_type::key_equal; + +private: + using list_node_type = list_node<sokey_type>; + using value_node_type = value_node<value_type, sokey_type>; + using node_ptr = list_node_type*; + using value_node_ptr = value_node_type*; + + using value_node_allocator_type = typename allocator_traits_type::template rebind_alloc<value_node_type>; + using node_allocator_type = typename allocator_traits_type::template rebind_alloc<list_node_type>; + + using node_allocator_traits = tbb::detail::allocator_traits<node_allocator_type>; + using value_node_allocator_traits = tbb::detail::allocator_traits<value_node_allocator_type>; + + static 
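+    // Rounds the requested bucket count up to the nearest power of two:
+    // for n > 0, floor(log2(2 * n - 1)) is the exponent of the smallest power of two >= n.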
constexpr size_type round_up_to_power_of_two( size_type bucket_count ) { + return size_type(1) << size_type(tbb::detail::log2(uintptr_t(bucket_count == 0 ? 1 : bucket_count) * 2 - 1)); + } + + template <typename T> + using is_transparent = dependent_bool<has_transparent_key_equal<key_type, hasher, key_equal>, T>; +public: + using node_type = node_handle<key_type, value_type, value_node_type, allocator_type>; + + explicit concurrent_unordered_base( size_type bucket_count, const hasher& hash = hasher(), + const key_equal& equal = key_equal(), const allocator_type& alloc = allocator_type() ) + : my_size(0), + my_bucket_count(round_up_to_power_of_two(bucket_count)), + my_max_load_factor(float(initial_max_load_factor)), + my_hash_compare(hash, equal), + my_head(sokey_type(0)), + my_segments(alloc) {} + + concurrent_unordered_base() : concurrent_unordered_base(initial_bucket_count) {} + + concurrent_unordered_base( size_type bucket_count, const allocator_type& alloc ) + : concurrent_unordered_base(bucket_count, hasher(), key_equal(), alloc) {} + + concurrent_unordered_base( size_type bucket_count, const hasher& hash, const allocator_type& alloc ) + : concurrent_unordered_base(bucket_count, hash, key_equal(), alloc) {} + + explicit concurrent_unordered_base( const allocator_type& alloc ) + : concurrent_unordered_base(initial_bucket_count, hasher(), key_equal(), alloc) {} + + template <typename InputIterator> + concurrent_unordered_base( InputIterator first, InputIterator last, + size_type bucket_count = initial_bucket_count, const hasher& hash = hasher(), + const key_equal& equal = key_equal(), const allocator_type& alloc = allocator_type() ) + : concurrent_unordered_base(bucket_count, hash, equal, alloc) + { + insert(first, last); + } + + template <typename InputIterator> + concurrent_unordered_base( InputIterator first, InputIterator last, + size_type bucket_count, const allocator_type& alloc ) + : concurrent_unordered_base(first, last, bucket_count, hasher(), key_equal(), alloc) {} + + template <typename InputIterator> + concurrent_unordered_base( InputIterator first, InputIterator last, + size_type bucket_count, const hasher& hash, const allocator_type& alloc ) + : concurrent_unordered_base(first, last, bucket_count, hash, key_equal(), alloc) {} + + concurrent_unordered_base( const concurrent_unordered_base& other ) + : my_size(other.my_size.load(std::memory_order_relaxed)), + my_bucket_count(other.my_bucket_count.load(std::memory_order_relaxed)), + my_max_load_factor(other.my_max_load_factor), + my_hash_compare(other.my_hash_compare), + my_head(other.my_head.order_key()), + my_segments(other.my_segments) + { + try_call( [&] { + internal_copy(other); + } ).on_exception( [&] { + clear(); + }); + } + + concurrent_unordered_base( const concurrent_unordered_base& other, const allocator_type& alloc ) + : my_size(other.my_size.load(std::memory_order_relaxed)), + my_bucket_count(other.my_bucket_count.load(std::memory_order_relaxed)), + my_max_load_factor(other.my_max_load_factor), + my_hash_compare(other.my_hash_compare), + my_head(other.my_head.order_key()), + my_segments(other.my_segments, alloc) + { + try_call( [&] { + internal_copy(other); + } ).on_exception( [&] { + clear(); + }); + } + + concurrent_unordered_base( concurrent_unordered_base&& other ) + : my_size(other.my_size.load(std::memory_order_relaxed)), + my_bucket_count(other.my_bucket_count.load(std::memory_order_relaxed)), + my_max_load_factor(std::move(other.my_max_load_factor)), + my_hash_compare(std::move(other.my_hash_compare)), + 
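+          // my_head is an embedded dummy node and cannot be pilfered; it is re-initialized
+          // with the source's order key, and move_content() in the body takes over the
+          // element nodes.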
my_head(other.my_head.order_key()), + my_segments(std::move(other.my_segments)) + { + move_content(std::move(other)); + } + + concurrent_unordered_base( concurrent_unordered_base&& other, const allocator_type& alloc ) + : my_size(other.my_size.load(std::memory_order_relaxed)), + my_bucket_count(other.my_bucket_count.load(std::memory_order_relaxed)), + my_max_load_factor(std::move(other.my_max_load_factor)), + my_hash_compare(std::move(other.my_hash_compare)), + my_head(other.my_head.order_key()), + my_segments(std::move(other.my_segments), alloc) + { + using is_always_equal = typename allocator_traits_type::is_always_equal; + internal_move_construct_with_allocator(std::move(other), alloc, is_always_equal()); + } + + concurrent_unordered_base( std::initializer_list<value_type> init, + size_type bucket_count = initial_bucket_count, + const hasher& hash = hasher(), const key_equal& equal = key_equal(), + const allocator_type& alloc = allocator_type() ) + : concurrent_unordered_base(init.begin(), init.end(), bucket_count, hash, equal, alloc) {} + + concurrent_unordered_base( std::initializer_list<value_type> init, + size_type bucket_count, const allocator_type& alloc ) + : concurrent_unordered_base(init, bucket_count, hasher(), key_equal(), alloc) {} + + concurrent_unordered_base( std::initializer_list<value_type> init, + size_type bucket_count, const hasher& hash, const allocator_type& alloc ) + : concurrent_unordered_base(init, bucket_count, hash, key_equal(), alloc) {} + + ~concurrent_unordered_base() { + internal_clear(); + } + + concurrent_unordered_base& operator=( const concurrent_unordered_base& other ) { + if (this != &other) { + clear(); + my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); + my_bucket_count.store(other.my_bucket_count.load(std::memory_order_relaxed), std::memory_order_relaxed); + my_max_load_factor = other.my_max_load_factor; + my_hash_compare = other.my_hash_compare; + my_segments = other.my_segments; + internal_copy(other); // TODO: guards for exceptions? 
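+            // Unlike the copy constructor, this path does not roll back (clear()) if
+            // internal_copy throws.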
+ } + return *this; + } + + concurrent_unordered_base& operator=( concurrent_unordered_base&& other ) noexcept(unordered_segment_table::is_noexcept_assignment) { + if (this != &other) { + clear(); + my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); + my_bucket_count.store(other.my_bucket_count.load(std::memory_order_relaxed), std::memory_order_relaxed); + my_max_load_factor = std::move(other.my_max_load_factor); + my_hash_compare = std::move(other.my_hash_compare); + my_segments = std::move(other.my_segments); + + using pocma_type = typename allocator_traits_type::propagate_on_container_move_assignment; + using is_always_equal = typename allocator_traits_type::is_always_equal; + internal_move_assign(std::move(other), tbb::detail::disjunction<pocma_type, is_always_equal>()); + } + return *this; + } + + concurrent_unordered_base& operator=( std::initializer_list<value_type> init ) { + clear(); + insert(init); + return *this; + } + + void swap( concurrent_unordered_base& other ) noexcept(unordered_segment_table::is_noexcept_swap) { + if (this != &other) { + using pocs_type = typename allocator_traits_type::propagate_on_container_swap; + using is_always_equal = typename allocator_traits_type::is_always_equal; + internal_swap(other, tbb::detail::disjunction<pocs_type, is_always_equal>()); + } + } + + allocator_type get_allocator() const noexcept { return my_segments.get_allocator(); } + + iterator begin() noexcept { return iterator(first_value_node(&my_head)); } + const_iterator begin() const noexcept { return const_iterator(first_value_node(const_cast<node_ptr>(&my_head))); } + const_iterator cbegin() const noexcept { return const_iterator(first_value_node(const_cast<node_ptr>(&my_head))); } + + iterator end() noexcept { return iterator(nullptr); } + const_iterator end() const noexcept { return const_iterator(nullptr); } + const_iterator cend() const noexcept { return const_iterator(nullptr); } + + __TBB_nodiscard bool empty() const noexcept { return size() == 0; } + size_type size() const noexcept { return my_size.load(std::memory_order_relaxed); } + size_type max_size() const noexcept { return allocator_traits_type::max_size(get_allocator()); } + + void clear() noexcept { + internal_clear(); + } + + std::pair<iterator, bool> insert( const value_type& value ) { + return internal_insert_value(value); + } + + std::pair<iterator, bool> insert( value_type&& value ) { + return internal_insert_value(std::move(value)); + } + + iterator insert( const_iterator, const value_type& value ) { + // Ignore hint + return insert(value).first; + } + + iterator insert( const_iterator, value_type&& value ) { + // Ignore hint + return insert(std::move(value)).first; + } + + template <typename InputIterator> + void insert( InputIterator first, InputIterator last ) { + for (; first != last; ++first) { + insert(*first); + } + } + + void insert( std::initializer_list<value_type> init ) { + insert(init.begin(), init.end()); + } + + std::pair<iterator, bool> insert( node_type&& nh ) { + if (!nh.empty()) { + value_node_ptr insert_node = node_handle_accessor::get_node_ptr(nh); + auto init_node = [&insert_node]( sokey_type order_key )->value_node_ptr { + insert_node->init(order_key); + return insert_node; + }; + auto insert_result = internal_insert(insert_node->value(), init_node); + if (insert_result.inserted) { + // If the insertion succeeded - set node handle to the empty state + __TBB_ASSERT(insert_result.remaining_node == nullptr, + "internal_insert_node should not return the 
remaining node if the insertion succeeded"); + node_handle_accessor::deactivate(nh); + } + return { iterator(insert_result.node_with_equal_key), insert_result.inserted }; + } + return {end(), false}; + } + + iterator insert( const_iterator, node_type&& nh ) { + // Ignore hint + return insert(std::move(nh)).first; + } + + template <typename... Args> + std::pair<iterator, bool> emplace( Args&&... args ) { + // Create a node with temporary order_key 0, which will be reinitialize + // in internal_insert after the hash calculation + value_node_ptr insert_node = create_node(0, std::forward<Args>(args)...); + + auto init_node = [&insert_node]( sokey_type order_key )->value_node_ptr { + insert_node->init(order_key); + return insert_node; + }; + + auto insert_result = internal_insert(insert_node->value(), init_node); + + if (!insert_result.inserted) { + // If the insertion failed - destroy the node which was created + insert_node->init(split_order_key_regular(1)); + destroy_node(insert_node); + } + + return { iterator(insert_result.node_with_equal_key), insert_result.inserted }; + } + + template <typename... Args> + iterator emplace_hint( const_iterator, Args&&... args ) { + // Ignore hint + return emplace(std::forward<Args>(args)...).first; + } + + iterator unsafe_erase( const_iterator pos ) { + return iterator(first_value_node(internal_erase(pos.get_node_ptr()))); + } + + iterator unsafe_erase( iterator pos ) { + return iterator(first_value_node(internal_erase(pos.get_node_ptr()))); + } + + iterator unsafe_erase( const_iterator first, const_iterator last ) { + while(first != last) { + first = unsafe_erase(first); + } + return iterator(first.get_node_ptr()); + } + + size_type unsafe_erase( const key_type& key ) { + return internal_erase_by_key(key); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value + && !std::is_convertible<K, const_iterator>::value + && !std::is_convertible<K, iterator>::value, + size_type>::type unsafe_erase( const K& key ) + { + return internal_erase_by_key(key); + } + + node_type unsafe_extract( const_iterator pos ) { + internal_extract(pos.get_node_ptr()); + return node_handle_accessor::construct<node_type>(pos.get_node_ptr()); + } + + node_type unsafe_extract( iterator pos ) { + internal_extract(pos.get_node_ptr()); + return node_handle_accessor::construct<node_type>(pos.get_node_ptr()); + } + + node_type unsafe_extract( const key_type& key ) { + iterator item = find(key); + return item == end() ? node_type() : unsafe_extract(item); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value + && !std::is_convertible<K, const_iterator>::value + && !std::is_convertible<K, iterator>::value, + node_type>::type unsafe_extract( const K& key ) + { + iterator item = find(key); + return item == end() ? node_type() : unsafe_extract(item); + } + + // Lookup functions + iterator find( const key_type& key ) { + value_node_ptr result = internal_find(key); + return result == nullptr ? end() : iterator(result); + } + + const_iterator find( const key_type& key ) const { + value_node_ptr result = const_cast<self_type*>(this)->internal_find(key); + return result == nullptr ? end() : const_iterator(result); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value, iterator>::type find( const K& key ) { + value_node_ptr result = internal_find(key); + return result == nullptr ? 
end() : iterator(result); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value, const_iterator>::type find( const K& key ) const { + value_node_ptr result = const_cast<self_type*>(this)->internal_find(key); + return result == nullptr ? end() : const_iterator(result); + } + + std::pair<iterator, iterator> equal_range( const key_type& key ) { + auto result = internal_equal_range(key); + return std::make_pair(iterator(result.first), iterator(result.second)); + } + + std::pair<const_iterator, const_iterator> equal_range( const key_type& key ) const { + auto result = const_cast<self_type*>(this)->internal_equal_range(key); + return std::make_pair(const_iterator(result.first), const_iterator(result.second)); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value, std::pair<iterator, iterator>>::type equal_range( const K& key ) { + auto result = internal_equal_range(key); + return std::make_pair(iterator(result.first), iterator(result.second)); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value, std::pair<const_iterator, const_iterator>>::type equal_range( const K& key ) const { + auto result = const_cast<self_type*>(this)->internal_equal_range(key); + return std::make_pair(iterator(result.first), iterator(result.second)); + } + + size_type count( const key_type& key ) const { + return internal_count(key); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value, size_type>::type count( const K& key ) const { + return internal_count(key); + } + + bool contains( const key_type& key ) const { + return find(key) != end(); + } + + template <typename K> + typename std::enable_if<is_transparent<K>::value, bool>::type contains( const K& key ) const { + return find(key) != end(); + } + + // Bucket interface + local_iterator unsafe_begin( size_type n ) { + return local_iterator(first_value_node(get_bucket(n))); + } + + const_local_iterator unsafe_begin( size_type n ) const { + auto bucket_begin = first_value_node(const_cast<self_type*>(this)->get_bucket(n)); + return const_local_iterator(bucket_begin); + } + + const_local_iterator unsafe_cbegin( size_type n ) const { + auto bucket_begin = first_value_node(const_cast<self_type*>(this)->get_bucket(n)); + return const_local_iterator(bucket_begin); + } + + local_iterator unsafe_end( size_type n ) { + size_type bucket_count = my_bucket_count.load(std::memory_order_relaxed); + return n != bucket_count - 1 ? unsafe_begin(get_next_bucket_index(n)) : local_iterator(nullptr); + } + + const_local_iterator unsafe_end( size_type n ) const { + size_type bucket_count = my_bucket_count.load(std::memory_order_relaxed); + return n != bucket_count - 1 ? unsafe_begin(get_next_bucket_index(n)) : const_local_iterator(nullptr); + } + + const_local_iterator unsafe_cend( size_type n ) const { + size_type bucket_count = my_bucket_count.load(std::memory_order_relaxed); + return n != bucket_count - 1 ? 
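+        // A bucket ends where the next bucket begins; the last bucket ends at the end of the
+        // underlying split-ordered list (nullptr).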
unsafe_begin(get_next_bucket_index(n)) : const_local_iterator(nullptr); + } + + size_type unsafe_bucket_count() const { return my_bucket_count.load(std::memory_order_relaxed); } + + size_type unsafe_max_bucket_count() const { + return max_size(); + } + + size_type unsafe_bucket_size( size_type n ) const { + return size_type(std::distance(unsafe_begin(n), unsafe_end(n))); + } + + size_type unsafe_bucket( const key_type& key ) const { + return my_hash_compare(key) % my_bucket_count.load(std::memory_order_relaxed); + } + + // Hash policy + float load_factor() const { + return float(size() / float(my_bucket_count.load(std::memory_order_acquire))); + } + + float max_load_factor() const { return my_max_load_factor; } + + void max_load_factor( float mlf ) { + if (mlf != mlf || mlf < 0) { + tbb::detail::throw_exception(exception_id::invalid_load_factor); + } + my_max_load_factor = mlf; + } // TODO: unsafe? + + void rehash( size_type bucket_count ) { + size_type current_bucket_count = my_bucket_count.load(std::memory_order_acquire); + if (current_bucket_count < bucket_count) { + // TODO: do we need do-while here? + my_bucket_count.compare_exchange_strong(current_bucket_count, round_up_to_power_of_two(bucket_count)); + } + } + + void reserve( size_type elements_count ) { + size_type current_bucket_count = my_bucket_count.load(std::memory_order_acquire); + size_type necessary_bucket_count = current_bucket_count; + + do { + // TODO: Log2 seems useful here + while (necessary_bucket_count * max_load_factor() < elements_count) { + necessary_bucket_count <<= 1; + } + } while (current_bucket_count >= necessary_bucket_count || + !my_bucket_count.compare_exchange_strong(current_bucket_count, necessary_bucket_count)); + } + + // Observers + hasher hash_function() const { return my_hash_compare.hash_function(); } + key_equal key_eq() const { return my_hash_compare.key_eq(); } + + class const_range_type { + private: + const concurrent_unordered_base& my_instance; + node_ptr my_begin_node; // may be node* const + node_ptr my_end_node; + mutable node_ptr my_midpoint_node; + public: + using size_type = typename concurrent_unordered_base::size_type; + using value_type = typename concurrent_unordered_base::value_type; + using reference = typename concurrent_unordered_base::reference; + using difference_type = typename concurrent_unordered_base::difference_type; + using iterator = typename concurrent_unordered_base::const_iterator; + + bool empty() const { return my_begin_node == my_end_node; } + + bool is_divisible() const { + return my_midpoint_node != my_end_node; + } + + size_type grainsize() const { return 1; } + + const_range_type( const_range_type& range, split ) + : my_instance(range.my_instance), + my_begin_node(range.my_midpoint_node), + my_end_node(range.my_end_node) + { + range.my_end_node = my_begin_node; + __TBB_ASSERT(!empty(), "Splitting despite the range is not divisible"); + __TBB_ASSERT(!range.empty(), "Splitting despite the range is not divisible"); + set_midpoint(); + range.set_midpoint(); + } + + iterator begin() const { return iterator(my_instance.first_value_node(my_begin_node)); } + iterator end() const { return iterator(my_instance.first_value_node(my_end_node)); } + + const_range_type( const concurrent_unordered_base& table ) + : my_instance(table), my_begin_node(const_cast<node_ptr>(&table.my_head)), my_end_node(nullptr) + { + set_midpoint(); + } + private: + void set_midpoint() const { + if (my_begin_node == my_end_node) { + my_midpoint_node = my_end_node; + } else { + sokey_type 
invalid_key = ~sokey_type(0); + sokey_type begin_key = my_begin_node != nullptr ? my_begin_node->order_key() : invalid_key; + sokey_type end_key = my_end_node != nullptr ? my_end_node->order_key() : invalid_key; + + size_type mid_bucket = reverse_bits(begin_key + (end_key - begin_key) / 2) % + my_instance.my_bucket_count.load(std::memory_order_relaxed); + while( my_instance.my_segments[mid_bucket].load(std::memory_order_relaxed) == nullptr) { + mid_bucket = my_instance.get_parent(mid_bucket); + } + if (reverse_bits(mid_bucket) > begin_key) { + // Found a dummy node between begin and end + my_midpoint_node = my_instance.first_value_node( + my_instance.my_segments[mid_bucket].load(std::memory_order_relaxed)); + } else { + // Didn't find a dummy node between begin and end + my_midpoint_node = my_end_node; + } + } + } + }; // class const_range_type + + class range_type : public const_range_type { + public: + using iterator = typename concurrent_unordered_base::iterator; + using const_range_type::const_range_type; + + iterator begin() const { return iterator(const_range_type::begin().get_node_ptr()); } + iterator end() const { return iterator(const_range_type::end().get_node_ptr()); } + }; // class range_type + + // Parallel iteration + range_type range() { + return range_type(*this); + } + + const_range_type range() const { + return const_range_type(*this); + } +protected: + static constexpr bool allow_multimapping = traits_type::allow_multimapping; + +private: + static constexpr size_type initial_bucket_count = 8; + static constexpr float initial_max_load_factor = 4; // TODO: consider 1? + static constexpr size_type pointers_per_embedded_table = sizeof(size_type) * 8 - 1; + + class unordered_segment_table + : public segment_table<std::atomic<node_ptr>, allocator_type, unordered_segment_table, pointers_per_embedded_table> + { + using self_type = unordered_segment_table; + using atomic_node_ptr = std::atomic<node_ptr>; + using base_type = segment_table<std::atomic<node_ptr>, allocator_type, unordered_segment_table, pointers_per_embedded_table>; + using segment_type = typename base_type::segment_type; + using base_allocator_type = typename base_type::allocator_type; + + using segment_allocator_type = typename allocator_traits_type::template rebind_alloc<atomic_node_ptr>; + using segment_allocator_traits = tbb::detail::allocator_traits<segment_allocator_type>; + public: + // Segment table for unordered containers should not be extended in the wait- free implementation + static constexpr bool allow_table_extending = false; + static constexpr bool is_noexcept_assignment = std::is_nothrow_move_assignable<hasher>::value && + std::is_nothrow_move_assignable<key_equal>::value && + segment_allocator_traits::is_always_equal::value; + static constexpr bool is_noexcept_swap = tbb::detail::is_nothrow_swappable<hasher>::value && + tbb::detail::is_nothrow_swappable<key_equal>::value && + segment_allocator_traits::is_always_equal::value; + + // TODO: using base_type::base_type is not compiling on Windows and Intel Compiler - investigate + unordered_segment_table( const base_allocator_type& alloc = base_allocator_type() ) + : base_type(alloc) {} + + unordered_segment_table( const unordered_segment_table& ) = default; + + unordered_segment_table( const unordered_segment_table& other, const base_allocator_type& alloc ) + : base_type(other, alloc) {} + + unordered_segment_table( unordered_segment_table&& ) = default; + + unordered_segment_table( unordered_segment_table&& other, const base_allocator_type& alloc ) + 
: base_type(std::move(other), alloc) {} + + unordered_segment_table& operator=( const unordered_segment_table& ) = default; + + unordered_segment_table& operator=( unordered_segment_table&& ) = default; + + segment_type create_segment( typename base_type::segment_table_type, typename base_type::segment_index_type segment_index, size_type ) { + segment_allocator_type alloc(this->get_allocator()); + size_type seg_size = this->segment_size(segment_index); + segment_type new_segment = segment_allocator_traits::allocate(alloc, seg_size); + for (size_type i = 0; i != seg_size; ++i) { + segment_allocator_traits::construct(alloc, new_segment + i, nullptr); + } + return new_segment; + } + + // deallocate_segment is required by the segment_table base class, but + // in unordered, it is also necessary to call the destructor during deallocation + void deallocate_segment( segment_type address, size_type index ) { + destroy_segment(address, index); + } + + void destroy_segment( segment_type address, size_type index ) { + segment_allocator_type alloc(this->get_allocator()); + for (size_type i = 0; i != this->segment_size(index); ++i) { + segment_allocator_traits::destroy(alloc, address + i); + } + segment_allocator_traits::deallocate(alloc, address, this->segment_size(index)); + } + + + void copy_segment( size_type index, segment_type, segment_type to ) { + if (index == 0) { + // The first element in the first segment is embedded into the table (my_head) + // so the first pointer should not be stored here + // It would be stored during move ctor/assignment operation + to[1].store(nullptr, std::memory_order_relaxed); + } else { + for (size_type i = 0; i != this->segment_size(index); ++i) { + to[i].store(nullptr, std::memory_order_relaxed); + } + } + } + + void move_segment( size_type index, segment_type from, segment_type to ) { + if (index == 0) { + // The first element in the first segment is embedded into the table (my_head) + // so the first pointer should not be stored here + // It would be stored during move ctor/assignment operation + to[1].store(from[1].load(std::memory_order_relaxed), std::memory_order_relaxed); + } else { + for (size_type i = 0; i != this->segment_size(index); ++i) { + to[i].store(from[i].load(std::memory_order_relaxed), std::memory_order_relaxed); + from[i].store(nullptr, std::memory_order_relaxed); + } + } + } + + // allocate_long_table is required by the segment_table base class, but unused for unordered containers + typename base_type::segment_table_type allocate_long_table( const typename base_type::atomic_segment*, size_type ) { + __TBB_ASSERT(false, "This method should never been called"); + // TableType is a pointer + return nullptr; + } + + // destroy_elements is required by the segment_table base class, but unused for unordered containers + // this function call but do nothing + void destroy_elements() {} + }; // struct unordered_segment_table + + void internal_clear() { + // TODO: consider usefulness of two versions of clear() - with dummy nodes deallocation and without it + node_ptr next = my_head.next(); + node_ptr curr = next; + + my_head.set_next(nullptr); + + while (curr != nullptr) { + next = curr->next(); + destroy_node(curr); + curr = next; + } + + my_size.store(0, std::memory_order_relaxed); + my_segments.clear(); + } + + void destroy_node( node_ptr node ) { + if (node->is_dummy()) { + node_allocator_type dummy_node_allocator(my_segments.get_allocator()); + // Destroy the node + node_allocator_traits::destroy(dummy_node_allocator, node); + // Deallocate the 
memory
+            node_allocator_traits::deallocate(dummy_node_allocator, node, 1);
+        } else {
+            value_node_ptr val_node = static_cast<value_node_ptr>(node);
+            value_node_allocator_type value_node_allocator(my_segments.get_allocator());
+            // Destroy the value
+            value_node_allocator_traits::destroy(value_node_allocator, val_node->storage());
+            // Destroy the node
+            value_node_allocator_traits::destroy(value_node_allocator, val_node);
+            // Deallocate the memory
+            value_node_allocator_traits::deallocate(value_node_allocator, val_node, 1);
+        }
+    }
+
+    struct internal_insert_return_type {
+        // If the insertion failed - remaining_node points to the node that failed to be inserted.
+        // This node can be allocated during the insertion process.
+        value_node_ptr remaining_node;
+        // If the insertion failed - node_with_equal_key points to the node in the list whose key
+        // is equivalent to the inserted one; otherwise it points to the node that was inserted.
+        value_node_ptr node_with_equal_key;
+        // Insertion status
+        // NOTE: if inserted is true - remaining_node should be nullptr
+        bool inserted;
+    }; // struct internal_insert_return_type
+
+    // Inserts the value into the split ordered list
+    template <typename ValueType>
+    std::pair<iterator, bool> internal_insert_value( ValueType&& value ) {
+
+        auto create_value_node = [&value, this]( sokey_type order_key )->value_node_ptr {
+            return create_node(order_key, std::forward<ValueType>(value));
+        };
+
+        auto insert_result = internal_insert(value, create_value_node);
+
+        if (insert_result.remaining_node != nullptr) {
+            // The insertion failed - destroy the node that failed to be inserted, if it exists
+            __TBB_ASSERT(!insert_result.inserted,
+                         "remaining_node should be nullptr if the node was successfully inserted");
+            destroy_node(insert_result.remaining_node);
+        }
+
+        return { iterator(insert_result.node_with_equal_key), insert_result.inserted };
+    }
+
+    // Inserts the node into the split ordered list
+    // Creates a node using the specified callback after the place for insertion was found
+    // Returns an internal_insert_return_type object, where:
+    //     - If the insertion succeeded:
+    //         - remaining_node is nullptr
+    //         - node_with_equal_key points to the inserted node
+    //         - inserted is true
+    //     - If the insertion failed:
+    //         - remaining_node points to the node that failed to be inserted, if it was created;
+ // nullptr if the node was not created, because the requested key was already + // presented in the list + // - node_with_equal_key point to the element in the list with the key, equivalent to + // to the requested key + // - inserted is false + template <typename ValueType, typename CreateInsertNode> + internal_insert_return_type internal_insert( ValueType&& value, CreateInsertNode create_insert_node ) { + static_assert(std::is_same<typename std::decay<ValueType>::type, value_type>::value, + "Incorrect type in internal_insert"); + const key_type& key = traits_type::get_key(value); + sokey_type hash_key = sokey_type(my_hash_compare(key)); + + sokey_type order_key = split_order_key_regular(hash_key); + node_ptr prev = prepare_bucket(hash_key); + __TBB_ASSERT(prev != nullptr, "Invalid head node"); + + auto search_result = search_after(prev, order_key, key); + + if (search_result.second) { + return internal_insert_return_type{ nullptr, search_result.first, false }; + } + + value_node_ptr new_node = create_insert_node(order_key); + node_ptr curr = search_result.first; + + while (!try_insert(prev, new_node, curr)) { + search_result = search_after(prev, order_key, key); + if (search_result.second) { + return internal_insert_return_type{ new_node, search_result.first, false }; + } + curr = search_result.first; + } + + auto sz = my_size.fetch_add(1); + adjust_table_size(sz + 1, my_bucket_count.load(std::memory_order_acquire)); + return internal_insert_return_type{ nullptr, static_cast<value_node_ptr>(new_node), true }; + } + + // Searches the node with the key, equivalent to key with requested order key after the node prev + // Returns the existing node and true if the node is already in the list + // Returns the first node with the order key, greater than requested and false if the node is not presented in the list + std::pair<value_node_ptr, bool> search_after( node_ptr& prev, sokey_type order_key, const key_type& key ) { + // NOTE: static_cast<value_node_ptr>(curr) should be done only after we would ensure + // that the node is not a dummy node + + node_ptr curr = prev->next(); + + while (curr != nullptr && (curr->order_key() < order_key || + (curr->order_key() == order_key && !my_hash_compare(traits_type::get_key(static_cast<value_node_ptr>(curr)->value()), key)))) + { + prev = curr; + curr = curr->next(); + } + + if (curr != nullptr && curr->order_key() == order_key && !allow_multimapping) { + return { static_cast<value_node_ptr>(curr), true }; + } + return { static_cast<value_node_ptr>(curr), false }; + } + + void adjust_table_size( size_type total_elements, size_type current_size ) { + // Grow the table by a factor of 2 if possible and needed + if ( (float(total_elements) / float(current_size)) > my_max_load_factor ) { + // Double the size of the hash only if size hash not changed in between loads + my_bucket_count.compare_exchange_strong(current_size, 2u * current_size); + } + } + + node_ptr insert_dummy_node( node_ptr parent_dummy_node, sokey_type order_key ) { + node_ptr prev_node = parent_dummy_node; + + node_ptr dummy_node = create_dummy_node(order_key); + node_ptr next_node; + + do { + next_node = prev_node->next(); + // Move forward through the list while the order key is less than requested + while (next_node != nullptr && next_node->order_key() < order_key) { + prev_node = next_node; + next_node = next_node->next(); + } + + if (next_node != nullptr && next_node->order_key() == order_key) { + // Another dummy node with the same order key was inserted by another thread + // Destroy 
the node and exit
+                destroy_node(dummy_node);
+                return next_node;
+            }
+        } while (!try_insert(prev_node, dummy_node, next_node));
+
+        return dummy_node;
+    }
+
+    // Tries to insert a node between prev_node and the expected next node
+    // Returns false if the actual next node is no longer equal to the expected one
+    static bool try_insert( node_ptr prev_node, node_ptr new_node, node_ptr current_next_node ) {
+        new_node->set_next(current_next_node);
+        return prev_node->try_set_next(current_next_node, new_node);
+    }
+
+    // Returns the bucket associated with hash_key
+    node_ptr prepare_bucket( sokey_type hash_key ) {
+        size_type bucket = hash_key % my_bucket_count.load(std::memory_order_acquire);
+        return get_bucket(bucket);
+    }
+
+    // Initializes the corresponding bucket if it is not initialized yet
+    node_ptr get_bucket( size_type bucket_index ) {
+        if (my_segments[bucket_index].load(std::memory_order_acquire) == nullptr) {
+            init_bucket(bucket_index);
+        }
+        return my_segments[bucket_index].load(std::memory_order_acquire);
+    }
+
+    void init_bucket( size_type bucket ) {
+        if (bucket == 0) {
+            // Atomically store the first bucket into my_head
+            node_ptr disabled = nullptr;
+            my_segments[0].compare_exchange_strong(disabled, &my_head);
+            return;
+        }
+
+        size_type parent_bucket = get_parent(bucket);
+
+        while (my_segments[parent_bucket].load(std::memory_order_acquire) == nullptr) {
+            // Initialize all of the parent buckets
+            init_bucket(parent_bucket);
+        }
+
+        __TBB_ASSERT(my_segments[parent_bucket].load(std::memory_order_acquire) != nullptr, "Parent bucket should be initialized");
+        node_ptr parent = my_segments[parent_bucket].load(std::memory_order_acquire);
+
+        // Insert a dummy node into the list
+        node_ptr dummy_node = insert_dummy_node(parent, split_order_key_dummy(bucket));
+        // TODO: consider returning pair<node_ptr, bool> to avoid the store operation if the bucket was stored by another thread,
+        // or move the store to insert_dummy_node
+        // Add dummy_node into the segment table
+        my_segments[bucket].store(dummy_node, std::memory_order_release);
+    }
+
+    node_ptr create_dummy_node( sokey_type order_key ) {
+        node_allocator_type dummy_node_allocator(my_segments.get_allocator());
+        node_ptr dummy_node = node_allocator_traits::allocate(dummy_node_allocator, 1);
+        node_allocator_traits::construct(dummy_node_allocator, dummy_node, order_key);
+        return dummy_node;
+    }
+
+    template <typename... Args>
+    value_node_ptr create_node( sokey_type order_key, Args&&...
args ) { + value_node_allocator_type value_node_allocator(my_segments.get_allocator()); + // Allocate memory for the value_node + value_node_ptr new_node = value_node_allocator_traits::allocate(value_node_allocator, 1); + // Construct the node + value_node_allocator_traits::construct(value_node_allocator, new_node, order_key); + + // try_call API is not convenient here due to broken + // variadic capture on GCC 4.8.5 + auto value_guard = make_raii_guard([&] { + value_node_allocator_traits::destroy(value_node_allocator, new_node); + value_node_allocator_traits::deallocate(value_node_allocator, new_node, 1); + }); + + // Construct the value in the node + value_node_allocator_traits::construct(value_node_allocator, new_node->storage(), std::forward<Args>(args)...); + value_guard.dismiss(); + return new_node; + } + + value_node_ptr first_value_node( node_ptr first_node ) const { + while (first_node != nullptr && first_node->is_dummy()) { + first_node = first_node->next(); + } + return static_cast<value_node_ptr>(first_node); + } + + // Unsafe method, which removes the node from the list and returns the next node + node_ptr internal_erase( value_node_ptr node_to_erase ) { + __TBB_ASSERT(node_to_erase != nullptr, "Invalid iterator for erase"); + node_ptr next_node = node_to_erase->next(); + internal_extract(node_to_erase); + destroy_node(node_to_erase); + return next_node; + } + + template <typename K> + size_type internal_erase_by_key( const K& key ) { + // TODO: consider reimplementation without equal_range - it is not effective to perform lookup over a bucket + // for each unsafe_erase call + auto eq_range = equal_range(key); + size_type erased_count = 0; + + for (auto it = eq_range.first; it != eq_range.second;) { + it = unsafe_erase(it); + ++erased_count; + } + return erased_count; + } + + // Unsafe method, which extracts the node from the list + void internal_extract( value_node_ptr node_to_extract ) { + const key_type& key = traits_type::get_key(node_to_extract->value()); + sokey_type hash_key = sokey_type(my_hash_compare(key)); + + node_ptr prev_node = prepare_bucket(hash_key); + + for (node_ptr node = prev_node->next(); node != nullptr; prev_node = node, node = node->next()) { + if (node == node_to_extract) { + unlink_node(prev_node, node, node_to_extract->next()); + my_size.store(my_size.load(std::memory_order_relaxed) - 1, std::memory_order_relaxed); + return; + } + __TBB_ASSERT(node->order_key() <= node_to_extract->order_key(), + "node, which is going to be extracted should be presented in the list"); + } + } + +protected: + template <typename SourceType> + void internal_merge( SourceType&& source ) { + static_assert(std::is_same<node_type, typename std::decay<SourceType>::type::node_type>::value, + "Incompatible containers cannot be merged"); + + for (node_ptr source_prev = &source.my_head; source_prev->next() != nullptr;) { + if (!source_prev->next()->is_dummy()) { + value_node_ptr curr = static_cast<value_node_ptr>(source_prev->next()); + // If the multimapping is allowed, or the key is not presented + // in the *this container - extract the node from the list + if (allow_multimapping || !contains(traits_type::get_key(curr->value()))) { + node_ptr next_node = curr->next(); + source.unlink_node(source_prev, curr, next_node); + + // Remember the old order key + sokey_type old_order_key = curr->order_key(); + + // Node handle with curr cannot be used directly in insert call, because + // the destructor of node_type will destroy curr + node_type curr_node = 
node_handle_accessor::construct<node_type>(curr); + + // If the insertion fails - return ownership of the node to the source + if (!insert(std::move(curr_node)).second) { + __TBB_ASSERT(!allow_multimapping, "Insertion should succeed for multicontainer"); + __TBB_ASSERT(source_prev->next() == next_node, + "Concurrent operations with the source container in merge are prohibited"); + + // Initialize the node with the old order key, because the order key + // can change during the insertion + curr->init(old_order_key); + __TBB_ASSERT(old_order_key >= source_prev->order_key() && + (next_node == nullptr || old_order_key <= next_node->order_key()), + "Wrong nodes order in the source container"); + // Merge is unsafe for source container, so the insertion back can be done without compare_exchange + curr->set_next(next_node); + source_prev->set_next(curr); + source_prev = curr; + node_handle_accessor::deactivate(curr_node); + } else { + source.my_size.fetch_sub(1, std::memory_order_relaxed); + } + } else { + source_prev = curr; + } + } else { + source_prev = source_prev->next(); + } + } + } + +private: + // Unsafe method, which unlinks the node between prev and next + void unlink_node( node_ptr prev_node, node_ptr node_to_unlink, node_ptr next_node ) { + __TBB_ASSERT(prev_node->next() == node_to_unlink && + node_to_unlink->next() == next_node, + "erasing and extracting nodes from the containers are unsafe in concurrent mode"); + prev_node->set_next(next_node); + node_to_unlink->set_next(nullptr); + } + + template <typename K> + value_node_ptr internal_find( const K& key ) { + sokey_type hash_key = sokey_type(my_hash_compare(key)); + sokey_type order_key = split_order_key_regular(hash_key); + + node_ptr curr = prepare_bucket(hash_key); + + while (curr != nullptr) { + if (curr->order_key() > order_key) { + // If the order key is greater than the requested order key, + // the element is not in the hash table + return nullptr; + } else if (curr->order_key() == order_key && + my_hash_compare(traits_type::get_key(static_cast<value_node_ptr>(curr)->value()), key)) { + // The fact that order keys match does not mean that the element is found. + // Key function comparison has to be performed to check whether this is the + // right element. If not, keep searching while order key is the same. 
+ return static_cast<value_node_ptr>(curr); + } + curr = curr->next(); + } + + return nullptr; + } + + template <typename K> + std::pair<value_node_ptr, value_node_ptr> internal_equal_range( const K& key ) { + sokey_type hash_key = sokey_type(my_hash_compare(key)); + sokey_type order_key = split_order_key_regular(hash_key); + + node_ptr curr = prepare_bucket(hash_key); + + while (curr != nullptr) { + if (curr->order_key() > order_key) { + // If the order key is greater than the requested order key, + // the element is not in the hash table + return std::make_pair(nullptr, nullptr); + } else if (curr->order_key() == order_key && + my_hash_compare(traits_type::get_key(static_cast<value_node_ptr>(curr)->value()), key)) { + value_node_ptr first = static_cast<value_node_ptr>(curr); + node_ptr last = first; + do { + last = last->next(); + } while (allow_multimapping && last != nullptr && !last->is_dummy() && + my_hash_compare(traits_type::get_key(static_cast<value_node_ptr>(last)->value()), key)); + return std::make_pair(first, first_value_node(last)); + } + curr = curr->next(); + } + return {nullptr, nullptr}; + } + + template <typename K> + size_type internal_count( const K& key ) const { + if (allow_multimapping) { + // TODO: consider reimplementing the internal_equal_range with elements counting to avoid std::distance + auto eq_range = equal_range(key); + return std::distance(eq_range.first, eq_range.second); + } else { + return contains(key) ? 1 : 0; + } + } + + void internal_copy( const concurrent_unordered_base& other ) { + node_ptr last_node = &my_head; + my_segments[0].store(&my_head, std::memory_order_relaxed); + + for (node_ptr node = other.my_head.next(); node != nullptr; node = node->next()) { + node_ptr new_node; + if (!node->is_dummy()) { + // The node in the right table contains a value + new_node = create_node(node->order_key(), static_cast<value_node_ptr>(node)->value()); + } else { + // The node in the right table is a dummy node + new_node = create_dummy_node(node->order_key()); + my_segments[reverse_bits(node->order_key())].store(new_node, std::memory_order_relaxed); + } + + last_node->set_next(new_node); + last_node = new_node; + } + } + + void internal_move( concurrent_unordered_base&& other ) { + node_ptr last_node = &my_head; + my_segments[0].store(&my_head, std::memory_order_relaxed); + + for (node_ptr node = other.my_head.next(); node != nullptr; node = node->next()) { + node_ptr new_node; + if (!node->is_dummy()) { + // The node in the right table contains a value + new_node = create_node(node->order_key(), std::move(static_cast<value_node_ptr>(node)->value())); + } else { + // TODO: do we need to destroy a dummy node in the right container? 
+                // The node in the right table is a dummy_node
+                new_node = create_dummy_node(node->order_key());
+                my_segments[reverse_bits(node->order_key())].store(new_node, std::memory_order_relaxed);
+            }
+
+            last_node->set_next(new_node);
+            last_node = new_node;
+        }
+    }
+
+    void move_content( concurrent_unordered_base&& other ) {
+        // NOTE: allocators should be equal
+        my_head.set_next(other.my_head.next());
+        other.my_head.set_next(nullptr);
+        my_segments[0].store(&my_head, std::memory_order_relaxed);
+
+        other.my_bucket_count.store(initial_bucket_count, std::memory_order_relaxed);
+        other.my_max_load_factor = initial_max_load_factor;
+        other.my_size.store(0, std::memory_order_relaxed);
+    }
+
+    void internal_move_construct_with_allocator( concurrent_unordered_base&& other, const allocator_type&,
+                                                 /*is_always_equal = */std::true_type ) {
+        // Allocators are always equal - no need to compare for equality
+        move_content(std::move(other));
+    }
+
+    void internal_move_construct_with_allocator( concurrent_unordered_base&& other, const allocator_type& alloc,
+                                                 /*is_always_equal = */std::false_type ) {
+        // Allocators are not always equal
+        if (alloc == other.my_segments.get_allocator()) {
+            move_content(std::move(other));
+        } else {
+            try_call( [&] {
+                internal_move(std::move(other));
+            } ).on_exception( [&] {
+                clear();
+            });
+        }
+    }
+
+    // Move-assigns the hash table from other if any two instances of allocator_type are always equal
+    // or propagate_on_container_move_assignment is true
+    void internal_move_assign( concurrent_unordered_base&& other, /*is_always_equal || POCMA = */std::true_type ) {
+        move_content(std::move(other));
+    }
+
+    // Move-assigns the hash table from other if instances of allocator_type are not always equal
+    // and propagate_on_container_move_assignment is false
+    void internal_move_assign( concurrent_unordered_base&& other, /*is_always_equal || POCMA = */std::false_type ) {
+        if (my_segments.get_allocator() == other.my_segments.get_allocator()) {
+            move_content(std::move(other));
+        } else {
+            // TODO: guards for exceptions
+            internal_move(std::move(other));
+        }
+    }
+
+    void internal_swap( concurrent_unordered_base& other, /*is_always_equal || POCS = */std::true_type ) {
+        internal_swap_fields(other);
+    }
+
+    void internal_swap( concurrent_unordered_base& other, /*is_always_equal || POCS = */std::false_type ) {
+        __TBB_ASSERT(my_segments.get_allocator() == other.my_segments.get_allocator(),
+                     "Swapping with unequal allocators is not allowed");
+        internal_swap_fields(other);
+    }
+
+    void internal_swap_fields( concurrent_unordered_base& other ) {
+        node_ptr first_node = my_head.next();
+        my_head.set_next(other.my_head.next());
+        other.my_head.set_next(first_node);
+
+        size_type current_size = my_size.load(std::memory_order_relaxed);
+        my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed);
+        other.my_size.store(current_size, std::memory_order_relaxed);
+
+        size_type bucket_count = my_bucket_count.load(std::memory_order_relaxed);
+        my_bucket_count.store(other.my_bucket_count.load(std::memory_order_relaxed), std::memory_order_relaxed);
+        other.my_bucket_count.store(bucket_count, std::memory_order_relaxed);
+
+        using std::swap;
+        swap(my_max_load_factor, other.my_max_load_factor);
+        swap(my_hash_compare, other.my_hash_compare);
+        my_segments.swap(other.my_segments);
+
+        // swap() method from the segment table swaps all of the segments including the first segment
+        // We should restore it to my_head.
Without it the first segment of the container will point + // to other.my_head. + my_segments[0].store(&my_head, std::memory_order_relaxed); + other.my_segments[0].store(&other.my_head, std::memory_order_relaxed); + } + + // A regular order key has its original hash value reversed and the last bit set + static constexpr sokey_type split_order_key_regular( sokey_type hash ) { + return reverse_bits(hash) | 0x1; + } + + // A dummy order key has its original hash value reversed and the last bit unset + static constexpr sokey_type split_order_key_dummy( sokey_type hash ) { + return reverse_bits(hash) & ~sokey_type(0x1); + } + + size_type get_parent( size_type bucket ) const { + // Unset bucket's most significant turned-on bit + __TBB_ASSERT(bucket != 0, "Unable to get_parent of the bucket 0"); + size_type msb = tbb::detail::log2(bucket); + return bucket & ~(size_type(1) << msb); + } + + size_type get_next_bucket_index( size_type bucket ) const { + size_type bits = tbb::detail::log2(my_bucket_count.load(std::memory_order_relaxed)); + size_type reversed_next = reverse_n_bits(bucket, bits) + 1; + return reverse_n_bits(reversed_next, bits); + } + + std::atomic<size_type> my_size; + std::atomic<size_type> my_bucket_count; + float my_max_load_factor; + hash_compare_type my_hash_compare; + + list_node_type my_head; // Head node for split ordered list + unordered_segment_table my_segments; // Segment table of pointers to nodes + + template <typename Container, typename Value> + friend class solist_iterator; + + template <typename OtherTraits> + friend class concurrent_unordered_base; +}; // class concurrent_unordered_base + +template <typename Traits> +bool operator==( const concurrent_unordered_base<Traits>& lhs, + const concurrent_unordered_base<Traits>& rhs ) { + if (&lhs == &rhs) { return true; } + if (lhs.size() != rhs.size()) { return false; } + +#if _MSC_VER + // Passing "unchecked" iterators to std::permutation with 3 parameters + // causes compiler warnings. + // The workaround is to use overload with 4 parameters, which is + // available since C++14 - minimally supported version on MSVC + return std::is_permutation(lhs.begin(), lhs.end(), rhs.begin(), rhs.end()); +#else + return std::is_permutation(lhs.begin(), lhs.end(), rhs.begin()); +#endif +} + +#if !__TBB_CPP20_COMPARISONS_PRESENT +template <typename Traits> +bool operator!=( const concurrent_unordered_base<Traits>& lhs, + const concurrent_unordered_base<Traits>& rhs ) { + return !(lhs == rhs); +} +#endif + +#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) +#pragma warning(pop) // warning 4127 is back +#endif + +} // namespace d1 +} // namespace detail +} // namespace tbb + +#endif // __TBB_detail__concurrent_unordered_base_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_config.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_config.h new file mode 100644 index 0000000000..251ebb8d82 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_config.h @@ -0,0 +1,483 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_detail__config_H +#define __TBB_detail__config_H + +/** This header is supposed to contain macro definitions only. + The macros defined here are intended to control such aspects of TBB build as + - presence of compiler features + - compilation modes + - feature sets + - known compiler/platform issues +**/ + +/* Check which standard library we use. */ +#include <cstddef> + +#if _MSC_VER + #define __TBB_EXPORTED_FUNC __cdecl + #define __TBB_EXPORTED_METHOD __thiscall +#else + #define __TBB_EXPORTED_FUNC + #define __TBB_EXPORTED_METHOD +#endif + +#if defined(_MSVC_LANG) + #define __TBB_LANG _MSVC_LANG +#else + #define __TBB_LANG __cplusplus +#endif // _MSVC_LANG + +#define __TBB_CPP14_PRESENT (__TBB_LANG >= 201402L) +#define __TBB_CPP17_PRESENT (__TBB_LANG >= 201703L) +#define __TBB_CPP20_PRESENT (__TBB_LANG >= 201709L) + +#if __INTEL_COMPILER || _MSC_VER + #define __TBB_NOINLINE(decl) __declspec(noinline) decl +#elif __GNUC__ + #define __TBB_NOINLINE(decl) decl __attribute__ ((noinline)) +#else + #define __TBB_NOINLINE(decl) decl +#endif + +#define __TBB_STRING_AUX(x) #x +#define __TBB_STRING(x) __TBB_STRING_AUX(x) + +// Note that when ICC or Clang is in use, __TBB_GCC_VERSION might not fully match +// the actual GCC version on the system. +#define __TBB_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) + +/* Check which standard library we use. */ + +// Prior to GCC 7, GNU libstdc++ did not have a convenient version macro. +// Therefore we use different ways to detect its version. +#ifdef TBB_USE_GLIBCXX_VERSION + // The version is explicitly specified in our public TBB_USE_GLIBCXX_VERSION macro. + // Its format should match the __TBB_GCC_VERSION above, e.g. 70301 for libstdc++ coming with GCC 7.3.1. + #define __TBB_GLIBCXX_VERSION TBB_USE_GLIBCXX_VERSION +#elif _GLIBCXX_RELEASE && _GLIBCXX_RELEASE != __GNUC__ + // Reported versions of GCC and libstdc++ do not match; trust the latter + #define __TBB_GLIBCXX_VERSION (_GLIBCXX_RELEASE*10000) +#elif __GLIBCPP__ || __GLIBCXX__ + // The version macro is not defined or matches the GCC version; use __TBB_GCC_VERSION + #define __TBB_GLIBCXX_VERSION __TBB_GCC_VERSION +#endif + +#if __clang__ + // according to clang documentation, version can be vendor specific + #define __TBB_CLANG_VERSION (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__) +#endif + +/** Macro helpers **/ + +#define __TBB_CONCAT_AUX(A,B) A##B +// The additional level of indirection is needed to expand macros A and B (not to get the AB macro). +// See [cpp.subst] and [cpp.concat] for more details. +#define __TBB_CONCAT(A,B) __TBB_CONCAT_AUX(A,B) +// The IGNORED argument and comma are needed to always have 2 arguments (even when A is empty). +#define __TBB_IS_MACRO_EMPTY(A,IGNORED) __TBB_CONCAT_AUX(__TBB_MACRO_EMPTY,A) +#define __TBB_MACRO_EMPTY 1 + +#if _M_X64 + #define __TBB_W(name) name##64 +#else + #define __TBB_W(name) name +#endif + +/** User controlled TBB features & modes **/ + +#ifndef TBB_USE_DEBUG + /* + There are four cases that are supported: + 1. "_DEBUG is undefined" means "no debug"; + 2. "_DEBUG defined to something that is evaluated to 0" (including "garbage", as per [cpp.cond]) means "no debug"; + 3. "_DEBUG defined to something that is evaluated to a non-zero value" means "debug"; + 4. "_DEBUG defined to nothing (empty)" means "debug". + */ + #ifdef _DEBUG + // Check if _DEBUG is empty. 
+ #define __TBB_IS__DEBUG_EMPTY (__TBB_IS_MACRO_EMPTY(_DEBUG,IGNORED)==__TBB_MACRO_EMPTY) + #if __TBB_IS__DEBUG_EMPTY + #define TBB_USE_DEBUG 1 + #else + #define TBB_USE_DEBUG _DEBUG + #endif // __TBB_IS__DEBUG_EMPTY + #else + #define TBB_USE_DEBUG 0 + #endif // _DEBUG +#endif // TBB_USE_DEBUG + +#ifndef TBB_USE_ASSERT + #define TBB_USE_ASSERT TBB_USE_DEBUG +#endif // TBB_USE_ASSERT + +#ifndef TBB_USE_PROFILING_TOOLS +#if TBB_USE_DEBUG + #define TBB_USE_PROFILING_TOOLS 2 +#else // TBB_USE_DEBUG + #define TBB_USE_PROFILING_TOOLS 0 +#endif // TBB_USE_DEBUG +#endif // TBB_USE_PROFILING_TOOLS + +// Exceptions support cases +#if !(__EXCEPTIONS || defined(_CPPUNWIND) || __SUNPRO_CC) + #if TBB_USE_EXCEPTIONS + #error Compilation settings do not support exception handling. Please do not set TBB_USE_EXCEPTIONS macro or set it to 0. + #elif !defined(TBB_USE_EXCEPTIONS) + #define TBB_USE_EXCEPTIONS 0 + #endif +#elif !defined(TBB_USE_EXCEPTIONS) + #define TBB_USE_EXCEPTIONS 1 +#endif + +/** Preprocessor symbols to determine HW architecture **/ + +#if _WIN32 || _WIN64 + #if defined(_M_X64) || defined(__x86_64__) // the latter for MinGW support + #define __TBB_x86_64 1 + #elif defined(_M_IA64) + #define __TBB_ipf 1 + #elif defined(_M_IX86) || defined(__i386__) // the latter for MinGW support + #define __TBB_x86_32 1 + #else + #define __TBB_generic_arch 1 + #endif +#else /* Assume generic Unix */ + #if __x86_64__ + #define __TBB_x86_64 1 + #elif __ia64__ + #define __TBB_ipf 1 + #elif __i386__||__i386 // __i386 is for Sun OS + #define __TBB_x86_32 1 + #else + #define __TBB_generic_arch 1 + #endif +#endif + +/** Windows API or POSIX API **/ + +#if _WIN32 || _WIN64 + #define __TBB_USE_WINAPI 1 +#else + #define __TBB_USE_POSIX 1 +#endif + +/** Internal TBB features & modes **/ + +/** __TBB_DYNAMIC_LOAD_ENABLED describes the system possibility to load shared libraries at run time **/ +#ifndef __TBB_DYNAMIC_LOAD_ENABLED + #define __TBB_DYNAMIC_LOAD_ENABLED 1 +#endif + +/** __TBB_WIN8UI_SUPPORT enables support of Windows* Store Apps and limit a possibility to load + shared libraries at run time only from application container **/ +#if defined(WINAPI_FAMILY) && WINAPI_FAMILY == WINAPI_FAMILY_APP + #define __TBB_WIN8UI_SUPPORT 1 +#else + #define __TBB_WIN8UI_SUPPORT 0 +#endif + +/** __TBB_WEAK_SYMBOLS_PRESENT denotes that the system supports the weak symbol mechanism **/ +#ifndef __TBB_WEAK_SYMBOLS_PRESENT + #define __TBB_WEAK_SYMBOLS_PRESENT ( !_WIN32 && !__APPLE__ && !__sun && (__TBB_GCC_VERSION >= 40000 || __INTEL_COMPILER ) ) +#endif + +/** Presence of compiler features **/ + +#if __clang__ && !__INTEL_COMPILER + #define __TBB_USE_OPTIONAL_RTTI __has_feature(cxx_rtti) +#elif defined(_CPPRTTI) + #define __TBB_USE_OPTIONAL_RTTI 1 +#else + #define __TBB_USE_OPTIONAL_RTTI (__GXX_RTTI || __RTTI || __INTEL_RTTI__) +#endif + +/** Library features presence macros **/ + +#define __TBB_CPP14_INTEGER_SEQUENCE_PRESENT (__TBB_LANG >= 201402L) +#define __TBB_CPP17_INVOKE_RESULT_PRESENT (__TBB_LANG >= 201703L) + +// TODO: Remove the condition(__INTEL_COMPILER > 2021) from the __TBB_CPP17_DEDUCTION_GUIDES_PRESENT +// macro when this feature start working correctly on this compiler. 
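+// Editorial sketch (not part of the upstream header): the __TBB_..._PRESENT macros defined in
+// the compiler-specific block below are plain preprocessor feature gates. A minimal, hypothetical
+// illustration of how such a gate is typically consumed follows; my_wrapper and example are
+// invented names, and the snippet is kept under "#if 0" so it cannot affect this configuration header.
+#if 0
+template <typename T>
+struct my_wrapper {
+    T value;
+    my_wrapper( T v ) : value(v) {}      // constructor enables an implicit deduction guide in C++17
+};
+
+inline void example() {
+#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
+    my_wrapper w(42);                    // rely on class template argument deduction
+#else
+    my_wrapper<int> w(42);               // spell the template argument explicitly
+#endif
+    (void)w.value;                       // suppress unused-variable warnings
+}
+#endif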
+#if __INTEL_COMPILER && (!_MSC_VER || __INTEL_CXX11_MOVE__) + #define __TBB_CPP14_VARIABLE_TEMPLATES_PRESENT (__TBB_LANG >= 201402L) + #define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT (__INTEL_COMPILER > 2021 && __TBB_LANG >= 201703L) + #define __TBB_CPP20_CONCEPTS_PRESENT 0 // TODO: add a mechanism for future addition +#elif __clang__ + #define __TBB_CPP14_VARIABLE_TEMPLATES_PRESENT (__has_feature(cxx_variable_templates)) + #define __TBB_CPP20_CONCEPTS_PRESENT 0 // TODO: add a mechanism for future addition + #ifdef __cpp_deduction_guides + #define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT (__cpp_deduction_guides >= 201611L) + #else + #define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT 0 + #endif +#elif __GNUC__ + #define __TBB_CPP14_VARIABLE_TEMPLATES_PRESENT (__TBB_LANG >= 201402L && __TBB_GCC_VERSION >= 50000) + #define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT (__cpp_deduction_guides >= 201606L) + #define __TBB_CPP20_CONCEPTS_PRESENT (__TBB_LANG >= 201709L && __TBB_GCC_VERSION >= 100201) +#elif _MSC_VER + #define __TBB_CPP14_VARIABLE_TEMPLATES_PRESENT (_MSC_FULL_VER >= 190023918 && (!__INTEL_COMPILER || __INTEL_COMPILER >= 1700)) + #define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT (_MSC_VER >= 1914 && __TBB_LANG >= 201703L && (!__INTEL_COMPILER || __INTEL_COMPILER > 2021)) + #define __TBB_CPP20_CONCEPTS_PRESENT (_MSC_VER >= 1923 && __TBB_LANG >= 202002L) // TODO: INTEL_COMPILER? +#else + #define __TBB_CPP14_VARIABLE_TEMPLATES_PRESENT (__TBB_LANG >= 201402L) + #define __TBB_CPP17_DEDUCTION_GUIDES_PRESENT (__TBB_LANG >= 201703L) + #define __TBB_CPP20_CONCEPTS_PRESENT (__TBB_LANG >= 202002L) +#endif + +// GCC4.8 on RHEL7 does not support std::get_new_handler +#define __TBB_CPP11_GET_NEW_HANDLER_PRESENT (_MSC_VER >= 1900 || __TBB_GLIBCXX_VERSION >= 40900 && __GXX_EXPERIMENTAL_CXX0X__ || _LIBCPP_VERSION) +// GCC4.8 on RHEL7 does not support std::is_trivially_copyable +#define __TBB_CPP11_TYPE_PROPERTIES_PRESENT (_LIBCPP_VERSION || _MSC_VER >= 1700 || (__TBB_GLIBCXX_VERSION >= 50000 && __GXX_EXPERIMENTAL_CXX0X__)) + +#define __TBB_CPP17_MEMORY_RESOURCE_PRESENT 0 +#define __TBB_CPP17_HW_INTERFERENCE_SIZE_PRESENT (_MSC_VER >= 1911) +#define __TBB_CPP17_LOGICAL_OPERATIONS_PRESENT (__TBB_LANG >= 201703L) +#define __TBB_CPP17_ALLOCATOR_IS_ALWAYS_EQUAL_PRESENT (__TBB_LANG >= 201703L) +#define __TBB_CPP17_IS_SWAPPABLE_PRESENT (__TBB_LANG >= 201703L) +#define __TBB_CPP20_COMPARISONS_PRESENT __TBB_CPP20_PRESENT + +#if (!__TBB_WIN8UI_SUPPORT && !__ANDROID__ && !__APPLE__ && !defined(_musl_)) +#define __TBB_RESUMABLE_TASKS 1 +#else +#define __TBB_RESUMABLE_TASKS 0 +#endif + +/* This macro marks incomplete code or comments describing ideas which are considered for the future. + * See also for plain comment with TODO and FIXME marks for small improvement opportunities. + */ +#define __TBB_TODO 0 + +/* Check which standard library we use. */ +/* __TBB_SYMBOL is defined only while processing exported symbols list where C++ is not allowed. 
*/ +#if !defined(__TBB_SYMBOL) && !__TBB_CONFIG_PREPROC_ONLY + #include <cstddef> +#endif + +/** Target OS is either iOS* or iOS* simulator **/ +#if __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ + #define __TBB_IOS 1 +#endif + +#if __APPLE__ + #if __INTEL_COMPILER && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ > 1099 \ + && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 101000 + // ICC does not correctly set the macro if -mmacosx-min-version is not specified + #define __TBB_MACOS_TARGET_VERSION (100000 + 10*(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ - 1000)) + #else + #define __TBB_MACOS_TARGET_VERSION __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ + #endif +#endif + +#if defined(__GNUC__) && !defined(__INTEL_COMPILER) + #define __TBB_GCC_WARNING_IGNORED_ATTRIBUTES_PRESENT (__TBB_GCC_VERSION >= 60100) +#endif + +#define __TBB_CPP17_FALLTHROUGH_PRESENT (__TBB_LANG >= 201703L) +#define __TBB_CPP17_NODISCARD_PRESENT (__TBB_LANG >= 201703L) +#define __TBB_FALLTHROUGH_PRESENT (__TBB_GCC_VERSION >= 70000 && !__INTEL_COMPILER) + +#if __TBB_CPP17_FALLTHROUGH_PRESENT + #define __TBB_fallthrough [[fallthrough]] +#elif __TBB_FALLTHROUGH_PRESENT + #define __TBB_fallthrough __attribute__ ((fallthrough)) +#else + #define __TBB_fallthrough +#endif + +#if __TBB_CPP17_NODISCARD_PRESENT + #define __TBB_nodiscard [[nodiscard]] +#elif __clang__ || __GNUC__ + #define __TBB_nodiscard __attribute__((warn_unused_result)) +#else + #define __TBB_nodiscard +#endif + +#define __TBB_CPP17_UNCAUGHT_EXCEPTIONS_PRESENT (_MSC_VER >= 1900 || __GLIBCXX__ && __cpp_lib_uncaught_exceptions \ + || _LIBCPP_VERSION >= 3700 && (!__TBB_MACOS_TARGET_VERSION || __TBB_MACOS_TARGET_VERSION >= 101200) && !__TBB_IOS) + + +#define __TBB_TSX_INTRINSICS_PRESENT ((__RTM__ || (_MSC_VER>=1700 && !__clang__) || __INTEL_COMPILER>=1300) && !__TBB_DEFINE_MIC && !__ANDROID__) + +#define __TBB_WAITPKG_INTRINSICS_PRESENT ((__INTEL_COMPILER >= 1900 || __TBB_GCC_VERSION >= 110000 || __TBB_CLANG_VERSION >= 120000) && !__ANDROID__) + +/** Internal TBB features & modes **/ + +/** __TBB_SOURCE_DIRECTLY_INCLUDED is a mode used in whitebox testing when + it's necessary to test internal functions not exported from TBB DLLs +**/ +#if (_WIN32||_WIN64) && (__TBB_SOURCE_DIRECTLY_INCLUDED || TBB_USE_PREVIEW_BINARY) + #define __TBB_NO_IMPLICIT_LINKAGE 1 + #define __TBBMALLOC_NO_IMPLICIT_LINKAGE 1 +#endif + +#if (__TBB_BUILD || __TBBMALLOC_BUILD || __TBBMALLOCPROXY_BUILD || __TBBBIND_BUILD) && !defined(__TBB_NO_IMPLICIT_LINKAGE) + #define __TBB_NO_IMPLICIT_LINKAGE 1 +#endif + +#if _MSC_VER + #if !__TBB_NO_IMPLICIT_LINKAGE + #ifdef _DEBUG + #pragma comment(lib, "tbb12_debug.lib") + #else + #pragma comment(lib, "tbb12.lib") + #endif + #endif +#endif + +#ifndef __TBB_SCHEDULER_OBSERVER + #define __TBB_SCHEDULER_OBSERVER 1 +#endif /* __TBB_SCHEDULER_OBSERVER */ + +#ifndef __TBB_FP_CONTEXT + #define __TBB_FP_CONTEXT 1 +#endif /* __TBB_FP_CONTEXT */ + +#define __TBB_RECYCLE_TO_ENQUEUE __TBB_BUILD // keep non-official + +#ifndef __TBB_ARENA_OBSERVER + #define __TBB_ARENA_OBSERVER __TBB_SCHEDULER_OBSERVER +#endif /* __TBB_ARENA_OBSERVER */ + +#ifndef __TBB_ARENA_BINDING + #define __TBB_ARENA_BINDING 1 +#endif + +#if TBB_PREVIEW_WAITING_FOR_WORKERS || __TBB_BUILD + #define __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE 1 +#endif + +#if (TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION || __TBB_BUILD) && __TBB_ARENA_BINDING + #define __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT 1 +#endif + +#ifndef __TBB_ENQUEUE_ENFORCED_CONCURRENCY + #define 
__TBB_ENQUEUE_ENFORCED_CONCURRENCY 1 +#endif + +#if !defined(__TBB_SURVIVE_THREAD_SWITCH) && \ + (_WIN32 || _WIN64 || __APPLE__ || (__linux__ && !__ANDROID__)) + #define __TBB_SURVIVE_THREAD_SWITCH 1 +#endif /* __TBB_SURVIVE_THREAD_SWITCH */ + +#ifndef TBB_PREVIEW_FLOW_GRAPH_FEATURES + #define TBB_PREVIEW_FLOW_GRAPH_FEATURES __TBB_CPF_BUILD +#endif + +#ifndef __TBB_DEFAULT_PARTITIONER + #define __TBB_DEFAULT_PARTITIONER tbb::auto_partitioner +#endif + +#ifndef __TBB_FLOW_TRACE_CODEPTR + #define __TBB_FLOW_TRACE_CODEPTR __TBB_CPF_BUILD +#endif + +// Intel(R) C++ Compiler starts analyzing usages of the deprecated content at the template +// instantiation site, which is too late for suppression of the corresponding messages for internal +// stuff. +#if !defined(__INTEL_COMPILER) && (!defined(TBB_SUPPRESS_DEPRECATED_MESSAGES) || (TBB_SUPPRESS_DEPRECATED_MESSAGES == 0)) + #if (__TBB_LANG >= 201402L) + #define __TBB_DEPRECATED [[deprecated]] + #define __TBB_DEPRECATED_MSG(msg) [[deprecated(msg)]] + #elif _MSC_VER + #define __TBB_DEPRECATED __declspec(deprecated) + #define __TBB_DEPRECATED_MSG(msg) __declspec(deprecated(msg)) + #elif (__GNUC__ && __TBB_GCC_VERSION >= 40805) || __clang__ + #define __TBB_DEPRECATED __attribute__((deprecated)) + #define __TBB_DEPRECATED_MSG(msg) __attribute__((deprecated(msg))) + #endif +#endif // !defined(TBB_SUPPRESS_DEPRECATED_MESSAGES) || (TBB_SUPPRESS_DEPRECATED_MESSAGES == 0) + +#if !defined(__TBB_DEPRECATED) + #define __TBB_DEPRECATED + #define __TBB_DEPRECATED_MSG(msg) +#elif !defined(__TBB_SUPPRESS_INTERNAL_DEPRECATED_MESSAGES) + // Suppress deprecated messages from self + #define __TBB_SUPPRESS_INTERNAL_DEPRECATED_MESSAGES 1 +#endif + +#if defined(TBB_SUPPRESS_DEPRECATED_MESSAGES) && (TBB_SUPPRESS_DEPRECATED_MESSAGES == 0) + #define __TBB_DEPRECATED_VERBOSE __TBB_DEPRECATED + #define __TBB_DEPRECATED_VERBOSE_MSG(msg) __TBB_DEPRECATED_MSG(msg) +#else + #define __TBB_DEPRECATED_VERBOSE + #define __TBB_DEPRECATED_VERBOSE_MSG(msg) +#endif // (TBB_SUPPRESS_DEPRECATED_MESSAGES == 0) + +#if (!defined(TBB_SUPPRESS_DEPRECATED_MESSAGES) || (TBB_SUPPRESS_DEPRECATED_MESSAGES == 0)) && !(__TBB_LANG >= 201103L || _MSC_VER >= 1900) + #pragma message("TBB Warning: Support for C++98/03 is deprecated. Please use the compiler that supports C++11 features at least.") +#endif + +#ifdef _VARIADIC_MAX + #define __TBB_VARIADIC_MAX _VARIADIC_MAX +#else + #if _MSC_VER == 1700 + #define __TBB_VARIADIC_MAX 5 // VS11 setting, issue resolved in VS12 + #elif _MSC_VER == 1600 + #define __TBB_VARIADIC_MAX 10 // VS10 setting + #else + #define __TBB_VARIADIC_MAX 15 + #endif +#endif + +/** Macros of the form __TBB_XXX_BROKEN denote known issues that are caused by + the bugs in compilers, standard or OS specific libraries. They should be + removed as soon as the corresponding bugs are fixed or the buggy OS/compiler + versions go out of the support list. +**/ + +// Some STL containers not support allocator traits in old GCC versions +#if __GXX_EXPERIMENTAL_CXX0X__ && __TBB_GLIBCXX_VERSION <= 50301 + #define TBB_ALLOCATOR_TRAITS_BROKEN 1 +#endif + +// GCC 4.8 C++ standard library implements std::this_thread::yield as no-op. 
+#if __TBB_GLIBCXX_VERSION >= 40800 && __TBB_GLIBCXX_VERSION < 40900 + #define __TBB_GLIBCXX_THIS_THREAD_YIELD_BROKEN 1 +#endif + +/** End of __TBB_XXX_BROKEN macro section **/ + +#if defined(_MSC_VER) && _MSC_VER>=1500 && !defined(__INTEL_COMPILER) + // A macro to suppress erroneous or benign "unreachable code" MSVC warning (4702) + #define __TBB_MSVC_UNREACHABLE_CODE_IGNORED 1 +#endif + +// Many OS versions (Android 4.0.[0-3] for example) need workaround for dlopen to avoid non-recursive loader lock hang +// Setting the workaround for all compile targets ($APP_PLATFORM) below Android 4.4 (android-19) +#if __ANDROID__ + #include <android/api-level.h> +#endif + +#define __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING (TBB_PREVIEW_FLOW_GRAPH_FEATURES) + +#ifndef __TBB_PREVIEW_CRITICAL_TASKS +#define __TBB_PREVIEW_CRITICAL_TASKS 1 +#endif + +#ifndef __TBB_PREVIEW_FLOW_GRAPH_NODE_SET +#define __TBB_PREVIEW_FLOW_GRAPH_NODE_SET (TBB_PREVIEW_FLOW_GRAPH_FEATURES) +#endif + + +#if !defined(__APPLE__) || !defined(__MAC_OS_X_VERSION_MIN_REQUIRED) || __MAC_OS_X_VERSION_MIN_REQUIRED > 101500 + #define __TBB_ALIGNAS_AVAILABLE 1 +#else + #define __TBB_ALIGNAS_AVAILABLE 0 +#endif + +#endif // __TBB_detail__config_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_containers_helpers.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_containers_helpers.h new file mode 100644 index 0000000000..4dca07fa10 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_containers_helpers.h @@ -0,0 +1,67 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_detail__containers_helpers_H +#define __TBB_detail__containers_helpers_H + +#include "_template_helpers.h" +#include "_allocator_traits.h" +#include <type_traits> +#include <memory> +#include <functional> + +namespace tbb { +namespace detail { +inline namespace d0 { + +template <typename Compare, typename = void> +struct comp_is_transparent : std::false_type {}; + +template <typename Compare> +struct comp_is_transparent<Compare, tbb::detail::void_t<typename Compare::is_transparent>> : std::true_type {}; + +template <typename Key, typename Hasher, typename KeyEqual, typename = void > +struct has_transparent_key_equal : std::false_type { using type = KeyEqual; }; + +template <typename Key, typename Hasher, typename KeyEqual> +struct has_transparent_key_equal<Key, Hasher, KeyEqual, tbb::detail::void_t<typename Hasher::transparent_key_equal>> : std::true_type { + using type = typename Hasher::transparent_key_equal; + static_assert(comp_is_transparent<type>::value, "Hash::transparent_key_equal::is_transparent is not valid or does not denote a type."); + static_assert((std::is_same<KeyEqual, std::equal_to<Key>>::value || + std::is_same<typename Hasher::transparent_key_equal, KeyEqual>::value), "KeyEqual is a different type than equal_to<Key> or Hash::transparent_key_equal."); + }; + +struct is_iterator_impl { +template <typename T> +using iter_traits_category = typename std::iterator_traits<T>::iterator_category; + +template <typename T> +using input_iter_category = typename std::enable_if<std::is_base_of<std::input_iterator_tag, iter_traits_category<T>>::value>::type; +}; // struct is_iterator_impl + +template <typename T> +using is_input_iterator = supports<T, is_iterator_impl::iter_traits_category, is_iterator_impl::input_iter_category>; + +#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT +template <typename T> +inline constexpr bool is_input_iterator_v = is_input_iterator<T>::value; +#endif + +} // inline namespace d0 +} // namespace detail +} // namespace tbb + +#endif // __TBB_detail__containers_helpers_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_exception.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_exception.h new file mode 100644 index 0000000000..9764209fa8 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_exception.h @@ -0,0 +1,88 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB__exception_H +#define __TBB__exception_H + +#include "_config.h" + +#include <new> // std::bad_alloc +#include <exception> // std::exception +#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE +#include <stdexcept> // std::runtime_error +#endif + +namespace tbb { +namespace detail { +inline namespace d0 { +enum class exception_id { + bad_alloc = 1, + bad_last_alloc, + user_abort, + nonpositive_step, + out_of_range, + reservation_length_error, + missing_wait, + invalid_load_factor, + invalid_key, + bad_tagged_msg_cast, + unsafe_wait, + last_entry +}; +} // namespace d0 + +namespace r1 { +//! 
Exception for concurrent containers +class bad_last_alloc : public std::bad_alloc { +public: + const char* __TBB_EXPORTED_METHOD what() const noexcept(true) override; +}; + +//! Exception for user-initiated abort +class user_abort : public std::exception { +public: + const char* __TBB_EXPORTED_METHOD what() const noexcept(true) override; +}; + +//! Exception for missing wait on structured_task_group +class missing_wait : public std::exception { +public: + const char* __TBB_EXPORTED_METHOD what() const noexcept(true) override; +}; + +#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE +//! Exception for impossible finalization of task_sheduler_handle +class unsafe_wait : public std::runtime_error { +public: + unsafe_wait(const char* msg) : std::runtime_error(msg) {} +}; +#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE + +//! Gathers all throw operators in one place. +/** Its purpose is to minimize code bloat that can be caused by throw operators + scattered in multiple places, especially in templates. **/ +void __TBB_EXPORTED_FUNC throw_exception ( exception_id ); +} // namespace r1 + +inline namespace d0 { +using r1::throw_exception; +} // namespace d0 + +} // namespace detail +} // namespace tbb + +#endif // __TBB__exception_H + diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_body_impl.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_body_impl.h new file mode 100644 index 0000000000..34ba1efcaf --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_body_impl.h @@ -0,0 +1,371 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB__flow_graph_body_impl_H +#define __TBB__flow_graph_body_impl_H + +#ifndef __TBB_flow_graph_H +#error Do not #include this internal file directly; use public TBB headers instead. +#endif + +// included in namespace tbb::detail::d1 (in flow_graph.h) + +typedef std::uint64_t tag_value; + + +// TODO revamp: find out if there is already helper for has_policy. +template<typename ... Policies> struct Policy {}; + +template<typename ... Policies> struct has_policy; + +template<typename ExpectedPolicy, typename FirstPolicy, typename ...Policies> +struct has_policy<ExpectedPolicy, FirstPolicy, Policies...> : + std::integral_constant<bool, has_policy<ExpectedPolicy, FirstPolicy>::value || + has_policy<ExpectedPolicy, Policies...>::value> {}; + +template<typename ExpectedPolicy, typename SinglePolicy> +struct has_policy<ExpectedPolicy, SinglePolicy> : + std::integral_constant<bool, std::is_same<ExpectedPolicy, SinglePolicy>::value> {}; + +template<typename ExpectedPolicy, typename ...Policies> +struct has_policy<ExpectedPolicy, Policy<Policies...> > : has_policy<ExpectedPolicy, Policies...> {}; + +namespace graph_policy_namespace { + + struct rejecting { }; + struct reserving { }; + struct queueing { }; + struct lightweight { }; + + // K == type of field used for key-matching. 
Each tag-matching port will be provided + // functor that, given an object accepted by the port, will return the + /// field of type K being used for matching. + template<typename K, typename KHash=tbb_hash_compare<typename std::decay<K>::type > > + struct key_matching { + typedef K key_type; + typedef typename std::decay<K>::type base_key_type; + typedef KHash hash_compare_type; + }; + + // old tag_matching join's new specifier + typedef key_matching<tag_value> tag_matching; + + // Aliases for Policy combinations + typedef Policy<queueing, lightweight> queueing_lightweight; + typedef Policy<rejecting, lightweight> rejecting_lightweight; + +} // namespace graph_policy_namespace + +// -------------- function_body containers ---------------------- + +//! A functor that takes no input and generates a value of type Output +template< typename Output > +class input_body : no_assign { +public: + virtual ~input_body() {} + virtual Output operator()(flow_control& fc) = 0; + virtual input_body* clone() = 0; +}; + +//! The leaf for input_body +template< typename Output, typename Body> +class input_body_leaf : public input_body<Output> { +public: + input_body_leaf( const Body &_body ) : body(_body) { } + Output operator()(flow_control& fc) override { return body(fc); } + input_body_leaf* clone() override { + return new input_body_leaf< Output, Body >(body); + } + Body get_body() { return body; } +private: + Body body; +}; + +//! A functor that takes an Input and generates an Output +template< typename Input, typename Output > +class function_body : no_assign { +public: + virtual ~function_body() {} + virtual Output operator()(const Input &input) = 0; + virtual function_body* clone() = 0; +}; + +//! the leaf for function_body +template <typename Input, typename Output, typename B> +class function_body_leaf : public function_body< Input, Output > { +public: + function_body_leaf( const B &_body ) : body(_body) { } + Output operator()(const Input &i) override { return body(i); } + B get_body() { return body; } + function_body_leaf* clone() override { + return new function_body_leaf< Input, Output, B >(body); + } +private: + B body; +}; + +//! the leaf for function_body specialized for Input and output of continue_msg +template <typename B> +class function_body_leaf< continue_msg, continue_msg, B> : public function_body< continue_msg, continue_msg > { +public: + function_body_leaf( const B &_body ) : body(_body) { } + continue_msg operator()( const continue_msg &i ) override { + body(i); + return i; + } + B get_body() { return body; } + function_body_leaf* clone() override { + return new function_body_leaf< continue_msg, continue_msg, B >(body); + } +private: + B body; +}; + +//! the leaf for function_body specialized for Output of continue_msg +template <typename Input, typename B> +class function_body_leaf< Input, continue_msg, B> : public function_body< Input, continue_msg > { +public: + function_body_leaf( const B &_body ) : body(_body) { } + continue_msg operator()(const Input &i) override { + body(i); + return continue_msg(); + } + B get_body() { return body; } + function_body_leaf* clone() override { + return new function_body_leaf< Input, continue_msg, B >(body); + } +private: + B body; +}; + +//! 
the leaf for function_body specialized for Input of continue_msg +template <typename Output, typename B> +class function_body_leaf< continue_msg, Output, B > : public function_body< continue_msg, Output > { +public: + function_body_leaf( const B &_body ) : body(_body) { } + Output operator()(const continue_msg &i) override { + return body(i); + } + B get_body() { return body; } + function_body_leaf* clone() override { + return new function_body_leaf< continue_msg, Output, B >(body); + } +private: + B body; +}; + +//! function_body that takes an Input and a set of output ports +template<typename Input, typename OutputSet> +class multifunction_body : no_assign { +public: + virtual ~multifunction_body () {} + virtual void operator()(const Input &/* input*/, OutputSet &/*oset*/) = 0; + virtual multifunction_body* clone() = 0; + virtual void* get_body_ptr() = 0; +}; + +//! leaf for multifunction. OutputSet can be a std::tuple or a vector. +template<typename Input, typename OutputSet, typename B > +class multifunction_body_leaf : public multifunction_body<Input, OutputSet> { +public: + multifunction_body_leaf(const B &_body) : body(_body) { } + void operator()(const Input &input, OutputSet &oset) override { + body(input, oset); // body may explicitly put() to one or more of oset. + } + void* get_body_ptr() override { return &body; } + multifunction_body_leaf* clone() override { + return new multifunction_body_leaf<Input, OutputSet,B>(body); + } + +private: + B body; +}; + +// ------ function bodies for hash_buffers and key-matching joins. + +template<typename Input, typename Output> +class type_to_key_function_body : no_assign { + public: + virtual ~type_to_key_function_body() {} + virtual Output operator()(const Input &input) = 0; // returns an Output + virtual type_to_key_function_body* clone() = 0; +}; + +// specialization for ref output +template<typename Input, typename Output> +class type_to_key_function_body<Input,Output&> : no_assign { + public: + virtual ~type_to_key_function_body() {} + virtual const Output & operator()(const Input &input) = 0; // returns a const Output& + virtual type_to_key_function_body* clone() = 0; +}; + +template <typename Input, typename Output, typename B> +class type_to_key_function_body_leaf : public type_to_key_function_body<Input, Output> { +public: + type_to_key_function_body_leaf( const B &_body ) : body(_body) { } + Output operator()(const Input &i) override { return body(i); } + type_to_key_function_body_leaf* clone() override { + return new type_to_key_function_body_leaf< Input, Output, B>(body); + } +private: + B body; +}; + +template <typename Input, typename Output, typename B> +class type_to_key_function_body_leaf<Input,Output&,B> : public type_to_key_function_body< Input, Output&> { +public: + type_to_key_function_body_leaf( const B &_body ) : body(_body) { } + const Output& operator()(const Input &i) override { + return body(i); + } + type_to_key_function_body_leaf* clone() override { + return new type_to_key_function_body_leaf< Input, Output&, B>(body); + } +private: + B body; +}; + +// --------------------------- end of function_body containers ------------------------ + +// --------------------------- node task bodies --------------------------------------- + +//! 
A task that calls a node's forward_task function +template< typename NodeType > +class forward_task_bypass : public graph_task { + NodeType &my_node; +public: + forward_task_bypass( graph& g, small_object_allocator& allocator, NodeType &n + , node_priority_t node_priority = no_priority + ) : graph_task(g, allocator, node_priority), + my_node(n) {} + + task* execute(execution_data& ed) override { + graph_task* next_task = my_node.forward_task(); + if (SUCCESSFULLY_ENQUEUED == next_task) + next_task = nullptr; + else if (next_task) + next_task = prioritize_task(my_node.graph_reference(), *next_task); + finalize(ed); + return next_task; + } +}; + +//! A task that calls a node's apply_body_bypass function, passing in an input of type Input +// return the task* unless it is SUCCESSFULLY_ENQUEUED, in which case return NULL +template< typename NodeType, typename Input > +class apply_body_task_bypass : public graph_task { + NodeType &my_node; + Input my_input; +public: + + apply_body_task_bypass( graph& g, small_object_allocator& allocator, NodeType &n, const Input &i + , node_priority_t node_priority = no_priority + ) : graph_task(g, allocator, node_priority), + my_node(n), my_input(i) {} + + task* execute(execution_data& ed) override { + graph_task* next_task = my_node.apply_body_bypass( my_input ); + if (SUCCESSFULLY_ENQUEUED == next_task) + next_task = nullptr; + else if (next_task) + next_task = prioritize_task(my_node.graph_reference(), *next_task); + finalize(ed); + return next_task; + + } +}; + +//! A task that calls a node's apply_body_bypass function with no input +template< typename NodeType > +class input_node_task_bypass : public graph_task { + NodeType &my_node; +public: + input_node_task_bypass( graph& g, small_object_allocator& allocator, NodeType &n ) + : graph_task(g, allocator), my_node(n) {} + + task* execute(execution_data& ed) override { + graph_task* next_task = my_node.apply_body_bypass( ); + if (SUCCESSFULLY_ENQUEUED == next_task) + next_task = nullptr; + else if (next_task) + next_task = prioritize_task(my_node.graph_reference(), *next_task); + finalize(ed); + return next_task; + } + +}; + +// ------------------------ end of node task bodies ----------------------------------- + +template<typename T, typename DecrementType, typename DummyType = void> +class threshold_regulator; + +template<typename T, typename DecrementType> +class threshold_regulator<T, DecrementType, + typename std::enable_if<std::is_integral<DecrementType>::value>::type> + : public receiver<DecrementType>, no_copy +{ + T* my_node; +protected: + + graph_task* try_put_task( const DecrementType& value ) override { + graph_task* result = my_node->decrement_counter( value ); + if( !result ) + result = SUCCESSFULLY_ENQUEUED; + return result; + } + + graph& graph_reference() const override { + return my_node->my_graph; + } + + template<typename U, typename V> friend class limiter_node; + void reset_receiver( reset_flags ) {} + +public: + threshold_regulator(T* owner) : my_node(owner) { + // Do not work with the passed pointer here as it may not be fully initialized yet + } +}; + +template<typename T> +class threshold_regulator<T, continue_msg, void> : public continue_receiver, no_copy { + + T *my_node; + + graph_task* execute() override { + return my_node->decrement_counter( 1 ); + } + +protected: + + graph& graph_reference() const override { + return my_node->my_graph; + } + +public: + + typedef continue_msg input_type; + typedef continue_msg output_type; + threshold_regulator(T* owner) + : 
continue_receiver( /*number_of_predecessors=*/0, no_priority ), my_node(owner) + { + // Do not work with the passed pointer here as it may not be fully initialized yet + } +}; + +#endif // __TBB__flow_graph_body_impl_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_cache_impl.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_cache_impl.h new file mode 100644 index 0000000000..ac5564598b --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_cache_impl.h @@ -0,0 +1,435 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB__flow_graph_cache_impl_H +#define __TBB__flow_graph_cache_impl_H + +#ifndef __TBB_flow_graph_H +#error Do not #include this internal file directly; use public TBB headers instead. +#endif + +// included in namespace tbb::detail::d1 (in flow_graph.h) + +//! A node_cache maintains a std::queue of elements of type T. Each operation is protected by a lock. +template< typename T, typename M=spin_mutex > +class node_cache { + public: + + typedef size_t size_type; + + bool empty() { + typename mutex_type::scoped_lock lock( my_mutex ); + return internal_empty(); + } + + void add( T &n ) { + typename mutex_type::scoped_lock lock( my_mutex ); + internal_push(n); + } + + void remove( T &n ) { + typename mutex_type::scoped_lock lock( my_mutex ); + for ( size_t i = internal_size(); i != 0; --i ) { + T &s = internal_pop(); + if ( &s == &n ) + break; // only remove one predecessor per request + internal_push(s); + } + } + + void clear() { + while( !my_q.empty()) (void)my_q.pop(); + } + +protected: + + typedef M mutex_type; + mutex_type my_mutex; + std::queue< T * > my_q; + + // Assumes lock is held + inline bool internal_empty( ) { + return my_q.empty(); + } + + // Assumes lock is held + inline size_type internal_size( ) { + return my_q.size(); + } + + // Assumes lock is held + inline void internal_push( T &n ) { + my_q.push(&n); + } + + // Assumes lock is held + inline T &internal_pop() { + T *v = my_q.front(); + my_q.pop(); + return *v; + } + +}; + +//! A cache of predecessors that only supports try_get +template< typename T, typename M=spin_mutex > +class predecessor_cache : public node_cache< sender<T>, M > { +public: + typedef M mutex_type; + typedef T output_type; + typedef sender<output_type> predecessor_type; + typedef receiver<output_type> successor_type; + + predecessor_cache( successor_type* owner ) : my_owner( owner ) { + __TBB_ASSERT( my_owner, "predecessor_cache should have an owner." 
); + // Do not work with the passed pointer here as it may not be fully initialized yet + } + + bool get_item( output_type& v ) { + + bool msg = false; + + do { + predecessor_type *src; + { + typename mutex_type::scoped_lock lock(this->my_mutex); + if ( this->internal_empty() ) { + break; + } + src = &this->internal_pop(); + } + + // Try to get from this sender + msg = src->try_get( v ); + + if (msg == false) { + // Relinquish ownership of the edge + register_successor(*src, *my_owner); + } else { + // Retain ownership of the edge + this->add(*src); + } + } while ( msg == false ); + return msg; + } + + // If we are removing arcs (rf_clear_edges), call clear() rather than reset(). + void reset() { + for(;;) { + predecessor_type *src; + { + if (this->internal_empty()) break; + src = &this->internal_pop(); + } + register_successor(*src, *my_owner); + } + } + +protected: + successor_type* my_owner; +}; + +//! An cache of predecessors that supports requests and reservations +template< typename T, typename M=spin_mutex > +class reservable_predecessor_cache : public predecessor_cache< T, M > { +public: + typedef M mutex_type; + typedef T output_type; + typedef sender<T> predecessor_type; + typedef receiver<T> successor_type; + + reservable_predecessor_cache( successor_type* owner ) + : predecessor_cache<T,M>(owner), reserved_src(NULL) + { + // Do not work with the passed pointer here as it may not be fully initialized yet + } + + bool + try_reserve( output_type &v ) { + bool msg = false; + + do { + { + typename mutex_type::scoped_lock lock(this->my_mutex); + if ( reserved_src || this->internal_empty() ) + return false; + + reserved_src = &this->internal_pop(); + } + + // Try to get from this sender + msg = reserved_src->try_reserve( v ); + + if (msg == false) { + typename mutex_type::scoped_lock lock(this->my_mutex); + // Relinquish ownership of the edge + register_successor( *reserved_src, *this->my_owner ); + reserved_src = NULL; + } else { + // Retain ownership of the edge + this->add( *reserved_src ); + } + } while ( msg == false ); + + return msg; + } + + bool + try_release( ) { + reserved_src->try_release( ); + reserved_src = NULL; + return true; + } + + bool + try_consume( ) { + reserved_src->try_consume( ); + reserved_src = NULL; + return true; + } + + void reset( ) { + reserved_src = NULL; + predecessor_cache<T,M>::reset( ); + } + + void clear() { + reserved_src = NULL; + predecessor_cache<T,M>::clear(); + } + +private: + predecessor_type *reserved_src; +}; + + +//! 
An abstract cache of successors +template<typename T, typename M=spin_rw_mutex > +class successor_cache : no_copy { +protected: + + typedef M mutex_type; + mutex_type my_mutex; + + typedef receiver<T> successor_type; + typedef receiver<T>* pointer_type; + typedef sender<T> owner_type; + // TODO revamp: introduce heapified collection of successors for strict priorities + typedef std::list< pointer_type > successors_type; + successors_type my_successors; + + owner_type* my_owner; + +public: + successor_cache( owner_type* owner ) : my_owner(owner) { + // Do not work with the passed pointer here as it may not be fully initialized yet + } + + virtual ~successor_cache() {} + + void register_successor( successor_type& r ) { + typename mutex_type::scoped_lock l(my_mutex, true); + if( r.priority() != no_priority ) + my_successors.push_front( &r ); + else + my_successors.push_back( &r ); + } + + void remove_successor( successor_type& r ) { + typename mutex_type::scoped_lock l(my_mutex, true); + for ( typename successors_type::iterator i = my_successors.begin(); + i != my_successors.end(); ++i ) { + if ( *i == & r ) { + my_successors.erase(i); + break; + } + } + } + + bool empty() { + typename mutex_type::scoped_lock l(my_mutex, false); + return my_successors.empty(); + } + + void clear() { + my_successors.clear(); + } + + virtual graph_task* try_put_task( const T& t ) = 0; +}; // successor_cache<T> + +//! An abstract cache of successors, specialized to continue_msg +template<typename M> +class successor_cache< continue_msg, M > : no_copy { +protected: + + typedef M mutex_type; + mutex_type my_mutex; + + typedef receiver<continue_msg> successor_type; + typedef receiver<continue_msg>* pointer_type; + typedef sender<continue_msg> owner_type; + typedef std::list< pointer_type > successors_type; + successors_type my_successors; + owner_type* my_owner; + +public: + successor_cache( sender<continue_msg>* owner ) : my_owner(owner) { + // Do not work with the passed pointer here as it may not be fully initialized yet + } + + virtual ~successor_cache() {} + + void register_successor( successor_type& r ) { + typename mutex_type::scoped_lock l(my_mutex, true); + if( r.priority() != no_priority ) + my_successors.push_front( &r ); + else + my_successors.push_back( &r ); + __TBB_ASSERT( my_owner, "Cache of successors must have an owner." ); + if ( r.is_continue_receiver() ) { + r.register_predecessor( *my_owner ); + } + } + + void remove_successor( successor_type& r ) { + typename mutex_type::scoped_lock l(my_mutex, true); + for ( successors_type::iterator i = my_successors.begin(); i != my_successors.end(); ++i ) { + if ( *i == &r ) { + __TBB_ASSERT(my_owner, "Cache of successors must have an owner."); + // TODO: check if we need to test for continue_receiver before removing from r. + r.remove_predecessor( *my_owner ); + my_successors.erase(i); + break; + } + } + } + + bool empty() { + typename mutex_type::scoped_lock l(my_mutex, false); + return my_successors.empty(); + } + + void clear() { + my_successors.clear(); + } + + virtual graph_task* try_put_task( const continue_msg& t ) = 0; +}; // successor_cache< continue_msg > + +//! 
A cache of successors that are broadcast to +template<typename T, typename M=spin_rw_mutex> +class broadcast_cache : public successor_cache<T, M> { + typedef successor_cache<T, M> base_type; + typedef M mutex_type; + typedef typename successor_cache<T,M>::successors_type successors_type; + +public: + + broadcast_cache( typename base_type::owner_type* owner ): base_type(owner) { + // Do not work with the passed pointer here as it may not be fully initialized yet + } + + // as above, but call try_put_task instead, and return the last task we received (if any) + graph_task* try_put_task( const T &t ) override { + graph_task * last_task = nullptr; + typename mutex_type::scoped_lock l(this->my_mutex, /*write=*/true); + typename successors_type::iterator i = this->my_successors.begin(); + while ( i != this->my_successors.end() ) { + graph_task *new_task = (*i)->try_put_task(t); + // workaround for icc bug + graph& graph_ref = (*i)->graph_reference(); + last_task = combine_tasks(graph_ref, last_task, new_task); // enqueue if necessary + if(new_task) { + ++i; + } + else { // failed + if ( (*i)->register_predecessor(*this->my_owner) ) { + i = this->my_successors.erase(i); + } else { + ++i; + } + } + } + return last_task; + } + + // call try_put_task and return list of received tasks + bool gather_successful_try_puts( const T &t, graph_task_list& tasks ) { + bool is_at_least_one_put_successful = false; + typename mutex_type::scoped_lock l(this->my_mutex, /*write=*/true); + typename successors_type::iterator i = this->my_successors.begin(); + while ( i != this->my_successors.end() ) { + graph_task * new_task = (*i)->try_put_task(t); + if(new_task) { + ++i; + if(new_task != SUCCESSFULLY_ENQUEUED) { + tasks.push_back(*new_task); + } + is_at_least_one_put_successful = true; + } + else { // failed + if ( (*i)->register_predecessor(*this->my_owner) ) { + i = this->my_successors.erase(i); + } else { + ++i; + } + } + } + return is_at_least_one_put_successful; + } +}; + +//! 
A cache of successors that are put in a round-robin fashion +template<typename T, typename M=spin_rw_mutex > +class round_robin_cache : public successor_cache<T, M> { + typedef successor_cache<T, M> base_type; + typedef size_t size_type; + typedef M mutex_type; + typedef typename successor_cache<T,M>::successors_type successors_type; + +public: + + round_robin_cache( typename base_type::owner_type* owner ): base_type(owner) { + // Do not work with the passed pointer here as it may not be fully initialized yet + } + + size_type size() { + typename mutex_type::scoped_lock l(this->my_mutex, false); + return this->my_successors.size(); + } + + graph_task* try_put_task( const T &t ) override { + typename mutex_type::scoped_lock l(this->my_mutex, /*write=*/true); + typename successors_type::iterator i = this->my_successors.begin(); + while ( i != this->my_successors.end() ) { + graph_task* new_task = (*i)->try_put_task(t); + if ( new_task ) { + return new_task; + } else { + if ( (*i)->register_predecessor(*this->my_owner) ) { + i = this->my_successors.erase(i); + } + else { + ++i; + } + } + } + return NULL; + } +}; + +#endif // __TBB__flow_graph_cache_impl_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_impl.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_impl.h new file mode 100644 index 0000000000..a3d17cfb1c --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_impl.h @@ -0,0 +1,488 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_flow_graph_impl_H +#define __TBB_flow_graph_impl_H + +// #include "../config.h" +#include "_task.h" +#include "tbb/task_group.h" +#include "../task_arena.h" +#include "../flow_graph_abstractions.h" + +#include "../concurrent_priority_queue.h" + +#include <list> + +namespace tbb { +namespace detail { + +namespace d1 { + +class graph_task; +static graph_task* const SUCCESSFULLY_ENQUEUED = (graph_task*)-1; +typedef unsigned int node_priority_t; +static const node_priority_t no_priority = node_priority_t(0); + +class graph; +class graph_node; + +template <typename GraphContainerType, typename GraphNodeType> +class graph_iterator { + friend class graph; + friend class graph_node; +public: + typedef size_t size_type; + typedef GraphNodeType value_type; + typedef GraphNodeType* pointer; + typedef GraphNodeType& reference; + typedef const GraphNodeType& const_reference; + typedef std::forward_iterator_tag iterator_category; + + //! Copy constructor + graph_iterator(const graph_iterator& other) : + my_graph(other.my_graph), current_node(other.current_node) + {} + + //! Assignment + graph_iterator& operator=(const graph_iterator& other) { + if (this != &other) { + my_graph = other.my_graph; + current_node = other.current_node; + } + return *this; + } + + //! Dereference + reference operator*() const; + + //! Dereference + pointer operator->() const; + + //! 
Equality + bool operator==(const graph_iterator& other) const { + return ((my_graph == other.my_graph) && (current_node == other.current_node)); + } + +#if !__TBB_CPP20_COMPARISONS_PRESENT + //! Inequality + bool operator!=(const graph_iterator& other) const { return !(operator==(other)); } +#endif + + //! Pre-increment + graph_iterator& operator++() { + internal_forward(); + return *this; + } + + //! Post-increment + graph_iterator operator++(int) { + graph_iterator result = *this; + operator++(); + return result; + } + +private: + // the graph over which we are iterating + GraphContainerType *my_graph; + // pointer into my_graph's my_nodes list + pointer current_node; + + //! Private initializing constructor for begin() and end() iterators + graph_iterator(GraphContainerType *g, bool begin); + void internal_forward(); +}; // class graph_iterator + +// flags to modify the behavior of the graph reset(). Can be combined. +enum reset_flags { + rf_reset_protocol = 0, + rf_reset_bodies = 1 << 0, // delete the current node body, reset to a copy of the initial node body. + rf_clear_edges = 1 << 1 // delete edges +}; + +void activate_graph(graph& g); +void deactivate_graph(graph& g); +bool is_graph_active(graph& g); +graph_task* prioritize_task(graph& g, graph_task& arena_task); +void spawn_in_graph_arena(graph& g, graph_task& arena_task); +void enqueue_in_graph_arena(graph &g, graph_task& arena_task); + +class graph; + +//! Base class for tasks generated by graph nodes. +class graph_task : public task { +public: + graph_task(graph& g, small_object_allocator& allocator + , node_priority_t node_priority = no_priority + ) + : my_graph(g) + , priority(node_priority) + , my_allocator(allocator) + {} + graph& my_graph; // graph instance the task belongs to + // TODO revamp: rename to my_priority + node_priority_t priority; + void destruct_and_deallocate(const execution_data& ed); + task* cancel(execution_data& ed) override; +protected: + void finalize(const execution_data& ed); +private: + // To organize task_list + graph_task* my_next{ nullptr }; + small_object_allocator my_allocator; + // TODO revamp: elaborate internal interfaces to avoid friends declarations + friend class graph_task_list; + friend graph_task* prioritize_task(graph& g, graph_task& gt); +}; + +struct graph_task_comparator { + bool operator()(const graph_task* left, const graph_task* right) { + return left->priority < right->priority; + } +}; + +typedef tbb::concurrent_priority_queue<graph_task*, graph_task_comparator> graph_task_priority_queue_t; + +class priority_task_selector : public task { +public: + priority_task_selector(graph_task_priority_queue_t& priority_queue, small_object_allocator& allocator) + : my_priority_queue(priority_queue), my_allocator(allocator), my_task() {} + task* execute(execution_data& ed) override { + next_task(); + __TBB_ASSERT(my_task, nullptr); + task* t_next = my_task->execute(ed); + my_allocator.delete_object(this, ed); + return t_next; + } + task* cancel(execution_data& ed) override { + if (!my_task) { + next_task(); + } + __TBB_ASSERT(my_task, nullptr); + task* t_next = my_task->cancel(ed); + my_allocator.delete_object(this, ed); + return t_next; + } +private: + void next_task() { + // TODO revamp: hold functors in priority queue instead of real tasks + bool result = my_priority_queue.try_pop(my_task); + __TBB_ASSERT_EX(result, "Number of critical tasks for scheduler and tasks" + " in graph's priority queue mismatched"); + __TBB_ASSERT(my_task && my_task != SUCCESSFULLY_ENQUEUED, + "Incorrect task 
submitted to graph priority queue"); + __TBB_ASSERT(my_task->priority != no_priority, + "Tasks from graph's priority queue must have priority"); + } + + graph_task_priority_queue_t& my_priority_queue; + small_object_allocator my_allocator; + graph_task* my_task; +}; + +template <typename Receiver, typename Body> class run_and_put_task; +template <typename Body> class run_task; + +//******************************************************************************** +// graph tasks helpers +//******************************************************************************** + +//! The list of graph tasks +class graph_task_list : no_copy { +private: + graph_task* my_first; + graph_task** my_next_ptr; +public: + //! Construct empty list + graph_task_list() : my_first(nullptr), my_next_ptr(&my_first) {} + + //! True if list is empty; false otherwise. + bool empty() const { return !my_first; } + + //! Push task onto back of list. + void push_back(graph_task& task) { + task.my_next = nullptr; + *my_next_ptr = &task; + my_next_ptr = &task.my_next; + } + + //! Pop the front task from the list. + graph_task& pop_front() { + __TBB_ASSERT(!empty(), "attempt to pop item from empty task_list"); + graph_task* result = my_first; + my_first = result->my_next; + if (!my_first) { + my_next_ptr = &my_first; + } + return *result; + } +}; + +//! The graph class +/** This class serves as a handle to the graph */ +class graph : no_copy, public graph_proxy { + friend class graph_node; + + void prepare_task_arena(bool reinit = false) { + if (reinit) { + __TBB_ASSERT(my_task_arena, "task arena is NULL"); + my_task_arena->terminate(); + my_task_arena->initialize(task_arena::attach()); + } + else { + __TBB_ASSERT(my_task_arena == NULL, "task arena is not NULL"); + my_task_arena = new task_arena(task_arena::attach()); + } + if (!my_task_arena->is_active()) // failed to attach + my_task_arena->initialize(); // create a new, default-initialized arena + __TBB_ASSERT(my_task_arena->is_active(), "task arena is not active"); + } + +public: + //! Constructs a graph with isolated task_group_context + graph(); + + //! Constructs a graph with use_this_context as context + explicit graph(task_group_context& use_this_context); + + //! Destroys the graph. + /** Calls wait_for_all, then destroys the root task and context. */ + ~graph(); + + //! Used to register that an external entity may still interact with the graph. + /** The graph will not return from wait_for_all until a matching number of release_wait calls is + made. */ + void reserve_wait() override; + + //! Deregisters an external entity that may have interacted with the graph. + /** The graph will not return from wait_for_all until all the number of reserve_wait calls + matches the number of release_wait calls. */ + void release_wait() override; + + //! Wait until graph is idle and the number of release_wait calls equals to the number of + //! reserve_wait calls. + /** The waiting thread will go off and steal work while it is blocked in the wait_for_all. */ + void wait_for_all() { + cancelled = false; + caught_exception = false; + try_call([this] { + my_task_arena->execute([this] { + wait(my_wait_context, *my_context); + }); + cancelled = my_context->is_group_execution_cancelled(); + }).on_exception([this] { + my_context->reset(); + caught_exception = true; + cancelled = true; + }); + // TODO: the "if" condition below is just a work-around to support the concurrent wait + // mode. The cancellation and exception mechanisms are still broken in this mode. 
+ // Consider using task group not to re-implement the same functionality. + if (!(my_context->traits() & task_group_context::concurrent_wait)) { + my_context->reset(); // consistent with behavior in catch() + } + } + +#if TODO_REVAMP +#error Decide on ref_count() presence. + Its only use is in the template<typename T, typename BufferType> void test_resets() +#endif + +#if __TBB_EXTRA_DEBUG + unsigned ref_count() const { return my_wait_context.reference_count(); } +#endif + + + // TODO revamp: consider adding getter for task_group_context. + + // ITERATORS + template<typename C, typename N> + friend class graph_iterator; + + // Graph iterator typedefs + typedef graph_iterator<graph, graph_node> iterator; + typedef graph_iterator<const graph, const graph_node> const_iterator; + + // Graph iterator constructors + //! start iterator + iterator begin(); + //! end iterator + iterator end(); + //! start const iterator + const_iterator begin() const; + //! end const iterator + const_iterator end() const; + //! start const iterator + const_iterator cbegin() const; + //! end const iterator + const_iterator cend() const; + + // thread-unsafe state reset. + void reset(reset_flags f = rf_reset_protocol); + + //! cancels execution of the associated task_group_context + void cancel(); + + //! return status of graph execution + bool is_cancelled() { return cancelled; } + bool exception_thrown() { return caught_exception; } + +private: + wait_context my_wait_context; + task_group_context *my_context; + bool own_context; + bool cancelled; + bool caught_exception; + bool my_is_active; + + graph_node *my_nodes, *my_nodes_last; + + tbb::spin_mutex nodelist_mutex; + void register_node(graph_node *n); + void remove_node(graph_node *n); + + task_arena* my_task_arena; + + graph_task_priority_queue_t my_priority_queue; + + friend void activate_graph(graph& g); + friend void deactivate_graph(graph& g); + friend bool is_graph_active(graph& g); + friend graph_task* prioritize_task(graph& g, graph_task& arena_task); + friend void spawn_in_graph_arena(graph& g, graph_task& arena_task); + friend void enqueue_in_graph_arena(graph &g, graph_task& arena_task); + + friend class task_arena_base; + +}; // class graph + +inline void graph_task::destruct_and_deallocate(const execution_data& ed) { + auto allocator = my_allocator; + // TODO: investigate if direct call of derived destructor gives any benefits. + this->~graph_task(); + allocator.deallocate(this, ed); +} + +inline void graph_task::finalize(const execution_data& ed) { + graph& g = my_graph; + destruct_and_deallocate(ed); + g.release_wait(); +} + +inline task* graph_task::cancel(execution_data& ed) { + finalize(ed); + return nullptr; +} + +//******************************************************************************** +// end of graph tasks helpers +//******************************************************************************** + + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET +class get_graph_helper; +#endif + +//! The base of all graph nodes. +class graph_node : no_copy { + friend class graph; + template<typename C, typename N> + friend class graph_iterator; + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + friend class get_graph_helper; +#endif + +protected: + graph& my_graph; + graph& graph_reference() const { + // TODO revamp: propagate graph_reference() method to all the reference places. 
+ return my_graph; + } + graph_node* next = nullptr; + graph_node* prev = nullptr; +public: + explicit graph_node(graph& g); + + virtual ~graph_node(); + +protected: + // performs the reset on an individual node. + virtual void reset_node(reset_flags f = rf_reset_protocol) = 0; +}; // class graph_node + +inline void activate_graph(graph& g) { + g.my_is_active = true; +} + +inline void deactivate_graph(graph& g) { + g.my_is_active = false; +} + +inline bool is_graph_active(graph& g) { + return g.my_is_active; +} + +inline graph_task* prioritize_task(graph& g, graph_task& gt) { + if( no_priority == gt.priority ) + return &gt; + + //! Non-preemptive priority pattern. The original task is submitted as a work item to the + //! priority queue, and a new critical task is created to take and execute a work item with + //! the highest known priority. The reference counting responsibility is transferred (via + //! allocate_continuation) to the new task. + task* critical_task = gt.my_allocator.new_object<priority_task_selector>(g.my_priority_queue, gt.my_allocator); + __TBB_ASSERT( critical_task, "bad_alloc?" ); + g.my_priority_queue.push(&gt); + using tbb::detail::d1::submit; + submit( *critical_task, *g.my_task_arena, *g.my_context, /*as_critical=*/true ); + return nullptr; +} + +//! Spawns a task inside graph arena +inline void spawn_in_graph_arena(graph& g, graph_task& arena_task) { + if (is_graph_active(g)) { + task* gt = prioritize_task(g, arena_task); + if( !gt ) + return; + + __TBB_ASSERT(g.my_task_arena && g.my_task_arena->is_active(), NULL); + submit( *gt, *g.my_task_arena, *g.my_context +#if __TBB_PREVIEW_CRITICAL_TASKS + , /*as_critical=*/false +#endif + ); + } +} + +// TODO revamp: unify *_in_graph_arena functions + +//! Enqueues a task inside graph arena +inline void enqueue_in_graph_arena(graph &g, graph_task& arena_task) { + if (is_graph_active(g)) { + __TBB_ASSERT( g.my_task_arena && g.my_task_arena->is_active(), "Is graph's arena initialized and active?" ); + + // TODO revamp: decide on the approach that does not postpone critical task + if( task* gt = prioritize_task(g, arena_task) ) + submit( *gt, *g.my_task_arena, *g.my_context, /*as_critical=*/false); + } +} + +} // namespace d1 +} // namespace detail +} // namespace tbb + +#endif // __TBB_flow_graph_impl_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_indexer_impl.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_indexer_impl.h new file mode 100644 index 0000000000..f4f55a6c7a --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_indexer_impl.h @@ -0,0 +1,351 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB__flow_graph_indexer_impl_H +#define __TBB__flow_graph_indexer_impl_H + +#ifndef __TBB_flow_graph_H +#error Do not #include this internal file directly; use public TBB headers instead. 
+#endif + +// included in namespace tbb::detail::d1 + +#include "_flow_graph_types_impl.h" + + // Output of the indexer_node is a tbb::flow::tagged_msg, and will be of + // the form tagged_msg<tag, result> + // where the value of tag will indicate which result was put to the + // successor. + + template<typename IndexerNodeBaseType, typename T, size_t K> + graph_task* do_try_put(const T &v, void *p) { + typename IndexerNodeBaseType::output_type o(K, v); + return reinterpret_cast<IndexerNodeBaseType *>(p)->try_put_task(&o); + } + + template<typename TupleTypes,int N> + struct indexer_helper { + template<typename IndexerNodeBaseType, typename PortTuple> + static inline void set_indexer_node_pointer(PortTuple &my_input, IndexerNodeBaseType *p, graph& g) { + typedef typename std::tuple_element<N-1, TupleTypes>::type T; + graph_task* (*indexer_node_put_task)(const T&, void *) = do_try_put<IndexerNodeBaseType, T, N-1>; + std::get<N-1>(my_input).set_up(p, indexer_node_put_task, g); + indexer_helper<TupleTypes,N-1>::template set_indexer_node_pointer<IndexerNodeBaseType,PortTuple>(my_input, p, g); + } + }; + + template<typename TupleTypes> + struct indexer_helper<TupleTypes,1> { + template<typename IndexerNodeBaseType, typename PortTuple> + static inline void set_indexer_node_pointer(PortTuple &my_input, IndexerNodeBaseType *p, graph& g) { + typedef typename std::tuple_element<0, TupleTypes>::type T; + graph_task* (*indexer_node_put_task)(const T&, void *) = do_try_put<IndexerNodeBaseType, T, 0>; + std::get<0>(my_input).set_up(p, indexer_node_put_task, g); + } + }; + + template<typename T> + class indexer_input_port : public receiver<T> { + private: + void* my_indexer_ptr; + typedef graph_task* (* forward_function_ptr)(T const &, void* ); + forward_function_ptr my_try_put_task; + graph* my_graph; + public: + void set_up(void* p, forward_function_ptr f, graph& g) { + my_indexer_ptr = p; + my_try_put_task = f; + my_graph = &g; + } + + protected: + template< typename R, typename B > friend class run_and_put_task; + template<typename X, typename Y> friend class broadcast_cache; + template<typename X, typename Y> friend class round_robin_cache; + graph_task* try_put_task(const T &v) override { + return my_try_put_task(v, my_indexer_ptr); + } + + graph& graph_reference() const override { + return *my_graph; + } + }; + + template<typename InputTuple, typename OutputType, typename StructTypes> + class indexer_node_FE { + public: + static const int N = std::tuple_size<InputTuple>::value; + typedef OutputType output_type; + typedef InputTuple input_type; + + // Some versions of Intel(R) C++ Compiler fail to generate an implicit constructor for the class which has std::tuple as a member. + indexer_node_FE() : my_inputs() {} + + input_type &input_ports() { return my_inputs; } + protected: + input_type my_inputs; + }; + + //! 
indexer_node_base + template<typename InputTuple, typename OutputType, typename StructTypes> + class indexer_node_base : public graph_node, public indexer_node_FE<InputTuple, OutputType,StructTypes>, + public sender<OutputType> { + protected: + using graph_node::my_graph; + public: + static const size_t N = std::tuple_size<InputTuple>::value; + typedef OutputType output_type; + typedef StructTypes tuple_types; + typedef typename sender<output_type>::successor_type successor_type; + typedef indexer_node_FE<InputTuple, output_type,StructTypes> input_ports_type; + + private: + // ----------- Aggregator ------------ + enum op_type { reg_succ, rem_succ, try__put_task + }; + typedef indexer_node_base<InputTuple,output_type,StructTypes> class_type; + + class indexer_node_base_operation : public aggregated_operation<indexer_node_base_operation> { + public: + char type; + union { + output_type const *my_arg; + successor_type *my_succ; + graph_task* bypass_t; + }; + indexer_node_base_operation(const output_type* e, op_type t) : + type(char(t)), my_arg(e) {} + indexer_node_base_operation(const successor_type &s, op_type t) : type(char(t)), + my_succ(const_cast<successor_type *>(&s)) {} + }; + + typedef aggregating_functor<class_type, indexer_node_base_operation> handler_type; + friend class aggregating_functor<class_type, indexer_node_base_operation>; + aggregator<handler_type, indexer_node_base_operation> my_aggregator; + + void handle_operations(indexer_node_base_operation* op_list) { + indexer_node_base_operation *current; + while(op_list) { + current = op_list; + op_list = op_list->next; + switch(current->type) { + + case reg_succ: + my_successors.register_successor(*(current->my_succ)); + current->status.store( SUCCEEDED, std::memory_order_release); + break; + + case rem_succ: + my_successors.remove_successor(*(current->my_succ)); + current->status.store( SUCCEEDED, std::memory_order_release); + break; + case try__put_task: { + current->bypass_t = my_successors.try_put_task(*(current->my_arg)); + current->status.store( SUCCEEDED, std::memory_order_release); // return of try_put_task actual return value + } + break; + } + } + } + // ---------- end aggregator ----------- + public: + indexer_node_base(graph& g) : graph_node(g), input_ports_type(), my_successors(this) { + indexer_helper<StructTypes,N>::set_indexer_node_pointer(this->my_inputs, this, g); + my_aggregator.initialize_handler(handler_type(this)); + } + + indexer_node_base(const indexer_node_base& other) + : graph_node(other.my_graph), input_ports_type(), sender<output_type>(), my_successors(this) + { + indexer_helper<StructTypes,N>::set_indexer_node_pointer(this->my_inputs, this, other.my_graph); + my_aggregator.initialize_handler(handler_type(this)); + } + + bool register_successor(successor_type &r) override { + indexer_node_base_operation op_data(r, reg_succ); + my_aggregator.execute(&op_data); + return op_data.status == SUCCEEDED; + } + + bool remove_successor( successor_type &r) override { + indexer_node_base_operation op_data(r, rem_succ); + my_aggregator.execute(&op_data); + return op_data.status == SUCCEEDED; + } + + graph_task* try_put_task(output_type const *v) { // not a virtual method in this class + indexer_node_base_operation op_data(v, try__put_task); + my_aggregator.execute(&op_data); + return op_data.bypass_t; + } + + protected: + void reset_node(reset_flags f) override { + if(f & rf_clear_edges) { + my_successors.clear(); + } + } + + private: + broadcast_cache<output_type, null_rw_mutex> my_successors; + }; 
//indexer_node_base + + + template<int N, typename InputTuple> struct input_types; + + template<typename InputTuple> + struct input_types<1, InputTuple> { + typedef typename std::tuple_element<0, InputTuple>::type first_type; + typedef tagged_msg<size_t, first_type > type; + }; + + template<typename InputTuple> + struct input_types<2, InputTuple> { + typedef typename std::tuple_element<0, InputTuple>::type first_type; + typedef typename std::tuple_element<1, InputTuple>::type second_type; + typedef tagged_msg<size_t, first_type, second_type> type; + }; + + template<typename InputTuple> + struct input_types<3, InputTuple> { + typedef typename std::tuple_element<0, InputTuple>::type first_type; + typedef typename std::tuple_element<1, InputTuple>::type second_type; + typedef typename std::tuple_element<2, InputTuple>::type third_type; + typedef tagged_msg<size_t, first_type, second_type, third_type> type; + }; + + template<typename InputTuple> + struct input_types<4, InputTuple> { + typedef typename std::tuple_element<0, InputTuple>::type first_type; + typedef typename std::tuple_element<1, InputTuple>::type second_type; + typedef typename std::tuple_element<2, InputTuple>::type third_type; + typedef typename std::tuple_element<3, InputTuple>::type fourth_type; + typedef tagged_msg<size_t, first_type, second_type, third_type, + fourth_type> type; + }; + + template<typename InputTuple> + struct input_types<5, InputTuple> { + typedef typename std::tuple_element<0, InputTuple>::type first_type; + typedef typename std::tuple_element<1, InputTuple>::type second_type; + typedef typename std::tuple_element<2, InputTuple>::type third_type; + typedef typename std::tuple_element<3, InputTuple>::type fourth_type; + typedef typename std::tuple_element<4, InputTuple>::type fifth_type; + typedef tagged_msg<size_t, first_type, second_type, third_type, + fourth_type, fifth_type> type; + }; + + template<typename InputTuple> + struct input_types<6, InputTuple> { + typedef typename std::tuple_element<0, InputTuple>::type first_type; + typedef typename std::tuple_element<1, InputTuple>::type second_type; + typedef typename std::tuple_element<2, InputTuple>::type third_type; + typedef typename std::tuple_element<3, InputTuple>::type fourth_type; + typedef typename std::tuple_element<4, InputTuple>::type fifth_type; + typedef typename std::tuple_element<5, InputTuple>::type sixth_type; + typedef tagged_msg<size_t, first_type, second_type, third_type, + fourth_type, fifth_type, sixth_type> type; + }; + + template<typename InputTuple> + struct input_types<7, InputTuple> { + typedef typename std::tuple_element<0, InputTuple>::type first_type; + typedef typename std::tuple_element<1, InputTuple>::type second_type; + typedef typename std::tuple_element<2, InputTuple>::type third_type; + typedef typename std::tuple_element<3, InputTuple>::type fourth_type; + typedef typename std::tuple_element<4, InputTuple>::type fifth_type; + typedef typename std::tuple_element<5, InputTuple>::type sixth_type; + typedef typename std::tuple_element<6, InputTuple>::type seventh_type; + typedef tagged_msg<size_t, first_type, second_type, third_type, + fourth_type, fifth_type, sixth_type, + seventh_type> type; + }; + + + template<typename InputTuple> + struct input_types<8, InputTuple> { + typedef typename std::tuple_element<0, InputTuple>::type first_type; + typedef typename std::tuple_element<1, InputTuple>::type second_type; + typedef typename std::tuple_element<2, InputTuple>::type third_type; + typedef typename std::tuple_element<3, 
InputTuple>::type fourth_type; + typedef typename std::tuple_element<4, InputTuple>::type fifth_type; + typedef typename std::tuple_element<5, InputTuple>::type sixth_type; + typedef typename std::tuple_element<6, InputTuple>::type seventh_type; + typedef typename std::tuple_element<7, InputTuple>::type eighth_type; + typedef tagged_msg<size_t, first_type, second_type, third_type, + fourth_type, fifth_type, sixth_type, + seventh_type, eighth_type> type; + }; + + + template<typename InputTuple> + struct input_types<9, InputTuple> { + typedef typename std::tuple_element<0, InputTuple>::type first_type; + typedef typename std::tuple_element<1, InputTuple>::type second_type; + typedef typename std::tuple_element<2, InputTuple>::type third_type; + typedef typename std::tuple_element<3, InputTuple>::type fourth_type; + typedef typename std::tuple_element<4, InputTuple>::type fifth_type; + typedef typename std::tuple_element<5, InputTuple>::type sixth_type; + typedef typename std::tuple_element<6, InputTuple>::type seventh_type; + typedef typename std::tuple_element<7, InputTuple>::type eighth_type; + typedef typename std::tuple_element<8, InputTuple>::type nineth_type; + typedef tagged_msg<size_t, first_type, second_type, third_type, + fourth_type, fifth_type, sixth_type, + seventh_type, eighth_type, nineth_type> type; + }; + + template<typename InputTuple> + struct input_types<10, InputTuple> { + typedef typename std::tuple_element<0, InputTuple>::type first_type; + typedef typename std::tuple_element<1, InputTuple>::type second_type; + typedef typename std::tuple_element<2, InputTuple>::type third_type; + typedef typename std::tuple_element<3, InputTuple>::type fourth_type; + typedef typename std::tuple_element<4, InputTuple>::type fifth_type; + typedef typename std::tuple_element<5, InputTuple>::type sixth_type; + typedef typename std::tuple_element<6, InputTuple>::type seventh_type; + typedef typename std::tuple_element<7, InputTuple>::type eighth_type; + typedef typename std::tuple_element<8, InputTuple>::type nineth_type; + typedef typename std::tuple_element<9, InputTuple>::type tenth_type; + typedef tagged_msg<size_t, first_type, second_type, third_type, + fourth_type, fifth_type, sixth_type, + seventh_type, eighth_type, nineth_type, + tenth_type> type; + }; + + // type generators + template<typename OutputTuple> + struct indexer_types : public input_types<std::tuple_size<OutputTuple>::value, OutputTuple> { + static const int N = std::tuple_size<OutputTuple>::value; + typedef typename input_types<N, OutputTuple>::type output_type; + typedef typename wrap_tuple_elements<N,indexer_input_port,OutputTuple>::type input_ports_type; + typedef indexer_node_FE<input_ports_type,output_type,OutputTuple> indexer_FE_type; + typedef indexer_node_base<input_ports_type, output_type, OutputTuple> indexer_base_type; + }; + + template<class OutputTuple> + class unfolded_indexer_node : public indexer_types<OutputTuple>::indexer_base_type { + public: + typedef typename indexer_types<OutputTuple>::input_ports_type input_ports_type; + typedef OutputTuple tuple_types; + typedef typename indexer_types<OutputTuple>::output_type output_type; + private: + typedef typename indexer_types<OutputTuple>::indexer_base_type base_type; + public: + unfolded_indexer_node(graph& g) : base_type(g) {} + unfolded_indexer_node(const unfolded_indexer_node &other) : base_type(other) {} + }; + +#endif /* __TBB__flow_graph_indexer_impl_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_item_buffer_impl.h 
b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_item_buffer_impl.h new file mode 100644 index 0000000000..4466bf4180 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_item_buffer_impl.h @@ -0,0 +1,279 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB__flow_graph_item_buffer_impl_H +#define __TBB__flow_graph_item_buffer_impl_H + +#ifndef __TBB_flow_graph_H +#error Do not #include this internal file directly; use public TBB headers instead. +#endif + +#include "_aligned_space.h" + +// in namespace tbb::flow::interfaceX (included in _flow_graph_node_impl.h) + +//! Expandable buffer of items. The possible operations are push, pop, +//* tests for empty and so forth. No mutual exclusion is built in. +//* objects are constructed into and explicitly-destroyed. get_my_item gives +// a read-only reference to the item in the buffer. set_my_item may be called +// with either an empty or occupied slot. + +template <typename T, typename A=cache_aligned_allocator<T> > +class item_buffer { +public: + typedef T item_type; + enum buffer_item_state { no_item=0, has_item=1, reserved_item=2 }; +protected: + typedef size_t size_type; + typedef std::pair<item_type, buffer_item_state> aligned_space_item; + typedef aligned_space<aligned_space_item> buffer_item_type; + typedef typename allocator_traits<A>::template rebind_alloc<buffer_item_type> allocator_type; + buffer_item_type *my_array; + size_type my_array_size; + static const size_type initial_buffer_size = 4; + size_type my_head; + size_type my_tail; + + bool buffer_empty() const { return my_head == my_tail; } + + aligned_space_item &item(size_type i) { + __TBB_ASSERT(!(size_type(&(my_array[i&(my_array_size-1)].begin()->second))%alignment_of<buffer_item_state>::value),NULL); + __TBB_ASSERT(!(size_type(&(my_array[i&(my_array_size-1)].begin()->first))%alignment_of<item_type>::value), NULL); + return *my_array[i & (my_array_size - 1) ].begin(); + } + + const aligned_space_item &item(size_type i) const { + __TBB_ASSERT(!(size_type(&(my_array[i&(my_array_size-1)].begin()->second))%alignment_of<buffer_item_state>::value), NULL); + __TBB_ASSERT(!(size_type(&(my_array[i&(my_array_size-1)].begin()->first))%alignment_of<item_type>::value), NULL); + return *my_array[i & (my_array_size-1)].begin(); + } + + bool my_item_valid(size_type i) const { return (i < my_tail) && (i >= my_head) && (item(i).second != no_item); } +#if TBB_USE_ASSERT + bool my_item_reserved(size_type i) const { return item(i).second == reserved_item; } +#endif + + // object management in buffer + const item_type &get_my_item(size_t i) const { + __TBB_ASSERT(my_item_valid(i),"attempt to get invalid item"); + item_type* itm = const_cast<item_type*>(reinterpret_cast<const item_type*>(&item(i).first)); + return *itm; + } + + // may be called with an empty slot or a slot that has already been constructed into. 
+ void set_my_item(size_t i, const item_type &o) { + if(item(i).second != no_item) { + destroy_item(i); + } + new(&(item(i).first)) item_type(o); + item(i).second = has_item; + } + + // destructively-fetch an object from the buffer + void fetch_item(size_t i, item_type &o) { + __TBB_ASSERT(my_item_valid(i), "Trying to fetch an empty slot"); + o = get_my_item(i); // could have std::move assign semantics + destroy_item(i); + } + + // move an existing item from one slot to another. The moved-to slot must be unoccupied, + // the moved-from slot must exist and not be reserved. The after, from will be empty, + // to will be occupied but not reserved + void move_item(size_t to, size_t from) { + __TBB_ASSERT(!my_item_valid(to), "Trying to move to a non-empty slot"); + __TBB_ASSERT(my_item_valid(from), "Trying to move from an empty slot"); + set_my_item(to, get_my_item(from)); // could have std::move semantics + destroy_item(from); + + } + + // put an item in an empty slot. Return true if successful, else false + bool place_item(size_t here, const item_type &me) { +#if !TBB_DEPRECATED_SEQUENCER_DUPLICATES + if(my_item_valid(here)) return false; +#endif + set_my_item(here, me); + return true; + } + + // could be implemented with std::move semantics + void swap_items(size_t i, size_t j) { + __TBB_ASSERT(my_item_valid(i) && my_item_valid(j), "attempt to swap invalid item(s)"); + item_type temp = get_my_item(i); + set_my_item(i, get_my_item(j)); + set_my_item(j, temp); + } + + void destroy_item(size_type i) { + __TBB_ASSERT(my_item_valid(i), "destruction of invalid item"); + item(i).first.~item_type(); + item(i).second = no_item; + } + + // returns the front element + const item_type& front() const + { + __TBB_ASSERT(my_item_valid(my_head), "attempt to fetch head non-item"); + return get_my_item(my_head); + } + + // returns the back element + const item_type& back() const + { + __TBB_ASSERT(my_item_valid(my_tail - 1), "attempt to fetch head non-item"); + return get_my_item(my_tail - 1); + } + + // following methods are for reservation of the front of a buffer. + void reserve_item(size_type i) { __TBB_ASSERT(my_item_valid(i) && !my_item_reserved(i), "item cannot be reserved"); item(i).second = reserved_item; } + void release_item(size_type i) { __TBB_ASSERT(my_item_reserved(i), "item is not reserved"); item(i).second = has_item; } + + void destroy_front() { destroy_item(my_head); ++my_head; } + void destroy_back() { destroy_item(my_tail-1); --my_tail; } + + // we have to be able to test against a new tail value without changing my_tail + // grow_array doesn't work if we change my_tail when the old array is too small + size_type size(size_t new_tail = 0) { return (new_tail ? new_tail : my_tail) - my_head; } + size_type capacity() { return my_array_size; } + // sequencer_node does not use this method, so we don't + // need a version that passes in the new_tail value. + bool buffer_full() { return size() >= capacity(); } + + //! Grows the internal array. + void grow_my_array( size_t minimum_size ) { + // test that we haven't made the structure inconsistent. + __TBB_ASSERT(capacity() >= my_tail - my_head, "total items exceed capacity"); + size_type new_size = my_array_size ? 
2*my_array_size : initial_buffer_size; + while( new_size<minimum_size ) + new_size*=2; + + buffer_item_type* new_array = allocator_type().allocate(new_size); + + // initialize validity to "no" + for( size_type i=0; i<new_size; ++i ) { new_array[i].begin()->second = no_item; } + + for( size_type i=my_head; i<my_tail; ++i) { + if(my_item_valid(i)) { // sequencer_node may have empty slots + // placement-new copy-construct; could be std::move + char *new_space = (char *)&(new_array[i&(new_size-1)].begin()->first); + (void)new(new_space) item_type(get_my_item(i)); + new_array[i&(new_size-1)].begin()->second = item(i).second; + } + } + + clean_up_buffer(/*reset_pointers*/false); + + my_array = new_array; + my_array_size = new_size; + } + + bool push_back(item_type &v) { + if(buffer_full()) { + grow_my_array(size() + 1); + } + set_my_item(my_tail, v); + ++my_tail; + return true; + } + + bool pop_back(item_type &v) { + if (!my_item_valid(my_tail-1)) { + return false; + } + v = this->back(); + destroy_back(); + return true; + } + + bool pop_front(item_type &v) { + if(!my_item_valid(my_head)) { + return false; + } + v = this->front(); + destroy_front(); + return true; + } + + // This is used both for reset and for grow_my_array. In the case of grow_my_array + // we want to retain the values of the head and tail. + void clean_up_buffer(bool reset_pointers) { + if (my_array) { + for( size_type i=my_head; i<my_tail; ++i ) { + if(my_item_valid(i)) + destroy_item(i); + } + allocator_type().deallocate(my_array,my_array_size); + } + my_array = NULL; + if(reset_pointers) { + my_head = my_tail = my_array_size = 0; + } + } + +public: + //! Constructor + item_buffer( ) : my_array(NULL), my_array_size(0), + my_head(0), my_tail(0) { + grow_my_array(initial_buffer_size); + } + + ~item_buffer() { + clean_up_buffer(/*reset_pointers*/true); + } + + void reset() { clean_up_buffer(/*reset_pointers*/true); grow_my_array(initial_buffer_size); } + +}; + +//! item_buffer with reservable front-end. NOTE: if reserving, do not +//* complete operation with pop_front(); use consume_front(). +//* No synchronization built-in. 
+template<typename T, typename A=cache_aligned_allocator<T> > +class reservable_item_buffer : public item_buffer<T, A> { +protected: + using item_buffer<T, A>::my_item_valid; + using item_buffer<T, A>::my_head; + +public: + reservable_item_buffer() : item_buffer<T, A>(), my_reserved(false) {} + void reset() {my_reserved = false; item_buffer<T,A>::reset(); } +protected: + + bool reserve_front(T &v) { + if(my_reserved || !my_item_valid(this->my_head)) return false; + my_reserved = true; + // reserving the head + v = this->front(); + this->reserve_item(this->my_head); + return true; + } + + void consume_front() { + __TBB_ASSERT(my_reserved, "Attempt to consume a non-reserved item"); + this->destroy_front(); + my_reserved = false; + } + + void release_front() { + __TBB_ASSERT(my_reserved, "Attempt to release a non-reserved item"); + this->release_item(this->my_head); + my_reserved = false; + } + + bool my_reserved; +}; + +#endif // __TBB__flow_graph_item_buffer_impl_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_join_impl.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_join_impl.h new file mode 100644 index 0000000000..98b357cdbc --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_join_impl.h @@ -0,0 +1,1706 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB__flow_graph_join_impl_H +#define __TBB__flow_graph_join_impl_H + +#ifndef __TBB_flow_graph_H +#error Do not #include this internal file directly; use public TBB headers instead. +#endif + +// included into namespace tbb::detail::d1 + + struct forwarding_base : no_assign { + forwarding_base(graph &g) : graph_ref(g) {} + virtual ~forwarding_base() {} + graph& graph_ref; + }; + + struct queueing_forwarding_base : forwarding_base { + using forwarding_base::forwarding_base; + // decrement_port_count may create a forwarding task. If we cannot handle the task + // ourselves, ask decrement_port_count to deal with it. + virtual graph_task* decrement_port_count(bool handle_task) = 0; + }; + + struct reserving_forwarding_base : forwarding_base { + using forwarding_base::forwarding_base; + // decrement_port_count may create a forwarding task. If we cannot handle the task + // ourselves, ask decrement_port_count to deal with it. + virtual graph_task* decrement_port_count() = 0; + virtual void increment_port_count() = 0; + }; + + // specialization that lets us keep a copy of the current_key for building results. + // KeyType can be a reference type. 
+ template<typename KeyType> + struct matching_forwarding_base : public forwarding_base { + typedef typename std::decay<KeyType>::type current_key_type; + matching_forwarding_base(graph &g) : forwarding_base(g) { } + virtual graph_task* increment_key_count(current_key_type const & /*t*/) = 0; + current_key_type current_key; // so ports can refer to FE's desired items + }; + + template< int N > + struct join_helper { + + template< typename TupleType, typename PortType > + static inline void set_join_node_pointer(TupleType &my_input, PortType *port) { + std::get<N-1>( my_input ).set_join_node_pointer(port); + join_helper<N-1>::set_join_node_pointer( my_input, port ); + } + template< typename TupleType > + static inline void consume_reservations( TupleType &my_input ) { + std::get<N-1>( my_input ).consume(); + join_helper<N-1>::consume_reservations( my_input ); + } + + template< typename TupleType > + static inline void release_my_reservation( TupleType &my_input ) { + std::get<N-1>( my_input ).release(); + } + + template <typename TupleType> + static inline void release_reservations( TupleType &my_input) { + join_helper<N-1>::release_reservations(my_input); + release_my_reservation(my_input); + } + + template< typename InputTuple, typename OutputTuple > + static inline bool reserve( InputTuple &my_input, OutputTuple &out) { + if ( !std::get<N-1>( my_input ).reserve( std::get<N-1>( out ) ) ) return false; + if ( !join_helper<N-1>::reserve( my_input, out ) ) { + release_my_reservation( my_input ); + return false; + } + return true; + } + + template<typename InputTuple, typename OutputTuple> + static inline bool get_my_item( InputTuple &my_input, OutputTuple &out) { + bool res = std::get<N-1>(my_input).get_item(std::get<N-1>(out) ); // may fail + return join_helper<N-1>::get_my_item(my_input, out) && res; // do get on other inputs before returning + } + + template<typename InputTuple, typename OutputTuple> + static inline bool get_items(InputTuple &my_input, OutputTuple &out) { + return get_my_item(my_input, out); + } + + template<typename InputTuple> + static inline void reset_my_port(InputTuple &my_input) { + join_helper<N-1>::reset_my_port(my_input); + std::get<N-1>(my_input).reset_port(); + } + + template<typename InputTuple> + static inline void reset_ports(InputTuple& my_input) { + reset_my_port(my_input); + } + + template<typename InputTuple, typename KeyFuncTuple> + static inline void set_key_functors(InputTuple &my_input, KeyFuncTuple &my_key_funcs) { + std::get<N-1>(my_input).set_my_key_func(std::get<N-1>(my_key_funcs)); + std::get<N-1>(my_key_funcs) = nullptr; + join_helper<N-1>::set_key_functors(my_input, my_key_funcs); + } + + template< typename KeyFuncTuple> + static inline void copy_key_functors(KeyFuncTuple &my_inputs, KeyFuncTuple &other_inputs) { + __TBB_ASSERT( + std::get<N-1>(other_inputs).get_my_key_func(), + "key matching join node should not be instantiated without functors." 
+ ); + std::get<N-1>(my_inputs).set_my_key_func(std::get<N-1>(other_inputs).get_my_key_func()->clone()); + join_helper<N-1>::copy_key_functors(my_inputs, other_inputs); + } + + template<typename InputTuple> + static inline void reset_inputs(InputTuple &my_input, reset_flags f) { + join_helper<N-1>::reset_inputs(my_input, f); + std::get<N-1>(my_input).reset_receiver(f); + } + }; // join_helper<N> + + template< > + struct join_helper<1> { + + template< typename TupleType, typename PortType > + static inline void set_join_node_pointer(TupleType &my_input, PortType *port) { + std::get<0>( my_input ).set_join_node_pointer(port); + } + + template< typename TupleType > + static inline void consume_reservations( TupleType &my_input ) { + std::get<0>( my_input ).consume(); + } + + template< typename TupleType > + static inline void release_my_reservation( TupleType &my_input ) { + std::get<0>( my_input ).release(); + } + + template<typename TupleType> + static inline void release_reservations( TupleType &my_input) { + release_my_reservation(my_input); + } + + template< typename InputTuple, typename OutputTuple > + static inline bool reserve( InputTuple &my_input, OutputTuple &out) { + return std::get<0>( my_input ).reserve( std::get<0>( out ) ); + } + + template<typename InputTuple, typename OutputTuple> + static inline bool get_my_item( InputTuple &my_input, OutputTuple &out) { + return std::get<0>(my_input).get_item(std::get<0>(out)); + } + + template<typename InputTuple, typename OutputTuple> + static inline bool get_items(InputTuple &my_input, OutputTuple &out) { + return get_my_item(my_input, out); + } + + template<typename InputTuple> + static inline void reset_my_port(InputTuple &my_input) { + std::get<0>(my_input).reset_port(); + } + + template<typename InputTuple> + static inline void reset_ports(InputTuple& my_input) { + reset_my_port(my_input); + } + + template<typename InputTuple, typename KeyFuncTuple> + static inline void set_key_functors(InputTuple &my_input, KeyFuncTuple &my_key_funcs) { + std::get<0>(my_input).set_my_key_func(std::get<0>(my_key_funcs)); + std::get<0>(my_key_funcs) = nullptr; + } + + template< typename KeyFuncTuple> + static inline void copy_key_functors(KeyFuncTuple &my_inputs, KeyFuncTuple &other_inputs) { + __TBB_ASSERT( + std::get<0>(other_inputs).get_my_key_func(), + "key matching join node should not be instantiated without functors." + ); + std::get<0>(my_inputs).set_my_key_func(std::get<0>(other_inputs).get_my_key_func()->clone()); + } + template<typename InputTuple> + static inline void reset_inputs(InputTuple &my_input, reset_flags f) { + std::get<0>(my_input).reset_receiver(f); + } + }; // join_helper<1> + + //! 
The two-phase join port + template< typename T > + class reserving_port : public receiver<T> { + public: + typedef T input_type; + typedef typename receiver<input_type>::predecessor_type predecessor_type; + + private: + // ----------- Aggregator ------------ + enum op_type { reg_pred, rem_pred, res_item, rel_res, con_res + }; + typedef reserving_port<T> class_type; + + class reserving_port_operation : public aggregated_operation<reserving_port_operation> { + public: + char type; + union { + T *my_arg; + predecessor_type *my_pred; + }; + reserving_port_operation(const T& e, op_type t) : + type(char(t)), my_arg(const_cast<T*>(&e)) {} + reserving_port_operation(const predecessor_type &s, op_type t) : type(char(t)), + my_pred(const_cast<predecessor_type *>(&s)) {} + reserving_port_operation(op_type t) : type(char(t)) {} + }; + + typedef aggregating_functor<class_type, reserving_port_operation> handler_type; + friend class aggregating_functor<class_type, reserving_port_operation>; + aggregator<handler_type, reserving_port_operation> my_aggregator; + + void handle_operations(reserving_port_operation* op_list) { + reserving_port_operation *current; + bool was_missing_predecessors = false; + while(op_list) { + current = op_list; + op_list = op_list->next; + switch(current->type) { + case reg_pred: + was_missing_predecessors = my_predecessors.empty(); + my_predecessors.add(*(current->my_pred)); + if ( was_missing_predecessors ) { + (void) my_join->decrement_port_count(); // may try to forward + } + current->status.store( SUCCEEDED, std::memory_order_release); + break; + case rem_pred: + if ( !my_predecessors.empty() ) { + my_predecessors.remove(*(current->my_pred)); + if ( my_predecessors.empty() ) // was the last predecessor + my_join->increment_port_count(); + } + // TODO: consider returning failure if there were no predecessors to remove + current->status.store( SUCCEEDED, std::memory_order_release ); + break; + case res_item: + if ( reserved ) { + current->status.store( FAILED, std::memory_order_release); + } + else if ( my_predecessors.try_reserve( *(current->my_arg) ) ) { + reserved = true; + current->status.store( SUCCEEDED, std::memory_order_release); + } else { + if ( my_predecessors.empty() ) { + my_join->increment_port_count(); + } + current->status.store( FAILED, std::memory_order_release); + } + break; + case rel_res: + reserved = false; + my_predecessors.try_release( ); + current->status.store( SUCCEEDED, std::memory_order_release); + break; + case con_res: + reserved = false; + my_predecessors.try_consume( ); + current->status.store( SUCCEEDED, std::memory_order_release); + break; + } + } + } + + protected: + template< typename R, typename B > friend class run_and_put_task; + template<typename X, typename Y> friend class broadcast_cache; + template<typename X, typename Y> friend class round_robin_cache; + graph_task* try_put_task( const T & ) override { + return nullptr; + } + + graph& graph_reference() const override { + return my_join->graph_ref; + } + + public: + + //! Constructor + reserving_port() : my_join(nullptr), my_predecessors(this), reserved(false) { + my_aggregator.initialize_handler(handler_type(this)); + } + + // copy constructor + reserving_port(const reserving_port& /* other */) = delete; + + void set_join_node_pointer(reserving_forwarding_base *join) { + my_join = join; + } + + //! 
Add a predecessor + bool register_predecessor( predecessor_type &src ) override { + reserving_port_operation op_data(src, reg_pred); + my_aggregator.execute(&op_data); + return op_data.status == SUCCEEDED; + } + + //! Remove a predecessor + bool remove_predecessor( predecessor_type &src ) override { + reserving_port_operation op_data(src, rem_pred); + my_aggregator.execute(&op_data); + return op_data.status == SUCCEEDED; + } + + //! Reserve an item from the port + bool reserve( T &v ) { + reserving_port_operation op_data(v, res_item); + my_aggregator.execute(&op_data); + return op_data.status == SUCCEEDED; + } + + //! Release the port + void release( ) { + reserving_port_operation op_data(rel_res); + my_aggregator.execute(&op_data); + } + + //! Complete use of the port + void consume( ) { + reserving_port_operation op_data(con_res); + my_aggregator.execute(&op_data); + } + + void reset_receiver( reset_flags f) { + if(f & rf_clear_edges) my_predecessors.clear(); + else + my_predecessors.reset(); + reserved = false; + __TBB_ASSERT(!(f&rf_clear_edges) || my_predecessors.empty(), "port edges not removed"); + } + + private: +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + friend class get_graph_helper; +#endif + + reserving_forwarding_base *my_join; + reservable_predecessor_cache< T, null_mutex > my_predecessors; + bool reserved; + }; // reserving_port + + //! queueing join_port + template<typename T> + class queueing_port : public receiver<T>, public item_buffer<T> { + public: + typedef T input_type; + typedef typename receiver<input_type>::predecessor_type predecessor_type; + typedef queueing_port<T> class_type; + + // ----------- Aggregator ------------ + private: + enum op_type { get__item, res_port, try__put_task + }; + + class queueing_port_operation : public aggregated_operation<queueing_port_operation> { + public: + char type; + T my_val; + T* my_arg; + graph_task* bypass_t; + // constructor for value parameter + queueing_port_operation(const T& e, op_type t) : + type(char(t)), my_val(e) + , bypass_t(nullptr) + {} + // constructor for pointer parameter + queueing_port_operation(const T* p, op_type t) : + type(char(t)), my_arg(const_cast<T*>(p)) + , bypass_t(nullptr) + {} + // constructor with no parameter + queueing_port_operation(op_type t) : type(char(t)) + , bypass_t(nullptr) + {} + }; + + typedef aggregating_functor<class_type, queueing_port_operation> handler_type; + friend class aggregating_functor<class_type, queueing_port_operation>; + aggregator<handler_type, queueing_port_operation> my_aggregator; + + void handle_operations(queueing_port_operation* op_list) { + queueing_port_operation *current; + bool was_empty; + while(op_list) { + current = op_list; + op_list = op_list->next; + switch(current->type) { + case try__put_task: { + graph_task* rtask = nullptr; + was_empty = this->buffer_empty(); + this->push_back(current->my_val); + if (was_empty) rtask = my_join->decrement_port_count(false); + else + rtask = SUCCESSFULLY_ENQUEUED; + current->bypass_t = rtask; + current->status.store( SUCCEEDED, std::memory_order_release); + } + break; + case get__item: + if(!this->buffer_empty()) { + *(current->my_arg) = this->front(); + current->status.store( SUCCEEDED, std::memory_order_release); + } + else { + current->status.store( FAILED, std::memory_order_release); + } + break; + case res_port: + __TBB_ASSERT(this->my_item_valid(this->my_head), "No item to reset"); + this->destroy_front(); + if(this->my_item_valid(this->my_head)) { + (void)my_join->decrement_port_count(true); + } + 
current->status.store( SUCCEEDED, std::memory_order_release); + break; + } + } + } + // ------------ End Aggregator --------------- + + protected: + template< typename R, typename B > friend class run_and_put_task; + template<typename X, typename Y> friend class broadcast_cache; + template<typename X, typename Y> friend class round_robin_cache; + graph_task* try_put_task(const T &v) override { + queueing_port_operation op_data(v, try__put_task); + my_aggregator.execute(&op_data); + __TBB_ASSERT(op_data.status == SUCCEEDED || !op_data.bypass_t, "inconsistent return from aggregator"); + if(!op_data.bypass_t) return SUCCESSFULLY_ENQUEUED; + return op_data.bypass_t; + } + + graph& graph_reference() const override { + return my_join->graph_ref; + } + + public: + + //! Constructor + queueing_port() : item_buffer<T>() { + my_join = nullptr; + my_aggregator.initialize_handler(handler_type(this)); + } + + //! copy constructor + queueing_port(const queueing_port& /* other */) = delete; + + //! record parent for tallying available items + void set_join_node_pointer(queueing_forwarding_base *join) { + my_join = join; + } + + bool get_item( T &v ) { + queueing_port_operation op_data(&v, get__item); + my_aggregator.execute(&op_data); + return op_data.status == SUCCEEDED; + } + + // reset_port is called when item is accepted by successor, but + // is initiated by join_node. + void reset_port() { + queueing_port_operation op_data(res_port); + my_aggregator.execute(&op_data); + return; + } + + void reset_receiver(reset_flags) { + item_buffer<T>::reset(); + } + + private: +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + friend class get_graph_helper; +#endif + + queueing_forwarding_base *my_join; + }; // queueing_port + +#include "_flow_graph_tagged_buffer_impl.h" + + template<typename K> + struct count_element { + K my_key; + size_t my_value; + }; + + // method to access the key in the counting table + // the ref has already been removed from K + template< typename K > + struct key_to_count_functor { + typedef count_element<K> table_item_type; + const K& operator()(const table_item_type& v) { return v.my_key; } + }; + + // the ports can have only one template parameter. 
We wrap the types needed in + // a traits type + template< class TraitsType > + class key_matching_port : + public receiver<typename TraitsType::T>, + public hash_buffer< typename TraitsType::K, typename TraitsType::T, typename TraitsType::TtoK, + typename TraitsType::KHash > { + public: + typedef TraitsType traits; + typedef key_matching_port<traits> class_type; + typedef typename TraitsType::T input_type; + typedef typename TraitsType::K key_type; + typedef typename std::decay<key_type>::type noref_key_type; + typedef typename receiver<input_type>::predecessor_type predecessor_type; + typedef typename TraitsType::TtoK type_to_key_func_type; + typedef typename TraitsType::KHash hash_compare_type; + typedef hash_buffer< key_type, input_type, type_to_key_func_type, hash_compare_type > buffer_type; + + private: +// ----------- Aggregator ------------ + private: + enum op_type { try__put, get__item, res_port + }; + + class key_matching_port_operation : public aggregated_operation<key_matching_port_operation> { + public: + char type; + input_type my_val; + input_type *my_arg; + // constructor for value parameter + key_matching_port_operation(const input_type& e, op_type t) : + type(char(t)), my_val(e) {} + // constructor for pointer parameter + key_matching_port_operation(const input_type* p, op_type t) : + type(char(t)), my_arg(const_cast<input_type*>(p)) {} + // constructor with no parameter + key_matching_port_operation(op_type t) : type(char(t)) {} + }; + + typedef aggregating_functor<class_type, key_matching_port_operation> handler_type; + friend class aggregating_functor<class_type, key_matching_port_operation>; + aggregator<handler_type, key_matching_port_operation> my_aggregator; + + void handle_operations(key_matching_port_operation* op_list) { + key_matching_port_operation *current; + while(op_list) { + current = op_list; + op_list = op_list->next; + switch(current->type) { + case try__put: { + bool was_inserted = this->insert_with_key(current->my_val); + // return failure if a duplicate insertion occurs + current->status.store( was_inserted ? 
SUCCEEDED : FAILED, std::memory_order_release); + } + break; + case get__item: + // use current_key from FE for item + if(!this->find_with_key(my_join->current_key, *(current->my_arg))) { + __TBB_ASSERT(false, "Failed to find item corresponding to current_key."); + } + current->status.store( SUCCEEDED, std::memory_order_release); + break; + case res_port: + // use current_key from FE for item + this->delete_with_key(my_join->current_key); + current->status.store( SUCCEEDED, std::memory_order_release); + break; + } + } + } +// ------------ End Aggregator --------------- + protected: + template< typename R, typename B > friend class run_and_put_task; + template<typename X, typename Y> friend class broadcast_cache; + template<typename X, typename Y> friend class round_robin_cache; + graph_task* try_put_task(const input_type& v) override { + key_matching_port_operation op_data(v, try__put); + graph_task* rtask = nullptr; + my_aggregator.execute(&op_data); + if(op_data.status == SUCCEEDED) { + rtask = my_join->increment_key_count((*(this->get_key_func()))(v)); // may spawn + // rtask has to reflect the return status of the try_put + if(!rtask) rtask = SUCCESSFULLY_ENQUEUED; + } + return rtask; + } + + graph& graph_reference() const override { + return my_join->graph_ref; + } + + public: + + key_matching_port() : receiver<input_type>(), buffer_type() { + my_join = nullptr; + my_aggregator.initialize_handler(handler_type(this)); + } + + // copy constructor + key_matching_port(const key_matching_port& /*other*/) = delete; +#if __INTEL_COMPILER <= 2021 + // Suppress superfluous diagnostic about virtual keyword absence in a destructor of an inherited + // class while the parent class has the virtual keyword for the destrocutor. + virtual +#endif + ~key_matching_port() { } + + void set_join_node_pointer(forwarding_base *join) { + my_join = dynamic_cast<matching_forwarding_base<key_type>*>(join); + } + + void set_my_key_func(type_to_key_func_type *f) { this->set_key_func(f); } + + type_to_key_func_type* get_my_key_func() { return this->get_key_func(); } + + bool get_item( input_type &v ) { + // aggregator uses current_key from FE for Key + key_matching_port_operation op_data(&v, get__item); + my_aggregator.execute(&op_data); + return op_data.status == SUCCEEDED; + } + + // reset_port is called when item is accepted by successor, but + // is initiated by join_node. + void reset_port() { + key_matching_port_operation op_data(res_port); + my_aggregator.execute(&op_data); + return; + } + + void reset_receiver(reset_flags ) { + buffer_type::reset(); + } + + private: + // my_join forwarding base used to count number of inputs that + // received key. + matching_forwarding_base<key_type> *my_join; + }; // key_matching_port + + using namespace graph_policy_namespace; + + template<typename JP, typename InputTuple, typename OutputTuple> + class join_node_base; + + //! 
join_node_FE : implements input port policy + template<typename JP, typename InputTuple, typename OutputTuple> + class join_node_FE; + + template<typename InputTuple, typename OutputTuple> + class join_node_FE<reserving, InputTuple, OutputTuple> : public reserving_forwarding_base { + public: + static const int N = std::tuple_size<OutputTuple>::value; + typedef OutputTuple output_type; + typedef InputTuple input_type; + typedef join_node_base<reserving, InputTuple, OutputTuple> base_node_type; // for forwarding + + join_node_FE(graph &g) : reserving_forwarding_base(g), my_node(nullptr) { + ports_with_no_inputs = N; + join_helper<N>::set_join_node_pointer(my_inputs, this); + } + + join_node_FE(const join_node_FE& other) : reserving_forwarding_base((other.reserving_forwarding_base::graph_ref)), my_node(nullptr) { + ports_with_no_inputs = N; + join_helper<N>::set_join_node_pointer(my_inputs, this); + } + + void set_my_node(base_node_type *new_my_node) { my_node = new_my_node; } + + void increment_port_count() override { + ++ports_with_no_inputs; + } + + // if all input_ports have predecessors, spawn forward to try and consume tuples + graph_task* decrement_port_count() override { + if(ports_with_no_inputs.fetch_sub(1) == 1) { + if(is_graph_active(this->graph_ref)) { + small_object_allocator allocator{}; + typedef forward_task_bypass<base_node_type> task_type; + graph_task* t = allocator.new_object<task_type>(graph_ref, allocator, *my_node); + graph_ref.reserve_wait(); + spawn_in_graph_arena(this->graph_ref, *t); + } + } + return nullptr; + } + + input_type &input_ports() { return my_inputs; } + + protected: + + void reset( reset_flags f) { + // called outside of parallel contexts + ports_with_no_inputs = N; + join_helper<N>::reset_inputs(my_inputs, f); + } + + // all methods on input ports should be called under mutual exclusion from join_node_base. + + bool tuple_build_may_succeed() { + return !ports_with_no_inputs; + } + + bool try_to_make_tuple(output_type &out) { + if(ports_with_no_inputs) return false; + return join_helper<N>::reserve(my_inputs, out); + } + + void tuple_accepted() { + join_helper<N>::consume_reservations(my_inputs); + } + void tuple_rejected() { + join_helper<N>::release_reservations(my_inputs); + } + + input_type my_inputs; + base_node_type *my_node; + std::atomic<std::size_t> ports_with_no_inputs; + }; // join_node_FE<reserving, ... 
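The reserving front end above builds a tuple only when every input port can reserve an item from a buffering predecessor, while the queueing front end that follows copies incoming items into per-port item_buffers. From the public API the difference is just the policy parameter of tbb::flow::join_node. A small usage sketch against the public oneTBB interface (the values and node names are arbitrary):

    #include <tuple>
    #include <oneapi/tbb/flow_graph.h>

    int main() {
        using namespace tbb::flow;
        graph g;

        // Queueing join (the default policy): each port buffers what it receives.
        join_node<std::tuple<int, float>, queueing> qj(g);
        input_port<0>(qj).try_put(1);
        input_port<1>(qj).try_put(2.5f);

        // Reserving join: ports hold no items; they reserve from buffering
        // predecessors (e.g. buffer_node) only once every port can succeed.
        buffer_node<int>   b0(g);
        buffer_node<float> b1(g);
        join_node<std::tuple<int, float>, reserving> rj(g);
        make_edge(b0, input_port<0>(rj));
        make_edge(b1, input_port<1>(rj));
        b0.try_put(3);
        b1.try_put(4.5f);

        g.wait_for_all();

        std::tuple<int, float> t;
        if (qj.try_get(t)) { /* tuple assembled by the queueing join */ }
        if (rj.try_get(t)) { /* tuple assembled by the reserving join */ }
        return 0;
    }

The reserving policy avoids duplicating items inside the join at the cost of requiring reservable predecessors, which is exactly the split between reserving_port and queueing_port above.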
> + + template<typename InputTuple, typename OutputTuple> + class join_node_FE<queueing, InputTuple, OutputTuple> : public queueing_forwarding_base { + public: + static const int N = std::tuple_size<OutputTuple>::value; + typedef OutputTuple output_type; + typedef InputTuple input_type; + typedef join_node_base<queueing, InputTuple, OutputTuple> base_node_type; // for forwarding + + join_node_FE(graph &g) : queueing_forwarding_base(g), my_node(nullptr) { + ports_with_no_items = N; + join_helper<N>::set_join_node_pointer(my_inputs, this); + } + + join_node_FE(const join_node_FE& other) : queueing_forwarding_base((other.queueing_forwarding_base::graph_ref)), my_node(nullptr) { + ports_with_no_items = N; + join_helper<N>::set_join_node_pointer(my_inputs, this); + } + + // needed for forwarding + void set_my_node(base_node_type *new_my_node) { my_node = new_my_node; } + + void reset_port_count() { + ports_with_no_items = N; + } + + // if all input_ports have items, spawn forward to try and consume tuples + graph_task* decrement_port_count(bool handle_task) override + { + if(ports_with_no_items.fetch_sub(1) == 1) { + if(is_graph_active(this->graph_ref)) { + small_object_allocator allocator{}; + typedef forward_task_bypass<base_node_type> task_type; + graph_task* t = allocator.new_object<task_type>(graph_ref, allocator, *my_node); + graph_ref.reserve_wait(); + if( !handle_task ) + return t; + spawn_in_graph_arena(this->graph_ref, *t); + } + } + return nullptr; + } + + input_type &input_ports() { return my_inputs; } + + protected: + + void reset( reset_flags f) { + reset_port_count(); + join_helper<N>::reset_inputs(my_inputs, f ); + } + + // all methods on input ports should be called under mutual exclusion from join_node_base. + + bool tuple_build_may_succeed() { + return !ports_with_no_items; + } + + bool try_to_make_tuple(output_type &out) { + if(ports_with_no_items) return false; + return join_helper<N>::get_items(my_inputs, out); + } + + void tuple_accepted() { + reset_port_count(); + join_helper<N>::reset_ports(my_inputs); + } + void tuple_rejected() { + // nothing to do. + } + + input_type my_inputs; + base_node_type *my_node; + std::atomic<std::size_t> ports_with_no_items; + }; // join_node_FE<queueing, ...> + + // key_matching join front-end. + template<typename InputTuple, typename OutputTuple, typename K, typename KHash> + class join_node_FE<key_matching<K,KHash>, InputTuple, OutputTuple> : public matching_forwarding_base<K>, + // buffer of key value counts + public hash_buffer< // typedefed below to key_to_count_buffer_type + typename std::decay<K>::type&, // force ref type on K + count_element<typename std::decay<K>::type>, + type_to_key_function_body< + count_element<typename std::decay<K>::type>, + typename std::decay<K>::type& >, + KHash >, + // buffer of output items + public item_buffer<OutputTuple> { + public: + static const int N = std::tuple_size<OutputTuple>::value; + typedef OutputTuple output_type; + typedef InputTuple input_type; + typedef K key_type; + typedef typename std::decay<key_type>::type unref_key_type; + typedef KHash key_hash_compare; + // must use K without ref. + typedef count_element<unref_key_type> count_element_type; + // method that lets us refer to the key of this type. 
+ typedef key_to_count_functor<unref_key_type> key_to_count_func; + typedef type_to_key_function_body< count_element_type, unref_key_type&> TtoK_function_body_type; + typedef type_to_key_function_body_leaf<count_element_type, unref_key_type&, key_to_count_func> TtoK_function_body_leaf_type; + // this is the type of the special table that keeps track of the number of discrete + // elements corresponding to each key that we've seen. + typedef hash_buffer< unref_key_type&, count_element_type, TtoK_function_body_type, key_hash_compare > + key_to_count_buffer_type; + typedef item_buffer<output_type> output_buffer_type; + typedef join_node_base<key_matching<key_type,key_hash_compare>, InputTuple, OutputTuple> base_node_type; // for forwarding + typedef matching_forwarding_base<key_type> forwarding_base_type; + +// ----------- Aggregator ------------ + // the aggregator is only needed to serialize the access to the hash table. + // and the output_buffer_type base class + private: + enum op_type { res_count, inc_count, may_succeed, try_make }; + typedef join_node_FE<key_matching<key_type,key_hash_compare>, InputTuple, OutputTuple> class_type; + + class key_matching_FE_operation : public aggregated_operation<key_matching_FE_operation> { + public: + char type; + unref_key_type my_val; + output_type* my_output; + graph_task* bypass_t; + // constructor for value parameter + key_matching_FE_operation(const unref_key_type& e , op_type t) : type(char(t)), my_val(e), + my_output(nullptr), bypass_t(nullptr) {} + key_matching_FE_operation(output_type *p, op_type t) : type(char(t)), my_output(p), bypass_t(nullptr) {} + // constructor with no parameter + key_matching_FE_operation(op_type t) : type(char(t)), my_output(nullptr), bypass_t(nullptr) {} + }; + + typedef aggregating_functor<class_type, key_matching_FE_operation> handler_type; + friend class aggregating_functor<class_type, key_matching_FE_operation>; + aggregator<handler_type, key_matching_FE_operation> my_aggregator; + + // called from aggregator, so serialized + // returns a task pointer if the a task would have been enqueued but we asked that + // it be returned. Otherwise returns nullptr. 
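    // Example of the counting scheme (for a 3-input key_matching join): the
    // first two puts that arrive for key k only bump k's count_element in the
    // key_to_count_buffer; the third put raises the count to N, so
    // handle_operations calls fill_output_buffer(k), which sets current_key,
    // drops k from the count table, pulls one matching item from each port
    // via join_helper<N>::get_items, and appends the completed tuple to the
    // output item_buffer (possibly allocating a forwarding task for the
    // successors).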
+ graph_task* fill_output_buffer(unref_key_type &t) { + output_type l_out; + graph_task* rtask = nullptr; + bool do_fwd = this->buffer_empty() && is_graph_active(this->graph_ref); + this->current_key = t; + this->delete_with_key(this->current_key); // remove the key + if(join_helper<N>::get_items(my_inputs, l_out)) { // <== call back + this->push_back(l_out); + if(do_fwd) { // we enqueue if receiving an item from predecessor, not if successor asks for item + small_object_allocator allocator{}; + typedef forward_task_bypass<base_node_type> task_type; + rtask = allocator.new_object<task_type>(this->graph_ref, allocator, *my_node); + this->graph_ref.reserve_wait(); + do_fwd = false; + } + // retire the input values + join_helper<N>::reset_ports(my_inputs); // <== call back + } + else { + __TBB_ASSERT(false, "should have had something to push"); + } + return rtask; + } + + void handle_operations(key_matching_FE_operation* op_list) { + key_matching_FE_operation *current; + while(op_list) { + current = op_list; + op_list = op_list->next; + switch(current->type) { + case res_count: // called from BE + { + this->destroy_front(); + current->status.store( SUCCEEDED, std::memory_order_release); + } + break; + case inc_count: { // called from input ports + count_element_type *p = 0; + unref_key_type &t = current->my_val; + if(!(this->find_ref_with_key(t,p))) { + count_element_type ev; + ev.my_key = t; + ev.my_value = 0; + this->insert_with_key(ev); + bool found = this->find_ref_with_key(t, p); + __TBB_ASSERT_EX(found, "should find key after inserting it"); + } + if(++(p->my_value) == size_t(N)) { + current->bypass_t = fill_output_buffer(t); + } + } + current->status.store( SUCCEEDED, std::memory_order_release); + break; + case may_succeed: // called from BE + current->status.store( this->buffer_empty() ? FAILED : SUCCEEDED, std::memory_order_release); + break; + case try_make: // called from BE + if(this->buffer_empty()) { + current->status.store( FAILED, std::memory_order_release); + } + else { + *(current->my_output) = this->front(); + current->status.store( SUCCEEDED, std::memory_order_release); + } + break; + } + } + } +// ------------ End Aggregator --------------- + + public: + template<typename FunctionTuple> + join_node_FE(graph &g, FunctionTuple &TtoK_funcs) : forwarding_base_type(g), my_node(nullptr) { + join_helper<N>::set_join_node_pointer(my_inputs, this); + join_helper<N>::set_key_functors(my_inputs, TtoK_funcs); + my_aggregator.initialize_handler(handler_type(this)); + TtoK_function_body_type *cfb = new TtoK_function_body_leaf_type(key_to_count_func()); + this->set_key_func(cfb); + } + + join_node_FE(const join_node_FE& other) : forwarding_base_type((other.forwarding_base_type::graph_ref)), key_to_count_buffer_type(), + output_buffer_type() { + my_node = nullptr; + join_helper<N>::set_join_node_pointer(my_inputs, this); + join_helper<N>::copy_key_functors(my_inputs, const_cast<input_type &>(other.my_inputs)); + my_aggregator.initialize_handler(handler_type(this)); + TtoK_function_body_type *cfb = new TtoK_function_body_leaf_type(key_to_count_func()); + this->set_key_func(cfb); + } + + // needed for forwarding + void set_my_node(base_node_type *new_my_node) { my_node = new_my_node; } + + void reset_port_count() { // called from BE + key_matching_FE_operation op_data(res_count); + my_aggregator.execute(&op_data); + return; + } + + // if all input_ports have items, spawn forward to try and consume tuples + // return a task if we are asked and did create one. 
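    // (Called from key_matching_port::try_put_task after the port has
    // buffered an item: the task handed back here is whatever
    // fill_output_buffer allocated, and the port returns it to its caller so
    // the forwarding work rides the task-bypass path instead of being
    // spawned immediately.)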
+ graph_task *increment_key_count(unref_key_type const & t) override { // called from input_ports + key_matching_FE_operation op_data(t, inc_count); + my_aggregator.execute(&op_data); + return op_data.bypass_t; + } + + input_type &input_ports() { return my_inputs; } + + protected: + + void reset( reset_flags f ) { + // called outside of parallel contexts + join_helper<N>::reset_inputs(my_inputs, f); + + key_to_count_buffer_type::reset(); + output_buffer_type::reset(); + } + + // all methods on input ports should be called under mutual exclusion from join_node_base. + + bool tuple_build_may_succeed() { // called from back-end + key_matching_FE_operation op_data(may_succeed); + my_aggregator.execute(&op_data); + return op_data.status == SUCCEEDED; + } + + // cannot lock while calling back to input_ports. current_key will only be set + // and reset under the aggregator, so it will remain consistent. + bool try_to_make_tuple(output_type &out) { + key_matching_FE_operation op_data(&out,try_make); + my_aggregator.execute(&op_data); + return op_data.status == SUCCEEDED; + } + + void tuple_accepted() { + reset_port_count(); // reset current_key after ports reset. + } + + void tuple_rejected() { + // nothing to do. + } + + input_type my_inputs; // input ports + base_node_type *my_node; + }; // join_node_FE<key_matching<K,KHash>, InputTuple, OutputTuple> + + //! join_node_base + template<typename JP, typename InputTuple, typename OutputTuple> + class join_node_base : public graph_node, public join_node_FE<JP, InputTuple, OutputTuple>, + public sender<OutputTuple> { + protected: + using graph_node::my_graph; + public: + typedef OutputTuple output_type; + + typedef typename sender<output_type>::successor_type successor_type; + typedef join_node_FE<JP, InputTuple, OutputTuple> input_ports_type; + using input_ports_type::tuple_build_may_succeed; + using input_ports_type::try_to_make_tuple; + using input_ports_type::tuple_accepted; + using input_ports_type::tuple_rejected; + + private: + // ----------- Aggregator ------------ + enum op_type { reg_succ, rem_succ, try__get, do_fwrd, do_fwrd_bypass + }; + typedef join_node_base<JP,InputTuple,OutputTuple> class_type; + + class join_node_base_operation : public aggregated_operation<join_node_base_operation> { + public: + char type; + union { + output_type *my_arg; + successor_type *my_succ; + }; + graph_task* bypass_t; + join_node_base_operation(const output_type& e, op_type t) : type(char(t)), + my_arg(const_cast<output_type*>(&e)), bypass_t(nullptr) {} + join_node_base_operation(const successor_type &s, op_type t) : type(char(t)), + my_succ(const_cast<successor_type *>(&s)), bypass_t(nullptr) {} + join_node_base_operation(op_type t) : type(char(t)), bypass_t(nullptr) {} + }; + + typedef aggregating_functor<class_type, join_node_base_operation> handler_type; + friend class aggregating_functor<class_type, join_node_base_operation>; + bool forwarder_busy; + aggregator<handler_type, join_node_base_operation> my_aggregator; + + void handle_operations(join_node_base_operation* op_list) { + join_node_base_operation *current; + while(op_list) { + current = op_list; + op_list = op_list->next; + switch(current->type) { + case reg_succ: { + my_successors.register_successor(*(current->my_succ)); + if(tuple_build_may_succeed() && !forwarder_busy && is_graph_active(my_graph)) { + small_object_allocator allocator{}; + typedef forward_task_bypass< join_node_base<JP, InputTuple, OutputTuple> > task_type; + graph_task* t = allocator.new_object<task_type>(my_graph, allocator, 
*this); + my_graph.reserve_wait(); + spawn_in_graph_arena(my_graph, *t); + forwarder_busy = true; + } + current->status.store( SUCCEEDED, std::memory_order_release); + } + break; + case rem_succ: + my_successors.remove_successor(*(current->my_succ)); + current->status.store( SUCCEEDED, std::memory_order_release); + break; + case try__get: + if(tuple_build_may_succeed()) { + if(try_to_make_tuple(*(current->my_arg))) { + tuple_accepted(); + current->status.store( SUCCEEDED, std::memory_order_release); + } + else current->status.store( FAILED, std::memory_order_release); + } + else current->status.store( FAILED, std::memory_order_release); + break; + case do_fwrd_bypass: { + bool build_succeeded; + graph_task *last_task = nullptr; + output_type out; + // forwarding must be exclusive, because try_to_make_tuple and tuple_accepted + // are separate locked methods in the FE. We could conceivably fetch the front + // of the FE queue, then be swapped out, have someone else consume the FE's + // object, then come back, forward, and then try to remove it from the queue + // again. Without reservation of the FE, the methods accessing it must be locked. + // We could remember the keys of the objects we forwarded, and then remove + // them from the input ports after forwarding is complete? + if(tuple_build_may_succeed()) { // checks output queue of FE + do { + build_succeeded = try_to_make_tuple(out); // fetch front_end of queue + if(build_succeeded) { + graph_task *new_task = my_successors.try_put_task(out); + last_task = combine_tasks(my_graph, last_task, new_task); + if(new_task) { + tuple_accepted(); + } + else { + tuple_rejected(); + build_succeeded = false; + } + } + } while(build_succeeded); + } + current->bypass_t = last_task; + current->status.store( SUCCEEDED, std::memory_order_release); + forwarder_busy = false; + } + break; + } + } + } + // ---------- end aggregator ----------- + public: + join_node_base(graph &g) + : graph_node(g), input_ports_type(g), forwarder_busy(false), my_successors(this) + { + input_ports_type::set_my_node(this); + my_aggregator.initialize_handler(handler_type(this)); + } + + join_node_base(const join_node_base& other) : + graph_node(other.graph_node::my_graph), input_ports_type(other), + sender<OutputTuple>(), forwarder_busy(false), my_successors(this) + { + input_ports_type::set_my_node(this); + my_aggregator.initialize_handler(handler_type(this)); + } + + template<typename FunctionTuple> + join_node_base(graph &g, FunctionTuple f) + : graph_node(g), input_ports_type(g, f), forwarder_busy(false), my_successors(this) + { + input_ports_type::set_my_node(this); + my_aggregator.initialize_handler(handler_type(this)); + } + + bool register_successor(successor_type &r) override { + join_node_base_operation op_data(r, reg_succ); + my_aggregator.execute(&op_data); + return op_data.status == SUCCEEDED; + } + + bool remove_successor( successor_type &r) override { + join_node_base_operation op_data(r, rem_succ); + my_aggregator.execute(&op_data); + return op_data.status == SUCCEEDED; + } + + bool try_get( output_type &v) override { + join_node_base_operation op_data(v, try__get); + my_aggregator.execute(&op_data); + return op_data.status == SUCCEEDED; + } + + protected: + void reset_node(reset_flags f) override { + input_ports_type::reset(f); + if(f & rf_clear_edges) my_successors.clear(); + } + + private: + broadcast_cache<output_type, null_rw_mutex> my_successors; + + friend class forward_task_bypass< join_node_base<JP, InputTuple, OutputTuple> >; + graph_task 
*forward_task() { + join_node_base_operation op_data(do_fwrd_bypass); + my_aggregator.execute(&op_data); + return op_data.bypass_t; + } + + }; // join_node_base + + // join base class type generator + template<int N, template<class> class PT, typename OutputTuple, typename JP> + struct join_base { + typedef join_node_base<JP, typename wrap_tuple_elements<N,PT,OutputTuple>::type, OutputTuple> type; + }; + + template<int N, typename OutputTuple, typename K, typename KHash> + struct join_base<N, key_matching_port, OutputTuple, key_matching<K,KHash> > { + typedef key_matching<K, KHash> key_traits_type; + typedef K key_type; + typedef KHash key_hash_compare; + typedef join_node_base< key_traits_type, + // ports type + typename wrap_key_tuple_elements<N,key_matching_port,key_traits_type,OutputTuple>::type, + OutputTuple > type; + }; + + //! unfolded_join_node : passes input_ports_type to join_node_base. We build the input port type + // using tuple_element. The class PT is the port type (reserving_port, queueing_port, key_matching_port) + // and should match the typename. + + template<int N, template<class> class PT, typename OutputTuple, typename JP> + class unfolded_join_node : public join_base<N,PT,OutputTuple,JP>::type { + public: + typedef typename wrap_tuple_elements<N, PT, OutputTuple>::type input_ports_type; + typedef OutputTuple output_type; + private: + typedef join_node_base<JP, input_ports_type, output_type > base_type; + public: + unfolded_join_node(graph &g) : base_type(g) {} + unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} + }; + +#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING + template <typename K, typename T> + struct key_from_message_body { + K operator()(const T& t) const { + return key_from_message<K>(t); + } + }; + // Adds const to reference type + template <typename K, typename T> + struct key_from_message_body<K&,T> { + const K& operator()(const T& t) const { + return key_from_message<const K&>(t); + } + }; +#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ + // key_matching unfolded_join_node. This must be a separate specialization because the constructors + // differ. 
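At the public API level, these key_matching constructors take one key-extraction body per input port, and only items whose keys compare equal are combined into a tuple. A usage sketch against the public oneTBB interface (the order/payment structs and the lambdas are illustrative):

    #include <cassert>
    #include <string>
    #include <tuple>
    #include <oneapi/tbb/flow_graph.h>

    struct order   { int id; std::string item;  };
    struct payment { int id; double      amount; };

    int main() {
        using namespace tbb::flow;
        graph g;

        // One key-extraction body per port; both map their message type to int.
        join_node<std::tuple<order, payment>, key_matching<int>> j(
            g,
            [](const order& o)   { return o.id; },
            [](const payment& p) { return p.id; });

        function_node<std::tuple<order, payment>, continue_msg> sink(
            g, unlimited, [](const std::tuple<order, payment>& t) {
                assert(std::get<0>(t).id == std::get<1>(t).id);  // keys matched
                return continue_msg();
            });
        make_edge(j, sink);

        input_port<0>(j).try_put(order{42, "book"});
        input_port<1>(j).try_put(payment{7, 1.0});     // waits for an order with id 7
        input_port<1>(j).try_put(payment{42, 9.99});   // completes the id-42 tuple

        g.wait_for_all();
        return 0;
    }

Internally each body is wrapped in a type_to_key_function_body_leaf and handed to the matching front end, which is why every arity needs its own specialization below.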
+ + template<typename OutputTuple, typename K, typename KHash> + class unfolded_join_node<2,key_matching_port,OutputTuple,key_matching<K,KHash> > : public + join_base<2,key_matching_port,OutputTuple,key_matching<K,KHash> >::type { + typedef typename std::tuple_element<0, OutputTuple>::type T0; + typedef typename std::tuple_element<1, OutputTuple>::type T1; + public: + typedef typename wrap_key_tuple_elements<2,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type; + typedef OutputTuple output_type; + private: + typedef join_node_base<key_matching<K,KHash>, input_ports_type, output_type > base_type; + typedef type_to_key_function_body<T0, K> *f0_p; + typedef type_to_key_function_body<T1, K> *f1_p; + typedef std::tuple< f0_p, f1_p > func_initializer_type; + public: +#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING + unfolded_join_node(graph &g) : base_type(g, + func_initializer_type( + new type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()), + new type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()) + ) ) { + } +#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ + template<typename Body0, typename Body1> + unfolded_join_node(graph &g, Body0 body0, Body1 body1) : base_type(g, + func_initializer_type( + new type_to_key_function_body_leaf<T0, K, Body0>(body0), + new type_to_key_function_body_leaf<T1, K, Body1>(body1) + ) ) { + static_assert(std::tuple_size<OutputTuple>::value == 2, "wrong number of body initializers"); + } + unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} + }; + + template<typename OutputTuple, typename K, typename KHash> + class unfolded_join_node<3,key_matching_port,OutputTuple,key_matching<K,KHash> > : public + join_base<3,key_matching_port,OutputTuple,key_matching<K,KHash> >::type { + typedef typename std::tuple_element<0, OutputTuple>::type T0; + typedef typename std::tuple_element<1, OutputTuple>::type T1; + typedef typename std::tuple_element<2, OutputTuple>::type T2; + public: + typedef typename wrap_key_tuple_elements<3,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type; + typedef OutputTuple output_type; + private: + typedef join_node_base<key_matching<K,KHash>, input_ports_type, output_type > base_type; + typedef type_to_key_function_body<T0, K> *f0_p; + typedef type_to_key_function_body<T1, K> *f1_p; + typedef type_to_key_function_body<T2, K> *f2_p; + typedef std::tuple< f0_p, f1_p, f2_p > func_initializer_type; + public: +#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING + unfolded_join_node(graph &g) : base_type(g, + func_initializer_type( + new type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()), + new type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()), + new type_to_key_function_body_leaf<T2, K, key_from_message_body<K,T2> >(key_from_message_body<K,T2>()) + ) ) { + } +#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ + template<typename Body0, typename Body1, typename Body2> + unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2) : base_type(g, + func_initializer_type( + new type_to_key_function_body_leaf<T0, K, Body0>(body0), + new type_to_key_function_body_leaf<T1, K, Body1>(body1), + new type_to_key_function_body_leaf<T2, K, Body2>(body2) + ) ) { + static_assert(std::tuple_size<OutputTuple>::value == 3, "wrong number of body initializers"); + } + unfolded_join_node(const unfolded_join_node 
&other) : base_type(other) {} + }; + + template<typename OutputTuple, typename K, typename KHash> + class unfolded_join_node<4,key_matching_port,OutputTuple,key_matching<K,KHash> > : public + join_base<4,key_matching_port,OutputTuple,key_matching<K,KHash> >::type { + typedef typename std::tuple_element<0, OutputTuple>::type T0; + typedef typename std::tuple_element<1, OutputTuple>::type T1; + typedef typename std::tuple_element<2, OutputTuple>::type T2; + typedef typename std::tuple_element<3, OutputTuple>::type T3; + public: + typedef typename wrap_key_tuple_elements<4,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type; + typedef OutputTuple output_type; + private: + typedef join_node_base<key_matching<K,KHash>, input_ports_type, output_type > base_type; + typedef type_to_key_function_body<T0, K> *f0_p; + typedef type_to_key_function_body<T1, K> *f1_p; + typedef type_to_key_function_body<T2, K> *f2_p; + typedef type_to_key_function_body<T3, K> *f3_p; + typedef std::tuple< f0_p, f1_p, f2_p, f3_p > func_initializer_type; + public: +#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING + unfolded_join_node(graph &g) : base_type(g, + func_initializer_type( + new type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()), + new type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()), + new type_to_key_function_body_leaf<T2, K, key_from_message_body<K,T2> >(key_from_message_body<K,T2>()), + new type_to_key_function_body_leaf<T3, K, key_from_message_body<K,T3> >(key_from_message_body<K,T3>()) + ) ) { + } +#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ + template<typename Body0, typename Body1, typename Body2, typename Body3> + unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2, Body3 body3) : base_type(g, + func_initializer_type( + new type_to_key_function_body_leaf<T0, K, Body0>(body0), + new type_to_key_function_body_leaf<T1, K, Body1>(body1), + new type_to_key_function_body_leaf<T2, K, Body2>(body2), + new type_to_key_function_body_leaf<T3, K, Body3>(body3) + ) ) { + static_assert(std::tuple_size<OutputTuple>::value == 4, "wrong number of body initializers"); + } + unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} + }; + + template<typename OutputTuple, typename K, typename KHash> + class unfolded_join_node<5,key_matching_port,OutputTuple,key_matching<K,KHash> > : public + join_base<5,key_matching_port,OutputTuple,key_matching<K,KHash> >::type { + typedef typename std::tuple_element<0, OutputTuple>::type T0; + typedef typename std::tuple_element<1, OutputTuple>::type T1; + typedef typename std::tuple_element<2, OutputTuple>::type T2; + typedef typename std::tuple_element<3, OutputTuple>::type T3; + typedef typename std::tuple_element<4, OutputTuple>::type T4; + public: + typedef typename wrap_key_tuple_elements<5,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type; + typedef OutputTuple output_type; + private: + typedef join_node_base<key_matching<K,KHash> , input_ports_type, output_type > base_type; + typedef type_to_key_function_body<T0, K> *f0_p; + typedef type_to_key_function_body<T1, K> *f1_p; + typedef type_to_key_function_body<T2, K> *f2_p; + typedef type_to_key_function_body<T3, K> *f3_p; + typedef type_to_key_function_body<T4, K> *f4_p; + typedef std::tuple< f0_p, f1_p, f2_p, f3_p, f4_p > func_initializer_type; + public: +#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING + unfolded_join_node(graph &g) : base_type(g, + 
func_initializer_type( + new type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()), + new type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()), + new type_to_key_function_body_leaf<T2, K, key_from_message_body<K,T2> >(key_from_message_body<K,T2>()), + new type_to_key_function_body_leaf<T3, K, key_from_message_body<K,T3> >(key_from_message_body<K,T3>()), + new type_to_key_function_body_leaf<T4, K, key_from_message_body<K,T4> >(key_from_message_body<K,T4>()) + ) ) { + } +#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ + template<typename Body0, typename Body1, typename Body2, typename Body3, typename Body4> + unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2, Body3 body3, Body4 body4) : base_type(g, + func_initializer_type( + new type_to_key_function_body_leaf<T0, K, Body0>(body0), + new type_to_key_function_body_leaf<T1, K, Body1>(body1), + new type_to_key_function_body_leaf<T2, K, Body2>(body2), + new type_to_key_function_body_leaf<T3, K, Body3>(body3), + new type_to_key_function_body_leaf<T4, K, Body4>(body4) + ) ) { + static_assert(std::tuple_size<OutputTuple>::value == 5, "wrong number of body initializers"); + } + unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} + }; + +#if __TBB_VARIADIC_MAX >= 6 + template<typename OutputTuple, typename K, typename KHash> + class unfolded_join_node<6,key_matching_port,OutputTuple,key_matching<K,KHash> > : public + join_base<6,key_matching_port,OutputTuple,key_matching<K,KHash> >::type { + typedef typename std::tuple_element<0, OutputTuple>::type T0; + typedef typename std::tuple_element<1, OutputTuple>::type T1; + typedef typename std::tuple_element<2, OutputTuple>::type T2; + typedef typename std::tuple_element<3, OutputTuple>::type T3; + typedef typename std::tuple_element<4, OutputTuple>::type T4; + typedef typename std::tuple_element<5, OutputTuple>::type T5; + public: + typedef typename wrap_key_tuple_elements<6,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type; + typedef OutputTuple output_type; + private: + typedef join_node_base<key_matching<K,KHash> , input_ports_type, output_type > base_type; + typedef type_to_key_function_body<T0, K> *f0_p; + typedef type_to_key_function_body<T1, K> *f1_p; + typedef type_to_key_function_body<T2, K> *f2_p; + typedef type_to_key_function_body<T3, K> *f3_p; + typedef type_to_key_function_body<T4, K> *f4_p; + typedef type_to_key_function_body<T5, K> *f5_p; + typedef std::tuple< f0_p, f1_p, f2_p, f3_p, f4_p, f5_p > func_initializer_type; + public: +#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING + unfolded_join_node(graph &g) : base_type(g, + func_initializer_type( + new type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()), + new type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()), + new type_to_key_function_body_leaf<T2, K, key_from_message_body<K,T2> >(key_from_message_body<K,T2>()), + new type_to_key_function_body_leaf<T3, K, key_from_message_body<K,T3> >(key_from_message_body<K,T3>()), + new type_to_key_function_body_leaf<T4, K, key_from_message_body<K,T4> >(key_from_message_body<K,T4>()), + new type_to_key_function_body_leaf<T5, K, key_from_message_body<K,T5> >(key_from_message_body<K,T5>()) + ) ) { + } +#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ + template<typename Body0, typename Body1, typename Body2, typename Body3, typename Body4, typename 
Body5> + unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2, Body3 body3, Body4 body4, Body5 body5) + : base_type(g, func_initializer_type( + new type_to_key_function_body_leaf<T0, K, Body0>(body0), + new type_to_key_function_body_leaf<T1, K, Body1>(body1), + new type_to_key_function_body_leaf<T2, K, Body2>(body2), + new type_to_key_function_body_leaf<T3, K, Body3>(body3), + new type_to_key_function_body_leaf<T4, K, Body4>(body4), + new type_to_key_function_body_leaf<T5, K, Body5>(body5) + ) ) { + static_assert(std::tuple_size<OutputTuple>::value == 6, "wrong number of body initializers"); + } + unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} + }; +#endif + +#if __TBB_VARIADIC_MAX >= 7 + template<typename OutputTuple, typename K, typename KHash> + class unfolded_join_node<7,key_matching_port,OutputTuple,key_matching<K,KHash> > : public + join_base<7,key_matching_port,OutputTuple,key_matching<K,KHash> >::type { + typedef typename std::tuple_element<0, OutputTuple>::type T0; + typedef typename std::tuple_element<1, OutputTuple>::type T1; + typedef typename std::tuple_element<2, OutputTuple>::type T2; + typedef typename std::tuple_element<3, OutputTuple>::type T3; + typedef typename std::tuple_element<4, OutputTuple>::type T4; + typedef typename std::tuple_element<5, OutputTuple>::type T5; + typedef typename std::tuple_element<6, OutputTuple>::type T6; + public: + typedef typename wrap_key_tuple_elements<7,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type; + typedef OutputTuple output_type; + private: + typedef join_node_base<key_matching<K,KHash> , input_ports_type, output_type > base_type; + typedef type_to_key_function_body<T0, K> *f0_p; + typedef type_to_key_function_body<T1, K> *f1_p; + typedef type_to_key_function_body<T2, K> *f2_p; + typedef type_to_key_function_body<T3, K> *f3_p; + typedef type_to_key_function_body<T4, K> *f4_p; + typedef type_to_key_function_body<T5, K> *f5_p; + typedef type_to_key_function_body<T6, K> *f6_p; + typedef std::tuple< f0_p, f1_p, f2_p, f3_p, f4_p, f5_p, f6_p > func_initializer_type; + public: +#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING + unfolded_join_node(graph &g) : base_type(g, + func_initializer_type( + new type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()), + new type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()), + new type_to_key_function_body_leaf<T2, K, key_from_message_body<K,T2> >(key_from_message_body<K,T2>()), + new type_to_key_function_body_leaf<T3, K, key_from_message_body<K,T3> >(key_from_message_body<K,T3>()), + new type_to_key_function_body_leaf<T4, K, key_from_message_body<K,T4> >(key_from_message_body<K,T4>()), + new type_to_key_function_body_leaf<T5, K, key_from_message_body<K,T5> >(key_from_message_body<K,T5>()), + new type_to_key_function_body_leaf<T6, K, key_from_message_body<K,T6> >(key_from_message_body<K,T6>()) + ) ) { + } +#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ + template<typename Body0, typename Body1, typename Body2, typename Body3, typename Body4, + typename Body5, typename Body6> + unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2, Body3 body3, Body4 body4, + Body5 body5, Body6 body6) : base_type(g, func_initializer_type( + new type_to_key_function_body_leaf<T0, K, Body0>(body0), + new type_to_key_function_body_leaf<T1, K, Body1>(body1), + new type_to_key_function_body_leaf<T2, K, Body2>(body2), + new 
type_to_key_function_body_leaf<T3, K, Body3>(body3), + new type_to_key_function_body_leaf<T4, K, Body4>(body4), + new type_to_key_function_body_leaf<T5, K, Body5>(body5), + new type_to_key_function_body_leaf<T6, K, Body6>(body6) + ) ) { + static_assert(std::tuple_size<OutputTuple>::value == 7, "wrong number of body initializers"); + } + unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} + }; +#endif + +#if __TBB_VARIADIC_MAX >= 8 + template<typename OutputTuple, typename K, typename KHash> + class unfolded_join_node<8,key_matching_port,OutputTuple,key_matching<K,KHash> > : public + join_base<8,key_matching_port,OutputTuple,key_matching<K,KHash> >::type { + typedef typename std::tuple_element<0, OutputTuple>::type T0; + typedef typename std::tuple_element<1, OutputTuple>::type T1; + typedef typename std::tuple_element<2, OutputTuple>::type T2; + typedef typename std::tuple_element<3, OutputTuple>::type T3; + typedef typename std::tuple_element<4, OutputTuple>::type T4; + typedef typename std::tuple_element<5, OutputTuple>::type T5; + typedef typename std::tuple_element<6, OutputTuple>::type T6; + typedef typename std::tuple_element<7, OutputTuple>::type T7; + public: + typedef typename wrap_key_tuple_elements<8,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type; + typedef OutputTuple output_type; + private: + typedef join_node_base<key_matching<K,KHash> , input_ports_type, output_type > base_type; + typedef type_to_key_function_body<T0, K> *f0_p; + typedef type_to_key_function_body<T1, K> *f1_p; + typedef type_to_key_function_body<T2, K> *f2_p; + typedef type_to_key_function_body<T3, K> *f3_p; + typedef type_to_key_function_body<T4, K> *f4_p; + typedef type_to_key_function_body<T5, K> *f5_p; + typedef type_to_key_function_body<T6, K> *f6_p; + typedef type_to_key_function_body<T7, K> *f7_p; + typedef std::tuple< f0_p, f1_p, f2_p, f3_p, f4_p, f5_p, f6_p, f7_p > func_initializer_type; + public: +#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING + unfolded_join_node(graph &g) : base_type(g, + func_initializer_type( + new type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()), + new type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()), + new type_to_key_function_body_leaf<T2, K, key_from_message_body<K,T2> >(key_from_message_body<K,T2>()), + new type_to_key_function_body_leaf<T3, K, key_from_message_body<K,T3> >(key_from_message_body<K,T3>()), + new type_to_key_function_body_leaf<T4, K, key_from_message_body<K,T4> >(key_from_message_body<K,T4>()), + new type_to_key_function_body_leaf<T5, K, key_from_message_body<K,T5> >(key_from_message_body<K,T5>()), + new type_to_key_function_body_leaf<T6, K, key_from_message_body<K,T6> >(key_from_message_body<K,T6>()), + new type_to_key_function_body_leaf<T7, K, key_from_message_body<K,T7> >(key_from_message_body<K,T7>()) + ) ) { + } +#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ + template<typename Body0, typename Body1, typename Body2, typename Body3, typename Body4, + typename Body5, typename Body6, typename Body7> + unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2, Body3 body3, Body4 body4, + Body5 body5, Body6 body6, Body7 body7) : base_type(g, func_initializer_type( + new type_to_key_function_body_leaf<T0, K, Body0>(body0), + new type_to_key_function_body_leaf<T1, K, Body1>(body1), + new type_to_key_function_body_leaf<T2, K, Body2>(body2), + new type_to_key_function_body_leaf<T3, K, Body3>(body3), 
+ new type_to_key_function_body_leaf<T4, K, Body4>(body4), + new type_to_key_function_body_leaf<T5, K, Body5>(body5), + new type_to_key_function_body_leaf<T6, K, Body6>(body6), + new type_to_key_function_body_leaf<T7, K, Body7>(body7) + ) ) { + static_assert(std::tuple_size<OutputTuple>::value == 8, "wrong number of body initializers"); + } + unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} + }; +#endif + +#if __TBB_VARIADIC_MAX >= 9 + template<typename OutputTuple, typename K, typename KHash> + class unfolded_join_node<9,key_matching_port,OutputTuple,key_matching<K,KHash> > : public + join_base<9,key_matching_port,OutputTuple,key_matching<K,KHash> >::type { + typedef typename std::tuple_element<0, OutputTuple>::type T0; + typedef typename std::tuple_element<1, OutputTuple>::type T1; + typedef typename std::tuple_element<2, OutputTuple>::type T2; + typedef typename std::tuple_element<3, OutputTuple>::type T3; + typedef typename std::tuple_element<4, OutputTuple>::type T4; + typedef typename std::tuple_element<5, OutputTuple>::type T5; + typedef typename std::tuple_element<6, OutputTuple>::type T6; + typedef typename std::tuple_element<7, OutputTuple>::type T7; + typedef typename std::tuple_element<8, OutputTuple>::type T8; + public: + typedef typename wrap_key_tuple_elements<9,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type; + typedef OutputTuple output_type; + private: + typedef join_node_base<key_matching<K,KHash> , input_ports_type, output_type > base_type; + typedef type_to_key_function_body<T0, K> *f0_p; + typedef type_to_key_function_body<T1, K> *f1_p; + typedef type_to_key_function_body<T2, K> *f2_p; + typedef type_to_key_function_body<T3, K> *f3_p; + typedef type_to_key_function_body<T4, K> *f4_p; + typedef type_to_key_function_body<T5, K> *f5_p; + typedef type_to_key_function_body<T6, K> *f6_p; + typedef type_to_key_function_body<T7, K> *f7_p; + typedef type_to_key_function_body<T8, K> *f8_p; + typedef std::tuple< f0_p, f1_p, f2_p, f3_p, f4_p, f5_p, f6_p, f7_p, f8_p > func_initializer_type; + public: +#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING + unfolded_join_node(graph &g) : base_type(g, + func_initializer_type( + new type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()), + new type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()), + new type_to_key_function_body_leaf<T2, K, key_from_message_body<K,T2> >(key_from_message_body<K,T2>()), + new type_to_key_function_body_leaf<T3, K, key_from_message_body<K,T3> >(key_from_message_body<K,T3>()), + new type_to_key_function_body_leaf<T4, K, key_from_message_body<K,T4> >(key_from_message_body<K,T4>()), + new type_to_key_function_body_leaf<T5, K, key_from_message_body<K,T5> >(key_from_message_body<K,T5>()), + new type_to_key_function_body_leaf<T6, K, key_from_message_body<K,T6> >(key_from_message_body<K,T6>()), + new type_to_key_function_body_leaf<T7, K, key_from_message_body<K,T7> >(key_from_message_body<K,T7>()), + new type_to_key_function_body_leaf<T8, K, key_from_message_body<K,T8> >(key_from_message_body<K,T8>()) + ) ) { + } +#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ + template<typename Body0, typename Body1, typename Body2, typename Body3, typename Body4, + typename Body5, typename Body6, typename Body7, typename Body8> + unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2, Body3 body3, Body4 body4, + Body5 body5, Body6 body6, Body7 body7, Body8 body8) : 
base_type(g, func_initializer_type( + new type_to_key_function_body_leaf<T0, K, Body0>(body0), + new type_to_key_function_body_leaf<T1, K, Body1>(body1), + new type_to_key_function_body_leaf<T2, K, Body2>(body2), + new type_to_key_function_body_leaf<T3, K, Body3>(body3), + new type_to_key_function_body_leaf<T4, K, Body4>(body4), + new type_to_key_function_body_leaf<T5, K, Body5>(body5), + new type_to_key_function_body_leaf<T6, K, Body6>(body6), + new type_to_key_function_body_leaf<T7, K, Body7>(body7), + new type_to_key_function_body_leaf<T8, K, Body8>(body8) + ) ) { + static_assert(std::tuple_size<OutputTuple>::value == 9, "wrong number of body initializers"); + } + unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} + }; +#endif + +#if __TBB_VARIADIC_MAX >= 10 + template<typename OutputTuple, typename K, typename KHash> + class unfolded_join_node<10,key_matching_port,OutputTuple,key_matching<K,KHash> > : public + join_base<10,key_matching_port,OutputTuple,key_matching<K,KHash> >::type { + typedef typename std::tuple_element<0, OutputTuple>::type T0; + typedef typename std::tuple_element<1, OutputTuple>::type T1; + typedef typename std::tuple_element<2, OutputTuple>::type T2; + typedef typename std::tuple_element<3, OutputTuple>::type T3; + typedef typename std::tuple_element<4, OutputTuple>::type T4; + typedef typename std::tuple_element<5, OutputTuple>::type T5; + typedef typename std::tuple_element<6, OutputTuple>::type T6; + typedef typename std::tuple_element<7, OutputTuple>::type T7; + typedef typename std::tuple_element<8, OutputTuple>::type T8; + typedef typename std::tuple_element<9, OutputTuple>::type T9; + public: + typedef typename wrap_key_tuple_elements<10,key_matching_port,key_matching<K,KHash>,OutputTuple>::type input_ports_type; + typedef OutputTuple output_type; + private: + typedef join_node_base<key_matching<K,KHash> , input_ports_type, output_type > base_type; + typedef type_to_key_function_body<T0, K> *f0_p; + typedef type_to_key_function_body<T1, K> *f1_p; + typedef type_to_key_function_body<T2, K> *f2_p; + typedef type_to_key_function_body<T3, K> *f3_p; + typedef type_to_key_function_body<T4, K> *f4_p; + typedef type_to_key_function_body<T5, K> *f5_p; + typedef type_to_key_function_body<T6, K> *f6_p; + typedef type_to_key_function_body<T7, K> *f7_p; + typedef type_to_key_function_body<T8, K> *f8_p; + typedef type_to_key_function_body<T9, K> *f9_p; + typedef std::tuple< f0_p, f1_p, f2_p, f3_p, f4_p, f5_p, f6_p, f7_p, f8_p, f9_p > func_initializer_type; + public: +#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING + unfolded_join_node(graph &g) : base_type(g, + func_initializer_type( + new type_to_key_function_body_leaf<T0, K, key_from_message_body<K,T0> >(key_from_message_body<K,T0>()), + new type_to_key_function_body_leaf<T1, K, key_from_message_body<K,T1> >(key_from_message_body<K,T1>()), + new type_to_key_function_body_leaf<T2, K, key_from_message_body<K,T2> >(key_from_message_body<K,T2>()), + new type_to_key_function_body_leaf<T3, K, key_from_message_body<K,T3> >(key_from_message_body<K,T3>()), + new type_to_key_function_body_leaf<T4, K, key_from_message_body<K,T4> >(key_from_message_body<K,T4>()), + new type_to_key_function_body_leaf<T5, K, key_from_message_body<K,T5> >(key_from_message_body<K,T5>()), + new type_to_key_function_body_leaf<T6, K, key_from_message_body<K,T6> >(key_from_message_body<K,T6>()), + new type_to_key_function_body_leaf<T7, K, key_from_message_body<K,T7> >(key_from_message_body<K,T7>()), + new 
type_to_key_function_body_leaf<T8, K, key_from_message_body<K,T8> >(key_from_message_body<K,T8>()), + new type_to_key_function_body_leaf<T9, K, key_from_message_body<K,T9> >(key_from_message_body<K,T9>()) + ) ) { + } +#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ + template<typename Body0, typename Body1, typename Body2, typename Body3, typename Body4, + typename Body5, typename Body6, typename Body7, typename Body8, typename Body9> + unfolded_join_node(graph &g, Body0 body0, Body1 body1, Body2 body2, Body3 body3, Body4 body4, + Body5 body5, Body6 body6, Body7 body7, Body8 body8, Body9 body9) : base_type(g, func_initializer_type( + new type_to_key_function_body_leaf<T0, K, Body0>(body0), + new type_to_key_function_body_leaf<T1, K, Body1>(body1), + new type_to_key_function_body_leaf<T2, K, Body2>(body2), + new type_to_key_function_body_leaf<T3, K, Body3>(body3), + new type_to_key_function_body_leaf<T4, K, Body4>(body4), + new type_to_key_function_body_leaf<T5, K, Body5>(body5), + new type_to_key_function_body_leaf<T6, K, Body6>(body6), + new type_to_key_function_body_leaf<T7, K, Body7>(body7), + new type_to_key_function_body_leaf<T8, K, Body8>(body8), + new type_to_key_function_body_leaf<T9, K, Body9>(body9) + ) ) { + static_assert(std::tuple_size<OutputTuple>::value == 10, "wrong number of body initializers"); + } + unfolded_join_node(const unfolded_join_node &other) : base_type(other) {} + }; +#endif + + //! templated function to refer to input ports of the join node + template<size_t N, typename JNT> + typename std::tuple_element<N, typename JNT::input_ports_type>::type &input_port(JNT &jn) { + return std::get<N>(jn.input_ports()); + } + +#endif // __TBB__flow_graph_join_impl_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_node_impl.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_node_impl.h new file mode 100644 index 0000000000..aca465d088 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_node_impl.h @@ -0,0 +1,769 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB__flow_graph_node_impl_H +#define __TBB__flow_graph_node_impl_H + +#ifndef __TBB_flow_graph_H +#error Do not #include this internal file directly; use public TBB headers instead. +#endif + +#include "_flow_graph_item_buffer_impl.h" + +template< typename T, typename A > +class function_input_queue : public item_buffer<T,A> { +public: + bool empty() const { + return this->buffer_empty(); + } + + const T& front() const { + return this->item_buffer<T, A>::front(); + } + + void pop() { + this->destroy_front(); + } + + bool push( T& t ) { + return this->push_back( t ); + } +}; + +//! Input and scheduling for a function node that takes a type Input as input +// The only up-ref is apply_body_impl, which should implement the function +// call and any handling of the result. 
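+// Illustrative sketch only (hypothetical names, not part of this interface): ImplType is
+// used in CRTP fashion, and the main hook the base calls back into is
+// apply_body_impl_bypass, which should run the user body and return any follow-up task.
+// A derived class is expected to look roughly like:
+//
+//   class my_input : public function_input_base<int, queueing, std::allocator<int>, my_input> {
+//   public:
+//       graph_task* apply_body_impl_bypass( const int& i ) {
+//           // run the body on i, try to forward the result, return a task or nullptr
+//           return nullptr;
+//       }
+//   };
+//
+// function_input and multifunction_input below are the concrete derived classes.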
+template< typename Input, typename Policy, typename A, typename ImplType > +class function_input_base : public receiver<Input>, no_assign { + enum op_type {reg_pred, rem_pred, try_fwd, tryput_bypass, app_body_bypass, occupy_concurrency + }; + typedef function_input_base<Input, Policy, A, ImplType> class_type; + +public: + + //! The input type of this receiver + typedef Input input_type; + typedef typename receiver<input_type>::predecessor_type predecessor_type; + typedef predecessor_cache<input_type, null_mutex > predecessor_cache_type; + typedef function_input_queue<input_type, A> input_queue_type; + typedef typename allocator_traits<A>::template rebind_alloc<input_queue_type> allocator_type; + static_assert(!has_policy<queueing, Policy>::value || !has_policy<rejecting, Policy>::value, ""); + + //! Constructor for function_input_base + function_input_base( graph &g, size_t max_concurrency, node_priority_t a_priority ) + : my_graph_ref(g), my_max_concurrency(max_concurrency) + , my_concurrency(0), my_priority(a_priority) + , my_queue(!has_policy<rejecting, Policy>::value ? new input_queue_type() : NULL) + , my_predecessors(this) + , forwarder_busy(false) + { + my_aggregator.initialize_handler(handler_type(this)); + } + + //! Copy constructor + function_input_base( const function_input_base& src ) + : function_input_base(src.my_graph_ref, src.my_max_concurrency, src.my_priority) {} + + //! Destructor + // The queue is allocated by the constructor for {multi}function_node. + // TODO: pass the graph_buffer_policy to the base so it can allocate the queue instead. + // This would be an interface-breaking change. + virtual ~function_input_base() { + if ( my_queue ) delete my_queue; + } + + graph_task* try_put_task( const input_type& t) override { + return try_put_task_impl(t, has_policy<lightweight, Policy>()); + } + + //! Adds src to the list of cached predecessors. + bool register_predecessor( predecessor_type &src ) override { + operation_type op_data(reg_pred); + op_data.r = &src; + my_aggregator.execute(&op_data); + return true; + } + + //! Removes src from the list of cached predecessors. 
+ bool remove_predecessor( predecessor_type &src ) override { + operation_type op_data(rem_pred); + op_data.r = &src; + my_aggregator.execute(&op_data); + return true; + } + +protected: + + void reset_function_input_base( reset_flags f) { + my_concurrency = 0; + if(my_queue) { + my_queue->reset(); + } + reset_receiver(f); + forwarder_busy = false; + } + + graph& my_graph_ref; + const size_t my_max_concurrency; + size_t my_concurrency; + node_priority_t my_priority; + input_queue_type *my_queue; + predecessor_cache<input_type, null_mutex > my_predecessors; + + void reset_receiver( reset_flags f) { + if( f & rf_clear_edges) my_predecessors.clear(); + else + my_predecessors.reset(); + __TBB_ASSERT(!(f & rf_clear_edges) || my_predecessors.empty(), "function_input_base reset failed"); + } + + graph& graph_reference() const override { + return my_graph_ref; + } + + graph_task* try_get_postponed_task(const input_type& i) { + operation_type op_data(i, app_body_bypass); // tries to pop an item or get_item + my_aggregator.execute(&op_data); + return op_data.bypass_t; + } + +private: + + friend class apply_body_task_bypass< class_type, input_type >; + friend class forward_task_bypass< class_type >; + + class operation_type : public aggregated_operation< operation_type > { + public: + char type; + union { + input_type *elem; + predecessor_type *r; + }; + graph_task* bypass_t; + operation_type(const input_type& e, op_type t) : + type(char(t)), elem(const_cast<input_type*>(&e)) {} + operation_type(op_type t) : type(char(t)), r(NULL) {} + }; + + bool forwarder_busy; + typedef aggregating_functor<class_type, operation_type> handler_type; + friend class aggregating_functor<class_type, operation_type>; + aggregator< handler_type, operation_type > my_aggregator; + + graph_task* perform_queued_requests() { + graph_task* new_task = NULL; + if(my_queue) { + if(!my_queue->empty()) { + ++my_concurrency; + new_task = create_body_task(my_queue->front()); + + my_queue->pop(); + } + } + else { + input_type i; + if(my_predecessors.get_item(i)) { + ++my_concurrency; + new_task = create_body_task(i); + } + } + return new_task; + } + void handle_operations(operation_type *op_list) { + operation_type* tmp; + while (op_list) { + tmp = op_list; + op_list = op_list->next; + switch (tmp->type) { + case reg_pred: + my_predecessors.add(*(tmp->r)); + tmp->status.store(SUCCEEDED, std::memory_order_release); + if (!forwarder_busy) { + forwarder_busy = true; + spawn_forward_task(); + } + break; + case rem_pred: + my_predecessors.remove(*(tmp->r)); + tmp->status.store(SUCCEEDED, std::memory_order_release); + break; + case app_body_bypass: { + tmp->bypass_t = NULL; + __TBB_ASSERT(my_max_concurrency != 0, NULL); + --my_concurrency; + if(my_concurrency<my_max_concurrency) + tmp->bypass_t = perform_queued_requests(); + tmp->status.store(SUCCEEDED, std::memory_order_release); + } + break; + case tryput_bypass: internal_try_put_task(tmp); break; + case try_fwd: internal_forward(tmp); break; + case occupy_concurrency: + if (my_concurrency < my_max_concurrency) { + ++my_concurrency; + tmp->status.store(SUCCEEDED, std::memory_order_release); + } else { + tmp->status.store(FAILED, std::memory_order_release); + } + break; + } + } + } + + //! 
Put to the node, but return the task instead of enqueueing it + void internal_try_put_task(operation_type *op) { + __TBB_ASSERT(my_max_concurrency != 0, NULL); + if (my_concurrency < my_max_concurrency) { + ++my_concurrency; + graph_task * new_task = create_body_task(*(op->elem)); + op->bypass_t = new_task; + op->status.store(SUCCEEDED, std::memory_order_release); + } else if ( my_queue && my_queue->push(*(op->elem)) ) { + op->bypass_t = SUCCESSFULLY_ENQUEUED; + op->status.store(SUCCEEDED, std::memory_order_release); + } else { + op->bypass_t = NULL; + op->status.store(FAILED, std::memory_order_release); + } + } + + //! Creates tasks for postponed messages if available and if concurrency allows + void internal_forward(operation_type *op) { + op->bypass_t = NULL; + if (my_concurrency < my_max_concurrency) + op->bypass_t = perform_queued_requests(); + if(op->bypass_t) + op->status.store(SUCCEEDED, std::memory_order_release); + else { + forwarder_busy = false; + op->status.store(FAILED, std::memory_order_release); + } + } + + graph_task* internal_try_put_bypass( const input_type& t ) { + operation_type op_data(t, tryput_bypass); + my_aggregator.execute(&op_data); + if( op_data.status == SUCCEEDED ) { + return op_data.bypass_t; + } + return NULL; + } + + graph_task* try_put_task_impl( const input_type& t, /*lightweight=*/std::true_type ) { + if( my_max_concurrency == 0 ) { + return apply_body_bypass(t); + } else { + operation_type check_op(t, occupy_concurrency); + my_aggregator.execute(&check_op); + if( check_op.status == SUCCEEDED ) { + return apply_body_bypass(t); + } + return internal_try_put_bypass(t); + } + } + + graph_task* try_put_task_impl( const input_type& t, /*lightweight=*/std::false_type ) { + if( my_max_concurrency == 0 ) { + return create_body_task(t); + } else { + return internal_try_put_bypass(t); + } + } + + //! Applies the body to the provided input + // then decides if more work is available + graph_task* apply_body_bypass( const input_type &i ) { + return static_cast<ImplType *>(this)->apply_body_impl_bypass(i); + } + + //! allocates a task to apply a body + graph_task* create_body_task( const input_type &input ) { + if (!is_graph_active(my_graph_ref)) { + return nullptr; + } + // TODO revamp: extract helper for common graph task allocation part + small_object_allocator allocator{}; + typedef apply_body_task_bypass<class_type, input_type> task_type; + graph_task* t = allocator.new_object<task_type>( my_graph_ref, allocator, *this, input, my_priority ); + graph_reference().reserve_wait(); + return t; + } + + //! This is executed by an enqueued task, the "forwarder" + graph_task* forward_task() { + operation_type op_data(try_fwd); + graph_task* rval = NULL; + do { + op_data.status = WAIT; + my_aggregator.execute(&op_data); + if(op_data.status == SUCCEEDED) { + graph_task* ttask = op_data.bypass_t; + __TBB_ASSERT( ttask && ttask != SUCCESSFULLY_ENQUEUED, NULL ); + rval = combine_tasks(my_graph_ref, rval, ttask); + } + } while (op_data.status == SUCCEEDED); + return rval; + } + + inline graph_task* create_forward_task() { + if (!is_graph_active(my_graph_ref)) { + return nullptr; + } + small_object_allocator allocator{}; + typedef forward_task_bypass<class_type> task_type; + graph_task* t = allocator.new_object<task_type>( graph_reference(), allocator, *this, my_priority ); + graph_reference().reserve_wait(); + return t; + } + + //! 
Spawns a task that calls forward() + inline void spawn_forward_task() { + graph_task* tp = create_forward_task(); + if(tp) { + spawn_in_graph_arena(graph_reference(), *tp); + } + } + + node_priority_t priority() const override { return my_priority; } +}; // function_input_base + +//! Implements methods for a function node that takes a type Input as input and sends +// a type Output to its successors. +template< typename Input, typename Output, typename Policy, typename A> +class function_input : public function_input_base<Input, Policy, A, function_input<Input,Output,Policy,A> > { +public: + typedef Input input_type; + typedef Output output_type; + typedef function_body<input_type, output_type> function_body_type; + typedef function_input<Input, Output, Policy,A> my_class; + typedef function_input_base<Input, Policy, A, my_class> base_type; + typedef function_input_queue<input_type, A> input_queue_type; + + // constructor + template<typename Body> + function_input( + graph &g, size_t max_concurrency, Body& body, node_priority_t a_priority ) + : base_type(g, max_concurrency, a_priority) + , my_body( new function_body_leaf< input_type, output_type, Body>(body) ) + , my_init_body( new function_body_leaf< input_type, output_type, Body>(body) ) { + } + + //! Copy constructor + function_input( const function_input& src ) : + base_type(src), + my_body( src.my_init_body->clone() ), + my_init_body(src.my_init_body->clone() ) { + } +#if __INTEL_COMPILER <= 2021 + // Suppress superfluous diagnostic about virtual keyword absence in a destructor of an inherited + // class while the parent class has the virtual keyword for the destrocutor. + virtual +#endif + ~function_input() { + delete my_body; + delete my_init_body; + } + + template< typename Body > + Body copy_function_object() { + function_body_type &body_ref = *this->my_body; + return dynamic_cast< function_body_leaf<input_type, output_type, Body> & >(body_ref).get_body(); + } + + output_type apply_body_impl( const input_type& i) { + // There is an extra copied needed to capture the + // body execution without the try_put + fgt_begin_body( my_body ); + output_type v = (*my_body)(i); + fgt_end_body( my_body ); + return v; + } + + //TODO: consider moving into the base class + graph_task* apply_body_impl_bypass( const input_type &i) { + output_type v = apply_body_impl(i); + graph_task* postponed_task = NULL; + if( base_type::my_max_concurrency != 0 ) { + postponed_task = base_type::try_get_postponed_task(i); + __TBB_ASSERT( !postponed_task || postponed_task != SUCCESSFULLY_ENQUEUED, NULL ); + } + if( postponed_task ) { + // make the task available for other workers since we do not know successors' + // execution policy + spawn_in_graph_arena(base_type::graph_reference(), *postponed_task); + } + graph_task* successor_task = successors().try_put_task(v); +#if _MSC_VER && !__INTEL_COMPILER +#pragma warning (push) +#pragma warning (disable: 4127) /* suppress conditional expression is constant */ +#endif + if(has_policy<lightweight, Policy>::value) { +#if _MSC_VER && !__INTEL_COMPILER +#pragma warning (pop) +#endif + if(!successor_task) { + // Return confirmative status since current + // node's body has been executed anyway + successor_task = SUCCESSFULLY_ENQUEUED; + } + } + return successor_task; + } + +protected: + + void reset_function_input(reset_flags f) { + base_type::reset_function_input_base(f); + if(f & rf_reset_bodies) { + function_body_type *tmp = my_init_body->clone(); + delete my_body; + my_body = tmp; + } + } + + function_body_type 
*my_body; + function_body_type *my_init_body; + virtual broadcast_cache<output_type > &successors() = 0; + +}; // function_input + + +// helper templates to clear the successor edges of the output ports of an multifunction_node +template<int N> struct clear_element { + template<typename P> static void clear_this(P &p) { + (void)std::get<N-1>(p).successors().clear(); + clear_element<N-1>::clear_this(p); + } +#if TBB_USE_ASSERT + template<typename P> static bool this_empty(P &p) { + if(std::get<N-1>(p).successors().empty()) + return clear_element<N-1>::this_empty(p); + return false; + } +#endif +}; + +template<> struct clear_element<1> { + template<typename P> static void clear_this(P &p) { + (void)std::get<0>(p).successors().clear(); + } +#if TBB_USE_ASSERT + template<typename P> static bool this_empty(P &p) { + return std::get<0>(p).successors().empty(); + } +#endif +}; + +template <typename OutputTuple> +struct init_output_ports { + template <typename... Args> + static OutputTuple call(graph& g, const std::tuple<Args...>&) { + return OutputTuple(Args(g)...); + } +}; // struct init_output_ports + +//! Implements methods for a function node that takes a type Input as input +// and has a tuple of output ports specified. +template< typename Input, typename OutputPortSet, typename Policy, typename A> +class multifunction_input : public function_input_base<Input, Policy, A, multifunction_input<Input,OutputPortSet,Policy,A> > { +public: + static const int N = std::tuple_size<OutputPortSet>::value; + typedef Input input_type; + typedef OutputPortSet output_ports_type; + typedef multifunction_body<input_type, output_ports_type> multifunction_body_type; + typedef multifunction_input<Input, OutputPortSet, Policy, A> my_class; + typedef function_input_base<Input, Policy, A, my_class> base_type; + typedef function_input_queue<input_type, A> input_queue_type; + + // constructor + template<typename Body> + multifunction_input(graph &g, size_t max_concurrency,Body& body, node_priority_t a_priority ) + : base_type(g, max_concurrency, a_priority) + , my_body( new multifunction_body_leaf<input_type, output_ports_type, Body>(body) ) + , my_init_body( new multifunction_body_leaf<input_type, output_ports_type, Body>(body) ) + , my_output_ports(init_output_ports<output_ports_type>::call(g, my_output_ports)){ + } + + //! Copy constructor + multifunction_input( const multifunction_input& src ) : + base_type(src), + my_body( src.my_init_body->clone() ), + my_init_body(src.my_init_body->clone() ), + my_output_ports( init_output_ports<output_ports_type>::call(src.my_graph_ref, my_output_ports) ) { + } + + ~multifunction_input() { + delete my_body; + delete my_init_body; + } + + template< typename Body > + Body copy_function_object() { + multifunction_body_type &body_ref = *this->my_body; + return *static_cast<Body*>(dynamic_cast< multifunction_body_leaf<input_type, output_ports_type, Body> & >(body_ref).get_body_ptr()); + } + + // for multifunction nodes we do not have a single successor as such. So we just tell + // the task we were successful. + //TODO: consider moving common parts with implementation in function_input into separate function + graph_task* apply_body_impl_bypass( const input_type &i ) { + fgt_begin_body( my_body ); + (*my_body)(i, my_output_ports); + fgt_end_body( my_body ); + graph_task* ttask = NULL; + if(base_type::my_max_concurrency != 0) { + ttask = base_type::try_get_postponed_task(i); + } + return ttask ? 
ttask : SUCCESSFULLY_ENQUEUED; + } + + output_ports_type &output_ports(){ return my_output_ports; } + +protected: + + void reset(reset_flags f) { + base_type::reset_function_input_base(f); + if(f & rf_clear_edges)clear_element<N>::clear_this(my_output_ports); + if(f & rf_reset_bodies) { + multifunction_body_type* tmp = my_init_body->clone(); + delete my_body; + my_body = tmp; + } + __TBB_ASSERT(!(f & rf_clear_edges) || clear_element<N>::this_empty(my_output_ports), "multifunction_node reset failed"); + } + + multifunction_body_type *my_body; + multifunction_body_type *my_init_body; + output_ports_type my_output_ports; + +}; // multifunction_input + +// template to refer to an output port of a multifunction_node +template<size_t N, typename MOP> +typename std::tuple_element<N, typename MOP::output_ports_type>::type &output_port(MOP &op) { + return std::get<N>(op.output_ports()); +} + +inline void check_task_and_spawn(graph& g, graph_task* t) { + if (t && t != SUCCESSFULLY_ENQUEUED) { + spawn_in_graph_arena(g, *t); + } +} + +// helper structs for split_node +template<int N> +struct emit_element { + template<typename T, typename P> + static graph_task* emit_this(graph& g, const T &t, P &p) { + // TODO: consider to collect all the tasks in task_list and spawn them all at once + graph_task* last_task = std::get<N-1>(p).try_put_task(std::get<N-1>(t)); + check_task_and_spawn(g, last_task); + return emit_element<N-1>::emit_this(g,t,p); + } +}; + +template<> +struct emit_element<1> { + template<typename T, typename P> + static graph_task* emit_this(graph& g, const T &t, P &p) { + graph_task* last_task = std::get<0>(p).try_put_task(std::get<0>(t)); + check_task_and_spawn(g, last_task); + return SUCCESSFULLY_ENQUEUED; + } +}; + +//! Implements methods for an executable node that takes continue_msg as input +template< typename Output, typename Policy> +class continue_input : public continue_receiver { +public: + + //! The input type of this receiver + typedef continue_msg input_type; + + //! 
The output type of this receiver + typedef Output output_type; + typedef function_body<input_type, output_type> function_body_type; + typedef continue_input<output_type, Policy> class_type; + + template< typename Body > + continue_input( graph &g, Body& body, node_priority_t a_priority ) + : continue_receiver(/*number_of_predecessors=*/0, a_priority) + , my_graph_ref(g) + , my_body( new function_body_leaf< input_type, output_type, Body>(body) ) + , my_init_body( new function_body_leaf< input_type, output_type, Body>(body) ) + { } + + template< typename Body > + continue_input( graph &g, int number_of_predecessors, + Body& body, node_priority_t a_priority ) + : continue_receiver( number_of_predecessors, a_priority ) + , my_graph_ref(g) + , my_body( new function_body_leaf< input_type, output_type, Body>(body) ) + , my_init_body( new function_body_leaf< input_type, output_type, Body>(body) ) + { } + + continue_input( const continue_input& src ) : continue_receiver(src), + my_graph_ref(src.my_graph_ref), + my_body( src.my_init_body->clone() ), + my_init_body( src.my_init_body->clone() ) {} + + ~continue_input() { + delete my_body; + delete my_init_body; + } + + template< typename Body > + Body copy_function_object() { + function_body_type &body_ref = *my_body; + return dynamic_cast< function_body_leaf<input_type, output_type, Body> & >(body_ref).get_body(); + } + + void reset_receiver( reset_flags f) override { + continue_receiver::reset_receiver(f); + if(f & rf_reset_bodies) { + function_body_type *tmp = my_init_body->clone(); + delete my_body; + my_body = tmp; + } + } + +protected: + + graph& my_graph_ref; + function_body_type *my_body; + function_body_type *my_init_body; + + virtual broadcast_cache<output_type > &successors() = 0; + + friend class apply_body_task_bypass< class_type, continue_msg >; + + //! Applies the body to the provided input + graph_task* apply_body_bypass( input_type ) { + // There is an extra copied needed to capture the + // body execution without the try_put + fgt_begin_body( my_body ); + output_type v = (*my_body)( continue_msg() ); + fgt_end_body( my_body ); + return successors().try_put_task( v ); + } + + graph_task* execute() override { + if(!is_graph_active(my_graph_ref)) { + return NULL; + } +#if _MSC_VER && !__INTEL_COMPILER +#pragma warning (push) +#pragma warning (disable: 4127) /* suppress conditional expression is constant */ +#endif + if(has_policy<lightweight, Policy>::value) { +#if _MSC_VER && !__INTEL_COMPILER +#pragma warning (pop) +#endif + return apply_body_bypass( continue_msg() ); + } + else { + small_object_allocator allocator{}; + typedef apply_body_task_bypass<class_type, continue_msg> task_type; + graph_task* t = allocator.new_object<task_type>( graph_reference(), allocator, *this, continue_msg(), my_priority ); + graph_reference().reserve_wait(); + return t; + } + } + + graph& graph_reference() const override { + return my_graph_ref; + } +}; // continue_input + +//! Implements methods for both executable and function nodes that puts Output to its successors +template< typename Output > +class function_output : public sender<Output> { +public: + + template<int N> friend struct clear_element; + typedef Output output_type; + typedef typename sender<output_type>::successor_type successor_type; + typedef broadcast_cache<output_type> broadcast_cache_type; + + function_output(graph& g) : my_successors(this), my_graph_ref(g) {} + function_output(const function_output& other) = delete; + + //! 
Adds a new successor to this node + bool register_successor( successor_type &r ) override { + successors().register_successor( r ); + return true; + } + + //! Removes a successor from this node + bool remove_successor( successor_type &r ) override { + successors().remove_successor( r ); + return true; + } + + broadcast_cache_type &successors() { return my_successors; } + + graph& graph_reference() const { return my_graph_ref; } +protected: + broadcast_cache_type my_successors; + graph& my_graph_ref; +}; // function_output + +template< typename Output > +class multifunction_output : public function_output<Output> { +public: + typedef Output output_type; + typedef function_output<output_type> base_type; + using base_type::my_successors; + + multifunction_output(graph& g) : base_type(g) {} + multifunction_output(const multifunction_output& other) : base_type(other.my_graph_ref) {} + + bool try_put(const output_type &i) { + graph_task *res = try_put_task(i); + if( !res ) return false; + if( res != SUCCESSFULLY_ENQUEUED ) { + // wrapping in task_arena::execute() is not needed since the method is called from + // inside task::execute() + spawn_in_graph_arena(graph_reference(), *res); + } + return true; + } + + using base_type::graph_reference; + +protected: + + graph_task* try_put_task(const output_type &i) { + return my_successors.try_put_task(i); + } + + template <int N> friend struct emit_element; + +}; // multifunction_output + +//composite_node +template<typename CompositeType> +void add_nodes_impl(CompositeType*, bool) {} + +template< typename CompositeType, typename NodeType1, typename... NodeTypes > +void add_nodes_impl(CompositeType *c_node, bool visible, const NodeType1& n1, const NodeTypes&... n) { + void *addr = const_cast<NodeType1 *>(&n1); + + fgt_alias_port(c_node, addr, visible); + add_nodes_impl(c_node, visible, n...); +} + +#endif // __TBB__flow_graph_node_impl_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_node_set_impl.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_node_set_impl.h new file mode 100644 index 0000000000..ce867121f9 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_node_set_impl.h @@ -0,0 +1,265 @@ +/* + Copyright (c) 2020-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_flow_graph_node_set_impl_H +#define __TBB_flow_graph_node_set_impl_H + +#ifndef __TBB_flow_graph_H +#error Do not #include this internal file directly; use public TBB headers instead. 
+#endif + +// Included in namespace tbb::detail::d1 (in flow_graph.h) + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET +// Visual Studio 2019 reports an error while calling predecessor_selector::get and successor_selector::get +// Seems like the well-formed expression in trailing decltype is treated as ill-formed +// TODO: investigate problems with decltype in trailing return types or find the cross-platform solution +#define __TBB_MSVC_DISABLE_TRAILING_DECLTYPE (_MSC_VER >= 1900) + +namespace order { +struct undefined {}; +struct following {}; +struct preceding {}; +} + +class get_graph_helper { +public: + // TODO: consider making graph_reference() public and consistent interface to get a reference to the graph + // and remove get_graph_helper + template <typename T> + static graph& get(const T& object) { + return get_impl(object, std::is_base_of<graph_node, T>()); + } + +private: + // Get graph from the object of type derived from graph_node + template <typename T> + static graph& get_impl(const T& object, std::true_type) { + return static_cast<const graph_node*>(&object)->my_graph; + } + + template <typename T> + static graph& get_impl(const T& object, std::false_type) { + return object.graph_reference(); + } +}; + +template<typename Order, typename... Nodes> +struct node_set { + typedef Order order_type; + + std::tuple<Nodes&...> nodes; + node_set(Nodes&... ns) : nodes(ns...) {} + + template <typename... Nodes2> + node_set(const node_set<order::undefined, Nodes2...>& set) : nodes(set.nodes) {} + + graph& graph_reference() const { + return get_graph_helper::get(std::get<0>(nodes)); + } +}; + +namespace alias_helpers { +template <typename T> using output_type = typename T::output_type; +template <typename T> using output_ports_type = typename T::output_ports_type; +template <typename T> using input_type = typename T::input_type; +template <typename T> using input_ports_type = typename T::input_ports_type; +} // namespace alias_helpers + +template <typename T> +using has_output_type = supports<T, alias_helpers::output_type>; + +template <typename T> +using has_input_type = supports<T, alias_helpers::input_type>; + +template <typename T> +using has_input_ports_type = supports<T, alias_helpers::input_ports_type>; + +template <typename T> +using has_output_ports_type = supports<T, alias_helpers::output_ports_type>; + +template<typename T> +struct is_sender : std::is_base_of<sender<typename T::output_type>, T> {}; + +template<typename T> +struct is_receiver : std::is_base_of<receiver<typename T::input_type>, T> {}; + +template <typename Node> +struct is_async_node : std::false_type {}; + +template <typename... Args> +struct is_async_node<async_node<Args...>> : std::true_type {}; + +template<typename FirstPredecessor, typename... Predecessors> +node_set<order::following, FirstPredecessor, Predecessors...> +follows(FirstPredecessor& first_predecessor, Predecessors&... predecessors) { + static_assert((conjunction<has_output_type<FirstPredecessor>, + has_output_type<Predecessors>...>::value), + "Not all node's predecessors has output_type typedef"); + static_assert((conjunction<is_sender<FirstPredecessor>, is_sender<Predecessors>...>::value), + "Not all node's predecessors are senders"); + return node_set<order::following, FirstPredecessor, Predecessors...>(first_predecessor, predecessors...); +} + +template<typename... 
Predecessors> +node_set<order::following, Predecessors...> +follows(node_set<order::undefined, Predecessors...>& predecessors_set) { + static_assert((conjunction<has_output_type<Predecessors>...>::value), + "Not all nodes in the set has output_type typedef"); + static_assert((conjunction<is_sender<Predecessors>...>::value), + "Not all nodes in the set are senders"); + return node_set<order::following, Predecessors...>(predecessors_set); +} + +template<typename FirstSuccessor, typename... Successors> +node_set<order::preceding, FirstSuccessor, Successors...> +precedes(FirstSuccessor& first_successor, Successors&... successors) { + static_assert((conjunction<has_input_type<FirstSuccessor>, + has_input_type<Successors>...>::value), + "Not all node's successors has input_type typedef"); + static_assert((conjunction<is_receiver<FirstSuccessor>, is_receiver<Successors>...>::value), + "Not all node's successors are receivers"); + return node_set<order::preceding, FirstSuccessor, Successors...>(first_successor, successors...); +} + +template<typename... Successors> +node_set<order::preceding, Successors...> +precedes(node_set<order::undefined, Successors...>& successors_set) { + static_assert((conjunction<has_input_type<Successors>...>::value), + "Not all nodes in the set has input_type typedef"); + static_assert((conjunction<is_receiver<Successors>...>::value), + "Not all nodes in the set are receivers"); + return node_set<order::preceding, Successors...>(successors_set); +} + +template <typename Node, typename... Nodes> +node_set<order::undefined, Node, Nodes...> +make_node_set(Node& first_node, Nodes&... nodes) { + return node_set<order::undefined, Node, Nodes...>(first_node, nodes...); +} + +template<size_t I> +class successor_selector { + template <typename NodeType> + static auto get_impl(NodeType& node, std::true_type) -> decltype(input_port<I>(node)) { + return input_port<I>(node); + } + + template <typename NodeType> + static NodeType& get_impl(NodeType& node, std::false_type) { return node; } + +public: + template <typename NodeType> +#if __TBB_MSVC_DISABLE_TRAILING_DECLTYPE + static auto& get(NodeType& node) +#else + static auto get(NodeType& node) -> decltype(get_impl(node, has_input_ports_type<NodeType>())) +#endif + { + return get_impl(node, has_input_ports_type<NodeType>()); + } +}; + +template<size_t I> +class predecessor_selector { + template <typename NodeType> + static auto internal_get(NodeType& node, std::true_type) -> decltype(output_port<I>(node)) { + return output_port<I>(node); + } + + template <typename NodeType> + static NodeType& internal_get(NodeType& node, std::false_type) { return node;} + + template <typename NodeType> +#if __TBB_MSVC_DISABLE_TRAILING_DECLTYPE + static auto& get_impl(NodeType& node, std::false_type) +#else + static auto get_impl(NodeType& node, std::false_type) -> decltype(internal_get(node, has_output_ports_type<NodeType>())) +#endif + { + return internal_get(node, has_output_ports_type<NodeType>()); + } + + template <typename AsyncNode> + static AsyncNode& get_impl(AsyncNode& node, std::true_type) { return node; } + +public: + template <typename NodeType> +#if __TBB_MSVC_DISABLE_TRAILING_DECLTYPE + static auto& get(NodeType& node) +#else + static auto get(NodeType& node) -> decltype(get_impl(node, is_async_node<NodeType>())) +#endif + { + return get_impl(node, is_async_node<NodeType>()); + } +}; + +template<size_t I> +class make_edges_helper { +public: + template<typename PredecessorsTuple, typename NodeType> + static void 
connect_predecessors(PredecessorsTuple& predecessors, NodeType& node) { + make_edge(std::get<I>(predecessors), successor_selector<I>::get(node)); + make_edges_helper<I - 1>::connect_predecessors(predecessors, node); + } + + template<typename SuccessorsTuple, typename NodeType> + static void connect_successors(NodeType& node, SuccessorsTuple& successors) { + make_edge(predecessor_selector<I>::get(node), std::get<I>(successors)); + make_edges_helper<I - 1>::connect_successors(node, successors); + } +}; + +template<> +struct make_edges_helper<0> { + template<typename PredecessorsTuple, typename NodeType> + static void connect_predecessors(PredecessorsTuple& predecessors, NodeType& node) { + make_edge(std::get<0>(predecessors), successor_selector<0>::get(node)); + } + + template<typename SuccessorsTuple, typename NodeType> + static void connect_successors(NodeType& node, SuccessorsTuple& successors) { + make_edge(predecessor_selector<0>::get(node), std::get<0>(successors)); + } +}; + +// TODO: consider adding an overload for making edges between node sets +template<typename NodeType, typename OrderFlagType, typename... Args> +void make_edges(const node_set<OrderFlagType, Args...>& s, NodeType& node) { + const std::size_t SetSize = std::tuple_size<decltype(s.nodes)>::value; + make_edges_helper<SetSize - 1>::connect_predecessors(s.nodes, node); +} + +template <typename NodeType, typename OrderFlagType, typename... Args> +void make_edges(NodeType& node, const node_set<OrderFlagType, Args...>& s) { + const std::size_t SetSize = std::tuple_size<decltype(s.nodes)>::value; + make_edges_helper<SetSize - 1>::connect_successors(node, s.nodes); +} + +template <typename NodeType, typename... Nodes> +void make_edges_in_order(const node_set<order::following, Nodes...>& ns, NodeType& node) { + make_edges(ns, node); +} + +template <typename NodeType, typename... Nodes> +void make_edges_in_order(const node_set<order::preceding, Nodes...>& ns, NodeType& node) { + make_edges(node, ns); +} + +#endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + +#endif // __TBB_flow_graph_node_set_impl_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_nodes_deduction.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_nodes_deduction.h new file mode 100644 index 0000000000..8c20993795 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_nodes_deduction.h @@ -0,0 +1,277 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_flow_graph_nodes_deduction_H +#define __TBB_flow_graph_nodes_deduction_H + +#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +namespace tbb { +namespace detail { +namespace d1 { + +template <typename Input, typename Output> +struct declare_body_types { + using input_type = Input; + using output_type = Output; +}; + +struct NoInputBody {}; + +template <typename Output> +struct declare_body_types<NoInputBody, Output> { + using output_type = Output; +}; + +template <typename T> struct body_types; + +template <typename T, typename Input, typename Output> +struct body_types<Output (T::*)(const Input&) const> : declare_body_types<Input, Output> {}; + +template <typename T, typename Input, typename Output> +struct body_types<Output (T::*)(const Input&)> : declare_body_types<Input, Output> {}; + +template <typename T, typename Input, typename Output> +struct body_types<Output (T::*)(Input&) const> : declare_body_types<Input, Output> {}; + +template <typename T, typename Input, typename Output> +struct body_types<Output (T::*)(Input&)> : declare_body_types<Input, Output> {}; + +template <typename T, typename Output> +struct body_types<Output (T::*)(flow_control&) const> : declare_body_types<NoInputBody, Output> {}; + +template <typename T, typename Output> +struct body_types<Output (T::*)(flow_control&)> : declare_body_types<NoInputBody, Output> {}; + +template <typename Input, typename Output> +struct body_types<Output (*)(Input&)> : declare_body_types<Input, Output> {}; + +template <typename Input, typename Output> +struct body_types<Output (*)(const Input&)> : declare_body_types<Input, Output> {}; + +template <typename Output> +struct body_types<Output (*)(flow_control&)> : declare_body_types<NoInputBody, Output> {}; + +template <typename Body> +using input_t = typename body_types<Body>::input_type; + +template <typename Body> +using output_t = typename body_types<Body>::output_type; + +template <typename T, typename Input, typename Output> +auto decide_on_operator_overload(Output (T::*name)(const Input&) const)->decltype(name); + +template <typename T, typename Input, typename Output> +auto decide_on_operator_overload(Output (T::*name)(const Input&))->decltype(name); + +template <typename T, typename Input, typename Output> +auto decide_on_operator_overload(Output (T::*name)(Input&) const)->decltype(name); + +template <typename T, typename Input, typename Output> +auto decide_on_operator_overload(Output (T::*name)(Input&))->decltype(name); + +template <typename Input, typename Output> +auto decide_on_operator_overload(Output (*name)(const Input&))->decltype(name); + +template <typename Input, typename Output> +auto decide_on_operator_overload(Output (*name)(Input&))->decltype(name); + +template <typename Body> +decltype(decide_on_operator_overload(&Body::operator())) decide_on_callable_type(int); + +template <typename Body> +decltype(decide_on_operator_overload(std::declval<Body>())) decide_on_callable_type(...); + +// Deduction guides for Flow Graph nodes + +template <typename GraphOrSet, typename Body> +input_node(GraphOrSet&&, Body) +->input_node<output_t<decltype(decide_on_callable_type<Body>(0))>>; + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + +template <typename NodeSet> +struct decide_on_set; + +template <typename Node, typename... Nodes> +struct decide_on_set<node_set<order::following, Node, Nodes...>> { + using type = typename Node::output_type; +}; + +template <typename Node, typename... 
Nodes> +struct decide_on_set<node_set<order::preceding, Node, Nodes...>> { + using type = typename Node::input_type; +}; + +template <typename NodeSet> +using decide_on_set_t = typename decide_on_set<std::decay_t<NodeSet>>::type; + +template <typename NodeSet> +broadcast_node(const NodeSet&) +->broadcast_node<decide_on_set_t<NodeSet>>; + +template <typename NodeSet> +buffer_node(const NodeSet&) +->buffer_node<decide_on_set_t<NodeSet>>; + +template <typename NodeSet> +queue_node(const NodeSet&) +->queue_node<decide_on_set_t<NodeSet>>; +#endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + +template <typename GraphOrProxy, typename Sequencer> +sequencer_node(GraphOrProxy&&, Sequencer) +->sequencer_node<input_t<decltype(decide_on_callable_type<Sequencer>(0))>>; + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET +template <typename NodeSet, typename Compare> +priority_queue_node(const NodeSet&, const Compare&) +->priority_queue_node<decide_on_set_t<NodeSet>, Compare>; + +template <typename NodeSet> +priority_queue_node(const NodeSet&) +->priority_queue_node<decide_on_set_t<NodeSet>, std::less<decide_on_set_t<NodeSet>>>; +#endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + +template <typename Key> +struct join_key { + using type = Key; +}; + +template <typename T> +struct join_key<const T&> { + using type = T&; +}; + +template <typename Key> +using join_key_t = typename join_key<Key>::type; + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET +template <typename Policy, typename... Predecessors> +join_node(const node_set<order::following, Predecessors...>&, Policy) +->join_node<std::tuple<typename Predecessors::output_type...>, + Policy>; + +template <typename Policy, typename Successor, typename... Successors> +join_node(const node_set<order::preceding, Successor, Successors...>&, Policy) +->join_node<typename Successor::input_type, Policy>; + +template <typename... Predecessors> +join_node(const node_set<order::following, Predecessors...>) +->join_node<std::tuple<typename Predecessors::output_type...>, + queueing>; + +template <typename Successor, typename... Successors> +join_node(const node_set<order::preceding, Successor, Successors...>) +->join_node<typename Successor::input_type, queueing>; +#endif + +template <typename GraphOrProxy, typename Body, typename... Bodies> +join_node(GraphOrProxy&&, Body, Bodies...) +->join_node<std::tuple<input_t<decltype(decide_on_callable_type<Body>(0))>, + input_t<decltype(decide_on_callable_type<Bodies>(0))>...>, + key_matching<join_key_t<output_t<decltype(decide_on_callable_type<Body>(0))>>>>; + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET +template <typename... Predecessors> +indexer_node(const node_set<order::following, Predecessors...>&) +->indexer_node<typename Predecessors::output_type...>; +#endif + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET +template <typename NodeSet> +limiter_node(const NodeSet&, size_t) +->limiter_node<decide_on_set_t<NodeSet>>; + +template <typename Predecessor, typename... Predecessors> +split_node(const node_set<order::following, Predecessor, Predecessors...>&) +->split_node<typename Predecessor::output_type>; + +template <typename... 
Successors> +split_node(const node_set<order::preceding, Successors...>&) +->split_node<std::tuple<typename Successors::input_type...>>; + +#endif + +template <typename GraphOrSet, typename Body, typename Policy> +function_node(GraphOrSet&&, + size_t, Body, + Policy, node_priority_t = no_priority) +->function_node<input_t<decltype(decide_on_callable_type<Body>(0))>, + output_t<decltype(decide_on_callable_type<Body>(0))>, + Policy>; + +template <typename GraphOrSet, typename Body> +function_node(GraphOrSet&&, size_t, + Body, node_priority_t = no_priority) +->function_node<input_t<decltype(decide_on_callable_type<Body>(0))>, + output_t<decltype(decide_on_callable_type<Body>(0))>, + queueing>; + +template <typename Output> +struct continue_output { + using type = Output; +}; + +template <> +struct continue_output<void> { + using type = continue_msg; +}; + +template <typename T> +using continue_output_t = typename continue_output<T>::type; + +template <typename GraphOrSet, typename Body, typename Policy> +continue_node(GraphOrSet&&, Body, + Policy, node_priority_t = no_priority) +->continue_node<continue_output_t<std::invoke_result_t<Body, continue_msg>>, + Policy>; + +template <typename GraphOrSet, typename Body, typename Policy> +continue_node(GraphOrSet&&, + int, Body, + Policy, node_priority_t = no_priority) +->continue_node<continue_output_t<std::invoke_result_t<Body, continue_msg>>, + Policy>; + +template <typename GraphOrSet, typename Body> +continue_node(GraphOrSet&&, + Body, node_priority_t = no_priority) +->continue_node<continue_output_t<std::invoke_result_t<Body, continue_msg>>, Policy<void>>; + +template <typename GraphOrSet, typename Body> +continue_node(GraphOrSet&&, int, + Body, node_priority_t = no_priority) +->continue_node<continue_output_t<std::invoke_result_t<Body, continue_msg>>, + Policy<void>>; + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + +template <typename NodeSet> +overwrite_node(const NodeSet&) +->overwrite_node<decide_on_set_t<NodeSet>>; + +template <typename NodeSet> +write_once_node(const NodeSet&) +->write_once_node<decide_on_set_t<NodeSet>>; +#endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET +} // namespace d1 +} // namespace detail +} // namespace tbb + +#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +#endif // __TBB_flow_graph_nodes_deduction_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_tagged_buffer_impl.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_tagged_buffer_impl.h new file mode 100644 index 0000000000..0c4580a199 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_tagged_buffer_impl.h @@ -0,0 +1,256 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +// a hash table buffer that can expand, and can support as many deletions as +// additions, list-based, with elements of list held in array (for destruction +// management), multiplicative hashing (like ets). No synchronization built-in. 
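+// Illustrative usage sketch (all my_* names are hypothetical; the real clients are the
+// key-matching join ports): values are stored directly, and keys are derived on demand
+// through a heap-allocated ValueToKey functor whose ownership the buffer takes, e.g.
+//
+//   hash_buffer<int, my_msg, my_value_to_key, my_hash_compare> buf;
+//   buf.set_key_func(new my_value_to_key());  // buffer deletes it in its destructor
+//   buf.insert_with_key(msg);                 // key computed as (*key_func)(msg)
+//   my_msg* hit = nullptr;
+//   if (buf.find_ref_with_key(some_key, hit)) { /* hit aliases the stored copy */ }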
+// + +#ifndef __TBB__flow_graph_hash_buffer_impl_H +#define __TBB__flow_graph_hash_buffer_impl_H + +#ifndef __TBB_flow_graph_H +#error Do not #include this internal file directly; use public TBB headers instead. +#endif + +// included in namespace tbb::flow::interfaceX::internal + +// elements in the table are a simple list; we need pointer to next element to +// traverse the chain +template<typename ValueType> +struct buffer_element_type { + // the second parameter below is void * because we can't forward-declare the type + // itself, so we just reinterpret_cast below. + typedef typename aligned_pair<ValueType, void *>::type type; +}; + +template + < + typename Key, // type of key within ValueType + typename ValueType, + typename ValueToKey, // abstract method that returns "const Key" or "const Key&" given ValueType + typename HashCompare, // has hash and equal + typename Allocator=tbb::cache_aligned_allocator< typename aligned_pair<ValueType, void *>::type > + > +class hash_buffer : public HashCompare { +public: + static const size_t INITIAL_SIZE = 8; // initial size of the hash pointer table + typedef ValueType value_type; + typedef typename buffer_element_type< value_type >::type element_type; + typedef value_type *pointer_type; + typedef element_type *list_array_type; // array we manage manually + typedef list_array_type *pointer_array_type; + typedef typename std::allocator_traits<Allocator>::template rebind_alloc<list_array_type> pointer_array_allocator_type; + typedef typename std::allocator_traits<Allocator>::template rebind_alloc<element_type> elements_array_allocator; + typedef typename std::decay<Key>::type Knoref; + +private: + ValueToKey *my_key; + size_t my_size; + size_t nelements; + pointer_array_type pointer_array; // pointer_array[my_size] + list_array_type elements_array; // elements_array[my_size / 2] + element_type* free_list; + + size_t mask() { return my_size - 1; } + + void set_up_free_list( element_type **p_free_list, list_array_type la, size_t sz) { + for(size_t i=0; i < sz - 1; ++i ) { // construct free list + la[i].second = &(la[i+1]); + } + la[sz-1].second = NULL; + *p_free_list = (element_type *)&(la[0]); + } + + // cleanup for exceptions + struct DoCleanup { + pointer_array_type *my_pa; + list_array_type *my_elements; + size_t my_size; + + DoCleanup(pointer_array_type &pa, list_array_type &my_els, size_t sz) : + my_pa(&pa), my_elements(&my_els), my_size(sz) { } + ~DoCleanup() { + if(my_pa) { + size_t dont_care = 0; + internal_free_buffer(*my_pa, *my_elements, my_size, dont_care); + } + } + }; + + // exception-safety requires we do all the potentially-throwing operations first + void grow_array() { + size_t new_size = my_size*2; + size_t new_nelements = nelements; // internal_free_buffer zeroes this + list_array_type new_elements_array = NULL; + pointer_array_type new_pointer_array = NULL; + list_array_type new_free_list = NULL; + { + DoCleanup my_cleanup(new_pointer_array, new_elements_array, new_size); + new_elements_array = elements_array_allocator().allocate(my_size); + new_pointer_array = pointer_array_allocator_type().allocate(new_size); + for(size_t i=0; i < new_size; ++i) new_pointer_array[i] = NULL; + set_up_free_list(&new_free_list, new_elements_array, my_size ); + + for(size_t i=0; i < my_size; ++i) { + for( element_type* op = pointer_array[i]; op; op = (element_type *)(op->second)) { + value_type *ov = reinterpret_cast<value_type *>(&(op->first)); + // could have std::move semantics + internal_insert_with_key(new_pointer_array, new_size, 
new_free_list, *ov); + } + } + my_cleanup.my_pa = NULL; + my_cleanup.my_elements = NULL; + } + + internal_free_buffer(pointer_array, elements_array, my_size, nelements); + free_list = new_free_list; + pointer_array = new_pointer_array; + elements_array = new_elements_array; + my_size = new_size; + nelements = new_nelements; + } + + // v should have perfect forwarding if std::move implemented. + // we use this method to move elements in grow_array, so can't use class fields + void internal_insert_with_key( element_type **p_pointer_array, size_t p_sz, list_array_type &p_free_list, + const value_type &v) { + size_t l_mask = p_sz-1; + __TBB_ASSERT(my_key, "Error: value-to-key functor not provided"); + size_t h = this->hash((*my_key)(v)) & l_mask; + __TBB_ASSERT(p_free_list, "Error: free list not set up."); + element_type* my_elem = p_free_list; p_free_list = (element_type *)(p_free_list->second); + (void) new(&(my_elem->first)) value_type(v); + my_elem->second = p_pointer_array[h]; + p_pointer_array[h] = my_elem; + } + + void internal_initialize_buffer() { + pointer_array = pointer_array_allocator_type().allocate(my_size); + for(size_t i = 0; i < my_size; ++i) pointer_array[i] = NULL; + elements_array = elements_array_allocator().allocate(my_size / 2); + set_up_free_list(&free_list, elements_array, my_size / 2); + } + + // made static so an enclosed class can use to properly dispose of the internals + static void internal_free_buffer( pointer_array_type &pa, list_array_type &el, size_t &sz, size_t &ne ) { + if(pa) { + for(size_t i = 0; i < sz; ++i ) { + element_type *p_next; + for( element_type *p = pa[i]; p; p = p_next) { + p_next = (element_type *)p->second; + // TODO revamp: make sure type casting is correct. + void* ptr = (void*)(p->first); +#if _MSC_VER && _MSC_VER <= 1900 && !__INTEL_COMPILER + suppress_unused_warning(ptr); +#endif + ((value_type*)ptr)->~value_type(); + } + } + pointer_array_allocator_type().deallocate(pa, sz); + pa = NULL; + } + // Separate test (if allocation of pa throws, el may be allocated. + // but no elements will be constructed.) + if(el) { + elements_array_allocator().deallocate(el, sz / 2); + el = NULL; + } + sz = INITIAL_SIZE; + ne = 0; + } + +public: + hash_buffer() : my_key(NULL), my_size(INITIAL_SIZE), nelements(0) { + internal_initialize_buffer(); + } + + ~hash_buffer() { + internal_free_buffer(pointer_array, elements_array, my_size, nelements); + if(my_key) delete my_key; + } + hash_buffer(const hash_buffer&) = delete; + hash_buffer& operator=(const hash_buffer&) = delete; + + void reset() { + internal_free_buffer(pointer_array, elements_array, my_size, nelements); + internal_initialize_buffer(); + } + + // Take ownership of func object allocated with new. + // This method is only used internally, so can't be misused by user. + void set_key_func(ValueToKey *vtk) { my_key = vtk; } + // pointer is used to clone() + ValueToKey* get_key_func() { return my_key; } + + bool insert_with_key(const value_type &v) { + pointer_type p = NULL; + __TBB_ASSERT(my_key, "Error: value-to-key functor not provided"); + if(find_ref_with_key((*my_key)(v), p)) { + p->~value_type(); + (void) new(p) value_type(v); // copy-construct into the space + return false; + } + ++nelements; + if(nelements*2 > my_size) grow_array(); + internal_insert_with_key(pointer_array, my_size, free_list, v); + return true; + } + + // returns true and sets v to array element if found, else returns false. 
+ bool find_ref_with_key(const Knoref& k, pointer_type &v) { + size_t i = this->hash(k) & mask(); + for(element_type* p = pointer_array[i]; p; p = (element_type *)(p->second)) { + pointer_type pv = reinterpret_cast<pointer_type>(&(p->first)); + __TBB_ASSERT(my_key, "Error: value-to-key functor not provided"); + if(this->equal((*my_key)(*pv), k)) { + v = pv; + return true; + } + } + return false; + } + + bool find_with_key( const Knoref& k, value_type &v) { + value_type *p; + if(find_ref_with_key(k, p)) { + v = *p; + return true; + } + else + return false; + } + + void delete_with_key(const Knoref& k) { + size_t h = this->hash(k) & mask(); + element_type* prev = NULL; + for(element_type* p = pointer_array[h]; p; prev = p, p = (element_type *)(p->second)) { + value_type *vp = reinterpret_cast<value_type *>(&(p->first)); + __TBB_ASSERT(my_key, "Error: value-to-key functor not provided"); + if(this->equal((*my_key)(*vp), k)) { + vp->~value_type(); + if(prev) prev->second = p->second; + else pointer_array[h] = (element_type *)(p->second); + p->second = free_list; + free_list = p; + --nelements; + return; + } + } + __TBB_ASSERT(false, "key not found for delete"); + } +}; +#endif // __TBB__flow_graph_hash_buffer_impl_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_trace_impl.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_trace_impl.h new file mode 100644 index 0000000000..d8256ca8a2 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_trace_impl.h @@ -0,0 +1,364 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef _FGT_GRAPH_TRACE_IMPL_H +#define _FGT_GRAPH_TRACE_IMPL_H + +#include "../profiling.h" +#if (_MSC_VER >= 1900) + #include <intrin.h> +#endif + +namespace tbb { +namespace detail { +namespace d1 { + +template< typename T > class sender; +template< typename T > class receiver; + +#if TBB_USE_PROFILING_TOOLS + #if __TBB_FLOW_TRACE_CODEPTR + #if (_MSC_VER >= 1900) + #define CODEPTR() (_ReturnAddress()) + #elif __TBB_GCC_VERSION >= 40800 + #define CODEPTR() ( __builtin_return_address(0)) + #else + #define CODEPTR() NULL + #endif + #else + #define CODEPTR() NULL + #endif /* __TBB_FLOW_TRACE_CODEPTR */ + +static inline void fgt_alias_port(void *node, void *p, bool visible) { + if(visible) + itt_relation_add( ITT_DOMAIN_FLOW, node, FLOW_NODE, __itt_relation_is_parent_of, p, FLOW_NODE ); + else + itt_relation_add( ITT_DOMAIN_FLOW, p, FLOW_NODE, __itt_relation_is_child_of, node, FLOW_NODE ); +} + +static inline void fgt_composite ( void* codeptr, void *node, void *graph ) { + itt_make_task_group( ITT_DOMAIN_FLOW, node, FLOW_NODE, graph, FLOW_GRAPH, FLOW_COMPOSITE_NODE ); + suppress_unused_warning( codeptr ); +#if __TBB_FLOW_TRACE_CODEPTR + if (codeptr != NULL) { + register_node_addr(ITT_DOMAIN_FLOW, node, FLOW_NODE, CODE_ADDRESS, &codeptr); + } +#endif +} + +static inline void fgt_internal_alias_input_port( void *node, void *p, string_resource_index name_index ) { + itt_make_task_group( ITT_DOMAIN_FLOW, p, FLOW_INPUT_PORT, node, FLOW_NODE, name_index ); + itt_relation_add( ITT_DOMAIN_FLOW, node, FLOW_NODE, __itt_relation_is_parent_of, p, FLOW_INPUT_PORT ); +} + +static inline void fgt_internal_alias_output_port( void *node, void *p, string_resource_index name_index ) { + itt_make_task_group( ITT_DOMAIN_FLOW, p, FLOW_OUTPUT_PORT, node, FLOW_NODE, name_index ); + itt_relation_add( ITT_DOMAIN_FLOW, node, FLOW_NODE, __itt_relation_is_parent_of, p, FLOW_OUTPUT_PORT ); +} + +template<typename InputType> +void alias_input_port(void *node, receiver<InputType>* port, string_resource_index name_index) { + // TODO: Make fgt_internal_alias_input_port a function template? + fgt_internal_alias_input_port( node, port, name_index); +} + +template < typename PortsTuple, int N > +struct fgt_internal_input_alias_helper { + static void alias_port( void *node, PortsTuple &ports ) { + alias_input_port( node, &(std::get<N-1>(ports)), static_cast<string_resource_index>(FLOW_INPUT_PORT_0 + N - 1) ); + fgt_internal_input_alias_helper<PortsTuple, N-1>::alias_port( node, ports ); + } +}; + +template < typename PortsTuple > +struct fgt_internal_input_alias_helper<PortsTuple, 0> { + static void alias_port( void * /* node */, PortsTuple & /* ports */ ) { } +}; + +template<typename OutputType> +void alias_output_port(void *node, sender<OutputType>* port, string_resource_index name_index) { + // TODO: Make fgt_internal_alias_output_port a function template? 
+ fgt_internal_alias_output_port( node, static_cast<void *>(port), name_index); +} + +template < typename PortsTuple, int N > +struct fgt_internal_output_alias_helper { + static void alias_port( void *node, PortsTuple &ports ) { + alias_output_port( node, &(std::get<N-1>(ports)), static_cast<string_resource_index>(FLOW_OUTPUT_PORT_0 + N - 1) ); + fgt_internal_output_alias_helper<PortsTuple, N-1>::alias_port( node, ports ); + } +}; + +template < typename PortsTuple > +struct fgt_internal_output_alias_helper<PortsTuple, 0> { + static void alias_port( void * /*node*/, PortsTuple &/*ports*/ ) { + } +}; + +static inline void fgt_internal_create_input_port( void *node, void *p, string_resource_index name_index ) { + itt_make_task_group( ITT_DOMAIN_FLOW, p, FLOW_INPUT_PORT, node, FLOW_NODE, name_index ); +} + +static inline void fgt_internal_create_output_port( void* codeptr, void *node, void *p, string_resource_index name_index ) { + itt_make_task_group(ITT_DOMAIN_FLOW, p, FLOW_OUTPUT_PORT, node, FLOW_NODE, name_index); + suppress_unused_warning( codeptr ); +#if __TBB_FLOW_TRACE_CODEPTR + if (codeptr != NULL) { + register_node_addr(ITT_DOMAIN_FLOW, node, FLOW_NODE, CODE_ADDRESS, &codeptr); + } +#endif +} + +template<typename InputType> +void register_input_port(void *node, receiver<InputType>* port, string_resource_index name_index) { + // TODO: Make fgt_internal_create_input_port a function template? + fgt_internal_create_input_port(node, static_cast<void*>(port), name_index); +} + +template < typename PortsTuple, int N > +struct fgt_internal_input_helper { + static void register_port( void *node, PortsTuple &ports ) { + register_input_port( node, &(std::get<N-1>(ports)), static_cast<string_resource_index>(FLOW_INPUT_PORT_0 + N - 1) ); + fgt_internal_input_helper<PortsTuple, N-1>::register_port( node, ports ); + } +}; + +template < typename PortsTuple > +struct fgt_internal_input_helper<PortsTuple, 1> { + static void register_port( void *node, PortsTuple &ports ) { + register_input_port( node, &(std::get<0>(ports)), FLOW_INPUT_PORT_0 ); + } +}; + +template<typename OutputType> +void register_output_port(void* codeptr, void *node, sender<OutputType>* port, string_resource_index name_index) { + // TODO: Make fgt_internal_create_output_port a function template? 
+ fgt_internal_create_output_port( codeptr, node, static_cast<void *>(port), name_index); +} + +template < typename PortsTuple, int N > +struct fgt_internal_output_helper { + static void register_port( void* codeptr, void *node, PortsTuple &ports ) { + register_output_port( codeptr, node, &(std::get<N-1>(ports)), static_cast<string_resource_index>(FLOW_OUTPUT_PORT_0 + N - 1) ); + fgt_internal_output_helper<PortsTuple, N-1>::register_port( codeptr, node, ports ); + } +}; + +template < typename PortsTuple > +struct fgt_internal_output_helper<PortsTuple,1> { + static void register_port( void* codeptr, void *node, PortsTuple &ports ) { + register_output_port( codeptr, node, &(std::get<0>(ports)), FLOW_OUTPUT_PORT_0 ); + } +}; + +template< typename NodeType > +void fgt_multioutput_node_desc( const NodeType *node, const char *desc ) { + void *addr = (void *)( static_cast< receiver< typename NodeType::input_type > * >(const_cast< NodeType *>(node)) ); + itt_metadata_str_add( ITT_DOMAIN_FLOW, addr, FLOW_NODE, FLOW_OBJECT_NAME, desc ); +} + +template< typename NodeType > +void fgt_multiinput_multioutput_node_desc( const NodeType *node, const char *desc ) { + void *addr = const_cast<NodeType *>(node); + itt_metadata_str_add( ITT_DOMAIN_FLOW, addr, FLOW_NODE, FLOW_OBJECT_NAME, desc ); +} + +template< typename NodeType > +static inline void fgt_node_desc( const NodeType *node, const char *desc ) { + void *addr = (void *)( static_cast< sender< typename NodeType::output_type > * >(const_cast< NodeType *>(node)) ); + itt_metadata_str_add( ITT_DOMAIN_FLOW, addr, FLOW_NODE, FLOW_OBJECT_NAME, desc ); +} + +static inline void fgt_graph_desc( const void *g, const char *desc ) { + void *addr = const_cast< void *>(g); + itt_metadata_str_add( ITT_DOMAIN_FLOW, addr, FLOW_GRAPH, FLOW_OBJECT_NAME, desc ); +} + +static inline void fgt_body( void *node, void *body ) { + itt_relation_add( ITT_DOMAIN_FLOW, body, FLOW_BODY, __itt_relation_is_child_of, node, FLOW_NODE ); +} + +template< int N, typename PortsTuple > +static inline void fgt_multioutput_node(void* codeptr, string_resource_index t, void *g, void *input_port, PortsTuple &ports ) { + itt_make_task_group( ITT_DOMAIN_FLOW, input_port, FLOW_NODE, g, FLOW_GRAPH, t ); + fgt_internal_create_input_port( input_port, input_port, FLOW_INPUT_PORT_0 ); + fgt_internal_output_helper<PortsTuple, N>::register_port(codeptr, input_port, ports ); +} + +template< int N, typename PortsTuple > +static inline void fgt_multioutput_node_with_body( void* codeptr, string_resource_index t, void *g, void *input_port, PortsTuple &ports, void *body ) { + itt_make_task_group( ITT_DOMAIN_FLOW, input_port, FLOW_NODE, g, FLOW_GRAPH, t ); + fgt_internal_create_input_port( input_port, input_port, FLOW_INPUT_PORT_0 ); + fgt_internal_output_helper<PortsTuple, N>::register_port( codeptr, input_port, ports ); + fgt_body( input_port, body ); +} + +template< int N, typename PortsTuple > +static inline void fgt_multiinput_node( void* codeptr, string_resource_index t, void *g, PortsTuple &ports, void *output_port) { + itt_make_task_group( ITT_DOMAIN_FLOW, output_port, FLOW_NODE, g, FLOW_GRAPH, t ); + fgt_internal_create_output_port( codeptr, output_port, output_port, FLOW_OUTPUT_PORT_0 ); + fgt_internal_input_helper<PortsTuple, N>::register_port( output_port, ports ); +} + +static inline void fgt_multiinput_multioutput_node( void* codeptr, string_resource_index t, void *n, void *g ) { + itt_make_task_group( ITT_DOMAIN_FLOW, n, FLOW_NODE, g, FLOW_GRAPH, t ); + suppress_unused_warning( codeptr ); +#if 
__TBB_FLOW_TRACE_CODEPTR + if (codeptr != NULL) { + register_node_addr(ITT_DOMAIN_FLOW, n, FLOW_NODE, CODE_ADDRESS, &codeptr); + } +#endif +} + +static inline void fgt_node( void* codeptr, string_resource_index t, void *g, void *output_port ) { + itt_make_task_group( ITT_DOMAIN_FLOW, output_port, FLOW_NODE, g, FLOW_GRAPH, t ); + fgt_internal_create_output_port( codeptr, output_port, output_port, FLOW_OUTPUT_PORT_0 ); +} + +static void fgt_node_with_body( void* codeptr, string_resource_index t, void *g, void *output_port, void *body ) { + itt_make_task_group( ITT_DOMAIN_FLOW, output_port, FLOW_NODE, g, FLOW_GRAPH, t ); + fgt_internal_create_output_port(codeptr, output_port, output_port, FLOW_OUTPUT_PORT_0 ); + fgt_body( output_port, body ); +} + +static inline void fgt_node( void* codeptr, string_resource_index t, void *g, void *input_port, void *output_port ) { + fgt_node( codeptr, t, g, output_port ); + fgt_internal_create_input_port( output_port, input_port, FLOW_INPUT_PORT_0 ); +} + +static inline void fgt_node_with_body( void* codeptr, string_resource_index t, void *g, void *input_port, void *output_port, void *body ) { + fgt_node_with_body( codeptr, t, g, output_port, body ); + fgt_internal_create_input_port( output_port, input_port, FLOW_INPUT_PORT_0 ); +} + + +static inline void fgt_node( void* codeptr, string_resource_index t, void *g, void *input_port, void *decrement_port, void *output_port ) { + fgt_node( codeptr, t, g, input_port, output_port ); + fgt_internal_create_input_port( output_port, decrement_port, FLOW_INPUT_PORT_1 ); +} + +static inline void fgt_make_edge( void *output_port, void *input_port ) { + itt_relation_add( ITT_DOMAIN_FLOW, output_port, FLOW_OUTPUT_PORT, __itt_relation_is_predecessor_to, input_port, FLOW_INPUT_PORT); +} + +static inline void fgt_remove_edge( void *output_port, void *input_port ) { + itt_relation_add( ITT_DOMAIN_FLOW, output_port, FLOW_OUTPUT_PORT, __itt_relation_is_sibling_of, input_port, FLOW_INPUT_PORT); +} + +static inline void fgt_graph( void *g ) { + itt_make_task_group( ITT_DOMAIN_FLOW, g, FLOW_GRAPH, NULL, FLOW_NULL, FLOW_GRAPH ); +} + +static inline void fgt_begin_body( void *body ) { + itt_task_begin( ITT_DOMAIN_FLOW, body, FLOW_BODY, NULL, FLOW_NULL, FLOW_BODY ); +} + +static inline void fgt_end_body( void * ) { + itt_task_end( ITT_DOMAIN_FLOW ); +} + +static inline void fgt_async_try_put_begin( void *node, void *port ) { + itt_task_begin( ITT_DOMAIN_FLOW, port, FLOW_OUTPUT_PORT, node, FLOW_NODE, FLOW_OUTPUT_PORT ); +} + +static inline void fgt_async_try_put_end( void *, void * ) { + itt_task_end( ITT_DOMAIN_FLOW ); +} + +static inline void fgt_async_reserve( void *node, void *graph ) { + itt_region_begin( ITT_DOMAIN_FLOW, node, FLOW_NODE, graph, FLOW_GRAPH, FLOW_NULL ); +} + +static inline void fgt_async_commit( void *node, void * /*graph*/) { + itt_region_end( ITT_DOMAIN_FLOW, node, FLOW_NODE ); +} + +static inline void fgt_reserve_wait( void *graph ) { + itt_region_begin( ITT_DOMAIN_FLOW, graph, FLOW_GRAPH, NULL, FLOW_NULL, FLOW_NULL ); +} + +static inline void fgt_release_wait( void *graph ) { + itt_region_end( ITT_DOMAIN_FLOW, graph, FLOW_GRAPH ); +} + +#else // TBB_USE_PROFILING_TOOLS + +#define CODEPTR() NULL + +static inline void fgt_alias_port(void * /*node*/, void * /*p*/, bool /*visible*/ ) { } + +static inline void fgt_composite ( void* /*codeptr*/, void * /*node*/, void * /*graph*/ ) { } + +static inline void fgt_graph( void * /*g*/ ) { } + +template< typename NodeType > +static inline void fgt_multioutput_node_desc( 
const NodeType * /*node*/, const char * /*desc*/ ) { } + +template< typename NodeType > +static inline void fgt_node_desc( const NodeType * /*node*/, const char * /*desc*/ ) { } + +static inline void fgt_graph_desc( const void * /*g*/, const char * /*desc*/ ) { } + +template< int N, typename PortsTuple > +static inline void fgt_multioutput_node( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, void * /*input_port*/, PortsTuple & /*ports*/ ) { } + +template< int N, typename PortsTuple > +static inline void fgt_multioutput_node_with_body( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, void * /*input_port*/, PortsTuple & /*ports*/, void * /*body*/ ) { } + +template< int N, typename PortsTuple > +static inline void fgt_multiinput_node( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, PortsTuple & /*ports*/, void * /*output_port*/ ) { } + +static inline void fgt_multiinput_multioutput_node( void* /*codeptr*/, string_resource_index /*t*/, void * /*node*/, void * /*graph*/ ) { } + +static inline void fgt_node( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, void * /*input_port*/, void * /*output_port*/ ) { } +static inline void fgt_node( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, void * /*input_port*/, void * /*decrement_port*/, void * /*output_port*/ ) { } + +static inline void fgt_node_with_body( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, void * /*output_port*/, void * /*body*/ ) { } +static inline void fgt_node_with_body( void* /*codeptr*/, string_resource_index /*t*/, void * /*g*/, void * /*input_port*/, void * /*output_port*/, void * /*body*/ ) { } + +static inline void fgt_make_edge( void * /*output_port*/, void * /*input_port*/ ) { } +static inline void fgt_remove_edge( void * /*output_port*/, void * /*input_port*/ ) { } + +static inline void fgt_begin_body( void * /*body*/ ) { } +static inline void fgt_end_body( void * /*body*/) { } + +static inline void fgt_async_try_put_begin( void * /*node*/, void * /*port*/ ) { } +static inline void fgt_async_try_put_end( void * /*node*/ , void * /*port*/ ) { } +static inline void fgt_async_reserve( void * /*node*/, void * /*graph*/ ) { } +static inline void fgt_async_commit( void * /*node*/, void * /*graph*/ ) { } +static inline void fgt_reserve_wait( void * /*graph*/ ) { } +static inline void fgt_release_wait( void * /*graph*/ ) { } + +template< typename NodeType > +void fgt_multiinput_multioutput_node_desc( const NodeType * /*node*/, const char * /*desc*/ ) { } + +template < typename PortsTuple, int N > +struct fgt_internal_input_alias_helper { + static void alias_port( void * /*node*/, PortsTuple & /*ports*/ ) { } +}; + +template < typename PortsTuple, int N > +struct fgt_internal_output_alias_helper { + static void alias_port( void * /*node*/, PortsTuple & /*ports*/ ) { } +}; + +#endif // TBB_USE_PROFILING_TOOLS + +} // d1 +} // namespace detail +} // namespace tbb + +#endif // _FGT_GRAPH_TRACE_IMPL_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_types_impl.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_types_impl.h new file mode 100644 index 0000000000..97c770b154 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_flow_graph_types_impl.h @@ -0,0 +1,407 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB__flow_graph_types_impl_H +#define __TBB__flow_graph_types_impl_H + +#ifndef __TBB_flow_graph_H +#error Do not #include this internal file directly; use public TBB headers instead. +#endif + +// included in namespace tbb::detail::d1 + +// the change to key_matching (adding a K and KHash template parameter, making it a class) +// means we have to pass this data to the key_matching_port. All the ports have only one +// template parameter, so we have to wrap the following types in a trait: +// +// . K == key_type +// . KHash == hash and compare for Key +// . TtoK == function_body that given an object of T, returns its K +// . T == type accepted by port, and stored in the hash table +// +// The port will have an additional parameter on node construction, which is a function_body +// that accepts a const T& and returns a K which is the field in T which is its K. +template<typename Kp, typename KHashp, typename Tp> +struct KeyTrait { + typedef Kp K; + typedef Tp T; + typedef type_to_key_function_body<T,K> TtoK; + typedef KHashp KHash; +}; + +// wrap each element of a tuple in a template, and make a tuple of the result. +template<int N, template<class> class PT, typename TypeTuple> +struct wrap_tuple_elements; + +// A wrapper that generates the traits needed for each port of a key-matching join, +// and the type of the tuple of input ports. +template<int N, template<class> class PT, typename KeyTraits, typename TypeTuple> +struct wrap_key_tuple_elements; + +template<int N, template<class> class PT, typename... Args> +struct wrap_tuple_elements<N, PT, std::tuple<Args...> >{ + typedef typename std::tuple<PT<Args>... > type; +}; + +template<int N, template<class> class PT, typename KeyTraits, typename... Args> +struct wrap_key_tuple_elements<N, PT, KeyTraits, std::tuple<Args...> > { + typedef typename KeyTraits::key_type K; + typedef typename KeyTraits::hash_compare_type KHash; + typedef typename std::tuple<PT<KeyTrait<K, KHash, Args> >... > type; +}; + +template< int... S > class sequence {}; + +template< int N, int... S > +struct make_sequence : make_sequence < N - 1, N - 1, S... > {}; + +template< int... S > +struct make_sequence < 0, S... > { + typedef sequence<S...> type; +}; + +//! type mimicking std::pair but with trailing fill to ensure each element of an array +//* will have the correct alignment +template<typename T1, typename T2, size_t REM> +struct type_plus_align { + char first[sizeof(T1)]; + T2 second; + char fill1[REM]; +}; + +template<typename T1, typename T2> +struct type_plus_align<T1,T2,0> { + char first[sizeof(T1)]; + T2 second; +}; + +template<class U> struct alignment_of { + typedef struct { char t; U padded; } test_alignment; + static const size_t value = sizeof(test_alignment) - sizeof(U); +}; + +// T1, T2 are actual types stored. The space defined for T1 in the type returned +// is a char array of the correct size. Type T2 should be trivially-constructible, +// T1 must be explicitly managed. 
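Two compile-time checks, offered purely as an illustration of the helpers above and assuming a C++11 (or later) compiler with <type_traits> visible, as the std::is_same uses later in this header already presume: make_sequence<N> is the pre-C++14 stand-in for std::make_index_sequence, and alignment_of<U>::value reproduces alignof(U) from the sizeof of a char-plus-U struct:

    static_assert(std::is_same<make_sequence<3>::type, sequence<0, 1, 2>>::value,
                  "make_sequence<N>::type unrolls to sequence<0, ..., N-1>");
    static_assert(alignment_of<double>::value == alignof(double),
                  "sizeof(struct{char; U;}) minus sizeof(U) yields U's alignment");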
+template<typename T1, typename T2> +struct aligned_pair { + static const size_t t1_align = alignment_of<T1>::value; + static const size_t t2_align = alignment_of<T2>::value; + typedef type_plus_align<T1, T2, 0 > just_pair; + static const size_t max_align = t1_align < t2_align ? t2_align : t1_align; + static const size_t extra_bytes = sizeof(just_pair) % max_align; + static const size_t remainder = extra_bytes ? max_align - extra_bytes : 0; +public: + typedef type_plus_align<T1,T2,remainder> type; +}; // aligned_pair + +// support for variant type +// type we use when we're not storing a value +struct default_constructed { }; + +// type which contains another type, tests for what type is contained, and references to it. +// Wrapper<T> +// void CopyTo( void *newSpace) : builds a Wrapper<T> copy of itself in newSpace + +// struct to allow us to copy and test the type of objects +struct WrapperBase { + virtual ~WrapperBase() {} + virtual void CopyTo(void* /*newSpace*/) const = 0; +}; + +// Wrapper<T> contains a T, with the ability to test what T is. The Wrapper<T> can be +// constructed from a T, can be copy-constructed from another Wrapper<T>, and can be +// examined via value(), but not modified. +template<typename T> +struct Wrapper: public WrapperBase { + typedef T value_type; + typedef T* pointer_type; +private: + T value_space; +public: + const value_type &value() const { return value_space; } + +private: + Wrapper(); + + // on exception will ensure the Wrapper will contain only a trivially-constructed object + struct _unwind_space { + pointer_type space; + _unwind_space(pointer_type p) : space(p) {} + ~_unwind_space() { + if(space) (void) new (space) Wrapper<default_constructed>(default_constructed()); + } + }; +public: + explicit Wrapper( const T& other ) : value_space(other) { } + explicit Wrapper(const Wrapper& other) = delete; + + void CopyTo(void* newSpace) const override { + _unwind_space guard((pointer_type)newSpace); + (void) new(newSpace) Wrapper(value_space); + guard.space = NULL; + } + ~Wrapper() { } +}; + +// specialization for array objects +template<typename T, size_t N> +struct Wrapper<T[N]> : public WrapperBase { + typedef T value_type; + typedef T* pointer_type; + // space must be untyped. + typedef T ArrayType[N]; +private: + // The space is not of type T[N] because when copy-constructing, it would be + // default-initialized and then copied to in some fashion, resulting in two + // constructions and one destruction per element. If the type is char[ ], we + // placement new into each element, resulting in one construction per element. + static const size_t space_size = sizeof(ArrayType) / sizeof(char); + char value_space[space_size]; + + + // on exception will ensure the already-built objects will be destructed + // (the value_space is a char array, so it is already trivially-destructible.) 
+ struct _unwind_class { + pointer_type space; + int already_built; + _unwind_class(pointer_type p) : space(p), already_built(0) {} + ~_unwind_class() { + if(space) { + for(size_t i = already_built; i > 0 ; --i ) space[i-1].~value_type(); + (void) new(space) Wrapper<default_constructed>(default_constructed()); + } + } + }; +public: + const ArrayType &value() const { + char *vp = const_cast<char *>(value_space); + return reinterpret_cast<ArrayType &>(*vp); + } + +private: + Wrapper(); +public: + // have to explicitly construct because other decays to a const value_type* + explicit Wrapper(const ArrayType& other) { + _unwind_class guard((pointer_type)value_space); + pointer_type vp = reinterpret_cast<pointer_type>(&value_space); + for(size_t i = 0; i < N; ++i ) { + (void) new(vp++) value_type(other[i]); + ++(guard.already_built); + } + guard.space = NULL; + } + explicit Wrapper(const Wrapper& other) : WrapperBase() { + // we have to do the heavy lifting to copy contents + _unwind_class guard((pointer_type)value_space); + pointer_type dp = reinterpret_cast<pointer_type>(value_space); + pointer_type sp = reinterpret_cast<pointer_type>(const_cast<char *>(other.value_space)); + for(size_t i = 0; i < N; ++i, ++dp, ++sp) { + (void) new(dp) value_type(*sp); + ++(guard.already_built); + } + guard.space = NULL; + } + + void CopyTo(void* newSpace) const override { + (void) new(newSpace) Wrapper(*this); // exceptions handled in copy constructor + } + + ~Wrapper() { + // have to destroy explicitly in reverse order + pointer_type vp = reinterpret_cast<pointer_type>(&value_space); + for(size_t i = N; i > 0 ; --i ) vp[i-1].~value_type(); + } +}; + +// given a tuple, return the type of the element that has the maximum alignment requirement. +// Given a tuple and that type, return the number of elements of the object with the max +// alignment requirement that is at least as big as the largest object in the tuple. + +template<bool, class T1, class T2> struct pick_one; +template<class T1, class T2> struct pick_one<true , T1, T2> { typedef T1 type; }; +template<class T1, class T2> struct pick_one<false, T1, T2> { typedef T2 type; }; + +template< template<class> class Selector, typename T1, typename T2 > +struct pick_max { + typedef typename pick_one< (Selector<T1>::value > Selector<T2>::value), T1, T2 >::type type; +}; + +template<typename T> struct size_of { static const int value = sizeof(T); }; + +template< size_t N, class Tuple, template<class> class Selector > struct pick_tuple_max { + typedef typename pick_tuple_max<N-1, Tuple, Selector>::type LeftMaxType; + typedef typename std::tuple_element<N-1, Tuple>::type ThisType; + typedef typename pick_max<Selector, LeftMaxType, ThisType>::type type; +}; + +template< class Tuple, template<class> class Selector > struct pick_tuple_max<0, Tuple, Selector> { + typedef typename std::tuple_element<0, Tuple>::type type; +}; + +// is the specified type included in a tuple? +template<class Q, size_t N, class Tuple> +struct is_element_of { + typedef typename std::tuple_element<N-1, Tuple>::type T_i; + static const bool value = std::is_same<Q,T_i>::value || is_element_of<Q,N-1,Tuple>::value; +}; + +template<class Q, class Tuple> +struct is_element_of<Q,0,Tuple> { + typedef typename std::tuple_element<0, Tuple>::type T_i; + static const bool value = std::is_same<Q,T_i>::value; +}; + +// allow the construction of types that are listed tuple. If a disallowed type +// construction is written, a method involving this type is created. 
The +// type has no definition, so a syntax error is generated. +template<typename T> struct ERROR_Type_Not_allowed_In_Tagged_Msg_Not_Member_Of_Tuple; + +template<typename T, bool BUILD_IT> struct do_if; +template<typename T> +struct do_if<T, true> { + static void construct(void *mySpace, const T& x) { + (void) new(mySpace) Wrapper<T>(x); + } +}; +template<typename T> +struct do_if<T, false> { + static void construct(void * /*mySpace*/, const T& x) { + // This method is instantiated when the type T does not match any of the + // element types in the Tuple in variant<Tuple>. + ERROR_Type_Not_allowed_In_Tagged_Msg_Not_Member_Of_Tuple<T>::bad_type(x); + } +}; + +// Tuple tells us the allowed types that variant can hold. It determines the alignment of the space in +// Wrapper, and how big Wrapper is. +// +// the object can only be tested for type, and a read-only reference can be fetched by cast_to<T>(). + +using tbb::detail::punned_cast; +struct tagged_null_type {}; +template<typename TagType, typename T0, typename T1=tagged_null_type, typename T2=tagged_null_type, typename T3=tagged_null_type, + typename T4=tagged_null_type, typename T5=tagged_null_type, typename T6=tagged_null_type, + typename T7=tagged_null_type, typename T8=tagged_null_type, typename T9=tagged_null_type> +class tagged_msg { + typedef std::tuple<T0, T1, T2, T3, T4 + //TODO: Should we reject lists longer than a tuple can hold? + #if __TBB_VARIADIC_MAX >= 6 + , T5 + #endif + #if __TBB_VARIADIC_MAX >= 7 + , T6 + #endif + #if __TBB_VARIADIC_MAX >= 8 + , T7 + #endif + #if __TBB_VARIADIC_MAX >= 9 + , T8 + #endif + #if __TBB_VARIADIC_MAX >= 10 + , T9 + #endif + > Tuple; + +private: + class variant { + static const size_t N = std::tuple_size<Tuple>::value; + typedef typename pick_tuple_max<N, Tuple, alignment_of>::type AlignType; + typedef typename pick_tuple_max<N, Tuple, size_of>::type MaxSizeType; + static const size_t MaxNBytes = (sizeof(Wrapper<MaxSizeType>)+sizeof(AlignType)-1); + static const size_t MaxNElements = MaxNBytes/sizeof(AlignType); + typedef aligned_space<AlignType, MaxNElements> SpaceType; + SpaceType my_space; + static const size_t MaxSize = sizeof(SpaceType); + + public: + variant() { (void) new(&my_space) Wrapper<default_constructed>(default_constructed()); } + + template<typename T> + variant( const T& x ) { + do_if<T, is_element_of<T, N, Tuple>::value>::construct(&my_space,x); + } + + variant(const variant& other) { + const WrapperBase * h = punned_cast<const WrapperBase *>(&(other.my_space)); + h->CopyTo(&my_space); + } + + // assignment must destroy and re-create the Wrapper type, as there is no way + // to create a Wrapper-to-Wrapper assign even if we find they agree in type. 
+ void operator=( const variant& rhs ) { + if(&rhs != this) { + WrapperBase *h = punned_cast<WrapperBase *>(&my_space); + h->~WrapperBase(); + const WrapperBase *ch = punned_cast<const WrapperBase *>(&(rhs.my_space)); + ch->CopyTo(&my_space); + } + } + + template<typename U> + const U& variant_cast_to() const { + const Wrapper<U> *h = dynamic_cast<const Wrapper<U>*>(punned_cast<const WrapperBase *>(&my_space)); + if(!h) { + throw_exception(exception_id::bad_tagged_msg_cast); + } + return h->value(); + } + template<typename U> + bool variant_is_a() const { return dynamic_cast<const Wrapper<U>*>(punned_cast<const WrapperBase *>(&my_space)) != NULL; } + + bool variant_is_default_constructed() const {return variant_is_a<default_constructed>();} + + ~variant() { + WrapperBase *h = punned_cast<WrapperBase *>(&my_space); + h->~WrapperBase(); + } + }; //class variant + + TagType my_tag; + variant my_msg; + +public: + tagged_msg(): my_tag(TagType(~0)), my_msg(){} + + template<typename T, typename R> + tagged_msg(T const &index, R const &value) : my_tag(index), my_msg(value) {} + + template<typename T, typename R, size_t N> + tagged_msg(T const &index, R (&value)[N]) : my_tag(index), my_msg(value) {} + + void set_tag(TagType const &index) {my_tag = index;} + TagType tag() const {return my_tag;} + + template<typename V> + const V& cast_to() const {return my_msg.template variant_cast_to<V>();} + + template<typename V> + bool is_a() const {return my_msg.template variant_is_a<V>();} + + bool is_default_constructed() const {return my_msg.variant_is_default_constructed();} +}; //class tagged_msg + +// template to simplify cast and test for tagged_msg in template contexts +template<typename V, typename T> +const V& cast_to(T const &t) { return t.template cast_to<V>(); } + +template<typename V, typename T> +bool is_a(T const &t) { return t.template is_a<V>(); } + +enum op_stat { WAIT = 0, SUCCEEDED, FAILED }; + +#endif /* __TBB__flow_graph_types_impl_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_hash_compare.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_hash_compare.h new file mode 100644 index 0000000000..20cbd96c06 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_hash_compare.h @@ -0,0 +1,127 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_detail__hash_compare_H +#define __TBB_detail__hash_compare_H + +#include <functional> + +#include "_containers_helpers.h" + +namespace tbb { +namespace detail { +namespace d1 { + +template <typename Key, typename Hash, typename KeyEqual> +class hash_compare { + using is_transparent_hash = has_transparent_key_equal<Key, Hash, KeyEqual>; +public: + using hasher = Hash; + using key_equal = typename is_transparent_hash::type; + + hash_compare() = default; + hash_compare( hasher hash, key_equal equal ) : my_hasher(hash), my_equal(equal) {} + + std::size_t operator()( const Key& key ) const { + return std::size_t(my_hasher(key)); + } + + bool operator()( const Key& key1, const Key& key2 ) const { + return my_equal(key1, key2); + } + + template <typename K, typename = typename std::enable_if<is_transparent_hash::value, K>::type> + std::size_t operator()( const K& key ) const { + return std::size_t(my_hasher(key)); + } + + template <typename K1, typename K2, typename = typename std::enable_if<is_transparent_hash::value, K1>::type> + bool operator()( const K1& key1, const K2& key2 ) const { + return my_equal(key1, key2); + } + + hasher hash_function() const { + return my_hasher; + } + + key_equal key_eq() const { + return my_equal; + } + + +private: + hasher my_hasher; + key_equal my_equal; +}; // class hash_compare + +//! hash_compare that is default argument for concurrent_hash_map +template <typename Key> +class tbb_hash_compare { +public: + std::size_t hash( const Key& a ) const { return my_hash_func(a); } + bool equal( const Key& a, const Key& b ) const { return my_key_equal(a, b); } +private: + std::hash<Key> my_hash_func; + std::equal_to<Key> my_key_equal; +}; + +} // namespace d1 +} // namespace detail +} // namespace tbb + +#if TBB_DEFINE_STD_HASH_SPECIALIZATIONS + +namespace std { + +template <typename T, typename U> +struct hash<std::pair<T, U>> { +public: + std::size_t operator()( const std::pair<T, U>& p ) const { + return first_hash(p.first) ^ second_hash(p.second); + } + +private: + std::hash<T> first_hash; + std::hash<U> second_hash; +}; // struct hash<std::pair> + +// Apple clang and MSVC defines their own specializations for std::hash<std::basic_string<T, Traits, Alloc>> +#if !(_LIBCPP_VERSION) && !(_CPPLIB_VER) + +template <typename CharT, typename Traits, typename Allocator> +struct hash<std::basic_string<CharT, Traits, Allocator>> { +public: + std::size_t operator()( const std::basic_string<CharT, Traits, Allocator>& s ) const { + std::size_t h = 0; + for ( const CharT* c = s.c_str(); *c; ++c ) { + h = h * hash_multiplier ^ char_hash(*c); + } + return h; + } + +private: + static constexpr std::size_t hash_multiplier = tbb::detail::select_size_t_constant<2654435769U, 11400714819323198485ULL>::value; + + std::hash<CharT> char_hash; +}; // struct hash<std::basic_string> + +#endif // !(_LIBCPP_VERSION || _CPPLIB_VER) + +} // namespace std + +#endif // TBB_DEFINE_STD_HASH_SPECIALIZATIONS + +#endif // __TBB_detail__hash_compare_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_machine.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_machine.h new file mode 100644 index 0000000000..3270da786a --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_machine.h @@ -0,0 +1,366 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_detail__machine_H +#define __TBB_detail__machine_H + +#include "_config.h" +#include "_assert.h" + +#include <atomic> +#include <climits> +#include <cstdint> +#include <cstddef> + +#ifdef _MSC_VER +#include <intrin.h> +#pragma intrinsic(__rdtsc) +#endif +#if __TBB_x86_64 || __TBB_x86_32 +#include <immintrin.h> // _mm_pause +#endif +#if (_WIN32 || _WIN64) +#include <float.h> // _control87 +#endif + +#if __TBB_GLIBCXX_THIS_THREAD_YIELD_BROKEN +#include <sched.h> // sched_yield +#else +#include <thread> // std::this_thread::yield() +#endif + +namespace tbb { +namespace detail { +inline namespace d0 { + +//-------------------------------------------------------------------------------------------------- +// Yield implementation +//-------------------------------------------------------------------------------------------------- + +#if __TBB_GLIBCXX_THIS_THREAD_YIELD_BROKEN +static inline void yield() { + int err = sched_yield(); + __TBB_ASSERT_EX(err == 0, "sched_yiled has failed"); +} +#else +using std::this_thread::yield; +#endif + +//-------------------------------------------------------------------------------------------------- +// atomic_fence implementation +//-------------------------------------------------------------------------------------------------- + +#if (_WIN32 || _WIN64) +#pragma intrinsic(_mm_mfence) +#endif + +static inline void atomic_fence(std::memory_order order) { +#if (_WIN32 || _WIN64) + if (order == std::memory_order_seq_cst || + order == std::memory_order_acq_rel || + order == std::memory_order_acquire || + order == std::memory_order_release ) + { + _mm_mfence(); + return; + } +#endif /*(_WIN32 || _WIN64)*/ + std::atomic_thread_fence(order); +} + +//-------------------------------------------------------------------------------------------------- +// Pause implementation +//-------------------------------------------------------------------------------------------------- + +static inline void machine_pause(int32_t delay) { +#if __TBB_x86_64 || __TBB_x86_32 + while (delay-- > 0) { _mm_pause(); } +#elif __ARM_ARCH_7A__ || __aarch64__ + while (delay-- > 0) { __asm__ __volatile__("yield" ::: "memory"); } +#else /* Generic */ + (void)delay; // suppress without including _template_helpers.h + yield(); +#endif +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// tbb::detail::log2() implementation +//////////////////////////////////////////////////////////////////////////////////////////////////// +// TODO: Use log2p1() function that will be available in C++20 standard + +#if defined(__GNUC__) || defined(__clang__) +namespace gnu_builtins { + inline uintptr_t clz(unsigned int x) { return __builtin_clz(x); } + inline uintptr_t clz(unsigned long int x) { return __builtin_clzl(x); } + inline uintptr_t clz(unsigned long long int x) { return __builtin_clzll(x); } +} +#elif defined(_MSC_VER) +#pragma intrinsic(__TBB_W(_BitScanReverse)) +namespace msvc_intrinsics { + static inline uintptr_t bit_scan_reverse(uintptr_t i) { + unsigned long j; + __TBB_W(_BitScanReverse)( &j, i ); + return j; + } +} 
+#endif + +template <typename T> +constexpr std::uintptr_t number_of_bits() { + return sizeof(T) * CHAR_BIT; +} + +// logarithm is the index of the most significant non-zero bit +static inline uintptr_t machine_log2(uintptr_t x) { +#if defined(__GNUC__) || defined(__clang__) + // If P is a power of 2 and x<P, then (P-1)-x == (P-1) XOR x + return (number_of_bits<decltype(x)>() - 1) ^ gnu_builtins::clz(x); +#elif defined(_MSC_VER) + return msvc_intrinsics::bit_scan_reverse(x); +#elif __i386__ || __i386 /*for Sun OS*/ || __MINGW32__ + uintptr_t j, i = x; + __asm__("bsr %1,%0" : "=r"(j) : "r"(i)); + return j; +#elif __powerpc__ || __POWERPC__ + #if __TBB_WORDSIZE==8 + __asm__ __volatile__ ("cntlzd %0,%0" : "+r"(x)); + return 63 - static_cast<intptr_t>(x); + #else + __asm__ __volatile__ ("cntlzw %0,%0" : "+r"(x)); + return 31 - static_cast<intptr_t>(x); + #endif /*__TBB_WORDSIZE*/ +#elif __sparc + uint64_t count; + // one hot encode + x |= (x >> 1); + x |= (x >> 2); + x |= (x >> 4); + x |= (x >> 8); + x |= (x >> 16); + x |= (x >> 32); + // count 1's + __asm__ ("popc %1, %0" : "=r"(count) : "r"(x) ); + return count - 1; +#else + intptr_t result = 0; + + if( sizeof(x) > 4 && (uintptr_t tmp = x >> 32) ) { x = tmp; result += 32; } + if( uintptr_t tmp = x >> 16 ) { x = tmp; result += 16; } + if( uintptr_t tmp = x >> 8 ) { x = tmp; result += 8; } + if( uintptr_t tmp = x >> 4 ) { x = tmp; result += 4; } + if( uintptr_t tmp = x >> 2 ) { x = tmp; result += 2; } + + return (x & 2) ? result + 1 : result; +#endif +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// tbb::detail::reverse_bits() implementation +//////////////////////////////////////////////////////////////////////////////////////////////////// +#if TBB_USE_CLANG_BITREVERSE_BUILTINS +namespace llvm_builtins { + inline uint8_t builtin_bitreverse(uint8_t x) { return __builtin_bitreverse8 (x); } + inline uint16_t builtin_bitreverse(uint16_t x) { return __builtin_bitreverse16(x); } + inline uint32_t builtin_bitreverse(uint32_t x) { return __builtin_bitreverse32(x); } + inline uint64_t builtin_bitreverse(uint64_t x) { return __builtin_bitreverse64(x); } +} +#else // generic +template<typename T> +struct reverse { + static const T byte_table[256]; +}; + +template<typename T> +const T reverse<T>::byte_table[256] = { + 0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0, + 0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8, + 0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4, + 0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC, + 0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2, + 0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA, + 0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6, + 0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE, + 0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1, + 0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9, + 0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5, + 0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD, + 0x03, 0x83, 0x43, 
0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3, + 0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB, + 0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7, + 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF +}; + +inline unsigned char reverse_byte(unsigned char src) { + return reverse<unsigned char>::byte_table[src]; +} +#endif // TBB_USE_CLANG_BITREVERSE_BUILTINS + +template<typename T> +T machine_reverse_bits(T src) { +#if TBB_USE_CLANG_BITREVERSE_BUILTINS + return builtin_bitreverse(fixed_width_cast(src)); +#else /* Generic */ + T dst; + unsigned char *original = (unsigned char *) &src; + unsigned char *reversed = (unsigned char *) &dst; + + for ( int i = sizeof(T) - 1; i >= 0; i-- ) { + reversed[i] = reverse_byte( original[sizeof(T) - i - 1] ); + } + + return dst; +#endif // TBB_USE_CLANG_BITREVERSE_BUILTINS +} + +} // inline namespace d0 + +namespace d1 { + +#if (_WIN32 || _WIN64) +// API to retrieve/update FPU control setting +#define __TBB_CPU_CTL_ENV_PRESENT 1 +struct cpu_ctl_env { + unsigned int x87cw{}; +#if (__TBB_x86_64) + // Changing the infinity mode or the floating-point precision is not supported on x64. + // The attempt causes an assertion. See + // https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/control87-controlfp-control87-2 + static constexpr unsigned int X87CW_CONTROL_MASK = _MCW_DN | _MCW_EM | _MCW_RC; +#else + static constexpr unsigned int X87CW_CONTROL_MASK = ~0U; +#endif +#if (__TBB_x86_32 || __TBB_x86_64) + unsigned int mxcsr{}; + static constexpr unsigned int MXCSR_CONTROL_MASK = ~0x3fu; /* all except last six status bits */ +#endif + + bool operator!=( const cpu_ctl_env& ctl ) const { + return +#if (__TBB_x86_32 || __TBB_x86_64) + mxcsr != ctl.mxcsr || +#endif + x87cw != ctl.x87cw; + } + void get_env() { + x87cw = _control87(0, 0); +#if (__TBB_x86_32 || __TBB_x86_64) + mxcsr = _mm_getcsr(); +#endif + } + void set_env() const { + _control87(x87cw, X87CW_CONTROL_MASK); +#if (__TBB_x86_32 || __TBB_x86_64) + _mm_setcsr(mxcsr & MXCSR_CONTROL_MASK); +#endif + } +}; +#elif (__TBB_x86_32 || __TBB_x86_64) +// API to retrieve/update FPU control setting +#define __TBB_CPU_CTL_ENV_PRESENT 1 +struct cpu_ctl_env { + int mxcsr{}; + short x87cw{}; + static const int MXCSR_CONTROL_MASK = ~0x3f; /* all except last six status bits */ + + bool operator!=(const cpu_ctl_env& ctl) const { + return mxcsr != ctl.mxcsr || x87cw != ctl.x87cw; + } + void get_env() { + __asm__ __volatile__( + "stmxcsr %0\n\t" + "fstcw %1" + : "=m"(mxcsr), "=m"(x87cw) + ); + mxcsr &= MXCSR_CONTROL_MASK; + } + void set_env() const { + __asm__ __volatile__( + "ldmxcsr %0\n\t" + "fldcw %1" + : : "m"(mxcsr), "m"(x87cw) + ); + } +}; +#endif + +} // namespace d1 + +} // namespace detail +} // namespace tbb + +#if !__TBB_CPU_CTL_ENV_PRESENT +#include <fenv.h> + +#include <cstring> + +namespace tbb { +namespace detail { + +namespace r1 { +void* __TBB_EXPORTED_FUNC cache_aligned_allocate(std::size_t size); +void __TBB_EXPORTED_FUNC cache_aligned_deallocate(void* p); +} // namespace r1 + +namespace d1 { + +class cpu_ctl_env { + fenv_t *my_fenv_ptr; +public: + cpu_ctl_env() : my_fenv_ptr(NULL) {} + ~cpu_ctl_env() { + if ( my_fenv_ptr ) + r1::cache_aligned_deallocate( (void*)my_fenv_ptr ); + } + // It is possible not to copy memory but just to copy pointers but the following issues should be addressed: + // 1. 
The arena lifetime and the context lifetime are independent; + // 2. The user is allowed to recapture different FPU settings to context so 'current FPU settings' inside + // dispatch loop may become invalid. + // But do we really want to improve the fenv implementation? It seems to be better to replace the fenv implementation + // with a platform specific implementation. + cpu_ctl_env( const cpu_ctl_env &src ) : my_fenv_ptr(NULL) { + *this = src; + } + cpu_ctl_env& operator=( const cpu_ctl_env &src ) { + __TBB_ASSERT( src.my_fenv_ptr, NULL ); + if ( !my_fenv_ptr ) + my_fenv_ptr = (fenv_t*)r1::cache_aligned_allocate(sizeof(fenv_t)); + *my_fenv_ptr = *src.my_fenv_ptr; + return *this; + } + bool operator!=( const cpu_ctl_env &ctl ) const { + __TBB_ASSERT( my_fenv_ptr, "cpu_ctl_env is not initialized." ); + __TBB_ASSERT( ctl.my_fenv_ptr, "cpu_ctl_env is not initialized." ); + return std::memcmp( (void*)my_fenv_ptr, (void*)ctl.my_fenv_ptr, sizeof(fenv_t) ); + } + void get_env () { + if ( !my_fenv_ptr ) + my_fenv_ptr = (fenv_t*)r1::cache_aligned_allocate(sizeof(fenv_t)); + fegetenv( my_fenv_ptr ); + } + const cpu_ctl_env& set_env () const { + __TBB_ASSERT( my_fenv_ptr, "cpu_ctl_env is not initialized." ); + fesetenv( my_fenv_ptr ); + return *this; + } +}; + +} // namespace d1 +} // namespace detail +} // namespace tbb + +#endif /* !__TBB_CPU_CTL_ENV_PRESENT */ + +#endif // __TBB_detail__machine_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_namespace_injection.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_namespace_injection.h new file mode 100644 index 0000000000..2e1df30931 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_namespace_injection.h @@ -0,0 +1,24 @@ +/* + Copyright (c) 2020-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +// All public entities of the OneAPI Spec are available under oneapi namespace + +// Define tbb namespace first as it might not be known yet +namespace tbb {} + +namespace oneapi { +namespace tbb = ::tbb; +} diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_node_handle.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_node_handle.h new file mode 100644 index 0000000000..265be07555 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_node_handle.h @@ -0,0 +1,162 @@ +/* + Copyright (c) 2019-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_detail__node_handle_H +#define __TBB_detail__node_handle_H + +#include "_allocator_traits.h" +#include "_assert.h" + +namespace tbb { +namespace detail { +namespace d1 { + +// A structure to access private node handle methods in internal TBB classes +// Regular friend declaration is not convenient because classes which use node handle +// can be placed in the different versioning namespaces. +struct node_handle_accessor { + template <typename NodeHandleType> + static typename NodeHandleType::node* get_node_ptr( NodeHandleType& nh ) { + return nh.get_node_ptr(); + } + + template <typename NodeHandleType> + static NodeHandleType construct( typename NodeHandleType::node* node_ptr ) { + return NodeHandleType{node_ptr}; + } + + template <typename NodeHandleType> + static void deactivate( NodeHandleType& nh ) { + nh.deactivate(); + } +}; // struct node_handle_accessor + +template<typename Value, typename Node, typename Allocator> +class node_handle_base { +public: + using allocator_type = Allocator; +protected: + using node = Node; + using allocator_traits_type = tbb::detail::allocator_traits<allocator_type>; +public: + + node_handle_base() : my_node(nullptr), my_allocator() {} + node_handle_base(node_handle_base&& nh) : my_node(nh.my_node), + my_allocator(std::move(nh.my_allocator)) { + nh.my_node = nullptr; + } + + __TBB_nodiscard bool empty() const { return my_node == nullptr; } + explicit operator bool() const { return my_node != nullptr; } + + ~node_handle_base() { internal_destroy(); } + + node_handle_base& operator=( node_handle_base&& nh ) { + internal_destroy(); + my_node = nh.my_node; + move_assign_allocators(my_allocator, nh.my_allocator); + nh.deactivate(); + return *this; + } + + void swap( node_handle_base& nh ) { + using std::swap; + swap(my_node, nh.my_node); + swap_allocators(my_allocator, nh.my_allocator); + } + + allocator_type get_allocator() const { + return my_allocator; + } + +protected: + node_handle_base( node* n ) : my_node(n) {} + + void internal_destroy() { + if(my_node != nullptr) { + allocator_traits_type::destroy(my_allocator, my_node->storage()); + typename allocator_traits_type::template rebind_alloc<node> node_allocator(my_allocator); + node_allocator.deallocate(my_node, 1); + } + } + + node* get_node_ptr() { return my_node; } + + void deactivate() { my_node = nullptr; } + + node* my_node; + allocator_type my_allocator; +}; + +// node handle for maps +template<typename Key, typename Value, typename Node, typename Allocator> +class node_handle : public node_handle_base<Value, Node, Allocator> { + using base_type = node_handle_base<Value, Node, Allocator>; +public: + using key_type = Key; + using mapped_type = typename Value::second_type; + using allocator_type = typename base_type::allocator_type; + + node_handle() = default; + + key_type& key() const { + __TBB_ASSERT(!this->empty(), "Cannot get key from the empty node_type object"); + return *const_cast<key_type*>(&(this->my_node->value().first)); + } + + mapped_type& mapped() const { + __TBB_ASSERT(!this->empty(), "Cannot get mapped value from the empty node_type object"); + return this->my_node->value().second; + } + +private: + friend struct node_handle_accessor; + + node_handle( typename base_type::node* n ) : base_type(n) {} +}; // class node_handle + +// node handle for sets +template<typename Key, typename Node, typename Allocator> +class node_handle<Key, Key, Node, Allocator> : public node_handle_base<Key, Node, Allocator> { + using base_type = node_handle_base<Key, Node, Allocator>; 
+public: + using value_type = Key; + using allocator_type = typename base_type::allocator_type; + + node_handle() = default; + + value_type& value() const { + __TBB_ASSERT(!this->empty(), "Cannot get value from the empty node_type object"); + return *const_cast<value_type*>(&(this->my_node->value())); + } + +private: + friend struct node_handle_accessor; + + node_handle( typename base_type::node* n ) : base_type(n) {} +}; // class node_handle + +template <typename Key, typename Value, typename Node, typename Allocator> +void swap( node_handle<Key, Value, Node, Allocator>& lhs, + node_handle<Key, Value, Node, Allocator>& rhs ) { + return lhs.swap(rhs); +} + +} // namespace d1 +} // namespace detail +} // namespace tbb + +#endif // __TBB_detail__node_handle_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_pipeline_filters.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_pipeline_filters.h new file mode 100644 index 0000000000..95a4d3dc96 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_pipeline_filters.h @@ -0,0 +1,453 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_parallel_filters_H +#define __TBB_parallel_filters_H + +#include "_config.h" +#include "_task.h" +#include "_pipeline_filters_deduction.h" +#include "../tbb_allocator.h" + +#include <cstddef> +#include <cstdint> + +namespace tbb { +namespace detail { + +namespace d1 { +class base_filter; +} + +namespace r1 { +void __TBB_EXPORTED_FUNC set_end_of_input(d1::base_filter&); +class pipeline; +class stage_task; +class input_buffer; +} + +namespace d1 { +class filter_node; + +//! A stage in a pipeline. +/** @ingroup algorithms */ +class base_filter{ +private: + //! Value used to mark "not in pipeline" + static base_filter* not_in_pipeline() { return reinterpret_cast<base_filter*>(std::intptr_t(-1)); } +public: + //! The lowest bit 0 is for parallel vs serial + static constexpr unsigned int filter_is_serial = 0x1; + + //! 2nd bit distinguishes ordered vs unordered filters. + static constexpr unsigned int filter_is_out_of_order = 0x1<<1; + + //! 3rd bit marks input filters emitting small objects + static constexpr unsigned int filter_may_emit_null = 0x1<<2; + + base_filter(const base_filter&) = delete; + base_filter& operator=(const base_filter&) = delete; + +protected: + explicit base_filter( unsigned int m ) : + next_filter_in_pipeline(not_in_pipeline()), + my_input_buffer(nullptr), + my_filter_mode(m), + my_pipeline(nullptr) + {} + + // signal end-of-input for concrete_filters + void set_end_of_input() { + r1::set_end_of_input(*this); + } + +public: + //! True if filter is serial. + bool is_serial() const { + return bool( my_filter_mode & filter_is_serial ); + } + + //! True if filter must receive stream in order. + bool is_ordered() const { + return (my_filter_mode & filter_is_serial) && !(my_filter_mode & filter_is_out_of_order); + } + + //! 
true if an input filter can emit null + bool object_may_be_null() { + return ( my_filter_mode & filter_may_emit_null ) == filter_may_emit_null; + } + + //! Operate on an item from the input stream, and return item for output stream. + /** Returns nullptr if filter is a sink. */ + virtual void* operator()( void* item ) = 0; + + //! Destroy filter. + virtual ~base_filter() {}; + + //! Destroys item if pipeline was cancelled. + /** Required to prevent memory leaks. + Note it can be called concurrently even for serial filters.*/ + virtual void finalize( void* /*item*/ ) {} + +private: + //! Pointer to next filter in the pipeline. + base_filter* next_filter_in_pipeline; + + //! Buffer for incoming tokens, or nullptr if not required. + /** The buffer is required if the filter is serial. */ + r1::input_buffer* my_input_buffer; + + friend class r1::stage_task; + friend class r1::pipeline; + friend void r1::set_end_of_input(d1::base_filter&); + + //! Storage for filter mode and dynamically checked implementation version. + const unsigned int my_filter_mode; + + //! Pointer to the pipeline. + r1::pipeline* my_pipeline; +}; + +template<typename Body, typename InputType, typename OutputType > +class concrete_filter; + +//! input_filter control to signal end-of-input for parallel_pipeline +class flow_control { + bool is_pipeline_stopped = false; + flow_control() = default; + template<typename Body, typename InputType, typename OutputType > friend class concrete_filter; + template<typename Output> friend class input_node; +public: + void stop() { is_pipeline_stopped = true; } +}; + +// Emulate std::is_trivially_copyable (false positives not allowed, false negatives suboptimal but safe). +#if __TBB_CPP11_TYPE_PROPERTIES_PRESENT +template<typename T> using tbb_trivially_copyable = std::is_trivially_copyable<T>; +#else +template<typename T> struct tbb_trivially_copyable { enum { value = false }; }; +template<typename T> struct tbb_trivially_copyable < T* > { enum { value = true }; }; +template<> struct tbb_trivially_copyable < bool > { enum { value = true }; }; +template<> struct tbb_trivially_copyable < char > { enum { value = true }; }; +template<> struct tbb_trivially_copyable < signed char > { enum { value = true }; }; +template<> struct tbb_trivially_copyable <unsigned char > { enum { value = true }; }; +template<> struct tbb_trivially_copyable < short > { enum { value = true }; }; +template<> struct tbb_trivially_copyable <unsigned short > { enum { value = true }; }; +template<> struct tbb_trivially_copyable < int > { enum { value = true }; }; +template<> struct tbb_trivially_copyable <unsigned int > { enum { value = true }; }; +template<> struct tbb_trivially_copyable < long > { enum { value = true }; }; +template<> struct tbb_trivially_copyable <unsigned long > { enum { value = true }; }; +template<> struct tbb_trivially_copyable < long long> { enum { value = true }; }; +template<> struct tbb_trivially_copyable <unsigned long long> { enum { value = true }; }; +template<> struct tbb_trivially_copyable < float > { enum { value = true }; }; +template<> struct tbb_trivially_copyable < double > { enum { value = true }; }; +template<> struct tbb_trivially_copyable < long double > { enum { value = true }; }; +#endif // __TBB_CPP11_TYPE_PROPERTIES_PRESENT + +template<typename T> +struct use_allocator { + static constexpr bool value = sizeof(T) > sizeof(void *) || !tbb_trivially_copyable<T>::value; +}; + +// A helper class to customize how a type is passed between filters. 
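+// Types that are larger than a pointer or not trivially copyable (e.g. std::string)
+// are heap-allocated through r1::allocate_memory and travel between filters as a
+// pointer; pointer types and small trivially copyable types (e.g. int) are packed
+// directly into the void* token. See the three specializations below.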
+// Usage: token_helper<T, use_allocator<T>::value> +template<typename T, bool Allocate> struct token_helper; + +// using tbb_allocator +template<typename T> +struct token_helper<T, true> { + using pointer = T*; + using value_type = T; + static pointer create_token(value_type && source) { + return new (r1::allocate_memory(sizeof(T))) T(std::move(source)); + } + static value_type & token(pointer & t) { return *t; } + static void * cast_to_void_ptr(pointer ref) { return reinterpret_cast<void *>(ref); } + static pointer cast_from_void_ptr(void * ref) { return reinterpret_cast<pointer>(ref); } + static void destroy_token(pointer token) { + token->~value_type(); + r1::deallocate_memory(token); + } +}; + +// pointer specialization +template<typename T> +struct token_helper<T*, false> { + using pointer = T*; + using value_type = T*; + static pointer create_token(const value_type & source) { return source; } + static value_type & token(pointer & t) { return t; } + static void * cast_to_void_ptr(pointer ref) { return reinterpret_cast<void *>(ref); } + static pointer cast_from_void_ptr(void * ref) { return reinterpret_cast<pointer>(ref); } + static void destroy_token( pointer /*token*/) {} +}; + +// converting type to and from void*, passing objects directly +template<typename T> +struct token_helper<T, false> { + typedef union { + T actual_value; + void * void_overlay; + } type_to_void_ptr_map; + using pointer = T; // not really a pointer in this case. + using value_type = T; + static pointer create_token(const value_type & source) { return source; } + static value_type & token(pointer & t) { return t; } + static void * cast_to_void_ptr(pointer ref) { + type_to_void_ptr_map mymap; + mymap.void_overlay = nullptr; + mymap.actual_value = ref; + return mymap.void_overlay; + } + static pointer cast_from_void_ptr(void * ref) { + type_to_void_ptr_map mymap; + mymap.void_overlay = ref; + return mymap.actual_value; + } + static void destroy_token( pointer /*token*/) {} +}; + +// intermediate +template<typename InputType, typename OutputType, typename Body> +class concrete_filter: public base_filter { + const Body& my_body; + using input_helper = token_helper<InputType, use_allocator<InputType >::value>; + using input_pointer = typename input_helper::pointer; + using output_helper = token_helper<OutputType, use_allocator<OutputType>::value>; + using output_pointer = typename output_helper::pointer; + + void* operator()(void* input) override { + input_pointer temp_input = input_helper::cast_from_void_ptr(input); + output_pointer temp_output = output_helper::create_token(my_body(std::move(input_helper::token(temp_input)))); + input_helper::destroy_token(temp_input); + return output_helper::cast_to_void_ptr(temp_output); + } + + void finalize(void * input) override { + input_pointer temp_input = input_helper::cast_from_void_ptr(input); + input_helper::destroy_token(temp_input); + } + +public: + concrete_filter(unsigned int m, const Body& body) : base_filter(m), my_body(body) {} +}; + +// input +template<typename OutputType, typename Body> +class concrete_filter<void, OutputType, Body>: public base_filter { + const Body& my_body; + using output_helper = token_helper<OutputType, use_allocator<OutputType>::value>; + using output_pointer = typename output_helper::pointer; + + void* operator()(void*) override { + flow_control control; + output_pointer temp_output = output_helper::create_token(my_body(control)); + if(control.is_pipeline_stopped) { + output_helper::destroy_token(temp_output); + set_end_of_input(); + 
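+            // The constructor below ORs filter_may_emit_null into the filter mode, so
+            // object_may_be_null() reports true and the pipeline treats a nullptr token
+            // from this input filter as expected once the body has called flow_control::stop().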
return nullptr; + } + return output_helper::cast_to_void_ptr(temp_output); + } + +public: + concrete_filter(unsigned int m, const Body& body) : + base_filter(m | filter_may_emit_null), + my_body(body) + {} +}; + +// output +template<typename InputType, typename Body> +class concrete_filter<InputType, void, Body>: public base_filter { + const Body& my_body; + using input_helper = token_helper<InputType, use_allocator<InputType >::value>; + using input_pointer = typename input_helper::pointer; + + void* operator()(void* input) override { + input_pointer temp_input = input_helper::cast_from_void_ptr(input); + my_body(std::move(input_helper::token(temp_input))); + input_helper::destroy_token(temp_input); + return nullptr; + } + void finalize(void* input) override { + input_pointer temp_input = input_helper::cast_from_void_ptr(input); + input_helper::destroy_token(temp_input); + } + +public: + concrete_filter(unsigned int m, const Body& body) : base_filter(m), my_body(body) {} +}; + +template<typename Body> +class concrete_filter<void, void, Body>: public base_filter { + const Body& my_body; + + void* operator()(void*) override { + flow_control control; + my_body(control); + void* output = control.is_pipeline_stopped ? nullptr : (void*)(std::intptr_t)-1; + return output; + } +public: + concrete_filter(unsigned int m, const Body& body) : base_filter(m), my_body(body) {} +}; + +class filter_node_ptr { + filter_node * my_node; + +public: + filter_node_ptr() : my_node(nullptr) {} + filter_node_ptr(filter_node *); + ~filter_node_ptr(); + filter_node_ptr(const filter_node_ptr &); + filter_node_ptr(filter_node_ptr &&); + void operator=(filter_node *); + void operator=(const filter_node_ptr &); + void operator=(filter_node_ptr &&); + filter_node& operator*() const; + operator bool() const; +}; + +//! Abstract base class that represents a node in a parse tree underlying a filter class. +/** These nodes are always heap-allocated and can be shared by filter objects. */ +class filter_node { + /** Count must be atomic because it is hidden state for user, but might be shared by threads. */ + std::atomic<std::intptr_t> ref_count; +public: + filter_node_ptr left; + filter_node_ptr right; +protected: + filter_node() : ref_count(0), left(nullptr), right(nullptr) { +#ifdef __TBB_TEST_FILTER_NODE_COUNT + ++(__TBB_TEST_FILTER_NODE_COUNT); +#endif + } +public: + filter_node(const filter_node_ptr& x, const filter_node_ptr& y) : filter_node(){ + left = x; + right = y; + } + filter_node(const filter_node&) = delete; + filter_node& operator=(const filter_node&) = delete; + + //! Add concrete_filter to pipeline + virtual base_filter* create_filter() const { + __TBB_ASSERT(false, "method of non-leaf was called"); + return nullptr; + } + + //! Increment reference count + void add_ref() { ref_count.fetch_add(1, std::memory_order_relaxed); } + + //! Decrement reference count and delete if it becomes zero. 
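+    // filter_node_ptr (declared above) is the intended client of this counting:
+    // copying or assigning a filter_node_ptr calls add_ref(), while destruction and
+    // reassignment call remove_ref(), as the inline definitions below show.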
+ void remove_ref() { + __TBB_ASSERT(ref_count>0,"ref_count underflow"); + if( ref_count.fetch_sub(1, std::memory_order_relaxed) == 1 ) { + this->~filter_node(); + r1::deallocate_memory(this); + } + } + + virtual ~filter_node() { +#ifdef __TBB_TEST_FILTER_NODE_COUNT + --(__TBB_TEST_FILTER_NODE_COUNT); +#endif + } +}; + +inline filter_node_ptr::filter_node_ptr(filter_node * nd) : my_node(nd) { + if (my_node) { + my_node->add_ref(); + } +} + +inline filter_node_ptr::~filter_node_ptr() { + if (my_node) { + my_node->remove_ref(); + } +} + +inline filter_node_ptr::filter_node_ptr(const filter_node_ptr & rhs) : my_node(rhs.my_node) { + if (my_node) { + my_node->add_ref(); + } +} + +inline filter_node_ptr::filter_node_ptr(filter_node_ptr && rhs) : my_node(rhs.my_node) { + rhs.my_node = nullptr; +} + +inline void filter_node_ptr::operator=(filter_node * rhs) { + // Order of operations below carefully chosen so that reference counts remain correct + // in unlikely event that remove_ref throws exception. + filter_node* old = my_node; + my_node = rhs; + if (my_node) { + my_node->add_ref(); + } + if (old) { + old->remove_ref(); + } +} + +inline void filter_node_ptr::operator=(const filter_node_ptr & rhs) { + *this = rhs.my_node; +} + +inline void filter_node_ptr::operator=(filter_node_ptr && rhs) { + filter_node* old = my_node; + my_node = rhs.my_node; + rhs.my_node = nullptr; + if (old) { + old->remove_ref(); + } +} + +inline filter_node& filter_node_ptr::operator*() const{ + __TBB_ASSERT(my_node,"NULL node is used"); + return *my_node; +} + +inline filter_node_ptr::operator bool() const { + return my_node != nullptr; +} + +//! Node in parse tree representing result of make_filter. +template<typename InputType, typename OutputType, typename Body> +class filter_node_leaf: public filter_node { + const unsigned int my_mode; + const Body my_body; + base_filter* create_filter() const override { + return new(r1::allocate_memory(sizeof(concrete_filter<InputType, OutputType, Body>))) concrete_filter<InputType, OutputType, Body>(my_mode,my_body); + } +public: + filter_node_leaf( unsigned int m, const Body& b ) : my_mode(m), my_body(b) {} +}; + + +template <typename Body, typename Input = typename body_types<decltype(&Body::operator())>::input_type> +using filter_input = typename std::conditional<std::is_same<Input, flow_control>::value, void, Input>::type; + +template <typename Body> +using filter_output = typename body_types<decltype(&Body::operator())>::output_type; + +} // namespace d1 +} // namespace detail +} // namespace tbb + + +#endif /* __TBB_parallel_filters_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_pipeline_filters_deduction.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_pipeline_filters_deduction.h new file mode 100644 index 0000000000..55f94dce00 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_pipeline_filters_deduction.h @@ -0,0 +1,46 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB__pipeline_filters_deduction_H +#define __TBB__pipeline_filters_deduction_H + +#include "_config.h" +#include <utility> +#include <type_traits> + +namespace tbb { +namespace detail { +namespace d1 { + +template <typename Input, typename Output> +struct declare_fitler_types { + using input_type = typename std::remove_const<typename std::remove_reference<Input>::type>::type; + using output_type = typename std::remove_const<typename std::remove_reference<Output>::type>::type; +}; + +template <typename T> struct body_types; + +template <typename T, typename Input, typename Output> +struct body_types<Output(T::*)(Input) const> : declare_fitler_types<Input, Output> {}; + +template <typename T, typename Input, typename Output> +struct body_types<Output(T::*)(Input)> : declare_fitler_types<Input, Output> {}; + +} // namespace d1 +} // namespace detail +} // namespace tbb + +#endif // __TBB__pipeline_filters_deduction_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_range_common.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_range_common.h new file mode 100644 index 0000000000..36c4ca84ee --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_range_common.h @@ -0,0 +1,76 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_detail__range_common_H +#define __TBB_detail__range_common_H + +#include "_config.h" +#include "_utils.h" + +namespace tbb { +namespace detail { +inline namespace d0 { + +//! Dummy type that distinguishes splitting constructor from copy constructor. +/** + * See description of parallel_for and parallel_reduce for example usages. + * @ingroup algorithms + */ +class split {}; + +//! Type enables transmission of splitting proportion from partitioners to range objects +/** + * In order to make use of such facility Range objects must implement + * splitting constructor with this type passed. 
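+ *
+ * A Range that opts in typically declares a constructor of the following shape
+ * (illustrative sketch only; MyRange is a placeholder name):
+ *
+ *     MyRange( MyRange& r, proportional_split& p ) {
+ *         // divide r between *this and r roughly in the p.left() : p.right() ratio
+ *     }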
+ */ +class proportional_split : no_assign { +public: + proportional_split(size_t _left = 1, size_t _right = 1) : my_left(_left), my_right(_right) { } + + size_t left() const { return my_left; } + size_t right() const { return my_right; } + + // used when range does not support proportional split + explicit operator split() const { return split(); } + +private: + size_t my_left, my_right; +}; + +template <typename Range, typename = void> +struct range_split_object_provider { + template <typename PartitionerSplitType> + static split get( PartitionerSplitType& ) { return split(); } +}; + +template <typename Range> +struct range_split_object_provider<Range, + typename std::enable_if<std::is_constructible<Range, Range&, proportional_split&>::value>::type> { + template <typename PartitionerSplitType> + static PartitionerSplitType& get( PartitionerSplitType& split_obj ) { return split_obj; } +}; + +template <typename Range, typename PartitionerSplitType> +auto get_range_split_object( PartitionerSplitType& split_obj ) +-> decltype(range_split_object_provider<Range>::get(split_obj)) { + return range_split_object_provider<Range>::get(split_obj); +} + +} // namespace d0 +} // namespace detail +} // namespace tbb + +#endif // __TBB_detail__range_common_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_rtm_mutex.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_rtm_mutex.h new file mode 100644 index 0000000000..28ef9f042e --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_rtm_mutex.h @@ -0,0 +1,162 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB__rtm_mutex_impl_H +#define __TBB__rtm_mutex_impl_H + +#include "_assert.h" +#include "_utils.h" +#include "../spin_mutex.h" + +#include "../profiling.h" + +namespace tbb { +namespace detail { +namespace r1 { +struct rtm_mutex_impl; +} +namespace d1 { + +#if _MSC_VER && !defined(__INTEL_COMPILER) + // Suppress warning: structure was padded due to alignment specifier + #pragma warning (push) + #pragma warning (disable: 4324) +#endif + +/** A rtm_mutex is an speculation-enabled spin mutex. + It should be used for locking short critical sections where the lock is + contended but the data it protects are not. If zero-initialized, the + mutex is considered unheld. + @ingroup synchronization */ +class alignas(max_nfs_size) rtm_mutex : private spin_mutex { +private: + enum class rtm_state { + rtm_none, + rtm_transacting, + rtm_real + }; +public: + //! Constructors + rtm_mutex() noexcept { + create_itt_sync(this, "tbb::speculative_spin_mutex", ""); + } + + //! Destructor + ~rtm_mutex() = default; + + //! Represents acquisition of a mutex. + class scoped_lock { + public: + friend class rtm_mutex; + //! Construct lock that has not acquired a mutex. + /** Equivalent to zero-initialization of *this. */ + constexpr scoped_lock() : m_mutex(nullptr), m_transaction_state(rtm_state::rtm_none) {} + + //! Acquire lock on given mutex. 
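+        // Typical RAII use (illustrative): constructing a scoped_lock acquires the
+        // mutex and the destructor releases it, e.g.
+        //     { rtm_mutex::scoped_lock lock(m); /* critical section */ }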
+ scoped_lock(rtm_mutex& m) : m_mutex(nullptr), m_transaction_state(rtm_state::rtm_none) { + acquire(m); + } + + //! Release lock (if lock is held). + ~scoped_lock() { + if(m_transaction_state != rtm_state::rtm_none) { + release(); + } + } + + //! No Copy + scoped_lock(const scoped_lock&) = delete; + scoped_lock& operator=(const scoped_lock&) = delete; + + //! Acquire lock on given mutex. + void acquire(rtm_mutex& m); + + //! Try acquire lock on given mutex. + bool try_acquire(rtm_mutex& m); + + //! Release lock + void release(); + + private: + rtm_mutex* m_mutex; + rtm_state m_transaction_state; + friend r1::rtm_mutex_impl; + }; + + //! Mutex traits + static constexpr bool is_rw_mutex = false; + static constexpr bool is_recursive_mutex = false; + static constexpr bool is_fair_mutex = false; +private: + friend r1::rtm_mutex_impl; +}; // end of rtm_mutex +} // namespace d1 + +namespace r1 { + //! Internal acquire lock. + // only_speculate == true if we're doing a try_lock, else false. + void __TBB_EXPORTED_FUNC acquire(d1::rtm_mutex&, d1::rtm_mutex::scoped_lock&, bool only_speculate = false); + //! Internal try_acquire lock. + bool __TBB_EXPORTED_FUNC try_acquire(d1::rtm_mutex&, d1::rtm_mutex::scoped_lock&); + //! Internal release lock. + void __TBB_EXPORTED_FUNC release(d1::rtm_mutex::scoped_lock&); +} // namespace r1 + +namespace d1 { +//! Acquire lock on given mutex. +inline void rtm_mutex::scoped_lock::acquire(rtm_mutex& m) { + __TBB_ASSERT(!m_mutex, "lock is already acquired"); + r1::acquire(m, *this); +} + +//! Try acquire lock on given mutex. +inline bool rtm_mutex::scoped_lock::try_acquire(rtm_mutex& m) { + __TBB_ASSERT(!m_mutex, "lock is already acquired"); + return r1::try_acquire(m, *this); +} + +//! Release lock +inline void rtm_mutex::scoped_lock::release() { + __TBB_ASSERT(m_mutex, "lock is not acquired"); + __TBB_ASSERT(m_transaction_state != rtm_state::rtm_none, "lock is not acquired"); + return r1::release(*this); +} + +#if _MSC_VER && !defined(__INTEL_COMPILER) + #pragma warning (pop) // 4324 warning +#endif + +#if TBB_USE_PROFILING_TOOLS +inline void set_name(rtm_mutex& obj, const char* name) { + itt_set_sync_name(&obj, name); +} +#if (_WIN32||_WIN64) && !__MINGW32__ +inline void set_name(rtm_mutex& obj, const wchar_t* name) { + itt_set_sync_name(&obj, name); +} +#endif // WIN +#else +inline void set_name(rtm_mutex&, const char*) {} +#if (_WIN32||_WIN64) && !__MINGW32__ +inline void set_name(rtm_mutex&, const wchar_t*) {} +#endif // WIN +#endif + +} // namespace d1 +} // namespace detail +} // namespace tbb + +#endif /* __TBB__rtm_mutex_impl_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_rtm_rw_mutex.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_rtm_rw_mutex.h new file mode 100644 index 0000000000..b62e86bd0a --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_rtm_rw_mutex.h @@ -0,0 +1,209 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_detail__rtm_rw_mutex_H +#define __TBB_detail__rtm_rw_mutex_H + +#include "_assert.h" +#include "_utils.h" +#include "../spin_rw_mutex.h" + +#include <atomic> + +namespace tbb { +namespace detail { + +namespace r1 { +struct rtm_rw_mutex_impl; +} + +namespace d1 { + +constexpr std::size_t speculation_granularity = 64; +#if _MSC_VER && !defined(__INTEL_COMPILER) + // Suppress warning: structure was padded due to alignment specifier + #pragma warning (push) + #pragma warning (disable: 4324) +#endif + +//! Fast, unfair, spinning speculation-enabled reader-writer lock with backoff and writer-preference +/** @ingroup synchronization */ +class alignas(max_nfs_size) rtm_rw_mutex : private spin_rw_mutex { + friend struct r1::rtm_rw_mutex_impl; +private: + enum class rtm_type { + rtm_not_in_mutex, + rtm_transacting_reader, + rtm_transacting_writer, + rtm_real_reader, + rtm_real_writer + }; +public: + //! Constructors + rtm_rw_mutex() noexcept : write_flag(false) { + create_itt_sync(this, "tbb::speculative_spin_rw_mutex", ""); + } + + //! Destructor + ~rtm_rw_mutex() = default; + + //! Represents acquisition of a mutex. + class scoped_lock { + friend struct r1::rtm_rw_mutex_impl; + public: + //! Construct lock that has not acquired a mutex. + /** Equivalent to zero-initialization of *this. */ + constexpr scoped_lock() : m_mutex(nullptr), m_transaction_state(rtm_type::rtm_not_in_mutex) {} + + //! Acquire lock on given mutex. + scoped_lock(rtm_rw_mutex& m, bool write = true) : m_mutex(nullptr), m_transaction_state(rtm_type::rtm_not_in_mutex) { + acquire(m, write); + } + + //! Release lock (if lock is held). + ~scoped_lock() { + if(m_transaction_state != rtm_type::rtm_not_in_mutex) { + release(); + } + } + + //! No Copy + scoped_lock(const scoped_lock&) = delete; + scoped_lock& operator=(const scoped_lock&) = delete; + + //! Acquire lock on given mutex. + inline void acquire(rtm_rw_mutex& m, bool write = true); + + //! Try acquire lock on given mutex. + inline bool try_acquire(rtm_rw_mutex& m, bool write = true); + + //! Release lock + inline void release(); + + //! Upgrade reader to become a writer. + /** Returns whether the upgrade happened without releasing and re-acquiring the lock */ + inline bool upgrade_to_writer(); + + //! Downgrade writer to become a reader. + inline bool downgrade_to_reader(); + + private: + rtm_rw_mutex* m_mutex; + rtm_type m_transaction_state; + }; + + //! Mutex traits + static constexpr bool is_rw_mutex = true; + static constexpr bool is_recursive_mutex = false; + static constexpr bool is_fair_mutex = false; + +private: + alignas(speculation_granularity) std::atomic<bool> write_flag; +}; + +#if _MSC_VER && !defined(__INTEL_COMPILER) + #pragma warning (pop) // 4324 warning +#endif + +} // namespace d1 + +namespace r1 { + //! Internal acquire write lock. + // only_speculate == true if we're doing a try_lock, else false. + void __TBB_EXPORTED_FUNC acquire_writer(d1::rtm_rw_mutex&, d1::rtm_rw_mutex::scoped_lock&, bool only_speculate = false); + //! Internal acquire read lock. + // only_speculate == true if we're doing a try_lock, else false. + void __TBB_EXPORTED_FUNC acquire_reader(d1::rtm_rw_mutex&, d1::rtm_rw_mutex::scoped_lock&, bool only_speculate = false); + //! Internal upgrade reader to become a writer. + bool __TBB_EXPORTED_FUNC upgrade(d1::rtm_rw_mutex::scoped_lock&); + //! Internal downgrade writer to become a reader. + bool __TBB_EXPORTED_FUNC downgrade(d1::rtm_rw_mutex::scoped_lock&); + //! Internal try_acquire write lock. 
+ bool __TBB_EXPORTED_FUNC try_acquire_writer(d1::rtm_rw_mutex&, d1::rtm_rw_mutex::scoped_lock&); + //! Internal try_acquire read lock. + bool __TBB_EXPORTED_FUNC try_acquire_reader(d1::rtm_rw_mutex&, d1::rtm_rw_mutex::scoped_lock&); + //! Internal release lock. + void __TBB_EXPORTED_FUNC release(d1::rtm_rw_mutex::scoped_lock&); +} + +namespace d1 { +//! Acquire lock on given mutex. +void rtm_rw_mutex::scoped_lock::acquire(rtm_rw_mutex& m, bool write) { + __TBB_ASSERT(!m_mutex, "lock is already acquired"); + if (write) { + r1::acquire_writer(m, *this); + } else { + r1::acquire_reader(m, *this); + } +} + +//! Try acquire lock on given mutex. +bool rtm_rw_mutex::scoped_lock::try_acquire(rtm_rw_mutex& m, bool write) { + __TBB_ASSERT(!m_mutex, "lock is already acquired"); + if (write) { + return r1::try_acquire_writer(m, *this); + } else { + return r1::try_acquire_reader(m, *this); + } +} + +//! Release lock +void rtm_rw_mutex::scoped_lock::release() { + __TBB_ASSERT(m_mutex, "lock is not acquired"); + __TBB_ASSERT(m_transaction_state != rtm_type::rtm_not_in_mutex, "lock is not acquired"); + return r1::release(*this); +} + +//! Upgrade reader to become a writer. +/** Returns whether the upgrade happened without releasing and re-acquiring the lock */ +bool rtm_rw_mutex::scoped_lock::upgrade_to_writer() { + __TBB_ASSERT(m_mutex, "lock is not acquired"); + if (m_transaction_state == rtm_type::rtm_transacting_writer || m_transaction_state == rtm_type::rtm_real_writer) { + return true; // Already a writer + } + return r1::upgrade(*this); +} + +//! Downgrade writer to become a reader. +bool rtm_rw_mutex::scoped_lock::downgrade_to_reader() { + __TBB_ASSERT(m_mutex, "lock is not acquired"); + if (m_transaction_state == rtm_type::rtm_transacting_reader || m_transaction_state == rtm_type::rtm_real_reader) { + return true; // Already a reader + } + return r1::downgrade(*this); +} + +#if TBB_USE_PROFILING_TOOLS +inline void set_name(rtm_rw_mutex& obj, const char* name) { + itt_set_sync_name(&obj, name); +} +#if (_WIN32||_WIN64) && !__MINGW32__ +inline void set_name(rtm_rw_mutex& obj, const wchar_t* name) { + itt_set_sync_name(&obj, name); +} +#endif // WIN +#else +inline void set_name(rtm_rw_mutex&, const char*) {} +#if (_WIN32||_WIN64) && !__MINGW32__ +inline void set_name(rtm_rw_mutex&, const wchar_t*) {} +#endif // WIN +#endif + +} // namespace d1 +} // namespace detail +} // namespace tbb + +#endif // __TBB_detail__rtm_rw_mutex_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_segment_table.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_segment_table.h new file mode 100644 index 0000000000..480ec8135e --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_segment_table.h @@ -0,0 +1,563 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_detail__segment_table_H +#define __TBB_detail__segment_table_H + +#include "_config.h" +#include "_allocator_traits.h" +#include "_template_helpers.h" +#include "_utils.h" +#include "_assert.h" +#include "_exception.h" +#include <atomic> +#include <type_traits> +#include <memory> +#include <cstring> + +#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) +#pragma warning(push) +#pragma warning(disable: 4127) // warning C4127: conditional expression is constant +#endif + +namespace tbb { +namespace detail { +namespace d1 { + +template <typename T, typename Allocator, typename DerivedType, std::size_t PointersPerEmbeddedTable> +class segment_table { +public: + using value_type = T; + using segment_type = T*; + using atomic_segment = std::atomic<segment_type>; + using segment_table_type = atomic_segment*; + + using size_type = std::size_t; + using segment_index_type = std::size_t; + + using allocator_type = Allocator; + + using allocator_traits_type = tbb::detail::allocator_traits<allocator_type>; + using segment_table_allocator_type = typename allocator_traits_type::template rebind_alloc<atomic_segment>; +protected: + using segment_table_allocator_traits = tbb::detail::allocator_traits<segment_table_allocator_type>; + using derived_type = DerivedType; + + static constexpr size_type pointers_per_embedded_table = PointersPerEmbeddedTable; + static constexpr size_type pointers_per_long_table = sizeof(size_type) * 8; +public: + segment_table( const allocator_type& alloc = allocator_type() ) + : my_segment_table_allocator(alloc), my_segment_table(my_embedded_table) + , my_first_block{}, my_size{}, my_segment_table_allocation_failed{} + { + zero_table(my_embedded_table, pointers_per_embedded_table); + } + + segment_table( const segment_table& other ) + : my_segment_table_allocator(segment_table_allocator_traits:: + select_on_container_copy_construction(other.my_segment_table_allocator)) + , my_segment_table(my_embedded_table), my_first_block{}, my_size{}, my_segment_table_allocation_failed{} + { + zero_table(my_embedded_table, pointers_per_embedded_table); + try_call( [&] { + internal_transfer(other, copy_segment_body_type{*this}); + } ).on_exception( [&] { + clear(); + }); + } + + segment_table( const segment_table& other, const allocator_type& alloc ) + : my_segment_table_allocator(alloc), my_segment_table(my_embedded_table) + , my_first_block{}, my_size{}, my_segment_table_allocation_failed{} + { + zero_table(my_embedded_table, pointers_per_embedded_table); + try_call( [&] { + internal_transfer(other, copy_segment_body_type{*this}); + } ).on_exception( [&] { + clear(); + }); + } + + segment_table( segment_table&& other ) + : my_segment_table_allocator(std::move(other.my_segment_table_allocator)), my_segment_table(my_embedded_table) + , my_first_block{}, my_size{}, my_segment_table_allocation_failed{} + { + zero_table(my_embedded_table, pointers_per_embedded_table); + internal_move(std::move(other)); + } + + segment_table( segment_table&& other, const allocator_type& alloc ) + : my_segment_table_allocator(alloc), my_segment_table(my_embedded_table), my_first_block{} + , my_size{}, my_segment_table_allocation_failed{} + { + zero_table(my_embedded_table, pointers_per_embedded_table); + using is_equal_type = typename segment_table_allocator_traits::is_always_equal; + internal_move_construct_with_allocator(std::move(other), alloc, is_equal_type()); + } + + ~segment_table() { + clear(); + } + + segment_table& operator=( const segment_table& other ) { + if (this != &other) { + 
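+            // Self-assignment is a no-op; otherwise copy_assign_allocators handles
+            // allocator propagation and internal_transfer clones other's segments
+            // element by element through copy_segment_body_type.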
copy_assign_allocators(my_segment_table_allocator, other.my_segment_table_allocator); + internal_transfer(other, copy_segment_body_type{*this}); + } + return *this; + } + + segment_table& operator=( segment_table&& other ) + noexcept(derived_type::is_noexcept_assignment) + { + using pocma_type = typename segment_table_allocator_traits::propagate_on_container_move_assignment; + using is_equal_type = typename segment_table_allocator_traits::is_always_equal; + + if (this != &other) { + move_assign_allocators(my_segment_table_allocator, other.my_segment_table_allocator); + internal_move_assign(std::move(other), tbb::detail::disjunction<is_equal_type, pocma_type>()); + } + return *this; + } + + void swap( segment_table& other ) + noexcept(derived_type::is_noexcept_swap) + { + using is_equal_type = typename segment_table_allocator_traits::is_always_equal; + using pocs_type = typename segment_table_allocator_traits::propagate_on_container_swap; + + if (this != &other) { + swap_allocators(my_segment_table_allocator, other.my_segment_table_allocator); + internal_swap(other, tbb::detail::disjunction<is_equal_type, pocs_type>()); + } + } + + segment_type get_segment( segment_index_type index ) const { + return get_table()[index] + segment_base(index); + } + + value_type& operator[]( size_type index ) { + return internal_subscript<true>(index); + } + + const value_type& operator[]( size_type index ) const { + return const_cast<segment_table*>(this)->internal_subscript<true>(index); + } + + const segment_table_allocator_type& get_allocator() const { + return my_segment_table_allocator; + } + + segment_table_allocator_type& get_allocator() { + return my_segment_table_allocator; + } + + void enable_segment( segment_type& segment, segment_table_type table, segment_index_type seg_index, size_type index ) { + // Allocate new segment + segment_type new_segment = self()->create_segment(table, seg_index, index); + if (new_segment != nullptr) { + // Store (new_segment - segment_base) into the segment table to allow access to the table by index via + // my_segment_table[segment_index_of(index)][index] + segment_type disabled_segment = nullptr; + if (!table[seg_index].compare_exchange_strong(disabled_segment, new_segment - segment_base(seg_index))) { + // compare_exchange failed => some other thread has already enabled this segment + // Deallocate the memory + self()->deallocate_segment(new_segment, seg_index); + } + } + + segment = table[seg_index].load(std::memory_order_acquire); + __TBB_ASSERT(segment != nullptr, "If create_segment returned nullptr, the element should be stored in the table"); + } + + void delete_segment( segment_index_type seg_index ) { + segment_type disabled_segment = nullptr; + // Set the pointer to the segment to NULL in the table + segment_type segment_to_delete = get_table()[seg_index].exchange(disabled_segment); + if (segment_to_delete == segment_allocation_failure_tag) { + return; + } + + segment_to_delete += segment_base(seg_index); + + // Deallocate the segment + self()->destroy_segment(segment_to_delete, seg_index); + } + + size_type number_of_segments( segment_table_type table ) const { + // Check for an active table, if it is embedded table - return the number of embedded segments + // Otherwise - return the maximum number of segments + return table == my_embedded_table ? 
pointers_per_embedded_table : pointers_per_long_table; + } + + size_type capacity() const noexcept { + segment_table_type table = get_table(); + size_type num_segments = number_of_segments(table); + for (size_type seg_index = 0; seg_index < num_segments; ++seg_index) { + // Check if the pointer is valid (allocated) + if (table[seg_index].load(std::memory_order_relaxed) <= segment_allocation_failure_tag) { + return segment_base(seg_index); + } + } + return segment_base(num_segments); + } + + size_type find_last_allocated_segment( segment_table_type table ) const noexcept { + size_type end = 0; + size_type num_segments = number_of_segments(table); + for (size_type seg_index = 0; seg_index < num_segments; ++seg_index) { + // Check if the pointer is valid (allocated) + if (table[seg_index].load(std::memory_order_relaxed) > segment_allocation_failure_tag) { + end = seg_index + 1; + } + } + return end; + } + + void reserve( size_type n ) { + if (n > allocator_traits_type::max_size(my_segment_table_allocator)) { + throw_exception(exception_id::reservation_length_error); + } + + size_type size = my_size.load(std::memory_order_relaxed); + segment_index_type start_seg_idx = size == 0 ? 0 : segment_index_of(size - 1) + 1; + for (segment_index_type seg_idx = start_seg_idx; segment_base(seg_idx) < n; ++seg_idx) { + size_type first_index = segment_base(seg_idx); + internal_subscript<true>(first_index); + } + } + + void clear() { + clear_segments(); + clear_table(); + my_size.store(0, std::memory_order_relaxed); + my_first_block.store(0, std::memory_order_relaxed); + } + + void clear_segments() { + segment_table_type current_segment_table = get_table(); + for (size_type i = number_of_segments(current_segment_table); i != 0; --i) { + if (current_segment_table[i - 1].load(std::memory_order_relaxed) != nullptr) { + // If the segment was enabled - disable and deallocate it + delete_segment(i - 1); + } + } + } + + void clear_table() { + segment_table_type current_segment_table = get_table(); + if (current_segment_table != my_embedded_table) { + // If the active table is not the embedded one - deallocate the active table + for (size_type i = 0; i != pointers_per_long_table; ++i) { + segment_table_allocator_traits::destroy(my_segment_table_allocator, ¤t_segment_table[i]); + } + + segment_table_allocator_traits::deallocate(my_segment_table_allocator, current_segment_table, pointers_per_long_table); + my_segment_table.store(my_embedded_table, std::memory_order_relaxed); + zero_table(my_embedded_table, pointers_per_embedded_table); + } + } + + void extend_table_if_necessary(segment_table_type& table, size_type start_index, size_type end_index) { + // extend_segment_table if an active table is an embedded table + // and the requested index is not in the embedded table + if (table == my_embedded_table && end_index > embedded_table_size) { + if (start_index <= embedded_table_size) { + try_call([&] { + table = self()->allocate_long_table(my_embedded_table, start_index); + // It is possible that the table was extended by the thread that allocated first_block. + // In this case it is necessary to re-read the current table. 
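+                    // A nullptr result from allocate_long_table therefore means another
+                    // thread has already installed the long table, and the else branch
+                    // below simply reloads the published pointer.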
+ + if (table) { + my_segment_table.store(table, std::memory_order_release); + } else { + table = my_segment_table.load(std::memory_order_acquire); + } + }).on_exception([&] { + my_segment_table_allocation_failed.store(true, std::memory_order_relaxed); + }); + } else { + atomic_backoff backoff; + do { + if (my_segment_table_allocation_failed.load(std::memory_order_relaxed)) { + throw_exception(exception_id::bad_alloc); + } + backoff.pause(); + table = my_segment_table.load(std::memory_order_acquire); + } while (table == my_embedded_table); + } + } + } + + // Return the segment where index is stored + static constexpr segment_index_type segment_index_of( size_type index ) { + return size_type(tbb::detail::log2(uintptr_t(index|1))); + } + + // Needed to calculate the offset in segment + static constexpr size_type segment_base( size_type index ) { + return size_type(1) << index & ~size_type(1); + } + + // Return size of the segment + static constexpr size_type segment_size( size_type index ) { + return index == 0 ? 2 : size_type(1) << index; + } + +private: + + derived_type* self() { + return static_cast<derived_type*>(this); + } + + struct copy_segment_body_type { + void operator()( segment_index_type index, segment_type from, segment_type to ) const { + my_instance.self()->copy_segment(index, from, to); + } + segment_table& my_instance; + }; + + struct move_segment_body_type { + void operator()( segment_index_type index, segment_type from, segment_type to ) const { + my_instance.self()->move_segment(index, from, to); + } + segment_table& my_instance; + }; + + // Transgers all segments from the other table + template <typename TransferBody> + void internal_transfer( const segment_table& other, TransferBody transfer_segment ) { + static_cast<derived_type*>(this)->destroy_elements(); + + assign_first_block_if_necessary(other.my_first_block.load(std::memory_order_relaxed)); + my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); + + segment_table_type other_table = other.get_table(); + size_type end_segment_size = segment_size(other.find_last_allocated_segment(other_table)); + + // If an exception occurred in other, then the size may be greater than the size of the end segment. + size_type other_size = end_segment_size < other.my_size.load(std::memory_order_relaxed) ? + other.my_size.load(std::memory_order_relaxed) : end_segment_size; + other_size = my_segment_table_allocation_failed ? 
embedded_table_size : other_size; + + for (segment_index_type i = 0; segment_base(i) < other_size; ++i) { + // If the segment in other table is enabled - transfer it + if (other_table[i].load(std::memory_order_relaxed) == segment_allocation_failure_tag) + { + my_size = segment_base(i); + break; + } else if (other_table[i].load(std::memory_order_relaxed) != nullptr) { + internal_subscript<true>(segment_base(i)); + transfer_segment(i, other.get_table()[i].load(std::memory_order_relaxed) + segment_base(i), + get_table()[i].load(std::memory_order_relaxed) + segment_base(i)); + } + } + } + + // Moves the other segment table + // Only equal allocators are allowed + void internal_move( segment_table&& other ) { + // NOTE: allocators should be equal + clear(); + my_first_block.store(other.my_first_block.load(std::memory_order_relaxed), std::memory_order_relaxed); + my_size.store(other.my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); + // If an active table in other is embedded - restore all of the embedded segments + if (other.get_table() == other.my_embedded_table) { + for ( size_type i = 0; i != pointers_per_embedded_table; ++i ) { + segment_type other_segment = other.my_embedded_table[i].load(std::memory_order_relaxed); + my_embedded_table[i].store(other_segment, std::memory_order_relaxed); + other.my_embedded_table[i].store(nullptr, std::memory_order_relaxed); + } + my_segment_table.store(my_embedded_table, std::memory_order_relaxed); + } else { + my_segment_table.store(other.my_segment_table, std::memory_order_relaxed); + other.my_segment_table.store(other.my_embedded_table, std::memory_order_relaxed); + zero_table(other.my_embedded_table, pointers_per_embedded_table); + } + other.my_size.store(0, std::memory_order_relaxed); + } + + // Move construct the segment table with the allocator object + // if any instances of allocator_type are always equal + void internal_move_construct_with_allocator( segment_table&& other, const allocator_type&, + /*is_always_equal = */ std::true_type ) { + internal_move(std::move(other)); + } + + // Move construct the segment table with the allocator object + // if any instances of allocator_type are always equal + void internal_move_construct_with_allocator( segment_table&& other, const allocator_type& alloc, + /*is_always_equal = */ std::false_type ) { + if (other.my_segment_table_allocator == alloc) { + // If allocators are equal - restore pointers + internal_move(std::move(other)); + } else { + // If allocators are not equal - perform per element move with reallocation + try_call( [&] { + internal_transfer(other, move_segment_body_type{*this}); + } ).on_exception( [&] { + clear(); + }); + } + } + + // Move assigns the segment table to other is any instances of allocator_type are always equal + // or propagate_on_container_move_assignment is true + void internal_move_assign( segment_table&& other, /*is_always_equal || POCMA = */ std::true_type ) { + internal_move(std::move(other)); + } + + // Move assigns the segment table to other is any instances of allocator_type are not always equal + // and propagate_on_container_move_assignment is false + void internal_move_assign( segment_table&& other, /*is_always_equal || POCMA = */ std::false_type ) { + if (my_segment_table_allocator == other.my_segment_table_allocator) { + // If allocators are equal - restore pointers + internal_move(std::move(other)); + } else { + // If allocators are not equal - perform per element move with reallocation + internal_transfer(other, 
move_segment_body_type{*this}); + } + } + + // Swaps two segment tables if any instances of allocator_type are always equal + // or propagate_on_container_swap is true + void internal_swap( segment_table& other, /*is_always_equal || POCS = */ std::true_type ) { + internal_swap_fields(other); + } + + // Swaps two segment tables if any instances of allocator_type are not always equal + // and propagate_on_container_swap is false + // According to the C++ standard, swapping of two containers with unequal allocators + // is an undefined behavior scenario + void internal_swap( segment_table& other, /*is_always_equal || POCS = */ std::false_type ) { + __TBB_ASSERT(my_segment_table_allocator == other.my_segment_table_allocator, + "Swapping with unequal allocators is not allowed"); + internal_swap_fields(other); + } + + void internal_swap_fields( segment_table& other ) { + // If an active table in either *this segment table or other is an embedded one - swaps the embedded tables + if (get_table() == my_embedded_table || + other.get_table() == other.my_embedded_table) { + + for (size_type i = 0; i != pointers_per_embedded_table; ++i) { + segment_type current_segment = my_embedded_table[i].load(std::memory_order_relaxed); + segment_type other_segment = other.my_embedded_table[i].load(std::memory_order_relaxed); + + my_embedded_table[i].store(other_segment, std::memory_order_relaxed); + other.my_embedded_table[i].store(current_segment, std::memory_order_relaxed); + } + } + + segment_table_type current_segment_table = get_table(); + segment_table_type other_segment_table = other.get_table(); + + // If an active table is an embedded one - + // store an active table in other to the embedded one from other + if (current_segment_table == my_embedded_table) { + other.my_segment_table.store(other.my_embedded_table, std::memory_order_relaxed); + } else { + // Otherwise - store it to the active segment table + other.my_segment_table.store(current_segment_table, std::memory_order_relaxed); + } + + // If an active table in other segment table is an embedded one - + // store an active table in other to the embedded one from *this + if (other_segment_table == other.my_embedded_table) { + my_segment_table.store(my_embedded_table, std::memory_order_relaxed); + } else { + // Otherwise - store it to the active segment table in other + my_segment_table.store(other_segment_table, std::memory_order_relaxed); + } + auto first_block = other.my_first_block.load(std::memory_order_relaxed); + other.my_first_block.store(my_first_block.load(std::memory_order_relaxed), std::memory_order_relaxed); + my_first_block.store(first_block, std::memory_order_relaxed); + + auto size = other.my_size.load(std::memory_order_relaxed); + other.my_size.store(my_size.load(std::memory_order_relaxed), std::memory_order_relaxed); + my_size.store(size, std::memory_order_relaxed); + } + +protected: + // A flag indicates that an exception was throws during segment allocations + const segment_type segment_allocation_failure_tag = reinterpret_cast<segment_type>(1); + static constexpr size_type embedded_table_size = segment_size(pointers_per_embedded_table); + + template <bool allow_out_of_range_access> + value_type& internal_subscript( size_type index ) { + segment_index_type seg_index = segment_index_of(index); + segment_table_type table = my_segment_table.load(std::memory_order_acquire); + segment_type segment = nullptr; + + if (allow_out_of_range_access) { + if (derived_type::allow_table_extending) { + extend_table_if_necessary(table, index, index + 
1); + } + + segment = table[seg_index].load(std::memory_order_acquire); + // If the required segment is disabled - enable it + if (segment == nullptr) { + enable_segment(segment, table, seg_index, index); + } + // Check if an exception was thrown during segment allocation + if (segment == segment_allocation_failure_tag) { + throw_exception(exception_id::bad_alloc); + } + } else { + segment = table[seg_index].load(std::memory_order_acquire); + } + __TBB_ASSERT(segment != nullptr, nullptr); + + return segment[index]; + } + + void assign_first_block_if_necessary(segment_index_type index) { + size_type zero = 0; + if (this->my_first_block.load(std::memory_order_relaxed) == zero) { + this->my_first_block.compare_exchange_strong(zero, index); + } + } + + void zero_table( segment_table_type table, size_type count ) { + for (size_type i = 0; i != count; ++i) { + table[i].store(nullptr, std::memory_order_relaxed); + } + } + + segment_table_type get_table() const { + return my_segment_table.load(std::memory_order_acquire); + } + + segment_table_allocator_type my_segment_table_allocator; + std::atomic<segment_table_type> my_segment_table; + atomic_segment my_embedded_table[pointers_per_embedded_table]; + // Number of segments in first block + std::atomic<size_type> my_first_block; + // Number of elements in table + std::atomic<size_type> my_size; + // Flag to indicate failed extend table + std::atomic<bool> my_segment_table_allocation_failed; +}; // class segment_table + +} // namespace d1 +} // namespace detail +} // namespace tbb + +#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) +#pragma warning(pop) // warning 4127 is back +#endif + +#endif // __TBB_detail__segment_table_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_small_object_pool.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_small_object_pool.h new file mode 100644 index 0000000000..8a10a61e1a --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_small_object_pool.h @@ -0,0 +1,108 @@ +/* + Copyright (c) 2020-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB__small_object_pool_H +#define __TBB__small_object_pool_H + +#include "_config.h" +#include "_assert.h" + +#include "../profiling.h" +#include <cstddef> +#include <cstdint> +#include <atomic> + +namespace tbb { +namespace detail { + +namespace d1 { +class small_object_pool { +protected: + small_object_pool() = default; +}; +struct execution_data; +} + +namespace r1 { +void* __TBB_EXPORTED_FUNC allocate(d1::small_object_pool*& pool, std::size_t number_of_bytes, + const d1::execution_data& ed); +void* __TBB_EXPORTED_FUNC allocate(d1::small_object_pool*& pool, std::size_t number_of_bytes); +void __TBB_EXPORTED_FUNC deallocate(d1::small_object_pool& pool, void* ptr, std::size_t number_of_bytes, + const d1::execution_data& ed); +void __TBB_EXPORTED_FUNC deallocate(d1::small_object_pool& pool, void* ptr, std::size_t number_of_bytes); +} + +namespace d1 { +class small_object_allocator { +public: + template <typename Type, typename... 
Args> + Type* new_object(execution_data& ed, Args&&... args) { + void* allocated_object = r1::allocate(m_pool, sizeof(Type), ed); + + auto constructed_object = new(allocated_object) Type(std::forward<Args>(args)...); + return constructed_object; + } + + template <typename Type, typename... Args> + Type* new_object(Args&&... args) { + void* allocated_object = r1::allocate(m_pool, sizeof(Type)); + + auto constructed_object = new(allocated_object) Type(std::forward<Args>(args)...); + return constructed_object; + } + + template <typename Type> + void delete_object(Type* object, const execution_data& ed) { + // Copy this since the it can be the member of the passed object and + // unintentionally destroyed when Type destructor is called below + small_object_allocator alloc = *this; + object->~Type(); + alloc.deallocate(object, ed); + } + + template <typename Type> + void delete_object(Type* object) { + // Copy this since the it can be the member of the passed object and + // unintentionally destroyed when Type destructor is called below + small_object_allocator alloc = *this; + object->~Type(); + alloc.deallocate(object); + } + + template <typename Type> + void deallocate(Type* ptr, const execution_data& ed) { + call_itt_task_notify(destroy, ptr); + + __TBB_ASSERT(m_pool != nullptr, "Pool must be valid for deallocate call"); + r1::deallocate(*m_pool, ptr, sizeof(Type), ed); + } + + template <typename Type> + void deallocate(Type* ptr) { + call_itt_task_notify(destroy, ptr); + + __TBB_ASSERT(m_pool != nullptr, "Pool must be valid for deallocate call"); + r1::deallocate(*m_pool, ptr, sizeof(Type)); + } +private: + small_object_pool* m_pool{}; +}; + +} // namespace d1 +} // namespace detail +} // namespace tbb + +#endif /* __TBB__small_object_pool_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_string_resource.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_string_resource.h new file mode 100644 index 0000000000..c06d5b5db0 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_string_resource.h @@ -0,0 +1,78 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +TBB_STRING_RESOURCE(ALGORITHM, "tbb_algorithm") +TBB_STRING_RESOURCE(PARALLEL_FOR, "tbb_parallel_for") +TBB_STRING_RESOURCE(PARALLEL_FOR_EACH, "tbb_parallel_for_each") +TBB_STRING_RESOURCE(PARALLEL_INVOKE, "tbb_parallel_invoke") +TBB_STRING_RESOURCE(PARALLEL_REDUCE, "tbb_parallel_reduce") +TBB_STRING_RESOURCE(PARALLEL_SCAN, "tbb_parallel_scan") +TBB_STRING_RESOURCE(PARALLEL_SORT, "tbb_parallel_sort") +TBB_STRING_RESOURCE(PARALLEL_PIPELINE, "tbb_parallel_pipeline") +TBB_STRING_RESOURCE(CUSTOM_CTX, "tbb_custom") + +TBB_STRING_RESOURCE(FLOW_NULL, "null") +TBB_STRING_RESOURCE(FLOW_BROADCAST_NODE, "broadcast_node") +TBB_STRING_RESOURCE(FLOW_BUFFER_NODE, "buffer_node") +TBB_STRING_RESOURCE(FLOW_CONTINUE_NODE, "continue_node") +TBB_STRING_RESOURCE(FLOW_FUNCTION_NODE, "function_node") +TBB_STRING_RESOURCE(FLOW_JOIN_NODE_QUEUEING, "join_node (queueing)") +TBB_STRING_RESOURCE(FLOW_JOIN_NODE_RESERVING, "join_node (reserving)") +TBB_STRING_RESOURCE(FLOW_JOIN_NODE_TAG_MATCHING, "join_node (tag_matching)") +TBB_STRING_RESOURCE(FLOW_LIMITER_NODE, "limiter_node") +TBB_STRING_RESOURCE(FLOW_MULTIFUNCTION_NODE, "multifunction_node") +TBB_STRING_RESOURCE(FLOW_OVERWRITE_NODE, "overwrite_node") +TBB_STRING_RESOURCE(FLOW_PRIORITY_QUEUE_NODE, "priority_queue_node") +TBB_STRING_RESOURCE(FLOW_QUEUE_NODE, "queue_node") +TBB_STRING_RESOURCE(FLOW_SEQUENCER_NODE, "sequencer_node") +TBB_STRING_RESOURCE(FLOW_INPUT_NODE, "input_node") +TBB_STRING_RESOURCE(FLOW_SPLIT_NODE, "split_node") +TBB_STRING_RESOURCE(FLOW_WRITE_ONCE_NODE, "write_once_node") +TBB_STRING_RESOURCE(FLOW_INDEXER_NODE, "indexer_node") +TBB_STRING_RESOURCE(FLOW_COMPOSITE_NODE, "composite_node") +TBB_STRING_RESOURCE(FLOW_ASYNC_NODE, "async_node") +TBB_STRING_RESOURCE(FLOW_INPUT_PORT, "input_port") +TBB_STRING_RESOURCE(FLOW_INPUT_PORT_0, "input_port_0") +TBB_STRING_RESOURCE(FLOW_INPUT_PORT_1, "input_port_1") +TBB_STRING_RESOURCE(FLOW_INPUT_PORT_2, "input_port_2") +TBB_STRING_RESOURCE(FLOW_INPUT_PORT_3, "input_port_3") +TBB_STRING_RESOURCE(FLOW_INPUT_PORT_4, "input_port_4") +TBB_STRING_RESOURCE(FLOW_INPUT_PORT_5, "input_port_5") +TBB_STRING_RESOURCE(FLOW_INPUT_PORT_6, "input_port_6") +TBB_STRING_RESOURCE(FLOW_INPUT_PORT_7, "input_port_7") +TBB_STRING_RESOURCE(FLOW_INPUT_PORT_8, "input_port_8") +TBB_STRING_RESOURCE(FLOW_INPUT_PORT_9, "input_port_9") +TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT, "output_port") +TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_0, "output_port_0") +TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_1, "output_port_1") +TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_2, "output_port_2") +TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_3, "output_port_3") +TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_4, "output_port_4") +TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_5, "output_port_5") +TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_6, "output_port_6") +TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_7, "output_port_7") +TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_8, "output_port_8") +TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_9, "output_port_9") +TBB_STRING_RESOURCE(FLOW_OBJECT_NAME, "object_name") +TBB_STRING_RESOURCE(FLOW_BODY, "body") +TBB_STRING_RESOURCE(FLOW_GRAPH, "graph") +TBB_STRING_RESOURCE(FLOW_NODE, "node") +TBB_STRING_RESOURCE(FLOW_TASKS, "tbb_flow_graph") +TBB_STRING_RESOURCE(USER_EVENT, "user_event") + +#if __TBB_FLOW_TRACE_CODEPTR +TBB_STRING_RESOURCE(CODE_ADDRESS, "code_address") +#endif diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_task.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_task.h new file mode 100644 index 0000000000..7b4f8521c6 --- /dev/null +++ 
b/contrib/libs/tbb/include/oneapi/tbb/detail/_task.h @@ -0,0 +1,243 @@ +/* + Copyright (c) 2020-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB__task_H +#define __TBB__task_H + +#include "_config.h" +#include "_assert.h" +#include "_template_helpers.h" +#include "_small_object_pool.h" + +#include "../profiling.h" + +#include <cstddef> +#include <cstdint> +#include <climits> +#include <utility> +#include <atomic> +#include <mutex> + +namespace tbb { +namespace detail { + +namespace d1 { +using slot_id = unsigned short; +constexpr slot_id no_slot = slot_id(~0); +constexpr slot_id any_slot = slot_id(~1); + +class task; +class wait_context; +class task_group_context; +struct execution_data; +} + +namespace r1 { +//! Task spawn/wait entry points +void __TBB_EXPORTED_FUNC spawn(d1::task& t, d1::task_group_context& ctx); +void __TBB_EXPORTED_FUNC spawn(d1::task& t, d1::task_group_context& ctx, d1::slot_id id); +void __TBB_EXPORTED_FUNC execute_and_wait(d1::task& t, d1::task_group_context& t_ctx, d1::wait_context&, d1::task_group_context& w_ctx); +void __TBB_EXPORTED_FUNC wait(d1::wait_context&, d1::task_group_context& ctx); +d1::slot_id __TBB_EXPORTED_FUNC execution_slot(const d1::execution_data*); +d1::task_group_context* __TBB_EXPORTED_FUNC current_context(); + +// Do not place under __TBB_RESUMABLE_TASKS. It is a stub for unsupported platforms. +struct suspend_point_type; +using suspend_callback_type = void(*)(void*, suspend_point_type*); +//! The resumable tasks entry points +void __TBB_EXPORTED_FUNC suspend(suspend_callback_type suspend_callback, void* user_callback); +void __TBB_EXPORTED_FUNC resume(suspend_point_type* tag); +suspend_point_type* __TBB_EXPORTED_FUNC current_suspend_point(); +void __TBB_EXPORTED_FUNC notify_waiters(std::uintptr_t wait_ctx_addr); + +class thread_data; +class task_dispatcher; +class external_waiter; +struct task_accessor; +struct task_arena_impl; +} // namespace r1 + +namespace d1 { + +class task_arena; +using suspend_point = r1::suspend_point_type*; + +#if __TBB_RESUMABLE_TASKS +template <typename F> +static void suspend_callback(void* user_callback, suspend_point sp) { + // Copy user function to a new stack after the context switch to avoid a race when the previous + // suspend point is resumed while the user_callback is being called. 
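    // An illustrative sketch of why the copy below matters (editor's example; submit_async_io
    // is a hypothetical helper, not part of this header): a caller may write
    //     d1::suspend([&](suspend_point sp) {
    //         submit_async_io(sp);   // arranges for d1::resume(sp) to be called later
    //     });
    // The lambda object lives on the stack being suspended, so it must be copied to the new
    // stack before it is invoked.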
+ F user_callback_copy = *static_cast<F*>(user_callback); + user_callback_copy(sp); +} + +template <typename F> +void suspend(F f) { + r1::suspend(&suspend_callback<F>, &f); +} + +inline void resume(suspend_point tag) { + r1::resume(tag); +} +#endif /* __TBB_RESUMABLE_TASKS */ + +// TODO: align wait_context on a cache line +class wait_context { + static constexpr std::uint64_t overflow_mask = ~((1LLU << 32) - 1); + + std::uint64_t m_version_and_traits{1}; + std::atomic<std::uint64_t> m_ref_count{}; + + void add_reference(std::int64_t delta) { + call_itt_task_notify(releasing, this); + std::uint64_t r = m_ref_count.fetch_add(delta) + delta; + + __TBB_ASSERT_EX((r & overflow_mask) == 0, "Overflow is detected"); + + if (!r) { + // Some external waiters or coroutine waiters sleep in the wait list; + // notify them that the work is done + std::uintptr_t wait_ctx_addr = std::uintptr_t(this); + r1::notify_waiters(wait_ctx_addr); + } + } + + bool continue_execution() const { + std::uint64_t r = m_ref_count.load(std::memory_order_acquire); + __TBB_ASSERT_EX((r & overflow_mask) == 0, "Overflow is detected"); + return r > 0; + } + + friend class r1::thread_data; + friend class r1::task_dispatcher; + friend class r1::external_waiter; + friend class task_group; + friend class task_group_base; + friend struct r1::task_arena_impl; + friend struct r1::suspend_point_type; +public: + // Although the internal reference count is uint64_t, we limit the user interface to uint32_t + // to preserve a part of the internal reference count for special needs. + wait_context(std::uint32_t ref_count) : m_ref_count{ref_count} { suppress_unused_warning(m_version_and_traits); } + wait_context(const wait_context&) = delete; + + ~wait_context() { + __TBB_ASSERT(!continue_execution(), NULL); + } + + void reserve(std::uint32_t delta = 1) { + add_reference(delta); + } + + void release(std::uint32_t delta = 1) { + add_reference(-std::int64_t(delta)); + } +#if __TBB_EXTRA_DEBUG + unsigned reference_count() const { + return unsigned(m_ref_count.load(std::memory_order_acquire)); + } +#endif +}; + +struct execution_data { + task_group_context* context{}; + slot_id original_slot{}; + slot_id affinity_slot{}; +}; + +inline task_group_context* context(const execution_data& ed) { + return ed.context; +} + +inline slot_id original_slot(const execution_data& ed) { + return ed.original_slot; +} + +inline slot_id affinity_slot(const execution_data& ed) { + return ed.affinity_slot; +} + +inline slot_id execution_slot(const execution_data& ed) { + return r1::execution_slot(&ed); +} + +inline bool is_same_affinity(const execution_data& ed) { + return affinity_slot(ed) == no_slot || affinity_slot(ed) == execution_slot(ed); +} + +inline bool is_stolen(const execution_data& ed) { + return original_slot(ed) != execution_slot(ed); +} + +inline void spawn(task& t, task_group_context& ctx) { + call_itt_task_notify(releasing, &t); + r1::spawn(t, ctx); +} + +inline void spawn(task& t, task_group_context& ctx, slot_id id) { + call_itt_task_notify(releasing, &t); + r1::spawn(t, ctx, id); +} + +inline void execute_and_wait(task& t, task_group_context& t_ctx, wait_context& wait_ctx, task_group_context& w_ctx) { + r1::execute_and_wait(t, t_ctx, wait_ctx, w_ctx); + call_itt_task_notify(acquired, &wait_ctx); + call_itt_task_notify(destroy, &wait_ctx); +} + +inline void wait(wait_context& wait_ctx, task_group_context& ctx) { + r1::wait(wait_ctx, ctx); + call_itt_task_notify(acquired, &wait_ctx); + call_itt_task_notify(destroy, &wait_ctx); +} + +using
r1::current_context; + +class task_traits { + std::uint64_t m_version_and_traits{}; + friend struct r1::task_accessor; +}; + +//! Alignment for a task object +static constexpr std::size_t task_alignment = 64; + +//! Base class for user-defined tasks. +/** @ingroup task_scheduling */ + +class +#if __TBB_ALIGNAS_AVAILABLE +alignas(task_alignment) +#endif +task : public task_traits { +protected: + virtual ~task() = default; + +public: + virtual task* execute(execution_data&) = 0; + virtual task* cancel(execution_data&) = 0; + +private: + std::uint64_t m_reserved[6]{}; + friend struct r1::task_accessor; +}; +#if __TBB_ALIGNAS_AVAILABLE +static_assert(sizeof(task) == task_alignment, "task size is broken"); +#endif +} // namespace d1 +} // namespace detail +} // namespace tbb + +#endif /* __TBB__task_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_template_helpers.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_template_helpers.h new file mode 100644 index 0000000000..45a8ffede6 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_template_helpers.h @@ -0,0 +1,394 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_detail__template_helpers_H +#define __TBB_detail__template_helpers_H + +#include "_utils.h" +#include "_config.h" + +#include <cstddef> +#include <cstdint> + +#include <type_traits> +#include <memory> +#include <iterator> + +namespace tbb { +namespace detail { +inline namespace d0 { + +// An internal implementation of void_t, which can be used in SFINAE contexts +template <typename...> +struct void_impl { + using type = void; +}; // struct void_impl + +template <typename... Args> +using void_t = typename void_impl<Args...>::type; + +// Generic SFINAE helper for expression checks, based on the idea demonstrated in ISO C++ paper n4502 +template <typename T, typename, template <typename> class... Checks> +struct supports_impl { + using type = std::false_type; +}; + +template <typename T, template <typename> class... Checks> +struct supports_impl<T, void_t<Checks<T>...>, Checks...> { + using type = std::true_type; +}; + +template <typename T, template <typename> class... Checks> +using supports = typename supports_impl<T, void, Checks...>::type; + +//! A template to select either 32-bit or 64-bit constant as compile time, depending on machine word size. +template <unsigned u, unsigned long long ull > +struct select_size_t_constant { + // Explicit cast is needed to avoid compiler warnings about possible truncation. + // The value of the right size, which is selected by ?:, is anyway not truncated or promoted. + static const std::size_t value = (std::size_t)((sizeof(std::size_t)==sizeof(u)) ? u : ull); +}; + +// TODO: do we really need it? +//! Cast between unrelated pointer types. +/** This method should be used sparingly as a last resort for dealing with + situations that inherently break strict ISO C++ aliasing rules. 
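    For instance (an illustrative sketch, not a usage found in this header), one might write
        std::uintptr_t* bits = punned_cast<std::uintptr_t*>(&some_double);
    where the conversion is routed through std::uintptr_t instead of a direct reinterpret_cast
    between the two pointer types.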
*/ +// T is a pointer type because it will be explicitly provided by the programmer as a template argument; +// U is a referent type to enable the compiler to check that "ptr" is a pointer, deducing U in the process. +template<typename T, typename U> +inline T punned_cast( U* ptr ) { + std::uintptr_t x = reinterpret_cast<std::uintptr_t>(ptr); + return reinterpret_cast<T>(x); +} + +template<class T, size_t S, size_t R> +struct padded_base : T { + char pad[S - R]; +}; +template<class T, size_t S> struct padded_base<T, S, 0> : T {}; + +//! Pads type T to fill out to a multiple of cache line size. +template<class T, size_t S = max_nfs_size> +struct padded : padded_base<T, S, sizeof(T) % S> {}; + +#if __TBB_CPP14_INTEGER_SEQUENCE_PRESENT + +using std::index_sequence; +using std::make_index_sequence; + +#else + +template<std::size_t... S> class index_sequence {}; + +template<std::size_t N, std::size_t... S> +struct make_index_sequence_impl : make_index_sequence_impl < N - 1, N - 1, S... > {}; + +template<std::size_t... S> +struct make_index_sequence_impl <0, S...> { + using type = index_sequence<S...>; +}; + +template<std::size_t N> +using make_index_sequence = typename make_index_sequence_impl<N>::type; + +#endif /* __TBB_CPP14_INTEGER_SEQUENCE_PRESENT */ + +#if __TBB_CPP17_LOGICAL_OPERATIONS_PRESENT +using std::conjunction; +using std::disjunction; +#else // __TBB_CPP17_LOGICAL_OPERATIONS_PRESENT + +template <typename...> +struct conjunction : std::true_type {}; + +template <typename First, typename... Args> +struct conjunction<First, Args...> + : std::conditional<bool(First::value), conjunction<Args...>, First>::type {}; + +template <typename T> +struct conjunction<T> : T {}; + +template <typename...> +struct disjunction : std::false_type {}; + +template <typename First, typename... Args> +struct disjunction<First, Args...> + : std::conditional<bool(First::value), First, disjunction<Args...>>::type {}; + +template <typename T> +struct disjunction<T> : T {}; + +#endif // __TBB_CPP17_LOGICAL_OPERATIONS_PRESENT + +template <typename Iterator> +using iterator_value_t = typename std::iterator_traits<Iterator>::value_type; + +template <typename Iterator> +using iterator_key_t = typename std::remove_const<typename iterator_value_t<Iterator>::first_type>::type; + +template <typename Iterator> +using iterator_mapped_t = typename iterator_value_t<Iterator>::second_type; + +template <typename Iterator> +using iterator_alloc_pair_t = std::pair<typename std::add_const<iterator_key_t<Iterator>>::type, + iterator_mapped_t<Iterator>>; + +template <typename A> using alloc_value_type = typename A::value_type; +template <typename A> using alloc_ptr_t = typename std::allocator_traits<A>::pointer; +template <typename A> using has_allocate = decltype(std::declval<alloc_ptr_t<A>&>() = std::declval<A>().allocate(0)); +template <typename A> using has_deallocate = decltype(std::declval<A>().deallocate(std::declval<alloc_ptr_t<A>>(), 0)); + +// alloc_value_type should be checked first, because it can be used in other checks +template <typename T> +using is_allocator = supports<T, alloc_value_type, has_allocate, has_deallocate>; + +#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT +template <typename T> +inline constexpr bool is_allocator_v = is_allocator<T>::value; +#endif + +// Template class in which the "type" determines the type of the element number N in pack Args +template <std::size_t N, typename... Args> +struct pack_element { + using type = void; +}; + +template <std::size_t N, typename T, typename... 
Args> +struct pack_element<N, T, Args...> { + using type = typename pack_element<N-1, Args...>::type; +}; + +template <typename T, typename... Args> +struct pack_element<0, T, Args...> { + using type = T; +}; + +template <std::size_t N, typename... Args> +using pack_element_t = typename pack_element<N, Args...>::type; + +template <typename Func> +class raii_guard { +public: + raii_guard( Func f ) : my_func(f), is_active(true) {} + + ~raii_guard() { + if (is_active) { + my_func(); + } + } + + void dismiss() { + is_active = false; + } +private: + Func my_func; + bool is_active; +}; // class raii_guard + +template <typename Func> +raii_guard<Func> make_raii_guard( Func f ) { + return raii_guard<Func>(f); +} + +template <typename Body> +struct try_call_proxy { + try_call_proxy( Body b ) : body(b) {} + + template <typename OnExceptionBody> + void on_exception( OnExceptionBody on_exception_body ) { + auto guard = make_raii_guard(on_exception_body); + body(); + guard.dismiss(); + } + + template <typename OnCompletionBody> + void on_completion(OnCompletionBody on_completion_body) { + auto guard = make_raii_guard(on_completion_body); + body(); + } + + Body body; +}; // struct try_call_proxy + +// Template helper function for API +// try_call(lambda1).on_exception(lambda2) +// Executes lambda1 and if it throws an exception - executes lambda2 +template <typename Body> +try_call_proxy<Body> try_call( Body b ) { + return try_call_proxy<Body>(b); +} + +#if __TBB_CPP17_IS_SWAPPABLE_PRESENT +using std::is_nothrow_swappable; +using std::is_swappable; +#else // __TBB_CPP17_IS_SWAPPABLE_PRESENT +namespace is_swappable_detail { +using std::swap; + +template <typename T> +using has_swap = decltype(swap(std::declval<T&>(), std::declval<T&>())); + +#if _MSC_VER && _MSC_VER <= 1900 && !__INTEL_COMPILER +// Workaround for VS2015: it fails to instantiate noexcept(...) inside std::integral_constant. +template <typename T> +struct noexcept_wrapper { + static const bool value = noexcept(swap(std::declval<T&>(), std::declval<T&>())); +}; +template <typename T> +struct is_nothrow_swappable_impl : std::integral_constant<bool, noexcept_wrapper<T>::value> {}; +#else +template <typename T> +struct is_nothrow_swappable_impl : std::integral_constant<bool, noexcept(swap(std::declval<T&>(), std::declval<T&>()))> {}; +#endif +} + +template <typename T> +struct is_swappable : supports<T, is_swappable_detail::has_swap> {}; + +template <typename T> +struct is_nothrow_swappable + : conjunction<is_swappable<T>, is_swappable_detail::is_nothrow_swappable_impl<T>> {}; +#endif // __TBB_CPP17_IS_SWAPPABLE_PRESENT + +//! Allows to store a function parameter pack as a variable and later pass it to another function +template< typename... Types > +struct stored_pack; + +template<> +struct stored_pack<> +{ + using pack_type = stored_pack<>; + stored_pack() {} + + // Friend front-end functions + template< typename F, typename Pack > friend void call(F&& f, Pack&& p); + template< typename Ret, typename F, typename Pack > friend Ret call_and_return(F&& f, Pack&& p); + +protected: + // Ideally, ref-qualified non-static methods would be used, + // but that would greatly reduce the set of compilers where it works. + template< typename Ret, typename F, typename... Preceding > + static Ret call(F&& f, const pack_type& /*pack*/, Preceding&&... params) { + return std::forward<F>(f)(std::forward<Preceding>(params)...); + } + template< typename Ret, typename F, typename... Preceding > + static Ret call(F&& f, pack_type&& /*pack*/, Preceding&&... 
params) { + return std::forward<F>(f)(std::forward<Preceding>(params)...); + } +}; + +template< typename T, typename... Types > +struct stored_pack<T, Types...> : stored_pack<Types...> +{ + using pack_type = stored_pack<T, Types...>; + using pack_remainder = stored_pack<Types...>; + + // Since lifetime of original values is out of control, copies should be made. + // Thus references should be stripped away from the deduced type. + typename std::decay<T>::type leftmost_value; + + // Here rvalue references act in the same way as forwarding references, + // as long as class template parameters were deduced via forwarding references. + stored_pack(T&& t, Types&&... types) + : pack_remainder(std::forward<Types>(types)...), leftmost_value(std::forward<T>(t)) {} + + // Friend front-end functions + template< typename F, typename Pack > friend void call(F&& f, Pack&& p); + template< typename Ret, typename F, typename Pack > friend Ret call_and_return(F&& f, Pack&& p); + +protected: + template< typename Ret, typename F, typename... Preceding > + static Ret call(F&& f, pack_type& pack, Preceding&&... params) { + return pack_remainder::template call<Ret>( + std::forward<F>(f), static_cast<pack_remainder&>(pack), + std::forward<Preceding>(params)... , pack.leftmost_value + ); + } + + template< typename Ret, typename F, typename... Preceding > + static Ret call(F&& f, pack_type&& pack, Preceding&&... params) { + return pack_remainder::template call<Ret>( + std::forward<F>(f), static_cast<pack_remainder&&>(pack), + std::forward<Preceding>(params)... , std::move(pack.leftmost_value) + ); + } +}; + +//! Calls the given function with arguments taken from a stored_pack +template< typename F, typename Pack > +void call(F&& f, Pack&& p) { + std::decay<Pack>::type::template call<void>(std::forward<F>(f), std::forward<Pack>(p)); +} + +template< typename Ret, typename F, typename Pack > +Ret call_and_return(F&& f, Pack&& p) { + return std::decay<Pack>::type::template call<Ret>(std::forward<F>(f), std::forward<Pack>(p)); +} + +template< typename... Types > +stored_pack<Types...> save_pack(Types&&... types) { + return stored_pack<Types...>(std::forward<Types>(types)...); +} + +// A structure with the value which is equal to Trait::value +// but can be used in the immediate context due to parameter T +template <typename Trait, typename T> +struct dependent_bool : std::integral_constant<bool, bool(Trait::value)> {}; + +template <typename Callable> +struct body_arg_detector; + +template <typename Callable, typename ReturnType, typename Arg> +struct body_arg_detector<ReturnType(Callable::*)(Arg)> { + using arg_type = Arg; +}; + +template <typename Callable, typename ReturnType, typename Arg> +struct body_arg_detector<ReturnType(Callable::*)(Arg) const> { + using arg_type = Arg; +}; + +template <typename Callable> +struct argument_detector; + +template <typename Callable> +struct argument_detector { + using type = typename body_arg_detector<decltype(&Callable::operator())>::arg_type; +}; + +template <typename ReturnType, typename Arg> +struct argument_detector<ReturnType(*)(Arg)> { + using type = Arg; +}; + +// Detects the argument type of callable, works for callable with one argument. 
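// For example (illustrative): with `auto f = [](const int& x) { return x; };`,
// `argument_type_of<decltype(f)>` is `const int&`; for a plain function pointer of type
// `void (*)(double)` it is `double`.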
+template <typename Callable> +using argument_type_of = typename argument_detector<typename std::decay<Callable>::type>::type; + +template <typename T> +struct type_identity { + using type = T; +}; + +template <typename T> +using type_identity_t = typename type_identity<T>::type; + +} // inline namespace d0 +} // namespace detail +} // namespace tbb + +#endif // __TBB_detail__template_helpers_H + diff --git a/contrib/libs/tbb/include/oneapi/tbb/detail/_utils.h b/contrib/libs/tbb/include/oneapi/tbb/detail/_utils.h new file mode 100644 index 0000000000..d1e02179f8 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/detail/_utils.h @@ -0,0 +1,329 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_detail__utils_H +#define __TBB_detail__utils_H + +#include <type_traits> +#include <cstdint> +#include <atomic> + +#include "_config.h" +#include "_assert.h" +#include "_machine.h" + +namespace tbb { +namespace detail { +inline namespace d0 { + +//! Utility template function to prevent "unused" warnings by various compilers. +template<typename... T> void suppress_unused_warning(T&&...) {} + +//! Compile-time constant that is upper bound on cache line/sector size. +/** It should be used only in situations where having a compile-time upper + bound is more useful than a run-time exact answer. + @ingroup memory_allocation */ +constexpr size_t max_nfs_size = 128; + +//! Class that implements exponential backoff. +class atomic_backoff { + //! Time delay, in units of "pause" instructions. + /** Should be equal to approximately the number of "pause" instructions + that take the same time as an context switch. Must be a power of two.*/ + static constexpr std::int32_t LOOPS_BEFORE_YIELD = 16; + std::int32_t count; + +public: + // In many cases, an object of this type is initialized eagerly on hot path, + // as in for(atomic_backoff b; ; b.pause()) { /*loop body*/ } + // For this reason, the construction cost must be very small! + atomic_backoff() : count(1) {} + // This constructor pauses immediately; do not use on hot paths! + atomic_backoff(bool) : count(1) { pause(); } + + //! No Copy + atomic_backoff(const atomic_backoff&) = delete; + atomic_backoff& operator=(const atomic_backoff&) = delete; + + //! Pause for a while. + void pause() { + if (count <= LOOPS_BEFORE_YIELD) { + machine_pause(count); + // Pause twice as long the next time. + count *= 2; + } else { + // Pause is so long that we might as well yield CPU to scheduler. + yield(); + } + } + + //! Pause for a few times and return false if saturated. + bool bounded_pause() { + machine_pause(count); + if (count < LOOPS_BEFORE_YIELD) { + // Pause twice as long the next time. + count *= 2; + return true; + } else { + return false; + } + } + + void reset() { + count = 1; + } +}; + +//! Spin WHILE the condition is true. +/** T and U should be comparable types. 
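    Illustrative example: given std::atomic<int> flag{0},
        spin_wait_while_condition(flag, [](int v) { return v == 0; });
    keeps pausing (with exponential backoff, eventually yielding) until another thread
    stores a non-zero value into flag.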
*/ +template <typename T, typename C> +void spin_wait_while_condition(const std::atomic<T>& location, C comp) { + atomic_backoff backoff; + while (comp(location.load(std::memory_order_acquire))) { + backoff.pause(); + } +} + +//! Spin WHILE the value of the variable is equal to a given value +/** T and U should be comparable types. */ +template <typename T, typename U> +void spin_wait_while_eq(const std::atomic<T>& location, const U value) { + spin_wait_while_condition(location, [&value](T t) { return t == value; }); +} + +//! Spin UNTIL the value of the variable is equal to a given value +/** T and U should be comparable types. */ +template<typename T, typename U> +void spin_wait_until_eq(const std::atomic<T>& location, const U value) { + spin_wait_while_condition(location, [&value](T t) { return t != value; }); +} + +template <typename T> +std::uintptr_t log2(T in) { + __TBB_ASSERT(in > 0, "The logarithm of a non-positive value is undefined."); + return machine_log2(in); +} + +template<typename T> +T reverse_bits(T src) { + return machine_reverse_bits(src); +} + +template<typename T> +T reverse_n_bits(T src, std::size_t n) { + __TBB_ASSERT(n != 0, "Reverse for 0 bits is undefined behavior."); + return reverse_bits(src) >> (number_of_bits<T>() - n); +} + +// A function to check if passed integer is a power of two +template <typename IntegerType> +constexpr bool is_power_of_two( IntegerType arg ) { + static_assert(std::is_integral<IntegerType>::value, + "An argument for is_power_of_two should be integral type"); + return arg && (0 == (arg & (arg - 1))); +} + +// A function to determine if passed integer is a power of two +// at least as big as another power of two, i.e. for strictly positive i and j, +// with j being a power of two, determines whether i==j<<k for some nonnegative k +template <typename ArgIntegerType, typename DivisorIntegerType> +constexpr bool is_power_of_two_at_least(ArgIntegerType arg, DivisorIntegerType divisor) { + // Divisor should be a power of two + static_assert(std::is_integral<ArgIntegerType>::value, + "An argument for is_power_of_two_at_least should be integral type"); + return 0 == (arg & (arg - divisor)); +} + +// A function to compute arg modulo divisor where divisor is a power of 2. +template<typename ArgIntegerType, typename DivisorIntegerType> +inline ArgIntegerType modulo_power_of_two(ArgIntegerType arg, DivisorIntegerType divisor) { + __TBB_ASSERT( is_power_of_two(divisor), "Divisor should be a power of two" ); + return arg & (divisor - 1); +} + +//! A function to check if passed in pointer is aligned on a specific border +template<typename T> +constexpr bool is_aligned(T* pointer, std::uintptr_t alignment) { + return 0 == ((std::uintptr_t)pointer & (alignment - 1)); +} + +#if TBB_USE_ASSERT +static void* const poisoned_ptr = reinterpret_cast<void*>(-1); + +//! Set p to invalid pointer value. +template<typename T> +inline void poison_pointer( T* &p ) { p = reinterpret_cast<T*>(poisoned_ptr); } + +template<typename T> +inline void poison_pointer(std::atomic<T*>& p) { p.store(reinterpret_cast<T*>(poisoned_ptr), std::memory_order_relaxed); } + +/** Expected to be used in assertions only, thus no empty form is defined. 
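    For example (illustrative; `node` is a hypothetical variable):
        __TBB_ASSERT(!is_poisoned(node->next), "attempt to use a poisoned pointer");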
**/ +template<typename T> +inline bool is_poisoned( T* p ) { return p == reinterpret_cast<T*>(poisoned_ptr); } + +template<typename T> +inline bool is_poisoned(const std::atomic<T*>& p) { return is_poisoned(p.load(std::memory_order_relaxed)); } +#else +template<typename T> +inline void poison_pointer(T* &) {/*do nothing*/} + +template<typename T> +inline void poison_pointer(std::atomic<T*>&) { /* do nothing */} +#endif /* !TBB_USE_ASSERT */ + +template <std::size_t alignment = 0, typename T> +bool assert_pointer_valid(T* p, const char* comment = nullptr) { + suppress_unused_warning(p, comment); + __TBB_ASSERT(p != nullptr, comment); + __TBB_ASSERT(!is_poisoned(p), comment); +#if !(_MSC_VER && _MSC_VER <= 1900 && !__INTEL_COMPILER) + __TBB_ASSERT(is_aligned(p, alignment == 0 ? alignof(T) : alignment), comment); +#endif + // Returns something to simplify assert_pointers_valid implementation. + return true; +} + +template <typename... Args> +void assert_pointers_valid(Args*... p) { + // suppress_unused_warning is used as an evaluation context for the variadic pack. + suppress_unused_warning(assert_pointer_valid(p)...); +} + +//! Base class for types that should not be assigned. +class no_assign { +public: + void operator=(const no_assign&) = delete; + no_assign(const no_assign&) = default; + no_assign() = default; +}; + +//! Base class for types that should not be copied or assigned. +class no_copy: no_assign { +public: + no_copy(const no_copy&) = delete; + no_copy() = default; +}; + +template <typename T> +void swap_atomics_relaxed(std::atomic<T>& lhs, std::atomic<T>& rhs){ + T tmp = lhs.load(std::memory_order_relaxed); + lhs.store(rhs.load(std::memory_order_relaxed), std::memory_order_relaxed); + rhs.store(tmp, std::memory_order_relaxed); +} + +//! One-time initialization states +enum class do_once_state { + uninitialized = 0, ///< No execution attempts have been undertaken yet + pending, ///< A thread is executing associated do-once routine + executed, ///< Do-once routine has been executed + initialized = executed ///< Convenience alias +}; + +//! One-time initialization function +/** /param initializer Pointer to function without arguments + The variant that returns bool is used for cases when initialization can fail + and it is OK to continue execution, but the state should be reset so that + the initialization attempt was repeated the next time. + /param state Shared state associated with initializer that specifies its + initialization state. Must be initially set to #uninitialized value + (e.g. by means of default static zero initialization). **/ +template <typename F> +void atomic_do_once( const F& initializer, std::atomic<do_once_state>& state ) { + // The loop in the implementation is necessary to avoid race when thread T2 + // that arrived in the middle of initialization attempt by another thread T1 + // has just made initialization possible. + // In such a case T2 has to rely on T1 to initialize, but T1 may already be past + // the point where it can recognize the changed conditions. 
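    // Illustrative usage sketch (the names below are hypothetical, not part of this header):
    //     static std::atomic<do_once_state> table_state; // zero-initialized == uninitialized
    //     atomic_do_once([] { build_lookup_table(); }, table_state);
    // Exactly one thread runs the initializer; late arrivals spin until the state becomes 'executed'.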
+ do_once_state expected_state; + while ( state.load( std::memory_order_acquire ) != do_once_state::executed ) { + if( state.load( std::memory_order_relaxed ) == do_once_state::uninitialized ) { + expected_state = do_once_state::uninitialized; +#if defined(__INTEL_COMPILER) && __INTEL_COMPILER <= 1910 + using enum_type = typename std::underlying_type<do_once_state>::type; + if( ((std::atomic<enum_type>&)state).compare_exchange_strong( (enum_type&)expected_state, (enum_type)do_once_state::pending ) ) { +#else + if( state.compare_exchange_strong( expected_state, do_once_state::pending ) ) { +#endif + run_initializer( initializer, state ); + break; + } + } + spin_wait_while_eq( state, do_once_state::pending ); + } +} + +// Run the initializer which can not fail +template<typename Functor> +void run_initializer(const Functor& f, std::atomic<do_once_state>& state ) { + f(); + state.store(do_once_state::executed, std::memory_order_release); +} + +#if __TBB_CPP20_CONCEPTS_PRESENT +template <typename T> +concept boolean_testable_impl = std::convertible_to<T, bool>; + +template <typename T> +concept boolean_testable = boolean_testable_impl<T> && requires( T&& t ) { + { !std::forward<T>(t) } -> boolean_testable_impl; + }; + +#if __TBB_CPP20_COMPARISONS_PRESENT +struct synthesized_three_way_comparator { + template <typename T1, typename T2> + auto operator()( const T1& lhs, const T2& rhs ) const + requires requires { + { lhs < rhs } -> boolean_testable; + { rhs < lhs } -> boolean_testable; + } + { + if constexpr (std::three_way_comparable_with<T1, T2>) { + return lhs <=> rhs; + } else { + if (lhs < rhs) { + return std::weak_ordering::less; + } + if (rhs < lhs) { + return std::weak_ordering::greater; + } + return std::weak_ordering::equivalent; + } + } +}; // struct synthesized_three_way_comparator + +template <typename T1, typename T2 = T1> +using synthesized_three_way_result = decltype(synthesized_three_way_comparator{}(std::declval<T1&>(), + std::declval<T2&>())); + +#endif // __TBB_CPP20_COMPARISONS_PRESENT +#endif // __TBB_CPP20_CONCEPTS_PRESENT + +} // namespace d0 + +namespace d1 { + +class delegate_base { +public: + virtual bool operator()() const = 0; + virtual ~delegate_base() {} +}; // class delegate_base + +} // namespace d1 + +} // namespace detail +} // namespace tbb + +#endif // __TBB_detail__utils_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/enumerable_thread_specific.h b/contrib/libs/tbb/include/oneapi/tbb/enumerable_thread_specific.h new file mode 100644 index 0000000000..246447a213 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/enumerable_thread_specific.h @@ -0,0 +1,1113 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#ifndef __TBB_enumerable_thread_specific_H +#define __TBB_enumerable_thread_specific_H + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" +#include "detail/_assert.h" +#include "detail/_template_helpers.h" +#include "detail/_aligned_space.h" + +#include "concurrent_vector.h" +#include "tbb_allocator.h" +#include "cache_aligned_allocator.h" +#include "profiling.h" + +#include <atomic> +#include <thread> +#include <cstring> // memcpy +#include <cstddef> // std::ptrdiff_t + +#include "task.h" // for task::suspend_point + +#if _WIN32 || _WIN64 +#include <windows.h> +#else +#include <pthread.h> +#endif + +namespace tbb { +namespace detail { +namespace d1 { + +//! enum for selecting between single key and key-per-instance versions +enum ets_key_usage_type { + ets_key_per_instance + , ets_no_key +#if __TBB_RESUMABLE_TASKS + , ets_suspend_aware +#endif +}; + +// Forward declaration to use in internal classes +template <typename T, typename Allocator, ets_key_usage_type ETS_key_type> +class enumerable_thread_specific; + +template <std::size_t ThreadIDSize> +struct internal_ets_key_selector { + using key_type = std::thread::id; + static key_type current_key() { + return std::this_thread::get_id(); + } +}; + +// Intel Compiler on OSX cannot create atomics objects that instantiated from non-fundamental types +#if __INTEL_COMPILER && __APPLE__ +template<> +struct internal_ets_key_selector<sizeof(std::size_t)> { + using key_type = std::size_t; + static key_type current_key() { + auto id = std::this_thread::get_id(); + return reinterpret_cast<key_type&>(id); + } +}; +#endif + +template <ets_key_usage_type ETS_key_type> +struct ets_key_selector : internal_ets_key_selector<sizeof(std::thread::id)> {}; + +#if __TBB_RESUMABLE_TASKS +template <> +struct ets_key_selector<ets_suspend_aware> { + using key_type = suspend_point; + static key_type current_key() { + return r1::current_suspend_point(); + } +}; +#endif + +template<ets_key_usage_type ETS_key_type> +class ets_base : detail::no_copy { +protected: + using key_type = typename ets_key_selector<ETS_key_type>::key_type; + +public: + struct slot; + struct array { + array* next; + std::size_t lg_size; + slot& at( std::size_t k ) { + return (reinterpret_cast<slot*>(reinterpret_cast<void*>(this+1)))[k]; + } + std::size_t size() const { return std::size_t(1) << lg_size; } + std::size_t mask() const { return size() - 1; } + std::size_t start( std::size_t h ) const { + return h >> (8 * sizeof(std::size_t) - lg_size); + } + }; + struct slot { + std::atomic<key_type> key; + void* ptr; + bool empty() const { return key.load(std::memory_order_relaxed) == key_type(); } + bool match( key_type k ) const { return key.load(std::memory_order_relaxed) == k; } + bool claim( key_type k ) { + // TODO: maybe claim ptr, because key_type is not guaranteed to fit into word size + key_type expected = key_type(); + return key.compare_exchange_strong(expected, k); + } + }; + +protected: + //! Root of linked list of arrays of decreasing size. + /** nullptr if and only if my_count==0. + Each array in the list is half the size of its predecessor. 
*/ + std::atomic<array*> my_root; + std::atomic<std::size_t> my_count; + + virtual void* create_local() = 0; + virtual void* create_array(std::size_t _size) = 0; // _size in bytes + virtual void free_array(void* ptr, std::size_t _size) = 0; // _size in bytes + + array* allocate( std::size_t lg_size ) { + std::size_t n = std::size_t(1) << lg_size; + array* a = static_cast<array*>(create_array(sizeof(array) + n * sizeof(slot))); + a->lg_size = lg_size; + std::memset( a + 1, 0, n * sizeof(slot) ); + return a; + } + void free(array* a) { + std::size_t n = std::size_t(1) << (a->lg_size); + free_array( static_cast<void*>(a), std::size_t(sizeof(array) + n * sizeof(slot)) ); + } + + ets_base() : my_root{nullptr}, my_count{0} {} + virtual ~ets_base(); // g++ complains if this is not virtual + + void* table_lookup( bool& exists ); + void table_clear(); + // The following functions are not used in concurrent context, + // so we don't need synchronization and ITT annotations there. + template <ets_key_usage_type E2> + void table_elementwise_copy( const ets_base& other, + void*(*add_element)(ets_base<E2>&, void*) ) { + __TBB_ASSERT(!my_root.load(std::memory_order_relaxed),NULL); + __TBB_ASSERT(!my_count.load(std::memory_order_relaxed),NULL); + if( !other.my_root.load(std::memory_order_relaxed) ) return; + array* root = allocate(other.my_root.load(std::memory_order_relaxed)->lg_size); + my_root.store(root, std::memory_order_relaxed); + root->next = nullptr; + my_count.store(other.my_count.load(std::memory_order_relaxed), std::memory_order_relaxed); + std::size_t mask = root->mask(); + for( array* r = other.my_root.load(std::memory_order_relaxed); r; r = r->next ) { + for( std::size_t i = 0; i < r->size(); ++i ) { + slot& s1 = r->at(i); + if( !s1.empty() ) { + for( std::size_t j = root->start(std::hash<key_type>{}(s1.key.load(std::memory_order_relaxed))); ; j = (j+1)&mask ) { + slot& s2 = root->at(j); + if( s2.empty() ) { + s2.ptr = add_element(static_cast<ets_base<E2>&>(*this), s1.ptr); + s2.key.store(s1.key.load(std::memory_order_relaxed), std::memory_order_relaxed); + break; + } + else if( s2.match(s1.key.load(std::memory_order_relaxed)) ) + break; + } + } + } + } + } + void table_swap( ets_base& other ) { + __TBB_ASSERT(this!=&other, "Don't swap an instance with itself"); + swap_atomics_relaxed(my_root, other.my_root); + swap_atomics_relaxed(my_count, other.my_count); + } +}; + +template<ets_key_usage_type ETS_key_type> +ets_base<ETS_key_type>::~ets_base() { + __TBB_ASSERT(!my_root.load(std::memory_order_relaxed), nullptr); +} + +template<ets_key_usage_type ETS_key_type> +void ets_base<ETS_key_type>::table_clear() { + while ( array* r = my_root.load(std::memory_order_relaxed) ) { + my_root.store(r->next, std::memory_order_relaxed); + free(r); + } + my_count.store(0, std::memory_order_relaxed); +} + +template<ets_key_usage_type ETS_key_type> +void* ets_base<ETS_key_type>::table_lookup( bool& exists ) { + const key_type k = ets_key_selector<ETS_key_type>::current_key(); + + __TBB_ASSERT(k != key_type(),NULL); + void* found; + std::size_t h = std::hash<key_type>{}(k); + for( array* r = my_root.load(std::memory_order_acquire); r; r = r->next ) { + call_itt_notify(acquired,r); + std::size_t mask=r->mask(); + for(std::size_t i = r->start(h); ;i=(i+1)&mask) { + slot& s = r->at(i); + if( s.empty() ) break; + if( s.match(k) ) { + if( r == my_root.load(std::memory_order_acquire) ) { + // Success at top level + exists = true; + return s.ptr; + } else { + // Success at some other level. 
Need to insert at top level. + exists = true; + found = s.ptr; + goto insert; + } + } + } + } + // Key does not yet exist. The density of slots in the table does not exceed 0.5, + // for if this will occur a new table is allocated with double the current table + // size, which is swapped in as the new root table. So an empty slot is guaranteed. + exists = false; + found = create_local(); + { + std::size_t c = ++my_count; + array* r = my_root.load(std::memory_order_acquire); + call_itt_notify(acquired,r); + if( !r || c > r->size()/2 ) { + std::size_t s = r ? r->lg_size : 2; + while( c > std::size_t(1)<<(s-1) ) ++s; + array* a = allocate(s); + for(;;) { + a->next = r; + call_itt_notify(releasing,a); + array* new_r = r; + if( my_root.compare_exchange_strong(new_r, a) ) break; + call_itt_notify(acquired, new_r); + __TBB_ASSERT(new_r != nullptr, nullptr); + if( new_r->lg_size >= s ) { + // Another thread inserted an equal or bigger array, so our array is superfluous. + free(a); + break; + } + r = new_r; + } + } + } + insert: + // Whether a slot has been found in an older table, or if it has been inserted at this level, + // it has already been accounted for in the total. Guaranteed to be room for it, and it is + // not present, so search for empty slot and use it. + array* ir = my_root.load(std::memory_order_acquire); + call_itt_notify(acquired, ir); + std::size_t mask = ir->mask(); + for(std::size_t i = ir->start(h);; i = (i+1)&mask) { + slot& s = ir->at(i); + if( s.empty() ) { + if( s.claim(k) ) { + s.ptr = found; + return found; + } + } + } +} + +//! Specialization that exploits native TLS +template <> +class ets_base<ets_key_per_instance>: public ets_base<ets_no_key> { + using super = ets_base<ets_no_key>; +#if _WIN32||_WIN64 +#if __TBB_WIN8UI_SUPPORT + using tls_key_t = DWORD; + void create_key() { my_key = FlsAlloc(NULL); } + void destroy_key() { FlsFree(my_key); } + void set_tls(void * value) { FlsSetValue(my_key, (LPVOID)value); } + void* get_tls() { return (void *)FlsGetValue(my_key); } +#else + using tls_key_t = DWORD; + void create_key() { my_key = TlsAlloc(); } + void destroy_key() { TlsFree(my_key); } + void set_tls(void * value) { TlsSetValue(my_key, (LPVOID)value); } + void* get_tls() { return (void *)TlsGetValue(my_key); } +#endif +#else + using tls_key_t = pthread_key_t; + void create_key() { pthread_key_create(&my_key, NULL); } + void destroy_key() { pthread_key_delete(my_key); } + void set_tls( void * value ) const { pthread_setspecific(my_key, value); } + void* get_tls() const { return pthread_getspecific(my_key); } +#endif + tls_key_t my_key; + virtual void* create_local() override = 0; + virtual void* create_array(std::size_t _size) override = 0; // _size in bytes + virtual void free_array(void* ptr, std::size_t _size) override = 0; // size in bytes +protected: + ets_base() {create_key();} + ~ets_base() {destroy_key();} + void* table_lookup( bool& exists ) { + void* found = get_tls(); + if( found ) { + exists=true; + } else { + found = super::table_lookup(exists); + set_tls(found); + } + return found; + } + void table_clear() { + destroy_key(); + create_key(); + super::table_clear(); + } + void table_swap( ets_base& other ) { + using std::swap; + __TBB_ASSERT(this!=&other, "Don't swap an instance with itself"); + swap(my_key, other.my_key); + super::table_swap(other); + } +}; + +//! Random access iterator for traversing the thread local copies. +template< typename Container, typename Value > +class enumerable_thread_specific_iterator +{ + //! 
current position in the concurrent_vector + + Container *my_container; + typename Container::size_type my_index; + mutable Value *my_value; + + template<typename C, typename T, typename U> + friend bool operator==( const enumerable_thread_specific_iterator<C, T>& i, + const enumerable_thread_specific_iterator<C, U>& j ); + + template<typename C, typename T, typename U> + friend bool operator<( const enumerable_thread_specific_iterator<C,T>& i, + const enumerable_thread_specific_iterator<C,U>& j ); + + template<typename C, typename T, typename U> + friend std::ptrdiff_t operator-( const enumerable_thread_specific_iterator<C,T>& i, + const enumerable_thread_specific_iterator<C,U>& j ); + + template<typename C, typename U> + friend class enumerable_thread_specific_iterator; + +public: + //! STL support + using difference_type = std::ptrdiff_t; + using value_type = Value; + using pointer = Value*; + using reference = Value&; + using iterator_category = std::random_access_iterator_tag; + + enumerable_thread_specific_iterator( const Container &container, typename Container::size_type index ) : + my_container(&const_cast<Container &>(container)), my_index(index), my_value(nullptr) {} + + //! Default constructor + enumerable_thread_specific_iterator() : my_container(nullptr), my_index(0), my_value(nullptr) {} + + template<typename U> + enumerable_thread_specific_iterator( const enumerable_thread_specific_iterator<Container, U>& other ) : + my_container( other.my_container ), my_index( other.my_index), my_value( const_cast<Value *>(other.my_value) ) {} + + enumerable_thread_specific_iterator operator+( std::ptrdiff_t offset ) const { + return enumerable_thread_specific_iterator(*my_container, my_index + offset); + } + + friend enumerable_thread_specific_iterator operator+( std::ptrdiff_t offset, enumerable_thread_specific_iterator v ) { + return enumerable_thread_specific_iterator(*v.my_container, v.my_index + offset); + } + + enumerable_thread_specific_iterator &operator+=( std::ptrdiff_t offset ) { + my_index += offset; + my_value = nullptr; + return *this; + } + + enumerable_thread_specific_iterator operator-( std::ptrdiff_t offset ) const { + return enumerable_thread_specific_iterator( *my_container, my_index-offset ); + } + + enumerable_thread_specific_iterator &operator-=( std::ptrdiff_t offset ) { + my_index -= offset; + my_value = nullptr; + return *this; + } + + Value& operator*() const { + Value* value = my_value; + if( !value ) { + value = my_value = (*my_container)[my_index].value(); + } + __TBB_ASSERT( value==(*my_container)[my_index].value(), "corrupt cache" ); + return *value; + } + + Value& operator[]( std::ptrdiff_t k ) const { + return *(*my_container)[my_index + k].value(); + } + + Value* operator->() const {return &operator*();} + + enumerable_thread_specific_iterator& operator++() { + ++my_index; + my_value = nullptr; + return *this; + } + + enumerable_thread_specific_iterator& operator--() { + --my_index; + my_value = nullptr; + return *this; + } + + //! Post increment + enumerable_thread_specific_iterator operator++(int) { + enumerable_thread_specific_iterator result = *this; + ++my_index; + my_value = nullptr; + return result; + } + + //! 
Post decrement + enumerable_thread_specific_iterator operator--(int) { + enumerable_thread_specific_iterator result = *this; + --my_index; + my_value = nullptr; + return result; + } +}; + +template<typename Container, typename T, typename U> +bool operator==( const enumerable_thread_specific_iterator<Container, T>& i, + const enumerable_thread_specific_iterator<Container, U>& j ) { + return i.my_index == j.my_index && i.my_container == j.my_container; +} + +template<typename Container, typename T, typename U> +bool operator!=( const enumerable_thread_specific_iterator<Container,T>& i, + const enumerable_thread_specific_iterator<Container,U>& j ) { + return !(i==j); +} + +template<typename Container, typename T, typename U> +bool operator<( const enumerable_thread_specific_iterator<Container,T>& i, + const enumerable_thread_specific_iterator<Container,U>& j ) { + return i.my_index<j.my_index; +} + +template<typename Container, typename T, typename U> +bool operator>( const enumerable_thread_specific_iterator<Container,T>& i, + const enumerable_thread_specific_iterator<Container,U>& j ) { + return j<i; +} + +template<typename Container, typename T, typename U> +bool operator>=( const enumerable_thread_specific_iterator<Container,T>& i, + const enumerable_thread_specific_iterator<Container,U>& j ) { + return !(i<j); +} + +template<typename Container, typename T, typename U> +bool operator<=( const enumerable_thread_specific_iterator<Container,T>& i, + const enumerable_thread_specific_iterator<Container,U>& j ) { + return !(j<i); +} + +template<typename Container, typename T, typename U> +std::ptrdiff_t operator-( const enumerable_thread_specific_iterator<Container,T>& i, + const enumerable_thread_specific_iterator<Container,U>& j ) { + return i.my_index-j.my_index; +} + +template<typename SegmentedContainer, typename Value > +class segmented_iterator +{ + template<typename C, typename T, typename U> + friend bool operator==(const segmented_iterator<C,T>& i, const segmented_iterator<C,U>& j); + + template<typename C, typename T, typename U> + friend bool operator!=(const segmented_iterator<C,T>& i, const segmented_iterator<C,U>& j); + + template<typename C, typename U> + friend class segmented_iterator; + +public: + segmented_iterator() {my_segcont = nullptr;} + + segmented_iterator( const SegmentedContainer& _segmented_container ) : + my_segcont(const_cast<SegmentedContainer*>(&_segmented_container)), + outer_iter(my_segcont->end()) { } + + ~segmented_iterator() {} + + using InnerContainer = typename SegmentedContainer::value_type; + using inner_iterator = typename InnerContainer::iterator; + using outer_iterator = typename SegmentedContainer::iterator; + + // STL support + // TODO: inherit all types from segmented container? + using difference_type = std::ptrdiff_t; + using value_type = Value; + using size_type = typename SegmentedContainer::size_type; + using pointer = Value*; + using reference = Value&; + using iterator_category = std::input_iterator_tag; + + // Copy Constructor + template<typename U> + segmented_iterator(const segmented_iterator<SegmentedContainer, U>& other) : + my_segcont(other.my_segcont), + outer_iter(other.outer_iter), + // can we assign a default-constructed iterator to inner if we're at the end? 
+ inner_iter(other.inner_iter) + {} + + // assignment + template<typename U> + segmented_iterator& operator=( const segmented_iterator<SegmentedContainer, U>& other) { + my_segcont = other.my_segcont; + outer_iter = other.outer_iter; + if(outer_iter != my_segcont->end()) inner_iter = other.inner_iter; + return *this; + } + + // allow assignment of outer iterator to segmented iterator. Once it is + // assigned, move forward until a non-empty inner container is found or + // the end of the outer container is reached. + segmented_iterator& operator=(const outer_iterator& new_outer_iter) { + __TBB_ASSERT(my_segcont != nullptr, NULL); + // check that this iterator points to something inside the segmented container + for(outer_iter = new_outer_iter ;outer_iter!=my_segcont->end(); ++outer_iter) { + if( !outer_iter->empty() ) { + inner_iter = outer_iter->begin(); + break; + } + } + return *this; + } + + // pre-increment + segmented_iterator& operator++() { + advance_me(); + return *this; + } + + // post-increment + segmented_iterator operator++(int) { + segmented_iterator tmp = *this; + operator++(); + return tmp; + } + + bool operator==(const outer_iterator& other_outer) const { + __TBB_ASSERT(my_segcont != nullptr, NULL); + return (outer_iter == other_outer && + (outer_iter == my_segcont->end() || inner_iter == outer_iter->begin())); + } + + bool operator!=(const outer_iterator& other_outer) const { + return !operator==(other_outer); + + } + + // (i)* RHS + reference operator*() const { + __TBB_ASSERT(my_segcont != nullptr, NULL); + __TBB_ASSERT(outer_iter != my_segcont->end(), "Dereferencing a pointer at end of container"); + __TBB_ASSERT(inner_iter != outer_iter->end(), NULL); // should never happen + return *inner_iter; + } + + // i-> + pointer operator->() const { return &operator*();} + +private: + SegmentedContainer* my_segcont; + outer_iterator outer_iter; + inner_iterator inner_iter; + + void advance_me() { + __TBB_ASSERT(my_segcont != nullptr, NULL); + __TBB_ASSERT(outer_iter != my_segcont->end(), NULL); // not true if there are no inner containers + __TBB_ASSERT(inner_iter != outer_iter->end(), NULL); // not true if the inner containers are all empty. + ++inner_iter; + while(inner_iter == outer_iter->end() && ++outer_iter != my_segcont->end()) { + inner_iter = outer_iter->begin(); + } + } +}; // segmented_iterator + +template<typename SegmentedContainer, typename T, typename U> +bool operator==( const segmented_iterator<SegmentedContainer,T>& i, + const segmented_iterator<SegmentedContainer,U>& j ) { + if(i.my_segcont != j.my_segcont) return false; + if(i.my_segcont == nullptr) return true; + if(i.outer_iter != j.outer_iter) return false; + if(i.outer_iter == i.my_segcont->end()) return true; + return i.inner_iter == j.inner_iter; +} + +// != +template<typename SegmentedContainer, typename T, typename U> +bool operator!=( const segmented_iterator<SegmentedContainer,T>& i, + const segmented_iterator<SegmentedContainer,U>& j ) { + return !(i==j); +} + +template<typename T> +struct construct_by_default: no_assign { + void construct(void*where) {new(where) T();} // C++ note: the () in T() ensure zero initialization. 
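    // (Illustrative: for T = int, construct(where) performs `new(where) int()`, so the slot is
    // value-initialized to 0 rather than left indeterminate.)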
+ construct_by_default( int ) {} +}; + +template<typename T> +struct construct_by_exemplar: no_assign { + const T exemplar; + void construct(void*where) {new(where) T(exemplar);} + construct_by_exemplar( const T& t ) : exemplar(t) {} + construct_by_exemplar( T&& t ) : exemplar(std::move(t)) {} +}; + +template<typename T, typename Finit> +struct construct_by_finit: no_assign { + Finit f; + void construct(void* where) {new(where) T(f());} + construct_by_finit( Finit&& f_ ) : f(std::move(f_)) {} +}; + +template<typename T, typename... P> +struct construct_by_args: no_assign { + stored_pack<P...> pack; + void construct(void* where) { + call( [where](const typename std::decay<P>::type&... args ){ + new(where) T(args...); + }, pack ); + } + construct_by_args( P&& ... args ) : pack(std::forward<P>(args)...) {} +}; + +// storage for initialization function pointer +// TODO: consider removing the template parameter T here and in callback_leaf +class callback_base { +public: + // Clone *this + virtual callback_base* clone() const = 0; + // Destruct and free *this + virtual void destroy() = 0; + // Need virtual destructor to satisfy GCC compiler warning + virtual ~callback_base() { } + // Construct T at where + virtual void construct(void* where) = 0; +}; + +template <typename Constructor> +class callback_leaf: public callback_base, Constructor { + template<typename... P> callback_leaf( P&& ... params ) : Constructor(std::forward<P>(params)...) {} + // TODO: make the construction/destruction consistent (use allocator.construct/destroy) + using my_allocator_type = typename tbb::tbb_allocator<callback_leaf>; + + callback_base* clone() const override { + return make(*this); + } + + void destroy() override { + my_allocator_type alloc; + tbb::detail::allocator_traits<my_allocator_type>::destroy(alloc, this); + tbb::detail::allocator_traits<my_allocator_type>::deallocate(alloc, this, 1); + } + + void construct(void* where) override { + Constructor::construct(where); + } + +public: + template<typename... P> + static callback_base* make( P&& ... params ) { + void* where = my_allocator_type().allocate(1); + return new(where) callback_leaf( std::forward<P>(params)... ); + } +}; + +//! Template for recording construction of objects in table +/** All maintenance of the space will be done explicitly on push_back, + and all thread local copies must be destroyed before the concurrent + vector is deleted. + + The flag is_built is initialized to false. When the local is + successfully-constructed, set the flag to true or call value_committed(). + If the constructor throws, the flag will be false. +*/ +template<typename U> +struct ets_element { + detail::aligned_space<U> my_space; + bool is_built; + ets_element() { is_built = false; } // not currently-built + U* value() { return my_space.begin(); } + U* value_committed() { is_built = true; return my_space.begin(); } + ~ets_element() { + if(is_built) { + my_space.begin()->~U(); + is_built = false; + } + } +}; + +// A predicate that can be used for a compile-time compatibility check of ETS instances +// Ideally, it should have been declared inside the ETS class, but unfortunately +// in that case VS2013 does not enable the variadic constructor. 
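// For example (illustrative): is_compatible_ets<int, enumerable_thread_specific<int>>::value is
// true, whereas is_compatible_ets<int, enumerable_thread_specific<long>>::value is false.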
+template<typename T, typename ETS> struct is_compatible_ets : std::false_type {}; +template<typename T, typename U, typename A, ets_key_usage_type C> +struct is_compatible_ets< T, enumerable_thread_specific<U,A,C> > : std::is_same<T, U> {}; + +// A predicate that checks whether, for a variable 'foo' of type T, foo() is a valid expression +template <typename T> using has_empty_braces_operator = decltype(std::declval<T>()()); +template <typename T> using is_callable_no_args = supports<T, has_empty_braces_operator>; + +//! The enumerable_thread_specific container +/** enumerable_thread_specific has the following properties: + - thread-local copies are lazily created, with default, exemplar or function initialization. + - thread-local copies do not move (during lifetime, and excepting clear()) so the address of a copy is invariant. + - the contained objects need not have operator=() defined if combine is not used. + - enumerable_thread_specific containers may be copy-constructed or assigned. + - thread-local copies can be managed by hash-table, or can be accessed via TLS storage for speed. + - outside of parallel contexts, the contents of all thread-local copies are accessible by iterator or using combine or combine_each methods + +@par Segmented iterator + When the thread-local objects are containers with input_iterators defined, a segmented iterator may + be used to iterate over all the elements of all thread-local copies. + +@par combine and combine_each + - Both methods are defined for enumerable_thread_specific. + - combine() requires the type T have operator=() defined. + - neither method modifies the contents of the object (though there is no guarantee that the applied methods do not modify the object.) + - Both are evaluated in serial context (the methods are assumed to be non-benign.) + +@ingroup containers */ +template <typename T, typename Allocator=cache_aligned_allocator<T>, + ets_key_usage_type ETS_key_type=ets_no_key > +class enumerable_thread_specific: ets_base<ETS_key_type> { + + template<typename U, typename A, ets_key_usage_type C> friend class enumerable_thread_specific; + + using padded_element = padded<ets_element<T>>; + + //! 
A generic range, used to create range objects from the iterators + template<typename I> + class generic_range_type: public blocked_range<I> { + public: + using value_type = T; + using reference = T&; + using const_reference = const T&; + using iterator = I; + using difference_type = std::ptrdiff_t; + + generic_range_type( I begin_, I end_, std::size_t grainsize_ = 1) : blocked_range<I>(begin_,end_,grainsize_) {} + template<typename U> + generic_range_type( const generic_range_type<U>& r) : blocked_range<I>(r.begin(),r.end(),r.grainsize()) {} + generic_range_type( generic_range_type& r, split ) : blocked_range<I>(r,split()) {} + }; + + using allocator_traits_type = tbb::detail::allocator_traits<Allocator>; + + using padded_allocator_type = typename allocator_traits_type::template rebind_alloc<padded_element>; + using internal_collection_type = tbb::concurrent_vector< padded_element, padded_allocator_type >; + + callback_base *my_construct_callback; + + internal_collection_type my_locals; + + // TODO: consider unifying the callback mechanism for all create_local* methods below + // (likely non-compatible and requires interface version increase) + void* create_local() override { + padded_element& lref = *my_locals.grow_by(1); + my_construct_callback->construct(lref.value()); + return lref.value_committed(); + } + + static void* create_local_by_copy( ets_base<ETS_key_type>& base, void* p ) { + enumerable_thread_specific& ets = static_cast<enumerable_thread_specific&>(base); + padded_element& lref = *ets.my_locals.grow_by(1); + new(lref.value()) T(*static_cast<T*>(p)); + return lref.value_committed(); + } + + static void* create_local_by_move( ets_base<ETS_key_type>& base, void* p ) { + enumerable_thread_specific& ets = static_cast<enumerable_thread_specific&>(base); + padded_element& lref = *ets.my_locals.grow_by(1); + new(lref.value()) T(std::move(*static_cast<T*>(p))); + return lref.value_committed(); + } + + using array_allocator_type = typename allocator_traits_type::template rebind_alloc<uintptr_t>; + + // _size is in bytes + void* create_array(std::size_t _size) override { + std::size_t nelements = (_size + sizeof(uintptr_t) -1) / sizeof(uintptr_t); + return array_allocator_type().allocate(nelements); + } + + void free_array( void* _ptr, std::size_t _size) override { + std::size_t nelements = (_size + sizeof(uintptr_t) -1) / sizeof(uintptr_t); + array_allocator_type().deallocate( reinterpret_cast<uintptr_t *>(_ptr),nelements); + } + +public: + + //! Basic types + using value_type = T; + using allocator_type = Allocator; + using size_type = typename internal_collection_type::size_type; + using difference_type = typename internal_collection_type::difference_type; + using reference = value_type&; + using const_reference = const value_type&; + + using pointer = typename allocator_traits_type::pointer; + using const_pointer = typename allocator_traits_type::const_pointer; + + // Iterator types + using iterator = enumerable_thread_specific_iterator<internal_collection_type, value_type>; + using const_iterator = enumerable_thread_specific_iterator<internal_collection_type, const value_type>; + + // Parallel range types + using range_type = generic_range_type<iterator>; + using const_range_type = generic_range_type<const_iterator>; + + //! Default constructor. Each local instance of T is default constructed. + enumerable_thread_specific() : my_construct_callback( + callback_leaf<construct_by_default<T> >::make(/*dummy argument*/0) + ){} + + //! Constructor with initializer functor. 
Each local instance of T is constructed by T(finit()). + template <typename Finit , typename = typename std::enable_if<is_callable_no_args<typename std::decay<Finit>::type>::value>::type> + explicit enumerable_thread_specific( Finit finit ) : my_construct_callback( + callback_leaf<construct_by_finit<T,Finit> >::make( std::move(finit) ) + ){} + + //! Constructor with exemplar. Each local instance of T is copy-constructed from the exemplar. + explicit enumerable_thread_specific( const T& exemplar ) : my_construct_callback( + callback_leaf<construct_by_exemplar<T> >::make( exemplar ) + ){} + + explicit enumerable_thread_specific( T&& exemplar ) : my_construct_callback( + callback_leaf<construct_by_exemplar<T> >::make( std::move(exemplar) ) + ){} + + //! Variadic constructor with initializer arguments. Each local instance of T is constructed by T(args...) + template <typename P1, typename... P, + typename = typename std::enable_if<!is_callable_no_args<typename std::decay<P1>::type>::value + && !is_compatible_ets<T, typename std::decay<P1>::type>::value + && !std::is_same<T, typename std::decay<P1>::type>::value + >::type> + enumerable_thread_specific( P1&& arg1, P&& ... args ) : my_construct_callback( + callback_leaf<construct_by_args<T,P1,P...> >::make( std::forward<P1>(arg1), std::forward<P>(args)... ) + ){} + + //! Destructor + ~enumerable_thread_specific() { + if(my_construct_callback) my_construct_callback->destroy(); + // Deallocate the hash table before overridden free_array() becomes inaccessible + this->ets_base<ETS_key_type>::table_clear(); + } + + //! returns reference to local, discarding exists + reference local() { + bool exists; + return local(exists); + } + + //! Returns reference to calling thread's local copy, creating one if necessary + reference local(bool& exists) { + void* ptr = this->table_lookup(exists); + return *(T*)ptr; + } + + //! Get the number of local copies + size_type size() const { return my_locals.size(); } + + //! true if there have been no local copies created + bool empty() const { return my_locals.empty(); } + + //! begin iterator + iterator begin() { return iterator( my_locals, 0 ); } + //! end iterator + iterator end() { return iterator(my_locals, my_locals.size() ); } + + //! begin const iterator + const_iterator begin() const { return const_iterator(my_locals, 0); } + + //! end const iterator + const_iterator end() const { return const_iterator(my_locals, my_locals.size()); } + + //! Get range for parallel algorithms + range_type range( std::size_t grainsize=1 ) { return range_type( begin(), end(), grainsize ); } + + //! Get const range for parallel algorithms + const_range_type range( std::size_t grainsize=1 ) const { return const_range_type( begin(), end(), grainsize ); } + + //! Destroys local copies + void clear() { + my_locals.clear(); + this->table_clear(); + // callback is not destroyed + } + +private: + template<typename A2, ets_key_usage_type C2> + void internal_copy(const enumerable_thread_specific<T, A2, C2>& other) { + // this tests is_compatible_ets + static_assert( (is_compatible_ets<T, typename std::decay<decltype(other)>::type>::value), "is_compatible_ets fails" ); + // Initialize my_construct_callback first, so that it is valid even if rest of this routine throws an exception. 
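+ // clone() rather than sharing the pointer: each container owns its callback
+ // and calls destroy() on it from the destructor and the assignment operators.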
+ my_construct_callback = other.my_construct_callback->clone(); + __TBB_ASSERT(my_locals.size()==0,NULL); + my_locals.reserve(other.size()); + this->table_elementwise_copy( other, create_local_by_copy ); + } + + void internal_swap(enumerable_thread_specific& other) { + using std::swap; + __TBB_ASSERT( this!=&other, NULL ); + swap(my_construct_callback, other.my_construct_callback); + // concurrent_vector::swap() preserves storage space, + // so addresses to the vector kept in ETS hash table remain valid. + swap(my_locals, other.my_locals); + this->ets_base<ETS_key_type>::table_swap(other); + } + + template<typename A2, ets_key_usage_type C2> + void internal_move(enumerable_thread_specific<T, A2, C2>&& other) { + static_assert( (is_compatible_ets<T, typename std::decay<decltype(other)>::type>::value), "is_compatible_ets fails" ); + my_construct_callback = other.my_construct_callback; + other.my_construct_callback = nullptr; + __TBB_ASSERT(my_locals.size()==0,NULL); + my_locals.reserve(other.size()); + this->table_elementwise_copy( other, create_local_by_move ); + } + +public: + enumerable_thread_specific( const enumerable_thread_specific& other ) + : ets_base<ETS_key_type>() /* prevents GCC warnings with -Wextra */ + { + internal_copy(other); + } + + template<typename Alloc, ets_key_usage_type Cachetype> + enumerable_thread_specific( const enumerable_thread_specific<T, Alloc, Cachetype>& other ) + { + internal_copy(other); + } + + enumerable_thread_specific( enumerable_thread_specific&& other ) : my_construct_callback() + { + // TODO: use internal_move correctly here + internal_swap(other); + } + + template<typename Alloc, ets_key_usage_type Cachetype> + enumerable_thread_specific( enumerable_thread_specific<T, Alloc, Cachetype>&& other ) : my_construct_callback() + { + internal_move(std::move(other)); + } + + enumerable_thread_specific& operator=( const enumerable_thread_specific& other ) + { + if( this != &other ) { + this->clear(); + my_construct_callback->destroy(); + internal_copy( other ); + } + return *this; + } + + template<typename Alloc, ets_key_usage_type Cachetype> + enumerable_thread_specific& operator=( const enumerable_thread_specific<T, Alloc, Cachetype>& other ) + { + __TBB_ASSERT( static_cast<void*>(this)!=static_cast<const void*>(&other), NULL ); // Objects of different types + this->clear(); + my_construct_callback->destroy(); + internal_copy(other); + return *this; + } + + enumerable_thread_specific& operator=( enumerable_thread_specific&& other ) + { + if( this != &other ) { + // TODO: use internal_move correctly here + internal_swap(other); + } + return *this; + } + + template<typename Alloc, ets_key_usage_type Cachetype> + enumerable_thread_specific& operator=( enumerable_thread_specific<T, Alloc, Cachetype>&& other ) + { + __TBB_ASSERT( static_cast<void*>(this)!=static_cast<const void*>(&other), NULL ); // Objects of different types + this->clear(); + my_construct_callback->destroy(); + internal_move(std::move(other)); + return *this; + } + + // CombineFunc has signature T(T,T) or T(const T&, const T&) + template <typename CombineFunc> + T combine(CombineFunc f_combine) { + if(begin() == end()) { + ets_element<T> location; + my_construct_callback->construct(location.value()); + return *location.value_committed(); + } + const_iterator ci = begin(); + T my_result = *ci; + while(++ci != end()) + my_result = f_combine( my_result, *ci ); + return my_result; + } + + // combine_func_t takes T by value or by [const] reference, and returns nothing + template <typename 
CombineFunc> + void combine_each(CombineFunc f_combine) { + for(iterator ci = begin(); ci != end(); ++ci) { + f_combine( *ci ); + } + } + +}; // enumerable_thread_specific + +template< typename Container > +class flattened2d { + // This intermediate typedef is to address issues with VC7.1 compilers + using conval_type = typename Container::value_type; + +public: + //! Basic types + using size_type = typename conval_type::size_type; + using difference_type = typename conval_type::difference_type; + using allocator_type = typename conval_type::allocator_type; + using value_type = typename conval_type::value_type; + using reference = typename conval_type::reference; + using const_reference = typename conval_type::const_reference; + using pointer = typename conval_type::pointer; + using const_pointer = typename conval_type::const_pointer; + + using iterator = segmented_iterator<Container, value_type>; + using const_iterator = segmented_iterator<Container, const value_type>; + + flattened2d( const Container &c, typename Container::const_iterator b, typename Container::const_iterator e ) : + my_container(const_cast<Container*>(&c)), my_begin(b), my_end(e) { } + + explicit flattened2d( const Container &c ) : + my_container(const_cast<Container*>(&c)), my_begin(c.begin()), my_end(c.end()) { } + + iterator begin() { return iterator(*my_container) = my_begin; } + iterator end() { return iterator(*my_container) = my_end; } + const_iterator begin() const { return const_iterator(*my_container) = my_begin; } + const_iterator end() const { return const_iterator(*my_container) = my_end; } + + size_type size() const { + size_type tot_size = 0; + for(typename Container::const_iterator i = my_begin; i != my_end; ++i) { + tot_size += i->size(); + } + return tot_size; + } + +private: + Container *my_container; + typename Container::const_iterator my_begin; + typename Container::const_iterator my_end; +}; + +template <typename Container> +flattened2d<Container> flatten2d(const Container &c, const typename Container::const_iterator b, const typename Container::const_iterator e) { + return flattened2d<Container>(c, b, e); +} + +template <typename Container> +flattened2d<Container> flatten2d(const Container &c) { + return flattened2d<Container>(c); +} + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::enumerable_thread_specific; +using detail::d1::flattened2d; +using detail::d1::flatten2d; +// ets enum keys +using detail::d1::ets_key_usage_type; +using detail::d1::ets_key_per_instance; +using detail::d1::ets_no_key; +#if __TBB_RESUMABLE_TASKS +using detail::d1::ets_suspend_aware; +#endif +} // inline namespace v1 + +} // namespace tbb + +#endif // __TBB_enumerable_thread_specific_H + diff --git a/contrib/libs/tbb/include/oneapi/tbb/flow_graph.h b/contrib/libs/tbb/include/oneapi/tbb/flow_graph.h new file mode 100644 index 0000000000..cc2cc7b605 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/flow_graph.h @@ -0,0 +1,3221 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_flow_graph_H +#define __TBB_flow_graph_H + +#include <atomic> +#include <memory> +#include <type_traits> + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" +#include "spin_mutex.h" +#include "null_mutex.h" +#include "spin_rw_mutex.h" +#include "null_rw_mutex.h" +#include "detail/_pipeline_filters.h" +#include "detail/_task.h" +#include "detail/_small_object_pool.h" +#include "cache_aligned_allocator.h" +#include "detail/_exception.h" +#include "detail/_template_helpers.h" +#include "detail/_aggregator.h" +#include "detail/_allocator_traits.h" +#include "profiling.h" +#include "task_arena.h" + +#if TBB_USE_PROFILING_TOOLS && ( __linux__ || __APPLE__ ) + #if __INTEL_COMPILER + // Disabled warning "routine is both inline and noinline" + #pragma warning (push) + #pragma warning( disable: 2196 ) + #endif + #define __TBB_NOINLINE_SYM __attribute__((noinline)) +#else + #define __TBB_NOINLINE_SYM +#endif + +#include <tuple> +#include <list> +#include <queue> + +/** @file + \brief The graph related classes and functions + + There are some applications that best express dependencies as messages + passed between nodes in a graph. These messages may contain data or + simply act as signals that a predecessors has completed. The graph + class and its associated node classes can be used to express such + applications. +*/ + +namespace tbb { +namespace detail { + +namespace d1 { + +//! An enumeration the provides the two most common concurrency levels: unlimited and serial +enum concurrency { unlimited = 0, serial = 1 }; + +//! A generic null type +struct null_type {}; + +//! An empty class used for messages that mean "I'm done" +class continue_msg {}; + +//! Forward declaration section +template< typename T > class sender; +template< typename T > class receiver; +class continue_receiver; + +template< typename T, typename U > class limiter_node; // needed for resetting decrementer + +template<typename T, typename M> class successor_cache; +template<typename T, typename M> class broadcast_cache; +template<typename T, typename M> class round_robin_cache; +template<typename T, typename M> class predecessor_cache; +template<typename T, typename M> class reservable_predecessor_cache; + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET +namespace order { +struct following; +struct preceding; +} +template<typename Order, typename... Args> struct node_set; +#endif + + +} // namespace d1 +} // namespace detail +} // namespace tbb + +//! The graph class +#include "detail/_flow_graph_impl.h" + +namespace tbb { +namespace detail { +namespace d1 { + +static inline std::pair<graph_task*, graph_task*> order_tasks(graph_task* first, graph_task* second) { + if (second->priority > first->priority) + return std::make_pair(second, first); + return std::make_pair(first, second); +} + +// submit task if necessary. Returns the non-enqueued task if there is one. +static inline graph_task* combine_tasks(graph& g, graph_task* left, graph_task* right) { + // if no RHS task, don't change left. + if (right == NULL) return left; + // right != NULL + if (left == NULL) return right; + if (left == SUCCESSFULLY_ENQUEUED) return right; + // left contains a task + if (right != SUCCESSFULLY_ENQUEUED) { + // both are valid tasks + auto tasks_pair = order_tasks(left, right); + spawn_in_graph_arena(g, *tasks_pair.first); + return tasks_pair.second; + } + return left; +} + +//! 
Pure virtual template class that defines a sender of messages of type T +template< typename T > +class sender { +public: + virtual ~sender() {} + + //! Request an item from the sender + virtual bool try_get( T & ) { return false; } + + //! Reserves an item in the sender + virtual bool try_reserve( T & ) { return false; } + + //! Releases the reserved item + virtual bool try_release( ) { return false; } + + //! Consumes the reserved item + virtual bool try_consume( ) { return false; } + +protected: + //! The output type of this sender + typedef T output_type; + + //! The successor type for this node + typedef receiver<T> successor_type; + + //! Add a new successor to this node + virtual bool register_successor( successor_type &r ) = 0; + + //! Removes a successor from this node + virtual bool remove_successor( successor_type &r ) = 0; + + template<typename C> + friend bool register_successor(sender<C>& s, receiver<C>& r); + + template<typename C> + friend bool remove_successor (sender<C>& s, receiver<C>& r); +}; // class sender<T> + +template<typename C> +bool register_successor(sender<C>& s, receiver<C>& r) { + return s.register_successor(r); +} + +template<typename C> +bool remove_successor(sender<C>& s, receiver<C>& r) { + return s.remove_successor(r); +} + +//! Pure virtual template class that defines a receiver of messages of type T +template< typename T > +class receiver { +public: + //! Destructor + virtual ~receiver() {} + + //! Put an item to the receiver + bool try_put( const T& t ) { + graph_task *res = try_put_task(t); + if (!res) return false; + if (res != SUCCESSFULLY_ENQUEUED) spawn_in_graph_arena(graph_reference(), *res); + return true; + } + + //! put item to successor; return task to run the successor if possible. +protected: + //! The input type of this receiver + typedef T input_type; + + //! The predecessor type for this node + typedef sender<T> predecessor_type; + + template< typename R, typename B > friend class run_and_put_task; + template< typename X, typename Y > friend class broadcast_cache; + template< typename X, typename Y > friend class round_robin_cache; + virtual graph_task *try_put_task(const T& t) = 0; + virtual graph& graph_reference() const = 0; + + template<typename TT, typename M> friend class successor_cache; + virtual bool is_continue_receiver() { return false; } + + // TODO revamp: reconsider the inheritance and move node priority out of receiver + virtual node_priority_t priority() const { return no_priority; } + + //! Add a predecessor to the node + virtual bool register_predecessor( predecessor_type & ) { return false; } + + //! Remove a predecessor from the node + virtual bool remove_predecessor( predecessor_type & ) { return false; } + + template <typename C> + friend bool register_predecessor(receiver<C>& r, sender<C>& s); + template <typename C> + friend bool remove_predecessor (receiver<C>& r, sender<C>& s); +}; // class receiver<T> + +template <typename C> +bool register_predecessor(receiver<C>& r, sender<C>& s) { + return r.register_predecessor(s); +} + +template <typename C> +bool remove_predecessor(receiver<C>& r, sender<C>& s) { + return r.remove_predecessor(s); +} + +//! Base class for receivers of completion messages +/** These receivers automatically reset, but cannot be explicitly waited on */ +class continue_receiver : public receiver< continue_msg > { +protected: + + //! 
Constructor + explicit continue_receiver( int number_of_predecessors, node_priority_t a_priority ) { + my_predecessor_count = my_initial_predecessor_count = number_of_predecessors; + my_current_count = 0; + my_priority = a_priority; + } + + //! Copy constructor + continue_receiver( const continue_receiver& src ) : receiver<continue_msg>() { + my_predecessor_count = my_initial_predecessor_count = src.my_initial_predecessor_count; + my_current_count = 0; + my_priority = src.my_priority; + } + + //! Increments the trigger threshold + bool register_predecessor( predecessor_type & ) override { + spin_mutex::scoped_lock l(my_mutex); + ++my_predecessor_count; + return true; + } + + //! Decrements the trigger threshold + /** Does not check to see if the removal of the predecessor now makes the current count + exceed the new threshold. So removing a predecessor while the graph is active can cause + unexpected results. */ + bool remove_predecessor( predecessor_type & ) override { + spin_mutex::scoped_lock l(my_mutex); + --my_predecessor_count; + return true; + } + + //! The input type + typedef continue_msg input_type; + + //! The predecessor type for this node + typedef receiver<input_type>::predecessor_type predecessor_type; + + template< typename R, typename B > friend class run_and_put_task; + template<typename X, typename Y> friend class broadcast_cache; + template<typename X, typename Y> friend class round_robin_cache; + // execute body is supposed to be too small to create a task for. + graph_task* try_put_task( const input_type & ) override { + { + spin_mutex::scoped_lock l(my_mutex); + if ( ++my_current_count < my_predecessor_count ) + return SUCCESSFULLY_ENQUEUED; + else + my_current_count = 0; + } + graph_task* res = execute(); + return res? res : SUCCESSFULLY_ENQUEUED; + } + + spin_mutex my_mutex; + int my_predecessor_count; + int my_current_count; + int my_initial_predecessor_count; + node_priority_t my_priority; + // the friend declaration in the base class did not eliminate the "protected class" + // error in gcc 4.1.2 + template<typename U, typename V> friend class limiter_node; + + virtual void reset_receiver( reset_flags f ) { + my_current_count = 0; + if (f & rf_clear_edges) { + my_predecessor_count = my_initial_predecessor_count; + } + } + + //! Does whatever should happen when the threshold is reached + /** This should be very fast or else spawn a task. This is + called while the sender is blocked in the try_put(). 
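+ A null return is mapped to SUCCESSFULLY_ENQUEUED by try_put_task() above, so
+ an override that completes all of its work inline may simply return nullptr.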
*/ + virtual graph_task* execute() = 0; + template<typename TT, typename M> friend class successor_cache; + bool is_continue_receiver() override { return true; } + + node_priority_t priority() const override { return my_priority; } +}; // class continue_receiver + +#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING + template <typename K, typename T> + K key_from_message( const T &t ) { + return t.key(); + } +#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ + +} // d1 +} // detail +} // tbb + +#include "detail/_flow_graph_trace_impl.h" +#include "detail/_hash_compare.h" + +namespace tbb { +namespace detail { +namespace d1 { + +#include "detail/_flow_graph_body_impl.h" +#include "detail/_flow_graph_cache_impl.h" +#include "detail/_flow_graph_types_impl.h" + +using namespace graph_policy_namespace; + +template <typename C, typename N> +graph_iterator<C,N>::graph_iterator(C *g, bool begin) : my_graph(g), current_node(NULL) +{ + if (begin) current_node = my_graph->my_nodes; + //else it is an end iterator by default +} + +template <typename C, typename N> +typename graph_iterator<C,N>::reference graph_iterator<C,N>::operator*() const { + __TBB_ASSERT(current_node, "graph_iterator at end"); + return *operator->(); +} + +template <typename C, typename N> +typename graph_iterator<C,N>::pointer graph_iterator<C,N>::operator->() const { + return current_node; +} + +template <typename C, typename N> +void graph_iterator<C,N>::internal_forward() { + if (current_node) current_node = current_node->next; +} + +//! Constructs a graph with isolated task_group_context +inline graph::graph() : my_wait_context(0), my_nodes(NULL), my_nodes_last(NULL), my_task_arena(NULL) { + prepare_task_arena(); + own_context = true; + cancelled = false; + caught_exception = false; + my_context = new (r1::cache_aligned_allocate(sizeof(task_group_context))) task_group_context(FLOW_TASKS); + fgt_graph(this); + my_is_active = true; +} + +inline graph::graph(task_group_context& use_this_context) : + my_wait_context(0), my_context(&use_this_context), my_nodes(NULL), my_nodes_last(NULL), my_task_arena(NULL) { + prepare_task_arena(); + own_context = false; + cancelled = false; + caught_exception = false; + fgt_graph(this); + my_is_active = true; +} + +inline graph::~graph() { + wait_for_all(); + if (own_context) { + my_context->~task_group_context(); + r1::cache_aligned_deallocate(my_context); + } + delete my_task_arena; +} + +inline void graph::reserve_wait() { + my_wait_context.reserve(); + fgt_reserve_wait(this); +} + +inline void graph::release_wait() { + fgt_release_wait(this); + my_wait_context.release(); +} + +inline void graph::register_node(graph_node *n) { + n->next = NULL; + { + spin_mutex::scoped_lock lock(nodelist_mutex); + n->prev = my_nodes_last; + if (my_nodes_last) my_nodes_last->next = n; + my_nodes_last = n; + if (!my_nodes) my_nodes = n; + } +} + +inline void graph::remove_node(graph_node *n) { + { + spin_mutex::scoped_lock lock(nodelist_mutex); + __TBB_ASSERT(my_nodes && my_nodes_last, "graph::remove_node: Error: no registered nodes"); + if (n->prev) n->prev->next = n->next; + if (n->next) n->next->prev = n->prev; + if (my_nodes_last == n) my_nodes_last = n->prev; + if (my_nodes == n) my_nodes = n->next; + } + n->prev = n->next = NULL; +} + +inline void graph::reset( reset_flags f ) { + // reset context + deactivate_graph(*this); + + my_context->reset(); + cancelled = false; + caught_exception = false; + // reset all the nodes comprising the graph + for(iterator ii = begin(); ii != end(); ++ii) { + graph_node 
*my_p = &(*ii); + my_p->reset_node(f); + } + // Reattach the arena. Might be useful to run the graph in a particular task_arena + // while not limiting graph lifetime to a single task_arena::execute() call. + prepare_task_arena( /*reinit=*/true ); + activate_graph(*this); +} + +inline void graph::cancel() { + my_context->cancel_group_execution(); +} + +inline graph::iterator graph::begin() { return iterator(this, true); } + +inline graph::iterator graph::end() { return iterator(this, false); } + +inline graph::const_iterator graph::begin() const { return const_iterator(this, true); } + +inline graph::const_iterator graph::end() const { return const_iterator(this, false); } + +inline graph::const_iterator graph::cbegin() const { return const_iterator(this, true); } + +inline graph::const_iterator graph::cend() const { return const_iterator(this, false); } + +inline graph_node::graph_node(graph& g) : my_graph(g) { + my_graph.register_node(this); +} + +inline graph_node::~graph_node() { + my_graph.remove_node(this); +} + +#include "detail/_flow_graph_node_impl.h" + + +//! An executable node that acts as a source, i.e. it has no predecessors + +template < typename Output > +class input_node : public graph_node, public sender< Output > { +public: + //! The type of the output message, which is complete + typedef Output output_type; + + //! The type of successors of this node + typedef typename sender<output_type>::successor_type successor_type; + + // Input node has no input type + typedef null_type input_type; + + //! Constructor for a node with a successor + template< typename Body > + __TBB_NOINLINE_SYM input_node( graph &g, Body body ) + : graph_node(g), my_active(false) + , my_body( new input_body_leaf< output_type, Body>(body) ) + , my_init_body( new input_body_leaf< output_type, Body>(body) ) + , my_successors(this), my_reserved(false), my_has_cached_item(false) + { + fgt_node_with_body(CODEPTR(), FLOW_INPUT_NODE, &this->my_graph, + static_cast<sender<output_type> *>(this), this->my_body); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename Body, typename... Successors> + input_node( const node_set<order::preceding, Successors...>& successors, Body body ) + : input_node(successors.graph_reference(), body) + { + make_edges(*this, successors); + } +#endif + + //! Copy constructor + __TBB_NOINLINE_SYM input_node( const input_node& src ) + : graph_node(src.my_graph), sender<Output>() + , my_active(false) + , my_body(src.my_init_body->clone()), my_init_body(src.my_init_body->clone()) + , my_successors(this), my_reserved(false), my_has_cached_item(false) + { + fgt_node_with_body(CODEPTR(), FLOW_INPUT_NODE, &this->my_graph, + static_cast<sender<output_type> *>(this), this->my_body); + } + + //! The destructor + ~input_node() { delete my_body; delete my_init_body; } + + //! Add a new successor to this node + bool register_successor( successor_type &r ) override { + spin_mutex::scoped_lock lock(my_mutex); + my_successors.register_successor(r); + if ( my_active ) + spawn_put(); + return true; + } + + //! Removes a successor from this node + bool remove_successor( successor_type &r ) override { + spin_mutex::scoped_lock lock(my_mutex); + my_successors.remove_successor(r); + return true; + } + + //! 
Request an item from the node + bool try_get( output_type &v ) override { + spin_mutex::scoped_lock lock(my_mutex); + if ( my_reserved ) + return false; + + if ( my_has_cached_item ) { + v = my_cached_item; + my_has_cached_item = false; + return true; + } + // we've been asked to provide an item, but we have none. enqueue a task to + // provide one. + if ( my_active ) + spawn_put(); + return false; + } + + //! Reserves an item. + bool try_reserve( output_type &v ) override { + spin_mutex::scoped_lock lock(my_mutex); + if ( my_reserved ) { + return false; + } + + if ( my_has_cached_item ) { + v = my_cached_item; + my_reserved = true; + return true; + } else { + return false; + } + } + + //! Release a reserved item. + /** true = item has been released and so remains in sender, dest must request or reserve future items */ + bool try_release( ) override { + spin_mutex::scoped_lock lock(my_mutex); + __TBB_ASSERT( my_reserved && my_has_cached_item, "releasing non-existent reservation" ); + my_reserved = false; + if(!my_successors.empty()) + spawn_put(); + return true; + } + + //! Consumes a reserved item + bool try_consume( ) override { + spin_mutex::scoped_lock lock(my_mutex); + __TBB_ASSERT( my_reserved && my_has_cached_item, "consuming non-existent reservation" ); + my_reserved = false; + my_has_cached_item = false; + if ( !my_successors.empty() ) { + spawn_put(); + } + return true; + } + + //! Activates a node that was created in the inactive state + void activate() { + spin_mutex::scoped_lock lock(my_mutex); + my_active = true; + if (!my_successors.empty()) + spawn_put(); + } + + template<typename Body> + Body copy_function_object() { + input_body<output_type> &body_ref = *this->my_body; + return dynamic_cast< input_body_leaf<output_type, Body> & >(body_ref).get_body(); + } + +protected: + + //! resets the input_node to its initial state + void reset_node( reset_flags f) override { + my_active = false; + my_reserved = false; + my_has_cached_item = false; + + if(f & rf_clear_edges) my_successors.clear(); + if(f & rf_reset_bodies) { + input_body<output_type> *tmp = my_init_body->clone(); + delete my_body; + my_body = tmp; + } + } + +private: + spin_mutex my_mutex; + bool my_active; + input_body<output_type> *my_body; + input_body<output_type> *my_init_body; + broadcast_cache< output_type > my_successors; + bool my_reserved; + bool my_has_cached_item; + output_type my_cached_item; + + // used by apply_body_bypass, can invoke body of node. + bool try_reserve_apply_body(output_type &v) { + spin_mutex::scoped_lock lock(my_mutex); + if ( my_reserved ) { + return false; + } + if ( !my_has_cached_item ) { + flow_control control; + + fgt_begin_body( my_body ); + + my_cached_item = (*my_body)(control); + my_has_cached_item = !control.is_pipeline_stopped; + + fgt_end_body( my_body ); + } + if ( my_has_cached_item ) { + v = my_cached_item; + my_reserved = true; + return true; + } else { + return false; + } + } + + graph_task* create_put_task() { + small_object_allocator allocator{}; + typedef input_node_task_bypass< input_node<output_type> > task_type; + graph_task* t = allocator.new_object<task_type>(my_graph, allocator, *this); + my_graph.reserve_wait(); + return t; + } + + //! Spawns a task that applies the body + void spawn_put( ) { + if(is_graph_active(this->my_graph)) { + spawn_in_graph_arena(this->my_graph, *create_put_task()); + } + } + + friend class input_node_task_bypass< input_node<output_type> >; + //! Applies the body. 
Returning SUCCESSFULLY_ENQUEUED okay; forward_task_bypass will handle it. + graph_task* apply_body_bypass( ) { + output_type v; + if ( !try_reserve_apply_body(v) ) + return NULL; + + graph_task *last_task = my_successors.try_put_task(v); + if ( last_task ) + try_consume(); + else + try_release(); + return last_task; + } +}; // class input_node + +//! Implements a function node that supports Input -> Output +template<typename Input, typename Output = continue_msg, typename Policy = queueing> +class function_node + : public graph_node + , public function_input< Input, Output, Policy, cache_aligned_allocator<Input> > + , public function_output<Output> +{ + typedef cache_aligned_allocator<Input> internals_allocator; + +public: + typedef Input input_type; + typedef Output output_type; + typedef function_input<input_type,output_type,Policy,internals_allocator> input_impl_type; + typedef function_input_queue<input_type, internals_allocator> input_queue_type; + typedef function_output<output_type> fOutput_type; + typedef typename input_impl_type::predecessor_type predecessor_type; + typedef typename fOutput_type::successor_type successor_type; + + using input_impl_type::my_predecessors; + + //! Constructor + // input_queue_type is allocated here, but destroyed in the function_input_base. + // TODO: pass the graph_buffer_policy to the function_input_base so it can all + // be done in one place. This would be an interface-breaking change. + template< typename Body > + __TBB_NOINLINE_SYM function_node( graph &g, size_t concurrency, + Body body, Policy = Policy(), node_priority_t a_priority = no_priority ) + : graph_node(g), input_impl_type(g, concurrency, body, a_priority), + fOutput_type(g) { + fgt_node_with_body( CODEPTR(), FLOW_FUNCTION_NODE, &this->my_graph, + static_cast<receiver<input_type> *>(this), static_cast<sender<output_type> *>(this), this->my_body ); + } + + template <typename Body> + function_node( graph& g, size_t concurrency, Body body, node_priority_t a_priority ) + : function_node(g, concurrency, body, Policy(), a_priority) {} + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename Body, typename... Args> + function_node( const node_set<Args...>& nodes, size_t concurrency, Body body, + Policy p = Policy(), node_priority_t a_priority = no_priority ) + : function_node(nodes.graph_reference(), concurrency, body, p, a_priority) { + make_edges_in_order(nodes, *this); + } + + template <typename Body, typename... Args> + function_node( const node_set<Args...>& nodes, size_t concurrency, Body body, node_priority_t a_priority ) + : function_node(nodes, concurrency, body, Policy(), a_priority) {} +#endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + + //! Copy constructor + __TBB_NOINLINE_SYM function_node( const function_node& src ) : + graph_node(src.my_graph), + input_impl_type(src), + fOutput_type(src.my_graph) { + fgt_node_with_body( CODEPTR(), FLOW_FUNCTION_NODE, &this->my_graph, + static_cast<receiver<input_type> *>(this), static_cast<sender<output_type> *>(this), this->my_body ); + } + +protected: + template< typename R, typename B > friend class run_and_put_task; + template<typename X, typename Y> friend class broadcast_cache; + template<typename X, typename Y> friend class round_robin_cache; + using input_impl_type::try_put_task; + + broadcast_cache<output_type> &successors () override { return fOutput_type::my_successors; } + + void reset_node(reset_flags f) override { + input_impl_type::reset_function_input(f); + // TODO: use clear() instead. 
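+ // rf_clear_edges drops every registered successor and predecessor so the
+ // node can be re-wired after graph::reset(rf_clear_edges).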
+ if(f & rf_clear_edges) { + successors().clear(); + my_predecessors.clear(); + } + __TBB_ASSERT(!(f & rf_clear_edges) || successors().empty(), "function_node successors not empty"); + __TBB_ASSERT(this->my_predecessors.empty(), "function_node predecessors not empty"); + } + +}; // class function_node + +//! implements a function node that supports Input -> (set of outputs) +// Output is a tuple of output types. +template<typename Input, typename Output, typename Policy = queueing> +class multifunction_node : + public graph_node, + public multifunction_input + < + Input, + typename wrap_tuple_elements< + std::tuple_size<Output>::value, // #elements in tuple + multifunction_output, // wrap this around each element + Output // the tuple providing the types + >::type, + Policy, + cache_aligned_allocator<Input> + > +{ + typedef cache_aligned_allocator<Input> internals_allocator; + +protected: + static const int N = std::tuple_size<Output>::value; +public: + typedef Input input_type; + typedef null_type output_type; + typedef typename wrap_tuple_elements<N,multifunction_output, Output>::type output_ports_type; + typedef multifunction_input< + input_type, output_ports_type, Policy, internals_allocator> input_impl_type; + typedef function_input_queue<input_type, internals_allocator> input_queue_type; +private: + using input_impl_type::my_predecessors; +public: + template<typename Body> + __TBB_NOINLINE_SYM multifunction_node( + graph &g, size_t concurrency, + Body body, Policy = Policy(), node_priority_t a_priority = no_priority + ) : graph_node(g), input_impl_type(g, concurrency, body, a_priority) { + fgt_multioutput_node_with_body<N>( + CODEPTR(), FLOW_MULTIFUNCTION_NODE, + &this->my_graph, static_cast<receiver<input_type> *>(this), + this->output_ports(), this->my_body + ); + } + + template <typename Body> + __TBB_NOINLINE_SYM multifunction_node(graph& g, size_t concurrency, Body body, node_priority_t a_priority) + : multifunction_node(g, concurrency, body, Policy(), a_priority) {} + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename Body, typename... Args> + __TBB_NOINLINE_SYM multifunction_node(const node_set<Args...>& nodes, size_t concurrency, Body body, + Policy p = Policy(), node_priority_t a_priority = no_priority) + : multifunction_node(nodes.graph_reference(), concurrency, body, p, a_priority) { + make_edges_in_order(nodes, *this); + } + + template <typename Body, typename... Args> + __TBB_NOINLINE_SYM multifunction_node(const node_set<Args...>& nodes, size_t concurrency, Body body, node_priority_t a_priority) + : multifunction_node(nodes, concurrency, body, Policy(), a_priority) {} +#endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + + __TBB_NOINLINE_SYM multifunction_node( const multifunction_node &other) : + graph_node(other.my_graph), input_impl_type(other) { + fgt_multioutput_node_with_body<N>( CODEPTR(), FLOW_MULTIFUNCTION_NODE, + &this->my_graph, static_cast<receiver<input_type> *>(this), + this->output_ports(), this->my_body ); + } + + // all the guts are in multifunction_input... +protected: + void reset_node(reset_flags f) override { input_impl_type::reset(f); } +}; // multifunction_node + +//! split_node: accepts a tuple as input, forwards each element of the tuple to its +// successors. The node has unlimited concurrency, so it does not reject inputs. 
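+
+// Illustrative usage sketch (not from the upstream sources; all names are
+// hypothetical):
+//
+//   tbb::flow::graph g;
+//   tbb::flow::split_node<std::tuple<int, float>> s(g);
+//   tbb::flow::queue_node<int>   q_int(g);
+//   tbb::flow::queue_node<float> q_float(g);
+//   tbb::flow::make_edge(tbb::flow::output_port<0>(s), q_int);
+//   tbb::flow::make_edge(tbb::flow::output_port<1>(s), q_float);
+//   s.try_put(std::make_tuple(1, 2.0f));
+//   g.wait_for_all();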
+template<typename TupleType> +class split_node : public graph_node, public receiver<TupleType> { + static const int N = std::tuple_size<TupleType>::value; + typedef receiver<TupleType> base_type; +public: + typedef TupleType input_type; + typedef typename wrap_tuple_elements< + N, // #elements in tuple + multifunction_output, // wrap this around each element + TupleType // the tuple providing the types + >::type output_ports_type; + + __TBB_NOINLINE_SYM explicit split_node(graph &g) + : graph_node(g), + my_output_ports(init_output_ports<output_ports_type>::call(g, my_output_ports)) + { + fgt_multioutput_node<N>(CODEPTR(), FLOW_SPLIT_NODE, &this->my_graph, + static_cast<receiver<input_type> *>(this), this->output_ports()); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... Args> + __TBB_NOINLINE_SYM split_node(const node_set<Args...>& nodes) : split_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + __TBB_NOINLINE_SYM split_node(const split_node& other) + : graph_node(other.my_graph), base_type(other), + my_output_ports(init_output_ports<output_ports_type>::call(other.my_graph, my_output_ports)) + { + fgt_multioutput_node<N>(CODEPTR(), FLOW_SPLIT_NODE, &this->my_graph, + static_cast<receiver<input_type> *>(this), this->output_ports()); + } + + output_ports_type &output_ports() { return my_output_ports; } + +protected: + graph_task *try_put_task(const TupleType& t) override { + // Sending split messages in parallel is not justified, as overheads would prevail. + // Also, we do not have successors here. So we just tell the task returned here is successful. + return emit_element<N>::emit_this(this->my_graph, t, output_ports()); + } + void reset_node(reset_flags f) override { + if (f & rf_clear_edges) + clear_element<N>::clear_this(my_output_ports); + + __TBB_ASSERT(!(f & rf_clear_edges) || clear_element<N>::this_empty(my_output_ports), "split_node reset failed"); + } + graph& graph_reference() const override { + return my_graph; + } + +private: + output_ports_type my_output_ports; +}; + +//! Implements an executable node that supports continue_msg -> Output +template <typename Output, typename Policy = Policy<void> > +class continue_node : public graph_node, public continue_input<Output, Policy>, + public function_output<Output> { +public: + typedef continue_msg input_type; + typedef Output output_type; + typedef continue_input<Output, Policy> input_impl_type; + typedef function_output<output_type> fOutput_type; + typedef typename input_impl_type::predecessor_type predecessor_type; + typedef typename fOutput_type::successor_type successor_type; + + //! Constructor for executable node with continue_msg -> Output + template <typename Body > + __TBB_NOINLINE_SYM continue_node( + graph &g, + Body body, Policy = Policy(), node_priority_t a_priority = no_priority + ) : graph_node(g), input_impl_type( g, body, a_priority ), + fOutput_type(g) { + fgt_node_with_body( CODEPTR(), FLOW_CONTINUE_NODE, &this->my_graph, + + static_cast<receiver<input_type> *>(this), + static_cast<sender<output_type> *>(this), this->my_body ); + } + + template <typename Body> + continue_node( graph& g, Body body, node_priority_t a_priority ) + : continue_node(g, body, Policy(), a_priority) {} + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename Body, typename... 
Args> + continue_node( const node_set<Args...>& nodes, Body body, + Policy p = Policy(), node_priority_t a_priority = no_priority ) + : continue_node(nodes.graph_reference(), body, p, a_priority ) { + make_edges_in_order(nodes, *this); + } + template <typename Body, typename... Args> + continue_node( const node_set<Args...>& nodes, Body body, node_priority_t a_priority) + : continue_node(nodes, body, Policy(), a_priority) {} +#endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + + //! Constructor for executable node with continue_msg -> Output + template <typename Body > + __TBB_NOINLINE_SYM continue_node( + graph &g, int number_of_predecessors, + Body body, Policy = Policy(), node_priority_t a_priority = no_priority + ) : graph_node(g) + , input_impl_type(g, number_of_predecessors, body, a_priority), + fOutput_type(g) { + fgt_node_with_body( CODEPTR(), FLOW_CONTINUE_NODE, &this->my_graph, + static_cast<receiver<input_type> *>(this), + static_cast<sender<output_type> *>(this), this->my_body ); + } + + template <typename Body> + continue_node( graph& g, int number_of_predecessors, Body body, node_priority_t a_priority) + : continue_node(g, number_of_predecessors, body, Policy(), a_priority) {} + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename Body, typename... Args> + continue_node( const node_set<Args...>& nodes, int number_of_predecessors, + Body body, Policy p = Policy(), node_priority_t a_priority = no_priority ) + : continue_node(nodes.graph_reference(), number_of_predecessors, body, p, a_priority) { + make_edges_in_order(nodes, *this); + } + + template <typename Body, typename... Args> + continue_node( const node_set<Args...>& nodes, int number_of_predecessors, + Body body, node_priority_t a_priority ) + : continue_node(nodes, number_of_predecessors, body, Policy(), a_priority) {} +#endif + + //! Copy constructor + __TBB_NOINLINE_SYM continue_node( const continue_node& src ) : + graph_node(src.my_graph), input_impl_type(src), + function_output<Output>(src.my_graph) { + fgt_node_with_body( CODEPTR(), FLOW_CONTINUE_NODE, &this->my_graph, + static_cast<receiver<input_type> *>(this), + static_cast<sender<output_type> *>(this), this->my_body ); + } + +protected: + template< typename R, typename B > friend class run_and_put_task; + template<typename X, typename Y> friend class broadcast_cache; + template<typename X, typename Y> friend class round_robin_cache; + using input_impl_type::try_put_task; + broadcast_cache<output_type> &successors () override { return fOutput_type::my_successors; } + + void reset_node(reset_flags f) override { + input_impl_type::reset_receiver(f); + if(f & rf_clear_edges)successors().clear(); + __TBB_ASSERT(!(f & rf_clear_edges) || successors().empty(), "continue_node not reset"); + } +}; // continue_node + +//! Forwards messages of type T to all successors +template <typename T> +class broadcast_node : public graph_node, public receiver<T>, public sender<T> { +public: + typedef T input_type; + typedef T output_type; + typedef typename receiver<input_type>::predecessor_type predecessor_type; + typedef typename sender<output_type>::successor_type successor_type; +private: + broadcast_cache<input_type> my_successors; +public: + + __TBB_NOINLINE_SYM explicit broadcast_node(graph& g) : graph_node(g), my_successors(this) { + fgt_node( CODEPTR(), FLOW_BROADCAST_NODE, &this->my_graph, + static_cast<receiver<input_type> *>(this), static_cast<sender<output_type> *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... 
Args> + broadcast_node(const node_set<Args...>& nodes) : broadcast_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + // Copy constructor + __TBB_NOINLINE_SYM broadcast_node( const broadcast_node& src ) : broadcast_node(src.my_graph) {} + + //! Adds a successor + bool register_successor( successor_type &r ) override { + my_successors.register_successor( r ); + return true; + } + + //! Removes s as a successor + bool remove_successor( successor_type &r ) override { + my_successors.remove_successor( r ); + return true; + } + +protected: + template< typename R, typename B > friend class run_and_put_task; + template<typename X, typename Y> friend class broadcast_cache; + template<typename X, typename Y> friend class round_robin_cache; + //! build a task to run the successor if possible. Default is old behavior. + graph_task *try_put_task(const T& t) override { + graph_task *new_task = my_successors.try_put_task(t); + if (!new_task) new_task = SUCCESSFULLY_ENQUEUED; + return new_task; + } + + graph& graph_reference() const override { + return my_graph; + } + + void reset_node(reset_flags f) override { + if (f&rf_clear_edges) { + my_successors.clear(); + } + __TBB_ASSERT(!(f & rf_clear_edges) || my_successors.empty(), "Error resetting broadcast_node"); + } +}; // broadcast_node + +//! Forwards messages in arbitrary order +template <typename T> +class buffer_node + : public graph_node + , public reservable_item_buffer< T, cache_aligned_allocator<T> > + , public receiver<T>, public sender<T> +{ + typedef cache_aligned_allocator<T> internals_allocator; + +public: + typedef T input_type; + typedef T output_type; + typedef typename receiver<input_type>::predecessor_type predecessor_type; + typedef typename sender<output_type>::successor_type successor_type; + typedef buffer_node<T> class_type; + +protected: + typedef size_t size_type; + round_robin_cache< T, null_rw_mutex > my_successors; + + friend class forward_task_bypass< class_type >; + + enum op_type {reg_succ, rem_succ, req_item, res_item, rel_res, con_res, put_item, try_fwd_task + }; + + // implements the aggregator_operation concept + class buffer_operation : public aggregated_operation< buffer_operation > { + public: + char type; + T* elem; + graph_task* ltask; + successor_type *r; + + buffer_operation(const T& e, op_type t) : type(char(t)) + , elem(const_cast<T*>(&e)) , ltask(NULL) + {} + buffer_operation(op_type t) : type(char(t)), ltask(NULL) {} + }; + + bool forwarder_busy; + typedef aggregating_functor<class_type, buffer_operation> handler_type; + friend class aggregating_functor<class_type, buffer_operation>; + aggregator< handler_type, buffer_operation> my_aggregator; + + virtual void handle_operations(buffer_operation *op_list) { + handle_operations_impl(op_list, this); + } + + template<typename derived_type> + void handle_operations_impl(buffer_operation *op_list, derived_type* derived) { + __TBB_ASSERT(static_cast<class_type*>(derived) == this, "'this' is not a base class for derived"); + + buffer_operation *tmp = NULL; + bool try_forwarding = false; + while (op_list) { + tmp = op_list; + op_list = op_list->next; + switch (tmp->type) { + case reg_succ: internal_reg_succ(tmp); try_forwarding = true; break; + case rem_succ: internal_rem_succ(tmp); break; + case req_item: internal_pop(tmp); break; + case res_item: internal_reserve(tmp); break; + case rel_res: internal_release(tmp); try_forwarding = true; break; + case con_res: internal_consume(tmp); try_forwarding = true; break; + case put_item: 
try_forwarding = internal_push(tmp); break; + case try_fwd_task: internal_forward_task(tmp); break; + } + } + + derived->order(); + + if (try_forwarding && !forwarder_busy) { + if(is_graph_active(this->my_graph)) { + forwarder_busy = true; + typedef forward_task_bypass<class_type> task_type; + small_object_allocator allocator{}; + graph_task* new_task = allocator.new_object<task_type>(graph_reference(), allocator, *this); + my_graph.reserve_wait(); + // tmp should point to the last item handled by the aggregator. This is the operation + // the handling thread enqueued. So modifying that record will be okay. + // TODO revamp: check that the issue is still present + // workaround for icc bug (at least 12.0 and 13.0) + // error: function "tbb::flow::interfaceX::combine_tasks" cannot be called with the given argument list + // argument types are: (graph, graph_task *, graph_task *) + graph_task *z = tmp->ltask; + graph &g = this->my_graph; + tmp->ltask = combine_tasks(g, z, new_task); // in case the op generated a task + } + } + } // handle_operations + + inline graph_task *grab_forwarding_task( buffer_operation &op_data) { + return op_data.ltask; + } + + inline bool enqueue_forwarding_task(buffer_operation &op_data) { + graph_task *ft = grab_forwarding_task(op_data); + if(ft) { + spawn_in_graph_arena(graph_reference(), *ft); + return true; + } + return false; + } + + //! This is executed by an enqueued task, the "forwarder" + virtual graph_task *forward_task() { + buffer_operation op_data(try_fwd_task); + graph_task *last_task = NULL; + do { + op_data.status = WAIT; + op_data.ltask = NULL; + my_aggregator.execute(&op_data); + + // workaround for icc bug + graph_task *xtask = op_data.ltask; + graph& g = this->my_graph; + last_task = combine_tasks(g, last_task, xtask); + } while (op_data.status ==SUCCEEDED); + return last_task; + } + + //! Register successor + virtual void internal_reg_succ(buffer_operation *op) { + my_successors.register_successor(*(op->r)); + op->status.store(SUCCEEDED, std::memory_order_release); + } + + //! Remove successor + virtual void internal_rem_succ(buffer_operation *op) { + my_successors.remove_successor(*(op->r)); + op->status.store(SUCCEEDED, std::memory_order_release); + } + +private: + void order() {} + + bool is_item_valid() { + return this->my_item_valid(this->my_tail - 1); + } + + void try_put_and_add_task(graph_task*& last_task) { + graph_task *new_task = my_successors.try_put_task(this->back()); + if (new_task) { + // workaround for icc bug + graph& g = this->my_graph; + last_task = combine_tasks(g, last_task, new_task); + this->destroy_back(); + } + } + +protected: + //! 
Tries to forward valid items to successors + virtual void internal_forward_task(buffer_operation *op) { + internal_forward_task_impl(op, this); + } + + template<typename derived_type> + void internal_forward_task_impl(buffer_operation *op, derived_type* derived) { + __TBB_ASSERT(static_cast<class_type*>(derived) == this, "'this' is not a base class for derived"); + + if (this->my_reserved || !derived->is_item_valid()) { + op->status.store(FAILED, std::memory_order_release); + this->forwarder_busy = false; + return; + } + // Try forwarding, giving each successor a chance + graph_task* last_task = NULL; + size_type counter = my_successors.size(); + for (; counter > 0 && derived->is_item_valid(); --counter) + derived->try_put_and_add_task(last_task); + + op->ltask = last_task; // return task + if (last_task && !counter) { + op->status.store(SUCCEEDED, std::memory_order_release); + } + else { + op->status.store(FAILED, std::memory_order_release); + forwarder_busy = false; + } + } + + virtual bool internal_push(buffer_operation *op) { + this->push_back(*(op->elem)); + op->status.store(SUCCEEDED, std::memory_order_release); + return true; + } + + virtual void internal_pop(buffer_operation *op) { + if(this->pop_back(*(op->elem))) { + op->status.store(SUCCEEDED, std::memory_order_release); + } + else { + op->status.store(FAILED, std::memory_order_release); + } + } + + virtual void internal_reserve(buffer_operation *op) { + if(this->reserve_front(*(op->elem))) { + op->status.store(SUCCEEDED, std::memory_order_release); + } + else { + op->status.store(FAILED, std::memory_order_release); + } + } + + virtual void internal_consume(buffer_operation *op) { + this->consume_front(); + op->status.store(SUCCEEDED, std::memory_order_release); + } + + virtual void internal_release(buffer_operation *op) { + this->release_front(); + op->status.store(SUCCEEDED, std::memory_order_release); + } + +public: + //! Constructor + __TBB_NOINLINE_SYM explicit buffer_node( graph &g ) + : graph_node(g), reservable_item_buffer<T, internals_allocator>(), receiver<T>(), + sender<T>(), my_successors(this), forwarder_busy(false) + { + my_aggregator.initialize_handler(handler_type(this)); + fgt_node( CODEPTR(), FLOW_BUFFER_NODE, &this->my_graph, + static_cast<receiver<input_type> *>(this), static_cast<sender<output_type> *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... Args> + buffer_node(const node_set<Args...>& nodes) : buffer_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + //! Copy constructor + __TBB_NOINLINE_SYM buffer_node( const buffer_node& src ) : buffer_node(src.my_graph) {} + + // + // message sender implementation + // + + //! Adds a new successor. + /** Adds successor r to the list of successors; may forward tasks. */ + bool register_successor( successor_type &r ) override { + buffer_operation op_data(reg_succ); + op_data.r = &r; + my_aggregator.execute(&op_data); + (void)enqueue_forwarding_task(op_data); + return true; + } + + //! Removes a successor. + /** Removes successor r from the list of successors. + It also calls r.remove_predecessor(*this) to remove this node as a predecessor. 
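+ Like the other buffer_node operations, the removal is funneled through the
+ aggregator, so it is serialized with the node's try_put/try_get/reserve
+ operations.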
*/ + bool remove_successor( successor_type &r ) override { + // TODO revamp: investigate why full qualification is necessary here + tbb::detail::d1::remove_predecessor(r, *this); + buffer_operation op_data(rem_succ); + op_data.r = &r; + my_aggregator.execute(&op_data); + // even though this operation does not cause a forward, if we are the handler, and + // a forward is scheduled, we may be the first to reach this point after the aggregator, + // and so should check for the task. + (void)enqueue_forwarding_task(op_data); + return true; + } + + //! Request an item from the buffer_node + /** true = v contains the returned item<BR> + false = no item has been returned */ + bool try_get( T &v ) override { + buffer_operation op_data(req_item); + op_data.elem = &v; + my_aggregator.execute(&op_data); + (void)enqueue_forwarding_task(op_data); + return (op_data.status==SUCCEEDED); + } + + //! Reserves an item. + /** false = no item can be reserved<BR> + true = an item is reserved */ + bool try_reserve( T &v ) override { + buffer_operation op_data(res_item); + op_data.elem = &v; + my_aggregator.execute(&op_data); + (void)enqueue_forwarding_task(op_data); + return (op_data.status==SUCCEEDED); + } + + //! Release a reserved item. + /** true = item has been released and so remains in sender */ + bool try_release() override { + buffer_operation op_data(rel_res); + my_aggregator.execute(&op_data); + (void)enqueue_forwarding_task(op_data); + return true; + } + + //! Consumes a reserved item. + /** true = item is removed from sender and reservation removed */ + bool try_consume() override { + buffer_operation op_data(con_res); + my_aggregator.execute(&op_data); + (void)enqueue_forwarding_task(op_data); + return true; + } + +protected: + + template< typename R, typename B > friend class run_and_put_task; + template<typename X, typename Y> friend class broadcast_cache; + template<typename X, typename Y> friend class round_robin_cache; + //! receive an item, return a task *if possible + graph_task *try_put_task(const T &t) override { + buffer_operation op_data(t, put_item); + my_aggregator.execute(&op_data); + graph_task *ft = grab_forwarding_task(op_data); + // sequencer_nodes can return failure (if an item has been previously inserted) + // We have to spawn the returned task if our own operation fails. + + if(ft && op_data.status ==FAILED) { + // we haven't succeeded queueing the item, but for some reason the + // call returned a task (if another request resulted in a successful + // forward this could happen.) Queue the task and reset the pointer. + spawn_in_graph_arena(graph_reference(), *ft); ft = NULL; + } + else if(!ft && op_data.status ==SUCCEEDED) { + ft = SUCCESSFULLY_ENQUEUED; + } + return ft; + } + + graph& graph_reference() const override { + return my_graph; + } + +protected: + void reset_node( reset_flags f) override { + reservable_item_buffer<T, internals_allocator>::reset(); + // TODO: just clear structures + if (f&rf_clear_edges) { + my_successors.clear(); + } + forwarder_busy = false; + } +}; // buffer_node + +//! 
Forwards messages in FIFO order +template <typename T> +class queue_node : public buffer_node<T> { +protected: + typedef buffer_node<T> base_type; + typedef typename base_type::size_type size_type; + typedef typename base_type::buffer_operation queue_operation; + typedef queue_node class_type; + +private: + template<typename> friend class buffer_node; + + bool is_item_valid() { + return this->my_item_valid(this->my_head); + } + + void try_put_and_add_task(graph_task*& last_task) { + graph_task *new_task = this->my_successors.try_put_task(this->front()); + if (new_task) { + // workaround for icc bug + graph& graph_ref = this->graph_reference(); + last_task = combine_tasks(graph_ref, last_task, new_task); + this->destroy_front(); + } + } + +protected: + void internal_forward_task(queue_operation *op) override { + this->internal_forward_task_impl(op, this); + } + + void internal_pop(queue_operation *op) override { + if ( this->my_reserved || !this->my_item_valid(this->my_head)){ + op->status.store(FAILED, std::memory_order_release); + } + else { + this->pop_front(*(op->elem)); + op->status.store(SUCCEEDED, std::memory_order_release); + } + } + void internal_reserve(queue_operation *op) override { + if (this->my_reserved || !this->my_item_valid(this->my_head)) { + op->status.store(FAILED, std::memory_order_release); + } + else { + this->reserve_front(*(op->elem)); + op->status.store(SUCCEEDED, std::memory_order_release); + } + } + void internal_consume(queue_operation *op) override { + this->consume_front(); + op->status.store(SUCCEEDED, std::memory_order_release); + } + +public: + typedef T input_type; + typedef T output_type; + typedef typename receiver<input_type>::predecessor_type predecessor_type; + typedef typename sender<output_type>::successor_type successor_type; + + //! Constructor + __TBB_NOINLINE_SYM explicit queue_node( graph &g ) : base_type(g) { + fgt_node( CODEPTR(), FLOW_QUEUE_NODE, &(this->my_graph), + static_cast<receiver<input_type> *>(this), + static_cast<sender<output_type> *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... Args> + queue_node( const node_set<Args...>& nodes) : queue_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + //! Copy constructor + __TBB_NOINLINE_SYM queue_node( const queue_node& src) : base_type(src) { + fgt_node( CODEPTR(), FLOW_QUEUE_NODE, &(this->my_graph), + static_cast<receiver<input_type> *>(this), + static_cast<sender<output_type> *>(this) ); + } + + +protected: + void reset_node( reset_flags f) override { + base_type::reset_node(f); + } +}; // queue_node + +//! Forwards messages in sequence order +template <typename T> +class sequencer_node : public queue_node<T> { + function_body< T, size_t > *my_sequencer; + // my_sequencer should be a benign function and must be callable + // from a parallel context. Does this mean it needn't be reset? +public: + typedef T input_type; + typedef T output_type; + typedef typename receiver<input_type>::predecessor_type predecessor_type; + typedef typename sender<output_type>::successor_type successor_type; + + //! 
Constructor + template< typename Sequencer > + __TBB_NOINLINE_SYM sequencer_node( graph &g, const Sequencer& s ) : queue_node<T>(g), + my_sequencer(new function_body_leaf< T, size_t, Sequencer>(s) ) { + fgt_node( CODEPTR(), FLOW_SEQUENCER_NODE, &(this->my_graph), + static_cast<receiver<input_type> *>(this), + static_cast<sender<output_type> *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename Sequencer, typename... Args> + sequencer_node( const node_set<Args...>& nodes, const Sequencer& s) + : sequencer_node(nodes.graph_reference(), s) { + make_edges_in_order(nodes, *this); + } +#endif + + //! Copy constructor + __TBB_NOINLINE_SYM sequencer_node( const sequencer_node& src ) : queue_node<T>(src), + my_sequencer( src.my_sequencer->clone() ) { + fgt_node( CODEPTR(), FLOW_SEQUENCER_NODE, &(this->my_graph), + static_cast<receiver<input_type> *>(this), + static_cast<sender<output_type> *>(this) ); + } + + //! Destructor + ~sequencer_node() { delete my_sequencer; } + +protected: + typedef typename buffer_node<T>::size_type size_type; + typedef typename buffer_node<T>::buffer_operation sequencer_operation; + +private: + bool internal_push(sequencer_operation *op) override { + size_type tag = (*my_sequencer)(*(op->elem)); +#if !TBB_DEPRECATED_SEQUENCER_DUPLICATES + if (tag < this->my_head) { + // have already emitted a message with this tag + op->status.store(FAILED, std::memory_order_release); + return false; + } +#endif + // cannot modify this->my_tail now; the buffer would be inconsistent. + size_t new_tail = (tag+1 > this->my_tail) ? tag+1 : this->my_tail; + + if (this->size(new_tail) > this->capacity()) { + this->grow_my_array(this->size(new_tail)); + } + this->my_tail = new_tail; + + const op_stat res = this->place_item(tag, *(op->elem)) ? SUCCEEDED : FAILED; + op->status.store(res, std::memory_order_release); + return res ==SUCCEEDED; + } +}; // sequencer_node + +//! Forwards messages in priority order +template<typename T, typename Compare = std::less<T>> +class priority_queue_node : public buffer_node<T> { +public: + typedef T input_type; + typedef T output_type; + typedef buffer_node<T> base_type; + typedef priority_queue_node class_type; + typedef typename receiver<input_type>::predecessor_type predecessor_type; + typedef typename sender<output_type>::successor_type successor_type; + + //! Constructor + __TBB_NOINLINE_SYM explicit priority_queue_node( graph &g, const Compare& comp = Compare() ) + : buffer_node<T>(g), compare(comp), mark(0) { + fgt_node( CODEPTR(), FLOW_PRIORITY_QUEUE_NODE, &(this->my_graph), + static_cast<receiver<input_type> *>(this), + static_cast<sender<output_type> *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... Args> + priority_queue_node(const node_set<Args...>& nodes, const Compare& comp = Compare()) + : priority_queue_node(nodes.graph_reference(), comp) { + make_edges_in_order(nodes, *this); + } +#endif + + //! Copy constructor + __TBB_NOINLINE_SYM priority_queue_node( const priority_queue_node &src ) + : buffer_node<T>(src), mark(0) + { + fgt_node( CODEPTR(), FLOW_PRIORITY_QUEUE_NODE, &(this->my_graph), + static_cast<receiver<input_type> *>(this), + static_cast<sender<output_type> *>(this) ); + } + +protected: + + void reset_node( reset_flags f) override { + mark = 0; + base_type::reset_node(f); + } + + typedef typename buffer_node<T>::size_type size_type; + typedef typename buffer_node<T>::item_type item_type; + typedef typename buffer_node<T>::buffer_operation prio_operation; + + //! 
Tries to forward valid items to successors + void internal_forward_task(prio_operation *op) override { + this->internal_forward_task_impl(op, this); + } + + void handle_operations(prio_operation *op_list) override { + this->handle_operations_impl(op_list, this); + } + + bool internal_push(prio_operation *op) override { + prio_push(*(op->elem)); + op->status.store(SUCCEEDED, std::memory_order_release); + return true; + } + + void internal_pop(prio_operation *op) override { + // if empty or already reserved, don't pop + if ( this->my_reserved == true || this->my_tail == 0 ) { + op->status.store(FAILED, std::memory_order_release); + return; + } + + *(op->elem) = prio(); + op->status.store(SUCCEEDED, std::memory_order_release); + prio_pop(); + + } + + // pops the highest-priority item, saves copy + void internal_reserve(prio_operation *op) override { + if (this->my_reserved == true || this->my_tail == 0) { + op->status.store(FAILED, std::memory_order_release); + return; + } + this->my_reserved = true; + *(op->elem) = prio(); + reserved_item = *(op->elem); + op->status.store(SUCCEEDED, std::memory_order_release); + prio_pop(); + } + + void internal_consume(prio_operation *op) override { + op->status.store(SUCCEEDED, std::memory_order_release); + this->my_reserved = false; + reserved_item = input_type(); + } + + void internal_release(prio_operation *op) override { + op->status.store(SUCCEEDED, std::memory_order_release); + prio_push(reserved_item); + this->my_reserved = false; + reserved_item = input_type(); + } + +private: + template<typename> friend class buffer_node; + + void order() { + if (mark < this->my_tail) heapify(); + __TBB_ASSERT(mark == this->my_tail, "mark unequal after heapify"); + } + + bool is_item_valid() { + return this->my_tail > 0; + } + + void try_put_and_add_task(graph_task*& last_task) { + graph_task * new_task = this->my_successors.try_put_task(this->prio()); + if (new_task) { + // workaround for icc bug + graph& graph_ref = this->graph_reference(); + last_task = combine_tasks(graph_ref, last_task, new_task); + prio_pop(); + } + } + +private: + Compare compare; + size_type mark; + + input_type reserved_item; + + // in case a reheap has not been done after a push, check if the mark item is higher than the 0'th item + bool prio_use_tail() { + __TBB_ASSERT(mark <= this->my_tail, "mark outside bounds before test"); + return mark < this->my_tail && compare(this->get_my_item(0), this->get_my_item(this->my_tail - 1)); + } + + // prio_push: checks that the item will fit, expand array if necessary, put at end + void prio_push(const T &src) { + if ( this->my_tail >= this->my_array_size ) + this->grow_my_array( this->my_tail + 1 ); + (void) this->place_item(this->my_tail, src); + ++(this->my_tail); + __TBB_ASSERT(mark < this->my_tail, "mark outside bounds after push"); + } + + // prio_pop: deletes highest priority item from the array, and if it is item + // 0, move last item to 0 and reheap. If end of array, just destroy and decrement tail + // and mark. Assumes the array has already been tested for emptiness; no failure. 
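    // Editorial note (not from the upstream TBB sources): `mark` is the size of the
    // already-heapified prefix; items pushed since the last heapify() live in
    // [mark, my_tail). prio_use_tail() reports when the most recently pushed item
    // outranks the heap root, in which case prio()/prio_pop() act on slot my_tail-1
    // instead of slot 0.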
+ void prio_pop() { + if (prio_use_tail()) { + // there are newly pushed elements; last one higher than top + // copy the data + this->destroy_item(this->my_tail-1); + --(this->my_tail); + __TBB_ASSERT(mark <= this->my_tail, "mark outside bounds after pop"); + return; + } + this->destroy_item(0); + if(this->my_tail > 1) { + // push the last element down heap + __TBB_ASSERT(this->my_item_valid(this->my_tail - 1), NULL); + this->move_item(0,this->my_tail - 1); + } + --(this->my_tail); + if(mark > this->my_tail) --mark; + if (this->my_tail > 1) // don't reheap for heap of size 1 + reheap(); + __TBB_ASSERT(mark <= this->my_tail, "mark outside bounds after pop"); + } + + const T& prio() { + return this->get_my_item(prio_use_tail() ? this->my_tail-1 : 0); + } + + // turn array into heap + void heapify() { + if(this->my_tail == 0) { + mark = 0; + return; + } + if (!mark) mark = 1; + for (; mark<this->my_tail; ++mark) { // for each unheaped element + size_type cur_pos = mark; + input_type to_place; + this->fetch_item(mark,to_place); + do { // push to_place up the heap + size_type parent = (cur_pos-1)>>1; + if (!compare(this->get_my_item(parent), to_place)) + break; + this->move_item(cur_pos, parent); + cur_pos = parent; + } while( cur_pos ); + (void) this->place_item(cur_pos, to_place); + } + } + + // otherwise heapified array with new root element; rearrange to heap + void reheap() { + size_type cur_pos=0, child=1; + while (child < mark) { + size_type target = child; + if (child+1<mark && + compare(this->get_my_item(child), + this->get_my_item(child+1))) + ++target; + // target now has the higher priority child + if (compare(this->get_my_item(target), + this->get_my_item(cur_pos))) + break; + // swap + this->swap_items(cur_pos, target); + cur_pos = target; + child = (cur_pos<<1)+1; + } + } +}; // priority_queue_node + +//! Forwards messages only if the threshold has not been reached +/** This node forwards items until its threshold is reached. + It contains no buffering. If the downstream node rejects, the + message is dropped. */ +template< typename T, typename DecrementType=continue_msg > +class limiter_node : public graph_node, public receiver< T >, public sender< T > { +public: + typedef T input_type; + typedef T output_type; + typedef typename receiver<input_type>::predecessor_type predecessor_type; + typedef typename sender<output_type>::successor_type successor_type; + //TODO: There is a lack of predefined types for its controlling "decrementer" port. It should be fixed later. + +private: + size_t my_threshold; + size_t my_count; // number of successful puts + size_t my_tries; // number of active put attempts + reservable_predecessor_cache< T, spin_mutex > my_predecessors; + spin_mutex my_mutex; + broadcast_cache< T > my_successors; + + //! 
The internal receiver< DecrementType > that adjusts the count + threshold_regulator< limiter_node<T, DecrementType>, DecrementType > decrement; + + graph_task* decrement_counter( long long delta ) { + { + spin_mutex::scoped_lock lock(my_mutex); + if( delta > 0 && size_t(delta) > my_count ) + my_count = 0; + else if( delta < 0 && size_t(delta) > my_threshold - my_count ) + my_count = my_threshold; + else + my_count -= size_t(delta); // absolute value of delta is sufficiently small + } + return forward_task(); + } + + // Let threshold_regulator call decrement_counter() + friend class threshold_regulator< limiter_node<T, DecrementType>, DecrementType >; + + friend class forward_task_bypass< limiter_node<T,DecrementType> >; + + bool check_conditions() { // always called under lock + return ( my_count + my_tries < my_threshold && !my_predecessors.empty() && !my_successors.empty() ); + } + + // only returns a valid task pointer or NULL, never SUCCESSFULLY_ENQUEUED + graph_task* forward_task() { + input_type v; + graph_task* rval = NULL; + bool reserved = false; + { + spin_mutex::scoped_lock lock(my_mutex); + if ( check_conditions() ) + ++my_tries; + else + return NULL; + } + + //SUCCESS + // if we can reserve and can put, we consume the reservation + // we increment the count and decrement the tries + if ( (my_predecessors.try_reserve(v)) == true ){ + reserved=true; + if ( (rval = my_successors.try_put_task(v)) != NULL ){ + { + spin_mutex::scoped_lock lock(my_mutex); + ++my_count; + --my_tries; + my_predecessors.try_consume(); + if ( check_conditions() ) { + if ( is_graph_active(this->my_graph) ) { + typedef forward_task_bypass<limiter_node<T, DecrementType>> task_type; + small_object_allocator allocator{}; + graph_task* rtask = allocator.new_object<task_type>( my_graph, allocator, *this ); + my_graph.reserve_wait(); + spawn_in_graph_arena(graph_reference(), *rtask); + } + } + } + return rval; + } + } + //FAILURE + //if we can't reserve, we decrement the tries + //if we can reserve but can't put, we decrement the tries and release the reservation + { + spin_mutex::scoped_lock lock(my_mutex); + --my_tries; + if (reserved) my_predecessors.try_release(); + if ( check_conditions() ) { + if ( is_graph_active(this->my_graph) ) { + small_object_allocator allocator{}; + typedef forward_task_bypass<limiter_node<T, DecrementType>> task_type; + graph_task* t = allocator.new_object<task_type>(my_graph, allocator, *this); + my_graph.reserve_wait(); + __TBB_ASSERT(!rval, "Have two tasks to handle"); + return t; + } + } + return rval; + } + } + + void initialize() { + fgt_node( + CODEPTR(), FLOW_LIMITER_NODE, &this->my_graph, + static_cast<receiver<input_type> *>(this), static_cast<receiver<DecrementType> *>(&decrement), + static_cast<sender<output_type> *>(this) + ); + } + +public: + //! Constructor + limiter_node(graph &g, size_t threshold) + : graph_node(g), my_threshold(threshold), my_count(0), my_tries(0), my_predecessors(this) + , my_successors(this), decrement(this) + { + initialize(); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... Args> + limiter_node(const node_set<Args...>& nodes, size_t threshold) + : limiter_node(nodes.graph_reference(), threshold) { + make_edges_in_order(nodes, *this); + } +#endif + + //! Copy constructor + limiter_node( const limiter_node& src ) : limiter_node(src.my_graph, src.my_threshold) {} + + //! The interface for accessing internal receiver< DecrementType > that adjusts the count + receiver<DecrementType>& decrementer() { return decrement; } + + //! 
Replace the current successor with this new successor + bool register_successor( successor_type &r ) override { + spin_mutex::scoped_lock lock(my_mutex); + bool was_empty = my_successors.empty(); + my_successors.register_successor(r); + //spawn a forward task if this is the only successor + if ( was_empty && !my_predecessors.empty() && my_count + my_tries < my_threshold ) { + if ( is_graph_active(this->my_graph) ) { + small_object_allocator allocator{}; + typedef forward_task_bypass<limiter_node<T, DecrementType>> task_type; + graph_task* t = allocator.new_object<task_type>(my_graph, allocator, *this); + my_graph.reserve_wait(); + spawn_in_graph_arena(graph_reference(), *t); + } + } + return true; + } + + //! Removes a successor from this node + /** r.remove_predecessor(*this) is also called. */ + bool remove_successor( successor_type &r ) override { + // TODO revamp: investigate why qualification is needed for remove_predecessor() call + tbb::detail::d1::remove_predecessor(r, *this); + my_successors.remove_successor(r); + return true; + } + + //! Adds src to the list of cached predecessors. + bool register_predecessor( predecessor_type &src ) override { + spin_mutex::scoped_lock lock(my_mutex); + my_predecessors.add( src ); + if ( my_count + my_tries < my_threshold && !my_successors.empty() && is_graph_active(this->my_graph) ) { + small_object_allocator allocator{}; + typedef forward_task_bypass<limiter_node<T, DecrementType>> task_type; + graph_task* t = allocator.new_object<task_type>(my_graph, allocator, *this); + my_graph.reserve_wait(); + spawn_in_graph_arena(graph_reference(), *t); + } + return true; + } + + //! Removes src from the list of cached predecessors. + bool remove_predecessor( predecessor_type &src ) override { + my_predecessors.remove( src ); + return true; + } + +protected: + + template< typename R, typename B > friend class run_and_put_task; + template<typename X, typename Y> friend class broadcast_cache; + template<typename X, typename Y> friend class round_robin_cache; + //! Puts an item to this receiver + graph_task* try_put_task( const T &t ) override { + { + spin_mutex::scoped_lock lock(my_mutex); + if ( my_count + my_tries >= my_threshold ) + return NULL; + else + ++my_tries; + } + + graph_task* rtask = my_successors.try_put_task(t); + + if ( !rtask ) { // try_put_task failed. 
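        // Editorial note (not from the upstream TBB sources): no successor accepted the
        // message, so the speculative ++my_tries taken above is rolled back under the lock.
        // If forwarding is still possible (capacity left, predecessors and successors
        // present), a forward task is allocated and returned so that the caller spawns it
        // and the node later pulls from its predecessors instead.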
+ spin_mutex::scoped_lock lock(my_mutex); + --my_tries; + if (check_conditions() && is_graph_active(this->my_graph)) { + small_object_allocator allocator{}; + typedef forward_task_bypass<limiter_node<T, DecrementType>> task_type; + rtask = allocator.new_object<task_type>(my_graph, allocator, *this); + my_graph.reserve_wait(); + } + } + else { + spin_mutex::scoped_lock lock(my_mutex); + ++my_count; + --my_tries; + } + return rtask; + } + + graph& graph_reference() const override { return my_graph; } + + void reset_node( reset_flags f) override { + my_count = 0; + if(f & rf_clear_edges) { + my_predecessors.clear(); + my_successors.clear(); + } + else + { + my_predecessors.reset( ); + } + decrement.reset_receiver(f); + } +}; // limiter_node + +#include "detail/_flow_graph_join_impl.h" + +template<typename OutputTuple, typename JP=queueing> class join_node; + +template<typename OutputTuple> +class join_node<OutputTuple,reserving>: public unfolded_join_node<std::tuple_size<OutputTuple>::value, reserving_port, OutputTuple, reserving> { +private: + static const int N = std::tuple_size<OutputTuple>::value; + typedef unfolded_join_node<N, reserving_port, OutputTuple, reserving> unfolded_type; +public: + typedef OutputTuple output_type; + typedef typename unfolded_type::input_ports_type input_ports_type; + __TBB_NOINLINE_SYM explicit join_node(graph &g) : unfolded_type(g) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_RESERVING, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... Args> + __TBB_NOINLINE_SYM join_node(const node_set<Args...>& nodes, reserving = reserving()) : join_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + __TBB_NOINLINE_SYM join_node(const join_node &other) : unfolded_type(other) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_RESERVING, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +}; + +template<typename OutputTuple> +class join_node<OutputTuple,queueing>: public unfolded_join_node<std::tuple_size<OutputTuple>::value, queueing_port, OutputTuple, queueing> { +private: + static const int N = std::tuple_size<OutputTuple>::value; + typedef unfolded_join_node<N, queueing_port, OutputTuple, queueing> unfolded_type; +public: + typedef OutputTuple output_type; + typedef typename unfolded_type::input_ports_type input_ports_type; + __TBB_NOINLINE_SYM explicit join_node(graph &g) : unfolded_type(g) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_QUEUEING, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... Args> + __TBB_NOINLINE_SYM join_node(const node_set<Args...>& nodes, queueing = queueing()) : join_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + __TBB_NOINLINE_SYM join_node(const join_node &other) : unfolded_type(other) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_QUEUEING, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +}; + +// template for key_matching join_node +// tag_matching join_node is a specialization of key_matching, and is source-compatible. 
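// Editorial illustration, not part of the upstream TBB sources: a minimal sketch of a
// key_matching join_node that pairs two streams by an integer key. The message types
// OrderMsg and PaymentMsg are hypothetical; each constructor argument after the graph
// is the key-extraction body for the corresponding input port.
/*
    struct OrderMsg   { int id; double amount; };
    struct PaymentMsg { int id; double paid; };

    tbb::flow::graph g;
    tbb::flow::join_node< std::tuple<OrderMsg, PaymentMsg>,
                          tbb::flow::key_matching<int> >
        j( g,
           []( const OrderMsg& m )   { return m.id; },   // key body for port 0
           []( const PaymentMsg& m ) { return m.id; } ); // key body for port 1

    // Messages put to input_port<0>(j) and input_port<1>(j) with equal keys are
    // combined into a std::tuple and broadcast to j's successors.
*/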
+template<typename OutputTuple, typename K, typename KHash> +class join_node<OutputTuple, key_matching<K, KHash> > : public unfolded_join_node<std::tuple_size<OutputTuple>::value, + key_matching_port, OutputTuple, key_matching<K,KHash> > { +private: + static const int N = std::tuple_size<OutputTuple>::value; + typedef unfolded_join_node<N, key_matching_port, OutputTuple, key_matching<K,KHash> > unfolded_type; +public: + typedef OutputTuple output_type; + typedef typename unfolded_type::input_ports_type input_ports_type; + +#if __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING + join_node(graph &g) : unfolded_type(g) {} +#endif /* __TBB_PREVIEW_MESSAGE_BASED_KEY_MATCHING */ + + template<typename __TBB_B0, typename __TBB_B1> + __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1) : unfolded_type(g, b0, b1) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + template<typename __TBB_B0, typename __TBB_B1, typename __TBB_B2> + __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2) : unfolded_type(g, b0, b1, b2) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + template<typename __TBB_B0, typename __TBB_B1, typename __TBB_B2, typename __TBB_B3> + __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3) : unfolded_type(g, b0, b1, b2, b3) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + template<typename __TBB_B0, typename __TBB_B1, typename __TBB_B2, typename __TBB_B3, typename __TBB_B4> + __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3, __TBB_B4 b4) : + unfolded_type(g, b0, b1, b2, b3, b4) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } +#if __TBB_VARIADIC_MAX >= 6 + template<typename __TBB_B0, typename __TBB_B1, typename __TBB_B2, typename __TBB_B3, typename __TBB_B4, + typename __TBB_B5> + __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3, __TBB_B4 b4, __TBB_B5 b5) : + unfolded_type(g, b0, b1, b2, b3, b4, b5) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } +#endif +#if __TBB_VARIADIC_MAX >= 7 + template<typename __TBB_B0, typename __TBB_B1, typename __TBB_B2, typename __TBB_B3, typename __TBB_B4, + typename __TBB_B5, typename __TBB_B6> + __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3, __TBB_B4 b4, __TBB_B5 b5, __TBB_B6 b6) : + unfolded_type(g, b0, b1, b2, b3, b4, b5, b6) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } +#endif +#if __TBB_VARIADIC_MAX >= 8 + template<typename __TBB_B0, typename __TBB_B1, typename __TBB_B2, typename __TBB_B3, typename __TBB_B4, + typename __TBB_B5, typename __TBB_B6, typename __TBB_B7> + __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3, __TBB_B4 b4, __TBB_B5 b5, __TBB_B6 b6, + __TBB_B7 b7) : unfolded_type(g, b0, b1, b2, b3, b4, b5, b6, b7) { + fgt_multiinput_node<N>( CODEPTR(), 
FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } +#endif +#if __TBB_VARIADIC_MAX >= 9 + template<typename __TBB_B0, typename __TBB_B1, typename __TBB_B2, typename __TBB_B3, typename __TBB_B4, + typename __TBB_B5, typename __TBB_B6, typename __TBB_B7, typename __TBB_B8> + __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3, __TBB_B4 b4, __TBB_B5 b5, __TBB_B6 b6, + __TBB_B7 b7, __TBB_B8 b8) : unfolded_type(g, b0, b1, b2, b3, b4, b5, b6, b7, b8) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } +#endif +#if __TBB_VARIADIC_MAX >= 10 + template<typename __TBB_B0, typename __TBB_B1, typename __TBB_B2, typename __TBB_B3, typename __TBB_B4, + typename __TBB_B5, typename __TBB_B6, typename __TBB_B7, typename __TBB_B8, typename __TBB_B9> + __TBB_NOINLINE_SYM join_node(graph &g, __TBB_B0 b0, __TBB_B1 b1, __TBB_B2 b2, __TBB_B3 b3, __TBB_B4 b4, __TBB_B5 b5, __TBB_B6 b6, + __TBB_B7 b7, __TBB_B8 b8, __TBB_B9 b9) : unfolded_type(g, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } +#endif + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template < +#if (__clang_major__ == 3 && __clang_minor__ == 4) + // clang 3.4 misdeduces 'Args...' for 'node_set' while it can cope with template template parameter. + template<typename...> class node_set, +#endif + typename... Args, typename... Bodies + > + __TBB_NOINLINE_SYM join_node(const node_set<Args...>& nodes, Bodies... bodies) + : join_node(nodes.graph_reference(), bodies...) { + make_edges_in_order(nodes, *this); + } +#endif + + __TBB_NOINLINE_SYM join_node(const join_node &other) : unfolded_type(other) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_JOIN_NODE_TAG_MATCHING, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +}; + +// indexer node +#include "detail/_flow_graph_indexer_impl.h" + +// TODO: Implement interface with variadic template or tuple +template<typename T0, typename T1=null_type, typename T2=null_type, typename T3=null_type, + typename T4=null_type, typename T5=null_type, typename T6=null_type, + typename T7=null_type, typename T8=null_type, typename T9=null_type> class indexer_node; + +//indexer node specializations +template<typename T0> +class indexer_node<T0> : public unfolded_indexer_node<std::tuple<T0> > { +private: + static const int N = 1; +public: + typedef std::tuple<T0> InputTuple; + typedef tagged_msg<size_t, T0> output_type; + typedef unfolded_indexer_node<InputTuple> unfolded_type; + __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... 
Args> + indexer_node(const node_set<Args...>& nodes) : indexer_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + // Copy constructor + __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } +}; + +template<typename T0, typename T1> +class indexer_node<T0, T1> : public unfolded_indexer_node<std::tuple<T0, T1> > { +private: + static const int N = 2; +public: + typedef std::tuple<T0, T1> InputTuple; + typedef tagged_msg<size_t, T0, T1> output_type; + typedef unfolded_indexer_node<InputTuple> unfolded_type; + __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... Args> + indexer_node(const node_set<Args...>& nodes) : indexer_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + // Copy constructor + __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +}; + +template<typename T0, typename T1, typename T2> +class indexer_node<T0, T1, T2> : public unfolded_indexer_node<std::tuple<T0, T1, T2> > { +private: + static const int N = 3; +public: + typedef std::tuple<T0, T1, T2> InputTuple; + typedef tagged_msg<size_t, T0, T1, T2> output_type; + typedef unfolded_indexer_node<InputTuple> unfolded_type; + __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... Args> + indexer_node(const node_set<Args...>& nodes) : indexer_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + // Copy constructor + __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +}; + +template<typename T0, typename T1, typename T2, typename T3> +class indexer_node<T0, T1, T2, T3> : public unfolded_indexer_node<std::tuple<T0, T1, T2, T3> > { +private: + static const int N = 4; +public: + typedef std::tuple<T0, T1, T2, T3> InputTuple; + typedef tagged_msg<size_t, T0, T1, T2, T3> output_type; + typedef unfolded_indexer_node<InputTuple> unfolded_type; + __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... 
Args> + indexer_node(const node_set<Args...>& nodes) : indexer_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + // Copy constructor + __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +}; + +template<typename T0, typename T1, typename T2, typename T3, typename T4> +class indexer_node<T0, T1, T2, T3, T4> : public unfolded_indexer_node<std::tuple<T0, T1, T2, T3, T4> > { +private: + static const int N = 5; +public: + typedef std::tuple<T0, T1, T2, T3, T4> InputTuple; + typedef tagged_msg<size_t, T0, T1, T2, T3, T4> output_type; + typedef unfolded_indexer_node<InputTuple> unfolded_type; + __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... Args> + indexer_node(const node_set<Args...>& nodes) : indexer_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + // Copy constructor + __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +}; + +#if __TBB_VARIADIC_MAX >= 6 +template<typename T0, typename T1, typename T2, typename T3, typename T4, typename T5> +class indexer_node<T0, T1, T2, T3, T4, T5> : public unfolded_indexer_node<std::tuple<T0, T1, T2, T3, T4, T5> > { +private: + static const int N = 6; +public: + typedef std::tuple<T0, T1, T2, T3, T4, T5> InputTuple; + typedef tagged_msg<size_t, T0, T1, T2, T3, T4, T5> output_type; + typedef unfolded_indexer_node<InputTuple> unfolded_type; + __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... Args> + indexer_node(const node_set<Args...>& nodes) : indexer_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + // Copy constructor + __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +}; +#endif //variadic max 6 + +#if __TBB_VARIADIC_MAX >= 7 +template<typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6> +class indexer_node<T0, T1, T2, T3, T4, T5, T6> : public unfolded_indexer_node<std::tuple<T0, T1, T2, T3, T4, T5, T6> > { +private: + static const int N = 7; +public: + typedef std::tuple<T0, T1, T2, T3, T4, T5, T6> InputTuple; + typedef tagged_msg<size_t, T0, T1, T2, T3, T4, T5, T6> output_type; + typedef unfolded_indexer_node<InputTuple> unfolded_type; + __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... 
Args> + indexer_node(const node_set<Args...>& nodes) : indexer_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + // Copy constructor + __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +}; +#endif //variadic max 7 + +#if __TBB_VARIADIC_MAX >= 8 +template<typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7> +class indexer_node<T0, T1, T2, T3, T4, T5, T6, T7> : public unfolded_indexer_node<std::tuple<T0, T1, T2, T3, T4, T5, T6, T7> > { +private: + static const int N = 8; +public: + typedef std::tuple<T0, T1, T2, T3, T4, T5, T6, T7> InputTuple; + typedef tagged_msg<size_t, T0, T1, T2, T3, T4, T5, T6, T7> output_type; + typedef unfolded_indexer_node<InputTuple> unfolded_type; + indexer_node(graph& g) : unfolded_type(g) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... Args> + indexer_node(const node_set<Args...>& nodes) : indexer_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + // Copy constructor + indexer_node( const indexer_node& other ) : unfolded_type(other) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +}; +#endif //variadic max 8 + +#if __TBB_VARIADIC_MAX >= 9 +template<typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8> +class indexer_node<T0, T1, T2, T3, T4, T5, T6, T7, T8> : public unfolded_indexer_node<std::tuple<T0, T1, T2, T3, T4, T5, T6, T7, T8> > { +private: + static const int N = 9; +public: + typedef std::tuple<T0, T1, T2, T3, T4, T5, T6, T7, T8> InputTuple; + typedef tagged_msg<size_t, T0, T1, T2, T3, T4, T5, T6, T7, T8> output_type; + typedef unfolded_indexer_node<InputTuple> unfolded_type; + __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... 
Args> + indexer_node(const node_set<Args...>& nodes) : indexer_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + // Copy constructor + __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +}; +#endif //variadic max 9 + +#if __TBB_VARIADIC_MAX >= 10 +template<typename T0, typename T1, typename T2, typename T3, typename T4, typename T5, + typename T6, typename T7, typename T8, typename T9> +class indexer_node/*default*/ : public unfolded_indexer_node<std::tuple<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9> > { +private: + static const int N = 10; +public: + typedef std::tuple<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9> InputTuple; + typedef tagged_msg<size_t, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9> output_type; + typedef unfolded_indexer_node<InputTuple> unfolded_type; + __TBB_NOINLINE_SYM indexer_node(graph& g) : unfolded_type(g) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... Args> + indexer_node(const node_set<Args...>& nodes) : indexer_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + // Copy constructor + __TBB_NOINLINE_SYM indexer_node( const indexer_node& other ) : unfolded_type(other) { + fgt_multiinput_node<N>( CODEPTR(), FLOW_INDEXER_NODE, &this->my_graph, + this->input_ports(), static_cast< sender< output_type > *>(this) ); + } + +}; +#endif //variadic max 10 + +template< typename T > +inline void internal_make_edge( sender<T> &p, receiver<T> &s ) { + register_successor(p, s); + fgt_make_edge( &p, &s ); +} + +//! Makes an edge between a single predecessor and a single successor +template< typename T > +inline void make_edge( sender<T> &p, receiver<T> &s ) { + internal_make_edge( p, s ); +} + +//Makes an edge from port 0 of a multi-output predecessor to port 0 of a multi-input successor. +template< typename T, typename V, + typename = typename T::output_ports_type, typename = typename V::input_ports_type > +inline void make_edge( T& output, V& input) { + make_edge(std::get<0>(output.output_ports()), std::get<0>(input.input_ports())); +} + +//Makes an edge from port 0 of a multi-output predecessor to a receiver. +template< typename T, typename R, + typename = typename T::output_ports_type > +inline void make_edge( T& output, receiver<R>& input) { + make_edge(std::get<0>(output.output_ports()), input); +} + +//Makes an edge from a sender to port 0 of a multi-input successor. +template< typename S, typename V, + typename = typename V::input_ports_type > +inline void make_edge( sender<S>& output, V& input) { + make_edge(output, std::get<0>(input.input_ports())); +} + +template< typename T > +inline void internal_remove_edge( sender<T> &p, receiver<T> &s ) { + remove_successor( p, s ); + fgt_remove_edge( &p, &s ); +} + +//! Removes an edge between a single predecessor and a single successor +template< typename T > +inline void remove_edge( sender<T> &p, receiver<T> &s ) { + internal_remove_edge( p, s ); +} + +//Removes an edge between port 0 of a multi-output predecessor and port 0 of a multi-input successor. 
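// Editorial illustration, not part of the upstream TBB sources: how the port-0
// convenience overloads of make_edge/remove_edge are typically used. The nodes
// below are hypothetical; `m` is a multi-output node, `f` a single-input node.
/*
    using mf_node = tbb::flow::multifunction_node< int, std::tuple<int> >;

    tbb::flow::graph g;
    mf_node m( g, tbb::flow::unlimited,
               []( const int& v, mf_node::output_ports_type& ports ) {
                   std::get<0>( ports ).try_put( v + 1 );   // emit on port 0
               } );
    tbb::flow::function_node< int, int > f( g, tbb::flow::unlimited,
                                            []( int v ) { return v * 2; } );

    make_edge( m, f );      // equivalent to make_edge( output_port<0>(m), f )
    remove_edge( m, f );    // equivalent to remove_edge( output_port<0>(m), f )
*/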
+template< typename T, typename V, + typename = typename T::output_ports_type, typename = typename V::input_ports_type > +inline void remove_edge( T& output, V& input) { + remove_edge(std::get<0>(output.output_ports()), std::get<0>(input.input_ports())); +} + +//Removes an edge between port 0 of a multi-output predecessor and a receiver. +template< typename T, typename R, + typename = typename T::output_ports_type > +inline void remove_edge( T& output, receiver<R>& input) { + remove_edge(std::get<0>(output.output_ports()), input); +} +//Removes an edge between a sender and port 0 of a multi-input successor. +template< typename S, typename V, + typename = typename V::input_ports_type > +inline void remove_edge( sender<S>& output, V& input) { + remove_edge(output, std::get<0>(input.input_ports())); +} + +//! Returns a copy of the body from a function or continue node +template< typename Body, typename Node > +Body copy_body( Node &n ) { + return n.template copy_function_object<Body>(); +} + +//composite_node +template< typename InputTuple, typename OutputTuple > class composite_node; + +template< typename... InputTypes, typename... OutputTypes> +class composite_node <std::tuple<InputTypes...>, std::tuple<OutputTypes...> > : public graph_node { + +public: + typedef std::tuple< receiver<InputTypes>&... > input_ports_type; + typedef std::tuple< sender<OutputTypes>&... > output_ports_type; + +private: + std::unique_ptr<input_ports_type> my_input_ports; + std::unique_ptr<output_ports_type> my_output_ports; + + static const size_t NUM_INPUTS = sizeof...(InputTypes); + static const size_t NUM_OUTPUTS = sizeof...(OutputTypes); + +protected: + void reset_node(reset_flags) override {} + +public: + composite_node( graph &g ) : graph_node(g) { + fgt_multiinput_multioutput_node( CODEPTR(), FLOW_COMPOSITE_NODE, this, &this->my_graph ); + } + + template<typename T1, typename T2> + void set_external_ports(T1&& input_ports_tuple, T2&& output_ports_tuple) { + static_assert(NUM_INPUTS == std::tuple_size<input_ports_type>::value, "number of arguments does not match number of input ports"); + static_assert(NUM_OUTPUTS == std::tuple_size<output_ports_type>::value, "number of arguments does not match number of output ports"); + + fgt_internal_input_alias_helper<T1, NUM_INPUTS>::alias_port( this, input_ports_tuple); + fgt_internal_output_alias_helper<T2, NUM_OUTPUTS>::alias_port( this, output_ports_tuple); + + my_input_ports.reset( new input_ports_type(std::forward<T1>(input_ports_tuple)) ); + my_output_ports.reset( new output_ports_type(std::forward<T2>(output_ports_tuple)) ); + } + + template< typename... NodeTypes > + void add_visible_nodes(const NodeTypes&... n) { add_nodes_impl(this, true, n...); } + + template< typename... NodeTypes > + void add_nodes(const NodeTypes&... n) { add_nodes_impl(this, false, n...); } + + + input_ports_type& input_ports() { + __TBB_ASSERT(my_input_ports, "input ports not set, call set_external_ports to set input ports"); + return *my_input_ports; + } + + output_ports_type& output_ports() { + __TBB_ASSERT(my_output_ports, "output ports not set, call set_external_ports to set output ports"); + return *my_output_ports; + } +}; // class composite_node + +//composite_node with only input ports +template< typename... InputTypes> +class composite_node <std::tuple<InputTypes...>, std::tuple<> > : public graph_node { +public: + typedef std::tuple< receiver<InputTypes>&... 
> input_ports_type; + +private: + std::unique_ptr<input_ports_type> my_input_ports; + static const size_t NUM_INPUTS = sizeof...(InputTypes); + +protected: + void reset_node(reset_flags) override {} + +public: + composite_node( graph &g ) : graph_node(g) { + fgt_composite( CODEPTR(), this, &g ); + } + + template<typename T> + void set_external_ports(T&& input_ports_tuple) { + static_assert(NUM_INPUTS == std::tuple_size<input_ports_type>::value, "number of arguments does not match number of input ports"); + + fgt_internal_input_alias_helper<T, NUM_INPUTS>::alias_port( this, input_ports_tuple); + + my_input_ports.reset( new input_ports_type(std::forward<T>(input_ports_tuple)) ); + } + + template< typename... NodeTypes > + void add_visible_nodes(const NodeTypes&... n) { add_nodes_impl(this, true, n...); } + + template< typename... NodeTypes > + void add_nodes( const NodeTypes&... n) { add_nodes_impl(this, false, n...); } + + + input_ports_type& input_ports() { + __TBB_ASSERT(my_input_ports, "input ports not set, call set_external_ports to set input ports"); + return *my_input_ports; + } + +}; // class composite_node + +//composite_nodes with only output_ports +template<typename... OutputTypes> +class composite_node <std::tuple<>, std::tuple<OutputTypes...> > : public graph_node { +public: + typedef std::tuple< sender<OutputTypes>&... > output_ports_type; + +private: + std::unique_ptr<output_ports_type> my_output_ports; + static const size_t NUM_OUTPUTS = sizeof...(OutputTypes); + +protected: + void reset_node(reset_flags) override {} + +public: + __TBB_NOINLINE_SYM composite_node( graph &g ) : graph_node(g) { + fgt_composite( CODEPTR(), this, &g ); + } + + template<typename T> + void set_external_ports(T&& output_ports_tuple) { + static_assert(NUM_OUTPUTS == std::tuple_size<output_ports_type>::value, "number of arguments does not match number of output ports"); + + fgt_internal_output_alias_helper<T, NUM_OUTPUTS>::alias_port( this, output_ports_tuple); + + my_output_ports.reset( new output_ports_type(std::forward<T>(output_ports_tuple)) ); + } + + template<typename... NodeTypes > + void add_visible_nodes(const NodeTypes&... n) { add_nodes_impl(this, true, n...); } + + template<typename... NodeTypes > + void add_nodes(const NodeTypes&... n) { add_nodes_impl(this, false, n...); } + + + output_ports_type& output_ports() { + __TBB_ASSERT(my_output_ports, "output ports not set, call set_external_ports to set output ports"); + return *my_output_ports; + } + +}; // class composite_node + +template<typename Gateway> +class async_body_base: no_assign { +public: + typedef Gateway gateway_type; + + async_body_base(gateway_type *gateway): my_gateway(gateway) { } + void set_gateway(gateway_type *gateway) { + my_gateway = gateway; + } + +protected: + gateway_type *my_gateway; +}; + +template<typename Input, typename Ports, typename Gateway, typename Body> +class async_body: public async_body_base<Gateway> { +public: + typedef async_body_base<Gateway> base_type; + typedef Gateway gateway_type; + + async_body(const Body &body, gateway_type *gateway) + : base_type(gateway), my_body(body) { } + + void operator()( const Input &v, Ports & ) { + my_body(v, *this->my_gateway); + } + + Body get_body() { return my_body; } + +private: + Body my_body; +}; + +//! 
Implements async node +template < typename Input, typename Output, + typename Policy = queueing_lightweight > +class async_node + : public multifunction_node< Input, std::tuple< Output >, Policy >, public sender< Output > +{ + typedef multifunction_node< Input, std::tuple< Output >, Policy > base_type; + typedef multifunction_input< + Input, typename base_type::output_ports_type, Policy, cache_aligned_allocator<Input>> mfn_input_type; + +public: + typedef Input input_type; + typedef Output output_type; + typedef receiver<input_type> receiver_type; + typedef receiver<output_type> successor_type; + typedef sender<input_type> predecessor_type; + typedef receiver_gateway<output_type> gateway_type; + typedef async_body_base<gateway_type> async_body_base_type; + typedef typename base_type::output_ports_type output_ports_type; + +private: + class receiver_gateway_impl: public receiver_gateway<Output> { + public: + receiver_gateway_impl(async_node* node): my_node(node) {} + void reserve_wait() override { + fgt_async_reserve(static_cast<typename async_node::receiver_type *>(my_node), &my_node->my_graph); + my_node->my_graph.reserve_wait(); + } + + void release_wait() override { + async_node* n = my_node; + graph* g = &n->my_graph; + g->release_wait(); + fgt_async_commit(static_cast<typename async_node::receiver_type *>(n), g); + } + + //! Implements gateway_type::try_put for an external activity to submit a message to FG + bool try_put(const Output &i) override { + return my_node->try_put_impl(i); + } + + private: + async_node* my_node; + } my_gateway; + + //The substitute of 'this' for member construction, to prevent compiler warnings + async_node* self() { return this; } + + //! Implements gateway_type::try_put for an external activity to submit a message to FG + bool try_put_impl(const Output &i) { + multifunction_output<Output> &port_0 = output_port<0>(*this); + broadcast_cache<output_type>& port_successors = port_0.successors(); + fgt_async_try_put_begin(this, &port_0); + // TODO revamp: change to std::list<graph_task*> + graph_task_list tasks; + bool is_at_least_one_put_successful = port_successors.gather_successful_try_puts(i, tasks); + __TBB_ASSERT( is_at_least_one_put_successful || tasks.empty(), + "Return status is inconsistent with the method operation." ); + + while( !tasks.empty() ) { + enqueue_in_graph_arena(this->my_graph, tasks.pop_front()); + } + fgt_async_try_put_end(this, &port_0); + return is_at_least_one_put_successful; + } + +public: + template<typename Body> + __TBB_NOINLINE_SYM async_node( + graph &g, size_t concurrency, + Body body, Policy = Policy(), node_priority_t a_priority = no_priority + ) : base_type( + g, concurrency, + async_body<Input, typename base_type::output_ports_type, gateway_type, Body> + (body, &my_gateway), a_priority ), my_gateway(self()) { + fgt_multioutput_node_with_body<1>( + CODEPTR(), FLOW_ASYNC_NODE, + &this->my_graph, static_cast<receiver<input_type> *>(this), + this->output_ports(), this->my_body + ); + } + + template <typename Body, typename... Args> + __TBB_NOINLINE_SYM async_node(graph& g, size_t concurrency, Body body, node_priority_t a_priority) + : async_node(g, concurrency, body, Policy(), a_priority) {} + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename Body, typename... 
Args> + __TBB_NOINLINE_SYM async_node( + const node_set<Args...>& nodes, size_t concurrency, Body body, + Policy = Policy(), node_priority_t a_priority = no_priority ) + : async_node(nodes.graph_reference(), concurrency, body, a_priority) { + make_edges_in_order(nodes, *this); + } + + template <typename Body, typename... Args> + __TBB_NOINLINE_SYM async_node(const node_set<Args...>& nodes, size_t concurrency, Body body, node_priority_t a_priority) + : async_node(nodes, concurrency, body, Policy(), a_priority) {} +#endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + + __TBB_NOINLINE_SYM async_node( const async_node &other ) : base_type(other), sender<Output>(), my_gateway(self()) { + static_cast<async_body_base_type*>(this->my_body->get_body_ptr())->set_gateway(&my_gateway); + static_cast<async_body_base_type*>(this->my_init_body->get_body_ptr())->set_gateway(&my_gateway); + + fgt_multioutput_node_with_body<1>( CODEPTR(), FLOW_ASYNC_NODE, + &this->my_graph, static_cast<receiver<input_type> *>(this), + this->output_ports(), this->my_body ); + } + + gateway_type& gateway() { + return my_gateway; + } + + // Define sender< Output > + + //! Add a new successor to this node + bool register_successor(successor_type&) override { + __TBB_ASSERT(false, "Successors must be registered only via ports"); + return false; + } + + //! Removes a successor from this node + bool remove_successor(successor_type&) override { + __TBB_ASSERT(false, "Successors must be removed only via ports"); + return false; + } + + template<typename Body> + Body copy_function_object() { + typedef multifunction_body<input_type, typename base_type::output_ports_type> mfn_body_type; + typedef async_body<Input, typename base_type::output_ports_type, gateway_type, Body> async_body_type; + mfn_body_type &body_ref = *this->my_body; + async_body_type ab = *static_cast<async_body_type*>(dynamic_cast< multifunction_body_leaf<input_type, typename base_type::output_ports_type, async_body_type> & >(body_ref).get_body_ptr()); + return ab.get_body(); + } + +protected: + + void reset_node( reset_flags f) override { + base_type::reset_node(f); + } +}; + +#include "detail/_flow_graph_node_set_impl.h" + +template< typename T > +class overwrite_node : public graph_node, public receiver<T>, public sender<T> { +public: + typedef T input_type; + typedef T output_type; + typedef typename receiver<input_type>::predecessor_type predecessor_type; + typedef typename sender<output_type>::successor_type successor_type; + + __TBB_NOINLINE_SYM explicit overwrite_node(graph &g) + : graph_node(g), my_successors(this), my_buffer_is_valid(false) + { + fgt_node( CODEPTR(), FLOW_OVERWRITE_NODE, &this->my_graph, + static_cast<receiver<input_type> *>(this), static_cast<sender<output_type> *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... Args> + overwrite_node(const node_set<Args...>& nodes) : overwrite_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + //! Copy constructor; doesn't take anything from src; default won't work + __TBB_NOINLINE_SYM overwrite_node( const overwrite_node& src ) : overwrite_node(src.my_graph) {} + + ~overwrite_node() {} + + bool register_successor( successor_type &s ) override { + spin_mutex::scoped_lock l( my_mutex ); + if (my_buffer_is_valid && is_graph_active( my_graph )) { + // We have a valid value that must be forwarded immediately. 
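            // Editorial note (not from the upstream TBB sources): the put below is attempted
            // under my_mutex; the successor is recorded only if it accepts the buffered value,
            // otherwise a register_predecessor_task is spawned (see the else branch) to break
            // the push/pull contention with a reserving successor.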
+ bool ret = s.try_put( my_buffer ); + if ( ret ) { + // We add the successor that accepted our put + my_successors.register_successor( s ); + } else { + // In case of reservation a race between the moment of reservation and register_successor can appear, + // because failed reserve does not mean that register_successor is not ready to put a message immediately. + // We have some sort of infinite loop: reserving node tries to set pull state for the edge, + // but overwrite_node tries to return push state back. That is why we have to break this loop with task creation. + small_object_allocator allocator{}; + typedef register_predecessor_task task_type; + graph_task* t = allocator.new_object<task_type>(graph_reference(), allocator, *this, s); + graph_reference().reserve_wait(); + spawn_in_graph_arena( my_graph, *t ); + } + } else { + // No valid value yet, just add as successor + my_successors.register_successor( s ); + } + return true; + } + + bool remove_successor( successor_type &s ) override { + spin_mutex::scoped_lock l( my_mutex ); + my_successors.remove_successor(s); + return true; + } + + bool try_get( input_type &v ) override { + spin_mutex::scoped_lock l( my_mutex ); + if ( my_buffer_is_valid ) { + v = my_buffer; + return true; + } + return false; + } + + //! Reserves an item + bool try_reserve( T &v ) override { + return try_get(v); + } + + //! Releases the reserved item + bool try_release() override { return true; } + + //! Consumes the reserved item + bool try_consume() override { return true; } + + bool is_valid() { + spin_mutex::scoped_lock l( my_mutex ); + return my_buffer_is_valid; + } + + void clear() { + spin_mutex::scoped_lock l( my_mutex ); + my_buffer_is_valid = false; + } + +protected: + + template< typename R, typename B > friend class run_and_put_task; + template<typename X, typename Y> friend class broadcast_cache; + template<typename X, typename Y> friend class round_robin_cache; + graph_task* try_put_task( const input_type &v ) override { + spin_mutex::scoped_lock l( my_mutex ); + return try_put_task_impl(v); + } + + graph_task * try_put_task_impl(const input_type &v) { + my_buffer = v; + my_buffer_is_valid = true; + graph_task* rtask = my_successors.try_put_task(v); + if (!rtask) rtask = SUCCESSFULLY_ENQUEUED; + return rtask; + } + + graph& graph_reference() const override { + return my_graph; + } + + //! 
Breaks an infinite loop between the node reservation and register_successor call + struct register_predecessor_task : public graph_task { + register_predecessor_task( + graph& g, small_object_allocator& allocator, predecessor_type& owner, successor_type& succ) + : graph_task(g, allocator), o(owner), s(succ) {}; + + task* execute(execution_data& ed) override { + // TODO revamp: investigate why qualification is needed for register_successor() call + using tbb::detail::d1::register_predecessor; + using tbb::detail::d1::register_successor; + if ( !register_predecessor(s, o) ) { + register_successor(o, s); + } + finalize(ed); + return nullptr; + } + + predecessor_type& o; + successor_type& s; + }; + + spin_mutex my_mutex; + broadcast_cache< input_type, null_rw_mutex > my_successors; + input_type my_buffer; + bool my_buffer_is_valid; + + void reset_node( reset_flags f) override { + my_buffer_is_valid = false; + if (f&rf_clear_edges) { + my_successors.clear(); + } + } +}; // overwrite_node + +template< typename T > +class write_once_node : public overwrite_node<T> { +public: + typedef T input_type; + typedef T output_type; + typedef overwrite_node<T> base_type; + typedef typename receiver<input_type>::predecessor_type predecessor_type; + typedef typename sender<output_type>::successor_type successor_type; + + //! Constructor + __TBB_NOINLINE_SYM explicit write_once_node(graph& g) : base_type(g) { + fgt_node( CODEPTR(), FLOW_WRITE_ONCE_NODE, &(this->my_graph), + static_cast<receiver<input_type> *>(this), + static_cast<sender<output_type> *>(this) ); + } + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + template <typename... Args> + write_once_node(const node_set<Args...>& nodes) : write_once_node(nodes.graph_reference()) { + make_edges_in_order(nodes, *this); + } +#endif + + //! Copy constructor: call base class copy constructor + __TBB_NOINLINE_SYM write_once_node( const write_once_node& src ) : base_type(src) { + fgt_node( CODEPTR(), FLOW_WRITE_ONCE_NODE, &(this->my_graph), + static_cast<receiver<input_type> *>(this), + static_cast<sender<output_type> *>(this) ); + } + +protected: + template< typename R, typename B > friend class run_and_put_task; + template<typename X, typename Y> friend class broadcast_cache; + template<typename X, typename Y> friend class round_robin_cache; + graph_task *try_put_task( const T &v ) override { + spin_mutex::scoped_lock l( this->my_mutex ); + return this->my_buffer_is_valid ? 
NULL : this->try_put_task_impl(v); + } +}; // write_once_node + +inline void set_name(const graph& g, const char *name) { + fgt_graph_desc(&g, name); +} + +template <typename Output> +inline void set_name(const input_node<Output>& node, const char *name) { + fgt_node_desc(&node, name); +} + +template <typename Input, typename Output, typename Policy> +inline void set_name(const function_node<Input, Output, Policy>& node, const char *name) { + fgt_node_desc(&node, name); +} + +template <typename Output, typename Policy> +inline void set_name(const continue_node<Output,Policy>& node, const char *name) { + fgt_node_desc(&node, name); +} + +template <typename T> +inline void set_name(const broadcast_node<T>& node, const char *name) { + fgt_node_desc(&node, name); +} + +template <typename T> +inline void set_name(const buffer_node<T>& node, const char *name) { + fgt_node_desc(&node, name); +} + +template <typename T> +inline void set_name(const queue_node<T>& node, const char *name) { + fgt_node_desc(&node, name); +} + +template <typename T> +inline void set_name(const sequencer_node<T>& node, const char *name) { + fgt_node_desc(&node, name); +} + +template <typename T, typename Compare> +inline void set_name(const priority_queue_node<T, Compare>& node, const char *name) { + fgt_node_desc(&node, name); +} + +template <typename T, typename DecrementType> +inline void set_name(const limiter_node<T, DecrementType>& node, const char *name) { + fgt_node_desc(&node, name); +} + +template <typename OutputTuple, typename JP> +inline void set_name(const join_node<OutputTuple, JP>& node, const char *name) { + fgt_node_desc(&node, name); +} + +template <typename... Types> +inline void set_name(const indexer_node<Types...>& node, const char *name) { + fgt_node_desc(&node, name); +} + +template <typename T> +inline void set_name(const overwrite_node<T>& node, const char *name) { + fgt_node_desc(&node, name); +} + +template <typename T> +inline void set_name(const write_once_node<T>& node, const char *name) { + fgt_node_desc(&node, name); +} + +template<typename Input, typename Output, typename Policy> +inline void set_name(const multifunction_node<Input, Output, Policy>& node, const char *name) { + fgt_multioutput_node_desc(&node, name); +} + +template<typename TupleType> +inline void set_name(const split_node<TupleType>& node, const char *name) { + fgt_multioutput_node_desc(&node, name); +} + +template< typename InputTuple, typename OutputTuple > +inline void set_name(const composite_node<InputTuple, OutputTuple>& node, const char *name) { + fgt_multiinput_multioutput_node_desc(&node, name); +} + +template<typename Input, typename Output, typename Policy> +inline void set_name(const async_node<Input, Output, Policy>& node, const char *name) +{ + fgt_multioutput_node_desc(&node, name); +} +} // d1 +} // detail +} // tbb + + +// Include deduction guides for node classes +#include "detail/_flow_graph_nodes_deduction.h" + +namespace tbb { +namespace flow { +inline namespace v1 { + using detail::d1::receiver; + using detail::d1::sender; + + using detail::d1::serial; + using detail::d1::unlimited; + + using detail::d1::reset_flags; + using detail::d1::rf_reset_protocol; + using detail::d1::rf_reset_bodies; + using detail::d1::rf_clear_edges; + + using detail::d1::graph; + using detail::d1::graph_node; + using detail::d1::continue_msg; + + using detail::d1::input_node; + using detail::d1::function_node; + using detail::d1::multifunction_node; + using detail::d1::split_node; + using detail::d1::output_port; + 
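
// Illustrative sketch (editorial example, not part of the vendored header):
// overwrite_node keeps the most recent value, write_once_node keeps only the
// first one; both can be read back with try_get().  profiling::set_name() is
// a no-op unless profiling/tracing tools are enabled.  Names such as
// buffers_demo are made up for the example.
#include "oneapi/tbb/flow_graph.h"
#include <cassert>

void buffers_demo() {
    tbb::flow::graph g;
    tbb::flow::overwrite_node<int> latest(g);
    tbb::flow::write_once_node<int> first(g);
    tbb::profiling::set_name(latest, "latest_value");  // visible to tracing tools only

    latest.try_put(1);
    latest.try_put(2);      // overwrites the previous value
    first.try_put(1);
    first.try_put(2);       // ignored: the buffer is already valid
    g.wait_for_all();

    int v = 0;
    assert(latest.try_get(v) && v == 2);
    assert(first.try_get(v) && v == 1);
}
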
using detail::d1::indexer_node; + using detail::d1::tagged_msg; + using detail::d1::cast_to; + using detail::d1::is_a; + using detail::d1::continue_node; + using detail::d1::overwrite_node; + using detail::d1::write_once_node; + using detail::d1::broadcast_node; + using detail::d1::buffer_node; + using detail::d1::queue_node; + using detail::d1::sequencer_node; + using detail::d1::priority_queue_node; + using detail::d1::limiter_node; + using namespace detail::d1::graph_policy_namespace; + using detail::d1::join_node; + using detail::d1::input_port; + using detail::d1::copy_body; + using detail::d1::make_edge; + using detail::d1::remove_edge; + using detail::d1::tag_value; + using detail::d1::composite_node; + using detail::d1::async_node; + using detail::d1::node_priority_t; + using detail::d1::no_priority; + +#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET + using detail::d1::follows; + using detail::d1::precedes; + using detail::d1::make_node_set; + using detail::d1::make_edges; +#endif + +} // v1 +} // flow + + using detail::d1::flow_control; + +namespace profiling { + using detail::d1::set_name; +} // profiling + +} // tbb + + +#if TBB_USE_PROFILING_TOOLS && ( __linux__ || __APPLE__ ) + // We don't do pragma pop here, since it still gives warning on the USER side + #undef __TBB_NOINLINE_SYM +#endif + +#endif // __TBB_flow_graph_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/flow_graph_abstractions.h b/contrib/libs/tbb/include/oneapi/tbb/flow_graph_abstractions.h new file mode 100644 index 0000000000..121f167c4d --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/flow_graph_abstractions.h @@ -0,0 +1,51 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_flow_graph_abstractions_H +#define __TBB_flow_graph_abstractions_H + +namespace tbb { +namespace detail { +namespace d1 { + +//! Pure virtual template classes that define interfaces for async communication +class graph_proxy { +public: + //! Inform a graph that messages may come from outside, to prevent premature graph completion + virtual void reserve_wait() = 0; + + //! Inform a graph that a previous call to reserve_wait is no longer in effect + virtual void release_wait() = 0; + + virtual ~graph_proxy() {} +}; + +template <typename Input> +class receiver_gateway : public graph_proxy { +public: + //! Type of inputing data into FG. + typedef Input input_type; + + //! Submit signal from an asynchronous activity to FG. + virtual bool try_put(const input_type&) = 0; +}; + +} // d1 + + +} // detail +} // tbb +#endif diff --git a/contrib/libs/tbb/include/oneapi/tbb/global_control.h b/contrib/libs/tbb/include/oneapi/tbb/global_control.h new file mode 100644 index 0000000000..80177b6b82 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/global_control.h @@ -0,0 +1,188 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_global_control_H +#define __TBB_global_control_H + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" +#include "detail/_assert.h" +#include "detail/_template_helpers.h" +#include "detail/_exception.h" + +#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE +#include <new> // std::nothrow_t +#endif +#include <cstddef> + +namespace tbb { +namespace detail { + +namespace d1 { +class global_control; +#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE +class task_scheduler_handle; +#endif +} + +namespace r1 { +void __TBB_EXPORTED_FUNC create(d1::global_control&); +void __TBB_EXPORTED_FUNC destroy(d1::global_control&); +std::size_t __TBB_EXPORTED_FUNC global_control_active_value(int); +struct global_control_impl; +struct control_storage_comparator; +#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE +void release_impl(d1::task_scheduler_handle& handle); +bool finalize_impl(d1::task_scheduler_handle& handle); +void __TBB_EXPORTED_FUNC get(d1::task_scheduler_handle&); +bool __TBB_EXPORTED_FUNC finalize(d1::task_scheduler_handle&, std::intptr_t mode); +#endif +} + +namespace d1 { + +class global_control { +public: + enum parameter { + max_allowed_parallelism, + thread_stack_size, + terminate_on_exception, +#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE + scheduler_handle, // not a public parameter +#else + reserved1, // not a public parameter +#endif + parameter_max // insert new parameters above this point + }; + + global_control(parameter p, std::size_t value) : + my_value(value), my_reserved(), my_param(p) { + suppress_unused_warning(my_reserved); + __TBB_ASSERT(my_param < parameter_max, "Invalid parameter"); +#if __TBB_WIN8UI_SUPPORT && (_WIN32_WINNT < 0x0A00) + // For Windows 8 Store* apps it's impossible to set stack size + if (p==thread_stack_size) + return; +#elif __TBB_x86_64 && (_WIN32 || _WIN64) + if (p==thread_stack_size) + __TBB_ASSERT_RELEASE((unsigned)value == value, "Stack size is limited to unsigned int range"); +#endif + if (my_param==max_allowed_parallelism) + __TBB_ASSERT_RELEASE(my_value>0, "max_allowed_parallelism cannot be 0."); + r1::create(*this); + } + + ~global_control() { + __TBB_ASSERT(my_param < parameter_max, "Invalid parameter"); +#if __TBB_WIN8UI_SUPPORT && (_WIN32_WINNT < 0x0A00) + // For Windows 8 Store* apps it's impossible to set stack size + if (my_param==thread_stack_size) + return; +#endif + r1::destroy(*this); + } + + static std::size_t active_value(parameter p) { + __TBB_ASSERT(p < parameter_max, "Invalid parameter"); + return r1::global_control_active_value((int)p); + } + +private: + std::size_t my_value; + std::intptr_t my_reserved; // TODO: substitution of global_control* not to break backward compatibility + parameter my_param; + + friend struct r1::global_control_impl; + friend struct r1::control_storage_comparator; +}; + +#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE +//! Finalization options. +//! Outside of the class to avoid extensive friendship. 
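
// Illustrative sketch (editorial example, not part of the vendored header):
// a global_control object caps the number of threads TBB may use for as long
// as it stays alive, and active_value() reports the limit currently in effect.
// limited_region is a made-up function name.
#include "oneapi/tbb/global_control.h"
#include "oneapi/tbb/parallel_for.h"

void limited_region() {
    tbb::global_control limit(tbb::global_control::max_allowed_parallelism, 4);
    // At most 4 threads participate while 'limit' is in scope.
    tbb::parallel_for(0, 1000, [](int) { /* per-index work */ });
    std::size_t cap = tbb::global_control::active_value(
        tbb::global_control::max_allowed_parallelism);
    (void)cap;  // 4 here, unless an enclosing control imposes a smaller limit
}
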
+static constexpr std::intptr_t release_nothrowing = 0; +static constexpr std::intptr_t finalize_nothrowing = 1; +static constexpr std::intptr_t finalize_throwing = 2; + +//! User side wrapper for a task scheduler lifetime control object +class task_scheduler_handle { +public: + task_scheduler_handle() = default; + ~task_scheduler_handle() { + release(*this); + } + + //! No copy + task_scheduler_handle(const task_scheduler_handle& other) = delete; + task_scheduler_handle& operator=(const task_scheduler_handle& other) = delete; + + //! Move only + task_scheduler_handle(task_scheduler_handle&& other) noexcept : m_ctl{nullptr} { + std::swap(m_ctl, other.m_ctl); + } + task_scheduler_handle& operator=(task_scheduler_handle&& other) noexcept { + std::swap(m_ctl, other.m_ctl); + return *this; + }; + + //! Get and active instance of task_scheduler_handle + static task_scheduler_handle get() { + task_scheduler_handle handle; + r1::get(handle); + return handle; + } + + //! Release the reference and deactivate handle + static void release(task_scheduler_handle& handle) { + if (handle.m_ctl != nullptr) { + r1::finalize(handle, release_nothrowing); + } + } + +private: + friend void r1::release_impl(task_scheduler_handle& handle); + friend bool r1::finalize_impl(task_scheduler_handle& handle); + friend void __TBB_EXPORTED_FUNC r1::get(task_scheduler_handle&); + + global_control* m_ctl{nullptr}; +}; + +#if TBB_USE_EXCEPTIONS +//! Waits for worker threads termination. Throws exception on error. +inline void finalize(task_scheduler_handle& handle) { + r1::finalize(handle, finalize_throwing); +} +#endif +//! Waits for worker threads termination. Returns false on error. +inline bool finalize(task_scheduler_handle& handle, const std::nothrow_t&) noexcept { + return r1::finalize(handle, finalize_nothrowing); +} +#endif // __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::global_control; +#if __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE +using detail::d1::finalize; +using detail::d1::task_scheduler_handle; +using detail::r1::unsafe_wait; +#endif +} // namespace v1 + +} // namespace tbb + +#endif // __TBB_global_control_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/info.h b/contrib/libs/tbb/include/oneapi/tbb/info.h new file mode 100644 index 0000000000..21475a4d00 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/info.h @@ -0,0 +1,137 @@ +/* + Copyright (c) 2019-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_info_H +#define __TBB_info_H + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" + +#if __TBB_ARENA_BINDING +#include <vector> + +namespace tbb { +namespace detail { + +namespace d1{ + +using numa_node_id = int; +using core_type_id = int; + +// TODO: consider version approach to resolve backward compatibility potential issues. 
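
// Illustrative sketch (editorial example, not part of the vendored header):
// task_scheduler_handle lets a program wait for TBB worker threads to shut
// down before it exits; the std::nothrow overload of finalize() reports
// failure by returning false instead of throwing.  Only available when the
// library is built with __TBB_SUPPORTS_WORKERS_WAITING_IN_TERMINATE.
#include "oneapi/tbb/global_control.h"
#include "oneapi/tbb/parallel_for.h"
#include <new>

int main() {
    tbb::task_scheduler_handle handle = tbb::task_scheduler_handle::get();
    tbb::parallel_for(0, 100, [](int) { /* per-index work */ });
    if (!tbb::finalize(handle, std::nothrow)) {
        // Workers could not be joined (e.g. something still references the
        // scheduler); their resources are reclaimed later instead.
    }
    return 0;
}
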
+struct constraints { +#if !__TBB_CPP20_PRESENT + constraints(numa_node_id id = -1, int maximal_concurrency = -1) + : numa_id(id) + , max_concurrency(maximal_concurrency) +#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT + , core_type(-1) + , max_threads_per_core(-1) +#endif + {} +#endif /*!__TBB_CPP20_PRESENT*/ + + constraints& set_numa_id(numa_node_id id) { + numa_id = id; + return *this; + } + constraints& set_max_concurrency(int maximal_concurrency) { + max_concurrency = maximal_concurrency; + return *this; + } +#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT + constraints& set_core_type(core_type_id id) { + core_type = id; + return *this; + } + constraints& set_max_threads_per_core(int threads_number) { + max_threads_per_core = threads_number; + return *this; + } +#endif + + numa_node_id numa_id = -1; + int max_concurrency = -1; +#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT + core_type_id core_type = -1; + int max_threads_per_core = -1; +#endif +}; + +} // namespace d1 + +namespace r1 { +unsigned __TBB_EXPORTED_FUNC numa_node_count(); +void __TBB_EXPORTED_FUNC fill_numa_indices(int* index_array); +int __TBB_EXPORTED_FUNC numa_default_concurrency(int numa_id); + +// Reserved fields are required to save binary backward compatibility in case of future changes. +// They must be defined to 0 at this moment. +unsigned __TBB_EXPORTED_FUNC core_type_count(intptr_t reserved = 0); +void __TBB_EXPORTED_FUNC fill_core_type_indices(int* index_array, intptr_t reserved = 0); + +int __TBB_EXPORTED_FUNC constraints_default_concurrency(const d1::constraints& c, intptr_t reserved = 0); +int __TBB_EXPORTED_FUNC constraints_threads_per_core(const d1::constraints& c, intptr_t reserved = 0); +} // namespace r1 + +namespace d1 { + +inline std::vector<numa_node_id> numa_nodes() { + std::vector<numa_node_id> node_indices(r1::numa_node_count()); + r1::fill_numa_indices(node_indices.data()); + return node_indices; +} + +inline int default_concurrency(numa_node_id id = -1) { + return r1::numa_default_concurrency(id); +} + +#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT +inline std::vector<core_type_id> core_types() { + std::vector<int> core_type_indexes(r1::core_type_count()); + r1::fill_core_type_indices(core_type_indexes.data()); + return core_type_indexes; +} + +inline int default_concurrency(constraints c) { + return r1::constraints_default_concurrency(c); +} +#endif /*__TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT*/ + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::numa_node_id; +#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT +using detail::d1::core_type_id; +#endif + +namespace info { +using detail::d1::numa_nodes; +#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT +using detail::d1::core_types; +#endif + +using detail::d1::default_concurrency; +} // namespace info +} // namespace v1 + +} // namespace tbb + +#endif /*__TBB_ARENA_BINDING*/ + +#endif /*__TBB_info_H*/ diff --git a/contrib/libs/tbb/include/oneapi/tbb/memory_pool.h b/contrib/libs/tbb/include/oneapi/tbb/memory_pool.h new file mode 100644 index 0000000000..6e913c6713 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/memory_pool.h @@ -0,0 +1,272 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
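
// Illustrative sketch (editorial example, not part of the vendored header):
// enumerate NUMA nodes and build a constraints object for the first one.
// Requires a build with __TBB_ARENA_BINDING; tbb::task_arena and its
// constraints alias come from task_arena.h, which is outside this hunk, so
// treat that part as an assumption.  pin_to_first_numa_node is a made-up name.
#include "oneapi/tbb/info.h"
#include "oneapi/tbb/task_arena.h"
#include <vector>

void pin_to_first_numa_node() {
    std::vector<tbb::numa_node_id> nodes = tbb::info::numa_nodes();
    tbb::task_arena::constraints c;
    c.set_numa_id(nodes.front())
     .set_max_concurrency(tbb::info::default_concurrency(nodes.front()));
    tbb::task_arena arena(c);
    arena.execute([] { /* NUMA-local parallel work */ });
}
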
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_memory_pool_H +#define __TBB_memory_pool_H + +#if !TBB_PREVIEW_MEMORY_POOL +#error Set TBB_PREVIEW_MEMORY_POOL to include memory_pool.h +#endif +/** @file */ + +#include "scalable_allocator.h" + +#include <new> // std::bad_alloc +#include <stdexcept> // std::runtime_error, std::invalid_argument +#include <utility> // std::forward + + +#if __TBB_EXTRA_DEBUG +#define __TBBMALLOC_ASSERT ASSERT +#else +#define __TBBMALLOC_ASSERT(a,b) ((void)0) +#endif + +namespace tbb { +namespace detail { +namespace d1 { + +//! Base of thread-safe pool allocator for variable-size requests +class pool_base : no_copy { + // Pool interface is separate from standard allocator classes because it has + // to maintain internal state, no copy or assignment. Move and swap are possible. +public: + //! Reset pool to reuse its memory (free all objects at once) + void recycle() { rml::pool_reset(my_pool); } + + //! The "malloc" analogue to allocate block of memory of size bytes + void *malloc(size_t size) { return rml::pool_malloc(my_pool, size); } + + //! The "free" analogue to discard a previously allocated piece of memory. + void free(void* ptr) { rml::pool_free(my_pool, ptr); } + + //! The "realloc" analogue complementing pool_malloc. + // Enables some low-level optimization possibilities + void *realloc(void* ptr, size_t size) { + return rml::pool_realloc(my_pool, ptr, size); + } + +protected: + //! destroy pool - must be called in a child class + void destroy() { rml::pool_destroy(my_pool); } + + rml::MemoryPool *my_pool; +}; + +#if _MSC_VER && !defined(__INTEL_COMPILER) + // Workaround for erroneous "unreferenced parameter" warning in method destroy. + #pragma warning (push) + #pragma warning (disable: 4100) +#endif + +//! Meets "allocator" requirements of ISO C++ Standard, Section 20.1.5 +/** @ingroup memory_allocation */ +template<typename T, typename P = pool_base> +class memory_pool_allocator { +protected: + typedef P pool_type; + pool_type *my_pool; + template<typename U, typename R> + friend class memory_pool_allocator; + template<typename V, typename U, typename R> + friend bool operator==( const memory_pool_allocator<V,R>& a, const memory_pool_allocator<U,R>& b); + template<typename V, typename U, typename R> + friend bool operator!=( const memory_pool_allocator<V,R>& a, const memory_pool_allocator<U,R>& b); +public: + typedef T value_type; + typedef value_type* pointer; + typedef const value_type* const_pointer; + typedef value_type& reference; + typedef const value_type& const_reference; + typedef size_t size_type; + typedef ptrdiff_t difference_type; + template<typename U> struct rebind { + typedef memory_pool_allocator<U, P> other; + }; + + explicit memory_pool_allocator(pool_type &pool) throw() : my_pool(&pool) {} + memory_pool_allocator(const memory_pool_allocator& src) throw() : my_pool(src.my_pool) {} + template<typename U> + memory_pool_allocator(const memory_pool_allocator<U,P>& src) throw() : my_pool(src.my_pool) {} + + pointer address(reference x) const { return &x; } + const_pointer address(const_reference x) const { return &x; } + + //! Allocate space for n objects. 
+ pointer allocate( size_type n, const void* /*hint*/ = 0) { + pointer p = static_cast<pointer>( my_pool->malloc( n*sizeof(value_type) ) ); + if (!p) + throw_exception(std::bad_alloc()); + return p; + } + //! Free previously allocated block of memory. + void deallocate( pointer p, size_type ) { + my_pool->free(p); + } + //! Largest value for which method allocate might succeed. + size_type max_size() const throw() { + size_type max = static_cast<size_type>(-1) / sizeof (value_type); + return (max > 0 ? max : 1); + } + //! Copy-construct value at location pointed to by p. + + template<typename U, typename... Args> + void construct(U *p, Args&&... args) + { ::new((void *)p) U(std::forward<Args>(args)...); } + + //! Destroy value at location pointed to by p. + void destroy( pointer p ) { p->~value_type(); } + +}; + +#if _MSC_VER && !defined(__INTEL_COMPILER) + #pragma warning (pop) +#endif // warning 4100 is back + +//! Analogous to std::allocator<void>, as defined in ISO C++ Standard, Section 20.4.1 +/** @ingroup memory_allocation */ +template<typename P> +class memory_pool_allocator<void, P> { +public: + typedef P pool_type; + typedef void* pointer; + typedef const void* const_pointer; + typedef void value_type; + template<typename U> struct rebind { + typedef memory_pool_allocator<U, P> other; + }; + + explicit memory_pool_allocator( pool_type &pool) throw() : my_pool(&pool) {} + memory_pool_allocator( const memory_pool_allocator& src) throw() : my_pool(src.my_pool) {} + template<typename U> + memory_pool_allocator(const memory_pool_allocator<U,P>& src) throw() : my_pool(src.my_pool) {} + +protected: + pool_type *my_pool; + template<typename U, typename R> + friend class memory_pool_allocator; + template<typename V, typename U, typename R> + friend bool operator==( const memory_pool_allocator<V,R>& a, const memory_pool_allocator<U,R>& b); + template<typename V, typename U, typename R> + friend bool operator!=( const memory_pool_allocator<V,R>& a, const memory_pool_allocator<U,R>& b); +}; + +template<typename T, typename U, typename P> +inline bool operator==( const memory_pool_allocator<T,P>& a, const memory_pool_allocator<U,P>& b) {return a.my_pool==b.my_pool;} + +template<typename T, typename U, typename P> +inline bool operator!=( const memory_pool_allocator<T,P>& a, const memory_pool_allocator<U,P>& b) {return a.my_pool!=b.my_pool;} + +//! Thread-safe growable pool allocator for variable-size requests +template <typename Alloc> +class memory_pool : public pool_base { + Alloc my_alloc; // TODO: base-class optimization + static void *allocate_request(intptr_t pool_id, size_t & bytes); + static int deallocate_request(intptr_t pool_id, void*, size_t raw_bytes); + +public: + //! construct pool with underlying allocator + explicit memory_pool(const Alloc &src = Alloc()); + + //! destroy pool + ~memory_pool() { destroy(); } // call the callbacks first and destroy my_alloc latter +}; + +class fixed_pool : public pool_base { + void *my_buffer; + size_t my_size; + inline static void *allocate_request(intptr_t pool_id, size_t & bytes); + +public: + //! construct pool with underlying allocator + inline fixed_pool(void *buf, size_t size); + //! 
destroy pool + ~fixed_pool() { destroy(); } +}; + +//////////////// Implementation /////////////// + +template <typename Alloc> +memory_pool<Alloc>::memory_pool(const Alloc &src) : my_alloc(src) { + rml::MemPoolPolicy args(allocate_request, deallocate_request, + sizeof(typename Alloc::value_type)); + rml::MemPoolError res = rml::pool_create_v1(intptr_t(this), &args, &my_pool); + if (res!=rml::POOL_OK) + throw_exception(std::runtime_error("Can't create pool")); +} +template <typename Alloc> +void *memory_pool<Alloc>::allocate_request(intptr_t pool_id, size_t & bytes) { + memory_pool<Alloc> &self = *reinterpret_cast<memory_pool<Alloc>*>(pool_id); + const size_t unit_size = sizeof(typename Alloc::value_type); + __TBBMALLOC_ASSERT( 0 == bytes%unit_size, NULL); + void *ptr; +#if TBB_USE_EXCEPTIONS + try { +#endif + ptr = self.my_alloc.allocate( bytes/unit_size ); +#if TBB_USE_EXCEPTIONS + } catch(...) { + return 0; + } +#endif + return ptr; +} +#if __TBB_MSVC_UNREACHABLE_CODE_IGNORED + // Workaround for erroneous "unreachable code" warning in the template below. + // Specific for VC++ 17-18 compiler + #pragma warning (push) + #pragma warning (disable: 4702) +#endif +template <typename Alloc> +int memory_pool<Alloc>::deallocate_request(intptr_t pool_id, void* raw_ptr, size_t raw_bytes) { + memory_pool<Alloc> &self = *reinterpret_cast<memory_pool<Alloc>*>(pool_id); + const size_t unit_size = sizeof(typename Alloc::value_type); + __TBBMALLOC_ASSERT( 0 == raw_bytes%unit_size, NULL); + self.my_alloc.deallocate( static_cast<typename Alloc::value_type*>(raw_ptr), raw_bytes/unit_size ); + return 0; +} +#if __TBB_MSVC_UNREACHABLE_CODE_IGNORED + #pragma warning (pop) +#endif +inline fixed_pool::fixed_pool(void *buf, size_t size) : my_buffer(buf), my_size(size) { + if (!buf || !size) + // TODO: improve support for mode with exceptions disabled + throw_exception(std::invalid_argument("Zero in parameter is invalid")); + rml::MemPoolPolicy args(allocate_request, 0, size, /*fixedPool=*/true); + rml::MemPoolError res = rml::pool_create_v1(intptr_t(this), &args, &my_pool); + if (res!=rml::POOL_OK) + throw_exception(std::runtime_error("Can't create pool")); +} +inline void *fixed_pool::allocate_request(intptr_t pool_id, size_t & bytes) { + fixed_pool &self = *reinterpret_cast<fixed_pool*>(pool_id); + __TBBMALLOC_ASSERT(0 != self.my_size, "The buffer must not be used twice."); + bytes = self.my_size; + self.my_size = 0; // remember that buffer has been used + return self.my_buffer; +} + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::memory_pool_allocator; +using detail::d1::memory_pool; +using detail::d1::fixed_pool; +} // inline namepspace v1 +} // namespace tbb + +#undef __TBBMALLOC_ASSERT +#endif// __TBB_memory_pool_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/null_mutex.h b/contrib/libs/tbb/include/oneapi/tbb/null_mutex.h new file mode 100644 index 0000000000..8fab863db3 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/null_mutex.h @@ -0,0 +1,79 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
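
// Illustrative sketch (editorial example, not part of the vendored header):
// a growable memory_pool used directly through malloc()/free() and indirectly
// through memory_pool_allocator in a standard container, plus a fixed_pool
// over a caller-supplied buffer.  Requires TBB_PREVIEW_MEMORY_POOL and linking
// against the tbbmalloc library; pool_demo is a made-up name.
#define TBB_PREVIEW_MEMORY_POOL 1
#include "oneapi/tbb/memory_pool.h"
#include <list>
#include <memory>

void pool_demo() {
    tbb::memory_pool<std::allocator<char>> pool;   // growable, backed by std::allocator
    void* raw = pool.malloc(256);                  // pool-local malloc/free
    pool.free(raw);
    {
        tbb::memory_pool_allocator<int> alloc(pool);
        std::list<int, tbb::memory_pool_allocator<int>> items(alloc);
        items.push_back(42);
    }                                              // list memory returns to the pool
    pool.recycle();                                // or release everything at once

    char buf[1024];
    tbb::fixed_pool scratch(buf, sizeof(buf));     // non-growing pool over a user buffer
    void* p = scratch.malloc(64);
    scratch.free(p);
}
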
+ See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_null_mutex_H +#define __TBB_null_mutex_H + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" + +namespace tbb { +namespace detail { +namespace d1 { + +//! A mutex which does nothing +/** A null_mutex does no operation and simulates success. + @ingroup synchronization */ +class null_mutex { +public: + //! Constructors + constexpr null_mutex() noexcept = default; + + //! Destructor + ~null_mutex() = default; + + //! No Copy + null_mutex(const null_mutex&) = delete; + null_mutex& operator=(const null_mutex&) = delete; + + //! Represents acquisition of a mutex. + class scoped_lock { + public: + //! Constructors + constexpr scoped_lock() noexcept = default; + scoped_lock(null_mutex&) {} + + //! Destructor + ~scoped_lock() = default; + + //! No Copy + scoped_lock(const scoped_lock&) = delete; + scoped_lock& operator=(const scoped_lock&) = delete; + + void acquire(null_mutex&) {} + bool try_acquire(null_mutex&) { return true; } + void release() {} + }; + + //! Mutex traits + static constexpr bool is_rw_mutex = false; + static constexpr bool is_recursive_mutex = true; + static constexpr bool is_fair_mutex = true; + + void lock() {} + bool try_lock() { return true; } + void unlock() {} +}; // class null_mutex + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::null_mutex; +} // namespace v1 +} // namespace tbb + +#endif /* __TBB_null_mutex_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/null_rw_mutex.h b/contrib/libs/tbb/include/oneapi/tbb/null_rw_mutex.h new file mode 100644 index 0000000000..8046bc405d --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/null_rw_mutex.h @@ -0,0 +1,84 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_null_rw_mutex_H +#define __TBB_null_rw_mutex_H + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" + +namespace tbb { +namespace detail { +namespace d1 { + +//! A rw mutex which does nothing +/** A null_rw_mutex is a rw mutex that does nothing and simulates successful operation. + @ingroup synchronization */ +class null_rw_mutex { +public: + //! Constructors + constexpr null_rw_mutex() noexcept = default; + + //! Destructor + ~null_rw_mutex() = default; + + //! No Copy + null_rw_mutex(const null_rw_mutex&) = delete; + null_rw_mutex& operator=(const null_rw_mutex&) = delete; + + //! Represents acquisition of a mutex. + class scoped_lock { + public: + //! Constructors + constexpr scoped_lock() noexcept = default; + scoped_lock(null_rw_mutex&, bool = true) {} + + //! Destructor + ~scoped_lock() = default; + + //! 
No Copy + scoped_lock(const scoped_lock&) = delete; + scoped_lock& operator=(const scoped_lock&) = delete; + + void acquire(null_rw_mutex&, bool = true) {} + bool try_acquire(null_rw_mutex&, bool = true) { return true; } + void release() {} + bool upgrade_to_writer() { return true; } + bool downgrade_to_reader() { return true; } + }; + + //! Mutex traits + static constexpr bool is_rw_mutex = true; + static constexpr bool is_recursive_mutex = true; + static constexpr bool is_fair_mutex = true; + + void lock() {} + bool try_lock() { return true; } + void unlock() {} + void lock_shared() {} + bool try_lock_shared() { return true; } + void unlock_shared() {} +}; // class null_rw_mutex + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::null_rw_mutex; +} // namespace v1 +} // namespace tbb + +#endif /* __TBB_null_rw_mutex_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/parallel_for.h b/contrib/libs/tbb/include/oneapi/tbb/parallel_for.h new file mode 100644 index 0000000000..ed137d4d09 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/parallel_for.h @@ -0,0 +1,416 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_parallel_for_H +#define __TBB_parallel_for_H + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" +#include "detail/_exception.h" +#include "detail/_task.h" +#include "detail/_small_object_pool.h" +#include "profiling.h" + +#include "partitioner.h" +#include "blocked_range.h" +#include "task_group.h" + +#include <cstddef> +#include <new> + +namespace tbb { +namespace detail { +namespace d1 { + +//! Task type used in parallel_for +/** @ingroup algorithms */ +template<typename Range, typename Body, typename Partitioner> +struct start_for : public task { + Range my_range; + const Body my_body; + node* my_parent; + + typename Partitioner::task_partition_type my_partition; + small_object_allocator my_allocator; + + task* execute(execution_data&) override; + task* cancel(execution_data&) override; + void finalize(const execution_data&); + + //! Constructor for root task. + start_for( const Range& range, const Body& body, Partitioner& partitioner, small_object_allocator& alloc ) : + my_range(range), + my_body(body), + my_partition(partitioner), + my_allocator(alloc) {} + //! Splitting constructor used to generate children. + /** parent_ becomes left child. Newly constructed object is right child. */ + start_for( start_for& parent_, typename Partitioner::split_type& split_obj, small_object_allocator& alloc ) : + my_range(parent_.my_range, get_range_split_object<Range>(split_obj)), + my_body(parent_.my_body), + my_partition(parent_.my_partition, split_obj), + my_allocator(alloc) {} + //! Construct right child from the given range as response to the demand. + /** parent_ remains left child. Newly constructed object is right child. 
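
// Illustrative sketch (editorial example, not part of the vendored headers):
// null_mutex and null_rw_mutex above are no-op stand-ins that let generic code
// keep its locking structure when no real synchronization is needed.  The
// Counter template is made up for the example; spin_mutex comes from
// spin_mutex.h, which is outside this hunk.
#include "oneapi/tbb/null_mutex.h"
#include "oneapi/tbb/spin_mutex.h"

template <typename Mutex>
class Counter {
    Mutex my_mutex;
    long my_value = 0;
public:
    long increment() {
        typename Mutex::scoped_lock lock(my_mutex);  // compiles to nothing for null_mutex
        return ++my_value;
    }
};

Counter<tbb::null_mutex> serial_counter;   // single-threaded use: locking elided
Counter<tbb::spin_mutex> shared_counter;   // concurrent use: a real lock
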
*/ + start_for( start_for& parent_, const Range& r, depth_t d, small_object_allocator& alloc ) : + my_range(r), + my_body(parent_.my_body), + my_partition(parent_.my_partition, split()), + my_allocator(alloc) + { + my_partition.align_depth( d ); + } + static void run(const Range& range, const Body& body, Partitioner& partitioner) { + task_group_context context(PARALLEL_FOR); + run(range, body, partitioner, context); + } + + static void run(const Range& range, const Body& body, Partitioner& partitioner, task_group_context& context) { + if ( !range.empty() ) { + small_object_allocator alloc{}; + start_for& for_task = *alloc.new_object<start_for>(range, body, partitioner, alloc); + + // defer creation of the wait node until task allocation succeeds + wait_node wn; + for_task.my_parent = &wn; + execute_and_wait(for_task, context, wn.m_wait, context); + } + } + //! Run body for range, serves as callback for partitioner + void run_body( Range &r ) { + my_body( r ); + } + + //! spawn right task, serves as callback for partitioner + void offer_work(typename Partitioner::split_type& split_obj, execution_data& ed) { + offer_work_impl(ed, *this, split_obj); + } + + //! spawn right task, serves as callback for partitioner + void offer_work(const Range& r, depth_t d, execution_data& ed) { + offer_work_impl(ed, *this, r, d); + } + +private: + template <typename... Args> + void offer_work_impl(execution_data& ed, Args&&... constructor_args) { + // New right child + small_object_allocator alloc{}; + start_for& right_child = *alloc.new_object<start_for>(ed, std::forward<Args>(constructor_args)..., alloc); + + // New root node as a continuation and ref count. Left and right child attach to the new parent. + right_child.my_parent = my_parent = alloc.new_object<tree_node>(ed, my_parent, 2, alloc); + // Spawn the right sibling + right_child.spawn_self(ed); + } + + void spawn_self(execution_data& ed) { + my_partition.spawn_task(*this, *context(ed)); + } +}; + +//! fold the tree and deallocate the task +template<typename Range, typename Body, typename Partitioner> +void start_for<Range, Body, Partitioner>::finalize(const execution_data& ed) { + // Get the current parent and allocator an object destruction + node* parent = my_parent; + auto allocator = my_allocator; + // Task execution finished - destroy it + this->~start_for(); + // Unwind the tree decrementing the parent`s reference count + + fold_tree<tree_node>(parent, ed); + allocator.deallocate(this, ed); + +} + +//! execute task for parallel_for +template<typename Range, typename Body, typename Partitioner> +task* start_for<Range, Body, Partitioner>::execute(execution_data& ed) { + if (!is_same_affinity(ed)) { + my_partition.note_affinity(execution_slot(ed)); + } + my_partition.check_being_stolen(*this, ed); + my_partition.execute(*this, my_range, ed); + finalize(ed); + return nullptr; +} + +//! cancel task for parallel_for +template<typename Range, typename Body, typename Partitioner> +task* start_for<Range, Body, Partitioner>::cancel(execution_data& ed) { + finalize(ed); + return nullptr; +} + +//! 
Calls the function with values from range [begin, end) with a step provided +template<typename Function, typename Index> +class parallel_for_body : detail::no_assign { + const Function &my_func; + const Index my_begin; + const Index my_step; +public: + parallel_for_body( const Function& _func, Index& _begin, Index& _step ) + : my_func(_func), my_begin(_begin), my_step(_step) {} + + void operator()( const blocked_range<Index>& r ) const { + // A set of local variables to help the compiler with vectorization of the following loop. + Index b = r.begin(); + Index e = r.end(); + Index ms = my_step; + Index k = my_begin + b*ms; + +#if __INTEL_COMPILER +#pragma ivdep +#if __TBB_ASSERT_ON_VECTORIZATION_FAILURE +#pragma vector always assert +#endif +#endif + for ( Index i = b; i < e; ++i, k += ms ) { + my_func( k ); + } + } +}; + +// Requirements on Range concept are documented in blocked_range.h + +/** \page parallel_for_body_req Requirements on parallel_for body + Class \c Body implementing the concept of parallel_for body must define: + - \code Body::Body( const Body& ); \endcode Copy constructor + - \code Body::~Body(); \endcode Destructor + - \code void Body::operator()( Range& r ) const; \endcode Function call operator applying the body to range \c r. +**/ + +/** \name parallel_for + See also requirements on \ref range_req "Range" and \ref parallel_for_body_req "parallel_for Body". **/ +//@{ + +//! Parallel iteration over range with default partitioner. +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_for( const Range& range, const Body& body ) { + start_for<Range,Body,const __TBB_DEFAULT_PARTITIONER>::run(range,body,__TBB_DEFAULT_PARTITIONER()); +} + +//! Parallel iteration over range with simple partitioner. +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_for( const Range& range, const Body& body, const simple_partitioner& partitioner ) { + start_for<Range,Body,const simple_partitioner>::run(range,body,partitioner); +} + +//! Parallel iteration over range with auto_partitioner. +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_for( const Range& range, const Body& body, const auto_partitioner& partitioner ) { + start_for<Range,Body,const auto_partitioner>::run(range,body,partitioner); +} + +//! Parallel iteration over range with static_partitioner. +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_for( const Range& range, const Body& body, const static_partitioner& partitioner ) { + start_for<Range,Body,const static_partitioner>::run(range,body,partitioner); +} + +//! Parallel iteration over range with affinity_partitioner. +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_for( const Range& range, const Body& body, affinity_partitioner& partitioner ) { + start_for<Range,Body,affinity_partitioner>::run(range,body,partitioner); +} + +//! Parallel iteration over range with default partitioner and user-supplied context. +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_for( const Range& range, const Body& body, task_group_context& context ) { + start_for<Range,Body,const __TBB_DEFAULT_PARTITIONER>::run(range, body, __TBB_DEFAULT_PARTITIONER(), context); +} + +//! Parallel iteration over range with simple partitioner and user-supplied context. 
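
// Illustrative sketch (editorial example, not part of the vendored header):
// the two most common parallel_for forms: a blocked_range with a body that
// handles a whole sub-range, and an integer index space with an explicit step
// and a per-index function.  scale is a made-up name.
#include "oneapi/tbb/parallel_for.h"
#include "oneapi/tbb/blocked_range.h"
#include <cstddef>
#include <vector>

void scale(std::vector<float>& a, float factor) {
    // Range form: the partitioner splits [0, a.size()) into sub-ranges.
    tbb::parallel_for(tbb::blocked_range<std::size_t>(0, a.size()),
        [&](const tbb::blocked_range<std::size_t>& r) {
            for (std::size_t i = r.begin(); i != r.end(); ++i)
                a[i] *= factor;
        });

    // Index form: first, last, step, per-index function (touches even indices).
    tbb::parallel_for(std::size_t(0), a.size(), std::size_t(2),
        [&](std::size_t i) { a[i] += 1.0f; });
}
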
+/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_for( const Range& range, const Body& body, const simple_partitioner& partitioner, task_group_context& context ) { + start_for<Range,Body,const simple_partitioner>::run(range, body, partitioner, context); +} + +//! Parallel iteration over range with auto_partitioner and user-supplied context. +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_for( const Range& range, const Body& body, const auto_partitioner& partitioner, task_group_context& context ) { + start_for<Range,Body,const auto_partitioner>::run(range, body, partitioner, context); +} + +//! Parallel iteration over range with static_partitioner and user-supplied context. +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_for( const Range& range, const Body& body, const static_partitioner& partitioner, task_group_context& context ) { + start_for<Range,Body,const static_partitioner>::run(range, body, partitioner, context); +} + +//! Parallel iteration over range with affinity_partitioner and user-supplied context. +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_for( const Range& range, const Body& body, affinity_partitioner& partitioner, task_group_context& context ) { + start_for<Range,Body,affinity_partitioner>::run(range,body,partitioner, context); +} + +//! Implementation of parallel iteration over stepped range of integers with explicit step and partitioner +template <typename Index, typename Function, typename Partitioner> +void parallel_for_impl(Index first, Index last, Index step, const Function& f, Partitioner& partitioner) { + if (step <= 0 ) + throw_exception(exception_id::nonpositive_step); // throws std::invalid_argument + else if (last > first) { + // Above "else" avoids "potential divide by zero" warning on some platforms + Index end = (last - first - Index(1)) / step + Index(1); + blocked_range<Index> range(static_cast<Index>(0), end); + parallel_for_body<Function, Index> body(f, first, step); + parallel_for(range, body, partitioner); + } +} + +//! Parallel iteration over a range of integers with a step provided and default partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, Index step, const Function& f) { + parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, auto_partitioner()); +} +//! Parallel iteration over a range of integers with a step provided and simple partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, Index step, const Function& f, const simple_partitioner& partitioner) { + parallel_for_impl<Index,Function,const simple_partitioner>(first, last, step, f, partitioner); +} +//! Parallel iteration over a range of integers with a step provided and auto partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, Index step, const Function& f, const auto_partitioner& partitioner) { + parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, partitioner); +} +//! Parallel iteration over a range of integers with a step provided and static partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, Index step, const Function& f, const static_partitioner& partitioner) { + parallel_for_impl<Index,Function,const static_partitioner>(first, last, step, f, partitioner); +} +//! 
Parallel iteration over a range of integers with a step provided and affinity partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, Index step, const Function& f, affinity_partitioner& partitioner) { + parallel_for_impl(first, last, step, f, partitioner); +} + +//! Parallel iteration over a range of integers with a default step value and default partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, const Function& f) { + parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, auto_partitioner()); +} +//! Parallel iteration over a range of integers with a default step value and simple partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, const Function& f, const simple_partitioner& partitioner) { + parallel_for_impl<Index,Function,const simple_partitioner>(first, last, static_cast<Index>(1), f, partitioner); +} +//! Parallel iteration over a range of integers with a default step value and auto partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, const Function& f, const auto_partitioner& partitioner) { + parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, partitioner); +} +//! Parallel iteration over a range of integers with a default step value and static partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, const Function& f, const static_partitioner& partitioner) { + parallel_for_impl<Index,Function,const static_partitioner>(first, last, static_cast<Index>(1), f, partitioner); +} +//! Parallel iteration over a range of integers with a default step value and affinity partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, const Function& f, affinity_partitioner& partitioner) { + parallel_for_impl(first, last, static_cast<Index>(1), f, partitioner); +} + +//! Implementation of parallel iteration over stepped range of integers with explicit step, task group context, and partitioner +template <typename Index, typename Function, typename Partitioner> +void parallel_for_impl(Index first, Index last, Index step, const Function& f, Partitioner& partitioner, task_group_context &context) { + if (step <= 0 ) + throw_exception(exception_id::nonpositive_step); // throws std::invalid_argument + else if (last > first) { + // Above "else" avoids "potential divide by zero" warning on some platforms + Index end = (last - first - Index(1)) / step + Index(1); + blocked_range<Index> range(static_cast<Index>(0), end); + parallel_for_body<Function, Index> body(f, first, step); + parallel_for(range, body, partitioner, context); + } +} + +//! Parallel iteration over a range of integers with explicit step, task group context, and default partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, Index step, const Function& f, task_group_context &context) { + parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, auto_partitioner(), context); +} +//! 
Parallel iteration over a range of integers with explicit step, task group context, and simple partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, Index step, const Function& f, const simple_partitioner& partitioner, task_group_context &context) { + parallel_for_impl<Index,Function,const simple_partitioner>(first, last, step, f, partitioner, context); +} +//! Parallel iteration over a range of integers with explicit step, task group context, and auto partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, Index step, const Function& f, const auto_partitioner& partitioner, task_group_context &context) { + parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, partitioner, context); +} +//! Parallel iteration over a range of integers with explicit step, task group context, and static partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, Index step, const Function& f, const static_partitioner& partitioner, task_group_context &context) { + parallel_for_impl<Index,Function,const static_partitioner>(first, last, step, f, partitioner, context); +} +//! Parallel iteration over a range of integers with explicit step, task group context, and affinity partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, Index step, const Function& f, affinity_partitioner& partitioner, task_group_context &context) { + parallel_for_impl(first, last, step, f, partitioner, context); +} + +//! Parallel iteration over a range of integers with a default step value, explicit task group context, and default partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, const Function& f, task_group_context &context) { + parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, auto_partitioner(), context); +} +//! Parallel iteration over a range of integers with a default step value, explicit task group context, and simple partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, const Function& f, const simple_partitioner& partitioner, task_group_context &context) { + parallel_for_impl<Index,Function,const simple_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context); +} +//! Parallel iteration over a range of integers with a default step value, explicit task group context, and auto partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, const Function& f, const auto_partitioner& partitioner, task_group_context &context) { + parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context); +} +//! Parallel iteration over a range of integers with a default step value, explicit task group context, and static partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, const Function& f, const static_partitioner& partitioner, task_group_context &context) { + parallel_for_impl<Index,Function,const static_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context); +} +//! 
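
// Illustrative sketch (editorial example, not part of the vendored header):
// passing an explicit task_group_context lets the loop be cancelled while it
// runs, and a partitioner can be chosen at the same time.  cancellable_loop
// and stop_requested are made-up names.
#include "oneapi/tbb/parallel_for.h"
#include "oneapi/tbb/task_group.h"
#include <atomic>

void cancellable_loop(const std::atomic<bool>& stop_requested) {
    tbb::task_group_context ctx;
    tbb::parallel_for(0, 1000000,
        [&](int i) {
            if (stop_requested.load(std::memory_order_relaxed))
                ctx.cancel_group_execution();   // not-yet-started work is dropped
            (void)i;                            // per-index work would go here
        },
        tbb::static_partitioner(), ctx);
}
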
Parallel iteration over a range of integers with a default step value, explicit task group context, and affinity_partitioner +template <typename Index, typename Function> +void parallel_for(Index first, Index last, const Function& f, affinity_partitioner& partitioner, task_group_context &context) { + parallel_for_impl(first, last, static_cast<Index>(1), f, partitioner, context); +} +// @} + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::parallel_for; +// Split types +using detail::split; +using detail::proportional_split; +} // namespace v1 + +} // namespace tbb + +#endif /* __TBB_parallel_for_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/parallel_for_each.h b/contrib/libs/tbb/include/oneapi/tbb/parallel_for_each.h new file mode 100644 index 0000000000..563e00f5fc --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/parallel_for_each.h @@ -0,0 +1,644 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_parallel_for_each_H +#define __TBB_parallel_for_each_H + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" +#include "detail/_exception.h" +#include "detail/_task.h" +#include "detail/_aligned_space.h" +#include "detail/_small_object_pool.h" + +#include "parallel_for.h" +#include "task_group.h" // task_group_context + +#include <iterator> +#include <type_traits> + +namespace tbb { +namespace detail { +namespace d2 { +template<typename Body, typename Item> class feeder_impl; +} // namespace d2 + +namespace d1 { +//! Class the user supplied algorithm body uses to add new tasks +template<typename Item> +class feeder { + feeder() {} + feeder(const feeder&) = delete; + void operator=( const feeder&) = delete; + + virtual ~feeder () {} + virtual void internal_add_copy(const Item& item) = 0; + virtual void internal_add_move(Item&& item) = 0; + + template<typename Body_, typename Item_> friend class detail::d2::feeder_impl; +public: + //! Add a work item to a running parallel_for_each. + void add(const Item& item) {internal_add_copy(item);} + void add(Item&& item) {internal_add_move(std::move(item));} +}; + +} // namespace d1 + +namespace d2 { +using namespace tbb::detail::d1; +/** Selects one of the two possible forms of function call member operator. 
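
// Illustrative sketch (editorial example, not part of the vendored header):
// a parallel_for_each body that also takes a feeder can add new work items
// while the algorithm runs; here each node of a made-up tree type feeds its
// children back in.  parallel_for_each itself is declared further down this
// header, outside the lines shown.
#include "oneapi/tbb/parallel_for_each.h"
#include <vector>

struct TreeNode {                      // assumed example type, not from the header
    int value;
    std::vector<TreeNode*> children;
};

void visit_tree(TreeNode& root) {
    std::vector<TreeNode*> roots{ &root };
    tbb::parallel_for_each(roots.begin(), roots.end(),
        [](TreeNode* n, tbb::feeder<TreeNode*>& f) {
            /* process n->value here */
            for (TreeNode* child : n->children)
                f.add(child);          // becomes a new work item for the algorithm
        });
}
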
+ @ingroup algorithms **/ +template<class Body> +struct parallel_for_each_operator_selector { +public: + template<typename ItemArg, typename FeederArg> + static auto call(const Body& body, ItemArg&& item, FeederArg*) + -> decltype(body(std::forward<ItemArg>(item)), void()) { + #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) + // Suppression of Microsoft non-standard extension warnings + #pragma warning (push) + #pragma warning (disable: 4239) + #endif + + body(std::forward<ItemArg>(item)); + + #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) + #pragma warning (pop) + #endif + } + + template<typename ItemArg, typename FeederArg> + static auto call(const Body& body, ItemArg&& item, FeederArg* feeder) + -> decltype(body(std::forward<ItemArg>(item), *feeder), void()) { + #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) + // Suppression of Microsoft non-standard extension warnings + #pragma warning (push) + #pragma warning (disable: 4239) + #endif + __TBB_ASSERT(feeder, "Feeder was not created but should be"); + + body(std::forward<ItemArg>(item), *feeder); + + #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) + #pragma warning (pop) + #endif + } +}; + +template<typename Body, typename Item> +struct feeder_item_task: public task { + using feeder_type = feeder_impl<Body, Item>; + + template <typename ItemType> + feeder_item_task(ItemType&& input_item, feeder_type& feeder, small_object_allocator& alloc) : + item(std::forward<ItemType>(input_item)), + my_feeder(feeder), + my_allocator(alloc) + {} + + void finalize(const execution_data& ed) { + my_feeder.my_wait_context.release(); + my_allocator.delete_object(this, ed); + } + + //! Hack to resolve ambiguity between calls to the body with and without moving the stored copy + //! Executing body with moving the copy should have higher priority + using first_priority = int; + using second_priority = double; + + template <typename BodyType, typename ItemType, typename FeederType> + static auto call(const BodyType& call_body, ItemType& call_item, FeederType& call_feeder, first_priority) + -> decltype(parallel_for_each_operator_selector<Body>::call(call_body, std::move(call_item), &call_feeder), void()) + { + parallel_for_each_operator_selector<Body>::call(call_body, std::move(call_item), &call_feeder); + } + + template <typename BodyType, typename ItemType, typename FeederType> + static void call(const BodyType& call_body, ItemType& call_item, FeederType& call_feeder, second_priority) { + parallel_for_each_operator_selector<Body>::call(call_body, call_item, &call_feeder); + } + + task* execute(execution_data& ed) override { + call(my_feeder.my_body, item, my_feeder, first_priority{}); + finalize(ed); + return nullptr; + } + + task* cancel(execution_data& ed) override { + finalize(ed); + return nullptr; + } + + Item item; + feeder_type& my_feeder; + small_object_allocator my_allocator; +}; // class feeder_item_task + +/** Implements new task adding procedure.
+ @ingroup algorithms **/ +template<typename Body, typename Item> +class feeder_impl : public feeder<Item> { + // Avoiding use of copy constructor in a virtual method if the type does not support it + void internal_add_copy_impl(std::true_type, const Item& item) { + using feeder_task = feeder_item_task<Body, Item>; + small_object_allocator alloc; + auto task = alloc.new_object<feeder_task>(item, *this, alloc); + + my_wait_context.reserve(); + spawn(*task, my_execution_context); + } + + void internal_add_copy_impl(std::false_type, const Item&) { + __TBB_ASSERT(false, "Overloading for r-value reference doesn't work or it's not movable and not copyable object"); + } + + void internal_add_copy(const Item& item) override { + internal_add_copy_impl(typename std::is_copy_constructible<Item>::type(), item); + } + + void internal_add_move(Item&& item) override { + using feeder_task = feeder_item_task<Body, Item>; + small_object_allocator alloc{}; + auto task = alloc.new_object<feeder_task>(std::move(item), *this, alloc); + + my_wait_context.reserve(); + spawn(*task, my_execution_context); + } +public: + feeder_impl(const Body& body, wait_context& w_context, task_group_context &context) + : my_body(body), + my_wait_context(w_context) + , my_execution_context(context) + {} + + const Body& my_body; + wait_context& my_wait_context; + task_group_context& my_execution_context; +}; // class feeder_impl + +/** Execute computation under one element of the range + @ingroup algorithms **/ +template<typename Iterator, typename Body, typename Item> +struct for_each_iteration_task: public task { + using feeder_type = feeder_impl<Body, Item>; + + for_each_iteration_task(Iterator input_item_ptr, const Body& body, feeder_impl<Body, Item>* feeder_ptr, wait_context& wait_context) : + item_ptr(input_item_ptr), my_body(body), my_feeder_ptr(feeder_ptr), parent_wait_context(wait_context) + {} + + void finalize() { + parent_wait_context.release(); + } + + task* execute(execution_data&) override { + parallel_for_each_operator_selector<Body>::call(my_body, *item_ptr, my_feeder_ptr); + finalize(); + return nullptr; + } + + task* cancel(execution_data&) override { + finalize(); + return nullptr; + } + + Iterator item_ptr; + const Body& my_body; + feeder_impl<Body, Item>* my_feeder_ptr; + wait_context& parent_wait_context; +}; // class for_each_iteration_task + +// Helper to get the type of the iterator to the internal sequence of copies +// If the element can be passed to the body as an rvalue - this iterator should be move_iterator +template <typename Body, typename Item, typename = void> +struct input_iteration_task_iterator_helper { + // For input iterators we pass const lvalue reference to the body + // It is prohibited to take non-constant lvalue references for input iterators + using type = const Item*; +}; + +template <typename Body, typename Item> +struct input_iteration_task_iterator_helper<Body, Item, + tbb::detail::void_t<decltype(parallel_for_each_operator_selector<Body>::call(std::declval<const Body&>(), + std::declval<Item&&>(), + std::declval<feeder_impl<Body, Item>*>()))>> +{ + using type = std::move_iterator<Item*>; +}; + +/** Split one block task to several(max_block_size) iteration tasks for input iterators + @ingroup algorithms **/ +template <typename Body, typename Item> +struct input_block_handling_task : public task { + static constexpr size_t max_block_size = 4; + + using feeder_type = feeder_impl<Body, Item>; + using iteration_task_iterator_type = typename input_iteration_task_iterator_helper<Body, 
Item>::type; + using iteration_task = for_each_iteration_task<iteration_task_iterator_type, Body, Item>; + + input_block_handling_task(wait_context& root_wait_context, task_group_context& e_context, + const Body& body, feeder_impl<Body, Item>* feeder_ptr, small_object_allocator& alloc) + :my_size(0), my_wait_context(0), my_root_wait_context(root_wait_context), + my_execution_context(e_context), my_allocator(alloc) + { + auto item_it = block_iteration_space.begin(); + for (auto* it = task_pool.begin(); it != task_pool.end(); ++it) { + new (it) iteration_task(iteration_task_iterator_type(item_it++), body, feeder_ptr, my_wait_context); + } + } + + void finalize(const execution_data& ed) { + my_root_wait_context.release(); + my_allocator.delete_object(this, ed); + } + + task* execute(execution_data& ed) override { + __TBB_ASSERT( my_size > 0, "Negative size was passed to task"); + for (std::size_t counter = 1; counter < my_size; ++counter) { + my_wait_context.reserve(); + spawn(*(task_pool.begin() + counter), my_execution_context); + } + my_wait_context.reserve(); + execute_and_wait(*task_pool.begin(), my_execution_context, + my_wait_context, my_execution_context); + + // deallocate current task after children execution + finalize(ed); + return nullptr; + } + + task* cancel(execution_data& ed) override { + finalize(ed); + return nullptr; + } + + ~input_block_handling_task() { + for(std::size_t counter = 0; counter < max_block_size; ++counter) { + (task_pool.begin() + counter)->~iteration_task(); + (block_iteration_space.begin() + counter)->~Item(); + } + } + + aligned_space<Item, max_block_size> block_iteration_space; + aligned_space<iteration_task, max_block_size> task_pool; + std::size_t my_size; + wait_context my_wait_context; + wait_context& my_root_wait_context; + task_group_context& my_execution_context; + small_object_allocator my_allocator; +}; // class input_block_handling_task + +/** Split one block task to several(max_block_size) iteration tasks for forward iterators + @ingroup algorithms **/ +template <typename Iterator, typename Body, typename Item> +struct forward_block_handling_task : public task { + static constexpr size_t max_block_size = 4; + + using iteration_task = for_each_iteration_task<Iterator, Body, Item>; + + forward_block_handling_task(Iterator first, std::size_t size, + wait_context& w_context, task_group_context& e_context, + const Body& body, feeder_impl<Body, Item>* feeder_ptr, + small_object_allocator& alloc) + : my_size(size), my_wait_context(0), my_root_wait_context(w_context), + my_execution_context(e_context), my_allocator(alloc) + { + auto* task_it = task_pool.begin(); + for (std::size_t i = 0; i < size; i++) { + new (task_it++) iteration_task(first, body, feeder_ptr, my_wait_context); + ++first; + } + } + + void finalize(const execution_data& ed) { + my_root_wait_context.release(); + my_allocator.delete_object(this, ed); + } + + task* execute(execution_data& ed) override { + __TBB_ASSERT( my_size > 0, "Negative size was passed to task"); + for(std::size_t counter = 1; counter < my_size; ++counter) { + my_wait_context.reserve(); + spawn(*(task_pool.begin() + counter), my_execution_context); + } + my_wait_context.reserve(); + execute_and_wait(*task_pool.begin(), my_execution_context, + my_wait_context, my_execution_context); + + // deallocate current task after children execution + finalize(ed); + return nullptr; + } + + task* cancel(execution_data& ed) override { + finalize(ed); + return nullptr; + } + + ~forward_block_handling_task() { + for(std::size_t 
counter = 0; counter < my_size; ++counter) { + (task_pool.begin() + counter)->~iteration_task(); + } + } + + aligned_space<iteration_task, max_block_size> task_pool; + std::size_t my_size; + wait_context my_wait_context; + wait_context& my_root_wait_context; + task_group_context& my_execution_context; + small_object_allocator my_allocator; +}; // class forward_block_handling_task + +/** Body for parallel_for algorithm. + * Allows to redirect operations under random access iterators range to the parallel_for algorithm. + @ingroup algorithms **/ +template <typename Iterator, typename Body, typename Item> +class parallel_for_body_wrapper { + Iterator my_first; + const Body& my_body; + feeder_impl<Body, Item>* my_feeder_ptr; +public: + parallel_for_body_wrapper(Iterator first, const Body& body, feeder_impl<Body, Item>* feeder_ptr) + : my_first(first), my_body(body), my_feeder_ptr(feeder_ptr) {} + + void operator()(tbb::blocked_range<std::size_t> range) const { +#if __INTEL_COMPILER +#pragma ivdep +#endif + for (std::size_t count = range.begin(); count != range.end(); count++) { + parallel_for_each_operator_selector<Body>::call(my_body, *(my_first + count), + my_feeder_ptr); + } + } +}; // class parallel_for_body_wrapper + + +/** Helper for getting iterators tag including inherited custom tags + @ingroup algorithms */ +template<typename It> +using tag = typename std::iterator_traits<It>::iterator_category; + +template<typename It> +using iterator_tag_dispatch = typename + std::conditional< + std::is_base_of<std::random_access_iterator_tag, tag<It>>::value, + std::random_access_iterator_tag, + typename std::conditional< + std::is_base_of<std::forward_iterator_tag, tag<It>>::value, + std::forward_iterator_tag, + std::input_iterator_tag + >::type + >::type; + +template <typename Body, typename Iterator, typename Item> +using feeder_is_required = tbb::detail::void_t<decltype(std::declval<const Body>()(std::declval<typename std::iterator_traits<Iterator>::reference>(), + std::declval<feeder<Item>&>()))>; + +// Creates feeder object only if the body can accept it +template <typename Iterator, typename Body, typename Item, typename = void> +struct feeder_holder { + feeder_holder( wait_context&, task_group_context&, const Body& ) {} + + feeder_impl<Body, Item>* feeder_ptr() { return nullptr; } +}; // class feeder_holder + +template <typename Iterator, typename Body, typename Item> +class feeder_holder<Iterator, Body, Item, feeder_is_required<Body, Iterator, Item>> { +public: + feeder_holder( wait_context& w_context, task_group_context& context, const Body& body ) + : my_feeder(body, w_context, context) {} + + feeder_impl<Body, Item>* feeder_ptr() { return &my_feeder; } +private: + feeder_impl<Body, Item> my_feeder; +}; // class feeder_holder + +template <typename Iterator, typename Body, typename Item> +class for_each_root_task_base : public task { +public: + for_each_root_task_base(Iterator first, Iterator last, const Body& body, wait_context& w_context, task_group_context& e_context) + : my_first(first), my_last(last), my_wait_context(w_context), my_execution_context(e_context), + my_body(body), my_feeder_holder(my_wait_context, my_execution_context, my_body) + { + my_wait_context.reserve(); + } +private: + task* cancel(execution_data&) override { + this->my_wait_context.release(); + return nullptr; + } +protected: + Iterator my_first; + Iterator my_last; + wait_context& my_wait_context; + task_group_context& my_execution_context; + const Body& my_body; + feeder_holder<Iterator, Body, Item> 
my_feeder_holder; +}; // class for_each_root_task_base + +/** parallel_for_each algorithm root task - most generic version + * Splits input range to blocks + @ingroup algorithms **/ +template <typename Iterator, typename Body, typename Item, typename IteratorTag = iterator_tag_dispatch<Iterator>> +class for_each_root_task : public for_each_root_task_base<Iterator, Body, Item> +{ + using base_type = for_each_root_task_base<Iterator, Body, Item>; +public: + using base_type::base_type; +private: + task* execute(execution_data& ed) override { + using block_handling_type = input_block_handling_task<Body, Item>; + + if (this->my_first == this->my_last) { + this->my_wait_context.release(); + return nullptr; + } + + this->my_wait_context.reserve(); + small_object_allocator alloc{}; + auto block_handling_task = alloc.new_object<block_handling_type>(ed, this->my_wait_context, this->my_execution_context, + this->my_body, this->my_feeder_holder.feeder_ptr(), + alloc); + + auto* block_iterator = block_handling_task->block_iteration_space.begin(); + for (; !(this->my_first == this->my_last) && block_handling_task->my_size < block_handling_type::max_block_size; ++this->my_first) { + // Move semantics are automatically used when supported by the iterator + new (block_iterator++) Item(*this->my_first); + ++block_handling_task->my_size; + } + + // Do not access this after spawn to avoid races + spawn(*this, this->my_execution_context); + return block_handling_task; + } +}; // class for_each_root_task - most generic implementation + +/** parallel_for_each algorithm root task - forward iterator based specialization + * Splits input range to blocks + @ingroup algorithms **/ +template <typename Iterator, typename Body, typename Item> +class for_each_root_task<Iterator, Body, Item, std::forward_iterator_tag> + : public for_each_root_task_base<Iterator, Body, Item> +{ + using base_type = for_each_root_task_base<Iterator, Body, Item>; +public: + using base_type::base_type; +private: + task* execute(execution_data& ed) override { + using block_handling_type = forward_block_handling_task<Iterator, Body, Item>; + if (this->my_first == this->my_last) { + this->my_wait_context.release(); + return nullptr; + } + + std::size_t block_size{0}; + Iterator first_block_element = this->my_first; + for (; !(this->my_first == this->my_last) && block_size < block_handling_type::max_block_size; ++this->my_first) { + ++block_size; + } + + this->my_wait_context.reserve(); + small_object_allocator alloc{}; + auto block_handling_task = alloc.new_object<block_handling_type>(ed, first_block_element, block_size, + this->my_wait_context, this->my_execution_context, + this->my_body, this->my_feeder_holder.feeder_ptr(), alloc); + + // Do not access this after spawn to avoid races + spawn(*this, this->my_execution_context); + return block_handling_task; + } +}; // class for_each_root_task - forward iterator based specialization + +/** parallel_for_each algorithm root task - random access iterator based specialization + * Splits input range to blocks + @ingroup algorithms **/ +template <typename Iterator, typename Body, typename Item> +class for_each_root_task<Iterator, Body, Item, std::random_access_iterator_tag> + : public for_each_root_task_base<Iterator, Body, Item> +{ + using base_type = for_each_root_task_base<Iterator, Body, Item>; +public: + using base_type::base_type; +private: + task* execute(execution_data&) override { + tbb::parallel_for( + tbb::blocked_range<std::size_t>(0, std::distance(this->my_first, this->my_last)), + 
parallel_for_body_wrapper<Iterator, Body, Item>(this->my_first, this->my_body, this->my_feeder_holder.feeder_ptr()) + , this->my_execution_context + ); + + this->my_wait_context.release(); + return nullptr; + } +}; // class for_each_root_task - random access iterator based specialization + +/** Helper for getting item type. If item type can be deduced from feeder - got it from feeder, + if feeder is generic - got item type from range. + @ingroup algorithms */ +template<typename Body, typename Item, typename FeederArg> +auto feeder_argument_parser(void (Body::*)(Item, feeder<FeederArg>&) const) -> FeederArg; + +template<typename Body, typename> +decltype(feeder_argument_parser<Body>(&Body::operator())) get_item_type_impl(int); // for (T, feeder<T>) +template<typename Body, typename Item> Item get_item_type_impl(...); // stub + +template <typename Body, typename Item> +using get_item_type = decltype(get_item_type_impl<Body, Item>(0)); + +/** Implements parallel iteration over a range. + @ingroup algorithms */ +template<typename Iterator, typename Body> +void run_parallel_for_each( Iterator first, Iterator last, const Body& body, task_group_context& context) +{ + if (!(first == last)) { + using ItemType = get_item_type<Body, typename std::iterator_traits<Iterator>::value_type>; + wait_context w_context(0); + + for_each_root_task<Iterator, Body, ItemType> root_task(first, last, body, w_context, context); + + execute_and_wait(root_task, context, w_context, context); + } +} + +/** \page parallel_for_each_body_req Requirements on parallel_for_each body + Class \c Body implementing the concept of parallel_for_each body must define: + - \code + B::operator()( + cv_item_type item, + feeder<item_type>& feeder + ) const + + OR + + B::operator()( cv_item_type& item ) const + \endcode Process item. + May be invoked concurrently for the same \c this but different \c item. + + - \code item_type( const item_type& ) \endcode + Copy a work item. + - \code ~item_type() \endcode Destroy a work item +**/ + +/** \name parallel_for_each + See also requirements on \ref parallel_for_each_body_req "parallel_for_each Body". **/ +//@{ +//! Parallel iteration over a range, with optional addition of more work. +/** @ingroup algorithms */ +template<typename Iterator, typename Body> +void parallel_for_each(Iterator first, Iterator last, const Body& body) { + task_group_context context(PARALLEL_FOR_EACH); + run_parallel_for_each<Iterator, Body>(first, last, body, context); +} + +template<typename Range, typename Body> +void parallel_for_each(Range& rng, const Body& body) { + parallel_for_each(std::begin(rng), std::end(rng), body); +} + +template<typename Range, typename Body> +void parallel_for_each(const Range& rng, const Body& body) { + parallel_for_each(std::begin(rng), std::end(rng), body); +} + +//! 
Parallel iteration over a range, with optional addition of more work and user-supplied context +/** @ingroup algorithms */ +template<typename Iterator, typename Body> +void parallel_for_each(Iterator first, Iterator last, const Body& body, task_group_context& context) { + run_parallel_for_each<Iterator, Body>(first, last, body, context); +} + +template<typename Range, typename Body> +void parallel_for_each(Range& rng, const Body& body, task_group_context& context) { + parallel_for_each(std::begin(rng), std::end(rng), body, context); +} + +template<typename Range, typename Body> +void parallel_for_each(const Range& rng, const Body& body, task_group_context& context) { + parallel_for_each(std::begin(rng), std::end(rng), body, context); +} + +} // namespace d2 +} // namespace detail +//! @endcond +//@} + +inline namespace v1 { +using detail::d2::parallel_for_each; +using detail::d1::feeder; +} // namespace v1 + +} // namespace tbb + +#endif /* __TBB_parallel_for_each_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/parallel_invoke.h b/contrib/libs/tbb/include/oneapi/tbb/parallel_invoke.h new file mode 100644 index 0000000000..6eb0f2e530 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/parallel_invoke.h @@ -0,0 +1,227 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_parallel_invoke_H +#define __TBB_parallel_invoke_H + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" +#include "detail/_exception.h" +#include "detail/_task.h" +#include "detail/_template_helpers.h" +#include "detail/_small_object_pool.h" + +#include "task_group.h" + +#include <tuple> +#include <atomic> +#include <utility> + +namespace tbb { +namespace detail { +namespace d1 { + +//! Simple task object, executing user method +template<typename Function, typename WaitObject> +struct function_invoker : public task { + function_invoker(const Function& function, WaitObject& wait_ctx) : + my_function(function), + parent_wait_ctx(wait_ctx) + {} + + task* execute(execution_data& ed) override { + my_function(); + parent_wait_ctx.release(ed); + call_itt_task_notify(destroy, this); + return nullptr; + } + + task* cancel(execution_data& ed) override { + parent_wait_ctx.release(ed); + return nullptr; + } + + const Function& my_function; + WaitObject& parent_wait_ctx; +}; // struct function_invoker + +//! Task object for managing subroots in trinary task trees. +// Endowed with additional synchronization logic (compatible with wait object intefaces) to support +// continuation passing execution. This task spawns 2 function_invoker tasks with first and second functors +// and then executes first functor by itself. But only the last executed functor must destruct and deallocate +// the subroot task. 
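// Usage sketch (illustrative, not part of the committed patch) for the public
// tbb::parallel_invoke entry point declared at the end of this header: it runs
// the supplied callables, possibly in parallel, and returns only after all of
// them complete; a task_group_context may be passed as the last argument.
// The lambdas and variables below are arbitrary placeholders.
#include "oneapi/tbb/parallel_invoke.h"

void parallel_invoke_usage_sketch() {
    int a = 0, b = 0, c = 0;
    tbb::parallel_invoke(
        [&] { a = 1; },    // each callable takes no arguments
        [&] { b = 2; },    // and may run concurrently with the others
        [&] { c = 3; });
    // a, b and c are all assigned once parallel_invoke returns
}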
+template<typename F1, typename F2, typename F3> +struct invoke_subroot_task : public task { + wait_context& root_wait_ctx; + std::atomic<unsigned> ref_count{0}; + bool child_spawned = false; + + const F1& self_invoked_functor; + function_invoker<F2, invoke_subroot_task<F1, F2, F3>> f2_invoker; + function_invoker<F3, invoke_subroot_task<F1, F2, F3>> f3_invoker; + + task_group_context& my_execution_context; + small_object_allocator my_allocator; + + invoke_subroot_task(const F1& f1, const F2& f2, const F3& f3, wait_context& wait_ctx, task_group_context& context, + small_object_allocator& alloc) : + root_wait_ctx(wait_ctx), + self_invoked_functor(f1), + f2_invoker(f2, *this), + f3_invoker(f3, *this), + my_execution_context(context), + my_allocator(alloc) + { + root_wait_ctx.reserve(); + } + + void finalize(const execution_data& ed) { + root_wait_ctx.release(); + + my_allocator.delete_object(this, ed); + } + + void release(const execution_data& ed) { + __TBB_ASSERT(ref_count > 0, nullptr); + call_itt_task_notify(releasing, this); + if( --ref_count == 0 ) { + call_itt_task_notify(acquired, this); + finalize(ed); + } + } + + task* execute(execution_data& ed) override { + ref_count.fetch_add(3, std::memory_order_relaxed); + spawn(f3_invoker, my_execution_context); + spawn(f2_invoker, my_execution_context); + self_invoked_functor(); + + release(ed); + return nullptr; + } + + task* cancel(execution_data& ed) override { + if( ref_count > 0 ) { // detect children spawn + release(ed); + } else { + finalize(ed); + } + return nullptr; + } +}; // struct subroot_task + +class invoke_root_task { +public: + invoke_root_task(wait_context& wc) : my_wait_context(wc) {} + void release(const execution_data&) { + my_wait_context.release(); + } +private: + wait_context& my_wait_context; +}; + +template<typename F1> +void invoke_recursive_separation(wait_context& root_wait_ctx, task_group_context& context, const F1& f1) { + root_wait_ctx.reserve(1); + invoke_root_task root(root_wait_ctx); + function_invoker<F1, invoke_root_task> invoker1(f1, root); + + execute_and_wait(invoker1, context, root_wait_ctx, context); +} + +template<typename F1, typename F2> +void invoke_recursive_separation(wait_context& root_wait_ctx, task_group_context& context, const F1& f1, const F2& f2) { + root_wait_ctx.reserve(2); + invoke_root_task root(root_wait_ctx); + function_invoker<F1, invoke_root_task> invoker1(f1, root); + function_invoker<F2, invoke_root_task> invoker2(f2, root); + + spawn(invoker1, context); + execute_and_wait(invoker2, context, root_wait_ctx, context); +} + +template<typename F1, typename F2, typename F3> +void invoke_recursive_separation(wait_context& root_wait_ctx, task_group_context& context, const F1& f1, const F2& f2, const F3& f3) { + root_wait_ctx.reserve(3); + invoke_root_task root(root_wait_ctx); + function_invoker<F1, invoke_root_task> invoker1(f1, root); + function_invoker<F2, invoke_root_task> invoker2(f2, root); + function_invoker<F3, invoke_root_task> invoker3(f3, root); + + //TODO: implement sub root for two tasks (measure performance) + spawn(invoker1, context); + spawn(invoker2, context); + execute_and_wait(invoker3, context, root_wait_ctx, context); +} + +template<typename F1, typename F2, typename F3, typename... Fs> +void invoke_recursive_separation(wait_context& root_wait_ctx, task_group_context& context, + const F1& f1, const F2& f2, const F3& f3, const Fs&... 
fs) { + small_object_allocator alloc{}; + auto sub_root = alloc.new_object<invoke_subroot_task<F1, F2, F3>>(f1, f2, f3, root_wait_ctx, context, alloc); + spawn(*sub_root, context); + + invoke_recursive_separation(root_wait_ctx, context, fs...); +} + +template<typename... Fs> +void parallel_invoke_impl(task_group_context& context, const Fs&... fs) { + static_assert(sizeof...(Fs) >= 2, "Parallel invoke may be called with at least two callable"); + wait_context root_wait_ctx{0}; + + invoke_recursive_separation(root_wait_ctx, context, fs...); +} + +template<typename F1, typename... Fs> +void parallel_invoke_impl(const F1& f1, const Fs&... fs) { + static_assert(sizeof...(Fs) >= 1, "Parallel invoke may be called with at least two callable"); + task_group_context context(PARALLEL_INVOKE); + wait_context root_wait_ctx{0}; + + invoke_recursive_separation(root_wait_ctx, context, fs..., f1); +} + +//! Passes last argument of variadic pack as first for handling user provided task_group_context +template <typename Tuple, typename... Fs> +struct invoke_helper; + +template <typename... Args, typename T, typename... Fs> +struct invoke_helper<std::tuple<Args...>, T, Fs...> : invoke_helper<std::tuple<Args..., T>, Fs...> {}; + +template <typename... Fs, typename T/*task_group_context or callable*/> +struct invoke_helper<std::tuple<Fs...>, T> { + void operator()(Fs&&... args, T&& t) { + parallel_invoke_impl(std::forward<T>(t), std::forward<Fs>(args)...); + } +}; + +//! Parallel execution of several function objects +// We need to pass parameter pack through forwarding reference, +// since this pack may contain task_group_context that must be passed via lvalue non-const reference +template<typename... Fs> +void parallel_invoke(Fs&&... fs) { + invoke_helper<std::tuple<>, Fs...>()(std::forward<Fs>(fs)...); +} + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::parallel_invoke; +} // namespace v1 + +} // namespace tbb +#endif /* __TBB_parallel_invoke_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/parallel_pipeline.h b/contrib/libs/tbb/include/oneapi/tbb/parallel_pipeline.h new file mode 100644 index 0000000000..87a159c925 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/parallel_pipeline.h @@ -0,0 +1,153 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_parallel_pipeline_H +#define __TBB_parallel_pipeline_H + +#include "detail/_pipeline_filters.h" +#include "detail/_config.h" +#include "detail/_namespace_injection.h" +#include "task_group.h" + +#include <cstddef> +#include <atomic> +#include <type_traits> + +namespace tbb { +namespace detail { + +namespace r1 { +void __TBB_EXPORTED_FUNC parallel_pipeline(task_group_context&, std::size_t, const d1::filter_node&); +} + +namespace d1 { + +enum class filter_mode : unsigned int +{ + //! processes multiple items in parallel and in no particular order + parallel = base_filter::filter_is_out_of_order, + //! 
processes items one at a time; all such filters process items in the same order + serial_in_order = base_filter::filter_is_serial, + //! processes items one at a time and in no particular order + serial_out_of_order = base_filter::filter_is_serial | base_filter::filter_is_out_of_order +}; +//! Class representing a chain of type-safe pipeline filters +/** @ingroup algorithms */ +template<typename InputType, typename OutputType> +class filter { + filter_node_ptr my_root; + filter( filter_node_ptr root ) : my_root(root) {} + friend void parallel_pipeline( size_t, const filter<void,void>&, task_group_context& ); + template<typename T_, typename U_, typename Body> + friend filter<T_,U_> make_filter( filter_mode, const Body& ); + template<typename T_, typename V_, typename U_> + friend filter<T_,U_> operator&( const filter<T_,V_>&, const filter<V_,U_>& ); +public: + filter() = default; + filter( const filter& rhs ) : my_root(rhs.my_root) {} + filter( filter&& rhs ) : my_root(std::move(rhs.my_root)) {} + + void operator=(const filter& rhs) { + my_root = rhs.my_root; + } + void operator=( filter&& rhs ) { + my_root = std::move(rhs.my_root); + } + + template<typename Body> + filter( filter_mode mode, const Body& body ) : + my_root( new(r1::allocate_memory(sizeof(filter_node_leaf<InputType, OutputType, Body>))) + filter_node_leaf<InputType, OutputType, Body>(static_cast<unsigned int>(mode), body) ) { + } + + filter& operator&=( const filter<OutputType,OutputType>& right ) { + *this = *this & right; + return *this; + } + + void clear() { + // Like operator= with filter() on right side. + my_root = nullptr; + } +}; + +//! Create a filter to participate in parallel_pipeline +/** @ingroup algorithms */ +template<typename InputType, typename OutputType, typename Body> +filter<InputType, OutputType> make_filter( filter_mode mode, const Body& body ) { + return filter_node_ptr( new(r1::allocate_memory(sizeof(filter_node_leaf<InputType, OutputType, Body>))) + filter_node_leaf<InputType, OutputType, Body>(static_cast<unsigned int>(mode), body) ); +} + +//! Create a filter to participate in parallel_pipeline +/** @ingroup algorithms */ +template<typename Body> +filter<filter_input<Body>, filter_output<Body>> make_filter( filter_mode mode, const Body& body ) { + return make_filter<filter_input<Body>, filter_output<Body>>(mode, body); +} + +//! Composition of filters left and right. +/** @ingroup algorithms */ +template<typename T, typename V, typename U> +filter<T,U> operator&( const filter<T,V>& left, const filter<V,U>& right ) { + __TBB_ASSERT(left.my_root,"cannot use default-constructed filter as left argument of '&'"); + __TBB_ASSERT(right.my_root,"cannot use default-constructed filter as right argument of '&'"); + return filter_node_ptr( new (r1::allocate_memory(sizeof(filter_node))) filter_node(left.my_root,right.my_root) ); +} + +#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT +template<typename Body> +filter(filter_mode, Body) +->filter<filter_input<Body>, filter_output<Body>>; +#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT + +//! Parallel pipeline over chain of filters with user-supplied context. +/** @ingroup algorithms **/ +inline void parallel_pipeline(size_t max_number_of_live_tokens, const filter<void,void>& filter_chain, task_group_context& context) { + r1::parallel_pipeline(context, max_number_of_live_tokens, *filter_chain.my_root); +} + +//! Parallel pipeline over chain of filters. 
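// Usage sketch (illustrative, not part of the committed patch) for the filter
// composition and parallel_pipeline overloads around this point. The token
// limit of 8, the bound of 100 items, and the lambda bodies are arbitrary
// example choices; flow_control comes from detail/_pipeline_filters.h.
#include "oneapi/tbb/parallel_pipeline.h"

void parallel_pipeline_usage_sketch() {
    int next = 0;
    auto chain =
        tbb::make_filter<void, int>(tbb::filter_mode::serial_in_order,
            [&next](tbb::flow_control& fc) -> int {
                if (next == 100) { fc.stop(); return 0; }   // signal end of input
                return next++;
            })
      & tbb::make_filter<int, int>(tbb::filter_mode::parallel,
            [](int x) { return x * x; })                    // stateless middle stage
      & tbb::make_filter<int, void>(tbb::filter_mode::serial_in_order,
            [](int /*squared*/) { /* consume results in input order */ });
    tbb::parallel_pipeline(/*max_number_of_live_tokens=*/8, chain);
}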
+/** @ingroup algorithms **/ +inline void parallel_pipeline(size_t max_number_of_live_tokens, const filter<void,void>& filter_chain) { + task_group_context context; + parallel_pipeline(max_number_of_live_tokens, filter_chain, context); +} + +//! Parallel pipeline over sequence of filters. +/** @ingroup algorithms **/ +template<typename F1, typename F2, typename... FiltersContext> +void parallel_pipeline(size_t max_number_of_live_tokens, + const F1& filter1, + const F2& filter2, + FiltersContext&&... filters) { + parallel_pipeline(max_number_of_live_tokens, filter1 & filter2, std::forward<FiltersContext>(filters)...); +} + +} // namespace d1 +} // namespace detail + +inline namespace v1 +{ +using detail::d1::parallel_pipeline; +using detail::d1::filter; +using detail::d1::make_filter; +using detail::d1::filter_mode; +using detail::d1::flow_control; +} +} // tbb + +#endif /* __TBB_parallel_pipeline_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/parallel_reduce.h b/contrib/libs/tbb/include/oneapi/tbb/parallel_reduce.h new file mode 100644 index 0000000000..6db6369d68 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/parallel_reduce.h @@ -0,0 +1,689 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_parallel_reduce_H +#define __TBB_parallel_reduce_H + +#include <new> +#include "detail/_namespace_injection.h" +#include "detail/_task.h" +#include "detail/_aligned_space.h" +#include "detail/_small_object_pool.h" + +#include "task_group.h" // task_group_context +#include "partitioner.h" +#include "profiling.h" + +namespace tbb { +namespace detail { +namespace d1 { + +//! Tree node type for parallel_reduce. +/** @ingroup algorithms */ +//TODO: consider folding tree via bypass execution(instead of manual folding) +// for better cancellation and critical tasks handling (performance measurements required). +template<typename Body> +struct reduction_tree_node : public tree_node { + tbb::detail::aligned_space<Body> zombie_space; + Body& left_body; + bool has_right_zombie{false}; + + reduction_tree_node(node* parent, int ref_count, Body& input_left_body, small_object_allocator& alloc) : + tree_node{parent, ref_count, alloc}, + left_body(input_left_body) /* gcc4.8 bug - braced-initialization doesn't work for class members of reference type */ + {} + + void join(task_group_context* context) { + if (has_right_zombie && !context->is_group_execution_cancelled()) + left_body.join(*zombie_space.begin()); + } + + ~reduction_tree_node() { + if( has_right_zombie ) zombie_space.begin()->~Body(); + } +}; + +//! Task type used to split the work of parallel_reduce. 
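// Sketch of the user-side Body protocol that the start_reduce task defined
// below drives (illustrative, not part of the committed patch; the formal
// requirements are documented later in this header). SumBody and sum_sketch
// are placeholder names.
#include <cstddef>
#include "oneapi/tbb/parallel_reduce.h"
#include "oneapi/tbb/blocked_range.h"

struct SumBody {
    const float* data;
    float sum = 0.f;
    explicit SumBody(const float* d) : data(d) {}
    SumBody(SumBody& other, tbb::split) : data(other.data) {}     // splitting constructor
    void operator()(const tbb::blocked_range<std::size_t>& r) {   // accumulate one subrange
        for (std::size_t i = r.begin(); i != r.end(); ++i) sum += data[i];
    }
    void join(SumBody& rhs) { sum += rhs.sum; }                    // fold in the right child
};

float sum_sketch(const float* d, std::size_t n) {
    SumBody body(d);
    tbb::parallel_reduce(tbb::blocked_range<std::size_t>(0, n), body);
    return body.sum;
}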
+/** @ingroup algorithms */ +template<typename Range, typename Body, typename Partitioner> +struct start_reduce : public task { + Range my_range; + Body* my_body; + node* my_parent; + + typename Partitioner::task_partition_type my_partition; + small_object_allocator my_allocator; + bool is_right_child; + + task* execute(execution_data&) override; + task* cancel(execution_data&) override; + void finalize(const execution_data&); + + using tree_node_type = reduction_tree_node<Body>; + + //! Constructor reduce root task. + start_reduce( const Range& range, Body& body, Partitioner& partitioner, small_object_allocator& alloc ) : + my_range(range), + my_body(&body), + my_partition(partitioner), + my_allocator(alloc), + is_right_child(false) {} + //! Splitting constructor used to generate children. + /** parent_ becomes left child. Newly constructed object is right child. */ + start_reduce( start_reduce& parent_, typename Partitioner::split_type& split_obj, small_object_allocator& alloc ) : + my_range(parent_.my_range, get_range_split_object<Range>(split_obj)), + my_body(parent_.my_body), + my_partition(parent_.my_partition, split_obj), + my_allocator(alloc), + is_right_child(true) + { + parent_.is_right_child = false; + } + //! Construct right child from the given range as response to the demand. + /** parent_ remains left child. Newly constructed object is right child. */ + start_reduce( start_reduce& parent_, const Range& r, depth_t d, small_object_allocator& alloc ) : + my_range(r), + my_body(parent_.my_body), + my_partition(parent_.my_partition, split()), + my_allocator(alloc), + is_right_child(true) + { + my_partition.align_depth( d ); + parent_.is_right_child = false; + } + static void run(const Range& range, Body& body, Partitioner& partitioner, task_group_context& context) { + if ( !range.empty() ) { + wait_node wn; + small_object_allocator alloc{}; + auto reduce_task = alloc.new_object<start_reduce>(range, body, partitioner, alloc); + reduce_task->my_parent = &wn; + execute_and_wait(*reduce_task, context, wn.m_wait, context); + } + } + static void run(const Range& range, Body& body, Partitioner& partitioner) { + // Bound context prevents exceptions from body to affect nesting or sibling algorithms, + // and allows users to handle exceptions safely by wrapping parallel_reduce in the try-block. + task_group_context context(PARALLEL_REDUCE); + run(range, body, partitioner, context); + } + //! Run body for range, serves as callback for partitioner + void run_body( Range &r ) { + (*my_body)(r); + } + + //! spawn right task, serves as callback for partitioner + void offer_work(typename Partitioner::split_type& split_obj, execution_data& ed) { + offer_work_impl(ed, *this, split_obj); + } + //! spawn right task, serves as callback for partitioner + void offer_work(const Range& r, depth_t d, execution_data& ed) { + offer_work_impl(ed, *this, r, d); + } + +private: + template <typename... Args> + void offer_work_impl(execution_data& ed, Args&&... args) { + small_object_allocator alloc{}; + // New right child + auto right_child = alloc.new_object<start_reduce>(ed, std::forward<Args>(args)..., alloc); + + // New root node as a continuation and ref count. Left and right child attach to the new parent. + right_child->my_parent = my_parent = alloc.new_object<tree_node_type>(ed, my_parent, 2, *my_body, alloc); + + // Spawn the right sibling + right_child->spawn_self(ed); + } + + void spawn_self(execution_data& ed) { + my_partition.spawn_task(*this, *context(ed)); + } +}; + +//! 
fold the tree and deallocate the task +template<typename Range, typename Body, typename Partitioner> +void start_reduce<Range, Body, Partitioner>::finalize(const execution_data& ed) { + // Get the current parent and wait object before an object destruction + node* parent = my_parent; + auto allocator = my_allocator; + // Task execution finished - destroy it + this->~start_reduce(); + // Unwind the tree decrementing the parent`s reference count + fold_tree<tree_node_type>(parent, ed); + allocator.deallocate(this, ed); +} + +//! Execute parallel_reduce task +template<typename Range, typename Body, typename Partitioner> +task* start_reduce<Range,Body,Partitioner>::execute(execution_data& ed) { + if (!is_same_affinity(ed)) { + my_partition.note_affinity(execution_slot(ed)); + } + my_partition.check_being_stolen(*this, ed); + + // The acquire barrier synchronizes the data pointed with my_body if the left + // task has already finished. + if( is_right_child && my_parent->m_ref_count.load(std::memory_order_acquire) == 2 ) { + tree_node_type* parent_ptr = static_cast<tree_node_type*>(my_parent); + my_body = (Body*) new( parent_ptr->zombie_space.begin() ) Body(*my_body, split()); + parent_ptr->has_right_zombie = true; + } + __TBB_ASSERT(my_body != nullptr, "Incorrect body value"); + + my_partition.execute(*this, my_range, ed); + + finalize(ed); + return nullptr; +} + +//! Cancel parallel_reduce task +template<typename Range, typename Body, typename Partitioner> +task* start_reduce<Range, Body, Partitioner>::cancel(execution_data& ed) { + finalize(ed); + return nullptr; +} + +//! Tree node type for parallel_deterministic_reduce. +/** @ingroup algorithms */ +template<typename Body> +struct deterministic_reduction_tree_node : public tree_node { + Body right_body; + Body& left_body; + + deterministic_reduction_tree_node(node* parent, int ref_count, Body& input_left_body, small_object_allocator& alloc) : + tree_node{parent, ref_count, alloc}, + right_body{input_left_body, detail::split()}, + left_body(input_left_body) + {} + + void join(task_group_context* context) { + if (!context->is_group_execution_cancelled()) + left_body.join(right_body); + } +}; + +//! Task type used to split the work of parallel_deterministic_reduce. +/** @ingroup algorithms */ +template<typename Range, typename Body, typename Partitioner> +struct start_deterministic_reduce : public task { + Range my_range; + Body& my_body; + node* my_parent; + + typename Partitioner::task_partition_type my_partition; + small_object_allocator my_allocator; + + task* execute(execution_data&) override; + task* cancel(execution_data&) override; + void finalize(const execution_data&); + + using tree_node_type = deterministic_reduction_tree_node<Body>; + + //! Constructor deterministic_reduce root task. + start_deterministic_reduce( const Range& range, Partitioner& partitioner, Body& body, small_object_allocator& alloc ) : + my_range(range), + my_body(body), + my_partition(partitioner), + my_allocator(alloc) {} + //! Splitting constructor used to generate children. + /** parent_ becomes left child. Newly constructed object is right child. 
*/ + start_deterministic_reduce( start_deterministic_reduce& parent_, typename Partitioner::split_type& split_obj, Body& body, + small_object_allocator& alloc ) : + my_range(parent_.my_range, get_range_split_object<Range>(split_obj)), + my_body(body), + my_partition(parent_.my_partition, split_obj), + my_allocator(alloc) {} + static void run(const Range& range, Body& body, Partitioner& partitioner, task_group_context& context) { + if ( !range.empty() ) { + wait_node wn; + small_object_allocator alloc{}; + auto deterministic_reduce_task = + alloc.new_object<start_deterministic_reduce>(range, partitioner, body, alloc); + deterministic_reduce_task->my_parent = &wn; + execute_and_wait(*deterministic_reduce_task, context, wn.m_wait, context); + } + } + static void run(const Range& range, Body& body, Partitioner& partitioner) { + // Bound context prevents exceptions from body to affect nesting or sibling algorithms, + // and allows users to handle exceptions safely by wrapping parallel_deterministic_reduce + // in the try-block. + task_group_context context(PARALLEL_REDUCE); + run(range, body, partitioner, context); + } + //! Run body for range, serves as callback for partitioner + void run_body( Range &r ) { + my_body( r ); + } + //! Spawn right task, serves as callback for partitioner + void offer_work(typename Partitioner::split_type& split_obj, execution_data& ed) { + offer_work_impl(ed, *this, split_obj); + } +private: + template <typename... Args> + void offer_work_impl(execution_data& ed, Args&&... args) { + small_object_allocator alloc{}; + // New root node as a continuation and ref count. Left and right child attach to the new parent. Split the body. + auto new_tree_node = alloc.new_object<tree_node_type>(ed, my_parent, 2, my_body, alloc); + + // New right child + auto right_child = alloc.new_object<start_deterministic_reduce>(ed, std::forward<Args>(args)..., new_tree_node->right_body, alloc); + + right_child->my_parent = my_parent = new_tree_node; + + // Spawn the right sibling + right_child->spawn_self(ed); + } + + void spawn_self(execution_data& ed) { + my_partition.spawn_task(*this, *context(ed)); + } +}; + +//! Fold the tree and deallocate the task +template<typename Range, typename Body, typename Partitioner> +void start_deterministic_reduce<Range, Body, Partitioner>::finalize(const execution_data& ed) { + // Get the current parent and wait object before an object destruction + node* parent = my_parent; + + auto allocator = my_allocator; + // Task execution finished - destroy it + this->~start_deterministic_reduce(); + // Unwind the tree decrementing the parent`s reference count + fold_tree<tree_node_type>(parent, ed); + allocator.deallocate(this, ed); +} + +//! Execute parallel_deterministic_reduce task +template<typename Range, typename Body, typename Partitioner> +task* start_deterministic_reduce<Range,Body,Partitioner>::execute(execution_data& ed) { + if (!is_same_affinity(ed)) { + my_partition.note_affinity(execution_slot(ed)); + } + my_partition.check_being_stolen(*this, ed); + + my_partition.execute(*this, my_range, ed); + + finalize(ed); + return NULL; +} + +//! Cancel parallel_deterministic_reduce task +template<typename Range, typename Body, typename Partitioner> +task* start_deterministic_reduce<Range, Body, Partitioner>::cancel(execution_data& ed) { + finalize(ed); + return NULL; +} + + +//! Auxiliary class for parallel_reduce; for internal use only. 
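// Sketch (illustrative, not part of the committed patch) of the three pieces
// the lambda_reduce_body adaptor below stores for the functional overloads:
// an identity value, a range body that folds a subrange into a running value,
// and a reduction that merges two partial results. The reduction must be
// associative and the identity must be its neutral element; `data` and the
// function name are placeholder names for this fragment.
#include <cstddef>
#include "oneapi/tbb/blocked_range.h"

inline float fold_pieces_sketch(const float* data, const tbb::blocked_range<std::size_t>& r) {
    const float identity = 0.f;                                    // neutral element of +
    auto range_body = [&](const tbb::blocked_range<std::size_t>& sub, float running) {
        for (std::size_t i = sub.begin(); i != sub.end(); ++i) running += data[i];
        return running;                                            // partial result
    };
    auto reduction = [](float left, float right) { return left + right; };  // associative
    return reduction(identity, range_body(r, identity));           // serial illustration of how the adaptor combines them
}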
+/** The adaptor class that implements \ref parallel_reduce_body_req "parallel_reduce Body" + using given \ref parallel_reduce_lambda_req "anonymous function objects". + **/ +/** @ingroup algorithms */ +template<typename Range, typename Value, typename RealBody, typename Reduction> +class lambda_reduce_body { +//TODO: decide if my_real_body, my_reduction, and my_identity_element should be copied or referenced +// (might require some performance measurements) + + const Value& my_identity_element; + const RealBody& my_real_body; + const Reduction& my_reduction; + Value my_value; + lambda_reduce_body& operator= ( const lambda_reduce_body& other ); +public: + lambda_reduce_body( const Value& identity, const RealBody& body, const Reduction& reduction ) + : my_identity_element(identity) + , my_real_body(body) + , my_reduction(reduction) + , my_value(identity) + { } + lambda_reduce_body( const lambda_reduce_body& other ) = default; + lambda_reduce_body( lambda_reduce_body& other, tbb::split ) + : my_identity_element(other.my_identity_element) + , my_real_body(other.my_real_body) + , my_reduction(other.my_reduction) + , my_value(other.my_identity_element) + { } + void operator()(Range& range) { + my_value = my_real_body(range, const_cast<const Value&>(my_value)); + } + void join( lambda_reduce_body& rhs ) { + my_value = my_reduction(const_cast<const Value&>(my_value), const_cast<const Value&>(rhs.my_value)); + } + Value result() const { + return my_value; + } +}; + + +// Requirements on Range concept are documented in blocked_range.h + +/** \page parallel_reduce_body_req Requirements on parallel_reduce body + Class \c Body implementing the concept of parallel_reduce body must define: + - \code Body::Body( Body&, split ); \endcode Splitting constructor. + Must be able to run concurrently with operator() and method \c join + - \code Body::~Body(); \endcode Destructor + - \code void Body::operator()( Range& r ); \endcode Function call operator applying body to range \c r + and accumulating the result + - \code void Body::join( Body& b ); \endcode Join results. + The result in \c b should be merged into the result of \c this +**/ + +/** \page parallel_reduce_lambda_req Requirements on parallel_reduce anonymous function objects (lambda functions) + TO BE DOCUMENTED +**/ + +/** \name parallel_reduce + See also requirements on \ref range_req "Range" and \ref parallel_reduce_body_req "parallel_reduce Body". **/ +//@{ + +//! Parallel iteration with reduction and default partitioner. +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_reduce( const Range& range, Body& body ) { + start_reduce<Range,Body, const __TBB_DEFAULT_PARTITIONER>::run( range, body, __TBB_DEFAULT_PARTITIONER() ); +} + +//! Parallel iteration with reduction and simple_partitioner +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner ) { + start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner ); +} + +//! Parallel iteration with reduction and auto_partitioner +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner ) { + start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner ); +} + +//! 
Parallel iteration with reduction and static_partitioner +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_reduce( const Range& range, Body& body, const static_partitioner& partitioner ) { + start_reduce<Range,Body,const static_partitioner>::run( range, body, partitioner ); +} + +//! Parallel iteration with reduction and affinity_partitioner +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner ) { + start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner ); +} + +//! Parallel iteration with reduction, default partitioner and user-supplied context. +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_reduce( const Range& range, Body& body, task_group_context& context ) { + start_reduce<Range,Body,const __TBB_DEFAULT_PARTITIONER>::run( range, body, __TBB_DEFAULT_PARTITIONER(), context ); +} + +//! Parallel iteration with reduction, simple partitioner and user-supplied context. +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner, task_group_context& context ) { + start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner, context ); +} + +//! Parallel iteration with reduction, auto_partitioner and user-supplied context +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner, task_group_context& context ) { + start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner, context ); +} + +//! Parallel iteration with reduction, static_partitioner and user-supplied context +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_reduce( const Range& range, Body& body, const static_partitioner& partitioner, task_group_context& context ) { + start_reduce<Range,Body,const static_partitioner>::run( range, body, partitioner, context ); +} + +//! Parallel iteration with reduction, affinity_partitioner and user-supplied context +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner, task_group_context& context ) { + start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner, context ); +} +/** parallel_reduce overloads that work with anonymous function objects + (see also \ref parallel_reduce_lambda_req "requirements on parallel_reduce anonymous function objects"). **/ + +//! Parallel iteration with reduction and default partitioner. +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename RealBody, typename Reduction> +Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) { + lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); + start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER> + ::run(range, body, __TBB_DEFAULT_PARTITIONER() ); + return body.result(); +} + +//! Parallel iteration with reduction and simple_partitioner. 
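// Usage sketch (illustrative, not part of the committed patch) for the
// functional overload introduced by the comment above: a dot product reduced
// with an explicit grainsize and a simple_partitioner. The grainsize of 1024
// and the function name are arbitrary example choices.
#include <cstddef>
#include "oneapi/tbb/parallel_reduce.h"
#include "oneapi/tbb/blocked_range.h"

inline float dot_product_sketch(const float* x, const float* y, std::size_t n) {
    return tbb::parallel_reduce(
        tbb::blocked_range<std::size_t>(0, n, /*grainsize=*/1024),
        0.f,                                                       // identity
        [&](const tbb::blocked_range<std::size_t>& r, float acc) { // fold one subrange
            for (std::size_t i = r.begin(); i != r.end(); ++i) acc += x[i] * y[i];
            return acc;
        },
        [](float a, float b) { return a + b; },                    // reduction
        tbb::simple_partitioner());
}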
+/** @ingroup algorithms **/ +template<typename Range, typename Value, typename RealBody, typename Reduction> +Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, + const simple_partitioner& partitioner ) { + lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); + start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner> + ::run(range, body, partitioner ); + return body.result(); +} + +//! Parallel iteration with reduction and auto_partitioner +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename RealBody, typename Reduction> +Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, + const auto_partitioner& partitioner ) { + lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); + start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner> + ::run( range, body, partitioner ); + return body.result(); +} + +//! Parallel iteration with reduction and static_partitioner +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename RealBody, typename Reduction> +Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, + const static_partitioner& partitioner ) { + lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); + start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const static_partitioner> + ::run( range, body, partitioner ); + return body.result(); +} + +//! Parallel iteration with reduction and affinity_partitioner +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename RealBody, typename Reduction> +Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, + affinity_partitioner& partitioner ) { + lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); + start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner> + ::run( range, body, partitioner ); + return body.result(); +} + +//! Parallel iteration with reduction, default partitioner and user-supplied context. +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename RealBody, typename Reduction> +Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, + task_group_context& context ) { + lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); + start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER> + ::run( range, body, __TBB_DEFAULT_PARTITIONER(), context ); + return body.result(); +} + +//! Parallel iteration with reduction, simple partitioner and user-supplied context. 
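// Usage sketch (illustrative, not part of the committed patch) for the
// context-taking functional overloads above: binding a reduction to an
// explicit task_group_context so it can be cancelled cooperatively.
// `should_stop` is a placeholder predicate; if the group is cancelled the
// returned value is partial and should be checked via the context.
#include <cstddef>
#include "oneapi/tbb/parallel_reduce.h"
#include "oneapi/tbb/blocked_range.h"
#include "oneapi/tbb/task_group.h"   // task_group_context

inline long count_positive_sketch(const int* v, std::size_t n, bool (*should_stop)()) {
    tbb::task_group_context ctx;
    long result = tbb::parallel_reduce(
        tbb::blocked_range<std::size_t>(0, n), 0L,
        [&](const tbb::blocked_range<std::size_t>& r, long acc) {
            if (should_stop()) ctx.cancel_group_execution();       // cooperative cancellation
            for (std::size_t i = r.begin(); i != r.end(); ++i) acc += (v[i] > 0);
            return acc;
        },
        [](long a, long b) { return a + b; },
        ctx);
    return ctx.is_group_execution_cancelled() ? -1 : result;       // -1 marks "cancelled" in this example
}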
+/** @ingroup algorithms **/ +template<typename Range, typename Value, typename RealBody, typename Reduction> +Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, + const simple_partitioner& partitioner, task_group_context& context ) { + lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); + start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner> + ::run( range, body, partitioner, context ); + return body.result(); +} + +//! Parallel iteration with reduction, auto_partitioner and user-supplied context +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename RealBody, typename Reduction> +Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, + const auto_partitioner& partitioner, task_group_context& context ) { + lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); + start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner> + ::run( range, body, partitioner, context ); + return body.result(); +} + +//! Parallel iteration with reduction, static_partitioner and user-supplied context +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename RealBody, typename Reduction> +Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, + const static_partitioner& partitioner, task_group_context& context ) { + lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); + start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,const static_partitioner> + ::run( range, body, partitioner, context ); + return body.result(); +} + +//! Parallel iteration with reduction, affinity_partitioner and user-supplied context +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename RealBody, typename Reduction> +Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, + affinity_partitioner& partitioner, task_group_context& context ) { + lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); + start_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner> + ::run( range, body, partitioner, context ); + return body.result(); +} + +//! Parallel iteration with deterministic reduction and default simple partitioner. +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_deterministic_reduce( const Range& range, Body& body ) { + start_deterministic_reduce<Range, Body, const simple_partitioner>::run(range, body, simple_partitioner()); +} + +//! Parallel iteration with deterministic reduction and simple partitioner. +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_deterministic_reduce( const Range& range, Body& body, const simple_partitioner& partitioner ) { + start_deterministic_reduce<Range, Body, const simple_partitioner>::run(range, body, partitioner); +} + +//! Parallel iteration with deterministic reduction and static partitioner. 
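// Usage sketch (illustrative, not part of the committed patch): unlike plain
// parallel_reduce, parallel_deterministic_reduce splits the range and joins
// partial results in an order that does not depend on load balancing, so a
// floating-point sum is reproducible from run to run; only simple_partitioner
// and static_partitioner overloads are provided. Uses the functional overloads
// declared further below; the function name is an example placeholder.
#include <cstddef>
#include "oneapi/tbb/parallel_reduce.h"
#include "oneapi/tbb/blocked_range.h"

inline double reproducible_sum_sketch(const double* v, std::size_t n) {
    return tbb::parallel_deterministic_reduce(
        tbb::blocked_range<std::size_t>(0, n), 0.0,
        [&](const tbb::blocked_range<std::size_t>& r, double acc) {
            for (std::size_t i = r.begin(); i != r.end(); ++i) acc += v[i];
            return acc;
        },
        [](double a, double b) { return a + b; });
}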
+/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_deterministic_reduce( const Range& range, Body& body, const static_partitioner& partitioner ) { + start_deterministic_reduce<Range, Body, const static_partitioner>::run(range, body, partitioner); +} + +//! Parallel iteration with deterministic reduction, default simple partitioner and user-supplied context. +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_deterministic_reduce( const Range& range, Body& body, task_group_context& context ) { + start_deterministic_reduce<Range,Body, const simple_partitioner>::run( range, body, simple_partitioner(), context ); +} + +//! Parallel iteration with deterministic reduction, simple partitioner and user-supplied context. +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_deterministic_reduce( const Range& range, Body& body, const simple_partitioner& partitioner, task_group_context& context ) { + start_deterministic_reduce<Range, Body, const simple_partitioner>::run(range, body, partitioner, context); +} + +//! Parallel iteration with deterministic reduction, static partitioner and user-supplied context. +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_deterministic_reduce( const Range& range, Body& body, const static_partitioner& partitioner, task_group_context& context ) { + start_deterministic_reduce<Range, Body, const static_partitioner>::run(range, body, partitioner, context); +} + +/** parallel_reduce overloads that work with anonymous function objects + (see also \ref parallel_reduce_lambda_req "requirements on parallel_reduce anonymous function objects"). **/ + +//! Parallel iteration with deterministic reduction and default simple partitioner. +// TODO: consider making static_partitioner the default +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename RealBody, typename Reduction> +Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) { + return parallel_deterministic_reduce(range, identity, real_body, reduction, simple_partitioner()); +} + +//! Parallel iteration with deterministic reduction and simple partitioner. +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename RealBody, typename Reduction> +Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, const simple_partitioner& partitioner ) { + lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); + start_deterministic_reduce<Range,lambda_reduce_body<Range,Value,RealBody,Reduction>, const simple_partitioner> + ::run(range, body, partitioner); + return body.result(); +} + +//! Parallel iteration with deterministic reduction and static partitioner. +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename RealBody, typename Reduction> +Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, const static_partitioner& partitioner ) { + lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction); + start_deterministic_reduce<Range, lambda_reduce_body<Range, Value, RealBody, Reduction>, const static_partitioner> + ::run(range, body, partitioner); + return body.result(); +} + +//! 
Parallel iteration with deterministic reduction, default simple partitioner and user-supplied context. +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename RealBody, typename Reduction> +Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, + task_group_context& context ) { + return parallel_deterministic_reduce(range, identity, real_body, reduction, simple_partitioner(), context); +} + +//! Parallel iteration with deterministic reduction, simple partitioner and user-supplied context. +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename RealBody, typename Reduction> +Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, + const simple_partitioner& partitioner, task_group_context& context ) { + lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction); + start_deterministic_reduce<Range, lambda_reduce_body<Range, Value, RealBody, Reduction>, const simple_partitioner> + ::run(range, body, partitioner, context); + return body.result(); +} + +//! Parallel iteration with deterministic reduction, static partitioner and user-supplied context. +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename RealBody, typename Reduction> +Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, + const static_partitioner& partitioner, task_group_context& context ) { + lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction); + start_deterministic_reduce<Range, lambda_reduce_body<Range, Value, RealBody, Reduction>, const static_partitioner> + ::run(range, body, partitioner, context); + return body.result(); +} +//@} + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::parallel_reduce; +using detail::d1::parallel_deterministic_reduce; +// Split types +using detail::split; +using detail::proportional_split; +} // namespace v1 + +} // namespace tbb +#endif /* __TBB_parallel_reduce_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/parallel_scan.h b/contrib/libs/tbb/include/oneapi/tbb/parallel_scan.h new file mode 100644 index 0000000000..d5d69ca0b2 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/parallel_scan.h @@ -0,0 +1,590 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_parallel_scan_H +#define __TBB_parallel_scan_H + +#include <functional> + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" +#include "detail/_exception.h" +#include "detail/_task.h" + +#include "profiling.h" +#include "partitioner.h" +#include "blocked_range.h" +#include "task_group.h" + +namespace tbb { +namespace detail { +namespace d1 { + +//! Used to indicate that the initial scan is being performed. 
+/** @ingroup algorithms */ +struct pre_scan_tag { + static bool is_final_scan() {return false;} + operator bool() {return is_final_scan();} +}; + +//! Used to indicate that the final scan is being performed. +/** @ingroup algorithms */ +struct final_scan_tag { + static bool is_final_scan() {return true;} + operator bool() {return is_final_scan();} +}; + +template<typename Range, typename Body> +struct sum_node; + +//! Performs final scan for a leaf +/** @ingroup algorithms */ +template<typename Range, typename Body> +struct final_sum : public task { +private: + using sum_node_type = sum_node<Range, Body>; + Body m_body; + aligned_space<Range> m_range; + //! Where to put result of last subrange, or nullptr if not last subrange. + Body* m_stuff_last; + + wait_context& m_wait_context; + sum_node_type* m_parent = nullptr; +public: + small_object_allocator m_allocator; + final_sum( Body& body, wait_context& w_o, small_object_allocator& alloc ) : + m_body(body, split()), m_wait_context(w_o), m_allocator(alloc) { + poison_pointer(m_stuff_last); + } + + final_sum( final_sum& sum, small_object_allocator& alloc ) : + m_body(sum.m_body, split()), m_wait_context(sum.m_wait_context), m_allocator(alloc) { + poison_pointer(m_stuff_last); + } + + ~final_sum() { + m_range.begin()->~Range(); + } + void finish_construction( sum_node_type* parent, const Range& range, Body* stuff_last ) { + __TBB_ASSERT( m_parent == nullptr, nullptr ); + m_parent = parent; + new( m_range.begin() ) Range(range); + m_stuff_last = stuff_last; + } +private: + sum_node_type* release_parent() { + call_itt_task_notify(releasing, m_parent); + if (m_parent) { + auto parent = m_parent; + m_parent = nullptr; + if (parent->ref_count.fetch_sub(1, std::memory_order_relaxed) == 1) { + return parent; + } + } + else + m_wait_context.release(); + return nullptr; + } + sum_node_type* finalize(const execution_data& ed){ + sum_node_type* next_task = release_parent(); + m_allocator.delete_object<final_sum>(this, ed); + return next_task; + } + +public: + task* execute(execution_data& ed) override { + m_body( *m_range.begin(), final_scan_tag() ); + if( m_stuff_last ) + m_stuff_last->assign(m_body); + + return finalize(ed); + } + task* cancel(execution_data& ed) override { + return finalize(ed); + } + template<typename Tag> + void operator()( const Range& r, Tag tag ) { + m_body( r, tag ); + } + void reverse_join( final_sum& a ) { + m_body.reverse_join(a.m_body); + } + void reverse_join( Body& body ) { + m_body.reverse_join(body); + } + void assign_to( Body& body ) { + body.assign(m_body); + } + void self_destroy(const execution_data& ed) { + m_allocator.delete_object<final_sum>(this, ed); + } +}; + +//! Split work to be done in the scan. 
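An illustrative Body (not part of the patch; class and function names are made up) showing how the two tags above are used: a running-sum prefix scan in which the pre-scan pass only accumulates and the final pass also writes results.

#include <oneapi/tbb/parallel_scan.h>
#include <oneapi/tbb/blocked_range.h>
#include <vector>

class running_sum {
    const std::vector<int>& in;
    std::vector<int>& out;
    int sum = 0;
public:
    running_sum(const std::vector<int>& i, std::vector<int>& o) : in(i), out(o) {}
    running_sum(running_sum& b, tbb::split) : in(b.in), out(b.out) {}  // splitting constructor
    template<typename Tag>
    void operator()(const tbb::blocked_range<std::size_t>& r, Tag tag) {
        for (std::size_t i = r.begin(); i != r.end(); ++i) {
            sum += in[i];
            if (tag.is_final_scan())     // only the final pass stores prefix sums
                out[i] = sum;
        }
    }
    void reverse_join(running_sum& a) { sum = a.sum + sum; }  // fold in the left partial sum
    void assign(running_sum& b) { sum = b.sum; }
};

void prefix_sums(const std::vector<int>& in, std::vector<int>& out) {
    running_sum body(in, out);
    tbb::parallel_scan(tbb::blocked_range<std::size_t>(0, in.size()), body);
}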
+/** @ingroup algorithms */ +template<typename Range, typename Body> +struct sum_node : public task { +private: + using final_sum_type = final_sum<Range,Body>; +public: + final_sum_type *m_incoming; + final_sum_type *m_body; + Body *m_stuff_last; +private: + final_sum_type *m_left_sum; + sum_node *m_left; + sum_node *m_right; + bool m_left_is_final; + Range m_range; + wait_context& m_wait_context; + sum_node* m_parent; + small_object_allocator m_allocator; +public: + std::atomic<unsigned int> ref_count{0}; + sum_node( const Range range, bool left_is_final_, sum_node* parent, wait_context& w_o, small_object_allocator& alloc ) : + m_stuff_last(nullptr), + m_left_sum(nullptr), + m_left(nullptr), + m_right(nullptr), + m_left_is_final(left_is_final_), + m_range(range), + m_wait_context(w_o), + m_parent(parent), + m_allocator(alloc) + { + if( m_parent ) + m_parent->ref_count.fetch_add(1, std::memory_order_relaxed); + // Poison fields that will be set by second pass. + poison_pointer(m_body); + poison_pointer(m_incoming); + } + + ~sum_node() { + if (m_parent) + m_parent->ref_count.fetch_sub(1, std::memory_order_relaxed); + } +private: + sum_node* release_parent() { + call_itt_task_notify(releasing, m_parent); + if (m_parent) { + auto parent = m_parent; + m_parent = nullptr; + if (parent->ref_count.fetch_sub(1, std::memory_order_relaxed) == 1) { + return parent; + } + } + else + m_wait_context.release(); + return nullptr; + } + task* create_child( const Range& range, final_sum_type& body, sum_node* child, final_sum_type* incoming, Body* stuff_last ) { + if( child ) { + __TBB_ASSERT( is_poisoned(child->m_body) && is_poisoned(child->m_incoming), nullptr ); + child->prepare_for_execution(body, incoming, stuff_last); + return child; + } else { + body.finish_construction(this, range, stuff_last); + return &body; + } + } + + sum_node* finalize(const execution_data& ed) { + sum_node* next_task = release_parent(); + m_allocator.delete_object<sum_node>(this, ed); + return next_task; + } + +public: + void prepare_for_execution(final_sum_type& body, final_sum_type* incoming, Body *stuff_last) { + this->m_body = &body; + this->m_incoming = incoming; + this->m_stuff_last = stuff_last; + } + task* execute(execution_data& ed) override { + if( m_body ) { + if( m_incoming ) + m_left_sum->reverse_join( *m_incoming ); + task* right_child = this->create_child(Range(m_range,split()), *m_left_sum, m_right, m_left_sum, m_stuff_last); + task* left_child = m_left_is_final ? nullptr : this->create_child(m_range, *m_body, m_left, m_incoming, nullptr); + ref_count = (left_child != nullptr) + (right_child != nullptr); + m_body = nullptr; + if( left_child ) { + spawn(*right_child, *ed.context); + return left_child; + } else { + return right_child; + } + } else { + return finalize(ed); + } + } + task* cancel(execution_data& ed) override { + return finalize(ed); + } + void self_destroy(const execution_data& ed) { + m_allocator.delete_object<sum_node>(this, ed); + } + template<typename range,typename body,typename partitioner> + friend struct start_scan; + + template<typename range,typename body> + friend struct finish_scan; +}; + +//! 
Combine partial results +/** @ingroup algorithms */ +template<typename Range, typename Body> +struct finish_scan : public task { +private: + using sum_node_type = sum_node<Range,Body>; + using final_sum_type = final_sum<Range,Body>; + final_sum_type** const m_sum_slot; + sum_node_type*& m_return_slot; + small_object_allocator m_allocator; +public: + final_sum_type* m_right_zombie; + sum_node_type& m_result; + std::atomic<unsigned int> ref_count{2}; + finish_scan* m_parent; + wait_context& m_wait_context; + task* execute(execution_data& ed) override { + __TBB_ASSERT( m_result.ref_count.load() == static_cast<unsigned int>((m_result.m_left!=nullptr)+(m_result.m_right!=nullptr)), nullptr ); + if( m_result.m_left ) + m_result.m_left_is_final = false; + if( m_right_zombie && m_sum_slot ) + (*m_sum_slot)->reverse_join(*m_result.m_left_sum); + __TBB_ASSERT( !m_return_slot, nullptr ); + if( m_right_zombie || m_result.m_right ) { + m_return_slot = &m_result; + } else { + m_result.self_destroy(ed); + } + if( m_right_zombie && !m_sum_slot && !m_result.m_right ) { + m_right_zombie->self_destroy(ed); + m_right_zombie = nullptr; + } + return finalize(ed); + } + task* cancel(execution_data& ed) override { + return finalize(ed); + } + finish_scan(sum_node_type*& return_slot, final_sum_type** sum, sum_node_type& result_, finish_scan* parent, wait_context& w_o, small_object_allocator& alloc) : + m_sum_slot(sum), + m_return_slot(return_slot), + m_allocator(alloc), + m_right_zombie(nullptr), + m_result(result_), + m_parent(parent), + m_wait_context(w_o) + { + __TBB_ASSERT( !m_return_slot, nullptr ); + } +private: + finish_scan* release_parent() { + call_itt_task_notify(releasing, m_parent); + if (m_parent) { + auto parent = m_parent; + m_parent = nullptr; + if (parent->ref_count.fetch_sub(1, std::memory_order_relaxed) == 1) { + return parent; + } + } + else + m_wait_context.release(); + return nullptr; + } + finish_scan* finalize(const execution_data& ed) { + finish_scan* next_task = release_parent(); + m_allocator.delete_object<finish_scan>(this, ed); + return next_task; + } +}; + +//! Initial task to split the work +/** @ingroup algorithms */ +template<typename Range, typename Body, typename Partitioner> +struct start_scan : public task { +private: + using sum_node_type = sum_node<Range,Body>; + using final_sum_type = final_sum<Range,Body>; + using finish_pass1_type = finish_scan<Range,Body>; + std::reference_wrapper<sum_node_type*> m_return_slot; + Range m_range; + std::reference_wrapper<final_sum_type> m_body; + typename Partitioner::partition_type m_partition; + /** Non-null if caller is requesting total. 
*/ + final_sum_type** m_sum_slot; + bool m_is_final; + bool m_is_right_child; + + finish_pass1_type* m_parent; + small_object_allocator m_allocator; + wait_context& m_wait_context; + + finish_pass1_type* release_parent() { + call_itt_task_notify(releasing, m_parent); + if (m_parent) { + auto parent = m_parent; + m_parent = nullptr; + if (parent->ref_count.fetch_sub(1, std::memory_order_relaxed) == 1) { + return parent; + } + } + else + m_wait_context.release(); + return nullptr; + } + + finish_pass1_type* finalize( const execution_data& ed ) { + finish_pass1_type* next_task = release_parent(); + m_allocator.delete_object<start_scan>(this, ed); + return next_task; + } + +public: + task* execute( execution_data& ) override; + task* cancel( execution_data& ed ) override { + return finalize(ed); + } + start_scan( sum_node_type*& return_slot, start_scan& parent, small_object_allocator& alloc ) : + m_return_slot(return_slot), + m_range(parent.m_range,split()), + m_body(parent.m_body), + m_partition(parent.m_partition,split()), + m_sum_slot(parent.m_sum_slot), + m_is_final(parent.m_is_final), + m_is_right_child(true), + m_parent(parent.m_parent), + m_allocator(alloc), + m_wait_context(parent.m_wait_context) + { + __TBB_ASSERT( !m_return_slot, nullptr ); + parent.m_is_right_child = false; + } + + start_scan( sum_node_type*& return_slot, const Range& range, final_sum_type& body, const Partitioner& partitioner, wait_context& w_o, small_object_allocator& alloc ) : + m_return_slot(return_slot), + m_range(range), + m_body(body), + m_partition(partitioner), + m_sum_slot(nullptr), + m_is_final(true), + m_is_right_child(false), + m_parent(nullptr), + m_allocator(alloc), + m_wait_context(w_o) + { + __TBB_ASSERT( !m_return_slot, nullptr ); + } + + static void run( const Range& range, Body& body, const Partitioner& partitioner ) { + if( !range.empty() ) { + task_group_context context(PARALLEL_SCAN); + + using start_pass1_type = start_scan<Range,Body,Partitioner>; + sum_node_type* root = nullptr; + wait_context w_ctx{1}; + small_object_allocator alloc{}; + + auto& temp_body = *alloc.new_object<final_sum_type>(body, w_ctx, alloc); + temp_body.reverse_join(body); + + auto& pass1 = *alloc.new_object<start_pass1_type>(/*m_return_slot=*/root, range, temp_body, partitioner, w_ctx, alloc); + + execute_and_wait(pass1, context, w_ctx, context); + if( root ) { + root->prepare_for_execution(temp_body, nullptr, &body); + w_ctx.reserve(); + execute_and_wait(*root, context, w_ctx, context); + } else { + temp_body.assign_to(body); + temp_body.finish_construction(nullptr, range, nullptr); + alloc.delete_object<final_sum_type>(&temp_body); + } + } + } +}; + +template<typename Range, typename Body, typename Partitioner> +task* start_scan<Range,Body,Partitioner>::execute( execution_data& ed ) { + // Inspecting m_parent->result.left_sum would ordinarily be a race condition. + // But we inspect it only if we are not a stolen task, in which case we + // know that task assigning to m_parent->result.left_sum has completed. 
+ __TBB_ASSERT(!m_is_right_child || m_parent, "right child is never an orphan"); + bool treat_as_stolen = m_is_right_child && (is_stolen(ed) || &m_body.get()!=m_parent->m_result.m_left_sum); + if( treat_as_stolen ) { + // Invocation is for right child that has been really stolen or needs to be virtually stolen + small_object_allocator alloc{}; + m_parent->m_right_zombie = alloc.new_object<final_sum_type>(m_body, alloc); + m_body = *m_parent->m_right_zombie; + m_is_final = false; + } + task* next_task = nullptr; + if( (m_is_right_child && !treat_as_stolen) || !m_range.is_divisible() || m_partition.should_execute_range(ed) ) { + if( m_is_final ) + m_body(m_range, final_scan_tag()); + else if( m_sum_slot ) + m_body(m_range, pre_scan_tag()); + if( m_sum_slot ) + *m_sum_slot = &m_body.get(); + __TBB_ASSERT( !m_return_slot, nullptr ); + + next_task = finalize(ed); + } else { + small_object_allocator alloc{}; + auto result = alloc.new_object<sum_node_type>(m_range,/*m_left_is_final=*/m_is_final, m_parent? &m_parent->m_result: nullptr, m_wait_context, alloc); + + auto new_parent = alloc.new_object<finish_pass1_type>(m_return_slot, m_sum_slot, *result, m_parent, m_wait_context, alloc); + m_parent = new_parent; + + // Split off right child + auto& right_child = *alloc.new_object<start_scan>(/*m_return_slot=*/result->m_right, *this, alloc); + + spawn(right_child, *ed.context); + + m_sum_slot = &result->m_left_sum; + m_return_slot = result->m_left; + + __TBB_ASSERT( !m_return_slot, nullptr ); + next_task = this; + } + return next_task; +} + +template<typename Range, typename Value, typename Scan, typename ReverseJoin> +class lambda_scan_body { + Value m_sum_slot; + const Value& identity_element; + const Scan& m_scan; + const ReverseJoin& m_reverse_join; +public: + void operator=(const lambda_scan_body&) = delete; + lambda_scan_body(const lambda_scan_body&) = default; + + lambda_scan_body( const Value& identity, const Scan& scan, const ReverseJoin& rev_join ) + : m_sum_slot(identity) + , identity_element(identity) + , m_scan(scan) + , m_reverse_join(rev_join) {} + + lambda_scan_body( lambda_scan_body& b, split ) + : m_sum_slot(b.identity_element) + , identity_element(b.identity_element) + , m_scan(b.m_scan) + , m_reverse_join(b.m_reverse_join) {} + + template<typename Tag> + void operator()( const Range& r, Tag tag ) { + m_sum_slot = m_scan(r, m_sum_slot, tag); + } + + void reverse_join( lambda_scan_body& a ) { + m_sum_slot = m_reverse_join(a.m_sum_slot, m_sum_slot); + } + + void assign( lambda_scan_body& b ) { + m_sum_slot = b.m_sum_slot; + } + + Value result() const { + return m_sum_slot; + } +}; + +// Requirements on Range concept are documented in blocked_range.h + +/** \page parallel_scan_body_req Requirements on parallel_scan body + Class \c Body implementing the concept of parallel_scan body must define: + - \code Body::Body( Body&, split ); \endcode Splitting constructor. 
+ Split \c b so that \c this and \c b can accumulate separately + - \code Body::~Body(); \endcode Destructor + - \code void Body::operator()( const Range& r, pre_scan_tag ); \endcode + Preprocess iterations for range \c r + - \code void Body::operator()( const Range& r, final_scan_tag ); \endcode + Do final processing for iterations of range \c r + - \code void Body::reverse_join( Body& a ); \endcode + Merge preprocessing state of \c a into \c this, where \c a was + created earlier from \c b by b's splitting constructor +**/ + +/** \name parallel_scan + See also requirements on \ref range_req "Range" and \ref parallel_scan_body_req "parallel_scan Body". **/ +//@{ + +//! Parallel prefix with default partitioner +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_scan( const Range& range, Body& body ) { + start_scan<Range, Body, auto_partitioner>::run(range,body,__TBB_DEFAULT_PARTITIONER()); +} + +//! Parallel prefix with simple_partitioner +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_scan( const Range& range, Body& body, const simple_partitioner& partitioner ) { + start_scan<Range, Body, simple_partitioner>::run(range, body, partitioner); +} + +//! Parallel prefix with auto_partitioner +/** @ingroup algorithms **/ +template<typename Range, typename Body> +void parallel_scan( const Range& range, Body& body, const auto_partitioner& partitioner ) { + start_scan<Range,Body,auto_partitioner>::run(range, body, partitioner); +} + +//! Parallel prefix with default partitioner +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename Scan, typename ReverseJoin> +Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join ) { + lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join); + parallel_scan(range, body, __TBB_DEFAULT_PARTITIONER()); + return body.result(); +} + +//! Parallel prefix with simple_partitioner +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename Scan, typename ReverseJoin> +Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join, + const simple_partitioner& partitioner ) { + lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join); + parallel_scan(range, body, partitioner); + return body.result(); +} + +//! 
Parallel prefix with auto_partitioner +/** @ingroup algorithms **/ +template<typename Range, typename Value, typename Scan, typename ReverseJoin> +Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join, + const auto_partitioner& partitioner ) { + lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join); + parallel_scan(range, body, partitioner); + return body.result(); +} + +} // namespace d1 +} // namespace detail + +inline namespace v1 { + using detail::d1::parallel_scan; + using detail::d1::pre_scan_tag; + using detail::d1::final_scan_tag; + +} // namespace v1 + +} // namespace tbb + +#endif /* __TBB_parallel_scan_H */ + diff --git a/contrib/libs/tbb/include/oneapi/tbb/parallel_sort.h b/contrib/libs/tbb/include/oneapi/tbb/parallel_sort.h new file mode 100644 index 0000000000..0e7be5e25b --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/parallel_sort.h @@ -0,0 +1,247 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_parallel_sort_H +#define __TBB_parallel_sort_H + +#include "detail/_namespace_injection.h" +#include "parallel_for.h" +#include "blocked_range.h" +#include "profiling.h" + +#include <algorithm> +#include <iterator> +#include <functional> +#include <cstddef> + +namespace tbb { +namespace detail { +namespace d1 { + +//! Range used in quicksort to split elements into subranges based on a value. +/** The split operation selects a splitter and places all elements less than or equal + to the value in the first range and the remaining elements in the second range. + @ingroup algorithms */ +template<typename RandomAccessIterator, typename Compare> +class quick_sort_range { + std::size_t median_of_three( const RandomAccessIterator& array, std::size_t l, std::size_t m, std::size_t r ) const { + return comp(array[l], array[m]) ? ( comp(array[m], array[r]) ? m : ( comp(array[l], array[r]) ? r : l ) ) + : ( comp(array[r], array[m]) ? m : ( comp(array[r], array[l]) ? r : l ) ); + } + + std::size_t pseudo_median_of_nine( const RandomAccessIterator& array, const quick_sort_range& range ) const { + std::size_t offset = range.size / 8u; + return median_of_three(array, + median_of_three(array, 0 , offset, offset * 2), + median_of_three(array, offset * 3, offset * 4, offset * 5), + median_of_three(array, offset * 6, offset * 7, range.size - 1)); + + } + + std::size_t split_range( quick_sort_range& range ) { + RandomAccessIterator array = range.begin; + RandomAccessIterator first_element = range.begin; + std::size_t m = pseudo_median_of_nine(array, range); + if( m != 0 ) std::iter_swap(array, array + m); + + std::size_t i = 0; + std::size_t j = range.size; + // Partition interval [i + 1,j - 1] with key *first_element. + for(;;) { + __TBB_ASSERT( i < j, nullptr ); + // Loop must terminate since array[l] == *first_element. + do { + --j; + __TBB_ASSERT( i <= j, "bad ordering relation?" 
); + } while( comp(*first_element, array[j]) ); + do { + __TBB_ASSERT( i <= j, nullptr ); + if( i == j ) goto partition; + ++i; + } while( comp(array[i], *first_element) ); + if( i == j ) goto partition; + std::iter_swap(array + i, array + j); + } +partition: + // Put the partition key were it belongs + std::iter_swap(array + j, first_element); + // array[l..j) is less or equal to key. + // array(j..r) is greater or equal to key. + // array[j] is equal to key + i = j + 1; + std::size_t new_range_size = range.size - i; + range.size = j; + return new_range_size; + } + +public: + quick_sort_range() = default; + quick_sort_range( const quick_sort_range& ) = default; + void operator=( const quick_sort_range& ) = delete; + + static constexpr std::size_t grainsize = 500; + const Compare& comp; + std::size_t size; + RandomAccessIterator begin; + + quick_sort_range( RandomAccessIterator begin_, std::size_t size_, const Compare& comp_ ) : + comp(comp_), size(size_), begin(begin_) {} + + bool empty() const { return size == 0; } + bool is_divisible() const { return size >= grainsize; } + + quick_sort_range( quick_sort_range& range, split ) + : comp(range.comp) + , size(split_range(range)) + // +1 accounts for the pivot element, which is at its correct place + // already and, therefore, is not included into subranges. + , begin(range.begin + range.size + 1) {} +}; + +//! Body class used to test if elements in a range are presorted +/** @ingroup algorithms */ +template<typename RandomAccessIterator, typename Compare> +class quick_sort_pretest_body { + const Compare& comp; + task_group_context& context; + +public: + quick_sort_pretest_body() = default; + quick_sort_pretest_body( const quick_sort_pretest_body& ) = default; + void operator=( const quick_sort_pretest_body& ) = delete; + + quick_sort_pretest_body( const Compare& _comp, task_group_context& _context ) : comp(_comp), context(_context) {} + + void operator()( const blocked_range<RandomAccessIterator>& range ) const { + RandomAccessIterator my_end = range.end(); + + int i = 0; + //TODO: consider using std::is_sorted() for each 64 iterations (requires performance measurements) + for( RandomAccessIterator k = range.begin(); k != my_end; ++k, ++i ) { + if( i % 64 == 0 && context.is_group_execution_cancelled() ) break; + + // The k - 1 is never out-of-range because the first chunk starts at begin+serial_cutoff+1 + if( comp(*(k), *(k - 1)) ) { + context.cancel_group_execution(); + break; + } + } + } +}; + +//! Body class used to sort elements in a range that is smaller than the grainsize. +/** @ingroup algorithms */ +template<typename RandomAccessIterator, typename Compare> +struct quick_sort_body { + void operator()( const quick_sort_range<RandomAccessIterator,Compare>& range ) const { + std::sort(range.begin, range.begin + range.size, range.comp); + } +}; + +//! Method to perform parallel_for based quick sort. +/** @ingroup algorithms */ +template<typename RandomAccessIterator, typename Compare> +void do_parallel_quick_sort( RandomAccessIterator begin, RandomAccessIterator end, const Compare& comp ) { + parallel_for(quick_sort_range<RandomAccessIterator,Compare>(begin, end - begin, comp), + quick_sort_body<RandomAccessIterator,Compare>(), + auto_partitioner()); +} + +//! Wrapper method to initiate the sort by calling parallel_for. 
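A short sketch of calling the public parallel_sort entry points that follow (illustrative only, not part of the patch; the function name is made up). Small inputs fall back to std::sort, as the overloads below show.

#include <oneapi/tbb/parallel_sort.h>
#include <vector>

void sort_descending(std::vector<double>& values) {
    tbb::parallel_sort(values.begin(), values.end(),
                       [](double a, double b) { return a > b; });  // custom comparator
    // Or sort the whole container with the default std::less comparator:
    // tbb::parallel_sort(values);
}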
+/** @ingroup algorithms */ +template<typename RandomAccessIterator, typename Compare> +void parallel_quick_sort( RandomAccessIterator begin, RandomAccessIterator end, const Compare& comp ) { + task_group_context my_context(PARALLEL_SORT); + constexpr int serial_cutoff = 9; + + __TBB_ASSERT( begin + serial_cutoff < end, "min_parallel_size is smaller than serial cutoff?" ); + RandomAccessIterator k = begin; + for( ; k != begin + serial_cutoff; ++k ) { + if( comp(*(k + 1), *k) ) { + do_parallel_quick_sort(begin, end, comp); + } + } + + // Check is input range already sorted + parallel_for(blocked_range<RandomAccessIterator>(k + 1, end), + quick_sort_pretest_body<RandomAccessIterator, Compare>(comp, my_context), + auto_partitioner(), + my_context); + + if( my_context.is_group_execution_cancelled() ) + do_parallel_quick_sort(begin, end, comp); +} + +/** \page parallel_sort_iter_req Requirements on iterators for parallel_sort + Requirements on the iterator type \c It and its value type \c T for \c parallel_sort: + + - \code void iter_swap( It a, It b ) \endcode Swaps the values of the elements the given + iterators \c a and \c b are pointing to. \c It should be a random access iterator. + + - \code bool Compare::operator()( const T& x, const T& y ) \endcode True if x comes before y; +**/ + +/** \name parallel_sort + See also requirements on \ref parallel_sort_iter_req "iterators for parallel_sort". **/ +//@{ + +//! Sorts the data in [begin,end) using the given comparator +/** The compare function object is used for all comparisons between elements during sorting. + The compare object must define a bool operator() function. + @ingroup algorithms **/ +template<typename RandomAccessIterator, typename Compare> +void parallel_sort( RandomAccessIterator begin, RandomAccessIterator end, const Compare& comp ) { + constexpr int min_parallel_size = 500; + if( end > begin ) { + if( end - begin < min_parallel_size ) { + std::sort(begin, end, comp); + } else { + parallel_quick_sort(begin, end, comp); + } + } +} + +//! Sorts the data in [begin,end) with a default comparator \c std::less<RandomAccessIterator> +/** @ingroup algorithms **/ +template<typename RandomAccessIterator> +void parallel_sort( RandomAccessIterator begin, RandomAccessIterator end ) { + parallel_sort(begin, end, std::less<typename std::iterator_traits<RandomAccessIterator>::value_type>()); +} + +//! Sorts the data in rng using the given comparator +/** @ingroup algorithms **/ +template<typename Range, typename Compare> +void parallel_sort( Range& rng, const Compare& comp ) { + parallel_sort(std::begin(rng), std::end(rng), comp); +} + +//! Sorts the data in rng with a default comparator \c std::less<RandomAccessIterator> +/** @ingroup algorithms **/ +template<typename Range> +void parallel_sort( Range& rng ) { + parallel_sort(std::begin(rng), std::end(rng)); +} +//@} + +} // namespace d1 +} // namespace detail + +inline namespace v1 { + using detail::d1::parallel_sort; +} // namespace v1 +} // namespace tbb + +#endif /*__TBB_parallel_sort_H*/ diff --git a/contrib/libs/tbb/include/oneapi/tbb/partitioner.h b/contrib/libs/tbb/include/oneapi/tbb/partitioner.h new file mode 100644 index 0000000000..37ac0a09d9 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/partitioner.h @@ -0,0 +1,688 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_partitioner_H +#define __TBB_partitioner_H + +#ifndef __TBB_INITIAL_CHUNKS +// initial task divisions per thread +#define __TBB_INITIAL_CHUNKS 2 +#endif +#ifndef __TBB_RANGE_POOL_CAPACITY +// maximum number of elements in range pool +#define __TBB_RANGE_POOL_CAPACITY 8 +#endif +#ifndef __TBB_INIT_DEPTH +// initial value for depth of range pool +#define __TBB_INIT_DEPTH 5 +#endif +#ifndef __TBB_DEMAND_DEPTH_ADD +// when imbalance is found range splits this value times more +#define __TBB_DEMAND_DEPTH_ADD 1 +#endif + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" +#include "detail/_aligned_space.h" +#include "detail/_utils.h" +#include "detail/_template_helpers.h" +#include "detail/_range_common.h" +#include "detail/_task.h" +#include "detail/_small_object_pool.h" + +#include "cache_aligned_allocator.h" +#include "task_group.h" // task_group_context +#include "task_arena.h" + +#include <algorithm> +#include <atomic> +#include <type_traits> + +#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) + // Workaround for overzealous compiler warnings + #pragma warning (push) + #pragma warning (disable: 4244) +#endif + +namespace tbb { +namespace detail { + +namespace d1 { +class auto_partitioner; +class simple_partitioner; +class static_partitioner; +class affinity_partitioner; +class affinity_partition_type; +class affinity_partitioner_base; + +inline std::size_t get_initial_auto_partitioner_divisor() { + const std::size_t factor = 4; + return factor * max_concurrency(); +} + +//! Defines entry point for affinity partitioner into oneTBB run-time library. +class affinity_partitioner_base: no_copy { + friend class affinity_partitioner; + friend class affinity_partition_type; + //! Array that remembers affinities of tree positions to affinity_id. + /** NULL if my_size==0. */ + slot_id* my_array; + //! Number of elements in my_array. + std::size_t my_size; + //! Zeros the fields. + affinity_partitioner_base() : my_array(nullptr), my_size(0) {} + //! Deallocates my_array. + ~affinity_partitioner_base() { resize(0); } + //! Resize my_array. + /** Retains values if resulting size is the same. */ + void resize(unsigned factor) { + // Check factor to avoid asking for number of workers while there might be no arena. + unsigned max_threads_in_arena = max_concurrency(); + std::size_t new_size = factor ? factor * max_threads_in_arena : 0; + if (new_size != my_size) { + if (my_array) { + r1::cache_aligned_deallocate(my_array); + // Following two assignments must be done here for sake of exception safety. 
+ my_array = nullptr; + my_size = 0; + } + if (new_size) { + my_array = static_cast<slot_id*>(r1::cache_aligned_allocate(new_size * sizeof(slot_id))); + std::fill_n(my_array, new_size, no_slot); + my_size = new_size; + } + } + } +}; + +template<typename Range, typename Body, typename Partitioner> struct start_for; +template<typename Range, typename Body, typename Partitioner> struct start_scan; +template<typename Range, typename Body, typename Partitioner> struct start_reduce; +template<typename Range, typename Body, typename Partitioner> struct start_deterministic_reduce; + +struct node { + node* my_parent{}; + std::atomic<int> m_ref_count{}; + + node() = default; + node(node* parent, int ref_count) : + my_parent{parent}, m_ref_count{ref_count} { + __TBB_ASSERT(ref_count > 0, "The ref count must be positive"); + } +}; + +struct wait_node : node { + wait_node() : node{ nullptr, 1 } {} + wait_context m_wait{1}; +}; + +//! Join task node that contains shared flag for stealing feedback +struct tree_node : public node { + small_object_allocator m_allocator; + std::atomic<bool> m_child_stolen{false}; + + tree_node(node* parent, int ref_count, small_object_allocator& alloc) + : node{parent, ref_count} + , m_allocator{alloc} {} + + void join(task_group_context*) {/*dummy, required only for reduction algorithms*/}; + + template <typename Task> + static void mark_task_stolen(Task &t) { + std::atomic<bool> &flag = static_cast<tree_node*>(t.my_parent)->m_child_stolen; +#if TBB_USE_PROFILING_TOOLS + // Threading tools respect lock prefix but report false-positive data-race via plain store + flag.exchange(true); +#else + flag.store(true, std::memory_order_relaxed); +#endif // TBB_USE_PROFILING_TOOLS + } + template <typename Task> + static bool is_peer_stolen(Task &t) { + return static_cast<tree_node*>(t.my_parent)->m_child_stolen.load(std::memory_order_relaxed); + } +}; + +// Context used to check cancellation state during reduction join process +template<typename TreeNodeType> +void fold_tree(node* n, const execution_data& ed) { + for (;;) { + __TBB_ASSERT(n->m_ref_count.load(std::memory_order_relaxed) > 0, "The refcount must be positive."); + call_itt_task_notify(releasing, n); + if (--n->m_ref_count > 0) { + return; + } + node* parent = n->my_parent; + if (!parent) { + break; + }; + + call_itt_task_notify(acquired, n); + TreeNodeType* self = static_cast<TreeNodeType*>(n); + self->join(ed.context); + self->m_allocator.delete_object(self, ed); + n = parent; + } + // Finish parallel for execution when the root (last node) is reached + static_cast<wait_node*>(n)->m_wait.release(); +} + +//! Depth is a relative depth of recursive division inside a range pool. Relative depth allows +//! infinite absolute depth of the recursion for heavily unbalanced workloads with range represented +//! by a number that cannot fit into machine word. +typedef unsigned char depth_t; + +//! Range pool stores ranges of type T in a circular buffer with MaxCapacity +template <typename T, depth_t MaxCapacity> +class range_vector { + depth_t my_head; + depth_t my_tail; + depth_t my_size; + depth_t my_depth[MaxCapacity]; // relative depths of stored ranges + tbb::detail::aligned_space<T, MaxCapacity> my_pool; + +public: + //! initialize via first range in pool + range_vector(const T& elem) : my_head(0), my_tail(0), my_size(1) { + my_depth[0] = 0; + new( static_cast<void *>(my_pool.begin()) ) T(elem);//TODO: std::move? 
+ } + ~range_vector() { + while( !empty() ) pop_back(); + } + bool empty() const { return my_size == 0; } + depth_t size() const { return my_size; } + //! Populates range pool via ranges up to max depth or while divisible + //! max_depth starts from 0, e.g. value 2 makes 3 ranges in the pool up to two 1/4 pieces + void split_to_fill(depth_t max_depth) { + while( my_size < MaxCapacity && is_divisible(max_depth) ) { + depth_t prev = my_head; + my_head = (my_head + 1) % MaxCapacity; + new(my_pool.begin()+my_head) T(my_pool.begin()[prev]); // copy TODO: std::move? + my_pool.begin()[prev].~T(); // instead of assignment + new(my_pool.begin()+prev) T(my_pool.begin()[my_head], detail::split()); // do 'inverse' split + my_depth[my_head] = ++my_depth[prev]; + my_size++; + } + } + void pop_back() { + __TBB_ASSERT(my_size > 0, "range_vector::pop_back() with empty size"); + my_pool.begin()[my_head].~T(); + my_size--; + my_head = (my_head + MaxCapacity - 1) % MaxCapacity; + } + void pop_front() { + __TBB_ASSERT(my_size > 0, "range_vector::pop_front() with empty size"); + my_pool.begin()[my_tail].~T(); + my_size--; + my_tail = (my_tail + 1) % MaxCapacity; + } + T& back() { + __TBB_ASSERT(my_size > 0, "range_vector::back() with empty size"); + return my_pool.begin()[my_head]; + } + T& front() { + __TBB_ASSERT(my_size > 0, "range_vector::front() with empty size"); + return my_pool.begin()[my_tail]; + } + //! similarly to front(), returns depth of the first range in the pool + depth_t front_depth() { + __TBB_ASSERT(my_size > 0, "range_vector::front_depth() with empty size"); + return my_depth[my_tail]; + } + depth_t back_depth() { + __TBB_ASSERT(my_size > 0, "range_vector::back_depth() with empty size"); + return my_depth[my_head]; + } + bool is_divisible(depth_t max_depth) { + return back_depth() < max_depth && back().is_divisible(); + } +}; + +//! Provides default methods for partition objects and common algorithm blocks. +template <typename Partition> +struct partition_type_base { + typedef detail::split split_type; + // decision makers + void note_affinity( slot_id ) {} + template <typename Task> + bool check_being_stolen(Task&, const execution_data&) { return false; } // part of old should_execute_range() + template <typename Range> split_type get_split() { return split(); } + Partition& self() { return *static_cast<Partition*>(this); } // CRTP helper + + template<typename StartType, typename Range> + void work_balance(StartType &start, Range &range, const execution_data&) { + start.run_body( range ); // simple partitioner goes always here + } + + template<typename StartType, typename Range> + void execute(StartType &start, Range &range, execution_data& ed) { + // The algorithm in a few words ([]-denotes calls to decision methods of partitioner): + // [If this task is stolen, adjust depth and divisions if necessary, set flag]. + // If range is divisible { + // Spread the work while [initial divisions left]; + // Create trap task [if necessary]; + // } + // If not divisible or [max depth is reached], execute, else do the range pool part + if ( range.is_divisible() ) { + if ( self().is_divisible() ) { + do { // split until is divisible + typename Partition::split_type split_obj = self().template get_split<Range>(); + start.offer_work( split_obj, ed ); + } while ( range.is_divisible() && self().is_divisible() ); + } + } + self().work_balance(start, range, ed); + } +}; + +//! Provides default splitting strategy for partition objects. 
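The splitting modes defined around here are what the public partitioners further down build on. A minimal call-site sketch (illustrative only, not part of the patch; the function name and loop body are assumptions) of passing a partitioner as the last argument of a parallel algorithm:

#include <oneapi/tbb/parallel_for.h>
#include <oneapi/tbb/blocked_range.h>
#include <oneapi/tbb/partitioner.h>
#include <vector>

void scale(std::vector<float>& v, float factor) {
    // affinity_partitioner is stateful and should be reused across calls;
    // it is static here purely for illustration.
    static tbb::affinity_partitioner ap;
    tbb::parallel_for(tbb::blocked_range<std::size_t>(0, v.size()),
                      [&](const tbb::blocked_range<std::size_t>& r) {
                          for (std::size_t i = r.begin(); i != r.end(); ++i)
                              v[i] *= factor;
                      },
                      ap);  // or tbb::auto_partitioner(), tbb::simple_partitioner(), ...
}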
+template <typename Partition> +struct adaptive_mode : partition_type_base<Partition> { + typedef Partition my_partition; + std::size_t my_divisor; + // For affinity_partitioner, my_divisor indicates the number of affinity array indices the task reserves. + // A task which has only one index must produce the right split without reserved index in order to avoid + // it to be overwritten in note_affinity() of the created (right) task. + // I.e. a task created deeper than the affinity array can remember must not save its affinity (LIFO order) + static const unsigned factor = 1; + adaptive_mode() : my_divisor(get_initial_auto_partitioner_divisor() / 4 * my_partition::factor) {} + adaptive_mode(adaptive_mode &src, split) : my_divisor(do_split(src, split())) {} + /*! Override do_split methods in order to specify splitting strategy */ + std::size_t do_split(adaptive_mode &src, split) { + return src.my_divisor /= 2u; + } +}; + +//! Helper type for checking availability of proportional_split constructor +template <typename T> using supports_proportional_splitting = typename std::is_constructible<T, T&, proportional_split&>; + +//! A helper class to create a proportional_split object for a given type of Range. +/** If the Range has proportional_split constructor, + then created object splits a provided value in an implemenation-defined proportion; + otherwise it represents equal-size split. */ +// TODO: check if this helper can be a nested class of proportional_mode. +template <typename Range, typename = void> +struct proportion_helper { + static proportional_split get_split(std::size_t) { return proportional_split(1,1); } +}; + +template <typename Range> +struct proportion_helper<Range, typename std::enable_if<supports_proportional_splitting<Range>::value>::type> { + static proportional_split get_split(std::size_t n) { + std::size_t right = n / 2; + std::size_t left = n - right; + return proportional_split(left, right); + } +}; + +//! Provides proportional splitting strategy for partition objects +template <typename Partition> +struct proportional_mode : adaptive_mode<Partition> { + typedef Partition my_partition; + using partition_type_base<Partition>::self; // CRTP helper to get access to derived classes + + proportional_mode() : adaptive_mode<Partition>() {} + proportional_mode(proportional_mode &src, split) : adaptive_mode<Partition>(src, split()) {} + proportional_mode(proportional_mode &src, const proportional_split& split_obj) { self().my_divisor = do_split(src, split_obj); } + std::size_t do_split(proportional_mode &src, const proportional_split& split_obj) { + std::size_t portion = split_obj.right() * my_partition::factor; + portion = (portion + my_partition::factor/2) & (0ul - my_partition::factor); + src.my_divisor -= portion; + return portion; + } + bool is_divisible() { // part of old should_execute_range() + return self().my_divisor > my_partition::factor; + } + template <typename Range> + proportional_split get_split() { + // Create a proportion for the number of threads expected to handle "this" subrange + return proportion_helper<Range>::get_split( self().my_divisor / my_partition::factor ); + } +}; + +static std::size_t get_initial_partition_head() { + int current_index = tbb::this_task_arena::current_thread_index(); + if (current_index == tbb::task_arena::not_initialized) + current_index = 0; + return size_t(current_index); +} + +//! 
Provides default linear indexing of partitioner's sequence +template <typename Partition> +struct linear_affinity_mode : proportional_mode<Partition> { + std::size_t my_head; + std::size_t my_max_affinity; + using proportional_mode<Partition>::self; + linear_affinity_mode() : proportional_mode<Partition>(), my_head(get_initial_partition_head()), + my_max_affinity(self().my_divisor) {} + linear_affinity_mode(linear_affinity_mode &src, split) : proportional_mode<Partition>(src, split()) + , my_head((src.my_head + src.my_divisor) % src.my_max_affinity), my_max_affinity(src.my_max_affinity) {} + linear_affinity_mode(linear_affinity_mode &src, const proportional_split& split_obj) : proportional_mode<Partition>(src, split_obj) + , my_head((src.my_head + src.my_divisor) % src.my_max_affinity), my_max_affinity(src.my_max_affinity) {} + void spawn_task(task& t, task_group_context& ctx) { + if (self().my_divisor) { + spawn(t, ctx, slot_id(my_head)); + } else { + spawn(t, ctx); + } + } +}; + +static bool is_stolen_task(const execution_data& ed) { + return execution_slot(ed) != original_slot(ed); +} + +/*! Determine work-balance phase implementing splitting & stealing actions */ +template<class Mode> +struct dynamic_grainsize_mode : Mode { + using Mode::self; + enum { + begin = 0, + run, + pass + } my_delay; + depth_t my_max_depth; + static const unsigned range_pool_size = __TBB_RANGE_POOL_CAPACITY; + dynamic_grainsize_mode(): Mode() + , my_delay(begin) + , my_max_depth(__TBB_INIT_DEPTH) {} + dynamic_grainsize_mode(dynamic_grainsize_mode& p, split) + : Mode(p, split()) + , my_delay(pass) + , my_max_depth(p.my_max_depth) {} + dynamic_grainsize_mode(dynamic_grainsize_mode& p, const proportional_split& split_obj) + : Mode(p, split_obj) + , my_delay(begin) + , my_max_depth(p.my_max_depth) {} + template <typename Task> + bool check_being_stolen(Task &t, const execution_data& ed) { // part of old should_execute_range() + if( !(self().my_divisor / Mode::my_partition::factor) ) { // if not from the top P tasks of binary tree + self().my_divisor = 1; // TODO: replace by on-stack flag (partition_state's member)? + if( is_stolen_task(ed) && t.my_parent->m_ref_count >= 2 ) { // runs concurrently with the left task +#if __TBB_USE_OPTIONAL_RTTI + // RTTI is available, check whether the cast is valid + // TODO: TBB_REVAMP_TODO __TBB_ASSERT(dynamic_cast<tree_node*>(t.m_parent), 0); + // correctness of the cast relies on avoiding the root task for which: + // - initial value of my_divisor != 0 (protected by separate assertion) + // - is_stolen_task() always returns false for the root task. 
+#endif + tree_node::mark_task_stolen(t); + if( !my_max_depth ) my_max_depth++; + my_max_depth += __TBB_DEMAND_DEPTH_ADD; + return true; + } + } + return false; + } + depth_t max_depth() { return my_max_depth; } + void align_depth(depth_t base) { + __TBB_ASSERT(base <= my_max_depth, 0); + my_max_depth -= base; + } + template<typename StartType, typename Range> + void work_balance(StartType &start, Range &range, execution_data& ed) { + if( !range.is_divisible() || !self().max_depth() ) { + start.run_body( range ); // simple partitioner goes always here + } + else { // do range pool + range_vector<Range, range_pool_size> range_pool(range); + do { + range_pool.split_to_fill(self().max_depth()); // fill range pool + if( self().check_for_demand( start ) ) { + if( range_pool.size() > 1 ) { + start.offer_work( range_pool.front(), range_pool.front_depth(), ed ); + range_pool.pop_front(); + continue; + } + if( range_pool.is_divisible(self().max_depth()) ) // was not enough depth to fork a task + continue; // note: next split_to_fill() should split range at least once + } + start.run_body( range_pool.back() ); + range_pool.pop_back(); + } while( !range_pool.empty() && !ed.context->is_group_execution_cancelled() ); + } + } + template <typename Task> + bool check_for_demand(Task& t) { + if ( pass == my_delay ) { + if ( self().my_divisor > 1 ) // produce affinitized tasks while they have slot in array + return true; // do not do my_max_depth++ here, but be sure range_pool is splittable once more + else if ( self().my_divisor && my_max_depth ) { // make balancing task + self().my_divisor = 0; // once for each task; depth will be decreased in align_depth() + return true; + } + else if ( tree_node::is_peer_stolen(t) ) { + my_max_depth += __TBB_DEMAND_DEPTH_ADD; + return true; + } + } else if( begin == my_delay ) { + my_delay = pass; + } + return false; + } +}; + +class auto_partition_type: public dynamic_grainsize_mode<adaptive_mode<auto_partition_type> > { +public: + auto_partition_type( const auto_partitioner& ) + : dynamic_grainsize_mode<adaptive_mode<auto_partition_type> >() { + my_divisor *= __TBB_INITIAL_CHUNKS; + } + auto_partition_type( auto_partition_type& src, split) + : dynamic_grainsize_mode<adaptive_mode<auto_partition_type> >(src, split()) {} + bool is_divisible() { // part of old should_execute_range() + if( my_divisor > 1 ) return true; + if( my_divisor && my_max_depth ) { // can split the task. TODO: on-stack flag instead + // keep same fragmentation while splitting for the local task pool + my_max_depth--; + my_divisor = 0; // decrease max_depth once per task + return true; + } else return false; + } + template <typename Task> + bool check_for_demand(Task& t) { + if (tree_node::is_peer_stolen(t)) { + my_max_depth += __TBB_DEMAND_DEPTH_ADD; + return true; + } else return false; + } + void spawn_task(task& t, task_group_context& ctx) { + spawn(t, ctx); + } +}; + +class simple_partition_type: public partition_type_base<simple_partition_type> { +public: + simple_partition_type( const simple_partitioner& ) {} + simple_partition_type( const simple_partition_type&, split ) {} + //! 
simplified algorithm + template<typename StartType, typename Range> + void execute(StartType &start, Range &range, execution_data& ed) { + split_type split_obj = split(); // start.offer_work accepts split_type as reference + while( range.is_divisible() ) + start.offer_work( split_obj, ed ); + start.run_body( range ); + } + void spawn_task(task& t, task_group_context& ctx) { + spawn(t, ctx); + } +}; + +class static_partition_type : public linear_affinity_mode<static_partition_type> { +public: + typedef detail::proportional_split split_type; + static_partition_type( const static_partitioner& ) + : linear_affinity_mode<static_partition_type>() {} + static_partition_type( static_partition_type& p, const proportional_split& split_obj ) + : linear_affinity_mode<static_partition_type>(p, split_obj) {} +}; + +class affinity_partition_type : public dynamic_grainsize_mode<linear_affinity_mode<affinity_partition_type> > { + static const unsigned factor_power = 4; // TODO: get a unified formula based on number of computing units + slot_id* my_array; +public: + static const unsigned factor = 1 << factor_power; // number of slots in affinity array per task + typedef detail::proportional_split split_type; + affinity_partition_type( affinity_partitioner_base& ap ) + : dynamic_grainsize_mode<linear_affinity_mode<affinity_partition_type> >() { + __TBB_ASSERT( (factor&(factor-1))==0, "factor must be power of two" ); + ap.resize(factor); + my_array = ap.my_array; + my_max_depth = factor_power + 1; + __TBB_ASSERT( my_max_depth < __TBB_RANGE_POOL_CAPACITY, 0 ); + } + affinity_partition_type(affinity_partition_type& p, split) + : dynamic_grainsize_mode<linear_affinity_mode<affinity_partition_type> >(p, split()) + , my_array(p.my_array) {} + affinity_partition_type(affinity_partition_type& p, const proportional_split& split_obj) + : dynamic_grainsize_mode<linear_affinity_mode<affinity_partition_type> >(p, split_obj) + , my_array(p.my_array) {} + void note_affinity(slot_id id) { + if( my_divisor ) + my_array[my_head] = id; + } + void spawn_task(task& t, task_group_context& ctx) { + if (my_divisor) { + if (!my_array[my_head]) { + // TODO: consider new ideas with my_array for both affinity and static partitioner's, then code reuse + spawn(t, ctx, slot_id(my_head / factor)); + } else { + spawn(t, ctx, my_array[my_head]); + } + } else { + spawn(t, ctx); + } + } +}; + +//! A simple partitioner +/** Divides the range until the range is not divisible. + @ingroup algorithms */ +class simple_partitioner { +public: + simple_partitioner() {} +private: + template<typename Range, typename Body, typename Partitioner> friend struct start_for; + template<typename Range, typename Body, typename Partitioner> friend struct start_reduce; + template<typename Range, typename Body, typename Partitioner> friend struct start_deterministic_reduce; + template<typename Range, typename Body, typename Partitioner> friend struct start_scan; + // new implementation just extends existing interface + typedef simple_partition_type task_partition_type; + // TODO: consider to make split_type public + typedef simple_partition_type::split_type split_type; + + // for parallel_scan only + class partition_type { + public: + bool should_execute_range(const execution_data& ) {return false;} + partition_type( const simple_partitioner& ) {} + partition_type( const partition_type&, split ) {} + }; +}; + +//! An auto partitioner +/** The range is initial divided into several large chunks. 
+ Chunks are further subdivided into smaller pieces if demand detected and they are divisible. + @ingroup algorithms */ +class auto_partitioner { +public: + auto_partitioner() {} + +private: + template<typename Range, typename Body, typename Partitioner> friend struct start_for; + template<typename Range, typename Body, typename Partitioner> friend struct start_reduce; + template<typename Range, typename Body, typename Partitioner> friend struct start_deterministic_reduce; + template<typename Range, typename Body, typename Partitioner> friend struct start_scan; + // new implementation just extends existing interface + typedef auto_partition_type task_partition_type; + // TODO: consider to make split_type public + typedef auto_partition_type::split_type split_type; + + //! Backward-compatible partition for auto and affinity partition objects. + class partition_type { + size_t num_chunks; + static const size_t VICTIM_CHUNKS = 4; + public: + bool should_execute_range(const execution_data& ed) { + if( num_chunks<VICTIM_CHUNKS && is_stolen_task(ed) ) + num_chunks = VICTIM_CHUNKS; + return num_chunks==1; + } + partition_type( const auto_partitioner& ) + : num_chunks(get_initial_auto_partitioner_divisor()*__TBB_INITIAL_CHUNKS/4) {} + partition_type( partition_type& pt, split ) { + num_chunks = pt.num_chunks = (pt.num_chunks+1u) / 2u; + } + }; +}; + +//! A static partitioner +class static_partitioner { +public: + static_partitioner() {} +private: + template<typename Range, typename Body, typename Partitioner> friend struct start_for; + template<typename Range, typename Body, typename Partitioner> friend struct start_reduce; + template<typename Range, typename Body, typename Partitioner> friend struct start_deterministic_reduce; + template<typename Range, typename Body, typename Partitioner> friend struct start_scan; + // new implementation just extends existing interface + typedef static_partition_type task_partition_type; + // TODO: consider to make split_type public + typedef static_partition_type::split_type split_type; +}; + +//! 
An affinity partitioner +class affinity_partitioner : affinity_partitioner_base { +public: + affinity_partitioner() {} + +private: + template<typename Range, typename Body, typename Partitioner> friend struct start_for; + template<typename Range, typename Body, typename Partitioner> friend struct start_reduce; + template<typename Range, typename Body, typename Partitioner> friend struct start_deterministic_reduce; + template<typename Range, typename Body, typename Partitioner> friend struct start_scan; + // new implementation just extends existing interface + typedef affinity_partition_type task_partition_type; + // TODO: consider to make split_type public + typedef affinity_partition_type::split_type split_type; +}; + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +// Partitioners +using detail::d1::auto_partitioner; +using detail::d1::simple_partitioner; +using detail::d1::static_partitioner; +using detail::d1::affinity_partitioner; +// Split types +using detail::split; +using detail::proportional_split; +} // namespace v1 + +} // namespace tbb + +#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) + #pragma warning (pop) +#endif // warning 4244 is back + +#undef __TBB_INITIAL_CHUNKS +#undef __TBB_RANGE_POOL_CAPACITY +#undef __TBB_INIT_DEPTH + +#endif /* __TBB_partitioner_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/profiling.h b/contrib/libs/tbb/include/oneapi/tbb/profiling.h new file mode 100644 index 0000000000..4b62da2060 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/profiling.h @@ -0,0 +1,243 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_profiling_H +#define __TBB_profiling_H + +#include "detail/_config.h" +#include <cstdint> + +#include <string> + +namespace tbb { +namespace detail { +inline namespace d0 { + // include list of index names + #define TBB_STRING_RESOURCE(index_name,str) index_name, + enum string_resource_index : std::uintptr_t { + #include "detail/_string_resource.h" + NUM_STRINGS + }; + #undef TBB_STRING_RESOURCE + + enum itt_relation + { + __itt_relation_is_unknown = 0, + __itt_relation_is_dependent_on, /**< "A is dependent on B" means that A cannot start until B completes */ + __itt_relation_is_sibling_of, /**< "A is sibling of B" means that A and B were created as a group */ + __itt_relation_is_parent_of, /**< "A is parent of B" means that A created B */ + __itt_relation_is_continuation_of, /**< "A is continuation of B" means that A assumes the dependencies of B */ + __itt_relation_is_child_of, /**< "A is child of B" means that A was created by B (inverse of is_parent_of) */ + __itt_relation_is_continued_by, /**< "A is continued by B" means that B assumes the dependencies of A (inverse of is_continuation_of) */ + __itt_relation_is_predecessor_to /**< "A is predecessor to B" means that B cannot start until A completes (inverse of is_dependent_on) */ + }; + +//! Unicode support +#if (_WIN32||_WIN64) && !__MINGW32__ + //! Unicode character type. Always wchar_t on Windows. 
+ using tchar = wchar_t; +#else /* !WIN */ + using tchar = char; +#endif /* !WIN */ + +} // namespace d0 +} // namespace detail +} // namespace tbb + +#include <atomic> +#if _WIN32||_WIN64 +#include <stdlib.h> /* mbstowcs_s */ +#endif +// Need these to work regardless of tools support +namespace tbb { +namespace detail { +namespace d1 { + enum notify_type {prepare=0, cancel, acquired, releasing, destroy}; + enum itt_domain_enum { ITT_DOMAIN_FLOW=0, ITT_DOMAIN_MAIN=1, ITT_DOMAIN_ALGO=2, ITT_NUM_DOMAINS }; +} // namespace d1 + +namespace r1 { + void __TBB_EXPORTED_FUNC call_itt_notify(int t, void* ptr); + void __TBB_EXPORTED_FUNC create_itt_sync(void* ptr, const tchar* objtype, const tchar* objname); + void __TBB_EXPORTED_FUNC itt_make_task_group(d1::itt_domain_enum domain, void* group, unsigned long long group_extra, + void* parent, unsigned long long parent_extra, string_resource_index name_index); + void __TBB_EXPORTED_FUNC itt_task_begin(d1::itt_domain_enum domain, void* task, unsigned long long task_extra, + void* parent, unsigned long long parent_extra, string_resource_index name_index); + void __TBB_EXPORTED_FUNC itt_task_end(d1::itt_domain_enum domain); + void __TBB_EXPORTED_FUNC itt_set_sync_name(void* obj, const tchar* name); + void __TBB_EXPORTED_FUNC itt_metadata_str_add(d1::itt_domain_enum domain, void* addr, unsigned long long addr_extra, + string_resource_index key, const char* value); + void __TBB_EXPORTED_FUNC itt_metadata_ptr_add(d1::itt_domain_enum domain, void* addr, unsigned long long addr_extra, + string_resource_index key, void* value); + void __TBB_EXPORTED_FUNC itt_relation_add(d1::itt_domain_enum domain, void* addr0, unsigned long long addr0_extra, + itt_relation relation, void* addr1, unsigned long long addr1_extra); + void __TBB_EXPORTED_FUNC itt_region_begin(d1::itt_domain_enum domain, void* region, unsigned long long region_extra, + void* parent, unsigned long long parent_extra, string_resource_index /* name_index */); + void __TBB_EXPORTED_FUNC itt_region_end(d1::itt_domain_enum domain, void* region, unsigned long long region_extra); +} // namespace r1 + +namespace d1 { +#if TBB_USE_PROFILING_TOOLS && (_WIN32||_WIN64) && !__MINGW32__ + inline std::size_t multibyte_to_widechar(wchar_t* wcs, const char* mbs, std::size_t bufsize) { + std::size_t len; + mbstowcs_s(&len, wcs, bufsize, mbs, _TRUNCATE); + return len; // mbstowcs_s counts null terminator + } +#endif + +#if TBB_USE_PROFILING_TOOLS + inline void create_itt_sync(void *ptr, const char *objtype, const char *objname) { +#if (_WIN32||_WIN64) && !__MINGW32__ + std::size_t len_type = multibyte_to_widechar(nullptr, objtype, 0); + wchar_t *type = new wchar_t[len_type]; + multibyte_to_widechar(type, objtype, len_type); + std::size_t len_name = multibyte_to_widechar(nullptr, objname, 0); + wchar_t *name = new wchar_t[len_name]; + multibyte_to_widechar(name, objname, len_name); +#else // WIN + const char *type = objtype; + const char *name = objname; +#endif + r1::create_itt_sync(ptr, type, name); + +#if (_WIN32||_WIN64) && !__MINGW32__ + delete[] type; + delete[] name; +#endif // WIN + } + +// Distinguish notifications on task for reducing overheads +#if TBB_USE_PROFILING_TOOLS == 2 + inline void call_itt_task_notify(d1::notify_type t, void *ptr) { + r1::call_itt_notify((int)t, ptr); + } +#else + inline void call_itt_task_notify(d1::notify_type, void *) {} +#endif // TBB_USE_PROFILING_TOOLS + + inline void call_itt_notify(d1::notify_type t, void *ptr) { + r1::call_itt_notify((int)t, ptr); + } + +#if 
(_WIN32||_WIN64) && !__MINGW32__ + inline void itt_set_sync_name(void* obj, const wchar_t* name) { + r1::itt_set_sync_name(obj, name); + } + inline void itt_set_sync_name(void* obj, const char* name) { + std::size_t len_name = multibyte_to_widechar(nullptr, name, 0); + wchar_t *obj_name = new wchar_t[len_name]; + multibyte_to_widechar(obj_name, name, len_name); + r1::itt_set_sync_name(obj, obj_name); + delete[] obj_name; + } +#else + inline void itt_set_sync_name( void* obj, const char* name) { + r1::itt_set_sync_name(obj, name); + } +#endif //WIN + + inline void itt_make_task_group(itt_domain_enum domain, void* group, unsigned long long group_extra, + void* parent, unsigned long long parent_extra, string_resource_index name_index) { + r1::itt_make_task_group(domain, group, group_extra, parent, parent_extra, name_index); + } + + inline void itt_metadata_str_add( itt_domain_enum domain, void *addr, unsigned long long addr_extra, + string_resource_index key, const char *value ) { + r1::itt_metadata_str_add( domain, addr, addr_extra, key, value ); + } + + inline void register_node_addr(itt_domain_enum domain, void *addr, unsigned long long addr_extra, + string_resource_index key, void *value) { + r1::itt_metadata_ptr_add(domain, addr, addr_extra, key, value); + } + + inline void itt_relation_add( itt_domain_enum domain, void *addr0, unsigned long long addr0_extra, + itt_relation relation, void *addr1, unsigned long long addr1_extra ) { + r1::itt_relation_add( domain, addr0, addr0_extra, relation, addr1, addr1_extra ); + } + + inline void itt_task_begin( itt_domain_enum domain, void *task, unsigned long long task_extra, + void *parent, unsigned long long parent_extra, string_resource_index name_index ) { + r1::itt_task_begin( domain, task, task_extra, parent, parent_extra, name_index ); + } + + inline void itt_task_end( itt_domain_enum domain ) { + r1::itt_task_end( domain ); + } + + inline void itt_region_begin( itt_domain_enum domain, void *region, unsigned long long region_extra, + void *parent, unsigned long long parent_extra, string_resource_index name_index ) { + r1::itt_region_begin( domain, region, region_extra, parent, parent_extra, name_index ); + } + + inline void itt_region_end( itt_domain_enum domain, void *region, unsigned long long region_extra ) { + r1::itt_region_end( domain, region, region_extra ); + } +#else + inline void create_itt_sync(void* /*ptr*/, const char* /*objtype*/, const char* /*objname*/) {} + + inline void call_itt_notify(notify_type /*t*/, void* /*ptr*/) {} + + inline void call_itt_task_notify(notify_type /*t*/, void* /*ptr*/) {} +#endif // TBB_USE_PROFILING_TOOLS + +#if TBB_USE_PROFILING_TOOLS && !(TBB_USE_PROFILING_TOOLS == 2) +class event { +/** This class supports user event traces through itt. + Common use-case is tagging data flow graph tasks (data-id) + and visualization by Intel Advisor Flow Graph Analyzer (FGA) **/ +// TODO: Replace implementation by itt user event api. 
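    // A usage sketch, assuming a build with TBB_USE_PROFILING_TOOLS enabled and an
    // illustrative item id; a flow-graph node body can tag the datum it processes:
    //
    //     tbb::profiling::event e("item-17");      // stores the name for repeated emits
    //     e.emit();                                // emits "FGA::DATAID::item-17"
    //     tbb::profiling::event::emit("item-18");  // one-off emission, no object kept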
+ + const std::string my_name; + + static void emit_trace(const std::string &input) { + itt_metadata_str_add( ITT_DOMAIN_FLOW, NULL, FLOW_NULL, USER_EVENT, ( "FGA::DATAID::" + input ).c_str() ); + } + +public: + event(const std::string &input) + : my_name( input ) + { } + + void emit() { + emit_trace(my_name); + } + + static void emit(const std::string &description) { + emit_trace(description); + } + +}; +#else // TBB_USE_PROFILING_TOOLS && !(TBB_USE_PROFILING_TOOLS == 2) +// Using empty struct if user event tracing is disabled: +struct event { + event(const std::string &) { } + + void emit() { } + + static void emit(const std::string &) { } +}; +#endif // TBB_USE_PROFILING_TOOLS && !(TBB_USE_PROFILING_TOOLS == 2) +} // namespace d1 +} // namespace detail + +namespace profiling { + using detail::d1::event; +} +} // namespace tbb + + +#endif /* __TBB_profiling_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/queuing_mutex.h b/contrib/libs/tbb/include/oneapi/tbb/queuing_mutex.h new file mode 100644 index 0000000000..6c3f1fe1e9 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/queuing_mutex.h @@ -0,0 +1,197 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_queuing_mutex_H +#define __TBB_queuing_mutex_H + +#include "detail/_namespace_injection.h" +#include "detail/_assert.h" +#include "detail/_utils.h" + +#include "profiling.h" + +#include <atomic> + +namespace tbb { +namespace detail { +namespace d1 { + +//! Queuing mutex with local-only spinning. +/** @ingroup synchronization */ +class queuing_mutex { +public: + //! Construct unacquired mutex. + queuing_mutex() noexcept { + create_itt_sync(this, "tbb::queuing_mutex", ""); + }; + + queuing_mutex(const queuing_mutex&) = delete; + queuing_mutex& operator=(const queuing_mutex&) = delete; + + //! The scoped locking pattern + /** It helps to avoid the common problem of forgetting to release lock. + It also nicely provides the "node" for queuing locks. */ + class scoped_lock { + //! Reset fields to mean "no lock held". + void reset() { + m_mutex = nullptr; + } + + public: + //! Construct lock that has not acquired a mutex. + /** Equivalent to zero-initialization of *this. */ + scoped_lock() = default; + + //! Acquire lock on given mutex. + scoped_lock(queuing_mutex& m) { + acquire(m); + } + + //! Release lock (if lock is held). + ~scoped_lock() { + if (m_mutex) release(); + } + + //! No Copy + scoped_lock( const scoped_lock& ) = delete; + scoped_lock& operator=( const scoped_lock& ) = delete; + + //! Acquire lock on given mutex. + void acquire( queuing_mutex& m ) { + __TBB_ASSERT(!m_mutex, "scoped_lock is already holding a mutex"); + + // Must set all fields before the exchange, because once the + // exchange executes, *this becomes accessible to other threads. 
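            // The lines below implement an MCS-style queue: this scoped_lock publishes
            // itself as the new tail with one atomic exchange; if a predecessor exists,
            // it links this node behind it and then spins only on its own m_going flag.
            // Spinning on per-waiter state, not on shared mutex state, is what makes the
            // wait local, and the queue order is what makes the mutex fair.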
+ m_mutex = &m; + m_next.store(nullptr, std::memory_order_relaxed); + m_going.store(0U, std::memory_order_relaxed); + + // x86 compare exchange operation always has a strong fence + // "sending" the fields initialized above to other processors. + scoped_lock* pred = m.q_tail.exchange(this); + if (pred) { + call_itt_notify(prepare, &m); + __TBB_ASSERT(pred->m_next.load(std::memory_order_relaxed) == nullptr, "the predecessor has another successor!"); + + pred->m_next.store(this, std::memory_order_relaxed); + spin_wait_while_eq(m_going, 0U); + } + call_itt_notify(acquired, &m); + + // Force acquire so that user's critical section receives correct values + // from processor that was previously in the user's critical section. + atomic_fence(std::memory_order_acquire); + } + + //! Acquire lock on given mutex if free (i.e. non-blocking) + bool try_acquire( queuing_mutex& m ) { + __TBB_ASSERT(!m_mutex, "scoped_lock is already holding a mutex"); + + // Must set all fields before the compare_exchange_strong, because once the + // compare_exchange_strong executes, *this becomes accessible to other threads. + m_next.store(nullptr, std::memory_order_relaxed); + m_going.store(0U, std::memory_order_relaxed); + + scoped_lock* expected = nullptr; + // The compare_exchange_strong must have release semantics, because we are + // "sending" the fields initialized above to other processors. + // x86 compare exchange operation always has a strong fence + if (!m.q_tail.compare_exchange_strong(expected, this)) + return false; + + m_mutex = &m; + + // Force acquire so that user's critical section receives correct values + // from processor that was previously in the user's critical section. + atomic_fence(std::memory_order_acquire); + call_itt_notify(acquired, &m); + return true; + } + + //! Release lock. + void release() + { + __TBB_ASSERT(this->m_mutex, "no lock acquired"); + + call_itt_notify(releasing, this->m_mutex); + + if (m_next.load(std::memory_order_relaxed) == nullptr) { + scoped_lock* expected = this; + if (m_mutex->q_tail.compare_exchange_strong(expected, nullptr)) { + // this was the only item in the queue, and the queue is now empty. + reset(); + return; + } + // Someone in the queue + spin_wait_while_eq(m_next, nullptr); + } + m_next.load(std::memory_order_relaxed)->m_going.store(1U, std::memory_order_release); + + reset(); + } + + private: + //! The pointer to the mutex owned, or NULL if not holding a mutex. + queuing_mutex* m_mutex{nullptr}; + + //! The pointer to the next competitor for a mutex + std::atomic<scoped_lock*> m_next{nullptr}; + + //! The local spin-wait variable + /** Inverted (0 - blocked, 1 - acquired the mutex) for the sake of + zero-initialization. Defining it as an entire word instead of + a byte seems to help performance slightly. */ + std::atomic<uintptr_t> m_going{0U}; + }; + + // Mutex traits + static constexpr bool is_rw_mutex = false; + static constexpr bool is_recursive_mutex = false; + static constexpr bool is_fair_mutex = true; + +private: + //! 
The last competitor requesting the lock + std::atomic<scoped_lock*> q_tail{nullptr}; + +}; + +#if TBB_USE_PROFILING_TOOLS +inline void set_name(queuing_mutex& obj, const char* name) { + itt_set_sync_name(&obj, name); +} +#if (_WIN32||_WIN64) && !__MINGW32__ +inline void set_name(queuing_mutex& obj, const wchar_t* name) { + itt_set_sync_name(&obj, name); +} +#endif //WIN +#else +inline void set_name(queuing_mutex&, const char*) {} +#if (_WIN32||_WIN64) && !__MINGW32__ +inline void set_name(queuing_mutex&, const wchar_t*) {} +#endif //WIN +#endif +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::queuing_mutex; +} // namespace v1 +namespace profiling { + using detail::d1::set_name; +} +} // namespace tbb + +#endif /* __TBB_queuing_mutex_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/queuing_rw_mutex.h b/contrib/libs/tbb/include/oneapi/tbb/queuing_rw_mutex.h new file mode 100644 index 0000000000..6bb748f8a3 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/queuing_rw_mutex.h @@ -0,0 +1,199 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_queuing_rw_mutex_H +#define __TBB_queuing_rw_mutex_H + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" +#include "detail/_assert.h" + +#include "profiling.h" + +#include <cstring> +#include <atomic> + +namespace tbb { +namespace detail { +namespace r1 { +struct queuing_rw_mutex_impl; +} +namespace d1 { + +//! Queuing reader-writer mutex with local-only spinning. +/** Adapted from Krieger, Stumm, et al. pseudocode at + https://www.researchgate.net/publication/221083709_A_Fair_Fast_Scalable_Reader-Writer_Lock + @ingroup synchronization */ +class queuing_rw_mutex { + friend r1::queuing_rw_mutex_impl; +public: + //! Construct unacquired mutex. + queuing_rw_mutex() noexcept { + create_itt_sync(this, "tbb::queuing_rw_mutex", ""); + } + + //! Destructor asserts if the mutex is acquired, i.e. q_tail is non-NULL + ~queuing_rw_mutex() { + __TBB_ASSERT(q_tail.load(std::memory_order_relaxed) == nullptr, "destruction of an acquired mutex"); + } + + //! No Copy + queuing_rw_mutex(const queuing_rw_mutex&) = delete; + queuing_rw_mutex& operator=(const queuing_rw_mutex&) = delete; + + //! The scoped locking pattern + /** It helps to avoid the common problem of forgetting to release lock. + It also nicely provides the "node" for queuing locks. */ + class scoped_lock { + friend r1::queuing_rw_mutex_impl; + //! Initialize fields to mean "no lock held". + void initialize() { + my_mutex = nullptr; + my_internal_lock.store(0, std::memory_order_relaxed); + my_going.store(0, std::memory_order_relaxed); +#if TBB_USE_ASSERT + my_state = 0xFF; // Set to invalid state + my_next.store(reinterpret_cast<uintptr_t>(reinterpret_cast<void*>(-1)), std::memory_order_relaxed); + my_prev.store(reinterpret_cast<uintptr_t>(reinterpret_cast<void*>(-1)), std::memory_order_relaxed); +#endif /* TBB_USE_ASSERT */ + } + + public: + //! Construct lock that has not acquired a mutex. 
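        // A usage sketch (the predicate below is illustrative): take a reader lock for
        // read-mostly data and upgrade only when a write turns out to be needed.
        //
        //     tbb::queuing_rw_mutex m;
        //     {
        //         tbb::queuing_rw_mutex::scoped_lock lock(m, /*write=*/false); // reader
        //         if (must_modify)                  // hypothetical condition
        //             lock.upgrade_to_writer();     // may release and re-acquire internally
        //     }                                     // lock released at end of scope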
+ /** Equivalent to zero-initialization of *this. */ + scoped_lock() {initialize();} + + //! Acquire lock on given mutex. + scoped_lock( queuing_rw_mutex& m, bool write=true ) { + initialize(); + acquire(m,write); + } + + //! Release lock (if lock is held). + ~scoped_lock() { + if( my_mutex ) release(); + } + + //! No Copy + scoped_lock(const scoped_lock&) = delete; + scoped_lock& operator=(const scoped_lock&) = delete; + + //! Acquire lock on given mutex. + void acquire( queuing_rw_mutex& m, bool write=true ); + + //! Acquire lock on given mutex if free (i.e. non-blocking) + bool try_acquire( queuing_rw_mutex& m, bool write=true ); + + //! Release lock. + void release(); + + //! Upgrade reader to become a writer. + /** Returns whether the upgrade happened without releasing and re-acquiring the lock */ + bool upgrade_to_writer(); + + //! Downgrade writer to become a reader. + bool downgrade_to_reader(); + + private: + //! The pointer to the mutex owned, or NULL if not holding a mutex. + queuing_rw_mutex* my_mutex; + + //! The 'pointer' to the previous and next competitors for a mutex + std::atomic<uintptr_t> my_prev; + std::atomic<uintptr_t> my_next; + + using state_t = unsigned char ; + + //! State of the request: reader, writer, active reader, other service states + std::atomic<state_t> my_state; + + //! The local spin-wait variable + /** Corresponds to "spin" in the pseudocode but inverted for the sake of zero-initialization */ + std::atomic<unsigned char> my_going; + + //! A tiny internal lock + std::atomic<unsigned char> my_internal_lock; + }; + + // Mutex traits + static constexpr bool is_rw_mutex = true; + static constexpr bool is_recursive_mutex = false; + static constexpr bool is_fair_mutex = true; + +private: + //! The last competitor requesting the lock + std::atomic<scoped_lock*> q_tail{nullptr}; +}; +#if TBB_USE_PROFILING_TOOLS +inline void set_name(queuing_rw_mutex& obj, const char* name) { + itt_set_sync_name(&obj, name); +} +#if (_WIN32||_WIN64) && !__MINGW32__ +inline void set_name(queuing_rw_mutex& obj, const wchar_t* name) { + itt_set_sync_name(&obj, name); +} +#endif //WIN +#else +inline void set_name(queuing_rw_mutex&, const char*) {} +#if (_WIN32||_WIN64) && !__MINGW32__ +inline void set_name(queuing_rw_mutex&, const wchar_t*) {} +#endif //WIN +#endif +} // namespace d1 + +namespace r1 { +void acquire(d1::queuing_rw_mutex&, d1::queuing_rw_mutex::scoped_lock&, bool); +bool try_acquire(d1::queuing_rw_mutex&, d1::queuing_rw_mutex::scoped_lock&, bool); +void release(d1::queuing_rw_mutex::scoped_lock&); +bool upgrade_to_writer(d1::queuing_rw_mutex::scoped_lock&); +bool downgrade_to_reader(d1::queuing_rw_mutex::scoped_lock&); +} // namespace r1 + +namespace d1 { + + +inline void queuing_rw_mutex::scoped_lock::acquire(queuing_rw_mutex& m,bool write) { + r1::acquire(m, *this, write); +} + +inline bool queuing_rw_mutex::scoped_lock::try_acquire(queuing_rw_mutex& m, bool write) { + return r1::try_acquire(m, *this, write); +} + +inline void queuing_rw_mutex::scoped_lock::release() { + r1::release(*this); +} + +inline bool queuing_rw_mutex::scoped_lock::upgrade_to_writer() { + return r1::upgrade_to_writer(*this); +} + +inline bool queuing_rw_mutex::scoped_lock::downgrade_to_reader() { + return r1::downgrade_to_reader(*this); +} +} // namespace d1 + +} // namespace detail + +inline namespace v1 { +using detail::d1::queuing_rw_mutex; +} // namespace v1 +namespace profiling { + using detail::d1::set_name; +} +} // namespace tbb + +#endif /* __TBB_queuing_rw_mutex_H */ diff --git 
a/contrib/libs/tbb/include/oneapi/tbb/scalable_allocator.h b/contrib/libs/tbb/include/oneapi/tbb/scalable_allocator.h new file mode 100644 index 0000000000..daab02f324 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/scalable_allocator.h @@ -0,0 +1,332 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_scalable_allocator_H +#define __TBB_scalable_allocator_H + +#ifdef __cplusplus +#include "oneapi/tbb/detail/_config.h" +#include "oneapi/tbb/detail/_utils.h" +#include <cstdlib> +#include <utility> +#else +#include <stddef.h> /* Need ptrdiff_t and size_t from here. */ +#if !_MSC_VER +#include <stdint.h> /* Need intptr_t from here. */ +#endif +#endif + +#if __TBB_CPP17_MEMORY_RESOURCE_PRESENT +#error #include <memory_resource> +#endif + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +#if _MSC_VER + #define __TBB_EXPORTED_FUNC __cdecl +#else + #define __TBB_EXPORTED_FUNC +#endif + +/** The "malloc" analogue to allocate block of memory of size bytes. + * @ingroup memory_allocation */ +void* __TBB_EXPORTED_FUNC scalable_malloc(size_t size); + +/** The "free" analogue to discard a previously allocated piece of memory. + @ingroup memory_allocation */ +void __TBB_EXPORTED_FUNC scalable_free(void* ptr); + +/** The "realloc" analogue complementing scalable_malloc. + @ingroup memory_allocation */ +void* __TBB_EXPORTED_FUNC scalable_realloc(void* ptr, size_t size); + +/** The "calloc" analogue complementing scalable_malloc. + @ingroup memory_allocation */ +void* __TBB_EXPORTED_FUNC scalable_calloc(size_t nobj, size_t size); + +/** The "posix_memalign" analogue. + @ingroup memory_allocation */ +int __TBB_EXPORTED_FUNC scalable_posix_memalign(void** memptr, size_t alignment, size_t size); + +/** The "_aligned_malloc" analogue. + @ingroup memory_allocation */ +void* __TBB_EXPORTED_FUNC scalable_aligned_malloc(size_t size, size_t alignment); + +/** The "_aligned_realloc" analogue. + @ingroup memory_allocation */ +void* __TBB_EXPORTED_FUNC scalable_aligned_realloc(void* ptr, size_t size, size_t alignment); + +/** The "_aligned_free" analogue. + @ingroup memory_allocation */ +void __TBB_EXPORTED_FUNC scalable_aligned_free(void* ptr); + +/** The analogue of _msize/malloc_size/malloc_usable_size. + Returns the usable size of a memory block previously allocated by scalable_*, + or 0 (zero) if ptr does not point to such a block. + @ingroup memory_allocation */ +size_t __TBB_EXPORTED_FUNC scalable_msize(void* ptr); + +/* Results for scalable_allocation_* functions */ +typedef enum { + TBBMALLOC_OK, + TBBMALLOC_INVALID_PARAM, + TBBMALLOC_UNSUPPORTED, + TBBMALLOC_NO_MEMORY, + TBBMALLOC_NO_EFFECT +} ScalableAllocationResult; + +/* Setting TBB_MALLOC_USE_HUGE_PAGES environment variable to 1 enables huge pages. + scalable_allocation_mode call has priority over environment variable. 
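A short sketch of the C-level entry points declared above; the sizes and the alignment are illustrative, and error handling is reduced to null checks:

    #include <oneapi/tbb/scalable_allocator.h>
    #include <cstring>

    void malloc_demo() {
        void* p = scalable_malloc(1024);               // "malloc" analogue
        if (p) {
            std::memset(p, 0, scalable_msize(p));      // usable size may exceed the request
            scalable_free(p);                          // "free" analogue
        }
        void* q = scalable_aligned_malloc(256, /*alignment*/ 64);
        if (q) {
            scalable_aligned_free(q);
        }
    }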
*/ +typedef enum { + TBBMALLOC_USE_HUGE_PAGES, /* value turns using huge pages on and off */ + /* deprecated, kept for backward compatibility only */ + USE_HUGE_PAGES = TBBMALLOC_USE_HUGE_PAGES, + /* try to limit memory consumption value (Bytes), clean internal buffers + if limit is exceeded, but not prevents from requesting memory from OS */ + TBBMALLOC_SET_SOFT_HEAP_LIMIT, + /* Lower bound for the size (Bytes), that is interpreted as huge + * and not released during regular cleanup operations. */ + TBBMALLOC_SET_HUGE_SIZE_THRESHOLD +} AllocationModeParam; + +/** Set TBB allocator-specific allocation modes. + @ingroup memory_allocation */ +int __TBB_EXPORTED_FUNC scalable_allocation_mode(int param, intptr_t value); + +typedef enum { + /* Clean internal allocator buffers for all threads. + Returns TBBMALLOC_NO_EFFECT if no buffers cleaned, + TBBMALLOC_OK if some memory released from buffers. */ + TBBMALLOC_CLEAN_ALL_BUFFERS, + /* Clean internal allocator buffer for current thread only. + Return values same as for TBBMALLOC_CLEAN_ALL_BUFFERS. */ + TBBMALLOC_CLEAN_THREAD_BUFFERS +} ScalableAllocationCmd; + +/** Call TBB allocator-specific commands. + @ingroup memory_allocation */ +int __TBB_EXPORTED_FUNC scalable_allocation_command(int cmd, void *param); + +#ifdef __cplusplus +} /* extern "C" */ +#endif /* __cplusplus */ + +#ifdef __cplusplus + +//! The namespace rml contains components of low-level memory pool interface. +namespace rml { +class MemoryPool; + +typedef void *(*rawAllocType)(std::intptr_t pool_id, std::size_t &bytes); +// returns non-zero in case of error +typedef int (*rawFreeType)(std::intptr_t pool_id, void* raw_ptr, std::size_t raw_bytes); + +struct MemPoolPolicy { + enum { + TBBMALLOC_POOL_VERSION = 1 + }; + + rawAllocType pAlloc; + rawFreeType pFree; + // granularity of pAlloc allocations. 0 means default used. 
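    // An illustrative use of this low-level pool interface (the callback and pool
    // names are examples, assuming <cstdlib> and <cstdint>): the application
    // supplies raw alloc/free callbacks and builds pools on top of them.
    //
    //     void* raw_alloc(std::intptr_t, std::size_t& bytes) { return std::malloc(bytes); }
    //     int   raw_free (std::intptr_t, void* ptr, std::size_t) { std::free(ptr); return 0; }
    //
    //     rml::MemPoolPolicy policy(raw_alloc, raw_free);
    //     rml::MemoryPool* pool = nullptr;
    //     if (rml::pool_create_v1(/*pool_id*/ 0, &policy, &pool) == rml::POOL_OK) {
    //         void* obj = rml::pool_malloc(pool, 64);
    //         rml::pool_free(pool, obj);
    //         rml::pool_destroy(pool);
    //     }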
+ std::size_t granularity; + int version; + // all memory consumed at 1st pAlloc call and never returned, + // no more pAlloc calls after 1st + unsigned fixedPool : 1, + // memory consumed but returned only at pool termination + keepAllMemory : 1, + reserved : 30; + + MemPoolPolicy(rawAllocType pAlloc_, rawFreeType pFree_, + std::size_t granularity_ = 0, bool fixedPool_ = false, + bool keepAllMemory_ = false) : + pAlloc(pAlloc_), pFree(pFree_), granularity(granularity_), version(TBBMALLOC_POOL_VERSION), + fixedPool(fixedPool_), keepAllMemory(keepAllMemory_), + reserved(0) {} +}; + +// enums have same values as appropriate enums from ScalableAllocationResult +// TODO: use ScalableAllocationResult in pool_create directly +enum MemPoolError { + // pool created successfully + POOL_OK = TBBMALLOC_OK, + // invalid policy parameters found + INVALID_POLICY = TBBMALLOC_INVALID_PARAM, + // requested pool policy is not supported by allocator library + UNSUPPORTED_POLICY = TBBMALLOC_UNSUPPORTED, + // lack of memory during pool creation + NO_MEMORY = TBBMALLOC_NO_MEMORY, + // action takes no effect + NO_EFFECT = TBBMALLOC_NO_EFFECT +}; + +MemPoolError pool_create_v1(std::intptr_t pool_id, const MemPoolPolicy *policy, + rml::MemoryPool **pool); + +bool pool_destroy(MemoryPool* memPool); +void *pool_malloc(MemoryPool* memPool, std::size_t size); +void *pool_realloc(MemoryPool* memPool, void *object, std::size_t size); +void *pool_aligned_malloc(MemoryPool* mPool, std::size_t size, std::size_t alignment); +void *pool_aligned_realloc(MemoryPool* mPool, void *ptr, std::size_t size, std::size_t alignment); +bool pool_reset(MemoryPool* memPool); +bool pool_free(MemoryPool *memPool, void *object); +MemoryPool *pool_identify(void *object); +std::size_t pool_msize(MemoryPool *memPool, void *object); + +} // namespace rml + +namespace tbb { +namespace detail { +namespace d1 { + +// keep throw in a separate function to prevent code bloat +template<typename E> +void throw_exception(const E &e) { +#if TBB_USE_EXCEPTIONS + throw e; +#else + suppress_unused_warning(e); +#endif +} + +template<typename T> +class scalable_allocator { +public: + using value_type = T; + using propagate_on_container_move_assignment = std::true_type; + + //! Always defined for TBB containers + using is_always_equal = std::true_type; + + scalable_allocator() = default; + template<typename U> scalable_allocator(const scalable_allocator<U>&) noexcept {} + + //! Allocate space for n objects. + __TBB_nodiscard T* allocate(std::size_t n) { + T* p = static_cast<T*>(scalable_malloc(n * sizeof(value_type))); + if (!p) { + throw_exception(std::bad_alloc()); + } + return p; + } + + //! Free previously allocated block of memory + void deallocate(T* p, std::size_t) { + scalable_free(p); + } + +#if TBB_ALLOCATOR_TRAITS_BROKEN + using pointer = value_type*; + using const_pointer = const value_type*; + using reference = value_type&; + using const_reference = const value_type&; + using difference_type = std::ptrdiff_t; + using size_type = std::size_t; + template<typename U> struct rebind { + using other = scalable_allocator<U>; + }; + //! Largest value for which method allocate might succeed. + size_type max_size() const noexcept { + size_type absolutemax = static_cast<size_type>(-1) / sizeof (value_type); + return (absolutemax > 0 ? absolutemax : 1); + } + template<typename U, typename... Args> + void construct(U *p, Args&&... 
args) + { ::new((void *)p) U(std::forward<Args>(args)...); } + void destroy(pointer p) { p->~value_type(); } + pointer address(reference x) const { return &x; } + const_pointer address(const_reference x) const { return &x; } +#endif // TBB_ALLOCATOR_TRAITS_BROKEN + +}; + +#if TBB_ALLOCATOR_TRAITS_BROKEN + template<> + class scalable_allocator<void> { + public: + using pointer = void*; + using const_pointer = const void*; + using value_type = void; + template<typename U> struct rebind { + using other = scalable_allocator<U>; + }; + }; +#endif + +template<typename T, typename U> +inline bool operator==(const scalable_allocator<T>&, const scalable_allocator<U>&) noexcept { return true; } + +#if !__TBB_CPP20_COMPARISONS_PRESENT +template<typename T, typename U> +inline bool operator!=(const scalable_allocator<T>&, const scalable_allocator<U>&) noexcept { return false; } +#endif + +#if __TBB_CPP17_MEMORY_RESOURCE_PRESENT + +//! C++17 memory resource implementation for scalable allocator +//! ISO C++ Section 23.12.2 +class scalable_resource_impl : public std::pmr::memory_resource { +private: + void* do_allocate(std::size_t bytes, std::size_t alignment) override { + void* p = scalable_aligned_malloc(bytes, alignment); + if (!p) { + throw_exception(std::bad_alloc()); + } + return p; + } + + void do_deallocate(void* ptr, std::size_t /*bytes*/, std::size_t /*alignment*/) override { + scalable_free(ptr); + } + + //! Memory allocated by one instance of scalable_resource_impl could be deallocated by any + //! other instance of this class + bool do_is_equal(const std::pmr::memory_resource& other) const noexcept override { + return this == &other || +#if __TBB_USE_OPTIONAL_RTTI + dynamic_cast<const scalable_resource_impl*>(&other) != nullptr; +#else + false; +#endif + } +}; + +//! Global scalable allocator memory resource provider +inline std::pmr::memory_resource* scalable_memory_resource() noexcept { + static tbb::detail::d1::scalable_resource_impl scalable_res; + return &scalable_res; +} + +#endif // __TBB_CPP17_MEMORY_RESOURCE_PRESENT + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::scalable_allocator; +#if __TBB_CPP17_MEMORY_RESOURCE_PRESENT +using detail::d1::scalable_memory_resource; +#endif +} // namespace v1 + +} // namespace tbb + +#endif /* __cplusplus */ + +#endif /* __TBB_scalable_allocator_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/spin_mutex.h b/contrib/libs/tbb/include/oneapi/tbb/spin_mutex.h new file mode 100644 index 0000000000..7fde7e15af --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/spin_mutex.h @@ -0,0 +1,179 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
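Stepping back to the allocator header that ends just above: besides the C entry points it declares two C++ front ends, the STL-style scalable_allocator and, when <memory_resource> is available, a pmr resource. A sketch with illustrative container sizes:

    #include <oneapi/tbb/scalable_allocator.h>
    #include <vector>
    #if __TBB_CPP17_MEMORY_RESOURCE_PRESENT
    #include <memory_resource>
    #endif

    void cxx_allocator_demo() {
        // Drop-in STL allocator backed by tbbmalloc.
        std::vector<int, tbb::scalable_allocator<int>> v(1000);

    #if __TBB_CPP17_MEMORY_RESOURCE_PRESENT
        // The same heap exposed through the C++17 polymorphic allocator model.
        std::pmr::polymorphic_allocator<int> pa(tbb::scalable_memory_resource());
        std::pmr::vector<int> pv(pa);
        pv.resize(1000);
    #endif
    }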
+*/ + +#ifndef __TBB_spin_mutex_H +#define __TBB_spin_mutex_H + +#include "detail/_namespace_injection.h" + +#include "profiling.h" + +#include "detail/_assert.h" +#include "detail/_utils.h" + +#include <atomic> + +namespace tbb { +namespace detail { +namespace d1 { + +#if __TBB_TSX_INTRINSICS_PRESENT +class rtm_mutex; +#endif + +/** A spin_mutex is a low-level synchronization primitive. + While locked, it causes the waiting threads to spin in a loop until the lock is released. + It should be used only for locking short critical sections + (typically less than 20 instructions) when fairness is not an issue. + If zero-initialized, the mutex is considered unheld. + @ingroup synchronization */ +class spin_mutex { +public: + //! Constructors + spin_mutex() noexcept : m_flag(false) { + create_itt_sync(this, "tbb::spin_mutex", ""); + }; + + //! Destructor + ~spin_mutex() = default; + + //! No Copy + spin_mutex(const spin_mutex&) = delete; + spin_mutex& operator=(const spin_mutex&) = delete; + + //! Represents acquisition of a mutex. + class scoped_lock { + //! Points to currently held mutex, or NULL if no lock is held. + spin_mutex* m_mutex; + + public: + //! Construct without acquiring a mutex. + constexpr scoped_lock() noexcept : m_mutex(nullptr) {} + + //! Construct and acquire lock on a mutex. + scoped_lock(spin_mutex& m) { + acquire(m); + } + + //! No Copy + scoped_lock(const scoped_lock&) = delete; + scoped_lock& operator=(const scoped_lock&) = delete; + + //! Acquire lock. + void acquire(spin_mutex& m) { + m_mutex = &m; + m.lock(); + } + + //! Try acquiring lock (non-blocking) + /** Return true if lock acquired; false otherwise. */ + bool try_acquire(spin_mutex& m) { + bool result = m.try_lock(); + if (result) { + m_mutex = &m; + } + return result; + } + + //! Release lock + void release() { + __TBB_ASSERT(m_mutex, "release on spin_mutex::scoped_lock that is not holding a lock"); + m_mutex->unlock(); + m_mutex = nullptr; + } + + //! Destroy lock. If holding a lock, releases the lock first. + ~scoped_lock() { + if (m_mutex) { + release(); + } + } + }; + + //! Mutex traits + static constexpr bool is_rw_mutex = false; + static constexpr bool is_recursive_mutex = false; + static constexpr bool is_fair_mutex = false; + + //! Acquire lock + /** Spin if the lock is taken */ + void lock() { + atomic_backoff backoff; + call_itt_notify(prepare, this); + while (m_flag.exchange(true)) backoff.pause(); + call_itt_notify(acquired, this); + } + + //! Try acquiring lock (non-blocking) + /** Return true if lock acquired; false otherwise. */ + bool try_lock() { + bool result = !m_flag.exchange(true); + if (result) { + call_itt_notify(acquired, this); + } + return result; + } + + //! 
Release lock + void unlock() { + call_itt_notify(releasing, this); + m_flag.store(false, std::memory_order_release); + } + +protected: + std::atomic<bool> m_flag; +}; // class spin_mutex + +#if TBB_USE_PROFILING_TOOLS +inline void set_name(spin_mutex& obj, const char* name) { + itt_set_sync_name(&obj, name); +} +#if (_WIN32||_WIN64) && !__MINGW32__ +inline void set_name(spin_mutex& obj, const wchar_t* name) { + itt_set_sync_name(&obj, name); +} +#endif //WIN +#else +inline void set_name(spin_mutex&, const char*) {} +#if (_WIN32||_WIN64) && !__MINGW32__ +inline void set_name(spin_mutex&, const wchar_t*) {} +#endif // WIN +#endif +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::spin_mutex; +} // namespace v1 +namespace profiling { + using detail::d1::set_name; +} +} // namespace tbb + +#include "detail/_rtm_mutex.h" + +namespace tbb { +inline namespace v1 { +#if __TBB_TSX_INTRINSICS_PRESENT + using speculative_spin_mutex = detail::d1::rtm_mutex; +#else + using speculative_spin_mutex = detail::d1::spin_mutex; +#endif +} +} + +#endif /* __TBB_spin_mutex_H */ + diff --git a/contrib/libs/tbb/include/oneapi/tbb/spin_rw_mutex.h b/contrib/libs/tbb/include/oneapi/tbb/spin_rw_mutex.h new file mode 100644 index 0000000000..baf6b24b56 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/spin_rw_mutex.h @@ -0,0 +1,307 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_spin_rw_mutex_H +#define __TBB_spin_rw_mutex_H + +#include "detail/_namespace_injection.h" + +#include "profiling.h" + +#include "detail/_assert.h" +#include "detail/_utils.h" + +#include <atomic> + +namespace tbb { +namespace detail { +namespace d1 { + +#if __TBB_TSX_INTRINSICS_PRESENT +class rtm_rw_mutex; +#endif + +//! Fast, unfair, spinning reader-writer lock with backoff and writer-preference +/** @ingroup synchronization */ +class spin_rw_mutex { +public: + //! Constructors + spin_rw_mutex() noexcept : m_state(0) { + create_itt_sync(this, "tbb::spin_rw_mutex", ""); + } + + //! Destructor + ~spin_rw_mutex() { + __TBB_ASSERT(!m_state, "destruction of an acquired mutex"); + } + + //! No Copy + spin_rw_mutex(const spin_rw_mutex&) = delete; + spin_rw_mutex& operator=(const spin_rw_mutex&) = delete; + + //! The scoped locking pattern + /** It helps to avoid the common problem of forgetting to release lock. + It also nicely provides the "node" for queuing locks. */ + class scoped_lock { + public: + //! Construct lock that has not acquired a mutex. + /** Equivalent to zero-initialization of *this. */ + constexpr scoped_lock() noexcept : m_mutex(nullptr), m_is_writer(false) {} + + //! Acquire lock on given mutex. + scoped_lock(spin_rw_mutex& m, bool write = true) : m_mutex(nullptr) { + acquire(m, write); + } + + //! Release lock (if lock is held). + ~scoped_lock() { + if (m_mutex) { + release(); + } + } + + //! No Copy + scoped_lock(const scoped_lock&) = delete; + scoped_lock& operator=(const scoped_lock&) = delete; + + //! 
Acquire lock on given mutex. + void acquire(spin_rw_mutex& m, bool write = true) { + m_is_writer = write; + m_mutex = &m; + if (write) { + m_mutex->lock(); + } else { + m_mutex->lock_shared(); + } + } + + //! Try acquire lock on given mutex. + bool try_acquire(spin_rw_mutex& m, bool write = true) { + m_is_writer = write; + bool result = write ? m.try_lock() : m.try_lock_shared(); + if (result) { + m_mutex = &m; + } + return result; + } + + //! Release lock. + void release() { + spin_rw_mutex* m = m_mutex; + m_mutex = nullptr; + + if (m_is_writer) { + m->unlock(); + } else { + m->unlock_shared(); + } + } + + //! Upgrade reader to become a writer. + /** Returns whether the upgrade happened without releasing and re-acquiring the lock */ + bool upgrade_to_writer() { + if (m_is_writer) return true; // Already a writer + m_is_writer = true; + return m_mutex->upgrade(); + } + + //! Downgrade writer to become a reader. + bool downgrade_to_reader() { + if (!m_is_writer) return true; // Already a reader + m_mutex->downgrade(); + m_is_writer = false; + return true; + } + + protected: + //! The pointer to the current mutex that is held, or nullptr if no mutex is held. + spin_rw_mutex* m_mutex; + + //! If mutex != nullptr, then is_writer is true if holding a writer lock, false if holding a reader lock. + /** Not defined if not holding a lock. */ + bool m_is_writer; + }; + + //! Mutex traits + static constexpr bool is_rw_mutex = true; + static constexpr bool is_recursive_mutex = false; + static constexpr bool is_fair_mutex = false; + + //! Acquire lock + void lock() { + call_itt_notify(prepare, this); + for (atomic_backoff backoff; ; backoff.pause()) { + state_type s = m_state.load(std::memory_order_relaxed); + if (!(s & BUSY)) { // no readers, no writers + if (m_state.compare_exchange_strong(s, WRITER)) + break; // successfully stored writer flag + backoff.reset(); // we could be very close to complete op. + } else if (!(s & WRITER_PENDING)) { // no pending writers + m_state |= WRITER_PENDING; + } + } + call_itt_notify(acquired, this); + } + + //! Try acquiring lock (non-blocking) + /** Return true if lock acquired; false otherwise. */ + bool try_lock() { + // for a writer: only possible to acquire if no active readers or writers + state_type s = m_state.load(std::memory_order_relaxed); + if (!(s & BUSY)) { // no readers, no writers; mask is 1..1101 + if (m_state.compare_exchange_strong(s, WRITER)) { + call_itt_notify(acquired, this); + return true; // successfully stored writer flag + } + } + return false; + } + + //! Release lock + void unlock() { + call_itt_notify(releasing, this); + m_state &= READERS; + } + + //! Lock shared ownership mutex + void lock_shared() { + call_itt_notify(prepare, this); + for (atomic_backoff b; ; b.pause()) { + state_type s = m_state.load(std::memory_order_relaxed); + if (!(s & (WRITER | WRITER_PENDING))) { // no writer or write requests + state_type prev_state = m_state.fetch_add(ONE_READER); + if (!(prev_state & WRITER)) { + break; // successfully stored increased number of readers + } + // writer got there first, undo the increment + m_state -= ONE_READER; + } + } + call_itt_notify(acquired, this); + __TBB_ASSERT(m_state & READERS, "invalid state of a read lock: no readers"); + } + + //! 
Try lock shared ownership mutex + bool try_lock_shared() { + // for a reader: acquire if no active or waiting writers + state_type s = m_state.load(std::memory_order_relaxed); + if (!(s & (WRITER | WRITER_PENDING))) { // no writers + state_type prev_state = m_state.fetch_add(ONE_READER); + if (!(prev_state & WRITER)) { // got the lock + call_itt_notify(acquired, this); + return true; // successfully stored increased number of readers + } + // writer got there first, undo the increment + m_state -= ONE_READER; + } + return false; + } + + //! Unlock shared ownership mutex + void unlock_shared() { + __TBB_ASSERT(m_state & READERS, "invalid state of a read lock: no readers"); + call_itt_notify(releasing, this); + m_state -= ONE_READER; + } + +protected: + /** Internal non ISO C++ standard API **/ + //! This API is used through the scoped_lock class + + //! Upgrade reader to become a writer. + /** Returns whether the upgrade happened without releasing and re-acquiring the lock */ + bool upgrade() { + state_type s = m_state.load(std::memory_order_relaxed); + __TBB_ASSERT(s & READERS, "invalid state before upgrade: no readers "); + // Check and set writer-pending flag. + // Required conditions: either no pending writers, or we are the only reader + // (with multiple readers and pending writer, another upgrade could have been requested) + while ((s & READERS) == ONE_READER || !(s & WRITER_PENDING)) { + if (m_state.compare_exchange_strong(s, s | WRITER | WRITER_PENDING)) { + atomic_backoff backoff; + while ((m_state.load(std::memory_order_relaxed) & READERS) != ONE_READER) backoff.pause(); + __TBB_ASSERT((m_state & (WRITER_PENDING|WRITER)) == (WRITER_PENDING | WRITER), "invalid state when upgrading to writer"); + // Both new readers and writers are blocked at this time + m_state -= (ONE_READER + WRITER_PENDING); + return true; // successfully upgraded + } + } + // Slow reacquire + unlock_shared(); + lock(); + return false; + } + + //! Downgrade writer to a reader + void downgrade() { + call_itt_notify(releasing, this); + m_state += (ONE_READER - WRITER); + __TBB_ASSERT(m_state & READERS, "invalid state after downgrade: no readers"); + } + + using state_type = std::intptr_t; + static constexpr state_type WRITER = 1; + static constexpr state_type WRITER_PENDING = 2; + static constexpr state_type READERS = ~(WRITER | WRITER_PENDING); + static constexpr state_type ONE_READER = 4; + static constexpr state_type BUSY = WRITER | READERS; + //! 
State of lock + /** Bit 0 = writer is holding lock + Bit 1 = request by a writer to acquire lock (hint to readers to wait) + Bit 2..N = number of readers holding lock */ + std::atomic<state_type> m_state; +}; // class spin_rw_mutex + +#if TBB_USE_PROFILING_TOOLS +inline void set_name(spin_rw_mutex& obj, const char* name) { + itt_set_sync_name(&obj, name); +} +#if (_WIN32||_WIN64) && !__MINGW32__ +inline void set_name(spin_rw_mutex& obj, const wchar_t* name) { + itt_set_sync_name(&obj, name); +} +#endif // WIN +#else +inline void set_name(spin_rw_mutex&, const char*) {} +#if (_WIN32||_WIN64) && !__MINGW32__ +inline void set_name(spin_rw_mutex&, const wchar_t*) {} +#endif // WIN +#endif +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::spin_rw_mutex; +} // namespace v1 +namespace profiling { + using detail::d1::set_name; +} +} // namespace tbb + +#include "detail/_rtm_rw_mutex.h" + +namespace tbb { +inline namespace v1 { +#if __TBB_TSX_INTRINSICS_PRESENT + using speculative_spin_rw_mutex = detail::d1::rtm_rw_mutex; +#else + using speculative_spin_rw_mutex = detail::d1::spin_rw_mutex; +#endif +} +} + +#endif /* __TBB_spin_rw_mutex_H */ + diff --git a/contrib/libs/tbb/include/oneapi/tbb/task.h b/contrib/libs/tbb/include/oneapi/tbb/task.h new file mode 100644 index 0000000000..82ce1df6cd --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/task.h @@ -0,0 +1,37 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_task_H +#define __TBB_task_H + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" +#include "detail/_task.h" + +namespace tbb { +inline namespace v1 { +namespace task { +#if __TBB_RESUMABLE_TASKS + using detail::d1::suspend_point; + using detail::d1::resume; + using detail::d1::suspend; +#endif /* __TBB_RESUMABLE_TASKS */ + using detail::d1::current_context; +} // namespace task +} // namespace v1 +} // namespace tbb + +#endif /* __TBB_task_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/task_arena.h b/contrib/libs/tbb/include/oneapi/tbb/task_arena.h new file mode 100644 index 0000000000..f1d0f9dea3 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/task_arena.h @@ -0,0 +1,452 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
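Looking back at the two spin locks defined above, a combined sketch (the statistics structure and its fields are illustrative):

    #include <oneapi/tbb/spin_mutex.h>
    #include <oneapi/tbb/spin_rw_mutex.h>

    struct stats {
        tbb::spin_mutex    tick_lock;   // guards a few-instruction critical section
        tbb::spin_rw_mutex table_lock;  // guards read-mostly data
        long ticks = 0;

        void tick() {
            // spin_mutex also provides lock()/try_lock()/unlock(), so std::lock_guard works too.
            tbb::spin_mutex::scoped_lock l(tick_lock);  // spins until acquired
            ++ticks;
        }
        void reset() {
            tbb::spin_rw_mutex::scoped_lock l(table_lock);  // writer (the default)
            ticks = 0;
        }
        long read() {
            tbb::spin_rw_mutex::scoped_lock l(table_lock, /*write=*/false);  // shared reader
            return ticks;
        }
    };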
+*/ + +#ifndef __TBB_task_arena_H +#define __TBB_task_arena_H + +#include "detail/_namespace_injection.h" +#include "detail/_task.h" +#include "detail/_exception.h" +#include "detail/_aligned_space.h" +#include "detail/_small_object_pool.h" + +#if __TBB_ARENA_BINDING +#include "info.h" +#endif /*__TBB_ARENA_BINDING*/ + +namespace tbb { +namespace detail { + +namespace d1 { + +template<typename F, typename R> +class task_arena_function : public delegate_base { + F &my_func; + aligned_space<R> my_return_storage; + bool my_constructed{false}; + // The function should be called only once. + bool operator()() const override { + new (my_return_storage.begin()) R(my_func()); + return true; + } +public: + task_arena_function(F& f) : my_func(f) {} + // The function can be called only after operator() and only once. + R consume_result() { + my_constructed = true; + return std::move(*(my_return_storage.begin())); + } + ~task_arena_function() override { + if (my_constructed) { + my_return_storage.begin()->~R(); + } + } +}; + +template<typename F> +class task_arena_function<F,void> : public delegate_base { + F &my_func; + bool operator()() const override { + my_func(); + return true; + } +public: + task_arena_function(F& f) : my_func(f) {} + void consume_result() const {} + + friend class task_arena_base; +}; + +class task_arena_base; +class task_scheduler_observer; +} // namespace d1 + +namespace r1 { +class arena; +struct task_arena_impl; + +void __TBB_EXPORTED_FUNC observe(d1::task_scheduler_observer&, bool); +void __TBB_EXPORTED_FUNC initialize(d1::task_arena_base&); +void __TBB_EXPORTED_FUNC terminate(d1::task_arena_base&); +bool __TBB_EXPORTED_FUNC attach(d1::task_arena_base&); +void __TBB_EXPORTED_FUNC execute(d1::task_arena_base&, d1::delegate_base&); +void __TBB_EXPORTED_FUNC wait(d1::task_arena_base&); +int __TBB_EXPORTED_FUNC max_concurrency(const d1::task_arena_base*); +void __TBB_EXPORTED_FUNC isolate_within_arena(d1::delegate_base& d, std::intptr_t); + +void __TBB_EXPORTED_FUNC enqueue(d1::task&, d1::task_arena_base*); +void __TBB_EXPORTED_FUNC submit(d1::task&, d1::task_group_context&, arena*, std::uintptr_t); +} // namespace r1 + +namespace d1 { + +static constexpr int priority_stride = INT_MAX / 4; + +class task_arena_base { + friend struct r1::task_arena_impl; + friend void r1::observe(d1::task_scheduler_observer&, bool); +public: + enum class priority : int { + low = 1 * priority_stride, + normal = 2 * priority_stride, + high = 3 * priority_stride + }; +#if __TBB_ARENA_BINDING + using constraints = tbb::detail::d1::constraints; +#endif /*__TBB_ARENA_BINDING*/ +protected: + //! Special settings + intptr_t my_version_and_traits; + + std::atomic<do_once_state> my_initialization_state; + + //! NULL if not currently initialized. + std::atomic<r1::arena*> my_arena; + static_assert(sizeof(std::atomic<r1::arena*>) == sizeof(r1::arena*), + "To preserve backward compatibility we need the equal size of an atomic pointer and a pointer"); + + //! Concurrency level for deferred initialization + int my_max_concurrency; + + //! Reserved slots for external threads + unsigned my_num_reserved_slots; + + //! Arena priority + priority my_priority; + + //! The NUMA node index to which the arena will be attached + numa_node_id my_numa_id; + + //! The core type index to which arena will be attached + core_type_id my_core_type; + + //! Number of threads per core + int my_max_threads_per_core; + + // Backward compatibility checks. 
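    // (The accessors below consult core_type_support_flag in my_version_and_traits:
    //  only when the flag is set were my_core_type and my_max_threads_per_core
    //  actually initialized by the header that built this object, so objects produced
    //  by an older layout fall back to `automatic` instead of reading stale fields.)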
+ core_type_id core_type() const { + return (my_version_and_traits & core_type_support_flag) == core_type_support_flag ? my_core_type : automatic; + } + int max_threads_per_core() const { + return (my_version_and_traits & core_type_support_flag) == core_type_support_flag ? my_max_threads_per_core : automatic; + } + + enum { + default_flags = 0 + , core_type_support_flag = 1 + }; + + task_arena_base(int max_concurrency, unsigned reserved_for_masters, priority a_priority) + : my_version_and_traits(default_flags | core_type_support_flag) + , my_initialization_state(do_once_state::uninitialized) + , my_arena(nullptr) + , my_max_concurrency(max_concurrency) + , my_num_reserved_slots(reserved_for_masters) + , my_priority(a_priority) + , my_numa_id(automatic) + , my_core_type(automatic) + , my_max_threads_per_core(automatic) + {} + +#if __TBB_ARENA_BINDING + task_arena_base(const constraints& constraints_, unsigned reserved_for_masters, priority a_priority) + : my_version_and_traits(default_flags | core_type_support_flag) + , my_initialization_state(do_once_state::uninitialized) + , my_arena(nullptr) + , my_max_concurrency(constraints_.max_concurrency) + , my_num_reserved_slots(reserved_for_masters) + , my_priority(a_priority) + , my_numa_id(constraints_.numa_id) +#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT + , my_core_type(constraints_.core_type) + , my_max_threads_per_core(constraints_.max_threads_per_core) +#else + , my_core_type(automatic) + , my_max_threads_per_core(automatic) +#endif + {} +#endif /*__TBB_ARENA_BINDING*/ +public: + //! Typedef for number of threads that is automatic. + static const int automatic = -1; + static const int not_initialized = -2; +}; + +template<typename R, typename F> +R isolate_impl(F& f) { + task_arena_function<F, R> func(f); + r1::isolate_within_arena(func, /*isolation*/ 0); + return func.consume_result(); +} + +/** 1-to-1 proxy representation class of scheduler's arena + * Constructors set up settings only, real construction is deferred till the first method invocation + * Destructor only removes one of the references to the inner arena representation. + * Final destruction happens when all the references (and the work) are gone. 
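A usage sketch for the class defined below; the arena size, the data, and the NUMA note are illustrative:

    #include <oneapi/tbb/task_arena.h>
    #include <oneapi/tbb/parallel_for.h>
    #include <cstddef>
    #include <vector>

    void run_limited(std::vector<double>& data) {
        tbb::task_arena arena(4);   // at most 4 threads; 1 slot reserved for the calling thread
        arena.execute([&] {         // join the arena and run the loop inside it
            tbb::parallel_for(std::size_t(0), data.size(),
                              [&](std::size_t i) { data[i] *= 0.5; });
        });
        arena.enqueue([] { /* fire-and-forget work picked up by arena workers */ });

        // With __TBB_ARENA_BINDING, an arena can also be pinned, e.g. (illustrative):
        //   tbb::task_arena numa_arena(tbb::task_arena::constraints{}
        //                                  .set_numa_id(tbb::info::numa_nodes().front()));
    }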
+ */ +class task_arena : public task_arena_base { + + template <typename F> + class enqueue_task : public task { + small_object_allocator m_allocator; + const F m_func; + + void finalize(const execution_data& ed) { + m_allocator.delete_object(this, ed); + } + task* execute(execution_data& ed) override { + m_func(); + finalize(ed); + return nullptr; + } + task* cancel(execution_data&) override { + __TBB_ASSERT_RELEASE(false, "Unhandled exception from enqueue task is caught"); + return nullptr; + } + public: + enqueue_task(const F& f, small_object_allocator& alloc) : m_allocator(alloc), m_func(f) {} + enqueue_task(F&& f, small_object_allocator& alloc) : m_allocator(alloc), m_func(std::move(f)) {} + }; + + void mark_initialized() { + __TBB_ASSERT( my_arena.load(std::memory_order_relaxed), "task_arena initialization is incomplete" ); + my_initialization_state.store(do_once_state::initialized, std::memory_order_release); + } + + template<typename F> + void enqueue_impl(F&& f) { + initialize(); + small_object_allocator alloc{}; + r1::enqueue(*alloc.new_object<enqueue_task<typename std::decay<F>::type>>(std::forward<F>(f), alloc), this); + } + + template<typename R, typename F> + R execute_impl(F& f) { + initialize(); + task_arena_function<F, R> func(f); + r1::execute(*this, func); + return func.consume_result(); + } +public: + //! Creates task_arena with certain concurrency limits + /** Sets up settings only, real construction is deferred till the first method invocation + * @arg max_concurrency specifies total number of slots in arena where threads work + * @arg reserved_for_masters specifies number of slots to be used by external threads only. + * Value of 1 is default and reflects behavior of implicit arenas. + **/ + task_arena(int max_concurrency_ = automatic, unsigned reserved_for_masters = 1, + priority a_priority = priority::normal) + : task_arena_base(max_concurrency_, reserved_for_masters, a_priority) + {} + +#if __TBB_ARENA_BINDING + //! Creates task arena pinned to certain NUMA node + task_arena(const constraints& constraints_, unsigned reserved_for_masters = 1, + priority a_priority = priority::normal) + : task_arena_base(constraints_, reserved_for_masters, a_priority) + {} + + //! Copies settings from another task_arena + task_arena(const task_arena &s) // copy settings but not the reference or instance + : task_arena_base( + constraints{} + .set_numa_id(s.my_numa_id) + .set_max_concurrency(s.my_max_concurrency) +#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT + .set_core_type(s.my_core_type) + .set_max_threads_per_core(s.my_max_threads_per_core) +#endif + , s.my_num_reserved_slots, s.my_priority) + {} +#else + //! Copies settings from another task_arena + task_arena(const task_arena& a) // copy settings but not the reference or instance + : task_arena_base(a.my_max_concurrency, a.my_num_reserved_slots, a.my_priority) + {} +#endif /*__TBB_ARENA_BINDING*/ + + //! Tag class used to indicate the "attaching" constructor + struct attach {}; + + //! Creates an instance of task_arena attached to the current arena of the thread + explicit task_arena( attach ) + : task_arena_base(automatic, 1, priority::normal) // use default settings if attach fails + { + if (r1::attach(*this)) { + mark_initialized(); + } + } + + //! Forces allocation of the resources for the task_arena as specified in constructor arguments + void initialize() { + atomic_do_once([this]{ r1::initialize(*this); }, my_initialization_state); + } + + //! 
Overrides concurrency level and forces initialization of internal representation + void initialize(int max_concurrency_, unsigned reserved_for_masters = 1, + priority a_priority = priority::normal) + { + __TBB_ASSERT(!my_arena.load(std::memory_order_relaxed), "Impossible to modify settings of an already initialized task_arena"); + if( !is_active() ) { + my_max_concurrency = max_concurrency_; + my_num_reserved_slots = reserved_for_masters; + my_priority = a_priority; + r1::initialize(*this); + mark_initialized(); + } + } + +#if __TBB_ARENA_BINDING + void initialize(constraints constraints_, unsigned reserved_for_masters = 1, + priority a_priority = priority::normal) + { + __TBB_ASSERT(!my_arena.load(std::memory_order_relaxed), "Impossible to modify settings of an already initialized task_arena"); + if( !is_active() ) { + my_numa_id = constraints_.numa_id; + my_max_concurrency = constraints_.max_concurrency; +#if __TBB_PREVIEW_TASK_ARENA_CONSTRAINTS_EXTENSION_PRESENT + my_core_type = constraints_.core_type; + my_max_threads_per_core = constraints_.max_threads_per_core; +#endif + my_num_reserved_slots = reserved_for_masters; + my_priority = a_priority; + r1::initialize(*this); + mark_initialized(); + } + } +#endif /*__TBB_ARENA_BINDING*/ + + //! Attaches this instance to the current arena of the thread + void initialize(attach) { + // TODO: decide if this call must be thread-safe + __TBB_ASSERT(!my_arena.load(std::memory_order_relaxed), "Impossible to modify settings of an already initialized task_arena"); + if( !is_active() ) { + if ( !r1::attach(*this) ) { + r1::initialize(*this); + } + mark_initialized(); + } + } + + //! Removes the reference to the internal arena representation. + //! Not thread safe wrt concurrent invocations of other methods. + void terminate() { + if( is_active() ) { + r1::terminate(*this); + my_initialization_state.store(do_once_state::uninitialized, std::memory_order_relaxed); + } + } + + //! Removes the reference to the internal arena representation, and destroys the external object. + //! Not thread safe wrt concurrent invocations of other methods. + ~task_arena() { + terminate(); + } + + //! Returns true if the arena is active (initialized); false otherwise. + //! The name was chosen to match a task_scheduler_init method with the same semantics. + bool is_active() const { + return my_initialization_state.load(std::memory_order_acquire) == do_once_state::initialized; + } + + //! Enqueues a task into the arena to process a functor, and immediately returns. + //! Does not require the calling thread to join the arena + + template<typename F> + void enqueue(F&& f) { + enqueue_impl(std::forward<F>(f)); + } + + //! Joins the arena and executes a mutable functor, then returns + //! If not possible to join, wraps the functor into a task, enqueues it and waits for task completion + //! Can decrement the arena demand for workers, causing a worker to leave and free a slot to the calling thread + //! Since C++11, the method returns the value returned by functor (prior to C++11 it returns void). + template<typename F> + auto execute(F&& f) -> decltype(f()) { + return execute_impl<decltype(f())>(f); + } + +#if __TBB_EXTRA_DEBUG + //! Returns my_num_reserved_slots + int debug_reserved_slots() const { + // Handle special cases inside the library + return my_num_reserved_slots; + } + + //! Returns my_max_concurrency + int debug_max_concurrency() const { + // Handle special cases inside the library + return my_max_concurrency; + } + + //! 
Wait for all work in the arena to be completed + //! Even submitted by other application threads + //! Joins arena if/when possible (in the same way as execute()) + void debug_wait_until_empty() { + initialize(); + r1::wait(*this); + } +#endif //__TBB_EXTRA_DEBUG + + //! Returns the maximal number of threads that can work inside the arena + int max_concurrency() const { + // Handle special cases inside the library + return (my_max_concurrency > 1) ? my_max_concurrency : r1::max_concurrency(this); + } + + friend void submit(task& t, task_arena& ta, task_group_context& ctx, bool as_critical) { + __TBB_ASSERT(ta.is_active(), nullptr); + call_itt_task_notify(releasing, &t); + r1::submit(t, ctx, ta.my_arena.load(std::memory_order_relaxed), as_critical ? 1 : 0); + } +}; + +//! Executes a mutable functor in isolation within the current task arena. +//! Since C++11, the method returns the value returned by functor (prior to C++11 it returns void). +template<typename F> +inline auto isolate(F&& f) -> decltype(f()) { + return isolate_impl<decltype(f())>(f); +} + +//! Returns the index, aka slot number, of the calling thread in its current arena +inline int current_thread_index() { + int idx = r1::execution_slot(nullptr); + return idx == -1 ? task_arena_base::not_initialized : idx; +} + +//! Returns the maximal number of threads that can work inside the arena +inline int max_concurrency() { + return r1::max_concurrency(nullptr); +} + +using r1::submit; + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::task_arena; + +namespace this_task_arena { +using detail::d1::current_thread_index; +using detail::d1::max_concurrency; +using detail::d1::isolate; +} // namespace this_task_arena + +} // inline namespace v1 + +} // namespace tbb +#endif /* __TBB_task_arena_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/task_group.h b/contrib/libs/tbb/include/oneapi/tbb/task_group.h new file mode 100644 index 0000000000..e82553076a --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/task_group.h @@ -0,0 +1,556 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
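For orientation, here is a minimal usage sketch of the task_arena interface declared above (illustrative only, not part of the TBB sources; the slot counts, iteration count, and messages are arbitrary choices): execute() joins the arena and runs a parallel loop inside it, and this_task_arena reports the concurrency observed from within.

#include "oneapi/tbb/task_arena.h"
#include "oneapi/tbb/parallel_for.h"
#include <atomic>
#include <cstdio>

int main() {
    // Arena with at most 2 slots, 1 of them reserved for the calling (external) thread.
    tbb::task_arena arena(2, 1);

    std::atomic<int> iterations{0};
    // execute() joins the arena (or enqueues and waits) and returns the functor's result.
    int inside = arena.execute([&] {
        tbb::parallel_for(0, 1000, [&](int) { ++iterations; });
        return tbb::this_task_arena::max_concurrency();
    });

    std::printf("iterations = %d, concurrency inside the arena = %d\n",
                iterations.load(), inside);
    return 0;
}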
+*/ + +#ifndef __TBB_task_group_H +#define __TBB_task_group_H + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" +#include "detail/_template_helpers.h" +#include "detail/_utils.h" +#include "detail/_exception.h" +#include "detail/_task.h" +#include "detail/_small_object_pool.h" + +#include "profiling.h" + +#include <functional> + +#if _MSC_VER && !defined(__INTEL_COMPILER) + // Suppress warning: structure was padded due to alignment specifier + #pragma warning(push) + #pragma warning(disable:4324) +#endif + +namespace tbb { +namespace detail { + +namespace d1 { +class delegate_base; +class task_arena_base; +class task_group_context; +} + +namespace r1 { +// Forward declarations +class tbb_exception_ptr; +class market; +class thread_data; +class task_dispatcher; +template <bool> +class context_guard_helper; +struct task_arena_impl; + +void __TBB_EXPORTED_FUNC execute(d1::task_arena_base&, d1::delegate_base&); +void __TBB_EXPORTED_FUNC isolate_within_arena(d1::delegate_base&, std::intptr_t); + +void __TBB_EXPORTED_FUNC initialize(d1::task_group_context&); +void __TBB_EXPORTED_FUNC destroy(d1::task_group_context&); +void __TBB_EXPORTED_FUNC reset(d1::task_group_context&); +bool __TBB_EXPORTED_FUNC cancel_group_execution(d1::task_group_context&); +bool __TBB_EXPORTED_FUNC is_group_execution_cancelled(d1::task_group_context&); +void __TBB_EXPORTED_FUNC capture_fp_settings(d1::task_group_context&); + +struct task_group_context_impl; +} + +namespace d1 { + +struct context_list_node { + std::atomic<context_list_node*> prev{}; + std::atomic<context_list_node*> next{}; + + void remove_relaxed() { + context_list_node* p = prev.load(std::memory_order_relaxed); + context_list_node* n = next.load(std::memory_order_relaxed); + p->next.store(n, std::memory_order_relaxed); + n->prev.store(p, std::memory_order_relaxed); + } +}; + +//! Used to form groups of tasks +/** @ingroup task_scheduling + The context services explicit cancellation requests from user code, and unhandled + exceptions intercepted during tasks execution. Intercepting an exception results + in generating internal cancellation requests (which is processed in exactly the + same way as external ones). + + The context is associated with one or more root tasks and defines the cancellation + group that includes all the descendants of the corresponding root task(s). Association + is established when a context object is passed as an argument to the task::allocate_root() + method. See task_group_context::task_group_context for more details. + + The context can be bound to another one, and other contexts can be bound to it, + forming a tree-like structure: parent -> this -> children. Arrows here designate + cancellation propagation direction. If a task in a cancellation group is cancelled + all the other tasks in this group and groups bound to it (as children) get cancelled too. +**/ +class task_group_context : no_copy { +public: + enum traits_type { + fp_settings = 1 << 1, + concurrent_wait = 1 << 2, + default_traits = 0 + }; + enum kind_type { + isolated, + bound + }; +private: + //! Space for platform-specific FPU settings. + /** Must only be accessed inside TBB binaries, and never directly in user + code or inline methods. */ + std::uint64_t my_cpu_ctl_env; + + //! Specifies whether cancellation was requested for this task group. + std::atomic<std::uint32_t> my_cancellation_requested; + + //! Version for run-time checks and behavioral traits of the context. + std::uint8_t my_version; + + //! The context traits. 
+ struct context_traits { + bool fp_settings : 1; + bool concurrent_wait : 1; + bool bound : 1; + } my_traits; + + static_assert(sizeof(context_traits) == 1, "Traits shall fit into one byte."); + + static constexpr std::uint8_t may_have_children = 1; + //! The context internal state (currently only may_have_children). + std::atomic<std::uint8_t> my_state; + + enum class lifetime_state : std::uint8_t { + created, + locked, + isolated, + bound, + detached, + dying + }; + + //! The synchronization machine state to manage lifetime. + std::atomic<lifetime_state> my_lifetime_state; + + //! Pointer to the context of the parent cancellation group. NULL for isolated contexts. + task_group_context* my_parent; + + //! Thread data instance that registered this context in its list. + std::atomic<r1::thread_data*> my_owner; + + //! Used to form the thread specific list of contexts without additional memory allocation. + /** A context is included into the list of the current thread when its binding to + its parent happens. Any context can be present in the list of one thread only. **/ + context_list_node my_node; + + //! Pointer to the container storing exception being propagated across this task group. + r1::tbb_exception_ptr* my_exception; + + //! Used to set and maintain stack stitching point for Intel Performance Tools. + void* my_itt_caller; + + //! Description of algorithm for scheduler based instrumentation. + string_resource_index my_name; + + char padding[max_nfs_size + - sizeof(std::uint64_t) // my_cpu_ctl_env + - sizeof(std::atomic<std::uint32_t>) // my_cancellation_requested + - sizeof(std::uint8_t) // my_version + - sizeof(context_traits) // my_traits + - sizeof(std::atomic<std::uint8_t>) // my_state + - sizeof(std::atomic<lifetime_state>) // my_lifetime_state + - sizeof(task_group_context*) // my_parent + - sizeof(std::atomic<r1::thread_data*>) // my_owner + - sizeof(context_list_node) // my_node + - sizeof(r1::tbb_exception_ptr*) // my_exception + - sizeof(void*) // my_itt_caller + - sizeof(string_resource_index) // my_name + ]; + + task_group_context(context_traits t, string_resource_index name) + : my_version{}, my_name{ name } { + my_traits = t; // GCC4.8 issues warning list initialization for bitset (missing-field-initializers) + r1::initialize(*this); + } + + static context_traits make_traits(kind_type relation_with_parent, std::uintptr_t user_traits) { + context_traits ct; + ct.bound = relation_with_parent == bound; + ct.fp_settings = (user_traits & fp_settings) == fp_settings; + ct.concurrent_wait = (user_traits & concurrent_wait) == concurrent_wait; + return ct; + } + +public: + //! Default & binding constructor. + /** By default a bound context is created. That is, this context will be bound + (as a child) to the context of the currently executing task. Cancellation + requests passed to the parent context are propagated to all the contexts + bound to it. Similarly, a priority change is propagated from the parent context + to its children. + + If task_group_context::isolated is used as the argument, then the tasks associated + with this context will never be affected by events in any other context. + + Creating isolated contexts involves much less overhead, but they have limited + utility. Normally, when an exception occurs in an algorithm that has nested + ones running, it is desirable to have all the nested algorithms cancelled + as well. Such a behavior requires nested algorithms to use bound contexts. + + There is one good place where using isolated algorithms is beneficial. 
It is + an external thread. That is if a particular algorithm is invoked directly from + the external thread (not from a TBB task), supplying it with explicitly + created isolated context will result in a faster algorithm startup. + + VERSIONING NOTE: + Implementation(s) of task_group_context constructor(s) cannot be made + entirely out-of-line because the run-time version must be set by the user + code. This will become critically important for binary compatibility, if + we ever have to change the size of the context object. **/ + + task_group_context(kind_type relation_with_parent = bound, + std::uintptr_t t = default_traits) + : task_group_context(make_traits(relation_with_parent, t), CUSTOM_CTX) {} + + // Custom constructor for instrumentation of oneTBB algorithm + task_group_context (string_resource_index name ) + : task_group_context(make_traits(bound, default_traits), name) {} + + // Do not introduce any logic on user side since it might break state propagation assumptions + ~task_group_context() { + r1::destroy(*this); + } + + //! Forcefully reinitializes the context after the task tree it was associated with is completed. + /** Because the method assumes that all the tasks that used to be associated with + this context have already finished, calling it while the context is still + in use somewhere in the task hierarchy leads to undefined behavior. + + IMPORTANT: This method is not thread safe! + + The method does not change the context's parent if it is set. **/ + void reset() { + r1::reset(*this); + } + + //! Initiates cancellation of all tasks in this cancellation group and its subordinate groups. + /** \return false if cancellation has already been requested, true otherwise. + + Note that canceling never fails. When false is returned, it just means that + another thread (or this one) has already sent cancellation request to this + context or to one of its ancestors (if this context is bound). It is guaranteed + that when this method is concurrently called on the same not yet cancelled + context, true will be returned by one and only one invocation. **/ + bool cancel_group_execution() { + return r1::cancel_group_execution(*this); + } + + //! Returns true if the context received cancellation request. + bool is_group_execution_cancelled() { + return r1::is_group_execution_cancelled(*this); + } + +#if __TBB_FP_CONTEXT + //! Captures the current FPU control settings to the context. + /** Because the method assumes that all the tasks that used to be associated with + this context have already finished, calling it while the context is still + in use somewhere in the task hierarchy leads to undefined behavior. + + IMPORTANT: This method is not thread safe! + + The method does not change the FPU control settings of the context's parent. **/ + void capture_fp_settings() { + r1::capture_fp_settings(*this); + } +#endif + + //! Returns the user visible context trait + std::uintptr_t traits() const { + std::uintptr_t t{}; + t |= my_traits.fp_settings ? fp_settings : 0; + t |= my_traits.concurrent_wait ? 
concurrent_wait : 0; + return t; + } +private: + //// TODO: cleanup friends + friend class r1::market; + friend class r1::thread_data; + friend class r1::task_dispatcher; + template <bool> + friend class r1::context_guard_helper; + friend struct r1::task_arena_impl; + friend struct r1::task_group_context_impl; +}; // class task_group_context + +static_assert(sizeof(task_group_context) == 128, "Wrong size of task_group_context"); + +enum task_group_status { + not_complete, + complete, + canceled +}; + +class task_group; +class structured_task_group; +#if TBB_PREVIEW_ISOLATED_TASK_GROUP +class isolated_task_group; +#endif + +template<typename F> +class function_task : public task { + const F m_func; + wait_context& m_wait_ctx; + small_object_allocator m_allocator; + + void finalize(const execution_data& ed) { + // Make a local reference not to access this after destruction. + wait_context& wo = m_wait_ctx; + // Copy allocator to the stack + auto allocator = m_allocator; + // Destroy user functor before release wait. + this->~function_task(); + wo.release(); + + allocator.deallocate(this, ed); + } + task* execute(execution_data& ed) override { + m_func(); + finalize(ed); + return nullptr; + } + task* cancel(execution_data& ed) override { + finalize(ed); + return nullptr; + } +public: + function_task(const F& f, wait_context& wo, small_object_allocator& alloc) + : m_func(f) + , m_wait_ctx(wo) + , m_allocator(alloc) {} + + function_task(F&& f, wait_context& wo, small_object_allocator& alloc) + : m_func(std::move(f)) + , m_wait_ctx(wo) + , m_allocator(alloc) {} +}; + +template <typename F> +class function_stack_task : public task { + const F& m_func; + wait_context& m_wait_ctx; + + void finalize() { + m_wait_ctx.release(); + } + task* execute(execution_data&) override { + m_func(); + finalize(); + return nullptr; + } + task* cancel(execution_data&) override { + finalize(); + return nullptr; + } +public: + function_stack_task(const F& f, wait_context& wo) : m_func(f), m_wait_ctx(wo) {} +}; + +class task_group_base : no_copy { +protected: + wait_context m_wait_ctx; + task_group_context m_context; + + template<typename F> + task_group_status internal_run_and_wait(const F& f) { + function_stack_task<F> t{ f, m_wait_ctx }; + m_wait_ctx.reserve(); + bool cancellation_status = false; + try_call([&] { + execute_and_wait(t, m_context, m_wait_ctx, m_context); + }).on_completion([&] { + // TODO: the reset method is not thread-safe. Ensure the correct behavior. + cancellation_status = m_context.is_group_execution_cancelled(); + m_context.reset(); + }); + return cancellation_status ? 
canceled : complete; + } + + template<typename F> + task* prepare_task(F&& f) { + m_wait_ctx.reserve(); + small_object_allocator alloc{}; + return alloc.new_object<function_task<typename std::decay<F>::type>>(std::forward<F>(f), m_wait_ctx, alloc); + } + +public: + task_group_base(uintptr_t traits = 0) + : m_wait_ctx(0) + , m_context(task_group_context::bound, task_group_context::default_traits | traits) + { + } + + ~task_group_base() noexcept(false) { + if (m_wait_ctx.continue_execution()) { +#if __TBB_CPP17_UNCAUGHT_EXCEPTIONS_PRESENT + bool stack_unwinding_in_progress = std::uncaught_exceptions() > 0; +#else + bool stack_unwinding_in_progress = std::uncaught_exception(); +#endif + // Always attempt to do proper cleanup to avoid inevitable memory corruption + // in case of missing wait (for the sake of better testability & debuggability) + if (!m_context.is_group_execution_cancelled()) + cancel(); + d1::wait(m_wait_ctx, m_context); + if (!stack_unwinding_in_progress) + throw_exception(exception_id::missing_wait); + } + } + + task_group_status wait() { + bool cancellation_status = false; + try_call([&] { + d1::wait(m_wait_ctx, m_context); + }).on_completion([&] { + // TODO: the reset method is not thread-safe. Ensure the correct behavior. + cancellation_status = m_context.is_group_execution_cancelled(); + m_context.reset(); + }); + return cancellation_status ? canceled : complete; + } + + void cancel() { + m_context.cancel_group_execution(); + } +}; // class task_group_base + +class task_group : public task_group_base { +public: + task_group() : task_group_base(task_group_context::concurrent_wait) {} + + template<typename F> + void run(F&& f) { + spawn(*prepare_task(std::forward<F>(f)), m_context); + } + + template<typename F> + task_group_status run_and_wait(const F& f) { + return internal_run_and_wait(f); + } +}; // class task_group + +#if TBB_PREVIEW_ISOLATED_TASK_GROUP +class spawn_delegate : public delegate_base { + task* task_to_spawn; + task_group_context& context; + bool operator()() const override { + spawn(*task_to_spawn, context); + return true; + } +public: + spawn_delegate(task* a_task, task_group_context& ctx) + : task_to_spawn(a_task), context(ctx) + {} +}; + +class wait_delegate : public delegate_base { + bool operator()() const override { + status = tg.wait(); + return true; + } +protected: + task_group& tg; + task_group_status& status; +public: + wait_delegate(task_group& a_group, task_group_status& tgs) + : tg(a_group), status(tgs) {} +}; + +template<typename F> +class run_wait_delegate : public wait_delegate { + F& func; + bool operator()() const override { + status = tg.run_and_wait(func); + return true; + } +public: + run_wait_delegate(task_group& a_group, F& a_func, task_group_status& tgs) + : wait_delegate(a_group, tgs), func(a_func) {} +}; + +class isolated_task_group : public task_group { + intptr_t this_isolation() { + return reinterpret_cast<intptr_t>(this); + } +public: + isolated_task_group () : task_group() {} + + template<typename F> + void run(F&& f) { + spawn_delegate sd(prepare_task(std::forward<F>(f)), m_context); + r1::isolate_within_arena(sd, this_isolation()); + } + + template<typename F> + task_group_status run_and_wait( const F& f ) { + task_group_status result = not_complete; + run_wait_delegate<const F> rwd(*this, f, result); + r1::isolate_within_arena(rwd, this_isolation()); + __TBB_ASSERT(result != not_complete, "premature exit from wait?"); + return result; + } + + task_group_status wait() { + task_group_status result = not_complete; + 
wait_delegate wd(*this, result); + r1::isolate_within_arena(wd, this_isolation()); + __TBB_ASSERT(result != not_complete, "premature exit from wait?"); + return result; + } +}; // class isolated_task_group +#endif // TBB_PREVIEW_ISOLATED_TASK_GROUP + +inline bool is_current_task_group_canceling() { + task_group_context* ctx = current_context(); + return ctx ? ctx->is_group_execution_cancelled() : false; +} + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::task_group_context; +using detail::d1::task_group; +#if TBB_PREVIEW_ISOLATED_TASK_GROUP +using detail::d1::isolated_task_group; +#endif + +using detail::d1::task_group_status; +using detail::d1::not_complete; +using detail::d1::complete; +using detail::d1::canceled; + +using detail::d1::is_current_task_group_canceling; +using detail::r1::missing_wait; +} + +} // namespace tbb + +#if _MSC_VER && !defined(__INTEL_COMPILER) + #pragma warning(pop) // 4324 warning +#endif + +#endif // __TBB_task_group_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/task_scheduler_observer.h b/contrib/libs/tbb/include/oneapi/tbb/task_scheduler_observer.h new file mode 100644 index 0000000000..276ca70707 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/task_scheduler_observer.h @@ -0,0 +1,116 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_task_scheduler_observer_H +#define __TBB_task_scheduler_observer_H + +#include "detail/_namespace_injection.h" +#include "task_arena.h" +#include <atomic> + +namespace tbb { +namespace detail { + +namespace d1 { +class task_scheduler_observer; +} + +namespace r1 { +class observer_proxy; +class observer_list; + +//! Enable or disable observation +/** For local observers the method can be used only when the current thread +has the task scheduler initialized or is attached to an arena. +Repeated calls with the same state are no-ops. **/ +void __TBB_EXPORTED_FUNC observe(d1::task_scheduler_observer&, bool state = true); +} + +namespace d1 { +class task_scheduler_observer { + friend class r1::observer_proxy; + friend class r1::observer_list; + friend void r1::observe(d1::task_scheduler_observer&, bool); + + //! Pointer to the proxy holding this observer. + /** Observers are proxied by the scheduler to maintain persistent lists of them. **/ + std::atomic<r1::observer_proxy*> my_proxy{ nullptr }; + + //! Counter preventing the observer from being destroyed while in use by the scheduler. + /** Valid only when observation is on. **/ + std::atomic<intptr_t> my_busy_count{ 0 }; + + //! Contains task_arena pointer + task_arena* my_task_arena{ nullptr }; +public: + //! Returns true if observation is enabled, false otherwise. + bool is_observing() const { return my_proxy.load(std::memory_order_relaxed) != nullptr; } + + //! Entry notification + /** Invoked from inside observe(true) call and whenever a worker enters the arena + this observer is associated with. 
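For context, a small sketch of how the task_group defined above is typically used (illustrative; the fib helper, the constants, and the messages are invented for the example): run() spawns work, wait() joins it and returns a task_group_status, and cancel() requests cancellation of the whole group.

#include "oneapi/tbb/task_group.h"
#include <cstdio>

// Classic recursive decomposition: each level spawns one half and computes the other.
static int fib(int n) {
    if (n < 2) return n;
    int a = 0, b = 0;
    tbb::task_group tg;
    tg.run([&] { a = fib(n - 1); });  // spawned task
    b = fib(n - 2);                   // work done by the calling thread
    tg.wait();                        // synchronizes access to 'a'
    return a + b;
}

int main() {
    std::printf("fib(20) = %d\n", fib(20));

    // Cancellation: cancel() marks the group's context as cancelled, so wait()
    // reports tbb::canceled even if the submitted work has already finished.
    tbb::task_group tg;
    tg.run([] { /* long-running work would go here */ });
    tg.cancel();
    tbb::task_group_status st = tg.wait();
    std::printf("group status: %s\n", st == tbb::canceled ? "canceled" : "complete");
    return 0;
}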
If a thread is already in the arena when + the observer is activated, the entry notification is called before it + executes the first stolen task. **/ + virtual void on_scheduler_entry( bool /*is_worker*/ ) {} + + //! Exit notification + /** Invoked from inside observe(false) call and whenever a worker leaves the + arena this observer is associated with. **/ + virtual void on_scheduler_exit( bool /*is_worker*/ ) {} + + //! Construct local or global observer in inactive state (observation disabled). + /** For a local observer entry/exit notifications are invoked whenever a worker + thread joins/leaves the arena of the observer's owner thread. If a thread is + already in the arena when the observer is activated, the entry notification is + called before it executes the first stolen task. **/ + explicit task_scheduler_observer() = default; + + //! Construct local observer for a given arena in inactive state (observation disabled). + /** entry/exit notifications are invoked whenever a thread joins/leaves arena. + If a thread is already in the arena when the observer is activated, the entry notification + is called before it executes the first stolen task. **/ + explicit task_scheduler_observer(task_arena& a) : my_task_arena(&a) {} + + /** Destructor protects instance of the observer from concurrent notification. + It is recommended to disable observation before destructor of a derived class starts, + otherwise it can lead to concurrent notification callback on partly destroyed object **/ + virtual ~task_scheduler_observer() { + if (my_proxy.load(std::memory_order_relaxed)) { + observe(false); + } + } + + //! Enable or disable observation + /** Warning: concurrent invocations of this method are not safe. + Repeated calls with the same state are no-ops. **/ + void observe(bool state = true) { + if( state && !my_proxy.load(std::memory_order_relaxed) ) { + __TBB_ASSERT( my_busy_count.load(std::memory_order_relaxed) == 0, "Inconsistent state of task_scheduler_observer instance"); + } + r1::observe(*this, state); + } +}; + +} // namespace d1 +} // namespace detail + +inline namespace v1 { + using detail::d1::task_scheduler_observer; +} +} // namespace tbb + + +#endif /* __TBB_task_scheduler_observer_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/tbb_allocator.h b/contrib/libs/tbb/include/oneapi/tbb/tbb_allocator.h new file mode 100644 index 0000000000..3da61a009d --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/tbb_allocator.h @@ -0,0 +1,126 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
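A hedged sketch of using the observer above to count threads entering a particular arena (the thread_counter class and the numbers are invented for illustration; calling arena.initialize() before constructing the observer is a conservative assumption so the arena exists when observation starts).

#include "oneapi/tbb/task_scheduler_observer.h"
#include "oneapi/tbb/task_arena.h"
#include "oneapi/tbb/parallel_for.h"
#include <atomic>
#include <cstdio>

// Counts every thread that enters the observed arena.
class thread_counter : public tbb::task_scheduler_observer {
    std::atomic<int> entries{0};
public:
    explicit thread_counter(tbb::task_arena& a) : tbb::task_scheduler_observer(a) {
        observe(true);                                  // start receiving notifications
    }
    ~thread_counter() override { observe(false); }      // stop before members are destroyed
    void on_scheduler_entry(bool /*is_worker*/) override { ++entries; }
    int count() const { return entries.load(); }
};

int main() {
    tbb::task_arena arena(4);
    arena.initialize();
    thread_counter counter(arena);
    arena.execute([] {
        tbb::parallel_for(0, 100000, [](int) { /* some work */ });
    });
    std::printf("threads that entered the arena: %d\n", counter.count());
    return 0;
}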
+*/ + +#ifndef __TBB_tbb_allocator_H +#define __TBB_tbb_allocator_H + +#include "oneapi/tbb/detail/_utils.h" +#include "detail/_namespace_injection.h" +#include <cstdlib> +#include <utility> + +#if __TBB_CPP17_MEMORY_RESOURCE_PRESENT +#error #include <memory_resource> +#endif + +namespace tbb { +namespace detail { + +namespace r1 { +void* __TBB_EXPORTED_FUNC allocate_memory(std::size_t size); +void __TBB_EXPORTED_FUNC deallocate_memory(void* p); +bool __TBB_EXPORTED_FUNC is_tbbmalloc_used(); +} + +namespace d1 { + +template<typename T> +class tbb_allocator { +public: + using value_type = T; + using propagate_on_container_move_assignment = std::true_type; + + //! Always defined for TBB containers (supported since C++17 for std containers) + using is_always_equal = std::true_type; + + //! Specifies current allocator + enum malloc_type { + scalable, + standard + }; + + tbb_allocator() = default; + template<typename U> tbb_allocator(const tbb_allocator<U>&) noexcept {} + + //! Allocate space for n objects. + __TBB_nodiscard T* allocate(std::size_t n) { + return static_cast<T*>(r1::allocate_memory(n * sizeof(value_type))); + } + + //! Free previously allocated block of memory. + void deallocate(T* p, std::size_t) { + r1::deallocate_memory(p); + } + + //! Returns current allocator + static malloc_type allocator_type() { + return r1::is_tbbmalloc_used() ? standard : scalable; + } + +#if TBB_ALLOCATOR_TRAITS_BROKEN + using pointer = value_type*; + using const_pointer = const value_type*; + using reference = value_type&; + using const_reference = const value_type&; + using difference_type = std::ptrdiff_t; + using size_type = std::size_t; + template<typename U> struct rebind { + using other = tbb_allocator<U>; + }; + //! Largest value for which method allocate might succeed. + size_type max_size() const noexcept { + size_type max = ~(std::size_t(0)) / sizeof(value_type); + return (max > 0 ? max : 1); + } + template<typename U, typename... Args> + void construct(U *p, Args&&... args) + { ::new (p) U(std::forward<Args>(args)...); } + void destroy( pointer p ) { p->~value_type(); } + pointer address(reference x) const { return &x; } + const_pointer address(const_reference x) const { return &x; } +#endif // TBB_ALLOCATOR_TRAITS_BROKEN +}; + +#if TBB_ALLOCATOR_TRAITS_BROKEN + template<> + class tbb_allocator<void> { + public: + using pointer = void*; + using const_pointer = const void*; + using value_type = void; + template<typename U> struct rebind { + using other = tbb_allocator<U>; + }; + }; +#endif + +template<typename T, typename U> +inline bool operator==(const tbb_allocator<T>&, const tbb_allocator<U>&) noexcept { return true; } + +#if !__TBB_CPP20_COMPARISONS_PRESENT +template<typename T, typename U> +inline bool operator!=(const tbb_allocator<T>&, const tbb_allocator<U>&) noexcept { return false; } +#endif + +} // namespace d1 +} // namespace detail + +inline namespace v1 { +using detail::d1::tbb_allocator; +} // namespace v1 +} // namespace tbb + +#endif /* __TBB_tbb_allocator_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/tbbmalloc_proxy.h b/contrib/libs/tbb/include/oneapi/tbb/tbbmalloc_proxy.h new file mode 100644 index 0000000000..0ba38f215e --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/tbbmalloc_proxy.h @@ -0,0 +1,65 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
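An illustrative way to plug the allocator above into a standard container and to ask which backend it resolved to at run time (the container contents and the messages are arbitrary):

#include "oneapi/tbb/tbb_allocator.h"
#include <vector>
#include <cstdio>

int main() {
    // tbb_allocator is a drop-in allocator for standard containers.
    std::vector<int, tbb::tbb_allocator<int>> v;
    for (int i = 0; i < 100; ++i) v.push_back(i);

    // allocator_type() tells whether allocations go through the scalable
    // tbbmalloc backend or fall back to standard malloc/free.
    bool uses_tbbmalloc =
        tbb::tbb_allocator<int>::allocator_type() == tbb::tbb_allocator<int>::scalable;
    std::printf("size = %zu, backend = %s\n",
                v.size(), uses_tbbmalloc ? "scalable (tbbmalloc)" : "standard (malloc)");
    return 0;
}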
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +/* +Replacing the standard memory allocation routines in Microsoft* C/C++ RTL +(malloc/free, global new/delete, etc.) with the TBB memory allocator. + +Include the following header to a source of any binary which is loaded during +application startup + +#include "oneapi/tbb/tbbmalloc_proxy.h" + +or add following parameters to the linker options for the binary which is +loaded during application startup. It can be either exe-file or dll. + +For win32 +tbbmalloc_proxy.lib /INCLUDE:"___TBB_malloc_proxy" +win64 +tbbmalloc_proxy.lib /INCLUDE:"__TBB_malloc_proxy" +*/ + +#ifndef __TBB_tbbmalloc_proxy_H +#define __TBB_tbbmalloc_proxy_H + +#if _MSC_VER + +#ifdef _DEBUG + #pragma comment(lib, "tbbmalloc_proxy_debug.lib") +#else + #pragma comment(lib, "tbbmalloc_proxy.lib") +#endif + +#if defined(_WIN64) + #pragma comment(linker, "/include:__TBB_malloc_proxy") +#else + #pragma comment(linker, "/include:___TBB_malloc_proxy") +#endif + +#else +/* Primarily to support MinGW */ + +extern "C" void __TBB_malloc_proxy(); +struct __TBB_malloc_proxy_caller { + __TBB_malloc_proxy_caller() { __TBB_malloc_proxy(); } +} volatile __TBB_malloc_proxy_helper_object; + +#endif // _MSC_VER + +/* Public Windows API */ +extern "C" int TBB_malloc_replacement_log(char *** function_replacement_log_ptr); + +#endif //__TBB_tbbmalloc_proxy_H diff --git a/contrib/libs/tbb/include/oneapi/tbb/tick_count.h b/contrib/libs/tbb/include/oneapi/tbb/tick_count.h new file mode 100644 index 0000000000..2caa56ba18 --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/tick_count.h @@ -0,0 +1,99 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_tick_count_H +#define __TBB_tick_count_H + +#include <chrono> + +#include "detail/_namespace_injection.h" + +namespace tbb { +namespace detail { +namespace d1 { + + +//! Absolute timestamp +/** @ingroup timing */ +class tick_count { +public: + using clock_type = typename std::conditional<std::chrono::high_resolution_clock::is_steady, + std::chrono::high_resolution_clock, std::chrono::steady_clock>::type; + + //! Relative time interval. + class interval_t : public clock_type::duration { + public: + //! Construct a time interval representing zero time duration + interval_t() : clock_type::duration(clock_type::duration::zero()) {} + + //! Construct a time interval representing sec seconds time duration + explicit interval_t( double sec ) + : clock_type::duration(std::chrono::duration_cast<clock_type::duration>(std::chrono::duration<double>(sec))) {} + + //! 
Return the length of a time interval in seconds + double seconds() const { + return std::chrono::duration_cast<std::chrono::duration<double>>(*this).count(); + } + + //! Extract the intervals from the tick_counts and subtract them. + friend interval_t operator-( const tick_count& t1, const tick_count& t0 ); + + //! Add two intervals. + friend interval_t operator+( const interval_t& i, const interval_t& j ) { + return interval_t(std::chrono::operator+(i, j)); + } + + //! Subtract two intervals. + friend interval_t operator-( const interval_t& i, const interval_t& j ) { + return interval_t(std::chrono::operator-(i, j)); + } + + private: + explicit interval_t( clock_type::duration value_ ) : clock_type::duration(value_) {} + }; + + tick_count() = default; + + //! Return current time. + static tick_count now() { + return clock_type::now(); + } + + //! Subtract two timestamps to get the time interval between + friend interval_t operator-( const tick_count& t1, const tick_count& t0 ) { + return tick_count::interval_t(t1.my_time_point - t0.my_time_point); + } + + //! Return the resolution of the clock in seconds per tick. + static double resolution() { + return static_cast<double>(interval_t::period::num) / interval_t::period::den; + } + +private: + clock_type::time_point my_time_point; + tick_count( clock_type::time_point tp ) : my_time_point(tp) {} +}; + +} // namespace d1 +} // namespace detail + +inline namespace v1 { + using detail::d1::tick_count; +} // namespace v1 + +} // namespace tbb + +#endif /* __TBB_tick_count_H */ diff --git a/contrib/libs/tbb/include/oneapi/tbb/version.h b/contrib/libs/tbb/include/oneapi/tbb/version.h new file mode 100644 index 0000000000..1e3507cd9b --- /dev/null +++ b/contrib/libs/tbb/include/oneapi/tbb/version.h @@ -0,0 +1,108 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#ifndef __TBB_version_H +#define __TBB_version_H + +#include "detail/_config.h" +#include "detail/_namespace_injection.h" + +// Product version +#define TBB_VERSION_MAJOR 2021 +// Update version +#define TBB_VERSION_MINOR 2 +// "Patch" version for custom releases +#define TBB_VERSION_PATCH 0 +// Suffix string +#define __TBB_VERSION_SUFFIX "" +// Full official version string +#define TBB_VERSION_STRING __TBB_STRING(TBB_VERSION_MAJOR) "." __TBB_STRING(TBB_VERSION_MINOR) __TBB_VERSION_SUFFIX + +// OneAPI oneTBB specification version +#define ONETBB_SPEC_VERSION "1.0" +// Full interface version +#define TBB_INTERFACE_VERSION 12020 +// Major interface version +#define TBB_INTERFACE_VERSION_MAJOR (TBB_INTERFACE_VERSION/1000) +// Minor interface version +#define TBB_INTERFACE_VERSION_MINOR (TBB_INTERFACE_VERSION%1000/10) + +// The binary compatibility version +// To be used in SONAME, manifests, etc. +#define __TBB_BINARY_VERSION 12 + +//! 
TBB_VERSION support +#ifndef ENDL +#define ENDL "\n" +#endif + +//TBB_REVAMP_TODO: consider enabling version_string.ver generation +//TBB_REVAMP_TODO: #include "version_string.ver" + +#define __TBB_ONETBB_SPEC_VERSION(N) #N ": SPECIFICATION VERSION\t" ONETBB_SPEC_VERSION ENDL +#define __TBB_VERSION_NUMBER(N) #N ": VERSION\t\t" TBB_VERSION_STRING ENDL +#define __TBB_INTERFACE_VERSION_NUMBER(N) #N ": INTERFACE VERSION\t" __TBB_STRING(TBB_INTERFACE_VERSION) ENDL + +#ifndef TBB_USE_DEBUG + #define __TBB_VERSION_USE_DEBUG(N) #N ": TBB_USE_DEBUG\tundefined" ENDL +#elif TBB_USE_DEBUG==0 + #define __TBB_VERSION_USE_DEBUG(N) #N ": TBB_USE_DEBUG\t0" ENDL +#elif TBB_USE_DEBUG==1 + #define __TBB_VERSION_USE_DEBUG(N) #N ": TBB_USE_DEBUG\t1" ENDL +#elif TBB_USE_DEBUG==2 + #define __TBB_VERSION_USE_DEBUG(N) #N ": TBB_USE_DEBUG\t2" ENDL +#else + #error Unexpected value for TBB_USE_DEBUG +#endif + +#ifndef TBB_USE_ASSERT + #define __TBB_VERSION_USE_ASSERT(N) #N ": TBB_USE_ASSERT\tundefined" ENDL +#elif TBB_USE_ASSERT==0 + #define __TBB_VERSION_USE_ASSERT(N) #N ": TBB_USE_ASSERT\t0" ENDL +#elif TBB_USE_ASSERT==1 + #define __TBB_VERSION_USE_ASSERT(N) #N ": TBB_USE_ASSERT\t1" ENDL +#elif TBB_USE_ASSERT==2 + #define __TBB_VERSION_USE_ASSERT(N) #N ": TBB_USE_ASSERT\t2" ENDL +#else + #error Unexpected value for TBB_USE_ASSERT +#endif + +#define TBB_VERSION_STRINGS_P(N) \ + __TBB_ONETBB_SPEC_VERSION(N) \ + __TBB_VERSION_NUMBER(N) \ + __TBB_INTERFACE_VERSION_NUMBER(N) \ + __TBB_VERSION_USE_DEBUG(N) \ + __TBB_VERSION_USE_ASSERT(N) + +#define TBB_VERSION_STRINGS TBB_VERSION_STRINGS_P(oneTBB) +#define TBBMALLOC_VERSION_STRINGS TBB_VERSION_STRINGS_P(TBBmalloc) + +//! The function returns the version string for the Intel(R) oneAPI Threading Building Blocks (oneTBB) +//! shared library being used. +/** + * The returned pointer is an address of a string in the shared library. + * It can be different than the TBB_VERSION_STRING obtained at compile time. + */ +extern "C" const char* __TBB_EXPORTED_FUNC TBB_runtime_version(); + +//! The function returns the interface version of the oneTBB shared library being used. +/** + * The returned version is determined at runtime, not at compile/link time. + * It can be different than the value of TBB_INTERFACE_VERSION obtained at compile time. + */ +extern "C" int __TBB_EXPORTED_FUNC TBB_runtime_interface_version(); + +#endif // __TBB_version_H diff --git a/contrib/libs/tbb/include/tbb/blocked_range.h b/contrib/libs/tbb/include/tbb/blocked_range.h new file mode 100644 index 0000000000..316ec01ba9 --- /dev/null +++ b/contrib/libs/tbb/include/tbb/blocked_range.h @@ -0,0 +1,17 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
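A small sketch combining the tick_count and version headers above: it prints compile-time versus run-time version information and times a parallel loop (the loop body and iteration count are placeholders):

#include "oneapi/tbb/tick_count.h"
#include "oneapi/tbb/version.h"
#include "oneapi/tbb/parallel_for.h"
#include <cstdio>

int main() {
    // Compile-time macros vs. the version of the shared library actually loaded.
    std::printf("compiled against %s (interface %d), running with %s (interface %d)\n",
                TBB_VERSION_STRING, TBB_INTERFACE_VERSION,
                TBB_runtime_version(), TBB_runtime_interface_version());

    // Wall-clock timing with tick_count.
    tbb::tick_count t0 = tbb::tick_count::now();
    tbb::parallel_for(0, 1000000, [](int) { /* some work */ });
    tbb::tick_count t1 = tbb::tick_count::now();
    std::printf("elapsed: %.6f s (clock resolution: %.2e s)\n",
                (t1 - t0).seconds(), tbb::tick_count::resolution());
    return 0;
}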
+*/ + +#include "../oneapi/tbb/blocked_range.h" diff --git a/contrib/libs/tbb/include/tbb/blocked_range2d.h b/contrib/libs/tbb/include/tbb/blocked_range2d.h new file mode 100644 index 0000000000..1e13240787 --- /dev/null +++ b/contrib/libs/tbb/include/tbb/blocked_range2d.h @@ -0,0 +1,17 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "../oneapi/tbb/blocked_range2d.h" diff --git a/contrib/libs/tbb/include/tbb/blocked_range3d.h b/contrib/libs/tbb/include/tbb/blocked_range3d.h new file mode 100644 index 0000000000..3321979660 --- /dev/null +++ b/contrib/libs/tbb/include/tbb/blocked_range3d.h @@ -0,0 +1,17 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "../oneapi/tbb/blocked_range3d.h" diff --git a/contrib/libs/tbb/include/tbb/blocked_rangeNd.h b/contrib/libs/tbb/include/tbb/blocked_rangeNd.h new file mode 100644 index 0000000000..0c0fb7303a --- /dev/null +++ b/contrib/libs/tbb/include/tbb/blocked_rangeNd.h @@ -0,0 +1,17 @@ +/* + Copyright (c) 2017-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "../oneapi/tbb/blocked_rangeNd.h" diff --git a/contrib/libs/tbb/include/tbb/cache_aligned_allocator.h b/contrib/libs/tbb/include/tbb/cache_aligned_allocator.h new file mode 100644 index 0000000000..2d3c66a74a --- /dev/null +++ b/contrib/libs/tbb/include/tbb/cache_aligned_allocator.h @@ -0,0 +1,17 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#include "../oneapi/tbb/cache_aligned_allocator.h" diff --git a/contrib/libs/tbb/include/tbb/combinable.h b/contrib/libs/tbb/include/tbb/combinable.h new file mode 100644 index 0000000000..50295ec72a --- /dev/null +++ b/contrib/libs/tbb/include/tbb/combinable.h @@ -0,0 +1,17 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "../oneapi/tbb/combinable.h" diff --git a/contrib/libs/tbb/include/tbb/concurrent_hash_map.h b/contrib/libs/tbb/include/tbb/concurrent_hash_map.h new file mode 100644 index 0000000000..68652c5961 --- /dev/null +++ b/contrib/libs/tbb/include/tbb/concurrent_hash_map.h @@ -0,0 +1,17 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "../oneapi/tbb/concurrent_hash_map.h" diff --git a/contrib/libs/tbb/include/tbb/concurrent_lru_cache.h b/contrib/libs/tbb/include/tbb/concurrent_lru_cache.h new file mode 100644 index 0000000000..2757a234be --- /dev/null +++ b/contrib/libs/tbb/include/tbb/concurrent_lru_cache.h @@ -0,0 +1,17 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "../oneapi/tbb/concurrent_lru_cache.h" diff --git a/contrib/libs/tbb/include/tbb/concurrent_map.h b/contrib/libs/tbb/include/tbb/concurrent_map.h new file mode 100644 index 0000000000..84f59d7e66 --- /dev/null +++ b/contrib/libs/tbb/include/tbb/concurrent_map.h @@ -0,0 +1,17 @@ +/* + Copyright (c) 2019-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#include "../oneapi/tbb/concurrent_map.h" diff --git a/contrib/libs/tbb/include/tbb/concurrent_priority_queue.h b/contrib/libs/tbb/include/tbb/concurrent_priority_queue.h new file mode 100644 index 0000000000..3b27130b1e --- /dev/null +++ b/contrib/libs/tbb/include/tbb/concurrent_priority_queue.h @@ -0,0 +1,17 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "../oneapi/tbb/concurrent_priority_queue.h" diff --git a/contrib/libs/tbb/include/tbb/concurrent_queue.h b/contrib/libs/tbb/include/tbb/concurrent_queue.h new file mode 100644 index 0000000000..d81a58b887 --- /dev/null +++ b/contrib/libs/tbb/include/tbb/concurrent_queue.h @@ -0,0 +1,17 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "../oneapi/tbb/concurrent_queue.h" diff --git a/contrib/libs/tbb/include/tbb/concurrent_set.h b/contrib/libs/tbb/include/tbb/concurrent_set.h new file mode 100644 index 0000000000..cf4652f597 --- /dev/null +++ b/contrib/libs/tbb/include/tbb/concurrent_set.h @@ -0,0 +1,17 @@ +/* + Copyright (c) 2019-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include "../oneapi/tbb/concurrent_set.h" diff --git a/contrib/libs/tbb/include/tbb/concurrent_unordered_map.h b/contrib/libs/tbb/include/tbb/concurrent_unordered_map.h new file mode 100644 index 0000000000..9475c06cf3 --- /dev/null +++ b/contrib/libs/tbb/include/tbb/concurrent_unordered_map.h @@ -0,0 +1,17 @@ +/* + Copyright (c) 2005-2021 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "../oneapi/tbb/concurrent_unordered_map.h"
diff --git a/contrib/libs/tbb/include/tbb/concurrent_unordered_set.h b/contrib/libs/tbb/include/tbb/concurrent_unordered_set.h
new file mode 100644
index 0000000000..81a8f9c37d
--- /dev/null
+++ b/contrib/libs/tbb/include/tbb/concurrent_unordered_set.h
@@ -0,0 +1,17 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "../oneapi/tbb/concurrent_unordered_set.h"
diff --git a/contrib/libs/tbb/include/tbb/concurrent_vector.h b/contrib/libs/tbb/include/tbb/concurrent_vector.h
new file mode 100644
index 0000000000..c1fc97c623
--- /dev/null
+++ b/contrib/libs/tbb/include/tbb/concurrent_vector.h
@@ -0,0 +1,17 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "../oneapi/tbb/concurrent_vector.h"
diff --git a/contrib/libs/tbb/include/tbb/enumerable_thread_specific.h b/contrib/libs/tbb/include/tbb/enumerable_thread_specific.h
new file mode 100644
index 0000000000..9d6050d64f
--- /dev/null
+++ b/contrib/libs/tbb/include/tbb/enumerable_thread_specific.h
@@ -0,0 +1,17 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "../oneapi/tbb/enumerable_thread_specific.h"
diff --git a/contrib/libs/tbb/include/tbb/flow_graph.h b/contrib/libs/tbb/include/tbb/flow_graph.h
new file mode 100644
index 0000000000..40da468fe0
--- /dev/null
+++ b/contrib/libs/tbb/include/tbb/flow_graph.h
@@ -0,0 +1,17 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "../oneapi/tbb/flow_graph.h"
diff --git a/contrib/libs/tbb/include/tbb/flow_graph_abstractions.h b/contrib/libs/tbb/include/tbb/flow_graph_abstractions.h
new file mode 100644
index 0000000000..cd9dc2967e
--- /dev/null
+++ b/contrib/libs/tbb/include/tbb/flow_graph_abstractions.h
@@ -0,0 +1,17 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "../oneapi/tbb/flow_graph_abstractions.h"
diff --git a/contrib/libs/tbb/include/tbb/global_control.h b/contrib/libs/tbb/include/tbb/global_control.h
new file mode 100644
index 0000000000..2688996ecb
--- /dev/null
+++ b/contrib/libs/tbb/include/tbb/global_control.h
@@ -0,0 +1,17 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "../oneapi/tbb/global_control.h"
diff --git a/contrib/libs/tbb/include/tbb/info.h b/contrib/libs/tbb/include/tbb/info.h
new file mode 100644
index 0000000000..02d331650e
--- /dev/null
+++ b/contrib/libs/tbb/include/tbb/info.h
@@ -0,0 +1,17 @@
+/*
+ Copyright (c) 2019-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "../oneapi/tbb/info.h"
diff --git a/contrib/libs/tbb/include/tbb/memory_pool.h b/contrib/libs/tbb/include/tbb/memory_pool.h
new file mode 100644
index 0000000000..cefe96e36d
--- /dev/null
+++ b/contrib/libs/tbb/include/tbb/memory_pool.h
@@ -0,0 +1,17 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "../oneapi/tbb/memory_pool.h"
diff --git a/contrib/libs/tbb/include/tbb/null_mutex.h b/contrib/libs/tbb/include/tbb/null_mutex.h
new file mode 100644
index 0000000000..63218bf061
--- /dev/null
+++ b/contrib/libs/tbb/include/tbb/null_mutex.h
@@ -0,0 +1,17 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "../oneapi/tbb/null_mutex.h"
diff --git a/contrib/libs/tbb/include/tbb/null_rw_mutex.h b/contrib/libs/tbb/include/tbb/null_rw_mutex.h
new file mode 100644
index 0000000000..71c42fe26a
--- /dev/null
+++ b/contrib/libs/tbb/include/tbb/null_rw_mutex.h
@@ -0,0 +1,17 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "../oneapi/tbb/null_rw_mutex.h"
diff --git a/contrib/libs/tbb/include/tbb/parallel_for.h b/contrib/libs/tbb/include/tbb/parallel_for.h
new file mode 100644
index 0000000000..fea1d1b9f5
--- /dev/null
+++ b/contrib/libs/tbb/include/tbb/parallel_for.h
@@ -0,0 +1,17 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "../oneapi/tbb/parallel_for.h"
diff --git a/contrib/libs/tbb/include/tbb/parallel_for_each.h b/contrib/libs/tbb/include/tbb/parallel_for_each.h
new file mode 100644
index 0000000000..27c2ab1727
--- /dev/null
+++ b/contrib/libs/tbb/include/tbb/parallel_for_each.h
@@ -0,0 +1,17 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "../oneapi/tbb/parallel_for_each.h"
diff --git a/contrib/libs/tbb/include/tbb/parallel_invoke.h b/contrib/libs/tbb/include/tbb/parallel_invoke.h
new file mode 100644
index 0000000000..6c21100e70
--- /dev/null
+++ b/contrib/libs/tbb/include/tbb/parallel_invoke.h
@@ -0,0 +1,17 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "../oneapi/tbb/parallel_invoke.h"
diff --git a/contrib/libs/tbb/include/tbb/parallel_pipeline.h b/contrib/libs/tbb/include/tbb/parallel_pipeline.h
new file mode 100644
index 0000000000..aceee49f8a
--- /dev/null
+++ b/contrib/libs/tbb/include/tbb/parallel_pipeline.h
@@ -0,0 +1,17 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "../oneapi/tbb/parallel_pipeline.h"
diff --git a/contrib/libs/tbb/include/tbb/parallel_reduce.h b/contrib/libs/tbb/include/tbb/parallel_reduce.h
new file mode 100644
index 0000000000..83658755a8
--- /dev/null
+++ b/contrib/libs/tbb/include/tbb/parallel_reduce.h
@@ -0,0 +1,17 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "../oneapi/tbb/parallel_reduce.h"
diff --git a/contrib/libs/tbb/include/tbb/parallel_scan.h b/contrib/libs/tbb/include/tbb/parallel_scan.h
new file mode 100644
index 0000000000..682032a561
--- /dev/null
+++ b/contrib/libs/tbb/include/tbb/parallel_scan.h
@@ -0,0 +1,17 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "../oneapi/tbb/parallel_scan.h"
diff --git a/contrib/libs/tbb/include/tbb/parallel_sort.h b/contrib/libs/tbb/include/tbb/parallel_sort.h
new file mode 100644
index 0000000000..b238e6caa4
--- /dev/null
+++ b/contrib/libs/tbb/include/tbb/parallel_sort.h
@@ -0,0 +1,17 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "../oneapi/tbb/parallel_sort.h"
diff --git a/contrib/libs/tbb/include/tbb/partitioner.h b/contrib/libs/tbb/include/tbb/partitioner.h
new file mode 100644
index 0000000000..b959e35a2f
--- /dev/null
+++ b/contrib/libs/tbb/include/tbb/partitioner.h
@@ -0,0 +1,17 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "../oneapi/tbb/partitioner.h"
diff --git a/contrib/libs/tbb/include/tbb/profiling.h b/contrib/libs/tbb/include/tbb/profiling.h
new file mode 100644
index 0000000000..c7cea9c590
--- /dev/null
+++ b/contrib/libs/tbb/include/tbb/profiling.h
@@ -0,0 +1,17 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "../oneapi/tbb/profiling.h"
diff --git a/contrib/libs/tbb/include/tbb/queuing_mutex.h b/contrib/libs/tbb/include/tbb/queuing_mutex.h
new file mode 100644
index 0000000000..ad031e4eb7
--- /dev/null
+++ b/contrib/libs/tbb/include/tbb/queuing_mutex.h
@@ -0,0 +1,17 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "../oneapi/tbb/queuing_mutex.h"
diff --git a/contrib/libs/tbb/include/tbb/queuing_rw_mutex.h b/contrib/libs/tbb/include/tbb/queuing_rw_mutex.h
new file mode 100644
index 0000000000..203727ccc5
--- /dev/null
+++ b/contrib/libs/tbb/include/tbb/queuing_rw_mutex.h
@@ -0,0 +1,17 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "../oneapi/tbb/queuing_rw_mutex.h"
diff --git a/contrib/libs/tbb/include/tbb/scalable_allocator.h b/contrib/libs/tbb/include/tbb/scalable_allocator.h
new file mode 100644
index 0000000000..5c654ebd68
--- /dev/null
+++ b/contrib/libs/tbb/include/tbb/scalable_allocator.h
@@ -0,0 +1,17 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "../oneapi/tbb/scalable_allocator.h"
diff --git a/contrib/libs/tbb/include/tbb/spin_mutex.h b/contrib/libs/tbb/include/tbb/spin_mutex.h
new file mode 100644
index 0000000000..1a6f7f077f
--- /dev/null
+++ b/contrib/libs/tbb/include/tbb/spin_mutex.h
@@ -0,0 +1,17 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "../oneapi/tbb/spin_mutex.h"
diff --git a/contrib/libs/tbb/include/tbb/spin_rw_mutex.h b/contrib/libs/tbb/include/tbb/spin_rw_mutex.h
new file mode 100644
index 0000000000..d36282b486
--- /dev/null
+++ b/contrib/libs/tbb/include/tbb/spin_rw_mutex.h
@@ -0,0 +1,17 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "../oneapi/tbb/spin_rw_mutex.h"
diff --git a/contrib/libs/tbb/include/tbb/task.h b/contrib/libs/tbb/include/tbb/task.h
new file mode 100644
index 0000000000..9be95b0d69
--- /dev/null
+++ b/contrib/libs/tbb/include/tbb/task.h
@@ -0,0 +1,17 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "../oneapi/tbb/task.h"
diff --git a/contrib/libs/tbb/include/tbb/task_arena.h b/contrib/libs/tbb/include/tbb/task_arena.h
new file mode 100644
index 0000000000..f6e34b3e6d
--- /dev/null
+++ b/contrib/libs/tbb/include/tbb/task_arena.h
@@ -0,0 +1,17 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "../oneapi/tbb/task_arena.h"
diff --git a/contrib/libs/tbb/include/tbb/task_group.h b/contrib/libs/tbb/include/tbb/task_group.h
new file mode 100644
index 0000000000..2f02503971
--- /dev/null
+++ b/contrib/libs/tbb/include/tbb/task_group.h
@@ -0,0 +1,17 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "../oneapi/tbb/task_group.h"
diff --git a/contrib/libs/tbb/include/tbb/task_scheduler_observer.h b/contrib/libs/tbb/include/tbb/task_scheduler_observer.h
new file mode 100644
index 0000000000..9236f4cdf4
--- /dev/null
+++ b/contrib/libs/tbb/include/tbb/task_scheduler_observer.h
@@ -0,0 +1,17 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "../oneapi/tbb/task_scheduler_observer.h"
diff --git a/contrib/libs/tbb/include/tbb/tbb.h b/contrib/libs/tbb/include/tbb/tbb.h
new file mode 100644
index 0000000000..e443b8f1ca
--- /dev/null
+++ b/contrib/libs/tbb/include/tbb/tbb.h
@@ -0,0 +1,17 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "../oneapi/tbb.h"
diff --git a/contrib/libs/tbb/include/tbb/tbb_allocator.h b/contrib/libs/tbb/include/tbb/tbb_allocator.h
new file mode 100644
index 0000000000..81ab9d33b5
--- /dev/null
+++ b/contrib/libs/tbb/include/tbb/tbb_allocator.h
@@ -0,0 +1,17 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "../oneapi/tbb/tbb_allocator.h"
diff --git a/contrib/libs/tbb/include/tbb/tbbmalloc_proxy.h b/contrib/libs/tbb/include/tbb/tbbmalloc_proxy.h
new file mode 100644
index 0000000000..93eaa18e80
--- /dev/null
+++ b/contrib/libs/tbb/include/tbb/tbbmalloc_proxy.h
@@ -0,0 +1,17 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "../oneapi/tbb/tbbmalloc_proxy.h"
diff --git a/contrib/libs/tbb/include/tbb/tick_count.h b/contrib/libs/tbb/include/tbb/tick_count.h
new file mode 100644
index 0000000000..170074aebb
--- /dev/null
+++ b/contrib/libs/tbb/include/tbb/tick_count.h
@@ -0,0 +1,17 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "../oneapi/tbb/tick_count.h"
diff --git a/contrib/libs/tbb/include/tbb/version.h b/contrib/libs/tbb/include/tbb/version.h
new file mode 100644
index 0000000000..cd13a83a15
--- /dev/null
+++ b/contrib/libs/tbb/include/tbb/version.h
@@ -0,0 +1,17 @@
+/*
+ Copyright (c) 2005-2021 Intel Corporation
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+#include "../oneapi/tbb/version.h"
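Note: the tbb/*.h files added in this part of the commit are thin compatibility shims; apart from the license header, each one only re-includes the matching header under oneapi/tbb/, so sources that still use the classic <tbb/...> include paths keep building against the bundled oneTBB. A minimal sketch of how such a shim is consumed is shown below; the vector size and the doubling loop are illustrative only and are not part of the commit.

// Illustrative only: exercises the tbb/parallel_for.h wrapper added above,
// which forwards to ../oneapi/tbb/parallel_for.h.
#include "tbb/parallel_for.h"

#include <cstddef>
#include <vector>

int main() {
    std::vector<int> data(1000, 1);
    // The classic tbb:: spelling keeps working because the wrapper pulls in
    // the oneTBB implementation of parallel_for.
    tbb::parallel_for(std::size_t(0), data.size(), [&](std::size_t i) {
        data[i] *= 2;
    });
    return 0;
}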